aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging')
-rw-r--r--drivers/staging/Kconfig6
-rw-r--r--drivers/staging/Makefile3
-rw-r--r--drivers/staging/android/ashmem.c3
-rw-r--r--drivers/staging/android/binder.c2
-rw-r--r--drivers/staging/android/sw_sync.c6
-rw-r--r--drivers/staging/android/sync.c22
-rw-r--r--drivers/staging/asus_oled/asus_oled.c38
-rw-r--r--drivers/staging/bcm/Bcmchar.c2
-rw-r--r--drivers/staging/bcm/InterfaceIdleMode.c199
-rw-r--r--drivers/staging/bcm/Version.h8
-rw-r--r--drivers/staging/bcm/vendorspecificextn.c198
-rw-r--r--drivers/staging/btmtk_usb/Kconfig11
-rw-r--r--drivers/staging/btmtk_usb/Makefile1
-rw-r--r--drivers/staging/btmtk_usb/README14
-rw-r--r--drivers/staging/btmtk_usb/TODO10
-rw-r--r--drivers/staging/btmtk_usb/btmtk_usb.c1784
-rw-r--r--drivers/staging/btmtk_usb/btmtk_usb.h138
-rw-r--r--drivers/staging/ced1401/ced_ioc.c665
-rw-r--r--drivers/staging/ced1401/ced_ioctl.h2
-rw-r--r--drivers/staging/ced1401/machine.h32
-rw-r--r--drivers/staging/ced1401/usb1401.c613
-rw-r--r--drivers/staging/ced1401/usb1401.h285
-rw-r--r--drivers/staging/ced1401/use1401.h201
-rw-r--r--drivers/staging/ced1401/use14_ioc.h503
-rw-r--r--drivers/staging/ced1401/userspace/use1401.c152
-rw-r--r--drivers/staging/comedi/Kconfig61
-rw-r--r--drivers/staging/comedi/comedi.h5
-rw-r--r--drivers/staging/comedi/comedi_buf.c4
-rw-r--r--drivers/staging/comedi/comedi_compat32.c5
-rw-r--r--drivers/staging/comedi/comedi_compat32.h5
-rw-r--r--drivers/staging/comedi/comedi_fops.c26
-rw-r--r--drivers/staging/comedi/comedi_pci.c4
-rw-r--r--drivers/staging/comedi/comedi_pcmcia.c4
-rw-r--r--drivers/staging/comedi/comedi_usb.c16
-rw-r--r--drivers/staging/comedi/comedidev.h30
-rw-r--r--drivers/staging/comedi/comedilib.h5
-rw-r--r--drivers/staging/comedi/drivers.c52
-rw-r--r--drivers/staging/comedi/drivers/8253.h5
-rw-r--r--drivers/staging/comedi/drivers/8255.c11
-rw-r--r--drivers/staging/comedi/drivers/8255.h5
-rw-r--r--drivers/staging/comedi/drivers/8255_pci.c7
-rw-r--r--drivers/staging/comedi/drivers/Makefile5
-rw-r--r--drivers/staging/comedi/drivers/acl7225b.c136
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Chrono.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Dig_io.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_INCCPT.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Inp_cpt.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Pwm.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Ssi.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Tor.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/APCI1710_Ttl.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/addi_common.c61
-rw-r--r--drivers/staging/comedi/drivers/addi-data/addi_common.h44
-rw-r--r--drivers/staging/comedi/drivers/addi-data/addi_eeprom.c9
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_APCI1710.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci035.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci1500.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci1564.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci3120.c10
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c4
-rw-r--r--drivers/staging/comedi/drivers/addi-data/hwdrv_apci3xxx.c1376
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_1032.c7
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_1516.c8
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_16xx.c7
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_2032.c8
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_2200.c8
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_3120.c5
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_3501.c7
-rw-r--r--drivers/staging/comedi/drivers/addi_apci_3xxx.c1409
-rw-r--r--drivers/staging/comedi/drivers/addi_watchdog.c8
-rw-r--r--drivers/staging/comedi/drivers/adl_pci6208.c4
-rw-r--r--drivers/staging/comedi/drivers/adl_pci7x3x.c4
-rw-r--r--drivers/staging/comedi/drivers/adl_pci8164.c4
-rw-r--r--drivers/staging/comedi/drivers/adl_pci9111.c4
-rw-r--r--drivers/staging/comedi/drivers/adq12b.c5
-rw-r--r--drivers/staging/comedi/drivers/adv_pci1723.c9
-rw-r--r--drivers/staging/comedi/drivers/adv_pci1724.c7
-rw-r--r--drivers/staging/comedi/drivers/adv_pci_dio.c8
-rw-r--r--drivers/staging/comedi/drivers/aio_aio12_8.c12
-rw-r--r--drivers/staging/comedi/drivers/aio_iiro_16.c4
-rw-r--r--drivers/staging/comedi/drivers/amplc_dio200.c5
-rw-r--r--drivers/staging/comedi/drivers/amplc_dio200.h5
-rw-r--r--drivers/staging/comedi/drivers/amplc_dio200_common.c33
-rw-r--r--drivers/staging/comedi/drivers/amplc_dio200_pci.c5
-rw-r--r--drivers/staging/comedi/drivers/amplc_pc236.c6
-rw-r--r--drivers/staging/comedi/drivers/amplc_pc263.c5
-rw-r--r--drivers/staging/comedi/drivers/amplc_pci224.c5
-rw-r--r--drivers/staging/comedi/drivers/amplc_pci230.c5
-rw-r--r--drivers/staging/comedi/drivers/amplc_pci263.c5
-rw-r--r--drivers/staging/comedi/drivers/c6xdigio.c5
-rw-r--r--drivers/staging/comedi/drivers/cb_das16_cs.c4
-rw-r--r--drivers/staging/comedi/drivers/cb_pcidas.c7
-rw-r--r--drivers/staging/comedi/drivers/cb_pcidas64.c8
-rw-r--r--drivers/staging/comedi/drivers/cb_pcidda.c13
-rw-r--r--drivers/staging/comedi/drivers/cb_pcimdas.c5
-rw-r--r--drivers/staging/comedi/drivers/cb_pcimdda.c13
-rw-r--r--drivers/staging/comedi/drivers/comedi_bond.c5
-rw-r--r--drivers/staging/comedi/drivers/comedi_fc.c7
-rw-r--r--drivers/staging/comedi/drivers/comedi_fc.h7
-rw-r--r--drivers/staging/comedi/drivers/comedi_parport.c5
-rw-r--r--drivers/staging/comedi/drivers/comedi_test.c7
-rw-r--r--drivers/staging/comedi/drivers/contec_pci_dio.c5
-rw-r--r--drivers/staging/comedi/drivers/daqboard2000.c30
-rw-r--r--drivers/staging/comedi/drivers/das08.c12
-rw-r--r--drivers/staging/comedi/drivers/das08.h6
-rw-r--r--drivers/staging/comedi/drivers/das08_cs.c15
-rw-r--r--drivers/staging/comedi/drivers/das08_isa.c12
-rw-r--r--drivers/staging/comedi/drivers/das08_pci.c12
-rw-r--r--drivers/staging/comedi/drivers/das16.c7
-rw-r--r--drivers/staging/comedi/drivers/das16m1.c7
-rw-r--r--drivers/staging/comedi/drivers/das1800.c6
-rw-r--r--drivers/staging/comedi/drivers/das6402.c5
-rw-r--r--drivers/staging/comedi/drivers/das800.c6
-rw-r--r--drivers/staging/comedi/drivers/dmm32at.c5
-rw-r--r--drivers/staging/comedi/drivers/dt2811.c4
-rw-r--r--drivers/staging/comedi/drivers/dt2814.c5
-rw-r--r--drivers/staging/comedi/drivers/dt2815.c5
-rw-r--r--drivers/staging/comedi/drivers/dt2817.c5
-rw-r--r--drivers/staging/comedi/drivers/dt282x.c5
-rw-r--r--drivers/staging/comedi/drivers/dt3000.c5
-rw-r--r--drivers/staging/comedi/drivers/dt9812.c1031
-rw-r--r--drivers/staging/comedi/drivers/dyna_pci10xx.c4
-rw-r--r--drivers/staging/comedi/drivers/gsc_hpdi.c7
-rw-r--r--drivers/staging/comedi/drivers/icp_multi.c5
-rw-r--r--drivers/staging/comedi/drivers/jr3_pci.c51
-rw-r--r--drivers/staging/comedi/drivers/ke_counter.c5
-rw-r--r--drivers/staging/comedi/drivers/me4000.c5
-rw-r--r--drivers/staging/comedi/drivers/me_daq.c28
-rw-r--r--drivers/staging/comedi/drivers/mite.c5
-rw-r--r--drivers/staging/comedi/drivers/mite.h5
-rw-r--r--drivers/staging/comedi/drivers/mpc624.c5
-rw-r--r--drivers/staging/comedi/drivers/multiq3.c5
-rw-r--r--drivers/staging/comedi/drivers/ni_6527.c5
-rw-r--r--drivers/staging/comedi/drivers/ni_65xx.c36
-rw-r--r--drivers/staging/comedi/drivers/ni_660x.c4
-rw-r--r--drivers/staging/comedi/drivers/ni_670x.c5
-rw-r--r--drivers/staging/comedi/drivers/ni_at_a2150.c6
-rw-r--r--drivers/staging/comedi/drivers/ni_at_ao.c5
-rw-r--r--drivers/staging/comedi/drivers/ni_atmio.c4
-rw-r--r--drivers/staging/comedi/drivers/ni_atmio16d.c6
-rw-r--r--drivers/staging/comedi/drivers/ni_daq_700.c9
-rw-r--r--drivers/staging/comedi/drivers/ni_daq_dio24.c14
-rw-r--r--drivers/staging/comedi/drivers/ni_labpc.c74
-rw-r--r--drivers/staging/comedi/drivers/ni_labpc.h29
-rw-r--r--drivers/staging/comedi/drivers/ni_labpc_cs.c19
-rw-r--r--drivers/staging/comedi/drivers/ni_labpc_pci.c12
-rw-r--r--drivers/staging/comedi/drivers/ni_mio_common.c6
-rw-r--r--drivers/staging/comedi/drivers/ni_mio_cs.c5
-rw-r--r--drivers/staging/comedi/drivers/ni_pcidio.c30
-rw-r--r--drivers/staging/comedi/drivers/ni_pcimio.c4
-rw-r--r--drivers/staging/comedi/drivers/ni_stc.h5
-rw-r--r--drivers/staging/comedi/drivers/ni_tio.c4
-rw-r--r--drivers/staging/comedi/drivers/ni_tio.h5
-rw-r--r--drivers/staging/comedi/drivers/ni_tio_internal.h5
-rw-r--r--drivers/staging/comedi/drivers/ni_tiocmd.c4
-rw-r--r--drivers/staging/comedi/drivers/pcl711.c5
-rw-r--r--drivers/staging/comedi/drivers/pcl724.c219
-rw-r--r--drivers/staging/comedi/drivers/pcl725.c91
-rw-r--r--drivers/staging/comedi/drivers/pcl726.c5
-rw-r--r--drivers/staging/comedi/drivers/pcl730.c332
-rw-r--r--drivers/staging/comedi/drivers/pcm3724.c11
-rw-r--r--drivers/staging/comedi/drivers/pcm3730.c136
-rw-r--r--drivers/staging/comedi/drivers/pcmad.c197
-rw-r--r--drivers/staging/comedi/drivers/pcmda12.c245
-rw-r--r--drivers/staging/comedi/drivers/pcmmio.c4
-rw-r--r--drivers/staging/comedi/drivers/pcmuio.c961
-rw-r--r--drivers/staging/comedi/drivers/plx9052.h5
-rw-r--r--drivers/staging/comedi/drivers/poc.c58
-rw-r--r--drivers/staging/comedi/drivers/rtd520.c4
-rw-r--r--drivers/staging/comedi/drivers/rti800.c4
-rw-r--r--drivers/staging/comedi/drivers/rti802.c5
-rw-r--r--drivers/staging/comedi/drivers/s526.c5
-rw-r--r--drivers/staging/comedi/drivers/s626.c5
-rw-r--r--drivers/staging/comedi/drivers/s626.h5
-rw-r--r--drivers/staging/comedi/drivers/serial2002.c5
-rw-r--r--drivers/staging/comedi/drivers/skel.c5
-rw-r--r--drivers/staging/comedi/drivers/ssv_dnp.c5
-rw-r--r--drivers/staging/comedi/drivers/unioxx5.c25
-rw-r--r--drivers/staging/comedi/drivers/usbdux.c236
-rw-r--r--drivers/staging/comedi/drivers/usbduxfast.c1282
-rw-r--r--drivers/staging/comedi/drivers/usbduxsigma.c2958
-rw-r--r--drivers/staging/comedi/drivers/vmk80xx.c88
-rw-r--r--drivers/staging/comedi/kcomedilib/kcomedilib_main.c5
-rw-r--r--drivers/staging/comedi/proc.c5
-rw-r--r--drivers/staging/comedi/range.c5
-rw-r--r--drivers/staging/cptm1217/clearpad_tm1217.c4
-rw-r--r--drivers/staging/crystalhd/bc_dts_glob_lnx.h19
-rw-r--r--drivers/staging/crystalhd/crystalhd_cmds.c28
-rw-r--r--drivers/staging/crystalhd/crystalhd_cmds.h19
-rw-r--r--drivers/staging/crystalhd/crystalhd_fw_if.h81
-rw-r--r--drivers/staging/crystalhd/crystalhd_hw.c231
-rw-r--r--drivers/staging/crystalhd/crystalhd_hw.h121
-rw-r--r--drivers/staging/crystalhd/crystalhd_lnx.c33
-rw-r--r--drivers/staging/crystalhd/crystalhd_lnx.h4
-rw-r--r--drivers/staging/crystalhd/crystalhd_misc.c39
-rw-r--r--drivers/staging/crystalhd/crystalhd_misc.h37
-rw-r--r--drivers/staging/csr/bh.c213
-rw-r--r--drivers/staging/csr/csr_framework_ext.c16
-rw-r--r--drivers/staging/csr/csr_framework_ext.h8
-rw-r--r--drivers/staging/csr/csr_wifi_nme_ap_sef.c4
-rw-r--r--drivers/staging/csr/drv.c6
-rw-r--r--drivers/staging/csr/io.c18
-rw-r--r--drivers/staging/csr/monitor.c2
-rw-r--r--drivers/staging/csr/netdev.c100
-rw-r--r--drivers/staging/csr/sdio_mmc.c4
-rw-r--r--drivers/staging/csr/sme_blocking.c24
-rw-r--r--drivers/staging/csr/sme_native.c2
-rw-r--r--drivers/staging/csr/sme_sys.c148
-rw-r--r--drivers/staging/csr/sme_userspace.c4
-rw-r--r--drivers/staging/csr/sme_wext.c116
-rw-r--r--drivers/staging/csr/ul_int.c2
-rw-r--r--drivers/staging/csr/unifi_event.c20
-rw-r--r--drivers/staging/csr/unifi_pdu_processing.c492
-rw-r--r--drivers/staging/csr/unifi_priv.h30
-rw-r--r--drivers/staging/csr/unifi_sme.c24
-rw-r--r--drivers/staging/csr/unifi_sme.h8
-rw-r--r--drivers/staging/cxt1e1/comet.c819
-rw-r--r--drivers/staging/cxt1e1/functions.c10
-rw-r--r--drivers/staging/cxt1e1/hwprobe.c6
-rw-r--r--drivers/staging/cxt1e1/linux.c64
-rw-r--r--drivers/staging/cxt1e1/musycc.c42
-rw-r--r--drivers/staging/cxt1e1/pmcc4.h10
-rw-r--r--drivers/staging/cxt1e1/pmcc4_drv.c52
-rw-r--r--drivers/staging/cxt1e1/sbecom_inline_linux.h6
-rw-r--r--drivers/staging/cxt1e1/sbeid.c6
-rw-r--r--drivers/staging/cxt1e1/sbeproc.h4
-rw-r--r--drivers/staging/dgrp/dgrp_dpa_ops.c2
-rw-r--r--drivers/staging/dgrp/dgrp_net_ops.c6
-rw-r--r--drivers/staging/dgrp/drp.h2
-rw-r--r--drivers/staging/dwc2/core.c44
-rw-r--r--drivers/staging/dwc2/core_intr.c15
-rw-r--r--drivers/staging/dwc2/hcd.c17
-rw-r--r--drivers/staging/dwc2/hcd.h12
-rw-r--r--drivers/staging/dwc2/hcd_intr.c26
-rw-r--r--drivers/staging/dwc2/pci.c19
-rw-r--r--drivers/staging/echo/echo.c78
-rw-r--r--drivers/staging/echo/echo.h26
-rw-r--r--drivers/staging/frontier/alphatrack.c51
-rw-r--r--drivers/staging/frontier/alphatrack.h6
-rw-r--r--drivers/staging/frontier/tranzport.c20
-rw-r--r--drivers/staging/ft1000/ft1000-pcmcia/ft1000_dnld.c43
-rw-r--r--drivers/staging/ft1000/ft1000-usb/ft1000_debug.c439
-rw-r--r--drivers/staging/ft1000/ft1000-usb/ft1000_ioctl.h156
-rw-r--r--drivers/staging/ft1000/ft1000-usb/ft1000_usb.c10
-rw-r--r--drivers/staging/fwserial/fwserial.c135
-rw-r--r--drivers/staging/fwserial/fwserial.h2
-rw-r--r--drivers/staging/gdm72xx/Kconfig10
-rw-r--r--drivers/staging/gdm72xx/gdm_wimax.c3
-rw-r--r--drivers/staging/goldfish/goldfish_audio.c30
-rw-r--r--drivers/staging/goldfish/goldfish_nand.c11
-rw-r--r--drivers/staging/goldfish/goldfish_nand_reg.h35
-rw-r--r--drivers/staging/iio/adc/ad7192.c2
-rw-r--r--drivers/staging/iio/adc/ad7280a.c2
-rw-r--r--drivers/staging/iio/adc/ad7291.c217
-rw-r--r--drivers/staging/iio/adc/ad7291.h12
-rw-r--r--drivers/staging/iio/adc/ad7606_core.c12
-rw-r--r--drivers/staging/iio/adc/ad7606_par.c2
-rw-r--r--drivers/staging/iio/adc/ad7816.c8
-rw-r--r--drivers/staging/iio/adc/ad799x_core.c4
-rw-r--r--drivers/staging/iio/adc/lpc32xx_adc.c1
-rw-r--r--drivers/staging/iio/adc/mxs-lradc.c8
-rw-r--r--drivers/staging/iio/adc/spear_adc.c3
-rw-r--r--drivers/staging/iio/gyro/Kconfig7
-rw-r--r--drivers/staging/iio/gyro/Makefile3
-rw-r--r--drivers/staging/iio/gyro/adis16130_core.c178
-rw-r--r--drivers/staging/iio/trigger/Kconfig17
-rw-r--r--drivers/staging/iio/trigger/Makefile2
-rw-r--r--drivers/staging/iio/trigger/iio-trig-gpio.c167
-rw-r--r--drivers/staging/iio/trigger/iio-trig-sysfs.c227
-rw-r--r--drivers/staging/imx-drm/Kconfig8
-rw-r--r--drivers/staging/imx-drm/Makefile1
-rw-r--r--drivers/staging/imx-drm/TODO1
-rw-r--r--drivers/staging/imx-drm/imx-drm-core.c5
-rw-r--r--drivers/staging/imx-drm/imx-ldb.c625
-rw-r--r--drivers/staging/imx-drm/imx-tve.c18
-rw-r--r--drivers/staging/imx-drm/ipu-v3/ipu-common.c125
-rw-r--r--drivers/staging/imx-drm/ipu-v3/ipu-di.c13
-rw-r--r--drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c22
-rw-r--r--drivers/staging/imx-drm/ipu-v3/ipu-prv.h4
-rw-r--r--drivers/staging/imx-drm/ipuv3-crtc.c7
-rw-r--r--drivers/staging/imx-drm/parallel-display.c11
-rw-r--r--drivers/staging/keucr/init.c116
-rw-r--r--drivers/staging/keucr/scsiglue.c19
-rw-r--r--drivers/staging/keucr/smil.h28
-rw-r--r--drivers/staging/keucr/smilmain.c1937
-rw-r--r--drivers/staging/keucr/smilsub.c52
-rw-r--r--drivers/staging/keucr/smscsi.c38
-rw-r--r--drivers/staging/keucr/transport.c49
-rw-r--r--drivers/staging/keucr/transport.h3
-rw-r--r--drivers/staging/keucr/usb.c67
-rw-r--r--drivers/staging/keucr/usb.h119
-rw-r--r--drivers/staging/line6/pcm.c23
-rw-r--r--drivers/staging/lustre/Kconfig3
-rw-r--r--drivers/staging/lustre/Makefile4
-rw-r--r--drivers/staging/lustre/TODO13
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/bitmap.h111
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/curproc.h110
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs.h234
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h214
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h201
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h350
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h170
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h851
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h200
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h222
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h117
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h101
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h577
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_string.h137
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_time.h132
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h110
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/kp30.h286
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h125
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h38
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h175
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h49
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h92
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h204
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h120
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h241
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h87
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h275
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h36
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h114
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/lucache.h162
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/params_tree.h166
-rw-r--r--drivers/staging/lustre/include/linux/lnet/api-support.h44
-rw-r--r--drivers/staging/lustre/include/linux/lnet/api.h220
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lib-lnet.h874
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lib-types.h765
-rw-r--r--drivers/staging/lustre/include/linux/lnet/linux/api-support.h43
-rw-r--r--drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h72
-rw-r--r--drivers/staging/lustre/include/linux/lnet/linux/lib-types.h45
-rw-r--r--drivers/staging/lustre/include/linux/lnet/linux/lnet.h56
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h51
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lnet.h51
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lnetctl.h80
-rw-r--r--drivers/staging/lustre/include/linux/lnet/lnetst.h491
-rw-r--r--drivers/staging/lustre/include/linux/lnet/ptllnd.h94
-rw-r--r--drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h124
-rw-r--r--drivers/staging/lustre/include/linux/lnet/socklnd.h103
-rw-r--r--drivers/staging/lustre/include/linux/lnet/types.h503
-rw-r--r--drivers/staging/lustre/lnet/Kconfig40
-rw-r--r--drivers/staging/lustre/lnet/Makefile1
-rw-r--r--drivers/staging/lustre/lnet/klnds/Makefile1
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile5
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c3259
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h1057
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c3529
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c493
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/Makefile7
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c2902
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h602
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c2664
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c1088
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h91
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c198
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c797
-rw-r--r--drivers/staging/lustre/lnet/lnet/Makefile8
-rw-r--r--drivers/staging/lustre/lnet/lnet/acceptor.c527
-rw-r--r--drivers/staging/lustre/lnet/lnet/api-errno.c39
-rw-r--r--drivers/staging/lustre/lnet/lnet/api-ni.c1941
-rw-r--r--drivers/staging/lustre/lnet/lnet/config.c1264
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-eq.c447
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-md.c451
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-me.c297
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-move.c2441
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-msg.c650
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-ptl.c938
-rw-r--r--drivers/staging/lustre/lnet/lnet/lo.c120
-rw-r--r--drivers/staging/lustre/lnet/lnet/module.c154
-rw-r--r--drivers/staging/lustre/lnet/lnet/peer.c337
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c1694
-rw-r--r--drivers/staging/lustre/lnet/lnet/router_proc.c950
-rw-r--r--drivers/staging/lustre/lnet/selftest/Makefile6
-rw-r--r--drivers/staging/lustre/lnet/selftest/brw_test.c499
-rw-r--r--drivers/staging/lustre/lnet/selftest/conctl.c931
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.c1397
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.h146
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.c2071
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.h232
-rw-r--r--drivers/staging/lustre/lnet/selftest/framework.c1814
-rw-r--r--drivers/staging/lustre/lnet/selftest/module.c169
-rw-r--r--drivers/staging/lustre/lnet/selftest/ping_test.c229
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.c1666
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.h302
-rw-r--r--drivers/staging/lustre/lnet/selftest/selftest.h611
-rw-r--r--drivers/staging/lustre/lnet/selftest/timer.c253
-rw-r--r--drivers/staging/lustre/lnet/selftest/timer.h53
-rw-r--r--drivers/staging/lustre/lustre/Kconfig51
-rw-r--r--drivers/staging/lustre/lustre/Makefile2
-rw-r--r--drivers/staging/lustre/lustre/fid/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_handler.c661
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_internal.h84
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_lib.c97
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_request.c522
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_store.c259
-rw-r--r--drivers/staging/lustre/lustre/fid/lproc_fid.c222
-rw-r--r--drivers/staging/lustre/lustre/fld/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_cache.c566
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_handler.c447
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_index.c426
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_internal.h223
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_request.c519
-rw-r--r--drivers/staging/lustre/lustre/fld/lproc_fld.c373
-rw-r--r--drivers/staging/lustre/lustre/include/cl_object.h3279
-rw-r--r--drivers/staging/lustre/lustre/include/dt_object.h1498
-rw-r--r--drivers/staging/lustre/lustre/include/interval_tree.h124
-rw-r--r--drivers/staging/lustre/lustre/include/ioctl.h106
-rw-r--r--drivers/staging/lustre/lustre/include/lclient.h437
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lprocfs_status.h58
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_acl.h66
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_common.h22
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_compat25.h349
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_debug.h47
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_dlm.h46
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h181
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_handles.h53
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_intent.h62
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_lib.h87
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_lite.h100
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_log.h57
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_net.h50
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h85
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_quota.h47
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_user.h67
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lvfs.h134
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lvfs_linux.h66
-rw-r--r--drivers/staging/lustre/lustre/include/linux/obd.h128
-rw-r--r--drivers/staging/lustre/lustre/include/linux/obd_class.h58
-rw-r--r--drivers/staging/lustre/lustre/include/linux/obd_support.h63
-rw-r--r--drivers/staging/lustre/lustre/include/lprocfs_status.h1043
-rw-r--r--drivers/staging/lustre/lustre/include/lu_object.h1346
-rw-r--r--drivers/staging/lustre/lustre/include/lu_ref.h170
-rw-r--r--drivers/staging/lustre/lustre/include/lu_target.h91
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/libiam.h145
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/liblustreapi.h43
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h121
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h2
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_idl.h3653
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h95
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_user.h1145
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustreapi.h310
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_acl.h42
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_capa.h305
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_cfg.h299
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_debug.h76
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_disk.h543
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_dlm.h1671
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_eacl.h95
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_export.h389
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fid.h762
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fld.h202
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fsfilt.h48
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_ha.h67
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_handles.h93
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_idmap.h104
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_import.h367
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lib.h667
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_linkea.h57
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lite.h147
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_log.h576
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mdc.h176
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mds.h81
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mdt.h84
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_net.h3451
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_param.h121
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_quota.h239
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_req_layout.h334
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_sec.h1145
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_update.h189
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_ver.h24
-rw-r--r--drivers/staging/lustre/lustre/include/lvfs.h57
-rw-r--r--drivers/staging/lustre/lustre/include/md_object.h908
-rw-r--r--drivers/staging/lustre/lustre/include/obd.h1677
-rw-r--r--drivers/staging/lustre/lustre/include/obd_cache.h39
-rw-r--r--drivers/staging/lustre/lustre/include/obd_cksum.h176
-rw-r--r--drivers/staging/lustre/lustre/include/obd_class.h2281
-rw-r--r--drivers/staging/lustre/lustre/include/obd_lov.h126
-rw-r--r--drivers/staging/lustre/lustre/include/obd_ost.h96
-rw-r--r--drivers/staging/lustre/lustre/include/obd_support.h851
-rw-r--r--drivers/staging/lustre/lustre/lclient/glimpse.c274
-rw-r--r--drivers/staging/lustre/lustre/lclient/lcommon_cl.c1325
-rw-r--r--drivers/staging/lustre/lustre/lclient/lcommon_misc.c194
-rw-r--r--drivers/staging/lustre/lustre/ldlm/interval_tree.c764
-rw-r--r--drivers/staging/lustre/lustre/ldlm/l_lock.c76
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_extent.c242
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_flock.c849
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c75
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_internal.h276
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lib.c868
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lock.c2429
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c1238
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_plain.c72
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_pool.c1384
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_request.c2333
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_resource.c1409
-rw-r--r--drivers/staging/lustre/lustre/libcfs/Makefile21
-rw-r--r--drivers/staging/lustre/lustre/libcfs/debug.c476
-rw-r--r--drivers/staging/lustre/lustre/libcfs/fail.c137
-rw-r--r--drivers/staging/lustre/lustre/libcfs/hash.c2123
-rw-r--r--drivers/staging/lustre/lustre/libcfs/heap.c475
-rw-r--r--drivers/staging/lustre/lustre/libcfs/kernel_user_comm.c346
-rw-r--r--drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c204
-rw-r--r--drivers/staging/lustre/lustre/libcfs/libcfs_lock.c192
-rw-r--r--drivers/staging/lustre/lustre/libcfs/libcfs_mem.c205
-rw-r--r--drivers/staging/lustre/lustre/libcfs/libcfs_string.c647
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c1085
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c144
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c289
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c339
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c264
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-module.c183
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c259
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-proc.c580
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c659
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c275
-rw-r--r--drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.h48
-rw-r--r--drivers/staging/lustre/lustre/libcfs/lwt.c266
-rw-r--r--drivers/staging/lustre/lustre/libcfs/module.c498
-rw-r--r--drivers/staging/lustre/lustre/libcfs/nidstrings.c867
-rw-r--r--drivers/staging/lustre/lustre/libcfs/prng.c139
-rw-r--r--drivers/staging/lustre/lustre/libcfs/tracefile.c1195
-rw-r--r--drivers/staging/lustre/lustre/libcfs/tracefile.h340
-rw-r--r--drivers/staging/lustre/lustre/libcfs/upcall_cache.c462
-rw-r--r--drivers/staging/lustre/lustre/libcfs/watchdog.c516
-rw-r--r--drivers/staging/lustre/lustre/libcfs/workitem.c475
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile13
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c675
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c1978
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c3198
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_capa.c661
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_close.c412
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h1576
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c2408
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c507
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c319
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_rmtacl.c301
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c867
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c1370
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c1279
-rw-r--r--drivers/staging/lustre/lustre/llite/remote_perm.c333
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c1314
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c586
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c1722
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c226
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c192
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c546
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h62
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c1186
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c85
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c186
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c558
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c578
-rw-r--r--drivers/staging/lustre/lustre/lmv/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_fld.c88
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_intent.c328
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_internal.h159
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_obd.c2727
-rw-r--r--drivers/staging/lustre/lustre/lmv/lproc_lmv.c235
-rw-r--r--drivers/staging/lustre/lustre/lov/Makefile9
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_cl_internal.h820
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_dev.c533
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_ea.c333
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_internal.h323
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_io.c967
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_lock.c1253
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_log.c278
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_merge.c218
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_obd.c2916
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_object.c942
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_offset.c267
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pack.c678
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_page.c235
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pool.c681
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_request.c1551
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_dev.c211
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_io.c55
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_lock.c485
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_object.c170
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_page.c72
-rw-r--r--drivers/staging/lustre/lustre/lov/lproc_lov.c302
-rw-r--r--drivers/staging/lustre/lustre/lvfs/Makefile6
-rw-r--r--drivers/staging/lustre/lustre/lvfs/fsfilt.c138
-rw-r--r--drivers/staging/lustre/lustre/lvfs/fsfilt_ext3.c761
-rw-r--r--drivers/staging/lustre/lustre/lvfs/lvfs_lib.c173
-rw-r--r--drivers/staging/lustre/lustre/lvfs/lvfs_linux.c295
-rw-r--r--drivers/staging/lustre/lustre/mdc/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/mdc/lproc_mdc.c219
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_internal.h180
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_lib.c564
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_locks.c1229
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_reint.c489
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_request.c2753
-rw-r--r--drivers/staging/lustre/lustre/mgc/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/mgc/libmgc.c166
-rw-r--r--drivers/staging/lustre/lustre/mgc/lproc_mgc.c84
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_internal.h73
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_request.c1860
-rw-r--r--drivers/staging/lustre/lustre/obdclass/Makefile13
-rw-r--r--drivers/staging/lustre/lustre/obdclass/acl.c546
-rw-r--r--drivers/staging/lustre/lustre/obdclass/capa.c401
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_internal.h121
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_io.c1753
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_lock.c2304
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_object.c1148
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_page.c1605
-rw-r--r--drivers/staging/lustre/lustre/obdclass/class_obd.c689
-rw-r--r--drivers/staging/lustre/lustre/obdclass/debug.c124
-rw-r--r--drivers/staging/lustre/lustre/obdclass/dt_object.c1055
-rw-r--r--drivers/staging/lustre/lustre/obdclass/genops.c1853
-rw-r--r--drivers/staging/lustre/lustre/obdclass/idmap.c474
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linkea.c194
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-module.c408
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c222
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c445
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog.c966
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_cat.c833
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_internal.h98
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_ioctl.c427
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_lvfs.c862
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_obd.c319
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_osd.c1323
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_swab.c407
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_test.c1087
-rw-r--r--drivers/staging/lustre/lustre/obdclass/local_storage.c903
-rw-r--r--drivers/staging/lustre/lustre/obdclass/local_storage.h88
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lprocfs_jobstats.c562
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lprocfs_status.c1985
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_object.c2185
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_ref.c50
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_ucred.c107
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_handles.c263
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_peer.c218
-rw-r--r--drivers/staging/lustre/lustre/obdclass/md_attrs.c202
-rw-r--r--drivers/staging/lustre/lustre/obdclass/mea.c112
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_config.c1904
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_mount.c1321
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obdo.c362
-rw-r--r--drivers/staging/lustre/lustre/obdclass/statfs_pack.c75
-rw-r--r--drivers/staging/lustre/lustre/obdclass/uuid.c82
-rw-r--r--drivers/staging/lustre/lustre/obdecho/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo.c679
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_client.c3223
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_internal.h47
-rw-r--r--drivers/staging/lustre/lustre/obdecho/lproc_echo.c57
-rw-r--r--drivers/staging/lustre/lustre/osc/Makefile7
-rw-r--r--drivers/staging/lustre/lustre/osc/lproc_osc.c728
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c2916
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cl_internal.h677
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_dev.c261
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_internal.h208
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_io.c836
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_lock.c1663
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_object.c275
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_page.c927
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_quota.c332
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_request.c3708
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/Makefile23
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/client.c3059
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/connection.c248
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/events.c595
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/Makefile8
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_api.h179
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_asn1.h84
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_bulk.c512
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_cli_upcall.c447
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_err.h193
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_generic_token.c285
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_internal.h526
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_keyring.c1424
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5.h163
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5_mech.c1786
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_mech_switch.c359
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_pipefs.c1252
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_rawobj.c242
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/gss_svc_upcall.c1099
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/lproc_gss.c219
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c2916
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/import.c1613
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/layout.c2396
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_client.c354
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_net.c75
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_server.c466
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c1345
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/niobuf.c728
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs.c1790
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs_crr.c40
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c270
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pack_generic.c2575
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pers.c75
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pinger.c763
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h302
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c154
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c827
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/recover.c357
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec.c2465
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c880
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_config.c1233
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_gc.c250
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c199
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_null.c464
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_plain.c1021
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/service.c3129
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/wirehdr.c47
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/wiretest.c4474
-rw-r--r--drivers/staging/netlogic/xlr_net.c11
-rw-r--r--drivers/staging/nvec/nvec.c50
-rw-r--r--drivers/staging/nvec/nvec.h25
-rw-r--r--drivers/staging/nvec/nvec_kbd.c10
-rw-r--r--drivers/staging/octeon-usb/Kconfig10
-rw-r--r--drivers/staging/octeon-usb/Makefile3
-rw-r--r--drivers/staging/octeon-usb/TODO11
-rw-r--r--drivers/staging/octeon-usb/cvmx-usb.c3229
-rw-r--r--drivers/staging/octeon-usb/cvmx-usb.h1085
-rw-r--r--drivers/staging/octeon-usb/cvmx-usbcx-defs.h1551
-rw-r--r--drivers/staging/octeon-usb/cvmx-usbnx-defs.h887
-rw-r--r--drivers/staging/octeon-usb/octeon-hcd.c832
-rw-r--r--drivers/staging/ozwpan/Kbuild3
-rw-r--r--drivers/staging/ozwpan/ozappif.h2
-rw-r--r--drivers/staging/ozwpan/ozcdev.c7
-rw-r--r--drivers/staging/ozwpan/ozconfig.h1
-rw-r--r--drivers/staging/ozwpan/ozevent.c195
-rw-r--r--drivers/staging/ozwpan/ozevent.h32
-rw-r--r--drivers/staging/ozwpan/ozeventdef.h40
-rw-r--r--drivers/staging/ozwpan/ozhcd.c49
-rw-r--r--drivers/staging/ozwpan/ozmain.c9
-rw-r--r--drivers/staging/ozwpan/ozpd.c23
-rw-r--r--drivers/staging/ozwpan/ozproto.c17
-rw-r--r--drivers/staging/ozwpan/ozusbsvc.c5
-rw-r--r--drivers/staging/ozwpan/ozusbsvc1.c5
-rw-r--r--drivers/staging/panel/panel.c25
-rw-r--r--drivers/staging/rtl8187se/ieee80211/ieee80211_rx.c4
-rw-r--r--drivers/staging/rtl8192e/rtl8192e/r8192E_cmdpkt.c51
-rw-r--r--drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c5
-rw-r--r--drivers/staging/rtl8192e/rtllib_rx.c2
-rw-r--r--drivers/staging/rtl8192e/rtllib_softmac.c5
-rw-r--r--drivers/staging/rtl8192u/ieee80211/dot11d.c14
-rw-r--r--drivers/staging/rtl8192u/ieee80211/dot11d.h10
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211.h94
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.c4
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.h2
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c14
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c8
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c4
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c37
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c78
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c26
-rw-r--r--drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c106
-rw-r--r--drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c82
-rw-r--r--drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h10
-rw-r--r--drivers/staging/rtl8192u/ieee80211/rtl819x_TSProc.c30
-rw-r--r--drivers/staging/rtl8192u/r8180_93cx6.c44
-rw-r--r--drivers/staging/rtl8192u/r8190_rtl8256.c22
-rw-r--r--drivers/staging/rtl8192u/r8190_rtl8256.h10
-rw-r--r--drivers/staging/rtl8192u/r8192U.h808
-rw-r--r--drivers/staging/rtl8192u/r8192U_core.c3590
-rw-r--r--drivers/staging/rtl8192u/r8192U_dm.c144
-rw-r--r--drivers/staging/rtl8192u/r8192U_hw.h9
-rw-r--r--drivers/staging/rtl8192u/r8192U_wx.c36
-rw-r--r--drivers/staging/rtl8192u/r819xU_cmdpkt.c38
-rw-r--r--drivers/staging/rtl8192u/r819xU_cmdpkt.h6
-rw-r--r--drivers/staging/rtl8192u/r819xU_firmware.c54
-rw-r--r--drivers/staging/rtl8192u/r819xU_phy.c2390
-rw-r--r--drivers/staging/rtl8192u/r819xU_phy.h88
-rw-r--r--drivers/staging/rts5139/rts51x_transport.c8
-rw-r--r--drivers/staging/sb105x/sb_pci_mp.c34
-rw-r--r--drivers/staging/sb105x/sb_pci_mp.h1
-rw-r--r--drivers/staging/serqt_usb2/serqt_usb2.c288
-rw-r--r--drivers/staging/silicom/bp_mod.h2
-rw-r--r--drivers/staging/silicom/bpctl_mod.c483
-rw-r--r--drivers/staging/silicom/bypasslib/bp_ioctl.h64
-rw-r--r--drivers/staging/silicom/bypasslib/bplibk.h12
-rw-r--r--drivers/staging/silicom/bypasslib/bypass.c89
-rw-r--r--drivers/staging/slicoss/slicoss.c18
-rw-r--r--drivers/staging/speakup/Kconfig32
-rw-r--r--drivers/staging/speakup/devsynth.c14
-rw-r--r--drivers/staging/speakup/i18n.c12
-rw-r--r--drivers/staging/speakup/kobjects.c48
-rw-r--r--drivers/staging/speakup/main.c79
-rw-r--r--drivers/staging/speakup/serialio.c4
-rw-r--r--drivers/staging/speakup/speakup_acntpc.c22
-rw-r--r--drivers/staging/speakup/speakup_apollo.c20
-rw-r--r--drivers/staging/speakup/speakup_decext.c20
-rw-r--r--drivers/staging/speakup/speakup_decpc.c22
-rw-r--r--drivers/staging/speakup/speakup_dectlk.c20
-rw-r--r--drivers/staging/speakup/speakup_dtlk.c22
-rw-r--r--drivers/staging/speakup/speakup_keypc.c22
-rw-r--r--drivers/staging/speakup/speakup_soft.c34
-rw-r--r--drivers/staging/speakup/spk_priv.h13
-rw-r--r--drivers/staging/speakup/synth.c38
-rw-r--r--drivers/staging/speakup/thread.c4
-rw-r--r--drivers/staging/speakup/varhandlers.c19
-rw-r--r--drivers/staging/ti-soc-thermal/ti-thermal-common.c30
-rw-r--r--drivers/staging/ti-soc-thermal/ti_soc_thermal.txt1
-rw-r--r--drivers/staging/tidspbridge/core/_tiomap.h6
-rw-r--r--drivers/staging/tidspbridge/core/_tiomap_pwr.h10
-rw-r--r--drivers/staging/tidspbridge/core/tiomap3430_pwr.c10
-rw-r--r--drivers/staging/tidspbridge/core/ue_deh.c2
-rw-r--r--drivers/staging/tidspbridge/core/wdt.c4
-rw-r--r--drivers/staging/tidspbridge/rmgr/drv_interface.c1
-rw-r--r--drivers/staging/usbip/usbip_event.c2
-rw-r--r--drivers/staging/vme/devices/vme_user.c6
-rw-r--r--drivers/staging/vme/devices/vme_user.h12
-rw-r--r--drivers/staging/vt6655/80211hdr.h2
-rw-r--r--drivers/staging/vt6655/80211mgr.c72
-rw-r--r--drivers/staging/vt6655/80211mgr.h107
-rw-r--r--drivers/staging/vt6655/aes_ccmp.c37
-rw-r--r--drivers/staging/vt6655/aes_ccmp.h2
-rw-r--r--drivers/staging/vt6655/hostap.c6
-rw-r--r--drivers/staging/vt6655/ioctl.c9
-rw-r--r--drivers/staging/vt6656/mac.c51
-rw-r--r--drivers/staging/vt6656/rf.c657
-rw-r--r--drivers/staging/vt6656/tether.h10
-rw-r--r--drivers/staging/vt6656/tmacro.h6
-rw-r--r--drivers/staging/winbond/phy_calibration.c2
-rw-r--r--drivers/staging/winbond/reg.c25
-rw-r--r--drivers/staging/winbond/wb35reg.c265
-rw-r--r--drivers/staging/winbond/wb35rx.c3
-rw-r--r--drivers/staging/wlags49_h2/wl_cs.c16
-rw-r--r--drivers/staging/wlags49_h2/wl_cs.h4
-rw-r--r--drivers/staging/wlags49_h2/wl_main.c4
-rw-r--r--drivers/staging/wlan-ng/prism2sta.c3
-rw-r--r--drivers/staging/xgifb/vb_def.h5
-rw-r--r--drivers/staging/xgifb/vb_init.c92
-rw-r--r--drivers/staging/xgifb/vb_setmode.c433
-rw-r--r--drivers/staging/xgifb/vb_setmode.h1
-rw-r--r--drivers/staging/zram/Makefile2
-rw-r--r--drivers/staging/zram/zram_drv.c569
-rw-r--r--drivers/staging/zram/zram_drv.h36
-rw-r--r--drivers/staging/zram/zram_sysfs.c227
-rw-r--r--drivers/staging/zsmalloc/zsmalloc-main.c7
-rw-r--r--drivers/staging/zsmalloc/zsmalloc.h2
833 files changed, 278576 insertions, 21362 deletions
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index aefe820a8005..f64b662c74db 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -62,6 +62,8 @@ source "drivers/staging/line6/Kconfig"
source "drivers/staging/octeon/Kconfig"
+source "drivers/staging/octeon-usb/Kconfig"
+
source "drivers/staging/serqt_usb2/Kconfig"
source "drivers/staging/vt6655/Kconfig"
@@ -140,4 +142,8 @@ source "drivers/staging/netlogic/Kconfig"
source "drivers/staging/dwc2/Kconfig"
+source "drivers/staging/lustre/Kconfig"
+
+source "drivers/staging/btmtk_usb/Kconfig"
+
endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 415772ea306d..1fb58a1562cb 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_LINE6_USB) += line6/
obj-$(CONFIG_NETLOGIC_XLR_NET) += netlogic/
obj-$(CONFIG_USB_SERIAL_QUATECH2) += serqt_usb2/
obj-$(CONFIG_OCTEON_ETHERNET) += octeon/
+obj-$(CONFIG_OCTEON_USB) += octeon-usb/
obj-$(CONFIG_VT6655) += vt6655/
obj-$(CONFIG_VT6656) += vt6656/
obj-$(CONFIG_VME_BUS) += vme/
@@ -62,3 +63,5 @@ obj-$(CONFIG_FIREWIRE_SERIAL) += fwserial/
obj-$(CONFIG_ZCACHE) += zcache/
obj-$(CONFIG_GOLDFISH) += goldfish/
obj-$(CONFIG_USB_DWC2) += dwc2/
+obj-$(CONFIG_LUSTRE_FS) += lustre/
+obj-$(CONFIG_USB_BTMTK) += btmtk_usb/
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index e681bdd9aa5f..21a3f7250531 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -704,7 +704,8 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
/* support of 32bit userspace on 64bit platforms */
#ifdef CONFIG_COMPAT
-static long compat_ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long compat_ashmem_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
switch (cmd) {
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 1567ac296b39..c6dc1846943f 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -790,7 +790,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc,
list_del(&buffer->entry);
if (free_page_start || free_page_end) {
binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
- "%d: merge free, buffer %p do not share page%s%s with with %p or %p\n",
+ "%d: merge free, buffer %p do not share page%s%s with %p or %p\n",
proc->pid, buffer, free_page_start ? "" : " end",
free_page_end ? "" : " start", prev, next);
binder_update_page_range(proc, 0, free_page_start ?
diff --git a/drivers/staging/android/sw_sync.c b/drivers/staging/android/sw_sync.c
index 4928f93bdf3d..765c757b120f 100644
--- a/drivers/staging/android/sw_sync.c
+++ b/drivers/staging/android/sw_sync.c
@@ -160,7 +160,8 @@ static int sw_sync_release(struct inode *inode, struct file *file)
return 0;
}
-static long sw_sync_ioctl_create_fence(struct sw_sync_timeline *obj, unsigned long arg)
+static long sw_sync_ioctl_create_fence(struct sw_sync_timeline *obj,
+ unsigned long arg)
{
int fd = get_unused_fd();
int err;
@@ -218,7 +219,8 @@ static long sw_sync_ioctl_inc(struct sw_sync_timeline *obj, unsigned long arg)
return 0;
}
-static long sw_sync_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long sw_sync_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
struct sw_sync_timeline *obj = file->private_data;
diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c
index 3893a3574769..2996077fedef 100644
--- a/drivers/staging/android/sync.c
+++ b/drivers/staging/android/sync.c
@@ -125,9 +125,9 @@ static void sync_timeline_remove_pt(struct sync_pt *pt)
spin_unlock_irqrestore(&obj->active_list_lock, flags);
spin_lock_irqsave(&obj->child_list_lock, flags);
- if (!list_empty(&pt->child_list)) {
+ if (!list_empty(&pt->child_list))
list_del_init(&pt->child_list);
- }
+
spin_unlock_irqrestore(&obj->child_list_lock, flags);
}
@@ -876,11 +876,11 @@ static void sync_print_pt(struct seq_file *s, struct sync_pt *pt, bool fence)
seq_printf(s, " / %s", value);
}
} else if (pt->parent->ops->print_pt) {
- seq_printf(s, ": ");
+ seq_puts(s, ": ");
pt->parent->ops->print_pt(s, pt);
}
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
@@ -895,11 +895,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
obj->ops->timeline_value_str(obj, value, sizeof(value));
seq_printf(s, ": %s", value);
} else if (obj->ops->print_obj) {
- seq_printf(s, ": ");
+ seq_puts(s, ": ");
obj->ops->print_obj(s, obj);
}
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
spin_lock_irqsave(&obj->child_list_lock, flags);
list_for_each(pos, &obj->child_list_head) {
@@ -940,7 +940,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
unsigned long flags;
struct list_head *pos;
- seq_printf(s, "objs:\n--------------\n");
+ seq_puts(s, "objs:\n--------------\n");
spin_lock_irqsave(&sync_timeline_list_lock, flags);
list_for_each(pos, &sync_timeline_list_head) {
@@ -949,11 +949,11 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
sync_timeline_list);
sync_print_obj(s, obj);
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
- seq_printf(s, "fences:\n--------------\n");
+ seq_puts(s, "fences:\n--------------\n");
spin_lock_irqsave(&sync_fence_list_lock, flags);
list_for_each(pos, &sync_fence_list_head) {
@@ -961,7 +961,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
container_of(pos, struct sync_fence, sync_fence_list);
sync_print_fence(s, fence);
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
spin_unlock_irqrestore(&sync_fence_list_lock, flags);
return 0;
@@ -988,7 +988,7 @@ late_initcall(sync_debugfs_init);
#define DUMP_CHUNK 256
static char sync_dump_buf[64 * 1024];
-void sync_dump(void)
+static void sync_dump(void)
{
struct seq_file s = {
.buf = sync_dump_buf,
diff --git a/drivers/staging/asus_oled/asus_oled.c b/drivers/staging/asus_oled/asus_oled.c
index d0a5a28a8fe2..3654dc32a0c6 100644
--- a/drivers/staging/asus_oled/asus_oled.c
+++ b/drivers/staging/asus_oled/asus_oled.c
@@ -50,9 +50,9 @@
#define ASUS_OLED_DISP_HEIGHT 32
#define ASUS_OLED_PACKET_BUF_SIZE 256
-#define USB_VENDOR_ID_ASUS 0x0b05
-#define USB_DEVICE_ID_ASUS_LCM 0x1726
-#define USB_DEVICE_ID_ASUS_LCM2 0x175b
+#define USB_VENDOR_ID_ASUS 0x0b05
+#define USB_DEVICE_ID_ASUS_LCM 0x1726
+#define USB_DEVICE_ID_ASUS_LCM2 0x175b
MODULE_AUTHOR("Jakub Schmidtke, sjakub@gmail.com");
MODULE_DESCRIPTION("Asus OLED Driver");
@@ -324,9 +324,11 @@ static void send_data(struct asus_oled_dev *odev)
return;
if (odev->pack_mode == PACK_MODE_G1) {
- /* When sending roll-mode data the display updated only
- first packet. I have no idea why, but when static picture
- is sent just before rolling picture everything works fine. */
+ /*
+ * When sending roll-mode data the display updated only
+ * first packet. I have no idea why, but when static picture
+ * is sent just before rolling picture everything works fine.
+ */
if (odev->pic_mode == ASUS_OLED_ROLL)
send_packets(odev->udev, packet, odev->buf,
ASUS_OLED_STATIC, 2);
@@ -363,9 +365,11 @@ static int append_values(struct asus_oled_dev *odev, uint8_t val, size_t count)
switch (odev->pack_mode) {
case PACK_MODE_G1:
- /* i = (x/128)*640 + 127 - x + (y/8)*128;
- This one for 128 is the same, but might be better
- for different widths? */
+ /*
+ * i = (x/128)*640 + 127 - x + (y/8)*128;
+ * This one for 128 is the same, but might be better
+ * for different widths?
+ */
i = (x/odev->dev_width)*640 +
odev->dev_width - 1 - x +
(y/8)*odev->dev_width;
@@ -383,10 +387,8 @@ static int append_values(struct asus_oled_dev *odev, uint8_t val, size_t count)
}
if (i >= odev->buf_size) {
- dev_err(odev->dev, "Buffer overflow! Report a bug:"
- "offs: %d >= %d i: %d (x: %d y: %d)\n",
- (int) odev->buf_offs, (int) odev->buf_size,
- (int) i, (int) x, (int) y);
+ dev_err(odev->dev, "Buffer overflow! Report a bug: offs: %zu >= %zu i: %zu (x: %zu y: %zu)\n",
+ odev->buf_offs, odev->buf_size, i, x, y);
return -EIO;
}
@@ -401,7 +403,7 @@ static int append_values(struct asus_oled_dev *odev, uint8_t val, size_t count)
default:
/* cannot get here; stops gcc complaining*/
- ;
+ break;
}
odev->buf_offs++;
@@ -566,9 +568,11 @@ static ssize_t odev_set_picture(struct asus_oled_dev *odev,
if (ret < 0)
return ret;
} else if (buf[offs] == '\n') {
- /* New line detected. Lets assume, that all characters
- till the end of the line were equal to the last
- character in this line.*/
+ /*
+ * New line detected. Lets assume, that all characters
+ * till the end of the line were equal to the last
+ * character in this line.
+ */
if (odev->buf_offs % odev->width != 0)
ret = append_values(odev, odev->last_val,
odev->width -
diff --git a/drivers/staging/bcm/Bcmchar.c b/drivers/staging/bcm/Bcmchar.c
index 35641e529396..f67a22536cbf 100644
--- a/drivers/staging/bcm/Bcmchar.c
+++ b/drivers/staging/bcm/Bcmchar.c
@@ -13,7 +13,7 @@
* Returns - Zero(Success)
****************************************************************/
-static int bcm_char_open(struct inode *inode, struct file * filp)
+static int bcm_char_open(struct inode *inode, struct file *filp)
{
struct bcm_mini_adapter *Adapter = NULL;
struct bcm_tarang_data *pTarang = NULL;
diff --git a/drivers/staging/bcm/InterfaceIdleMode.c b/drivers/staging/bcm/InterfaceIdleMode.c
index a1bf21579d3f..534782866042 100644
--- a/drivers/staging/bcm/InterfaceIdleMode.c
+++ b/drivers/staging/bcm/InterfaceIdleMode.c
@@ -42,107 +42,95 @@ send to f/w with in 200 ms after the Idle/Shutdown req issued
*/
-int InterfaceIdleModeRespond(struct bcm_mini_adapter *Adapter, unsigned int* puiBuffer)
+int InterfaceIdleModeRespond(struct bcm_mini_adapter *Adapter, unsigned int *puiBuffer)
{
int status = STATUS_SUCCESS;
unsigned int uiRegRead = 0;
int bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"SubType of Message :0x%X", ntohl(*puiBuffer));
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "SubType of Message :0x%X", ntohl(*puiBuffer));
- if(ntohl(*puiBuffer) == GO_TO_IDLE_MODE_PAYLOAD)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL," Got GO_TO_IDLE_MODE_PAYLOAD(210) Msg Subtype");
- if(ntohl(*(puiBuffer+1)) == 0 )
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Got IDLE MODE WAKE UP Response From F/W");
+ if (ntohl(*puiBuffer) == GO_TO_IDLE_MODE_PAYLOAD) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, " Got GO_TO_IDLE_MODE_PAYLOAD(210) Msg Subtype");
+ if (ntohl(*(puiBuffer+1)) == 0 ) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Got IDLE MODE WAKE UP Response From F/W");
- status = wrmalt (Adapter,SW_ABORT_IDLEMODE_LOC, &uiRegRead, sizeof(uiRegRead));
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg");
+ status = wrmalt (Adapter, SW_ABORT_IDLEMODE_LOC, &uiRegRead, sizeof(uiRegRead));
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg");
return status;
}
- if(Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING)
- {
+ if (Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING) {
uiRegRead = 0x00000000 ;
- status = wrmalt (Adapter,DEBUG_INTERRUPT_GENERATOR_REGISTOR, &uiRegRead, sizeof(uiRegRead));
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg");
+ status = wrmalt (Adapter, DEBUG_INTERRUPT_GENERATOR_REGISTOR, &uiRegRead, sizeof(uiRegRead));
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg");
return status;
}
}
- //Below Register should not br read in case of Manual and Protocol Idle mode.
- else if(Adapter->ulPowerSaveMode != DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE)
- {
- //clear on read Register
+ /* Below Register should not br read in case of Manual and Protocol Idle mode */
+ else if (Adapter->ulPowerSaveMode != DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE) {
+ /* clear on read Register */
bytes = rdmalt(Adapter, DEVICE_INT_OUT_EP_REG0, &uiRegRead, sizeof(uiRegRead));
if (bytes < 0) {
status = bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0, "rdm failed while clearing H/W Abort Reg0");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "rdm failed while clearing H/W Abort Reg0");
return status;
}
- //clear on read Register
+ /* clear on read Register */
bytes = rdmalt(Adapter, DEVICE_INT_OUT_EP_REG1, &uiRegRead, sizeof(uiRegRead));
if (bytes < 0) {
status = bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0, "rdm failed while clearing H/W Abort Reg1");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "rdm failed while clearing H/W Abort Reg1");
return status;
}
}
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Device Up from Idle Mode");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Device Up from Idle Mode");
- // Set Idle Mode Flag to False and Clear IdleMode reg.
+ /* Set Idle Mode Flag to False and Clear IdleMode reg. */
Adapter->IdleMode = FALSE;
Adapter->bTriedToWakeUpFromlowPowerMode = FALSE;
wake_up(&Adapter->lowpower_mode_wait_queue);
- }
- else
- {
- if(TRUE == Adapter->IdleMode)
+ } else {
+ if (TRUE == Adapter->IdleMode)
{
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Device is already in Idle mode....");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Device is already in Idle mode....");
return status ;
}
uiRegRead = 0;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Got Req from F/W to go in IDLE mode \n");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Got Req from F/W to go in IDLE mode \n");
- if (Adapter->chip_id== BCS220_2 ||
+ if (Adapter->chip_id == BCS220_2 ||
Adapter->chip_id == BCS220_2BC ||
- Adapter->chip_id== BCS250_BC ||
- Adapter->chip_id== BCS220_3)
- {
+ Adapter->chip_id == BCS250_BC ||
+ Adapter->chip_id == BCS220_3) {
bytes = rdmalt(Adapter, HPM_CONFIG_MSW, &uiRegRead, sizeof(uiRegRead));
if (bytes < 0) {
status = bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "rdm failed while Reading HPM_CONFIG_LDO145 Reg 0\n");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "rdm failed while Reading HPM_CONFIG_LDO145 Reg 0\n");
return status;
}
uiRegRead |= (1<<17);
- status = wrmalt (Adapter,HPM_CONFIG_MSW, &uiRegRead, sizeof(uiRegRead));
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg\n");
+ status = wrmalt (Adapter, HPM_CONFIG_MSW, &uiRegRead, sizeof(uiRegRead));
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "wrm failed while clearing Idle Mode Reg\n");
return status;
}
}
SendIdleModeResponse(Adapter);
}
- }
- else if(ntohl(*puiBuffer) == IDLE_MODE_SF_UPDATE_MSG)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "OverRiding Service Flow Params");
- OverrideServiceFlowParams(Adapter,puiBuffer);
+ } else if (ntohl(*puiBuffer) == IDLE_MODE_SF_UPDATE_MSG) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "OverRiding Service Flow Params");
+ OverrideServiceFlowParams(Adapter, puiBuffer);
}
return status;
}
@@ -152,46 +140,40 @@ static int InterfaceAbortIdlemode(struct bcm_mini_adapter *Adapter, unsigned int
int status = STATUS_SUCCESS;
unsigned int value;
unsigned int chip_id ;
- unsigned long timeout = 0 ,itr = 0;
+ unsigned long timeout = 0, itr = 0;
int lenwritten = 0;
- unsigned char aucAbortPattern[8]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
+ unsigned char aucAbortPattern[8] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
struct bcm_interface_adapter *psInterfaceAdapter = Adapter->pvInterfaceAdapter;
- //Abort Bus suspend if its already suspended
- if((TRUE == psInterfaceAdapter->bSuspended) && (TRUE == Adapter->bDoSuspend))
- {
+ /* Abort Bus suspend if its already suspended */
+ if ((TRUE == psInterfaceAdapter->bSuspended) && (TRUE == Adapter->bDoSuspend)) {
status = usb_autopm_get_interface(psInterfaceAdapter->interface);
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Bus got wakeup..Aborting Idle mode... status:%d \n",status);
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Bus got wakeup..Aborting Idle mode... status:%d \n", status);
}
- if((Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING)
+ if ((Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING)
||
- (Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE))
- {
- //write the SW abort pattern.
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Writing pattern<%d> to SW_ABORT_IDLEMODE_LOC\n", Pattern);
- status = wrmalt(Adapter,SW_ABORT_IDLEMODE_LOC, &Pattern, sizeof(Pattern));
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"WRM to Register SW_ABORT_IDLEMODE_LOC failed..");
+ (Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE)) {
+ /* write the SW abort pattern. */
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Writing pattern<%d> to SW_ABORT_IDLEMODE_LOC\n", Pattern);
+ status = wrmalt(Adapter, SW_ABORT_IDLEMODE_LOC, &Pattern, sizeof(Pattern));
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "WRM to Register SW_ABORT_IDLEMODE_LOC failed..");
return status;
}
}
- if(Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING)
- {
+ if (Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING) {
value = 0x80000000;
- status = wrmalt(Adapter,DEBUG_INTERRUPT_GENERATOR_REGISTOR, &value, sizeof(value));
- if(status)
+ status = wrmalt(Adapter, DEBUG_INTERRUPT_GENERATOR_REGISTOR, &value, sizeof(value));
+ if (status)
{
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"WRM to DEBUG_INTERRUPT_GENERATOR_REGISTOR Register failed");
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "WRM to DEBUG_INTERRUPT_GENERATOR_REGISTOR Register failed");
return status;
}
- }
- else if(Adapter->ulPowerSaveMode != DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE)
- {
+ } else if (Adapter->ulPowerSaveMode != DEVICE_POWERSAVE_MODE_AS_PROTOCOL_IDLE_MODE) {
/*
* Get a Interrupt Out URB and send 8 Bytes Down
* To be Done in Thread Context.
@@ -204,43 +186,32 @@ static int InterfaceAbortIdlemode(struct bcm_mini_adapter *Adapter, unsigned int
8,
&lenwritten,
5000);
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Sending Abort pattern down fails with status:%d..\n",status);
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Sending Abort pattern down fails with status:%d..\n", status);
return status;
- }
- else
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "NOB Sent down :%d", lenwritten);
+ } else {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "NOB Sent down :%d", lenwritten);
}
- //mdelay(25);
+ /* mdelay(25); */
- timeout= jiffies + msecs_to_jiffies(50) ;
- while( timeout > jiffies )
- {
+ timeout = jiffies + msecs_to_jiffies(50) ;
+ while ( timeout > jiffies ) {
itr++ ;
rdmalt(Adapter, CHIP_ID_REG, &chip_id, sizeof(UINT));
- if(0xbece3200==(chip_id&~(0xF0)))
- {
+ if (0xbece3200 == (chip_id&~(0xF0)))
chip_id = chip_id&~(0xF0);
- }
- if(chip_id == Adapter->chip_id)
+ if (chip_id == Adapter->chip_id)
break;
}
- if(timeout < jiffies )
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Not able to read chip-id even after 25 msec");
- }
+ if (timeout < jiffies )
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Not able to read chip-id even after 25 msec");
else
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Number of completed iteration to read chip-id :%lu", itr);
- }
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Number of completed iteration to read chip-id :%lu", itr);
- status = wrmalt(Adapter,SW_ABORT_IDLEMODE_LOC, &Pattern, sizeof(status));
- if(status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0,"WRM to Register SW_ABORT_IDLEMODE_LOC failed..");
+ status = wrmalt(Adapter, SW_ABORT_IDLEMODE_LOC, &Pattern, sizeof(status));
+ if (status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "WRM to Register SW_ABORT_IDLEMODE_LOC failed..");
return status;
}
}
@@ -249,13 +220,10 @@ static int InterfaceAbortIdlemode(struct bcm_mini_adapter *Adapter, unsigned int
int InterfaceIdleModeWakeup(struct bcm_mini_adapter *Adapter)
{
ULONG Status = 0;
- if(Adapter->bTriedToWakeUpFromlowPowerMode)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Wake up already attempted.. ignoring\n");
- }
- else
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL,"Writing Low Power Mode Abort pattern to the Device\n");
+ if (Adapter->bTriedToWakeUpFromlowPowerMode) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Wake up already attempted.. ignoring\n");
+ } else {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, IDLE_MODE, DBG_LVL_ALL, "Writing Low Power Mode Abort pattern to the Device\n");
Adapter->bTriedToWakeUpFromlowPowerMode = TRUE;
InterfaceAbortIdlemode(Adapter, Adapter->usIdleModePattern);
@@ -269,33 +237,30 @@ void InterfaceHandleShutdownModeWakeup(struct bcm_mini_adapter *Adapter)
INT Status = 0;
int bytes;
- if(Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING)
- {
- // clear idlemode interrupt.
+ if (Adapter->ulPowerSaveMode == DEVICE_POWERSAVE_MODE_AS_MANUAL_CLOCK_GATING) {
+ /* clear idlemode interrupt. */
uiRegVal = 0;
- Status =wrmalt(Adapter,DEBUG_INTERRUPT_GENERATOR_REGISTOR, &uiRegVal, sizeof(uiRegVal));
- if(Status)
- {
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0,"WRM to DEBUG_INTERRUPT_GENERATOR_REGISTOR Failed with err :%d", Status);
+ Status = wrmalt(Adapter, DEBUG_INTERRUPT_GENERATOR_REGISTOR, &uiRegVal, sizeof(uiRegVal));
+ if (Status) {
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0,"WRM to DEBUG_INTERRUPT_GENERATOR_REGISTOR Failed with err :%d", Status);
return;
}
}
- else
- {
+ else {
- //clear Interrupt EP registers.
- bytes = rdmalt(Adapter,DEVICE_INT_OUT_EP_REG0, &uiRegVal, sizeof(uiRegVal));
+ /* clear Interrupt EP registers. */
+ bytes = rdmalt(Adapter, DEVICE_INT_OUT_EP_REG0, &uiRegVal, sizeof(uiRegVal));
if (bytes < 0) {
Status = bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0,"RDM of DEVICE_INT_OUT_EP_REG0 failed with Err :%d", Status);
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "RDM of DEVICE_INT_OUT_EP_REG0 failed with Err :%d", Status);
return;
}
- bytes = rdmalt(Adapter,DEVICE_INT_OUT_EP_REG1, &uiRegVal, sizeof(uiRegVal));
+ bytes = rdmalt(Adapter, DEVICE_INT_OUT_EP_REG1, &uiRegVal, sizeof(uiRegVal));
if (bytes < 0) {
Status = bytes;
- BCM_DEBUG_PRINT(Adapter,DBG_TYPE_PRINTK, 0, 0,"RDM of DEVICE_INT_OUT_EP_REG1 failed with Err :%d", Status);
+ BCM_DEBUG_PRINT(Adapter, DBG_TYPE_PRINTK, 0, 0, "RDM of DEVICE_INT_OUT_EP_REG1 failed with Err :%d", Status);
return;
}
}
diff --git a/drivers/staging/bcm/Version.h b/drivers/staging/bcm/Version.h
index a07b956b9ff5..f1cb9de734a6 100644
--- a/drivers/staging/bcm/Version.h
+++ b/drivers/staging/bcm/Version.h
@@ -1,4 +1,3 @@
-
/*Copyright (c) 2005 Beceem Communications Inc.
Module Name:
@@ -17,7 +16,6 @@ Abstract:
#define VER_FILETYPE VFT_DRV
#define VER_FILESUBTYPE VFT2_DRV_NETWORK
-
#define VER_FILEVERSION 5.2.45
#define VER_FILEVERSION_STR "5.2.45"
@@ -28,8 +26,4 @@ Abstract:
#define VER_PRODUCTVERSION_STR VER_FILEVERSION_STR
-
-
-//#include "common.ver"
-
-#endif //VERSION_H
+#endif /* VERSION_H */
diff --git a/drivers/staging/bcm/vendorspecificextn.c b/drivers/staging/bcm/vendorspecificextn.c
index be1f91d955aa..d38a06f762df 100644
--- a/drivers/staging/bcm/vendorspecificextn.c
+++ b/drivers/staging/bcm/vendorspecificextn.c
@@ -1,70 +1,70 @@
#include "headers.h"
-//-----------------------------------------------------------------------------
-// Procedure: vendorextnGetSectionInfo
-//
-// Description: Finds the type of NVM used.
-//
-// Arguments:
-// Adapter - ptr to Adapter object instance
-// pNVMType - ptr to NVM type.
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//-----------------------------------------------------------------------------
+/*
+ * Procedure: vendorextnGetSectionInfo
+ *
+ * Description: Finds the type of NVM used.
+ *
+ * Arguments:
+ * Adapter - ptr to Adapter object instance
+ * pNVMType - ptr to NVM type.
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ *
+ */
INT vendorextnGetSectionInfo(PVOID pContext, struct bcm_flash2x_vendor_info *pVendorInfo)
{
return STATUS_FAILURE;
}
-//-----------------------------------------------------------------------------
-// Procedure: vendorextnInit
-//
-// Description: Initializing the vendor extension NVM interface
-//
-// Arguments:
-// Adapter - Pointer to MINI Adapter Structure.
-
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//-----------------------------------------------------------------------------
+/*
+ * Procedure: vendorextnInit
+ *
+ * Description: Initializing the vendor extension NVM interface
+ *
+ * Arguments:
+ * Adapter - Pointer to MINI Adapter Structure
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ *
+ *
+ */
INT vendorextnInit(struct bcm_mini_adapter *Adapter)
{
return STATUS_SUCCESS;
}
-//-----------------------------------------------------------------------------
-// Procedure: vendorextnExit
-//
-// Description: Free the resource associated with vendor extension NVM interface
-//
-// Arguments:
-// Adapter - Pointer to MINI Adapter Structure.
-
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//-----------------------------------------------------------------------------
+/*
+ * Procedure: vendorextnExit
+ *
+ * Description: Free the resource associated with vendor extension NVM interface
+ *
+ * Arguments:
+ *
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ *
+ *
+ */
INT vendorextnExit(struct bcm_mini_adapter *Adapter)
{
return STATUS_SUCCESS;
}
-//------------------------------------------------------------------------
-// Procedure: vendorextnIoctl
-//
-// Description: execute the vendor extension specific ioctl
-//
-//Arguments:
-// Adapter -Beceem private Adapter Structure
-// cmd -vendor extension specific Ioctl commad
-// arg -input parameter sent by vendor
-//
-// Returns:
-// CONTINUE_COMMON_PATH in case it is not meant to be processed by vendor ioctls
-// STATUS_SUCCESS/STATUS_FAILURE as per the IOCTL return value
-//
-//--------------------------------------------------------------------------
+/*
+ * Procedure: vendorextnIoctl
+ *
+ * Description: execute the vendor extension specific ioctl
+ *
+ * Arguments:
+ * Adapter -Beceem private Adapter Structure
+ * cmd -vendor extension specific Ioctl commad
+ * arg -input parameter sent by vendor
+ *
+ * Returns:
+ * CONTINUE_COMMON_PATH in case it is not meant to be processed by vendor ioctls
+ * STATUS_SUCCESS/STATUS_FAILURE as per the IOCTL return value
+ */
+
INT vendorextnIoctl(struct bcm_mini_adapter *Adapter, UINT cmd, ULONG arg)
{
return CONTINUE_COMMON_PATH;
@@ -72,22 +72,21 @@ INT vendorextnIoctl(struct bcm_mini_adapter *Adapter, UINT cmd, ULONG arg)
-//------------------------------------------------------------------
-// Procedure: vendorextnReadSection
-//
-// Description: Reads from a section of NVM
-//
-// Arguments:
-// pContext - ptr to Adapter object instance
-// pBuffer - Read the data from Vendor Area to this buffer
-// SectionVal - Value of type of Section
-// Offset - Read from the Offset of the Vendor Section.
-// numOfBytes - Read numOfBytes from the Vendor section to Buffer
-//
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//------------------------------------------------------------------
+/*
+ * Procedure: vendorextnReadSection
+ *
+ * Description: Reads from a section of NVM
+ *
+ * Arguments:
+ * pContext - ptr to Adapter object instance
+ * pBuffer - Read the data from Vendor Area to this buffer
+ * SectionVal - Value of type of Section
+ * Offset - Read from the Offset of the Vendor Section.
+ * numOfBytes - Read numOfBytes from the Vendor section to Buffer
+ *
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ */
INT vendorextnReadSection(PVOID pContext, PUCHAR pBuffer, enum bcm_flash2x_section_val SectionVal,
UINT offset, UINT numOfBytes)
@@ -97,23 +96,22 @@ INT vendorextnReadSection(PVOID pContext, PUCHAR pBuffer, enum bcm_flash2x_sect
-//------------------------------------------------------------------
-// Procedure: vendorextnWriteSection
-//
-// Description: Write to a Section of NVM
-//
-// Arguments:
-// pContext - ptr to Adapter object instance
-// pBuffer - Write the data provided in the buffer
-// SectionVal - Value of type of Section
-// Offset - Writes to the Offset of the Vendor Section.
-// numOfBytes - Write num Bytes after reading from pBuffer.
-// bVerify - the Buffer Written should be verified.
-//
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//------------------------------------------------------------------
+/*
+ * Procedure: vendorextnWriteSection
+ *
+ * Description: Write to a Section of NVM
+ *
+ * Arguments:
+ * pContext - ptr to Adapter object instance
+ * pBuffer - Write the data provided in the buffer
+ * SectionVal - Value of type of Section
+ * Offset - Writes to the Offset of the Vendor Section.
+ * numOfBytes - Write num Bytes after reading from pBuffer.
+ * bVerify - the Buffer Written should be verified.
+ *
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ */
INT vendorextnWriteSection(PVOID pContext, PUCHAR pBuffer, enum bcm_flash2x_section_val SectionVal,
UINT offset, UINT numOfBytes, BOOLEAN bVerify)
{
@@ -122,25 +120,23 @@ INT vendorextnWriteSection(PVOID pContext, PUCHAR pBuffer, enum bcm_flash2x_sec
-//------------------------------------------------------------------
-// Procedure: vendorextnWriteSectionWithoutErase
-//
-// Description: Write to a Section of NVM without erasing the sector
-//
-// Arguments:
-// pContext - ptr to Adapter object instance
-// pBuffer - Write the data provided in the buffer
-// SectionVal - Value of type of Section
-// Offset - Writes to the Offset of the Vendor Section.
-// numOfBytes - Write num Bytes after reading from pBuffer.
-//
-// Returns:
-// STATUS_SUCCESS/STATUS_FAILURE
-//
-//------------------------------------------------------------------
+/*
+ * Procedure: vendorextnWriteSectionWithoutErase
+ *
+ * Description: Write to a Section of NVM without erasing the sector
+ *
+ * Arguments:
+ * pContext - ptr to Adapter object instance
+ * pBuffer - Write the data provided in the buffer
+ * SectionVal - Value of type of Section
+ * Offset - Writes to the Offset of the Vendor Section.
+ * numOfBytes - Write num Bytes after reading from pBuffer.
+ *
+ * Returns:
+ * STATUS_SUCCESS/STATUS_FAILURE
+ */
INT vendorextnWriteSectionWithoutErase(PVOID pContext, PUCHAR pBuffer, enum bcm_flash2x_section_val SectionVal,
UINT offset, UINT numOfBytes)
{
return STATUS_FAILURE;
}
-
diff --git a/drivers/staging/btmtk_usb/Kconfig b/drivers/staging/btmtk_usb/Kconfig
new file mode 100644
index 000000000000..a425ebda6c7a
--- /dev/null
+++ b/drivers/staging/btmtk_usb/Kconfig
@@ -0,0 +1,11 @@
+config USB_BTMTK
+ tristate "Mediatek Bluetooth support"
+ depends on USB && BT && m
+ ---help---
+ Say Y here if you wish to control a MTK USB Bluetooth.
+
+ This option depends on 'USB' support being enabled
+
+ To compile this driver as a module, choose M here: the
+ module will be called btmtk_usb.
+
diff --git a/drivers/staging/btmtk_usb/Makefile b/drivers/staging/btmtk_usb/Makefile
new file mode 100644
index 000000000000..4d6c9d764621
--- /dev/null
+++ b/drivers/staging/btmtk_usb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_USB_BTMTK) += btmtk_usb.o
diff --git a/drivers/staging/btmtk_usb/README b/drivers/staging/btmtk_usb/README
new file mode 100644
index 000000000000..c046c8e96b2d
--- /dev/null
+++ b/drivers/staging/btmtk_usb/README
@@ -0,0 +1,14 @@
+-build driver modules
+ make
+
+-install driver modules
+ make install
+
+-remove driver modules
+ make clean
+
+-dynamic debug message
+ turn on CONFIG_DYNAMIC_DEBUG compiler flag for current kernel
+ mount -t debugfs none /sys/kernel/debug/
+ echo "module module_name +p" > /sys/kernel/debug/dynamic_debug/control(turn on debug messages, module name such as btmtk_usb)
+ echo "module module_name -p" > /sys/kernel/debug/dynamic_debug/control(turn off debug messages, module name such as btmtk_usb)
diff --git a/drivers/staging/btmtk_usb/TODO b/drivers/staging/btmtk_usb/TODO
new file mode 100644
index 000000000000..a71d1297942d
--- /dev/null
+++ b/drivers/staging/btmtk_usb/TODO
@@ -0,0 +1,10 @@
+TODO:
+ - checkpatch.pl clean
+ - determine if the driver should not be using a duplicate
+ version of the usb-bluetooth interface code, but should
+ be merged into the drivers/bluetooth/ directory and
+ infrastructure instead.
+ - review by the bluetooth developer community
+
+Please send any patches for this driver to Yu-Chen, Cho <acho@suse.com> and
+jay.hung@mediatek.com
diff --git a/drivers/staging/btmtk_usb/btmtk_usb.c b/drivers/staging/btmtk_usb/btmtk_usb.c
new file mode 100644
index 000000000000..0e783e8d71ca
--- /dev/null
+++ b/drivers/staging/btmtk_usb/btmtk_usb.c
@@ -0,0 +1,1784 @@
+/*
+ * MediaTek Bluetooth USB Driver
+ *
+ * Copyright (C) 2013, MediaTek co.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * or on the worldwide web at http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/completion.h>
+#include <linux/firmware.h>
+#include <linux/usb.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "btmtk_usb.h"
+
+#define VERSION "1.0.4"
+#define MT7650_FIRMWARE "mt7650.bin"
+#define MT7662_FIRMWARE "mt7662.bin"
+
+static struct usb_driver btmtk_usb_driver;
+
+
+static int btmtk_usb_load_rom_patch(struct btmtk_usb_data *);
+static int btmtk_usb_load_fw(struct btmtk_usb_data *);
+
+static void hex_dump(char *str, u8 *src_buf, u32 src_buf_len)
+{
+ unsigned char *pt;
+ int x;
+
+ pt = src_buf;
+
+ BT_DBG("%s: %p, len = %d\n", str, src_buf, src_buf_len);
+
+ for (x = 0; x < src_buf_len; x++) {
+ if (x % 16 == 0)
+ BT_DBG("0x%04x : ", x);
+ BT_DBG("%02x ", ((unsigned char)pt[x]));
+ if (x % 16 == 15)
+ BT_DBG("\n");
+ }
+
+ BT_DBG("\n");
+}
+
+static int btmtk_usb_reset(struct usb_device *udev)
+{
+ int ret;
+
+ BT_DBG("%s\n", __func__);
+
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x01, DEVICE_VENDOR_REQUEST_OUT,
+ 0x01, 0x00, NULL, 0x00, CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ BT_ERR("%s error(%d)\n", __func__, ret);
+ return ret;
+ }
+
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+static int btmtk_usb_io_read32(struct btmtk_usb_data *data, u32 reg, u32 *val)
+{
+ u8 request = data->r_request;
+ struct usb_device *udev = data->udev;
+ int ret;
+
+ ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), request, DEVICE_VENDOR_REQUEST_IN,
+ 0x0, reg, data->io_buf, 4,
+ CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ *val = 0xffffffff;
+ BT_ERR("%s error(%d), reg=%x, value=%x\n", __func__, ret, reg, *val);
+ return ret;
+ }
+
+ memmove(val, data->io_buf, 4);
+
+ *val = le32_to_cpu(*val);
+
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+static int btmtk_usb_io_write32(struct btmtk_usb_data *data, u32 reg, u32 val)
+{
+ u16 value, index;
+ u8 request = data->w_request;
+ struct usb_device *udev = data->udev;
+ int ret;
+
+ index = (u16)reg;
+ value = val & 0x0000ffff;
+
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), request, DEVICE_VENDOR_REQUEST_OUT,
+ value, index, NULL, 0,
+ CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ BT_ERR("%s error(%d), reg=%x, value=%x\n", __func__, ret, reg, val);
+ return ret;
+ }
+
+ index = (u16)(reg + 2);
+ value = (val & 0xffff0000) >> 16;
+
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ request, DEVICE_VENDOR_REQUEST_OUT,
+ value, index, NULL, 0, CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ BT_ERR("%s error(%d), reg=%x, value=%x\n", __func__, ret, reg, val);
+ return ret;
+ }
+
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+static int btmtk_usb_switch_iobase(struct btmtk_usb_data *data, int base)
+{
+ int ret = 0;
+
+ switch (base) {
+ case SYSCTL:
+ data->w_request = 0x42;
+ data->r_request = 0x47;
+ break;
+ case WLAN:
+ data->w_request = 0x02;
+ data->r_request = 0x07;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static void btmtk_usb_cap_init(struct btmtk_usb_data *data)
+{
+ const struct firmware *firmware;
+ struct usb_device *udev = data->udev;
+ int ret;
+
+ btmtk_usb_io_read32(data, 0x00, &data->chip_id);
+
+ BT_DBG("chip id = %x\n", data->chip_id);
+
+ if (is_mt7630(data) || is_mt7650(data)) {
+ data->need_load_fw = 1;
+ data->need_load_rom_patch = 0;
+ ret = request_firmware(&firmware, MT7650_FIRMWARE, &udev->dev);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ BT_ERR("Firmware file \"%s\" not found \n", MT7650_FIRMWARE);
+ } else {
+ BT_ERR("Firmware file \"%s\" request failed (err=%d) \n",
+ MT7650_FIRMWARE, ret);
+ }
+ } else {
+ BT_DBG("Firmware file \"%s\" Found \n", MT7650_FIRMWARE);
+ /* load firmware here */
+ data->firmware = firmware;
+ btmtk_usb_load_fw(data);
+ }
+ release_firmware(firmware);
+ } else if (is_mt7632(data) || is_mt7662(data)) {
+ data->need_load_fw = 0;
+ data->need_load_rom_patch = 1;
+ data->rom_patch_offset = 0x90000;
+ ret = request_firmware(&firmware, MT7662_FIRMWARE, &udev->dev);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ BT_ERR("Firmware file \"%s\" not found\n", MT7662_FIRMWARE);
+ } else {
+ BT_ERR("Firmware file \"%s\" request failed (err=%d)\n",
+ MT7662_FIRMWARE, ret);
+ }
+ } else {
+ BT_DBG("Firmware file \"%s\" Found\n", MT7662_FIRMWARE);
+ /* load rom patch here */
+ data->firmware = firmware;
+ data->rom_patch_len = firmware->size;
+ btmtk_usb_load_rom_patch(data);
+ }
+ release_firmware(firmware);
+ } else {
+ BT_ERR("unknow chip(%x)\n", data->chip_id);
+ }
+}
+
+static u16 checksume16(u8 *pData, int len)
+{
+ int sum = 0;
+
+ while (len > 1) {
+ sum += *((u16 *)pData);
+
+ pData = pData + 2;
+
+ if (sum & 0x80000000)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ len -= 2;
+ }
+
+ if (len)
+ sum += *((u8 *)pData);
+
+ while (sum >> 16) {
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ }
+
+ return ~sum;
+}
+
+static int btmtk_usb_chk_crc(struct btmtk_usb_data *data, u32 checksum_len)
+{
+ int ret = 0;
+ struct usb_device *udev = data->udev;
+
+ BT_DBG("%s\n", __func__);
+
+ memmove(data->io_buf, &data->rom_patch_offset, 4);
+ memmove(&data->io_buf[4], &checksum_len, 4);
+
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x1, DEVICE_VENDOR_REQUEST_IN,
+ 0x20, 0x00, data->io_buf, 8,
+ CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ BT_ERR("%s error(%d)\n", __func__, ret);
+ }
+
+ return ret;
+}
+
+static u16 btmtk_usb_get_crc(struct btmtk_usb_data *data)
+{
+ int ret = 0;
+ struct usb_device *udev = data->udev;
+ u16 crc, count = 0;
+
+ BT_DBG("%s\n", __func__);
+
+ while (1) {
+ ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ 0x01, DEVICE_VENDOR_REQUEST_IN,
+ 0x21, 0x00, data->io_buf, 2,
+ CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ crc = 0xFFFF;
+ BT_ERR("%s error(%d)\n", __func__, ret);
+ }
+
+ memmove(&crc, data->io_buf, 2);
+
+ crc = le16_to_cpu(crc);
+
+ if (crc != 0xFFFF)
+ break;
+
+ mdelay(100);
+
+ if (count++ > 100) {
+ BT_ERR("Query CRC over %d times\n", count);
+ break;
+ }
+ }
+
+ return crc;
+}
+
+static int btmtk_usb_reset_wmt(struct btmtk_usb_data *data)
+{
+ int ret = 0;
+
+ /* reset command */
+ u8 cmd[8] = {0x6F, 0xFC, 0x05, 0x01, 0x07, 0x01, 0x00, 0x04};
+
+ memmove(data->io_buf, cmd, 8);
+
+ BT_DBG("%s\n", __func__);
+
+ ret = usb_control_msg(data->udev, usb_sndctrlpipe(data->udev, 0), 0x01,
+ DEVICE_CLASS_REQUEST_OUT, 0x12, 0x00, data->io_buf,
+ 8, CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret)
+ BT_ERR("%s:(%d)\n", __func__, ret);
+
+ return ret;
+}
+
+static void load_rom_patch_complete(struct urb *urb)
+{
+
+ struct completion *sent_to_mcu_done = (struct completion *)urb->context;
+
+ complete(sent_to_mcu_done);
+}
+
+static int btmtk_usb_load_rom_patch(struct btmtk_usb_data *data)
+{
+ u32 loop = 0;
+ u32 value;
+ s32 sent_len;
+ int ret = 0, total_checksum = 0;
+ struct urb *urb;
+ u32 patch_len = 0;
+ u32 cur_len = 0;
+ dma_addr_t data_dma;
+ struct completion sent_to_mcu_done;
+ int first_block = 1;
+ unsigned char phase;
+ void *buf;
+ char *pos;
+ unsigned int pipe = usb_sndbulkpipe(data->udev, data->bulk_tx_ep->bEndpointAddress);
+
+ if (!data->firmware) {
+ BT_ERR("%s:please assign a rom patch\n", __func__);
+ return -1;
+ }
+
+load_patch_protect:
+ btmtk_usb_switch_iobase(data, WLAN);
+ btmtk_usb_io_read32(data, SEMAPHORE_03, &value);
+ loop++;
+
+ if (((value & 0x01) == 0x00) && (loop < 600)) {
+ mdelay(1);
+ goto load_patch_protect;
+ }
+
+ btmtk_usb_io_write32(data, 0x1004, 0x2c);
+
+ btmtk_usb_switch_iobase(data, SYSCTL);
+
+ btmtk_usb_io_write32(data, 0x1c, 0x30);
+
+ /* Enable USB_DMA_CFG */
+ btmtk_usb_io_write32(data, 0x9018, 0x00c00020);
+
+ btmtk_usb_switch_iobase(data, WLAN);
+
+ /* check ROM patch if upgrade */
+ btmtk_usb_io_read32(data, COM_REG0, &value);
+
+ if ((value & 0x02) == 0x02)
+ goto error0;
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+
+ if (!urb) {
+ ret = -ENOMEM;
+ goto error0;
+ }
+
+ buf = usb_alloc_coherent(data->udev, UPLOAD_PATCH_UNIT, GFP_ATOMIC, &data_dma);
+
+ if (!buf) {
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ pos = buf;
+ BT_DBG("loading rom patch");
+
+ init_completion(&sent_to_mcu_done);
+
+ cur_len = 0x00;
+ patch_len = data->rom_patch_len - PATCH_INFO_SIZE;
+
+ /* loading rom patch */
+ while (1) {
+ s32 sent_len_max = UPLOAD_PATCH_UNIT - PATCH_HEADER_SIZE;
+ sent_len = (patch_len - cur_len) >= sent_len_max ? sent_len_max : (patch_len - cur_len);
+
+ BT_DBG("patch_len = %d\n", patch_len);
+ BT_DBG("cur_len = %d\n", cur_len);
+ BT_DBG("sent_len = %d\n", sent_len);
+
+ if (sent_len > 0) {
+ if (first_block == 1) {
+ if (sent_len < sent_len_max)
+ phase = PATCH_PHASE3;
+ else
+ phase = PATCH_PHASE1;
+ first_block = 0;
+ } else if (sent_len == sent_len_max) {
+ phase = PATCH_PHASE2;
+ } else {
+ phase = PATCH_PHASE3;
+ }
+
+ /* prepare HCI header */
+ pos[0] = 0x6F;
+ pos[1] = 0xFC;
+ pos[2] = (sent_len + 5) & 0xFF;
+ pos[3] = ((sent_len + 5) >> 8) & 0xFF;
+
+ /* prepare WMT header */
+ pos[4] = 0x01;
+ pos[5] = 0x01;
+ pos[6] = (sent_len + 1) & 0xFF;
+ pos[7] = ((sent_len + 1) >> 8) & 0xFF;
+
+ pos[8] = phase;
+
+ memcpy(&pos[9], data->firmware->data + PATCH_INFO_SIZE + cur_len, sent_len);
+
+ BT_DBG("sent_len + PATCH_HEADER_SIZE = %d, phase = %d\n",
+ sent_len + PATCH_HEADER_SIZE, phase);
+
+ usb_fill_bulk_urb(urb,
+ data->udev,
+ pipe,
+ buf,
+ sent_len + PATCH_HEADER_SIZE,
+ load_rom_patch_complete,
+ &sent_to_mcu_done);
+
+ urb->transfer_dma = data_dma;
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (ret)
+ goto error2;
+
+ if (!wait_for_completion_timeout(&sent_to_mcu_done, msecs_to_jiffies(1000))) {
+ usb_kill_urb(urb);
+ BT_ERR("upload rom_patch timeout\n");
+ goto error2;
+ }
+
+ BT_DBG(".");
+
+ mdelay(200);
+
+ cur_len += sent_len;
+
+ } else {
+ break;
+ }
+ }
+
+ total_checksum = checksume16((u8 *)data->firmware->data + PATCH_INFO_SIZE, patch_len);
+
+ BT_DBG("Send checksum req..\n");
+
+ btmtk_usb_chk_crc(data, patch_len);
+
+ mdelay(20);
+
+ if (total_checksum != btmtk_usb_get_crc(data)) {
+ BT_ERR("checksum fail!, local(0x%x) <> fw(0x%x)\n",
+ total_checksum, btmtk_usb_get_crc(data));
+ ret = -1;
+ goto error2;
+ }
+
+ mdelay(20);
+
+ ret = btmtk_usb_reset_wmt(data);
+
+ mdelay(20);
+
+error2:
+ usb_free_coherent(data->udev, UPLOAD_PATCH_UNIT, buf, data_dma);
+error1:
+ usb_free_urb(urb);
+error0:
+ btmtk_usb_io_write32(data, SEMAPHORE_03, 0x1);
+ return ret;
+}
+
+
+static int load_fw_iv(struct btmtk_usb_data *data)
+{
+ int ret;
+ struct usb_device *udev = data->udev;
+ char *buf = kmalloc(64, GFP_ATOMIC);
+
+ memmove(buf, data->firmware->data + 32, 64);
+
+ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x01,
+ DEVICE_VENDOR_REQUEST_OUT, 0x12, 0x0, buf, 64,
+ CONTROL_TIMEOUT_JIFFIES);
+
+ if (ret < 0) {
+ BT_ERR("%s error(%d) step4\n", __func__, ret);
+ kfree(buf);
+ return ret;
+ }
+
+ if (ret > 0)
+ ret = 0;
+
+ kfree(buf);
+
+ return ret;
+}
+
+static void load_fw_complete(struct urb *urb)
+{
+
+ struct completion *sent_to_mcu_done = (struct completion *)urb->context;
+
+ complete(sent_to_mcu_done);
+}
+
+static int btmtk_usb_load_fw(struct btmtk_usb_data *data)
+{
+ struct usb_device *udev = data->udev;
+ struct urb *urb;
+ void *buf;
+ u32 cur_len = 0;
+ u32 packet_header = 0;
+ u32 value;
+ u32 ilm_len = 0, dlm_len = 0;
+ u16 fw_ver, build_ver;
+ u32 loop = 0;
+ dma_addr_t data_dma;
+ int ret = 0, sent_len;
+ struct completion sent_to_mcu_done;
+ unsigned int pipe = usb_sndbulkpipe(data->udev, data->bulk_tx_ep->bEndpointAddress);
+
+ if (!data->firmware) {
+ BT_ERR("%s:please assign a fw\n", __func__);
+ return -1;
+ }
+
+ BT_DBG("bulk_tx_ep = %x\n", data->bulk_tx_ep->bEndpointAddress);
+
+loadfw_protect:
+ btmtk_usb_switch_iobase(data, WLAN);
+ btmtk_usb_io_read32(data, SEMAPHORE_00, &value);
+ loop++;
+
+ if (((value & 0x1) == 0) && (loop < 10000))
+ goto loadfw_protect;
+
+ /* check MCU if ready */
+ btmtk_usb_io_read32(data, COM_REG0, &value);
+
+ if ((value & 0x01) == 0x01)
+ goto error0;
+
+ /* Enable MPDMA TX and EP2 load FW mode */
+ btmtk_usb_io_write32(data, 0x238, 0x1c000000);
+
+ btmtk_usb_reset(udev);
+ mdelay(100);
+
+ ilm_len = (*(data->firmware->data + 3) << 24)
+ | (*(data->firmware->data + 2) << 16)
+ | (*(data->firmware->data + 1) << 8)
+ | (*data->firmware->data);
+
+ dlm_len = (*(data->firmware->data + 7) << 24)
+ | (*(data->firmware->data + 6) << 16)
+ | (*(data->firmware->data + 5) << 8)
+ | (*(data->firmware->data + 4));
+
+ fw_ver = (*(data->firmware->data + 11) << 8) | (*(data->firmware->data + 10));
+
+ build_ver = (*(data->firmware->data + 9) << 8) | (*(data->firmware->data + 8));
+
+ BT_DBG("fw version:%d.%d.%02d ",
+ (fw_ver & 0xf000) >> 8,
+ (fw_ver & 0x0f00) >> 8,
+ (fw_ver & 0x00ff));
+
+ BT_DBG("build:%x\n", build_ver);
+
+ BT_DBG("build Time =");
+
+ for (loop = 0; loop < 16; loop++)
+ BT_DBG("%c", *(data->firmware->data + 16 + loop));
+
+ BT_DBG("\n");
+
+ BT_DBG("ILM length = %d(bytes)\n", ilm_len);
+ BT_DBG("DLM length = %d(bytes)\n", dlm_len);
+
+ btmtk_usb_switch_iobase(data, SYSCTL);
+
+ /* U2M_PDMA rx_ring_base_ptr */
+ btmtk_usb_io_write32(data, 0x790, 0x400230);
+
+ /* U2M_PDMA rx_ring_max_cnt */
+ btmtk_usb_io_write32(data, 0x794, 0x1);
+
+ /* U2M_PDMA cpu_idx */
+ btmtk_usb_io_write32(data, 0x798, 0x1);
+
+ /* U2M_PDMA enable */
+ btmtk_usb_io_write32(data, 0x704, 0x44);
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+
+ if (!urb) {
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ buf = usb_alloc_coherent(udev, 14592, GFP_ATOMIC, &data_dma);
+
+ if (!buf) {
+ ret = -ENOMEM;
+ goto error2;
+ }
+
+ BT_DBG("loading fw");
+
+ init_completion(&sent_to_mcu_done);
+
+ btmtk_usb_switch_iobase(data, SYSCTL);
+
+ cur_len = 0x40;
+
+ /* Loading ILM */
+ while (1) {
+ sent_len = (ilm_len - cur_len) >= 14336 ? 14336 : (ilm_len - cur_len);
+
+ if (sent_len > 0) {
+ packet_header &= ~(0xffffffff);
+ packet_header |= (sent_len << 16);
+ packet_header = cpu_to_le32(packet_header);
+
+ memmove(buf, &packet_header, 4);
+ memmove(buf + 4, data->firmware->data + 32 + cur_len, sent_len);
+
+ /* U2M_PDMA descriptor */
+ btmtk_usb_io_write32(data, 0x230, cur_len);
+
+ while ((sent_len % 4) != 0) {
+ sent_len++;
+ }
+
+ /* U2M_PDMA length */
+ btmtk_usb_io_write32(data, 0x234, sent_len << 16);
+
+ usb_fill_bulk_urb(urb,
+ udev,
+ pipe,
+ buf,
+ sent_len + 4,
+ load_fw_complete,
+ &sent_to_mcu_done);
+
+ urb->transfer_dma = data_dma;
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (ret)
+ goto error3;
+
+ if (!wait_for_completion_timeout(&sent_to_mcu_done, msecs_to_jiffies(1000))) {
+ usb_kill_urb(urb);
+ BT_ERR("upload ilm fw timeout\n");
+ goto error3;
+ }
+
+ BT_DBG(".");
+
+ mdelay(200);
+
+ cur_len += sent_len;
+ } else {
+ break;
+ }
+ }
+
+ init_completion(&sent_to_mcu_done);
+ cur_len = 0x00;
+
+ /* Loading DLM */
+ while (1) {
+ sent_len = (dlm_len - cur_len) >= 14336 ? 14336 : (dlm_len - cur_len);
+
+ if (sent_len > 0) {
+ packet_header &= ~(0xffffffff);
+ packet_header |= (sent_len << 16);
+ packet_header = cpu_to_le32(packet_header);
+
+ memmove(buf, &packet_header, 4);
+ memmove(buf + 4, data->firmware->data + 32 + ilm_len + cur_len, sent_len);
+
+ /* U2M_PDMA descriptor */
+ btmtk_usb_io_write32(data, 0x230, 0x80000 + cur_len);
+
+ while ((sent_len % 4) != 0) {
+ BT_DBG("sent_len is not divided by 4\n");
+ sent_len++;
+ }
+
+ /* U2M_PDMA length */
+ btmtk_usb_io_write32(data, 0x234, sent_len << 16);
+
+ usb_fill_bulk_urb(urb,
+ udev,
+ pipe,
+ buf,
+ sent_len + 4,
+ load_fw_complete,
+ &sent_to_mcu_done);
+
+ urb->transfer_dma = data_dma;
+ urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (ret)
+ goto error3;
+
+ if (!wait_for_completion_timeout(&sent_to_mcu_done, msecs_to_jiffies(1000))) {
+ usb_kill_urb(urb);
+ BT_ERR("upload dlm fw timeout\n");
+ goto error3;
+ }
+
+ BT_DBG(".");
+
+ mdelay(500);
+
+ cur_len += sent_len;
+
+ } else {
+ break;
+ }
+ }
+
+ /* upload 64bytes interrupt vector */
+ ret = load_fw_iv(data);
+ mdelay(100);
+
+ btmtk_usb_switch_iobase(data, WLAN);
+
+ /* check MCU if ready */
+ loop = 0;
+
+ do {
+ btmtk_usb_io_read32(data, COM_REG0, &value);
+
+ if (value == 0x01)
+ break;
+
+ mdelay(10);
+ loop++;
+ } while (loop <= 100);
+
+ if (loop > 1000) {
+ BT_ERR("wait for 100 times\n");
+ ret = -ENODEV;
+ }
+
+error3:
+ usb_free_coherent(udev, 14592, buf, data_dma);
+error2:
+ usb_free_urb(urb);
+error1:
+ /* Disbale load fw mode */
+ btmtk_usb_io_read32(data, 0x238, &value);
+ value = value & ~(0x10000000);
+ btmtk_usb_io_write32(data, 0x238, value);
+error0:
+ btmtk_usb_io_write32(data, SEMAPHORE_00, 0x1);
+ return ret;
+}
+
+static int inc_tx(struct btmtk_usb_data *data)
+{
+ unsigned long flags;
+ int rv;
+
+ spin_lock_irqsave(&data->txlock, flags);
+ rv = test_bit(BTUSB_SUSPENDING, &data->flags);
+ if (!rv)
+ data->tx_in_flight++;
+ spin_unlock_irqrestore(&data->txlock, flags);
+
+ return rv;
+}
+
+static void btmtk_usb_intr_complete(struct urb *urb)
+{
+ struct hci_dev *hdev = urb->context;
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ int err;
+
+ BT_DBG("%s: %s urb %p status %d count %d\n", __func__, hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return;
+
+ if (urb->status == 0) {
+ hdev->stat.byte_rx += urb->actual_length;
+
+ hex_dump("hci event", urb->transfer_buffer, urb->actual_length);
+
+ if (hci_recv_fragment(hdev, HCI_EVENT_PKT,
+ urb->transfer_buffer,
+ urb->actual_length) < 0) {
+ BT_ERR("%s corrupted event packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
+ }
+
+ if (!test_bit(BTUSB_INTR_RUNNING, &data->flags))
+ return;
+
+ usb_mark_last_busy(data->udev);
+ usb_anchor_urb(urb, &data->intr_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (err < 0) {
+ /* -EPERM: urb is being killed;
+ * -ENODEV: device got disconnected */
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+}
+
+static int btmtk_usb_submit_intr_urb(struct hci_dev *hdev, gfp_t mem_flags)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size;
+
+ BT_DBG("%s\n", __func__);
+
+ if (!data->intr_ep)
+ return -ENODEV;
+
+ urb = usb_alloc_urb(0, mem_flags);
+ if (!urb)
+ return -ENOMEM;
+
+ size = le16_to_cpu(data->intr_ep->wMaxPacketSize);
+
+ buf = kmalloc(size, mem_flags);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ pipe = usb_rcvintpipe(data->udev, data->intr_ep->bEndpointAddress);
+
+ usb_fill_int_urb(urb, data->udev, pipe, buf, size,
+ btmtk_usb_intr_complete, hdev,
+ data->intr_ep->bInterval);
+
+ urb->transfer_flags |= URB_FREE_BUFFER;
+
+ usb_anchor_urb(urb, &data->intr_anchor);
+
+ err = usb_submit_urb(urb, mem_flags);
+ if (err < 0) {
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+
+}
+
+static void btmtk_usb_bulk_in_complete(struct urb *urb)
+{
+ struct hci_dev *hdev = urb->context;
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ int err;
+
+ BT_DBG("%s:%s urb %p status %d count %d", __func__, hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags)) {
+ return;
+ }
+
+ if (urb->status == 0) {
+ hdev->stat.byte_rx += urb->actual_length;
+
+ if (hci_recv_fragment(hdev, HCI_ACLDATA_PKT,
+ urb->transfer_buffer,
+ urb->actual_length) < 0) {
+ BT_ERR("%s corrupted ACL packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
+ }
+
+ if (!test_bit(BTUSB_BULK_RUNNING, &data->flags))
+ return;
+
+ usb_anchor_urb(urb, &data->bulk_anchor);
+ usb_mark_last_busy(data->udev);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ /* -EPERM: urb is being killed;
+ * -ENODEV: device got disconnected */
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+}
+
+static int btmtk_usb_submit_bulk_in_urb(struct hci_dev *hdev, gfp_t mem_flags)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size = HCI_MAX_FRAME_SIZE;
+
+ BT_DBG("%s:%s\n", __func__, hdev->name);
+
+ if (!data->bulk_rx_ep)
+ return -ENODEV;
+
+ urb = usb_alloc_urb(0, mem_flags);
+ if (!urb)
+ return -ENOMEM;
+
+ buf = kmalloc(size, mem_flags);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ pipe = usb_rcvbulkpipe(data->udev, data->bulk_rx_ep->bEndpointAddress);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ buf, size, btmtk_usb_bulk_in_complete, hdev);
+
+ urb->transfer_flags |= URB_FREE_BUFFER;
+
+ usb_mark_last_busy(data->udev);
+ usb_anchor_urb(urb, &data->bulk_anchor);
+
+ err = usb_submit_urb(urb, mem_flags);
+ if (err < 0) {
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+}
+
+static void btmtk_usb_isoc_in_complete(struct urb *urb)
+
+{
+ struct hci_dev *hdev = urb->context;
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ int i, err;
+
+ BT_DBG("%s: %s urb %p status %d count %d", __func__, hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return;
+
+ if (urb->status == 0) {
+ for (i = 0; i < urb->number_of_packets; i++) {
+ unsigned int offset = urb->iso_frame_desc[i].offset;
+ unsigned int length = urb->iso_frame_desc[i].actual_length;
+
+ if (urb->iso_frame_desc[i].status)
+ continue;
+
+ hdev->stat.byte_rx += length;
+
+ if (hci_recv_fragment(hdev, HCI_SCODATA_PKT,
+ urb->transfer_buffer + offset,
+ length) < 0) {
+ BT_ERR("%s corrupted SCO packet", hdev->name);
+ hdev->stat.err_rx++;
+ }
+ }
+ }
+
+ if (!test_bit(BTUSB_ISOC_RUNNING, &data->flags))
+ return;
+
+ usb_anchor_urb(urb, &data->isoc_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ /* -EPERM: urb is being killed;
+ * -ENODEV: device got disconnected */
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p failed to resubmit (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+}
+
+static inline void __fill_isoc_descriptor(struct urb *urb, int len, int mtu)
+{
+ int i, offset = 0;
+
+ BT_DBG("len %d mtu %d", len, mtu);
+
+ for (i = 0; i < BTUSB_MAX_ISOC_FRAMES && len >= mtu;
+ i++, offset += mtu, len -= mtu) {
+ urb->iso_frame_desc[i].offset = offset;
+ urb->iso_frame_desc[i].length = mtu;
+ }
+
+ if (len && i < BTUSB_MAX_ISOC_FRAMES) {
+ urb->iso_frame_desc[i].offset = offset;
+ urb->iso_frame_desc[i].length = len;
+ i++;
+ }
+
+ urb->number_of_packets = i;
+}
+
+static int btmtk_usb_submit_isoc_in_urb(struct hci_dev *hdev, gfp_t mem_flags)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ struct urb *urb;
+ unsigned char *buf;
+ unsigned int pipe;
+ int err, size;
+
+ BT_DBG("%s\n", __func__);
+
+ if (!data->isoc_rx_ep)
+ return -ENODEV;
+
+ urb = usb_alloc_urb(BTUSB_MAX_ISOC_FRAMES, mem_flags);
+ if (!urb)
+ return -ENOMEM;
+
+ size = le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize) *
+ BTUSB_MAX_ISOC_FRAMES;
+
+ buf = kmalloc(size, mem_flags);
+ if (!buf) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ pipe = usb_rcvisocpipe(data->udev, data->isoc_rx_ep->bEndpointAddress);
+
+ usb_fill_int_urb(urb, data->udev, pipe, buf, size, btmtk_usb_isoc_in_complete,
+ hdev, data->isoc_rx_ep->bInterval);
+
+ urb->transfer_flags = URB_FREE_BUFFER | URB_ISO_ASAP;
+
+ __fill_isoc_descriptor(urb, size,
+ le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize));
+
+ usb_anchor_urb(urb, &data->isoc_anchor);
+
+ err = usb_submit_urb(urb, mem_flags);
+ if (err < 0) {
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ usb_unanchor_urb(urb);
+ }
+
+ usb_free_urb(urb);
+
+ return err;
+}
+
+static int btmtk_usb_open(struct hci_dev *hdev)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ int err;
+
+ BT_DBG("%s\n", __func__);
+
+ err = usb_autopm_get_interface(data->intf);
+ if (err < 0)
+ return err;
+
+ data->intf->needs_remote_wakeup = 1;
+
+ if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
+ goto done;
+
+ err = btmtk_usb_submit_intr_urb(hdev, GFP_KERNEL);
+ if (err < 0)
+ goto failed;
+
+ err = btmtk_usb_submit_bulk_in_urb(hdev, GFP_KERNEL);
+ if (err < 0) {
+ usb_kill_anchored_urbs(&data->intr_anchor);
+ goto failed;
+ }
+
+ set_bit(BTUSB_BULK_RUNNING, &data->flags);
+ btmtk_usb_submit_bulk_in_urb(hdev, GFP_KERNEL);
+
+done:
+ usb_autopm_put_interface(data->intf);
+ return 0;
+
+failed:
+ clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ usb_autopm_put_interface(data->intf);
+ return err;
+}
+
+static void btmtk_usb_stop_traffic(struct btmtk_usb_data *data)
+{
+ BT_DBG("%s\n", __func__);
+
+ usb_kill_anchored_urbs(&data->intr_anchor);
+ usb_kill_anchored_urbs(&data->bulk_anchor);
+ usb_kill_anchored_urbs(&data->isoc_anchor);
+}
+
+static int btmtk_usb_close(struct hci_dev *hdev)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ int err;
+
+ BT_DBG("%s\n", __func__);
+
+ if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
+ return 0;
+
+ cancel_work_sync(&data->work);
+ cancel_work_sync(&data->waker);
+
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ clear_bit(BTUSB_BULK_RUNNING, &data->flags);
+ clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+
+ btmtk_usb_stop_traffic(data);
+
+ err = usb_autopm_get_interface(data->intf);
+ if (err < 0)
+ goto failed;
+
+ data->intf->needs_remote_wakeup = 0;
+ usb_autopm_put_interface(data->intf);
+
+failed:
+ usb_scuttle_anchored_urbs(&data->deferred);
+ return 0;
+}
+
+static int btmtk_usb_flush(struct hci_dev *hdev)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+
+ BT_DBG("%s\n", __func__);
+
+ usb_kill_anchored_urbs(&data->tx_anchor);
+
+ return 0;
+}
+
+static void btmtk_usb_tx_complete(struct urb *urb)
+{
+ struct sk_buff *skb = urb->context;
+ struct hci_dev *hdev = (struct hci_dev *)skb->dev;
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+
+ BT_DBG("%s: %s urb %p status %d count %d\n", __func__, hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (!urb->status)
+ hdev->stat.byte_tx += urb->transfer_buffer_length;
+ else
+ hdev->stat.err_tx++;
+
+done:
+ spin_lock(&data->txlock);
+ data->tx_in_flight--;
+ spin_unlock(&data->txlock);
+
+ kfree(urb->setup_packet);
+
+ kfree_skb(skb);
+}
+
+static void btmtk_usb_isoc_tx_complete(struct urb *urb)
+{
+ struct sk_buff *skb = urb->context;
+ struct hci_dev *hdev = (struct hci_dev *) skb->dev;
+
+ BT_DBG("%s: %s urb %p status %d count %d", __func__, hdev->name,
+ urb, urb->status, urb->actual_length);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (!urb->status)
+ hdev->stat.byte_tx += urb->transfer_buffer_length;
+ else
+ hdev->stat.err_tx++;
+
+done:
+ kfree(urb->setup_packet);
+
+ kfree_skb(skb);
+}
+
+static int btmtk_usb_send_frame(struct sk_buff *skb)
+{
+ struct hci_dev *hdev = (struct hci_dev *)skb->dev;
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ struct usb_ctrlrequest *dr;
+ struct urb *urb;
+ unsigned int pipe;
+ int err;
+
+ BT_DBG("%s\n", __func__);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return -EBUSY;
+
+ switch (bt_cb(skb)->pkt_type) {
+ case HCI_COMMAND_PKT:
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
+ if (!dr) {
+ usb_free_urb(urb);
+ return -ENOMEM;
+ }
+
+ dr->bRequestType = data->cmdreq_type;
+ dr->bRequest = 0;
+ dr->wIndex = 0;
+ dr->wValue = 0;
+ dr->wLength = __cpu_to_le16(skb->len);
+
+ pipe = usb_sndctrlpipe(data->udev, 0x00);
+
+ if (test_bit(HCI_RUNNING, &hdev->flags)) {
+ u16 op_code;
+ memcpy(&op_code, skb->data, 2);
+ BT_DBG("ogf = %x\n", (op_code & 0xfc00) >> 10);
+ BT_DBG("ocf = %x\n", op_code & 0x03ff);
+ hex_dump("hci command", skb->data, skb->len);
+
+ }
+
+ usb_fill_control_urb(urb, data->udev, pipe, (void *) dr,
+ skb->data, skb->len, btmtk_usb_tx_complete, skb);
+
+ hdev->stat.cmd_tx++;
+ break;
+
+ case HCI_ACLDATA_PKT:
+ if (!data->bulk_tx_ep)
+ return -ENODEV;
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ pipe = usb_sndbulkpipe(data->udev,
+ data->bulk_tx_ep->bEndpointAddress);
+
+ usb_fill_bulk_urb(urb, data->udev, pipe,
+ skb->data, skb->len, btmtk_usb_tx_complete, skb);
+
+ hdev->stat.acl_tx++;
+ BT_DBG("HCI_ACLDATA_PKT:\n");
+ break;
+
+ case HCI_SCODATA_PKT:
+ if (!data->isoc_tx_ep || hdev->conn_hash.sco_num < 1)
+ return -ENODEV;
+
+ urb = usb_alloc_urb(BTUSB_MAX_ISOC_FRAMES, GFP_ATOMIC);
+ if (!urb)
+ return -ENOMEM;
+
+ pipe = usb_sndisocpipe(data->udev,
+ data->isoc_tx_ep->bEndpointAddress);
+
+ usb_fill_int_urb(urb, data->udev, pipe,
+ skb->data, skb->len, btmtk_usb_isoc_tx_complete,
+ skb, data->isoc_tx_ep->bInterval);
+
+ urb->transfer_flags = URB_ISO_ASAP;
+
+ __fill_isoc_descriptor(urb, skb->len,
+ le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize));
+
+ hdev->stat.sco_tx++;
+ BT_DBG("HCI_SCODATA_PKT:\n");
+ goto skip_waking;
+
+ default:
+ return -EILSEQ;
+ }
+
+ err = inc_tx(data);
+
+ if (err) {
+ usb_anchor_urb(urb, &data->deferred);
+ schedule_work(&data->waker);
+ err = 0;
+ goto done;
+ }
+
+skip_waking:
+ usb_anchor_urb(urb, &data->tx_anchor);
+
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0) {
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p submission failed (%d)",
+ hdev->name, urb, -err);
+ kfree(urb->setup_packet);
+ usb_unanchor_urb(urb);
+ } else {
+ usb_mark_last_busy(data->udev);
+ }
+
+done:
+ usb_free_urb(urb);
+ return err;
+}
+
+static void btmtk_usb_notify(struct hci_dev *hdev, unsigned int evt)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+
+ BT_DBG("%s evt %d", hdev->name, evt);
+
+ if (hdev->conn_hash.sco_num != data->sco_num) {
+ data->sco_num = hdev->conn_hash.sco_num;
+ schedule_work(&data->work);
+ }
+}
+
+static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting)
+{
+ struct btmtk_usb_data *data = hci_get_drvdata(hdev);
+ struct usb_interface *intf = data->isoc;
+ struct usb_endpoint_descriptor *ep_desc;
+ int i, err;
+
+ if (!data->isoc)
+ return -ENODEV;
+
+ err = usb_set_interface(data->udev, 1, altsetting);
+ if (err < 0) {
+ BT_ERR("%s setting interface failed (%d)", hdev->name, -err);
+ return err;
+ }
+
+ data->isoc_altsetting = altsetting;
+
+ data->isoc_tx_ep = NULL;
+ data->isoc_rx_ep = NULL;
+
+ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) {
+ ep_desc = &intf->cur_altsetting->endpoint[i].desc;
+
+ if (!data->isoc_tx_ep && usb_endpoint_is_isoc_out(ep_desc)) {
+ data->isoc_tx_ep = ep_desc;
+ continue;
+ }
+
+ if (!data->isoc_rx_ep && usb_endpoint_is_isoc_in(ep_desc)) {
+ data->isoc_rx_ep = ep_desc;
+ continue;
+ }
+ }
+
+ if (!data->isoc_tx_ep || !data->isoc_rx_ep) {
+ BT_ERR("%s invalid SCO descriptors", hdev->name);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void btmtk_usb_work(struct work_struct *work)
+{
+ struct btmtk_usb_data *data = container_of(work, struct btmtk_usb_data, work);
+ struct hci_dev *hdev = data->hdev;
+ int new_alts;
+ int err;
+
+ BT_DBG("%s\n", __func__);
+
+ if (hdev->conn_hash.sco_num > 0) {
+ if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) {
+ err = usb_autopm_get_interface(data->isoc ? data->isoc : data->intf);
+ if (err < 0) {
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->isoc_anchor);
+ return;
+ }
+
+ set_bit(BTUSB_DID_ISO_RESUME, &data->flags);
+ }
+
+ if (hdev->voice_setting & 0x0020) {
+ static const int alts[3] = { 2, 4, 5 };
+ new_alts = alts[hdev->conn_hash.sco_num - 1];
+ } else {
+ new_alts = hdev->conn_hash.sco_num;
+ }
+
+ if (data->isoc_altsetting != new_alts) {
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->isoc_anchor);
+
+ if (__set_isoc_interface(hdev, new_alts) < 0)
+ return;
+ }
+
+ if (!test_and_set_bit(BTUSB_ISOC_RUNNING, &data->flags)) {
+ if (btmtk_usb_submit_isoc_in_urb(hdev, GFP_KERNEL) < 0)
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ else
+ btmtk_usb_submit_isoc_in_urb(hdev, GFP_KERNEL);
+ }
+ } else {
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ usb_kill_anchored_urbs(&data->isoc_anchor);
+
+ __set_isoc_interface(hdev, 0);
+
+ if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags))
+ usb_autopm_put_interface(data->isoc ? data->isoc : data->intf);
+ }
+}
+
+static void btmtk_usb_waker(struct work_struct *work)
+{
+ struct btmtk_usb_data *data = container_of(work, struct btmtk_usb_data, waker);
+ int err;
+
+ err = usb_autopm_get_interface(data->intf);
+
+ if (err < 0)
+ return;
+
+ usb_autopm_put_interface(data->intf);
+}
+
+static int btmtk_usb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+{
+ struct btmtk_usb_data *data;
+ struct usb_endpoint_descriptor *ep_desc;
+ int i, err;
+ struct hci_dev *hdev;
+
+ /* interface numbers are hardcoded in the spec */
+ if (intf->cur_altsetting->desc.bInterfaceNumber != 0)
+ return -ENODEV;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+
+ if (!data)
+ return -ENOMEM;
+
+ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) {
+ ep_desc = &intf->cur_altsetting->endpoint[i].desc;
+
+ if (!data->intr_ep && usb_endpoint_is_int_in(ep_desc)) {
+ data->intr_ep = ep_desc;
+ continue;
+ }
+
+ if (!data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) {
+ data->bulk_tx_ep = ep_desc;
+ continue;
+ }
+
+ if (!data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) {
+ data->bulk_rx_ep = ep_desc;
+ continue;
+ }
+ }
+
+ if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) {
+ kfree(data);
+ return -ENODEV;
+ }
+
+ data->cmdreq_type = USB_TYPE_CLASS;
+
+ data->udev = interface_to_usbdev(intf);
+ data->intf = intf;
+
+ spin_lock_init(&data->lock);
+ INIT_WORK(&data->work, btmtk_usb_work);
+ INIT_WORK(&data->waker, btmtk_usb_waker);
+ spin_lock_init(&data->txlock);
+
+ init_usb_anchor(&data->tx_anchor);
+ init_usb_anchor(&data->intr_anchor);
+ init_usb_anchor(&data->bulk_anchor);
+ init_usb_anchor(&data->isoc_anchor);
+ init_usb_anchor(&data->deferred);
+
+ hdev = hci_alloc_dev();
+ if (!hdev) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ hdev->bus = HCI_USB;
+
+ hci_set_drvdata(hdev, data);
+
+ data->hdev = hdev;
+
+ SET_HCIDEV_DEV(hdev, &intf->dev);
+
+ hdev->open = btmtk_usb_open;
+ hdev->close = btmtk_usb_close;
+ hdev->flush = btmtk_usb_flush;
+ hdev->send = btmtk_usb_send_frame;
+ hdev->notify = btmtk_usb_notify;
+
+ /* Interface numbers are hardcoded in the specification */
+ data->isoc = usb_ifnum_to_if(data->udev, 1);
+
+ if (data->isoc) {
+ err = usb_driver_claim_interface(&btmtk_usb_driver,
+ data->isoc, data);
+ if (err < 0) {
+ hci_free_dev(hdev);
+ kfree(data);
+ return err;
+ }
+ }
+
+ data->io_buf = kmalloc(256, GFP_KERNEL);
+ if (!data->io_buf) {
+ hci_free_dev(hdev);
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ btmtk_usb_switch_iobase(data, WLAN);
+
+ btmtk_usb_cap_init(data);
+
+ err = hci_register_dev(hdev);
+ if (err < 0) {
+ hci_free_dev(hdev);
+ kfree(data);
+ return err;
+ }
+
+ usb_set_intfdata(intf, data);
+
+ return 0;
+}
+
+static void btmtk_usb_disconnect(struct usb_interface *intf)
+{
+ struct btmtk_usb_data *data = usb_get_intfdata(intf);
+ struct hci_dev *hdev;
+
+ BT_DBG("%s\n", __func__);
+
+ if (!data)
+ return;
+
+ hdev = data->hdev;
+ usb_set_intfdata(data->intf, NULL);
+
+ if (data->isoc)
+ usb_set_intfdata(data->isoc, NULL);
+
+ hci_unregister_dev(hdev);
+
+ if (intf == data->isoc)
+ usb_driver_release_interface(&btmtk_usb_driver, data->intf);
+ else if (data->isoc)
+ usb_driver_release_interface(&btmtk_usb_driver, data->isoc);
+
+ hci_free_dev(hdev);
+
+ kfree(data->io_buf);
+
+ kfree(data);
+}
+
+#ifdef CONFIG_PM
+static int btmtk_usb_suspend(struct usb_interface *intf, pm_message_t message)
+{
+ struct btmtk_usb_data *data = usb_get_intfdata(intf);
+
+ BT_DBG("%s\n", __func__);
+
+ if (data->suspend_count++)
+ return 0;
+
+ spin_lock_irq(&data->txlock);
+ if (!(PMSG_IS_AUTO(message) && data->tx_in_flight)) {
+ set_bit(BTUSB_SUSPENDING, &data->flags);
+ spin_unlock_irq(&data->txlock);
+ } else {
+ spin_unlock_irq(&data->txlock);
+ data->suspend_count--;
+ return -EBUSY;
+ }
+
+ cancel_work_sync(&data->work);
+
+ btmtk_usb_stop_traffic(data);
+ usb_kill_anchored_urbs(&data->tx_anchor);
+
+ return 0;
+}
+
+static void play_deferred(struct btmtk_usb_data *data)
+{
+ struct urb *urb;
+ int err;
+
+ while ((urb = usb_get_from_anchor(&data->deferred))) {
+ err = usb_submit_urb(urb, GFP_ATOMIC);
+ if (err < 0)
+ break;
+
+ data->tx_in_flight++;
+ }
+
+ usb_scuttle_anchored_urbs(&data->deferred);
+}
+
+static int btmtk_usb_resume(struct usb_interface *intf)
+{
+ struct btmtk_usb_data *data = usb_get_intfdata(intf);
+ struct hci_dev *hdev = data->hdev;
+ int err = 0;
+
+ BT_DBG("%s\n", __func__);
+
+ if (--data->suspend_count)
+ return 0;
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
+ if (test_bit(BTUSB_INTR_RUNNING, &data->flags)) {
+ err = btmtk_usb_submit_intr_urb(hdev, GFP_NOIO);
+ if (err < 0) {
+ clear_bit(BTUSB_INTR_RUNNING, &data->flags);
+ goto failed;
+ }
+ }
+
+ if (test_bit(BTUSB_BULK_RUNNING, &data->flags)) {
+ err = btmtk_usb_submit_bulk_in_urb(hdev, GFP_NOIO);
+ if (err < 0) {
+ clear_bit(BTUSB_BULK_RUNNING, &data->flags);
+ goto failed;
+ }
+
+ btmtk_usb_submit_bulk_in_urb(hdev, GFP_NOIO);
+ }
+
+ if (test_bit(BTUSB_ISOC_RUNNING, &data->flags)) {
+ if (btmtk_usb_submit_isoc_in_urb(hdev, GFP_NOIO) < 0)
+ clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+ else
+ btmtk_usb_submit_isoc_in_urb(hdev, GFP_NOIO);
+ }
+
+ spin_lock_irq(&data->txlock);
+ play_deferred(data);
+ clear_bit(BTUSB_SUSPENDING, &data->flags);
+ spin_unlock_irq(&data->txlock);
+ schedule_work(&data->work);
+
+ return 0;
+
+failed:
+ usb_scuttle_anchored_urbs(&data->deferred);
+done:
+ spin_lock_irq(&data->txlock);
+ clear_bit(BTUSB_SUSPENDING, &data->flags);
+ spin_unlock_irq(&data->txlock);
+
+ return err;
+}
+#endif
+
+static struct usb_device_id btmtk_usb_table[] = {
+ /* Mediatek MT7650 */
+ { USB_DEVICE(0x0e8d, 0x7650) },
+ { USB_DEVICE(0x0e8d, 0x7630) },
+ { USB_DEVICE(0x0e8d, 0x763e) },
+ /* Mediatek MT662 */
+ { USB_DEVICE(0x0e8d, 0x7662) },
+ { USB_DEVICE(0x0e8d, 0x7632) },
+ { } /* Terminating entry */
+};
+
+static struct usb_driver btmtk_usb_driver = {
+ .name = "btmtk_usb",
+ .probe = btmtk_usb_probe,
+ .disconnect = btmtk_usb_disconnect,
+#ifdef CONFIG_PM
+ .suspend = btmtk_usb_suspend,
+ .resume = btmtk_usb_resume,
+#endif
+ .id_table = btmtk_usb_table,
+ .supports_autosuspend = 1,
+ .disable_hub_initiated_lpm = 1,
+};
+
+module_usb_driver(btmtk_usb_driver);
+
+MODULE_DESCRIPTION("Mediatek Bluetooth USB driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(MT7650_FIRMWARE);
+MODULE_FIRMWARE(MT7662_FIRMWARE);
diff --git a/drivers/staging/btmtk_usb/btmtk_usb.h b/drivers/staging/btmtk_usb/btmtk_usb.h
new file mode 100644
index 000000000000..12f0d3b27bfe
--- /dev/null
+++ b/drivers/staging/btmtk_usb/btmtk_usb.h
@@ -0,0 +1,138 @@
+/*
+ * MediaTek Bluetooth USB Driver
+ *
+ * Copyright (C) 2013, MediaTek co.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * or on the worldwide web at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ */
+
+#ifndef __BTMTK_USB_H__
+#define __BTMTK_USB_H_
+
+/* Memory map for MTK BT */
+
+/* SYS Control */
+#define SYSCTL 0x400000
+
+/* WLAN */
+#define WLAN 0x410000
+
+/* MCUCTL */
+#define INT_LEVEL 0x0718
+#define COM_REG0 0x0730
+#define SEMAPHORE_00 0x07B0
+#define SEMAPHORE_01 0x07B4
+#define SEMAPHORE_02 0x07B8
+#define SEMAPHORE_03 0x07BC
+
+/* Chip definition */
+
+#define CONTROL_TIMEOUT_JIFFIES ((300 * HZ) / 100)
+#define DEVICE_VENDOR_REQUEST_OUT 0x40
+#define DEVICE_VENDOR_REQUEST_IN 0xc0
+#define DEVICE_CLASS_REQUEST_OUT 0x20
+
+#define BTUSB_MAX_ISOC_FRAMES 10
+#define BTUSB_INTR_RUNNING 0
+#define BTUSB_BULK_RUNNING 1
+#define BTUSB_ISOC_RUNNING 2
+#define BTUSB_SUSPENDING 3
+#define BTUSB_DID_ISO_RESUME 4
+
+/* ROM Patch */
+#define PATCH_HCI_HEADER_SIZE 4
+#define PATCH_WMT_HEADER_SIZE 5
+#define PATCH_HEADER_SIZE (PATCH_HCI_HEADER_SIZE + PATCH_WMT_HEADER_SIZE)
+#define UPLOAD_PATCH_UNIT 2048
+#define PATCH_INFO_SIZE 30
+#define PATCH_PHASE1 1
+#define PATCH_PHASE2 2
+#define PATCH_PHASE3 3
+
+struct btmtk_usb_data {
+ struct hci_dev *hdev;
+ struct usb_device *udev;
+ struct usb_interface *intf;
+ struct usb_interface *isoc;
+
+ spinlock_t lock;
+
+ unsigned long flags;
+ struct work_struct work;
+ struct work_struct waker;
+
+ struct usb_anchor tx_anchor;
+ struct usb_anchor intr_anchor;
+ struct usb_anchor bulk_anchor;
+ struct usb_anchor isoc_anchor;
+ struct usb_anchor deferred;
+ int tx_in_flight;
+ spinlock_t txlock;
+
+ struct usb_endpoint_descriptor *intr_ep;
+ struct usb_endpoint_descriptor *bulk_tx_ep;
+ struct usb_endpoint_descriptor *bulk_rx_ep;
+ struct usb_endpoint_descriptor *isoc_tx_ep;
+ struct usb_endpoint_descriptor *isoc_rx_ep;
+
+ __u8 cmdreq_type;
+
+ unsigned int sco_num;
+ int isoc_altsetting;
+ int suspend_count;
+
+ /* request for different io operation */
+ u8 w_request;
+ u8 r_request;
+
+ /* io buffer for usb control transfer */
+ char *io_buf;
+
+ struct semaphore fw_upload_sem;
+
+ /* unsigned char *fw_image; */
+ /* unsigned char *rom_patch; */
+ const struct firmware *firmware;
+ u32 chip_id;
+ u8 need_load_fw;
+ u8 need_load_rom_patch;
+ u32 rom_patch_offset;
+ u32 rom_patch_len;
+};
+
+static inline int is_mt7630(struct btmtk_usb_data *data)
+{
+ return ((data->chip_id & 0xffff0000) == 0x76300000);
+}
+
+static inline int is_mt7650(struct btmtk_usb_data *data)
+{
+ return ((data->chip_id & 0xffff0000) == 0x76500000);
+}
+
+static inline int is_mt7632(struct btmtk_usb_data *data)
+{
+ return ((data->chip_id & 0xffff0000) == 0x76320000);
+}
+
+static inline int is_mt7662(struct btmtk_usb_data *data)
+{
+ return ((data->chip_id & 0xffff0000) == 0x76620000);
+}
+
+#endif
diff --git a/drivers/staging/ced1401/ced_ioc.c b/drivers/staging/ced1401/ced_ioc.c
index 82a333f6433e..2dbaf39e2fc2 100644
--- a/drivers/staging/ced1401/ced_ioc.c
+++ b/drivers/staging/ced1401/ced_ioc.c
@@ -37,13 +37,14 @@
**
** Empties the Output buffer and sets int lines. Used from user level only
****************************************************************************/
-void FlushOutBuff(DEVICE_EXTENSION * pdx)
+static void FlushOutBuff(DEVICE_EXTENSION *pdx)
{
dev_dbg(&pdx->interface->dev, "%s currentState=%d", __func__,
pdx->sCurrentState);
if (pdx->sCurrentState == U14ERR_TIME) /* Do nothing if hardware in trouble */
return;
-// CharSend_Cancel(pdx); /* Kill off any pending I/O */
+ /* Kill off any pending I/O */
+ /* CharSend_Cancel(pdx); */
spin_lock_irq(&pdx->charOutLock);
pdx->dwNumOutput = 0;
pdx->dwOutBuffGet = 0;
@@ -57,13 +58,14 @@ void FlushOutBuff(DEVICE_EXTENSION * pdx)
**
** Empties the input buffer and sets int lines
****************************************************************************/
-void FlushInBuff(DEVICE_EXTENSION * pdx)
+static void FlushInBuff(DEVICE_EXTENSION *pdx)
{
dev_dbg(&pdx->interface->dev, "%s currentState=%d", __func__,
pdx->sCurrentState);
if (pdx->sCurrentState == U14ERR_TIME) /* Do nothing if hardware in trouble */
return;
-// CharRead_Cancel(pDevObject); /* Kill off any pending I/O */
+ /* Kill off any pending I/O */
+ /* CharRead_Cancel(pDevObject); */
spin_lock_irq(&pdx->charInLock);
pdx->dwNumInput = 0;
pdx->dwInBuffGet = 0;
@@ -77,11 +79,11 @@ void FlushInBuff(DEVICE_EXTENSION * pdx)
** Utility routine to copy chars into the output buffer and fire them off.
** called from user mode, holds charOutLock.
****************************************************************************/
-static int PutChars(DEVICE_EXTENSION * pdx, const char *pCh,
+static int PutChars(DEVICE_EXTENSION *pdx, const char *pCh,
unsigned int uCount)
{
int iReturn;
- spin_lock_irq(&pdx->charOutLock); // get the output spin lock
+ spin_lock_irq(&pdx->charOutLock); /* get the output spin lock */
if ((OUTBUF_SZ - pdx->dwNumOutput) >= uCount) {
unsigned int u;
for (u = 0; u < uCount; u++) {
@@ -91,9 +93,9 @@ static int PutChars(DEVICE_EXTENSION * pdx, const char *pCh,
}
pdx->dwNumOutput += uCount;
spin_unlock_irq(&pdx->charOutLock);
- iReturn = SendChars(pdx); // ...give a chance to transmit data
+ iReturn = SendChars(pdx); /* ...give a chance to transmit data */
} else {
- iReturn = U14ERR_NOOUT; // no room at the out (ha-ha)
+ iReturn = U14ERR_NOOUT; /* no room at the out (ha-ha) */
spin_unlock_irq(&pdx->charOutLock);
}
return iReturn;
@@ -104,26 +106,25 @@ static int PutChars(DEVICE_EXTENSION * pdx, const char *pCh,
** trigger an output transfer if this is appropriate. User mode.
** Holds the io_mutex
*****************************************************************************/
-int SendString(DEVICE_EXTENSION * pdx, const char __user * pData,
+int SendString(DEVICE_EXTENSION *pdx, const char __user *pData,
unsigned int n)
{
- int iReturn = U14ERR_NOERROR; // assume all will be well
- char buffer[OUTBUF_SZ + 1]; // space in our address space for characters
- if (n > OUTBUF_SZ) // check space in local buffer...
- return U14ERR_NOOUT; // ...too many characters
+ int iReturn = U14ERR_NOERROR; /* assume all will be well */
+ char buffer[OUTBUF_SZ + 1]; /* space in our address space for characters */
+ if (n > OUTBUF_SZ) /* check space in local buffer... */
+ return U14ERR_NOOUT; /* ...too many characters */
if (copy_from_user(buffer, pData, n))
return -EFAULT;
- buffer[n] = 0; // terminate for debug purposes
+ buffer[n] = 0; /* terminate for debug purposes */
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
- if (n > 0) // do nothing if nowt to do!
- {
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
+ if (n > 0) { /* do nothing if nowt to do! */
dev_dbg(&pdx->interface->dev, "%s n=%d>%s<", __func__, n,
buffer);
iReturn = PutChars(pdx, buffer, n);
}
- Allowi(pdx); // make sure we have input int
+ Allowi(pdx); /* make sure we have input int */
mutex_unlock(&pdx->io_mutex);
return iReturn;
@@ -134,13 +135,13 @@ int SendString(DEVICE_EXTENSION * pdx, const char __user * pData,
**
** Sends a single character to the 1401. User mode, holds io_mutex.
****************************************************************************/
-int SendChar(DEVICE_EXTENSION * pdx, char c)
+int SendChar(DEVICE_EXTENSION *pdx, char c)
{
int iReturn;
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
iReturn = PutChars(pdx, &c, 1);
dev_dbg(&pdx->interface->dev, "SendChar >%c< (0x%02x)", c, c);
- Allowi(pdx); // Make sure char reads are running
+ Allowi(pdx); /* Make sure char reads are running */
mutex_unlock(&pdx->io_mutex);
return iReturn;
}
@@ -171,20 +172,20 @@ int SendChar(DEVICE_EXTENSION * pdx, char c)
**
** return error code (U14ERR_NOERROR for OK)
*/
-int Get1401State(DEVICE_EXTENSION * pdx, __u32 * state, __u32 * error)
+int Get1401State(DEVICE_EXTENSION *pdx, __u32 *state, __u32 *error)
{
int nGot;
dev_dbg(&pdx->interface->dev, "Get1401State() entry");
- *state = 0xFFFFFFFF; // Start off with invalid state
+ *state = 0xFFFFFFFF; /* Start off with invalid state */
nGot = usb_control_msg(pdx->udev, usb_rcvctrlpipe(pdx->udev, 0),
GET_STATUS, (D_TO_H | VENDOR | DEVREQ), 0, 0,
pdx->statBuf, sizeof(pdx->statBuf), HZ);
if (nGot != sizeof(pdx->statBuf)) {
dev_err(&pdx->interface->dev,
"Get1401State() FAILED, return code %d", nGot);
- pdx->sCurrentState = U14ERR_TIME; // Indicate that things are very wrong indeed
- *state = 0; // Force status values to a known state
+ pdx->sCurrentState = U14ERR_TIME; /* Indicate that things are very wrong indeed */
+ *state = 0; /* Force status values to a known state */
*error = 0;
} else {
int nDevice;
@@ -192,17 +193,16 @@ int Get1401State(DEVICE_EXTENSION * pdx, __u32 * state, __u32 * error)
"Get1401State() Success, state: 0x%x, 0x%x",
pdx->statBuf[0], pdx->statBuf[1]);
- *state = pdx->statBuf[0]; // Return the state values to the calling code
+ *state = pdx->statBuf[0]; /* Return the state values to the calling code */
*error = pdx->statBuf[1];
- nDevice = pdx->udev->descriptor.bcdDevice >> 8; // 1401 type code value
- switch (nDevice) // so we can clean up current state
- {
+ nDevice = pdx->udev->descriptor.bcdDevice >> 8; /* 1401 type code value */
+ switch (nDevice) { /* so we can clean up current state */
case 0:
pdx->sCurrentState = U14ERR_U1401;
break;
- default: // allow lots of device codes for future 1401s
+ default: /* allow lots of device codes for future 1401s */
if ((nDevice >= 1) && (nDevice <= 23))
pdx->sCurrentState = (short)(nDevice + 6);
else
@@ -219,7 +219,7 @@ int Get1401State(DEVICE_EXTENSION * pdx, __u32 * state, __u32 * error)
**
** Kills off staged read\write request from the USB if one is pending.
****************************************************************************/
-int ReadWrite_Cancel(DEVICE_EXTENSION * pdx)
+int ReadWrite_Cancel(DEVICE_EXTENSION *pdx)
{
dev_dbg(&pdx->interface->dev, "ReadWrite_Cancel entry %d",
pdx->bStagedUrbPending);
@@ -227,24 +227,23 @@ int ReadWrite_Cancel(DEVICE_EXTENSION * pdx)
int ntStatus = STATUS_SUCCESS;
bool bResult = false;
unsigned int i;
- // We can fill this in when we know how we will implement the staged transfer stuff
+ /* We can fill this in when we know how we will implement the staged transfer stuff */
spin_lock_irq(&pdx->stagedLock);
- if (pdx->bStagedUrbPending) // anything to be cancelled? May need more...
- {
+ if (pdx->bStagedUrbPending) { /* anything to be cancelled? May need more... */
dev_info(&pdx->interface - dev,
"ReadWrite_Cancel about to cancel Urb");
-
- // KeClearEvent(&pdx->StagingDoneEvent); // Clear the staging done flag
+ /* Clear the staging done flag */
+ /* KeClearEvent(&pdx->StagingDoneEvent); */
USB_ASSERT(pdx->pStagedIrp != NULL);
- // Release the spinlock first otherwise the completion routine may hang
- // on the spinlock while this function hands waiting for the event.
+ /* Release the spinlock first otherwise the completion routine may hang */
+ /* on the spinlock while this function hands waiting for the event. */
spin_unlock_irq(&pdx->stagedLock);
- bResult = IoCancelIrp(pdx->pStagedIrp); // Actually do the cancel
+ bResult = IoCancelIrp(pdx->pStagedIrp); /* Actually do the cancel */
if (bResult) {
LARGE_INTEGER timeout;
- timeout.QuadPart = -10000000; // Use a timeout of 1 second
+ timeout.QuadPart = -10000000; /* Use a timeout of 1 second */
dev_info(&pdx->interface - dev,
"ReadWrite_Cancel about to wait till done");
ntStatus =
@@ -274,14 +273,14 @@ int ReadWrite_Cancel(DEVICE_EXTENSION * pdx)
** InSelfTest - utility to check in self test. Return 1 for ST, 0 for not or
** a -ve error code if we failed for some reason.
***************************************************************************/
-static int InSelfTest(DEVICE_EXTENSION * pdx, unsigned int *pState)
+static int InSelfTest(DEVICE_EXTENSION *pdx, unsigned int *pState)
{
unsigned int state, error;
- int iReturn = Get1401State(pdx, &state, &error); // see if in self-test
- if (iReturn == U14ERR_NOERROR) // if all still OK
- iReturn = (state == (unsigned int)-1) || // TX problem or...
- ((state & 0xff) == 0x80); // ...self test
- *pState = state; // return actual state
+ int iReturn = Get1401State(pdx, &state, &error); /* see if in self-test */
+ if (iReturn == U14ERR_NOERROR) /* if all still OK */
+ iReturn = (state == (unsigned int)-1) || /* TX problem or... */
+ ((state & 0xff) == 0x80); /* ...self test */
+ *pState = state; /* return actual state */
return iReturn;
}
@@ -303,48 +302,45 @@ static int InSelfTest(DEVICE_EXTENSION * pdx, unsigned int *pState)
**
** Returns TRUE if a 1401 detected and OK, else FALSE
****************************************************************************/
-bool Is1401(DEVICE_EXTENSION * pdx)
+bool Is1401(DEVICE_EXTENSION *pdx)
{
int iReturn;
dev_dbg(&pdx->interface->dev, "%s", __func__);
- ced_draw_down(pdx); // wait for, then kill outstanding Urbs
- FlushInBuff(pdx); // Clear out input buffer & pipe
- FlushOutBuff(pdx); // Clear output buffer & pipe
+ ced_draw_down(pdx); /* wait for, then kill outstanding Urbs */
+ FlushInBuff(pdx); /* Clear out input buffer & pipe */
+ FlushOutBuff(pdx); /* Clear output buffer & pipe */
- // The next call returns 0 if OK, but has returned 1 in the past, meaning that
- // usb_unlock_device() is needed... now it always is
+ /* The next call returns 0 if OK, but has returned 1 in the past, meaning that */
+ /* usb_unlock_device() is needed... now it always is */
iReturn = usb_lock_device_for_reset(pdx->udev, pdx->interface);
- // release the io_mutex because if we don't, we will deadlock due to system
- // calls back into the driver.
- mutex_unlock(&pdx->io_mutex); // locked, so we will not get system calls
- if (iReturn >= 0) // if we failed
- {
- iReturn = usb_reset_device(pdx->udev); // try to do the reset
- usb_unlock_device(pdx->udev); // undo the lock
+ /* release the io_mutex because if we don't, we will deadlock due to system */
+ /* calls back into the driver. */
+ mutex_unlock(&pdx->io_mutex); /* locked, so we will not get system calls */
+ if (iReturn >= 0) { /* if we failed */
+ iReturn = usb_reset_device(pdx->udev); /* try to do the reset */
+ usb_unlock_device(pdx->udev); /* undo the lock */
}
- mutex_lock(&pdx->io_mutex); // hold stuff off while we wait
- pdx->dwDMAFlag = MODE_CHAR; // Clear DMA mode flag regardless!
- if (iReturn == 0) // if all is OK still
- {
+ mutex_lock(&pdx->io_mutex); /* hold stuff off while we wait */
+ pdx->dwDMAFlag = MODE_CHAR; /* Clear DMA mode flag regardless! */
+ if (iReturn == 0) { /* if all is OK still */
unsigned int state;
- iReturn = InSelfTest(pdx, &state); // see if likely in self test
- if (iReturn > 0) // do we need to wait for self-test?
- {
- unsigned long ulTimeOut = jiffies + 30 * HZ; // when to give up
+ iReturn = InSelfTest(pdx, &state); /* see if likely in self test */
+ if (iReturn > 0) { /* do we need to wait for self-test? */
+ unsigned long ulTimeOut = jiffies + 30 * HZ; /* when to give up */
while ((iReturn > 0) && time_before(jiffies, ulTimeOut)) {
- schedule(); // let other stuff run
- iReturn = InSelfTest(pdx, &state); // see if done yet
+ schedule(); /* let other stuff run */
+ iReturn = InSelfTest(pdx, &state); /* see if done yet */
}
}
- if (iReturn == 0) // if all is OK...
- iReturn = state == 0; // then success is that the state is 0
+ if (iReturn == 0) /* if all is OK... */
+ iReturn = state == 0; /* then success is that the state is 0 */
} else
- iReturn = 0; // we failed
- pdx->bForceReset = false; // Clear forced reset flag now
+ iReturn = 0; /* we failed */
+ pdx->bForceReset = false; /* Clear forced reset flag now */
return iReturn > 0;
}
@@ -363,45 +359,42 @@ bool Is1401(DEVICE_EXTENSION * pdx)
**
** The return value is TRUE if a useable 1401 is found, FALSE if not
*/
-bool QuickCheck(DEVICE_EXTENSION * pdx, bool bTestBuff, bool bCanReset)
+bool QuickCheck(DEVICE_EXTENSION *pdx, bool bTestBuff, bool bCanReset)
{
- bool bRet = false; // assume it will fail and we will reset
+ bool bRet = false; /* assume it will fail and we will reset */
bool bShortTest;
- bShortTest = ((pdx->dwDMAFlag == MODE_CHAR) && // no DMA running
- (!pdx->bForceReset) && // Not had a real reset forced
- (pdx->sCurrentState >= U14ERR_STD)); // No 1401 errors stored
+ bShortTest = ((pdx->dwDMAFlag == MODE_CHAR) && /* no DMA running */
+ (!pdx->bForceReset) && /* Not had a real reset forced */
+ (pdx->sCurrentState >= U14ERR_STD)); /* No 1401 errors stored */
dev_dbg(&pdx->interface->dev,
"%s DMAFlag:%d, state:%d, force:%d, testBuff:%d, short:%d",
__func__, pdx->dwDMAFlag, pdx->sCurrentState, pdx->bForceReset,
bTestBuff, bShortTest);
- if ((bTestBuff) && // Buffer check requested, and...
- (pdx->dwNumInput || pdx->dwNumOutput)) // ...characters were in the buffer?
- {
- bShortTest = false; // Then do the full test
+ if ((bTestBuff) && /* Buffer check requested, and... */
+ (pdx->dwNumInput || pdx->dwNumOutput)) { /* ...characters were in the buffer? */
+ bShortTest = false; /* Then do the full test */
dev_dbg(&pdx->interface->dev,
"%s will reset as buffers not empty", __func__);
}
- if (bShortTest || !bCanReset) // Still OK to try the short test?
- { // Always test if no reset - we want state update
+ if (bShortTest || !bCanReset) { /* Still OK to try the short test? */
+ /* Always test if no reset - we want state update */
unsigned int state, error;
dev_dbg(&pdx->interface->dev, "%s->Get1401State", __func__);
- if (Get1401State(pdx, &state, &error) == U14ERR_NOERROR) // Check on the 1401 state
- {
- if ((state & 0xFF) == 0) // If call worked, check the status value
- bRet = true; // If that was zero, all is OK, no reset needed
+ if (Get1401State(pdx, &state, &error) == U14ERR_NOERROR) { /* Check on the 1401 state */
+ if ((state & 0xFF) == 0) /* If call worked, check the status value */
+ bRet = true; /* If that was zero, all is OK, no reset needed */
}
}
- if (!bRet && bCanReset) // If all not OK, then
- {
+ if (!bRet && bCanReset) { /* If all not OK, then */
dev_info(&pdx->interface->dev, "%s->Is1401 %d %d %d %d",
__func__, bShortTest, pdx->sCurrentState, bTestBuff,
pdx->bForceReset);
- bRet = Is1401(pdx); // do full test
+ bRet = Is1401(pdx); /* do full test */
}
return bRet;
@@ -412,11 +405,11 @@ bool QuickCheck(DEVICE_EXTENSION * pdx, bool bTestBuff, bool bCanReset)
**
** Resets the 1401 and empties the i/o buffers
*****************************************************************************/
-int Reset1401(DEVICE_EXTENSION * pdx)
+int Reset1401(DEVICE_EXTENSION *pdx)
{
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
dev_dbg(&pdx->interface->dev, "ABout to call QuickCheck");
- QuickCheck(pdx, true, true); // Check 1401, reset if not OK
+ QuickCheck(pdx, true, true); /* Check 1401, reset if not OK */
mutex_unlock(&pdx->io_mutex);
return U14ERR_NOERROR;
}
@@ -426,30 +419,29 @@ int Reset1401(DEVICE_EXTENSION * pdx)
**
** Gets a single character from the 1401
****************************************************************************/
-int GetChar(DEVICE_EXTENSION * pdx)
+int GetChar(DEVICE_EXTENSION *pdx)
{
- int iReturn = U14ERR_NOIN; // assume we will get nothing
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
+ int iReturn = U14ERR_NOIN; /* assume we will get nothing */
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
dev_dbg(&pdx->interface->dev, "GetChar");
- Allowi(pdx); // Make sure char reads are running
- SendChars(pdx); // and send any buffered chars
+ Allowi(pdx); /* Make sure char reads are running */
+ SendChars(pdx); /* and send any buffered chars */
spin_lock_irq(&pdx->charInLock);
- if (pdx->dwNumInput > 0) // worth looking
- {
+ if (pdx->dwNumInput > 0) { /* worth looking */
iReturn = pdx->inputBuffer[pdx->dwInBuffGet++];
if (pdx->dwInBuffGet >= INBUF_SZ)
pdx->dwInBuffGet = 0;
pdx->dwNumInput--;
} else
- iReturn = U14ERR_NOIN; // no input data to read
+ iReturn = U14ERR_NOIN; /* no input data to read */
spin_unlock_irq(&pdx->charInLock);
- Allowi(pdx); // Make sure char reads are running
+ Allowi(pdx); /* Make sure char reads are running */
- mutex_unlock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_unlock(&pdx->io_mutex); /* Protect disconnect from new i/o */
return iReturn;
}
@@ -464,46 +456,43 @@ int GetChar(DEVICE_EXTENSION * pdx)
** returns the count of characters (including the terminator, or 0 if none
** or a negative error code.
****************************************************************************/
-int GetString(DEVICE_EXTENSION * pdx, char __user * pUser, int n)
+int GetString(DEVICE_EXTENSION *pdx, char __user *pUser, int n)
{
- int nAvailable; // character in the buffer
+ int nAvailable; /* character in the buffer */
int iReturn = U14ERR_NOIN;
if (n <= 0)
return -ENOMEM;
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
- Allowi(pdx); // Make sure char reads are running
- SendChars(pdx); // and send any buffered chars
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
+ Allowi(pdx); /* Make sure char reads are running */
+ SendChars(pdx); /* and send any buffered chars */
spin_lock_irq(&pdx->charInLock);
- nAvailable = pdx->dwNumInput; // characters available now
- if (nAvailable > n) // read max of space in pUser...
- nAvailable = n; // ...or input characters
+ nAvailable = pdx->dwNumInput; /* characters available now */
+ if (nAvailable > n) /* read max of space in pUser... */
+ nAvailable = n; /* ...or input characters */
- if (nAvailable > 0) // worth looking?
- {
- char buffer[INBUF_SZ + 1]; // space for a linear copy of data
+ if (nAvailable > 0) { /* worth looking? */
+ char buffer[INBUF_SZ + 1]; /* space for a linear copy of data */
int nGot = 0;
- int nCopyToUser; // number to copy to user
+ int nCopyToUser; /* number to copy to user */
char cData;
do {
cData = pdx->inputBuffer[pdx->dwInBuffGet++];
- if (cData == CR_CHAR) // replace CR with zero
+ if (cData == CR_CHAR) /* replace CR with zero */
cData = (char)0;
if (pdx->dwInBuffGet >= INBUF_SZ)
- pdx->dwInBuffGet = 0; // wrap buffer pointer
+ pdx->dwInBuffGet = 0; /* wrap buffer pointer */
- buffer[nGot++] = cData; // save the output
- }
- while ((nGot < nAvailable) && cData);
-
- nCopyToUser = nGot; // what to copy...
- if (cData) // do we need null
- {
- buffer[nGot] = (char)0; // make it tidy
- if (nGot < n) // if space in user buffer...
- ++nCopyToUser; // ...copy the 0 as well.
+ buffer[nGot++] = cData; /* save the output */
+ } while ((nGot < nAvailable) && cData);
+
+ nCopyToUser = nGot; /* what to copy... */
+ if (cData) { /* do we need null */
+ buffer[nGot] = (char)0; /* make it tidy */
+ if (nGot < n) /* if space in user buffer... */
+ ++nCopyToUser; /* ...copy the 0 as well. */
}
pdx->dwNumInput -= nGot;
@@ -514,12 +503,12 @@ int GetString(DEVICE_EXTENSION * pdx, char __user * pUser, int n)
if (copy_to_user(pUser, buffer, nCopyToUser))
iReturn = -EFAULT;
else
- iReturn = nGot; // report characters read
+ iReturn = nGot; /* report characters read */
} else
spin_unlock_irq(&pdx->charInLock);
- Allowi(pdx); // Make sure char reads are running
- mutex_unlock(&pdx->io_mutex); // Protect disconnect from new i/o
+ Allowi(pdx); /* Make sure char reads are running */
+ mutex_unlock(&pdx->io_mutex); /* Protect disconnect from new i/o */
return iReturn;
}
@@ -527,14 +516,14 @@ int GetString(DEVICE_EXTENSION * pdx, char __user * pUser, int n)
/*******************************************************************************
** Get count of characters in the inout buffer.
*******************************************************************************/
-int Stat1401(DEVICE_EXTENSION * pdx)
+int Stat1401(DEVICE_EXTENSION *pdx)
{
int iReturn;
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
- Allowi(pdx); // make sure we allow pending chars
- SendChars(pdx); // in both directions
- iReturn = pdx->dwNumInput; // no lock as single read
- mutex_unlock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
+ Allowi(pdx); /* make sure we allow pending chars */
+ SendChars(pdx); /* in both directions */
+ iReturn = pdx->dwNumInput; /* no lock as single read */
+ mutex_unlock(&pdx->io_mutex); /* Protect disconnect from new i/o */
return iReturn;
}
@@ -545,32 +534,30 @@ int Stat1401(DEVICE_EXTENSION * pdx)
** any fancy interlocks as we only read the interrupt routine data, and the
** system is arranged so nothing can be destroyed.
****************************************************************************/
-int LineCount(DEVICE_EXTENSION * pdx)
+int LineCount(DEVICE_EXTENSION *pdx)
{
- int iReturn = 0; // will be count of line ends
+ int iReturn = 0; /* will be count of line ends */
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
- Allowi(pdx); // Make sure char reads are running
- SendChars(pdx); // and send any buffered chars
- spin_lock_irq(&pdx->charInLock); // Get protection
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
+ Allowi(pdx); /* Make sure char reads are running */
+ SendChars(pdx); /* and send any buffered chars */
+ spin_lock_irq(&pdx->charInLock); /* Get protection */
- if (pdx->dwNumInput > 0) // worth looking?
- {
- unsigned int dwIndex = pdx->dwInBuffGet; // start at first available
- unsigned int dwEnd = pdx->dwInBuffPut; // Position for search end
+ if (pdx->dwNumInput > 0) { /* worth looking? */
+ unsigned int dwIndex = pdx->dwInBuffGet; /* start at first available */
+ unsigned int dwEnd = pdx->dwInBuffPut; /* Position for search end */
do {
if (pdx->inputBuffer[dwIndex++] == CR_CHAR)
- ++iReturn; // inc count if CR
+ ++iReturn; /* inc count if CR */
- if (dwIndex >= INBUF_SZ) // see if we fall off buff
+ if (dwIndex >= INBUF_SZ) /* see if we fall off buff */
dwIndex = 0;
- }
- while (dwIndex != dwEnd); // go to last available
+ } while (dwIndex != dwEnd); /* go to last available */
}
spin_unlock_irq(&pdx->charInLock);
dev_dbg(&pdx->interface->dev, "LineCount returned %d", iReturn);
- mutex_unlock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_unlock(&pdx->io_mutex); /* Protect disconnect from new i/o */
return iReturn;
}
@@ -579,14 +566,14 @@ int LineCount(DEVICE_EXTENSION * pdx)
**
** Gets the space in the output buffer. Called from user code.
*****************************************************************************/
-int GetOutBufSpace(DEVICE_EXTENSION * pdx)
+int GetOutBufSpace(DEVICE_EXTENSION *pdx)
{
int iReturn;
- mutex_lock(&pdx->io_mutex); // Protect disconnect from new i/o
- SendChars(pdx); // send any buffered chars
- iReturn = (int)(OUTBUF_SZ - pdx->dwNumOutput); // no lock needed for single read
+ mutex_lock(&pdx->io_mutex); /* Protect disconnect from new i/o */
+ SendChars(pdx); /* send any buffered chars */
+ iReturn = (int)(OUTBUF_SZ - pdx->dwNumOutput); /* no lock needed for single read */
dev_dbg(&pdx->interface->dev, "OutBufSpace %d", iReturn);
- mutex_unlock(&pdx->io_mutex); // Protect disconnect from new i/o
+ mutex_unlock(&pdx->io_mutex); /* Protect disconnect from new i/o */
return iReturn;
}
@@ -597,7 +584,7 @@ int GetOutBufSpace(DEVICE_EXTENSION * pdx)
** Clears up a transfer area. This is always called in the context of a user
** request, never from a call-back.
****************************************************************************/
-int ClearArea(DEVICE_EXTENSION * pdx, int nArea)
+int ClearArea(DEVICE_EXTENSION *pdx, int nArea)
{
int iReturn = U14ERR_NOERROR;
@@ -606,14 +593,14 @@ int ClearArea(DEVICE_EXTENSION * pdx, int nArea)
dev_err(&pdx->interface->dev, "%s Attempt to clear area %d",
__func__, nArea);
} else {
- TRANSAREA *pTA = &pdx->rTransDef[nArea]; // to save typing
- if (!pTA->bUsed) // if not used...
- iReturn = U14ERR_NOTSET; // ...nothing to be done
+ TRANSAREA *pTA = &pdx->rTransDef[nArea]; /* to save typing */
+ if (!pTA->bUsed) /* if not used... */
+ iReturn = U14ERR_NOTSET; /* ...nothing to be done */
else {
- // We must save the memory we return as we shouldn't mess with memory while
- // holding a spin lock.
- struct page **pPages = 0; // save page address list
- int nPages = 0; // and number of pages
+ /* We must save the memory we return as we shouldn't mess with memory while */
+ /* holding a spin lock. */
+ struct page **pPages = NULL; /*save page address list*/
+ int nPages = 0; /* and number of pages */
int np;
dev_dbg(&pdx->interface->dev, "%s area %d", __func__,
@@ -621,33 +608,32 @@ int ClearArea(DEVICE_EXTENSION * pdx, int nArea)
spin_lock_irq(&pdx->stagedLock);
if ((pdx->StagedId == nArea)
&& (pdx->dwDMAFlag > MODE_CHAR)) {
- iReturn = U14ERR_UNLOCKFAIL; // cannot delete as in use
+ iReturn = U14ERR_UNLOCKFAIL; /* cannot delete as in use */
dev_err(&pdx->interface->dev,
"%s call on area %d while active",
__func__, nArea);
} else {
- pPages = pTA->pPages; // save page address list
- nPages = pTA->nPages; // and page count
- if (pTA->dwEventSz) // if events flagging in use
- wake_up_interruptible(&pTA->wqEvent); // release anything that was waiting
+ pPages = pTA->pPages; /* save page address list */
+ nPages = pTA->nPages; /* and page count */
+ if (pTA->dwEventSz) /* if events flagging in use */
+ wake_up_interruptible(&pTA->wqEvent); /* release anything that was waiting */
if (pdx->bXFerWaiting
&& (pdx->rDMAInfo.wIdent == nArea))
- pdx->bXFerWaiting = false; // Cannot have pending xfer if area cleared
+ pdx->bXFerWaiting = false; /* Cannot have pending xfer if area cleared */
- // Clean out the TRANSAREA except for the wait queue, which is at the end
- // This sets bUsed to false and dwEventSz to 0 to say area not used and no events.
+ /* Clean out the TRANSAREA except for the wait queue, which is at the end */
+ /* This sets bUsed to false and dwEventSz to 0 to say area not used and no events. */
memset(pTA, 0,
sizeof(TRANSAREA) -
sizeof(wait_queue_head_t));
}
spin_unlock_irq(&pdx->stagedLock);
- if (pPages) // if we decided to release the memory
- {
- // Now we must undo the pinning down of the pages. We will assume the worst and mark
- // all the pages as dirty. Don't be tempted to move this up above as you must not be
- // holding a spin lock to do this stuff as it is not atomic.
+ if (pPages) { /* if we decided to release the memory */
+ /* Now we must undo the pinning down of the pages. We will assume the worst and mark */
+ /* all the pages as dirty. Don't be tempted to move this up above as you must not be */
+ /* holding a spin lock to do this stuff as it is not atomic. */
dev_dbg(&pdx->interface->dev, "%s nPages=%d",
__func__, nPages);
@@ -674,29 +660,29 @@ int ClearArea(DEVICE_EXTENSION * pdx, int nArea)
** Sets up a transfer area - the functional part. Called by both
** SetTransfer and SetCircular.
****************************************************************************/
-static int SetArea(DEVICE_EXTENSION * pdx, int nArea, char __user * puBuf,
+static int SetArea(DEVICE_EXTENSION *pdx, int nArea, char __user *puBuf,
unsigned int dwLength, bool bCircular, bool bCircToHost)
{
- // Start by working out the page aligned start of the area and the size
- // of the area in pages, allowing for the start not being aligned and the
- // end needing to be rounded up to a page boundary.
+ /* Start by working out the page aligned start of the area and the size */
+ /* of the area in pages, allowing for the start not being aligned and the */
+ /* end needing to be rounded up to a page boundary. */
unsigned long ulStart = ((unsigned long)puBuf) & PAGE_MASK;
unsigned int ulOffset = ((unsigned long)puBuf) & (PAGE_SIZE - 1);
int len = (dwLength + ulOffset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- TRANSAREA *pTA = &pdx->rTransDef[nArea]; // to save typing
- struct page **pPages = 0; // space for page tables
- int nPages = 0; // and number of pages
+ TRANSAREA *pTA = &pdx->rTransDef[nArea]; /* to save typing */
+ struct page **pPages = NULL; /* space for page tables */
+ int nPages = 0; /* and number of pages */
- int iReturn = ClearArea(pdx, nArea); // see if OK to use this area
- if ((iReturn != U14ERR_NOTSET) && // if not area unused and...
- (iReturn != U14ERR_NOERROR)) // ...not all OK, then...
- return iReturn; // ...we cannot use this area
+ int iReturn = ClearArea(pdx, nArea); /* see if OK to use this area */
+ if ((iReturn != U14ERR_NOTSET) && /* if not area unused and... */
+ (iReturn != U14ERR_NOERROR)) /* ...not all OK, then... */
+ return iReturn; /* ...we cannot use this area */
- if (!access_ok(VERIFY_WRITE, puBuf, dwLength)) // if we cannot access the memory...
- return -EFAULT; // ...then we are done
+ if (!access_ok(VERIFY_WRITE, puBuf, dwLength)) /* if we cannot access the memory... */
+ return -EFAULT; /* ...then we are done */
- // Now allocate space to hold the page pointer and virtual address pointer tables
+ /* Now allocate space to hold the page pointer and virtual address pointer tables */
pPages = kmalloc(len * sizeof(struct page *), GFP_KERNEL);
if (!pPages) {
iReturn = U14ERR_NOMEMORY;
@@ -705,24 +691,23 @@ static int SetArea(DEVICE_EXTENSION * pdx, int nArea, char __user * puBuf,
dev_dbg(&pdx->interface->dev, "%s %p, length=%06x, circular %d",
__func__, puBuf, dwLength, bCircular);
- // To pin down user pages we must first acquire the mapping semaphore.
- down_read(&current->mm->mmap_sem); // get memory map semaphore
- nPages =
- get_user_pages(current, current->mm, ulStart, len, 1, 0, pPages, 0);
- up_read(&current->mm->mmap_sem); // release the semaphore
+ /* To pin down user pages we must first acquire the mapping semaphore. */
+ down_read(&current->mm->mmap_sem); /* get memory map semaphore */
+ nPages = get_user_pages(current, current->mm, ulStart, len, 1, 0,
+ pPages, NULL);
+ up_read(&current->mm->mmap_sem); /* release the semaphore */
dev_dbg(&pdx->interface->dev, "%s nPages = %d", __func__, nPages);
- if (nPages > 0) // if we succeeded
- {
- // If you are tempted to use page_address (form LDD3), forget it. You MUST use
- // kmap() or kmap_atomic() to get a virtual address. page_address will give you
- // (null) or at least it does in this context with an x86 machine.
+ if (nPages > 0) { /* if we succeeded */
+ /* If you are tempted to use page_address (form LDD3), forget it. You MUST use */
+ /* kmap() or kmap_atomic() to get a virtual address. page_address will give you */
+ /* (null) or at least it does in this context with an x86 machine. */
spin_lock_irq(&pdx->stagedLock);
- pTA->lpvBuff = puBuf; // keep start of region (user address)
- pTA->dwBaseOffset = ulOffset; // save offset in first page to start of xfer
- pTA->dwLength = dwLength; // Size if the region in bytes
- pTA->pPages = pPages; // list of pages that are used by buffer
- pTA->nPages = nPages; // number of pages
+ pTA->lpvBuff = puBuf; /* keep start of region (user address) */
+ pTA->dwBaseOffset = ulOffset; /* save offset in first page to start of xfer */
+ pTA->dwLength = dwLength; /* Size if the region in bytes */
+ pTA->pPages = pPages; /* list of pages that are used by buffer */
+ pTA->nPages = nPages; /* number of pages */
pTA->bCircular = bCircular;
pTA->bCircToHost = bCircToHost;
@@ -731,10 +716,10 @@ static int SetArea(DEVICE_EXTENSION * pdx, int nArea, char __user * puBuf,
pTA->aBlocks[0].dwSize = 0;
pTA->aBlocks[1].dwOffset = 0;
pTA->aBlocks[1].dwSize = 0;
- pTA->bUsed = true; // This is now a used block
+ pTA->bUsed = true; /* This is now a used block */
spin_unlock_irq(&pdx->stagedLock);
- iReturn = U14ERR_NOERROR; // say all was well
+ iReturn = U14ERR_NOERROR; /* say all was well */
} else {
iReturn = U14ERR_LOCKFAIL;
goto error;
@@ -754,7 +739,7 @@ error:
** unset it. Unsetting will fail if the area is booked, and a transfer to that
** area is in progress. Otherwise, we will release the area and re-assign it.
****************************************************************************/
-int SetTransfer(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
+int SetTransfer(DEVICE_EXTENSION *pdx, TRANSFERDESC __user *pTD)
{
int iReturn;
TRANSFERDESC td;
@@ -765,9 +750,9 @@ int SetTransfer(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
mutex_lock(&pdx->io_mutex);
dev_dbg(&pdx->interface->dev, "%s area:%d, size:%08x", __func__,
td.wAreaNum, td.dwLength);
- // The strange cast is done so that we don't get warnings in 32-bit linux about the size of the
- // pointer. The pointer is always passed as a 64-bit object so that we don't have problems using
- // a 32-bit program on a 64-bit system. unsigned long is 64-bits on a 64-bit system.
+ /* The strange cast is done so that we don't get warnings in 32-bit linux about the size of the */
+ /* pointer. The pointer is always passed as a 64-bit object so that we don't have problems using */
+ /* a 32-bit program on a 64-bit system. unsigned long is 64-bits on a 64-bit system. */
iReturn =
SetArea(pdx, td.wAreaNum,
(char __user *)((unsigned long)td.lpvBuff), td.dwLength,
@@ -780,7 +765,7 @@ int SetTransfer(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
** UnSetTransfer
** Erases a transfer area record
****************************************************************************/
-int UnsetTransfer(DEVICE_EXTENSION * pdx, int nArea)
+int UnsetTransfer(DEVICE_EXTENSION *pdx, int nArea)
{
int iReturn;
mutex_lock(&pdx->io_mutex);
@@ -797,27 +782,26 @@ int UnsetTransfer(DEVICE_EXTENSION * pdx, int nArea)
** pretend that whatever the user asked for was achieved, so we return 1 if
** try to create one, and 0 if they ask to remove (assuming all else was OK).
****************************************************************************/
-int SetEvent(DEVICE_EXTENSION * pdx, TRANSFEREVENT __user * pTE)
+int SetEvent(DEVICE_EXTENSION *pdx, TRANSFEREVENT __user *pTE)
{
int iReturn = U14ERR_NOERROR;
TRANSFEREVENT te;
- // get a local copy of the data
+ /* get a local copy of the data */
if (copy_from_user(&te, pTE, sizeof(te)))
return -EFAULT;
- if (te.wAreaNum >= MAX_TRANSAREAS) // the area must exist
+ if (te.wAreaNum >= MAX_TRANSAREAS) /* the area must exist */
return U14ERR_BADAREA;
else {
TRANSAREA *pTA = &pdx->rTransDef[te.wAreaNum];
- mutex_lock(&pdx->io_mutex); // make sure we have no competitor
+ mutex_lock(&pdx->io_mutex); /* make sure we have no competitor */
spin_lock_irq(&pdx->stagedLock);
- if (pTA->bUsed) // area must be in use
- {
- pTA->dwEventSt = te.dwStart; // set area regions
- pTA->dwEventSz = te.dwLength; // set size (0 cancels it)
- pTA->bEventToHost = te.wFlags & 1; // set the direction
- pTA->iWakeUp = 0; // zero the wake up count
+ if (pTA->bUsed) { /* area must be in use */
+ pTA->dwEventSt = te.dwStart; /* set area regions */
+ pTA->dwEventSz = te.dwLength; /* set size (0 cancels it) */
+ pTA->bEventToHost = te.wFlags & 1; /* set the direction */
+ pTA->iWakeUp = 0; /* zero the wake up count */
} else
iReturn = U14ERR_NOTSET;
spin_unlock_irq(&pdx->stagedLock);
@@ -833,7 +817,7 @@ int SetEvent(DEVICE_EXTENSION * pdx, TRANSFEREVENT __user * pTE)
** of times that a block met the event condition since we last cleared it or
** 0 if timed out, or -ve error (bad area or not set, or signal).
****************************************************************************/
-int WaitEvent(DEVICE_EXTENSION * pdx, int nArea, int msTimeOut)
+int WaitEvent(DEVICE_EXTENSION *pdx, int nArea, int msTimeOut)
{
int iReturn;
if ((unsigned)nArea >= MAX_TRANSAREAS)
@@ -841,15 +825,15 @@ int WaitEvent(DEVICE_EXTENSION * pdx, int nArea, int msTimeOut)
else {
int iWait;
TRANSAREA *pTA = &pdx->rTransDef[nArea];
- msTimeOut = (msTimeOut * HZ + 999) / 1000; // convert timeout to jiffies
-
- // We cannot wait holding the mutex, but we check the flags while holding
- // it. This may well be pointless as another thread could get in between
- // releasing it and the wait call. However, this would have to clear the
- // iWakeUp flag. However, the !pTA-bUsed may help us in this case.
- mutex_lock(&pdx->io_mutex); // make sure we have no competitor
- if (!pTA->bUsed || !pTA->dwEventSz) // check something to wait for...
- return U14ERR_NOTSET; // ...else we do nothing
+ msTimeOut = (msTimeOut * HZ + 999) / 1000; /* convert timeout to jiffies */
+
+ /* We cannot wait holding the mutex, but we check the flags while holding */
+ /* it. This may well be pointless as another thread could get in between */
+ /* releasing it and the wait call. However, this would have to clear the */
+ /* iWakeUp flag. However, the !pTA-bUsed may help us in this case. */
+ mutex_lock(&pdx->io_mutex); /* make sure we have no competitor */
+ if (!pTA->bUsed || !pTA->dwEventSz) /* check something to wait for... */
+ return U14ERR_NOTSET; /* ...else we do nothing */
mutex_unlock(&pdx->io_mutex);
if (msTimeOut)
@@ -863,12 +847,12 @@ int WaitEvent(DEVICE_EXTENSION * pdx, int nArea, int msTimeOut)
wait_event_interruptible(pTA->wqEvent, pTA->iWakeUp
|| !pTA->bUsed);
if (iWait)
- iReturn = -ERESTARTSYS; // oops - we have had a SIGNAL
+ iReturn = -ERESTARTSYS; /* oops - we have had a SIGNAL */
else
- iReturn = pTA->iWakeUp; // else the wakeup count
+ iReturn = pTA->iWakeUp; /* else the wakeup count */
spin_lock_irq(&pdx->stagedLock);
- pTA->iWakeUp = 0; // clear the flag
+ pTA->iWakeUp = 0; /* clear the flag */
spin_unlock_irq(&pdx->stagedLock);
}
return iReturn;
@@ -880,17 +864,17 @@ int WaitEvent(DEVICE_EXTENSION * pdx, int nArea, int msTimeOut)
** number of times a block completed since the last call, or 0 if none or a
** negative error.
****************************************************************************/
-int TestEvent(DEVICE_EXTENSION * pdx, int nArea)
+int TestEvent(DEVICE_EXTENSION *pdx, int nArea)
{
int iReturn;
if ((unsigned)nArea >= MAX_TRANSAREAS)
iReturn = U14ERR_BADAREA;
else {
TRANSAREA *pTA = &pdx->rTransDef[nArea];
- mutex_lock(&pdx->io_mutex); // make sure we have no competitor
+ mutex_lock(&pdx->io_mutex); /* make sure we have no competitor */
spin_lock_irq(&pdx->stagedLock);
- iReturn = pTA->iWakeUp; // get wakeup count since last call
- pTA->iWakeUp = 0; // clear the count
+ iReturn = pTA->iWakeUp; /* get wakeup count since last call */
+ pTA->iWakeUp = 0; /* clear the count */
spin_unlock_irq(&pdx->stagedLock);
mutex_unlock(&pdx->io_mutex);
}
@@ -901,17 +885,17 @@ int TestEvent(DEVICE_EXTENSION * pdx, int nArea)
** GetTransferInfo
** Puts the current state of the 1401 in a TGET_TX_BLOCK.
*****************************************************************************/
-int GetTransfer(DEVICE_EXTENSION * pdx, TGET_TX_BLOCK __user * pTX)
+int GetTransfer(DEVICE_EXTENSION *pdx, TGET_TX_BLOCK __user *pTX)
{
int iReturn = U14ERR_NOERROR;
unsigned int dwIdent;
mutex_lock(&pdx->io_mutex);
- dwIdent = pdx->StagedId; // area ident for last xfer
+ dwIdent = pdx->StagedId; /* area ident for last xfer */
if (dwIdent >= MAX_TRANSAREAS)
iReturn = U14ERR_BADAREA;
else {
- // Return the best information we have - we don't have physical addresses
+ /* Return the best information we have - we don't have physical addresses */
TGET_TX_BLOCK *tx;
tx = kzalloc(sizeof(*tx), GFP_KERNEL);
@@ -921,8 +905,8 @@ int GetTransfer(DEVICE_EXTENSION * pdx, TGET_TX_BLOCK __user * pTX)
}
tx->size = pdx->rTransDef[dwIdent].dwLength;
tx->linear = (long long)((long)pdx->rTransDef[dwIdent].lpvBuff);
- tx->avail = GET_TX_MAXENTRIES; // how many blocks we could return
- tx->used = 1; // number we actually return
+ tx->avail = GET_TX_MAXENTRIES; /* how many blocks we could return */
+ tx->used = 1; /* number we actually return */
tx->entries[0].physical =
(long long)(tx->linear + pdx->StagedOffset);
tx->entries[0].size = tx->size;
@@ -940,7 +924,7 @@ int GetTransfer(DEVICE_EXTENSION * pdx, TGET_TX_BLOCK __user * pTX)
**
** Empties the host i/o buffers
****************************************************************************/
-int KillIO1401(DEVICE_EXTENSION * pdx)
+int KillIO1401(DEVICE_EXTENSION *pdx)
{
dev_dbg(&pdx->interface->dev, "%s", __func__);
mutex_lock(&pdx->io_mutex);
@@ -955,7 +939,7 @@ int KillIO1401(DEVICE_EXTENSION * pdx)
** Returns a 0 or a 1 for whether DMA is happening. No point holding a mutex
** for this as it only does one read.
*****************************************************************************/
-int BlkTransState(DEVICE_EXTENSION * pdx)
+int BlkTransState(DEVICE_EXTENSION *pdx)
{
int iReturn = pdx->dwDMAFlag != MODE_CHAR;
dev_dbg(&pdx->interface->dev, "%s = %d", __func__, iReturn);
@@ -967,12 +951,12 @@ int BlkTransState(DEVICE_EXTENSION * pdx)
**
** Puts the current state of the 1401 in the Irp return buffer.
*****************************************************************************/
-int StateOf1401(DEVICE_EXTENSION * pdx)
+int StateOf1401(DEVICE_EXTENSION *pdx)
{
int iReturn;
mutex_lock(&pdx->io_mutex);
- QuickCheck(pdx, false, false); // get state up to date, no reset
+ QuickCheck(pdx, false, false); /* get state up to date, no reset */
iReturn = pdx->sCurrentState;
mutex_unlock(&pdx->io_mutex);
@@ -987,20 +971,23 @@ int StateOf1401(DEVICE_EXTENSION * pdx)
** Initiates a self-test cycle. The assumption is that we have no interrupts
** active, so we should make sure that this is the case.
*****************************************************************************/
-int StartSelfTest(DEVICE_EXTENSION * pdx)
+int StartSelfTest(DEVICE_EXTENSION *pdx)
{
int nGot;
mutex_lock(&pdx->io_mutex);
dev_dbg(&pdx->interface->dev, "%s", __func__);
- ced_draw_down(pdx); // wait for, then kill outstanding Urbs
- FlushInBuff(pdx); // Clear out input buffer & pipe
- FlushOutBuff(pdx); // Clear output buffer & pipe
-// ReadWrite_Cancel(pDeviceObject); /* so things stay tidy */
+ ced_draw_down(pdx); /* wait for, then kill outstanding Urbs */
+ FlushInBuff(pdx); /* Clear out input buffer & pipe */
+ FlushOutBuff(pdx); /* Clear output buffer & pipe */
+ /* so things stay tidy */
+ /* ReadWrite_Cancel(pDeviceObject); */
pdx->dwDMAFlag = MODE_CHAR; /* Clear DMA mode flags here */
- nGot = usb_control_msg(pdx->udev, usb_rcvctrlpipe(pdx->udev, 0), DB_SELFTEST, (H_TO_D | VENDOR | DEVREQ), 0, 0, 0, 0, HZ); // allow 1 second timeout
- pdx->ulSelfTestTime = jiffies + HZ * 30; // 30 seconds into the future
+ nGot = usb_control_msg(pdx->udev, usb_rcvctrlpipe(pdx->udev, 0),
+ DB_SELFTEST, (H_TO_D | VENDOR | DEVREQ),
+ 0, 0, NULL, 0, HZ); /* allow 1 second timeout */
+ pdx->ulSelfTestTime = jiffies + HZ * 30; /* 30 seconds into the future */
mutex_unlock(&pdx->io_mutex);
if (nGot < 0)
@@ -1013,53 +1000,49 @@ int StartSelfTest(DEVICE_EXTENSION * pdx)
**
** Check progress of a self-test cycle
****************************************************************************/
-int CheckSelfTest(DEVICE_EXTENSION * pdx, TGET_SELFTEST __user * pGST)
+int CheckSelfTest(DEVICE_EXTENSION *pdx, TGET_SELFTEST __user *pGST)
{
unsigned int state, error;
int iReturn;
- TGET_SELFTEST gst; // local work space
- memset(&gst, 0, sizeof(gst)); // clear out the space (sets code 0)
+ TGET_SELFTEST gst; /* local work space */
+ memset(&gst, 0, sizeof(gst)); /* clear out the space (sets code 0) */
mutex_lock(&pdx->io_mutex);
dev_dbg(&pdx->interface->dev, "%s", __func__);
iReturn = Get1401State(pdx, &state, &error);
- if (iReturn == U14ERR_NOERROR) // Only accept zero if it happens twice
+ if (iReturn == U14ERR_NOERROR) /* Only accept zero if it happens twice */
iReturn = Get1401State(pdx, &state, &error);
- if (iReturn != U14ERR_NOERROR) // Self-test can cause comms errors
- { // so we assume still testing
+ if (iReturn != U14ERR_NOERROR) { /* Self-test can cause comms errors */
+ /* so we assume still testing */
dev_err(&pdx->interface->dev,
"%s Get1401State=%d, assuming still testing", __func__,
iReturn);
- state = 0x80; // Force still-testing, no error
+ state = 0x80; /* Force still-testing, no error */
error = 0;
iReturn = U14ERR_NOERROR;
}
- if ((state == -1) && (error == -1)) // If Get1401State had problems
- {
+ if ((state == -1) && (error == -1)) { /* If Get1401State had problems */
dev_err(&pdx->interface->dev,
"%s Get1401State failed, assuming still testing",
__func__);
- state = 0x80; // Force still-testing, no error
+ state = 0x80; /* Force still-testing, no error */
error = 0;
}
- if ((state & 0xFF) == 0x80) // If we are still in self-test
- {
- if (state & 0x00FF0000) // Have we got an error?
- {
- gst.code = (state & 0x00FF0000) >> 16; // read the error code
- gst.x = error & 0x0000FFFF; // Error data X
- gst.y = (error & 0xFFFF0000) >> 16; // and data Y
+ if ((state & 0xFF) == 0x80) { /* If we are still in self-test */
+ if (state & 0x00FF0000) { /* Have we got an error? */
+ gst.code = (state & 0x00FF0000) >> 16; /* read the error code */
+ gst.x = error & 0x0000FFFF; /* Error data X */
+ gst.y = (error & 0xFFFF0000) >> 16; /* and data Y */
dev_dbg(&pdx->interface->dev, "Self-test error code %d",
gst.code);
- } else // No error, check for timeout
- {
- unsigned long ulNow = jiffies; // get current time
+ } else { /* No error, check for timeout */
+ unsigned long ulNow = jiffies; /* get current time */
if (time_after(ulNow, pdx->ulSelfTestTime)) {
- gst.code = -2; // Flag the timeout
+ gst.code = -2; /* Flag the timeout */
dev_dbg(&pdx->interface->dev,
"Self-test timed-out");
} else
@@ -1067,16 +1050,16 @@ int CheckSelfTest(DEVICE_EXTENSION * pdx, TGET_SELFTEST __user * pGST)
"Self-test on-going");
}
} else {
- gst.code = -1; // Flag the test is done
+ gst.code = -1; /* Flag the test is done */
dev_dbg(&pdx->interface->dev, "Self-test done");
}
- if (gst.code < 0) // If we have a problem or finished
- { // If using the 2890 we should reset properly
+ if (gst.code < 0) { /* If we have a problem or finished */
+ /* If using the 2890 we should reset properly */
if ((pdx->nPipes == 4) && (pdx->s1401Type <= TYPEPOWER))
- Is1401(pdx); // Get 1401 reset and OK
+ Is1401(pdx); /* Get 1401 reset and OK */
else
- QuickCheck(pdx, true, true); // Otherwise check without reset unless problems
+ QuickCheck(pdx, true, true); /* Otherwise check without reset unless problems */
}
mutex_unlock(&pdx->io_mutex);
@@ -1091,7 +1074,7 @@ int CheckSelfTest(DEVICE_EXTENSION * pdx, TGET_SELFTEST __user * pGST)
**
** Returns code for standard, plus, micro1401, power1401 or none
****************************************************************************/
-int TypeOf1401(DEVICE_EXTENSION * pdx)
+int TypeOf1401(DEVICE_EXTENSION *pdx)
{
int iReturn = TYPEUNKNOWN;
mutex_lock(&pdx->io_mutex);
@@ -1100,7 +1083,7 @@ int TypeOf1401(DEVICE_EXTENSION * pdx)
switch (pdx->s1401Type) {
case TYPE1401:
iReturn = U14ERR_STD;
- break; // Handle these types directly
+ break; /* Handle these types directly */
case TYPEPLUS:
iReturn = U14ERR_PLUS;
break;
@@ -1109,9 +1092,9 @@ int TypeOf1401(DEVICE_EXTENSION * pdx)
break;
default:
if ((pdx->s1401Type >= TYPEPOWER) && (pdx->s1401Type <= 25))
- iReturn = pdx->s1401Type + 4; // We can calculate types
- else // for up-coming 1401 designs
- iReturn = TYPEUNKNOWN; // Don't know or not there
+ iReturn = pdx->s1401Type + 4; /* We can calculate types */
+ else /* for up-coming 1401 designs */
+ iReturn = TYPEUNKNOWN; /* Don't know or not there */
}
dev_dbg(&pdx->interface->dev, "%s %d", __func__, iReturn);
mutex_unlock(&pdx->io_mutex);
@@ -1124,13 +1107,13 @@ int TypeOf1401(DEVICE_EXTENSION * pdx)
**
** Returns flags on block transfer abilities
****************************************************************************/
-int TransferFlags(DEVICE_EXTENSION * pdx)
+int TransferFlags(DEVICE_EXTENSION *pdx)
{
- int iReturn = U14TF_MULTIA | U14TF_DIAG | // we always have multiple DMA area
- U14TF_NOTIFY | U14TF_CIRCTH; // diagnostics, notify and circular
+ int iReturn = U14TF_MULTIA | U14TF_DIAG | /* we always have multiple DMA area */
+ U14TF_NOTIFY | U14TF_CIRCTH; /* diagnostics, notify and circular */
dev_dbg(&pdx->interface->dev, "%s", __func__);
mutex_lock(&pdx->io_mutex);
- if (pdx->bIsUSB2) // Set flag for USB2 if appropriate
+ if (pdx->bIsUSB2) /* Set flag for USB2 if appropriate */
iReturn |= U14TF_USB2;
mutex_unlock(&pdx->io_mutex);
@@ -1142,12 +1125,16 @@ int TransferFlags(DEVICE_EXTENSION * pdx)
** Issues a debug\diagnostic command to the 1401 along with a 32-bit datum
** This is a utility command used for dbg operations.
*/
-static int DbgCmd1401(DEVICE_EXTENSION * pdx, unsigned char cmd,
+static int DbgCmd1401(DEVICE_EXTENSION *pdx, unsigned char cmd,
unsigned int data)
{
int iReturn;
dev_dbg(&pdx->interface->dev, "%s entry", __func__);
- iReturn = usb_control_msg(pdx->udev, usb_sndctrlpipe(pdx->udev, 0), cmd, (H_TO_D | VENDOR | DEVREQ), (unsigned short)data, (unsigned short)(data >> 16), 0, 0, HZ); // allow 1 second timeout
+ iReturn = usb_control_msg(pdx->udev, usb_sndctrlpipe(pdx->udev, 0), cmd,
+ (H_TO_D | VENDOR | DEVREQ),
+ (unsigned short)data,
+ (unsigned short)(data >> 16), NULL, 0, HZ);
+ /* allow 1 second timeout */
if (iReturn < 0)
dev_err(&pdx->interface->dev, "%s fail code=%d", __func__,
iReturn);
@@ -1160,7 +1147,7 @@ static int DbgCmd1401(DEVICE_EXTENSION * pdx, unsigned char cmd,
**
** Execute the diagnostic peek operation. Uses address, width and repeats.
****************************************************************************/
-int DbgPeek(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
+int DbgPeek(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB)
{
int iReturn;
TDBGBLOCK db;
@@ -1189,7 +1176,7 @@ int DbgPeek(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
** Execute the diagnostic poke operation. Parameters are in the CSBLOCK struct
** in order address, size, repeats and value to poke.
****************************************************************************/
-int DbgPoke(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
+int DbgPoke(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB)
{
int iReturn;
TDBGBLOCK db;
@@ -1218,7 +1205,7 @@ int DbgPoke(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
** Execute the diagnostic ramp data operation. Parameters are in the CSBLOCK struct
** in order address, default, enable mask, size and repeats.
****************************************************************************/
-int DbgRampData(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
+int DbgRampData(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB)
{
int iReturn;
TDBGBLOCK db;
@@ -1250,7 +1237,7 @@ int DbgRampData(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
**
** Execute the diagnostic ramp address operation
****************************************************************************/
-int DbgRampAddr(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
+int DbgRampAddr(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB)
{
int iReturn;
TDBGBLOCK db;
@@ -1280,16 +1267,16 @@ int DbgRampAddr(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
**
** Retrieve the data resulting from the last debug Peek operation
****************************************************************************/
-int DbgGetData(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
+int DbgGetData(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB)
{
int iReturn;
TDBGBLOCK db;
- memset(&db, 0, sizeof(db)); // fill returned block with 0s
+ memset(&db, 0, sizeof(db)); /* fill returned block with 0s */
mutex_lock(&pdx->io_mutex);
dev_dbg(&pdx->interface->dev, "%s", __func__);
- // Read back the last peeked value from the 1401.
+ /* Read back the last peeked value from the 1401. */
iReturn = usb_control_msg(pdx->udev, usb_rcvctrlpipe(pdx->udev, 0),
DB_DATA, (D_TO_H | VENDOR | DEVREQ), 0, 0,
&db.iData, sizeof(db.iData), HZ);
@@ -1313,7 +1300,7 @@ int DbgGetData(DEVICE_EXTENSION * pdx, TDBGBLOCK __user * pDB)
** Stop any never-ending debug loop, we just call Get1401State for USB
**
****************************************************************************/
-int DbgStopLoop(DEVICE_EXTENSION * pdx)
+int DbgStopLoop(DEVICE_EXTENSION *pdx)
{
int iReturn;
unsigned int uState, uErr;
@@ -1334,7 +1321,7 @@ int DbgStopLoop(DEVICE_EXTENSION * pdx)
** booked and a transfer to that area is in progress. Otherwise, we will
** release the area and re-assign it.
****************************************************************************/
-int SetCircular(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
+int SetCircular(DEVICE_EXTENSION *pdx, TRANSFERDESC __user *pTD)
{
int iReturn;
bool bToHost;
@@ -1346,11 +1333,11 @@ int SetCircular(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
mutex_lock(&pdx->io_mutex);
dev_dbg(&pdx->interface->dev, "%s area:%d, size:%08x", __func__,
td.wAreaNum, td.dwLength);
- bToHost = td.eSize != 0; // this is used as the tohost flag
+ bToHost = td.eSize != 0; /* this is used as the tohost flag */
- // The strange cast is done so that we don't get warnings in 32-bit linux about the size of the
- // pointer. The pointer is always passed as a 64-bit object so that we don't have problems using
- // a 32-bit program on a 64-bit system. unsigned long is 64-bits on a 64-bit system.
+ /* The strange cast is done so that we don't get warnings in 32-bit linux about the size of the */
+ /* pointer. The pointer is always passed as a 64-bit object so that we don't have problems using */
+ /* a 32-bit program on a 64-bit system. unsigned long is 64-bits on a 64-bit system. */
iReturn =
SetArea(pdx, td.wAreaNum,
(char __user *)((unsigned long)td.lpvBuff), td.dwLength,
@@ -1364,7 +1351,7 @@ int SetCircular(DEVICE_EXTENSION * pdx, TRANSFERDESC __user * pTD)
**
** Return the next available block of circularly-transferred data.
****************************************************************************/
-int GetCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
+int GetCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user *pCB)
{
int iReturn = U14ERR_NOERROR;
unsigned int nArea;
@@ -1377,20 +1364,17 @@ int GetCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
mutex_lock(&pdx->io_mutex);
- nArea = cb.nArea; // Retrieve parameters first
- cb.dwOffset = 0; // set default result (nothing)
+ nArea = cb.nArea; /* Retrieve parameters first */
+ cb.dwOffset = 0; /* set default result (nothing) */
cb.dwSize = 0;
- if (nArea < MAX_TRANSAREAS) // The area number must be OK
- {
- TRANSAREA *pArea = &pdx->rTransDef[nArea]; // Pointer to relevant info
- spin_lock_irq(&pdx->stagedLock); // Lock others out
+ if (nArea < MAX_TRANSAREAS) { /* The area number must be OK */
+ TRANSAREA *pArea = &pdx->rTransDef[nArea]; /* Pointer to relevant info */
+ spin_lock_irq(&pdx->stagedLock); /* Lock others out */
- if ((pArea->bUsed) && (pArea->bCircular) && // Must be circular area
- (pArea->bCircToHost)) // For now at least must be to host
- {
- if (pArea->aBlocks[0].dwSize > 0) // Got anything?
- {
+ if ((pArea->bUsed) && (pArea->bCircular) && /* Must be circular area */
+ (pArea->bCircToHost)) { /* For now at least must be to host */
+ if (pArea->aBlocks[0].dwSize > 0) { /* Got anything? */
cb.dwOffset = pArea->aBlocks[0].dwOffset;
cb.dwSize = pArea->aBlocks[0].dwSize;
dev_dbg(&pdx->interface->dev,
@@ -1416,7 +1400,7 @@ int GetCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
**
** Frees a block of circularly-transferred data and returns the next one.
****************************************************************************/
-int FreeCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
+int FreeCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user *pCB)
{
int iReturn = U14ERR_NOERROR;
unsigned int nArea, uStart, uSize;
@@ -1429,33 +1413,28 @@ int FreeCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
mutex_lock(&pdx->io_mutex);
- nArea = cb.nArea; // Retrieve parameters first
+ nArea = cb.nArea; /* Retrieve parameters first */
uStart = cb.dwOffset;
uSize = cb.dwSize;
- cb.dwOffset = 0; // then set default result (nothing)
+ cb.dwOffset = 0; /* then set default result (nothing) */
cb.dwSize = 0;
- if (nArea < MAX_TRANSAREAS) // The area number must be OK
- {
- TRANSAREA *pArea = &pdx->rTransDef[nArea]; // Pointer to relevant info
- spin_lock_irq(&pdx->stagedLock); // Lock others out
+ if (nArea < MAX_TRANSAREAS) { /* The area number must be OK */
+ TRANSAREA *pArea = &pdx->rTransDef[nArea]; /* Pointer to relevant info */
+ spin_lock_irq(&pdx->stagedLock); /* Lock others out */
- if ((pArea->bUsed) && (pArea->bCircular) && // Must be circular area
- (pArea->bCircToHost)) // For now at least must be to host
- {
+ if ((pArea->bUsed) && (pArea->bCircular) && /* Must be circular area */
+ (pArea->bCircToHost)) { /* For now at least must be to host */
bool bWaiting = false;
- if ((pArea->aBlocks[0].dwSize >= uSize) && // Got anything?
- (pArea->aBlocks[0].dwOffset == uStart)) // Must be legal data
- {
+ if ((pArea->aBlocks[0].dwSize >= uSize) && /* Got anything? */
+ (pArea->aBlocks[0].dwOffset == uStart)) { /* Must be legal data */
pArea->aBlocks[0].dwSize -= uSize;
pArea->aBlocks[0].dwOffset += uSize;
- if (pArea->aBlocks[0].dwSize == 0) // Have we emptied this block?
- {
- if (pArea->aBlocks[1].dwSize) // Is there a second block?
- {
- pArea->aBlocks[0] = pArea->aBlocks[1]; // Copy down block 2 data
- pArea->aBlocks[1].dwSize = 0; // and mark the second block as unused
+ if (pArea->aBlocks[0].dwSize == 0) { /* Have we emptied this block? */
+ if (pArea->aBlocks[1].dwSize) { /* Is there a second block? */
+ pArea->aBlocks[0] = pArea->aBlocks[1]; /* Copy down block 2 data */
+ pArea->aBlocks[1].dwSize = 0; /* and mark the second block as unused */
pArea->aBlocks[1].dwOffset = 0;
} else
pArea->aBlocks[0].dwOffset = 0;
@@ -1468,9 +1447,8 @@ int FreeCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
pArea->aBlocks[0].dwOffset,
pdx->bXFerWaiting);
- // Return the next available block of memory as well
- if (pArea->aBlocks[0].dwSize > 0) // Got anything?
- {
+ /* Return the next available block of memory as well */
+ if (pArea->aBlocks[0].dwSize > 0) { /* Got anything? */
cb.dwOffset =
pArea->aBlocks[0].dwOffset;
cb.dwSize = pArea->aBlocks[0].dwSize;
@@ -1492,9 +1470,8 @@ int FreeCircBlock(DEVICE_EXTENSION * pdx, TCIRCBLOCK __user * pCB)
iReturn = U14ERR_NOMEMORY;
}
- // If we have one, kick off pending transfer
- if (bWaiting) // Got a block xfer waiting?
- {
+ /* If we have one, kick off pending transfer */
+ if (bWaiting) { /* Got a block xfer waiting? */
int RWMStat =
ReadWriteMem(pdx, !pdx->rDMAInfo.bOutWard,
pdx->rDMAInfo.wIdent,
diff --git a/drivers/staging/ced1401/ced_ioctl.h b/drivers/staging/ced1401/ced_ioctl.h
index 0895c9414b4f..aa68878bd251 100644
--- a/drivers/staging/ced1401/ced_ioctl.h
+++ b/drivers/staging/ced1401/ced_ioctl.h
@@ -35,7 +35,7 @@ typedef struct TransferDesc {
short eSize; /* element size - is tohost flag for circular */
} TRANSFERDESC;
-typedef TRANSFERDESC * LPTRANSFERDESC;
+typedef TRANSFERDESC *LPTRANSFERDESC;
typedef struct TransferEvent {
unsigned int dwStart; /* offset into the area */
diff --git a/drivers/staging/ced1401/machine.h b/drivers/staging/ced1401/machine.h
index af073790b942..dbd4036d9bdd 100644
--- a/drivers/staging/ced1401/machine.h
+++ b/drivers/staging/ced1401/machine.h
@@ -77,20 +77,13 @@
#endif
#if defined(LINUX) || defined(MAXOSX)
- #define FAR
+ #define FAR
- typedef int BOOL; // To match Windows
- typedef char * LPSTR;
- typedef const char * LPCSTR;
- typedef unsigned short WORD;
- typedef unsigned int DWORD;
- typedef unsigned char BYTE;
- typedef BYTE BOOLEAN;
- typedef unsigned char UCHAR;
- #define __packed __attribute__((packed))
- typedef BYTE * LPBYTE;
- #define HIWORD(x) (WORD)(((x)>>16) & 0xffff)
- #define LOWORD(x) (WORD)((x) & 0xffff)
+ typedef int BOOL; /* To match Windows */
+ typedef unsigned char BYTE;
+ #define __packed __attribute__((packed))
+ #define HIWORD(x) (unsigned short)(((x)>>16) & 0xffff)
+ #define LOWORD(x) (unsigned short)((x) & 0xffff)
#endif
#ifdef _IS_WINDOWS_
@@ -104,21 +97,20 @@
** a synonym.
*/
#ifdef GNUC
- #define DllExport __attribute__((dllexport))
- #define DllImport __attribute__((dllimport))
+ #define DllExport __attribute__((dllexport))
+ #define DllImport __attribute__((dllimport))
#endif
#ifndef DllExport
#ifdef _IS_WINDOWS_
- #define DllExport __declspec(dllexport)
- #define DllImport __declspec(dllimport)
+ #define DllExport __declspec(dllexport)
+ #define DllImport __declspec(dllimport)
#else
- #define DllExport
- #define DllImport
+ #define DllExport
+ #define DllImport
#endif
#endif /* _IS_WINDOWS_ */
-
#ifndef TRUE
#define TRUE 1
#define FALSE 0
diff --git a/drivers/staging/ced1401/usb1401.c b/drivers/staging/ced1401/usb1401.c
index 254131d8be5f..97c55f9e5151 100644
--- a/drivers/staging/ced1401/usb1401.c
+++ b/drivers/staging/ced1401/usb1401.c
@@ -126,18 +126,18 @@ static void ced_delete(struct kref *kref)
{
DEVICE_EXTENSION *pdx = to_DEVICE_EXTENSION(kref);
- // Free up the output buffer, then free the output urb. Note that the interface member
- // of pdx will probably be NULL, so cannot be used to get to dev.
+ /* Free up the output buffer, then free the output urb. Note that the interface member */
+ /* of pdx will probably be NULL, so cannot be used to get to dev. */
usb_free_coherent(pdx->udev, OUTBUF_SZ, pdx->pCoherCharOut,
pdx->pUrbCharOut->transfer_dma);
usb_free_urb(pdx->pUrbCharOut);
- // Do the same for chan input
+ /* Do the same for chan input */
usb_free_coherent(pdx->udev, INBUF_SZ, pdx->pCoherCharIn,
pdx->pUrbCharIn->transfer_dma);
usb_free_urb(pdx->pUrbCharIn);
- // Do the same for the block transfers
+ /* Do the same for the block transfers */
usb_free_coherent(pdx->udev, STAGED_SZ, pdx->pCoherStagedIO,
pdx->pStagedUrb->transfer_dma);
usb_free_urb(pdx->pStagedUrb);
@@ -146,7 +146,7 @@ static void ced_delete(struct kref *kref)
kfree(pdx);
}
-// This is the driver end of the open() call from user space.
+/* This is the driver end of the open() call from user space. */
static int ced_open(struct inode *inode, struct file *file)
{
DEVICE_EXTENSION *pdx;
@@ -184,7 +184,7 @@ static int ced_open(struct inode *inode, struct file *file)
kref_put(&pdx->kref, ced_delete);
goto exit;
}
- } else { //uncomment this block if you want exclusive open
+ } else { /* uncomment this block if you want exclusive open */
dev_err(&interface->dev, "%s fail: already open", __func__);
retval = -EBUSY;
pdx->open_count--;
@@ -210,11 +210,11 @@ static int ced_release(struct inode *inode, struct file *file)
dev_dbg(&pdx->interface->dev, "%s called", __func__);
mutex_lock(&pdx->io_mutex);
- if (!--pdx->open_count && pdx->interface) // Allow autosuspend
+ if (!--pdx->open_count && pdx->interface) /* Allow autosuspend */
usb_autopm_put_interface(pdx->interface);
mutex_unlock(&pdx->io_mutex);
- kref_put(&pdx->kref, ced_delete); // decrement the count on our device
+ kref_put(&pdx->kref, ced_delete); /* decrement the count on our device */
return 0;
}
@@ -252,9 +252,9 @@ static int ced_flush(struct file *file, fl_owner_t id)
** not help with a device extension held by a file.
** return true if can accept new io requests, else false
*/
-static bool CanAcceptIoRequests(DEVICE_EXTENSION * pdx)
+static bool CanAcceptIoRequests(DEVICE_EXTENSION *pdx)
{
- return pdx && pdx->interface; // Can we accept IO requests
+ return pdx && pdx->interface; /* Can we accept IO requests */
}
/****************************************************************************
@@ -264,9 +264,9 @@ static bool CanAcceptIoRequests(DEVICE_EXTENSION * pdx)
static void ced_writechar_callback(struct urb *pUrb)
{
DEVICE_EXTENSION *pdx = pUrb->context;
- int nGot = pUrb->actual_length; // what we transferred
+ int nGot = pUrb->actual_length; /* what we transferred */
- if (pUrb->status) { // sync/async unlink faults aren't errors
+ if (pUrb->status) { /* sync/async unlink faults aren't errors */
if (!
(pUrb->status == -ENOENT || pUrb->status == -ECONNRESET
|| pUrb->status == -ESHUTDOWN)) {
@@ -278,36 +278,35 @@ static void ced_writechar_callback(struct urb *pUrb)
spin_lock(&pdx->err_lock);
pdx->errors = pUrb->status;
spin_unlock(&pdx->err_lock);
- nGot = 0; // and tidy up again if so
+ nGot = 0; /* and tidy up again if so */
- spin_lock(&pdx->charOutLock); // already at irq level
- pdx->dwOutBuffGet = 0; // Reset the output buffer
+ spin_lock(&pdx->charOutLock); /* already at irq level */
+ pdx->dwOutBuffGet = 0; /* Reset the output buffer */
pdx->dwOutBuffPut = 0;
- pdx->dwNumOutput = 0; // Clear the char count
- pdx->bPipeError[0] = 1; // Flag an error for later
- pdx->bSendCharsPending = false; // Allow other threads again
- spin_unlock(&pdx->charOutLock); // already at irq level
+ pdx->dwNumOutput = 0; /* Clear the char count */
+ pdx->bPipeError[0] = 1; /* Flag an error for later */
+ pdx->bSendCharsPending = false; /* Allow other threads again */
+ spin_unlock(&pdx->charOutLock); /* already at irq level */
dev_dbg(&pdx->interface->dev,
"%s - char out done, 0 chars sent", __func__);
} else {
dev_dbg(&pdx->interface->dev,
"%s - char out done, %d chars sent", __func__, nGot);
- spin_lock(&pdx->charOutLock); // already at irq level
- pdx->dwNumOutput -= nGot; // Now adjust the char send buffer
- pdx->dwOutBuffGet += nGot; // to match what we did
- if (pdx->dwOutBuffGet >= OUTBUF_SZ) // Can't do this any earlier as data could be overwritten
+ spin_lock(&pdx->charOutLock); /* already at irq level */
+ pdx->dwNumOutput -= nGot; /* Now adjust the char send buffer */
+ pdx->dwOutBuffGet += nGot; /* to match what we did */
+ if (pdx->dwOutBuffGet >= OUTBUF_SZ) /* Can't do this any earlier as data could be overwritten */
pdx->dwOutBuffGet = 0;
- if (pdx->dwNumOutput > 0) // if more to be done...
- {
- int nPipe = 0; // The pipe number to use
+ if (pdx->dwNumOutput > 0) { /* if more to be done... */
+ int nPipe = 0; /* The pipe number to use */
int iReturn;
char *pDat = &pdx->outputBuffer[pdx->dwOutBuffGet];
- unsigned int dwCount = pdx->dwNumOutput; // maximum to send
- if ((pdx->dwOutBuffGet + dwCount) > OUTBUF_SZ) // does it cross buffer end?
+ unsigned int dwCount = pdx->dwNumOutput; /* maximum to send */
+ if ((pdx->dwOutBuffGet + dwCount) > OUTBUF_SZ) /* does it cross buffer end? */
dwCount = OUTBUF_SZ - pdx->dwOutBuffGet;
- spin_unlock(&pdx->charOutLock); // we are done with stuff that changes
- memcpy(pdx->pCoherCharOut, pDat, dwCount); // copy output data to the buffer
+ spin_unlock(&pdx->charOutLock); /* we are done with stuff that changes */
+ memcpy(pdx->pCoherCharOut, pDat, dwCount); /* copy output data to the buffer */
usb_fill_bulk_urb(pdx->pUrbCharOut, pdx->udev,
usb_sndbulkpipe(pdx->udev,
pdx->epAddr[0]),
@@ -315,22 +314,22 @@ static void ced_writechar_callback(struct urb *pUrb)
ced_writechar_callback, pdx);
pdx->pUrbCharOut->transfer_flags |=
URB_NO_TRANSFER_DMA_MAP;
- usb_anchor_urb(pdx->pUrbCharOut, &pdx->submitted); // in case we need to kill it
+ usb_anchor_urb(pdx->pUrbCharOut, &pdx->submitted); /* in case we need to kill it */
iReturn = usb_submit_urb(pdx->pUrbCharOut, GFP_ATOMIC);
dev_dbg(&pdx->interface->dev, "%s n=%d>%s<", __func__,
dwCount, pDat);
- spin_lock(&pdx->charOutLock); // grab lock for errors
+ spin_lock(&pdx->charOutLock); /* grab lock for errors */
if (iReturn) {
- pdx->bPipeError[nPipe] = 1; // Flag an error to be handled later
- pdx->bSendCharsPending = false; // Allow other threads again
+ pdx->bPipeError[nPipe] = 1; /* Flag an error to be handled later */
+ pdx->bSendCharsPending = false; /* Allow other threads again */
usb_unanchor_urb(pdx->pUrbCharOut);
dev_err(&pdx->interface->dev,
"%s usb_submit_urb() returned %d",
__func__, iReturn);
}
} else
- pdx->bSendCharsPending = false; // Allow other threads again
- spin_unlock(&pdx->charOutLock); // already at irq level
+ pdx->bSendCharsPending = false; /* Allow other threads again */
+ spin_unlock(&pdx->charOutLock); /* already at irq level */
}
}
@@ -339,44 +338,43 @@ static void ced_writechar_callback(struct urb *pUrb)
** Transmit the characters in the output buffer to the 1401. This may need
** breaking down into multiple transfers.
****************************************************************************/
-int SendChars(DEVICE_EXTENSION * pdx)
+int SendChars(DEVICE_EXTENSION *pdx)
{
int iReturn = U14ERR_NOERROR;
- spin_lock_irq(&pdx->charOutLock); // Protect ourselves
+ spin_lock_irq(&pdx->charOutLock); /* Protect ourselves */
- if ((!pdx->bSendCharsPending) && // Not currently sending
- (pdx->dwNumOutput > 0) && // has characters to output
- (CanAcceptIoRequests(pdx))) // and current activity is OK
- {
- unsigned int dwCount = pdx->dwNumOutput; // Get a copy of the character count
- pdx->bSendCharsPending = true; // Set flag to lock out other threads
+ if ((!pdx->bSendCharsPending) && /* Not currently sending */
+ (pdx->dwNumOutput > 0) && /* has characters to output */
+ (CanAcceptIoRequests(pdx))) { /* and current activity is OK */
+ unsigned int dwCount = pdx->dwNumOutput; /* Get a copy of the character count */
+ pdx->bSendCharsPending = true; /* Set flag to lock out other threads */
dev_dbg(&pdx->interface->dev,
"Send %d chars to 1401, EP0 flag %d\n", dwCount,
pdx->nPipes == 3);
- // If we have only 3 end points we must send the characters to the 1401 using EP0.
+ /* If we have only 3 end points we must send the characters to the 1401 using EP0. */
if (pdx->nPipes == 3) {
- // For EP0 character transmissions to the 1401, we have to hang about until they
- // are gone, as otherwise without more character IO activity they will never go.
- unsigned int count = dwCount; // Local char counter
- unsigned int index = 0; // The index into the char buffer
+ /* For EP0 character transmissions to the 1401, we have to hang about until they */
+ /* are gone, as otherwise without more character IO activity they will never go. */
+ unsigned int count = dwCount; /* Local char counter */
+ unsigned int index = 0; /* The index into the char buffer */
- spin_unlock_irq(&pdx->charOutLock); // Free spinlock as we call USBD
+ spin_unlock_irq(&pdx->charOutLock); /* Free spinlock as we call USBD */
while ((count > 0) && (iReturn == U14ERR_NOERROR)) {
- // We have to break the transfer up into 64-byte chunks because of a 2270 problem
- int n = count > 64 ? 64 : count; // Chars for this xfer, max of 64
+ /* We have to break the transfer up into 64-byte chunks because of a 2270 problem */
+ int n = count > 64 ? 64 : count; /* Chars for this xfer, max of 64 */
int nSent = usb_control_msg(pdx->udev,
- usb_sndctrlpipe(pdx->udev, 0), // use end point 0
- DB_CHARS, // bRequest
- (H_TO_D | VENDOR | DEVREQ), // to the device, vendor request to the device
- 0, 0, // value and index are both 0
- &pdx->outputBuffer[index], // where to send from
- n, // how much to send
- 1000); // timeout in jiffies
+ usb_sndctrlpipe(pdx->udev, 0), /* use end point 0 */
+ DB_CHARS, /* bRequest */
+ (H_TO_D | VENDOR | DEVREQ), /* to the device, vendor request to the device */
+ 0, 0, /* value and index are both 0 */
+ &pdx->outputBuffer[index], /* where to send from */
+ n, /* how much to send */
+ 1000); /* timeout in jiffies */
if (nSent <= 0) {
- iReturn = nSent ? nSent : -ETIMEDOUT; // if 0 chars says we timed out
+ iReturn = nSent ? nSent : -ETIMEDOUT; /* if 0 chars says we timed out */
dev_err(&pdx->interface->dev,
"Send %d chars by EP0 failed: %d",
n, iReturn);
@@ -388,19 +386,19 @@ int SendChars(DEVICE_EXTENSION * pdx)
}
}
- spin_lock_irq(&pdx->charOutLock); // Protect pdx changes, released by general code
- pdx->dwOutBuffGet = 0; // so reset the output buffer
+ spin_lock_irq(&pdx->charOutLock); /* Protect pdx changes, released by general code */
+ pdx->dwOutBuffGet = 0; /* so reset the output buffer */
pdx->dwOutBuffPut = 0;
- pdx->dwNumOutput = 0; // and clear the buffer count
- pdx->bSendCharsPending = false; // Allow other threads again
- } else { // Here for sending chars normally - we hold the spin lock
- int nPipe = 0; // The pipe number to use
+ pdx->dwNumOutput = 0; /* and clear the buffer count */
+ pdx->bSendCharsPending = false; /* Allow other threads again */
+ } else { /* Here for sending chars normally - we hold the spin lock */
+ int nPipe = 0; /* The pipe number to use */
char *pDat = &pdx->outputBuffer[pdx->dwOutBuffGet];
- if ((pdx->dwOutBuffGet + dwCount) > OUTBUF_SZ) // does it cross buffer end?
+ if ((pdx->dwOutBuffGet + dwCount) > OUTBUF_SZ) /* does it cross buffer end? */
dwCount = OUTBUF_SZ - pdx->dwOutBuffGet;
- spin_unlock_irq(&pdx->charOutLock); // we are done with stuff that changes
- memcpy(pdx->pCoherCharOut, pDat, dwCount); // copy output data to the buffer
+ spin_unlock_irq(&pdx->charOutLock); /* we are done with stuff that changes */
+ memcpy(pdx->pCoherCharOut, pDat, dwCount); /* copy output data to the buffer */
usb_fill_bulk_urb(pdx->pUrbCharOut, pdx->udev,
usb_sndbulkpipe(pdx->udev,
pdx->epAddr[0]),
@@ -410,11 +408,11 @@ int SendChars(DEVICE_EXTENSION * pdx)
URB_NO_TRANSFER_DMA_MAP;
usb_anchor_urb(pdx->pUrbCharOut, &pdx->submitted);
iReturn = usb_submit_urb(pdx->pUrbCharOut, GFP_KERNEL);
- spin_lock_irq(&pdx->charOutLock); // grab lock for errors
+ spin_lock_irq(&pdx->charOutLock); /* grab lock for errors */
if (iReturn) {
- pdx->bPipeError[nPipe] = 1; // Flag an error to be handled later
- pdx->bSendCharsPending = false; // Allow other threads again
- usb_unanchor_urb(pdx->pUrbCharOut); // remove from list of active urbs
+ pdx->bPipeError[nPipe] = 1; /* Flag an error to be handled later */
+ pdx->bSendCharsPending = false; /* Allow other threads again */
+ usb_unanchor_urb(pdx->pUrbCharOut); /* remove from list of active urbs */
}
}
} else if (pdx->bSendCharsPending && (pdx->dwNumOutput > 0))
@@ -422,7 +420,7 @@ int SendChars(DEVICE_EXTENSION * pdx)
"SendChars bSendCharsPending:true");
dev_dbg(&pdx->interface->dev, "SendChars exit code: %d", iReturn);
- spin_unlock_irq(&pdx->charOutLock); // Now let go of the spinlock
+ spin_unlock_irq(&pdx->charOutLock); /* Now let go of the spinlock */
return iReturn;
}
@@ -440,14 +438,14 @@ int SendChars(DEVICE_EXTENSION * pdx)
** pdx Is our device extension which holds all we know about the transfer.
** n The number of bytes to move one way or the other.
***************************************************************************/
-static void CopyUserSpace(DEVICE_EXTENSION * pdx, int n)
+static void CopyUserSpace(DEVICE_EXTENSION *pdx, int n)
{
unsigned int nArea = pdx->StagedId;
if (nArea < MAX_TRANSAREAS) {
- TRANSAREA *pArea = &pdx->rTransDef[nArea]; // area to be used
+ TRANSAREA *pArea = &pdx->rTransDef[nArea]; /* area to be used */
unsigned int dwOffset =
pdx->StagedDone + pdx->StagedOffset + pArea->dwBaseOffset;
- char *pCoherBuf = pdx->pCoherStagedIO; // coherent buffer
+ char *pCoherBuf = pdx->pCoherStagedIO; /* coherent buffer */
if (!pArea->bUsed) {
dev_err(&pdx->interface->dev, "%s area %d unused",
__func__, nArea);
@@ -455,15 +453,15 @@ static void CopyUserSpace(DEVICE_EXTENSION * pdx, int n)
}
while (n) {
- int nPage = dwOffset >> PAGE_SHIFT; // page number in table
+ int nPage = dwOffset >> PAGE_SHIFT; /* page number in table */
if (nPage < pArea->nPages) {
char *pvAddress =
(char *)kmap_atomic(pArea->pPages[nPage]);
if (pvAddress) {
- unsigned int uiPageOff = dwOffset & (PAGE_SIZE - 1); // offset into the page
- size_t uiXfer = PAGE_SIZE - uiPageOff; // max to transfer on this page
- if (uiXfer > n) // limit byte count if too much
- uiXfer = n; // for the page
+ unsigned int uiPageOff = dwOffset & (PAGE_SIZE - 1); /* offset into the page */
+ size_t uiXfer = PAGE_SIZE - uiPageOff; /* max to transfer on this page */
+ if (uiXfer > n) /* limit byte count if too much */
+ uiXfer = n; /* for the page */
if (pdx->StagedRead)
memcpy(pvAddress + uiPageOff,
pCoherBuf, uiXfer);
@@ -494,8 +492,8 @@ static void CopyUserSpace(DEVICE_EXTENSION * pdx, int n)
nArea);
}
-// Forward declarations for stuff used circularly
-static int StageChunk(DEVICE_EXTENSION * pdx);
+/* Forward declarations for stuff used circularly */
+static int StageChunk(DEVICE_EXTENSION *pdx);
/***************************************************************************
** ReadWrite_Complete
**
@@ -504,14 +502,14 @@ static int StageChunk(DEVICE_EXTENSION * pdx);
static void staged_callback(struct urb *pUrb)
{
DEVICE_EXTENSION *pdx = pUrb->context;
- unsigned int nGot = pUrb->actual_length; // what we transferred
+ unsigned int nGot = pUrb->actual_length; /* what we transferred */
bool bCancel = false;
- bool bRestartCharInput; // used at the end
+ bool bRestartCharInput; /* used at the end */
- spin_lock(&pdx->stagedLock); // stop ReadWriteMem() action while this routine is running
- pdx->bStagedUrbPending = false; // clear the flag for staged IRP pending
+ spin_lock(&pdx->stagedLock); /* stop ReadWriteMem() action while this routine is running */
+ pdx->bStagedUrbPending = false; /* clear the flag for staged IRP pending */
- if (pUrb->status) { // sync/async unlink faults aren't errors
+ if (pUrb->status) { /* sync/async unlink faults aren't errors */
if (!
(pUrb->status == -ENOENT || pUrb->status == -ECONNRESET
|| pUrb->status == -ESHUTDOWN)) {
@@ -525,40 +523,37 @@ static void staged_callback(struct urb *pUrb)
spin_lock(&pdx->err_lock);
pdx->errors = pUrb->status;
spin_unlock(&pdx->err_lock);
- nGot = 0; // and tidy up again if so
+ nGot = 0; /* and tidy up again if so */
bCancel = true;
} else {
dev_dbg(&pdx->interface->dev, "%s %d chars xferred", __func__,
nGot);
- if (pdx->StagedRead) // if reading, save to user space
- CopyUserSpace(pdx, nGot); // copy from buffer to user
+ if (pdx->StagedRead) /* if reading, save to user space */
+ CopyUserSpace(pdx, nGot); /* copy from buffer to user */
if (nGot == 0)
dev_dbg(&pdx->interface->dev, "%s ZLP", __func__);
}
- // Update the transfer length based on the TransferBufferLength value in the URB
+ /* Update the transfer length based on the TransferBufferLength value in the URB */
pdx->StagedDone += nGot;
dev_dbg(&pdx->interface->dev, "%s, done %d bytes of %d", __func__,
pdx->StagedDone, pdx->StagedLength);
- if ((pdx->StagedDone == pdx->StagedLength) || // If no more to do
- (bCancel)) // or this IRP was cancelled
- {
- TRANSAREA *pArea = &pdx->rTransDef[pdx->StagedId]; // Transfer area info
+ if ((pdx->StagedDone == pdx->StagedLength) || /* If no more to do */
+ (bCancel)) { /* or this IRP was cancelled */
+ TRANSAREA *pArea = &pdx->rTransDef[pdx->StagedId]; /* Transfer area info */
dev_dbg(&pdx->interface->dev,
"%s transfer done, bytes %d, cancel %d", __func__,
pdx->StagedDone, bCancel);
- // Here is where we sort out what to do with this transfer if using a circular buffer. We have
- // a completed transfer that can be assumed to fit into the transfer area. We should be able to
- // add this to the end of a growing block or to use it to start a new block unless the code
- // that calculates the offset to use (in ReadWriteMem) is totally duff.
- if ((pArea->bCircular) && (pArea->bCircToHost) && (!bCancel) && // Time to sort out circular buffer info?
- (pdx->StagedRead)) // Only for tohost transfers for now
- {
- if (pArea->aBlocks[1].dwSize > 0) // If block 1 is in use we must append to it
- {
+ /* Here is where we sort out what to do with this transfer if using a circular buffer. We have */
+ /* a completed transfer that can be assumed to fit into the transfer area. We should be able to */
+ /* add this to the end of a growing block or to use it to start a new block unless the code */
+ /* that calculates the offset to use (in ReadWriteMem) is totally duff. */
+ if ((pArea->bCircular) && (pArea->bCircToHost) && (!bCancel) && /* Time to sort out circular buffer info? */
+ (pdx->StagedRead)) { /* Only for tohost transfers for now */
+ if (pArea->aBlocks[1].dwSize > 0) { /* If block 1 is in use we must append to it */
if (pdx->StagedOffset ==
(pArea->aBlocks[1].dwOffset +
pArea->aBlocks[1].dwSize)) {
@@ -569,7 +564,7 @@ static void staged_callback(struct urb *pUrb)
pArea->aBlocks[1].dwSize,
pArea->aBlocks[1].dwOffset);
} else {
- // Here things have gone very, very, wrong, but I cannot see how this can actually be achieved
+ /* Here things have gone very, very, wrong, but I cannot see how this can actually be achieved */
pArea->aBlocks[1].dwOffset =
pdx->StagedOffset;
pArea->aBlocks[1].dwSize =
@@ -580,22 +575,20 @@ static void staged_callback(struct urb *pUrb)
pArea->aBlocks[1].dwSize,
pArea->aBlocks[1].dwOffset);
}
- } else // If block 1 is not used, we try to add to block 0
- {
- if (pArea->aBlocks[0].dwSize > 0) // Got stored block 0 information?
- { // Must append onto the existing block 0
+ } else { /* If block 1 is not used, we try to add to block 0 */
+ if (pArea->aBlocks[0].dwSize > 0) { /* Got stored block 0 information? */
+ /* Must append onto the existing block 0 */
if (pdx->StagedOffset ==
(pArea->aBlocks[0].dwOffset +
pArea->aBlocks[0].dwSize)) {
- pArea->aBlocks[0].dwSize += pdx->StagedLength; // Just add this transfer in
+ pArea->aBlocks[0].dwSize += pdx->StagedLength; /* Just add this transfer in */
dev_dbg(&pdx->interface->dev,
"RWM_Complete, circ block 0 now %d bytes at %d",
pArea->aBlocks[0].
dwSize,
pArea->aBlocks[0].
dwOffset);
- } else // If it doesn't append, put into new block 1
- {
+ } else { /* If it doesn't append, put into new block 1 */
pArea->aBlocks[1].dwOffset =
pdx->StagedOffset;
pArea->aBlocks[1].dwSize =
@@ -607,8 +600,7 @@ static void staged_callback(struct urb *pUrb)
pArea->aBlocks[1].
dwOffset);
}
- } else // No info stored yet, just save in block 0
- {
+ } else { /* No info stored yet, just save in block 0 */
pArea->aBlocks[0].dwOffset =
pdx->StagedOffset;
pArea->aBlocks[0].dwSize =
@@ -621,21 +613,19 @@ static void staged_callback(struct urb *pUrb)
}
}
- if (!bCancel) // Don't generate an event if cancelled
- {
+ if (!bCancel) { /* Don't generate an event if cancelled */
dev_dbg(&pdx->interface->dev,
"RWM_Complete, bCircular %d, bToHost %d, eStart %d, eSize %d",
pArea->bCircular, pArea->bEventToHost,
pArea->dwEventSt, pArea->dwEventSz);
- if ((pArea->dwEventSz) && // Set a user-mode event...
- (pdx->StagedRead == pArea->bEventToHost)) // ...on transfers in this direction?
- {
- int iWakeUp = 0; // assume
- // If we have completed the right sort of DMA transfer then set the event to notify
- // the user code to wake up anyone that is waiting.
- if ((pArea->bCircular) && // Circular areas use a simpler test
- (pArea->bCircToHost)) // only in supported direction
- { // Is total data waiting up to size limit?
+ if ((pArea->dwEventSz) && /* Set a user-mode event... */
+ (pdx->StagedRead == pArea->bEventToHost)) { /* ...on transfers in this direction? */
+ int iWakeUp = 0; /* assume */
+ /* If we have completed the right sort of DMA transfer then set the event to notify */
+ /* the user code to wake up anyone that is waiting. */
+ if ((pArea->bCircular) && /* Circular areas use a simpler test */
+ (pArea->bCircToHost)) { /* only in supported direction */
+ /* Is total data waiting up to size limit? */
unsigned int dwTotal =
pArea->aBlocks[0].dwSize +
pArea->aBlocks[1].dwSize;
@@ -653,19 +643,17 @@ static void staged_callback(struct urb *pUrb)
if (iWakeUp) {
dev_dbg(&pdx->interface->dev,
"About to set event to notify app");
- wake_up_interruptible(&pArea->wqEvent); // wake up waiting processes
- ++pArea->iWakeUp; // increment wakeup count
+ wake_up_interruptible(&pArea->wqEvent); /* wake up waiting processes */
+ ++pArea->iWakeUp; /* increment wakeup count */
}
}
}
- pdx->dwDMAFlag = MODE_CHAR; // Switch back to char mode before ReadWriteMem call
+ pdx->dwDMAFlag = MODE_CHAR; /* Switch back to char mode before ReadWriteMem call */
- if (!bCancel) // Don't look for waiting transfer if cancelled
- {
- // If we have a transfer waiting, kick it off
- if (pdx->bXFerWaiting) // Got a block xfer waiting?
- {
+ if (!bCancel) { /* Don't look for waiting transfer if cancelled */
+ /* If we have a transfer waiting, kick it off */
+ if (pdx->bXFerWaiting) { /* Got a block xfer waiting? */
int iReturn;
dev_info(&pdx->interface->dev,
"*** RWM_Complete *** pending transfer will now be set up!!!");
@@ -682,22 +670,22 @@ static void staged_callback(struct urb *pUrb)
}
}
- } else // Here for more to do
- StageChunk(pdx); // fire off the next bit
+ } else /* Here for more to do */
+ StageChunk(pdx); /* fire off the next bit */
- // While we hold the stagedLock, see if we should reallow character input ints
- // Don't allow if cancelled, or if a new block has started or if there is a waiting block.
- // This feels wrong as we should ask which spin lock protects dwDMAFlag.
+ /* While we hold the stagedLock, see if we should reallow character input ints */
+ /* Don't allow if cancelled, or if a new block has started or if there is a waiting block. */
+ /* This feels wrong as we should ask which spin lock protects dwDMAFlag. */
bRestartCharInput = !bCancel && (pdx->dwDMAFlag == MODE_CHAR)
&& !pdx->bXFerWaiting;
- spin_unlock(&pdx->stagedLock); // Finally release the lock again
+ spin_unlock(&pdx->stagedLock); /* Finally release the lock again */
- // This is not correct as dwDMAFlag is protected by the staged lock, but it is treated
- // in Allowi as if it were protected by the char lock. In any case, most systems will
- // not be upset by char input during DMA... sigh. Needs sorting out.
- if (bRestartCharInput) // may be out of date, but...
- Allowi(pdx); // ...Allowi tests a lock too.
+ /* This is not correct as dwDMAFlag is protected by the staged lock, but it is treated */
+ /* in Allowi as if it were protected by the char lock. In any case, most systems will */
+ /* not be upset by char input during DMA... sigh. Needs sorting out. */
+ if (bRestartCharInput) /* may be out of date, but... */
+ Allowi(pdx); /* ...Allowi tests a lock too. */
dev_dbg(&pdx->interface->dev, "%s done", __func__);
}
@@ -709,29 +697,28 @@ static void staged_callback(struct urb *pUrb)
** The calling code must have acquired the staging spinlock before calling
** this function, and is responsible for releasing it. We are at callback level.
****************************************************************************/
-static int StageChunk(DEVICE_EXTENSION * pdx)
+static int StageChunk(DEVICE_EXTENSION *pdx)
{
int iReturn = U14ERR_NOERROR;
unsigned int ChunkSize;
- int nPipe = pdx->StagedRead ? 3 : 2; // The pipe number to use for reads or writes
+ int nPipe = pdx->StagedRead ? 3 : 2; /* The pipe number to use for reads or writes */
if (pdx->nPipes == 3)
- nPipe--; // Adjust for the 3-pipe case
- if (nPipe < 0) // and trap case that should never happen
+ nPipe--; /* Adjust for the 3-pipe case */
+ if (nPipe < 0) /* and trap case that should never happen */
return U14ERR_FAIL;
- if (!CanAcceptIoRequests(pdx)) // got sudden remove?
- {
+ if (!CanAcceptIoRequests(pdx)) { /* got sudden remove? */
dev_info(&pdx->interface->dev, "%s sudden remove, giving up",
__func__);
- return U14ERR_FAIL; // could do with a better error
+ return U14ERR_FAIL; /* could do with a better error */
}
- ChunkSize = (pdx->StagedLength - pdx->StagedDone); // transfer length remaining
- if (ChunkSize > STAGED_SZ) // make sure to keep legal
- ChunkSize = STAGED_SZ; // limit to max allowed
+ ChunkSize = (pdx->StagedLength - pdx->StagedDone); /* transfer length remaining */
+ if (ChunkSize > STAGED_SZ) /* make sure to keep legal */
+ ChunkSize = STAGED_SZ; /* limit to max allowed */
- if (!pdx->StagedRead) // if writing...
- CopyUserSpace(pdx, ChunkSize); // ...copy data into the buffer
+ if (!pdx->StagedRead) /* if writing... */
+ CopyUserSpace(pdx, ChunkSize); /* ...copy data into the buffer */
usb_fill_bulk_urb(pdx->pStagedUrb, pdx->udev,
pdx->StagedRead ? usb_rcvbulkpipe(pdx->udev,
@@ -740,15 +727,15 @@ static int StageChunk(DEVICE_EXTENSION * pdx)
usb_sndbulkpipe(pdx->udev, pdx->epAddr[nPipe]),
pdx->pCoherStagedIO, ChunkSize, staged_callback, pdx);
pdx->pStagedUrb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
- usb_anchor_urb(pdx->pStagedUrb, &pdx->submitted); // in case we need to kill it
+ usb_anchor_urb(pdx->pStagedUrb, &pdx->submitted); /* in case we need to kill it */
iReturn = usb_submit_urb(pdx->pStagedUrb, GFP_ATOMIC);
if (iReturn) {
- usb_unanchor_urb(pdx->pStagedUrb); // kill it
- pdx->bPipeError[nPipe] = 1; // Flag an error to be handled later
+ usb_unanchor_urb(pdx->pStagedUrb); /* kill it */
+ pdx->bPipeError[nPipe] = 1; /* Flag an error to be handled later */
dev_err(&pdx->interface->dev, "%s submit urb failed, code %d",
__func__, iReturn);
} else
- pdx->bStagedUrbPending = true; // Set the flag for staged URB pending
+ pdx->bStagedUrbPending = true; /* Set the flag for staged URB pending */
dev_dbg(&pdx->interface->dev, "%s done so far:%d, this size:%d",
__func__, pdx->StagedDone, ChunkSize);
@@ -772,13 +759,12 @@ static int StageChunk(DEVICE_EXTENSION * pdx)
** transfer.
** dwLen - the number of bytes to transfer.
*/
-int ReadWriteMem(DEVICE_EXTENSION * pdx, bool Read, unsigned short wIdent,
+int ReadWriteMem(DEVICE_EXTENSION *pdx, bool Read, unsigned short wIdent,
unsigned int dwOffs, unsigned int dwLen)
{
- TRANSAREA *pArea = &pdx->rTransDef[wIdent]; // Transfer area info
+ TRANSAREA *pArea = &pdx->rTransDef[wIdent]; /* Transfer area info */
- if (!CanAcceptIoRequests(pdx)) // Are we in a state to accept new requests?
- {
+ if (!CanAcceptIoRequests(pdx)) { /* Are we in a state to accept new requests? */
dev_err(&pdx->interface->dev, "%s can't accept requests",
__func__);
return U14ERR_FAIL;
@@ -788,56 +774,51 @@ int ReadWriteMem(DEVICE_EXTENSION * pdx, bool Read, unsigned short wIdent,
"%s xfer %d bytes to %s, offset %d, area %d", __func__, dwLen,
Read ? "host" : "1401", dwOffs, wIdent);
- // Amazingly, we can get an escape sequence back before the current staged Urb is done, so we
- // have to check for this situation and, if so, wait until all is OK.
+ /* Amazingly, we can get an escape sequence back before the current staged Urb is done, so we */
+ /* have to check for this situation and, if so, wait until all is OK. */
if (pdx->bStagedUrbPending) {
- pdx->bXFerWaiting = true; // Flag we are waiting
+ pdx->bXFerWaiting = true; /* Flag we are waiting */
dev_info(&pdx->interface->dev,
"%s xfer is waiting, as previous staged pending",
__func__);
return U14ERR_NOERROR;
}
- if (dwLen == 0) // allow 0-len read or write; just return success
- {
+ if (dwLen == 0) { /* allow 0-len read or write; just return success */
dev_dbg(&pdx->interface->dev,
"%s OK; zero-len read/write request", __func__);
return U14ERR_NOERROR;
}
- if ((pArea->bCircular) && // Circular transfer?
- (pArea->bCircToHost) && (Read)) // In a supported direction
- { // If so, we sort out offset ourself
- bool bWait = false; // Flag for transfer having to wait
+ if ((pArea->bCircular) && /* Circular transfer? */
+ (pArea->bCircToHost) && (Read)) { /* In a supported direction */
+ /* If so, we sort out offset ourself */
+ bool bWait = false; /* Flag for transfer having to wait */
dev_dbg(&pdx->interface->dev,
"Circular buffers are %d at %d and %d at %d",
pArea->aBlocks[0].dwSize, pArea->aBlocks[0].dwOffset,
pArea->aBlocks[1].dwSize, pArea->aBlocks[1].dwOffset);
- if (pArea->aBlocks[1].dwSize > 0) // Using the second block already?
- {
- dwOffs = pArea->aBlocks[1].dwOffset + pArea->aBlocks[1].dwSize; // take offset from that
- bWait = (dwOffs + dwLen) > pArea->aBlocks[0].dwOffset; // Wait if will overwrite block 0?
- bWait |= (dwOffs + dwLen) > pArea->dwLength; // or if it overflows the buffer
- } else // Area 1 not in use, try to use area 0
- {
- if (pArea->aBlocks[0].dwSize == 0) // Reset block 0 if not in use
+ if (pArea->aBlocks[1].dwSize > 0) { /* Using the second block already? */
+ dwOffs = pArea->aBlocks[1].dwOffset + pArea->aBlocks[1].dwSize; /* take offset from that */
+ bWait = (dwOffs + dwLen) > pArea->aBlocks[0].dwOffset; /* Wait if will overwrite block 0? */
+ bWait |= (dwOffs + dwLen) > pArea->dwLength; /* or if it overflows the buffer */
+ } else { /* Area 1 not in use, try to use area 0 */
+ if (pArea->aBlocks[0].dwSize == 0) /* Reset block 0 if not in use */
pArea->aBlocks[0].dwOffset = 0;
dwOffs =
pArea->aBlocks[0].dwOffset +
pArea->aBlocks[0].dwSize;
- if ((dwOffs + dwLen) > pArea->dwLength) // Off the end of the buffer?
- {
- pArea->aBlocks[1].dwOffset = 0; // Set up to use second block
+ if ((dwOffs + dwLen) > pArea->dwLength) { /* Off the end of the buffer? */
+ pArea->aBlocks[1].dwOffset = 0; /* Set up to use second block */
dwOffs = 0;
- bWait = (dwOffs + dwLen) > pArea->aBlocks[0].dwOffset; // Wait if will overwrite block 0?
- bWait |= (dwOffs + dwLen) > pArea->dwLength; // or if it overflows the buffer
+ bWait = (dwOffs + dwLen) > pArea->aBlocks[0].dwOffset; /* Wait if will overwrite block 0? */
+ bWait |= (dwOffs + dwLen) > pArea->dwLength; /* or if it overflows the buffer */
}
}
- if (bWait) // This transfer will have to wait?
- {
- pdx->bXFerWaiting = true; // Flag we are waiting
+ if (bWait) { /* This transfer will have to wait? */
+ pdx->bXFerWaiting = true; /* Flag we are waiting */
dev_dbg(&pdx->interface->dev,
"%s xfer waiting for circular buffer space",
__func__);
@@ -848,17 +829,17 @@ int ReadWriteMem(DEVICE_EXTENSION * pdx, bool Read, unsigned short wIdent,
"%s circular xfer, %d bytes starting at %d", __func__,
dwLen, dwOffs);
}
- // Save the parameters for the read\write transfer
- pdx->StagedRead = Read; // Save the parameters for this read
- pdx->StagedId = wIdent; // ID allows us to get transfer area info
- pdx->StagedOffset = dwOffs; // The area within the transfer area
+ /* Save the parameters for the read\write transfer */
+ pdx->StagedRead = Read; /* Save the parameters for this read */
+ pdx->StagedId = wIdent; /* ID allows us to get transfer area info */
+ pdx->StagedOffset = dwOffs; /* The area within the transfer area */
pdx->StagedLength = dwLen;
- pdx->StagedDone = 0; // Initialise the byte count
- pdx->dwDMAFlag = MODE_LINEAR; // Set DMA mode flag at this point
- pdx->bXFerWaiting = false; // Clearly not a transfer waiting now
+ pdx->StagedDone = 0; /* Initialise the byte count */
+ pdx->dwDMAFlag = MODE_LINEAR; /* Set DMA mode flag at this point */
+ pdx->bXFerWaiting = false; /* Clearly not a transfer waiting now */
-// KeClearEvent(&pdx->StagingDoneEvent); // Clear the transfer done event
- StageChunk(pdx); // fire off the first chunk
+/* KeClearEvent(&pdx->StagingDoneEvent); // Clear the transfer done event */
+ StageChunk(pdx); /* fire off the first chunk */
return U14ERR_NOERROR;
}
@@ -877,12 +858,11 @@ static bool ReadChar(unsigned char *pChar, char *pBuf, unsigned int *pdDone,
bool bRead = false;
unsigned int dDone = *pdDone;
- if (dDone < dGot) // If there is more data
- {
- *pChar = (unsigned char)pBuf[dDone]; // Extract the next char
- dDone++; // Increment the done count
+ if (dDone < dGot) { /* If there is more data */
+ *pChar = (unsigned char)pBuf[dDone]; /* Extract the next char */
+ dDone++; /* Increment the done count */
*pdDone = dDone;
- bRead = true; // and flag success
+ bRead = true; /* and flag success */
}
return bRead;
@@ -962,32 +942,32 @@ static bool ReadHuff(volatile unsigned int *pDWord, char *pBuf,
** we start handling the data at offset zero.
**
*****************************************************************************/
-static bool ReadDMAInfo(volatile DMADESC * pDmaDesc, DEVICE_EXTENSION * pdx,
+static bool ReadDMAInfo(volatile DMADESC *pDmaDesc, DEVICE_EXTENSION *pdx,
char *pBuf, unsigned int dwCount)
{
- bool bResult = false; // assume we won't succeed
+ bool bResult = false; /* assume we won't succeed */
unsigned char ucData;
- unsigned int dDone = 0; // We haven't parsed anything so far
+ unsigned int dDone = 0; /* We haven't parsed anything so far */
dev_dbg(&pdx->interface->dev, "%s", __func__);
if (ReadChar(&ucData, pBuf, &dDone, dwCount)) {
- unsigned char ucTransCode = (ucData & 0x0F); // get code for transfer type
- unsigned short wIdent = ((ucData >> 4) & 0x07); // and area identifier
+ unsigned char ucTransCode = (ucData & 0x0F); /* get code for transfer type */
+ unsigned short wIdent = ((ucData >> 4) & 0x07); /* and area identifier */
- // fill in the structure we were given
- pDmaDesc->wTransType = ucTransCode; // type of transfer
- pDmaDesc->wIdent = wIdent; // area to use
- pDmaDesc->dwSize = 0; // initialise other bits
+ /* fill in the structure we were given */
+ pDmaDesc->wTransType = ucTransCode; /* type of transfer */
+ pDmaDesc->wIdent = wIdent; /* area to use */
+ pDmaDesc->dwSize = 0; /* initialise other bits */
pDmaDesc->dwOffset = 0;
dev_dbg(&pdx->interface->dev, "%s type: %d ident: %d", __func__,
pDmaDesc->wTransType, pDmaDesc->wIdent);
- pDmaDesc->bOutWard = (ucTransCode != TM_EXTTOHOST); // set transfer direction
+ pDmaDesc->bOutWard = (ucTransCode != TM_EXTTOHOST); /* set transfer direction */
switch (ucTransCode) {
- case TM_EXTTOHOST: // Extended linear transfer modes (the only ones!)
+ case TM_EXTTOHOST: /* Extended linear transfer modes (the only ones!) */
case TM_EXTTO1401:
{
bResult =
@@ -1001,14 +981,14 @@ static bool ReadDMAInfo(volatile DMADESC * pDmaDesc, DEVICE_EXTENSION * pdx,
__func__, pDmaDesc->dwOffset,
pDmaDesc->dwSize);
- if ((wIdent >= MAX_TRANSAREAS) || // Illegal area number, or...
- (!pdx->rTransDef[wIdent].bUsed) || // area not set up, or...
- (pDmaDesc->dwOffset > pdx->rTransDef[wIdent].dwLength) || // range/size
+ if ((wIdent >= MAX_TRANSAREAS) || /* Illegal area number, or... */
+ (!pdx->rTransDef[wIdent].bUsed) || /* area not set up, or... */
+ (pDmaDesc->dwOffset > pdx->rTransDef[wIdent].dwLength) || /* range/size */
((pDmaDesc->dwOffset +
pDmaDesc->dwSize) >
(pdx->rTransDef[wIdent].
dwLength))) {
- bResult = false; // bad parameter(s)
+ bResult = false; /* bad parameter(s) */
dev_dbg(&pdx->interface->dev,
"%s bad param - id %d, bUsed %d, offset %d, size %d, area length %d",
__func__, wIdent,
@@ -1028,7 +1008,7 @@ static bool ReadDMAInfo(volatile DMADESC * pDmaDesc, DEVICE_EXTENSION * pdx,
} else
bResult = false;
- if (!bResult) // now check parameters for validity
+ if (!bResult) /* now check parameters for validity */
dev_err(&pdx->interface->dev, "%s error reading Esc sequence",
__func__);
@@ -1049,30 +1029,29 @@ static bool ReadDMAInfo(volatile DMADESC * pDmaDesc, DEVICE_EXTENSION * pdx,
** this is known to be at least 2 or we will not be called.
**
****************************************************************************/
-static int Handle1401Esc(DEVICE_EXTENSION * pdx, char *pCh,
+static int Handle1401Esc(DEVICE_EXTENSION *pdx, char *pCh,
unsigned int dwCount)
{
int iReturn = U14ERR_FAIL;
- // I have no idea what this next test is about. '?' is 0x3f, which is area 3, code
- // 15. At the moment, this is not used, so it does no harm, but unless someone can
- // tell me what this is for, it should be removed from this and the Windows driver.
- if (pCh[0] == '?') // Is this an information response
- { // Parse and save the information
+ /* I have no idea what this next test is about. '?' is 0x3f, which is area 3, code */
+ /* 15. At the moment, this is not used, so it does no harm, but unless someone can */
+ /* tell me what this is for, it should be removed from this and the Windows driver. */
+ if (pCh[0] == '?') { /* Is this an information response */
+ /* Parse and save the information */
} else {
- spin_lock(&pdx->stagedLock); // Lock others out
+ spin_lock(&pdx->stagedLock); /* Lock others out */
- if (ReadDMAInfo(&pdx->rDMAInfo, pdx, pCh, dwCount)) // Get DMA parameters
- {
- unsigned short wTransType = pdx->rDMAInfo.wTransType; // check transfer type
+ if (ReadDMAInfo(&pdx->rDMAInfo, pdx, pCh, dwCount)) { /* Get DMA parameters */
+ unsigned short wTransType = pdx->rDMAInfo.wTransType; /* check transfer type */
dev_dbg(&pdx->interface->dev,
"%s xfer to %s, offset %d, length %d", __func__,
pdx->rDMAInfo.bOutWard ? "1401" : "host",
pdx->rDMAInfo.dwOffset, pdx->rDMAInfo.dwSize);
- if (pdx->bXFerWaiting) // Check here for badly out of kilter...
- { // This can never happen, really
+ if (pdx->bXFerWaiting) { /* Check here for badly out of kilter... */
+ /* This can never happen, really */
dev_err(&pdx->interface->dev,
"ERROR: DMA setup while transfer still waiting");
spin_unlock(&pdx->stagedLock);
@@ -1090,16 +1069,16 @@ static int Handle1401Esc(DEVICE_EXTENSION * pdx, char *pCh,
dev_err(&pdx->interface->dev,
"%s ReadWriteMem() failed %d",
__func__, iReturn);
- } else // This covers non-linear transfer setup
+ } else /* This covers non-linear transfer setup */
dev_err(&pdx->interface->dev,
"%s Unknown block xfer type %d",
__func__, wTransType);
}
- } else // Failed to read parameters
+ } else /* Failed to read parameters */
dev_err(&pdx->interface->dev, "%s ReadDMAInfo() fail",
__func__);
- spin_unlock(&pdx->stagedLock); // OK here
+ spin_unlock(&pdx->stagedLock); /* OK here */
}
dev_dbg(&pdx->interface->dev, "%s returns %d", __func__, iReturn);
@@ -1113,12 +1092,11 @@ static int Handle1401Esc(DEVICE_EXTENSION * pdx, char *pCh,
static void ced_readchar_callback(struct urb *pUrb)
{
DEVICE_EXTENSION *pdx = pUrb->context;
- int nGot = pUrb->actual_length; // what we transferred
+ int nGot = pUrb->actual_length; /* what we transferred */
- if (pUrb->status) // Do we have a problem to handle?
- {
- int nPipe = pdx->nPipes == 4 ? 1 : 0; // The pipe number to use for error
- // sync/async unlink faults aren't errors... just saying device removed or stopped
+ if (pUrb->status) { /* Do we have a problem to handle? */
+ int nPipe = pdx->nPipes == 4 ? 1 : 0; /* The pipe number to use for error */
+ /* sync/async unlink faults aren't errors... just saying device removed or stopped */
if (!
(pUrb->status == -ENOENT || pUrb->status == -ECONNRESET
|| pUrb->status == -ESHUTDOWN)) {
@@ -1133,27 +1111,26 @@ static void ced_readchar_callback(struct urb *pUrb)
spin_lock(&pdx->err_lock);
pdx->errors = pUrb->status;
spin_unlock(&pdx->err_lock);
- nGot = 0; // and tidy up again if so
+ nGot = 0; /* and tidy up again if so */
- spin_lock(&pdx->charInLock); // already at irq level
- pdx->bPipeError[nPipe] = 1; // Flag an error for later
+ spin_lock(&pdx->charInLock); /* already at irq level */
+ pdx->bPipeError[nPipe] = 1; /* Flag an error for later */
} else {
- if ((nGot > 1) && ((pdx->pCoherCharIn[0] & 0x7f) == 0x1b)) // Esc sequence?
- {
- Handle1401Esc(pdx, &pdx->pCoherCharIn[1], nGot - 1); // handle it
- spin_lock(&pdx->charInLock); // already at irq level
+ if ((nGot > 1) && ((pdx->pCoherCharIn[0] & 0x7f) == 0x1b)) { /* Esc sequence? */
+ Handle1401Esc(pdx, &pdx->pCoherCharIn[1], nGot - 1); /* handle it */
+ spin_lock(&pdx->charInLock); /* already at irq level */
} else {
- spin_lock(&pdx->charInLock); // already at irq level
+ spin_lock(&pdx->charInLock); /* already at irq level */
if (nGot > 0) {
unsigned int i;
if (nGot < INBUF_SZ) {
- pdx->pCoherCharIn[nGot] = 0; // tidy the string
+ pdx->pCoherCharIn[nGot] = 0; /* tidy the string */
dev_dbg(&pdx->interface->dev,
"%s got %d chars >%s<",
__func__, nGot,
pdx->pCoherCharIn);
}
- // We know that whatever we read must fit in the input buffer
+ /* We know that whatever we read must fit in the input buffer */
for (i = 0; i < nGot; i++) {
pdx->inputBuffer[pdx->dwInBuffPut++] =
pdx->pCoherCharIn[i] & 0x7F;
@@ -1162,17 +1139,17 @@ static void ced_readchar_callback(struct urb *pUrb)
}
if ((pdx->dwNumInput + nGot) <= INBUF_SZ)
- pdx->dwNumInput += nGot; // Adjust the buffer count accordingly
+ pdx->dwNumInput += nGot; /* Adjust the buffer count accordingly */
} else
dev_dbg(&pdx->interface->dev, "%s read ZLP",
__func__);
}
}
- pdx->bReadCharsPending = false; // No longer have a pending read
- spin_unlock(&pdx->charInLock); // already at irq level
+ pdx->bReadCharsPending = false; /* No longer have a pending read */
+ spin_unlock(&pdx->charInLock); /* already at irq level */
- Allowi(pdx); // see if we can do the next one
+ Allowi(pdx); /* see if we can do the next one */
}
/****************************************************************************
@@ -1182,25 +1159,25 @@ static void ced_readchar_callback(struct urb *pUrb)
** we can pick up any inward transfers. This can be called in multiple contexts
** so we use the irqsave version of the spinlock.
****************************************************************************/
-int Allowi(DEVICE_EXTENSION * pdx)
+int Allowi(DEVICE_EXTENSION *pdx)
{
int iReturn = U14ERR_NOERROR;
unsigned long flags;
- spin_lock_irqsave(&pdx->charInLock, flags); // can be called in multiple contexts
-
- // We don't want char input running while DMA is in progress as we know that this
- // can cause sequencing problems for the 2270. So don't. It will also allow the
- // ERR response to get back to the host code too early on some PCs, even if there
- // is no actual driver failure, so we don't allow this at all.
- if (!pdx->bInDrawDown && // stop input if
- !pdx->bReadCharsPending && // If no read request outstanding
- (pdx->dwNumInput < (INBUF_SZ / 2)) && // and there is some space
- (pdx->dwDMAFlag == MODE_CHAR) && // not doing any DMA
- (!pdx->bXFerWaiting) && // no xfer waiting to start
- (CanAcceptIoRequests(pdx))) // and activity is generally OK
- { // then off we go
- unsigned int nMax = INBUF_SZ - pdx->dwNumInput; // max we could read
- int nPipe = pdx->nPipes == 4 ? 1 : 0; // The pipe number to use
+ spin_lock_irqsave(&pdx->charInLock, flags); /* can be called in multiple contexts */
+
+ /* We don't want char input running while DMA is in progress as we know that this */
+ /* can cause sequencing problems for the 2270. So don't. It will also allow the */
+ /* ERR response to get back to the host code too early on some PCs, even if there */
+ /* is no actual driver failure, so we don't allow this at all. */
+ if (!pdx->bInDrawDown && /* stop input if */
+ !pdx->bReadCharsPending && /* If no read request outstanding */
+ (pdx->dwNumInput < (INBUF_SZ / 2)) && /* and there is some space */
+ (pdx->dwDMAFlag == MODE_CHAR) && /* not doing any DMA */
+ (!pdx->bXFerWaiting) && /* no xfer waiting to start */
+ (CanAcceptIoRequests(pdx))) { /* and activity is generally OK */
+ /* then off we go */
+ unsigned int nMax = INBUF_SZ - pdx->dwNumInput; /* max we could read */
+ int nPipe = pdx->nPipes == 4 ? 1 : 0; /* The pipe number to use */
dev_dbg(&pdx->interface->dev, "%s %d chars in input buffer",
__func__, pdx->dwNumInput);
@@ -1209,16 +1186,16 @@ int Allowi(DEVICE_EXTENSION * pdx)
usb_rcvintpipe(pdx->udev, pdx->epAddr[nPipe]),
pdx->pCoherCharIn, nMax, ced_readchar_callback,
pdx, pdx->bInterval);
- pdx->pUrbCharIn->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; // short xfers are OK by default
- usb_anchor_urb(pdx->pUrbCharIn, &pdx->submitted); // in case we need to kill it
+ pdx->pUrbCharIn->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* short xfers are OK by default */
+ usb_anchor_urb(pdx->pUrbCharIn, &pdx->submitted); /* in case we need to kill it */
iReturn = usb_submit_urb(pdx->pUrbCharIn, GFP_ATOMIC);
if (iReturn) {
- usb_unanchor_urb(pdx->pUrbCharIn); // remove from list of active Urbs
- pdx->bPipeError[nPipe] = 1; // Flag an error to be handled later
+ usb_unanchor_urb(pdx->pUrbCharIn); /* remove from list of active Urbs */
+ pdx->bPipeError[nPipe] = 1; /* Flag an error to be handled later */
dev_err(&pdx->interface->dev,
"%s submit urb failed: %d", __func__, iReturn);
} else
- pdx->bReadCharsPending = true; // Flag that we are active here
+ pdx->bReadCharsPending = true; /* Flag that we are active here */
}
spin_unlock_irqrestore(&pdx->charInLock, flags);
@@ -1238,15 +1215,15 @@ static long ced_ioctl(struct file *file, unsigned int cmd, unsigned long ulArg)
{
int err = 0;
DEVICE_EXTENSION *pdx = file->private_data;
- if (!CanAcceptIoRequests(pdx)) // check we still exist
+ if (!CanAcceptIoRequests(pdx)) /* check we still exist */
return -ENODEV;
- // Check that access is allowed, where is is needed. Anything that would have an indeterminate
- // size will be checked by the specific command.
- if (_IOC_DIR(cmd) & _IOC_READ) // read from point of view of user...
- err = !access_ok(VERIFY_WRITE, (void __user *)ulArg, _IOC_SIZE(cmd)); // is kernel write
- else if (_IOC_DIR(cmd) & _IOC_WRITE) // and write from point of view of user...
- err = !access_ok(VERIFY_READ, (void __user *)ulArg, _IOC_SIZE(cmd)); // is kernel read
+ /* Check that access is allowed, where is is needed. Anything that would have an indeterminate */
+ /* size will be checked by the specific command. */
+ if (_IOC_DIR(cmd) & _IOC_READ) /* read from point of view of user... */
+ err = !access_ok(VERIFY_WRITE, (void __user *)ulArg, _IOC_SIZE(cmd)); /* is kernel write */
+ else if (_IOC_DIR(cmd) & _IOC_WRITE) /* and write from point of view of user... */
+ err = !access_ok(VERIFY_READ, (void __user *)ulArg, _IOC_SIZE(cmd)); /* is kernel read */
if (err)
return -EFAULT;
@@ -1289,7 +1266,7 @@ static long ced_ioctl(struct file *file, unsigned int cmd, unsigned long ulArg)
return -1;
case _IOC_NR(IOCTL_CED_GETDRIVERREVISION):
- return (2 << 24) | (DRIVERMAJREV << 16) | DRIVERMINREV; // USB | MAJOR | MINOR
+ return (2 << 24) | (DRIVERMAJREV << 16) | DRIVERMINREV; /* USB | MAJOR | MINOR */
case _IOC_NR(IOCTL_CED_GETTRANSFER):
return GetTransfer(pdx, (TGET_TX_BLOCK __user *) ulArg);
@@ -1335,7 +1312,7 @@ static long ced_ioctl(struct file *file, unsigned int cmd, unsigned long ulArg)
return DbgStopLoop(pdx);
case _IOC_NR(IOCTL_CED_FULLRESET):
- pdx->bForceReset = true; // Set a flag for a full reset
+ pdx->bForceReset = true; /* Set a flag for a full reset */
break;
case _IOC_NR(IOCTL_CED_SETCIRCULAR):
@@ -1378,8 +1355,8 @@ static struct usb_class_driver ced_class = {
.minor_base = USB_CED_MINOR_BASE,
};
-// Check that the device that matches a 1401 vendor and product ID is OK to use and
-// initialise our DEVICE_EXTENSION.
+/* Check that the device that matches a 1401 vendor and product ID is OK to use and */
+/* initialise our DEVICE_EXTENSION. */
static int ced_probe(struct usb_interface *interface,
const struct usb_device_id *id)
{
@@ -1389,23 +1366,22 @@ static int ced_probe(struct usb_interface *interface,
int i, bcdDevice;
int retval = -ENOMEM;
- // allocate memory for our device extension and initialize it
+ /* allocate memory for our device extension and initialize it */
pdx = kzalloc(sizeof(*pdx), GFP_KERNEL);
if (!pdx)
goto error;
- for (i = 0; i < MAX_TRANSAREAS; ++i) // Initialise the wait queues
- {
+ for (i = 0; i < MAX_TRANSAREAS; ++i) { /* Initialise the wait queues */
init_waitqueue_head(&pdx->rTransDef[i].wqEvent);
}
- // Put initialises for our stuff here. Note that all of *pdx is zero, so
- // no need to explicitly zero it.
+ /* Put initialises for our stuff here. Note that all of *pdx is zero, so */
+ /* no need to explicitly zero it. */
spin_lock_init(&pdx->charOutLock);
spin_lock_init(&pdx->charInLock);
spin_lock_init(&pdx->stagedLock);
- // Initialises from the skeleton stuff
+ /* Initialises from the skeleton stuff */
kref_init(&pdx->kref);
mutex_init(&pdx->io_mutex);
spin_lock_init(&pdx->err_lock);
@@ -1414,7 +1390,7 @@ static int ced_probe(struct usb_interface *interface,
pdx->udev = usb_get_dev(interface_to_usbdev(interface));
pdx->interface = interface;
- // Attempt to identify the device
+ /* Attempt to identify the device */
bcdDevice = pdx->udev->descriptor.bcdDevice;
i = (bcdDevice >> 8);
if (i == 0)
@@ -1426,8 +1402,8 @@ static int ced_probe(struct usb_interface *interface,
__func__, bcdDevice);
goto error;
}
- // set up the endpoint information. We only care about the number of EP as
- // we know that we are dealing with a 1401 device.
+ /* set up the endpoint information. We only care about the number of EP as */
+ /* we know that we are dealing with a 1401 device. */
iface_desc = interface->cur_altsetting;
pdx->nPipes = iface_desc->desc.bNumEndpoints;
dev_info(&interface->dev, "1401Type=%d with %d End Points",
@@ -1435,10 +1411,10 @@ static int ced_probe(struct usb_interface *interface,
if ((pdx->nPipes < 3) || (pdx->nPipes > 4))
goto error;
- // Allocate the URBs we hold for performing transfers
- pdx->pUrbCharOut = usb_alloc_urb(0, GFP_KERNEL); // character output URB
- pdx->pUrbCharIn = usb_alloc_urb(0, GFP_KERNEL); // character input URB
- pdx->pStagedUrb = usb_alloc_urb(0, GFP_KERNEL); // block transfer URB
+ /* Allocate the URBs we hold for performing transfers */
+ pdx->pUrbCharOut = usb_alloc_urb(0, GFP_KERNEL); /* character output URB */
+ pdx->pUrbCharIn = usb_alloc_urb(0, GFP_KERNEL); /* character input URB */
+ pdx->pStagedUrb = usb_alloc_urb(0, GFP_KERNEL); /* block transfer URB */
if (!pdx->pUrbCharOut || !pdx->pUrbCharIn || !pdx->pStagedUrb) {
dev_err(&interface->dev, "%s URB alloc failed", __func__);
goto error;
@@ -1464,15 +1440,14 @@ static int ced_probe(struct usb_interface *interface,
pdx->epAddr[i] = endpoint->bEndpointAddress;
dev_info(&interface->dev, "Pipe %d, ep address %02x", i,
pdx->epAddr[i]);
- if (((pdx->nPipes == 3) && (i == 0)) || // if char input end point
+ if (((pdx->nPipes == 3) && (i == 0)) || /* if char input end point */
((pdx->nPipes == 4) && (i == 1))) {
- pdx->bInterval = endpoint->bInterval; // save the endpoint interrupt interval
+ pdx->bInterval = endpoint->bInterval; /* save the endpoint interrupt interval */
dev_info(&interface->dev, "Pipe %d, bInterval = %d", i,
pdx->bInterval);
}
- // Detect USB2 by checking last ep size (64 if USB1)
- if (i == pdx->nPipes - 1) // if this is the last ep (bulk)
- {
+ /* Detect USB2 by checking last ep size (64 if USB1) */
+ if (i == pdx->nPipes - 1) { /* if this is the last ep (bulk) */
pdx->bIsUSB2 =
le16_to_cpu(endpoint->wMaxPacketSize) > 64;
dev_info(&pdx->interface->dev, "USB%d",
@@ -1501,7 +1476,7 @@ static int ced_probe(struct usb_interface *interface,
error:
if (pdx)
- kref_put(&pdx->kref, ced_delete); // frees allocated memory
+ kref_put(&pdx->kref, ced_delete); /* frees allocated memory */
return retval;
}
@@ -1511,39 +1486,39 @@ static void ced_disconnect(struct usb_interface *interface)
int minor = interface->minor;
int i;
- usb_set_intfdata(interface, NULL); // remove the pdx from the interface
- usb_deregister_dev(interface, &ced_class); // give back our minor device number
+ usb_set_intfdata(interface, NULL); /* remove the pdx from the interface */
+ usb_deregister_dev(interface, &ced_class); /* give back our minor device number */
- mutex_lock(&pdx->io_mutex); // stop more I/O starting while...
- ced_draw_down(pdx); // ...wait for then kill any io
+ mutex_lock(&pdx->io_mutex); /* stop more I/O starting while... */
+ ced_draw_down(pdx); /* ...wait for then kill any io */
for (i = 0; i < MAX_TRANSAREAS; ++i) {
- int iErr = ClearArea(pdx, i); // ...release any used memory
+ int iErr = ClearArea(pdx, i); /* ...release any used memory */
if (iErr == U14ERR_UNLOCKFAIL)
dev_err(&pdx->interface->dev, "%s Area %d was in used",
__func__, i);
}
- pdx->interface = NULL; // ...we kill off link to interface
+ pdx->interface = NULL; /* ...we kill off link to interface */
mutex_unlock(&pdx->io_mutex);
usb_kill_anchored_urbs(&pdx->submitted);
- kref_put(&pdx->kref, ced_delete); // decrement our usage count
+ kref_put(&pdx->kref, ced_delete); /* decrement our usage count */
dev_info(&interface->dev, "USB cedusb #%d now disconnected", minor);
}
-// Wait for all the urbs we know of to be done with, then kill off any that
-// are left. NBNB we will need to have a mechanism to stop circular xfers
-// from trying to fire off more urbs. We will wait up to 3 seconds for Urbs
-// to be done.
-void ced_draw_down(DEVICE_EXTENSION * pdx)
+/* Wait for all the urbs we know of to be done with, then kill off any that */
+/* are left. NBNB we will need to have a mechanism to stop circular xfers */
+/* from trying to fire off more urbs. We will wait up to 3 seconds for Urbs */
+/* to be done. */
+void ced_draw_down(DEVICE_EXTENSION *pdx)
{
int time;
dev_dbg(&pdx->interface->dev, "%s called", __func__);
pdx->bInDrawDown = true;
time = usb_wait_anchor_empty_timeout(&pdx->submitted, 3000);
- if (!time) { // if we timed out we kill the urbs
+ if (!time) { /* if we timed out we kill the urbs */
usb_kill_anchored_urbs(&pdx->submitted);
dev_err(&pdx->interface->dev, "%s timed out", __func__);
}
diff --git a/drivers/staging/ced1401/usb1401.h b/drivers/staging/ced1401/usb1401.h
index 8fc6958b6f08..f031e3a2c7cf 100644
--- a/drivers/staging/ced1401/usb1401.h
+++ b/drivers/staging/ced1401/usb1401.h
@@ -26,31 +26,32 @@
#define UINT unsigned int
#endif
-/// Device type codes, but these don't need to be extended - a succession is assumed
-/// These are set for usb from the bcdDevice field (suitably mangled). Future devices
-/// will be added in order of device creation to the list, so the names here are just
-/// to help use remember which device is which. The U14ERR_... values follow the same
-/// pattern for modern devices.
-#define TYPEUNKNOWN -1 // dont know
-#define TYPE1401 0 // standard 1401
-#define TYPEPLUS 1 // 1401 plus
-#define TYPEU1401 2 // u1401
-#define TYPEPOWER 3 // Power1401
-#define TYPEU14012 4 // u1401 mkII
-#define TYPEPOWER2 5 // Power1401 mk II
-#define TYPEMICRO3 6 // Micro1401-3
-#define TYPEPOWER3 7 // Power1401-3
-
-/// Some useful defines of constants. DONT FORGET to change the version in the
-/// resources whenever you change it here!.
-#define DRIVERMAJREV 2 // driver revision level major (match windows)
-#define DRIVERMINREV 0 // driver revision level minor
-
-/// Definitions of the various block transfer command codes
-#define TM_EXTTOHOST 8 // extended tohost
-#define TM_EXTTO1401 9 // extended to1401
-
-/// Definitions of values in usbReqtype. Used in sorting out setup actions
+/** Device type codes, but these don't need to be extended - a succession is assumed
+** These are set for usb from the bcdDevice field (suitably mangled). Future devices
+** will be added in order of device creation to the list, so the names here are just
+** to help use remember which device is which. The U14ERR_... values follow the same
+** pattern for modern devices.a
+**/
+#define TYPEUNKNOWN -1 /* dont know */
+#define TYPE1401 0 /* standard 1401 */
+#define TYPEPLUS 1 /* 1401 plus */
+#define TYPEU1401 2 /* u1401 */
+#define TYPEPOWER 3 /* Power1401 */
+#define TYPEU14012 4 /* u1401 mkII */
+#define TYPEPOWER2 5 /* Power1401 mk II */
+#define TYPEMICRO3 6 /* Micro1401-3 */
+#define TYPEPOWER3 7 /* Power1401-3 */
+
+/* Some useful defines of constants. DONT FORGET to change the version in the */
+/* resources whenever you change it here!. */
+#define DRIVERMAJREV 2 /* driver revision level major (match windows) */
+#define DRIVERMINREV 0 /* driver revision level minor */
+
+/* Definitions of the various block transfer command codes */
+#define TM_EXTTOHOST 8 /* extended tohost */
+#define TM_EXTTO1401 9 /* extended to1401 */
+
+/* Definitions of values in usbReqtype. Used in sorting out setup actions */
#define H_TO_D 0x00
#define D_TO_H 0x80
#define VENDOR 0x40
@@ -58,7 +59,7 @@
#define INTREQ 0x01
#define ENDREQ 0x02
-/// Definition of values in usbRequest, again used to sort out setup
+/* Definition of values in usbRequest, again used to sort out setup */
#define GET_STATUS 0x00
#define CLEAR_FEATURE 0x01
#define SET_FEATURE 0x03
@@ -71,8 +72,8 @@
#define SET_INTERFACE 0x0b
#define SYNCH_FRAME 0x0c
-/// Definitions of the various debug command codes understood by the 1401. These
-/// are used in various vendor-specific commands to achieve the desired effect
+/* Definitions of the various debug command codes understood by the 1401. These */
+/* are used in various vendor-specific commands to achieve the desired effect */
#define DB_GRAB 0x50 /* Grab is a NOP for USB */
#define DB_FREE 0x51 /* Free is a NOP for the USB */
#define DB_SETADD 0x52 /* Set debug address (double) */
@@ -91,139 +92,135 @@
#define CR_CHAR 0x0D /* The carriage return character */
#define CR_CHAR_80 0x8d /* and with bit 7 set */
-/// A structure holding information about a block of memory for use in circular transfers
-typedef struct circBlk
-{
- volatile UINT dwOffset; /* Offset within area of block start */
- volatile UINT dwSize; /* Size of the block, in bytes (0 = unused) */
+/* A structure holding information about a block of memory for use in circular transfers */
+typedef struct circBlk {
+ volatile UINT dwOffset; /* Offset within area of block start */
+ volatile UINT dwSize; /* Size of the block, in bytes (0 = unused) */
} CIRCBLK;
-/// A structure holding all of the information about a transfer area - an area of
-/// memory set up for use either as a source or destination in DMA transfers.
-typedef struct transarea
-{
- void* lpvBuff; // User address of xfer area saved for completeness
- UINT dwBaseOffset; // offset to start of xfer area in first page
- UINT dwLength; // Length of xfer area, in bytes
- struct page **pPages; // Points at array of locked down pages
- int nPages; // number of pages that are locked down
- bool bUsed; // Is this structure in use?
- bool bCircular; // Is this area for circular transfers?
- bool bCircToHost; // Flag for direction of circular transfer
- bool bEventToHost; // Set event on transfer to host?
- int iWakeUp; // Set 1 on event, cleared by TestEvent()
- UINT dwEventSt; // Defines section within xfer area for...
- UINT dwEventSz; // ...notification by the event SZ is 0 if unset
- CIRCBLK aBlocks[2]; // Info on a pair of circular blocks
- wait_queue_head_t wqEvent; // The wait queue for events in this area MUST BE LAST
+/* A structure holding all of the information about a transfer area - an area of */
+/* memory set up for use either as a source or destination in DMA transfers. */
+typedef struct transarea {
+ void *lpvBuff; /* User address of xfer area saved for completeness */
+ UINT dwBaseOffset; /* offset to start of xfer area in first page */
+ UINT dwLength; /* Length of xfer area, in bytes */
+ struct page **pPages; /* Points at array of locked down pages */
+ int nPages; /* number of pages that are locked down */
+ bool bUsed; /* Is this structure in use? */
+ bool bCircular; /* Is this area for circular transfers? */
+ bool bCircToHost; /* Flag for direction of circular transfer */
+ bool bEventToHost; /* Set event on transfer to host? */
+ int iWakeUp; /* Set 1 on event, cleared by TestEvent() */
+ UINT dwEventSt; /* Defines section within xfer area for... */
+ UINT dwEventSz; /* ...notification by the event SZ is 0 if unset */
+ CIRCBLK aBlocks[2]; /* Info on a pair of circular blocks */
+ wait_queue_head_t wqEvent; /* The wait queue for events in this area MUST BE LAST */
} TRANSAREA;
-/// The DMADESC structure is used to hold information on the transfer in progress. It
-/// is set up by ReadDMAInfo, using information sent by the 1401 in an escape sequence.
-typedef struct dmadesc
-{
- unsigned short wTransType; /* transfer type as TM_xxx above */
- unsigned short wIdent; /* identifier word */
- unsigned int dwSize; /* bytes to transfer */
- unsigned int dwOffset; /* offset into transfer area for trans */
- bool bOutWard; /* true when data is going TO 1401 */
+/* The DMADESC structure is used to hold information on the transfer in progress. It */
+/* is set up by ReadDMAInfo, using information sent by the 1401 in an escape sequence. */
+typedef struct dmadesc {
+ unsigned short wTransType; /* transfer type as TM_xxx above */
+ unsigned short wIdent; /* identifier word */
+ unsigned int dwSize; /* bytes to transfer */
+ unsigned int dwOffset; /* offset into transfer area for trans */
+ bool bOutWard; /* true when data is going TO 1401 */
} DMADESC;
#define INBUF_SZ 256 /* input buffer size */
#define OUTBUF_SZ 256 /* output buffer size */
-#define STAGED_SZ 0x10000 // size of coherent buffer for staged transfers
-
-/// Structure to hold all of our device specific stuff. We are making this as similar as we
-/// can to the Windows driver to help in our understanding of what is going on.
-typedef struct _DEVICE_EXTENSION
-{
- char inputBuffer[INBUF_SZ]; /* The two buffers */
- char outputBuffer[OUTBUF_SZ]; /* accessed by the host functions */
- volatile unsigned int dwNumInput; /* num of chars in input buffer */
- volatile unsigned int dwInBuffGet; /* where to get from input buffer */
- volatile unsigned int dwInBuffPut; /* where to put into input buffer */
- volatile unsigned int dwNumOutput; /* num of chars in output buffer */
- volatile unsigned int dwOutBuffGet; /* where to get from output buffer*/
- volatile unsigned int dwOutBuffPut; /* where to put into output buffer*/
-
- volatile bool bSendCharsPending; /* Flag to indicate sendchar active */
- volatile bool bReadCharsPending; /* Flag to indicate a read is primed */
- char* pCoherCharOut; /* special aligned buffer for chars to 1401 */
- struct urb* pUrbCharOut; /* urb used for chars to 1401 */
- char* pCoherCharIn; /* special aligned buffer for chars to host */
- struct urb* pUrbCharIn; /* urb used for chars to host */
-
- spinlock_t charOutLock; /* to protect the outputBuffer and outputting */
- spinlock_t charInLock; /* to protect the inputBuffer and char reads */
- __u8 bInterval; /* Interrupt end point interval */
-
- volatile unsigned int dwDMAFlag; /* state of DMA */
- TRANSAREA rTransDef[MAX_TRANSAREAS];/* transfer area info */
- volatile DMADESC rDMAInfo; // info on current DMA transfer
- volatile bool bXFerWaiting; // Flag set if DMA transfer stalled
- volatile bool bInDrawDown; // Flag that we want to halt transfers
-
- // Parameters relating to a block read\write that is in progress. Some of these values
- // are equivalent to values in rDMAInfo. The values here are those in use, while those
- // in rDMAInfo are those received from the 1401 via an escape sequence. If another
- // escape sequence arrives before the previous xfer ends, rDMAInfo values are updated while these
- // are used to finish off the current transfer.
- volatile short StagedId; // The transfer area id for this transfer
- volatile bool StagedRead; // Flag TRUE for read from 1401, FALSE for write
- volatile unsigned int StagedLength; // Total length of this transfer
- volatile unsigned int StagedOffset; // Offset within memory area for transfer start
- volatile unsigned int StagedDone; // Bytes transferred so far
- volatile bool bStagedUrbPending; // Flag to indicate active
- char* pCoherStagedIO; // buffer used for block transfers
- struct urb* pStagedUrb; // The URB to use
- spinlock_t stagedLock; // protects ReadWriteMem() and circular buffer stuff
-
- short s1401Type; // type of 1401 attached
- short sCurrentState; // current error state
- bool bIsUSB2; // type of the interface we connect to
- bool bForceReset; // Flag to make sure we get a real reset
- __u32 statBuf[2]; // buffer for 1401 state info
-
- unsigned long ulSelfTestTime; // used to timeout self test
-
- int nPipes; // Should be 3 or 4 depending on 1401 usb chip
- int bPipeError[4]; // set non-zero if an error on one of the pipe
- __u8 epAddr[4]; // addresses of the 3/4 end points
-
- struct usb_device *udev; // the usb device for this device
- struct usb_interface *interface; // the interface for this device, NULL if removed
- struct usb_anchor submitted; // in case we need to retract our submissions
- struct mutex io_mutex; // synchronize I/O with disconnect, one user-mode caller at a time
-
- int errors; // the last request tanked
- int open_count; // count the number of openers
- spinlock_t err_lock; // lock for errors
- struct kref kref;
-}DEVICE_EXTENSION, *PDEVICE_EXTENSION;
+#define STAGED_SZ 0x10000 /* size of coherent buffer for staged transfers */
+
+/* Structure to hold all of our device specific stuff. We are making this as similar as we */
+/* can to the Windows driver to help in our understanding of what is going on. */
+typedef struct _DEVICE_EXTENSION {
+ char inputBuffer[INBUF_SZ]; /* The two buffers */
+ char outputBuffer[OUTBUF_SZ]; /* accessed by the host functions */
+ volatile unsigned int dwNumInput; /* num of chars in input buffer */
+ volatile unsigned int dwInBuffGet; /* where to get from input buffer */
+ volatile unsigned int dwInBuffPut; /* where to put into input buffer */
+ volatile unsigned int dwNumOutput; /* num of chars in output buffer */
+ volatile unsigned int dwOutBuffGet; /* where to get from output buffer*/
+ volatile unsigned int dwOutBuffPut; /* where to put into output buffer*/
+
+ volatile bool bSendCharsPending; /* Flag to indicate sendchar active */
+ volatile bool bReadCharsPending; /* Flag to indicate a read is primed */
+ char *pCoherCharOut; /* special aligned buffer for chars to 1401 */
+ struct urb *pUrbCharOut; /* urb used for chars to 1401 */
+ char *pCoherCharIn; /* special aligned buffer for chars to host */
+ struct urb *pUrbCharIn; /* urb used for chars to host */
+
+ spinlock_t charOutLock; /* to protect the outputBuffer and outputting */
+ spinlock_t charInLock; /* to protect the inputBuffer and char reads */
+ __u8 bInterval; /* Interrupt end point interval */
+
+ volatile unsigned int dwDMAFlag; /* state of DMA */
+ TRANSAREA rTransDef[MAX_TRANSAREAS];/* transfer area info */
+ volatile DMADESC rDMAInfo; /* info on current DMA transfer */
+ volatile bool bXFerWaiting; /* Flag set if DMA transfer stalled */
+ volatile bool bInDrawDown; /* Flag that we want to halt transfers */
+
+ /* Parameters relating to a block read\write that is in progress. Some of these values */
+ /* are equivalent to values in rDMAInfo. The values here are those in use, while those */
+ /* in rDMAInfo are those received from the 1401 via an escape sequence. If another */
+ /* escape sequence arrives before the previous xfer ends, rDMAInfo values are updated while these */
+ /* are used to finish off the current transfer. */
+ volatile short StagedId; /* The transfer area id for this transfer */
+ volatile bool StagedRead; /* Flag TRUE for read from 1401, FALSE for write */
+ volatile unsigned int StagedLength; /* Total length of this transfer */
+ volatile unsigned int StagedOffset; /* Offset within memory area for transfer start */
+ volatile unsigned int StagedDone; /* Bytes transferred so far */
+ volatile bool bStagedUrbPending; /* Flag to indicate active */
+ char *pCoherStagedIO; /* buffer used for block transfers */
+ struct urb *pStagedUrb; /* The URB to use */
+ spinlock_t stagedLock; /* protects ReadWriteMem() and circular buffer stuff */
+
+ short s1401Type; /* type of 1401 attached */
+ short sCurrentState; /* current error state */
+ bool bIsUSB2; /* type of the interface we connect to */
+ bool bForceReset; /* Flag to make sure we get a real reset */
+ __u32 statBuf[2]; /* buffer for 1401 state info */
+
+ unsigned long ulSelfTestTime; /* used to timeout self test */
+
+ int nPipes; /* Should be 3 or 4 depending on 1401 usb chip */
+ int bPipeError[4]; /* set non-zero if an error on one of the pipe */
+ __u8 epAddr[4]; /* addresses of the 3/4 end points */
+
+ struct usb_device *udev; /* the usb device for this device */
+ struct usb_interface *interface; /* the interface for this device, NULL if removed */
+ struct usb_anchor submitted; /* in case we need to retract our submissions */
+ struct mutex io_mutex; /* synchronize I/O with disconnect, one user-mode caller at a time */
+
+ int errors; /* the last request tanked */
+ int open_count; /* count the number of openers */
+ spinlock_t err_lock; /* lock for errors */
+ struct kref kref;
+} DEVICE_EXTENSION, *PDEVICE_EXTENSION;
#define to_DEVICE_EXTENSION(d) container_of(d, DEVICE_EXTENSION, kref)
-/// Definitions of routimes used between compilation object files
-// in usb1401.c
-extern int Allowi(DEVICE_EXTENSION* pdx);
-extern int SendChars(DEVICE_EXTENSION* pdx);
+/* Definitions of routimes used between compilation object files */
+/* in usb1401.c */
+extern int Allowi(DEVICE_EXTENSION *pdx);
+extern int SendChars(DEVICE_EXTENSION *pdx);
extern void ced_draw_down(DEVICE_EXTENSION *pdx);
extern int ReadWriteMem(DEVICE_EXTENSION *pdx, bool Read, unsigned short wIdent,
- unsigned int dwOffs, unsigned int dwLen);
+ unsigned int dwOffs, unsigned int dwLen);
-// in ced_ioc.c
+/* in ced_ioc.c */
extern int ClearArea(DEVICE_EXTENSION *pdx, int nArea);
-extern int SendString(DEVICE_EXTENSION* pdx, const char __user* pData, unsigned int n);
+extern int SendString(DEVICE_EXTENSION *pdx, const char __user *pData, unsigned int n);
extern int SendChar(DEVICE_EXTENSION *pdx, char c);
-extern int Get1401State(DEVICE_EXTENSION* pdx, __u32* state, __u32* error);
+extern int Get1401State(DEVICE_EXTENSION *pdx, __u32 *state, __u32 *error);
extern int ReadWrite_Cancel(DEVICE_EXTENSION *pdx);
-extern bool Is1401(DEVICE_EXTENSION* pdx);
-extern bool QuickCheck(DEVICE_EXTENSION* pdx, bool bTestBuff, bool bCanReset);
+extern bool Is1401(DEVICE_EXTENSION *pdx);
+extern bool QuickCheck(DEVICE_EXTENSION *pdx, bool bTestBuff, bool bCanReset);
extern int Reset1401(DEVICE_EXTENSION *pdx);
extern int GetChar(DEVICE_EXTENSION *pdx);
-extern int GetString(DEVICE_EXTENSION *pdx, char __user* pUser, int n);
+extern int GetString(DEVICE_EXTENSION *pdx, char __user *pUser, int n);
extern int SetTransfer(DEVICE_EXTENSION *pdx, TRANSFERDESC __user *pTD);
extern int UnsetTransfer(DEVICE_EXTENSION *pdx, int nArea);
-extern int SetEvent(DEVICE_EXTENSION *pdx, TRANSFEREVENT __user*pTE);
+extern int SetEvent(DEVICE_EXTENSION *pdx, TRANSFEREVENT __user *pTE);
extern int Stat1401(DEVICE_EXTENSION *pdx);
extern int LineCount(DEVICE_EXTENSION *pdx);
extern int GetOutBufSpace(DEVICE_EXTENSION *pdx);
@@ -235,15 +232,15 @@ extern int StartSelfTest(DEVICE_EXTENSION *pdx);
extern int CheckSelfTest(DEVICE_EXTENSION *pdx, TGET_SELFTEST __user *pGST);
extern int TypeOf1401(DEVICE_EXTENSION *pdx);
extern int TransferFlags(DEVICE_EXTENSION *pdx);
-extern int DbgPeek(DEVICE_EXTENSION *pdx, TDBGBLOCK __user* pDB);
+extern int DbgPeek(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB);
extern int DbgPoke(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB);
extern int DbgRampData(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB);
extern int DbgRampAddr(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB);
extern int DbgGetData(DEVICE_EXTENSION *pdx, TDBGBLOCK __user *pDB);
extern int DbgStopLoop(DEVICE_EXTENSION *pdx);
extern int SetCircular(DEVICE_EXTENSION *pdx, TRANSFERDESC __user *pTD);
-extern int GetCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user* pCB);
-extern int FreeCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user* pCB);
+extern int GetCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user *pCB);
+extern int FreeCircBlock(DEVICE_EXTENSION *pdx, TCIRCBLOCK __user *pCB);
extern int WaitEvent(DEVICE_EXTENSION *pdx, int nArea, int msTimeOut);
extern int TestEvent(DEVICE_EXTENSION *pdx, int nArea);
#endif
diff --git a/drivers/staging/ced1401/use1401.h b/drivers/staging/ced1401/use1401.h
index 86294e21db0c..b7997c9835c2 100644
--- a/drivers/staging/ced1401/use1401.h
+++ b/drivers/staging/ced1401/use1401.h
@@ -11,16 +11,16 @@
#define __USE1401_H__
#include "machine.h"
-// Some definitions to make things compatible. If you want to use Use1401 directly
-// from a Windows program you should define U14_NOT_DLL, in which case you also
-// MUST make sure that your application startup code calls U14InitLib().
-// DLL_USE1401 is defined when you are building the Use1401 dll, not otherwise.
+/* Some definitions to make things compatible. If you want to use Use1401 directly */
+/* from a Windows program you should define U14_NOT_DLL, in which case you also */
+/* MUST make sure that your application startup code calls U14InitLib(). */
+/* DLL_USE1401 is defined when you are building the Use1401 dll, not otherwise. */
#ifdef _IS_WINDOWS_
#ifndef U14_NOT_DLL
#ifdef DLL_USE1401
-#define U14API(retType) retType DllExport __stdcall
+#define U14API(retType) (retType DllExport __stdcall)
#else
-#define U14API(retType) retType DllImport __stdcall
+#define U14API(retType) (retType DllImport __stdcall)
#endif
#endif
@@ -36,7 +36,7 @@
#ifdef _QT
#ifndef U14_NOT_DLL
#undef U14API
-#define U14API(retType) retType __declspec(dllimport) __stdcall
+#define U14API(retType) (retType __declspec(dllimport) __stdcall)
#endif
#undef U14LONG
#define U14LONG int
@@ -50,20 +50,20 @@
#define U14LONG long
#endif
-/// Error codes: We need them here as user space can see them.
-#define U14ERR_NOERROR 0 // no problems
+/* Error codes: We need them here as user space can see them. */
+#define U14ERR_NOERROR 0 /* no problems */
-/// Device error codes, but these don't need to be extended - a succession is assumed
-#define U14ERR_STD 4 // standard 1401 connected
-#define U14ERR_U1401 5 // u1401 connected
-#define U14ERR_PLUS 6 // 1401 plus connected
-#define U14ERR_POWER 7 // Power1401 connected
-#define U14ERR_U14012 8 // u1401 mkII connected
+/* Device error codes, but these don't need to be extended - a succession is assumed */
+#define U14ERR_STD 4 /* standard 1401 connected */
+#define U14ERR_U1401 5 /* u1401 connected */
+#define U14ERR_PLUS 6 /* 1401 plus connected */
+#define U14ERR_POWER 7 /* Power1401 connected */
+#define U14ERR_U14012 8 /* u1401 mkII connected */
#define U14ERR_POWER2 9
#define U14ERR_U14013 10
#define U14ERR_POWER3 11
-/// NBNB Error numbers need shifting as some linux error codes start at 512
+/* NBNB Error numbers need shifting as some linux error codes start at 512 */
#define U14ERR(n) (n+U14ERRBASE)
#define U14ERR_OFF U14ERR(0) /* 1401 there but switched off */
#define U14ERR_NC U14ERR(-1) /* 1401 not connected */
@@ -113,7 +113,7 @@
#define U14ERR_DRIVCOMMS U14ERR(-110) /* failed talking to driver */
#define U14ERR_OUTOFMEMORY U14ERR(-111) /* needed memory and couldnt get it*/
-/// 1401 type codes.
+/* / 1401 type codes. */
#define U14TYPE1401 0 /* standard 1401 */
#define U14TYPEPLUS 1 /* 1401 plus */
#define U14TYPEU1401 2 /* u1401 */
@@ -124,9 +124,9 @@
#define U14TYPEPOWER3 7 /* power1401-3 */
#define U14TYPEUNKNOWN -1 /* dont know */
-/// Transfer flags to allow driver capabilities to be interrogated
+/* Transfer flags to allow driver capabilities to be interrogated */
-/// Constants for transfer flags
+/* Constants for transfer flags */
#define U14TF_USEDMA 1 /* Transfer flag for use DMA */
#define U14TF_MULTIA 2 /* Transfer flag for multi areas */
#define U14TF_FIFO 4 /* for FIFO interface card */
@@ -138,18 +138,18 @@
#define U14TF_DIAG 256 /* Diagnostics/debug functions */
#define U14TF_CIRC14 512 /* Circular-mode to 1401 */
-/// Definitions of element sizes for DMA transfers - to allow byte-swapping
+/* Definitions of element sizes for DMA transfers - to allow byte-swapping */
#define ESZBYTES 0 /* BYTE element size value */
-#define ESZWORDS 1 /* WORD element size value */
+#define ESZWORDS 1 /* unsigned short element size value */
#define ESZLONGS 2 /* long element size value */
#define ESZUNKNOWN 0 /* unknown element size value */
-/// These define required access types for the debug/diagnostics function
+/* These define required access types for the debug/diagnostics function */
#define BYTE_SIZE 1 /* 8-bit access */
#define WORD_SIZE 2 /* 16-bit access */
#define LONG_SIZE 3 /* 32-bit access */
-/// Stuff used by U14_GetTransfer
+/* Stuff used by U14_GetTransfer */
#define GET_TX_MAXENTRIES 257 /* (max length / page size + 1) */
#ifdef _IS_WINDOWS_
@@ -157,19 +157,19 @@
typedef struct /* used for U14_GetTransfer results */
{ /* Info on a single mapped block */
- U14LONG physical;
- U14LONG size;
+ U14LONG physical;
+ U14LONG size;
} TXENTRY;
typedef struct TGetTxBlock /* used for U14_GetTransfer results */
{ /* matches structure in VXD */
- U14LONG size;
- U14LONG linear;
- short seg;
- short reserved;
- short avail; /* number of available entries */
- short used; /* number of used entries */
- TXENTRY entries[GET_TX_MAXENTRIES]; /* Array of mapped block info */
+ U14LONG size;
+ U14LONG linear;
+ short seg;
+ short reserved;
+ short avail; /* number of available entries */
+ short used; /* number of used entries */
+ TXENTRY entries[GET_TX_MAXENTRIES]; /* Array of mapped block info */
} TGET_TX_BLOCK;
typedef TGET_TX_BLOCK *LPGET_TX_BLOCK;
@@ -180,19 +180,19 @@ typedef TGET_TX_BLOCK *LPGET_TX_BLOCK;
#ifdef LINUX
typedef struct /* used for U14_GetTransfer results */
{ /* Info on a single mapped block */
- long long physical;
- long size;
+ long long physical;
+ long size;
} TXENTRY;
typedef struct TGetTxBlock /* used for U14_GetTransfer results */
{ /* matches structure in VXD */
- long long linear; /* linear address */
- long size; /* total size of the mapped area, holds id when called */
- short seg; /* segment of the address for Win16 */
- short reserved;
- short avail; /* number of available entries */
- short used; /* number of used entries */
- TXENTRY entries[GET_TX_MAXENTRIES]; /* Array of mapped block info */
+ long long linear; /* linear address */
+ long size; /* total size of the mapped area, holds id when called */
+ short seg; /* segment of the address for Win16 */
+ short reserved;
+ short avail; /* number of available entries */
+ short used; /* number of used entries */
+ TXENTRY entries[GET_TX_MAXENTRIES]; /* Array of mapped block info */
} TGET_TX_BLOCK;
#endif
@@ -200,84 +200,84 @@ typedef struct TGetTxBlock /* used for U14_GetTransfer results */
extern "C" {
#endif
-U14API(int) U14WhenToTimeOut(short hand); // when to timeout in ms
-U14API(short) U14PassedTime(int iTime); // non-zero if iTime passed
+U14API(int) U14WhenToTimeOut(short hand); /* when to timeout in ms */
+U14API(short) U14PassedTime(int iTime); /* non-zero if iTime passed */
-U14API(short) U14LastErrCode(short hand);
+U14API(short) U14LastErrCode(short hand);
-U14API(short) U14Open1401(short n1401);
-U14API(short) U14Close1401(short hand);
-U14API(short) U14Reset1401(short hand);
-U14API(short) U14ForceReset(short hand);
-U14API(short) U14TypeOf1401(short hand);
-U14API(short) U14NameOf1401(short hand, char* pBuf, WORD wMax);
+U14API(short) U14Open1401(short n1401);
+U14API(short) U14Close1401(short hand);
+U14API(short) U14Reset1401(short hand);
+U14API(short) U14ForceReset(short hand);
+U14API(short) U14TypeOf1401(short hand);
+U14API(short) U14NameOf1401(short hand, char *pBuf, unsigned short wMax);
-U14API(short) U14Stat1401(short hand);
-U14API(short) U14CharCount(short hand);
-U14API(short) U14LineCount(short hand);
+U14API(short) U14Stat1401(short hand);
+U14API(short) U14CharCount(short hand);
+U14API(short) U14LineCount(short hand);
-U14API(short) U14SendString(short hand, const char* pString);
-U14API(short) U14GetString(short hand, char* pBuffer, WORD wMaxLen);
-U14API(short) U14SendChar(short hand, char cChar);
-U14API(short) U14GetChar(short hand, char* pcChar);
+U14API(short) U14SendString(short hand, const char *pString);
+U14API(short) U14GetString(short hand, char *pBuffer, unsigned short wMaxLen);
+U14API(short) U14SendChar(short hand, char cChar);
+U14API(short) U14GetChar(short hand, char *pcChar);
-U14API(short) U14LdCmd(short hand, const char* command);
-U14API(DWORD) U14Ld(short hand, const char* vl, const char* str);
+U14API(short) U14LdCmd(short hand, const char *command);
+U14API(unsigned int) U14Ld(short hand, const char *vl, const char *str);
-U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
- DWORD dwLength, short eSz);
-U14API(short) U14UnSetTransfer(short hand, WORD wArea);
-U14API(short) U14SetTransferEvent(short hand, WORD wArea, BOOL bEvent,
- BOOL bToHost, DWORD dwStart, DWORD dwLength);
-U14API(int) U14TestTransferEvent(short hand, WORD wArea);
-U14API(int) U14WaitTransferEvent(short hand, WORD wArea, int msTimeOut);
-U14API(short) U14GetTransfer(short hand, TGET_TX_BLOCK *pTransBlock);
+U14API(short) U14SetTransArea(short hand, unsigned short wArea, void *pvBuff,
+ unsigned int dwLength, short eSz);
+U14API(short) U14UnSetTransfer(short hand, unsigned short wArea);
+U14API(short) U14SetTransferEvent(short hand, unsigned short wArea, BOOL bEvent,
+ BOOL bToHost, unsigned int dwStart, unsigned int dwLength);
+U14API(int) U14TestTransferEvent(short hand, unsigned short wArea);
+U14API(int) U14WaitTransferEvent(short hand, unsigned short wArea, int msTimeOut);
+U14API(short) U14GetTransfer(short hand, TGET_TX_BLOCK *pTransBlock);
-U14API(short) U14ToHost(short hand, char* pAddrHost,DWORD dwSize,DWORD dw1401,
- short eSz);
-U14API(short) U14To1401(short hand, const char* pAddrHost,DWORD dwSize,DWORD dw1401,
- short eSz);
+U14API(short) U14ToHost(short hand, char *pAddrHost, unsigned int dwSize, unsigned int dw1401,
+ short eSz);
+U14API(short) U14To1401(short hand, const char *pAddrHost, unsigned int dwSize, unsigned int dw1401,
+ short eSz);
-U14API(short) U14SetCircular(short hand, WORD wArea, BOOL bToHost, void *pvBuff,
- DWORD dwLength);
+U14API(short) U14SetCircular(short hand, unsigned short wArea, BOOL bToHost, void *pvBuff,
+ unsigned int dwLength);
-U14API(int) U14GetCircBlk(short hand, WORD wArea, DWORD *pdwOffs);
-U14API(int) U14FreeCircBlk(short hand, WORD wArea, DWORD dwOffs, DWORD dwSize,
- DWORD *pdwOffs);
+U14API(int) U14GetCircBlk(short hand, unsigned short wArea, unsigned int *pdwOffs);
+U14API(int) U14FreeCircBlk(short hand, unsigned short wArea, unsigned int dwOffs, unsigned int dwSize,
+ unsigned int *pdwOffs);
-U14API(short) U14StrToLongs(const char* pszBuff, U14LONG *palNums, short sMaxLongs);
-U14API(short) U14LongsFrom1401(short hand, U14LONG *palBuff, short sMaxLongs);
+U14API(short) U14StrToLongs(const char *pszBuff, U14LONG *palNums, short sMaxLongs);
+U14API(short) U14LongsFrom1401(short hand, U14LONG *palBuff, short sMaxLongs);
U14API(void) U14SetTimeout(short hand, int lTimeout);
U14API(int) U14GetTimeout(short hand);
-U14API(short) U14OutBufSpace(short hand);
+U14API(short) U14OutBufSpace(short hand);
U14API(int) U14BaseAddr1401(short hand);
U14API(int) U14DriverVersion(short hand);
U14API(int) U14DriverType(short hand);
-U14API(short) U14DriverName(short hand, char* pBuf, WORD wMax);
-U14API(short) U14GetUserMemorySize(short hand, DWORD *pMemorySize);
-U14API(short) U14KillIO1401(short hand);
-
-U14API(short) U14BlkTransState(short hand);
-U14API(short) U14StateOf1401(short hand);
-
-U14API(short) U14Grab1401(short hand);
-U14API(short) U14Free1401(short hand);
-U14API(short) U14Peek1401(short hand, DWORD dwAddr, int nSize, int nRepeats);
-U14API(short) U14Poke1401(short hand, DWORD dwAddr, DWORD dwValue, int nSize, int nRepeats);
-U14API(short) U14Ramp1401(short hand, DWORD dwAddr, DWORD dwDef, DWORD dwEnable, int nSize, int nRepeats);
-U14API(short) U14RampAddr(short hand, DWORD dwDef, DWORD dwEnable, int nSize, int nRepeats);
-U14API(short) U14StopDebugLoop(short hand);
-U14API(short) U14GetDebugData(short hand, U14LONG *plValue);
-
-U14API(short) U14StartSelfTest(short hand);
-U14API(short) U14CheckSelfTest(short hand, U14LONG *pData);
-U14API(short) U14TransferFlags(short hand);
-U14API(void) U14GetErrorString(short nErr, char* pStr, WORD wMax);
+U14API(short) U14DriverName(short hand, char *pBuf, unsigned short wMax);
+U14API(short) U14GetUserMemorySize(short hand, unsigned int *pMemorySize);
+U14API(short) U14KillIO1401(short hand);
+
+U14API(short) U14BlkTransState(short hand);
+U14API(short) U14StateOf1401(short hand);
+
+U14API(short) U14Grab1401(short hand);
+U14API(short) U14Free1401(short hand);
+U14API(short) U14Peek1401(short hand, unsigned int dwAddr, int nSize, int nRepeats);
+U14API(short) U14Poke1401(short hand, unsigned int dwAddr, unsigned int dwValue, int nSize, int nRepeats);
+U14API(short) U14Ramp1401(short hand, unsigned int dwAddr, unsigned int dwDef, unsigned int dwEnable, int nSize, int nRepeats);
+U14API(short) U14RampAddr(short hand, unsigned int dwDef, unsigned int dwEnable, int nSize, int nRepeats);
+U14API(short) U14StopDebugLoop(short hand);
+U14API(short) U14GetDebugData(short hand, U14LONG *plValue);
+
+U14API(short) U14StartSelfTest(short hand);
+U14API(short) U14CheckSelfTest(short hand, U14LONG *pData);
+U14API(short) U14TransferFlags(short hand);
+U14API(void) U14GetErrorString(short nErr, char *pStr, unsigned short wMax);
U14API(int) U14MonitorRev(short hand);
U14API(void) U14CloseAll(void);
-U14API(short) U14WorkingSet(DWORD dwMinKb, DWORD dwMaxKb);
+U14API(short) U14WorkingSet(unsigned int dwMinKb, unsigned int dwMaxKb);
U14API(int) U14InitLib(void);
#ifdef __cplusplus
@@ -285,3 +285,4 @@ U14API(int) U14InitLib(void);
#endif
#endif /* End of ifndef __USE1401_H__ */
+
diff --git a/drivers/staging/ced1401/use14_ioc.h b/drivers/staging/ced1401/use14_ioc.h
index 15ca63888380..97d7913840dc 100644
--- a/drivers/staging/ced1401/use14_ioc.h
+++ b/drivers/staging/ced1401/use14_ioc.h
@@ -19,283 +19,282 @@
** The IOCTL function codes from 0x80 to 0xFF are for developer use.
*/
#define FILE_DEVICE_CED1401 0x8001
-#define FNNUMBASE 0x800
-
-#define U14_OPEN1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_CLOSE1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+1, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SENDSTRING CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+2, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_RESET1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+3, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETCHAR CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+4, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SENDCHAR CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+5, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STAT1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+6, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_LINECOUNT CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+7, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETSTRING CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+8, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_REGCALLBACK CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+9, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETMONITORBUF CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+10, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SETTRANSFER CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+11, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_UNSETTRANSFER CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+12, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SETTRANSEVENT CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+13, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETOUTBUFSPACE CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+14, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETBASEADDRESS CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+15, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETDRIVERREVISION CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+16, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETTRANSFER CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+17, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_KILLIO1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+18, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_BLKTRANSSTATE CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+19, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_BYTECOUNT CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+20, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_ZEROBLOCKCOUNT CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+21, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STOPCIRCULAR CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+22, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STATEOF1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+23, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_REGISTERS1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+24, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GRAB1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+25, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_FREE1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+26, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STEP1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+27, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SET1401REGISTERS CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+28, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STEPTILL1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+29, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SETORIN CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+30, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_STARTSELFTEST CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+31, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_CHECKSELFTEST CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+32, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_TYPEOF1401 CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+33, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_TRANSFERFLAGS CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+34, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGPEEK CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+35, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGPOKE CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+36, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGRAMPDATA CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+37, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGRAMPADDR CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+38, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGGETDATA CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+39, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_DBGSTOPLOOP CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+40, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_FULLRESET CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+41, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_SETCIRCULAR CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+42, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_GETCIRCBLK CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+43, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-#define U14_FREECIRCBLK CTL_CODE( FILE_DEVICE_CED1401, \
- FNNUMBASE+44, \
- METHOD_BUFFERED, \
- FILE_ANY_ACCESS)
-
-//--------------- Structures that are shared with the driver -------------
+ FNNUMBASE 0x800
+
+#define U14_OPEN1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_CLOSE1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+1, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SENDSTRING CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+2, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_RESET1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+3, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETCHAR CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+4, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SENDCHAR CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+5, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STAT1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+6, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_LINECOUNT CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+7, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETSTRING CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+8, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_REGCALLBACK CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+9, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETMONITORBUF CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+10, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SETTRANSFER CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+11, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_UNSETTRANSFER CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+12, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SETTRANSEVENT CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+13, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETOUTBUFSPACE CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+14, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETBASEADDRESS CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+15, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETDRIVERREVISION CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+16, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETTRANSFER CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+17, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_KILLIO1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+18, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_BLKTRANSSTATE CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+19, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_BYTECOUNT CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+20, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_ZEROBLOCKCOUNT CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+21, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STOPCIRCULAR CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+22, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STATEOF1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+23, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_REGISTERS1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+24, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GRAB1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+25, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_FREE1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+26, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STEP1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+27, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SET1401REGISTERS CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+28, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STEPTILL1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+29, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SETORIN CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+30, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_STARTSELFTEST CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+31, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_CHECKSELFTEST CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+32, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_TYPEOF1401 CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+33, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_TRANSFERFLAGS CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+34, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGPEEK CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+35, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGPOKE CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+36, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGRAMPDATA CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+37, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGRAMPADDR CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+38, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGGETDATA CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+39, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_DBGSTOPLOOP CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+40, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_FULLRESET CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+41, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_SETCIRCULAR CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+42, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_GETCIRCBLK CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+43, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+#define U14_FREECIRCBLK CTL_CODE(FILE_DEVICE_CED1401, \
+ FNNUMBASE+44, \
+ METHOD_BUFFERED, \
+ FILE_ANY_ACCESS)
+
+/*--------------- Structures that are shared with the driver ------------- */
#pragma pack(1)
typedef struct /* used for get/set standard 1401 registers */
{
- short sPC;
- char A;
- char X;
- char Y;
- char stat;
- char rubbish;
+ short sPC;
+ char A;
+ char X;
+ char Y;
+ char stat;
+ char rubbish;
} T1401REGISTERS;
typedef union /* to communicate with 1401 driver status & control funcs */
{
- char chrs[22];
- short ints[11];
- long longs[5];
- T1401REGISTERS registers;
+ char chrs[22];
+ short ints[11];
+ long longs[5];
+ T1401REGISTERS registers;
} TCSBLOCK;
typedef TCSBLOCK* LPTCSBLOCK;
-typedef struct paramBlk
-{
- short sState;
- TCSBLOCK csBlock;
+typedef struct paramBlk {
+ short sState;
+ TCSBLOCK csBlock;
} PARAMBLK;
typedef PARAMBLK* PPARAMBLK;
typedef struct TransferDesc /* Structure and type for SetTransArea */
{
- WORD wArea; /* number of transfer area to set up */
- void FAR * lpvBuff; /* address of transfer area */
- DWORD dwLength; /* length of area to set up */
- short eSize; /* size to move (for swapping on MAC) */
+ unsigned short wArea; /* number of transfer area to set up */
+ void FAR *lpvBuff; /* address of transfer area */
+ unsigned int dwLength; /* length of area to set up */
+ short eSize; /* size to move (for swapping on MAC) */
} TRANSFERDESC;
-typedef TRANSFERDESC FAR * LPTRANSFERDESC;
+typedef TRANSFERDESC FAR *LPTRANSFERDESC;
/* This is the structure used to set up a transfer area */
typedef struct VXTransferDesc /* use1401.c and use1432x.x use only */
{
- WORD wArea; /* number of transfer area to set up */
- WORD wAddrSel; /* 16 bit selector for area */
- DWORD dwAddrOfs; /* 32 bit offset for area start */
- DWORD dwLength; /* length of area to set up */
+ unsigned short wArea; /* number of transfer area to set up */
+ unsigned short wAddrSel; /* 16 bit selector for area */
+ unsigned int dwAddrOfs; /* 32 bit offset for area start */
+ unsigned int dwLength; /* length of area to set up */
} VXTRANSFERDESC;
#pragma pack()
-#endif \ No newline at end of file
+#endif
diff --git a/drivers/staging/ced1401/userspace/use1401.c b/drivers/staging/ced1401/userspace/use1401.c
index 38e7c1c82d43..c9bc2ebfef1a 100644
--- a/drivers/staging/ced1401/userspace/use1401.c
+++ b/drivers/staging/ced1401/userspace/use1401.c
@@ -36,7 +36,7 @@
** Under Windows 9x and NT, Use1401 uses DeviceIoControl to get access to
** the 1401 driver. This has parameters for the device handle, the function
** code, an input pointer and byte count, an output pointer and byte count
-** and a pointer to a DWORD to hold the output byte count. Note that input
+** and a pointer to a unsigned int to hold the output byte count. Note that input
** and output are from the point-of-view of the driver, so the output stuff
** is used to read values from the 1401, not send to the 1401. The use of
** these parameters varies with the function in use and the operating
@@ -250,7 +250,7 @@ static int iAttached = 0; // counts process attaches so ca
static HANDLE aHand1401[MAX1401] = {0}; // handles for 1401s
static HANDLE aXferEvent[MAX1401] = {0}; // transfer events for the 1401s
static LPVOID apAreas[MAX1401][MAX_TRANSAREAS]; // Locked areas
-static DWORD auAreas[MAX1401][MAX_TRANSAREAS]; // Size of locked areas
+static unsigned int auAreas[MAX1401][MAX_TRANSAREAS]; // Size of locked areas
static BOOL bWindows9x = FALSE; // if we are Windows 95 or better
#ifdef _WIN64
#define USE_NT_DIOC(ind) TRUE
@@ -276,8 +276,8 @@ static int aHand1401[MAX1401] = {0}; // handles for 1401s
typedef struct CmdHead // defines header block on command
{ // for PC commands
char acBasic[5]; // BASIC information - needed to align things
- WORD wBasicSz; // size as seen by BASIC
- WORD wCmdSize; // size of the following info
+ unsigned short wBasicSz; // size as seen by BASIC
+ unsigned short wCmdSize; // size of the following info
} __packed CMDHEAD;
#pragma pack() // back to normal
@@ -311,7 +311,7 @@ static short CheckHandle(short h)
****************************************************************************/
static short U14Status1401(short sHand, LONG lCode, TCSBLOCK* pBlk)
{
- DWORD dwBytes = 0;
+ unsigned int dwBytes = 0;
if ((sHand < 0) || (sHand >= MAX1401)) /* Check parameters */
return U14ERR_BADHAND;
@@ -345,7 +345,7 @@ static short U14Status1401(short sHand, LONG lCode, TCSBLOCK* pBlk)
****************************************************************************/
static short U14Control1401(short sHand, LONG lCode, TCSBLOCK* pBlk)
{
- DWORD dwBytes = 0;
+ unsigned int dwBytes = 0;
if ((sHand < 0) || (sHand >= MAX1401)) /* Check parameters */
return U14ERR_BADHAND;
@@ -455,7 +455,7 @@ static void TranslateString(char* pStr)
****************************************************************************/
U14API(short) U14StrToLongs(const char* pszBuff, U14LONG *palNums, short sMaxLongs)
{
- WORD wChInd = 0; // index into source
+ unsigned short wChInd = 0; // index into source
short sLgInd = 0; // index into result longs
while (pszBuff[wChInd] && // until we get to end of string...
@@ -681,7 +681,7 @@ U14API(int) U14DriverType(short hand)
** U14DriverName
** Returns the driver type as 3 character (ISA, PCI, USB or HSS))
****************************************************************************/
-U14API(short) U14DriverName(short hand, char* pBuf, WORD wMax)
+U14API(short) U14DriverName(short hand, char* pBuf, unsigned short wMax)
{
char* pName;
*pBuf = 0; // Start off with a blank string
@@ -779,7 +779,7 @@ U14API(short) U14Free1401(short hand)
** is called. After the peek is done, use U14GetDebugData to retrieve
** the results of the peek.
****************************************************************************/
-U14API(short) U14Peek1401(short hand, DWORD dwAddr, int nSize, int nRepeats)
+U14API(short) U14Peek1401(short hand, unsigned int dwAddr, int nSize, int nRepeats)
{
short sErr = CheckHandle(hand);
if (sErr == U14ERR_NOERROR)
@@ -813,7 +813,7 @@ U14API(short) U14Peek1401(short hand, DWORD dwAddr, int nSize, int nRepeats)
** If lRepeats is zero, the loop will continue until U14StopDebugLoop
** is called.
****************************************************************************/
-U14API(short) U14Poke1401(short hand, DWORD dwAddr, DWORD dwValue,
+U14API(short) U14Poke1401(short hand, unsigned int dwAddr, unsigned int dwValue,
int nSize, int nRepeats)
{
short sErr = CheckHandle(hand);
@@ -849,7 +849,7 @@ U14API(short) U14Poke1401(short hand, DWORD dwAddr, DWORD dwValue,
** DESCRIPTION Cause the 1401 to loop, writing a ramp to a location.
** If lRepeats is zero, the loop will continue until U14StopDebugLoop.
****************************************************************************/
-U14API(short) U14Ramp1401(short hand, DWORD dwAddr, DWORD dwDef, DWORD dwEnable,
+U14API(short) U14Ramp1401(short hand, unsigned int dwAddr, unsigned int dwDef, unsigned int dwEnable,
int nSize, int nRepeats)
{
short sErr = CheckHandle(hand);
@@ -887,7 +887,7 @@ U14API(short) U14Ramp1401(short hand, DWORD dwAddr, DWORD dwDef, DWORD dwEnable,
** DESCRIPTION Cause the 1401 to loop, reading from a ramping location.
** If lRepeats is zero, the loop will continue until U14StopDebugLoop
****************************************************************************/
-U14API(short) U14RampAddr(short hand, DWORD dwDef, DWORD dwEnable,
+U14API(short) U14RampAddr(short hand, unsigned int dwDef, unsigned int dwEnable,
int nSize, int nRepeats)
{
short sErr = CheckHandle(hand);
@@ -1024,7 +1024,7 @@ U14API(short) U14CheckSelfTest(short hand, U14LONG *pData)
/****************************************************************************
** U14GetUserMemorySize
****************************************************************************/
-U14API(short) U14GetUserMemorySize(short hand, DWORD *pMemorySize)
+U14API(short) U14GetUserMemorySize(short hand, unsigned int *pMemorySize)
{
// The original 1401 used a different command for getting the size
short sErr = U14SendString(hand, (asType1401[hand] == U14TYPE1401) ? "MEMTOP;" : "MEMTOP,?;");
@@ -1061,7 +1061,7 @@ U14API(short) U14TypeOf1401(short hand)
** U14NameOf1401
** Returns the type of the 1401 as a string, blank if unknown
****************************************************************************/
-U14API(short) U14NameOf1401(short hand, char* pBuf, WORD wMax)
+U14API(short) U14NameOf1401(short hand, char* pBuf, unsigned short wMax)
{
short sErr = CheckHandle(hand);
if (sErr == U14ERR_NOERROR)
@@ -1207,7 +1207,7 @@ static short U14TryToOpen(int n1401, long* plRetVal, short* psHandle)
{
short sErr = U14ERR_NOERROR;
HANDLE hDevice = INVALID_HANDLE_VALUE;
- DWORD dwErr = 0;
+ unsigned int dwErr = 0;
int nFirst, nLast, nDev = 0; /* Used for the search for a 1401 */
BOOL bOldName = FALSE; /* start by looking for a modern driver */
@@ -1262,7 +1262,7 @@ static short U14TryToOpen(int n1401, long* plRetVal, short* psHandle)
}
else
{
- DWORD dwe = GetLastError(); /* Get error code otherwise */
+ unsigned int dwe = GetLastError(); /* Get error code otherwise */
if ((dwe != ERROR_FILE_NOT_FOUND) || (dwErr == 0))
dwErr = dwe; /* Ignore repeats of 'not found' */
}
@@ -1454,7 +1454,7 @@ U14API(short) U14Close1401(short hand)
U14Reset1401(hand); // in case an active transfer running
for (j = 0; j < MAX_TRANSAREAS; ++j) // Locate locked areas
if (iAreaMask & (1 << j)) // And kill off any transfers
- U14UnSetTransfer(hand, (WORD)j);
+ U14UnSetTransfer(hand, (unsigned short)j);
}
#ifdef _IS_WINDOWS_
@@ -1581,7 +1581,7 @@ U14API(short) U14SendString(short hand, const char* pString)
if (bSpaceToSend)
{
PARAMBLK rData;
- DWORD dwBytes;
+ unsigned int dwBytes;
char tstr[MAXSTRLEN+5]; /* Buffer for chars */
if ((hand < 0) || (hand >= MAX1401))
@@ -1592,18 +1592,18 @@ U14API(short) U14SendString(short hand, const char* pString)
#ifndef _WIN64
if (!USE_NT_DIOC(hand)) /* Using WIN 95 driver access? */
{
- int iOK = DeviceIoControl(aHand1401[hand], (DWORD)U14_SENDSTRING,
+ int iOK = DeviceIoControl(aHand1401[hand], (unsigned int)U14_SENDSTRING,
NULL, 0, tstr, nChars,
&dwBytes, NULL);
if (iOK)
- sErr = (dwBytes >= (DWORD)nChars) ? U14ERR_NOERROR : U14ERR_DRIVCOMMS;
+ sErr = (dwBytes >= (unsigned int)nChars) ? U14ERR_NOERROR : U14ERR_DRIVCOMMS;
else
sErr = (short)GetLastError();
}
else
#endif
{
- int iOK = DeviceIoControl(aHand1401[hand],(DWORD)U14_SENDSTRING,
+ int iOK = DeviceIoControl(aHand1401[hand],(unsigned int)U14_SENDSTRING,
tstr, nChars,
&rData,sizeof(PARAMBLK),&dwBytes,NULL);
if (iOK && (dwBytes >= sizeof(PARAMBLK)))
@@ -1697,7 +1697,7 @@ U14API(short) U14SendChar(short hand, char cChar)
** error code. Any error from the device causes us to set up for
** a full reset.
****************************************************************************/
-U14API(short) U14GetString(short hand, char* pBuffer, WORD wMaxLen)
+U14API(short) U14GetString(short hand, char* pBuffer, unsigned short wMaxLen)
{
short sErr = CheckHandle(hand);
if (sErr != U14ERR_NOERROR) // If an error...
@@ -1726,8 +1726,8 @@ U14API(short) U14GetString(short hand, char* pBuffer, WORD wMaxLen)
{
if (asLastRetCode[hand] == U14ERR_NOERROR) /* all ok so far */
{
- DWORD dwBytes = 0;
- *((WORD *)pBuffer) = wMaxLen; /* set up length */
+ unsigned int dwBytes = 0;
+ *((unsigned short *)pBuffer) = wMaxLen; /* set up length */
#ifndef _WIN64
if (!USE_NT_DIOC(hand)) /* Win 95 DIOC here ? */
{
@@ -1737,9 +1737,9 @@ U14API(short) U14GetString(short hand, char* pBuffer, WORD wMaxLen)
if (wMaxLen > MAXSTRLEN) /* Truncate length */
wMaxLen = MAXSTRLEN;
- *((WORD *)tstr) = wMaxLen; /* set len */
+ *((unsigned short *)tstr) = wMaxLen; /* set len */
- iOK = DeviceIoControl(aHand1401[hand],(DWORD)U14_GETSTRING,
+ iOK = DeviceIoControl(aHand1401[hand],(unsigned int)U14_GETSTRING,
NULL, 0, tstr, wMaxLen+sizeof(short),
&dwBytes, NULL);
if (iOK) /* Device IO control OK ? */
@@ -1768,7 +1768,7 @@ U14API(short) U14GetString(short hand, char* pBuffer, WORD wMaxLen)
char* pMem = (char*)GlobalLock(hMem);
if (pMem)
{
- int iOK = DeviceIoControl(aHand1401[hand],(DWORD)U14_GETSTRING,
+ int iOK = DeviceIoControl(aHand1401[hand],(unsigned int)U14_GETSTRING,
NULL, 0, pMem, wMaxLen+sizeof(short),
&dwBytes, NULL);
if (iOK) /* Device IO control OK ? */
@@ -1946,7 +1946,7 @@ U14API(short) U14LineCount(short hand)
** other functions after getting an error and before using
** this function.
****************************************************************************/
-U14API(void) U14GetErrorString(short nErr, char* pStr, WORD wMax)
+U14API(void) U14GetErrorString(short nErr, char* pStr, unsigned short wMax)
{
char wstr[150];
@@ -2105,7 +2105,7 @@ U14API(void) U14GetErrorString(short nErr, char* pStr, WORD wMax)
break;
}
- if ((WORD)strlen(wstr) >= wMax-1) /* Check for string being too long */
+ if ((unsigned short)strlen(wstr) >= wMax-1) /* Check for string being too long */
wstr[wMax-1] = 0; /* and truncate it if so */
strcpy(pStr, wstr); /* Return the error string */
}
@@ -2120,8 +2120,8 @@ U14API(short) U14GetTransfer(short hand, TGET_TX_BLOCK *pTransBlock)
#ifdef _IS_WINDOWS_
if (sErr == U14ERR_NOERROR)
{
- DWORD dwBytes = 0;
- BOOL bOK = DeviceIoControl(aHand1401[hand], (DWORD)U14_GETTRANSFER, NULL, 0, pTransBlock,
+ unsigned int dwBytes = 0;
+ BOOL bOK = DeviceIoControl(aHand1401[hand], (unsigned int)U14_GETTRANSFER, NULL, 0, pTransBlock,
sizeof(TGET_TX_BLOCK), &dwBytes, NULL);
if (bOK && (dwBytes >= sizeof(TGET_TX_BLOCK)))
@@ -2145,12 +2145,12 @@ U14API(short) U14GetTransfer(short hand, TGET_TX_BLOCK *pTransBlock)
// 1 unable to access process (insufficient rights?)
// 2 unable to read process working set
// 3 unable to set process working set - bad parameters?
-U14API(short) U14WorkingSet(DWORD dwMinKb, DWORD dwMaxKb)
+U14API(short) U14WorkingSet(unsigned int dwMinKb, unsigned int dwMaxKb)
{
#ifdef _IS_WINDOWS_
short sRetVal = 0; // 0 means all is OK
HANDLE hProcess;
- DWORD dwVer = GetVersion();
+ unsigned int dwVer = GetVersion();
if (dwVer & 0x80000000) // is this not NT?
return 0; // then give up right now
@@ -2164,8 +2164,8 @@ U14API(short) U14WorkingSet(DWORD dwMinKb, DWORD dwMaxKb)
SIZE_T dwMinSize,dwMaxSize;
if (GetProcessWorkingSetSize(hProcess, &dwMinSize, &dwMaxSize))
{
- DWORD dwMin = dwMinKb << 10; // convert from kb to bytes
- DWORD dwMax = dwMaxKb << 10;
+ unsigned int dwMin = dwMinKb << 10; // convert from kb to bytes
+ unsigned int dwMax = dwMaxKb << 10;
// if we get here, we have managed to read the current size
if (dwMin > dwMinSize) // need to change sizes?
@@ -2200,7 +2200,7 @@ U14API(short) U14WorkingSet(DWORD dwMinKb, DWORD dwMaxKb)
** U14UnSetTransfer Cancels a transfer area
** wArea The index of a block previously used in by SetTransfer
*****************************************************************************/
-U14API(short) U14UnSetTransfer(short hand, WORD wArea)
+U14API(short) U14UnSetTransfer(short hand, unsigned short wArea)
{
short sErr = CheckHandle(hand);
#ifdef _IS_WINDOWS_
@@ -2223,13 +2223,13 @@ U14API(short) U14UnSetTransfer(short hand, WORD wArea)
/****************************************************************************
** U14SetTransArea Sets an area up to be used for transfers
-** WORD wArea The area number to set up
+** unsigned short wArea The area number to set up
** void *pvBuff The address of the buffer for the data.
-** DWORD dwLength The length of the buffer for the data
+** unsigned int dwLength The length of the buffer for the data
** short eSz The element size (used for byte swapping on the Mac)
****************************************************************************/
-U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
- DWORD dwLength, short eSz)
+U14API(short) U14SetTransArea(short hand, unsigned short wArea, void *pvBuff,
+ unsigned int dwLength, short eSz)
{
TRANSFERDESC td;
short sErr = CheckHandle(hand);
@@ -2254,7 +2254,7 @@ U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
#ifndef _WIN64
if (!USE_NT_DIOC(hand)) /* Use Win 9x DIOC? */
{
- DWORD dwBytes;
+ unsigned int dwBytes;
VXTRANSFERDESC vxDesc; /* Structure to pass to VXD */
vxDesc.wArea = wArea; /* Copy across simple params */
vxDesc.dwLength = dwLength;
@@ -2264,10 +2264,10 @@ U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
sErr = U14ERR_DRIVTOOOLD;
else
{
- vxDesc.dwAddrOfs = (DWORD)pvBuff; /* 32 bit offset */
+ vxDesc.dwAddrOfs = (unsigned int)pvBuff; /* 32 bit offset */
vxDesc.wAddrSel = 0;
- if (DeviceIoControl(aHand1401[hand], (DWORD)U14_SETTRANSFER,
+ if (DeviceIoControl(aHand1401[hand], (unsigned int)U14_SETTRANSFER,
pvBuff,dwLength, /* Will translate pointer */
&vxDesc,sizeof(VXTRANSFERDESC),
&dwBytes,NULL))
@@ -2285,13 +2285,13 @@ U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
#endif
{
PARAMBLK rWork;
- DWORD dwBytes;
+ unsigned int dwBytes;
td.wArea = wArea; /* Pure NT - put data into struct */
td.lpvBuff = pvBuff;
td.dwLength = dwLength;
td.eSize = 0; // Dummy element size
- if (DeviceIoControl(aHand1401[hand],(DWORD)U14_SETTRANSFER,
+ if (DeviceIoControl(aHand1401[hand],(unsigned int)U14_SETTRANSFER,
&td,sizeof(TRANSFERDESC),
&rWork,sizeof(PARAMBLK),&dwBytes,NULL))
{
@@ -2344,8 +2344,8 @@ U14API(short) U14SetTransArea(short hand, WORD wArea, void *pvBuff,
** Returns 1 if an event handle exists, 0 if all OK and no event handle or
** a negative code for an error.
****************************************************************************/
-U14API(short) U14SetTransferEvent(short hand, WORD wArea, BOOL bEvent,
- BOOL bToHost, DWORD dwStart, DWORD dwLength)
+U14API(short) U14SetTransferEvent(short hand, unsigned short wArea, BOOL bEvent,
+ BOOL bToHost, unsigned int dwStart, unsigned int dwLength)
{
#ifdef _IS_WINDOWS_
TCSBLOCK csBlock;
@@ -2416,7 +2416,7 @@ U14API(short) U14SetTransferEvent(short hand, WORD wArea, BOOL bEvent,
** Would a U14WaitTransferEvent() call return immediately? return 1 if so,
** 0 if not or a negative code if a problem.
****************************************************************************/
-U14API(int) U14TestTransferEvent(short hand, WORD wArea)
+U14API(int) U14TestTransferEvent(short hand, unsigned short wArea)
{
#ifdef _IS_WINDOWS_
int iErr = CheckHandle(hand);
@@ -2441,7 +2441,7 @@ U14API(int) U14TestTransferEvent(short hand, WORD wArea)
** Returns If no event handle then return immediately. Else return 1 if
** timed out or 0=event, and a negative code if a problem.
****************************************************************************/
-U14API(int) U14WaitTransferEvent(short hand, WORD wArea, int msTimeOut)
+U14API(int) U14WaitTransferEvent(short hand, unsigned short wArea, int msTimeOut)
{
#ifdef _IS_WINDOWS_
int iErr = CheckHandle(hand);
@@ -2466,13 +2466,13 @@ U14API(int) U14WaitTransferEvent(short hand, WORD wArea, int msTimeOut)
/****************************************************************************
** U14SetCircular Sets an area up for circular DMA transfers
-** WORD wArea The area number to set up
+** unsigned short wArea The area number to set up
** BOOL bToHost Sets the direction of data transfer
** void *pvBuff The address of the buffer for the data
-** DWORD dwLength The length of the buffer for the data
+** unsigned int dwLength The length of the buffer for the data
****************************************************************************/
-U14API(short) U14SetCircular(short hand, WORD wArea, BOOL bToHost,
- void *pvBuff, DWORD dwLength)
+U14API(short) U14SetCircular(short hand, unsigned short wArea, BOOL bToHost,
+ void *pvBuff, unsigned int dwLength)
{
short sErr = CheckHandle(hand);
if (sErr != U14ERR_NOERROR)
@@ -2495,14 +2495,14 @@ U14API(short) U14SetCircular(short hand, WORD wArea, BOOL bToHost,
else
{
PARAMBLK rWork;
- DWORD dwBytes;
+ unsigned int dwBytes;
TRANSFERDESC txDesc;
txDesc.wArea = wArea; /* Pure NT - put data into struct */
txDesc.lpvBuff = pvBuff;
txDesc.dwLength = dwLength;
txDesc.eSize = (short)bToHost; /* Use this for direction flag */
- if (DeviceIoControl(aHand1401[hand],(DWORD)U14_SETCIRCULAR,
+ if (DeviceIoControl(aHand1401[hand],(unsigned int)U14_SETCIRCULAR,
&txDesc, sizeof(TRANSFERDESC),
&rWork, sizeof(PARAMBLK),&dwBytes,NULL))
{
@@ -2542,7 +2542,7 @@ U14API(short) U14SetCircular(short hand, WORD wArea, BOOL bToHost,
** Function GetCircBlk returns the size (& start offset) of the next
** available block of circular data.
****************************************************************************/
-U14API(int) U14GetCircBlk(short hand, WORD wArea, DWORD *pdwOffs)
+U14API(int) U14GetCircBlk(short hand, unsigned short wArea, unsigned int *pdwOffs)
{
int lErr = CheckHandle(hand);
if (lErr != U14ERR_NOERROR)
@@ -2555,10 +2555,10 @@ U14API(int) U14GetCircBlk(short hand, WORD wArea, DWORD *pdwOffs)
#ifdef _IS_WINDOWS_
PARAMBLK rWork;
TCSBLOCK csBlock;
- DWORD dwBytes;
+ unsigned int dwBytes;
csBlock.longs[0] = wArea; // Area number into control block
rWork.sState = U14ERR_DRIVCOMMS;
- if (DeviceIoControl(aHand1401[hand], (DWORD)U14_GETCIRCBLK, &csBlock, sizeof(TCSBLOCK), &rWork, sizeof(PARAMBLK), &dwBytes, NULL) &&
+ if (DeviceIoControl(aHand1401[hand], (unsigned int)U14_GETCIRCBLK, &csBlock, sizeof(TCSBLOCK), &rWork, sizeof(PARAMBLK), &dwBytes, NULL) &&
(dwBytes >= sizeof(PARAMBLK)))
lErr = rWork.sState;
else
@@ -2591,8 +2591,8 @@ U14API(int) U14GetCircBlk(short hand, WORD wArea, DWORD *pdwOffs)
** resuse for circular transfers and returns the size (& start
** offset) of the next available block of circular data.
****************************************************************************/
-U14API(int) U14FreeCircBlk(short hand, WORD wArea, DWORD dwOffs, DWORD dwSize,
- DWORD *pdwOffs)
+U14API(int) U14FreeCircBlk(short hand, unsigned short wArea, unsigned int dwOffs, unsigned int dwSize,
+ unsigned int *pdwOffs)
{
int lErr = CheckHandle(hand);
if (lErr != U14ERR_NOERROR)
@@ -2603,12 +2603,12 @@ U14API(int) U14FreeCircBlk(short hand, WORD wArea, DWORD dwOffs, DWORD dwSize,
#ifdef _IS_WINDOWS_
PARAMBLK rWork;
TCSBLOCK csBlock;
- DWORD dwBytes;
+ unsigned int dwBytes;
csBlock.longs[0] = wArea; // Area number into control block
csBlock.longs[1] = dwOffs;
csBlock.longs[2] = dwSize;
rWork.sState = U14ERR_DRIVCOMMS;
- if (DeviceIoControl(aHand1401[hand], (DWORD)U14_FREECIRCBLK, &csBlock, sizeof(TCSBLOCK),
+ if (DeviceIoControl(aHand1401[hand], (unsigned int)U14_FREECIRCBLK, &csBlock, sizeof(TCSBLOCK),
&rWork, sizeof(PARAMBLK), &dwBytes, NULL) &&
(dwBytes >= sizeof(PARAMBLK)))
lErr = rWork.sState;
@@ -2647,7 +2647,7 @@ U14API(int) U14FreeCircBlk(short hand, WORD wArea, DWORD dwOffs, DWORD dwSize,
** which it should be to get a pointer
*****************************************************************************/
static short Transfer(short hand, BOOL bTo1401, char* pData,
- DWORD dwSize, DWORD dw1401, short eSz)
+ unsigned int dwSize, unsigned int dw1401, short eSz)
{
char strcopy[MAXSTRLEN+1]; // to hold copy of work string
short sResult = U14SetTransArea(hand, 0, (void *)pData, dwSize, eSz);
@@ -2670,8 +2670,8 @@ static short Transfer(short hand, BOOL bTo1401, char* pData,
/****************************************************************************
** Function ToHost transfers data into the host from the 1401
****************************************************************************/
-U14API(short) U14ToHost(short hand, char* pAddrHost, DWORD dwSize,
- DWORD dw1401, short eSz)
+U14API(short) U14ToHost(short hand, char* pAddrHost, unsigned int dwSize,
+ unsigned int dw1401, short eSz)
{
short sErr = CheckHandle(hand);
if ((sErr == U14ERR_NOERROR) && dwSize) // TOHOST is a constant
@@ -2682,8 +2682,8 @@ U14API(short) U14ToHost(short hand, char* pAddrHost, DWORD dwSize,
/****************************************************************************
** Function To1401 transfers data into the 1401 from the host
****************************************************************************/
-U14API(short) U14To1401(short hand, const char* pAddrHost,DWORD dwSize,
- DWORD dw1401, short eSz)
+U14API(short) U14To1401(short hand, const char* pAddrHost,unsigned int dwSize,
+ unsigned int dw1401, short eSz)
{
short sErr = CheckHandle(hand);
if ((sErr == U14ERR_NOERROR) && dwSize) // TO1401 is a constant
@@ -2707,7 +2707,7 @@ U14API(short) U14To1401(short hand, const char* pAddrHost,DWORD dwSize,
#define file_close(h) close(h)
#define file_seek(h, pos) lseek(h, pos, SEEK_SET)
#define file_read(h, buffer, size) (read(h, buffer, size) == (ssize_t)size)
-static DWORD GetModuleFileName(void* dummy, char* buffer, int max)
+static unsigned int GetModuleFileName(void* dummy, char* buffer, int max)
{
// The following works for Linux systems with a /proc file system.
char szProcPath[32];
@@ -2766,7 +2766,7 @@ U14API(short) U14LdCmd(short hand, const char* command)
// application was run from.
if (!bGotIt) // Still not got it?
{
- DWORD dwLen = GetModuleFileName(NULL, filnam, FNSZ); // Get app path
+ unsigned int dwLen = GetModuleFileName(NULL, filnam, FNSZ); // Get app path
if (dwLen > 0) // and use it as path if found
{
char* pStr = strrchr(filnam, PATHSEP); // Point to last separator
@@ -2821,7 +2821,7 @@ U14API(short) U14LdCmd(short hand, const char* command)
file_seek(iFHandle, sizeof(CMDHEAD));
if (file_read(iFHandle, pMem, (UINT)nComSize))
{
- sErr = U14SetTransArea(hand, 0, (void *)pMem, (DWORD)nComSize, ESZBYTES);
+ sErr = U14SetTransArea(hand, 0, (void *)pMem, (unsigned int)nComSize, ESZBYTES);
if (sErr == U14ERR_NOERROR)
{
sprintf(strcopy, "CLOAD,0,$%X;", (int)nComSize);
@@ -2858,9 +2858,9 @@ U14API(short) U14LdCmd(short hand, const char* command)
** Returns NOERROR code or a long with error in lo word and index of
** command that failed in high word
****************************************************************************/
-U14API(DWORD) U14Ld(short hand, const char* vl, const char* str)
+U14API(unsigned int) U14Ld(short hand, const char* vl, const char* str)
{
- DWORD dwIndex = 0; // index to current command
+ unsigned int dwIndex = 0; // index to current command
long lErr = U14ERR_NOERROR; // what the error was that went wrong
char strcopy[MAXSTRLEN+1]; // stores unmodified str parameter
char szFExt[8]; // The command file extension
@@ -2939,7 +2939,7 @@ U14API(DWORD) U14Ld(short hand, const char* vl, const char* str)
return lErr;
}
else
- return ((dwIndex<<16) | ((DWORD)lErr & 0x0000FFFF));
+ return ((dwIndex<<16) | ((unsigned int)lErr & 0x0000FFFF));
}
// Initialise the library (if not initialised) and return the library version
@@ -2951,7 +2951,7 @@ U14API(int) U14InitLib(void)
int i;
#ifdef _IS_WINDOWS_
int j;
- DWORD dwVersion = GetVersion();
+ unsigned int dwVersion = GetVersion();
bWindows9x = FALSE; // Assume not Win9x
if (dwVersion & 0x80000000) // if not windows NT
@@ -2993,12 +2993,12 @@ U14API(int) U14InitLib(void)
#ifdef _IS_WINDOWS_
#ifndef U14_NOT_DLL
/****************************************************************************
-** FUNCTION: DllMain(HANDLE, DWORD, LPVOID)
+** FUNCTION: DllMain(HANDLE, unsigned int, LPVOID)
** LibMain is called by Windows when the DLL is initialized, Thread Attached,
** and other times. Refer to SDK documentation, as to the different ways this
** may be called.
****************************************************************************/
-INT APIENTRY DllMain(HANDLE hInst, DWORD ul_reason_being_called, LPVOID lpReserved)
+INT APIENTRY DllMain(HANDLE hInst, unsigned int ul_reason_being_called, LPVOID lpReserved)
{
int iRetVal = 1;
diff --git a/drivers/staging/comedi/Kconfig b/drivers/staging/comedi/Kconfig
index 87e852a0ef49..8c8a55132257 100644
--- a/drivers/staging/comedi/Kconfig
+++ b/drivers/staging/comedi/Kconfig
@@ -110,15 +110,6 @@ menuconfig COMEDI_ISA_DRIVERS
if COMEDI_ISA_DRIVERS
-config COMEDI_ACL7225B
- tristate "ADlink NuDAQ ACL-7225b and compatibles support"
- ---help---
- Enable support for ADlink NuDAQ ACL-7225b and compatibles,
- ADlink ACL-7225b (acl7225b), ICP P16R16DIO (p16r16dio)
-
- To compile this driver as a module, choose M here: the module will be
- called acl7225b.
-
config COMEDI_PCL711
tristate "Advantech PCL-711/711b and ADlink ACL-8112 ISA card support"
---help---
@@ -137,14 +128,6 @@ config COMEDI_PCL724
To compile this driver as a module, choose M here: the module will be
called pcl724.
-config COMEDI_PCL725
- tristate "Advantech PCL-725 and compatible ISA card support"
- ---help---
- Enable support for Advantech PCL-725 and compatible ISA cards.
-
- To compile this driver as a module, choose M here: the module will be
- called pcl725.
-
config COMEDI_PCL726
tristate "Advantech PCL-726 and compatible ISA card support"
---help---
@@ -154,10 +137,21 @@ config COMEDI_PCL726
called pcl726.
config COMEDI_PCL730
- tristate "Advantech PCL-730 and ADlink ACL-7130 ISA card support"
+ tristate "Simple Digital I/O board support (8-bit ports)"
---help---
- Enable support for Advantech PCL-730, ICP ISO-730 and ADlink
- ACL-7130 ISA cards
+ Enable support for various simple ISA or PC/104 Digital I/O boards.
+ These boards all use 8-bit I/O ports.
+
+ Advantech PCL-730 isolated - 16 in/16 out ttl - 16 in/16 out
+ ICP ISO-730 isolated - 16 in/16 out ttl - 16 in/16 out
+ ADlink ACL-7130 isolated - 16 in/16 out ttl - 16 in/16 out
+ Advantech PCM-3730 isolated - 8 in/8 out ttl - 16 in/16 out
+ Advantech PCL-725 isolated - 8 in/8 out
+ ICP P8R8-DIO isolated - 8 in/8 out
+ ADlink ACL-7225b isolated - 16 in/16 out
+ ICP P16R16-DIO isolated - 16 in/16 out
+ Advantech PCL-733 isolated - 32 in
+ Advantech PCL-734 isolated - 32 out
To compile this driver as a module, choose M here: the module will be
called pcl730.
@@ -201,14 +195,6 @@ config COMEDI_PCM3724
To compile this driver as a module, choose M here: the module will be
called pcm3724.
-config COMEDI_PCM3730
- tristate "Advantech PCM-3730 and clone PC/104 board support"
- ---help---
- Enable support for Advantech PCM-3730 and clone PC/104 boards
-
- To compile this driver as a module, choose M here: the module will be
- called pcm3730.
-
config COMEDI_AMPLC_DIO200_ISA
tristate "Amplicon PC212E/PC214E/PC215E/PC218E/PC272E"
select COMEDI_AMPLC_DIO200
@@ -543,12 +529,19 @@ config COMEDI_POC
tristate "Generic driver for very simple devices"
---help---
Enable generic support for very simple / POC (Piece of Crap) boards,
- Keithley Metrabyte DAC-02 (dac02), Advantech PCL-733 (pcl733) and
- PCL-734 (pcl734)
+ Keithley Metrabyte DAC-02 (dac02).
To compile this driver as a module, choose M here: the module will be
called poc.
+config COMEDI_S526
+ tristate "Sensoray s526 support"
+ ---help---
+ Enable support for Sensoray s526
+
+ To compile this driver as a module, choose M here: the module will be
+ called s526.
+
endif # COMEDI_ISA_DRIVERS
menuconfig COMEDI_PCI_DRIVERS
@@ -1076,14 +1069,6 @@ config COMEDI_RTD520
To compile this driver as a module, choose M here: the module will be
called rtd520.
-config COMEDI_S526
- tristate "Sensoray s526 support"
- ---help---
- Enable support for Sensoray s526
-
- To compile this driver as a module, choose M here: the module will be
- called s526.
-
config COMEDI_S626
tristate "Sensoray 626 support"
select COMEDI_FC
diff --git a/drivers/staging/comedi/comedi.h b/drivers/staging/comedi/comedi.h
index 4233605df30a..6bbbe5b08954 100644
--- a/drivers/staging/comedi/comedi.h
+++ b/drivers/staging/comedi/comedi.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _COMEDI_H
diff --git a/drivers/staging/comedi/comedi_buf.c b/drivers/staging/comedi/comedi_buf.c
index d4be0e68509b..b4c001b6f88f 100644
--- a/drivers/staging/comedi/comedi_buf.c
+++ b/drivers/staging/comedi/comedi_buf.c
@@ -13,10 +13,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "comedidev.h"
diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c
index ad208cdd53d4..2dfb06aedb15 100644
--- a/drivers/staging/comedi/comedi_compat32.c
+++ b/drivers/staging/comedi/comedi_compat32.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#include <linux/uaccess.h>
diff --git a/drivers/staging/comedi/comedi_compat32.h b/drivers/staging/comedi/comedi_compat32.h
index 60cf51c4a793..28e3c3059037 100644
--- a/drivers/staging/comedi/comedi_compat32.h
+++ b/drivers/staging/comedi/comedi_compat32.h
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _COMEDI_COMPAT32_H
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 924c54c9c31f..0794aacc928a 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#undef DEBUG
@@ -536,6 +531,23 @@ static bool comedi_is_subdevice_idle(struct comedi_subdevice *s)
return (runflags & (SRF_ERROR | SRF_RUNNING)) ? false : true;
}
+/**
+ * comedi_alloc_spriv() - Allocate memory for the subdevice private data.
+ * @s: comedi_subdevice struct
+ * @size: size of the memory to allocate
+ *
+ * This also sets the subdevice runflags to allow the core to automatically
+ * free the private data during the detach.
+ */
+void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size)
+{
+ s->private = kzalloc(size, GFP_KERNEL);
+ if (s->private)
+ comedi_set_subdevice_runflags(s, ~0, SRF_FREE_SPRIV);
+ return s->private;
+}
+EXPORT_SYMBOL_GPL(comedi_alloc_spriv);
+
/*
This function restores a subdevice to an idle state.
*/
@@ -665,7 +677,7 @@ static int do_bufconfig_ioctl(struct comedi_device *dev,
if (copy_from_user(&bc, arg, sizeof(bc)))
return -EFAULT;
- if (bc.subdevice >= dev->n_subdevices || bc.subdevice < 0)
+ if (bc.subdevice >= dev->n_subdevices)
return -EINVAL;
s = &dev->subdevices[bc.subdevice];
@@ -918,7 +930,7 @@ static int do_bufinfo_ioctl(struct comedi_device *dev,
if (copy_from_user(&bi, arg, sizeof(bi)))
return -EFAULT;
- if (bi.subdevice >= dev->n_subdevices || bi.subdevice < 0)
+ if (bi.subdevice >= dev->n_subdevices)
return -EINVAL;
s = &dev->subdevices[bi.subdevice];
diff --git a/drivers/staging/comedi/comedi_pci.c b/drivers/staging/comedi/comedi_pci.c
index 5fad084cfbd4..abbc0e4f5c51 100644
--- a/drivers/staging/comedi/comedi_pci.c
+++ b/drivers/staging/comedi/comedi_pci.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/pci.h>
diff --git a/drivers/staging/comedi/comedi_pcmcia.c b/drivers/staging/comedi/comedi_pcmcia.c
index 453ff3b28617..9d49d5d01ad9 100644
--- a/drivers/staging/comedi/comedi_pcmcia.c
+++ b/drivers/staging/comedi/comedi_pcmcia.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/kernel.h>
diff --git a/drivers/staging/comedi/comedi_usb.c b/drivers/staging/comedi/comedi_usb.c
index 9d9716a248f1..13f18bef6091 100644
--- a/drivers/staging/comedi/comedi_usb.c
+++ b/drivers/staging/comedi/comedi_usb.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/usb.h>
@@ -35,6 +31,18 @@ struct usb_interface *comedi_to_usb_interface(struct comedi_device *dev)
EXPORT_SYMBOL_GPL(comedi_to_usb_interface);
/**
+ * comedi_to_usb_dev() - comedi_device pointer to usb_device pointer.
+ * @dev: comedi_device struct
+ */
+struct usb_device *comedi_to_usb_dev(struct comedi_device *dev)
+{
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+
+ return intf ? interface_to_usbdev(intf) : NULL;
+}
+EXPORT_SYMBOL_GPL(comedi_to_usb_dev);
+
+/**
* comedi_usb_auto_config() - Configure/probe a comedi USB driver.
* @intf: usb_interface struct
* @driver: comedi_driver struct
diff --git a/drivers/staging/comedi/comedidev.h b/drivers/staging/comedi/comedidev.h
index cdd472094cee..b75915f30f48 100644
--- a/drivers/staging/comedi/comedidev.h
+++ b/drivers/staging/comedi/comedidev.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _COMEDIDEV_H
@@ -270,11 +265,14 @@ enum subdevice_runflags {
/* indicates an COMEDI_CB_ERROR event has occurred since the last
* command was started */
SRF_ERROR = 0x00000004,
- SRF_RUNNING = 0x08000000
+ SRF_RUNNING = 0x08000000,
+ SRF_FREE_SPRIV = 0x80000000, /* free s->private on detach */
};
bool comedi_is_subdevice_running(struct comedi_subdevice *s);
+void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size);
+
int comedi_check_chanlist(struct comedi_subdevice *s,
int n,
unsigned int *chanlist);
@@ -312,6 +310,18 @@ struct comedi_lrange {
struct comedi_krange range[GCC_ZERO_LENGTH_ARRAY];
};
+static inline bool comedi_range_is_bipolar(struct comedi_subdevice *s,
+ unsigned int range)
+{
+ return s->range_table->range[range].min < 0;
+}
+
+static inline bool comedi_range_is_unipolar(struct comedi_subdevice *s,
+ unsigned int range)
+{
+ return s->range_table->range[range].min >= 0;
+}
+
/* some silly little inline functions */
static inline unsigned int bytes_per_sample(const struct comedi_subdevice *subd)
@@ -349,7 +359,12 @@ void comedi_buf_memcpy_from(struct comedi_async *async, unsigned int offset,
int comedi_alloc_subdevices(struct comedi_device *, int);
-void comedi_spriv_free(struct comedi_device *, int subdev_num);
+int comedi_load_firmware(struct comedi_device *, struct device *,
+ const char *name,
+ int (*cb)(struct comedi_device *,
+ const u8 *data, size_t size,
+ unsigned long context),
+ unsigned long context);
int __comedi_request_region(struct comedi_device *,
unsigned long start, unsigned long len);
@@ -489,6 +504,7 @@ struct usb_driver;
struct usb_interface;
struct usb_interface *comedi_to_usb_interface(struct comedi_device *);
+struct usb_device *comedi_to_usb_dev(struct comedi_device *);
int comedi_usb_auto_config(struct usb_interface *, struct comedi_driver *,
unsigned long context);
diff --git a/drivers/staging/comedi/comedilib.h b/drivers/staging/comedi/comedilib.h
index ca92c43fdb38..1a78b15543c4 100644
--- a/drivers/staging/comedi/comedilib.h
+++ b/drivers/staging/comedi/comedilib.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _LINUX_COMEDILIB_H
diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c
index 06d190f8fd34..e25eba5713c1 100644
--- a/drivers/staging/comedi/drivers.c
+++ b/drivers/staging/comedi/drivers.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#include <linux/device.h>
@@ -38,6 +33,7 @@
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/interrupt.h>
+#include <linux/firmware.h>
#include "comedidev.h"
#include "comedi_internal.h"
@@ -87,18 +83,6 @@ int comedi_alloc_subdevices(struct comedi_device *dev, int num_subdevices)
}
EXPORT_SYMBOL_GPL(comedi_alloc_subdevices);
-void comedi_spriv_free(struct comedi_device *dev, int subdev_num)
-{
- struct comedi_subdevice *s;
-
- if (dev->subdevices && subdev_num < dev->n_subdevices) {
- s = &dev->subdevices[subdev_num];
- kfree(s->private);
- s->private = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(comedi_spriv_free);
-
static void cleanup_device(struct comedi_device *dev)
{
int i;
@@ -107,6 +91,8 @@ static void cleanup_device(struct comedi_device *dev)
if (dev->subdevices) {
for (i = 0; i < dev->n_subdevices; i++) {
s = &dev->subdevices[i];
+ if (s->runflags & SRF_FREE_SPRIV)
+ kfree(s->private);
comedi_free_subdevice_minor(s);
if (s->async) {
comedi_buf_alloc(dev, s, 0);
@@ -352,6 +338,38 @@ static void comedi_report_boards(struct comedi_driver *driv)
}
/**
+ * comedi_load_firmware() - Request and load firmware for a device.
+ * @dev: comedi_device struct
+ * @hw_device: device struct for the comedi_device
+ * @name: the name of the firmware image
+ * @cb: callback to the upload the firmware image
+ * @context: private context from the driver
+ */
+int comedi_load_firmware(struct comedi_device *dev,
+ struct device *device,
+ const char *name,
+ int (*cb)(struct comedi_device *dev,
+ const u8 *data, size_t size,
+ unsigned long context),
+ unsigned long context)
+{
+ const struct firmware *fw;
+ int ret;
+
+ if (!cb)
+ return -EINVAL;
+
+ ret = request_firmware(&fw, name, device);
+ if (ret == 0) {
+ ret = cb(dev, fw->data, fw->size, context);
+ release_firmware(fw);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(comedi_load_firmware);
+
+/**
* __comedi_request_region() - Request an I/O reqion for a legacy driver.
* @dev: comedi_device struct
* @start: base address of the I/O reqion
diff --git a/drivers/staging/comedi/drivers/8253.h b/drivers/staging/comedi/drivers/8253.h
index 429e0d60c0a3..3abedcd2527b 100644
--- a/drivers/staging/comedi/drivers/8253.h
+++ b/drivers/staging/comedi/drivers/8253.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _8253_H
diff --git a/drivers/staging/comedi/drivers/8255.c b/drivers/staging/comedi/drivers/8255.c
index 1d48aa602ece..94e17500150f 100644
--- a/drivers/staging/comedi/drivers/8255.c
+++ b/drivers/staging/comedi/drivers/8255.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: 8255
@@ -81,7 +76,6 @@ I/O port base address can be found in the output of 'lspci -v'.
#include "../comedidev.h"
#include <linux/ioport.h>
-#include <linux/slab.h>
#include "comedi_fc.h"
#include "8255.h"
@@ -290,15 +284,13 @@ int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s,
{
struct subdev_8255_private *spriv;
- spriv = kzalloc(sizeof(*spriv), GFP_KERNEL);
+ spriv = comedi_alloc_spriv(s, sizeof(*spriv));
if (!spriv)
return -ENOMEM;
spriv->iobase = iobase;
spriv->io = io ? io : subdev_8255_io;
- s->private = spriv;
-
s->type = COMEDI_SUBD_DIO;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
s->n_chan = 24;
@@ -391,7 +383,6 @@ static void dev_8255_detach(struct comedi_device *dev)
spriv = s->private;
release_region(spriv->iobase, _8255_SIZE);
}
- comedi_spriv_free(dev, i);
}
}
diff --git a/drivers/staging/comedi/drivers/8255.h b/drivers/staging/comedi/drivers/8255.h
index 0f6e7492b7db..4f16ea78f86a 100644
--- a/drivers/staging/comedi/drivers/8255.h
+++ b/drivers/staging/comedi/drivers/8255.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _8255_H
diff --git a/drivers/staging/comedi/drivers/8255_pci.c b/drivers/staging/comedi/drivers/8255_pci.c
index 76dec96aeb2a..3d3547c19480 100644
--- a/drivers/staging/comedi/drivers/8255_pci.c
+++ b/drivers/staging/comedi/drivers/8255_pci.c
@@ -19,10 +19,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -242,10 +238,7 @@ static int pci_8255_auto_attach(struct comedi_device *dev,
static void pci_8255_detach(struct comedi_device *dev)
{
struct pci_8255_private *devpriv = dev->private;
- int i;
- for (i = 0; i < dev->n_subdevices; i++)
- comedi_spriv_free(dev, i);
if (devpriv && devpriv->mmio_base)
iounmap(devpriv->mmio_base);
comedi_pci_disable(dev);
diff --git a/drivers/staging/comedi/drivers/Makefile b/drivers/staging/comedi/drivers/Makefile
index 57e984f0f462..dbb93e332487 100644
--- a/drivers/staging/comedi/drivers/Makefile
+++ b/drivers/staging/comedi/drivers/Makefile
@@ -11,19 +11,16 @@ obj-$(CONFIG_COMEDI_SERIAL2002) += serial2002.o
obj-$(CONFIG_COMEDI_SKEL) += skel.o
# Comedi ISA drivers
-obj-$(CONFIG_COMEDI_ACL7225B) += acl7225b.o
obj-$(CONFIG_COMEDI_AMPLC_DIO200_ISA) += amplc_dio200.o
obj-$(CONFIG_COMEDI_AMPLC_PC263_ISA) += amplc_pc263.o
obj-$(CONFIG_COMEDI_PCL711) += pcl711.o
obj-$(CONFIG_COMEDI_PCL724) += pcl724.o
-obj-$(CONFIG_COMEDI_PCL725) += pcl725.o
obj-$(CONFIG_COMEDI_PCL726) += pcl726.o
obj-$(CONFIG_COMEDI_PCL730) += pcl730.o
obj-$(CONFIG_COMEDI_PCL812) += pcl812.o
obj-$(CONFIG_COMEDI_PCL816) += pcl816.o
obj-$(CONFIG_COMEDI_PCL818) += pcl818.o
obj-$(CONFIG_COMEDI_PCM3724) += pcm3724.o
-obj-$(CONFIG_COMEDI_PCM3730) += pcm3730.o
obj-$(CONFIG_COMEDI_RTI800) += rti800.o
obj-$(CONFIG_COMEDI_RTI802) += rti802.o
obj-$(CONFIG_COMEDI_DAS16M1) += das16m1.o
@@ -55,6 +52,7 @@ obj-$(CONFIG_COMEDI_PCMMIO) += pcmmio.o
obj-$(CONFIG_COMEDI_PCMUIO) += pcmuio.o
obj-$(CONFIG_COMEDI_MULTIQ3) += multiq3.o
obj-$(CONFIG_COMEDI_POC) += poc.o
+obj-$(CONFIG_COMEDI_S526) += s526.o
# Comedi PCI drivers
obj-$(CONFIG_COMEDI_8255_PCI) += 8255_pci.o
@@ -110,7 +108,6 @@ obj-$(CONFIG_COMEDI_NI_LABPC_PCI) += ni_labpc_pci.o
obj-$(CONFIG_COMEDI_NI_PCIDIO) += ni_pcidio.o
obj-$(CONFIG_COMEDI_NI_PCIMIO) += ni_pcimio.o
obj-$(CONFIG_COMEDI_RTD520) += rtd520.o
-obj-$(CONFIG_COMEDI_S526) += s526.o
obj-$(CONFIG_COMEDI_S626) += s626.o
obj-$(CONFIG_COMEDI_SSV_DNP) += ssv_dnp.o
diff --git a/drivers/staging/comedi/drivers/acl7225b.c b/drivers/staging/comedi/drivers/acl7225b.c
deleted file mode 100644
index 9e2c7aeea535..000000000000
--- a/drivers/staging/comedi/drivers/acl7225b.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * comedi/drivers/acl7225b.c
- * Driver for Adlink NuDAQ ACL-7225b and clones
- * José Luis Sánchez
- */
-/*
-Driver: acl7225b
-Description: Adlink NuDAQ ACL-7225b & compatibles
-Author: José Luis Sánchez (jsanchezv@teleline.es)
-Status: testing
-Devices: [Adlink] ACL-7225b (acl7225b), [ICP] P16R16DIO (p16r16dio)
-*/
-
-#include "../comedidev.h"
-
-#include <linux/ioport.h>
-
-#define ACL7225_RIO_LO 0 /* Relays input/output low byte (R0-R7) */
-#define ACL7225_RIO_HI 1 /* Relays input/output high byte (R8-R15) */
-#define ACL7225_DI_LO 2 /* Digital input low byte (DI0-DI7) */
-#define ACL7225_DI_HI 3 /* Digital input high byte (DI8-DI15) */
-
-struct acl7225b_boardinfo {
- const char *name;
- int io_range;
-};
-
-static const struct acl7225b_boardinfo acl7225b_boards[] = {
- {
- .name = "acl7225b",
- .io_range = 8, /* only 4 are used */
- }, {
- .name = "p16r16dio",
- .io_range = 4,
- },
-};
-
-static int acl7225b_do_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- unsigned long reg = (unsigned long)s->private;
- unsigned int mask = data[0];
- unsigned int bits = data[1];
-
- if (mask) {
- s->state &= ~mask;
- s->state |= (bits & mask);
-
- if (mask & 0x00ff)
- outb(s->state & 0xff, dev->iobase + reg);
- if (mask & 0xff00)
- outb((s->state >> 8), dev->iobase + reg + 1);
- }
-
- data[1] = s->state;
-
- return insn->n;
-}
-
-static int acl7225b_di_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- unsigned long reg = (unsigned long)s->private;
-
- data[1] = inb(dev->iobase + reg) |
- (inb(dev->iobase + reg + 1) << 8);
-
- return insn->n;
-}
-
-static int acl7225b_attach(struct comedi_device *dev,
- struct comedi_devconfig *it)
-{
- const struct acl7225b_boardinfo *board = comedi_board(dev);
- struct comedi_subdevice *s;
- int ret;
-
- ret = comedi_request_region(dev, it->options[0], board->io_range);
- if (ret)
- return ret;
-
- ret = comedi_alloc_subdevices(dev, 3);
- if (ret)
- return ret;
-
- s = &dev->subdevices[0];
- /* Relays outputs */
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = acl7225b_do_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)ACL7225_RIO_LO;
-
- s = &dev->subdevices[1];
- /* Relays status */
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = acl7225b_di_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)ACL7225_RIO_LO;
-
- s = &dev->subdevices[2];
- /* Isolated digital inputs */
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = acl7225b_di_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)ACL7225_DI_LO;
-
- return 0;
-}
-
-static struct comedi_driver acl7225b_driver = {
- .driver_name = "acl7225b",
- .module = THIS_MODULE,
- .attach = acl7225b_attach,
- .detach = comedi_legacy_detach,
- .board_name = &acl7225b_boards[0].name,
- .num_names = ARRAY_SIZE(acl7225b_boards),
- .offset = sizeof(struct acl7225b_boardinfo),
-};
-module_comedi_driver(acl7225b_driver);
-
-MODULE_DESCRIPTION("Comedi: NuDAQ ACL-7225B, 16 Relay & 16 Isolated DI Card");
-MODULE_AUTHOR("Comedi http://www.comedi.org");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Chrono.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Chrono.c
index 5bd7fe64637c..d91f586fdd26 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Chrono.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Chrono.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Dig_io.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Dig_io.c
index 6b38ce7a275b..27de18e79895 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Dig_io.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Dig_io.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_INCCPT.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_INCCPT.c
index 70a7f953fa2f..c9db601da2c9 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_INCCPT.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_INCCPT.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Inp_cpt.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Inp_cpt.c
index be0c6adbdc94..6bbcb06cc279 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Inp_cpt.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Inp_cpt.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Pwm.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Pwm.c
index a211e78dd3ba..5c830337db85 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Pwm.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Pwm.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Ssi.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Ssi.c
index 97e7eec343d7..6ef1d6a434d9 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Ssi.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Ssi.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Tor.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Tor.c
index 3bc9826ce40b..0b79531ac24b 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Tor.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Tor.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/APCI1710_Ttl.c b/drivers/staging/comedi/drivers/addi-data/APCI1710_Ttl.c
index c8238b8921cd..fb56360444ee 100644
--- a/drivers/staging/comedi/drivers/addi-data/APCI1710_Ttl.c
+++ b/drivers/staging/comedi/drivers/addi-data/APCI1710_Ttl.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/addi_common.c b/drivers/staging/comedi/drivers/addi-data/addi_common.c
index 0c3db57a50f4..f25e0085219d 100644
--- a/drivers/staging/comedi/drivers/addi-data/addi_common.c
+++ b/drivers/staging/comedi/drivers/addi-data/addi_common.c
@@ -20,13 +20,6 @@ This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this
-source code.
-
@endverbatim
*/
/*
@@ -46,10 +39,6 @@ source code.
+-----------------------------------------------------------------------+
*/
-#ifndef COMEDI_SUBD_TTLIO
-#define COMEDI_SUBD_TTLIO 11 /* Digital Input Output But TTL */
-#endif
-
static int i_ADDIDATA_InsnReadEeprom(struct comedi_device *dev,
struct comedi_subdevice *s,
struct comedi_insn *insn,
@@ -105,23 +94,14 @@ static int addi_auto_attach(struct comedi_device *dev,
if (ret)
return ret;
- if (!this_board->pc_EepromChip ||
- strcmp(this_board->pc_EepromChip, ADDIDATA_9054)) {
- /* board does not have an eeprom or is not ADDIDATA_9054 */
- if (this_board->i_IorangeBase1)
- dev->iobase = pci_resource_start(pcidev, 1);
- else
- dev->iobase = pci_resource_start(pcidev, 0);
-
- devpriv->iobase = dev->iobase;
- devpriv->i_IobaseAmcc = pci_resource_start(pcidev, 0);
- devpriv->i_IobaseAddon = pci_resource_start(pcidev, 2);
- } else {
- /* board has an ADDIDATA_9054 eeprom */
- dev->iobase = pci_resource_start(pcidev, 2);
- devpriv->iobase = pci_resource_start(pcidev, 2);
- devpriv->dw_AiBase = pci_ioremap_bar(pcidev, 3);
- }
+ if (this_board->i_IorangeBase1)
+ dev->iobase = pci_resource_start(pcidev, 1);
+ else
+ dev->iobase = pci_resource_start(pcidev, 0);
+
+ devpriv->iobase = dev->iobase;
+ devpriv->i_IobaseAmcc = pci_resource_start(pcidev, 0);
+ devpriv->i_IobaseAddon = pci_resource_start(pcidev, 2);
devpriv->i_IobaseReserved = pci_resource_start(pcidev, 3);
/* Initialize parameters that can be overridden in EEPROM */
@@ -132,7 +112,6 @@ static int addi_auto_attach(struct comedi_device *dev,
devpriv->s_EeParameters.i_NbrDiChannel = this_board->i_NbrDiChannel;
devpriv->s_EeParameters.i_NbrDoChannel = this_board->i_NbrDoChannel;
devpriv->s_EeParameters.i_DoMaxdata = this_board->i_DoMaxdata;
- devpriv->s_EeParameters.i_Dma = this_board->i_Dma;
devpriv->s_EeParameters.i_Timer = this_board->i_Timer;
devpriv->s_EeParameters.ui_MinAcquisitiontimeNs =
this_board->ui_MinAcquisitiontimeNs;
@@ -191,9 +170,6 @@ static int addi_auto_attach(struct comedi_device *dev,
s->len_chanlist = this_board->i_AiChannelList;
s->range_table = this_board->pr_AiRangelist;
- /* Set the initialisation flag */
- devpriv->b_AiInitialisation = 1;
-
s->insn_config = this_board->ai_config;
s->insn_read = this_board->ai_read;
s->insn_write = this_board->ai_write;
@@ -215,8 +191,6 @@ static int addi_auto_attach(struct comedi_device *dev,
s->maxdata = devpriv->s_EeParameters.i_AoMaxdata;
s->len_chanlist =
devpriv->s_EeParameters.i_NbrAoChannel;
- s->range_table = this_board->pr_AoRangelist;
- s->insn_config = this_board->ao_config;
s->insn_write = this_board->ao_write;
} else {
s->type = COMEDI_SUBD_UNUSED;
@@ -281,22 +255,7 @@ static int addi_auto_attach(struct comedi_device *dev,
/* Allocate and Initialise TTL */
s = &dev->subdevices[5];
- if (this_board->i_NbrTTLChannel) {
- s->type = COMEDI_SUBD_TTLIO;
- s->subdev_flags =
- SDF_WRITEABLE | SDF_READABLE | SDF_GROUND | SDF_COMMON;
- s->n_chan = this_board->i_NbrTTLChannel;
- s->maxdata = 1;
- s->io_bits = 0; /* all bits input */
- s->len_chanlist = this_board->i_NbrTTLChannel;
- s->range_table = &range_digital;
- s->insn_config = this_board->ttl_config;
- s->insn_bits = this_board->ttl_bits;
- s->insn_read = this_board->ttl_read;
- s->insn_write = this_board->ttl_write;
- } else {
- s->type = COMEDI_SUBD_UNUSED;
- }
+ s->type = COMEDI_SUBD_UNUSED;
/* EEPROM */
s = &dev->subdevices[6];
@@ -323,8 +282,6 @@ static void i_ADDI_Detach(struct comedi_device *dev)
i_ADDI_Reset(dev);
if (dev->irq)
free_irq(dev->irq, dev);
- if (devpriv->dw_AiBase)
- iounmap(devpriv->dw_AiBase);
}
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/addi-data/addi_common.h b/drivers/staging/comedi/drivers/addi-data/addi_common.h
index c034bf1426eb..f1be5ade9962 100644
--- a/drivers/staging/comedi/drivers/addi-data/addi_common.h
+++ b/drivers/staging/comedi/drivers/addi-data/addi_common.h
@@ -18,12 +18,8 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
-#define LOBYTE(W) (unsigned char)((W) & 0xFF)
-#define HIBYTE(W) (unsigned char)(((W) >> 8) & 0xFF)
-#define MAKEWORD(H, L) (unsigned short)((L) | ((H) << 8))
#define LOWORD(W) (unsigned short)((W) & 0xFFFF)
#define HIWORD(W) (unsigned short)(((W) >> 16) & 0xFFFF)
-#define MAKEDWORD(H, L) (unsigned int)((L) | ((H) << 16))
#define ADDI_ENABLE 1
#define ADDI_DISABLE 0
@@ -33,8 +29,6 @@
#define ADDIDATA_NO_EEPROM 0
#define ADDIDATA_93C76 "93C76"
#define ADDIDATA_S5920 "S5920"
-#define ADDIDATA_S5933 "S5933"
-#define ADDIDATA_9054 "9054"
/* ADDIDATA Enable Disable */
#define ADDIDATA_ENABLE 1
@@ -55,17 +49,12 @@ struct addi_board {
int i_AiMaxdata; /* resolution of A/D */
int i_AoMaxdata; /* resolution of D/A */
const struct comedi_lrange *pr_AiRangelist; /* rangelist for A/D */
- const struct comedi_lrange *pr_AoRangelist; /* rangelist for D/A */
int i_NbrDiChannel; /* Number of DI channels */
int i_NbrDoChannel; /* Number of DO channels */
int i_DoMaxdata; /* data to set all channels high */
- int i_NbrTTLChannel; /* Number of TTL channels */
-
- int i_Dma; /* dma present or not */
int i_Timer; /* timer subdevice present or not */
- unsigned char b_AvailableConvertUnit;
unsigned int ui_MinAcquisitiontimeNs; /* Minimum Acquisition in Nano secs */
unsigned int ui_MinDelaytimeNs; /* Minimum Delay in Nano secs */
@@ -90,12 +79,8 @@ struct addi_board {
int (*ai_cancel)(struct comedi_device *, struct comedi_subdevice *);
/* Analog Output */
- int (*ao_config)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
int (*ao_write)(struct comedi_device *, struct comedi_subdevice *,
struct comedi_insn *, unsigned int *);
- int (*ao_bits)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
/* Digital Input */
int (*di_config)(struct comedi_device *, struct comedi_subdevice *,
@@ -126,16 +111,6 @@ struct addi_board {
struct comedi_insn *, unsigned int *);
int (*timer_bits)(struct comedi_device *, struct comedi_subdevice *,
struct comedi_insn *, unsigned int *);
-
- /* TTL IO */
- int (*ttl_config)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
- int (*ttl_bits)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
- int (*ttl_read)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
- int (*ttl_write)(struct comedi_device *, struct comedi_subdevice *,
- struct comedi_insn *, unsigned int *);
};
/* MODULE INFO STRUCTURE */
@@ -283,58 +258,41 @@ union str_ModuleInfo {
/* Private structure for the addi_apci3120 driver */
struct addi_private {
-
int iobase;
int i_IobaseAmcc; /* base+size for AMCC chip */
int i_IobaseAddon; /* addon base address */
int i_IobaseReserved;
- void __iomem *dw_AiBase;
unsigned char b_AiContinuous; /* we do unlimited AI */
- unsigned char b_AiInitialisation;
unsigned int ui_AiActualScan; /* how many scans we finished */
- unsigned int ui_AiBufferPtr; /* data buffer ptr in samples */
unsigned int ui_AiNbrofChannels; /* how many channels is measured */
unsigned int ui_AiScanLength; /* Length of actual scanlist */
- unsigned int ui_AiActualScanPosition; /* position in actual scan */
unsigned int *pui_AiChannelList; /* actual chanlist */
unsigned int ui_AiChannelList[32]; /* actual chanlist */
- unsigned char b_AiChannelConfiguration[32]; /* actual chanlist */
unsigned int ui_AiReadData[32];
- unsigned int dw_AiInitialised;
unsigned int ui_AiTimer0; /* Timer Constant for Timer0 */
unsigned int ui_AiTimer1; /* Timer constant for Timer1 */
unsigned int ui_AiFlags;
unsigned int ui_AiDataLength;
- short *AiData; /* Pointer to sample data */
unsigned int ui_AiNbrofScans; /* number of scans to do */
unsigned short us_UseDma; /* To use Dma or not */
unsigned char b_DmaDoubleBuffer; /* we can use double buffering */
unsigned int ui_DmaActualBuffer; /* which buffer is used now */
- /* UPDATE-0.7.57->0.7.68 */
- /* unsigned int ul_DmaBufferVirtual[2]; pointers to begin of DMA buffer */
short *ul_DmaBufferVirtual[2]; /* pointers to begin of DMA buffer */
unsigned int ul_DmaBufferHw[2]; /* hw address of DMA buff */
unsigned int ui_DmaBufferSize[2]; /* size of dma buffer in bytes */
unsigned int ui_DmaBufferUsesize[2]; /* which size we may now used for transfer */
- unsigned int ui_DmaBufferSamples[2]; /* size in samples */
unsigned int ui_DmaBufferPages[2]; /* number of pages in buffer */
unsigned char b_DigitalOutputRegister; /* Digital Output Register */
unsigned char b_OutputMemoryStatus;
- unsigned char b_AnalogInputChannelNbr; /* Analog input channel Nbr */
- unsigned char b_AnalogOutputChannelNbr; /* Analog input Output Nbr */
unsigned char b_TimerSelectMode; /* Contain data written at iobase + 0C */
unsigned char b_ModeSelectRegister; /* Contain data written at iobase + 0E */
unsigned short us_OutputRegister; /* Contain data written at iobase + 0 */
- unsigned char b_InterruptState;
- unsigned char b_TimerInit; /* Specify if InitTimerWatchdog was load */
- unsigned char b_TimerStarted; /* Specify if timer 2 is running or not */
unsigned char b_Timer2Mode; /* Specify the timer 2 mode */
unsigned char b_Timer2Interrupt; /* Timer2 interrupt enable or disable */
unsigned char b_AiCyclicAcquisition; /* indicate cyclic acquisition */
unsigned char b_InterruptMode; /* eoc eos or dma */
unsigned char b_EocEosInterrupt; /* Enable disable eoc eos interrupt */
unsigned int ui_EocEosConversionTime;
- unsigned char b_EocEosConversionTimeBase;
unsigned char b_SingelDiff;
unsigned char b_ExttrigEnable; /* To enable or disable external trigger */
@@ -365,7 +323,6 @@ struct addi_private {
} s_InterruptParameters;
union str_ModuleInfo s_ModuleInfo[4];
- unsigned int ul_TTLPortConfiguration[10];
/* Parameters read from EEPROM overriding static board info */
struct {
@@ -376,7 +333,6 @@ struct addi_private {
int i_NbrDiChannel; /* Number of DI channels */
int i_NbrDoChannel; /* Number of DO channels */
int i_DoMaxdata; /* data to set all channels high */
- int i_Dma; /* dma present or not */
int i_Timer; /* timer subdevice present or not */
unsigned int ui_MinAcquisitiontimeNs;
/* Minimum Acquisition in Nano secs */
diff --git a/drivers/staging/comedi/drivers/addi-data/addi_eeprom.c b/drivers/staging/comedi/drivers/addi-data/addi_eeprom.c
index 5124ac9f1818..dc031c494a27 100644
--- a/drivers/staging/comedi/drivers/addi-data/addi_eeprom.c
+++ b/drivers/staging/comedi/drivers/addi-data/addi_eeprom.c
@@ -20,13 +20,6 @@
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#define NVRAM_USER_DATA_START 0x100
@@ -302,7 +295,7 @@ static void addi_eeprom_read_ai_info(struct comedi_device *dev,
devpriv->s_EeParameters.ui_MinDelaytimeNs = tmp * 1000;
tmp = addi_eeprom_readw(iobase, type, addr + 20);
- devpriv->s_EeParameters.i_Dma = (tmp >> 13) & 0x01;
+ /* dma = (tmp >> 13) & 0x01; */
tmp = addi_eeprom_readw(iobase, type, addr + 72) & 0xff;
if (tmp) { /* > 0 */
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_APCI1710.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_APCI1710.c
index b05f8505c894..b1a7ec1035e1 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_APCI1710.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_APCI1710.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci035.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci035.c
index 3d66e48e0cf7..1128c22e7517 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci035.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci035.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1500.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1500.c
index 24c4c983db38..054910511e9e 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1500.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1500.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1564.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1564.c
index fc31c4b93407..e3cc429403c0 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1564.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci1564.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3120.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3120.c
index 74065baa3c08..a89e505c8a3a 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3120.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3120.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
@@ -724,9 +720,7 @@ static int i_APCI3120_StopCyclicAcquisition(struct comedi_device *dev,
inb(dev->iobase + APCI3120_RESET_FIFO);
inw(dev->iobase + APCI3120_RD_STATUS);
devpriv->ui_AiActualScan = 0;
- devpriv->ui_AiActualScanPosition = 0;
s->async->cur_chan = 0;
- devpriv->ui_AiBufferPtr = 0;
devpriv->b_AiContinuous = 0;
devpriv->ui_DmaActualBuffer = 0;
@@ -895,9 +889,7 @@ static int i_APCI3120_CyclicAnalogInput(int mode,
/* END JK 07.05.04: Comparison between WIN32 and Linux driver */
devpriv->ui_AiActualScan = 0;
- devpriv->ui_AiActualScanPosition = 0;
s->async->cur_chan = 0;
- devpriv->ui_AiBufferPtr = 0;
devpriv->ui_DmaActualBuffer = 0;
/* value for timer2 minus -2 has to be done .....dunno y?? */
@@ -1351,8 +1343,6 @@ static int i_APCI3120_CommandAnalogInput(struct comedi_device *dev,
devpriv->ui_AiScanLength = cmd->scan_end_arg;
devpriv->pui_AiChannelList = cmd->chanlist;
- /* UPDATE-0.7.57->0.7.68devpriv->AiData=s->async->data; */
- devpriv->AiData = s->async->prealloc_buf;
/* UPDATE-0.7.57->0.7.68devpriv->ui_AiDataLength=s->async->data_len; */
devpriv->ui_AiDataLength = s->async->prealloc_bufsz;
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
index c7908730caa5..32dce0329fd5 100644
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
+++ b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3200.c
@@ -15,10 +15,6 @@ This program is free software; you can redistribute it and/or modify it under th
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
@endverbatim
*/
/*
diff --git a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3xxx.c b/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3xxx.c
deleted file mode 100644
index a45a2a26e0da..000000000000
--- a/drivers/staging/comedi/drivers/addi-data/hwdrv_apci3xxx.c
+++ /dev/null
@@ -1,1376 +0,0 @@
-/**
-@verbatim
-
-Copyright (C) 2004,2005 ADDI-DATA GmbH for the source code of this module.
-
- ADDI-DATA GmbH
- Dieselstrasse 3
- D-77833 Ottersweier
- Tel: +19(0)7223/9493-0
- Fax: +49(0)7223/9493-92
- http://www.addi-data.com
- info@addi-data.com
-
-This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-You should also find the complete GPL in the COPYING file accompanying this source code.
-
-@endverbatim
-*/
-/*
- +-----------------------------------------------------------------------+
- | (C) ADDI-DATA GmbH Dieselstrasse 3 D-77833 Ottersweier |
- +-----------------------------------------------------------------------+
- | Tel : +49 (0) 7223/9493-0 | email : info@addi-data.com |
- | Fax : +49 (0) 7223/9493-92 | Internet : http://www.addi-data.com |
- +-----------------------------------------------------------------------+
- | Project : APCI-3XXX | Compiler : GCC |
- | Module name : hwdrv_apci3xxx.c| Version : 2.96 |
- +-------------------------------+---------------------------------------+
- | Project manager: S. Weber | Date : 15/09/2005 |
- +-----------------------------------------------------------------------+
- | Description :APCI3XXX Module. Hardware abstraction Layer for APCI3XXX|
- +-----------------------------------------------------------------------+
- | UPDATE'S |
- +-----------------------------------------------------------------------+
- | Date | Author | Description of updates |
- +----------+-----------+------------------------------------------------+
- | | | |
- | | | |
- +----------+-----------+------------------------------------------------+
-*/
-
-#ifndef COMEDI_SUBD_TTLIO
-#define COMEDI_SUBD_TTLIO 11 /* Digital Input Output But TTL */
-#endif
-
-#define APCI3XXX_SINGLE 0
-#define APCI3XXX_DIFF 1
-#define APCI3XXX_CONFIGURATION 0
-
-#define APCI3XXX_TTL_INIT_DIRECTION_PORT2 0
-
-static const struct comedi_lrange range_apci3XXX_ai = {
- 8, {
- BIP_RANGE(10),
- BIP_RANGE(5),
- BIP_RANGE(2),
- BIP_RANGE(1),
- UNI_RANGE(10),
- UNI_RANGE(5),
- UNI_RANGE(2),
- UNI_RANGE(1)
- }
-};
-
-static const struct comedi_lrange range_apci3XXX_ao = {
- 2, {
- BIP_RANGE(10),
- UNI_RANGE(10)
- }
-};
-
-/*
-+----------------------------------------------------------------------------+
-| ANALOG INPUT FUNCTIONS |
-+----------------------------------------------------------------------------+
-*/
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_TestConversionStarted |
-| (struct comedi_device *dev) |
-+----------------------------------------------------------------------------+
-| Task Test if any conversion started |
-+----------------------------------------------------------------------------+
-| Input Parameters : - |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value : 0 : Conversion not started |
-| 1 : Conversion started |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_TestConversionStarted(struct comedi_device *dev)
-{
- struct addi_private *devpriv = dev->private;
-
- if ((readl(devpriv->dw_AiBase + 8) & 0x80000UL) == 0x80000UL)
- return 1;
- else
- return 0;
-
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_AnalogInputConfigOperatingMode |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task Converting mode and convert time selection |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_SingleDiff = (unsigned char) data[1]; |
-| b_TimeBase = (unsigned char) data[2]; (0: ns, 1:micros 2:ms)|
-| dw_ReloadValue = (unsigned int) data[3]; |
-| ........ |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value :>0 : No error |
-| -1 : Single/Diff selection error |
-| -2 : Convert time base unity selection error |
-| -3 : Convert time value selection error |
-| -10: Any conversion started |
-| .... |
-| -100 : Config command error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_AnalogInputConfigOperatingMode(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- const struct addi_board *this_board = comedi_board(dev);
- struct addi_private *devpriv = dev->private;
- int i_ReturnValue = insn->n;
- unsigned char b_TimeBase = 0;
- unsigned char b_SingleDiff = 0;
- unsigned int dw_ReloadValue = 0;
- unsigned int dw_TestReloadValue = 0;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n == 4) {
- /****************************/
- /* Get the Singel/Diff flag */
- /****************************/
-
- b_SingleDiff = (unsigned char) data[1];
-
- /****************************/
- /* Get the time base unitiy */
- /****************************/
-
- b_TimeBase = (unsigned char) data[2];
-
- /*************************************/
- /* Get the convert time reload value */
- /*************************************/
-
- dw_ReloadValue = (unsigned int) data[3];
-
- /**********************/
- /* Test the time base */
- /**********************/
-
- if ((this_board->b_AvailableConvertUnit & (1 << b_TimeBase)) !=
- 0) {
- /*******************************/
- /* Test the convert time value */
- /*******************************/
-
- if (dw_ReloadValue <= 65535) {
- dw_TestReloadValue = dw_ReloadValue;
-
- if (b_TimeBase == 1) {
- dw_TestReloadValue =
- dw_TestReloadValue * 1000UL;
- }
- if (b_TimeBase == 2) {
- dw_TestReloadValue =
- dw_TestReloadValue * 1000000UL;
- }
-
- /*******************************/
- /* Test the convert time value */
- /*******************************/
-
- if (dw_TestReloadValue >=
- devpriv->s_EeParameters.
- ui_MinAcquisitiontimeNs) {
- if ((b_SingleDiff == APCI3XXX_SINGLE)
- || (b_SingleDiff ==
- APCI3XXX_DIFF)) {
- if (((b_SingleDiff == APCI3XXX_SINGLE)
- && (devpriv->s_EeParameters.i_NbrAiChannel == 0))
- || ((b_SingleDiff == APCI3XXX_DIFF)
- && (this_board->i_NbrAiChannelDiff == 0))
- ) {
- /*******************************/
- /* Single/Diff selection error */
- /*******************************/
-
- printk("Single/Diff selection error\n");
- i_ReturnValue = -1;
- } else {
- /**********************************/
- /* Test if conversion not started */
- /**********************************/
-
- if (i_APCI3XXX_TestConversionStarted(dev) == 0) {
- devpriv->
- ui_EocEosConversionTime
- =
- (unsigned int)
- dw_ReloadValue;
- devpriv->
- b_EocEosConversionTimeBase
- =
- b_TimeBase;
- devpriv->
- b_SingelDiff
- =
- b_SingleDiff;
- devpriv->
- b_AiInitialisation
- = 1;
-
- /*******************************/
- /* Set the convert timing unit */
- /*******************************/
-
- writel((unsigned int)b_TimeBase,
- devpriv->dw_AiBase + 36);
-
- /**************************/
- /* Set the convert timing */
- /*************************/
-
- writel(dw_ReloadValue, devpriv->dw_AiBase + 32);
- } else {
- /**************************/
- /* Any conversion started */
- /**************************/
-
- printk("Any conversion started\n");
- i_ReturnValue =
- -10;
- }
- }
- } else {
- /*******************************/
- /* Single/Diff selection error */
- /*******************************/
-
- printk("Single/Diff selection error\n");
- i_ReturnValue = -1;
- }
- } else {
- /************************/
- /* Time selection error */
- /************************/
-
- printk("Convert time value selection error\n");
- i_ReturnValue = -3;
- }
- } else {
- /************************/
- /* Time selection error */
- /************************/
-
- printk("Convert time value selection error\n");
- i_ReturnValue = -3;
- }
- } else {
- /*****************************/
- /* Time base selection error */
- /*****************************/
-
- printk("Convert time base unity selection error\n");
- i_ReturnValue = -2;
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnConfigAnalogInput |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task Converting mode and convert time selection |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_ConvertMode = (unsigned char) data[0]; |
-| b_TimeBase = (unsigned char) data[1]; (0: ns, 1:micros 2:ms)|
-| dw_ReloadValue = (unsigned int) data[2]; |
-| ........ |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value :>0: No error |
-| .... |
-| -100 : Config command error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnConfigAnalogInput(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- int i_ReturnValue = insn->n;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 1) {
- switch ((unsigned char) data[0]) {
- case APCI3XXX_CONFIGURATION:
- i_ReturnValue =
- i_APCI3XXX_AnalogInputConfigOperatingMode(dev,
- s, insn, data);
- break;
-
- default:
- i_ReturnValue = -100;
- printk("Config command error %d\n", data[0]);
- break;
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnReadAnalogInput |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task Read 1 analog input |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_Range = CR_RANGE(insn->chanspec); |
-| b_Channel = CR_CHAN(insn->chanspec); |
-| dw_NbrOfAcquisition = insn->n; |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value :>0: No error |
-| -3 : Channel selection error |
-| -4 : Configuration selelection error |
-| -10: Any conversion started |
-| .... |
-| -100 : Config command error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnReadAnalogInput(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- const struct addi_board *this_board = comedi_board(dev);
- struct addi_private *devpriv = dev->private;
- int i_ReturnValue = insn->n;
- unsigned char b_Configuration = (unsigned char) CR_RANGE(insn->chanspec);
- unsigned char b_Channel = (unsigned char) CR_CHAN(insn->chanspec);
- unsigned int dw_Temp = 0;
- unsigned int dw_Configuration = 0;
- unsigned int dw_AcquisitionCpt = 0;
- unsigned char b_Interrupt = 0;
-
- /*************************************/
- /* Test if operating mode configured */
- /*************************************/
-
- if (devpriv->b_AiInitialisation) {
- /***************************/
- /* Test the channel number */
- /***************************/
-
- if (((b_Channel < devpriv->s_EeParameters.i_NbrAiChannel)
- && (devpriv->b_SingelDiff == APCI3XXX_SINGLE))
- || ((b_Channel < this_board->i_NbrAiChannelDiff)
- && (devpriv->b_SingelDiff == APCI3XXX_DIFF))) {
- /**********************************/
- /* Test the channel configuration */
- /**********************************/
-
- if (b_Configuration > 7) {
- /***************************/
- /* Channel not initialised */
- /***************************/
-
- i_ReturnValue = -4;
- printk("Channel %d range %d selection error\n",
- b_Channel, b_Configuration);
- }
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- i_ReturnValue = -3;
- printk("Channel %d selection error\n", b_Channel);
- }
-
- /**************************/
- /* Test if no error occur */
- /**************************/
-
- if (i_ReturnValue >= 0) {
- /************************/
- /* Test the buffer size */
- /************************/
-
- if ((b_Interrupt != 0) || ((b_Interrupt == 0)
- && (insn->n >= 1))) {
- /**********************************/
- /* Test if conversion not started */
- /**********************************/
-
- if (i_APCI3XXX_TestConversionStarted(dev) == 0) {
- /******************/
- /* Clear the FIFO */
- /******************/
-
- writel(0x10000UL, devpriv->dw_AiBase + 12);
-
- /*******************************/
- /* Get and save the delay mode */
- /*******************************/
-
- dw_Temp = readl(devpriv->dw_AiBase + 4);
- dw_Temp = dw_Temp & 0xFFFFFEF0UL;
-
- /***********************************/
- /* Channel configuration selection */
- /***********************************/
-
- writel(dw_Temp, devpriv->dw_AiBase + 4);
-
- /**************************/
- /* Make the configuration */
- /**************************/
-
- dw_Configuration =
- (b_Configuration & 3) |
- ((unsigned int) (b_Configuration >> 2)
- << 6) | ((unsigned int) devpriv->
- b_SingelDiff << 7);
-
- /***************************/
- /* Write the configuration */
- /***************************/
-
- writel(dw_Configuration,
- devpriv->dw_AiBase + 0);
-
- /*********************/
- /* Channel selection */
- /*********************/
-
- writel(dw_Temp | 0x100UL,
- devpriv->dw_AiBase + 4);
- writel((unsigned int) b_Channel,
- devpriv->dw_AiBase + 0);
-
- /***********************/
- /* Restaure delay mode */
- /***********************/
-
- writel(dw_Temp, devpriv->dw_AiBase + 4);
-
- /***********************************/
- /* Set the number of sequence to 1 */
- /***********************************/
-
- writel(1, devpriv->dw_AiBase + 48);
-
- /***************************/
- /* Save the interrupt flag */
- /***************************/
-
- devpriv->b_EocEosInterrupt =
- b_Interrupt;
-
- /*******************************/
- /* Save the number of channels */
- /*******************************/
-
- devpriv->ui_AiNbrofChannels = 1;
-
- /******************************/
- /* Test if interrupt not used */
- /******************************/
-
- if (b_Interrupt == 0) {
- for (dw_AcquisitionCpt = 0;
- dw_AcquisitionCpt <
- insn->n;
- dw_AcquisitionCpt++) {
- /************************/
- /* Start the conversion */
- /************************/
-
- writel(0x80000UL, devpriv->dw_AiBase + 8);
-
- /****************/
- /* Wait the EOS */
- /****************/
-
- do {
- dw_Temp = readl(devpriv->dw_AiBase + 20);
- dw_Temp = dw_Temp & 1;
- } while (dw_Temp != 1);
-
- /*************************/
- /* Read the analog value */
- /*************************/
-
- data[dw_AcquisitionCpt] = (unsigned int)readl(devpriv->dw_AiBase + 28);
- }
- } else {
- /************************/
- /* Start the conversion */
- /************************/
-
- writel(0x180000UL, devpriv->dw_AiBase + 8);
- }
- } else {
- /**************************/
- /* Any conversion started */
- /**************************/
-
- printk("Any conversion started\n");
- i_ReturnValue = -10;
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
- }
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- printk("Operating mode not configured\n");
- i_ReturnValue = -1;
- }
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function name : void v_APCI3XXX_Interrupt (int irq, |
-| void *d) |
-+----------------------------------------------------------------------------+
-| Task :Interrupt handler for APCI3XXX |
-| When interrupt occurs this gets called. |
-| First it finds which interrupt has been generated and |
-| handles corresponding interrupt |
-+----------------------------------------------------------------------------+
-| Input Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value : - |
-+----------------------------------------------------------------------------+
-*/
-
-static void v_APCI3XXX_Interrupt(int irq, void *d)
-{
- struct comedi_device *dev = d;
- struct addi_private *devpriv = dev->private;
- unsigned char b_CopyCpt = 0;
- unsigned int dw_Status = 0;
-
- /***************************/
- /* Test if interrupt occur */
- /***************************/
-
- dw_Status = readl(devpriv->dw_AiBase + 16);
- if ( (dw_Status & 0x2UL) == 0x2UL) {
- /***********************/
- /* Reset the interrupt */
- /***********************/
-
- writel(dw_Status, devpriv->dw_AiBase + 16);
-
- /*****************************/
- /* Test if interrupt enabled */
- /*****************************/
-
- if (devpriv->b_EocEosInterrupt == 1) {
- /********************************/
- /* Read all analog inputs value */
- /********************************/
-
- for (b_CopyCpt = 0;
- b_CopyCpt < devpriv->ui_AiNbrofChannels;
- b_CopyCpt++) {
- devpriv->ui_AiReadData[b_CopyCpt] =
- (unsigned int)readl(devpriv->dw_AiBase + 28);
- }
-
- /**************************/
- /* Set the interrupt flag */
- /**************************/
-
- devpriv->b_EocEosInterrupt = 2;
-
- /**********************************************/
- /* Send a signal to from kernel to user space */
- /**********************************************/
-
- send_sig(SIGIO, devpriv->tsk_Current, 0);
- }
- }
-}
-
-/*
-+----------------------------------------------------------------------------+
-| ANALOG OUTPUT SUBDEVICE |
-+----------------------------------------------------------------------------+
-*/
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnWriteAnalogOutput |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task Read 1 analog input |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_Range = CR_RANGE(insn->chanspec); |
-| b_Channel = CR_CHAN(insn->chanspec); |
-| data[0] = analog value; |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value :>0: No error |
-| -3 : Channel selection error |
-| -4 : Configuration selelection error |
-| .... |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnWriteAnalogOutput(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- unsigned char b_Range = (unsigned char) CR_RANGE(insn->chanspec);
- unsigned char b_Channel = (unsigned char) CR_CHAN(insn->chanspec);
- unsigned int dw_Status = 0;
- int i_ReturnValue = insn->n;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 1) {
- /***************************/
- /* Test the channel number */
- /***************************/
-
- if (b_Channel < devpriv->s_EeParameters.i_NbrAoChannel) {
- /**********************************/
- /* Test the channel configuration */
- /**********************************/
-
- if (b_Range < 2) {
- /***************************/
- /* Set the range selection */
- /***************************/
-
- writel(b_Range, devpriv->dw_AiBase + 96);
-
- /**************************************************/
- /* Write the analog value to the selected channel */
- /**************************************************/
-
- writel((data[0] << 8) | b_Channel,
- devpriv->dw_AiBase + 100);
-
- /****************************/
- /* Wait the end of transfer */
- /****************************/
-
- do {
- dw_Status = readl(devpriv->dw_AiBase + 96);
- } while ((dw_Status & 0x100) != 0x100);
- } else {
- /***************************/
- /* Channel not initialised */
- /***************************/
-
- i_ReturnValue = -4;
- printk("Channel %d range %d selection error\n",
- b_Channel, b_Range);
- }
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- i_ReturnValue = -3;
- printk("Channel %d selection error\n", b_Channel);
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| TTL FUNCTIONS |
-+----------------------------------------------------------------------------+
-*/
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnConfigInitTTLIO |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task You must calling this function be |
-| for you call any other function witch access of TTL. |
-| APCI3XXX_TTL_INIT_DIRECTION_PORT2(user inputs for direction)|
-+----------------------------------------------------------------------------+
-| Input Parameters : b_InitType = (unsigned char) data[0]; |
-| b_Port2Mode = (unsigned char) data[1]; |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value :>0: No error |
-| -1: Port 2 mode selection is wrong |
-| .... |
-| -100 : Config command error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnConfigInitTTLIO(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- int i_ReturnValue = insn->n;
- unsigned char b_Command = 0;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 1) {
- /*******************/
- /* Get the command */
- /* **************** */
-
- b_Command = (unsigned char) data[0];
-
- /********************/
- /* Test the command */
- /********************/
-
- if (b_Command == APCI3XXX_TTL_INIT_DIRECTION_PORT2) {
- /***************************************/
- /* Test the initialisation buffer size */
- /***************************************/
-
- if ((b_Command == APCI3XXX_TTL_INIT_DIRECTION_PORT2)
- && (insn->n != 2)) {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
- } else {
- /************************/
- /* Config command error */
- /************************/
-
- printk("Command selection error\n");
- i_ReturnValue = -100;
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- /*********************************************************************************/
- /* Test if no error occur and APCI3XXX_TTL_INIT_DIRECTION_PORT2 command selected */
- /*********************************************************************************/
-
- if ((i_ReturnValue >= 0)
- && (b_Command == APCI3XXX_TTL_INIT_DIRECTION_PORT2)) {
- /**********************/
- /* Test the direction */
- /**********************/
-
- if ((data[1] == 0) || (data[1] == 0xFF)) {
- /**************************/
- /* Save the configuration */
- /**************************/
-
- devpriv->ul_TTLPortConfiguration[0] =
- devpriv->ul_TTLPortConfiguration[0] | data[1];
- } else {
- /************************/
- /* Port direction error */
- /************************/
-
- printk("Port 2 direction selection error\n");
- i_ReturnValue = -1;
- }
- }
-
- /**************************/
- /* Test if no error occur */
- /**************************/
-
- if (i_ReturnValue >= 0) {
- /***********************************/
- /* Test if TTL port initilaisation */
- /***********************************/
-
- if (b_Command == APCI3XXX_TTL_INIT_DIRECTION_PORT2) {
- /*************************/
- /* Set the configuration */
- /*************************/
-
- outl(data[1], devpriv->iobase + 224);
- }
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| TTL INPUT FUNCTIONS |
-+----------------------------------------------------------------------------+
-*/
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnBitsTTLIO |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task : Write the selected output mask and read the status from|
-| all TTL channles |
-+----------------------------------------------------------------------------+
-| Input Parameters : dw_ChannelMask = data [0]; |
-| dw_BitMask = data [1]; |
-+----------------------------------------------------------------------------+
-| Output Parameters : data[1] : All TTL channles states |
-+----------------------------------------------------------------------------+
-| Return Value : >0 : No error |
-| -4 : Channel mask error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnBitsTTLIO(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- int i_ReturnValue = insn->n;
- unsigned char b_ChannelCpt = 0;
- unsigned int dw_ChannelMask = 0;
- unsigned int dw_BitMask = 0;
- unsigned int dw_Status = 0;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 2) {
- /*******************************/
- /* Get the channe and bit mask */
- /*******************************/
-
- dw_ChannelMask = data[0];
- dw_BitMask = data[1];
-
- /*************************/
- /* Test the channel mask */
- /*************************/
-
- if (((dw_ChannelMask & 0XFF00FF00) == 0) &&
- (((devpriv->ul_TTLPortConfiguration[0] & 0xFF) == 0xFF)
- || (((devpriv->ul_TTLPortConfiguration[0] &
- 0xFF) == 0)
- && ((dw_ChannelMask & 0XFF0000) ==
- 0)))) {
- /*********************************/
- /* Test if set/reset any channel */
- /*********************************/
-
- if (dw_ChannelMask) {
- /****************************************/
- /* Test if set/rest any port 0 channels */
- /****************************************/
-
- if (dw_ChannelMask & 0xFF) {
- /*******************************************/
- /* Read port 0 (first digital output port) */
- /*******************************************/
-
- dw_Status = inl(devpriv->iobase + 80);
-
- for (b_ChannelCpt = 0; b_ChannelCpt < 8;
- b_ChannelCpt++) {
- if ((dw_ChannelMask >>
- b_ChannelCpt) &
- 1) {
- dw_Status =
- (dw_Status &
- (0xFF - (1 << b_ChannelCpt))) | (dw_BitMask & (1 << b_ChannelCpt));
- }
- }
-
- outl(dw_Status, devpriv->iobase + 80);
- }
-
- /****************************************/
- /* Test if set/rest any port 2 channels */
- /****************************************/
-
- if (dw_ChannelMask & 0xFF0000) {
- dw_BitMask = dw_BitMask >> 16;
- dw_ChannelMask = dw_ChannelMask >> 16;
-
- /********************************************/
- /* Read port 2 (second digital output port) */
- /********************************************/
-
- dw_Status = inl(devpriv->iobase + 112);
-
- for (b_ChannelCpt = 0; b_ChannelCpt < 8;
- b_ChannelCpt++) {
- if ((dw_ChannelMask >>
- b_ChannelCpt) &
- 1) {
- dw_Status =
- (dw_Status &
- (0xFF - (1 << b_ChannelCpt))) | (dw_BitMask & (1 << b_ChannelCpt));
- }
- }
-
- outl(dw_Status, devpriv->iobase + 112);
- }
- }
-
- /*******************************************/
- /* Read port 0 (first digital output port) */
- /*******************************************/
-
- data[1] = inl(devpriv->iobase + 80);
-
- /******************************************/
- /* Read port 1 (first digital input port) */
- /******************************************/
-
- data[1] = data[1] | (inl(devpriv->iobase + 64) << 8);
-
- /************************/
- /* Test if port 2 input */
- /************************/
-
- if ((devpriv->ul_TTLPortConfiguration[0] & 0xFF) == 0) {
- data[1] =
- data[1] | (inl(devpriv->iobase +
- 96) << 16);
- } else {
- data[1] =
- data[1] | (inl(devpriv->iobase +
- 112) << 16);
- }
- } else {
- /************************/
- /* Config command error */
- /************************/
-
- printk("Channel mask error\n");
- i_ReturnValue = -4;
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnReadTTLIO |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task : Read the status from selected channel |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_Channel = CR_CHAN(insn->chanspec) |
-+----------------------------------------------------------------------------+
-| Output Parameters : data[0] : Selected TTL channel state |
-+----------------------------------------------------------------------------+
-| Return Value : 0 : No error |
-| -3 : Channel selection error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnReadTTLIO(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- unsigned char b_Channel = (unsigned char) CR_CHAN(insn->chanspec);
- int i_ReturnValue = insn->n;
- unsigned int *pls_ReadData = data;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 1) {
- /***********************/
- /* Test if read port 0 */
- /***********************/
-
- if (b_Channel < 8) {
- /*******************************************/
- /* Read port 0 (first digital output port) */
- /*******************************************/
-
- pls_ReadData[0] = inl(devpriv->iobase + 80);
- pls_ReadData[0] = (pls_ReadData[0] >> b_Channel) & 1;
- } else {
- /***********************/
- /* Test if read port 1 */
- /***********************/
-
- if ((b_Channel > 7) && (b_Channel < 16)) {
- /******************************************/
- /* Read port 1 (first digital input port) */
- /******************************************/
-
- pls_ReadData[0] = inl(devpriv->iobase + 64);
- pls_ReadData[0] =
- (pls_ReadData[0] >> (b_Channel -
- 8)) & 1;
- } else {
- /***********************/
- /* Test if read port 2 */
- /***********************/
-
- if ((b_Channel > 15) && (b_Channel < 24)) {
- /************************/
- /* Test if port 2 input */
- /************************/
-
- if ((devpriv->ul_TTLPortConfiguration[0]
- & 0xFF) == 0) {
- pls_ReadData[0] =
- inl(devpriv->iobase +
- 96);
- pls_ReadData[0] =
- (pls_ReadData[0] >>
- (b_Channel - 16)) & 1;
- } else {
- pls_ReadData[0] =
- inl(devpriv->iobase +
- 112);
- pls_ReadData[0] =
- (pls_ReadData[0] >>
- (b_Channel - 16)) & 1;
- }
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- i_ReturnValue = -3;
- printk("Channel %d selection error\n",
- b_Channel);
- }
- }
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| TTL OUTPUT FUNCTIONS |
-+----------------------------------------------------------------------------+
-*/
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_InsnWriteTTLIO |
-| (struct comedi_device *dev, |
-| struct comedi_subdevice *s, |
-| struct comedi_insn *insn, |
-| unsigned int *data) |
-+----------------------------------------------------------------------------+
-| Task : Set the state from TTL output channel |
-+----------------------------------------------------------------------------+
-| Input Parameters : b_Channel = CR_CHAN(insn->chanspec) |
-| b_State = data [0] |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value : 0 : No error |
-| -3 : Channel selection error |
-| -101 : Data size error |
-+----------------------------------------------------------------------------+
-*/
-static int i_APCI3XXX_InsnWriteTTLIO(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- int i_ReturnValue = insn->n;
- unsigned char b_Channel = (unsigned char) CR_CHAN(insn->chanspec);
- unsigned char b_State = 0;
- unsigned int dw_Status = 0;
-
- /************************/
- /* Test the buffer size */
- /************************/
-
- if (insn->n >= 1) {
- b_State = (unsigned char) data[0];
-
- /***********************/
- /* Test if read port 0 */
- /***********************/
-
- if (b_Channel < 8) {
- /*****************************************************************************/
- /* Read port 0 (first digital output port) and set/reset the selected channel */
- /*****************************************************************************/
-
- dw_Status = inl(devpriv->iobase + 80);
- dw_Status =
- (dw_Status & (0xFF -
- (1 << b_Channel))) | ((b_State & 1) <<
- b_Channel);
- outl(dw_Status, devpriv->iobase + 80);
- } else {
- /***********************/
- /* Test if read port 2 */
- /***********************/
-
- if ((b_Channel > 15) && (b_Channel < 24)) {
- /*************************/
- /* Test if port 2 output */
- /*************************/
-
- if ((devpriv->ul_TTLPortConfiguration[0] & 0xFF)
- == 0xFF) {
- /*****************************************************************************/
- /* Read port 2 (first digital output port) and set/reset the selected channel */
- /*****************************************************************************/
-
- dw_Status = inl(devpriv->iobase + 112);
- dw_Status =
- (dw_Status & (0xFF -
- (1 << (b_Channel -
- 16)))) |
- ((b_State & 1) << (b_Channel -
- 16));
- outl(dw_Status, devpriv->iobase + 112);
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- i_ReturnValue = -3;
- printk("Channel %d selection error\n",
- b_Channel);
- }
- } else {
- /***************************/
- /* Channel selection error */
- /***************************/
-
- i_ReturnValue = -3;
- printk("Channel %d selection error\n",
- b_Channel);
- }
- }
- } else {
- /*******************/
- /* Data size error */
- /*******************/
-
- printk("Buffer size error\n");
- i_ReturnValue = -101;
- }
-
- return i_ReturnValue;
-}
-
-static int apci3xxx_di_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
-
- data[1] = inl(devpriv->iobase + 32) & 0xf;
-
- return insn->n;
-}
-
-static int apci3xxx_do_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
-{
- struct addi_private *devpriv = dev->private;
- unsigned int mask = data[0];
- unsigned int bits = data[1];
-
- s->state = inl(devpriv->iobase + 48) & 0xf;
- if (mask) {
- s->state &= ~mask;
- s->state |= (bits & mask);
-
- outl(s->state, devpriv->iobase + 48);
- }
-
- data[1] = s->state;
-
- return insn->n;
-}
-
-/*
-+----------------------------------------------------------------------------+
-| Function Name : int i_APCI3XXX_Reset(struct comedi_device *dev) | +----------------------------------------------------------------------------+
-| Task :resets all the registers |
-+----------------------------------------------------------------------------+
-| Input Parameters : struct comedi_device *dev |
-+----------------------------------------------------------------------------+
-| Output Parameters : - |
-+----------------------------------------------------------------------------+
-| Return Value : - |
-+----------------------------------------------------------------------------+
-*/
-
-static int i_APCI3XXX_Reset(struct comedi_device *dev)
-{
- struct addi_private *devpriv = dev->private;
- unsigned char b_Cpt = 0;
-
- /*************************/
- /* Disable the interrupt */
- /*************************/
-
- disable_irq(dev->irq);
-
- /****************************/
- /* Reset the interrupt flag */
- /****************************/
-
- devpriv->b_EocEosInterrupt = 0;
-
- /***************************/
- /* Clear the start command */
- /***************************/
-
- writel(0, devpriv->dw_AiBase + 8);
-
- /*****************************/
- /* Reset the interrupt flags */
- /*****************************/
-
- writel(readl(devpriv->dw_AiBase + 16), devpriv->dw_AiBase + 16);
-
- /*****************/
- /* clear the EOS */
- /*****************/
-
- readl(devpriv->dw_AiBase + 20);
-
- /******************/
- /* Clear the FIFO */
- /******************/
-
- for (b_Cpt = 0; b_Cpt < 16; b_Cpt++) {
- readl(devpriv->dw_AiBase + 28);
- }
-
- /************************/
- /* Enable the interrupt */
- /************************/
-
- enable_irq(dev->irq);
-
- return 0;
-}
diff --git a/drivers/staging/comedi/drivers/addi_apci_1032.c b/drivers/staging/comedi/drivers/addi_apci_1032.c
index 3d4878facc26..8a93542faedc 100644
--- a/drivers/staging/comedi/drivers/addi_apci_1032.c
+++ b/drivers/staging/comedi/drivers/addi_apci_1032.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying this
- * source code.
*/
#include <linux/pci.h>
diff --git a/drivers/staging/comedi/drivers/addi_apci_1516.c b/drivers/staging/comedi/drivers/addi_apci_1516.c
index ed01c56630bb..b626738bb73c 100644
--- a/drivers/staging/comedi/drivers/addi_apci_1516.c
+++ b/drivers/staging/comedi/drivers/addi_apci_1516.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#include <linux/pci.h>
@@ -203,7 +196,6 @@ static void apci1516_detach(struct comedi_device *dev)
{
if (dev->iobase)
apci1516_reset(dev);
- comedi_spriv_free(dev, 2);
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/addi_apci_16xx.c b/drivers/staging/comedi/drivers/addi_apci_16xx.c
index 4c6a9b5a06ae..1f7bed9a3f7f 100644
--- a/drivers/staging/comedi/drivers/addi_apci_16xx.c
+++ b/drivers/staging/comedi/drivers/addi_apci_16xx.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#include <linux/pci.h>
diff --git a/drivers/staging/comedi/drivers/addi_apci_2032.c b/drivers/staging/comedi/drivers/addi_apci_2032.c
index b666637f61be..89ead8eb3c70 100644
--- a/drivers/staging/comedi/drivers/addi_apci_2032.c
+++ b/drivers/staging/comedi/drivers/addi_apci_2032.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#include <linux/pci.h>
@@ -354,7 +347,6 @@ static void apci2032_detach(struct comedi_device *dev)
free_irq(dev->irq, dev);
if (dev->read_subdev)
kfree(dev->read_subdev->private);
- comedi_spriv_free(dev, 1);
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/addi_apci_2200.c b/drivers/staging/comedi/drivers/addi_apci_2200.c
index 1cdc08d79792..ca1bd92ecb17 100644
--- a/drivers/staging/comedi/drivers/addi_apci_2200.c
+++ b/drivers/staging/comedi/drivers/addi_apci_2200.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#include <linux/pci.h>
@@ -130,7 +123,6 @@ static void apci2200_detach(struct comedi_device *dev)
{
if (dev->iobase)
apci2200_reset(dev);
- comedi_spriv_free(dev, 2);
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/addi_apci_3120.c b/drivers/staging/comedi/drivers/addi_apci_3120.c
index 317a26d97c2e..61452848510f 100644
--- a/drivers/staging/comedi/drivers/addi_apci_3120.c
+++ b/drivers/staging/comedi/drivers/addi_apci_3120.c
@@ -103,8 +103,6 @@ static int apci3120_auto_attach(struct comedi_device *dev,
if (devpriv->ul_DmaBufferVirtual[i]) {
devpriv->ui_DmaBufferPages[i] = pages;
devpriv->ui_DmaBufferSize[i] = PAGE_SIZE * pages;
- devpriv->ui_DmaBufferSamples[i] =
- devpriv->ui_DmaBufferSize[i] >> 1;
devpriv->ul_DmaBufferHw[i] =
virt_to_bus((void *)devpriv->
ul_DmaBufferVirtual[i]);
@@ -138,9 +136,6 @@ static int apci3120_auto_attach(struct comedi_device *dev,
s->len_chanlist = this_board->i_AiChannelList;
s->range_table = &range_apci3120_ai;
- /* Set the initialisation flag */
- devpriv->b_AiInitialisation = 1;
-
s->insn_config = i_APCI3120_InsnConfigAnalogInput;
s->insn_read = i_APCI3120_InsnReadAnalogInput;
s->do_cmdtest = i_APCI3120_CommandTestAnalogInput;
diff --git a/drivers/staging/comedi/drivers/addi_apci_3501.c b/drivers/staging/comedi/drivers/addi_apci_3501.c
index a0cf6ecdef0e..f9b63689a12a 100644
--- a/drivers/staging/comedi/drivers/addi_apci_3501.c
+++ b/drivers/staging/comedi/drivers/addi_apci_3501.c
@@ -20,13 +20,6 @@
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * You should also find the complete GPL in the COPYING file accompanying
- * this source code.
*/
#include <linux/pci.h>
diff --git a/drivers/staging/comedi/drivers/addi_apci_3xxx.c b/drivers/staging/comedi/drivers/addi_apci_3xxx.c
index ec4d6ca6863f..5b37cbf9228e 100644
--- a/drivers/staging/comedi/drivers/addi_apci_3xxx.c
+++ b/drivers/staging/comedi/drivers/addi_apci_3xxx.c
@@ -1,14 +1,57 @@
+/*
+ * addi_apci_3xxx.c
+ * Copyright (C) 2004,2005 ADDI-DATA GmbH for the source code of this module.
+ * Project manager: S. Weber
+ *
+ * ADDI-DATA GmbH
+ * Dieselstrasse 3
+ * D-77833 Ottersweier
+ * Tel: +19(0)7223/9493-0
+ * Fax: +49(0)7223/9493-92
+ * http://www.addi-data.com
+ * info@addi-data.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
#include <linux/pci.h>
+#include <linux/interrupt.h>
#include "../comedidev.h"
+
#include "comedi_fc.h"
-#include "amcc_s5933.h"
-#include "addi-data/addi_common.h"
+#define CONV_UNIT_NS (1 << 0)
+#define CONV_UNIT_US (1 << 1)
+#define CONV_UNIT_MS (1 << 2)
-#include "addi-data/addi_eeprom.c"
-#include "addi-data/hwdrv_apci3xxx.c"
-#include "addi-data/addi_common.c"
+static const struct comedi_lrange apci3xxx_ai_range = {
+ 8, {
+ BIP_RANGE(10),
+ BIP_RANGE(5),
+ BIP_RANGE(2),
+ BIP_RANGE(1),
+ UNI_RANGE(10),
+ UNI_RANGE(5),
+ UNI_RANGE(2),
+ UNI_RANGE(1)
+ }
+};
+
+static const struct comedi_lrange apci3xxx_ao_range = {
+ 2, {
+ BIP_RANGE(10),
+ UNI_RANGE(10)
+ }
+};
enum apci3xxx_boardid {
BOARD_APCI3000_16,
@@ -38,651 +81,853 @@ enum apci3xxx_boardid {
BOARD_APCI3500,
};
-static const struct addi_board apci3xxx_boardtypes[] = {
+struct apci3xxx_boardinfo {
+ const char *name;
+ int ai_subdev_flags;
+ int ai_n_chan;
+ unsigned int ai_maxdata;
+ unsigned char ai_conv_units;
+ unsigned int ai_min_acq_ns;
+ unsigned int has_ao:1;
+ unsigned int has_dig_in:1;
+ unsigned int has_dig_out:1;
+ unsigned int has_ttl_io:1;
+};
+
+static const struct apci3xxx_boardinfo apci3xxx_boardtypes[] = {
[BOARD_APCI3000_16] = {
- .pc_DriverName = "apci3000-16",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3000-16",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3000_8] = {
- .pc_DriverName = "apci3000-8",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3000-8",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3000_4] = {
- .pc_DriverName = "apci3000-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 4,
- .i_NbrAiChannelDiff = 2,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3000-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3006_16] = {
- .pc_DriverName = "apci3006-16",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3006-16",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3006_8] = {
- .pc_DriverName = "apci3006-8",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3006-8",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3006_4] = {
- .pc_DriverName = "apci3006-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 4,
- .i_NbrAiChannelDiff = 2,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3006-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ttl_io = 1,
},
[BOARD_APCI3010_16] = {
- .pc_DriverName = "apci3010-16",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3010-16",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3010_8] = {
- .pc_DriverName = "apci3010-8",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3010-8",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3010_4] = {
- .pc_DriverName = "apci3010-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 4,
- .i_NbrAiChannelDiff = 2,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3010-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3016_16] = {
- .pc_DriverName = "apci3016-16",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3016-16",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3016_8] = {
- .pc_DriverName = "apci3016-8",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3016-8",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3016_4] = {
- .pc_DriverName = "apci3016-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 4,
- .i_NbrAiChannelDiff = 2,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3016-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3100_16_4] = {
- .pc_DriverName = "apci3100-16-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 4095,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3100-16-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ao = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3100_8_4] = {
- .pc_DriverName = "apci3100-8-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 4095,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3100-8-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ao = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3106_16_4] = {
- .pc_DriverName = "apci3106-16-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 65535,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3106-16-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ao = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3106_8_4] = {
- .pc_DriverName = "apci3106-8-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 65535,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 10000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3106-8-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 10000,
+ .has_ao = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3110_16_4] = {
- .pc_DriverName = "apci3110-16-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 4095,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3110-16-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_ao = 1,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3110_8_4] = {
- .pc_DriverName = "apci3110-8-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 4095,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3110-8-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0x0fff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_ao = 1,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3116_16_4] = {
- .pc_DriverName = "apci3116-16-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 16,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 16,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 65535,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3116-16-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_ao = 1,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3116_8_4] = {
- .pc_DriverName = "apci3116-8-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannel = 8,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 8,
- .i_NbrAoChannel = 4,
- .i_AiMaxdata = 65535,
- .i_AoMaxdata = 4095,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .i_NbrTTLChannel = 24,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3116-8-4",
+ .ai_subdev_flags = SDF_COMMON | SDF_GROUND | SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_ao = 1,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
+ .has_ttl_io = 1,
},
[BOARD_APCI3003] = {
- .pc_DriverName = "apci3003",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .b_AvailableConvertUnit = 7,
- .ui_MinAcquisitiontimeNs = 2500,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
+ .name = "apci3003",
+ .ai_subdev_flags = SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US |
+ CONV_UNIT_NS,
+ .ai_min_acq_ns = 2500,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
},
[BOARD_APCI3002_16] = {
- .pc_DriverName = "apci3002-16",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannelDiff = 16,
- .i_AiChannelList = 16,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
+ .name = "apci3002-16",
+ .ai_subdev_flags = SDF_DIFF,
+ .ai_n_chan = 16,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
},
[BOARD_APCI3002_8] = {
- .pc_DriverName = "apci3002-8",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannelDiff = 8,
- .i_AiChannelList = 8,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
+ .name = "apci3002-8",
+ .ai_subdev_flags = SDF_DIFF,
+ .ai_n_chan = 8,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
},
[BOARD_APCI3002_4] = {
- .pc_DriverName = "apci3002-4",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAiChannelDiff = 4,
- .i_AiChannelList = 4,
- .i_AiMaxdata = 65535,
- .pr_AiRangelist = &range_apci3XXX_ai,
- .i_NbrDiChannel = 4,
- .i_NbrDoChannel = 4,
- .i_DoMaxdata = 1,
- .b_AvailableConvertUnit = 6,
- .ui_MinAcquisitiontimeNs = 5000,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ai_config = i_APCI3XXX_InsnConfigAnalogInput,
- .ai_read = i_APCI3XXX_InsnReadAnalogInput,
- .di_bits = apci3xxx_di_insn_bits,
- .do_bits = apci3xxx_do_insn_bits,
+ .name = "apci3002-4",
+ .ai_subdev_flags = SDF_DIFF,
+ .ai_n_chan = 4,
+ .ai_maxdata = 0xffff,
+ .ai_conv_units = CONV_UNIT_MS | CONV_UNIT_US,
+ .ai_min_acq_ns = 5000,
+ .has_dig_in = 1,
+ .has_dig_out = 1,
},
[BOARD_APCI3500] = {
- .pc_DriverName = "apci3500",
- .i_IorangeBase1 = 256,
- .i_PCIEeprom = ADDIDATA_NO_EEPROM,
- .pc_EepromChip = ADDIDATA_9054,
- .i_NbrAoChannel = 4,
- .i_AoMaxdata = 4095,
- .pr_AoRangelist = &range_apci3XXX_ao,
- .i_NbrTTLChannel = 24,
- .interrupt = v_APCI3XXX_Interrupt,
- .reset = i_APCI3XXX_Reset,
- .ao_write = i_APCI3XXX_InsnWriteAnalogOutput,
- .ttl_config = i_APCI3XXX_InsnConfigInitTTLIO,
- .ttl_bits = i_APCI3XXX_InsnBitsTTLIO,
- .ttl_read = i_APCI3XXX_InsnReadTTLIO,
- .ttl_write = i_APCI3XXX_InsnWriteTTLIO,
+ .name = "apci3500",
+ .has_ao = 1,
+ .has_ttl_io = 1,
},
};
+struct apci3xxx_private {
+ void __iomem *mmio;
+ unsigned int ai_timer;
+ unsigned char ai_time_base;
+};
+
+static irqreturn_t apci3xxx_irq_handler(int irq, void *d)
+{
+ struct comedi_device *dev = d;
+ struct apci3xxx_private *devpriv = dev->private;
+ struct comedi_subdevice *s = dev->read_subdev;
+ unsigned int status;
+ unsigned int val;
+
+ /* Test if interrupt occur */
+ status = readl(devpriv->mmio + 16);
+ if ((status & 0x2) == 0x2) {
+ /* Reset the interrupt */
+ writel(status, devpriv->mmio + 16);
+
+ val = readl(devpriv->mmio + 28);
+ comedi_buf_put(s->async, val);
+
+ s->async->events |= COMEDI_CB_EOA;
+ comedi_event(dev, s);
+
+ return IRQ_HANDLED;
+ }
+ return IRQ_NONE;
+}
+
+static int apci3xxx_ai_started(struct comedi_device *dev)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+
+ if ((readl(devpriv->mmio + 8) & 0x80000) == 0x80000)
+ return 1;
+ else
+ return 0;
+
+}
+
+static int apci3xxx_ai_setup(struct comedi_device *dev, unsigned int chanspec)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(chanspec);
+ unsigned int range = CR_RANGE(chanspec);
+ unsigned int aref = CR_AREF(chanspec);
+ unsigned int delay_mode;
+ unsigned int val;
+
+ if (apci3xxx_ai_started(dev))
+ return -EBUSY;
+
+ /* Clear the FIFO */
+ writel(0x10000, devpriv->mmio + 12);
+
+ /* Get and save the delay mode */
+ delay_mode = readl(devpriv->mmio + 4);
+ delay_mode &= 0xfffffef0;
+
+ /* Channel configuration selection */
+ writel(delay_mode, devpriv->mmio + 4);
+
+ /* Make the configuration */
+ val = (range & 3) | ((range >> 2) << 6) |
+ ((aref == AREF_DIFF) << 7);
+ writel(val, devpriv->mmio + 0);
+
+ /* Channel selection */
+ writel(delay_mode | 0x100, devpriv->mmio + 4);
+ writel(chan, devpriv->mmio + 0);
+
+ /* Restore delay mode */
+ writel(delay_mode, devpriv->mmio + 4);
+
+ /* Set the number of sequence to 1 */
+ writel(1, devpriv->mmio + 48);
+
+ return 0;
+}
+
+static int apci3xxx_ai_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+ unsigned int val;
+ int ret;
+ int i;
+
+ ret = apci3xxx_ai_setup(dev, insn->chanspec);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < insn->n; i++) {
+ /* Start the conversion */
+ writel(0x80000, devpriv->mmio + 8);
+
+ /* Wait the EOS */
+ do {
+ val = readl(devpriv->mmio + 20);
+ val &= 0x1;
+ } while (!val);
+
+ /* Read the analog value */
+ data[i] = readl(devpriv->mmio + 28);
+ }
+
+ return insn->n;
+}
+
+static int apci3xxx_ai_ns_to_timer(struct comedi_device *dev,
+ unsigned int *ns, int round_mode)
+{
+ const struct apci3xxx_boardinfo *board = comedi_board(dev);
+ struct apci3xxx_private *devpriv = dev->private;
+ unsigned int base;
+ unsigned int timer;
+ int time_base;
+
+ /* time_base: 0 = ns, 1 = us, 2 = ms */
+ for (time_base = 0; time_base < 3; time_base++) {
+ /* skip unsupported time bases */
+ if (!(board->ai_conv_units & (1 << time_base)))
+ continue;
+
+ switch (time_base) {
+ case 0:
+ base = 1;
+ break;
+ case 1:
+ base = 1000;
+ break;
+ case 2:
+ base = 1000000;
+ break;
+ }
+
+ switch (round_mode) {
+ case TRIG_ROUND_NEAREST:
+ default:
+ timer = (*ns + base / 2) / base;
+ break;
+ case TRIG_ROUND_DOWN:
+ timer = *ns / base;
+ break;
+ case TRIG_ROUND_UP:
+ timer = (*ns + base - 1) / base;
+ break;
+ }
+
+ if (timer < 0x10000) {
+ devpriv->ai_time_base = time_base;
+ devpriv->ai_timer = timer;
+ *ns = timer * time_base;
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+static int apci3xxx_ai_cmdtest(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_cmd *cmd)
+{
+ const struct apci3xxx_boardinfo *board = comedi_board(dev);
+ int err = 0;
+ unsigned int tmp;
+
+ /* Step 1 : check if triggers are trivially valid */
+
+ err |= cfc_check_trigger_src(&cmd->start_src, TRIG_NOW);
+ err |= cfc_check_trigger_src(&cmd->scan_begin_src, TRIG_FOLLOW);
+ err |= cfc_check_trigger_src(&cmd->convert_src, TRIG_TIMER);
+ err |= cfc_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT);
+ err |= cfc_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE);
+
+ if (err)
+ return 1;
+
+ /* Step 2a : make sure trigger sources are unique */
+
+ err |= cfc_check_trigger_is_unique(cmd->stop_src);
+
+ /* Step 2b : and mutually compatible */
+
+ if (err)
+ return 2;
+
+ /* Step 3: check if arguments are trivially valid */
+
+ err |= cfc_check_trigger_arg_is(&cmd->start_arg, 0);
+ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, 0);
+ err |= cfc_check_trigger_arg_min(&cmd->convert_arg,
+ board->ai_min_acq_ns);
+ err |= cfc_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len);
+
+ if (cmd->stop_src == TRIG_COUNT)
+ err |= cfc_check_trigger_arg_min(&cmd->stop_arg, 1);
+ else /* TRIG_NONE */
+ err |= cfc_check_trigger_arg_is(&cmd->stop_arg, 0);
+
+ if (err)
+ return 3;
+
+ /* step 4: fix up any arguments */
+
+ /*
+ * FIXME: The hardware supports multiple scan modes but the original
+ * addi-data driver only supported reading a single channel with
+ * interrupts. Need a proper datasheet to fix this.
+ *
+ * The following scan modes are supported by the hardware:
+ * 1) Single software scan
+ * 2) Single hardware triggered scan
+ * 3) Continuous software scan
+ * 4) Continuous software scan with timer delay
+ * 5) Continuous hardware triggered scan
+ * 6) Continuous hardware triggered scan with timer delay
+ *
+ * For now, limit the chanlist to a single channel.
+ */
+ if (cmd->chanlist_len > 1) {
+ cmd->chanlist_len = 1;
+ err |= -EINVAL;
+ }
+
+ tmp = cmd->convert_arg;
+ err |= apci3xxx_ai_ns_to_timer(dev, &cmd->convert_arg,
+ cmd->flags & TRIG_ROUND_MASK);
+ if (tmp != cmd->convert_arg)
+ err |= -EINVAL;
+
+ if (err)
+ return 4;
+
+ return 0;
+}
+
+static int apci3xxx_ai_cmd(struct comedi_device *dev,
+ struct comedi_subdevice *s)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+ struct comedi_cmd *cmd = &s->async->cmd;
+ int ret;
+
+ ret = apci3xxx_ai_setup(dev, cmd->chanlist[0]);
+ if (ret)
+ return ret;
+
+ /* Set the convert timing unit */
+ writel(devpriv->ai_time_base, devpriv->mmio + 36);
+
+ /* Set the convert timing */
+ writel(devpriv->ai_timer, devpriv->mmio + 32);
+
+ /* Start the conversion */
+ writel(0x180000, devpriv->mmio + 8);
+
+ return 0;
+}
+
+static int apci3xxx_ai_cancel(struct comedi_device *dev,
+ struct comedi_subdevice *s)
+{
+ return 0;
+}
+
+static int apci3xxx_ao_insn_write(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int range = CR_RANGE(insn->chanspec);
+ unsigned int status;
+ int i;
+
+ for (i = 0; i < insn->n; i++) {
+ /* Set the range selection */
+ writel(range, devpriv->mmio + 96);
+
+ /* Write the analog value to the selected channel */
+ writel((data[i] << 8) | chan, devpriv->mmio + 100);
+
+ /* Wait the end of transfer */
+ do {
+ status = readl(devpriv->mmio + 96);
+ } while ((status & 0x100) != 0x100);
+ }
+
+ return insn->n;
+}
+
+static int apci3xxx_di_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ data[1] = inl(dev->iobase + 32) & 0xf;
+
+ return insn->n;
+}
+
+static int apci3xxx_do_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int mask = data[0];
+ unsigned int bits = data[1];
+
+ s->state = inl(dev->iobase + 48) & 0xf;
+ if (mask) {
+ s->state &= ~mask;
+ s->state |= (bits & mask);
+
+ outl(s->state, dev->iobase + 48);
+ }
+
+ data[1] = s->state;
+
+ return insn->n;
+}
+
+static int apci3xxx_dio_insn_config(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int mask = 1 << chan;
+ unsigned int bits;
+
+ /*
+ * Port 0 (channels 0-7) are always inputs
+ * Port 1 (channels 8-15) are always outputs
+ * Port 2 (channels 16-23) are programmable i/o
+ *
+ * Changing any channel in port 2 changes the entire port.
+ */
+ if (mask & 0xff0000)
+ bits = 0xff0000;
+ else
+ bits = 0;
+
+ switch (data[0]) {
+ case INSN_CONFIG_DIO_INPUT:
+ s->io_bits &= ~bits;
+ break;
+ case INSN_CONFIG_DIO_OUTPUT:
+ s->io_bits |= bits;
+ break;
+ case INSN_CONFIG_DIO_QUERY:
+ data[1] = (s->io_bits & bits) ? COMEDI_OUTPUT : COMEDI_INPUT;
+ return insn->n;
+ default:
+ return -EINVAL;
+ }
+
+ /* update port 2 configuration */
+ if (bits)
+ outl((s->io_bits >> 24) & 0xff, dev->iobase + 224);
+
+ return insn->n;
+}
+
+static int apci3xxx_dio_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int mask = data[0];
+ unsigned int bits = data[1];
+ unsigned int val;
+
+ /* only update output channels */
+ mask &= s->io_bits;
+ if (mask) {
+ s->state &= ~mask;
+ s->state |= (bits & mask);
+
+ if (mask & 0xff)
+ outl(s->state & 0xff, dev->iobase + 80);
+ if (mask & 0xff0000)
+ outl((s->state >> 16) & 0xff, dev->iobase + 112);
+ }
+
+ val = inl(dev->iobase + 80);
+ val |= (inl(dev->iobase + 64) << 8);
+ if (s->io_bits & 0xff0000)
+ val |= (inl(dev->iobase + 112) << 16);
+ else
+ val |= (inl(dev->iobase + 96) << 16);
+
+ data[1] = val;
+
+ return insn->n;
+}
+
+static int apci3xxx_reset(struct comedi_device *dev)
+{
+ struct apci3xxx_private *devpriv = dev->private;
+ unsigned int val;
+ int i;
+
+ /* Disable the interrupt */
+ disable_irq(dev->irq);
+
+ /* Clear the start command */
+ writel(0, devpriv->mmio + 8);
+
+ /* Reset the interrupt flags */
+ val = readl(devpriv->mmio + 16);
+ writel(val, devpriv->mmio + 16);
+
+ /* clear the EOS */
+ readl(devpriv->mmio + 20);
+
+ /* Clear the FIFO */
+ for (i = 0; i < 16; i++)
+ val = readl(devpriv->mmio + 28);
+
+ /* Enable the interrupt */
+ enable_irq(dev->irq);
+
+ return 0;
+}
+
static int apci3xxx_auto_attach(struct comedi_device *dev,
unsigned long context)
{
- const struct addi_board *board = NULL;
+ struct pci_dev *pcidev = comedi_to_pci_dev(dev);
+ const struct apci3xxx_boardinfo *board = NULL;
+ struct apci3xxx_private *devpriv;
+ struct comedi_subdevice *s;
+ int n_subdevices;
+ int subdev;
+ int ret;
if (context < ARRAY_SIZE(apci3xxx_boardtypes))
board = &apci3xxx_boardtypes[context];
if (!board)
return -ENODEV;
dev->board_ptr = board;
+ dev->board_name = board->name;
+
+ devpriv = kzalloc(sizeof(*devpriv), GFP_KERNEL);
+ if (!devpriv)
+ return -ENOMEM;
+ dev->private = devpriv;
+
+ ret = comedi_pci_enable(dev);
+ if (ret)
+ return ret;
+
+ dev->iobase = pci_resource_start(pcidev, 2);
+ devpriv->mmio = pci_ioremap_bar(pcidev, 3);
+
+ if (pcidev->irq > 0) {
+ ret = request_irq(pcidev->irq, apci3xxx_irq_handler,
+ IRQF_SHARED, dev->board_name, dev);
+ if (ret == 0)
+ dev->irq = pcidev->irq;
+ }
+
+ n_subdevices = (board->ai_n_chan ? 0 : 1) + board->has_ao +
+ board->has_dig_in + board->has_dig_out +
+ board->has_ttl_io;
+ ret = comedi_alloc_subdevices(dev, n_subdevices);
+ if (ret)
+ return ret;
+
+ subdev = 0;
+
+ /* Analog Input subdevice */
+ if (board->ai_n_chan) {
+ s = &dev->subdevices[subdev];
+ s->type = COMEDI_SUBD_AI;
+ s->subdev_flags = SDF_READABLE | board->ai_subdev_flags;
+ s->n_chan = board->ai_n_chan;
+ s->maxdata = board->ai_maxdata;
+ s->len_chanlist = s->n_chan;
+ s->range_table = &apci3xxx_ai_range;
+ s->insn_read = apci3xxx_ai_insn_read;
+ if (dev->irq) {
+ dev->read_subdev = s;
+ s->subdev_flags |= SDF_CMD_READ;
+ s->do_cmdtest = apci3xxx_ai_cmdtest;
+ s->do_cmd = apci3xxx_ai_cmd;
+ s->cancel = apci3xxx_ai_cancel;
+ }
+
+ subdev++;
+ }
+
+ /* Analog Output subdevice */
+ if (board->has_ao) {
+ s = &dev->subdevices[subdev];
+ s->type = COMEDI_SUBD_AO;
+ s->subdev_flags = SDF_WRITEABLE | SDF_GROUND | SDF_COMMON;
+ s->n_chan = 4;
+ s->maxdata = 0x0fff;
+ s->range_table = &apci3xxx_ao_range;
+ s->insn_write = apci3xxx_ao_insn_write;
+
+ subdev++;
+ }
+
+ /* Digital Input subdevice */
+ if (board->has_dig_in) {
+ s = &dev->subdevices[subdev];
+ s->type = COMEDI_SUBD_DI;
+ s->subdev_flags = SDF_READABLE;
+ s->n_chan = 4;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = apci3xxx_di_insn_bits;
+
+ subdev++;
+ }
+
+ /* Digital Output subdevice */
+ if (board->has_dig_out) {
+ s = &dev->subdevices[subdev];
+ s->type = COMEDI_SUBD_DO;
+ s->subdev_flags = SDF_WRITEABLE;
+ s->n_chan = 4;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = apci3xxx_do_insn_bits;
+
+ subdev++;
+ }
+
+ /* TTL Digital I/O subdevice */
+ if (board->has_ttl_io) {
+ s = &dev->subdevices[subdev];
+ s->type = COMEDI_SUBD_DIO;
+ s->subdev_flags = SDF_READABLE | SDF_WRITEABLE;
+ s->n_chan = 24;
+ s->maxdata = 1;
+ s->io_bits = 0xff; /* channels 0-7 are always outputs */
+ s->range_table = &range_digital;
+ s->insn_config = apci3xxx_dio_insn_config;
+ s->insn_bits = apci3xxx_dio_insn_bits;
+
+ subdev++;
+ }
+
+ apci3xxx_reset(dev);
+ return 0;
+}
+
+static void apci3xxx_detach(struct comedi_device *dev)
+{
+ struct apci3xxx_private *devpriv = dev->private;
- return addi_auto_attach(dev, context);
+ if (devpriv) {
+ if (dev->iobase)
+ apci3xxx_reset(dev);
+ if (dev->irq)
+ free_irq(dev->irq, dev);
+ if (devpriv->mmio)
+ iounmap(devpriv->mmio);
+ }
+ comedi_pci_disable(dev);
}
static struct comedi_driver apci3xxx_driver = {
.driver_name = "addi_apci_3xxx",
.module = THIS_MODULE,
.auto_attach = apci3xxx_auto_attach,
- .detach = i_ADDI_Detach,
+ .detach = apci3xxx_detach,
};
static int apci3xxx_pci_probe(struct pci_dev *dev,
diff --git a/drivers/staging/comedi/drivers/addi_watchdog.c b/drivers/staging/comedi/drivers/addi_watchdog.c
index 1666b5f510d3..7b21acc93929 100644
--- a/drivers/staging/comedi/drivers/addi_watchdog.c
+++ b/drivers/staging/comedi/drivers/addi_watchdog.c
@@ -16,10 +16,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "../comedidev.h"
@@ -130,14 +126,12 @@ int addi_watchdog_init(struct comedi_subdevice *s, unsigned long iobase)
{
struct addi_watchdog_private *spriv;
- spriv = kzalloc(sizeof(*spriv), GFP_KERNEL);
+ spriv = comedi_alloc_spriv(s, sizeof(*spriv));
if (!spriv)
return -ENOMEM;
spriv->iobase = iobase;
- s->private = spriv;
-
s->type = COMEDI_SUBD_TIMER;
s->subdev_flags = SDF_WRITEABLE;
s->n_chan = 1;
diff --git a/drivers/staging/comedi/drivers/adl_pci6208.c b/drivers/staging/comedi/drivers/adl_pci6208.c
index 8a438ff1bd45..b5e4e53f737f 100644
--- a/drivers/staging/comedi/drivers/adl_pci6208.c
+++ b/drivers/staging/comedi/drivers/adl_pci6208.c
@@ -20,10 +20,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: adl_pci6208
diff --git a/drivers/staging/comedi/drivers/adl_pci7x3x.c b/drivers/staging/comedi/drivers/adl_pci7x3x.c
index e3960745f506..0d9243a5f495 100644
--- a/drivers/staging/comedi/drivers/adl_pci7x3x.c
+++ b/drivers/staging/comedi/drivers/adl_pci7x3x.c
@@ -19,10 +19,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/adl_pci8164.c b/drivers/staging/comedi/drivers/adl_pci8164.c
index b3ec60afe3a0..0b591b0b5501 100644
--- a/drivers/staging/comedi/drivers/adl_pci8164.c
+++ b/drivers/staging/comedi/drivers/adl_pci8164.c
@@ -13,10 +13,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/adl_pci9111.c b/drivers/staging/comedi/drivers/adl_pci9111.c
index 6247fdcedcbf..af51c7460048 100644
--- a/drivers/staging/comedi/drivers/adl_pci9111.c
+++ b/drivers/staging/comedi/drivers/adl_pci9111.c
@@ -17,10 +17,6 @@ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/adq12b.c b/drivers/staging/comedi/drivers/adq12b.c
index 71142e36e7fb..d187a7bf0a55 100644
--- a/drivers/staging/comedi/drivers/adq12b.c
+++ b/drivers/staging/comedi/drivers/adq12b.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: adq12b
diff --git a/drivers/staging/comedi/drivers/adv_pci1723.c b/drivers/staging/comedi/drivers/adv_pci1723.c
index ccc114d6c08b..8430a27ec1b5 100644
--- a/drivers/staging/comedi/drivers/adv_pci1723.c
+++ b/drivers/staging/comedi/drivers/adv_pci1723.c
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
comedi/drivers/pci1723.c
COMEDI - Linux Control and Measurement Device Interface
@@ -13,12 +13,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-*******************************************************************************/
+*/
/*
Driver: adv_pci1723
Description: Advantech PCI-1723
diff --git a/drivers/staging/comedi/drivers/adv_pci1724.c b/drivers/staging/comedi/drivers/adv_pci1724.c
index e60f12578d44..da7462e01faa 100644
--- a/drivers/staging/comedi/drivers/adv_pci1724.c
+++ b/drivers/staging/comedi/drivers/adv_pci1724.c
@@ -17,12 +17,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
/*
diff --git a/drivers/staging/comedi/drivers/adv_pci_dio.c b/drivers/staging/comedi/drivers/adv_pci_dio.c
index f70c67471c13..8e6ec75bd294 100644
--- a/drivers/staging/comedi/drivers/adv_pci_dio.c
+++ b/drivers/staging/comedi/drivers/adv_pci_dio.c
@@ -1173,19 +1173,11 @@ static int pci_dio_auto_attach(struct comedi_device *dev,
static void pci_dio_detach(struct comedi_device *dev)
{
struct pci_dio_private *devpriv = dev->private;
- struct comedi_subdevice *s;
- int i;
if (devpriv) {
if (devpriv->valid)
pci_dio_reset(dev);
}
- for (i = 0; i < dev->n_subdevices; i++) {
- s = &dev->subdevices[i];
- if (s->type == COMEDI_SUBD_DIO)
- comedi_spriv_free(dev, i);
- s->private = NULL; /* some private data is static */
- }
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/aio_aio12_8.c b/drivers/staging/comedi/drivers/aio_aio12_8.c
index e2dc08a058bc..279dfe8951f7 100644
--- a/drivers/staging/comedi/drivers/aio_aio12_8.c
+++ b/drivers/staging/comedi/drivers/aio_aio12_8.c
@@ -14,10 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -259,17 +255,11 @@ static int aio_aio12_8_attach(struct comedi_device *dev,
return 0;
}
-static void aio_aio12_8_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 2);
- comedi_legacy_detach(dev);
-}
-
static struct comedi_driver aio_aio12_8_driver = {
.driver_name = "aio_aio12_8",
.module = THIS_MODULE,
.attach = aio_aio12_8_attach,
- .detach = aio_aio12_8_detach,
+ .detach = comedi_legacy_detach,
.board_name = &board_types[0].name,
.num_names = ARRAY_SIZE(board_types),
.offset = sizeof(struct aio12_8_boardtype),
diff --git a/drivers/staging/comedi/drivers/aio_iiro_16.c b/drivers/staging/comedi/drivers/aio_iiro_16.c
index 126854c05093..029834d0ff1f 100644
--- a/drivers/staging/comedi/drivers/aio_iiro_16.c
+++ b/drivers/staging/comedi/drivers/aio_iiro_16.c
@@ -14,10 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/amplc_dio200.c b/drivers/staging/comedi/drivers/amplc_dio200.c
index 297750bef0f7..e2478105ac1a 100644
--- a/drivers/staging/comedi/drivers/amplc_dio200.c
+++ b/drivers/staging/comedi/drivers/amplc_dio200.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
* Driver: amplc_dio200
diff --git a/drivers/staging/comedi/drivers/amplc_dio200.h b/drivers/staging/comedi/drivers/amplc_dio200.h
index cf2e7261740e..43160b9944bb 100644
--- a/drivers/staging/comedi/drivers/amplc_dio200.h
+++ b/drivers/staging/comedi/drivers/amplc_dio200.h
@@ -18,11 +18,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef AMPLC_DIO200_H_INCLUDED
diff --git a/drivers/staging/comedi/drivers/amplc_dio200_common.c b/drivers/staging/comedi/drivers/amplc_dio200_common.c
index 3403e5ccfa93..649fc69724fb 100644
--- a/drivers/staging/comedi/drivers/amplc_dio200_common.c
+++ b/drivers/staging/comedi/drivers/amplc_dio200_common.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#include <linux/interrupt.h>
@@ -561,7 +556,7 @@ dio200_subdev_intr_init(struct comedi_device *dev, struct comedi_subdevice *s,
const struct dio200_layout *layout = dio200_dev_layout(dev);
struct dio200_subdev_intr *subpriv;
- subpriv = kzalloc(sizeof(*subpriv), GFP_KERNEL);
+ subpriv = comedi_alloc_spriv(s, sizeof(*subpriv));
if (!subpriv)
return -ENOMEM;
@@ -573,7 +568,6 @@ dio200_subdev_intr_init(struct comedi_device *dev, struct comedi_subdevice *s,
/* Disable interrupt sources. */
dio200_write8(dev, subpriv->ofs, 0);
- s->private = subpriv;
s->type = COMEDI_SUBD_DI;
s->subdev_flags = SDF_READABLE | SDF_CMD_READ;
if (layout->has_int_sce) {
@@ -888,11 +882,10 @@ dio200_subdev_8254_init(struct comedi_device *dev, struct comedi_subdevice *s,
struct dio200_subdev_8254 *subpriv;
unsigned int chan;
- subpriv = kzalloc(sizeof(*subpriv), GFP_KERNEL);
+ subpriv = comedi_alloc_spriv(s, sizeof(*subpriv));
if (!subpriv)
return -ENOMEM;
- s->private = subpriv;
s->type = COMEDI_SUBD_COUNTER;
s->subdev_flags = SDF_WRITABLE | SDF_READABLE;
s->n_chan = 3;
@@ -1024,11 +1017,12 @@ static int dio200_subdev_8255_init(struct comedi_device *dev,
{
struct dio200_subdev_8255 *subpriv;
- subpriv = kzalloc(sizeof(*subpriv), GFP_KERNEL);
+ subpriv = comedi_alloc_spriv(s, sizeof(*subpriv));
if (!subpriv)
return -ENOMEM;
+
subpriv->ofs = offset;
- s->private = subpriv;
+
s->type = COMEDI_SUBD_DIO;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
s->n_chan = 24;
@@ -1230,28 +1224,11 @@ void amplc_dio200_common_detach(struct comedi_device *dev)
{
const struct dio200_board *thisboard = comedi_board(dev);
struct dio200_private *devpriv = dev->private;
- const struct dio200_layout *layout;
- unsigned n;
if (!thisboard || !devpriv)
return;
if (dev->irq)
free_irq(dev->irq, dev);
- if (dev->subdevices) {
- layout = dio200_board_layout(thisboard);
- for (n = 0; n < dev->n_subdevices; n++) {
- switch (layout->sdtype[n]) {
- case sd_8254:
- case sd_8255:
- case sd_intr:
- comedi_spriv_free(dev, n);
- break;
- case sd_timer:
- default:
- break;
- }
- }
- }
}
EXPORT_SYMBOL_GPL(amplc_dio200_common_detach);
diff --git a/drivers/staging/comedi/drivers/amplc_dio200_pci.c b/drivers/staging/comedi/drivers/amplc_dio200_pci.c
index 4be44e877373..d7d9f5cc3ab4 100644
--- a/drivers/staging/comedi/drivers/amplc_dio200_pci.c
+++ b/drivers/staging/comedi/drivers/amplc_dio200_pci.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
* Driver: amplc_dio200_pci
diff --git a/drivers/staging/comedi/drivers/amplc_pc236.c b/drivers/staging/comedi/drivers/amplc_pc236.c
index 115ecd51677e..4e889b82cbf2 100644
--- a/drivers/staging/comedi/drivers/amplc_pc236.c
+++ b/drivers/staging/comedi/drivers/amplc_pc236.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: amplc_pc236
@@ -543,7 +538,6 @@ static void pc236_detach(struct comedi_device *dev)
return;
if (dev->iobase)
pc236_intr_disable(dev);
- comedi_spriv_free(dev, 0);
if (is_isa_board(thisboard)) {
comedi_legacy_detach(dev);
} else if (is_pci_board(thisboard)) {
diff --git a/drivers/staging/comedi/drivers/amplc_pc263.c b/drivers/staging/comedi/drivers/amplc_pc263.c
index 94a752d852bb..6546095e7a45 100644
--- a/drivers/staging/comedi/drivers/amplc_pc263.c
+++ b/drivers/staging/comedi/drivers/amplc_pc263.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: amplc_pc263
diff --git a/drivers/staging/comedi/drivers/amplc_pci224.c b/drivers/staging/comedi/drivers/amplc_pci224.c
index 4d7eab9b5565..f1e36f08b103 100644
--- a/drivers/staging/comedi/drivers/amplc_pci224.c
+++ b/drivers/staging/comedi/drivers/amplc_pci224.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: amplc_pci224
diff --git a/drivers/staging/comedi/drivers/amplc_pci230.c b/drivers/staging/comedi/drivers/amplc_pci230.c
index 49200fbd60b9..846d6448fa4d 100644
--- a/drivers/staging/comedi/drivers/amplc_pci230.c
+++ b/drivers/staging/comedi/drivers/amplc_pci230.c
@@ -16,10 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: amplc_pci230
@@ -2834,7 +2830,6 @@ static void pci230_detach(struct comedi_device *dev)
{
struct pci_dev *pcidev = comedi_to_pci_dev(dev);
- comedi_spriv_free(dev, 2);
if (dev->irq)
free_irq(dev->irq, dev);
comedi_pci_disable(dev);
diff --git a/drivers/staging/comedi/drivers/amplc_pci263.c b/drivers/staging/comedi/drivers/amplc_pci263.c
index 8b57533bf406..4da900cc5845 100644
--- a/drivers/staging/comedi/drivers/amplc_pci263.c
+++ b/drivers/staging/comedi/drivers/amplc_pci263.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: amplc_pci263
diff --git a/drivers/staging/comedi/drivers/c6xdigio.c b/drivers/staging/comedi/drivers/c6xdigio.c
index 92376dc86dd8..929218a35975 100644
--- a/drivers/staging/comedi/drivers/c6xdigio.c
+++ b/drivers/staging/comedi/drivers/c6xdigio.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: c6xdigio
diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c
index f874fff44523..ae9a2082b5a4 100644
--- a/drivers/staging/comedi/drivers/cb_das16_cs.c
+++ b/drivers/staging/comedi/drivers/cb_das16_cs.c
@@ -15,10 +15,6 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
PCMCIA support code for this driver is adapted from the dummy_cs.c
driver of the Linux PCMCIA Card Services package.
diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c
index 53dd298d2b54..58bca184bf22 100644
--- a/drivers/staging/comedi/drivers/cb_pcidas.c
+++ b/drivers/staging/comedi/drivers/cb_pcidas.c
@@ -19,12 +19,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: cb_pcidas
@@ -1608,7 +1602,6 @@ static void cb_pcidas_detach(struct comedi_device *dev)
}
if (dev->irq)
free_irq(dev->irq, dev);
- comedi_spriv_free(dev, 2);
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/cb_pcidas64.c b/drivers/staging/comedi/drivers/cb_pcidas64.c
index c3e5495b4f06..43c0bf58771a 100644
--- a/drivers/staging/comedi/drivers/cb_pcidas64.c
+++ b/drivers/staging/comedi/drivers/cb_pcidas64.c
@@ -28,12 +28,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
/*
* Driver: cb_pcidas64
@@ -4163,7 +4158,6 @@ static void detach(struct comedi_device *dev)
devpriv->ao_dma_desc_bus_addr);
}
}
- comedi_spriv_free(dev, 4);
comedi_pci_disable(dev);
}
diff --git a/drivers/staging/comedi/drivers/cb_pcidda.c b/drivers/staging/comedi/drivers/cb_pcidda.c
index f9b459888b8b..2d3e920e5987 100644
--- a/drivers/staging/comedi/drivers/cb_pcidda.c
+++ b/drivers/staging/comedi/drivers/cb_pcidda.c
@@ -17,10 +17,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -397,18 +393,11 @@ static int cb_pcidda_auto_attach(struct comedi_device *dev,
return 0;
}
-static void cb_pcidda_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 1);
- comedi_spriv_free(dev, 2);
- comedi_pci_disable(dev);
-}
-
static struct comedi_driver cb_pcidda_driver = {
.driver_name = "cb_pcidda",
.module = THIS_MODULE,
.auto_attach = cb_pcidda_auto_attach,
- .detach = cb_pcidda_detach,
+ .detach = comedi_pci_disable,
};
static int cb_pcidda_pci_probe(struct pci_dev *dev,
diff --git a/drivers/staging/comedi/drivers/cb_pcimdas.c b/drivers/staging/comedi/drivers/cb_pcimdas.c
index 29813c9d4a2a..8b5c198862a1 100644
--- a/drivers/staging/comedi/drivers/cb_pcimdas.c
+++ b/drivers/staging/comedi/drivers/cb_pcimdas.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: cb_pcimdas
diff --git a/drivers/staging/comedi/drivers/cb_pcimdda.c b/drivers/staging/comedi/drivers/cb_pcimdda.c
index 88f03ae6f3e6..406cba8cba88 100644
--- a/drivers/staging/comedi/drivers/cb_pcimdda.c
+++ b/drivers/staging/comedi/drivers/cb_pcimdda.c
@@ -15,11 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: cb_pcimdda
@@ -197,17 +192,11 @@ static int cb_pcimdda_auto_attach(struct comedi_device *dev,
return 1;
}
-static void cb_pcimdda_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 1);
- comedi_pci_disable(dev);
-}
-
static struct comedi_driver cb_pcimdda_driver = {
.driver_name = "cb_pcimdda",
.module = THIS_MODULE,
.auto_attach = cb_pcimdda_auto_attach,
- .detach = cb_pcimdda_detach,
+ .detach = comedi_pci_disable,
};
static int cb_pcimdda_pci_probe(struct pci_dev *dev,
diff --git a/drivers/staging/comedi/drivers/comedi_bond.c b/drivers/staging/comedi/drivers/comedi_bond.c
index 1bb53816eca3..1a51866be6f7 100644
--- a/drivers/staging/comedi/drivers/comedi_bond.c
+++ b/drivers/staging/comedi/drivers/comedi_bond.c
@@ -15,11 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: comedi_bond
diff --git a/drivers/staging/comedi/drivers/comedi_fc.c b/drivers/staging/comedi/drivers/comedi_fc.c
index 37dc79637d2a..b3d89c82d087 100644
--- a/drivers/staging/comedi/drivers/comedi_fc.c
+++ b/drivers/staging/comedi/drivers/comedi_fc.c
@@ -17,12 +17,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
#include "../comedidev.h"
diff --git a/drivers/staging/comedi/drivers/comedi_fc.h b/drivers/staging/comedi/drivers/comedi_fc.h
index 31afab79f39a..a4dea7cb86be 100644
--- a/drivers/staging/comedi/drivers/comedi_fc.h
+++ b/drivers/staging/comedi/drivers/comedi_fc.h
@@ -17,12 +17,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
#ifndef _COMEDI_FC_H
#define _COMEDI_FC_H
diff --git a/drivers/staging/comedi/drivers/comedi_parport.c b/drivers/staging/comedi/drivers/comedi_parport.c
index 3e061cc9b48e..772a8f5f0c1c 100644
--- a/drivers/staging/comedi/drivers/comedi_parport.c
+++ b/drivers/staging/comedi/drivers/comedi_parport.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: comedi_parport
diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c
index c1d8e86f53a2..907e7a3822f5 100644
--- a/drivers/staging/comedi/drivers/comedi_test.c
+++ b/drivers/staging/comedi/drivers/comedi_test.c
@@ -21,12 +21,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
/*
Driver: comedi_test
Description: generates fake waveforms
diff --git a/drivers/staging/comedi/drivers/contec_pci_dio.c b/drivers/staging/comedi/drivers/contec_pci_dio.c
index f2230bfd4eb9..0fb9027dde2d 100644
--- a/drivers/staging/comedi/drivers/contec_pci_dio.c
+++ b/drivers/staging/comedi/drivers/contec_pci_dio.c
@@ -13,11 +13,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: contec_pci_dio
diff --git a/drivers/staging/comedi/drivers/daqboard2000.c b/drivers/staging/comedi/drivers/daqboard2000.c
index b87f95c3e17d..44c912b48b6e 100644
--- a/drivers/staging/comedi/drivers/daqboard2000.c
+++ b/drivers/staging/comedi/drivers/daqboard2000.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: daqboard2000
@@ -110,7 +105,6 @@ Configuration options: not applicable, uses PCI auto config
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <linux/firmware.h>
#include "../comedidev.h"
@@ -524,7 +518,8 @@ static int daqboard2000_writeCPLD(struct comedi_device *dev, int data)
}
static int initialize_daqboard2000(struct comedi_device *dev,
- const u8 *cpld_array, size_t len)
+ const u8 *cpld_array, size_t len,
+ unsigned long context)
{
struct daqboard2000_private *devpriv = dev->private;
int result = -EIO;
@@ -565,22 +560,6 @@ static int initialize_daqboard2000(struct comedi_device *dev,
return result;
}
-static int daqboard2000_upload_firmware(struct comedi_device *dev)
-{
- struct pci_dev *pcidev = comedi_to_pci_dev(dev);
- const struct firmware *fw;
- int ret;
-
- ret = request_firmware(&fw, DAQBOARD2000_FIRMWARE, &pcidev->dev);
- if (ret)
- return ret;
-
- ret = initialize_daqboard2000(dev, fw->data, fw->size);
- release_firmware(fw);
-
- return ret;
-}
-
static void daqboard2000_adcStopDmaTransfer(struct comedi_device *dev)
{
}
@@ -724,7 +703,9 @@ static int daqboard2000_auto_attach(struct comedi_device *dev,
readl(devpriv->plx + 0x6c);
- result = daqboard2000_upload_firmware(dev);
+ result = comedi_load_firmware(dev, &comedi_to_pci_dev(dev)->dev,
+ DAQBOARD2000_FIRMWARE,
+ initialize_daqboard2000, 0);
if (result < 0)
return result;
@@ -766,7 +747,6 @@ static void daqboard2000_detach(struct comedi_device *dev)
{
struct daqboard2000_private *devpriv = dev->private;
- comedi_spriv_free(dev, 2);
if (dev->irq)
free_irq(dev->irq, dev);
if (devpriv) {
diff --git a/drivers/staging/comedi/drivers/das08.c b/drivers/staging/comedi/drivers/das08.c
index ba12c1d605fb..2e7e3e202390 100644
--- a/drivers/staging/comedi/drivers/das08.c
+++ b/drivers/staging/comedi/drivers/das08.c
@@ -16,12 +16,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- *****************************************************************
*/
/*
@@ -566,12 +560,6 @@ int das08_common_attach(struct comedi_device *dev, unsigned long iobase)
}
EXPORT_SYMBOL_GPL(das08_common_attach);
-void das08_common_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 4);
-}
-EXPORT_SYMBOL_GPL(das08_common_detach);
-
static int __init das08_init(void)
{
return 0;
diff --git a/drivers/staging/comedi/drivers/das08.h b/drivers/staging/comedi/drivers/das08.h
index 89bb8d6fdfc6..cce1b584200a 100644
--- a/drivers/staging/comedi/drivers/das08.h
+++ b/drivers/staging/comedi/drivers/das08.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _DAS08_H
@@ -52,6 +47,5 @@ struct das08_private_struct {
};
int das08_common_attach(struct comedi_device *dev, unsigned long iobase);
-void das08_common_detach(struct comedi_device *dev);
#endif /* _DAS08_H */
diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c
index d9f3e92317d3..885fb179c9b4 100644
--- a/drivers/staging/comedi/drivers/das08_cs.c
+++ b/drivers/staging/comedi/drivers/das08_cs.c
@@ -16,19 +16,12 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
PCMCIA support code for this driver is adapted from the dummy_cs.c
driver of the Linux PCMCIA Card Services package.
The initial developer of the original code is David A. Hinds
<dahinds@users.sourceforge.net>. Portions created by David A. Hinds
are Copyright (C) 1999 David A. Hinds. All Rights Reserved.
-
-*****************************************************************
-
*/
/*
Driver: das08_cs
@@ -93,17 +86,11 @@ static int das08_cs_auto_attach(struct comedi_device *dev,
return das08_common_attach(dev, iobase);
}
-static void das08_cs_detach(struct comedi_device *dev)
-{
- das08_common_detach(dev);
- comedi_pcmcia_disable(dev);
-}
-
static struct comedi_driver driver_das08_cs = {
.driver_name = "das08_cs",
.module = THIS_MODULE,
.auto_attach = das08_cs_auto_attach,
- .detach = das08_cs_detach,
+ .detach = comedi_pcmcia_disable,
};
static int das08_pcmcia_attach(struct pcmcia_device *link)
diff --git a/drivers/staging/comedi/drivers/das08_isa.c b/drivers/staging/comedi/drivers/das08_isa.c
index f09f6966ed65..21a94389b8b2 100644
--- a/drivers/staging/comedi/drivers/das08_isa.c
+++ b/drivers/staging/comedi/drivers/das08_isa.c
@@ -16,10 +16,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -193,17 +189,11 @@ static int das08_isa_attach(struct comedi_device *dev,
return das08_common_attach(dev, dev->iobase);
}
-static void das08_isa_detach(struct comedi_device *dev)
-{
- das08_common_detach(dev);
- comedi_legacy_detach(dev);
-}
-
static struct comedi_driver das08_isa_driver = {
.driver_name = "isa-das08",
.module = THIS_MODULE,
.attach = das08_isa_attach,
- .detach = das08_isa_detach,
+ .detach = comedi_legacy_detach,
.board_name = &das08_isa_boards[0].name,
.num_names = ARRAY_SIZE(das08_isa_boards),
.offset = sizeof(das08_isa_boards[0]),
diff --git a/drivers/staging/comedi/drivers/das08_pci.c b/drivers/staging/comedi/drivers/das08_pci.c
index 53fa943dd0b7..9c5d234e063f 100644
--- a/drivers/staging/comedi/drivers/das08_pci.c
+++ b/drivers/staging/comedi/drivers/das08_pci.c
@@ -16,10 +16,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -79,17 +75,11 @@ static int das08_pci_auto_attach(struct comedi_device *dev,
return das08_common_attach(dev, dev->iobase);
}
-static void das08_pci_detach(struct comedi_device *dev)
-{
- das08_common_detach(dev);
- comedi_pci_disable(dev);
-}
-
static struct comedi_driver das08_pci_comedi_driver = {
.driver_name = "pci-das08",
.module = THIS_MODULE,
.auto_attach = das08_pci_auto_attach,
- .detach = das08_pci_detach,
+ .detach = comedi_pci_disable,
};
static int das08_pci_probe(struct pci_dev *dev,
diff --git a/drivers/staging/comedi/drivers/das16.c b/drivers/staging/comedi/drivers/das16.c
index 762b5a6eac5a..dbec3ba99548 100644
--- a/drivers/staging/comedi/drivers/das16.c
+++ b/drivers/staging/comedi/drivers/das16.c
@@ -16,12 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: das16
@@ -1339,7 +1333,6 @@ static void das16_detach(struct comedi_device *dev)
struct das16_private_struct *devpriv = dev->private;
das16_reset(dev);
- comedi_spriv_free(dev, 4);
if (devpriv) {
int i;
for (i = 0; i < 2; i++) {
diff --git a/drivers/staging/comedi/drivers/das16m1.c b/drivers/staging/comedi/drivers/das16m1.c
index 9cb9c3b04797..0b33808c3a7d 100644
--- a/drivers/staging/comedi/drivers/das16m1.c
+++ b/drivers/staging/comedi/drivers/das16m1.c
@@ -17,12 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: das16m1
@@ -672,7 +666,6 @@ static void das16m1_detach(struct comedi_device *dev)
{
struct das16m1_private_struct *devpriv = dev->private;
- comedi_spriv_free(dev, 3);
if (devpriv && devpriv->extra_iobase)
release_region(devpriv->extra_iobase, DAS16M1_SIZE2);
comedi_legacy_detach(dev);
diff --git a/drivers/staging/comedi/drivers/das1800.c b/drivers/staging/comedi/drivers/das1800.c
index abf7638a9f71..23b4a661eb1a 100644
--- a/drivers/staging/comedi/drivers/das1800.c
+++ b/drivers/staging/comedi/drivers/das1800.c
@@ -15,12 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: das1800
diff --git a/drivers/staging/comedi/drivers/das6402.c b/drivers/staging/comedi/drivers/das6402.c
index 11424fb5b4d4..f0530778bb3b 100644
--- a/drivers/staging/comedi/drivers/das6402.c
+++ b/drivers/staging/comedi/drivers/das6402.c
@@ -22,11 +22,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: das6402
diff --git a/drivers/staging/comedi/drivers/das800.c b/drivers/staging/comedi/drivers/das800.c
index 9ce6cbcc7ee8..091cd911b38a 100644
--- a/drivers/staging/comedi/drivers/das800.c
+++ b/drivers/staging/comedi/drivers/das800.c
@@ -15,12 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: das800
diff --git a/drivers/staging/comedi/drivers/dmm32at.c b/drivers/staging/comedi/drivers/dmm32at.c
index 6c85dd2d549b..e29847d73b43 100644
--- a/drivers/staging/comedi/drivers/dmm32at.c
+++ b/drivers/staging/comedi/drivers/dmm32at.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dmm32at
diff --git a/drivers/staging/comedi/drivers/dt2811.c b/drivers/staging/comedi/drivers/dt2811.c
index 8757b54ad4ac..5348cdae408a 100644
--- a/drivers/staging/comedi/drivers/dt2811.c
+++ b/drivers/staging/comedi/drivers/dt2811.c
@@ -18,10 +18,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: dt2811
diff --git a/drivers/staging/comedi/drivers/dt2814.c b/drivers/staging/comedi/drivers/dt2814.c
index 7c95b3b68131..87e9749c4be7 100644
--- a/drivers/staging/comedi/drivers/dt2814.c
+++ b/drivers/staging/comedi/drivers/dt2814.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dt2814
diff --git a/drivers/staging/comedi/drivers/dt2815.c b/drivers/staging/comedi/drivers/dt2815.c
index b24e87681fe3..0fcd4fe7acdc 100644
--- a/drivers/staging/comedi/drivers/dt2815.c
+++ b/drivers/staging/comedi/drivers/dt2815.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dt2815
diff --git a/drivers/staging/comedi/drivers/dt2817.c b/drivers/staging/comedi/drivers/dt2817.c
index b5c8e8213faf..2f46be715f79 100644
--- a/drivers/staging/comedi/drivers/dt2817.c
+++ b/drivers/staging/comedi/drivers/dt2817.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dt2817
diff --git a/drivers/staging/comedi/drivers/dt282x.c b/drivers/staging/comedi/drivers/dt282x.c
index 90f2de9bc402..c1950e3b19a2 100644
--- a/drivers/staging/comedi/drivers/dt282x.c
+++ b/drivers/staging/comedi/drivers/dt282x.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dt282x
diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c
index 7e03929c9a14..01a2f889d5b0 100644
--- a/drivers/staging/comedi/drivers/dt3000.c
+++ b/drivers/staging/comedi/drivers/dt3000.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: dt3000
diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c
index 81eb5ed6ec97..6c60949d9193 100644
--- a/drivers/staging/comedi/drivers/dt9812.c
+++ b/drivers/staging/comedi/drivers/dt9812.c
@@ -15,11 +15,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
*/
/*
@@ -43,14 +38,11 @@ for my needs.
* says P1).
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/kref.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
@@ -60,6 +52,9 @@ for my needs.
#define DT9812_MAX_WRITE_CMD_PIPE_SIZE 32
#define DT9812_MAX_READ_CMD_PIPE_SIZE 32
+/* usb_bulk_msg() timout in milliseconds */
+#define DT9812_USB_TIMEOUT 1000
+
/*
* See Silican Laboratories C8051F020/1/2/3 manual
*/
@@ -242,87 +237,25 @@ struct dt9812_usb_cmd {
struct dt9812_write_multi write_multi_info;
struct dt9812_rmw_multi rmw_multi_info;
} u;
-#if 0
- WRITE_BYTE_INFO WriteByteInfo;
- READ_BYTE_INFO ReadByteInfo;
- WRITE_MULTI_INFO WriteMultiInfo;
- READ_MULTI_INFO ReadMultiInfo;
- RMW_BYTE_INFO RMWByteInfo;
- RMW_MULTI_INFO RMWMultiInfo;
- DAC_THRESHOLD_INFO DacThresholdInfo;
- INT_ON_CHANGE_MASK_INFO IntOnChangeMaskInfo;
- CGL_INFO CglInfo;
- SUBSYSTEM_INFO SubsystemInfo;
- CAL_POT_CMD CalPotCmd;
- WRITE_DEV_BYTE_INFO WriteDevByteInfo;
- READ_DEV_BYTE_INFO ReadDevByteInfo;
- WRITE_DEV_MULTI_INFO WriteDevMultiInfo;
- READ_DEV_MULTI_INFO ReadDevMultiInfo;
- READ_SINGLE_VALUE_INFO ReadSingleValueInfo;
- WRITE_SINGLE_VALUE_INFO WriteSingleValueInfo;
-#endif
};
-#define DT9812_NUM_SLOTS 16
-
-static DEFINE_SEMAPHORE(dt9812_mutex);
-
-static const struct usb_device_id dt9812_table[] = {
- {USB_DEVICE(0x0867, 0x9812)},
- {} /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE(usb, dt9812_table);
-
-struct usb_dt9812 {
- struct slot_dt9812 *slot;
- struct usb_device *udev;
- struct usb_interface *interface;
- u16 vendor;
- u16 product;
- u16 device;
- u32 serial;
+struct dt9812_private {
+ struct semaphore sem;
struct {
__u8 addr;
size_t size;
- } message_pipe, command_write, command_read, write_stream, read_stream;
- struct kref kref;
- u16 analog_out_shadow[2];
- u8 digital_out_shadow;
-};
-
-struct comedi_dt9812 {
- struct slot_dt9812 *slot;
- u32 serial;
-};
-
-struct slot_dt9812 {
- struct semaphore mutex;
- u32 serial;
- struct usb_dt9812 *usb;
- struct comedi_dt9812 *comedi;
+ } cmd_wr, cmd_rd;
+ u16 device;
+ u16 ao_shadow[2];
};
-static struct slot_dt9812 dt9812[DT9812_NUM_SLOTS];
-
-static inline struct usb_dt9812 *to_dt9812_dev(struct kref *d)
-{
- return container_of(d, struct usb_dt9812, kref);
-}
-
-static void dt9812_delete(struct kref *kref)
-{
- struct usb_dt9812 *dev = to_dt9812_dev(kref);
-
- usb_put_dev(dev->udev);
- kfree(dev);
-}
-
-static int dt9812_read_info(struct usb_dt9812 *dev, int offset, void *buf,
- size_t buf_size)
+static int dt9812_read_info(struct comedi_device *dev,
+ int offset, void *buf, size_t buf_size)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
struct dt9812_usb_cmd cmd;
- int count, retval;
+ int count, ret;
cmd.cmd = cpu_to_le32(DT9812_R_FLASH_DATA);
cmd.u.flash_data_info.address =
@@ -330,25 +263,23 @@ static int dt9812_read_info(struct usb_dt9812 *dev, int offset, void *buf,
cmd.u.flash_data_info.numbytes = cpu_to_le16(buf_size);
/* DT9812 only responds to 32 byte writes!! */
- count = 32;
- retval = usb_bulk_msg(dev->udev,
- usb_sndbulkpipe(dev->udev,
- dev->command_write.addr),
- &cmd, 32, &count, HZ * 1);
- if (retval)
- return retval;
- retval = usb_bulk_msg(dev->udev,
- usb_rcvbulkpipe(dev->udev,
- dev->command_read.addr),
- buf, buf_size, &count, HZ * 1);
- return retval;
+ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+ &cmd, 32, &count, DT9812_USB_TIMEOUT);
+ if (ret)
+ return ret;
+
+ return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
+ buf, buf_size, &count, DT9812_USB_TIMEOUT);
}
-static int dt9812_read_multiple_registers(struct usb_dt9812 *dev, int reg_count,
- u8 *address, u8 *value)
+static int dt9812_read_multiple_registers(struct comedi_device *dev,
+ int reg_count, u8 *address,
+ u8 *value)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
struct dt9812_usb_cmd cmd;
- int i, count, retval;
+ int i, count, ret;
cmd.cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG);
cmd.u.read_multi_info.count = reg_count;
@@ -356,26 +287,23 @@ static int dt9812_read_multiple_registers(struct usb_dt9812 *dev, int reg_count,
cmd.u.read_multi_info.address[i] = address[i];
/* DT9812 only responds to 32 byte writes!! */
- count = 32;
- retval = usb_bulk_msg(dev->udev,
- usb_sndbulkpipe(dev->udev,
- dev->command_write.addr),
- &cmd, 32, &count, HZ * 1);
- if (retval)
- return retval;
- retval = usb_bulk_msg(dev->udev,
- usb_rcvbulkpipe(dev->udev,
- dev->command_read.addr),
- value, reg_count, &count, HZ * 1);
- return retval;
+ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+ &cmd, 32, &count, DT9812_USB_TIMEOUT);
+ if (ret)
+ return ret;
+
+ return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr),
+ value, reg_count, &count, DT9812_USB_TIMEOUT);
}
-static int dt9812_write_multiple_registers(struct usb_dt9812 *dev,
+static int dt9812_write_multiple_registers(struct comedi_device *dev,
int reg_count, u8 *address,
u8 *value)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
struct dt9812_usb_cmd cmd;
- int i, count, retval;
+ int i, count;
cmd.cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG);
cmd.u.read_multi_info.count = reg_count;
@@ -383,19 +311,20 @@ static int dt9812_write_multiple_registers(struct usb_dt9812 *dev,
cmd.u.write_multi_info.write[i].address = address[i];
cmd.u.write_multi_info.write[i].value = value[i];
}
+
/* DT9812 only responds to 32 byte writes!! */
- retval = usb_bulk_msg(dev->udev,
- usb_sndbulkpipe(dev->udev,
- dev->command_write.addr),
- &cmd, 32, &count, HZ * 1);
- return retval;
+ return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+ &cmd, 32, &count, DT9812_USB_TIMEOUT);
}
-static int dt9812_rmw_multiple_registers(struct usb_dt9812 *dev, int reg_count,
+static int dt9812_rmw_multiple_registers(struct comedi_device *dev,
+ int reg_count,
struct dt9812_rmw_byte *rmw)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
struct dt9812_usb_cmd cmd;
- int i, count, retval;
+ int i, count;
cmd.cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG);
cmd.u.rmw_multi_info.count = reg_count;
@@ -403,76 +332,52 @@ static int dt9812_rmw_multiple_registers(struct usb_dt9812 *dev, int reg_count,
cmd.u.rmw_multi_info.rmw[i] = rmw[i];
/* DT9812 only responds to 32 byte writes!! */
- retval = usb_bulk_msg(dev->udev,
- usb_sndbulkpipe(dev->udev,
- dev->command_write.addr),
- &cmd, 32, &count, HZ * 1);
- return retval;
+ return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr),
+ &cmd, 32, &count, DT9812_USB_TIMEOUT);
}
-static int dt9812_digital_in(struct slot_dt9812 *slot, u8 *bits)
+static int dt9812_digital_in(struct comedi_device *dev, u8 *bits)
{
- int result = -ENODEV;
-
- down(&slot->mutex);
- if (slot->usb) {
- u8 reg[2] = { F020_SFR_P3, F020_SFR_P1 };
- u8 value[2];
+ struct dt9812_private *devpriv = dev->private;
+ u8 reg[2] = { F020_SFR_P3, F020_SFR_P1 };
+ u8 value[2];
+ int ret;
- result = dt9812_read_multiple_registers(slot->usb, 2, reg,
- value);
- if (result == 0) {
- /*
- * bits 0-6 in F020_SFR_P3 are bits 0-6 in the digital
- * input port bit 3 in F020_SFR_P1 is bit 7 in the
- * digital input port
- */
- *bits = (value[0] & 0x7f) | ((value[1] & 0x08) << 4);
- /* printk("%2.2x, %2.2x -> %2.2x\n",
- value[0], value[1], *bits); */
- }
+ down(&devpriv->sem);
+ ret = dt9812_read_multiple_registers(dev, 2, reg, value);
+ if (ret == 0) {
+ /*
+ * bits 0-6 in F020_SFR_P3 are bits 0-6 in the digital
+ * input port bit 3 in F020_SFR_P1 is bit 7 in the
+ * digital input port
+ */
+ *bits = (value[0] & 0x7f) | ((value[1] & 0x08) << 4);
}
- up(&slot->mutex);
+ up(&devpriv->sem);
- return result;
+ return ret;
}
-static int dt9812_digital_out(struct slot_dt9812 *slot, u8 bits)
+static int dt9812_digital_out(struct comedi_device *dev, u8 bits)
{
- int result = -ENODEV;
-
- down(&slot->mutex);
- if (slot->usb) {
- u8 reg[1];
- u8 value[1];
-
- reg[0] = F020_SFR_P2;
- value[0] = bits;
- result = dt9812_write_multiple_registers(slot->usb, 1, reg,
- value);
- slot->usb->digital_out_shadow = bits;
- }
- up(&slot->mutex);
- return result;
-}
+ struct dt9812_private *devpriv = dev->private;
+ u8 reg[1] = { F020_SFR_P2 };
+ u8 value[1] = { bits };
+ int ret;
-static int dt9812_digital_out_shadow(struct slot_dt9812 *slot, u8 *bits)
-{
- int result = -ENODEV;
+ down(&devpriv->sem);
+ ret = dt9812_write_multiple_registers(dev, 1, reg, value);
+ up(&devpriv->sem);
- down(&slot->mutex);
- if (slot->usb) {
- *bits = slot->usb->digital_out_shadow;
- result = 0;
- }
- up(&slot->mutex);
- return result;
+ return ret;
}
-static void dt9812_configure_mux(struct usb_dt9812 *dev,
+static void dt9812_configure_mux(struct comedi_device *dev,
struct dt9812_rmw_byte *rmw, int channel)
{
- if (dev->device == DT9812_DEVID_DT9812_10) {
+ struct dt9812_private *devpriv = dev->private;
+
+ if (devpriv->device == DT9812_DEVID_DT9812_10) {
/* In the DT9812/10V MUX is selected by P1.5-7 */
rmw->address = F020_SFR_P1;
rmw->and_mask = 0xe0;
@@ -485,18 +390,21 @@ static void dt9812_configure_mux(struct usb_dt9812 *dev,
}
}
-static void dt9812_configure_gain(struct usb_dt9812 *dev,
+static void dt9812_configure_gain(struct comedi_device *dev,
struct dt9812_rmw_byte *rmw,
enum dt9812_gain gain)
{
- if (dev->device == DT9812_DEVID_DT9812_10) {
- /* In the DT9812/10V, there is an external gain of 0.5 */
+ struct dt9812_private *devpriv = dev->private;
+
+ /* In the DT9812/10V, there is an external gain of 0.5 */
+ if (devpriv->device == DT9812_DEVID_DT9812_10)
gain <<= 1;
- }
rmw->address = F020_SFR_ADC0CF;
rmw->and_mask = F020_MASK_ADC0CF_AMP0GN2 |
- F020_MASK_ADC0CF_AMP0GN1 | F020_MASK_ADC0CF_AMP0GN0;
+ F020_MASK_ADC0CF_AMP0GN1 |
+ F020_MASK_ADC0CF_AMP0GN0;
+
switch (gain) {
/*
* 000 -> Gain = 1
@@ -508,8 +416,10 @@ static void dt9812_configure_gain(struct usb_dt9812 *dev,
*/
case DT9812_GAIN_0PT5:
rmw->or_value = F020_MASK_ADC0CF_AMP0GN2 |
- F020_MASK_ADC0CF_AMP0GN1;
+ F020_MASK_ADC0CF_AMP0GN1;
break;
+ default:
+ /* this should never happen, just use a gain of 1 */
case DT9812_GAIN_1:
rmw->or_value = 0x00;
break;
@@ -521,20 +431,18 @@ static void dt9812_configure_gain(struct usb_dt9812 *dev,
break;
case DT9812_GAIN_8:
rmw->or_value = F020_MASK_ADC0CF_AMP0GN1 |
- F020_MASK_ADC0CF_AMP0GN0;
+ F020_MASK_ADC0CF_AMP0GN0;
break;
case DT9812_GAIN_16:
rmw->or_value = F020_MASK_ADC0CF_AMP0GN2;
break;
- default:
- dev_err(&dev->interface->dev, "Illegal gain %d\n", gain);
-
}
}
-static int dt9812_analog_in(struct slot_dt9812 *slot, int channel, u16 *value,
- enum dt9812_gain gain)
+static int dt9812_analog_in(struct comedi_device *dev,
+ int channel, u16 *value, enum dt9812_gain gain)
{
+ struct dt9812_private *devpriv = dev->private;
struct dt9812_rmw_byte rmw[3];
u8 reg[3] = {
F020_SFR_ADC0CN,
@@ -542,31 +450,30 @@ static int dt9812_analog_in(struct slot_dt9812 *slot, int channel, u16 *value,
F020_SFR_ADC0L
};
u8 val[3];
- int result = -ENODEV;
+ int ret;
- down(&slot->mutex);
- if (!slot->usb)
- goto exit;
+ down(&devpriv->sem);
/* 1 select the gain */
- dt9812_configure_gain(slot->usb, &rmw[0], gain);
+ dt9812_configure_gain(dev, &rmw[0], gain);
/* 2 set the MUX to select the channel */
- dt9812_configure_mux(slot->usb, &rmw[1], channel);
+ dt9812_configure_mux(dev, &rmw[1], channel);
/* 3 start conversion */
rmw[2].address = F020_SFR_ADC0CN;
rmw[2].and_mask = 0xff;
rmw[2].or_value = F020_MASK_ADC0CN_AD0EN | F020_MASK_ADC0CN_AD0BUSY;
- result = dt9812_rmw_multiple_registers(slot->usb, 3, rmw);
- if (result)
+ ret = dt9812_rmw_multiple_registers(dev, 3, rmw);
+ if (ret)
goto exit;
/* read the status and ADC */
- result = dt9812_read_multiple_registers(slot->usb, 3, reg, val);
- if (result)
+ ret = dt9812_read_multiple_registers(dev, 3, reg, val);
+ if (ret)
goto exit;
+
/*
* An ADC conversion takes 16 SAR clocks cycles, i.e. about 9us.
* Therefore, between the instant that AD0BUSY was set via
@@ -578,7 +485,7 @@ static int dt9812_analog_in(struct slot_dt9812 *slot, int channel, u16 *value,
*/
if ((val[0] & (F020_MASK_ADC0CN_AD0INT | F020_MASK_ADC0CN_AD0BUSY)) ==
F020_MASK_ADC0CN_AD0INT) {
- switch (slot->usb->device) {
+ switch (devpriv->device) {
case DT9812_DEVID_DT9812_10:
/*
* For DT9812-10V the personality module set the
@@ -594,422 +501,284 @@ static int dt9812_analog_in(struct slot_dt9812 *slot, int channel, u16 *value,
}
exit:
- up(&slot->mutex);
- return result;
+ up(&devpriv->sem);
+
+ return ret;
}
-static int dt9812_analog_out_shadow(struct slot_dt9812 *slot, int channel,
- u16 *value)
+static int dt9812_analog_out(struct comedi_device *dev, int channel, u16 value)
{
- int result = -ENODEV;
+ struct dt9812_private *devpriv = dev->private;
+ struct dt9812_rmw_byte rmw[3];
+ int ret;
- down(&slot->mutex);
- if (slot->usb) {
- *value = slot->usb->analog_out_shadow[channel];
- result = 0;
+ down(&devpriv->sem);
+
+ switch (channel) {
+ case 0:
+ /* 1. Set DAC mode */
+ rmw[0].address = F020_SFR_DAC0CN;
+ rmw[0].and_mask = 0xff;
+ rmw[0].or_value = F020_MASK_DACxCN_DACxEN;
+
+ /* 2 load low byte of DAC value first */
+ rmw[1].address = F020_SFR_DAC0L;
+ rmw[1].and_mask = 0xff;
+ rmw[1].or_value = value & 0xff;
+
+ /* 3 load high byte of DAC value next to latch the
+ 12-bit value */
+ rmw[2].address = F020_SFR_DAC0H;
+ rmw[2].and_mask = 0xff;
+ rmw[2].or_value = (value >> 8) & 0xf;
+ break;
+
+ case 1:
+ /* 1. Set DAC mode */
+ rmw[0].address = F020_SFR_DAC1CN;
+ rmw[0].and_mask = 0xff;
+ rmw[0].or_value = F020_MASK_DACxCN_DACxEN;
+
+ /* 2 load low byte of DAC value first */
+ rmw[1].address = F020_SFR_DAC1L;
+ rmw[1].and_mask = 0xff;
+ rmw[1].or_value = value & 0xff;
+
+ /* 3 load high byte of DAC value next to latch the
+ 12-bit value */
+ rmw[2].address = F020_SFR_DAC1H;
+ rmw[2].and_mask = 0xff;
+ rmw[2].or_value = (value >> 8) & 0xf;
+ break;
}
- up(&slot->mutex);
+ ret = dt9812_rmw_multiple_registers(dev, 3, rmw);
+ devpriv->ao_shadow[channel] = value;
- return result;
+ up(&devpriv->sem);
+
+ return ret;
}
-static int dt9812_analog_out(struct slot_dt9812 *slot, int channel, u16 value)
+static int dt9812_di_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- int result = -ENODEV;
+ u8 bits = 0;
+ int ret;
- down(&slot->mutex);
- if (slot->usb) {
- struct dt9812_rmw_byte rmw[3];
+ ret = dt9812_digital_in(dev, &bits);
+ if (ret)
+ return ret;
- switch (channel) {
- case 0:
- /* 1. Set DAC mode */
- rmw[0].address = F020_SFR_DAC0CN;
- rmw[0].and_mask = 0xff;
- rmw[0].or_value = F020_MASK_DACxCN_DACxEN;
-
- /* 2 load low byte of DAC value first */
- rmw[1].address = F020_SFR_DAC0L;
- rmw[1].and_mask = 0xff;
- rmw[1].or_value = value & 0xff;
-
- /* 3 load high byte of DAC value next to latch the
- 12-bit value */
- rmw[2].address = F020_SFR_DAC0H;
- rmw[2].and_mask = 0xff;
- rmw[2].or_value = (value >> 8) & 0xf;
- break;
+ data[1] = bits;
- case 1:
- /* 1. Set DAC mode */
- rmw[0].address = F020_SFR_DAC1CN;
- rmw[0].and_mask = 0xff;
- rmw[0].or_value = F020_MASK_DACxCN_DACxEN;
-
- /* 2 load low byte of DAC value first */
- rmw[1].address = F020_SFR_DAC1L;
- rmw[1].and_mask = 0xff;
- rmw[1].or_value = value & 0xff;
-
- /* 3 load high byte of DAC value next to latch the
- 12-bit value */
- rmw[2].address = F020_SFR_DAC1H;
- rmw[2].and_mask = 0xff;
- rmw[2].or_value = (value >> 8) & 0xf;
- break;
- }
- result = dt9812_rmw_multiple_registers(slot->usb, 3, rmw);
- slot->usb->analog_out_shadow[channel] = value;
+ return insn->n;
+}
+
+static int dt9812_do_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int mask = data[0];
+ unsigned int bits = data[1];
+
+ if (mask) {
+ s->state &= ~mask;
+ s->state |= (bits & mask);
+
+ dt9812_digital_out(dev, s->state);
}
- up(&slot->mutex);
- return result;
+ data[1] = s->state;
+
+ return insn->n;
}
-/*
- * USB framework functions
- */
+static int dt9812_ai_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ u16 val = 0;
+ int ret;
+ int i;
-static int dt9812_probe(struct usb_interface *interface,
- const struct usb_device_id *id)
+ for (i = 0; i < insn->n; i++) {
+ ret = dt9812_analog_in(dev, chan, &val, DT9812_GAIN_1);
+ if (ret)
+ return ret;
+ data[i] = val;
+ }
+
+ return insn->n;
+}
+
+static int dt9812_ao_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- int retval = -ENOMEM;
- struct usb_dt9812 *dev = NULL;
- struct usb_host_interface *iface_desc;
- struct usb_endpoint_descriptor *endpoint;
+ struct dt9812_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
int i;
- u8 fw;
- /* allocate memory for our device state and initialize it */
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (dev == NULL)
- goto error;
+ down(&devpriv->sem);
+ for (i = 0; i < insn->n; i++)
+ data[i] = devpriv->ao_shadow[chan];
+ up(&devpriv->sem);
+
+ return insn->n;
+}
+
+static int dt9812_ao_insn_write(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ int ret;
+ int i;
- kref_init(&dev->kref);
+ for (i = 0; i < insn->n; i++) {
+ ret = dt9812_analog_out(dev, chan, data[i]);
+ if (ret)
+ return ret;
+ }
- dev->udev = usb_get_dev(interface_to_usbdev(interface));
- dev->interface = interface;
+ return insn->n;
+}
- /* Check endpoints */
- iface_desc = interface->cur_altsetting;
+static int dt9812_find_endpoints(struct comedi_device *dev)
+{
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct usb_host_interface *host = intf->cur_altsetting;
+ struct dt9812_private *devpriv = dev->private;
+ struct usb_endpoint_descriptor *ep;
+ int i;
- if (iface_desc->desc.bNumEndpoints != 5) {
- dev_err(&interface->dev, "Wrong number of endpoints.\n");
- retval = -ENODEV;
- goto error;
+ if (host->desc.bNumEndpoints != 5) {
+ dev_err(dev->class_dev, "Wrong number of endpoints\n");
+ return -ENODEV;
}
- for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
- int direction = -1;
- endpoint = &iface_desc->endpoint[i].desc;
+ for (i = 0; i < host->desc.bNumEndpoints; ++i) {
+ int dir = -1;
+ ep = &host->endpoint[i].desc;
switch (i) {
case 0:
- direction = USB_DIR_IN;
- dev->message_pipe.addr = endpoint->bEndpointAddress;
- dev->message_pipe.size =
- le16_to_cpu(endpoint->wMaxPacketSize);
-
+ /* unused message pipe */
+ dir = USB_DIR_IN;
break;
case 1:
- direction = USB_DIR_OUT;
- dev->command_write.addr = endpoint->bEndpointAddress;
- dev->command_write.size =
- le16_to_cpu(endpoint->wMaxPacketSize);
+ dir = USB_DIR_OUT;
+ devpriv->cmd_wr.addr = ep->bEndpointAddress;
+ devpriv->cmd_wr.size = le16_to_cpu(ep->wMaxPacketSize);
break;
case 2:
- direction = USB_DIR_IN;
- dev->command_read.addr = endpoint->bEndpointAddress;
- dev->command_read.size =
- le16_to_cpu(endpoint->wMaxPacketSize);
+ dir = USB_DIR_IN;
+ devpriv->cmd_rd.addr = ep->bEndpointAddress;
+ devpriv->cmd_rd.size = le16_to_cpu(ep->wMaxPacketSize);
break;
case 3:
- direction = USB_DIR_OUT;
- dev->write_stream.addr = endpoint->bEndpointAddress;
- dev->write_stream.size =
- le16_to_cpu(endpoint->wMaxPacketSize);
+ /* unused write stream */
+ dir = USB_DIR_OUT;
break;
case 4:
- direction = USB_DIR_IN;
- dev->read_stream.addr = endpoint->bEndpointAddress;
- dev->read_stream.size =
- le16_to_cpu(endpoint->wMaxPacketSize);
+ /* unused read stream */
+ dir = USB_DIR_IN;
break;
}
- if ((endpoint->bEndpointAddress & USB_DIR_IN) != direction) {
- dev_err(&interface->dev,
- "Endpoint has wrong direction.\n");
- retval = -ENODEV;
- goto error;
+ if ((ep->bEndpointAddress & USB_DIR_IN) != dir) {
+ dev_err(dev->class_dev,
+ "Endpoint has wrong direction\n");
+ return -ENODEV;
}
}
- if (dt9812_read_info(dev, 0, &fw, sizeof(fw)) != 0) {
+ return 0;
+}
+
+static int dt9812_reset_device(struct comedi_device *dev)
+{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct dt9812_private *devpriv = dev->private;
+ u32 serial;
+ u16 vendor;
+ u16 product;
+ u16 tmp16;
+ u8 tmp8;
+ int ret;
+ int i;
+
+ ret = dt9812_read_info(dev, 0, &tmp8, sizeof(tmp8));
+ if (ret) {
/*
* Seems like a configuration reset is necessary if driver is
* reloaded while device is attached
*/
- usb_reset_configuration(dev->udev);
+ usb_reset_configuration(usb);
for (i = 0; i < 10; i++) {
- retval = dt9812_read_info(dev, 1, &fw, sizeof(fw));
- if (retval == 0) {
- dev_info(&interface->dev,
- "usb_reset_configuration succeeded "
- "after %d iterations\n", i);
+ ret = dt9812_read_info(dev, 1, &tmp8, sizeof(tmp8));
+ if (ret == 0)
break;
- }
}
- }
-
- if (dt9812_read_info(dev, 1, &dev->vendor, sizeof(dev->vendor)) != 0) {
- dev_err(&interface->dev, "Failed to read vendor.\n");
- retval = -ENODEV;
- goto error;
- }
- if (dt9812_read_info(dev, 3, &dev->product, sizeof(dev->product)) != 0) {
- dev_err(&interface->dev, "Failed to read product.\n");
- retval = -ENODEV;
- goto error;
- }
- if (dt9812_read_info(dev, 5, &dev->device, sizeof(dev->device)) != 0) {
- dev_err(&interface->dev, "Failed to read device.\n");
- retval = -ENODEV;
- goto error;
- }
- if (dt9812_read_info(dev, 7, &dev->serial, sizeof(dev->serial)) != 0) {
- dev_err(&interface->dev, "Failed to read serial.\n");
- retval = -ENODEV;
- goto error;
- }
-
- dev->vendor = le16_to_cpu(dev->vendor);
- dev->product = le16_to_cpu(dev->product);
- dev->device = le16_to_cpu(dev->device);
- dev->serial = le32_to_cpu(dev->serial);
- switch (dev->device) {
- case DT9812_DEVID_DT9812_10:
- dev->analog_out_shadow[0] = 0x0800;
- dev->analog_out_shadow[1] = 0x800;
- break;
- case DT9812_DEVID_DT9812_2PT5:
- dev->analog_out_shadow[0] = 0x0000;
- dev->analog_out_shadow[1] = 0x0000;
- break;
- }
- dev->digital_out_shadow = 0;
-
- /* save our data pointer in this interface device */
- usb_set_intfdata(interface, dev);
-
- /* let the user know what node this device is now attached to */
- dev_info(&interface->dev, "USB DT9812 (%4.4x.%4.4x.%4.4x) #0x%8.8x\n",
- dev->vendor, dev->product, dev->device, dev->serial);
-
- down(&dt9812_mutex);
- {
- /* Find a slot for the USB device */
- struct slot_dt9812 *first = NULL;
- struct slot_dt9812 *best = NULL;
-
- for (i = 0; i < DT9812_NUM_SLOTS; i++) {
- if (!first && !dt9812[i].usb && dt9812[i].serial == 0)
- first = &dt9812[i];
- if (!best && dt9812[i].serial == dev->serial)
- best = &dt9812[i];
- }
-
- if (!best)
- best = first;
-
- if (best) {
- down(&best->mutex);
- best->usb = dev;
- dev->slot = best;
- up(&best->mutex);
+ if (ret) {
+ dev_err(dev->class_dev,
+ "unable to reset configuration\n");
+ return ret;
}
}
- up(&dt9812_mutex);
-
- return 0;
-error:
- if (dev)
- kref_put(&dev->kref, dt9812_delete);
- return retval;
-}
-
-static void dt9812_disconnect(struct usb_interface *interface)
-{
- struct usb_dt9812 *dev;
- int minor = interface->minor;
-
- down(&dt9812_mutex);
- dev = usb_get_intfdata(interface);
- if (dev->slot) {
- down(&dev->slot->mutex);
- dev->slot->usb = NULL;
- up(&dev->slot->mutex);
- dev->slot = NULL;
+ ret = dt9812_read_info(dev, 1, &vendor, sizeof(vendor));
+ if (ret) {
+ dev_err(dev->class_dev, "failed to read vendor id\n");
+ return ret;
}
- usb_set_intfdata(interface, NULL);
- up(&dt9812_mutex);
-
- /* queue final destruction */
- kref_put(&dev->kref, dt9812_delete);
-
- dev_info(&interface->dev, "USB Dt9812 #%d now disconnected\n", minor);
-}
+ vendor = le16_to_cpu(vendor);
-static struct usb_driver dt9812_usb_driver = {
- .name = "dt9812",
- .probe = dt9812_probe,
- .disconnect = dt9812_disconnect,
- .id_table = dt9812_table,
-};
-
-/*
- * Comedi functions
- */
-
-static int dt9812_comedi_open(struct comedi_device *dev)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- int result = -ENODEV;
-
- down(&devpriv->slot->mutex);
- if (devpriv->slot->usb) {
- /* We have an attached device, fill in current range info */
- struct comedi_subdevice *s;
-
- s = &dev->subdevices[0];
- s->n_chan = 8;
- s->maxdata = 1;
-
- s = &dev->subdevices[1];
- s->n_chan = 8;
- s->maxdata = 1;
-
- s = &dev->subdevices[2];
- s->n_chan = 8;
- switch (devpriv->slot->usb->device) {
- case 0:{
- s->maxdata = 4095;
- s->range_table = &range_bipolar10;
- }
- break;
- case 1:{
- s->maxdata = 4095;
- s->range_table = &range_unipolar2_5;
- }
- break;
- }
-
- s = &dev->subdevices[3];
- s->n_chan = 2;
- switch (devpriv->slot->usb->device) {
- case 0:{
- s->maxdata = 4095;
- s->range_table = &range_bipolar10;
- }
- break;
- case 1:{
- s->maxdata = 4095;
- s->range_table = &range_unipolar2_5;
- }
- break;
- }
- result = 0;
+ ret = dt9812_read_info(dev, 3, &product, sizeof(product));
+ if (ret) {
+ dev_err(dev->class_dev, "failed to read product id\n");
+ return ret;
}
- up(&devpriv->slot->mutex);
- return result;
-}
-
-static int dt9812_di_rinsn(struct comedi_device *dev,
- struct comedi_subdevice *s, struct comedi_insn *insn,
- unsigned int *data)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- unsigned int channel = CR_CHAN(insn->chanspec);
- int n;
- u8 bits = 0;
-
- dt9812_digital_in(devpriv->slot, &bits);
- for (n = 0; n < insn->n; n++)
- data[n] = ((1 << channel) & bits) != 0;
- return n;
-}
+ product = le16_to_cpu(product);
-static int dt9812_do_winsn(struct comedi_device *dev,
- struct comedi_subdevice *s, struct comedi_insn *insn,
- unsigned int *data)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- unsigned int channel = CR_CHAN(insn->chanspec);
- int n;
- u8 bits = 0;
-
- dt9812_digital_out_shadow(devpriv->slot, &bits);
- for (n = 0; n < insn->n; n++) {
- u8 mask = 1 << channel;
-
- bits &= ~mask;
- if (data[n])
- bits |= mask;
+ ret = dt9812_read_info(dev, 5, &tmp16, sizeof(tmp16));
+ if (ret) {
+ dev_err(dev->class_dev, "failed to read device id\n");
+ return ret;
}
- dt9812_digital_out(devpriv->slot, bits);
- return n;
-}
+ devpriv->device = le16_to_cpu(tmp16);
-static int dt9812_ai_rinsn(struct comedi_device *dev,
- struct comedi_subdevice *s, struct comedi_insn *insn,
- unsigned int *data)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- unsigned int channel = CR_CHAN(insn->chanspec);
- int n;
-
- for (n = 0; n < insn->n; n++) {
- u16 value = 0;
-
- dt9812_analog_in(devpriv->slot, channel, &value, DT9812_GAIN_1);
- data[n] = value;
+ ret = dt9812_read_info(dev, 7, &serial, sizeof(serial));
+ if (ret) {
+ dev_err(dev->class_dev, "failed to read serial number\n");
+ return ret;
}
- return n;
-}
+ serial = le32_to_cpu(serial);
-static int dt9812_ao_rinsn(struct comedi_device *dev,
- struct comedi_subdevice *s, struct comedi_insn *insn,
- unsigned int *data)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- unsigned int channel = CR_CHAN(insn->chanspec);
- int n;
- u16 value;
-
- for (n = 0; n < insn->n; n++) {
- value = 0;
- dt9812_analog_out_shadow(devpriv->slot, channel, &value);
- data[n] = value;
- }
- return n;
-}
+ /* let the user know what node this device is now attached to */
+ dev_info(dev->class_dev, "USB DT9812 (%4.4x.%4.4x.%4.4x) #0x%8.8x\n",
+ vendor, product, devpriv->device, serial);
-static int dt9812_ao_winsn(struct comedi_device *dev,
- struct comedi_subdevice *s, struct comedi_insn *insn,
- unsigned int *data)
-{
- struct comedi_dt9812 *devpriv = dev->private;
- unsigned int channel = CR_CHAN(insn->chanspec);
- int n;
+ if (devpriv->device != DT9812_DEVID_DT9812_10 &&
+ devpriv->device != DT9812_DEVID_DT9812_2PT5) {
+ dev_err(dev->class_dev, "Unsupported device!\n");
+ return -EINVAL;
+ }
- for (n = 0; n < insn->n; n++)
- dt9812_analog_out(devpriv->slot, channel, data[n]);
- return n;
+ return 0;
}
-static int dt9812_attach(struct comedi_device *dev, struct comedi_devconfig *it)
+static int dt9812_auto_attach(struct comedi_device *dev,
+ unsigned long context)
{
- struct comedi_dt9812 *devpriv;
- int i;
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct dt9812_private *devpriv;
struct comedi_subdevice *s;
+ bool is_unipolar;
int ret;
devpriv = kzalloc(sizeof(*devpriv), GFP_KERNEL);
@@ -1017,125 +786,107 @@ static int dt9812_attach(struct comedi_device *dev, struct comedi_devconfig *it)
return -ENOMEM;
dev->private = devpriv;
- /*
- * Special open routine, since USB unit may be unattached at
- * comedi_config time, hence range can not be determined
- */
- dev->open = dt9812_comedi_open;
+ sema_init(&devpriv->sem, 1);
+ usb_set_intfdata(intf, devpriv);
- devpriv->serial = it->options[0];
+ ret = dt9812_find_endpoints(dev);
+ if (ret)
+ return ret;
+
+ ret = dt9812_reset_device(dev);
+ if (ret)
+ return ret;
+
+ is_unipolar = (devpriv->device == DT9812_DEVID_DT9812_2PT5);
ret = comedi_alloc_subdevices(dev, 4);
if (ret)
return ret;
- /* digital input subdevice */
+ /* Digital Input subdevice */
s = &dev->subdevices[0];
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->n_chan = 0;
- s->maxdata = 1;
- s->range_table = &range_digital;
- s->insn_read = &dt9812_di_rinsn;
-
- /* digital output subdevice */
+ s->type = COMEDI_SUBD_DI;
+ s->subdev_flags = SDF_READABLE;
+ s->n_chan = 8;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = dt9812_di_insn_bits;
+
+ /* Digital Output subdevice */
s = &dev->subdevices[1];
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITEABLE;
- s->n_chan = 0;
- s->maxdata = 1;
- s->range_table = &range_digital;
- s->insn_write = &dt9812_do_winsn;
-
- /* analog input subdevice */
+ s->type = COMEDI_SUBD_DO;
+ s->subdev_flags = SDF_WRITEABLE;
+ s->n_chan = 8;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = dt9812_do_insn_bits;
+
+ /* Analog Input subdevice */
s = &dev->subdevices[2];
- s->type = COMEDI_SUBD_AI;
- s->subdev_flags = SDF_READABLE | SDF_GROUND;
- s->n_chan = 0;
- s->maxdata = 1;
- s->range_table = NULL;
- s->insn_read = &dt9812_ai_rinsn;
-
- /* analog output subdevice */
+ s->type = COMEDI_SUBD_AI;
+ s->subdev_flags = SDF_READABLE | SDF_GROUND;
+ s->n_chan = 8;
+ s->maxdata = 0x0fff;
+ s->range_table = is_unipolar ? &range_unipolar2_5 : &range_bipolar10;
+ s->insn_read = dt9812_ai_insn_read;
+
+ /* Analog Output subdevice */
s = &dev->subdevices[3];
- s->type = COMEDI_SUBD_AO;
- s->subdev_flags = SDF_WRITEABLE;
- s->n_chan = 0;
- s->maxdata = 1;
- s->range_table = NULL;
- s->insn_write = &dt9812_ao_winsn;
- s->insn_read = &dt9812_ao_rinsn;
-
- dev_info(dev->class_dev, "successfully attached to dt9812.\n");
-
- down(&dt9812_mutex);
- /* Find a slot for the comedi device */
- {
- struct slot_dt9812 *first = NULL;
- struct slot_dt9812 *best = NULL;
- for (i = 0; i < DT9812_NUM_SLOTS; i++) {
- if (!first && !dt9812[i].comedi) {
- /* First free slot from comedi side */
- first = &dt9812[i];
- }
- if (!best &&
- dt9812[i].usb &&
- dt9812[i].usb->serial == devpriv->serial) {
- /* We have an attaced device with matching ID */
- best = &dt9812[i];
- }
- }
- if (!best)
- best = first;
- if (best) {
- down(&best->mutex);
- best->comedi = devpriv;
- best->serial = devpriv->serial;
- devpriv->slot = best;
- up(&best->mutex);
- }
- }
- up(&dt9812_mutex);
+ s->type = COMEDI_SUBD_AO;
+ s->subdev_flags = SDF_WRITEABLE;
+ s->n_chan = 2;
+ s->maxdata = 0x0fff;
+ s->range_table = is_unipolar ? &range_unipolar2_5 : &range_bipolar10;
+ s->insn_write = dt9812_ao_insn_write;
+ s->insn_read = dt9812_ao_insn_read;
+
+ devpriv->ao_shadow[0] = is_unipolar ? 0x0000 : 0x0800;
+ devpriv->ao_shadow[1] = is_unipolar ? 0x0000 : 0x0800;
return 0;
}
static void dt9812_detach(struct comedi_device *dev)
{
- /* Nothing to cleanup */
-}
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct dt9812_private *devpriv = dev->private;
-static struct comedi_driver dt9812_comedi_driver = {
- .module = THIS_MODULE,
- .driver_name = "dt9812",
- .attach = dt9812_attach,
- .detach = dt9812_detach,
-};
+ if (!devpriv)
+ return;
-static int __init usb_dt9812_init(void)
-{
- int i;
+ down(&devpriv->sem);
- /* Initialize all driver slots */
- for (i = 0; i < DT9812_NUM_SLOTS; i++) {
- sema_init(&dt9812[i].mutex, 1);
- dt9812[i].serial = 0;
- dt9812[i].usb = NULL;
- dt9812[i].comedi = NULL;
- }
- dt9812[12].serial = 0x0;
+ usb_set_intfdata(intf, NULL);
- return comedi_usb_driver_register(&dt9812_comedi_driver,
- &dt9812_usb_driver);
+ up(&devpriv->sem);
}
-static void __exit usb_dt9812_exit(void)
+static struct comedi_driver dt9812_driver = {
+ .driver_name = "dt9812",
+ .module = THIS_MODULE,
+ .auto_attach = dt9812_auto_attach,
+ .detach = dt9812_detach,
+};
+
+static int dt9812_usb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
{
- comedi_usb_driver_unregister(&dt9812_comedi_driver, &dt9812_usb_driver);
+ return comedi_usb_auto_config(intf, &dt9812_driver, id->driver_info);
}
-module_init(usb_dt9812_init);
-module_exit(usb_dt9812_exit);
+static const struct usb_device_id dt9812_usb_table[] = {
+ { USB_DEVICE(0x0867, 0x9812) },
+ { }
+};
+MODULE_DEVICE_TABLE(usb, dt9812_usb_table);
+
+static struct usb_driver dt9812_usb_driver = {
+ .name = "dt9812",
+ .id_table = dt9812_usb_table,
+ .probe = dt9812_usb_probe,
+ .disconnect = comedi_usb_auto_unconfig,
+};
+module_comedi_usb_driver(dt9812_driver, dt9812_usb_driver);
MODULE_AUTHOR("Anders Blomdell <anders.blomdell@control.lth.se>");
MODULE_DESCRIPTION("Comedi DT9812 driver");
diff --git a/drivers/staging/comedi/drivers/dyna_pci10xx.c b/drivers/staging/comedi/drivers/dyna_pci10xx.c
index 93ec8e492ccc..e14dd3ae9ec6 100644
--- a/drivers/staging/comedi/drivers/dyna_pci10xx.c
+++ b/drivers/staging/comedi/drivers/dyna_pci10xx.c
@@ -11,10 +11,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/gsc_hpdi.c b/drivers/staging/comedi/drivers/gsc_hpdi.c
index 0c061df0978c..2fceff93867b 100644
--- a/drivers/staging/comedi/drivers/gsc_hpdi.c
+++ b/drivers/staging/comedi/drivers/gsc_hpdi.c
@@ -18,12 +18,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************/
+*/
/*
* Driver: gsc_hpdi
diff --git a/drivers/staging/comedi/drivers/icp_multi.c b/drivers/staging/comedi/drivers/icp_multi.c
index 08ab9d6e7190..a11e015dc03d 100644
--- a/drivers/staging/comedi/drivers/icp_multi.c
+++ b/drivers/staging/comedi/drivers/icp_multi.c
@@ -13,11 +13,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/jr3_pci.c b/drivers/staging/comedi/drivers/jr3_pci.c
index 90b303ab2300..94609f4aa4c9 100644
--- a/drivers/staging/comedi/drivers/jr3_pci.c
+++ b/drivers/staging/comedi/drivers/jr3_pci.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
* Driver: jr3_pci
@@ -46,7 +41,6 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/ctype.h>
-#include <linux/firmware.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/timer.h>
@@ -97,37 +91,6 @@ struct jr3_pci_subdev_private {
int retries;
};
-/* Hotplug firmware loading stuff */
-static int comedi_load_firmware(struct comedi_device *dev, const char *name,
- int (*cb)(struct comedi_device *dev,
- const u8 *data, size_t size))
-{
- struct pci_dev *pcidev = comedi_to_pci_dev(dev);
- int result = 0;
- const struct firmware *fw;
- char *firmware_path;
- static const char *prefix = "comedi/";
-
- firmware_path = kmalloc(strlen(prefix) + strlen(name) + 1, GFP_KERNEL);
- if (!firmware_path) {
- result = -ENOMEM;
- } else {
- firmware_path[0] = '\0';
- strcat(firmware_path, prefix);
- strcat(firmware_path, name);
- result = request_firmware(&fw, firmware_path, &pcidev->dev);
- if (result == 0) {
- if (!cb)
- result = -EINVAL;
- else
- result = cb(dev, fw->data, fw->size);
- release_firmware(fw);
- }
- kfree(firmware_path);
- }
- return result;
-}
-
static struct poll_delay_t poll_delay_min_max(int min, int max)
{
struct poll_delay_t result;
@@ -362,8 +325,9 @@ static int read_idm_word(const u8 *data, size_t size, int *pos,
return result;
}
-static int jr3_download_firmware(struct comedi_device *dev, const u8 *data,
- size_t size)
+static int jr3_download_firmware(struct comedi_device *dev,
+ const u8 *data, size_t size,
+ unsigned long context)
{
/*
* IDM file format is:
@@ -768,7 +732,9 @@ static int jr3_pci_auto_attach(struct comedi_device *dev,
/* Reset DSP card */
writel(0, &devpriv->iobase->channel[0].reset);
- result = comedi_load_firmware(dev, "jr3pci.idm", jr3_download_firmware);
+ result = comedi_load_firmware(dev, &comedi_to_pci_dev(dev)->dev,
+ "comedi/jr3pci.idm",
+ jr3_download_firmware, 0);
dev_dbg(dev->class_dev, "Firmare load %d\n", result);
if (result < 0)
@@ -778,8 +744,9 @@ static int jr3_pci_auto_attach(struct comedi_device *dev,
* format:
* model serial Fx Fy Fz Mx My Mz\n
*
- * comedi_load_firmware(dev, "jr3_offsets_table",
- * jr3_download_firmware);
+ * comedi_load_firmware(dev, &comedi_to_pci_dev(dev)->dev,
+ * "comedi/jr3_offsets_table",
+ * jr3_download_firmware, 1);
*/
/*
diff --git a/drivers/staging/comedi/drivers/ke_counter.c b/drivers/staging/comedi/drivers/ke_counter.c
index e0e64752e310..f10cf10e5fe3 100644
--- a/drivers/staging/comedi/drivers/ke_counter.c
+++ b/drivers/staging/comedi/drivers/ke_counter.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ke_counter
diff --git a/drivers/staging/comedi/drivers/me4000.c b/drivers/staging/comedi/drivers/me4000.c
index 641e693d5d0e..c2308fd24d6a 100644
--- a/drivers/staging/comedi/drivers/me4000.c
+++ b/drivers/staging/comedi/drivers/me4000.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: me4000
diff --git a/drivers/staging/comedi/drivers/me_daq.c b/drivers/staging/comedi/drivers/me_daq.c
index 09f2a9feaf7c..7533ece3670e 100644
--- a/drivers/staging/comedi/drivers/me_daq.c
+++ b/drivers/staging/comedi/drivers/me_daq.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -37,7 +33,6 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-#include <linux/firmware.h>
#include "../comedidev.h"
@@ -391,7 +386,8 @@ static int me_ao_insn_read(struct comedi_device *dev,
}
static int me2600_xilinx_download(struct comedi_device *dev,
- const u8 *data, size_t size)
+ const u8 *data, size_t size,
+ unsigned long context)
{
struct me_private_data *dev_private = dev->private;
unsigned int value;
@@ -460,22 +456,6 @@ static int me2600_xilinx_download(struct comedi_device *dev,
return 0;
}
-static int me2600_upload_firmware(struct comedi_device *dev)
-{
- struct pci_dev *pcidev = comedi_to_pci_dev(dev);
- const struct firmware *fw;
- int ret;
-
- ret = request_firmware(&fw, ME2600_FIRMWARE, &pcidev->dev);
- if (ret)
- return ret;
-
- ret = me2600_xilinx_download(dev, fw->data, fw->size);
- release_firmware(fw);
-
- return ret;
-}
-
static int me_reset(struct comedi_device *dev)
{
struct me_private_data *dev_private = dev->private;
@@ -529,7 +509,9 @@ static int me_auto_attach(struct comedi_device *dev,
/* Download firmware and reset card */
if (board->needs_firmware) {
- ret = me2600_upload_firmware(dev);
+ ret = comedi_load_firmware(dev, &comedi_to_pci_dev(dev)->dev,
+ ME2600_FIRMWARE,
+ me2600_xilinx_download, 0);
if (ret < 0)
return ret;
}
diff --git a/drivers/staging/comedi/drivers/mite.c b/drivers/staging/comedi/drivers/mite.c
index 523c6564ffca..12c34db61d63 100644
--- a/drivers/staging/comedi/drivers/mite.c
+++ b/drivers/staging/comedi/drivers/mite.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/mite.h b/drivers/staging/comedi/drivers/mite.h
index 255b8ba9c917..d4487e888e64 100644
--- a/drivers/staging/comedi/drivers/mite.h
+++ b/drivers/staging/comedi/drivers/mite.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _MITE_H_
diff --git a/drivers/staging/comedi/drivers/mpc624.c b/drivers/staging/comedi/drivers/mpc624.c
index 4717be4ad268..713842ad6ff6 100644
--- a/drivers/staging/comedi/drivers/mpc624.c
+++ b/drivers/staging/comedi/drivers/mpc624.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: mpc624
diff --git a/drivers/staging/comedi/drivers/multiq3.c b/drivers/staging/comedi/drivers/multiq3.c
index 7a8292086e18..5ecd1b1666fb 100644
--- a/drivers/staging/comedi/drivers/multiq3.c
+++ b/drivers/staging/comedi/drivers/multiq3.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: multiq3
diff --git a/drivers/staging/comedi/drivers/ni_6527.c b/drivers/staging/comedi/drivers/ni_6527.c
index d10f777b7f17..903c2ef5dd9a 100644
--- a/drivers/staging/comedi/drivers/ni_6527.c
+++ b/drivers/staging/comedi/drivers/ni_6527.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_6527
diff --git a/drivers/staging/comedi/drivers/ni_65xx.c b/drivers/staging/comedi/drivers/ni_65xx.c
index 3f71f0f54d3c..42a78de47316 100644
--- a/drivers/staging/comedi/drivers/ni_65xx.c
+++ b/drivers/staging/comedi/drivers/ni_65xx.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_65xx
@@ -286,15 +281,6 @@ static inline struct ni_65xx_subdevice_private *sprivate(struct comedi_subdevice
return subdev->private;
}
-static struct ni_65xx_subdevice_private *ni_65xx_alloc_subdevice_private(void)
-{
- struct ni_65xx_subdevice_private *subdev_private =
- kzalloc(sizeof(struct ni_65xx_subdevice_private), GFP_KERNEL);
- if (subdev_private == NULL)
- return NULL;
- return subdev_private;
-}
-
static int ni_65xx_config_filter(struct comedi_device *dev,
struct comedi_subdevice *s,
struct comedi_insn *insn, unsigned int *data)
@@ -589,6 +575,7 @@ static int ni_65xx_auto_attach(struct comedi_device *dev,
struct pci_dev *pcidev = comedi_to_pci_dev(dev);
const struct ni_65xx_board *board = NULL;
struct ni_65xx_private *devpriv;
+ struct ni_65xx_subdevice_private *spriv;
struct comedi_subdevice *s;
unsigned i;
int ret;
@@ -637,10 +624,10 @@ static int ni_65xx_auto_attach(struct comedi_device *dev,
s->maxdata = 1;
s->insn_config = ni_65xx_dio_insn_config;
s->insn_bits = ni_65xx_dio_insn_bits;
- s->private = ni_65xx_alloc_subdevice_private();
- if (s->private == NULL)
+ spriv = comedi_alloc_spriv(s, sizeof(*spriv));
+ if (!spriv)
return -ENOMEM;
- sprivate(s)->base_port = 0;
+ spriv->base_port = 0;
} else {
s->type = COMEDI_SUBD_UNUSED;
}
@@ -654,10 +641,10 @@ static int ni_65xx_auto_attach(struct comedi_device *dev,
s->range_table = &range_digital;
s->maxdata = 1;
s->insn_bits = ni_65xx_dio_insn_bits;
- s->private = ni_65xx_alloc_subdevice_private();
- if (s->private == NULL)
+ spriv = comedi_alloc_spriv(s, sizeof(*spriv));
+ if (!spriv)
return -ENOMEM;
- sprivate(s)->base_port = board->num_di_ports;
+ spriv->base_port = board->num_di_ports;
} else {
s->type = COMEDI_SUBD_UNUSED;
}
@@ -672,10 +659,10 @@ static int ni_65xx_auto_attach(struct comedi_device *dev,
s->maxdata = 1;
s->insn_config = ni_65xx_dio_insn_config;
s->insn_bits = ni_65xx_dio_insn_bits;
- s->private = ni_65xx_alloc_subdevice_private();
- if (s->private == NULL)
+ spriv = comedi_alloc_spriv(s, sizeof(*spriv));
+ if (!spriv)
return -ENOMEM;
- sprivate(s)->base_port = 0;
+ spriv->base_port = 0;
for (i = 0; i < board->num_dio_ports; ++i) {
/* configure all ports for input */
writeb(0x1,
@@ -730,7 +717,6 @@ static int ni_65xx_auto_attach(struct comedi_device *dev,
static void ni_65xx_detach(struct comedi_device *dev)
{
struct ni_65xx_private *devpriv = dev->private;
- int i;
if (devpriv && devpriv->mite && devpriv->mite->daq_io_addr) {
writeb(0x00,
@@ -739,8 +725,6 @@ static void ni_65xx_detach(struct comedi_device *dev)
}
if (dev->irq)
free_irq(dev->irq, dev);
- for (i = 0; i < dev->n_subdevices; ++i)
- comedi_spriv_free(dev, i);
if (devpriv) {
if (devpriv->mite) {
mite_unsetup(devpriv->mite);
diff --git a/drivers/staging/comedi/drivers/ni_660x.c b/drivers/staging/comedi/drivers/ni_660x.c
index 5cdda7fe97a7..a9e000461ec7 100644
--- a/drivers/staging/comedi/drivers/ni_660x.c
+++ b/drivers/staging/comedi/drivers/ni_660x.c
@@ -11,10 +11,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/ni_670x.c b/drivers/staging/comedi/drivers/ni_670x.c
index 42ab6dbf9d39..1a185b9c529f 100644
--- a/drivers/staging/comedi/drivers/ni_670x.c
+++ b/drivers/staging/comedi/drivers/ni_670x.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_670x
diff --git a/drivers/staging/comedi/drivers/ni_at_a2150.c b/drivers/staging/comedi/drivers/ni_at_a2150.c
index 2d375168f36d..7ea5aa32e9d2 100644
--- a/drivers/staging/comedi/drivers/ni_at_a2150.c
+++ b/drivers/staging/comedi/drivers/ni_at_a2150.c
@@ -15,12 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: ni_at_a2150
diff --git a/drivers/staging/comedi/drivers/ni_at_ao.c b/drivers/staging/comedi/drivers/ni_at_ao.c
index 7e5783a4f4e7..e080053c697b 100644
--- a/drivers/staging/comedi/drivers/ni_at_ao.c
+++ b/drivers/staging/comedi/drivers/ni_at_ao.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_at_ao
diff --git a/drivers/staging/comedi/drivers/ni_atmio.c b/drivers/staging/comedi/drivers/ni_atmio.c
index 4ced7ba119b0..713edd55a91b 100644
--- a/drivers/staging/comedi/drivers/ni_atmio.c
+++ b/drivers/staging/comedi/drivers/ni_atmio.c
@@ -14,10 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: ni_atmio
diff --git a/drivers/staging/comedi/drivers/ni_atmio16d.c b/drivers/staging/comedi/drivers/ni_atmio16d.c
index 6c97a0925aad..da7396f94297 100644
--- a/drivers/staging/comedi/drivers/ni_atmio16d.c
+++ b/drivers/staging/comedi/drivers/ni_atmio16d.c
@@ -12,11 +12,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_atmio16d
@@ -767,7 +762,6 @@ static int atmio16d_attach(struct comedi_device *dev,
static void atmio16d_detach(struct comedi_device *dev)
{
- comedi_spriv_free(dev, 3);
reset_atmio16d(dev);
comedi_legacy_detach(dev);
}
diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c
index d067ef70e194..3c50e31ecc60 100644
--- a/drivers/staging/comedi/drivers/ni_daq_700.c
+++ b/drivers/staging/comedi/drivers/ni_daq_700.c
@@ -15,11 +15,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
*/
/*
@@ -40,7 +35,7 @@ port, bit 0; channel 8 corresponds to the input port, bit 0.
Digital direction configuration: channels 0-7 output, 8-15 input (8225 device
emu as port A output, port B input, port C N/A).
-Analog: The input range is 0 to 4095 for -10 to +10 volts
+Analog: The input range is 0 to 4095 for -10 to +10 volts
IRQ is assigned but not used.
Version 0.1 Original DIO only driver
@@ -183,7 +178,7 @@ static int daq700_ai_rinsn(struct comedi_device *dev,
*/
static void daq700_ai_config(struct comedi_device *dev,
struct comedi_subdevice *s)
-{
+{
unsigned long iobase = dev->iobase;
outb(0x80, iobase + CMD_R1); /* disable scanning, ADC to chan 0 */
diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c
index 9b7805fda932..d3d4eb9356a7 100644
--- a/drivers/staging/comedi/drivers/ni_daq_dio24.c
+++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c
@@ -18,12 +18,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: ni_daq_dio24
@@ -71,17 +65,11 @@ static int dio24_auto_attach(struct comedi_device *dev,
return 0;
}
-static void dio24_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 0);
- comedi_pcmcia_disable(dev);
-}
-
static struct comedi_driver driver_dio24 = {
.driver_name = "ni_daq_dio24",
.module = THIS_MODULE,
.auto_attach = dio24_auto_attach,
- .detach = dio24_detach,
+ .detach = comedi_pcmcia_disable,
};
static int dio24_cs_attach(struct pcmcia_device *link)
diff --git a/drivers/staging/comedi/drivers/ni_labpc.c b/drivers/staging/comedi/drivers/ni_labpc.c
index 77a7bb632580..f161e70b3a0d 100644
--- a/drivers/staging/comedi/drivers/ni_labpc.c
+++ b/drivers/staging/comedi/drivers/ni_labpc.c
@@ -12,10 +12,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -88,7 +84,7 @@
#define CMD1_REG 0x00 /* W: Command 1 reg */
#define CMD1_MA(x) (((x) & 0x7) << 0)
#define CMD1_TWOSCMP (1 << 3)
-#define CMD1_GAIN_MASK (7 << 4)
+#define CMD1_GAIN(x) (((x) & 0x7) << 4)
#define CMD1_SCANEN (1 << 7)
#define CMD2_REG 0x01 /* W: Command 2 reg */
#define CMD2_PRETRIG (1 << 0)
@@ -153,11 +149,6 @@ enum scan_mode {
MODE_MULT_CHAN_DOWN,
};
-static const int labpc_plus_ai_gain_bits[] = {
- 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
- 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
-};
-
static const struct comedi_lrange range_labpc_plus_ai = {
16, {
BIP_RANGE(5),
@@ -179,13 +170,7 @@ static const struct comedi_lrange range_labpc_plus_ai = {
}
};
-const int labpc_1200_ai_gain_bits[] = {
- 0x00, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
- 0x00, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
-};
-EXPORT_SYMBOL_GPL(labpc_1200_ai_gain_bits);
-
-const struct comedi_lrange range_labpc_1200_ai = {
+static const struct comedi_lrange range_labpc_1200_ai = {
14, {
BIP_RANGE(5),
BIP_RANGE(2.5),
@@ -203,7 +188,6 @@ const struct comedi_lrange range_labpc_1200_ai = {
UNI_RANGE(0.1)
}
};
-EXPORT_SYMBOL_GPL(range_labpc_1200_ai);
static const struct comedi_lrange range_labpc_ao = {
2, {
@@ -239,25 +223,18 @@ static const struct labpc_boardinfo labpc_boards[] = {
{
.name = "lab-pc-1200",
.ai_speed = 10000,
- .register_layout = labpc_1200_layout,
- .has_ao = 1,
- .ai_range_table = &range_labpc_1200_ai,
- .ai_range_code = labpc_1200_ai_gain_bits,
.ai_scan_up = 1,
+ .has_ao = 1,
+ .is_labpc1200 = 1,
}, {
.name = "lab-pc-1200ai",
.ai_speed = 10000,
- .register_layout = labpc_1200_layout,
- .ai_range_table = &range_labpc_1200_ai,
- .ai_range_code = labpc_1200_ai_gain_bits,
.ai_scan_up = 1,
+ .is_labpc1200 = 1,
}, {
.name = "lab-pc+",
.ai_speed = 12000,
- .register_layout = labpc_plus_layout,
.has_ao = 1,
- .ai_range_table = &range_labpc_plus_ai,
- .ai_range_code = labpc_plus_ai_gain_bits,
},
};
#endif
@@ -326,12 +303,21 @@ static void labpc_ai_set_chan_and_gain(struct comedi_device *dev,
const struct labpc_boardinfo *board = comedi_board(dev);
struct labpc_private *devpriv = dev->private;
+ if (board->is_labpc1200) {
+ /*
+ * The LabPC-1200 boards do not have a gain
+ * of '0x10'. Skip the range values that would
+ * result in this gain.
+ */
+ range += (range > 0) + (range > 7);
+ }
+
/* munge channel bits for differential/scan disabled mode */
if ((mode == MODE_SINGLE_CHAN || mode == MODE_SINGLE_CHAN_INTERVAL) &&
aref == AREF_DIFF)
chan *= 2;
devpriv->cmd1 = CMD1_MA(chan);
- devpriv->cmd1 |= board->ai_range_code[range];
+ devpriv->cmd1 |= CMD1_GAIN(range);
devpriv->write_byte(devpriv->cmd1, dev->iobase + CMD1_REG);
}
@@ -347,7 +333,7 @@ static void labpc_setup_cmd6_reg(struct comedi_device *dev,
const struct labpc_boardinfo *board = comedi_board(dev);
struct labpc_private *devpriv = dev->private;
- if (board->register_layout != labpc_1200_layout)
+ if (!board->is_labpc1200)
return;
/* reference inputs to ground or common? */
@@ -759,7 +745,7 @@ static int labpc_ai_cmdtest(struct comedi_device *dev,
err |= cfc_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT);
stop_mask = TRIG_COUNT | TRIG_NONE;
- if (board->register_layout == labpc_1200_layout)
+ if (board->is_labpc1200)
stop_mask |= TRIG_EXT;
err |= cfc_check_trigger_src(&cmd->stop_src, stop_mask);
@@ -895,7 +881,7 @@ static int labpc_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
/* pc-plus has no fifo-half full interrupt */
} else
#endif
- if (board->register_layout == labpc_1200_layout &&
+ if (board->is_labpc1200 &&
/* wake-end-of-scan should interrupt on fifo not empty */
(cmd->flags & TRIG_WAKE_EOS) == 0 &&
/* make sure we are taking more than just a few points */
@@ -1175,7 +1161,7 @@ static irqreturn_t labpc_interrupt(int irq, void *d)
/* read board status */
devpriv->stat1 = devpriv->read_byte(dev->iobase + STAT1_REG);
- if (board->register_layout == labpc_1200_layout)
+ if (board->is_labpc1200)
devpriv->stat2 = devpriv->read_byte(dev->iobase + STAT2_REG);
if ((devpriv->stat1 & (STAT1_GATA0 | STAT1_CNTINT | STAT1_OVERFLOW |
@@ -1201,8 +1187,7 @@ static irqreturn_t labpc_interrupt(int irq, void *d)
* has occurred
*/
if (devpriv->stat1 & STAT1_GATA0 ||
- (board->register_layout == labpc_1200_layout
- && devpriv->stat2 & STAT2_OUTA1)) {
+ (board->is_labpc1200 && devpriv->stat2 & STAT2_OUTA1)) {
handle_isa_dma(dev);
}
} else
@@ -1266,7 +1251,7 @@ static int labpc_ao_insn_write(struct comedi_device *dev,
spin_unlock_irqrestore(&dev->spinlock, flags);
/* set range */
- if (board->register_layout == labpc_1200_layout) {
+ if (board->is_labpc1200) {
range = CR_RANGE(insn->chanspec);
if (labpc_range_is_unipolar(s, range))
devpriv->cmd6 |= CMD6_DACUNI(channel);
@@ -1603,7 +1588,7 @@ int labpc_common_attach(struct comedi_device *dev,
devpriv->write_byte(devpriv->cmd2, dev->iobase + CMD2_REG);
devpriv->write_byte(devpriv->cmd3, dev->iobase + CMD3_REG);
devpriv->write_byte(devpriv->cmd4, dev->iobase + CMD4_REG);
- if (board->register_layout == labpc_1200_layout) {
+ if (board->is_labpc1200) {
devpriv->write_byte(devpriv->cmd5, dev->iobase + CMD5_REG);
devpriv->write_byte(devpriv->cmd6, dev->iobase + CMD6_REG);
}
@@ -1626,7 +1611,8 @@ int labpc_common_attach(struct comedi_device *dev,
s->n_chan = 8;
s->len_chanlist = 8;
s->maxdata = 0x0fff;
- s->range_table = board->ai_range_table;
+ s->range_table = board->is_labpc1200
+ ? &range_labpc_1200_ai : &range_labpc_plus_ai;
s->insn_read = labpc_ai_insn_read;
if (dev->irq) {
dev->read_subdev = s;
@@ -1671,7 +1657,7 @@ int labpc_common_attach(struct comedi_device *dev,
/* calibration subdevices for boards that have one */
s = &dev->subdevices[3];
- if (board->register_layout == labpc_1200_layout) {
+ if (board->is_labpc1200) {
s->type = COMEDI_SUBD_CALIB;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL;
s->n_chan = 16;
@@ -1686,7 +1672,7 @@ int labpc_common_attach(struct comedi_device *dev,
/* EEPROM */
s = &dev->subdevices[4];
- if (board->register_layout == labpc_1200_layout) {
+ if (board->is_labpc1200) {
s->type = COMEDI_SUBD_MEMORY;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL;
s->n_chan = EEPROM_SIZE;
@@ -1703,12 +1689,6 @@ int labpc_common_attach(struct comedi_device *dev,
}
EXPORT_SYMBOL_GPL(labpc_common_attach);
-void labpc_common_detach(struct comedi_device *dev)
-{
- comedi_spriv_free(dev, 2);
-}
-EXPORT_SYMBOL_GPL(labpc_common_detach);
-
#if IS_ENABLED(CONFIG_COMEDI_NI_LABPC_ISA)
static int labpc_attach(struct comedi_device *dev, struct comedi_devconfig *it)
{
@@ -1761,8 +1741,6 @@ static void labpc_detach(struct comedi_device *dev)
{
struct labpc_private *devpriv = dev->private;
- labpc_common_detach(dev);
-
if (devpriv) {
kfree(devpriv->dma_buffer);
if (devpriv->dma_chan)
diff --git a/drivers/staging/comedi/drivers/ni_labpc.h b/drivers/staging/comedi/drivers/ni_labpc.h
index 4b691f5a9965..486589fa6fd8 100644
--- a/drivers/staging/comedi/drivers/ni_labpc.h
+++ b/drivers/staging/comedi/drivers/ni_labpc.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _NI_LABPC_H
@@ -27,27 +22,17 @@
#define EEPROM_SIZE 256 /* 256 byte eeprom */
#define NUM_AO_CHAN 2 /* boards have two analog output channels */
-enum labpc_register_layout { labpc_plus_layout, labpc_1200_layout };
enum transfer_type { fifo_not_empty_transfer, fifo_half_full_transfer,
isa_dma_transfer
};
struct labpc_boardinfo {
const char *name;
- int device_id; /* device id for pci and pcmcia boards */
- int ai_speed; /* maximum input speed in nanoseconds */
-
- /* 1200 has extra registers compared to pc+ */
- enum labpc_register_layout register_layout;
- int has_ao; /* has analog output true/false */
- const struct comedi_lrange *ai_range_table;
- const int *ai_range_code;
-
- /* board can auto scan up in ai channels, not just down */
- unsigned ai_scan_up:1;
-
- /* uses memory mapped io instead of ioports */
- unsigned has_mmio:1;
+ int ai_speed; /* maximum input speed in ns */
+ unsigned ai_scan_up:1; /* can auto scan up in ai channels */
+ unsigned has_ao:1; /* has analog outputs */
+ unsigned is_labpc1200:1; /* has extra regs compared to pc+ */
+ unsigned has_mmio:1; /* uses memory mapped io */
};
struct labpc_private {
@@ -101,9 +86,5 @@ struct labpc_private {
int labpc_common_attach(struct comedi_device *dev,
unsigned int irq, unsigned long isr_flags);
-void labpc_common_detach(struct comedi_device *dev);
-
-extern const int labpc_1200_ai_gain_bits[];
-extern const struct comedi_lrange range_labpc_1200_ai;
#endif /* _NI_LABPC_H */
diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c
index 9e3737c6918d..ce67f4bbb1f5 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_cs.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c
@@ -18,12 +18,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-************************************************************************
*/
/*
Driver: ni_labpc_cs
@@ -76,12 +70,9 @@ NI manuals:
static const struct labpc_boardinfo labpc_cs_boards[] = {
{
.name = "daqcard-1200",
- .device_id = 0x103,
.ai_speed = 10000,
- .register_layout = labpc_1200_layout,
.has_ao = 1,
- .ai_range_table = &range_labpc_1200_ai,
- .ai_range_code = labpc_1200_ai_gain_bits,
+ .is_labpc1200 = 1,
},
};
@@ -113,17 +104,11 @@ static int labpc_auto_attach(struct comedi_device *dev,
return labpc_common_attach(dev, link->irq, IRQF_SHARED);
}
-static void labpc_detach(struct comedi_device *dev)
-{
- labpc_common_detach(dev);
- comedi_pcmcia_disable(dev);
-}
-
static struct comedi_driver driver_labpc_cs = {
.driver_name = "ni_labpc_cs",
.module = THIS_MODULE,
.auto_attach = labpc_auto_attach,
- .detach = labpc_detach,
+ .detach = comedi_pcmcia_disable,
};
static int labpc_cs_attach(struct pcmcia_device *link)
diff --git a/drivers/staging/comedi/drivers/ni_labpc_pci.c b/drivers/staging/comedi/drivers/ni_labpc_pci.c
index 8e916f86ccea..6c79237b2b5c 100644
--- a/drivers/staging/comedi/drivers/ni_labpc_pci.c
+++ b/drivers/staging/comedi/drivers/ni_labpc_pci.c
@@ -12,10 +12,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -50,11 +46,9 @@ static const struct labpc_boardinfo labpc_pci_boards[] = {
[BOARD_NI_PCI1200] = {
.name = "ni_pci-1200",
.ai_speed = 10000,
- .register_layout = labpc_1200_layout,
- .has_ao = 1,
- .ai_range_table = &range_labpc_1200_ai,
- .ai_range_code = labpc_1200_ai_gain_bits,
.ai_scan_up = 1,
+ .has_ao = 1,
+ .is_labpc1200 = 1,
.has_mmio = 1,
},
};
@@ -98,8 +92,6 @@ static void labpc_pci_detach(struct comedi_device *dev)
{
struct labpc_private *devpriv = dev->private;
- labpc_common_detach(dev);
-
if (devpriv && devpriv->mite) {
mite_unsetup(devpriv->mite);
mite_free(devpriv->mite);
diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 8c5dee9b3b05..3e9f544e67fc 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -15,11 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
@@ -4077,7 +4072,6 @@ static void mio_common_detach(struct comedi_device *dev)
ni_gpct_device_destroy(devpriv->counter_dev);
}
}
- comedi_spriv_free(dev, NI_8255_DIO_SUBDEV);
}
static void init_ao_67xx(struct comedi_device *dev, struct comedi_subdevice *s)
diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c
index 888be7b89d2d..f813f5763671 100644
--- a/drivers/staging/comedi/drivers/ni_mio_cs.c
+++ b/drivers/staging/comedi/drivers/ni_mio_cs.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_mio_cs
diff --git a/drivers/staging/comedi/drivers/ni_pcidio.c b/drivers/staging/comedi/drivers/ni_pcidio.c
index b5f340c186ec..5b2f72e102e1 100644
--- a/drivers/staging/comedi/drivers/ni_pcidio.c
+++ b/drivers/staging/comedi/drivers/ni_pcidio.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ni_pcidio
@@ -58,7 +53,6 @@ comedi_nonfree_firmware tarball available from http://www.comedi.org
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
-#include <linux/firmware.h>
#include "../comedidev.h"
@@ -971,11 +965,13 @@ static int ni_pcidio_change(struct comedi_device *dev,
return 0;
}
-static int pci_6534_load_fpga(struct comedi_device *dev, int fpga_index,
- const u8 *data, size_t data_len)
+static int pci_6534_load_fpga(struct comedi_device *dev,
+ const u8 *data, size_t data_len,
+ unsigned long context)
{
struct nidio96_private *devpriv = dev->private;
static const int timeout = 1000;
+ int fpga_index = context;
int i;
size_t j;
@@ -1033,7 +1029,7 @@ static int pci_6534_load_fpga(struct comedi_device *dev, int fpga_index,
static int pci_6534_reset_fpga(struct comedi_device *dev, int fpga_index)
{
- return pci_6534_load_fpga(dev, fpga_index, NULL, 0);
+ return pci_6534_load_fpga(dev, NULL, 0, fpga_index);
}
static int pci_6534_reset_fpgas(struct comedi_device *dev)
@@ -1067,13 +1063,12 @@ static void pci_6534_init_main_fpga(struct comedi_device *dev)
static int pci_6534_upload_firmware(struct comedi_device *dev)
{
struct nidio96_private *devpriv = dev->private;
- int ret;
- const struct firmware *fw;
static const char *const fw_file[3] = {
FW_PCI_6534_SCARAB_DI, /* loaded into scarab A for DI */
FW_PCI_6534_SCARAB_DO, /* loaded into scarab B for DO */
FW_PCI_6534_MAIN, /* loaded into main FPGA */
};
+ int ret;
int n;
ret = pci_6534_reset_fpgas(dev);
@@ -1081,14 +1076,11 @@ static int pci_6534_upload_firmware(struct comedi_device *dev)
return ret;
/* load main FPGA first, then the two scarabs */
for (n = 2; n >= 0; n--) {
- ret = request_firmware(&fw, fw_file[n],
- &devpriv->mite->pcidev->dev);
- if (ret == 0) {
- ret = pci_6534_load_fpga(dev, n, fw->data, fw->size);
- if (ret == 0 && n == 2)
- pci_6534_init_main_fpga(dev);
- release_firmware(fw);
- }
+ ret = comedi_load_firmware(dev, &devpriv->mite->pcidev->dev,
+ fw_file[n],
+ pci_6534_load_fpga, n);
+ if (ret == 0 && n == 2)
+ pci_6534_init_main_fpga(dev);
if (ret < 0)
break;
}
diff --git a/drivers/staging/comedi/drivers/ni_pcimio.c b/drivers/staging/comedi/drivers/ni_pcimio.c
index 634d02303aa0..35681ba1f369 100644
--- a/drivers/staging/comedi/drivers/ni_pcimio.c
+++ b/drivers/staging/comedi/drivers/ni_pcimio.c
@@ -14,10 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: ni_pcimio
diff --git a/drivers/staging/comedi/drivers/ni_stc.h b/drivers/staging/comedi/drivers/ni_stc.h
index 0a613c077608..11bf0aab82ea 100644
--- a/drivers/staging/comedi/drivers/ni_stc.h
+++ b/drivers/staging/comedi/drivers/ni_stc.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/ni_tio.c b/drivers/staging/comedi/drivers/ni_tio.c
index 225287769dc1..f2cf76d15d78 100644
--- a/drivers/staging/comedi/drivers/ni_tio.c
+++ b/drivers/staging/comedi/drivers/ni_tio.c
@@ -13,10 +13,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/ni_tio.h b/drivers/staging/comedi/drivers/ni_tio.h
index 8572996539fa..7e13697b3254 100644
--- a/drivers/staging/comedi/drivers/ni_tio.h
+++ b/drivers/staging/comedi/drivers/ni_tio.h
@@ -13,11 +13,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _COMEDI_NI_TIO_H
diff --git a/drivers/staging/comedi/drivers/ni_tio_internal.h b/drivers/staging/comedi/drivers/ni_tio_internal.h
index 5e00212aa022..b009876754a8 100644
--- a/drivers/staging/comedi/drivers/ni_tio_internal.h
+++ b/drivers/staging/comedi/drivers/ni_tio_internal.h
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _COMEDI_NI_TIO_INTERNAL_H
diff --git a/drivers/staging/comedi/drivers/ni_tiocmd.c b/drivers/staging/comedi/drivers/ni_tiocmd.c
index 13747f324936..cff50bc45bcd 100644
--- a/drivers/staging/comedi/drivers/ni_tiocmd.c
+++ b/drivers/staging/comedi/drivers/ni_tiocmd.c
@@ -13,10 +13,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/pcl711.c b/drivers/staging/comedi/drivers/pcl711.c
index 8be2a4c503cc..7abf3f74144e 100644
--- a/drivers/staging/comedi/drivers/pcl711.c
+++ b/drivers/staging/comedi/drivers/pcl711.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: pcl711
diff --git a/drivers/staging/comedi/drivers/pcl724.c b/drivers/staging/comedi/drivers/pcl724.c
index 4f033d88eeca..cea657c7801d 100644
--- a/drivers/staging/comedi/drivers/pcl724.c
+++ b/drivers/staging/comedi/drivers/pcl724.c
@@ -1,42 +1,28 @@
/*
- comedi/drivers/pcl724.c
-
- Michal Dobes <dobes@tesnet.cz>
-
- hardware driver for Advantech cards:
- card: PCL-724, PCL-722, PCL-731
- driver: pcl724, pcl722, pcl731
- and ADLink cards:
- card: ACL-7122, ACL-7124, PET-48DIO
- driver: acl7122, acl7124, pet48dio
-
- Options for PCL-724, PCL-731, ACL-7124 and PET-48DIO:
- [0] - IO Base
-
- Options for PCL-722 and ACL-7122:
- [0] - IO Base
- [1] - IRQ (0=disable IRQ) IRQ isn't supported at this time!
- [2] -number of DIO:
- 0, 144: 144 DIO configuration
- 1, 96: 96 DIO configuration
-*/
-/*
-Driver: pcl724
-Description: Advantech PCL-724, PCL-722, PCL-731 ADLink ACL-7122, ACL-7124,
- PET-48DIO
-Author: Michal Dobes <dobes@tesnet.cz>
-Devices: [Advantech] PCL-724 (pcl724), PCL-722 (pcl722), PCL-731 (pcl731),
- [ADLink] ACL-7122 (acl7122), ACL-7124 (acl7124), PET-48DIO (pet48dio)
-Status: untested
-
-This is driver for digital I/O boards PCL-722/724/731 with 144/24/48 DIO
-and for digital I/O boards ACL-7122/7124/PET-48DIO with 144/24/48 DIO.
-It need 8255.o for operations and only immediate mode is supported.
-See the source for configuration details.
-*/
+ * pcl724.c
+ * Comedi driver for 8255 based ISA DIO boards
+ *
+ * Michal Dobes <dobes@tesnet.cz>
+ */
+
/*
- * check_driver overrides:
- * struct comedi_insn
+ * Driver: pcl724
+ * Description: Comedi driver for 8255 based ISA DIO boards
+ * Devices: (Advantech) PCL-724 [pcl724]
+ * (Advantech) PCL-722 [pcl722]
+ * (Advantech) PCL-731 [pcl731]
+ * (ADLink) ACL-7122 [acl7122]
+ * (ADLink) ACL-7124 [acl7124]
+ * (ADLink) PET-48DIO [pet48dio]
+ * Author: Michal Dobes <dobes@tesnet.cz>
+ * Status: untested
+ *
+ * Configuration options:
+ * [0] - IO Base
+ * [1] - IRQ (not supported)
+ * [2] - number of DIO (pcl722 and acl7122 boards)
+ * 0, 144: 144 DIO configuration
+ * 1, 96: 96 DIO configuration
*/
#include "../comedidev.h"
@@ -46,40 +32,48 @@ See the source for configuration details.
#include "8255.h"
-#define PCL722_SIZE 32
-#define PCL722_96_SIZE 16
-#define PCL724_SIZE 4
-#define PCL731_SIZE 8
-#define PET48_SIZE 2
-
#define SIZE_8255 4
-/* #define PCL724_IRQ 1 no IRQ support now */
-
struct pcl724_board {
-
- const char *name; /* board name */
- int dio; /* num of DIO */
- int numofports; /* num of 8255 subdevices */
- unsigned int IRQbits; /* allowed interrupts */
- unsigned int io_range; /* len of IO space */
- char can_have96;
- char is_pet48;
+ const char *name;
+ unsigned int io_range;
+ unsigned int can_have96:1;
+ unsigned int is_pet48:1;
+ int numofports;
};
-static int subdev_8255_cb(int dir, int port, int data, unsigned long arg)
-{
- unsigned long iobase = arg;
-
- if (dir) {
- outb(data, iobase + port);
- return 0;
- } else {
- return inb(iobase + port);
- }
-}
+static const struct pcl724_board boardtypes[] = {
+ {
+ .name = "pcl724",
+ .io_range = 0x04,
+ .numofports = 1, /* 24 DIO channels */
+ }, {
+ .name = "pcl722",
+ .io_range = 0x20,
+ .can_have96 = 1,
+ .numofports = 6, /* 144 (or 96) DIO channels */
+ }, {
+ .name = "pcl731",
+ .io_range = 0x08,
+ .numofports = 2, /* 48 DIO channels */
+ }, {
+ .name = "acl7122",
+ .io_range = 0x20,
+ .can_have96 = 1,
+ .numofports = 6, /* 144 (or 96) DIO channels */
+ }, {
+ .name = "acl7124",
+ .io_range = 0x04,
+ .numofports = 1, /* 24 DIO channels */
+ }, {
+ .name = "pet48dio",
+ .io_range = 0x02,
+ .is_pet48 = 1,
+ .numofports = 2, /* 48 DIO channels */
+ },
+};
-static int subdev_8255mapped_cb(int dir, int port, int data,
+static int pcl724_8255mapped_io(int dir, int port, int data,
unsigned long iobase)
{
int movport = SIZE_8255 * (iobase >> 12);
@@ -96,57 +90,30 @@ static int subdev_8255mapped_cb(int dir, int port, int data,
}
}
-static int pcl724_attach(struct comedi_device *dev, struct comedi_devconfig *it)
+static int pcl724_attach(struct comedi_device *dev,
+ struct comedi_devconfig *it)
{
const struct pcl724_board *board = comedi_board(dev);
struct comedi_subdevice *s;
+ unsigned long iobase;
unsigned int iorange;
- int ret, i, n_subdevices;
-#ifdef PCL724_IRQ
- unsigned int irq;
-#endif
+ int n_subdevices;
+ int ret;
+ int i;
iorange = board->io_range;
- if ((board->can_have96) &&
- ((it->options[1] == 1) || (it->options[1] == 96)))
- iorange = PCL722_96_SIZE; /* PCL-724 in 96 DIO configuration */
- ret = comedi_request_region(dev, it->options[0], iorange);
- if (ret)
- return ret;
+ n_subdevices = board->numofports;
-#ifdef PCL724_IRQ
- irq = 0;
- if (board->IRQbits != 0) { /* board support IRQ */
- irq = it->options[1];
- if (irq) { /* we want to use IRQ */
- if (((1 << irq) & board->IRQbits) == 0) {
- printk(KERN_WARNING
- ", IRQ %u is out of allowed range, "
- "DISABLING IT", irq);
- irq = 0; /* Bad IRQ */
- } else {
- if (request_irq(irq, interrupt_pcl724, 0,
- dev->board_name, dev)) {
- printk(KERN_WARNING
- ", unable to allocate IRQ %u, "
- "DISABLING IT", irq);
- irq = 0; /* Can't use IRQ */
- } else {
- printk(", irq=%u", irq);
- }
- }
- }
+ /* Handle PCL-724 in 96 DIO configuration */
+ if (board->can_have96 &&
+ (it->options[2] == 1 || it->options[2] == 96)) {
+ iorange = 0x10;
+ n_subdevices = 4;
}
- dev->irq = irq;
-#endif
-
- printk("\n");
-
- n_subdevices = board->numofports;
- if ((board->can_have96) && ((it->options[1] == 1)
- || (it->options[1] == 96)))
- n_subdevices = 4; /* PCL-724 in 96 DIO configuration */
+ ret = comedi_request_region(dev, it->options[0], iorange);
+ if (ret)
+ return ret;
ret = comedi_alloc_subdevices(dev, n_subdevices);
if (ret)
@@ -155,41 +122,25 @@ static int pcl724_attach(struct comedi_device *dev, struct comedi_devconfig *it)
for (i = 0; i < dev->n_subdevices; i++) {
s = &dev->subdevices[i];
if (board->is_pet48) {
- subdev_8255_init(dev, s, subdev_8255mapped_cb,
- (unsigned long)(dev->iobase +
- i * 0x1000));
- } else
- subdev_8255_init(dev, s, subdev_8255_cb,
- (unsigned long)(dev->iobase +
- SIZE_8255 * i));
+ iobase = dev->iobase + (i * 0x1000);
+ ret = subdev_8255_init(dev, s, pcl724_8255mapped_io,
+ iobase);
+ } else {
+ iobase = dev->iobase + (i * SIZE_8255);
+ ret = subdev_8255_init(dev, s, NULL, iobase);
+ }
+ if (ret)
+ return ret;
}
return 0;
}
-static void pcl724_detach(struct comedi_device *dev)
-{
- int i;
-
- for (i = 0; i < dev->n_subdevices; i++)
- comedi_spriv_free(dev, i);
- comedi_legacy_detach(dev);
-}
-
-static const struct pcl724_board boardtypes[] = {
- { "pcl724", 24, 1, 0x00fc, PCL724_SIZE, 0, 0, },
- { "pcl722", 144, 6, 0x00fc, PCL722_SIZE, 1, 0, },
- { "pcl731", 48, 2, 0x9cfc, PCL731_SIZE, 0, 0, },
- { "acl7122", 144, 6, 0x9ee8, PCL722_SIZE, 1, 0, },
- { "acl7124", 24, 1, 0x00fc, PCL724_SIZE, 0, 0, },
- { "pet48dio", 48, 2, 0x9eb8, PET48_SIZE, 0, 1, },
-};
-
static struct comedi_driver pcl724_driver = {
.driver_name = "pcl724",
.module = THIS_MODULE,
.attach = pcl724_attach,
- .detach = pcl724_detach,
+ .detach = comedi_legacy_detach,
.board_name = &boardtypes[0].name,
.num_names = ARRAY_SIZE(boardtypes),
.offset = sizeof(struct pcl724_board),
@@ -197,5 +148,5 @@ static struct comedi_driver pcl724_driver = {
module_comedi_driver(pcl724_driver);
MODULE_AUTHOR("Comedi http://www.comedi.org");
-MODULE_DESCRIPTION("Comedi low-level driver");
+MODULE_DESCRIPTION("Comedi driver for 8255 based ISA DIO boards");
MODULE_LICENSE("GPL");
diff --git a/drivers/staging/comedi/drivers/pcl725.c b/drivers/staging/comedi/drivers/pcl725.c
deleted file mode 100644
index 6b02f0631b4c..000000000000
--- a/drivers/staging/comedi/drivers/pcl725.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * comedi/drivers/pcl725.c
- * Driver for PCL725 and clones
- * David A. Schleef
- */
-/*
-Driver: pcl725
-Description: Advantech PCL-725 (& compatibles)
-Author: ds
-Status: unknown
-Devices: [Advantech] PCL-725 (pcl725)
-*/
-
-#include "../comedidev.h"
-
-#include <linux/ioport.h>
-
-#define PCL725_SIZE 2
-
-#define PCL725_DO 0
-#define PCL725_DI 1
-
-static int pcl725_do_insn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- if (data[0]) {
- s->state &= ~data[0];
- s->state |= (data[0] & data[1]);
- outb(s->state, dev->iobase + PCL725_DO);
- }
-
- data[1] = s->state;
-
- return insn->n;
-}
-
-static int pcl725_di_insn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- data[1] = inb(dev->iobase + PCL725_DI);
-
- return insn->n;
-}
-
-static int pcl725_attach(struct comedi_device *dev, struct comedi_devconfig *it)
-{
- struct comedi_subdevice *s;
- int ret;
-
- ret = comedi_request_region(dev, it->options[0], PCL725_SIZE);
- if (ret)
- return ret;
-
- ret = comedi_alloc_subdevices(dev, 2);
- if (ret)
- return ret;
-
- s = &dev->subdevices[0];
- /* do */
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcl725_do_insn;
- s->range_table = &range_digital;
-
- s = &dev->subdevices[1];
- /* di */
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcl725_di_insn;
- s->range_table = &range_digital;
-
- printk(KERN_INFO "\n");
-
- return 0;
-}
-
-static struct comedi_driver pcl725_driver = {
- .driver_name = "pcl725",
- .module = THIS_MODULE,
- .attach = pcl725_attach,
- .detach = comedi_legacy_detach,
-};
-module_comedi_driver(pcl725_driver);
-
-MODULE_AUTHOR("Comedi http://www.comedi.org");
-MODULE_DESCRIPTION("Comedi low-level driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/comedi/drivers/pcl726.c b/drivers/staging/comedi/drivers/pcl726.c
index 4aa994393fae..893f012a1b7a 100644
--- a/drivers/staging/comedi/drivers/pcl726.c
+++ b/drivers/staging/comedi/drivers/pcl726.c
@@ -20,11 +20,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: pcl726
diff --git a/drivers/staging/comedi/drivers/pcl730.c b/drivers/staging/comedi/drivers/pcl730.c
index 2879db75da3e..862e75fd68fd 100644
--- a/drivers/staging/comedi/drivers/pcl730.c
+++ b/drivers/staging/comedi/drivers/pcl730.c
@@ -3,135 +3,299 @@
* Driver for Advantech PCL-730 and clones
* José Luis Sánchez
*/
-/*
-Driver: pcl730
-Description: Advantech PCL-730 (& compatibles)
-Author: José Luis Sánchez (jsanchezv@teleline.es)
-Status: untested
-Devices: [Advantech] PCL-730 (pcl730), [ICP] ISO-730 (iso730),
- [Adlink] ACL-7130 (acl7130)
-Interrupts are not supported.
-The ACL-7130 card have an 8254 timer/counter not supported by this driver.
-*/
+/*
+ * Driver: pcl730
+ * Description: Advantech PCL-730 (& compatibles)
+ * Devices: (Advantech) PCL-730 [pcl730]
+ * (ICP) ISO-730 [iso730]
+ * (Adlink) ACL-7130 [acl7130]
+ * (Advantech) PCM-3730 [pcm3730]
+ * (Advantech) PCL-725 [pcl725]
+ * (ICP) P8R8-DIO [p16r16dio]
+ * (Adlink) ACL-7225b [acl7225b]
+ * (ICP) P16R16-DIO [p16r16dio]
+ * (Advantech) PCL-733 [pcl733]
+ * (Advantech) PCL-734 [pcl734]
+ * Author: José Luis Sánchez (jsanchezv@teleline.es)
+ * Status: untested
+ *
+ * Configuration options:
+ * [0] - I/O port base
+ *
+ * Interrupts are not supported.
+ * The ACL-7130 card has an 8254 timer/counter not supported by this driver.
+ */
#include "../comedidev.h"
#include <linux/ioport.h>
-#define PCL730_SIZE 4
-#define ACL7130_SIZE 8
-#define PCL730_IDIO_LO 0 /* Isolated Digital I/O low byte (ID0-ID7) */
-#define PCL730_IDIO_HI 1 /* Isolated Digital I/O high byte (ID8-ID15) */
-#define PCL730_DIO_LO 2 /* TTL Digital I/O low byte (D0-D7) */
-#define PCL730_DIO_HI 3 /* TTL Digital I/O high byte (D8-D15) */
+/*
+ * Register map
+ *
+ * The register map varies slightly depending on the board type but
+ * all registers are 8-bit.
+ *
+ * The boardinfo 'io_range' is used to allow comedi to request the
+ * proper range required by the board.
+ *
+ * The comedi_subdevice 'private' data is used to pass the register
+ * offset to the (*insn_bits) functions to read/write the correct
+ * registers.
+ *
+ * The basic register mapping looks like this:
+ *
+ * BASE+0 Isolated outputs 0-7 (write) / inputs 0-7 (read)
+ * BASE+1 Isolated outputs 8-15 (write) / inputs 8-15 (read)
+ * BASE+2 TTL outputs 0-7 (write) / inputs 0-7 (read)
+ * BASE+3 TTL outputs 8-15 (write) / inputs 8-15 (read)
+ *
+ * The pcm3730 board does not have register BASE+1.
+ *
+ * The pcl725 and p8r8dio only have registers BASE+0 and BASE+1:
+ *
+ * BASE+0 Isolated outputs 0-7 (write) (read back on p8r8dio)
+ * BASE+1 Isolated inputs 0-7 (read)
+ *
+ * The acl7225b and p16r16dio boards have this register mapping:
+ *
+ * BASE+0 Isolated outputs 0-7 (write) (read back)
+ * BASE+1 Isolated outputs 8-15 (write) (read back)
+ * BASE+2 Isolated inputs 0-7 (read)
+ * BASE+3 Isolated inputs 8-15 (read)
+ *
+ * The pcl733 and pcl733 boards have this register mapping:
+ *
+ * BASE+0 Isolated outputs 0-7 (write) or inputs 0-7 (read)
+ * BASE+1 Isolated outputs 8-15 (write) or inputs 8-15 (read)
+ * BASE+2 Isolated outputs 16-23 (write) or inputs 16-23 (read)
+ * BASE+3 Isolated outputs 24-31 (write) or inputs 24-31 (read)
+ */
struct pcl730_board {
+ const char *name;
+ unsigned int io_range;
+ unsigned is_pcl725:1;
+ unsigned is_acl7225b:1;
+ unsigned has_readback:1;
+ unsigned has_ttl_io:1;
+ int n_subdevs;
+ int n_iso_out_chan;
+ int n_iso_in_chan;
+ int n_ttl_chan;
+};
- const char *name; /* board name */
- unsigned int io_range; /* len of I/O space */
+static const struct pcl730_board pcl730_boards[] = {
+ {
+ .name = "pcl730",
+ .io_range = 0x04,
+ .has_ttl_io = 1,
+ .n_subdevs = 4,
+ .n_iso_out_chan = 16,
+ .n_iso_in_chan = 16,
+ .n_ttl_chan = 16,
+ }, {
+ .name = "iso730",
+ .io_range = 0x04,
+ .n_subdevs = 4,
+ .n_iso_out_chan = 16,
+ .n_iso_in_chan = 16,
+ .n_ttl_chan = 16,
+ }, {
+ .name = "acl7130",
+ .io_range = 0x08,
+ .has_ttl_io = 1,
+ .n_subdevs = 4,
+ .n_iso_out_chan = 16,
+ .n_iso_in_chan = 16,
+ .n_ttl_chan = 16,
+ }, {
+ .name = "pcm3730",
+ .io_range = 0x04,
+ .has_ttl_io = 1,
+ .n_subdevs = 4,
+ .n_iso_out_chan = 8,
+ .n_iso_in_chan = 8,
+ .n_ttl_chan = 16,
+ }, {
+ .name = "pcl725",
+ .io_range = 0x02,
+ .is_pcl725 = 1,
+ .n_subdevs = 2,
+ .n_iso_out_chan = 8,
+ .n_iso_in_chan = 8,
+ }, {
+ .name = "p8r8dio",
+ .io_range = 0x02,
+ .is_pcl725 = 1,
+ .has_readback = 1,
+ .n_subdevs = 2,
+ .n_iso_out_chan = 8,
+ .n_iso_in_chan = 8,
+ }, {
+ .name = "acl7225b",
+ .io_range = 0x08, /* only 4 are used */
+ .is_acl7225b = 1,
+ .has_readback = 1,
+ .n_subdevs = 2,
+ .n_iso_out_chan = 16,
+ .n_iso_in_chan = 16,
+ }, {
+ .name = "p16r16dio",
+ .io_range = 0x04,
+ .is_acl7225b = 1,
+ .has_readback = 1,
+ .n_subdevs = 2,
+ .n_iso_out_chan = 16,
+ .n_iso_in_chan = 16,
+ }, {
+ .name = "pcl733",
+ .io_range = 0x04,
+ .n_subdevs = 1,
+ .n_iso_in_chan = 32,
+ }, {
+ .name = "pcl734",
+ .io_range = 0x04,
+ .n_subdevs = 1,
+ .n_iso_out_chan = 32,
+ },
};
-static int pcl730_do_insn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int pcl730_do_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- if (data[0]) {
- s->state &= ~data[0];
- s->state |= (data[0] & data[1]);
+ unsigned long reg = (unsigned long)s->private;
+ unsigned int mask = data[0];
+ unsigned int bits = data[1];
+
+ if (mask) {
+ s->state &= ~mask;
+ s->state |= (bits & mask);
+
+ if (mask & 0x00ff)
+ outb(s->state & 0xff, dev->iobase + reg);
+ if ((mask & 0xff00) && (s->n_chan > 8))
+ outb((s->state >> 8) & 0xff, dev->iobase + reg + 1);
+ if ((mask & 0xff0000) && (s->n_chan > 16))
+ outb((s->state >> 16) & 0xff, dev->iobase + reg + 2);
+ if ((mask & 0xff000000) && (s->n_chan > 24))
+ outb((s->state >> 24) & 0xff, dev->iobase + reg + 3);
}
- if (data[0] & 0x00ff)
- outb(s->state & 0xff,
- dev->iobase + ((unsigned long)s->private));
- if (data[0] & 0xff00)
- outb((s->state >> 8),
- dev->iobase + ((unsigned long)s->private) + 1);
data[1] = s->state;
return insn->n;
}
-static int pcl730_di_insn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static unsigned int pcl730_get_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
- data[1] = inb(dev->iobase + ((unsigned long)s->private)) |
- (inb(dev->iobase + ((unsigned long)s->private) + 1) << 8);
+ unsigned long reg = (unsigned long)s->private;
+ unsigned int val;
+
+ val = inb(dev->iobase + reg);
+ if (s->n_chan > 8)
+ val |= (inb(dev->iobase + reg + 1) << 8);
+ if (s->n_chan > 16)
+ val |= (inb(dev->iobase + reg + 2) << 16);
+ if (s->n_chan > 24)
+ val |= (inb(dev->iobase + reg + 3) << 24);
+
+ return val;
+}
+
+static int pcl730_di_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
+{
+ data[1] = pcl730_get_bits(dev, s);
return insn->n;
}
-static int pcl730_attach(struct comedi_device *dev, struct comedi_devconfig *it)
+static int pcl730_attach(struct comedi_device *dev,
+ struct comedi_devconfig *it)
{
const struct pcl730_board *board = comedi_board(dev);
struct comedi_subdevice *s;
+ int subdev;
int ret;
ret = comedi_request_region(dev, it->options[0], board->io_range);
if (ret)
return ret;
- ret = comedi_alloc_subdevices(dev, 4);
+ ret = comedi_alloc_subdevices(dev, board->n_subdevs);
if (ret)
return ret;
- s = &dev->subdevices[0];
- /* Isolated do */
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = pcl730_do_insn;
- s->range_table = &range_digital;
- s->private = (void *)PCL730_IDIO_LO;
-
- s = &dev->subdevices[1];
- /* Isolated di */
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = pcl730_di_insn;
- s->range_table = &range_digital;
- s->private = (void *)PCL730_IDIO_LO;
-
- s = &dev->subdevices[2];
- /* TTL do */
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = pcl730_do_insn;
- s->range_table = &range_digital;
- s->private = (void *)PCL730_DIO_LO;
-
- s = &dev->subdevices[3];
- /* TTL di */
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 16;
- s->insn_bits = pcl730_di_insn;
- s->range_table = &range_digital;
- s->private = (void *)PCL730_DIO_LO;
-
- printk(KERN_INFO "\n");
+ subdev = 0;
+
+ if (board->n_iso_out_chan) {
+ /* Isolated Digital Outputs */
+ s = &dev->subdevices[subdev++];
+ s->type = COMEDI_SUBD_DO;
+ s->subdev_flags = SDF_WRITABLE;
+ s->n_chan = board->n_iso_out_chan;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = pcl730_do_insn_bits;
+ s->private = (void *)0;
+
+ /* get the initial state if supported */
+ if (board->has_readback)
+ s->state = pcl730_get_bits(dev, s);
+ }
+
+ if (board->n_iso_in_chan) {
+ /* Isolated Digital Inputs */
+ s = &dev->subdevices[subdev++];
+ s->type = COMEDI_SUBD_DI;
+ s->subdev_flags = SDF_READABLE;
+ s->n_chan = board->n_iso_in_chan;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = pcl730_di_insn_bits;
+ s->private = board->is_acl7225b ? (void *)2 :
+ board->is_pcl725 ? (void *)1 : (void *)0;
+ }
+
+ if (board->has_ttl_io) {
+ /* TTL Digital Outputs */
+ s = &dev->subdevices[subdev++];
+ s->type = COMEDI_SUBD_DO;
+ s->subdev_flags = SDF_WRITABLE;
+ s->n_chan = board->n_ttl_chan;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = pcl730_do_insn_bits;
+ s->private = (void *)2;
+
+ /* TTL Digital Inputs */
+ s = &dev->subdevices[subdev++];
+ s->type = COMEDI_SUBD_DI;
+ s->subdev_flags = SDF_READABLE;
+ s->n_chan = board->n_ttl_chan;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = pcl730_di_insn_bits;
+ s->private = (void *)2;
+ }
return 0;
}
-static const struct pcl730_board boardtypes[] = {
- { "pcl730", PCL730_SIZE, },
- { "iso730", PCL730_SIZE, },
- { "acl7130", ACL7130_SIZE, },
-};
-
static struct comedi_driver pcl730_driver = {
.driver_name = "pcl730",
.module = THIS_MODULE,
.attach = pcl730_attach,
.detach = comedi_legacy_detach,
- .board_name = &boardtypes[0].name,
- .num_names = ARRAY_SIZE(boardtypes),
+ .board_name = &pcl730_boards[0].name,
+ .num_names = ARRAY_SIZE(pcl730_boards),
.offset = sizeof(struct pcl730_board),
};
module_comedi_driver(pcl730_driver);
diff --git a/drivers/staging/comedi/drivers/pcm3724.c b/drivers/staging/comedi/drivers/pcm3724.c
index 4ef0df30b07a..5a9cd38e15f2 100644
--- a/drivers/staging/comedi/drivers/pcm3724.c
+++ b/drivers/staging/comedi/drivers/pcm3724.c
@@ -250,20 +250,11 @@ static int pcm3724_attach(struct comedi_device *dev,
return 0;
}
-static void pcm3724_detach(struct comedi_device *dev)
-{
- int i;
-
- for (i = 0; i < dev->n_subdevices; i++)
- comedi_spriv_free(dev, i);
- comedi_legacy_detach(dev);
-}
-
static struct comedi_driver pcm3724_driver = {
.driver_name = "pcm3724",
.module = THIS_MODULE,
.attach = pcm3724_attach,
- .detach = pcm3724_detach,
+ .detach = comedi_legacy_detach,
};
module_comedi_driver(pcm3724_driver);
diff --git a/drivers/staging/comedi/drivers/pcm3730.c b/drivers/staging/comedi/drivers/pcm3730.c
deleted file mode 100644
index 3a3ce2c769a2..000000000000
--- a/drivers/staging/comedi/drivers/pcm3730.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * comedi/drivers/pcm3730.c
- * Driver for PCM3730 and clones
- * Blaine Lee
- * from pcl725 by David S.
- */
-/*
-Driver: pcm3730
-Description: PCM3730
-Author: Blaine Lee
-Devices: [Advantech] PCM-3730 (pcm3730)
-Status: unknown
-
-Configuration options:
- [0] - I/O port base
-*/
-
-#include "../comedidev.h"
-
-#include <linux/ioport.h>
-
-#define PCM3730_SIZE 4 /* consecutive io port addresses */
-
-#define PCM3730_DOA 0 /* offsets for each port */
-#define PCM3730_DOB 2
-#define PCM3730_DOC 3
-#define PCM3730_DIA 0
-#define PCM3730_DIB 2
-#define PCM3730_DIC 3
-
-static int pcm3730_do_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- if (data[0]) {
- s->state &= ~data[0];
- s->state |= (data[0] & data[1]);
- outb(s->state, dev->iobase + (unsigned long)(s->private));
- }
- data[1] = s->state;
-
- return insn->n;
-}
-
-static int pcm3730_di_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- data[1] = inb(dev->iobase + (unsigned long)(s->private));
- return insn->n;
-}
-
-static int pcm3730_attach(struct comedi_device *dev,
- struct comedi_devconfig *it)
-{
- struct comedi_subdevice *s;
- int ret;
-
- ret = comedi_request_region(dev, it->options[0], PCM3730_SIZE);
- if (ret)
- return ret;
-
- ret = comedi_alloc_subdevices(dev, 6);
- if (ret)
- return ret;
-
- s = &dev->subdevices[0];
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_do_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DOA;
-
- s = &dev->subdevices[1];
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_do_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DOB;
-
- s = &dev->subdevices[2];
- s->type = COMEDI_SUBD_DO;
- s->subdev_flags = SDF_WRITABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_do_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DOC;
-
- s = &dev->subdevices[3];
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_di_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DIA;
-
- s = &dev->subdevices[4];
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_di_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DIB;
-
- s = &dev->subdevices[5];
- s->type = COMEDI_SUBD_DI;
- s->subdev_flags = SDF_READABLE;
- s->maxdata = 1;
- s->n_chan = 8;
- s->insn_bits = pcm3730_di_insn_bits;
- s->range_table = &range_digital;
- s->private = (void *)PCM3730_DIC;
-
- printk(KERN_INFO "\n");
-
- return 0;
-}
-
-static struct comedi_driver pcm3730_driver = {
- .driver_name = "pcm3730",
- .module = THIS_MODULE,
- .attach = pcm3730_attach,
- .detach = comedi_legacy_detach,
-};
-module_comedi_driver(pcm3730_driver);
-
-MODULE_AUTHOR("Comedi http://www.comedi.org");
-MODULE_DESCRIPTION("Comedi low-level driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/comedi/drivers/pcmad.c b/drivers/staging/comedi/drivers/pcmad.c
index b7c932e152e0..d5c728dc6192 100644
--- a/drivers/staging/comedi/drivers/pcmad.c
+++ b/drivers/staging/comedi/drivers/pcmad.c
@@ -1,52 +1,44 @@
/*
- comedi/drivers/pcmad.c
- Hardware driver for Winsystems PCM-A/D12 and PCM-A/D16
-
- COMEDI - Linux Control and Measurement Device Interface
- Copyright (C) 2000,2001 David A. Schleef <ds@schleef.org>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * pcmad.c
+ * Hardware driver for Winsystems PCM-A/D12 and PCM-A/D16
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 2000,2001 David A. Schleef <ds@schleef.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
-*/
/*
-Driver: pcmad
-Description: Winsystems PCM-A/D12, PCM-A/D16
-Author: ds
-Devices: [Winsystems] PCM-A/D12 (pcmad12), PCM-A/D16 (pcmad16)
-Status: untested
-
-This driver was written on a bet that I couldn't write a driver
-in less than 2 hours. I won the bet, but never got paid. =(
-
-Configuration options:
- [0] - I/O port base
- [1] - unused
- [2] - Analog input reference
- 0 = single ended
- 1 = differential
- [3] - Analog input encoding (must match jumpers)
- 0 = straight binary
- 1 = two's complement
-*/
-
-#include <linux/interrupt.h>
-#include "../comedidev.h"
-
-#include <linux/ioport.h>
+ * Driver: pcmad
+ * Description: Winsystems PCM-A/D12, PCM-A/D16
+ * Devices: (Winsystems) PCM-A/D12 [pcmad12]
+ * (Winsystems) PCM-A/D16 [pcmad16]
+ * Author: ds
+ * Status: untested
+ *
+ * This driver was written on a bet that I couldn't write a driver
+ * in less than 2 hours. I won the bet, but never got paid. =(
+ *
+ * Configuration options:
+ * [0] - I/O port base
+ * [1] - IRQ (unused)
+ * [2] - Analog input reference (must match jumpers)
+ * 0 = single-ended (16 channels)
+ * 1 = differential (8 channels)
+ * [3] - Analog input encoding (must match jumpers)
+ * 0 = straight binary (0-5V input range)
+ * 1 = two's complement (+-10V input range)
+ */
-#define PCMAD_SIZE 4
+#include "../comedidev.h"
#define PCMAD_STATUS 0
#define PCMAD_LSB 1
@@ -55,60 +47,82 @@ Configuration options:
struct pcmad_board_struct {
const char *name;
- int n_ai_bits;
+ unsigned int ai_maxdata;
};
-struct pcmad_priv_struct {
- int differential;
- int twos_comp;
+static const struct pcmad_board_struct pcmad_boards[] = {
+ {
+ .name = "pcmad12",
+ .ai_maxdata = 0x0fff,
+ }, {
+ .name = "pcmad16",
+ .ai_maxdata = 0xffff,
+ },
};
#define TIMEOUT 100
+static int pcmad_ai_wait_for_eoc(struct comedi_device *dev,
+ int timeout)
+{
+ int i;
+
+ for (i = 0; i < timeout; i++) {
+ if ((inb(dev->iobase + PCMAD_STATUS) & 0x3) == 0x3)
+ return 0;
+ }
+ return -ETIME;
+}
+
+static bool pcmad_range_is_bipolar(struct comedi_subdevice *s,
+ unsigned int range)
+{
+ return s->range_table->range[range].min < 0;
+}
+
static int pcmad_ai_insn_read(struct comedi_device *dev,
struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- const struct pcmad_board_struct *board = comedi_board(dev);
- struct pcmad_priv_struct *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int range = CR_RANGE(insn->chanspec);
+ unsigned int val;
+ int ret;
int i;
- int chan;
- int n;
- chan = CR_CHAN(insn->chanspec);
-
- for (n = 0; n < insn->n; n++) {
+ for (i = 0; i < insn->n; i++) {
outb(chan, dev->iobase + PCMAD_CONVERT);
- for (i = 0; i < TIMEOUT; i++) {
- if ((inb(dev->iobase + PCMAD_STATUS) & 0x3) == 0x3)
- break;
+ ret = pcmad_ai_wait_for_eoc(dev, TIMEOUT);
+ if (ret)
+ return ret;
+
+ val = inb(dev->iobase + PCMAD_LSB) |
+ (inb(dev->iobase + PCMAD_MSB) << 8);
+
+ /* data is shifted on the pcmad12, fix it */
+ if (s->maxdata == 0x0fff)
+ val >>= 4;
+
+ if (pcmad_range_is_bipolar(s, range)) {
+ /* munge the two's complement value */
+ val ^= ((s->maxdata + 1) >> 1);
}
- data[n] = inb(dev->iobase + PCMAD_LSB);
- data[n] |= (inb(dev->iobase + PCMAD_MSB) << 8);
- if (devpriv->twos_comp)
- data[n] ^= (1 << (board->n_ai_bits - 1));
+ data[i] = val;
}
- return n;
+ return insn->n;
}
-/*
- * options:
- * 0 i/o base
- * 1 unused
- * 2 0=single ended 1=differential
- * 3 0=straight binary 1=two's comp
- */
static int pcmad_attach(struct comedi_device *dev, struct comedi_devconfig *it)
{
const struct pcmad_board_struct *board = comedi_board(dev);
- struct pcmad_priv_struct *devpriv;
struct comedi_subdevice *s;
int ret;
- ret = comedi_request_region(dev, it->options[0], PCMAD_SIZE);
+ ret = comedi_request_region(dev, it->options[0], 0x04);
if (ret)
return ret;
@@ -116,32 +130,25 @@ static int pcmad_attach(struct comedi_device *dev, struct comedi_devconfig *it)
if (ret)
return ret;
- devpriv = kzalloc(sizeof(*devpriv), GFP_KERNEL);
- if (!devpriv)
- return -ENOMEM;
- dev->private = devpriv;
-
s = &dev->subdevices[0];
- s->type = COMEDI_SUBD_AI;
- s->subdev_flags = SDF_READABLE | AREF_GROUND;
- s->n_chan = 16; /* XXX */
- s->len_chanlist = 1;
- s->insn_read = pcmad_ai_insn_read;
- s->maxdata = (1 << board->n_ai_bits) - 1;
- s->range_table = &range_unknown;
+ s->type = COMEDI_SUBD_AI;
+ if (it->options[1]) {
+ /* 8 differential channels */
+ s->subdev_flags = SDF_READABLE | AREF_DIFF;
+ s->n_chan = 8;
+ } else {
+ /* 16 single-ended channels */
+ s->subdev_flags = SDF_READABLE | AREF_GROUND;
+ s->n_chan = 16;
+ }
+ s->len_chanlist = 1;
+ s->maxdata = board->ai_maxdata;
+ s->range_table = it->options[2] ? &range_bipolar10 : &range_unipolar5;
+ s->insn_read = pcmad_ai_insn_read;
return 0;
}
-static const struct pcmad_board_struct pcmad_boards[] = {
- {
- .name = "pcmad12",
- .n_ai_bits = 12,
- }, {
- .name = "pcmad16",
- .n_ai_bits = 16,
- },
-};
static struct comedi_driver pcmad_driver = {
.driver_name = "pcmad",
.module = THIS_MODULE,
diff --git a/drivers/staging/comedi/drivers/pcmda12.c b/drivers/staging/comedi/drivers/pcmda12.c
index 61e7fd14a1e8..774a63dfe040 100644
--- a/drivers/staging/comedi/drivers/pcmda12.c
+++ b/drivers/staging/comedi/drivers/pcmda12.c
@@ -1,152 +1,130 @@
/*
- comedi/drivers/pcmda12.c
- Driver for Winsystems PC-104 based PCM-D/A-12 8-channel AO board.
-
- COMEDI - Linux Control and Measurement Device Interface
- Copyright (C) 2006 Calin A. Culianu <calin@ajvar.org>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
+ * pcmda12.c
+ * Driver for Winsystems PC-104 based PCM-D/A-12 8-channel AO board.
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 2006 Calin A. Culianu <calin@ajvar.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
/*
-Driver: pcmda12
-Description: A driver for the Winsystems PCM-D/A-12
-Devices: [Winsystems] PCM-D/A-12 (pcmda12)
-Author: Calin Culianu <calin@ajvar.org>
-Updated: Fri, 13 Jan 2006 12:01:01 -0500
-Status: works
-
-A driver for the relatively straightforward-to-program PCM-D/A-12.
-This board doesn't support commands, and the only way to set its
-analog output range is to jumper the board. As such,
-comedi_data_write() ignores the range value specified.
-
-The board uses 16 consecutive I/O addresses starting at the I/O port
-base address. Each address corresponds to the LSB then MSB of a
-particular channel from 0-7.
-
-Note that the board is not ISA-PNP capable and thus
-needs the I/O port comedi_config parameter.
-
-Note that passing a nonzero value as the second config option will
-enable "simultaneous xfer" mode for this board, in which AO writes
-will not take effect until a subsequent read of any AO channel. This
-is so that one can speed up programming by preloading all AO registers
-with values before simultaneously setting them to take effect with one
-read command.
-
-Configuration Options:
- [0] - I/O port base address
- [1] - Do Simultaneous Xfer (see description)
-*/
+ * Driver: pcmda12
+ * Description: A driver for the Winsystems PCM-D/A-12
+ * Devices: (Winsystems) PCM-D/A-12 [pcmda12]
+ * Author: Calin Culianu <calin@ajvar.org>
+ * Updated: Fri, 13 Jan 2006 12:01:01 -0500
+ * Status: works
+ *
+ * A driver for the relatively straightforward-to-program PCM-D/A-12.
+ * This board doesn't support commands, and the only way to set its
+ * analog output range is to jumper the board. As such,
+ * comedi_data_write() ignores the range value specified.
+ *
+ * The board uses 16 consecutive I/O addresses starting at the I/O port
+ * base address. Each address corresponds to the LSB then MSB of a
+ * particular channel from 0-7.
+ *
+ * Note that the board is not ISA-PNP capable and thus needs the I/O
+ * port comedi_config parameter.
+ *
+ * Note that passing a nonzero value as the second config option will
+ * enable "simultaneous xfer" mode for this board, in which AO writes
+ * will not take effect until a subsequent read of any AO channel. This
+ * is so that one can speed up programming by preloading all AO registers
+ * with values before simultaneously setting them to take effect with one
+ * read command.
+ *
+ * Configuration Options:
+ * [0] - I/O port base address
+ * [1] - Do Simultaneous Xfer (see description)
+ */
#include "../comedidev.h"
-#define CHANS 8
-#define IOSIZE 16
-#define LSB(x) ((unsigned char)((x) & 0xff))
-#define MSB(x) ((unsigned char)((((unsigned short)(x))>>8) & 0xff))
-#define LSB_PORT(chan) (dev->iobase + (chan)*2)
-#define MSB_PORT(chan) (LSB_PORT(chan)+1)
-#define BITS 12
-
-/* note these have no effect and are merely here for reference..
- these are configured by jumpering the board! */
+/* AI range is not configurable, it's set by jumpers on the board */
static const struct comedi_lrange pcmda12_ranges = {
- 3,
- {
- UNI_RANGE(5), UNI_RANGE(10), BIP_RANGE(5)
- }
+ 3, {
+ UNI_RANGE(5),
+ UNI_RANGE(10),
+ BIP_RANGE(5)
+ }
};
struct pcmda12_private {
-
- unsigned int ao_readback[CHANS];
+ unsigned int ao_readback[8];
int simultaneous_xfer_mode;
};
-static void zero_chans(struct comedi_device *dev)
-{ /* sets up an
- ASIC chip to defaults */
- int i;
- for (i = 0; i < CHANS; ++i) {
-/* /\* do this as one instruction?? *\/ */
-/* outw(0, LSB_PORT(chan)); */
- outb(0, LSB_PORT(i));
- outb(0, MSB_PORT(i));
- }
- inb(LSB_PORT(0)); /* update chans. */
-}
-
-static int ao_winsn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int pcmda12_ao_insn_write(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
struct pcmda12_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int val = devpriv->ao_readback[chan];
+ unsigned long ioreg = dev->iobase + (chan * 2);
int i;
- int chan = CR_CHAN(insn->chanspec);
- /* Writing a list of values to an AO channel is probably not
- * very useful, but that's how the interface is defined. */
for (i = 0; i < insn->n; ++i) {
-
-/* /\* do this as one instruction?? *\/ */
-/* outw(data[i], LSB_PORT(chan)); */
-
- /* Need to do this as two instructions due to 8-bit bus?? */
- /* first, load the low byte */
- outb(LSB(data[i]), LSB_PORT(chan));
- /* next, write the high byte */
- outb(MSB(data[i]), MSB_PORT(chan));
-
- /* save shadow register */
- devpriv->ao_readback[chan] = data[i];
-
+ val = data[i];
+ outb(val & 0xff, ioreg);
+ outb((val >> 8) & 0xff, ioreg + 1);
+
+ /*
+ * Initiate transfer if not in simultaneaous xfer
+ * mode by reading one of the AO registers.
+ */
if (!devpriv->simultaneous_xfer_mode)
- inb(LSB_PORT(chan));
+ inb(ioreg);
}
+ devpriv->ao_readback[chan] = val;
- /* return the number of samples written */
- return i;
+ return insn->n;
}
-/* AO subdevices should have a read insn as well as a write insn.
-
- Usually this means copying a value stored in devpriv->ao_readback.
- However, since this driver supports simultaneous xfer then sometimes
- this function actually accomplishes work.
-
- Simultaneaous xfer mode is accomplished by loading ALL the values
- you want for AO in all the channels, then READing off one of the AO
- registers to initiate the instantaneous simultaneous update of all
- DAC outputs, which makes all AO channels update simultaneously.
- This is useful for some control applications, I would imagine.
-*/
-static int ao_rinsn(struct comedi_device *dev, struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int pcmda12_ao_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
struct pcmda12_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
int i;
- int chan = CR_CHAN(insn->chanspec);
- for (i = 0; i < insn->n; i++) {
- if (devpriv->simultaneous_xfer_mode)
- inb(LSB_PORT(chan));
- /* read back shadow register */
+ /*
+ * Initiate simultaneaous xfer mode by reading one of the
+ * AO registers. All analog outputs will then be updated.
+ */
+ if (devpriv->simultaneous_xfer_mode)
+ inb(dev->iobase);
+
+ for (i = 0; i < insn->n; i++)
data[i] = devpriv->ao_readback[chan];
- }
- return i;
+ return insn->n;
+}
+
+static void pcmda12_ao_reset(struct comedi_device *dev,
+ struct comedi_subdevice *s)
+{
+ int i;
+
+ for (i = 0; i < s->n_chan; ++i) {
+ outb(0, dev->iobase + (i * 2));
+ outb(0, dev->iobase + (i * 2) + 1);
+ }
+ /* Initiate transfer by reading one of the AO registers. */
+ inb(dev->iobase);
}
static int pcmda12_attach(struct comedi_device *dev,
@@ -156,7 +134,7 @@ static int pcmda12_attach(struct comedi_device *dev,
struct comedi_subdevice *s;
int ret;
- ret = comedi_request_region(dev, it->options[0], IOSIZE);
+ ret = comedi_request_region(dev, it->options[0], 0x10);
if (ret)
return ret;
@@ -172,18 +150,17 @@ static int pcmda12_attach(struct comedi_device *dev,
return ret;
s = &dev->subdevices[0];
- s->private = NULL;
- s->maxdata = (0x1 << BITS) - 1;
- s->range_table = &pcmda12_ranges;
- s->type = COMEDI_SUBD_AO;
- s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
- s->n_chan = CHANS;
- s->insn_write = &ao_winsn;
- s->insn_read = &ao_rinsn;
-
- zero_chans(dev); /* clear out all the registers, basically */
-
- return 1;
+ s->type = COMEDI_SUBD_AO;
+ s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
+ s->n_chan = 8;
+ s->maxdata = 0x0fff;
+ s->range_table = &pcmda12_ranges;
+ s->insn_write = pcmda12_ao_insn_write;
+ s->insn_read = pcmda12_ao_insn_read;
+
+ pcmda12_ao_reset(dev, s);
+
+ return 0;
}
static struct comedi_driver pcmda12_driver = {
diff --git a/drivers/staging/comedi/drivers/pcmmio.c b/drivers/staging/comedi/drivers/pcmmio.c
index 5a236cd5b33d..9f76b1f59983 100644
--- a/drivers/staging/comedi/drivers/pcmmio.c
+++ b/drivers/staging/comedi/drivers/pcmmio.c
@@ -14,10 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: pcmmio
diff --git a/drivers/staging/comedi/drivers/pcmuio.c b/drivers/staging/comedi/drivers/pcmuio.c
index 0c98e26bbba1..c43b6334ceae 100644
--- a/drivers/staging/comedi/drivers/pcmuio.c
+++ b/drivers/staging/comedi/drivers/pcmuio.c
@@ -1,79 +1,77 @@
/*
- comedi/drivers/pcmuio.c
- Driver for Winsystems PC-104 based 48-channel and 96-channel DIO boards.
-
- COMEDI - Linux Control and Measurement Device Interface
- Copyright (C) 2006 Calin A. Culianu <calin@ajvar.org>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
+ * pcmuio.c
+ * Comedi driver for Winsystems PC-104 based 48/96-channel DIO boards.
+ *
+ * COMEDI - Linux Control and Measurement Device Interface
+ * Copyright (C) 2006 Calin A. Culianu <calin@ajvar.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
/*
-Driver: pcmuio
-Description: A driver for the PCM-UIO48A and PCM-UIO96A boards from Winsystems.
-Devices: [Winsystems] PCM-UIO48A (pcmuio48), PCM-UIO96A (pcmuio96)
-Author: Calin Culianu <calin@ajvar.org>
-Updated: Fri, 13 Jan 2006 12:01:01 -0500
-Status: works
-
-A driver for the relatively straightforward-to-program PCM-UIO48A and
-PCM-UIO96A boards from Winsystems. These boards use either one or two
-(in the 96-DIO version) WS16C48 ASIC HighDensity I/O Chips (HDIO).
-This chip is interesting in that each I/O line is individually
-programmable for INPUT or OUTPUT (thus comedi_dio_config can be done
-on a per-channel basis). Also, each chip supports edge-triggered
-interrupts for the first 24 I/O lines. Of course, since the
-96-channel version of the board has two ASICs, it can detect polarity
-changes on up to 48 I/O lines. Since this is essentially an (non-PnP)
-ISA board, I/O Address and IRQ selection are done through jumpers on
-the board. You need to pass that information to this driver as the
-first and second comedi_config option, respectively. Note that the
-48-channel version uses 16 bytes of IO memory and the 96-channel
-version uses 32-bytes (in case you are worried about conflicts). The
-48-channel board is split into two 24-channel comedi subdevices.
-The 96-channel board is split into 4 24-channel DIO subdevices.
-
-Note that IRQ support has been added, but it is untested.
-
-To use edge-detection IRQ support, pass the IRQs of both ASICS
-(for the 96 channel version) or just 1 ASIC (for 48-channel version).
-Then, use use comedi_commands with TRIG_NOW.
-Your callback will be called each time an edge is triggered, and the data
-values will be two sample_t's, which should be concatenated to form one
-32-bit unsigned int. This value is the mask of channels that had
-edges detected from your channel list. Note that the bits positions
-in the mask correspond to positions in your chanlist when you specified
-the command and *not* channel id's!
-
-To set the polarity of the edge-detection interrupts pass a nonzero value for
-either CR_RANGE or CR_AREF for edge-up polarity, or a zero value for both
-CR_RANGE and CR_AREF if you want edge-down polarity.
-
-In the 48-channel version:
-
-On subdev 0, the first 24 channels channels are edge-detect channels.
-
-In the 96-channel board you have the collowing channels that can do edge detection:
-
-subdev 0, channels 0-24 (first 24 channels of 1st ASIC)
-subdev 2, channels 0-24 (first 24 channels of 2nd ASIC)
-
-Configuration Options:
- [0] - I/O port base address
- [1] - IRQ (for first ASIC, or first 24 channels)
- [2] - IRQ for second ASIC (pcmuio96 only - IRQ for chans 48-72 .. can be the same as first irq!)
-*/
+ * Driver: pcmuio
+ * Description: Winsystems PC-104 based 48/96-channel DIO boards.
+ * Devices: (Winsystems) PCM-UIO48A [pcmuio48]
+ * (Winsystems) PCM-UIO96A [pcmuio96]
+ * Author: Calin Culianu <calin@ajvar.org>
+ * Updated: Fri, 13 Jan 2006 12:01:01 -0500
+ * Status: works
+ *
+ * A driver for the relatively straightforward-to-program PCM-UIO48A and
+ * PCM-UIO96A boards from Winsystems. These boards use either one or two
+ * (in the 96-DIO version) WS16C48 ASIC HighDensity I/O Chips (HDIO). This
+ * chip is interesting in that each I/O line is individually programmable
+ * for INPUT or OUTPUT (thus comedi_dio_config can be done on a per-channel
+ * basis). Also, each chip supports edge-triggered interrupts for the first
+ * 24 I/O lines. Of course, since the 96-channel version of the board has
+ * two ASICs, it can detect polarity changes on up to 48 I/O lines. Since
+ * this is essentially an (non-PnP) ISA board, I/O Address and IRQ selection
+ * are done through jumpers on the board. You need to pass that information
+ * to this driver as the first and second comedi_config option, respectively.
+ * Note that the 48-channel version uses 16 bytes of IO memory and the 96-
+ * channel version uses 32-bytes (in case you are worried about conflicts).
+ * The 48-channel board is split into two 24-channel comedi subdevices. The
+ * 96-channel board is split into 4 24-channel DIO subdevices.
+ *
+ * Note that IRQ support has been added, but it is untested.
+ *
+ * To use edge-detection IRQ support, pass the IRQs of both ASICS (for the
+ * 96 channel version) or just 1 ASIC (for 48-channel version). Then, use
+ * comedi_commands with TRIG_NOW. Your callback will be called each time an
+ * edge is triggered, and the data values will be two sample_t's, which
+ * should be concatenated to form one 32-bit unsigned int. This value is
+ * the mask of channels that had edges detected from your channel list. Note
+ * that the bits positions in the mask correspond to positions in your
+ * chanlist when you specified the command and *not* channel id's!
+ *
+ * To set the polarity of the edge-detection interrupts pass a nonzero value
+ * for either CR_RANGE or CR_AREF for edge-up polarity, or a zero value for
+ * both CR_RANGE and CR_AREF if you want edge-down polarity.
+ *
+ * In the 48-channel version:
+ *
+ * On subdev 0, the first 24 channels channels are edge-detect channels.
+ *
+ * In the 96-channel board you have the following channels that can do edge
+ * detection:
+ *
+ * subdev 0, channels 0-24 (first 24 channels of 1st ASIC)
+ * subdev 2, channels 0-24 (first 24 channels of 2nd ASIC)
+ *
+ * Configuration Options:
+ * [0] - I/O port base address
+ * [1] - IRQ (for first ASIC, or first 24 channels)
+ * [2] - IRQ (for second ASIC, pcmuio96 only - IRQ for chans 48-72
+ * can be the same as first irq!)
+ */
#include <linux/interrupt.h>
#include <linux/slab.h>
@@ -82,94 +80,62 @@ Configuration Options:
#include "comedi_fc.h"
-#define CHANS_PER_PORT 8
-#define PORTS_PER_ASIC 6
-#define INTR_PORTS_PER_ASIC 3
-#define MAX_CHANS_PER_SUBDEV 24 /* number of channels per comedi subdevice */
-#define PORTS_PER_SUBDEV (MAX_CHANS_PER_SUBDEV/CHANS_PER_PORT)
-#define CHANS_PER_ASIC (CHANS_PER_PORT*PORTS_PER_ASIC)
-#define INTR_CHANS_PER_ASIC 24
-#define INTR_PORTS_PER_SUBDEV (INTR_CHANS_PER_ASIC/CHANS_PER_PORT)
-#define MAX_DIO_CHANS (PORTS_PER_ASIC*2*CHANS_PER_PORT)
-#define MAX_ASICS (MAX_DIO_CHANS/CHANS_PER_ASIC)
-#define CALC_N_SUBDEVS(nchans) ((nchans)/MAX_CHANS_PER_SUBDEV + (!!((nchans)%MAX_CHANS_PER_SUBDEV)) /*+ (nchans > INTR_CHANS_PER_ASIC ? 2 : 1)*/)
-/* IO Memory sizes */
-#define ASIC_IOSIZE (0x10)
-#define PCMUIO48_IOSIZE ASIC_IOSIZE
-#define PCMUIO96_IOSIZE (ASIC_IOSIZE*2)
-
-/* Some offsets - these are all in the 16byte IO memory offset from
- the base address. Note that there is a paging scheme to swap out
- offsets 0x8-0xA using the PAGELOCK register. See the table below.
-
- Register(s) Pages R/W? Description
- --------------------------------------------------------------
- REG_PORTx All R/W Read/Write/Configure IO
- REG_INT_PENDING All ReadOnly Quickly see which INT_IDx has int.
- REG_PAGELOCK All WriteOnly Select a page
- REG_POLx Pg. 1 only WriteOnly Select edge-detection polarity
- REG_ENABx Pg. 2 only WriteOnly Enable/Disable edge-detect. int.
- REG_INT_IDx Pg. 3 only R/W See which ports/bits have ints.
- */
-#define REG_PORT0 0x0
-#define REG_PORT1 0x1
-#define REG_PORT2 0x2
-#define REG_PORT3 0x3
-#define REG_PORT4 0x4
-#define REG_PORT5 0x5
-#define REG_INT_PENDING 0x6
-#define REG_PAGELOCK 0x7 /* page selector register, upper 2 bits select a page
- and bits 0-5 are used to 'lock down' a particular
- port above to make it readonly. */
-#define REG_POL0 0x8
-#define REG_POL1 0x9
-#define REG_POL2 0xA
-#define REG_ENAB0 0x8
-#define REG_ENAB1 0x9
-#define REG_ENAB2 0xA
-#define REG_INT_ID0 0x8
-#define REG_INT_ID1 0x9
-#define REG_INT_ID2 0xA
-
-#define NUM_PAGED_REGS 3
-#define NUM_PAGES 4
-#define FIRST_PAGED_REG 0x8
-#define REG_PAGE_BITOFFSET 6
-#define REG_LOCK_BITOFFSET 0
-#define REG_PAGE_MASK (~((0x1<<REG_PAGE_BITOFFSET)-1))
-#define REG_LOCK_MASK ~(REG_PAGE_MASK)
-#define PAGE_POL 1
-#define PAGE_ENAB 2
-#define PAGE_INT_ID 3
-
/*
- * Board descriptions for two imaginary boards. Describing the
- * boards in this way is optional, and completely driver-dependent.
- * Some drivers use arrays such as this, other do not.
+ * Register I/O map
+ *
+ * Offset Page 0 Page 1 Page 2 Page 3
+ * ------ ----------- ----------- ----------- -----------
+ * 0x00 Port 0 I/O Port 0 I/O Port 0 I/O Port 0 I/O
+ * 0x01 Port 1 I/O Port 1 I/O Port 1 I/O Port 1 I/O
+ * 0x02 Port 2 I/O Port 2 I/O Port 2 I/O Port 2 I/O
+ * 0x03 Port 3 I/O Port 3 I/O Port 3 I/O Port 3 I/O
+ * 0x04 Port 4 I/O Port 4 I/O Port 4 I/O Port 4 I/O
+ * 0x05 Port 5 I/O Port 5 I/O Port 5 I/O Port 5 I/O
+ * 0x06 INT_PENDING INT_PENDING INT_PENDING INT_PENDING
+ * 0x07 Page/Lock Page/Lock Page/Lock Page/Lock
+ * 0x08 N/A POL_0 ENAB_0 INT_ID0
+ * 0x09 N/A POL_1 ENAB_1 INT_ID1
+ * 0x0a N/A POL_2 ENAB_2 INT_ID2
*/
+#define PCMUIO_PORT_REG(x) (0x00 + (x))
+#define PCMUIO_INT_PENDING_REG 0x06
+#define PCMUIO_PAGE_LOCK_REG 0x07
+#define PCMUIO_LOCK_PORT(x) ((1 << (x)) & 0x3f)
+#define PCMUIO_PAGE(x) (((x) & 0x3) << 6)
+#define PCMUIO_PAGE_MASK PCMUIO_PAGE(3)
+#define PCMUIO_PAGE_POL 1
+#define PCMUIO_PAGE_ENAB 2
+#define PCMUIO_PAGE_INT_ID 3
+#define PCMUIO_PAGE_REG(x) (0x08 + (x))
+
+#define PCMUIO_ASIC_IOSIZE 0x10
+#define PCMUIO_MAX_ASICS 2
+
struct pcmuio_board {
const char *name;
const int num_asics;
- const int num_channels_per_port;
- const int num_ports;
};
-/* this structure is for data unique to this subdevice. */
-struct pcmuio_subdev_private {
- /* mapping of halfwords (bytes) in port/chanarray to iobase */
- unsigned long iobases[PORTS_PER_SUBDEV];
+static const struct pcmuio_board pcmuio_boards[] = {
+ {
+ .name = "pcmuio48",
+ .num_asics = 1,
+ }, {
+ .name = "pcmuio96",
+ .num_asics = 2,
+ },
+};
+struct pcmuio_subdev_private {
/* The below is only used for intr subdevices */
struct {
- int asic; /* if non-negative, this subdev has an interrupt asic */
- int first_chan; /* if nonnegative, the first channel id for
- interrupts. */
- int num_asic_chans; /* the number of asic channels in this subdev
- that have interrutps */
- int asic_chan; /* if nonnegative, the first channel id with
- respect to the asic that has interrupts */
- int enabled_mask; /* subdev-relative channel mask for channels
- we are interested in */
+ /* if non-negative, this subdev has an interrupt asic */
+ int asic;
+ /*
+ * subdev-relative channel mask for channels
+ * we are interested in
+ */
+ int enabled_mask;
int active;
int stop_count;
int continuous;
@@ -177,160 +143,112 @@ struct pcmuio_subdev_private {
} intr;
};
-/* this structure is for data unique to this hardware driver. If
- several hardware drivers keep similar information in this structure,
- feel free to suggest moving the variable to the struct comedi_device struct. */
struct pcmuio_private {
struct {
- unsigned char pagelock; /* current page and lock */
- unsigned char pol[NUM_PAGED_REGS]; /* shadow of POLx registers */
- unsigned char enab[NUM_PAGED_REGS]; /* shadow of ENABx registers */
- int num;
- unsigned long iobase;
unsigned int irq;
spinlock_t spinlock;
- } asics[MAX_ASICS];
+ } asics[PCMUIO_MAX_ASICS];
struct pcmuio_subdev_private *sprivs;
};
-#define subpriv ((struct pcmuio_subdev_private *)s->private)
+static void pcmuio_write(struct comedi_device *dev, unsigned int val,
+ int asic, int page, int port)
+{
+ unsigned long iobase = dev->iobase + (asic * PCMUIO_ASIC_IOSIZE);
+
+ if (page == 0) {
+ /* Port registers are valid for any page */
+ outb(val & 0xff, iobase + PCMUIO_PORT_REG(port + 0));
+ outb((val >> 8) & 0xff, iobase + PCMUIO_PORT_REG(port + 1));
+ outb((val >> 16) & 0xff, iobase + PCMUIO_PORT_REG(port + 2));
+ } else {
+ outb(PCMUIO_PAGE(page), iobase + PCMUIO_PAGE_LOCK_REG);
+ outb(val & 0xff, iobase + PCMUIO_PAGE_REG(0));
+ outb((val >> 8) & 0xff, iobase + PCMUIO_PAGE_REG(1));
+ outb((val >> 16) & 0xff, iobase + PCMUIO_PAGE_REG(2));
+ }
+}
+
+static unsigned int pcmuio_read(struct comedi_device *dev,
+ int asic, int page, int port)
+{
+ unsigned long iobase = dev->iobase + (asic * PCMUIO_ASIC_IOSIZE);
+ unsigned int val;
+
+ if (page == 0) {
+ /* Port registers are valid for any page */
+ val = inb(iobase + PCMUIO_PORT_REG(port + 0));
+ val |= (inb(iobase + PCMUIO_PORT_REG(port + 1)) << 8);
+ val |= (inb(iobase + PCMUIO_PORT_REG(port + 2)) << 16);
+ } else {
+ outb(PCMUIO_PAGE(page), iobase + PCMUIO_PAGE_LOCK_REG);
+ val = inb(iobase + PCMUIO_PAGE_REG(0));
+ val |= (inb(iobase + PCMUIO_PAGE_REG(1)) << 8);
+ val |= (inb(iobase + PCMUIO_PAGE_REG(2)) << 16);
+ }
-/* DIO devices are slightly special. Although it is possible to
- * implement the insn_read/insn_write interface, it is much more
- * useful to applications if you implement the insn_bits interface.
- * This allows packed reading/writing of the DIO channels. The
- * comedi core can convert between insn_bits and insn_read/write */
+ return val;
+}
+
+/*
+ * Each channel can be individually programmed for input or output.
+ * Writing a '0' to a channel causes the corresponding output pin
+ * to go to a high-z state (pulled high by an external 10K resistor).
+ * This allows it to be used as an input. When used in the input mode,
+ * a read reflects the inverted state of the I/O pin, such that a
+ * high on the pin will read as a '0' in the register. Writing a '1'
+ * to a bit position causes the pin to sink current (up to 12mA),
+ * effectively pulling it low.
+ */
static int pcmuio_dio_insn_bits(struct comedi_device *dev,
struct comedi_subdevice *s,
struct comedi_insn *insn, unsigned int *data)
{
- int byte_no;
-
- /* NOTE:
- reading a 0 means this channel was high
- writine a 0 sets the channel high
- reading a 1 means this channel was low
- writing a 1 means set this channel low
-
- Therefore everything is always inverted. */
-
- /* The insn data is a mask in data[0] and the new data
- * in data[1], each channel cooresponding to a bit. */
-
-#ifdef DAMMIT_ITS_BROKEN
- /* DEBUG */
- dev_dbg(dev->class_dev, "write mask: %08x data: %08x\n", data[0],
- data[1]);
-#endif
-
- s->state = 0;
-
- for (byte_no = 0; byte_no < s->n_chan / CHANS_PER_PORT; ++byte_no) {
- /* address of 8-bit port */
- unsigned long ioaddr = subpriv->iobases[byte_no],
- /* bit offset of port in 32-bit doubleword */
- offset = byte_no * 8;
- /* this 8-bit port's data */
- unsigned char byte = 0,
- /* The write mask for this port (if any) */
- write_mask_byte = (data[0] >> offset) & 0xff,
- /* The data byte for this port */
- data_byte = (data[1] >> offset) & 0xff;
-
- byte = inb(ioaddr); /* read all 8-bits for this port */
-
-#ifdef DAMMIT_ITS_BROKEN
- /* DEBUG */
- printk
- ("byte %d wmb %02x db %02x offset %02d io %04x, data_in %02x ",
- byte_no, (unsigned)write_mask_byte, (unsigned)data_byte,
- offset, ioaddr, (unsigned)byte);
-#endif
-
- if (write_mask_byte) {
- /* this byte has some write_bits -- so set the output lines */
- byte &= ~write_mask_byte; /* clear bits for write mask */
- byte |= ~data_byte & write_mask_byte; /* set to inverted data_byte */
- /* Write out the new digital output state */
- outb(byte, ioaddr);
- }
-#ifdef DAMMIT_ITS_BROKEN
- /* DEBUG */
- dev_dbg(dev->class_dev, "data_out_byte %02x\n", (unsigned)byte);
-#endif
- /* save the digital input lines for this byte.. */
- s->state |= ((unsigned int)byte) << offset;
- }
+ unsigned int mask = data[0] & s->io_bits; /* outputs only */
+ unsigned int bits = data[1];
+ int asic = s->index / 2;
+ int port = (s->index % 2) ? 3 : 0;
+ unsigned int val;
+
+ /* get inverted state of the channels from the port */
+ val = pcmuio_read(dev, asic, 0, port);
+
+ /* get the true state of the channels */
+ s->state = val ^ ((0x1 << s->n_chan) - 1);
- /* now return the DIO lines to data[1] - note they came inverted! */
- data[1] = ~s->state;
+ if (mask) {
+ s->state &= ~mask;
+ s->state |= (mask & bits);
-#ifdef DAMMIT_ITS_BROKEN
- /* DEBUG */
- dev_dbg(dev->class_dev, "s->state %08x data_out %08x\n", s->state,
- data[1]);
-#endif
+ /* invert the state and update the channels */
+ val = s->state ^ ((0x1 << s->n_chan) - 1);
+ pcmuio_write(dev, val, asic, 0, port);
+ }
+
+ data[1] = s->state;
return insn->n;
}
-/* The input or output configuration of each digital line is
- * configured by a special insn_config instruction. chanspec
- * contains the channel to be changed, and data[0] contains the
- * value COMEDI_INPUT or COMEDI_OUTPUT. */
static int pcmuio_dio_insn_config(struct comedi_device *dev,
struct comedi_subdevice *s,
struct comedi_insn *insn, unsigned int *data)
{
- int chan = CR_CHAN(insn->chanspec), byte_no = chan / 8, bit_no =
- chan % 8;
- unsigned long ioaddr;
- unsigned char byte;
-
- /* Compute ioaddr for this channel */
- ioaddr = subpriv->iobases[byte_no];
-
- /* NOTE:
- writing a 0 an IO channel's bit sets the channel to INPUT
- and pulls the line high as well
-
- writing a 1 to an IO channel's bit pulls the line low
-
- All channels are implicitly always in OUTPUT mode -- but when
- they are high they can be considered to be in INPUT mode..
-
- Thus, we only force channels low if the config request was INPUT,
- otherwise we do nothing to the hardware. */
+ unsigned int chan_mask = 1 << CR_CHAN(insn->chanspec);
+ int asic = s->index / 2;
+ int port = (s->index % 2) ? 3 : 0;
switch (data[0]) {
case INSN_CONFIG_DIO_OUTPUT:
- /* save to io_bits -- don't actually do anything since
- all input channels are also output channels... */
- s->io_bits |= 1 << chan;
+ s->io_bits |= chan_mask;
break;
case INSN_CONFIG_DIO_INPUT:
- /* write a 0 to the actual register representing the channel
- to set it to 'input'. 0 means "float high". */
- byte = inb(ioaddr);
- byte &= ~(1 << bit_no);
- /**< set input channel to '0' */
-
- /* write out byte -- this is the only time we actually affect the
- hardware as all channels are implicitly output -- but input
- channels are set to float-high */
- outb(byte, ioaddr);
-
- /* save to io_bits */
- s->io_bits &= ~(1 << chan);
+ s->io_bits &= ~chan_mask;
+ pcmuio_write(dev, s->io_bits, asic, 0, port);
break;
-
case INSN_CONFIG_DIO_QUERY:
- /* retrieve from shadow register */
- data[1] =
- (s->io_bits & (1 << chan)) ? COMEDI_OUTPUT : COMEDI_INPUT;
- return insn->n;
+ data[1] = (s->io_bits & chan_mask) ? COMEDI_OUTPUT : COMEDI_INPUT;
break;
-
default:
return -EINVAL;
break;
@@ -339,263 +257,154 @@ static int pcmuio_dio_insn_config(struct comedi_device *dev,
return insn->n;
}
-static void switch_page(struct comedi_device *dev, int asic, int page)
+static void pcmuio_reset(struct comedi_device *dev)
{
const struct pcmuio_board *board = comedi_board(dev);
- struct pcmuio_private *devpriv = dev->private;
-
- if (asic < 0 || asic >= board->num_asics)
- return; /* paranoia */
- if (page < 0 || page >= NUM_PAGES)
- return; /* more paranoia */
+ int asic;
- devpriv->asics[asic].pagelock &= ~REG_PAGE_MASK;
- devpriv->asics[asic].pagelock |= page << REG_PAGE_BITOFFSET;
+ for (asic = 0; asic < board->num_asics; ++asic) {
+ /* first, clear all the DIO port bits */
+ pcmuio_write(dev, 0, asic, 0, 0);
+ pcmuio_write(dev, 0, asic, 0, 3);
- /* now write out the shadow register */
- outb(devpriv->asics[asic].pagelock,
- dev->iobase + ASIC_IOSIZE * asic + REG_PAGELOCK);
+ /* Next, clear all the paged registers for each page */
+ pcmuio_write(dev, 0, asic, PCMUIO_PAGE_POL, 0);
+ pcmuio_write(dev, 0, asic, PCMUIO_PAGE_ENAB, 0);
+ pcmuio_write(dev, 0, asic, PCMUIO_PAGE_INT_ID, 0);
+ }
}
-static void init_asics(struct comedi_device *dev)
-{ /* sets up an
- ASIC chip to defaults */
- const struct pcmuio_board *board = comedi_board(dev);
+static void pcmuio_stop_intr(struct comedi_device *dev,
+ struct comedi_subdevice *s)
+{
+ struct pcmuio_subdev_private *subpriv = s->private;
int asic;
- for (asic = 0; asic < board->num_asics; ++asic) {
- int port, page;
- unsigned long baseaddr = dev->iobase + asic * ASIC_IOSIZE;
+ asic = subpriv->intr.asic;
+ if (asic < 0)
+ return; /* not an interrupt subdev */
- switch_page(dev, asic, 0); /* switch back to page 0 */
+ subpriv->intr.enabled_mask = 0;
+ subpriv->intr.active = 0;
+ s->async->inttrig = NULL;
- /* first, clear all the DIO port bits */
- for (port = 0; port < PORTS_PER_ASIC; ++port)
- outb(0, baseaddr + REG_PORT0 + port);
+ /* disable all intrs for this subdev.. */
+ pcmuio_write(dev, 0, asic, PCMUIO_PAGE_ENAB, 0);
+}
- /* Next, clear all the paged registers for each page */
- for (page = 1; page < NUM_PAGES; ++page) {
- int reg;
- /* now clear all the paged registers */
- switch_page(dev, asic, page);
- for (reg = FIRST_PAGED_REG;
- reg < FIRST_PAGED_REG + NUM_PAGED_REGS; ++reg)
- outb(0, baseaddr + reg);
- }
+static void pcmuio_handle_intr_subdev(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned triggered)
+{
+ struct pcmuio_subdev_private *subpriv = s->private;
+ unsigned int len = s->async->cmd.chanlist_len;
+ unsigned oldevents = s->async->events;
+ unsigned int val = 0;
+ unsigned long flags;
+ unsigned mytrig;
+ unsigned int i;
- /* DEBUG set rising edge interrupts on port0 of both asics */
- /*switch_page(dev, asic, PAGE_POL);
- outb(0xff, baseaddr + REG_POL0);
- switch_page(dev, asic, PAGE_ENAB);
- outb(0xff, baseaddr + REG_ENAB0); */
- /* END DEBUG */
+ spin_lock_irqsave(&subpriv->intr.spinlock, flags);
- switch_page(dev, asic, 0); /* switch back to default page 0 */
+ if (!subpriv->intr.active)
+ goto done;
+ mytrig = triggered;
+ mytrig &= ((0x1 << s->n_chan) - 1);
+
+ if (!(mytrig & subpriv->intr.enabled_mask))
+ goto done;
+
+ for (i = 0; i < len; i++) {
+ unsigned int chan = CR_CHAN(s->async->cmd.chanlist[i]);
+ if (mytrig & (1U << chan))
+ val |= (1U << i);
}
-}
-#ifdef notused
-static void lock_port(struct comedi_device *dev, int asic, int port)
-{
- const struct pcmuio_board *board = comedi_board(dev);
- struct pcmuio_private *devpriv = dev->private;
+ /* Write the scan to the buffer. */
+ if (comedi_buf_put(s->async, ((short *)&val)[0]) &&
+ comedi_buf_put(s->async, ((short *)&val)[1])) {
+ s->async->events |= (COMEDI_CB_BLOCK | COMEDI_CB_EOS);
+ } else {
+ /* Overflow! Stop acquisition!! */
+ /* TODO: STOP_ACQUISITION_CALL_HERE!! */
+ pcmuio_stop_intr(dev, s);
+ }
- if (asic < 0 || asic >= board->num_asics)
- return; /* paranoia */
- if (port < 0 || port >= PORTS_PER_ASIC)
- return; /* more paranoia */
+ /* Check for end of acquisition. */
+ if (!subpriv->intr.continuous) {
+ /* stop_src == TRIG_COUNT */
+ if (subpriv->intr.stop_count > 0) {
+ subpriv->intr.stop_count--;
+ if (subpriv->intr.stop_count == 0) {
+ s->async->events |= COMEDI_CB_EOA;
+ /* TODO: STOP_ACQUISITION_CALL_HERE!! */
+ pcmuio_stop_intr(dev, s);
+ }
+ }
+ }
+
+done:
+ spin_unlock_irqrestore(&subpriv->intr.spinlock, flags);
- devpriv->asics[asic].pagelock |= 0x1 << port;
- /* now write out the shadow register */
- outb(devpriv->asics[asic].pagelock,
- dev->iobase + ASIC_IOSIZE * asic + REG_PAGELOCK);
+ if (oldevents != s->async->events)
+ comedi_event(dev, s);
}
-static void unlock_port(struct comedi_device *dev, int asic, int port)
+static int pcmuio_handle_asic_interrupt(struct comedi_device *dev, int asic)
{
- const struct pcmuio_board *board = comedi_board(dev);
struct pcmuio_private *devpriv = dev->private;
+ struct pcmuio_subdev_private *subpriv;
+ unsigned long iobase = dev->iobase + (asic * PCMUIO_ASIC_IOSIZE);
+ unsigned int triggered = 0;
+ int got1 = 0;
+ unsigned long flags;
+ unsigned char int_pend;
+ int i;
- if (asic < 0 || asic >= board->num_asics)
- return; /* paranoia */
- if (port < 0 || port >= PORTS_PER_ASIC)
- return; /* more paranoia */
- devpriv->asics[asic].pagelock &= ~(0x1 << port) | REG_LOCK_MASK;
- /* now write out the shadow register */
- outb(devpriv->asics[asic].pagelock,
- dev->iobase + ASIC_IOSIZE * asic + REG_PAGELOCK);
-}
-#endif /* notused */
+ spin_lock_irqsave(&devpriv->asics[asic].spinlock, flags);
-static void pcmuio_stop_intr(struct comedi_device *dev,
- struct comedi_subdevice *s)
-{
- int nports, firstport, asic, port;
- struct pcmuio_private *devpriv = dev->private;
+ int_pend = inb(iobase + PCMUIO_INT_PENDING_REG) & 0x07;
+ if (int_pend) {
+ triggered = pcmuio_read(dev, asic, PCMUIO_PAGE_INT_ID, 0);
+ pcmuio_write(dev, 0, asic, PCMUIO_PAGE_INT_ID, 0);
- asic = subpriv->intr.asic;
- if (asic < 0)
- return; /* not an interrupt subdev */
+ ++got1;
+ }
- subpriv->intr.enabled_mask = 0;
- subpriv->intr.active = 0;
- s->async->inttrig = NULL;
- nports = subpriv->intr.num_asic_chans / CHANS_PER_PORT;
- firstport = subpriv->intr.asic_chan / CHANS_PER_PORT;
- switch_page(dev, asic, PAGE_ENAB);
- for (port = firstport; port < firstport + nports; ++port) {
- /* disable all intrs for this subdev.. */
- outb(0, devpriv->asics[asic].iobase + REG_ENAB0 + port);
+ spin_unlock_irqrestore(&devpriv->asics[asic].spinlock, flags);
+
+ if (triggered) {
+ struct comedi_subdevice *s;
+ /* TODO here: dispatch io lines to subdevs with commands.. */
+ for (i = 0; i < dev->n_subdevices; i++) {
+ s = &dev->subdevices[i];
+ subpriv = s->private;
+ if (subpriv->intr.asic == asic) {
+ /*
+ * This is an interrupt subdev, and it
+ * matches this asic!
+ */
+ pcmuio_handle_intr_subdev(dev, s,
+ triggered);
+ }
+ }
}
+ return got1;
}
-static irqreturn_t interrupt_pcmuio(int irq, void *d)
+static irqreturn_t pcmuio_interrupt(int irq, void *d)
{
- int asic, got1 = 0;
- struct comedi_device *dev = (struct comedi_device *)d;
+ struct comedi_device *dev = d;
struct pcmuio_private *devpriv = dev->private;
- int i;
+ int got1 = 0;
+ int asic;
- for (asic = 0; asic < MAX_ASICS; ++asic) {
+ for (asic = 0; asic < PCMUIO_MAX_ASICS; ++asic) {
if (irq == devpriv->asics[asic].irq) {
- unsigned long flags;
- unsigned triggered = 0;
- unsigned long iobase = devpriv->asics[asic].iobase;
/* it is an interrupt for ASIC #asic */
- unsigned char int_pend;
-
- spin_lock_irqsave(&devpriv->asics[asic].spinlock,
- flags);
-
- int_pend = inb(iobase + REG_INT_PENDING) & 0x07;
-
- if (int_pend) {
- int port;
- for (port = 0; port < INTR_PORTS_PER_ASIC;
- ++port) {
- if (int_pend & (0x1 << port)) {
- unsigned char
- io_lines_with_edges = 0;
- switch_page(dev, asic,
- PAGE_INT_ID);
- io_lines_with_edges =
- inb(iobase +
- REG_INT_ID0 + port);
-
- if (io_lines_with_edges)
- /* clear pending interrupt */
- outb(0, iobase +
- REG_INT_ID0 +
- port);
-
- triggered |=
- io_lines_with_edges <<
- port * 8;
- }
- }
-
- ++got1;
- }
-
- spin_unlock_irqrestore(&devpriv->asics[asic].spinlock,
- flags);
-
- if (triggered) {
- struct comedi_subdevice *s;
- /* TODO here: dispatch io lines to subdevs with commands.. */
- printk
- ("PCMUIO DEBUG: got edge detect interrupt %d asic %d which_chans: %06x\n",
- irq, asic, triggered);
- for (i = 0; i < dev->n_subdevices; i++) {
- s = &dev->subdevices[i];
- if (subpriv->intr.asic == asic) { /* this is an interrupt subdev, and it matches this asic! */
- unsigned long flags;
- unsigned oldevents;
-
- spin_lock_irqsave(&subpriv->
- intr.spinlock,
- flags);
-
- oldevents = s->async->events;
-
- if (subpriv->intr.active) {
- unsigned mytrig =
- ((triggered >>
- subpriv->intr.asic_chan)
- &
- ((0x1 << subpriv->
- intr.
- num_asic_chans) -
- 1)) << subpriv->
- intr.first_chan;
- if (mytrig &
- subpriv->intr.enabled_mask)
- {
- unsigned int val
- = 0;
- unsigned int n,
- ch, len;
-
- len =
- s->
- async->cmd.chanlist_len;
- for (n = 0;
- n < len;
- n++) {
- ch = CR_CHAN(s->async->cmd.chanlist[n]);
- if (mytrig & (1U << ch)) {
- val |= (1U << n);
- }
- }
- /* Write the scan to the buffer. */
- if (comedi_buf_put(s->async, ((short *)&val)[0])
- &&
- comedi_buf_put
- (s->async,
- ((short *)
- &val)[1]))
- {
- s->async->events |= (COMEDI_CB_BLOCK | COMEDI_CB_EOS);
- } else {
- /* Overflow! Stop acquisition!! */
- /* TODO: STOP_ACQUISITION_CALL_HERE!! */
- pcmuio_stop_intr
- (dev,
- s);
- }
-
- /* Check for end of acquisition. */
- if (!subpriv->intr.continuous) {
- /* stop_src == TRIG_COUNT */
- if (subpriv->intr.stop_count > 0) {
- subpriv->intr.stop_count--;
- if (subpriv->intr.stop_count == 0) {
- s->async->events |= COMEDI_CB_EOA;
- /* TODO: STOP_ACQUISITION_CALL_HERE!! */
- pcmuio_stop_intr
- (dev,
- s);
- }
- }
- }
- }
- }
-
- spin_unlock_irqrestore
- (&subpriv->intr.spinlock,
- flags);
-
- if (oldevents !=
- s->async->events) {
- comedi_event(dev, s);
- }
-
- }
-
- }
- }
-
+ if (pcmuio_handle_asic_interrupt(dev, asic))
+ got1++;
}
}
if (!got1)
@@ -606,7 +415,7 @@ static irqreturn_t interrupt_pcmuio(int irq, void *d)
static int pcmuio_start_intr(struct comedi_device *dev,
struct comedi_subdevice *s)
{
- struct pcmuio_private *devpriv = dev->private;
+ struct pcmuio_subdev_private *subpriv = s->private;
if (!subpriv->intr.continuous && subpriv->intr.stop_count == 0) {
/* An empty acquisition! */
@@ -615,7 +424,7 @@ static int pcmuio_start_intr(struct comedi_device *dev,
return 1;
} else {
unsigned bits = 0, pol_bits = 0, n;
- int nports, firstport, asic, port;
+ int asic;
struct comedi_cmd *cmd = &s->async->cmd;
asic = subpriv->intr.asic;
@@ -624,8 +433,6 @@ static int pcmuio_start_intr(struct comedi_device *dev,
subdev */
subpriv->intr.enabled_mask = 0;
subpriv->intr.active = 1;
- nports = subpriv->intr.num_asic_chans / CHANS_PER_PORT;
- firstport = subpriv->intr.asic_chan / CHANS_PER_PORT;
if (cmd->chanlist) {
for (n = 0; n < cmd->chanlist_len; n++) {
bits |= (1U << CR_CHAN(cmd->chanlist[n]));
@@ -635,31 +442,19 @@ static int pcmuio_start_intr(struct comedi_device *dev,
<< CR_CHAN(cmd->chanlist[n]);
}
}
- bits &= ((0x1 << subpriv->intr.num_asic_chans) -
- 1) << subpriv->intr.first_chan;
+ bits &= ((0x1 << s->n_chan) - 1);
subpriv->intr.enabled_mask = bits;
- switch_page(dev, asic, PAGE_ENAB);
- for (port = firstport; port < firstport + nports; ++port) {
- unsigned enab =
- bits >> (subpriv->intr.first_chan + (port -
- firstport) *
- 8) & 0xff, pol =
- pol_bits >> (subpriv->intr.first_chan +
- (port - firstport) * 8) & 0xff;
- /* set enab intrs for this subdev.. */
- outb(enab,
- devpriv->asics[asic].iobase + REG_ENAB0 + port);
- switch_page(dev, asic, PAGE_POL);
- outb(pol,
- devpriv->asics[asic].iobase + REG_ENAB0 + port);
- }
+ /* set pol and enab intrs for this subdev.. */
+ pcmuio_write(dev, pol_bits, asic, PCMUIO_PAGE_POL, 0);
+ pcmuio_write(dev, bits, asic, PCMUIO_PAGE_ENAB, 0);
}
return 0;
}
static int pcmuio_cancel(struct comedi_device *dev, struct comedi_subdevice *s)
{
+ struct pcmuio_subdev_private *subpriv = s->private;
unsigned long flags;
spin_lock_irqsave(&subpriv->intr.spinlock, flags);
@@ -677,6 +472,7 @@ static int
pcmuio_inttrig_start_intr(struct comedi_device *dev, struct comedi_subdevice *s,
unsigned int trignum)
{
+ struct pcmuio_subdev_private *subpriv = s->private;
unsigned long flags;
int event = 0;
@@ -701,6 +497,7 @@ pcmuio_inttrig_start_intr(struct comedi_device *dev, struct comedi_subdevice *s,
*/
static int pcmuio_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
{
+ struct pcmuio_subdev_private *subpriv = s->private;
struct comedi_cmd *cmd = &s->async->cmd;
unsigned long flags;
int event = 0;
@@ -797,17 +594,18 @@ static int pcmuio_cmdtest(struct comedi_device *dev,
static int pcmuio_attach(struct comedi_device *dev, struct comedi_devconfig *it)
{
const struct pcmuio_board *board = comedi_board(dev);
- struct pcmuio_private *devpriv;
struct comedi_subdevice *s;
- int sdev_no, chans_left, n_subdevs, port, asic, thisasic_chanct = 0;
- unsigned int irq[MAX_ASICS];
+ struct pcmuio_private *devpriv;
+ struct pcmuio_subdev_private *subpriv;
+ int sdev_no, n_subdevs, asic;
+ unsigned int irq[PCMUIO_MAX_ASICS];
int ret;
irq[0] = it->options[1];
irq[1] = it->options[2];
ret = comedi_request_region(dev, it->options[0],
- board->num_asics * ASIC_IOSIZE);
+ board->num_asics * PCMUIO_ASIC_IOSIZE);
if (ret)
return ret;
@@ -816,20 +614,11 @@ static int pcmuio_attach(struct comedi_device *dev, struct comedi_devconfig *it)
return -ENOMEM;
dev->private = devpriv;
- for (asic = 0; asic < MAX_ASICS; ++asic) {
- devpriv->asics[asic].num = asic;
- devpriv->asics[asic].iobase = dev->iobase + asic * ASIC_IOSIZE;
- devpriv->asics[asic].irq = 0; /* this gets actually set at the end of
- this function when we
- request_irqs */
+ for (asic = 0; asic < PCMUIO_MAX_ASICS; ++asic)
spin_lock_init(&devpriv->asics[asic].spinlock);
- }
- chans_left = CHANS_PER_ASIC * board->num_asics;
- n_subdevs = CALC_N_SUBDEVS(chans_left);
- devpriv->sprivs = kcalloc(n_subdevs,
- sizeof(struct pcmuio_subdev_private),
- GFP_KERNEL);
+ n_subdevs = board->num_asics * 2;
+ devpriv->sprivs = kcalloc(n_subdevs, sizeof(*subpriv), GFP_KERNEL);
if (!devpriv->sprivs)
return -ENOMEM;
@@ -837,74 +626,40 @@ static int pcmuio_attach(struct comedi_device *dev, struct comedi_devconfig *it)
if (ret)
return ret;
- port = 0;
- asic = 0;
for (sdev_no = 0; sdev_no < (int)dev->n_subdevices; ++sdev_no) {
- int byte_no;
-
s = &dev->subdevices[sdev_no];
- s->private = &devpriv->sprivs[sdev_no];
+ subpriv = &devpriv->sprivs[sdev_no];
+ s->private = subpriv;
s->maxdata = 1;
s->range_table = &range_digital;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
s->type = COMEDI_SUBD_DIO;
s->insn_bits = pcmuio_dio_insn_bits;
s->insn_config = pcmuio_dio_insn_config;
- s->n_chan = min(chans_left, MAX_CHANS_PER_SUBDEV);
- subpriv->intr.asic = -1;
- subpriv->intr.first_chan = -1;
- subpriv->intr.asic_chan = -1;
- subpriv->intr.num_asic_chans = -1;
- subpriv->intr.active = 0;
- s->len_chanlist = 1;
-
- /* save the ioport address for each 'port' of 8 channels in the
- subdevice */
- for (byte_no = 0; byte_no < PORTS_PER_SUBDEV; ++byte_no, ++port) {
- if (port >= PORTS_PER_ASIC) {
- port = 0;
- ++asic;
- thisasic_chanct = 0;
- }
- subpriv->iobases[byte_no] =
- devpriv->asics[asic].iobase + port;
-
- if (thisasic_chanct <
- CHANS_PER_PORT * INTR_PORTS_PER_ASIC
- && subpriv->intr.asic < 0) {
- /* this is an interrupt subdevice, so setup the struct */
- subpriv->intr.asic = asic;
- subpriv->intr.active = 0;
- subpriv->intr.stop_count = 0;
- subpriv->intr.first_chan = byte_no * 8;
- subpriv->intr.asic_chan = thisasic_chanct;
- subpriv->intr.num_asic_chans =
- s->n_chan - subpriv->intr.first_chan;
- dev->read_subdev = s;
- s->subdev_flags |= SDF_CMD_READ;
- s->cancel = pcmuio_cancel;
- s->do_cmd = pcmuio_cmd;
- s->do_cmdtest = pcmuio_cmdtest;
- s->len_chanlist = subpriv->intr.num_asic_chans;
- }
- thisasic_chanct += CHANS_PER_PORT;
+ s->n_chan = 24;
+
+ /* subdevices 0 and 2 suppport interrupts */
+ if ((sdev_no % 2) == 0) {
+ /* setup the interrupt subdevice */
+ subpriv->intr.asic = sdev_no / 2;
+ dev->read_subdev = s;
+ s->subdev_flags |= SDF_CMD_READ;
+ s->cancel = pcmuio_cancel;
+ s->do_cmd = pcmuio_cmd;
+ s->do_cmdtest = pcmuio_cmdtest;
+ s->len_chanlist = s->n_chan;
+ } else {
+ subpriv->intr.asic = -1;
+ s->len_chanlist = 1;
}
spin_lock_init(&subpriv->intr.spinlock);
-
- chans_left -= s->n_chan;
-
- if (!chans_left) {
- asic = 0; /* reset the asic to our first asic, to do intr subdevs */
- port = 0;
- }
-
}
- init_asics(dev); /* clear out all the registers, basically */
+ pcmuio_reset(dev);
- for (asic = 0; irq[0] && asic < MAX_ASICS; ++asic) {
+ for (asic = 0; irq[0] && asic < PCMUIO_MAX_ASICS; ++asic) {
if (irq[asic]
- && request_irq(irq[asic], interrupt_pcmuio,
+ && request_irq(irq[asic], pcmuio_interrupt,
IRQF_SHARED, board->name, dev)) {
int i;
/* unroll the allocated irqs.. */
@@ -917,17 +672,7 @@ static int pcmuio_attach(struct comedi_device *dev, struct comedi_devconfig *it)
devpriv->asics[asic].irq = irq[asic];
}
- if (irq[0]) {
- dev_dbg(dev->class_dev, "irq: %u\n", irq[0]);
- if (irq[1] && board->num_asics == 2)
- dev_dbg(dev->class_dev, "second ASIC irq: %u\n",
- irq[1]);
- } else {
- dev_dbg(dev->class_dev, "(IRQ mode disabled)\n");
- }
-
-
- return 1;
+ return 0;
}
static void pcmuio_detach(struct comedi_device *dev)
@@ -935,7 +680,7 @@ static void pcmuio_detach(struct comedi_device *dev)
struct pcmuio_private *devpriv = dev->private;
int i;
- for (i = 0; i < MAX_ASICS; ++i) {
+ for (i = 0; i < PCMUIO_MAX_ASICS; ++i) {
if (devpriv->asics[i].irq)
free_irq(devpriv->asics[i].irq, dev);
}
@@ -944,18 +689,6 @@ static void pcmuio_detach(struct comedi_device *dev)
comedi_legacy_detach(dev);
}
-static const struct pcmuio_board pcmuio_boards[] = {
- {
- .name = "pcmuio48",
- .num_asics = 1,
- .num_ports = 6,
- }, {
- .name = "pcmuio96",
- .num_asics = 2,
- .num_ports = 12,
- },
-};
-
static struct comedi_driver pcmuio_driver = {
.driver_name = "pcmuio",
.module = THIS_MODULE,
diff --git a/drivers/staging/comedi/drivers/plx9052.h b/drivers/staging/comedi/drivers/plx9052.h
index ff76fbb4b3ef..fbcf25069807 100644
--- a/drivers/staging/comedi/drivers/plx9052.h
+++ b/drivers/staging/comedi/drivers/plx9052.h
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#ifndef _PLX9052_H_
diff --git a/drivers/staging/comedi/drivers/poc.c b/drivers/staging/comedi/drivers/poc.c
index b55a16baeb14..005fbefae295 100644
--- a/drivers/staging/comedi/drivers/poc.c
+++ b/drivers/staging/comedi/drivers/poc.c
@@ -13,25 +13,18 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Driver: poc
Description: Generic driver for very simple devices
Author: ds
-Devices: [Keithley Metrabyte] DAC-02 (dac02), [Advantech] PCL-733 (pcl733),
- PCL-734 (pcl734)
+Devices: [Keithley Metrabyte] DAC-02 (dac02)
Updated: Sat, 16 Mar 2002 17:34:48 -0800
Status: unknown
This driver is indended to support very simple ISA-based devices,
including:
dac02 - Keithley DAC-02 analog output board
- pcl733 - Advantech PCL-733
- pcl734 - Advantech PCL-734
Configuration options:
[0] - I/O port base
@@ -101,39 +94,6 @@ static int dac02_ao_winsn(struct comedi_device *dev, struct comedi_subdevice *s,
return 1;
}
-static int pcl733_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- data[1] = inb(dev->iobase + 0);
- data[1] |= (inb(dev->iobase + 1) << 8);
- data[1] |= (inb(dev->iobase + 2) << 16);
- data[1] |= (inb(dev->iobase + 3) << 24);
-
- return insn->n;
-}
-
-static int pcl734_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- if (data[0]) {
- s->state &= ~data[0];
- s->state |= (data[0] & data[1]);
- if ((data[0] >> 0) & 0xff)
- outb((s->state >> 0) & 0xff, dev->iobase + 0);
- if ((data[0] >> 8) & 0xff)
- outb((s->state >> 8) & 0xff, dev->iobase + 1);
- if ((data[0] >> 16) & 0xff)
- outb((s->state >> 16) & 0xff, dev->iobase + 2);
- if ((data[0] >> 24) & 0xff)
- outb((s->state >> 24) & 0xff, dev->iobase + 3);
- }
- data[1] = s->state;
-
- return insn->n;
-}
-
static int poc_attach(struct comedi_device *dev, struct comedi_devconfig *it)
{
const struct boarddef_struct *board = comedi_board(dev);
@@ -180,22 +140,6 @@ static const struct boarddef_struct boards[] = {
.winsn = dac02_ao_winsn,
.rinsn = readback_insn,
.range = &range_unknown,
- }, {
- .name = "pcl733",
- .iosize = 4,
- .type = COMEDI_SUBD_DI,
- .n_chan = 32,
- .n_bits = 1,
- .insnbits = pcl733_insn_bits,
- .range = &range_digital,
- }, {
- .name = "pcl734",
- .iosize = 4,
- .type = COMEDI_SUBD_DO,
- .n_chan = 32,
- .n_bits = 1,
- .insnbits = pcl734_insn_bits,
- .range = &range_digital,
},
};
diff --git a/drivers/staging/comedi/drivers/rtd520.c b/drivers/staging/comedi/drivers/rtd520.c
index 30a17284fac9..9b93a1fc4a59 100644
--- a/drivers/staging/comedi/drivers/rtd520.c
+++ b/drivers/staging/comedi/drivers/rtd520.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/rti800.c b/drivers/staging/comedi/drivers/rti800.c
index f4163fd35a00..f698c7fc5726 100644
--- a/drivers/staging/comedi/drivers/rti800.c
+++ b/drivers/staging/comedi/drivers/rti800.c
@@ -14,10 +14,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
diff --git a/drivers/staging/comedi/drivers/rti802.c b/drivers/staging/comedi/drivers/rti802.c
index 46dbbe6cdd76..9e7445055482 100644
--- a/drivers/staging/comedi/drivers/rti802.c
+++ b/drivers/staging/comedi/drivers/rti802.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: rti802
diff --git a/drivers/staging/comedi/drivers/s526.c b/drivers/staging/comedi/drivers/s526.c
index d240ce87bd68..e1587e58a732 100644
--- a/drivers/staging/comedi/drivers/s526.c
+++ b/drivers/staging/comedi/drivers/s526.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: s526
diff --git a/drivers/staging/comedi/drivers/s626.c b/drivers/staging/comedi/drivers/s626.c
index 0cf4b3d1279a..48c4b70b736a 100644
--- a/drivers/staging/comedi/drivers/s626.c
+++ b/drivers/staging/comedi/drivers/s626.c
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/s626.h b/drivers/staging/comedi/drivers/s626.h
index 99cd57b092ea..d2756b83b62d 100644
--- a/drivers/staging/comedi/drivers/s626.h
+++ b/drivers/staging/comedi/drivers/s626.h
@@ -17,11 +17,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c
index 8900086374db..b4f5fe35b0fa 100644
--- a/drivers/staging/comedi/drivers/serial2002.c
+++ b/drivers/staging/comedi/drivers/serial2002.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/drivers/skel.c b/drivers/staging/comedi/drivers/skel.c
index dbc8c54d6da7..06aee302bbc2 100644
--- a/drivers/staging/comedi/drivers/skel.c
+++ b/drivers/staging/comedi/drivers/skel.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: skel
diff --git a/drivers/staging/comedi/drivers/ssv_dnp.c b/drivers/staging/comedi/drivers/ssv_dnp.c
index a76df092a57b..45c661cbdbb9 100644
--- a/drivers/staging/comedi/drivers/ssv_dnp.c
+++ b/drivers/staging/comedi/drivers/ssv_dnp.c
@@ -15,11 +15,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: ssv_dnp
diff --git a/drivers/staging/comedi/drivers/unioxx5.c b/drivers/staging/comedi/drivers/unioxx5.c
index 0c243477cbe5..c9201d821fbc 100644
--- a/drivers/staging/comedi/drivers/unioxx5.c
+++ b/drivers/staging/comedi/drivers/unioxx5.c
@@ -18,10 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
- * You should have received a copy of the GNU General Public License *
- * along with this program; if not, write to the Free Software *
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *
- * *
***************************************************************************/
/*
@@ -375,15 +371,13 @@ static int __unioxx5_subdev_init(struct comedi_device *dev,
int i, to, ndef_flag = 0;
int ret;
- usp = kzalloc(sizeof(*usp), GFP_KERNEL);
- if (usp == NULL)
+ usp = comedi_alloc_spriv(s, sizeof(*usp));
+ if (!usp)
return -ENOMEM;
ret = __comedi_request_region(dev, iobase, UNIOXX5_SIZE);
- if (ret) {
- kfree(usp);
+ if (ret)
return ret;
- }
usp->usp_iobase = iobase;
/* defining modules types */
@@ -417,7 +411,6 @@ static int __unioxx5_subdev_init(struct comedi_device *dev,
/* initial subdevice for digital or analog i/o */
s->type = COMEDI_SUBD_DIO;
- s->private = usp;
s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
s->n_chan = UNIOXX5_NUM_OF_CHANS;
s->maxdata = 0xFFF;
@@ -478,15 +471,15 @@ static int unioxx5_attach(struct comedi_device *dev,
static void unioxx5_detach(struct comedi_device *dev)
{
+ struct comedi_subdevice *s;
+ struct unioxx5_subd_priv *spriv;
int i;
- struct comedi_subdevice *subdev;
- struct unioxx5_subd_priv *usp;
for (i = 0; i < dev->n_subdevices; i++) {
- subdev = &dev->subdevices[i];
- usp = subdev->private;
- release_region(usp->usp_iobase, UNIOXX5_SIZE);
- kfree(subdev->private);
+ s = &dev->subdevices[i];
+ spriv = s->private;
+ if (spriv && spriv->usp_iobase)
+ release_region(spriv->usp_iobase, UNIOXX5_SIZE);
}
}
diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c
index 6f5da67e26cb..279e5bd493fa 100644
--- a/drivers/staging/comedi/drivers/usbdux.c
+++ b/drivers/staging/comedi/drivers/usbdux.c
@@ -11,11 +11,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: usbdux
@@ -94,7 +89,6 @@ sampling rate. If you sample two channels you get 4kHz and so on.
#include <linux/usb.h>
#include <linux/fcntl.h>
#include <linux/compiler.h>
-#include <linux/firmware.h>
#include "../comedidev.h"
@@ -727,154 +721,82 @@ static void usbduxsub_ao_isoc_irq(struct urb *urb)
}
}
-static int usbduxsub_start(struct usbduxsub *usbduxsub)
-{
- int errcode = 0;
- uint8_t *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(1, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to zero */
- *local_transfer_buffer = 0;
- errcode = usb_control_msg(usbduxsub->usbdev,
- /* create a pipe for a control transfer */
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* bRequest, "Firmware" */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* Value */
- USBDUXSUB_CPUCS,
- /* Index */
- 0x0000,
- /* address of the transfer buffer */
- local_transfer_buffer,
- /* Length */
- 1,
- /* Timeout */
- BULK_TIMEOUT);
- if (errcode < 0)
- dev_err(&usbduxsub->interface->dev,
- "comedi_: control msg failed (start)\n");
-
- kfree(local_transfer_buffer);
- return errcode;
-}
-
-static int usbduxsub_stop(struct usbduxsub *usbduxsub)
-{
- int errcode = 0;
- uint8_t *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(1, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to one */
- *local_transfer_buffer = 1;
- errcode = usb_control_msg(usbduxsub->usbdev,
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* bRequest, "Firmware" */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* Value */
- USBDUXSUB_CPUCS,
- /* Index */
- 0x0000, local_transfer_buffer,
- /* Length */
- 1,
- /* Timeout */
- BULK_TIMEOUT);
- if (errcode < 0)
- dev_err(&usbduxsub->interface->dev,
- "comedi_: control msg failed (stop)\n");
-
- kfree(local_transfer_buffer);
- return errcode;
-}
-
-static int usbduxsub_upload(struct usbduxsub *usbduxsub,
- uint8_t *local_transfer_buffer,
- unsigned int start_addr, unsigned int len)
-{
- int errcode;
-
- errcode = usb_control_msg(usbduxsub->usbdev,
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* brequest, firmware */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* value */
- start_addr,
- /* index */
- 0x0000,
- /* our local safe buffer */
- local_transfer_buffer,
- /* length */
- len,
- /* timeout */
- BULK_TIMEOUT);
- dev_dbg(&usbduxsub->interface->dev, "comedi_: result=%d\n", errcode);
- if (errcode < 0) {
- dev_err(&usbduxsub->interface->dev, "comedi_: upload failed\n");
- return errcode;
- }
- return 0;
-}
-
#define FIRMWARE_MAX_LEN 0x2000
-static int firmware_upload(struct usbduxsub *usbduxsub,
- const u8 *firmware_binary, int size_firmware)
+static int usbdux_firmware_upload(struct comedi_device *dev,
+ const u8 *data, size_t size,
+ unsigned long context)
{
+ struct usbduxsub *usbduxsub = dev->private;
+ struct usb_device *usb = usbduxsub->usbdev;
+ uint8_t *buf;
+ uint8_t *tmp;
int ret;
- uint8_t *fw_buf;
- if (!firmware_binary)
+ if (!data)
return 0;
- if (size_firmware > FIRMWARE_MAX_LEN) {
+ if (size > FIRMWARE_MAX_LEN) {
dev_err(&usbduxsub->interface->dev,
"usbdux firmware binary it too large for FX2.\n");
return -ENOMEM;
}
/* we generate a local buffer for the firmware */
- fw_buf = kmemdup(firmware_binary, size_firmware, GFP_KERNEL);
- if (!fw_buf) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: mem alloc for firmware failed\n");
+ buf = kmemdup(data, size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* we need a malloc'ed buffer for usb_control_msg() */
+ tmp = kmalloc(1, GFP_KERNEL);
+ if (!tmp) {
+ kfree(buf);
return -ENOMEM;
}
- ret = usbduxsub_stop(usbduxsub);
+ /* stop the current firmware on the device */
+ *tmp = 1; /* 7f92 to one */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXSUB_CPUCS, 0x0000,
+ tmp, 1,
+ BULK_TIMEOUT);
if (ret < 0) {
dev_err(&usbduxsub->interface->dev,
"comedi_: can not stop firmware\n");
- kfree(fw_buf);
- return ret;
+ goto done;
}
- ret = usbduxsub_upload(usbduxsub, fw_buf, 0, size_firmware);
+ /* upload the new firmware to the device */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ 0, 0x0000,
+ buf, size,
+ BULK_TIMEOUT);
if (ret < 0) {
dev_err(&usbduxsub->interface->dev,
"comedi_: firmware upload failed\n");
- kfree(fw_buf);
- return ret;
+ goto done;
}
- ret = usbduxsub_start(usbduxsub);
- if (ret < 0) {
+
+ /* start the new firmware on the device */
+ *tmp = 0; /* 7f92 to zero */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXSUB_CPUCS, 0x0000,
+ tmp, 1,
+ BULK_TIMEOUT);
+ if (ret < 0)
dev_err(&usbduxsub->interface->dev,
"comedi_: can not start firmware\n");
- kfree(fw_buf);
- return ret;
- }
- kfree(fw_buf);
- return 0;
+
+done:
+ kfree(tmp);
+ kfree(buf);
+ return ret;
}
static int usbduxsub_submit_inurbs(struct usbduxsub *usbduxsub)
@@ -2328,13 +2250,21 @@ static int usbdux_auto_attach(struct comedi_device *dev,
unsigned long context_unused)
{
struct usb_interface *uinterf = comedi_to_usb_interface(dev);
+ struct usbduxsub *this_usbduxsub = usb_get_intfdata(uinterf);
+ struct usb_device *usb = usbduxsub->usbdev;
int ret;
- struct usbduxsub *this_usbduxsub;
+
+ dev->private = this_usbduxsub; /* This is temporary... */
+ ret = comedi_load_firmware(dev, &usb->dev, FIRMWARE,
+ usbdux_firmware_upload, 0);
+ if (ret < 0) {
+ dev->private = NULL;
+ return ret;
+ }
dev->private = NULL;
down(&start_stop_sem);
- this_usbduxsub = usb_get_intfdata(uinterf);
if (!this_usbduxsub || !this_usbduxsub->probed) {
dev_err(dev->class_dev,
"usbdux: error: auto_attach failed, not connected\n");
@@ -2369,35 +2299,6 @@ static struct comedi_driver usbdux_driver = {
.detach = usbdux_detach,
};
-static void usbdux_firmware_request_complete_handler(const struct firmware *fw,
- void *context)
-{
- struct usbduxsub *usbduxsub_tmp = context;
- struct usb_interface *uinterf = usbduxsub_tmp->interface;
- int ret;
-
- if (fw == NULL) {
- dev_err(&uinterf->dev,
- "Firmware complete handler without firmware!\n");
- return;
- }
-
- /*
- * we need to upload the firmware here because fw will be
- * freed once we've left this function
- */
- ret = firmware_upload(usbduxsub_tmp, fw->data, fw->size);
-
- if (ret) {
- dev_err(&uinterf->dev,
- "Could not upload firmware (err=%d)\n", ret);
- goto out;
- }
- comedi_usb_auto_config(uinterf, &usbdux_driver, 0);
- out:
- release_firmware(fw);
-}
-
static int usbdux_usb_probe(struct usb_interface *uinterf,
const struct usb_device_id *id)
{
@@ -2405,7 +2306,6 @@ static int usbdux_usb_probe(struct usb_interface *uinterf,
struct device *dev = &uinterf->dev;
int i;
int index;
- int ret;
dev_dbg(dev, "comedi_: usbdux_: "
"finding a free structure for the usb-device\n");
@@ -2622,23 +2522,7 @@ static int usbdux_usb_probe(struct usb_interface *uinterf,
usbduxsub[index].probed = 1;
up(&start_stop_sem);
- ret = request_firmware_nowait(THIS_MODULE,
- FW_ACTION_HOTPLUG,
- FIRMWARE,
- &udev->dev,
- GFP_KERNEL,
- usbduxsub + index,
- usbdux_firmware_request_complete_handler);
-
- if (ret) {
- dev_err(dev, "Could not load firmware (err=%d)\n", ret);
- return ret;
- }
-
- dev_info(dev, "comedi_: usbdux%d "
- "has been successfully initialised.\n", index);
- /* success */
- return 0;
+ return comedi_usb_auto_config(uinterf, &usbdux_driver, 0);
}
static void usbdux_usb_disconnect(struct usb_interface *intf)
diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c
index 7f95af33085d..27898c44e543 100644
--- a/drivers/staging/comedi/drivers/usbduxfast.c
+++ b/drivers/staging/comedi/drivers/usbduxfast.c
@@ -10,10 +10,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -40,7 +36,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
-#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -60,6 +55,7 @@
* constants for "firmware" upload and download
*/
#define FIRMWARE "usbduxfast_firmware.bin"
+#define FIRMWARE_MAX_LEN 0x2000
#define USBDUXFASTSUB_FIRMWARE 0xA0
#define VENDOR_DIR_IN 0xC0
#define VENDOR_DIR_OUT 0x40
@@ -112,7 +108,7 @@
/*
* size of the buffer for the dux commands in bytes
*/
-#define SIZEOFDUXBUFFER 256
+#define SIZEOFDUXBUF 256
/*
* number of in-URBs which receive the data: min=5
@@ -120,16 +116,6 @@
#define NUMOFINBUFFERSHIGH 10
/*
- * total number of usbduxfast devices
- */
-#define NUMUSBDUXFAST 16
-
-/*
- * analogue in subdevice
- */
-#define SUBDEV_AD 0
-
-/*
* min delay steps for more than one channel
* basically when the mux gives up ;-)
*
@@ -161,143 +147,83 @@ static const struct comedi_lrange range_usbduxfast_ai_range = {
* this is the structure which holds all the data of this driver
* one sub device just now: A/D
*/
-struct usbduxfastsub_s {
- int attached; /* is attached? */
- int probed; /* is it associated with a subdevice? */
- struct usb_device *usbdev; /* pointer to the usb-device */
- struct urb *urbIn; /* BULK-transfer handling: urb */
- int8_t *transfer_buffer;
- int16_t *insnBuffer; /* input buffer for single insn */
- int ifnum; /* interface number */
- struct usb_interface *interface; /* interface structure */
- /* comedi device for the interrupt context */
- struct comedi_device *comedidev;
+struct usbduxfast_private {
+ struct urb *urb; /* BULK-transfer handling: urb */
+ uint8_t *duxbuf;
+ int8_t *inbuf;
short int ai_cmd_running; /* asynchronous command is running */
short int ai_continous; /* continous acquisition */
long int ai_sample_count; /* number of samples to acquire */
- uint8_t *dux_commands; /* commands */
int ignore; /* counter which ignores the first
buffers */
struct semaphore sem;
};
/*
- * The pointer to the private usb-data of the driver
- * is also the private data for the comedi-device.
- * This has to be global as the usb subsystem needs
- * global variables. The other reason is that this
- * structure must be there _before_ any comedi
- * command is issued. The usb subsystem must be
- * initialised before comedi can access it.
- */
-static struct usbduxfastsub_s usbduxfastsub[NUMUSBDUXFAST];
-
-static DEFINE_SEMAPHORE(start_stop_sem);
-
-/*
* bulk transfers to usbduxfast
*/
#define SENDADCOMMANDS 0
#define SENDINITEP6 1
-static int send_dux_commands(struct usbduxfastsub_s *udfs, int cmd_type)
+static int usbduxfast_send_cmd(struct comedi_device *dev, int cmd_type)
{
- int tmp, nsent;
-
- udfs->dux_commands[0] = cmd_type;
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast: dux_commands: ",
- udfs->comedidev->minor);
- for (tmp = 0; tmp < SIZEOFDUXBUFFER; tmp++)
- printk(" %02x", udfs->dux_commands[tmp]);
- printk("\n");
-#endif
-
- tmp = usb_bulk_msg(udfs->usbdev,
- usb_sndbulkpipe(udfs->usbdev, CHANNELLISTEP),
- udfs->dux_commands, SIZEOFDUXBUFFER, &nsent, 10000);
- if (tmp < 0)
- dev_err(&udfs->interface->dev,
- "could not transmit dux_commands to the usb-device, err=%d\n",
- tmp);
- return tmp;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxfast_private *devpriv = dev->private;
+ int nsent;
+ int ret;
+
+ devpriv->duxbuf[0] = cmd_type;
+
+ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, CHANNELLISTEP),
+ devpriv->duxbuf, SIZEOFDUXBUF,
+ &nsent, 10000);
+ if (ret < 0)
+ dev_err(dev->class_dev,
+ "could not transmit command to the usb-device, err=%d\n",
+ ret);
+ return ret;
}
-/*
- * Stops the data acquision.
- * It should be safe to call this function from any context.
- */
-static int usbduxfastsub_unlink_InURBs(struct usbduxfastsub_s *udfs)
+static void usbduxfast_cmd_data(struct comedi_device *dev, int index,
+ uint8_t len, uint8_t op, uint8_t out,
+ uint8_t log)
{
- int j = 0;
- int err = 0;
+ struct usbduxfast_private *devpriv = dev->private;
- if (udfs && udfs->urbIn) {
- udfs->ai_cmd_running = 0;
- /* waits until a running transfer is over */
- usb_kill_urb(udfs->urbIn);
- j = 0;
- }
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi: usbduxfast: unlinked InURB: res=%d\n", j);
-#endif
- return err;
+ /* Set the GPIF bytes, the first byte is the command byte */
+ devpriv->duxbuf[1 + 0x00 + index] = len;
+ devpriv->duxbuf[1 + 0x08 + index] = op;
+ devpriv->duxbuf[1 + 0x10 + index] = out;
+ devpriv->duxbuf[1 + 0x18 + index] = log;
}
-/*
- * This will stop a running acquisition operation.
- * Is called from within this driver from both the
- * interrupt context and from comedi.
- */
-static int usbduxfast_ai_stop(struct usbduxfastsub_s *udfs, int do_unlink)
+static int usbduxfast_ai_stop(struct comedi_device *dev, int do_unlink)
{
- int ret = 0;
+ struct usbduxfast_private *devpriv = dev->private;
- if (!udfs) {
- pr_err("%s: udfs=NULL!\n", __func__);
- return -EFAULT;
- }
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi: usbduxfast_ai_stop\n");
-#endif
+ /* stop aquistion */
+ devpriv->ai_cmd_running = 0;
- udfs->ai_cmd_running = 0;
-
- if (do_unlink)
- /* stop aquistion */
- ret = usbduxfastsub_unlink_InURBs(udfs);
+ if (do_unlink && devpriv->urb) {
+ /* kill the running transfer */
+ usb_kill_urb(devpriv->urb);
+ }
- return ret;
+ return 0;
}
-/*
- * This will cancel a running acquisition operation.
- * This is called by comedi but never from inside the driver.
- */
static int usbduxfast_ai_cancel(struct comedi_device *dev,
struct comedi_subdevice *s)
{
- struct usbduxfastsub_s *udfs;
+ struct usbduxfast_private *devpriv = dev->private;
int ret;
- /* force unlink of all urbs */
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi: usbduxfast_ai_cancel\n");
-#endif
- udfs = dev->private;
- if (!udfs) {
- dev_err(dev->class_dev, "%s: udfs=NULL\n", __func__);
+ if (!devpriv)
return -EFAULT;
- }
- down(&udfs->sem);
- if (!udfs->probed) {
- up(&udfs->sem);
- return -ENODEV;
- }
- /* unlink */
- ret = usbduxfast_ai_stop(udfs, 1);
- up(&udfs->sem);
+
+ down(&devpriv->sem);
+ ret = usbduxfast_ai_stop(dev, 1);
+ up(&devpriv->sem);
return ret;
}
@@ -306,32 +232,17 @@ static int usbduxfast_ai_cancel(struct comedi_device *dev,
* analogue IN
* interrupt service routine
*/
-static void usbduxfastsub_ai_Irq(struct urb *urb)
+static void usbduxfast_ai_interrupt(struct urb *urb)
{
+ struct comedi_device *dev = urb->context;
+ struct comedi_subdevice *s = dev->read_subdev;
+ struct comedi_async *async = s->async;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxfast_private *devpriv = dev->private;
int n, err;
- struct usbduxfastsub_s *udfs;
- struct comedi_device *this_comedidev;
- struct comedi_subdevice *s;
- /* sanity checks - is the urb there? */
- if (!urb) {
- pr_err("ao int-handler called with urb=NULL!\n");
- return;
- }
- /* the context variable points to the subdevice */
- this_comedidev = urb->context;
- if (!this_comedidev) {
- pr_err("urb context is a NULL pointer!\n");
- return;
- }
- /* the private structure of the subdevice is usbduxfastsub_s */
- udfs = this_comedidev->private;
- if (!udfs) {
- pr_err("private of comedi subdev is a NULL pointer!\n");
- return;
- }
/* are we running a command? */
- if (unlikely(!udfs->ai_cmd_running)) {
+ if (unlikely(!devpriv->ai_cmd_running)) {
/*
* not running a command
* do not continue execution if no asynchronous command
@@ -340,13 +251,6 @@ static void usbduxfastsub_ai_Irq(struct urb *urb)
return;
}
- if (unlikely(!udfs->attached)) {
- /* no comedi device there */
- return;
- }
- /* subdevice which is the AD converter */
- s = &this_comedidev->subdevices[SUBDEV_AD];
-
/* first we test if something unusual has just happened */
switch (urb->status) {
case 0:
@@ -361,189 +265,93 @@ static void usbduxfastsub_ai_Irq(struct urb *urb)
case -ESHUTDOWN:
case -ECONNABORTED:
/* tell this comedi */
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(udfs->comedidev, s);
+ async->events |= COMEDI_CB_EOA;
+ async->events |= COMEDI_CB_ERROR;
+ comedi_event(dev, s);
/* stop the transfer w/o unlink */
- usbduxfast_ai_stop(udfs, 0);
+ usbduxfast_ai_stop(dev, 0);
return;
default:
pr_err("non-zero urb status received in ai intr context: %d\n",
urb->status);
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(udfs->comedidev, s);
- usbduxfast_ai_stop(udfs, 0);
+ async->events |= COMEDI_CB_EOA;
+ async->events |= COMEDI_CB_ERROR;
+ comedi_event(dev, s);
+ usbduxfast_ai_stop(dev, 0);
return;
}
- if (!udfs->ignore) {
- if (!udfs->ai_continous) {
+ if (!devpriv->ignore) {
+ if (!devpriv->ai_continous) {
/* not continuous, fixed number of samples */
n = urb->actual_length / sizeof(uint16_t);
- if (unlikely(udfs->ai_sample_count < n)) {
- /*
- * we have send only a fraction of the bytes
- * received
- */
+ if (unlikely(devpriv->ai_sample_count < n)) {
+ unsigned int num_bytes;
+
+ /* partial sample received */
+ num_bytes = devpriv->ai_sample_count *
+ sizeof(uint16_t);
cfc_write_array_to_buffer(s,
urb->transfer_buffer,
- udfs->ai_sample_count
- * sizeof(uint16_t));
- usbduxfast_ai_stop(udfs, 0);
+ num_bytes);
+ usbduxfast_ai_stop(dev, 0);
/* tell comedi that the acquistion is over */
- s->async->events |= COMEDI_CB_EOA;
- comedi_event(udfs->comedidev, s);
+ async->events |= COMEDI_CB_EOA;
+ comedi_event(dev, s);
return;
}
- udfs->ai_sample_count -= n;
+ devpriv->ai_sample_count -= n;
}
/* write the full buffer to comedi */
err = cfc_write_array_to_buffer(s, urb->transfer_buffer,
urb->actual_length);
if (unlikely(err == 0)) {
/* buffer overflow */
- usbduxfast_ai_stop(udfs, 0);
+ usbduxfast_ai_stop(dev, 0);
return;
}
/* tell comedi that data is there */
- comedi_event(udfs->comedidev, s);
-
+ comedi_event(dev, s);
} else {
/* ignore this packet */
- udfs->ignore--;
+ devpriv->ignore--;
}
/*
* command is still running
* resubmit urb for BULK transfer
*/
- urb->dev = udfs->usbdev;
+ urb->dev = usb;
urb->status = 0;
err = usb_submit_urb(urb, GFP_ATOMIC);
if (err < 0) {
- dev_err(&urb->dev->dev,
+ dev_err(dev->class_dev,
"urb resubm failed: %d", err);
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(udfs->comedidev, s);
- usbduxfast_ai_stop(udfs, 0);
- }
-}
-
-static int usbduxfastsub_start(struct usbduxfastsub_s *udfs)
-{
- int ret;
- unsigned char *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(1, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to zero */
- *local_transfer_buffer = 0;
- /* bRequest, "Firmware" */
- ret = usb_control_msg(udfs->usbdev, usb_sndctrlpipe(udfs->usbdev, 0),
- USBDUXFASTSUB_FIRMWARE,
- VENDOR_DIR_OUT, /* bmRequestType */
- USBDUXFASTSUB_CPUCS, /* Value */
- 0x0000, /* Index */
- /* address of the transfer buffer */
- local_transfer_buffer,
- 1, /* Length */
- EZTIMEOUT); /* Timeout */
- if (ret < 0)
- dev_err(&udfs->interface->dev,
- "control msg failed (start)\n");
-
- kfree(local_transfer_buffer);
- return ret;
-}
-
-static int usbduxfastsub_stop(struct usbduxfastsub_s *udfs)
-{
- int ret;
- unsigned char *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(1, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to one */
- *local_transfer_buffer = 1;
- /* bRequest, "Firmware" */
- ret = usb_control_msg(udfs->usbdev, usb_sndctrlpipe(udfs->usbdev, 0),
- USBDUXFASTSUB_FIRMWARE,
- VENDOR_DIR_OUT, /* bmRequestType */
- USBDUXFASTSUB_CPUCS, /* Value */
- 0x0000, /* Index */
- local_transfer_buffer, 1, /* Length */
- EZTIMEOUT); /* Timeout */
- if (ret < 0)
- dev_err(&udfs->interface->dev,
- "control msg failed (stop)\n");
-
- kfree(local_transfer_buffer);
- return ret;
-}
-
-static int usbduxfastsub_upload(struct usbduxfastsub_s *udfs,
- unsigned char *local_transfer_buffer,
- unsigned int startAddr, unsigned int len)
-{
- int ret;
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi: usbduxfast: uploading %d bytes", len);
- printk(KERN_DEBUG " to addr %d, first byte=%d.\n",
- startAddr, local_transfer_buffer[0]);
-#endif
- /* brequest, firmware */
- ret = usb_control_msg(udfs->usbdev, usb_sndctrlpipe(udfs->usbdev, 0),
- USBDUXFASTSUB_FIRMWARE,
- VENDOR_DIR_OUT, /* bmRequestType */
- startAddr, /* value */
- 0x0000, /* index */
- /* our local safe buffer */
- local_transfer_buffer,
- len, /* length */
- EZTIMEOUT); /* timeout */
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast: result=%d\n", ret);
-#endif
-
- if (ret < 0) {
- dev_err(&udfs->interface->dev, "uppload failed\n");
- return ret;
+ async->events |= COMEDI_CB_EOA;
+ async->events |= COMEDI_CB_ERROR;
+ comedi_event(dev, s);
+ usbduxfast_ai_stop(dev, 0);
}
-
- return 0;
}
-static int usbduxfastsub_submit_InURBs(struct usbduxfastsub_s *udfs)
+static int usbduxfast_submit_urb(struct comedi_device *dev)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxfast_private *devpriv = dev->private;
int ret;
- if (!udfs)
+ if (!devpriv)
return -EFAULT;
- usb_fill_bulk_urb(udfs->urbIn, udfs->usbdev,
- usb_rcvbulkpipe(udfs->usbdev, BULKINEP),
- udfs->transfer_buffer,
- SIZEINBUF, usbduxfastsub_ai_Irq, udfs->comedidev);
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast: submitting in-urb: "
- "0x%p,0x%p\n", udfs->comedidev->minor, udfs->urbIn->context,
- udfs->urbIn->dev);
-#endif
- ret = usb_submit_urb(udfs->urbIn, GFP_ATOMIC);
+ usb_fill_bulk_urb(devpriv->urb, usb, usb_rcvbulkpipe(usb, BULKINEP),
+ devpriv->inbuf, SIZEINBUF,
+ usbduxfast_ai_interrupt, dev);
+
+ ret = usb_submit_urb(devpriv->urb, GFP_ATOMIC);
if (ret) {
- dev_err(&udfs->interface->dev,
- "ai: usb_submit_urb error %d\n", ret);
+ dev_err(dev->class_dev, "usb_submit_urb error %d\n", ret);
return ret;
}
return 0;
@@ -553,13 +361,9 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev,
struct comedi_subdevice *s,
struct comedi_cmd *cmd)
{
- struct usbduxfastsub_s *udfs = dev->private;
int err = 0;
long int steps, tmp;
- int minSamplPer;
-
- if (!udfs->probed)
- return -ENODEV;
+ int min_sample_period;
/* Step 1 : check if triggers are trivially valid */
@@ -601,14 +405,14 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev,
err |= cfc_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len);
if (cmd->chanlist_len == 1)
- minSamplPer = 1;
+ min_sample_period = 1;
else
- minSamplPer = MIN_SAMPLING_PERIOD;
+ min_sample_period = MIN_SAMPLING_PERIOD;
if (cmd->convert_src == TRIG_TIMER) {
steps = cmd->convert_arg * 30;
- if (steps < (minSamplPer * 1000))
- steps = minSamplPer * 1000;
+ if (steps < (min_sample_period * 1000))
+ steps = min_sample_period * 1000;
if (steps > (MAX_SAMPLING_PERIOD * 1000))
steps = MAX_SAMPLING_PERIOD * 1000;
@@ -650,80 +454,53 @@ static int usbduxfast_ai_inttrig(struct comedi_device *dev,
struct comedi_subdevice *s,
unsigned int trignum)
{
+ struct usbduxfast_private *devpriv = dev->private;
int ret;
- struct usbduxfastsub_s *udfs = dev->private;
- if (!udfs)
+ if (!devpriv)
return -EFAULT;
- down(&udfs->sem);
- if (!udfs->probed) {
- up(&udfs->sem);
- return -ENODEV;
- }
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast_ai_inttrig\n", dev->minor);
-#endif
+ down(&devpriv->sem);
if (trignum != 0) {
- dev_err(dev->class_dev, "%s: invalid trignum\n", __func__);
- up(&udfs->sem);
+ dev_err(dev->class_dev, "invalid trignum\n");
+ up(&devpriv->sem);
return -EINVAL;
}
- if (!udfs->ai_cmd_running) {
- udfs->ai_cmd_running = 1;
- ret = usbduxfastsub_submit_InURBs(udfs);
+ if (!devpriv->ai_cmd_running) {
+ devpriv->ai_cmd_running = 1;
+ ret = usbduxfast_submit_urb(dev);
if (ret < 0) {
- dev_err(dev->class_dev,
- "%s: urbSubmit: err=%d\n", __func__, ret);
- udfs->ai_cmd_running = 0;
- up(&udfs->sem);
+ dev_err(dev->class_dev, "urbSubmit: err=%d\n", ret);
+ devpriv->ai_cmd_running = 0;
+ up(&devpriv->sem);
return ret;
}
s->async->inttrig = NULL;
} else {
- dev_err(dev->class_dev,
- "ai_inttrig but acqu is already running\n");
+ dev_err(dev->class_dev, "ai is already running\n");
}
- up(&udfs->sem);
+ up(&devpriv->sem);
return 1;
}
-/*
- * offsets for the GPIF bytes
- * the first byte is the command byte
- */
-#define LENBASE (1+0x00)
-#define OPBASE (1+0x08)
-#define OUTBASE (1+0x10)
-#define LOGBASE (1+0x18)
-
static int usbduxfast_ai_cmd(struct comedi_device *dev,
struct comedi_subdevice *s)
{
+ struct usbduxfast_private *devpriv = dev->private;
struct comedi_cmd *cmd = &s->async->cmd;
unsigned int chan, gain, rngmask = 0xff;
int i, j, ret;
- struct usbduxfastsub_s *udfs;
int result;
long steps, steps_tmp;
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast_ai_cmd\n", dev->minor);
-#endif
- udfs = dev->private;
- if (!udfs)
+ if (!devpriv)
return -EFAULT;
- down(&udfs->sem);
- if (!udfs->probed) {
- up(&udfs->sem);
- return -ENODEV;
- }
- if (udfs->ai_cmd_running) {
- dev_err(dev->class_dev,
- "ai_cmd not possible. Another ai_cmd is running.\n");
- up(&udfs->sem);
+ down(&devpriv->sem);
+ if (devpriv->ai_cmd_running) {
+ dev_err(dev->class_dev, "ai_cmd not possible\n");
+ up(&devpriv->sem);
return -EBUSY;
}
/* set current channel of the running acquisition to zero */
@@ -733,7 +510,7 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
* ignore the first buffers from the device if there
* is an error condition
*/
- udfs->ignore = PACKETS_TO_IGNORE;
+ devpriv->ignore = PACKETS_TO_IGNORE;
if (cmd->chanlist_len > 0) {
gain = CR_RANGE(cmd->chanlist[0]);
@@ -741,20 +518,19 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
chan = CR_CHAN(cmd->chanlist[i]);
if (chan != i) {
dev_err(dev->class_dev,
- "cmd is accepting only consecutive channels.\n");
- up(&udfs->sem);
+ "channels are not consecutive\n");
+ up(&devpriv->sem);
return -EINVAL;
}
if ((gain != CR_RANGE(cmd->chanlist[i]))
&& (cmd->chanlist_len > 3)) {
dev_err(dev->class_dev,
- "the gain must be the same for all channels.\n");
- up(&udfs->sem);
+ "gain must be the same for all channels\n");
+ up(&devpriv->sem);
return -EINVAL;
}
if (i >= NUMCHANNELS) {
- dev_err(dev->class_dev,
- "channel list too long\n");
+ dev_err(dev->class_dev, "chanlist too long\n");
break;
}
}
@@ -762,8 +538,8 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
steps = 0;
if (cmd->scan_begin_src == TRIG_TIMER) {
dev_err(dev->class_dev,
- "scan_begin_src==TRIG_TIMER not valid.\n");
- up(&udfs->sem);
+ "scan_begin_src==TRIG_TIMER not valid\n");
+ up(&devpriv->sem);
return -EINVAL;
}
if (cmd->convert_src == TRIG_TIMER)
@@ -771,27 +547,23 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
if ((steps < MIN_SAMPLING_PERIOD) && (cmd->chanlist_len != 1)) {
dev_err(dev->class_dev,
- "ai_cmd: steps=%ld, scan_begin_arg=%d. Not properly tested by cmdtest?\n",
+ "steps=%ld, scan_begin_arg=%d. Not properly tested by cmdtest?\n",
steps, cmd->scan_begin_arg);
- up(&udfs->sem);
+ up(&devpriv->sem);
return -EINVAL;
}
if (steps > MAX_SAMPLING_PERIOD) {
- dev_err(dev->class_dev, "ai_cmd: sampling rate too low.\n");
- up(&udfs->sem);
+ dev_err(dev->class_dev, "sampling rate too low\n");
+ up(&devpriv->sem);
return -EINVAL;
}
if ((cmd->start_src == TRIG_EXT) && (cmd->chanlist_len != 1)
&& (cmd->chanlist_len != 16)) {
dev_err(dev->class_dev,
- "ai_cmd: TRIG_EXT only with 1 or 16 channels possible.\n");
- up(&udfs->sem);
+ "TRIG_EXT only with 1 or 16 channels possible\n");
+ up(&devpriv->sem);
return -EINVAL;
}
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast: steps=%ld, convert_arg=%u\n",
- dev->minor, steps, cmd->convert_arg);
-#endif
switch (cmd->chanlist_len) {
case 1:
@@ -812,17 +584,11 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
/* we loop here until ready has been set */
if (cmd->start_src == TRIG_EXT) {
/* branch back to state 0 */
- udfs->dux_commands[LENBASE + 0] = 0x01;
/* deceision state w/o data */
- udfs->dux_commands[OPBASE + 0] = 0x01;
- udfs->dux_commands[OUTBASE + 0] = 0xFF & rngmask;
/* RDY0 = 0 */
- udfs->dux_commands[LOGBASE + 0] = 0x00;
+ usbduxfast_cmd_data(dev, 0, 0x01, 0x01, rngmask, 0x00);
} else { /* we just proceed to state 1 */
- udfs->dux_commands[LENBASE + 0] = 1;
- udfs->dux_commands[OPBASE + 0] = 0;
- udfs->dux_commands[OUTBASE + 0] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 0] = 0;
+ usbduxfast_cmd_data(dev, 0, 0x01, 0x00, rngmask, 0x00);
}
if (steps < MIN_SAMPLING_PERIOD) {
@@ -835,33 +601,25 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
*/
/* branch back to state 1 */
- udfs->dux_commands[LENBASE + 1] = 0x89;
/* deceision state with data */
- udfs->dux_commands[OPBASE + 1] = 0x03;
- udfs->dux_commands[OUTBASE + 1] =
- 0xFF & rngmask;
/* doesn't matter */
- udfs->dux_commands[LOGBASE + 1] = 0xFF;
+ usbduxfast_cmd_data(dev, 1,
+ 0x89, 0x03, rngmask, 0xff);
} else {
/*
* we loop through two states: data and delay
* max rate is 15MHz
*/
- udfs->dux_commands[LENBASE + 1] = steps - 1;
/* data */
- udfs->dux_commands[OPBASE + 1] = 0x02;
- udfs->dux_commands[OUTBASE + 1] =
- 0xFF & rngmask;
/* doesn't matter */
- udfs->dux_commands[LOGBASE + 1] = 0;
+ usbduxfast_cmd_data(dev, 1, steps - 1,
+ 0x02, rngmask, 0x00);
+
/* branch back to state 1 */
- udfs->dux_commands[LENBASE + 2] = 0x09;
/* deceision state w/o data */
- udfs->dux_commands[OPBASE + 2] = 0x01;
- udfs->dux_commands[OUTBASE + 2] =
- 0xFF & rngmask;
/* doesn't matter */
- udfs->dux_commands[LOGBASE + 2] = 0xFF;
+ usbduxfast_cmd_data(dev, 2,
+ 0x09, 0x01, rngmask, 0xff);
}
} else {
/*
@@ -873,26 +631,20 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
steps = steps - 1;
/* do the first part of the delay */
- udfs->dux_commands[LENBASE + 1] = steps / 2;
- udfs->dux_commands[OPBASE + 1] = 0;
- udfs->dux_commands[OUTBASE + 1] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 1] = 0;
+ usbduxfast_cmd_data(dev, 1,
+ steps / 2, 0x00, rngmask, 0x00);
/* and the second part */
- udfs->dux_commands[LENBASE + 2] = steps - steps / 2;
- udfs->dux_commands[OPBASE + 2] = 0;
- udfs->dux_commands[OUTBASE + 2] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 2] = 0;
+ usbduxfast_cmd_data(dev, 2, steps - steps / 2,
+ 0x00, rngmask, 0x00);
/* get the data and branch back */
/* branch back to state 1 */
- udfs->dux_commands[LENBASE + 3] = 0x09;
/* deceision state w data */
- udfs->dux_commands[OPBASE + 3] = 0x03;
- udfs->dux_commands[OUTBASE + 3] = 0xFF & rngmask;
/* doesn't matter */
- udfs->dux_commands[LOGBASE + 3] = 0xFF;
+ usbduxfast_cmd_data(dev, 3,
+ 0x09, 0x03, rngmask, 0xff);
}
break;
@@ -907,11 +659,8 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
else
rngmask = 0xff;
- udfs->dux_commands[LENBASE + 0] = 1;
/* data */
- udfs->dux_commands[OPBASE + 0] = 0x02;
- udfs->dux_commands[OUTBASE + 0] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 0] = 0;
+ usbduxfast_cmd_data(dev, 0, 0x01, 0x02, rngmask, 0x00);
/* we have 1 state with duration 1: state 0 */
steps_tmp = steps - 1;
@@ -922,23 +671,16 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
rngmask = 0xff;
/* do the first part of the delay */
- udfs->dux_commands[LENBASE + 1] = steps_tmp / 2;
- udfs->dux_commands[OPBASE + 1] = 0;
/* count */
- udfs->dux_commands[OUTBASE + 1] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 1] = 0;
+ usbduxfast_cmd_data(dev, 1, steps_tmp / 2,
+ 0x00, 0xfe & rngmask, 0x00);
/* and the second part */
- udfs->dux_commands[LENBASE + 2] = steps_tmp - steps_tmp / 2;
- udfs->dux_commands[OPBASE + 2] = 0;
- udfs->dux_commands[OUTBASE + 2] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 2] = 0;
+ usbduxfast_cmd_data(dev, 2, steps_tmp - steps_tmp / 2,
+ 0x00, rngmask, 0x00);
- udfs->dux_commands[LENBASE + 3] = 1;
/* data */
- udfs->dux_commands[OPBASE + 3] = 0x02;
- udfs->dux_commands[OUTBASE + 3] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 3] = 0;
+ usbduxfast_cmd_data(dev, 3, 0x01, 0x02, rngmask, 0x00);
/*
* we have 2 states with duration 1: step 6 and
@@ -952,22 +694,15 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
rngmask = 0xff;
/* do the first part of the delay */
- udfs->dux_commands[LENBASE + 4] = steps_tmp / 2;
- udfs->dux_commands[OPBASE + 4] = 0;
/* reset */
- udfs->dux_commands[OUTBASE + 4] = (0xFF - 0x02) & rngmask;
- udfs->dux_commands[LOGBASE + 4] = 0;
+ usbduxfast_cmd_data(dev, 4, steps_tmp / 2,
+ 0x00, (0xff - 0x02) & rngmask, 0x00);
/* and the second part */
- udfs->dux_commands[LENBASE + 5] = steps_tmp - steps_tmp / 2;
- udfs->dux_commands[OPBASE + 5] = 0;
- udfs->dux_commands[OUTBASE + 5] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 5] = 0;
-
- udfs->dux_commands[LENBASE + 6] = 1;
- udfs->dux_commands[OPBASE + 6] = 0;
- udfs->dux_commands[OUTBASE + 6] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 6] = 0;
+ usbduxfast_cmd_data(dev, 5, steps_tmp - steps_tmp / 2,
+ 0x00, rngmask, 0x00);
+
+ usbduxfast_cmd_data(dev, 6, 0x01, 0x00, rngmask, 0x00);
break;
case 3:
@@ -975,6 +710,8 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
* three channels
*/
for (j = 0; j < 1; j++) {
+ int index = j * 2;
+
if (CR_RANGE(cmd->chanlist[j]) > 0)
rngmask = 0xff - 0x04;
else
@@ -983,12 +720,10 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
* commit data to the FIFO and do the first part
* of the delay
*/
- udfs->dux_commands[LENBASE + j * 2] = steps / 2;
/* data */
- udfs->dux_commands[OPBASE + j * 2] = 0x02;
/* no change */
- udfs->dux_commands[OUTBASE + j * 2] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + j * 2] = 0;
+ usbduxfast_cmd_data(dev, index, steps / 2,
+ 0x02, rngmask, 0x00);
if (CR_RANGE(cmd->chanlist[j + 1]) > 0)
rngmask = 0xff - 0x04;
@@ -996,25 +731,19 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
rngmask = 0xff;
/* do the second part of the delay */
- udfs->dux_commands[LENBASE + j * 2 + 1] =
- steps - steps / 2;
/* no data */
- udfs->dux_commands[OPBASE + j * 2 + 1] = 0;
/* count */
- udfs->dux_commands[OUTBASE + j * 2 + 1] =
- 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + j * 2 + 1] = 0;
+ usbduxfast_cmd_data(dev, index + 1, steps - steps / 2,
+ 0x00, 0xfe & rngmask, 0x00);
}
/* 2 steps with duration 1: the idele step and step 6: */
steps_tmp = steps - 2;
/* commit data to the FIFO and do the first part of the delay */
- udfs->dux_commands[LENBASE + 4] = steps_tmp / 2;
/* data */
- udfs->dux_commands[OPBASE + 4] = 0x02;
- udfs->dux_commands[OUTBASE + 4] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 4] = 0;
+ usbduxfast_cmd_data(dev, 4, steps_tmp / 2,
+ 0x02, rngmask, 0x00);
if (CR_RANGE(cmd->chanlist[0]) > 0)
rngmask = 0xff - 0x04;
@@ -1022,17 +751,12 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
rngmask = 0xff;
/* do the second part of the delay */
- udfs->dux_commands[LENBASE + 5] = steps_tmp - steps_tmp / 2;
/* no data */
- udfs->dux_commands[OPBASE + 5] = 0;
/* reset */
- udfs->dux_commands[OUTBASE + 5] = (0xFF - 0x02) & rngmask;
- udfs->dux_commands[LOGBASE + 5] = 0;
+ usbduxfast_cmd_data(dev, 5, steps_tmp - steps_tmp / 2,
+ 0x00, (0xff - 0x02) & rngmask, 0x00);
- udfs->dux_commands[LENBASE + 6] = 1;
- udfs->dux_commands[OPBASE + 6] = 0;
- udfs->dux_commands[OUTBASE + 6] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 6] = 0;
+ usbduxfast_cmd_data(dev, 6, 0x01, 0x00, rngmask, 0x00);
case 16:
if (CR_RANGE(cmd->chanlist[0]) > 0)
@@ -1046,101 +770,79 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
*/
/* branch back to state 0 */
- udfs->dux_commands[LENBASE + 0] = 0x01;
/* deceision state w/o data */
- udfs->dux_commands[OPBASE + 0] = 0x01;
/* reset */
- udfs->dux_commands[OUTBASE + 0] =
- (0xFF - 0x02) & rngmask;
/* RDY0 = 0 */
- udfs->dux_commands[LOGBASE + 0] = 0x00;
+ usbduxfast_cmd_data(dev, 0, 0x01, 0x01,
+ (0xff - 0x02) & rngmask, 0x00);
} else {
/*
* we just proceed to state 1
*/
/* 30us reset pulse */
- udfs->dux_commands[LENBASE + 0] = 255;
- udfs->dux_commands[OPBASE + 0] = 0;
/* reset */
- udfs->dux_commands[OUTBASE + 0] =
- (0xFF - 0x02) & rngmask;
- udfs->dux_commands[LOGBASE + 0] = 0;
+ usbduxfast_cmd_data(dev, 0, 0xff, 0x00,
+ (0xff - 0x02) & rngmask, 0x00);
}
/* commit data to the FIFO */
- udfs->dux_commands[LENBASE + 1] = 1;
/* data */
- udfs->dux_commands[OPBASE + 1] = 0x02;
- udfs->dux_commands[OUTBASE + 1] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 1] = 0;
+ usbduxfast_cmd_data(dev, 1, 0x01, 0x02, rngmask, 0x00);
/* we have 2 states with duration 1 */
steps = steps - 2;
/* do the first part of the delay */
- udfs->dux_commands[LENBASE + 2] = steps / 2;
- udfs->dux_commands[OPBASE + 2] = 0;
- udfs->dux_commands[OUTBASE + 2] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 2] = 0;
+ usbduxfast_cmd_data(dev, 2, steps / 2,
+ 0x00, 0xfe & rngmask, 0x00);
/* and the second part */
- udfs->dux_commands[LENBASE + 3] = steps - steps / 2;
- udfs->dux_commands[OPBASE + 3] = 0;
- udfs->dux_commands[OUTBASE + 3] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 3] = 0;
+ usbduxfast_cmd_data(dev, 3, steps - steps / 2,
+ 0x00, rngmask, 0x00);
/* branch back to state 1 */
- udfs->dux_commands[LENBASE + 4] = 0x09;
/* deceision state w/o data */
- udfs->dux_commands[OPBASE + 4] = 0x01;
- udfs->dux_commands[OUTBASE + 4] = 0xFF & rngmask;
/* doesn't matter */
- udfs->dux_commands[LOGBASE + 4] = 0xFF;
+ usbduxfast_cmd_data(dev, 4, 0x09, 0x01, rngmask, 0xff);
break;
default:
dev_err(dev->class_dev, "unsupported combination of channels\n");
- up(&udfs->sem);
+ up(&devpriv->sem);
return -EFAULT;
}
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi %d: sending commands to the usb device\n",
- dev->minor);
-#endif
/* 0 means that the AD commands are sent */
- result = send_dux_commands(udfs, SENDADCOMMANDS);
+ result = usbduxfast_send_cmd(dev, SENDADCOMMANDS);
if (result < 0) {
- dev_err(dev->class_dev,
- "adc command could not be submitted. Aborting...\n");
- up(&udfs->sem);
+ up(&devpriv->sem);
return result;
}
if (cmd->stop_src == TRIG_COUNT) {
- udfs->ai_sample_count = cmd->stop_arg * cmd->scan_end_arg;
- if (udfs->ai_sample_count < 1) {
+ devpriv->ai_sample_count = cmd->stop_arg * cmd->scan_end_arg;
+ if (devpriv->ai_sample_count < 1) {
dev_err(dev->class_dev,
- "(cmd->stop_arg)*(cmd->scan_end_arg)<1, aborting.\n");
- up(&udfs->sem);
+ "(cmd->stop_arg)*(cmd->scan_end_arg)<1, aborting\n");
+ up(&devpriv->sem);
return -EFAULT;
}
- udfs->ai_continous = 0;
+ devpriv->ai_continous = 0;
} else {
/* continous acquisition */
- udfs->ai_continous = 1;
- udfs->ai_sample_count = 0;
+ devpriv->ai_continous = 1;
+ devpriv->ai_sample_count = 0;
}
if ((cmd->start_src == TRIG_NOW) || (cmd->start_src == TRIG_EXT)) {
/* enable this acquisition operation */
- udfs->ai_cmd_running = 1;
- ret = usbduxfastsub_submit_InURBs(udfs);
+ devpriv->ai_cmd_running = 1;
+ ret = usbduxfast_submit_urb(dev);
if (ret < 0) {
- udfs->ai_cmd_running = 0;
+ devpriv->ai_cmd_running = 0;
/* fixme: unlink here?? */
- up(&udfs->sem);
+ up(&devpriv->sem);
return ret;
}
s->async->inttrig = NULL;
@@ -1152,7 +854,7 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
*/
s->async->inttrig = usbduxfast_ai_inttrig;
}
- up(&udfs->sem);
+ up(&devpriv->sem);
return 0;
}
@@ -1162,490 +864,283 @@ static int usbduxfast_ai_cmd(struct comedi_device *dev,
*/
static int usbduxfast_ai_insn_read(struct comedi_device *dev,
struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+ struct comedi_insn *insn,
+ unsigned int *data)
{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxfast_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int range = CR_RANGE(insn->chanspec);
+ uint8_t rngmask = range ? (0xff - 0x04) : 0xff;
int i, j, n, actual_length;
- int chan, range, rngmask;
- int err;
- struct usbduxfastsub_s *udfs;
+ int ret;
- udfs = dev->private;
- if (!udfs) {
- dev_err(dev->class_dev, "%s: no usb dev.\n", __func__);
- return -ENODEV;
- }
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: ai_insn_read, insn->n=%d, "
- "insn->subdev=%d\n", dev->minor, insn->n, insn->subdev);
-#endif
- down(&udfs->sem);
- if (!udfs->probed) {
- up(&udfs->sem);
- return -ENODEV;
- }
- if (udfs->ai_cmd_running) {
+ down(&devpriv->sem);
+
+ if (devpriv->ai_cmd_running) {
dev_err(dev->class_dev,
- "ai_insn_read not possible. Async Command is running.\n");
- up(&udfs->sem);
+ "ai_insn_read not possible, async cmd is running\n");
+ up(&devpriv->sem);
return -EBUSY;
}
- /* sample one channel */
- chan = CR_CHAN(insn->chanspec);
- range = CR_RANGE(insn->chanspec);
- /* set command for the first channel */
- if (range > 0)
- rngmask = 0xff - 0x04;
- else
- rngmask = 0xff;
+ /* set command for the first channel */
/* commit data to the FIFO */
- udfs->dux_commands[LENBASE + 0] = 1;
/* data */
- udfs->dux_commands[OPBASE + 0] = 0x02;
- udfs->dux_commands[OUTBASE + 0] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 0] = 0;
+ usbduxfast_cmd_data(dev, 0, 0x01, 0x02, rngmask, 0x00);
/* do the first part of the delay */
- udfs->dux_commands[LENBASE + 1] = 12;
- udfs->dux_commands[OPBASE + 1] = 0;
- udfs->dux_commands[OUTBASE + 1] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 1] = 0;
-
- udfs->dux_commands[LENBASE + 2] = 1;
- udfs->dux_commands[OPBASE + 2] = 0;
- udfs->dux_commands[OUTBASE + 2] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 2] = 0;
-
- udfs->dux_commands[LENBASE + 3] = 1;
- udfs->dux_commands[OPBASE + 3] = 0;
- udfs->dux_commands[OUTBASE + 3] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 3] = 0;
-
- udfs->dux_commands[LENBASE + 4] = 1;
- udfs->dux_commands[OPBASE + 4] = 0;
- udfs->dux_commands[OUTBASE + 4] = 0xFE & rngmask;
- udfs->dux_commands[LOGBASE + 4] = 0;
+ usbduxfast_cmd_data(dev, 1, 0x0c, 0x00, 0xfe & rngmask, 0x00);
+ usbduxfast_cmd_data(dev, 2, 0x01, 0x00, 0xfe & rngmask, 0x00);
+ usbduxfast_cmd_data(dev, 3, 0x01, 0x00, 0xfe & rngmask, 0x00);
+ usbduxfast_cmd_data(dev, 4, 0x01, 0x00, 0xfe & rngmask, 0x00);
/* second part */
- udfs->dux_commands[LENBASE + 5] = 12;
- udfs->dux_commands[OPBASE + 5] = 0;
- udfs->dux_commands[OUTBASE + 5] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 5] = 0;
-
- udfs->dux_commands[LENBASE + 6] = 1;
- udfs->dux_commands[OPBASE + 6] = 0;
- udfs->dux_commands[OUTBASE + 6] = 0xFF & rngmask;
- udfs->dux_commands[LOGBASE + 0] = 0;
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi %d: sending commands to the usb device\n",
- dev->minor);
-#endif
- /* 0 means that the AD commands are sent */
- err = send_dux_commands(udfs, SENDADCOMMANDS);
- if (err < 0) {
- dev_err(dev->class_dev,
- "adc command could not be submitted. Aborting...\n");
- up(&udfs->sem);
- return err;
+ usbduxfast_cmd_data(dev, 5, 0x0c, 0x00, rngmask, 0x00);
+ usbduxfast_cmd_data(dev, 6, 0x01, 0x00, rngmask, 0x00);
+
+ ret = usbduxfast_send_cmd(dev, SENDADCOMMANDS);
+ if (ret < 0) {
+ up(&devpriv->sem);
+ return ret;
}
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi%d: usbduxfast: submitting in-urb: "
- "0x%p,0x%p\n", udfs->comedidev->minor, udfs->urbIn->context,
- udfs->urbIn->dev);
-#endif
+
for (i = 0; i < PACKETS_TO_IGNORE; i++) {
- err = usb_bulk_msg(udfs->usbdev,
- usb_rcvbulkpipe(udfs->usbdev, BULKINEP),
- udfs->transfer_buffer, SIZEINBUF,
+ ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, BULKINEP),
+ devpriv->inbuf, SIZEINBUF,
&actual_length, 10000);
- if (err < 0) {
- dev_err(dev->class_dev, "insn timeout. No data.\n");
- up(&udfs->sem);
- return err;
+ if (ret < 0) {
+ dev_err(dev->class_dev, "insn timeout, no data\n");
+ up(&devpriv->sem);
+ return ret;
}
}
- /* data points */
+
for (i = 0; i < insn->n;) {
- err = usb_bulk_msg(udfs->usbdev,
- usb_rcvbulkpipe(udfs->usbdev, BULKINEP),
- udfs->transfer_buffer, SIZEINBUF,
+ ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, BULKINEP),
+ devpriv->inbuf, SIZEINBUF,
&actual_length, 10000);
- if (err < 0) {
- dev_err(dev->class_dev, "insn data error: %d\n", err);
- up(&udfs->sem);
- return err;
+ if (ret < 0) {
+ dev_err(dev->class_dev, "insn data error: %d\n", ret);
+ up(&devpriv->sem);
+ return ret;
}
n = actual_length / sizeof(uint16_t);
if ((n % 16) != 0) {
- dev_err(dev->class_dev, "insn data packet corrupted.\n");
- up(&udfs->sem);
+ dev_err(dev->class_dev, "insn data packet corrupted\n");
+ up(&devpriv->sem);
return -EINVAL;
}
for (j = chan; (j < n) && (i < insn->n); j = j + 16) {
- data[i] = ((uint16_t *) (udfs->transfer_buffer))[j];
+ data[i] = ((uint16_t *) (devpriv->inbuf))[j];
i++;
}
}
- up(&udfs->sem);
- return i;
-}
-
-#define FIRMWARE_MAX_LEN 0x2000
-
-static int firmwareUpload(struct usbduxfastsub_s *usbduxfastsub,
- const u8 *firmwareBinary, int sizeFirmware)
-{
- int ret;
- uint8_t *fwBuf;
-
- if (!firmwareBinary)
- return 0;
-
- if (sizeFirmware > FIRMWARE_MAX_LEN) {
- dev_err(&usbduxfastsub->interface->dev,
- "comedi_: usbduxfast firmware binary it too large for FX2.\n");
- return -ENOMEM;
- }
-
- /* we generate a local buffer for the firmware */
- fwBuf = kmemdup(firmwareBinary, sizeFirmware, GFP_KERNEL);
- if (!fwBuf) {
- dev_err(&usbduxfastsub->interface->dev,
- "comedi_: mem alloc for firmware failed\n");
- return -ENOMEM;
- }
-
- ret = usbduxfastsub_stop(usbduxfastsub);
- if (ret < 0) {
- dev_err(&usbduxfastsub->interface->dev,
- "comedi_: can not stop firmware\n");
- kfree(fwBuf);
- return ret;
- }
-
- ret = usbduxfastsub_upload(usbduxfastsub, fwBuf, 0, sizeFirmware);
- if (ret < 0) {
- dev_err(&usbduxfastsub->interface->dev,
- "comedi_: firmware upload failed\n");
- kfree(fwBuf);
- return ret;
- }
- ret = usbduxfastsub_start(usbduxfastsub);
- if (ret < 0) {
- dev_err(&usbduxfastsub->interface->dev,
- "comedi_: can not start firmware\n");
- kfree(fwBuf);
- return ret;
- }
- kfree(fwBuf);
- return 0;
-}
-
-static void tidy_up(struct usbduxfastsub_s *udfs)
-{
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast: tiding up\n");
-#endif
-
- if (!udfs)
- return;
- /* shows the usb subsystem that the driver is down */
- if (udfs->interface)
- usb_set_intfdata(udfs->interface, NULL);
+ up(&devpriv->sem);
- udfs->probed = 0;
-
- if (udfs->urbIn) {
- /* waits until a running transfer is over */
- usb_kill_urb(udfs->urbIn);
-
- kfree(udfs->transfer_buffer);
- udfs->transfer_buffer = NULL;
-
- usb_free_urb(udfs->urbIn);
- udfs->urbIn = NULL;
- }
-
- kfree(udfs->insnBuffer);
- udfs->insnBuffer = NULL;
-
- kfree(udfs->dux_commands);
- udfs->dux_commands = NULL;
-
- udfs->ai_cmd_running = 0;
+ return insn->n;
}
-static int usbduxfast_attach_common(struct comedi_device *dev,
- struct usbduxfastsub_s *udfs)
+static int usbduxfast_attach_common(struct comedi_device *dev)
{
- int ret;
+ struct usbduxfast_private *devpriv = dev->private;
struct comedi_subdevice *s;
+ int ret;
- down(&udfs->sem);
- /* pointer back to the corresponding comedi device */
- udfs->comedidev = dev;
+ down(&devpriv->sem);
ret = comedi_alloc_subdevices(dev, 1);
if (ret) {
- up(&udfs->sem);
+ up(&devpriv->sem);
return ret;
}
- /* private structure is also simply the usb-structure */
- dev->private = udfs;
- /* the first subdevice is the A/D converter */
- s = &dev->subdevices[SUBDEV_AD];
- /*
- * the URBs get the comedi subdevice which is responsible for reading
- * this is the subdevice which reads data
- */
+
+ /* Analog Input subdevice */
+ s = &dev->subdevices[0];
dev->read_subdev = s;
- /* the subdevice receives as private structure the usb-structure */
- s->private = NULL;
- /* analog input */
- s->type = COMEDI_SUBD_AI;
- /* readable and ref is to ground */
- s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ;
- /* 16 channels */
- s->n_chan = 16;
- /* length of the channellist */
- s->len_chanlist = 16;
- /* callback functions */
- s->insn_read = usbduxfast_ai_insn_read;
- s->do_cmdtest = usbduxfast_ai_cmdtest;
- s->do_cmd = usbduxfast_ai_cmd;
- s->cancel = usbduxfast_ai_cancel;
- /* max value from the A/D converter (12bit+1 bit for overflow) */
- s->maxdata = 0x1000;
- /* range table to convert to physical units */
- s->range_table = &range_usbduxfast_ai_range;
- /* finally decide that it's attached */
- udfs->attached = 1;
- up(&udfs->sem);
- dev_info(dev->class_dev, "successfully attached to usbduxfast.\n");
+ s->type = COMEDI_SUBD_AI;
+ s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ;
+ s->n_chan = 16;
+ s->len_chanlist = 16;
+ s->insn_read = usbduxfast_ai_insn_read;
+ s->do_cmdtest = usbduxfast_ai_cmdtest;
+ s->do_cmd = usbduxfast_ai_cmd;
+ s->cancel = usbduxfast_ai_cancel;
+ s->maxdata = 0x1000;
+ s->range_table = &range_usbduxfast_ai_range;
+
+ up(&devpriv->sem);
+
return 0;
}
-static int usbduxfast_auto_attach(struct comedi_device *dev,
- unsigned long context_unused)
+static int usbduxfast_upload_firmware(struct comedi_device *dev,
+ const u8 *data, size_t size,
+ unsigned long context)
{
- struct usb_interface *uinterf = comedi_to_usb_interface(dev);
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ uint8_t *buf;
+ unsigned char *tmp;
int ret;
- struct usbduxfastsub_s *udfs;
- dev->private = NULL;
- down(&start_stop_sem);
- udfs = usb_get_intfdata(uinterf);
- if (!udfs || !udfs->probed) {
- dev_err(dev->class_dev,
- "usbduxfast: error: auto_attach failed, not connected\n");
- ret = -ENODEV;
- } else if (udfs->attached) {
- dev_err(dev->class_dev,
- "usbduxfast: error: auto_attach failed, already attached\n");
- ret = -ENODEV;
- } else
- ret = usbduxfast_attach_common(dev, udfs);
- up(&start_stop_sem);
- return ret;
-}
+ if (!data)
+ return 0;
-static void usbduxfast_detach(struct comedi_device *dev)
-{
- struct usbduxfastsub_s *usb = dev->private;
-
- if (usb) {
- down(&usb->sem);
- down(&start_stop_sem);
- dev->private = NULL;
- usb->attached = 0;
- usb->comedidev = NULL;
- up(&start_stop_sem);
- up(&usb->sem);
+ if (size > FIRMWARE_MAX_LEN) {
+ dev_err(dev->class_dev, "firmware binary too large for FX2\n");
+ return -ENOMEM;
}
-}
-
-static struct comedi_driver usbduxfast_driver = {
- .driver_name = "usbduxfast",
- .module = THIS_MODULE,
- .auto_attach = usbduxfast_auto_attach,
- .detach = usbduxfast_detach,
-};
-static void usbduxfast_firmware_request_complete_handler(const struct firmware
- *fw, void *context)
-{
- struct usbduxfastsub_s *usbduxfastsub_tmp = context;
- struct usb_interface *uinterf = usbduxfastsub_tmp->interface;
- int ret;
+ /* we generate a local buffer for the firmware */
+ buf = kmemdup(data, size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
- if (fw == NULL)
- return;
+ /* we need a malloc'ed buffer for usb_control_msg() */
+ tmp = kmalloc(1, GFP_KERNEL);
+ if (!tmp) {
+ kfree(buf);
+ return -ENOMEM;
+ }
- /*
- * we need to upload the firmware here because fw will be
- * freed once we've left this function
- */
- ret = firmwareUpload(usbduxfastsub_tmp, fw->data, fw->size);
+ /* stop the current firmware on the device */
+ *tmp = 1; /* 7f92 to one */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXFASTSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXFASTSUB_CPUCS, 0x0000,
+ tmp, 1,
+ EZTIMEOUT);
+ if (ret < 0) {
+ dev_err(dev->class_dev, "can not stop firmware\n");
+ goto done;
+ }
- if (ret) {
- dev_err(&uinterf->dev,
- "Could not upload firmware (err=%d)\n", ret);
- goto out;
+ /* upload the new firmware to the device */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXFASTSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ 0, 0x0000,
+ buf, size,
+ EZTIMEOUT);
+ if (ret < 0) {
+ dev_err(dev->class_dev, "firmware upload failed\n");
+ goto done;
}
- comedi_usb_auto_config(uinterf, &usbduxfast_driver, 0);
- out:
- release_firmware(fw);
+ /* start the new firmware on the device */
+ *tmp = 0; /* 7f92 to zero */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXFASTSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXFASTSUB_CPUCS, 0x0000,
+ tmp, 1,
+ EZTIMEOUT);
+ if (ret < 0)
+ dev_err(dev->class_dev, "can not start firmware\n");
+
+done:
+ kfree(tmp);
+ kfree(buf);
+ return ret;
}
-static int usbduxfast_usb_probe(struct usb_interface *uinterf,
- const struct usb_device_id *id)
+static int usbduxfast_auto_attach(struct comedi_device *dev,
+ unsigned long context_unused)
{
- struct usb_device *udev = interface_to_usbdev(uinterf);
- int i;
- int index;
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxfast_private *devpriv;
int ret;
- if (udev->speed != USB_SPEED_HIGH) {
- dev_err(&uinterf->dev,
+ if (usb->speed != USB_SPEED_HIGH) {
+ dev_err(dev->class_dev,
"This driver needs USB 2.0 to operate. Aborting...\n");
return -ENODEV;
}
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast_: finding a free structure for "
- "the usb-device\n");
-#endif
- down(&start_stop_sem);
- /* look for a free place in the usbduxfast array */
- index = -1;
- for (i = 0; i < NUMUSBDUXFAST; i++) {
- if (!usbduxfastsub[i].probed) {
- index = i;
- break;
- }
- }
- /* no more space */
- if (index == -1) {
- dev_err(&uinterf->dev,
- "Too many usbduxfast-devices connected.\n");
- up(&start_stop_sem);
- return -EMFILE;
- }
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast: usbduxfastsub[%d] is ready to "
- "connect to comedi.\n", index);
-#endif
-
- sema_init(&(usbduxfastsub[index].sem), 1);
- /* save a pointer to the usb device */
- usbduxfastsub[index].usbdev = udev;
-
- /* save the interface itself */
- usbduxfastsub[index].interface = uinterf;
- /* get the interface number from the interface */
- usbduxfastsub[index].ifnum = uinterf->altsetting->desc.bInterfaceNumber;
- /*
- * hand the private data over to the usb subsystem
- * will be needed for disconnect
- */
- usb_set_intfdata(uinterf, &(usbduxfastsub[index]));
-
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast: ifnum=%d\n",
- usbduxfastsub[index].ifnum);
-#endif
- /* create space for the commands going to the usb device */
- usbduxfastsub[index].dux_commands = kmalloc(SIZEOFDUXBUFFER,
- GFP_KERNEL);
- if (!usbduxfastsub[index].dux_commands) {
- tidy_up(&(usbduxfastsub[index]));
- up(&start_stop_sem);
+ devpriv = kzalloc(sizeof(*devpriv), GFP_KERNEL);
+ if (!devpriv)
return -ENOMEM;
- }
- /* create space of the instruction buffer */
- usbduxfastsub[index].insnBuffer = kmalloc(SIZEINSNBUF, GFP_KERNEL);
- if (!usbduxfastsub[index].insnBuffer) {
- tidy_up(&(usbduxfastsub[index]));
- up(&start_stop_sem);
+ dev->private = devpriv;
+
+ sema_init(&devpriv->sem, 1);
+ usb_set_intfdata(intf, devpriv);
+
+ devpriv->duxbuf = kmalloc(SIZEOFDUXBUF, GFP_KERNEL);
+ if (!devpriv->duxbuf)
return -ENOMEM;
- }
- /* setting to alternate setting 1: enabling bulk ep */
- i = usb_set_interface(usbduxfastsub[index].usbdev,
- usbduxfastsub[index].ifnum, 1);
- if (i < 0) {
- dev_err(&uinterf->dev,
- "usbduxfast%d: could not switch to alternate setting 1.\n",
- index);
- tidy_up(&(usbduxfastsub[index]));
- up(&start_stop_sem);
+
+ ret = usb_set_interface(usb,
+ intf->altsetting->desc.bInterfaceNumber, 1);
+ if (ret < 0) {
+ dev_err(dev->class_dev,
+ "could not switch to alternate setting 1\n");
return -ENODEV;
}
- usbduxfastsub[index].urbIn = usb_alloc_urb(0, GFP_KERNEL);
- if (!usbduxfastsub[index].urbIn) {
- dev_err(&uinterf->dev,
- "usbduxfast%d: Could not alloc. urb\n", index);
- tidy_up(&(usbduxfastsub[index]));
- up(&start_stop_sem);
+
+ devpriv->urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!devpriv->urb) {
+ dev_err(dev->class_dev, "Could not alloc. urb\n");
return -ENOMEM;
}
- usbduxfastsub[index].transfer_buffer = kmalloc(SIZEINBUF, GFP_KERNEL);
- if (!usbduxfastsub[index].transfer_buffer) {
- tidy_up(&(usbduxfastsub[index]));
- up(&start_stop_sem);
+
+ devpriv->inbuf = kmalloc(SIZEINBUF, GFP_KERNEL);
+ if (!devpriv->inbuf)
return -ENOMEM;
- }
- /* we've reached the bottom of the function */
- usbduxfastsub[index].probed = 1;
- up(&start_stop_sem);
-
- ret = request_firmware_nowait(THIS_MODULE,
- FW_ACTION_HOTPLUG,
- FIRMWARE,
- &udev->dev,
- GFP_KERNEL,
- usbduxfastsub + index,
- usbduxfast_firmware_request_complete_handler);
- if (ret) {
- dev_err(&uinterf->dev, "could not load firmware (err=%d)\n", ret);
+ ret = comedi_load_firmware(dev, &usb->dev, FIRMWARE,
+ usbduxfast_upload_firmware, 0);
+ if (ret)
return ret;
- }
- dev_info(&uinterf->dev,
- "usbduxfast%d has been successfully initialized.\n", index);
- /* success */
- return 0;
+ return usbduxfast_attach_common(dev);
}
-static void usbduxfast_usb_disconnect(struct usb_interface *intf)
+static void usbduxfast_detach(struct comedi_device *dev)
{
- struct usbduxfastsub_s *udfs = usb_get_intfdata(intf);
- struct usb_device *udev = interface_to_usbdev(intf);
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct usbduxfast_private *devpriv = dev->private;
- if (!udfs) {
- dev_err(&intf->dev, "disconnect called with null pointer.\n");
- return;
- }
- if (udfs->usbdev != udev) {
- dev_err(&intf->dev, "BUG! called with wrong ptr!!!\n");
+ if (!devpriv)
return;
+
+ down(&devpriv->sem);
+
+ usb_set_intfdata(intf, NULL);
+
+ if (devpriv->urb) {
+ /* waits until a running transfer is over */
+ usb_kill_urb(devpriv->urb);
+
+ kfree(devpriv->inbuf);
+ devpriv->inbuf = NULL;
+
+ usb_free_urb(devpriv->urb);
+ devpriv->urb = NULL;
}
- comedi_usb_auto_unconfig(intf);
+ kfree(devpriv->duxbuf);
+ devpriv->duxbuf = NULL;
- down(&start_stop_sem);
- down(&udfs->sem);
- tidy_up(udfs);
- up(&udfs->sem);
- up(&start_stop_sem);
+ devpriv->ai_cmd_running = 0;
-#ifdef CONFIG_COMEDI_DEBUG
- printk(KERN_DEBUG "comedi_: usbduxfast: disconnected from the usb\n");
-#endif
+ up(&devpriv->sem);
+}
+
+static struct comedi_driver usbduxfast_driver = {
+ .driver_name = "usbduxfast",
+ .module = THIS_MODULE,
+ .auto_attach = usbduxfast_auto_attach,
+ .detach = usbduxfast_detach,
+};
+
+static int usbduxfast_usb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+{
+ return comedi_usb_auto_config(intf, &usbduxfast_driver, 0);
}
static const struct usb_device_id usbduxfast_usb_table[] = {
@@ -1657,12 +1152,9 @@ static const struct usb_device_id usbduxfast_usb_table[] = {
MODULE_DEVICE_TABLE(usb, usbduxfast_usb_table);
static struct usb_driver usbduxfast_usb_driver = {
-#ifdef COMEDI_HAVE_USB_DRIVER_OWNER
- .owner = THIS_MODULE,
-#endif
.name = "usbduxfast",
.probe = usbduxfast_usb_probe,
- .disconnect = usbduxfast_usb_disconnect,
+ .disconnect = comedi_usb_auto_unconfig,
.id_table = usbduxfast_usb_table,
};
module_comedi_usb_driver(usbduxfast_driver, usbduxfast_usb_driver);
diff --git a/drivers/staging/comedi/drivers/usbduxsigma.c b/drivers/staging/comedi/drivers/usbduxsigma.c
index d3bc1b9910a7..898c3c450406 100644
--- a/drivers/staging/comedi/drivers/usbduxsigma.c
+++ b/drivers/staging/comedi/drivers/usbduxsigma.c
@@ -1,30 +1,27 @@
/*
- comedi/drivers/usbdux.c
- Copyright (C) 2011 Bernd Porr, Bernd.Porr@f2s.com
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
+ * usbduxsigma.c
+ * Copyright (C) 2011 Bernd Porr, Bernd.Porr@f2s.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*/
+
/*
-Driver: usbduxsigma
-Description: University of Stirling USB DAQ & INCITE Technology Limited
-Devices: [ITL] USB-DUX (usbduxsigma.o)
-Author: Bernd Porr <BerndPorr@f2s.com>
-Updated: 8 Nov 2011
-Status: testing
-*/
+ * Driver: usbduxsigma
+ * Description: University of Stirling USB DAQ & INCITE Technology Limited
+ * Devices: (ITL) USB-DUX [usbduxsigma]
+ * Author: Bernd Porr <BerndPorr@f2s.com>
+ * Updated: 8 Nov 2011
+ * Status: testing
+ */
+
/*
* I must give credit here to Chris Baugher who
* wrote the driver for AT-MIO-16d. I used some parts of this
@@ -44,9 +41,6 @@ Status: testing
* 0.6: corrected wrong input range
*/
-/* generates loads of debug info */
-/* #define NOISY_DUX_DEBUGBUG */
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -55,7 +49,7 @@ Status: testing
#include <linux/usb.h>
#include <linux/fcntl.h>
#include <linux/compiler.h>
-#include <linux/firmware.h>
+
#include "comedi_fc.h"
#include "../comedidev.h"
@@ -63,38 +57,21 @@ Status: testing
#define BULK_TIMEOUT 1000
/* constants for "firmware" upload and download */
-#define FIRMWARE "usbduxsigma_firmware.bin"
-#define USBDUXSUB_FIRMWARE 0xA0
-#define VENDOR_DIR_IN 0xC0
-#define VENDOR_DIR_OUT 0x40
+#define FIRMWARE "usbduxsigma_firmware.bin"
+#define FIRMWARE_MAX_LEN 0x4000
+#define USBDUXSUB_FIRMWARE 0xa0
+#define VENDOR_DIR_IN 0xc0
+#define VENDOR_DIR_OUT 0x40
/* internal addresses of the 8051 processor */
#define USBDUXSUB_CPUCS 0xE600
-/*
- * the minor device number, major is 180 only for debugging purposes and to
- * upload special firmware (programming the eeprom etc) which is not
- * compatible with the comedi framwork
- */
-#define USBDUXSUB_MINOR 32
-
-/* max lenghth of the transfer-buffer for software upload */
-#define TB_LEN 0x2000
-
-/* Input endpoint number: ISO/IRQ */
-#define ISOINEP 6
-
-/* Output endpoint number: ISO/IRQ */
-#define ISOOUTEP 2
-
-/* This EP sends DUX commands to USBDUX */
-#define COMMAND_OUT_EP 1
-
-/* This EP receives the DUX commands from USBDUX */
-#define COMMAND_IN_EP 8
-
-/* Output endpoint for PWM */
-#define PWM_EP 4
+/* USB endpoints */
+#define USBDUXSIGMA_CMD_OUT_EP 1 /* command output */
+#define USBDUXSIGMA_ISO_OUT_EP 2 /* analog output ISO/IRQ */
+#define USBDUXSIGMA_PWM_OUT_EP 4 /* pwm output */
+#define USBDUXSIGMA_ISO_IN_EP 6 /* analog input ISO/IRQ */
+#define USBDUXSIGMA_CMD_IN_EP 8 /* command input */
/* 300Hz max frequ under PWM */
#define MIN_PWM_PERIOD ((long)(1E9/300))
@@ -105,6 +82,8 @@ Status: testing
/* Number of channels (16 AD and offset)*/
#define NUMCHANNELS 16
+#define USBDUXSIGMA_NUM_AO_CHAN 4
+
/* Size of one A/D value */
#define SIZEADIN ((sizeof(int32_t)))
@@ -150,84 +129,54 @@ Status: testing
/* must have more buffers due to buggy USB ctr */
#define NUMOFOUTBUFFERSHIGH 10
-/* Total number of usbdux devices */
-#define NUMUSBDUX 16
-
-/* Analogue in subdevice */
-#define SUBDEV_AD 0
-
-/* Analogue out subdevice */
-#define SUBDEV_DA 1
-
-/* Digital I/O */
-#define SUBDEV_DIO 2
-
-/* timer aka pwm output */
-#define SUBDEV_PWM 3
-
/* number of retries to get the right dux command */
#define RETRIES 10
-/**************************************************/
-/* comedi constants */
-static const struct comedi_lrange range_usbdux_ai_range = { 1, {
- BIP_RANGE
- (2.65/2.0)
- }
-};
+/* bulk transfer commands to usbduxsigma */
+#define USBBUXSIGMA_AD_CMD 0
+#define USBDUXSIGMA_DA_CMD 1
+#define USBDUXSIGMA_DIO_CFG_CMD 2
+#define USBDUXSIGMA_DIO_BITS_CMD 3
+#define USBDUXSIGMA_SINGLE_AD_CMD 4
+#define USBDUXSIGMA_PWM_ON_CMD 7
+#define USBDUXSIGMA_PWM_OFF_CMD 8
-/*
- * private structure of one subdevice
- */
+static const struct comedi_lrange usbduxsigma_ai_range = {
+ 1, {
+ BIP_RANGE(2.65 / 2.0)
+ }
+};
-/*
- * This is the structure which holds all the data of
- * this driver one sub device just now: A/D
- */
-struct usbduxsub {
- /* attached? */
- int attached;
- /* is it associated with a subdevice? */
- int probed;
- /* pointer to the usb-device */
- struct usb_device *usbdev;
+struct usbduxsigma_private {
/* actual number of in-buffers */
- int numOfInBuffers;
+ int n_ai_urbs;
/* actual number of out-buffers */
- int numOfOutBuffers;
+ int n_ao_urbs;
/* ISO-transfer handling: buffers */
- struct urb **urbIn;
- struct urb **urbOut;
+ struct urb **ai_urbs;
+ struct urb **ao_urbs;
/* pwm-transfer handling */
- struct urb *urbPwm;
+ struct urb *pwm_urb;
/* PWM period */
- unsigned int pwmPeriod;
+ unsigned int pwm_period;
/* PWM internal delay for the GPIF in the FX2 */
- uint8_t pwmDelay;
+ uint8_t pwm_delay;
/* size of the PWM buffer which holds the bit pattern */
- int sizePwmBuf;
+ int pwm_buf_sz;
/* input buffer for the ISO-transfer */
- int32_t *inBuffer;
+ int32_t *in_buf;
/* input buffer for single insn */
- int8_t *insnBuffer;
- /* output buffer for single DA outputs */
- int16_t *outBuffer;
- /* interface number */
- int ifnum;
- /* interface structure in 2.6 */
- struct usb_interface *interface;
- /* comedi device for the interrupt context */
- struct comedi_device *comedidev;
- /* is it USB_SPEED_HIGH or not? */
- short int high_speed;
- /* asynchronous command is running */
- short int ai_cmd_running;
- short int ao_cmd_running;
- /* pwm is running */
- short int pwm_cmd_running;
- /* continuous acquisition */
- short int ai_continuous;
- short int ao_continuous;
+ int8_t *insn_buf;
+
+ unsigned int ao_readback[USBDUXSIGMA_NUM_AO_CHAN];
+
+ unsigned high_speed:1;
+ unsigned ai_cmd_running:1;
+ unsigned ai_continuous:1;
+ unsigned ao_cmd_running:1;
+ unsigned ao_continuous:1;
+ unsigned pwm_cmd_running:1;
+
/* number of samples to acquire */
int ai_sample_count;
int ao_sample_count;
@@ -246,126 +195,58 @@ struct usbduxsub {
struct semaphore sem;
};
-/*
- * The pointer to the private usb-data of the driver is also the private data
- * for the comedi-device. This has to be global as the usb subsystem needs
- * global variables. The other reason is that this structure must be there
- * _before_ any comedi command is issued. The usb subsystem must be initialised
- * before comedi can access it.
- */
-static struct usbduxsub usbduxsub[NUMUSBDUX];
-
-static DEFINE_SEMAPHORE(start_stop_sem);
-
-/*
- * Stops the data acquision
- * It should be safe to call this function from any context
- */
-static int usbduxsub_unlink_InURBs(struct usbduxsub *usbduxsub_tmp)
+static void usbduxsigma_ai_stop(struct comedi_device *dev, int do_unlink)
{
- int i = 0;
- int err = 0;
-
- if (usbduxsub_tmp && usbduxsub_tmp->urbIn) {
- for (i = 0; i < usbduxsub_tmp->numOfInBuffers; i++) {
- if (usbduxsub_tmp->urbIn[i]) {
- /* We wait here until all transfers have been
- * cancelled. */
- usb_kill_urb(usbduxsub_tmp->urbIn[i]);
- }
- dev_dbg(&usbduxsub_tmp->interface->dev,
- "comedi: usbdux: unlinked InURB %d, err=%d\n",
- i, err);
- }
- }
- return err;
-}
-
-/*
- * This will stop a running acquisition operation
- * Is called from within this driver from both the
- * interrupt context and from comedi
- */
-static int usbdux_ai_stop(struct usbduxsub *this_usbduxsub, int do_unlink)
-{
- int ret = 0;
-
- if (!this_usbduxsub) {
- pr_err("comedi?: usbdux_ai_stop: this_usbduxsub=NULL!\n");
- return -EFAULT;
- }
- dev_dbg(&this_usbduxsub->interface->dev, "comedi: usbdux_ai_stop\n");
+ struct usbduxsigma_private *devpriv = dev->private;
if (do_unlink) {
- /* stop aquistion */
- ret = usbduxsub_unlink_InURBs(this_usbduxsub);
- }
+ int i;
- this_usbduxsub->ai_cmd_running = 0;
+ for (i = 0; i < devpriv->n_ai_urbs; i++) {
+ if (devpriv->ai_urbs[i])
+ usb_kill_urb(devpriv->ai_urbs[i]);
+ }
+ }
- return ret;
+ devpriv->ai_cmd_running = 0;
}
-/*
- * This will cancel a running acquisition operation.
- * This is called by comedi but never from inside the driver.
- */
-static int usbdux_ai_cancel(struct comedi_device *dev,
- struct comedi_subdevice *s)
+static int usbduxsigma_ai_cancel(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
- struct usbduxsub *this_usbduxsub;
- int res = 0;
-
- /* force unlink of all urbs */
- this_usbduxsub = dev->private;
- if (!this_usbduxsub)
- return -EFAULT;
+ struct usbduxsigma_private *devpriv = dev->private;
- dev_dbg(&this_usbduxsub->interface->dev, "comedi: usbdux_ai_cancel\n");
+ down(&devpriv->sem);
+ /* unlink only if it is really running */
+ usbduxsigma_ai_stop(dev, devpriv->ai_cmd_running);
+ up(&devpriv->sem);
- /* prevent other CPUs from submitting new commands just now */
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- /* unlink only if the urb really has been submitted */
- res = usbdux_ai_stop(this_usbduxsub, this_usbduxsub->ai_cmd_running);
- up(&this_usbduxsub->sem);
- return res;
+ return 0;
}
-/* analogue IN - interrupt service routine */
-static void usbduxsub_ai_IsocIrq(struct urb *urb)
+static void usbduxsigma_ai_urb_complete(struct urb *urb)
{
- int i, err, n;
- struct usbduxsub *this_usbduxsub;
- struct comedi_device *this_comedidev;
- struct comedi_subdevice *s;
- int32_t v;
+ struct comedi_device *dev = urb->context;
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct comedi_subdevice *s = dev->read_subdev;
unsigned int dio_state;
-
- /* the context variable points to the comedi device */
- this_comedidev = urb->context;
- /* the private structure of the subdevice is struct usbduxsub */
- this_usbduxsub = this_comedidev->private;
- /* subdevice which is the AD converter */
- s = &this_comedidev->subdevices[SUBDEV_AD];
+ int32_t val;
+ int ret;
+ int i;
/* first we test if something unusual has just happened */
switch (urb->status) {
case 0:
/* copy the result in the transfer buffer */
- memcpy(this_usbduxsub->inBuffer,
- urb->transfer_buffer, SIZEINBUF);
+ memcpy(devpriv->in_buf, urb->transfer_buffer, SIZEINBUF);
break;
case -EILSEQ:
- /* error in the ISOchronous data */
- /* we don't copy the data into the transfer buffer */
- /* and recycle the last data byte */
- dev_dbg(&urb->dev->dev,
- "comedi%d: usbdux: CRC error in ISO IN stream.\n",
- this_usbduxsub->comedidev->minor);
+ /*
+ * error in the ISOchronous data
+ * we don't copy the data into the transfer buffer
+ * and recycle the last data byte
+ */
+ dev_dbg(dev->class_dev, "CRC error in ISO IN stream\n");
break;
@@ -374,185 +255,127 @@ static void usbduxsub_ai_IsocIrq(struct urb *urb)
case -ESHUTDOWN:
case -ECONNABORTED:
/* happens after an unlink command */
- if (this_usbduxsub->ai_cmd_running) {
- /* we are still running a command */
- /* tell this comedi */
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(this_usbduxsub->comedidev, s);
- /* stop the transfer w/o unlink */
- usbdux_ai_stop(this_usbduxsub, 0);
+ if (devpriv->ai_cmd_running) {
+ usbduxsigma_ai_stop(dev, 0); /* w/o unlink */
+ /* we are still running a command, tell comedi */
+ s->async->events |= (COMEDI_CB_EOA | COMEDI_CB_ERROR);
+ comedi_event(dev, s);
}
return;
default:
- /* a real error on the bus */
- /* pass error to comedi if we are really running a command */
- if (this_usbduxsub->ai_cmd_running) {
- dev_err(&urb->dev->dev,
- "Non-zero urb status received in ai intr "
- "context: %d\n", urb->status);
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(this_usbduxsub->comedidev, s);
- /* don't do an unlink here */
- usbdux_ai_stop(this_usbduxsub, 0);
+ /*
+ * a real error on the bus
+ * pass error to comedi if we are really running a command
+ */
+ if (devpriv->ai_cmd_running) {
+ dev_err(dev->class_dev,
+ "%s: non-zero urb status (%d)\n",
+ __func__, urb->status);
+ usbduxsigma_ai_stop(dev, 0); /* w/o unlink */
+ s->async->events |= (COMEDI_CB_EOA | COMEDI_CB_ERROR);
+ comedi_event(dev, s);
}
return;
}
- /*
- * at this point we are reasonably sure that nothing dodgy has happened
- * are we running a command?
- */
- if (unlikely((!(this_usbduxsub->ai_cmd_running)))) {
- /*
- * not running a command, do not continue execution if no
- * asynchronous command is running in particular not resubmit
- */
+ if (unlikely(!devpriv->ai_cmd_running))
return;
- }
- urb->dev = this_usbduxsub->usbdev;
-
- /* resubmit the urb */
- err = usb_submit_urb(urb, GFP_ATOMIC);
- if (unlikely(err < 0)) {
- dev_err(&urb->dev->dev,
- "comedi_: urb resubmit failed in int-context!"
- "err=%d\n",
- err);
- if (err == -EL2NSYNC)
- dev_err(&urb->dev->dev,
- "buggy USB host controller or bug in IRQ "
- "handler!\n");
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(this_usbduxsub->comedidev, s);
- /* don't do an unlink here */
- usbdux_ai_stop(this_usbduxsub, 0);
+ urb->dev = comedi_to_usb_dev(dev);
+
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (unlikely(ret < 0)) {
+ dev_err(dev->class_dev, "%s: urb resubmit failed (%d)\n",
+ __func__, ret);
+ if (ret == -EL2NSYNC)
+ dev_err(dev->class_dev,
+ "buggy USB host controller or bug in IRQ handler\n");
+ usbduxsigma_ai_stop(dev, 0); /* w/o unlink */
+ s->async->events |= (COMEDI_CB_EOA | COMEDI_CB_ERROR);
+ comedi_event(dev, s);
return;
}
/* get the state of the dio pins to allow external trigger */
- dio_state = be32_to_cpu(this_usbduxsub->inBuffer[0]);
+ dio_state = be32_to_cpu(devpriv->in_buf[0]);
- this_usbduxsub->ai_counter--;
- if (likely(this_usbduxsub->ai_counter > 0))
+ devpriv->ai_counter--;
+ if (likely(devpriv->ai_counter > 0))
return;
/* timer zero, transfer measurements to comedi */
- this_usbduxsub->ai_counter = this_usbduxsub->ai_timer;
+ devpriv->ai_counter = devpriv->ai_timer;
- /* test, if we transmit only a fixed number of samples */
- if (!(this_usbduxsub->ai_continuous)) {
+ if (!devpriv->ai_continuous) {
/* not continuous, fixed number of samples */
- this_usbduxsub->ai_sample_count--;
- /* all samples received? */
- if (this_usbduxsub->ai_sample_count < 0) {
- /* prevent a resubmit next time */
- usbdux_ai_stop(this_usbduxsub, 0);
- /* say comedi that the acquistion is over */
+ devpriv->ai_sample_count--;
+ if (devpriv->ai_sample_count < 0) {
+ usbduxsigma_ai_stop(dev, 0); /* w/o unlink */
+ /* acquistion is over, tell comedi */
s->async->events |= COMEDI_CB_EOA;
- comedi_event(this_usbduxsub->comedidev, s);
+ comedi_event(dev, s);
return;
}
}
+
/* get the data from the USB bus and hand it over to comedi */
- n = s->async->cmd.chanlist_len;
- for (i = 0; i < n; i++) {
+ for (i = 0; i < s->async->cmd.chanlist_len; i++) {
/* transfer data, note first byte is the DIO state */
- v = be32_to_cpu(this_usbduxsub->inBuffer[i+1]);
- /* strip status byte */
- v = v & 0x00ffffff;
- /* convert to unsigned */
- v = v ^ 0x00800000;
- /* write the byte to the buffer */
- err = cfc_write_array_to_buffer(s, &v, sizeof(uint32_t));
- if (unlikely(err == 0)) {
+ val = be32_to_cpu(devpriv->in_buf[i+1]);
+ val &= 0x00ffffff; /* strip status byte */
+ val ^= 0x00800000; /* convert to unsigned */
+
+ ret = cfc_write_array_to_buffer(s, &val, sizeof(uint32_t));
+ if (unlikely(ret == 0)) {
/* buffer overflow */
- usbdux_ai_stop(this_usbduxsub, 0);
+ usbduxsigma_ai_stop(dev, 0); /* w/o unlink */
return;
}
}
/* tell comedi that data is there */
- s->async->events |= COMEDI_CB_BLOCK | COMEDI_CB_EOS;
- comedi_event(this_usbduxsub->comedidev, s);
+ s->async->events |= (COMEDI_CB_BLOCK | COMEDI_CB_EOS);
+ comedi_event(dev, s);
}
-static int usbduxsub_unlink_OutURBs(struct usbduxsub *usbduxsub_tmp)
+static void usbduxsigma_ao_stop(struct comedi_device *dev, int do_unlink)
{
- int i = 0;
- int err = 0;
+ struct usbduxsigma_private *devpriv = dev->private;
- if (usbduxsub_tmp && usbduxsub_tmp->urbOut) {
- for (i = 0; i < usbduxsub_tmp->numOfOutBuffers; i++) {
- if (usbduxsub_tmp->urbOut[i])
- usb_kill_urb(usbduxsub_tmp->urbOut[i]);
+ if (do_unlink) {
+ int i;
- dev_dbg(&usbduxsub_tmp->interface->dev,
- "comedi: usbdux: unlinked OutURB %d: res=%d\n",
- i, err);
+ for (i = 0; i < devpriv->n_ao_urbs; i++) {
+ if (devpriv->ao_urbs[i])
+ usb_kill_urb(devpriv->ao_urbs[i]);
}
}
- return err;
-}
-/* This will cancel a running acquisition operation
- * in any context.
- */
-static int usbdux_ao_stop(struct usbduxsub *this_usbduxsub, int do_unlink)
-{
- int ret = 0;
-
- if (!this_usbduxsub)
- return -EFAULT;
- dev_dbg(&this_usbduxsub->interface->dev, "comedi: usbdux_ao_cancel\n");
-
- if (do_unlink)
- ret = usbduxsub_unlink_OutURBs(this_usbduxsub);
-
- this_usbduxsub->ao_cmd_running = 0;
-
- return ret;
+ devpriv->ao_cmd_running = 0;
}
-/* force unlink, is called by comedi */
-static int usbdux_ao_cancel(struct comedi_device *dev,
- struct comedi_subdevice *s)
+static int usbduxsigma_ao_cancel(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
- struct usbduxsub *this_usbduxsub = dev->private;
- int res = 0;
+ struct usbduxsigma_private *devpriv = dev->private;
- if (!this_usbduxsub)
- return -EFAULT;
-
- /* prevent other CPUs from submitting a command just now */
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
+ down(&devpriv->sem);
/* unlink only if it is really running */
- res = usbdux_ao_stop(this_usbduxsub, this_usbduxsub->ao_cmd_running);
- up(&this_usbduxsub->sem);
- return res;
+ usbduxsigma_ao_stop(dev, devpriv->ao_cmd_running);
+ up(&devpriv->sem);
+
+ return 0;
}
-static void usbduxsub_ao_IsocIrq(struct urb *urb)
+static void usbduxsigma_ao_urb_complete(struct urb *urb)
{
- int i, ret;
+ struct comedi_device *dev = urb->context;
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct comedi_subdevice *s = dev->write_subdev;
uint8_t *datap;
- struct usbduxsub *this_usbduxsub;
- struct comedi_device *this_comedidev;
- struct comedi_subdevice *s;
-
- /* the context variable points to the subdevice */
- this_comedidev = urb->context;
- /* the private structure of the subdevice is struct usbduxsub */
- this_usbduxsub = this_comedidev->private;
-
- s = &this_comedidev->subdevices[SUBDEV_DA];
+ int len;
+ int ret;
+ int i;
switch (urb->status) {
case 0:
@@ -563,347 +386,141 @@ static void usbduxsub_ao_IsocIrq(struct urb *urb)
case -ENOENT:
case -ESHUTDOWN:
case -ECONNABORTED:
- /* after an unlink command, unplug, ... etc */
- /* no unlink needed here. Already shutting down. */
- if (this_usbduxsub->ao_cmd_running) {
+ /* happens after an unlink command */
+ if (devpriv->ao_cmd_running) {
+ usbduxsigma_ao_stop(dev, 0); /* w/o unlink */
s->async->events |= COMEDI_CB_EOA;
- comedi_event(this_usbduxsub->comedidev, s);
- usbdux_ao_stop(this_usbduxsub, 0);
+ comedi_event(dev, s);
}
return;
default:
/* a real error */
- if (this_usbduxsub->ao_cmd_running) {
- dev_err(&urb->dev->dev,
- "comedi_: Non-zero urb status received in ao "
- "intr context: %d\n", urb->status);
- s->async->events |= COMEDI_CB_ERROR;
- s->async->events |= COMEDI_CB_EOA;
- comedi_event(this_usbduxsub->comedidev, s);
- /* we do an unlink if we are in the high speed mode */
- usbdux_ao_stop(this_usbduxsub, 0);
+ if (devpriv->ao_cmd_running) {
+ dev_err(dev->class_dev,
+ "%s: non-zero urb status (%d)\n",
+ __func__, urb->status);
+ usbduxsigma_ao_stop(dev, 0); /* w/o unlink */
+ s->async->events |= (COMEDI_CB_ERROR | COMEDI_CB_EOA);
+ comedi_event(dev, s);
}
return;
}
- /* are we actually running? */
- if (!(this_usbduxsub->ao_cmd_running))
+ if (!devpriv->ao_cmd_running)
return;
- /* normal operation: executing a command in this subdevice */
- this_usbduxsub->ao_counter--;
- if ((int)this_usbduxsub->ao_counter <= 0) {
- /* timer zero */
- this_usbduxsub->ao_counter = this_usbduxsub->ao_timer;
-
- /* handle non continuous acquisition */
- if (!(this_usbduxsub->ao_continuous)) {
- /* fixed number of samples */
- this_usbduxsub->ao_sample_count--;
- if (this_usbduxsub->ao_sample_count < 0) {
- /* all samples transmitted */
- usbdux_ao_stop(this_usbduxsub, 0);
+ devpriv->ao_counter--;
+ if ((int)devpriv->ao_counter <= 0) {
+ /* timer zero, transfer from comedi */
+ devpriv->ao_counter = devpriv->ao_timer;
+
+ if (!devpriv->ao_continuous) {
+ /* not continuous, fixed number of samples */
+ devpriv->ao_sample_count--;
+ if (devpriv->ao_sample_count < 0) {
+ usbduxsigma_ao_stop(dev, 0); /* w/o unlink */
+ /* acquistion is over, tell comedi */
s->async->events |= COMEDI_CB_EOA;
- comedi_event(this_usbduxsub->comedidev, s);
- /* no resubmit of the urb */
+ comedi_event(dev, s);
return;
}
}
+
/* transmit data to the USB bus */
- ((uint8_t *) (urb->transfer_buffer))[0] =
- s->async->cmd.chanlist_len;
- for (i = 0; i < s->async->cmd.chanlist_len; i++) {
- short temp;
- if (i >= NUMOUTCHANNELS)
- break;
-
- /* pointer to the DA */
- datap =
- (&(((uint8_t *) urb->transfer_buffer)[i * 2 + 1]));
- /* get the data from comedi */
- ret = comedi_buf_get(s->async, &temp);
- datap[0] = temp;
- datap[1] = this_usbduxsub->dac_commands[i];
- /* printk("data[0]=%x, data[1]=%x, data[2]=%x\n", */
- /* datap[0],datap[1],datap[2]); */
+ datap = urb->transfer_buffer;
+ len = s->async->cmd.chanlist_len;
+ *datap++ = len;
+ for (i = 0; i < len; i++) {
+ unsigned int chan = devpriv->dac_commands[i];
+ short val;
+
+ ret = comedi_buf_get(s->async, &val);
if (ret < 0) {
- dev_err(&urb->dev->dev,
- "comedi: buffer underflow\n");
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_OVERFLOW;
+ dev_err(dev->class_dev, "buffer underflow\n");
+ s->async->events |= (COMEDI_CB_EOA |
+ COMEDI_CB_OVERFLOW);
}
- /* transmit data to comedi */
+ *datap++ = val;
+ *datap++ = chan;
+ devpriv->ao_readback[chan] = val;
+
s->async->events |= COMEDI_CB_BLOCK;
- comedi_event(this_usbduxsub->comedidev, s);
+ comedi_event(dev, s);
}
}
+
urb->transfer_buffer_length = SIZEOUTBUF;
- urb->dev = this_usbduxsub->usbdev;
+ urb->dev = comedi_to_usb_dev(dev);
urb->status = 0;
- if (this_usbduxsub->ao_cmd_running) {
- if (this_usbduxsub->high_speed) {
- /* uframes */
- urb->interval = 8;
- } else {
- /* frames */
- urb->interval = 1;
- }
- urb->number_of_packets = 1;
- urb->iso_frame_desc[0].offset = 0;
- urb->iso_frame_desc[0].length = SIZEOUTBUF;
- urb->iso_frame_desc[0].status = 0;
- ret = usb_submit_urb(urb, GFP_ATOMIC);
- if (ret < 0) {
- dev_err(&urb->dev->dev,
- "comedi_: ao urb resubm failed in int-cont. "
- "ret=%d", ret);
- if (ret == EL2NSYNC)
- dev_err(&urb->dev->dev,
- "buggy USB host controller or bug in "
- "IRQ handling!\n");
-
- s->async->events |= COMEDI_CB_EOA;
- s->async->events |= COMEDI_CB_ERROR;
- comedi_event(this_usbduxsub->comedidev, s);
- /* don't do an unlink here */
- usbdux_ao_stop(this_usbduxsub, 0);
- }
- }
-}
-
-static int usbduxsub_start(struct usbduxsub *usbduxsub)
-{
- int errcode = 0;
- uint8_t *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(16, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to zero */
- local_transfer_buffer[0] = 0;
- errcode = usb_control_msg(usbduxsub->usbdev,
- /* create a pipe for a control transfer */
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* bRequest, "Firmware" */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* Value */
- USBDUXSUB_CPUCS,
- /* Index */
- 0x0000,
- /* address of the transfer buffer */
- local_transfer_buffer,
- /* Length */
- 1,
- /* Timeout */
- BULK_TIMEOUT);
- if (errcode < 0)
- dev_err(&usbduxsub->interface->dev,
- "comedi_: control msg failed (start)\n");
-
- kfree(local_transfer_buffer);
- return errcode;
-}
-
-static int usbduxsub_stop(struct usbduxsub *usbduxsub)
-{
- int errcode = 0;
- uint8_t *local_transfer_buffer;
-
- local_transfer_buffer = kmalloc(16, GFP_KERNEL);
- if (!local_transfer_buffer)
- return -ENOMEM;
-
- /* 7f92 to one */
- local_transfer_buffer[0] = 1;
- errcode = usb_control_msg(usbduxsub->usbdev,
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* bRequest, "Firmware" */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* Value */
- USBDUXSUB_CPUCS,
- /* Index */
- 0x0000, local_transfer_buffer,
- /* Length */
- 1,
- /* Timeout */
- BULK_TIMEOUT);
- if (errcode < 0)
- dev_err(&usbduxsub->interface->dev,
- "comedi_: control msg failed (stop)\n");
-
- kfree(local_transfer_buffer);
- return errcode;
-}
-
-static int usbduxsub_upload(struct usbduxsub *usbduxsub,
- uint8_t *local_transfer_buffer,
- unsigned int startAddr, unsigned int len)
-{
- int errcode;
-
- errcode = usb_control_msg(usbduxsub->usbdev,
- usb_sndctrlpipe(usbduxsub->usbdev, 0),
- /* brequest, firmware */
- USBDUXSUB_FIRMWARE,
- /* bmRequestType */
- VENDOR_DIR_OUT,
- /* value */
- startAddr,
- /* index */
- 0x0000,
- /* our local safe buffer */
- local_transfer_buffer,
- /* length */
- len,
- /* timeout */
- BULK_TIMEOUT);
- dev_dbg(&usbduxsub->interface->dev, "comedi_: result=%d\n", errcode);
- if (errcode < 0) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: upload failed\n");
- return errcode;
- }
- return 0;
-}
-
-/* the FX2LP has twice as much as the standard FX2 */
-#define FIRMWARE_MAX_LEN 0x4000
-
-static int firmwareUpload(struct usbduxsub *usbduxsub,
- const u8 *firmwareBinary, int sizeFirmware)
-{
- int ret;
- uint8_t *fwBuf;
-
- if (!firmwareBinary)
- return 0;
-
- if (sizeFirmware > FIRMWARE_MAX_LEN) {
- dev_err(&usbduxsub->interface->dev,
- "usbduxsigma firmware binary it too large for FX2.\n");
- return -ENOMEM;
- }
-
- /* we generate a local buffer for the firmware */
- fwBuf = kmemdup(firmwareBinary, sizeFirmware, GFP_KERNEL);
- if (!fwBuf) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: mem alloc for firmware failed\n");
- return -ENOMEM;
- }
-
- ret = usbduxsub_stop(usbduxsub);
- if (ret < 0) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: can not stop firmware\n");
- kfree(fwBuf);
- return ret;
- }
-
- ret = usbduxsub_upload(usbduxsub, fwBuf, 0, sizeFirmware);
- if (ret < 0) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: firmware upload failed\n");
- kfree(fwBuf);
- return ret;
- }
- ret = usbduxsub_start(usbduxsub);
+ if (devpriv->high_speed)
+ urb->interval = 8; /* uframes */
+ else
+ urb->interval = 1; /* frames */
+ urb->number_of_packets = 1;
+ urb->iso_frame_desc[0].offset = 0;
+ urb->iso_frame_desc[0].length = SIZEOUTBUF;
+ urb->iso_frame_desc[0].status = 0;
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
if (ret < 0) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: can not start firmware\n");
- kfree(fwBuf);
- return ret;
+ dev_err(dev->class_dev,
+ "%s: urb resubmit failed (%d)\n",
+ __func__, ret);
+ if (ret == EL2NSYNC)
+ dev_err(dev->class_dev,
+ "buggy USB host controller or bug in IRQ handler\n");
+ usbduxsigma_ao_stop(dev, 0); /* w/o unlink */
+ s->async->events |= (COMEDI_CB_EOA | COMEDI_CB_ERROR);
+ comedi_event(dev, s);
}
- kfree(fwBuf);
- return 0;
}
-static int usbduxsub_submit_InURBs(struct usbduxsub *usbduxsub)
+static int usbduxsigma_submit_urbs(struct comedi_device *dev,
+ struct urb **urbs, int num_urbs,
+ int input_urb)
{
- int i, errFlag;
-
- if (!usbduxsub)
- return -EFAULT;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct urb *urb;
+ int ret;
+ int i;
/* Submit all URBs and start the transfer on the bus */
- for (i = 0; i < usbduxsub->numOfInBuffers; i++) {
- /* in case of a resubmission after an unlink... */
- usbduxsub->urbIn[i]->interval = usbduxsub->ai_interval;
- usbduxsub->urbIn[i]->context = usbduxsub->comedidev;
- usbduxsub->urbIn[i]->dev = usbduxsub->usbdev;
- usbduxsub->urbIn[i]->status = 0;
- usbduxsub->urbIn[i]->transfer_flags = URB_ISO_ASAP;
- dev_dbg(&usbduxsub->interface->dev,
- "comedi%d: submitting in-urb[%d]: %p,%p intv=%d\n",
- usbduxsub->comedidev->minor, i,
- (usbduxsub->urbIn[i]->context),
- (usbduxsub->urbIn[i]->dev),
- (usbduxsub->urbIn[i]->interval));
- errFlag = usb_submit_urb(usbduxsub->urbIn[i], GFP_ATOMIC);
- if (errFlag) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: ai: usb_submit_urb(%d) error %d\n",
- i, errFlag);
- return errFlag;
- }
- }
- return 0;
-}
-
-static int usbduxsub_submit_OutURBs(struct usbduxsub *usbduxsub)
-{
- int i, errFlag;
-
- if (!usbduxsub)
- return -EFAULT;
+ for (i = 0; i < num_urbs; i++) {
+ urb = urbs[i];
- for (i = 0; i < usbduxsub->numOfOutBuffers; i++) {
- dev_dbg(&usbduxsub->interface->dev,
- "comedi_: submitting out-urb[%d]\n", i);
/* in case of a resubmission after an unlink... */
- usbduxsub->urbOut[i]->context = usbduxsub->comedidev;
- usbduxsub->urbOut[i]->dev = usbduxsub->usbdev;
- usbduxsub->urbOut[i]->status = 0;
- usbduxsub->urbOut[i]->transfer_flags = URB_ISO_ASAP;
- errFlag = usb_submit_urb(usbduxsub->urbOut[i], GFP_ATOMIC);
- if (errFlag) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: ao: usb_submit_urb(%d) error %d\n",
- i, errFlag);
- return errFlag;
- }
+ if (input_urb)
+ urb->interval = devpriv->ai_interval;
+ urb->context = dev;
+ urb->dev = usb;
+ urb->status = 0;
+ urb->transfer_flags = URB_ISO_ASAP;
+
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (ret)
+ return ret;
}
return 0;
}
-static int chanToInterval(int nChannels)
+static int usbduxsigma_chans_to_interval(int num_chan)
{
- if (nChannels <= 2)
- /* 4kHz */
- return 2;
- if (nChannels <= 8)
- /* 2kHz */
- return 4;
- /* 1kHz */
- return 8;
+ if (num_chan <= 2)
+ return 2; /* 4kHz */
+ if (num_chan <= 8)
+ return 4; /* 2kHz */
+ return 8; /* 1kHz */
}
-static int usbdux_ai_cmdtest(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_cmd *cmd)
+static int usbduxsigma_ai_cmdtest(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_cmd *cmd)
{
- struct usbduxsub *this_usbduxsub = dev->private;
- int err = 0, i;
- unsigned int tmpTimer;
-
- if (!(this_usbduxsub->probed))
- return -ENODEV;
+ struct usbduxsigma_private *devpriv = dev->private;
+ int high_speed = devpriv->high_speed;
+ int interval = usbduxsigma_chans_to_interval(cmd->chanlist_len);
+ int err = 0;
/* Step 1 : check if triggers are trivially valid */
@@ -934,34 +551,28 @@ static int usbdux_ai_cmdtest(struct comedi_device *dev,
err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, 0);
if (cmd->scan_begin_src == TRIG_TIMER) {
- if (this_usbduxsub->high_speed) {
+ unsigned int tmp;
+
+ if (high_speed) {
/*
* In high speed mode microframes are possible.
* However, during one microframe we can roughly
* sample two channels. Thus, the more channels
* are in the channel list the more time we need.
*/
- i = chanToInterval(cmd->chanlist_len);
err |= cfc_check_trigger_arg_min(&cmd->scan_begin_arg,
- (1000000 / 8 * i));
- /* now calc the real sampling rate with all the
- * rounding errors */
- tmpTimer =
- ((unsigned int)(cmd->scan_begin_arg / 125000)) *
- 125000;
+ (1000000 / 8 * interval));
+
+ tmp = (cmd->scan_begin_arg / 125000) * 125000;
} else {
/* full speed */
/* 1kHz scans every USB frame */
err |= cfc_check_trigger_arg_min(&cmd->scan_begin_arg,
1000000);
- /*
- * calc the real sampling rate with the rounding errors
- */
- tmpTimer = ((unsigned int)(cmd->scan_begin_arg /
- 1000000)) * 1000000;
+
+ tmp = (cmd->scan_begin_arg / 1000000) * 1000000;
}
- err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg,
- tmpTimer);
+ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, tmp);
}
err |= cfc_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len);
@@ -976,6 +587,37 @@ static int usbdux_ai_cmdtest(struct comedi_device *dev,
if (err)
return 3;
+ /* Step 4: fix up any arguments */
+
+ if (high_speed) {
+ /*
+ * every 2 channels get a time window of 125us. Thus, if we
+ * sample all 16 channels we need 1ms. If we sample only one
+ * channel we need only 125us
+ */
+ devpriv->ai_interval = interval;
+ devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval);
+ } else {
+ /* interval always 1ms */
+ devpriv->ai_interval = 1;
+ devpriv->ai_timer = cmd->scan_begin_arg / 1000000;
+ }
+ if (devpriv->ai_timer < 1)
+ err |= -EINVAL;
+
+ if (cmd->stop_src == TRIG_COUNT) {
+ /* data arrives as one packet */
+ devpriv->ai_sample_count = cmd->stop_arg;
+ devpriv->ai_continuous = 0;
+ } else {
+ /* continuous acquisition */
+ devpriv->ai_continuous = 1;
+ devpriv->ai_sample_count = 0;
+ }
+
+ if (err)
+ return 4;
+
return 0;
}
@@ -993,536 +635,278 @@ static void create_adc_command(unsigned int chan,
(*muxsg1) = (*muxsg1) | (1 << (chan-8));
}
+static int usbbuxsigma_send_cmd(struct comedi_device *dev, int cmd_type)
+{
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
+ int nsent;
-/* bulk transfers to usbdux */
-
-#define SENDADCOMMANDS 0
-#define SENDDACOMMANDS 1
-#define SENDDIOCONFIGCOMMAND 2
-#define SENDDIOBITSCOMMAND 3
-#define SENDSINGLEAD 4
-#define SENDPWMON 7
-#define SENDPWMOFF 8
+ devpriv->dux_commands[0] = cmd_type;
-static int send_dux_commands(struct usbduxsub *this_usbduxsub, int cmd_type)
-{
- int result, nsent;
-
- this_usbduxsub->dux_commands[0] = cmd_type;
-#ifdef NOISY_DUX_DEBUGBUG
- printk(KERN_DEBUG "comedi%d: usbdux: dux_commands: ",
- this_usbduxsub->comedidev->minor);
- for (result = 0; result < SIZEOFDUXBUFFER; result++)
- printk(" %02x", this_usbduxsub->dux_commands[result]);
- printk("\n");
-#endif
- result = usb_bulk_msg(this_usbduxsub->usbdev,
- usb_sndbulkpipe(this_usbduxsub->usbdev,
- COMMAND_OUT_EP),
- this_usbduxsub->dux_commands, SIZEOFDUXBUFFER,
- &nsent, BULK_TIMEOUT);
- if (result < 0)
- dev_err(&this_usbduxsub->interface->dev, "comedi%d: "
- "could not transmit dux_command to the usb-device, "
- "err=%d\n", this_usbduxsub->comedidev->minor, result);
-
- return result;
+ return usb_bulk_msg(usb, usb_sndbulkpipe(usb, USBDUXSIGMA_CMD_OUT_EP),
+ devpriv->dux_commands, SIZEOFDUXBUFFER,
+ &nsent, BULK_TIMEOUT);
}
-static int receive_dux_commands(struct usbduxsub *this_usbduxsub, int command)
+static int usbduxsigma_receive_cmd(struct comedi_device *dev, int command)
{
- int result = (-EFAULT);
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
int nrec;
+ int ret;
int i;
for (i = 0; i < RETRIES; i++) {
- result = usb_bulk_msg(this_usbduxsub->usbdev,
- usb_rcvbulkpipe(this_usbduxsub->usbdev,
- COMMAND_IN_EP),
- this_usbduxsub->insnBuffer, SIZEINSNBUF,
- &nrec, BULK_TIMEOUT);
- if (result < 0) {
- dev_err(&this_usbduxsub->interface->dev, "comedi%d: "
- "insn: USB error %d "
- "while receiving DUX command"
- "\n", this_usbduxsub->comedidev->minor,
- result);
- return result;
- }
- if (this_usbduxsub->insnBuffer[0] == command)
- return result;
+ ret = usb_bulk_msg(usb,
+ usb_rcvbulkpipe(usb, USBDUXSIGMA_CMD_IN_EP),
+ devpriv->insn_buf, SIZEINSNBUF,
+ &nrec, BULK_TIMEOUT);
+ if (ret < 0)
+ return ret;
+
+ if (devpriv->insn_buf[0] == command)
+ return 0;
}
- /* this is only reached if the data has been requested a couple of
- * times */
- dev_err(&this_usbduxsub->interface->dev, "comedi%d: insn: "
- "wrong data returned from firmware: want %d, got %d.\n",
- this_usbduxsub->comedidev->minor, command,
- this_usbduxsub->insnBuffer[0]);
+ /*
+ * This is only reached if the data has been requested a
+ * couple of times and the command was not received.
+ */
return -EFAULT;
}
-static int usbdux_ai_inttrig(struct comedi_device *dev,
- struct comedi_subdevice *s, unsigned int trignum)
+static int usbduxsigma_ai_inttrig(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned int trignum)
{
+ struct usbduxsigma_private *devpriv = dev->private;
int ret;
- struct usbduxsub *this_usbduxsub = dev->private;
- if (!this_usbduxsub)
- return -EFAULT;
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ai_inttrig\n", dev->minor);
-
- if (trignum != 0) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ai_inttrig: invalid trignum\n",
- dev->minor);
- up(&this_usbduxsub->sem);
+ if (trignum != 0)
return -EINVAL;
- }
- if (!(this_usbduxsub->ai_cmd_running)) {
- this_usbduxsub->ai_cmd_running = 1;
- ret = usbduxsub_submit_InURBs(this_usbduxsub);
+
+ down(&devpriv->sem);
+ if (!devpriv->ai_cmd_running) {
+ ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs,
+ devpriv->n_ai_urbs, 1);
if (ret < 0) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ai_inttrig: "
- "urbSubmit: err=%d\n", dev->minor, ret);
- this_usbduxsub->ai_cmd_running = 0;
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
return ret;
}
+ devpriv->ai_cmd_running = 1;
s->async->inttrig = NULL;
- } else {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: ai_inttrig but acqu is already running\n",
- dev->minor);
}
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
+
return 1;
}
-static int usbdux_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
+static int usbduxsigma_ai_cmd(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
+ struct usbduxsigma_private *devpriv = dev->private;
struct comedi_cmd *cmd = &s->async->cmd;
- unsigned int chan;
- int i, ret;
- struct usbduxsub *this_usbduxsub = dev->private;
- int result;
+ unsigned int len = cmd->chanlist_len;
uint8_t muxsg0 = 0;
uint8_t muxsg1 = 0;
uint8_t sysred = 0;
+ int ret;
+ int i;
- if (!this_usbduxsub)
- return -EFAULT;
-
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ai_cmd\n", dev->minor);
-
- /* block other CPUs from starting an ai_cmd */
- down(&this_usbduxsub->sem);
+ down(&devpriv->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- if (this_usbduxsub->ai_cmd_running) {
- dev_err(&this_usbduxsub->interface->dev, "comedi%d: "
- "ai_cmd not possible. Another ai_cmd is running.\n",
- dev->minor);
- up(&this_usbduxsub->sem);
- return -EBUSY;
- }
/* set current channel of the running acquisition to zero */
s->async->cur_chan = 0;
+ for (i = 0; i < len; i++) {
+ unsigned int chan = CR_CHAN(cmd->chanlist[i]);
- /* first the number of channels per time step */
- this_usbduxsub->dux_commands[1] = cmd->chanlist_len;
-
- /* CONFIG0 */
- this_usbduxsub->dux_commands[2] = 0x12;
-
- /* CONFIG1: 23kHz sampling rate, delay = 0us, */
- this_usbduxsub->dux_commands[3] = 0x03;
-
- /* CONFIG3: differential channels off */
- this_usbduxsub->dux_commands[4] = 0x00;
-
- for (i = 0; i < cmd->chanlist_len; i++) {
- chan = CR_CHAN(cmd->chanlist[i]);
create_adc_command(chan, &muxsg0, &muxsg1);
- if (i >= NUMCHANNELS) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: channel list too long\n",
- dev->minor);
- break;
- }
- }
- this_usbduxsub->dux_commands[5] = muxsg0;
- this_usbduxsub->dux_commands[6] = muxsg1;
- this_usbduxsub->dux_commands[7] = sysred;
-
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi %d: sending commands to the usb device: size=%u\n",
- dev->minor, NUMCHANNELS);
-
- result = send_dux_commands(this_usbduxsub, SENDADCOMMANDS);
- if (result < 0) {
- up(&this_usbduxsub->sem);
- return result;
}
- if (this_usbduxsub->high_speed) {
- /*
- * every 2 channels get a time window of 125us. Thus, if we
- * sample all 16 channels we need 1ms. If we sample only one
- * channel we need only 125us
- */
- this_usbduxsub->ai_interval =
- chanToInterval(cmd->chanlist_len);
- this_usbduxsub->ai_timer = cmd->scan_begin_arg / (125000 *
- (this_usbduxsub->
- ai_interval));
- } else {
- /* interval always 1ms */
- this_usbduxsub->ai_interval = 1;
- this_usbduxsub->ai_timer = cmd->scan_begin_arg / 1000000;
- }
- if (this_usbduxsub->ai_timer < 1) {
- dev_err(&this_usbduxsub->interface->dev, "comedi%d: ai_cmd: "
- "timer=%d, scan_begin_arg=%d. "
- "Not properly tested by cmdtest?\n", dev->minor,
- this_usbduxsub->ai_timer, cmd->scan_begin_arg);
- up(&this_usbduxsub->sem);
- return -EINVAL;
- }
- this_usbduxsub->ai_counter = this_usbduxsub->ai_timer;
+ devpriv->dux_commands[1] = len; /* num channels per time step */
+ devpriv->dux_commands[2] = 0x12; /* CONFIG0 */
+ devpriv->dux_commands[3] = 0x03; /* CONFIG1: 23kHz sample, delay 0us */
+ devpriv->dux_commands[4] = 0x00; /* CONFIG3: diff. channels off */
+ devpriv->dux_commands[5] = muxsg0;
+ devpriv->dux_commands[6] = muxsg1;
+ devpriv->dux_commands[7] = sysred;
- if (cmd->stop_src == TRIG_COUNT) {
- /* data arrives as one packet */
- this_usbduxsub->ai_sample_count = cmd->stop_arg;
- this_usbduxsub->ai_continuous = 0;
- } else {
- /* continuous acquisition */
- this_usbduxsub->ai_continuous = 1;
- this_usbduxsub->ai_sample_count = 0;
+ ret = usbbuxsigma_send_cmd(dev, USBBUXSIGMA_AD_CMD);
+ if (ret < 0) {
+ up(&devpriv->sem);
+ return ret;
}
+ devpriv->ai_counter = devpriv->ai_timer;
+
if (cmd->start_src == TRIG_NOW) {
/* enable this acquisition operation */
- this_usbduxsub->ai_cmd_running = 1;
- ret = usbduxsub_submit_InURBs(this_usbduxsub);
+ ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs,
+ devpriv->n_ai_urbs, 1);
if (ret < 0) {
- this_usbduxsub->ai_cmd_running = 0;
- /* fixme: unlink here?? */
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
return ret;
}
s->async->inttrig = NULL;
- } else {
- /* TRIG_INT */
- /* don't enable the acquision operation */
- /* wait for an internal signal */
- s->async->inttrig = usbdux_ai_inttrig;
+ devpriv->ai_cmd_running = 1;
+ } else { /* TRIG_INT */
+ /* wait for an internal signal and submit the urbs later */
+ s->async->inttrig = usbduxsigma_ai_inttrig;
}
- up(&this_usbduxsub->sem);
+
+ up(&devpriv->sem);
+
return 0;
}
-/* Mode 0 is used to get a single conversion on demand */
-static int usbdux_ai_insn_read(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int usbduxsigma_ai_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- int i;
- int32_t one = 0;
- int chan;
- int err;
- struct usbduxsub *this_usbduxsub = dev->private;
+ struct usbduxsigma_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
uint8_t muxsg0 = 0;
uint8_t muxsg1 = 0;
uint8_t sysred = 0;
+ int ret;
+ int i;
- if (!this_usbduxsub)
- return 0;
-
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: ai_insn_read, insn->n=%d, insn->subdev=%d\n",
- dev->minor, insn->n, insn->subdev);
-
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- if (this_usbduxsub->ai_cmd_running) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: ai_insn_read not possible. "
- "Async Command is running.\n", dev->minor);
- up(&this_usbduxsub->sem);
- return 0;
+ down(&devpriv->sem);
+ if (devpriv->ai_cmd_running) {
+ up(&devpriv->sem);
+ return -EBUSY;
}
- /* sample one channel */
- /* CONFIG0: chopper on */
- this_usbduxsub->dux_commands[1] = 0x16;
-
- /* CONFIG1: 2kHz sampling rate */
- this_usbduxsub->dux_commands[2] = 0x80;
-
- /* CONFIG3: differential channels off */
- this_usbduxsub->dux_commands[3] = 0x00;
-
- chan = CR_CHAN(insn->chanspec);
create_adc_command(chan, &muxsg0, &muxsg1);
- this_usbduxsub->dux_commands[4] = muxsg0;
- this_usbduxsub->dux_commands[5] = muxsg1;
- this_usbduxsub->dux_commands[6] = sysred;
+ /* Mode 0 is used to get a single conversion on demand */
+ devpriv->dux_commands[1] = 0x16; /* CONFIG0: chopper on */
+ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */
+ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. channels off */
+ devpriv->dux_commands[4] = muxsg0;
+ devpriv->dux_commands[5] = muxsg1;
+ devpriv->dux_commands[6] = sysred;
/* adc commands */
- err = send_dux_commands(this_usbduxsub, SENDSINGLEAD);
- if (err < 0) {
- up(&this_usbduxsub->sem);
- return err;
+ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD);
+ if (ret < 0) {
+ up(&devpriv->sem);
+ return ret;
}
for (i = 0; i < insn->n; i++) {
- err = receive_dux_commands(this_usbduxsub, SENDSINGLEAD);
- if (err < 0) {
- up(&this_usbduxsub->sem);
- return 0;
- }
- /* 32 bits big endian from the A/D converter */
- one = be32_to_cpu(*((int32_t *)
- ((this_usbduxsub->insnBuffer)+1)));
- /* mask out the status byte */
- one = one & 0x00ffffff;
- /* turn it into an unsigned integer */
- one = one ^ 0x00800000;
- data[i] = one;
- }
- up(&this_usbduxsub->sem);
- return i;
-}
-
-
-
+ int32_t val;
-static int usbdux_getstatusinfo(struct comedi_device *dev, int chan)
-{
- struct usbduxsub *this_usbduxsub = dev->private;
- uint8_t sysred = 0;
- uint32_t one;
- int err;
-
- if (!this_usbduxsub)
- return 0;
+ ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD);
+ if (ret < 0) {
+ up(&devpriv->sem);
+ return ret;
+ }
- if (this_usbduxsub->ai_cmd_running) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: status read not possible. "
- "Async Command is running.\n", dev->minor);
- return 0;
- }
+ /* 32 bits big endian from the A/D converter */
+ val = be32_to_cpu(*((int32_t *)((devpriv->insn_buf) + 1)));
+ val &= 0x00ffffff; /* strip status byte */
+ val ^= 0x00800000; /* convert to unsigned */
- /* CONFIG0 */
- this_usbduxsub->dux_commands[1] = 0x12;
-
- /* CONFIG1: 2kHz sampling rate */
- this_usbduxsub->dux_commands[2] = 0x80;
-
- /* CONFIG3: differential channels off */
- this_usbduxsub->dux_commands[3] = 0x00;
-
- if (chan == 1) {
- /* ADC offset */
- sysred = sysred | 1;
- } else if (chan == 2) {
- /* VCC */
- sysred = sysred | 4;
- } else if (chan == 3) {
- /* temperature */
- sysred = sysred | 8;
- } else if (chan == 4) {
- /* gain */
- sysred = sysred | 16;
- } else if (chan == 5) {
- /* ref */
- sysred = sysred | 32;
+ data[i] = val;
}
+ up(&devpriv->sem);
- this_usbduxsub->dux_commands[4] = 0;
- this_usbduxsub->dux_commands[5] = 0;
- this_usbduxsub->dux_commands[6] = sysred;
-
- /* adc commands */
- err = send_dux_commands(this_usbduxsub, SENDSINGLEAD);
- if (err < 0)
- return err;
-
- err = receive_dux_commands(this_usbduxsub, SENDSINGLEAD);
- if (err < 0)
- return err;
-
- /* 32 bits big endian from the A/D converter */
- one = be32_to_cpu(*((int32_t *)((this_usbduxsub->insnBuffer)+1)));
- /* mask out the status byte */
- one = one & 0x00ffffff;
- one = one ^ 0x00800000;
-
- return (int)one;
+ return insn->n;
}
-
-
-
-
-
-/************************************/
-/* analog out */
-
-static int usbdux_ao_insn_read(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int usbduxsigma_ao_insn_read(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
+ struct usbduxsigma_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
int i;
- int chan = CR_CHAN(insn->chanspec);
- struct usbduxsub *this_usbduxsub = dev->private;
-
- if (!this_usbduxsub)
- return -EFAULT;
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
+ down(&devpriv->sem);
for (i = 0; i < insn->n; i++)
- data[i] = this_usbduxsub->outBuffer[chan];
+ data[i] = devpriv->ao_readback[chan];
+ up(&devpriv->sem);
- up(&this_usbduxsub->sem);
- return i;
+ return insn->n;
}
-static int usbdux_ao_insn_write(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int usbduxsigma_ao_insn_write(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- int i, err;
- int chan = CR_CHAN(insn->chanspec);
- struct usbduxsub *this_usbduxsub = dev->private;
-
- if (!this_usbduxsub)
- return -EFAULT;
-
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: ao_insn_write\n", dev->minor);
+ struct usbduxsigma_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ int ret;
+ int i;
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- if (this_usbduxsub->ao_cmd_running) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: ao_insn_write: "
- "ERROR: asynchronous ao_cmd is running\n", dev->minor);
- up(&this_usbduxsub->sem);
- return 0;
+ down(&devpriv->sem);
+ if (devpriv->ao_cmd_running) {
+ up(&devpriv->sem);
+ return -EBUSY;
}
for (i = 0; i < insn->n; i++) {
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: ao_insn_write: data[chan=%d,i=%d]=%d\n",
- dev->minor, chan, i, data[i]);
-
- /* number of channels: 1 */
- this_usbduxsub->dux_commands[1] = 1;
- /* channel number */
- this_usbduxsub->dux_commands[2] = data[i];
- this_usbduxsub->outBuffer[chan] = data[i];
- this_usbduxsub->dux_commands[3] = chan;
- err = send_dux_commands(this_usbduxsub, SENDDACOMMANDS);
- if (err < 0) {
- up(&this_usbduxsub->sem);
- return err;
+ devpriv->dux_commands[1] = 1; /* num channels */
+ devpriv->dux_commands[2] = data[i]; /* value */
+ devpriv->dux_commands[3] = chan; /* channel number */
+ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DA_CMD);
+ if (ret < 0) {
+ up(&devpriv->sem);
+ return ret;
}
+ devpriv->ao_readback[chan] = data[i];
}
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
- return i;
+ return insn->n;
}
-static int usbdux_ao_inttrig(struct comedi_device *dev,
- struct comedi_subdevice *s, unsigned int trignum)
+static int usbduxsigma_ao_inttrig(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned int trignum)
{
+ struct usbduxsigma_private *devpriv = dev->private;
int ret;
- struct usbduxsub *this_usbduxsub = dev->private;
- if (!this_usbduxsub)
- return -EFAULT;
-
- down(&this_usbduxsub->sem);
+ if (trignum != 0)
+ return -EINVAL;
- if (!(this_usbduxsub->probed)) {
- ret = -ENODEV;
- goto out;
- }
- if (trignum != 0) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ao_inttrig: invalid trignum\n",
- dev->minor);
- ret = -EINVAL;
- goto out;
- }
- if (!(this_usbduxsub->ao_cmd_running)) {
- this_usbduxsub->ao_cmd_running = 1;
- ret = usbduxsub_submit_OutURBs(this_usbduxsub);
+ down(&devpriv->sem);
+ if (!devpriv->ao_cmd_running) {
+ ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs,
+ devpriv->n_ao_urbs, 0);
if (ret < 0) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ao_inttrig: submitURB: "
- "err=%d\n", dev->minor, ret);
- this_usbduxsub->ao_cmd_running = 0;
- goto out;
+ up(&devpriv->sem);
+ return ret;
}
+ devpriv->ao_cmd_running = 1;
s->async->inttrig = NULL;
- } else {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: ao_inttrig but acqu is already running.\n",
- dev->minor);
}
- ret = 1;
-out:
- up(&this_usbduxsub->sem);
- return ret;
+ up(&devpriv->sem);
+
+ return 1;
}
-static int usbdux_ao_cmdtest(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_cmd *cmd)
+static int usbduxsigma_ao_cmdtest(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_cmd *cmd)
{
- struct usbduxsub *this_usbduxsub = dev->private;
+ struct usbduxsigma_private *devpriv = dev->private;
int err = 0;
+ int high_speed;
unsigned int flags;
- if (!this_usbduxsub)
- return -EFAULT;
-
- if (!(this_usbduxsub->probed))
- return -ENODEV;
-
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux_ao_cmdtest\n", dev->minor);
+ /* high speed conversions are not used yet */
+ high_speed = 0; /* (devpriv->high_speed) */
/* Step 1 : check if triggers are trivially valid */
err |= cfc_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT);
- if (0) { /* (this_usbduxsub->high_speed) */
+ if (high_speed) {
/*
* start immediately a new scan
* the sampling rate is set by the coversion rate
@@ -1538,8 +922,10 @@ static int usbdux_ao_cmdtest(struct comedi_device *dev,
err |= cfc_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT);
err |= cfc_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE);
- if (err)
+ if (err) {
+ up(&devpriv->sem);
return 1;
+ }
/* Step 2a : make sure trigger sources are unique */
@@ -1578,272 +964,186 @@ static int usbdux_ao_cmdtest(struct comedi_device *dev,
if (err)
return 3;
- return 0;
-}
-
-static int usbdux_ao_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
-{
- struct comedi_cmd *cmd = &s->async->cmd;
- unsigned int chan, gain;
- int i, ret;
- struct usbduxsub *this_usbduxsub = dev->private;
-
- if (!this_usbduxsub)
- return -EFAULT;
+ /* Step 4: fix up any arguments */
- down(&this_usbduxsub->sem);
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: %s\n", dev->minor, __func__);
-
- /* set current channel of the running acquisition to zero */
- s->async->cur_chan = 0;
- for (i = 0; i < cmd->chanlist_len; ++i) {
- chan = CR_CHAN(cmd->chanlist[i]);
- gain = CR_RANGE(cmd->chanlist[i]);
- if (i >= NUMOUTCHANNELS) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: %s: channel list too long\n",
- dev->minor, __func__);
- break;
- }
- this_usbduxsub->dac_commands[i] = chan;
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: dac command for ch %d is %x\n",
- dev->minor, i, this_usbduxsub->dac_commands[i]);
- }
-
- /* we count in steps of 1ms (125us) */
- /* 125us mode not used yet */
- if (0) { /* (this_usbduxsub->high_speed) */
- /* 125us */
+ /* we count in timer steps */
+ if (high_speed) {
/* timing of the conversion itself: every 125 us */
- this_usbduxsub->ao_timer = cmd->convert_arg / 125000;
+ devpriv->ao_timer = cmd->convert_arg / 125000;
} else {
- /* 1ms */
- /* timing of the scan: we get all channels at once */
- this_usbduxsub->ao_timer = cmd->scan_begin_arg / 1000000;
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: scan_begin_src=%d, scan_begin_arg=%d, "
- "convert_src=%d, convert_arg=%d\n", dev->minor,
- cmd->scan_begin_src, cmd->scan_begin_arg,
- cmd->convert_src, cmd->convert_arg);
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: ao_timer=%d (ms)\n",
- dev->minor, this_usbduxsub->ao_timer);
- if (this_usbduxsub->ao_timer < 1) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: usbdux: ao_timer=%d, "
- "scan_begin_arg=%d. "
- "Not properly tested by cmdtest?\n",
- dev->minor, this_usbduxsub->ao_timer,
- cmd->scan_begin_arg);
- up(&this_usbduxsub->sem);
- return -EINVAL;
- }
+ /*
+ * timing of the scan: every 1ms
+ * we get all channels at once
+ */
+ devpriv->ao_timer = cmd->scan_begin_arg / 1000000;
}
- this_usbduxsub->ao_counter = this_usbduxsub->ao_timer;
+ if (devpriv->ao_timer < 1)
+ err |= -EINVAL;
if (cmd->stop_src == TRIG_COUNT) {
- /* not continuous */
- /* counter */
- /* high speed also scans everything at once */
- if (0) { /* (this_usbduxsub->high_speed) */
- this_usbduxsub->ao_sample_count =
- (cmd->stop_arg) * (cmd->scan_end_arg);
+ /* not continuous, use counter */
+ if (high_speed) {
+ /* high speed also scans everything at once */
+ devpriv->ao_sample_count = cmd->stop_arg *
+ cmd->scan_end_arg;
} else {
- /* there's no scan as the scan has been */
- /* perf inside the FX2 */
- /* data arrives as one packet */
- this_usbduxsub->ao_sample_count = cmd->stop_arg;
+ /*
+ * There's no scan as the scan has been
+ * handled inside the FX2. Data arrives as
+ * one packet.
+ */
+ devpriv->ao_sample_count = cmd->stop_arg;
}
- this_usbduxsub->ao_continuous = 0;
+ devpriv->ao_continuous = 0;
} else {
/* continuous acquisition */
- this_usbduxsub->ao_continuous = 1;
- this_usbduxsub->ao_sample_count = 0;
+ devpriv->ao_continuous = 1;
+ devpriv->ao_sample_count = 0;
}
+ if (err)
+ return 4;
+
+ return 0;
+}
+
+static int usbduxsigma_ao_cmd(struct comedi_device *dev,
+ struct comedi_subdevice *s)
+{
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct comedi_cmd *cmd = &s->async->cmd;
+ int ret;
+ int i;
+
+ down(&devpriv->sem);
+
+ /* set current channel of the running acquisition to zero */
+ s->async->cur_chan = 0;
+ for (i = 0; i < cmd->chanlist_len; ++i)
+ devpriv->dac_commands[i] = CR_CHAN(cmd->chanlist[i]);
+
+ devpriv->ao_counter = devpriv->ao_timer;
+
if (cmd->start_src == TRIG_NOW) {
/* enable this acquisition operation */
- this_usbduxsub->ao_cmd_running = 1;
- ret = usbduxsub_submit_OutURBs(this_usbduxsub);
+ ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs,
+ devpriv->n_ao_urbs, 0);
if (ret < 0) {
- this_usbduxsub->ao_cmd_running = 0;
- /* fixme: unlink here?? */
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
return ret;
}
s->async->inttrig = NULL;
- } else {
- /* TRIG_INT */
- /* submit the urbs later */
- /* wait for an internal signal */
- s->async->inttrig = usbdux_ao_inttrig;
+ devpriv->ao_cmd_running = 1;
+ } else { /* TRIG_INT */
+ /* wait for an internal signal and submit the urbs later */
+ s->async->inttrig = usbduxsigma_ao_inttrig;
}
- up(&this_usbduxsub->sem);
+ up(&devpriv->sem);
+
return 0;
}
-static int usbdux_dio_insn_config(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int usbduxsigma_dio_insn_config(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- int chan = CR_CHAN(insn->chanspec);
-
- /* The input or output configuration of each digital line is
- * configured by a special insn_config instruction. chanspec
- * contains the channel to be changed, and data[0] contains the
- * value COMEDI_INPUT or COMEDI_OUTPUT. */
+ unsigned int chan = CR_CHAN(insn->chanspec);
+ unsigned int mask = 1 << chan;
switch (data[0]) {
case INSN_CONFIG_DIO_OUTPUT:
- s->io_bits |= 1 << chan; /* 1 means Out */
+ s->io_bits |= mask;
break;
case INSN_CONFIG_DIO_INPUT:
- s->io_bits &= ~(1 << chan);
+ s->io_bits &= ~mask;
break;
case INSN_CONFIG_DIO_QUERY:
- data[1] =
- (s->io_bits & (1 << chan)) ? COMEDI_OUTPUT : COMEDI_INPUT;
+ data[1] = (s->io_bits & mask) ? COMEDI_OUTPUT : COMEDI_INPUT;
break;
default:
return -EINVAL;
break;
}
- /* we don't tell the firmware here as it would take 8 frames */
- /* to submit the information. We do it in the insn_bits. */
+
+ /*
+ * We don't tell the firmware here as it would take 8 frames
+ * to submit the information. We do it in the (*insn_bits).
+ */
return insn->n;
}
-static int usbdux_dio_insn_bits(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn,
- unsigned int *data)
+static int usbduxsigma_dio_insn_bits(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
+ struct usbduxsigma_private *devpriv = dev->private;
+ unsigned int mask = data[0];
+ unsigned int bits = data[1];
+ int ret;
- struct usbduxsub *this_usbduxsub = dev->private;
- int err;
+ down(&devpriv->sem);
- if (!this_usbduxsub)
- return -EFAULT;
+ s->state &= ~mask;
+ s->state |= (bits & mask);
- down(&this_usbduxsub->sem);
+ devpriv->dux_commands[1] = s->io_bits & 0xff;
+ devpriv->dux_commands[4] = s->state & 0xff;
+ devpriv->dux_commands[2] = (s->io_bits >> 8) & 0xff;
+ devpriv->dux_commands[5] = (s->state >> 8) & 0xff;
+ devpriv->dux_commands[3] = (s->io_bits >> 16) & 0xff;
+ devpriv->dux_commands[6] = (s->state >> 16) & 0xff;
- if (!(this_usbduxsub->probed)) {
- up(&this_usbduxsub->sem);
- return -ENODEV;
- }
+ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD);
+ if (ret < 0)
+ goto done;
+ ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD);
+ if (ret < 0)
+ goto done;
- /* The insn data is a mask in data[0] and the new data
- * in data[1], each channel cooresponding to a bit. */
- s->state &= ~data[0];
- s->state |= data[0] & data[1];
- /* The commands are 8 bits wide */
- this_usbduxsub->dux_commands[1] = (s->io_bits) & 0x000000FF;
- this_usbduxsub->dux_commands[4] = (s->state) & 0x000000FF;
- this_usbduxsub->dux_commands[2] = ((s->io_bits) & 0x0000FF00) >> 8;
- this_usbduxsub->dux_commands[5] = ((s->state) & 0x0000FF00) >> 8;
- this_usbduxsub->dux_commands[3] = ((s->io_bits) & 0x00FF0000) >> 16;
- this_usbduxsub->dux_commands[6] = ((s->state) & 0x00FF0000) >> 16;
-
- /* This command also tells the firmware to return */
- /* the digital input lines */
- err = send_dux_commands(this_usbduxsub, SENDDIOBITSCOMMAND);
- if (err < 0) {
- up(&this_usbduxsub->sem);
- return err;
- }
- err = receive_dux_commands(this_usbduxsub, SENDDIOBITSCOMMAND);
- if (err < 0) {
- up(&this_usbduxsub->sem);
- return err;
- }
+ s->state = devpriv->insn_buf[1] |
+ (devpriv->insn_buf[2] << 8) |
+ (devpriv->insn_buf[3] << 16);
- data[1] = (((unsigned int)(this_usbduxsub->insnBuffer[1]))&0xff) |
- ((((unsigned int)(this_usbduxsub->insnBuffer[2]))&0xff) << 8) |
- ((((unsigned int)(this_usbduxsub->insnBuffer[3]))&0xff) << 16);
+ data[1] = s->state;
+ ret = insn->n;
- s->state = data[1];
+done:
+ up(&devpriv->sem);
- up(&this_usbduxsub->sem);
- return insn->n;
+ return ret;
}
-/***********************************/
-/* PWM */
-
-static int usbduxsub_unlink_PwmURBs(struct usbduxsub *usbduxsub_tmp)
+static void usbduxsigma_pwm_stop(struct comedi_device *dev, int do_unlink)
{
- int err = 0;
+ struct usbduxsigma_private *devpriv = dev->private;
- if (usbduxsub_tmp && usbduxsub_tmp->urbPwm) {
- if (usbduxsub_tmp->urbPwm)
- usb_kill_urb(usbduxsub_tmp->urbPwm);
- dev_dbg(&usbduxsub_tmp->interface->dev,
- "comedi: unlinked PwmURB: res=%d\n", err);
+ if (do_unlink) {
+ if (devpriv->pwm_urb)
+ usb_kill_urb(devpriv->pwm_urb);
}
- return err;
-}
-
-/* This cancels a running acquisition operation
- * in any context.
- */
-static int usbdux_pwm_stop(struct usbduxsub *this_usbduxsub, int do_unlink)
-{
- int ret = 0;
- if (!this_usbduxsub)
- return -EFAULT;
-
- dev_dbg(&this_usbduxsub->interface->dev, "comedi: %s\n", __func__);
- if (do_unlink)
- ret = usbduxsub_unlink_PwmURBs(this_usbduxsub);
-
- this_usbduxsub->pwm_cmd_running = 0;
-
- return ret;
+ devpriv->pwm_cmd_running = 0;
}
-/* force unlink - is called by comedi */
-static int usbdux_pwm_cancel(struct comedi_device *dev,
- struct comedi_subdevice *s)
+static int usbduxsigma_pwm_cancel(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
- struct usbduxsub *this_usbduxsub = dev->private;
- int res = 0;
+ struct usbduxsigma_private *devpriv = dev->private;
/* unlink only if it is really running */
- res = usbdux_pwm_stop(this_usbduxsub, this_usbduxsub->pwm_cmd_running);
+ usbduxsigma_pwm_stop(dev, devpriv->pwm_cmd_running);
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi %d: sending pwm off command to the usb device.\n",
- dev->minor);
- res = send_dux_commands(this_usbduxsub, SENDPWMOFF);
- if (res < 0)
- return res;
-
- return res;
+ return usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_OFF_CMD);
}
-static void usbduxsub_pwm_irq(struct urb *urb)
+static void usbduxsigma_pwm_urb_complete(struct urb *urb)
{
+ struct comedi_device *dev = urb->context;
+ struct usbduxsigma_private *devpriv = dev->private;
int ret;
- struct usbduxsub *this_usbduxsub;
- struct comedi_device *this_comedidev;
- struct comedi_subdevice *s;
-
- /* printk(KERN_DEBUG "PWM: IRQ\n"); */
-
- /* the context variable points to the subdevice */
- this_comedidev = urb->context;
- /* the private structure of the subdevice is struct usbduxsub */
- this_usbduxsub = this_comedidev->private;
-
- s = &this_comedidev->subdevices[SUBDEV_DA];
switch (urb->status) {
case 0:
@@ -1854,260 +1154,180 @@ static void usbduxsub_pwm_irq(struct urb *urb)
case -ENOENT:
case -ESHUTDOWN:
case -ECONNABORTED:
- /*
- * after an unlink command, unplug, ... etc
- * no unlink needed here. Already shutting down.
- */
- if (this_usbduxsub->pwm_cmd_running)
- usbdux_pwm_stop(this_usbduxsub, 0);
-
+ /* happens after an unlink command */
+ if (devpriv->pwm_cmd_running)
+ usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */
return;
default:
/* a real error */
- if (this_usbduxsub->pwm_cmd_running) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi_: Non-zero urb status received in "
- "pwm intr context: %d\n", urb->status);
- usbdux_pwm_stop(this_usbduxsub, 0);
+ if (devpriv->pwm_cmd_running) {
+ dev_err(dev->class_dev,
+ "%s: non-zero urb status (%d)\n",
+ __func__, urb->status);
+ usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */
}
return;
}
- /* are we actually running? */
- if (!(this_usbduxsub->pwm_cmd_running))
+ if (!devpriv->pwm_cmd_running)
return;
- urb->transfer_buffer_length = this_usbduxsub->sizePwmBuf;
- urb->dev = this_usbduxsub->usbdev;
+ urb->transfer_buffer_length = devpriv->pwm_buf_sz;
+ urb->dev = comedi_to_usb_dev(dev);
urb->status = 0;
- if (this_usbduxsub->pwm_cmd_running) {
- ret = usb_submit_urb(urb, GFP_ATOMIC);
- if (ret < 0) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi_: pwm urb resubm failed in int-cont. "
- "ret=%d", ret);
- if (ret == EL2NSYNC)
- dev_err(&this_usbduxsub->interface->dev,
- "buggy USB host controller or bug in "
- "IRQ handling!\n");
-
- /* don't do an unlink here */
- usbdux_pwm_stop(this_usbduxsub, 0);
- }
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (ret < 0) {
+ dev_err(dev->class_dev, "%s: urb resubmit failed (%d)\n",
+ __func__, ret);
+ if (ret == EL2NSYNC)
+ dev_err(dev->class_dev,
+ "buggy USB host controller or bug in IRQ handler\n");
+ usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */
}
}
-static int usbduxsub_submit_PwmURBs(struct usbduxsub *usbduxsub)
+static int usbduxsigma_submit_pwm_urb(struct comedi_device *dev)
{
- int errFlag;
-
- if (!usbduxsub)
- return -EFAULT;
-
- dev_dbg(&usbduxsub->interface->dev, "comedi_: submitting pwm-urb\n");
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct urb *urb = devpriv->pwm_urb;
/* in case of a resubmission after an unlink... */
- usb_fill_bulk_urb(usbduxsub->urbPwm,
- usbduxsub->usbdev,
- usb_sndbulkpipe(usbduxsub->usbdev, PWM_EP),
- usbduxsub->urbPwm->transfer_buffer,
- usbduxsub->sizePwmBuf, usbduxsub_pwm_irq,
- usbduxsub->comedidev);
-
- errFlag = usb_submit_urb(usbduxsub->urbPwm, GFP_ATOMIC);
- if (errFlag) {
- dev_err(&usbduxsub->interface->dev,
- "comedi_: usbduxsigma: pwm: usb_submit_urb error %d\n",
- errFlag);
- return errFlag;
- }
- return 0;
+ usb_fill_bulk_urb(urb,
+ usb, usb_sndbulkpipe(usb, USBDUXSIGMA_PWM_OUT_EP),
+ urb->transfer_buffer, devpriv->pwm_buf_sz,
+ usbduxsigma_pwm_urb_complete, dev);
+
+ return usb_submit_urb(urb, GFP_ATOMIC);
}
-static int usbdux_pwm_period(struct comedi_device *dev,
- struct comedi_subdevice *s, unsigned int period)
+static int usbduxsigma_pwm_period(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned int period)
{
- struct usbduxsub *this_usbduxsub = dev->private;
+ struct usbduxsigma_private *devpriv = dev->private;
int fx2delay = 255;
if (period < MIN_PWM_PERIOD) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: illegal period setting for pwm.\n",
- dev->minor);
return -EAGAIN;
} else {
- fx2delay = period / ((int)(6 * 512 * (1.0 / 0.033))) - 6;
- if (fx2delay > 255) {
- dev_err(&this_usbduxsub->interface->dev,
- "comedi%d: period %d for pwm is too low.\n",
- dev->minor, period);
+ fx2delay = (period / (6 * 512 * 1000 / 33)) - 6;
+ if (fx2delay > 255)
return -EAGAIN;
- }
}
- this_usbduxsub->pwmDelay = fx2delay;
- this_usbduxsub->pwmPeriod = period;
- dev_dbg(&this_usbduxsub->interface->dev, "%s: frequ=%d, period=%d\n",
- __func__, period, fx2delay);
+ devpriv->pwm_delay = fx2delay;
+ devpriv->pwm_period = period;
return 0;
}
-/* is called from insn so there's no need to do all the sanity checks */
-static int usbdux_pwm_start(struct comedi_device *dev,
- struct comedi_subdevice *s)
+static int usbduxsigma_pwm_start(struct comedi_device *dev,
+ struct comedi_subdevice *s)
{
- int ret, i;
- struct usbduxsub *this_usbduxsub = dev->private;
-
- dev_dbg(&this_usbduxsub->interface->dev, "comedi%d: %s\n",
- dev->minor, __func__);
+ struct usbduxsigma_private *devpriv = dev->private;
+ int ret;
- if (this_usbduxsub->pwm_cmd_running) {
- /* already running */
+ if (devpriv->pwm_cmd_running)
return 0;
- }
- this_usbduxsub->dux_commands[1] = ((uint8_t) this_usbduxsub->pwmDelay);
- ret = send_dux_commands(this_usbduxsub, SENDPWMON);
+ devpriv->dux_commands[1] = devpriv->pwm_delay;
+ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_ON_CMD);
if (ret < 0)
return ret;
- /* initialise the buffer */
- for (i = 0; i < this_usbduxsub->sizePwmBuf; i++)
- ((char *)(this_usbduxsub->urbPwm->transfer_buffer))[i] = 0;
+ memset(devpriv->pwm_urb->transfer_buffer, 0, devpriv->pwm_buf_sz);
- this_usbduxsub->pwm_cmd_running = 1;
- ret = usbduxsub_submit_PwmURBs(this_usbduxsub);
- if (ret < 0) {
- this_usbduxsub->pwm_cmd_running = 0;
+ ret = usbduxsigma_submit_pwm_urb(dev);
+ if (ret < 0)
return ret;
- }
+ devpriv->pwm_cmd_running = 1;
+
return 0;
}
-/* generates the bit pattern for PWM with the optional sign bit */
-static int usbdux_pwm_pattern(struct comedi_device *dev,
- struct comedi_subdevice *s, int channel,
- unsigned int value, unsigned int sign)
+static int usbduxsigma_pwm_pattern(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned int chan,
+ unsigned int value,
+ unsigned int sign)
{
- struct usbduxsub *this_usbduxsub = dev->private;
- int i, szbuf;
- char *pBuf;
- char pwm_mask;
- char sgn_mask;
- char c;
-
- if (!this_usbduxsub)
- return -EFAULT;
-
- /* this is the DIO bit which carries the PWM data */
- pwm_mask = (1 << channel);
- /* this is the DIO bit which carries the optional direction bit */
- sgn_mask = (16 << channel);
- /* this is the buffer which will be filled with the with bit */
- /* pattern for one period */
- szbuf = this_usbduxsub->sizePwmBuf;
- pBuf = (char *)(this_usbduxsub->urbPwm->transfer_buffer);
+ struct usbduxsigma_private *devpriv = dev->private;
+ char pwm_mask = (1 << chan); /* DIO bit for the PWM data */
+ char sgn_mask = (16 << chan); /* DIO bit for the sign */
+ char *buf = (char *)(devpriv->pwm_urb->transfer_buffer);
+ int szbuf = devpriv->pwm_buf_sz;
+ int i;
+
for (i = 0; i < szbuf; i++) {
- c = *pBuf;
- /* reset bits */
- c = c & (~pwm_mask);
- /* set the bit as long as the index is lower than the value */
+ char c = *buf;
+
+ c &= ~pwm_mask;
if (i < value)
- c = c | pwm_mask;
- /* set the optional sign bit for a relay */
- if (!sign) {
- /* positive value */
- c = c & (~sgn_mask);
- } else {
- /* negative value */
- c = c | sgn_mask;
- }
- *(pBuf++) = c;
+ c |= pwm_mask;
+ if (!sign)
+ c &= ~sgn_mask;
+ else
+ c |= sgn_mask;
+ *buf++ = c;
}
return 1;
}
-static int usbdux_pwm_write(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
+static int usbduxsigma_pwm_write(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- struct usbduxsub *this_usbduxsub = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
- if (!this_usbduxsub)
- return -EFAULT;
-
- if ((insn->n) != 1) {
- /*
- * doesn't make sense to have more than one value here because
- * it would just overwrite the PWM buffer a couple of times
- */
+ /*
+ * It doesn't make sense to support more than one value here
+ * because it would just overwrite the PWM buffer.
+ */
+ if (insn->n != 1)
return -EINVAL;
- }
/*
- * the sign is set via a special INSN only, this gives us 8 bits for
- * normal operation
- * relay sign 0 by default
+ * The sign is set via a special INSN only, this gives us 8 bits
+ * for normal operation, sign is 0 by default.
*/
- return usbdux_pwm_pattern(dev, s, CR_CHAN(insn->chanspec), data[0], 0);
+ return usbduxsigma_pwm_pattern(dev, s, chan, data[0], 0);
}
-static int usbdux_pwm_read(struct comedi_device *x1,
- struct comedi_subdevice *x2, struct comedi_insn *x3,
- unsigned int *x4)
+static int usbduxsigma_pwm_config(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ struct comedi_insn *insn,
+ unsigned int *data)
{
- /* not needed */
- return -EINVAL;
-};
+ struct usbduxsigma_private *devpriv = dev->private;
+ unsigned int chan = CR_CHAN(insn->chanspec);
-/* switches on/off PWM */
-static int usbdux_pwm_config(struct comedi_device *dev,
- struct comedi_subdevice *s,
- struct comedi_insn *insn, unsigned int *data)
-{
- struct usbduxsub *this_usbduxsub = dev->private;
switch (data[0]) {
case INSN_CONFIG_ARM:
- /* switch it on */
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: %s: pwm on\n", dev->minor, __func__);
/*
* if not zero the PWM is limited to a certain time which is
* not supported here
*/
if (data[1] != 0)
return -EINVAL;
- return usbdux_pwm_start(dev, s);
+ return usbduxsigma_pwm_start(dev, s);
case INSN_CONFIG_DISARM:
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: %s: pwm off\n", dev->minor, __func__);
- return usbdux_pwm_cancel(dev, s);
+ return usbduxsigma_pwm_cancel(dev, s);
case INSN_CONFIG_GET_PWM_STATUS:
- /*
- * to check if the USB transmission has failed or in case PWM
- * was limited to n cycles to check if it has terminated
- */
- data[1] = this_usbduxsub->pwm_cmd_running;
+ data[1] = devpriv->pwm_cmd_running;
return 0;
case INSN_CONFIG_PWM_SET_PERIOD:
- dev_dbg(&this_usbduxsub->interface->dev,
- "comedi%d: %s: setting period\n", dev->minor,
- __func__);
- return usbdux_pwm_period(dev, s, data[1]);
+ return usbduxsigma_pwm_period(dev, s, data[1]);
case INSN_CONFIG_PWM_GET_PERIOD:
- data[1] = this_usbduxsub->pwmPeriod;
+ data[1] = devpriv->pwm_period;
return 0;
case INSN_CONFIG_PWM_SET_H_BRIDGE:
- /* value in the first byte and the sign in the second for a
- relay */
- return usbdux_pwm_pattern(dev, s,
- /* the channel number */
- CR_CHAN(insn->chanspec),
- /* actual PWM data */
- data[1],
- /* just a sign */
- (data[2] != 0));
+ /*
+ * data[1] = value
+ * data[2] = sign (for a relay)
+ */
+ return usbduxsigma_pwm_pattern(dev, s, chan,
+ data[1], (data[2] != 0));
case INSN_CONFIG_PWM_GET_H_BRIDGE:
/* values are not kept in this driver, nothing to return */
return -EINVAL;
@@ -2115,542 +1335,412 @@ static int usbdux_pwm_config(struct comedi_device *dev,
return -EINVAL;
}
-/* end of PWM */
-/*****************************************************************/
-
-static void tidy_up(struct usbduxsub *usbduxsub_tmp)
+static int usbduxsigma_getstatusinfo(struct comedi_device *dev, int chan)
{
- int i;
+ struct usbduxsigma_private *devpriv = dev->private;
+ uint8_t sysred;
+ uint32_t val;
+ int ret;
- if (!usbduxsub_tmp)
- return;
- dev_dbg(&usbduxsub_tmp->interface->dev, "comedi_: tiding up\n");
+ switch (chan) {
+ default:
+ case 0:
+ sysred = 0; /* ADC zero */
+ break;
+ case 1:
+ sysred = 1; /* ADC offset */
+ break;
+ case 2:
+ sysred = 4; /* VCC */
+ break;
+ case 3:
+ sysred = 8; /* temperature */
+ break;
+ case 4:
+ sysred = 16; /* gain */
+ break;
+ case 5:
+ sysred = 32; /* ref */
+ break;
+ }
- /* shows the usb subsystem that the driver is down */
- if (usbduxsub_tmp->interface)
- usb_set_intfdata(usbduxsub_tmp->interface, NULL);
+ devpriv->dux_commands[1] = 0x12; /* CONFIG0 */
+ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */
+ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. channels off */
+ devpriv->dux_commands[4] = 0;
+ devpriv->dux_commands[5] = 0;
+ devpriv->dux_commands[6] = sysred;
+ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD);
+ if (ret < 0)
+ return ret;
- usbduxsub_tmp->probed = 0;
+ ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD);
+ if (ret < 0)
+ return ret;
- if (usbduxsub_tmp->urbIn) {
- if (usbduxsub_tmp->ai_cmd_running) {
- usbduxsub_tmp->ai_cmd_running = 0;
- usbduxsub_unlink_InURBs(usbduxsub_tmp);
- }
- for (i = 0; i < usbduxsub_tmp->numOfInBuffers; i++) {
- kfree(usbduxsub_tmp->urbIn[i]->transfer_buffer);
- usbduxsub_tmp->urbIn[i]->transfer_buffer = NULL;
- usb_kill_urb(usbduxsub_tmp->urbIn[i]);
- usb_free_urb(usbduxsub_tmp->urbIn[i]);
- usbduxsub_tmp->urbIn[i] = NULL;
- }
- kfree(usbduxsub_tmp->urbIn);
- usbduxsub_tmp->urbIn = NULL;
- }
- if (usbduxsub_tmp->urbOut) {
- if (usbduxsub_tmp->ao_cmd_running) {
- usbduxsub_tmp->ao_cmd_running = 0;
- usbduxsub_unlink_OutURBs(usbduxsub_tmp);
- }
- for (i = 0; i < usbduxsub_tmp->numOfOutBuffers; i++) {
- if (usbduxsub_tmp->urbOut[i]->transfer_buffer) {
- kfree(usbduxsub_tmp->
- urbOut[i]->transfer_buffer);
- usbduxsub_tmp->urbOut[i]->transfer_buffer =
- NULL;
- }
- if (usbduxsub_tmp->urbOut[i]) {
- usb_kill_urb(usbduxsub_tmp->urbOut[i]);
- usb_free_urb(usbduxsub_tmp->urbOut[i]);
- usbduxsub_tmp->urbOut[i] = NULL;
- }
- }
- kfree(usbduxsub_tmp->urbOut);
- usbduxsub_tmp->urbOut = NULL;
- }
- if (usbduxsub_tmp->urbPwm) {
- if (usbduxsub_tmp->pwm_cmd_running) {
- usbduxsub_tmp->pwm_cmd_running = 0;
- usbduxsub_unlink_PwmURBs(usbduxsub_tmp);
- }
- kfree(usbduxsub_tmp->urbPwm->transfer_buffer);
- usbduxsub_tmp->urbPwm->transfer_buffer = NULL;
- usb_kill_urb(usbduxsub_tmp->urbPwm);
- usb_free_urb(usbduxsub_tmp->urbPwm);
- usbduxsub_tmp->urbPwm = NULL;
- }
- kfree(usbduxsub_tmp->inBuffer);
- usbduxsub_tmp->inBuffer = NULL;
- kfree(usbduxsub_tmp->insnBuffer);
- usbduxsub_tmp->insnBuffer = NULL;
- kfree(usbduxsub_tmp->outBuffer);
- usbduxsub_tmp->outBuffer = NULL;
- kfree(usbduxsub_tmp->dac_commands);
- usbduxsub_tmp->dac_commands = NULL;
- kfree(usbduxsub_tmp->dux_commands);
- usbduxsub_tmp->dux_commands = NULL;
- usbduxsub_tmp->ai_cmd_running = 0;
- usbduxsub_tmp->ao_cmd_running = 0;
- usbduxsub_tmp->pwm_cmd_running = 0;
+ /* 32 bits big endian from the A/D converter */
+ val = be32_to_cpu(*((int32_t *)((devpriv->insn_buf)+1)));
+ val &= 0x00ffffff; /* strip status byte */
+ val ^= 0x00800000; /* convert to unsigned */
+
+ return (int)val;
}
-static int usbduxsigma_attach_common(struct comedi_device *dev,
- struct usbduxsub *uds)
+static int usbduxsigma_attach_common(struct comedi_device *dev)
{
- int ret;
+ struct usbduxsigma_private *devpriv = dev->private;
struct comedi_subdevice *s;
int n_subdevs;
int offset;
+ int ret;
- down(&uds->sem);
- /* pointer back to the corresponding comedi device */
- uds->comedidev = dev;
+ down(&devpriv->sem);
- /* set number of subdevices */
- if (uds->high_speed)
+ if (devpriv->high_speed)
n_subdevs = 4; /* with pwm */
else
n_subdevs = 3; /* without pwm */
ret = comedi_alloc_subdevices(dev, n_subdevs);
if (ret) {
- up(&uds->sem);
+ up(&devpriv->sem);
return ret;
}
- /* private structure is also simply the usb-structure */
- dev->private = uds;
- /* the first subdevice is the A/D converter */
- s = &dev->subdevices[SUBDEV_AD];
- /* the URBs get the comedi subdevice */
- /* which is responsible for reading */
- /* this is the subdevice which reads data */
+
+ /* Analog Input subdevice */
+ s = &dev->subdevices[0];
dev->read_subdev = s;
- /* the subdevice receives as private structure the */
- /* usb-structure */
- s->private = NULL;
- /* analog input */
- s->type = COMEDI_SUBD_AI;
- /* readable and ref is to ground, 32 bit wide data! */
- s->subdev_flags = SDF_READABLE | SDF_GROUND |
- SDF_CMD_READ | SDF_LSAMPL;
- /* 16 A/D channels */
- s->n_chan = NUMCHANNELS;
- /* length of the channellist */
- s->len_chanlist = NUMCHANNELS;
- /* callback functions */
- s->insn_read = usbdux_ai_insn_read;
- s->do_cmdtest = usbdux_ai_cmdtest;
- s->do_cmd = usbdux_ai_cmd;
- s->cancel = usbdux_ai_cancel;
- /* max value from the A/D converter (24bit) */
- s->maxdata = 0x00FFFFFF;
- /* range table to convert to physical units */
- s->range_table = (&range_usbdux_ai_range);
- /* analog output subdevice */
- s = &dev->subdevices[SUBDEV_DA];
- /* analog out */
- s->type = COMEDI_SUBD_AO;
- /* backward pointer */
+ s->type = COMEDI_SUBD_AI;
+ s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ | SDF_LSAMPL;
+ s->n_chan = NUMCHANNELS;
+ s->len_chanlist = NUMCHANNELS;
+ s->maxdata = 0x00ffffff;
+ s->range_table = &usbduxsigma_ai_range;
+ s->insn_read = usbduxsigma_ai_insn_read;
+ s->do_cmdtest = usbduxsigma_ai_cmdtest;
+ s->do_cmd = usbduxsigma_ai_cmd;
+ s->cancel = usbduxsigma_ai_cancel;
+
+ /* Analog Output subdevice */
+ s = &dev->subdevices[1];
dev->write_subdev = s;
- /* the subdevice receives as private structure the */
- /* usb-structure */
- s->private = NULL;
- /* are writable */
- s->subdev_flags = SDF_WRITABLE | SDF_GROUND | SDF_CMD_WRITE;
- /* 4 channels */
- s->n_chan = 4;
- /* length of the channellist */
- s->len_chanlist = 4;
- /* 8 bit resolution */
- s->maxdata = 0x00ff;
- /* unipolar range */
- s->range_table = &range_unipolar2_5;
- /* callback */
- s->do_cmdtest = usbdux_ao_cmdtest;
- s->do_cmd = usbdux_ao_cmd;
- s->cancel = usbdux_ao_cancel;
- s->insn_read = usbdux_ao_insn_read;
- s->insn_write = usbdux_ao_insn_write;
- /* digital I/O subdevice */
- s = &dev->subdevices[SUBDEV_DIO];
- s->type = COMEDI_SUBD_DIO;
- s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
- /* 8 external and 16 internal channels */
- s->n_chan = 24;
- s->maxdata = 1;
- s->range_table = (&range_digital);
- s->insn_bits = usbdux_dio_insn_bits;
- s->insn_config = usbdux_dio_insn_config;
- /* we don't use it */
- s->private = NULL;
- if (uds->high_speed) {
- /* timer / pwm subdevice */
- s = &dev->subdevices[SUBDEV_PWM];
- s->type = COMEDI_SUBD_PWM;
- s->subdev_flags = SDF_WRITABLE | SDF_PWM_HBRIDGE;
- s->n_chan = 8;
- /* this defines the max duty cycle resolution */
- s->maxdata = uds->sizePwmBuf;
- s->insn_write = usbdux_pwm_write;
- s->insn_read = usbdux_pwm_read;
- s->insn_config = usbdux_pwm_config;
- usbdux_pwm_period(dev, s, PWM_DEFAULT_PERIOD);
- }
- /* finally decide that it's attached */
- uds->attached = 1;
- up(&uds->sem);
- offset = usbdux_getstatusinfo(dev, 0);
+ s->type = COMEDI_SUBD_AO;
+ s->subdev_flags = SDF_WRITABLE | SDF_GROUND | SDF_CMD_WRITE;
+ s->n_chan = USBDUXSIGMA_NUM_AO_CHAN;
+ s->len_chanlist = s->n_chan;
+ s->maxdata = 0x00ff;
+ s->range_table = &range_unipolar2_5;
+ s->insn_write = usbduxsigma_ao_insn_write;
+ s->insn_read = usbduxsigma_ao_insn_read;
+ s->do_cmdtest = usbduxsigma_ao_cmdtest;
+ s->do_cmd = usbduxsigma_ao_cmd;
+ s->cancel = usbduxsigma_ao_cancel;
+
+ /* Digital I/O subdevice */
+ s = &dev->subdevices[2];
+ s->type = COMEDI_SUBD_DIO;
+ s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
+ s->n_chan = 24;
+ s->maxdata = 1;
+ s->range_table = &range_digital;
+ s->insn_bits = usbduxsigma_dio_insn_bits;
+ s->insn_config = usbduxsigma_dio_insn_config;
+
+ if (devpriv->high_speed) {
+ /* Timer / pwm subdevice */
+ s = &dev->subdevices[3];
+ s->type = COMEDI_SUBD_PWM;
+ s->subdev_flags = SDF_WRITABLE | SDF_PWM_HBRIDGE;
+ s->n_chan = 8;
+ s->maxdata = devpriv->pwm_buf_sz;
+ s->insn_write = usbduxsigma_pwm_write;
+ s->insn_config = usbduxsigma_pwm_config;
+
+ usbduxsigma_pwm_period(dev, s, PWM_DEFAULT_PERIOD);
+ }
+
+ up(&devpriv->sem);
+
+ offset = usbduxsigma_getstatusinfo(dev, 0);
if (offset < 0)
- dev_err(&uds->interface->dev,
- "Communication to USBDUXSIGMA failed! Check firmware and cabling.");
- dev_info(&uds->interface->dev,
- "comedi%d: attached, ADC_zero = %x\n", dev->minor, offset);
+ dev_err(dev->class_dev,
+ "Communication to USBDUXSIGMA failed! Check firmware and cabling\n");
+
+ dev_info(dev->class_dev, "attached, ADC_zero = %x\n", offset);
+
return 0;
}
-static int usbduxsigma_auto_attach(struct comedi_device *dev,
- unsigned long context_unused)
+static int usbduxsigma_firmware_upload(struct comedi_device *dev,
+ const u8 *data, size_t size,
+ unsigned long context)
{
- struct usb_interface *uinterf = comedi_to_usb_interface(dev);
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ uint8_t *buf;
+ uint8_t *tmp;
int ret;
- struct usbduxsub *uds;
- dev->private = NULL;
- down(&start_stop_sem);
- uds = usb_get_intfdata(uinterf);
- if (!uds || !uds->probed) {
- dev_err(dev->class_dev,
- "usbduxsigma: error: auto_attach failed, not connected\n");
- ret = -ENODEV;
- } else if (uds->attached) {
- dev_err(dev->class_dev,
- "usbduxsigma: error: auto_attach failed, already attached\n");
- ret = -ENODEV;
- } else
- ret = usbduxsigma_attach_common(dev, uds);
- up(&start_stop_sem);
- return ret;
-}
+ if (!data)
+ return 0;
-static void usbduxsigma_detach(struct comedi_device *dev)
-{
- struct usbduxsub *usb = dev->private;
-
- if (usb) {
- down(&usb->sem);
- dev->private = NULL;
- usb->attached = 0;
- usb->comedidev = NULL;
- up(&usb->sem);
+ if (size > FIRMWARE_MAX_LEN) {
+ dev_err(dev->class_dev, "firmware binary too large for FX2\n");
+ return -ENOMEM;
}
-}
-static struct comedi_driver usbduxsigma_driver = {
- .driver_name = "usbduxsigma",
- .module = THIS_MODULE,
- .auto_attach = usbduxsigma_auto_attach,
- .detach = usbduxsigma_detach,
-};
+ /* we generate a local buffer for the firmware */
+ buf = kmemdup(data, size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
-static void usbdux_firmware_request_complete_handler(const struct firmware *fw,
- void *context)
-{
- struct usbduxsub *usbduxsub_tmp = context;
- struct usb_interface *uinterf = usbduxsub_tmp->interface;
- int ret;
+ /* we need a malloc'ed buffer for usb_control_msg() */
+ tmp = kmalloc(1, GFP_KERNEL);
+ if (!tmp) {
+ kfree(buf);
+ return -ENOMEM;
+ }
- if (fw == NULL) {
- dev_err(&uinterf->dev,
- "Firmware complete handler without firmware!\n");
- return;
+ /* stop the current firmware on the device */
+ *tmp = 1; /* 7f92 to one */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXSUB_CPUCS, 0x0000,
+ tmp, 1,
+ BULK_TIMEOUT);
+ if (ret < 0) {
+ dev_err(dev->class_dev, "can not stop firmware\n");
+ goto done;
}
- /*
- * we need to upload the firmware here because fw will be
- * freed once we've left this function
- */
- ret = firmwareUpload(usbduxsub_tmp, fw->data, fw->size);
+ /* upload the new firmware to the device */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ 0, 0x0000,
+ buf, size,
+ BULK_TIMEOUT);
+ if (ret < 0) {
+ dev_err(dev->class_dev, "firmware upload failed\n");
+ goto done;
+ }
+
+ /* start the new firmware on the device */
+ *tmp = 0; /* 7f92 to zero */
+ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0),
+ USBDUXSUB_FIRMWARE,
+ VENDOR_DIR_OUT,
+ USBDUXSUB_CPUCS, 0x0000,
+ tmp, 1,
+ BULK_TIMEOUT);
+ if (ret < 0)
+ dev_err(dev->class_dev, "can not start firmware\n");
- if (ret) {
- dev_err(&uinterf->dev,
- "Could not upload firmware (err=%d)\n", ret);
- goto out;
- }
- comedi_usb_auto_config(uinterf, &usbduxsigma_driver, 0);
-out:
- release_firmware(fw);
+done:
+ kfree(tmp);
+ kfree(buf);
+ return ret;
}
-static int usbduxsigma_usb_probe(struct usb_interface *uinterf,
- const struct usb_device_id *id)
+static int usbduxsigma_alloc_usb_buffers(struct comedi_device *dev)
{
- struct usb_device *udev = interface_to_usbdev(uinterf);
- struct device *dev = &uinterf->dev;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct urb *urb;
int i;
- int index;
- int ret;
-
- dev_dbg(dev, "comedi_: usbdux_: "
- "finding a free structure for the usb-device\n");
-
- down(&start_stop_sem);
- /* look for a free place in the usbdux array */
- index = -1;
- for (i = 0; i < NUMUSBDUX; i++) {
- if (!(usbduxsub[i].probed)) {
- index = i;
- break;
- }
- }
-
- /* no more space */
- if (index == -1) {
- dev_err(dev, "Too many usbduxsigma-devices connected.\n");
- up(&start_stop_sem);
- return -EMFILE;
- }
- dev_dbg(dev, "comedi_: usbdux: "
- "usbduxsub[%d] is ready to connect to comedi.\n", index);
-
- sema_init(&(usbduxsub[index].sem), 1);
- /* save a pointer to the usb device */
- usbduxsub[index].usbdev = udev;
- /* save the interface itself */
- usbduxsub[index].interface = uinterf;
- /* get the interface number from the interface */
- usbduxsub[index].ifnum = uinterf->altsetting->desc.bInterfaceNumber;
- /* hand the private data over to the usb subsystem */
- /* will be needed for disconnect */
- usb_set_intfdata(uinterf, &(usbduxsub[index]));
-
- dev_dbg(dev, "comedi_: usbdux: ifnum=%d\n", usbduxsub[index].ifnum);
-
- /* test if it is high speed (USB 2.0) */
- usbduxsub[index].high_speed =
- (usbduxsub[index].usbdev->speed == USB_SPEED_HIGH);
-
- /* create space for the commands of the DA converter */
- usbduxsub[index].dac_commands = kzalloc(NUMOUTCHANNELS, GFP_KERNEL);
- if (!usbduxsub[index].dac_commands) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- /* create space for the commands going to the usb device */
- usbduxsub[index].dux_commands = kzalloc(SIZEOFDUXBUFFER, GFP_KERNEL);
- if (!usbduxsub[index].dux_commands) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- /* create space for the in buffer and set it to zero */
- usbduxsub[index].inBuffer = kzalloc(SIZEINBUF, GFP_KERNEL);
- if (!(usbduxsub[index].inBuffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- /* create space of the instruction buffer */
- usbduxsub[index].insnBuffer = kzalloc(SIZEINSNBUF, GFP_KERNEL);
- if (!(usbduxsub[index].insnBuffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ devpriv->dac_commands = kzalloc(NUMOUTCHANNELS, GFP_KERNEL);
+ devpriv->dux_commands = kzalloc(SIZEOFDUXBUFFER, GFP_KERNEL);
+ devpriv->in_buf = kzalloc(SIZEINBUF, GFP_KERNEL);
+ devpriv->insn_buf = kzalloc(SIZEINSNBUF, GFP_KERNEL);
+ devpriv->ai_urbs = kcalloc(devpriv->n_ai_urbs, sizeof(*urb),
+ GFP_KERNEL);
+ devpriv->ao_urbs = kcalloc(devpriv->n_ao_urbs, sizeof(*urb),
+ GFP_KERNEL);
+ if (!devpriv->dac_commands || !devpriv->dux_commands ||
+ !devpriv->in_buf || !devpriv->insn_buf ||
+ !devpriv->ai_urbs || !devpriv->ao_urbs)
return -ENOMEM;
- }
- /* create space for the outbuffer */
- usbduxsub[index].outBuffer = kzalloc(SIZEOUTBUF, GFP_KERNEL);
- if (!(usbduxsub[index].outBuffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- /* setting to alternate setting 3: enabling iso ep and bulk ep. */
- i = usb_set_interface(usbduxsub[index].usbdev,
- usbduxsub[index].ifnum, 3);
- if (i < 0) {
- dev_err(dev, "comedi_: usbduxsigma%d: "
- "could not set alternate setting 3 in high speed.\n",
- index);
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENODEV;
- }
- if (usbduxsub[index].high_speed)
- usbduxsub[index].numOfInBuffers = NUMOFINBUFFERSHIGH;
- else
- usbduxsub[index].numOfInBuffers = NUMOFINBUFFERSFULL;
-
- usbduxsub[index].urbIn = kcalloc(usbduxsub[index].numOfInBuffers,
- sizeof(struct urb *),
- GFP_KERNEL);
- if (!(usbduxsub[index].urbIn)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- for (i = 0; i < usbduxsub[index].numOfInBuffers; i++) {
+
+ for (i = 0; i < devpriv->n_ai_urbs; i++) {
/* one frame: 1ms */
- usbduxsub[index].urbIn[i] = usb_alloc_urb(1, GFP_KERNEL);
- if (usbduxsub[index].urbIn[i] == NULL) {
- dev_err(dev, "comedi_: usbduxsigma%d: "
- "Could not alloc. urb(%d)\n", index, i);
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ urb = usb_alloc_urb(1, GFP_KERNEL);
+ if (!urb)
return -ENOMEM;
- }
- usbduxsub[index].urbIn[i]->dev = usbduxsub[index].usbdev;
+ devpriv->ai_urbs[i] = urb;
+ urb->dev = usb;
/* will be filled later with a pointer to the comedi-device */
/* and ONLY then the urb should be submitted */
- usbduxsub[index].urbIn[i]->context = NULL;
- usbduxsub[index].urbIn[i]->pipe =
- usb_rcvisocpipe(usbduxsub[index].usbdev, ISOINEP);
- usbduxsub[index].urbIn[i]->transfer_flags = URB_ISO_ASAP;
- usbduxsub[index].urbIn[i]->transfer_buffer =
- kzalloc(SIZEINBUF, GFP_KERNEL);
- if (!(usbduxsub[index].urbIn[i]->transfer_buffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ urb->context = NULL;
+ urb->pipe = usb_rcvisocpipe(usb, USBDUXSIGMA_ISO_IN_EP);
+ urb->transfer_flags = URB_ISO_ASAP;
+ urb->transfer_buffer = kzalloc(SIZEINBUF, GFP_KERNEL);
+ if (!urb->transfer_buffer)
return -ENOMEM;
- }
- usbduxsub[index].urbIn[i]->complete = usbduxsub_ai_IsocIrq;
- usbduxsub[index].urbIn[i]->number_of_packets = 1;
- usbduxsub[index].urbIn[i]->transfer_buffer_length = SIZEINBUF;
- usbduxsub[index].urbIn[i]->iso_frame_desc[0].offset = 0;
- usbduxsub[index].urbIn[i]->iso_frame_desc[0].length =
- SIZEINBUF;
+ urb->complete = usbduxsigma_ai_urb_complete;
+ urb->number_of_packets = 1;
+ urb->transfer_buffer_length = SIZEINBUF;
+ urb->iso_frame_desc[0].offset = 0;
+ urb->iso_frame_desc[0].length = SIZEINBUF;
}
- /* out */
- if (usbduxsub[index].high_speed)
- usbduxsub[index].numOfOutBuffers = NUMOFOUTBUFFERSHIGH;
- else
- usbduxsub[index].numOfOutBuffers = NUMOFOUTBUFFERSFULL;
-
- usbduxsub[index].urbOut = kcalloc(usbduxsub[index].numOfOutBuffers,
- sizeof(struct urb *), GFP_KERNEL);
- if (!(usbduxsub[index].urbOut)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
- return -ENOMEM;
- }
- for (i = 0; i < usbduxsub[index].numOfOutBuffers; i++) {
+ for (i = 0; i < devpriv->n_ao_urbs; i++) {
/* one frame: 1ms */
- usbduxsub[index].urbOut[i] = usb_alloc_urb(1, GFP_KERNEL);
- if (usbduxsub[index].urbOut[i] == NULL) {
- dev_err(dev, "comedi_: usbduxsigma%d: "
- "Could not alloc. urb(%d)\n", index, i);
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ urb = usb_alloc_urb(1, GFP_KERNEL);
+ if (!urb)
return -ENOMEM;
- }
- usbduxsub[index].urbOut[i]->dev = usbduxsub[index].usbdev;
+ devpriv->ao_urbs[i] = urb;
+ urb->dev = usb;
/* will be filled later with a pointer to the comedi-device */
/* and ONLY then the urb should be submitted */
- usbduxsub[index].urbOut[i]->context = NULL;
- usbduxsub[index].urbOut[i]->pipe =
- usb_sndisocpipe(usbduxsub[index].usbdev, ISOOUTEP);
- usbduxsub[index].urbOut[i]->transfer_flags = URB_ISO_ASAP;
- usbduxsub[index].urbOut[i]->transfer_buffer =
- kzalloc(SIZEOUTBUF, GFP_KERNEL);
- if (!(usbduxsub[index].urbOut[i]->transfer_buffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ urb->context = NULL;
+ urb->pipe = usb_sndisocpipe(usb, USBDUXSIGMA_ISO_OUT_EP);
+ urb->transfer_flags = URB_ISO_ASAP;
+ urb->transfer_buffer = kzalloc(SIZEOUTBUF, GFP_KERNEL);
+ if (!urb->transfer_buffer)
return -ENOMEM;
- }
- usbduxsub[index].urbOut[i]->complete = usbduxsub_ao_IsocIrq;
- usbduxsub[index].urbOut[i]->number_of_packets = 1;
- usbduxsub[index].urbOut[i]->transfer_buffer_length =
- SIZEOUTBUF;
- usbduxsub[index].urbOut[i]->iso_frame_desc[0].offset = 0;
- usbduxsub[index].urbOut[i]->iso_frame_desc[0].length =
- SIZEOUTBUF;
- if (usbduxsub[index].high_speed) {
- /* uframes */
- usbduxsub[index].urbOut[i]->interval = 8;
- } else {
- /* frames */
- usbduxsub[index].urbOut[i]->interval = 1;
- }
+ urb->complete = usbduxsigma_ao_urb_complete;
+ urb->number_of_packets = 1;
+ urb->transfer_buffer_length = SIZEOUTBUF;
+ urb->iso_frame_desc[0].offset = 0;
+ urb->iso_frame_desc[0].length = SIZEOUTBUF;
+ if (devpriv->high_speed)
+ urb->interval = 8; /* uframes */
+ else
+ urb->interval = 1; /* frames */
}
- /* pwm */
- if (usbduxsub[index].high_speed) {
+ if (devpriv->high_speed) {
/* max bulk ep size in high speed */
- usbduxsub[index].sizePwmBuf = 512;
- usbduxsub[index].urbPwm = usb_alloc_urb(0, GFP_KERNEL);
- if (usbduxsub[index].urbPwm == NULL) {
- dev_err(dev, "comedi_: usbduxsigma%d: "
- "Could not alloc. pwm urb\n", index);
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ devpriv->pwm_buf_sz = 512;
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb)
return -ENOMEM;
- }
- usbduxsub[index].urbPwm->transfer_buffer =
- kzalloc(usbduxsub[index].sizePwmBuf, GFP_KERNEL);
- if (!(usbduxsub[index].urbPwm->transfer_buffer)) {
- tidy_up(&(usbduxsub[index]));
- up(&start_stop_sem);
+ devpriv->pwm_urb = urb;
+ urb->transfer_buffer = kzalloc(devpriv->pwm_buf_sz, GFP_KERNEL);
+ if (!urb->transfer_buffer)
return -ENOMEM;
- }
} else {
- usbduxsub[index].urbPwm = NULL;
- usbduxsub[index].sizePwmBuf = 0;
+ devpriv->pwm_urb = NULL;
+ devpriv->pwm_buf_sz = 0;
}
- usbduxsub[index].ai_cmd_running = 0;
- usbduxsub[index].ao_cmd_running = 0;
- usbduxsub[index].pwm_cmd_running = 0;
+ return 0;
+}
- /* we've reached the bottom of the function */
- usbduxsub[index].probed = 1;
- up(&start_stop_sem);
+static void usbduxsigma_free_usb_buffers(struct comedi_device *dev)
+{
+ struct usbduxsigma_private *devpriv = dev->private;
+ struct urb *urb;
+ int i;
- ret = request_firmware_nowait(THIS_MODULE,
- FW_ACTION_HOTPLUG,
- FIRMWARE,
- &udev->dev,
- GFP_KERNEL,
- usbduxsub + index,
- usbdux_firmware_request_complete_handler
- );
+ /* force unlink all urbs */
+ usbduxsigma_ai_stop(dev, 1);
+ usbduxsigma_ao_stop(dev, 1);
+ usbduxsigma_pwm_stop(dev, 1);
+
+ urb = devpriv->pwm_urb;
+ if (urb) {
+ kfree(urb->transfer_buffer);
+ usb_free_urb(urb);
+ }
+ if (devpriv->ao_urbs) {
+ for (i = 0; i < devpriv->n_ao_urbs; i++) {
+ urb = devpriv->ao_urbs[i];
+ if (urb) {
+ kfree(urb->transfer_buffer);
+ usb_free_urb(urb);
+ }
+ }
+ kfree(devpriv->ao_urbs);
+ }
+ if (devpriv->ai_urbs) {
+ for (i = 0; i < devpriv->n_ai_urbs; i++) {
+ urb = devpriv->ai_urbs[i];
+ if (urb) {
+ kfree(urb->transfer_buffer);
+ usb_free_urb(urb);
+ }
+ }
+ kfree(devpriv->ai_urbs);
+ }
+ kfree(devpriv->insn_buf);
+ kfree(devpriv->in_buf);
+ kfree(devpriv->dux_commands);
+ kfree(devpriv->dac_commands);
+}
- if (ret) {
- dev_err(dev, "Could not load firmware (err=%d)\n", ret);
- return ret;
+static int usbduxsigma_auto_attach(struct comedi_device *dev,
+ unsigned long context_unused)
+{
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct usb_device *usb = comedi_to_usb_dev(dev);
+ struct usbduxsigma_private *devpriv;
+ int ret;
+
+ devpriv = kzalloc(sizeof(*devpriv), GFP_KERNEL);
+ if (!devpriv)
+ return -ENOMEM;
+ dev->private = devpriv;
+
+ sema_init(&devpriv->sem, 1);
+ usb_set_intfdata(intf, devpriv);
+
+ ret = usb_set_interface(usb,
+ intf->altsetting->desc.bInterfaceNumber, 3);
+ if (ret < 0) {
+ dev_err(dev->class_dev,
+ "could not set alternate setting 3 in high speed\n");
+ return -ENODEV;
}
- dev_info(dev, "comedi_: successfully initialised.\n");
- /* success */
- return 0;
+ /* test if it is high speed (USB 2.0) */
+ devpriv->high_speed = (usb->speed == USB_SPEED_HIGH);
+ if (devpriv->high_speed) {
+ devpriv->n_ai_urbs = NUMOFINBUFFERSHIGH;
+ devpriv->n_ao_urbs = NUMOFOUTBUFFERSHIGH;
+ } else {
+ devpriv->n_ai_urbs = NUMOFINBUFFERSFULL;
+ devpriv->n_ao_urbs = NUMOFOUTBUFFERSFULL;
+ }
+
+ ret = usbduxsigma_alloc_usb_buffers(dev);
+ if (ret)
+ return ret;
+
+ ret = comedi_load_firmware(dev, &usb->dev, FIRMWARE,
+ usbduxsigma_firmware_upload, 0);
+ if (ret)
+ return ret;
+
+ return usbduxsigma_attach_common(dev);
}
-static void usbduxsigma_usb_disconnect(struct usb_interface *intf)
+static void usbduxsigma_detach(struct comedi_device *dev)
{
- struct usbduxsub *usbduxsub_tmp = usb_get_intfdata(intf);
- struct usb_device *udev = interface_to_usbdev(intf);
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
+ struct usbduxsigma_private *devpriv = dev->private;
- if (!usbduxsub_tmp) {
- dev_err(&intf->dev,
- "comedi_: disconnect called with null pointer.\n");
+ if (!devpriv)
return;
- }
- if (usbduxsub_tmp->usbdev != udev) {
- dev_err(&intf->dev, "comedi_: BUG! wrong ptr!\n");
- return;
- }
- if (usbduxsub_tmp->ai_cmd_running)
- /* we are still running a command */
- usbdux_ai_stop(usbduxsub_tmp, 1);
- if (usbduxsub_tmp->ao_cmd_running)
- /* we are still running a command */
- usbdux_ao_stop(usbduxsub_tmp, 1);
- comedi_usb_auto_unconfig(intf);
- down(&start_stop_sem);
- down(&usbduxsub_tmp->sem);
- tidy_up(usbduxsub_tmp);
- up(&usbduxsub_tmp->sem);
- up(&start_stop_sem);
- dev_info(&intf->dev, "comedi_: disconnected from the usb\n");
+
+ usb_set_intfdata(intf, NULL);
+
+ down(&devpriv->sem);
+ usbduxsigma_free_usb_buffers(dev);
+ up(&devpriv->sem);
+}
+
+static struct comedi_driver usbduxsigma_driver = {
+ .driver_name = "usbduxsigma",
+ .module = THIS_MODULE,
+ .auto_attach = usbduxsigma_auto_attach,
+ .detach = usbduxsigma_detach,
+};
+
+static int usbduxsigma_usb_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
+{
+ return comedi_usb_auto_config(intf, &usbduxsigma_driver, 0);
}
static const struct usb_device_id usbduxsigma_usb_table[] = {
@@ -2664,7 +1754,7 @@ MODULE_DEVICE_TABLE(usb, usbduxsigma_usb_table);
static struct usb_driver usbduxsigma_usb_driver = {
.name = "usbduxsigma",
.probe = usbduxsigma_usb_probe,
- .disconnect = usbduxsigma_usb_disconnect,
+ .disconnect = comedi_usb_auto_unconfig,
.id_table = usbduxsigma_usb_table,
};
module_comedi_usb_driver(usbduxsigma_driver, usbduxsigma_usb_driver);
diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c
index 2be5087414f6..0ab04c0dd410 100644
--- a/drivers/staging/comedi/drivers/vmk80xx.c
+++ b/drivers/staging/comedi/drivers/vmk80xx.c
@@ -16,11 +16,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
Driver: vmk80xx
@@ -159,8 +154,6 @@ static const struct vmk80xx_board vmk80xx_boardinfo[] = {
};
struct vmk80xx_private {
- struct usb_device *usb;
- struct usb_interface *intf;
struct usb_endpoint_descriptor *ep_rx;
struct usb_endpoint_descriptor *ep_tx;
struct firmware_version fw;
@@ -170,9 +163,10 @@ struct vmk80xx_private {
enum vmk80xx_model model;
};
-static int vmk80xx_check_data_link(struct vmk80xx_private *devpriv)
+static int vmk80xx_check_data_link(struct comedi_device *dev)
{
- struct usb_device *usb = devpriv->usb;
+ struct vmk80xx_private *devpriv = dev->private;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
unsigned int tx_pipe;
unsigned int rx_pipe;
unsigned char tx[1];
@@ -194,9 +188,10 @@ static int vmk80xx_check_data_link(struct vmk80xx_private *devpriv)
return (int)rx[1];
}
-static void vmk80xx_read_eeprom(struct vmk80xx_private *devpriv, int flag)
+static void vmk80xx_read_eeprom(struct comedi_device *dev, int flag)
{
- struct usb_device *usb = devpriv->usb;
+ struct vmk80xx_private *devpriv = dev->private;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
unsigned int tx_pipe;
unsigned int rx_pipe;
unsigned char tx[1];
@@ -223,9 +218,10 @@ static void vmk80xx_read_eeprom(struct vmk80xx_private *devpriv, int flag)
strncpy(devpriv->fw.ic6_vers, rx + 25, 24);
}
-static void vmk80xx_do_bulk_msg(struct vmk80xx_private *devpriv)
+static void vmk80xx_do_bulk_msg(struct comedi_device *dev)
{
- struct usb_device *usb = devpriv->usb;
+ struct vmk80xx_private *devpriv = dev->private;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
__u8 tx_addr;
__u8 rx_addr;
unsigned int tx_pipe;
@@ -248,21 +244,18 @@ static void vmk80xx_do_bulk_msg(struct vmk80xx_private *devpriv)
usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, size, NULL, HZ * 10);
}
-static int vmk80xx_read_packet(struct vmk80xx_private *devpriv)
+static int vmk80xx_read_packet(struct comedi_device *dev)
{
- struct usb_device *usb;
+ struct vmk80xx_private *devpriv = dev->private;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
struct usb_endpoint_descriptor *ep;
unsigned int pipe;
- if (!devpriv->intf)
- return -ENODEV;
-
if (devpriv->model == VMK8061_MODEL) {
- vmk80xx_do_bulk_msg(devpriv);
+ vmk80xx_do_bulk_msg(dev);
return 0;
}
- usb = devpriv->usb;
ep = devpriv->ep_rx;
pipe = usb_rcvintpipe(usb, ep->bEndpointAddress);
return usb_interrupt_msg(usb, pipe, devpriv->usb_rx_buf,
@@ -270,23 +263,20 @@ static int vmk80xx_read_packet(struct vmk80xx_private *devpriv)
HZ * 10);
}
-static int vmk80xx_write_packet(struct vmk80xx_private *devpriv, int cmd)
+static int vmk80xx_write_packet(struct comedi_device *dev, int cmd)
{
- struct usb_device *usb;
+ struct vmk80xx_private *devpriv = dev->private;
+ struct usb_device *usb = comedi_to_usb_dev(dev);
struct usb_endpoint_descriptor *ep;
unsigned int pipe;
- if (!devpriv->intf)
- return -ENODEV;
-
devpriv->usb_tx_buf[0] = cmd;
if (devpriv->model == VMK8061_MODEL) {
- vmk80xx_do_bulk_msg(devpriv);
+ vmk80xx_do_bulk_msg(dev);
return 0;
}
- usb = devpriv->usb;
ep = devpriv->ep_tx;
pipe = usb_sndintpipe(usb, ep->bEndpointAddress);
return usb_interrupt_msg(usb, pipe, devpriv->usb_tx_buf,
@@ -294,18 +284,19 @@ static int vmk80xx_write_packet(struct vmk80xx_private *devpriv, int cmd)
HZ * 10);
}
-static int vmk80xx_reset_device(struct vmk80xx_private *devpriv)
+static int vmk80xx_reset_device(struct comedi_device *dev)
{
+ struct vmk80xx_private *devpriv = dev->private;
size_t size;
int retval;
size = le16_to_cpu(devpriv->ep_tx->wMaxPacketSize);
memset(devpriv->usb_tx_buf, 0, size);
- retval = vmk80xx_write_packet(devpriv, VMK8055_CMD_RST);
+ retval = vmk80xx_write_packet(dev, VMK8055_CMD_RST);
if (retval)
return retval;
/* set outputs to known state as we cannot read them */
- return vmk80xx_write_packet(devpriv, VMK8055_CMD_WRT_AD);
+ return vmk80xx_write_packet(dev, VMK8055_CMD_WRT_AD);
}
static int vmk80xx_ai_insn_read(struct comedi_device *dev,
@@ -338,7 +329,7 @@ static int vmk80xx_ai_insn_read(struct comedi_device *dev,
}
for (n = 0; n < insn->n; n++) {
- if (vmk80xx_read_packet(devpriv))
+ if (vmk80xx_read_packet(dev))
break;
if (devpriv->model == VMK8055_MODEL) {
@@ -388,7 +379,7 @@ static int vmk80xx_ao_insn_write(struct comedi_device *dev,
for (n = 0; n < insn->n; n++) {
devpriv->usb_tx_buf[reg] = data[n];
- if (vmk80xx_write_packet(devpriv, cmd))
+ if (vmk80xx_write_packet(dev, cmd))
break;
}
@@ -415,7 +406,7 @@ static int vmk80xx_ao_insn_read(struct comedi_device *dev,
devpriv->usb_tx_buf[0] = VMK8061_CMD_RD_AO;
for (n = 0; n < insn->n; n++) {
- if (vmk80xx_read_packet(devpriv))
+ if (vmk80xx_read_packet(dev))
break;
data[n] = devpriv->usb_rx_buf[reg + chan];
@@ -447,7 +438,7 @@ static int vmk80xx_di_insn_bits(struct comedi_device *dev,
reg = VMK8055_DI_REG;
}
- retval = vmk80xx_read_packet(devpriv);
+ retval = vmk80xx_read_packet(dev);
if (!retval) {
if (devpriv->model == VMK8055_MODEL)
@@ -492,7 +483,7 @@ static int vmk80xx_do_insn_bits(struct comedi_device *dev,
tx_buf[reg] &= ~data[0];
tx_buf[reg] |= (data[0] & data[1]);
- retval = vmk80xx_write_packet(devpriv, cmd);
+ retval = vmk80xx_write_packet(dev, cmd);
if (retval)
goto out;
@@ -501,7 +492,7 @@ static int vmk80xx_do_insn_bits(struct comedi_device *dev,
if (devpriv->model == VMK8061_MODEL) {
tx_buf[0] = VMK8061_CMD_RD_DO;
- retval = vmk80xx_read_packet(devpriv);
+ retval = vmk80xx_read_packet(dev);
if (!retval) {
data[1] = rx_buf[reg];
@@ -547,7 +538,7 @@ static int vmk80xx_cnt_insn_read(struct comedi_device *dev,
}
for (n = 0; n < insn->n; n++) {
- if (vmk80xx_read_packet(devpriv))
+ if (vmk80xx_read_packet(dev))
break;
if (devpriv->model == VMK8055_MODEL)
@@ -597,7 +588,7 @@ static int vmk80xx_cnt_insn_config(struct comedi_device *dev,
}
for (n = 0; n < insn->n; n++)
- if (vmk80xx_write_packet(devpriv, cmd))
+ if (vmk80xx_write_packet(dev, cmd))
break;
up(&devpriv->limit_sem);
@@ -640,7 +631,7 @@ static int vmk80xx_cnt_insn_write(struct comedi_device *dev,
devpriv->usb_tx_buf[6 + chan] = val;
- if (vmk80xx_write_packet(devpriv, cmd))
+ if (vmk80xx_write_packet(dev, cmd))
break;
}
@@ -671,7 +662,7 @@ static int vmk80xx_pwm_insn_read(struct comedi_device *dev,
tx_buf[0] = VMK8061_CMD_RD_PWM;
for (n = 0; n < insn->n; n++) {
- if (vmk80xx_read_packet(devpriv))
+ if (vmk80xx_read_packet(dev))
break;
data[n] = rx_buf[reg[0]] + 4 * rx_buf[reg[1]];
@@ -719,7 +710,7 @@ static int vmk80xx_pwm_insn_write(struct comedi_device *dev,
tx_buf[reg[0]] = (unsigned char)(data[n] & 0x03);
tx_buf[reg[1]] = (unsigned char)(data[n] >> 2) & 0xff;
- if (vmk80xx_write_packet(devpriv, cmd))
+ if (vmk80xx_write_packet(dev, cmd))
break;
}
@@ -731,7 +722,7 @@ static int vmk80xx_pwm_insn_write(struct comedi_device *dev,
static int vmk80xx_find_usb_endpoints(struct comedi_device *dev)
{
struct vmk80xx_private *devpriv = dev->private;
- struct usb_interface *intf = devpriv->intf;
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
struct usb_host_interface *iface_desc = intf->cur_altsetting;
struct usb_endpoint_descriptor *ep_desc;
int i;
@@ -889,8 +880,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev,
return -ENOMEM;
dev->private = devpriv;
- devpriv->usb = interface_to_usbdev(intf);
- devpriv->intf = intf;
devpriv->model = boardinfo->model;
ret = vmk80xx_find_usb_endpoints(dev);
@@ -906,23 +895,24 @@ static int vmk80xx_auto_attach(struct comedi_device *dev,
usb_set_intfdata(intf, devpriv);
if (devpriv->model == VMK8061_MODEL) {
- vmk80xx_read_eeprom(devpriv, IC3_VERSION);
+ vmk80xx_read_eeprom(dev, IC3_VERSION);
dev_info(&intf->dev, "%s\n", devpriv->fw.ic3_vers);
- if (vmk80xx_check_data_link(devpriv)) {
- vmk80xx_read_eeprom(devpriv, IC6_VERSION);
+ if (vmk80xx_check_data_link(dev)) {
+ vmk80xx_read_eeprom(dev, IC6_VERSION);
dev_info(&intf->dev, "%s\n", devpriv->fw.ic6_vers);
}
}
if (devpriv->model == VMK8055_MODEL)
- vmk80xx_reset_device(devpriv);
+ vmk80xx_reset_device(dev);
return vmk80xx_init_subdevices(dev);
}
static void vmk80xx_detach(struct comedi_device *dev)
{
+ struct usb_interface *intf = comedi_to_usb_interface(dev);
struct vmk80xx_private *devpriv = dev->private;
if (!devpriv)
@@ -930,7 +920,7 @@ static void vmk80xx_detach(struct comedi_device *dev)
down(&devpriv->limit_sem);
- usb_set_intfdata(devpriv->intf, NULL);
+ usb_set_intfdata(intf, NULL);
kfree(devpriv->usb_rx_buf);
kfree(devpriv->usb_tx_buf);
diff --git a/drivers/staging/comedi/kcomedilib/kcomedilib_main.c b/drivers/staging/comedi/kcomedilib/kcomedilib_main.c
index 3231a483f561..da8988c6bf50 100644
--- a/drivers/staging/comedi/kcomedilib/kcomedilib_main.c
+++ b/drivers/staging/comedi/kcomedilib/kcomedilib_main.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#include <linux/module.h>
diff --git a/drivers/staging/comedi/proc.c b/drivers/staging/comedi/proc.c
index 886c202de9ab..8ee94424bc8f 100644
--- a/drivers/staging/comedi/proc.c
+++ b/drivers/staging/comedi/proc.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
/*
diff --git a/drivers/staging/comedi/range.c b/drivers/staging/comedi/range.c
index 1dc391b76447..1f20332cc459 100644
--- a/drivers/staging/comedi/range.c
+++ b/drivers/staging/comedi/range.c
@@ -14,11 +14,6 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
*/
#include <linux/uaccess.h>
diff --git a/drivers/staging/cptm1217/clearpad_tm1217.c b/drivers/staging/cptm1217/clearpad_tm1217.c
index e96eee3ca898..42a5f5c8d3d1 100644
--- a/drivers/staging/cptm1217/clearpad_tm1217.c
+++ b/drivers/staging/cptm1217/clearpad_tm1217.c
@@ -547,10 +547,8 @@ fail_gpio:
fail:
/* Clean up before returning failure */
for (i = 0; i < TOUCH_SUPPORTED; i++) {
- if (ts->cp_input_info[i].input) {
+ if (ts->cp_input_info[i].input)
input_unregister_device(ts->cp_input_info[i].input);
- input_free_device(ts->cp_input_info[i].input);
- }
}
kfree(ts);
return retval;
diff --git a/drivers/staging/crystalhd/bc_dts_glob_lnx.h b/drivers/staging/crystalhd/bc_dts_glob_lnx.h
index fd1a6e680c8a..981708f3ee39 100644
--- a/drivers/staging/crystalhd/bc_dts_glob_lnx.h
+++ b/drivers/staging/crystalhd/bc_dts_glob_lnx.h
@@ -58,11 +58,11 @@
* between the driver and the application.
*/
enum BC_DTS_GLOBALS {
- BC_MAX_FW_CMD_BUFF_SZ = 0x40, /* FW passthrough cmd/rsp buffer size */
+ BC_MAX_FW_CMD_BUFF_SZ = 0x40, /* FW passthrough cmd/rsp buffer size */
PCI_CFG_SIZE = 256, /* PCI config size buffer */
BC_IOCTL_DATA_POOL_SIZE = 8, /* BC_IOCTL_DATA Pool size */
- BC_LINK_MAX_OPENS = 3, /* Maximum simultaneous opens*/
- BC_LINK_MAX_SGLS = 1024, /* Maximum SG elements 4M/4K */
+ BC_LINK_MAX_OPENS = 3, /* Maximum simultaneous opens*/
+ BC_LINK_MAX_SGLS = 1024, /* Maximum SG elements 4M/4K */
BC_TX_LIST_CNT = 2, /* Max Tx DMA Rings */
BC_RX_LIST_CNT = 8, /* Max Rx DMA Rings*/
BC_PROC_OUTPUT_TIMEOUT = 3000, /* Milliseconds */
@@ -240,11 +240,14 @@ enum BC_DRV_CMD {
DRV_CMD_ADD_RXBUFFS, /* Add Rx side buffers to driver pool */
DRV_CMD_FETCH_RXBUFF, /* Get Rx DMAed buffer */
DRV_CMD_START_RX_CAP, /* Start Rx Buffer Capture */
- DRV_CMD_FLUSH_RX_CAP, /* Stop the capture for now...we will enhance this later*/
+ DRV_CMD_FLUSH_RX_CAP, /* Stop the capture for now...
+ we will enhance this later*/
DRV_CMD_GET_DRV_STAT, /* Get Driver Internal Statistics */
DRV_CMD_RST_DRV_STAT, /* Reset Driver Internal Statistics */
- DRV_CMD_NOTIFY_MODE, /* Notify the Mode to driver in which the application is Operating*/
- DRV_CMD_CHANGE_CLOCK, /* Change the core clock to either save power or improve performance */
+ DRV_CMD_NOTIFY_MODE, /* Notify the Mode to driver
+ in which the application is Operating*/
+ DRV_CMD_CHANGE_CLOCK, /* Change the core clock to either save power
+ or improve performance */
/* MUST be the last one.. */
DRV_CMD_END, /* End of the List.. */
@@ -283,8 +286,8 @@ struct crystalhd_ioctl_data {
struct BC_IOCTL_DATA udata; /* IOCTL from App..*/
uint32_t u_id; /* Driver specific user ID */
uint32_t cmd; /* Cmd ID for driver's use. */
- void *add_cdata; /* Additional command specific data..*/
- uint32_t add_cdata_sz; /* Additional command specific data size */
+ void *add_cdata; /* Additional command specific data..*/
+ uint32_t add_cdata_sz; /* Additional command specific data size */
struct crystalhd_ioctl_data *next; /* List/Fifo management */
};
diff --git a/drivers/staging/crystalhd/crystalhd_cmds.c b/drivers/staging/crystalhd/crystalhd_cmds.c
index ed99daa6ef46..3ab502b8c3be 100644
--- a/drivers/staging/crystalhd/crystalhd_cmds.c
+++ b/drivers/staging/crystalhd/crystalhd_cmds.c
@@ -472,8 +472,8 @@ static enum BC_STATUS bc_cproc_hw_txdma(struct crystalhd_cmd *ctx,
}
/* Helper function to check on user buffers */
-static enum BC_STATUS bc_cproc_check_inbuffs(bool pin, void *ubuff, uint32_t ub_sz,
- uint32_t uv_off, bool en_422)
+static enum BC_STATUS bc_cproc_check_inbuffs(bool pin, void *ubuff,
+ uint32_t ub_sz, uint32_t uv_off, bool en_422)
{
if (!ubuff || !ub_sz) {
BCMLOG_ERR("%s->Invalid Arg %p %x\n",
@@ -483,8 +483,9 @@ static enum BC_STATUS bc_cproc_check_inbuffs(bool pin, void *ubuff, uint32_t ub_
/* Check for alignment */
if (((uintptr_t)ubuff) & 0x03) {
- BCMLOG_ERR("%s-->Un-aligned address not implemented yet.. %p\n",
- ((pin) ? "TX" : "RX"), ubuff);
+ BCMLOG_ERR(
+ "%s-->Un-aligned address not implemented yet.. %p\n",
+ ((pin) ? "TX" : "RX"), ubuff);
return BC_STS_NOT_IMPL;
}
if (pin)
@@ -572,7 +573,8 @@ static enum BC_STATUS bc_cproc_add_cap_buff(struct crystalhd_cmd *ctx,
if (!dio_hnd)
return BC_STS_ERROR;
- sts = crystalhd_hw_add_cap_buffer(&ctx->hw_ctx, dio_hnd, (ctx->state == BC_LINK_READY));
+ sts = crystalhd_hw_add_cap_buffer(&ctx->hw_ctx, dio_hnd,
+ (ctx->state == BC_LINK_READY));
if ((sts != BC_STS_SUCCESS) && (sts != BC_STS_BUSY)) {
crystalhd_unmap_dio(ctx->adp, dio_hnd);
return sts;
@@ -618,7 +620,8 @@ static enum BC_STATUS bc_cproc_fetch_frame(struct crystalhd_cmd *ctx,
sts = crystalhd_hw_get_cap_buffer(&ctx->hw_ctx, &frame->PibInfo, &dio);
if (sts != BC_STS_SUCCESS)
- return (ctx->state & BC_LINK_SUSPEND) ? BC_STS_IO_USER_ABORT : sts;
+ return (ctx->state & BC_LINK_SUSPEND) ?
+ BC_STS_IO_USER_ABORT : sts;
frame->Flags = dio->uinfo.comp_flags;
@@ -673,7 +676,8 @@ static enum BC_STATUS bc_cproc_flush_cap_buffs(struct crystalhd_cmd *ctx,
frame = &idata->udata.u.DecOutData;
for (count = 0; count < BC_RX_LIST_CNT; count++) {
- sts = crystalhd_hw_get_cap_buffer(&ctx->hw_ctx, &frame->PibInfo, &dio);
+ sts = crystalhd_hw_get_cap_buffer(&ctx->hw_ctx,
+ &frame->PibInfo, &dio);
if (sts != BC_STS_SUCCESS)
break;
@@ -916,7 +920,8 @@ enum BC_STATUS crystalhd_user_open(struct crystalhd_cmd *ctx,
* Closer application handle and release app specific
* resources.
*/
-enum BC_STATUS crystalhd_user_close(struct crystalhd_cmd *ctx, struct crystalhd_user *uc)
+enum BC_STATUS crystalhd_user_close(struct crystalhd_cmd *ctx,
+ struct crystalhd_user *uc)
{
uint32_t mode = uc->mode;
@@ -1008,8 +1013,8 @@ enum BC_STATUS crystalhd_delete_cmd_context(struct crystalhd_cmd *ctx)
* mode of operation and returns the function pointer
* from the cproc table.
*/
-crystalhd_cmd_proc crystalhd_get_cmd_proc(struct crystalhd_cmd *ctx, uint32_t cmd,
- struct crystalhd_user *uc)
+crystalhd_cmd_proc crystalhd_get_cmd_proc(struct crystalhd_cmd *ctx,
+ uint32_t cmd, struct crystalhd_user *uc)
{
crystalhd_cmd_proc cproc = NULL;
unsigned int i, tbl_sz;
@@ -1024,7 +1029,8 @@ crystalhd_cmd_proc crystalhd_get_cmd_proc(struct crystalhd_cmd *ctx, uint32_t cm
return NULL;
}
- tbl_sz = sizeof(g_crystalhd_cproc_tbl) / sizeof(struct crystalhd_cmd_tbl);
+ tbl_sz = sizeof(g_crystalhd_cproc_tbl) /
+ sizeof(struct crystalhd_cmd_tbl);
for (i = 0; i < tbl_sz; i++) {
if (g_crystalhd_cproc_tbl[i].cmd_id == cmd) {
if ((uc->mode == DTS_MONITOR_MODE) &&
diff --git a/drivers/staging/crystalhd/crystalhd_cmds.h b/drivers/staging/crystalhd/crystalhd_cmds.h
index 4066ba393a17..377cd9d68b08 100644
--- a/drivers/staging/crystalhd/crystalhd_cmds.h
+++ b/drivers/staging/crystalhd/crystalhd_cmds.h
@@ -66,7 +66,8 @@ struct crystalhd_cmd {
struct crystalhd_hw hw_ctx;
};
-typedef enum BC_STATUS(*crystalhd_cmd_proc)(struct crystalhd_cmd *, struct crystalhd_ioctl_data *);
+typedef enum BC_STATUS(*crystalhd_cmd_proc)(struct crystalhd_cmd *,
+ struct crystalhd_ioctl_data *);
struct crystalhd_cmd_tbl {
uint32_t cmd_id;
@@ -74,13 +75,17 @@ struct crystalhd_cmd_tbl {
uint32_t block_mon;
};
-enum BC_STATUS crystalhd_suspend(struct crystalhd_cmd *ctx, struct crystalhd_ioctl_data *idata);
+enum BC_STATUS crystalhd_suspend(struct crystalhd_cmd *ctx,
+ struct crystalhd_ioctl_data *idata);
enum BC_STATUS crystalhd_resume(struct crystalhd_cmd *ctx);
-crystalhd_cmd_proc crystalhd_get_cmd_proc(struct crystalhd_cmd *ctx, uint32_t cmd,
- struct crystalhd_user *uc);
-enum BC_STATUS crystalhd_user_open(struct crystalhd_cmd *ctx, struct crystalhd_user **user_ctx);
-enum BC_STATUS crystalhd_user_close(struct crystalhd_cmd *ctx, struct crystalhd_user *uc);
-enum BC_STATUS crystalhd_setup_cmd_context(struct crystalhd_cmd *ctx, struct crystalhd_adp *adp);
+crystalhd_cmd_proc crystalhd_get_cmd_proc(struct crystalhd_cmd *ctx,
+ uint32_t cmd, struct crystalhd_user *uc);
+enum BC_STATUS crystalhd_user_open(struct crystalhd_cmd *ctx,
+ struct crystalhd_user **user_ctx);
+enum BC_STATUS crystalhd_user_close(struct crystalhd_cmd *ctx,
+ struct crystalhd_user *uc);
+enum BC_STATUS crystalhd_setup_cmd_context(struct crystalhd_cmd *ctx,
+ struct crystalhd_adp *adp);
enum BC_STATUS crystalhd_delete_cmd_context(struct crystalhd_cmd *ctx);
bool crystalhd_cmd_interrupt(struct crystalhd_cmd *ctx);
diff --git a/drivers/staging/crystalhd/crystalhd_fw_if.h b/drivers/staging/crystalhd/crystalhd_fw_if.h
index 9e2831e68bba..4b363a5069d7 100644
--- a/drivers/staging/crystalhd/crystalhd_fw_if.h
+++ b/drivers/staging/crystalhd/crystalhd_fw_if.h
@@ -106,7 +106,8 @@ struct ppb_vc1 {
struct fgt_sei {
struct fgt_sei *next;
- unsigned char model_values[3][MAX_FGT_VALUE_INTERVAL][MAX_FGT_MODEL_VALUE];
+ unsigned char
+ model_values[3][MAX_FGT_VALUE_INTERVAL][MAX_FGT_MODEL_VALUE];
unsigned char upper_bound[3][MAX_FGT_VALUE_INTERVAL];
unsigned char lower_bound[3][MAX_FGT_VALUE_INTERVAL];
@@ -125,10 +126,12 @@ struct fgt_sei {
unsigned char blending_mode_id; /* Blending mode. */
unsigned char log2_scale_factor; /* Log2 scale factor (2-7). */
- unsigned char comp_flag[3]; /* Components [0,2] parameters present flag. */
- unsigned char num_intervals_minus1[3]; /* Number of intensity level intervals. */
+ unsigned char comp_flag[3]; /* Components [0,2]
+ parameters present flag. */
+ unsigned char num_intervals_minus1[3]; /* Number of
+ intensity level intervals. */
unsigned char num_model_values[3]; /* Number of model values. */
- uint16_t repetition_period; /* Repetition period (0-16384) */
+ uint16_t repetition_period; /* Repetition period (0-16384) */
};
@@ -266,40 +269,40 @@ enum c011_ts_cmd {
/* Decoding commands */
eCMD_C011_DEC_CHAN_OPEN = eCMD_C011_CMD_BASE + 0x100,
- eCMD_C011_DEC_CHAN_CLOSE = eCMD_C011_CMD_BASE + 0x101,
- eCMD_C011_DEC_CHAN_ACTIVATE = eCMD_C011_CMD_BASE + 0x102,
- eCMD_C011_DEC_CHAN_STATUS = eCMD_C011_CMD_BASE + 0x103,
- eCMD_C011_DEC_CHAN_FLUSH = eCMD_C011_CMD_BASE + 0x104,
+ eCMD_C011_DEC_CHAN_CLOSE = eCMD_C011_CMD_BASE + 0x101,
+ eCMD_C011_DEC_CHAN_ACTIVATE = eCMD_C011_CMD_BASE + 0x102,
+ eCMD_C011_DEC_CHAN_STATUS = eCMD_C011_CMD_BASE + 0x103,
+ eCMD_C011_DEC_CHAN_FLUSH = eCMD_C011_CMD_BASE + 0x104,
eCMD_C011_DEC_CHAN_TRICK_PLAY = eCMD_C011_CMD_BASE + 0x105,
- eCMD_C011_DEC_CHAN_TS_PIDS = eCMD_C011_CMD_BASE + 0x106,
+ eCMD_C011_DEC_CHAN_TS_PIDS = eCMD_C011_CMD_BASE + 0x106,
eCMD_C011_DEC_CHAN_PS_STREAM_ID = eCMD_C011_CMD_BASE + 0x107,
eCMD_C011_DEC_CHAN_INPUT_PARAMS = eCMD_C011_CMD_BASE + 0x108,
eCMD_C011_DEC_CHAN_VIDEO_OUTPUT = eCMD_C011_CMD_BASE + 0x109,
- eCMD_C011_DEC_CHAN_OUTPUT_FORMAT = eCMD_C011_CMD_BASE + 0x10A,
- eCMD_C011_DEC_CHAN_SCALING_FILTERS = eCMD_C011_CMD_BASE + 0x10B,
- eCMD_C011_DEC_CHAN_OSD_MODE = eCMD_C011_CMD_BASE + 0x10D,
+ eCMD_C011_DEC_CHAN_OUTPUT_FORMAT = eCMD_C011_CMD_BASE + 0x10A,
+ eCMD_C011_DEC_CHAN_SCALING_FILTERS = eCMD_C011_CMD_BASE + 0x10B,
+ eCMD_C011_DEC_CHAN_OSD_MODE = eCMD_C011_CMD_BASE + 0x10D,
eCMD_C011_DEC_CHAN_DROP = eCMD_C011_CMD_BASE + 0x10E,
- eCMD_C011_DEC_CHAN_RELEASE = eCMD_C011_CMD_BASE + 0x10F,
- eCMD_C011_DEC_CHAN_STREAM_SETTINGS = eCMD_C011_CMD_BASE + 0x110,
+ eCMD_C011_DEC_CHAN_RELEASE = eCMD_C011_CMD_BASE + 0x10F,
+ eCMD_C011_DEC_CHAN_STREAM_SETTINGS = eCMD_C011_CMD_BASE + 0x110,
eCMD_C011_DEC_CHAN_PAUSE_OUTPUT = eCMD_C011_CMD_BASE + 0x111,
- eCMD_C011_DEC_CHAN_CHANGE = eCMD_C011_CMD_BASE + 0x112,
- eCMD_C011_DEC_CHAN_SET_STC = eCMD_C011_CMD_BASE + 0x113,
- eCMD_C011_DEC_CHAN_SET_PTS = eCMD_C011_CMD_BASE + 0x114,
- eCMD_C011_DEC_CHAN_CC_MODE = eCMD_C011_CMD_BASE + 0x115,
- eCMD_C011_DEC_CREATE_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x116,
- eCMD_C011_DEC_COPY_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x117,
- eCMD_C011_DEC_DELETE_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x118,
- eCMD_C011_DEC_CHAN_SET_DECYPTION = eCMD_C011_CMD_BASE + 0x119,
+ eCMD_C011_DEC_CHAN_CHANGE = eCMD_C011_CMD_BASE + 0x112,
+ eCMD_C011_DEC_CHAN_SET_STC = eCMD_C011_CMD_BASE + 0x113,
+ eCMD_C011_DEC_CHAN_SET_PTS = eCMD_C011_CMD_BASE + 0x114,
+ eCMD_C011_DEC_CHAN_CC_MODE = eCMD_C011_CMD_BASE + 0x115,
+ eCMD_C011_DEC_CREATE_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x116,
+ eCMD_C011_DEC_COPY_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x117,
+ eCMD_C011_DEC_DELETE_AUDIO_CONTEXT = eCMD_C011_CMD_BASE + 0x118,
+ eCMD_C011_DEC_CHAN_SET_DECYPTION = eCMD_C011_CMD_BASE + 0x119,
eCMD_C011_DEC_CHAN_START_VIDEO = eCMD_C011_CMD_BASE + 0x11A,
eCMD_C011_DEC_CHAN_STOP_VIDEO = eCMD_C011_CMD_BASE + 0x11B,
eCMD_C011_DEC_CHAN_PIC_CAPTURE = eCMD_C011_CMD_BASE + 0x11C,
- eCMD_C011_DEC_CHAN_PAUSE = eCMD_C011_CMD_BASE + 0x11D,
+ eCMD_C011_DEC_CHAN_PAUSE = eCMD_C011_CMD_BASE + 0x11D,
eCMD_C011_DEC_CHAN_PAUSE_STATE = eCMD_C011_CMD_BASE + 0x11E,
- eCMD_C011_DEC_CHAN_SET_SLOWM_RATE = eCMD_C011_CMD_BASE + 0x11F,
- eCMD_C011_DEC_CHAN_GET_SLOWM_RATE = eCMD_C011_CMD_BASE + 0x120,
+ eCMD_C011_DEC_CHAN_SET_SLOWM_RATE = eCMD_C011_CMD_BASE + 0x11F,
+ eCMD_C011_DEC_CHAN_GET_SLOWM_RATE = eCMD_C011_CMD_BASE + 0x120,
eCMD_C011_DEC_CHAN_SET_FF_RATE = eCMD_C011_CMD_BASE + 0x121,
eCMD_C011_DEC_CHAN_GET_FF_RATE = eCMD_C011_CMD_BASE + 0x122,
- eCMD_C011_DEC_CHAN_FRAME_ADVANCE = eCMD_C011_CMD_BASE + 0x123,
+ eCMD_C011_DEC_CHAN_FRAME_ADVANCE = eCMD_C011_CMD_BASE + 0x123,
eCMD_C011_DEC_CHAN_SET_SKIP_PIC_MODE = eCMD_C011_CMD_BASE + 0x124,
eCMD_C011_DEC_CHAN_GET_SKIP_PIC_MODE = eCMD_C011_CMD_BASE + 0x125,
eCMD_C011_DEC_CHAN_FILL_PIC_BUF = eCMD_C011_CMD_BASE + 0x126,
@@ -308,15 +311,16 @@ enum c011_ts_cmd {
eCMD_C011_DEC_CHAN_SET_BRCM_TRICK_MODE = eCMD_C011_CMD_BASE + 0x129,
eCMD_C011_DEC_CHAN_GET_BRCM_TRICK_MODE = eCMD_C011_CMD_BASE + 0x12A,
eCMD_C011_DEC_CHAN_REVERSE_FIELD_STATUS = eCMD_C011_CMD_BASE + 0x12B,
- eCMD_C011_DEC_CHAN_I_PICTURE_FOUND = eCMD_C011_CMD_BASE + 0x12C,
- eCMD_C011_DEC_CHAN_SET_PARAMETER = eCMD_C011_CMD_BASE + 0x12D,
+ eCMD_C011_DEC_CHAN_I_PICTURE_FOUND = eCMD_C011_CMD_BASE + 0x12C,
+ eCMD_C011_DEC_CHAN_SET_PARAMETER = eCMD_C011_CMD_BASE + 0x12D,
eCMD_C011_DEC_CHAN_SET_USER_DATA_MODE = eCMD_C011_CMD_BASE + 0x12E,
- eCMD_C011_DEC_CHAN_SET_PAUSE_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x12F,
- eCMD_C011_DEC_CHAN_SET_SLOW_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x130,
+ eCMD_C011_DEC_CHAN_SET_PAUSE_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x12F,
+ eCMD_C011_DEC_CHAN_SET_SLOW_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x130,
eCMD_C011_DEC_CHAN_SET_FF_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x131,
- eCMD_C011_DEC_CHAN_SET_DISPLAY_TIMING_MODE = eCMD_C011_CMD_BASE + 0x132,
- eCMD_C011_DEC_CHAN_SET_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x133,
- eCMD_C011_DEC_CHAN_GET_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x134,
+ eCMD_C011_DEC_CHAN_SET_DISPLAY_TIMING_MODE = eCMD_C011_CMD_BASE +
+ 0x132,
+ eCMD_C011_DEC_CHAN_SET_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x133,
+ eCMD_C011_DEC_CHAN_GET_DISPLAY_MODE = eCMD_C011_CMD_BASE + 0x134,
eCMD_C011_DEC_CHAN_SET_REVERSE_FIELD = eCMD_C011_CMD_BASE + 0x135,
eCMD_C011_DEC_CHAN_STREAM_OPEN = eCMD_C011_CMD_BASE + 0x136,
eCMD_C011_DEC_CHAN_SET_PCR_PID = eCMD_C011_CMD_BASE + 0x137,
@@ -328,19 +332,22 @@ enum c011_ts_cmd {
eCMD_C011_DEC_CHAN_GET_DISPLAY_ORDER = eCMD_C011_CMD_BASE + 0x143,
eCMD_C011_DEC_CHAN_SET_HOST_TRICK_MODE = eCMD_C011_CMD_BASE + 0x144,
eCMD_C011_DEC_CHAN_SET_OPERATION_MODE = eCMD_C011_CMD_BASE + 0x145,
- eCMD_C011_DEC_CHAN_DISPLAY_PAUSE_UNTO_PTS = eCMD_C011_CMD_BASE + 0x146,
- eCMD_C011_DEC_CHAN_SET_PTS_STC_DIFF_THRESHOLD = eCMD_C011_CMD_BASE + 0x147,
+ eCMD_C011_DEC_CHAN_DISPLAY_PAUSE_UNTO_PTS = eCMD_C011_CMD_BASE + 0x146,
+ eCMD_C011_DEC_CHAN_SET_PTS_STC_DIFF_THRESHOLD = eCMD_C011_CMD_BASE +
+ 0x147,
eCMD_C011_DEC_CHAN_SEND_COMPRESSED_BUF = eCMD_C011_CMD_BASE + 0x148,
eCMD_C011_DEC_CHAN_SET_CLIPPING = eCMD_C011_CMD_BASE + 0x149,
eCMD_C011_DEC_CHAN_SET_PARAMETERS_FOR_HARD_RESET_INTERRUPT_TO_HOST
= eCMD_C011_CMD_BASE + 0x150,
/* Decoder RevD commands */
- eCMD_C011_DEC_CHAN_SET_CSC = eCMD_C011_CMD_BASE + 0x180, /* color space conversion */
+ eCMD_C011_DEC_CHAN_SET_CSC = eCMD_C011_CMD_BASE + 0x180, /* color
+ space conversion */
eCMD_C011_DEC_CHAN_SET_RANGE_REMAP = eCMD_C011_CMD_BASE + 0x181,
eCMD_C011_DEC_CHAN_SET_FGT = eCMD_C011_CMD_BASE + 0x182,
/* Note: 0x183 not implemented yet in Rev D main */
- eCMD_C011_DEC_CHAN_SET_LASTPICTURE_PADDING = eCMD_C011_CMD_BASE + 0x183,
+ eCMD_C011_DEC_CHAN_SET_LASTPICTURE_PADDING = eCMD_C011_CMD_BASE +
+ 0x183,
/* Decoder 7412 commands (7412-only) */
eCMD_C011_DEC_CHAN_SET_CONTENT_KEY = eCMD_C011_CMD_BASE + 0x190,
diff --git a/drivers/staging/crystalhd/crystalhd_hw.c b/drivers/staging/crystalhd/crystalhd_hw.c
index e617d2fcbb1f..0c8cb329420f 100644
--- a/drivers/staging/crystalhd/crystalhd_hw.c
+++ b/drivers/staging/crystalhd/crystalhd_hw.c
@@ -94,15 +94,19 @@ static bool crystalhd_bring_out_of_rst(struct crystalhd_adp *adp)
* Enable clocks while 7412 reset is asserted, delay
* De-assert 7412 reset
*/
- rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp, MISC_PERST_DECODER_CTRL);
+ rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp,
+ MISC_PERST_DECODER_CTRL);
rst_deco_cntrl.stop_bcm_7412_clk = 0;
rst_deco_cntrl.bcm7412_rst = 1;
- crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL, rst_deco_cntrl.whole_reg);
+ crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL,
+ rst_deco_cntrl.whole_reg);
msleep_interruptible(10);
- rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp, MISC_PERST_DECODER_CTRL);
+ rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp,
+ MISC_PERST_DECODER_CTRL);
rst_deco_cntrl.bcm7412_rst = 0;
- crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL, rst_deco_cntrl.whole_reg);
+ crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL,
+ rst_deco_cntrl.whole_reg);
msleep_interruptible(50);
/* Disable OTP_CONTENT_MISC to 0 to disable all secure modes */
@@ -132,9 +136,11 @@ static bool crystalhd_put_in_reset(struct crystalhd_adp *adp)
* Assert 7412 reset, delay
* Assert 7412 stop clock
*/
- rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp, MISC_PERST_DECODER_CTRL);
+ rst_deco_cntrl.whole_reg = crystalhd_reg_rd(adp,
+ MISC_PERST_DECODER_CTRL);
rst_deco_cntrl.stop_bcm_7412_clk = 1;
- crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL, rst_deco_cntrl.whole_reg);
+ crystalhd_reg_wr(adp, MISC_PERST_DECODER_CTRL,
+ rst_deco_cntrl.whole_reg);
msleep_interruptible(50);
/* Bus Arbiter Timeout: GISB_ARBITER_TIMER
@@ -213,7 +219,8 @@ static void crystalhd_clear_errors(struct crystalhd_adp *adp)
{
uint32_t reg;
- /* FIXME: jarod: wouldn't we want to write a 0 to the reg? Or does the write clear the bits specified? */
+ /* FIXME: jarod: wouldn't we want to write a 0 to the reg?
+ Or does the write clear the bits specified? */
reg = crystalhd_reg_rd(adp, MISC1_Y_RX_ERROR_STATUS);
if (reg)
crystalhd_reg_wr(adp, MISC1_Y_RX_ERROR_STATUS, reg);
@@ -263,10 +270,12 @@ static bool crystalhd_load_firmware_config(struct crystalhd_adp *adp)
crystalhd_reg_wr(adp, DCI_DRAM_BASE_ADDR, (BC_DRAM_FW_CFG_ADDR >> 19));
crystalhd_reg_wr(adp, AES_CMD, 0);
- crystalhd_reg_wr(adp, AES_CONFIG_INFO, (BC_DRAM_FW_CFG_ADDR & 0x7FFFF));
+ crystalhd_reg_wr(adp, AES_CONFIG_INFO,
+ (BC_DRAM_FW_CFG_ADDR & 0x7FFFF));
crystalhd_reg_wr(adp, AES_CMD, 0x1);
- /* FIXME: jarod: I've seen this fail, and introducing extra delays helps... */
+ /* FIXME: jarod: I've seen this fail,
+ and introducing extra delays helps... */
for (i = 0; i < 100; ++i) {
reg = crystalhd_reg_rd(adp, AES_STATUS);
if (reg & 0x1)
@@ -349,7 +358,8 @@ static bool crystalhd_stop_device(struct crystalhd_adp *adp)
return true;
}
-static struct crystalhd_rx_dma_pkt *crystalhd_hw_alloc_rx_pkt(struct crystalhd_hw *hw)
+static struct crystalhd_rx_dma_pkt *crystalhd_hw_alloc_rx_pkt(
+ struct crystalhd_hw *hw)
{
unsigned long flags = 0;
struct crystalhd_rx_dma_pkt *temp = NULL;
@@ -484,8 +494,8 @@ hw_create_ioq_err:
}
-static bool crystalhd_code_in_full(struct crystalhd_adp *adp, uint32_t needed_sz,
- bool b_188_byte_pkts, uint8_t flags)
+static bool crystalhd_code_in_full(struct crystalhd_adp *adp,
+ uint32_t needed_sz, bool b_188_byte_pkts, uint8_t flags)
{
uint32_t base, end, writep, readp;
uint32_t cpbSize, cpbFullness, fifoSize;
@@ -525,7 +535,7 @@ static bool crystalhd_code_in_full(struct crystalhd_adp *adp, uint32_t needed_sz
}
static enum BC_STATUS crystalhd_hw_tx_req_complete(struct crystalhd_hw *hw,
- uint32_t list_id, enum BC_STATUS cs)
+ uint32_t list_id, enum BC_STATUS cs)
{
struct tx_dma_pkt *tx_req;
@@ -536,7 +546,8 @@ static enum BC_STATUS crystalhd_hw_tx_req_complete(struct crystalhd_hw *hw,
hw->pwr_lock--;
- tx_req = (struct tx_dma_pkt *)crystalhd_dioq_find_and_fetch(hw->tx_actq, list_id);
+ tx_req = (struct tx_dma_pkt *)crystalhd_dioq_find_and_fetch(
+ hw->tx_actq, list_id);
if (!tx_req) {
if (cs != BC_STS_IO_USER_ABORT)
BCMLOG_ERR("Find and Fetch Did not find req\n");
@@ -559,7 +570,8 @@ static enum BC_STATUS crystalhd_hw_tx_req_complete(struct crystalhd_hw *hw,
return crystalhd_dioq_add(hw->tx_freeq, tx_req, false, 0);
}
-static bool crystalhd_tx_list0_handler(struct crystalhd_hw *hw, uint32_t err_sts)
+static bool crystalhd_tx_list0_handler(struct crystalhd_hw *hw,
+ uint32_t err_sts)
{
uint32_t err_mask, tmp;
unsigned long flags = 0;
@@ -591,7 +603,8 @@ static bool crystalhd_tx_list0_handler(struct crystalhd_hw *hw, uint32_t err_sts
return true;
}
-static bool crystalhd_tx_list1_handler(struct crystalhd_hw *hw, uint32_t err_sts)
+static bool crystalhd_tx_list1_handler(struct crystalhd_hw *hw,
+ uint32_t err_sts)
{
uint32_t err_mask, tmp;
unsigned long flags = 0;
@@ -663,14 +676,15 @@ static void crystalhd_hw_dump_desc(struct dma_descriptor *p_dma_desc,
if (!p_dma_desc || !cnt)
return;
- /* FIXME: jarod: perhaps a modparam desc_debug to enable this, rather than
- * setting ll (log level, I presume) to non-zero? */
+ /* FIXME: jarod: perhaps a modparam desc_debug to enable this,
+ rather than setting ll (log level, I presume) to non-zero? */
if (!ll)
return;
for (ix = ul_desc_index; ix < (ul_desc_index + cnt); ix++) {
- BCMLOG(ll, "%s[%d] Buff[%x:%x] Next:[%x:%x] XferSz:%x Intr:%x,Last:%x\n",
- ((p_dma_desc[ul_desc_index].dma_dir) ? "TDesc" : "RDesc"),
+ BCMLOG(ll,
+ "%s[%d] Buff[%x:%x] Next:[%x:%x] XferSz:%x Intr:%x,Last:%x\n",
+ ((p_dma_desc[ul_desc_index].dma_dir) ? "TDesc" : "RDesc"),
ul_desc_index,
p_dma_desc[ul_desc_index].buff_addr_high,
p_dma_desc[ul_desc_index].buff_addr_low,
@@ -707,7 +721,8 @@ static enum BC_STATUS crystalhd_hw_fill_desc(struct crystalhd_dio_req *ioreq,
/* Get SGLE length */
len = crystalhd_get_sgle_len(ioreq, sg_ix);
if (len % 4) {
- BCMLOG_ERR(" len in sg %d %d %d\n", len, sg_ix, sg_cnt);
+ BCMLOG_ERR(" len in sg %d %d %d\n", len, sg_ix,
+ sg_cnt);
return BC_STS_NOT_IMPL;
}
/* Setup DMA desc with Phy addr & Length at current index. */
@@ -722,7 +737,8 @@ static enum BC_STATUS crystalhd_hw_fill_desc(struct crystalhd_dio_req *ioreq,
desc[ix].dma_dir = ioreq->uinfo.dir_tx;
/* Chain DMA descriptor. */
- addr_temp.full_addr = desc_phy_addr + sizeof(struct dma_descriptor);
+ addr_temp.full_addr = desc_phy_addr +
+ sizeof(struct dma_descriptor);
desc[ix].next_desc_addr_low = addr_temp.low_part;
desc[ix].next_desc_addr_high = addr_temp.high_part;
@@ -731,8 +747,9 @@ static enum BC_STATUS crystalhd_hw_fill_desc(struct crystalhd_dio_req *ioreq,
/* Debug.. */
if ((!len) || (len > crystalhd_get_sgle_len(ioreq, sg_ix))) {
- BCMLOG_ERR("inv-len(%x) Ix(%d) count:%x xfr_sz:%x sg_cnt:%d\n",
- len, ix, count, xfr_sz, sg_cnt);
+ BCMLOG_ERR(
+ "inv-len(%x) Ix(%d) count:%x xfr_sz:%x sg_cnt:%d\n",
+ len, ix, count, xfr_sz, sg_cnt);
return BC_STS_ERROR;
}
/* Length expects Multiple of 4 */
@@ -774,7 +791,8 @@ static enum BC_STATUS crystalhd_hw_fill_desc(struct crystalhd_dio_req *ioreq,
return BC_STS_SUCCESS;
}
-static enum BC_STATUS crystalhd_xlat_sgl_to_dma_desc(struct crystalhd_dio_req *ioreq,
+static enum BC_STATUS crystalhd_xlat_sgl_to_dma_desc(
+ struct crystalhd_dio_req *ioreq,
struct dma_desc_mem *pdesc_mem,
uint32_t *uv_desc_index)
{
@@ -887,12 +905,14 @@ static enum BC_STATUS crystalhd_stop_tx_dma_engine(struct crystalhd_hw *hw)
while ((l1 || l2) && cnt) {
if (l1) {
- l1 = crystalhd_reg_rd(hw->adp, MISC1_TX_FIRST_DESC_L_ADDR_LIST0);
+ l1 = crystalhd_reg_rd(hw->adp,
+ MISC1_TX_FIRST_DESC_L_ADDR_LIST0);
l1 &= DMA_START_BIT;
}
if (l2) {
- l2 = crystalhd_reg_rd(hw->adp, MISC1_TX_FIRST_DESC_L_ADDR_LIST1);
+ l2 = crystalhd_reg_rd(hw->adp,
+ MISC1_TX_FIRST_DESC_L_ADDR_LIST1);
l2 &= DMA_START_BIT;
}
@@ -986,7 +1006,8 @@ static uint32_t crystalhd_get_addr_from_pib_Q(struct crystalhd_hw *hw)
return addr_entry;
}
-static bool crystalhd_rel_addr_to_pib_Q(struct crystalhd_hw *hw, uint32_t addr_to_rel)
+static bool crystalhd_rel_addr_to_pib_Q(struct crystalhd_hw *hw,
+ uint32_t addr_to_rel)
{
uint32_t Q_addr;
uint32_t r_offset, w_offset, n_offset;
@@ -1021,7 +1042,8 @@ static bool crystalhd_rel_addr_to_pib_Q(struct crystalhd_hw *hw, uint32_t addr_t
return true;
}
-static void cpy_pib_to_app(struct c011_pib *src_pib, struct BC_PIC_INFO_BLOCK *dst_pib)
+static void cpy_pib_to_app(struct c011_pib *src_pib,
+ struct BC_PIC_INFO_BLOCK *dst_pib)
{
if (!src_pib || !dst_pib) {
BCMLOG_ERR("Invalid Arguments\n");
@@ -1063,11 +1085,13 @@ static void crystalhd_hw_proc_pib(struct crystalhd_hw *hw)
(uint32_t *)&src_pib);
if (src_pib.bFormatChange) {
- rx_pkt = (struct crystalhd_rx_dma_pkt *)crystalhd_dioq_fetch(hw->rx_freeq);
+ rx_pkt = (struct crystalhd_rx_dma_pkt *)
+ crystalhd_dioq_fetch(hw->rx_freeq);
if (!rx_pkt)
return;
rx_pkt->flags = 0;
- rx_pkt->flags |= COMP_FLAG_PIB_VALID | COMP_FLAG_FMT_CHANGE;
+ rx_pkt->flags |= COMP_FLAG_PIB_VALID |
+ COMP_FLAG_FMT_CHANGE;
AppPib = &rx_pkt->pib;
cpy_pib_to_app(&src_pib, AppPib);
@@ -1084,7 +1108,8 @@ static void crystalhd_hw_proc_pib(struct crystalhd_hw *hw)
rx_pkt->pib.pulldown,
rx_pkt->pib.ycom);
- crystalhd_dioq_add(hw->rx_rdyq, (void *)rx_pkt, true, rx_pkt->pkt_tag);
+ crystalhd_dioq_add(hw->rx_rdyq, (void *)rx_pkt, true,
+ rx_pkt->pkt_tag);
}
@@ -1096,16 +1121,20 @@ static void crystalhd_start_rx_dma_engine(struct crystalhd_hw *hw)
{
uint32_t dma_cntrl;
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
if (!(dma_cntrl & DMA_START_BIT)) {
dma_cntrl |= DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
if (!(dma_cntrl & DMA_START_BIT)) {
dma_cntrl |= DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
return;
@@ -1116,44 +1145,52 @@ static void crystalhd_stop_rx_dma_engine(struct crystalhd_hw *hw)
uint32_t dma_cntrl = 0, count = 30;
uint32_t l0y = 1, l0uv = 1, l1y = 1, l1uv = 1;
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
if ((dma_cntrl & DMA_START_BIT)) {
dma_cntrl &= ~DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
if ((dma_cntrl & DMA_START_BIT)) {
dma_cntrl &= ~DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
/* Poll for 3seconds (30 * 100ms) on both the lists..*/
while ((l0y || l0uv || l1y || l1uv) && count) {
if (l0y) {
- l0y = crystalhd_reg_rd(hw->adp, MISC1_Y_RX_FIRST_DESC_L_ADDR_LIST0);
+ l0y = crystalhd_reg_rd(hw->adp,
+ MISC1_Y_RX_FIRST_DESC_L_ADDR_LIST0);
l0y &= DMA_START_BIT;
if (!l0y)
hw->rx_list_sts[0] &= ~rx_waiting_y_intr;
}
if (l1y) {
- l1y = crystalhd_reg_rd(hw->adp, MISC1_Y_RX_FIRST_DESC_L_ADDR_LIST1);
+ l1y = crystalhd_reg_rd(hw->adp,
+ MISC1_Y_RX_FIRST_DESC_L_ADDR_LIST1);
l1y &= DMA_START_BIT;
if (!l1y)
hw->rx_list_sts[1] &= ~rx_waiting_y_intr;
}
if (l0uv) {
- l0uv = crystalhd_reg_rd(hw->adp, MISC1_UV_RX_FIRST_DESC_L_ADDR_LIST0);
+ l0uv = crystalhd_reg_rd(hw->adp,
+ MISC1_UV_RX_FIRST_DESC_L_ADDR_LIST0);
l0uv &= DMA_START_BIT;
if (!l0uv)
hw->rx_list_sts[0] &= ~rx_waiting_uv_intr;
}
if (l1uv) {
- l1uv = crystalhd_reg_rd(hw->adp, MISC1_UV_RX_FIRST_DESC_L_ADDR_LIST1);
+ l1uv = crystalhd_reg_rd(hw->adp,
+ MISC1_UV_RX_FIRST_DESC_L_ADDR_LIST1);
l1uv &= DMA_START_BIT;
if (!l1uv)
hw->rx_list_sts[1] &= ~rx_waiting_uv_intr;
@@ -1168,7 +1205,8 @@ static void crystalhd_stop_rx_dma_engine(struct crystalhd_hw *hw)
count, hw->rx_list_sts[0], hw->rx_list_sts[1]);
}
-static enum BC_STATUS crystalhd_hw_prog_rxdma(struct crystalhd_hw *hw, struct crystalhd_rx_dma_pkt *rx_pkt)
+static enum BC_STATUS crystalhd_hw_prog_rxdma(struct crystalhd_hw *hw,
+ struct crystalhd_rx_dma_pkt *rx_pkt)
{
uint32_t y_low_addr_reg, y_high_addr_reg;
uint32_t uv_low_addr_reg, uv_high_addr_reg;
@@ -1186,7 +1224,8 @@ static enum BC_STATUS crystalhd_hw_prog_rxdma(struct crystalhd_hw *hw, struct cr
}
spin_lock_irqsave(&hw->rx_lock, flags);
- /* FIXME: jarod: sts_free is an enum for 0, in crystalhd_hw.h... yuk... */
+ /* FIXME: jarod: sts_free is an enum for 0,
+ in crystalhd_hw.h... yuk... */
if (sts_free != hw->rx_list_sts[hw->rx_list_post_index]) {
spin_unlock_irqrestore(&hw->rx_lock, flags);
return BC_STS_BUSY;
@@ -1210,7 +1249,8 @@ static enum BC_STATUS crystalhd_hw_prog_rxdma(struct crystalhd_hw *hw, struct cr
hw->rx_list_post_index = (hw->rx_list_post_index + 1) % DMA_ENGINE_CNT;
spin_unlock_irqrestore(&hw->rx_lock, flags);
- crystalhd_dioq_add(hw->rx_actq, (void *)rx_pkt, false, rx_pkt->pkt_tag);
+ crystalhd_dioq_add(hw->rx_actq, (void *)rx_pkt, false,
+ rx_pkt->pkt_tag);
crystalhd_start_rx_dma_engine(hw);
/* Program the Y descriptor */
@@ -1221,8 +1261,10 @@ static enum BC_STATUS crystalhd_hw_prog_rxdma(struct crystalhd_hw *hw, struct cr
if (rx_pkt->uv_phy_addr) {
/* Program the UV descriptor */
desc_addr.full_addr = rx_pkt->uv_phy_addr;
- crystalhd_reg_wr(hw->adp, uv_high_addr_reg, desc_addr.high_part);
- crystalhd_reg_wr(hw->adp, uv_low_addr_reg, desc_addr.low_part | 0x01);
+ crystalhd_reg_wr(hw->adp, uv_high_addr_reg,
+ desc_addr.high_part);
+ crystalhd_reg_wr(hw->adp, uv_low_addr_reg,
+ desc_addr.low_part | 0x01);
}
return BC_STS_SUCCESS;
@@ -1268,16 +1310,20 @@ static void crystalhd_hw_finalize_pause(struct crystalhd_hw *hw)
hw->stop_pending = 0;
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS);
if (dma_cntrl & DMA_START_BIT) {
dma_cntrl &= ~DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_Y_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
- dma_cntrl = crystalhd_reg_rd(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
+ dma_cntrl = crystalhd_reg_rd(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS);
if (dma_cntrl & DMA_START_BIT) {
dma_cntrl &= ~DMA_START_BIT;
- crystalhd_reg_wr(hw->adp, MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
+ crystalhd_reg_wr(hw->adp,
+ MISC1_UV_RX_SW_DESC_LIST_CTRL_STS, dma_cntrl);
}
hw->rx_list_post_index = 0;
@@ -1287,8 +1333,8 @@ static void crystalhd_hw_finalize_pause(struct crystalhd_hw *hw)
crystalhd_reg_wr(hw->adp, PCIE_DLL_DATA_LINK_CONTROL, aspm);
}
-static enum BC_STATUS crystalhd_rx_pkt_done(struct crystalhd_hw *hw, uint32_t list_index,
- enum BC_STATUS comp_sts)
+static enum BC_STATUS crystalhd_rx_pkt_done(struct crystalhd_hw *hw,
+ uint32_t list_index, enum BC_STATUS comp_sts)
{
struct crystalhd_rx_dma_pkt *rx_pkt = NULL;
uint32_t y_dw_dnsz, uv_dw_dnsz;
@@ -1302,7 +1348,8 @@ static enum BC_STATUS crystalhd_rx_pkt_done(struct crystalhd_hw *hw, uint32_t li
rx_pkt = crystalhd_dioq_find_and_fetch(hw->rx_actq,
hw->rx_pkt_tag_seed + list_index);
if (!rx_pkt) {
- BCMLOG_ERR("Act-Q:PostIx:%x L0Sts:%x L1Sts:%x current L:%x tag:%x comp:%x\n",
+ BCMLOG_ERR(
+ "Act-Q:PostIx:%x L0Sts:%x L1Sts:%x current L:%x tag:%x comp:%x\n",
hw->rx_list_post_index, hw->rx_list_sts[0],
hw->rx_list_sts[1], list_index,
hw->rx_pkt_tag_seed + list_index, comp_sts);
@@ -1324,8 +1371,8 @@ static enum BC_STATUS crystalhd_rx_pkt_done(struct crystalhd_hw *hw, uint32_t li
return crystalhd_hw_post_cap_buff(hw, rx_pkt);
}
-static bool crystalhd_rx_list0_handler(struct crystalhd_hw *hw, uint32_t int_sts,
- uint32_t y_err_sts, uint32_t uv_err_sts)
+static bool crystalhd_rx_list0_handler(struct crystalhd_hw *hw,
+ uint32_t int_sts, uint32_t y_err_sts, uint32_t uv_err_sts)
{
uint32_t tmp;
enum list_sts tmp_lsts;
@@ -1367,7 +1414,8 @@ static bool crystalhd_rx_list0_handler(struct crystalhd_hw *hw, uint32_t int_sts
tmp &= ~MISC1_UV_RX_ERROR_STATUS_RX_L0_UNDERRUN_ERROR_MASK;
}
- if (uv_err_sts & MISC1_UV_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK) {
+ if (uv_err_sts &
+ MISC1_UV_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK) {
hw->rx_list_sts[0] &= ~rx_uv_mask;
hw->rx_list_sts[0] |= rx_uv_error;
tmp &= ~MISC1_UV_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK;
@@ -1392,8 +1440,8 @@ static bool crystalhd_rx_list0_handler(struct crystalhd_hw *hw, uint32_t int_sts
return (tmp_lsts != hw->rx_list_sts[0]);
}
-static bool crystalhd_rx_list1_handler(struct crystalhd_hw *hw, uint32_t int_sts,
- uint32_t y_err_sts, uint32_t uv_err_sts)
+static bool crystalhd_rx_list1_handler(struct crystalhd_hw *hw,
+ uint32_t int_sts, uint32_t y_err_sts, uint32_t uv_err_sts)
{
uint32_t tmp;
enum list_sts tmp_lsts;
@@ -1486,9 +1534,11 @@ static void crystalhd_rx_isr(struct crystalhd_hw *hw, uint32_t intr_sts)
/* Update States..*/
spin_lock_irqsave(&hw->rx_lock, flags);
if (i == 0)
- ret = crystalhd_rx_list0_handler(hw, intr_sts, y_err_sts, uv_err_sts);
+ ret = crystalhd_rx_list0_handler(hw, intr_sts,
+ y_err_sts, uv_err_sts);
else
- ret = crystalhd_rx_list1_handler(hw, intr_sts, y_err_sts, uv_err_sts);
+ ret = crystalhd_rx_list1_handler(hw, intr_sts,
+ y_err_sts, uv_err_sts);
if (ret) {
switch (hw->rx_list_sts[i]) {
case sts_free:
@@ -1501,11 +1551,13 @@ static void crystalhd_rx_isr(struct crystalhd_hw *hw, uint32_t intr_sts)
/* We got error on both or Y or uv. */
hw->stats.rx_errors++;
crystalhd_get_dnsz(hw, i, &y_dn_sz, &uv_dn_sz);
- /* FIXME: jarod: this is where my mini pci-e card is tripping up */
+ /* FIXME: jarod: this is where
+ my mini pci-e card is tripping up */
BCMLOG(BCMLOG_DBG, "list_index:%x rx[%d] Y:%x "
"UV:%x Int:%x YDnSz:%x UVDnSz:%x\n",
i, hw->stats.rx_errors, y_err_sts,
- uv_err_sts, intr_sts, y_dn_sz, uv_dn_sz);
+ uv_err_sts, intr_sts, y_dn_sz,
+ uv_dn_sz);
hw->rx_list_sts[i] = sts_free;
comp_sts = BC_STS_ERROR;
break;
@@ -1567,14 +1619,17 @@ static enum BC_STATUS crystalhd_put_ddr2sleep(struct crystalhd_hw *hw)
union link_misc_perst_decoder_ctrl rst_cntrl_reg;
/* Pulse reset pin of 7412 (MISC_PERST_DECODER_CTRL) */
- rst_cntrl_reg.whole_reg = crystalhd_reg_rd(hw->adp, MISC_PERST_DECODER_CTRL);
+ rst_cntrl_reg.whole_reg = crystalhd_reg_rd(hw->adp,
+ MISC_PERST_DECODER_CTRL);
rst_cntrl_reg.bcm_7412_rst = 1;
- crystalhd_reg_wr(hw->adp, MISC_PERST_DECODER_CTRL, rst_cntrl_reg.whole_reg);
+ crystalhd_reg_wr(hw->adp, MISC_PERST_DECODER_CTRL,
+ rst_cntrl_reg.whole_reg);
msleep_interruptible(50);
rst_cntrl_reg.bcm_7412_rst = 0;
- crystalhd_reg_wr(hw->adp, MISC_PERST_DECODER_CTRL, rst_cntrl_reg.whole_reg);
+ crystalhd_reg_wr(hw->adp, MISC_PERST_DECODER_CTRL,
+ rst_cntrl_reg.whole_reg);
/* Close all banks, put DDR in idle */
bc_dec_reg_wr(hw->adp, SDRAM_PRECHARGE, 0);
@@ -1622,7 +1677,8 @@ static enum BC_STATUS crystalhd_put_ddr2sleep(struct crystalhd_hw *hw)
**
*************************************************/
-enum BC_STATUS crystalhd_download_fw(struct crystalhd_adp *adp, void *buffer, uint32_t sz)
+enum BC_STATUS crystalhd_download_fw(struct crystalhd_adp *adp, void *buffer,
+ uint32_t sz)
{
uint32_t reg_data, cnt, *temp_buff;
uint32_t fw_sig_len = 36;
@@ -1828,7 +1884,8 @@ bool crystalhd_hw_interrupt(struct crystalhd_adp *adp, struct crystalhd_hw *hw)
crystalhd_hw_proc_pib(hw);
bc_dec_reg_wr(adp, Stream2Host_Intr_Sts, deco_intr);
- /* FIXME: jarod: No udelay? might this be the real reason mini pci-e cards were stalling out? */
+ /* FIXME: jarod: No udelay? might this be
+ the real reason mini pci-e cards were stalling out? */
bc_dec_reg_wr(adp, Stream2Host_Intr_Sts, 0);
rc = 1;
}
@@ -1852,7 +1909,8 @@ bool crystalhd_hw_interrupt(struct crystalhd_adp *adp, struct crystalhd_hw *hw)
return rc;
}
-enum BC_STATUS crystalhd_hw_open(struct crystalhd_hw *hw, struct crystalhd_adp *adp)
+enum BC_STATUS crystalhd_hw_open(struct crystalhd_hw *hw,
+ struct crystalhd_adp *adp)
{
if (!hw || !adp) {
BCMLOG_ERR("Invalid Arguments\n");
@@ -1967,7 +2025,8 @@ enum BC_STATUS crystalhd_hw_setup_dma_rings(struct crystalhd_hw *hw)
}
rpkt->desc_mem.pdma_desc_start = mem;
rpkt->desc_mem.phy_addr = phy_addr;
- rpkt->desc_mem.sz = BC_LINK_MAX_SGLS * sizeof(struct dma_descriptor);
+ rpkt->desc_mem.sz = BC_LINK_MAX_SGLS *
+ sizeof(struct dma_descriptor);
rpkt->pkt_tag = hw->rx_pkt_tag_seed + i;
crystalhd_hw_free_rx_pkt(hw, rpkt);
}
@@ -2013,7 +2072,8 @@ enum BC_STATUS crystalhd_hw_free_dma_rings(struct crystalhd_hw *hw)
return BC_STS_SUCCESS;
}
-enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_dio_req *ioreq,
+enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw,
+ struct crystalhd_dio_req *ioreq,
hw_comp_callback call_back,
wait_queue_head_t *cb_event, uint32_t *list_id,
uint8_t data_flags)
@@ -2047,7 +2107,8 @@ enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_di
}
/* Get a list from TxFreeQ */
- tx_dma_packet = (struct tx_dma_pkt *)crystalhd_dioq_fetch(hw->tx_freeq);
+ tx_dma_packet = (struct tx_dma_pkt *)crystalhd_dioq_fetch(
+ hw->tx_freeq);
if (!tx_dma_packet) {
BCMLOG_ERR("No empty elements..\n");
return BC_STS_ERR_USAGE;
@@ -2105,7 +2166,8 @@ enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_di
crystalhd_start_tx_dma_engine(hw);
crystalhd_reg_wr(hw->adp, first_desc_u_addr, desc_addr.high_part);
- crystalhd_reg_wr(hw->adp, first_desc_l_addr, desc_addr.low_part | 0x01);
+ crystalhd_reg_wr(hw->adp, first_desc_l_addr, desc_addr.low_part |
+ 0x01);
/* Be sure we set the valid bit ^^^^ */
return BC_STS_SUCCESS;
@@ -2120,7 +2182,8 @@ enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_di
*
* FIX_ME: Not Tested the actual condition..
*/
-enum BC_STATUS crystalhd_hw_cancel_tx(struct crystalhd_hw *hw, uint32_t list_id)
+enum BC_STATUS crystalhd_hw_cancel_tx(struct crystalhd_hw *hw,
+ uint32_t list_id)
{
if (!hw || !list_id) {
BCMLOG_ERR("Invalid Arguments\n");
@@ -2134,7 +2197,7 @@ enum BC_STATUS crystalhd_hw_cancel_tx(struct crystalhd_hw *hw, uint32_t list_id)
}
enum BC_STATUS crystalhd_hw_add_cap_buffer(struct crystalhd_hw *hw,
- struct crystalhd_dio_req *ioreq, bool en_post)
+ struct crystalhd_dio_req *ioreq, bool en_post)
{
struct crystalhd_rx_dma_pkt *rpkt;
uint32_t tag, uv_desc_ix = 0;
@@ -2154,7 +2217,8 @@ enum BC_STATUS crystalhd_hw_add_cap_buffer(struct crystalhd_hw *hw,
rpkt->dio_req = ioreq;
tag = rpkt->pkt_tag;
- sts = crystalhd_xlat_sgl_to_dma_desc(ioreq, &rpkt->desc_mem, &uv_desc_ix);
+ sts = crystalhd_xlat_sgl_to_dma_desc(ioreq, &rpkt->desc_mem,
+ &uv_desc_ix);
if (sts != BC_STS_SUCCESS)
return sts;
@@ -2163,7 +2227,7 @@ enum BC_STATUS crystalhd_hw_add_cap_buffer(struct crystalhd_hw *hw,
/* Store the address of UV in the rx packet for post*/
if (uv_desc_ix)
rpkt->uv_phy_addr = rpkt->desc_mem.phy_addr +
- (sizeof(struct dma_descriptor) * (uv_desc_ix + 1));
+ (sizeof(struct dma_descriptor) * (uv_desc_ix + 1));
if (en_post)
sts = crystalhd_hw_post_cap_buff(hw, rpkt);
@@ -2190,7 +2254,8 @@ enum BC_STATUS crystalhd_hw_get_cap_buffer(struct crystalhd_hw *hw,
rpkt = crystalhd_dioq_fetch_wait(hw->rx_rdyq, timeout, &sig_pending);
if (!rpkt) {
if (sig_pending) {
- BCMLOG(BCMLOG_INFO, "wait on frame time out %d\n", sig_pending);
+ BCMLOG(BCMLOG_INFO, "wait on frame time out %d\n",
+ sig_pending);
return BC_STS_IO_USER_ABORT;
} else {
return BC_STS_TIMEOUT;
@@ -2305,7 +2370,8 @@ enum BC_STATUS crystalhd_hw_suspend(struct crystalhd_hw *hw)
return BC_STS_SUCCESS;
}
-void crystalhd_hw_stats(struct crystalhd_hw *hw, struct crystalhd_hw_stats *stats)
+void crystalhd_hw_stats(struct crystalhd_hw *hw,
+ struct crystalhd_hw_stats *stats)
{
if (!hw) {
BCMLOG_ERR("Invalid Arguments\n");
@@ -2378,7 +2444,8 @@ enum BC_STATUS crystalhd_hw_set_core_clock(struct crystalhd_hw *hw)
if (reg & 0x00020000) {
hw->prev_n = n;
- /* FIXME: jarod: outputting a random "C" is... confusing... */
+ /* FIXME: jarod: outputting
+ a random "C" is... confusing... */
BCMLOG(BCMLOG_INFO, "C");
return BC_STS_SUCCESS;
} else {
diff --git a/drivers/staging/crystalhd/crystalhd_hw.h b/drivers/staging/crystalhd/crystalhd_hw.h
index 2d0e6c6005e5..37809442c553 100644
--- a/drivers/staging/crystalhd/crystalhd_hw.h
+++ b/drivers/staging/crystalhd/crystalhd_hw.h
@@ -46,7 +46,7 @@
#define Cpu2HstMbx1 0x00100F04
#define MbxStat1 0x00100F08
#define Stream2Host_Intr_Sts 0x00100F24
-#define C011_RET_SUCCESS 0x0 /* Reutrn status of firmware command. */
+#define C011_RET_SUCCESS 0x0 /* Reutrn status of firmware command. */
/* TS input status register */
#define TS_StreamAFIFOStatus 0x0010044C
@@ -103,7 +103,7 @@
#define BC_FWIMG_ST_ADDR 0x00000000
/* FIXME: jarod: there's a kernel function that'll do this for us... */
#define rotr32_1(x, n) (((x) >> n) | ((x) << (32 - n)))
-#define bswap_32_1(x) ((rotr32_1((x), 24) & 0x00ff00ff) | (rotr32_1((x), 8) & 0xff00ff00))
+#define bswap_32_1(x) ((rotr32_1((x), 24) & 0x00ff00ff) | (rotr32_1((x), 8) & 0xff00ff00))
#define DecHt_HostSwReset 0x340000
#define BC_DRAM_FW_CFG_ADDR 0x001c2000
@@ -136,9 +136,11 @@ union intr_mask_reg {
union link_misc_perst_deco_ctrl {
struct {
- uint32_t bcm7412_rst:1; /* 1 -> BCM7412 is held in reset. Reset value 1.*/
+ uint32_t bcm7412_rst:1; /* 1 -> BCM7412 is held
+ in reset. Reset value 1.*/
uint32_t reserved0:3; /* Reserved.No Effect*/
- uint32_t stop_bcm_7412_clk:1; /* 1 ->Stops branch of 27MHz clk used to clk BCM7412*/
+ uint32_t stop_bcm_7412_clk:1; /* 1 ->Stops branch of
+ 27MHz clk used to clk BCM7412*/
uint32_t reserved1:27; /* Reseved. No Effect*/
};
@@ -148,13 +150,18 @@ union link_misc_perst_deco_ctrl {
union link_misc_perst_clk_ctrl {
struct {
- uint32_t sel_alt_clk:1; /* When set, selects a 6.75MHz clock as the source of core_clk */
- uint32_t stop_core_clk:1; /* When set, stops the branch of core_clk that is not needed for low power operation */
- uint32_t pll_pwr_dn:1; /* When set, powers down the main PLL. The alternate clock bit should be set
- to select an alternate clock before setting this bit.*/
+ uint32_t sel_alt_clk:1; /* When set, selects a
+ 6.75MHz clock as the source of core_clk */
+ uint32_t stop_core_clk:1; /* When set, stops the branch
+ of core_clk that is not needed for low power operation */
+ uint32_t pll_pwr_dn:1; /* When set, powers down the
+ main PLL. The alternate clock bit should be set to
+ select an alternate clock before setting this bit.*/
uint32_t reserved0:5; /* Reserved */
- uint32_t pll_mult:8; /* This setting controls the multiplier for the PLL. */
- uint32_t pll_div:4; /* This setting controls the divider for the PLL. */
+ uint32_t pll_mult:8; /* This setting controls
+ the multiplier for the PLL. */
+ uint32_t pll_div:4; /* This setting controls
+ the divider for the PLL. */
uint32_t reserved1:12; /* Reserved */
};
@@ -164,9 +171,11 @@ union link_misc_perst_clk_ctrl {
union link_misc_perst_decoder_ctrl {
struct {
- uint32_t bcm_7412_rst:1; /* 1 -> BCM7412 is held in reset. Reset value 1.*/
+ uint32_t bcm_7412_rst:1; /* 1 -> BCM7412 is held
+ in reset. Reset value 1.*/
uint32_t res0:3; /* Reserved.No Effect*/
- uint32_t stop_7412_clk:1; /* 1 ->Stops branch of 27MHz clk used to clk BCM7412*/
+ uint32_t stop_7412_clk:1; /* 1 ->Stops branch of 27MHz
+ clk used to clk BCM7412*/
uint32_t res1:27; /* Reseved. No Effect */
};
@@ -225,10 +234,12 @@ struct dma_descriptor { /* 8 32-bit values */
* The virtual address will determine what should be freed.
*/
struct dma_desc_mem {
- struct dma_descriptor *pdma_desc_start; /* 32-bytes for dma descriptor. should be first element */
- dma_addr_t phy_addr; /* physical address of each DMA desc */
+ struct dma_descriptor *pdma_desc_start; /* 32-bytes for dma
+ descriptor. should be first element */
+ dma_addr_t phy_addr; /* physical address
+ of each DMA desc */
uint32_t sz;
- struct _dma_desc_mem_ *Next; /* points to Next Descriptor in chain */
+ struct _dma_desc_mem_ *Next; /* points to Next Descriptor in chain */
};
@@ -323,50 +334,54 @@ struct crystalhd_hw {
#define CLOCK_PRESET 175
/* DMA engine register BIT mask wrappers.. */
-#define DMA_START_BIT MISC1_TX_SW_DESC_LIST_CTRL_STS_TX_DMA_RUN_STOP_MASK
-
-#define GET_RX_INTR_MASK (INTR_INTR_STATUS_L1_UV_RX_DMA_ERR_INTR_MASK | \
- INTR_INTR_STATUS_L1_UV_RX_DMA_DONE_INTR_MASK | \
- INTR_INTR_STATUS_L1_Y_RX_DMA_ERR_INTR_MASK | \
- INTR_INTR_STATUS_L1_Y_RX_DMA_DONE_INTR_MASK | \
- INTR_INTR_STATUS_L0_UV_RX_DMA_ERR_INTR_MASK | \
- INTR_INTR_STATUS_L0_UV_RX_DMA_DONE_INTR_MASK | \
- INTR_INTR_STATUS_L0_Y_RX_DMA_ERR_INTR_MASK | \
- INTR_INTR_STATUS_L0_Y_RX_DMA_DONE_INTR_MASK)
-
-#define GET_Y0_ERR_MSK (MISC1_Y_RX_ERROR_STATUS_RX_L0_OVERRUN_ERROR_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L0_UNDERRUN_ERROR_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L0_DESC_TX_ABORT_ERRORS_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK)
-
-#define GET_UV0_ERR_MSK (MISC1_UV_RX_ERROR_STATUS_RX_L0_OVERRUN_ERROR_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L0_UNDERRUN_ERROR_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L0_DESC_TX_ABORT_ERRORS_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK)
-
-#define GET_Y1_ERR_MSK (MISC1_Y_RX_ERROR_STATUS_RX_L1_OVERRUN_ERROR_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L1_UNDERRUN_ERROR_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L1_DESC_TX_ABORT_ERRORS_MASK | \
- MISC1_Y_RX_ERROR_STATUS_RX_L1_FIFO_FULL_ERRORS_MASK)
-
-#define GET_UV1_ERR_MSK (MISC1_UV_RX_ERROR_STATUS_RX_L1_OVERRUN_ERROR_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L1_UNDERRUN_ERROR_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L1_DESC_TX_ABORT_ERRORS_MASK | \
- MISC1_UV_RX_ERROR_STATUS_RX_L1_FIFO_FULL_ERRORS_MASK)
+#define DMA_START_BIT MISC1_TX_SW_DESC_LIST_CTRL_STS_TX_DMA_RUN_STOP_MASK
+
+#define GET_RX_INTR_MASK (INTR_INTR_STATUS_L1_UV_RX_DMA_ERR_INTR_MASK | \
+ INTR_INTR_STATUS_L1_UV_RX_DMA_DONE_INTR_MASK | \
+ INTR_INTR_STATUS_L1_Y_RX_DMA_ERR_INTR_MASK | \
+ INTR_INTR_STATUS_L1_Y_RX_DMA_DONE_INTR_MASK | \
+ INTR_INTR_STATUS_L0_UV_RX_DMA_ERR_INTR_MASK | \
+ INTR_INTR_STATUS_L0_UV_RX_DMA_DONE_INTR_MASK | \
+ INTR_INTR_STATUS_L0_Y_RX_DMA_ERR_INTR_MASK | \
+ INTR_INTR_STATUS_L0_Y_RX_DMA_DONE_INTR_MASK)
+
+#define GET_Y0_ERR_MSK (MISC1_Y_RX_ERROR_STATUS_RX_L0_OVERRUN_ERROR_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L0_UNDERRUN_ERROR_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L0_DESC_TX_ABORT_ERRORS_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK)
+
+#define GET_UV0_ERR_MSK (MISC1_UV_RX_ERROR_STATUS_RX_L0_OVERRUN_ERROR_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L0_UNDERRUN_ERROR_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L0_DESC_TX_ABORT_ERRORS_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L0_FIFO_FULL_ERRORS_MASK)
+
+#define GET_Y1_ERR_MSK (MISC1_Y_RX_ERROR_STATUS_RX_L1_OVERRUN_ERROR_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L1_UNDERRUN_ERROR_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L1_DESC_TX_ABORT_ERRORS_MASK | \
+ MISC1_Y_RX_ERROR_STATUS_RX_L1_FIFO_FULL_ERRORS_MASK)
+
+#define GET_UV1_ERR_MSK (MISC1_UV_RX_ERROR_STATUS_RX_L1_OVERRUN_ERROR_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L1_UNDERRUN_ERROR_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L1_DESC_TX_ABORT_ERRORS_MASK | \
+ MISC1_UV_RX_ERROR_STATUS_RX_L1_FIFO_FULL_ERRORS_MASK)
/**** API Exposed to the other layers ****/
enum BC_STATUS crystalhd_download_fw(struct crystalhd_adp *adp,
void *buffer, uint32_t sz);
-enum BC_STATUS crystalhd_do_fw_cmd(struct crystalhd_hw *hw, struct BC_FW_CMD *fw_cmd);
-bool crystalhd_hw_interrupt(struct crystalhd_adp *adp, struct crystalhd_hw *hw);
-enum BC_STATUS crystalhd_hw_open(struct crystalhd_hw *, struct crystalhd_adp *);
+enum BC_STATUS crystalhd_do_fw_cmd(struct crystalhd_hw *hw,
+ struct BC_FW_CMD *fw_cmd);
+bool crystalhd_hw_interrupt(struct crystalhd_adp *adp,
+ struct crystalhd_hw *hw);
+enum BC_STATUS crystalhd_hw_open(struct crystalhd_hw *,
+ struct crystalhd_adp *);
enum BC_STATUS crystalhd_hw_close(struct crystalhd_hw *);
enum BC_STATUS crystalhd_hw_setup_dma_rings(struct crystalhd_hw *);
enum BC_STATUS crystalhd_hw_free_dma_rings(struct crystalhd_hw *);
-enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_dio_req *ioreq,
+enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw,
+ struct crystalhd_dio_req *ioreq,
hw_comp_callback call_back,
wait_queue_head_t *cb_event,
uint32_t *list_id, uint8_t data_flags);
@@ -374,15 +389,17 @@ enum BC_STATUS crystalhd_hw_post_tx(struct crystalhd_hw *hw, struct crystalhd_di
enum BC_STATUS crystalhd_hw_pause(struct crystalhd_hw *hw);
enum BC_STATUS crystalhd_hw_unpause(struct crystalhd_hw *hw);
enum BC_STATUS crystalhd_hw_suspend(struct crystalhd_hw *hw);
-enum BC_STATUS crystalhd_hw_cancel_tx(struct crystalhd_hw *hw, uint32_t list_id);
+enum BC_STATUS crystalhd_hw_cancel_tx(struct crystalhd_hw *hw,
+ uint32_t list_id);
enum BC_STATUS crystalhd_hw_add_cap_buffer(struct crystalhd_hw *hw,
- struct crystalhd_dio_req *ioreq, bool en_post);
+ struct crystalhd_dio_req *ioreq, bool en_post);
enum BC_STATUS crystalhd_hw_get_cap_buffer(struct crystalhd_hw *hw,
struct BC_PIC_INFO_BLOCK *pib,
struct crystalhd_dio_req **ioreq);
enum BC_STATUS crystalhd_hw_stop_capture(struct crystalhd_hw *hw);
enum BC_STATUS crystalhd_hw_start_capture(struct crystalhd_hw *hw);
-void crystalhd_hw_stats(struct crystalhd_hw *hw, struct crystalhd_hw_stats *stats);
+void crystalhd_hw_stats(struct crystalhd_hw *hw,
+ struct crystalhd_hw_stats *stats);
/* API to program the core clock on the decoder */
enum BC_STATUS crystalhd_hw_set_core_clock(struct crystalhd_hw *);
diff --git a/drivers/staging/crystalhd/crystalhd_lnx.c b/drivers/staging/crystalhd/crystalhd_lnx.c
index 85f51fb18425..c1f6163cdeb8 100644
--- a/drivers/staging/crystalhd/crystalhd_lnx.c
+++ b/drivers/staging/crystalhd/crystalhd_lnx.c
@@ -75,7 +75,8 @@ static int chd_dec_disable_int(struct crystalhd_adp *adp)
return 0;
}
-struct crystalhd_ioctl_data *chd_dec_alloc_iodata(struct crystalhd_adp *adp, bool isr)
+struct crystalhd_ioctl_data *chd_dec_alloc_iodata(struct crystalhd_adp *adp,
+ bool isr)
{
unsigned long flags = 0;
struct crystalhd_ioctl_data *temp;
@@ -95,8 +96,8 @@ struct crystalhd_ioctl_data *chd_dec_alloc_iodata(struct crystalhd_adp *adp, boo
return temp;
}
-void chd_dec_free_iodata(struct crystalhd_adp *adp, struct crystalhd_ioctl_data *iodata,
- bool isr)
+void chd_dec_free_iodata(struct crystalhd_adp *adp,
+ struct crystalhd_ioctl_data *iodata, bool isr)
{
unsigned long flags = 0;
@@ -109,7 +110,8 @@ void chd_dec_free_iodata(struct crystalhd_adp *adp, struct crystalhd_ioctl_data
spin_unlock_irqrestore(&adp->lock, flags);
}
-static inline int crystalhd_user_data(unsigned long ud, void *dr, int size, int set)
+static inline int crystalhd_user_data(unsigned long ud, void *dr,
+ int size, int set)
{
int rc;
@@ -131,8 +133,8 @@ static inline int crystalhd_user_data(unsigned long ud, void *dr, int size, int
return rc;
}
-static int chd_dec_fetch_cdata(struct crystalhd_adp *adp, struct crystalhd_ioctl_data *io,
- uint32_t m_sz, unsigned long ua)
+static int chd_dec_fetch_cdata(struct crystalhd_adp *adp,
+ struct crystalhd_ioctl_data *io, uint32_t m_sz, unsigned long ua)
{
unsigned long ua_off;
int rc = 0;
@@ -163,7 +165,7 @@ static int chd_dec_fetch_cdata(struct crystalhd_adp *adp, struct crystalhd_ioctl
}
static int chd_dec_release_cdata(struct crystalhd_adp *adp,
- struct crystalhd_ioctl_data *io, unsigned long ua)
+ struct crystalhd_ioctl_data *io, unsigned long ua)
{
unsigned long ua_off;
int rc;
@@ -178,8 +180,9 @@ static int chd_dec_release_cdata(struct crystalhd_adp *adp,
rc = crystalhd_user_data(ua_off, io->add_cdata,
io->add_cdata_sz, 1);
if (rc) {
- BCMLOG_ERR("failed to push add_cdata sz:%x ua_off:%x\n",
- io->add_cdata_sz, (unsigned int)ua_off);
+ BCMLOG_ERR(
+ "failed to push add_cdata sz:%x ua_off:%x\n",
+ io->add_cdata_sz, (unsigned int)ua_off);
return -ENODATA;
}
}
@@ -252,10 +255,7 @@ static int chd_dec_api_cmd(struct crystalhd_adp *adp, unsigned long ua,
rc = chd_dec_proc_user_data(adp, temp, ua, 1);
}
- if (temp) {
- chd_dec_free_iodata(adp, temp, 0);
- temp = NULL;
- }
+ chd_dec_free_iodata(adp, temp, 0);
return rc;
}
@@ -378,8 +378,8 @@ static int chd_dec_init_chdev(struct crystalhd_adp *adp)
goto class_create_fail;
}
- dev = device_create(crystalhd_class, NULL, MKDEV(adp->chd_dec_major, 0),
- NULL, "crystalhd");
+ dev = device_create(crystalhd_class, NULL,
+ MKDEV(adp->chd_dec_major, 0), NULL, "crystalhd");
if (IS_ERR(dev)) {
rc = PTR_ERR(dev);
BCMLOG_ERR("failed to create device\n");
@@ -394,7 +394,8 @@ static int chd_dec_init_chdev(struct crystalhd_adp *adp)
/* Allocate general purpose ioctl pool. */
for (i = 0; i < CHD_IODATA_POOL_SZ; i++) {
- temp = kzalloc(sizeof(struct crystalhd_ioctl_data), GFP_KERNEL);
+ temp = kzalloc(sizeof(struct crystalhd_ioctl_data),
+ GFP_KERNEL);
if (!temp) {
BCMLOG_ERR("ioctl data pool kzalloc failed\n");
rc = -ENOMEM;
diff --git a/drivers/staging/crystalhd/crystalhd_lnx.h b/drivers/staging/crystalhd/crystalhd_lnx.h
index a9e36336d097..bac572a8bc2e 100644
--- a/drivers/staging/crystalhd/crystalhd_lnx.h
+++ b/drivers/staging/crystalhd/crystalhd_lnx.h
@@ -77,8 +77,8 @@ struct crystalhd_adp {
int chd_dec_major;
unsigned int cfg_users;
- struct crystalhd_ioctl_data *idata_free_head; /* ioctl data pool */
- struct crystalhd_elem *elem_pool_head; /* Queue element pool */
+ struct crystalhd_ioctl_data *idata_free_head; /* ioctl data pool */
+ struct crystalhd_elem *elem_pool_head; /* Queue element pool */
struct crystalhd_cmd cmds;
diff --git a/drivers/staging/crystalhd/crystalhd_misc.c b/drivers/staging/crystalhd/crystalhd_misc.c
index a5f109c632dc..51f698052aff 100644
--- a/drivers/staging/crystalhd/crystalhd_misc.c
+++ b/drivers/staging/crystalhd/crystalhd_misc.c
@@ -30,19 +30,22 @@
uint32_t g_linklog_level;
-static inline uint32_t crystalhd_dram_rd(struct crystalhd_adp *adp, uint32_t mem_off)
+static inline uint32_t crystalhd_dram_rd(struct crystalhd_adp *adp,
+ uint32_t mem_off)
{
crystalhd_reg_wr(adp, DCI_DRAM_BASE_ADDR, (mem_off >> 19));
return bc_dec_reg_rd(adp, (0x00380000 | (mem_off & 0x0007FFFF)));
}
-static inline void crystalhd_dram_wr(struct crystalhd_adp *adp, uint32_t mem_off, uint32_t val)
+static inline void crystalhd_dram_wr(struct crystalhd_adp *adp,
+ uint32_t mem_off, uint32_t val)
{
crystalhd_reg_wr(adp, DCI_DRAM_BASE_ADDR, (mem_off >> 19));
bc_dec_reg_wr(adp, (0x00380000 | (mem_off & 0x0007FFFF)), val);
}
-static inline enum BC_STATUS bc_chk_dram_range(struct crystalhd_adp *adp, uint32_t start_off, uint32_t cnt)
+static inline enum BC_STATUS bc_chk_dram_range(struct crystalhd_adp *adp,
+ uint32_t start_off, uint32_t cnt)
{
return BC_STS_SUCCESS;
}
@@ -66,7 +69,8 @@ static struct crystalhd_dio_req *crystalhd_alloc_dio(struct crystalhd_adp *adp)
return temp;
}
-static void crystalhd_free_dio(struct crystalhd_adp *adp, struct crystalhd_dio_req *dio)
+static void crystalhd_free_dio(struct crystalhd_adp *adp,
+ struct crystalhd_dio_req *dio)
{
unsigned long flags = 0;
@@ -99,7 +103,8 @@ static struct crystalhd_elem *crystalhd_alloc_elem(struct crystalhd_adp *adp)
return temp;
}
-static void crystalhd_free_elem(struct crystalhd_adp *adp, struct crystalhd_elem *elem)
+static void crystalhd_free_elem(struct crystalhd_adp *adp,
+ struct crystalhd_elem *elem)
{
unsigned long flags = 0;
@@ -120,7 +125,8 @@ static inline void crystalhd_set_sg(struct scatterlist *sg, struct page *page,
#endif
}
-static inline void crystalhd_init_sg(struct scatterlist *sg, unsigned int entries)
+static inline void crystalhd_init_sg(struct scatterlist *sg,
+ unsigned int entries)
{
/* http://lkml.org/lkml/2007/11/27/68 */
sg_init_table(sg, entries);
@@ -208,7 +214,8 @@ uint32_t crystalhd_reg_rd(struct crystalhd_adp *adp, uint32_t reg_off)
* configuration space.
*
*/
-void crystalhd_reg_wr(struct crystalhd_adp *adp, uint32_t reg_off, uint32_t val)
+void crystalhd_reg_wr(struct crystalhd_adp *adp, uint32_t reg_off,
+ uint32_t val)
{
if (!adp || (reg_off > adp->pci_i2o_len)) {
BCMLOG_ERR("link_wr_reg_off outof range: 0x%08x\n", reg_off);
@@ -469,7 +476,8 @@ enum BC_STATUS crystalhd_create_dioq(struct crystalhd_adp *adp,
* by calling the call back provided during creation.
*
*/
-void crystalhd_delete_dioq(struct crystalhd_adp *adp, struct crystalhd_dioq *dioq)
+void crystalhd_delete_dioq(struct crystalhd_adp *adp,
+ struct crystalhd_dioq *dioq)
{
void *temp;
@@ -639,7 +647,8 @@ void *crystalhd_dioq_fetch_wait(struct crystalhd_dioq *ioq, uint32_t to_secs,
while ((ioq->count == 0) && count) {
spin_unlock_irqrestore(&ioq->lock, flags);
- crystalhd_wait_on_event(&ioq->event, (ioq->count > 0), 1000, rc, 0);
+ crystalhd_wait_on_event(&ioq->event,
+ (ioq->count > 0), 1000, rc, 0);
if (rc == 0) {
goto out;
} else if (rc == -EINTR) {
@@ -678,7 +687,8 @@ enum BC_STATUS crystalhd_map_dio(struct crystalhd_adp *adp, void *ubuff,
struct crystalhd_dio_req **dio_hnd)
{
struct crystalhd_dio_req *dio;
- /* FIXME: jarod: should some of these unsigned longs be uint32_t or uintptr_t? */
+ /* FIXME: jarod: should some of these
+ unsigned longs be uint32_t or uintptr_t? */
unsigned long start = 0, end = 0, uaddr = 0, count = 0;
unsigned long spsz = 0, uv_start = 0;
int i = 0, rw = 0, res = 0, nr_pages = 0, skip_fb_sg = 0;
@@ -723,7 +733,8 @@ enum BC_STATUS crystalhd_map_dio(struct crystalhd_adp *adp, void *ubuff,
if (uv_offset) {
uv_start = (uaddr + (unsigned long)uv_offset) >> PAGE_SHIFT;
dio->uinfo.uv_sg_ix = uv_start - start;
- dio->uinfo.uv_sg_off = ((uaddr + (unsigned long)uv_offset) & ~PAGE_MASK);
+ dio->uinfo.uv_sg_off = ((uaddr + (unsigned long)uv_offset) &
+ ~PAGE_MASK);
}
dio->fb_size = ubuff_sz & 0x03;
@@ -819,7 +830,8 @@ enum BC_STATUS crystalhd_map_dio(struct crystalhd_adp *adp, void *ubuff,
*
* This routine is to unmap the user buffer pages.
*/
-enum BC_STATUS crystalhd_unmap_dio(struct crystalhd_adp *adp, struct crystalhd_dio_req *dio)
+enum BC_STATUS crystalhd_unmap_dio(struct crystalhd_adp *adp,
+ struct crystalhd_dio_req *dio)
{
struct page *page = NULL;
int j = 0;
@@ -841,7 +853,8 @@ enum BC_STATUS crystalhd_unmap_dio(struct crystalhd_adp *adp, struct crystalhd_d
}
}
if (dio->sig == crystalhd_dio_sg_mapped)
- pci_unmap_sg(adp->pdev, dio->sg, dio->page_cnt, dio->direction);
+ pci_unmap_sg(adp->pdev, dio->sg, dio->page_cnt,
+ dio->direction);
crystalhd_free_dio(adp, dio);
diff --git a/drivers/staging/crystalhd/crystalhd_misc.h b/drivers/staging/crystalhd/crystalhd_misc.h
index 8cdaa7a34814..4dae3a797e95 100644
--- a/drivers/staging/crystalhd/crystalhd_misc.h
+++ b/drivers/staging/crystalhd/crystalhd_misc.h
@@ -127,12 +127,16 @@ uint32_t crystalhd_reg_rd(struct crystalhd_adp *, uint32_t);
void crystalhd_reg_wr(struct crystalhd_adp *, uint32_t, uint32_t);
/*========= Decoder (7412) memory access routines..=================*/
-enum BC_STATUS crystalhd_mem_rd(struct crystalhd_adp *, uint32_t, uint32_t, uint32_t *);
-enum BC_STATUS crystalhd_mem_wr(struct crystalhd_adp *, uint32_t, uint32_t, uint32_t *);
+enum BC_STATUS crystalhd_mem_rd(struct crystalhd_adp *,
+ uint32_t, uint32_t, uint32_t *);
+enum BC_STATUS crystalhd_mem_wr(struct crystalhd_adp *,
+ uint32_t, uint32_t, uint32_t *);
/*==========Link (70012) PCIe Config access routines.================*/
-enum BC_STATUS crystalhd_pci_cfg_rd(struct crystalhd_adp *, uint32_t, uint32_t, uint32_t *);
-enum BC_STATUS crystalhd_pci_cfg_wr(struct crystalhd_adp *, uint32_t, uint32_t, uint32_t);
+enum BC_STATUS crystalhd_pci_cfg_rd(struct crystalhd_adp *,
+ uint32_t, uint32_t, uint32_t *);
+enum BC_STATUS crystalhd_pci_cfg_wr(struct crystalhd_adp *,
+ uint32_t, uint32_t, uint32_t);
/*========= Linux Kernel Interface routines. ======================= */
void *bc_kern_dma_alloc(struct crystalhd_adp *, uint32_t, dma_addr_t *);
@@ -168,20 +172,26 @@ do { \
/*================ Direct IO mapping routines ==================*/
extern int crystalhd_create_dio_pool(struct crystalhd_adp *, uint32_t);
extern void crystalhd_destroy_dio_pool(struct crystalhd_adp *);
-extern enum BC_STATUS crystalhd_map_dio(struct crystalhd_adp *, void *, uint32_t,
- uint32_t, bool, bool, struct crystalhd_dio_req**);
+extern enum BC_STATUS crystalhd_map_dio(struct crystalhd_adp *, void *,
+ uint32_t, uint32_t, bool, bool, struct crystalhd_dio_req**);
-extern enum BC_STATUS crystalhd_unmap_dio(struct crystalhd_adp *, struct crystalhd_dio_req*);
+extern enum BC_STATUS crystalhd_unmap_dio(struct crystalhd_adp *,
+ struct crystalhd_dio_req*);
#define crystalhd_get_sgle_paddr(_dio, _ix) (cpu_to_le64(sg_dma_address(&_dio->sg[_ix])))
#define crystalhd_get_sgle_len(_dio, _ix) (cpu_to_le32(sg_dma_len(&_dio->sg[_ix])))
/*================ General Purpose Queues ==================*/
-extern enum BC_STATUS crystalhd_create_dioq(struct crystalhd_adp *, struct crystalhd_dioq **, crystalhd_data_free_cb , void *);
-extern void crystalhd_delete_dioq(struct crystalhd_adp *, struct crystalhd_dioq *);
-extern enum BC_STATUS crystalhd_dioq_add(struct crystalhd_dioq *ioq, void *data, bool wake, uint32_t tag);
+extern enum BC_STATUS crystalhd_create_dioq(struct crystalhd_adp *,
+ struct crystalhd_dioq **, crystalhd_data_free_cb , void *);
+extern void crystalhd_delete_dioq(struct crystalhd_adp *,
+ struct crystalhd_dioq *);
+extern enum BC_STATUS crystalhd_dioq_add(struct crystalhd_dioq *ioq,
+ void *data, bool wake, uint32_t tag);
extern void *crystalhd_dioq_fetch(struct crystalhd_dioq *ioq);
-extern void *crystalhd_dioq_find_and_fetch(struct crystalhd_dioq *ioq, uint32_t tag);
-extern void *crystalhd_dioq_fetch_wait(struct crystalhd_dioq *ioq, uint32_t to_secs, uint32_t *sig_pend);
+extern void *crystalhd_dioq_find_and_fetch(struct crystalhd_dioq *ioq,
+ uint32_t tag);
+extern void *crystalhd_dioq_fetch_wait(struct crystalhd_dioq *ioq,
+ uint32_t to_secs, uint32_t *sig_pend);
#define crystalhd_dioq_count(_ioq) ((_ioq) ? _ioq->count : 0)
@@ -190,7 +200,8 @@ extern void crystalhd_delete_elem_pool(struct crystalhd_adp *);
/*================ Debug routines/macros .. ================================*/
-extern void crystalhd_show_buffer(uint32_t off, uint8_t *buff, uint32_t dwcount);
+extern void crystalhd_show_buffer(uint32_t off, uint8_t *buff,
+ uint32_t dwcount);
enum _chd_log_levels {
BCMLOG_ERROR = 0x80000000, /* Don't disable this option */
diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c
index b53a9e29a97c..d795852ccb1c 100644
--- a/drivers/staging/csr/bh.c
+++ b/drivers/staging/csr/bh.c
@@ -241,67 +241,72 @@ static int bh_thread_function(void *arg)
this_thread = &priv->bh_thread;
t = timeout = 0;
- while (!kthread_should_stop()) {
- /* wait until an error occurs, or we need to process something. */
- unifi_trace(priv, UDBG3, "bh_thread goes to sleep.\n");
-
- if (timeout > 0) {
- /* Convert t in ms to jiffies */
- t = msecs_to_jiffies(timeout);
- ret = wait_event_interruptible_timeout(this_thread->wakeup_q,
- (this_thread->wakeup_flag && !this_thread->block_thread) ||
- kthread_should_stop(),
- t);
- timeout = (ret > 0) ? jiffies_to_msecs(ret) : 0;
- } else {
- ret = wait_event_interruptible(this_thread->wakeup_q,
- (this_thread->wakeup_flag && !this_thread->block_thread) ||
- kthread_should_stop());
- }
-
- if (kthread_should_stop()) {
- unifi_trace(priv, UDBG2, "bh_thread: signalled to exit\n");
- break;
- }
-
- if (ret < 0) {
- unifi_notice(priv,
- "bh_thread: wait_event returned %d, thread will exit\n",
- ret);
- uf_wait_for_thread_to_stop(priv, this_thread);
- break;
- }
-
- this_thread->wakeup_flag = 0;
-
- unifi_trace(priv, UDBG3, "bh_thread calls unifi_bh().\n");
-
- CsrSdioClaim(priv->sdio);
- csrResult = unifi_bh(priv->card, &timeout);
- if(csrResult != CSR_RESULT_SUCCESS) {
- if (csrResult == CSR_WIFI_HIP_RESULT_NO_DEVICE) {
- CsrSdioRelease(priv->sdio);
- uf_wait_for_thread_to_stop(priv, this_thread);
- break;
- }
- /* Errors must be delivered to the error task */
- handle_bh_error(priv);
- }
- CsrSdioRelease(priv->sdio);
- }
-
- /*
- * I would normally try to call csr_sdio_remove_irq() here to make sure
- * that we do not get any interrupts while this thread is not running.
- * However, the MMC/SDIO driver tries to kill its' interrupt thread.
- * The kernel threads implementation does not allow to kill threads
- * from a signalled to stop thread.
- * So, instead call csr_sdio_linux_remove_irq() always after calling
- * uf_stop_thread() to kill this thread.
- */
-
- unifi_trace(priv, UDBG2, "bh_thread exiting....\n");
- return 0;
+ while (!kthread_should_stop()) {
+ /*
+ * wait until an error occurs,
+ * or we need to process something.
+ */
+ unifi_trace(priv, UDBG3, "bh_thread goes to sleep.\n");
+
+ if (timeout > 0) {
+ /* Convert t in ms to jiffies */
+ t = msecs_to_jiffies(timeout);
+ ret = wait_event_interruptible_timeout(
+ this_thread->wakeup_q,
+ (this_thread->wakeup_flag && !this_thread->block_thread) ||
+ kthread_should_stop(),
+ t);
+ timeout = (ret > 0) ? jiffies_to_msecs(ret) : 0;
+ } else {
+ ret = wait_event_interruptible(this_thread->wakeup_q,
+ (this_thread->wakeup_flag && !this_thread->block_thread) ||
+ kthread_should_stop());
+ }
+
+ if (kthread_should_stop()) {
+ unifi_trace(priv, UDBG2,
+ "bh_thread: signalled to exit\n");
+ break;
+ }
+
+ if (ret < 0) {
+ unifi_notice(priv,
+ "bh_thread: wait_event returned %d, thread will exit\n",
+ ret);
+ uf_wait_for_thread_to_stop(priv, this_thread);
+ break;
+ }
+
+ this_thread->wakeup_flag = 0;
+
+ unifi_trace(priv, UDBG3, "bh_thread calls unifi_bh().\n");
+
+ CsrSdioClaim(priv->sdio);
+ csrResult = unifi_bh(priv->card, &timeout);
+ if (csrResult != CSR_RESULT_SUCCESS) {
+ if (csrResult == CSR_WIFI_HIP_RESULT_NO_DEVICE) {
+ CsrSdioRelease(priv->sdio);
+ uf_wait_for_thread_to_stop(priv, this_thread);
+ break;
+ }
+ /* Errors must be delivered to the error task */
+ handle_bh_error(priv);
+ }
+ CsrSdioRelease(priv->sdio);
+ }
+
+ /*
+ * I would normally try to call csr_sdio_remove_irq() here to make sure
+ * that we do not get any interrupts while this thread is not running.
+ * However, the MMC/SDIO driver tries to kill its' interrupt thread.
+ * The kernel threads implementation does not allow to kill threads
+ * from a signalled to stop thread.
+ * So, instead call csr_sdio_linux_remove_irq() always after calling
+ * uf_stop_thread() to kill this thread.
+ */
+
+ unifi_trace(priv, UDBG2, "bh_thread exiting....\n");
+ return 0;
} /* bh_thread_function() */
@@ -319,33 +324,33 @@ static int bh_thread_function(void *arg)
* 0 on success or else a Linux error code.
* ---------------------------------------------------------------------------
*/
- int
+int
uf_init_bh(unifi_priv_t *priv)
{
- int r;
+ int r;
- /* Enable mlme interface. */
- priv->io_aborted = 0;
+ /* Enable mlme interface. */
+ priv->io_aborted = 0;
- /* Start the BH thread */
- r = uf_start_thread(priv, &priv->bh_thread, bh_thread_function);
- if (r) {
- unifi_error(priv,
- "uf_init_bh: failed to start the BH thread.\n");
- return r;
- }
+ /* Start the BH thread */
+ r = uf_start_thread(priv, &priv->bh_thread, bh_thread_function);
+ if (r) {
+ unifi_error(priv,
+ "uf_init_bh: failed to start the BH thread.\n");
+ return r;
+ }
- /* Allow interrupts */
- r = csr_sdio_linux_install_irq(priv->sdio);
- if (r) {
- unifi_error(priv,
- "uf_init_bh: failed to install the IRQ.\n");
+ /* Allow interrupts */
+ r = csr_sdio_linux_install_irq(priv->sdio);
+ if (r) {
+ unifi_error(priv,
+ "uf_init_bh: failed to install the IRQ.\n");
- uf_stop_thread(priv, &priv->bh_thread);
- }
+ uf_stop_thread(priv, &priv->bh_thread);
+ }
- return r;
+ return r;
} /* uf_init_bh() */
@@ -370,28 +375,30 @@ uf_init_bh(unifi_priv_t *priv)
*/
CsrResult unifi_run_bh(void *ospriv)
{
- unifi_priv_t *priv = ospriv;
-
- /*
- * If an error has occurred, we discard silently all messages from the bh
- * until the error has been processed and the unifi has been reinitialised.
- */
- if (priv->bh_thread.block_thread == 1) {
- unifi_trace(priv, UDBG3, "unifi_run_bh: discard message.\n");
- /*
- * Do not try to acknowledge a pending interrupt here.
- * This function is called by unifi_send_signal() which in turn can be
- * running in an atomic or 'disabled irq' level if a signal is sent
- * from a workqueue task (i.e multicass addresses set).
- * We can not hold the SDIO lock because it might sleep.
- */
- return CSR_RESULT_FAILURE;
- }
-
- priv->bh_thread.wakeup_flag = 1;
- /* wake up I/O thread */
- wake_up_interruptible(&priv->bh_thread.wakeup_q);
-
- return CSR_RESULT_SUCCESS;
+ unifi_priv_t *priv = ospriv;
+
+ /*
+ * If an error has occurred, we discard silently all messages from the bh
+ * until the error has been processed and the unifi has been
+ * reinitialised.
+ */
+ if (priv->bh_thread.block_thread == 1) {
+ unifi_trace(priv, UDBG3, "unifi_run_bh: discard message.\n");
+ /*
+ * Do not try to acknowledge a pending interrupt here.
+ * This function is called by unifi_send_signal()
+ * which in turn can be running in an atomic or 'disabled irq'
+ * level if a signal is sent from a workqueue task
+ * (i.e multicass addresses set). We can not hold the SDIO lock
+ * because it might sleep.
+ */
+ return CSR_RESULT_FAILURE;
+ }
+
+ priv->bh_thread.wakeup_flag = 1;
+ /* wake up I/O thread */
+ wake_up_interruptible(&priv->bh_thread.wakeup_q);
+
+ return CSR_RESULT_SUCCESS;
} /* unifi_run_bh() */
diff --git a/drivers/staging/csr/csr_framework_ext.c b/drivers/staging/csr/csr_framework_ext.c
index 2aabb6c6b0af..98122bce1427 100644
--- a/drivers/staging/csr/csr_framework_ext.c
+++ b/drivers/staging/csr/csr_framework_ext.c
@@ -1,10 +1,10 @@
/*****************************************************************************
- (c) Cambridge Silicon Radio Limited 2010
- All rights reserved and confidential information of CSR
+ (c) Cambridge Silicon Radio Limited 2010
+ All rights reserved and confidential information of CSR
- Refer to LICENSE.txt included with this source for details
- on the license terms.
+ Refer to LICENSE.txt included with this source for details
+ on the license terms.
*****************************************************************************/
@@ -31,10 +31,10 @@
*----------------------------------------------------------------------------*/
void CsrThreadSleep(u16 sleepTimeInMs)
{
- unsigned long t;
+ unsigned long t;
- /* Convert t in ms to jiffies and round up */
- t = ((sleepTimeInMs * HZ) + 999) / 1000;
- schedule_timeout_uninterruptible(t);
+ /* Convert t in ms to jiffies and round up */
+ t = ((sleepTimeInMs * HZ) + 999) / 1000;
+ schedule_timeout_uninterruptible(t);
}
EXPORT_SYMBOL_GPL(CsrThreadSleep);
diff --git a/drivers/staging/csr/csr_framework_ext.h b/drivers/staging/csr/csr_framework_ext.h
index e8ae490c09d6..6d26ac6173b0 100644
--- a/drivers/staging/csr/csr_framework_ext.h
+++ b/drivers/staging/csr/csr_framework_ext.h
@@ -2,11 +2,11 @@
#define CSR_FRAMEWORK_EXT_H__
/*****************************************************************************
- (c) Cambridge Silicon Radio Limited 2010
- All rights reserved and confidential information of CSR
+ (c) Cambridge Silicon Radio Limited 2010
+ All rights reserved and confidential information of CSR
- Refer to LICENSE.txt included with this source for details
- on the license terms.
+ Refer to LICENSE.txt included with this source for details
+ on the license terms.
*****************************************************************************/
diff --git a/drivers/staging/csr/csr_wifi_nme_ap_sef.c b/drivers/staging/csr/csr_wifi_nme_ap_sef.c
index e048848883d5..bfebb1529779 100644
--- a/drivers/staging/csr/csr_wifi_nme_ap_sef.c
+++ b/drivers/staging/csr/csr_wifi_nme_ap_sef.c
@@ -21,10 +21,10 @@ void CsrWifiNmeApUpstreamStateHandlers(void* drvpriv, CsrWifiFsmEvent* msg)
CsrWifiNmeApStopCfmHandler(drvpriv, msg);
break;
case CSR_WIFI_NME_AP_CONFIG_SET_CFM:
- CsrWifiNmeApConfigSetCfmHandler(drvpriv,msg);
+ CsrWifiNmeApConfigSetCfmHandler(drvpriv, msg);
break;
default:
- unifi_error(drvpriv, "CsrWifiNmeApUpstreamStateHandlers: unhandled NME_AP message type 0x%.4X\n",msg->type);
+ unifi_error(drvpriv, "CsrWifiNmeApUpstreamStateHandlers: unhandled NME_AP message type 0x%.4X\n", msg->type);
break;
}
}
diff --git a/drivers/staging/csr/drv.c b/drivers/staging/csr/drv.c
index bdc25236ab00..92898de921f5 100644
--- a/drivers/staging/csr/drv.c
+++ b/drivers/staging/csr/drv.c
@@ -1159,13 +1159,13 @@ unifi_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
break;
#ifdef CSR_SUPPORT_SME
case UNIFI_CFG_CORE_DUMP:
- CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,CSR_WIFI_SME_CONTROL_INDICATION_ERROR);
+ CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, CSR_WIFI_SME_CONTROL_INDICATION_ERROR);
unifi_trace(priv, UDBG2, "UNIFI_CFG_CORE_DUMP: sent wifi off indication\n");
break;
#endif
#ifdef CSR_SUPPORT_WEXT_AP
case UNIFI_CFG_SET_AP_CONFIG:
- r= unifi_cfg_set_ap_config(priv,(unsigned char*)arg);
+ r= unifi_cfg_set_ap_config(priv, (unsigned char*)arg);
break;
#endif
default:
@@ -1275,7 +1275,7 @@ unifi_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Attach the network device to the stack */
if (!interfacePriv->netdev_registered)
{
- r = uf_register_netdev(priv,interfaceTag);
+ r = uf_register_netdev(priv, interfaceTag);
if (r) {
unifi_error(priv, "Failed to register the network device.\n");
goto out;
diff --git a/drivers/staging/csr/io.c b/drivers/staging/csr/io.c
index fe4a7ba2acc9..f903022b4079 100644
--- a/drivers/staging/csr/io.c
+++ b/drivers/staging/csr/io.c
@@ -117,7 +117,7 @@ static CsrResult signal_buffer_init(unifi_priv_t * priv, int size)
if (priv->rxSignalBuffer.rx_buff[i].bufptr == NULL)
{
int j;
- unifi_error(priv,"signal_buffer_init:Failed to Allocate shared memory for T-H signals \n");
+ unifi_error(priv, "signal_buffer_init:Failed to Allocate shared memory for T-H signals \n");
for(j=0;j<i;j++)
{
priv->rxSignalBuffer.rx_buff[j].sig_len=0;
@@ -360,13 +360,13 @@ register_unifi_sdio(CsrSdioFunction *sdio_dev, int bus_id, struct device *dev)
for(i=1;i<CSR_WIFI_NUM_INTERFACES;i++)
{
- if( !uf_alloc_netdevice_for_other_interfaces(priv,i) )
+ if( !uf_alloc_netdevice_for_other_interfaces(priv, i) )
{
/* error occured while allocating the net_device for interface[i]. The net_device are
* allocated for the interfaces with id<i. Dont worry, all the allocated net_device will
* be releasing chen the control goes to the label failed0.
*/
- unifi_error(priv, "Failed to allocate driver private for interface[%d]\n",i);
+ unifi_error(priv, "Failed to allocate driver private for interface[%d]\n", i);
goto failed0;
}
else
@@ -391,12 +391,12 @@ register_unifi_sdio(CsrSdioFunction *sdio_dev, int bus_id, struct device *dev)
#ifdef CSR_WIFI_RX_PATH_SPLIT
if (signal_buffer_init(priv, CSR_WIFI_RX_SIGNAL_BUFFER_SIZE))
{
- unifi_error(priv,"Failed to allocate shared memory for T-H signals\n");
+ unifi_error(priv, "Failed to allocate shared memory for T-H signals\n");
goto failed2;
}
priv->rx_workqueue = create_singlethread_workqueue("rx_workq");
if (priv->rx_workqueue == NULL) {
- unifi_error(priv,"create_singlethread_workqueue failed \n");
+ unifi_error(priv, "create_singlethread_workqueue failed \n");
goto failed3;
}
INIT_WORK(&priv->rx_work_struct, rx_wq_handler);
@@ -442,7 +442,7 @@ if (log_hip_signals)
flush_workqueue(priv->rx_workqueue);
destroy_workqueue(priv->rx_workqueue);
failed3:
- signal_buffer_free(priv,CSR_WIFI_RX_SIGNAL_BUFFER_SIZE);
+ signal_buffer_free(priv, CSR_WIFI_RX_SIGNAL_BUFFER_SIZE);
failed2:
#endif
/* Remove the device nodes */
@@ -558,8 +558,8 @@ cleanup_unifi_sdio(unifi_priv_t *priv)
/* Free any packets left in the Rx queues */
for(i=0;i<CSR_WIFI_NUM_INTERFACES;i++)
{
- uf_free_pending_rx_packets(priv, UF_UNCONTROLLED_PORT_Q, broadcast_address,i);
- uf_free_pending_rx_packets(priv, UF_CONTROLLED_PORT_Q, broadcast_address,i);
+ uf_free_pending_rx_packets(priv, UF_UNCONTROLLED_PORT_Q, broadcast_address, i);
+ uf_free_pending_rx_packets(priv, UF_CONTROLLED_PORT_Q, broadcast_address, i);
}
/*
* We need to free the resources held by the core, which include tx skbs,
@@ -595,7 +595,7 @@ cleanup_unifi_sdio(unifi_priv_t *priv)
#ifdef CSR_WIFI_RX_PATH_SPLIT
flush_workqueue(priv->rx_workqueue);
destroy_workqueue(priv->rx_workqueue);
- signal_buffer_free(priv,CSR_WIFI_RX_SIGNAL_BUFFER_SIZE);
+ signal_buffer_free(priv, CSR_WIFI_RX_SIGNAL_BUFFER_SIZE);
#endif
/* Priv is freed as part of the net_device */
diff --git a/drivers/staging/csr/monitor.c b/drivers/staging/csr/monitor.c
index c8e20e4c6111..e11f6cba8266 100644
--- a/drivers/staging/csr/monitor.c
+++ b/drivers/staging/csr/monitor.c
@@ -188,7 +188,7 @@ netrx_radiotap(unifi_priv_t *priv,
skb->dev = dev;
- skb->mac_header = skb->data;
+ skb_reset_mac_header(skb);
skb->pkt_type = PACKET_OTHERHOST;
skb->protocol = __constant_htons(ETH_P_80211_RAW);
memset(skb->cb, 0, sizeof(skb->cb));
diff --git a/drivers/staging/csr/netdev.c b/drivers/staging/csr/netdev.c
index a0177d998978..5ead2d404115 100644
--- a/drivers/staging/csr/netdev.c
+++ b/drivers/staging/csr/netdev.c
@@ -754,7 +754,7 @@ get_packet_priority(unifi_priv_t *priv, struct sk_buff *skb, const struct ethhdr
case CSR_WIFI_ROUTER_CTRL_MODE_IBSS:
{
CsrWifiRouterCtrlStaInfo_t * dstStaInfo =
- CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv,ehdr->h_dest, interfacePriv->InterfaceTag);
+ CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv, ehdr->h_dest, interfacePriv->InterfaceTag);
unifi_trace(priv, UDBG4, "mode is AP \n");
if (!(ehdr->h_dest[0] & 0x01) && dstStaInfo && dstStaInfo->wmmOrQosEnabled) {
/* If packet is not Broadcast/multicast */
@@ -1011,7 +1011,7 @@ skb_80211_to_ether(unifi_priv_t *priv, struct sk_buff *skb,
#endif
if(skb== NULL || daddr == NULL || saddr == NULL){
- unifi_error(priv,"skb_80211_to_ether: PBC fail\n");
+ unifi_error(priv, "skb_80211_to_ether: PBC fail\n");
return 1;
}
@@ -1198,7 +1198,7 @@ int prepare_and_add_macheader(unifi_priv_t *priv, struct sk_buff *skb, struct sk
u8 bQosNull = false;
if (skb == NULL) {
- unifi_error(priv,"prepare_and_add_macheader: Invalid SKB reference\n");
+ unifi_error(priv, "prepare_and_add_macheader: Invalid SKB reference\n");
return -1;
}
@@ -1383,7 +1383,7 @@ int prepare_and_add_macheader(unifi_priv_t *priv, struct sk_buff *skb, struct sk
macHeaderLengthInBytes -= ETH_ALEN;
break;
default:
- unifi_error(priv,"Unknown direction =%d : Not handled now\n",direction);
+ unifi_error(priv, "Unknown direction =%d : Not handled now\n", direction);
return -1;
}
/* 2 bytes of frame control field, appended by firmware */
@@ -1569,8 +1569,8 @@ send_ma_pkt_request(unifi_priv_t *priv, struct sk_buff *skb, const struct ethhdr
memcpy(peerAddress.a, ((u8 *) bulkdata.d[0].os_data_ptr) + 4, ETH_ALEN);
unifi_trace(priv, UDBG5, "RA[0]=%x, RA[1]=%x, RA[2]=%x, RA[3]=%x, RA[4]=%x, RA[5]=%x\n",
- peerAddress.a[0],peerAddress.a[1], peerAddress.a[2], peerAddress.a[3],
- peerAddress.a[4],peerAddress.a[5]);
+ peerAddress.a[0], peerAddress.a[1], peerAddress.a[2], peerAddress.a[3],
+ peerAddress.a[4], peerAddress.a[5]);
if ((proto == ETH_P_PAE)
@@ -1865,10 +1865,10 @@ unifi_pause_xmit(void *ospriv, unifi_TrafficQueue queue)
#ifdef CSR_SUPPORT_SME
if(queue<=3) {
- routerStartBuffering(priv,queue);
- unifi_trace(priv,UDBG2,"Start buffering %d\n", queue);
+ routerStartBuffering(priv, queue);
+ unifi_trace(priv, UDBG2, "Start buffering %d\n", queue);
} else {
- routerStartBuffering(priv,0);
+ routerStartBuffering(priv, 0);
unifi_error(priv, "Start buffering %d defaulting to 0\n", queue);
}
#endif
@@ -1893,11 +1893,11 @@ unifi_restart_xmit(void *ospriv, unifi_TrafficQueue queue)
#ifdef CSR_SUPPORT_SME
if(queue <=3) {
- routerStopBuffering(priv,queue);
- uf_send_buffered_frames(priv,queue);
+ routerStopBuffering(priv, queue);
+ uf_send_buffered_frames(priv, queue);
} else {
- routerStopBuffering(priv,0);
- uf_send_buffered_frames(priv,0);
+ routerStopBuffering(priv, 0);
+ uf_send_buffered_frames(priv, 0);
}
#endif
} /* unifi_restart_xmit() */
@@ -2102,14 +2102,14 @@ uf_resume_data_plane(unifi_priv_t *priv, int queue,
netif_tx_schedule_all(priv->netdev[interfaceTag]);
}
#endif
- uf_process_rx_pending_queue(priv, queue, peer_address, 1,interfaceTag);
+ uf_process_rx_pending_queue(priv, queue, peer_address, 1, interfaceTag);
}
} /* uf_resume_data_plane() */
-void uf_free_pending_rx_packets(unifi_priv_t *priv, int queue, CsrWifiMacAddress peer_address,u16 interfaceTag)
+void uf_free_pending_rx_packets(unifi_priv_t *priv, int queue, CsrWifiMacAddress peer_address, u16 interfaceTag)
{
- uf_process_rx_pending_queue(priv, queue, peer_address, 0,interfaceTag);
+ uf_process_rx_pending_queue(priv, queue, peer_address, 0, interfaceTag);
} /* uf_free_pending_rx_packets() */
@@ -2153,7 +2153,7 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
if (interfaceTag >= CSR_WIFI_NUM_INTERFACES)
{
unifi_error(priv, "%s: MA-PACKET indication with bad interfaceTag %d\n", __FUNCTION__, interfaceTag);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2167,7 +2167,7 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
if (bulkdata->d[0].data_length == 0) {
unifi_warning(priv, "%s: MA-PACKET indication with zero bulk data\n", __FUNCTION__);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2179,8 +2179,8 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
toDs = (skb->data[1] & 0x01) ? 1 : 0;
fromDs = (skb->data[1] & 0x02) ? 1 : 0;
- memcpy(da,(skb->data+4+toDs*12),ETH_ALEN);/* Address1 or 3 */
- memcpy(sa,(skb->data+10+fromDs*(6+toDs*8)),ETH_ALEN); /* Address2, 3 or 4 */
+ memcpy(da, (skb->data+4+toDs*12), ETH_ALEN);/* Address1 or 3 */
+ memcpy(sa, (skb->data+10+fromDs*(6+toDs*8)), ETH_ALEN); /* Address2, 3 or 4 */
pData = &bulkdata->d[0];
@@ -2189,7 +2189,7 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
dataFrameType =((frameControl & 0x00f0) >> 4);
unifi_trace(priv, UDBG6,
- "%s: Receive Data Frame Type %d \n", __FUNCTION__,dataFrameType);
+ "%s: Receive Data Frame Type %d \n", __FUNCTION__, dataFrameType);
switch(dataFrameType)
{
@@ -2276,7 +2276,7 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
/* AP/P2PGO specific handling here */
CsrWifiRouterCtrlStaInfo_t * srcStaInfo =
- CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv,sa,interfaceTag);
+ CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv, sa, interfaceTag);
/* Defensive check only; Source address is already checked in
process_ma_packet_ind and we should have a valid source address here */
@@ -2284,10 +2284,10 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
if(srcStaInfo == NULL) {
CsrWifiMacAddress peerMacAddress;
/* Unknown data PDU */
- memcpy(peerMacAddress.a,sa,ETH_ALEN);
+ memcpy(peerMacAddress.a, sa, ETH_ALEN);
unifi_trace(priv, UDBG1, "%s: Unexpected frame from peer = %x:%x:%x:%x:%x:%x\n", __FUNCTION__,
- sa[0], sa[1],sa[2], sa[3], sa[4],sa[5]);
- CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,interfaceTag,peerMacAddress);
+ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5]);
+ CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, peerMacAddress);
unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2296,11 +2296,11 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
if (port_action != CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_OPEN) {
/* Drop the packet and return */
CsrWifiMacAddress peerMacAddress;
- memcpy(peerMacAddress.a,sa,ETH_ALEN);
+ memcpy(peerMacAddress.a, sa, ETH_ALEN);
unifi_trace(priv, UDBG3, "%s: Port is not open: unexpected frame from peer = %x:%x:%x:%x:%x:%x\n",
- __FUNCTION__, sa[0], sa[1],sa[2], sa[3], sa[4],sa[5]);
+ __FUNCTION__, sa[0], sa[1], sa[2], sa[3], sa[4], sa[5]);
- CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,interfaceTag,peerMacAddress);
+ CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, peerMacAddress);
interfacePriv->stats.rx_dropped++;
unifi_net_data_free(priv, &bulkdata->d[0]);
unifi_notice(priv, "%s: Dropping packet, proto=0x%04x, %s port\n", __FUNCTION__,
@@ -2328,7 +2328,7 @@ unifi_rx(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_data_param_t *bulkdata)
{
return;
}
- unifi_trace(priv, UDBG5, "unifi_rx: no specific AP handling process as normal frame, MAC Header len %d\n",macHeaderLengthInBytes);
+ unifi_trace(priv, UDBG5, "unifi_rx: no specific AP handling process as normal frame, MAC Header len %d\n", macHeaderLengthInBytes);
/* Remove the MAC header for subsequent conversion */
skb_pull(skb, macHeaderLengthInBytes);
pData->os_data_ptr = skb->data;
@@ -2422,7 +2422,7 @@ static void process_ma_packet_cfm(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
if(interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_AP ||
interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_P2PGO) {
- uf_process_ma_pkt_cfm_for_ap(priv,interfaceTag,pkt_cfm);
+ uf_process_ma_pkt_cfm_for_ap(priv, interfaceTag, pkt_cfm);
} else if (interfacePriv->m4_sent && (pkt_cfm->HostTag == interfacePriv->m4_hostTag)) {
/* Check if this is a confirm for EAPOL M4 frame and we need to send transmistted ind*/
CsrResult result = pkt_cfm->TransmissionStatus == CSR_TX_SUCCESSFUL?CSR_RESULT_SUCCESS:CSR_RESULT_FAILURE;
@@ -2486,7 +2486,7 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
if (interfaceTag >= CSR_WIFI_NUM_INTERFACES)
{
unifi_error(priv, "%s: MA-PACKET indication with bad interfaceTag %d\n", __FUNCTION__, interfaceTag);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2500,7 +2500,7 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
if (bulkdata->d[0].data_length == 0) {
unifi_warning(priv, "%s: MA-PACKET indication with zero bulk data\n", __FUNCTION__);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
/* For monitor mode we need to pass this indication to the registered application
@@ -2508,8 +2508,8 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
/* MIC failure is already taken care of so no need to send the PDUs which are not successfully received in non-monitor mode*/
if(pkt_ind->ReceptionStatus != CSR_RX_SUCCESS)
{
- unifi_warning(priv, "%s: MA-PACKET indication with status = %d\n",__FUNCTION__, pkt_ind->ReceptionStatus);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_warning(priv, "%s: MA-PACKET indication with status = %d\n", __FUNCTION__, pkt_ind->ReceptionStatus);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2521,8 +2521,8 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
toDs = (skb->data[1] & 0x01) ? 1 : 0;
fromDs = (skb->data[1] & 0x02) ? 1 : 0;
- memcpy(da,(skb->data+4+toDs*12),ETH_ALEN);/* Address1 or 3 */
- memcpy(sa,(skb->data+10+fromDs*(6+toDs*8)),ETH_ALEN); /* Address2, 3 or 4 */
+ memcpy(da, (skb->data+4+toDs*12), ETH_ALEN);/* Address1 or 3 */
+ memcpy(sa, (skb->data+10+fromDs*(6+toDs*8)), ETH_ALEN); /* Address2, 3 or 4 */
/* Find the BSSID, which will be used to match the BA session */
if (toDs && fromDs)
@@ -2539,7 +2539,7 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
frameControl = CSR_GET_UINT16_FROM_LITTLE_ENDIAN(pData->os_data_ptr);
frameType = ((frameControl & 0x000C) >> 2);
- unifi_trace(priv, UDBG3, "Rx Frame Type: %d sn: %d\n",frameType,
+ unifi_trace(priv, UDBG3, "Rx Frame Type: %d sn: %d\n", frameType,
(le16_to_cpu(*((u16*)(bulkdata->d[0].os_data_ptr + IEEE802_11_SEQUENCE_CONTROL_OFFSET))) >> 4) & 0xfff);
if(frameType == IEEE802_11_FRAMETYPE_CONTROL){
#ifdef CSR_SUPPORT_SME
@@ -2550,18 +2550,18 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
u8 pmBit = (frameControl & 0x1000)?0x01:0x00;
unifi_trace(priv, UDBG6, "%s: Received PS-POLL Frame\n", __FUNCTION__);
- uf_process_ps_poll(priv,sa,da,pmBit,interfaceTag);
+ uf_process_ps_poll(priv, sa, da, pmBit, interfaceTag);
}
else {
unifi_warning(priv, "%s: Non PS-POLL control frame is received\n", __FUNCTION__);
}
#endif
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
if(frameType != IEEE802_11_FRAMETYPE_DATA) {
- unifi_warning(priv, "%s: Non control Non Data frame is received\n",__FUNCTION__);
- unifi_net_data_free(priv,&bulkdata->d[0]);
+ unifi_warning(priv, "%s: Non control Non Data frame is received\n", __FUNCTION__);
+ unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2569,15 +2569,15 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
if((interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_AP) ||
(interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_P2PGO)){
- srcStaInfo = CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv,sa,interfaceTag);
+ srcStaInfo = CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv, sa, interfaceTag);
if(srcStaInfo == NULL) {
CsrWifiMacAddress peerMacAddress;
/* Unknown data PDU */
- memcpy(peerMacAddress.a,sa,ETH_ALEN);
+ memcpy(peerMacAddress.a, sa, ETH_ALEN);
unifi_trace(priv, UDBG1, "%s: Unexpected frame from peer = %x:%x:%x:%x:%x:%x\n", __FUNCTION__,
- sa[0], sa[1],sa[2], sa[3], sa[4],sa[5]);
- CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,interfaceTag,peerMacAddress);
+ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5]);
+ CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, peerMacAddress);
unifi_net_data_free(priv, &bulkdata->d[0]);
return;
}
@@ -2591,7 +2591,7 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
*/
pmBit = (frameControl & 0x1000)?0x01:0x00;
- powerSaveChanged = uf_process_pm_bit_for_peer(priv,srcStaInfo,pmBit,interfaceTag);
+ powerSaveChanged = uf_process_pm_bit_for_peer(priv, srcStaInfo, pmBit, interfaceTag);
/* Update station last activity time */
srcStaInfo->activity_flag = TRUE;
@@ -2616,8 +2616,8 @@ static void process_ma_packet_ind(unifi_priv_t *priv, CSR_SIGNAL *signal, bulk_d
else{
qosControl = CSR_GET_UINT16_FROM_LITTLE_ENDIAN(pData->os_data_ptr + 24);
}
- unifi_trace(priv, UDBG5, "%s: Check if U-APSD operations are triggered for qosControl: 0x%x\n",__FUNCTION__,qosControl);
- uf_process_wmm_deliver_ac_uapsd(priv,srcStaInfo,qosControl,interfaceTag);
+ unifi_trace(priv, UDBG5, "%s: Check if U-APSD operations are triggered for qosControl: 0x%x\n", __FUNCTION__, qosControl);
+ uf_process_wmm_deliver_ac_uapsd(priv, srcStaInfo, qosControl, interfaceTag);
}
}
}
@@ -2918,8 +2918,8 @@ uf_netdev_event(struct notifier_block *notif, unsigned long event, void* ptr) {
interfacePriv->connected = UnifiConnected;
interfacePriv->wait_netdev_change = FALSE;
/* Note: passing the broadcast address here will allow anyone to attempt to join our adhoc network */
- uf_process_rx_pending_queue(priv, UF_UNCONTROLLED_PORT_Q, broadcast_address, 1,interfacePriv->InterfaceTag);
- uf_process_rx_pending_queue(priv, UF_CONTROLLED_PORT_Q, broadcast_address, 1,interfacePriv->InterfaceTag);
+ uf_process_rx_pending_queue(priv, UF_UNCONTROLLED_PORT_Q, broadcast_address, 1, interfacePriv->InterfaceTag);
+ uf_process_rx_pending_queue(priv, UF_CONTROLLED_PORT_Q, broadcast_address, 1, interfacePriv->InterfaceTag);
}
break;
diff --git a/drivers/staging/csr/sdio_mmc.c b/drivers/staging/csr/sdio_mmc.c
index 30271d35af55..2b503c23efae 100644
--- a/drivers/staging/csr/sdio_mmc.c
+++ b/drivers/staging/csr/sdio_mmc.c
@@ -1135,8 +1135,8 @@ uf_glue_sdio_remove(struct sdio_func *func)
* them from the list passed in csr_sdio_register_driver().
*/
static const struct sdio_device_id unifi_ids[] = {
- { SDIO_DEVICE(SDIO_MANF_ID_CSR,SDIO_CARD_ID_UNIFI_3) },
- { SDIO_DEVICE(SDIO_MANF_ID_CSR,SDIO_CARD_ID_UNIFI_4) },
+ { SDIO_DEVICE(SDIO_MANF_ID_CSR, SDIO_CARD_ID_UNIFI_3) },
+ { SDIO_DEVICE(SDIO_MANF_ID_CSR, SDIO_CARD_ID_UNIFI_4) },
{ /* end: all zeroes */ },
};
diff --git a/drivers/staging/csr/sme_blocking.c b/drivers/staging/csr/sme_blocking.c
index d88ccd5bd428..0c6e21636e7f 100644
--- a/drivers/staging/csr/sme_blocking.c
+++ b/drivers/staging/csr/sme_blocking.c
@@ -1280,7 +1280,7 @@ int sme_sys_suspend(unifi_priv_t *priv)
return -EIO;
/* Suspend the SME, which MAY cause it to power down UniFi */
- CsrWifiRouterCtrlSuspendIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0, 0, priv->wol_suspend);
+ CsrWifiRouterCtrlSuspendIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, 0, priv->wol_suspend);
r = sme_wait_for_reply(priv, UNIFI_SME_SYS_LONG_TIMEOUT);
if (r) {
/* No reply - forcibly power down in case the request wasn't processed */
@@ -1366,7 +1366,7 @@ int sme_sys_resume(unifi_priv_t *priv)
if (r)
return -EIO;
- CsrWifiRouterCtrlResumeIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0, priv->wol_suspend);
+ CsrWifiRouterCtrlResumeIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, priv->wol_suspend);
r = sme_wait_for_reply(priv, UNIFI_SME_SYS_LONG_TIMEOUT);
if (r)
@@ -1377,7 +1377,7 @@ int sme_sys_resume(unifi_priv_t *priv)
}
#ifdef CSR_SUPPORT_WEXT_AP
-int sme_ap_stop(unifi_priv_t *priv,u16 interface_tag)
+int sme_ap_stop(unifi_priv_t *priv, u16 interface_tag)
{
int r;
@@ -1390,7 +1390,7 @@ int sme_ap_stop(unifi_priv_t *priv,u16 interface_tag)
if (r)
return -EIO;
- CsrWifiNmeApStopReqSend(0,interface_tag);
+ CsrWifiNmeApStopReqSend(0, interface_tag);
r = sme_wait_for_reply(priv, UNIFI_SME_MGT_SHORT_TIMEOUT);
if (r)
@@ -1403,12 +1403,12 @@ int sme_ap_stop(unifi_priv_t *priv,u16 interface_tag)
}
-int sme_ap_start(unifi_priv_t *priv,u16 interface_tag,
+int sme_ap_start(unifi_priv_t *priv, u16 interface_tag,
CsrWifiSmeApConfig_t * ap_config)
{
int r;
CsrWifiSmeApP2pGoConfig p2p_go_param;
- memset(&p2p_go_param,0,sizeof(CsrWifiSmeApP2pGoConfig));
+ memset(&p2p_go_param, 0, sizeof(CsrWifiSmeApP2pGoConfig));
if (priv->smepriv == NULL) {
unifi_error(priv, "sme_ap_start: invalid smepriv\n");
@@ -1419,10 +1419,10 @@ int sme_ap_start(unifi_priv_t *priv,u16 interface_tag,
if (r)
return -EIO;
- CsrWifiNmeApStartReqSend(0,interface_tag,CSR_WIFI_AP_TYPE_LEGACY,FALSE,
- ap_config->ssid,1,ap_config->channel,
- ap_config->credentials,ap_config->max_connections,
- p2p_go_param,FALSE);
+ CsrWifiNmeApStartReqSend(0, interface_tag, CSR_WIFI_AP_TYPE_LEGACY, FALSE,
+ ap_config->ssid, 1, ap_config->channel,
+ ap_config->credentials, ap_config->max_connections,
+ p2p_go_param, FALSE);
r = sme_wait_for_reply(priv, UNIFI_SME_MGT_SHORT_TIMEOUT);
if (r)
@@ -1440,7 +1440,7 @@ int sme_ap_config(unifi_priv_t *priv,
{
int r;
CsrWifiSmeApP2pGoConfig p2p_go_param;
- memset(&p2p_go_param,0,sizeof(CsrWifiSmeApP2pGoConfig));
+ memset(&p2p_go_param, 0, sizeof(CsrWifiSmeApP2pGoConfig));
if (priv->smepriv == NULL) {
unifi_error(priv, "sme_ap_config: invalid smepriv\n");
@@ -1451,7 +1451,7 @@ int sme_ap_config(unifi_priv_t *priv,
if (r)
return -EIO;
- CsrWifiNmeApConfigSetReqSend(0,*group_security_config,
+ CsrWifiNmeApConfigSetReqSend(0, *group_security_config,
*ap_mac_config);
r = sme_wait_for_reply(priv, UNIFI_SME_MGT_SHORT_TIMEOUT);
diff --git a/drivers/staging/csr/sme_native.c b/drivers/staging/csr/sme_native.c
index ca55249bde3e..d0b9be31e12c 100644
--- a/drivers/staging/csr/sme_native.c
+++ b/drivers/staging/csr/sme_native.c
@@ -55,7 +55,7 @@ uf_sme_deinit(unifi_priv_t *priv)
int sme_mgt_wifi_on(unifi_priv_t *priv)
{
- int r,i;
+ int r, i;
s32 csrResult;
if (priv == NULL) {
diff --git a/drivers/staging/csr/sme_sys.c b/drivers/staging/csr/sme_sys.c
index b1151a28d8e3..b5258d71d250 100644
--- a/drivers/staging/csr/sme_sys.c
+++ b/drivers/staging/csr/sme_sys.c
@@ -158,7 +158,7 @@ void CsrWifiRouterCtrlMediaStatusReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
unifi_error(priv, "CsrWifiRouterCtrlMediaStatusReqHandler: invalid interfaceTag\n");
return;
}
- unifi_trace(priv, UDBG3, "CsrWifiRouterCtrlMediaStatusReqHandler: Mode = %d req->mediaStatus = %d\n",interfacePriv->interfaceMode,req->mediaStatus);
+ unifi_trace(priv, UDBG3, "CsrWifiRouterCtrlMediaStatusReqHandler: Mode = %d req->mediaStatus = %d\n", interfacePriv->interfaceMode, req->mediaStatus);
if (interfacePriv->interfaceMode != CSR_WIFI_ROUTER_CTRL_MODE_AMP) {
bulk_data_desc_t bulk_data;
@@ -389,7 +389,7 @@ void CsrWifiRouterCtrlHipReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
unifi_error(priv,
"CsrWifiRouterCtrlHipReqHandler: Failed to send signal (0x%.4X - %u)\n",
*((u16*)signal_ptr), r);
- CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,CSR_WIFI_SME_CONTROL_INDICATION_ERROR);
+ CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, CSR_WIFI_SME_CONTROL_INDICATION_ERROR);
}
unifi_trace(priv, UDBG4, "CsrWifiRouterCtrlHipReqHandler: <----\n");
@@ -474,7 +474,7 @@ uf_send_gratuitous_arp(unifi_priv_t *priv, u16 interfaceTag)
r = ul_send_signal_unpacked(priv, &signal, &bulkdata);
if (r)
{
- unifi_error(priv, "CsrWifiSmeRoamCompleteIndHandler: failed to send QOS data null packet result: %d\n",r);
+ unifi_error(priv, "CsrWifiSmeRoamCompleteIndHandler: failed to send QOS data null packet result: %d\n", r);
unifi_net_data_free(priv, &bulkdata.d[0]);
return;
}
@@ -574,7 +574,7 @@ configure_data_port(unifi_priv_t *priv,
/* If port is closed, discard all the pending Rx packets */
if (port_action == CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_CLOSED_DISCARD) {
- uf_free_pending_rx_packets(priv, queue, *macAddress,interfaceTag);
+ uf_free_pending_rx_packets(priv, queue, *macAddress, interfaceTag);
}
}
} else {
@@ -645,7 +645,7 @@ configure_data_port(unifi_priv_t *priv,
* coming from the peer station.
*/
if (port_action == CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_CLOSED_DISCARD) {
- uf_free_pending_rx_packets(priv, queue, *macAddress,interfaceTag);
+ uf_free_pending_rx_packets(priv, queue, *macAddress, interfaceTag);
}
unifi_trace(priv, UDBG2,
@@ -712,7 +712,7 @@ void CsrWifiRouterCtrlPortConfigureReqHandler(void* drvpriv, CsrWifiFsmEvent* ms
configure_data_port(priv, req->controlledPortAction, (const CsrWifiMacAddress *)&req->macAddress,
UF_CONTROLLED_PORT_Q, req->interfaceTag);
- CsrWifiRouterCtrlPortConfigureCfmSend(msg->source,req->clientData,req->interfaceTag,
+ CsrWifiRouterCtrlPortConfigureCfmSend(msg->source, req->clientData, req->interfaceTag,
CSR_RESULT_SUCCESS, req->macAddress);
unifi_trace(priv, UDBG3, "leaving CsrWifiRouterCtrlPortConfigureReqHandler\n");
}
@@ -723,7 +723,7 @@ void CsrWifiRouterCtrlWifiOnReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
CsrWifiRouterCtrlVersions versions;
CsrWifiRouterCtrlWifiOnReq* req = (CsrWifiRouterCtrlWifiOnReq*)msg;
- int r,i;
+ int r, i;
CsrResult csrResult;
if (priv == NULL) {
@@ -963,7 +963,7 @@ void CsrWifiRouterCtrlWifiOffReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
}
wifi_off(priv);
- CsrWifiRouterCtrlWifiOffCfmSend(msg->source,req->clientData);
+ CsrWifiRouterCtrlWifiOffCfmSend(msg->source, req->clientData);
/* If this is called in response to closing the character device, the
* caller must use uf_sme_cancel_request() to terminate any pending SME
@@ -1239,7 +1239,7 @@ void CsrWifiRouterMaPacketSubscribeReqHandler(void* drvpriv, CsrWifiFsmEvent* ms
unifi_trace(priv, UDBG1,
"subscribe_req: encap=%d, handle=%d, result=%d\n",
req->encapsulation, i, result);
- CsrWifiRouterMaPacketSubscribeCfmSend(msg->source,req->interfaceTag, i, result, 0);
+ CsrWifiRouterMaPacketSubscribeCfmSend(msg->source, req->interfaceTag, i, result, 0);
}
@@ -1268,7 +1268,7 @@ void CsrWifiRouterMaPacketUnsubscribeReqHandler(void* drvpriv, CsrWifiFsmEvent*
unifi_trace(priv, UDBG1,
"unsubscribe_req: handle=%d, result=%d\n",
req->subscriptionHandle, result);
- CsrWifiRouterMaPacketUnsubscribeCfmSend(msg->source,req->interfaceTag, result);
+ CsrWifiRouterMaPacketUnsubscribeCfmSend(msg->source, req->interfaceTag, result);
}
@@ -1282,7 +1282,7 @@ void CsrWifiRouterCtrlCapabilitiesReqHandler(void* drvpriv, CsrWifiFsmEvent* msg
return;
}
- CsrWifiRouterCtrlCapabilitiesCfmSend(msg->source,req->clientData,
+ CsrWifiRouterCtrlCapabilitiesCfmSend(msg->source, req->clientData,
UNIFI_SOFT_COMMAND_Q_LENGTH - 1,
UNIFI_SOFT_TRAFFIC_Q_LENGTH - 1);
}
@@ -1404,7 +1404,7 @@ _sys_packet_req(unifi_priv_t *priv, const CSR_SIGNAL *signal,
if (r) {
unifi_error(priv,
"_sys_packet_req: failed to translate eth frame.\n");
- unifi_net_data_free(priv,&bulkdata.d[0]);
+ unifi_net_data_free(priv, &bulkdata.d[0]);
return r;
}
@@ -1439,7 +1439,7 @@ _sys_packet_req(unifi_priv_t *priv, const CSR_SIGNAL *signal,
#ifdef CSR_SUPPORT_SME
if ((protection = uf_get_protection_bit_from_interfacemode(priv, interfaceTag, peerMacAddress.a)) < 0) {
unifi_error(priv, "unicast address, but destination not in station record database\n");
- unifi_net_data_free(priv,&bulkdata.d[0]);
+ unifi_net_data_free(priv, &bulkdata.d[0]);
return -1;
}
#else
@@ -1453,7 +1453,7 @@ _sys_packet_req(unifi_priv_t *priv, const CSR_SIGNAL *signal,
/* add Mac header */
if (prepare_and_add_macheader(priv, skb, newSkb, req.Priority, &bulkdata, interfaceTag, frame, frame + ETH_ALEN, protection)) {
unifi_error(priv, "failed to create MAC header\n");
- unifi_net_data_free(priv,&bulkdata.d[0]);
+ unifi_net_data_free(priv, &bulkdata.d[0]);
return -1;
}
@@ -1479,7 +1479,7 @@ _sys_packet_req(unifi_priv_t *priv, const CSR_SIGNAL *signal,
if (r) {
unifi_error(priv,
"_sys_packet_req: failed to send signal.\n");
- unifi_net_data_free(priv,&bulkdata.d[0]);
+ unifi_net_data_free(priv, &bulkdata.d[0]);
return r;
}
/* The final CsrWifiRouterMaPacketCfmSend() will called when the actual MA-PACKET.cfm is received from the chip */
@@ -1558,7 +1558,7 @@ void CsrWifiRouterMaPacketReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
memcpy(req->Ra.x, daddr, ETH_ALEN);
req->Priority = mareq->priority;
req->TransmitRate = 0; /* Let firmware select the rate*/
- req->VirtualInterfaceIdentifier = uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag);
+ req->VirtualInterfaceIdentifier = uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag);
req->HostTag = mareq->hostTag;
if(mareq->cfmRequested)
@@ -1571,7 +1571,7 @@ void CsrWifiRouterMaPacketReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
if (r && mareq->cfmRequested)
{
- CsrWifiRouterMaPacketCfmSend(msg->source,interfaceTag,
+ CsrWifiRouterMaPacketCfmSend(msg->source, interfaceTag,
CSR_RESULT_FAILURE,
mareq->hostTag, 0);
}
@@ -1637,7 +1637,7 @@ void CsrWifiRouterCtrlM4TransmitReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
/* reset the station records when the mode is set as CSR_WIFI_ROUTER_CTRL_MODE_NONE */
static void CsrWifiRouterCtrlResetStationRecordList(unifi_priv_t *priv, u16 interfaceTag)
{
- u8 i,j;
+ u8 i, j;
CsrWifiRouterCtrlStaInfo_t *staInfo=NULL;
netInterface_priv_t *interfacePriv = priv->interfacePriv[interfaceTag];
unsigned long lock_flags;
@@ -1658,15 +1658,15 @@ static void CsrWifiRouterCtrlResetStationRecordList(unifi_priv_t *priv, u16 inte
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(staInfo->mgtFrames));
- uf_flush_list(priv,&(staInfo->mgtFrames));
+ uf_flush_list(priv, &(staInfo->mgtFrames));
for(j=0;j<MAX_ACCESS_CATOGORY;j++){
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(staInfo->dataPdu[j]));
- uf_flush_list(priv,&(staInfo->dataPdu[j]));
+ uf_flush_list(priv, &(staInfo->dataPdu[j]));
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
/* Removing station record information from port config array */
memset(staInfo->peerControlledPort, 0, sizeof(unifi_port_cfg_t));
staInfo->peerControlledPort->port_action = CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_CLOSED_DISCARD;
@@ -1680,7 +1680,7 @@ static void CsrWifiRouterCtrlResetStationRecordList(unifi_priv_t *priv, u16 inte
kfree(interfacePriv->staInfo[i]);
interfacePriv->staInfo[i] = NULL;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
}
/* after the critical region process the list of frames that requested cfm
@@ -1697,9 +1697,9 @@ static void CsrWifiRouterCtrlResetStationRecordList(unifi_priv_t *priv, u16 inte
case CSR_WIFI_ROUTER_CTRL_MODE_NONE:
if (priv->noOfPktQueuedInDriver) {
unifi_warning(priv, "After reset the noOfPktQueuedInDriver = %x\n", priv->noOfPktQueuedInDriver);
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
priv->noOfPktQueuedInDriver = 0;
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
break;
case CSR_WIFI_ROUTER_CTRL_MODE_IBSS:
@@ -1745,18 +1745,18 @@ void CsrWifiRouterCtrlInterfaceReset(unifi_priv_t *priv, u16 interfaceTag)
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(interfacePriv->genericMgtFrames));
- uf_flush_list(priv,&(interfacePriv->genericMgtFrames));
+ uf_flush_list(priv, &(interfacePriv->genericMgtFrames));
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(interfacePriv->genericMulticastOrBroadCastMgtFrames));
- uf_flush_list(priv,&(interfacePriv->genericMulticastOrBroadCastMgtFrames));
+ uf_flush_list(priv, &(interfacePriv->genericMulticastOrBroadCastMgtFrames));
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(interfacePriv->genericMulticastOrBroadCastFrames));
- uf_flush_list(priv,&(interfacePriv->genericMulticastOrBroadCastFrames));
+ uf_flush_list(priv, &(interfacePriv->genericMulticastOrBroadCastFrames));
/* process the list of frames that requested cfm
and send cfm to requestor one by one */
@@ -1772,7 +1772,7 @@ void CsrWifiRouterCtrlInterfaceReset(unifi_priv_t *priv, u16 interfaceTag)
/* station records not available in these modes */
break;
default:
- CsrWifiRouterCtrlResetStationRecordList(priv,interfaceTag);
+ CsrWifiRouterCtrlResetStationRecordList(priv, interfaceTag);
}
interfacePriv->num_stations_joined = 0;
@@ -1880,7 +1880,7 @@ void CsrWifiRouterCtrlModeSetReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
* other then CSR_WIFI_TIM_SET or CSR_WIFI_TIM_RESET value
*/
interfacePriv->bcTimSetReqQueued =0xFF;
- CsrWifiRouterCtrlInterfaceReset(priv,req->interfaceTag);
+ CsrWifiRouterCtrlInterfaceReset(priv, req->interfaceTag);
if(req->mode == CSR_WIFI_ROUTER_CTRL_MODE_AP ||
req->mode == CSR_WIFI_ROUTER_CTRL_MODE_P2PGO) {
@@ -1900,7 +1900,7 @@ void CsrWifiRouterCtrlModeSetReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
}
}
else {
- unifi_error(priv, "CsrWifiRouterCtrlModeSetReqHandler: invalid interfaceTag :%d\n",req->interfaceTag);
+ unifi_error(priv, "CsrWifiRouterCtrlModeSetReqHandler: invalid interfaceTag :%d\n", req->interfaceTag);
}
}
@@ -1941,15 +1941,15 @@ static int peer_delete_record(unifi_priv_t *priv, CsrWifiRouterCtrlPeerDelReq *r
&send_cfm_list,
&(staInfo->mgtFrames));
- uf_flush_list(priv,&(staInfo->mgtFrames));
+ uf_flush_list(priv, &(staInfo->mgtFrames));
for(j=0;j<MAX_ACCESS_CATOGORY;j++){
uf_prepare_send_cfm_list_for_queued_pkts(priv,
&send_cfm_list,
&(staInfo->dataPdu[j]));
- uf_flush_list(priv,&(staInfo->dataPdu[j]));
+ uf_flush_list(priv, &(staInfo->dataPdu[j]));
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
/* clear the port configure array info, for the corresponding peer entry */
controlledPort = &interfacePriv->controlled_data_port;
unControlledPort = &interfacePriv->uncontrolled_data_port;
@@ -1975,12 +1975,12 @@ static int peer_delete_record(unifi_priv_t *priv, CsrWifiRouterCtrlPeerDelReq *r
unifi_warning(priv, "number of uncontrolled port entries is zero, trying to decrement: debug\n");
}
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
/* update the TIM with zero */
if (interfacePriv->interfaceMode != CSR_WIFI_ROUTER_CTRL_MODE_IBSS &&
staInfo->timSet == CSR_WIFI_TIM_SET) {
unifi_trace(priv, UDBG3, "peer is deleted so TIM updated to 0, in firmware\n");
- update_tim(priv,staInfo->aid,0,req->interfaceTag, req->peerRecordHandle);
+ update_tim(priv, staInfo->aid, 0, req->interfaceTag, req->peerRecordHandle);
}
@@ -2021,7 +2021,7 @@ static int peer_delete_record(unifi_priv_t *priv, CsrWifiRouterCtrlPeerDelReq *r
cancel_work_sync(&staInfo->send_disconnected_ind_task);
#endif
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
#ifdef CSR_SUPPORT_SME
interfacePriv->num_stations_joined--;
@@ -2039,7 +2039,7 @@ static int peer_delete_record(unifi_priv_t *priv, CsrWifiRouterCtrlPeerDelReq *r
/* Free the station record for corresponding peer */
kfree(interfacePriv->staInfo[req->peerRecordHandle]);
interfacePriv->staInfo[req->peerRecordHandle] = NULL;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
/* after the critical region process the list of frames that requested cfm
and send cfm to requestor one by one */
@@ -2092,12 +2092,12 @@ void CsrWifiRouterCtrlPeerDelReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
break;
}
- CsrWifiRouterCtrlPeerDelCfmSend(msg->source,req->clientData,req->interfaceTag,status);
+ CsrWifiRouterCtrlPeerDelCfmSend(msg->source, req->clientData, req->interfaceTag, status);
unifi_trace(priv, UDBG2, "leaving CsrWifiRouterCtrlPeerDelReqHandler \n");
}
/* Add the new station to the station record data base */
-static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *req,u32 *handle)
+static int peer_add_new_record(unifi_priv_t *priv, CsrWifiRouterCtrlPeerAddReq *req, u32 *handle)
{
u8 i, powerModeTemp = 0;
u8 freeSlotFound = FALSE;
@@ -2135,11 +2135,11 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
req->staInfo.listenIntervalInTus);
/* disable the preemption until station record updated */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
interfacePriv->staInfo[i] = newRecord;
/* Initialize the record*/
- memset(newRecord,0,sizeof(CsrWifiRouterCtrlStaInfo_t));
+ memset(newRecord, 0, sizeof(CsrWifiRouterCtrlStaInfo_t));
/* update the station record */
memcpy(newRecord->peerMacAddress.a, req->peerMacAddress.a, ETH_ALEN);
newRecord->wmmOrQosEnabled = req->staInfo.wmmOrQosEnabled;
@@ -2182,11 +2182,11 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
u8 k;
for(k=0; k< MAX_ACCESS_CATOGORY ;k++)
unifi_trace(priv, UDBG2, "peer_add_new_record: WMM : %d ,AC %d, powersaveMode %x \n",
- req->staInfo.wmmOrQosEnabled,k,newRecord->powersaveMode[k]);
+ req->staInfo.wmmOrQosEnabled, k, newRecord->powersaveMode[k]);
}
unifi_trace(priv, UDBG3, "newRecord->wmmOrQosEnabled : %d , MAX SP : %d\n",
- newRecord->wmmOrQosEnabled,newRecord->maxSpLength);
+ newRecord->wmmOrQosEnabled, newRecord->maxSpLength);
/* Initialize the mgtFrames & data Pdu list */
{
@@ -2201,7 +2201,7 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
newRecord->activity_flag = TRUE;
/* enable the preemption as station record updated */
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
/* First time port actions are set for the peer with below information */
configure_data_port(priv, CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_OPEN, &newRecord->peerMacAddress,
@@ -2216,7 +2216,7 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
/* Port status must be already set before calling the Add Peer request */
newRecord->peerControlledPort = uf_sme_port_config_handle(priv, newRecord->peerMacAddress.a,
UF_CONTROLLED_PORT_Q, req->interfaceTag);
@@ -2228,7 +2228,7 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
unifi_warning(priv, "Un/ControlledPort record not found in port configuration array index = %d\n", i);
kfree(interfacePriv->staInfo[i]);
interfacePriv->staInfo[i] = NULL;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
return CSR_RESULT_FAILURE;
}
@@ -2279,7 +2279,7 @@ static int peer_add_new_record(unifi_priv_t *priv,CsrWifiRouterCtrlPeerAddReq *r
}
#endif
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
break;
}
}
@@ -2446,7 +2446,7 @@ void uf_send_disconnected_ind_wq(struct work_struct *work)
&send_cfm_list,
&(staInfo->dataPdu[j]));
- uf_flush_list(priv,&(staInfo->dataPdu[j]));
+ uf_flush_list(priv, &(staInfo->dataPdu[j]));
}
send_auto_ma_packet_confirm(priv, staInfo->interfacePriv, &send_cfm_list);
@@ -2471,7 +2471,7 @@ void uf_send_disconnected_ind_wq(struct work_struct *work)
#endif
-void CsrWifiRouterCtrlPeerAddReqHandler(void* drvpriv,CsrWifiFsmEvent* msg)
+void CsrWifiRouterCtrlPeerAddReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
{
CsrWifiRouterCtrlPeerAddReq* req = (CsrWifiRouterCtrlPeerAddReq*)msg;
CsrResult status = CSR_RESULT_SUCCESS;
@@ -2500,7 +2500,7 @@ void CsrWifiRouterCtrlPeerAddReqHandler(void* drvpriv,CsrWifiFsmEvent* msg)
case CSR_WIFI_ROUTER_CTRL_MODE_IBSS:
case CSR_WIFI_ROUTER_CTRL_MODE_P2PGO:
/* Add station record */
- status = peer_add_new_record(priv,req,&handle);
+ status = peer_add_new_record(priv, req, &handle);
break;
case CSR_WIFI_ROUTER_CTRL_MODE_STA:
case CSR_WIFI_ROUTER_CTRL_MODE_P2PCLI:
@@ -2509,11 +2509,11 @@ void CsrWifiRouterCtrlPeerAddReqHandler(void* drvpriv,CsrWifiFsmEvent* msg)
break;
}
- CsrWifiRouterCtrlPeerAddCfmSend(msg->source,req->clientData,req->interfaceTag,req->peerMacAddress,handle,status);
+ CsrWifiRouterCtrlPeerAddCfmSend(msg->source, req->clientData, req->interfaceTag, req->peerMacAddress, handle, status);
unifi_trace(priv, UDBG2, "leaving CsrWifiRouterCtrlPeerAddReqHandler \n");
}
-void CsrWifiRouterCtrlPeerUpdateReqHandler(void* drvpriv,CsrWifiFsmEvent* msg)
+void CsrWifiRouterCtrlPeerUpdateReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
{
CsrWifiRouterCtrlPeerUpdateReq* req = (CsrWifiRouterCtrlPeerUpdateReq*)msg;
CsrResult status = CSR_RESULT_SUCCESS;
@@ -2526,7 +2526,7 @@ void CsrWifiRouterCtrlPeerUpdateReqHandler(void* drvpriv,CsrWifiFsmEvent* msg)
return;
}
- CsrWifiRouterCtrlPeerUpdateCfmSend(msg->source,req->clientData,req->interfaceTag,status);
+ CsrWifiRouterCtrlPeerUpdateCfmSend(msg->source, req->clientData, req->interfaceTag, status);
unifi_trace(priv, UDBG2, "leaving CsrWifiRouterCtrlPeerUpdateReqHandler \n");
}
@@ -2986,13 +2986,13 @@ void CsrWifiRouterCtrlWapiMulticastFilterReqHandler(void* drvpriv, CsrWifiFsmEve
unifi_trace(priv, UDBG6, "<<%s\n", __FUNCTION__);
} else {
- unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__,interfacePriv->interfaceMode);
+ unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__, interfacePriv->interfaceMode);
}
#elif defined(UNIFI_DEBUG)
/*WAPI Disabled*/
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
- unifi_error(priv,"CsrWifiRouterCtrlWapiMulticastFilterReqHandler: called when WAPI isn't enabled\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiMulticastFilterReqHandler: called when WAPI isn't enabled\n");
#endif
}
@@ -3022,13 +3022,13 @@ void CsrWifiRouterCtrlWapiUnicastFilterReqHandler(void* drvpriv, CsrWifiFsmEvent
unifi_trace(priv, UDBG6, "<<%s\n", __FUNCTION__);
} else {
- unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__,interfacePriv->interfaceMode);
+ unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__, interfacePriv->interfaceMode);
}
#elif defined(UNIFI_DEBUG)
/*WAPI Disabled*/
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
- unifi_error(priv,"CsrWifiRouterCtrlWapiUnicastFilterReqHandler: called when WAPI isn't enabled\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastFilterReqHandler: called when WAPI isn't enabled\n");
#endif
}
@@ -3064,13 +3064,13 @@ void CsrWifiRouterCtrlWapiRxPktReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
if (req->dataLength == 0 || req->data == NULL) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReq: invalid request\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReq: invalid request\n", __FUNCTION__);
return;
}
res = unifi_net_data_malloc(priv, &bulkdata.d[0], req->dataLength);
if (res != CSR_RESULT_SUCCESS) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReq: Could not allocate net data\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReq: Could not allocate net data\n", __FUNCTION__);
return;
}
@@ -3078,15 +3078,15 @@ void CsrWifiRouterCtrlWapiRxPktReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
* So reset the reception status to rx_success */
res = read_unpack_signal(req->signal, &signal);
if (res) {
- unifi_error(priv,"CsrWifiRouterCtrlWapiRxPktReqHandler: Received unknown or corrupted signal.\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReqHandler: Received unknown or corrupted signal.\n");
return;
}
pkt_ind = (CSR_MA_PACKET_INDICATION*) (&((&signal)->u).MaPacketIndication);
if (pkt_ind->ReceptionStatus != CSR_MICHAEL_MIC_ERROR) {
- unifi_error(priv,"CsrWifiRouterCtrlWapiRxPktReqHandler: Unknown signal with reception status = %d\n",pkt_ind->ReceptionStatus);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReqHandler: Unknown signal with reception status = %d\n", pkt_ind->ReceptionStatus);
return;
} else {
- unifi_trace(priv, UDBG4,"CsrWifiRouterCtrlWapiRxPktReqHandler: MIC verified , RX_SUCCESS \n",__FUNCTION__);
+ unifi_trace(priv, UDBG4, "CsrWifiRouterCtrlWapiRxPktReqHandler: MIC verified , RX_SUCCESS \n", __FUNCTION__);
pkt_ind->ReceptionStatus = CSR_RX_SUCCESS;
write_pack(&signal, req->signal, &(req->signalLength));
}
@@ -3113,12 +3113,12 @@ void CsrWifiRouterCtrlWapiRxPktReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
unifi_trace(priv, UDBG6, "<<%s\n", __FUNCTION__);
} else {
- unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__,interfacePriv->interfaceMode);
+ unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__, interfacePriv->interfaceMode);
}
#elif defined(UNIFI_DEBUG)
/*WAPI Disabled*/
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
- unifi_error(priv,"CsrWifiRouterCtrlWapiRxPktReqHandler: called when WAPI isn't enabled\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiRxPktReqHandler: called when WAPI isn't enabled\n");
#endif
}
@@ -3142,15 +3142,15 @@ void CsrWifiRouterCtrlWapiUnicastTxPktReqHandler(void* drvpriv, CsrWifiFsmEvent*
unifi_trace(priv, UDBG6, ">>%s\n", __FUNCTION__);
if (priv == NULL) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler : invalid priv\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler : invalid priv\n", __FUNCTION__);
return;
}
if (priv->smepriv == NULL) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler : invalid sme priv\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler : invalid sme priv\n", __FUNCTION__);
return;
}
if (req->data == NULL) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: invalid request\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: invalid request\n", __FUNCTION__);
return;
} else {
/* If it is QoS data (type = data subtype = QoS), frame header contains QoS control field */
@@ -3159,7 +3159,7 @@ void CsrWifiRouterCtrlWapiUnicastTxPktReqHandler(void* drvpriv, CsrWifiFsmEvent*
}
}
if ( !(req->dataLength>(macHeaderLengthInBytes+appendedCryptoFields)) ) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: invalid dataLength\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: invalid dataLength\n", __FUNCTION__);
return;
}
@@ -3174,7 +3174,7 @@ void CsrWifiRouterCtrlWapiUnicastTxPktReqHandler(void* drvpriv, CsrWifiFsmEvent*
*/
result = unifi_net_data_malloc(priv, &bulkdata.d[0], req->dataLength);
if (result != CSR_RESULT_SUCCESS) {
- unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: Could not allocate net data\n",__FUNCTION__);
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: Could not allocate net data\n", __FUNCTION__);
return;
}
memcpy((void*)bulkdata.d[0].os_data_ptr, req->data, req->dataLength);
@@ -3217,13 +3217,13 @@ void CsrWifiRouterCtrlWapiUnicastTxPktReqHandler(void* drvpriv, CsrWifiFsmEvent*
} else {
- unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__,interfacePriv->interfaceMode);
+ unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__, interfacePriv->interfaceMode);
}
#elif defined(UNIFI_DEBUG)
/*WAPI Disabled*/
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
- unifi_error(priv,"CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: called when WAPI SW ENCRYPTION isn't enabled\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiUnicastTxPktReqHandler: called when WAPI SW ENCRYPTION isn't enabled\n");
#endif
}
@@ -3240,14 +3240,14 @@ void CsrWifiRouterCtrlWapiFilterReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
unifi_trace(priv, UDBG6, ">>%s\n", __FUNCTION__);
- unifi_trace(priv, UDBG1, "CsrWifiRouterCtrlWapiFilterReq: req->isWapiConnected [0/1] = %d \n",req->isWapiConnected);
+ unifi_trace(priv, UDBG1, "CsrWifiRouterCtrlWapiFilterReq: req->isWapiConnected [0/1] = %d \n", req->isWapiConnected);
priv->isWapiConnection = req->isWapiConnected;
unifi_trace(priv, UDBG6, "<<%s\n", __FUNCTION__);
} else {
- unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__,interfacePriv->interfaceMode);
+ unifi_warning(priv, "%s is NOT applicable for interface mode - %d\n", __FUNCTION__, interfacePriv->interfaceMode);
}
#endif
@@ -3255,6 +3255,6 @@ void CsrWifiRouterCtrlWapiFilterReqHandler(void* drvpriv, CsrWifiFsmEvent* msg)
#elif defined(UNIFI_DEBUG)
/*WAPI Disabled*/
unifi_priv_t *priv = (unifi_priv_t*)drvpriv;
- unifi_error(priv,"CsrWifiRouterCtrlWapiFilterReq: called when WAPI isn't enabled\n");
+ unifi_error(priv, "CsrWifiRouterCtrlWapiFilterReq: called when WAPI isn't enabled\n");
#endif
}
diff --git a/drivers/staging/csr/sme_userspace.c b/drivers/staging/csr/sme_userspace.c
index abcb446fb8c0..b919b001ef7c 100644
--- a/drivers/staging/csr/sme_userspace.c
+++ b/drivers/staging/csr/sme_userspace.c
@@ -118,7 +118,7 @@ uf_sme_init(unifi_priv_t *priv)
void
uf_sme_deinit(unifi_priv_t *priv)
{
- int i,j;
+ int i, j;
u8 ba_session_idx;
ba_session_rx_struct *ba_session_rx = NULL;
ba_session_tx_struct *ba_session_tx = NULL;
@@ -224,7 +224,7 @@ unifi_ta_indicate_protocol(void *ospriv,
if (CSR_WIFI_ROUTER_CTRL_PROTOCOL_DIRECTION_RX == direction)
{
u16 interfaceTag = 0;
- CsrWifiRouterCtrlTrafficProtocolIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,
+ CsrWifiRouterCtrlTrafficProtocolIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0,
interfaceTag,
packet_type,
direction,
diff --git a/drivers/staging/csr/sme_wext.c b/drivers/staging/csr/sme_wext.c
index 4129a6436b76..84f11cb53596 100644
--- a/drivers/staging/csr/sme_wext.c
+++ b/drivers/staging/csr/sme_wext.c
@@ -120,7 +120,7 @@ channel_to_mhz(int ch, int dot11a)
#ifdef CSR_SUPPORT_WEXT_AP
void uf_sme_wext_ap_set_defaults(unifi_priv_t *priv)
{
- memcpy(priv->ap_config.ssid.ssid,"defaultssid",sizeof("defaultssid"));
+ memcpy(priv->ap_config.ssid.ssid, "defaultssid", sizeof("defaultssid"));
priv->ap_config.ssid.length = 8;
priv->ap_config.channel = 6;
@@ -202,7 +202,7 @@ void uf_sme_wext_ap_set_defaults(unifi_priv_t *priv)
to enable different types of
devices to join us */
priv->ap_mac_config.supportedRatesCount =
- uf_configure_supported_rates(priv->ap_mac_config.supportedRates,priv->ap_mac_config.phySupportedBitmap);
+ uf_configure_supported_rates(priv->ap_mac_config.supportedRates, priv->ap_mac_config.phySupportedBitmap);
}
#endif
/*
@@ -459,7 +459,7 @@ static int decode_parameter_from_string(unifi_priv_t* priv, char **str_ptr,
{
u8 int_str[7] = "0";
u32 param_str_len;
- u8 *param_str_begin,*param_str_end;
+ u8 *param_str_begin, *param_str_end;
u8 *orig_str = *str_ptr;
if (!strncmp(*str_ptr, token, strlen(token))) {
@@ -472,41 +472,41 @@ static int decode_parameter_from_string(unifi_priv_t* priv, char **str_ptr,
param_str_end = *str_ptr-1;
param_str_len = param_str_end - param_str_begin;
}
- unifi_trace(priv,UDBG2,"'token:%s', len:%d, ", token, param_str_len);
+ unifi_trace(priv, UDBG2, "'token:%s', len:%d, ", token, param_str_len);
if (param_str_len > param_max_len) {
- unifi_notice(priv,"extracted param len:%d is > MAX:%d\n",param_str_len, param_max_len);
+ unifi_notice(priv, "extracted param len:%d is > MAX:%d\n", param_str_len, param_max_len);
param_str_len = param_max_len;
}
switch (param_type) {
case PARAM_TYPE_INT:
{
- u32 *pdst_int = dst,num =0;
- int i,j=0;
+ u32 *pdst_int = dst, num =0;
+ int i, j=0;
if (param_str_len > sizeof(int_str)) {
param_str_len = sizeof(int_str);
}
memcpy(int_str, param_str_begin, param_str_len);
for(i = param_str_len; i>0;i--) {
if(int_str[i-1] >= '0' && int_str[i-1] <='9') {
- num += ((int_str[i-1]-'0')*power(10,j));
+ num += ((int_str[i-1]-'0')*power(10, j));
j++;
} else {
- unifi_error(priv,"decode_parameter_from_string:not a number %c\n",(int_str[i-1]));
+ unifi_error(priv, "decode_parameter_from_string:not a number %c\n", (int_str[i-1]));
return -1;
}
}
*pdst_int = num;
- unifi_trace(priv,UDBG2,"decode_parameter_from_string:decoded int = %d\n",*pdst_int);
+ unifi_trace(priv, UDBG2, "decode_parameter_from_string:decoded int = %d\n", *pdst_int);
}
break;
default:
memcpy(dst, param_str_begin, param_str_len);
*((char *)dst + param_str_len) = 0;
- unifi_trace(priv,UDBG2,"decode_parameter_from_string:decoded string = %s\n",(char *)dst);
+ unifi_trace(priv, UDBG2, "decode_parameter_from_string:decoded string = %s\n", (char *)dst);
break;
}
} else {
- unifi_error(priv,"decode_parameter_from_string: Token:%s not found in %s \n",token,orig_str);
+ unifi_error(priv, "decode_parameter_from_string: Token:%s not found in %s \n", token, orig_str);
return -1;
}
return 0;
@@ -514,7 +514,7 @@ static int decode_parameter_from_string(unifi_priv_t* priv, char **str_ptr,
static int store_ap_advanced_config_from_string(unifi_priv_t *priv, char *param_str)
{
char * str_ptr=param_str;
- int ret = 0,tmp_var;
+ int ret = 0, tmp_var;
char phy_mode[6];
CsrWifiSmeApMacConfig * ap_mac_config = &priv->ap_mac_config;
@@ -522,36 +522,36 @@ static int store_ap_advanced_config_from_string(unifi_priv_t *priv, char *param_
ret = decode_parameter_from_string(priv, &str_ptr, "BI=",
PARAM_TYPE_INT, &tmp_var, 5);
if(ret) {
- unifi_error(priv,"store_ap_advanced_config_from_string: BI not found\n");
+ unifi_error(priv, "store_ap_advanced_config_from_string: BI not found\n");
return -1;
}
ap_mac_config->beaconInterval = tmp_var;
ret = decode_parameter_from_string(priv, &str_ptr, "DTIM_PER=",
PARAM_TYPE_INT, &tmp_var, 5);
if(ret) {
- unifi_error(priv,"store_ap_advanced_config_from_string: DTIM_PER not found\n");
+ unifi_error(priv, "store_ap_advanced_config_from_string: DTIM_PER not found\n");
return -1;
}
ap_mac_config->dtimPeriod = tmp_var;
ret = decode_parameter_from_string(priv, &str_ptr, "WMM=",
PARAM_TYPE_INT, &tmp_var, 5);
if(ret) {
- unifi_error(priv,"store_ap_advanced_config_from_string: WMM not found\n");
+ unifi_error(priv, "store_ap_advanced_config_from_string: WMM not found\n");
return -1;
}
ap_mac_config->wmmEnabled = tmp_var;
ret = decode_parameter_from_string(priv, &str_ptr, "PHY=",
PARAM_TYPE_STRING, phy_mode, 5);
if(ret) {
- unifi_error(priv,"store_ap_advanced_config_from_string: PHY not found\n");
+ unifi_error(priv, "store_ap_advanced_config_from_string: PHY not found\n");
} else {
- if(strstr(phy_mode,"b")){
+ if(strstr(phy_mode, "b")){
ap_mac_config->phySupportedBitmap = CSR_WIFI_SME_AP_PHY_SUPPORT_B;
}
- if(strstr(phy_mode,"g")) {
+ if(strstr(phy_mode, "g")) {
ap_mac_config->phySupportedBitmap |= CSR_WIFI_SME_AP_PHY_SUPPORT_G;
}
- if(strstr(phy_mode,"n")) {
+ if(strstr(phy_mode, "n")) {
ap_mac_config->phySupportedBitmap |= CSR_WIFI_SME_AP_PHY_SUPPORT_N;
}
ap_mac_config->supportedRatesCount =
@@ -560,39 +560,39 @@ static int store_ap_advanced_config_from_string(unifi_priv_t *priv, char *param_
return ret;
}
-static int store_ap_config_from_string( unifi_priv_t * priv,char *param_str)
+static int store_ap_config_from_string( unifi_priv_t * priv, char *param_str)
{
char *str_ptr = param_str;
char sub_cmd[16];
char sec[CSR_WIFI_MAX_SEC_LEN];
char key[CSR_WIFI_MAX_KEY_LEN];
- int ret = 0,tmp_var;
+ int ret = 0, tmp_var;
CsrWifiSmeApConfig_t *ap_config = &priv->ap_config;
CsrWifiSmeApMacConfig * ap_mac_config = &priv->ap_mac_config;
memset(sub_cmd, 0, sizeof(sub_cmd));
- if(!strstr(param_str,"END")) {
- unifi_error(priv,"store_ap_config_from_string:Invalid config string:%s\n",param_str);
+ if(!strstr(param_str, "END")) {
+ unifi_error(priv, "store_ap_config_from_string:Invalid config string:%s\n", param_str);
return -1;
}
- if (decode_parameter_from_string(priv,&str_ptr, "ASCII_CMD=",
+ if (decode_parameter_from_string(priv, &str_ptr, "ASCII_CMD=",
PARAM_TYPE_STRING, sub_cmd, 6) != 0) {
return -1;
}
if (strncmp(sub_cmd, "AP_CFG", 6)) {
- if(!strncmp(sub_cmd ,"ADVCFG", 6)) {
+ if(!strncmp(sub_cmd , "ADVCFG", 6)) {
return store_ap_advanced_config_from_string(priv, str_ptr);
}
- unifi_error(priv,"store_ap_config_from_string: sub_cmd:%s != 'AP_CFG or ADVCFG'!\n", sub_cmd);
+ unifi_error(priv, "store_ap_config_from_string: sub_cmd:%s != 'AP_CFG or ADVCFG'!\n", sub_cmd);
return -1;
}
memset(ap_config, 0, sizeof(CsrWifiSmeApConfig_t));
- ret = decode_parameter_from_string(priv,&str_ptr, "SSID=",
+ ret = decode_parameter_from_string(priv, &str_ptr, "SSID=",
PARAM_TYPE_STRING, ap_config->ssid.ssid,
CSR_WIFI_MAX_SSID_LEN);
if(ret) {
- unifi_error(priv,"store_ap_config_from_string: SSID not found\n");
+ unifi_error(priv, "store_ap_config_from_string: SSID not found\n");
return -1;
}
ap_config->ssid.length = strlen(ap_config->ssid.ssid);
@@ -600,27 +600,27 @@ static int store_ap_config_from_string( unifi_priv_t * priv,char *param_str)
ret = decode_parameter_from_string(priv, &str_ptr, "SEC=",
PARAM_TYPE_STRING, sec, CSR_WIFI_MAX_SEC_LEN);
if(ret) {
- unifi_error(priv,"store_ap_config_from_string: SEC not found\n");
+ unifi_error(priv, "store_ap_config_from_string: SEC not found\n");
return -1;
}
- ret = decode_parameter_from_string(priv,&str_ptr, "KEY=",
- PARAM_TYPE_STRING, key, CSR_WIFI_MAX_KEY_LEN);
- if(!strcasecmp(sec,"open")) {
- unifi_trace(priv,UDBG2,"store_ap_config_from_string: security open");
+ ret = decode_parameter_from_string(priv, &str_ptr, "KEY=",
+ PARAM_TYPE_STRING, key, CSR_WIFI_MAX_KEY_LEN);
+ if(!strcasecmp(sec, "open")) {
+ unifi_trace(priv, UDBG2, "store_ap_config_from_string: security open");
ap_config->credentials.authType = CSR_WIFI_SME_AP_AUTH_TYPE_OPEN_SYSTEM;
if(ret) {
- unifi_notice(priv,"store_ap_config_from_string: KEY not found:fine with Open\n");
+ unifi_notice(priv, "store_ap_config_from_string: KEY not found:fine with Open\n");
}
}
- else if(!strcasecmp(sec,"wpa2-psk")) {
- int i,j=0;
+ else if(!strcasecmp(sec, "wpa2-psk")) {
+ int i, j=0;
CsrWifiNmeApAuthPers *pers =
((CsrWifiNmeApAuthPers *)&(ap_config->credentials.nmeAuthType.authTypePersonal));
u8 *psk = pers->authPers_credentials.psk.psk;
- unifi_trace(priv,UDBG2,"store_ap_config_from_string: security WPA2");
+ unifi_trace(priv, UDBG2, "store_ap_config_from_string: security WPA2");
if(ret) {
- unifi_error(priv,"store_ap_config_from_string: KEY not found for WPA2\n");
+ unifi_error(priv, "store_ap_config_from_string: KEY not found for WPA2\n");
return -1;
}
ap_config->credentials.authType = CSR_WIFI_SME_AP_AUTH_TYPE_PERSONAL;
@@ -636,21 +636,21 @@ static int store_ap_config_from_string( unifi_priv_t * priv,char *param_str)
}
} else {
- unifi_notice(priv,"store_ap_config_from_string: Unknown security: Assuming Open");
+ unifi_notice(priv, "store_ap_config_from_string: Unknown security: Assuming Open");
ap_config->credentials.authType = CSR_WIFI_SME_AP_AUTH_TYPE_OPEN_SYSTEM;
return -1;
}
/* Get the decoded value in a temp int variable to ensure that other fields within the struct
which are of type other than int are not over written */
- ret = decode_parameter_from_string(priv,&str_ptr, "CHANNEL=", PARAM_TYPE_INT, &tmp_var, 5);
+ ret = decode_parameter_from_string(priv, &str_ptr, "CHANNEL=", PARAM_TYPE_INT, &tmp_var, 5);
if(ret)
return -1;
ap_config->channel = tmp_var;
- ret = decode_parameter_from_string(priv,&str_ptr, "PREAMBLE=", PARAM_TYPE_INT, &tmp_var, 5);
+ ret = decode_parameter_from_string(priv, &str_ptr, "PREAMBLE=", PARAM_TYPE_INT, &tmp_var, 5);
if(ret)
return -1;
ap_mac_config->preamble = tmp_var;
- ret = decode_parameter_from_string(priv,&str_ptr, "MAX_SCB=", PARAM_TYPE_INT, &tmp_var, 5);
+ ret = decode_parameter_from_string(priv, &str_ptr, "MAX_SCB=", PARAM_TYPE_INT, &tmp_var, 5);
ap_config->max_connections = tmp_var;
return ret;
}
@@ -664,9 +664,9 @@ iwprivsapstart(struct net_device *dev, struct iw_request_info *info,
int r;
unifi_trace(priv, UDBG1, "iwprivsapstart\n" );
- r = sme_ap_start(priv,interfacePriv->InterfaceTag,&priv->ap_config);
+ r = sme_ap_start(priv, interfacePriv->InterfaceTag, &priv->ap_config);
if(r) {
- unifi_error(priv,"iwprivsapstart AP START failed : %d\n",-r);
+ unifi_error(priv, "iwprivsapstart AP START failed : %d\n", -r);
}
return r;
}
@@ -692,28 +692,28 @@ iwprivsapconfig(struct net_device *dev, struct iw_request_info *info,
return -EFAULT;
}
cfg_str[wrqu->data.length] = 0;
- unifi_trace(priv,UDBG2,"length:%d\n",wrqu->data.length);
- unifi_trace(priv,UDBG2,"AP configuration string:%s\n",cfg_str);
+ unifi_trace(priv, UDBG2, "length:%d\n", wrqu->data.length);
+ unifi_trace(priv, UDBG2, "AP configuration string:%s\n", cfg_str);
str = cfg_str;
- if ((r = store_ap_config_from_string(priv,str))) {
- unifi_error(priv, "iwprivsapconfig:Failed to decode the string %d\n",r);
+ if ((r = store_ap_config_from_string(priv, str))) {
+ unifi_error(priv, "iwprivsapconfig:Failed to decode the string %d\n", r);
kfree(cfg_str);
return -EIO;
}
} else {
- unifi_error(priv,"iwprivsapconfig argument length = 0 \n");
+ unifi_error(priv, "iwprivsapconfig argument length = 0 \n");
return -EIO;
}
r = sme_ap_config(priv, &priv->ap_mac_config, &priv->group_sec_config);
if(r) {
- unifi_error(priv,"iwprivsapstop AP Config failed : %d\n",-r);
+ unifi_error(priv, "iwprivsapstop AP Config failed : %d\n", -r);
} else if(interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_AP ||
interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_P2PGO) {
unifi_trace(priv, UDBG1, "iwprivsapconfig: Starting the AP");
- r = sme_ap_start(priv,interfacePriv->InterfaceTag,&priv->ap_config);
+ r = sme_ap_start(priv, interfacePriv->InterfaceTag, &priv->ap_config);
if(r) {
- unifi_error(priv,"iwprivsapstart AP START failed : %d\n",-r);
+ unifi_error(priv, "iwprivsapstart AP START failed : %d\n", -r);
}
}
kfree(cfg_str);
@@ -730,9 +730,9 @@ iwprivsapstop(struct net_device *dev, struct iw_request_info *info,
u16 interface_tag = interfacePriv->InterfaceTag;
unifi_trace(priv, UDBG1, "iwprivsapstop\n" );
- r = sme_ap_stop(priv,interface_tag);
+ r = sme_ap_stop(priv, interface_tag);
if(r) {
- unifi_error(priv,"iwprivsapstop AP STOP failed : %d\n",-r);
+ unifi_error(priv, "iwprivsapstop AP STOP failed : %d\n", -r);
}
return r;
}
@@ -778,14 +778,14 @@ iwprivsstackstop(struct net_device *dev, struct iw_request_info *info,
break;
case CSR_WIFI_ROUTER_CTRL_MODE_AP:
case CSR_WIFI_ROUTER_CTRL_MODE_P2PGO:
- r = sme_ap_stop(priv,interface_tag);
+ r = sme_ap_stop(priv, interface_tag);
break;
default :
break;
}
if(r) {
- unifi_error(priv,"iwprivsstackstop Stack stop failed : %d\n",-r);
+ unifi_error(priv, "iwprivsstackstop Stack stop failed : %d\n", -r);
}
return 0;
}
@@ -3167,7 +3167,7 @@ static const struct iw_priv_args unifi_private_args[] = {
#endif
#ifdef CSR_SUPPORT_WEXT_AP
{ SIOCIWSAPCFGPRIV, IW_PRIV_TYPE_CHAR | 256, IW_PRIV_TYPE_NONE, "AP_SET_CFG" },
- { SIOCIWSAPSTARTPRIV, 0,IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED|IWPRIV_SME_MAX_STRING,"AP_BSS_START" },
+ { SIOCIWSAPSTARTPRIV, 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED|IWPRIV_SME_MAX_STRING, "AP_BSS_START" },
{ SIOCIWSAPSTOPPRIV, IW_PRIV_TYPE_CHAR |IW_PRIV_SIZE_FIXED|0,
IW_PRIV_TYPE_CHAR |IW_PRIV_SIZE_FIXED|0, "AP_BSS_STOP" },
#ifdef ANDROID_BUILD
diff --git a/drivers/staging/csr/ul_int.c b/drivers/staging/csr/ul_int.c
index 0fae6f48f79b..eb286e5f7467 100644
--- a/drivers/staging/csr/ul_int.c
+++ b/drivers/staging/csr/ul_int.c
@@ -258,7 +258,7 @@ ul_log_config_ind(unifi_priv_t *priv, u8 *conf_param, int len)
unifi_notice(priv, "ul_log_config_ind: wifi on in progress, suppress error\n");
} else {
/* wifi_off_ind (error or exit) */
- CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0, (CsrWifiRouterCtrlControlIndication)(*conf_param));
+ CsrWifiRouterCtrlWifiOffIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, (CsrWifiRouterCtrlControlIndication)(*conf_param));
}
#ifdef CSR_WIFI_HIP_DEBUG_OFFLINE
unifi_debug_buf_dump();
diff --git a/drivers/staging/csr/unifi_event.c b/drivers/staging/csr/unifi_event.c
index e81a99878272..71fdb2180e3d 100644
--- a/drivers/staging/csr/unifi_event.c
+++ b/drivers/staging/csr/unifi_event.c
@@ -105,7 +105,7 @@ static u8 check_routing_pkt_data_ind(unifi_priv_t *priv,
u8 isDataFrameSubTypeNoData = FALSE;
#ifdef CSR_WIFI_SECURITY_WAPI_ENABLE
- static const u8 wapiProtocolIdSNAPHeader[] = {0x88,0xb4};
+ static const u8 wapiProtocolIdSNAPHeader[] = {0x88, 0xb4};
static const u8 wapiProtocolIdSNAPHeaderOffset = 6;
u8 *destAddr;
u8 *srcAddr;
@@ -206,7 +206,7 @@ static u8 check_routing_pkt_data_ind(unifi_priv_t *priv,
unifi_trace(priv, UDBG4, "Discarding the contents of the frame with MIC failure \n");
if (isWapiUnicastPkt &&
- ((uf_sme_port_state(priv,srcAddr,UF_CONTROLLED_PORT_Q,interfaceTag) != CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_OPEN)||
+ ((uf_sme_port_state(priv, srcAddr, UF_CONTROLLED_PORT_Q, interfaceTag) != CSR_WIFI_ROUTER_CTRL_PORT_ACTION_8021X_PORT_OPEN)||
#ifndef CSR_WIFI_SECURITY_WAPI_SW_ENCRYPTION
(priv->wapi_unicast_filter) ||
#endif
@@ -231,7 +231,7 @@ static u8 check_routing_pkt_data_ind(unifi_priv_t *priv,
unifi_trace(priv, UDBG6, "check_routing_pkt_data_ind - MIC FAILURE : Dest Addr %x:%x:%x:%x:%x:%x\n",
destAddr[0], destAddr[1], destAddr[2], destAddr[3], destAddr[4], destAddr[5]);
unifi_trace(priv, UDBG6, "check_routing_pkt_data_ind - MIC FAILURE : Control Port State - 0x%.4X \n",
- uf_sme_port_state(priv,srcAddr,UF_CONTROLLED_PORT_Q,interfaceTag));
+ uf_sme_port_state(priv, srcAddr, UF_CONTROLLED_PORT_Q, interfaceTag));
unifi_error(priv, "MIC failure in %s\n", __FUNCTION__);
@@ -285,9 +285,9 @@ static u8 check_routing_pkt_data_ind(unifi_priv_t *priv,
if (llcSnapHeaderOffset > 0) {
/* QoS data or Data */
- unifi_trace(priv, UDBG6, "check_routing_pkt_data_ind(): SNAP header found & its offset %d\n",llcSnapHeaderOffset);
+ unifi_trace(priv, UDBG6, "check_routing_pkt_data_ind(): SNAP header found & its offset %d\n", llcSnapHeaderOffset);
if (memcmp((u8 *)(bulkdata->d[0].os_data_ptr+llcSnapHeaderOffset+wapiProtocolIdSNAPHeaderOffset),
- wapiProtocolIdSNAPHeader,sizeof(wapiProtocolIdSNAPHeader))) {
+ wapiProtocolIdSNAPHeader, sizeof(wapiProtocolIdSNAPHeader))) {
unifi_trace(priv, UDBG6, "check_routing_pkt_data_ind(): This is a data & NOT a WAI protocol packet\n");
/* On the first unicast data pkt that is decrypted successfully after re-keying, reset the filter */
@@ -584,14 +584,14 @@ void unifi_rx_queue_flush(void *ospriv)
unifi_priv_t *priv = (unifi_priv_t*)ospriv;
unifi_trace(priv, UDBG4, "rx_wq_handler: RdPtr = %d WritePtr = %d\n",
- priv->rxSignalBuffer.readPointer,priv->rxSignalBuffer.writePointer);
+ priv->rxSignalBuffer.readPointer, priv->rxSignalBuffer.writePointer);
if(priv != NULL) {
u8 readPointer = priv->rxSignalBuffer.readPointer;
while (readPointer != priv->rxSignalBuffer.writePointer)
{
rx_buff_struct_t *buf = &priv->rxSignalBuffer.rx_buff[readPointer];
unifi_trace(priv, UDBG6, "rx_wq_handler: RdPtr = %d WritePtr = %d\n",
- readPointer,priv->rxSignalBuffer.writePointer);
+ readPointer, priv->rxSignalBuffer.writePointer);
unifi_process_receive_event(priv, buf->bufptr, buf->sig_len, &buf->data_ptrs);
readPointer ++;
if(readPointer >= priv->rxSignalBuffer.size) {
@@ -661,7 +661,7 @@ unifi_receive_event(void *ospriv,
CSR_GET_UINT16_FROM_LITTLE_ENDIAN((sigdata) + sizeof(s16)*6) & 0xFFFF,
CSR_GET_UINT16_FROM_LITTLE_ENDIAN((sigdata) + sizeof(s16)*7) & 0xFFFF, siglen);
if(signal_buffer_is_full(priv)) {
- unifi_error(priv,"TO HOST signal queue FULL dropping the PDU\n");
+ unifi_error(priv, "TO HOST signal queue FULL dropping the PDU\n");
for (i = 0; i < UNIFI_MAX_DATA_REFERENCES; i++) {
if (bulkdata->d[i].data_length != 0) {
unifi_net_data_free(priv, (void *)&bulkdata->d[i]);
@@ -671,14 +671,14 @@ unifi_receive_event(void *ospriv,
}
writePointer = priv->rxSignalBuffer.writePointer;
rx_buff = &priv->rxSignalBuffer.rx_buff[writePointer];
- memcpy(rx_buff->bufptr,sigdata,siglen);
+ memcpy(rx_buff->bufptr, sigdata, siglen);
rx_buff->sig_len = siglen;
rx_buff->data_ptrs = *bulkdata;
writePointer++;
if(writePointer >= priv->rxSignalBuffer.size) {
writePointer =0;
}
- unifi_trace(priv, UDBG4, "unifi_receive_event:writePtr = %d\n",priv->rxSignalBuffer.writePointer);
+ unifi_trace(priv, UDBG4, "unifi_receive_event:writePtr = %d\n", priv->rxSignalBuffer.writePointer);
priv->rxSignalBuffer.writePointer = writePointer;
#ifndef CSR_WIFI_RX_PATH_SPLIT_DONT_USE_WQ
diff --git a/drivers/staging/csr/unifi_pdu_processing.c b/drivers/staging/csr/unifi_pdu_processing.c
index f9b421b5aa35..04fe9e2acf0e 100644
--- a/drivers/staging/csr/unifi_pdu_processing.c
+++ b/drivers/staging/csr/unifi_pdu_processing.c
@@ -38,7 +38,7 @@ static void _update_buffered_pkt_params_after_alignment(unifi_priv_t *priv, bulk
skb = (struct sk_buff*)bulkdata->d[0].os_net_buf_ptr;
align_offset = (u32)(long)(bulkdata->d[0].os_data_ptr) & (CSR_WIFI_ALIGN_BYTES-1);
if(align_offset){
- skb_pull(skb,align_offset);
+ skb_pull(skb, align_offset);
}
buffered_pkt->bulkdata.os_data_ptr = bulkdata->d[0].os_data_ptr;
@@ -86,7 +86,7 @@ unifi_frame_ma_packet_req(unifi_priv_t *priv, CSR_PRIORITY priority,
*/
req->TransmissionControl = transmissionControl;
req->VirtualInterfaceIdentifier =
- uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag);
+ uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag);
memcpy(req->Ra.x, peerMacAddress, ETH_ALEN);
if (hostTag == 0xffffffff) {
@@ -124,8 +124,8 @@ unifi_frame_ma_packet_req(unifi_priv_t *priv, CSR_PRIORITY priority,
#define TRANSMISSION_CONTROL_EOSP_MASK 0x0002
static
-int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered_pkt,
- CsrWifiRouterCtrlStaInfo_t *staRecord,u8 moreData , u8 eosp)
+int frame_and_send_queued_pdu(unifi_priv_t* priv, tx_buffered_packets_t* buffered_pkt,
+ CsrWifiRouterCtrlStaInfo_t *staRecord, u8 moreData , u8 eosp)
{
CSR_SIGNAL signal;
@@ -135,7 +135,7 @@ int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered
u8 *qc;
u16 *fc = (u16*)(buffered_pkt->bulkdata.os_data_ptr);
unsigned long lock_flags;
- unifi_trace(priv, UDBG3, "frame_and_send_queued_pdu with moreData: %d , EOSP: %d\n",moreData,eosp);
+ unifi_trace(priv, UDBG3, "frame_and_send_queued_pdu with moreData: %d , EOSP: %d\n", moreData, eosp);
unifi_frame_ma_packet_req(priv, buffered_pkt->priority, buffered_pkt->rate, buffered_pkt->hostTag,
buffered_pkt->interfaceTag, buffered_pkt->transmissionControl,
buffered_pkt->leSenderProcessId, buffered_pkt->peerMacAddress.a, &signal);
@@ -156,7 +156,7 @@ int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered
if((staRecord != NULL)&& (staRecord->wmmOrQosEnabled == TRUE))
{
- unifi_trace(priv, UDBG3, "frame_and_send_queued_pdu WMM Enabled: %d \n",staRecord->wmmOrQosEnabled);
+ unifi_trace(priv, UDBG3, "frame_and_send_queued_pdu WMM Enabled: %d \n", staRecord->wmmOrQosEnabled);
toDs = (*fc & cpu_to_le16(IEEE802_11_FC_TO_DS_MASK))?1 : 0;
fromDs = (*fc & cpu_to_le16(IEEE802_11_FC_FROM_DS_MASK))? 1: 0;
@@ -190,7 +190,7 @@ int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered
}
result = ul_send_signal_unpacked(priv, &signal, &bulkdata);
if(result){
- _update_buffered_pkt_params_after_alignment(priv, &bulkdata,buffered_pkt);
+ _update_buffered_pkt_params_after_alignment(priv, &bulkdata, buffered_pkt);
}
/* Decrement the packet counts queued in driver */
@@ -199,13 +199,13 @@ int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered
if (!priv->noOfPktQueuedInDriver) {
unifi_error(priv, "packets queued in driver 0 still decrementing\n");
} else {
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
priv->noOfPktQueuedInDriver--;
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
/* Sta Record is available for all unicast (except genericMgt Frames) & in other case its NULL */
if (staRecord) {
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
if (!staRecord->noOfPktQueued) {
unifi_error(priv, "packets queued in driver per station is 0 still decrementing\n");
} else {
@@ -217,7 +217,7 @@ int frame_and_send_queued_pdu(unifi_priv_t* priv,tx_buffered_packets_t* buffered
staRecord->nullDataHostTag = INVALID_HOST_TAG;
}
}
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
}
@@ -243,24 +243,24 @@ void set_eosp_transmit_ctrl(unifi_priv_t *priv, struct list_head *txList)
/* return the last node , and modify it. */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_for_each_prev_safe(listHead, placeHolder, txList) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
tx_q_item->transmissionControl |= TRANSMISSION_CONTROL_EOSP_MASK;
tx_q_item->transmissionControl = (tx_q_item->transmissionControl & ~(CSR_NO_CONFIRM_REQUIRED));
unifi_trace(priv, UDBG1,
- "set_eosp_transmit_ctrl Transmission Control = 0x%x hostTag = 0x%x \n",tx_q_item->transmissionControl,tx_q_item->hostTag);
- unifi_trace(priv,UDBG3,"in set_eosp_transmit_ctrl no.of buffered frames %d\n",priv->noOfPktQueuedInDriver);
+ "set_eosp_transmit_ctrl Transmission Control = 0x%x hostTag = 0x%x \n", tx_q_item->transmissionControl, tx_q_item->hostTag);
+ unifi_trace(priv, UDBG3, "in set_eosp_transmit_ctrl no.of buffered frames %d\n", priv->noOfPktQueuedInDriver);
break;
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
- unifi_trace(priv, UDBG1,"List Empty %d\n",list_empty(txList));
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
+ unifi_trace(priv, UDBG1, "List Empty %d\n", list_empty(txList));
unifi_trace(priv, UDBG5, "leaving set_eosp_transmit_ctrl\n");
return;
}
static
-void send_vif_availibility_rsp(unifi_priv_t *priv,CSR_VIF_IDENTIFIER vif,CSR_RESULT_CODE resultCode)
+void send_vif_availibility_rsp(unifi_priv_t *priv, CSR_VIF_IDENTIFIER vif, CSR_RESULT_CODE resultCode)
{
CSR_SIGNAL signal;
CSR_MA_VIF_AVAILABILITY_RESPONSE *rsp;
@@ -269,7 +269,7 @@ void send_vif_availibility_rsp(unifi_priv_t *priv,CSR_VIF_IDENTIFIER vif,CSR_RES
unifi_trace(priv, UDBG3, "send_vif_availibility_rsp : invoked with resultCode = %d \n", resultCode);
- memset(&signal,0,sizeof(CSR_SIGNAL));
+ memset(&signal, 0, sizeof(CSR_SIGNAL));
rsp = &signal.u.MaVifAvailabilityResponse;
rsp->VirtualInterfaceIdentifier = vif;
rsp->ResultCode = resultCode;
@@ -280,7 +280,7 @@ void send_vif_availibility_rsp(unifi_priv_t *priv,CSR_VIF_IDENTIFIER vif,CSR_RES
/* Send the signal to UniFi */
r = ul_send_signal_unpacked(priv, &signal, bulkdata);
if(r) {
- unifi_error(priv,"Availibility response sending failed %x status %d\n",vif,r);
+ unifi_error(priv, "Availibility response sending failed %x status %d\n", vif, r);
}
else {
unifi_trace(priv, UDBG3, "send_vif_availibility_rsp : status = %d \n", r);
@@ -295,7 +295,7 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
unsigned long lock_flags;
struct list_head *listHead, *list;
struct list_head *placeHolder;
- u8 i, j,eospFramedeleted=0;
+ u8 i, j, eospFramedeleted=0;
u8 thresholdExcedeDueToBroadcast = TRUE;
/* it will be made it interface Specific in the future when multi interfaces are supported ,
right now interface 0 is considered */
@@ -311,10 +311,10 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
* packets for station record crossed the threshold limit (64 for AP supporting
* 8 peers)
*/
- unifi_trace(priv,UDBG3,"number of station pkts queued= %d for sta id = %d\n", staInfo->noOfPktQueued, staInfo->aid);
+ unifi_trace(priv, UDBG3, "number of station pkts queued= %d for sta id = %d\n", staInfo->noOfPktQueued, staInfo->aid);
for(j = 0; j < MAX_ACCESS_CATOGORY; j++) {
list = &staInfo->dataPdu[j];
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_for_each_safe(listHead, placeHolder, list) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
list_del(listHead);
@@ -339,7 +339,7 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
}
break;
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
}
}
@@ -347,13 +347,13 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
/* Remove the packets from genericMulticastOrBroadCastFrames queue
* (the max packets in driver is reached due to broadcast/multicast frames)
*/
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_for_each_safe(listHead, placeHolder, &interfacePriv->genericMulticastOrBroadCastFrames) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
if(eospFramedeleted){
tx_q_item->transmissionControl |= TRANSMISSION_CONTROL_EOSP_MASK;
tx_q_item->transmissionControl = (tx_q_item->transmissionControl & ~(CSR_NO_CONFIRM_REQUIRED));
- unifi_trace(priv, UDBG1,"updating eosp for next packet hostTag:= 0x%x ",tx_q_item->hostTag);
+ unifi_trace(priv, UDBG1, "updating eosp for next packet hostTag:= 0x%x ", tx_q_item->hostTag);
eospFramedeleted =0;
break;
}
@@ -361,7 +361,7 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
if(tx_q_item->transmissionControl & TRANSMISSION_CONTROL_EOSP_MASK ){
eospFramedeleted = 1;
}
- unifi_trace(priv,UDBG1, "freeing of multicast packets ToC = 0x%x hostTag = 0x%x \n",tx_q_item->transmissionControl,tx_q_item->hostTag);
+ unifi_trace(priv, UDBG1, "freeing of multicast packets ToC = 0x%x hostTag = 0x%x \n", tx_q_item->transmissionControl, tx_q_item->hostTag);
list_del(listHead);
unifi_net_data_free(priv, &tx_q_item->bulkdata);
kfree(tx_q_item);
@@ -373,7 +373,7 @@ void verify_and_accomodate_tx_packet(unifi_priv_t *priv)
break;
}
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
unifi_trace(priv, UDBG3, "leaving verify_and_accomodate_tx_packet\n");
}
@@ -391,13 +391,13 @@ CsrResult enque_tx_data_pdu(unifi_priv_t *priv, bulk_data_param_t *bulkdata,
unifi_trace(priv, UDBG5, "entering enque_tx_data_pdu\n");
if(!list) {
- unifi_error(priv,"List is not specified\n");
+ unifi_error(priv, "List is not specified\n");
return CSR_RESULT_FAILURE;
}
/* Removes aged packets & adds the incoming packet */
if (priv->noOfPktQueuedInDriver >= CSR_WIFI_DRIVER_SUPPORT_FOR_MAX_PKT_QUEUEING) {
- unifi_trace(priv,UDBG3,"number of pkts queued= %d \n", priv->noOfPktQueuedInDriver);
+ unifi_trace(priv, UDBG3, "number of pkts queued= %d \n", priv->noOfPktQueuedInDriver);
verify_and_accomodate_tx_packet(priv);
}
@@ -412,7 +412,7 @@ CsrResult enque_tx_data_pdu(unifi_priv_t *priv, bulk_data_param_t *bulkdata,
}
/* disable the preemption */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
INIT_LIST_HEAD(&tx_q_item->q);
/* fill the tx_q structure members */
tx_q_item->bulkdata.os_data_ptr = bulkdata->d[0].os_data_ptr;
@@ -437,7 +437,7 @@ CsrResult enque_tx_data_pdu(unifi_priv_t *priv, bulk_data_param_t *bulkdata,
/* Count of packet queued in driver */
priv->noOfPktQueuedInDriver++;
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
unifi_trace(priv, UDBG5, "leaving enque_tx_data_pdu\n");
return CSR_RESULT_SUCCESS;
}
@@ -655,13 +655,13 @@ void uf_handle_tim_cfm(unifi_priv_t *priv, CSR_MLME_SET_TIM_CONFIRM *cfm, u16 re
}
if (handle != CSR_WIFI_BROADCAST_OR_MULTICAST_HANDLE) {
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
if ((staRecord = ((CsrWifiRouterCtrlStaInfo_t *) (interfacePriv->staInfo[handle]))) == NULL) {
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
unifi_warning(priv, "uf_handle_tim_cfm: station record is NULL handle = %x\n", handle);
return;
}
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
switch(timSetStatus)
{
@@ -909,13 +909,13 @@ void update_tim(unifi_priv_t * priv, u16 aid, u8 setTim, u16 interfaceTag, u32 h
(u8*)&signal.SignalPrimitiveHeader.SenderProcessId);
/* set The virtual interfaceIdentifier, aid, tim value */
- req->VirtualInterfaceIdentifier = uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag);
+ req->VirtualInterfaceIdentifier = uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag);
req->AssociationId = aid;
req->TimValue = setTim;
unifi_trace(priv, UDBG2, "update_tim:AID %x,senderIdLsb = 0x%x, handle = 0x%x, timSetStatus = %x, sender proceesID = %x \n",
- aid,senderIdLsb, handle, timSetStatus, signal.SignalPrimitiveHeader.SenderProcessId);
+ aid, senderIdLsb, handle, timSetStatus, signal.SignalPrimitiveHeader.SenderProcessId);
/* Send the signal to UniFi */
r = ul_send_signal_unpacked(priv, &signal, bulkdata);
@@ -953,17 +953,17 @@ void process_peer_active_transition(unifi_priv_t * priv,
CsrWifiRouterCtrlStaInfo_t *staRecord,
u16 interfaceTag)
{
- int r,i;
- u8 spaceAvail[4] = {TRUE,TRUE,TRUE,TRUE};
+ int r, i;
+ u8 spaceAvail[4] = {TRUE, TRUE, TRUE, TRUE};
tx_buffered_packets_t * buffered_pkt = NULL;
unsigned long lock_flags;
netInterface_priv_t *interfacePriv = priv->interfacePriv[interfaceTag];
unifi_trace(priv, UDBG5, "entering process_peer_active_transition\n");
- if(IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag)) {
+ if(IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag)) {
/* giving more priority to multicast packets so delaying unicast packets*/
- unifi_trace(priv,UDBG2, "Multicast transmission is going on so resume unicast transmission after DTIM over\n");
+ unifi_trace(priv, UDBG2, "Multicast transmission is going on so resume unicast transmission after DTIM over\n");
/* As station is active now, even though AP is not able to send frames to it
* because of DTIM, it needs to reset the TIM here
@@ -987,12 +987,12 @@ void process_peer_active_transition(unifi_priv_t * priv,
while((buffered_pkt=dequeue_tx_data_pdu(priv, &staRecord->mgtFrames))) {
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,0,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, 0, FALSE)) == -ENOSPC) {
unifi_trace(priv, UDBG2, "p_p_a_t:(ENOSPC) Mgt Frame queueing \n");
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->mgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[3]=(u8)(staRecord->assignedHandle);
spaceAvail[3] = FALSE;
break;
@@ -1008,7 +1008,7 @@ void process_peer_active_transition(unifi_priv_t * priv,
if (!staRecord->timRequestPendingFlag) {
if (staRecord->txSuspend) {
if(staRecord->timSet == CSR_WIFI_TIM_SET) {
- update_tim(priv,staRecord->aid,0,interfaceTag, staRecord->assignedHandle);
+ update_tim(priv, staRecord->aid, 0, interfaceTag, staRecord->assignedHandle);
}
return;
}
@@ -1025,16 +1025,16 @@ void process_peer_active_transition(unifi_priv_t * priv,
for(i=3;i>=0;i--) {
if(!spaceAvail[i])
continue;
- unifi_trace(priv, UDBG6, "p_p_a_t:data pkt sending for AC %d \n",i);
+ unifi_trace(priv, UDBG6, "p_p_a_t:data pkt sending for AC %d \n", i);
while((buffered_pkt=dequeue_tx_data_pdu(priv, &staRecord->dataPdu[i]))) {
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,0,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, 0, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->dataPdu[i]);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[i]=(u8)(staRecord->assignedHandle);
break;
} else {
@@ -1050,7 +1050,7 @@ void process_peer_active_transition(unifi_priv_t * priv,
if (!staRecord->timRequestPendingFlag){
if((staRecord->timSet == CSR_WIFI_TIM_SET) || (staRecord->timSet == CSR_WIFI_TIM_SETTING)) {
unifi_trace(priv, UDBG3, "p_p_a_t:resetting tim .....\n");
- update_tim(priv,staRecord->aid,0,interfaceTag, staRecord->assignedHandle);
+ update_tim(priv, staRecord->aid, 0, interfaceTag, staRecord->assignedHandle);
}
}
else
@@ -1067,7 +1067,7 @@ void process_peer_active_transition(unifi_priv_t * priv,
-void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv,u16 interfaceTag, const CSR_MA_PACKET_CONFIRM *pkt_cfm)
+void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv, u16 interfaceTag, const CSR_MA_PACKET_CONFIRM *pkt_cfm)
{
netInterface_priv_t *interfacePriv;
u8 i;
@@ -1076,16 +1076,16 @@ void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv,u16 interfaceTag, const CSR
if(pkt_cfm->HostTag == interfacePriv->multicastPduHostTag) {
- unifi_trace(priv,UDBG2,"CFM for marked Multicast Tag = %x\n",interfacePriv->multicastPduHostTag);
+ unifi_trace(priv, UDBG2, "CFM for marked Multicast Tag = %x\n", interfacePriv->multicastPduHostTag);
interfacePriv->multicastPduHostTag = 0xffffffff;
- resume_suspended_uapsd(priv,interfaceTag);
- resume_unicast_buffered_frames(priv,interfaceTag);
+ resume_suspended_uapsd(priv, interfaceTag);
+ resume_unicast_buffered_frames(priv, interfaceTag);
if(list_empty(&interfacePriv->genericMulticastOrBroadCastMgtFrames) &&
list_empty(&interfacePriv->genericMulticastOrBroadCastFrames)) {
- unifi_trace(priv,UDBG1,"Resetting multicastTIM");
+ unifi_trace(priv, UDBG1, "Resetting multicastTIM");
if (!interfacePriv->bcTimSetReqPendingFlag)
{
- update_tim(priv,0,CSR_WIFI_TIM_RESET,interfaceTag, 0xFFFFFFFF);
+ update_tim(priv, 0, CSR_WIFI_TIM_RESET, interfaceTag, 0xFFFFFFFF);
}
else
{
@@ -1164,7 +1164,7 @@ void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv,u16 interfaceTag, const CSR
&send_cfm_list,
&(staRecord->dataPdu[j]));
- uf_flush_list(priv,&(staRecord->dataPdu[j]));
+ uf_flush_list(priv, &(staRecord->dataPdu[j]));
}
send_auto_ma_packet_confirm(priv, staRecord->interfacePriv, &send_cfm_list);
@@ -1469,7 +1469,7 @@ static int update_macheader(unifi_priv_t *priv, struct sk_buff *skb,
}
/* prepare the complete skb, by pushing the MAC header to the beginning of the skb->data */
- unifi_trace(priv, UDBG5, "updated Mac Header: %d \n",macHeaderLengthInBytes);
+ unifi_trace(priv, UDBG5, "updated Mac Header: %d \n", macHeaderLengthInBytes);
memcpy(bufPtr, macHeaderBuf, macHeaderLengthInBytes);
unifi_trace(priv, UDBG5, "leaving the update_macheader function\n");
@@ -1515,7 +1515,7 @@ uf_ap_process_data_pdu(unifi_priv_t *priv, struct sk_buff *skb,
CsrWifiRouterCtrlStaInfo_t *dstStaInfo = NULL;
netInterface_priv_t *interfacePriv;
- unifi_trace(priv, UDBG5, "entering uf_ap_process_data_pdu %d\n",macHeaderLengthInBytes);
+ unifi_trace(priv, UDBG5, "entering uf_ap_process_data_pdu %d\n", macHeaderLengthInBytes);
/* InterfaceTag validation from MA_PACKET.indication */
if (interfaceTag >= CSR_WIFI_NUM_INTERFACES) {
unifi_trace(priv, UDBG1, "Interface Tag is Invalid in uf_ap_process_data_pdu\n");
@@ -1608,7 +1608,7 @@ uf_ap_process_data_pdu(unifi_priv_t *priv, struct sk_buff *skb,
unifi_trace(priv, UDBG3, "Mac Header updated...calling uf_process_ma_packet_req \n");
/* Packet is ready to send to unifi ,transmissionControl = 0x0004, confirmation is not needed for data packets */
- if (uf_process_ma_packet_req(priv, ehdr->h_dest, 0xffffffff, interfaceTag, CSR_NO_CONFIRM_REQUIRED, (CSR_RATE)0,priority, priv->netdev_client->sender_id, bulkdata)) {
+ if (uf_process_ma_packet_req(priv, ehdr->h_dest, 0xffffffff, interfaceTag, CSR_NO_CONFIRM_REQUIRED, (CSR_RATE)0, priority, priv->netdev_client->sender_id, bulkdata)) {
if (sendToNetdev) {
unifi_trace(priv, UDBG1, "In uf_ap_process_data_pdu, (Packet Drop) uf_process_ma_packet_req failed. freeing skb_copy data (original data sent to Netdev)\n");
/* Free's the skb_copy(skbPtr) data since packet processing failed */
@@ -1750,7 +1750,7 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
/* push the packet to the unifi if list is empty (if packet lost how to re-enque) */
if (list_empty(&interfacePriv->genericMgtFrames)) {
#ifdef CSR_SUPPORT_SME
- if(!(IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag))) {
+ if(!(IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag))) {
#endif
unifi_trace(priv, UDBG3, "genericMgtFrames list is empty uf_process_ma_packet_req \n");
@@ -1765,8 +1765,8 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
#ifdef CSR_SUPPORT_SME
}else{
list = &interfacePriv->genericMgtFrames;
- unifi_trace(priv, UDBG3, "genericMgtFrames queue empty and dtim started\n hosttag is 0x%x,\n",signal.u.MaPacketRequest.HostTag);
- update_eosp_to_head_of_broadcast_list_head(priv,interfaceTag);
+ unifi_trace(priv, UDBG3, "genericMgtFrames queue empty and dtim started\n hosttag is 0x%x,\n", signal.u.MaPacketRequest.HostTag);
+ update_eosp_to_head_of_broadcast_list_head(priv, interfaceTag);
}
#endif
} else {
@@ -1776,15 +1776,15 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
}
} else {
/* check peer power state */
- if (queuePacketDozing || !list_empty(&staRecord->mgtFrames) || IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag)) {
+ if (queuePacketDozing || !list_empty(&staRecord->mgtFrames) || IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag)) {
/* peer is in dozing mode, so queue packet in mgt frame list of station record */
/*if multicast traffic is going on, buffer the unicast packets*/
list = &staRecord->mgtFrames;
unifi_trace(priv, UDBG1, "staRecord->MgtFrames list empty? = %s, handle = %d, queuePacketDozing = %d\n",
(list_empty(&staRecord->mgtFrames))? "YES": "NO", staRecord->assignedHandle, queuePacketDozing);
- if(IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag)){
- update_eosp_to_head_of_broadcast_list_head(priv,interfaceTag);
+ if(IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag)){
+ update_eosp_to_head_of_broadcast_list_head(priv, interfaceTag);
}
} else {
@@ -1794,7 +1794,7 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
/* requeue the failed packet to staRecord->mgtFrames with same position */
list = &staRecord->mgtFrames;
requeueOnSamePos = TRUE;
- unifi_trace(priv, UDBG1, "(ENOSPC) Sending MgtFrames Failed handle = %d so buffering\n",staRecord->assignedHandle);
+ unifi_trace(priv, UDBG1, "(ENOSPC) Sending MgtFrames Failed handle = %d so buffering\n", staRecord->assignedHandle);
priv->pausedStaHandle[0]=(u8)(staRecord->assignedHandle);
} else if (result) {
status = CSR_RESULT_FAILURE;
@@ -1827,11 +1827,11 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
if(!staRecord) {
unifi_error(priv, "In %s unicast but staRecord = NULL\n", __FUNCTION__);
return CSR_RESULT_FAILURE;
- } else if (queuePacketDozing || isRouterBufferEnabled(priv,priority_q)|| !list_empty(&staRecord->dataPdu[priority_q]) || IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag)) {
+ } else if (queuePacketDozing || isRouterBufferEnabled(priv, priority_q)|| !list_empty(&staRecord->dataPdu[priority_q]) || IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag)) {
/* peer is in dozing mode, so queue packet in mgt frame list of station record */
/* if multicast traffic is going on, buffet the unicast packets */
unifi_trace(priv, UDBG2, "Enqueued to staRecord->dataPdu[%d] queuePacketDozing=%d,\
- Buffering enabled = %d \n", priority_q,queuePacketDozing,isRouterBufferEnabled(priv,priority_q));
+ Buffering enabled = %d \n", priority_q, queuePacketDozing, isRouterBufferEnabled(priv, priority_q));
list = &staRecord->dataPdu[priority_q];
} else {
unifi_trace(priv, UDBG5, "staRecord->dataPdu[%d] list is empty uf_process_ma_packet_req \n", priority_q);
@@ -1839,12 +1839,12 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
result = ul_send_signal_unpacked(priv, &signal, bulkdata);
if(result == -ENOSPC) {
/* requeue the failed packet to staRecord->dataPdu[priority_q] with same position */
- unifi_trace(priv, UDBG1, "(ENOSPC) Sending Unicast DataPDU to queue %d Failed so buffering\n",priority_q);
+ unifi_trace(priv, UDBG1, "(ENOSPC) Sending Unicast DataPDU to queue %d Failed so buffering\n", priority_q);
requeueOnSamePos = TRUE;
list = &staRecord->dataPdu[priority_q];
priv->pausedStaHandle[priority_q]=(u8)(staRecord->assignedHandle);
- if(!isRouterBufferEnabled(priv,priority_q)) {
- unifi_error(priv,"Buffering Not enabled for queue %d \n",priority_q);
+ if(!isRouterBufferEnabled(priv, priority_q)) {
+ unifi_error(priv, "Buffering Not enabled for queue %d \n", priority_q);
}
} else if (result) {
status = CSR_RESULT_FAILURE;
@@ -1869,19 +1869,19 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
unifi_error(priv, "unrecognized frame type\n");
}
if(list) {
- status = enque_tx_data_pdu(priv, bulkdata,list, &signal,requeueOnSamePos);
+ status = enque_tx_data_pdu(priv, bulkdata, list, &signal, requeueOnSamePos);
/* Record no. of packet queued for each peer */
if (staRecord && (pktType == CSR_WIFI_UNICAST_PDU) && (!status)) {
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staRecord->noOfPktQueued++;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
else if ((pktType == CSR_WIFI_MULTICAST_PDU) && (!status))
{
/* If broadcast Tim is set && queuing is successful, then only update TIM */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
interfacePriv->noOfbroadcastPktQueued++;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
}
/* If broadcast Tim is set && queuing is successful, then only update TIM */
@@ -1889,7 +1889,7 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
unifi_trace(priv, UDBG3, "tim set due to broadcast pkt\n");
if (!interfacePriv->bcTimSetReqPendingFlag)
{
- update_tim(priv,0,CSR_WIFI_TIM_SET,interfaceTag, handle);
+ update_tim(priv, 0, CSR_WIFI_TIM_SET, interfaceTag, handle);
}
else
{
@@ -1909,7 +1909,7 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
!list_empty(&staRecord->dataPdu[UNIFI_TRAFFIC_Q_CONTENTION])) {
unifi_trace(priv, UDBG3, "tim set due to unicast pkt & peer in powersave\n");
if (!staRecord->timRequestPendingFlag){
- update_tim(priv,staRecord->aid,1,interfaceTag, handle);
+ update_tim(priv, staRecord->aid, 1, interfaceTag, handle);
}
else
{
@@ -1929,7 +1929,7 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
if (uf_is_more_data_for_non_delivery_ac(staRecord) || (allDeliveryEnabled && dataAvailable)
|| (!list_empty(&staRecord->mgtFrames))) {
if (!staRecord->timRequestPendingFlag) {
- update_tim(priv,staRecord->aid,1,interfaceTag, handle);
+ update_tim(priv, staRecord->aid, 1, interfaceTag, handle);
}
else
{
@@ -1945,8 +1945,8 @@ CsrResult uf_process_ma_packet_req(unifi_priv_t *priv,
}
}
- if((list) && (pktType == CSR_WIFI_UNICAST_PDU && !queuePacketDozing) && !(isRouterBufferEnabled(priv,priority_q)) && !(IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag))) {
- unifi_trace(priv, UDBG2, "buffering cleared for queue = %d So resending buffered frames\n",priority_q);
+ if((list) && (pktType == CSR_WIFI_UNICAST_PDU && !queuePacketDozing) && !(isRouterBufferEnabled(priv, priority_q)) && !(IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag))) {
+ unifi_trace(priv, UDBG2, "buffering cleared for queue = %d So resending buffered frames\n", priority_q);
uf_send_buffered_frames(priv, priority_q);
}
unifi_trace(priv, UDBG5, "leaving uf_process_ma_packet_req \n");
@@ -2022,23 +2022,23 @@ u8 send_multicast_frames(unifi_priv_t *priv, u16 interfaceTag)
netInterface_priv_t *interfacePriv = priv->interfacePriv[interfaceTag];
u32 hostTag = 0xffffffff;
- if(!isRouterBufferEnabled(priv,UNIFI_TRAFFIC_Q_VO)) {
- while((interfacePriv->dtimActive)&& (buffered_pkt=dequeue_tx_data_pdu(priv,&interfacePriv->genericMulticastOrBroadCastMgtFrames))) {
+ if(!isRouterBufferEnabled(priv, UNIFI_TRAFFIC_Q_VO)) {
+ while((interfacePriv->dtimActive)&& (buffered_pkt=dequeue_tx_data_pdu(priv, &interfacePriv->genericMulticastOrBroadCastMgtFrames))) {
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK);
moreData = (buffered_pkt->transmissionControl & TRANSMISSION_CONTROL_EOSP_MASK)?FALSE:TRUE;
- unifi_trace(priv,UDBG2,"DTIM Occurred for interface:sending Mgt packet %d\n",interfaceTag);
+ unifi_trace(priv, UDBG2, "DTIM Occurred for interface:sending Mgt packet %d\n", interfaceTag);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,NULL,moreData,FALSE)) == -ENOSPC) {
- unifi_trace(priv,UDBG1,"frame_and_send_queued_pdu failed with ENOSPC for host tag = %x\n", buffered_pkt->hostTag);
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, NULL, moreData, FALSE)) == -ENOSPC) {
+ unifi_trace(priv, UDBG1, "frame_and_send_queued_pdu failed with ENOSPC for host tag = %x\n", buffered_pkt->hostTag);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &interfacePriv->genericMulticastOrBroadCastMgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
break;
} else {
- unifi_trace(priv,UDBG1,"send_multicast_frames: Send genericMulticastOrBroadCastMgtFrames (%x, %x)\n",
+ unifi_trace(priv, UDBG1, "send_multicast_frames: Send genericMulticastOrBroadCastMgtFrames (%x, %x)\n",
buffered_pkt->hostTag,
r);
if(r) {
@@ -2051,35 +2051,35 @@ u8 send_multicast_frames(unifi_priv_t *priv, u16 interfaceTag)
hostTag = buffered_pkt->hostTag;
pduSent++;
} else {
- send_vif_availibility_rsp(priv,uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag),CSR_RC_UNSPECIFIED_FAILURE);
+ send_vif_availibility_rsp(priv, uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag), CSR_RC_UNSPECIFIED_FAILURE);
}
}
/* Buffered frame sent successfully */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
interfacePriv->noOfbroadcastPktQueued--;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
kfree(buffered_pkt);
}
}
}
- if(!isRouterBufferEnabled(priv,UNIFI_TRAFFIC_Q_CONTENTION)) {
- while((interfacePriv->dtimActive)&& (buffered_pkt=dequeue_tx_data_pdu(priv,&interfacePriv->genericMulticastOrBroadCastFrames))) {
+ if(!isRouterBufferEnabled(priv, UNIFI_TRAFFIC_Q_CONTENTION)) {
+ while((interfacePriv->dtimActive)&& (buffered_pkt=dequeue_tx_data_pdu(priv, &interfacePriv->genericMulticastOrBroadCastFrames))) {
buffered_pkt->transmissionControl |= TRANSMISSION_CONTROL_TRIGGER_MASK;
moreData = (buffered_pkt->transmissionControl & TRANSMISSION_CONTROL_EOSP_MASK)?FALSE:TRUE;
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,NULL,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, NULL, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &interfacePriv->genericMulticastOrBroadCastFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
break;
} else {
if(r) {
- unifi_trace(priv,UDBG1,"send_multicast_frames: Send genericMulticastOrBroadCastFrame failed (%x, %x)\n",
+ unifi_trace(priv, UDBG1, "send_multicast_frames: Send genericMulticastOrBroadCastFrame failed (%x, %x)\n",
buffered_pkt->hostTag,
r);
unifi_net_data_free(priv, &buffered_pkt->bulkdata);
@@ -2090,26 +2090,26 @@ u8 send_multicast_frames(unifi_priv_t *priv, u16 interfaceTag)
pduSent ++;
hostTag = buffered_pkt->hostTag;
} else {
- send_vif_availibility_rsp(priv,uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag),CSR_RC_UNSPECIFIED_FAILURE);
+ send_vif_availibility_rsp(priv, uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag), CSR_RC_UNSPECIFIED_FAILURE);
}
}
/* Buffered frame sent successfully */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
interfacePriv->noOfbroadcastPktQueued--;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
kfree(buffered_pkt);
}
}
}
if((interfacePriv->dtimActive == FALSE)) {
/* Record the host Tag*/
- unifi_trace(priv,UDBG2,"send_multicast_frames: Recorded hostTag of EOSP packet: = 0x%x\n",hostTag);
+ unifi_trace(priv, UDBG2, "send_multicast_frames: Recorded hostTag of EOSP packet: = 0x%x\n", hostTag);
interfacePriv->multicastPduHostTag = hostTag;
}
return pduSent;
}
#endif
-void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv,u8 *sigdata,
+void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv, u8 *sigdata,
u32 siglen)
{
#ifdef CSR_SUPPORT_SME
@@ -2148,15 +2148,15 @@ void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv,u8 *sigdata,
/* This condition can occur because of a potential race where the
TIM is not yet reset as host is waiting for confirm but it is sent
by firmware and DTIM occurs*/
- unifi_notice(priv,"ma_vif_availibility_ind recevied for multicast but queues are empty%d\n",interfaceTag);
- send_vif_availibility_rsp(priv,ind->VirtualInterfaceIdentifier,CSR_RC_NO_BUFFERED_BROADCAST_MULTICAST_FRAMES);
+ unifi_notice(priv, "ma_vif_availibility_ind recevied for multicast but queues are empty%d\n", interfaceTag);
+ send_vif_availibility_rsp(priv, ind->VirtualInterfaceIdentifier, CSR_RC_NO_BUFFERED_BROADCAST_MULTICAST_FRAMES);
interfacePriv->dtimActive = FALSE;
if(interfacePriv->multicastPduHostTag == 0xffffffff) {
- unifi_notice(priv,"ma_vif_availibility_ind recevied for multicast but queues are empty%d\n",interfaceTag);
+ unifi_notice(priv, "ma_vif_availibility_ind recevied for multicast but queues are empty%d\n", interfaceTag);
/* This may be an extra request in very rare race conditions but it is fine as it would atleast remove the potential lock up */
if (!interfacePriv->bcTimSetReqPendingFlag)
{
- update_tim(priv,0,CSR_WIFI_TIM_RESET,interfaceTag, 0xFFFFFFFF);
+ update_tim(priv, 0, CSR_WIFI_TIM_RESET, interfaceTag, 0xFFFFFFFF);
}
else
{
@@ -2171,23 +2171,23 @@ void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv,u8 *sigdata,
return;
}
if(interfacePriv->dtimActive) {
- unifi_trace(priv,UDBG2,"DTIM Occurred for already active DTIM interface %d\n",interfaceTag);
+ unifi_trace(priv, UDBG2, "DTIM Occurred for already active DTIM interface %d\n", interfaceTag);
return;
} else {
- unifi_trace(priv,UDBG2,"DTIM Occurred for interface %d\n",interfaceTag);
+ unifi_trace(priv, UDBG2, "DTIM Occurred for interface %d\n", interfaceTag);
if(list_empty(&interfacePriv->genericMulticastOrBroadCastFrames)) {
- set_eosp_transmit_ctrl(priv,&interfacePriv->genericMulticastOrBroadCastMgtFrames);
+ set_eosp_transmit_ctrl(priv, &interfacePriv->genericMulticastOrBroadCastMgtFrames);
} else {
- set_eosp_transmit_ctrl(priv,&interfacePriv->genericMulticastOrBroadCastFrames);
+ set_eosp_transmit_ctrl(priv, &interfacePriv->genericMulticastOrBroadCastFrames);
}
}
interfacePriv->dtimActive = TRUE;
- pduSent = send_multicast_frames(priv,interfaceTag);
+ pduSent = send_multicast_frames(priv, interfaceTag);
}
else {
- unifi_error(priv,"Interface switching is not supported %d\n",interfaceTag);
+ unifi_error(priv, "Interface switching is not supported %d\n", interfaceTag);
resultCode = CSR_RC_NOT_SUPPORTED;
- send_vif_availibility_rsp(priv,ind->VirtualInterfaceIdentifier,CSR_RC_NOT_SUPPORTED);
+ send_vif_availibility_rsp(priv, ind->VirtualInterfaceIdentifier, CSR_RC_NOT_SUPPORTED);
}
#endif
}
@@ -2204,12 +2204,12 @@ static u8 uf_is_more_data_for_delivery_ac(unifi_priv_t *priv, CsrWifiRouterCtrlS
if(((staRecord->powersaveMode[i]==CSR_WIFI_AC_DELIVERY_ONLY_ENABLE)
||(staRecord->powersaveMode[i]==CSR_WIFI_AC_TRIGGER_AND_DELIVERY_ENABLED))
&&(!list_empty(&staRecord->dataPdu[i]))) {
- unifi_trace(priv,UDBG2,"uf_is_more_data_for_delivery_ac: Data Available AC = %d\n", i);
+ unifi_trace(priv, UDBG2, "uf_is_more_data_for_delivery_ac: Data Available AC = %d\n", i);
return TRUE;
}
}
- unifi_trace(priv,UDBG2,"uf_is_more_data_for_delivery_ac: Data NOT Available \n");
+ unifi_trace(priv, UDBG2, "uf_is_more_data_for_delivery_ac: Data NOT Available \n");
return FALSE;
}
@@ -2222,12 +2222,12 @@ static u8 uf_is_more_data_for_usp_delivery(unifi_priv_t *priv, CsrWifiRouterCtrl
if(((staRecord->powersaveMode[i]==CSR_WIFI_AC_DELIVERY_ONLY_ENABLE)
||(staRecord->powersaveMode[i]==CSR_WIFI_AC_TRIGGER_AND_DELIVERY_ENABLED))
&&(!list_empty(&staRecord->dataPdu[i]))) {
- unifi_trace(priv,UDBG2,"uf_is_more_data_for_usp_delivery: Data Available AC = %d\n", i);
+ unifi_trace(priv, UDBG2, "uf_is_more_data_for_usp_delivery: Data Available AC = %d\n", i);
return TRUE;
}
}
- unifi_trace(priv,UDBG2,"uf_is_more_data_for_usp_delivery: Data NOT Available \n");
+ unifi_trace(priv, UDBG2, "uf_is_more_data_for_usp_delivery: Data NOT Available \n");
return FALSE;
}
@@ -2272,18 +2272,18 @@ void uf_send_buffered_data_from_delivery_ac(unifi_priv_t *priv,
return;
}
while((buffered_pkt=dequeue_tx_data_pdu(priv, txList))) {
- if((IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag))) {
+ if((IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag))) {
unifi_trace(priv, UDBG2, "uf_send_buffered_data_from_delivery_ac: DTIM Active, suspend UAPSD, staId: 0x%x\n",
staInfo->aid);
/* Once resume called, the U-APSD delivery operation will resume */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->uspSuspend = TRUE;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
/* re-queueing the packet as DTIM started */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
- list_add(&buffered_pkt->q,txList);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
+ list_add(&buffered_pkt->q, txList);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
break;
}
@@ -2315,20 +2315,20 @@ void uf_send_buffered_data_from_delivery_ac(unifi_priv_t *priv,
unifi_warning(priv, "uf_send_buffered_data_from_delivery_ac: non U-APSD !!! \n");
}
- unifi_trace(priv,UDBG2,"uf_send_buffered_data_from_delivery_ac : MoreData:%d, EOSP:%d\n",moreData,eosp);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_data_from_delivery_ac : MoreData:%d, EOSP:%d\n", moreData, eosp);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staInfo,moreData,eosp)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staInfo, moreData, eosp)) == -ENOSPC) {
unifi_trace(priv, UDBG2, "uf_send_buffered_data_from_delivery_ac: UASPD suspended, ENOSPC in hipQ=%x\n", queue);
/* Once resume called, the U-APSD delivery operation will resume */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->uspSuspend = TRUE;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
- list_add(&buffered_pkt->q,txList);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
+ list_add(&buffered_pkt->q, txList);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[queue]=(u8)(staInfo->assignedHandle);
break;
} else {
@@ -2337,17 +2337,17 @@ void uf_send_buffered_data_from_delivery_ac(unifi_priv_t *priv,
unifi_net_data_free(priv, &buffered_pkt->bulkdata);
}
kfree(buffered_pkt);
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->noOfSpFramesSent++;
if((!moreData) || (staInfo->noOfSpFramesSent == staInfo->maxSpLength)) {
unifi_trace(priv, UDBG2, "uf_send_buffered_data_from_delivery_ac: Terminating USP\n");
staInfo->uapsdActive = FALSE;
staInfo->uspSuspend = FALSE;
staInfo->noOfSpFramesSent = 0;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
break;
}
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
}
unifi_trace(priv, UDBG2, "--uf_send_buffered_data_from_delivery_ac, active=%x\n", staInfo->uapsdActive);
@@ -2364,25 +2364,25 @@ void uf_send_buffered_data_from_ac(unifi_priv_t *priv,
u8 moreData = FALSE;
s8 r =0;
- unifi_trace(priv,UDBG2,"uf_send_buffered_data_from_ac :\n");
+ unifi_trace(priv, UDBG2, "uf_send_buffered_data_from_ac :\n");
- while(!isRouterBufferEnabled(priv,queue) &&
+ while(!isRouterBufferEnabled(priv, queue) &&
((buffered_pkt=dequeue_tx_data_pdu(priv, txList))!=NULL)){
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- unifi_trace(priv,UDBG3,"uf_send_buffered_data_from_ac : MoreData:%d, EOSP:%d\n",moreData,eosp);
+ unifi_trace(priv, UDBG3, "uf_send_buffered_data_from_ac : MoreData:%d, EOSP:%d\n", moreData, eosp);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staInfo,moreData,eosp)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staInfo, moreData, eosp)) == -ENOSPC) {
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
- list_add(&buffered_pkt->q,txList);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
+ list_add(&buffered_pkt->q, txList);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
if(staInfo != NULL){
priv->pausedStaHandle[queue]=(u8)(staInfo->assignedHandle);
}
- unifi_trace(priv,UDBG3," uf_send_buffered_data_from_ac: PDU sending failed .. no space for queue %d \n",queue);
+ unifi_trace(priv, UDBG3, " uf_send_buffered_data_from_ac: PDU sending failed .. no space for queue %d \n", queue);
} else {
if(r){
/* the PDU failed where we can't do any thing so free the storage */
@@ -2394,10 +2394,10 @@ void uf_send_buffered_data_from_ac(unifi_priv_t *priv,
}
-void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
+void uf_send_buffered_frames(unifi_priv_t *priv, unifi_TrafficQueue q)
{
u16 interfaceTag = GET_ACTIVE_INTERFACE_TAG(priv);
- u32 startIndex=0,endIndex=0;
+ u32 startIndex=0, endIndex=0;
CsrWifiRouterCtrlStaInfo_t * staInfo = NULL;
u8 queue;
u8 moreData = FALSE;
@@ -2412,14 +2412,14 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
if(interfacePriv->dtimActive) {
/* this function updates dtimActive*/
- send_multicast_frames(priv,interfaceTag);
+ send_multicast_frames(priv, interfaceTag);
if(!interfacePriv->dtimActive) {
moreData = (!list_empty(&interfacePriv->genericMulticastOrBroadCastMgtFrames) ||
!list_empty(&interfacePriv->genericMulticastOrBroadCastFrames));
if(!moreData) {
if (!interfacePriv->bcTimSetReqPendingFlag)
{
- update_tim(priv,0,CSR_WIFI_TIM_RESET,interfaceTag, 0XFFFFFFFF);
+ update_tim(priv, 0, CSR_WIFI_TIM_RESET, interfaceTag, 0XFFFFFFFF);
}
else
{
@@ -2436,8 +2436,8 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
!list_empty(&interfacePriv->genericMulticastOrBroadCastFrames));
if(!moreData) {
/* This should never happen but if it happens, we need a way out */
- unifi_error(priv,"ERROR: No More Data but DTIM is active sending Response\n");
- send_vif_availibility_rsp(priv,uf_get_vif_identifier(interfacePriv->interfaceMode,interfaceTag),CSR_RC_NO_BUFFERED_BROADCAST_MULTICAST_FRAMES);
+ unifi_error(priv, "ERROR: No More Data but DTIM is active sending Response\n");
+ send_vif_availibility_rsp(priv, uf_get_vif_identifier(interfacePriv->interfaceMode, interfaceTag), CSR_RC_NO_BUFFERED_BROADCAST_MULTICAST_FRAMES);
interfacePriv->dtimActive = FALSE;
}
}
@@ -2450,9 +2450,9 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
if(queue == UNIFI_TRAFFIC_Q_VO) {
- unifi_trace(priv,UDBG2,"uf_send_buffered_frames : trying mgt from queue=%d\n",queue);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_frames : trying mgt from queue=%d\n", queue);
for(startIndex= 0; startIndex < UNIFI_MAX_CONNECTIONS;startIndex++) {
- staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv,startIndex,interfaceTag);
+ staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv, startIndex, interfaceTag);
if(!staInfo ) {
continue;
} else if((staInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_POWER_SAVE)
@@ -2464,31 +2464,31 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
&&(staInfo->uapsdActive == FALSE)){
/*Non-UAPSD case push the management frames out*/
if(!list_empty(&staInfo->mgtFrames)){
- uf_send_buffered_data_from_ac(priv,staInfo, UNIFI_TRAFFIC_Q_VO, &staInfo->mgtFrames);
+ uf_send_buffered_data_from_ac(priv, staInfo, UNIFI_TRAFFIC_Q_VO, &staInfo->mgtFrames);
}
}
- if(isRouterBufferEnabled(priv,queue)) {
- unifi_notice(priv,"uf_send_buffered_frames : No space Left for queue = %d\n",queue);
+ if(isRouterBufferEnabled(priv, queue)) {
+ unifi_notice(priv, "uf_send_buffered_frames : No space Left for queue = %d\n", queue);
break;
}
}
/*push generic management frames out*/
if(!list_empty(&interfacePriv->genericMgtFrames)) {
- unifi_trace(priv,UDBG2,"uf_send_buffered_frames : trying generic mgt from queue=%d\n",queue);
- uf_send_buffered_data_from_ac(priv,staInfo, UNIFI_TRAFFIC_Q_VO, &interfacePriv->genericMgtFrames);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_frames : trying generic mgt from queue=%d\n", queue);
+ uf_send_buffered_data_from_ac(priv, staInfo, UNIFI_TRAFFIC_Q_VO, &interfacePriv->genericMgtFrames);
}
}
- unifi_trace(priv,UDBG2,"uf_send_buffered_frames : Resume called for Queue=%d\n",queue);
- unifi_trace(priv,UDBG2,"uf_send_buffered_frames : start=%d end=%d\n",startIndex,endIndex);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_frames : Resume called for Queue=%d\n", queue);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_frames : start=%d end=%d\n", startIndex, endIndex);
startIndex = priv->pausedStaHandle[queue];
endIndex = (startIndex + UNIFI_MAX_CONNECTIONS -1) % UNIFI_MAX_CONNECTIONS;
while(startIndex != endIndex) {
- staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv,startIndex,interfaceTag);
+ staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv, startIndex, interfaceTag);
if(!staInfo) {
startIndex ++;
if(startIndex >= UNIFI_MAX_CONNECTIONS) {
@@ -2504,7 +2504,7 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
continue;
}
/* Peer is active or U-APSD is active so send PDUs to the peer */
- unifi_trace(priv,UDBG2,"uf_send_buffered_frames : trying data from queue=%d\n",queue);
+ unifi_trace(priv, UDBG2, "uf_send_buffered_frames : trying data from queue=%d\n", queue);
if((staInfo != NULL)&&(staInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_ACTIVE)
@@ -2520,7 +2520,7 @@ void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue q)
startIndex = 0;
}
}
- if(isRouterBufferEnabled(priv,queue)) {
+ if(isRouterBufferEnabled(priv, queue)) {
priv->pausedStaHandle[queue] = endIndex;
} else {
priv->pausedStaHandle[queue] = 0;
@@ -2561,7 +2561,7 @@ u8 uf_is_more_data_for_non_delivery_ac(CsrWifiRouterCtrlStaInfo_t *staRecord)
}
-int uf_process_station_records_for_sending_data(unifi_priv_t *priv,u16 interfaceTag,
+int uf_process_station_records_for_sending_data(unifi_priv_t *priv, u16 interfaceTag,
CsrWifiRouterCtrlStaInfo_t *srcStaInfo,
CsrWifiRouterCtrlStaInfo_t *dstStaInfo)
{
@@ -2647,10 +2647,10 @@ static void uf_handle_uspframes_delivery(unifi_priv_t * priv, CsrWifiRouterCtrlS
return;
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->uapsdActive = TRUE;
staInfo->uspSuspend = FALSE;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
if(((staInfo->powersaveMode[UNIFI_TRAFFIC_Q_VO]==CSR_WIFI_AC_TRIGGER_AND_DELIVERY_ENABLED)||
(staInfo->powersaveMode[UNIFI_TRAFFIC_Q_VO]==CSR_WIFI_AC_DELIVERY_ONLY_ENABLE))
@@ -2666,9 +2666,9 @@ static void uf_handle_uspframes_delivery(unifi_priv_t * priv, CsrWifiRouterCtrlS
* NOTE: If we have sent Mgt frame also, we must send QNULL followed to terminate USP
*/
if (!staInfo->uspSuspend) {
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->uapsdActive = FALSE;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
unifi_trace(priv, UDBG2, "uf_handle_uspframes_delivery: sending QNull for trigger\n");
uf_send_qos_null(priv, interfaceTag, staInfo->peerMacAddress.a, (CSR_PRIORITY) staInfo->triggerFramePriority, staInfo);
@@ -2687,12 +2687,12 @@ static void uf_handle_uspframes_delivery(unifi_priv_t * priv, CsrWifiRouterCtrlS
}
if ((!staInfo->uapsdActive) ||
- (staInfo->uspSuspend && IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag))) {
+ (staInfo->uspSuspend && IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag))) {
/* If DTIM active found on one AC, No need to parse the remaining AC's
* as USP suspended. Break out of loop
*/
unifi_trace(priv, UDBG2, "uf_handle_uspframes_delivery: suspend=%x, DTIM=%x, USP terminated=%s\n",
- staInfo->uspSuspend, IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag),
+ staInfo->uspSuspend, IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag),
staInfo->uapsdActive?"NO":"YES");
break;
}
@@ -2704,7 +2704,7 @@ static void uf_handle_uspframes_delivery(unifi_priv_t * priv, CsrWifiRouterCtrlS
*/
is_all_ac_deliver_enabled_and_moredata(staInfo, &allDeliveryEnabled, &dataAvailable);
if ((allDeliveryEnabled && !dataAvailable)) {
- if ((staInfo->timSet != CSR_WIFI_TIM_RESET) || (staInfo->timSet != CSR_WIFI_TIM_RESETTING)) {
+ if ((staInfo->timSet != CSR_WIFI_TIM_RESET) && (staInfo->timSet != CSR_WIFI_TIM_RESETTING)) {
staInfo->updateTimReqQueued = (u8) CSR_WIFI_TIM_RESET;
unifi_trace(priv, UDBG4, " --uf_handle_uspframes_delivery, UAPSD timset\n");
if (!staInfo->timRequestPendingFlag) {
@@ -2734,9 +2734,9 @@ void uf_process_wmm_deliver_ac_uapsd(unifi_priv_t * priv,
if((srcStaInfo->powersaveMode[priority_q]==CSR_WIFI_AC_TRIGGER_ONLY_ENABLED)
||(srcStaInfo->powersaveMode[priority_q]==CSR_WIFI_AC_TRIGGER_AND_DELIVERY_ENABLED)) {
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
srcStaInfo->triggerFramePriority = priority;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
unifi_trace(priv, UDBG2, "uf_process_wmm_deliver_ac_uapsd: trigger frame, Begin U-APSD, triggerQ=%x\n", priority_q);
uf_handle_uspframes_delivery(priv, srcStaInfo, interfaceTag);
}
@@ -2744,7 +2744,7 @@ void uf_process_wmm_deliver_ac_uapsd(unifi_priv_t * priv,
}
-void uf_send_qos_null(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRIORITY priority,CsrWifiRouterCtrlStaInfo_t * srcStaInfo)
+void uf_send_qos_null(unifi_priv_t * priv, u16 interfaceTag, const u8 *da, CSR_PRIORITY priority, CsrWifiRouterCtrlStaInfo_t * srcStaInfo)
{
bulk_data_param_t bulkdata;
CsrResult csrResult;
@@ -2806,14 +2806,14 @@ void uf_send_qos_null(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRI
r = ul_send_signal_unpacked(priv, &signal, &bulkdata);
if(r) {
- unifi_error(priv, "failed to send QOS data null packet result: %d\n",r);
+ unifi_error(priv, "failed to send QOS data null packet result: %d\n", r);
unifi_net_data_free(priv, &bulkdata.d[0]);
}
return;
}
-void uf_send_nulldata(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRIORITY priority,CsrWifiRouterCtrlStaInfo_t * srcStaInfo)
+void uf_send_nulldata(unifi_priv_t * priv, u16 interfaceTag, const u8 *da, CSR_PRIORITY priority, CsrWifiRouterCtrlStaInfo_t * srcStaInfo)
{
bulk_data_param_t bulkdata;
CsrResult csrResult;
@@ -2882,14 +2882,14 @@ void uf_send_nulldata(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRI
if(r == -ENOSPC) {
unifi_trace(priv, UDBG1, "uf_send_nulldata: ENOSPC Requeue the Null frame\n");
enque_tx_data_pdu(priv, &bulkdata, &srcStaInfo->dataPdu[priority_q], &signal, 1);
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
srcStaInfo->noOfPktQueued++;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
if(r && r != -ENOSPC){
- unifi_error(priv, "uf_send_nulldata: Failed to send Null frame Error = %d\n",r);
+ unifi_error(priv, "uf_send_nulldata: Failed to send Null frame Error = %d\n", r);
unifi_net_data_free(priv, &bulkdata.d[0]);
srcStaInfo->nullDataHostTag = INVALID_HOST_TAG;
}
@@ -2939,7 +2939,7 @@ u8 uf_check_broadcast_bssid(unifi_priv_t *priv, const bulk_data_param_t *bulkdat
u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t * srcStaInfo,
- u8 pmBit,u16 interfaceTag)
+ u8 pmBit, u16 interfaceTag)
{
u8 moreData = FALSE;
u8 powerSaveChanged = FALSE;
@@ -2955,22 +2955,22 @@ u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t *
if(srcStaInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_ACTIVE) {
/* disable the preemption */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
srcStaInfo->currentPeerState =CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_POWER_SAVE;
powerSaveChanged = TRUE;
/* enable the preemption */
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
} else {
return powerSaveChanged;
}
} else {
if(srcStaInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_POWER_SAVE) {
/* disable the preemption */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
srcStaInfo->currentPeerState = CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_ACTIVE;
powerSaveChanged = TRUE;
/* enable the preemption */
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}else {
return powerSaveChanged;
}
@@ -2978,10 +2978,10 @@ u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t *
if(srcStaInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_ACTIVE) {
- unifi_trace(priv,UDBG3, "Peer with AID = %d is active now\n",srcStaInfo->aid);
- process_peer_active_transition(priv,srcStaInfo,interfaceTag);
+ unifi_trace(priv, UDBG3, "Peer with AID = %d is active now\n", srcStaInfo->aid);
+ process_peer_active_transition(priv, srcStaInfo, interfaceTag);
} else {
- unifi_trace(priv,UDBG3, "Peer with AID = %d is in PS Now\n",srcStaInfo->aid);
+ unifi_trace(priv, UDBG3, "Peer with AID = %d is in PS Now\n", srcStaInfo->aid);
/* Set TIM if needed */
if(!srcStaInfo->wmmOrQosEnabled) {
moreData = (!list_empty(&srcStaInfo->mgtFrames) ||
@@ -2990,7 +2990,7 @@ u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t *
if(moreData && (srcStaInfo->timSet == CSR_WIFI_TIM_RESET)) {
unifi_trace(priv, UDBG3, "This condition should not occur\n");
if (!srcStaInfo->timRequestPendingFlag){
- update_tim(priv,srcStaInfo->aid,1,interfaceTag, srcStaInfo->assignedHandle);
+ update_tim(priv, srcStaInfo->aid, 1, interfaceTag, srcStaInfo->assignedHandle);
}
else
{
@@ -3013,7 +3013,7 @@ u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t *
if(moreData && (srcStaInfo->timSet == CSR_WIFI_TIM_RESET)) {
if (!srcStaInfo->timRequestPendingFlag){
- update_tim(priv,srcStaInfo->aid,1,interfaceTag, srcStaInfo->assignedHandle);
+ update_tim(priv, srcStaInfo->aid, 1, interfaceTag, srcStaInfo->assignedHandle);
}
else
{
@@ -3033,7 +3033,7 @@ u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t *
-void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceTag)
+void uf_process_ps_poll(unifi_priv_t *priv, u8* sa, u8* da, u8 pmBit, u16 interfaceTag)
{
CsrWifiRouterCtrlStaInfo_t *staRecord =
CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv, sa, interfaceTag);
@@ -3046,27 +3046,27 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
unifi_trace(priv, UDBG3, "entering uf_process_ps_poll\n");
if(!staRecord) {
- memcpy(peerMacAddress.a,sa,ETH_ALEN);
+ memcpy(peerMacAddress.a, sa, ETH_ALEN);
unifi_trace(priv, UDBG3, "In uf_process_ps_poll, sta record not found:unexpected frame addr = %x:%x:%x:%x:%x:%x\n",
- sa[0], sa[1],sa[2], sa[3], sa[4],sa[5]);
- CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,interfaceTag,peerMacAddress);
+ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5]);
+ CsrWifiRouterCtrlUnexpectedFrameIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, peerMacAddress);
return;
}
- uf_process_pm_bit_for_peer(priv,staRecord,pmBit,interfaceTag);
+ uf_process_pm_bit_for_peer(priv, staRecord, pmBit, interfaceTag);
/* Update station last activity time */
staRecord->activity_flag = TRUE;
/* This should not change the PM bit as PS-POLL has PM bit always set */
if(!pmBit) {
- unifi_notice (priv," PM bit reset in PS-POLL\n");
+ unifi_notice (priv, " PM bit reset in PS-POLL\n");
return;
}
- if(IS_DTIM_ACTIVE(interfacePriv->dtimActive,interfacePriv->multicastPduHostTag)) {
+ if(IS_DTIM_ACTIVE(interfacePriv->dtimActive, interfacePriv->multicastPduHostTag)) {
/* giving more priority to multicast packets so dropping ps-poll*/
- unifi_notice (priv," multicast transmission is going on so don't take action on PS-POLL\n");
+ unifi_notice (priv, " multicast transmission is going on so don't take action on PS-POLL\n");
return;
}
@@ -3078,13 +3078,13 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
!list_empty(&staRecord->mgtFrames));
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->mgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
unifi_trace(priv, UDBG1, "(ENOSPC) PS-POLL received : PDU sending failed \n");
priv->pausedStaHandle[3]=(u8)(staRecord->assignedHandle);
} else {
@@ -3101,13 +3101,13 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
!list_empty(&staRecord->dataPdu[UNIFI_TRAFFIC_Q_VO]));
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->dataPdu[UNIFI_TRAFFIC_Q_VO]);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[3]=(u8)(staRecord->assignedHandle);
unifi_trace(priv, UDBG1, "(ENOSPC) PS-POLL received : PDU sending failed \n");
} else {
@@ -3123,13 +3123,13 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
moreData = !list_empty(&staRecord->dataPdu[UNIFI_TRAFFIC_Q_CONTENTION]);
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->dataPdu[UNIFI_TRAFFIC_Q_CONTENTION]);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[0]=(u8)(staRecord->assignedHandle);
unifi_trace(priv, UDBG1, "(ENOSPC) PS-POLL received : PDU sending failed \n");
} else {
@@ -3150,7 +3150,7 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
if(!moreData && (staRecord->timSet == CSR_WIFI_TIM_SET)) {
unifi_trace(priv, UDBG3, "more data = NULL, set tim to 0 in uf_process_ps_poll\n");
if (!staRecord->timRequestPendingFlag){
- update_tim(priv,staRecord->aid,0,interfaceTag, staRecord->assignedHandle);
+ update_tim(priv, staRecord->aid, 0, interfaceTag, staRecord->assignedHandle);
}
else
{
@@ -3165,7 +3165,7 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
} else {
u8 allDeliveryEnabled = 0, dataAvailable = 0;
- unifi_trace(priv, UDBG3,"Qos Support station.Processing PS-Poll\n");
+ unifi_trace(priv, UDBG3, "Qos Support station.Processing PS-Poll\n");
/*Send Data From Management Frames*/
/* Priority orders for delivering the buffered packets are
@@ -3179,7 +3179,7 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
if (allDeliveryEnabled) {
unifi_trace(priv, UDBG3, "uf_process_ps_poll: All ACs are delivery enable so Sending QOS Null in response of Ps-poll\n");
- uf_send_qos_null(priv,interfaceTag,sa,CSR_QOS_UP0,staRecord);
+ uf_send_qos_null(priv, interfaceTag, sa, CSR_QOS_UP0, staRecord);
return;
}
@@ -3192,13 +3192,13 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Last parameter is EOSP & its false always for PS-POLL processing */
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->mgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[0]=(u8)(staRecord->assignedHandle);
unifi_trace(priv, UDBG1, "(ENOSPC) PS-POLL received : PDU sending failed \n");
} else {
@@ -3227,13 +3227,13 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
buffered_pkt->transmissionControl |= (TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Last parameter is EOSP & its false always for PS-POLL processing */
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staRecord,moreData,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staRecord, moreData, FALSE)) == -ENOSPC) {
/* Clear the trigger bit transmission control*/
buffered_pkt->transmissionControl &= ~(TRANSMISSION_CONTROL_TRIGGER_MASK | TRANSMISSION_CONTROL_EOSP_MASK);
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staRecord->dataPdu[i]);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[0]=(u8)(staRecord->assignedHandle);
unifi_trace(priv, UDBG1, "(ENOSPC) PS-POLL received : PDU sending failed \n");
} else {
@@ -3256,7 +3256,7 @@ void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceT
if(!moreData && (staRecord->timSet == CSR_WIFI_TIM_SET)) {
unifi_trace(priv, UDBG3, "more data = NULL, set tim to 0 in uf_process_ps_poll\n");
if (!staRecord->timRequestPendingFlag){
- update_tim(priv,staRecord->aid,0,interfaceTag, staRecord->assignedHandle);
+ update_tim(priv, staRecord->aid, 0, interfaceTag, staRecord->assignedHandle);
}
else
{
@@ -3311,7 +3311,7 @@ void uf_prepare_send_cfm_list_for_queued_pkts(unifi_priv_t * priv,
struct list_head *placeHolder;
unsigned long lock_flags;
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
/* Search through the list and if confirmation required for any frames,
add it to the send_cfm list */
@@ -3337,7 +3337,7 @@ void uf_prepare_send_cfm_list_for_queued_pkts(unifi_priv_t * priv,
}
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
@@ -3352,7 +3352,7 @@ void uf_flush_list(unifi_priv_t * priv, struct list_head * list)
unifi_trace(priv, UDBG5, "entering the uf_flush_list \n");
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
/* go through list, delete & free memory */
list_for_each_safe(listHead, placeHolder, list) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
@@ -3378,7 +3378,7 @@ void uf_flush_list(unifi_priv_t * priv, struct list_head * list)
priv->noOfPktQueuedInDriver--;
}
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
tx_buffered_packets_t *dequeue_tx_data_pdu(unifi_priv_t *priv, struct list_head *txList)
@@ -3403,13 +3403,13 @@ tx_buffered_packets_t *dequeue_tx_data_pdu(unifi_priv_t *priv, struct list_head
}
/* return first node after header, & delete from the list && atleast one item exist */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_for_each_safe(listHead, placeHolder, txList) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
list_del(listHead);
break;
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
if (tx_q_item) {
unifi_trace(priv, UDBG5,
@@ -3440,20 +3440,20 @@ CsrWifiRouterCtrlStaInfo_t *CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(
interfacePriv = priv->interfacePriv[interfaceTag];
/* disable the preemption until station record is fetched */
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
for (i = 0; i < UNIFI_MAX_CONNECTIONS; i++) {
if (interfacePriv->staInfo[i]!= NULL) {
if (!memcmp(((CsrWifiRouterCtrlStaInfo_t *) (interfacePriv->staInfo[i]))->peerMacAddress.a, peerMacAddress, ETH_ALEN)) {
/* enable the preemption as station record is fetched */
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
unifi_trace(priv, UDBG5, "peer entry found in station record\n");
return ((CsrWifiRouterCtrlStaInfo_t *) (interfacePriv->staInfo[i]));
}
}
}
/* enable the preemption as station record is fetched */
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
unifi_trace(priv, UDBG5, "peer entry not found in station record\n");
return NULL;
}
@@ -3487,7 +3487,7 @@ void uf_check_inactivity(unifi_priv_t *priv, u16 interfaceTag, u32 currentTime)
return;
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
/* Go through the list of stations to check for inactivity */
for(i = 0; i < UNIFI_MAX_CONNECTIONS; i++) {
staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv, i, interfaceTag);
@@ -3502,7 +3502,7 @@ void uf_check_inactivity(unifi_priv_t *priv, u16 interfaceTag, u32 currentTime)
elapsedTime = (currentTime >= staInfo->lastActivity)?
(currentTime - staInfo->lastActivity):
(~((u32)0) - staInfo->lastActivity + currentTime);
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
if (elapsedTime > MAX_INACTIVITY_INTERVAL) {
memcpy((u8*)&peerMacAddress, (u8*)&staInfo->peerMacAddress, sizeof(CsrWifiMacAddress));
@@ -3545,7 +3545,7 @@ void uf_update_sta_activity(unifi_priv_t *priv, u16 interfaceTag, const u8 *peer
return;
}
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
/* Update activity */
staInfo->lastActivity = currentTime;
@@ -3558,7 +3558,7 @@ void uf_update_sta_activity(unifi_priv_t *priv, u16 interfaceTag, const u8 *peer
(currentTime - interfacePriv->last_inactivity_check):
(~((u32)0) - interfacePriv->last_inactivity_check + currentTime);
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
/* Check if it is time to run the inactivity handler */
if (elapsedTime > INACTIVITY_CHECK_INTERVAL) {
@@ -3572,19 +3572,19 @@ void resume_unicast_buffered_frames(unifi_priv_t *priv, u16 interfaceTag)
u8 i;
int j;
tx_buffered_packets_t * buffered_pkt = NULL;
- u8 hipslotFree[4] = {TRUE,TRUE,TRUE,TRUE};
+ u8 hipslotFree[4] = {TRUE, TRUE, TRUE, TRUE};
int r;
unsigned long lock_flags;
- while(!isRouterBufferEnabled(priv,3) &&
- ((buffered_pkt=dequeue_tx_data_pdu(priv,&interfacePriv->genericMgtFrames))!=NULL)) {
+ while(!isRouterBufferEnabled(priv, 3) &&
+ ((buffered_pkt=dequeue_tx_data_pdu(priv, &interfacePriv->genericMgtFrames))!=NULL)) {
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,NULL,0,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, NULL, 0, FALSE)) == -ENOSPC) {
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &interfacePriv->genericMgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
hipslotFree[3]=FALSE;
break;
}else {
@@ -3606,12 +3606,12 @@ void resume_unicast_buffered_frames(unifi_priv_t *priv, u16 interfaceTag)
while((( TRUE == hipslotFree[3] ) && (buffered_pkt=dequeue_tx_data_pdu(priv, &staInfo->mgtFrames)))) {
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staInfo,0,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staInfo, 0, FALSE)) == -ENOSPC) {
unifi_trace(priv, UDBG3, "(ENOSPC) in resume_unicast_buffered_frames:: hip slots are full for voice queue\n");
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staInfo->mgtFrames);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[3]=(u8)(staInfo->assignedHandle);
hipslotFree[3] = FALSE;
break;
@@ -3632,11 +3632,11 @@ void resume_unicast_buffered_frames(unifi_priv_t *priv, u16 interfaceTag)
while((buffered_pkt=dequeue_tx_data_pdu(priv, &staInfo->dataPdu[j]))) {
buffered_pkt->transmissionControl &=
~(TRANSMISSION_CONTROL_TRIGGER_MASK|TRANSMISSION_CONTROL_EOSP_MASK);
- if((r=frame_and_send_queued_pdu(priv,buffered_pkt,staInfo,0,FALSE)) == -ENOSPC) {
+ if((r=frame_and_send_queued_pdu(priv, buffered_pkt, staInfo, 0, FALSE)) == -ENOSPC) {
/* Enqueue at the head of the queue */
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_add(&buffered_pkt->q, &staInfo->dataPdu[j]);
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
priv->pausedStaHandle[j]=(u8)(staInfo->assignedHandle);
hipslotFree[j]=FALSE;
break;
@@ -3653,7 +3653,7 @@ void resume_unicast_buffered_frames(unifi_priv_t *priv, u16 interfaceTag)
}
}
}
-void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv,u16 interfaceTag)
+void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv, u16 interfaceTag)
{
netInterface_priv_t *interfacePriv = priv->interfacePriv[interfaceTag];
@@ -3668,15 +3668,15 @@ void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv,u16 interface
* because we have received any mgmt packet so it should not hold for long time
* peer may time out.
*/
- spin_lock_irqsave(&priv->tx_q_lock,lock_flags);
+ spin_lock_irqsave(&priv->tx_q_lock, lock_flags);
list_for_each_safe(listHead, placeHolder, &interfacePriv->genericMulticastOrBroadCastFrames) {
tx_q_item = list_entry(listHead, tx_buffered_packets_t, q);
tx_q_item->transmissionControl |= TRANSMISSION_CONTROL_EOSP_MASK;
tx_q_item->transmissionControl = (tx_q_item->transmissionControl & ~(CSR_NO_CONFIRM_REQUIRED));
- unifi_trace(priv, UDBG1,"updating eosp for list Head hostTag:= 0x%x ",tx_q_item->hostTag);
+ unifi_trace(priv, UDBG1, "updating eosp for list Head hostTag:= 0x%x ", tx_q_item->hostTag);
break;
}
- spin_unlock_irqrestore(&priv->tx_q_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->tx_q_lock, lock_flags);
}
}
@@ -3692,7 +3692,7 @@ void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv,u16 interface
* interfaceTag For which resume should happen
* ---------------------------------------------------------------------------
*/
-void resume_suspended_uapsd(unifi_priv_t* priv,u16 interfaceTag)
+void resume_suspended_uapsd(unifi_priv_t* priv, u16 interfaceTag)
{
u8 startIndex;
@@ -3701,7 +3701,7 @@ void resume_suspended_uapsd(unifi_priv_t* priv,u16 interfaceTag)
unifi_trace(priv, UDBG2, "++resume_suspended_uapsd: \n");
for(startIndex= 0; startIndex < UNIFI_MAX_CONNECTIONS;startIndex++) {
- staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv,startIndex,interfaceTag);
+ staInfo = CsrWifiRouterCtrlGetStationRecordFromHandle(priv, startIndex, interfaceTag);
if(!staInfo || !staInfo->wmmOrQosEnabled) {
continue;
@@ -3716,10 +3716,10 @@ void resume_suspended_uapsd(unifi_priv_t* priv,u16 interfaceTag)
staInfo->currentPeerState, staInfo->uapsdActive, staInfo->uspSuspend);
if (staInfo->currentPeerState == CSR_WIFI_ROUTER_CTRL_PEER_CONNECTED_ACTIVE)
{
- spin_lock_irqsave(&priv->staRecord_lock,lock_flags);
+ spin_lock_irqsave(&priv->staRecord_lock, lock_flags);
staInfo->uapsdActive = FALSE;
staInfo->uspSuspend = FALSE;
- spin_unlock_irqrestore(&priv->staRecord_lock,lock_flags);
+ spin_unlock_irqrestore(&priv->staRecord_lock, lock_flags);
}
}
}
diff --git a/drivers/staging/csr/unifi_priv.h b/drivers/staging/csr/unifi_priv.h
index d20d74ce56cb..37302f3c2f6c 100644
--- a/drivers/staging/csr/unifi_priv.h
+++ b/drivers/staging/csr/unifi_priv.h
@@ -259,7 +259,7 @@ typedef u8 CsrWifiAcPowersaveMode;
#define IS_DELIVERY_ENABLED(mode) (mode & CSR_WIFI_AC_DELIVERY_ONLY_ENABLE)? 1: 0
#define IS_DELIVERY_AND_TRIGGER_ENABLED(mode) ((mode & CSR_WIFI_AC_DELIVERY_ONLY_ENABLE)||(mode & CSR_WIFI_AC_TRIGGER_AND_DELIVERY_ENABLED))? 1: 0
-#define IS_DTIM_ACTIVE(flag,hostTag) ((flag == TRUE || hostTag != INVALID_HOST_TAG))
+#define IS_DTIM_ACTIVE(flag, hostTag) ((flag == TRUE || hostTag != INVALID_HOST_TAG))
#define INVALID_HOST_TAG 0xFFFFFFFF
#define UNIFI_TRAFFIC_Q_CONTENTION UNIFI_TRAFFIC_Q_BE
@@ -767,9 +767,9 @@ typedef struct netInterface_priv
} netInterface_priv_t;
#ifdef CSR_SUPPORT_SME
-#define routerStartBuffering(priv,queue) priv->routerBufferEnable[(queue)] = TRUE;
-#define routerStopBuffering(priv,queue) priv->routerBufferEnable[(queue)] = FALSE;
-#define isRouterBufferEnabled(priv,queue) priv->routerBufferEnable[(queue)]
+#define routerStartBuffering(priv, queue) priv->routerBufferEnable[(queue)] = TRUE;
+#define routerStopBuffering(priv, queue) priv->routerBufferEnable[(queue)] = FALSE;
+#define isRouterBufferEnabled(priv, queue) priv->routerBufferEnable[(queue)]
#endif
#ifdef USE_DRIVER_LOCK
@@ -919,8 +919,8 @@ int uf_verify_m4(unifi_priv_t *priv, const unsigned char *packet,
#ifdef CSR_SUPPORT_SME
u8 uf_check_broadcast_bssid(unifi_priv_t *priv, const bulk_data_param_t *bulkdata);
-u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t * srcStaInfo,u8 pmBit,u16 interfaceTag);
-void uf_process_ps_poll(unifi_priv_t *priv,u8* sa,u8* da,u8 pmBit,u16 interfaceTag);
+u8 uf_process_pm_bit_for_peer(unifi_priv_t * priv, CsrWifiRouterCtrlStaInfo_t * srcStaInfo, u8 pmBit, u16 interfaceTag);
+void uf_process_ps_poll(unifi_priv_t *priv, u8* sa, u8* da, u8 pmBit, u16 interfaceTag);
int uf_ap_process_data_pdu(unifi_priv_t *priv, struct sk_buff *skb,
struct ethhdr *ehdr, CsrWifiRouterCtrlStaInfo_t * srcStaInfo,
const CSR_SIGNAL *signal,
@@ -936,17 +936,17 @@ void uf_send_buffered_data_from_ac(unifi_priv_t *priv, CsrWifiRouterCtrlStaInfo_
void uf_send_buffered_data_from_delivery_ac(unifi_priv_t *priv, CsrWifiRouterCtrlStaInfo_t * staInfo, u8 queue, struct list_head *txList);
void uf_continue_uapsd(unifi_priv_t *priv, CsrWifiRouterCtrlStaInfo_t * staInfo);
-void uf_send_qos_null(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRIORITY priority,CsrWifiRouterCtrlStaInfo_t * srcStaInfo);
-void uf_send_nulldata(unifi_priv_t * priv,u16 interfaceTag, const u8 *da,CSR_PRIORITY priority,CsrWifiRouterCtrlStaInfo_t * srcStaInfo);
+void uf_send_qos_null(unifi_priv_t * priv, u16 interfaceTag, const u8 *da, CSR_PRIORITY priority, CsrWifiRouterCtrlStaInfo_t * srcStaInfo);
+void uf_send_nulldata(unifi_priv_t * priv, u16 interfaceTag, const u8 *da, CSR_PRIORITY priority, CsrWifiRouterCtrlStaInfo_t * srcStaInfo);
#endif
CsrResult uf_process_ma_packet_req(unifi_priv_t *priv, u8 *peerMacAddress, CSR_CLIENT_TAG hostTag, u16 interfaceTag, CSR_TRANSMISSION_CONTROL transmissionControl, CSR_RATE TransmitRate, CSR_PRIORITY priority, CSR_PROCESS_ID senderId, bulk_data_param_t *bulkdata);
-void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv,u8 *sigdata, u32 siglen);
+void uf_process_ma_vif_availibility_ind(unifi_priv_t *priv, u8 *sigdata, u32 siglen);
#ifdef CSR_SUPPORT_SME
-void uf_send_buffered_frames(unifi_priv_t *priv,unifi_TrafficQueue queue);
-int uf_process_station_records_for_sending_data(unifi_priv_t *priv,u16 interfaceTag,
+void uf_send_buffered_frames(unifi_priv_t *priv, unifi_TrafficQueue queue);
+int uf_process_station_records_for_sending_data(unifi_priv_t *priv, u16 interfaceTag,
CsrWifiRouterCtrlStaInfo_t *srcStaInfo,
CsrWifiRouterCtrlStaInfo_t *dstStaInfo);
void uf_prepare_send_cfm_list_for_queued_pkts(unifi_priv_t * priv,
@@ -958,8 +958,8 @@ void send_auto_ma_packet_confirm(unifi_priv_t *priv,
void uf_flush_list(unifi_priv_t * priv, struct list_head * list);
tx_buffered_packets_t *dequeue_tx_data_pdu(unifi_priv_t *priv, struct list_head *txList);
void resume_unicast_buffered_frames(unifi_priv_t *priv, u16 interfaceTag);
-void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv,u16 interfaceTag);
-void resume_suspended_uapsd(unifi_priv_t* priv,u16 interfaceTag);
+void update_eosp_to_head_of_broadcast_list_head(unifi_priv_t *priv, u16 interfaceTag);
+void resume_suspended_uapsd(unifi_priv_t* priv, u16 interfaceTag);
#endif
/*
* netdev.c
@@ -1048,14 +1048,14 @@ CsrWifiRouterCtrlStaInfo_t * CsrWifiRouterCtrlGetStationRecordFromHandle(unifi_p
u16 interfaceTag);
void uf_update_sta_activity(unifi_priv_t *priv, u16 interfaceTag, const u8 *peerMacAddress);
-void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv,u16 interfaceTag, const CSR_MA_PACKET_CONFIRM *pkt_cfm);
+void uf_process_ma_pkt_cfm_for_ap(unifi_priv_t *priv, u16 interfaceTag, const CSR_MA_PACKET_CONFIRM *pkt_cfm);
#endif
void uf_resume_data_plane(unifi_priv_t *priv, int queue,
CsrWifiMacAddress peer_address,
u16 interfaceTag);
void uf_free_pending_rx_packets(unifi_priv_t *priv, int queue,
- CsrWifiMacAddress peer_address,u16 interfaceTag);
+ CsrWifiMacAddress peer_address, u16 interfaceTag);
int uf_register_netdev(unifi_priv_t *priv, int numOfInterface);
void uf_unregister_netdev(unifi_priv_t *priv);
diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c
index 90295035621f..50908822b3c8 100644
--- a/drivers/staging/csr/unifi_sme.c
+++ b/drivers/staging/csr/unifi_sme.c
@@ -133,7 +133,7 @@ sme_log_event(ul_client_t *pcli,
unicastPdu = FALSE;
CsrWifiRouterCtrlMicFailureIndSend (priv->CSR_WIFI_SME_IFACEQUEUE, 0,
- (ind->VirtualInterfaceIdentifier & 0xff),peerMacAddress,
+ (ind->VirtualInterfaceIdentifier & 0xff), peerMacAddress,
unicastPdu);
return;
}
@@ -143,10 +143,10 @@ sme_log_event(ul_client_t *pcli,
{
u8 pmBit = (frmCtrl & 0x1000)?0x01:0x00;
u16 interfaceTag = (ind->VirtualInterfaceIdentifier & 0xff);
- CsrWifiRouterCtrlStaInfo_t *srcStaInfo = CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv,taddr,interfaceTag);
+ CsrWifiRouterCtrlStaInfo_t *srcStaInfo = CsrWifiRouterCtrlGetStationRecordFromPeerMacAddress(priv, taddr, interfaceTag);
if((srcStaInfo != NULL) && (uf_check_broadcast_bssid(priv, bulkdata)== FALSE))
{
- uf_process_pm_bit_for_peer(priv,srcStaInfo,pmBit,interfaceTag);
+ uf_process_pm_bit_for_peer(priv, srcStaInfo, pmBit, interfaceTag);
/* Update station last activity flag */
srcStaInfo->activity_flag = TRUE;
@@ -169,7 +169,7 @@ sme_log_event(ul_client_t *pcli,
return;
}
- unifi_trace(priv,UDBG1,"MA-PACKET Confirm (%x, %x)\n", cfm->HostTag, cfm->TransmissionStatus);
+ unifi_trace(priv, UDBG1, "MA-PACKET Confirm (%x, %x)\n", cfm->HostTag, cfm->TransmissionStatus);
interfacePriv = priv->interfacePriv[interfaceTag];
#ifdef CSR_SUPPORT_SME
@@ -177,7 +177,7 @@ sme_log_event(ul_client_t *pcli,
interfacePriv->interfaceMode == CSR_WIFI_ROUTER_CTRL_MODE_P2PGO) {
if(cfm->HostTag == interfacePriv->multicastPduHostTag){
- uf_process_ma_pkt_cfm_for_ap(priv ,interfaceTag, cfm);
+ uf_process_ma_pkt_cfm_for_ap(priv, interfaceTag, cfm);
}
}
#endif
@@ -395,7 +395,7 @@ uf_multicast_list_wq(struct work_struct *work)
interfacePriv->mc_list_count);
/* Flush the current list */
- CsrWifiRouterCtrlMulticastAddressIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0, interfaceTag, CSR_WIFI_SME_LIST_ACTION_FLUSH, 0, NULL);
+ CsrWifiRouterCtrlMulticastAddressIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, CSR_WIFI_SME_LIST_ACTION_FLUSH, 0, NULL);
mc_count = interfacePriv->mc_list_count;
mc_list = interfacePriv->mc_list;
@@ -419,7 +419,7 @@ uf_multicast_list_wq(struct work_struct *work)
return;
}
- CsrWifiRouterCtrlMulticastAddressIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,
+ CsrWifiRouterCtrlMulticastAddressIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0,
interfaceTag,
CSR_WIFI_SME_LIST_ACTION_ADD,
mc_count, multicast_address_list);
@@ -950,7 +950,7 @@ int
}
return i;
}
-int unifi_cfg_set_ap_config(unifi_priv_t * priv,unsigned char* arg)
+int unifi_cfg_set_ap_config(unifi_priv_t * priv, unsigned char* arg)
{
uf_cfg_ap_config_t cfg_ap_config;
char *buffer;
@@ -981,7 +981,7 @@ int unifi_cfg_set_ap_config(unifi_priv_t * priv,unsigned char* arg)
priv->ap_mac_config.phySupportedBitmap = cfg_ap_config.phySupportedBitmap;
priv->ap_mac_config.maxListenInterval=cfg_ap_config.maxListenInterval;
- priv->ap_mac_config.supportedRatesCount= uf_configure_supported_rates(priv->ap_mac_config.supportedRates,priv->ap_mac_config.phySupportedBitmap);
+ priv->ap_mac_config.supportedRatesCount= uf_configure_supported_rates(priv->ap_mac_config.supportedRates, priv->ap_mac_config.phySupportedBitmap);
return 0;
}
@@ -1051,7 +1051,7 @@ uf_ta_ind_wq(struct work_struct *work)
u16 interfaceTag = 0;
- CsrWifiRouterCtrlTrafficProtocolIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0,
+ CsrWifiRouterCtrlTrafficProtocolIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0,
interfaceTag,
ind->packet_type,
ind->direction,
@@ -1119,7 +1119,7 @@ uf_ta_sample_ind_wq(struct work_struct *work)
}
}
- CsrWifiRouterCtrlTrafficSampleIndSend(priv->CSR_WIFI_SME_IFACEQUEUE,0, interfaceTag, ind->stats);
+ CsrWifiRouterCtrlTrafficSampleIndSend(priv->CSR_WIFI_SME_IFACEQUEUE, 0, interfaceTag, ind->stats);
ind->in_use = 0;
@@ -1219,7 +1219,7 @@ void uf_send_pkt_to_encrypt(struct work_struct *work)
kfree(pktBulkData); /* Would have been copied over by the SME Handler */
} else {
- unifi_warning(priv, "uf_send_pkt_to_encrypt() is NOT applicable for interface mode - %d\n",interfacePriv->interfaceMode);
+ unifi_warning(priv, "uf_send_pkt_to_encrypt() is NOT applicable for interface mode - %d\n", interfacePriv->interfaceMode);
}
}/* uf_send_pkt_to_encrypt() */
#endif
diff --git a/drivers/staging/csr/unifi_sme.h b/drivers/staging/csr/unifi_sme.h
index b689cfe2b100..aff9aa178124 100644
--- a/drivers/staging/csr/unifi_sme.h
+++ b/drivers/staging/csr/unifi_sme.h
@@ -210,9 +210,9 @@ int sme_mgt_mib_get(unifi_priv_t *priv,
int sme_mgt_mib_set(unifi_priv_t *priv,
unsigned char *varbind, int length);
#ifdef CSR_SUPPORT_WEXT_AP
-int sme_ap_start(unifi_priv_t *priv,u16 interface_tag,CsrWifiSmeApConfig_t *ap_config);
-int sme_ap_stop(unifi_priv_t *priv,u16 interface_tag);
-int sme_ap_config(unifi_priv_t *priv,CsrWifiSmeApMacConfig *ap_mac_config, CsrWifiNmeApConfig *group_security_config);
+int sme_ap_start(unifi_priv_t *priv, u16 interface_tag, CsrWifiSmeApConfig_t *ap_config);
+int sme_ap_stop(unifi_priv_t *priv, u16 interface_tag);
+int sme_ap_config(unifi_priv_t *priv, CsrWifiSmeApMacConfig *ap_mac_config, CsrWifiNmeApConfig *group_security_config);
int uf_configure_supported_rates(u8 * supportedRates, u8 phySupportedBitmap);
#endif
int unifi_translate_scan(struct net_device *dev,
@@ -234,7 +234,7 @@ int unifi_cfg_get_info(unifi_priv_t *priv, unsigned char *arg);
int unifi_cfg_strict_draft_n(unifi_priv_t *priv, unsigned char *arg);
int unifi_cfg_enable_okc(unifi_priv_t *priv, unsigned char *arg);
#ifdef CSR_SUPPORT_WEXT_AP
-int unifi_cfg_set_ap_config(unifi_priv_t * priv,unsigned char* arg);
+int unifi_cfg_set_ap_config(unifi_priv_t * priv, unsigned char* arg);
#endif
diff --git a/drivers/staging/cxt1e1/comet.c b/drivers/staging/cxt1e1/comet.c
index 52224cdc967d..fabfd779c668 100644
--- a/drivers/staging/cxt1e1/comet.c
+++ b/drivers/staging/cxt1e1/comet.c
@@ -13,7 +13,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/io.h>
+#include <linux/io.h>
#include <linux/hdlc.h>
#include "pmcc4_sysdep.h"
#include "sbecom_inline_linux.h"
@@ -35,235 +35,253 @@ extern int cxt1e1_log_level;
#define COMET_NUM_UNITS 5 /* Number of points per entry in table */
/* forward references */
-STATIC void SetPwrLevel (comet_t * comet);
-STATIC void WrtRcvEqualizerTbl (ci_t * ci, comet_t * comet, u_int32_t *table);
-STATIC void WrtXmtWaveformTbl (ci_t * ci, comet_t * comet, u_int8_t table[COMET_NUM_SAMPLES][COMET_NUM_UNITS]);
+STATIC void SetPwrLevel(comet_t *comet);
+STATIC void WrtRcvEqualizerTbl(ci_t *ci, comet_t *comet, u_int32_t *table);
+STATIC void WrtXmtWaveformTbl(ci_t *ci, comet_t *comet, u_int8_t table[COMET_NUM_SAMPLES][COMET_NUM_UNITS]);
void *TWV_table[12] = {
- TWVLongHaul0DB, TWVLongHaul7_5DB, TWVLongHaul15DB, TWVLongHaul22_5DB,
- TWVShortHaul0, TWVShortHaul1, TWVShortHaul2, TWVShortHaul3, TWVShortHaul4,
- TWVShortHaul5,
- TWV_E1_75Ohm, /** PORT POINT - 75 Ohm not supported **/
- TWV_E1_120Ohm
+ TWVLongHaul0DB, TWVLongHaul7_5DB, TWVLongHaul15DB, TWVLongHaul22_5DB,
+ TWVShortHaul0, TWVShortHaul1, TWVShortHaul2, TWVShortHaul3,
+ TWVShortHaul4, TWVShortHaul5,
+ /** PORT POINT - 75 Ohm not supported **/
+ TWV_E1_75Ohm,
+ TWV_E1_120Ohm
};
static int
-lbo_tbl_lkup (int t1, int lbo)
-{
- if ((lbo < CFG_LBO_LH0) || (lbo > CFG_LBO_E120)) /* error switches to
- * default */
- {
- if (t1)
- lbo = CFG_LBO_LH0; /* default T1 waveform table */
- else
- lbo = CFG_LBO_E120; /* default E1 waveform table */
- }
- return (lbo - 1); /* make index ZERO relative */
+lbo_tbl_lkup(int t1, int lbo) {
+ /* error switches to default */
+ if ((lbo < CFG_LBO_LH0) || (lbo > CFG_LBO_E120)) {
+ if (t1)
+ /* default T1 waveform table */
+ lbo = CFG_LBO_LH0;
+ else
+ /* default E1 waveform table */
+ lbo = CFG_LBO_E120;
+ }
+ /* make index ZERO relative */
+ return lbo - 1;
}
-
-void
-init_comet (void *ci, comet_t * comet, u_int32_t port_mode, int clockmaster,
- u_int8_t moreParams)
+void init_comet(void *ci, comet_t *comet, u_int32_t port_mode, int clockmaster,
+ u_int8_t moreParams)
{
- u_int8_t isT1mode;
- u_int8_t tix = CFG_LBO_LH0; /* T1 default */
-
- isT1mode = IS_FRAME_ANY_T1 (port_mode);
- /* T1 or E1 */
- if (isT1mode)
- {
- pci_write_32 ((u_int32_t *) &comet->gbl_cfg, 0xa0); /* Select T1 Mode & PIO
- * output enabled */
- tix = lbo_tbl_lkup (isT1mode, CFG_LBO_LH0); /* default T1 waveform
- * table */
- } else
- {
- pci_write_32 ((u_int32_t *) &comet->gbl_cfg, 0x81); /* Select E1 Mode & PIO
- * output enabled */
- tix = lbo_tbl_lkup (isT1mode, CFG_LBO_E120); /* default E1 waveform
- * table */
- }
-
- if (moreParams & CFG_LBO_MASK)
- tix = lbo_tbl_lkup (isT1mode, moreParams & CFG_LBO_MASK); /* dial-in requested
- * waveform table */
-
- /* Tx line Intfc cfg ** Set for analog & no special patterns */
- pci_write_32 ((u_int32_t *) &comet->tx_line_cfg, 0x00); /* Transmit Line
- * Interface Config. */
-
- /* master test ** Ignore Test settings for now */
- pci_write_32 ((u_int32_t *) &comet->mtest, 0x00); /* making sure it's
- * Default value */
-
- /* Turn on Center (CENT) and everything else off */
- pci_write_32 ((u_int32_t *) &comet->rjat_cfg, 0x10); /* RJAT cfg */
- /* Set Jitter Attenuation to recommend T1 values */
- if (isT1mode)
- {
- pci_write_32 ((u_int32_t *) &comet->rjat_n1clk, 0x2F); /* RJAT Divider N1
- * Control */
- pci_write_32 ((u_int32_t *) &comet->rjat_n2clk, 0x2F); /* RJAT Divider N2
- * Control */
- } else
- {
- pci_write_32 ((u_int32_t *) &comet->rjat_n1clk, 0xFF); /* RJAT Divider N1
- * Control */
- pci_write_32 ((u_int32_t *) &comet->rjat_n2clk, 0xFF); /* RJAT Divider N2
- * Control */
- }
-
- /* Turn on Center (CENT) and everything else off */
- pci_write_32 ((u_int32_t *) &comet->tjat_cfg, 0x10); /* TJAT Config. */
-
- /* Do not bypass jitter attenuation and bypass elastic store */
- pci_write_32 ((u_int32_t *) &comet->rx_opt, 0x00); /* rx opts */
-
- /* TJAT ctrl & TJAT divider ctrl */
- /* Set Jitter Attenuation to recommended T1 values */
- if (isT1mode)
- {
- pci_write_32 ((u_int32_t *) &comet->tjat_n1clk, 0x2F); /* TJAT Divider N1
- * Control */
- pci_write_32 ((u_int32_t *) &comet->tjat_n2clk, 0x2F); /* TJAT Divider N2
- * Control */
- } else
- {
- pci_write_32 ((u_int32_t *) &comet->tjat_n1clk, 0xFF); /* TJAT Divider N1
- * Control */
- pci_write_32 ((u_int32_t *) &comet->tjat_n2clk, 0xFF); /* TJAT Divider N2
- * Control */
- }
-
- /* 1c: rx ELST cfg 20: tx ELST cfg 28&38: rx&tx data link ctrl */
- if (isT1mode)
- { /* Select 193-bit frame format */
- pci_write_32 ((u_int32_t *) &comet->rx_elst_cfg, 0x00);
- pci_write_32 ((u_int32_t *) &comet->tx_elst_cfg, 0x00);
- } else
- { /* Select 256-bit frame format */
- pci_write_32 ((u_int32_t *) &comet->rx_elst_cfg, 0x03);
- pci_write_32 ((u_int32_t *) &comet->tx_elst_cfg, 0x03);
- pci_write_32 ((u_int32_t *) &comet->rxce1_ctl, 0x00); /* disable T1 data link
- * receive */
- pci_write_32 ((u_int32_t *) &comet->txci1_ctl, 0x00); /* disable T1 data link
- * transmit */
- }
+ u_int8_t isT1mode;
+ /* T1 default */
+ u_int8_t tix = CFG_LBO_LH0;
+ isT1mode = IS_FRAME_ANY_T1(port_mode);
+ /* T1 or E1 */
+ if (isT1mode) {
+ /* Select T1 Mode & PIO output enabled */
+ pci_write_32((u_int32_t *) &comet->gbl_cfg, 0xa0);
+ /* default T1 waveform table */
+ tix = lbo_tbl_lkup(isT1mode, CFG_LBO_LH0);
+ } else {
+ /* Select E1 Mode & PIO output enabled */
+ pci_write_32((u_int32_t *) &comet->gbl_cfg, 0x81);
+ /* default E1 waveform table */
+ tix = lbo_tbl_lkup(isT1mode, CFG_LBO_E120);
+ }
+
+ if (moreParams & CFG_LBO_MASK)
+ /* dial-in requested waveform table */
+ tix = lbo_tbl_lkup(isT1mode, moreParams & CFG_LBO_MASK);
+ /* Tx line Intfc cfg Set for analog & no special patterns */
+ /* Transmit Line Interface Config. */
+ pci_write_32((u_int32_t *) &comet->tx_line_cfg, 0x00);
+ /* master test Ignore Test settings for now */
+ /* making sure it's Default value */
+ pci_write_32((u_int32_t *) &comet->mtest, 0x00);
+ /* Turn on Center (CENT) and everything else off */
+ /* RJAT cfg */
+ pci_write_32((u_int32_t *) &comet->rjat_cfg, 0x10);
+ /* Set Jitter Attenuation to recommend T1 values */
+ if (isT1mode) {
+ /* RJAT Divider N1 Control */
+ pci_write_32((u_int32_t *) &comet->rjat_n1clk, 0x2F);
+ /* RJAT Divider N2 Control */
+ pci_write_32((u_int32_t *) &comet->rjat_n2clk, 0x2F);
+ } else {
+ /* RJAT Divider N1 Control */
+ pci_write_32((u_int32_t *) &comet->rjat_n1clk, 0xFF);
+ /* RJAT Divider N2 Control */
+ pci_write_32((u_int32_t *) &comet->rjat_n2clk, 0xFF);
+ }
+
+ /* Turn on Center (CENT) and everything else off */
+ /* TJAT Config. */
+ pci_write_32((u_int32_t *) &comet->tjat_cfg, 0x10);
+
+ /* Do not bypass jitter attenuation and bypass elastic store */
+ /* rx opts */
+ pci_write_32((u_int32_t *) &comet->rx_opt, 0x00);
+
+ /* TJAT ctrl & TJAT divider ctrl */
+ /* Set Jitter Attenuation to recommended T1 values */
+ if (isT1mode) {
+ /* TJAT Divider N1 Control */
+ pci_write_32((u_int32_t *) &comet->tjat_n1clk, 0x2F);
+ /* TJAT Divider N2 Control */
+ pci_write_32((u_int32_t *) &comet->tjat_n2clk, 0x2F);
+ } else {
+ /* TJAT Divider N1 Control */
+ pci_write_32((u_int32_t *) &comet->tjat_n1clk, 0xFF);
+ /* TJAT Divider N2 Control */
+ pci_write_32((u_int32_t *) &comet->tjat_n2clk, 0xFF);
+ }
+
+ /* 1c: rx ELST cfg 20: tx ELST cfg 28&38: rx&tx data link ctrl */
+
+ /* Select 193-bit frame format */
+ if (isT1mode) {
+ pci_write_32((u_int32_t *) &comet->rx_elst_cfg, 0x00);
+ pci_write_32((u_int32_t *) &comet->tx_elst_cfg, 0x00);
+ } else {
+ /* Select 256-bit frame format */
+ pci_write_32((u_int32_t *) &comet->rx_elst_cfg, 0x03);
+ pci_write_32((u_int32_t *) &comet->tx_elst_cfg, 0x03);
+ /* disable T1 data link receive */
+ pci_write_32((u_int32_t *) &comet->rxce1_ctl, 0x00);
+ /* disable T1 data link transmit */
+ pci_write_32((u_int32_t *) &comet->txci1_ctl, 0x00);
+ }
/* the following is a default value */
/* Enable 8 out of 10 validation */
- pci_write_32 ((u_int32_t *) &comet->t1_rboc_ena, 0x00); /* t1RBOC
- * enable(BOC:BitOriented
- * Code) */
- if (isT1mode)
- {
-
- /* IBCD cfg: aka Inband Code Detection ** loopback code length set to */
- pci_write_32 ((u_int32_t *) &comet->ibcd_cfg, 0x04); /* 6 bit down, 5 bit up
- * (assert) */
- pci_write_32 ((u_int32_t *) &comet->ibcd_act, 0x08); /* line loopback
- * activate pattern */
- pci_write_32 ((u_int32_t *) &comet->ibcd_deact, 0x24); /* deactivate code
- * pattern (i.e.001) */
- }
+ /* t1RBOC enable(BOC:BitOriented Code) */
+ pci_write_32((u_int32_t *) &comet->t1_rboc_ena, 0x00);
+ if (isT1mode)
+ {
+
+ /* IBCD cfg: aka Inband Code Detection ** loopback code length set to */
+ /* 6 bit down, 5 bit up (assert) */
+ pci_write_32((u_int32_t *) &comet->ibcd_cfg, 0x04);
+ /* line loopback activate pattern */
+ pci_write_32((u_int32_t *) &comet->ibcd_act, 0x08);
+ /* deactivate code pattern (i.e.001) */
+ pci_write_32((u_int32_t *) &comet->ibcd_deact, 0x24);
+ }
/* 10: CDRC cfg 28&38: rx&tx data link 1 ctrl 48: t1 frmr cfg */
/* 50: SIGX cfg, COSS (change of signaling state) 54: XBAS cfg */
/* 60: t1 ALMI cfg */
/* Configure Line Coding */
- switch (port_mode)
- {
- case CFG_FRAME_SF: /* 1 - T1 B8ZS */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->t1_frmr_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->t1_xbas_cfg, 0x20); /* 5:B8ZS */
- pci_write_32 ((u_int32_t *) &comet->t1_almi_cfg, 0);
- break;
- case CFG_FRAME_ESF: /* 2 - T1 B8ZS */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->rxce1_ctl, 0x20); /* Bit 5: T1 DataLink
- * Enable */
- pci_write_32 ((u_int32_t *) &comet->txci1_ctl, 0x20); /* 5: T1 DataLink Enable */
- pci_write_32 ((u_int32_t *) &comet->t1_frmr_cfg, 0x30); /* 4:ESF 5:ESFFA */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0x04); /* 2:ESF */
- pci_write_32 ((u_int32_t *) &comet->t1_xbas_cfg, 0x30); /* 4:ESF 5:B8ZS */
- pci_write_32 ((u_int32_t *) &comet->t1_almi_cfg, 0x10); /* 4:ESF */
- break;
- case CFG_FRAME_E1PLAIN: /* 3 - HDB3 */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0x40);
- break;
- case CFG_FRAME_E1CAS: /* 4 - HDB3 */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0x60);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0);
- break;
- case CFG_FRAME_E1CRC: /* 5 - HDB3 */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0x10);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0xc2);
- break;
- case CFG_FRAME_E1CRC_CAS: /* 6 - HDB3 */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0x70);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0x82);
- break;
- case CFG_FRAME_SF_AMI: /* 7 - T1 AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->t1_frmr_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->t1_xbas_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->t1_almi_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- break;
- case CFG_FRAME_ESF_AMI: /* 8 - T1 AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->rxce1_ctl, 0x20); /* 5: T1 DataLink Enable */
- pci_write_32 ((u_int32_t *) &comet->txci1_ctl, 0x20); /* 5: T1 DataLink Enable */
- pci_write_32 ((u_int32_t *) &comet->t1_frmr_cfg, 0x30); /* Bit 4:ESF 5:ESFFA */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0x04); /* 2:ESF */
- pci_write_32 ((u_int32_t *) &comet->t1_xbas_cfg, 0x10); /* 4:ESF */
- pci_write_32 ((u_int32_t *) &comet->t1_almi_cfg, 0x10); /* 4:ESF */
- break;
- case CFG_FRAME_E1PLAIN_AMI: /* 9 - AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0x80);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0x40);
- break;
- case CFG_FRAME_E1CAS_AMI: /* 10 - AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0xe0);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0);
- break;
- case CFG_FRAME_E1CRC_AMI: /* 11 - AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0x90);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0xc2);
- break;
- case CFG_FRAME_E1CRC_CAS_AMI: /* 12 - AMI */
- pci_write_32 ((u_int32_t *) &comet->cdrc_cfg, 0x80); /* Enable AMI Line
- * Decoding */
- pci_write_32 ((u_int32_t *) &comet->sigx_cfg, 0);
- pci_write_32 ((u_int32_t *) &comet->e1_tran_cfg, 0xf0);
- pci_write_32 ((u_int32_t *) &comet->e1_frmr_aopts, 0x82);
- break;
- } /* end switch */
+ switch (port_mode)
+ {
+ /* 1 - T1 B8ZS */
+ case CFG_FRAME_SF:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->t1_frmr_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ /* 5:B8ZS */
+ pci_write_32((u_int32_t *) &comet->t1_xbas_cfg, 0x20);
+ pci_write_32((u_int32_t *) &comet->t1_almi_cfg, 0);
+ break;
+ /* 2 - T1 B8ZS */
+ case CFG_FRAME_ESF:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ /* Bit 5: T1 DataLink Enable */
+ pci_write_32((u_int32_t *) &comet->rxce1_ctl, 0x20);
+ /* 5: T1 DataLink Enable */
+ pci_write_32((u_int32_t *) &comet->txci1_ctl, 0x20);
+ /* 4:ESF 5:ESFFA */
+ pci_write_32((u_int32_t *) &comet->t1_frmr_cfg, 0x30);
+ /* 2:ESF */
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0x04);
+ /* 4:ESF 5:B8ZS */
+ pci_write_32((u_int32_t *) &comet->t1_xbas_cfg, 0x30);
+ /* 4:ESF */
+ pci_write_32((u_int32_t *) &comet->t1_almi_cfg, 0x10);
+ break;
+ /* 3 - HDB3 */
+ case CFG_FRAME_E1PLAIN:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0x40);
+ break;
+ /* 4 - HDB3 */
+ case CFG_FRAME_E1CAS:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0x60);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0);
+ break;
+ /* 5 - HDB3 */
+ case CFG_FRAME_E1CRC:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0x10);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0xc2);
+ break;
+ /* 6 - HDB3 */
+ case CFG_FRAME_E1CRC_CAS:
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0x70);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0x82);
+ break;
+ /* 7 - T1 AMI */
+ case CFG_FRAME_SF_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->t1_frmr_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->t1_xbas_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->t1_almi_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ break;
+ /* 8 - T1 AMI */
+ case CFG_FRAME_ESF_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ /* 5: T1 DataLink Enable */
+ pci_write_32((u_int32_t *) &comet->rxce1_ctl, 0x20);
+ /* 5: T1 DataLink Enable */
+ pci_write_32((u_int32_t *) &comet->txci1_ctl, 0x20);
+ /* Bit 4:ESF 5:ESFFA */
+ pci_write_32((u_int32_t *) &comet->t1_frmr_cfg, 0x30);
+ /* 2:ESF */
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0x04);
+ /* 4:ESF */
+ pci_write_32((u_int32_t *) &comet->t1_xbas_cfg, 0x10);
+ /* 4:ESF */
+ pci_write_32((u_int32_t *) &comet->t1_almi_cfg, 0x10);
+ break;
+ /* 9 - AMI */
+ case CFG_FRAME_E1PLAIN_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0x40);
+ break;
+ /* 10 - AMI */
+ case CFG_FRAME_E1CAS_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0xe0);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0);
+ break;
+ /* 11 - AMI */
+ case CFG_FRAME_E1CRC_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0x90);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0xc2);
+ break;
+ /* 12 - AMI */
+ case CFG_FRAME_E1CRC_CAS_AMI:
+ /* Enable AMI Line Decoding */
+ pci_write_32((u_int32_t *) &comet->cdrc_cfg, 0x80);
+ pci_write_32((u_int32_t *) &comet->sigx_cfg, 0);
+ pci_write_32((u_int32_t *) &comet->e1_tran_cfg, 0xf0);
+ pci_write_32((u_int32_t *) &comet->e1_frmr_aopts, 0x82);
+ break;
+ } /* end switch */
/***
* Set Full Frame mode (NXDSO[1] = 0, NXDSO[0] = 0)
@@ -277,101 +295,109 @@ init_comet (void *ci, comet_t * comet, u_int32_t port_mode, int clockmaster,
/* 0x30: "BRIF cfg"; 0x20 is 'CMODE', 0x03 is (bit) rate */
/* note "rate bits can only be set once after reset" */
- if (clockmaster)
- { /* CMODE == clockMode, 0=clock master (so
- * all 3 others should be slave) */
- if (isT1mode) /* rate = 1.544 Mb/s */
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, 0x00); /* Comet 0 Master
- * Mode(CMODE=0) */
- else /* rate = 2.048 Mb/s */
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, 0x01); /* Comet 0 Master
- * Mode(CMODE=0) */
-
- /* 31: BRIF frame pulse cfg 06: tx timing options */
- pci_write_32 ((u_int32_t *) &comet->brif_fpcfg, 0x00); /* Master Mode
- * i.e.FPMODE=0 (@0x20) */
- if ((moreParams & CFG_CLK_PORT_MASK) == CFG_CLK_PORT_INTERNAL)
- {
- if (cxt1e1_log_level >= LOG_SBEBUG12)
- pr_info(">> %s: clockmaster internal clock\n", __func__);
- pci_write_32 ((u_int32_t *) &comet->tx_time, 0x0d); /* internal oscillator */
- } else /* external clock source */
- {
- if (cxt1e1_log_level >= LOG_SBEBUG12)
- pr_info(">> %s: clockmaster external clock\n", __func__);
- pci_write_32 ((u_int32_t *) &comet->tx_time, 0x09); /* loop timing
- * (external) */
- }
-
- } else /* slave */
- {
- if (isT1mode)
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, 0x20); /* Slave Mode(CMODE=1,
- * see above) */
- else
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, 0x21); /* Slave Mode (CMODE=1) */
- pci_write_32 ((u_int32_t *) &comet->brif_fpcfg, 0x20); /* Slave Mode i.e.
- * FPMODE=1 (@0x20) */
- if (cxt1e1_log_level >= LOG_SBEBUG12)
- pr_info(">> %s: clockslave internal clock\n", __func__);
- pci_write_32 ((u_int32_t *) &comet->tx_time, 0x0d); /* oscillator timing */
- }
-
- /* 32: BRIF parity F-bit cfg */
- /* Totem-pole operation */
- pci_write_32 ((u_int32_t *) &comet->brif_pfcfg, 0x01); /* Receive Backplane
- * Parity/F-bit */
+ if (clockmaster)
+ {
+ /* CMODE == clockMode, 0=clock master (so all 3 others should be slave) */
+ /* rate = 1.544 Mb/s */
+ if (isT1mode)
+ /* Comet 0 Master Mode(CMODE=0) */
+ pci_write_32((u_int32_t *) &comet->brif_cfg, 0x00);
+ /* rate = 2.048 Mb/s */
+ else
+ /* Comet 0 Master Mode(CMODE=0) */
+ pci_write_32((u_int32_t *) &comet->brif_cfg, 0x01);
+
+ /* 31: BRIF frame pulse cfg 06: tx timing options */
+
+ /* Master Mode i.e.FPMODE=0 (@0x20) */
+ pci_write_32((u_int32_t *) &comet->brif_fpcfg, 0x00);
+ if ((moreParams & CFG_CLK_PORT_MASK) == CFG_CLK_PORT_INTERNAL)
+ {
+ if (cxt1e1_log_level >= LOG_SBEBUG12)
+ pr_info(">> %s: clockmaster internal clock\n", __func__);
+ /* internal oscillator */
+ pci_write_32((u_int32_t *) &comet->tx_time, 0x0d);
+ } else {
+ /* external clock source */
+ if (cxt1e1_log_level >= LOG_SBEBUG12)
+ pr_info(">> %s: clockmaster external clock\n", __func__);
+ /* loop timing(external) */
+ pci_write_32((u_int32_t *) &comet->tx_time, 0x09);
+ }
+
+ } else {
+ /* slave */
+ if (isT1mode)
+ /* Slave Mode(CMODE=1, see above) */
+ pci_write_32((u_int32_t *) &comet->brif_cfg, 0x20);
+ else
+ /* Slave Mode(CMODE=1)*/
+ pci_write_32((u_int32_t *) &comet->brif_cfg, 0x21);
+ /* Slave Mode i.e. FPMODE=1 (@0x20) */
+ pci_write_32((u_int32_t *) &comet->brif_fpcfg, 0x20);
+ if (cxt1e1_log_level >= LOG_SBEBUG12)
+ pr_info(">> %s: clockslave internal clock\n", __func__);
+ /* oscillator timing */
+ pci_write_32((u_int32_t *) &comet->tx_time, 0x0d);
+ }
+
+ /* 32: BRIF parity F-bit cfg */
+ /* Totem-pole operation */
+ /* Receive Backplane Parity/F-bit */
+ pci_write_32((u_int32_t *) &comet->brif_pfcfg, 0x01);
/* dc: RLPS equalizer V ref */
/* Configuration */
- if (isT1mode)
- pci_write_32 ((u_int32_t *) &comet->rlps_eqvr, 0x2c); /* RLPS Equalizer
- * Voltage */
- else
- pci_write_32 ((u_int32_t *) &comet->rlps_eqvr, 0x34); /* RLPS Equalizer
- * Voltage */
+ if (isT1mode)
+ /* RLPS Equalizer Voltage */
+ pci_write_32((u_int32_t *) &comet->rlps_eqvr, 0x2c);
+ else
+ /* RLPS Equalizer Voltage */
+ pci_write_32((u_int32_t *) &comet->rlps_eqvr, 0x34);
/* Reserved bit set and SQUELCH enabled */
/* f8: RLPS cfg & status f9: RLPS ALOS detect/clear threshold */
- pci_write_32 ((u_int32_t *) &comet->rlps_cfgsts, 0x11); /* RLPS Configuration
- * Status */
- if (isT1mode)
- pci_write_32 ((u_int32_t *) &comet->rlps_alos_thresh, 0x55); /* ? */
- else
- pci_write_32 ((u_int32_t *) &comet->rlps_alos_thresh, 0x22); /* ? */
+ /* RLPS Configuration Status */
+ pci_write_32((u_int32_t *) &comet->rlps_cfgsts, 0x11);
+ if (isT1mode)
+ /* ? */
+ pci_write_32((u_int32_t *) &comet->rlps_alos_thresh, 0x55);
+ else
+ /* ? */
+ pci_write_32((u_int32_t *) &comet->rlps_alos_thresh, 0x22);
/* Set Full Frame mode (NXDSO[1] = 0, NXDSO[0] = 0) */
/* CMODE=0: Clock slave mode with BTCLK as an input, DE=1: Use rising */
/* edge of BTCLK for data, FE=1: Use rising edge of BTCLK for frame, */
/* CMS=0: Use backplane freq, RATE[1:0]=0,0: T1 */
-/*** Transmit side is always an Input, Slave Clock*/
- /* 40: BTIF cfg 41: BTIF frame pulse cfg */
- if (isT1mode)
- pci_write_32 ((u_int32_t *) &comet->btif_cfg, 0x38); /* BTIF Configuration
- * Reg. */
- else
- pci_write_32 ((u_int32_t *) &comet->btif_cfg, 0x39); /* BTIF Configuration
- * Reg. */
-
- pci_write_32 ((u_int32_t *) &comet->btif_fpcfg, 0x01); /* BTIF Frame Pulse
- * Config. */
+ /*** Transmit side is always an Input, Slave Clock*/
+ /* 40: BTIF cfg 41: loop timing(external) */
+ /*BTIF frame pulse cfg */
+ if (isT1mode)
+ /* BTIF Configuration Reg. */
+ pci_write_32((u_int32_t *) &comet->btif_cfg, 0x38);
+ else
+ /* BTIF Configuration Reg. */
+ pci_write_32((u_int32_t *) &comet->btif_cfg, 0x39);
+ /* BTIF Frame Pulse Config. */
+ pci_write_32((u_int32_t *) &comet->btif_fpcfg, 0x01);
/* 0a: master diag 06: tx timing options */
/* if set Comet to loop back */
/* Comets set to normal */
- pci_write_32 ((u_int32_t *) &comet->mdiag, 0x00);
+ pci_write_32((u_int32_t *) &comet->mdiag, 0x00);
/* BTCLK driven by TCLKI internally (crystal driven) and Xmt Elasted */
/* Store is enabled. */
- WrtXmtWaveformTbl (ci, comet, TWV_table[tix]);
- if (isT1mode)
- WrtRcvEqualizerTbl ((ci_t *) ci, comet, &T1_Equalizer[0]);
- else
- WrtRcvEqualizerTbl ((ci_t *) ci, comet, &E1_Equalizer[0]);
- SetPwrLevel (comet);
+ WrtXmtWaveformTbl(ci, comet, TWV_table[tix]);
+ if (isT1mode)
+ WrtRcvEqualizerTbl((ci_t *) ci, comet, &T1_Equalizer[0]);
+ else
+ WrtRcvEqualizerTbl((ci_t *) ci, comet, &E1_Equalizer[0]);
+ SetPwrLevel(comet);
}
/*
@@ -382,15 +408,15 @@ init_comet (void *ci, comet_t * comet, u_int32_t port_mode, int clockmaster,
** Returns: Nothing
*/
STATIC void
-WrtXmtWaveform (ci_t * ci, comet_t * comet, u_int32_t sample, u_int32_t unit, u_int8_t data)
+WrtXmtWaveform(ci_t *ci, comet_t *comet, u_int32_t sample, u_int32_t unit, u_int8_t data)
{
- u_int8_t WaveformAddr;
+ u_int8_t WaveformAddr;
- WaveformAddr = (sample << 3) + (unit & 7);
- pci_write_32 ((u_int32_t *) &comet->xlpg_pwave_addr, WaveformAddr);
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
- pci_write_32 ((u_int32_t *) &comet->xlpg_pwave_data, 0x7F & data);
+ WaveformAddr = (sample << 3) + (unit & 7);
+ pci_write_32((u_int32_t *) &comet->xlpg_pwave_addr, WaveformAddr);
+ /* for write order preservation when Optimizing driver */
+ pci_flush_write(ci);
+ pci_write_32((u_int32_t *) &comet->xlpg_pwave_data, 0x7F & data);
}
/*
@@ -400,19 +426,19 @@ WrtXmtWaveform (ci_t * ci, comet_t * comet, u_int32_t sample, u_int32_t unit, u_
** Returns: Nothing
*/
STATIC void
-WrtXmtWaveformTbl (ci_t * ci, comet_t * comet,
- u_int8_t table[COMET_NUM_SAMPLES][COMET_NUM_UNITS])
+WrtXmtWaveformTbl(ci_t *ci, comet_t *comet,
+ u_int8_t table[COMET_NUM_SAMPLES][COMET_NUM_UNITS])
{
- u_int32_t sample, unit;
+ u_int32_t sample, unit;
- for (sample = 0; sample < COMET_NUM_SAMPLES; sample++)
- {
- for (unit = 0; unit < COMET_NUM_UNITS; unit++)
- WrtXmtWaveform (ci, comet, sample, unit, table[sample][unit]);
- }
+ for (sample = 0; sample < COMET_NUM_SAMPLES; sample++)
+ {
+ for (unit = 0; unit < COMET_NUM_UNITS; unit++)
+ WrtXmtWaveform(ci, comet, sample, unit, table[sample][unit]);
+ }
/* Enable transmitter and set output amplitude */
- pci_write_32 ((u_int32_t *) &comet->xlpg_cfg, table[COMET_NUM_SAMPLES][0]);
+ pci_write_32((u_int32_t *) &comet->xlpg_cfg, table[COMET_NUM_SAMPLES][0]);
}
@@ -427,60 +453,60 @@ WrtXmtWaveformTbl (ci_t * ci, comet_t * comet,
*/
STATIC void
-WrtRcvEqualizerTbl (ci_t * ci, comet_t * comet, u_int32_t *table)
+WrtRcvEqualizerTbl(ci_t *ci, comet_t *comet, u_int32_t *table)
{
- u_int32_t ramaddr;
- volatile u_int32_t value;
-
- for (ramaddr = 0; ramaddr < 256; ramaddr++)
- {
- /*** the following lines are per Errata 7, 2.5 ***/
- {
- pci_write_32 ((u_int32_t *) &comet->rlps_eq_rwsel, 0x80); /* Set up for a read
- * operation */
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
- pci_write_32 ((u_int32_t *) &comet->rlps_eq_iaddr, (u_int8_t) ramaddr); /* write the addr,
- * initiate a read */
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
- /*
- * wait 3 line rate clock cycles to ensure address bits are
- * captured by T1/E1 clock
- */
- OS_uwait (4, "wret"); /* 683ns * 3 = 1366 ns, approx 2us (but
- * use 4us) */
- }
-
- value = *table++;
- pci_write_32 ((u_int32_t *) &comet->rlps_idata3, (u_int8_t) (value >> 24));
- pci_write_32 ((u_int32_t *) &comet->rlps_idata2, (u_int8_t) (value >> 16));
- pci_write_32 ((u_int32_t *) &comet->rlps_idata1, (u_int8_t) (value >> 8));
- pci_write_32 ((u_int32_t *) &comet->rlps_idata0, (u_int8_t) value);
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
-
- /* Storing RAM address, causes RAM to be updated */
-
- pci_write_32 ((u_int32_t *) &comet->rlps_eq_rwsel, 0); /* Set up for a write
- * operation */
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
- pci_write_32 ((u_int32_t *) &comet->rlps_eq_iaddr, (u_int8_t) ramaddr); /* write the addr,
- * initiate a read */
- pci_flush_write (ci); /* for write order preservation when
- * Optimizing driver */
- /*
- * wait 3 line rate clock cycles to ensure address bits are captured
- * by T1/E1 clock
- */
- OS_uwait (4, "wret"); /* 683ns * 3 = 1366 ns, approx 2us (but
- * use 4us) */
- }
-
- pci_write_32 ((u_int32_t *) &comet->rlps_eq_cfg, 0xCB); /* Enable Equalizer &
- * set it to use 256
- * periods */
+ u_int32_t ramaddr;
+ volatile u_int32_t value;
+
+ for (ramaddr = 0; ramaddr < 256; ramaddr++) {
+ /*** the following lines are per Errata 7, 2.5 ***/
+ {
+ /* Set up for a read operation */
+ pci_write_32((u_int32_t *) &comet->rlps_eq_rwsel, 0x80);
+ /* for write order preservation when Optimizing driver */
+ pci_flush_write(ci);
+ /* write the addr, initiate a read */
+ pci_write_32((u_int32_t *) &comet->rlps_eq_iaddr, (u_int8_t) ramaddr);
+ /* for write order preservation when Optimizing driver */
+ pci_flush_write(ci);
+ /*
+ * wait 3 line rate clock cycles to ensure address bits are
+ * captured by T1/E1 clock
+ */
+
+ /* 683ns * 3 = 1366 ns, approx 2us (but use 4us) */
+ OS_uwait(4, "wret");
+ }
+
+ value = *table++;
+ pci_write_32((u_int32_t *) &comet->rlps_idata3, (u_int8_t) (value >> 24));
+ pci_write_32((u_int32_t *) &comet->rlps_idata2, (u_int8_t) (value >> 16));
+ pci_write_32((u_int32_t *) &comet->rlps_idata1, (u_int8_t) (value >> 8));
+ pci_write_32((u_int32_t *) &comet->rlps_idata0, (u_int8_t) value);
+ /* for write order preservation when Optimizing driver */
+ pci_flush_write(ci);
+
+ /* Storing RAM address, causes RAM to be updated */
+
+ /* Set up for a write operation */
+ pci_write_32((u_int32_t *) &comet->rlps_eq_rwsel, 0);
+ /* for write order preservation when optimizing driver */
+ pci_flush_write(ci);
+ /* write the addr, initiate a read */
+ pci_write_32((u_int32_t *) &comet->rlps_eq_iaddr, (u_int8_t) ramaddr);
+ /* for write order preservation when optimizing driver */
+ pci_flush_write(ci);
+
+ /*
+ * wait 3 line rate clock cycles to ensure address bits are captured
+ * by T1/E1 clock
+ */
+ /* 683ns * 3 = 1366 ns, approx 2us (but use 4us) */
+ OS_uwait(4, "wret");
+ }
+
+ /* Enable Equalizer & set it to use 256 periods */
+ pci_write_32((u_int32_t *) &comet->rlps_eq_cfg, 0xCB);
}
@@ -491,9 +517,9 @@ WrtRcvEqualizerTbl (ci_t * ci, comet_t * comet, u_int32_t *table)
*/
STATIC void
-SetPwrLevel (comet_t * comet)
+SetPwrLevel(comet_t *comet)
{
- volatile u_int32_t temp;
+ volatile u_int32_t temp;
/*
** Algorithm to Balance the Power Distribution of Ttip Tring
@@ -507,22 +533,20 @@ SetPwrLevel (comet_t * comet)
** Repeat these steps for register F5
** Write 0x01 to register F6
*/
- pci_write_32 ((u_int32_t *) &comet->xlpg_fdata_sel, 0x00); /* XLPG Fuse Data Select */
-
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_pctl, 0x01); /* XLPG Analog Test
- * Positive control */
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_pctl, 0x01);
-
- temp = pci_read_32 ((u_int32_t *) &comet->xlpg_atest_pctl) & 0xfe;
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_pctl, temp);
-
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_nctl, 0x01); /* XLPG Analog Test
- * Negative control */
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_nctl, 0x01);
-
- temp = pci_read_32 ((u_int32_t *) &comet->xlpg_atest_nctl) & 0xfe;
- pci_write_32 ((u_int32_t *) &comet->xlpg_atest_nctl, temp);
- pci_write_32 ((u_int32_t *) &comet->xlpg_fdata_sel, 0x01); /* XLPG */
+ /* XLPG Fuse Data Select */
+ pci_write_32((u_int32_t *) &comet->xlpg_fdata_sel, 0x00);
+ /* XLPG Analog Test Positive control */
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_pctl, 0x01);
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_pctl, 0x01);
+ temp = pci_read_32((u_int32_t *) &comet->xlpg_atest_pctl) & 0xfe;
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_pctl, temp);
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_nctl, 0x01);
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_nctl, 0x01);
+ /* XLPG Analog Test Negative control */
+ temp = pci_read_32((u_int32_t *) &comet->xlpg_atest_nctl) & 0xfe;
+ pci_write_32((u_int32_t *) &comet->xlpg_atest_nctl, temp);
+ /* XLPG */
+ pci_write_32((u_int32_t *) &comet->xlpg_fdata_sel, 0x01);
}
@@ -535,33 +559,30 @@ SetPwrLevel (comet_t * comet)
*/
#if 0
STATIC void
-SetCometOps (comet_t * comet)
+SetCometOps(comet_t *comet)
{
- volatile u_int8_t rd_value;
-
- if (comet == mConfig.C4Func1Base + (COMET0_OFFSET >> 2))
- {
- rd_value = (u_int8_t) pci_read_32 ((u_int32_t *) &comet->brif_cfg); /* read the BRIF
- * Configuration */
- rd_value &= ~0x20;
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, (u_int32_t) rd_value);
-
- rd_value = (u_int8_t) pci_read_32 ((u_int32_t *) &comet->brif_fpcfg); /* read the BRIF Frame
- * Pulse Configuration */
- rd_value &= ~0x20;
- pci_write_32 ((u_int32_t *) &comet->brif_fpcfg, (u_int8_t) rd_value);
- } else
- {
- rd_value = (u_int8_t) pci_read_32 ((u_int32_t *) &comet->brif_cfg); /* read the BRIF
- * Configuration */
- rd_value |= 0x20;
- pci_write_32 ((u_int32_t *) &comet->brif_cfg, (u_int32_t) rd_value);
-
- rd_value = (u_int8_t) pci_read_32 ((u_int32_t *) &comet->brif_fpcfg); /* read the BRIF Frame
- * Pulse Configuration */
- rd_value |= 0x20;
- pci_write_32 ((u_int32_t *) &comet->brif_fpcfg, (u_int8_t) rd_value);
- }
+ volatile u_int8_t rd_value;
+
+ if (comet == mConfig.C4Func1Base + (COMET0_OFFSET >> 2))
+ {
+ /* read the BRIF Configuration */
+ rd_value = (u_int8_t) pci_read_32((u_int32_t *) &comet->brif_cfg);
+ rd_value &= ~0x20;
+ pci_write_32((u_int32_t *) &comet->brif_cfg, (u_int32_t) rd_value);
+ /* read the BRIF Frame Pulse Configuration */
+ rd_value = (u_int8_t) pci_read_32((u_int32_t *) &comet->brif_fpcfg);
+ rd_value &= ~0x20;
+ pci_write_32((u_int32_t *) &comet->brif_fpcfg, (u_int8_t) rd_value);
+ } else {
+ /* read the BRIF Configuration */
+ rd_value = (u_int8_t) pci_read_32((u_int32_t *) &comet->brif_cfg);
+ rd_value |= 0x20;
+ pci_write_32((u_int32_t *) &comet->brif_cfg, (u_int32_t) rd_value);
+ /* read the BRIF Frame Pulse Configuration */
+ rd_value = (u_int8_t) pci_read_32((u_int32_t *) &comet->brif_fpcfg);
+ rd_value |= 0x20;
+ pci_write_32(u_int32_t *) & comet->brif_fpcfg, (u_int8_t) rd_value);
+ }
}
#endif
diff --git a/drivers/staging/cxt1e1/functions.c b/drivers/staging/cxt1e1/functions.c
index d9a9aa3571d9..6167dc574577 100644
--- a/drivers/staging/cxt1e1/functions.c
+++ b/drivers/staging/cxt1e1/functions.c
@@ -14,7 +14,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/byteorder.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
@@ -97,7 +97,7 @@ pci_write_32 (u_int32_t *p, u_int32_t v)
void
-pci_flush_write (ci_t * ci)
+pci_flush_write (ci_t *ci)
{
volatile u_int32_t v;
@@ -202,7 +202,7 @@ sd_line_is_ok (void *user)
{
struct net_device *ndev = (struct net_device *) user;
- return (netif_carrier_ok (ndev));
+ return netif_carrier_ok (ndev);
}
void
@@ -246,7 +246,7 @@ sd_queue_stopped (void *user)
{
struct net_device *ndev = (struct net_device *) user;
- return (netif_queue_stopped (ndev));
+ return netif_queue_stopped (ndev);
}
void sd_recv_consume(void *token, size_t len, void *user)
@@ -279,7 +279,7 @@ VMETRO_TRACE (void *x)
void
-VMETRO_TRIGGER (ci_t * ci, int x)
+VMETRO_TRIGGER (ci_t *ci, int x)
{
comet_t *comet;
volatile u_int32_t data;
diff --git a/drivers/staging/cxt1e1/hwprobe.c b/drivers/staging/cxt1e1/hwprobe.c
index de8ac0bc24fb..110c252d38d7 100644
--- a/drivers/staging/cxt1e1/hwprobe.c
+++ b/drivers/staging/cxt1e1/hwprobe.c
@@ -50,7 +50,7 @@ struct s_hdw_info hdw_info[MAX_BOARDS];
void __init
-show_two (hdw_info_t * hi, int brdno)
+show_two (hdw_info_t *hi, int brdno)
{
ci_t *ci;
struct pci_dev *pdev;
@@ -102,7 +102,7 @@ show_two (hdw_info_t * hi, int brdno)
void __init
-hdw_sn_get (hdw_info_t * hi, int brdno)
+hdw_sn_get (hdw_info_t *hi, int brdno)
{
/* obtain hardware EEPROM information */
long addr;
@@ -222,7 +222,7 @@ cleanup_devs (void)
STATIC int __init
-c4_hdw_init (struct pci_dev * pdev, int found)
+c4_hdw_init (struct pci_dev *pdev, int found)
{
hdw_info_t *hi;
int i;
diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c
index a829b6231a66..e5889ef190a2 100644
--- a/drivers/staging/cxt1e1/linux.c
+++ b/drivers/staging/cxt1e1/linux.c
@@ -144,7 +144,7 @@ getuserbychan (int channum)
char *
-get_hdlc_name (hdlc_device * hdlc)
+get_hdlc_name (hdlc_device *hdlc)
{
struct c4_priv *priv = hdlc->priv;
struct net_device *dev = getuserbychan (priv->channum);
@@ -185,7 +185,7 @@ mkret (int bsd)
* within a port's group.
*/
void
-c4_wk_chan_restart (mch_t * ch)
+c4_wk_chan_restart (mch_t *ch)
{
mpi_t *pi = ch->up;
@@ -203,7 +203,7 @@ c4_wk_chan_restart (mch_t * ch)
}
status_t
-c4_wk_chan_init (mpi_t * pi, mch_t * ch)
+c4_wk_chan_init (mpi_t *pi, mch_t *ch)
{
/*
* this will be used to restart a stopped channel
@@ -218,7 +218,7 @@ c4_wk_chan_init (mpi_t * pi, mch_t * ch)
}
status_t
-c4_wq_port_init (mpi_t * pi)
+c4_wq_port_init (mpi_t *pi)
{
char name[16], *np; /* NOTE: name of the queue limited by system
@@ -241,7 +241,7 @@ c4_wq_port_init (mpi_t * pi)
}
void
-c4_wq_port_cleanup (mpi_t * pi)
+c4_wq_port_cleanup (mpi_t *pi)
{
/*
* PORT POINT: cannot call this if WQ is statically allocated w/in
@@ -278,7 +278,7 @@ c4_ebus_interrupt (int irq, void *dev_instance)
static int
-void_open (struct net_device * ndev)
+void_open (struct net_device *ndev)
{
pr_info("%s: trying to open master device !\n", ndev->name);
return -1;
@@ -286,7 +286,7 @@ void_open (struct net_device * ndev)
STATIC int
-chan_open (struct net_device * ndev)
+chan_open (struct net_device *ndev)
{
hdlc_device *hdlc = dev_to_hdlc (ndev);
const struct c4_priv *priv = hdlc->priv;
@@ -306,7 +306,7 @@ chan_open (struct net_device * ndev)
STATIC int
-chan_close (struct net_device * ndev)
+chan_close (struct net_device *ndev)
{
hdlc_device *hdlc = dev_to_hdlc (ndev);
const struct c4_priv *priv = hdlc->priv;
@@ -320,14 +320,14 @@ chan_close (struct net_device * ndev)
STATIC int
-chan_dev_ioctl (struct net_device * dev, struct ifreq * ifr, int cmd)
+chan_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
return hdlc_ioctl (dev, ifr, cmd);
}
STATIC int
-chan_attach_noop (struct net_device * ndev, unsigned short foo_1, unsigned short foo_2)
+chan_attach_noop (struct net_device *ndev, unsigned short foo_1, unsigned short foo_2)
{
return 0; /* our driver has nothing to do here, show's
* over, go home */
@@ -335,7 +335,7 @@ chan_attach_noop (struct net_device * ndev, unsigned short foo_1, unsigned short
STATIC struct net_device_stats *
-chan_get_stats (struct net_device * ndev)
+chan_get_stats (struct net_device *ndev)
{
mch_t *ch;
struct net_device_stats *nstats;
@@ -388,14 +388,14 @@ chan_get_stats (struct net_device * ndev)
static ci_t *
-get_ci_by_dev (struct net_device * ndev)
+get_ci_by_dev (struct net_device *ndev)
{
return (ci_t *)(netdev_priv(ndev));
}
STATIC int
-c4_linux_xmit (struct sk_buff * skb, struct net_device * ndev)
+c4_linux_xmit (struct sk_buff *skb, struct net_device *ndev)
{
const struct c4_priv *priv;
int rval;
@@ -417,8 +417,8 @@ static const struct net_device_ops chan_ops = {
};
STATIC struct net_device *
-create_chan (struct net_device * ndev, ci_t * ci,
- struct sbecom_chan_param * cp)
+create_chan (struct net_device *ndev, ci_t *ci,
+ struct sbecom_chan_param *cp)
{
hdlc_device *hdlc;
struct net_device *dev;
@@ -510,7 +510,7 @@ create_chan (struct net_device * ndev, ci_t * ci,
/* the idea here is to get port information and pass it back (using pointer) */
STATIC status_t
-do_get_port (struct net_device * ndev, void *data)
+do_get_port (struct net_device *ndev, void *data)
{
int ret;
ci_t *ci; /* ci stands for card information */
@@ -535,7 +535,7 @@ do_get_port (struct net_device * ndev, void *data)
/* this function copys the user data and then calls the real action function */
STATIC status_t
-do_set_port (struct net_device * ndev, void *data)
+do_set_port (struct net_device *ndev, void *data)
{
ci_t *ci; /* ci stands for card information */
struct sbecom_port_param pp;/* copy data to kernel land */
@@ -557,7 +557,7 @@ do_set_port (struct net_device * ndev, void *data)
/* work the port loopback mode as per directed */
STATIC status_t
-do_port_loop (struct net_device * ndev, void *data)
+do_port_loop (struct net_device *ndev, void *data)
{
struct sbecom_port_param pp;
ci_t *ci;
@@ -572,7 +572,7 @@ do_port_loop (struct net_device * ndev, void *data)
/* set the specified register with the given value / or just read it */
STATIC status_t
-do_framer_rw (struct net_device * ndev, void *data)
+do_framer_rw (struct net_device *ndev, void *data)
{
struct sbecom_port_param pp;
ci_t *ci;
@@ -593,7 +593,7 @@ do_framer_rw (struct net_device * ndev, void *data)
/* set the specified register with the given value / or just read it */
STATIC status_t
-do_pld_rw (struct net_device * ndev, void *data)
+do_pld_rw (struct net_device *ndev, void *data)
{
struct sbecom_port_param pp;
ci_t *ci;
@@ -614,7 +614,7 @@ do_pld_rw (struct net_device * ndev, void *data)
/* set the specified register with the given value / or just read it */
STATIC status_t
-do_musycc_rw (struct net_device * ndev, void *data)
+do_musycc_rw (struct net_device *ndev, void *data)
{
struct c4_musycc_param mp;
ci_t *ci;
@@ -634,7 +634,7 @@ do_musycc_rw (struct net_device * ndev, void *data)
}
STATIC status_t
-do_get_chan (struct net_device * ndev, void *data)
+do_get_chan (struct net_device *ndev, void *data)
{
struct sbecom_chan_param cp;
int ret;
@@ -652,7 +652,7 @@ do_get_chan (struct net_device * ndev, void *data)
}
STATIC status_t
-do_set_chan (struct net_device * ndev, void *data)
+do_set_chan (struct net_device *ndev, void *data)
{
struct sbecom_chan_param cp;
int ret;
@@ -673,7 +673,7 @@ do_set_chan (struct net_device * ndev, void *data)
}
STATIC status_t
-do_create_chan (struct net_device * ndev, void *data)
+do_create_chan (struct net_device *ndev, void *data)
{
ci_t *ci;
struct net_device *dev;
@@ -700,7 +700,7 @@ do_create_chan (struct net_device * ndev, void *data)
}
STATIC status_t
-do_get_chan_stats (struct net_device * ndev, void *data)
+do_get_chan_stats (struct net_device *ndev, void *data)
{
struct c4_chan_stats_wrap ccs;
int ret;
@@ -721,7 +721,7 @@ do_get_chan_stats (struct net_device * ndev, void *data)
return 0;
}
STATIC status_t
-do_set_loglevel (struct net_device * ndev, void *data)
+do_set_loglevel (struct net_device *ndev, void *data)
{
unsigned int cxt1e1_log_level;
@@ -732,7 +732,7 @@ do_set_loglevel (struct net_device * ndev, void *data)
}
STATIC status_t
-do_deluser (struct net_device * ndev, int lockit)
+do_deluser (struct net_device *ndev, int lockit)
{
if (ndev->flags & IFF_UP)
return -EBUSY;
@@ -763,7 +763,7 @@ do_deluser (struct net_device * ndev, int lockit)
}
int
-do_del_chan (struct net_device * musycc_dev, void *data)
+do_del_chan (struct net_device *musycc_dev, void *data)
{
struct sbecom_chan_param cp;
char buf[sizeof (CHANNAME) + 3];
@@ -787,7 +787,7 @@ do_del_chan (struct net_device * musycc_dev, void *data)
int c4_reset_board (void *);
int
-do_reset (struct net_device * musycc_dev, void *data)
+do_reset (struct net_device *musycc_dev, void *data)
{
const struct c4_priv *priv;
int i;
@@ -816,7 +816,7 @@ do_reset (struct net_device * musycc_dev, void *data)
}
int
-do_reset_chan_stats (struct net_device * musycc_dev, void *data)
+do_reset_chan_stats (struct net_device *musycc_dev, void *data)
{
struct sbecom_chan_param cp;
@@ -827,7 +827,7 @@ do_reset_chan_stats (struct net_device * musycc_dev, void *data)
}
STATIC status_t
-c4_ioctl (struct net_device * ndev, struct ifreq * ifr, int cmd)
+c4_ioctl (struct net_device *ndev, struct ifreq *ifr, int cmd)
{
ci_t *ci;
void *data;
@@ -954,7 +954,7 @@ static void c4_setup(struct net_device *dev)
}
struct net_device *__init
-c4_add_dev (hdw_info_t * hi, int brdno, unsigned long f0, unsigned long f1,
+c4_add_dev (hdw_info_t *hi, int brdno, unsigned long f0, unsigned long f1,
int irq0, int irq1)
{
struct net_device *ndev;
diff --git a/drivers/staging/cxt1e1/musycc.c b/drivers/staging/cxt1e1/musycc.c
index b2cc68a1fe87..1037086d00a7 100644
--- a/drivers/staging/cxt1e1/musycc.c
+++ b/drivers/staging/cxt1e1/musycc.c
@@ -74,7 +74,7 @@ void musycc_update_timeslots(mpi_t *);
#if 1
STATIC int
-musycc_dump_rxbuffer_ring(mch_t * ch, int lockit)
+musycc_dump_rxbuffer_ring(mch_t *ch, int lockit)
{
struct mdesc *m;
unsigned long flags = 0;
@@ -140,7 +140,7 @@ musycc_dump_rxbuffer_ring(mch_t * ch, int lockit)
#if 1
STATIC int
-musycc_dump_txbuffer_ring(mch_t * ch, int lockit)
+musycc_dump_txbuffer_ring(mch_t *ch, int lockit)
{
struct mdesc *m;
unsigned long flags = 0;
@@ -205,7 +205,7 @@ musycc_dump_txbuffer_ring(mch_t * ch, int lockit)
*/
status_t
-musycc_dump_ring(ci_t * ci, unsigned int chan)
+musycc_dump_ring(ci_t *ci, unsigned int chan)
{
mch_t *ch;
@@ -248,7 +248,7 @@ musycc_dump_ring(ci_t * ci, unsigned int chan)
status_t
-musycc_dump_rings(ci_t * ci, unsigned int start_chan)
+musycc_dump_rings(ci_t *ci, unsigned int start_chan)
{
unsigned int chan;
@@ -264,7 +264,7 @@ musycc_dump_rings(ci_t * ci, unsigned int start_chan)
*/
void
-musycc_init_mdt(mpi_t * pi)
+musycc_init_mdt(mpi_t *pi)
{
u_int32_t *addr, cfg;
int i;
@@ -288,7 +288,7 @@ musycc_init_mdt(mpi_t * pi)
/* Set TX thp to the next unprocessed md */
void
-musycc_update_tx_thp(mch_t * ch)
+musycc_update_tx_thp(mch_t *ch)
{
struct mdesc *md;
unsigned long flags;
@@ -443,7 +443,7 @@ musycc_wq_chan_restart(void *arg) /* channel private structure */
*/
void
-musycc_chan_restart(mch_t * ch)
+musycc_chan_restart(mch_t *ch)
{
#ifdef RLD_RESTART_DEBUG
pr_info("++ musycc_chan_restart[%d]: txd_irq_srv @ %p = sts %x\n",
@@ -461,7 +461,7 @@ musycc_chan_restart(mch_t * ch)
void
-rld_put_led(mpi_t * pi, u_int32_t ledval)
+rld_put_led(mpi_t *pi, u_int32_t ledval)
{
static u_int32_t led = 0;
@@ -477,7 +477,7 @@ rld_put_led(mpi_t * pi, u_int32_t ledval)
#define MUSYCC_SR_RETRY_CNT 9
void
-musycc_serv_req(mpi_t * pi, u_int32_t req)
+musycc_serv_req(mpi_t *pi, u_int32_t req)
{
volatile u_int32_t r;
int rcnt;
@@ -578,7 +578,7 @@ rewrite:
#ifdef SBE_PMCC4_ENABLE
void
-musycc_update_timeslots(mpi_t * pi)
+musycc_update_timeslots(mpi_t *pi)
{
int i, ch;
char e1mode = IS_FRAME_ANY_E1(pi->p.port_mode);
@@ -640,7 +640,7 @@ musycc_update_timeslots(mpi_t * pi)
#ifdef SBE_WAN256T3_ENABLE
void
-musycc_update_timeslots(mpi_t * pi)
+musycc_update_timeslots(mpi_t *pi)
{
mch_t *ch;
@@ -703,7 +703,7 @@ musycc_chan_proto(int proto)
#ifdef SBE_WAN256T3_ENABLE
STATIC void __init
-musycc_init_port(mpi_t * pi)
+musycc_init_port(mpi_t *pi)
{
pci_write_32((u_int32_t *) &pi->reg->gbp, OS_vtophys(pi->regram));
@@ -737,7 +737,7 @@ musycc_init_port(mpi_t * pi)
status_t __init
-musycc_init(ci_t * ci)
+musycc_init(ci_t *ci)
{
char *regaddr; /* temp for address boundary calculations */
int i, gchan;
@@ -832,7 +832,7 @@ musycc_init(ci_t * ci)
void
-musycc_bh_tx_eom(mpi_t * pi, int gchan)
+musycc_bh_tx_eom(mpi_t *pi, int gchan)
{
mch_t *ch;
struct mdesc *md;
@@ -1010,7 +1010,7 @@ musycc_bh_tx_eom(mpi_t * pi, int gchan)
STATIC void
-musycc_bh_rx_eom(mpi_t * pi, int gchan)
+musycc_bh_rx_eom(mpi_t *pi, int gchan)
{
mch_t *ch;
void *m, *m2;
@@ -1229,7 +1229,7 @@ unsigned long
#else
void
#endif
-musycc_intr_bh_tasklet(ci_t * ci)
+musycc_intr_bh_tasklet(ci_t *ci)
{
mpi_t *pi;
mch_t *ch;
@@ -1517,7 +1517,7 @@ musycc_intr_bh_tasklet(ci_t * ci)
#if 0
int __init
-musycc_new_chan(ci_t * ci, int channum, void *user)
+musycc_new_chan(ci_t *ci, int channum, void *user)
{
mch_t *ch;
@@ -1546,7 +1546,7 @@ musycc_new_chan(ci_t * ci, int channum, void *user)
#ifdef SBE_PMCC4_ENABLE
status_t
-musycc_chan_down(ci_t * dummy, int channum)
+musycc_chan_down(ci_t *dummy, int channum)
{
mpi_t *pi;
mch_t *ch;
@@ -1597,7 +1597,7 @@ musycc_chan_down(ci_t * dummy, int channum)
int
-musycc_del_chan(ci_t * ci, int channum)
+musycc_del_chan(ci_t *ci, int channum)
{
mch_t *ch;
@@ -1613,7 +1613,7 @@ musycc_del_chan(ci_t * ci, int channum)
int
-musycc_del_chan_stats(ci_t * ci, int channum)
+musycc_del_chan_stats(ci_t *ci, int channum)
{
mch_t *ch;
@@ -1628,7 +1628,7 @@ musycc_del_chan_stats(ci_t * ci, int channum)
int
-musycc_start_xmit(ci_t * ci, int channum, void *mem_token)
+musycc_start_xmit(ci_t *ci, int channum, void *mem_token)
{
mch_t *ch;
struct mdesc *md;
diff --git a/drivers/staging/cxt1e1/pmcc4.h b/drivers/staging/cxt1e1/pmcc4.h
index b0ed4ad13011..003eb8690190 100644
--- a/drivers/staging/cxt1e1/pmcc4.h
+++ b/drivers/staging/cxt1e1/pmcc4.h
@@ -85,15 +85,15 @@ void c4_cleanup (void);
status_t c4_chan_up (ci_t *, int channum);
status_t c4_del_chan_stats (int channum);
status_t c4_del_chan (int channum);
-status_t c4_get_iidinfo (ci_t * ci, struct sbe_iid_info * iip);
+status_t c4_get_iidinfo (ci_t *ci, struct sbe_iid_info *iip);
int c4_is_chan_up (int channum);
void *getuserbychan (int channum);
-void pci_flush_write (ci_t * ci);
+void pci_flush_write (ci_t *ci);
void sbecom_set_loglevel (int debuglevel);
-char *sbeid_get_bdname (ci_t * ci);
-void sbeid_set_bdtype (ci_t * ci);
-void sbeid_set_hdwbid (ci_t * ci);
+char *sbeid_get_bdname (ci_t *ci);
+void sbeid_set_bdtype (ci_t *ci);
+void sbeid_set_hdwbid (ci_t *ci);
u_int32_t sbeCrc (u_int8_t *, u_int32_t, u_int32_t, u_int32_t *);
void VMETRO_TRACE (void *); /* put data into 8 LEDs */
diff --git a/drivers/staging/cxt1e1/pmcc4_drv.c b/drivers/staging/cxt1e1/pmcc4_drv.c
index 8d8a22be5b2e..32d7a216a419 100644
--- a/drivers/staging/cxt1e1/pmcc4_drv.c
+++ b/drivers/staging/cxt1e1/pmcc4_drv.c
@@ -28,7 +28,7 @@
#include <linux/sched.h> /* include for timer */
#include <linux/timer.h> /* include for timer */
#include <linux/hdlc.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include "sbecom_inline_linux.h"
#include "libsbew.h"
@@ -123,7 +123,7 @@ c4_find_chan (int channum)
{
if ((ch->state != UNASSIGNED) &&
(ch->channum == channum))
- return (ch);
+ return ch;
}
}
return 0;
@@ -193,7 +193,7 @@ c4_new (void *hi)
#define COMET_LBCMD_READ 0x80 /* read only (do not set, return read value) */
void
-checkPorts (ci_t * ci)
+checkPorts (ci_t *ci)
{
#ifndef CONFIG_SBE_PMCC4_NCOMM
/*
@@ -459,7 +459,7 @@ checkPorts (ci_t * ci)
STATIC void
-c4_watchdog (ci_t * ci)
+c4_watchdog (ci_t *ci)
{
if (drvr_state != SBE_DRVR_AVAILABLE)
{
@@ -512,7 +512,7 @@ c4_cleanup (void)
*/
int
-c4_get_portcfg (ci_t * ci)
+c4_get_portcfg (ci_t *ci)
{
comet_t *comet;
int portnum, mask;
@@ -536,7 +536,7 @@ c4_get_portcfg (ci_t * ci)
/* nothing herein should generate interrupts */
status_t __init
-c4_init (ci_t * ci, u_char *func0, u_char *func1)
+c4_init (ci_t *ci, u_char *func0, u_char *func1)
{
mpi_t *pi;
mch_t *ch;
@@ -670,7 +670,7 @@ c4_init (ci_t * ci, u_char *func0, u_char *func1)
/* better be fully setup to handle interrupts when you call this */
status_t __init
-c4_init2 (ci_t * ci)
+c4_init2 (ci_t *ci)
{
status_t ret;
@@ -698,7 +698,7 @@ c4_init2 (ci_t * ci)
/* This function sets the loopback mode (or clears it, as the case may be). */
int
-c4_loop_port (ci_t * ci, int portnum, u_int8_t cmd)
+c4_loop_port (ci_t *ci, int portnum, u_int8_t cmd)
{
comet_t *comet;
volatile u_int32_t loopValue;
@@ -757,7 +757,7 @@ c4_loop_port (ci_t * ci, int portnum, u_int8_t cmd)
*/
status_t
-c4_frame_rw (ci_t * ci, struct sbecom_port_param * pp)
+c4_frame_rw (ci_t *ci, struct sbecom_port_param *pp)
{
comet_t *comet;
volatile u_int32_t data;
@@ -796,7 +796,7 @@ c4_frame_rw (ci_t * ci, struct sbecom_port_param * pp)
*/
status_t
-c4_pld_rw (ci_t * ci, struct sbecom_port_param * pp)
+c4_pld_rw (ci_t *ci, struct sbecom_port_param *pp)
{
volatile u_int32_t *regaddr;
volatile u_int32_t data;
@@ -834,7 +834,7 @@ c4_pld_rw (ci_t * ci, struct sbecom_port_param * pp)
*/
status_t
-c4_musycc_rw (ci_t * ci, struct c4_musycc_param * mcp)
+c4_musycc_rw (ci_t *ci, struct c4_musycc_param *mcp)
{
mpi_t *pi;
volatile u_int32_t *dph; /* hardware implemented register */
@@ -898,7 +898,7 @@ c4_musycc_rw (ci_t * ci, struct c4_musycc_param * mcp)
}
status_t
-c4_get_port (ci_t * ci, int portnum)
+c4_get_port (ci_t *ci, int portnum)
{
if (portnum >= ci->max_port) /* sanity check */
return ENXIO;
@@ -913,7 +913,7 @@ c4_get_port (ci_t * ci, int portnum)
}
status_t
-c4_set_port (ci_t * ci, int portnum)
+c4_set_port (ci_t *ci, int portnum)
{
mpi_t *pi;
struct sbecom_port_param *pp;
@@ -942,7 +942,7 @@ c4_set_port (ci_t * ci, int portnum)
if ((ret = c4_wq_port_init (pi))) /* create/init
* workqueue_struct */
- return (ret);
+ return ret;
}
init_comet (ci, pi->cometbase, pp->port_mode, 1 /* clockmaster == true */ , pp->portP);
@@ -1018,7 +1018,7 @@ c4_set_port (ci_t * ci, int portnum)
unsigned int max_int = 0;
status_t
-c4_new_chan (ci_t * ci, int portnum, int channum, void *user)
+c4_new_chan (ci_t *ci, int portnum, int channum, void *user)
{
mpi_t *pi;
mch_t *ch;
@@ -1111,7 +1111,7 @@ c4_del_chan_stats (int channum)
status_t
-c4_set_chan (int channum, struct sbecom_chan_param * p)
+c4_set_chan (int channum, struct sbecom_chan_param *p)
{
mch_t *ch;
int i, x = 0;
@@ -1162,7 +1162,7 @@ c4_set_chan (int channum, struct sbecom_chan_param * p)
status_t
-c4_get_chan (int channum, struct sbecom_chan_param * p)
+c4_get_chan (int channum, struct sbecom_chan_param *p)
{
mch_t *ch;
@@ -1173,7 +1173,7 @@ c4_get_chan (int channum, struct sbecom_chan_param * p)
}
status_t
-c4_get_chan_stats (int channum, struct sbecom_chan_stats * p)
+c4_get_chan_stats (int channum, struct sbecom_chan_stats *p)
{
mch_t *ch;
@@ -1185,7 +1185,7 @@ c4_get_chan_stats (int channum, struct sbecom_chan_stats * p)
}
STATIC int
-c4_fifo_alloc (mpi_t * pi, int chan, int *len)
+c4_fifo_alloc (mpi_t *pi, int chan, int *len)
{
int i, l = 0, start = 0, max = 0, maxstart = 0;
@@ -1222,7 +1222,7 @@ c4_fifo_alloc (mpi_t * pi, int chan, int *len)
}
void
-c4_fifo_free (mpi_t * pi, int chan)
+c4_fifo_free (mpi_t *pi, int chan)
{
int i;
@@ -1236,7 +1236,7 @@ c4_fifo_free (mpi_t * pi, int chan)
status_t
-c4_chan_up (ci_t * ci, int channum)
+c4_chan_up (ci_t *ci, int channum)
{
mpi_t *pi;
mch_t *ch;
@@ -1467,7 +1467,7 @@ errfree:
/* stop the hardware from servicing & interrupting */
void
-c4_stopwd (ci_t * ci)
+c4_stopwd (ci_t *ci)
{
OS_stop_watchdog (&ci->wd);
SD_SEM_TAKE (&ci->sem_wdbusy, "_stop_"); /* ensure WD not running */
@@ -1476,7 +1476,7 @@ c4_stopwd (ci_t * ci)
void
-sbecom_get_brdinfo (ci_t * ci, struct sbe_brd_info * bip, u_int8_t *bsn)
+sbecom_get_brdinfo (ci_t *ci, struct sbe_brd_info *bip, u_int8_t *bsn)
{
char *np;
u_int32_t sn = 0;
@@ -1485,7 +1485,7 @@ sbecom_get_brdinfo (ci_t * ci, struct sbe_brd_info * bip, u_int8_t *bsn)
bip->brdno = ci->brdno; /* our board number */
bip->brd_id = ci->brd_id;
bip->brd_hdw_id = ci->hdw_bid;
- bip->brd_chan_cnt = MUSYCC_NCHANS * ci->max_port; /* number of channels
+ bip->brd_chan_cnt = MUSYCC_NCHANS *ci->max_port; /* number of channels
* being used */
bip->brd_port_cnt = ci->max_port; /* number of ports being used */
bip->brd_pci_speed = BINFO_PCI_SPEED_unk; /* PCI speed not yet
@@ -1535,7 +1535,7 @@ sbecom_get_brdinfo (ci_t * ci, struct sbe_brd_info * bip, u_int8_t *bsn)
status_t
-c4_get_iidinfo (ci_t * ci, struct sbe_iid_info * iip)
+c4_get_iidinfo (ci_t *ci, struct sbe_iid_info *iip)
{
struct net_device *dev;
char *np;
@@ -1624,7 +1624,7 @@ wanpmcC4T1E1_getBaseAddress (int cardID, int deviceID)
}
ci = ci->next; /* next board, if any */
}
- return (base);
+ return base;
}
#endif /*** CONFIG_SBE_PMCC4_NCOMM ***/
diff --git a/drivers/staging/cxt1e1/sbecom_inline_linux.h b/drivers/staging/cxt1e1/sbecom_inline_linux.h
index 68ed445ab0cb..3c6d1c0fc6d6 100644
--- a/drivers/staging/cxt1e1/sbecom_inline_linux.h
+++ b/drivers/staging/cxt1e1/sbecom_inline_linux.h
@@ -177,7 +177,7 @@ struct watchdog
static inline int
-OS_start_watchdog (struct watchdog * wd)
+OS_start_watchdog (struct watchdog *wd)
{
wd->h.expires = jiffies + wd->ticks;
add_timer (&wd->h);
@@ -186,7 +186,7 @@ OS_start_watchdog (struct watchdog * wd)
static inline int
-OS_stop_watchdog (struct watchdog * wd)
+OS_stop_watchdog (struct watchdog *wd)
{
del_timer_sync (&wd->h);
return 0;
@@ -194,7 +194,7 @@ OS_stop_watchdog (struct watchdog * wd)
static inline int
-OS_free_watchdog (struct watchdog * wd)
+OS_free_watchdog (struct watchdog *wd)
{
OS_stop_watchdog (wd);
OS_kfree (wd);
diff --git a/drivers/staging/cxt1e1/sbeid.c b/drivers/staging/cxt1e1/sbeid.c
index a2243b10ef05..0f9bd5f8136c 100644
--- a/drivers/staging/cxt1e1/sbeid.c
+++ b/drivers/staging/cxt1e1/sbeid.c
@@ -27,7 +27,7 @@
char *
-sbeid_get_bdname (ci_t * ci)
+sbeid_get_bdname (ci_t *ci)
{
char *np = 0;
@@ -73,7 +73,7 @@ sbeid_get_bdname (ci_t * ci)
/* given the presetting of brd_id, set the corresponding hdw_id */
void
-sbeid_set_hdwbid (ci_t * ci)
+sbeid_set_hdwbid (ci_t *ci)
{
/*
* set SBE's unique hardware identification (for legacy boards might not
@@ -170,7 +170,7 @@ sbeid_set_hdwbid (ci_t * ci)
/* given the presetting of hdw_bid, set the corresponding brd_id */
void
-sbeid_set_bdtype (ci_t * ci)
+sbeid_set_bdtype (ci_t *ci)
{
/* set SBE's unique PCI VENDOR/DEVID */
switch (ci->hdw_bid)
diff --git a/drivers/staging/cxt1e1/sbeproc.h b/drivers/staging/cxt1e1/sbeproc.h
index e5c072cf1952..37285df359c1 100644
--- a/drivers/staging/cxt1e1/sbeproc.h
+++ b/drivers/staging/cxt1e1/sbeproc.h
@@ -28,11 +28,11 @@ int __init sbecom_proc_brd_init (ci_t *);
#else
-static inline void sbecom_proc_brd_cleanup(ci_t * ci)
+static inline void sbecom_proc_brd_cleanup(ci_t *ci)
{
}
-static inline int __init sbecom_proc_brd_init(ci_t * ci)
+static inline int __init sbecom_proc_brd_init(ci_t *ci)
{
return 0;
}
diff --git a/drivers/staging/dgrp/dgrp_dpa_ops.c b/drivers/staging/dgrp/dgrp_dpa_ops.c
index 114799cddd85..69bfe309376d 100644
--- a/drivers/staging/dgrp/dgrp_dpa_ops.c
+++ b/drivers/staging/dgrp/dgrp_dpa_ops.c
@@ -392,7 +392,7 @@ static long dgrp_dpa_ioctl(struct file *file, unsigned int cmd,
getnode.nd_rx_byte = nd->nd_rx_byte;
memset(&getnode.nd_ps_desc, 0, MAX_DESC_LEN);
- strncpy(getnode.nd_ps_desc, nd->nd_ps_desc, MAX_DESC_LEN);
+ strlcpy(getnode.nd_ps_desc, nd->nd_ps_desc, MAX_DESC_LEN);
if (copy_to_user(uarg, &getnode, sizeof(struct digi_node)))
return -EFAULT;
diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index 5b7833f593ff..33ac7fb88cbd 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -278,7 +278,7 @@ static void parity_scan(struct ch_struct *ch, unsigned char *cbuf,
switch (ch->ch_pscan_state) {
default:
/* reset to sanity and fall through */
- ch->ch_pscan_state = 0 ;
+ ch->ch_pscan_state = 0;
case 0:
/* No FF seen yet */
@@ -1607,7 +1607,7 @@ static int dgrp_send(struct nd_struct *nd, long tmax)
if ((ch->ch_pun.un_flag & UN_LOW) != 0 ?
(n <= TBUF_LOW) :
(ch->ch_pun.un_flag & UN_TIME) != 0 ?
- ((jiffies - ch->ch_waketime) >= 0) :
+ time_is_before_jiffies(ch->ch_waketime) :
(n == 0 && ch->ch_s_tpos == ch->ch_s_tin) &&
((ch->ch_pun.un_flag & UN_EMPTY) != 0 ||
((ch->ch_tun.un_open_count &&
@@ -3083,7 +3083,7 @@ check_query:
nd->nd_hw_ver = (b[8] << 8) | b[9];
nd->nd_sw_ver = (b[10] << 8) | b[11];
nd->nd_hw_id = b[6];
- desclen = ((plen - 12) > MAX_DESC_LEN) ? MAX_DESC_LEN :
+ desclen = (plen - 12 > MAX_DESC_LEN - 1) ? MAX_DESC_LEN - 1 :
plen - 12;
if (desclen <= 0) {
diff --git a/drivers/staging/dgrp/drp.h b/drivers/staging/dgrp/drp.h
index 84a1e7be4899..4024b488eba9 100644
--- a/drivers/staging/dgrp/drp.h
+++ b/drivers/staging/dgrp/drp.h
@@ -674,7 +674,7 @@ struct nd_struct {
ushort nd_hw_ver; /* HW version returned from PS */
ushort nd_sw_ver; /* SW version returned from PS */
uint nd_hw_id; /* HW ID returned from PS */
- u8 nd_ps_desc[MAX_DESC_LEN+1]; /* Description from PS */
+ u8 nd_ps_desc[MAX_DESC_LEN]; /* Description from PS */
uint nd_vpd_len; /* VPD len, if any */
u8 nd_vpd[VPDSIZE]; /* VPD, if any */
diff --git a/drivers/staging/dwc2/core.c b/drivers/staging/dwc2/core.c
index 3177db2380bf..e3a0e770301d 100644
--- a/drivers/staging/dwc2/core.c
+++ b/drivers/staging/dwc2/core.c
@@ -506,8 +506,7 @@ static void dwc2_config_fifos(struct dwc2_hsotg *hsotg)
struct dwc2_core_params *params = hsotg->core_params;
u32 rxfsiz, nptxfsiz, ptxfsiz, hptxfsiz, dfifocfg;
- if (!(hsotg->hwcfg2 & GHWCFG2_DYNAMIC_FIFO) ||
- !params->enable_dynamic_fifo)
+ if (!params->enable_dynamic_fifo)
return;
dev_dbg(hsotg->dev, "Total FIFO Size=%d\n", hsotg->total_fifo_size);
@@ -1146,16 +1145,10 @@ void dwc2_hc_cleanup(struct dwc2_hsotg *hsotg, struct dwc2_host_chan *chan)
static void dwc2_hc_set_even_odd_frame(struct dwc2_hsotg *hsotg,
struct dwc2_host_chan *chan, u32 *hcchar)
{
- u32 hfnum, frnum;
-
if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
chan->ep_type == USB_ENDPOINT_XFER_ISOC) {
- hfnum = readl(hsotg->regs + HFNUM);
- frnum = hfnum >> HFNUM_FRNUM_SHIFT &
- HFNUM_FRNUM_MASK >> HFNUM_FRNUM_SHIFT;
-
/* 1 if _next_ frame is odd, 0 if it's even */
- if (frnum & 0x1)
+ if (!(dwc2_hcd_get_frame_number(hsotg) & 0x1))
*hcchar |= HCCHAR_ODDFRM;
}
}
@@ -1696,7 +1689,7 @@ u32 dwc2_calc_frame_interval(struct dwc2_hsotg *hsotg)
GHWCFG2_FS_PHY_TYPE_DEDICATED)
clock = 48;
- if ((hprt0 & HPRT0_SPD_MASK) == 0)
+ if ((hprt0 & HPRT0_SPD_MASK) == HPRT0_SPD_HIGH_SPEED)
/* High speed case */
return 125 * clock;
else
@@ -1815,8 +1808,6 @@ void dwc2_dump_global_registers(struct dwc2_hsotg *hsotg)
{
#ifdef DEBUG
u32 __iomem *addr;
- int i, ep_num;
- char *txfsiz;
dev_dbg(hsotg->dev, "Core Global Registers\n");
addr = hsotg->regs + GOTGCTL;
@@ -1892,23 +1883,6 @@ void dwc2_dump_global_registers(struct dwc2_hsotg *hsotg)
dev_dbg(hsotg->dev, "HPTXFSIZ @0x%08lX : 0x%08X\n",
(unsigned long)addr, readl(addr));
- if (hsotg->core_params->en_multiple_tx_fifo <= 0) {
- ep_num = hsotg->hwcfg4 >> GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT &
- GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK >>
- GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT;
- txfsiz = "DPTXFSIZ";
- } else {
- ep_num = hsotg->hwcfg4 >> GHWCFG4_NUM_IN_EPS_SHIFT &
- GHWCFG4_NUM_IN_EPS_MASK >> GHWCFG4_NUM_IN_EPS_SHIFT;
- txfsiz = "DIENPTXF";
- }
-
- for (i = 0; i < ep_num; i++) {
- addr = hsotg->regs + DPTXFSIZN(i + 1);
- dev_dbg(hsotg->dev, "%s[%d] @0x%08lX : 0x%08X\n", txfsiz, i + 1,
- (unsigned long)addr, readl(addr));
- }
-
addr = hsotg->regs + PCGCTL;
dev_dbg(hsotg->dev, "PCGCTL @0x%08lX : 0x%08X\n",
(unsigned long)addr, readl(addr));
@@ -2298,7 +2272,7 @@ int dwc2_set_param_phy_type(struct dwc2_hsotg *hsotg, int val)
#ifndef NO_FS_PHY_HW_CHECKS
valid = 0;
#else
- val = 0;
+ val = DWC2_PHY_TYPE_PARAM_FS;
dev_dbg(hsotg->dev, "Setting phy_type to %d\n", val);
retval = -EINVAL;
#endif
@@ -2325,7 +2299,7 @@ int dwc2_set_param_phy_type(struct dwc2_hsotg *hsotg, int val)
dev_err(hsotg->dev,
"%d invalid for phy_type. Check HW configuration.\n",
val);
- val = 0;
+ val = DWC2_PHY_TYPE_PARAM_FS;
if (hs_phy_type != GHWCFG2_HS_PHY_TYPE_NOT_SUPPORTED) {
if (hs_phy_type == GHWCFG2_HS_PHY_TYPE_UTMI ||
hs_phy_type == GHWCFG2_HS_PHY_TYPE_UTMI_ULPI)
@@ -2360,8 +2334,8 @@ int dwc2_set_param_speed(struct dwc2_hsotg *hsotg, int val)
valid = 0;
}
- if (val == 0 && dwc2_get_param_phy_type(hsotg) ==
- DWC2_PHY_TYPE_PARAM_FS)
+ if (val == DWC2_SPEED_PARAM_HIGH &&
+ dwc2_get_param_phy_type(hsotg) == DWC2_PHY_TYPE_PARAM_FS)
valid = 0;
if (!valid) {
@@ -2370,7 +2344,7 @@ int dwc2_set_param_speed(struct dwc2_hsotg *hsotg, int val)
"%d invalid for speed parameter. Check HW configuration.\n",
val);
val = dwc2_get_param_phy_type(hsotg) == DWC2_PHY_TYPE_PARAM_FS ?
- 1 : 0;
+ DWC2_SPEED_PARAM_FULL : DWC2_SPEED_PARAM_HIGH;
dev_dbg(hsotg->dev, "Setting speed to %d\n", val);
retval = -EINVAL;
}
@@ -2668,7 +2642,7 @@ int dwc2_set_param_otg_ver(struct dwc2_hsotg *hsotg, int val)
* for the DWC_otg core. It returns non-0 if any parameters are invalid.
*/
int dwc2_set_parameters(struct dwc2_hsotg *hsotg,
- struct dwc2_core_params *params)
+ const struct dwc2_core_params *params)
{
int retval = 0;
diff --git a/drivers/staging/dwc2/core_intr.c b/drivers/staging/dwc2/core_intr.c
index 4c9ad14e90ec..98c51bba6622 100644
--- a/drivers/staging/dwc2/core_intr.c
+++ b/drivers/staging/dwc2/core_intr.c
@@ -403,8 +403,7 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg)
#define GINTMSK_COMMON (GINTSTS_WKUPINT | GINTSTS_SESSREQINT | \
GINTSTS_CONIDSTSCHNG | GINTSTS_OTGINT | \
GINTSTS_MODEMIS | GINTSTS_DISCONNINT | \
- GINTSTS_USBSUSP | GINTSTS_RESTOREDONE | \
- GINTSTS_PRTINT)
+ GINTSTS_USBSUSP | GINTSTS_PRTINT)
/*
* This function returns the Core Interrupt register
@@ -450,7 +449,7 @@ irqreturn_t dwc2_handle_common_intr(int irq, void *dev)
{
struct dwc2_hsotg *hsotg = dev;
u32 gintsts;
- int retval = 0;
+ irqreturn_t retval = IRQ_NONE;
if (dwc2_check_core_status(hsotg) < 0) {
dev_warn(hsotg->dev, "Controller is disconnected\n");
@@ -461,7 +460,7 @@ irqreturn_t dwc2_handle_common_intr(int irq, void *dev)
gintsts = dwc2_read_common_intr(hsotg);
if (gintsts & ~GINTSTS_PRTINT)
- retval = 1;
+ retval = IRQ_HANDLED;
if (gintsts & GINTSTS_MODEMIS)
dwc2_handle_mode_mismatch_intr(hsotg);
@@ -478,12 +477,6 @@ irqreturn_t dwc2_handle_common_intr(int irq, void *dev)
if (gintsts & GINTSTS_USBSUSP)
dwc2_handle_usb_suspend_intr(hsotg);
- if (gintsts & GINTSTS_RESTOREDONE) {
- gintsts = GINTSTS_RESTOREDONE;
- writel(gintsts, hsotg->regs + GINTSTS);
- dev_dbg(hsotg->dev, " --Restore done interrupt received--\n");
- }
-
if (gintsts & GINTSTS_PRTINT) {
/*
* The port interrupt occurs while in device mode with HPRT0
@@ -500,6 +493,6 @@ irqreturn_t dwc2_handle_common_intr(int irq, void *dev)
spin_unlock(&hsotg->lock);
out:
- return IRQ_RETVAL(retval);
+ return retval;
}
EXPORT_SYMBOL_GPL(dwc2_handle_common_intr);
diff --git a/drivers/staging/dwc2/hcd.c b/drivers/staging/dwc2/hcd.c
index 8551ccedf037..2ed54b172a3b 100644
--- a/drivers/staging/dwc2/hcd.c
+++ b/drivers/staging/dwc2/hcd.c
@@ -1563,9 +1563,9 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
break;
case GetPortStatus:
- dev_dbg(hsotg->dev,
- "GetPortStatus wIndex=0x%04x flags=0x%08x\n", windex,
- hsotg->flags.d32);
+ dev_vdbg(hsotg->dev,
+ "GetPortStatus wIndex=0x%04x flags=0x%08x\n", windex,
+ hsotg->flags.d32);
if (!windex || windex > 1)
goto error;
@@ -1598,7 +1598,7 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
}
hprt0 = readl(hsotg->regs + HPRT0);
- dev_dbg(hsotg->dev, " HPRT0: 0x%08x\n", hprt0);
+ dev_vdbg(hsotg->dev, " HPRT0: 0x%08x\n", hprt0);
if (hprt0 & HPRT0_CONNSTS)
port_status |= USB_PORT_STAT_CONNECTION;
@@ -1623,7 +1623,7 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq,
port_status |= USB_PORT_STAT_TEST;
/* USB_PORT_FEAT_INDICATOR unsupported always 0 */
- dev_dbg(hsotg->dev, "port_status=%08x\n", port_status);
+ dev_vdbg(hsotg->dev, "port_status=%08x\n", port_status);
*(__le32 *)buf = cpu_to_le32(port_status);
break;
@@ -2533,9 +2533,8 @@ static void _dwc2_hcd_endpoint_reset(struct usb_hcd *hcd,
static irqreturn_t _dwc2_hcd_irq(struct usb_hcd *hcd)
{
struct dwc2_hsotg *hsotg = dwc2_hcd_to_hsotg(hcd);
- int retval = dwc2_hcd_intr(hsotg);
- return IRQ_RETVAL(retval);
+ return dwc2_handle_hcd_intr(hsotg);
}
/*
@@ -2702,7 +2701,7 @@ EXPORT_SYMBOL_GPL(dwc2_set_all_params);
* a negative error on failure.
*/
int dwc2_hcd_init(struct dwc2_hsotg *hsotg, int irq,
- struct dwc2_core_params *params)
+ const struct dwc2_core_params *params)
{
struct usb_hcd *hcd;
struct dwc2_host_chan *channel;
@@ -2919,7 +2918,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg, int irq,
* allocates the DMA buffer pool, registers the USB bus, requests the
* IRQ line, and calls hcd_start method.
*/
- retval = usb_add_hcd(hcd, irq, IRQF_SHARED | IRQF_DISABLED);
+ retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
if (retval < 0)
goto error3;
diff --git a/drivers/staging/dwc2/hcd.h b/drivers/staging/dwc2/hcd.h
index d071f1a05df1..cf6c055aec8d 100644
--- a/drivers/staging/dwc2/hcd.h
+++ b/drivers/staging/dwc2/hcd.h
@@ -448,10 +448,10 @@ static inline u8 dwc2_hcd_is_pipe_out(struct dwc2_hcd_pipe_info *pipe)
}
extern int dwc2_hcd_init(struct dwc2_hsotg *hsotg, int irq,
- struct dwc2_core_params *params);
+ const struct dwc2_core_params *params);
extern void dwc2_hcd_remove(struct dwc2_hsotg *hsotg);
extern int dwc2_set_parameters(struct dwc2_hsotg *hsotg,
- struct dwc2_core_params *params);
+ const struct dwc2_core_params *params);
extern void dwc2_set_all_params(struct dwc2_core_params *params, int value);
/* Transaction Execution Functions */
@@ -646,14 +646,14 @@ extern void dwc2_hcd_save_data_toggle(struct dwc2_hsotg *hsotg,
/* HCD Core API */
/**
- * dwc2_hcd_intr() - Called on every hardware interrupt
+ * dwc2_handle_hcd_intr() - Called on every hardware interrupt
*
* @hsotg: The DWC2 HCD
*
- * Returns non zero if interrupt is handled
- * Return 0 if interrupt is not handled
+ * Returns IRQ_HANDLED if interrupt is handled
+ * Return IRQ_NONE if interrupt is not handled
*/
-extern int dwc2_hcd_intr(struct dwc2_hsotg *hsotg);
+extern irqreturn_t dwc2_handle_hcd_intr(struct dwc2_hsotg *hsotg);
/**
* dwc2_hcd_stop() - Halts the DWC_otg host mode operation
diff --git a/drivers/staging/dwc2/hcd_intr.c b/drivers/staging/dwc2/hcd_intr.c
index e24062f0a49e..e75dccb3b80b 100644
--- a/drivers/staging/dwc2/hcd_intr.c
+++ b/drivers/staging/dwc2/hcd_intr.c
@@ -115,16 +115,13 @@ static void dwc2_sof_intr(struct dwc2_hsotg *hsotg)
{
struct list_head *qh_entry;
struct dwc2_qh *qh;
- u32 hfnum;
enum dwc2_transaction_type tr_type;
#ifdef DEBUG_SOF
dev_vdbg(hsotg->dev, "--Start of Frame Interrupt--\n");
#endif
- hfnum = readl(hsotg->regs + HFNUM);
- hsotg->frame_number = hfnum >> HFNUM_FRNUM_SHIFT &
- HFNUM_FRNUM_MASK >> HFNUM_FRNUM_SHIFT;
+ hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg);
dwc2_track_missed_sofs(hsotg);
@@ -244,6 +241,7 @@ static void dwc2_hprt0_enable(struct dwc2_hsotg *hsotg, u32 hprt0,
u32 usbcfg;
u32 prtspd;
u32 hcfg;
+ u32 fslspclksel;
u32 hfir;
dev_vdbg(hsotg->dev, "%s(%p)\n", __func__, hsotg);
@@ -275,6 +273,7 @@ static void dwc2_hprt0_enable(struct dwc2_hsotg *hsotg, u32 hprt0,
}
hcfg = readl(hsotg->regs + HCFG);
+ fslspclksel = hcfg & HCFG_FSLSPCLKSEL_MASK;
if (prtspd == HPRT0_SPD_LOW_SPEED &&
params->host_ls_low_power_phy_clk ==
@@ -282,8 +281,7 @@ static void dwc2_hprt0_enable(struct dwc2_hsotg *hsotg, u32 hprt0,
/* 6 MHZ */
dev_vdbg(hsotg->dev,
"FS_PHY programming HCFG to 6 MHz\n");
- if ((hcfg & HCFG_FSLSPCLKSEL_MASK) !=
- HCFG_FSLSPCLKSEL_6_MHZ) {
+ if (fslspclksel != HCFG_FSLSPCLKSEL_6_MHZ) {
hcfg &= ~HCFG_FSLSPCLKSEL_MASK;
hcfg |= HCFG_FSLSPCLKSEL_6_MHZ;
writel(hcfg, hsotg->regs + HCFG);
@@ -293,8 +291,7 @@ static void dwc2_hprt0_enable(struct dwc2_hsotg *hsotg, u32 hprt0,
/* 48 MHZ */
dev_vdbg(hsotg->dev,
"FS_PHY programming HCFG to 48 MHz\n");
- if ((hcfg & HCFG_FSLSPCLKSEL_MASK) !=
- HCFG_FSLSPCLKSEL_48_MHZ) {
+ if (fslspclksel != HCFG_FSLSPCLKSEL_48_MHZ) {
hcfg &= ~HCFG_FSLSPCLKSEL_MASK;
hcfg |= HCFG_FSLSPCLKSEL_48_MHZ;
writel(hcfg, hsotg->regs + HCFG);
@@ -2060,14 +2057,14 @@ static void dwc2_hc_intr(struct dwc2_hsotg *hsotg)
}
/* This function handles interrupts for the HCD */
-int dwc2_hcd_intr(struct dwc2_hsotg *hsotg)
+irqreturn_t dwc2_handle_hcd_intr(struct dwc2_hsotg *hsotg)
{
u32 gintsts, dbg_gintsts;
- int retval = 0;
+ irqreturn_t retval = IRQ_NONE;
if (dwc2_check_core_status(hsotg) < 0) {
dev_warn(hsotg->dev, "Controller is disconnected\n");
- return 0;
+ return retval;
}
spin_lock(&hsotg->lock);
@@ -2077,10 +2074,10 @@ int dwc2_hcd_intr(struct dwc2_hsotg *hsotg)
gintsts = dwc2_read_core_intr(hsotg);
if (!gintsts) {
spin_unlock(&hsotg->lock);
- return 0;
+ return retval;
}
- retval = 1;
+ retval = IRQ_HANDLED;
dbg_gintsts = gintsts;
#ifndef DEBUG_SOF
@@ -2102,9 +2099,6 @@ int dwc2_hcd_intr(struct dwc2_hsotg *hsotg)
dwc2_rx_fifo_level_intr(hsotg);
if (gintsts & GINTSTS_NPTXFEMP)
dwc2_np_tx_fifo_empty_intr(hsotg);
- if (gintsts & GINTSTS_I2CINT)
- /* Todo: Implement i2cintr handler */
- writel(GINTSTS_I2CINT, hsotg->regs + GINTSTS);
if (gintsts & GINTSTS_PRTINT)
dwc2_port_intr(hsotg);
if (gintsts & GINTSTS_HCHINT)
diff --git a/drivers/staging/dwc2/pci.c b/drivers/staging/dwc2/pci.c
index 69c65eb8683f..3ca54d6782fd 100644
--- a/drivers/staging/dwc2/pci.c
+++ b/drivers/staging/dwc2/pci.c
@@ -59,7 +59,7 @@
static const char dwc2_driver_name[] = "dwc2";
-static struct dwc2_core_params dwc2_module_params = {
+static const struct dwc2_core_params dwc2_module_params = {
.otg_cap = -1,
.otg_ver = -1,
.dma_enable = -1,
@@ -101,8 +101,6 @@ static void dwc2_driver_remove(struct pci_dev *dev)
{
struct dwc2_hsotg *hsotg = pci_get_drvdata(dev);
- dev_dbg(&dev->dev, "%s(%p)\n", __func__, dev);
-
dwc2_hcd_remove(hsotg);
pci_disable_device(dev);
}
@@ -125,18 +123,14 @@ static int dwc2_driver_probe(struct pci_dev *dev,
struct dwc2_hsotg *hsotg;
int retval;
- dev_dbg(&dev->dev, "%s(%p)\n", __func__, dev);
-
hsotg = devm_kzalloc(&dev->dev, sizeof(*hsotg), GFP_KERNEL);
if (!hsotg)
return -ENOMEM;
- pci_set_power_state(dev, PCI_D0);
-
hsotg->dev = &dev->dev;
- hsotg->regs = devm_request_and_ioremap(&dev->dev, &dev->resource[0]);
- if (!hsotg->regs)
- return -ENOMEM;
+ hsotg->regs = devm_ioremap_resource(&dev->dev, &dev->resource[0]);
+ if (IS_ERR(hsotg->regs))
+ return PTR_ERR(hsotg->regs);
dev_dbg(&dev->dev, "mapped PA %08lx to VA %p\n",
(unsigned long)pci_resource_start(dev, 0), hsotg->regs);
@@ -153,7 +147,6 @@ static int dwc2_driver_probe(struct pci_dev *dev,
}
pci_set_drvdata(dev, hsotg);
- dev_dbg(&dev->dev, "hsotg=%p\n", hsotg);
return retval;
}
@@ -162,6 +155,10 @@ static DEFINE_PCI_DEVICE_TABLE(dwc2_pci_ids) = {
{
PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_PRODUCT_ID_HAPS_HSOTG),
},
+ {
+ PCI_DEVICE(PCI_VENDOR_ID_STMICRO,
+ PCI_DEVICE_ID_STMICRO_USB_OTG),
+ },
{ /* end: all zeroes */ }
};
MODULE_DEVICE_TABLE(pci, dwc2_pci_ids);
diff --git a/drivers/staging/echo/echo.c b/drivers/staging/echo/echo.c
index 5882139d49af..9597e9523cac 100644
--- a/drivers/staging/echo/echo.c
+++ b/drivers/staging/echo/echo.c
@@ -267,13 +267,13 @@ struct oslec_state *oslec_create(int len, int adaption_mode)
goto error_snap;
ec->cond_met = 0;
- ec->Pstates = 0;
- ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
- ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
+ ec->pstates = 0;
+ ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+ ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
- ec->Lbgn = ec->Lbgn_acc = 0;
- ec->Lbgn_upper = 200;
- ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
+ ec->lbgn = ec->lbgn_acc = 0;
+ ec->lbgn_upper = 200;
+ ec->lbgn_upper_acc = ec->lbgn_upper << 13;
return ec;
@@ -314,13 +314,13 @@ void oslec_flush(struct oslec_state *ec)
{
int i;
- ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
- ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
+ ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+ ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
- ec->Lbgn = ec->Lbgn_acc = 0;
- ec->Lbgn_upper = 200;
- ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
+ ec->lbgn = ec->lbgn_acc = 0;
+ ec->lbgn_upper = 200;
+ ec->lbgn_upper_acc = ec->lbgn_upper << 13;
ec->nonupdate_dwell = 0;
@@ -332,7 +332,7 @@ void oslec_flush(struct oslec_state *ec)
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
ec->curr_pos = ec->taps - 1;
- ec->Pstates = 0;
+ ec->pstates = 0;
}
EXPORT_SYMBOL_GPL(oslec_flush);
@@ -418,33 +418,33 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
new = (int)tx * (int)tx;
old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
(int)ec->fir_state.history[ec->fir_state.curr_pos];
- ec->Pstates +=
+ ec->pstates +=
((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps;
- if (ec->Pstates < 0)
- ec->Pstates = 0;
+ if (ec->pstates < 0)
+ ec->pstates = 0;
}
/* Calculate short term average levels using simple single pole IIRs */
- ec->Ltxacc += abs(tx) - ec->Ltx;
- ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
- ec->Lrxacc += abs(rx) - ec->Lrx;
- ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
+ ec->ltxacc += abs(tx) - ec->ltx;
+ ec->ltx = (ec->ltxacc + (1 << 4)) >> 5;
+ ec->lrxacc += abs(rx) - ec->lrx;
+ ec->lrx = (ec->lrxacc + (1 << 4)) >> 5;
/* Foreground filter */
ec->fir_state.coeffs = ec->fir_taps16[0];
echo_value = fir16(&ec->fir_state, tx);
ec->clean = rx - echo_value;
- ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
- ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
+ ec->lcleanacc += abs(ec->clean) - ec->lclean;
+ ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5;
/* Background filter */
echo_value = fir16(&ec->fir_state_bg, tx);
clean_bg = rx - echo_value;
- ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
- ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
+ ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg;
+ ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5;
/* Background Filter adaption */
@@ -455,7 +455,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
ec->factor = 0;
ec->shift = 0;
if ((ec->nonupdate_dwell == 0)) {
- int P, logP, shift;
+ int p, logp, shift;
/* Determine:
@@ -490,9 +490,9 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
for a divide versus a top_bit() implementation.
*/
- P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
- logP = top_bit(P) + ec->log2taps;
- shift = 30 - 2 - logP;
+ p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates;
+ logp = top_bit(p) + ec->log2taps;
+ shift = 30 - 2 - logp;
ec->shift = shift;
lms_adapt_bg(ec, clean_bg, shift);
@@ -502,7 +502,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
near end speech */
ec->adapt = 0;
- if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
+ if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx))
ec->nonupdate_dwell = DTD_HANGOVER;
if (ec->nonupdate_dwell)
ec->nonupdate_dwell--;
@@ -515,9 +515,9 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
(ec->nonupdate_dwell == 0) &&
/* (ec->Lclean_bg < 0.875*ec->Lclean) */
- (8 * ec->Lclean_bg < 7 * ec->Lclean) &&
+ (8 * ec->lclean_bg < 7 * ec->lclean) &&
/* (ec->Lclean_bg < 0.125*ec->Ltx) */
- (8 * ec->Lclean_bg < ec->Ltx)) {
+ (8 * ec->lclean_bg < ec->ltx)) {
if (ec->cond_met == 6) {
/*
* BG filter has had better results for 6 consecutive
@@ -541,14 +541,14 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
* non-linearity in the channel.".
*/
- if ((16 * ec->Lclean < ec->Ltx)) {
+ if ((16 * ec->lclean < ec->ltx)) {
/*
* Our e/c has improved echo by at least 24 dB (each
* factor of 2 is 6dB, so 2*2*2*2=16 is the same as
* 6+6+6+6=24dB)
*/
if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
- ec->cng_level = ec->Lbgn;
+ ec->cng_level = ec->lbgn;
/*
* Very elementary comfort noise generation.
@@ -571,10 +571,10 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
/* This sounds much better than CNG */
- if (ec->clean_nlp > ec->Lbgn)
- ec->clean_nlp = ec->Lbgn;
- if (ec->clean_nlp < -ec->Lbgn)
- ec->clean_nlp = -ec->Lbgn;
+ if (ec->clean_nlp > ec->lbgn)
+ ec->clean_nlp = ec->lbgn;
+ if (ec->clean_nlp < -ec->lbgn)
+ ec->clean_nlp = -ec->lbgn;
} else {
/*
* just mute the residual, doesn't sound very
@@ -593,9 +593,9 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
* level signals like near end speech. When combined
* with CNG or especially CLIP seems to work OK.
*/
- if (ec->Lclean < 40) {
- ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
- ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
+ if (ec->lclean < 40) {
+ ec->lbgn_acc += abs(ec->clean) - ec->lbgn;
+ ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12;
}
}
}
diff --git a/drivers/staging/echo/echo.h b/drivers/staging/echo/echo.h
index 32ca9dedeca4..9b08c63e6369 100644
--- a/drivers/staging/echo/echo.h
+++ b/drivers/staging/echo/echo.h
@@ -139,24 +139,24 @@ struct oslec_state {
int adaption_mode;
int cond_met;
- int32_t Pstates;
+ int32_t pstates;
int16_t adapt;
int32_t factor;
int16_t shift;
/* Average levels and averaging filter states */
- int Ltxacc;
- int Lrxacc;
- int Lcleanacc;
- int Lclean_bgacc;
- int Ltx;
- int Lrx;
- int Lclean;
- int Lclean_bg;
- int Lbgn;
- int Lbgn_acc;
- int Lbgn_upper;
- int Lbgn_upper_acc;
+ int ltxacc;
+ int lrxacc;
+ int lcleanacc;
+ int lclean_bgacc;
+ int ltx;
+ int lrx;
+ int lclean;
+ int lclean_bg;
+ int lbgn;
+ int lbgn_acc;
+ int lbgn_upper;
+ int lbgn_upper_acc;
/* foreground and background filter states */
struct fir16_state_t fir_state;
diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c
index ea9362d7e589..5590ebf1da15 100644
--- a/drivers/staging/frontier/alphatrack.c
+++ b/drivers/staging/frontier/alphatrack.c
@@ -24,13 +24,14 @@
* raw interrupt reports.
*/
-/* Note: this currently uses a dumb ringbuffer for reads and writes.
+/*
+ * Note: this currently uses a dumb ringbuffer for reads and writes.
* A more optimal driver would cache and kill off outstanding urbs that are
* now invalid, and ignore ones that already were in the queue but valid
* as we only have 30 commands for the alphatrack. In particular this is
* key for getting lights to flash in time as otherwise many commands
* can be buffered up before the light change makes it to the interface.
-*/
+ */
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -100,7 +101,8 @@ static int debug = ALPHATRACK_DEBUG;
module_param(debug, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug, "Debug enabled or not");
-/* All interrupt in transfers are collected in a ring buffer to
+/*
+ * All interrupt in transfers are collected in a ring buffer to
* avoid racing conditions and get better performance of the driver.
*/
@@ -109,8 +111,7 @@ static int ring_buffer_size = RING_BUFFER_SIZE;
module_param(ring_buffer_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_buffer_size, "Read ring buffer size");
-/* The write_buffer can one day contain more than one interrupt out transfer.
- */
+/* The write_buffer can one day contain more than one interrupt out transfer.*/
static int write_buffer_size = WRITE_BUFFER_SIZE;
module_param(write_buffer_size, int, S_IRUGO);
@@ -199,9 +200,7 @@ static void usb_alphatrack_abort_transfers(struct usb_alphatrack *dev)
usb_kill_urb(dev->interrupt_out_urb);
}
-/**
- * usb_alphatrack_delete
- */
+/** usb_alphatrack_delete */
static void usb_alphatrack_delete(struct usb_alphatrack *dev)
{
usb_alphatrack_abort_transfers(dev);
@@ -213,9 +212,7 @@ static void usb_alphatrack_delete(struct usb_alphatrack *dev)
kfree(dev); /* fixme oldi_buffer */
}
-/**
- * usb_alphatrack_interrupt_in_callback
- */
+/** usb_alphatrack_interrupt_in_callback */
static void usb_alphatrack_interrupt_in_callback(struct urb *urb)
{
@@ -296,9 +293,7 @@ exit:
wake_up_interruptible(&dev->read_wait);
}
-/**
- * usb_alphatrack_interrupt_out_callback
- */
+/** usb_alphatrack_interrupt_out_callback */
static void usb_alphatrack_interrupt_out_callback(struct urb *urb)
{
struct usb_alphatrack *dev = urb->context;
@@ -315,9 +310,7 @@ static void usb_alphatrack_interrupt_out_callback(struct urb *urb)
wake_up_interruptible(&dev->write_wait);
}
-/**
- * usb_alphatrack_open
- */
+/** usb_alphatrack_open */
static int usb_alphatrack_open(struct inode *inode, struct file *file)
{
struct usb_alphatrack *dev;
@@ -398,9 +391,7 @@ unlock_disconnect_exit:
return retval;
}
-/**
- * usb_alphatrack_release
- */
+/** usb_alphatrack_release */
static int usb_alphatrack_release(struct inode *inode, struct file *file)
{
struct usb_alphatrack *dev;
@@ -447,9 +438,7 @@ exit:
return retval;
}
-/**
- * usb_alphatrack_poll
- */
+/** usb_alphatrack_poll */
static unsigned int usb_alphatrack_poll(struct file *file, poll_table *wait)
{
struct usb_alphatrack *dev;
@@ -468,9 +457,7 @@ static unsigned int usb_alphatrack_poll(struct file *file, poll_table *wait)
return mask;
}
-/**
- * usb_alphatrack_read
- */
+/** usb_alphatrack_read */
static ssize_t usb_alphatrack_read(struct file *file, char __user *buffer,
size_t count, loff_t *ppos)
{
@@ -539,9 +526,7 @@ exit:
return retval;
}
-/**
- * usb_alphatrack_write
- */
+/** usb_alphatrack_write */
static ssize_t usb_alphatrack_write(struct file *file,
const char __user *buffer, size_t count,
loff_t *ppos)
@@ -601,7 +586,7 @@ static ssize_t usb_alphatrack_write(struct file *file,
}
if (dev->interrupt_out_endpoint == NULL) {
- dev_err(&dev->intf->dev, "Endpoint should not be be null!\n");
+ dev_err(&dev->intf->dev, "Endpoint should not be null!\n");
goto unlock_exit;
}
@@ -718,8 +703,10 @@ static int usb_alphatrack_probe(struct usb_interface *intf,
true_size = min(ring_buffer_size, RING_BUFFER_SIZE);
- /* FIXME - there are more usb_alloc routines for dma correctness.
- Needed? */
+ /*
+ * FIXME - there are more usb_alloc routines for dma correctness.
+ * Needed?
+ */
dev->ring_buffer = kmalloc_array(true_size,
sizeof(struct alphatrack_icmd),
GFP_KERNEL);
diff --git a/drivers/staging/frontier/alphatrack.h b/drivers/staging/frontier/alphatrack.h
index 10a797263594..418c6053c027 100644
--- a/drivers/staging/frontier/alphatrack.h
+++ b/drivers/staging/frontier/alphatrack.h
@@ -6,7 +6,8 @@ struct alphatrack_ocmd {
unsigned char cmd[8];
};
-/* These are unused by the present driver but provide documentation for the
+/*
+ * These are unused by the present driver but provide documentation for the
* userspace API.
*/
enum LightID {
@@ -58,7 +59,8 @@ enum LightID {
#define BUTTONMASK_PRESS2 0x00008010
#define BUTTONMASK_PRESS3 0x00002020
-/* last 3 bytes are the slider position
+/*
+ * last 3 bytes are the slider position
* 40 is the actual slider moving, the most sig bits, and 3 lsb
*/
diff --git a/drivers/staging/frontier/tranzport.c b/drivers/staging/frontier/tranzport.c
index 04b5e66d9861..6cbf9c7c1d38 100644
--- a/drivers/staging/frontier/tranzport.c
+++ b/drivers/staging/frontier/tranzport.c
@@ -86,7 +86,8 @@ static int debug = TRANZPORT_DEBUG;
module_param(debug, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug, "Debug enabled or not");
-/* All interrupt in transfers are collected in a ring buffer to
+/*
+ * All interrupt in transfers are collected in a ring buffer to
* avoid racing conditions and get better performance of the driver.
*/
@@ -95,7 +96,8 @@ static int ring_buffer_size = RING_BUFFER_SIZE;
module_param(ring_buffer_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_buffer_size, "Read ring buffer size in reports");
-/* The write_buffer can one day contain more than one interrupt out transfer.
+/*
+ * The write_buffer can one day contain more than one interrupt out transfer.
*/
static int write_buffer_size = WRITE_BUFFER_SIZE;
module_param(write_buffer_size, int, S_IRUGO);
@@ -565,9 +567,9 @@ static ssize_t usb_tranzport_read(struct file *file, char __user *buffer,
newwheel = (*dev->ring_buffer)[next_tail].cmd[6];
oldwheel = (*dev->ring_buffer)[dev->ring_tail].cmd[6];
/* if both are wheel events, and
- no buttons have changes (FIXME, do I have to check?),
- and we are the same sign, we can compress +- 7F
- */
+ * no buttons have changes (FIXME, do I have to check?),
+ * and we are the same sign, we can compress +- 7F
+ */
dbg_info(&dev->intf->dev,
"%s: trying to compress: "
"%02x%02x%02x%02x%02x%02x%02x%02x\n",
@@ -729,7 +731,7 @@ static ssize_t usb_tranzport_write(struct file *file,
}
if (dev->interrupt_out_endpoint == NULL) {
- dev_err(&dev->intf->dev, "Endpoint should not be be null!\n");
+ dev_err(&dev->intf->dev, "Endpoint should not be null!\n");
goto unlock_exit;
}
@@ -842,8 +844,10 @@ static int usb_tranzport_probe(struct usb_interface *intf,
ring_buffer_size = RING_BUFFER_SIZE;
true_size = min(ring_buffer_size, RING_BUFFER_SIZE);
- /* FIXME - there are more usb_alloc routines for dma correctness.
- Needed? */
+ /*
+ * FIXME - there are more usb_alloc routines for dma correctness.
+ * Needed?
+ */
dev->ring_buffer =
kmalloc((true_size * sizeof(struct tranzport_cmd)) + 8, GFP_KERNEL);
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_dnld.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_dnld.c
index 47cc365c630b..6311b2ff5816 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_dnld.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_dnld.c
@@ -132,16 +132,16 @@ void card_bootload(struct net_device *dev)
pdata = (u32 *) bootimage;
size = sizeof(bootimage);
- // check for odd word
- if (size & 0x0003) {
+ /* check for odd word */
+ if (size & 0x0003)
size += 4;
- }
- // Provide mutual exclusive access while reading ASIC registers.
+
+ /* Provide mutual exclusive access while reading ASIC registers. */
spin_lock_irqsave(&info->dpram_lock, flags);
- // need to set i/o base address initially and hardware will autoincrement
+ /* need to set i/o base address initially and hardware will autoincrement */
ft1000_write_reg(dev, FT1000_REG_DPRAM_ADDR, FT1000_DPRAM_BASE);
- // write bytes
+ /* write bytes */
for (i = 0; i < (size >> 2); i++) {
templong = *pdata++;
outl(templong, dev->base_addr + FT1000_REG_MAG_DPDATA);
@@ -345,11 +345,10 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
handshake = get_handshake(dev, HANDSHAKE_DSP_BL_READY);
- if (handshake == HANDSHAKE_DSP_BL_READY) {
+ if (handshake == HANDSHAKE_DSP_BL_READY)
put_handshake(dev, HANDSHAKE_DRIVER_READY);
- } else {
+ else
Status = FAILURE;
- }
uiState = STATE_BOOT_DWNLD;
@@ -391,7 +390,7 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
Status = FAILURE;
break;
}
- // Provide mutual exclusive access while reading ASIC registers.
+ /* Provide mutual exclusive access while reading ASIC registers. */
spin_lock_irqsave(&info->dpram_lock,
flags);
/*
@@ -505,15 +504,15 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
break;
case REQUEST_MAILBOX_DATA:
- // Convert length from byte count to word count. Make sure we round up.
+ /* Convert length from byte count to word count. Make sure we round up. */
word_length =
(long)(info->DSPInfoBlklen + 1) / 2;
put_request_value(dev, word_length);
pMailBoxData =
- (struct drv_msg *) & info->DSPInfoBlk[0];
+ (struct drv_msg *) &info->DSPInfoBlk[0];
pUsData =
- (u16 *) & pMailBoxData->data[0];
- // Provide mutual exclusive access while reading ASIC registers.
+ (u16 *) &pMailBoxData->data[0];
+ /* Provide mutual exclusive access while reading ASIC registers. */
spin_lock_irqsave(&info->dpram_lock,
flags);
if (file_version == 5) {
@@ -538,9 +537,9 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
outw(DWNLD_MAG_PS_HDR_LOC,
dev->base_addr +
FT1000_REG_DPRAM_ADDR);
- if (word_length & 0x01) {
+ if (word_length & 0x01)
word_length++;
- }
+
word_length = word_length / 2;
for (; word_length > 0; word_length--) { /* In words */
@@ -565,7 +564,7 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
(u16 *) ((long)pFileStart +
pFileHdr5->
version_data_offset);
- // Provide mutual exclusive access while reading ASIC registers.
+ /* Provide mutual exclusive access while reading ASIC registers. */
spin_lock_irqsave(&info->dpram_lock,
flags);
/*
@@ -692,7 +691,7 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
if (pHdr->portdest == 0x80 /* DspOAM */
&& (pHdr->portsrc == 0x00 /* Driver */
- || pHdr->portsrc == 0x10 /* FMM */ )) {
+ || pHdr->portsrc == 0x10 /* FMM */)) {
uiState = STATE_SECTION_PROV;
} else {
DEBUG(1,
@@ -711,13 +710,13 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
pHdr = (struct pseudo_hdr *) pUcFile;
if (pHdr->checksum == hdr_checksum(pHdr)) {
- if (pHdr->portdest != 0x80 /* Dsp OAM */ ) {
+ if (pHdr->portdest != 0x80 /* Dsp OAM */) {
uiState = STATE_DONE_PROV;
break;
}
usHdrLength = ntohs(pHdr->length); /* Byte length for PROV records */
- // Get buffer for provisioning data
+ /* Get buffer for provisioning data */
pbuffer =
kmalloc((usHdrLength + sizeof(struct pseudo_hdr)),
GFP_ATOMIC);
@@ -725,7 +724,7 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
memcpy(pbuffer, (void *)pUcFile,
(u32) (usHdrLength +
sizeof(struct pseudo_hdr)));
- // link provisioning data
+ /* link provisioning data */
pprov_record =
kmalloc(sizeof(struct prov_record),
GFP_ATOMIC);
@@ -735,7 +734,7 @@ int card_download(struct net_device *dev, const u8 *pFileStart,
list_add_tail(&pprov_record->
list,
&info->prov_list);
- // Move to next entry if available
+ /* Move to next entry if available */
pUcFile =
(u8 *) ((unsigned long) pUcFile +
(unsigned long) ((usHdrLength + 1) & 0xFFFFFFFE) + sizeof(struct pseudo_hdr));
diff --git a/drivers/staging/ft1000/ft1000-usb/ft1000_debug.c b/drivers/staging/ft1000/ft1000-usb/ft1000_debug.c
index 3251d2e073b5..68a55ce69200 100644
--- a/drivers/staging/ft1000/ft1000-usb/ft1000_debug.c
+++ b/drivers/staging/ft1000/ft1000-usb/ft1000_debug.c
@@ -1,29 +1,31 @@
-//---------------------------------------------------------------------------
-// FT1000 driver for Flarion Flash OFDM NIC Device
-//
-// Copyright (C) 2006 Flarion Technologies, All rights reserved.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 2 of the License, or (at your option) any
-// later version. This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details. You should have received a copy of the GNU General Public
-// License along with this program; if not, write to the
-// Free Software Foundation, Inc., 59 Temple Place -
-// Suite 330, Boston, MA 02111-1307, USA.
-//---------------------------------------------------------------------------
-//
-// File: ft1000_chdev.c
-//
-// Description: Custom character device dispatch routines.
-//
-// History:
-// 8/29/02 Whc Ported to Linux.
-// 6/05/06 Whc Porting to Linux 2.6.9
-//
-//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* FT1000 driver for Flarion Flash OFDM NIC Device
+*
+* Copyright (C) 2006 Flarion Technologies, All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License as published by the Free
+* Software Foundation; either version 2 of the License, or (at your option) any
+* later version. This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+* more details. You should have received a copy of the GNU General Public
+* License along with this program; if not, write to the
+* Free Software Foundation, Inc., 59 Temple Place -
+* Suite 330, Boston, MA 02111-1307, USA.
+*---------------------------------------------------------------------------
+*
+* File: ft1000_chdev.c
+*
+* Description: Custom character device dispatch routines.
+*
+* History:
+* 8/29/02 Whc Ported to Linux.
+* 6/05/06 Whc Porting to Linux 2.6.9
+*
+*---------------------------------------------------------------------------
+*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -38,25 +40,24 @@
static int ft1000_flarion_cnt = 0;
-static int ft1000_open (struct inode *inode, struct file *file);
+static int ft1000_open(struct inode *inode, struct file *file);
static unsigned int ft1000_poll_dev(struct file *file, poll_table *wait);
static long ft1000_ioctl(struct file *file, unsigned int command,
unsigned long argument);
-static int ft1000_release (struct inode *inode, struct file *file);
+static int ft1000_release(struct inode *inode, struct file *file);
-// List to free receive command buffer pool
+/* List to free receive command buffer pool */
struct list_head freercvpool;
-// lock to arbitrate free buffer list for receive command data
+/* lock to arbitrate free buffer list for receive command data */
spinlock_t free_buff_lock;
int numofmsgbuf = 0;
-//
-// Table of entry-point routines for char device
-//
-static const struct file_operations ft1000fops =
-{
+/*
+* Table of entry-point routines for char device
+*/
+static const struct file_operations ft1000fops = {
.unlocked_ioctl = ft1000_ioctl,
.poll = ft1000_poll_dev,
.open = ft1000_open,
@@ -64,34 +65,35 @@ static const struct file_operations ft1000fops =
.llseek = no_llseek,
};
-//---------------------------------------------------------------------------
-// Function: ft1000_get_buffer
-//
-// Parameters:
-//
-// Returns:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
+/*
+---------------------------------------------------------------------------
+* Function: ft1000_get_buffer
+*
+* Parameters:
+*
+* Returns:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
struct dpram_blk *ft1000_get_buffer(struct list_head *bufflist)
{
unsigned long flags;
struct dpram_blk *ptr;
spin_lock_irqsave(&free_buff_lock, flags);
- // Check if buffer is available
- if ( list_empty(bufflist) ) {
+ /* Check if buffer is available */
+ if (list_empty(bufflist)) {
DEBUG("ft1000_get_buffer: No more buffer - %d\n", numofmsgbuf);
ptr = NULL;
- }
- else {
+ } else {
numofmsgbuf--;
ptr = list_entry(bufflist->next, struct dpram_blk, list);
list_del(&ptr->list);
- //DEBUG("ft1000_get_buffer: number of free msg buffers = %d\n", numofmsgbuf);
+ /* DEBUG("ft1000_get_buffer: number of free msg buffers = %d\n", numofmsgbuf); */
}
spin_unlock_irqrestore(&free_buff_lock, flags);
@@ -101,42 +103,46 @@ struct dpram_blk *ft1000_get_buffer(struct list_head *bufflist)
-//---------------------------------------------------------------------------
-// Function: ft1000_free_buffer
-//
-// Parameters:
-//
-// Returns:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_free_buffer
+*
+* Parameters:
+*
+* Returns:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
void ft1000_free_buffer(struct dpram_blk *pdpram_blk, struct list_head *plist)
{
unsigned long flags;
spin_lock_irqsave(&free_buff_lock, flags);
- // Put memory back to list
+ /* Put memory back to list */
list_add_tail(&pdpram_blk->list, plist);
numofmsgbuf++;
- //DEBUG("ft1000_free_buffer: number of free msg buffers = %d\n", numofmsgbuf);
+ /*DEBUG("ft1000_free_buffer: number of free msg buffers = %d\n", numofmsgbuf); */
spin_unlock_irqrestore(&free_buff_lock, flags);
}
-//---------------------------------------------------------------------------
-// Function: ft1000_CreateDevice
-//
-// Parameters: dev - pointer to adapter object
-//
-// Returns: 0 if successful
-//
-// Description: Creates a private char device.
-//
-// Notes: Only called by init_module().
-//
-//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_CreateDevice
+*
+* Parameters: dev - pointer to adapter object
+*
+* Returns: 0 if successful
+*
+* Description: Creates a private char device.
+*
+* Notes: Only called by init_module().
+*
+*---------------------------------------------------------------------------
+*/
int ft1000_create_dev(struct ft1000_usb *dev)
{
int result;
@@ -144,20 +150,19 @@ int ft1000_create_dev(struct ft1000_usb *dev)
struct dentry *dir, *file;
struct ft1000_debug_dirs *tmp;
- // make a new device name
+ /* make a new device name */
sprintf(dev->DeviceName, "%s%d", "FT1000_", dev->CardNumber);
DEBUG("%s: number of instance = %d\n", __func__, ft1000_flarion_cnt);
DEBUG("DeviceCreated = %x\n", dev->DeviceCreated);
- if (dev->DeviceCreated)
- {
+ if (dev->DeviceCreated) {
DEBUG("%s: \"%s\" already registered\n", __func__, dev->DeviceName);
return -EIO;
}
- // register the device
+ /* register the device */
DEBUG("%s: \"%s\" debugfs device registration\n", __func__, dev->DeviceName);
tmp = kmalloc(sizeof(struct ft1000_debug_dirs), GFP_KERNEL);
@@ -186,7 +191,7 @@ int ft1000_create_dev(struct ft1000_usb *dev)
DEBUG("%s: registered debugfs directory \"%s\"\n", __func__, dev->DeviceName);
- // initialize application information
+ /* initialize application information */
dev->appcnt = 0;
for (i=0; i<MAX_NUM_APP; i++) {
dev->app_info[i].nTxMsg = 0;
@@ -198,7 +203,7 @@ int ft1000_create_dev(struct ft1000_usb *dev)
dev->app_info[i].DspBCMsgFlag = 0;
dev->app_info[i].NumOfMsg = 0;
init_waitqueue_head(&dev->app_info[i].wait_dpram_msg);
- INIT_LIST_HEAD (&dev->app_info[i].app_sqlist);
+ INIT_LIST_HEAD(&dev->app_info[i].app_sqlist);
}
dev->DeviceCreated = TRUE;
@@ -214,16 +219,18 @@ fail:
return result;
}
-//---------------------------------------------------------------------------
-// Function: ft1000_DestroyDeviceDEBUG
-//
-// Parameters: dev - pointer to adapter object
-//
-// Description: Destroys a private char device.
-//
-// Notes: Only called by cleanup_module().
-//
-//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_DestroyDeviceDEBUG
+*
+* Parameters: dev - pointer to adapter object
+*
+* Description: Destroys a private char device.
+*
+* Notes: Only called by cleanup_module().
+*
+*---------------------------------------------------------------------------
+*/
void ft1000_destroy_dev(struct net_device *netdev)
{
struct ft1000_info *info = netdev_priv(netdev);
@@ -238,8 +245,7 @@ void ft1000_destroy_dev(struct net_device *netdev)
- if (dev->DeviceCreated)
- {
+ if (dev->DeviceCreated) {
ft1000_flarion_cnt--;
list_for_each_safe(pos, q, &dev->nodes.list) {
dir = list_entry(pos, struct ft1000_debug_dirs, list);
@@ -253,7 +259,7 @@ void ft1000_destroy_dev(struct net_device *netdev)
DEBUG("%s: unregistered device \"%s\"\n", __func__,
dev->DeviceName);
- // Make sure we free any memory reserve for slow Queue
+ /* Make sure we free any memory reserve for slow Queue */
for (i=0; i<MAX_NUM_APP; i++) {
while (list_empty(&dev->app_info[i].app_sqlist) == 0) {
pdpram_blk = list_entry(dev->app_info[i].app_sqlist.next, struct dpram_blk, list);
@@ -264,7 +270,7 @@ void ft1000_destroy_dev(struct net_device *netdev)
wake_up_interruptible(&dev->app_info[i].wait_dpram_msg);
}
- // Remove buffer allocated for receive command data
+ /* Remove buffer allocated for receive command data */
if (ft1000_flarion_cnt == 0) {
while (list_empty(&freercvpool) == 0) {
ptr = list_entry(freercvpool.next, struct dpram_blk, list);
@@ -279,17 +285,19 @@ void ft1000_destroy_dev(struct net_device *netdev)
}
-//---------------------------------------------------------------------------
-// Function: ft1000_open
-//
-// Parameters:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
-static int ft1000_open (struct inode *inode, struct file *file)
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_open
+*
+* Parameters:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
+static int ft1000_open(struct inode *inode, struct file *file)
{
struct ft1000_info *info;
struct ft1000_usb *dev = (struct ft1000_usb *)inode->i_private;
@@ -301,22 +309,22 @@ static int ft1000_open (struct inode *inode, struct file *file)
info = file->private_data = netdev_priv(dev->net);
- DEBUG("f_owner = %p number of application = %d\n", (&file->f_owner), dev->appcnt );
+ DEBUG("f_owner = %p number of application = %d\n", (&file->f_owner), dev->appcnt);
- // Check if maximum number of application exceeded
+ /* Check if maximum number of application exceeded */
if (dev->appcnt > MAX_NUM_APP) {
DEBUG("Maximum number of application exceeded\n");
return -EACCES;
}
- // Search for available application info block
+ /* Search for available application info block */
for (i=0; i<MAX_NUM_APP; i++) {
- if ( (dev->app_info[i].fileobject == NULL) ) {
+ if ((dev->app_info[i].fileobject == NULL)) {
break;
}
}
- // Fail due to lack of application info block
+ /* Fail due to lack of application info block */
if (i == MAX_NUM_APP) {
DEBUG("Could not find an application info block\n");
return -EACCES;
@@ -334,16 +342,18 @@ static int ft1000_open (struct inode *inode, struct file *file)
}
-//---------------------------------------------------------------------------
-// Function: ft1000_poll_dev
-//
-// Parameters:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_poll_dev
+*
+* Parameters:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
static unsigned int ft1000_poll_dev(struct file *file, poll_table *wait)
{
@@ -352,24 +362,24 @@ static unsigned int ft1000_poll_dev(struct file *file, poll_table *wait)
struct ft1000_usb *dev = info->priv;
int i;
- //DEBUG("ft1000_poll_dev called\n");
+ /* DEBUG("ft1000_poll_dev called\n"); */
if (ft1000_flarion_cnt == 0) {
DEBUG("FT1000:ft1000_poll_dev called when ft1000_flarion_cnt is zero\n");
return (-EBADF);
}
- // Search for matching file object
+ /* Search for matching file object */
for (i=0; i<MAX_NUM_APP; i++) {
- if ( dev->app_info[i].fileobject == &file->f_owner) {
- //DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", dev->app_info[i].app_id);
+ if (dev->app_info[i].fileobject == &file->f_owner) {
+ /* DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", dev->app_info[i].app_id); */
break;
}
}
- // Could not find application info block
+ /* Could not find application info block */
if (i == MAX_NUM_APP) {
DEBUG("FT1000:ft1000_ioctl:Could not find application info block\n");
- return ( -EACCES );
+ return (-EACCES);
}
if (list_empty(&dev->app_info[i].app_sqlist) == 0) {
@@ -377,23 +387,25 @@ static unsigned int ft1000_poll_dev(struct file *file, poll_table *wait)
return(POLLIN | POLLRDNORM | POLLPRI);
}
- poll_wait (file, &dev->app_info[i].wait_dpram_msg, wait);
- //DEBUG("FT1000:ft1000_poll_dev:Polling for data from DSP\n");
+ poll_wait(file, &dev->app_info[i].wait_dpram_msg, wait);
+ /* DEBUG("FT1000:ft1000_poll_dev:Polling for data from DSP\n"); */
return (0);
}
-//---------------------------------------------------------------------------
-// Function: ft1000_ioctl
-//
-// Parameters:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
-static long ft1000_ioctl (struct file *file, unsigned int command,
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_ioctl
+*
+* Parameters:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
+static long ft1000_ioctl(struct file *file, unsigned int command,
unsigned long argument)
{
void __user *argp = (void __user *)argument;
@@ -417,21 +429,21 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
unsigned short ledStat=0;
unsigned short conStat=0;
- //DEBUG("ft1000_ioctl called\n");
+ /* DEBUG("ft1000_ioctl called\n"); */
if (ft1000_flarion_cnt == 0) {
DEBUG("FT1000:ft1000_ioctl called when ft1000_flarion_cnt is zero\n");
return (-EBADF);
}
- //DEBUG("FT1000:ft1000_ioctl:command = 0x%x argument = 0x%8x\n", command, (u32)argument);
+ /* DEBUG("FT1000:ft1000_ioctl:command = 0x%x argument = 0x%8x\n", command, (u32)argument); */
info = file->private_data;
ft1000dev = info->priv;
cmd = _IOC_NR(command);
- //DEBUG("FT1000:ft1000_ioctl:cmd = 0x%x\n", cmd);
+ /* DEBUG("FT1000:ft1000_ioctl:cmd = 0x%x\n", cmd); */
- // process the command
+ /* process the command */
switch (cmd) {
case IOCTL_REGISTER_CMD:
DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_REGISTER called\n");
@@ -441,7 +453,7 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
break;
}
if (tempword == DSPBCMSGID) {
- // Search for matching file object
+ /* Search for matching file object */
for (i=0; i<MAX_NUM_APP; i++) {
if (ft1000dev->app_info[i].fileobject == &file->f_owner) {
ft1000dev->app_info[i].DspBCMsgFlag = 1;
@@ -457,7 +469,7 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
get_ver_data.drv_ver = FT1000_DRV_VER;
- if (copy_to_user(argp, &get_ver_data, sizeof(get_ver_data)) ) {
+ if (copy_to_user(argp, &get_ver_data, sizeof(get_ver_data))) {
DEBUG("FT1000:ft1000_ioctl: copy fault occurred\n");
result = -EFAULT;
break;
@@ -467,20 +479,20 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
break;
case IOCTL_CONNECT:
- // Connect Message
+ /* Connect Message */
DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_CONNECT\n");
ConnectionMsg[79] = 0xfc;
card_send_command(ft1000dev, (unsigned short *)ConnectionMsg, 0x4c);
break;
case IOCTL_DISCONNECT:
- // Disconnect Message
+ /* Disconnect Message */
DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_DISCONNECT\n");
ConnectionMsg[79] = 0xfd;
card_send_command(ft1000dev, (unsigned short *)ConnectionMsg, 0x4c);
break;
case IOCTL_GET_DSP_STAT_CMD:
- //DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DSP_STAT called\n");
+ /* DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DSP_STAT called\n"); */
memset(&get_stat_data, 0, sizeof(get_stat_data));
memcpy(get_stat_data.DspVer, info->DspVer, DSPVERSZ);
memcpy(get_stat_data.HwSerNum, info->HwSerNum, HWSERNUMSZ);
@@ -494,8 +506,7 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
ft1000_read_dpram16(ft1000dev, FT1000_MAG_DSP_CON_STATE, (u8 *)&conStat, FT1000_MAG_DSP_CON_STATE_INDX);
get_stat_data.ConStat = ntohs(conStat);
DEBUG("FT1000:ft1000_ioctl: ConStat = 0x%x\n", get_stat_data.ConStat);
- }
- else {
+ } else {
get_stat_data.ConStat = 0x0f;
}
@@ -504,10 +515,10 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
get_stat_data.nRxPkts = info->stats.rx_packets;
get_stat_data.nTxBytes = info->stats.tx_bytes;
get_stat_data.nRxBytes = info->stats.rx_bytes;
- do_gettimeofday ( &tv );
+ do_gettimeofday(&tv);
get_stat_data.ConTm = (u32)(tv.tv_sec - info->ConTm);
DEBUG("Connection Time = %d\n", (int)get_stat_data.ConTm);
- if (copy_to_user(argp, &get_stat_data, sizeof(get_stat_data)) ) {
+ if (copy_to_user(argp, &get_stat_data, sizeof(get_stat_data))) {
DEBUG("FT1000:ft1000_ioctl: copy fault occurred\n");
result = -EFAULT;
break;
@@ -517,7 +528,7 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
case IOCTL_SET_DPRAM_CMD:
{
IOCTL_DPRAM_BLK *dpram_data = NULL;
- //IOCTL_DPRAM_COMMAND dpram_command;
+ /* IOCTL_DPRAM_COMMAND dpram_command; */
u16 qtype;
u16 msgsz;
struct pseudo_hdr *ppseudo_hdr;
@@ -526,7 +537,7 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
u16 app_index;
u16 status;
- //DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_SET_DPRAM called\n");
+ /* DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_SET_DPRAM called\n");*/
if (ft1000_flarion_cnt == 0) {
@@ -545,12 +556,12 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
if (info->CardReady) {
- //DEBUG("FT1000:ft1000_ioctl: try to SET_DPRAM \n");
+ /* DEBUG("FT1000:ft1000_ioctl: try to SET_DPRAM \n"); */
- // Get the length field to see how many bytes to copy
+ /* Get the length field to see how many bytes to copy */
result = get_user(msgsz, (__u16 __user *)argp);
- msgsz = ntohs (msgsz);
- //DEBUG("FT1000:ft1000_ioctl: length of message = %d\n", msgsz);
+ msgsz = ntohs(msgsz);
+ /* DEBUG("FT1000:ft1000_ioctl: length of message = %d\n", msgsz); */
if (msgsz > MAX_CMD_SQSIZE) {
DEBUG("FT1000:ft1000_ioctl: bad message length = %d\n", msgsz);
@@ -563,12 +574,11 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
if (!dpram_data)
break;
- if ( copy_from_user(dpram_data, argp, msgsz+2) ) {
+ if (copy_from_user(dpram_data, argp, msgsz+2)) {
DEBUG("FT1000:ft1000_ChIoctl: copy fault occurred\n");
result = -EFAULT;
- }
- else {
- // Check if this message came from a registered application
+ } else {
+ /* Check if this message came from a registered application */
for (i=0; i<MAX_NUM_APP; i++) {
if (ft1000dev->app_info[i].fileobject == &file->f_owner) {
break;
@@ -582,28 +592,27 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
}
app_index = i;
- // Check message qtype type which is the lower byte within qos_class
+ /* Check message qtype type which is the lower byte within qos_class */
qtype = ntohs(dpram_data->pseudohdr.qos_class) & 0xff;
- //DEBUG("FT1000_ft1000_ioctl: qtype = %d\n", qtype);
+ /* DEBUG("FT1000_ft1000_ioctl: qtype = %d\n", qtype); */
if (qtype) {
- }
- else {
- // Put message into Slow Queue
- // Only put a message into the DPRAM if msg doorbell is available
+ } else {
+ /* Put message into Slow Queue */
+ /* Only put a message into the DPRAM if msg doorbell is available */
status = ft1000_read_register(ft1000dev, &tempword, FT1000_REG_DOORBELL);
- //DEBUG("FT1000_ft1000_ioctl: READ REGISTER tempword=%x\n", tempword);
+ /* DEBUG("FT1000_ft1000_ioctl: READ REGISTER tempword=%x\n", tempword); */
if (tempword & FT1000_DB_DPRAM_TX) {
- // Suspend for 2ms and try again due to DSP doorbell busy
+ /* Suspend for 2ms and try again due to DSP doorbell busy */
mdelay(2);
status = ft1000_read_register(ft1000dev, &tempword, FT1000_REG_DOORBELL);
if (tempword & FT1000_DB_DPRAM_TX) {
- // Suspend for 1ms and try again due to DSP doorbell busy
+ /* Suspend for 1ms and try again due to DSP doorbell busy */
mdelay(1);
status = ft1000_read_register(ft1000dev, &tempword, FT1000_REG_DOORBELL);
if (tempword & FT1000_DB_DPRAM_TX) {
status = ft1000_read_register(ft1000dev, &tempword, FT1000_REG_DOORBELL);
if (tempword & FT1000_DB_DPRAM_TX) {
- // Suspend for 3ms and try again due to DSP doorbell busy
+ /* Suspend for 3ms and try again due to DSP doorbell busy */
mdelay(3);
status = ft1000_read_register(ft1000dev, &tempword, FT1000_REG_DOORBELL);
if (tempword & FT1000_DB_DPRAM_TX) {
@@ -617,11 +626,11 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
}
}
- //DEBUG("FT1000_ft1000_ioctl: finished reading register\n");
+ /*DEBUG("FT1000_ft1000_ioctl: finished reading register\n"); */
- // Make sure we are within the limits of the slow queue memory limitation
- if ( (msgsz < MAX_CMD_SQSIZE) && (msgsz > PSEUDOSZ) ) {
- // Need to put sequence number plus new checksum for message
+ /* Make sure we are within the limits of the slow queue memory limitation */
+ if ((msgsz < MAX_CMD_SQSIZE) && (msgsz > PSEUDOSZ)) {
+ /* Need to put sequence number plus new checksum for message */
pmsg = (u16 *)&dpram_data->pseudohdr;
ppseudo_hdr = (struct pseudo_hdr *)pmsg;
total_len = msgsz+2;
@@ -629,15 +638,15 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
total_len++;
}
- // Insert slow queue sequence number
+ /* Insert slow queue sequence number */
ppseudo_hdr->seq_num = info->squeseqnum++;
ppseudo_hdr->portsrc = ft1000dev->app_info[app_index].app_id;
- // Calculate new checksum
+ /* Calculate new checksum */
ppseudo_hdr->checksum = *pmsg++;
- //DEBUG("checksum = 0x%x\n", ppseudo_hdr->checksum);
+ /* DEBUG("checksum = 0x%x\n", ppseudo_hdr->checksum); */
for (i=1; i<7; i++) {
ppseudo_hdr->checksum ^= *pmsg++;
- //DEBUG("checksum = 0x%x\n", ppseudo_hdr->checksum);
+ /* DEBUG("checksum = 0x%x\n", ppseudo_hdr->checksum); */
}
pmsg++;
ppseudo_hdr = (struct pseudo_hdr *)pmsg;
@@ -645,14 +654,12 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
ft1000dev->app_info[app_index].nTxMsg++;
- }
- else {
+ } else {
result = -EINVAL;
}
}
}
- }
- else {
+ } else {
DEBUG("FT1000:ft1000_ioctl: Card not ready take messages\n");
result = -EACCES;
}
@@ -666,21 +673,21 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
IOCTL_DPRAM_BLK __user *pioctl_dpram;
int msglen;
- //DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DPRAM called\n");
+ /* DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DPRAM called\n"); */
if (ft1000_flarion_cnt == 0) {
return (-EBADF);
}
- // Search for matching file object
+ /* Search for matching file object */
for (i=0; i<MAX_NUM_APP; i++) {
if (ft1000dev->app_info[i].fileobject == &file->f_owner) {
- //DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", ft1000dev->app_info[i].app_id);
+ /*DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", ft1000dev->app_info[i].app_id); */
break;
}
}
- // Could not find application info block
+ /* Could not find application info block */
if (i == MAX_NUM_APP) {
DEBUG("FT1000:ft1000_ioctl:Could not find application info block\n");
result = -EBADF;
@@ -690,30 +697,29 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
result = 0;
pioctl_dpram = argp;
if (list_empty(&ft1000dev->app_info[i].app_sqlist) == 0) {
- //DEBUG("FT1000:ft1000_ioctl:Message detected in slow queue\n");
+ /* DEBUG("FT1000:ft1000_ioctl:Message detected in slow queue\n"); */
spin_lock_irqsave(&free_buff_lock, flags);
pdpram_blk = list_entry(ft1000dev->app_info[i].app_sqlist.next, struct dpram_blk, list);
list_del(&pdpram_blk->list);
ft1000dev->app_info[i].NumOfMsg--;
- //DEBUG("FT1000:ft1000_ioctl:NumOfMsg for app %d = %d\n", i, ft1000dev->app_info[i].NumOfMsg);
+ /* DEBUG("FT1000:ft1000_ioctl:NumOfMsg for app %d = %d\n", i, ft1000dev->app_info[i].NumOfMsg); */
spin_unlock_irqrestore(&free_buff_lock, flags);
msglen = ntohs(*(u16 *)pdpram_blk->pbuffer) + PSEUDOSZ;
result = get_user(msglen, &pioctl_dpram->total_len);
if (result)
break;
msglen = htons(msglen);
- //DEBUG("FT1000:ft1000_ioctl:msg length = %x\n", msglen);
- if(copy_to_user (&pioctl_dpram->pseudohdr, pdpram_blk->pbuffer, msglen))
- {
+ /* DEBUG("FT1000:ft1000_ioctl:msg length = %x\n", msglen); */
+ if (copy_to_user (&pioctl_dpram->pseudohdr, pdpram_blk->pbuffer, msglen)) {
DEBUG("FT1000:ft1000_ioctl: copy fault occurred\n");
- result = -EFAULT;
- break;
+ result = -EFAULT;
+ break;
}
ft1000_free_buffer(pdpram_blk, &freercvpool);
result = msglen;
}
- //DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DPRAM no message\n");
+ /* DEBUG("FT1000:ft1000_ioctl: IOCTL_FT1000_GET_DPRAM no message\n"); */
}
break;
@@ -726,17 +732,19 @@ static long ft1000_ioctl (struct file *file, unsigned int command,
return result;
}
-//---------------------------------------------------------------------------
-// Function: ft1000_release
-//
-// Parameters:
-//
-// Description:
-//
-// Notes:
-//
-//---------------------------------------------------------------------------
-static int ft1000_release (struct inode *inode, struct file *file)
+/*
+*---------------------------------------------------------------------------
+* Function: ft1000_release
+*
+* Parameters:
+*
+* Description:
+*
+* Notes:
+*
+*---------------------------------------------------------------------------
+*/
+static int ft1000_release(struct inode *inode, struct file *file)
{
struct ft1000_info *info;
struct net_device *dev;
@@ -755,10 +763,10 @@ static int ft1000_release (struct inode *inode, struct file *file)
return (-EBADF);
}
- // Search for matching file object
+ /* Search for matching file object */
for (i=0; i<MAX_NUM_APP; i++) {
- if ( ft1000dev->app_info[i].fileobject == &file->f_owner) {
- //DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", ft1000dev->app_info[i].app_id);
+ if (ft1000dev->app_info[i].fileobject == &file->f_owner) {
+ /* DEBUG("FT1000:ft1000_ioctl: Message is for AppId = %d\n", ft1000dev->app_info[i].app_id); */
break;
}
}
@@ -773,11 +781,10 @@ static int ft1000_release (struct inode *inode, struct file *file)
ft1000_free_buffer(pdpram_blk, &freercvpool);
}
- // initialize application information
+ /* initialize application information */
ft1000dev->appcnt--;
DEBUG("ft1000_chdev:%s:appcnt = %d\n", __FUNCTION__, ft1000dev->appcnt);
ft1000dev->app_info[i].fileobject = NULL;
return 0;
}
-
diff --git a/drivers/staging/ft1000/ft1000-usb/ft1000_ioctl.h b/drivers/staging/ft1000/ft1000-usb/ft1000_ioctl.h
index 3f4207fd1597..24b8d77a132c 100644
--- a/drivers/staging/ft1000/ft1000-usb/ft1000_ioctl.h
+++ b/drivers/staging/ft1000/ft1000-usb/ft1000_ioctl.h
@@ -1,91 +1,89 @@
-//---------------------------------------------------------------------------
-// FT1000 driver for Flarion Flash OFDM NIC Device
-//
-// Copyright (C) 2002 Flarion Technologies, All rights reserved.
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 2 of the License, or (at your option) any
-// later version. This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-// more details. You should have received a copy of the GNU General Public
-// License along with this program; if not, write to the
-// Free Software Foundation, Inc., 59 Temple Place -
-// Suite 330, Boston, MA 02111-1307, USA.
-//---------------------------------------------------------------------------
-//
-// File: ft1000_ioctl.h
-//
-// Description: Common structures and defines relating to IOCTL
-//
-// History:
-// 11/5/02 Whc Created.
-//
-//---------------------------------------------------------------------------//---------------------------------------------------------------------------
+/*
+*---------------------------------------------------------------------------
+* FT1000 driver for Flarion Flash OFDM NIC Device
+*
+* Copyright (C) 2002 Flarion Technologies, All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License as published by the Free
+* Software Foundation; either version 2 of the License, or (at your option) any
+* later version. This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+* more details. You should have received a copy of the GNU General Public
+* License along with this program; if not, write to the
+* Free Software Foundation, Inc., 59 Temple Place -
+* Suite 330, Boston, MA 02111-1307, USA.
+*---------------------------------------------------------------------------
+*
+* File: ft1000_ioctl.h
+*
+* Description: Common structures and defines relating to IOCTL
+*
+* History:
+* 11/5/02 Whc Created.
+*
+*---------------------------------------------------------------------------//---------------------------------------------------------------------------
+*/
#ifndef _FT1000IOCTLH_
#define _FT1000IOCTLH_
-typedef struct _IOCTL_GET_VER
-{
+typedef struct _IOCTL_GET_VER {
unsigned long drv_ver;
} __attribute__ ((packed)) IOCTL_GET_VER, *PIOCTL_GET_VER;
-//Data structure for Dsp statistics
-typedef struct _IOCTL_GET_DSP_STAT
-{
- unsigned char DspVer[DSPVERSZ]; // DSP version number
- unsigned char HwSerNum[HWSERNUMSZ]; // Hardware Serial Number
- unsigned char Sku[SKUSZ]; // SKU
- unsigned char eui64[EUISZ]; // EUI64
- unsigned short ConStat; // Connection Status
- // Bits 0-3 = Connection Status Field
- // 0000=Idle (Disconnect)
- // 0001=Searching
- // 0010=Active (Connected)
- // 0011=Waiting for L2 down
- // 0100=Sleep
- unsigned short LedStat; // Led Status
- // Bits 0-3 = Signal Strength Field
- // 0000 = -105dBm to -92dBm
- // 0001 = -92dBm to -85dBm
- // 0011 = -85dBm to -75dBm
- // 0111 = -75dBm to -50dBm
- // 1111 = -50dBm to 0dBm
- // Bits 4-7 = Reserved
- // Bits 8-11 = SNR Field
- // 0000 = <2dB
- // 0001 = 2dB to 8dB
- // 0011 = 8dB to 15dB
- // 0111 = 15dB to 22dB
- // 1111 = >22dB
- // Bits 12-15 = Reserved
- unsigned long nTxPkts; // Number of packets transmitted from host to dsp
- unsigned long nRxPkts; // Number of packets received from dsp to host
- unsigned long nTxBytes; // Number of bytes transmitted from host to dsp
- unsigned long nRxBytes; // Number of bytes received from dsp to host
- unsigned long ConTm; // Current session connection time in seconds
- unsigned char CalVer[CALVERSZ]; // Proprietary Calibration Version
- unsigned char CalDate[CALDATESZ]; // Proprietary Calibration Date
+/* Data structure for Dsp statistics */
+typedef struct _IOCTL_GET_DSP_STAT {
+ unsigned char DspVer[DSPVERSZ]; /* DSP version number */
+ unsigned char HwSerNum[HWSERNUMSZ]; /* Hardware Serial Number */
+ unsigned char Sku[SKUSZ]; /* SKU */
+ unsigned char eui64[EUISZ]; /* EUI64 */
+ unsigned short ConStat; /* Connection Status */
+ /* Bits 0-3 = Connection Status Field */
+ /* 0000=Idle (Disconnect) */
+ /* 0001=Searching */
+ /* 0010=Active (Connected) */
+ /* 0011=Waiting for L2 down */
+ /* 0100=Sleep */
+ unsigned short LedStat; /* Led Status */
+ /* Bits 0-3 = Signal Strength Field */
+ /* 0000 = -105dBm to -92dBm */
+ /* 0001 = -92dBm to -85dBm */
+ /* 0011 = -85dBm to -75dBm */
+ /* 0111 = -75dBm to -50dBm */
+ /* 1111 = -50dBm to 0dBm */
+ /* Bits 4-7 = Reserved */
+ /* Bits 8-11 = SNR Field */
+ /* 0000 = <2dB */
+ /* 0001 = 2dB to 8dB */
+ /* 0011 = 8dB to 15dB */
+ /* 0111 = 15dB to 22dB */
+ /* 1111 = >22dB */
+ /* Bits 12-15 = Reserved */
+ unsigned long nTxPkts; /* Number of packets transmitted from host to dsp */
+ unsigned long nRxPkts; /* Number of packets received from dsp to host */
+ unsigned long nTxBytes; /* Number of bytes transmitted from host to dsp */
+ unsigned long nRxBytes; /* Number of bytes received from dsp to host */
+ unsigned long ConTm; /* Current session connection time in seconds */
+ unsigned char CalVer[CALVERSZ]; /* Proprietary Calibration Version */
+ unsigned char CalDate[CALDATESZ]; /* Proprietary Calibration Date */
} __attribute__ ((packed)) IOCTL_GET_DSP_STAT, *PIOCTL_GET_DSP_STAT;
-//Data structure for Dual Ported RAM messaging between Host and Dsp
-typedef struct _IOCTL_DPRAM_BLK
-{
+/* Data structure for Dual Ported RAM messaging between Host and Dsp */
+typedef struct _IOCTL_DPRAM_BLK {
unsigned short total_len;
struct pseudo_hdr pseudohdr;
unsigned char buffer[1780];
} __attribute__ ((packed)) IOCTL_DPRAM_BLK, *PIOCTL_DPRAM_BLK;
-typedef struct _IOCTL_DPRAM_COMMAND
-{
+typedef struct _IOCTL_DPRAM_COMMAND {
unsigned short extra;
IOCTL_DPRAM_BLK dpram_blk;
} __attribute__ ((packed)) IOCTL_DPRAM_COMMAND, *PIOCTL_DPRAM_COMMAND;
-//
-// Custom IOCTL command codes
-//
+/*
+* Custom IOCTL command codes
+*/
#define FT1000_MAGIC_CODE 'F'
#define IOCTL_REGISTER_CMD 0
@@ -96,12 +94,12 @@ typedef struct _IOCTL_DPRAM_COMMAND
#define IOCTL_CONNECT 10
#define IOCTL_DISCONNECT 11
-#define IOCTL_FT1000_GET_DSP_STAT _IOR (FT1000_MAGIC_CODE, IOCTL_GET_DSP_STAT_CMD, sizeof(IOCTL_GET_DSP_STAT) )
-#define IOCTL_FT1000_GET_VER _IOR (FT1000_MAGIC_CODE, IOCTL_GET_VER_CMD, sizeof(IOCTL_GET_VER) )
-#define IOCTL_FT1000_CONNECT _IOW (FT1000_MAGIC_CODE, IOCTL_CONNECT, 0 )
-#define IOCTL_FT1000_DISCONNECT _IOW (FT1000_MAGIC_CODE, IOCTL_DISCONNECT, 0 )
-#define IOCTL_FT1000_SET_DPRAM _IOW (FT1000_MAGIC_CODE, IOCTL_SET_DPRAM_CMD, sizeof(IOCTL_DPRAM_BLK) )
-#define IOCTL_FT1000_GET_DPRAM _IOR (FT1000_MAGIC_CODE, IOCTL_GET_DPRAM_CMD, sizeof(IOCTL_DPRAM_BLK) )
-#define IOCTL_FT1000_REGISTER _IOW (FT1000_MAGIC_CODE, IOCTL_REGISTER_CMD, sizeof(unsigned short *) )
-#endif // _FT1000IOCTLH_
+#define IOCTL_FT1000_GET_DSP_STAT _IOR(FT1000_MAGIC_CODE, IOCTL_GET_DSP_STAT_CMD, sizeof(IOCTL_GET_DSP_STAT)
+#define IOCTL_FT1000_GET_VER _IOR(FT1000_MAGIC_CODE, IOCTL_GET_VER_CMD, sizeof(IOCTL_GET_VER)
+#define IOCTL_FT1000_CONNECT _IOW(FT1000_MAGIC_CODE, IOCTL_CONNECT, 0
+#define IOCTL_FT1000_DISCONNECT _IOW(FT1000_MAGIC_CODE, IOCTL_DISCONNECT, 0
+#define IOCTL_FT1000_SET_DPRAM _IOW(FT1000_MAGIC_CODE, IOCTL_SET_DPRAM_CMD, sizeof(IOCTL_DPRAM_BLK)
+#define IOCTL_FT1000_GET_DPRAM _IOR(FT1000_MAGIC_CODE, IOCTL_GET_DPRAM_CMD, sizeof(IOCTL_DPRAM_BLK)
+#define IOCTL_FT1000_REGISTER _IOW(FT1000_MAGIC_CODE, IOCTL_REGISTER_CMD, sizeof(unsigned short *)
+#endif /* _FT1000IOCTLH_ */
diff --git a/drivers/staging/ft1000/ft1000-usb/ft1000_usb.c b/drivers/staging/ft1000/ft1000-usb/ft1000_usb.c
index 614db55a8171..29a7cd23845d 100644
--- a/drivers/staging/ft1000/ft1000-usb/ft1000_usb.c
+++ b/drivers/staging/ft1000/ft1000-usb/ft1000_usb.c
@@ -79,8 +79,12 @@ static int ft1000_probe(struct usb_interface *interface,
ft1000dev->dev = dev;
ft1000dev->status = 0;
ft1000dev->net = NULL;
- ft1000dev->tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
- ft1000dev->rx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+ ft1000dev->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
+ ft1000dev->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!ft1000dev->tx_urb || !ft1000dev->rx_urb) {
+ ret = -ENOMEM;
+ goto err_fw;
+ }
DEBUG("ft1000_probe is called\n");
numaltsetting = interface->num_altsetting;
@@ -209,6 +213,8 @@ err_thread:
err_load:
kfree(pFileStart);
err_fw:
+ usb_free_urb(ft1000dev->rx_urb);
+ usb_free_urb(ft1000dev->tx_urb);
kfree(ft1000dev);
return ret;
}
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index e5818a1c2262..4e1cd5e9ea37 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -18,6 +18,8 @@
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/device.h>
@@ -101,13 +103,16 @@ struct fwtty_transaction {
};
#define to_device(a, b) (a->b)
-#define fwtty_err(p, s, v...) dev_err(to_device(p, device), s, ##v)
-#define fwtty_info(p, s, v...) dev_info(to_device(p, device), s, ##v)
-#define fwtty_notice(p, s, v...) dev_notice(to_device(p, device), s, ##v)
-#define fwtty_dbg(p, s, v...) \
- dev_dbg(to_device(p, device), "%s: " s, __func__, ##v)
-#define fwtty_err_ratelimited(p, s, v...) \
- dev_err_ratelimited(to_device(p, device), s, ##v)
+#define fwtty_err(p, fmt, ...) \
+ dev_err(to_device(p, device), fmt, ##__VA_ARGS__)
+#define fwtty_info(p, fmt, ...) \
+ dev_info(to_device(p, device), fmt, ##__VA_ARGS__)
+#define fwtty_notice(p, fmt, ...) \
+ dev_notice(to_device(p, device), fmt, ##__VA_ARGS__)
+#define fwtty_dbg(p, fmt, ...) \
+ dev_dbg(to_device(p, device), "%s: " fmt, __func__, ##__VA_ARGS__)
+#define fwtty_err_ratelimited(p, fmt, ...) \
+ dev_err_ratelimited(to_device(p, device), fmt, ##__VA_ARGS__)
#ifdef DEBUG
static inline void debug_short_write(struct fwtty_port *port, int c, int n)
@@ -118,7 +123,7 @@ static inline void debug_short_write(struct fwtty_port *port, int c, int n)
spin_lock_bh(&port->lock);
avail = dma_fifo_avail(&port->tx_fifo);
spin_unlock_bh(&port->lock);
- fwtty_dbg(port, "short write: avail:%d req:%d wrote:%d",
+ fwtty_dbg(port, "short write: avail:%d req:%d wrote:%d\n",
avail, c, n);
}
}
@@ -197,22 +202,22 @@ static void fwtty_log_tx_error(struct fwtty_port *port, int rcode)
{
switch (rcode) {
case RCODE_SEND_ERROR:
- fwtty_err_ratelimited(port, "card busy");
+ fwtty_err_ratelimited(port, "card busy\n");
break;
case RCODE_ADDRESS_ERROR:
- fwtty_err_ratelimited(port, "bad unit addr or write length");
+ fwtty_err_ratelimited(port, "bad unit addr or write length\n");
break;
case RCODE_DATA_ERROR:
- fwtty_err_ratelimited(port, "failed rx");
+ fwtty_err_ratelimited(port, "failed rx\n");
break;
case RCODE_NO_ACK:
- fwtty_err_ratelimited(port, "missing ack");
+ fwtty_err_ratelimited(port, "missing ack\n");
break;
case RCODE_BUSY:
- fwtty_err_ratelimited(port, "remote busy");
+ fwtty_err_ratelimited(port, "remote busy\n");
break;
default:
- fwtty_err_ratelimited(port, "failed tx: %d", rcode);
+ fwtty_err_ratelimited(port, "failed tx: %d\n", rcode);
}
}
@@ -287,7 +292,7 @@ static void __fwtty_restart_tx(struct fwtty_port *port)
schedule_delayed_work(&port->drain, 0);
avail = dma_fifo_avail(&port->tx_fifo);
- fwtty_dbg(port, "fifo len: %d avail: %d", len, avail);
+ fwtty_dbg(port, "fifo len: %d avail: %d\n", len, avail);
}
static void fwtty_restart_tx(struct fwtty_port *port)
@@ -323,7 +328,7 @@ static void fwtty_update_port_status(struct fwtty_port *port, unsigned status)
if (delta & TIOCM_CTS)
++port->icount.cts;
- fwtty_dbg(port, "status: %x delta: %x", status, delta);
+ fwtty_dbg(port, "status: %x delta: %x\n", status, delta);
if (delta & TIOCM_CAR) {
tty = tty_port_tty_get(&port->port);
@@ -509,7 +514,7 @@ static void fwtty_emit_breaks(struct work_struct *work)
n = (elapsed * port->cps) / HZ + 1;
port->break_last = now;
- fwtty_dbg(port, "sending %d brks", n);
+ fwtty_dbg(port, "sending %d brks\n", n);
while (n) {
t = min(n, 16);
@@ -570,7 +575,7 @@ static int fwtty_buffer_rx(struct fwtty_port *port, unsigned char *d, size_t n)
size_t size = (n + sizeof(struct buffered_rx) + 0xFF) & ~0xFF;
if (port->buffered + n > HIGH_WATERMARK) {
- fwtty_err_ratelimited(port, "overflowed rx buffer: buffered: %d new: %zu wtrmk: %d",
+ fwtty_err_ratelimited(port, "overflowed rx buffer: buffered: %d new: %zu wtrmk: %d\n",
port->buffered, n, HIGH_WATERMARK);
return 0;
}
@@ -599,7 +604,7 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
unsigned lsr;
int err = 0;
- fwtty_dbg(port, "%d", n);
+ fwtty_dbg(port, "%d\n", n);
profile_size_distrib(port->stats.reads, n);
if (port->write_only) {
@@ -689,7 +694,7 @@ static void fwtty_port_handler(struct fw_card *card,
rcu_read_unlock();
if (!peer || peer != rcu_access_pointer(port->peer)) {
rcode = RCODE_ADDRESS_ERROR;
- fwtty_err_ratelimited(port, "ignoring unauthenticated data");
+ fwtty_err_ratelimited(port, "ignoring unauthenticated data\n");
goto respond;
}
@@ -746,7 +751,7 @@ static void fwtty_tx_complete(struct fw_card *card, int rcode,
struct fwtty_port *port = txn->port;
int len;
- fwtty_dbg(port, "rcode: %d", rcode);
+ fwtty_dbg(port, "rcode: %d\n", rcode);
switch (rcode) {
case RCODE_COMPLETE:
@@ -809,7 +814,7 @@ static int fwtty_tx(struct fwtty_port *port, bool drain)
n = dma_fifo_out_pend(&port->tx_fifo, &txn->dma_pended);
spin_unlock_bh(&port->lock);
- fwtty_dbg(port, "out: %u rem: %d", txn->dma_pended.len, n);
+ fwtty_dbg(port, "out: %u rem: %d\n", txn->dma_pended.len, n);
if (n < 0) {
kmem_cache_free(fwtty_txn_cache, txn);
@@ -819,7 +824,8 @@ static int fwtty_tx(struct fwtty_port *port, bool drain)
profile_size_distrib(port->stats.txns, 0);
else {
++port->stats.fifo_errs;
- fwtty_err_ratelimited(port, "fifo err: %d", n);
+ fwtty_err_ratelimited(port, "fifo err: %d\n",
+ n);
}
break;
}
@@ -877,7 +883,7 @@ static void fwtty_write_xchar(struct fwtty_port *port, char ch)
++port->stats.xchars;
- fwtty_dbg(port, "%02x", ch);
+ fwtty_dbg(port, "%02x\n", ch);
rcu_read_lock();
peer = rcu_dereference(port->peer);
@@ -964,7 +970,7 @@ static void fwtty_port_dtr_rts(struct tty_port *tty_port, int on)
{
struct fwtty_port *port = to_port(tty_port, port);
- fwtty_dbg(port, "on/off: %d", on);
+ fwtty_dbg(port, "on/off: %d\n", on);
spin_lock_bh(&port->lock);
/* Don't change carrier state if this is a console */
@@ -992,7 +998,7 @@ static int fwtty_port_carrier_raised(struct tty_port *tty_port)
rc = (port->mstatus & TIOCM_CAR);
- fwtty_dbg(port, "%d", rc);
+ fwtty_dbg(port, "%d\n", rc);
return rc;
}
@@ -1177,7 +1183,7 @@ static int fwtty_write(struct tty_struct *tty, const unsigned char *buf, int c)
struct fwtty_port *port = tty->driver_data;
int n, len;
- fwtty_dbg(port, "%d", c);
+ fwtty_dbg(port, "%d\n", c);
profile_size_distrib(port->stats.writes, c);
spin_lock_bh(&port->lock);
@@ -1204,7 +1210,7 @@ static int fwtty_write_room(struct tty_struct *tty)
n = dma_fifo_avail(&port->tx_fifo);
spin_unlock_bh(&port->lock);
- fwtty_dbg(port, "%d", n);
+ fwtty_dbg(port, "%d\n", n);
return n;
}
@@ -1218,7 +1224,7 @@ static int fwtty_chars_in_buffer(struct tty_struct *tty)
n = dma_fifo_level(&port->tx_fifo);
spin_unlock_bh(&port->lock);
- fwtty_dbg(port, "%d", n);
+ fwtty_dbg(port, "%d\n", n);
return n;
}
@@ -1227,7 +1233,7 @@ static void fwtty_send_xchar(struct tty_struct *tty, char ch)
{
struct fwtty_port *port = tty->driver_data;
- fwtty_dbg(port, "%02x", ch);
+ fwtty_dbg(port, "%02x\n", ch);
fwtty_write_xchar(port, ch);
}
@@ -1254,7 +1260,7 @@ static void fwtty_unthrottle(struct tty_struct *tty)
{
struct fwtty_port *port = tty->driver_data;
- fwtty_dbg(port, "CRTSCTS: %d", (C_CRTSCTS(tty) != 0));
+ fwtty_dbg(port, "CRTSCTS: %d\n", (C_CRTSCTS(tty) != 0));
profile_fifo_avail(port, port->stats.unthrottle);
@@ -1409,7 +1415,7 @@ static int fwtty_break_ctl(struct tty_struct *tty, int state)
struct fwtty_port *port = tty->driver_data;
long ret;
- fwtty_dbg(port, "%d", state);
+ fwtty_dbg(port, "%d\n", state);
if (state == -1) {
set_bit(STOP_TX, &port->flags);
@@ -1446,7 +1452,7 @@ static int fwtty_tiocmget(struct tty_struct *tty)
tiocm = (port->mctrl & MCTRL_MASK) | (port->mstatus & ~MCTRL_MASK);
spin_unlock_bh(&port->lock);
- fwtty_dbg(port, "%x", tiocm);
+ fwtty_dbg(port, "%x\n", tiocm);
return tiocm;
}
@@ -1455,7 +1461,7 @@ static int fwtty_tiocmset(struct tty_struct *tty, unsigned set, unsigned clear)
{
struct fwtty_port *port = tty->driver_data;
- fwtty_dbg(port, "set: %x clear: %x", set, clear);
+ fwtty_dbg(port, "set: %x clear: %x\n", set, clear);
/* TODO: simulate loopback if TIOCM_LOOP set */
@@ -1775,7 +1781,7 @@ static void fwserial_virt_plug_complete(struct fwtty_peer *peer,
if (port->port.console && port->fwcon_ops->notify != NULL)
(*port->fwcon_ops->notify)(FWCON_NOTIFY_ATTACH, port->con_data);
- fwtty_info(&peer->unit, "peer (guid:%016llx) connected on %s",
+ fwtty_info(&peer->unit, "peer (guid:%016llx) connected on %s\n",
(unsigned long long)peer->guid, dev_name(port->device));
}
@@ -1797,7 +1803,7 @@ static inline int fwserial_send_mgmt_sync(struct fwtty_peer *peer,
pkt, be16_to_cpu(pkt->hdr.len));
if (rcode == RCODE_BUSY || rcode == RCODE_SEND_ERROR ||
rcode == RCODE_GENERATION) {
- fwtty_dbg(&peer->unit, "mgmt write error: %d", rcode);
+ fwtty_dbg(&peer->unit, "mgmt write error: %d\n", rcode);
continue;
} else
break;
@@ -1918,7 +1924,7 @@ static int fwserial_connect_peer(struct fwtty_peer *peer)
port = fwserial_find_port(peer);
if (!port) {
- fwtty_err(&peer->unit, "avail ports in use");
+ fwtty_err(&peer->unit, "avail ports in use\n");
err = -EBUSY;
goto free_pkt;
}
@@ -2056,7 +2062,7 @@ static struct fwtty_peer *__fwserial_peer_by_node_id(struct fw_card *card,
* has created its remote unit device before this driver has
* been probed for any unit devices...
*/
- fwtty_err(card, "unknown card (guid %016llx)",
+ fwtty_err(card, "unknown card (guid %016llx)\n",
(unsigned long long) card->guid);
return NULL;
}
@@ -2084,8 +2090,8 @@ static void __dump_peer_list(struct fw_card *card)
list_for_each_entry_rcu(peer, &serial->peer_list, list) {
int g = peer->generation;
smp_rmb();
- fwtty_dbg(card, "peer(%d:%x) guid: %016llx\n", g,
- peer->node_id, (unsigned long long) peer->guid);
+ fwtty_dbg(card, "peer(%d:%x) guid: %016llx\n",
+ g, peer->node_id, (unsigned long long) peer->guid);
}
}
#else
@@ -2173,7 +2179,7 @@ static int fwserial_add_peer(struct fw_serial *serial, struct fw_unit *unit)
peer->serial = serial;
list_add_rcu(&peer->list, &serial->peer_list);
- fwtty_info(&peer->unit, "peer added (guid:%016llx)",
+ fwtty_info(&peer->unit, "peer added (guid:%016llx)\n",
(unsigned long long)peer->guid);
/* identify the local unit & virt cable to loopback port */
@@ -2236,7 +2242,7 @@ static void fwserial_remove_peer(struct fwtty_peer *peer)
list_del_rcu(&peer->list);
- fwtty_info(&peer->unit, "peer removed (guid:%016llx)",
+ fwtty_info(&peer->unit, "peer removed (guid:%016llx)\n",
(unsigned long long)peer->guid);
spin_unlock_bh(&peer->lock);
@@ -2324,7 +2330,7 @@ static int fwserial_create(struct fw_unit *unit)
err = fwtty_ports_add(serial);
if (err) {
- fwtty_err(&unit, "no space in port table");
+ fwtty_err(&unit, "no space in port table\n");
goto free_ports;
}
@@ -2335,7 +2341,8 @@ static int fwserial_create(struct fw_unit *unit)
card->device);
if (IS_ERR(tty_dev)) {
err = PTR_ERR(tty_dev);
- fwtty_err(&unit, "register tty device error (%d)", err);
+ fwtty_err(&unit, "register tty device error (%d)\n",
+ err);
goto unregister_ttys;
}
@@ -2352,7 +2359,8 @@ static int fwserial_create(struct fw_unit *unit)
card->device);
if (IS_ERR(loop_dev)) {
err = PTR_ERR(loop_dev);
- fwtty_err(&unit, "create loop device failed (%d)", err);
+ fwtty_err(&unit, "create loop device failed (%d)\n",
+ err);
goto unregister_ttys;
}
serial->ports[j]->device = loop_dev;
@@ -2372,14 +2380,14 @@ static int fwserial_create(struct fw_unit *unit)
list_add_rcu(&serial->list, &fwserial_list);
- fwtty_notice(&unit, "TTY over FireWire on device %s (guid %016llx)",
+ fwtty_notice(&unit, "TTY over FireWire on device %s (guid %016llx)\n",
dev_name(card->device), (unsigned long long) card->guid);
err = fwserial_add_peer(serial, unit);
if (!err)
return 0;
- fwtty_err(&unit, "unable to add peer unit device (%d)", err);
+ fwtty_err(&unit, "unable to add peer unit device (%d)\n", err);
/* fall-through to error processing */
debugfs_remove_recursive(serial->debugfs);
@@ -2621,7 +2629,7 @@ static void fwserial_handle_plug_req(struct work_struct *work)
switch (peer->state) {
case FWPS_NOT_ATTACHED:
if (!port) {
- fwtty_err(&peer->unit, "no more ports avail");
+ fwtty_err(&peer->unit, "no more ports avail\n");
fill_plug_rsp_nack(pkt);
} else {
peer->port = port;
@@ -2663,7 +2671,7 @@ static void fwserial_handle_plug_req(struct work_struct *work)
fwtty_write_port_status(tmp);
spin_lock_bh(&peer->lock);
} else {
- fwtty_err(&peer->unit, "PLUG_RSP error (%d)", rcode);
+ fwtty_err(&peer->unit, "PLUG_RSP error (%d)\n", rcode);
port = peer_revert_state(peer);
}
}
@@ -2715,7 +2723,8 @@ static void fwserial_handle_unplug_req(struct work_struct *work)
spin_lock_bh(&peer->lock);
if (peer->state == FWPS_UNPLUG_RESPONDING) {
if (rcode != RCODE_COMPLETE)
- fwtty_err(&peer->unit, "UNPLUG_RSP error (%d)", rcode);
+ fwtty_err(&peer->unit, "UNPLUG_RSP error (%d)\n",
+ rcode);
port = peer_revert_state(peer);
}
cleanup:
@@ -2750,19 +2759,19 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer,
* already removed from the bus -- and the removal was
* processed before we rec'd this transaction
*/
- fwtty_err(&peer->unit, "peer already removed");
+ fwtty_err(&peer->unit, "peer already removed\n");
spin_unlock_bh(&peer->lock);
return RCODE_ADDRESS_ERROR;
}
rcode = RCODE_COMPLETE;
- fwtty_dbg(&peer->unit, "mgmt: hdr.code: %04hx", pkt->hdr.code);
+ fwtty_dbg(&peer->unit, "mgmt: hdr.code: %04hx\n", pkt->hdr.code);
switch (be16_to_cpu(pkt->hdr.code) & FWSC_CODE_MASK) {
case FWSC_VIRT_CABLE_PLUG:
if (work_pending(&peer->work)) {
- fwtty_err(&peer->unit, "plug req: busy");
+ fwtty_err(&peer->unit, "plug req: busy\n");
rcode = RCODE_CONFLICT_ERROR;
} else {
@@ -2777,7 +2786,7 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer,
rcode = RCODE_CONFLICT_ERROR;
} else if (be16_to_cpu(pkt->hdr.code) & FWSC_RSP_NACK) {
- fwtty_notice(&peer->unit, "NACK plug rsp");
+ fwtty_notice(&peer->unit, "NACK plug rsp\n");
port = peer_revert_state(peer);
} else {
@@ -2793,7 +2802,7 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer,
case FWSC_VIRT_CABLE_UNPLUG:
if (work_pending(&peer->work)) {
- fwtty_err(&peer->unit, "unplug req: busy");
+ fwtty_err(&peer->unit, "unplug req: busy\n");
rcode = RCODE_CONFLICT_ERROR;
} else {
PREPARE_WORK(&peer->work, fwserial_handle_unplug_req);
@@ -2806,14 +2815,14 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer,
rcode = RCODE_CONFLICT_ERROR;
else {
if (be16_to_cpu(pkt->hdr.code) & FWSC_RSP_NACK)
- fwtty_notice(&peer->unit, "NACK unplug?");
+ fwtty_notice(&peer->unit, "NACK unplug?\n");
port = peer_revert_state(peer);
reset = true;
}
break;
default:
- fwtty_err(&peer->unit, "unknown mgmt code %d",
+ fwtty_err(&peer->unit, "unknown mgmt code %d\n",
be16_to_cpu(pkt->hdr.code));
rcode = RCODE_DATA_ERROR;
}
@@ -2847,7 +2856,7 @@ static void fwserial_mgmt_handler(struct fw_card *card,
rcu_read_lock();
peer = __fwserial_peer_by_node_id(card, generation, source);
if (!peer) {
- fwtty_dbg(card, "peer(%d:%x) not found", generation, source);
+ fwtty_dbg(card, "peer(%d:%x) not found\n", generation, source);
__dump_peer_list(card);
rcode = RCODE_CONFLICT_ERROR;
@@ -2897,7 +2906,7 @@ static int __init fwserial_init(void)
err = tty_register_driver(fwtty_driver);
if (err) {
- driver_err("register tty driver failed (%d)", err);
+ pr_err("register tty driver failed (%d)\n", err);
goto put_tty;
}
@@ -2922,7 +2931,7 @@ static int __init fwserial_init(void)
err = tty_register_driver(fwloop_driver);
if (err) {
- driver_err("register loop driver failed (%d)", err);
+ pr_err("register loop driver failed (%d)\n", err);
goto put_loop;
}
}
@@ -2948,7 +2957,7 @@ static int __init fwserial_init(void)
err = fw_core_add_address_handler(&fwserial_mgmt_addr_handler,
&fwserial_mgmt_addr_region);
if (err) {
- driver_err("add management handler failed (%d)", err);
+ pr_err("add management handler failed (%d)\n", err);
goto destroy_cache;
}
@@ -2956,13 +2965,13 @@ static int __init fwserial_init(void)
FW_UNIT_ADDRESS(fwserial_mgmt_addr_handler.offset);
err = fw_core_add_descriptor(&fwserial_unit_directory);
if (err) {
- driver_err("add unit descriptor failed (%d)", err);
+ pr_err("add unit descriptor failed (%d)\n", err);
goto remove_handler;
}
err = driver_register(&fwserial_driver.driver);
if (err) {
- driver_err("register fwserial driver failed (%d)", err);
+ pr_err("register fwserial driver failed (%d)\n", err);
goto remove_descriptor;
}
diff --git a/drivers/staging/fwserial/fwserial.h b/drivers/staging/fwserial/fwserial.h
index 514f57173259..24635014a2ac 100644
--- a/drivers/staging/fwserial/fwserial.h
+++ b/drivers/staging/fwserial/fwserial.h
@@ -356,8 +356,6 @@ static const char loop_dev_name[] = "fwloop";
extern struct tty_driver *fwtty_driver;
-#define driver_err(s, v...) pr_err(KBUILD_MODNAME ": " s, ##v)
-
struct fwtty_port *fwtty_port_get(unsigned index);
void fwtty_port_put(struct fwtty_port *port);
diff --git a/drivers/staging/gdm72xx/Kconfig b/drivers/staging/gdm72xx/Kconfig
index 69059138de4a..dd8a3913f6b9 100644
--- a/drivers/staging/gdm72xx/Kconfig
+++ b/drivers/staging/gdm72xx/Kconfig
@@ -4,7 +4,7 @@
menuconfig WIMAX_GDM72XX
tristate "GCT GDM72xx WiMAX support"
- depends on NET
+ depends on NET && (USB || MMC)
help
Support for the GCT GDM72xx WiMAX chip
@@ -19,7 +19,7 @@ config WIMAX_GDM72XX_K_MODE
default n
config WIMAX_GDM72XX_WIMAX2
- bool "Enable WIMAX2 support"
+ bool "Enable WiMAX2 support"
default n
choice
@@ -27,18 +27,18 @@ choice
config WIMAX_GDM72XX_USB
bool "USB interface"
- depends on USB
+ depends on (USB = y || USB = WIMAX_GDM72XX)
config WIMAX_GDM72XX_SDIO
bool "SDIO interface"
- depends on MMC
+ depends on (MMC = y || MMC = WIMAX_GDM72XX)
endchoice
if WIMAX_GDM72XX_USB
config WIMAX_GDM72XX_USB_PM
- bool "Enable power managerment support"
+ bool "Enable power management support"
depends on PM_RUNTIME
endif # WIMAX_GDM72XX_USB
diff --git a/drivers/staging/gdm72xx/gdm_wimax.c b/drivers/staging/gdm72xx/gdm_wimax.c
index 41efbeeb62f1..dd854975db7d 100644
--- a/drivers/staging/gdm72xx/gdm_wimax.c
+++ b/drivers/staging/gdm72xx/gdm_wimax.c
@@ -939,8 +939,7 @@ int register_wimax_device(struct phy_dev *phy_dev, struct device *pdev)
struct net_device *dev;
int ret;
- dev = (struct net_device *)alloc_netdev(sizeof(*nic),
- "wm%d", ether_setup);
+ dev = alloc_netdev(sizeof(*nic), "wm%d", ether_setup);
if (dev == NULL) {
pr_err("alloc_etherdev failed\n");
diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c
index d3bed21f4072..f96dcec740ae 100644
--- a/drivers/staging/goldfish/goldfish_audio.c
+++ b/drivers/staging/goldfish/goldfish_audio.c
@@ -1,4 +1,5 @@
-/* drivers/misc/goldfish_audio.c
+/*
+ * drivers/misc/goldfish_audio.c
*
* Copyright (C) 2007 Google, Inc.
* Copyright (C) 2012 Intel, Inc.
@@ -47,10 +48,11 @@ struct goldfish_audio {
int read_supported; /* true if we have audio input support */
};
-/* We will allocate two read buffers and two write buffers.
- Having two read buffers facilitate stereo -> mono conversion.
- Having two write buffers facilitate interleaved IO.
-*/
+/*
+ * We will allocate two read buffers and two write buffers.
+ * Having two read buffers facilitate stereo -> mono conversion.
+ * Having two write buffers facilitate interleaved IO.
+ */
#define READ_BUFFER_SIZE 16384
#define WRITE_BUFFER_SIZE 16384
#define COMBINED_BUFFER_SIZE ((2 * READ_BUFFER_SIZE) + \
@@ -59,8 +61,10 @@ struct goldfish_audio {
#define AUDIO_READ(data, addr) (readl(data->reg_base + addr))
#define AUDIO_WRITE(data, addr, x) (writel(x, data->reg_base + addr))
-/* temporary variable used between goldfish_audio_probe() and
- goldfish_audio_open() */
+/*
+ * temporary variable used between goldfish_audio_probe() and
+ * goldfish_audio_open()
+ */
static struct goldfish_audio *audio_data;
enum {
@@ -161,8 +165,10 @@ static ssize_t goldfish_audio_write(struct file *fp, const char __user *buf,
}
spin_lock_irqsave(&data->lock, irq_flags);
- /* clear the buffer empty flag, and signal the emulator
- * to start writing the buffer */
+ /*
+ * clear the buffer empty flag, and signal the emulator
+ * to start writing the buffer
+ */
if (kbuf == data->write_buffer1) {
data->buffer_status &= ~AUDIO_INT_WRITE_BUFFER_1_EMPTY;
AUDIO_WRITE(data, AUDIO_WRITE_BUFFER_1, copy);
@@ -225,8 +231,10 @@ static irqreturn_t goldfish_audio_interrupt(int irq, void *dev_id)
/* read buffer status flags */
status = AUDIO_READ(data, AUDIO_INT_STATUS);
status &= AUDIO_INT_MASK;
- /* if buffers are newly empty, wake up blocked
- goldfish_audio_write() call */
+ /*
+ * if buffers are newly empty, wake up blocked
+ * goldfish_audio_write() call
+ */
if (status) {
data->buffer_status = status;
wake_up(&data->wait);
diff --git a/drivers/staging/goldfish/goldfish_nand.c b/drivers/staging/goldfish/goldfish_nand.c
index ab1f01952b48..81e2ad4038fe 100644
--- a/drivers/staging/goldfish/goldfish_nand.c
+++ b/drivers/staging/goldfish/goldfish_nand.c
@@ -326,9 +326,10 @@ static int goldfish_nand_init_device(struct platform_device *pdev,
(mtd->writesize + mtd->oobsize) * mtd->writesize;
do_div(mtd->size, mtd->writesize + mtd->oobsize);
mtd->size *= mtd->writesize;
- dev_dbg(&pdev->dev,
+ dev_dbg(&pdev->dev,
"goldfish nand dev%d: size %llx, page %d, extra %d, erase %d\n",
- id, mtd->size, mtd->writesize, mtd->oobsize, mtd->erasesize);
+ id, mtd->size, mtd->writesize,
+ mtd->oobsize, mtd->erasesize);
spin_unlock_irqrestore(&nand->lock, irq_flags);
mtd->priv = nand;
@@ -340,7 +341,7 @@ static int goldfish_nand_init_device(struct platform_device *pdev,
result = goldfish_nand_cmd(mtd, NAND_CMD_GET_DEV_NAME, 0, name_len,
name);
if (result != name_len) {
- dev_err(&pdev->dev,
+ dev_err(&pdev->dev,
"goldfish_nand_init_device failed to get dev name %d != %d\n",
result, name_len);
return -ENODEV;
@@ -391,7 +392,7 @@ static int goldfish_nand_probe(struct platform_device *pdev)
version = readl(base + NAND_VERSION);
if (version != NAND_VERSION_CURRENT) {
- dev_err(&pdev->dev,
+ dev_err(&pdev->dev,
"goldfish_nand_init: version mismatch, got %d, expected %d\n",
version, NAND_VERSION_CURRENT);
return -ENODEV;
@@ -400,7 +401,7 @@ static int goldfish_nand_probe(struct platform_device *pdev)
if (num_dev == 0)
return -ENODEV;
- nand = devm_kzalloc(&pdev->dev, sizeof(*nand) +
+ nand = devm_kzalloc(&pdev->dev, sizeof(*nand) +
sizeof(struct mtd_info) * num_dev, GFP_KERNEL);
if (nand == NULL)
return -ENOMEM;
diff --git a/drivers/staging/goldfish/goldfish_nand_reg.h b/drivers/staging/goldfish/goldfish_nand_reg.h
index 956c6c304b6e..ddfda71ab27a 100644
--- a/drivers/staging/goldfish/goldfish_nand_reg.h
+++ b/drivers/staging/goldfish/goldfish_nand_reg.h
@@ -1,27 +1,30 @@
-/* drivers/mtd/devices/goldfish_nand_reg.h
-**
-** Copyright (C) 2007 Google, Inc.
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-** GNU General Public License for more details.
-**
-*/
+/*
+ * drivers/mtd/devices/goldfish_nand_reg.h
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
#ifndef GOLDFISH_NAND_REG_H
#define GOLDFISH_NAND_REG_H
enum nand_cmd {
- NAND_CMD_GET_DEV_NAME, /* Write device name for NAND_DEV to NAND_DATA (vaddr) */
+ /* Write device name for NAND_DEV to NAND_DATA (vaddr) */
+ NAND_CMD_GET_DEV_NAME,
NAND_CMD_READ,
NAND_CMD_WRITE,
NAND_CMD_ERASE,
- NAND_CMD_BLOCK_BAD_GET, /* NAND_RESULT is 1 if block is bad, 0 if it is not */
+ /* NAND_RESULT is 1 if block is bad, 0 if it is not */
+ NAND_CMD_BLOCK_BAD_GET,
NAND_CMD_BLOCK_BAD_SET,
NAND_CMD_READ_WITH_PARAMS,
NAND_CMD_WRITE_WITH_PARAMS,
diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index 504701940585..3283e2829536 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -326,7 +326,7 @@ static ssize_t ad7192_write_frequency(struct device *dev,
unsigned long lval;
int div, ret;
- ret = strict_strtoul(buf, 10, &lval);
+ ret = kstrtoul(buf, 10, &lval);
if (ret)
return ret;
if (lval == 0)
diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c
index 2fd6ee3c1902..c19618bc37c4 100644
--- a/drivers/staging/iio/adc/ad7280a.c
+++ b/drivers/staging/iio/adc/ad7280a.c
@@ -632,7 +632,7 @@ static ssize_t ad7280_write_channel_config(struct device *dev,
long val;
int ret;
- ret = strict_strtol(buf, 10, &val);
+ ret = kstrtol(buf, 10, &val);
if (ret)
return ret;
diff --git a/drivers/staging/iio/adc/ad7291.c b/drivers/staging/iio/adc/ad7291.c
index d088c662d5cd..3fc79e582750 100644
--- a/drivers/staging/iio/adc/ad7291.c
+++ b/drivers/staging/iio/adc/ad7291.c
@@ -21,6 +21,8 @@
#include <linux/iio/sysfs.h>
#include <linux/iio/events.h>
+#include "ad7291.h"
+
/*
* Simplified handling
*
@@ -39,33 +41,9 @@
#define AD7291_VOLTAGE 0x01
#define AD7291_T_SENSE 0x02
#define AD7291_T_AVERAGE 0x03
-#define AD7291_CH0_DATA_HIGH 0x04
-#define AD7291_CH0_DATA_LOW 0x05
-#define AD7291_CH0_HYST 0x06
-#define AD7291_CH1_DATA_HIGH 0x07
-#define AD7291_CH1_DATA_LOW 0x08
-#define AD7291_CH1_HYST 0x09
-#define AD7291_CH2_DATA_HIGH 0x0A
-#define AD7291_CH2_DATA_LOW 0x0B
-#define AD7291_CH2_HYST 0x0C
-#define AD7291_CH3_DATA_HIGH 0x0D
-#define AD7291_CH3_DATA_LOW 0x0E
-#define AD7291_CH3_HYST 0x0F
-#define AD7291_CH4_DATA_HIGH 0x10
-#define AD7291_CH4_DATA_LOW 0x11
-#define AD7291_CH4_HYST 0x12
-#define AD7291_CH5_DATA_HIGH 0x13
-#define AD7291_CH5_DATA_LOW 0x14
-#define AD7291_CH5_HYST 0x15
-#define AD7291_CH6_DATA_HIGH 0x16
-#define AD7291_CH6_DATA_LOW 0x17
-#define AD7291_CH6_HYST 0x18
-#define AD7291_CH7_DATA_HIGH 0x19
-#define AD7291_CH7_DATA_LOW 0x1A
-#define AD7291_CH7_HYST 0x2B
-#define AD7291_T_SENSE_HIGH 0x1C
-#define AD7291_T_SENSE_LOW 0x1D
-#define AD7291_T_SENSE_HYST 0x1E
+#define AD7291_DATA_HIGH(x) ((x) * 3 + 0x4)
+#define AD7291_DATA_LOW(x) ((x) * 3 + 0x5)
+#define AD7291_HYST(x) ((x) * 3 + 0x6)
#define AD7291_VOLTAGE_ALERT_STATUS 0x1F
#define AD7291_T_ALERT_STATUS 0x20
@@ -100,7 +78,6 @@
struct ad7291_chip_info {
struct i2c_client *client;
struct regulator *reg;
- u16 int_vref_mv;
u16 command;
u16 c_mask; /* Active voltage channels for events */
struct mutex state_lock;
@@ -111,45 +88,22 @@ static int ad7291_i2c_read(struct ad7291_chip_info *chip, u8 reg, u16 *data)
struct i2c_client *client = chip->client;
int ret = 0;
- ret = i2c_smbus_read_word_data(client, reg);
+ ret = i2c_smbus_read_word_swapped(client, reg);
if (ret < 0) {
dev_err(&client->dev, "I2C read error\n");
return ret;
}
- *data = swab16((u16)ret);
+ *data = ret;
return 0;
}
static int ad7291_i2c_write(struct ad7291_chip_info *chip, u8 reg, u16 data)
{
- return i2c_smbus_write_word_data(chip->client, reg, swab16(data));
-}
-
-static ssize_t ad7291_store_reset(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t len)
-{
- struct iio_dev *indio_dev = dev_to_iio_dev(dev);
- struct ad7291_chip_info *chip = iio_priv(indio_dev);
-
- return ad7291_i2c_write(chip, AD7291_COMMAND,
- chip->command | AD7291_RESET);
+ return i2c_smbus_write_word_swapped(chip->client, reg, data);
}
-static IIO_DEVICE_ATTR(reset, S_IWUSR, NULL, ad7291_store_reset, 0);
-
-static struct attribute *ad7291_attributes[] = {
- &iio_dev_attr_reset.dev_attr.attr,
- NULL,
-};
-
-static const struct attribute_group ad7291_attribute_group = {
- .attrs = ad7291_attributes,
-};
-
static irqreturn_t ad7291_event_handler(int irq, void *private)
{
struct iio_dev *indio_dev = private;
@@ -255,31 +209,31 @@ static inline ssize_t ad7291_set_hyst(struct device *dev,
static IIO_DEVICE_ATTR(in_temp0_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
ad7291_show_hyst, ad7291_set_hyst,
- AD7291_T_SENSE_HYST);
+ AD7291_HYST(8));
static IIO_DEVICE_ATTR(in_voltage0_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH0_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(0));
static IIO_DEVICE_ATTR(in_voltage1_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH1_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(1));
static IIO_DEVICE_ATTR(in_voltage2_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH2_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(2));
static IIO_DEVICE_ATTR(in_voltage3_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH3_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(3));
static IIO_DEVICE_ATTR(in_voltage4_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH4_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(4));
static IIO_DEVICE_ATTR(in_voltage5_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH5_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(5));
static IIO_DEVICE_ATTR(in_voltage6_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH6_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(6));
static IIO_DEVICE_ATTR(in_voltage7_thresh_both_hyst_raw,
S_IRUGO | S_IWUSR,
- ad7291_show_hyst, ad7291_set_hyst, AD7291_CH7_HYST);
+ ad7291_show_hyst, ad7291_set_hyst, AD7291_HYST(7));
static struct attribute *ad7291_event_attributes[] = {
&iio_dev_attr_in_temp0_thresh_both_hyst_raw.dev_attr.attr,
@@ -294,53 +248,45 @@ static struct attribute *ad7291_event_attributes[] = {
NULL,
};
-/* high / low */
-static u8 ad7291_limit_regs[9][2] = {
- { AD7291_CH0_DATA_HIGH, AD7291_CH0_DATA_LOW },
- { AD7291_CH1_DATA_HIGH, AD7291_CH1_DATA_LOW },
- { AD7291_CH2_DATA_HIGH, AD7291_CH2_DATA_LOW },
- { AD7291_CH3_DATA_HIGH, AD7291_CH3_DATA_LOW }, /* FIXME: ? */
- { AD7291_CH4_DATA_HIGH, AD7291_CH4_DATA_LOW },
- { AD7291_CH5_DATA_HIGH, AD7291_CH5_DATA_LOW },
- { AD7291_CH6_DATA_HIGH, AD7291_CH6_DATA_LOW },
- { AD7291_CH7_DATA_HIGH, AD7291_CH7_DATA_LOW },
- /* temp */
- { AD7291_T_SENSE_HIGH, AD7291_T_SENSE_LOW },
-};
+static unsigned int ad7291_threshold_reg(u64 event_code)
+{
+ unsigned int offset;
+
+ switch (IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event_code)) {
+ case IIO_VOLTAGE:
+ offset = IIO_EVENT_CODE_EXTRACT_CHAN(event_code);
+ break;
+ case IIO_TEMP:
+ offset = 8;
+ break;
+ default:
+ return 0;
+ }
+
+ if (IIO_EVENT_CODE_EXTRACT_DIR(event_code) == IIO_EV_DIR_FALLING)
+ return AD7291_DATA_LOW(offset);
+ else
+ return AD7291_DATA_HIGH(offset);
+}
static int ad7291_read_event_value(struct iio_dev *indio_dev,
u64 event_code,
int *val)
{
struct ad7291_chip_info *chip = iio_priv(indio_dev);
-
int ret;
- u8 reg;
u16 uval;
- s16 signval;
+
+ ret = ad7291_i2c_read(chip, ad7291_threshold_reg(event_code), &uval);
+ if (ret < 0)
+ return ret;
switch (IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event_code)) {
case IIO_VOLTAGE:
- reg = ad7291_limit_regs[IIO_EVENT_CODE_EXTRACT_CHAN(event_code)]
- [!(IIO_EVENT_CODE_EXTRACT_DIR(event_code) ==
- IIO_EV_DIR_RISING)];
-
- ret = ad7291_i2c_read(chip, reg, &uval);
- if (ret < 0)
- return ret;
*val = uval & AD7291_VALUE_MASK;
return 0;
-
case IIO_TEMP:
- reg = ad7291_limit_regs[8]
- [!(IIO_EVENT_CODE_EXTRACT_DIR(event_code) ==
- IIO_EV_DIR_RISING)];
-
- ret = ad7291_i2c_read(chip, reg, &signval);
- if (ret < 0)
- return ret;
- signval = (s16)((signval & AD7291_VALUE_MASK) << 4) >> 4;
- *val = signval;
+ *val = sign_extend32(uval, 11);
return 0;
default:
return -EINVAL;
@@ -352,28 +298,21 @@ static int ad7291_write_event_value(struct iio_dev *indio_dev,
int val)
{
struct ad7291_chip_info *chip = iio_priv(indio_dev);
- u8 reg;
- s16 signval;
switch (IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event_code)) {
case IIO_VOLTAGE:
if (val > AD7291_VALUE_MASK || val < 0)
return -EINVAL;
- reg = ad7291_limit_regs[IIO_EVENT_CODE_EXTRACT_CHAN(event_code)]
- [!(IIO_EVENT_CODE_EXTRACT_DIR(event_code) ==
- IIO_EV_DIR_RISING)];
- return ad7291_i2c_write(chip, reg, val);
+ break;
case IIO_TEMP:
if (val > 2047 || val < -2048)
return -EINVAL;
- reg = ad7291_limit_regs[8]
- [!(IIO_EVENT_CODE_EXTRACT_DIR(event_code) ==
- IIO_EV_DIR_RISING)];
- signval = val;
- return ad7291_i2c_write(chip, reg, *(u16 *)&signval);
+ break;
default:
return -EINVAL;
- };
+ }
+
+ return ad7291_i2c_write(chip, ad7291_threshold_reg(event_code), val);
}
static int ad7291_read_event_config(struct iio_dev *indio_dev,
@@ -456,9 +395,7 @@ static int ad7291_read_raw(struct iio_dev *indio_dev,
{
int ret;
struct ad7291_chip_info *chip = iio_priv(indio_dev);
- unsigned int scale_uv;
u16 regval;
- s16 signval;
switch (mask) {
case IIO_CHAN_INFO_RAW:
@@ -479,44 +416,47 @@ static int ad7291_read_raw(struct iio_dev *indio_dev,
return ret;
}
/* Read voltage */
- ret = i2c_smbus_read_word_data(chip->client,
+ ret = i2c_smbus_read_word_swapped(chip->client,
AD7291_VOLTAGE);
if (ret < 0) {
mutex_unlock(&chip->state_lock);
return ret;
}
- *val = swab16((u16)ret) & AD7291_VALUE_MASK;
+ *val = ret & AD7291_VALUE_MASK;
mutex_unlock(&chip->state_lock);
return IIO_VAL_INT;
case IIO_TEMP:
/* Assumes tsense bit of command register always set */
- ret = i2c_smbus_read_word_data(chip->client,
+ ret = i2c_smbus_read_word_swapped(chip->client,
AD7291_T_SENSE);
if (ret < 0)
return ret;
- signval = (s16)((swab16((u16)ret) &
- AD7291_VALUE_MASK) << 4) >> 4;
- *val = signval;
+ *val = sign_extend32(ret, 11);
return IIO_VAL_INT;
default:
return -EINVAL;
}
case IIO_CHAN_INFO_AVERAGE_RAW:
- ret = i2c_smbus_read_word_data(chip->client,
+ ret = i2c_smbus_read_word_swapped(chip->client,
AD7291_T_AVERAGE);
if (ret < 0)
return ret;
- signval = (s16)((swab16((u16)ret) &
- AD7291_VALUE_MASK) << 4) >> 4;
- *val = signval;
+ *val = sign_extend32(ret, 11);
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
switch (chan->type) {
case IIO_VOLTAGE:
- scale_uv = (chip->int_vref_mv * 1000) >> AD7291_BITS;
- *val = scale_uv / 1000;
- *val2 = (scale_uv % 1000) * 1000;
- return IIO_VAL_INT_PLUS_MICRO;
+ if (chip->reg) {
+ int vref;
+ vref = regulator_get_voltage(chip->reg);
+ if (vref < 0)
+ return vref;
+ *val = vref / 1000;
+ } else {
+ *val = 2500;
+ }
+ *val2 = AD7291_BITS;
+ return IIO_VAL_FRACTIONAL_LOG2;
case IIO_TEMP:
/*
* One LSB of the ADC corresponds to 0.25 deg C.
@@ -571,7 +511,6 @@ static struct attribute_group ad7291_event_attribute_group = {
};
static const struct iio_info ad7291_info = {
- .attrs = &ad7291_attribute_group,
.read_raw = &ad7291_read_raw,
.read_event_config = &ad7291_read_event_config,
.write_event_config = &ad7291_write_event_config,
@@ -583,9 +522,10 @@ static const struct iio_info ad7291_info = {
static int ad7291_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ struct ad7291_platform_data *pdata = client->dev.platform_data;
struct ad7291_chip_info *chip;
struct iio_dev *indio_dev;
- int ret = 0, voltage_uv = 0;
+ int ret = 0;
indio_dev = iio_device_alloc(sizeof(*chip));
if (indio_dev == NULL) {
@@ -594,12 +534,14 @@ static int ad7291_probe(struct i2c_client *client,
}
chip = iio_priv(indio_dev);
- chip->reg = regulator_get(&client->dev, "vcc");
- if (!IS_ERR(chip->reg)) {
+ if (pdata && pdata->use_external_ref) {
+ chip->reg = regulator_get(&client->dev, "vref");
+ if (IS_ERR(chip->reg))
+ goto error_free;
+
ret = regulator_enable(chip->reg);
if (ret)
goto error_put_reg;
- voltage_uv = regulator_get_voltage(chip->reg);
}
mutex_init(&chip->state_lock);
@@ -612,12 +554,8 @@ static int ad7291_probe(struct i2c_client *client,
AD7291_T_SENSE_MASK | /* Tsense always enabled */
AD7291_ALERT_POLARITY; /* set irq polarity low level */
- if (voltage_uv) {
- chip->int_vref_mv = voltage_uv / 1000;
+ if (pdata && pdata->use_external_ref)
chip->command |= AD7291_EXT_REF;
- } else {
- chip->int_vref_mv = 2500; /* Build-in ref */
- }
indio_dev->name = id->name;
indio_dev->channels = ad7291_channels;
@@ -654,21 +592,18 @@ static int ad7291_probe(struct i2c_client *client,
if (ret)
goto error_unreg_irq;
- dev_info(&client->dev, "%s ADC registered.\n",
- id->name);
-
return 0;
error_unreg_irq:
if (client->irq)
free_irq(client->irq, indio_dev);
error_disable_reg:
- if (!IS_ERR(chip->reg))
+ if (chip->reg)
regulator_disable(chip->reg);
error_put_reg:
- if (!IS_ERR(chip->reg))
+ if (chip->reg)
regulator_put(chip->reg);
-
+error_free:
iio_device_free(indio_dev);
error_ret:
return ret;
@@ -684,7 +619,7 @@ static int ad7291_remove(struct i2c_client *client)
if (client->irq)
free_irq(client->irq, indio_dev);
- if (!IS_ERR(chip->reg)) {
+ if (chip->reg) {
regulator_disable(chip->reg);
regulator_put(chip->reg);
}
diff --git a/drivers/staging/iio/adc/ad7291.h b/drivers/staging/iio/adc/ad7291.h
new file mode 100644
index 000000000000..bbd89fa51188
--- /dev/null
+++ b/drivers/staging/iio/adc/ad7291.h
@@ -0,0 +1,12 @@
+#ifndef __IIO_AD7291_H__
+#define __IIO_AD7291_H__
+
+/**
+ * struct ad7291_platform_data - AD7291 platform data
+ * @use_external_ref: Whether to use an external or internal reference voltage
+ */
+struct ad7291_platform_data {
+ bool use_external_ref;
+};
+
+#endif
diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c
index d104b4378424..72868ceda360 100644
--- a/drivers/staging/iio/adc/ad7606_core.c
+++ b/drivers/staging/iio/adc/ad7606_core.c
@@ -125,9 +125,12 @@ static ssize_t ad7606_store_range(struct device *dev,
struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct ad7606_state *st = iio_priv(indio_dev);
unsigned long lval;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &lval);
+ if (ret)
+ return ret;
- if (strict_strtoul(buf, 10, &lval))
- return -EINVAL;
if (!(lval == 5000 || lval == 10000)) {
dev_err(dev, "range is not supported\n");
return -EINVAL;
@@ -173,8 +176,9 @@ static ssize_t ad7606_store_oversampling_ratio(struct device *dev,
unsigned long lval;
int ret;
- if (strict_strtoul(buf, 10, &lval))
- return -EINVAL;
+ ret = kstrtoul(buf, 10, &lval);
+ if (ret)
+ return ret;
ret = ad7606_oversampling_get_index(lval);
if (ret < 0) {
diff --git a/drivers/staging/iio/adc/ad7606_par.c b/drivers/staging/iio/adc/ad7606_par.c
index 58cfddea9637..8a48d18de788 100644
--- a/drivers/staging/iio/adc/ad7606_par.c
+++ b/drivers/staging/iio/adc/ad7606_par.c
@@ -112,8 +112,6 @@ static int ad7606_par_remove(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
release_mem_region(res->start, resource_size(res));
- platform_set_drvdata(pdev, NULL);
-
return 0;
}
diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c
index 928477146c2f..8470036a3378 100644
--- a/drivers/staging/iio/adc/ad7816.c
+++ b/drivers/staging/iio/adc/ad7816.c
@@ -175,9 +175,9 @@ static ssize_t ad7816_store_channel(struct device *dev,
unsigned long data;
int ret;
- ret = strict_strtoul(buf, 10, &data);
+ ret = kstrtoul(buf, 10, &data);
if (ret)
- return -EINVAL;
+ return ret;
if (data > AD7816_CS_MAX && data != AD7816_CS_MASK) {
dev_err(&chip->spi_dev->dev, "Invalid channel id %lu for %s.\n",
@@ -290,7 +290,9 @@ static inline ssize_t ad7816_set_oti(struct device *dev,
u8 data;
int ret;
- ret = strict_strtol(buf, 10, &value);
+ ret = kstrtol(buf, 10, &value);
+ if (ret)
+ return ret;
if (chip->channel_id > AD7816_CS_MAX) {
dev_err(dev, "Invalid oti channel id %d.\n", chip->channel_id);
diff --git a/drivers/staging/iio/adc/ad799x_core.c b/drivers/staging/iio/adc/ad799x_core.c
index 8dc97b36e05a..2b2049c8bc6b 100644
--- a/drivers/staging/iio/adc/ad799x_core.c
+++ b/drivers/staging/iio/adc/ad799x_core.c
@@ -226,7 +226,7 @@ static ssize_t ad799x_write_frequency(struct device *dev,
int ret, i;
u8 t;
- ret = strict_strtol(buf, 10, &val);
+ ret = kstrtol(buf, 10, &val);
if (ret)
return ret;
@@ -337,7 +337,7 @@ static ssize_t ad799x_write_channel_config(struct device *dev,
long val;
int ret;
- ret = strict_strtol(buf, 10, &val);
+ ret = kstrtol(buf, 10, &val);
if (ret)
return ret;
diff --git a/drivers/staging/iio/adc/lpc32xx_adc.c b/drivers/staging/iio/adc/lpc32xx_adc.c
index 2f2f7fdd0691..9a4bb0999b51 100644
--- a/drivers/staging/iio/adc/lpc32xx_adc.c
+++ b/drivers/staging/iio/adc/lpc32xx_adc.c
@@ -215,7 +215,6 @@ static int lpc32xx_adc_remove(struct platform_device *pdev)
iio_device_unregister(iodev);
free_irq(irq, info);
- platform_set_drvdata(pdev, NULL);
clk_put(info->clk);
iounmap(info->adc_base);
iio_device_free(iodev);
diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c
index 163c638e4095..d92c97a59d61 100644
--- a/drivers/staging/iio/adc/mxs-lradc.c
+++ b/drivers/staging/iio/adc/mxs-lradc.c
@@ -620,7 +620,7 @@ static irqreturn_t mxs_lradc_trigger_handler(int irq, void *p)
((LRADC_DELAY_TIMER_LOOP - 1) << LRADC_CH_NUM_SAMPLES_OFFSET);
unsigned int i, j = 0;
- for_each_set_bit(i, iio->active_scan_mask, iio->masklength) {
+ for_each_set_bit(i, iio->active_scan_mask, LRADC_MAX_TOTAL_CHANS) {
lradc->buffer[j] = readl(lradc->base + LRADC_CH(j));
writel(chan_value, lradc->base + LRADC_CH(j));
lradc->buffer[j] &= LRADC_CH_VALUE_MASK;
@@ -774,8 +774,7 @@ static bool mxs_lradc_validate_scan_mask(struct iio_dev *iio,
const unsigned long *mask)
{
struct mxs_lradc *lradc = iio_priv(iio);
- const int len = iio->masklength;
- const int map_chans = bitmap_weight(mask, len);
+ const int map_chans = bitmap_weight(mask, LRADC_MAX_TOTAL_CHANS);
int rsvd_chans = 0;
unsigned long rsvd_mask = 0;
@@ -792,7 +791,7 @@ static bool mxs_lradc_validate_scan_mask(struct iio_dev *iio,
rsvd_chans++;
/* Test for attempts to map channels with special mode of operation. */
- if (bitmap_intersects(mask, &rsvd_mask, len))
+ if (bitmap_intersects(mask, &rsvd_mask, LRADC_MAX_TOTAL_CHANS))
return false;
/* Test for attempts to map more channels then available slots. */
@@ -968,6 +967,7 @@ static int mxs_lradc_probe(struct platform_device *pdev)
iio->modes = INDIO_DIRECT_MODE;
iio->channels = mxs_lradc_chan_spec;
iio->num_channels = ARRAY_SIZE(mxs_lradc_chan_spec);
+ iio->masklength = LRADC_MAX_TOTAL_CHANS;
ret = iio_triggered_buffer_setup(iio, &iio_pollfunc_store_time,
&mxs_lradc_trigger_handler,
diff --git a/drivers/staging/iio/adc/spear_adc.c b/drivers/staging/iio/adc/spear_adc.c
index f45da4266950..736219c30308 100644
--- a/drivers/staging/iio/adc/spear_adc.c
+++ b/drivers/staging/iio/adc/spear_adc.c
@@ -407,7 +407,6 @@ static int spear_adc_remove(struct platform_device *pdev)
struct spear_adc_info *info = iio_priv(iodev);
iio_device_unregister(iodev);
- platform_set_drvdata(pdev, NULL);
clk_disable_unprepare(info->clk);
clk_put(info->clk);
iounmap(info->adc_base_spear6xx);
@@ -416,11 +415,13 @@ static int spear_adc_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_OF
static const struct of_device_id spear_adc_dt_ids[] = {
{ .compatible = "st,spear600-adc", },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, spear_adc_dt_ids);
+#endif
static struct platform_driver spear_adc_driver = {
.probe = spear_adc_probe,
diff --git a/drivers/staging/iio/gyro/Kconfig b/drivers/staging/iio/gyro/Kconfig
index 836066287192..b4333715536e 100644
--- a/drivers/staging/iio/gyro/Kconfig
+++ b/drivers/staging/iio/gyro/Kconfig
@@ -10,13 +10,6 @@ config ADIS16060
Say yes here to build support for Analog Devices adis16060 wide bandwidth
yaw rate gyroscope with SPI.
-config ADIS16130
- tristate "Analog Devices ADIS16130 High Precision Angular Rate Sensor driver"
- depends on SPI
- help
- Say yes here to build support for Analog Devices ADIS16130 High Precision
- Angular Rate Sensor driver.
-
config ADIS16260
tristate "Analog Devices ADIS16260 Digital Gyroscope Sensor SPI driver"
depends on SPI
diff --git a/drivers/staging/iio/gyro/Makefile b/drivers/staging/iio/gyro/Makefile
index 98e650061a3a..975f95b141da 100644
--- a/drivers/staging/iio/gyro/Makefile
+++ b/drivers/staging/iio/gyro/Makefile
@@ -5,8 +5,5 @@
adis16060-y := adis16060_core.o
obj-$(CONFIG_ADIS16060) += adis16060.o
-adis16130-y := adis16130_core.o
-obj-$(CONFIG_ADIS16130) += adis16130.o
-
adis16260-y := adis16260_core.o
obj-$(CONFIG_ADIS16260) += adis16260.o
diff --git a/drivers/staging/iio/gyro/adis16130_core.c b/drivers/staging/iio/gyro/adis16130_core.c
deleted file mode 100644
index 531b803cb2ac..000000000000
--- a/drivers/staging/iio/gyro/adis16130_core.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * ADIS16130 Digital Output, High Precision Angular Rate Sensor driver
- *
- * Copyright 2010 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/delay.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/spi/spi.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/list.h>
-#include <linux/module.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-
-#define ADIS16130_CON 0x0
-#define ADIS16130_CON_RD (1 << 6)
-#define ADIS16130_IOP 0x1
-
-/* 1 = data-ready signal low when unread data on all channels; */
-#define ADIS16130_IOP_ALL_RDY (1 << 3)
-#define ADIS16130_IOP_SYNC (1 << 0) /* 1 = synchronization enabled */
-#define ADIS16130_RATEDATA 0x8 /* Gyroscope output, rate of rotation */
-#define ADIS16130_TEMPDATA 0xA /* Temperature output */
-#define ADIS16130_RATECS 0x28 /* Gyroscope channel setup */
-#define ADIS16130_RATECS_EN (1 << 3) /* 1 = channel enable; */
-#define ADIS16130_TEMPCS 0x2A /* Temperature channel setup */
-#define ADIS16130_TEMPCS_EN (1 << 3)
-#define ADIS16130_RATECONV 0x30
-#define ADIS16130_TEMPCONV 0x32
-#define ADIS16130_MODE 0x38
-#define ADIS16130_MODE_24BIT (1 << 1) /* 1 = 24-bit resolution; */
-
-/**
- * struct adis16130_state - device instance specific data
- * @us: actual spi_device to write data
- * @buf_lock: mutex to protect tx and rx
- * @buf: unified tx/rx buffer
- **/
-struct adis16130_state {
- struct spi_device *us;
- struct mutex buf_lock;
- u8 buf[4] ____cacheline_aligned;
-};
-
-static int adis16130_spi_read(struct iio_dev *indio_dev, u8 reg_addr, u32 *val)
-{
- int ret;
- struct adis16130_state *st = iio_priv(indio_dev);
- struct spi_message msg;
- struct spi_transfer xfer = {
- .tx_buf = st->buf,
- .rx_buf = st->buf,
- .len = 4,
- };
-
- mutex_lock(&st->buf_lock);
-
- st->buf[0] = ADIS16130_CON_RD | reg_addr;
- st->buf[1] = st->buf[2] = st->buf[3] = 0;
-
- spi_message_init(&msg);
- spi_message_add_tail(&xfer, &msg);
- ret = spi_sync(st->us, &msg);
- ret = spi_read(st->us, st->buf, 4);
-
- if (ret == 0)
- *val = (st->buf[1] << 16) | (st->buf[2] << 8) | st->buf[3];
- mutex_unlock(&st->buf_lock);
-
- return ret;
-}
-
-static int adis16130_read_raw(struct iio_dev *indio_dev,
- struct iio_chan_spec const *chan,
- int *val, int *val2,
- long mask)
-{
- int ret;
- u32 temp;
-
- /* Take the iio_dev status lock */
- mutex_lock(&indio_dev->mlock);
- ret = adis16130_spi_read(indio_dev, chan->address, &temp);
- mutex_unlock(&indio_dev->mlock);
- if (ret)
- return ret;
- *val = temp;
- return IIO_VAL_INT;
-}
-
-static const struct iio_chan_spec adis16130_channels[] = {
- {
- .type = IIO_ANGL_VEL,
- .modified = 1,
- .channel2 = IIO_MOD_Z,
- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
- .address = ADIS16130_RATEDATA,
- }, {
- .type = IIO_TEMP,
- .indexed = 1,
- .channel = 0,
- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
- .address = ADIS16130_TEMPDATA,
- }
-};
-
-static const struct iio_info adis16130_info = {
- .read_raw = &adis16130_read_raw,
- .driver_module = THIS_MODULE,
-};
-
-static int adis16130_probe(struct spi_device *spi)
-{
- int ret;
- struct adis16130_state *st;
- struct iio_dev *indio_dev;
-
- /* setup the industrialio driver allocated elements */
- indio_dev = iio_device_alloc(sizeof(*st));
- if (indio_dev == NULL) {
- ret = -ENOMEM;
- goto error_ret;
- }
- st = iio_priv(indio_dev);
- /* this is only used for removal purposes */
- spi_set_drvdata(spi, indio_dev);
- st->us = spi;
- mutex_init(&st->buf_lock);
- indio_dev->name = spi->dev.driver->name;
- indio_dev->channels = adis16130_channels;
- indio_dev->num_channels = ARRAY_SIZE(adis16130_channels);
- indio_dev->dev.parent = &spi->dev;
- indio_dev->info = &adis16130_info;
- indio_dev->modes = INDIO_DIRECT_MODE;
-
- ret = iio_device_register(indio_dev);
- if (ret)
- goto error_free_dev;
-
- return 0;
-
-error_free_dev:
- iio_device_free(indio_dev);
-
-error_ret:
- return ret;
-}
-
-/* fixme, confirm ordering in this function */
-static int adis16130_remove(struct spi_device *spi)
-{
- iio_device_unregister(spi_get_drvdata(spi));
- iio_device_free(spi_get_drvdata(spi));
-
- return 0;
-}
-
-static struct spi_driver adis16130_driver = {
- .driver = {
- .name = "adis16130",
- .owner = THIS_MODULE,
- },
- .probe = adis16130_probe,
- .remove = adis16130_remove,
-};
-module_spi_driver(adis16130_driver);
-
-MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
-MODULE_DESCRIPTION("Analog Devices ADIS16130 High Precision Angular Rate");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("spi:adis16130");
diff --git a/drivers/staging/iio/trigger/Kconfig b/drivers/staging/iio/trigger/Kconfig
index 1a051da62505..2fd18c60323d 100644
--- a/drivers/staging/iio/trigger/Kconfig
+++ b/drivers/staging/iio/trigger/Kconfig
@@ -12,23 +12,6 @@ config IIO_PERIODIC_RTC_TRIGGER
Provides support for using periodic capable real time
clocks as IIO triggers.
-config IIO_GPIO_TRIGGER
- tristate "GPIO trigger"
- depends on GPIOLIB
- help
- Provides support for using GPIO pins as IIO triggers.
-
-config IIO_SYSFS_TRIGGER
- tristate "SYSFS trigger"
- depends on SYSFS
- select IRQ_WORK
- help
- Provides support for using SYSFS entry as IIO triggers.
- If unsure, say N (but it's safe to say "Y").
-
- To compile this driver as a module, choose M here: the
- module will be called iio-trig-sysfs.
-
config IIO_BFIN_TMR_TRIGGER
tristate "Blackfin TIMER trigger"
depends on BLACKFIN
diff --git a/drivers/staging/iio/trigger/Makefile b/drivers/staging/iio/trigger/Makefile
index b088b57da335..238481b78e72 100644
--- a/drivers/staging/iio/trigger/Makefile
+++ b/drivers/staging/iio/trigger/Makefile
@@ -3,6 +3,4 @@
#
obj-$(CONFIG_IIO_PERIODIC_RTC_TRIGGER) += iio-trig-periodic-rtc.o
-obj-$(CONFIG_IIO_GPIO_TRIGGER) += iio-trig-gpio.o
-obj-$(CONFIG_IIO_SYSFS_TRIGGER) += iio-trig-sysfs.o
obj-$(CONFIG_IIO_BFIN_TMR_TRIGGER) += iio-trig-bfin-timer.o
diff --git a/drivers/staging/iio/trigger/iio-trig-gpio.c b/drivers/staging/iio/trigger/iio-trig-gpio.c
deleted file mode 100644
index 7c593d18a910..000000000000
--- a/drivers/staging/iio/trigger/iio-trig-gpio.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Industrial I/O - gpio based trigger support
- *
- * Copyright (c) 2008 Jonathan Cameron
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * Currently this is more of a functioning proof of concept than a full
- * fledged trigger driver.
- *
- * TODO:
- *
- * Add board config elements to allow specification of startup settings.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-#include <linux/slab.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/trigger.h>
-
-static LIST_HEAD(iio_gpio_trigger_list);
-static DEFINE_MUTEX(iio_gpio_trigger_list_lock);
-
-struct iio_gpio_trigger_info {
- struct mutex in_use;
- unsigned int irq;
-};
-/*
- * Need to reference count these triggers and only enable gpio interrupts
- * as appropriate.
- */
-
-/* So what functionality do we want in here?... */
-/* set high / low as interrupt type? */
-
-static irqreturn_t iio_gpio_trigger_poll(int irq, void *private)
-{
- /* Timestamp not currently provided */
- iio_trigger_poll(private, 0);
- return IRQ_HANDLED;
-}
-
-static const struct iio_trigger_ops iio_gpio_trigger_ops = {
- .owner = THIS_MODULE,
-};
-
-static int iio_gpio_trigger_probe(struct platform_device *pdev)
-{
- struct iio_gpio_trigger_info *trig_info;
- struct iio_trigger *trig, *trig2;
- unsigned long irqflags;
- struct resource *irq_res;
- int irq, ret = 0, irq_res_cnt = 0;
-
- do {
- irq_res = platform_get_resource(pdev,
- IORESOURCE_IRQ, irq_res_cnt);
-
- if (irq_res == NULL) {
- if (irq_res_cnt == 0)
- dev_err(&pdev->dev, "No GPIO IRQs specified");
- break;
- }
- irqflags = (irq_res->flags & IRQF_TRIGGER_MASK) | IRQF_SHARED;
-
- for (irq = irq_res->start; irq <= irq_res->end; irq++) {
-
- trig = iio_trigger_alloc("irqtrig%d", irq);
- if (!trig) {
- ret = -ENOMEM;
- goto error_free_completed_registrations;
- }
-
- trig_info = kzalloc(sizeof(*trig_info), GFP_KERNEL);
- if (!trig_info) {
- ret = -ENOMEM;
- goto error_put_trigger;
- }
- iio_trigger_set_drvdata(trig, trig_info);
- trig_info->irq = irq;
- trig->ops = &iio_gpio_trigger_ops;
- ret = request_irq(irq, iio_gpio_trigger_poll,
- irqflags, trig->name, trig);
- if (ret) {
- dev_err(&pdev->dev,
- "request IRQ-%d failed", irq);
- goto error_free_trig_info;
- }
-
- ret = iio_trigger_register(trig);
- if (ret)
- goto error_release_irq;
-
- list_add_tail(&trig->alloc_list,
- &iio_gpio_trigger_list);
- }
-
- irq_res_cnt++;
- } while (irq_res != NULL);
-
-
- return 0;
-
-/* First clean up the partly allocated trigger */
-error_release_irq:
- free_irq(irq, trig);
-error_free_trig_info:
- kfree(trig_info);
-error_put_trigger:
- iio_trigger_put(trig);
-error_free_completed_registrations:
- /* The rest should have been added to the iio_gpio_trigger_list */
- list_for_each_entry_safe(trig,
- trig2,
- &iio_gpio_trigger_list,
- alloc_list) {
- trig_info = iio_trigger_get_drvdata(trig);
- free_irq(gpio_to_irq(trig_info->irq), trig);
- kfree(trig_info);
- iio_trigger_unregister(trig);
- }
-
- return ret;
-}
-
-static int iio_gpio_trigger_remove(struct platform_device *pdev)
-{
- struct iio_trigger *trig, *trig2;
- struct iio_gpio_trigger_info *trig_info;
-
- mutex_lock(&iio_gpio_trigger_list_lock);
- list_for_each_entry_safe(trig,
- trig2,
- &iio_gpio_trigger_list,
- alloc_list) {
- trig_info = iio_trigger_get_drvdata(trig);
- iio_trigger_unregister(trig);
- free_irq(trig_info->irq, trig);
- kfree(trig_info);
- iio_trigger_put(trig);
- }
- mutex_unlock(&iio_gpio_trigger_list_lock);
-
- return 0;
-}
-
-static struct platform_driver iio_gpio_trigger_driver = {
- .probe = iio_gpio_trigger_probe,
- .remove = iio_gpio_trigger_remove,
- .driver = {
- .name = "iio_gpio_trigger",
- .owner = THIS_MODULE,
- },
-};
-
-module_platform_driver(iio_gpio_trigger_driver);
-
-MODULE_AUTHOR("Jonathan Cameron <jic23@kernel.org>");
-MODULE_DESCRIPTION("Example gpio trigger for the iio subsystem");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/trigger/iio-trig-sysfs.c b/drivers/staging/iio/trigger/iio-trig-sysfs.c
deleted file mode 100644
index b727bde8b7fe..000000000000
--- a/drivers/staging/iio/trigger/iio-trig-sysfs.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright 2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/irq_work.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/trigger.h>
-
-struct iio_sysfs_trig {
- struct iio_trigger *trig;
- struct irq_work work;
- int id;
- struct list_head l;
-};
-
-static LIST_HEAD(iio_sysfs_trig_list);
-static DEFINE_MUTEX(iio_syfs_trig_list_mut);
-
-static int iio_sysfs_trigger_probe(int id);
-static ssize_t iio_sysfs_trig_add(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t len)
-{
- int ret;
- unsigned long input;
-
- ret = strict_strtoul(buf, 10, &input);
- if (ret)
- return ret;
- ret = iio_sysfs_trigger_probe(input);
- if (ret)
- return ret;
- return len;
-}
-static DEVICE_ATTR(add_trigger, S_IWUSR, NULL, &iio_sysfs_trig_add);
-
-static int iio_sysfs_trigger_remove(int id);
-static ssize_t iio_sysfs_trig_remove(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t len)
-{
- int ret;
- unsigned long input;
-
- ret = strict_strtoul(buf, 10, &input);
- if (ret)
- return ret;
- ret = iio_sysfs_trigger_remove(input);
- if (ret)
- return ret;
- return len;
-}
-
-static DEVICE_ATTR(remove_trigger, S_IWUSR, NULL, &iio_sysfs_trig_remove);
-
-static struct attribute *iio_sysfs_trig_attrs[] = {
- &dev_attr_add_trigger.attr,
- &dev_attr_remove_trigger.attr,
- NULL,
-};
-
-static const struct attribute_group iio_sysfs_trig_group = {
- .attrs = iio_sysfs_trig_attrs,
-};
-
-static const struct attribute_group *iio_sysfs_trig_groups[] = {
- &iio_sysfs_trig_group,
- NULL
-};
-
-
-/* Nothing to actually do upon release */
-static void iio_trigger_sysfs_release(struct device *dev)
-{
-}
-
-static struct device iio_sysfs_trig_dev = {
- .bus = &iio_bus_type,
- .groups = iio_sysfs_trig_groups,
- .release = &iio_trigger_sysfs_release,
-};
-
-static void iio_sysfs_trigger_work(struct irq_work *work)
-{
- struct iio_sysfs_trig *trig = container_of(work, struct iio_sysfs_trig,
- work);
-
- iio_trigger_poll(trig->trig, 0);
-}
-
-static ssize_t iio_sysfs_trigger_poll(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct iio_trigger *trig = to_iio_trigger(dev);
- struct iio_sysfs_trig *sysfs_trig = iio_trigger_get_drvdata(trig);
-
- irq_work_queue(&sysfs_trig->work);
-
- return count;
-}
-
-static DEVICE_ATTR(trigger_now, S_IWUSR, NULL, iio_sysfs_trigger_poll);
-
-static struct attribute *iio_sysfs_trigger_attrs[] = {
- &dev_attr_trigger_now.attr,
- NULL,
-};
-
-static const struct attribute_group iio_sysfs_trigger_attr_group = {
- .attrs = iio_sysfs_trigger_attrs,
-};
-
-static const struct attribute_group *iio_sysfs_trigger_attr_groups[] = {
- &iio_sysfs_trigger_attr_group,
- NULL
-};
-
-static const struct iio_trigger_ops iio_sysfs_trigger_ops = {
- .owner = THIS_MODULE,
-};
-
-static int iio_sysfs_trigger_probe(int id)
-{
- struct iio_sysfs_trig *t;
- int ret;
- bool foundit = false;
- mutex_lock(&iio_syfs_trig_list_mut);
- list_for_each_entry(t, &iio_sysfs_trig_list, l)
- if (id == t->id) {
- foundit = true;
- break;
- }
- if (foundit) {
- ret = -EINVAL;
- goto out1;
- }
- t = kmalloc(sizeof(*t), GFP_KERNEL);
- if (t == NULL) {
- ret = -ENOMEM;
- goto out1;
- }
- t->id = id;
- t->trig = iio_trigger_alloc("sysfstrig%d", id);
- if (!t->trig) {
- ret = -ENOMEM;
- goto free_t;
- }
-
- t->trig->dev.groups = iio_sysfs_trigger_attr_groups;
- t->trig->ops = &iio_sysfs_trigger_ops;
- t->trig->dev.parent = &iio_sysfs_trig_dev;
- iio_trigger_set_drvdata(t->trig, t);
-
- init_irq_work(&t->work, iio_sysfs_trigger_work);
-
- ret = iio_trigger_register(t->trig);
- if (ret)
- goto out2;
- list_add(&t->l, &iio_sysfs_trig_list);
- __module_get(THIS_MODULE);
- mutex_unlock(&iio_syfs_trig_list_mut);
- return 0;
-
-out2:
- iio_trigger_put(t->trig);
-free_t:
- kfree(t);
-out1:
- mutex_unlock(&iio_syfs_trig_list_mut);
- return ret;
-}
-
-static int iio_sysfs_trigger_remove(int id)
-{
- bool foundit = false;
- struct iio_sysfs_trig *t;
- mutex_lock(&iio_syfs_trig_list_mut);
- list_for_each_entry(t, &iio_sysfs_trig_list, l)
- if (id == t->id) {
- foundit = true;
- break;
- }
- if (!foundit) {
- mutex_unlock(&iio_syfs_trig_list_mut);
- return -EINVAL;
- }
-
- iio_trigger_unregister(t->trig);
- iio_trigger_free(t->trig);
-
- list_del(&t->l);
- kfree(t);
- module_put(THIS_MODULE);
- mutex_unlock(&iio_syfs_trig_list_mut);
- return 0;
-}
-
-
-static int __init iio_sysfs_trig_init(void)
-{
- device_initialize(&iio_sysfs_trig_dev);
- dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger");
- return device_add(&iio_sysfs_trig_dev);
-}
-module_init(iio_sysfs_trig_init);
-
-static void __exit iio_sysfs_trig_exit(void)
-{
- device_unregister(&iio_sysfs_trig_dev);
-}
-module_exit(iio_sysfs_trig_exit);
-
-MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
-MODULE_DESCRIPTION("Sysfs based trigger for the iio subsystem");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:iio-trig-sysfs");
diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig
index ef699f753186..22339059837f 100644
--- a/drivers/staging/imx-drm/Kconfig
+++ b/drivers/staging/imx-drm/Kconfig
@@ -30,6 +30,14 @@ config DRM_IMX_TVE
Choose this to enable the internal Television Encoder (TVe)
found on i.MX53 processors.
+config DRM_IMX_LDB
+ tristate "Support for LVDS displays"
+ depends on DRM_IMX
+ select OF_VIDEOMODE
+ help
+ Choose this to enable the internal LVDS Display Bridge (LDB)
+ found on i.MX53 and i.MX6 processors.
+
config DRM_IMX_IPUV3_CORE
tristate "IPUv3 core support"
depends on DRM_IMX
diff --git a/drivers/staging/imx-drm/Makefile b/drivers/staging/imx-drm/Makefile
index 7e50184523c4..bfaf69378ac2 100644
--- a/drivers/staging/imx-drm/Makefile
+++ b/drivers/staging/imx-drm/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_DRM_IMX) += imxdrm.o
obj-$(CONFIG_DRM_IMX_PARALLEL_DISPLAY) += parallel-display.o
obj-$(CONFIG_DRM_IMX_TVE) += imx-tve.o
+obj-$(CONFIG_DRM_IMX_LDB) += imx-ldb.o
obj-$(CONFIG_DRM_IMX_FB_HELPER) += imx-fbdev.o
obj-$(CONFIG_DRM_IMX_IPUV3_CORE) += ipu-v3/
obj-$(CONFIG_DRM_IMX_IPUV3) += ipuv3-crtc.o
diff --git a/drivers/staging/imx-drm/TODO b/drivers/staging/imx-drm/TODO
index 123acbe9b379..f80641528f75 100644
--- a/drivers/staging/imx-drm/TODO
+++ b/drivers/staging/imx-drm/TODO
@@ -6,7 +6,6 @@ TODO:
- Factor out more code to common helper functions
- decide where to put the base driver. It is not specific to a subsystem
and would be used by DRM/KMS and media/V4L2
-- convert irq driver to irq_domain_add_linear
Missing features (not necessarily for moving out of staging):
diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c
index 64553058b67e..9854a1daf606 100644
--- a/drivers/staging/imx-drm/imx-drm-core.c
+++ b/drivers/staging/imx-drm/imx-drm-core.c
@@ -144,7 +144,7 @@ int imx_drm_crtc_panel_format(struct drm_crtc *crtc, u32 encoder_type,
u32 interface_pix_fmt)
{
return imx_drm_crtc_panel_format_pins(crtc, encoder_type,
- interface_pix_fmt, 0, 0);
+ interface_pix_fmt, 2, 3);
}
EXPORT_SYMBOL_GPL(imx_drm_crtc_panel_format);
@@ -491,7 +491,6 @@ int imx_drm_add_crtc(struct drm_crtc *crtc,
{
struct imx_drm_device *imxdrm = __imx_drm_device();
struct imx_drm_crtc *imx_drm_crtc;
- const struct drm_crtc_funcs *crtc_funcs;
int ret;
mutex_lock(&imxdrm->mutex);
@@ -512,8 +511,6 @@ int imx_drm_add_crtc(struct drm_crtc *crtc,
imx_drm_crtc->cookie.cookie = cookie;
imx_drm_crtc->cookie.id = id;
- crtc_funcs = imx_drm_helper_funcs->crtc_funcs;
-
imx_drm_crtc->crtc = crtc;
imx_drm_crtc->imxdrm = imxdrm;
diff --git a/drivers/staging/imx-drm/imx-ldb.c b/drivers/staging/imx-drm/imx-ldb.c
new file mode 100644
index 000000000000..8af7f3b40bae
--- /dev/null
+++ b/drivers/staging/imx-drm/imx-ldb.c
@@ -0,0 +1,625 @@
+/*
+ * i.MX drm driver - LVDS display bridge
+ *
+ * Copyright (C) 2012 Sascha Hauer, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <drm/drmP.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <video/of_videomode.h>
+#include <linux/regmap.h>
+#include <linux/videodev2.h>
+
+#include "imx-drm.h"
+
+#define DRIVER_NAME "imx-ldb"
+
+#define LDB_CH0_MODE_EN_TO_DI0 (1 << 0)
+#define LDB_CH0_MODE_EN_TO_DI1 (3 << 0)
+#define LDB_CH0_MODE_EN_MASK (3 << 0)
+#define LDB_CH1_MODE_EN_TO_DI0 (1 << 2)
+#define LDB_CH1_MODE_EN_TO_DI1 (3 << 2)
+#define LDB_CH1_MODE_EN_MASK (3 << 2)
+#define LDB_SPLIT_MODE_EN (1 << 4)
+#define LDB_DATA_WIDTH_CH0_24 (1 << 5)
+#define LDB_BIT_MAP_CH0_JEIDA (1 << 6)
+#define LDB_DATA_WIDTH_CH1_24 (1 << 7)
+#define LDB_BIT_MAP_CH1_JEIDA (1 << 8)
+#define LDB_DI0_VS_POL_ACT_LOW (1 << 9)
+#define LDB_DI1_VS_POL_ACT_LOW (1 << 10)
+#define LDB_BGREF_RMODE_INT (1 << 15)
+
+#define con_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, connector)
+#define enc_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, encoder)
+
+struct imx_ldb;
+
+struct imx_ldb_channel {
+ struct imx_ldb *ldb;
+ struct drm_connector connector;
+ struct imx_drm_connector *imx_drm_connector;
+ struct drm_encoder encoder;
+ struct imx_drm_encoder *imx_drm_encoder;
+ int chno;
+ void *edid;
+ int edid_len;
+ struct drm_display_mode mode;
+ int mode_valid;
+};
+
+struct bus_mux {
+ int reg;
+ int shift;
+ int mask;
+};
+
+struct imx_ldb {
+ struct regmap *regmap;
+ struct device *dev;
+ struct imx_ldb_channel channel[2];
+ struct clk *clk[2]; /* our own clock */
+ struct clk *clk_sel[4]; /* parent of display clock */
+ struct clk *clk_pll[2]; /* upstream clock we can adjust */
+ u32 ldb_ctrl;
+ const struct bus_mux *lvds_mux;
+};
+
+static enum drm_connector_status imx_ldb_connector_detect(
+ struct drm_connector *connector, bool force)
+{
+ return connector_status_connected;
+}
+
+static void imx_ldb_connector_destroy(struct drm_connector *connector)
+{
+ /* do not free here */
+}
+
+static int imx_ldb_connector_get_modes(struct drm_connector *connector)
+{
+ struct imx_ldb_channel *imx_ldb_ch = con_to_imx_ldb_ch(connector);
+ int num_modes = 0;
+
+ if (imx_ldb_ch->edid) {
+ drm_mode_connector_update_edid_property(connector,
+ imx_ldb_ch->edid);
+ num_modes = drm_add_edid_modes(connector, imx_ldb_ch->edid);
+ }
+
+ if (imx_ldb_ch->mode_valid) {
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_create(connector->dev);
+ drm_mode_copy(mode, &imx_ldb_ch->mode);
+ mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+ drm_mode_probed_add(connector, mode);
+ num_modes++;
+ }
+
+ return num_modes;
+}
+
+static int imx_ldb_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ return 0;
+}
+
+static struct drm_encoder *imx_ldb_connector_best_encoder(
+ struct drm_connector *connector)
+{
+ struct imx_ldb_channel *imx_ldb_ch = con_to_imx_ldb_ch(connector);
+
+ return &imx_ldb_ch->encoder;
+}
+
+static void imx_ldb_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+}
+
+static bool imx_ldb_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
+ unsigned long serial_clk, unsigned long di_clk)
+{
+ int ret;
+
+ dev_dbg(ldb->dev, "%s: now: %ld want: %ld\n", __func__,
+ clk_get_rate(ldb->clk_pll[chno]), serial_clk);
+ clk_set_rate(ldb->clk_pll[chno], serial_clk);
+
+ dev_dbg(ldb->dev, "%s after: %ld\n", __func__,
+ clk_get_rate(ldb->clk_pll[chno]));
+
+ dev_dbg(ldb->dev, "%s: now: %ld want: %ld\n", __func__,
+ clk_get_rate(ldb->clk[chno]),
+ (long int)di_clk);
+ clk_set_rate(ldb->clk[chno], di_clk);
+
+ dev_dbg(ldb->dev, "%s after: %ld\n", __func__,
+ clk_get_rate(ldb->clk[chno]));
+
+ /* set display clock mux to LDB input clock */
+ ret = clk_set_parent(ldb->clk_sel[mux], ldb->clk[chno]);
+ if (ret) {
+ dev_err(ldb->dev, "unable to set di%d parent clock to ldb_di%d\n", mux, chno);
+ }
+}
+
+static void imx_ldb_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+ struct imx_ldb *ldb = imx_ldb_ch->ldb;
+ struct drm_display_mode *mode = &encoder->crtc->mode;
+ u32 pixel_fmt;
+ unsigned long serial_clk;
+ unsigned long di_clk = mode->clock * 1000;
+ int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->imx_drm_encoder,
+ encoder->crtc);
+
+ if (ldb->ldb_ctrl & LDB_SPLIT_MODE_EN) {
+ /* dual channel LVDS mode */
+ serial_clk = 3500UL * mode->clock;
+ imx_ldb_set_clock(ldb, mux, 0, serial_clk, di_clk);
+ imx_ldb_set_clock(ldb, mux, 1, serial_clk, di_clk);
+ } else {
+ serial_clk = 7000UL * mode->clock;
+ imx_ldb_set_clock(ldb, mux, imx_ldb_ch->chno, serial_clk, di_clk);
+ }
+
+ switch (imx_ldb_ch->chno) {
+ case 0:
+ pixel_fmt = (ldb->ldb_ctrl & LDB_DATA_WIDTH_CH0_24) ?
+ V4L2_PIX_FMT_RGB24 : V4L2_PIX_FMT_BGR666;
+ break;
+ case 1:
+ pixel_fmt = (ldb->ldb_ctrl & LDB_DATA_WIDTH_CH1_24) ?
+ V4L2_PIX_FMT_RGB24 : V4L2_PIX_FMT_BGR666;
+ break;
+ default:
+ dev_err(ldb->dev, "unable to config di%d panel format\n",
+ imx_ldb_ch->chno);
+ pixel_fmt = V4L2_PIX_FMT_RGB24;
+ }
+
+ imx_drm_crtc_panel_format(encoder->crtc, DRM_MODE_ENCODER_LVDS,
+ pixel_fmt);
+}
+
+static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
+{
+ struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+ struct imx_ldb *ldb = imx_ldb_ch->ldb;
+ int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
+ int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->imx_drm_encoder,
+ encoder->crtc);
+
+ if (dual) {
+ clk_prepare_enable(ldb->clk[0]);
+ clk_prepare_enable(ldb->clk[1]);
+ }
+
+ if (imx_ldb_ch == &ldb->channel[0] || dual) {
+ ldb->ldb_ctrl &= ~LDB_CH0_MODE_EN_MASK;
+ if (mux == 0 || ldb->lvds_mux)
+ ldb->ldb_ctrl |= LDB_CH0_MODE_EN_TO_DI0;
+ else if (mux == 1)
+ ldb->ldb_ctrl |= LDB_CH0_MODE_EN_TO_DI1;
+ }
+ if (imx_ldb_ch == &ldb->channel[1] || dual) {
+ ldb->ldb_ctrl &= ~LDB_CH1_MODE_EN_MASK;
+ if (mux == 1 || ldb->lvds_mux)
+ ldb->ldb_ctrl |= LDB_CH1_MODE_EN_TO_DI1;
+ else if (mux == 0)
+ ldb->ldb_ctrl |= LDB_CH1_MODE_EN_TO_DI0;
+ }
+
+ if (ldb->lvds_mux) {
+ const struct bus_mux *lvds_mux = NULL;
+
+ if (imx_ldb_ch == &ldb->channel[0])
+ lvds_mux = &ldb->lvds_mux[0];
+ else if (imx_ldb_ch == &ldb->channel[1])
+ lvds_mux = &ldb->lvds_mux[1];
+
+ regmap_update_bits(ldb->regmap, lvds_mux->reg, lvds_mux->mask,
+ mux << lvds_mux->shift);
+ }
+
+ regmap_write(ldb->regmap, IOMUXC_GPR2, ldb->ldb_ctrl);
+}
+
+static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+ struct imx_ldb *ldb = imx_ldb_ch->ldb;
+ int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
+
+ if (mode->clock > 170000) {
+ dev_warn(ldb->dev,
+ "%s: mode exceeds 170 MHz pixel clock\n", __func__);
+ }
+ if (mode->clock > 85000 && !dual) {
+ dev_warn(ldb->dev,
+ "%s: mode exceeds 85 MHz pixel clock\n", __func__);
+ }
+
+ /* FIXME - assumes straight connections DI0 --> CH0, DI1 --> CH1 */
+ if (imx_ldb_ch == &ldb->channel[0]) {
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ ldb->ldb_ctrl |= LDB_DI0_VS_POL_ACT_LOW;
+ else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+ ldb->ldb_ctrl &= ~LDB_DI0_VS_POL_ACT_LOW;
+ }
+ if (imx_ldb_ch == &ldb->channel[1]) {
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ ldb->ldb_ctrl |= LDB_DI1_VS_POL_ACT_LOW;
+ else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+ ldb->ldb_ctrl &= ~LDB_DI1_VS_POL_ACT_LOW;
+ }
+}
+
+static void imx_ldb_encoder_disable(struct drm_encoder *encoder)
+{
+ struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+ struct imx_ldb *ldb = imx_ldb_ch->ldb;
+
+ /*
+ * imx_ldb_encoder_disable is called by
+ * drm_helper_disable_unused_functions without
+ * the encoder being enabled before.
+ */
+ if (imx_ldb_ch == &ldb->channel[0] &&
+ (ldb->ldb_ctrl & LDB_CH0_MODE_EN_MASK) == 0)
+ return;
+ else if (imx_ldb_ch == &ldb->channel[1] &&
+ (ldb->ldb_ctrl & LDB_CH1_MODE_EN_MASK) == 0)
+ return;
+
+ if (imx_ldb_ch == &ldb->channel[0])
+ ldb->ldb_ctrl &= ~LDB_CH0_MODE_EN_MASK;
+ else if (imx_ldb_ch == &ldb->channel[1])
+ ldb->ldb_ctrl &= ~LDB_CH1_MODE_EN_MASK;
+
+ regmap_write(ldb->regmap, IOMUXC_GPR2, ldb->ldb_ctrl);
+
+ if (ldb->ldb_ctrl & LDB_SPLIT_MODE_EN) {
+ clk_disable_unprepare(ldb->clk[0]);
+ clk_disable_unprepare(ldb->clk[1]);
+ }
+}
+
+static void imx_ldb_encoder_destroy(struct drm_encoder *encoder)
+{
+ /* do not free here */
+}
+
+static struct drm_connector_funcs imx_ldb_connector_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .detect = imx_ldb_connector_detect,
+ .destroy = imx_ldb_connector_destroy,
+};
+
+static struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = {
+ .get_modes = imx_ldb_connector_get_modes,
+ .best_encoder = imx_ldb_connector_best_encoder,
+ .mode_valid = imx_ldb_connector_mode_valid,
+};
+
+static struct drm_encoder_funcs imx_ldb_encoder_funcs = {
+ .destroy = imx_ldb_encoder_destroy,
+};
+
+static struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = {
+ .dpms = imx_ldb_encoder_dpms,
+ .mode_fixup = imx_ldb_encoder_mode_fixup,
+ .prepare = imx_ldb_encoder_prepare,
+ .commit = imx_ldb_encoder_commit,
+ .mode_set = imx_ldb_encoder_mode_set,
+ .disable = imx_ldb_encoder_disable,
+};
+
+static int imx_ldb_get_clk(struct imx_ldb *ldb, int chno)
+{
+ char clkname[16];
+
+ sprintf(clkname, "di%d", chno);
+ ldb->clk[chno] = devm_clk_get(ldb->dev, clkname);
+ if (IS_ERR(ldb->clk[chno]))
+ return PTR_ERR(ldb->clk[chno]);
+
+ sprintf(clkname, "di%d_pll", chno);
+ ldb->clk_pll[chno] = devm_clk_get(ldb->dev, clkname);
+ if (IS_ERR(ldb->clk_pll[chno]))
+ return PTR_ERR(ldb->clk_pll[chno]);
+
+ return 0;
+}
+
+static int imx_ldb_register(struct imx_ldb_channel *imx_ldb_ch)
+{
+ int ret;
+ struct imx_ldb *ldb = imx_ldb_ch->ldb;
+
+ ret = imx_ldb_get_clk(ldb, imx_ldb_ch->chno);
+ if (ret)
+ return ret;
+ if (ldb->ldb_ctrl & LDB_SPLIT_MODE_EN) {
+ ret |= imx_ldb_get_clk(ldb, 1);
+ if (ret)
+ return ret;
+ }
+
+ imx_ldb_ch->connector.funcs = &imx_ldb_connector_funcs;
+ imx_ldb_ch->encoder.funcs = &imx_ldb_encoder_funcs;
+
+ imx_ldb_ch->encoder.encoder_type = DRM_MODE_ENCODER_LVDS;
+ imx_ldb_ch->connector.connector_type = DRM_MODE_CONNECTOR_LVDS;
+
+ drm_encoder_helper_add(&imx_ldb_ch->encoder,
+ &imx_ldb_encoder_helper_funcs);
+ ret = imx_drm_add_encoder(&imx_ldb_ch->encoder,
+ &imx_ldb_ch->imx_drm_encoder, THIS_MODULE);
+ if (ret) {
+ dev_err(ldb->dev, "adding encoder failed with %d\n", ret);
+ return ret;
+ }
+
+ drm_connector_helper_add(&imx_ldb_ch->connector,
+ &imx_ldb_connector_helper_funcs);
+
+ ret = imx_drm_add_connector(&imx_ldb_ch->connector,
+ &imx_ldb_ch->imx_drm_connector, THIS_MODULE);
+ if (ret) {
+ imx_drm_remove_encoder(imx_ldb_ch->imx_drm_encoder);
+ dev_err(ldb->dev, "adding connector failed with %d\n", ret);
+ return ret;
+ }
+
+ drm_mode_connector_attach_encoder(&imx_ldb_ch->connector,
+ &imx_ldb_ch->encoder);
+
+ return 0;
+}
+
+enum {
+ LVDS_BIT_MAP_SPWG,
+ LVDS_BIT_MAP_JEIDA
+};
+
+static const char *imx_ldb_bit_mappings[] = {
+ [LVDS_BIT_MAP_SPWG] = "spwg",
+ [LVDS_BIT_MAP_JEIDA] = "jeida",
+};
+
+const int of_get_data_mapping(struct device_node *np)
+{
+ const char *bm;
+ int ret, i;
+
+ ret = of_property_read_string(np, "fsl,data-mapping", &bm);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(imx_ldb_bit_mappings); i++)
+ if (!strcasecmp(bm, imx_ldb_bit_mappings[i]))
+ return i;
+
+ return -EINVAL;
+}
+
+static struct bus_mux imx6q_lvds_mux[2] = {
+ {
+ .reg = IOMUXC_GPR3,
+ .shift = 6,
+ .mask = IMX6Q_GPR3_LVDS0_MUX_CTL_MASK,
+ }, {
+ .reg = IOMUXC_GPR3,
+ .shift = 8,
+ .mask = IMX6Q_GPR3_LVDS1_MUX_CTL_MASK,
+ }
+};
+
+/*
+ * For a device declaring compatible = "fsl,imx6q-ldb", "fsl,imx53-ldb",
+ * of_match_device will walk through this list and take the first entry
+ * matching any of its compatible values. Therefore, the more generic
+ * entries (in this case fsl,imx53-ldb) need to be ordered last.
+ */
+static const struct of_device_id imx_ldb_dt_ids[] = {
+ { .compatible = "fsl,imx6q-ldb", .data = imx6q_lvds_mux, },
+ { .compatible = "fsl,imx53-ldb", .data = NULL, },
+ { }
+};
+MODULE_DEVICE_TABLE(of, imx_ldb_dt_ids);
+
+static int imx_ldb_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ const struct of_device_id *of_id =
+ of_match_device(of_match_ptr(imx_ldb_dt_ids),
+ &pdev->dev);
+ struct device_node *child;
+ const u8 *edidp;
+ struct imx_ldb *imx_ldb;
+ int datawidth;
+ int mapping;
+ int dual;
+ int ret;
+ int i;
+
+ imx_ldb = devm_kzalloc(&pdev->dev, sizeof(*imx_ldb), GFP_KERNEL);
+ if (!imx_ldb)
+ return -ENOMEM;
+
+ imx_ldb->regmap = syscon_regmap_lookup_by_phandle(np, "gpr");
+ if (IS_ERR(imx_ldb->regmap)) {
+ dev_err(&pdev->dev, "failed to get parent regmap\n");
+ return PTR_ERR(imx_ldb->regmap);
+ }
+
+ imx_ldb->dev = &pdev->dev;
+
+ if (of_id)
+ imx_ldb->lvds_mux = of_id->data;
+
+ dual = of_property_read_bool(np, "fsl,dual-channel");
+ if (dual)
+ imx_ldb->ldb_ctrl |= LDB_SPLIT_MODE_EN;
+
+ /*
+ * There are three diferent possible clock mux configurations:
+ * i.MX53: ipu1_di0_sel, ipu1_di1_sel
+ * i.MX6q: ipu1_di0_sel, ipu1_di1_sel, ipu2_di0_sel, ipu2_di1_sel
+ * i.MX6dl: ipu1_di0_sel, ipu1_di1_sel, lcdif_sel
+ * Map them all to di0_sel...di3_sel.
+ */
+ for (i = 0; i < 4; i++) {
+ char clkname[16];
+
+ sprintf(clkname, "di%d_sel", i);
+ imx_ldb->clk_sel[i] = devm_clk_get(imx_ldb->dev, clkname);
+ if (IS_ERR(imx_ldb->clk_sel[i])) {
+ ret = PTR_ERR(imx_ldb->clk_sel[i]);
+ imx_ldb->clk_sel[i] = NULL;
+ break;
+ }
+ }
+ if (i == 0)
+ return ret;
+
+ for_each_child_of_node(np, child) {
+ struct imx_ldb_channel *channel;
+
+ ret = of_property_read_u32(child, "reg", &i);
+ if (ret || i < 0 || i > 1)
+ return -EINVAL;
+
+ if (dual && i > 0) {
+ dev_warn(&pdev->dev, "dual-channel mode, ignoring second output\n");
+ continue;
+ }
+
+ if (!of_device_is_available(child))
+ continue;
+
+ channel = &imx_ldb->channel[i];
+ channel->ldb = imx_ldb;
+ channel->chno = i;
+
+ edidp = of_get_property(child, "edid", &channel->edid_len);
+ if (edidp) {
+ channel->edid = kmemdup(edidp, channel->edid_len,
+ GFP_KERNEL);
+ } else {
+ ret = of_get_drm_display_mode(child, &channel->mode, 0);
+ if (!ret)
+ channel->mode_valid = 1;
+ }
+
+ ret = of_property_read_u32(child, "fsl,data-width", &datawidth);
+ if (ret)
+ datawidth = 0;
+ else if (datawidth != 18 && datawidth != 24)
+ return -EINVAL;
+
+ mapping = of_get_data_mapping(child);
+ switch (mapping) {
+ case LVDS_BIT_MAP_SPWG:
+ if (datawidth == 24) {
+ if (i == 0 || dual)
+ imx_ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24;
+ if (i == 1 || dual)
+ imx_ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24;
+ }
+ break;
+ case LVDS_BIT_MAP_JEIDA:
+ if (datawidth == 18) {
+ dev_err(&pdev->dev, "JEIDA standard only supported in 24 bit\n");
+ return -EINVAL;
+ }
+ if (i == 0 || dual)
+ imx_ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24 | LDB_BIT_MAP_CH0_JEIDA;
+ if (i == 1 || dual)
+ imx_ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24 | LDB_BIT_MAP_CH1_JEIDA;
+ break;
+ default:
+ dev_err(&pdev->dev, "data mapping not specified or invalid\n");
+ return -EINVAL;
+ }
+
+ ret = imx_ldb_register(channel);
+ if (ret)
+ return ret;
+
+ imx_drm_encoder_add_possible_crtcs(channel->imx_drm_encoder, child);
+ }
+
+ platform_set_drvdata(pdev, imx_ldb);
+
+ return 0;
+}
+
+static int imx_ldb_remove(struct platform_device *pdev)
+{
+ struct imx_ldb *imx_ldb = platform_get_drvdata(pdev);
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ struct imx_ldb_channel *channel = &imx_ldb->channel[i];
+ struct drm_connector *connector = &channel->connector;
+ struct drm_encoder *encoder = &channel->encoder;
+
+ drm_mode_connector_detach_encoder(connector, encoder);
+
+ imx_drm_remove_connector(channel->imx_drm_connector);
+ imx_drm_remove_encoder(channel->imx_drm_encoder);
+ }
+
+ return 0;
+}
+
+static struct platform_driver imx_ldb_driver = {
+ .probe = imx_ldb_probe,
+ .remove = imx_ldb_remove,
+ .driver = {
+ .of_match_table = imx_ldb_dt_ids,
+ .name = DRIVER_NAME,
+ .owner = THIS_MODULE,
+ },
+};
+
+module_platform_driver(imx_ldb_driver);
+
+MODULE_DESCRIPTION("i.MX LVDS driver");
+MODULE_AUTHOR("Sascha Hauer, Pengutronix");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c
index 03892de9bd7e..a56797d88edc 100644
--- a/drivers/staging/imx-drm/imx-tve.c
+++ b/drivers/staging/imx-drm/imx-tve.c
@@ -22,7 +22,6 @@
#include <linux/clk-provider.h>
#include <linux/module.h>
#include <linux/of_i2c.h>
-#include <linux/pinctrl/consumer.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/spinlock.h>
@@ -610,15 +609,6 @@ static int imx_tve_probe(struct platform_device *pdev)
}
if (tve->mode == TVE_MODE_VGA) {
- struct pinctrl *pinctrl;
-
- pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
- if (IS_ERR(pinctrl)) {
- ret = PTR_ERR(pinctrl);
- dev_warn(&pdev->dev, "failed to setup pinctrl: %d", ret);
- return ret;
- }
-
ret = of_property_read_u32(np, "fsl,hsync-pin", &tve->hsync_pin);
if (ret < 0) {
dev_err(&pdev->dev, "failed to get vsync pin\n");
@@ -638,11 +628,9 @@ static int imx_tve_probe(struct platform_device *pdev)
return -ENOENT;
}
- base = devm_request_and_ioremap(&pdev->dev, res);
- if (!base) {
- dev_err(&pdev->dev, "failed to remap memory region\n");
- return -ENOENT;
- }
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
tve_regmap_config.lock_arg = tve;
tve->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "tve", base,
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-common.c b/drivers/staging/imx-drm/ipu-v3/ipu-common.c
index 0127601c26c7..e35d0bf03c7b 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-common.c
+++ b/drivers/staging/imx-drm/ipu-v3/ipu-common.c
@@ -27,6 +27,7 @@
#include <linux/list.h>
#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
#include <linux/of_device.h>
#include "imx-ipu-v3.h"
@@ -799,16 +800,18 @@ err_di_0:
static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs)
{
unsigned long status;
- int i, bit, irq_base;
+ int i, bit, irq;
for (i = 0; i < num_regs; i++) {
status = ipu_cm_read(ipu, IPU_INT_STAT(regs[i]));
status &= ipu_cm_read(ipu, IPU_INT_CTRL(regs[i]));
- irq_base = ipu->irq_start + regs[i] * 32;
- for_each_set_bit(bit, &status, 32)
- generic_handle_irq(irq_base + bit);
+ for_each_set_bit(bit, &status, 32) {
+ irq = irq_linear_revmap(ipu->domain, regs[i] * 32 + bit);
+ if (irq)
+ generic_handle_irq(irq);
+ }
}
}
@@ -838,57 +841,15 @@ static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc)
chained_irq_exit(chip, desc);
}
-static void ipu_ack_irq(struct irq_data *d)
-{
- struct ipu_soc *ipu = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->irq - ipu->irq_start;
-
- ipu_cm_write(ipu, 1 << (irq % 32), IPU_INT_STAT(irq / 32));
-}
-
-static void ipu_unmask_irq(struct irq_data *d)
-{
- struct ipu_soc *ipu = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->irq - ipu->irq_start;
- unsigned long flags;
- u32 reg;
-
- spin_lock_irqsave(&ipu->lock, flags);
-
- reg = ipu_cm_read(ipu, IPU_INT_CTRL(irq / 32));
- reg |= 1 << (irq % 32);
- ipu_cm_write(ipu, reg, IPU_INT_CTRL(irq / 32));
-
- spin_unlock_irqrestore(&ipu->lock, flags);
-}
-
-static void ipu_mask_irq(struct irq_data *d)
-{
- struct ipu_soc *ipu = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->irq - ipu->irq_start;
- unsigned long flags;
- u32 reg;
-
- spin_lock_irqsave(&ipu->lock, flags);
-
- reg = ipu_cm_read(ipu, IPU_INT_CTRL(irq / 32));
- reg &= ~(1 << (irq % 32));
- ipu_cm_write(ipu, reg, IPU_INT_CTRL(irq / 32));
-
- spin_unlock_irqrestore(&ipu->lock, flags);
-}
-
-static struct irq_chip ipu_irq_chip = {
- .name = "IPU",
- .irq_ack = ipu_ack_irq,
- .irq_mask = ipu_mask_irq,
- .irq_unmask = ipu_unmask_irq,
-};
-
int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel,
enum ipu_channel_irq irq_type)
{
- return ipu->irq_start + irq_type + channel->num;
+ int irq = irq_linear_revmap(ipu->domain, irq_type + channel->num);
+
+ if (!irq)
+ irq = irq_create_mapping(ipu->domain, irq_type + channel->num);
+
+ return irq;
}
EXPORT_SYMBOL_GPL(ipu_idmac_channel_irq);
@@ -975,18 +936,48 @@ err_register:
return ret;
}
+
static int ipu_irq_init(struct ipu_soc *ipu)
{
- int i;
+ struct irq_chip_generic *gc;
+ struct irq_chip_type *ct;
+ unsigned long unused[IPU_NUM_IRQS / 32] = {
+ 0x400100d0, 0xffe000fd,
+ 0x400100d0, 0xffe000fd,
+ 0x400100d0, 0xffe000fd,
+ 0x4077ffff, 0xffe7e1fd,
+ 0x23fffffe, 0x8880fff0,
+ 0xf98fe7d0, 0xfff81fff,
+ 0x400100d0, 0xffe000fd,
+ 0x00000000,
+ };
+ int ret, i;
+
+ ipu->domain = irq_domain_add_linear(ipu->dev->of_node, IPU_NUM_IRQS,
+ &irq_generic_chip_ops, ipu);
+ if (!ipu->domain) {
+ dev_err(ipu->dev, "failed to add irq domain\n");
+ return -ENODEV;
+ }
- ipu->irq_start = irq_alloc_descs(-1, 0, IPU_NUM_IRQS, 0);
- if (ipu->irq_start < 0)
- return ipu->irq_start;
+ ret = irq_alloc_domain_generic_chips(ipu->domain, 32, 1, "IPU",
+ handle_level_irq, 0, IRQF_VALID, 0);
+ if (ret < 0) {
+ dev_err(ipu->dev, "failed to alloc generic irq chips\n");
+ irq_domain_remove(ipu->domain);
+ return ret;
+ }
- for (i = ipu->irq_start; i < ipu->irq_start + IPU_NUM_IRQS; i++) {
- irq_set_chip_and_handler(i, &ipu_irq_chip, handle_level_irq);
- set_irq_flags(i, IRQF_VALID);
- irq_set_chip_data(i, ipu);
+ for (i = 0; i < IPU_NUM_IRQS; i += 32) {
+ gc = irq_get_domain_generic_chip(ipu->domain, i);
+ gc->reg_base = ipu->cm_reg;
+ gc->unused = unused[i / 32];
+ ct = gc->chip_types;
+ ct->chip.irq_ack = irq_gc_ack_set_bit;
+ ct->chip.irq_mask = irq_gc_mask_clr_bit;
+ ct->chip.irq_unmask = irq_gc_mask_set_bit;
+ ct->regs.ack = IPU_INT_STAT(i / 32);
+ ct->regs.mask = IPU_INT_CTRL(i / 32);
}
irq_set_chained_handler(ipu->irq_sync, ipu_irq_handler);
@@ -999,20 +990,22 @@ static int ipu_irq_init(struct ipu_soc *ipu)
static void ipu_irq_exit(struct ipu_soc *ipu)
{
- int i;
+ int i, irq;
irq_set_chained_handler(ipu->irq_err, NULL);
irq_set_handler_data(ipu->irq_err, NULL);
irq_set_chained_handler(ipu->irq_sync, NULL);
irq_set_handler_data(ipu->irq_sync, NULL);
- for (i = ipu->irq_start; i < ipu->irq_start + IPU_NUM_IRQS; i++) {
- set_irq_flags(i, 0);
- irq_set_chip(i, NULL);
- irq_set_chip_data(i, NULL);
+ /* TODO: remove irq_domain_generic_chips */
+
+ for (i = 0; i < IPU_NUM_IRQS; i++) {
+ irq = irq_linear_revmap(ipu->domain, i);
+ if (irq)
+ irq_dispose_mapping(irq);
}
- irq_free_descs(ipu->irq_start, IPU_NUM_IRQS);
+ irq_domain_remove(ipu->domain);
}
static int ipu_probe(struct platform_device *pdev)
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-di.c b/drivers/staging/imx-drm/ipu-v3/ipu-di.c
index 19d777e39d0b..0b6806e2069c 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-di.c
+++ b/drivers/staging/imx-drm/ipu-v3/ipu-di.c
@@ -603,7 +603,12 @@ int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
vsync_cnt = 3;
if (di->id == 1)
- vsync_cnt = 6;
+ /*
+ * TODO: change only for TVEv2, parallel display
+ * uses pin 2 / 3
+ */
+ if (!(sig->hsync_pin == 2 && sig->vsync_pin == 3))
+ vsync_cnt = 6;
if (sig->Hsync_pol) {
if (sig->hsync_pin == 2)
@@ -614,11 +619,11 @@ int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
di_gen |= DI_GEN_POLARITY_7;
}
if (sig->Vsync_pol) {
- if (sig->hsync_pin == 3)
+ if (sig->vsync_pin == 3)
di_gen |= DI_GEN_POLARITY_3;
- else if (sig->hsync_pin == 6)
+ else if (sig->vsync_pin == 6)
di_gen |= DI_GEN_POLARITY_6;
- else if (sig->hsync_pin == 8)
+ else if (sig->vsync_pin == 8)
di_gen |= DI_GEN_POLARITY_8;
}
}
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c b/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
index 91821bc30f41..2e97c33b81e7 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
+++ b/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
@@ -61,7 +61,7 @@ struct dmfc_channel_data {
static const struct dmfc_channel_data dmfcdata[] = {
{
- .ipu_channel = 23,
+ .ipu_channel = IPUV3_CHANNEL_MEM_BG_SYNC,
.channel_reg = DMFC_DP_CHAN,
.shift = DMFC_DP_CHAN_5B_23,
.eot_shift = 20,
@@ -73,13 +73,13 @@ static const struct dmfc_channel_data dmfcdata[] = {
.eot_shift = 22,
.max_fifo_lines = 1,
}, {
- .ipu_channel = 27,
+ .ipu_channel = IPUV3_CHANNEL_MEM_FG_SYNC,
.channel_reg = DMFC_DP_CHAN,
.shift = DMFC_DP_CHAN_5F_27,
.eot_shift = 21,
.max_fifo_lines = 2,
}, {
- .ipu_channel = 28,
+ .ipu_channel = IPUV3_CHANNEL_MEM_DC_SYNC,
.channel_reg = DMFC_WR_CHAN,
.shift = DMFC_WR_CHAN_1_28,
.eot_shift = 16,
@@ -292,7 +292,7 @@ int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
{
struct ipu_dmfc_priv *priv = dmfc->priv;
int slots = dmfc_bandwidth_to_slots(priv, bandwidth_pixel_per_second);
- int segment = 0, ret = 0;
+ int segment = -1, ret = 0;
dev_dbg(priv->dev, "dmfc: trying to allocate %ldMpixel/s for IPU channel %d\n",
bandwidth_pixel_per_second / 1000000,
@@ -307,7 +307,17 @@ int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
goto out;
}
- segment = dmfc_find_slots(priv, slots);
+ /* Always allocate at least 128*4 bytes (2 slots) */
+ if (slots < 2)
+ slots = 2;
+
+ /* For the MEM_BG channel, first try to allocate twice the slots */
+ if (dmfc->data->ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC)
+ segment = dmfc_find_slots(priv, slots * 2);
+ if (segment >= 0)
+ slots *= 2;
+ else
+ segment = dmfc_find_slots(priv, slots);
if (segment < 0) {
ret = -EBUSY;
goto out;
@@ -391,7 +401,7 @@ int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
* We have a total bandwidth of clkrate * 4pixel divided
* into 8 slots.
*/
- priv->bandwidth_per_slot = clk_get_rate(ipu_clk) / 8;
+ priv->bandwidth_per_slot = clk_get_rate(ipu_clk) * 4 / 8;
dev_dbg(dev, "dmfc: 8 slots with %ldMpixel/s bandwidth each\n",
priv->bandwidth_per_slot / 1000000);
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h b/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
index 551802863fd5..4df00501adc2 100644
--- a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
+++ b/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
@@ -110,7 +110,7 @@ struct ipu_soc;
#define IDMAC_BAND_EN(ch) IPU_IDMAC_REG(0x0040 + 4 * ((ch) / 32))
#define IDMAC_CHA_BUSY(ch) IPU_IDMAC_REG(0x0100 + 4 * ((ch) / 32))
-#define IPU_NUM_IRQS (32 * 5)
+#define IPU_NUM_IRQS (32 * 15)
enum ipu_modules {
IPU_CONF_CSI0_EN = (1 << 0),
@@ -170,9 +170,9 @@ struct ipu_soc {
struct ipuv3_channel channel[64];
- int irq_start;
int irq_sync;
int irq_err;
+ struct irq_domain *domain;
struct ipu_dc_priv *dc_priv;
struct ipu_dp_priv *dp_priv;
diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
index ff5c63350932..4a7eedfafbdb 100644
--- a/drivers/staging/imx-drm/ipuv3-crtc.c
+++ b/drivers/staging/imx-drm/ipuv3-crtc.c
@@ -22,7 +22,6 @@
#include <linux/device.h>
#include <linux/platform_device.h>
#include <drm/drmP.h>
-#include <drm/drm_fb_helper.h>
#include <drm/drm_crtc_helper.h>
#include <linux/fb.h>
#include <linux/clk.h>
@@ -42,9 +41,6 @@ struct ipu_framebuffer {
};
struct ipu_crtc {
- struct drm_fb_helper fb_helper;
- struct ipu_framebuffer ifb;
- int num_crtcs;
struct device *dev;
struct drm_crtc base;
struct imx_drm_crtc *imx_crtc;
@@ -54,7 +50,6 @@ struct ipu_crtc {
struct dmfc_channel *dmfc;
struct ipu_di *di;
int enabled;
- struct ipu_priv *ipu_priv;
struct drm_pending_vblank_event *page_flip_event;
struct drm_framebuffer *newfb;
int irq;
@@ -152,6 +147,7 @@ static int ipu_page_flip(struct drm_crtc *crtc,
ipu_crtc->newfb = fb;
ipu_crtc->page_flip_event = event;
+ crtc->fb = fb;
return 0;
}
@@ -334,7 +330,6 @@ static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
imx_drm_handle_vblank(ipu_crtc->imx_crtc);
if (ipu_crtc->newfb) {
- ipu_crtc->base.fb = ipu_crtc->newfb;
ipu_crtc->newfb = NULL;
ipu_drm_set_base(&ipu_crtc->base, 0, 0);
ipu_crtc_handle_pageflip(ipu_crtc);
diff --git a/drivers/staging/imx-drm/parallel-display.c b/drivers/staging/imx-drm/parallel-display.c
index e7fba62c10e9..cea9f14fff4a 100644
--- a/drivers/staging/imx-drm/parallel-display.c
+++ b/drivers/staging/imx-drm/parallel-display.c
@@ -23,7 +23,6 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_crtc_helper.h>
#include <linux/videodev2.h>
-#include <linux/pinctrl/consumer.h>
#include "imx-drm.h"
@@ -206,20 +205,11 @@ static int imx_pd_probe(struct platform_device *pdev)
struct imx_parallel_display *imxpd;
int ret;
const char *fmt;
- struct pinctrl *pinctrl;
imxpd = devm_kzalloc(&pdev->dev, sizeof(*imxpd), GFP_KERNEL);
if (!imxpd)
return -ENOMEM;
- pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
- if (IS_ERR(pinctrl)) {
- ret = PTR_ERR(pinctrl);
- dev_warn(&pdev->dev, "pinctrl_get_select_default failed with %d",
- ret);
- return ret;
- }
-
edidp = of_get_property(np, "edid", &imxpd->edid_len);
if (edidp)
imxpd->edid = kmemdup(edidp, imxpd->edid_len, GFP_KERNEL);
@@ -265,6 +255,7 @@ static const struct of_device_id imx_pd_dt_ids[] = {
{ .compatible = "fsl,imx-parallel-display", },
{ /* sentinel */ }
};
+MODULE_DEVICE_TABLE(of, imx_pd_dt_ids);
static struct platform_driver imx_pd_driver = {
.probe = imx_pd_probe,
diff --git a/drivers/staging/keucr/init.c b/drivers/staging/keucr/init.c
index 231611dc0f74..f5d41e0348ce 100644
--- a/drivers/staging/keucr/init.c
+++ b/drivers/staging/keucr/init.c
@@ -19,13 +19,13 @@ int ENE_InitMedia(struct us_data *us)
int result;
BYTE MiscReg03 = 0;
- printk(KERN_INFO "--- Init Media ---\n");
- result = ENE_Read_BYTE(us, REG_CARD_STATUS, &MiscReg03);
+ dev_info(&us->pusb_dev->dev, "--- Init Media ---\n");
+ result = ene_read_byte(us, REG_CARD_STATUS, &MiscReg03);
if (result != USB_STOR_XFER_GOOD) {
- printk(KERN_ERR "Read register fail !!\n");
+ dev_err(&us->pusb_dev->dev, "Failed to read register\n");
return USB_STOR_TRANSPORT_ERROR;
}
- printk(KERN_INFO "MiscReg03 = %x\n", MiscReg03);
+ dev_info(&us->pusb_dev->dev, "MiscReg03 = %x\n", MiscReg03);
if (MiscReg03 & 0x02) {
if (!us->SM_Status.Ready && !us->MS_Status.Ready) {
@@ -39,9 +39,9 @@ int ENE_InitMedia(struct us_data *us)
}
/*
- * ENE_Read_BYTE() :
+ * ene_read_byte() :
*/
-int ENE_Read_BYTE(struct us_data *us, WORD index, void *buf)
+int ene_read_byte(struct us_data *us, WORD index, void *buf)
{
struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
int result;
@@ -67,11 +67,13 @@ int ENE_SMInit(struct us_data *us)
int result;
BYTE buf[0x200];
- printk(KERN_INFO "transport --- ENE_SMInit\n");
+ dev_dbg(&us->pusb_dev->dev, "transport --- ENE_SMInit\n");
result = ENE_LoadBinCode(us, SM_INIT_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk(KERN_INFO "Load SM Init Code Fail !!\n");
+ dev_info(&us->pusb_dev->dev,
+ "Failed to load SmartMedia init code\n: result= %x\n",
+ result);
return USB_STOR_TRANSPORT_ERROR;
}
@@ -84,26 +86,33 @@ int ENE_SMInit(struct us_data *us)
result = ENE_SendScsiCmd(us, FDIR_READ, &buf, 0);
if (result != USB_STOR_XFER_GOOD) {
- printk(KERN_ERR
- "Execution SM Init Code Fail !! result = %x\n", result);
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia init code: result = %x\n",
+ result);
return USB_STOR_TRANSPORT_ERROR;
}
- us->SM_Status = *(PSM_STATUS)&buf[0];
+ us->SM_Status = *(struct keucr_sm_status *)&buf[0];
us->SM_DeviceID = buf[1];
us->SM_CardID = buf[2];
if (us->SM_Status.Insert && us->SM_Status.Ready) {
- printk(KERN_INFO "Insert = %x\n", us->SM_Status.Insert);
- printk(KERN_INFO "Ready = %x\n", us->SM_Status.Ready);
- printk(KERN_INFO "WtP = %x\n", us->SM_Status.WtP);
- printk(KERN_INFO "DeviceID = %x\n", us->SM_DeviceID);
- printk(KERN_INFO "CardID = %x\n", us->SM_CardID);
+ dev_info(&us->pusb_dev->dev, "Insert = %x\n",
+ us->SM_Status.Insert);
+ dev_info(&us->pusb_dev->dev, "Ready = %x\n",
+ us->SM_Status.Ready);
+ dev_info(&us->pusb_dev->dev, "WtP = %x\n",
+ us->SM_Status.WtP);
+ dev_info(&us->pusb_dev->dev, "DeviceID = %x\n",
+ us->SM_DeviceID);
+ dev_info(&us->pusb_dev->dev, "CardID = %x\n",
+ us->SM_CardID);
MediaChange = 1;
Check_D_MediaFmt(us);
} else {
- printk(KERN_ERR "SM Card Not Ready --- %x\n", buf[0]);
+ dev_err(&us->pusb_dev->dev,
+ "SmartMedia Card Not Ready --- %x\n", buf[0]);
return USB_STOR_TRANSPORT_ERROR;
}
@@ -120,7 +129,7 @@ int ENE_LoadBinCode(struct us_data *us, BYTE flag)
/* void *buf; */
PBYTE buf;
- /* printk(KERN_INFO "transport --- ENE_LoadBinCode\n"); */
+ /* dev_info(&us->pusb_dev->dev, "transport --- ENE_LoadBinCode\n"); */
if (us->BIN_FLAG == flag)
return USB_STOR_TRANSPORT_GOOD;
@@ -130,11 +139,11 @@ int ENE_LoadBinCode(struct us_data *us, BYTE flag)
switch (flag) {
/* For SS */
case SM_INIT_PATTERN:
- printk(KERN_INFO "SM_INIT_PATTERN\n");
+ dev_dbg(&us->pusb_dev->dev, "SM_INIT_PATTERN\n");
memcpy(buf, SM_Init, 0x800);
break;
case SM_RW_PATTERN:
- printk(KERN_INFO "SM_RW_PATTERN\n");
+ dev_dbg(&us->pusb_dev->dev, "SM_RW_PATTERN\n");
memcpy(buf, SM_Rdwr, 0x800);
break;
}
@@ -165,12 +174,13 @@ int ENE_SendScsiCmd(struct us_data *us, BYTE fDir, void *buf, int use_sg)
cswlen = 0, partial = 0;
unsigned int residue;
- /* printk(KERN_INFO "transport --- ENE_SendScsiCmd\n"); */
+ /* dev_dbg(&us->pusb_dev->dev, "transport --- ENE_SendScsiCmd\n"); */
/* send cmd to out endpoint */
result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
bcb, US_BULK_CB_WRAP_LEN, NULL);
if (result != USB_STOR_XFER_GOOD) {
- printk(KERN_ERR "send cmd to out endpoint fail ---\n");
+ dev_err(&us->pusb_dev->dev,
+ "send cmd to out endpoint fail ---\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -189,7 +199,7 @@ int ENE_SendScsiCmd(struct us_data *us, BYTE fDir, void *buf, int use_sg)
result = usb_stor_bulk_transfer_sg(us, pipe, buf,
transfer_length, 0, &partial);
if (result != USB_STOR_XFER_GOOD) {
- printk(KERN_ERR "data transfer fail ---\n");
+ dev_err(&us->pusb_dev->dev, "data transfer fail ---\n");
return USB_STOR_TRANSPORT_ERROR;
}
}
@@ -199,14 +209,16 @@ int ENE_SendScsiCmd(struct us_data *us, BYTE fDir, void *buf, int use_sg)
US_BULK_CS_WRAP_LEN, &cswlen);
if (result == USB_STOR_XFER_SHORT && cswlen == 0) {
- printk(KERN_WARNING "Received 0-length CSW; retrying...\n");
+ dev_warn(&us->pusb_dev->dev,
+ "Received 0-length CSW; retrying...\n");
result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
bcs, US_BULK_CS_WRAP_LEN, &cswlen);
}
if (result == USB_STOR_XFER_STALLED) {
/* get the status again */
- printk(KERN_WARNING "Attempting to get CSW (2nd try)...\n");
+ dev_warn(&us->pusb_dev->dev,
+ "Attempting to get CSW (2nd try)...\n");
result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
bcs, US_BULK_CS_WRAP_LEN, NULL);
}
@@ -243,7 +255,7 @@ int ENE_Read_Data(struct us_data *us, void *buf, unsigned int length)
struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf;
int result;
- /* printk(KERN_INFO "transport --- ENE_Read_Data\n"); */
+ /* dev_dbg(&us->pusb_dev->dev, "transport --- ENE_Read_Data\n"); */
/* set up the command wrapper */
memset(bcb, 0, sizeof(struct bulk_cb_wrap));
bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
@@ -318,55 +330,3 @@ int ENE_Write_Data(struct us_data *us, void *buf, unsigned int length)
return USB_STOR_TRANSPORT_GOOD;
}
-/*
- * usb_stor_print_cmd():
- */
-void usb_stor_print_cmd(struct scsi_cmnd *srb)
-{
- PBYTE Cdb = srb->cmnd;
- DWORD cmd = Cdb[0];
- DWORD bn = ((Cdb[2] << 24) & 0xff000000) |
- ((Cdb[3] << 16) & 0x00ff0000) |
- ((Cdb[4] << 8) & 0x0000ff00) |
- ((Cdb[5] << 0) & 0x000000ff);
- WORD blen = ((Cdb[7] << 8) & 0xff00) | ((Cdb[8] << 0) & 0x00ff);
-
- switch (cmd) {
- case TEST_UNIT_READY:
- /* printk(KERN_INFO
- "scsi cmd %X --- SCSIOP_TEST_UNIT_READY\n", cmd); */
- break;
- case INQUIRY:
- printk(KERN_INFO "scsi cmd %X --- SCSIOP_INQUIRY\n", cmd);
- break;
- case MODE_SENSE:
- printk(KERN_INFO "scsi cmd %X --- SCSIOP_MODE_SENSE\n", cmd);
- break;
- case START_STOP:
- printk(KERN_INFO "scsi cmd %X --- SCSIOP_START_STOP\n", cmd);
- break;
- case READ_CAPACITY:
- printk(KERN_INFO "scsi cmd %X --- SCSIOP_READ_CAPACITY\n", cmd);
- break;
- case READ_10:
- /* printk(KERN_INFO
- "scsi cmd %X --- SCSIOP_READ,bn = %X, blen = %X\n"
- ,cmd, bn, blen); */
- break;
- case WRITE_10:
- /* printk(KERN_INFO
- "scsi cmd %X --- SCSIOP_WRITE,
- bn = %X, blen = %X\n" , cmd, bn, blen); */
- break;
- case ALLOW_MEDIUM_REMOVAL:
- printk(KERN_INFO
- "scsi cmd %X --- SCSIOP_ALLOW_MEDIUM_REMOVAL\n", cmd);
- break;
- default:
- printk(KERN_INFO "scsi cmd %X --- Other cmd\n", cmd);
- break;
- }
- bn = 0;
- blen = 0;
-}
-
diff --git a/drivers/staging/keucr/scsiglue.c b/drivers/staging/keucr/scsiglue.c
index 48e1005349da..afb00d84679d 100644
--- a/drivers/staging/keucr/scsiglue.c
+++ b/drivers/staging/keucr/scsiglue.c
@@ -73,7 +73,8 @@ static int slave_configure(struct scsi_device *sdev)
if (us->fflags & US_FL_CAPACITY_HEURISTICS)
sdev->guess_capacity = 1;
if (sdev->scsi_level > SCSI_2)
- sdev->sdev_target->scsi_level = sdev->scsi_level = SCSI_2;
+ sdev->sdev_target->scsi_level = sdev->scsi_level
+ = SCSI_2;
sdev->retry_hwerror = 1;
sdev->allow_restart = 1;
sdev->last_sector_bug = 1;
@@ -144,7 +145,7 @@ static int command_abort(struct scsi_cmnd *srb)
scsi_lock(us_to_host(us));
if (us->srb != srb) {
scsi_unlock(us_to_host(us));
- printk("-- nothing to abort\n");
+ dev_info(&us->pusb_dev->dev, "-- nothing to abort\n");
return FAILED;
}
@@ -319,8 +320,11 @@ static ssize_t store_max_sectors(struct device *dev,
return -EINVAL;
}
-static DEVICE_ATTR(max_sectors, S_IRUGO | S_IWUSR, show_max_sectors, store_max_sectors);
-static struct device_attribute *sysfs_device_attr_list[] = {&dev_attr_max_sectors, NULL, };
+static DEVICE_ATTR(max_sectors, S_IRUGO | S_IWUSR, show_max_sectors,
+ store_max_sectors);
+static struct device_attribute *sysfs_device_attr_list[] = {
+ &dev_attr_max_sectors, NULL,
+};
/* this defines our host template, with which we'll allocate hosts */
@@ -393,8 +397,9 @@ unsigned char usb_stor_sense_invalidCDB[18] = {
/*
* usb_stor_access_xfer_buf()
*/
-unsigned int usb_stor_access_xfer_buf(struct us_data *us, unsigned char *buffer,
- unsigned int buflen, struct scsi_cmnd *srb, struct scatterlist **sgptr,
+unsigned int usb_stor_access_xfer_buf(struct us_data *us,
+ unsigned char *buffer, unsigned int buflen,
+ struct scsi_cmnd *srb, struct scatterlist **sgptr,
unsigned int *offset, enum xfer_buf_dir dir)
{
unsigned int cnt;
@@ -424,7 +429,7 @@ unsigned int usb_stor_access_xfer_buf(struct us_data *us, unsigned char *buffer,
while (sglen > 0) {
unsigned int plen = min(sglen,
- (unsigned int)PAGE_SIZE - poff);
+ (unsigned int)PAGE_SIZE - poff);
unsigned char *ptr = kmap(page);
if (dir == TO_XFER_BUF)
diff --git a/drivers/staging/keucr/smil.h b/drivers/staging/keucr/smil.h
index 24a636a4aa1c..1538d7bd600f 100644
--- a/drivers/staging/keucr/smil.h
+++ b/drivers/staging/keucr/smil.h
@@ -168,7 +168,7 @@ SmartMedia Model & Attribute
/***************************************************************************
Struct Definition
***************************************************************************/
-struct SSFDCTYPE {
+struct keucr_media_info {
BYTE Model;
BYTE Attribute;
BYTE MaxZones;
@@ -177,30 +177,14 @@ struct SSFDCTYPE {
WORD MaxLogBlocks;
};
-typedef struct SSFDCTYPE_T {
- BYTE Model;
- BYTE Attribute;
- BYTE MaxZones;
- BYTE MaxSectors;
- WORD MaxBlocks;
- WORD MaxLogBlocks;
-} *SSFDCTYPE_T;
-
-struct ADDRESS {
+struct keucr_media_address {
BYTE Zone; /* Zone Number */
BYTE Sector; /* Sector(512byte) Number on Block */
WORD PhyBlock; /* Physical Block Number on Zone */
WORD LogBlock; /* Logical Block Number of Zone */
};
-typedef struct ADDRESS_T {
- BYTE Zone; /* Zone Number */
- BYTE Sector; /* Sector(512byte) Number on Block */
- WORD PhyBlock; /* Physical Block Number on Zone */
- WORD LogBlock; /* Logical Block Number of Zone */
-} *ADDRESS_T;
-
-struct CIS_AREA {
+struct keucr_media_area {
BYTE Sector; /* Sector(512byte) Number on Block */
WORD PhyBlock; /* Physical Block Number on Zone 0 */
};
@@ -215,9 +199,9 @@ extern WORD ReadBlock;
extern WORD WriteBlock;
extern DWORD MediaChange;
-extern struct SSFDCTYPE Ssfdc;
-extern struct ADDRESS Media;
-extern struct CIS_AREA CisArea;
+extern struct keucr_media_info Ssfdc;
+extern struct keucr_media_address Media;
+extern struct keucr_media_area CisArea;
/*
* SMILMain.c
diff --git a/drivers/staging/keucr/smilmain.c b/drivers/staging/keucr/smilmain.c
index cc49038e55d6..2786808fde9f 100644
--- a/drivers/staging/keucr/smilmain.c
+++ b/drivers/staging/keucr/smilmain.c
@@ -4,204 +4,135 @@
#include "smcommon.h"
#include "smil.h"
-int Check_D_LogCHS (WORD *,BYTE *,BYTE *);
-void Initialize_D_Media (void);
-void PowerOff_D_Media (void);
-int Check_D_MediaPower (void);
-int Check_D_MediaExist (void);
-int Check_D_MediaWP (void);
-int Check_D_MediaFmt (struct us_data *);
-int Check_D_MediaFmtForEraseAll (struct us_data *);
-int Conv_D_MediaAddr (struct us_data *, DWORD);
-int Inc_D_MediaAddr (struct us_data *);
-int Check_D_FirstSect (void);
-int Check_D_LastSect (void);
-int Media_D_ReadOneSect (struct us_data *, WORD, BYTE *);
-int Media_D_WriteOneSect (struct us_data *, WORD, BYTE *);
-int Media_D_CopyBlockHead (struct us_data *);
-int Media_D_CopyBlockTail (struct us_data *);
-int Media_D_EraseOneBlock (void);
-int Media_D_EraseAllBlock (void);
-
-int Copy_D_BlockAll (struct us_data *, DWORD);
-int Copy_D_BlockHead (struct us_data *);
-int Copy_D_BlockTail (struct us_data *);
-int Reassign_D_BlockHead (struct us_data *);
-
-int Assign_D_WriteBlock (void);
-int Release_D_ReadBlock (struct us_data *);
-int Release_D_WriteBlock (struct us_data *);
-int Release_D_CopySector (struct us_data *);
-
-int Copy_D_PhyOneSect (struct us_data *);
-int Read_D_PhyOneSect (struct us_data *, WORD, BYTE *);
-int Write_D_PhyOneSect (struct us_data *, WORD, BYTE *);
-int Erase_D_PhyOneBlock (struct us_data *);
-
-int Set_D_PhyFmtValue (struct us_data *);
-int Search_D_CIS (struct us_data *);
-int Make_D_LogTable (struct us_data *);
-void Check_D_BlockIsFull (void);
-
-int MarkFail_D_PhyOneBlock (struct us_data *);
+int Check_D_LogCHS(WORD *, BYTE *, BYTE *);
+void Initialize_D_Media(void);
+void PowerOff_D_Media(void);
+int Check_D_MediaPower(void);
+int Check_D_MediaExist(void);
+int Check_D_MediaWP(void);
+int Check_D_MediaFmt(struct us_data *);
+int Check_D_MediaFmtForEraseAll(struct us_data *);
+int Conv_D_MediaAddr(struct us_data *, DWORD);
+int Inc_D_MediaAddr(struct us_data *);
+int Check_D_FirstSect(void);
+int Check_D_LastSect(void);
+int Media_D_ReadOneSect(struct us_data *, WORD, BYTE *);
+int Media_D_WriteOneSect(struct us_data *, WORD, BYTE *);
+int Media_D_CopyBlockHead(struct us_data *);
+int Media_D_CopyBlockTail(struct us_data *);
+int Media_D_EraseOneBlock(void);
+int Media_D_EraseAllBlock(void);
+
+int Copy_D_BlockAll(struct us_data *, DWORD);
+int Copy_D_BlockHead(struct us_data *);
+int Copy_D_BlockTail(struct us_data *);
+int Reassign_D_BlockHead(struct us_data *);
+
+int Assign_D_WriteBlock(void);
+int Release_D_ReadBlock(struct us_data *);
+int Release_D_WriteBlock(struct us_data *);
+int Release_D_CopySector(struct us_data *);
+
+int Copy_D_PhyOneSect(struct us_data *);
+int Read_D_PhyOneSect(struct us_data *, WORD, BYTE *);
+int Write_D_PhyOneSect(struct us_data *, WORD, BYTE *);
+int Erase_D_PhyOneBlock(struct us_data *);
+
+int Set_D_PhyFmtValue(struct us_data *);
+int Search_D_CIS(struct us_data *);
+int Make_D_LogTable(struct us_data *);
+void Check_D_BlockIsFull(void);
+
+int MarkFail_D_PhyOneBlock(struct us_data *);
DWORD ErrXDCode;
DWORD ErrCode;
-//BYTE SectBuf[SECTSIZE];
static BYTE WorkBuf[SECTSIZE];
static BYTE Redundant[REDTSIZE];
static BYTE WorkRedund[REDTSIZE];
-//WORD Log2Phy[MAX_ZONENUM][MAX_LOGBLOCK];
-static WORD *Log2Phy[MAX_ZONENUM]; // 128 x 1000, Log2Phy[MAX_ZONENUM][MAX_LOGBLOCK];
-static BYTE Assign[MAX_ZONENUM][MAX_BLOCKNUM/8];
+/* 128 x 1000, Log2Phy[MAX_ZONENUM][MAX_LOGBLOCK]; */
+static WORD *Log2Phy[MAX_ZONENUM];
+static BYTE Assign[MAX_ZONENUM][MAX_BLOCKNUM / 8];
static WORD AssignStart[MAX_ZONENUM];
WORD ReadBlock;
WORD WriteBlock;
DWORD MediaChange;
static DWORD SectCopyMode;
-//BIT Control Macro
-static BYTE BitData[] = { 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 } ;
-#define Set_D_Bit(a,b) (a[(BYTE)((b)/8)]|= BitData[(b)%8])
-#define Clr_D_Bit(a,b) (a[(BYTE)((b)/8)]&=~BitData[(b)%8])
-#define Chk_D_Bit(a,b) (a[(BYTE)((b)/8)] & BitData[(b)%8])
+/* BIT Control Macro */
+static BYTE BitData[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 };
+#define Set_D_Bit(a, b) (a[(BYTE)((b) / 8)] |= BitData[(b) % 8])
+#define Clr_D_Bit(a, b) (a[(BYTE)((b) / 8)] &= ~BitData[(b) % 8])
+#define Chk_D_Bit(a, b) (a[(BYTE)((b) / 8)] & BitData[(b) % 8])
-//extern PBYTE SMHostAddr;
BYTE IsSSFDCCompliance;
BYTE IsXDCompliance;
-//
-////Power Control & Media Exist Check Function
-////----- Init_D_SmartMedia() --------------------------------------------
-//int Init_D_SmartMedia(void)
-//{
-// int i;
-//
-// EMCR_Print("Init_D_SmartMedia start\n");
-// for (i=0; i<MAX_ZONENUM; i++)
-// {
-// if (Log2Phy[i]!=NULL)
-// {
-// EMCR_Print("ExFreePool Zone = %x, Addr = %x\n", i, Log2Phy[i]);
-// ExFreePool(Log2Phy[i]);
-// Log2Phy[i] = NULL;
-// }
-// }
-//
-// Initialize_D_Media();
-// return(NO_ERROR);
-//}
-
-//----- SM_FreeMem() -------------------------------------------------
+/* ----- SM_FreeMem() ------------------------------------------------- */
int SM_FreeMem(void)
{
int i;
pr_info("SM_FreeMem start\n");
- for (i=0; i<MAX_ZONENUM; i++)
- {
- if (Log2Phy[i]!=NULL)
- {
+ for (i = 0; i < MAX_ZONENUM; i++) {
+ if (Log2Phy[i] != NULL) {
pr_info("Free Zone = %x, Addr = %p\n", i, Log2Phy[i]);
kfree(Log2Phy[i]);
Log2Phy[i] = NULL;
}
}
- return(NO_ERROR);
+ return NO_ERROR;
}
-////----- Pwoff_D_SmartMedia() -------------------------------------------
-//int Pwoff_D_SmartMedia(void)
-//{
-// PowerOff_D_Media();
-// return(NO_ERROR);
-//}
-//
-////----- Check_D_SmartMedia() -------------------------------------------
-//int Check_D_SmartMedia(void)
-//{
-// if (Check_D_MediaExist())
-// return(ErrCode);
-//
-// return(NO_ERROR);
-//}
-//
-////----- Check_D_Parameter() --------------------------------------------
-//int Check_D_Parameter(PFDO_DEVICE_EXTENSION fdoExt,WORD *pcyl,BYTE *phead,BYTE *psect)
-//{
-// if (Check_D_MediaPower())
-// return(ErrCode);
-//
-// if (Check_D_MediaFmt(fdoExt))
-// return(ErrCode);
-//
-// if (Check_D_LogCHS(pcyl,phead,psect))
-// return(ErrCode);
-//
-// return(NO_ERROR);
-//}
-
-//SmartMedia Read/Write/Erase Function
-//----- Media_D_ReadSector() -------------------------------------------
-int Media_D_ReadSector(struct us_data *us, DWORD start,WORD count,BYTE *buf)
+/* SmartMedia Read/Write/Erase Function */
+/* ----- Media_D_ReadSector() ------------------------------------------- */
+int Media_D_ReadSector(struct us_data *us, DWORD start, WORD count, BYTE *buf)
{
WORD len, bn;
- //if (Check_D_MediaPower()) ; ¦b 6250 don't care
- // return(ErrCode);
- //if (Check_D_MediaFmt(fdoExt)) ;
- // return(ErrCode);
if (Conv_D_MediaAddr(us, start))
- return(ErrCode);
+ return ErrCode;
- while(1)
- {
+ while (1) {
len = Ssfdc.MaxSectors - Media.Sector;
if (count > len)
bn = len;
else
bn = count;
- //if (Media_D_ReadOneSect(fdoExt, SectBuf))
- //if (Media_D_ReadOneSect(fdoExt, count, buf))
- if (Media_D_ReadOneSect(us, bn, buf))
- {
+
+ if (Media_D_ReadOneSect(us, bn, buf)) {
ErrCode = ERR_EccReadErr;
- return(ErrCode);
+ return ErrCode;
}
Media.Sector += bn;
count -= bn;
- if (count<=0)
+ if (count <= 0)
break;
buf += bn * SECTSIZE;
if (Inc_D_MediaAddr(us))
- return(ErrCode);
+ return ErrCode;
}
- return(NO_ERROR);
+ return NO_ERROR;
}
-// here
-//----- Media_D_CopySector() ------------------------------------------
-int Media_D_CopySector(struct us_data *us, DWORD start,WORD count,BYTE *buf)
+/* here */
+/* ----- Media_D_CopySector() ------------------------------------------ */
+int Media_D_CopySector(struct us_data *us, DWORD start, WORD count, BYTE *buf)
{
- //DWORD mode;
- //int i;
WORD len, bn;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
/* pr_info("Media_D_CopySector !!!\n"); */
if (Conv_D_MediaAddr(us, start))
- return(ErrCode);
+ return ErrCode;
- while(1)
- {
+ while (1) {
if (Assign_D_WriteBlock())
- return(ERROR);
+ return ERROR;
len = Ssfdc.MaxSectors - Media.Sector;
if (count > len)
@@ -209,607 +140,137 @@ int Media_D_CopySector(struct us_data *us, DWORD start,WORD count,BYTE *buf)
else
bn = count;
- //if (Ssfdc_D_CopyBlock(fdoExt,count,buf,Redundant))
- if (Ssfdc_D_CopyBlock(us,bn,buf,Redundant))
- {
+ if (Ssfdc_D_CopyBlock(us, bn, buf, Redundant)) {
ErrCode = ERR_WriteFault;
- return(ErrCode);
+ return ErrCode;
}
Media.Sector = 0x1F;
- //if (Release_D_ReadBlock(fdoExt))
- if (Release_D_CopySector(us))
- {
- if (ErrCode==ERR_HwError)
- {
+ if (Release_D_CopySector(us)) {
+ if (ErrCode == ERR_HwError) {
ErrCode = ERR_WriteFault;
- return(ErrCode);
+ return ErrCode;
}
}
count -= bn;
- if (count<=0)
+ if (count <= 0)
break;
buf += bn * SECTSIZE;
if (Inc_D_MediaAddr(us))
- return(ErrCode);
+ return ErrCode;
}
- return(NO_ERROR);
+ return NO_ERROR;
}
-//----- Release_D_CopySector() ------------------------------------------
+/* ----- Release_D_CopySector() ------------------------------------------ */
int Release_D_CopySector(struct us_data *us)
{
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
-
- Log2Phy[Media.Zone][Media.LogBlock]=WriteBlock;
- Media.PhyBlock=ReadBlock;
+ Log2Phy[Media.Zone][Media.LogBlock] = WriteBlock;
+ Media.PhyBlock = ReadBlock;
- if (Media.PhyBlock==NO_ASSIGN)
- {
- Media.PhyBlock=WriteBlock;
- return(SMSUCCESS);
+ if (Media.PhyBlock == NO_ASSIGN) {
+ Media.PhyBlock = WriteBlock;
+ return SMSUCCESS;
}
- Clr_D_Bit(Assign[Media.Zone],Media.PhyBlock);
- Media.PhyBlock=WriteBlock;
+ Clr_D_Bit(Assign[Media.Zone], Media.PhyBlock);
+ Media.PhyBlock = WriteBlock;
- return(SMSUCCESS);
-}
-/*
-//----- Media_D_WriteSector() ------------------------------------------
-int Media_D_WriteSector(PFDO_DEVICE_EXTENSION fdoExt, DWORD start,WORD count,BYTE *buf)
-{
- int i;
- WORD len, bn;
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- //if (Check_D_MediaPower())
- // return(ErrCode);
- //
- //if (Check_D_MediaFmt(fdoExt))
- // return(ErrCode);
- //
- //if (Check_D_MediaWP())
- // return(ErrCode);
-
- if (Conv_D_MediaAddr(fdoExt, start))
- return(ErrCode);
-
- //ENE_Print("Media_D_WriteSector --- Sector = %x\n", Media.Sector);
- if (Check_D_FirstSect())
- {
- if (Media_D_CopyBlockHead(fdoExt))
- {
- ErrCode = ERR_WriteFault;
- return(ErrCode);
- }
- }
-
- while(1)
- {
- if (!Check_D_FirstSect())
- {
- if (Assign_D_WriteBlock())
- return(ErrCode);
- }
-
- len = Ssfdc.MaxSectors - Media.Sector;
- if (count > len)
- bn = len;
- else
- bn = count;
- //for(i=0;i<SECTSIZE;i++)
- // SectBuf[i]=*buf++;
-
- //if (Media_D_WriteOneSect(fdoExt, SectBuf))
- if (Media_D_WriteOneSect(fdoExt, bn, buf))
- {
- ErrCode = ERR_WriteFault;
- return(ErrCode);
- }
-
- Media.Sector += bn - 1;
-
- if (!Check_D_LastSect())
- {
- if (Release_D_ReadBlock(fdoExt))
-
- { if (ErrCode==ERR_HwError)
- {
- ErrCode = ERR_WriteFault;
- return(ErrCode);
- }
- }
- }
-
- count -= bn;
-
- if (count<=0)
- break;
-
- buf += bn * SECTSIZE;
-
- //if (--count<=0)
- // break;
-
- if (Inc_D_MediaAddr(fdoExt))
- return(ErrCode);
- }
-
- if (!Check_D_LastSect())
- return(NO_ERROR);
-
- if (Inc_D_MediaAddr(fdoExt))
- return(ErrCode);
-
- if (Media_D_CopyBlockTail(fdoExt))
- {
- ErrCode = ERR_WriteFault;
- return(ErrCode);
- }
-
- return(NO_ERROR);
+ return SMSUCCESS;
}
-//
-////----- Media_D_EraseBlock() -------------------------------------------
-//int Media_D_EraseBlock(PFDO_DEVICE_EXTENSION fdoExt, DWORD start,WORD count)
-//{
-// if (Check_D_MediaPower())
-// return(ErrCode);
-//
-// if (Check_D_MediaFmt(fdoExt))
-// return(ErrCode);
-//
-// if (Check_D_MediaWP())
-// return(ErrCode);
-//
-// if (Conv_D_MediaAddr(start))
-// return(ErrCode);
-//
-// while(Check_D_FirstSect()) {
-// if (Inc_D_MediaAddr(fdoExt))
-// return(ErrCode);
-//
-// if (--count<=0)
-// return(NO_ERROR);
-// }
-//
-// while(1) {
-// if (!Check_D_LastSect())
-// if (Media_D_EraseOneBlock())
-// if (ErrCode==ERR_HwError)
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-//
-// if (Inc_D_MediaAddr(fdoExt))
-// return(ErrCode);
-//
-// if (--count<=0)
-// return(NO_ERROR);
-// }
-//}
-//
-////----- Media_D_EraseAll() ---------------------------------------------
-//int Media_D_EraseAll(PFDO_DEVICE_EXTENSION fdoExt)
-//{
-// if (Check_D_MediaPower())
-// return(ErrCode);
-//
-// if (Check_D_MediaFmtForEraseAll(fdoExt))
-// return(ErrCode);
-//
-// if (Check_D_MediaWP())
-// return(ErrCode);
-//
-// if (Media_D_EraseAllBlock())
-// return(ErrCode);
-//
-// return(NO_ERROR);
-//}
-
-//SmartMedia Write Function for One Sector Write Mode
-//----- Media_D_OneSectWriteStart() ------------------------------------
-int Media_D_OneSectWriteStart(PFDO_DEVICE_EXTENSION fdoExt,DWORD start,BYTE *buf)
-{
-// int i;
-// SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
-// ADDRESS_T bb = (ADDRESS_T) &Media;
-//
-// //if (Check_D_MediaPower())
-// // return(ErrCode);
-// //if (Check_D_MediaFmt(fdoExt))
-// // return(ErrCode);
-// //if (Check_D_MediaWP())
-// // return(ErrCode);
-// if (Conv_D_MediaAddr(fdoExt, start))
-// return(ErrCode);
-//
-// if (Check_D_FirstSect())
-// if (Media_D_CopyBlockHead(fdoExt))
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-//
-// if (!Check_D_FirstSect())
-// if (Assign_D_WriteBlock())
-// return(ErrCode);
-//
-// //for(i=0;i<SECTSIZE;i++)
-// // SectBuf[i]=*buf++;
-//
-// //if (Media_D_WriteOneSect(fdoExt, SectBuf))
-// if (Media_D_WriteOneSect(fdoExt, buf))
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-//
-// if (!Check_D_LastSect())
-// {
-// if (Release_D_ReadBlock(fdoExt))
-// if (ErrCode==ERR_HwError)
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-// }
-
- return(NO_ERROR);
-}
-
-//----- Media_D_OneSectWriteNext() -------------------------------------
-int Media_D_OneSectWriteNext(PFDO_DEVICE_EXTENSION fdoExt, BYTE *buf)
-{
-// int i;
-// SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
-// ADDRESS_T bb = (ADDRESS_T) &Media;
-//
-// if (Inc_D_MediaAddr(fdoExt))
-// return(ErrCode);
-//
-// if (!Check_D_FirstSect())
-// if (Assign_D_WriteBlock())
-// return(ErrCode);
-//
-// //for(i=0;i<SECTSIZE;i++)
-// // SectBuf[i]=*buf++;
-//
-// //if (Media_D_WriteOneSect(fdoExt, SectBuf))
-// if (Media_D_WriteOneSect(fdoExt, buf))
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-//
-// if (!Check_D_LastSect())
-// {
-// if (Release_D_ReadBlock(fdoExt))
-// if (ErrCode==ERR_HwError)
-// {
-// ErrCode = ERR_WriteFault;
-// return(ErrCode);
-// }
-// }
-
- return(NO_ERROR);
-}
-
-//----- Media_D_OneSectWriteFlush() ------------------------------------
-int Media_D_OneSectWriteFlush(PFDO_DEVICE_EXTENSION fdoExt)
-{
- if (!Check_D_LastSect())
- return(NO_ERROR);
-
- if (Inc_D_MediaAddr(fdoExt))
- return(ErrCode);
-
- if (Media_D_CopyBlockTail(fdoExt))
- {
- ErrCode = ERR_WriteFault;
- return(ErrCode);
- }
- return(NO_ERROR);
-}
-//
-////LED Tern On/Off Subroutine
-////----- SM_EnableLED() -----------------------------------------------
-//void SM_EnableLED(PFDO_DEVICE_EXTENSION fdoExt, BOOLEAN enable)
-//{
-// if (fdoExt->Drive_IsSWLED)
-// {
-// if (enable)
-// Led_D_TernOn();
-// else
-// Led_D_TernOff();
-// }
-//}
-//
-////----- Led_D_TernOn() -------------------------------------------------
-//void Led_D_TernOn(void)
-//{
-// if (Check_D_CardStsChg())
-// MediaChange=ERROR;
-//
-// Cnt_D_LedOn();
-//}
-//
-////----- Led_D_TernOff() ------------------------------------------------
-//void Led_D_TernOff(void)
-//{
-// if (Check_D_CardStsChg())
-// MediaChange=ERROR;
-//
-// Cnt_D_LedOff();
-//}
-//
-////SmartMedia Logical Format Subroutine
-////----- Check_D_LogCHS() -----------------------------------------------
-//int Check_D_LogCHS(WORD *c,BYTE *h,BYTE *s)
-//{
-// switch(Ssfdc.Model) {
-// case SSFDC1MB: *c=125; *h= 4; *s= 4; break;
-// case SSFDC2MB: *c=125; *h= 4; *s= 8; break;
-// case SSFDC4MB: *c=250; *h= 4; *s= 8; break;
-// case SSFDC8MB: *c=250; *h= 4; *s=16; break;
-// case SSFDC16MB: *c=500; *h= 4; *s=16; break;
-// case SSFDC32MB: *c=500; *h= 8; *s=16; break;
-// case SSFDC64MB: *c=500; *h= 8; *s=32; break;
-// case SSFDC128MB: *c=500; *h=16; *s=32; break;
-// default: *c= 0; *h= 0; *s= 0; ErrCode = ERR_NoSmartMedia; return(ERROR);
-// }
-//
-// return(SMSUCCESS);
-//}
-//
-////Power Control & Media Exist Check Subroutine
-////----- Initialize_D_Media() -------------------------------------------
-//void Initialize_D_Media(void)
-//{
-// ErrCode = NO_ERROR;
-// MediaChange = ERROR;
-// SectCopyMode = COMPLETED;
-// Cnt_D_Reset();
-//}
-//
-////----- PowerOff_D_Media() ---------------------------------------------
-//void PowerOff_D_Media(void)
-//{
-// Cnt_D_PowerOff();
-//}
-//
-////----- Check_D_MediaPower() -------------------------------------------
-//int Check_D_MediaPower(void)
-//{
-// //usleep(56*1024);
-// if (Check_D_CardStsChg())
-// MediaChange = ERROR;
-// //usleep(56*1024);
-// if ((!Check_D_CntPower())&&(!MediaChange)) // ¦³ power & Media ¨S³Q change, «h return success
-// return(SMSUCCESS);
-// //usleep(56*1024);
-//
-// if (Check_D_CardExist()) // Check if card is not exist, return err
-// {
-// ErrCode = ERR_NoSmartMedia;
-// MediaChange = ERROR;
-// return(ERROR);
-// }
-// //usleep(56*1024);
-// if (Cnt_D_PowerOn())
-// {
-// ErrCode = ERR_NoSmartMedia;
-// MediaChange = ERROR;
-// return(ERROR);
-// }
-// //usleep(56*1024);
-// Ssfdc_D_Reset(fdoExt);
-// //usleep(56*1024);
-// return(SMSUCCESS);
-//}
-//
-////-----Check_D_MediaExist() --------------------------------------------
-//int Check_D_MediaExist(void)
-//{
-// if (Check_D_CardStsChg())
-// MediaChange = ERROR;
-//
-// if (!Check_D_CardExist())
-// {
-// if (!MediaChange)
-// return(SMSUCCESS);
-//
-// ErrCode = ERR_ChangedMedia;
-// return(ERROR);
-// }
-//
-// ErrCode = ERR_NoSmartMedia;
-//
-// return(ERROR);
-//}
-//
-////----- Check_D_MediaWP() ----------------------------------------------
-//int Check_D_MediaWP(void)
-//{
-// if (Ssfdc.Attribute &MWP)
-// {
-// ErrCode = ERR_WrtProtect;
-// return(ERROR);
-// }
-//
-// return(SMSUCCESS);
-//}
-*/
-//SmartMedia Physical Format Test Subroutine
-//----- Check_D_MediaFmt() ---------------------------------------------
+/* SmartMedia Physical Format Test Subroutine */
+/* ----- Check_D_MediaFmt() --------------------------------------------- */
int Check_D_MediaFmt(struct us_data *us)
{
pr_info("Check_D_MediaFmt\n");
- //ULONG i,j, result=FALSE, zone,block;
- //usleep(56*1024);
if (!MediaChange)
- return(SMSUCCESS);
+ return SMSUCCESS;
MediaChange = ERROR;
SectCopyMode = COMPLETED;
- //usleep(56*1024);
- if (Set_D_PhyFmtValue(us))
- {
+ if (Set_D_PhyFmtValue(us)) {
ErrCode = ERR_UnknownMedia;
- return(ERROR);
+ return ERROR;
}
-
- //usleep(56*1024);
- if (Search_D_CIS(us))
- {
+
+ if (Search_D_CIS(us)) {
ErrCode = ERR_IllegalFmt;
- return(ERROR);
+ return ERROR;
}
-
- MediaChange = SMSUCCESS;
- return(SMSUCCESS);
+ MediaChange = SMSUCCESS;
+ return SMSUCCESS;
}
-/*
-////----- Check_D_BlockIsFull() ----------------------------------
-//void Check_D_BlockIsFull()
-//{
-// ULONG i, block;
-//
-// if (IsXDCompliance || IsSSFDCCompliance)
-// {
-// // If the blocks are full then return write-protect.
-// block = Ssfdc.MaxBlocks/8;
-// for (Media.Zone=0; Media.Zone<Ssfdc.MaxZones; Media.Zone++)
-// {
-// if (Log2Phy[Media.Zone]==NULL)
-// {
-// if (Make_D_LogTable())
-// {
-// ErrCode = ERR_IllegalFmt;
-// return;
-// }
-// }
-//
-// for (i=0; i<block; i++)
-// {
-// if (Assign[Media.Zone][i] != 0xFF)
-// return;
-// }
-// }
-// Ssfdc.Attribute |= WP;
-// }
-//}
-//
-//
-////----- Check_D_MediaFmtForEraseAll() ----------------------------------
-//int Check_D_MediaFmtForEraseAll(PFDO_DEVICE_EXTENSION fdoExt)
-//{
-// MediaChange = ERROR;
-// SectCopyMode = COMPLETED;
-//
-// if (Set_D_PhyFmtValue(fdoExt))
-// {
-// ErrCode = ERR_UnknownMedia;
-// return(ERROR);
-// }
-//
-// if (Search_D_CIS(fdoExt))
-// {
-// ErrCode = ERR_IllegalFmt;
-// return(ERROR);
-// }
-//
-// return(SMSUCCESS);
-//}
-*/
-//SmartMedia Physical Address Control Subroutine
-//----- Conv_D_MediaAddr() ---------------------------------------------
+
+/* SmartMedia Physical Address Control Subroutine */
+/* ----- Conv_D_MediaAddr() --------------------------------------------- */
int Conv_D_MediaAddr(struct us_data *us, DWORD addr)
{
DWORD temp;
- //ULONG zz;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- temp = addr/Ssfdc.MaxSectors;
- Media.Zone = (BYTE) (temp/Ssfdc.MaxLogBlocks);
+ temp = addr / Ssfdc.MaxSectors;
+ Media.Zone = (BYTE) (temp / Ssfdc.MaxLogBlocks);
- if (Log2Phy[Media.Zone]==NULL)
- {
- if (Make_D_LogTable(us))
- {
+ if (Log2Phy[Media.Zone] == NULL) {
+ if (Make_D_LogTable(us)) {
ErrCode = ERR_IllegalFmt;
- return(ERROR);
+ return ERROR;
}
}
- Media.Sector = (BYTE) (addr%Ssfdc.MaxSectors);
- Media.LogBlock = (WORD) (temp%Ssfdc.MaxLogBlocks);
+ Media.Sector = (BYTE) (addr % Ssfdc.MaxSectors);
+ Media.LogBlock = (WORD) (temp % Ssfdc.MaxLogBlocks);
- if (Media.Zone<Ssfdc.MaxZones)
- {
+ if (Media.Zone < Ssfdc.MaxZones) {
Clr_D_RedundantData(Redundant);
Set_D_LogBlockAddr(Redundant);
Media.PhyBlock = Log2Phy[Media.Zone][Media.LogBlock];
- return(SMSUCCESS);
+ return SMSUCCESS;
}
ErrCode = ERR_OutOfLBA;
- return(ERROR);
+ return ERROR;
}
-//----- Inc_D_MediaAddr() ----------------------------------------------
+/* ----- Inc_D_MediaAddr() ---------------------------------------------- */
int Inc_D_MediaAddr(struct us_data *us)
{
WORD LogBlock = Media.LogBlock;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- if (++Media.Sector<Ssfdc.MaxSectors)
- return(SMSUCCESS);
+ if (++Media.Sector < Ssfdc.MaxSectors)
+ return SMSUCCESS;
- if (Log2Phy[Media.Zone]==NULL)
- {
- if (Make_D_LogTable(us))
- {
+ if (Log2Phy[Media.Zone] == NULL) {
+ if (Make_D_LogTable(us)) {
ErrCode = ERR_IllegalFmt;
- return(ERROR);
+ return ERROR;
}
}
- Media.Sector=0;
+ Media.Sector = 0;
Media.LogBlock = LogBlock;
- if (++Media.LogBlock<Ssfdc.MaxLogBlocks)
- {
+ if (++Media.LogBlock < Ssfdc.MaxLogBlocks) {
Clr_D_RedundantData(Redundant);
Set_D_LogBlockAddr(Redundant);
- Media.PhyBlock=Log2Phy[Media.Zone][Media.LogBlock];
- return(SMSUCCESS);
+ Media.PhyBlock = Log2Phy[Media.Zone][Media.LogBlock];
+ return SMSUCCESS;
}
- Media.LogBlock=0;
+ Media.LogBlock = 0;
- if (++Media.Zone<Ssfdc.MaxZones)
- {
- if (Log2Phy[Media.Zone]==NULL)
- {
- if (Make_D_LogTable(us))
- {
+ if (++Media.Zone < Ssfdc.MaxZones) {
+ if (Log2Phy[Media.Zone] == NULL) {
+ if (Make_D_LogTable(us)) {
ErrCode = ERR_IllegalFmt;
- return(ERROR);
+ return ERROR;
}
}
@@ -817,1034 +278,508 @@ int Inc_D_MediaAddr(struct us_data *us)
Clr_D_RedundantData(Redundant);
Set_D_LogBlockAddr(Redundant);
- Media.PhyBlock=Log2Phy[Media.Zone][Media.LogBlock];
- return(SMSUCCESS);
+ Media.PhyBlock = Log2Phy[Media.Zone][Media.LogBlock];
+ return SMSUCCESS;
}
- Media.Zone=0;
+ Media.Zone = 0;
ErrCode = ERR_OutOfLBA;
- return(ERROR);
+ return ERROR;
}
-/*
-//----- Check_D_FirstSect() --------------------------------------------
-int Check_D_FirstSect(void)
-{
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- if (!Media.Sector)
- return(SMSUCCESS);
-
- return(ERROR);
-}
-
-//----- Check_D_LastSect() ---------------------------------------------
-int Check_D_LastSect(void)
-{
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
- if (Media.Sector<(Ssfdc.MaxSectors-1))
- return(ERROR);
-
- return(SMSUCCESS);
-}
-*/
-//SmartMedia Read/Write Subroutine with Retry
-//----- Media_D_ReadOneSect() ------------------------------------------
+/* SmartMedia Read/Write Subroutine with Retry */
+/* ----- Media_D_ReadOneSect() ------------------------------------------ */
int Media_D_ReadOneSect(struct us_data *us, WORD count, BYTE *buf)
{
DWORD err, retry;
if (!Read_D_PhyOneSect(us, count, buf))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
- if (ErrCode==ERR_DataStatus)
- return(ERROR);
+ return SMSUCCESS;
+ if (ErrCode == ERR_HwError)
+ return ERROR;
+ if (ErrCode == ERR_DataStatus)
+ return ERROR;
#ifdef RDERR_REASSIGN
- if (Ssfdc.Attribute &MWP)
- {
- if (ErrCode==ERR_CorReadErr)
- return(SMSUCCESS);
- return(ERROR);
+ if (Ssfdc.Attribute & MWP) {
+ if (ErrCode == ERR_CorReadErr)
+ return SMSUCCESS;
+ return ERROR;
}
- err=ErrCode;
- for(retry=0; retry<2; retry++)
- {
- if (Copy_D_BlockAll(us, (err==ERR_EccReadErr)?REQ_FAIL:REQ_ERASE))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
+ err = ErrCode;
+ for (retry = 0; retry < 2; retry++) {
+ if (Copy_D_BlockAll(us,
+ (err == ERR_EccReadErr) ? REQ_FAIL : REQ_ERASE)) {
+ if (ErrCode == ERR_HwError)
+ return ERROR;
continue;
}
ErrCode = err;
- if (ErrCode==ERR_CorReadErr)
- return(SMSUCCESS);
- return(ERROR);
+ if (ErrCode == ERR_CorReadErr)
+ return SMSUCCESS;
+ return ERROR;
}
MediaChange = ERROR;
#else
- if (ErrCode==ERR_CorReadErr) return(SMSUCCESS);
+ if (ErrCode == ERR_CorReadErr)
+ return SMSUCCESS;
#endif
- return(ERROR);
-}
-/*
-//----- Media_D_WriteOneSect() -----------------------------------------
-int Media_D_WriteOneSect(PFDO_DEVICE_EXTENSION fdoExt, WORD count, BYTE *buf)
-{
- DWORD retry;
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- if (!Write_D_PhyOneSect(fdoExt, count, buf))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
-
- for(retry=1; retry<2; retry++)
- {
- if (Reassign_D_BlockHead(fdoExt))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
- continue;
- }
-
- if (!Write_D_PhyOneSect(fdoExt, count, buf))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
- }
-
- if (Release_D_WriteBlock(fdoExt))
- return(ERROR);
-
- ErrCode = ERR_WriteFault;
- MediaChange = ERROR;
- return(ERROR);
-}
-
-//SmartMedia Data Copy Subroutine with Retry
-//----- Media_D_CopyBlockHead() ----------------------------------------
-int Media_D_CopyBlockHead(PFDO_DEVICE_EXTENSION fdoExt)
-{
- DWORD retry;
-
- for(retry=0; retry<2; retry++)
- {
- if (!Copy_D_BlockHead(fdoExt))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
- }
-
- MediaChange = ERROR;
- return(ERROR);
+ return ERROR;
}
-//----- Media_D_CopyBlockTail() ----------------------------------------
-int Media_D_CopyBlockTail(PFDO_DEVICE_EXTENSION fdoExt)
-{
- DWORD retry;
-
- if (!Copy_D_BlockTail(fdoExt))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
-
- for(retry=1; retry<2; retry++)
- {
- if (Reassign_D_BlockHead(fdoExt))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
- continue;
- }
-
- if (!Copy_D_BlockTail(fdoExt))
- return(SMSUCCESS);
- if (ErrCode==ERR_HwError)
- return(ERROR);
- }
-
- if (Release_D_WriteBlock(fdoExt))
- return(ERROR);
-
- ErrCode = ERR_WriteFault;
- MediaChange = ERROR;
- return(ERROR);
-}
-//
-////----- Media_D_EraseOneBlock() ----------------------------------------
-//int Media_D_EraseOneBlock(void)
-//{
-// WORD LogBlock = Media.LogBlock;
-// WORD PhyBlock = Media.PhyBlock;
-// SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
-// ADDRESS_T bb = (ADDRESS_T) &Media;
-//
-// if (Media.PhyBlock==NO_ASSIGN)
-// return(SMSUCCESS);
-//
-// if (Log2Phy[Media.Zone]==NULL)
-// {
-// if (Make_D_LogTable())
-// {
-// ErrCode = ERR_IllegalFmt;
-// return(ERROR);
-// }
-// }
-// Media.LogBlock = LogBlock;
-// Media.PhyBlock = PhyBlock;
-//
-// Log2Phy[Media.Zone][Media.LogBlock]=NO_ASSIGN;
-//
-// if (Erase_D_PhyOneBlock(fdoExt))
-// {
-// if (ErrCode==ERR_HwError)
-// return(ERROR);
-// if (MarkFail_D_PhyOneBlock())
-// return(ERROR);
-//
-// ErrCode = ERR_WriteFault;
-// return(ERROR);
-// }
-//
-// Clr_D_Bit(Assign[Media.Zone],Media.PhyBlock);
-// Media.PhyBlock=NO_ASSIGN;
-// return(SMSUCCESS);
-//}
-//
-////SmartMedia Erase Subroutine
-////----- Media_D_EraseAllBlock() ----------------------------------------
-//int Media_D_EraseAllBlock(void)
-//{
-// WORD cis=0;
-//
-// SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
-// ADDRESS_T bb = (ADDRESS_T) &Media;
-//
-// MediaChange = ERROR;
-// Media.Sector = 0;
-//
-// for(Media.Zone=0; Media.Zone<Ssfdc.MaxZones; Media.Zone++)
-// for(Media.PhyBlock=0; Media.PhyBlock<Ssfdc.MaxBlocks; Media.PhyBlock++) {
-// if (Ssfdc_D_ReadRedtData(Redundant))
-// {
-// Ssfdc_D_Reset(fdoExt);
-// return(ERROR);
-// }
-//
-// Ssfdc_D_Reset(fdoExt);
-// if (!Check_D_FailBlock(Redundant))
-// {
-// if (cis)
-// {
-// if (Ssfdc_D_EraseBlock(fdoExt))
-// {
-// ErrCode = ERR_HwError;
-// return(ERROR);
-// }
-//
-// if (Ssfdc_D_CheckStatus())
-// {
-// if (MarkFail_D_PhyOneBlock())
-// return(ERROR);
-// }
-//
-// continue;
-// }
-//
-// if (Media.PhyBlock!=CisArea.PhyBlock)
-// {
-// ErrCode = ERR_IllegalFmt;
-// return(ERROR);
-// }
-//
-// cis++;
-// }
-//
-// }
-// return(SMSUCCESS);
-//}
-*/
-//SmartMedia Physical Sector Data Copy Subroutine
-//----- Copy_D_BlockAll() ----------------------------------------------
+/* SmartMedia Physical Sector Data Copy Subroutine */
+/* ----- Copy_D_BlockAll() ---------------------------------------------- */
int Copy_D_BlockAll(struct us_data *us, DWORD mode)
{
BYTE sect;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- sect=Media.Sector;
+ sect = Media.Sector;
if (Assign_D_WriteBlock())
- return(ERROR);
- if (mode==REQ_FAIL)
- SectCopyMode=REQ_FAIL;
-
- for(Media.Sector=0; Media.Sector<Ssfdc.MaxSectors; Media.Sector++)
- {
- if (Copy_D_PhyOneSect(us))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
+ return ERROR;
+ if (mode == REQ_FAIL)
+ SectCopyMode = REQ_FAIL;
+
+ for (Media.Sector = 0; Media.Sector < Ssfdc.MaxSectors;
+ Media.Sector++) {
+ if (Copy_D_PhyOneSect(us)) {
+ if (ErrCode == ERR_HwError)
+ return ERROR;
if (Release_D_WriteBlock(us))
- return(ERROR);
+ return ERROR;
ErrCode = ERR_WriteFault;
- Media.PhyBlock=ReadBlock;
- Media.Sector=sect;
+ Media.PhyBlock = ReadBlock;
+ Media.Sector = sect;
- return(ERROR);
+ return ERROR;
}
}
if (Release_D_ReadBlock(us))
- return(ERROR);
-
- Media.PhyBlock=WriteBlock;
- Media.Sector=sect;
- return(SMSUCCESS);
-}
-/*
-//----- Copy_D_BlockHead() ---------------------------------------------
-int Copy_D_BlockHead(PFDO_DEVICE_EXTENSION fdoExt)
-{
- BYTE sect;
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- sect=Media.Sector;
- if (Assign_D_WriteBlock())
- return(ERROR);
-
- for(Media.Sector=0; Media.Sector<sect; Media.Sector++)
- {
- if (Copy_D_PhyOneSect(fdoExt))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
- if (Release_D_WriteBlock(fdoExt))
- return(ERROR);
-
- ErrCode = ERR_WriteFault;
- Media.PhyBlock=ReadBlock;
- Media.Sector=sect;
-
- return(ERROR);
- }
- }
-
- Media.PhyBlock=WriteBlock;
- Media.Sector=sect;
- return(SMSUCCESS);
-}
+ return ERROR;
-//----- Copy_D_BlockTail() ---------------------------------------------
-int Copy_D_BlockTail(PFDO_DEVICE_EXTENSION fdoExt)
-{
- BYTE sect;
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- for(sect=Media.Sector; Media.Sector<Ssfdc.MaxSectors; Media.Sector++)
- {
- if (Copy_D_PhyOneSect(fdoExt))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
-
- Media.PhyBlock=WriteBlock;
- Media.Sector=sect;
-
- return(ERROR);
- }
- }
-
- if (Release_D_ReadBlock(fdoExt))
- return(ERROR);
-
- Media.PhyBlock=WriteBlock;
- Media.Sector=sect;
- return(SMSUCCESS);
+ Media.PhyBlock = WriteBlock;
+ Media.Sector = sect;
+ return SMSUCCESS;
}
-//----- Reassign_D_BlockHead() -----------------------------------------
-int Reassign_D_BlockHead(PFDO_DEVICE_EXTENSION fdoExt)
-{
- DWORD mode;
- WORD block;
- BYTE sect;
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- mode=SectCopyMode;
- block=ReadBlock;
- sect=Media.Sector;
-
- if (Assign_D_WriteBlock())
- return(ERROR);
-
- SectCopyMode=REQ_FAIL;
-
- for(Media.Sector=0; Media.Sector<sect; Media.Sector++)
- {
- if (Copy_D_PhyOneSect(fdoExt))
- {
- if (ErrCode==ERR_HwError)
- return(ERROR);
- if (Release_D_WriteBlock(fdoExt))
- return(ERROR);
-
- ErrCode = ERR_WriteFault;
- SectCopyMode=mode;
- WriteBlock=ReadBlock;
- ReadBlock=block;
- Media.Sector=sect;
- Media.PhyBlock=WriteBlock;
-
- return(ERROR);
- }
- }
-
- if (Release_D_ReadBlock(fdoExt))
- return(ERROR);
-
- SectCopyMode=mode;
- ReadBlock=block;
- Media.Sector=sect;
- Media.PhyBlock=WriteBlock;
- return(SMSUCCESS);
-}
-*/
-//SmartMedia Physical Block Assign/Release Subroutine
-//----- Assign_D_WriteBlock() ------------------------------------------
+/* SmartMedia Physical Block Assign/Release Subroutine */
+/* ----- Assign_D_WriteBlock() ------------------------------------------ */
int Assign_D_WriteBlock(void)
{
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- ReadBlock=Media.PhyBlock;
-
- for(WriteBlock=AssignStart[Media.Zone]; WriteBlock<Ssfdc.MaxBlocks; WriteBlock++)
- {
- if (!Chk_D_Bit(Assign[Media.Zone],WriteBlock))
- {
- Set_D_Bit(Assign[Media.Zone],WriteBlock);
- AssignStart[Media.Zone]=WriteBlock+1;
- Media.PhyBlock=WriteBlock;
- SectCopyMode=REQ_ERASE;
- //ErrXDCode = NO_ERROR;
- return(SMSUCCESS);
+ ReadBlock = Media.PhyBlock;
+
+ for (WriteBlock = AssignStart[Media.Zone];
+ WriteBlock < Ssfdc.MaxBlocks; WriteBlock++) {
+ if (!Chk_D_Bit(Assign[Media.Zone], WriteBlock)) {
+ Set_D_Bit(Assign[Media.Zone], WriteBlock);
+ AssignStart[Media.Zone] = WriteBlock + 1;
+ Media.PhyBlock = WriteBlock;
+ SectCopyMode = REQ_ERASE;
+ return SMSUCCESS;
}
}
- for(WriteBlock=0; WriteBlock<AssignStart[Media.Zone]; WriteBlock++)
- {
- if (!Chk_D_Bit(Assign[Media.Zone],WriteBlock))
- {
- Set_D_Bit(Assign[Media.Zone],WriteBlock);
- AssignStart[Media.Zone]=WriteBlock+1;
- Media.PhyBlock=WriteBlock;
- SectCopyMode=REQ_ERASE;
- //ErrXDCode = NO_ERROR;
- return(SMSUCCESS);
+ for (WriteBlock = 0;
+ WriteBlock < AssignStart[Media.Zone]; WriteBlock++) {
+ if (!Chk_D_Bit(Assign[Media.Zone], WriteBlock)) {
+ Set_D_Bit(Assign[Media.Zone], WriteBlock);
+ AssignStart[Media.Zone] = WriteBlock + 1;
+ Media.PhyBlock = WriteBlock;
+ SectCopyMode = REQ_ERASE;
+ return SMSUCCESS;
}
}
- WriteBlock=NO_ASSIGN;
+ WriteBlock = NO_ASSIGN;
ErrCode = ERR_WriteFault;
- // For xD test
- //Ssfdc.Attribute |= WP;
- //ErrXDCode = ERR_WrtProtect;
- return(ERROR);
+
+ return ERROR;
}
-//----- Release_D_ReadBlock() ------------------------------------------
+/* ----- Release_D_ReadBlock() ------------------------------------------ */
int Release_D_ReadBlock(struct us_data *us)
{
DWORD mode;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- mode=SectCopyMode;
- SectCopyMode=COMPLETED;
+ mode = SectCopyMode;
+ SectCopyMode = COMPLETED;
- if (mode==COMPLETED)
- return(SMSUCCESS);
+ if (mode == COMPLETED)
+ return SMSUCCESS;
- Log2Phy[Media.Zone][Media.LogBlock]=WriteBlock;
- Media.PhyBlock=ReadBlock;
+ Log2Phy[Media.Zone][Media.LogBlock] = WriteBlock;
+ Media.PhyBlock = ReadBlock;
- if (Media.PhyBlock==NO_ASSIGN)
- {
- Media.PhyBlock=WriteBlock;
- return(SMSUCCESS);
+ if (Media.PhyBlock == NO_ASSIGN) {
+ Media.PhyBlock = WriteBlock;
+ return SMSUCCESS;
}
- if (mode==REQ_ERASE)
- {
- if (Erase_D_PhyOneBlock(us))
- {
- if (ErrCode==ERR_HwError) return(ERROR);
- if (MarkFail_D_PhyOneBlock(us)) return(ERROR);
- }
- else
- Clr_D_Bit(Assign[Media.Zone],Media.PhyBlock);
- }
- else if (MarkFail_D_PhyOneBlock(us))
- return(ERROR);
+ if (mode == REQ_ERASE) {
+ if (Erase_D_PhyOneBlock(us)) {
+ if (ErrCode == ERR_HwError)
+ return ERROR;
+ if (MarkFail_D_PhyOneBlock(us))
+ return ERROR;
+ } else
+ Clr_D_Bit(Assign[Media.Zone], Media.PhyBlock);
+ } else if (MarkFail_D_PhyOneBlock(us))
+ return ERROR;
- Media.PhyBlock=WriteBlock;
- return(SMSUCCESS);
+ Media.PhyBlock = WriteBlock;
+ return SMSUCCESS;
}
-//----- Release_D_WriteBlock() -----------------------------------------
+/* ----- Release_D_WriteBlock() ----------------------------------------- */
int Release_D_WriteBlock(struct us_data *us)
{
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- SectCopyMode=COMPLETED;
- Media.PhyBlock=WriteBlock;
+ SectCopyMode = COMPLETED;
+ Media.PhyBlock = WriteBlock;
if (MarkFail_D_PhyOneBlock(us))
- return(ERROR);
+ return ERROR;
- Media.PhyBlock=ReadBlock;
- return(SMSUCCESS);
+ Media.PhyBlock = ReadBlock;
+ return SMSUCCESS;
}
-//SmartMedia Physical Sector Data Copy Subroutine
-//----- Copy_D_PhyOneSect() --------------------------------------------
+/* SmartMedia Physical Sector Data Copy Subroutine */
+/* ----- Copy_D_PhyOneSect() -------------------------------------------- */
int Copy_D_PhyOneSect(struct us_data *us)
{
int i;
DWORD err, retry;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
/* pr_info("Copy_D_PhyOneSect --- Secotr = %x\n", Media.Sector); */
- if (ReadBlock!=NO_ASSIGN)
- {
- Media.PhyBlock=ReadBlock;
- for(retry=0; retry<2; retry++)
- {
- if (retry!=0)
- {
+ if (ReadBlock != NO_ASSIGN) {
+ Media.PhyBlock = ReadBlock;
+ for (retry = 0; retry < 2; retry++) {
+ if (retry != 0) {
Ssfdc_D_Reset(us);
- if (Ssfdc_D_ReadCisSect(us,WorkBuf,WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
+ if (Ssfdc_D_ReadCisSect(us, WorkBuf,
+ WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
- if (Check_D_CISdata(WorkBuf,WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
+ if (Check_D_CISdata(WorkBuf, WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ }
+
+ if (Ssfdc_D_ReadSect(us, WorkBuf, WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ if (Check_D_DataStatus(WorkRedund)) {
+ err = ERROR;
+ break;
+ }
+ if (!Check_D_ReadError(WorkRedund)) {
+ err = SMSUCCESS;
+ break;
+ }
+ if (!Check_D_Correct(WorkBuf, WorkRedund)) {
+ err = SMSUCCESS;
+ break;
}
- if (Ssfdc_D_ReadSect(us,WorkBuf,WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Check_D_DataStatus(WorkRedund))
- { err=ERROR; break; }
- if (!Check_D_ReadError(WorkRedund))
- { err=SMSUCCESS; break; }
- if (!Check_D_Correct(WorkBuf,WorkRedund))
- { err=SMSUCCESS; break; }
-
- err=ERROR;
- SectCopyMode=REQ_FAIL;
+ err = ERROR;
+ SectCopyMode = REQ_FAIL;
}
- }
- else
- {
- err=SMSUCCESS;
- for(i=0; i<SECTSIZE; i++)
- WorkBuf[i]=DUMMY_DATA;
+ } else {
+ err = SMSUCCESS;
+ for (i = 0; i < SECTSIZE; i++)
+ WorkBuf[i] = DUMMY_DATA;
Clr_D_RedundantData(WorkRedund);
}
Set_D_LogBlockAddr(WorkRedund);
- if (err==ERROR)
- {
+ if (err == ERROR) {
Set_D_RightECC(WorkRedund);
Set_D_DataStaus(WorkRedund);
}
- Media.PhyBlock=WriteBlock;
+ Media.PhyBlock = WriteBlock;
- if (Ssfdc_D_WriteSectForCopy(us, WorkBuf, WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Ssfdc_D_CheckStatus())
- { ErrCode = ERR_WriteFault; return(ERROR); }
+ if (Ssfdc_D_WriteSectForCopy(us, WorkBuf, WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ if (Ssfdc_D_CheckStatus()) {
+ ErrCode = ERR_WriteFault;
+ return ERROR;
+ }
- Media.PhyBlock=ReadBlock;
- return(SMSUCCESS);
+ Media.PhyBlock = ReadBlock;
+ return SMSUCCESS;
}
-//SmartMedia Physical Sector Read/Write/Erase Subroutine
-//----- Read_D_PhyOneSect() --------------------------------------------
+/* SmartMedia Physical Sector Read/Write/Erase Subroutine */
+/* ----- Read_D_PhyOneSect() -------------------------------------------- */
int Read_D_PhyOneSect(struct us_data *us, WORD count, BYTE *buf)
{
int i;
DWORD retry;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
-
- if (Media.PhyBlock==NO_ASSIGN)
- {
- for(i=0; i<SECTSIZE; i++)
- *buf++=DUMMY_DATA;
- return(SMSUCCESS);
+
+ if (Media.PhyBlock == NO_ASSIGN) {
+ for (i = 0; i < SECTSIZE; i++)
+ *buf++ = DUMMY_DATA;
+ return SMSUCCESS;
}
- for(retry=0; retry<2; retry++)
- {
- if (retry!=0)
- {
+ for (retry = 0; retry < 2; retry++) {
+ if (retry != 0) {
Ssfdc_D_Reset(us);
- if (Ssfdc_D_ReadCisSect(us,WorkBuf,WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Check_D_CISdata(WorkBuf,WorkRedund))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
+ if (Ssfdc_D_ReadCisSect(us, WorkBuf, WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ if (Check_D_CISdata(WorkBuf, WorkRedund)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
}
- //if (Ssfdc_D_ReadSect(fdoExt,buf,Redundant))
- if (Ssfdc_D_ReadBlock(us,count,buf,Redundant))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Check_D_DataStatus(Redundant))
- { ErrCode = ERR_DataStatus; return(ERROR); }
+ if (Ssfdc_D_ReadBlock(us, count, buf, Redundant)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ if (Check_D_DataStatus(Redundant)) {
+ ErrCode = ERR_DataStatus;
+ return ERROR;
+ }
if (!Check_D_ReadError(Redundant))
- return(SMSUCCESS);
+ return SMSUCCESS;
- if (!Check_D_Correct(buf,Redundant))
- { ErrCode = ERR_CorReadErr; return(ERROR); }
+ if (!Check_D_Correct(buf, Redundant)) {
+ ErrCode = ERR_CorReadErr;
+ return ERROR;
+ }
}
ErrCode = ERR_EccReadErr;
- return(ERROR);
+ return ERROR;
}
-/*
-//----- Write_D_PhyOneSect() -------------------------------------------
-int Write_D_PhyOneSect(PFDO_DEVICE_EXTENSION fdoExt, WORD count, BYTE *buf)
-{
- SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- ADDRESS_T bb = (ADDRESS_T) &Media;
-
- //if (Ssfdc_D_WriteSect(fdoExt,buf,Redundant))
- if (Ssfdc_D_WriteBlock(fdoExt,count,buf,Redundant))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Ssfdc_D_CheckStatus())
- { ErrCode = ERR_WriteFault; return(ERROR); }
- return(SMSUCCESS);
-}
-*/
-//----- Erase_D_PhyOneBlock() ------------------------------------------
+/* ----- Erase_D_PhyOneBlock() ------------------------------------------ */
int Erase_D_PhyOneBlock(struct us_data *us)
{
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
-
- if (Ssfdc_D_EraseBlock(us))
- { ErrCode = ERR_HwError; MediaChange=ERROR; return(ERROR); }
- if (Ssfdc_D_CheckStatus())
- { ErrCode = ERR_WriteFault; return(ERROR); }
+ if (Ssfdc_D_EraseBlock(us)) {
+ ErrCode = ERR_HwError;
+ MediaChange = ERROR;
+ return ERROR;
+ }
+ if (Ssfdc_D_CheckStatus()) {
+ ErrCode = ERR_WriteFault;
+ return ERROR;
+ }
- return(SMSUCCESS);
+ return SMSUCCESS;
}
-//SmartMedia Physical Format Check Local Subroutine
-//----- Set_D_PhyFmtValue() --------------------------------------------
+/* SmartMedia Physical Format Check Local Subroutine */
+/* ----- Set_D_PhyFmtValue() -------------------------------------------- */
int Set_D_PhyFmtValue(struct us_data *us)
{
-// PPDO_DEVICE_EXTENSION pdoExt;
-// BYTE idcode[4];
-// DWORD UserDefData_1, UserDefData_2, Data, mask;
-//
-// //if (!fdoExt->ChildDeviceObject) return(ERROR);
-// //pdoExt = fdoExt->ChildDeviceObject->DeviceExtension;
-//
-// Ssfdc_D_ReadID(idcode, READ_ID_1);
-//
- //if (Set_D_SsfdcModel(idcode[1]))
- if (Set_D_SsfdcModel(us->SM_DeviceID))
- return(ERROR);
-
-// //Use Multi-function pin to differentiate SM and xD.
-// UserDefData_1 = ReadPCIReg(fdoExt->BusID, fdoExt->DevID, fdoExt->FuncID, PCI_REG_USER_DEF) & 0x80;
-// if (UserDefData_1)
-// {
-// if ( READ_PORT_BYTE(SM_REG_INT_STATUS) & 0x80 ) fdoExt->DiskType = DISKTYPE_XD;
-// if ( READ_PORT_BYTE(SM_REG_INT_STATUS) & 0x40 ) fdoExt->DiskType = DISKTYPE_SM;
-//
-// if ( IsXDCompliance && (fdoExt->DiskType == DISKTYPE_XD) )
-// {
-// Ssfdc_D_ReadID(idcode, READ_ID_3);
-// if (idcode[2] != 0xB5)
-// return(ERROR);
-// }
-// }
-//
-// //Use GPIO to differentiate SM and xD.
-// UserDefData_2 = ReadPCIReg(fdoExt->BusID, fdoExt->DevID, fdoExt->FuncID, PCI_REG_USER_DEF) >> 8;
-// if ( UserDefData_2 )
-// {
-// Data = ReadPCIReg(fdoExt->BusID, fdoExt->DevID, 0, 0xAC);
-//
-// mask = 1 << (UserDefData_2-1);
-// // 1 : xD , 0 : SM
-// if ( Data & mask)
-// fdoExt->DiskType = DISKTYPE_XD;
-// else
-// fdoExt->DiskType = DISKTYPE_SM;
-//
-// if ( IsXDCompliance && (fdoExt->DiskType == DISKTYPE_XD) )
-// {
-// Ssfdc_D_ReadID(idcode, READ_ID_3);
-// if (idcode[2] != 0xB5)
-// return(ERROR);
-// }
-// }
-//
-// if ( !(UserDefData_1 | UserDefData_2) )
-// {
-// // Use UserDefine Register to differentiate SM and xD.
-// Ssfdc_D_ReadID(idcode, READ_ID_3);
-//
-// if (idcode[2] == 0xB5)
-// fdoExt->DiskType = DISKTYPE_XD;
-// else
-// {
-// if (!IsXDCompliance)
-// fdoExt->DiskType = DISKTYPE_SM;
-// else
-// return(ERROR);
-// }
-//
-// if (fdoExt->UserDef_DiskType == 0x04) fdoExt->DiskType = DISKTYPE_XD;
-// if (fdoExt->UserDef_DiskType == 0x08) fdoExt->DiskType = DISKTYPE_SM;
-// }
-//
-// if (!fdoExt->UserDef_DisableWP)
-// {
-// if (fdoExt->DiskType == DISKTYPE_SM)
-// {
-// if (Check_D_SsfdcWP())
-// Ssfdc.Attribute|=WP;
-// }
-// }
-
- return(SMSUCCESS);
+ if (Set_D_SsfdcModel(us->SM_DeviceID))
+ return ERROR;
+
+ return SMSUCCESS;
}
-//----- Search_D_CIS() -------------------------------------------------
+/* ----- Search_D_CIS() ------------------------------------------------- */
int Search_D_CIS(struct us_data *us)
{
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
-
- Media.Zone=0; Media.Sector=0;
+ Media.Zone = 0;
+ Media.Sector = 0;
- for (Media.PhyBlock=0; Media.PhyBlock<(Ssfdc.MaxBlocks-Ssfdc.MaxLogBlocks-1); Media.PhyBlock++)
- {
- if (Ssfdc_D_ReadRedtData(us, Redundant))
- {
+ for (Media.PhyBlock = 0;
+ Media.PhyBlock < (Ssfdc.MaxBlocks - Ssfdc.MaxLogBlocks - 1);
+ Media.PhyBlock++) {
+ if (Ssfdc_D_ReadRedtData(us, Redundant)) {
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
if (!Check_D_FailBlock(Redundant))
break;
}
- if (Media.PhyBlock==(Ssfdc.MaxBlocks-Ssfdc.MaxLogBlocks-1))
- {
+ if (Media.PhyBlock == (Ssfdc.MaxBlocks - Ssfdc.MaxLogBlocks - 1)) {
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
- while (Media.Sector<CIS_SEARCH_SECT)
- {
- if (Media.Sector)
- {
- if (Ssfdc_D_ReadRedtData(us, Redundant))
- {
+ while (Media.Sector < CIS_SEARCH_SECT) {
+ if (Media.Sector) {
+ if (Ssfdc_D_ReadRedtData(us, Redundant)) {
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
}
- if (!Check_D_DataStatus(Redundant))
- {
- if (Ssfdc_D_ReadSect(us,WorkBuf,Redundant))
- {
+ if (!Check_D_DataStatus(Redundant)) {
+ if (Ssfdc_D_ReadSect(us, WorkBuf, Redundant)) {
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
- if (Check_D_CISdata(WorkBuf,Redundant))
- {
+ if (Check_D_CISdata(WorkBuf, Redundant)) {
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
- CisArea.PhyBlock=Media.PhyBlock;
- CisArea.Sector=Media.Sector;
+ CisArea.PhyBlock = Media.PhyBlock;
+ CisArea.Sector = Media.Sector;
Ssfdc_D_Reset(us);
- return(SMSUCCESS);
+ return SMSUCCESS;
}
Media.Sector++;
}
Ssfdc_D_Reset(us);
- return(ERROR);
+ return ERROR;
}
-//----- Make_D_LogTable() ----------------------------------------------
+/* ----- Make_D_LogTable() ---------------------------------------------- */
int Make_D_LogTable(struct us_data *us)
{
- WORD phyblock,logblock;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
+ WORD phyblock, logblock;
- if (Log2Phy[Media.Zone]==NULL)
- {
- Log2Phy[Media.Zone] = kmalloc(MAX_LOGBLOCK*sizeof(WORD), GFP_KERNEL);
+ if (Log2Phy[Media.Zone] == NULL) {
+ Log2Phy[Media.Zone] = kmalloc(MAX_LOGBLOCK * sizeof(WORD),
+ GFP_KERNEL);
/* pr_info("ExAllocatePool Zone = %x, Addr = %x\n",
Media.Zone, Log2Phy[Media.Zone]); */
- if (Log2Phy[Media.Zone]==NULL)
- return(ERROR);
+ if (Log2Phy[Media.Zone] == NULL)
+ return ERROR;
}
- Media.Sector=0;
-
- //for(Media.Zone=0; Media.Zone<MAX_ZONENUM; Media.Zone++)
- //for(Media.Zone=0; Media.Zone<Ssfdc.MaxZones; Media.Zone++)
- {
- /* pr_info("Make_D_LogTable --- MediaZone = 0x%x\n",
- Media.Zone); */
- for(Media.LogBlock=0; Media.LogBlock<Ssfdc.MaxLogBlocks; Media.LogBlock++)
- Log2Phy[Media.Zone][Media.LogBlock]=NO_ASSIGN;
-
- for(Media.PhyBlock=0; Media.PhyBlock<(MAX_BLOCKNUM/8); Media.PhyBlock++)
- Assign[Media.Zone][Media.PhyBlock]=0x00;
-
- for(Media.PhyBlock=0; Media.PhyBlock<Ssfdc.MaxBlocks; Media.PhyBlock++)
- {
- if ((!Media.Zone) && (Media.PhyBlock<=CisArea.PhyBlock))
- {
- Set_D_Bit(Assign[Media.Zone],Media.PhyBlock);
- continue;
- }
+ Media.Sector = 0;
+
+ /* pr_info("Make_D_LogTable --- MediaZone = 0x%x\n",
+ Media.Zone); */
+ for (Media.LogBlock = 0; Media.LogBlock < Ssfdc.MaxLogBlocks;
+ Media.LogBlock++)
+ Log2Phy[Media.Zone][Media.LogBlock] = NO_ASSIGN;
+
+ for (Media.PhyBlock = 0; Media.PhyBlock < (MAX_BLOCKNUM / 8);
+ Media.PhyBlock++)
+ Assign[Media.Zone][Media.PhyBlock] = 0x00;
+
+ for (Media.PhyBlock = 0; Media.PhyBlock < Ssfdc.MaxBlocks;
+ Media.PhyBlock++) {
+ if ((!Media.Zone) && (Media.PhyBlock <= CisArea.PhyBlock)) {
+ Set_D_Bit(Assign[Media.Zone], Media.PhyBlock);
+ continue;
+ }
+
+ if (Ssfdc_D_ReadRedtData(us, Redundant)) {
+ Ssfdc_D_Reset(us);
+ return ERROR;
+ }
+
+ if (!Check_D_DataBlank(Redundant))
+ continue;
+
+ Set_D_Bit(Assign[Media.Zone], Media.PhyBlock);
- if (Ssfdc_D_ReadRedtData(us, Redundant))
- { Ssfdc_D_Reset(us); return(ERROR); }
+ if (Check_D_FailBlock(Redundant))
+ continue;
- if (!Check_D_DataBlank(Redundant))
- continue;
+ if (Load_D_LogBlockAddr(Redundant))
+ continue;
- Set_D_Bit(Assign[Media.Zone],Media.PhyBlock);
+ if (Media.LogBlock >= Ssfdc.MaxLogBlocks)
+ continue;
- if (Check_D_FailBlock(Redundant))
- continue;
+ if (Log2Phy[Media.Zone][Media.LogBlock] == NO_ASSIGN) {
+ Log2Phy[Media.Zone][Media.LogBlock] = Media.PhyBlock;
+ continue;
+ }
- //if (Check_D_DataStatus(Redundant))
- // continue;
+ phyblock = Media.PhyBlock;
+ logblock = Media.LogBlock;
+ Media.Sector = (BYTE)(Ssfdc.MaxSectors - 1);
- if (Load_D_LogBlockAddr(Redundant))
- continue;
+ if (Ssfdc_D_ReadRedtData(us, Redundant)) {
+ Ssfdc_D_Reset(us);
+ return ERROR;
+ }
- if (Media.LogBlock>=Ssfdc.MaxLogBlocks)
- continue;
+ if (!Load_D_LogBlockAddr(Redundant) &&
+ (Media.LogBlock == logblock)) {
+ Media.PhyBlock = Log2Phy[Media.Zone][logblock];
- if (Log2Phy[Media.Zone][Media.LogBlock]==NO_ASSIGN)
- {
- Log2Phy[Media.Zone][Media.LogBlock]=Media.PhyBlock;
- continue;
+ if (Ssfdc_D_ReadRedtData(us, Redundant)) {
+ Ssfdc_D_Reset(us);
+ return ERROR;
}
- phyblock = Media.PhyBlock;
- logblock = Media.LogBlock;
- Media.Sector = (BYTE)(Ssfdc.MaxSectors-1);
-
- if (Ssfdc_D_ReadRedtData(us, Redundant))
- { Ssfdc_D_Reset(us); return(ERROR); }
-
- if (!Load_D_LogBlockAddr(Redundant))
- {
- if (Media.LogBlock==logblock)
- {
- Media.PhyBlock=Log2Phy[Media.Zone][logblock];
-
- if (Ssfdc_D_ReadRedtData(us, Redundant))
- { Ssfdc_D_Reset(us); return(ERROR); }
-
- Media.PhyBlock=phyblock;
-
- if (!Load_D_LogBlockAddr(Redundant))
- {
- if (Media.LogBlock!=logblock)
- {
- Media.PhyBlock=Log2Phy[Media.Zone][logblock];
- Log2Phy[Media.Zone][logblock]=phyblock;
- }
- }
- else
- {
- Media.PhyBlock=Log2Phy[Media.Zone][logblock];
- Log2Phy[Media.Zone][logblock]=phyblock;
- }
+ Media.PhyBlock = phyblock;
+
+ if (!Load_D_LogBlockAddr(Redundant)) {
+ if (Media.LogBlock != logblock) {
+ Media.PhyBlock =
+ Log2Phy[Media.Zone][logblock];
+ Log2Phy[Media.Zone][logblock] =
+ phyblock;
}
+ } else {
+ Media.PhyBlock = Log2Phy[Media.Zone][logblock];
+ Log2Phy[Media.Zone][logblock] = phyblock;
}
+ }
+
+ Media.Sector = 0;
+ Media.PhyBlock = phyblock;
+
+ AssignStart[Media.Zone] = 0;
- Media.Sector=0;
-
-// here Not yet
-//#ifdef L2P_ERR_ERASE
-// if (!(Ssfdc.Attribute &MWP))
-// {
-// Ssfdc_D_Reset(fdoExt);
-// if (Ssfdc_D_EraseBlock(fdoExt))
-// return(ERROR);
-//
-// if (Ssfdc_D_CheckStatus())
-// {
-// if (MarkFail_D_PhyOneBlock())
-// return(ERROR);
-// }
-// else
-// Clr_D_Bit(Assign[Media.Zone],Media.PhyBlock);
-// }
-//#else
-// Ssfdc.Attribute|=MWP;
-//#endif
- Media.PhyBlock=phyblock;
-
- } // End for (Media.PhyBlock<Ssfdc.MaxBlocks)
-
- AssignStart[Media.Zone]=0;
-
- } // End for (Media.Zone<MAX_ZONENUM)
+ } /* End for (Media.Zone<MAX_ZONENUM) */
Ssfdc_D_Reset(us);
- return(SMSUCCESS);
+ return SMSUCCESS;
}
-//----- MarkFail_D_PhyOneBlock() ---------------------------------------
+/* ----- MarkFail_D_PhyOneBlock() --------------------------------------- */
int MarkFail_D_PhyOneBlock(struct us_data *us)
{
BYTE sect;
- //SSFDCTYPE_T aa = (SSFDCTYPE_T ) &Ssfdc;
- //ADDRESS_T bb = (ADDRESS_T) &Media;
- sect=Media.Sector;
+ sect = Media.Sector;
Set_D_FailBlock(WorkRedund);
- //Ssfdc_D_WriteRedtMode();
- for(Media.Sector=0; Media.Sector<Ssfdc.MaxSectors; Media.Sector++)
- {
- if (Ssfdc_D_WriteRedtData(us, WorkRedund))
- {
+ for (Media.Sector = 0; Media.Sector < Ssfdc.MaxSectors;
+ Media.Sector++) {
+ if (Ssfdc_D_WriteRedtData(us, WorkRedund)) {
Ssfdc_D_Reset(us);
Media.Sector = sect;
ErrCode = ERR_HwError;
MediaChange = ERROR;
- return(ERROR);
- } // NO Status Check
+ return ERROR;
+ } /* NO Status Check */
}
Ssfdc_D_Reset(us);
- Media.Sector=sect;
- return(SMSUCCESS);
+ Media.Sector = sect;
+ return SMSUCCESS;
}
-/*
-//
-////----- SM_Init() ----------------------------------------------------
-//void SM_Init(void)
-//{
-// _Hw_D_ClrIntCardChg();
-// _Hw_D_SetIntMask();
-// // For DMA Interrupt
-// _Hw_D_ClrDMAIntCardChg();
-// _Hw_D_SetDMAIntMask();
-//}
-//
-////----- Media_D_EraseAllRedtData() -----------------------------------
-//int Media_D_EraseAllRedtData(DWORD Index, BOOLEAN CheckBlock)
-//{
-// BYTE i;
-//
-// if (Check_D_MediaPower())
-// return(ErrCode);
-//
-// if (Check_D_MediaWP())
-// return(ErrCode);
-//
-// for (i=0; i<REDTSIZE; i++)
-// WorkRedund[i] = 0xFF;
-//
-// Media.Zone = (BYTE)Index;
-// for (Media.PhyBlock=0; Media.PhyBlock<Ssfdc.MaxBlocks; Media.PhyBlock++)
-// {
-// if ((!Media.Zone) && (Media.PhyBlock<=CisArea.PhyBlock))
-// continue;
-//
-// if (Ssfdc_D_EraseBlock(fdoExt))
-// {
-// ErrCode = ERR_HwError;
-// return(ERROR);
-// }
-//
-// for(Media.Sector=0; Media.Sector<Ssfdc.MaxSectors; Media.Sector++)
-// {
-// Ssfdc_D_WriteRedtMode();
-//
-// if (Ssfdc_D_WriteRedtData(WorkRedund))
-// {
-// Ssfdc_D_Reset(fdoExt);
-// ErrCode = ERR_HwError;
-// MediaChange = ERROR;
-// return(ERROR);
-// } // NO Status Check
-// }
-//
-// Ssfdc_D_Reset(fdoExt);
-// }
-//
-// Ssfdc_D_Reset(fdoExt);
-//
-// return(SMSUCCESS);
-//}
-//
-////----- Media_D_GetMediaInfo() ---------------------------------------
-//DWORD Media_D_GetMediaInfo(PFDO_DEVICE_EXTENSION fdoExt, PIOCTL_MEDIA_INFO_IN pParamIn, PIOCTL_MEDIA_INFO_OUT pParamOut)
-//{
-// pParamOut->ErrCode = STATUS_CMD_FAIL;
-//
-// Init_D_SmartMedia();
-//
-// if (Check_D_MediaPower())
-// return (ErrCode==ERR_NoSmartMedia) ? STATUS_CMD_NO_MEDIA : STATUS_CMD_FAIL;
-//
-// if (Set_D_PhyFmtValue(fdoExt))
-// return STATUS_CMD_FAIL;
-//
-// //usleep(56*1024);
-// if (Search_D_CIS(fdoExt))
-// return STATUS_CMD_FAIL;
-//
-// if (Check_D_MediaWP())
-// return STATUS_CMD_MEDIA_WP;
-//
-// pParamOut->PageSize = Ssfdc.MaxSectors;
-// pParamOut->BlockSize = Ssfdc.MaxBlocks;
-// pParamOut->ZoneSize = Ssfdc.MaxZones;
-//
-// return STATUS_CMD_SUCCESS;
-//}*/
diff --git a/drivers/staging/keucr/smilsub.c b/drivers/staging/keucr/smilsub.c
index d4dd5ed516ce..346c5702f411 100644
--- a/drivers/staging/keucr/smilsub.c
+++ b/drivers/staging/keucr/smilsub.c
@@ -33,9 +33,9 @@ void _Set_D_ECCdata(BYTE, BYTE *);
void _Calc_D_ECCdata(BYTE *);
-struct SSFDCTYPE Ssfdc;
-struct ADDRESS Media;
-struct CIS_AREA CisArea;
+struct keucr_media_info Ssfdc;
+struct keucr_media_address Media;
+struct keucr_media_area CisArea;
static BYTE EccBuf[6];
extern PBYTE SMHostAddr;
@@ -103,8 +103,10 @@ int Load_D_LogBlockAddr(BYTE *redundant)
{
WORD addr1, addr2;
- addr1 = (WORD)*(redundant + REDT_ADDR1H)*0x0100 + (WORD)*(redundant + REDT_ADDR1L);
- addr2 = (WORD)*(redundant + REDT_ADDR2H)*0x0100 + (WORD)*(redundant + REDT_ADDR2L);
+ addr1 = (WORD)*(redundant + REDT_ADDR1H)*0x0100 +
+ (WORD)*(redundant + REDT_ADDR1L);
+ addr2 = (WORD)*(redundant + REDT_ADDR2H)*0x0100 +
+ (WORD)*(redundant + REDT_ADDR2L);
if (addr1 == addr2)
if ((addr1 & 0xF000) == 0x1000) {
@@ -151,7 +153,8 @@ void Set_D_LogBlockAddr(BYTE *redundant)
if ((hweight16(addr) % 2))
addr++;
- *(redundant + REDT_ADDR1H) = *(redundant + REDT_ADDR2H) = (BYTE)(addr / 0x0100);
+ *(redundant + REDT_ADDR1H) = *(redundant + REDT_ADDR2H) =
+ (BYTE)(addr / 0x0100);
*(redundant + REDT_ADDR1L) = *(redundant + REDT_ADDR2L) = (BYTE)addr;
}
@@ -191,7 +194,9 @@ int Ssfdc_D_ReadCisSect(struct us_data *us, BYTE *buf, BYTE *redundant)
Media.Sector = CisArea.Sector;
if (Ssfdc_D_ReadSect(us, buf, redundant)) {
- Media.Zone = zone; Media.PhyBlock = block; Media.Sector = sector;
+ Media.Zone = zone;
+ Media.PhyBlock = block;
+ Media.Sector = sector;
return ERROR;
}
@@ -209,7 +214,8 @@ int Ssfdc_D_ReadSect(struct us_data *us, BYTE *buf, BYTE *redundant)
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -252,7 +258,8 @@ int Ssfdc_D_ReadSect(struct us_data *us, BYTE *buf, BYTE *redundant)
}
/* ----- Ssfdc_D_ReadBlock() --------------------------------------------- */
-int Ssfdc_D_ReadBlock(struct us_data *us, WORD count, BYTE *buf, BYTE *redundant)
+int Ssfdc_D_ReadBlock(struct us_data *us, WORD count, BYTE *buf,
+ BYTE *redundant)
{
struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
int result;
@@ -260,7 +267,8 @@ int Ssfdc_D_ReadBlock(struct us_data *us, WORD count, BYTE *buf, BYTE *redundant
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -304,7 +312,8 @@ int Ssfdc_D_ReadBlock(struct us_data *us, WORD count, BYTE *buf, BYTE *redundant
/* ----- Ssfdc_D_CopyBlock() -------------------------------------------- */
-int Ssfdc_D_CopyBlock(struct us_data *us, WORD count, BYTE *buf, BYTE *redundant)
+int Ssfdc_D_CopyBlock(struct us_data *us, WORD count, BYTE *buf,
+ BYTE *redundant)
{
struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
int result;
@@ -312,7 +321,8 @@ int Ssfdc_D_CopyBlock(struct us_data *us, WORD count, BYTE *buf, BYTE *redundant
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -358,7 +368,8 @@ int Ssfdc_D_WriteSectForCopy(struct us_data *us, BYTE *buf, BYTE *redundant)
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -396,7 +407,8 @@ int Ssfdc_D_EraseBlock(struct us_data *us)
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -431,7 +443,8 @@ int Ssfdc_D_ReadRedtData(struct us_data *us, BYTE *redundant)
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -470,7 +483,8 @@ int Ssfdc_D_WriteRedtData(struct us_data *us, BYTE *redundant)
result = ENE_LoadBinCode(us, SM_RW_PATTERN);
if (result != USB_STOR_XFER_GOOD) {
- printk("Load SM RW Code Fail !!\n");
+ dev_err(&us->pusb_dev->dev,
+ "Failed to load SmartMedia read/write code\n");
return USB_STOR_TRANSPORT_ERROR;
}
@@ -611,7 +625,7 @@ int Set_D_SsfdcModel(BYTE dcode)
return ERROR;
}
- return SMSUCCESS;
+ return SMSUCCESS;
}
/* ----- _Check_D_DevCode() --------------------------------------------- */
@@ -686,8 +700,8 @@ int Check_D_CISdata(BYTE *buf, BYTE *redundant)
/* ----- Set_D_RightECC() ---------------------------------------------- */
void Set_D_RightECC(BYTE *redundant)
{
- /* Driver ECC Check */
- return;
+ /* Driver ECC Check */
+ return;
}
diff --git a/drivers/staging/keucr/smscsi.c b/drivers/staging/keucr/smscsi.c
index 58b555571185..572d6489b66b 100644
--- a/drivers/staging/keucr/smscsi.c
+++ b/drivers/staging/keucr/smscsi.c
@@ -56,7 +56,7 @@ int SM_SCSIIrp(struct us_data *us, struct scsi_cmnd *srb)
return result;
}
-/* ----- SM_SCSI_Test_Unit_Ready() -------------------------------------------------- */
+/* ----- SM_SCSI_Test_Unit_Ready() ------------------------------------- */
int SM_SCSI_Test_Unit_Ready(struct us_data *us, struct scsi_cmnd *srb)
{
if (us->SM_Status.Insert && us->SM_Status.Ready)
@@ -69,21 +69,27 @@ int SM_SCSI_Test_Unit_Ready(struct us_data *us, struct scsi_cmnd *srb)
return USB_STOR_TRANSPORT_GOOD;
}
-/* ----- SM_SCSI_Inquiry() -------------------------------------------------- */
+/* ----- SM_SCSI_Inquiry() --------------------------------------------- */
int SM_SCSI_Inquiry(struct us_data *us, struct scsi_cmnd *srb)
{
- BYTE data_ptr[36] = {0x00, 0x80, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x55, 0x53, 0x42, 0x32, 0x2E, 0x30, 0x20, 0x20, 0x43, 0x61, 0x72, 0x64, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x30, 0x31, 0x30, 0x30};
+ BYTE data_ptr[36] = {0x00, 0x80, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00,
+ 0x55, 0x53, 0x42, 0x32, 0x2E, 0x30, 0x20,
+ 0x20, 0x43, 0x61, 0x72, 0x64, 0x52, 0x65,
+ 0x61, 0x64, 0x65, 0x72, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x30, 0x31, 0x30, 0x30};
usb_stor_set_xfer_buf(us, data_ptr, 36, srb, TO_XFER_BUF);
return USB_STOR_TRANSPORT_GOOD;
}
-/* ----- SM_SCSI_Mode_Sense() -------------------------------------------------- */
+/* ----- SM_SCSI_Mode_Sense() ------------------------------------------ */
int SM_SCSI_Mode_Sense(struct us_data *us, struct scsi_cmnd *srb)
{
- BYTE mediaNoWP[12] = {0x0b, 0x00, 0x00, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00};
- BYTE mediaWP[12] = {0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00};
+ BYTE mediaNoWP[12] = {0x0b, 0x00, 0x00, 0x08, 0x00, 0x00,
+ 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00};
+ BYTE mediaWP[12] = {0x0b, 0x00, 0x80, 0x08, 0x00, 0x00,
+ 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00};
if (us->SM_Status.WtP)
usb_stor_set_xfer_buf(us, mediaWP, 12, srb, TO_XFER_BUF);
@@ -94,7 +100,7 @@ int SM_SCSI_Mode_Sense(struct us_data *us, struct scsi_cmnd *srb)
return USB_STOR_TRANSPORT_GOOD;
}
-/* ----- SM_SCSI_Read_Capacity() -------------------------------------------------- */
+/* ----- SM_SCSI_Read_Capacity() --------------------------------------- */
int SM_SCSI_Read_Capacity(struct us_data *us, struct scsi_cmnd *srb)
{
unsigned int offset = 0;
@@ -103,14 +109,14 @@ int SM_SCSI_Read_Capacity(struct us_data *us, struct scsi_cmnd *srb)
WORD bl_len;
BYTE buf[8];
- printk("SM_SCSI_Read_Capacity\n");
+ dev_dbg(&us->pusb_dev->dev, "SM_SCSI_Read_Capacity\n");
bl_len = 0x200;
bl_num = Ssfdc.MaxLogBlocks * Ssfdc.MaxSectors * Ssfdc.MaxZones - 1;
us->bl_num = bl_num;
- printk("bl_len = %x\n", bl_len);
- printk("bl_num = %x\n", bl_num);
+ dev_dbg(&us->pusb_dev->dev, "bl_len = %x\n", bl_len);
+ dev_dbg(&us->pusb_dev->dev, "bl_num = %x\n", bl_num);
buf[0] = (bl_num >> 24) & 0xff;
buf[1] = (bl_num >> 16) & 0xff;
@@ -131,8 +137,10 @@ int SM_SCSI_Read(struct us_data *us, struct scsi_cmnd *srb)
{
int result = 0;
PBYTE Cdb = srb->cmnd;
- DWORD bn = ((Cdb[2] << 24) & 0xff000000) | ((Cdb[3] << 16) & 0x00ff0000) |
- ((Cdb[4] << 8) & 0x0000ff00) | ((Cdb[5] << 0) & 0x000000ff);
+ DWORD bn = ((Cdb[2] << 24) & 0xff000000) |
+ ((Cdb[3] << 16) & 0x00ff0000) |
+ ((Cdb[4] << 8) & 0x0000ff00) |
+ ((Cdb[5] << 0) & 0x000000ff);
WORD blen = ((Cdb[7] << 8) & 0xff00) | ((Cdb[8] << 0) & 0x00ff);
DWORD blenByte = blen * 0x200;
void *buf;
@@ -161,8 +169,10 @@ int SM_SCSI_Write(struct us_data *us, struct scsi_cmnd *srb)
{
int result = 0;
PBYTE Cdb = srb->cmnd;
- DWORD bn = ((Cdb[2] << 24) & 0xff000000) | ((Cdb[3] << 16) & 0x00ff0000) |
- ((Cdb[4] << 8) & 0x0000ff00) | ((Cdb[5] << 0) & 0x000000ff);
+ DWORD bn = ((Cdb[2] << 24) & 0xff000000) |
+ ((Cdb[3] << 16) & 0x00ff0000) |
+ ((Cdb[4] << 8) & 0x0000ff00) |
+ ((Cdb[5] << 0) & 0x000000ff);
WORD blen = ((Cdb[7] << 8) & 0xff00) | ((Cdb[8] << 0) & 0x00ff);
DWORD blenByte = blen * 0x200;
void *buf;
diff --git a/drivers/staging/keucr/transport.c b/drivers/staging/keucr/transport.c
index 1a8837df0766..aeb2186d62ca 100644
--- a/drivers/staging/keucr/transport.c
+++ b/drivers/staging/keucr/transport.c
@@ -79,6 +79,47 @@ static int usb_stor_msg_common(struct us_data *us, int timeout)
}
/*
+ * usb_stor_print_cmd():
+ */
+static void usb_stor_print_cmd(struct us_data *us, struct scsi_cmnd *srb)
+{
+ PBYTE Cdb = srb->cmnd;
+ DWORD cmd = Cdb[0];
+
+ switch (cmd) {
+ case TEST_UNIT_READY:
+ break;
+ case INQUIRY:
+ dev_dbg(&us->pusb_dev->dev,
+ "scsi cmd %X --- SCSIOP_INQUIRY\n", cmd);
+ break;
+ case MODE_SENSE:
+ dev_dbg(&us->pusb_dev->dev,
+ "scsi cmd %X --- SCSIOP_MODE_SENSE\n", cmd);
+ break;
+ case START_STOP:
+ dev_dbg(&us->pusb_dev->dev,
+ "scsi cmd %X --- SCSIOP_START_STOP\n", cmd);
+ break;
+ case READ_CAPACITY:
+ dev_dbg(&us->pusb_dev->dev,
+ "scsi cmd %X --- SCSIOP_READ_CAPACITY\n", cmd);
+ break;
+ case READ_10:
+ break;
+ case WRITE_10:
+ break;
+ case ALLOW_MEDIUM_REMOVAL:
+ dev_dbg(&us->pusb_dev->dev,
+ "scsi cmd %X --- SCSIOP_ALLOW_MEDIUM_REMOVAL\n", cmd);
+ break;
+ default:
+ dev_dbg(&us->pusb_dev->dev, "scsi cmd %X --- Other cmd\n", cmd);
+ break;
+ }
+}
+
+/*
* usb_stor_control_msg()
*/
int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
@@ -303,7 +344,7 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
int result;
/* pr_info("transport --- usb_stor_invoke_transport\n"); */
- usb_stor_print_cmd(srb);
+ usb_stor_print_cmd(us, srb);
/* send the command to the transport layer */
scsi_set_resid(srb, 0);
result = us->transport(srb, us); /* usb_stor_Bulk_transport; */
@@ -429,7 +470,7 @@ void ENE_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
int result = 0;
/* pr_info("transport --- ENE_stor_invoke_transport\n"); */
- usb_stor_print_cmd(srb);
+ usb_stor_print_cmd(us, srb);
/* send the command to the transport layer */
scsi_set_resid(srb, 0);
if (!(us->SM_Status.Ready))
@@ -708,8 +749,8 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us)
} else {
residue = min(residue, transfer_length);
- scsi_set_resid(srb, max(scsi_get_resid(srb),
- (int) residue));
+ scsi_set_resid(srb, max_t(int, scsi_get_resid(srb),
+ residue));
}
}
diff --git a/drivers/staging/keucr/transport.h b/drivers/staging/keucr/transport.h
index 2a11a98375d7..df34474ae568 100644
--- a/drivers/staging/keucr/transport.h
+++ b/drivers/staging/keucr/transport.h
@@ -29,7 +29,6 @@
extern int usb_stor_Bulk_transport(struct scsi_cmnd *, struct us_data*);
extern int usb_stor_Bulk_max_lun(struct us_data *);
extern int usb_stor_Bulk_reset(struct us_data *);
-extern void usb_stor_print_cmd(struct scsi_cmnd *);
extern void usb_stor_invoke_transport(struct scsi_cmnd *, struct us_data*);
extern void usb_stor_stop_transport(struct us_data *);
extern int usb_stor_control_msg(struct us_data *us, unsigned int pipe,
@@ -61,7 +60,7 @@ extern int ENE_InitMedia(struct us_data *);
extern int ENE_SMInit(struct us_data *);
extern int ENE_SendScsiCmd(struct us_data*, BYTE, void*, int);
extern int ENE_LoadBinCode(struct us_data*, BYTE);
-extern int ENE_Read_BYTE(struct us_data*, WORD index, void *buf);
+extern int ene_read_byte(struct us_data*, WORD index, void *buf);
extern int ENE_Read_Data(struct us_data*, void *buf, unsigned int length);
extern int ENE_Write_Data(struct us_data*, void *buf, unsigned int length);
extern void BuildSenseBuffer(struct scsi_cmnd *, int);
diff --git a/drivers/staging/keucr/usb.c b/drivers/staging/keucr/usb.c
index f656f8aeeda3..ddd2e7390b46 100644
--- a/drivers/staging/keucr/usb.c
+++ b/drivers/staging/keucr/usb.c
@@ -24,13 +24,13 @@ MODULE_LICENSE("GPL");
static unsigned int delay_use = 1;
-static struct usb_device_id eucr_usb_ids [] = {
+static struct usb_device_id eucr_usb_ids[] = {
{ USB_DEVICE(0x058f, 0x6366) },
{ USB_DEVICE(0x0cf2, 0x6230) },
{ USB_DEVICE(0x0cf2, 0x6250) },
{ } /* Terminating entry */
};
-MODULE_DEVICE_TABLE (usb, eucr_usb_ids);
+MODULE_DEVICE_TABLE(usb, eucr_usb_ids);
#ifdef CONFIG_PM
@@ -65,7 +65,7 @@ static int eucr_resume(struct usb_interface *iface)
us->Power_IsResum = true;
- us->SM_Status = *(PSM_STATUS)&tmp;
+ us->SM_Status = *(struct keucr_sm_status *)&tmp;
return 0;
}
@@ -85,9 +85,9 @@ static int eucr_reset_resume(struct usb_interface *iface)
* the device
*/
- us->Power_IsResum = true;
+ us->Power_IsResum = true;
- us->SM_Status = *(PSM_STATUS)&tmp;
+ us->SM_Status = *(struct keucr_sm_status *)&tmp;
return 0;
}
@@ -124,16 +124,18 @@ static int eucr_post_reset(struct usb_interface *iface)
return 0;
}
-void fill_inquiry_response(struct us_data *us, unsigned char *data, unsigned int data_len)
+void fill_inquiry_response(struct us_data *us, unsigned char *data,
+ unsigned int data_len)
{
pr_info("usb --- fill_inquiry_response\n");
if (data_len < 36) /* You lose. */
return;
if (data[0]&0x20) {
- memset(data+8,0,28);
+ memset(data+8, 0, 28);
} else {
- u16 bcdDevice = le16_to_cpu(us->pusb_dev->descriptor.bcdDevice);
+ u16 bcdDevice =
+ le16_to_cpu(us->pusb_dev->descriptor.bcdDevice);
memcpy(data+8, us->unusual_dev->vendorName,
strlen(us->unusual_dev->vendorName) > 8 ? 8 :
strlen(us->unusual_dev->vendorName));
@@ -148,7 +150,7 @@ void fill_inquiry_response(struct us_data *us, unsigned char *data, unsigned int
usb_stor_set_xfer_buf(us, data, data_len, us->srb, TO_XFER_BUF);
}
-static int usb_stor_control_thread(void * __us)
+static int usb_stor_control_thread(void *__us)
{
struct us_data *us = (struct us_data *)__us;
struct Scsi_Host *host = us_to_host(us);
@@ -194,7 +196,8 @@ static int usb_stor_control_thread(void * __us)
us->srb->result = DID_BAD_TARGET << 16;
} else if ((us->srb->cmnd[0] == INQUIRY)
&& (us->fflags & US_FL_FIX_INQUIRY)) {
- unsigned char data_ptr[36] = {0x00, 0x80, 0x02, 0x02, 0x1F, 0x00, 0x00, 0x00};
+ unsigned char data_ptr[36] = {0x00, 0x80, 0x02, 0x02,
+ 0x1F, 0x00, 0x00, 0x00};
fill_inquiry_response(us, data_ptr, 36);
us->srb->result = SAM_STAT_GOOD;
@@ -253,13 +256,15 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf)
usb_set_intfdata(intf, us);
/* Allocate the device-related DMA-mapped buffers */
- us->cr = usb_alloc_coherent(us->pusb_dev, sizeof(*us->cr), GFP_KERNEL, &us->cr_dma);
+ us->cr = usb_alloc_coherent(us->pusb_dev, sizeof(*us->cr), GFP_KERNEL,
+ &us->cr_dma);
if (!us->cr) {
pr_info("usb_ctrlrequest allocation failed\n");
return -ENOMEM;
}
- us->iobuf = usb_alloc_coherent(us->pusb_dev, US_IOBUF_SIZE, GFP_KERNEL, &us->iobuf_dma);
+ us->iobuf = usb_alloc_coherent(us->pusb_dev, US_IOBUF_SIZE, GFP_KERNEL,
+ &us->iobuf_dma);
if (!us->iobuf) {
pr_info("I/O buffer allocation failed\n");
return -ENOMEM;
@@ -275,7 +280,8 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf)
static int get_device_info(struct us_data *us, const struct usb_device_id *id)
{
struct usb_device *dev = us->pusb_dev;
- struct usb_interface_descriptor *idesc = &us->pusb_intf->cur_altsetting->desc;
+ struct usb_interface_descriptor *idesc =
+ &us->pusb_intf->cur_altsetting->desc;
pr_info("usb --- get_device_info\n");
@@ -374,10 +380,13 @@ static int get_pipes(struct us_data *us)
/* Calculate and store the pipe values */
us->send_ctrl_pipe = usb_sndctrlpipe(us->pusb_dev, 0);
us->recv_ctrl_pipe = usb_rcvctrlpipe(us->pusb_dev, 0);
- us->send_bulk_pipe = usb_sndbulkpipe(us->pusb_dev, ep_out->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
- us->recv_bulk_pipe = usb_rcvbulkpipe(us->pusb_dev, ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ us->send_bulk_pipe = usb_sndbulkpipe(us->pusb_dev,
+ ep_out->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ us->recv_bulk_pipe = usb_rcvbulkpipe(us->pusb_dev,
+ ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
if (ep_int) {
- us->recv_intr_pipe = usb_rcvintpipe(us->pusb_dev, ep_int->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ us->recv_intr_pipe = usb_rcvintpipe(us->pusb_dev,
+ ep_int->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
us->ep_bInterval = ep_int->bInterval;
}
return 0;
@@ -433,10 +442,9 @@ static void dissociate_dev(struct us_data *us)
kfree(us->sensebuf);
/* Free the device-related DMA-mapped buffers */
- if (us->cr)
- usb_free_coherent(us->pusb_dev, sizeof(*us->cr), us->cr, us->cr_dma);
- if (us->iobuf)
- usb_free_coherent(us->pusb_dev, US_IOBUF_SIZE, us->iobuf, us->iobuf_dma);
+ usb_free_coherent(us->pusb_dev, sizeof(*us->cr), us->cr, us->cr_dma);
+ usb_free_coherent(us->pusb_dev, US_IOBUF_SIZE, us->iobuf,
+ us->iobuf_dma);
/* Remove our private data from the interface */
usb_set_intfdata(us->pusb_intf, NULL);
@@ -485,7 +493,7 @@ static void release_everything(struct us_data *us)
scsi_host_put(us_to_host(us));
}
-static int usb_stor_scan_thread(void * __us)
+static int usb_stor_scan_thread(void *__us)
{
struct us_data *us = (struct us_data *)__us;
@@ -515,7 +523,8 @@ static int usb_stor_scan_thread(void * __us)
complete_and_exit(&us->scanning_done, 0);
}
-static int eucr_probe(struct usb_interface *intf, const struct usb_device_id *id)
+static int eucr_probe(struct usb_interface *intf,
+ const struct usb_device_id *id)
{
struct Scsi_Host *host;
struct us_data *us;
@@ -525,7 +534,7 @@ static int eucr_probe(struct usb_interface *intf, const struct usb_device_id *id
pr_info("usb --- eucr_probe\n");
- host = scsi_host_alloc(&usb_stor_host_template, sizeof(*us));
+ host = scsi_host_alloc(&usb_stor_host_template, sizeof(*us));
if (!host) {
pr_info("Unable to allocate the scsi host\n");
return -ENOMEM;
@@ -585,7 +594,7 @@ static int eucr_probe(struct usb_interface *intf, const struct usb_device_id *id
wake_up_process(th);
/* probe card type */
- result = ENE_Read_BYTE(us, REG_CARD_STATUS, &MiscReg03);
+ result = ene_read_byte(us, REG_CARD_STATUS, &MiscReg03);
if (result != USB_STOR_XFER_GOOD) {
result = USB_STOR_TRANSPORT_ERROR;
quiesce_and_remove_host(us);
@@ -595,9 +604,9 @@ static int eucr_probe(struct usb_interface *intf, const struct usb_device_id *id
if (!(MiscReg03 & 0x02)) {
result = -ENODEV;
quiesce_and_remove_host(us);
- pr_info("keucr: The driver only supports SM/MS card.\
- To use SD card, \
- please build driver/usb/storage/ums-eneub6250.ko\n");
+ pr_info("keucr: The driver only supports SM/MS card. "
+ "To use SD card, "
+ "please build driver/usb/storage/ums-eneub6250.ko\n");
goto BadDevice;
}
@@ -623,9 +632,9 @@ static void eucr_disconnect(struct usb_interface *intf)
static struct usb_driver usb_storage_driver = {
.name = "eucr",
.probe = eucr_probe,
- .suspend = eucr_suspend,
+ .suspend = eucr_suspend,
.resume = eucr_resume,
- .reset_resume = eucr_reset_resume,
+ .reset_resume = eucr_reset_resume,
.disconnect = eucr_disconnect,
.pre_reset = eucr_pre_reset,
.post_reset = eucr_post_reset,
diff --git a/drivers/staging/keucr/usb.h b/drivers/staging/keucr/usb.h
index a5f7a16c11c9..d665af177b96 100644
--- a/drivers/staging/keucr/usb.h
+++ b/drivers/staging/keucr/usb.h
@@ -1,4 +1,4 @@
-// Driver for USB Mass Storage compliant devices
+/* Driver for USB Mass Storage compliant devices */
#ifndef _USB_H_
#define _USB_H_
@@ -19,26 +19,26 @@ struct scsi_cmnd;
*/
struct us_unusual_dev {
- const char* vendorName;
- const char* productName;
+ const char *vendorName;
+ const char *productName;
__u8 useProtocol;
__u8 useTransport;
int (*initFunction)(struct us_data *);
};
-//EnE HW Register
+/* EnE HW Register */
#define REG_CARD_STATUS 0xFF83
#define REG_HW_TRAP1 0xFF89
-// SRB Status. Refers /usr/include/wine/wine/wnaspi32.h & SCSI sense key
-#define SS_SUCCESS 0x00 // No Sense
+/* SRB Status. Refers /usr/include/wine/wine/wnaspi32.h & SCSI sense key */
+#define SS_SUCCESS 0x00 /* No Sense */
#define SS_NOT_READY 0x02
#define SS_MEDIUM_ERR 0x03
#define SS_HW_ERR 0x04
#define SS_ILLEGAL_REQUEST 0x05
#define SS_UNIT_ATTENTION 0x06
-//ENE Load FW Pattern
+/* ENE Load FW Pattern */
#define SD_INIT1_PATTERN 1
#define SD_INIT2_PATTERN 2
#define SD_RW_PATTERN 3
@@ -51,39 +51,40 @@ struct us_unusual_dev {
#define FDIR_WRITE 0
#define FDIR_READ 1
-typedef struct _SD_STATUS {
- BYTE Insert:1;
- BYTE Ready:1;
- BYTE MediaChange:1;
- BYTE IsMMC:1;
- BYTE HiCapacity:1;
- BYTE HiSpeed:1;
- BYTE WtP:1;
- BYTE Reserved:1;
-} SD_STATUS, *PSD_STATUS;
-
-typedef struct _MS_STATUS {
- BYTE Insert:1;
- BYTE Ready:1;
- BYTE MediaChange:1;
- BYTE IsMSPro:1;
- BYTE IsMSPHG:1;
- BYTE Reserved1:1;
- BYTE WtP:1;
- BYTE Reserved2:1;
-} MS_STATUS, *PMS_STATUS;
-
-typedef struct _SM_STATUS {
- BYTE Insert:1;
- BYTE Ready:1;
- BYTE MediaChange:1;
- BYTE Reserved:3;
- BYTE WtP:1;
- BYTE IsMS:1;
-} SM_STATUS, *PSM_STATUS;
-
-// SD Block Length
-#define SD_BLOCK_LEN 9 // 2^9 = 512 Bytes, The HW maximum read/write data length
+struct keucr_sd_status {
+ BYTE Insert:1;
+ BYTE Ready:1;
+ BYTE MediaChange:1;
+ BYTE IsMMC:1;
+ BYTE HiCapacity:1;
+ BYTE HiSpeed:1;
+ BYTE WtP:1;
+ BYTE Reserved:1;
+};
+
+struct keucr_ms_status {
+ BYTE Insert:1;
+ BYTE Ready:1;
+ BYTE MediaChange:1;
+ BYTE IsMSPro:1;
+ BYTE IsMSPHG:1;
+ BYTE Reserved1:1;
+ BYTE WtP:1;
+ BYTE Reserved2:1;
+};
+
+struct keucr_sm_status {
+ BYTE Insert:1;
+ BYTE Ready:1;
+ BYTE MediaChange:1;
+ BYTE Reserved:3;
+ BYTE WtP:1;
+ BYTE IsMS:1;
+};
+
+/* SD Block Length */
+#define SD_BLOCK_LEN 9 /* 2^9 = 512 Bytes,
+ The HW maximum read/write data length */
/* Dynamic bitflag definitions (us->dflags): used in set_bit() etc. */
#define US_FLIDX_URB_ACTIVE 0 /* current_urb is in use */
@@ -107,9 +108,9 @@ typedef struct _SM_STATUS {
#define US_IOBUF_SIZE 64 /* Size of the DMA-mapped I/O buffer */
#define US_SENSE_SIZE 18 /* Size of the autosense data buffer */
-typedef int (*trans_cmnd)(struct scsi_cmnd *, struct us_data*);
-typedef int (*trans_reset)(struct us_data*);
-typedef void (*proto_cmnd)(struct scsi_cmnd*, struct us_data*);
+typedef int (*trans_cmnd)(struct scsi_cmnd *, struct us_data *);
+typedef int (*trans_reset)(struct us_data *);
+typedef void (*proto_cmnd)(struct scsi_cmnd *, struct us_data *);
typedef void (*extra_data_destructor)(void *); /* extra data destructor */
typedef void (*pm_hook)(struct us_data *, int); /* power management hook */
@@ -176,19 +177,19 @@ struct us_data {
#ifdef CONFIG_PM
pm_hook suspend_resume_hook;
#endif
- // for 6250 code
- SD_STATUS SD_Status;
- MS_STATUS MS_Status;
- SM_STATUS SM_Status;
+ /* for 6250 code */
+ struct keucr_sd_status SD_Status;
+ struct keucr_ms_status MS_Status;
+ struct keucr_sm_status SM_Status;
- //----- SD Control Data ----------------
- //SD_REGISTER SD_Regs;
+ /* ----- SD Control Data ---------------- */
+ /* SD_REGISTER SD_Regs; */
WORD SD_Block_Mult;
BYTE SD_READ_BL_LEN;
WORD SD_C_SIZE;
BYTE SD_C_SIZE_MULT;
- // SD/MMC New spec.
+ /* SD/MMC New spec. */
BYTE SD_SPEC_VER;
BYTE SD_CSD_VER;
BYTE SD20_HIGH_CAPACITY;
@@ -196,15 +197,15 @@ struct us_data {
BYTE MMC_SPEC_VER;
BYTE MMC_BusWidth;
BYTE MMC_HIGH_CAPACITY;
-
- //----- MS Control Data ----------------
+
+ /* ----- MS Control Data ---------------- */
BOOLEAN MS_SWWP;
DWORD MSP_TotalBlock;
/* MS_LibControl MS_Lib; */
BOOLEAN MS_IsRWPage;
WORD MS_Model;
- //----- SM Control Data ----------------
+ /* ----- SM Control Data ---------------- */
BYTE SM_DeviceID;
BYTE SM_CardID;
@@ -212,16 +213,18 @@ struct us_data {
BYTE BIN_FLAG;
DWORD bl_num;
int SrbStatus;
-
- //------Power Managerment ---------------
- BOOLEAN Power_IsResum;
+
+ /* ------Power Managerment --------------- */
+ BOOLEAN Power_IsResum;
};
/* Convert between us_data and the corresponding Scsi_Host */
-static inline struct Scsi_Host *us_to_host(struct us_data *us) {
+static inline struct Scsi_Host *us_to_host(struct us_data *us)
+{
return container_of((void *) us, struct Scsi_Host, hostdata);
}
-static inline struct us_data *host_to_us(struct Scsi_Host *host) {
+static inline struct us_data *host_to_us(struct Scsi_Host *host)
+{
return (struct us_data *) host->hostdata;
}
diff --git a/drivers/staging/line6/pcm.c b/drivers/staging/line6/pcm.c
index 02f77d74809f..4795f1284906 100644
--- a/drivers/staging/line6/pcm.c
+++ b/drivers/staging/line6/pcm.c
@@ -107,11 +107,15 @@ static bool test_flags(unsigned long flags0, unsigned long flags1,
int line6_pcm_acquire(struct snd_line6_pcm *line6pcm, int channels)
{
- unsigned long flags_old =
- __sync_fetch_and_or(&line6pcm->flags, channels);
- unsigned long flags_new = flags_old | channels;
- unsigned long flags_final = flags_old;
- int err = 0;
+ unsigned long flags_old, flags_new, flags_final;
+ int err;
+
+ do {
+ flags_old = ACCESS_ONCE(line6pcm->flags);
+ flags_new = flags_old | channels;
+ } while (cmpxchg(&line6pcm->flags, flags_old, flags_new) != flags_old);
+
+ flags_final = flags_old;
line6pcm->prev_fbuf = NULL;
@@ -197,9 +201,12 @@ pcm_acquire_error:
int line6_pcm_release(struct snd_line6_pcm *line6pcm, int channels)
{
- unsigned long flags_old =
- __sync_fetch_and_and(&line6pcm->flags, ~channels);
- unsigned long flags_new = flags_old & ~channels;
+ unsigned long flags_old, flags_new;
+
+ do {
+ flags_old = ACCESS_ONCE(line6pcm->flags);
+ flags_new = flags_old & ~channels;
+ } while (cmpxchg(&line6pcm->flags, flags_old, flags_new) != flags_old);
if (test_flags(flags_new, flags_old, LINE6_BITS_CAPTURE_STREAM))
line6_unlink_audio_in_urbs(line6pcm);
diff --git a/drivers/staging/lustre/Kconfig b/drivers/staging/lustre/Kconfig
new file mode 100644
index 000000000000..a224d88bf43d
--- /dev/null
+++ b/drivers/staging/lustre/Kconfig
@@ -0,0 +1,3 @@
+source "drivers/staging/lustre/lustre/Kconfig"
+
+source "drivers/staging/lustre/lnet/Kconfig"
diff --git a/drivers/staging/lustre/Makefile b/drivers/staging/lustre/Makefile
new file mode 100644
index 000000000000..26162893fd20
--- /dev/null
+++ b/drivers/staging/lustre/Makefile
@@ -0,0 +1,4 @@
+subdir-ccflags-y := -I$(src)/include/
+
+obj-$(CONFIG_LUSTRE_FS) += lustre/
+obj-$(CONFIG_LNET) += lnet/
diff --git a/drivers/staging/lustre/TODO b/drivers/staging/lustre/TODO
new file mode 100644
index 000000000000..22742d6d62a8
--- /dev/null
+++ b/drivers/staging/lustre/TODO
@@ -0,0 +1,13 @@
+* Possible remaining coding style fix.
+* Remove deadcode.
+* Seperate client/server functionality. Functions only used by server can be
+ removed from client.
+* Clean up libcfs layer. Ideally we can remove include/linux/libcfs entirely.
+* Clean up CLIO layer. Lustre client readahead/writeback control needs to better
+ suit kernel providings.
+* Add documents in Documentation.
+* Other minor misc cleanups...
+
+Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
+<andreas.dilger@intel.com> and Peng Tao <tao.peng@emc.com>. CCing
+hpdd-discuss <hpdd-discuss@lists.01.org> would be great too.
diff --git a/drivers/staging/lustre/include/linux/libcfs/bitmap.h b/drivers/staging/lustre/include/linux/libcfs/bitmap.h
new file mode 100644
index 000000000000..3f1c37b4bb7a
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/bitmap.h
@@ -0,0 +1,111 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef _LIBCFS_BITMAP_H_
+#define _LIBCFS_BITMAP_H_
+
+
+typedef struct {
+ int size;
+ unsigned long data[0];
+} cfs_bitmap_t;
+
+#define CFS_BITMAP_SIZE(nbits) \
+ (((nbits/BITS_PER_LONG)+1)*sizeof(long)+sizeof(cfs_bitmap_t))
+
+static inline
+cfs_bitmap_t *CFS_ALLOCATE_BITMAP(int size)
+{
+ cfs_bitmap_t *ptr;
+
+ OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size));
+ if (ptr == NULL)
+ RETURN(ptr);
+
+ ptr->size = size;
+
+ RETURN (ptr);
+}
+
+#define CFS_FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size))
+
+static inline
+void cfs_bitmap_set(cfs_bitmap_t *bitmap, int nbit)
+{
+ set_bit(nbit, bitmap->data);
+}
+
+static inline
+void cfs_bitmap_clear(cfs_bitmap_t *bitmap, int nbit)
+{
+ test_and_clear_bit(nbit, bitmap->data);
+}
+
+static inline
+int cfs_bitmap_check(cfs_bitmap_t *bitmap, int nbit)
+{
+ return test_bit(nbit, bitmap->data);
+}
+
+static inline
+int cfs_bitmap_test_and_clear(cfs_bitmap_t *bitmap, int nbit)
+{
+ return test_and_clear_bit(nbit, bitmap->data);
+}
+
+/* return 0 is bitmap has none set bits */
+static inline
+int cfs_bitmap_check_empty(cfs_bitmap_t *bitmap)
+{
+ return find_first_bit(bitmap->data, bitmap->size) == bitmap->size;
+}
+
+static inline
+void cfs_bitmap_copy(cfs_bitmap_t *new, cfs_bitmap_t *old)
+{
+ int newsize;
+
+ LASSERT(new->size >= old->size);
+ newsize = new->size;
+ memcpy(new, old, CFS_BITMAP_SIZE(old->size));
+ new->size = newsize;
+}
+
+#define cfs_foreach_bit(bitmap, pos) \
+ for ((pos) = find_first_bit((bitmap)->data, bitmap->size); \
+ (pos) < (bitmap)->size; \
+ (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos) + 1))
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/curproc.h b/drivers/staging/lustre/include/linux/libcfs/curproc.h
new file mode 100644
index 000000000000..90d7ce630e94
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/curproc.h
@@ -0,0 +1,110 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/curproc.h
+ *
+ * Lustre curproc API declaration
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef __LIBCFS_CURPROC_H__
+#define __LIBCFS_CURPROC_H__
+
+/*
+ * Portable API to access common characteristics of "current" UNIX process.
+ *
+ * Implemented in portals/include/libcfs/<os>/
+ */
+int cfs_curproc_groups_nr(void);
+int current_is_in_group(gid_t group);
+void cfs_curproc_groups_dump(gid_t *array, int size);
+
+/*
+ * Plus, platform-specific constant
+ *
+ * CFS_CURPROC_COMM_MAX,
+ *
+ * and opaque scalar type
+ *
+ * kernel_cap_t
+ */
+
+/* check if task is running in compat mode.*/
+int current_is_32bit(void);
+#define current_pid() (current->pid)
+#define current_comm() (current->comm)
+int cfs_get_environ(const char *key, char *value, int *val_len);
+
+typedef __u32 cfs_cap_t;
+
+#define CFS_CAP_CHOWN 0
+#define CFS_CAP_DAC_OVERRIDE 1
+#define CFS_CAP_DAC_READ_SEARCH 2
+#define CFS_CAP_FOWNER 3
+#define CFS_CAP_FSETID 4
+#define CFS_CAP_LINUX_IMMUTABLE 9
+#define CFS_CAP_SYS_ADMIN 21
+#define CFS_CAP_SYS_BOOT 23
+#define CFS_CAP_SYS_RESOURCE 24
+
+#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) | \
+ (1 << CFS_CAP_DAC_OVERRIDE) | \
+ (1 << CFS_CAP_DAC_READ_SEARCH) | \
+ (1 << CFS_CAP_FOWNER) | \
+ (1 << CFS_CAP_FSETID ) | \
+ (1 << CFS_CAP_LINUX_IMMUTABLE) | \
+ (1 << CFS_CAP_SYS_ADMIN) | \
+ (1 << CFS_CAP_SYS_BOOT) | \
+ (1 << CFS_CAP_SYS_RESOURCE))
+
+void cfs_cap_raise(cfs_cap_t cap);
+void cfs_cap_lower(cfs_cap_t cap);
+int cfs_cap_raised(cfs_cap_t cap);
+cfs_cap_t cfs_curproc_cap_pack(void);
+void cfs_curproc_cap_unpack(cfs_cap_t cap);
+int cfs_capable(cfs_cap_t cap);
+
+/* __LIBCFS_CURPROC_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
new file mode 100644
index 000000000000..1ab1f2be9aa5
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -0,0 +1,234 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LIBCFS_H__
+#define __LIBCFS_LIBCFS_H__
+
+#if !__GNUC__
+#define __attribute__(x)
+#endif
+
+#include <linux/libcfs/linux/libcfs.h>
+
+#include "curproc.h"
+
+#ifndef offsetof
+# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb)))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0])))
+#endif
+
+#if !defined(swap)
+#define swap(x,y) do { typeof(x) z = x; x = y; y = z; } while (0)
+#endif
+
+#if !defined(container_of)
+/* given a pointer @ptr to the field @member embedded into type (usually
+ * struct) @type, return pointer to the embedding instance of @type. */
+#define container_of(ptr, type, member) \
+ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
+#endif
+
+static inline int __is_po2(unsigned long long val)
+{
+ return !(val & (val - 1));
+}
+
+#define IS_PO2(val) __is_po2((unsigned long long)(val))
+
+#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
+
+/*
+ * Lustre Error Checksum: calculates checksum
+ * of Hex number by XORing each bit.
+ */
+#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
+ ((hexnum) >> 8 & 0xf))
+
+
+/*
+ * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
+ * Lustre RETURN(NULL) macro.
+ */
+#if defined(NULL)
+#undef NULL
+#endif
+
+#define NULL ((void *)0)
+
+#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID
+
+
+#include <linux/list.h>
+
+#ifndef cfs_for_each_possible_cpu
+# error cfs_for_each_possible_cpu is not supported by kernel!
+#endif
+
+/* libcfs tcpip */
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
+int libcfs_ipif_enumerate(char ***names);
+void libcfs_ipif_free_enumeration(char **names, int n);
+int libcfs_sock_listen(socket_t **sockp, __u32 ip, int port, int backlog);
+int libcfs_sock_accept(socket_t **newsockp, socket_t *sock);
+void libcfs_sock_abort_accept(socket_t *sock);
+int libcfs_sock_connect(socket_t **sockp, int *fatal,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port);
+int libcfs_sock_setbuf(socket_t *socket, int txbufsize, int rxbufsize);
+int libcfs_sock_getbuf(socket_t *socket, int *txbufsize, int *rxbufsize);
+int libcfs_sock_getaddr(socket_t *socket, int remote, __u32 *ip, int *port);
+int libcfs_sock_write(socket_t *sock, void *buffer, int nob, int timeout);
+int libcfs_sock_read(socket_t *sock, void *buffer, int nob, int timeout);
+void libcfs_sock_release(socket_t *sock);
+
+/* libcfs watchdogs */
+struct lc_watchdog;
+
+/* Add a watchdog which fires after "time" milliseconds of delay. You have to
+ * touch it once to enable it. */
+struct lc_watchdog *lc_watchdog_add(int time,
+ void (*cb)(pid_t pid, void *),
+ void *data);
+
+/* Enables a watchdog and resets its timer. */
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout);
+#define CFS_GET_TIMEOUT(svc) (max_t(int, obd_timeout, \
+ AT_OFF ? 0 : at_get(&svc->srv_at_estimate)) * \
+ svc->srv_watchdog_factor)
+
+/* Disable a watchdog; touch it to restart it. */
+void lc_watchdog_disable(struct lc_watchdog *lcw);
+
+/* Clean up the watchdog */
+void lc_watchdog_delete(struct lc_watchdog *lcw);
+
+/* Dump a debug log */
+void lc_watchdog_dumplog(pid_t pid, void *data);
+
+
+/* need both kernel and user-land acceptor */
+#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512
+#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023
+
+/*
+ * libcfs pseudo device operations
+ *
+ * struct psdev_t and
+ * misc_register() and
+ * misc_deregister() are declared in
+ * libcfs/<os>/<os>-prim.h
+ *
+ * It's just draft now.
+ */
+
+struct cfs_psdev_file {
+ unsigned long off;
+ void *private_data;
+ unsigned long reserved1;
+ unsigned long reserved2;
+};
+
+struct cfs_psdev_ops {
+ int (*p_open)(unsigned long, void *);
+ int (*p_close)(unsigned long, void *);
+ int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
+ int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
+ int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
+};
+
+/*
+ * Drop into debugger, if possible. Implementation is provided by platform.
+ */
+
+void cfs_enter_debugger(void);
+
+/*
+ * Defined by platform
+ */
+int unshare_fs_struct(void);
+sigset_t cfs_get_blocked_sigs(void);
+sigset_t cfs_block_allsigs(void);
+sigset_t cfs_block_sigs(unsigned long sigs);
+sigset_t cfs_block_sigsinv(unsigned long sigs);
+void cfs_restore_sigs(sigset_t);
+int cfs_signal_pending(void);
+void cfs_clear_sigpending(void);
+
+/*
+ * Random number handling
+ */
+
+/* returns a random 32-bit integer */
+unsigned int cfs_rand(void);
+/* seed the generator */
+void cfs_srand(unsigned int, unsigned int);
+void cfs_get_random_bytes(void *buf, int size);
+
+#include <linux/libcfs/libcfs_debug.h>
+#include <linux/libcfs/libcfs_cpu.h>
+#include <linux/libcfs/libcfs_private.h>
+#include <linux/libcfs/libcfs_ioctl.h>
+#include <linux/libcfs/libcfs_prim.h>
+#include <linux/libcfs/libcfs_time.h>
+#include <linux/libcfs/libcfs_string.h>
+#include <linux/libcfs/libcfs_kernelcomm.h>
+#include <linux/libcfs/libcfs_workitem.h>
+#include <linux/libcfs/libcfs_hash.h>
+#include <linux/libcfs/libcfs_heap.h>
+#include <linux/libcfs/libcfs_fail.h>
+#include <linux/libcfs/params_tree.h>
+#include <linux/libcfs/libcfs_crypto.h>
+
+/* container_of depends on "likely" which is defined in libcfs_private.h */
+static inline void *__container_of(void *ptr, unsigned long shift)
+{
+ if (unlikely(IS_ERR(ptr) || ptr == NULL))
+ return ptr;
+ else
+ return (char *)ptr - shift;
+}
+
+#define container_of0(ptr, type, member) \
+ ((type *)__container_of((void *)(ptr), offsetof(type, member)))
+
+#define SET_BUT_UNUSED(a) do { } while(sizeof(a) - sizeof(a))
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
new file mode 100644
index 000000000000..6ae7415a3b99
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -0,0 +1,214 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_cpu.h
+ *
+ * CPU partition
+ * . CPU partition is virtual processing unit
+ *
+ * . CPU partition can present 1-N cores, or 1-N NUMA nodes,
+ * in other words, CPU partition is a processors pool.
+ *
+ * CPU Partition Table (CPT)
+ * . a set of CPU partitions
+ *
+ * . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
+ *
+ * . User can specify total number of CPU partitions while creating a
+ * CPT, ID of CPU partition is always start from 0.
+ *
+ * Example: if there are 8 cores on the system, while creating a CPT
+ * with cpu_npartitions=4:
+ * core[0, 1] = partition[0], core[2, 3] = partition[1]
+ * core[4, 5] = partition[2], core[6, 7] = partition[3]
+ *
+ * cpu_npartitions=1:
+ * core[0, 1, ... 7] = partition[0]
+ *
+ * . User can also specify CPU partitions by string pattern
+ *
+ * Examples: cpu_partitions="0[0,1], 1[2,3]"
+ * cpu_partitions="N 0[0-3], 1[4-8]"
+ *
+ * The first character "N" means following numbers are numa ID
+ *
+ * . NUMA allocators, CPU affinity threads are built over CPU partitions,
+ * instead of HW CPUs or HW nodes.
+ *
+ * . By default, Lustre modules should refer to the global cfs_cpt_table,
+ * instead of accessing HW CPUs directly, so concurrency of Lustre can be
+ * configured by cpu_npartitions of the global cfs_cpt_table
+ *
+ * . If cpu_npartitions=1(all CPUs in one pool), lustre should work the
+ * same way as 2.2 or earlier versions
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef __LIBCFS_CPU_H__
+#define __LIBCFS_CPU_H__
+
+#ifndef HAVE_LIBCFS_CPT
+
+typedef unsigned long cpumask_t;
+typedef unsigned long nodemask_t;
+
+struct cfs_cpt_table {
+ /* # of CPU partitions */
+ int ctb_nparts;
+ /* cpu mask */
+ cpumask_t ctb_mask;
+ /* node mask */
+ nodemask_t ctb_nodemask;
+ /* version */
+ __u64 ctb_version;
+};
+
+#endif /* !HAVE_LIBCFS_CPT */
+
+/* any CPU partition */
+#define CFS_CPT_ANY (-1)
+
+extern struct cfs_cpt_table *cfs_cpt_table;
+
+/**
+ * destroy a CPU partition table
+ */
+void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
+/**
+ * create a cfs_cpt_table with \a ncpt number of partitions
+ */
+struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
+/**
+ * print string information of cpt-table
+ */
+int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
+/**
+ * return total number of CPU partitions in \a cptab
+ */
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab);
+/**
+ * return number of HW cores or hypter-threadings in a CPU partition \a cpt
+ */
+int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * is there any online CPU in CPU partition \a cpt
+ */
+int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * return cpumask of CPU partition \a cpt
+ */
+cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * return nodemask of CPU partition \a cpt
+ */
+nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * shadow current HW processor ID to CPU-partition ID of \a cptab
+ */
+int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
+/**
+ * shadow HW processor ID \a CPU to CPU-partition ID by \a cptab
+ */
+int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
+/**
+ * bind current thread on a CPU-partition \a cpt of \a cptab
+ */
+int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * add \a cpu to CPU partion @cpt of \a cptab, return 1 for success,
+ * otherwise 0 is returned
+ */
+int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
+/**
+ * remove \a cpu from CPU partition \a cpt of \a cptab
+ */
+void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
+/**
+ * add all cpus in \a mask to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
+ int cpt, cpumask_t *mask);
+/**
+ * remove all cpus in \a mask from CPU partition \a cpt
+ */
+void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
+ int cpt, cpumask_t *mask);
+/**
+ * add all cpus in NUMA node \a node to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
+/**
+ * remove all cpus in NUMA node \a node from CPU partition \a cpt
+ */
+void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
+
+/**
+ * add all cpus in node mask \a mask to CPU partition \a cpt
+ * return 1 if successfully set all CPUs, otherwise return 0
+ */
+int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
+ int cpt, nodemask_t *mask);
+/**
+ * remove all cpus in node mask \a mask from CPU partition \a cpt
+ */
+void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
+ int cpt, nodemask_t *mask);
+/**
+ * unset all cpus for CPU partition \a cpt
+ */
+void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
+/**
+ * convert partition id \a cpt to numa node id, if there are more than one
+ * nodes in this partition, it might return a different node id each time.
+ */
+int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
+
+/**
+ * iterate over all CPU partitions in \a cptab
+ */
+#define cfs_cpt_for_each(i, cptab) \
+ for (i = 0; i < cfs_cpt_number(cptab); i++)
+
+#ifndef __read_mostly
+# define __read_mostly
+#endif
+
+#ifndef ____cacheline_aligned
+#define ____cacheline_aligned
+#endif
+
+int cfs_cpu_init(void);
+void cfs_cpu_fini(void);
+
+#endif /* __LIBCFS_CPU_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
new file mode 100644
index 000000000000..64ca62f0cc93
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
@@ -0,0 +1,201 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+#ifndef _LIBCFS_CRYPTO_H
+#define _LIBCFS_CRYPTO_H
+
+struct cfs_crypto_hash_type {
+ char *cht_name; /**< hash algorithm name, equal to
+ * format name for crypto api */
+ unsigned int cht_key; /**< init key by default (vaild for
+ * 4 bytes context like crc32, adler */
+ unsigned int cht_size; /**< hash digest size */
+};
+
+enum cfs_crypto_hash_alg {
+ CFS_HASH_ALG_NULL = 0,
+ CFS_HASH_ALG_ADLER32,
+ CFS_HASH_ALG_CRC32,
+ CFS_HASH_ALG_MD5,
+ CFS_HASH_ALG_SHA1,
+ CFS_HASH_ALG_SHA256,
+ CFS_HASH_ALG_SHA384,
+ CFS_HASH_ALG_SHA512,
+ CFS_HASH_ALG_CRC32C,
+ CFS_HASH_ALG_MAX
+};
+
+static struct cfs_crypto_hash_type hash_types[] = {
+ [CFS_HASH_ALG_NULL] = { "null", 0, 0 },
+ [CFS_HASH_ALG_ADLER32] = { "adler32", 1, 4 },
+ [CFS_HASH_ALG_CRC32] = { "crc32", ~0, 4 },
+ [CFS_HASH_ALG_CRC32C] = { "crc32c", ~0, 4 },
+ [CFS_HASH_ALG_MD5] = { "md5", 0, 16 },
+ [CFS_HASH_ALG_SHA1] = { "sha1", 0, 20 },
+ [CFS_HASH_ALG_SHA256] = { "sha256", 0, 32 },
+ [CFS_HASH_ALG_SHA384] = { "sha384", 0, 48 },
+ [CFS_HASH_ALG_SHA512] = { "sha512", 0, 64 },
+};
+
+/** Return pointer to type of hash for valid hash algorithm identifier */
+static inline const struct cfs_crypto_hash_type *
+ cfs_crypto_hash_type(unsigned char hash_alg)
+{
+ struct cfs_crypto_hash_type *ht;
+
+ if (hash_alg < CFS_HASH_ALG_MAX) {
+ ht = &hash_types[hash_alg];
+ if (ht->cht_name)
+ return ht;
+ }
+ return NULL;
+}
+
+/** Return hash name for valid hash algorithm identifier or "unknown" */
+static inline const char *cfs_crypto_hash_name(unsigned char hash_alg)
+{
+ const struct cfs_crypto_hash_type *ht;
+
+ ht = cfs_crypto_hash_type(hash_alg);
+ if (ht)
+ return ht->cht_name;
+ else
+ return "unknown";
+}
+
+/** Return digest size for valid algorithm identifier or 0 */
+static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg)
+{
+ const struct cfs_crypto_hash_type *ht;
+
+ ht = cfs_crypto_hash_type(hash_alg);
+ if (ht)
+ return ht->cht_size;
+ else
+ return 0;
+}
+
+/** Return hash identifier for valid hash algorithm name or 0xFF */
+static inline unsigned char cfs_crypto_hash_alg(const char *algname)
+{
+ unsigned char i;
+
+ for (i = 0; i < CFS_HASH_ALG_MAX; i++)
+ if (!strcmp(hash_types[i].cht_name, algname))
+ break;
+ return (i == CFS_HASH_ALG_MAX ? 0xFF : i);
+}
+
+/** Calculate hash digest for buffer.
+ * @param alg id of hash algorithm
+ * @param buf buffer of data
+ * @param buf_len buffer len
+ * @param key initial value for algorithm, if it is NULL,
+ * default initial value should be used.
+ * @param key_len len of initial value
+ * @param hash [out] pointer to hash, if it is NULL, hash_len is
+ * set to valid digest size in bytes, retval -ENOSPC.
+ * @param hash_len [in,out] size of hash buffer
+ * @returns status of operation
+ * @retval -EINVAL if buf, buf_len, hash_len or alg_id is invalid
+ * @retval -ENODEV if this algorithm is unsupported
+ * @retval -ENOSPC if pointer to hash is NULL, or hash_len less than
+ * digest size
+ * @retval 0 for success
+ * @retval < 0 other errors from lower layers.
+ */
+int cfs_crypto_hash_digest(unsigned char alg,
+ const void *buf, unsigned int buf_len,
+ unsigned char *key, unsigned int key_len,
+ unsigned char *hash, unsigned int *hash_len);
+
+/* cfs crypto hash descriptor */
+struct cfs_crypto_hash_desc;
+
+/** Allocate and initialize desriptor for hash algorithm.
+ * @param alg algorithm id
+ * @param key initial value for algorithm, if it is NULL,
+ * default initial value should be used.
+ * @param key_len len of initial value
+ * @returns pointer to descriptor of hash instance
+ * @retval ERR_PTR(error) when errors occured.
+ */
+struct cfs_crypto_hash_desc*
+ cfs_crypto_hash_init(unsigned char alg,
+ unsigned char *key, unsigned int key_len);
+
+/** Update digest by part of data.
+ * @param desc hash descriptor
+ * @param page data page
+ * @param offset data offset
+ * @param len data len
+ * @returns status of operation
+ * @retval 0 for success.
+ */
+int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc,
+ struct page *page, unsigned int offset,
+ unsigned int len);
+
+/** Update digest by part of data.
+ * @param desc hash descriptor
+ * @param buf pointer to data buffer
+ * @param buf_len size of data at buffer
+ * @returns status of operation
+ * @retval 0 for success.
+ */
+int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf,
+ unsigned int buf_len);
+
+/** Finalize hash calculation, copy hash digest to buffer, destroy hash
+ * descriptor.
+ * @param desc hash descriptor
+ * @param hash buffer pointer to store hash digest
+ * @param hash_len pointer to hash buffer size, if NULL
+ * destory hash descriptor
+ * @returns status of operation
+ * @retval -ENOSPC if hash is NULL, or *hash_len less than
+ * digest size
+ * @retval 0 for success
+ * @retval < 0 other errors from lower layers.
+ */
+int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc,
+ unsigned char *hash, unsigned int *hash_len);
+/**
+ * Register crypto hash algorithms
+ */
+int cfs_crypto_register(void);
+
+/**
+ * Unregister
+ */
+void cfs_crypto_unregister(void);
+
+/** Return hash speed in Mbytes per second for valid hash algorithm
+ * identifier. If test was unsuccessfull -1 would be return.
+ */
+int cfs_crypto_hash_speed(unsigned char hash_alg);
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
new file mode 100644
index 000000000000..dd8ac2f52c9f
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
@@ -0,0 +1,350 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_debug.h
+ *
+ * Debug messages and assertions
+ *
+ */
+
+#ifndef __LIBCFS_DEBUG_H__
+#define __LIBCFS_DEBUG_H__
+
+/*
+ * Debugging
+ */
+extern unsigned int libcfs_subsystem_debug;
+extern unsigned int libcfs_stack;
+extern unsigned int libcfs_debug;
+extern unsigned int libcfs_printk;
+extern unsigned int libcfs_console_ratelimit;
+extern unsigned int libcfs_watchdog_ratelimit;
+extern unsigned int libcfs_console_max_delay;
+extern unsigned int libcfs_console_min_delay;
+extern unsigned int libcfs_console_backoff;
+extern unsigned int libcfs_debug_binary;
+extern char libcfs_debug_file_path_arr[PATH_MAX];
+
+int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
+int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
+
+/* Has there been an LBUG? */
+extern unsigned int libcfs_catastrophe;
+extern unsigned int libcfs_panic_on_lbug;
+
+/**
+ * Format for debug message headers
+ */
+struct ptldebug_header {
+ __u32 ph_len;
+ __u32 ph_flags;
+ __u32 ph_subsys;
+ __u32 ph_mask;
+ __u16 ph_cpu_id;
+ __u16 ph_type;
+ __u32 ph_sec;
+ __u64 ph_usec;
+ __u32 ph_stack;
+ __u32 ph_pid;
+ __u32 ph_extern_pid;
+ __u32 ph_line_num;
+} __attribute__((packed));
+
+
+#define PH_FLAG_FIRST_RECORD 1
+
+/* Debugging subsystems (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define S_UNDEFINED 0x00000001
+#define S_MDC 0x00000002
+#define S_MDS 0x00000004
+#define S_OSC 0x00000008
+#define S_OST 0x00000010
+#define S_CLASS 0x00000020
+#define S_LOG 0x00000040
+#define S_LLITE 0x00000080
+#define S_RPC 0x00000100
+#define S_MGMT 0x00000200
+#define S_LNET 0x00000400
+#define S_LND 0x00000800 /* ALL LNDs */
+#define S_PINGER 0x00001000
+#define S_FILTER 0x00002000
+/* unused */
+#define S_ECHO 0x00008000
+#define S_LDLM 0x00010000
+#define S_LOV 0x00020000
+#define S_LQUOTA 0x00040000
+#define S_OSD 0x00080000
+/* unused */
+/* unused */
+/* unused */
+#define S_LMV 0x00800000 /* b_new_cmd */
+/* unused */
+#define S_SEC 0x02000000 /* upcall cache */
+#define S_GSS 0x04000000 /* b_new_cmd */
+/* unused */
+#define S_MGC 0x10000000
+#define S_MGS 0x20000000
+#define S_FID 0x40000000 /* b_new_cmd */
+#define S_FLD 0x80000000 /* b_new_cmd */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+
+/* Debugging masks (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE 0x00000002
+#define D_SUPER 0x00000004
+#define D_EXT2 0x00000008 /* anything from ext2_debug */
+#define D_MALLOC 0x00000010 /* print malloc, free information */
+#define D_CACHE 0x00000020 /* cache-related items */
+#define D_INFO 0x00000040 /* general information */
+#define D_IOCTL 0x00000080 /* ioctl related information */
+#define D_NETERROR 0x00000100 /* network errors */
+#define D_NET 0x00000200 /* network communications */
+#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS 0x00000800
+#define D_OTHER 0x00001000
+#define D_DENTRY 0x00002000
+#define D_NETTRACE 0x00004000
+#define D_PAGE 0x00008000 /* bulk page handling */
+#define D_DLMTRACE 0x00010000
+#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA 0x00080000 /* recovery and failover */
+#define D_RPCTRACE 0x00100000 /* for distributed debugging */
+#define D_VFSTRACE 0x00200000
+#define D_READA 0x00400000 /* read-ahead */
+#define D_MMAP 0x00800000
+#define D_CONFIG 0x01000000
+#define D_CONSOLE 0x02000000
+#define D_QUOTA 0x04000000
+#define D_SEC 0x08000000
+#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */
+/* keep these in sync with lnet/{utils,libcfs}/debug.c */
+
+#define D_HSM D_TRACE
+
+#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */
+#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */
+#define CDEBUG_DEFAULT_BACKOFF 2
+typedef struct {
+ cfs_time_t cdls_next;
+ unsigned int cdls_delay;
+ int cdls_count;
+} cfs_debug_limit_state_t;
+
+struct libcfs_debug_msg_data {
+ const char *msg_file;
+ const char *msg_fn;
+ int msg_subsys;
+ int msg_line;
+ int msg_mask;
+ cfs_debug_limit_state_t *msg_cdls;
+};
+
+#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls) \
+do { \
+ (data)->msg_subsys = DEBUG_SUBSYSTEM; \
+ (data)->msg_file = __FILE__; \
+ (data)->msg_fn = __FUNCTION__; \
+ (data)->msg_line = __LINE__; \
+ (data)->msg_cdls = (cdls); \
+ (data)->msg_mask = (mask); \
+} while (0)
+
+#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls) \
+ static struct libcfs_debug_msg_data dataname = { \
+ .msg_subsys = DEBUG_SUBSYSTEM, \
+ .msg_file = __FILE__, \
+ .msg_fn = __FUNCTION__, \
+ .msg_line = __LINE__, \
+ .msg_cdls = (cdls) }; \
+ dataname.msg_mask = (mask);
+
+
+
+/**
+ * Filters out logging messages based on mask and subsystem.
+ */
+static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
+{
+ return mask & D_CANTMASK ||
+ ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
+}
+
+#define __CDEBUG(cdls, mask, format, ...) \
+do { \
+ static struct libcfs_debug_msg_data msgdata; \
+ \
+ CFS_CHECK_STACK(&msgdata, mask, cdls); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls); \
+ libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__); \
+ } \
+} while (0)
+
+#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
+
+#define CDEBUG_LIMIT(mask, format, ...) \
+do { \
+ static cfs_debug_limit_state_t cdls; \
+ \
+ __CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\
+} while (0)
+
+
+
+
+#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
+#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
+#define CNETERR(format, a...) CDEBUG_LIMIT(D_NETERROR, format, ## a)
+#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
+
+#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
+#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
+#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
+#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
+ "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
+#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
+
+#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
+
+
+void libcfs_log_goto(struct libcfs_debug_msg_data *, const char *, long_ptr_t);
+#define GOTO(label, rc) \
+do { \
+ if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
+ libcfs_log_goto(&msgdata, #label, (long_ptr_t)(rc)); \
+ } else { \
+ (void)(rc); \
+ } \
+ goto label; \
+} while (0)
+
+
+/*
+ * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
+ * there will be a warning in osx.
+ */
+#if defined(__GNUC__)
+
+long libcfs_log_return(struct libcfs_debug_msg_data *, long rc);
+#if BITS_PER_LONG > 32
+#define RETURN(rc) \
+do { \
+ EXIT_NESTING; \
+ if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
+ return (typeof(rc))libcfs_log_return(&msgdata, \
+ (long)(rc)); \
+ } \
+ \
+ return (rc); \
+} while (0)
+#else /* BITS_PER_LONG == 32 */
+/* We need an on-stack variable, because we cannot case a 32-bit pointer
+ * directly to (long long) without generating a complier warning/error, yet
+ * casting directly to (long) will truncate 64-bit return values. The log
+ * values will print as 32-bit values, but they always have been. LU-1436
+ */
+#define RETURN(rc) \
+do { \
+ EXIT_NESTING; \
+ if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
+ typeof(rc) __rc = (rc); \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
+ libcfs_log_return(&msgdata, (long_ptr_t)__rc); \
+ return __rc; \
+ } \
+ \
+ return (rc); \
+} while (0)
+#endif /* BITS_PER_LONG > 32 */
+
+#elif defined(_MSC_VER)
+#define RETURN(rc) \
+do { \
+ CDEBUG(D_TRACE, "Process leaving.\n"); \
+ EXIT_NESTING; \
+ return (rc); \
+} while (0)
+#else
+# error "Unkown compiler"
+#endif /* __GNUC__ */
+
+#define ENTRY \
+ENTRY_NESTING; \
+do { \
+ CDEBUG(D_TRACE, "Process entered\n"); \
+} while (0)
+
+#define EXIT \
+do { \
+ CDEBUG(D_TRACE, "Process leaving\n"); \
+ EXIT_NESTING; \
+} while(0)
+
+#define RETURN_EXIT \
+do { \
+ EXIT; \
+ return; \
+} while (0)
+
+extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
+ const char *format1, ...)
+ __attribute__ ((format (printf, 2, 3)));
+
+extern int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
+ const char *format1,
+ va_list args, const char *format2, ...)
+ __attribute__ ((format (printf, 4, 5)));
+
+/* other external symbols that tracefile provides: */
+extern int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+ const char *usr_buffer, int usr_buffer_nob);
+extern int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+ const char *knl_buffer, char *append);
+
+#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
+
+#endif /* __LIBCFS_DEBUG_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
new file mode 100644
index 000000000000..8393c2703ce6
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
@@ -0,0 +1,170 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
+ * CA 94065 USA or visit www.oracle.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Oracle Corporation, Inc.
+ */
+
+#ifndef _LIBCFS_FAIL_H
+#define _LIBCFS_FAIL_H
+
+extern unsigned long cfs_fail_loc;
+extern unsigned int cfs_fail_val;
+
+extern wait_queue_head_t cfs_race_waitq;
+extern int cfs_race_state;
+
+int __cfs_fail_check_set(__u32 id, __u32 value, int set);
+int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
+
+enum {
+ CFS_FAIL_LOC_NOSET = 0,
+ CFS_FAIL_LOC_ORSET = 1,
+ CFS_FAIL_LOC_RESET = 2,
+ CFS_FAIL_LOC_VALUE = 3
+};
+
+/* Failure injection control */
+#define CFS_FAIL_MASK_SYS 0x0000FF00
+#define CFS_FAIL_MASK_LOC (0x000000FF | CFS_FAIL_MASK_SYS)
+
+#define CFS_FAILED_BIT 30
+/* CFS_FAILED is 0x40000000 */
+#define CFS_FAILED (1 << CFS_FAILED_BIT)
+
+#define CFS_FAIL_ONCE_BIT 31
+/* CFS_FAIL_ONCE is 0x80000000 */
+#define CFS_FAIL_ONCE (1 << CFS_FAIL_ONCE_BIT)
+
+/* The following flags aren't made to be combined */
+#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */
+#define CFS_FAIL_SOME 0x10000000 /* only fail N times */
+#define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */
+#define CFS_FAIL_USR1 0x04000000 /* user flag */
+
+#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc && \
+ (cfs_fail_loc & CFS_FAIL_MASK_LOC) == \
+ ((id) & CFS_FAIL_MASK_LOC))
+
+static inline int cfs_fail_check_set(__u32 id, __u32 value,
+ int set, int quiet)
+{
+ int ret = 0;
+
+ if (unlikely(CFS_FAIL_PRECHECK(id) &&
+ (ret = __cfs_fail_check_set(id, value, set)))) {
+ if (quiet) {
+ CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
+ id, value);
+ } else {
+ LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
+ id, value);
+ }
+ }
+
+ return ret;
+}
+
+/* If id hit cfs_fail_loc, return 1, otherwise return 0 */
+#define CFS_FAIL_CHECK(id) \
+ cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
+#define CFS_FAIL_CHECK_QUIET(id) \
+ cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
+
+/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_VALUE(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
+#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_ORSET(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
+#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
+ * otherwise return 0 */
+#define CFS_FAIL_CHECK_RESET(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
+#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
+ cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
+
+static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+ if (unlikely(CFS_FAIL_PRECHECK(id)))
+ return __cfs_fail_timeout_set(id, value, ms, set);
+ else
+ return 0;
+}
+
+/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */
+#define CFS_FAIL_TIMEOUT(id, secs) \
+ cfs_fail_timeout_set(id, 0, secs * 1000, CFS_FAIL_LOC_NOSET)
+
+#define CFS_FAIL_TIMEOUT_MS(id, ms) \
+ cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
+
+/* If id hit cfs_fail_loc, cfs_fail_loc |= value and
+ * sleep seconds or milliseconds */
+#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
+ cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
+
+#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
+ cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
+
+/* The idea here is to synchronise two threads to force a race. The
+ * first thread that calls this with a matching fail_loc is put to
+ * sleep. The next thread that calls with the same fail_loc wakes up
+ * the first and continues. */
+static inline void cfs_race(__u32 id)
+{
+
+ if (CFS_FAIL_PRECHECK(id)) {
+ if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
+ int rc;
+ cfs_race_state = 0;
+ CERROR("cfs_race id %x sleeping\n", id);
+ cfs_wait_event_interruptible(cfs_race_waitq,
+ cfs_race_state != 0, rc);
+ CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc);
+ } else {
+ CERROR("cfs_fail_race id %x waking\n", id);
+ cfs_race_state = 1;
+ wake_up(&cfs_race_waitq);
+ }
+ }
+}
+#define CFS_RACE(id) cfs_race(id)
+
+#endif /* _LIBCFS_FAIL_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
new file mode 100644
index 000000000000..f6361b3f0a0c
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
@@ -0,0 +1,851 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_hash.h
+ *
+ * Hashing routines
+ *
+ */
+
+#ifndef __LIBCFS_HASH_H__
+#define __LIBCFS_HASH_H__
+/*
+ * Knuth recommends primes in approximately golden ratio to the maximum
+ * integer representable by a machine word for multiplicative hashing.
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * These primes are chosen to be bit-sparse, that is operations on
+ * them can use shifts and additions instead of multiplications for
+ * machines where multiplications are slow.
+ */
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
+/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
+#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
+
+/*
+ * Ideally we would use HAVE_HASH_LONG for this, but on linux we configure
+ * the linux kernel and user space at the same time, so we need to differentiate
+ * between them explicitely. If this is not needed on other architectures, then
+ * we'll need to move the functions to archi specific headers.
+ */
+
+#include <linux/hash.h>
+
+#define cfs_hash_long(val, bits) hash_long(val, bits)
+
+/** disable debug */
+#define CFS_HASH_DEBUG_NONE 0
+/** record hash depth and output to console when it's too deep,
+ * computing overhead is low but consume more memory */
+#define CFS_HASH_DEBUG_1 1
+/** expensive, check key validation */
+#define CFS_HASH_DEBUG_2 2
+
+#define CFS_HASH_DEBUG_LEVEL CFS_HASH_DEBUG_NONE
+
+struct cfs_hash_ops;
+struct cfs_hash_lock_ops;
+struct cfs_hash_hlist_ops;
+
+typedef union {
+ rwlock_t rw; /**< rwlock */
+ spinlock_t spin; /**< spinlock */
+} cfs_hash_lock_t;
+
+/**
+ * cfs_hash_bucket is a container of:
+ * - lock, couter ...
+ * - array of hash-head starting from hsb_head[0], hash-head can be one of
+ * . cfs_hash_head_t
+ * . cfs_hash_head_dep_t
+ * . cfs_hash_dhead_t
+ * . cfs_hash_dhead_dep_t
+ * which depends on requirement of user
+ * - some extra bytes (caller can require it while creating hash)
+ */
+typedef struct cfs_hash_bucket {
+ cfs_hash_lock_t hsb_lock; /**< bucket lock */
+ __u32 hsb_count; /**< current entries */
+ __u32 hsb_version; /**< change version */
+ unsigned int hsb_index; /**< index of bucket */
+ int hsb_depmax; /**< max depth on bucket */
+ long hsb_head[0]; /**< hash-head array */
+} cfs_hash_bucket_t;
+
+/**
+ * cfs_hash bucket descriptor, it's normally in stack of caller
+ */
+typedef struct cfs_hash_bd {
+ cfs_hash_bucket_t *bd_bucket; /**< address of bucket */
+ unsigned int bd_offset; /**< offset in bucket */
+} cfs_hash_bd_t;
+
+#define CFS_HASH_NAME_LEN 16 /**< default name length */
+#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */
+
+#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */
+#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */
+#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS
+
+/**
+ * common hash attributes.
+ */
+enum cfs_hash_tag {
+ /**
+ * don't need any lock, caller will protect operations with it's
+ * own lock. With this flag:
+ * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
+ * will be ignored.
+ * . Some functions will be disabled with this flag, i.e:
+ * cfs_hash_for_each_empty, cfs_hash_rehash
+ */
+ CFS_HASH_NO_LOCK = 1 << 0,
+ /** no bucket lock, use one spinlock to protect the whole hash */
+ CFS_HASH_NO_BKTLOCK = 1 << 1,
+ /** rwlock to protect bucket */
+ CFS_HASH_RW_BKTLOCK = 1 << 2,
+ /** spinlcok to protect bucket */
+ CFS_HASH_SPIN_BKTLOCK = 1 << 3,
+ /** always add new item to tail */
+ CFS_HASH_ADD_TAIL = 1 << 4,
+ /** hash-table doesn't have refcount on item */
+ CFS_HASH_NO_ITEMREF = 1 << 5,
+ /** big name for param-tree */
+ CFS_HASH_BIGNAME = 1 << 6,
+ /** track global count */
+ CFS_HASH_COUNTER = 1 << 7,
+ /** rehash item by new key */
+ CFS_HASH_REHASH_KEY = 1 << 8,
+ /** Enable dynamic hash resizing */
+ CFS_HASH_REHASH = 1 << 9,
+ /** can shrink hash-size */
+ CFS_HASH_SHRINK = 1 << 10,
+ /** assert hash is empty on exit */
+ CFS_HASH_ASSERT_EMPTY = 1 << 11,
+ /** record hlist depth */
+ CFS_HASH_DEPTH = 1 << 12,
+ /**
+ * rehash is always scheduled in a different thread, so current
+ * change on hash table is non-blocking
+ */
+ CFS_HASH_NBLK_CHANGE = 1 << 13,
+ /** NB, we typed hs_flags as __u16, please change it
+ * if you need to extend >=16 flags */
+};
+
+/** most used attributes */
+#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \
+ CFS_HASH_COUNTER | CFS_HASH_REHASH)
+
+/**
+ * cfs_hash is a hash-table implementation for general purpose, it can support:
+ * . two refcount modes
+ * hash-table with & without refcount
+ * . four lock modes
+ * nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
+ * . general operations
+ * lookup, add(add_tail or add_head), delete
+ * . rehash
+ * grows or shrink
+ * . iteration
+ * locked iteration and unlocked iteration
+ * . bigname
+ * support long name hash
+ * . debug
+ * trace max searching depth
+ *
+ * Rehash:
+ * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
+ * is spawned to handle the rehash in the background, it's possible that other
+ * processes can concurrently perform additions, deletions, and lookups
+ * without being blocked on rehash completion, because rehash will release
+ * the global wrlock for each bucket.
+ *
+ * rehash and iteration can't run at the same time because it's too tricky
+ * to keep both of them safe and correct.
+ * As they are relatively rare operations, so:
+ * . if iteration is in progress while we try to launch rehash, then
+ * it just giveup, iterator will launch rehash at the end.
+ * . if rehash is in progress while we try to iterate the hash table,
+ * then we just wait (shouldn't be very long time), anyway, nobody
+ * should expect iteration of whole hash-table to be non-blocking.
+ *
+ * During rehashing, a (key,object) pair may be in one of two buckets,
+ * depending on whether the worker task has yet to transfer the object
+ * to its new location in the table. Lookups and deletions need to search both
+ * locations; additions must take care to only insert into the new bucket.
+ */
+
+typedef struct cfs_hash {
+ /** serialize with rehash, or serialize all operations if
+ * the hash-table has CFS_HASH_NO_BKTLOCK */
+ cfs_hash_lock_t hs_lock;
+ /** hash operations */
+ struct cfs_hash_ops *hs_ops;
+ /** hash lock operations */
+ struct cfs_hash_lock_ops *hs_lops;
+ /** hash list operations */
+ struct cfs_hash_hlist_ops *hs_hops;
+ /** hash buckets-table */
+ cfs_hash_bucket_t **hs_buckets;
+ /** total number of items on this hash-table */
+ atomic_t hs_count;
+ /** hash flags, see cfs_hash_tag for detail */
+ __u16 hs_flags;
+ /** # of extra-bytes for bucket, for user saving extended attributes */
+ __u16 hs_extra_bytes;
+ /** wants to iterate */
+ __u8 hs_iterating;
+ /** hash-table is dying */
+ __u8 hs_exiting;
+ /** current hash bits */
+ __u8 hs_cur_bits;
+ /** min hash bits */
+ __u8 hs_min_bits;
+ /** max hash bits */
+ __u8 hs_max_bits;
+ /** bits for rehash */
+ __u8 hs_rehash_bits;
+ /** bits for each bucket */
+ __u8 hs_bkt_bits;
+ /** resize min threshold */
+ __u16 hs_min_theta;
+ /** resize max threshold */
+ __u16 hs_max_theta;
+ /** resize count */
+ __u32 hs_rehash_count;
+ /** # of iterators (caller of cfs_hash_for_each_*) */
+ __u32 hs_iterators;
+ /** rehash workitem */
+ cfs_workitem_t hs_rehash_wi;
+ /** refcount on this hash table */
+ atomic_t hs_refcount;
+ /** rehash buckets-table */
+ cfs_hash_bucket_t **hs_rehash_buckets;
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+ /** serialize debug members */
+ spinlock_t hs_dep_lock;
+ /** max depth */
+ unsigned int hs_dep_max;
+ /** id of the deepest bucket */
+ unsigned int hs_dep_bkt;
+ /** offset in the deepest bucket */
+ unsigned int hs_dep_off;
+ /** bits when we found the max depth */
+ unsigned int hs_dep_bits;
+ /** workitem to output max depth */
+ cfs_workitem_t hs_dep_wi;
+#endif
+ /** name of htable */
+ char hs_name[0];
+} cfs_hash_t;
+
+typedef struct cfs_hash_lock_ops {
+ /** lock the hash table */
+ void (*hs_lock)(cfs_hash_lock_t *lock, int exclusive);
+ /** unlock the hash table */
+ void (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive);
+ /** lock the hash bucket */
+ void (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive);
+ /** unlock the hash bucket */
+ void (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive);
+} cfs_hash_lock_ops_t;
+
+typedef struct cfs_hash_hlist_ops {
+ /** return hlist_head of hash-head of @bd */
+ struct hlist_head *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd);
+ /** return hash-head size */
+ int (*hop_hhead_size)(cfs_hash_t *hs);
+ /** add @hnode to hash-head of @bd */
+ int (*hop_hnode_add)(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, struct hlist_node *hnode);
+ /** remove @hnode from hash-head of @bd */
+ int (*hop_hnode_del)(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, struct hlist_node *hnode);
+} cfs_hash_hlist_ops_t;
+
+typedef struct cfs_hash_ops {
+ /** return hashed value from @key */
+ unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask);
+ /** return key address of @hnode */
+ void * (*hs_key)(struct hlist_node *hnode);
+ /** copy key from @hnode to @key */
+ void (*hs_keycpy)(struct hlist_node *hnode, void *key);
+ /**
+ * compare @key with key of @hnode
+ * returns 1 on a match
+ */
+ int (*hs_keycmp)(const void *key, struct hlist_node *hnode);
+ /** return object address of @hnode, i.e: container_of(...hnode) */
+ void * (*hs_object)(struct hlist_node *hnode);
+ /** get refcount of item, always called with holding bucket-lock */
+ void (*hs_get)(cfs_hash_t *hs, struct hlist_node *hnode);
+ /** release refcount of item */
+ void (*hs_put)(cfs_hash_t *hs, struct hlist_node *hnode);
+ /** release refcount of item, always called with holding bucket-lock */
+ void (*hs_put_locked)(cfs_hash_t *hs, struct hlist_node *hnode);
+ /** it's called before removing of @hnode */
+ void (*hs_exit)(cfs_hash_t *hs, struct hlist_node *hnode);
+} cfs_hash_ops_t;
+
+/** total number of buckets in @hs */
+#define CFS_HASH_NBKT(hs) \
+ (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))
+
+/** total number of buckets in @hs while rehashing */
+#define CFS_HASH_RH_NBKT(hs) \
+ (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))
+
+/** number of hlist for in bucket */
+#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)
+
+/** total number of hlist in @hs */
+#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits)
+
+/** total number of hlist in @hs while rehashing */
+#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits)
+
+static inline int
+cfs_hash_with_no_lock(cfs_hash_t *hs)
+{
+ /* caller will serialize all operations for this hash-table */
+ return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_no_bktlock(cfs_hash_t *hs)
+{
+ /* no bucket lock, one single lock to protect the hash-table */
+ return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_rw_bktlock(cfs_hash_t *hs)
+{
+ /* rwlock to protect hash bucket */
+ return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_spin_bktlock(cfs_hash_t *hs)
+{
+ /* spinlock to protect hash bucket */
+ return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
+}
+
+static inline int
+cfs_hash_with_add_tail(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
+}
+
+static inline int
+cfs_hash_with_no_itemref(cfs_hash_t *hs)
+{
+ /* hash-table doesn't keep refcount on item,
+ * item can't be removed from hash unless it's
+ * ZERO refcount */
+ return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
+}
+
+static inline int
+cfs_hash_with_bigname(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
+}
+
+static inline int
+cfs_hash_with_counter(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
+}
+
+static inline int
+cfs_hash_with_rehash(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_REHASH) != 0;
+}
+
+static inline int
+cfs_hash_with_rehash_key(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
+}
+
+static inline int
+cfs_hash_with_shrink(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
+}
+
+static inline int
+cfs_hash_with_assert_empty(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
+}
+
+static inline int
+cfs_hash_with_depth(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
+}
+
+static inline int
+cfs_hash_with_nblk_change(cfs_hash_t *hs)
+{
+ return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
+}
+
+static inline int
+cfs_hash_is_exiting(cfs_hash_t *hs)
+{ /* cfs_hash_destroy is called */
+ return hs->hs_exiting;
+}
+
+static inline int
+cfs_hash_is_rehashing(cfs_hash_t *hs)
+{ /* rehash is launched */
+ return hs->hs_rehash_bits != 0;
+}
+
+static inline int
+cfs_hash_is_iterating(cfs_hash_t *hs)
+{ /* someone is calling cfs_hash_for_each_* */
+ return hs->hs_iterating || hs->hs_iterators != 0;
+}
+
+static inline int
+cfs_hash_bkt_size(cfs_hash_t *hs)
+{
+ return offsetof(cfs_hash_bucket_t, hsb_head[0]) +
+ hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
+ hs->hs_extra_bytes;
+}
+
+#define CFS_HOP(hs, op) (hs)->hs_ops->hs_ ## op
+
+static inline unsigned
+cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return CFS_HOP(hs, hash)(hs, key, mask);
+}
+
+static inline void *
+cfs_hash_key(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ return CFS_HOP(hs, key)(hnode);
+}
+
+static inline void
+cfs_hash_keycpy(cfs_hash_t *hs, struct hlist_node *hnode, void *key)
+{
+ if (CFS_HOP(hs, keycpy) != NULL)
+ CFS_HOP(hs, keycpy)(hnode, key);
+}
+
+/**
+ * Returns 1 on a match,
+ */
+static inline int
+cfs_hash_keycmp(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
+{
+ return CFS_HOP(hs, keycmp)(key, hnode);
+}
+
+static inline void *
+cfs_hash_object(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ return CFS_HOP(hs, object)(hnode);
+}
+
+static inline void
+cfs_hash_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ return CFS_HOP(hs, get)(hs, hnode);
+}
+
+static inline void
+cfs_hash_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ LASSERT(CFS_HOP(hs, put_locked) != NULL);
+
+ return CFS_HOP(hs, put_locked)(hs, hnode);
+}
+
+static inline void
+cfs_hash_put(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ LASSERT(CFS_HOP(hs, put) != NULL);
+
+ return CFS_HOP(hs, put)(hs, hnode);
+}
+
+static inline void
+cfs_hash_exit(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ if (CFS_HOP(hs, exit))
+ CFS_HOP(hs, exit)(hs, hnode);
+}
+
+static inline void cfs_hash_lock(cfs_hash_t *hs, int excl)
+{
+ hs->hs_lops->hs_lock(&hs->hs_lock, excl);
+}
+
+static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl)
+{
+ hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
+}
+
+static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs,
+ atomic_t *condition)
+{
+ LASSERT(cfs_hash_with_no_bktlock(hs));
+ return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
+}
+
+static inline void cfs_hash_bd_lock(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, int excl)
+{
+ hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
+}
+
+static inline void cfs_hash_bd_unlock(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, int excl)
+{
+ hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
+}
+
+/**
+ * operations on cfs_hash bucket (bd: bucket descriptor),
+ * they are normally for hash-table without rehash
+ */
+void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd);
+
+static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const void *key,
+ cfs_hash_bd_t *bd, int excl)
+{
+ cfs_hash_bd_get(hs, key, bd);
+ cfs_hash_bd_lock(hs, bd, excl);
+}
+
+static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
+}
+
+static inline void cfs_hash_bd_index_set(cfs_hash_t *hs,
+ unsigned index, cfs_hash_bd_t *bd)
+{
+ bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
+ bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
+}
+
+static inline void *
+cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ return (void *)bd->bd_bucket +
+ cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
+}
+
+static inline __u32
+cfs_hash_bd_version_get(cfs_hash_bd_t *bd)
+{
+ /* need hold cfs_hash_bd_lock */
+ return bd->bd_bucket->hsb_version;
+}
+
+static inline __u32
+cfs_hash_bd_count_get(cfs_hash_bd_t *bd)
+{
+ /* need hold cfs_hash_bd_lock */
+ return bd->bd_bucket->hsb_count;
+}
+
+static inline int
+cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd)
+{
+ return bd->bd_bucket->hsb_depmax;
+}
+
+static inline int
+cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
+{
+ if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
+ return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
+
+ if (bd1->bd_offset != bd2->bd_offset)
+ return bd1->bd_offset - bd2->bd_offset;
+
+ return 0;
+}
+
+void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode);
+void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode);
+void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
+ cfs_hash_bd_t *bd_new, struct hlist_node *hnode);
+
+static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ atomic_t *condition)
+{
+ LASSERT(cfs_hash_with_spin_bktlock(hs));
+ return atomic_dec_and_lock(condition,
+ &bd->bd_bucket->hsb_lock.spin);
+}
+
+static inline struct hlist_head *cfs_hash_bd_hhead(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd)
+{
+ return hs->hs_hops->hop_hhead(hs, bd);
+}
+
+struct hlist_node *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, const void *key);
+struct hlist_node *cfs_hash_bd_peek_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, const void *key);
+struct hlist_node *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, const void *key,
+ struct hlist_node *hnode,
+ int insist_add);
+struct hlist_node *cfs_hash_bd_finddel_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bd, const void *key,
+ struct hlist_node *hnode);
+
+/**
+ * operations on cfs_hash bucket (bd: bucket descriptor),
+ * they are safe for hash-table with rehash
+ */
+void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds);
+void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
+void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
+
+static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key,
+ cfs_hash_bd_t *bds, int excl)
+{
+ cfs_hash_dual_bd_get(hs, key, bds);
+ cfs_hash_dual_bd_lock(hs, bds, excl);
+}
+
+struct hlist_node *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bds,
+ const void *key);
+struct hlist_node *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bds,
+ const void *key,
+ struct hlist_node *hnode,
+ int insist_add);
+struct hlist_node *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bds,
+ const void *key,
+ struct hlist_node *hnode);
+
+/* Hash init/cleanup functions */
+cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
+ unsigned bkt_bits, unsigned extra_bytes,
+ unsigned min_theta, unsigned max_theta,
+ cfs_hash_ops_t *ops, unsigned flags);
+
+cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs);
+void cfs_hash_putref(cfs_hash_t *hs);
+
+/* Hash addition functions */
+void cfs_hash_add(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode);
+int cfs_hash_add_unique(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode);
+void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode);
+
+/* Hash deletion functions */
+void *cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode);
+void *cfs_hash_del_key(cfs_hash_t *hs, const void *key);
+
+/* Hash lookup/for_each functions */
+#define CFS_HASH_LOOP_HOG 1024
+
+typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *node, void *data);
+void *cfs_hash_lookup(cfs_hash_t *hs, const void *key);
+void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
+void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
+int cfs_hash_for_each_nolock(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t, void *data);
+int cfs_hash_for_each_empty(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t, void *data);
+void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key,
+ cfs_hash_for_each_cb_t, void *data);
+typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
+void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data);
+
+void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
+ cfs_hash_for_each_cb_t, void *data);
+int cfs_hash_is_empty(cfs_hash_t *hs);
+__u64 cfs_hash_size_get(cfs_hash_t *hs);
+
+/*
+ * Rehash - Theta is calculated to be the average chained
+ * hash depth assuming a perfectly uniform hash funcion.
+ */
+void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs);
+void cfs_hash_rehash_cancel(cfs_hash_t *hs);
+int cfs_hash_rehash(cfs_hash_t *hs, int do_rehash);
+void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key,
+ void *new_key, struct hlist_node *hnode);
+
+#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
+/* Validate hnode references the correct key */
+static inline void
+cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode)
+{
+ LASSERT(cfs_hash_keycmp(hs, key, hnode));
+}
+
+/* Validate hnode is in the correct bucket */
+static inline void
+cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ cfs_hash_bd_t bds[2];
+
+ cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
+ LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
+ bds[1].bd_bucket == bd->bd_bucket);
+}
+
+#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
+
+static inline void
+cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode) {}
+
+static inline void
+cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode) {}
+
+#endif /* CFS_HASH_DEBUG_LEVEL */
+
+#define CFS_HASH_THETA_BITS 10
+#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1))
+#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1))
+
+/* Return integer component of theta */
+static inline int __cfs_hash_theta_int(int theta)
+{
+ return (theta >> CFS_HASH_THETA_BITS);
+}
+
+/* Return a fractional value between 0 and 999 */
+static inline int __cfs_hash_theta_frac(int theta)
+{
+ return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
+ (__cfs_hash_theta_int(theta) * 1000);
+}
+
+static inline int __cfs_hash_theta(cfs_hash_t *hs)
+{
+ return (atomic_read(&hs->hs_count) <<
+ CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
+}
+
+static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max)
+{
+ LASSERT(min < max);
+ hs->hs_min_theta = (__u16)min;
+ hs->hs_max_theta = (__u16)max;
+}
+
+/* Generic debug formatting routines mainly for proc handler */
+struct seq_file;
+int cfs_hash_debug_header(struct seq_file *m);
+int cfs_hash_debug_str(cfs_hash_t *hs, struct seq_file *m);
+
+/*
+ * Generic djb2 hash algorithm for character arrays.
+ */
+static inline unsigned
+cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
+{
+ unsigned i, hash = 5381;
+
+ LASSERT(key != NULL);
+
+ for (i = 0; i < size; i++)
+ hash = hash * 33 + ((char *)key)[i];
+
+ return (hash & mask);
+}
+
+/*
+ * Generic u32 hash algorithm.
+ */
+static inline unsigned
+cfs_hash_u32_hash(const __u32 key, unsigned mask)
+{
+ return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
+}
+
+/*
+ * Generic u64 hash algorithm.
+ */
+static inline unsigned
+cfs_hash_u64_hash(const __u64 key, unsigned mask)
+{
+ return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
+}
+
+/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */
+#define cfs_hash_for_each_bd(bds, n, i) \
+ for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
+
+/** iterate over all buckets of @hs */
+#define cfs_hash_for_each_bucket(hs, bd, pos) \
+ for (pos = 0; \
+ pos < CFS_HASH_NBKT(hs) && \
+ ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
+
+/** iterate over all hlist of bucket @bd */
+#define cfs_hash_bd_for_each_hlist(hs, bd, hlist) \
+ for ((bd)->bd_offset = 0; \
+ (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) && \
+ (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL; \
+ (bd)->bd_offset++)
+
+/* !__LIBCFS__HASH_H__ */
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h
new file mode 100644
index 000000000000..bfa6d7b245ea
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_heap.h
@@ -0,0 +1,200 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ */
+/*
+ * libcfs/include/libcfs/heap.h
+ *
+ * Author: Eric Barton <eeb@whamcloud.com>
+ * Liang Zhen <liang@whamcloud.com>
+ */
+
+#ifndef __LIBCFS_HEAP_H__
+#define __LIBCFS_HEAP_H__
+
+/** \defgroup heap Binary heap
+ *
+ * The binary heap is a scalable data structure created using a binary tree. It
+ * is capable of maintaining large sets of elements sorted usually by one or
+ * more element properties, but really based on anything that can be used as a
+ * binary predicate in order to determine the relevant ordering of any two nodes
+ * that belong to the set. There is no search operation, rather the intention is
+ * for the element of the lowest priority which will always be at the root of
+ * the tree (as this is an implementation of a min-heap) to be removed by users
+ * for consumption.
+ *
+ * Users of the heap should embed a \e cfs_binheap_node_t object instance on
+ * every object of the set that they wish the binary heap instance to handle,
+ * and (at a minimum) provide a cfs_binheap_ops_t::hop_compare() implementation
+ * which is used by the heap as the binary predicate during its internal sorting
+ * operations.
+ *
+ * The current implementation enforces no locking scheme, and so assumes the
+ * user caters for locking between calls to insert, delete and lookup
+ * operations. Since the only consumer for the data structure at this point
+ * are NRS policies, and these operate on a per-CPT basis, binary heap instances
+ * are tied to a specific CPT.
+ * @{
+ */
+
+/**
+ * Binary heap node.
+ *
+ * Objects of this type are embedded into objects of the ordered set that is to
+ * be maintained by a \e cfs_binheap_t instance.
+ */
+typedef struct {
+ /** Index into the binary tree */
+ unsigned int chn_index;
+} cfs_binheap_node_t;
+
+#define CBH_SHIFT 9
+#define CBH_SIZE (1 << CBH_SHIFT) /* # ptrs per level */
+#define CBH_MASK (CBH_SIZE - 1)
+#define CBH_NOB (CBH_SIZE * sizeof(cfs_binheap_node_t *))
+
+#define CBH_POISON 0xdeadbeef
+
+/**
+ * Binary heap flags.
+ */
+enum {
+ CBH_FLAG_ATOMIC_GROW = 1,
+};
+
+struct cfs_binheap;
+
+/**
+ * Binary heap operations.
+ */
+typedef struct {
+ /**
+ * Called right before inserting a node into the binary heap.
+ *
+ * Implementing this operation is optional.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ */
+ int (*hop_enter)(struct cfs_binheap *h,
+ cfs_binheap_node_t *e);
+ /**
+ * Called right after removing a node from the binary heap.
+ *
+ * Implementing this operation is optional.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ */
+ void (*hop_exit)(struct cfs_binheap *h,
+ cfs_binheap_node_t *e);
+ /**
+ * A binary predicate which is called during internal heap sorting
+ * operations, and used in order to determine the relevant ordering of
+ * two heap nodes.
+ *
+ * Implementing this operation is mandatory.
+ *
+ * \param[in] a The first heap node
+ * \param[in] b The second heap node
+ *
+ * \retval 0 Node a > node b
+ * \retval 1 Node a < node b
+ *
+ * \see cfs_binheap_bubble()
+ * \see cfs_biheap_sink()
+ */
+ int (*hop_compare)(cfs_binheap_node_t *a,
+ cfs_binheap_node_t *b);
+} cfs_binheap_ops_t;
+
+/**
+ * Binary heap object.
+ *
+ * Sorts elements of type \e cfs_binheap_node_t
+ */
+typedef struct cfs_binheap {
+ /** Triple indirect */
+ cfs_binheap_node_t ****cbh_elements3;
+ /** double indirect */
+ cfs_binheap_node_t ***cbh_elements2;
+ /** single indirect */
+ cfs_binheap_node_t **cbh_elements1;
+ /** # elements referenced */
+ unsigned int cbh_nelements;
+ /** high water mark */
+ unsigned int cbh_hwm;
+ /** user flags */
+ unsigned int cbh_flags;
+ /** operations table */
+ cfs_binheap_ops_t *cbh_ops;
+ /** private data */
+ void *cbh_private;
+ /** associated CPT table */
+ struct cfs_cpt_table *cbh_cptab;
+ /** associated CPT id of this cfs_binheap_t::cbh_cptab */
+ int cbh_cptid;
+} cfs_binheap_t;
+
+void cfs_binheap_destroy(cfs_binheap_t *h);
+cfs_binheap_t *cfs_binheap_create(cfs_binheap_ops_t *ops, unsigned int flags,
+ unsigned count, void *arg,
+ struct cfs_cpt_table *cptab, int cptid);
+cfs_binheap_node_t *cfs_binheap_find(cfs_binheap_t *h, unsigned int idx);
+int cfs_binheap_insert(cfs_binheap_t *h, cfs_binheap_node_t *e);
+void cfs_binheap_remove(cfs_binheap_t *h, cfs_binheap_node_t *e);
+
+static inline int
+cfs_binheap_size(cfs_binheap_t *h)
+{
+ return h->cbh_nelements;
+}
+
+static inline int
+cfs_binheap_is_empty(cfs_binheap_t *h)
+{
+ return h->cbh_nelements == 0;
+}
+
+static inline cfs_binheap_node_t *
+cfs_binheap_root(cfs_binheap_t *h)
+{
+ return cfs_binheap_find(h, 0);
+}
+
+static inline cfs_binheap_node_t *
+cfs_binheap_remove_root(cfs_binheap_t *h)
+{
+ cfs_binheap_node_t *e = cfs_binheap_find(h, 0);
+
+ if (e != NULL)
+ cfs_binheap_remove(h, e);
+ return e;
+}
+
+/** @} heap */
+
+#endif /* __LIBCFS_HEAP_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
new file mode 100644
index 000000000000..5be367973508
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
@@ -0,0 +1,222 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_ioctl.h
+ *
+ * Low-level ioctl data structures. Kernel ioctl functions declared here,
+ * and user space functions are in libcfsutil_ioctl.h.
+ *
+ */
+
+#ifndef __LIBCFS_IOCTL_H__
+#define __LIBCFS_IOCTL_H__
+
+
+#define LIBCFS_IOCTL_VERSION 0x0001000a
+
+struct libcfs_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+
+ __u64 ioc_nid;
+ __u64 ioc_u64[1];
+
+ __u32 ioc_flags;
+ __u32 ioc_count;
+ __u32 ioc_net;
+ __u32 ioc_u32[7];
+
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+
+ __u32 ioc_plen1; /* buffers in userspace */
+ char *ioc_pbuf1;
+ __u32 ioc_plen2; /* buffers in userspace */
+ char *ioc_pbuf2;
+
+ char ioc_bulk[0];
+};
+
+
+struct libcfs_ioctl_hdr {
+ __u32 ioc_len;
+ __u32 ioc_version;
+};
+
+struct libcfs_debug_ioctl_data
+{
+ struct libcfs_ioctl_hdr hdr;
+ unsigned int subs;
+ unsigned int debug;
+};
+
+#define LIBCFS_IOC_INIT(data) \
+do { \
+ memset(&data, 0, sizeof(data)); \
+ data.ioc_version = LIBCFS_IOCTL_VERSION; \
+ data.ioc_len = sizeof(data); \
+} while (0)
+
+
+struct libcfs_ioctl_handler {
+ struct list_head item;
+ int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data);
+};
+
+#define DECLARE_IOCTL_HANDLER(ident, func) \
+ struct libcfs_ioctl_handler ident = { \
+ /* .item = */ LIST_HEAD_INIT(ident.item), \
+ /* .handle_ioctl = */ func \
+ }
+
+
+/* FIXME check conflict with lustre_lib.h */
+#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long)
+
+
+/* ioctls for manipulating snapshots 30- */
+#define IOC_LIBCFS_TYPE 'e'
+#define IOC_LIBCFS_MIN_NR 30
+/* libcfs ioctls */
+#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE)
+/* lnet ioctls */
+#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
+/* lnd ioctls */
+#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
+
+#define IOC_LIBCFS_MAX_NR 80
+
+static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
+{
+ int len = sizeof(*data);
+ len += cfs_size_round(data->ioc_inllen1);
+ len += cfs_size_round(data->ioc_inllen2);
+ return len;
+}
+
+static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
+{
+ if (data->ioc_len > (1<<30)) {
+ CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen1 > (1<<30)) {
+ CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen2 > (1<<30)) {
+ CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+ CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+ CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_pbuf1 && !data->ioc_plen1) {
+ CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_pbuf2 && !data->ioc_plen2) {
+ CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_plen1 && !data->ioc_pbuf1) {
+ CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+ return 1;
+ }
+ if (data->ioc_plen2 && !data->ioc_pbuf2) {
+ CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+ return 1;
+ }
+ if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) {
+ CERROR ("LIBCFS ioctl: packlen != ioc_len\n");
+ return 1;
+ }
+ if (data->ioc_inllen1 &&
+ data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
+ CERROR ("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
+ return 1;
+ }
+ if (data->ioc_inllen2 &&
+ data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
+ data->ioc_inllen2 - 1] != '\0') {
+ CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
+ return 1;
+ }
+ return 0;
+}
+
+
+extern int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+extern int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg);
+extern int libcfs_ioctl_popdata(void *arg, void *buf, int size);
+
+
+#endif /* __LIBCFS_IOCTL_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h
new file mode 100644
index 000000000000..596a15fc8996
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_kernelcomm.h
@@ -0,0 +1,117 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ *
+ * libcfs/include/libcfs/libcfs_kernelcomm.h
+ *
+ * Kernel <-> userspace communication routines.
+ * The definitions below are used in the kernel and userspace.
+ *
+ */
+
+#ifndef __LIBCFS_KERNELCOMM_H__
+#define __LIBCFS_KERNELCOMM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+/* KUC message header.
+ * All current and future KUC messages should use this header.
+ * To avoid having to include Lustre headers from libcfs, define this here.
+ */
+struct kuc_hdr {
+ __u16 kuc_magic;
+ __u8 kuc_transport; /* Each new Lustre feature should use a different
+ transport */
+ __u8 kuc_flags;
+ __u16 kuc_msgtype; /* Message type or opcode, transport-specific */
+ __u16 kuc_msglen; /* Including header */
+} __attribute__((aligned(sizeof(__u64))));
+
+#define KUC_MAGIC 0x191C /*Lustre9etLinC */
+#define KUC_FL_BLOCK 0x01 /* Wait for send */
+
+/* kuc_msgtype values are defined in each transport */
+enum kuc_transport_type {
+ KUC_TRANSPORT_GENERIC = 1,
+ KUC_TRANSPORT_HSM = 2,
+ KUC_TRANSPORT_CHANGELOG = 3,
+};
+
+enum kuc_generic_message_type {
+ KUC_MSG_SHUTDOWN = 1,
+};
+
+/* prototype for callback function on kuc groups */
+typedef int (*libcfs_kkuc_cb_t)(__u32 data, void *cb_arg);
+
+/* KUC Broadcast Groups. This determines which userspace process hears which
+ * messages. Mutliple transports may be used within a group, or multiple
+ * groups may use the same transport. Broadcast
+ * groups need not be used if e.g. a UID is specified instead;
+ * use group 0 to signify unicast.
+ */
+#define KUC_GRP_HSM 0x02
+#define KUC_GRP_MAX KUC_GRP_HSM
+
+/* Kernel methods */
+extern int libcfs_kkuc_msg_put(struct file *fp, void *payload);
+extern int libcfs_kkuc_group_put(int group, void *payload);
+extern int libcfs_kkuc_group_add(struct file *fp, int uid, int group,
+ __u32 data);
+extern int libcfs_kkuc_group_rem(int uid, int group);
+extern int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func,
+ void *cb_arg);
+
+#define LK_FLG_STOP 0x01
+
+/* kernelcomm control structure, passed from userspace to kernel */
+typedef struct lustre_kernelcomm {
+ __u32 lk_wfd;
+ __u32 lk_rfd;
+ __u32 lk_uid;
+ __u32 lk_group;
+ __u32 lk_data;
+ __u32 lk_flags;
+} __attribute__((packed)) lustre_kernelcomm;
+
+/* Userspace methods */
+extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups);
+extern int libcfs_ukuc_stop(lustre_kernelcomm *l);
+extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize,
+ int transport);
+
+#endif /* __LIBCFS_KERNELCOMM_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h
new file mode 100644
index 000000000000..9c40ed904da5
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h
@@ -0,0 +1,101 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_prim.h
+ *
+ * General primitives.
+ *
+ */
+
+#ifndef __LIBCFS_PRIM_H__
+#define __LIBCFS_PRIM_H__
+
+#ifndef EXPORT_SYMBOL
+# define EXPORT_SYMBOL(s)
+#endif
+
+/*
+ * Schedule
+ */
+void cfs_pause(cfs_duration_t ticks);
+
+/*
+ * Timer
+ */
+typedef void (cfs_timer_func_t)(ulong_ptr_t);
+void schedule_timeout_and_set_state(cfs_task_state_t, int64_t);
+
+void init_waitqueue_entry_current(wait_queue_t *link);
+int64_t waitq_timedwait(wait_queue_t *, cfs_task_state_t, int64_t);
+void waitq_wait(wait_queue_t *, cfs_task_state_t);
+void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *);
+
+void cfs_init_timer(timer_list_t *t);
+void cfs_timer_init(timer_list_t *t, cfs_timer_func_t *func, void *arg);
+void cfs_timer_done(timer_list_t *t);
+void cfs_timer_arm(timer_list_t *t, cfs_time_t deadline);
+void cfs_timer_disarm(timer_list_t *t);
+int cfs_timer_is_armed(timer_list_t *t);
+cfs_time_t cfs_timer_deadline(timer_list_t *t);
+
+/*
+ * Memory
+ */
+#ifndef memory_pressure_get
+#define memory_pressure_get() (0)
+#endif
+#ifndef memory_pressure_set
+#define memory_pressure_set() do {} while (0)
+#endif
+#ifndef memory_pressure_clr
+#define memory_pressure_clr() do {} while (0)
+#endif
+
+static inline int cfs_memory_pressure_get_and_set(void)
+{
+ int old = memory_pressure_get();
+
+ if (!old)
+ memory_pressure_set();
+ return old;
+}
+
+static inline void cfs_memory_pressure_restore(int old)
+{
+ if (old)
+ memory_pressure_set();
+ else
+ memory_pressure_clr();
+ return;
+}
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
new file mode 100644
index 000000000000..056caa467126
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -0,0 +1,577 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_private.h
+ *
+ * Various defines for libcfs.
+ *
+ */
+
+#ifndef __LIBCFS_PRIVATE_H__
+#define __LIBCFS_PRIVATE_H__
+
+/* XXX this layering violation is for nidstrings */
+#include <linux/lnet/types.h>
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+
+
+/*
+ * When this is on, LASSERT macro includes check for assignment used instead
+ * of equality check, but doesn't have unlikely(). Turn this on from time to
+ * time to make test-builds. This shouldn't be on for production release.
+ */
+#define LASSERT_CHECKED (0)
+
+
+#define LASSERTF(cond, fmt, ...) \
+do { \
+ if (unlikely(!(cond))) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL); \
+ libcfs_debug_msg(&__msg_data, \
+ "ASSERTION( %s ) failed: " fmt, #cond, \
+ ## __VA_ARGS__); \
+ lbug_with_loc(&__msg_data); \
+ } \
+} while (0)
+
+#define LASSERT(cond) LASSERTF(cond, "\n")
+
+#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
+/**
+ * This is for more expensive checks that one doesn't want to be enabled all
+ * the time. LINVRNT() has to be explicitly enabled by
+ * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option.
+ */
+# define LINVRNT(exp) LASSERT(exp)
+#else
+# define LINVRNT(exp) ((void)sizeof!!(exp))
+#endif
+
+#define KLASSERT(e) LASSERT(e)
+
+void lbug_with_loc(struct libcfs_debug_msg_data *) __attribute__((noreturn));
+
+#define LBUG() \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
+ lbug_with_loc(&msgdata); \
+} while(0)
+
+extern atomic_t libcfs_kmemory;
+/*
+ * Memory
+ */
+
+# define libcfs_kmem_inc(ptr, size) \
+do { \
+ atomic_add(size, &libcfs_kmemory); \
+} while (0)
+
+# define libcfs_kmem_dec(ptr, size) \
+do { \
+ atomic_sub(size, &libcfs_kmemory); \
+} while (0)
+
+# define libcfs_kmem_read() \
+ atomic_read(&libcfs_kmemory)
+
+
+#ifndef LIBCFS_VMALLOC_SIZE
+#define LIBCFS_VMALLOC_SIZE (2 << PAGE_CACHE_SHIFT) /* 2 pages */
+#endif
+
+#define LIBCFS_ALLOC_PRE(size, mask) \
+do { \
+ LASSERT(!in_interrupt() || \
+ ((size) <= LIBCFS_VMALLOC_SIZE && \
+ ((mask) & GFP_ATOMIC)) != 0); \
+} while (0)
+
+#define LIBCFS_ALLOC_POST(ptr, size) \
+do { \
+ if (unlikely((ptr) == NULL)) { \
+ CERROR("LNET: out of memory at %s:%d (tried to alloc '" \
+ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \
+ CERROR("LNET: %d total bytes allocated by lnet\n", \
+ libcfs_kmem_read()); \
+ } else { \
+ memset((ptr), 0, (size)); \
+ libcfs_kmem_inc((ptr), (size)); \
+ CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n", \
+ (int)(size), (ptr), libcfs_kmem_read()); \
+ } \
+} while (0)
+
+/**
+ * allocate memory with GFP flags @mask
+ */
+#define LIBCFS_ALLOC_GFP(ptr, size, mask) \
+do { \
+ LIBCFS_ALLOC_PRE((size), (mask)); \
+ (ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? \
+ kmalloc((size), (mask)) : vmalloc(size); \
+ LIBCFS_ALLOC_POST((ptr), (size)); \
+} while (0)
+
+/**
+ * default allocator
+ */
+#define LIBCFS_ALLOC(ptr, size) \
+ LIBCFS_ALLOC_GFP(ptr, size, __GFP_IO)
+
+/**
+ * non-sleeping allocator
+ */
+#define LIBCFS_ALLOC_ATOMIC(ptr, size) \
+ LIBCFS_ALLOC_GFP(ptr, size, GFP_ATOMIC)
+
+/**
+ * allocate memory for specified CPU partition
+ * \a cptab != NULL, \a cpt is CPU partition id of \a cptab
+ * \a cptab == NULL, \a cpt is HW NUMA node id
+ */
+#define LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, mask) \
+do { \
+ LIBCFS_ALLOC_PRE((size), (mask)); \
+ (ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? \
+ kmalloc_node((size), (mask), cfs_cpt_spread_node(cptab, cpt)) :\
+ vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); \
+ LIBCFS_ALLOC_POST((ptr), (size)); \
+} while (0)
+
+/** default numa allocator */
+#define LIBCFS_CPT_ALLOC(ptr, cptab, cpt, size) \
+ LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO)
+
+#define LIBCFS_FREE(ptr, size) \
+do { \
+ int s = (size); \
+ if (unlikely((ptr) == NULL)) { \
+ CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
+ "%s:%d\n", s, __FILE__, __LINE__); \
+ break; \
+ } \
+ libcfs_kmem_dec((ptr), s); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
+ s, (ptr), libcfs_kmem_read()); \
+ if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
+ vfree(ptr); \
+ else \
+ kfree(ptr); \
+} while (0)
+
+/******************************************************************************/
+
+/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+#endif
+
+void libcfs_debug_dumpstack(task_t *tsk);
+void libcfs_run_upcall(char **argv);
+void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *);
+void libcfs_debug_dumplog(void);
+int libcfs_debug_init(unsigned long bufsize);
+int libcfs_debug_cleanup(void);
+int libcfs_debug_clear_buffer(void);
+int libcfs_debug_mark_buffer(const char *text);
+
+void libcfs_debug_set_level(unsigned int debug_level);
+
+
+/*
+ * allocate per-cpu-partition data, returned value is an array of pointers,
+ * variable can be indexed by CPU ID.
+ * cptable != NULL: size of array is number of CPU partitions
+ * cptable == NULL: size of array is number of HW cores
+ */
+void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
+/*
+ * destory per-cpu-partition variable
+ */
+void cfs_percpt_free(void *vars);
+int cfs_percpt_number(void *vars);
+void *cfs_percpt_current(void *vars);
+void *cfs_percpt_index(void *vars, int idx);
+
+#define cfs_percpt_for_each(var, i, vars) \
+ for (i = 0; i < cfs_percpt_number(vars) && \
+ ((var) = (vars)[i]) != NULL; i++)
+
+/*
+ * allocate a variable array, returned value is an array of pointers.
+ * Caller can specify length of array by count.
+ */
+void *cfs_array_alloc(int count, unsigned int size);
+void cfs_array_free(void *vars);
+
+#define LASSERT_ATOMIC_ENABLED (1)
+
+#if LASSERT_ATOMIC_ENABLED
+
+/** assert value of @a is equal to @v */
+#define LASSERT_ATOMIC_EQ(a, v) \
+do { \
+ LASSERTF(atomic_read(a) == v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is unequal to @v */
+#define LASSERT_ATOMIC_NE(a, v) \
+do { \
+ LASSERTF(atomic_read(a) != v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is little than @v */
+#define LASSERT_ATOMIC_LT(a, v) \
+do { \
+ LASSERTF(atomic_read(a) < v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is little/equal to @v */
+#define LASSERT_ATOMIC_LE(a, v) \
+do { \
+ LASSERTF(atomic_read(a) <= v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is great than @v */
+#define LASSERT_ATOMIC_GT(a, v) \
+do { \
+ LASSERTF(atomic_read(a) > v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is great/equal to @v */
+#define LASSERT_ATOMIC_GE(a, v) \
+do { \
+ LASSERTF(atomic_read(a) >= v, \
+ "value: %d\n", atomic_read((a))); \
+} while (0)
+
+/** assert value of @a is great than @v1 and little than @v2 */
+#define LASSERT_ATOMIC_GT_LT(a, v1, v2) \
+do { \
+ int __v = atomic_read(a); \
+ LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v); \
+} while (0)
+
+/** assert value of @a is great than @v1 and little/equal to @v2 */
+#define LASSERT_ATOMIC_GT_LE(a, v1, v2) \
+do { \
+ int __v = atomic_read(a); \
+ LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v); \
+} while (0)
+
+/** assert value of @a is great/equal to @v1 and little than @v2 */
+#define LASSERT_ATOMIC_GE_LT(a, v1, v2) \
+do { \
+ int __v = atomic_read(a); \
+ LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v); \
+} while (0)
+
+/** assert value of @a is great/equal to @v1 and little/equal to @v2 */
+#define LASSERT_ATOMIC_GE_LE(a, v1, v2) \
+do { \
+ int __v = atomic_read(a); \
+ LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v); \
+} while (0)
+
+#else /* !LASSERT_ATOMIC_ENABLED */
+
+#define LASSERT_ATOMIC_EQ(a, v) do {} while (0)
+#define LASSERT_ATOMIC_NE(a, v) do {} while (0)
+#define LASSERT_ATOMIC_LT(a, v) do {} while (0)
+#define LASSERT_ATOMIC_LE(a, v) do {} while (0)
+#define LASSERT_ATOMIC_GT(a, v) do {} while (0)
+#define LASSERT_ATOMIC_GE(a, v) do {} while (0)
+#define LASSERT_ATOMIC_GT_LT(a, v1, v2) do {} while (0)
+#define LASSERT_ATOMIC_GT_LE(a, v1, v2) do {} while (0)
+#define LASSERT_ATOMIC_GE_LT(a, v1, v2) do {} while (0)
+#define LASSERT_ATOMIC_GE_LE(a, v1, v2) do {} while (0)
+
+#endif /* LASSERT_ATOMIC_ENABLED */
+
+#define LASSERT_ATOMIC_ZERO(a) LASSERT_ATOMIC_EQ(a, 0)
+#define LASSERT_ATOMIC_POS(a) LASSERT_ATOMIC_GT(a, 0)
+
+#define CFS_ALLOC_PTR(ptr) LIBCFS_ALLOC(ptr, sizeof (*(ptr)));
+#define CFS_FREE_PTR(ptr) LIBCFS_FREE(ptr, sizeof (*(ptr)));
+
+/*
+ * percpu partition lock
+ *
+ * There are some use-cases like this in Lustre:
+ * . each CPU partition has it's own private data which is frequently changed,
+ * and mostly by the local CPU partition.
+ * . all CPU partitions share some global data, these data are rarely changed.
+ *
+ * LNet is typical example.
+ * CPU partition lock is designed for this kind of use-cases:
+ * . each CPU partition has it's own private lock
+ * . change on private data just needs to take the private lock
+ * . read on shared data just needs to take _any_ of private locks
+ * . change on shared data needs to take _all_ private locks,
+ * which is slow and should be really rare.
+ */
+
+enum {
+ CFS_PERCPT_LOCK_EX = -1, /* negative */
+};
+
+
+struct cfs_percpt_lock {
+ /* cpu-partition-table for this lock */
+ struct cfs_cpt_table *pcl_cptab;
+ /* exclusively locked */
+ unsigned int pcl_locked;
+ /* private lock table */
+ spinlock_t **pcl_locks;
+};
+
+/* return number of private locks */
+static inline int
+cfs_percpt_lock_num(struct cfs_percpt_lock *pcl)
+{
+ return cfs_cpt_number(pcl->pcl_cptab);
+}
+
+
+/*
+ * create a cpu-partition lock based on CPU partition table \a cptab,
+ * each private lock has extra \a psize bytes padding data
+ */
+struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab);
+/* destroy a cpu-partition lock */
+void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
+
+/* lock private lock \a index of \a pcl */
+void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
+/* unlock private lock \a index of \a pcl */
+void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
+/* create percpt (atomic) refcount based on @cptab */
+atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val);
+/* destroy percpt refcount */
+void cfs_percpt_atomic_free(atomic_t **refs);
+/* return sum of all percpu refs */
+int cfs_percpt_atomic_summary(atomic_t **refs);
+
+
+/** Compile-time assertion.
+
+ * Check an invariant described by a constant expression at compile time by
+ * forcing a compiler error if it does not hold. \a cond must be a constant
+ * expression as defined by the ISO C Standard:
+ *
+ * 6.8.4.2 The switch statement
+ * ....
+ * [#3] The expression of each case label shall be an integer
+ * constant expression and no two of the case constant
+ * expressions in the same switch statement shall have the same
+ * value after conversion...
+ *
+ */
+#define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0)
+
+/* support decl needed both by kernel and liblustre */
+int libcfs_isknown_lnd(int type);
+char *libcfs_lnd2modname(int type);
+char *libcfs_lnd2str(int type);
+int libcfs_str2lnd(const char *str);
+char *libcfs_net2str(__u32 net);
+char *libcfs_nid2str(lnet_nid_t nid);
+__u32 libcfs_str2net(const char *str);
+lnet_nid_t libcfs_str2nid(const char *str);
+int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
+char *libcfs_id2str(lnet_process_id_t id);
+void cfs_free_nidlist(struct list_head *list);
+int cfs_parse_nidlist(char *str, int len, struct list_head *list);
+int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
+
+/** \addtogroup lnet_addr
+ * @{ */
+/* how an LNET NID encodes net:address */
+/** extract the address part of an lnet_nid_t */
+#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff))
+/** extract the network part of an lnet_nid_t */
+#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff)
+/** make an lnet_nid_t from a network part and an address part */
+#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr)))
+/* how net encodes type:number */
+#define LNET_NETNUM(net) ((net) & 0xffff)
+#define LNET_NETTYP(net) (((net) >> 16) & 0xffff)
+#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num)))
+/** @} lnet_addr */
+
+/* max value for numeric network address */
+#define MAX_NUMERIC_VALUE 0xffffffff
+
+/* implication */
+#define ergo(a, b) (!(a) || (b))
+/* logical equivalence */
+#define equi(a, b) (!!(a) == !!(b))
+
+#ifndef CFS_CURRENT_TIME
+# define CFS_CURRENT_TIME time(0)
+#endif
+
+/* --------------------------------------------------------------------
+ * Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect.
+ * All stuff about lwt are put in arch/kp30.h
+ * -------------------------------------------------------------------- */
+
+struct libcfs_device_userstate
+{
+ int ldu_memhog_pages;
+ struct page *ldu_memhog_root_page;
+};
+
+/* what used to be in portals_lib.h */
+#ifndef MIN
+# define MIN(a,b) (((a)<(b)) ? (a): (b))
+#endif
+#ifndef MAX
+# define MAX(a,b) (((a)>(b)) ? (a): (b))
+#endif
+
+#define MKSTR(ptr) ((ptr))? (ptr) : ""
+
+static inline int cfs_size_round4 (int val)
+{
+ return (val + 3) & (~0x3);
+}
+
+#ifndef HAVE_CFS_SIZE_ROUND
+static inline int cfs_size_round (int val)
+{
+ return (val + 7) & (~0x7);
+}
+#define HAVE_CFS_SIZE_ROUND
+#endif
+
+static inline int cfs_size_round16(int val)
+{
+ return (val + 0xf) & (~0xf);
+}
+
+static inline int cfs_size_round32(int val)
+{
+ return (val + 0x1f) & (~0x1f);
+}
+
+static inline int cfs_size_round0(int val)
+{
+ if (!val)
+ return 0;
+ return (val + 1 + 7) & (~0x7);
+}
+
+static inline size_t cfs_round_strlen(char *fset)
+{
+ return (size_t)cfs_size_round((int)strlen(fset) + 1);
+}
+
+/* roundup \a val to power2 */
+static inline unsigned int cfs_power2_roundup(unsigned int val)
+{
+ if (val != LOWEST_BIT_SET(val)) { /* not a power of 2 already */
+ do {
+ val &= ~LOWEST_BIT_SET(val);
+ } while (val != LOWEST_BIT_SET(val));
+ /* ...and round up */
+ val <<= 1;
+ }
+ return val;
+}
+
+#define LOGL(var,len,ptr) \
+do { \
+ if (var) \
+ memcpy((char *)ptr, (const char *)var, len); \
+ ptr += cfs_size_round(len); \
+} while (0)
+
+#define LOGU(var,len,ptr) \
+do { \
+ if (var) \
+ memcpy((char *)var, (const char *)ptr, len); \
+ ptr += cfs_size_round(len); \
+} while (0)
+
+#define LOGL0(var,len,ptr) \
+do { \
+ if (!len) \
+ break; \
+ memcpy((char *)ptr, (const char *)var, len); \
+ *((char *)(ptr) + len) = 0; \
+ ptr += cfs_size_round(len + 1); \
+} while (0)
+
+/**
+ * Lustre Network Driver types.
+ */
+enum {
+ /* Only add to these values (i.e. don't ever change or redefine them):
+ * network addresses depend on them... */
+ QSWLND = 1,
+ SOCKLND = 2,
+ GMLND = 3, /* obsolete, keep it so that libcfs_nid2str works */
+ PTLLND = 4,
+ O2IBLND = 5,
+ CIBLND = 6,
+ OPENIBLND = 7,
+ IIBLND = 8,
+ LOLND = 9,
+ RALND = 10,
+ VIBLND = 11,
+ MXLND = 12,
+ GNILND = 13,
+};
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
new file mode 100644
index 000000000000..a6bac9c36339
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
@@ -0,0 +1,137 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_string.h
+ *
+ * Generic string manipulation functions.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ */
+
+#ifndef __LIBCFS_STRING_H__
+#define __LIBCFS_STRING_H__
+
+/* libcfs_string.c */
+/* string comparison ignoring case */
+int cfs_strncasecmp(const char *s1, const char *s2, size_t n);
+/* Convert a text string to a bitmask */
+int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
+ int *oldmask, int minmask, int allmask);
+
+/* Allocate space for and copy an existing string.
+ * Must free with kfree().
+ */
+char *cfs_strdup(const char *str, u_int32_t flags);
+
+/* safe vsnprintf */
+int cfs_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+/* safe snprintf */
+int cfs_snprintf(char *buf, size_t size, const char *fmt, ...);
+
+/* trim leading and trailing space characters */
+char *cfs_firststr(char *str, size_t size);
+
+/**
+ * Structure to represent NULL-less strings.
+ */
+struct cfs_lstr {
+ char *ls_str;
+ int ls_len;
+};
+
+/*
+ * Structure to represent \<range_expr\> token of the syntax.
+ */
+struct cfs_range_expr {
+ /*
+ * Link to cfs_expr_list::el_exprs.
+ */
+ struct list_head re_link;
+ __u32 re_lo;
+ __u32 re_hi;
+ __u32 re_stride;
+};
+
+struct cfs_expr_list {
+ struct list_head el_link;
+ struct list_head el_exprs;
+};
+
+static inline int
+cfs_iswhite(char c)
+{
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ return 1;
+ default:
+ break;
+ }
+ return 0;
+}
+
+char *cfs_trimwhite(char *str);
+int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
+int cfs_str2num_check(char *str, int nob, unsigned *num,
+ unsigned min, unsigned max);
+int cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+ int single_tok, struct cfs_range_expr **expr);
+int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list);
+int cfs_expr_list_values(struct cfs_expr_list *expr_list,
+ int max, __u32 **values);
+static inline void
+cfs_expr_list_values_free(__u32 *values, int num)
+{
+ /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
+ * by OBD_FREE() if it's called by module other than libcfs & LNet,
+ * otherwise we will see fake memory leak */
+ LIBCFS_FREE(values, num * sizeof(values[0]));
+}
+
+void cfs_expr_list_free(struct cfs_expr_list *expr_list);
+void cfs_expr_list_print(struct cfs_expr_list *expr_list);
+int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+ struct cfs_expr_list **elpp);
+void cfs_expr_list_free_list(struct list_head *list);
+int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
+int cfs_ip_addr_match(__u32 addr, struct list_head *list);
+void cfs_ip_addr_free(struct list_head *list);
+
+#define strtoul(str, endp, base) simple_strtoul(str, endp, base)
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h
new file mode 100644
index 000000000000..4bdd77163d5e
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h
@@ -0,0 +1,132 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_time.h
+ *
+ * Time functions.
+ *
+ */
+
+#ifndef __LIBCFS_TIME_H__
+#define __LIBCFS_TIME_H__
+/*
+ * generic time manipulation functions.
+ */
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+ return (cfs_time_t)(t + d);
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+ return (cfs_time_t)(t1 - t2);
+}
+
+static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
+{
+ return cfs_time_before(t2, t1);
+}
+
+static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
+{
+ return cfs_time_beforeq(t2, t1);
+}
+
+
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+ return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
+}
+
+static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
+ struct timeval *result)
+{
+ long r = (long) (
+ (large->tv_sec - small->tv_sec) * ONE_MILLION +
+ (large->tv_usec - small->tv_usec));
+ if (result != NULL) {
+ result->tv_usec = r % ONE_MILLION;
+ result->tv_sec = r / ONE_MILLION;
+ }
+ return r;
+}
+
+static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
+{
+ if (cfs_time_after(cfs_time_current(),
+ cfs_time_add(now, cfs_time_seconds(15))))
+ CERROR("slow %s "CFS_TIME_T" sec\n", msg,
+ cfs_duration_sec(cfs_time_sub(cfs_time_current(),now)));
+}
+
+#define CFS_RATELIMIT(seconds) \
+({ \
+ /* \
+ * XXX nikita: non-portable initializer \
+ */ \
+ static time_t __next_message = 0; \
+ int result; \
+ \
+ if (cfs_time_after(cfs_time_current(), __next_message)) \
+ result = 1; \
+ else { \
+ __next_message = cfs_time_shift(seconds); \
+ result = 0; \
+ } \
+ result; \
+})
+
+/*
+ * helper function similar to do_gettimeofday() of Linux kernel
+ */
+static inline void cfs_fs_timeval(struct timeval *tv)
+{
+ cfs_fs_time_t time;
+
+ cfs_fs_time_current(&time);
+ cfs_fs_time_usec(&time, tv);
+}
+
+/*
+ * return valid time-out based on user supplied one. Currently we only check
+ * that time-out is not shorted than allowed.
+ */
+static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
+{
+ if (timeout < CFS_TICK)
+ timeout = CFS_TICK;
+ return timeout;
+}
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
new file mode 100644
index 000000000000..5cc64f327a87
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
@@ -0,0 +1,110 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/libcfs_workitem.h
+ *
+ * Author: Isaac Huang <he.h.huang@oracle.com>
+ * Liang Zhen <zhen.liang@sun.com>
+ *
+ * A workitems is deferred work with these semantics:
+ * - a workitem always runs in thread context.
+ * - a workitem can be concurrent with other workitems but is strictly
+ * serialized with respect to itself.
+ * - no CPU affinity, a workitem does not necessarily run on the same CPU
+ * that schedules it. However, this might change in the future.
+ * - if a workitem is scheduled again before it has a chance to run, it
+ * runs only once.
+ * - if a workitem is scheduled while it runs, it runs again after it
+ * completes; this ensures that events occurring while other events are
+ * being processed receive due attention. This behavior also allows a
+ * workitem to reschedule itself.
+ *
+ * Usage notes:
+ * - a workitem can sleep but it should be aware of how that sleep might
+ * affect others.
+ * - a workitem runs inside a kernel thread so there's no user space to access.
+ * - do not use a workitem if the scheduling latency can't be tolerated.
+ *
+ * When wi_action returns non-zero, it means the workitem has either been
+ * freed or reused and workitem scheduler won't touch it any more.
+ */
+
+#ifndef __LIBCFS_WORKITEM_H__
+#define __LIBCFS_WORKITEM_H__
+
+struct cfs_wi_sched;
+
+void cfs_wi_sched_destroy(struct cfs_wi_sched *);
+int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
+ int nthrs, struct cfs_wi_sched **);
+
+struct cfs_workitem;
+
+typedef int (*cfs_wi_action_t) (struct cfs_workitem *);
+typedef struct cfs_workitem {
+ /** chain on runq or rerunq */
+ struct list_head wi_list;
+ /** working function */
+ cfs_wi_action_t wi_action;
+ /** arg for working function */
+ void *wi_data;
+ /** in running */
+ unsigned short wi_running:1;
+ /** scheduled */
+ unsigned short wi_scheduled:1;
+} cfs_workitem_t;
+
+static inline void
+cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action)
+{
+ INIT_LIST_HEAD(&wi->wi_list);
+
+ wi->wi_running = 0;
+ wi->wi_scheduled = 0;
+ wi->wi_data = data;
+ wi->wi_action = action;
+}
+
+void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+int cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+
+int cfs_wi_startup(void);
+void cfs_wi_shutdown(void);
+
+/** # workitem scheduler loops before reschedule */
+#define CFS_WI_RESCHED 128
+
+#endif /* __LIBCFS_WORKITEM_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h b/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h
new file mode 100644
index 000000000000..4b7ae1c5bd3b
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/kp30.h
@@ -0,0 +1,286 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_KP30_H__
+#define __LIBCFS_LINUX_KP30_H__
+
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
+#include <linux/smp.h>
+#include <linux/ctype.h>
+#include <linux/compiler.h>
+#ifdef HAVE_MM_INLINE
+# include <linux/mm_inline.h>
+#endif
+#include <linux/kallsyms.h>
+#include <linux/moduleparam.h>
+#include <linux/scatterlist.h>
+
+#include <linux/libcfs/linux/portals_compat25.h>
+
+
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_WORK((wq), (void *)(cb)); \
+} while (0)
+
+#define cfs_get_work_data(type,field,data) container_of(data,type,field)
+
+
+#define our_recalc_sigpending(current) recalc_sigpending()
+#define strtok(a,b) strpbrk(a, b)
+#define work_struct_t struct work_struct
+
+#ifdef CONFIG_SMP
+#else
+#endif
+
+
+#define SEM_COUNT(sem) ((sem)->count)
+
+
+/* ------------------------------------------------------------------- */
+
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+
+
+
+/******************************************************************************/
+/* Module parameter support */
+#define CFS_MODULE_PARM(name, t, type, perm, desc) \
+ module_param(name, type, perm);\
+ MODULE_PARM_DESC(name, desc)
+
+#define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */
+
+/******************************************************************************/
+
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+
+#endif /* __GNUC__ */
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+# define cfs_num_present_cpus() num_present_cpus()
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT 0
+
+#define LWT_MEMORY (16<<20)
+
+#ifndef KLWT_SUPPORT
+# if !defined(BITS_PER_LONG)
+# error "BITS_PER_LONG not defined"
+# endif
+
+/* kernel hasn't defined this? */
+typedef struct {
+ long long lwte_when;
+ char *lwte_where;
+ void *lwte_task;
+ long lwte_p1;
+ long lwte_p2;
+ long lwte_p3;
+ long lwte_p4;
+# if BITS_PER_LONG > 32
+ long lwte_pad;
+# endif
+} lwt_event_t;
+#endif /* !KLWT_SUPPORT */
+
+#if LWT_SUPPORT
+# if !KLWT_SUPPORT
+
+typedef struct _lwt_page {
+ struct list_head lwtp_list;
+ struct page *lwtp_page;
+ lwt_event_t *lwtp_events;
+} lwt_page_t;
+
+typedef struct {
+ int lwtc_current_index;
+ lwt_page_t *lwtc_current_page;
+} lwt_cpu_t;
+
+extern int lwt_enabled;
+extern lwt_cpu_t lwt_cpus[];
+
+/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
+ * This stuff is meant for finding specific problems; it never stays in
+ * production code... */
+
+#define LWTSTR(n) #n
+#define LWTWHERE(f,l) f ":" LWTSTR(l)
+#define LWT_EVENTS_PER_PAGE (PAGE_CACHE_SIZE / sizeof (lwt_event_t))
+
+#define LWT_EVENT(p1, p2, p3, p4) \
+do { \
+ unsigned long flags; \
+ lwt_cpu_t *cpu; \
+ lwt_page_t *p; \
+ lwt_event_t *e; \
+ \
+ if (lwt_enabled) { \
+ local_irq_save (flags); \
+ \
+ cpu = &lwt_cpus[smp_processor_id()]; \
+ p = cpu->lwtc_current_page; \
+ e = &p->lwtp_events[cpu->lwtc_current_index++]; \
+ \
+ if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
+ cpu->lwtc_current_page = \
+ list_entry (p->lwtp_list.next, \
+ lwt_page_t, lwtp_list); \
+ cpu->lwtc_current_index = 0; \
+ } \
+ \
+ e->lwte_when = get_cycles(); \
+ e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
+ e->lwte_task = current; \
+ e->lwte_p1 = (long)(p1); \
+ e->lwte_p2 = (long)(p2); \
+ e->lwte_p3 = (long)(p3); \
+ e->lwte_p4 = (long)(p4); \
+ \
+ local_irq_restore (flags); \
+ } \
+} while (0)
+
+#endif /* !KLWT_SUPPORT */
+
+extern int lwt_init (void);
+extern void lwt_fini (void);
+extern int lwt_lookup_string (int *size, char *knlptr,
+ char *usrptr, int usrsize);
+extern int lwt_control (int enable, int clear);
+extern int lwt_snapshot (cfs_cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size);
+#endif /* LWT_SUPPORT */
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_LIBCFS_TYPE long
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+# define BITS_PER_LONG 64
+# endif
+#endif
+
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+
+/* this is a bit chunky */
+
+#define _LWORDSIZE BITS_PER_LONG
+
+# define LPU64 "%llu"
+# define LPD64 "%lld"
+# define LPX64 "%#llx"
+# define LPX64i "%llx"
+# define LPO64 "%#llo"
+# define LPF64 "L"
+
+/*
+ * long_ptr_t & ulong_ptr_t, same to "long" for gcc
+ */
+# define LPLU "%lu"
+# define LPLD "%ld"
+# define LPLX "%#lx"
+
+/*
+ * pid_t
+ */
+# define LPPID "%d"
+
+
+#undef _LWORDSIZE
+
+/* compat macroses */
+
+
+#ifndef get_cpu
+# ifdef CONFIG_PREEMPT
+# define get_cpu() ({ preempt_disable(); smp_processor_id(); })
+# define put_cpu() preempt_enable()
+# else
+# define get_cpu() smp_processor_id()
+# define put_cpu()
+# endif
+#else
+#endif /* get_cpu & put_cpu */
+
+#define INIT_CTL_NAME(a)
+#define INIT_STRATEGY(a)
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
new file mode 100644
index 000000000000..292a3ba1fb96
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
@@ -0,0 +1,125 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_LIBCFS_H__
+#define __LIBCFS_LINUX_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+
+#include <stdarg.h>
+#include <linux/libcfs/linux/linux-cpu.h>
+#include <linux/libcfs/linux/linux-time.h>
+#include <linux/libcfs/linux/linux-mem.h>
+#include <linux/libcfs/linux/linux-prim.h>
+#include <linux/libcfs/linux/linux-lock.h>
+#include <linux/libcfs/linux/linux-fs.h>
+#include <linux/libcfs/linux/linux-tcpip.h>
+#include <linux/libcfs/linux/linux-bitops.h>
+#include <linux/libcfs/linux/linux-types.h>
+#include <linux/libcfs/linux/kp30.h>
+
+#include <asm/types.h>
+#include <linux/types.h>
+#include <asm/timex.h>
+#include <linux/sched.h> /* THREAD_SIZE */
+#include <linux/rbtree.h>
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#if !defined(__x86_64__)
+# ifdef __ia64__
+# define CDEBUG_STACK() (THREAD_SIZE - \
+ ((unsigned long)__builtin_dwarf_cfa() & \
+ (THREAD_SIZE - 1)))
+# else
+# define CDEBUG_STACK() (THREAD_SIZE - \
+ ((unsigned long)__builtin_frame_address(0) & \
+ (THREAD_SIZE - 1)))
+# endif /* __ia64__ */
+
+#define __CHECK_STACK(msgdata, mask, cdls) \
+do { \
+ if (unlikely(CDEBUG_STACK() > libcfs_stack)) { \
+ LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL); \
+ libcfs_stack = CDEBUG_STACK(); \
+ libcfs_debug_msg(msgdata, \
+ "maximum lustre stack %lu\n", \
+ CDEBUG_STACK()); \
+ (msgdata)->msg_mask = mask; \
+ (msgdata)->msg_cdls = cdls; \
+ dump_stack(); \
+ /*panic("LBUG");*/ \
+ } \
+} while (0)
+#define CFS_CHECK_STACK(msgdata, mask, cdls) __CHECK_STACK(msgdata, mask, cdls)
+#else /* __x86_64__ */
+#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while(0)
+#define CDEBUG_STACK() (0L)
+#endif /* __x86_64__ */
+
+/* initial pid */
+#define LUSTRE_LNET_PID 12345
+
+#define ENTRY_NESTING_SUPPORT (1)
+#define ENTRY_NESTING do {;} while (0)
+#define EXIT_NESTING do {;} while (0)
+#define __current_nesting_level() (0)
+
+/**
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in linux-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
+
+#include <linux/capability.h>
+
+/* long integer with size equal to pointer */
+typedef unsigned long ulong_ptr_t;
+typedef long long_ptr_t;
+
+#ifndef WITH_WATCHDOG
+#define WITH_WATCHDOG
+#endif
+
+
+
+
+#endif /* _LINUX_LIBCFS_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h
new file mode 100644
index 000000000000..43936e349dd4
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-bitops.h
@@ -0,0 +1,38 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-bitops.h
+ */
+#include <linux/bitops.h>
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
new file mode 100644
index 000000000000..224371c92f7c
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -0,0 +1,175 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-mem.h
+ *
+ * Basic library routines.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef __LIBCFS_LINUX_CPU_H__
+#define __LIBCFS_LINUX_CPU_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/topology.h>
+#include <linux/version.h>
+
+
+#ifdef CONFIG_SMP
+
+#define HAVE_LIBCFS_CPT
+
+/** virtual processing unit */
+struct cfs_cpu_partition {
+ /* CPUs mask for this partition */
+ cpumask_t *cpt_cpumask;
+ /* nodes mask for this partition */
+ nodemask_t *cpt_nodemask;
+ /* spread rotor for NUMA allocator */
+ unsigned cpt_spread_rotor;
+};
+
+/** descriptor for CPU partitions */
+struct cfs_cpt_table {
+ /* version, reserved for hotplug */
+ unsigned ctb_version;
+ /* spread rotor for NUMA allocator */
+ unsigned ctb_spread_rotor;
+ /* # of CPU partitions */
+ unsigned ctb_nparts;
+ /* partitions tables */
+ struct cfs_cpu_partition *ctb_parts;
+ /* shadow HW CPU to CPU partition ID */
+ int *ctb_cpu2cpt;
+ /* all cpus in this partition table */
+ cpumask_t *ctb_cpumask;
+ /* all nodes in this partition table */
+ nodemask_t *ctb_nodemask;
+};
+
+void cfs_cpu_core_siblings(int cpu, cpumask_t *mask);
+void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask);
+void cfs_node_to_cpumask(int node, cpumask_t *mask);
+int cfs_cpu_core_nsiblings(int cpu);
+int cfs_cpu_ht_nsiblings(int cpu);
+
+/**
+ * comment out definitions for compatible layer
+ * #define CFS_CPU_NR NR_CPUS
+ *
+ * typedef cpumask_t cfs_cpumask_t;
+ *
+ * #define cfs_cpu_current() smp_processor_id()
+ * #define cfs_cpu_online(i) cpu_online(i)
+ * #define cfs_cpu_online_num() num_online_cpus()
+ * #define cfs_cpu_online_for_each(i) for_each_online_cpu(i)
+ * #define cfs_cpu_possible_num() num_possible_cpus()
+ * #define cfs_cpu_possible_for_each(i) for_each_possible_cpu(i)
+ *
+ * #ifdef CONFIG_CPUMASK_SIZE
+ * #define cfs_cpu_mask_size() cpumask_size()
+ * #else
+ * #define cfs_cpu_mask_size() sizeof(cfs_cpumask_t)
+ * #endif
+ *
+ * #define cfs_cpu_mask_set(i, mask) cpu_set(i, mask)
+ * #define cfs_cpu_mask_unset(i, mask) cpu_clear(i, mask)
+ * #define cfs_cpu_mask_isset(i, mask) cpu_isset(i, mask)
+ * #define cfs_cpu_mask_clear(mask) cpus_clear(mask)
+ * #define cfs_cpu_mask_empty(mask) cpus_empty(mask)
+ * #define cfs_cpu_mask_weight(mask) cpus_weight(mask)
+ * #define cfs_cpu_mask_first(mask) first_cpu(mask)
+ * #define cfs_cpu_mask_any_online(mask) (any_online_cpu(mask) != NR_CPUS)
+ * #define cfs_cpu_mask_for_each(i, mask) for_each_cpu_mask(i, mask)
+ * #define cfs_cpu_mask_bind(t, mask) set_cpus_allowed(t, mask)
+ *
+ * #ifdef HAVE_CPUMASK_COPY
+ * #define cfs_cpu_mask_copy(dst, src) cpumask_copy(dst, src)
+ * #else
+ * #define cfs_cpu_mask_copy(dst, src) memcpy(dst, src, sizeof(*src))
+ * #endif
+ *
+ * static inline void
+ * cfs_cpu_mask_of_online(cfs_cpumask_t *mask)
+ * {
+ * cfs_cpu_mask_copy(mask, &cpu_online_map);
+ * }
+ *
+ * #ifdef CONFIG_NUMA
+ *
+ * #define CFS_NODE_NR MAX_NUMNODES
+ *
+ * typedef nodemask_t cfs_node_mask_t;
+ *
+ * #define cfs_node_of_cpu(cpu) cpu_to_node(cpu)
+ * #define cfs_node_online(i) node_online(i)
+ * #define cfs_node_online_num() num_online_nodes()
+ * #define cfs_node_online_for_each(i) for_each_online_node(i)
+ * #define cfs_node_possible_num() num_possible_nodes()
+ * #define cfs_node_possible_for_each(i) for_each_node(i)
+ *
+ * static inline void cfs_node_to_cpumask(int node, cfs_cpumask_t *mask)
+ * {
+ * #if defined(HAVE_NODE_TO_CPUMASK)
+ * *mask = node_to_cpumask(node);
+ * #elif defined(HAVE_CPUMASK_OF_NODE)
+ * cfs_cpu_mask_copy(mask, cpumask_of_node(node));
+ * #else
+ * # error "Needs node_to_cpumask or cpumask_of_node"
+ * #endif
+ * }
+ *
+ * #define cfs_node_mask_set(i, mask) node_set(i, mask)
+ * #define cfs_node_mask_unset(i, mask) node_clear(i, mask)
+ * #define cfs_node_mask_isset(i, mask) node_isset(i, mask)
+ * #define cfs_node_mask_clear(mask) nodes_reset(mask)
+ * #define cfs_node_mask_empty(mask) nodes_empty(mask)
+ * #define cfs_node_mask_weight(mask) nodes_weight(mask)
+ * #define cfs_node_mask_for_each(i, mask) for_each_node_mask(i, mask)
+ * #define cfs_node_mask_copy(dst, src) memcpy(dst, src, sizeof(*src))
+ *
+ * static inline void
+ * cfs_node_mask_of_online(cfs_node_mask_t *mask)
+ * {
+ * cfs_node_mask_copy(mask, &node_online_map);
+ * }
+ *
+ * #endif
+ */
+
+#endif /* CONFIG_SMP */
+#endif /* __LIBCFS_LINUX_CPU_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h
new file mode 100644
index 000000000000..97c771cf691f
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-crypto.h
@@ -0,0 +1,49 @@
+ /*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/**
+ * Linux crypto hash specific functions.
+ */
+
+/**
+ * Functions for start/stop shash CRC32 algorithm.
+ */
+int cfs_crypto_crc32_register(void);
+void cfs_crypto_crc32_unregister(void);
+
+/**
+ * Functions for start/stop shash adler32 algorithm.
+ */
+int cfs_crypto_adler32_register(void);
+void cfs_crypto_adler32_unregister(void);
+
+/**
+ * Functions for start/stop shash crc32 pclmulqdq
+ */
+int cfs_crypto_crc32_pclmul_register(void);
+void cfs_crypto_crc32_pclmul_unregister(void);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h
new file mode 100644
index 000000000000..eebf138f21e5
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-fs.h
@@ -0,0 +1,92 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-fs.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_FS_H__
+#define __LIBCFS_LINUX_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/mount.h>
+#include <linux/backing-dev.h>
+#include <linux/posix_acl_xattr.h>
+
+#define filp_size(f) \
+ (i_size_read((f)->f_dentry->d_inode))
+#define filp_poff(f) \
+ (&(f)->f_pos)
+
+# define do_fsync(fp, flag) \
+ ((fp)->f_op->fsync(fp, 0, LLONG_MAX, flag))
+
+#define filp_read(fp, buf, size, pos) \
+ ((fp)->f_op->read((fp), (buf), (size), pos))
+
+#define filp_write(fp, buf, size, pos) \
+ ((fp)->f_op->write((fp), (buf), (size), pos))
+
+#define filp_fsync(fp) \
+ do_fsync(fp, 1)
+
+#define flock_type(fl) ((fl)->fl_type)
+#define flock_set_type(fl, type) do { (fl)->fl_type = (type); } while (0)
+#define flock_pid(fl) ((fl)->fl_pid)
+#define flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while (0)
+#define flock_start(fl) ((fl)->fl_start)
+#define flock_set_start(fl, st) do { (fl)->fl_start = (st); } while (0)
+#define flock_end(fl) ((fl)->fl_end)
+#define flock_set_end(fl, end) do { (fl)->fl_end = (end); } while (0)
+
+#ifndef IFSHIFT
+#define IFSHIFT 12
+#endif
+
+#ifndef IFTODT
+#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT)
+#endif
+#ifndef DTTOIF
+#define DTTOIF(dirtype) ((dirtype) << IFSHIFT)
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h
new file mode 100644
index 000000000000..6fbcbf3ab0d3
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-lock.h
@@ -0,0 +1,204 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-lock.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
+#define __LIBCFS_LINUX_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/mutex.h>
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * All locks' declaration are not guaranteed to be initialized,
+ * Althought some of they are initialized in Linux. All locks
+ * declared by CFS_DECL_* should be initialized explicitly.
+ */
+
+/*
+ * spin_lock "implementation" (use Linux kernel's primitives)
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_lock_bh(x)
+ * - spin_lock_bh_init(x)
+ * - spin_unlock(x)
+ * - spin_unlock_bh(x)
+ * - spin_trylock(x)
+ * - spin_is_locked(x)
+ *
+ * - spin_lock_irq(x)
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ * - read_lock_irqsave(lock, f)
+ * - write_lock_irqsave(lock, f)
+ * - write_unlock_irqrestore(lock, f)
+ */
+
+/*
+ * spinlock "implementation"
+ */
+
+
+
+
+/*
+ * rw_semaphore "implementation" (use Linux kernel's primitives)
+ *
+ * - sema_init(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+
+
+#define fini_rwsem(s) do {} while (0)
+
+
+/*
+ * rwlock_t "implementation" (use Linux kernel's primitives)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ * - write_lock_bh(x)
+ * - write_unlock_bh(x)
+ *
+ * - RW_LOCK_UNLOCKED
+ */
+
+
+#ifndef DEFINE_RWLOCK
+#define DEFINE_RWLOCK(lock) rwlock_t lock = __RW_LOCK_UNLOCKED(lock)
+#endif
+
+/*
+ * completion "implementation" (use Linux kernel's primitives)
+ *
+ * - DECLARE_COMPLETION(work)
+ * - INIT_COMPLETION(c)
+ * - COMPLETION_INITIALIZER(work)
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ * - wait_for_completion_interruptible(c)
+ * - fini_completion(c)
+ */
+#define fini_completion(c) do { } while (0)
+
+/*
+ * semaphore "implementation" (use Linux kernel's primitives)
+ * - DEFINE_SEMAPHORE(name)
+ * - sema_init(sem, val)
+ * - up(sem)
+ * - down(sem)
+ * - down_interruptible(sem)
+ * - down_trylock(sem)
+ */
+
+/*
+ * mutex "implementation" (use Linux kernel's primitives)
+ *
+ * - DEFINE_MUTEX(name)
+ * - mutex_init(x)
+ * - mutex_lock(x)
+ * - mutex_unlock(x)
+ * - mutex_trylock(x)
+ * - mutex_is_locked(x)
+ * - mutex_destroy(x)
+ */
+
+#ifndef lockdep_set_class
+
+/**************************************************************************
+ *
+ * Lockdep "implementation". Also see liblustre.h
+ *
+ **************************************************************************/
+
+struct lock_class_key {
+ ;
+};
+
+#define lockdep_set_class(lock, key) \
+ do { (void)sizeof(lock); (void)sizeof(key); } while (0)
+/* This has to be a macro, so that `subclass' can be undefined in kernels
+ * that do not support lockdep. */
+
+
+static inline void lockdep_off(void)
+{
+}
+
+static inline void lockdep_on(void)
+{
+}
+#else
+
+#endif /* lockdep_set_class */
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#ifndef mutex_lock_nested
+#define mutex_lock_nested(mutex, subclass) mutex_lock(mutex)
+#endif
+
+#ifndef spin_lock_nested
+#define spin_lock_nested(lock, subclass) spin_lock(lock)
+#endif
+
+#ifndef down_read_nested
+#define down_read_nested(lock, subclass) down_read(lock)
+#endif
+
+#ifndef down_write_nested
+#define down_write_nested(lock, subclass) down_write(lock)
+#endif
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+
+#endif /* __LIBCFS_LINUX_CFS_LOCK_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
new file mode 100644
index 000000000000..042a2bc432be
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
@@ -0,0 +1,120 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-mem.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_MEM_H__
+#define __LIBCFS_LINUX_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+
+#define CFS_PAGE_MASK (~((__u64)PAGE_CACHE_SIZE-1))
+#define page_index(p) ((p)->index)
+
+#define memory_pressure_get() (current->flags & PF_MEMALLOC)
+#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
+#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
+
+#if BITS_PER_LONG == 32
+/* limit to lowmem on 32-bit systems */
+#define NUM_CACHEPAGES \
+ min(num_physpages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4)
+#else
+#define NUM_CACHEPAGES num_physpages
+#endif
+
+/*
+ * In Linux there is no way to determine whether current execution context is
+ * blockable.
+ */
+#define ALLOC_ATOMIC_TRY GFP_ATOMIC
+
+#define DECL_MMSPACE mm_segment_t __oldfs
+#define MMSPACE_OPEN \
+ do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
+#define MMSPACE_CLOSE set_fs(__oldfs)
+
+/*
+ * Shrinker
+ */
+
+# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \
+ struct shrinker *shrinker, \
+ struct shrink_control *sc
+# define shrink_param(sc, var) ((sc)->var)
+
+typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
+
+static inline
+struct shrinker *set_shrinker(int seek, shrinker_t func)
+{
+ struct shrinker *s;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (s == NULL)
+ return (NULL);
+
+ s->shrink = func;
+ s->seeks = seek;
+
+ register_shrinker(s);
+
+ return s;
+}
+
+static inline
+void remove_shrinker(struct shrinker *shrinker)
+{
+ if (shrinker == NULL)
+ return;
+
+ unregister_shrinker(shrinker);
+ kfree(shrinker);
+}
+
+#endif /* __LINUX_CFS_MEM_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h
new file mode 100644
index 000000000000..a4963a8dfdd8
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-prim.h
@@ -0,0 +1,241 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-prim.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
+#define __LIBCFS_LINUX_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/random.h>
+
+#include <linux/miscdevice.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+#include <asm/div64.h>
+
+#include <linux/libcfs/linux/linux-time.h>
+
+
+/*
+ * CPU
+ */
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#ifdef NR_CPUS
+#else
+#define NR_CPUS 1
+#endif
+
+/*
+ * cache
+ */
+
+/*
+ * IRQs
+ */
+
+
+/*
+ * Pseudo device register
+ */
+typedef struct miscdevice psdev_t;
+
+/*
+ * Sysctl register
+ */
+typedef struct ctl_table ctl_table_t;
+typedef struct ctl_table_header ctl_table_header_t;
+
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
+
+#define DECLARE_PROC_HANDLER(name) \
+static int \
+LL_PROC_PROTO(name) \
+{ \
+ DECLARE_LL_PROC_PPOS_DECL; \
+ \
+ return proc_call_handler(table->data, write, \
+ ppos, buffer, lenp, \
+ __##name); \
+}
+
+/*
+ * Symbol register
+ */
+#define cfs_symbol_register(s, p) do {} while(0)
+#define cfs_symbol_unregister(s) do {} while(0)
+#define cfs_symbol_get(s) symbol_get(s)
+#define cfs_symbol_put(s) symbol_put(s)
+
+typedef struct module module_t;
+
+/*
+ * Proc file system APIs
+ */
+typedef struct proc_dir_entry proc_dir_entry_t;
+
+/*
+ * Wait Queue
+ */
+
+
+typedef long cfs_task_state_t;
+
+#define CFS_DECL_WAITQ(wq) DECLARE_WAIT_QUEUE_HEAD(wq)
+
+/*
+ * Task struct
+ */
+typedef struct task_struct task_t;
+#define DECL_JOURNAL_DATA void *journal_info
+#define PUSH_JOURNAL do { \
+ journal_info = current->journal_info; \
+ current->journal_info = NULL; \
+ } while(0)
+#define POP_JOURNAL do { \
+ current->journal_info = journal_info; \
+ } while(0)
+
+/* Module interfaces */
+#define cfs_module(name, version, init, fini) \
+ module_init(init); \
+ module_exit(fini)
+
+/*
+ * Signal
+ */
+
+/*
+ * Timer
+ */
+typedef struct timer_list timer_list_t;
+
+
+#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */
+#define __wait_event_timeout(wq, condition, timeout, ret) \
+do { \
+ int __ret = 0; \
+ if (!(condition)) { \
+ wait_queue_t __wait; \
+ unsigned long expire; \
+ \
+ init_waitqueue_entry(&__wait, current); \
+ expire = timeout + jiffies; \
+ add_wait_queue(&wq, &__wait); \
+ for (;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ if (jiffies > expire) { \
+ ret = jiffies - expire; \
+ break; \
+ } \
+ schedule_timeout(timeout); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&wq, &__wait); \
+ } \
+} while (0)
+/*
+ retval == 0; condition met; we're good.
+ retval > 0; timed out.
+*/
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \
+do { \
+ ret = 0; \
+ if (!(condition)) \
+ __wait_event_timeout(wq, condition, timeout, ret); \
+} while (0)
+#else
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \
+ ret = wait_event_timeout(wq, condition, timeout)
+#endif
+
+#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \
+ ret = wait_event_interruptible_timeout(wq, c, timeout)
+
+/*
+ * atomic
+ */
+
+
+#define cfs_atomic_add_unless(atom, a, u) atomic_add_unless(atom, a, u)
+#define cfs_atomic_cmpxchg(atom, old, nv) atomic_cmpxchg(atom, old, nv)
+
+/*
+ * membar
+ */
+
+
+/*
+ * interrupt
+ */
+
+
+/*
+ * might_sleep
+ */
+
+/*
+ * group_info
+ */
+typedef struct group_info group_info_t;
+
+
+/*
+ * Random bytes
+ */
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h
new file mode 100644
index 000000000000..687f33f4e8a7
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-tcpip.h
@@ -0,0 +1,87 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-tcpip.h
+ *
+ * Basic library routines.
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_TCP_H__
+#define __LIBCFS_LINUX_CFS_TCP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+#include <net/sock.h>
+
+#ifndef HIPQUAD
+// XXX Should just kill all users
+#if defined(__LITTLE_ENDIAN)
+#define HIPQUAD(addr) \
+ ((unsigned char *)&addr)[3], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[0]
+#elif defined(__BIG_ENDIAN)
+#define HIPQUAD NIPQUAD
+#else
+#error "Please fix asm/byteorder.h"
+#endif /* __LITTLE_ENDIAN */
+#endif
+
+typedef struct socket socket_t;
+
+#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf)
+#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
+
+static inline int
+cfs_sock_error(struct socket *sock)
+{
+ return sock->sk->sk_err;
+}
+
+static inline int
+cfs_sock_wmem_queued(struct socket *sock)
+{
+ return sock->sk->sk_wmem_queued;
+}
+
+#define cfs_sk_sleep(sk) sk_sleep(sk)
+
+#define DEFAULT_NET (&init_net)
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
new file mode 100644
index 000000000000..4a48b914b42a
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
@@ -0,0 +1,275 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/linux/linux-time.h
+ *
+ * Implementation of portable time API for Linux (kernel and user-level).
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
+#define __LIBCFS_LINUX_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
+#endif
+
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ * cfs_time_t represents point in time. This is internal kernel
+ * time rather than "wall clock". This time bears no
+ * relation to gettimeofday().
+ *
+ * cfs_duration_t represents time interval with resolution of internal
+ * platform clock
+ *
+ * cfs_fs_time_t represents instance in world-visible time. This is
+ * used in file-system time-stamps
+ *
+ * cfs_time_t cfs_time_current(void);
+ * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
+ * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
+ * int cfs_impl_time_before (cfs_time_t, cfs_time_t);
+ * int cfs_impl_time_before_eq(cfs_time_t, cfs_time_t);
+ *
+ * cfs_duration_t cfs_duration_build(int64_t);
+ *
+ * time_t cfs_duration_sec (cfs_duration_t);
+ * void cfs_duration_usec(cfs_duration_t, struct timeval *);
+ * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ * void cfs_fs_time_current(cfs_fs_time_t *);
+ * time_t cfs_fs_time_sec (cfs_fs_time_t *);
+ * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
+ * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
+ * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ * CFS_TIME_FORMAT
+ * CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION 1000000
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+
+#include <linux/libcfs/linux/portals_compat25.h>
+
+/*
+ * post 2.5 kernels.
+ */
+
+#include <linux/jiffies.h>
+
+typedef struct timespec cfs_fs_time_t;
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ v->tv_sec = t->tv_sec;
+ v->tv_usec = t->tv_nsec / 1000;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ *s = *t;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+ return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec;
+}
+
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef unsigned long cfs_time_t; /* jiffies */
+typedef long cfs_duration_t;
+typedef cycles_t cfs_cycles_t;
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+ return time_before(t1, t2);
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+ return time_before_eq(t1, t2);
+}
+
+static inline cfs_time_t cfs_time_current(void)
+{
+ return jiffies;
+}
+
+static inline time_t cfs_time_current_sec(void)
+{
+ return get_seconds();
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+ *t = CURRENT_TIME;
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+ return t->tv_sec;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2);
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2);
+}
+
+#if 0
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+#if (BITS_PER_LONG == 32)
+ /* We cannot use do_div(t, ONE_BILLION), do_div can only process
+ * 64 bits n and 32 bits base */
+ int64_t t = nano * HZ;
+ do_div(t, 1000);
+ do_div(t, 1000000);
+ return (cfs_duration_t)t;
+#else
+ return (nano * HZ / ONE_BILLION);
+#endif
+}
+#endif
+
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+ return ((cfs_duration_t)seconds) * HZ;
+}
+
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+ return d / HZ;
+}
+
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+#if (BITS_PER_LONG == 32) && (HZ > 4096)
+ __u64 t;
+
+ s->tv_sec = d / HZ;
+ t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION;
+ do_div(t, HZ);
+ s->tv_usec = t;
+#else
+ s->tv_sec = d / HZ;
+ s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * \
+ ONE_MILLION) / HZ;
+#endif
+}
+
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+#if (BITS_PER_LONG == 32)
+ __u64 t;
+
+ s->tv_sec = d / HZ;
+ t = (d - s->tv_sec * HZ) * ONE_BILLION;
+ do_div(t, HZ);
+ s->tv_nsec = t;
+#else
+ s->tv_sec = d / HZ;
+ s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ;
+#endif
+}
+
+#define cfs_time_current_64 get_jiffies_64
+
+static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
+{
+ return t + d;
+}
+
+static inline __u64 cfs_time_shift_64(int seconds)
+{
+ return cfs_time_add_64(cfs_time_current_64(),
+ cfs_time_seconds(seconds));
+}
+
+static inline int cfs_time_before_64(__u64 t1, __u64 t2)
+{
+ return (__s64)t2 - (__s64)t1 > 0;
+}
+
+static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
+{
+ return (__s64)t2 - (__s64)t1 >= 0;
+}
+
+
+/*
+ * One jiffy
+ */
+#define CFS_TICK (1)
+
+#define CFS_TIME_T "%lu"
+#define CFS_DURATION_T "%ld"
+
+
+#endif /* __LIBCFS_LINUX_LINUX_TIME_H__ */
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h
new file mode 100644
index 000000000000..142394925567
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-types.h
@@ -0,0 +1,36 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/include/libcfs/user-bitops.h
+ */
+#include <linux/types.h>
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h b/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h
new file mode 100644
index 000000000000..132a4bec3575
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/portals_compat25.h
@@ -0,0 +1,114 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
+#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG
+# define SIGNAL_MASK_ASSERT() \
+ LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
+#define SIGNAL_MASK_LOCK(task, flags) \
+ spin_lock_irqsave(&task->sighand->siglock, flags)
+#define SIGNAL_MASK_UNLOCK(task, flags) \
+ spin_unlock_irqrestore(&task->sighand->siglock, flags)
+#define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp, 1)
+#define clear_tsk_thread_flag(current, TIF_SIGPENDING) clear_tsk_thread_flag(current, \
+ TIF_SIGPENDING)
+# define smp_num_cpus num_online_cpus()
+
+#define cfs_wait_event_interruptible(wq, condition, ret) \
+ ret = wait_event_interruptible(wq, condition)
+#define cfs_wait_event_interruptible_exclusive(wq, condition, ret) \
+ ret = wait_event_interruptible_exclusive(wq, condition)
+
+#define THREAD_NAME(comm, len, fmt, a...) \
+ snprintf(comm, len, fmt, ## a)
+
+/* 2.6 alloc_page users can use page->lru */
+#define PAGE_LIST_ENTRY lru
+#define PAGE_LIST(page) ((page)->lru)
+
+#ifndef __user
+#define __user
+#endif
+
+#ifndef __fls
+#define __cfs_fls fls
+#else
+#define __cfs_fls __fls
+#endif
+
+#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \
+ proc_dointvec(table, write, buffer, lenp, ppos);
+
+#define ll_proc_dolongvec(table, write, filp, buffer, lenp, ppos) \
+ proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \
+ proc_dostring(table, write, buffer, lenp, ppos);
+#define LL_PROC_PROTO(name) \
+ name(ctl_table_t *table, int write, \
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+#define DECLARE_LL_PROC_PPOS_DECL
+
+/* helper for sysctl handlers */
+int proc_call_handler(void *data, int write,
+ loff_t *ppos, void *buffer, size_t *lenp,
+ int (*handler)(void *data, int write,
+ loff_t pos, void *buffer, int len));
+/*
+ * CPU
+ */
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#ifdef NR_CPUS
+#else
+#define NR_CPUS 1
+#endif
+
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
+
+#endif /* _PORTALS_COMPAT_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/lucache.h b/drivers/staging/lustre/include/linux/libcfs/lucache.h
new file mode 100644
index 000000000000..7ae36fc88d77
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/lucache.h
@@ -0,0 +1,162 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUCACHE_H
+#define _LUCACHE_H
+
+#include <linux/libcfs/libcfs.h>
+
+/** \defgroup ucache ucache
+ *
+ * @{
+ */
+
+#define UC_CACHE_NEW 0x01
+#define UC_CACHE_ACQUIRING 0x02
+#define UC_CACHE_INVALID 0x04
+#define UC_CACHE_EXPIRED 0x08
+
+#define UC_CACHE_IS_NEW(i) ((i)->ue_flags & UC_CACHE_NEW)
+#define UC_CACHE_IS_INVALID(i) ((i)->ue_flags & UC_CACHE_INVALID)
+#define UC_CACHE_IS_ACQUIRING(i) ((i)->ue_flags & UC_CACHE_ACQUIRING)
+#define UC_CACHE_IS_EXPIRED(i) ((i)->ue_flags & UC_CACHE_EXPIRED)
+#define UC_CACHE_IS_VALID(i) ((i)->ue_flags == 0)
+
+#define UC_CACHE_SET_NEW(i) (i)->ue_flags |= UC_CACHE_NEW
+#define UC_CACHE_SET_INVALID(i) (i)->ue_flags |= UC_CACHE_INVALID
+#define UC_CACHE_SET_ACQUIRING(i) (i)->ue_flags |= UC_CACHE_ACQUIRING
+#define UC_CACHE_SET_EXPIRED(i) (i)->ue_flags |= UC_CACHE_EXPIRED
+#define UC_CACHE_SET_VALID(i) (i)->ue_flags = 0
+
+#define UC_CACHE_CLEAR_NEW(i) (i)->ue_flags &= ~UC_CACHE_NEW
+#define UC_CACHE_CLEAR_ACQUIRING(i) (i)->ue_flags &= ~UC_CACHE_ACQUIRING
+#define UC_CACHE_CLEAR_INVALID(i) (i)->ue_flags &= ~UC_CACHE_INVALID
+#define UC_CACHE_CLEAR_EXPIRED(i) (i)->ue_flags &= ~UC_CACHE_EXPIRED
+
+struct upcall_cache_entry;
+
+struct md_perm {
+ lnet_nid_t mp_nid;
+ __u32 mp_perm;
+};
+
+struct md_identity {
+ struct upcall_cache_entry *mi_uc_entry;
+ uid_t mi_uid;
+ gid_t mi_gid;
+ group_info_t *mi_ginfo;
+ int mi_nperms;
+ struct md_perm *mi_perms;
+};
+
+struct upcall_cache_entry {
+ struct list_head ue_hash;
+ __u64 ue_key;
+ atomic_t ue_refcount;
+ int ue_flags;
+ wait_queue_head_t ue_waitq;
+ cfs_time_t ue_acquire_expire;
+ cfs_time_t ue_expire;
+ union {
+ struct md_identity identity;
+ } u;
+};
+
+#define UC_CACHE_HASH_SIZE (128)
+#define UC_CACHE_HASH_INDEX(id) ((id) & (UC_CACHE_HASH_SIZE - 1))
+#define UC_CACHE_UPCALL_MAXPATH (1024UL)
+
+struct upcall_cache;
+
+struct upcall_cache_ops {
+ void (*init_entry)(struct upcall_cache_entry *, void *args);
+ void (*free_entry)(struct upcall_cache *,
+ struct upcall_cache_entry *);
+ int (*upcall_compare)(struct upcall_cache *,
+ struct upcall_cache_entry *,
+ __u64 key, void *args);
+ int (*downcall_compare)(struct upcall_cache *,
+ struct upcall_cache_entry *,
+ __u64 key, void *args);
+ int (*do_upcall)(struct upcall_cache *,
+ struct upcall_cache_entry *);
+ int (*parse_downcall)(struct upcall_cache *,
+ struct upcall_cache_entry *, void *);
+};
+
+struct upcall_cache {
+ struct list_head uc_hashtable[UC_CACHE_HASH_SIZE];
+ spinlock_t uc_lock;
+ rwlock_t uc_upcall_rwlock;
+
+ char uc_name[40]; /* for upcall */
+ char uc_upcall[UC_CACHE_UPCALL_MAXPATH];
+ int uc_acquire_expire; /* seconds */
+ int uc_entry_expire; /* seconds */
+ struct upcall_cache_ops *uc_ops;
+};
+
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+ __u64 key, void *args);
+void upcall_cache_put_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry);
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+ void *args);
+void upcall_cache_flush_idle(struct upcall_cache *cache);
+void upcall_cache_flush_all(struct upcall_cache *cache);
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args);
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+ struct upcall_cache_ops *ops);
+void upcall_cache_cleanup(struct upcall_cache *cache);
+
+#if 0
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
+ __u64 key, __u32 primary,
+ __u32 ngroups, __u32 *groups);
+void upcall_cache_put_entry(struct upcall_cache *hash,
+ struct upcall_cache_entry *entry);
+int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
+ __u32 primary, __u32 ngroups, __u32 *groups);
+void upcall_cache_flush_idle(struct upcall_cache *cache);
+void upcall_cache_flush_all(struct upcall_cache *cache);
+struct upcall_cache *upcall_cache_init(const char *name);
+void upcall_cache_cleanup(struct upcall_cache *hash);
+
+#endif
+
+/** @} ucache */
+
+#endif /* _LUCACHE_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/params_tree.h b/drivers/staging/lustre/include/linux/libcfs/params_tree.h
new file mode 100644
index 000000000000..3f18a4467037
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/libcfs/params_tree.h
@@ -0,0 +1,166 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * API and structure definitions for params_tree.
+ *
+ * Author: LiuYing <emoly.liu@oracle.com>
+ */
+#ifndef __PARAMS_TREE_H__
+#define __PARAMS_TREE_H__
+
+#include <linux/libcfs/libcfs.h>
+
+#undef LPROCFS
+#if defined(CONFIG_PROC_FS)
+# define LPROCFS
+#endif
+
+#ifdef LPROCFS
+typedef struct file cfs_param_file_t;
+typedef struct inode cfs_inode_t;
+typedef struct proc_inode cfs_proc_inode_t;
+typedef struct seq_file cfs_seq_file_t;
+typedef struct seq_operations cfs_seq_ops_t;
+typedef struct file_operations cfs_param_file_ops_t;
+typedef module_t *cfs_param_module_t;
+typedef struct proc_dir_entry cfs_param_dentry_t;
+typedef struct poll_table_struct cfs_poll_table_t;
+#define CFS_PARAM_MODULE THIS_MODULE
+#define cfs_file_private(file) (file->private_data)
+#define cfs_dentry_data(dentry) (dentry->data)
+#define cfs_proc_inode_pde(proc_inode) (proc_inode->pde)
+#define cfs_proc_inode(proc_inode) (proc_inode->vfs_inode)
+#define cfs_seq_read_common seq_read
+#define cfs_seq_lseek_common seq_lseek
+#define cfs_seq_private(seq) (seq->private)
+#define cfs_seq_printf(seq, format, ...) seq_printf(seq, format, \
+ ## __VA_ARGS__)
+#define cfs_seq_release(inode, file) seq_release(inode, file)
+#define cfs_seq_puts(seq, s) seq_puts(seq, s)
+#define cfs_seq_putc(seq, s) seq_putc(seq, s)
+#define cfs_seq_read(file, buf, count, ppos, rc) (rc = seq_read(file, buf, \
+ count, ppos))
+#define cfs_seq_open(file, ops, rc) (rc = seq_open(file, ops))
+
+#else /* !LPROCFS */
+
+typedef struct cfs_params_file {
+ void *param_private;
+ loff_t param_pos;
+ unsigned int param_flags;
+} cfs_param_file_t;
+
+typedef struct cfs_param_inode {
+ void *param_private;
+} cfs_inode_t;
+
+typedef struct cfs_param_dentry {
+ void *param_data;
+} cfs_param_dentry_t;
+
+typedef struct cfs_proc_inode {
+ cfs_param_dentry_t *param_pde;
+ cfs_inode_t param_inode;
+} cfs_proc_inode_t;
+
+struct cfs_seq_operations;
+typedef struct cfs_seq_file {
+ char *buf;
+ size_t size;
+ size_t from;
+ size_t count;
+ loff_t index;
+ loff_t version;
+ struct mutex lock;
+ struct cfs_seq_operations *op;
+ void *private;
+} cfs_seq_file_t;
+
+typedef struct cfs_seq_operations {
+ void *(*start) (cfs_seq_file_t *m, loff_t *pos);
+ void (*stop) (cfs_seq_file_t *m, void *v);
+ void *(*next) (cfs_seq_file_t *m, void *v, loff_t *pos);
+ int (*show) (cfs_seq_file_t *m, void *v);
+} cfs_seq_ops_t;
+
+typedef void *cfs_param_module_t;
+typedef void *cfs_poll_table_t;
+
+typedef struct cfs_param_file_ops {
+ cfs_param_module_t owner;
+ int (*open) (cfs_inode_t *, struct file *);
+ loff_t (*llseek)(struct file *, loff_t, int);
+ int (*release) (cfs_inode_t *, cfs_param_file_t *);
+ unsigned int (*poll) (struct file *, cfs_poll_table_t *);
+ ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
+ ssize_t (*read)(struct file *, char *, size_t, loff_t *);
+} cfs_param_file_ops_t;
+typedef cfs_param_file_ops_t *cfs_lproc_filep_t;
+
+static inline cfs_proc_inode_t *FAKE_PROC_I(const cfs_inode_t *inode)
+{
+ return container_of(inode, cfs_proc_inode_t, param_inode);
+}
+
+#define CFS_PARAM_MODULE NULL
+#define cfs_file_private(file) (file->param_private)
+#define cfs_dentry_data(dentry) (dentry->param_data)
+#define cfs_proc_inode(proc_inode) (proc_inode->param_inode)
+#define cfs_proc_inode_pde(proc_inode) (proc_inode->param_pde)
+#define cfs_seq_read_common NULL
+#define cfs_seq_lseek_common NULL
+#define cfs_seq_private(seq) (seq->private)
+#define cfs_seq_read(file, buf, count, ppos, rc) do {} while(0)
+#define cfs_seq_open(file, ops, rc) \
+do { \
+ cfs_seq_file_t *p = cfs_file_private(file); \
+ if (!p) { \
+ LIBCFS_ALLOC(p, sizeof(*p)); \
+ if (!p) { \
+ rc = -ENOMEM; \
+ break; \
+ } \
+ cfs_file_private(file) = p; \
+ } \
+ memset(p, 0, sizeof(*p)); \
+ p->op = ops; \
+ rc = 0; \
+} while(0)
+
+#endif /* LPROCFS */
+
+/* XXX: params_tree APIs */
+
+#endif /* __PARAMS_TREE_H__ */
diff --git a/drivers/staging/lustre/include/linux/lnet/api-support.h b/drivers/staging/lustre/include/linux/lnet/api-support.h
new file mode 100644
index 000000000000..a8d91dbe6060
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/api-support.h
@@ -0,0 +1,44 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_API_SUPPORT_H__
+#define __LNET_API_SUPPORT_H__
+
+#include <linux/lnet/linux/api-support.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+#include <linux/lnet/lnet.h>
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h
new file mode 100644
index 000000000000..e8642e33860d
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/api.h
@@ -0,0 +1,220 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_API_H__
+#define __LNET_API_H__
+
+/** \defgroup lnet LNet
+ *
+ * The Lustre Networking subsystem.
+ *
+ * LNet is an asynchronous message-passing API, which provides an unreliable
+ * connectionless service that can't guarantee any order. It supports OFA IB,
+ * TCP/IP, and Cray Portals, and routes between heterogeneous networks.
+ *
+ * LNet can run both in OS kernel space and in userspace as a library.
+ * @{
+ */
+
+#include <linux/lnet/types.h>
+
+/** \defgroup lnet_init_fini Initialization and cleanup
+ * The LNet must be properly initialized before any LNet calls can be made.
+ * @{ */
+int LNetInit(void);
+void LNetFini(void);
+
+int LNetNIInit(lnet_pid_t requested_pid);
+int LNetNIFini(void);
+/** @} lnet_init_fini */
+
+/** \defgroup lnet_addr LNet addressing and basic types
+ *
+ * Addressing scheme and basic data types of LNet.
+ *
+ * The LNet API is memory-oriented, so LNet must be able to address not only
+ * end-points but also memory region within a process address space.
+ * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
+ * in a node. A portal represents an opening in the address space of a
+ * process. Match bits is criteria to identify a region of memory inside a
+ * portal, and offset specifies an offset within the memory region.
+ *
+ * LNet creates a table of portals for each process during initialization.
+ * This table has MAX_PORTALS entries and its size can't be dynamically
+ * changed. A portal stays empty until the owning process starts to add
+ * memory regions to it. A portal is sometimes called an index because
+ * it's an entry in the portals table of a process.
+ *
+ * \see LNetMEAttach
+ * @{ */
+int LNetGetId(unsigned int index, lnet_process_id_t *id);
+int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
+void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle);
+
+/** @} lnet_addr */
+
+
+/** \defgroup lnet_me Match entries
+ *
+ * A match entry (abbreviated as ME) describes a set of criteria to accept
+ * incoming requests.
+ *
+ * A portal is essentially a match list plus a set of attributes. A match
+ * list is a chain of MEs. Each ME includes a pointer to a memory descriptor
+ * and a set of match criteria. The match criteria can be used to reject
+ * incoming requests based on process ID or the match bits provided in the
+ * request. MEs can be dynamically inserted into a match list by LNetMEAttach()
+ * and LNetMEInsert(), and removed from its list by LNetMEUnlink().
+ * @{ */
+int LNetMEAttach(unsigned int portal,
+ lnet_process_id_t match_id_in,
+ __u64 match_bits_in,
+ __u64 ignore_bits_in,
+ lnet_unlink_t unlink_in,
+ lnet_ins_pos_t pos_in,
+ lnet_handle_me_t *handle_out);
+
+int LNetMEInsert(lnet_handle_me_t current_in,
+ lnet_process_id_t match_id_in,
+ __u64 match_bits_in,
+ __u64 ignore_bits_in,
+ lnet_unlink_t unlink_in,
+ lnet_ins_pos_t position_in,
+ lnet_handle_me_t *handle_out);
+
+int LNetMEUnlink(lnet_handle_me_t current_in);
+/** @} lnet_me */
+
+/** \defgroup lnet_md Memory descriptors
+ *
+ * A memory descriptor contains information about a region of a user's
+ * memory (either in kernel or user space) and optionally points to an
+ * event queue where information about the operations performed on the
+ * memory descriptor are recorded. Memory descriptor is abbreviated as
+ * MD and can be used interchangeably with the memory region it describes.
+ *
+ * The LNet API provides two operations to create MDs: LNetMDAttach()
+ * and LNetMDBind(); one operation to unlink and release the resources
+ * associated with a MD: LNetMDUnlink().
+ * @{ */
+int LNetMDAttach(lnet_handle_me_t current_in,
+ lnet_md_t md_in,
+ lnet_unlink_t unlink_in,
+ lnet_handle_md_t *handle_out);
+
+int LNetMDBind(lnet_md_t md_in,
+ lnet_unlink_t unlink_in,
+ lnet_handle_md_t *handle_out);
+
+int LNetMDUnlink(lnet_handle_md_t md_in);
+/** @} lnet_md */
+
+/** \defgroup lnet_eq Events and event queues
+ *
+ * Event queues (abbreviated as EQ) are used to log operations performed on
+ * local MDs. In particular, they signal the completion of a data transmission
+ * into or out of a MD. They can also be used to hold acknowledgments for
+ * completed PUT operations and indicate when a MD has been unlinked. Multiple
+ * MDs can share a single EQ. An EQ may have an optional event handler
+ * associated with it. If an event handler exists, it will be run for each
+ * event that is deposited into the EQ.
+ *
+ * In addition to the lnet_handle_eq_t, the LNet API defines two types
+ * associated with events: The ::lnet_event_kind_t defines the kinds of events
+ * that can be stored in an EQ. The lnet_event_t defines a structure that
+ * holds the information about with an event.
+ *
+ * There are five functions for dealing with EQs: LNetEQAlloc() is used to
+ * create an EQ and allocate the resources needed, while LNetEQFree()
+ * releases these resources and free the EQ. LNetEQGet() retrieves the next
+ * event from an EQ, and LNetEQWait() can be used to block a process until
+ * an EQ has at least one event. LNetEQPoll() can be used to test or wait
+ * on multiple EQs.
+ * @{ */
+int LNetEQAlloc(unsigned int count_in,
+ lnet_eq_handler_t handler,
+ lnet_handle_eq_t *handle_out);
+
+int LNetEQFree(lnet_handle_eq_t eventq_in);
+
+int LNetEQGet(lnet_handle_eq_t eventq_in,
+ lnet_event_t *event_out);
+
+
+int LNetEQWait(lnet_handle_eq_t eventq_in,
+ lnet_event_t *event_out);
+
+int LNetEQPoll(lnet_handle_eq_t *eventqs_in,
+ int neq_in,
+ int timeout_ms,
+ lnet_event_t *event_out,
+ int *which_eq_out);
+/** @} lnet_eq */
+
+/** \defgroup lnet_data Data movement operations
+ *
+ * The LNet API provides two data movement operations: LNetPut()
+ * and LNetGet().
+ * @{ */
+int LNetPut(lnet_nid_t self,
+ lnet_handle_md_t md_in,
+ lnet_ack_req_t ack_req_in,
+ lnet_process_id_t target_in,
+ unsigned int portal_in,
+ __u64 match_bits_in,
+ unsigned int offset_in,
+ __u64 hdr_data_in);
+
+int LNetGet(lnet_nid_t self,
+ lnet_handle_md_t md_in,
+ lnet_process_id_t target_in,
+ unsigned int portal_in,
+ __u64 match_bits_in,
+ unsigned int offset_in);
+/** @} lnet_data */
+
+
+/** \defgroup lnet_misc Miscellaneous operations.
+ * Miscellaneous operations.
+ * @{ */
+
+int LNetSetLazyPortal(int portal);
+int LNetClearLazyPortal(int portal);
+int LNetCtl(unsigned int cmd, void *arg);
+int LNetSetAsync(lnet_process_id_t id, int nasync);
+
+/** @} lnet_misc */
+
+/** @} lnet */
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
new file mode 100644
index 000000000000..59bff0bea816
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -0,0 +1,874 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lib-lnet.h
+ *
+ * Top level include for library side routines
+ */
+
+#ifndef __LNET_LIB_LNET_H__
+#define __LNET_LIB_LNET_H__
+
+#include <linux/lnet/linux/lib-lnet.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+
+extern lnet_t the_lnet; /* THE network */
+
+#if defined(LNET_USE_LIB_FREELIST)
+/* 1 CPT, simplify implementation... */
+# define LNET_CPT_MAX_BITS 0
+
+#else /* KERNEL and no freelist */
+
+# if (BITS_PER_LONG == 32)
+/* 2 CPTs, allowing more CPTs might make us under memory pressure */
+# define LNET_CPT_MAX_BITS 1
+
+# else /* 64-bit system */
+/*
+ * 256 CPTs for thousands of CPUs, allowing more CPTs might make us
+ * under risk of consuming all lh_cookie.
+ */
+# define LNET_CPT_MAX_BITS 8
+# endif /* BITS_PER_LONG == 32 */
+#endif
+
+/* max allowed CPT number */
+#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS)
+
+#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number)
+#define LNET_CPT_BITS (the_lnet.ln_cpt_bits)
+#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1)
+
+/** exclusive lock */
+#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX
+
+static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
+{
+ return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
+ wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE);
+}
+
+static inline int lnet_md_exhausted (lnet_libmd_t *md)
+{
+ return (md->md_threshold == 0 ||
+ ((md->md_options & LNET_MD_MAX_SIZE) != 0 &&
+ md->md_offset + md->md_max_size > md->md_length));
+}
+
+static inline int lnet_md_unlinkable (lnet_libmd_t *md)
+{
+ /* Should unlink md when its refcount is 0 and either:
+ * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink,
+ * in the latter case md may not be exhausted).
+ * - auto unlink is on and md is exhausted.
+ */
+ if (md->md_refcount != 0)
+ return 0;
+
+ if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0)
+ return 1;
+
+ return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
+ lnet_md_exhausted(md));
+}
+
+#define lnet_cpt_table() (the_lnet.ln_cpt_table)
+#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1)
+
+static inline int
+lnet_cpt_of_cookie(__u64 cookie)
+{
+ unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;
+
+ /* LNET_CPT_NUMBER doesn't have to be power2, which means we can
+ * get illegal cpt from it's invalid cookie */
+ return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER;
+}
+
+static inline void
+lnet_res_lock(int cpt)
+{
+ cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
+}
+
+static inline void
+lnet_res_unlock(int cpt)
+{
+ cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
+}
+
+static inline int
+lnet_res_lock_current(void)
+{
+ int cpt = lnet_cpt_current();
+
+ lnet_res_lock(cpt);
+ return cpt;
+}
+
+static inline void
+lnet_net_lock(int cpt)
+{
+ cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
+}
+
+static inline void
+lnet_net_unlock(int cpt)
+{
+ cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
+}
+
+static inline int
+lnet_net_lock_current(void)
+{
+ int cpt = lnet_cpt_current();
+
+ lnet_net_lock(cpt);
+ return cpt;
+}
+
+#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX)
+#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX)
+
+
+#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock)
+#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock)
+#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock)
+#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock)
+#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock)
+#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock)
+#define LNET_MUTEX_LOCK(m) mutex_lock(m)
+#define LNET_MUTEX_UNLOCK(m) mutex_unlock(m)
+
+
+#define MAX_PORTALS 64
+
+/* these are only used by code with LNET_USE_LIB_FREELIST, but we still
+ * exported them to !LNET_USE_LIB_FREELIST for easy implemetation */
+#define LNET_FL_MAX_MES 2048
+#define LNET_FL_MAX_MDS 2048
+#define LNET_FL_MAX_EQS 512
+#define LNET_FL_MAX_MSGS 2048 /* Outstanding messages */
+
+#ifdef LNET_USE_LIB_FREELIST
+
+int lnet_freelist_init(lnet_freelist_t *fl, int n, int size);
+void lnet_freelist_fini(lnet_freelist_t *fl);
+
+static inline void *
+lnet_freelist_alloc (lnet_freelist_t *fl)
+{
+ /* ALWAYS called with liblock held */
+ lnet_freeobj_t *o;
+
+ if (list_empty (&fl->fl_list))
+ return (NULL);
+
+ o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list);
+ list_del (&o->fo_list);
+ return ((void *)&o->fo_contents);
+}
+
+static inline void
+lnet_freelist_free (lnet_freelist_t *fl, void *obj)
+{
+ /* ALWAYS called with liblock held */
+ lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents);
+
+ list_add (&o->fo_list, &fl->fl_list);
+}
+
+
+static inline lnet_eq_t *
+lnet_eq_alloc (void)
+{
+ /* NEVER called with resource lock held */
+ struct lnet_res_container *rec = &the_lnet.ln_eq_container;
+ lnet_eq_t *eq;
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+
+ lnet_res_lock(0);
+ eq = (lnet_eq_t *)lnet_freelist_alloc(&rec->rec_freelist);
+ lnet_res_unlock(0);
+
+ return eq;
+}
+
+static inline void
+lnet_eq_free_locked(lnet_eq_t *eq)
+{
+ /* ALWAYS called with resource lock held */
+ struct lnet_res_container *rec = &the_lnet.ln_eq_container;
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+ lnet_freelist_free(&rec->rec_freelist, eq);
+}
+
+static inline void
+lnet_eq_free(lnet_eq_t *eq)
+{
+ lnet_res_lock(0);
+ lnet_eq_free_locked(eq);
+ lnet_res_unlock(0);
+}
+
+static inline lnet_libmd_t *
+lnet_md_alloc (lnet_md_t *umd)
+{
+ /* NEVER called with resource lock held */
+ struct lnet_res_container *rec = the_lnet.ln_md_containers[0];
+ lnet_libmd_t *md;
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+
+ lnet_res_lock(0);
+ md = (lnet_libmd_t *)lnet_freelist_alloc(&rec->rec_freelist);
+ lnet_res_unlock(0);
+
+ if (md != NULL)
+ INIT_LIST_HEAD(&md->md_list);
+
+ return md;
+}
+
+static inline void
+lnet_md_free_locked(lnet_libmd_t *md)
+{
+ /* ALWAYS called with resource lock held */
+ struct lnet_res_container *rec = the_lnet.ln_md_containers[0];
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+ lnet_freelist_free(&rec->rec_freelist, md);
+}
+
+static inline void
+lnet_md_free(lnet_libmd_t *md)
+{
+ lnet_res_lock(0);
+ lnet_md_free_locked(md);
+ lnet_res_unlock(0);
+}
+
+static inline lnet_me_t *
+lnet_me_alloc(void)
+{
+ /* NEVER called with resource lock held */
+ struct lnet_res_container *rec = the_lnet.ln_me_containers[0];
+ lnet_me_t *me;
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+
+ lnet_res_lock(0);
+ me = (lnet_me_t *)lnet_freelist_alloc(&rec->rec_freelist);
+ lnet_res_unlock(0);
+
+ return me;
+}
+
+static inline void
+lnet_me_free_locked(lnet_me_t *me)
+{
+ /* ALWAYS called with resource lock held */
+ struct lnet_res_container *rec = the_lnet.ln_me_containers[0];
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+ lnet_freelist_free(&rec->rec_freelist, me);
+}
+
+static inline void
+lnet_me_free(lnet_me_t *me)
+{
+ lnet_res_lock(0);
+ lnet_me_free_locked(me);
+ lnet_res_unlock(0);
+}
+
+static inline lnet_msg_t *
+lnet_msg_alloc (void)
+{
+ /* NEVER called with network lock held */
+ struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];
+ lnet_msg_t *msg;
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+
+ lnet_net_lock(0);
+ msg = (lnet_msg_t *)lnet_freelist_alloc(&msc->msc_freelist);
+ lnet_net_unlock(0);
+
+ if (msg != NULL) {
+ /* NULL pointers, clear flags etc */
+ memset(msg, 0, sizeof(*msg));
+ }
+ return msg;
+}
+
+static inline void
+lnet_msg_free_locked(lnet_msg_t *msg)
+{
+ /* ALWAYS called with network lock held */
+ struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+ LASSERT(!msg->msg_onactivelist);
+ lnet_freelist_free(&msc->msc_freelist, msg);
+}
+
+static inline void
+lnet_msg_free (lnet_msg_t *msg)
+{
+ lnet_net_lock(0);
+ lnet_msg_free_locked(msg);
+ lnet_net_unlock(0);
+}
+
+#else /* !LNET_USE_LIB_FREELIST */
+
+static inline lnet_eq_t *
+lnet_eq_alloc (void)
+{
+ /* NEVER called with liblock held */
+ lnet_eq_t *eq;
+
+ LIBCFS_ALLOC(eq, sizeof(*eq));
+ return (eq);
+}
+
+static inline void
+lnet_eq_free(lnet_eq_t *eq)
+{
+ /* ALWAYS called with resource lock held */
+ LIBCFS_FREE(eq, sizeof(*eq));
+}
+
+static inline lnet_libmd_t *
+lnet_md_alloc (lnet_md_t *umd)
+{
+ /* NEVER called with liblock held */
+ lnet_libmd_t *md;
+ unsigned int size;
+ unsigned int niov;
+
+ if ((umd->options & LNET_MD_KIOV) != 0) {
+ niov = umd->length;
+ size = offsetof(lnet_libmd_t, md_iov.kiov[niov]);
+ } else {
+ niov = ((umd->options & LNET_MD_IOVEC) != 0) ?
+ umd->length : 1;
+ size = offsetof(lnet_libmd_t, md_iov.iov[niov]);
+ }
+
+ LIBCFS_ALLOC(md, size);
+
+ if (md != NULL) {
+ /* Set here in case of early free */
+ md->md_options = umd->options;
+ md->md_niov = niov;
+ INIT_LIST_HEAD(&md->md_list);
+ }
+
+ return (md);
+}
+
+static inline void
+lnet_md_free(lnet_libmd_t *md)
+{
+ /* ALWAYS called with resource lock held */
+ unsigned int size;
+
+ if ((md->md_options & LNET_MD_KIOV) != 0)
+ size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]);
+ else
+ size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]);
+
+ LIBCFS_FREE(md, size);
+}
+
+static inline lnet_me_t *
+lnet_me_alloc (void)
+{
+ /* NEVER called with liblock held */
+ lnet_me_t *me;
+
+ LIBCFS_ALLOC(me, sizeof(*me));
+ return (me);
+}
+
+static inline void
+lnet_me_free(lnet_me_t *me)
+{
+ /* ALWAYS called with resource lock held */
+ LIBCFS_FREE(me, sizeof(*me));
+}
+
+static inline lnet_msg_t *
+lnet_msg_alloc(void)
+{
+ /* NEVER called with liblock held */
+ lnet_msg_t *msg;
+
+ LIBCFS_ALLOC(msg, sizeof(*msg));
+
+ /* no need to zero, LIBCFS_ALLOC does for us */
+ return (msg);
+}
+
+static inline void
+lnet_msg_free(lnet_msg_t *msg)
+{
+ /* ALWAYS called with network lock held */
+ LASSERT(!msg->msg_onactivelist);
+ LIBCFS_FREE(msg, sizeof(*msg));
+}
+
+#define lnet_eq_free_locked(eq) lnet_eq_free(eq)
+#define lnet_md_free_locked(md) lnet_md_free(md)
+#define lnet_me_free_locked(me) lnet_me_free(me)
+#define lnet_msg_free_locked(msg) lnet_msg_free(msg)
+
+#endif /* LNET_USE_LIB_FREELIST */
+
+lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
+ __u64 cookie);
+void lnet_res_lh_initialize(struct lnet_res_container *rec,
+ lnet_libhandle_t *lh);
+static inline void
+lnet_res_lh_invalidate(lnet_libhandle_t *lh)
+{
+ /* ALWAYS called with resource lock held */
+ /* NB: cookie is still useful, don't reset it */
+ list_del(&lh->lh_hash_chain);
+}
+
+static inline void
+lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq)
+{
+ if (eq == NULL) {
+ LNetInvalidateHandle(handle);
+ return;
+ }
+
+ handle->cookie = eq->eq_lh.lh_cookie;
+}
+
+static inline lnet_eq_t *
+lnet_handle2eq(lnet_handle_eq_t *handle)
+{
+ /* ALWAYS called with resource lock held */
+ lnet_libhandle_t *lh;
+
+ lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie);
+ if (lh == NULL)
+ return NULL;
+
+ return lh_entry(lh, lnet_eq_t, eq_lh);
+}
+
+static inline void
+lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md)
+{
+ handle->cookie = md->md_lh.lh_cookie;
+}
+
+static inline lnet_libmd_t *
+lnet_handle2md(lnet_handle_md_t *handle)
+{
+ /* ALWAYS called with resource lock held */
+ lnet_libhandle_t *lh;
+ int cpt;
+
+ cpt = lnet_cpt_of_cookie(handle->cookie);
+ lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
+ handle->cookie);
+ if (lh == NULL)
+ return NULL;
+
+ return lh_entry(lh, lnet_libmd_t, md_lh);
+}
+
+static inline lnet_libmd_t *
+lnet_wire_handle2md(lnet_handle_wire_t *wh)
+{
+ /* ALWAYS called with resource lock held */
+ lnet_libhandle_t *lh;
+ int cpt;
+
+ if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
+ return NULL;
+
+ cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
+ lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
+ wh->wh_object_cookie);
+ if (lh == NULL)
+ return NULL;
+
+ return lh_entry(lh, lnet_libmd_t, md_lh);
+}
+
+static inline void
+lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me)
+{
+ handle->cookie = me->me_lh.lh_cookie;
+}
+
+static inline lnet_me_t *
+lnet_handle2me(lnet_handle_me_t *handle)
+{
+ /* ALWAYS called with resource lock held */
+ lnet_libhandle_t *lh;
+ int cpt;
+
+ cpt = lnet_cpt_of_cookie(handle->cookie);
+ lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
+ handle->cookie);
+ if (lh == NULL)
+ return NULL;
+
+ return lh_entry(lh, lnet_me_t, me_lh);
+}
+
+static inline void
+lnet_peer_addref_locked(lnet_peer_t *lp)
+{
+ LASSERT (lp->lp_refcount > 0);
+ lp->lp_refcount++;
+}
+
+extern void lnet_destroy_peer_locked(lnet_peer_t *lp);
+
+static inline void
+lnet_peer_decref_locked(lnet_peer_t *lp)
+{
+ LASSERT (lp->lp_refcount > 0);
+ lp->lp_refcount--;
+ if (lp->lp_refcount == 0)
+ lnet_destroy_peer_locked(lp);
+}
+
+static inline int
+lnet_isrouter(lnet_peer_t *lp)
+{
+ return lp->lp_rtr_refcount != 0;
+}
+
+static inline void
+lnet_ni_addref_locked(lnet_ni_t *ni, int cpt)
+{
+ LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
+ LASSERT(*ni->ni_refs[cpt] >= 0);
+
+ (*ni->ni_refs[cpt])++;
+}
+
+static inline void
+lnet_ni_addref(lnet_ni_t *ni)
+{
+ lnet_net_lock(0);
+ lnet_ni_addref_locked(ni, 0);
+ lnet_net_unlock(0);
+}
+
+static inline void
+lnet_ni_decref_locked(lnet_ni_t *ni, int cpt)
+{
+ LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
+ LASSERT(*ni->ni_refs[cpt] > 0);
+
+ (*ni->ni_refs[cpt])--;
+}
+
+static inline void
+lnet_ni_decref(lnet_ni_t *ni)
+{
+ lnet_net_lock(0);
+ lnet_ni_decref_locked(ni, 0);
+ lnet_net_unlock(0);
+}
+
+void lnet_ni_free(lnet_ni_t *ni);
+
+static inline int
+lnet_nid2peerhash(lnet_nid_t nid)
+{
+ return cfs_hash_long(nid, LNET_PEER_HASH_BITS);
+}
+
+static inline struct list_head *
+lnet_net2rnethash(__u32 net)
+{
+ return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
+ LNET_NETTYP(net)) &
+ ((1U << the_lnet.ln_remote_nets_hbits) - 1)];
+}
+
+extern lnd_t the_lolnd;
+
+
+extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
+extern int lnet_cpt_of_nid(lnet_nid_t nid);
+extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
+extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
+extern lnet_ni_t *lnet_net2ni(__u32 net);
+
+int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when);
+void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when);
+int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid);
+int lnet_check_routes(void);
+int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
+void lnet_destroy_routes(void);
+int lnet_get_route(int idx, __u32 *net, __u32 *hops,
+ lnet_nid_t *gateway, __u32 *alive);
+void lnet_proc_init(void);
+void lnet_proc_fini(void);
+int lnet_rtrpools_alloc(int im_a_router);
+void lnet_rtrpools_free(void);
+lnet_remotenet_t *lnet_find_net_locked (__u32 net);
+
+int lnet_islocalnid(lnet_nid_t nid);
+int lnet_islocalnet(__u32 net);
+
+void lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
+ unsigned int offset, unsigned int mlen);
+void lnet_msg_detach_md(lnet_msg_t *msg, int status);
+void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev);
+void lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type);
+void lnet_msg_commit(lnet_msg_t *msg, int cpt);
+void lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status);
+
+void lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev);
+void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
+ unsigned int offset, unsigned int len);
+int lnet_send(lnet_nid_t nid, lnet_msg_t *msg, lnet_nid_t rtr_nid);
+void lnet_return_tx_credits_locked(lnet_msg_t *msg);
+void lnet_return_rx_credits_locked(lnet_msg_t *msg);
+
+/* portals functions */
+/* portals attributes */
+static inline int
+lnet_ptl_is_lazy(lnet_portal_t *ptl)
+{
+ return !!(ptl->ptl_options & LNET_PTL_LAZY);
+}
+
+static inline int
+lnet_ptl_is_unique(lnet_portal_t *ptl)
+{
+ return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE);
+}
+
+static inline int
+lnet_ptl_is_wildcard(lnet_portal_t *ptl)
+{
+ return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD);
+}
+
+static inline void
+lnet_ptl_setopt(lnet_portal_t *ptl, int opt)
+{
+ ptl->ptl_options |= opt;
+}
+
+static inline void
+lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt)
+{
+ ptl->ptl_options &= ~opt;
+}
+
+/* match-table functions */
+struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
+ lnet_process_id_t id, __u64 mbits);
+struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
+ lnet_process_id_t id, __u64 mbits,
+ __u64 ignore_bits,
+ lnet_ins_pos_t pos);
+int lnet_mt_match_md(struct lnet_match_table *mtable,
+ struct lnet_match_info *info, struct lnet_msg *msg);
+
+/* portals match/attach functions */
+void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+ struct list_head *matches, struct list_head *drops);
+void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md);
+int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
+
+/* initialized and finalize portals */
+int lnet_portals_create(void);
+void lnet_portals_destroy(void);
+
+/* message functions */
+int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
+ lnet_nid_t fromnid, void *private, int rdma_req);
+void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+ unsigned int offset, unsigned int mlen, unsigned int rlen);
+lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg);
+void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len);
+void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc);
+void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
+void lnet_recv_delayed_msg_list(struct list_head *head);
+
+int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
+void lnet_msg_container_cleanup(struct lnet_msg_container *container);
+void lnet_msg_containers_destroy(void);
+int lnet_msg_containers_create(void);
+
+char *lnet_msgtyp2str (int type);
+void lnet_print_hdr (lnet_hdr_t * hdr);
+int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
+
+void lnet_counters_get(lnet_counters_t *counters);
+void lnet_counters_reset(void);
+
+unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov);
+int lnet_extract_iov (int dst_niov, struct iovec *dst,
+ int src_niov, struct iovec *src,
+ unsigned int offset, unsigned int len);
+
+unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov);
+int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
+ int src_niov, lnet_kiov_t *src,
+ unsigned int offset, unsigned int len);
+
+void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov,
+ unsigned int doffset,
+ unsigned int nsiov, struct iovec *siov,
+ unsigned int soffset, unsigned int nob);
+void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov,
+ unsigned int iovoffset,
+ unsigned int nkiov, lnet_kiov_t *kiov,
+ unsigned int kiovoffset, unsigned int nob);
+void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov,
+ unsigned int kiovoffset,
+ unsigned int niov, struct iovec *iov,
+ unsigned int iovoffset, unsigned int nob);
+void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov,
+ unsigned int doffset,
+ unsigned int nskiov, lnet_kiov_t *skiov,
+ unsigned int soffset, unsigned int nob);
+
+static inline void
+lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
+ unsigned int nsiov, struct iovec *siov, unsigned int soffset,
+ unsigned int nob)
+{
+ struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
+
+ lnet_copy_iov2iov(1, &diov, doffset,
+ nsiov, siov, soffset, nob);
+}
+
+static inline void
+lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
+ unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset,
+ unsigned int nob)
+{
+ struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
+
+ lnet_copy_kiov2iov(1, &diov, doffset,
+ nsiov, skiov, soffset, nob);
+}
+
+static inline void
+lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset,
+ int slen, void *src, unsigned int soffset, unsigned int nob)
+{
+ struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
+ lnet_copy_iov2iov(ndiov, diov, doffset,
+ 1, &siov, soffset, nob);
+}
+
+static inline void
+lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset,
+ int slen, void *src, unsigned int soffset, unsigned int nob)
+{
+ struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
+ lnet_copy_iov2kiov(ndiov, dkiov, doffset,
+ 1, &siov, soffset, nob);
+}
+
+void lnet_me_unlink(lnet_me_t *me);
+
+void lnet_md_unlink(lnet_libmd_t *md);
+void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
+
+void lnet_register_lnd(lnd_t *lnd);
+void lnet_unregister_lnd(lnd_t *lnd);
+int lnet_set_ip_niaddr (lnet_ni_t *ni);
+
+int lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
+ __u32 local_ip, __u32 peer_ip, int peer_port);
+void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
+ __u32 peer_ip, int port);
+int lnet_count_acceptor_nis(void);
+int lnet_acceptor_timeout(void);
+int lnet_acceptor_port(void);
+
+int lnet_count_acceptor_nis(void);
+int lnet_acceptor_port(void);
+
+int lnet_acceptor_start(void);
+void lnet_acceptor_stop(void);
+
+void lnet_get_tunables(void);
+int lnet_peers_start_down(void);
+int lnet_peer_buffer_credits(lnet_ni_t *ni);
+
+int lnet_router_checker_start(void);
+void lnet_router_checker_stop(void);
+void lnet_swap_pinginfo(lnet_ping_info_t *info);
+
+int lnet_ping_target_init(void);
+void lnet_ping_target_fini(void);
+int lnet_ping(lnet_process_id_t id, int timeout_ms,
+ lnet_process_id_t *ids, int n_ids);
+
+int lnet_parse_ip2nets (char **networksp, char *ip2nets);
+int lnet_parse_routes (char *route_str, int *im_a_router);
+int lnet_parse_networks (struct list_head *nilist, char *networks);
+
+int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
+lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
+ lnet_nid_t nid);
+void lnet_peer_tables_cleanup(void);
+void lnet_peer_tables_destroy(void);
+int lnet_peer_tables_create(void);
+void lnet_debug_peer(lnet_nid_t nid);
+
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
new file mode 100644
index 000000000000..86428d4b993e
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -0,0 +1,765 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lib-types.h
+ *
+ * Types used by the library side routines that do not need to be
+ * exposed to the user application
+ */
+
+#ifndef __LNET_LIB_TYPES_H__
+#define __LNET_LIB_TYPES_H__
+
+#include <linux/lnet/linux/lib-types.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/list.h>
+#include <linux/lnet/types.h>
+
+#define WIRE_ATTR __attribute__((packed))
+
+/* Packed version of lnet_process_id_t to transfer via network */
+typedef struct {
+ lnet_nid_t nid;
+ lnet_pid_t pid; /* node id / process id */
+} WIRE_ATTR lnet_process_id_packed_t;
+
+/* The wire handle's interface cookie only matches one network interface in
+ * one epoch (i.e. new cookie when the interface restarts or the node
+ * reboots). The object cookie only matches one object on that interface
+ * during that object's lifetime (i.e. no cookie re-use). */
+typedef struct {
+ __u64 wh_interface_cookie;
+ __u64 wh_object_cookie;
+} WIRE_ATTR lnet_handle_wire_t;
+
+typedef enum {
+ LNET_MSG_ACK = 0,
+ LNET_MSG_PUT,
+ LNET_MSG_GET,
+ LNET_MSG_REPLY,
+ LNET_MSG_HELLO,
+} lnet_msg_type_t;
+
+/* The variant fields of the portals message header are aligned on an 8
+ * byte boundary in the message header. Note that all types used in these
+ * wire structs MUST be fixed size and the smaller types are placed at the
+ * end. */
+typedef struct lnet_ack {
+ lnet_handle_wire_t dst_wmd;
+ __u64 match_bits;
+ __u32 mlength;
+} WIRE_ATTR lnet_ack_t;
+
+typedef struct lnet_put {
+ lnet_handle_wire_t ack_wmd;
+ __u64 match_bits;
+ __u64 hdr_data;
+ __u32 ptl_index;
+ __u32 offset;
+} WIRE_ATTR lnet_put_t;
+
+typedef struct lnet_get {
+ lnet_handle_wire_t return_wmd;
+ __u64 match_bits;
+ __u32 ptl_index;
+ __u32 src_offset;
+ __u32 sink_length;
+} WIRE_ATTR lnet_get_t;
+
+typedef struct lnet_reply {
+ lnet_handle_wire_t dst_wmd;
+} WIRE_ATTR lnet_reply_t;
+
+typedef struct lnet_hello {
+ __u64 incarnation;
+ __u32 type;
+} WIRE_ATTR lnet_hello_t;
+
+typedef struct {
+ lnet_nid_t dest_nid;
+ lnet_nid_t src_nid;
+ lnet_pid_t dest_pid;
+ lnet_pid_t src_pid;
+ __u32 type; /* lnet_msg_type_t */
+ __u32 payload_length; /* payload data to follow */
+ /*<------__u64 aligned------->*/
+ union {
+ lnet_ack_t ack;
+ lnet_put_t put;
+ lnet_get_t get;
+ lnet_reply_t reply;
+ lnet_hello_t hello;
+ } msg;
+} WIRE_ATTR lnet_hdr_t;
+
+/* A HELLO message contains a magic number and protocol version
+ * code in the header's dest_nid, the peer's NID in the src_nid, and
+ * LNET_MSG_HELLO in the type field. All other common fields are zero
+ * (including payload_size; i.e. no payload).
+ * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
+ * running the same protocol and to find out its NID. These LNDs should
+ * exchange HELLO messages when a connection is first established. Individual
+ * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
+ */
+typedef struct {
+ __u32 magic; /* LNET_PROTO_TCP_MAGIC */
+ __u16 version_major; /* increment on incompatible change */
+ __u16 version_minor; /* increment on compatible change */
+} WIRE_ATTR lnet_magicversion_t;
+
+/* PROTO MAGIC for LNDs */
+#define LNET_PROTO_IB_MAGIC 0x0be91b91
+#define LNET_PROTO_RA_MAGIC 0x0be91b92
+#define LNET_PROTO_QSW_MAGIC 0x0be91b93
+#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */
+#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
+#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */
+#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */
+#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
+#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
+
+/* Placeholder for a future "unified" protocol across all LNDs */
+/* Current LNDs that receive a request with this magic will respond with a
+ * "stub" reply using their current protocol */
+#define LNET_PROTO_MAGIC 0x45726963 /* ! */
+
+
+#define LNET_PROTO_TCP_VERSION_MAJOR 1
+#define LNET_PROTO_TCP_VERSION_MINOR 0
+
+/* Acceptor connection request */
+typedef struct {
+ __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */
+ __u32 acr_version; /* protocol version */
+ __u64 acr_nid; /* target NID */
+} WIRE_ATTR lnet_acceptor_connreq_t;
+
+#define LNET_PROTO_ACCEPTOR_VERSION 1
+
+/* forward refs */
+struct lnet_libmd;
+
+typedef struct lnet_msg {
+ struct list_head msg_activelist;
+ struct list_head msg_list; /* Q for credits/MD */
+
+ lnet_process_id_t msg_target;
+ /* where is it from, it's only for building event */
+ lnet_nid_t msg_from;
+ __u32 msg_type;
+
+ /* commited for sending */
+ unsigned int msg_tx_committed:1;
+ /* CPT # this message committed for sending */
+ unsigned int msg_tx_cpt:15;
+ /* commited for receiving */
+ unsigned int msg_rx_committed:1;
+ /* CPT # this message committed for receiving */
+ unsigned int msg_rx_cpt:15;
+ /* queued for tx credit */
+ unsigned int msg_tx_delayed:1;
+ /* queued for RX buffer */
+ unsigned int msg_rx_delayed:1;
+ /* ready for pending on RX delay list */
+ unsigned int msg_rx_ready_delay:1;
+
+ unsigned int msg_vmflush:1; /* VM trying to free memory */
+ unsigned int msg_target_is_router:1; /* sending to a router */
+ unsigned int msg_routing:1; /* being forwarded */
+ unsigned int msg_ack:1; /* ack on finalize (PUT) */
+ unsigned int msg_sending:1; /* outgoing message */
+ unsigned int msg_receiving:1; /* being received */
+ unsigned int msg_txcredit:1; /* taken an NI send credit */
+ unsigned int msg_peertxcredit:1; /* taken a peer send credit */
+ unsigned int msg_rtrcredit:1; /* taken a globel router credit */
+ unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
+ unsigned int msg_onactivelist:1; /* on the activelist */
+
+ struct lnet_peer *msg_txpeer; /* peer I'm sending to */
+ struct lnet_peer *msg_rxpeer; /* peer I received from */
+
+ void *msg_private;
+ struct lnet_libmd *msg_md;
+
+ unsigned int msg_len;
+ unsigned int msg_wanted;
+ unsigned int msg_offset;
+ unsigned int msg_niov;
+ struct iovec *msg_iov;
+ lnet_kiov_t *msg_kiov;
+
+ lnet_event_t msg_ev;
+ lnet_hdr_t msg_hdr;
+} lnet_msg_t;
+
+
+typedef struct lnet_libhandle {
+ struct list_head lh_hash_chain;
+ __u64 lh_cookie;
+} lnet_libhandle_t;
+
+#define lh_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
+
+typedef struct lnet_eq {
+ struct list_head eq_list;
+ lnet_libhandle_t eq_lh;
+ lnet_seq_t eq_enq_seq;
+ lnet_seq_t eq_deq_seq;
+ unsigned int eq_size;
+ lnet_eq_handler_t eq_callback;
+ lnet_event_t *eq_events;
+ int **eq_refs; /* percpt refcount for EQ */
+} lnet_eq_t;
+
+typedef struct lnet_me {
+ struct list_head me_list;
+ lnet_libhandle_t me_lh;
+ lnet_process_id_t me_match_id;
+ unsigned int me_portal;
+ unsigned int me_pos; /* hash offset in mt_hash */
+ __u64 me_match_bits;
+ __u64 me_ignore_bits;
+ lnet_unlink_t me_unlink;
+ struct lnet_libmd *me_md;
+} lnet_me_t;
+
+typedef struct lnet_libmd {
+ struct list_head md_list;
+ lnet_libhandle_t md_lh;
+ lnet_me_t *md_me;
+ char *md_start;
+ unsigned int md_offset;
+ unsigned int md_length;
+ unsigned int md_max_size;
+ int md_threshold;
+ int md_refcount;
+ unsigned int md_options;
+ unsigned int md_flags;
+ void *md_user_ptr;
+ lnet_eq_t *md_eq;
+ unsigned int md_niov; /* # frags */
+ union {
+ struct iovec iov[LNET_MAX_IOV];
+ lnet_kiov_t kiov[LNET_MAX_IOV];
+ } md_iov;
+} lnet_libmd_t;
+
+#define LNET_MD_FLAG_ZOMBIE (1 << 0)
+#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1)
+
+#ifdef LNET_USE_LIB_FREELIST
+typedef struct
+{
+ void *fl_objs; /* single contiguous array of objects */
+ int fl_nobjs; /* the number of them */
+ int fl_objsize; /* the size (including overhead) of each of them */
+ struct list_head fl_list; /* where they are enqueued */
+} lnet_freelist_t;
+
+typedef struct
+{
+ struct list_head fo_list; /* enqueue on fl_list */
+ void *fo_contents; /* aligned contents */
+} lnet_freeobj_t;
+#endif
+
+typedef struct {
+ /* info about peers we are trying to fail */
+ struct list_head tp_list; /* ln_test_peers */
+ lnet_nid_t tp_nid; /* matching nid */
+ unsigned int tp_threshold; /* # failures to simulate */
+} lnet_test_peer_t;
+
+#define LNET_COOKIE_TYPE_MD 1
+#define LNET_COOKIE_TYPE_ME 2
+#define LNET_COOKIE_TYPE_EQ 3
+#define LNET_COOKIE_TYPE_BITS 2
+#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
+
+struct lnet_ni; /* forward ref */
+
+typedef struct lnet_lnd
+{
+ /* fields managed by portals */
+ struct list_head lnd_list; /* stash in the LND table */
+ int lnd_refcount; /* # active instances */
+
+ /* fields initialised by the LND */
+ unsigned int lnd_type;
+
+ int (*lnd_startup) (struct lnet_ni *ni);
+ void (*lnd_shutdown) (struct lnet_ni *ni);
+ int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
+
+ /* In data movement APIs below, payload buffers are described as a set
+ * of 'niov' fragments which are...
+ * EITHER
+ * in virtual memory (struct iovec *iov != NULL)
+ * OR
+ * in pages (kernel only: plt_kiov_t *kiov != NULL).
+ * The LND may NOT overwrite these fragment descriptors.
+ * An 'offset' and may specify a byte offset within the set of
+ * fragments to start from
+ */
+
+ /* Start sending a preformatted message. 'private' is NULL for PUT and
+ * GET messages; otherwise this is a response to an incoming message
+ * and 'private' is the 'private' passed to lnet_parse(). Return
+ * non-zero for immediate failure, otherwise complete later with
+ * lnet_finalize() */
+ int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg);
+
+ /* Start receiving 'mlen' bytes of payload data, skipping the following
+ * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
+ * lnet_parse(). Return non-zero for immedaite failure, otherwise
+ * complete later with lnet_finalize(). This also gives back a receive
+ * credit if the LND does flow control. */
+ int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
+ int delayed, unsigned int niov,
+ struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen);
+
+ /* lnet_parse() has had to delay processing of this message
+ * (e.g. waiting for a forwarding buffer or send credits). Give the
+ * LND a chance to free urgently needed resources. If called, return 0
+ * for success and do NOT give back a receive credit; that has to wait
+ * until lnd_recv() gets called. On failure return < 0 and
+ * release resources; lnd_recv() will not be called. */
+ int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
+ void **new_privatep);
+
+ /* notification of peer health */
+ void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
+
+ /* query of peer aliveness */
+ void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when);
+
+ /* accept a new connection */
+ int (*lnd_accept)(struct lnet_ni *ni, socket_t *sock);
+
+} lnd_t;
+
+#define LNET_NI_STATUS_UP 0x15aac0de
+#define LNET_NI_STATUS_DOWN 0xdeadface
+#define LNET_NI_STATUS_INVALID 0x00000000
+typedef struct {
+ lnet_nid_t ns_nid;
+ __u32 ns_status;
+ __u32 ns_unused;
+} WIRE_ATTR lnet_ni_status_t;
+
+struct lnet_tx_queue {
+ int tq_credits; /* # tx credits free */
+ int tq_credits_min; /* lowest it's been */
+ int tq_credits_max; /* total # tx credits */
+ struct list_head tq_delayed; /* delayed TXs */
+};
+
+#define LNET_MAX_INTERFACES 16
+
+typedef struct lnet_ni {
+ spinlock_t ni_lock;
+ struct list_head ni_list; /* chain on ln_nis */
+ struct list_head ni_cptlist; /* chain on ln_nis_cpt */
+ int ni_maxtxcredits; /* # tx credits */
+ /* # per-peer send credits */
+ int ni_peertxcredits;
+ /* # per-peer router buffer credits */
+ int ni_peerrtrcredits;
+ /* seconds to consider peer dead */
+ int ni_peertimeout;
+ int ni_ncpts; /* number of CPTs */
+ __u32 *ni_cpts; /* bond NI on some CPTs */
+ lnet_nid_t ni_nid; /* interface's NID */
+ void *ni_data; /* instance-specific data */
+ lnd_t *ni_lnd; /* procedural interface */
+ struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */
+ int **ni_refs; /* percpt reference count */
+ long ni_last_alive; /* when I was last alive */
+ lnet_ni_status_t *ni_status; /* my health status */
+ /* equivalent interfaces to use */
+ char *ni_interfaces[LNET_MAX_INTERFACES];
+} lnet_ni_t;
+
+#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
+
+/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x
+ * of old LNet, so there shouldn't be any compatibility issue */
+#define LNET_PING_FEAT_INVAL (0) /* no feature */
+#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */
+#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */
+
+#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \
+ LNET_PING_FEAT_NI_STATUS)
+
+typedef struct {
+ __u32 pi_magic;
+ __u32 pi_features;
+ lnet_pid_t pi_pid;
+ __u32 pi_nnis;
+ lnet_ni_status_t pi_ni[0];
+} WIRE_ATTR lnet_ping_info_t;
+
+/* router checker data, per router */
+#define LNET_MAX_RTR_NIS 16
+#define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS])
+typedef struct {
+ /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
+ struct list_head rcd_list;
+ lnet_handle_md_t rcd_mdh; /* ping buffer MD */
+ struct lnet_peer *rcd_gateway; /* reference to gateway */
+ lnet_ping_info_t *rcd_pinginfo; /* ping buffer */
+} lnet_rc_data_t;
+
+typedef struct lnet_peer {
+ struct list_head lp_hashlist; /* chain on peer hash */
+ struct list_head lp_txq; /* messages blocking for tx credits */
+ struct list_head lp_rtrq; /* messages blocking for router credits */
+ struct list_head lp_rtr_list; /* chain on router list */
+ int lp_txcredits; /* # tx credits available */
+ int lp_mintxcredits; /* low water mark */
+ int lp_rtrcredits; /* # router credits */
+ int lp_minrtrcredits; /* low water mark */
+ unsigned int lp_alive:1; /* alive/dead? */
+ unsigned int lp_notify:1; /* notification outstanding? */
+ unsigned int lp_notifylnd:1; /* outstanding notification for LND? */
+ unsigned int lp_notifying:1; /* some thread is handling notification */
+ unsigned int lp_ping_notsent; /* SEND event outstanding from ping */
+ int lp_alive_count; /* # times router went dead<->alive */
+ long lp_txqnob; /* bytes queued for sending */
+ cfs_time_t lp_timestamp; /* time of last aliveness news */
+ cfs_time_t lp_ping_timestamp; /* time of last ping attempt */
+ cfs_time_t lp_ping_deadline; /* != 0 if ping reply expected */
+ cfs_time_t lp_last_alive; /* when I was last alive */
+ cfs_time_t lp_last_query; /* when lp_ni was queried last time */
+ lnet_ni_t *lp_ni; /* interface peer is on */
+ lnet_nid_t lp_nid; /* peer's NID */
+ int lp_refcount; /* # refs */
+ int lp_cpt; /* CPT this peer attached on */
+ /* # refs from lnet_route_t::lr_gateway */
+ int lp_rtr_refcount;
+ /* returned RC ping features */
+ unsigned int lp_ping_feats;
+ struct list_head lp_routes; /* routers on this peer */
+ lnet_rc_data_t *lp_rcd; /* router checker state */
+} lnet_peer_t;
+
+
+/* peer hash size */
+#define LNET_PEER_HASH_BITS 9
+#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS)
+
+/* peer hash table */
+struct lnet_peer_table {
+ int pt_version; /* /proc validity stamp */
+ int pt_number; /* # peers extant */
+ struct list_head pt_deathrow; /* zombie peers */
+ struct list_head *pt_hash; /* NID->peer hash */
+};
+
+/* peer aliveness is enabled only on routers for peers in a network where the
+ * lnet_ni_t::ni_peertimeout has been set to a positive value */
+#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
+ (lp)->lp_ni->ni_peertimeout > 0)
+
+typedef struct {
+ struct list_head lr_list; /* chain on net */
+ struct list_head lr_gwlist; /* chain on gateway */
+ lnet_peer_t *lr_gateway; /* router node */
+ __u32 lr_net; /* remote network number */
+ int lr_seq; /* sequence for round-robin */
+ unsigned int lr_downis; /* number of down NIs */
+ unsigned int lr_hops; /* how far I am */
+} lnet_route_t;
+
+#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7)
+#define LNET_REMOTE_NETS_HASH_MAX (1U << 16)
+#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits)
+
+typedef struct {
+ struct list_head lrn_list; /* chain on ln_remote_nets_hash */
+ struct list_head lrn_routes; /* routes to me */
+ __u32 lrn_net; /* my net number */
+} lnet_remotenet_t;
+
+typedef struct {
+ struct list_head rbp_bufs; /* my free buffer pool */
+ struct list_head rbp_msgs; /* messages blocking for a buffer */
+ int rbp_npages; /* # pages in each buffer */
+ int rbp_nbuffers; /* # buffers */
+ int rbp_credits; /* # free buffers / blocked messages */
+ int rbp_mincredits; /* low water mark */
+} lnet_rtrbufpool_t;
+
+typedef struct {
+ struct list_head rb_list; /* chain on rbp_bufs */
+ lnet_rtrbufpool_t *rb_pool; /* owning pool */
+ lnet_kiov_t rb_kiov[0]; /* the buffer space */
+} lnet_rtrbuf_t;
+
+typedef struct {
+ __u32 msgs_alloc;
+ __u32 msgs_max;
+ __u32 errors;
+ __u32 send_count;
+ __u32 recv_count;
+ __u32 route_count;
+ __u32 drop_count;
+ __u64 send_length;
+ __u64 recv_length;
+ __u64 route_length;
+ __u64 drop_length;
+} WIRE_ATTR lnet_counters_t;
+
+#define LNET_PEER_HASHSIZE 503 /* prime! */
+
+#define LNET_NRBPOOLS 3 /* # different router buffer pools */
+
+enum {
+ /* Didn't match anything */
+ LNET_MATCHMD_NONE = (1 << 0),
+ /* Matched OK */
+ LNET_MATCHMD_OK = (1 << 1),
+ /* Must be discarded */
+ LNET_MATCHMD_DROP = (1 << 2),
+ /* match and buffer is exhausted */
+ LNET_MATCHMD_EXHAUSTED = (1 << 3),
+ /* match or drop */
+ LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
+};
+
+/* Options for lnet_portal_t::ptl_options */
+#define LNET_PTL_LAZY (1 << 0)
+#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */
+#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */
+
+/* parameter for matching operations (GET, PUT) */
+struct lnet_match_info {
+ __u64 mi_mbits;
+ lnet_process_id_t mi_id;
+ unsigned int mi_opc;
+ unsigned int mi_portal;
+ unsigned int mi_rlength;
+ unsigned int mi_roffset;
+};
+
+/* ME hash of RDMA portal */
+#define LNET_MT_HASH_BITS 8
+#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS)
+#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1)
+/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash,
+ * the last entry is reserved for MEs with ignore-bits */
+#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE
+/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which
+ * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the
+ * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] */
+#define LNET_MT_BITS_U64 6 /* 2^6 bits */
+#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
+#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1)
+
+/* portal match table */
+struct lnet_match_table {
+ /* reserved for upcoming patches, CPU partition ID */
+ unsigned int mt_cpt;
+ unsigned int mt_portal; /* portal index */
+ /* match table is set as "enabled" if there's non-exhausted MD
+ * attached on mt_mhash, it's only valide for wildcard portal */
+ unsigned int mt_enabled;
+ /* bitmap to flag whether MEs on mt_hash are exhausted or not */
+ __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
+ struct list_head *mt_mhash; /* matching hash */
+};
+
+/* these are only useful for wildcard portal */
+/* Turn off message rotor for wildcard portals */
+#define LNET_PTL_ROTOR_OFF 0
+/* round-robin dispatch all PUT messages for wildcard portals */
+#define LNET_PTL_ROTOR_ON 1
+/* round-robin dispatch routed PUT message for wildcard portals */
+#define LNET_PTL_ROTOR_RR_RT 2
+/* dispatch routed PUT message by hashing source NID for wildcard portals */
+#define LNET_PTL_ROTOR_HASH_RT 3
+
+typedef struct lnet_portal {
+ spinlock_t ptl_lock;
+ unsigned int ptl_index; /* portal ID, reserved */
+ /* flags on this portal: lazy, unique... */
+ unsigned int ptl_options;
+ /* list of messags which are stealing buffer */
+ struct list_head ptl_msg_stealing;
+ /* messages blocking for MD */
+ struct list_head ptl_msg_delayed;
+ /* Match table for each CPT */
+ struct lnet_match_table **ptl_mtables;
+ /* spread rotor of incoming "PUT" */
+ int ptl_rotor;
+ /* # active entries for this portal */
+ int ptl_mt_nmaps;
+ /* array of active entries' cpu-partition-id */
+ int ptl_mt_maps[0];
+} lnet_portal_t;
+
+#define LNET_LH_HASH_BITS 12
+#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS)
+#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1)
+
+/* resource container (ME, MD, EQ) */
+struct lnet_res_container {
+ unsigned int rec_type; /* container type */
+ __u64 rec_lh_cookie; /* cookie generator */
+ struct list_head rec_active; /* active resource list */
+ struct list_head *rec_lh_hash; /* handle hash */
+#ifdef LNET_USE_LIB_FREELIST
+ lnet_freelist_t rec_freelist; /* freelist for resources */
+#endif
+};
+
+/* message container */
+struct lnet_msg_container {
+ int msc_init; /* initialized or not */
+ /* max # threads finalizing */
+ int msc_nfinalizers;
+ /* msgs waiting to complete finalizing */
+ struct list_head msc_finalizing;
+ struct list_head msc_active; /* active message list */
+ /* threads doing finalization */
+ void **msc_finalizers;
+#ifdef LNET_USE_LIB_FREELIST
+ lnet_freelist_t msc_freelist; /* freelist for messages */
+#endif
+};
+
+/* Router Checker states */
+#define LNET_RC_STATE_SHUTDOWN 0 /* not started */
+#define LNET_RC_STATE_RUNNING 1 /* started up OK */
+#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */
+
+typedef struct
+{
+ /* CPU partition table of LNet */
+ struct cfs_cpt_table *ln_cpt_table;
+ /* number of CPTs in ln_cpt_table */
+ unsigned int ln_cpt_number;
+ unsigned int ln_cpt_bits;
+
+ /* protect LNet resources (ME/MD/EQ) */
+ struct cfs_percpt_lock *ln_res_lock;
+ /* # portals */
+ int ln_nportals;
+ /* the vector of portals */
+ lnet_portal_t **ln_portals;
+ /* percpt ME containers */
+ struct lnet_res_container **ln_me_containers;
+ /* percpt MD container */
+ struct lnet_res_container **ln_md_containers;
+
+ /* Event Queue container */
+ struct lnet_res_container ln_eq_container;
+ wait_queue_head_t ln_eq_waitq;
+ spinlock_t ln_eq_wait_lock;
+ unsigned int ln_remote_nets_hbits;
+
+ /* protect NI, peer table, credits, routers, rtrbuf... */
+ struct cfs_percpt_lock *ln_net_lock;
+ /* percpt message containers for active/finalizing/freed message */
+ struct lnet_msg_container **ln_msg_containers;
+ lnet_counters_t **ln_counters;
+ struct lnet_peer_table **ln_peer_tables;
+ /* failure simulation */
+ struct list_head ln_test_peers;
+
+ struct list_head ln_nis; /* LND instances */
+ /* NIs bond on specific CPT(s) */
+ struct list_head ln_nis_cpt;
+ /* dying LND instances */
+ struct list_head ln_nis_zombie;
+ lnet_ni_t *ln_loni; /* the loopback NI */
+ /* NI to wait for events in */
+ lnet_ni_t *ln_eq_waitni;
+
+ /* remote networks with routes to them */
+ struct list_head *ln_remote_nets_hash;
+ /* validity stamp */
+ __u64 ln_remote_nets_version;
+ /* list of all known routers */
+ struct list_head ln_routers;
+ /* validity stamp */
+ __u64 ln_routers_version;
+ /* percpt router buffer pools */
+ lnet_rtrbufpool_t **ln_rtrpools;
+
+ lnet_handle_md_t ln_ping_target_md;
+ lnet_handle_eq_t ln_ping_target_eq;
+ lnet_ping_info_t *ln_ping_info;
+
+ /* router checker startup/shutdown state */
+ int ln_rc_state;
+ /* router checker's event queue */
+ lnet_handle_eq_t ln_rc_eqh;
+ /* rcd still pending on net */
+ struct list_head ln_rcd_deathrow;
+ /* rcd ready for free */
+ struct list_head ln_rcd_zombie;
+ /* serialise startup/shutdown */
+ struct semaphore ln_rc_signal;
+
+ struct mutex ln_api_mutex;
+ struct mutex ln_lnd_mutex;
+ int ln_init; /* LNetInit() called? */
+ /* Have I called LNetNIInit myself? */
+ int ln_niinit_self;
+ /* LNetNIInit/LNetNIFini counter */
+ int ln_refcount;
+ /* shutdown in progress */
+ int ln_shutdown;
+
+ int ln_routing; /* am I a router? */
+ lnet_pid_t ln_pid; /* requested pid */
+ /* uniquely identifies this ni in this epoch */
+ __u64 ln_interface_cookie;
+ /* registered LNDs */
+ struct list_head ln_lnds;
+
+ /* space for network names */
+ char *ln_network_tokens;
+ int ln_network_tokens_nob;
+ /* test protocol compatibility flags */
+ int ln_testprotocompat;
+
+} lnet_t;
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/linux/api-support.h b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h
new file mode 100644
index 000000000000..ca78a0a4e908
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h
@@ -0,0 +1,43 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_API_SUPPORT_H__
+#define __LINUX_API_SUPPORT_H__
+
+#ifndef __LNET_API_SUPPORT_H__
+#error Do not #include this file directly. #include <lnet /api-support.h> instead
+#endif
+
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h
new file mode 100644
index 000000000000..d2c0a70f1f7e
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LIB_LNET_H__
+#define __LNET_LINUX_LIB_LNET_H__
+
+#ifndef __LNET_LIB_LNET_H__
+#error Do not #include this file directly. #include <linux/lnet/lib-lnet.h> instead
+#endif
+
+# include <asm/page.h>
+# include <linux/string.h>
+# include <asm/io.h>
+# include <linux/libcfs/libcfs.h>
+
+static inline __u64
+lnet_page2phys (struct page *p)
+{
+ /* compiler optimizer will elide unused branches */
+
+ switch (sizeof(typeof(page_to_phys(p)))) {
+ case 4:
+ /* page_to_phys returns a 32 bit physical address. This must
+ * be a 32 bit machine with <= 4G memory and we must ensure we
+ * don't sign extend when converting to 64 bits. */
+ return (unsigned long)page_to_phys(p);
+
+ case 8:
+ /* page_to_phys returns a 64 bit physical address :) */
+ return page_to_phys(p);
+
+ default:
+ LBUG();
+ return 0;
+ }
+}
+
+
+#define LNET_ROUTER
+
+#endif /* __LNET_LINUX_LIB_LNET_H__ */
diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h
new file mode 100644
index 000000000000..669e8c038534
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h
@@ -0,0 +1,45 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LIB_TYPES_H__
+#define __LNET_LINUX_LIB_TYPES_H__
+
+#ifndef __LNET_LIB_TYPES_H__
+#error Do not #include this file directly. #include <linux/lnet/lib-types.h> instead
+#endif
+
+# include <linux/uio.h>
+# include <linux/types.h>
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h
new file mode 100644
index 000000000000..1e888f1efc45
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h
@@ -0,0 +1,56 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_LINUX_LNET_H__
+#define __LNET_LINUX_LNET_H__
+
+#ifndef __LNET_H__
+#error Do not #include this file directly. #include <linux/lnet/lnet.h> instead
+#endif
+
+/*
+ * lnet.h
+ *
+ * User application interface file
+ */
+
+#include <linux/uio.h>
+#include <linux/types.h>
+
+#define cfs_tcp_sendpage(sk, page, offset, size, flags) \
+ tcp_sendpage(sk, page, offset, size, flags)
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h
new file mode 100644
index 000000000000..1bde44ebb911
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h
@@ -0,0 +1,51 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_SYSCTL_H__
+#define __LNET_SYSCTL_H__
+
+#if defined(CONFIG_SYSCTL)
+
+
+#define CTL_KRANAL 201
+#define CTL_O2IBLND 205
+#define CTL_PTLLND 206
+#define CTL_QSWNAL 207
+#define CTL_SOCKLND 208
+#define CTL_GNILND 210
+
+
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lnet.h b/drivers/staging/lustre/include/linux/lnet/lnet.h
new file mode 100644
index 000000000000..c532b15d7643
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lnet.h
@@ -0,0 +1,51 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_H__
+#define __LNET_H__
+
+/*
+ * lnet.h
+ *
+ * User application interface file
+ */
+#include <linux/lnet/linux/lnet.h>
+
+#include <linux/lnet/types.h>
+#include <linux/lnet/api.h>
+
+#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */
+#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/linux/lnet/lnetctl.h
new file mode 100644
index 000000000000..b22daa234255
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lnetctl.h
@@ -0,0 +1,80 @@
+/*
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * header for libptlctl.a
+ */
+#ifndef _PTLCTL_H_
+#define _PTLCTL_H_
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+
+#define LNET_DEV_ID 0
+#define LNET_DEV_PATH "/dev/lnet"
+#define LNET_DEV_MAJOR 10
+#define LNET_DEV_MINOR 240
+#define OBD_DEV_ID 1
+#define OBD_DEV_NAME "obd"
+#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
+#define OBD_DEV_MAJOR 10
+#define OBD_DEV_MINOR 241
+#define SMFS_DEV_ID 2
+#define SMFS_DEV_PATH "/dev/snapdev"
+#define SMFS_DEV_MAJOR 10
+#define SMFS_DEV_MINOR 242
+
+int ptl_initialize(int argc, char **argv);
+int jt_ptl_network(int argc, char **argv);
+int jt_ptl_list_nids(int argc, char **argv);
+int jt_ptl_which_nid(int argc, char **argv);
+int jt_ptl_print_interfaces(int argc, char **argv);
+int jt_ptl_add_interface(int argc, char **argv);
+int jt_ptl_del_interface(int argc, char **argv);
+int jt_ptl_print_peers (int argc, char **argv);
+int jt_ptl_add_peer (int argc, char **argv);
+int jt_ptl_del_peer (int argc, char **argv);
+int jt_ptl_print_connections (int argc, char **argv);
+int jt_ptl_disconnect(int argc, char **argv);
+int jt_ptl_push_connection(int argc, char **argv);
+int jt_ptl_print_active_txs(int argc, char **argv);
+int jt_ptl_ping(int argc, char **argv);
+int jt_ptl_mynid(int argc, char **argv);
+int jt_ptl_add_uuid(int argc, char **argv);
+int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
+int jt_ptl_close_uuid(int argc, char **argv);
+int jt_ptl_del_uuid(int argc, char **argv);
+int jt_ptl_add_route (int argc, char **argv);
+int jt_ptl_del_route (int argc, char **argv);
+int jt_ptl_notify_router (int argc, char **argv);
+int jt_ptl_print_routes (int argc, char **argv);
+int jt_ptl_fail_nid (int argc, char **argv);
+int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_testprotocompat(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
+
+int dbg_initialize(int argc, char **argv);
+int jt_dbg_filter(int argc, char **argv);
+int jt_dbg_show(int argc, char **argv);
+int jt_dbg_list(int argc, char **argv);
+int jt_dbg_debug_kernel(int argc, char **argv);
+int jt_dbg_debug_daemon(int argc, char **argv);
+int jt_dbg_debug_file(int argc, char **argv);
+int jt_dbg_clear_debug_buf(int argc, char **argv);
+int jt_dbg_mark_debug_buf(int argc, char **argv);
+int jt_dbg_modules(int argc, char **argv);
+int jt_dbg_panic(int argc, char **argv);
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lnetst.h b/drivers/staging/lustre/include/linux/lnet/lnetst.h
new file mode 100644
index 000000000000..d90f94e94601
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/lnetst.h
@@ -0,0 +1,491 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/lnetst.h
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#ifndef __LNET_ST_H__
+#define __LNET_ST_H__
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+
+#define LST_FEAT_NONE (0)
+#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */
+
+#define LST_FEATS_EMPTY (LST_FEAT_NONE)
+#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN)
+
+#define LST_NAME_SIZE 32 /* max name buffer length */
+
+#define LSTIO_DEBUG 0xC00 /* debug */
+#define LSTIO_SESSION_NEW 0xC01 /* create session */
+#define LSTIO_SESSION_END 0xC02 /* end session */
+#define LSTIO_SESSION_INFO 0xC03 /* query session */
+#define LSTIO_GROUP_ADD 0xC10 /* add group */
+#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */
+#define LSTIO_GROUP_INFO 0xC12 /* query defailt infomation of specified group */
+#define LSTIO_GROUP_DEL 0xC13 /* delete group */
+#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */
+#define LSTIO_GROUP_UPDATE 0xC15 /* update group */
+#define LSTIO_BATCH_ADD 0xC20 /* add batch */
+#define LSTIO_BATCH_START 0xC21 /* start batch */
+#define LSTIO_BATCH_STOP 0xC22 /* stop batch */
+#define LSTIO_BATCH_DEL 0xC23 /* delete batch */
+#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */
+#define LSTIO_BATCH_INFO 0xC25 /* show defail of specified batch */
+#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */
+#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */
+#define LSTIO_STAT_QUERY 0xC30 /* get stats */
+
+typedef struct {
+ lnet_nid_t ses_nid; /* nid of console node */
+ __u64 ses_stamp; /* time stamp */
+} lst_sid_t; /*** session id */
+
+extern lst_sid_t LST_INVALID_SID;
+
+typedef struct {
+ __u64 bat_id; /* unique id in session */
+} lst_bid_t; /*** batch id (group of tests) */
+
+/* Status of test node */
+#define LST_NODE_ACTIVE 0x1 /* node in this session */
+#define LST_NODE_BUSY 0x2 /* node is taken by other session */
+#define LST_NODE_DOWN 0x4 /* node is down */
+#define LST_NODE_UNKNOWN 0x8 /* node not in session */
+
+typedef struct {
+ lnet_process_id_t nde_id; /* id of node */
+ int nde_state; /* state of node */
+} lstcon_node_ent_t; /*** node entry, for list_group command */
+
+typedef struct {
+ int nle_nnode; /* # of nodes */
+ int nle_nactive; /* # of active nodes */
+ int nle_nbusy; /* # of busy nodes */
+ int nle_ndown; /* # of down nodes */
+ int nle_nunknown; /* # of unknown nodes */
+} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */
+
+typedef struct {
+ int tse_type; /* test type */
+ int tse_loop; /* loop count */
+ int tse_concur; /* concurrency of test */
+} lstcon_test_ent_t; /*** test summary entry, for list_batch command */
+
+typedef struct {
+ int bae_state; /* batch status */
+ int bae_timeout; /* batch timeout */
+ int bae_ntest; /* # of tests in the batch */
+} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */
+
+typedef struct {
+ lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */
+ lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */
+ union {
+ lstcon_test_ent_t tbe_test; /* test entry */
+ lstcon_batch_ent_t tbe_batch; /* batch entry */
+ } u;
+} lstcon_test_batch_ent_t; /*** test/batch verbose information entry,
+ *** for list_batch command */
+
+typedef struct {
+ struct list_head rpe_link; /* link chain */
+ lnet_process_id_t rpe_peer; /* peer's id */
+ struct timeval rpe_stamp; /* time stamp of RPC */
+ int rpe_state; /* peer's state */
+ int rpe_rpc_errno; /* RPC errno */
+
+ lst_sid_t rpe_sid; /* peer's session id */
+ int rpe_fwk_errno; /* framework errno */
+ int rpe_priv[4]; /* private data */
+ char rpe_payload[0]; /* private reply payload */
+} lstcon_rpc_ent_t;
+
+typedef struct {
+ int trs_rpc_stat[4]; /* RPCs stat (0: total, 1: failed, 2: finished, 4: reserved */
+ int trs_rpc_errno; /* RPC errno */
+ int trs_fwk_stat[8]; /* framework stat */
+ int trs_fwk_errno; /* errno of the first remote error */
+ void *trs_fwk_private; /* private framework stat */
+} lstcon_trans_stat_t;
+
+static inline int
+lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0];
+}
+
+static inline int
+lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1];
+}
+
+static inline int
+lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2];
+}
+
+static inline int
+lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
+}
+
+static inline int
+lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+static inline int
+lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2];
+}
+
+static inline int
+lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0];
+}
+
+static inline int
+lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
+{
+ return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1];
+}
+
+/* create a session */
+typedef struct {
+ int lstio_ses_key; /* IN: local key */
+ int lstio_ses_timeout; /* IN: session timeout */
+ int lstio_ses_force; /* IN: force create ? */
+ /** IN: session features */
+ unsigned lstio_ses_feats;
+ lst_sid_t *lstio_ses_idp; /* OUT: session id */
+ int lstio_ses_nmlen; /* IN: name length */
+ char *lstio_ses_namep; /* IN: session name */
+} lstio_session_new_args_t;
+
+/* query current session */
+typedef struct {
+ lst_sid_t *lstio_ses_idp; /* OUT: session id */
+ int *lstio_ses_keyp; /* OUT: local key */
+ /** OUT: session features */
+ unsigned *lstio_ses_featp;
+ lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: */
+ int lstio_ses_nmlen; /* IN: name length */
+ char *lstio_ses_namep; /* OUT: session name */
+} lstio_session_info_args_t;
+
+/* delete a session */
+typedef struct {
+ int lstio_ses_key; /* IN: session key */
+} lstio_session_end_args_t;
+
+#define LST_OPC_SESSION 1
+#define LST_OPC_GROUP 2
+#define LST_OPC_NODES 3
+#define LST_OPC_BATCHCLI 4
+#define LST_OPC_BATCHSRV 5
+
+typedef struct {
+ int lstio_dbg_key; /* IN: session key */
+ int lstio_dbg_type; /* IN: debug sessin|batch|group|nodes list */
+ int lstio_dbg_flags; /* IN: reserved debug flags */
+ int lstio_dbg_timeout; /* IN: timeout of debug */
+
+ int lstio_dbg_nmlen; /* IN: len of name */
+ char *lstio_dbg_namep; /* IN: name of group|batch */
+ int lstio_dbg_count; /* IN: # of test nodes to debug */
+ lnet_process_id_t *lstio_dbg_idsp; /* IN: id of test nodes */
+ struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */
+} lstio_debug_args_t;
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_nmlen; /* IN: name length */
+ char *lstio_grp_namep; /* IN: group name */
+} lstio_group_add_args_t;
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_nmlen; /* IN: name length */
+ char *lstio_grp_namep; /* IN: group name */
+} lstio_group_del_args_t;
+
+#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */
+#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */
+#define LST_GROUP_RMND 3 /* delete nodes from the group */
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_opc; /* IN: OPC */
+ int lstio_grp_args; /* IN: arguments */
+ int lstio_grp_nmlen; /* IN: name length */
+ char *lstio_grp_namep; /* IN: group name */
+ int lstio_grp_count; /* IN: # of nodes id */
+ lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */
+ struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
+} lstio_group_update_args_t;
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_nmlen; /* IN: name length */
+ char *lstio_grp_namep; /* IN: group name */
+ int lstio_grp_count; /* IN: # of nodes */
+ /** OUT: session features */
+ unsigned *lstio_grp_featp;
+ lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */
+ struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
+} lstio_group_nodes_args_t;
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_idx; /* IN: group idx */
+ int lstio_grp_nmlen; /* IN: name len */
+ char *lstio_grp_namep; /* OUT: name */
+} lstio_group_list_args_t;
+
+typedef struct {
+ int lstio_grp_key; /* IN: session key */
+ int lstio_grp_nmlen; /* IN: name len */
+ char *lstio_grp_namep; /* IN: name */
+ lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */
+
+ int *lstio_grp_idxp; /* IN/OUT: node index */
+ int *lstio_grp_ndentp; /* IN/OUT: # of nodent */
+ lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */
+} lstio_group_info_args_t;
+
+#define LST_DEFAULT_BATCH "batch" /* default batch name */
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+} lstio_batch_add_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+} lstio_batch_del_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_timeout; /* IN: timeout for the batch */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+ struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
+} lstio_batch_run_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_force; /* IN: abort unfinished test RPC */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+ struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
+} lstio_batch_stop_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_testidx; /* IN: test index */
+ int lstio_bat_client; /* IN: is test client? */
+ int lstio_bat_timeout; /* IN: timeout for waiting */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+ struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
+} lstio_batch_query_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_idx; /* IN: index */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: batch name */
+} lstio_batch_list_args_t;
+
+typedef struct {
+ int lstio_bat_key; /* IN: session key */
+ int lstio_bat_nmlen; /* IN: name length */
+ char *lstio_bat_namep; /* IN: name */
+ int lstio_bat_server; /* IN: query server or not */
+ int lstio_bat_testidx; /* IN: test index */
+ lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */
+
+ int *lstio_bat_idxp; /* IN/OUT: index of node */
+ int *lstio_bat_ndentp; /* IN/OUT: # of nodent */
+ lstcon_node_ent_t *lstio_bat_dentsp; /* array of nodent */
+} lstio_batch_info_args_t;
+
+/* add stat in session */
+typedef struct {
+ int lstio_sta_key; /* IN: session key */
+ int lstio_sta_timeout; /* IN: timeout for stat requst */
+ int lstio_sta_nmlen; /* IN: group name length */
+ char *lstio_sta_namep; /* IN: group name */
+ int lstio_sta_count; /* IN: # of pid */
+ lnet_process_id_t *lstio_sta_idsp; /* IN: pid */
+ struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */
+} lstio_stat_args_t;
+
+typedef enum {
+ LST_TEST_BULK = 1,
+ LST_TEST_PING = 2
+} lst_test_type_t;
+
+/* create a test in a batch */
+#define LST_MAX_CONCUR 1024 /* Max concurrency of test */
+
+typedef struct {
+ int lstio_tes_key; /* IN: session key */
+ int lstio_tes_bat_nmlen; /* IN: batch name len */
+ char *lstio_tes_bat_name; /* IN: batch name */
+ int lstio_tes_type; /* IN: test type */
+ int lstio_tes_oneside; /* IN: one sided test */
+ int lstio_tes_loop; /* IN: loop count */
+ int lstio_tes_concur; /* IN: concurrency */
+
+ int lstio_tes_dist; /* IN: node distribution in destination groups */
+ int lstio_tes_span; /* IN: node span in destination groups */
+ int lstio_tes_sgrp_nmlen; /* IN: source group name length */
+ char *lstio_tes_sgrp_name; /* IN: group name */
+ int lstio_tes_dgrp_nmlen; /* IN: destination group name length */
+ char *lstio_tes_dgrp_name; /* IN: group name */
+
+ int lstio_tes_param_len; /* IN: param buffer len */
+ void *lstio_tes_param; /* IN: parameter for specified test:
+ lstio_bulk_param_t,
+ lstio_ping_param_t,
+ ... more */
+ int *lstio_tes_retp; /* OUT: private returned value */
+ struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */
+} lstio_test_args_t;
+
+typedef enum {
+ LST_BRW_READ = 1,
+ LST_BRW_WRITE = 2
+} lst_brw_type_t;
+
+typedef enum {
+ LST_BRW_CHECK_NONE = 1,
+ LST_BRW_CHECK_SIMPLE = 2,
+ LST_BRW_CHECK_FULL = 3
+} lst_brw_flags_t;
+
+typedef struct {
+ int blk_opc; /* bulk operation code */
+ int blk_size; /* size (bytes) */
+ int blk_time; /* time of running the test*/
+ int blk_flags; /* reserved flags */
+} lst_test_bulk_param_t;
+
+typedef struct {
+ int png_size; /* size of ping message */
+ int png_time; /* time */
+ int png_loop; /* loop */
+ int png_flags; /* reserved flags */
+} lst_test_ping_param_t;
+
+/* more tests */
+typedef struct {
+ __u32 errors;
+ __u32 rpcs_sent;
+ __u32 rpcs_rcvd;
+ __u32 rpcs_dropped;
+ __u32 rpcs_expired;
+ __u64 bulk_get;
+ __u64 bulk_put;
+} WIRE_ATTR srpc_counters_t;
+
+typedef struct {
+ /** milliseconds since current session started */
+ __u32 running_ms;
+ __u32 active_batches;
+ __u32 zombie_sessions;
+ __u32 brw_errors;
+ __u32 ping_errors;
+} WIRE_ATTR sfw_counters_t;
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd.h b/drivers/staging/lustre/include/linux/lnet/ptllnd.h
new file mode 100644
index 000000000000..fc1ce8ed1f8b
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/ptllnd.h
@@ -0,0 +1,94 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/ptllnd.h
+ *
+ * Author: PJ Kirner <pjkirner@clusterfs.com>
+ */
+
+/*
+ * The PTLLND was designed to support Portals with
+ * Lustre and non-lustre UNLINK semantics.
+ * However for now the two targets are Cray Portals
+ * on the XT3 and Lustre Portals (for testing) both
+ * have Lustre UNLINK semantics, so this is defined
+ * by default.
+ */
+#define LUSTRE_PORTALS_UNLINK_SEMANTICS
+
+
+#ifdef _USING_LUSTRE_PORTALS_
+
+/* NIDs are 64-bits on Lustre Portals */
+#define FMT_NID LPU64
+#define FMT_PID "%d"
+
+/* When using Lustre Portals Lustre completion semantics are imlicit*/
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
+
+#else /* _USING_CRAY_PORTALS_ */
+
+/* NIDs are integers on Cray Portals */
+#define FMT_NID "%u"
+#define FMT_PID "%d"
+
+/* When using Cray Portals this is defined in the Cray Portals Header*/
+/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */
+
+/* Can compare handles directly on Cray Portals */
+#define PtlHandleIsEqual(a,b) ((a) == (b))
+
+/* Diffrent error types on Cray Portals*/
+#define ptl_err_t ptl_ni_fail_t
+
+/*
+ * The Cray Portals has no maximum number of IOVs. The
+ * maximum is limited only by memory and size of the
+ * int parameters (2^31-1).
+ * Lustre only really require that the underyling
+ * implemenation to support at least LNET_MAX_IOV,
+ * so for Cray portals we can safely just use that
+ * value here.
+ *
+ */
+#define PTL_MD_MAX_IOV LNET_MAX_IOV
+
+#endif
+
+#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID
+
+/* Align incoming small request messages to an 8 byte boundary if this is
+ * supported to avoid alignment issues on some architectures */
+#ifndef PTL_MD_LOCAL_ALIGN8
+# define PTL_MD_LOCAL_ALIGN8 0
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h
new file mode 100644
index 000000000000..7d12b3a23a96
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h
@@ -0,0 +1,124 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/ptllnd_wire.h
+ *
+ * Author: PJ Kirner <pjkirner@clusterfs.com>
+ */
+
+/* Minimum buffer size that any peer will post to receive ptllnd messages */
+#define PTLLND_MIN_BUFFER_SIZE 256
+
+/************************************************************************
+ * Tunable defaults that {u,k}lnds/ptllnd should have in common.
+ */
+
+#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */
+#define PTLLND_PID 9 /* The Portals PID */
+#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */
+
+/* Default buffer size for kernel ptllnds (guaranteed eager) */
+#define PTLLND_MAX_KLND_MSG_SIZE 512
+
+/* Default buffer size for catamount ptllnds (not guaranteed eager) - large
+ * enough to avoid RDMA for anything sent while control is not in liblustre */
+#define PTLLND_MAX_ULND_MSG_SIZE 512
+
+
+/************************************************************************
+ * Portals LND Wire message format.
+ * These are sent in sender's byte order (i.e. receiver flips).
+ */
+
+#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved
+ * above is for bulk data transfer */
+#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */
+
+typedef struct
+{
+ lnet_hdr_t kptlim_hdr; /* portals header */
+ char kptlim_payload[0]; /* piggy-backed payload */
+} WIRE_ATTR kptl_immediate_msg_t;
+
+typedef struct
+{
+ lnet_hdr_t kptlrm_hdr; /* portals header */
+ __u64 kptlrm_matchbits; /* matchbits */
+} WIRE_ATTR kptl_rdma_msg_t;
+
+typedef struct
+{
+ __u64 kptlhm_matchbits; /* matchbits */
+ __u32 kptlhm_max_msg_size; /* max message size */
+} WIRE_ATTR kptl_hello_msg_t;
+
+typedef struct
+{
+ /* First 2 fields fixed FOR ALL TIME */
+ __u32 ptlm_magic; /* I'm a Portals LND message */
+ __u16 ptlm_version; /* this is my version number */
+ __u8 ptlm_type; /* the message type */
+ __u8 ptlm_credits; /* returned credits */
+ __u32 ptlm_nob; /* # bytes in whole message */
+ __u32 ptlm_cksum; /* checksum (0 == no checksum) */
+ __u64 ptlm_srcnid; /* sender's NID */
+ __u64 ptlm_srcstamp; /* sender's incarnation */
+ __u64 ptlm_dstnid; /* destination's NID */
+ __u64 ptlm_dststamp; /* destination's incarnation */
+ __u32 ptlm_srcpid; /* sender's PID */
+ __u32 ptlm_dstpid; /* destination's PID */
+
+ union {
+ kptl_immediate_msg_t immediate;
+ kptl_rdma_msg_t rdma;
+ kptl_hello_msg_t hello;
+ } WIRE_ATTR ptlm_u;
+
+} kptl_msg_t;
+
+/* kptl_msg_t::ptlm_credits is only a __u8 */
+#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1)
+
+#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC
+#define PTLLND_MSG_VERSION 0x04
+
+#define PTLLND_RDMA_OK 0x00
+#define PTLLND_RDMA_FAIL 0x01
+
+#define PTLLND_MSG_TYPE_INVALID 0x00
+#define PTLLND_MSG_TYPE_PUT 0x01
+#define PTLLND_MSG_TYPE_GET 0x02
+#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/
+#define PTLLND_MSG_TYPE_NOOP 0x04
+#define PTLLND_MSG_TYPE_HELLO 0x05
+#define PTLLND_MSG_TYPE_NAK 0x06
diff --git a/drivers/staging/lustre/include/linux/lnet/socklnd.h b/drivers/staging/lustre/include/linux/lnet/socklnd.h
new file mode 100644
index 000000000000..bacc74933a39
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/socklnd.h
@@ -0,0 +1,103 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/include/lnet/socklnd.h
+ *
+ * #defines shared between socknal implementation and utilities
+ */
+#ifndef __LNET_LNET_SOCKLND_H__
+#define __LNET_LNET_SOCKLND_H__
+
+#include <linux/lnet/types.h>
+#include <linux/lnet/lib-types.h>
+
+#define SOCKLND_CONN_NONE (-1)
+#define SOCKLND_CONN_ANY 0
+#define SOCKLND_CONN_CONTROL 1
+#define SOCKLND_CONN_BULK_IN 2
+#define SOCKLND_CONN_BULK_OUT 3
+#define SOCKLND_CONN_NTYPES 4
+
+#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN
+
+typedef struct {
+ __u32 kshm_magic; /* magic number of socklnd message */
+ __u32 kshm_version; /* version of socklnd message */
+ lnet_nid_t kshm_src_nid; /* sender's nid */
+ lnet_nid_t kshm_dst_nid; /* destination nid */
+ lnet_pid_t kshm_src_pid; /* sender's pid */
+ lnet_pid_t kshm_dst_pid; /* destination pid */
+ __u64 kshm_src_incarnation; /* sender's incarnation */
+ __u64 kshm_dst_incarnation; /* destination's incarnation */
+ __u32 kshm_ctype; /* connection type */
+ __u32 kshm_nips; /* # IP addrs */
+ __u32 kshm_ips[0]; /* IP addrs */
+} WIRE_ATTR ksock_hello_msg_t;
+
+typedef struct {
+ lnet_hdr_t ksnm_hdr; /* lnet hdr */
+
+ /*
+ * ksnm_payload is removed because of winnt compiler's limitation:
+ * zero-sized array can only be placed at the tail of [nested]
+ * structure definitions. lnet payload will be stored just after
+ * the body of structure ksock_lnet_msg_t
+ */
+} WIRE_ATTR ksock_lnet_msg_t;
+
+typedef struct {
+ __u32 ksm_type; /* type of socklnd message */
+ __u32 ksm_csum; /* checksum if != 0 */
+ __u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */
+ union {
+ ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */
+ } WIRE_ATTR ksm_u;
+} WIRE_ATTR ksock_msg_t;
+
+static inline void
+socklnd_init_msg(ksock_msg_t *msg, int type)
+{
+ msg->ksm_csum = 0;
+ msg->ksm_type = type;
+ msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0;
+}
+
+#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */
+#define KSOCK_MSG_LNET 0xc1 /* lnet msg */
+
+/* We need to know this number to parse hello msg from ksocklnd in
+ * other LND (usocklnd, for example) */
+#define KSOCK_PROTO_V2 2
+#define KSOCK_PROTO_V3 3
+
+#endif
diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h
new file mode 100644
index 000000000000..4f63b7acb9d7
--- /dev/null
+++ b/drivers/staging/lustre/include/linux/lnet/types.h
@@ -0,0 +1,503 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LNET_TYPES_H__
+#define __LNET_TYPES_H__
+
+/** \addtogroup lnet
+ * @{ */
+
+#include <linux/libcfs/libcfs.h>
+
+/** \addtogroup lnet_addr
+ * @{ */
+
+/** Portal reserved for LNet's own use.
+ * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
+ */
+#define LNET_RESERVED_PORTAL 0
+
+/**
+ * Address of an end-point in an LNet network.
+ *
+ * A node can have multiple end-points and hence multiple addresses.
+ * An LNet network can be a simple network (e.g. tcp0) or a network of
+ * LNet networks connected by LNet routers. Therefore an end-point address
+ * has two parts: network ID, and address within a network.
+ *
+ * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
+ */
+typedef __u64 lnet_nid_t;
+/**
+ * ID of a process in a node. Shortened as PID to distinguish from
+ * lnet_process_id_t, the global process ID.
+ */
+typedef __u32 lnet_pid_t;
+
+/** wildcard NID that matches any end-point address */
+#define LNET_NID_ANY ((lnet_nid_t) -1)
+/** wildcard PID that matches any lnet_pid_t */
+#define LNET_PID_ANY ((lnet_pid_t) -1)
+
+#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
+#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
+
+#define LNET_TIME_FOREVER (-1)
+
+/**
+ * Objects maintained by the LNet are accessed through handles. Handle types
+ * have names of the form lnet_handle_xx_t, where xx is one of the two letter
+ * object type codes ('eq' for event queue, 'md' for memory descriptor, and
+ * 'me' for match entry).
+ * Each type of object is given a unique handle type to enhance type checking.
+ * The type lnet_handle_any_t can be used when a generic handle is needed.
+ * Every handle value can be converted into a value of type lnet_handle_any_t
+ * without loss of information.
+ */
+typedef struct {
+ __u64 cookie;
+} lnet_handle_any_t;
+
+typedef lnet_handle_any_t lnet_handle_eq_t;
+typedef lnet_handle_any_t lnet_handle_md_t;
+typedef lnet_handle_any_t lnet_handle_me_t;
+
+#define LNET_WIRE_HANDLE_COOKIE_NONE (-1)
+
+/**
+ * Invalidate handle \a h.
+ */
+static inline void LNetInvalidateHandle(lnet_handle_any_t *h)
+{
+ h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE;
+}
+
+/**
+ * Compare handles \a h1 and \a h2.
+ *
+ * \return 1 if handles are equal, 0 if otherwise.
+ */
+static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2)
+{
+ return (h1.cookie == h2.cookie);
+}
+
+/**
+ * Check whether handle \a h is invalid.
+ *
+ * \return 1 if handle is invalid, 0 if valid.
+ */
+static inline int LNetHandleIsInvalid(lnet_handle_any_t h)
+{
+ return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie);
+}
+
+/**
+ * Global process ID.
+ */
+typedef struct {
+ /** node id */
+ lnet_nid_t nid;
+ /** process id */
+ lnet_pid_t pid;
+} lnet_process_id_t;
+/** @} lnet_addr */
+
+/** \addtogroup lnet_me
+ * @{ */
+
+/**
+ * Specifies whether the match entry or memory descriptor should be unlinked
+ * automatically (LNET_UNLINK) or not (LNET_RETAIN).
+ */
+typedef enum {
+ LNET_RETAIN = 0,
+ LNET_UNLINK
+} lnet_unlink_t;
+
+/**
+ * Values of the type lnet_ins_pos_t are used to control where a new match
+ * entry is inserted. The value LNET_INS_BEFORE is used to insert the new
+ * entry before the current entry or before the head of the list. The value
+ * LNET_INS_AFTER is used to insert the new entry after the current entry
+ * or after the last item in the list.
+ */
+typedef enum {
+ /** insert ME before current position or head of the list */
+ LNET_INS_BEFORE,
+ /** insert ME after current position or tail of the list */
+ LNET_INS_AFTER,
+ /** attach ME at tail of local CPU partition ME list */
+ LNET_INS_LOCAL
+} lnet_ins_pos_t;
+
+/** @} lnet_me */
+
+/** \addtogroup lnet_md
+ * @{ */
+
+/**
+ * Defines the visible parts of a memory descriptor. Values of this type
+ * are used to initialize memory descriptors.
+ */
+typedef struct {
+ /**
+ * Specify the memory region associated with the memory descriptor.
+ * If the options field has:
+ * - LNET_MD_KIOV bit set: The start field points to the starting
+ * address of an array of lnet_kiov_t and the length field specifies
+ * the number of entries in the array. The length can't be bigger
+ * than LNET_MAX_IOV. The lnet_kiov_t is used to describe page-based
+ * fragments that are not necessarily mapped in virtal memory.
+ * - LNET_MD_IOVEC bit set: The start field points to the starting
+ * address of an array of struct iovec and the length field specifies
+ * the number of entries in the array. The length can't be bigger
+ * than LNET_MAX_IOV. The struct iovec is used to describe fragments
+ * that have virtual addresses.
+ * - Otherwise: The memory region is contiguous. The start field
+ * specifies the starting address for the memory region and the
+ * length field specifies its length.
+ *
+ * When the memory region is fragmented, all fragments but the first
+ * one must start on page boundary, and all but the last must end on
+ * page boundary.
+ */
+ void *start;
+ unsigned int length;
+ /**
+ * Specifies the maximum number of operations that can be performed
+ * on the memory descriptor. An operation is any action that could
+ * possibly generate an event. In the usual case, the threshold value
+ * is decremented for each operation on the MD. When the threshold
+ * drops to zero, the MD becomes inactive and does not respond to
+ * operations. A threshold value of LNET_MD_THRESH_INF indicates that
+ * there is no bound on the number of operations that may be applied
+ * to a MD.
+ */
+ int threshold;
+ /**
+ * Specifies the largest incoming request that the memory descriptor
+ * should respond to. When the unused portion of a MD (length -
+ * local offset) falls below this value, the MD becomes inactive and
+ * does not respond to further operations. This value is only used
+ * if the LNET_MD_MAX_SIZE option is set.
+ */
+ int max_size;
+ /**
+ * Specifies the behavior of the memory descriptor. A bitwise OR
+ * of the following values can be used:
+ * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
+ * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
+ * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
+ * region is provided by the incoming request. By default, the
+ * offset is maintained locally. When maintained locally, the
+ * offset is incremented by the length of the request so that
+ * the next operation (PUT or GET) will access the next part of
+ * the memory region. Note that only one offset variable exists
+ * per memory descriptor. If both PUT and GET operations are
+ * performed on a memory descriptor, the offset is updated each time.
+ * - LNET_MD_TRUNCATE: The length provided in the incoming request can
+ * be reduced to match the memory available in the region (determined
+ * by subtracting the offset from the length of the memory region).
+ * By default, if the length in the incoming operation is greater
+ * than the amount of memory available, the operation is rejected.
+ * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
+ * incoming PUT operations, even if requested. By default,
+ * acknowledgments are sent for PUT operations that request an
+ * acknowledgment. Acknowledgments are never sent for GET operations.
+ * The data sent in the REPLY serves as an implicit acknowledgment.
+ * - LNET_MD_KIOV: The start and length fields specify an array of
+ * lnet_kiov_t.
+ * - LNET_MD_IOVEC: The start and length fields specify an array of
+ * struct iovec.
+ * - LNET_MD_MAX_SIZE: The max_size field is valid.
+ *
+ * Note:
+ * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
+ * capability for memory descriptors. They can't be both set.
+ * - When LNET_MD_MAX_SIZE is set, the total length of the memory
+ * region (i.e. sum of all fragment lengths) must not be less than
+ * \a max_size.
+ */
+ unsigned int options;
+ /**
+ * A user-specified value that is associated with the memory
+ * descriptor. The value does not need to be a pointer, but must fit
+ * in the space used by a pointer. This value is recorded in events
+ * associated with operations on this MD.
+ */
+ void *user_ptr;
+ /**
+ * A handle for the event queue used to log the operations performed on
+ * the memory region. If this argument is a NULL handle (i.e. nullified
+ * by LNetInvalidateHandle()), operations performed on this memory
+ * descriptor are not logged.
+ */
+ lnet_handle_eq_t eq_handle;
+} lnet_md_t;
+
+/* Max Transfer Unit (minimum supported everywhere).
+ * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
+ * these limits are system wide and not interface-local. */
+#define LNET_MTU_BITS 20
+#define LNET_MTU (1 << LNET_MTU_BITS)
+
+/** limit on the number of fragments in discontiguous MDs */
+#define LNET_MAX_IOV 256
+
+/* Max payload size */
+# define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD
+# if (LNET_MAX_PAYLOAD < LNET_MTU)
+# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
+# else
+# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
+/* PAGE_SIZE is a constant: check with cpp! */
+# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
+# endif
+# endif
+
+/**
+ * Options for the MD structure. See lnet_md_t::options.
+ */
+#define LNET_MD_OP_PUT (1 << 0)
+/** See lnet_md_t::options. */
+#define LNET_MD_OP_GET (1 << 1)
+/** See lnet_md_t::options. */
+#define LNET_MD_MANAGE_REMOTE (1 << 2)
+/* unused (1 << 3) */
+/** See lnet_md_t::options. */
+#define LNET_MD_TRUNCATE (1 << 4)
+/** See lnet_md_t::options. */
+#define LNET_MD_ACK_DISABLE (1 << 5)
+/** See lnet_md_t::options. */
+#define LNET_MD_IOVEC (1 << 6)
+/** See lnet_md_t::options. */
+#define LNET_MD_MAX_SIZE (1 << 7)
+/** See lnet_md_t::options. */
+#define LNET_MD_KIOV (1 << 8)
+
+/* For compatibility with Cray Portals */
+#define LNET_MD_PHYS 0
+
+/** Infinite threshold on MD operations. See lnet_md_t::threshold */
+#define LNET_MD_THRESH_INF (-1)
+
+/* NB lustre portals uses struct iovec internally! */
+typedef struct iovec lnet_md_iovec_t;
+
+/**
+ * A page-based fragment of a MD.
+ */
+typedef struct {
+ /** Pointer to the page where the fragment resides */
+ struct page *kiov_page;
+ /** Length in bytes of the fragment */
+ unsigned int kiov_len;
+ /**
+ * Starting offset of the fragment within the page. Note that the
+ * end of the fragment must not pass the end of the page; i.e.,
+ * kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
+ */
+ unsigned int kiov_offset;
+} lnet_kiov_t;
+/** @} lnet_md */
+
+/** \addtogroup lnet_eq
+ * @{ */
+
+/**
+ * Six types of events can be logged in an event queue.
+ */
+typedef enum {
+ /** An incoming GET operation has completed on the MD. */
+ LNET_EVENT_GET = 1,
+ /**
+ * An incoming PUT operation has completed on the MD. The
+ * underlying layers will not alter the memory (on behalf of this
+ * operation) once this event has been logged.
+ */
+ LNET_EVENT_PUT,
+ /**
+ * A REPLY operation has completed. This event is logged after the
+ * data (if any) from the REPLY has been written into the MD.
+ */
+ LNET_EVENT_REPLY,
+ /** An acknowledgment has been received. */
+ LNET_EVENT_ACK,
+ /**
+ * An outgoing send (PUT or GET) operation has completed. This event
+ * is logged after the entire buffer has been sent and it is safe for
+ * the caller to reuse the buffer.
+ *
+ * Note:
+ * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
+ * happen even when the message has not yet been put out on wire.
+ * - It's unsafe to assume that in an outgoing GET operation
+ * the LNET_EVENT_SEND event would happen before the
+ * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
+ * LNET_EVENT_ACK events in an outgoing PUT operation.
+ */
+ LNET_EVENT_SEND,
+ /**
+ * A MD has been unlinked. Note that LNetMDUnlink() does not
+ * necessarily trigger an LNET_EVENT_UNLINK event.
+ * \see LNetMDUnlink
+ */
+ LNET_EVENT_UNLINK,
+} lnet_event_kind_t;
+
+#define LNET_SEQ_BASETYPE long
+typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
+#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
+
+/* XXX
+ * cygwin need the pragma line, not clear if it's needed in other places.
+ * checking!!!
+ */
+#ifdef __CYGWIN__
+#pragma pack(push, 4)
+#endif
+
+/**
+ * Information about an event on a MD.
+ */
+typedef struct {
+ /** The identifier (nid, pid) of the target. */
+ lnet_process_id_t target;
+ /** The identifier (nid, pid) of the initiator. */
+ lnet_process_id_t initiator;
+ /**
+ * The NID of the immediate sender. If the request has been forwarded
+ * by routers, this is the NID of the last hop; otherwise it's the
+ * same as the initiator.
+ */
+ lnet_nid_t sender;
+ /** Indicates the type of the event. */
+ lnet_event_kind_t type;
+ /** The portal table index specified in the request */
+ unsigned int pt_index;
+ /** A copy of the match bits specified in the request. */
+ __u64 match_bits;
+ /** The length (in bytes) specified in the request. */
+ unsigned int rlength;
+ /**
+ * The length (in bytes) of the data that was manipulated by the
+ * operation. For truncated operations, the manipulated length will be
+ * the number of bytes specified by the MD (possibly with an offset,
+ * see lnet_md_t). For all other operations, the manipulated length
+ * will be the length of the requested operation, i.e. rlength.
+ */
+ unsigned int mlength;
+ /**
+ * The handle to the MD associated with the event. The handle may be
+ * invalid if the MD has been unlinked.
+ */
+ lnet_handle_md_t md_handle;
+ /**
+ * A snapshot of the state of the MD immediately after the event has
+ * been processed. In particular, the threshold field in md will
+ * reflect the value of the threshold after the operation occurred.
+ */
+ lnet_md_t md;
+ /**
+ * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
+ * \see LNetPut
+ */
+ __u64 hdr_data;
+ /**
+ * Indicates the completion status of the operation. It's 0 for
+ * successful operations, otherwise it's an error code.
+ */
+ int status;
+ /**
+ * Indicates whether the MD has been unlinked. Note that:
+ * - An event with unlinked set is the last event on the MD.
+ * - This field is also set for an explicit LNET_EVENT_UNLINK event.
+ * \see LNetMDUnlink
+ */
+ int unlinked;
+ /**
+ * The displacement (in bytes) into the memory region that the
+ * operation used. The offset can be determined by the operation for
+ * a remote managed MD or by the local MD.
+ * \see lnet_md_t::options
+ */
+ unsigned int offset;
+ /**
+ * The sequence number for this event. Sequence numbers are unique
+ * to each event.
+ */
+ volatile lnet_seq_t sequence;
+} lnet_event_t;
+#ifdef __CYGWIN__
+#pragma pop
+#endif
+
+/**
+ * Event queue handler function type.
+ *
+ * The EQ handler runs for each event that is deposited into the EQ. The
+ * handler is supplied with a pointer to the event that triggered the
+ * handler invocation.
+ *
+ * The handler must not block, must be reentrant, and must not call any LNet
+ * API functions. It should return as quickly as possible.
+ */
+typedef void (*lnet_eq_handler_t)(lnet_event_t *event);
+#define LNET_EQ_HANDLER_NONE NULL
+/** @} lnet_eq */
+
+/** \addtogroup lnet_data
+ * @{ */
+
+/**
+ * Specify whether an acknowledgment should be sent by target when the PUT
+ * operation completes (i.e., when the data has been written to a MD of the
+ * target process).
+ *
+ * \see lnet_md_t::options for the discussion on LNET_MD_ACK_DISABLE by which
+ * acknowledgments can be disabled for a MD.
+ */
+typedef enum {
+ /** Request an acknowledgment */
+ LNET_ACK_REQ,
+ /** Request that no acknowledgment should be generated. */
+ LNET_NOACK_REQ
+} lnet_ack_req_t;
+/** @} lnet_data */
+
+/** @} lnet */
+#endif
diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig
new file mode 100644
index 000000000000..00850eeb6a8c
--- /dev/null
+++ b/drivers/staging/lustre/lnet/Kconfig
@@ -0,0 +1,40 @@
+config LNET
+ tristate "Lustre networking subsystem"
+ depends on LUSTRE_FS
+
+config LNET_MAX_PAYLOAD
+ int "Lustre lnet max transfer payload (default 2MB)"
+ depends on LUSTRE_FS
+ default "1048576"
+ help
+ This option defines the maximum size of payload in bytes that lnet
+ can put into its transport.
+
+ If unsure, use default.
+
+config LNET_SELFTEST
+ tristate "Lustre networking self testing"
+ depends on LNET
+ help
+ Choose Y here if you want to do lnet self testing. To compile this
+ as a module, choose M here: the module will be called lnet_selftest.
+
+ To compile this as a kernel modules, choose M here and it will be
+ called lnet_selftest.
+
+ If unsure, say N.
+
+ See also http://wiki.lustre.org/
+
+config LNET_XPRT_IB
+ tristate "LNET infiniband support"
+ depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS
+ default LNET && INFINIBAND
+ help
+ This option allows the LNET users to use infiniband as an
+ RDMA-enabled transport.
+
+ To compile this as a kernel module, choose M here and it will be
+ called ko2iblnd.
+
+ If unsure, say N.
diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile
new file mode 100644
index 000000000000..374212b1555a
--- /dev/null
+++ b/drivers/staging/lustre/lnet/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_LNET) := klnds/ lnet/ selftest/
diff --git a/drivers/staging/lustre/lnet/klnds/Makefile b/drivers/staging/lustre/lnet/klnds/Makefile
new file mode 100644
index 000000000000..c23e4f67f837
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_LNET) += o2iblnd/ socklnd/
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
new file mode 100644
index 000000000000..71b7d8418357
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
+ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
+
+
+ccflags-y := -I$(src)/../../include
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
new file mode 100644
index 000000000000..29a97943e4c7
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -0,0 +1,3259 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+#include <asm/div64.h>
+
+lnd_t the_o2iblnd = {
+ .lnd_type = O2IBLND,
+ .lnd_startup = kiblnd_startup,
+ .lnd_shutdown = kiblnd_shutdown,
+ .lnd_ctl = kiblnd_ctl,
+ .lnd_query = kiblnd_query,
+ .lnd_send = kiblnd_send,
+ .lnd_recv = kiblnd_recv,
+};
+
+kib_data_t kiblnd_data;
+
+__u32
+kiblnd_cksum (void *ptr, int nob)
+{
+ char *c = ptr;
+ __u32 sum = 0;
+
+ while (nob-- > 0)
+ sum = ((sum << 1) | (sum >> 31)) + *c++;
+
+ /* ensure I don't return 0 (== no checksum) */
+ return (sum == 0) ? 1 : sum;
+}
+
+static char *
+kiblnd_msgtype2str(int type)
+{
+ switch (type) {
+ case IBLND_MSG_CONNREQ:
+ return "CONNREQ";
+
+ case IBLND_MSG_CONNACK:
+ return "CONNACK";
+
+ case IBLND_MSG_NOOP:
+ return "NOOP";
+
+ case IBLND_MSG_IMMEDIATE:
+ return "IMMEDIATE";
+
+ case IBLND_MSG_PUT_REQ:
+ return "PUT_REQ";
+
+ case IBLND_MSG_PUT_NAK:
+ return "PUT_NAK";
+
+ case IBLND_MSG_PUT_ACK:
+ return "PUT_ACK";
+
+ case IBLND_MSG_PUT_DONE:
+ return "PUT_DONE";
+
+ case IBLND_MSG_GET_REQ:
+ return "GET_REQ";
+
+ case IBLND_MSG_GET_DONE:
+ return "GET_DONE";
+
+ default:
+ return "???";
+ }
+}
+
+static int
+kiblnd_msgtype2size(int type)
+{
+ const int hdr_size = offsetof(kib_msg_t, ibm_u);
+
+ switch (type) {
+ case IBLND_MSG_CONNREQ:
+ case IBLND_MSG_CONNACK:
+ return hdr_size + sizeof(kib_connparams_t);
+
+ case IBLND_MSG_NOOP:
+ return hdr_size;
+
+ case IBLND_MSG_IMMEDIATE:
+ return offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]);
+
+ case IBLND_MSG_PUT_REQ:
+ return hdr_size + sizeof(kib_putreq_msg_t);
+
+ case IBLND_MSG_PUT_ACK:
+ return hdr_size + sizeof(kib_putack_msg_t);
+
+ case IBLND_MSG_GET_REQ:
+ return hdr_size + sizeof(kib_get_msg_t);
+
+ case IBLND_MSG_PUT_NAK:
+ case IBLND_MSG_PUT_DONE:
+ case IBLND_MSG_GET_DONE:
+ return hdr_size + sizeof(kib_completion_msg_t);
+ default:
+ return -1;
+ }
+}
+
+static int
+kiblnd_unpack_rd(kib_msg_t *msg, int flip)
+{
+ kib_rdma_desc_t *rd;
+ int nob;
+ int n;
+ int i;
+
+ LASSERT (msg->ibm_type == IBLND_MSG_GET_REQ ||
+ msg->ibm_type == IBLND_MSG_PUT_ACK);
+
+ rd = msg->ibm_type == IBLND_MSG_GET_REQ ?
+ &msg->ibm_u.get.ibgm_rd :
+ &msg->ibm_u.putack.ibpam_rd;
+
+ if (flip) {
+ __swab32s(&rd->rd_key);
+ __swab32s(&rd->rd_nfrags);
+ }
+
+ n = rd->rd_nfrags;
+
+ if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
+ CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
+ n, IBLND_MAX_RDMA_FRAGS);
+ return 1;
+ }
+
+ nob = offsetof (kib_msg_t, ibm_u) +
+ kiblnd_rd_msg_size(rd, msg->ibm_type, n);
+
+ if (msg->ibm_nob < nob) {
+ CERROR("Short %s: %d(%d)\n",
+ kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob);
+ return 1;
+ }
+
+ if (!flip)
+ return 0;
+
+ for (i = 0; i < n; i++) {
+ __swab32s(&rd->rd_frags[i].rf_nob);
+ __swab64s(&rd->rd_frags[i].rf_addr);
+ }
+
+ return 0;
+}
+
+void
+kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, int version,
+ int credits, lnet_nid_t dstnid, __u64 dststamp)
+{
+ kib_net_t *net = ni->ni_data;
+
+ /* CAVEAT EMPTOR! all message fields not set here should have been
+ * initialised previously. */
+ msg->ibm_magic = IBLND_MSG_MAGIC;
+ msg->ibm_version = version;
+ /* ibm_type */
+ msg->ibm_credits = credits;
+ /* ibm_nob */
+ msg->ibm_cksum = 0;
+ msg->ibm_srcnid = ni->ni_nid;
+ msg->ibm_srcstamp = net->ibn_incarnation;
+ msg->ibm_dstnid = dstnid;
+ msg->ibm_dststamp = dststamp;
+
+ if (*kiblnd_tunables.kib_cksum) {
+ /* NB ibm_cksum zero while computing cksum */
+ msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
+ }
+}
+
+int
+kiblnd_unpack_msg(kib_msg_t *msg, int nob)
+{
+ const int hdr_size = offsetof(kib_msg_t, ibm_u);
+ __u32 msg_cksum;
+ __u16 version;
+ int msg_nob;
+ int flip;
+
+ /* 6 bytes are enough to have received magic + version */
+ if (nob < 6) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
+ }
+
+ if (msg->ibm_magic == IBLND_MSG_MAGIC) {
+ flip = 0;
+ } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) {
+ flip = 1;
+ } else {
+ CERROR("Bad magic: %08x\n", msg->ibm_magic);
+ return -EPROTO;
+ }
+
+ version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
+ if (version != IBLND_MSG_VERSION &&
+ version != IBLND_MSG_VERSION_1) {
+ CERROR("Bad version: %x\n", version);
+ return -EPROTO;
+ }
+
+ if (nob < hdr_size) {
+ CERROR("Short message: %d\n", nob);
+ return -EPROTO;
+ }
+
+ msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
+ if (msg_nob > nob) {
+ CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
+ return -EPROTO;
+ }
+
+ /* checksum must be computed with ibm_cksum zero and BEFORE anything
+ * gets flipped */
+ msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
+ msg->ibm_cksum = 0;
+ if (msg_cksum != 0 &&
+ msg_cksum != kiblnd_cksum(msg, msg_nob)) {
+ CERROR("Bad checksum\n");
+ return -EPROTO;
+ }
+
+ msg->ibm_cksum = msg_cksum;
+
+ if (flip) {
+ /* leave magic unflipped as a clue to peer endianness */
+ msg->ibm_version = version;
+ CLASSERT (sizeof(msg->ibm_type) == 1);
+ CLASSERT (sizeof(msg->ibm_credits) == 1);
+ msg->ibm_nob = msg_nob;
+ __swab64s(&msg->ibm_srcnid);
+ __swab64s(&msg->ibm_srcstamp);
+ __swab64s(&msg->ibm_dstnid);
+ __swab64s(&msg->ibm_dststamp);
+ }
+
+ if (msg->ibm_srcnid == LNET_NID_ANY) {
+ CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
+ return -EPROTO;
+ }
+
+ if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) {
+ CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type),
+ msg_nob, kiblnd_msgtype2size(msg->ibm_type));
+ return -EPROTO;
+ }
+
+ switch (msg->ibm_type) {
+ default:
+ CERROR("Unknown message type %x\n", msg->ibm_type);
+ return -EPROTO;
+
+ case IBLND_MSG_NOOP:
+ case IBLND_MSG_IMMEDIATE:
+ case IBLND_MSG_PUT_REQ:
+ break;
+
+ case IBLND_MSG_PUT_ACK:
+ case IBLND_MSG_GET_REQ:
+ if (kiblnd_unpack_rd(msg, flip))
+ return -EPROTO;
+ break;
+
+ case IBLND_MSG_PUT_NAK:
+ case IBLND_MSG_PUT_DONE:
+ case IBLND_MSG_GET_DONE:
+ if (flip)
+ __swab32s(&msg->ibm_u.completion.ibcm_status);
+ break;
+
+ case IBLND_MSG_CONNREQ:
+ case IBLND_MSG_CONNACK:
+ if (flip) {
+ __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth);
+ __swab16s(&msg->ibm_u.connparams.ibcp_max_frags);
+ __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
+ }
+ break;
+ }
+ return 0;
+}
+
+int
+kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
+{
+ kib_peer_t *peer;
+ kib_net_t *net = ni->ni_data;
+ int cpt = lnet_cpt_of_nid(nid);
+ unsigned long flags;
+
+ LASSERT(net != NULL);
+ LASSERT(nid != LNET_NID_ANY);
+
+ LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer));
+ if (peer == NULL) {
+ CERROR("Cannot allocate peer\n");
+ return -ENOMEM;
+ }
+
+ memset(peer, 0, sizeof(*peer)); /* zero flags etc */
+
+ peer->ibp_ni = ni;
+ peer->ibp_nid = nid;
+ peer->ibp_error = 0;
+ peer->ibp_last_alive = 0;
+ atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
+
+ INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
+ INIT_LIST_HEAD(&peer->ibp_conns);
+ INIT_LIST_HEAD(&peer->ibp_tx_queue);
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ /* always called with a ref on ni, which prevents ni being shutdown */
+ LASSERT (net->ibn_shutdown == 0);
+
+ /* npeers only grows with the global lock held */
+ atomic_inc(&net->ibn_npeers);
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ *peerp = peer;
+ return 0;
+}
+
+void
+kiblnd_destroy_peer (kib_peer_t *peer)
+{
+ kib_net_t *net = peer->ibp_ni->ni_data;
+
+ LASSERT (net != NULL);
+ LASSERT (atomic_read(&peer->ibp_refcount) == 0);
+ LASSERT (!kiblnd_peer_active(peer));
+ LASSERT (peer->ibp_connecting == 0);
+ LASSERT (peer->ibp_accepting == 0);
+ LASSERT (list_empty(&peer->ibp_conns));
+ LASSERT (list_empty(&peer->ibp_tx_queue));
+
+ LIBCFS_FREE(peer, sizeof(*peer));
+
+ /* NB a peer's connections keep a reference on their peer until
+ * they are destroyed, so we can be assured that _all_ state to do
+ * with this peer has been cleaned up when its refcount drops to
+ * zero. */
+ atomic_dec(&net->ibn_npeers);
+}
+
+kib_peer_t *
+kiblnd_find_peer_locked (lnet_nid_t nid)
+{
+ /* the caller is responsible for accounting the additional reference
+ * that this creates */
+ struct list_head *peer_list = kiblnd_nid2peerlist(nid);
+ struct list_head *tmp;
+ kib_peer_t *peer;
+
+ list_for_each (tmp, peer_list) {
+
+ peer = list_entry(tmp, kib_peer_t, ibp_list);
+
+ LASSERT (peer->ibp_connecting > 0 || /* creating conns */
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns)); /* active conn */
+
+ if (peer->ibp_nid != nid)
+ continue;
+
+ CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
+ peer, libcfs_nid2str(nid),
+ atomic_read(&peer->ibp_refcount),
+ peer->ibp_version);
+ return peer;
+ }
+ return NULL;
+}
+
+void
+kiblnd_unlink_peer_locked (kib_peer_t *peer)
+{
+ LASSERT (list_empty(&peer->ibp_conns));
+
+ LASSERT (kiblnd_peer_active(peer));
+ list_del_init(&peer->ibp_list);
+ /* lose peerlist's ref */
+ kiblnd_peer_decref(peer);
+}
+
+int
+kiblnd_get_peer_info (lnet_ni_t *ni, int index,
+ lnet_nid_t *nidp, int *count)
+{
+ kib_peer_t *peer;
+ struct list_head *ptmp;
+ int i;
+ unsigned long flags;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+
+ list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
+
+ peer = list_entry(ptmp, kib_peer_t, ibp_list);
+ LASSERT (peer->ibp_connecting > 0 ||
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns));
+
+ if (peer->ibp_ni != ni)
+ continue;
+
+ if (index-- > 0)
+ continue;
+
+ *nidp = peer->ibp_nid;
+ *count = atomic_read(&peer->ibp_refcount);
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+ flags);
+ return 0;
+ }
+ }
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ return -ENOENT;
+}
+
+void
+kiblnd_del_peer_locked (kib_peer_t *peer)
+{
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ kib_conn_t *conn;
+
+ if (list_empty(&peer->ibp_conns)) {
+ kiblnd_unlink_peer_locked(peer);
+ } else {
+ list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+ conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+ kiblnd_close_conn_locked(conn, 0);
+ }
+ /* NB closing peer's last conn unlinked it. */
+ }
+ /* NB peer now unlinked; might even be freed if the peer table had the
+ * last ref on it. */
+}
+
+int
+kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid)
+{
+ LIST_HEAD (zombies);
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ kib_peer_t *peer;
+ int lo;
+ int hi;
+ int i;
+ unsigned long flags;
+ int rc = -ENOENT;
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ if (nid != LNET_NID_ANY) {
+ lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
+ } else {
+ lo = 0;
+ hi = kiblnd_data.kib_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
+ peer = list_entry(ptmp, kib_peer_t, ibp_list);
+ LASSERT (peer->ibp_connecting > 0 ||
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns));
+
+ if (peer->ibp_ni != ni)
+ continue;
+
+ if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
+ continue;
+
+ if (!list_empty(&peer->ibp_tx_queue)) {
+ LASSERT (list_empty(&peer->ibp_conns));
+
+ list_splice_init(&peer->ibp_tx_queue,
+ &zombies);
+ }
+
+ kiblnd_del_peer_locked(peer);
+ rc = 0; /* matched something */
+ }
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_txlist_done(ni, &zombies, -EIO);
+
+ return rc;
+}
+
+kib_conn_t *
+kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index)
+{
+ kib_peer_t *peer;
+ struct list_head *ptmp;
+ kib_conn_t *conn;
+ struct list_head *ctmp;
+ int i;
+ unsigned long flags;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+ list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
+
+ peer = list_entry(ptmp, kib_peer_t, ibp_list);
+ LASSERT (peer->ibp_connecting > 0 ||
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns));
+
+ if (peer->ibp_ni != ni)
+ continue;
+
+ list_for_each (ctmp, &peer->ibp_conns) {
+ if (index-- > 0)
+ continue;
+
+ conn = list_entry(ctmp, kib_conn_t,
+ ibc_list);
+ kiblnd_conn_addref(conn);
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+ flags);
+ return conn;
+ }
+ }
+ }
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ return NULL;
+}
+
+void
+kiblnd_debug_rx (kib_rx_t *rx)
+{
+ CDEBUG(D_CONSOLE, " %p status %d msg_type %x cred %d\n",
+ rx, rx->rx_status, rx->rx_msg->ibm_type,
+ rx->rx_msg->ibm_credits);
+}
+
+void
+kiblnd_debug_tx (kib_tx_t *tx)
+{
+ CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx "
+ "cookie "LPX64" msg %s%s type %x cred %d\n",
+ tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
+ tx->tx_status, tx->tx_deadline, tx->tx_cookie,
+ tx->tx_lntmsg[0] == NULL ? "-" : "!",
+ tx->tx_lntmsg[1] == NULL ? "-" : "!",
+ tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits);
+}
+
+void
+kiblnd_debug_conn (kib_conn_t *conn)
+{
+ struct list_head *tmp;
+ int i;
+
+ spin_lock(&conn->ibc_lock);
+
+ CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s: \n",
+ atomic_read(&conn->ibc_refcount), conn,
+ conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ CDEBUG(D_CONSOLE, " state %d nposted %d/%d cred %d o_cred %d r_cred %d\n",
+ conn->ibc_state, conn->ibc_noops_posted,
+ conn->ibc_nsends_posted, conn->ibc_credits,
+ conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
+ CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error);
+
+ CDEBUG(D_CONSOLE, " early_rxs:\n");
+ list_for_each(tmp, &conn->ibc_early_rxs)
+ kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
+
+ CDEBUG(D_CONSOLE, " tx_noops:\n");
+ list_for_each(tmp, &conn->ibc_tx_noops)
+ kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue_nocred)
+ kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
+ kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue)
+ kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+ CDEBUG(D_CONSOLE, " active_txs:\n");
+ list_for_each(tmp, &conn->ibc_active_txs)
+ kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
+
+ CDEBUG(D_CONSOLE, " rxs:\n");
+ for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++)
+ kiblnd_debug_rx(&conn->ibc_rxs[i]);
+
+ spin_unlock(&conn->ibc_lock);
+}
+
+int
+kiblnd_translate_mtu(int value)
+{
+ switch (value) {
+ default:
+ return -1;
+ case 0:
+ return 0;
+ case 256:
+ return IB_MTU_256;
+ case 512:
+ return IB_MTU_512;
+ case 1024:
+ return IB_MTU_1024;
+ case 2048:
+ return IB_MTU_2048;
+ case 4096:
+ return IB_MTU_4096;
+ }
+}
+
+static void
+kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
+{
+ int mtu;
+
+ /* XXX There is no path record for iWARP, set by netdev->change_mtu? */
+ if (cmid->route.path_rec == NULL)
+ return;
+
+ mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
+ LASSERT (mtu >= 0);
+ if (mtu != 0)
+ cmid->route.path_rec->mtu = mtu;
+}
+
+static int
+kiblnd_get_completion_vector(kib_conn_t *conn, int cpt)
+{
+ cpumask_t *mask;
+ int vectors;
+ int off;
+ int i;
+ lnet_nid_t nid = conn->ibc_peer->ibp_nid;
+
+ vectors = conn->ibc_cmid->device->num_comp_vectors;
+ if (vectors <= 1)
+ return 0;
+
+ mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt);
+
+ /* hash NID to CPU id in this partition... */
+ off = do_div(nid, cpus_weight(*mask));
+ for_each_cpu_mask(i, *mask) {
+ if (off-- == 0)
+ return i % vectors;
+ }
+
+ LBUG();
+ return 1;
+}
+
+kib_conn_t *
+kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+ int state, int version)
+{
+ /* CAVEAT EMPTOR:
+ * If the new conn is created successfully it takes over the caller's
+ * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself
+ * is destroyed. On failure, the caller's ref on 'peer' remains and
+ * she must dispose of 'cmid'. (Actually I'd block forever if I tried
+ * to destroy 'cmid' here since I'm called from the CM which still has
+ * its ref on 'cmid'). */
+ rwlock_t *glock = &kiblnd_data.kib_global_lock;
+ kib_net_t *net = peer->ibp_ni->ni_data;
+ kib_dev_t *dev;
+ struct ib_qp_init_attr *init_qp_attr;
+ struct kib_sched_info *sched;
+ kib_conn_t *conn;
+ struct ib_cq *cq;
+ unsigned long flags;
+ int cpt;
+ int rc;
+ int i;
+
+ LASSERT(net != NULL);
+ LASSERT(!in_interrupt());
+
+ dev = net->ibn_dev;
+
+ cpt = lnet_cpt_of_nid(peer->ibp_nid);
+ sched = kiblnd_data.kib_scheds[cpt];
+
+ LASSERT(sched->ibs_nthreads > 0);
+
+ LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt,
+ sizeof(*init_qp_attr));
+ if (init_qp_attr == NULL) {
+ CERROR("Can't allocate qp_attr for %s\n",
+ libcfs_nid2str(peer->ibp_nid));
+ goto failed_0;
+ }
+
+ LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn));
+ if (conn == NULL) {
+ CERROR("Can't allocate connection for %s\n",
+ libcfs_nid2str(peer->ibp_nid));
+ goto failed_1;
+ }
+
+ conn->ibc_state = IBLND_CONN_INIT;
+ conn->ibc_version = version;
+ conn->ibc_peer = peer; /* I take the caller's ref */
+ cmid->context = conn; /* for future CM callbacks */
+ conn->ibc_cmid = cmid;
+
+ INIT_LIST_HEAD(&conn->ibc_early_rxs);
+ INIT_LIST_HEAD(&conn->ibc_tx_noops);
+ INIT_LIST_HEAD(&conn->ibc_tx_queue);
+ INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
+ INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
+ INIT_LIST_HEAD(&conn->ibc_active_txs);
+ spin_lock_init(&conn->ibc_lock);
+
+ LIBCFS_CPT_ALLOC(conn->ibc_connvars, lnet_cpt_table(), cpt,
+ sizeof(*conn->ibc_connvars));
+ if (conn->ibc_connvars == NULL) {
+ CERROR("Can't allocate in-progress connection state\n");
+ goto failed_2;
+ }
+
+ write_lock_irqsave(glock, flags);
+ if (dev->ibd_failover) {
+ write_unlock_irqrestore(glock, flags);
+ CERROR("%s: failover in progress\n", dev->ibd_ifname);
+ goto failed_2;
+ }
+
+ if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
+ /* wakeup failover thread and teardown connection */
+ if (kiblnd_dev_can_failover(dev)) {
+ list_add_tail(&dev->ibd_fail_list,
+ &kiblnd_data.kib_failed_devs);
+ wake_up(&kiblnd_data.kib_failover_waitq);
+ }
+
+ write_unlock_irqrestore(glock, flags);
+ CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
+ cmid->device->name, dev->ibd_ifname);
+ goto failed_2;
+ }
+
+ kiblnd_hdev_addref_locked(dev->ibd_hdev);
+ conn->ibc_hdev = dev->ibd_hdev;
+
+ kiblnd_setup_mtu_locked(cmid);
+
+ write_unlock_irqrestore(glock, flags);
+
+ LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
+ IBLND_RX_MSGS(version) * sizeof(kib_rx_t));
+ if (conn->ibc_rxs == NULL) {
+ CERROR("Cannot allocate RX buffers\n");
+ goto failed_2;
+ }
+
+ rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
+ IBLND_RX_MSG_PAGES(version));
+ if (rc != 0)
+ goto failed_2;
+
+ kiblnd_map_rx_descs(conn);
+
+ cq = ib_create_cq(cmid->device,
+ kiblnd_cq_completion, kiblnd_cq_event, conn,
+ IBLND_CQ_ENTRIES(version),
+ kiblnd_get_completion_vector(conn, cpt));
+ if (IS_ERR(cq)) {
+ CERROR("Can't create CQ: %ld, cqe: %d\n",
+ PTR_ERR(cq), IBLND_CQ_ENTRIES(version));
+ goto failed_2;
+ }
+
+ conn->ibc_cq = cq;
+
+ rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ if (rc != 0) {
+ CERROR("Can't request completion notificiation: %d\n", rc);
+ goto failed_2;
+ }
+
+ init_qp_attr->event_handler = kiblnd_qp_event;
+ init_qp_attr->qp_context = conn;
+ init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(version);
+ init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(version);
+ init_qp_attr->cap.max_send_sge = 1;
+ init_qp_attr->cap.max_recv_sge = 1;
+ init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ init_qp_attr->qp_type = IB_QPT_RC;
+ init_qp_attr->send_cq = cq;
+ init_qp_attr->recv_cq = cq;
+
+ conn->ibc_sched = sched;
+
+ rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
+ if (rc != 0) {
+ CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
+ rc, init_qp_attr->cap.max_send_wr,
+ init_qp_attr->cap.max_recv_wr);
+ goto failed_2;
+ }
+
+ LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
+
+ /* 1 ref for caller and each rxmsg */
+ atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(version));
+ conn->ibc_nrx = IBLND_RX_MSGS(version);
+
+ /* post receives */
+ for (i = 0; i < IBLND_RX_MSGS(version); i++) {
+ rc = kiblnd_post_rx(&conn->ibc_rxs[i],
+ IBLND_POSTRX_NO_CREDIT);
+ if (rc != 0) {
+ CERROR("Can't post rxmsg: %d\n", rc);
+
+ /* Make posted receives complete */
+ kiblnd_abort_receives(conn);
+
+ /* correct # of posted buffers
+ * NB locking needed now I'm racing with completion */
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ conn->ibc_nrx -= IBLND_RX_MSGS(version) - i;
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ /* cmid will be destroyed by CM(ofed) after cm_callback
+ * returned, so we can't refer it anymore
+ * (by kiblnd_connd()->kiblnd_destroy_conn) */
+ rdma_destroy_qp(conn->ibc_cmid);
+ conn->ibc_cmid = NULL;
+
+ /* Drop my own and unused rxbuffer refcounts */
+ while (i++ <= IBLND_RX_MSGS(version))
+ kiblnd_conn_decref(conn);
+
+ return NULL;
+ }
+ }
+
+ /* Init successful! */
+ LASSERT (state == IBLND_CONN_ACTIVE_CONNECT ||
+ state == IBLND_CONN_PASSIVE_WAIT);
+ conn->ibc_state = state;
+
+ /* 1 more conn */
+ atomic_inc(&net->ibn_nconns);
+ return conn;
+
+ failed_2:
+ kiblnd_destroy_conn(conn);
+ failed_1:
+ LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
+ failed_0:
+ return NULL;
+}
+
+void
+kiblnd_destroy_conn (kib_conn_t *conn)
+{
+ struct rdma_cm_id *cmid = conn->ibc_cmid;
+ kib_peer_t *peer = conn->ibc_peer;
+ int rc;
+
+ LASSERT (!in_interrupt());
+ LASSERT (atomic_read(&conn->ibc_refcount) == 0);
+ LASSERT (list_empty(&conn->ibc_early_rxs));
+ LASSERT (list_empty(&conn->ibc_tx_noops));
+ LASSERT (list_empty(&conn->ibc_tx_queue));
+ LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
+ LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
+ LASSERT (list_empty(&conn->ibc_active_txs));
+ LASSERT (conn->ibc_noops_posted == 0);
+ LASSERT (conn->ibc_nsends_posted == 0);
+
+ switch (conn->ibc_state) {
+ default:
+ /* conn must be completely disengaged from the network */
+ LBUG();
+
+ case IBLND_CONN_DISCONNECTED:
+ /* connvars should have been freed already */
+ LASSERT (conn->ibc_connvars == NULL);
+ break;
+
+ case IBLND_CONN_INIT:
+ break;
+ }
+
+ /* conn->ibc_cmid might be destroyed by CM already */
+ if (cmid != NULL && cmid->qp != NULL)
+ rdma_destroy_qp(cmid);
+
+ if (conn->ibc_cq != NULL) {
+ rc = ib_destroy_cq(conn->ibc_cq);
+ if (rc != 0)
+ CWARN("Error destroying CQ: %d\n", rc);
+ }
+
+ if (conn->ibc_rx_pages != NULL)
+ kiblnd_unmap_rx_descs(conn);
+
+ if (conn->ibc_rxs != NULL) {
+ LIBCFS_FREE(conn->ibc_rxs,
+ IBLND_RX_MSGS(conn->ibc_version) * sizeof(kib_rx_t));
+ }
+
+ if (conn->ibc_connvars != NULL)
+ LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+
+ if (conn->ibc_hdev != NULL)
+ kiblnd_hdev_decref(conn->ibc_hdev);
+
+ /* See CAVEAT EMPTOR above in kiblnd_create_conn */
+ if (conn->ibc_state != IBLND_CONN_INIT) {
+ kib_net_t *net = peer->ibp_ni->ni_data;
+
+ kiblnd_peer_decref(peer);
+ rdma_destroy_id(cmid);
+ atomic_dec(&net->ibn_nconns);
+ }
+
+ LIBCFS_FREE(conn, sizeof(*conn));
+}
+
+int
+kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why)
+{
+ kib_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ int count = 0;
+
+ list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+ conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+ CDEBUG(D_NET, "Closing conn -> %s, "
+ "version: %x, reason: %d\n",
+ libcfs_nid2str(peer->ibp_nid),
+ conn->ibc_version, why);
+
+ kiblnd_close_conn_locked(conn, why);
+ count++;
+ }
+
+ return count;
+}
+
+int
+kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+ int version, __u64 incarnation)
+{
+ kib_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ int count = 0;
+
+ list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
+ conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+ if (conn->ibc_version == version &&
+ conn->ibc_incarnation == incarnation)
+ continue;
+
+ CDEBUG(D_NET, "Closing stale conn -> %s version: %x, "
+ "incarnation:"LPX64"(%x, "LPX64")\n",
+ libcfs_nid2str(peer->ibp_nid),
+ conn->ibc_version, conn->ibc_incarnation,
+ version, incarnation);
+
+ kiblnd_close_conn_locked(conn, -ESTALE);
+ count++;
+ }
+
+ return count;
+}
+
+int
+kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid)
+{
+ kib_peer_t *peer;
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ int lo;
+ int hi;
+ int i;
+ unsigned long flags;
+ int count = 0;
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ if (nid != LNET_NID_ANY)
+ lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
+ else {
+ lo = 0;
+ hi = kiblnd_data.kib_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
+
+ peer = list_entry(ptmp, kib_peer_t, ibp_list);
+ LASSERT (peer->ibp_connecting > 0 ||
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns));
+
+ if (peer->ibp_ni != ni)
+ continue;
+
+ if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
+ continue;
+
+ count += kiblnd_close_peer_conns_locked(peer, 0);
+ }
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ /* wildcards always succeed */
+ if (nid == LNET_NID_ANY)
+ return 0;
+
+ return (count == 0) ? -ENOENT : 0;
+}
+
+int
+kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
+{
+ struct libcfs_ioctl_data *data = arg;
+ int rc = -EINVAL;
+
+ switch(cmd) {
+ case IOC_LIBCFS_GET_PEER: {
+ lnet_nid_t nid = 0;
+ int count = 0;
+
+ rc = kiblnd_get_peer_info(ni, data->ioc_count,
+ &nid, &count);
+ data->ioc_nid = nid;
+ data->ioc_count = count;
+ break;
+ }
+
+ case IOC_LIBCFS_DEL_PEER: {
+ rc = kiblnd_del_peer(ni, data->ioc_nid);
+ break;
+ }
+ case IOC_LIBCFS_GET_CONN: {
+ kib_conn_t *conn;
+
+ rc = 0;
+ conn = kiblnd_get_conn_by_idx(ni, data->ioc_count);
+ if (conn == NULL) {
+ rc = -ENOENT;
+ break;
+ }
+
+ LASSERT (conn->ibc_cmid != NULL);
+ data->ioc_nid = conn->ibc_peer->ibp_nid;
+ if (conn->ibc_cmid->route.path_rec == NULL)
+ data->ioc_u32[0] = 0; /* iWarp has no path MTU */
+ else
+ data->ioc_u32[0] =
+ ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
+ kiblnd_conn_decref(conn);
+ break;
+ }
+ case IOC_LIBCFS_CLOSE_CONNECTION: {
+ rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return rc;
+}
+
+void
+kiblnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
+{
+ cfs_time_t last_alive = 0;
+ cfs_time_t now = cfs_time_current();
+ rwlock_t *glock = &kiblnd_data.kib_global_lock;
+ kib_peer_t *peer;
+ unsigned long flags;
+
+ read_lock_irqsave(glock, flags);
+
+ peer = kiblnd_find_peer_locked(nid);
+ if (peer != NULL) {
+ LASSERT (peer->ibp_connecting > 0 || /* creating conns */
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns)); /* active conn */
+ last_alive = peer->ibp_last_alive;
+ }
+
+ read_unlock_irqrestore(glock, flags);
+
+ if (last_alive != 0)
+ *when = last_alive;
+
+ /* peer is not persistent in hash, trigger peer creation
+ * and connection establishment with a NULL tx */
+ if (peer == NULL)
+ kiblnd_launch_tx(ni, NULL, nid);
+
+ CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n",
+ libcfs_nid2str(nid), peer,
+ last_alive ? cfs_duration_sec(now - last_alive) : -1);
+ return;
+}
+
+void
+kiblnd_free_pages(kib_pages_t *p)
+{
+ int npages = p->ibp_npages;
+ int i;
+
+ for (i = 0; i < npages; i++) {
+ if (p->ibp_pages[i] != NULL)
+ __free_page(p->ibp_pages[i]);
+ }
+
+ LIBCFS_FREE(p, offsetof(kib_pages_t, ibp_pages[npages]));
+}
+
+int
+kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages)
+{
+ kib_pages_t *p;
+ int i;
+
+ LIBCFS_CPT_ALLOC(p, lnet_cpt_table(), cpt,
+ offsetof(kib_pages_t, ibp_pages[npages]));
+ if (p == NULL) {
+ CERROR("Can't allocate descriptor for %d pages\n", npages);
+ return -ENOMEM;
+ }
+
+ memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
+ p->ibp_npages = npages;
+
+ for (i = 0; i < npages; i++) {
+ p->ibp_pages[i] = alloc_pages_node(
+ cfs_cpt_spread_node(lnet_cpt_table(), cpt),
+ __GFP_IO, 0);
+ if (p->ibp_pages[i] == NULL) {
+ CERROR("Can't allocate page %d of %d\n", i, npages);
+ kiblnd_free_pages(p);
+ return -ENOMEM;
+ }
+ }
+
+ *pp = p;
+ return 0;
+}
+
+void
+kiblnd_unmap_rx_descs(kib_conn_t *conn)
+{
+ kib_rx_t *rx;
+ int i;
+
+ LASSERT (conn->ibc_rxs != NULL);
+ LASSERT (conn->ibc_hdev != NULL);
+
+ for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+ rx = &conn->ibc_rxs[i];
+
+ LASSERT (rx->rx_nob >= 0); /* not posted */
+
+ kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev,
+ KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
+ rx->rx_msgaddr),
+ IBLND_MSG_SIZE, DMA_FROM_DEVICE);
+ }
+
+ kiblnd_free_pages(conn->ibc_rx_pages);
+
+ conn->ibc_rx_pages = NULL;
+}
+
+void
+kiblnd_map_rx_descs(kib_conn_t *conn)
+{
+ kib_rx_t *rx;
+ struct page *pg;
+ int pg_off;
+ int ipg;
+ int i;
+
+ for (pg_off = ipg = i = 0;
+ i < IBLND_RX_MSGS(conn->ibc_version); i++) {
+ pg = conn->ibc_rx_pages->ibp_pages[ipg];
+ rx = &conn->ibc_rxs[i];
+
+ rx->rx_conn = conn;
+ rx->rx_msg = (kib_msg_t *)(((char *)page_address(pg)) + pg_off);
+
+ rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev,
+ rx->rx_msg, IBLND_MSG_SIZE,
+ DMA_FROM_DEVICE);
+ LASSERT (!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev,
+ rx->rx_msgaddr));
+ KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr);
+
+ CDEBUG(D_NET,"rx %d: %p "LPX64"("LPX64")\n",
+ i, rx->rx_msg, rx->rx_msgaddr,
+ lnet_page2phys(pg) + pg_off);
+
+ pg_off += IBLND_MSG_SIZE;
+ LASSERT (pg_off <= PAGE_SIZE);
+
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ ipg++;
+ LASSERT (ipg <= IBLND_RX_MSG_PAGES(conn->ibc_version));
+ }
+ }
+}
+
+static void
+kiblnd_unmap_tx_pool(kib_tx_pool_t *tpo)
+{
+ kib_hca_dev_t *hdev = tpo->tpo_hdev;
+ kib_tx_t *tx;
+ int i;
+
+ LASSERT (tpo->tpo_pool.po_allocated == 0);
+
+ if (hdev == NULL)
+ return;
+
+ for (i = 0; i < tpo->tpo_pool.po_size; i++) {
+ tx = &tpo->tpo_tx_descs[i];
+ kiblnd_dma_unmap_single(hdev->ibh_ibdev,
+ KIBLND_UNMAP_ADDR(tx, tx_msgunmap,
+ tx->tx_msgaddr),
+ IBLND_MSG_SIZE, DMA_TO_DEVICE);
+ }
+
+ kiblnd_hdev_decref(hdev);
+ tpo->tpo_hdev = NULL;
+}
+
+static kib_hca_dev_t *
+kiblnd_current_hdev(kib_dev_t *dev)
+{
+ kib_hca_dev_t *hdev;
+ unsigned long flags;
+ int i = 0;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ while (dev->ibd_failover) {
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ if (i++ % 50 == 0)
+ CDEBUG(D_NET, "%s: Wait for failover\n",
+ dev->ibd_ifname);
+ schedule_timeout(cfs_time_seconds(1) / 100);
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ }
+
+ kiblnd_hdev_addref_locked(dev->ibd_hdev);
+ hdev = dev->ibd_hdev;
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ return hdev;
+}
+
+static void
+kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
+{
+ kib_pages_t *txpgs = tpo->tpo_tx_pages;
+ kib_pool_t *pool = &tpo->tpo_pool;
+ kib_net_t *net = pool->po_owner->ps_net;
+ kib_dev_t *dev;
+ struct page *page;
+ kib_tx_t *tx;
+ int page_offset;
+ int ipage;
+ int i;
+
+ LASSERT (net != NULL);
+
+ dev = net->ibn_dev;
+
+ /* pre-mapped messages are not bigger than 1 page */
+ CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE);
+
+ /* No fancy arithmetic when we do the buffer calculations */
+ CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0);
+
+ tpo->tpo_hdev = kiblnd_current_hdev(dev);
+
+ for (ipage = page_offset = i = 0; i < pool->po_size; i++) {
+ page = txpgs->ibp_pages[ipage];
+ tx = &tpo->tpo_tx_descs[i];
+
+ tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
+ page_offset);
+
+ tx->tx_msgaddr = kiblnd_dma_map_single(
+ tpo->tpo_hdev->ibh_ibdev, tx->tx_msg,
+ IBLND_MSG_SIZE, DMA_TO_DEVICE);
+ LASSERT (!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev,
+ tx->tx_msgaddr));
+ KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr);
+
+ list_add(&tx->tx_list, &pool->po_free_list);
+
+ page_offset += IBLND_MSG_SIZE;
+ LASSERT (page_offset <= PAGE_SIZE);
+
+ if (page_offset == PAGE_SIZE) {
+ page_offset = 0;
+ ipage++;
+ LASSERT (ipage <= txpgs->ibp_npages);
+ }
+ }
+}
+
+struct ib_mr *
+kiblnd_find_dma_mr(kib_hca_dev_t *hdev, __u64 addr, __u64 size)
+{
+ __u64 index;
+
+ LASSERT (hdev->ibh_mrs[0] != NULL);
+
+ if (hdev->ibh_nmrs == 1)
+ return hdev->ibh_mrs[0];
+
+ index = addr >> hdev->ibh_mr_shift;
+
+ if (index < hdev->ibh_nmrs &&
+ index == ((addr + size - 1) >> hdev->ibh_mr_shift))
+ return hdev->ibh_mrs[index];
+
+ return NULL;
+}
+
+struct ib_mr *
+kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd)
+{
+ struct ib_mr *prev_mr;
+ struct ib_mr *mr;
+ int i;
+
+ LASSERT (hdev->ibh_mrs[0] != NULL);
+
+ if (*kiblnd_tunables.kib_map_on_demand > 0 &&
+ *kiblnd_tunables.kib_map_on_demand <= rd->rd_nfrags)
+ return NULL;
+
+ if (hdev->ibh_nmrs == 1)
+ return hdev->ibh_mrs[0];
+
+ for (i = 0, mr = prev_mr = NULL;
+ i < rd->rd_nfrags; i++) {
+ mr = kiblnd_find_dma_mr(hdev,
+ rd->rd_frags[i].rf_addr,
+ rd->rd_frags[i].rf_nob);
+ if (prev_mr == NULL)
+ prev_mr = mr;
+
+ if (mr == NULL || prev_mr != mr) {
+ /* Can't covered by one single MR */
+ mr = NULL;
+ break;
+ }
+ }
+
+ return mr;
+}
+
+void
+kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
+{
+ LASSERT (pool->fpo_map_count == 0);
+
+ if (pool->fpo_fmr_pool != NULL)
+ ib_destroy_fmr_pool(pool->fpo_fmr_pool);
+
+ if (pool->fpo_hdev != NULL)
+ kiblnd_hdev_decref(pool->fpo_hdev);
+
+ LIBCFS_FREE(pool, sizeof(kib_fmr_pool_t));
+}
+
+void
+kiblnd_destroy_fmr_pool_list(struct list_head *head)
+{
+ kib_fmr_pool_t *pool;
+
+ while (!list_empty(head)) {
+ pool = list_entry(head->next, kib_fmr_pool_t, fpo_list);
+ list_del(&pool->fpo_list);
+ kiblnd_destroy_fmr_pool(pool);
+ }
+}
+
+static int kiblnd_fmr_pool_size(int ncpts)
+{
+ int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+
+ return max(IBLND_FMR_POOL, size);
+}
+
+static int kiblnd_fmr_flush_trigger(int ncpts)
+{
+ int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+
+ return max(IBLND_FMR_POOL_FLUSH, size);
+}
+
+int
+kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+{
+ /* FMR pool for RDMA */
+ kib_dev_t *dev = fps->fps_net->ibn_dev;
+ kib_fmr_pool_t *fpo;
+ struct ib_fmr_pool_param param = {
+ .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
+ .page_shift = PAGE_SHIFT,
+ .access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE),
+ .pool_size = fps->fps_pool_size,
+ .dirty_watermark = fps->fps_flush_trigger,
+ .flush_function = NULL,
+ .flush_arg = NULL,
+ .cache = !!*kiblnd_tunables.kib_fmr_cache};
+ int rc;
+
+ LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
+ if (fpo == NULL)
+ return -ENOMEM;
+
+ fpo->fpo_hdev = kiblnd_current_hdev(dev);
+
+ fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
+ if (IS_ERR(fpo->fpo_fmr_pool)) {
+ rc = PTR_ERR(fpo->fpo_fmr_pool);
+ CERROR("Failed to create FMR pool: %d\n", rc);
+
+ kiblnd_hdev_decref(fpo->fpo_hdev);
+ LIBCFS_FREE(fpo, sizeof(kib_fmr_pool_t));
+ return rc;
+ }
+
+ fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ fpo->fpo_owner = fps;
+ *pp_fpo = fpo;
+
+ return 0;
+}
+
+static void
+kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, struct list_head *zombies)
+{
+ if (fps->fps_net == NULL) /* intialized? */
+ return;
+
+ spin_lock(&fps->fps_lock);
+
+ while (!list_empty(&fps->fps_pool_list)) {
+ kib_fmr_pool_t *fpo = list_entry(fps->fps_pool_list.next,
+ kib_fmr_pool_t, fpo_list);
+ fpo->fpo_failed = 1;
+ list_del(&fpo->fpo_list);
+ if (fpo->fpo_map_count == 0)
+ list_add(&fpo->fpo_list, zombies);
+ else
+ list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
+ }
+
+ spin_unlock(&fps->fps_lock);
+}
+
+static void
+kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps)
+{
+ if (fps->fps_net != NULL) { /* initialized? */
+ kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list);
+ kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list);
+ }
+}
+
+static int
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
+ int pool_size, int flush_trigger)
+{
+ kib_fmr_pool_t *fpo;
+ int rc;
+
+ memset(fps, 0, sizeof(kib_fmr_poolset_t));
+
+ fps->fps_net = net;
+ fps->fps_cpt = cpt;
+ fps->fps_pool_size = pool_size;
+ fps->fps_flush_trigger = flush_trigger;
+ spin_lock_init(&fps->fps_lock);
+ INIT_LIST_HEAD(&fps->fps_pool_list);
+ INIT_LIST_HEAD(&fps->fps_failed_pool_list);
+
+ rc = kiblnd_create_fmr_pool(fps, &fpo);
+ if (rc == 0)
+ list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+
+ return rc;
+}
+
+static int
+kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, cfs_time_t now)
+{
+ if (fpo->fpo_map_count != 0) /* still in use */
+ return 0;
+ if (fpo->fpo_failed)
+ return 1;
+ return cfs_time_aftereq(now, fpo->fpo_deadline);
+}
+
+void
+kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
+{
+ LIST_HEAD (zombies);
+ kib_fmr_pool_t *fpo = fmr->fmr_pool;
+ kib_fmr_poolset_t *fps = fpo->fpo_owner;
+ cfs_time_t now = cfs_time_current();
+ kib_fmr_pool_t *tmp;
+ int rc;
+
+ rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+ LASSERT (rc == 0);
+
+ if (status != 0) {
+ rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
+ LASSERT (rc == 0);
+ }
+
+ fmr->fmr_pool = NULL;
+ fmr->fmr_pfmr = NULL;
+
+ spin_lock(&fps->fps_lock);
+ fpo->fpo_map_count --; /* decref the pool */
+
+ list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
+ /* the first pool is persistent */
+ if (fps->fps_pool_list.next == &fpo->fpo_list)
+ continue;
+
+ if (kiblnd_fmr_pool_is_idle(fpo, now)) {
+ list_move(&fpo->fpo_list, &zombies);
+ fps->fps_version ++;
+ }
+ }
+ spin_unlock(&fps->fps_lock);
+
+ if (!list_empty(&zombies))
+ kiblnd_destroy_fmr_pool_list(&zombies);
+}
+
+int
+kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
+ __u64 iov, kib_fmr_t *fmr)
+{
+ struct ib_pool_fmr *pfmr;
+ kib_fmr_pool_t *fpo;
+ __u64 version;
+ int rc;
+
+ again:
+ spin_lock(&fps->fps_lock);
+ version = fps->fps_version;
+ list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
+ fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ fpo->fpo_map_count++;
+ spin_unlock(&fps->fps_lock);
+
+ pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
+ pages, npages, iov);
+ if (likely(!IS_ERR(pfmr))) {
+ fmr->fmr_pool = fpo;
+ fmr->fmr_pfmr = pfmr;
+ return 0;
+ }
+
+ spin_lock(&fps->fps_lock);
+ fpo->fpo_map_count--;
+ if (PTR_ERR(pfmr) != -EAGAIN) {
+ spin_unlock(&fps->fps_lock);
+ return PTR_ERR(pfmr);
+ }
+
+ /* EAGAIN and ... */
+ if (version != fps->fps_version) {
+ spin_unlock(&fps->fps_lock);
+ goto again;
+ }
+ }
+
+ if (fps->fps_increasing) {
+ spin_unlock(&fps->fps_lock);
+ CDEBUG(D_NET, "Another thread is allocating new "
+ "FMR pool, waiting for her to complete\n");
+ schedule();
+ goto again;
+
+ }
+
+ if (cfs_time_before(cfs_time_current(), fps->fps_next_retry)) {
+ /* someone failed recently */
+ spin_unlock(&fps->fps_lock);
+ return -EAGAIN;
+ }
+
+ fps->fps_increasing = 1;
+ spin_unlock(&fps->fps_lock);
+
+ CDEBUG(D_NET, "Allocate new FMR pool\n");
+ rc = kiblnd_create_fmr_pool(fps, &fpo);
+ spin_lock(&fps->fps_lock);
+ fps->fps_increasing = 0;
+ if (rc == 0) {
+ fps->fps_version++;
+ list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+ } else {
+ fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+ }
+ spin_unlock(&fps->fps_lock);
+
+ goto again;
+}
+
+static void
+kiblnd_fini_pool(kib_pool_t *pool)
+{
+ LASSERT (list_empty(&pool->po_free_list));
+ LASSERT (pool->po_allocated == 0);
+
+ CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
+}
+
+static void
+kiblnd_init_pool(kib_poolset_t *ps, kib_pool_t *pool, int size)
+{
+ CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
+
+ memset(pool, 0, sizeof(kib_pool_t));
+ INIT_LIST_HEAD(&pool->po_free_list);
+ pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ pool->po_owner = ps;
+ pool->po_size = size;
+}
+
+void
+kiblnd_destroy_pool_list(struct list_head *head)
+{
+ kib_pool_t *pool;
+
+ while (!list_empty(head)) {
+ pool = list_entry(head->next, kib_pool_t, po_list);
+ list_del(&pool->po_list);
+
+ LASSERT (pool->po_owner != NULL);
+ pool->po_owner->ps_pool_destroy(pool);
+ }
+}
+
+static void
+kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies)
+{
+ if (ps->ps_net == NULL) /* intialized? */
+ return;
+
+ spin_lock(&ps->ps_lock);
+ while (!list_empty(&ps->ps_pool_list)) {
+ kib_pool_t *po = list_entry(ps->ps_pool_list.next,
+ kib_pool_t, po_list);
+ po->po_failed = 1;
+ list_del(&po->po_list);
+ if (po->po_allocated == 0)
+ list_add(&po->po_list, zombies);
+ else
+ list_add(&po->po_list, &ps->ps_failed_pool_list);
+ }
+ spin_unlock(&ps->ps_lock);
+}
+
+static void
+kiblnd_fini_poolset(kib_poolset_t *ps)
+{
+ if (ps->ps_net != NULL) { /* initialized? */
+ kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
+ kiblnd_destroy_pool_list(&ps->ps_pool_list);
+ }
+}
+
+static int
+kiblnd_init_poolset(kib_poolset_t *ps, int cpt,
+ kib_net_t *net, char *name, int size,
+ kib_ps_pool_create_t po_create,
+ kib_ps_pool_destroy_t po_destroy,
+ kib_ps_node_init_t nd_init,
+ kib_ps_node_fini_t nd_fini)
+{
+ kib_pool_t *pool;
+ int rc;
+
+ memset(ps, 0, sizeof(kib_poolset_t));
+
+ ps->ps_cpt = cpt;
+ ps->ps_net = net;
+ ps->ps_pool_create = po_create;
+ ps->ps_pool_destroy = po_destroy;
+ ps->ps_node_init = nd_init;
+ ps->ps_node_fini = nd_fini;
+ ps->ps_pool_size = size;
+ if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
+ >= sizeof(ps->ps_name))
+ return -E2BIG;
+ spin_lock_init(&ps->ps_lock);
+ INIT_LIST_HEAD(&ps->ps_pool_list);
+ INIT_LIST_HEAD(&ps->ps_failed_pool_list);
+
+ rc = ps->ps_pool_create(ps, size, &pool);
+ if (rc == 0)
+ list_add(&pool->po_list, &ps->ps_pool_list);
+ else
+ CERROR("Failed to create the first pool for %s\n", ps->ps_name);
+
+ return rc;
+}
+
+static int
+kiblnd_pool_is_idle(kib_pool_t *pool, cfs_time_t now)
+{
+ if (pool->po_allocated != 0) /* still in use */
+ return 0;
+ if (pool->po_failed)
+ return 1;
+ return cfs_time_aftereq(now, pool->po_deadline);
+}
+
+void
+kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node)
+{
+ LIST_HEAD (zombies);
+ kib_poolset_t *ps = pool->po_owner;
+ kib_pool_t *tmp;
+ cfs_time_t now = cfs_time_current();
+
+ spin_lock(&ps->ps_lock);
+
+ if (ps->ps_node_fini != NULL)
+ ps->ps_node_fini(pool, node);
+
+ LASSERT (pool->po_allocated > 0);
+ list_add(node, &pool->po_free_list);
+ pool->po_allocated --;
+
+ list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
+ /* the first pool is persistent */
+ if (ps->ps_pool_list.next == &pool->po_list)
+ continue;
+
+ if (kiblnd_pool_is_idle(pool, now))
+ list_move(&pool->po_list, &zombies);
+ }
+ spin_unlock(&ps->ps_lock);
+
+ if (!list_empty(&zombies))
+ kiblnd_destroy_pool_list(&zombies);
+}
+
+struct list_head *
+kiblnd_pool_alloc_node(kib_poolset_t *ps)
+{
+ struct list_head *node;
+ kib_pool_t *pool;
+ int rc;
+
+ again:
+ spin_lock(&ps->ps_lock);
+ list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
+ if (list_empty(&pool->po_free_list))
+ continue;
+
+ pool->po_allocated ++;
+ pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ node = pool->po_free_list.next;
+ list_del(node);
+
+ if (ps->ps_node_init != NULL) {
+ /* still hold the lock */
+ ps->ps_node_init(pool, node);
+ }
+ spin_unlock(&ps->ps_lock);
+ return node;
+ }
+
+ /* no available tx pool and ... */
+ if (ps->ps_increasing) {
+ /* another thread is allocating a new pool */
+ spin_unlock(&ps->ps_lock);
+ CDEBUG(D_NET, "Another thread is allocating new "
+ "%s pool, waiting for her to complete\n",
+ ps->ps_name);
+ schedule();
+ goto again;
+ }
+
+ if (cfs_time_before(cfs_time_current(), ps->ps_next_retry)) {
+ /* someone failed recently */
+ spin_unlock(&ps->ps_lock);
+ return NULL;
+ }
+
+ ps->ps_increasing = 1;
+ spin_unlock(&ps->ps_lock);
+
+ CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
+
+ rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
+
+ spin_lock(&ps->ps_lock);
+ ps->ps_increasing = 0;
+ if (rc == 0) {
+ list_add_tail(&pool->po_list, &ps->ps_pool_list);
+ } else {
+ ps->ps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+ CERROR("Can't allocate new %s pool because out of memory\n",
+ ps->ps_name);
+ }
+ spin_unlock(&ps->ps_lock);
+
+ goto again;
+}
+
+void
+kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr)
+{
+ kib_pmr_pool_t *ppo = pmr->pmr_pool;
+ struct ib_mr *mr = pmr->pmr_mr;
+
+ pmr->pmr_mr = NULL;
+ kiblnd_pool_free_node(&ppo->ppo_pool, &pmr->pmr_list);
+ if (mr != NULL)
+ ib_dereg_mr(mr);
+}
+
+int
+kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
+ kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr)
+{
+ kib_phys_mr_t *pmr;
+ struct list_head *node;
+ int rc;
+ int i;
+
+ node = kiblnd_pool_alloc_node(&pps->pps_poolset);
+ if (node == NULL) {
+ CERROR("Failed to allocate PMR descriptor\n");
+ return -ENOMEM;
+ }
+
+ pmr = container_of(node, kib_phys_mr_t, pmr_list);
+ if (pmr->pmr_pool->ppo_hdev != hdev) {
+ kiblnd_pool_free_node(&pmr->pmr_pool->ppo_pool, node);
+ return -EAGAIN;
+ }
+
+ for (i = 0; i < rd->rd_nfrags; i ++) {
+ pmr->pmr_ipb[i].addr = rd->rd_frags[i].rf_addr;
+ pmr->pmr_ipb[i].size = rd->rd_frags[i].rf_nob;
+ }
+
+ pmr->pmr_mr = ib_reg_phys_mr(hdev->ibh_pd,
+ pmr->pmr_ipb, rd->rd_nfrags,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE,
+ iova);
+ if (!IS_ERR(pmr->pmr_mr)) {
+ pmr->pmr_iova = *iova;
+ *pp_pmr = pmr;
+ return 0;
+ }
+
+ rc = PTR_ERR(pmr->pmr_mr);
+ CERROR("Failed ib_reg_phys_mr: %d\n", rc);
+
+ pmr->pmr_mr = NULL;
+ kiblnd_pool_free_node(&pmr->pmr_pool->ppo_pool, node);
+
+ return rc;
+}
+
+static void
+kiblnd_destroy_pmr_pool(kib_pool_t *pool)
+{
+ kib_pmr_pool_t *ppo = container_of(pool, kib_pmr_pool_t, ppo_pool);
+ kib_phys_mr_t *pmr;
+
+ LASSERT (pool->po_allocated == 0);
+
+ while (!list_empty(&pool->po_free_list)) {
+ pmr = list_entry(pool->po_free_list.next,
+ kib_phys_mr_t, pmr_list);
+
+ LASSERT (pmr->pmr_mr == NULL);
+ list_del(&pmr->pmr_list);
+
+ if (pmr->pmr_ipb != NULL) {
+ LIBCFS_FREE(pmr->pmr_ipb,
+ IBLND_MAX_RDMA_FRAGS *
+ sizeof(struct ib_phys_buf));
+ }
+
+ LIBCFS_FREE(pmr, sizeof(kib_phys_mr_t));
+ }
+
+ kiblnd_fini_pool(pool);
+ if (ppo->ppo_hdev != NULL)
+ kiblnd_hdev_decref(ppo->ppo_hdev);
+
+ LIBCFS_FREE(ppo, sizeof(kib_pmr_pool_t));
+}
+
+static inline int kiblnd_pmr_pool_size(int ncpts)
+{
+ int size = *kiblnd_tunables.kib_pmr_pool_size / ncpts;
+
+ return max(IBLND_PMR_POOL, size);
+}
+
+static int
+kiblnd_create_pmr_pool(kib_poolset_t *ps, int size, kib_pool_t **pp_po)
+{
+ struct kib_pmr_pool *ppo;
+ struct kib_pool *pool;
+ kib_phys_mr_t *pmr;
+ int i;
+
+ LIBCFS_CPT_ALLOC(ppo, lnet_cpt_table(),
+ ps->ps_cpt, sizeof(kib_pmr_pool_t));
+ if (ppo == NULL) {
+ CERROR("Failed to allocate PMR pool\n");
+ return -ENOMEM;
+ }
+
+ pool = &ppo->ppo_pool;
+ kiblnd_init_pool(ps, pool, size);
+
+ for (i = 0; i < size; i++) {
+ LIBCFS_CPT_ALLOC(pmr, lnet_cpt_table(),
+ ps->ps_cpt, sizeof(kib_phys_mr_t));
+ if (pmr == NULL)
+ break;
+
+ pmr->pmr_pool = ppo;
+ LIBCFS_CPT_ALLOC(pmr->pmr_ipb, lnet_cpt_table(), ps->ps_cpt,
+ IBLND_MAX_RDMA_FRAGS * sizeof(*pmr->pmr_ipb));
+ if (pmr->pmr_ipb == NULL)
+ break;
+
+ list_add(&pmr->pmr_list, &pool->po_free_list);
+ }
+
+ if (i < size) {
+ ps->ps_pool_destroy(pool);
+ return -ENOMEM;
+ }
+
+ ppo->ppo_hdev = kiblnd_current_hdev(ps->ps_net->ibn_dev);
+ *pp_po = pool;
+ return 0;
+}
+
+static void
+kiblnd_destroy_tx_pool(kib_pool_t *pool)
+{
+ kib_tx_pool_t *tpo = container_of(pool, kib_tx_pool_t, tpo_pool);
+ int i;
+
+ LASSERT (pool->po_allocated == 0);
+
+ if (tpo->tpo_tx_pages != NULL) {
+ kiblnd_unmap_tx_pool(tpo);
+ kiblnd_free_pages(tpo->tpo_tx_pages);
+ }
+
+ if (tpo->tpo_tx_descs == NULL)
+ goto out;
+
+ for (i = 0; i < pool->po_size; i++) {
+ kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+
+ list_del(&tx->tx_list);
+ if (tx->tx_pages != NULL)
+ LIBCFS_FREE(tx->tx_pages,
+ LNET_MAX_IOV *
+ sizeof(*tx->tx_pages));
+ if (tx->tx_frags != NULL)
+ LIBCFS_FREE(tx->tx_frags,
+ IBLND_MAX_RDMA_FRAGS *
+ sizeof(*tx->tx_frags));
+ if (tx->tx_wrq != NULL)
+ LIBCFS_FREE(tx->tx_wrq,
+ (1 + IBLND_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_wrq));
+ if (tx->tx_sge != NULL)
+ LIBCFS_FREE(tx->tx_sge,
+ (1 + IBLND_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_sge));
+ if (tx->tx_rd != NULL)
+ LIBCFS_FREE(tx->tx_rd,
+ offsetof(kib_rdma_desc_t,
+ rd_frags[IBLND_MAX_RDMA_FRAGS]));
+ }
+
+ LIBCFS_FREE(tpo->tpo_tx_descs,
+ pool->po_size * sizeof(kib_tx_t));
+out:
+ kiblnd_fini_pool(pool);
+ LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t));
+}
+
+static int kiblnd_tx_pool_size(int ncpts)
+{
+ int ntx = *kiblnd_tunables.kib_ntx / ncpts;
+
+ return max(IBLND_TX_POOL, ntx);
+}
+
+static int
+kiblnd_create_tx_pool(kib_poolset_t *ps, int size, kib_pool_t **pp_po)
+{
+ int i;
+ int npg;
+ kib_pool_t *pool;
+ kib_tx_pool_t *tpo;
+
+ LIBCFS_CPT_ALLOC(tpo, lnet_cpt_table(), ps->ps_cpt, sizeof(*tpo));
+ if (tpo == NULL) {
+ CERROR("Failed to allocate TX pool\n");
+ return -ENOMEM;
+ }
+
+ pool = &tpo->tpo_pool;
+ kiblnd_init_pool(ps, pool, size);
+ tpo->tpo_tx_descs = NULL;
+ tpo->tpo_tx_pages = NULL;
+
+ npg = (size * IBLND_MSG_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
+ if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg) != 0) {
+ CERROR("Can't allocate tx pages: %d\n", npg);
+ LIBCFS_FREE(tpo, sizeof(kib_tx_pool_t));
+ return -ENOMEM;
+ }
+
+ LIBCFS_CPT_ALLOC(tpo->tpo_tx_descs, lnet_cpt_table(), ps->ps_cpt,
+ size * sizeof(kib_tx_t));
+ if (tpo->tpo_tx_descs == NULL) {
+ CERROR("Can't allocate %d tx descriptors\n", size);
+ ps->ps_pool_destroy(pool);
+ return -ENOMEM;
+ }
+
+ memset(tpo->tpo_tx_descs, 0, size * sizeof(kib_tx_t));
+
+ for (i = 0; i < size; i++) {
+ kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+
+ tx->tx_pool = tpo;
+ if (ps->ps_net->ibn_fmr_ps != NULL) {
+ LIBCFS_CPT_ALLOC(tx->tx_pages,
+ lnet_cpt_table(), ps->ps_cpt,
+ LNET_MAX_IOV * sizeof(*tx->tx_pages));
+ if (tx->tx_pages == NULL)
+ break;
+ }
+
+ LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt,
+ IBLND_MAX_RDMA_FRAGS * sizeof(*tx->tx_frags));
+ if (tx->tx_frags == NULL)
+ break;
+
+ sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
+
+ LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
+ (1 + IBLND_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_wrq));
+ if (tx->tx_wrq == NULL)
+ break;
+
+ LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
+ (1 + IBLND_MAX_RDMA_FRAGS) *
+ sizeof(*tx->tx_sge));
+ if (tx->tx_sge == NULL)
+ break;
+
+ LIBCFS_CPT_ALLOC(tx->tx_rd, lnet_cpt_table(), ps->ps_cpt,
+ offsetof(kib_rdma_desc_t,
+ rd_frags[IBLND_MAX_RDMA_FRAGS]));
+ if (tx->tx_rd == NULL)
+ break;
+ }
+
+ if (i == size) {
+ kiblnd_map_tx_pool(tpo);
+ *pp_po = pool;
+ return 0;
+ }
+
+ ps->ps_pool_destroy(pool);
+ return -ENOMEM;
+}
+
+static void
+kiblnd_tx_init(kib_pool_t *pool, struct list_head *node)
+{
+ kib_tx_poolset_t *tps = container_of(pool->po_owner, kib_tx_poolset_t,
+ tps_poolset);
+ kib_tx_t *tx = list_entry(node, kib_tx_t, tx_list);
+
+ tx->tx_cookie = tps->tps_next_tx_cookie ++;
+}
+
+void
+kiblnd_net_fini_pools(kib_net_t *net)
+{
+ int i;
+
+ cfs_cpt_for_each(i, lnet_cpt_table()) {
+ kib_tx_poolset_t *tps;
+ kib_fmr_poolset_t *fps;
+ kib_pmr_poolset_t *pps;
+
+ if (net->ibn_tx_ps != NULL) {
+ tps = net->ibn_tx_ps[i];
+ kiblnd_fini_poolset(&tps->tps_poolset);
+ }
+
+ if (net->ibn_fmr_ps != NULL) {
+ fps = net->ibn_fmr_ps[i];
+ kiblnd_fini_fmr_poolset(fps);
+ }
+
+ if (net->ibn_pmr_ps != NULL) {
+ pps = net->ibn_pmr_ps[i];
+ kiblnd_fini_poolset(&pps->pps_poolset);
+ }
+ }
+
+ if (net->ibn_tx_ps != NULL) {
+ cfs_percpt_free(net->ibn_tx_ps);
+ net->ibn_tx_ps = NULL;
+ }
+
+ if (net->ibn_fmr_ps != NULL) {
+ cfs_percpt_free(net->ibn_fmr_ps);
+ net->ibn_fmr_ps = NULL;
+ }
+
+ if (net->ibn_pmr_ps != NULL) {
+ cfs_percpt_free(net->ibn_pmr_ps);
+ net->ibn_pmr_ps = NULL;
+ }
+}
+
+int
+kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+{
+ unsigned long flags;
+ int cpt;
+ int rc;
+ int i;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ if (*kiblnd_tunables.kib_map_on_demand == 0 &&
+ net->ibn_dev->ibd_hdev->ibh_nmrs == 1) {
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+ flags);
+ goto create_tx_pool;
+ }
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ if (*kiblnd_tunables.kib_fmr_pool_size <
+ *kiblnd_tunables.kib_ntx / 4) {
+ CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
+ *kiblnd_tunables.kib_fmr_pool_size,
+ *kiblnd_tunables.kib_ntx / 4);
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ /* TX pool must be created later than FMR/PMR, see LU-2268
+ * for details */
+ LASSERT(net->ibn_tx_ps == NULL);
+
+ /* premapping can fail if ibd_nmr > 1, so we always create
+ * FMR/PMR pool and map-on-demand if premapping failed */
+
+ net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(kib_fmr_poolset_t));
+ if (net->ibn_fmr_ps == NULL) {
+ CERROR("Failed to allocate FMR pool array\n");
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ for (i = 0; i < ncpts; i++) {
+ cpt = (cpts == NULL) ? i : cpts[i];
+ rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
+ kiblnd_fmr_pool_size(ncpts),
+ kiblnd_fmr_flush_trigger(ncpts));
+ if (rc == -ENOSYS && i == 0) /* no FMR */
+ break; /* create PMR pool */
+
+ if (rc != 0) { /* a real error */
+ CERROR("Can't initialize FMR pool for CPT %d: %d\n",
+ cpt, rc);
+ goto failed;
+ }
+ }
+
+ if (i > 0) {
+ LASSERT(i == ncpts);
+ goto create_tx_pool;
+ }
+
+ cfs_percpt_free(net->ibn_fmr_ps);
+ net->ibn_fmr_ps = NULL;
+
+ CWARN("Device does not support FMR, failing back to PMR\n");
+
+ if (*kiblnd_tunables.kib_pmr_pool_size <
+ *kiblnd_tunables.kib_ntx / 4) {
+ CERROR("Can't set pmr pool size (%d) < ntx / 4(%d)\n",
+ *kiblnd_tunables.kib_pmr_pool_size,
+ *kiblnd_tunables.kib_ntx / 4);
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ net->ibn_pmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(kib_pmr_poolset_t));
+ if (net->ibn_pmr_ps == NULL) {
+ CERROR("Failed to allocate PMR pool array\n");
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ for (i = 0; i < ncpts; i++) {
+ cpt = (cpts == NULL) ? i : cpts[i];
+ rc = kiblnd_init_poolset(&net->ibn_pmr_ps[cpt]->pps_poolset,
+ cpt, net, "PMR",
+ kiblnd_pmr_pool_size(ncpts),
+ kiblnd_create_pmr_pool,
+ kiblnd_destroy_pmr_pool, NULL, NULL);
+ if (rc != 0) {
+ CERROR("Can't initialize PMR pool for CPT %d: %d\n",
+ cpt, rc);
+ goto failed;
+ }
+ }
+
+ create_tx_pool:
+ net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(kib_tx_poolset_t));
+ if (net->ibn_tx_ps == NULL) {
+ CERROR("Failed to allocate tx pool array\n");
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ for (i = 0; i < ncpts; i++) {
+ cpt = (cpts == NULL) ? i : cpts[i];
+ rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
+ cpt, net, "TX",
+ kiblnd_tx_pool_size(ncpts),
+ kiblnd_create_tx_pool,
+ kiblnd_destroy_tx_pool,
+ kiblnd_tx_init, NULL);
+ if (rc != 0) {
+ CERROR("Can't initialize TX pool for CPT %d: %d\n",
+ cpt, rc);
+ goto failed;
+ }
+ }
+
+ return 0;
+ failed:
+ kiblnd_net_fini_pools(net);
+ LASSERT(rc != 0);
+ return rc;
+}
+
+static int
+kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
+{
+ struct ib_device_attr *attr;
+ int rc;
+
+ /* It's safe to assume a HCA can handle a page size
+ * matching that of the native system */
+ hdev->ibh_page_shift = PAGE_SHIFT;
+ hdev->ibh_page_size = 1 << PAGE_SHIFT;
+ hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
+
+ LIBCFS_ALLOC(attr, sizeof(*attr));
+ if (attr == NULL) {
+ CERROR("Out of memory\n");
+ return -ENOMEM;
+ }
+
+ rc = ib_query_device(hdev->ibh_ibdev, attr);
+ if (rc == 0)
+ hdev->ibh_mr_size = attr->max_mr_size;
+
+ LIBCFS_FREE(attr, sizeof(*attr));
+
+ if (rc != 0) {
+ CERROR("Failed to query IB device: %d\n", rc);
+ return rc;
+ }
+
+ if (hdev->ibh_mr_size == ~0ULL) {
+ hdev->ibh_mr_shift = 64;
+ return 0;
+ }
+
+ for (hdev->ibh_mr_shift = 0;
+ hdev->ibh_mr_shift < 64; hdev->ibh_mr_shift ++) {
+ if (hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) ||
+ hdev->ibh_mr_size == (1ULL << hdev->ibh_mr_shift) - 1)
+ return 0;
+ }
+
+ CERROR("Invalid mr size: "LPX64"\n", hdev->ibh_mr_size);
+ return -EINVAL;
+}
+
+void
+kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
+{
+ int i;
+
+ if (hdev->ibh_nmrs == 0 || hdev->ibh_mrs == NULL)
+ return;
+
+ for (i = 0; i < hdev->ibh_nmrs; i++) {
+ if (hdev->ibh_mrs[i] == NULL)
+ break;
+
+ ib_dereg_mr(hdev->ibh_mrs[i]);
+ }
+
+ LIBCFS_FREE(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+ hdev->ibh_mrs = NULL;
+ hdev->ibh_nmrs = 0;
+}
+
+void
+kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
+{
+ kiblnd_hdev_cleanup_mrs(hdev);
+
+ if (hdev->ibh_pd != NULL)
+ ib_dealloc_pd(hdev->ibh_pd);
+
+ if (hdev->ibh_cmid != NULL)
+ rdma_destroy_id(hdev->ibh_cmid);
+
+ LIBCFS_FREE(hdev, sizeof(*hdev));
+}
+
+int
+kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
+{
+ struct ib_mr *mr;
+ int i;
+ int rc;
+ __u64 mm_size;
+ __u64 mr_size;
+ int acflags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE;
+
+ rc = kiblnd_hdev_get_attr(hdev);
+ if (rc != 0)
+ return rc;
+
+ if (hdev->ibh_mr_shift == 64) {
+ LIBCFS_ALLOC(hdev->ibh_mrs, 1 * sizeof(*hdev->ibh_mrs));
+ if (hdev->ibh_mrs == NULL) {
+ CERROR("Failed to allocate MRs table\n");
+ return -ENOMEM;
+ }
+
+ hdev->ibh_mrs[0] = NULL;
+ hdev->ibh_nmrs = 1;
+
+ mr = ib_get_dma_mr(hdev->ibh_pd, acflags);
+ if (IS_ERR(mr)) {
+ CERROR("Failed ib_get_dma_mr : %ld\n", PTR_ERR(mr));
+ kiblnd_hdev_cleanup_mrs(hdev);
+ return PTR_ERR(mr);
+ }
+
+ hdev->ibh_mrs[0] = mr;
+
+ goto out;
+ }
+
+ mr_size = (1ULL << hdev->ibh_mr_shift);
+ mm_size = (unsigned long)high_memory - PAGE_OFFSET;
+
+ hdev->ibh_nmrs = (int)((mm_size + mr_size - 1) >> hdev->ibh_mr_shift);
+
+ if (hdev->ibh_mr_shift < 32 || hdev->ibh_nmrs > 1024) {
+ /* it's 4T..., assume we will re-code at that time */
+ CERROR("Can't support memory size: x"LPX64
+ " with MR size: x"LPX64"\n", mm_size, mr_size);
+ return -EINVAL;
+ }
+
+ /* create an array of MRs to cover all memory */
+ LIBCFS_ALLOC(hdev->ibh_mrs, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+ if (hdev->ibh_mrs == NULL) {
+ CERROR("Failed to allocate MRs' table\n");
+ return -ENOMEM;
+ }
+
+ memset(hdev->ibh_mrs, 0, sizeof(*hdev->ibh_mrs) * hdev->ibh_nmrs);
+
+ for (i = 0; i < hdev->ibh_nmrs; i++) {
+ struct ib_phys_buf ipb;
+ __u64 iova;
+
+ ipb.size = hdev->ibh_mr_size;
+ ipb.addr = i * mr_size;
+ iova = ipb.addr;
+
+ mr = ib_reg_phys_mr(hdev->ibh_pd, &ipb, 1, acflags, &iova);
+ if (IS_ERR(mr)) {
+ CERROR("Failed ib_reg_phys_mr addr "LPX64
+ " size "LPX64" : %ld\n",
+ ipb.addr, ipb.size, PTR_ERR(mr));
+ kiblnd_hdev_cleanup_mrs(hdev);
+ return PTR_ERR(mr);
+ }
+
+ LASSERT (iova == ipb.addr);
+
+ hdev->ibh_mrs[i] = mr;
+ }
+
+out:
+ if (hdev->ibh_mr_size != ~0ULL || hdev->ibh_nmrs != 1)
+ LCONSOLE_INFO("Register global MR array, MR size: "
+ LPX64", array size: %d\n",
+ hdev->ibh_mr_size, hdev->ibh_nmrs);
+ return 0;
+}
+
+static int
+kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
+{ /* DUMMY */
+ return 0;
+}
+
+static int
+kiblnd_dev_need_failover(kib_dev_t *dev)
+{
+ struct rdma_cm_id *cmid;
+ struct sockaddr_in srcaddr;
+ struct sockaddr_in dstaddr;
+ int rc;
+
+ if (dev->ibd_hdev == NULL || /* initializing */
+ dev->ibd_hdev->ibh_cmid == NULL || /* listener is dead */
+ *kiblnd_tunables.kib_dev_failover > 1) /* debugging */
+ return 1;
+
+ /* XXX: it's UGLY, but I don't have better way to find
+ * ib-bonding HCA failover because:
+ *
+ * a. no reliable CM event for HCA failover...
+ * b. no OFED API to get ib_device for current net_device...
+ *
+ * We have only two choices at this point:
+ *
+ * a. rdma_bind_addr(), it will conflict with listener cmid
+ * b. rdma_resolve_addr() to zero addr */
+ cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
+ IB_QPT_RC);
+ if (IS_ERR(cmid)) {
+ rc = PTR_ERR(cmid);
+ CERROR("Failed to create cmid for failover: %d\n", rc);
+ return rc;
+ }
+
+ memset(&srcaddr, 0, sizeof(srcaddr));
+ srcaddr.sin_family = AF_INET;
+ srcaddr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
+
+ memset(&dstaddr, 0, sizeof(dstaddr));
+ dstaddr.sin_family = AF_INET;
+ rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr,
+ (struct sockaddr *)&dstaddr, 1);
+ if (rc != 0 || cmid->device == NULL) {
+ CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n",
+ dev->ibd_ifname, HIPQUAD(dev->ibd_ifip),
+ cmid->device, rc);
+ rdma_destroy_id(cmid);
+ return rc;
+ }
+
+ if (dev->ibd_hdev->ibh_ibdev == cmid->device) {
+ /* don't need device failover */
+ rdma_destroy_id(cmid);
+ return 0;
+ }
+
+ return 1;
+}
+
+int
+kiblnd_dev_failover(kib_dev_t *dev)
+{
+ LIST_HEAD (zombie_tpo);
+ LIST_HEAD (zombie_ppo);
+ LIST_HEAD (zombie_fpo);
+ struct rdma_cm_id *cmid = NULL;
+ kib_hca_dev_t *hdev = NULL;
+ kib_hca_dev_t *old;
+ struct ib_pd *pd;
+ kib_net_t *net;
+ struct sockaddr_in addr;
+ unsigned long flags;
+ int rc = 0;
+ int i;
+
+ LASSERT (*kiblnd_tunables.kib_dev_failover > 1 ||
+ dev->ibd_can_failover ||
+ dev->ibd_hdev == NULL);
+
+ rc = kiblnd_dev_need_failover(dev);
+ if (rc <= 0)
+ goto out;
+
+ if (dev->ibd_hdev != NULL &&
+ dev->ibd_hdev->ibh_cmid != NULL) {
+ /* XXX it's not good to close old listener at here,
+ * because we can fail to create new listener.
+ * But we have to close it now, otherwise rdma_bind_addr
+ * will return EADDRINUSE... How crap! */
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ cmid = dev->ibd_hdev->ibh_cmid;
+ /* make next schedule of kiblnd_dev_need_failover()
+ * return 1 for me */
+ dev->ibd_hdev->ibh_cmid = NULL;
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ rdma_destroy_id(cmid);
+ }
+
+ cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
+ IB_QPT_RC);
+ if (IS_ERR(cmid)) {
+ rc = PTR_ERR(cmid);
+ CERROR("Failed to create cmid for failover: %d\n", rc);
+ goto out;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
+ addr.sin_port = htons(*kiblnd_tunables.kib_service);
+
+ /* Bind to failover device or port */
+ rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
+ if (rc != 0 || cmid->device == NULL) {
+ CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n",
+ dev->ibd_ifname, HIPQUAD(dev->ibd_ifip),
+ cmid->device, rc);
+ rdma_destroy_id(cmid);
+ goto out;
+ }
+
+ LIBCFS_ALLOC(hdev, sizeof(*hdev));
+ if (hdev == NULL) {
+ CERROR("Failed to allocate kib_hca_dev\n");
+ rdma_destroy_id(cmid);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ atomic_set(&hdev->ibh_ref, 1);
+ hdev->ibh_dev = dev;
+ hdev->ibh_cmid = cmid;
+ hdev->ibh_ibdev = cmid->device;
+
+ pd = ib_alloc_pd(cmid->device);
+ if (IS_ERR(pd)) {
+ rc = PTR_ERR(pd);
+ CERROR("Can't allocate PD: %d\n", rc);
+ goto out;
+ }
+
+ hdev->ibh_pd = pd;
+
+ rc = rdma_listen(cmid, 0);
+ if (rc != 0) {
+ CERROR("Can't start new listener: %d\n", rc);
+ goto out;
+ }
+
+ rc = kiblnd_hdev_setup_mrs(hdev);
+ if (rc != 0) {
+ CERROR("Can't setup device: %d\n", rc);
+ goto out;
+ }
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ old = dev->ibd_hdev;
+ dev->ibd_hdev = hdev; /* take over the refcount */
+ hdev = old;
+
+ list_for_each_entry(net, &dev->ibd_nets, ibn_list) {
+ cfs_cpt_for_each(i, lnet_cpt_table()) {
+ kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset,
+ &zombie_tpo);
+
+ if (net->ibn_fmr_ps != NULL) {
+ kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i],
+ &zombie_fpo);
+
+ } else if (net->ibn_pmr_ps != NULL) {
+ kiblnd_fail_poolset(&net->ibn_pmr_ps[i]->
+ pps_poolset, &zombie_ppo);
+ }
+ }
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ out:
+ if (!list_empty(&zombie_tpo))
+ kiblnd_destroy_pool_list(&zombie_tpo);
+ if (!list_empty(&zombie_ppo))
+ kiblnd_destroy_pool_list(&zombie_ppo);
+ if (!list_empty(&zombie_fpo))
+ kiblnd_destroy_fmr_pool_list(&zombie_fpo);
+ if (hdev != NULL)
+ kiblnd_hdev_decref(hdev);
+
+ if (rc != 0)
+ dev->ibd_failed_failover++;
+ else
+ dev->ibd_failed_failover = 0;
+
+ return rc;
+}
+
+void
+kiblnd_destroy_dev (kib_dev_t *dev)
+{
+ LASSERT (dev->ibd_nnets == 0);
+ LASSERT (list_empty(&dev->ibd_nets));
+
+ list_del(&dev->ibd_fail_list);
+ list_del(&dev->ibd_list);
+
+ if (dev->ibd_hdev != NULL)
+ kiblnd_hdev_decref(dev->ibd_hdev);
+
+ LIBCFS_FREE(dev, sizeof(*dev));
+}
+
+kib_dev_t *
+kiblnd_create_dev(char *ifname)
+{
+ struct net_device *netdev;
+ kib_dev_t *dev;
+ __u32 netmask;
+ __u32 ip;
+ int up;
+ int rc;
+
+ rc = libcfs_ipif_query(ifname, &up, &ip, &netmask);
+ if (rc != 0) {
+ CERROR("Can't query IPoIB interface %s: %d\n",
+ ifname, rc);
+ return NULL;
+ }
+
+ if (!up) {
+ CERROR("Can't query IPoIB interface %s: it's down\n", ifname);
+ return NULL;
+ }
+
+ LIBCFS_ALLOC(dev, sizeof(*dev));
+ if (dev == NULL)
+ return NULL;
+
+ memset(dev, 0, sizeof(*dev));
+ netdev = dev_get_by_name(&init_net, ifname);
+ if (netdev == NULL) {
+ dev->ibd_can_failover = 0;
+ } else {
+ dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER);
+ dev_put(netdev);
+ }
+
+ INIT_LIST_HEAD(&dev->ibd_nets);
+ INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */
+ INIT_LIST_HEAD(&dev->ibd_fail_list);
+ dev->ibd_ifip = ip;
+ strcpy(&dev->ibd_ifname[0], ifname);
+
+ /* initialize the device */
+ rc = kiblnd_dev_failover(dev);
+ if (rc != 0) {
+ CERROR("Can't initialize device: %d\n", rc);
+ LIBCFS_FREE(dev, sizeof(*dev));
+ return NULL;
+ }
+
+ list_add_tail(&dev->ibd_list,
+ &kiblnd_data.kib_devs);
+ return dev;
+}
+
+void
+kiblnd_base_shutdown(void)
+{
+ struct kib_sched_info *sched;
+ int i;
+
+ LASSERT (list_empty(&kiblnd_data.kib_devs));
+
+ CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n",
+ atomic_read(&libcfs_kmemory));
+
+ switch (kiblnd_data.kib_init) {
+ default:
+ LBUG();
+
+ case IBLND_INIT_ALL:
+ case IBLND_INIT_DATA:
+ LASSERT (kiblnd_data.kib_peers != NULL);
+ for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
+ LASSERT (list_empty(&kiblnd_data.kib_peers[i]));
+ }
+ LASSERT (list_empty(&kiblnd_data.kib_connd_zombies));
+ LASSERT (list_empty(&kiblnd_data.kib_connd_conns));
+
+ /* flag threads to terminate; wake and wait for them to die */
+ kiblnd_data.kib_shutdown = 1;
+
+ /* NB: we really want to stop scheduler threads net by net
+ * instead of the whole module, this should be improved
+ * with dynamic configuration LNet */
+ cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
+ wake_up_all(&sched->ibs_waitq);
+
+ wake_up_all(&kiblnd_data.kib_connd_waitq);
+ wake_up_all(&kiblnd_data.kib_failover_waitq);
+
+ i = 2;
+ while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "Waiting for %d threads to terminate\n",
+ atomic_read(&kiblnd_data.kib_nthreads));
+ cfs_pause(cfs_time_seconds(1));
+ }
+
+ /* fall through */
+
+ case IBLND_INIT_NOTHING:
+ break;
+ }
+
+ if (kiblnd_data.kib_peers != NULL) {
+ LIBCFS_FREE(kiblnd_data.kib_peers,
+ sizeof(struct list_head) *
+ kiblnd_data.kib_peer_hash_size);
+ }
+
+ if (kiblnd_data.kib_scheds != NULL)
+ cfs_percpt_free(kiblnd_data.kib_scheds);
+
+ CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n",
+ atomic_read(&libcfs_kmemory));
+
+ kiblnd_data.kib_init = IBLND_INIT_NOTHING;
+ module_put(THIS_MODULE);
+}
+
+void
+kiblnd_shutdown (lnet_ni_t *ni)
+{
+ kib_net_t *net = ni->ni_data;
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+ int i;
+ unsigned long flags;
+
+ LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
+
+ if (net == NULL)
+ goto out;
+
+ CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n",
+ atomic_read(&libcfs_kmemory));
+
+ write_lock_irqsave(g_lock, flags);
+ net->ibn_shutdown = 1;
+ write_unlock_irqrestore(g_lock, flags);
+
+ switch (net->ibn_init) {
+ default:
+ LBUG();
+
+ case IBLND_INIT_ALL:
+ /* nuke all existing peers within this net */
+ kiblnd_del_peer(ni, LNET_NID_ANY);
+
+ /* Wait for all peer state to clean up */
+ i = 2;
+ while (atomic_read(&net->ibn_npeers) != 0) {
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
+ "%s: waiting for %d peers to disconnect\n",
+ libcfs_nid2str(ni->ni_nid),
+ atomic_read(&net->ibn_npeers));
+ cfs_pause(cfs_time_seconds(1));
+ }
+
+ kiblnd_net_fini_pools(net);
+
+ write_lock_irqsave(g_lock, flags);
+ LASSERT(net->ibn_dev->ibd_nnets > 0);
+ net->ibn_dev->ibd_nnets--;
+ list_del(&net->ibn_list);
+ write_unlock_irqrestore(g_lock, flags);
+
+ /* fall through */
+
+ case IBLND_INIT_NOTHING:
+ LASSERT (atomic_read(&net->ibn_nconns) == 0);
+
+ if (net->ibn_dev != NULL &&
+ net->ibn_dev->ibd_nnets == 0)
+ kiblnd_destroy_dev(net->ibn_dev);
+
+ break;
+ }
+
+ CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n",
+ atomic_read(&libcfs_kmemory));
+
+ net->ibn_init = IBLND_INIT_NOTHING;
+ ni->ni_data = NULL;
+
+ LIBCFS_FREE(net, sizeof(*net));
+
+out:
+ if (list_empty(&kiblnd_data.kib_devs))
+ kiblnd_base_shutdown();
+ return;
+}
+
+int
+kiblnd_base_startup(void)
+{
+ struct kib_sched_info *sched;
+ int rc;
+ int i;
+
+ LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING);
+
+ try_module_get(THIS_MODULE);
+ memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */
+
+ rwlock_init(&kiblnd_data.kib_global_lock);
+
+ INIT_LIST_HEAD(&kiblnd_data.kib_devs);
+ INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
+
+ kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
+ LIBCFS_ALLOC(kiblnd_data.kib_peers,
+ sizeof(struct list_head) *
+ kiblnd_data.kib_peer_hash_size);
+ if (kiblnd_data.kib_peers == NULL) {
+ goto failed;
+ }
+ for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++)
+ INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]);
+
+ spin_lock_init(&kiblnd_data.kib_connd_lock);
+ INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
+ INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
+ init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
+ init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
+
+ kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*sched));
+ if (kiblnd_data.kib_scheds == NULL)
+ goto failed;
+
+ cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
+ int nthrs;
+
+ spin_lock_init(&sched->ibs_lock);
+ INIT_LIST_HEAD(&sched->ibs_conns);
+ init_waitqueue_head(&sched->ibs_waitq);
+
+ nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+ if (*kiblnd_tunables.kib_nscheds > 0) {
+ nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds);
+ } else {
+ /* max to half of CPUs, another half is reserved for
+ * upper layer modules */
+ nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
+ }
+
+ sched->ibs_nthreads_max = nthrs;
+ sched->ibs_cpt = i;
+ }
+
+ kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR;
+
+ /* lists/ptrs/locks initialised */
+ kiblnd_data.kib_init = IBLND_INIT_DATA;
+ /*****************************************************/
+
+ rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
+ if (rc != 0) {
+ CERROR("Can't spawn o2iblnd connd: %d\n", rc);
+ goto failed;
+ }
+
+ if (*kiblnd_tunables.kib_dev_failover != 0)
+ rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+ "kiblnd_failover");
+
+ if (rc != 0) {
+ CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
+ goto failed;
+ }
+
+ /* flag everything initialised */
+ kiblnd_data.kib_init = IBLND_INIT_ALL;
+ /*****************************************************/
+
+ return 0;
+
+ failed:
+ kiblnd_base_shutdown();
+ return -ENETDOWN;
+}
+
+int
+kiblnd_start_schedulers(struct kib_sched_info *sched)
+{
+ int rc = 0;
+ int nthrs;
+ int i;
+
+ if (sched->ibs_nthreads == 0) {
+ if (*kiblnd_tunables.kib_nscheds > 0) {
+ nthrs = sched->ibs_nthreads_max;
+ } else {
+ nthrs = cfs_cpt_weight(lnet_cpt_table(),
+ sched->ibs_cpt);
+ nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs);
+ nthrs = min(IBLND_N_SCHED_HIGH, nthrs);
+ }
+ } else {
+ LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max);
+ /* increase one thread if there is new interface */
+ nthrs = (sched->ibs_nthreads < sched->ibs_nthreads_max);
+ }
+
+ for (i = 0; i < nthrs; i++) {
+ long id;
+ char name[20];
+ id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
+ snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
+ KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
+ rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
+ if (rc == 0)
+ continue;
+
+ CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+ sched->ibs_cpt, sched->ibs_nthreads + i, rc);
+ break;
+ }
+
+ sched->ibs_nthreads += i;
+ return rc;
+}
+
+int
+kiblnd_dev_start_threads(kib_dev_t *dev, int newdev, __u32 *cpts, int ncpts)
+{
+ int cpt;
+ int rc;
+ int i;
+
+ for (i = 0; i < ncpts; i++) {
+ struct kib_sched_info *sched;
+
+ cpt = (cpts == NULL) ? i : cpts[i];
+ sched = kiblnd_data.kib_scheds[cpt];
+
+ if (!newdev && sched->ibs_nthreads > 0)
+ continue;
+
+ rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]);
+ if (rc != 0) {
+ CERROR("Failed to start scheduler threads for %s\n",
+ dev->ibd_ifname);
+ return rc;
+ }
+ }
+ return 0;
+}
+
+kib_dev_t *
+kiblnd_dev_search(char *ifname)
+{
+ kib_dev_t *alias = NULL;
+ kib_dev_t *dev;
+ char *colon;
+ char *colon2;
+
+ colon = strchr(ifname, ':');
+ list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
+ if (strcmp(&dev->ibd_ifname[0], ifname) == 0)
+ return dev;
+
+ if (alias != NULL)
+ continue;
+
+ colon2 = strchr(dev->ibd_ifname, ':');
+ if (colon != NULL)
+ *colon = 0;
+ if (colon2 != NULL)
+ *colon2 = 0;
+
+ if (strcmp(&dev->ibd_ifname[0], ifname) == 0)
+ alias = dev;
+
+ if (colon != NULL)
+ *colon = ':';
+ if (colon2 != NULL)
+ *colon2 = ':';
+ }
+ return alias;
+}
+
+int
+kiblnd_startup (lnet_ni_t *ni)
+{
+ char *ifname;
+ kib_dev_t *ibdev = NULL;
+ kib_net_t *net;
+ struct timeval tv;
+ unsigned long flags;
+ int rc;
+ int newdev;
+
+ LASSERT (ni->ni_lnd == &the_o2iblnd);
+
+ if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
+ rc = kiblnd_base_startup();
+ if (rc != 0)
+ return rc;
+ }
+
+ LIBCFS_ALLOC(net, sizeof(*net));
+ ni->ni_data = net;
+ if (net == NULL)
+ goto failed;
+
+ memset(net, 0, sizeof(*net));
+
+ do_gettimeofday(&tv);
+ net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+ ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
+ ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
+ ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits;
+ ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
+
+ if (ni->ni_interfaces[0] != NULL) {
+ /* Use the IPoIB interface specified in 'networks=' */
+
+ CLASSERT (LNET_MAX_INTERFACES > 1);
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Multiple interfaces not supported\n");
+ goto failed;
+ }
+
+ ifname = ni->ni_interfaces[0];
+ } else {
+ ifname = *kiblnd_tunables.kib_default_ipif;
+ }
+
+ if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
+ CERROR("IPoIB interface name too long: %s\n", ifname);
+ goto failed;
+ }
+
+ ibdev = kiblnd_dev_search(ifname);
+
+ newdev = ibdev == NULL;
+ /* hmm...create kib_dev even for alias */
+ if (ibdev == NULL || strcmp(&ibdev->ibd_ifname[0], ifname) != 0)
+ ibdev = kiblnd_create_dev(ifname);
+
+ if (ibdev == NULL)
+ goto failed;
+
+ net->ibn_dev = ibdev;
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
+
+ rc = kiblnd_dev_start_threads(ibdev, newdev,
+ ni->ni_cpts, ni->ni_ncpts);
+ if (rc != 0)
+ goto failed;
+
+ rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+ if (rc != 0) {
+ CERROR("Failed to initialize NI pools: %d\n", rc);
+ goto failed;
+ }
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ ibdev->ibd_nnets++;
+ list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ net->ibn_init = IBLND_INIT_ALL;
+
+ return 0;
+
+failed:
+ if (net->ibn_dev == NULL && ibdev != NULL)
+ kiblnd_destroy_dev(ibdev);
+
+ kiblnd_shutdown(ni);
+
+ CDEBUG(D_NET, "kiblnd_startup failed\n");
+ return -ENETDOWN;
+}
+
+void __exit
+kiblnd_module_fini (void)
+{
+ lnet_unregister_lnd(&the_o2iblnd);
+ kiblnd_tunables_fini();
+}
+
+int __init
+kiblnd_module_init (void)
+{
+ int rc;
+
+ CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE);
+ CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
+ <= IBLND_MSG_SIZE);
+ CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
+ <= IBLND_MSG_SIZE);
+
+ rc = kiblnd_tunables_init();
+ if (rc != 0)
+ return rc;
+
+ lnet_register_lnd(&the_o2iblnd);
+
+ return 0;
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v2.00");
+MODULE_LICENSE("GPL");
+
+module_init(kiblnd_module_init);
+module_exit(kiblnd_module_fini);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
new file mode 100644
index 000000000000..e4626bf82fc7
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -0,0 +1,1057 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd.h
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/uio.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+#include <linux/pci.h>
+
+#include <net/sock.h>
+#include <linux/in.h>
+
+#define DEBUG_SUBSYSTEM S_LND
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lnet-sysctl.h>
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_fmr_pool.h>
+
+#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
+/* # scheduler loops before reschedule */
+#define IBLND_RESCHED 100
+
+#define IBLND_N_SCHED 2
+#define IBLND_N_SCHED_HIGH 4
+
+typedef struct
+{
+ int *kib_dev_failover; /* HCA failover */
+ unsigned int *kib_service; /* IB service number */
+ int *kib_min_reconnect_interval; /* first failed connection retry... */
+ int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
+ int *kib_cksum; /* checksum kib_msg_t? */
+ int *kib_timeout; /* comms timeout (seconds) */
+ int *kib_keepalive; /* keepalive timeout (seconds) */
+ int *kib_ntx; /* # tx descs */
+ int *kib_credits; /* # concurrent sends */
+ int *kib_peertxcredits; /* # concurrent sends to 1 peer */
+ int *kib_peerrtrcredits; /* # per-peer router buffer credits */
+ int *kib_peercredits_hiw; /* # when eagerly to return credits */
+ int *kib_peertimeout; /* seconds to consider peer dead */
+ char **kib_default_ipif; /* default IPoIB interface */
+ int *kib_retry_count;
+ int *kib_rnr_retry_count;
+ int *kib_concurrent_sends; /* send work queue sizing */
+ int *kib_ib_mtu; /* IB MTU */
+ int *kib_map_on_demand; /* map-on-demand if RD has more fragments
+ * than this value, 0 disable map-on-demand */
+ int *kib_pmr_pool_size; /* # physical MR in pool */
+ int *kib_fmr_pool_size; /* # FMRs in pool */
+ int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
+ int *kib_fmr_cache; /* enable FMR pool cache? */
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+ ctl_table_header_t *kib_sysctl; /* sysctl interface */
+#endif
+ int *kib_require_priv_port;/* accept only privileged ports */
+ int *kib_use_priv_port; /* use privileged port for active connect */
+ /* # threads on each CPT */
+ int *kib_nscheds;
+} kib_tunables_t;
+
+extern kib_tunables_t kiblnd_tunables;
+
+#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */
+#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */
+
+#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
+#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer credits */
+
+#define IBLND_MSG_QUEUE_SIZE(v) ((v) == IBLND_MSG_VERSION_1 ? \
+ IBLND_MSG_QUEUE_SIZE_V1 : \
+ *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
+#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
+ IBLND_CREDIT_HIGHWATER_V1 : \
+ *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
+
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+
+static inline int
+kiblnd_concurrent_sends_v1(void)
+{
+ if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+ if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+
+ return *kiblnd_tunables.kib_concurrent_sends;
+}
+
+#define IBLND_CONCURRENT_SENDS(v) ((v) == IBLND_MSG_VERSION_1 ? \
+ kiblnd_concurrent_sends_v1() : \
+ *kiblnd_tunables.kib_concurrent_sends)
+/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
+#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
+#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
+
+#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
+#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand != 0 ? \
+ *kiblnd_tunables.kib_map_on_demand : \
+ IBLND_MAX_RDMA_FRAGS) /* max # of fragments configured by user */
+#define IBLND_RDMA_FRAGS(v) ((v) == IBLND_MSG_VERSION_1 ? \
+ IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)
+
+/************************/
+/* derived constants... */
+/* Pools (shared by connections on each CPT) */
+/* These pools can grow at runtime, so don't need give a very large value */
+#define IBLND_TX_POOL 256
+#define IBLND_PMR_POOL 256
+#define IBLND_FMR_POOL 256
+#define IBLND_FMR_POOL_FLUSH 192
+
+/* TX messages (shared by all connections) */
+#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx)
+
+/* RX messages (per connection) */
+#define IBLND_RX_MSGS(v) (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
+#define IBLND_RX_MSG_BYTES(v) (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(v) ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+
+/* WRs and CQEs (per connection) */
+#define IBLND_RECV_WRS(v) IBLND_RX_MSGS(v)
+#define IBLND_SEND_WRS(v) ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
+#define IBLND_CQ_ENTRIES(v) (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
+
+struct kib_hca_dev;
+
+/* o2iblnd can run over aliased interface */
+#ifdef IFALIASZ
+#define KIB_IFNAME_SIZE IFALIASZ
+#else
+#define KIB_IFNAME_SIZE 256
+#endif
+
+typedef struct
+{
+ struct list_head ibd_list; /* chain on kib_devs */
+ struct list_head ibd_fail_list; /* chain on kib_failed_devs */
+ __u32 ibd_ifip; /* IPoIB interface IP */
+ /** IPoIB interface name */
+ char ibd_ifname[KIB_IFNAME_SIZE];
+ int ibd_nnets; /* # nets extant */
+
+ cfs_time_t ibd_next_failover;
+ int ibd_failed_failover; /* # failover failures */
+ unsigned int ibd_failover; /* failover in progress */
+ unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */
+ struct list_head ibd_nets;
+ struct kib_hca_dev *ibd_hdev;
+} kib_dev_t;
+
+typedef struct kib_hca_dev
+{
+ struct rdma_cm_id *ibh_cmid; /* listener cmid */
+ struct ib_device *ibh_ibdev; /* IB device */
+ int ibh_page_shift; /* page shift of current HCA */
+ int ibh_page_size; /* page size of current HCA */
+ __u64 ibh_page_mask; /* page mask of current HCA */
+ int ibh_mr_shift; /* bits shift of max MR size */
+ __u64 ibh_mr_size; /* size of MR */
+ int ibh_nmrs; /* # of global MRs */
+ struct ib_mr **ibh_mrs; /* global MR */
+ struct ib_pd *ibh_pd; /* PD */
+ kib_dev_t *ibh_dev; /* owner */
+ atomic_t ibh_ref; /* refcount */
+} kib_hca_dev_t;
+
+/** # of seconds to keep pool alive */
+#define IBLND_POOL_DEADLINE 300
+/** # of seconds to retry if allocation failed */
+#define IBLND_POOL_RETRY 1
+
+typedef struct
+{
+ int ibp_npages; /* # pages */
+ struct page *ibp_pages[0]; /* page array */
+} kib_pages_t;
+
+struct kib_pmr_pool;
+
+typedef struct {
+ struct list_head pmr_list; /* chain node */
+ struct ib_phys_buf *pmr_ipb; /* physical buffer */
+ struct ib_mr *pmr_mr; /* IB MR */
+ struct kib_pmr_pool *pmr_pool; /* owner of this MR */
+ __u64 pmr_iova; /* Virtual I/O address */
+ int pmr_refcount; /* reference count */
+} kib_phys_mr_t;
+
+struct kib_pool;
+struct kib_poolset;
+
+typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps,
+ int inc, struct kib_pool **pp_po);
+typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
+typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
+typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);
+
+struct kib_net;
+
+#define IBLND_POOL_NAME_LEN 32
+
+typedef struct kib_poolset
+{
+ spinlock_t ps_lock; /* serialize */
+ struct kib_net *ps_net; /* network it belongs to */
+ char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
+ struct list_head ps_pool_list; /* list of pools */
+ struct list_head ps_failed_pool_list; /* failed pool list */
+ cfs_time_t ps_next_retry; /* time stamp for retry if failed to allocate */
+ int ps_increasing; /* is allocating new pool */
+ int ps_pool_size; /* new pool size */
+ int ps_cpt; /* CPT id */
+
+ kib_ps_pool_create_t ps_pool_create; /* create a new pool */
+ kib_ps_pool_destroy_t ps_pool_destroy; /* destroy a pool */
+ kib_ps_node_init_t ps_node_init; /* initialize new allocated node */
+ kib_ps_node_fini_t ps_node_fini; /* finalize node */
+} kib_poolset_t;
+
+typedef struct kib_pool
+{
+ struct list_head po_list; /* chain on pool list */
+ struct list_head po_free_list; /* pre-allocated node */
+ kib_poolset_t *po_owner; /* pool_set of this pool */
+ cfs_time_t po_deadline; /* deadline of this pool */
+ int po_allocated; /* # of elements in use */
+ int po_failed; /* pool is created on failed HCA */
+ int po_size; /* # of pre-allocated elements */
+} kib_pool_t;
+
+typedef struct {
+ kib_poolset_t tps_poolset; /* pool-set */
+ __u64 tps_next_tx_cookie; /* cookie of TX */
+} kib_tx_poolset_t;
+
+typedef struct {
+ kib_pool_t tpo_pool; /* pool */
+ struct kib_hca_dev *tpo_hdev; /* device for this pool */
+ struct kib_tx *tpo_tx_descs; /* all the tx descriptors */
+ kib_pages_t *tpo_tx_pages; /* premapped tx msg pages */
+} kib_tx_pool_t;
+
+typedef struct {
+ kib_poolset_t pps_poolset; /* pool-set */
+} kib_pmr_poolset_t;
+
+typedef struct kib_pmr_pool {
+ struct kib_hca_dev *ppo_hdev; /* device for this pool */
+ kib_pool_t ppo_pool; /* pool */
+} kib_pmr_pool_t;
+
+typedef struct
+{
+ spinlock_t fps_lock; /* serialize */
+ struct kib_net *fps_net; /* IB network */
+ struct list_head fps_pool_list; /* FMR pool list */
+ struct list_head fps_failed_pool_list; /* FMR pool list */
+ __u64 fps_version; /* validity stamp */
+ int fps_cpt; /* CPT id */
+ int fps_pool_size;
+ int fps_flush_trigger;
+ /* is allocating new pool */
+ int fps_increasing;
+ /* time stamp for retry if failed to allocate */
+ cfs_time_t fps_next_retry;
+} kib_fmr_poolset_t;
+
+typedef struct
+{
+ struct list_head fpo_list; /* chain on pool list */
+ struct kib_hca_dev *fpo_hdev; /* device for this pool */
+ kib_fmr_poolset_t *fpo_owner; /* owner of this pool */
+ struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
+ cfs_time_t fpo_deadline; /* deadline of this pool */
+ int fpo_failed; /* fmr pool is failed */
+ int fpo_map_count; /* # of mapped FMR */
+} kib_fmr_pool_t;
+
+typedef struct {
+ struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */
+ kib_fmr_pool_t *fmr_pool; /* pool of FMR */
+} kib_fmr_t;
+
+typedef struct kib_net
+{
+ struct list_head ibn_list; /* chain on kib_dev_t::ibd_nets */
+ __u64 ibn_incarnation; /* my epoch */
+ int ibn_init; /* initialisation state */
+ int ibn_shutdown; /* shutting down? */
+
+ atomic_t ibn_npeers; /* # peers extant */
+ atomic_t ibn_nconns; /* # connections extant */
+
+ kib_tx_poolset_t **ibn_tx_ps; /* tx pool-set */
+ kib_fmr_poolset_t **ibn_fmr_ps; /* fmr pool-set */
+ kib_pmr_poolset_t **ibn_pmr_ps; /* pmr pool-set */
+
+ kib_dev_t *ibn_dev; /* underlying IB device */
+} kib_net_t;
+
+#define KIB_THREAD_SHIFT 16
+#define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid))
+#define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT)
+#define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
+
+struct kib_sched_info {
+ /* serialise */
+ spinlock_t ibs_lock;
+ /* schedulers sleep here */
+ wait_queue_head_t ibs_waitq;
+ /* conns to check for rx completions */
+ struct list_head ibs_conns;
+ /* number of scheduler threads */
+ int ibs_nthreads;
+ /* max allowed scheduler threads */
+ int ibs_nthreads_max;
+ int ibs_cpt; /* CPT id */
+};
+
+typedef struct
+{
+ int kib_init; /* initialisation state */
+ int kib_shutdown; /* shut down? */
+ struct list_head kib_devs; /* IB devices extant */
+ /* list head of failed devices */
+ struct list_head kib_failed_devs;
+ /* schedulers sleep here */
+ wait_queue_head_t kib_failover_waitq;
+ atomic_t kib_nthreads; /* # live threads */
+ /* stabilize net/dev/peer/conn ops */
+ rwlock_t kib_global_lock;
+ /* hash table of all my known peers */
+ struct list_head *kib_peers;
+ /* size of kib_peers */
+ int kib_peer_hash_size;
+ /* the connd task (serialisation assertions) */
+ void *kib_connd;
+ /* connections to setup/teardown */
+ struct list_head kib_connd_conns;
+ /* connections with zero refcount */
+ struct list_head kib_connd_zombies;
+ /* connection daemon sleeps here */
+ wait_queue_head_t kib_connd_waitq;
+ spinlock_t kib_connd_lock; /* serialise */
+ struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
+ /* percpt data for schedulers */
+ struct kib_sched_info **kib_scheds;
+} kib_data_t;
+
+#define IBLND_INIT_NOTHING 0
+#define IBLND_INIT_DATA 1
+#define IBLND_INIT_ALL 2
+
+/************************************************************************
+ * IB Wire message format.
+ * These are sent in sender's byte order (i.e. receiver flips).
+ */
+
+typedef struct kib_connparams
+{
+ __u16 ibcp_queue_depth;
+ __u16 ibcp_max_frags;
+ __u32 ibcp_max_msg_size;
+} WIRE_ATTR kib_connparams_t;
+
+typedef struct
+{
+ lnet_hdr_t ibim_hdr; /* portals header */
+ char ibim_payload[0]; /* piggy-backed payload */
+} WIRE_ATTR kib_immediate_msg_t;
+
+typedef struct
+{
+ __u32 rf_nob; /* # bytes this frag */
+ __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! */
+} WIRE_ATTR kib_rdma_frag_t;
+
+typedef struct
+{
+ __u32 rd_key; /* local/remote key */
+ __u32 rd_nfrags; /* # fragments */
+ kib_rdma_frag_t rd_frags[0]; /* buffer frags */
+} WIRE_ATTR kib_rdma_desc_t;
+
+typedef struct
+{
+ lnet_hdr_t ibprm_hdr; /* portals header */
+ __u64 ibprm_cookie; /* opaque completion cookie */
+} WIRE_ATTR kib_putreq_msg_t;
+
+typedef struct
+{
+ __u64 ibpam_src_cookie; /* reflected completion cookie */
+ __u64 ibpam_dst_cookie; /* opaque completion cookie */
+ kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */
+} WIRE_ATTR kib_putack_msg_t;
+
+typedef struct
+{
+ lnet_hdr_t ibgm_hdr; /* portals header */
+ __u64 ibgm_cookie; /* opaque completion cookie */
+ kib_rdma_desc_t ibgm_rd; /* rdma descriptor */
+} WIRE_ATTR kib_get_msg_t;
+
+typedef struct
+{
+ __u64 ibcm_cookie; /* opaque completion cookie */
+ __s32 ibcm_status; /* < 0 failure: >= 0 length */
+} WIRE_ATTR kib_completion_msg_t;
+
+typedef struct
+{
+ /* First 2 fields fixed FOR ALL TIME */
+ __u32 ibm_magic; /* I'm an ibnal message */
+ __u16 ibm_version; /* this is my version number */
+
+ __u8 ibm_type; /* msg type */
+ __u8 ibm_credits; /* returned credits */
+ __u32 ibm_nob; /* # bytes in whole message */
+ __u32 ibm_cksum; /* checksum (0 == no checksum) */
+ __u64 ibm_srcnid; /* sender's NID */
+ __u64 ibm_srcstamp; /* sender's incarnation */
+ __u64 ibm_dstnid; /* destination's NID */
+ __u64 ibm_dststamp; /* destination's incarnation */
+
+ union {
+ kib_connparams_t connparams;
+ kib_immediate_msg_t immediate;
+ kib_putreq_msg_t putreq;
+ kib_putack_msg_t putack;
+ kib_get_msg_t get;
+ kib_completion_msg_t completion;
+ } WIRE_ATTR ibm_u;
+} WIRE_ATTR kib_msg_t;
+
+#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */
+
+#define IBLND_MSG_VERSION_1 0x11
+#define IBLND_MSG_VERSION_2 0x12
+#define IBLND_MSG_VERSION IBLND_MSG_VERSION_2
+
+#define IBLND_MSG_CONNREQ 0xc0 /* connection request */
+#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */
+#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */
+#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */
+#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
+#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
+#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
+#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
+#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
+#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
+
+typedef struct {
+ __u32 ibr_magic; /* sender's magic */
+ __u16 ibr_version; /* sender's version */
+ __u8 ibr_why; /* reject reason */
+ __u8 ibr_padding; /* padding */
+ __u64 ibr_incarnation; /* incarnation of peer */
+ kib_connparams_t ibr_cp; /* connection parameters */
+} WIRE_ATTR kib_rej_t;
+
+/* connection rejection reasons */
+#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */
+#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
+#define IBLND_REJECT_FATAL 3 /* Anything else */
+
+#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
+#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
+
+#define IBLND_REJECT_RDMA_FRAGS 6 /* Fatal: peer's rdma frags can't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /* Fatal: peer's msg queue size can't match mine */
+
+/***********************************************************************/
+
+typedef struct kib_rx /* receive message */
+{
+ struct list_head rx_list; /* queue for attention */
+ struct kib_conn *rx_conn; /* owning conn */
+ int rx_nob; /* # bytes received (-1 while posted) */
+ enum ib_wc_status rx_status; /* completion status */
+ kib_msg_t *rx_msg; /* message buffer (host vaddr) */
+ __u64 rx_msgaddr; /* message buffer (I/O addr) */
+ DECLARE_PCI_UNMAP_ADDR (rx_msgunmap); /* for dma_unmap_single() */
+ struct ib_recv_wr rx_wrq; /* receive work item... */
+ struct ib_sge rx_sge; /* ...and its memory */
+} kib_rx_t;
+
+#define IBLND_POSTRX_DONT_POST 0 /* don't post */
+#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
+#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
+#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */
+
+typedef struct kib_tx /* transmit message */
+{
+ struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
+ kib_tx_pool_t *tx_pool; /* pool I'm from */
+ struct kib_conn *tx_conn; /* owning conn */
+ short tx_sending; /* # tx callbacks outstanding */
+ short tx_queued; /* queued for sending */
+ short tx_waiting; /* waiting for peer */
+ int tx_status; /* LNET completion status */
+ unsigned long tx_deadline; /* completion deadline */
+ __u64 tx_cookie; /* completion cookie */
+ lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
+ kib_msg_t *tx_msg; /* message buffer (host vaddr) */
+ __u64 tx_msgaddr; /* message buffer (I/O addr) */
+ DECLARE_PCI_UNMAP_ADDR (tx_msgunmap); /* for dma_unmap_single() */
+ int tx_nwrq; /* # send work items */
+ struct ib_send_wr *tx_wrq; /* send work items... */
+ struct ib_sge *tx_sge; /* ...and their memory */
+ kib_rdma_desc_t *tx_rd; /* rdma descriptor */
+ int tx_nfrags; /* # entries in... */
+ struct scatterlist *tx_frags; /* dma_map_sg descriptor */
+ __u64 *tx_pages; /* rdma phys page addrs */
+ union {
+ kib_phys_mr_t *pmr; /* MR for physical buffer */
+ kib_fmr_t fmr; /* FMR */
+ } tx_u;
+ int tx_dmadir; /* dma direction */
+} kib_tx_t;
+
+typedef struct kib_connvars
+{
+ /* connection-in-progress variables */
+ kib_msg_t cv_msg;
+} kib_connvars_t;
+
+typedef struct kib_conn
+{
+ struct kib_sched_info *ibc_sched; /* scheduler information */
+ struct kib_peer *ibc_peer; /* owning peer */
+ kib_hca_dev_t *ibc_hdev; /* HCA bound on */
+ struct list_head ibc_list; /* stash on peer's conn list */
+ struct list_head ibc_sched_list; /* schedule for attention */
+ __u16 ibc_version; /* version of connection */
+ __u64 ibc_incarnation; /* which instance of the peer */
+ atomic_t ibc_refcount; /* # users */
+ int ibc_state; /* what's happening */
+ int ibc_nsends_posted; /* # uncompleted sends */
+ int ibc_noops_posted; /* # uncompleted NOOPs */
+ int ibc_credits; /* # credits I have */
+ int ibc_outstanding_credits; /* # credits to return */
+ int ibc_reserved_credits;/* # ACK/DONE msg credits */
+ int ibc_comms_error; /* set on comms error */
+ unsigned int ibc_nrx:16; /* receive buffers owned */
+ unsigned int ibc_scheduled:1; /* scheduled for attention */
+ unsigned int ibc_ready:1; /* CQ callback fired */
+ /* time of last send */
+ unsigned long ibc_last_send;
+ /** link chain for kiblnd_check_conns only */
+ struct list_head ibc_connd_list;
+ /** rxs completed before ESTABLISHED */
+ struct list_head ibc_early_rxs;
+ /** IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
+ struct list_head ibc_tx_noops;
+ struct list_head ibc_tx_queue; /* sends that need a credit */
+ struct list_head ibc_tx_queue_nocred;/* sends that don't need a credit */
+ struct list_head ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
+ struct list_head ibc_active_txs; /* active tx awaiting completion */
+ spinlock_t ibc_lock; /* serialise */
+ kib_rx_t *ibc_rxs; /* the rx descs */
+ kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
+
+ struct rdma_cm_id *ibc_cmid; /* CM id */
+ struct ib_cq *ibc_cq; /* completion queue */
+
+ kib_connvars_t *ibc_connvars; /* in-progress connection state */
+} kib_conn_t;
+
+#define IBLND_CONN_INIT 0 /* being initialised */
+#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */
+#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */
+#define IBLND_CONN_ESTABLISHED 3 /* connection established */
+#define IBLND_CONN_CLOSING 4 /* being closed */
+#define IBLND_CONN_DISCONNECTED 5 /* disconnected */
+
+typedef struct kib_peer
+{
+ struct list_head ibp_list; /* stash on global peer list */
+ lnet_nid_t ibp_nid; /* who's on the other end(s) */
+ lnet_ni_t *ibp_ni; /* LNet interface */
+ atomic_t ibp_refcount; /* # users */
+ struct list_head ibp_conns; /* all active connections */
+ struct list_head ibp_tx_queue; /* msgs waiting for a conn */
+ __u16 ibp_version; /* version of peer */
+ __u64 ibp_incarnation; /* incarnation of peer */
+ int ibp_connecting; /* current active connection attempts */
+ int ibp_accepting; /* current passive connection attempts */
+ int ibp_error; /* errno on closing this peer */
+ cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
+} kib_peer_t;
+
+extern kib_data_t kiblnd_data;
+
+extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
+
+static inline void
+kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
+{
+ LASSERT (atomic_read(&hdev->ibh_ref) > 0);
+ atomic_inc(&hdev->ibh_ref);
+}
+
+static inline void
+kiblnd_hdev_decref(kib_hca_dev_t *hdev)
+{
+ LASSERT (atomic_read(&hdev->ibh_ref) > 0);
+ if (atomic_dec_and_test(&hdev->ibh_ref))
+ kiblnd_hdev_destroy(hdev);
+}
+
+static inline int
+kiblnd_dev_can_failover(kib_dev_t *dev)
+{
+ if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
+ return 0;
+
+ if (*kiblnd_tunables.kib_dev_failover == 0) /* disabled */
+ return 0;
+
+ if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
+ return 1;
+
+ return dev->ibd_can_failover;
+}
+
+#define kiblnd_conn_addref(conn) \
+do { \
+ CDEBUG(D_NET, "conn[%p] (%d)++\n", \
+ (conn), atomic_read(&(conn)->ibc_refcount)); \
+ atomic_inc(&(conn)->ibc_refcount); \
+} while (0)
+
+#define kiblnd_conn_decref(conn) \
+do { \
+ unsigned long flags; \
+ \
+ CDEBUG(D_NET, "conn[%p] (%d)--\n", \
+ (conn), atomic_read(&(conn)->ibc_refcount)); \
+ LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \
+ if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
+ list_add_tail(&(conn)->ibc_list, \
+ &kiblnd_data.kib_connd_zombies); \
+ wake_up(&kiblnd_data.kib_connd_waitq); \
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
+ } \
+} while (0)
+
+#define kiblnd_peer_addref(peer) \
+do { \
+ CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
+ (peer), libcfs_nid2str((peer)->ibp_nid), \
+ atomic_read (&(peer)->ibp_refcount)); \
+ atomic_inc(&(peer)->ibp_refcount); \
+} while (0)
+
+#define kiblnd_peer_decref(peer) \
+do { \
+ CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
+ (peer), libcfs_nid2str((peer)->ibp_nid), \
+ atomic_read (&(peer)->ibp_refcount)); \
+ LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \
+ if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
+ kiblnd_destroy_peer(peer); \
+} while (0)
+
+static inline struct list_head *
+kiblnd_nid2peerlist (lnet_nid_t nid)
+{
+ unsigned int hash =
+ ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;
+
+ return (&kiblnd_data.kib_peers [hash]);
+}
+
+static inline int
+kiblnd_peer_active (kib_peer_t *peer)
+{
+ /* Am I in the peer hash table? */
+ return (!list_empty(&peer->ibp_list));
+}
+
+static inline kib_conn_t *
+kiblnd_get_conn_locked (kib_peer_t *peer)
+{
+ LASSERT (!list_empty(&peer->ibp_conns));
+
+ /* just return the first connection */
+ return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
+}
+
+static inline int
+kiblnd_send_keepalive(kib_conn_t *conn)
+{
+ return (*kiblnd_tunables.kib_keepalive > 0) &&
+ cfs_time_after(jiffies, conn->ibc_last_send +
+ *kiblnd_tunables.kib_keepalive*HZ);
+}
+
+static inline int
+kiblnd_need_noop(kib_conn_t *conn)
+{
+ LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+ if (conn->ibc_outstanding_credits <
+ IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+ !kiblnd_send_keepalive(conn))
+ return 0; /* No need to send NOOP */
+
+ if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
+ if (!list_empty(&conn->ibc_tx_queue_nocred))
+ return 0; /* NOOP can be piggybacked */
+
+ /* No tx to piggyback NOOP onto or no credit to send a tx */
+ return (list_empty(&conn->ibc_tx_queue) ||
+ conn->ibc_credits == 0);
+ }
+
+ if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
+ !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
+ conn->ibc_credits == 0) /* no credit */
+ return 0;
+
+ if (conn->ibc_credits == 1 && /* last credit reserved for */
+ conn->ibc_outstanding_credits == 0) /* giving back credits */
+ return 0;
+
+ /* No tx to piggyback NOOP onto or no credit to send a tx */
+ return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
+}
+
+static inline void
+kiblnd_abort_receives(kib_conn_t *conn)
+{
+ ib_modify_qp(conn->ibc_cmid->qp,
+ &kiblnd_data.kib_error_qpa, IB_QP_STATE);
+}
+
+static inline const char *
+kiblnd_queue2str (kib_conn_t *conn, struct list_head *q)
+{
+ if (q == &conn->ibc_tx_queue)
+ return "tx_queue";
+
+ if (q == &conn->ibc_tx_queue_rsrvd)
+ return "tx_queue_rsrvd";
+
+ if (q == &conn->ibc_tx_queue_nocred)
+ return "tx_queue_nocred";
+
+ if (q == &conn->ibc_active_txs)
+ return "active_txs";
+
+ LBUG();
+ return NULL;
+}
+
+/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
+ * lowest bits of the work request id to stash the work item type. */
+
+#define IBLND_WID_TX 0
+#define IBLND_WID_RDMA 1
+#define IBLND_WID_RX 2
+#define IBLND_WID_MASK 3UL
+
+static inline __u64
+kiblnd_ptr2wreqid (void *ptr, int type)
+{
+ unsigned long lptr = (unsigned long)ptr;
+
+ LASSERT ((lptr & IBLND_WID_MASK) == 0);
+ LASSERT ((type & ~IBLND_WID_MASK) == 0);
+ return (__u64)(lptr | type);
+}
+
+static inline void *
+kiblnd_wreqid2ptr (__u64 wreqid)
+{
+ return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
+}
+
+static inline int
+kiblnd_wreqid2type (__u64 wreqid)
+{
+ return (wreqid & IBLND_WID_MASK);
+}
+
+static inline void
+kiblnd_set_conn_state (kib_conn_t *conn, int state)
+{
+ conn->ibc_state = state;
+ mb();
+}
+
+static inline void
+kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
+{
+ msg->ibm_type = type;
+ msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
+}
+
+static inline int
+kiblnd_rd_size (kib_rdma_desc_t *rd)
+{
+ int i;
+ int size;
+
+ for (i = size = 0; i < rd->rd_nfrags; i++)
+ size += rd->rd_frags[i].rf_nob;
+
+ return size;
+}
+
+static inline __u64
+kiblnd_rd_frag_addr(kib_rdma_desc_t *rd, int index)
+{
+ return rd->rd_frags[index].rf_addr;
+}
+
+static inline __u32
+kiblnd_rd_frag_size(kib_rdma_desc_t *rd, int index)
+{
+ return rd->rd_frags[index].rf_nob;
+}
+
+static inline __u32
+kiblnd_rd_frag_key(kib_rdma_desc_t *rd, int index)
+{
+ return rd->rd_key;
+}
+
+static inline int
+kiblnd_rd_consume_frag(kib_rdma_desc_t *rd, int index, __u32 nob)
+{
+ if (nob < rd->rd_frags[index].rf_nob) {
+ rd->rd_frags[index].rf_addr += nob;
+ rd->rd_frags[index].rf_nob -= nob;
+ } else {
+ index ++;
+ }
+
+ return index;
+}
+
+static inline int
+kiblnd_rd_msg_size(kib_rdma_desc_t *rd, int msgtype, int n)
+{
+ LASSERT (msgtype == IBLND_MSG_GET_REQ ||
+ msgtype == IBLND_MSG_PUT_ACK);
+
+ return msgtype == IBLND_MSG_GET_REQ ?
+ offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]) :
+ offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
+}
+
+
+static inline __u64
+kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return ib_dma_mapping_error(dev, dma_addr);
+}
+
+static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
+ void *msg, size_t size,
+ enum dma_data_direction direction)
+{
+ return ib_dma_map_single(dev, msg, size, direction);
+}
+
+static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
+ __u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ ib_dma_unmap_single(dev, addr, size, direction);
+}
+
+#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
+#define KIBLND_UNMAP_ADDR(p, m, a) (a)
+
+static inline int kiblnd_dma_map_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ return ib_dma_map_sg(dev, sg, nents, direction);
+}
+
+static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ ib_dma_unmap_sg(dev, sg, nents, direction);
+}
+
+static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
+ struct scatterlist *sg)
+{
+ return ib_sg_dma_address(dev, sg);
+}
+
+static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
+ struct scatterlist *sg)
+{
+ return ib_sg_dma_len(dev, sg);
+}
+
+/* XXX We use KIBLND_CONN_PARAM(e) as writable buffer, it's not strictly
+ * right because OFED1.2 defines it as const, to use it we have to add
+ * (void *) cast to overcome "const" */
+
+#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
+#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
+
+
+struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
+ kib_rdma_desc_t *rd);
+struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev,
+ __u64 addr, __u64 size);
+void kiblnd_map_rx_descs(kib_conn_t *conn);
+void kiblnd_unmap_rx_descs(kib_conn_t *conn);
+int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
+ kib_rdma_desc_t *rd, int nfrags);
+void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx);
+void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
+struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
+
+int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
+ int npages, __u64 iov, kib_fmr_t *fmr);
+void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
+
+int kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
+ kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr);
+void kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr);
+
+int kiblnd_startup (lnet_ni_t *ni);
+void kiblnd_shutdown (lnet_ni_t *ni);
+int kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
+void kiblnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
+
+int kiblnd_tunables_init(void);
+void kiblnd_tunables_fini(void);
+
+int kiblnd_connd (void *arg);
+int kiblnd_scheduler(void *arg);
+int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
+int kiblnd_failover_thread (void *arg);
+
+int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);
+void kiblnd_free_pages (kib_pages_t *p);
+
+int kiblnd_cm_callback(struct rdma_cm_id *cmid,
+ struct rdma_cm_event *event);
+int kiblnd_translate_mtu(int value);
+
+int kiblnd_dev_failover(kib_dev_t *dev);
+int kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
+void kiblnd_destroy_peer (kib_peer_t *peer);
+void kiblnd_destroy_dev (kib_dev_t *dev);
+void kiblnd_unlink_peer_locked (kib_peer_t *peer);
+void kiblnd_peer_alive (kib_peer_t *peer);
+kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
+void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error);
+int kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+ int version, __u64 incarnation);
+int kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);
+
+void kiblnd_connreq_done(kib_conn_t *conn, int status);
+kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid,
+ int state, int version);
+void kiblnd_destroy_conn (kib_conn_t *conn);
+void kiblnd_close_conn (kib_conn_t *conn, int error);
+void kiblnd_close_conn_locked (kib_conn_t *conn, int error);
+
+int kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
+ int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
+
+void kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
+void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
+void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
+void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
+void kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
+ int status);
+void kiblnd_check_sends (kib_conn_t *conn);
+
+void kiblnd_qp_event(struct ib_event *event, void *arg);
+void kiblnd_cq_event(struct ib_event *event, void *arg);
+void kiblnd_cq_completion(struct ib_cq *cq, void *arg);
+
+void kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, int version,
+ int credits, lnet_nid_t dstnid, __u64 dststamp);
+int kiblnd_unpack_msg(kib_msg_t *msg, int nob);
+int kiblnd_post_rx (kib_rx_t *rx, int credit);
+
+int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
+int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
+ unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
new file mode 100644
index 000000000000..cc6232126dd0
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -0,0 +1,3529 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd_cb.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+
+void
+kiblnd_tx_done (lnet_ni_t *ni, kib_tx_t *tx)
+{
+ lnet_msg_t *lntmsg[2];
+ kib_net_t *net = ni->ni_data;
+ int rc;
+ int i;
+
+ LASSERT (net != NULL);
+ LASSERT (!in_interrupt());
+ LASSERT (!tx->tx_queued); /* mustn't be queued for sending */
+ LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
+ LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
+ LASSERT (tx->tx_pool != NULL);
+
+ kiblnd_unmap_tx(ni, tx);
+
+ /* tx may have up to 2 lnet msgs to finalise */
+ lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
+ lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
+ rc = tx->tx_status;
+
+ if (tx->tx_conn != NULL) {
+ LASSERT (ni == tx->tx_conn->ibc_peer->ibp_ni);
+
+ kiblnd_conn_decref(tx->tx_conn);
+ tx->tx_conn = NULL;
+ }
+
+ tx->tx_nwrq = 0;
+ tx->tx_status = 0;
+
+ kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
+
+ /* delay finalize until my descs have been freed */
+ for (i = 0; i < 2; i++) {
+ if (lntmsg[i] == NULL)
+ continue;
+
+ lnet_finalize(ni, lntmsg[i], rc);
+ }
+}
+
+void
+kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status)
+{
+ kib_tx_t *tx;
+
+ while (!list_empty (txlist)) {
+ tx = list_entry (txlist->next, kib_tx_t, tx_list);
+
+ list_del(&tx->tx_list);
+ /* complete now */
+ tx->tx_waiting = 0;
+ tx->tx_status = status;
+ kiblnd_tx_done(ni, tx);
+ }
+}
+
+kib_tx_t *
+kiblnd_get_idle_tx(lnet_ni_t *ni, lnet_nid_t target)
+{
+ kib_net_t *net = (kib_net_t *)ni->ni_data;
+ struct list_head *node;
+ kib_tx_t *tx;
+ kib_tx_poolset_t *tps;
+
+ tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
+ node = kiblnd_pool_alloc_node(&tps->tps_poolset);
+ if (node == NULL)
+ return NULL;
+ tx = container_of(node, kib_tx_t, tx_list);
+
+ LASSERT (tx->tx_nwrq == 0);
+ LASSERT (!tx->tx_queued);
+ LASSERT (tx->tx_sending == 0);
+ LASSERT (!tx->tx_waiting);
+ LASSERT (tx->tx_status == 0);
+ LASSERT (tx->tx_conn == NULL);
+ LASSERT (tx->tx_lntmsg[0] == NULL);
+ LASSERT (tx->tx_lntmsg[1] == NULL);
+ LASSERT (tx->tx_u.pmr == NULL);
+ LASSERT (tx->tx_nfrags == 0);
+
+ return tx;
+}
+
+void
+kiblnd_drop_rx(kib_rx_t *rx)
+{
+ kib_conn_t *conn = rx->rx_conn;
+ struct kib_sched_info *sched = conn->ibc_sched;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ LASSERT(conn->ibc_nrx > 0);
+ conn->ibc_nrx--;
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ kiblnd_conn_decref(conn);
+}
+
+int
+kiblnd_post_rx (kib_rx_t *rx, int credit)
+{
+ kib_conn_t *conn = rx->rx_conn;
+ kib_net_t *net = conn->ibc_peer->ibp_ni->ni_data;
+ struct ib_recv_wr *bad_wrq = NULL;
+ struct ib_mr *mr;
+ int rc;
+
+ LASSERT (net != NULL);
+ LASSERT (!in_interrupt());
+ LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
+ credit == IBLND_POSTRX_PEER_CREDIT ||
+ credit == IBLND_POSTRX_RSRVD_CREDIT);
+
+ mr = kiblnd_find_dma_mr(conn->ibc_hdev, rx->rx_msgaddr, IBLND_MSG_SIZE);
+ LASSERT (mr != NULL);
+
+ rx->rx_sge.lkey = mr->lkey;
+ rx->rx_sge.addr = rx->rx_msgaddr;
+ rx->rx_sge.length = IBLND_MSG_SIZE;
+
+ rx->rx_wrq.next = NULL;
+ rx->rx_wrq.sg_list = &rx->rx_sge;
+ rx->rx_wrq.num_sge = 1;
+ rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX);
+
+ LASSERT (conn->ibc_state >= IBLND_CONN_INIT);
+ LASSERT (rx->rx_nob >= 0); /* not posted */
+
+ if (conn->ibc_state > IBLND_CONN_ESTABLISHED) {
+ kiblnd_drop_rx(rx); /* No more posts for this rx */
+ return 0;
+ }
+
+ rx->rx_nob = -1; /* flag posted */
+
+ rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq);
+ if (rc != 0) {
+ CERROR("Can't post rx for %s: %d, bad_wrq: %p\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq);
+ rx->rx_nob = 0;
+ }
+
+ if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */
+ return rc;
+
+ if (rc != 0) {
+ kiblnd_close_conn(conn, rc);
+ kiblnd_drop_rx(rx); /* No more posts for this rx */
+ return rc;
+ }
+
+ if (credit == IBLND_POSTRX_NO_CREDIT)
+ return 0;
+
+ spin_lock(&conn->ibc_lock);
+ if (credit == IBLND_POSTRX_PEER_CREDIT)
+ conn->ibc_outstanding_credits++;
+ else
+ conn->ibc_reserved_credits++;
+ spin_unlock(&conn->ibc_lock);
+
+ kiblnd_check_sends(conn);
+ return 0;
+}
+
+kib_tx_t *
+kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
+{
+ struct list_head *tmp;
+
+ list_for_each(tmp, &conn->ibc_active_txs) {
+ kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
+
+ LASSERT (!tx->tx_queued);
+ LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
+
+ if (tx->tx_cookie != cookie)
+ continue;
+
+ if (tx->tx_waiting &&
+ tx->tx_msg->ibm_type == txtype)
+ return tx;
+
+ CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
+ tx->tx_waiting ? "" : "NOT ",
+ tx->tx_msg->ibm_type, txtype);
+ }
+ return NULL;
+}
+
+void
+kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
+{
+ kib_tx_t *tx;
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ int idle;
+
+ spin_lock(&conn->ibc_lock);
+
+ tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie);
+ if (tx == NULL) {
+ spin_unlock(&conn->ibc_lock);
+
+ CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
+ txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ kiblnd_close_conn(conn, -EPROTO);
+ return;
+ }
+
+ if (tx->tx_status == 0) { /* success so far */
+ if (status < 0) { /* failed? */
+ tx->tx_status = status;
+ } else if (txtype == IBLND_MSG_GET_REQ) {
+ lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status);
+ }
+ }
+
+ tx->tx_waiting = 0;
+
+ idle = !tx->tx_queued && (tx->tx_sending == 0);
+ if (idle)
+ list_del(&tx->tx_list);
+
+ spin_unlock(&conn->ibc_lock);
+
+ if (idle)
+ kiblnd_tx_done(ni, tx);
+}
+
+void
+kiblnd_send_completion(kib_conn_t *conn, int type, int status, __u64 cookie)
+{
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ kib_tx_t *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+
+ if (tx == NULL) {
+ CERROR("Can't get tx for completion %x for %s\n",
+ type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ return;
+ }
+
+ tx->tx_msg->ibm_u.completion.ibcm_status = status;
+ tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
+ kiblnd_init_tx_msg(ni, tx, type, sizeof(kib_completion_msg_t));
+
+ kiblnd_queue_tx(tx, conn);
+}
+
+void
+kiblnd_handle_rx (kib_rx_t *rx)
+{
+ kib_msg_t *msg = rx->rx_msg;
+ kib_conn_t *conn = rx->rx_conn;
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ int credits = msg->ibm_credits;
+ kib_tx_t *tx;
+ int rc = 0;
+ int rc2;
+ int post_credit;
+
+ LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+ CDEBUG (D_NET, "Received %x[%d] from %s\n",
+ msg->ibm_type, credits,
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+
+ if (credits != 0) {
+ /* Have I received credits that will let me send? */
+ spin_lock(&conn->ibc_lock);
+
+ if (conn->ibc_credits + credits >
+ IBLND_MSG_QUEUE_SIZE(conn->ibc_version)) {
+ rc2 = conn->ibc_credits;
+ spin_unlock(&conn->ibc_lock);
+
+ CERROR("Bad credits from %s: %d + %d > %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ rc2, credits,
+ IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+
+ kiblnd_close_conn(conn, -EPROTO);
+ kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT);
+ return;
+ }
+
+ conn->ibc_credits += credits;
+
+ /* This ensures the credit taken by NOOP can be returned */
+ if (msg->ibm_type == IBLND_MSG_NOOP &&
+ !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
+ conn->ibc_outstanding_credits++;
+
+ spin_unlock(&conn->ibc_lock);
+ kiblnd_check_sends(conn);
+ }
+
+ switch (msg->ibm_type) {
+ default:
+ CERROR("Bad IBLND message type %x from %s\n",
+ msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ rc = -EPROTO;
+ break;
+
+ case IBLND_MSG_NOOP:
+ if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ break;
+ }
+
+ if (credits != 0) /* credit already posted */
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ else /* a keepalive NOOP */
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
+ break;
+
+ case IBLND_MSG_IMMEDIATE:
+ post_credit = IBLND_POSTRX_DONT_POST;
+ rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr,
+ msg->ibm_srcnid, rx, 0);
+ if (rc < 0) /* repost on error */
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
+ break;
+
+ case IBLND_MSG_PUT_REQ:
+ post_credit = IBLND_POSTRX_DONT_POST;
+ rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr,
+ msg->ibm_srcnid, rx, 1);
+ if (rc < 0) /* repost on error */
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
+ break;
+
+ case IBLND_MSG_PUT_NAK:
+ CWARN ("PUT_NACK from %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+ kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ,
+ msg->ibm_u.completion.ibcm_status,
+ msg->ibm_u.completion.ibcm_cookie);
+ break;
+
+ case IBLND_MSG_PUT_ACK:
+ post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+
+ spin_lock(&conn->ibc_lock);
+ tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ,
+ msg->ibm_u.putack.ibpam_src_cookie);
+ if (tx != NULL)
+ list_del(&tx->tx_list);
+ spin_unlock(&conn->ibc_lock);
+
+ if (tx == NULL) {
+ CERROR("Unmatched PUT_ACK from %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ rc = -EPROTO;
+ break;
+ }
+
+ LASSERT (tx->tx_waiting);
+ /* CAVEAT EMPTOR: I could be racing with tx_complete, but...
+ * (a) I can overwrite tx_msg since my peer has received it!
+ * (b) tx_waiting set tells tx_complete() it's not done. */
+
+ tx->tx_nwrq = 0; /* overwrite PUT_REQ */
+
+ rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE,
+ kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd),
+ &msg->ibm_u.putack.ibpam_rd,
+ msg->ibm_u.putack.ibpam_dst_cookie);
+ if (rc2 < 0)
+ CERROR("Can't setup rdma for PUT to %s: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
+
+ spin_lock(&conn->ibc_lock);
+ tx->tx_waiting = 0; /* clear waiting and queue atomically */
+ kiblnd_queue_tx_locked(tx, conn);
+ spin_unlock(&conn->ibc_lock);
+ break;
+
+ case IBLND_MSG_PUT_DONE:
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
+ kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK,
+ msg->ibm_u.completion.ibcm_status,
+ msg->ibm_u.completion.ibcm_cookie);
+ break;
+
+ case IBLND_MSG_GET_REQ:
+ post_credit = IBLND_POSTRX_DONT_POST;
+ rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr,
+ msg->ibm_srcnid, rx, 1);
+ if (rc < 0) /* repost on error */
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
+ break;
+
+ case IBLND_MSG_GET_DONE:
+ post_credit = IBLND_POSTRX_RSRVD_CREDIT;
+ kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ,
+ msg->ibm_u.completion.ibcm_status,
+ msg->ibm_u.completion.ibcm_cookie);
+ break;
+ }
+
+ if (rc < 0) /* protocol error */
+ kiblnd_close_conn(conn, rc);
+
+ if (post_credit != IBLND_POSTRX_DONT_POST)
+ kiblnd_post_rx(rx, post_credit);
+}
+
+void
+kiblnd_rx_complete (kib_rx_t *rx, int status, int nob)
+{
+ kib_msg_t *msg = rx->rx_msg;
+ kib_conn_t *conn = rx->rx_conn;
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ kib_net_t *net = ni->ni_data;
+ int rc;
+ int err = -EIO;
+
+ LASSERT (net != NULL);
+ LASSERT (rx->rx_nob < 0); /* was posted */
+ rx->rx_nob = 0; /* isn't now */
+
+ if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
+ goto ignore;
+
+ if (status != IB_WC_SUCCESS) {
+ CNETERR("Rx from %s failed: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
+ goto failed;
+ }
+
+ LASSERT (nob >= 0);
+ rx->rx_nob = nob;
+
+ rc = kiblnd_unpack_msg(msg, rx->rx_nob);
+ if (rc != 0) {
+ CERROR ("Error %d unpacking rx from %s\n",
+ rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ goto failed;
+ }
+
+ if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
+ msg->ibm_dstnid != ni->ni_nid ||
+ msg->ibm_srcstamp != conn->ibc_incarnation ||
+ msg->ibm_dststamp != net->ibn_incarnation) {
+ CERROR ("Stale rx from %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ err = -ESTALE;
+ goto failed;
+ }
+
+ /* set time last known alive */
+ kiblnd_peer_alive(conn->ibc_peer);
+
+ /* racing with connection establishment/teardown! */
+
+ if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+ unsigned long flags;
+
+ write_lock_irqsave(g_lock, flags);
+ /* must check holding global lock to eliminate race */
+ if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+ list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
+ write_unlock_irqrestore(g_lock, flags);
+ return;
+ }
+ write_unlock_irqrestore(g_lock, flags);
+ }
+ kiblnd_handle_rx(rx);
+ return;
+
+ failed:
+ CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
+ kiblnd_close_conn(conn, err);
+ ignore:
+ kiblnd_drop_rx(rx); /* Don't re-post rx. */
+}
+
+struct page *
+kiblnd_kvaddr_to_page (unsigned long vaddr)
+{
+ struct page *page;
+
+ if (vaddr >= VMALLOC_START &&
+ vaddr < VMALLOC_END) {
+ page = vmalloc_to_page ((void *)vaddr);
+ LASSERT (page != NULL);
+ return page;
+ }
+#ifdef CONFIG_HIGHMEM
+ if (vaddr >= PKMAP_BASE &&
+ vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
+ /* No highmem pages only used for bulk (kiov) I/O */
+ CERROR("find page for address in highmem\n");
+ LBUG();
+ }
+#endif
+ page = virt_to_page (vaddr);
+ LASSERT (page != NULL);
+ return page;
+}
+
+static int
+kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+{
+ kib_hca_dev_t *hdev;
+ __u64 *pages = tx->tx_pages;
+ kib_fmr_poolset_t *fps;
+ int npages;
+ int size;
+ int cpt;
+ int rc;
+ int i;
+
+ LASSERT(tx->tx_pool != NULL);
+ LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL);
+
+ hdev = tx->tx_pool->tpo_hdev;
+
+ for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
+ for (size = 0; size < rd->rd_frags[i].rf_nob;
+ size += hdev->ibh_page_size) {
+ pages[npages ++] = (rd->rd_frags[i].rf_addr &
+ hdev->ibh_page_mask) + size;
+ }
+ }
+
+ cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
+
+ fps = net->ibn_fmr_ps[cpt];
+ rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->tx_u.fmr);
+ if (rc != 0) {
+ CERROR ("Can't map %d pages: %d\n", npages, rc);
+ return rc;
+ }
+
+ /* If rd is not tx_rd, it's going to get sent to a peer, who will need
+ * the rkey */
+ rd->rd_key = (rd != tx->tx_rd) ? tx->tx_u.fmr.fmr_pfmr->fmr->rkey :
+ tx->tx_u.fmr.fmr_pfmr->fmr->lkey;
+ rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
+ rd->rd_frags[0].rf_nob = nob;
+ rd->rd_nfrags = 1;
+
+ return 0;
+}
+
+static int
+kiblnd_pmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+{
+ kib_hca_dev_t *hdev;
+ kib_pmr_poolset_t *pps;
+ __u64 iova;
+ int cpt;
+ int rc;
+
+ LASSERT(tx->tx_pool != NULL);
+ LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL);
+
+ hdev = tx->tx_pool->tpo_hdev;
+
+ iova = rd->rd_frags[0].rf_addr & ~hdev->ibh_page_mask;
+
+ cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
+
+ pps = net->ibn_pmr_ps[cpt];
+ rc = kiblnd_pmr_pool_map(pps, hdev, rd, &iova, &tx->tx_u.pmr);
+ if (rc != 0) {
+ CERROR("Failed to create MR by phybuf: %d\n", rc);
+ return rc;
+ }
+
+ /* If rd is not tx_rd, it's going to get sent to a peer, who will need
+ * the rkey */
+ rd->rd_key = (rd != tx->tx_rd) ? tx->tx_u.pmr->pmr_mr->rkey :
+ tx->tx_u.pmr->pmr_mr->lkey;
+ rd->rd_nfrags = 1;
+ rd->rd_frags[0].rf_addr = iova;
+ rd->rd_frags[0].rf_nob = nob;
+
+ return 0;
+}
+
+void
+kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
+{
+ kib_net_t *net = ni->ni_data;
+
+ LASSERT(net != NULL);
+
+ if (net->ibn_fmr_ps != NULL && tx->tx_u.fmr.fmr_pfmr != NULL) {
+ kiblnd_fmr_pool_unmap(&tx->tx_u.fmr, tx->tx_status);
+ tx->tx_u.fmr.fmr_pfmr = NULL;
+
+ } else if (net->ibn_pmr_ps != NULL && tx->tx_u.pmr != NULL) {
+ kiblnd_pmr_pool_unmap(tx->tx_u.pmr);
+ tx->tx_u.pmr = NULL;
+ }
+
+ if (tx->tx_nfrags != 0) {
+ kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
+ tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
+ tx->tx_nfrags = 0;
+ }
+}
+
+int
+kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
+ kib_rdma_desc_t *rd, int nfrags)
+{
+ kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
+ kib_net_t *net = ni->ni_data;
+ struct ib_mr *mr = NULL;
+ __u32 nob;
+ int i;
+
+ /* If rd is not tx_rd, it's going to get sent to a peer and I'm the
+ * RDMA sink */
+ tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ tx->tx_nfrags = nfrags;
+
+ rd->rd_nfrags =
+ kiblnd_dma_map_sg(hdev->ibh_ibdev,
+ tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
+
+ for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
+ rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len(
+ hdev->ibh_ibdev, &tx->tx_frags[i]);
+ rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address(
+ hdev->ibh_ibdev, &tx->tx_frags[i]);
+ nob += rd->rd_frags[i].rf_nob;
+ }
+
+ /* looking for pre-mapping MR */
+ mr = kiblnd_find_rd_dma_mr(hdev, rd);
+ if (mr != NULL) {
+ /* found pre-mapping MR */
+ rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
+ return 0;
+ }
+
+ if (net->ibn_fmr_ps != NULL)
+ return kiblnd_fmr_map_tx(net, tx, rd, nob);
+ else if (net->ibn_pmr_ps != NULL)
+ return kiblnd_pmr_map_tx(net, tx, rd, nob);
+
+ return -EINVAL;
+}
+
+
+int
+kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
+ unsigned int niov, struct iovec *iov, int offset, int nob)
+{
+ kib_net_t *net = ni->ni_data;
+ struct page *page;
+ struct scatterlist *sg;
+ unsigned long vaddr;
+ int fragnob;
+ int page_offset;
+
+ LASSERT (nob > 0);
+ LASSERT (niov > 0);
+ LASSERT (net != NULL);
+
+ while (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ niov--;
+ iov++;
+ LASSERT (niov > 0);
+ }
+
+ sg = tx->tx_frags;
+ do {
+ LASSERT (niov > 0);
+
+ vaddr = ((unsigned long)iov->iov_base) + offset;
+ page_offset = vaddr & (PAGE_SIZE - 1);
+ page = kiblnd_kvaddr_to_page(vaddr);
+ if (page == NULL) {
+ CERROR ("Can't find page\n");
+ return -EFAULT;
+ }
+
+ fragnob = min((int)(iov->iov_len - offset), nob);
+ fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
+
+ sg_set_page(sg, page, fragnob, page_offset);
+ sg++;
+
+ if (offset + fragnob < iov->iov_len) {
+ offset += fragnob;
+ } else {
+ offset = 0;
+ iov++;
+ niov--;
+ }
+ nob -= fragnob;
+ } while (nob > 0);
+
+ return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+}
+
+int
+kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
+ int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+{
+ kib_net_t *net = ni->ni_data;
+ struct scatterlist *sg;
+ int fragnob;
+
+ CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
+
+ LASSERT (nob > 0);
+ LASSERT (nkiov > 0);
+ LASSERT (net != NULL);
+
+ while (offset >= kiov->kiov_len) {
+ offset -= kiov->kiov_len;
+ nkiov--;
+ kiov++;
+ LASSERT (nkiov > 0);
+ }
+
+ sg = tx->tx_frags;
+ do {
+ LASSERT (nkiov > 0);
+
+ fragnob = min((int)(kiov->kiov_len - offset), nob);
+
+ sg_set_page(sg, kiov->kiov_page, fragnob,
+ kiov->kiov_offset + offset);
+ sg++;
+
+ offset = 0;
+ kiov++;
+ nkiov--;
+ nob -= fragnob;
+ } while (nob > 0);
+
+ return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+}
+
+int
+kiblnd_post_tx_locked (kib_conn_t *conn, kib_tx_t *tx, int credit)
+{
+ kib_msg_t *msg = tx->tx_msg;
+ kib_peer_t *peer = conn->ibc_peer;
+ int ver = conn->ibc_version;
+ int rc;
+ int done;
+ struct ib_send_wr *bad_wrq;
+
+ LASSERT (tx->tx_queued);
+ /* We rely on this for QP sizing */
+ LASSERT (tx->tx_nwrq > 0);
+ LASSERT (tx->tx_nwrq <= 1 + IBLND_RDMA_FRAGS(ver));
+
+ LASSERT (credit == 0 || credit == 1);
+ LASSERT (conn->ibc_outstanding_credits >= 0);
+ LASSERT (conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+ LASSERT (conn->ibc_credits >= 0);
+ LASSERT (conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE(ver));
+
+ if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
+ /* tx completions outstanding... */
+ CDEBUG(D_NET, "%s: posted enough\n",
+ libcfs_nid2str(peer->ibp_nid));
+ return -EAGAIN;
+ }
+
+ if (credit != 0 && conn->ibc_credits == 0) { /* no credits */
+ CDEBUG(D_NET, "%s: no credits\n",
+ libcfs_nid2str(peer->ibp_nid));
+ return -EAGAIN;
+ }
+
+ if (credit != 0 && !IBLND_OOB_CAPABLE(ver) &&
+ conn->ibc_credits == 1 && /* last credit reserved */
+ msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */
+ CDEBUG(D_NET, "%s: not using last credit\n",
+ libcfs_nid2str(peer->ibp_nid));
+ return -EAGAIN;
+ }
+
+ /* NB don't drop ibc_lock before bumping tx_sending */
+ list_del(&tx->tx_list);
+ tx->tx_queued = 0;
+
+ if (msg->ibm_type == IBLND_MSG_NOOP &&
+ (!kiblnd_need_noop(conn) || /* redundant NOOP */
+ (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */
+ conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
+ /* OK to drop when posted enough NOOPs, since
+ * kiblnd_check_sends will queue NOOP again when
+ * posted NOOPs complete */
+ spin_unlock(&conn->ibc_lock);
+ kiblnd_tx_done(peer->ibp_ni, tx);
+ spin_lock(&conn->ibc_lock);
+ CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n",
+ libcfs_nid2str(peer->ibp_nid),
+ conn->ibc_noops_posted);
+ return 0;
+ }
+
+ kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits,
+ peer->ibp_nid, conn->ibc_incarnation);
+
+ conn->ibc_credits -= credit;
+ conn->ibc_outstanding_credits = 0;
+ conn->ibc_nsends_posted++;
+ if (msg->ibm_type == IBLND_MSG_NOOP)
+ conn->ibc_noops_posted++;
+
+ /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
+ * PUT. If so, it was first queued here as a PUT_REQ, sent and
+ * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
+ * and then re-queued here. It's (just) possible that
+ * tx_sending is non-zero if we've not done the tx_complete()
+ * from the first send; hence the ++ rather than = below. */
+ tx->tx_sending++;
+ list_add(&tx->tx_list, &conn->ibc_active_txs);
+
+ /* I'm still holding ibc_lock! */
+ if (conn->ibc_state != IBLND_CONN_ESTABLISHED) {
+ rc = -ECONNABORTED;
+ } else if (tx->tx_pool->tpo_pool.po_failed ||
+ conn->ibc_hdev != tx->tx_pool->tpo_hdev) {
+ /* close_conn will launch failover */
+ rc = -ENETDOWN;
+ } else {
+ rc = ib_post_send(conn->ibc_cmid->qp,
+ tx->tx_wrq, &bad_wrq);
+ }
+
+ conn->ibc_last_send = jiffies;
+
+ if (rc == 0)
+ return 0;
+
+ /* NB credits are transferred in the actual
+ * message, which can only be the last work item */
+ conn->ibc_credits += credit;
+ conn->ibc_outstanding_credits += msg->ibm_credits;
+ conn->ibc_nsends_posted--;
+ if (msg->ibm_type == IBLND_MSG_NOOP)
+ conn->ibc_noops_posted--;
+
+ tx->tx_status = rc;
+ tx->tx_waiting = 0;
+ tx->tx_sending--;
+
+ done = (tx->tx_sending == 0);
+ if (done)
+ list_del(&tx->tx_list);
+
+ spin_unlock(&conn->ibc_lock);
+
+ if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
+ CERROR("Error %d posting transmit to %s\n",
+ rc, libcfs_nid2str(peer->ibp_nid));
+ else
+ CDEBUG(D_NET, "Error %d posting transmit to %s\n",
+ rc, libcfs_nid2str(peer->ibp_nid));
+
+ kiblnd_close_conn(conn, rc);
+
+ if (done)
+ kiblnd_tx_done(peer->ibp_ni, tx);
+
+ spin_lock(&conn->ibc_lock);
+
+ return -EIO;
+}
+
+void
+kiblnd_check_sends (kib_conn_t *conn)
+{
+ int ver = conn->ibc_version;
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ kib_tx_t *tx;
+
+ /* Don't send anything until after the connection is established */
+ if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+ CDEBUG(D_NET, "%s too soon\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ return;
+ }
+
+ spin_lock(&conn->ibc_lock);
+
+ LASSERT (conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver));
+ LASSERT (!IBLND_OOB_CAPABLE(ver) ||
+ conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
+ LASSERT (conn->ibc_reserved_credits >= 0);
+
+ while (conn->ibc_reserved_credits > 0 &&
+ !list_empty(&conn->ibc_tx_queue_rsrvd)) {
+ tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
+ kib_tx_t, tx_list);
+ list_del(&tx->tx_list);
+ list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
+ conn->ibc_reserved_credits--;
+ }
+
+ if (kiblnd_need_noop(conn)) {
+ spin_unlock(&conn->ibc_lock);
+
+ tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+ if (tx != NULL)
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0);
+
+ spin_lock(&conn->ibc_lock);
+ if (tx != NULL)
+ kiblnd_queue_tx_locked(tx, conn);
+ }
+
+ kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */
+
+ for (;;) {
+ int credit;
+
+ if (!list_empty(&conn->ibc_tx_queue_nocred)) {
+ credit = 0;
+ tx = list_entry(conn->ibc_tx_queue_nocred.next,
+ kib_tx_t, tx_list);
+ } else if (!list_empty(&conn->ibc_tx_noops)) {
+ LASSERT (!IBLND_OOB_CAPABLE(ver));
+ credit = 1;
+ tx = list_entry(conn->ibc_tx_noops.next,
+ kib_tx_t, tx_list);
+ } else if (!list_empty(&conn->ibc_tx_queue)) {
+ credit = 1;
+ tx = list_entry(conn->ibc_tx_queue.next,
+ kib_tx_t, tx_list);
+ } else
+ break;
+
+ if (kiblnd_post_tx_locked(conn, tx, credit) != 0)
+ break;
+ }
+
+ spin_unlock(&conn->ibc_lock);
+
+ kiblnd_conn_decref(conn); /* ...until here */
+}
+
+void
+kiblnd_tx_complete (kib_tx_t *tx, int status)
+{
+ int failed = (status != IB_WC_SUCCESS);
+ kib_conn_t *conn = tx->tx_conn;
+ int idle;
+
+ LASSERT (tx->tx_sending > 0);
+
+ if (failed) {
+ if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
+ CNETERR("Tx -> %s cookie "LPX64
+ " sending %d waiting %d: failed %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
+ status);
+
+ kiblnd_close_conn(conn, -EIO);
+ } else {
+ kiblnd_peer_alive(conn->ibc_peer);
+ }
+
+ spin_lock(&conn->ibc_lock);
+
+ /* I could be racing with rdma completion. Whoever makes 'tx' idle
+ * gets to free it, which also drops its ref on 'conn'. */
+
+ tx->tx_sending--;
+ conn->ibc_nsends_posted--;
+ if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP)
+ conn->ibc_noops_posted--;
+
+ if (failed) {
+ tx->tx_waiting = 0; /* don't wait for peer */
+ tx->tx_status = -EIO;
+ }
+
+ idle = (tx->tx_sending == 0) && /* This is the final callback */
+ !tx->tx_waiting && /* Not waiting for peer */
+ !tx->tx_queued; /* Not re-queued (PUT_DONE) */
+ if (idle)
+ list_del(&tx->tx_list);
+
+ kiblnd_conn_addref(conn); /* 1 ref for me.... */
+
+ spin_unlock(&conn->ibc_lock);
+
+ if (idle)
+ kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
+
+ kiblnd_check_sends(conn);
+
+ kiblnd_conn_decref(conn); /* ...until here */
+}
+
+void
+kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
+{
+ kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
+ struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
+ struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
+ int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
+ struct ib_mr *mr;
+
+ LASSERT (tx->tx_nwrq >= 0);
+ LASSERT (tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+ LASSERT (nob <= IBLND_MSG_SIZE);
+
+ kiblnd_init_msg(tx->tx_msg, type, body_nob);
+
+ mr = kiblnd_find_dma_mr(hdev, tx->tx_msgaddr, nob);
+ LASSERT (mr != NULL);
+
+ sge->lkey = mr->lkey;
+ sge->addr = tx->tx_msgaddr;
+ sge->length = nob;
+
+ memset(wrq, 0, sizeof(*wrq));
+
+ wrq->next = NULL;
+ wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
+ wrq->sg_list = sge;
+ wrq->num_sge = 1;
+ wrq->opcode = IB_WR_SEND;
+ wrq->send_flags = IB_SEND_SIGNALED;
+
+ tx->tx_nwrq++;
+}
+
+int
+kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
+ int resid, kib_rdma_desc_t *dstrd, __u64 dstcookie)
+{
+ kib_msg_t *ibmsg = tx->tx_msg;
+ kib_rdma_desc_t *srcrd = tx->tx_rd;
+ struct ib_sge *sge = &tx->tx_sge[0];
+ struct ib_send_wr *wrq = &tx->tx_wrq[0];
+ int rc = resid;
+ int srcidx;
+ int dstidx;
+ int wrknob;
+
+ LASSERT (!in_interrupt());
+ LASSERT (tx->tx_nwrq == 0);
+ LASSERT (type == IBLND_MSG_GET_DONE ||
+ type == IBLND_MSG_PUT_DONE);
+
+ srcidx = dstidx = 0;
+
+ while (resid > 0) {
+ if (srcidx >= srcrd->rd_nfrags) {
+ CERROR("Src buffer exhausted: %d frags\n", srcidx);
+ rc = -EPROTO;
+ break;
+ }
+
+ if (dstidx == dstrd->rd_nfrags) {
+ CERROR("Dst buffer exhausted: %d frags\n", dstidx);
+ rc = -EPROTO;
+ break;
+ }
+
+ if (tx->tx_nwrq == IBLND_RDMA_FRAGS(conn->ibc_version)) {
+ CERROR("RDMA too fragmented for %s (%d): "
+ "%d/%d src %d/%d dst frags\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ IBLND_RDMA_FRAGS(conn->ibc_version),
+ srcidx, srcrd->rd_nfrags,
+ dstidx, dstrd->rd_nfrags);
+ rc = -EMSGSIZE;
+ break;
+ }
+
+ wrknob = MIN(MIN(kiblnd_rd_frag_size(srcrd, srcidx),
+ kiblnd_rd_frag_size(dstrd, dstidx)), resid);
+
+ sge = &tx->tx_sge[tx->tx_nwrq];
+ sge->addr = kiblnd_rd_frag_addr(srcrd, srcidx);
+ sge->lkey = kiblnd_rd_frag_key(srcrd, srcidx);
+ sge->length = wrknob;
+
+ wrq = &tx->tx_wrq[tx->tx_nwrq];
+
+ wrq->next = wrq + 1;
+ wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
+ wrq->sg_list = sge;
+ wrq->num_sge = 1;
+ wrq->opcode = IB_WR_RDMA_WRITE;
+ wrq->send_flags = 0;
+
+ wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
+ wrq->wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx);
+
+ srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
+ dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
+
+ resid -= wrknob;
+
+ tx->tx_nwrq++;
+ wrq++;
+ sge++;
+ }
+
+ if (rc < 0) /* no RDMA if completing with failure */
+ tx->tx_nwrq = 0;
+
+ ibmsg->ibm_u.completion.ibcm_status = rc;
+ ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
+ kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx,
+ type, sizeof (kib_completion_msg_t));
+
+ return rc;
+}
+
+void
+kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
+{
+ struct list_head *q;
+
+ LASSERT (tx->tx_nwrq > 0); /* work items set up */
+ LASSERT (!tx->tx_queued); /* not queued for sending already */
+ LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+ tx->tx_queued = 1;
+ tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ);
+
+ if (tx->tx_conn == NULL) {
+ kiblnd_conn_addref(conn);
+ tx->tx_conn = conn;
+ LASSERT (tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE);
+ } else {
+ /* PUT_DONE first attached to conn as a PUT_REQ */
+ LASSERT (tx->tx_conn == conn);
+ LASSERT (tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE);
+ }
+
+ switch (tx->tx_msg->ibm_type) {
+ default:
+ LBUG();
+
+ case IBLND_MSG_PUT_REQ:
+ case IBLND_MSG_GET_REQ:
+ q = &conn->ibc_tx_queue_rsrvd;
+ break;
+
+ case IBLND_MSG_PUT_NAK:
+ case IBLND_MSG_PUT_ACK:
+ case IBLND_MSG_PUT_DONE:
+ case IBLND_MSG_GET_DONE:
+ q = &conn->ibc_tx_queue_nocred;
+ break;
+
+ case IBLND_MSG_NOOP:
+ if (IBLND_OOB_CAPABLE(conn->ibc_version))
+ q = &conn->ibc_tx_queue_nocred;
+ else
+ q = &conn->ibc_tx_noops;
+ break;
+
+ case IBLND_MSG_IMMEDIATE:
+ q = &conn->ibc_tx_queue;
+ break;
+ }
+
+ list_add_tail(&tx->tx_list, q);
+}
+
+void
+kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
+{
+ spin_lock(&conn->ibc_lock);
+ kiblnd_queue_tx_locked(tx, conn);
+ spin_unlock(&conn->ibc_lock);
+
+ kiblnd_check_sends(conn);
+}
+
+static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
+ struct sockaddr_in *srcaddr,
+ struct sockaddr_in *dstaddr,
+ int timeout_ms)
+{
+ unsigned short port;
+ int rc;
+
+ /* allow the port to be reused */
+ rc = rdma_set_reuseaddr(cmid, 1);
+ if (rc != 0) {
+ CERROR("Unable to set reuse on cmid: %d\n", rc);
+ return rc;
+ }
+
+ /* look for a free privileged port */
+ for (port = PROT_SOCK-1; port > 0; port--) {
+ srcaddr->sin_port = htons(port);
+ rc = rdma_resolve_addr(cmid,
+ (struct sockaddr *)srcaddr,
+ (struct sockaddr *)dstaddr,
+ timeout_ms);
+ if (rc == 0) {
+ CDEBUG(D_NET, "bound to port %hu\n", port);
+ return 0;
+ } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
+ CDEBUG(D_NET, "bind to port %hu failed: %d\n",
+ port, rc);
+ } else {
+ return rc;
+ }
+ }
+
+ CERROR("Failed to bind to a free privileged port\n");
+ return rc;
+}
+
+void
+kiblnd_connect_peer (kib_peer_t *peer)
+{
+ struct rdma_cm_id *cmid;
+ kib_dev_t *dev;
+ kib_net_t *net = peer->ibp_ni->ni_data;
+ struct sockaddr_in srcaddr;
+ struct sockaddr_in dstaddr;
+ int rc;
+
+ LASSERT (net != NULL);
+ LASSERT (peer->ibp_connecting > 0);
+
+ cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
+ IB_QPT_RC);
+
+ if (IS_ERR(cmid)) {
+ CERROR("Can't create CMID for %s: %ld\n",
+ libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid));
+ rc = PTR_ERR(cmid);
+ goto failed;
+ }
+
+ dev = net->ibn_dev;
+ memset(&srcaddr, 0, sizeof(srcaddr));
+ srcaddr.sin_family = AF_INET;
+ srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip);
+
+ memset(&dstaddr, 0, sizeof(dstaddr));
+ dstaddr.sin_family = AF_INET;
+ dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
+ dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
+
+ kiblnd_peer_addref(peer); /* cmid's ref */
+
+ if (*kiblnd_tunables.kib_use_priv_port) {
+ rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
+ *kiblnd_tunables.kib_timeout * 1000);
+ } else {
+ rc = rdma_resolve_addr(cmid,
+ (struct sockaddr *)&srcaddr,
+ (struct sockaddr *)&dstaddr,
+ *kiblnd_tunables.kib_timeout * 1000);
+ }
+ if (rc != 0) {
+ /* Can't initiate address resolution: */
+ CERROR("Can't resolve addr for %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc);
+ goto failed2;
+ }
+
+ LASSERT (cmid->device != NULL);
+ CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
+ libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
+ HIPQUAD(dev->ibd_ifip), cmid->device->name);
+
+ return;
+
+ failed2:
+ kiblnd_peer_decref(peer); /* cmid's ref */
+ rdma_destroy_id(cmid);
+ failed:
+ kiblnd_peer_connect_failed(peer, 1, rc);
+}
+
+void
+kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid)
+{
+ kib_peer_t *peer;
+ kib_peer_t *peer2;
+ kib_conn_t *conn;
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+ unsigned long flags;
+ int rc;
+
+ /* If I get here, I've committed to send, so I complete the tx with
+ * failure on any problems */
+
+ LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */
+ LASSERT (tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */
+
+ /* First time, just use a read lock since I expect to find my peer
+ * connected */
+ read_lock_irqsave(g_lock, flags);
+
+ peer = kiblnd_find_peer_locked(nid);
+ if (peer != NULL && !list_empty(&peer->ibp_conns)) {
+ /* Found a peer with an established connection */
+ conn = kiblnd_get_conn_locked(peer);
+ kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+ read_unlock_irqrestore(g_lock, flags);
+
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
+ kiblnd_conn_decref(conn); /* ...to here */
+ return;
+ }
+
+ read_unlock(g_lock);
+ /* Re-try with a write lock */
+ write_lock(g_lock);
+
+ peer = kiblnd_find_peer_locked(nid);
+ if (peer != NULL) {
+ if (list_empty(&peer->ibp_conns)) {
+ /* found a peer, but it's still connecting... */
+ LASSERT (peer->ibp_connecting != 0 ||
+ peer->ibp_accepting != 0);
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list,
+ &peer->ibp_tx_queue);
+ write_unlock_irqrestore(g_lock, flags);
+ } else {
+ conn = kiblnd_get_conn_locked(peer);
+ kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+ write_unlock_irqrestore(g_lock, flags);
+
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
+ kiblnd_conn_decref(conn); /* ...to here */
+ }
+ return;
+ }
+
+ write_unlock_irqrestore(g_lock, flags);
+
+ /* Allocate a peer ready to add to the peer table and retry */
+ rc = kiblnd_create_peer(ni, &peer, nid);
+ if (rc != 0) {
+ CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
+ if (tx != NULL) {
+ tx->tx_status = -EHOSTUNREACH;
+ tx->tx_waiting = 0;
+ kiblnd_tx_done(ni, tx);
+ }
+ return;
+ }
+
+ write_lock_irqsave(g_lock, flags);
+
+ peer2 = kiblnd_find_peer_locked(nid);
+ if (peer2 != NULL) {
+ if (list_empty(&peer2->ibp_conns)) {
+ /* found a peer, but it's still connecting... */
+ LASSERT (peer2->ibp_connecting != 0 ||
+ peer2->ibp_accepting != 0);
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list,
+ &peer2->ibp_tx_queue);
+ write_unlock_irqrestore(g_lock, flags);
+ } else {
+ conn = kiblnd_get_conn_locked(peer2);
+ kiblnd_conn_addref(conn); /* 1 ref for me... */
+
+ write_unlock_irqrestore(g_lock, flags);
+
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
+ kiblnd_conn_decref(conn); /* ...to here */
+ }
+
+ kiblnd_peer_decref(peer);
+ return;
+ }
+
+ /* Brand new peer */
+ LASSERT (peer->ibp_connecting == 0);
+ peer->ibp_connecting = 1;
+
+ /* always called with a ref on ni, which prevents ni being shutdown */
+ LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
+
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
+
+ kiblnd_peer_addref(peer);
+ list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+
+ write_unlock_irqrestore(g_lock, flags);
+
+ kiblnd_connect_peer(peer);
+ kiblnd_peer_decref(peer);
+}
+
+int
+kiblnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+ lnet_hdr_t *hdr = &lntmsg->msg_hdr;
+ int type = lntmsg->msg_type;
+ lnet_process_id_t target = lntmsg->msg_target;
+ int target_is_router = lntmsg->msg_target_is_router;
+ int routing = lntmsg->msg_routing;
+ unsigned int payload_niov = lntmsg->msg_niov;
+ struct iovec *payload_iov = lntmsg->msg_iov;
+ lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
+ unsigned int payload_offset = lntmsg->msg_offset;
+ unsigned int payload_nob = lntmsg->msg_len;
+ kib_msg_t *ibmsg;
+ kib_tx_t *tx;
+ int nob;
+ int rc;
+
+ /* NB 'private' is different depending on what we're sending.... */
+
+ CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
+ payload_nob, payload_niov, libcfs_id2str(target));
+
+ LASSERT (payload_nob == 0 || payload_niov > 0);
+ LASSERT (payload_niov <= LNET_MAX_IOV);
+
+ /* Thread context */
+ LASSERT (!in_interrupt());
+ /* payload is either all vaddrs or all pages */
+ LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+ switch (type) {
+ default:
+ LBUG();
+ return (-EIO);
+
+ case LNET_MSG_ACK:
+ LASSERT (payload_nob == 0);
+ break;
+
+ case LNET_MSG_GET:
+ if (routing || target_is_router)
+ break; /* send IMMEDIATE */
+
+ /* is the REPLY message too small for RDMA? */
+ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
+ if (nob <= IBLND_MSG_SIZE)
+ break; /* send IMMEDIATE */
+
+ tx = kiblnd_get_idle_tx(ni, target.nid);
+ if (tx == NULL) {
+ CERROR("Can't allocate txd for GET to %s\n",
+ libcfs_nid2str(target.nid));
+ return -ENOMEM;
+ }
+
+ ibmsg = tx->tx_msg;
+
+ if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
+ rc = kiblnd_setup_rd_iov(ni, tx,
+ &ibmsg->ibm_u.get.ibgm_rd,
+ lntmsg->msg_md->md_niov,
+ lntmsg->msg_md->md_iov.iov,
+ 0, lntmsg->msg_md->md_length);
+ else
+ rc = kiblnd_setup_rd_kiov(ni, tx,
+ &ibmsg->ibm_u.get.ibgm_rd,
+ lntmsg->msg_md->md_niov,
+ lntmsg->msg_md->md_iov.kiov,
+ 0, lntmsg->msg_md->md_length);
+ if (rc != 0) {
+ CERROR("Can't setup GET sink for %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ kiblnd_tx_done(ni, tx);
+ return -EIO;
+ }
+
+ nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[tx->tx_nfrags]);
+ ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
+ ibmsg->ibm_u.get.ibgm_hdr = *hdr;
+
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob);
+
+ tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
+ if (tx->tx_lntmsg[1] == NULL) {
+ CERROR("Can't create reply for GET -> %s\n",
+ libcfs_nid2str(target.nid));
+ kiblnd_tx_done(ni, tx);
+ return -EIO;
+ }
+
+ tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
+ tx->tx_waiting = 1; /* waiting for GET_DONE */
+ kiblnd_launch_tx(ni, tx, target.nid);
+ return 0;
+
+ case LNET_MSG_REPLY:
+ case LNET_MSG_PUT:
+ /* Is the payload small enough not to need RDMA? */
+ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
+ if (nob <= IBLND_MSG_SIZE)
+ break; /* send IMMEDIATE */
+
+ tx = kiblnd_get_idle_tx(ni, target.nid);
+ if (tx == NULL) {
+ CERROR("Can't allocate %s txd for %s\n",
+ type == LNET_MSG_PUT ? "PUT" : "REPLY",
+ libcfs_nid2str(target.nid));
+ return -ENOMEM;
+ }
+
+ if (payload_kiov == NULL)
+ rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
+ payload_niov, payload_iov,
+ payload_offset, payload_nob);
+ else
+ rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+ if (rc != 0) {
+ CERROR("Can't setup PUT src for %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ kiblnd_tx_done(ni, tx);
+ return -EIO;
+ }
+
+ ibmsg = tx->tx_msg;
+ ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
+ ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
+
+ tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
+ tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
+ kiblnd_launch_tx(ni, tx, target.nid);
+ return 0;
+ }
+
+ /* send IMMEDIATE */
+
+ LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
+ <= IBLND_MSG_SIZE);
+
+ tx = kiblnd_get_idle_tx(ni, target.nid);
+ if (tx == NULL) {
+ CERROR ("Can't send %d to %s: tx descs exhausted\n",
+ type, libcfs_nid2str(target.nid));
+ return -ENOMEM;
+ }
+
+ ibmsg = tx->tx_msg;
+ ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
+
+ if (payload_kiov != NULL)
+ lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
+ offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+ else
+ lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
+ offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+ payload_niov, payload_iov,
+ payload_offset, payload_nob);
+
+ nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+
+ tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
+ kiblnd_launch_tx(ni, tx, target.nid);
+ return 0;
+}
+
+void
+kiblnd_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
+{
+ lnet_process_id_t target = lntmsg->msg_target;
+ unsigned int niov = lntmsg->msg_niov;
+ struct iovec *iov = lntmsg->msg_iov;
+ lnet_kiov_t *kiov = lntmsg->msg_kiov;
+ unsigned int offset = lntmsg->msg_offset;
+ unsigned int nob = lntmsg->msg_len;
+ kib_tx_t *tx;
+ int rc;
+
+ tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid);
+ if (tx == NULL) {
+ CERROR("Can't get tx for REPLY to %s\n",
+ libcfs_nid2str(target.nid));
+ goto failed_0;
+ }
+
+ if (nob == 0)
+ rc = 0;
+ else if (kiov == NULL)
+ rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd,
+ niov, iov, offset, nob);
+ else
+ rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
+ niov, kiov, offset, nob);
+
+ if (rc != 0) {
+ CERROR("Can't setup GET src for %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ goto failed_1;
+ }
+
+ rc = kiblnd_init_rdma(rx->rx_conn, tx,
+ IBLND_MSG_GET_DONE, nob,
+ &rx->rx_msg->ibm_u.get.ibgm_rd,
+ rx->rx_msg->ibm_u.get.ibgm_cookie);
+ if (rc < 0) {
+ CERROR("Can't setup rdma for GET from %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ goto failed_1;
+ }
+
+ if (nob == 0) {
+ /* No RDMA: local completion may happen now! */
+ lnet_finalize(ni, lntmsg, 0);
+ } else {
+ /* RDMA: lnet_finalize(lntmsg) when it
+ * completes */
+ tx->tx_lntmsg[0] = lntmsg;
+ }
+
+ kiblnd_queue_tx(tx, rx->rx_conn);
+ return;
+
+ failed_1:
+ kiblnd_tx_done(ni, tx);
+ failed_0:
+ lnet_finalize(ni, lntmsg, -EIO);
+}
+
+int
+kiblnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
+ unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+ kib_rx_t *rx = private;
+ kib_msg_t *rxmsg = rx->rx_msg;
+ kib_conn_t *conn = rx->rx_conn;
+ kib_tx_t *tx;
+ kib_msg_t *txmsg;
+ int nob;
+ int post_credit = IBLND_POSTRX_PEER_CREDIT;
+ int rc = 0;
+
+ LASSERT (mlen <= rlen);
+ LASSERT (!in_interrupt());
+ /* Either all pages or all vaddrs */
+ LASSERT (!(kiov != NULL && iov != NULL));
+
+ switch (rxmsg->ibm_type) {
+ default:
+ LBUG();
+
+ case IBLND_MSG_IMMEDIATE:
+ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
+ if (nob > rx->rx_nob) {
+ CERROR ("Immediate message from %s too big: %d(%d)\n",
+ libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
+ nob, rx->rx_nob);
+ rc = -EPROTO;
+ break;
+ }
+
+ if (kiov != NULL)
+ lnet_copy_flat2kiov(niov, kiov, offset,
+ IBLND_MSG_SIZE, rxmsg,
+ offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+ mlen);
+ else
+ lnet_copy_flat2iov(niov, iov, offset,
+ IBLND_MSG_SIZE, rxmsg,
+ offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
+ mlen);
+ lnet_finalize (ni, lntmsg, 0);
+ break;
+
+ case IBLND_MSG_PUT_REQ:
+ if (mlen == 0) {
+ lnet_finalize(ni, lntmsg, 0);
+ kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
+ rxmsg->ibm_u.putreq.ibprm_cookie);
+ break;
+ }
+
+ tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid);
+ if (tx == NULL) {
+ CERROR("Can't allocate tx for %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ /* Not replying will break the connection */
+ rc = -ENOMEM;
+ break;
+ }
+
+ txmsg = tx->tx_msg;
+ if (kiov == NULL)
+ rc = kiblnd_setup_rd_iov(ni, tx,
+ &txmsg->ibm_u.putack.ibpam_rd,
+ niov, iov, offset, mlen);
+ else
+ rc = kiblnd_setup_rd_kiov(ni, tx,
+ &txmsg->ibm_u.putack.ibpam_rd,
+ niov, kiov, offset, mlen);
+ if (rc != 0) {
+ CERROR("Can't setup PUT sink for %s: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+ kiblnd_tx_done(ni, tx);
+ /* tell peer it's over */
+ kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc,
+ rxmsg->ibm_u.putreq.ibprm_cookie);
+ break;
+ }
+
+ nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[tx->tx_nfrags]);
+ txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
+ txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
+
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob);
+
+ tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
+ tx->tx_waiting = 1; /* waiting for PUT_DONE */
+ kiblnd_queue_tx(tx, conn);
+
+ /* reposted buffer reserved for PUT_DONE */
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ break;
+
+ case IBLND_MSG_GET_REQ:
+ if (lntmsg != NULL) {
+ /* Optimized GET; RDMA lntmsg's payload */
+ kiblnd_reply(ni, rx, lntmsg);
+ } else {
+ /* GET didn't match anything */
+ kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE,
+ -ENODATA,
+ rxmsg->ibm_u.get.ibgm_cookie);
+ }
+ break;
+ }
+
+ kiblnd_post_rx(rx, post_credit);
+ return rc;
+}
+
+int
+kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
+{
+ task_t *task = kthread_run(fn, arg, name);
+
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ atomic_inc(&kiblnd_data.kib_nthreads);
+ return 0;
+}
+
+void
+kiblnd_thread_fini (void)
+{
+ atomic_dec (&kiblnd_data.kib_nthreads);
+}
+
+void
+kiblnd_peer_alive (kib_peer_t *peer)
+{
+ /* This is racy, but everyone's only writing cfs_time_current() */
+ peer->ibp_last_alive = cfs_time_current();
+ mb();
+}
+
+void
+kiblnd_peer_notify (kib_peer_t *peer)
+{
+ int error = 0;
+ cfs_time_t last_alive = 0;
+ unsigned long flags;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ if (list_empty(&peer->ibp_conns) &&
+ peer->ibp_accepting == 0 &&
+ peer->ibp_connecting == 0 &&
+ peer->ibp_error != 0) {
+ error = peer->ibp_error;
+ peer->ibp_error = 0;
+
+ last_alive = peer->ibp_last_alive;
+ }
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ if (error != 0)
+ lnet_notify(peer->ibp_ni,
+ peer->ibp_nid, 0, last_alive);
+}
+
+void
+kiblnd_close_conn_locked (kib_conn_t *conn, int error)
+{
+ /* This just does the immediate housekeeping. 'error' is zero for a
+ * normal shutdown which can happen only after the connection has been
+ * established. If the connection is established, schedule the
+ * connection to be finished off by the connd. Otherwise the connd is
+ * already dealing with it (either to set it up or tear it down).
+ * Caller holds kib_global_lock exclusively in irq context */
+ kib_peer_t *peer = conn->ibc_peer;
+ kib_dev_t *dev;
+ unsigned long flags;
+
+ LASSERT (error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+ if (error != 0 && conn->ibc_comms_error == 0)
+ conn->ibc_comms_error = error;
+
+ if (conn->ibc_state != IBLND_CONN_ESTABLISHED)
+ return; /* already being handled */
+
+ if (error == 0 &&
+ list_empty(&conn->ibc_tx_noops) &&
+ list_empty(&conn->ibc_tx_queue) &&
+ list_empty(&conn->ibc_tx_queue_rsrvd) &&
+ list_empty(&conn->ibc_tx_queue_nocred) &&
+ list_empty(&conn->ibc_active_txs)) {
+ CDEBUG(D_NET, "closing conn to %s\n",
+ libcfs_nid2str(peer->ibp_nid));
+ } else {
+ CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
+ libcfs_nid2str(peer->ibp_nid), error,
+ list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+ list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
+ list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
+ list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
+ list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
+ }
+
+ dev = ((kib_net_t *)peer->ibp_ni->ni_data)->ibn_dev;
+ list_del(&conn->ibc_list);
+ /* connd (see below) takes over ibc_list's ref */
+
+ if (list_empty (&peer->ibp_conns) && /* no more conns */
+ kiblnd_peer_active(peer)) { /* still in peer table */
+ kiblnd_unlink_peer_locked(peer);
+
+ /* set/clear error on last conn */
+ peer->ibp_error = conn->ibc_comms_error;
+ }
+
+ kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING);
+
+ if (error != 0 &&
+ kiblnd_dev_can_failover(dev)) {
+ list_add_tail(&dev->ibd_fail_list,
+ &kiblnd_data.kib_failed_devs);
+ wake_up(&kiblnd_data.kib_failover_waitq);
+ }
+
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+
+ list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns);
+ wake_up(&kiblnd_data.kib_connd_waitq);
+
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+}
+
+void
+kiblnd_close_conn(kib_conn_t *conn, int error)
+{
+ unsigned long flags;
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_close_conn_locked(conn, error);
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+}
+
+void
+kiblnd_handle_early_rxs(kib_conn_t *conn)
+{
+ unsigned long flags;
+ kib_rx_t *rx;
+
+ LASSERT(!in_interrupt());
+ LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ while (!list_empty(&conn->ibc_early_rxs)) {
+ rx = list_entry(conn->ibc_early_rxs.next,
+ kib_rx_t, rx_list);
+ list_del(&rx->rx_list);
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_handle_rx(rx);
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ }
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+}
+
+void
+kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs)
+{
+ LIST_HEAD (zombies);
+ struct list_head *tmp;
+ struct list_head *nxt;
+ kib_tx_t *tx;
+
+ spin_lock(&conn->ibc_lock);
+
+ list_for_each_safe (tmp, nxt, txs) {
+ tx = list_entry (tmp, kib_tx_t, tx_list);
+
+ if (txs == &conn->ibc_active_txs) {
+ LASSERT (!tx->tx_queued);
+ LASSERT (tx->tx_waiting ||
+ tx->tx_sending != 0);
+ } else {
+ LASSERT (tx->tx_queued);
+ }
+
+ tx->tx_status = -ECONNABORTED;
+ tx->tx_waiting = 0;
+
+ if (tx->tx_sending == 0) {
+ tx->tx_queued = 0;
+ list_del (&tx->tx_list);
+ list_add (&tx->tx_list, &zombies);
+ }
+ }
+
+ spin_unlock(&conn->ibc_lock);
+
+ kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED);
+}
+
+void
+kiblnd_finalise_conn (kib_conn_t *conn)
+{
+ LASSERT (!in_interrupt());
+ LASSERT (conn->ibc_state > IBLND_CONN_INIT);
+
+ kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED);
+
+ /* abort_receives moves QP state to IB_QPS_ERR. This is only required
+ * for connections that didn't get as far as being connected, because
+ * rdma_disconnect() does this for free. */
+ kiblnd_abort_receives(conn);
+
+ /* Complete all tx descs not waiting for sends to complete.
+ * NB we should be safe from RDMA now that the QP has changed state */
+
+ kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
+ kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
+ kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
+ kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
+ kiblnd_abort_txs(conn, &conn->ibc_active_txs);
+
+ kiblnd_handle_early_rxs(conn);
+}
+
+void
+kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error)
+{
+ LIST_HEAD (zombies);
+ unsigned long flags;
+
+ LASSERT (error != 0);
+ LASSERT (!in_interrupt());
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ if (active) {
+ LASSERT (peer->ibp_connecting > 0);
+ peer->ibp_connecting--;
+ } else {
+ LASSERT (peer->ibp_accepting > 0);
+ peer->ibp_accepting--;
+ }
+
+ if (peer->ibp_connecting != 0 ||
+ peer->ibp_accepting != 0) {
+ /* another connection attempt under way... */
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+ flags);
+ return;
+ }
+
+ if (list_empty(&peer->ibp_conns)) {
+ /* Take peer's blocked transmits to complete with error */
+ list_add(&zombies, &peer->ibp_tx_queue);
+ list_del_init(&peer->ibp_tx_queue);
+
+ if (kiblnd_peer_active(peer))
+ kiblnd_unlink_peer_locked(peer);
+
+ peer->ibp_error = error;
+ } else {
+ /* Can't have blocked transmits if there are connections */
+ LASSERT (list_empty(&peer->ibp_tx_queue));
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_peer_notify(peer);
+
+ if (list_empty (&zombies))
+ return;
+
+ CNETERR("Deleting messages for %s: connection failed\n",
+ libcfs_nid2str(peer->ibp_nid));
+
+ kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH);
+}
+
+void
+kiblnd_connreq_done(kib_conn_t *conn, int status)
+{
+ kib_peer_t *peer = conn->ibc_peer;
+ kib_tx_t *tx;
+ struct list_head txs;
+ unsigned long flags;
+ int active;
+
+ active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+
+ CDEBUG(D_NET,"%s: active(%d), version(%x), status(%d)\n",
+ libcfs_nid2str(peer->ibp_nid), active,
+ conn->ibc_version, status);
+
+ LASSERT (!in_interrupt());
+ LASSERT ((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT &&
+ peer->ibp_connecting > 0) ||
+ (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT &&
+ peer->ibp_accepting > 0));
+
+ LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
+ conn->ibc_connvars = NULL;
+
+ if (status != 0) {
+ /* failed to establish connection */
+ kiblnd_peer_connect_failed(peer, active, status);
+ kiblnd_finalise_conn(conn);
+ return;
+ }
+
+ /* connection established */
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ conn->ibc_last_send = jiffies;
+ kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
+ kiblnd_peer_alive(peer);
+
+ /* Add conn to peer's list and nuke any dangling conns from a different
+ * peer instance... */
+ kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
+ list_add(&conn->ibc_list, &peer->ibp_conns);
+ if (active)
+ peer->ibp_connecting--;
+ else
+ peer->ibp_accepting--;
+
+ if (peer->ibp_version == 0) {
+ peer->ibp_version = conn->ibc_version;
+ peer->ibp_incarnation = conn->ibc_incarnation;
+ }
+
+ if (peer->ibp_version != conn->ibc_version ||
+ peer->ibp_incarnation != conn->ibc_incarnation) {
+ kiblnd_close_stale_conns_locked(peer, conn->ibc_version,
+ conn->ibc_incarnation);
+ peer->ibp_version = conn->ibc_version;
+ peer->ibp_incarnation = conn->ibc_incarnation;
+ }
+
+ /* grab pending txs while I have the lock */
+ list_add(&txs, &peer->ibp_tx_queue);
+ list_del_init(&peer->ibp_tx_queue);
+
+ if (!kiblnd_peer_active(peer) || /* peer has been deleted */
+ conn->ibc_comms_error != 0) { /* error has happened already */
+ lnet_ni_t *ni = peer->ibp_ni;
+
+ /* start to shut down connection */
+ kiblnd_close_conn_locked(conn, -ECONNABORTED);
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
+
+ return;
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ /* Schedule blocked txs */
+ spin_lock(&conn->ibc_lock);
+ while (!list_empty(&txs)) {
+ tx = list_entry(txs.next, kib_tx_t, tx_list);
+ list_del(&tx->tx_list);
+
+ kiblnd_queue_tx_locked(tx, conn);
+ }
+ spin_unlock(&conn->ibc_lock);
+
+ kiblnd_check_sends(conn);
+
+ /* schedule blocked rxs */
+ kiblnd_handle_early_rxs(conn);
+}
+
+void
+kiblnd_reject(struct rdma_cm_id *cmid, kib_rej_t *rej)
+{
+ int rc;
+
+ rc = rdma_reject(cmid, rej, sizeof(*rej));
+
+ if (rc != 0)
+ CWARN("Error %d sending reject\n", rc);
+}
+
+int
+kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
+{
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+ kib_msg_t *reqmsg = priv;
+ kib_msg_t *ackmsg;
+ kib_dev_t *ibdev;
+ kib_peer_t *peer;
+ kib_peer_t *peer2;
+ kib_conn_t *conn;
+ lnet_ni_t *ni = NULL;
+ kib_net_t *net = NULL;
+ lnet_nid_t nid;
+ struct rdma_conn_param cp;
+ kib_rej_t rej;
+ int version = IBLND_MSG_VERSION;
+ unsigned long flags;
+ int rc;
+ struct sockaddr_in *peer_addr;
+ LASSERT (!in_interrupt());
+
+ /* cmid inherits 'context' from the corresponding listener id */
+ ibdev = (kib_dev_t *)cmid->context;
+ LASSERT (ibdev != NULL);
+
+ memset(&rej, 0, sizeof(rej));
+ rej.ibr_magic = IBLND_MSG_MAGIC;
+ rej.ibr_why = IBLND_REJECT_FATAL;
+ rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
+
+ peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
+ if (*kiblnd_tunables.kib_require_priv_port &&
+ ntohs(peer_addr->sin_port) >= PROT_SOCK) {
+ __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
+ CERROR("Peer's port (%u.%u.%u.%u:%hu) is not privileged\n",
+ HIPQUAD(ip), ntohs(peer_addr->sin_port));
+ goto failed;
+ }
+
+ if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
+ CERROR("Short connection request\n");
+ goto failed;
+ }
+
+ /* Future protocol version compatibility support! If the
+ * o2iblnd-specific protocol changes, or when LNET unifies
+ * protocols over all LNDs, the initial connection will
+ * negotiate a protocol version. I trap this here to avoid
+ * console errors; the reject tells the peer which protocol I
+ * speak. */
+ if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
+ reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+ goto failed;
+ if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION_1)
+ goto failed;
+ if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
+ goto failed;
+
+ rc = kiblnd_unpack_msg(reqmsg, priv_nob);
+ if (rc != 0) {
+ CERROR("Can't parse connection request: %d\n", rc);
+ goto failed;
+ }
+
+ nid = reqmsg->ibm_srcnid;
+ ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
+
+ if (ni != NULL) {
+ net = (kib_net_t *)ni->ni_data;
+ rej.ibr_incarnation = net->ibn_incarnation;
+ }
+
+ if (ni == NULL || /* no matching net */
+ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
+ net->ibn_dev != ibdev) { /* wrong device */
+ CERROR("Can't accept %s on %s (%s:%d:%u.%u.%u.%u): "
+ "bad dst nid %s\n", libcfs_nid2str(nid),
+ ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+ ibdev->ibd_ifname, ibdev->ibd_nnets,
+ HIPQUAD(ibdev->ibd_ifip),
+ libcfs_nid2str(reqmsg->ibm_dstnid));
+
+ goto failed;
+ }
+
+ /* check time stamp as soon as possible */
+ if (reqmsg->ibm_dststamp != 0 &&
+ reqmsg->ibm_dststamp != net->ibn_incarnation) {
+ CWARN("Stale connection request\n");
+ rej.ibr_why = IBLND_REJECT_CONN_STALE;
+ goto failed;
+ }
+
+ /* I can accept peer's version */
+ version = reqmsg->ibm_version;
+
+ if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
+ CERROR("Unexpected connreq msg type: %x from %s\n",
+ reqmsg->ibm_type, libcfs_nid2str(nid));
+ goto failed;
+ }
+
+ if (reqmsg->ibm_u.connparams.ibcp_queue_depth !=
+ IBLND_MSG_QUEUE_SIZE(version)) {
+ CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n",
+ libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth,
+ IBLND_MSG_QUEUE_SIZE(version));
+
+ if (version == IBLND_MSG_VERSION)
+ rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
+
+ goto failed;
+ }
+
+ if (reqmsg->ibm_u.connparams.ibcp_max_frags !=
+ IBLND_RDMA_FRAGS(version)) {
+ CERROR("Can't accept %s(version %x): "
+ "incompatible max_frags %d (%d wanted)\n",
+ libcfs_nid2str(nid), version,
+ reqmsg->ibm_u.connparams.ibcp_max_frags,
+ IBLND_RDMA_FRAGS(version));
+
+ if (version == IBLND_MSG_VERSION)
+ rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
+
+ goto failed;
+
+ }
+
+ if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
+ CERROR("Can't accept %s: message size %d too big (%d max)\n",
+ libcfs_nid2str(nid),
+ reqmsg->ibm_u.connparams.ibcp_max_msg_size,
+ IBLND_MSG_SIZE);
+ goto failed;
+ }
+
+ /* assume 'nid' is a new peer; create */
+ rc = kiblnd_create_peer(ni, &peer, nid);
+ if (rc != 0) {
+ CERROR("Can't create peer for %s\n", libcfs_nid2str(nid));
+ rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
+ goto failed;
+ }
+
+ write_lock_irqsave(g_lock, flags);
+
+ peer2 = kiblnd_find_peer_locked(nid);
+ if (peer2 != NULL) {
+ if (peer2->ibp_version == 0) {
+ peer2->ibp_version = version;
+ peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
+ }
+
+ /* not the guy I've talked with */
+ if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
+ peer2->ibp_version != version) {
+ kiblnd_close_peer_conns_locked(peer2, -ESTALE);
+ write_unlock_irqrestore(g_lock, flags);
+
+ CWARN("Conn stale %s [old ver: %x, new ver: %x]\n",
+ libcfs_nid2str(nid), peer2->ibp_version, version);
+
+ kiblnd_peer_decref(peer);
+ rej.ibr_why = IBLND_REJECT_CONN_STALE;
+ goto failed;
+ }
+
+ /* tie-break connection race in favour of the higher NID */
+ if (peer2->ibp_connecting != 0 &&
+ nid < ni->ni_nid) {
+ write_unlock_irqrestore(g_lock, flags);
+
+ CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+
+ kiblnd_peer_decref(peer);
+ rej.ibr_why = IBLND_REJECT_CONN_RACE;
+ goto failed;
+ }
+
+ peer2->ibp_accepting++;
+ kiblnd_peer_addref(peer2);
+
+ write_unlock_irqrestore(g_lock, flags);
+ kiblnd_peer_decref(peer);
+ peer = peer2;
+ } else {
+ /* Brand new peer */
+ LASSERT (peer->ibp_accepting == 0);
+ LASSERT (peer->ibp_version == 0 &&
+ peer->ibp_incarnation == 0);
+
+ peer->ibp_accepting = 1;
+ peer->ibp_version = version;
+ peer->ibp_incarnation = reqmsg->ibm_srcstamp;
+
+ /* I have a ref on ni that prevents it being shutdown */
+ LASSERT (net->ibn_shutdown == 0);
+
+ kiblnd_peer_addref(peer);
+ list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
+
+ write_unlock_irqrestore(g_lock, flags);
+ }
+
+ conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, version);
+ if (conn == NULL) {
+ kiblnd_peer_connect_failed(peer, 0, -ENOMEM);
+ kiblnd_peer_decref(peer);
+ rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
+ goto failed;
+ }
+
+ /* conn now "owns" cmid, so I return success from here on to ensure the
+ * CM callback doesn't destroy cmid. */
+
+ conn->ibc_incarnation = reqmsg->ibm_srcstamp;
+ conn->ibc_credits = IBLND_MSG_QUEUE_SIZE(version);
+ conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(version);
+ LASSERT (conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version)
+ <= IBLND_RX_MSGS(version));
+
+ ackmsg = &conn->ibc_connvars->cv_msg;
+ memset(ackmsg, 0, sizeof(*ackmsg));
+
+ kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
+ sizeof(ackmsg->ibm_u.connparams));
+ ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
+ ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
+ ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
+
+ kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.private_data = ackmsg;
+ cp.private_data_len = ackmsg->ibm_nob;
+ cp.responder_resources = 0; /* No atomic ops or RDMA reads */
+ cp.initiator_depth = 0;
+ cp.flow_control = 1;
+ cp.retry_count = *kiblnd_tunables.kib_retry_count;
+ cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
+
+ CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
+
+ rc = rdma_accept(cmid, &cp);
+ if (rc != 0) {
+ CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
+ rej.ibr_version = version;
+ rej.ibr_why = IBLND_REJECT_FATAL;
+
+ kiblnd_reject(cmid, &rej);
+ kiblnd_connreq_done(conn, rc);
+ kiblnd_conn_decref(conn);
+ }
+
+ lnet_ni_decref(ni);
+ return 0;
+
+ failed:
+ if (ni != NULL)
+ lnet_ni_decref(ni);
+
+ rej.ibr_version = version;
+ rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
+ rej.ibr_cp.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
+ kiblnd_reject(cmid, &rej);
+
+ return -ECONNREFUSED;
+}
+
+void
+kiblnd_reconnect (kib_conn_t *conn, int version,
+ __u64 incarnation, int why, kib_connparams_t *cp)
+{
+ kib_peer_t *peer = conn->ibc_peer;
+ char *reason;
+ int retry = 0;
+ unsigned long flags;
+
+ LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+ LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */
+
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ /* retry connection if it's still needed and no other connection
+ * attempts (active or passive) are in progress
+ * NB: reconnect is still needed even when ibp_tx_queue is
+ * empty if ibp_version != version because reconnect may be
+ * initiated by kiblnd_query() */
+ if ((!list_empty(&peer->ibp_tx_queue) ||
+ peer->ibp_version != version) &&
+ peer->ibp_connecting == 1 &&
+ peer->ibp_accepting == 0) {
+ retry = 1;
+ peer->ibp_connecting++;
+
+ peer->ibp_version = version;
+ peer->ibp_incarnation = incarnation;
+ }
+
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ if (!retry)
+ return;
+
+ switch (why) {
+ default:
+ reason = "Unknown";
+ break;
+
+ case IBLND_REJECT_CONN_STALE:
+ reason = "stale";
+ break;
+
+ case IBLND_REJECT_CONN_RACE:
+ reason = "conn race";
+ break;
+
+ case IBLND_REJECT_CONN_UNCOMPAT:
+ reason = "version negotiation";
+ break;
+ }
+
+ CNETERR("%s: retrying (%s), %x, %x, "
+ "queue_dep: %d, max_frag: %d, msg_size: %d\n",
+ libcfs_nid2str(peer->ibp_nid),
+ reason, IBLND_MSG_VERSION, version,
+ cp != NULL? cp->ibcp_queue_depth :IBLND_MSG_QUEUE_SIZE(version),
+ cp != NULL? cp->ibcp_max_frags : IBLND_RDMA_FRAGS(version),
+ cp != NULL? cp->ibcp_max_msg_size: IBLND_MSG_SIZE);
+
+ kiblnd_connect_peer(peer);
+}
+
+void
+kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob)
+{
+ kib_peer_t *peer = conn->ibc_peer;
+
+ LASSERT (!in_interrupt());
+ LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
+
+ switch (reason) {
+ case IB_CM_REJ_STALE_CONN:
+ kiblnd_reconnect(conn, IBLND_MSG_VERSION, 0,
+ IBLND_REJECT_CONN_STALE, NULL);
+ break;
+
+ case IB_CM_REJ_INVALID_SERVICE_ID:
+ CNETERR("%s rejected: no listener at %d\n",
+ libcfs_nid2str(peer->ibp_nid),
+ *kiblnd_tunables.kib_service);
+ break;
+
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ if (priv_nob >= offsetof(kib_rej_t, ibr_padding)) {
+ kib_rej_t *rej = priv;
+ kib_connparams_t *cp = NULL;
+ int flip = 0;
+ __u64 incarnation = -1;
+
+ /* NB. default incarnation is -1 because:
+ * a) V1 will ignore dst incarnation in connreq.
+ * b) V2 will provide incarnation while rejecting me,
+ * -1 will be overwrote.
+ *
+ * if I try to connect to a V1 peer with V2 protocol,
+ * it rejected me then upgrade to V2, I have no idea
+ * about the upgrading and try to reconnect with V1,
+ * in this case upgraded V2 can find out I'm trying to
+ * talk to the old guy and reject me(incarnation is -1).
+ */
+
+ if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) ||
+ rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) {
+ __swab32s(&rej->ibr_magic);
+ __swab16s(&rej->ibr_version);
+ flip = 1;
+ }
+
+ if (priv_nob >= sizeof(kib_rej_t) &&
+ rej->ibr_version > IBLND_MSG_VERSION_1) {
+ /* priv_nob is always 148 in current version
+ * of OFED, so we still need to check version.
+ * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) */
+ cp = &rej->ibr_cp;
+
+ if (flip) {
+ __swab64s(&rej->ibr_incarnation);
+ __swab16s(&cp->ibcp_queue_depth);
+ __swab16s(&cp->ibcp_max_frags);
+ __swab32s(&cp->ibcp_max_msg_size);
+ }
+
+ incarnation = rej->ibr_incarnation;
+ }
+
+ if (rej->ibr_magic != IBLND_MSG_MAGIC &&
+ rej->ibr_magic != LNET_PROTO_MAGIC) {
+ CERROR("%s rejected: consumer defined fatal error\n",
+ libcfs_nid2str(peer->ibp_nid));
+ break;
+ }
+
+ if (rej->ibr_version != IBLND_MSG_VERSION &&
+ rej->ibr_version != IBLND_MSG_VERSION_1) {
+ CERROR("%s rejected: o2iblnd version %x error\n",
+ libcfs_nid2str(peer->ibp_nid),
+ rej->ibr_version);
+ break;
+ }
+
+ if (rej->ibr_why == IBLND_REJECT_FATAL &&
+ rej->ibr_version == IBLND_MSG_VERSION_1) {
+ CDEBUG(D_NET, "rejected by old version peer %s: %x\n",
+ libcfs_nid2str(peer->ibp_nid), rej->ibr_version);
+
+ if (conn->ibc_version != IBLND_MSG_VERSION_1)
+ rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT;
+ }
+
+ switch (rej->ibr_why) {
+ case IBLND_REJECT_CONN_RACE:
+ case IBLND_REJECT_CONN_STALE:
+ case IBLND_REJECT_CONN_UNCOMPAT:
+ kiblnd_reconnect(conn, rej->ibr_version,
+ incarnation, rej->ibr_why, cp);
+ break;
+
+ case IBLND_REJECT_MSG_QUEUE_SIZE:
+ CERROR("%s rejected: incompatible message queue depth %d, %d\n",
+ libcfs_nid2str(peer->ibp_nid), cp->ibcp_queue_depth,
+ IBLND_MSG_QUEUE_SIZE(conn->ibc_version));
+ break;
+
+ case IBLND_REJECT_RDMA_FRAGS:
+ CERROR("%s rejected: incompatible # of RDMA fragments %d, %d\n",
+ libcfs_nid2str(peer->ibp_nid), cp->ibcp_max_frags,
+ IBLND_RDMA_FRAGS(conn->ibc_version));
+ break;
+
+ case IBLND_REJECT_NO_RESOURCES:
+ CERROR("%s rejected: o2iblnd no resources\n",
+ libcfs_nid2str(peer->ibp_nid));
+ break;
+
+ case IBLND_REJECT_FATAL:
+ CERROR("%s rejected: o2iblnd fatal error\n",
+ libcfs_nid2str(peer->ibp_nid));
+ break;
+
+ default:
+ CERROR("%s rejected: o2iblnd reason %d\n",
+ libcfs_nid2str(peer->ibp_nid),
+ rej->ibr_why);
+ break;
+ }
+ break;
+ }
+ /* fall through */
+ default:
+ CNETERR("%s rejected: reason %d, size %d\n",
+ libcfs_nid2str(peer->ibp_nid), reason, priv_nob);
+ break;
+ }
+
+ kiblnd_connreq_done(conn, -ECONNREFUSED);
+}
+
+void
+kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
+{
+ kib_peer_t *peer = conn->ibc_peer;
+ lnet_ni_t *ni = peer->ibp_ni;
+ kib_net_t *net = ni->ni_data;
+ kib_msg_t *msg = priv;
+ int ver = conn->ibc_version;
+ int rc = kiblnd_unpack_msg(msg, priv_nob);
+ unsigned long flags;
+
+ LASSERT (net != NULL);
+
+ if (rc != 0) {
+ CERROR("Can't unpack connack from %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc);
+ goto failed;
+ }
+
+ if (msg->ibm_type != IBLND_MSG_CONNACK) {
+ CERROR("Unexpected message %d from %s\n",
+ msg->ibm_type, libcfs_nid2str(peer->ibp_nid));
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ if (ver != msg->ibm_version) {
+ CERROR("%s replied version %x is different with "
+ "requested version %x\n",
+ libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver);
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ if (msg->ibm_u.connparams.ibcp_queue_depth !=
+ IBLND_MSG_QUEUE_SIZE(ver)) {
+ CERROR("%s has incompatible queue depth %d(%d wanted)\n",
+ libcfs_nid2str(peer->ibp_nid),
+ msg->ibm_u.connparams.ibcp_queue_depth,
+ IBLND_MSG_QUEUE_SIZE(ver));
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ if (msg->ibm_u.connparams.ibcp_max_frags !=
+ IBLND_RDMA_FRAGS(ver)) {
+ CERROR("%s has incompatible max_frags %d (%d wanted)\n",
+ libcfs_nid2str(peer->ibp_nid),
+ msg->ibm_u.connparams.ibcp_max_frags,
+ IBLND_RDMA_FRAGS(ver));
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
+ CERROR("%s max message size %d too big (%d max)\n",
+ libcfs_nid2str(peer->ibp_nid),
+ msg->ibm_u.connparams.ibcp_max_msg_size,
+ IBLND_MSG_SIZE);
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ if (msg->ibm_dstnid == ni->ni_nid &&
+ msg->ibm_dststamp == net->ibn_incarnation)
+ rc = 0;
+ else
+ rc = -ESTALE;
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ if (rc != 0) {
+ CERROR("Bad connection reply from %s, rc = %d, "
+ "version: %x max_frags: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc,
+ msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags);
+ goto failed;
+ }
+
+ conn->ibc_incarnation = msg->ibm_srcstamp;
+ conn->ibc_credits =
+ conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE(ver);
+ LASSERT (conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(ver)
+ <= IBLND_RX_MSGS(ver));
+
+ kiblnd_connreq_done(conn, 0);
+ return;
+
+ failed:
+ /* NB My QP has already established itself, so I handle anything going
+ * wrong here by setting ibc_comms_error.
+ * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then
+ * immediately tears it down. */
+
+ LASSERT (rc != 0);
+ conn->ibc_comms_error = rc;
+ kiblnd_connreq_done(conn, 0);
+}
+
+int
+kiblnd_active_connect (struct rdma_cm_id *cmid)
+{
+ kib_peer_t *peer = (kib_peer_t *)cmid->context;
+ kib_conn_t *conn;
+ kib_msg_t *msg;
+ struct rdma_conn_param cp;
+ int version;
+ __u64 incarnation;
+ unsigned long flags;
+ int rc;
+
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ incarnation = peer->ibp_incarnation;
+ version = (peer->ibp_version == 0) ? IBLND_MSG_VERSION :
+ peer->ibp_version;
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, version);
+ if (conn == NULL) {
+ kiblnd_peer_connect_failed(peer, 1, -ENOMEM);
+ kiblnd_peer_decref(peer); /* lose cmid's ref */
+ return -ENOMEM;
+ }
+
+ /* conn "owns" cmid now, so I return success from here on to ensure the
+ * CM callback doesn't destroy cmid. conn also takes over cmid's ref
+ * on peer */
+
+ msg = &conn->ibc_connvars->cv_msg;
+
+ memset(msg, 0, sizeof(*msg));
+ kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
+ msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
+ msg->ibm_u.connparams.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
+ msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
+
+ kiblnd_pack_msg(peer->ibp_ni, msg, version,
+ 0, peer->ibp_nid, incarnation);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.private_data = msg;
+ cp.private_data_len = msg->ibm_nob;
+ cp.responder_resources = 0; /* No atomic ops or RDMA reads */
+ cp.initiator_depth = 0;
+ cp.flow_control = 1;
+ cp.retry_count = *kiblnd_tunables.kib_retry_count;
+ cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count;
+
+ LASSERT(cmid->context == (void *)conn);
+ LASSERT(conn->ibc_cmid == cmid);
+
+ rc = rdma_connect(cmid, &cp);
+ if (rc != 0) {
+ CERROR("Can't connect to %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc);
+ kiblnd_connreq_done(conn, rc);
+ kiblnd_conn_decref(conn);
+ }
+
+ return 0;
+}
+
+int
+kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
+{
+ kib_peer_t *peer;
+ kib_conn_t *conn;
+ int rc;
+
+ switch (event->event) {
+ default:
+ CERROR("Unexpected event: %d, status: %d\n",
+ event->event, event->status);
+ LBUG();
+
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ /* destroy cmid on failure */
+ rc = kiblnd_passive_connect(cmid,
+ (void *)KIBLND_CONN_PARAM(event),
+ KIBLND_CONN_PARAM_LEN(event));
+ CDEBUG(D_NET, "connreq: %d\n", rc);
+ return rc;
+
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ peer = (kib_peer_t *)cmid->context;
+ CNETERR("%s: ADDR ERROR %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
+ kiblnd_peer_decref(peer);
+ return -EHOSTUNREACH; /* rc != 0 destroys cmid */
+
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ peer = (kib_peer_t *)cmid->context;
+
+ CDEBUG(D_NET,"%s Addr resolved: %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+
+ if (event->status != 0) {
+ CNETERR("Can't resolve address for %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+ rc = event->status;
+ } else {
+ rc = rdma_resolve_route(
+ cmid, *kiblnd_tunables.kib_timeout * 1000);
+ if (rc == 0)
+ return 0;
+ /* Can't initiate route resolution */
+ CERROR("Can't resolve route for %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc);
+ }
+ kiblnd_peer_connect_failed(peer, 1, rc);
+ kiblnd_peer_decref(peer);
+ return rc; /* rc != 0 destroys cmid */
+
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ peer = (kib_peer_t *)cmid->context;
+ CNETERR("%s: ROUTE ERROR %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
+ kiblnd_peer_decref(peer);
+ return -EHOSTUNREACH; /* rc != 0 destroys cmid */
+
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ peer = (kib_peer_t *)cmid->context;
+ CDEBUG(D_NET,"%s Route resolved: %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+
+ if (event->status == 0)
+ return kiblnd_active_connect(cmid);
+
+ CNETERR("Can't resolve route for %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), event->status);
+ kiblnd_peer_connect_failed(peer, 1, event->status);
+ kiblnd_peer_decref(peer);
+ return event->status; /* rc != 0 destroys cmid */
+
+ case RDMA_CM_EVENT_UNREACHABLE:
+ conn = (kib_conn_t *)cmid->context;
+ LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
+ conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
+ CNETERR("%s: UNREACHABLE %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
+ kiblnd_connreq_done(conn, -ENETDOWN);
+ kiblnd_conn_decref(conn);
+ return 0;
+
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ conn = (kib_conn_t *)cmid->context;
+ LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
+ conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
+ CNETERR("%s: CONNECT ERROR %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
+ kiblnd_connreq_done(conn, -ENOTCONN);
+ kiblnd_conn_decref(conn);
+ return 0;
+
+ case RDMA_CM_EVENT_REJECTED:
+ conn = (kib_conn_t *)cmid->context;
+ switch (conn->ibc_state) {
+ default:
+ LBUG();
+
+ case IBLND_CONN_PASSIVE_WAIT:
+ CERROR ("%s: REJECTED %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ event->status);
+ kiblnd_connreq_done(conn, -ECONNRESET);
+ break;
+
+ case IBLND_CONN_ACTIVE_CONNECT:
+ kiblnd_rejected(conn, event->status,
+ (void *)KIBLND_CONN_PARAM(event),
+ KIBLND_CONN_PARAM_LEN(event));
+ break;
+ }
+ kiblnd_conn_decref(conn);
+ return 0;
+
+ case RDMA_CM_EVENT_ESTABLISHED:
+ conn = (kib_conn_t *)cmid->context;
+ switch (conn->ibc_state) {
+ default:
+ LBUG();
+
+ case IBLND_CONN_PASSIVE_WAIT:
+ CDEBUG(D_NET, "ESTABLISHED (passive): %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ kiblnd_connreq_done(conn, 0);
+ break;
+
+ case IBLND_CONN_ACTIVE_CONNECT:
+ CDEBUG(D_NET, "ESTABLISHED(active): %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ kiblnd_check_connreply(conn,
+ (void *)KIBLND_CONN_PARAM(event),
+ KIBLND_CONN_PARAM_LEN(event));
+ break;
+ }
+ /* net keeps its ref on conn! */
+ return 0;
+
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n");
+ return 0;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ conn = (kib_conn_t *)cmid->context;
+ if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+ CERROR("%s DISCONNECTED\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ kiblnd_connreq_done(conn, -ECONNRESET);
+ } else {
+ kiblnd_close_conn(conn, 0);
+ }
+ kiblnd_conn_decref(conn);
+ cmid->context = NULL;
+ return 0;
+
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ LCONSOLE_ERROR_MSG(0x131,
+ "Received notification of device removal\n"
+ "Please shutdown LNET to allow this to proceed\n");
+ /* Can't remove network from underneath LNET for now, so I have
+ * to ignore this */
+ return 0;
+
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ LCONSOLE_INFO("Physical link changed (eg hca/port)\n");
+ return 0;
+ }
+}
+
+static int
+kiblnd_check_txs_locked(kib_conn_t *conn, struct list_head *txs)
+{
+ kib_tx_t *tx;
+ struct list_head *ttmp;
+
+ list_for_each (ttmp, txs) {
+ tx = list_entry (ttmp, kib_tx_t, tx_list);
+
+ if (txs != &conn->ibc_active_txs) {
+ LASSERT (tx->tx_queued);
+ } else {
+ LASSERT (!tx->tx_queued);
+ LASSERT (tx->tx_waiting || tx->tx_sending != 0);
+ }
+
+ if (cfs_time_aftereq (jiffies, tx->tx_deadline)) {
+ CERROR("Timed out tx: %s, %lu seconds\n",
+ kiblnd_queue2str(conn, txs),
+ cfs_duration_sec(jiffies - tx->tx_deadline));
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+kiblnd_conn_timed_out_locked(kib_conn_t *conn)
+{
+ return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) ||
+ kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) ||
+ kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) ||
+ kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) ||
+ kiblnd_check_txs_locked(conn, &conn->ibc_active_txs);
+}
+
+void
+kiblnd_check_conns (int idx)
+{
+ LIST_HEAD (closes);
+ LIST_HEAD (checksends);
+ struct list_head *peers = &kiblnd_data.kib_peers[idx];
+ struct list_head *ptmp;
+ kib_peer_t *peer;
+ kib_conn_t *conn;
+ struct list_head *ctmp;
+ unsigned long flags;
+
+ /* NB. We expect to have a look at all the peers and not find any
+ * RDMAs to time out, so we just use a shared lock while we
+ * take a look... */
+ read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+
+ list_for_each (ptmp, peers) {
+ peer = list_entry (ptmp, kib_peer_t, ibp_list);
+
+ list_for_each (ctmp, &peer->ibp_conns) {
+ int timedout;
+ int sendnoop;
+
+ conn = list_entry(ctmp, kib_conn_t, ibc_list);
+
+ LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED);
+
+ spin_lock(&conn->ibc_lock);
+
+ sendnoop = kiblnd_need_noop(conn);
+ timedout = kiblnd_conn_timed_out_locked(conn);
+ if (!sendnoop && !timedout) {
+ spin_unlock(&conn->ibc_lock);
+ continue;
+ }
+
+ if (timedout) {
+ CERROR("Timed out RDMA with %s (%lu): "
+ "c: %u, oc: %u, rc: %u\n",
+ libcfs_nid2str(peer->ibp_nid),
+ cfs_duration_sec(cfs_time_current() -
+ peer->ibp_last_alive),
+ conn->ibc_credits,
+ conn->ibc_outstanding_credits,
+ conn->ibc_reserved_credits);
+ list_add(&conn->ibc_connd_list, &closes);
+ } else {
+ list_add(&conn->ibc_connd_list,
+ &checksends);
+ }
+ /* +ref for 'closes' or 'checksends' */
+ kiblnd_conn_addref(conn);
+
+ spin_unlock(&conn->ibc_lock);
+ }
+ }
+
+ read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ /* Handle timeout by closing the whole
+ * connection. We can only be sure RDMA activity
+ * has ceased once the QP has been modified. */
+ while (!list_empty(&closes)) {
+ conn = list_entry(closes.next,
+ kib_conn_t, ibc_connd_list);
+ list_del(&conn->ibc_connd_list);
+ kiblnd_close_conn(conn, -ETIMEDOUT);
+ kiblnd_conn_decref(conn);
+ }
+
+ /* In case we have enough credits to return via a
+ * NOOP, but there were no non-blocking tx descs
+ * free to do it last time... */
+ while (!list_empty(&checksends)) {
+ conn = list_entry(checksends.next,
+ kib_conn_t, ibc_connd_list);
+ list_del(&conn->ibc_connd_list);
+ kiblnd_check_sends(conn);
+ kiblnd_conn_decref(conn);
+ }
+}
+
+void
+kiblnd_disconnect_conn (kib_conn_t *conn)
+{
+ LASSERT (!in_interrupt());
+ LASSERT (current == kiblnd_data.kib_connd);
+ LASSERT (conn->ibc_state == IBLND_CONN_CLOSING);
+
+ rdma_disconnect(conn->ibc_cmid);
+ kiblnd_finalise_conn(conn);
+
+ kiblnd_peer_notify(conn->ibc_peer);
+}
+
+int
+kiblnd_connd (void *arg)
+{
+ wait_queue_t wait;
+ unsigned long flags;
+ kib_conn_t *conn;
+ int timeout;
+ int i;
+ int dropped_lock;
+ int peer_index = 0;
+ unsigned long deadline = jiffies;
+
+ cfs_block_allsigs ();
+
+ init_waitqueue_entry_current (&wait);
+ kiblnd_data.kib_connd = current;
+
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+
+ while (!kiblnd_data.kib_shutdown) {
+
+ dropped_lock = 0;
+
+ if (!list_empty (&kiblnd_data.kib_connd_zombies)) {
+ conn = list_entry(kiblnd_data. \
+ kib_connd_zombies.next,
+ kib_conn_t, ibc_list);
+ list_del(&conn->ibc_list);
+
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
+ flags);
+ dropped_lock = 1;
+
+ kiblnd_destroy_conn(conn);
+
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+ }
+
+ if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+ conn = list_entry(kiblnd_data.kib_connd_conns.next,
+ kib_conn_t, ibc_list);
+ list_del(&conn->ibc_list);
+
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock,
+ flags);
+ dropped_lock = 1;
+
+ kiblnd_disconnect_conn(conn);
+ kiblnd_conn_decref(conn);
+
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+ }
+
+ /* careful with the jiffy wrap... */
+ timeout = (int)(deadline - jiffies);
+ if (timeout <= 0) {
+ const int n = 4;
+ const int p = 1;
+ int chunk = kiblnd_data.kib_peer_hash_size;
+
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+ dropped_lock = 1;
+
+ /* Time to check for RDMA timeouts on a few more
+ * peers: I do checks every 'p' seconds on a
+ * proportion of the peer table and I need to check
+ * every connection 'n' times within a timeout
+ * interval, to ensure I detect a timeout on any
+ * connection within (n+1)/n times the timeout
+ * interval. */
+
+ if (*kiblnd_tunables.kib_timeout > n * p)
+ chunk = (chunk * n * p) /
+ *kiblnd_tunables.kib_timeout;
+ if (chunk == 0)
+ chunk = 1;
+
+ for (i = 0; i < chunk; i++) {
+ kiblnd_check_conns(peer_index);
+ peer_index = (peer_index + 1) %
+ kiblnd_data.kib_peer_hash_size;
+ }
+
+ deadline += p * HZ;
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+ }
+
+ if (dropped_lock)
+ continue;
+
+ /* Nothing to do for 'timeout' */
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+
+ waitq_timedwait(&wait, TASK_INTERRUPTIBLE, timeout);
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);
+
+ kiblnd_thread_fini();
+ return 0;
+}
+
+void
+kiblnd_qp_event(struct ib_event *event, void *arg)
+{
+ kib_conn_t *conn = arg;
+
+ switch (event->event) {
+ case IB_EVENT_COMM_EST:
+ CDEBUG(D_NET, "%s established\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ return;
+
+ default:
+ CERROR("%s: Async QP event type %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
+ return;
+ }
+}
+
+void
+kiblnd_complete (struct ib_wc *wc)
+{
+ switch (kiblnd_wreqid2type(wc->wr_id)) {
+ default:
+ LBUG();
+
+ case IBLND_WID_RDMA:
+ /* We only get RDMA completion notification if it fails. All
+ * subsequent work items, including the final SEND will fail
+ * too. However we can't print out any more info about the
+ * failing RDMA because 'tx' might be back on the idle list or
+ * even reused already if we didn't manage to post all our work
+ * items */
+ CNETERR("RDMA (tx: %p) failed: %d\n",
+ kiblnd_wreqid2ptr(wc->wr_id), wc->status);
+ return;
+
+ case IBLND_WID_TX:
+ kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status);
+ return;
+
+ case IBLND_WID_RX:
+ kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status,
+ wc->byte_len);
+ return;
+ }
+}
+
+void
+kiblnd_cq_completion(struct ib_cq *cq, void *arg)
+{
+ /* NB I'm not allowed to schedule this conn once its refcount has
+ * reached 0. Since fundamentally I'm racing with scheduler threads
+ * consuming my CQ I could be called after all completions have
+ * occurred. But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0
+ * and this CQ is about to be destroyed so I NOOP. */
+ kib_conn_t *conn = (kib_conn_t *)arg;
+ struct kib_sched_info *sched = conn->ibc_sched;
+ unsigned long flags;
+
+ LASSERT(cq == conn->ibc_cq);
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+
+ conn->ibc_ready = 1;
+
+ if (!conn->ibc_scheduled &&
+ (conn->ibc_nrx > 0 ||
+ conn->ibc_nsends_posted > 0)) {
+ kiblnd_conn_addref(conn); /* +1 ref for sched_conns */
+ conn->ibc_scheduled = 1;
+ list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns);
+
+ if (waitqueue_active(&sched->ibs_waitq))
+ wake_up(&sched->ibs_waitq);
+ }
+
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+}
+
+void
+kiblnd_cq_event(struct ib_event *event, void *arg)
+{
+ kib_conn_t *conn = arg;
+
+ CERROR("%s: async CQ event type %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
+}
+
+int
+kiblnd_scheduler(void *arg)
+{
+ long id = (long)arg;
+ struct kib_sched_info *sched;
+ kib_conn_t *conn;
+ wait_queue_t wait;
+ unsigned long flags;
+ struct ib_wc wc;
+ int did_something;
+ int busy_loops = 0;
+ int rc;
+
+ cfs_block_allsigs();
+
+ init_waitqueue_entry_current(&wait);
+
+ sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
+
+ rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt);
+ if (rc != 0) {
+ CWARN("Failed to bind on CPT %d, please verify whether "
+ "all CPUs are healthy and reload modules if necessary, "
+ "otherwise your system might under risk of low "
+ "performance\n", sched->ibs_cpt);
+ }
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+
+ while (!kiblnd_data.kib_shutdown) {
+ if (busy_loops++ >= IBLND_RESCHED) {
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ cond_resched();
+ busy_loops = 0;
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ }
+
+ did_something = 0;
+
+ if (!list_empty(&sched->ibs_conns)) {
+ conn = list_entry(sched->ibs_conns.next,
+ kib_conn_t, ibc_sched_list);
+ /* take over kib_sched_conns' ref on conn... */
+ LASSERT(conn->ibc_scheduled);
+ list_del(&conn->ibc_sched_list);
+ conn->ibc_ready = 0;
+
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
+ if (rc == 0) {
+ rc = ib_req_notify_cq(conn->ibc_cq,
+ IB_CQ_NEXT_COMP);
+ if (rc < 0) {
+ CWARN("%s: ib_req_notify_cq failed: %d, "
+ "closing connection\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+ kiblnd_close_conn(conn, -EIO);
+ kiblnd_conn_decref(conn);
+ spin_lock_irqsave(&sched->ibs_lock,
+ flags);
+ continue;
+ }
+
+ rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
+ }
+
+ if (rc < 0) {
+ CWARN("%s: ib_poll_cq failed: %d, "
+ "closing connection\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ rc);
+ kiblnd_close_conn(conn, -EIO);
+ kiblnd_conn_decref(conn);
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ continue;
+ }
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+
+ if (rc != 0 || conn->ibc_ready) {
+ /* There may be another completion waiting; get
+ * another scheduler to check while I handle
+ * this one... */
+ /* +1 ref for sched_conns */
+ kiblnd_conn_addref(conn);
+ list_add_tail(&conn->ibc_sched_list,
+ &sched->ibs_conns);
+ if (waitqueue_active(&sched->ibs_waitq))
+ wake_up(&sched->ibs_waitq);
+ } else {
+ conn->ibc_scheduled = 0;
+ }
+
+ if (rc != 0) {
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+ kiblnd_complete(&wc);
+
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ }
+
+ kiblnd_conn_decref(conn); /* ...drop my ref from above */
+ did_something = 1;
+ }
+
+ if (did_something)
+ continue;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&sched->ibs_waitq, &wait);
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ waitq_wait(&wait, TASK_INTERRUPTIBLE);
+ busy_loops = 0;
+
+ remove_wait_queue(&sched->ibs_waitq, &wait);
+ set_current_state(TASK_RUNNING);
+ spin_lock_irqsave(&sched->ibs_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&sched->ibs_lock, flags);
+
+ kiblnd_thread_fini();
+ return 0;
+}
+
+int
+kiblnd_failover_thread(void *arg)
+{
+ rwlock_t *glock = &kiblnd_data.kib_global_lock;
+ kib_dev_t *dev;
+ wait_queue_t wait;
+ unsigned long flags;
+ int rc;
+
+ LASSERT (*kiblnd_tunables.kib_dev_failover != 0);
+
+ cfs_block_allsigs ();
+
+ init_waitqueue_entry_current(&wait);
+ write_lock_irqsave(glock, flags);
+
+ while (!kiblnd_data.kib_shutdown) {
+ int do_failover = 0;
+ int long_sleep;
+
+ list_for_each_entry(dev, &kiblnd_data.kib_failed_devs,
+ ibd_fail_list) {
+ if (cfs_time_before(cfs_time_current(),
+ dev->ibd_next_failover))
+ continue;
+ do_failover = 1;
+ break;
+ }
+
+ if (do_failover) {
+ list_del_init(&dev->ibd_fail_list);
+ dev->ibd_failover = 1;
+ write_unlock_irqrestore(glock, flags);
+
+ rc = kiblnd_dev_failover(dev);
+
+ write_lock_irqsave(glock, flags);
+
+ LASSERT (dev->ibd_failover);
+ dev->ibd_failover = 0;
+ if (rc >= 0) { /* Device is OK or failover succeed */
+ dev->ibd_next_failover = cfs_time_shift(3);
+ continue;
+ }
+
+ /* failed to failover, retry later */
+ dev->ibd_next_failover =
+ cfs_time_shift(min(dev->ibd_failed_failover, 10));
+ if (kiblnd_dev_can_failover(dev)) {
+ list_add_tail(&dev->ibd_fail_list,
+ &kiblnd_data.kib_failed_devs);
+ }
+
+ continue;
+ }
+
+ /* long sleep if no more pending failover */
+ long_sleep = list_empty(&kiblnd_data.kib_failed_devs);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
+ write_unlock_irqrestore(glock, flags);
+
+ rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) :
+ cfs_time_seconds(1));
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
+ write_lock_irqsave(glock, flags);
+
+ if (!long_sleep || rc != 0)
+ continue;
+
+ /* have a long sleep, routine check all active devices,
+ * we need checking like this because if there is not active
+ * connection on the dev and no SEND from local, we may listen
+ * on wrong HCA for ever while there is a bonding failover */
+ list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) {
+ if (kiblnd_dev_can_failover(dev)) {
+ list_add_tail(&dev->ibd_fail_list,
+ &kiblnd_data.kib_failed_devs);
+ }
+ }
+ }
+
+ write_unlock_irqrestore(glock, flags);
+
+ kiblnd_thread_fini();
+ return 0;
+}
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
new file mode 100644
index 000000000000..e21028b72302
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
@@ -0,0 +1,493 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd_modparams.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "o2iblnd.h"
+
+static int service = 987;
+CFS_MODULE_PARM(service, "i", int, 0444,
+ "service number (within RDMA_PS_TCP)");
+
+static int cksum = 0;
+CFS_MODULE_PARM(cksum, "i", int, 0644,
+ "set non-zero to enable message (not RDMA) checksums");
+
+static int timeout = 50;
+CFS_MODULE_PARM(timeout, "i", int, 0644,
+ "timeout (seconds)");
+
+/* Number of threads in each scheduler pool which is percpt,
+ * we will estimate reasonable value based on CPUs if it's set to zero. */
+static int nscheds;
+CFS_MODULE_PARM(nscheds, "i", int, 0444,
+ "number of threads in each scheduler pool");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int ntx = 512;
+CFS_MODULE_PARM(ntx, "i", int, 0444,
+ "# of message descriptors allocated for each pool");
+
+/* NB: this value is shared by all CPTs */
+static int credits = 256;
+CFS_MODULE_PARM(credits, "i", int, 0444,
+ "# concurrent sends");
+
+static int peer_credits = 8;
+CFS_MODULE_PARM(peer_credits, "i", int, 0444,
+ "# concurrent sends to 1 peer");
+
+static int peer_credits_hiw = 0;
+CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
+ "when eagerly to return credits");
+
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+ "# per-peer router buffer credits");
+
+static int peer_timeout = 180;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+ "Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
+static char *ipif_name = "ib0";
+CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
+ "IPoIB interface name");
+
+static int retry_count = 5;
+CFS_MODULE_PARM(retry_count, "i", int, 0644,
+ "Retransmissions when no ACK received");
+
+static int rnr_retry_count = 6;
+CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
+ "RNR retransmissions");
+
+static int keepalive = 100;
+CFS_MODULE_PARM(keepalive, "i", int, 0644,
+ "Idle time in seconds before sending a keepalive");
+
+static int ib_mtu = 0;
+CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
+ "IB MTU 256/512/1024/2048/4096");
+
+static int concurrent_sends = 0;
+CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
+ "send work-queue sizing");
+
+static int map_on_demand = 0;
+CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
+ "map on demand");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int fmr_pool_size = 512;
+CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
+ "size of fmr pool on each CPT (>= ntx / 4)");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int fmr_flush_trigger = 384;
+CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
+ "# dirty FMRs that triggers pool flush");
+
+static int fmr_cache = 1;
+CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
+ "non-zero to enable FMR caching");
+
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int pmr_pool_size = 512;
+CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
+ "size of MR cache pmr pool on each CPT");
+
+/*
+ * 0: disable failover
+ * 1: enable failover if necessary
+ * 2: force to failover (for debug)
+ */
+static int dev_failover = 0;
+CFS_MODULE_PARM(dev_failover, "i", int, 0444,
+ "HCA failover for bonding (0 off, 1 on, other values reserved)");
+
+
+static int require_privileged_port = 0;
+CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
+ "require privileged port when accepting connection");
+
+static int use_privileged_port = 1;
+CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
+ "use privileged port when initiating connection");
+
+kib_tunables_t kiblnd_tunables = {
+ .kib_dev_failover = &dev_failover,
+ .kib_service = &service,
+ .kib_cksum = &cksum,
+ .kib_timeout = &timeout,
+ .kib_keepalive = &keepalive,
+ .kib_ntx = &ntx,
+ .kib_credits = &credits,
+ .kib_peertxcredits = &peer_credits,
+ .kib_peercredits_hiw = &peer_credits_hiw,
+ .kib_peerrtrcredits = &peer_buffer_credits,
+ .kib_peertimeout = &peer_timeout,
+ .kib_default_ipif = &ipif_name,
+ .kib_retry_count = &retry_count,
+ .kib_rnr_retry_count = &rnr_retry_count,
+ .kib_concurrent_sends = &concurrent_sends,
+ .kib_ib_mtu = &ib_mtu,
+ .kib_map_on_demand = &map_on_demand,
+ .kib_fmr_pool_size = &fmr_pool_size,
+ .kib_fmr_flush_trigger = &fmr_flush_trigger,
+ .kib_fmr_cache = &fmr_cache,
+ .kib_pmr_pool_size = &pmr_pool_size,
+ .kib_require_priv_port = &require_privileged_port,
+ .kib_use_priv_port = &use_privileged_port,
+ .kib_nscheds = &nscheds
+};
+
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+
+static char ipif_basename_space[32];
+
+
+enum {
+ O2IBLND_SERVICE = 1,
+ O2IBLND_CKSUM,
+ O2IBLND_TIMEOUT,
+ O2IBLND_NTX,
+ O2IBLND_CREDITS,
+ O2IBLND_PEER_TXCREDITS,
+ O2IBLND_PEER_CREDITS_HIW,
+ O2IBLND_PEER_RTRCREDITS,
+ O2IBLND_PEER_TIMEOUT,
+ O2IBLND_IPIF_BASENAME,
+ O2IBLND_RETRY_COUNT,
+ O2IBLND_RNR_RETRY_COUNT,
+ O2IBLND_KEEPALIVE,
+ O2IBLND_CONCURRENT_SENDS,
+ O2IBLND_IB_MTU,
+ O2IBLND_MAP_ON_DEMAND,
+ O2IBLND_FMR_POOL_SIZE,
+ O2IBLND_FMR_FLUSH_TRIGGER,
+ O2IBLND_FMR_CACHE,
+ O2IBLND_PMR_POOL_SIZE,
+ O2IBLND_DEV_FAILOVER
+};
+
+static ctl_table_t kiblnd_ctl_table[] = {
+ {
+ .ctl_name = O2IBLND_SERVICE,
+ .procname = "service",
+ .data = &service,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_CKSUM,
+ .procname = "cksum",
+ .data = &cksum,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_TIMEOUT,
+ .procname = "timeout",
+ .data = &timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_NTX,
+ .procname = "ntx",
+ .data = &ntx,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_CREDITS,
+ .procname = "credits",
+ .data = &credits,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_PEER_TXCREDITS,
+ .procname = "peer_credits",
+ .data = &peer_credits,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_PEER_CREDITS_HIW,
+ .procname = "peer_credits_hiw",
+ .data = &peer_credits_hiw,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_PEER_RTRCREDITS,
+ .procname = "peer_buffer_credits",
+ .data = &peer_buffer_credits,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_PEER_TIMEOUT,
+ .procname = "peer_timeout",
+ .data = &peer_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_IPIF_BASENAME,
+ .procname = "ipif_name",
+ .data = ipif_basename_space,
+ .maxlen = sizeof(ipif_basename_space),
+ .mode = 0444,
+ .proc_handler = &proc_dostring
+ },
+ {
+ .ctl_name = O2IBLND_RETRY_COUNT,
+ .procname = "retry_count",
+ .data = &retry_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_RNR_RETRY_COUNT,
+ .procname = "rnr_retry_count",
+ .data = &rnr_retry_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_KEEPALIVE,
+ .procname = "keepalive",
+ .data = &keepalive,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_CONCURRENT_SENDS,
+ .procname = "concurrent_sends",
+ .data = &concurrent_sends,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_IB_MTU,
+ .procname = "ib_mtu",
+ .data = &ib_mtu,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_MAP_ON_DEMAND,
+ .procname = "map_on_demand",
+ .data = &map_on_demand,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+
+ {
+ .ctl_name = O2IBLND_FMR_POOL_SIZE,
+ .procname = "fmr_pool_size",
+ .data = &fmr_pool_size,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
+ .procname = "fmr_flush_trigger",
+ .data = &fmr_flush_trigger,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_FMR_CACHE,
+ .procname = "fmr_cache",
+ .data = &fmr_cache,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_PMR_POOL_SIZE,
+ .procname = "pmr_pool_size",
+ .data = &pmr_pool_size,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = O2IBLND_DEV_FAILOVER,
+ .procname = "dev_failover",
+ .data = &dev_failover,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {0}
+};
+
+static ctl_table_t kiblnd_top_ctl_table[] = {
+ {
+ .ctl_name = CTL_O2IBLND,
+ .procname = "o2iblnd",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0555,
+ .child = kiblnd_ctl_table
+ },
+ {0}
+};
+
+void
+kiblnd_initstrtunable(char *space, char *str, int size)
+{
+ strncpy(space, str, size);
+ space[size-1] = 0;
+}
+
+void
+kiblnd_sysctl_init (void)
+{
+ kiblnd_initstrtunable(ipif_basename_space, ipif_name,
+ sizeof(ipif_basename_space));
+
+ kiblnd_tunables.kib_sysctl =
+ cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
+
+ if (kiblnd_tunables.kib_sysctl == NULL)
+ CWARN("Can't setup /proc tunables\n");
+}
+
+void
+kiblnd_sysctl_fini (void)
+{
+ if (kiblnd_tunables.kib_sysctl != NULL)
+ unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
+}
+
+#else
+
+void
+kiblnd_sysctl_init (void)
+{
+}
+
+void
+kiblnd_sysctl_fini (void)
+{
+}
+
+#endif
+
+int
+kiblnd_tunables_init (void)
+{
+ if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
+ CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
+ *kiblnd_tunables.kib_ib_mtu);
+ return -EINVAL;
+ }
+
+ if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
+ *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
+
+ if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
+ *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
+
+ if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
+ *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
+
+ if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
+ *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
+
+ if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
+ *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
+
+ if (*kiblnd_tunables.kib_map_on_demand < 0 ||
+ *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
+ *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
+
+ if (*kiblnd_tunables.kib_map_on_demand == 1)
+ *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
+
+ if (*kiblnd_tunables.kib_concurrent_sends == 0) {
+ if (*kiblnd_tunables.kib_map_on_demand > 0 &&
+ *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
+ *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
+ else
+ *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
+ }
+
+ if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
+ *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
+
+ if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
+ *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
+
+ if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
+ CWARN("Concurrent sends %d is lower than message queue size: %d, "
+ "performance may drop slightly.\n",
+ *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
+ }
+
+ kiblnd_sysctl_init();
+ return 0;
+}
+
+void
+kiblnd_tunables_fini (void)
+{
+ kiblnd_sysctl_fini();
+}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
new file mode 100644
index 000000000000..6494b2bada05
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_LNET) += ksocklnd.o
+
+ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib-linux.o
+
+
+
+ccflags-y := -I$(src)/../../include
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
new file mode 100644
index 000000000000..c826bf9d49ac
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -0,0 +1,2902 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/socklnd/socklnd.c
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ */
+
+#include "socklnd.h"
+
+lnd_t the_ksocklnd;
+ksock_nal_data_t ksocknal_data;
+
+ksock_interface_t *
+ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ ksock_interface_t *iface;
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ LASSERT(i < LNET_MAX_INTERFACES);
+ iface = &net->ksnn_interfaces[i];
+
+ if (iface->ksni_ipaddr == ip)
+ return (iface);
+ }
+
+ return (NULL);
+}
+
+ksock_route_t *
+ksocknal_create_route (__u32 ipaddr, int port)
+{
+ ksock_route_t *route;
+
+ LIBCFS_ALLOC (route, sizeof (*route));
+ if (route == NULL)
+ return (NULL);
+
+ atomic_set (&route->ksnr_refcount, 1);
+ route->ksnr_peer = NULL;
+ route->ksnr_retry_interval = 0; /* OK to connect at any time */
+ route->ksnr_ipaddr = ipaddr;
+ route->ksnr_port = port;
+ route->ksnr_scheduled = 0;
+ route->ksnr_connecting = 0;
+ route->ksnr_connected = 0;
+ route->ksnr_deleted = 0;
+ route->ksnr_conn_count = 0;
+ route->ksnr_share_count = 0;
+
+ return (route);
+}
+
+void
+ksocknal_destroy_route (ksock_route_t *route)
+{
+ LASSERT (atomic_read(&route->ksnr_refcount) == 0);
+
+ if (route->ksnr_peer != NULL)
+ ksocknal_peer_decref(route->ksnr_peer);
+
+ LIBCFS_FREE (route, sizeof (*route));
+}
+
+int
+ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_net_t *net = ni->ni_data;
+ ksock_peer_t *peer;
+
+ LASSERT (id.nid != LNET_NID_ANY);
+ LASSERT (id.pid != LNET_PID_ANY);
+ LASSERT (!in_interrupt());
+
+ LIBCFS_ALLOC (peer, sizeof (*peer));
+ if (peer == NULL)
+ return -ENOMEM;
+
+ memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
+
+ peer->ksnp_ni = ni;
+ peer->ksnp_id = id;
+ atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
+ peer->ksnp_closing = 0;
+ peer->ksnp_accepting = 0;
+ peer->ksnp_proto = NULL;
+ peer->ksnp_last_alive = 0;
+ peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+
+ INIT_LIST_HEAD (&peer->ksnp_conns);
+ INIT_LIST_HEAD (&peer->ksnp_routes);
+ INIT_LIST_HEAD (&peer->ksnp_tx_queue);
+ INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
+ spin_lock_init(&peer->ksnp_lock);
+
+ spin_lock_bh(&net->ksnn_lock);
+
+ if (net->ksnn_shutdown) {
+ spin_unlock_bh(&net->ksnn_lock);
+
+ LIBCFS_FREE(peer, sizeof(*peer));
+ CERROR("Can't create peer: network shutdown\n");
+ return -ESHUTDOWN;
+ }
+
+ net->ksnn_npeers++;
+
+ spin_unlock_bh(&net->ksnn_lock);
+
+ *peerp = peer;
+ return 0;
+}
+
+void
+ksocknal_destroy_peer (ksock_peer_t *peer)
+{
+ ksock_net_t *net = peer->ksnp_ni->ni_data;
+
+ CDEBUG (D_NET, "peer %s %p deleted\n",
+ libcfs_id2str(peer->ksnp_id), peer);
+
+ LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
+ LASSERT (peer->ksnp_accepting == 0);
+ LASSERT (list_empty (&peer->ksnp_conns));
+ LASSERT (list_empty (&peer->ksnp_routes));
+ LASSERT (list_empty (&peer->ksnp_tx_queue));
+ LASSERT (list_empty (&peer->ksnp_zc_req_list));
+
+ LIBCFS_FREE (peer, sizeof (*peer));
+
+ /* NB a peer's connections and routes keep a reference on their peer
+ * until they are destroyed, so we can be assured that _all_ state to
+ * do with this peer has been cleaned up when its refcount drops to
+ * zero. */
+ spin_lock_bh(&net->ksnn_lock);
+ net->ksnn_npeers--;
+ spin_unlock_bh(&net->ksnn_lock);
+}
+
+ksock_peer_t *
+ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
+ struct list_head *tmp;
+ ksock_peer_t *peer;
+
+ list_for_each (tmp, peer_list) {
+
+ peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+
+ LASSERT (!peer->ksnp_closing);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (peer->ksnp_id.nid != id.nid ||
+ peer->ksnp_id.pid != id.pid)
+ continue;
+
+ CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
+ peer, libcfs_id2str(id),
+ atomic_read(&peer->ksnp_refcount));
+ return (peer);
+ }
+ return (NULL);
+}
+
+ksock_peer_t *
+ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_peer_t *peer;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL) /* +1 ref for caller? */
+ ksocknal_peer_addref(peer);
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ return (peer);
+}
+
+void
+ksocknal_unlink_peer_locked (ksock_peer_t *peer)
+{
+ int i;
+ __u32 ip;
+ ksock_interface_t *iface;
+
+ for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
+ LASSERT (i < LNET_MAX_INTERFACES);
+ ip = peer->ksnp_passive_ips[i];
+
+ iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+ /* All IPs in peer->ksnp_passive_ips[] come from the
+ * interface list, therefore the call must succeed. */
+ LASSERT (iface != NULL);
+
+ CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
+ peer, iface, iface->ksni_nroutes);
+ iface->ksni_npeers--;
+ }
+
+ LASSERT (list_empty(&peer->ksnp_conns));
+ LASSERT (list_empty(&peer->ksnp_routes));
+ LASSERT (!peer->ksnp_closing);
+ peer->ksnp_closing = 1;
+ list_del (&peer->ksnp_list);
+ /* lose peerlist's ref */
+ ksocknal_peer_decref(peer);
+}
+
+int
+ksocknal_get_peer_info (lnet_ni_t *ni, int index,
+ lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip,
+ int *port, int *conn_count, int *share_count)
+{
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ ksock_route_t *route;
+ struct list_head *rtmp;
+ int i;
+ int j;
+ int rc = -ENOENT;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+
+ list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (peer->ksnp_n_passive_ips == 0 &&
+ list_empty(&peer->ksnp_routes)) {
+ if (index-- > 0)
+ continue;
+
+ *id = peer->ksnp_id;
+ *myip = 0;
+ *peer_ip = 0;
+ *port = 0;
+ *conn_count = 0;
+ *share_count = 0;
+ rc = 0;
+ goto out;
+ }
+
+ for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
+ if (index-- > 0)
+ continue;
+
+ *id = peer->ksnp_id;
+ *myip = peer->ksnp_passive_ips[j];
+ *peer_ip = 0;
+ *port = 0;
+ *conn_count = 0;
+ *share_count = 0;
+ rc = 0;
+ goto out;
+ }
+
+ list_for_each (rtmp, &peer->ksnp_routes) {
+ if (index-- > 0)
+ continue;
+
+ route = list_entry(rtmp, ksock_route_t,
+ ksnr_list);
+
+ *id = peer->ksnp_id;
+ *myip = route->ksnr_myipaddr;
+ *peer_ip = route->ksnr_ipaddr;
+ *port = route->ksnr_port;
+ *conn_count = route->ksnr_conn_count;
+ *share_count = route->ksnr_share_count;
+ rc = 0;
+ goto out;
+ }
+ }
+ }
+ out:
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return (rc);
+}
+
+void
+ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
+{
+ ksock_peer_t *peer = route->ksnr_peer;
+ int type = conn->ksnc_type;
+ ksock_interface_t *iface;
+
+ conn->ksnc_route = route;
+ ksocknal_route_addref(route);
+
+ if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
+ if (route->ksnr_myipaddr == 0) {
+ /* route wasn't bound locally yet (the initial route) */
+ CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(conn->ksnc_myipaddr));
+ } else {
+ CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
+ "%u.%u.%u.%u to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(route->ksnr_myipaddr),
+ HIPQUAD(conn->ksnc_myipaddr));
+
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes--;
+ }
+ route->ksnr_myipaddr = conn->ksnc_myipaddr;
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes++;
+ }
+
+ route->ksnr_connected |= (1<<type);
+ route->ksnr_conn_count++;
+
+ /* Successful connection => further attempts can
+ * proceed immediately */
+ route->ksnr_retry_interval = 0;
+}
+
+void
+ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
+{
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+ ksock_route_t *route2;
+
+ LASSERT (!peer->ksnp_closing);
+ LASSERT (route->ksnr_peer == NULL);
+ LASSERT (!route->ksnr_scheduled);
+ LASSERT (!route->ksnr_connecting);
+ LASSERT (route->ksnr_connected == 0);
+
+ /* LASSERT(unique) */
+ list_for_each(tmp, &peer->ksnp_routes) {
+ route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
+ CERROR ("Duplicate route %s %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr));
+ LBUG();
+ }
+ }
+
+ route->ksnr_peer = peer;
+ ksocknal_peer_addref(peer);
+ /* peer's routelist takes over my ref on 'route' */
+ list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
+ continue;
+
+ ksocknal_associate_route_conn_locked(route, conn);
+ /* keep going (typed routes) */
+ }
+}
+
+void
+ksocknal_del_route_locked (ksock_route_t *route)
+{
+ ksock_peer_t *peer = route->ksnr_peer;
+ ksock_interface_t *iface;
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+
+ LASSERT (!route->ksnr_deleted);
+
+ /* Close associated conns */
+ list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+ conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_route != route)
+ continue;
+
+ ksocknal_close_conn_locked (conn, 0);
+ }
+
+ if (route->ksnr_myipaddr != 0) {
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes--;
+ }
+
+ route->ksnr_deleted = 1;
+ list_del (&route->ksnr_list);
+ ksocknal_route_decref(route); /* drop peer's ref */
+
+ if (list_empty (&peer->ksnp_routes) &&
+ list_empty (&peer->ksnp_conns)) {
+ /* I've just removed the last route to a peer with no active
+ * connections */
+ ksocknal_unlink_peer_locked (peer);
+ }
+}
+
+int
+ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
+{
+ struct list_head *tmp;
+ ksock_peer_t *peer;
+ ksock_peer_t *peer2;
+ ksock_route_t *route;
+ ksock_route_t *route2;
+ int rc;
+
+ if (id.nid == LNET_NID_ANY ||
+ id.pid == LNET_PID_ANY)
+ return (-EINVAL);
+
+ /* Have a brand new peer ready... */
+ rc = ksocknal_create_peer(&peer, ni, id);
+ if (rc != 0)
+ return rc;
+
+ route = ksocknal_create_route (ipaddr, port);
+ if (route == NULL) {
+ ksocknal_peer_decref(peer);
+ return (-ENOMEM);
+ }
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ /* always called with a ref on ni, so shutdown can't have started */
+ LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
+
+ peer2 = ksocknal_find_peer_locked (ni, id);
+ if (peer2 != NULL) {
+ ksocknal_peer_decref(peer);
+ peer = peer2;
+ } else {
+ /* peer table takes my ref on peer */
+ list_add_tail (&peer->ksnp_list,
+ ksocknal_nid2peerlist (id.nid));
+ }
+
+ route2 = NULL;
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route2->ksnr_ipaddr == ipaddr)
+ break;
+
+ route2 = NULL;
+ }
+ if (route2 == NULL) {
+ ksocknal_add_route_locked(peer, route);
+ route->ksnr_share_count++;
+ } else {
+ ksocknal_route_decref(route);
+ route2->ksnr_share_count++;
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ return (0);
+}
+
+void
+ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
+{
+ ksock_conn_t *conn;
+ ksock_route_t *route;
+ struct list_head *tmp;
+ struct list_head *nxt;
+ int nshared;
+
+ LASSERT (!peer->ksnp_closing);
+
+ /* Extra ref prevents peer disappearing until I'm done with it */
+ ksocknal_peer_addref(peer);
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ /* no match */
+ if (!(ip == 0 || route->ksnr_ipaddr == ip))
+ continue;
+
+ route->ksnr_share_count = 0;
+ /* This deletes associated conns too */
+ ksocknal_del_route_locked (route);
+ }
+
+ nshared = 0;
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+ nshared += route->ksnr_share_count;
+ }
+
+ if (nshared == 0) {
+ /* remove everything else if there are no explicit entries
+ * left */
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ /* we should only be removing auto-entries */
+ LASSERT(route->ksnr_share_count == 0);
+ ksocknal_del_route_locked (route);
+ }
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ ksocknal_close_conn_locked(conn, 0);
+ }
+ }
+
+ ksocknal_peer_decref(peer);
+ /* NB peer unlinks itself when last conn/route is removed */
+}
+
+int
+ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
+{
+ LIST_HEAD (zombies);
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ ksock_peer_t *peer;
+ int lo;
+ int hi;
+ int i;
+ int rc = -ENOENT;
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ if (id.nid != LNET_NID_ANY)
+ lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
+ else {
+ lo = 0;
+ hi = ksocknal_data.ksnd_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt,
+ &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
+ (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
+ continue;
+
+ ksocknal_peer_addref(peer); /* a ref for me... */
+
+ ksocknal_del_peer_locked (peer, ip);
+
+ if (peer->ksnp_closing &&
+ !list_empty(&peer->ksnp_tx_queue)) {
+ LASSERT (list_empty(&peer->ksnp_conns));
+ LASSERT (list_empty(&peer->ksnp_routes));
+
+ list_splice_init(&peer->ksnp_tx_queue,
+ &zombies);
+ }
+
+ ksocknal_peer_decref(peer); /* ...till here */
+
+ rc = 0; /* matched! */
+ }
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_txlist_done(ni, &zombies, 1);
+
+ return (rc);
+}
+
+ksock_conn_t *
+ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
+{
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ int i;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ LASSERT (!peer->ksnp_closing);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ list_for_each (ctmp, &peer->ksnp_conns) {
+ if (index-- > 0)
+ continue;
+
+ conn = list_entry (ctmp, ksock_conn_t,
+ ksnc_list);
+ ksocknal_conn_addref(conn);
+ read_unlock(&ksocknal_data. \
+ ksnd_global_lock);
+ return (conn);
+ }
+ }
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return (NULL);
+}
+
+ksock_sched_t *
+ksocknal_choose_scheduler_locked(unsigned int cpt)
+{
+ struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
+ ksock_sched_t *sched;
+ int i;
+
+ LASSERT(info->ksi_nthreads > 0);
+
+ sched = &info->ksi_scheds[0];
+ /*
+ * NB: it's safe so far, but info->ksi_nthreads could be changed
+ * at runtime when we have dynamic LNet configuration, then we
+ * need to take care of this.
+ */
+ for (i = 1; i < info->ksi_nthreads; i++) {
+ if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
+ sched = &info->ksi_scheds[i];
+ }
+
+ return sched;
+}
+
+int
+ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ int nip;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ nip = net->ksnn_ninterfaces;
+ LASSERT (nip <= LNET_MAX_INTERFACES);
+
+ /* Only offer interfaces for additional connections if I have
+ * more than one. */
+ if (nip < 2) {
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return 0;
+ }
+
+ for (i = 0; i < nip; i++) {
+ ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
+ LASSERT (ipaddrs[i] != 0);
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return (nip);
+}
+
+int
+ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
+{
+ int best_netmatch = 0;
+ int best_xor = 0;
+ int best = -1;
+ int this_xor;
+ int this_netmatch;
+ int i;
+
+ for (i = 0; i < nips; i++) {
+ if (ips[i] == 0)
+ continue;
+
+ this_xor = (ips[i] ^ iface->ksni_ipaddr);
+ this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best < 0 ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_xor > this_xor)))
+ continue;
+
+ best = i;
+ best_netmatch = this_netmatch;
+ best_xor = this_xor;
+ }
+
+ LASSERT (best >= 0);
+ return (best);
+}
+
+int
+ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
+{
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ ksock_net_t *net = peer->ksnp_ni->ni_data;
+ ksock_interface_t *iface;
+ ksock_interface_t *best_iface;
+ int n_ips;
+ int i;
+ int j;
+ int k;
+ __u32 ip;
+ __u32 xor;
+ int this_netmatch;
+ int best_netmatch;
+ int best_npeers;
+
+ /* CAVEAT EMPTOR: We do all our interface matching with an
+ * exclusive hold of global lock at IRQ priority. We're only
+ * expecting to be dealing with small numbers of interfaces, so the
+ * O(n**3)-ness shouldn't matter */
+
+ /* Also note that I'm not going to return more than n_peerips
+ * interfaces, even if I have more myself */
+
+ write_lock_bh(global_lock);
+
+ LASSERT (n_peerips <= LNET_MAX_INTERFACES);
+ LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+ /* Only match interfaces for additional connections
+ * if I have > 1 interface */
+ n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
+ MIN(n_peerips, net->ksnn_ninterfaces);
+
+ for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
+ /* ^ yes really... */
+
+ /* If we have any new interfaces, first tick off all the
+ * peer IPs that match old interfaces, then choose new
+ * interfaces to match the remaining peer IPS.
+ * We don't forget interfaces we've stopped using; we might
+ * start using them again... */
+
+ if (i < peer->ksnp_n_passive_ips) {
+ /* Old interface. */
+ ip = peer->ksnp_passive_ips[i];
+ best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+
+ /* peer passive ips are kept up to date */
+ LASSERT(best_iface != NULL);
+ } else {
+ /* choose a new interface */
+ LASSERT (i == peer->ksnp_n_passive_ips);
+
+ best_iface = NULL;
+ best_netmatch = 0;
+ best_npeers = 0;
+
+ for (j = 0; j < net->ksnn_ninterfaces; j++) {
+ iface = &net->ksnn_interfaces[j];
+ ip = iface->ksni_ipaddr;
+
+ for (k = 0; k < peer->ksnp_n_passive_ips; k++)
+ if (peer->ksnp_passive_ips[k] == ip)
+ break;
+
+ if (k < peer->ksnp_n_passive_ips) /* using it already */
+ continue;
+
+ k = ksocknal_match_peerip(iface, peerips, n_peerips);
+ xor = (ip ^ peerips[k]);
+ this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best_iface == NULL ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_npeers > iface->ksni_npeers)))
+ continue;
+
+ best_iface = iface;
+ best_netmatch = this_netmatch;
+ best_npeers = iface->ksni_npeers;
+ }
+
+ best_iface->ksni_npeers++;
+ ip = best_iface->ksni_ipaddr;
+ peer->ksnp_passive_ips[i] = ip;
+ peer->ksnp_n_passive_ips = i+1;
+ }
+
+ LASSERT (best_iface != NULL);
+
+ /* mark the best matching peer IP used */
+ j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
+ peerips[j] = 0;
+ }
+
+ /* Overwrite input peer IP addresses */
+ memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
+
+ write_unlock_bh(global_lock);
+
+ return (n_ips);
+}
+
+void
+ksocknal_create_routes(ksock_peer_t *peer, int port,
+ __u32 *peer_ipaddrs, int npeer_ipaddrs)
+{
+ ksock_route_t *newroute = NULL;
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ lnet_ni_t *ni = peer->ksnp_ni;
+ ksock_net_t *net = ni->ni_data;
+ struct list_head *rtmp;
+ ksock_route_t *route;
+ ksock_interface_t *iface;
+ ksock_interface_t *best_iface;
+ int best_netmatch;
+ int this_netmatch;
+ int best_nroutes;
+ int i;
+ int j;
+
+ /* CAVEAT EMPTOR: We do all our interface matching with an
+ * exclusive hold of global lock at IRQ priority. We're only
+ * expecting to be dealing with small numbers of interfaces, so the
+ * O(n**3)-ness here shouldn't matter */
+
+ write_lock_bh(global_lock);
+
+ if (net->ksnn_ninterfaces < 2) {
+ /* Only create additional connections
+ * if I have > 1 interface */
+ write_unlock_bh(global_lock);
+ return;
+ }
+
+ LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
+
+ for (i = 0; i < npeer_ipaddrs; i++) {
+ if (newroute != NULL) {
+ newroute->ksnr_ipaddr = peer_ipaddrs[i];
+ } else {
+ write_unlock_bh(global_lock);
+
+ newroute = ksocknal_create_route(peer_ipaddrs[i], port);
+ if (newroute == NULL)
+ return;
+
+ write_lock_bh(global_lock);
+ }
+
+ if (peer->ksnp_closing) {
+ /* peer got closed under me */
+ break;
+ }
+
+ /* Already got a route? */
+ route = NULL;
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
+ break;
+
+ route = NULL;
+ }
+ if (route != NULL)
+ continue;
+
+ best_iface = NULL;
+ best_nroutes = 0;
+ best_netmatch = 0;
+
+ LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+ /* Select interface to connect from */
+ for (j = 0; j < net->ksnn_ninterfaces; j++) {
+ iface = &net->ksnn_interfaces[j];
+
+ /* Using this interface already? */
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp, ksock_route_t,
+ ksnr_list);
+
+ if (route->ksnr_myipaddr == iface->ksni_ipaddr)
+ break;
+
+ route = NULL;
+ }
+ if (route != NULL)
+ continue;
+
+ this_netmatch = (((iface->ksni_ipaddr ^
+ newroute->ksnr_ipaddr) &
+ iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best_iface == NULL ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_nroutes > iface->ksni_nroutes)))
+ continue;
+
+ best_iface = iface;
+ best_netmatch = this_netmatch;
+ best_nroutes = iface->ksni_nroutes;
+ }
+
+ if (best_iface == NULL)
+ continue;
+
+ newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
+ best_iface->ksni_nroutes++;
+
+ ksocknal_add_route_locked(peer, newroute);
+ newroute = NULL;
+ }
+
+ write_unlock_bh(global_lock);
+ if (newroute != NULL)
+ ksocknal_route_decref(newroute);
+}
+
+int
+ksocknal_accept (lnet_ni_t *ni, socket_t *sock)
+{
+ ksock_connreq_t *cr;
+ int rc;
+ __u32 peer_ip;
+ int peer_port;
+
+ rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+ LASSERT (rc == 0); /* we succeeded before */
+
+ LIBCFS_ALLOC(cr, sizeof(*cr));
+ if (cr == NULL) {
+ LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
+ "%u.%u.%u.%u: memory exhausted\n",
+ HIPQUAD(peer_ip));
+ return -ENOMEM;
+ }
+
+ lnet_ni_addref(ni);
+ cr->ksncr_ni = ni;
+ cr->ksncr_sock = sock;
+
+ spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+
+ list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
+ wake_up(&ksocknal_data.ksnd_connd_waitq);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+ return 0;
+}
+
+int
+ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
+{
+ ksock_route_t *route;
+
+ list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) {
+
+ if (route->ksnr_ipaddr == ipaddr)
+ return route->ksnr_connecting;
+ }
+ return 0;
+}
+
+int
+ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+ socket_t *sock, int type)
+{
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ LIST_HEAD (zombies);
+ lnet_process_id_t peerid;
+ struct list_head *tmp;
+ __u64 incarnation;
+ ksock_conn_t *conn;
+ ksock_conn_t *conn2;
+ ksock_peer_t *peer = NULL;
+ ksock_peer_t *peer2;
+ ksock_sched_t *sched;
+ ksock_hello_msg_t *hello;
+ int cpt;
+ ksock_tx_t *tx;
+ ksock_tx_t *txtmp;
+ int rc;
+ int active;
+ char *warn = NULL;
+
+ active = (route != NULL);
+
+ LASSERT (active == (type != SOCKLND_CONN_NONE));
+
+ LIBCFS_ALLOC(conn, sizeof(*conn));
+ if (conn == NULL) {
+ rc = -ENOMEM;
+ goto failed_0;
+ }
+
+ memset (conn, 0, sizeof (*conn));
+
+ conn->ksnc_peer = NULL;
+ conn->ksnc_route = NULL;
+ conn->ksnc_sock = sock;
+ /* 2 ref, 1 for conn, another extra ref prevents socket
+ * being closed before establishment of connection */
+ atomic_set (&conn->ksnc_sock_refcount, 2);
+ conn->ksnc_type = type;
+ ksocknal_lib_save_callback(sock, conn);
+ atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
+
+ conn->ksnc_rx_ready = 0;
+ conn->ksnc_rx_scheduled = 0;
+
+ INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+ conn->ksnc_tx_ready = 0;
+ conn->ksnc_tx_scheduled = 0;
+ conn->ksnc_tx_carrier = NULL;
+ atomic_set (&conn->ksnc_tx_nob, 0);
+
+ LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
+ kshm_ips[LNET_MAX_INTERFACES]));
+ if (hello == NULL) {
+ rc = -ENOMEM;
+ goto failed_1;
+ }
+
+ /* stash conn's local and remote addrs */
+ rc = ksocknal_lib_get_conn_addrs (conn);
+ if (rc != 0)
+ goto failed_1;
+
+ /* Find out/confirm peer's NID and connection type and get the
+ * vector of interfaces she's willing to let me connect to.
+ * Passive connections use the listener timeout since the peer sends
+ * eagerly */
+
+ if (active) {
+ peer = route->ksnr_peer;
+ LASSERT(ni == peer->ksnp_ni);
+
+ /* Active connection sends HELLO eagerly */
+ hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
+ peerid = peer->ksnp_id;
+
+ write_lock_bh(global_lock);
+ conn->ksnc_proto = peer->ksnp_proto;
+ write_unlock_bh(global_lock);
+
+ if (conn->ksnc_proto == NULL) {
+ conn->ksnc_proto = &ksocknal_protocol_v3x;
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol == 2)
+ conn->ksnc_proto = &ksocknal_protocol_v2x;
+ else if (*ksocknal_tunables.ksnd_protocol == 1)
+ conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
+ }
+
+ rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
+ if (rc != 0)
+ goto failed_1;
+ } else {
+ peerid.nid = LNET_NID_ANY;
+ peerid.pid = LNET_PID_ANY;
+
+ /* Passive, get protocol from peer */
+ conn->ksnc_proto = NULL;
+ }
+
+ rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
+ if (rc < 0)
+ goto failed_1;
+
+ LASSERT (rc == 0 || active);
+ LASSERT (conn->ksnc_proto != NULL);
+ LASSERT (peerid.nid != LNET_NID_ANY);
+
+ cpt = lnet_cpt_of_nid(peerid.nid);
+
+ if (active) {
+ ksocknal_peer_addref(peer);
+ write_lock_bh(global_lock);
+ } else {
+ rc = ksocknal_create_peer(&peer, ni, peerid);
+ if (rc != 0)
+ goto failed_1;
+
+ write_lock_bh(global_lock);
+
+ /* called with a ref on ni, so shutdown can't have started */
+ LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
+
+ peer2 = ksocknal_find_peer_locked(ni, peerid);
+ if (peer2 == NULL) {
+ /* NB this puts an "empty" peer in the peer
+ * table (which takes my ref) */
+ list_add_tail(&peer->ksnp_list,
+ ksocknal_nid2peerlist(peerid.nid));
+ } else {
+ ksocknal_peer_decref(peer);
+ peer = peer2;
+ }
+
+ /* +1 ref for me */
+ ksocknal_peer_addref(peer);
+ peer->ksnp_accepting++;
+
+ /* Am I already connecting to this guy? Resolve in
+ * favour of higher NID... */
+ if (peerid.nid < ni->ni_nid &&
+ ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
+ rc = EALREADY;
+ warn = "connection race resolution";
+ goto failed_2;
+ }
+ }
+
+ if (peer->ksnp_closing ||
+ (active && route->ksnr_deleted)) {
+ /* peer/route got closed under me */
+ rc = -ESTALE;
+ warn = "peer/route removed";
+ goto failed_2;
+ }
+
+ if (peer->ksnp_proto == NULL) {
+ /* Never connected before.
+ * NB recv_hello may have returned EPROTO to signal my peer
+ * wants a different protocol than the one I asked for.
+ */
+ LASSERT (list_empty(&peer->ksnp_conns));
+
+ peer->ksnp_proto = conn->ksnc_proto;
+ peer->ksnp_incarnation = incarnation;
+ }
+
+ if (peer->ksnp_proto != conn->ksnc_proto ||
+ peer->ksnp_incarnation != incarnation) {
+ /* Peer rebooted or I've got the wrong protocol version */
+ ksocknal_close_peer_conns_locked(peer, 0, 0);
+
+ peer->ksnp_proto = NULL;
+ rc = ESTALE;
+ warn = peer->ksnp_incarnation != incarnation ?
+ "peer rebooted" :
+ "wrong proto version";
+ goto failed_2;
+ }
+
+ switch (rc) {
+ default:
+ LBUG();
+ case 0:
+ break;
+ case EALREADY:
+ warn = "lost conn race";
+ goto failed_2;
+ case EPROTO:
+ warn = "retry with different protocol version";
+ goto failed_2;
+ }
+
+ /* Refuse to duplicate an existing connection, unless this is a
+ * loopback connection */
+ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
+ conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
+ conn2->ksnc_type != conn->ksnc_type)
+ continue;
+
+ /* Reply on a passive connection attempt so the peer
+ * realises we're connected. */
+ LASSERT (rc == 0);
+ if (!active)
+ rc = EALREADY;
+
+ warn = "duplicate";
+ goto failed_2;
+ }
+ }
+
+ /* If the connection created by this route didn't bind to the IP
+ * address the route connected to, the connection/route matching
+ * code below probably isn't going to work. */
+ if (active &&
+ route->ksnr_ipaddr != conn->ksnc_ipaddr) {
+ CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(conn->ksnc_ipaddr));
+ }
+
+ /* Search for a route corresponding to the new connection and
+ * create an association. This allows incoming connections created
+ * by routes in my peer to match my own route entries so I don't
+ * continually create duplicate routes. */
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+ continue;
+
+ ksocknal_associate_route_conn_locked(route, conn);
+ break;
+ }
+
+ conn->ksnc_peer = peer; /* conn takes my ref on peer */
+ peer->ksnp_last_alive = cfs_time_current();
+ peer->ksnp_send_keepalive = 0;
+ peer->ksnp_error = 0;
+
+ sched = ksocknal_choose_scheduler_locked(cpt);
+ sched->kss_nconns++;
+ conn->ksnc_scheduler = sched;
+
+ conn->ksnc_tx_last_post = cfs_time_current();
+ /* Set the deadline for the outgoing HELLO to drain */
+ conn->ksnc_tx_bufnob = cfs_sock_wmem_queued(sock);
+ conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ mb(); /* order with adding to peer's conn list */
+
+ list_add (&conn->ksnc_list, &peer->ksnp_conns);
+ ksocknal_conn_addref(conn);
+
+ ksocknal_new_packet(conn, 0);
+
+ conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
+
+ /* Take packets blocking for this connection. */
+ list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
+ if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO)
+ continue;
+
+ list_del (&tx->tx_list);
+ ksocknal_queue_tx_locked (tx, conn);
+ }
+
+ write_unlock_bh(global_lock);
+
+ /* We've now got a new connection. Any errors from here on are just
+ * like "normal" comms errors and we close the connection normally.
+ * NB (a) we still have to send the reply HELLO for passive
+ * connections,
+ * (b) normal I/O on the conn is blocked until I setup and call the
+ * socket callbacks.
+ */
+
+ CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
+ " incarnation:"LPD64" sched[%d:%d]\n",
+ libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
+ HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port, incarnation, cpt,
+ (int)(sched - &sched->kss_info->ksi_scheds[0]));
+
+ if (active) {
+ /* additional routes after interface exchange? */
+ ksocknal_create_routes(peer, conn->ksnc_port,
+ hello->kshm_ips, hello->kshm_nips);
+ } else {
+ hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
+ hello->kshm_nips);
+ rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
+ }
+
+ LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
+ kshm_ips[LNET_MAX_INTERFACES]));
+
+ /* setup the socket AFTER I've received hello (it disables
+ * SO_LINGER). I might call back to the acceptor who may want
+ * to send a protocol version response and then close the
+ * socket; this ensures the socket only tears down after the
+ * response has been sent. */
+ if (rc == 0)
+ rc = ksocknal_lib_setup_sock(sock);
+
+ write_lock_bh(global_lock);
+
+ /* NB my callbacks block while I hold ksnd_global_lock */
+ ksocknal_lib_set_callback(sock, conn);
+
+ if (!active)
+ peer->ksnp_accepting--;
+
+ write_unlock_bh(global_lock);
+
+ if (rc != 0) {
+ write_lock_bh(global_lock);
+ if (!conn->ksnc_closing) {
+ /* could be closed by another thread */
+ ksocknal_close_conn_locked(conn, rc);
+ }
+ write_unlock_bh(global_lock);
+ } else if (ksocknal_connsock_addref(conn) == 0) {
+ /* Allow I/O to proceed. */
+ ksocknal_read_callback(conn);
+ ksocknal_write_callback(conn);
+ ksocknal_connsock_decref(conn);
+ }
+
+ ksocknal_connsock_decref(conn);
+ ksocknal_conn_decref(conn);
+ return rc;
+
+ failed_2:
+ if (!peer->ksnp_closing &&
+ list_empty (&peer->ksnp_conns) &&
+ list_empty (&peer->ksnp_routes)) {
+ list_add(&zombies, &peer->ksnp_tx_queue);
+ list_del_init(&peer->ksnp_tx_queue);
+ ksocknal_unlink_peer_locked(peer);
+ }
+
+ write_unlock_bh(global_lock);
+
+ if (warn != NULL) {
+ if (rc < 0)
+ CERROR("Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ else
+ CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ }
+
+ if (!active) {
+ if (rc > 0) {
+ /* Request retry by replying with CONN_NONE
+ * ksnc_proto has been set already */
+ conn->ksnc_type = SOCKLND_CONN_NONE;
+ hello->kshm_nips = 0;
+ ksocknal_send_hello(ni, conn, peerid.nid, hello);
+ }
+
+ write_lock_bh(global_lock);
+ peer->ksnp_accepting--;
+ write_unlock_bh(global_lock);
+ }
+
+ ksocknal_txlist_done(ni, &zombies, 1);
+ ksocknal_peer_decref(peer);
+
+ failed_1:
+ if (hello != NULL)
+ LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
+ kshm_ips[LNET_MAX_INTERFACES]));
+
+ LIBCFS_FREE (conn, sizeof(*conn));
+
+ failed_0:
+ libcfs_sock_release(sock);
+ return rc;
+}
+
+void
+ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
+{
+ /* This just does the immmediate housekeeping, and queues the
+ * connection for the reaper to terminate.
+ * Caller holds ksnd_global_lock exclusively in irq context */
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_route_t *route;
+ ksock_conn_t *conn2;
+ struct list_head *tmp;
+
+ LASSERT (peer->ksnp_error == 0);
+ LASSERT (!conn->ksnc_closing);
+ conn->ksnc_closing = 1;
+
+ /* ksnd_deathrow_conns takes over peer's ref */
+ list_del (&conn->ksnc_list);
+
+ route = conn->ksnc_route;
+ if (route != NULL) {
+ /* dissociate conn from route... */
+ LASSERT (!route->ksnr_deleted);
+ LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
+
+ conn2 = NULL;
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn2->ksnc_route == route &&
+ conn2->ksnc_type == conn->ksnc_type)
+ break;
+
+ conn2 = NULL;
+ }
+ if (conn2 == NULL)
+ route->ksnr_connected &= ~(1 << conn->ksnc_type);
+
+ conn->ksnc_route = NULL;
+
+#if 0 /* irrelevent with only eager routes */
+ /* make route least favourite */
+ list_del (&route->ksnr_list);
+ list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
+#endif
+ ksocknal_route_decref(route); /* drop conn's ref on route */
+ }
+
+ if (list_empty (&peer->ksnp_conns)) {
+ /* No more connections to this peer */
+
+ if (!list_empty(&peer->ksnp_tx_queue)) {
+ ksock_tx_t *tx;
+
+ LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
+
+ /* throw them to the last connection...,
+ * these TXs will be send to /dev/null by scheduler */
+ list_for_each_entry(tx, &peer->ksnp_tx_queue,
+ tx_list)
+ ksocknal_tx_prep(conn, tx);
+
+ spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
+ list_splice_init(&peer->ksnp_tx_queue,
+ &conn->ksnc_tx_queue);
+ spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
+ }
+
+ peer->ksnp_proto = NULL; /* renegotiate protocol version */
+ peer->ksnp_error = error; /* stash last conn close reason */
+
+ if (list_empty (&peer->ksnp_routes)) {
+ /* I've just closed last conn belonging to a
+ * peer with no routes to it */
+ ksocknal_unlink_peer_locked (peer);
+ }
+ }
+
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ list_add_tail(&conn->ksnc_list,
+ &ksocknal_data.ksnd_deathrow_conns);
+ wake_up(&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+}
+
+void
+ksocknal_peer_failed (ksock_peer_t *peer)
+{
+ int notify = 0;
+ cfs_time_t last_alive = 0;
+
+ /* There has been a connection failure or comms error; but I'll only
+ * tell LNET I think the peer is dead if it's to another kernel and
+ * there are no connections or connection attempts in existance. */
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
+ list_empty(&peer->ksnp_conns) &&
+ peer->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer) == NULL) {
+ notify = 1;
+ last_alive = peer->ksnp_last_alive;
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ if (notify)
+ lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
+ last_alive);
+}
+
+void
+ksocknal_finalize_zcreq(ksock_conn_t *conn)
+{
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_tx_t *tx;
+ ksock_tx_t *tmp;
+ LIST_HEAD (zlist);
+
+ /* NB safe to finalize TXs because closing of socket will
+ * abort all buffered data */
+ LASSERT (conn->ksnc_sock == NULL);
+
+ spin_lock(&peer->ksnp_lock);
+
+ list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
+ if (tx->tx_conn != conn)
+ continue;
+
+ LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0);
+
+ tx->tx_msg.ksm_zc_cookies[0] = 0;
+ tx->tx_zc_aborted = 1; /* mark it as not-acked */
+ list_del(&tx->tx_zc_list);
+ list_add(&tx->tx_zc_list, &zlist);
+ }
+
+ spin_unlock(&peer->ksnp_lock);
+
+ while (!list_empty(&zlist)) {
+ tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+
+ list_del(&tx->tx_zc_list);
+ ksocknal_tx_decref(tx);
+ }
+}
+
+void
+ksocknal_terminate_conn (ksock_conn_t *conn)
+{
+ /* This gets called by the reaper (guaranteed thread context) to
+ * disengage the socket from its callbacks and close it.
+ * ksnc_refcount will eventually hit zero, and then the reaper will
+ * destroy it. */
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+ int failed = 0;
+
+ LASSERT(conn->ksnc_closing);
+
+ /* wake up the scheduler to "send" all remaining packets to /dev/null */
+ spin_lock_bh(&sched->kss_lock);
+
+ /* a closing conn is always ready to tx */
+ conn->ksnc_tx_ready = 1;
+
+ if (!conn->ksnc_tx_scheduled &&
+ !list_empty(&conn->ksnc_tx_queue)){
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+
+ /* serialise with callbacks */
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
+
+ /* OK, so this conn may not be completely disengaged from its
+ * scheduler yet, but it _has_ committed to terminate... */
+ conn->ksnc_scheduler->kss_nconns--;
+
+ if (peer->ksnp_error != 0) {
+ /* peer's last conn closed in error */
+ LASSERT (list_empty (&peer->ksnp_conns));
+ failed = 1;
+ peer->ksnp_error = 0; /* avoid multiple notifications */
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ if (failed)
+ ksocknal_peer_failed(peer);
+
+ /* The socket is closed on the final put; either here, or in
+ * ksocknal_{send,recv}msg(). Since we set up the linger2 option
+ * when the connection was established, this will close the socket
+ * immediately, aborting anything buffered in it. Any hung
+ * zero-copy transmits will therefore complete in finite time. */
+ ksocknal_connsock_decref(conn);
+}
+
+void
+ksocknal_queue_zombie_conn (ksock_conn_t *conn)
+{
+ /* Queue the conn for the reaper to destroy */
+
+ LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0);
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
+ wake_up(&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+}
+
+void
+ksocknal_destroy_conn (ksock_conn_t *conn)
+{
+ cfs_time_t last_rcv;
+
+ /* Final coup-de-grace of the reaper */
+ CDEBUG (D_NET, "connection %p\n", conn);
+
+ LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
+ LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
+ LASSERT (conn->ksnc_sock == NULL);
+ LASSERT (conn->ksnc_route == NULL);
+ LASSERT (!conn->ksnc_tx_scheduled);
+ LASSERT (!conn->ksnc_rx_scheduled);
+ LASSERT (list_empty(&conn->ksnc_tx_queue));
+
+ /* complete current receive if any */
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_LNET_PAYLOAD:
+ last_rcv = conn->ksnc_rx_deadline -
+ cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
+ CERROR("Completing partial receive from %s[%d]"
+ ", ip %d.%d.%d.%d:%d, with error, wanted: %d, left: %d, "
+ "last alive is %ld secs ago\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+ conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
+ cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+ last_rcv)));
+ lnet_finalize (conn->ksnc_peer->ksnp_ni,
+ conn->ksnc_cookie, -EIO);
+ break;
+ case SOCKNAL_RX_LNET_HEADER:
+ if (conn->ksnc_rx_started)
+ CERROR("Incomplete receive of lnet header from %s"
+ ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+ conn->ksnc_proto->pro_version);
+ break;
+ case SOCKNAL_RX_KSM_HEADER:
+ if (conn->ksnc_rx_started)
+ CERROR("Incomplete receive of ksock message from %s"
+ ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
+ conn->ksnc_proto->pro_version);
+ break;
+ case SOCKNAL_RX_SLOP:
+ if (conn->ksnc_rx_started)
+ CERROR("Incomplete receive of slops from %s"
+ ", ip %d.%d.%d.%d:%d, with error\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ break;
+ default:
+ LBUG ();
+ break;
+ }
+
+ ksocknal_peer_decref(conn->ksnc_peer);
+
+ LIBCFS_FREE (conn, sizeof (*conn));
+}
+
+int
+ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
+{
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ int count = 0;
+
+ list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+ if (ipaddr == 0 ||
+ conn->ksnc_ipaddr == ipaddr) {
+ count++;
+ ksocknal_close_conn_locked (conn, why);
+ }
+ }
+
+ return (count);
+}
+
+int
+ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
+{
+ ksock_peer_t *peer = conn->ksnc_peer;
+ __u32 ipaddr = conn->ksnc_ipaddr;
+ int count;
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ return (count);
+}
+
+int
+ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
+{
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ int lo;
+ int hi;
+ int i;
+ int count = 0;
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ if (id.nid != LNET_NID_ANY)
+ lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
+ else {
+ lo = 0;
+ hi = ksocknal_data.ksnd_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt,
+ &ksocknal_data.ksnd_peers[i]) {
+
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
+ (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
+ continue;
+
+ count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
+ }
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ /* wildcards always succeed */
+ if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
+ return (0);
+
+ return (count == 0 ? -ENOENT : 0);
+}
+
+void
+ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
+{
+ /* The router is telling me she's been notified of a change in
+ * gateway state.... */
+ lnet_process_id_t id = {0};
+
+ id.nid = gw_nid;
+ id.pid = LNET_PID_ANY;
+
+ CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
+ alive ? "up" : "down");
+
+ if (!alive) {
+ /* If the gateway crashed, close all open connections... */
+ ksocknal_close_matching_conns (id, 0);
+ return;
+ }
+
+ /* ...otherwise do nothing. We can only establish new connections
+ * if we have autroutes, and these connect on demand. */
+}
+
+void
+ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
+{
+ int connect = 1;
+ cfs_time_t last_alive = 0;
+ cfs_time_t now = cfs_time_current();
+ ksock_peer_t *peer = NULL;
+ rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
+ lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
+
+ read_lock(glock);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL) {
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+ int bufnob;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+
+ if (bufnob < conn->ksnc_tx_bufnob) {
+ /* something got ACKed */
+ conn->ksnc_tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ peer->ksnp_last_alive = now;
+ conn->ksnc_tx_bufnob = bufnob;
+ }
+ }
+
+ last_alive = peer->ksnp_last_alive;
+ if (ksocknal_find_connectable_route_locked(peer) == NULL)
+ connect = 0;
+ }
+
+ read_unlock(glock);
+
+ if (last_alive != 0)
+ *when = last_alive;
+
+ CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
+ libcfs_nid2str(nid), peer,
+ last_alive ? cfs_duration_sec(now - last_alive) : -1,
+ connect);
+
+ if (!connect)
+ return;
+
+ ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
+
+ write_lock_bh(glock);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL)
+ ksocknal_launch_all_connections_locked(peer);
+
+ write_unlock_bh(glock);
+ return;
+}
+
+void
+ksocknal_push_peer (ksock_peer_t *peer)
+{
+ int index;
+ int i;
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+
+ for (index = 0; ; index++) {
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ i = 0;
+ conn = NULL;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ if (i++ == index) {
+ conn = list_entry (tmp, ksock_conn_t,
+ ksnc_list);
+ ksocknal_conn_addref(conn);
+ break;
+ }
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ if (conn == NULL)
+ break;
+
+ ksocknal_lib_push_conn (conn);
+ ksocknal_conn_decref(conn);
+ }
+}
+
+int
+ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_peer_t *peer;
+ struct list_head *tmp;
+ int index;
+ int i;
+ int j;
+ int rc = -ENOENT;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ for (j = 0; ; j++) {
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ index = 0;
+ peer = NULL;
+
+ list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry(tmp, ksock_peer_t,
+ ksnp_list);
+
+ if (!((id.nid == LNET_NID_ANY ||
+ id.nid == peer->ksnp_id.nid) &&
+ (id.pid == LNET_PID_ANY ||
+ id.pid == peer->ksnp_id.pid))) {
+ peer = NULL;
+ continue;
+ }
+
+ if (index++ == j) {
+ ksocknal_peer_addref(peer);
+ break;
+ }
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ if (peer != NULL) {
+ rc = 0;
+ ksocknal_push_peer (peer);
+ ksocknal_peer_decref(peer);
+ }
+ }
+
+ }
+
+ return (rc);
+}
+
+int
+ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
+{
+ ksock_net_t *net = ni->ni_data;
+ ksock_interface_t *iface;
+ int rc;
+ int i;
+ int j;
+ struct list_head *ptmp;
+ ksock_peer_t *peer;
+ struct list_head *rtmp;
+ ksock_route_t *route;
+
+ if (ipaddress == 0 ||
+ netmask == 0)
+ return (-EINVAL);
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ iface = ksocknal_ip2iface(ni, ipaddress);
+ if (iface != NULL) {
+ /* silently ignore dups */
+ rc = 0;
+ } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
+ rc = -ENOSPC;
+ } else {
+ iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
+
+ iface->ksni_ipaddr = ipaddress;
+ iface->ksni_netmask = netmask;
+ iface->ksni_nroutes = 0;
+ iface->ksni_npeers = 0;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry(ptmp, ksock_peer_t,
+ ksnp_list);
+
+ for (j = 0; j < peer->ksnp_n_passive_ips; j++)
+ if (peer->ksnp_passive_ips[j] == ipaddress)
+ iface->ksni_npeers++;
+
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp,
+ ksock_route_t,
+ ksnr_list);
+
+ if (route->ksnr_myipaddr == ipaddress)
+ iface->ksni_nroutes++;
+ }
+ }
+ }
+
+ rc = 0;
+ /* NB only new connections will pay attention to the new interface! */
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ return (rc);
+}
+
+void
+ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
+{
+ struct list_head *tmp;
+ struct list_head *nxt;
+ ksock_route_t *route;
+ ksock_conn_t *conn;
+ int i;
+ int j;
+
+ for (i = 0; i < peer->ksnp_n_passive_ips; i++)
+ if (peer->ksnp_passive_ips[i] == ipaddr) {
+ for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
+ peer->ksnp_passive_ips[j-1] =
+ peer->ksnp_passive_ips[j];
+ peer->ksnp_n_passive_ips--;
+ break;
+ }
+
+ list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_myipaddr != ipaddr)
+ continue;
+
+ if (route->ksnr_share_count != 0) {
+ /* Manually created; keep, but unbind */
+ route->ksnr_myipaddr = 0;
+ } else {
+ ksocknal_del_route_locked(route);
+ }
+ }
+
+ list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_myipaddr == ipaddr)
+ ksocknal_close_conn_locked (conn, 0);
+ }
+}
+
+int
+ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
+{
+ ksock_net_t *net = ni->ni_data;
+ int rc = -ENOENT;
+ struct list_head *tmp;
+ struct list_head *nxt;
+ ksock_peer_t *peer;
+ __u32 this_ip;
+ int i;
+ int j;
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
+
+ if (!(ipaddress == 0 ||
+ ipaddress == this_ip))
+ continue;
+
+ rc = 0;
+
+ for (j = i+1; j < net->ksnn_ninterfaces; j++)
+ net->ksnn_interfaces[j-1] =
+ net->ksnn_interfaces[j];
+
+ net->ksnn_ninterfaces--;
+
+ for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
+ list_for_each_safe(tmp, nxt,
+ &ksocknal_data.ksnd_peers[j]) {
+ peer = list_entry(tmp, ksock_peer_t,
+ ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ ksocknal_peer_del_interface_locked(peer, this_ip);
+ }
+ }
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ return (rc);
+}
+
+int
+ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
+{
+ lnet_process_id_t id = {0};
+ struct libcfs_ioctl_data *data = arg;
+ int rc;
+
+ switch(cmd) {
+ case IOC_LIBCFS_GET_INTERFACE: {
+ ksock_net_t *net = ni->ni_data;
+ ksock_interface_t *iface;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
+ rc = -ENOENT;
+ } else {
+ rc = 0;
+ iface = &net->ksnn_interfaces[data->ioc_count];
+
+ data->ioc_u32[0] = iface->ksni_ipaddr;
+ data->ioc_u32[1] = iface->ksni_netmask;
+ data->ioc_u32[2] = iface->ksni_npeers;
+ data->ioc_u32[3] = iface->ksni_nroutes;
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return rc;
+ }
+
+ case IOC_LIBCFS_ADD_INTERFACE:
+ return ksocknal_add_interface(ni,
+ data->ioc_u32[0], /* IP address */
+ data->ioc_u32[1]); /* net mask */
+
+ case IOC_LIBCFS_DEL_INTERFACE:
+ return ksocknal_del_interface(ni,
+ data->ioc_u32[0]); /* IP address */
+
+ case IOC_LIBCFS_GET_PEER: {
+ __u32 myip = 0;
+ __u32 ip = 0;
+ int port = 0;
+ int conn_count = 0;
+ int share_count = 0;
+
+ rc = ksocknal_get_peer_info(ni, data->ioc_count,
+ &id, &myip, &ip, &port,
+ &conn_count, &share_count);
+ if (rc != 0)
+ return rc;
+
+ data->ioc_nid = id.nid;
+ data->ioc_count = share_count;
+ data->ioc_u32[0] = ip;
+ data->ioc_u32[1] = port;
+ data->ioc_u32[2] = myip;
+ data->ioc_u32[3] = conn_count;
+ data->ioc_u32[4] = id.pid;
+ return 0;
+ }
+
+ case IOC_LIBCFS_ADD_PEER:
+ id.nid = data->ioc_nid;
+ id.pid = LUSTRE_SRV_LNET_PID;
+ return ksocknal_add_peer (ni, id,
+ data->ioc_u32[0], /* IP */
+ data->ioc_u32[1]); /* port */
+
+ case IOC_LIBCFS_DEL_PEER:
+ id.nid = data->ioc_nid;
+ id.pid = LNET_PID_ANY;
+ return ksocknal_del_peer (ni, id,
+ data->ioc_u32[0]); /* IP */
+
+ case IOC_LIBCFS_GET_CONN: {
+ int txmem;
+ int rxmem;
+ int nagle;
+ ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
+
+ if (conn == NULL)
+ return -ENOENT;
+
+ ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
+
+ data->ioc_count = txmem;
+ data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
+ data->ioc_flags = nagle;
+ data->ioc_u32[0] = conn->ksnc_ipaddr;
+ data->ioc_u32[1] = conn->ksnc_port;
+ data->ioc_u32[2] = conn->ksnc_myipaddr;
+ data->ioc_u32[3] = conn->ksnc_type;
+ data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
+ data->ioc_u32[5] = rxmem;
+ data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
+ ksocknal_conn_decref(conn);
+ return 0;
+ }
+
+ case IOC_LIBCFS_CLOSE_CONNECTION:
+ id.nid = data->ioc_nid;
+ id.pid = LNET_PID_ANY;
+ return ksocknal_close_matching_conns (id,
+ data->ioc_u32[0]);
+
+ case IOC_LIBCFS_REGISTER_MYNID:
+ /* Ignore if this is a noop */
+ if (data->ioc_nid == ni->ni_nid)
+ return 0;
+
+ CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+ libcfs_nid2str(data->ioc_nid),
+ libcfs_nid2str(ni->ni_nid));
+ return -EINVAL;
+
+ case IOC_LIBCFS_PUSH_CONNECTION:
+ id.nid = data->ioc_nid;
+ id.pid = LNET_PID_ANY;
+ return ksocknal_push(ni, id);
+
+ default:
+ return -EINVAL;
+ }
+ /* not reached */
+}
+
+void
+ksocknal_free_buffers (void)
+{
+ LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
+
+ if (ksocknal_data.ksnd_sched_info != NULL) {
+ struct ksock_sched_info *info;
+ int i;
+
+ cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+ if (info->ksi_scheds != NULL) {
+ LIBCFS_FREE(info->ksi_scheds,
+ info->ksi_nthreads_max *
+ sizeof(info->ksi_scheds[0]));
+ }
+ }
+ cfs_percpt_free(ksocknal_data.ksnd_sched_info);
+ }
+
+ LIBCFS_FREE (ksocknal_data.ksnd_peers,
+ sizeof (struct list_head) *
+ ksocknal_data.ksnd_peer_hash_size);
+
+ spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+ if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+ struct list_head zlist;
+ ksock_tx_t *tx;
+
+ list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
+ list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
+ spin_unlock(&ksocknal_data.ksnd_tx_lock);
+
+ while (!list_empty(&zlist)) {
+ tx = list_entry(zlist.next, ksock_tx_t, tx_list);
+ list_del(&tx->tx_list);
+ LIBCFS_FREE(tx, tx->tx_desc_size);
+ }
+ } else {
+ spin_unlock(&ksocknal_data.ksnd_tx_lock);
+ }
+}
+
+void
+ksocknal_base_shutdown(void)
+{
+ struct ksock_sched_info *info;
+ ksock_sched_t *sched;
+ int i;
+ int j;
+
+ CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+ atomic_read (&libcfs_kmemory));
+ LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+ switch (ksocknal_data.ksnd_init) {
+ default:
+ LASSERT (0);
+
+ case SOCKNAL_INIT_ALL:
+ case SOCKNAL_INIT_DATA:
+ LASSERT (ksocknal_data.ksnd_peers != NULL);
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
+ }
+
+ LASSERT(list_empty(&ksocknal_data.ksnd_nets));
+ LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
+ LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
+ LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
+ LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
+
+ if (ksocknal_data.ksnd_sched_info != NULL) {
+ cfs_percpt_for_each(info, i,
+ ksocknal_data.ksnd_sched_info) {
+ if (info->ksi_scheds == NULL)
+ continue;
+
+ for (j = 0; j < info->ksi_nthreads_max; j++) {
+
+ sched = &info->ksi_scheds[j];
+ LASSERT(list_empty(&sched->\
+ kss_tx_conns));
+ LASSERT(list_empty(&sched->\
+ kss_rx_conns));
+ LASSERT(list_empty(&sched-> \
+ kss_zombie_noop_txs));
+ LASSERT(sched->kss_nconns == 0);
+ }
+ }
+ }
+
+ /* flag threads to terminate; wake and wait for them to die */
+ ksocknal_data.ksnd_shuttingdown = 1;
+ wake_up_all(&ksocknal_data.ksnd_connd_waitq);
+ wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
+
+ if (ksocknal_data.ksnd_sched_info != NULL) {
+ cfs_percpt_for_each(info, i,
+ ksocknal_data.ksnd_sched_info) {
+ if (info->ksi_scheds == NULL)
+ continue;
+
+ for (j = 0; j < info->ksi_nthreads_max; j++) {
+ sched = &info->ksi_scheds[j];
+ wake_up_all(&sched->kss_waitq);
+ }
+ }
+ }
+
+ i = 4;
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ while (ksocknal_data.ksnd_nthreads != 0) {
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d threads to terminate\n",
+ ksocknal_data.ksnd_nthreads);
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ cfs_pause(cfs_time_seconds(1));
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ }
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_free_buffers();
+
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
+ break;
+ }
+
+ CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+ atomic_read (&libcfs_kmemory));
+
+ module_put(THIS_MODULE);
+}
+
+__u64
+ksocknal_new_incarnation (void)
+{
+ struct timeval tv;
+
+ /* The incarnation number is the time this module loaded and it
+ * identifies this particular instance of the socknal. Hopefully
+ * we won't be able to reboot more frequently than 1MHz for the
+ * forseeable future :) */
+
+ do_gettimeofday(&tv);
+
+ return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+}
+
+int
+ksocknal_base_startup(void)
+{
+ struct ksock_sched_info *info;
+ int rc;
+ int i;
+
+ LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+ LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+ memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
+
+ ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
+ LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
+ sizeof (struct list_head) *
+ ksocknal_data.ksnd_peer_hash_size);
+ if (ksocknal_data.ksnd_peers == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
+ INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
+
+ rwlock_init(&ksocknal_data.ksnd_global_lock);
+ INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
+
+ spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
+ init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_lock_init(&ksocknal_data.ksnd_connd_lock);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
+ init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
+
+ spin_lock_init(&ksocknal_data.ksnd_tx_lock);
+ INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
+
+ /* NB memset above zeros whole of ksocknal_data */
+
+ /* flag lists/ptrs/locks initialised */
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+ try_module_get(THIS_MODULE);
+
+ ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*info));
+ if (ksocknal_data.ksnd_sched_info == NULL)
+ goto failed;
+
+ cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+ ksock_sched_t *sched;
+ int nthrs;
+
+ nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+ if (*ksocknal_tunables.ksnd_nscheds > 0) {
+ nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
+ } else {
+ /* max to half of CPUs, assume another half should be
+ * reserved for upper layer modules */
+ nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+ }
+
+ info->ksi_nthreads_max = nthrs;
+ info->ksi_cpt = i;
+
+ LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
+ info->ksi_nthreads_max * sizeof(*sched));
+ if (info->ksi_scheds == NULL)
+ goto failed;
+
+ for (; nthrs > 0; nthrs--) {
+ sched = &info->ksi_scheds[nthrs - 1];
+
+ sched->kss_info = info;
+ spin_lock_init(&sched->kss_lock);
+ INIT_LIST_HEAD(&sched->kss_rx_conns);
+ INIT_LIST_HEAD(&sched->kss_tx_conns);
+ INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
+ init_waitqueue_head(&sched->kss_waitq);
+ }
+ }
+
+ ksocknal_data.ksnd_connd_starting = 0;
+ ksocknal_data.ksnd_connd_failed_stamp = 0;
+ ksocknal_data.ksnd_connd_starting_stamp = cfs_time_current_sec();
+ /* must have at least 2 connds to remain responsive to accepts while
+ * connecting */
+ if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
+ *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
+
+ if (*ksocknal_tunables.ksnd_nconnds_max <
+ *ksocknal_tunables.ksnd_nconnds) {
+ ksocknal_tunables.ksnd_nconnds_max =
+ ksocknal_tunables.ksnd_nconnds;
+ }
+
+ for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
+ char name[16];
+ spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+ ksocknal_data.ksnd_connd_starting++;
+ spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+
+ snprintf(name, sizeof(name), "socknal_cd%02d", i);
+ rc = ksocknal_thread_start(ksocknal_connd,
+ (void *)((ulong_ptr_t)i), name);
+ if (rc != 0) {
+ spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+ ksocknal_data.ksnd_connd_starting--;
+ spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+ CERROR("Can't spawn socknal connd: %d\n", rc);
+ goto failed;
+ }
+ }
+
+ rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
+ if (rc != 0) {
+ CERROR ("Can't spawn socknal reaper: %d\n", rc);
+ goto failed;
+ }
+
+ /* flag everything initialised */
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+ return 0;
+
+ failed:
+ ksocknal_base_shutdown();
+ return -ENETDOWN;
+}
+
+void
+ksocknal_debug_peerhash (lnet_ni_t *ni)
+{
+ ksock_peer_t *peer = NULL;
+ struct list_head *tmp;
+ int i;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni == ni) break;
+
+ peer = NULL;
+ }
+ }
+
+ if (peer != NULL) {
+ ksock_route_t *route;
+ ksock_conn_t *conn;
+
+ CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
+ "closing %d, accepting %d, err %d, zcookie "LPU64", "
+ "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
+ atomic_read(&peer->ksnp_refcount),
+ peer->ksnp_sharecount, peer->ksnp_closing,
+ peer->ksnp_accepting, peer->ksnp_error,
+ peer->ksnp_zc_next_cookie,
+ !list_empty(&peer->ksnp_tx_queue),
+ !list_empty(&peer->ksnp_zc_req_list));
+
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+ CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
+ "del %d\n", atomic_read(&route->ksnr_refcount),
+ route->ksnr_scheduled, route->ksnr_connecting,
+ route->ksnr_connected, route->ksnr_deleted);
+ }
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
+ atomic_read(&conn->ksnc_conn_refcount),
+ atomic_read(&conn->ksnc_sock_refcount),
+ conn->ksnc_type, conn->ksnc_closing);
+ }
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return;
+}
+
+void
+ksocknal_shutdown (lnet_ni_t *ni)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ lnet_process_id_t anyid = {0};
+
+ anyid.nid = LNET_NID_ANY;
+ anyid.pid = LNET_PID_ANY;
+
+ LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
+ LASSERT(ksocknal_data.ksnd_nnets > 0);
+
+ spin_lock_bh(&net->ksnn_lock);
+ net->ksnn_shutdown = 1; /* prevent new peers */
+ spin_unlock_bh(&net->ksnn_lock);
+
+ /* Delete all peers */
+ ksocknal_del_peer(ni, anyid, 0);
+
+ /* Wait for all peer state to clean up */
+ i = 2;
+ spin_lock_bh(&net->ksnn_lock);
+ while (net->ksnn_npeers != 0) {
+ spin_unlock_bh(&net->ksnn_lock);
+
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d peers to disconnect\n",
+ net->ksnn_npeers);
+ cfs_pause(cfs_time_seconds(1));
+
+ ksocknal_debug_peerhash(ni);
+
+ spin_lock_bh(&net->ksnn_lock);
+ }
+ spin_unlock_bh(&net->ksnn_lock);
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
+ LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
+ }
+
+ list_del(&net->ksnn_list);
+ LIBCFS_FREE(net, sizeof(*net));
+
+ ksocknal_data.ksnd_nnets--;
+ if (ksocknal_data.ksnd_nnets == 0)
+ ksocknal_base_shutdown();
+}
+
+int
+ksocknal_enumerate_interfaces(ksock_net_t *net)
+{
+ char **names;
+ int i;
+ int j;
+ int rc;
+ int n;
+
+ n = libcfs_ipif_enumerate(&names);
+ if (n <= 0) {
+ CERROR("Can't enumerate interfaces: %d\n", n);
+ return n;
+ }
+
+ for (i = j = 0; i < n; i++) {
+ int up;
+ __u32 ip;
+ __u32 mask;
+
+ if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+ continue;
+
+ rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
+ if (rc != 0) {
+ CWARN("Can't get interface %s info: %d\n",
+ names[i], rc);
+ continue;
+ }
+
+ if (!up) {
+ CWARN("Ignoring interface %s (down)\n",
+ names[i]);
+ continue;
+ }
+
+ if (j == LNET_MAX_INTERFACES) {
+ CWARN("Ignoring interface %s (too many interfaces)\n",
+ names[i]);
+ continue;
+ }
+
+ net->ksnn_interfaces[j].ksni_ipaddr = ip;
+ net->ksnn_interfaces[j].ksni_netmask = mask;
+ strncpy(&net->ksnn_interfaces[j].ksni_name[0],
+ names[i], IFNAMSIZ);
+ j++;
+ }
+
+ libcfs_ipif_free_enumeration(names, n);
+
+ if (j == 0)
+ CERROR("Can't find any usable interfaces\n");
+
+ return j;
+}
+
+int
+ksocknal_search_new_ipif(ksock_net_t *net)
+{
+ int new_ipif = 0;
+ int i;
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
+ char *colon = strchr(ifnam, ':');
+ int found = 0;
+ ksock_net_t *tmp;
+ int j;
+
+ if (colon != NULL) /* ignore alias device */
+ *colon = 0;
+
+ list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
+ ksnn_list) {
+ for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
+ char *ifnam2 = &tmp->ksnn_interfaces[j].\
+ ksni_name[0];
+ char *colon2 = strchr(ifnam2, ':');
+
+ if (colon2 != NULL)
+ *colon2 = 0;
+
+ found = strcmp(ifnam, ifnam2) == 0;
+ if (colon2 != NULL)
+ *colon2 = ':';
+ }
+ if (found)
+ break;
+ }
+
+ new_ipif += !found;
+ if (colon != NULL)
+ *colon = ':';
+ }
+
+ return new_ipif;
+}
+
+int
+ksocknal_start_schedulers(struct ksock_sched_info *info)
+{
+ int nthrs;
+ int rc = 0;
+ int i;
+
+ if (info->ksi_nthreads == 0) {
+ if (*ksocknal_tunables.ksnd_nscheds > 0) {
+ nthrs = info->ksi_nthreads_max;
+ } else {
+ nthrs = cfs_cpt_weight(lnet_cpt_table(),
+ info->ksi_cpt);
+ nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+ nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
+ }
+ nthrs = min(nthrs, info->ksi_nthreads_max);
+ } else {
+ LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
+ /* increase two threads if there is new interface */
+ nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
+ }
+
+ for (i = 0; i < nthrs; i++) {
+ long id;
+ char name[20];
+ ksock_sched_t *sched;
+ id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
+ sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
+ snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
+ info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
+
+ rc = ksocknal_thread_start(ksocknal_scheduler,
+ (void *)id, name);
+ if (rc == 0)
+ continue;
+
+ CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+ info->ksi_cpt, info->ksi_nthreads + i, rc);
+ break;
+ }
+
+ info->ksi_nthreads += i;
+ return rc;
+}
+
+int
+ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
+{
+ int newif = ksocknal_search_new_ipif(net);
+ int rc;
+ int i;
+
+ LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
+
+ for (i = 0; i < ncpts; i++) {
+ struct ksock_sched_info *info;
+ int cpt = (cpts == NULL) ? i : cpts[i];
+
+ LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
+ info = ksocknal_data.ksnd_sched_info[cpt];
+
+ if (!newif && info->ksi_nthreads > 0)
+ continue;
+
+ rc = ksocknal_start_schedulers(info);
+ if (rc != 0)
+ return rc;
+ }
+ return 0;
+}
+
+int
+ksocknal_startup (lnet_ni_t *ni)
+{
+ ksock_net_t *net;
+ int rc;
+ int i;
+
+ LASSERT (ni->ni_lnd == &the_ksocklnd);
+
+ if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
+ rc = ksocknal_base_startup();
+ if (rc != 0)
+ return rc;
+ }
+
+ LIBCFS_ALLOC(net, sizeof(*net));
+ if (net == NULL)
+ goto fail_0;
+
+ spin_lock_init(&net->ksnn_lock);
+ net->ksnn_incarnation = ksocknal_new_incarnation();
+ ni->ni_data = net;
+ ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
+ ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
+ ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
+ ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
+
+ if (ni->ni_interfaces[0] == NULL) {
+ rc = ksocknal_enumerate_interfaces(net);
+ if (rc <= 0)
+ goto fail_1;
+
+ net->ksnn_ninterfaces = 1;
+ } else {
+ for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+ int up;
+
+ if (ni->ni_interfaces[i] == NULL)
+ break;
+
+ rc = libcfs_ipif_query(
+ ni->ni_interfaces[i], &up,
+ &net->ksnn_interfaces[i].ksni_ipaddr,
+ &net->ksnn_interfaces[i].ksni_netmask);
+
+ if (rc != 0) {
+ CERROR("Can't get interface %s info: %d\n",
+ ni->ni_interfaces[i], rc);
+ goto fail_1;
+ }
+
+ if (!up) {
+ CERROR("Interface %s is down\n",
+ ni->ni_interfaces[i]);
+ goto fail_1;
+ }
+
+ strncpy(&net->ksnn_interfaces[i].ksni_name[0],
+ ni->ni_interfaces[i], IFNAMSIZ);
+ }
+ net->ksnn_ninterfaces = i;
+ }
+
+ /* call it before add it to ksocknal_data.ksnd_nets */
+ rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
+ if (rc != 0)
+ goto fail_1;
+
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+ net->ksnn_interfaces[0].ksni_ipaddr);
+ list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
+
+ ksocknal_data.ksnd_nnets++;
+
+ return 0;
+
+ fail_1:
+ LIBCFS_FREE(net, sizeof(*net));
+ fail_0:
+ if (ksocknal_data.ksnd_nnets == 0)
+ ksocknal_base_shutdown();
+
+ return -ENETDOWN;
+}
+
+
+void __exit
+ksocknal_module_fini (void)
+{
+ lnet_unregister_lnd(&the_ksocklnd);
+ ksocknal_tunables_fini();
+}
+
+int __init
+ksocknal_module_init (void)
+{
+ int rc;
+
+ /* check ksnr_connected/connecting field large enough */
+ CLASSERT (SOCKLND_CONN_NTYPES <= 4);
+ CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
+
+ /* initialize the_ksocklnd */
+ the_ksocklnd.lnd_type = SOCKLND;
+ the_ksocklnd.lnd_startup = ksocknal_startup;
+ the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
+ the_ksocklnd.lnd_ctl = ksocknal_ctl;
+ the_ksocklnd.lnd_send = ksocknal_send;
+ the_ksocklnd.lnd_recv = ksocknal_recv;
+ the_ksocklnd.lnd_notify = ksocknal_notify;
+ the_ksocklnd.lnd_query = ksocknal_query;
+ the_ksocklnd.lnd_accept = ksocknal_accept;
+
+ rc = ksocknal_tunables_init();
+ if (rc != 0)
+ return rc;
+
+ lnet_register_lnd(&the_ksocklnd);
+
+ return 0;
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
+MODULE_LICENSE("GPL");
+
+cfs_module(ksocknal, "3.0.0", ksocknal_module_init, ksocknal_module_fini);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
new file mode 100644
index 000000000000..b483e0c3a69a
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define DEBUG_SUBSYSTEM S_LND
+
+#include "socklnd_lib-linux.h"
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/socklnd.h>
+#include <linux/lnet/lnet-sysctl.h>
+
+#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */
+#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */
+
+#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
+#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
+
+#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
+
+/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
+ * no risk if we're not running on a CONFIG_HIGHMEM platform. */
+#ifdef CONFIG_HIGHMEM
+# define SOCKNAL_RISK_KMAP_DEADLOCK 0
+#else
+# define SOCKNAL_RISK_KMAP_DEADLOCK 1
+#endif
+
+struct ksock_sched_info;
+
+typedef struct /* per scheduler state */
+{
+ spinlock_t kss_lock; /* serialise */
+ struct list_head kss_rx_conns; /* conn waiting to be read */
+ /* conn waiting to be written */
+ struct list_head kss_tx_conns;
+ /* zombie noop tx list */
+ struct list_head kss_zombie_noop_txs;
+ wait_queue_head_t kss_waitq; /* where scheduler sleeps */
+ /* # connections assigned to this scheduler */
+ int kss_nconns;
+ struct ksock_sched_info *kss_info; /* owner of it */
+ struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
+ struct iovec kss_scratch_iov[LNET_MAX_IOV];
+} ksock_sched_t;
+
+struct ksock_sched_info {
+ int ksi_nthreads_max; /* max allowed threads */
+ int ksi_nthreads; /* number of threads */
+ int ksi_cpt; /* CPT id */
+ ksock_sched_t *ksi_scheds; /* array of schedulers */
+};
+
+#define KSOCK_CPT_SHIFT 16
+#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
+#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT)
+#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
+
+typedef struct /* in-use interface */
+{
+ __u32 ksni_ipaddr; /* interface's IP address */
+ __u32 ksni_netmask; /* interface's network mask */
+ int ksni_nroutes; /* # routes using (active) */
+ int ksni_npeers; /* # peers using (passive) */
+ char ksni_name[IFNAMSIZ]; /* interface name */
+} ksock_interface_t;
+
+typedef struct
+{
+ /* "stuck" socket timeout (seconds) */
+ int *ksnd_timeout;
+ /* # scheduler threads in each pool while starting */
+ int *ksnd_nscheds;
+ int *ksnd_nconnds; /* # connection daemons */
+ int *ksnd_nconnds_max; /* max # connection daemons */
+ int *ksnd_min_reconnectms; /* first connection retry after (ms)... */
+ int *ksnd_max_reconnectms; /* ...exponentially increasing to this */
+ int *ksnd_eager_ack; /* make TCP ack eagerly? */
+ int *ksnd_typed_conns; /* drive sockets by type? */
+ int *ksnd_min_bulk; /* smallest "large" message */
+ int *ksnd_tx_buffer_size; /* socket tx buffer size */
+ int *ksnd_rx_buffer_size; /* socket rx buffer size */
+ int *ksnd_nagle; /* enable NAGLE? */
+ int *ksnd_round_robin; /* round robin for multiple interfaces */
+ int *ksnd_keepalive; /* # secs for sending keepalive NOOP */
+ int *ksnd_keepalive_idle; /* # idle secs before 1st probe */
+ int *ksnd_keepalive_count; /* # probes */
+ int *ksnd_keepalive_intvl; /* time between probes */
+ int *ksnd_credits; /* # concurrent sends */
+ int *ksnd_peertxcredits; /* # concurrent sends to 1 peer */
+ int *ksnd_peerrtrcredits; /* # per-peer router buffer credits */
+ int *ksnd_peertimeout; /* seconds to consider peer dead */
+ int *ksnd_enable_csum; /* enable check sum */
+ int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
+ int *ksnd_nonblk_zcack; /* always send zc-ack on non-blocking connection */
+ unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload size */
+ int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */
+ int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+ ctl_table_header_t *ksnd_sysctl; /* sysctl interface */
+#endif
+} ksock_tunables_t;
+
+typedef struct
+{
+ __u64 ksnn_incarnation; /* my epoch */
+ spinlock_t ksnn_lock; /* serialise */
+ struct list_head ksnn_list; /* chain on global list */
+ int ksnn_npeers; /* # peers */
+ int ksnn_shutdown; /* shutting down? */
+ int ksnn_ninterfaces; /* IP interfaces */
+ ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
+} ksock_net_t;
+
+/** connd timeout */
+#define SOCKNAL_CONND_TIMEOUT 120
+/** reserved thread for accepting & creating new connd */
+#define SOCKNAL_CONND_RESV 1
+
+typedef struct
+{
+ int ksnd_init; /* initialisation state */
+ int ksnd_nnets; /* # networks set up */
+ struct list_head ksnd_nets; /* list of nets */
+ /* stabilize peer/conn ops */
+ rwlock_t ksnd_global_lock;
+ /* hash table of all my known peers */
+ struct list_head *ksnd_peers;
+ int ksnd_peer_hash_size; /* size of ksnd_peers */
+
+ int ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+ /* schedulers information */
+ struct ksock_sched_info **ksnd_sched_info;
+
+ atomic_t ksnd_nactive_txs; /* #active txs */
+
+ struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/
+ struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */
+ struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ cfs_time_t ksnd_reaper_waketime;/* when reaper will wake */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+
+ int ksnd_enomem_tx; /* test ENOMEM sender */
+ int ksnd_stall_tx; /* test sluggish sender */
+ int ksnd_stall_rx; /* test sluggish receiver */
+
+ struct list_head ksnd_connd_connreqs; /* incoming connection requests */
+ struct list_head ksnd_connd_routes; /* routes waiting to be connected */
+ wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */
+ int ksnd_connd_connecting;/* # connds connecting */
+ /** time stamp of the last failed connecting attempt */
+ long ksnd_connd_failed_stamp;
+ /** # starting connd */
+ unsigned ksnd_connd_starting;
+ /** time stamp of the last starting connd */
+ long ksnd_connd_starting_stamp;
+ /** # running connd */
+ unsigned ksnd_connd_running;
+ spinlock_t ksnd_connd_lock; /* serialise */
+
+ struct list_head ksnd_idle_noop_txs; /* list head for freed noop tx */
+ spinlock_t ksnd_tx_lock; /* serialise, g_lock unsafe */
+
+} ksock_nal_data_t;
+
+#define SOCKNAL_INIT_NOTHING 0
+#define SOCKNAL_INIT_DATA 1
+#define SOCKNAL_INIT_ALL 2
+
+/* A packet just assembled for transmission is represented by 1 or more
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more lnet_kiov_t fragments.
+ *
+ * On the receive side, initially 1 struct iovec fragment is posted for
+ * receive (the header). Once the header has been received, the payload is
+ * received into either struct iovec or lnet_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
+
+struct ksock_conn; /* forward ref */
+struct ksock_peer; /* forward ref */
+struct ksock_route; /* forward ref */
+struct ksock_proto; /* forward ref */
+
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ struct list_head tx_zc_list; /* queue on peer for ZC request */
+ atomic_t tx_refcount; /* tx reference count */
+ int tx_nob; /* # packet bytes */
+ int tx_resid; /* residual bytes */
+ int tx_niov; /* # packet iovec frags */
+ struct iovec *tx_iov; /* packet iovec frags */
+ int tx_nkiov; /* # packet page frags */
+ unsigned short tx_zc_aborted; /* aborted ZC request */
+ unsigned short tx_zc_capable:1; /* payload is large enough for ZC */
+ unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */
+ unsigned short tx_nonblk:1; /* it's a non-blocking ACK */
+ lnet_kiov_t *tx_kiov; /* packet page frags */
+ struct ksock_conn *tx_conn; /* owning conn */
+ lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
+ cfs_time_t tx_deadline; /* when (in jiffies) tx times out */
+ ksock_msg_t tx_msg; /* socklnd message buffer */
+ int tx_desc_size; /* size of this descriptor */
+ union {
+ struct {
+ struct iovec iov; /* virt hdr */
+ lnet_kiov_t kiov[0]; /* paged payload */
+ } paged;
+ struct {
+ struct iovec iov[1]; /* virt hdr + payload */
+ } virt;
+ } tx_frags;
+} ksock_tx_t;
+
+#define KSOCK_NOOP_TX_SIZE ((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0]))
+
+/* network zero copy callback descriptor embedded in ksock_tx_t */
+
+/* space for the rx frag descriptors; we either read a single contiguous
+ * header, or up to LNET_MAX_IOV frags of payload of either type. */
+typedef union {
+ struct iovec iov[LNET_MAX_IOV];
+ lnet_kiov_t kiov[LNET_MAX_IOV];
+} ksock_rxiovspace_t;
+
+#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */
+#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */
+#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */
+#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
+#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
+#define SOCKNAL_RX_SLOP 6 /* skipping body */
+
+typedef struct ksock_conn
+{
+ struct ksock_peer *ksnc_peer; /* owning peer */
+ struct ksock_route *ksnc_route; /* owning route */
+ struct list_head ksnc_list; /* stash on peer's conn list */
+ socket_t *ksnc_sock; /* actual socket */
+ void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
+ void *ksnc_saved_write_space; /* socket's original write_space() callback */
+ atomic_t ksnc_conn_refcount; /* conn refcount */
+ atomic_t ksnc_sock_refcount; /* sock refcount */
+ ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
+ __u32 ksnc_myipaddr; /* my IP */
+ __u32 ksnc_ipaddr; /* peer's IP */
+ int ksnc_port; /* peer's port */
+ signed int ksnc_type:3; /* type of connection,
+ * should be signed value */
+ unsigned int ksnc_closing:1; /* being shut down */
+ unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
+ unsigned int ksnc_zc_capable:1; /* enable to ZC */
+ struct ksock_proto *ksnc_proto; /* protocol for the connection */
+
+ /* reader */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */
+ __u8 ksnc_rx_started; /* started receiving a message */
+ __u8 ksnc_rx_ready; /* data ready to read */
+ __u8 ksnc_rx_scheduled;/* being progressed */
+ __u8 ksnc_rx_state; /* what is being read */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # iovec frags */
+ struct iovec *ksnc_rx_iov; /* the iovec frags */
+ int ksnc_rx_nkiov; /* # page frags */
+ lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
+ ksock_rxiovspace_t ksnc_rx_iov_space;/* space for frag descriptors */
+ __u32 ksnc_rx_csum; /* partial checksum for incoming data */
+ void *ksnc_cookie; /* rx lnet_finalize passthru arg */
+ ksock_msg_t ksnc_msg; /* incoming message buffer:
+ * V2.x message takes the
+ * whole struct
+ * V1.x message is a bare
+ * lnet_hdr_t, it's stored in
+ * ksnc_msg.ksm_u.lnetmsg */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ ksock_tx_t *ksnc_tx_carrier; /* next TX that can carry a LNet message or ZC-ACK */
+ cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */
+ int ksnc_tx_bufnob; /* send buffer marker */
+ atomic_t ksnc_tx_nob; /* # bytes queued */
+ int ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+ cfs_time_t ksnc_tx_last_post; /* time stamp of the last posted TX */
+} ksock_conn_t;
+
+typedef struct ksock_route
+{
+ struct list_head ksnr_list; /* chain on peer route list */
+ struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
+ struct ksock_peer *ksnr_peer; /* owning peer */
+ atomic_t ksnr_refcount; /* # users */
+ cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
+ cfs_duration_t ksnr_retry_interval; /* how long between retries */
+ __u32 ksnr_myipaddr; /* my IP */
+ __u32 ksnr_ipaddr; /* IP address to connect to */
+ int ksnr_port; /* port to connect to */
+ unsigned int ksnr_scheduled:1; /* scheduled for attention */
+ unsigned int ksnr_connecting:1;/* connection establishment in progress */
+ unsigned int ksnr_connected:4; /* connections established by type */
+ unsigned int ksnr_deleted:1; /* been removed from peer? */
+ unsigned int ksnr_share_count; /* created explicitly? */
+ int ksnr_conn_count; /* # conns established by this route */
+} ksock_route_t;
+
+#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
+
+typedef struct ksock_peer
+{
+ struct list_head ksnp_list; /* stash on global peer list */
+ cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
+ lnet_process_id_t ksnp_id; /* who's on the other end(s) */
+ atomic_t ksnp_refcount; /* # users */
+ int ksnp_sharecount; /* lconf usage counter */
+ int ksnp_closing; /* being closed */
+ int ksnp_accepting;/* # passive connections pending */
+ int ksnp_error; /* errno on closing last conn */
+ __u64 ksnp_zc_next_cookie;/* ZC completion cookie */
+ __u64 ksnp_incarnation; /* latest known peer incarnation */
+ struct ksock_proto *ksnp_proto; /* latest known peer protocol */
+ struct list_head ksnp_conns; /* all active connections */
+ struct list_head ksnp_routes; /* routes */
+ struct list_head ksnp_tx_queue; /* waiting packets */
+ spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
+ struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */
+ cfs_time_t ksnp_send_keepalive; /* time to send keepalive */
+ lnet_ni_t *ksnp_ni; /* which network */
+ int ksnp_n_passive_ips; /* # of... */
+ __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
+} ksock_peer_t;
+
+typedef struct ksock_connreq
+{
+ struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
+ lnet_ni_t *ksncr_ni; /* chosen NI */
+ socket_t *ksncr_sock; /* accepted socket */
+} ksock_connreq_t;
+
+extern ksock_nal_data_t ksocknal_data;
+extern ksock_tunables_t ksocknal_tunables;
+
+#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */
+#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
+#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not preferred */
+
+typedef struct ksock_proto
+{
+ int pro_version; /* version number of protocol */
+ int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */
+ int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */
+ void (*pro_pack)(ksock_tx_t *); /* message pack */
+ void (*pro_unpack)(ksock_msg_t *); /* message unpack */
+ ksock_tx_t *(*pro_queue_tx_msg)(ksock_conn_t *, ksock_tx_t *); /* queue tx on the connection */
+ int (*pro_queue_tx_zcack)(ksock_conn_t *, ksock_tx_t *, __u64); /* queue ZC ack on the connection */
+ int (*pro_handle_zcreq)(ksock_conn_t *, __u64, int); /* handle ZC request */
+ int (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64); /* handle ZC ACK */
+ int (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int); /* msg type matches the connection type:
+ * return value:
+ * return MATCH_NO : no
+ * return MATCH_YES : matching type
+ * return MATCH_MAY : can be backup */
+} ksock_proto_t;
+
+extern ksock_proto_t ksocknal_protocol_v1x;
+extern ksock_proto_t ksocknal_protocol_v2x;
+extern ksock_proto_t ksocknal_protocol_v3x;
+
+#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
+#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
+#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR
+
+#ifndef CPU_MASK_NONE
+#define CPU_MASK_NONE 0UL
+#endif
+
+static inline int
+ksocknal_route_mask(void)
+{
+ if (!*ksocknal_tunables.ksnd_typed_conns)
+ return (1 << SOCKLND_CONN_ANY);
+
+ return ((1 << SOCKLND_CONN_CONTROL) |
+ (1 << SOCKLND_CONN_BULK_IN) |
+ (1 << SOCKLND_CONN_BULK_OUT));
+}
+
+static inline struct list_head *
+ksocknal_nid2peerlist (lnet_nid_t nid)
+{
+ unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
+
+ return (&ksocknal_data.ksnd_peers [hash]);
+}
+
+static inline void
+ksocknal_conn_addref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+ atomic_inc(&conn->ksnc_conn_refcount);
+}
+
+extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
+extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
+
+static inline void
+ksocknal_conn_decref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+ if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
+ ksocknal_queue_zombie_conn(conn);
+}
+
+static inline int
+ksocknal_connsock_addref (ksock_conn_t *conn)
+{
+ int rc = -ESHUTDOWN;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ if (!conn->ksnc_closing) {
+ LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
+ atomic_inc(&conn->ksnc_sock_refcount);
+ rc = 0;
+ }
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ return (rc);
+}
+
+static inline void
+ksocknal_connsock_decref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
+ if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
+ LASSERT (conn->ksnc_closing);
+ libcfs_sock_release(conn->ksnc_sock);
+ conn->ksnc_sock = NULL;
+ ksocknal_finalize_zcreq(conn);
+ }
+}
+
+static inline void
+ksocknal_tx_addref (ksock_tx_t *tx)
+{
+ LASSERT (atomic_read(&tx->tx_refcount) > 0);
+ atomic_inc(&tx->tx_refcount);
+}
+
+extern void ksocknal_tx_prep (ksock_conn_t *, ksock_tx_t *tx);
+extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx);
+
+static inline void
+ksocknal_tx_decref (ksock_tx_t *tx)
+{
+ LASSERT (atomic_read(&tx->tx_refcount) > 0);
+ if (atomic_dec_and_test(&tx->tx_refcount))
+ ksocknal_tx_done(NULL, tx);
+}
+
+static inline void
+ksocknal_route_addref (ksock_route_t *route)
+{
+ LASSERT (atomic_read(&route->ksnr_refcount) > 0);
+ atomic_inc(&route->ksnr_refcount);
+}
+
+extern void ksocknal_destroy_route (ksock_route_t *route);
+
+static inline void
+ksocknal_route_decref (ksock_route_t *route)
+{
+ LASSERT (atomic_read (&route->ksnr_refcount) > 0);
+ if (atomic_dec_and_test(&route->ksnr_refcount))
+ ksocknal_destroy_route (route);
+}
+
+static inline void
+ksocknal_peer_addref (ksock_peer_t *peer)
+{
+ LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+ atomic_inc(&peer->ksnp_refcount);
+}
+
+extern void ksocknal_destroy_peer (ksock_peer_t *peer);
+
+static inline void
+ksocknal_peer_decref (ksock_peer_t *peer)
+{
+ LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+ if (atomic_dec_and_test(&peer->ksnp_refcount))
+ ksocknal_destroy_peer (peer);
+}
+
+int ksocknal_startup (lnet_ni_t *ni);
+void ksocknal_shutdown (lnet_ni_t *ni);
+int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
+int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+ int delayed, unsigned int niov,
+ struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen);
+int ksocknal_accept(lnet_ni_t *ni, socket_t *sock);
+
+extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
+extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
+extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
+extern void ksocknal_peer_failed (ksock_peer_t *peer);
+extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+ socket_t *sock, int type);
+extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
+extern void ksocknal_terminate_conn (ksock_conn_t *conn);
+extern void ksocknal_destroy_conn (ksock_conn_t *conn);
+extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
+ __u32 ipaddr, int why);
+extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
+extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
+extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer,
+ ksock_tx_t *tx, int nonblk);
+
+extern int ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx,
+ lnet_process_id_t id);
+extern ksock_tx_t *ksocknal_alloc_tx(int type, int size);
+extern void ksocknal_free_tx (ksock_tx_t *tx);
+extern ksock_tx_t *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
+extern void ksocknal_next_tx_carrier(ksock_conn_t *conn);
+extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
+extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
+ int error);
+extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
+extern void ksocknal_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
+extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
+extern void ksocknal_thread_fini (void);
+extern void ksocknal_launch_all_connections_locked (ksock_peer_t *peer);
+extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_t *peer);
+extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
+extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
+extern int ksocknal_scheduler (void *arg);
+extern int ksocknal_connd (void *arg);
+extern int ksocknal_reaper (void *arg);
+extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
+extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ ksock_hello_msg_t *hello, lnet_process_id_t *id,
+ __u64 *incarnation);
+extern void ksocknal_read_callback(ksock_conn_t *conn);
+extern void ksocknal_write_callback(ksock_conn_t *conn);
+
+extern int ksocknal_lib_zc_capable(ksock_conn_t *conn);
+extern void ksocknal_lib_save_callback(socket_t *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_set_callback(socket_t *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_reset_callback(socket_t *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
+extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
+extern int ksocknal_lib_setup_sock (socket_t *so);
+extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
+extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
+ int *rxmem, int *nagle);
+
+extern int ksocknal_tunables_init(void);
+extern void ksocknal_tunables_fini(void);
+extern int ksocknal_lib_tunables_init(void);
+extern void ksocknal_lib_tunables_fini(void);
+
+extern void ksocknal_lib_csum_tx(ksock_tx_t *tx);
+
+extern int ksocknal_lib_memory_pressure(ksock_conn_t *conn);
+extern int ksocknal_lib_bind_thread_to_cpu(int id);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
new file mode 100644
index 000000000000..ad5e24104238
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -0,0 +1,2664 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+ksock_tx_t *
+ksocknal_alloc_tx(int type, int size)
+{
+ ksock_tx_t *tx = NULL;
+
+ if (type == KSOCK_MSG_NOOP) {
+ LASSERT(size == KSOCK_NOOP_TX_SIZE);
+
+ /* searching for a noop tx in free list */
+ spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+ if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+ tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \
+ next, ksock_tx_t, tx_list);
+ LASSERT(tx->tx_desc_size == size);
+ list_del(&tx->tx_list);
+ }
+
+ spin_unlock(&ksocknal_data.ksnd_tx_lock);
+ }
+
+ if (tx == NULL)
+ LIBCFS_ALLOC(tx, size);
+
+ if (tx == NULL)
+ return NULL;
+
+ atomic_set(&tx->tx_refcount, 1);
+ tx->tx_zc_aborted = 0;
+ tx->tx_zc_capable = 0;
+ tx->tx_zc_checked = 0;
+ tx->tx_desc_size = size;
+
+ atomic_inc(&ksocknal_data.ksnd_nactive_txs);
+
+ return tx;
+}
+
+ksock_tx_t *
+ksocknal_alloc_tx_noop(__u64 cookie, int nonblk)
+{
+ ksock_tx_t *tx;
+
+ tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE);
+ if (tx == NULL) {
+ CERROR("Can't allocate noop tx desc\n");
+ return NULL;
+ }
+
+ tx->tx_conn = NULL;
+ tx->tx_lnetmsg = NULL;
+ tx->tx_kiov = NULL;
+ tx->tx_nkiov = 0;
+ tx->tx_iov = tx->tx_frags.virt.iov;
+ tx->tx_niov = 1;
+ tx->tx_nonblk = nonblk;
+
+ socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP);
+ tx->tx_msg.ksm_zc_cookies[1] = cookie;
+
+ return tx;
+}
+
+
+void
+ksocknal_free_tx (ksock_tx_t *tx)
+{
+ atomic_dec(&ksocknal_data.ksnd_nactive_txs);
+
+ if (tx->tx_lnetmsg == NULL && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) {
+ /* it's a noop tx */
+ spin_lock(&ksocknal_data.ksnd_tx_lock);
+
+ list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs);
+
+ spin_unlock(&ksocknal_data.ksnd_tx_lock);
+ } else {
+ LIBCFS_FREE(tx, tx->tx_desc_size);
+ }
+}
+
+int
+ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ struct iovec *iov = tx->tx_iov;
+ int nob;
+ int rc;
+
+ LASSERT (tx->tx_niov > 0);
+
+ /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
+ rc = ksocknal_lib_send_iov(conn, tx);
+
+ if (rc <= 0) /* sent nothing? */
+ return (rc);
+
+ nob = rc;
+ LASSERT (nob <= tx->tx_resid);
+ tx->tx_resid -= nob;
+
+ /* "consume" iov */
+ do {
+ LASSERT (tx->tx_niov > 0);
+
+ if (nob < (int) iov->iov_len) {
+ iov->iov_base = (void *)((char *)iov->iov_base + nob);
+ iov->iov_len -= nob;
+ return (rc);
+ }
+
+ nob -= iov->iov_len;
+ tx->tx_iov = ++iov;
+ tx->tx_niov--;
+ } while (nob != 0);
+
+ return (rc);
+}
+
+int
+ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ lnet_kiov_t *kiov = tx->tx_kiov;
+ int nob;
+ int rc;
+
+ LASSERT (tx->tx_niov == 0);
+ LASSERT (tx->tx_nkiov > 0);
+
+ /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
+ rc = ksocknal_lib_send_kiov(conn, tx);
+
+ if (rc <= 0) /* sent nothing? */
+ return (rc);
+
+ nob = rc;
+ LASSERT (nob <= tx->tx_resid);
+ tx->tx_resid -= nob;
+
+ /* "consume" kiov */
+ do {
+ LASSERT(tx->tx_nkiov > 0);
+
+ if (nob < (int)kiov->kiov_len) {
+ kiov->kiov_offset += nob;
+ kiov->kiov_len -= nob;
+ return rc;
+ }
+
+ nob -= (int)kiov->kiov_len;
+ tx->tx_kiov = ++kiov;
+ tx->tx_nkiov--;
+ } while (nob != 0);
+
+ return (rc);
+}
+
+int
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ int rc;
+ int bufnob;
+
+ if (ksocknal_data.ksnd_stall_tx != 0) {
+ cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
+ }
+
+ LASSERT (tx->tx_resid != 0);
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ return (-ESHUTDOWN);
+ }
+
+ do {
+ if (ksocknal_data.ksnd_enomem_tx > 0) {
+ /* testing... */
+ ksocknal_data.ksnd_enomem_tx--;
+ rc = -EAGAIN;
+ } else if (tx->tx_niov != 0) {
+ rc = ksocknal_send_iov (conn, tx);
+ } else {
+ rc = ksocknal_send_kiov (conn, tx);
+ }
+
+ bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+ if (rc > 0) /* sent something? */
+ conn->ksnc_tx_bufnob += rc; /* account it */
+
+ if (bufnob < conn->ksnc_tx_bufnob) {
+ /* allocated send buffer bytes < computed; infer
+ * something got ACKed */
+ conn->ksnc_tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_tx_bufnob = bufnob;
+ mb();
+ }
+
+ if (rc <= 0) { /* Didn't write anything? */
+
+ if (rc == 0) /* some stacks return 0 instead of -EAGAIN */
+ rc = -EAGAIN;
+
+ /* Check if EAGAIN is due to memory pressure */
+ if(rc == -EAGAIN && ksocknal_lib_memory_pressure(conn))
+ rc = -ENOMEM;
+
+ break;
+ }
+
+ /* socket's wmem_queued now includes 'rc' bytes */
+ atomic_sub (rc, &conn->ksnc_tx_nob);
+ rc = 0;
+
+ } while (tx->tx_resid != 0);
+
+ ksocknal_connsock_decref(conn);
+ return (rc);
+}
+
+int
+ksocknal_recv_iov (ksock_conn_t *conn)
+{
+ struct iovec *iov = conn->ksnc_rx_iov;
+ int nob;
+ int rc;
+
+ LASSERT (conn->ksnc_rx_niov > 0);
+
+ /* Never touch conn->ksnc_rx_iov or change connection
+ * status inside ksocknal_lib_recv_iov */
+ rc = ksocknal_lib_recv_iov(conn);
+
+ if (rc <= 0)
+ return (rc);
+
+ /* received something... */
+ nob = rc;
+
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ mb(); /* order with setting rx_started */
+ conn->ksnc_rx_started = 1;
+
+ conn->ksnc_rx_nob_wanted -= nob;
+ conn->ksnc_rx_nob_left -= nob;
+
+ do {
+ LASSERT (conn->ksnc_rx_niov > 0);
+
+ if (nob < (int)iov->iov_len) {
+ iov->iov_len -= nob;
+ iov->iov_base = (void *)((char *)iov->iov_base + nob);
+ return (-EAGAIN);
+ }
+
+ nob -= iov->iov_len;
+ conn->ksnc_rx_iov = ++iov;
+ conn->ksnc_rx_niov--;
+ } while (nob != 0);
+
+ return (rc);
+}
+
+int
+ksocknal_recv_kiov (ksock_conn_t *conn)
+{
+ lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
+ int nob;
+ int rc;
+ LASSERT (conn->ksnc_rx_nkiov > 0);
+
+ /* Never touch conn->ksnc_rx_kiov or change connection
+ * status inside ksocknal_lib_recv_iov */
+ rc = ksocknal_lib_recv_kiov(conn);
+
+ if (rc <= 0)
+ return (rc);
+
+ /* received something... */
+ nob = rc;
+
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ mb(); /* order with setting rx_started */
+ conn->ksnc_rx_started = 1;
+
+ conn->ksnc_rx_nob_wanted -= nob;
+ conn->ksnc_rx_nob_left -= nob;
+
+ do {
+ LASSERT (conn->ksnc_rx_nkiov > 0);
+
+ if (nob < (int) kiov->kiov_len) {
+ kiov->kiov_offset += nob;
+ kiov->kiov_len -= nob;
+ return -EAGAIN;
+ }
+
+ nob -= kiov->kiov_len;
+ conn->ksnc_rx_kiov = ++kiov;
+ conn->ksnc_rx_nkiov--;
+ } while (nob != 0);
+
+ return 1;
+}
+
+int
+ksocknal_receive (ksock_conn_t *conn)
+{
+ /* Return 1 on success, 0 on EOF, < 0 on error.
+ * Caller checks ksnc_rx_nob_wanted to determine
+ * progress/completion. */
+ int rc;
+ ENTRY;
+
+ if (ksocknal_data.ksnd_stall_rx != 0) {
+ cfs_pause(cfs_time_seconds (ksocknal_data.ksnd_stall_rx));
+ }
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ return (-ESHUTDOWN);
+ }
+
+ for (;;) {
+ if (conn->ksnc_rx_niov != 0)
+ rc = ksocknal_recv_iov (conn);
+ else
+ rc = ksocknal_recv_kiov (conn);
+
+ if (rc <= 0) {
+ /* error/EOF or partial receive */
+ if (rc == -EAGAIN) {
+ rc = 1;
+ } else if (rc == 0 && conn->ksnc_rx_started) {
+ /* EOF in the middle of a message */
+ rc = -EPROTO;
+ }
+ break;
+ }
+
+ /* Completed a fragment */
+
+ if (conn->ksnc_rx_nob_wanted == 0) {
+ rc = 1;
+ break;
+ }
+ }
+
+ ksocknal_connsock_decref(conn);
+ RETURN (rc);
+}
+
+void
+ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx)
+{
+ lnet_msg_t *lnetmsg = tx->tx_lnetmsg;
+ int rc = (tx->tx_resid == 0 && !tx->tx_zc_aborted) ? 0 : -EIO;
+ ENTRY;
+
+ LASSERT(ni != NULL || tx->tx_conn != NULL);
+
+ if (tx->tx_conn != NULL)
+ ksocknal_conn_decref(tx->tx_conn);
+
+ if (ni == NULL && tx->tx_conn != NULL)
+ ni = tx->tx_conn->ksnc_peer->ksnp_ni;
+
+ ksocknal_free_tx (tx);
+ if (lnetmsg != NULL) /* KSOCK_MSG_NOOP go without lnetmsg */
+ lnet_finalize (ni, lnetmsg, rc);
+
+ EXIT;
+}
+
+void
+ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error)
+{
+ ksock_tx_t *tx;
+
+ while (!list_empty (txlist)) {
+ tx = list_entry (txlist->next, ksock_tx_t, tx_list);
+
+ if (error && tx->tx_lnetmsg != NULL) {
+ CNETERR("Deleting packet type %d len %d %s->%s\n",
+ le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type),
+ le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length),
+ libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
+ libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
+ } else if (error) {
+ CNETERR("Deleting noop packet\n");
+ }
+
+ list_del (&tx->tx_list);
+
+ LASSERT (atomic_read(&tx->tx_refcount) == 1);
+ ksocknal_tx_done (ni, tx);
+ }
+}
+
+static void
+ksocknal_check_zc_req(ksock_tx_t *tx)
+{
+ ksock_conn_t *conn = tx->tx_conn;
+ ksock_peer_t *peer = conn->ksnc_peer;
+
+ /* Set tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx
+ * to ksnp_zc_req_list if some fragment of this message should be sent
+ * zero-copy. Our peer will send an ACK containing this cookie when
+ * she has received this message to tell us we can signal completion.
+ * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on
+ * ksnp_zc_req_list. */
+ LASSERT (tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+ LASSERT (tx->tx_zc_capable);
+
+ tx->tx_zc_checked = 1;
+
+ if (conn->ksnc_proto == &ksocknal_protocol_v1x ||
+ !conn->ksnc_zc_capable)
+ return;
+
+ /* assign cookie and queue tx to pending list, it will be released when
+ * a matching ack is received. See ksocknal_handle_zcack() */
+
+ ksocknal_tx_addref(tx);
+
+ spin_lock(&peer->ksnp_lock);
+
+ /* ZC_REQ is going to be pinned to the peer */
+ tx->tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
+ LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
+
+ tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++;
+
+ if (peer->ksnp_zc_next_cookie == 0)
+ peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
+
+ list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
+
+ spin_unlock(&peer->ksnp_lock);
+}
+
+static void
+ksocknal_uncheck_zc_req(ksock_tx_t *tx)
+{
+ ksock_peer_t *peer = tx->tx_conn->ksnc_peer;
+
+ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+ LASSERT(tx->tx_zc_capable);
+
+ tx->tx_zc_checked = 0;
+
+ spin_lock(&peer->ksnp_lock);
+
+ if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
+ /* Not waiting for an ACK */
+ spin_unlock(&peer->ksnp_lock);
+ return;
+ }
+
+ tx->tx_msg.ksm_zc_cookies[0] = 0;
+ list_del(&tx->tx_zc_list);
+
+ spin_unlock(&peer->ksnp_lock);
+
+ ksocknal_tx_decref(tx);
+}
+
+int
+ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ int rc;
+
+ if (tx->tx_zc_capable && !tx->tx_zc_checked)
+ ksocknal_check_zc_req(tx);
+
+ rc = ksocknal_transmit (conn, tx);
+
+ CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
+
+ if (tx->tx_resid == 0) {
+ /* Sent everything OK */
+ LASSERT (rc == 0);
+
+ return (0);
+ }
+
+ if (rc == -EAGAIN)
+ return (rc);
+
+ if (rc == -ENOMEM) {
+ static int counter;
+
+ counter++; /* exponential backoff warnings */
+ if ((counter & (-counter)) == counter)
+ CWARN("%u ENOMEM tx %p (%u allocated)\n",
+ counter, conn, atomic_read(&libcfs_kmemory));
+
+ /* Queue on ksnd_enomem_conns for retry after a timeout */
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ /* enomem list takes over scheduler's ref... */
+ LASSERT (conn->ksnc_tx_scheduled);
+ list_add_tail(&conn->ksnc_tx_list,
+ &ksocknal_data.ksnd_enomem_conns);
+ if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
+ SOCKNAL_ENOMEM_RETRY),
+ ksocknal_data.ksnd_reaper_waketime))
+ wake_up (&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+ return (rc);
+ }
+
+ /* Actual error */
+ LASSERT (rc < 0);
+
+ if (!conn->ksnc_closing) {
+ switch (rc) {
+ case -ECONNRESET:
+ LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection "
+ "while we were sending data; it may have "
+ "rebooted.\n",
+ HIPQUAD(conn->ksnc_ipaddr));
+ break;
+ default:
+ LCONSOLE_WARN("There was an unexpected network error "
+ "while writing to %u.%u.%u.%u: %d.\n",
+ HIPQUAD(conn->ksnc_ipaddr), rc);
+ break;
+ }
+ CDEBUG(D_NET, "[%p] Error %d on write to %s"
+ " ip %d.%d.%d.%d:%d\n", conn, rc,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ }
+
+ if (tx->tx_zc_checked)
+ ksocknal_uncheck_zc_req(tx);
+
+ /* it's not an error if conn is being closed */
+ ksocknal_close_conn_and_siblings (conn,
+ (conn->ksnc_closing) ? 0 : rc);
+
+ return (rc);
+}
+
+void
+ksocknal_launch_connection_locked (ksock_route_t *route)
+{
+
+ /* called holding write lock on ksnd_global_lock */
+
+ LASSERT (!route->ksnr_scheduled);
+ LASSERT (!route->ksnr_connecting);
+ LASSERT ((ksocknal_route_mask() & ~route->ksnr_connected) != 0);
+
+ route->ksnr_scheduled = 1; /* scheduling conn for connd */
+ ksocknal_route_addref(route); /* extra ref for connd */
+
+ spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+
+ list_add_tail(&route->ksnr_connd_list,
+ &ksocknal_data.ksnd_connd_routes);
+ wake_up(&ksocknal_data.ksnd_connd_waitq);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+}
+
+void
+ksocknal_launch_all_connections_locked (ksock_peer_t *peer)
+{
+ ksock_route_t *route;
+
+ /* called holding write lock on ksnd_global_lock */
+ for (;;) {
+ /* launch any/all connections that need it */
+ route = ksocknal_find_connectable_route_locked(peer);
+ if (route == NULL)
+ return;
+
+ ksocknal_launch_connection_locked(route);
+ }
+}
+
+ksock_conn_t *
+ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk)
+{
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+ ksock_conn_t *typed = NULL;
+ ksock_conn_t *fallback = NULL;
+ int tnob = 0;
+ int fnob = 0;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
+ int nob = atomic_read(&c->ksnc_tx_nob) +
+ cfs_sock_wmem_queued(c->ksnc_sock);
+ int rc;
+
+ LASSERT (!c->ksnc_closing);
+ LASSERT (c->ksnc_proto != NULL &&
+ c->ksnc_proto->pro_match_tx != NULL);
+
+ rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk);
+
+ switch (rc) {
+ default:
+ LBUG();
+ case SOCKNAL_MATCH_NO: /* protocol rejected the tx */
+ continue;
+
+ case SOCKNAL_MATCH_YES: /* typed connection */
+ if (typed == NULL || tnob > nob ||
+ (tnob == nob && *ksocknal_tunables.ksnd_round_robin &&
+ cfs_time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
+ typed = c;
+ tnob = nob;
+ }
+ break;
+
+ case SOCKNAL_MATCH_MAY: /* fallback connection */
+ if (fallback == NULL || fnob > nob ||
+ (fnob == nob && *ksocknal_tunables.ksnd_round_robin &&
+ cfs_time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) {
+ fallback = c;
+ fnob = nob;
+ }
+ break;
+ }
+ }
+
+ /* prefer the typed selection */
+ conn = (typed != NULL) ? typed : fallback;
+
+ if (conn != NULL)
+ conn->ksnc_tx_last_post = cfs_time_current();
+
+ return conn;
+}
+
+void
+ksocknal_tx_prep(ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ conn->ksnc_proto->pro_pack(tx);
+
+ atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
+ ksocknal_conn_addref(conn); /* +1 ref for tx */
+ tx->tx_conn = conn;
+}
+
+void
+ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
+{
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+ ksock_msg_t *msg = &tx->tx_msg;
+ ksock_tx_t *ztx = NULL;
+ int bufnob = 0;
+
+ /* called holding global lock (read or irq-write) and caller may
+ * not have dropped this lock between finding conn and calling me,
+ * so we don't need the {get,put}connsock dance to deref
+ * ksnc_sock... */
+ LASSERT(!conn->ksnc_closing);
+
+ CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+
+ ksocknal_tx_prep(conn, tx);
+
+ /* Ensure the frags we've been given EXACTLY match the number of
+ * bytes we want to send. Many TCP/IP stacks disregard any total
+ * size parameters passed to them and just look at the frags.
+ *
+ * We always expect at least 1 mapped fragment containing the
+ * complete ksocknal message header. */
+ LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) +
+ lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
+ (unsigned int)tx->tx_nob);
+ LASSERT (tx->tx_niov >= 1);
+ LASSERT (tx->tx_resid == tx->tx_nob);
+
+ CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
+ tx, (tx->tx_lnetmsg != NULL) ? tx->tx_lnetmsg->msg_hdr.type:
+ KSOCK_MSG_NOOP,
+ tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
+
+ /*
+ * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
+ * but they're used inside spinlocks a lot.
+ */
+ bufnob = cfs_sock_wmem_queued(conn->ksnc_sock);
+ spin_lock_bh(&sched->kss_lock);
+
+ if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) {
+ /* First packet starts the timeout */
+ conn->ksnc_tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_tx_bufnob = 0;
+ mb(); /* order with adding to tx_queue */
+ }
+
+ if (msg->ksm_type == KSOCK_MSG_NOOP) {
+ /* The packet is noop ZC ACK, try to piggyback the ack_cookie
+ * on a normal packet so I don't need to send it */
+ LASSERT (msg->ksm_zc_cookies[1] != 0);
+ LASSERT (conn->ksnc_proto->pro_queue_tx_zcack != NULL);
+
+ if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
+ ztx = tx; /* ZC ACK piggybacked on ztx release tx later */
+
+ } else {
+ /* It's a normal packet - can it piggback a noop zc-ack that
+ * has been queued already? */
+ LASSERT (msg->ksm_zc_cookies[1] == 0);
+ LASSERT (conn->ksnc_proto->pro_queue_tx_msg != NULL);
+
+ ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx);
+ /* ztx will be released later */
+ }
+
+ if (ztx != NULL) {
+ atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob);
+ list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs);
+ }
+
+ if (conn->ksnc_tx_ready && /* able to send */
+ !conn->ksnc_tx_scheduled) { /* not scheduled to send */
+ /* +1 ref for scheduler */
+ ksocknal_conn_addref(conn);
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+}
+
+
+ksock_route_t *
+ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
+{
+ cfs_time_t now = cfs_time_current();
+ struct list_head *tmp;
+ ksock_route_t *route;
+
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+ LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
+
+ if (route->ksnr_scheduled) /* connections being established */
+ continue;
+
+ /* all route types connected ? */
+ if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0)
+ continue;
+
+ if (!(route->ksnr_retry_interval == 0 || /* first attempt */
+ cfs_time_aftereq(now, route->ksnr_timeout))) {
+ CDEBUG(D_NET,
+ "Too soon to retry route %u.%u.%u.%u "
+ "(cnted %d, interval %ld, %ld secs later)\n",
+ HIPQUAD(route->ksnr_ipaddr),
+ route->ksnr_connected,
+ route->ksnr_retry_interval,
+ cfs_duration_sec(route->ksnr_timeout - now));
+ continue;
+ }
+
+ return (route);
+ }
+
+ return (NULL);
+}
+
+ksock_route_t *
+ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
+{
+ struct list_head *tmp;
+ ksock_route_t *route;
+
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+ LASSERT (!route->ksnr_connecting || route->ksnr_scheduled);
+
+ if (route->ksnr_scheduled)
+ return (route);
+ }
+
+ return (NULL);
+}
+
+int
+ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
+{
+ ksock_peer_t *peer;
+ ksock_conn_t *conn;
+ rwlock_t *g_lock;
+ int retry;
+ int rc;
+
+ LASSERT (tx->tx_conn == NULL);
+
+ g_lock = &ksocknal_data.ksnd_global_lock;
+
+ for (retry = 0;; retry = 1) {
+ read_lock(g_lock);
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL) {
+ if (ksocknal_find_connectable_route_locked(peer) == NULL) {
+ conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+ if (conn != NULL) {
+ /* I've got no routes that need to be
+ * connecting and I do have an actual
+ * connection... */
+ ksocknal_queue_tx_locked (tx, conn);
+ read_unlock(g_lock);
+ return (0);
+ }
+ }
+ }
+
+ /* I'll need a write lock... */
+ read_unlock(g_lock);
+
+ write_lock_bh(g_lock);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL)
+ break;
+
+ write_unlock_bh(g_lock);
+
+ if ((id.pid & LNET_PID_USERFLAG) != 0) {
+ CERROR("Refusing to create a connection to "
+ "userspace process %s\n", libcfs_id2str(id));
+ return -EHOSTUNREACH;
+ }
+
+ if (retry) {
+ CERROR("Can't find peer %s\n", libcfs_id2str(id));
+ return -EHOSTUNREACH;
+ }
+
+ rc = ksocknal_add_peer(ni, id,
+ LNET_NIDADDR(id.nid),
+ lnet_acceptor_port());
+ if (rc != 0) {
+ CERROR("Can't add peer %s: %d\n",
+ libcfs_id2str(id), rc);
+ return rc;
+ }
+ }
+
+ ksocknal_launch_all_connections_locked(peer);
+
+ conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
+ if (conn != NULL) {
+ /* Connection exists; queue message on it */
+ ksocknal_queue_tx_locked (tx, conn);
+ write_unlock_bh(g_lock);
+ return (0);
+ }
+
+ if (peer->ksnp_accepting > 0 ||
+ ksocknal_find_connecting_route_locked (peer) != NULL) {
+ /* the message is going to be pinned to the peer */
+ tx->tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
+ /* Queue the message until a connection is established */
+ list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
+ write_unlock_bh(g_lock);
+ return 0;
+ }
+
+ write_unlock_bh(g_lock);
+
+ /* NB Routes may be ignored if connections to them failed recently */
+ CNETERR("No usable routes to %s\n", libcfs_id2str(id));
+ return (-EHOSTUNREACH);
+}
+
+int
+ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+ int mpflag = 0;
+ int type = lntmsg->msg_type;
+ lnet_process_id_t target = lntmsg->msg_target;
+ unsigned int payload_niov = lntmsg->msg_niov;
+ struct iovec *payload_iov = lntmsg->msg_iov;
+ lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
+ unsigned int payload_offset = lntmsg->msg_offset;
+ unsigned int payload_nob = lntmsg->msg_len;
+ ksock_tx_t *tx;
+ int desc_size;
+ int rc;
+
+ /* NB 'private' is different depending on what we're sending.
+ * Just ignore it... */
+
+ CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
+ payload_nob, payload_niov, libcfs_id2str(target));
+
+ LASSERT (payload_nob == 0 || payload_niov > 0);
+ LASSERT (payload_niov <= LNET_MAX_IOV);
+ /* payload is either all vaddrs or all pages */
+ LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+ LASSERT (!in_interrupt ());
+
+ if (payload_iov != NULL)
+ desc_size = offsetof(ksock_tx_t,
+ tx_frags.virt.iov[1 + payload_niov]);
+ else
+ desc_size = offsetof(ksock_tx_t,
+ tx_frags.paged.kiov[payload_niov]);
+
+ if (lntmsg->msg_vmflush)
+ mpflag = cfs_memory_pressure_get_and_set();
+ tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
+ if (tx == NULL) {
+ CERROR("Can't allocate tx desc type %d size %d\n",
+ type, desc_size);
+ if (lntmsg->msg_vmflush)
+ cfs_memory_pressure_restore(mpflag);
+ return (-ENOMEM);
+ }
+
+ tx->tx_conn = NULL; /* set when assigned a conn */
+ tx->tx_lnetmsg = lntmsg;
+
+ if (payload_iov != NULL) {
+ tx->tx_kiov = NULL;
+ tx->tx_nkiov = 0;
+ tx->tx_iov = tx->tx_frags.virt.iov;
+ tx->tx_niov = 1 +
+ lnet_extract_iov(payload_niov, &tx->tx_iov[1],
+ payload_niov, payload_iov,
+ payload_offset, payload_nob);
+ } else {
+ tx->tx_niov = 1;
+ tx->tx_iov = &tx->tx_frags.paged.iov;
+ tx->tx_kiov = tx->tx_frags.paged.kiov;
+ tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+
+ if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload)
+ tx->tx_zc_capable = 1;
+ }
+
+ socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_LNET);
+
+ /* The first fragment will be set later in pro_pack */
+ rc = ksocknal_launch_packet(ni, tx, target);
+ if (lntmsg->msg_vmflush)
+ cfs_memory_pressure_restore(mpflag);
+ if (rc == 0)
+ return (0);
+
+ ksocknal_free_tx(tx);
+ return (-EIO);
+}
+
+int
+ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
+{
+ task_t *task = kthread_run(fn, arg, name);
+
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+ ksocknal_data.ksnd_nthreads++;
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+ return 0;
+}
+
+void
+ksocknal_thread_fini (void)
+{
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+ ksocknal_data.ksnd_nthreads--;
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+}
+
+int
+ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+ static char ksocknal_slop_buffer[4096];
+
+ int nob;
+ unsigned int niov;
+ int skipped;
+
+ LASSERT(conn->ksnc_proto != NULL);
+
+ if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) {
+ /* Remind the socket to ack eagerly... */
+ ksocknal_lib_eager_ack(conn);
+ }
+
+ if (nob_to_skip == 0) { /* right at next packet boundary now */
+ conn->ksnc_rx_started = 0;
+ mb(); /* racing with timeout thread */
+
+ switch (conn->ksnc_proto->pro_version) {
+ case KSOCK_PROTO_V2:
+ case KSOCK_PROTO_V3:
+ conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg;
+
+ conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u);
+ conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u);
+ conn->ksnc_rx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u);
+ break;
+
+ case KSOCK_PROTO_V1:
+ /* Receiving bare lnet_hdr_t */
+ conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
+ conn->ksnc_rx_nob_wanted = sizeof(lnet_hdr_t);
+ conn->ksnc_rx_nob_left = sizeof(lnet_hdr_t);
+
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
+ conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t);
+ break;
+
+ default:
+ LBUG ();
+ }
+ conn->ksnc_rx_niov = 1;
+
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_nkiov = 0;
+ conn->ksnc_rx_csum = ~0;
+ return (1);
+ }
+
+ /* Set up to skip as much as possible now. If there's more left
+ * (ran out of iov entries) we'll get called again */
+
+ conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+ conn->ksnc_rx_nob_left = nob_to_skip;
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ skipped = 0;
+ niov = 0;
+
+ do {
+ nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
+
+ conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
+ conn->ksnc_rx_iov[niov].iov_len = nob;
+ niov++;
+ skipped += nob;
+ nob_to_skip -=nob;
+
+ } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
+ niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_nkiov = 0;
+ conn->ksnc_rx_nob_wanted = skipped;
+ return (0);
+}
+
+int
+ksocknal_process_receive (ksock_conn_t *conn)
+{
+ lnet_hdr_t *lhdr;
+ lnet_process_id_t *id;
+ int rc;
+
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+
+ /* NB: sched lock NOT held */
+ /* SOCKNAL_RX_LNET_HEADER is here for backward compatability */
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER ||
+ conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD ||
+ conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER ||
+ conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+ again:
+ if (conn->ksnc_rx_nob_wanted != 0) {
+ rc = ksocknal_receive(conn);
+
+ if (rc <= 0) {
+ LASSERT (rc != -EAGAIN);
+
+ if (rc == 0)
+ CDEBUG (D_NET, "[%p] EOF from %s"
+ " ip %d.%d.%d.%d:%d\n", conn,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ else if (!conn->ksnc_closing)
+ CERROR ("[%p] Error %d on read from %s"
+ " ip %d.%d.%d.%d:%d\n",
+ conn, rc,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+
+ /* it's not an error if conn is being closed */
+ ksocknal_close_conn_and_siblings (conn,
+ (conn->ksnc_closing) ? 0 : rc);
+ return (rc == 0 ? -ESHUTDOWN : rc);
+ }
+
+ if (conn->ksnc_rx_nob_wanted != 0) {
+ /* short read */
+ return (-EAGAIN);
+ }
+ }
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_KSM_HEADER:
+ if (conn->ksnc_flip) {
+ __swab32s(&conn->ksnc_msg.ksm_type);
+ __swab32s(&conn->ksnc_msg.ksm_csum);
+ __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
+ __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
+ }
+
+ if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
+ conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
+ CERROR("%s: Unknown message type: %x\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ conn->ksnc_msg.ksm_type);
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings(conn, -EPROTO);
+ return (-EPROTO);
+ }
+
+ if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
+ conn->ksnc_msg.ksm_csum != 0 && /* has checksum */
+ conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+ /* NOOP Checksum error */
+ CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings(conn, -EPROTO);
+ return (-EIO);
+ }
+
+ if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) {
+ __u64 cookie = 0;
+
+ LASSERT (conn->ksnc_proto != &ksocknal_protocol_v1x);
+
+ if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
+ cookie = conn->ksnc_msg.ksm_zc_cookies[0];
+
+ rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
+ conn->ksnc_msg.ksm_zc_cookies[1]);
+
+ if (rc != 0) {
+ CERROR("%s: Unknown ZC-ACK cookie: "LPU64", "LPU64"\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings(conn, -EPROTO);
+ return (rc);
+ }
+ }
+
+ if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
+ ksocknal_new_packet (conn, 0);
+ return 0; /* NOOP is done and just return */
+ }
+
+ conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
+ conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
+ conn->ksnc_rx_nob_left = sizeof(ksock_lnet_msg_t);
+
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
+ conn->ksnc_rx_iov[0].iov_len = sizeof(ksock_lnet_msg_t);
+
+ conn->ksnc_rx_niov = 1;
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_nkiov = 0;
+
+ goto again; /* read lnet header now */
+
+ case SOCKNAL_RX_LNET_HEADER:
+ /* unpack message header */
+ conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
+
+ if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
+ /* Userspace peer */
+ lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
+ id = &conn->ksnc_peer->ksnp_id;
+
+ /* Substitute process ID assigned at connection time */
+ lhdr->src_pid = cpu_to_le32(id->pid);
+ lhdr->src_nid = cpu_to_le64(id->nid);
+ }
+
+ conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
+ ksocknal_conn_addref(conn); /* ++ref while parsing */
+
+ rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
+ &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
+ conn->ksnc_peer->ksnp_id.nid, conn, 0);
+ if (rc < 0) {
+ /* I just received garbage: give up on this conn */
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings (conn, rc);
+ ksocknal_conn_decref(conn);
+ return (-EPROTO);
+ }
+
+ /* I'm racing with ksocknal_recv() */
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
+ conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
+
+ if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
+ return 0;
+
+ /* ksocknal_recv() got called */
+ goto again;
+
+ case SOCKNAL_RX_LNET_PAYLOAD:
+ /* payload all received */
+ rc = 0;
+
+ if (conn->ksnc_rx_nob_left == 0 && /* not truncating */
+ conn->ksnc_msg.ksm_csum != 0 && /* has checksum */
+ conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+ CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+ rc = -EIO;
+ }
+
+ if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) {
+ LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
+
+ lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
+ id = &conn->ksnc_peer->ksnp_id;
+
+ rc = conn->ksnc_proto->pro_handle_zcreq(conn,
+ conn->ksnc_msg.ksm_zc_cookies[0],
+ *ksocknal_tunables.ksnd_nonblk_zcack ||
+ le64_to_cpu(lhdr->src_nid) != id->nid);
+ }
+
+ lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc);
+
+ if (rc != 0) {
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings (conn, rc);
+ return (-EPROTO);
+ }
+ /* Fall through */
+
+ case SOCKNAL_RX_SLOP:
+ /* starting new packet? */
+ if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
+ return 0; /* come back later */
+ goto again; /* try to finish reading slop now */
+
+ default:
+ break;
+ }
+
+ /* Not Reached */
+ LBUG ();
+ return (-EINVAL); /* keep gcc happy */
+}
+
+int
+ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+ unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+ ksock_conn_t *conn = (ksock_conn_t *)private;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+
+ LASSERT (mlen <= rlen);
+ LASSERT (niov <= LNET_MAX_IOV);
+
+ conn->ksnc_cookie = msg;
+ conn->ksnc_rx_nob_wanted = mlen;
+ conn->ksnc_rx_nob_left = rlen;
+
+ if (mlen == 0 || iov != NULL) {
+ conn->ksnc_rx_nkiov = 0;
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
+ conn->ksnc_rx_niov =
+ lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
+ niov, iov, offset, mlen);
+ } else {
+ conn->ksnc_rx_niov = 0;
+ conn->ksnc_rx_iov = NULL;
+ conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+ conn->ksnc_rx_nkiov =
+ lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
+ niov, kiov, offset, mlen);
+ }
+
+ LASSERT (mlen ==
+ lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
+ lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
+
+ LASSERT (conn->ksnc_rx_scheduled);
+
+ spin_lock_bh(&sched->kss_lock);
+
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_PARSE_WAIT:
+ list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
+ wake_up (&sched->kss_waitq);
+ LASSERT (conn->ksnc_rx_ready);
+ break;
+
+ case SOCKNAL_RX_PARSE:
+ /* scheduler hasn't noticed I'm parsing yet */
+ break;
+ }
+
+ conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD;
+
+ spin_unlock_bh(&sched->kss_lock);
+ ksocknal_conn_decref(conn);
+ return 0;
+}
+
+static inline int
+ksocknal_sched_cansleep(ksock_sched_t *sched)
+{
+ int rc;
+
+ spin_lock_bh(&sched->kss_lock);
+
+ rc = (!ksocknal_data.ksnd_shuttingdown &&
+ list_empty(&sched->kss_rx_conns) &&
+ list_empty(&sched->kss_tx_conns));
+
+ spin_unlock_bh(&sched->kss_lock);
+ return rc;
+}
+
+int ksocknal_scheduler(void *arg)
+{
+ struct ksock_sched_info *info;
+ ksock_sched_t *sched;
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+ int rc;
+ int nloops = 0;
+ long id = (long)arg;
+
+ info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)];
+ sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
+
+ cfs_block_allsigs();
+
+ rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt);
+ if (rc != 0) {
+ CERROR("Can't set CPT affinity to %d: %d\n",
+ info->ksi_cpt, rc);
+ }
+
+ spin_lock_bh(&sched->kss_lock);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&sched->kss_rx_conns)) {
+ conn = list_entry(sched->kss_rx_conns.next,
+ ksock_conn_t, ksnc_rx_list);
+ list_del(&conn->ksnc_rx_list);
+
+ LASSERT(conn->ksnc_rx_scheduled);
+ LASSERT(conn->ksnc_rx_ready);
+
+ /* clear rx_ready in case receive isn't complete.
+ * Do it BEFORE we call process_recv, since
+ * data_ready can set it any time after we release
+ * kss_lock. */
+ conn->ksnc_rx_ready = 0;
+ spin_unlock_bh(&sched->kss_lock);
+
+ rc = ksocknal_process_receive(conn);
+
+ spin_lock_bh(&sched->kss_lock);
+
+ /* I'm the only one that can clear this flag */
+ LASSERT(conn->ksnc_rx_scheduled);
+
+ /* Did process_receive get everything it wanted? */
+ if (rc == 0)
+ conn->ksnc_rx_ready = 1;
+
+ if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
+ /* Conn blocked waiting for ksocknal_recv()
+ * I change its state (under lock) to signal
+ * it can be rescheduled */
+ conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
+ } else if (conn->ksnc_rx_ready) {
+ /* reschedule for rx */
+ list_add_tail (&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ } else {
+ conn->ksnc_rx_scheduled = 0;
+ /* drop my ref */
+ ksocknal_conn_decref(conn);
+ }
+
+ did_something = 1;
+ }
+
+ if (!list_empty (&sched->kss_tx_conns)) {
+ LIST_HEAD (zlist);
+
+ if (!list_empty(&sched->kss_zombie_noop_txs)) {
+ list_add(&zlist,
+ &sched->kss_zombie_noop_txs);
+ list_del_init(&sched->kss_zombie_noop_txs);
+ }
+
+ conn = list_entry(sched->kss_tx_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ LASSERT(conn->ksnc_tx_scheduled);
+ LASSERT(conn->ksnc_tx_ready);
+ LASSERT(!list_empty(&conn->ksnc_tx_queue));
+
+ tx = list_entry(conn->ksnc_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ if (conn->ksnc_tx_carrier == tx)
+ ksocknal_next_tx_carrier(conn);
+
+ /* dequeue now so empty list => more to send */
+ list_del(&tx->tx_list);
+
+ /* Clear tx_ready in case send isn't complete. Do
+ * it BEFORE we call process_transmit, since
+ * write_space can set it any time after we release
+ * kss_lock. */
+ conn->ksnc_tx_ready = 0;
+ spin_unlock_bh(&sched->kss_lock);
+
+ if (!list_empty(&zlist)) {
+ /* free zombie noop txs, it's fast because
+ * noop txs are just put in freelist */
+ ksocknal_txlist_done(NULL, &zlist, 0);
+ }
+
+ rc = ksocknal_process_transmit(conn, tx);
+
+ if (rc == -ENOMEM || rc == -EAGAIN) {
+ /* Incomplete send: replace tx on HEAD of tx_queue */
+ spin_lock_bh(&sched->kss_lock);
+ list_add(&tx->tx_list,
+ &conn->ksnc_tx_queue);
+ } else {
+ /* Complete send; tx -ref */
+ ksocknal_tx_decref(tx);
+
+ spin_lock_bh(&sched->kss_lock);
+ /* assume space for more */
+ conn->ksnc_tx_ready = 1;
+ }
+
+ if (rc == -ENOMEM) {
+ /* Do nothing; after a short timeout, this
+ * conn will be reposted on kss_tx_conns. */
+ } else if (conn->ksnc_tx_ready &&
+ !list_empty (&conn->ksnc_tx_queue)) {
+ /* reschedule for tx */
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ } else {
+ conn->ksnc_tx_scheduled = 0;
+ /* drop my ref */
+ ksocknal_conn_decref(conn);
+ }
+
+ did_something = 1;
+ }
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
+ spin_unlock_bh(&sched->kss_lock);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+ cfs_wait_event_interruptible_exclusive(
+ sched->kss_waitq,
+ !ksocknal_sched_cansleep(sched), rc);
+ LASSERT (rc == 0);
+ } else {
+ cond_resched();
+ }
+
+ spin_lock_bh(&sched->kss_lock);
+ }
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+ ksocknal_thread_fini();
+ return 0;
+}
+
+/*
+ * Add connection to kss_rx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_read_callback (ksock_conn_t *conn)
+{
+ ksock_sched_t *sched;
+ ENTRY;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_bh(&sched->kss_lock);
+
+ conn->ksnc_rx_ready = 1;
+
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ wake_up (&sched->kss_waitq);
+ }
+ spin_unlock_bh(&sched->kss_lock);
+
+ EXIT;
+}
+
+/*
+ * Add connection to kss_tx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_write_callback (ksock_conn_t *conn)
+{
+ ksock_sched_t *sched;
+ ENTRY;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_bh(&sched->kss_lock);
+
+ conn->ksnc_tx_ready = 1;
+
+ if (!conn->ksnc_tx_scheduled && // not being progressed
+ !list_empty(&conn->ksnc_tx_queue)){//packets to send
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+
+ EXIT;
+}
+
+ksock_proto_t *
+ksocknal_parse_proto_version (ksock_hello_msg_t *hello)
+{
+ __u32 version = 0;
+
+ if (hello->kshm_magic == LNET_PROTO_MAGIC)
+ version = hello->kshm_version;
+ else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC))
+ version = __swab32(hello->kshm_version);
+
+ if (version != 0) {
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol == 1)
+ return NULL;
+
+ if (*ksocknal_tunables.ksnd_protocol == 2 &&
+ version == KSOCK_PROTO_V3)
+ return NULL;
+#endif
+ if (version == KSOCK_PROTO_V2)
+ return &ksocknal_protocol_v2x;
+
+ if (version == KSOCK_PROTO_V3)
+ return &ksocknal_protocol_v3x;
+
+ return NULL;
+ }
+
+ if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
+ lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello;
+
+ CLASSERT (sizeof (lnet_magicversion_t) ==
+ offsetof (ksock_hello_msg_t, kshm_src_nid));
+
+ if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) &&
+ hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR))
+ return &ksocknal_protocol_v1x;
+ }
+
+ return NULL;
+}
+
+int
+ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ lnet_nid_t peer_nid, ksock_hello_msg_t *hello)
+{
+ /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
+ ksock_net_t *net = (ksock_net_t *)ni->ni_data;
+
+ LASSERT (hello->kshm_nips <= LNET_MAX_INTERFACES);
+
+ /* rely on caller to hold a ref on socket so it wouldn't disappear */
+ LASSERT (conn->ksnc_proto != NULL);
+
+ hello->kshm_src_nid = ni->ni_nid;
+ hello->kshm_dst_nid = peer_nid;
+ hello->kshm_src_pid = the_lnet.ln_pid;
+
+ hello->kshm_src_incarnation = net->ksnn_incarnation;
+ hello->kshm_ctype = conn->ksnc_type;
+
+ return conn->ksnc_proto->pro_send_hello(conn, hello);
+}
+
+int
+ksocknal_invert_type(int type)
+{
+ switch (type)
+ {
+ case SOCKLND_CONN_ANY:
+ case SOCKLND_CONN_CONTROL:
+ return (type);
+ case SOCKLND_CONN_BULK_IN:
+ return SOCKLND_CONN_BULK_OUT;
+ case SOCKLND_CONN_BULK_OUT:
+ return SOCKLND_CONN_BULK_IN;
+ default:
+ return (SOCKLND_CONN_NONE);
+ }
+}
+
+int
+ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ ksock_hello_msg_t *hello, lnet_process_id_t *peerid,
+ __u64 *incarnation)
+{
+ /* Return < 0 fatal error
+ * 0 success
+ * EALREADY lost connection race
+ * EPROTO protocol version mismatch
+ */
+ socket_t *sock = conn->ksnc_sock;
+ int active = (conn->ksnc_proto != NULL);
+ int timeout;
+ int proto_match;
+ int rc;
+ ksock_proto_t *proto;
+ lnet_process_id_t recv_id;
+
+ /* socket type set on active connections - not set on passive */
+ LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
+
+ timeout = active ? *ksocknal_tunables.ksnd_timeout :
+ lnet_acceptor_timeout();
+
+ rc = libcfs_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0);
+ return rc;
+ }
+
+ if (hello->kshm_magic != LNET_PROTO_MAGIC &&
+ hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
+ hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
+ /* Unexpected magic! */
+ CERROR ("Bad magic(1) %#08x (%#08x expected) from "
+ "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
+ LNET_PROTO_TCP_MAGIC,
+ HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+
+ rc = libcfs_sock_read(sock, &hello->kshm_version,
+ sizeof(hello->kshm_version), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0);
+ return rc;
+ }
+
+ proto = ksocknal_parse_proto_version(hello);
+ if (proto == NULL) {
+ if (!active) {
+ /* unknown protocol from peer, tell peer my protocol */
+ conn->ksnc_proto = &ksocknal_protocol_v3x;
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol == 2)
+ conn->ksnc_proto = &ksocknal_protocol_v2x;
+ else if (*ksocknal_tunables.ksnd_protocol == 1)
+ conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
+ hello->kshm_nips = 0;
+ ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
+ }
+
+ CERROR ("Unknown protocol version (%d.x expected)"
+ " from %u.%u.%u.%u\n",
+ conn->ksnc_proto->pro_version,
+ HIPQUAD(conn->ksnc_ipaddr));
+
+ return -EPROTO;
+ }
+
+ proto_match = (conn->ksnc_proto == proto);
+ conn->ksnc_proto = proto;
+
+ /* receive the rest of hello message anyway */
+ rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
+ if (rc != 0) {
+ CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0);
+ return rc;
+ }
+
+ *incarnation = hello->kshm_src_incarnation;
+
+ if (hello->kshm_src_nid == LNET_NID_ANY) {
+ CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY"
+ "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+
+ if (!active &&
+ conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+ /* Userspace NAL assigns peer process ID from socket */
+ recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
+ recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+ } else {
+ recv_id.nid = hello->kshm_src_nid;
+ recv_id.pid = hello->kshm_src_pid;
+ }
+
+ if (!active) {
+ *peerid = recv_id;
+
+ /* peer determines type */
+ conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
+ if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+ CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
+ hello->kshm_ctype, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+
+ return 0;
+ }
+
+ if (peerid->pid != recv_id.pid ||
+ peerid->nid != recv_id.nid) {
+ LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host"
+ " %u.%u.%u.%u, but they claimed they were "
+ "%s; please check your Lustre "
+ "configuration.\n",
+ libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr),
+ libcfs_id2str(recv_id));
+ return -EPROTO;
+ }
+
+ if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
+ /* Possible protocol mismatch or I lost the connection race */
+ return proto_match ? EALREADY : EPROTO;
+ }
+
+ if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
+ CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
+ conn->ksnc_type, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr),
+ hello->kshm_ctype);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+int
+ksocknal_connect (ksock_route_t *route)
+{
+ LIST_HEAD (zombies);
+ ksock_peer_t *peer = route->ksnr_peer;
+ int type;
+ int wanted;
+ socket_t *sock;
+ cfs_time_t deadline;
+ int retry_later = 0;
+ int rc = 0;
+
+ deadline = cfs_time_add(cfs_time_current(),
+ cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ LASSERT (route->ksnr_scheduled);
+ LASSERT (!route->ksnr_connecting);
+
+ route->ksnr_connecting = 1;
+
+ for (;;) {
+ wanted = ksocknal_route_mask() & ~route->ksnr_connected;
+
+ /* stop connecting if peer/route got closed under me, or
+ * route got connected while queued */
+ if (peer->ksnp_closing || route->ksnr_deleted ||
+ wanted == 0) {
+ retry_later = 0;
+ break;
+ }
+
+ /* reschedule if peer is connecting to me */
+ if (peer->ksnp_accepting > 0) {
+ CDEBUG(D_NET,
+ "peer %s(%d) already connecting to me, retry later.\n",
+ libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting);
+ retry_later = 1;
+ }
+
+ if (retry_later) /* needs reschedule */
+ break;
+
+ if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) {
+ type = SOCKLND_CONN_ANY;
+ } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) {
+ type = SOCKLND_CONN_CONTROL;
+ } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) {
+ type = SOCKLND_CONN_BULK_IN;
+ } else {
+ LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0);
+ type = SOCKLND_CONN_BULK_OUT;
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ if (cfs_time_aftereq(cfs_time_current(), deadline)) {
+ rc = -ETIMEDOUT;
+ lnet_connect_console_error(rc, peer->ksnp_id.nid,
+ route->ksnr_ipaddr,
+ route->ksnr_port);
+ goto failed;
+ }
+
+ rc = lnet_connect(&sock, peer->ksnp_id.nid,
+ route->ksnr_myipaddr,
+ route->ksnr_ipaddr, route->ksnr_port);
+ if (rc != 0)
+ goto failed;
+
+ rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
+ if (rc < 0) {
+ lnet_connect_console_error(rc, peer->ksnp_id.nid,
+ route->ksnr_ipaddr,
+ route->ksnr_port);
+ goto failed;
+ }
+
+ /* A +ve RC means I have to retry because I lost the connection
+ * race or I have to renegotiate protocol version */
+ retry_later = (rc != 0);
+ if (retry_later)
+ CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
+ libcfs_nid2str(peer->ksnp_id.nid));
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+ }
+
+ route->ksnr_scheduled = 0;
+ route->ksnr_connecting = 0;
+
+ if (retry_later) {
+ /* re-queue for attention; this frees me up to handle
+ * the peer's incoming connection request */
+
+ if (rc == EALREADY ||
+ (rc == 0 && peer->ksnp_accepting > 0)) {
+ /* We want to introduce a delay before next
+ * attempt to connect if we lost conn race,
+ * but the race is resolved quickly usually,
+ * so min_reconnectms should be good heuristic */
+ route->ksnr_retry_interval =
+ cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000;
+ route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+ route->ksnr_retry_interval);
+ }
+
+ ksocknal_launch_connection_locked(route);
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+ return retry_later;
+
+ failed:
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ route->ksnr_scheduled = 0;
+ route->ksnr_connecting = 0;
+
+ /* This is a retry rather than a new connection */
+ route->ksnr_retry_interval *= 2;
+ route->ksnr_retry_interval =
+ MAX(route->ksnr_retry_interval,
+ cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000);
+ route->ksnr_retry_interval =
+ MIN(route->ksnr_retry_interval,
+ cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000);
+
+ LASSERT (route->ksnr_retry_interval != 0);
+ route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+ route->ksnr_retry_interval);
+
+ if (!list_empty(&peer->ksnp_tx_queue) &&
+ peer->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer) == NULL) {
+ ksock_conn_t *conn;
+
+ /* ksnp_tx_queue is queued on a conn on successful
+ * connection for V1.x and V2.x */
+ if (!list_empty (&peer->ksnp_conns)) {
+ conn = list_entry(peer->ksnp_conns.next,
+ ksock_conn_t, ksnc_list);
+ LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
+ }
+
+ /* take all the blocked packets while I've got the lock and
+ * complete below... */
+ list_splice_init(&peer->ksnp_tx_queue, &zombies);
+ }
+
+#if 0 /* irrelevent with only eager routes */
+ if (!route->ksnr_deleted) {
+ /* make this route least-favourite for re-selection */
+ list_del(&route->ksnr_list);
+ list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+ }
+#endif
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_peer_failed(peer);
+ ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
+ return 0;
+}
+
+/*
+ * check whether we need to create more connds.
+ * It will try to create new thread if it's necessary, @timeout can
+ * be updated if failed to create, so caller wouldn't keep try while
+ * running out of resource.
+ */
+static int
+ksocknal_connd_check_start(long sec, long *timeout)
+{
+ char name[16];
+ int rc;
+ int total = ksocknal_data.ksnd_connd_starting +
+ ksocknal_data.ksnd_connd_running;
+
+ if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
+ /* still in initializing */
+ return 0;
+ }
+
+ if (total >= *ksocknal_tunables.ksnd_nconnds_max ||
+ total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) {
+ /* can't create more connd, or still have enough
+ * threads to handle more connecting */
+ return 0;
+ }
+
+ if (list_empty(&ksocknal_data.ksnd_connd_routes)) {
+ /* no pending connecting request */
+ return 0;
+ }
+
+ if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) {
+ /* may run out of resource, retry later */
+ *timeout = cfs_time_seconds(1);
+ return 0;
+ }
+
+ if (ksocknal_data.ksnd_connd_starting > 0) {
+ /* serialize starting to avoid flood */
+ return 0;
+ }
+
+ ksocknal_data.ksnd_connd_starting_stamp = sec;
+ ksocknal_data.ksnd_connd_starting++;
+ spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+ /* NB: total is the next id */
+ snprintf(name, sizeof(name), "socknal_cd%02d", total);
+ rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
+
+ spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+ if (rc == 0)
+ return 1;
+
+ /* we tried ... */
+ LASSERT(ksocknal_data.ksnd_connd_starting > 0);
+ ksocknal_data.ksnd_connd_starting--;
+ ksocknal_data.ksnd_connd_failed_stamp = cfs_time_current_sec();
+
+ return 1;
+}
+
+/*
+ * check whether current thread can exit, it will return 1 if there are too
+ * many threads and no creating in past 120 seconds.
+ * Also, this function may update @timeout to make caller come back
+ * again to recheck these conditions.
+ */
+static int
+ksocknal_connd_check_stop(long sec, long *timeout)
+{
+ int val;
+
+ if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
+ /* still in initializing */
+ return 0;
+ }
+
+ if (ksocknal_data.ksnd_connd_starting > 0) {
+ /* in progress of starting new thread */
+ return 0;
+ }
+
+ if (ksocknal_data.ksnd_connd_running <=
+ *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */
+ return 0;
+ }
+
+ /* created thread in past 120 seconds? */
+ val = (int)(ksocknal_data.ksnd_connd_starting_stamp +
+ SOCKNAL_CONND_TIMEOUT - sec);
+
+ *timeout = (val > 0) ? cfs_time_seconds(val) :
+ cfs_time_seconds(SOCKNAL_CONND_TIMEOUT);
+ if (val > 0)
+ return 0;
+
+ /* no creating in past 120 seconds */
+
+ return ksocknal_data.ksnd_connd_running >
+ ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV;
+}
+
+/* Go through connd_routes queue looking for a route that we can process
+ * right now, @timeout_p can be updated if we need to come back later */
+static ksock_route_t *
+ksocknal_connd_get_route_locked(signed long *timeout_p)
+{
+ ksock_route_t *route;
+ cfs_time_t now;
+
+ now = cfs_time_current();
+
+ /* connd_routes can contain both pending and ordinary routes */
+ list_for_each_entry (route, &ksocknal_data.ksnd_connd_routes,
+ ksnr_connd_list) {
+
+ if (route->ksnr_retry_interval == 0 ||
+ cfs_time_aftereq(now, route->ksnr_timeout))
+ return route;
+
+ if (*timeout_p == MAX_SCHEDULE_TIMEOUT ||
+ (int)*timeout_p > (int)(route->ksnr_timeout - now))
+ *timeout_p = (int)(route->ksnr_timeout - now);
+ }
+
+ return NULL;
+}
+
+int
+ksocknal_connd (void *arg)
+{
+ spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
+ ksock_connreq_t *cr;
+ wait_queue_t wait;
+ int nloops = 0;
+ int cons_retry = 0;
+
+ cfs_block_allsigs ();
+
+ init_waitqueue_entry_current (&wait);
+
+ spin_lock_bh(connd_lock);
+
+ LASSERT(ksocknal_data.ksnd_connd_starting > 0);
+ ksocknal_data.ksnd_connd_starting--;
+ ksocknal_data.ksnd_connd_running++;
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ ksock_route_t *route = NULL;
+ long sec = cfs_time_current_sec();
+ long timeout = MAX_SCHEDULE_TIMEOUT;
+ int dropped_lock = 0;
+
+ if (ksocknal_connd_check_stop(sec, &timeout)) {
+ /* wakeup another one to check stop */
+ wake_up(&ksocknal_data.ksnd_connd_waitq);
+ break;
+ }
+
+ if (ksocknal_connd_check_start(sec, &timeout)) {
+ /* created new thread */
+ dropped_lock = 1;
+ }
+
+ if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
+ /* Connection accepted by the listener */
+ cr = list_entry(ksocknal_data.ksnd_connd_connreqs. \
+ next, ksock_connreq_t, ksncr_list);
+
+ list_del(&cr->ksncr_list);
+ spin_unlock_bh(connd_lock);
+ dropped_lock = 1;
+
+ ksocknal_create_conn(cr->ksncr_ni, NULL,
+ cr->ksncr_sock, SOCKLND_CONN_NONE);
+ lnet_ni_decref(cr->ksncr_ni);
+ LIBCFS_FREE(cr, sizeof(*cr));
+
+ spin_lock_bh(connd_lock);
+ }
+
+ /* Only handle an outgoing connection request if there
+ * is a thread left to handle incoming connections and
+ * create new connd */
+ if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV <
+ ksocknal_data.ksnd_connd_running) {
+ route = ksocknal_connd_get_route_locked(&timeout);
+ }
+ if (route != NULL) {
+ list_del (&route->ksnr_connd_list);
+ ksocknal_data.ksnd_connd_connecting++;
+ spin_unlock_bh(connd_lock);
+ dropped_lock = 1;
+
+ if (ksocknal_connect(route)) {
+ /* consecutive retry */
+ if (cons_retry++ > SOCKNAL_INSANITY_RECONN) {
+ CWARN("massive consecutive "
+ "re-connecting to %u.%u.%u.%u\n",
+ HIPQUAD(route->ksnr_ipaddr));
+ cons_retry = 0;
+ }
+ } else {
+ cons_retry = 0;
+ }
+
+ ksocknal_route_decref(route);
+
+ spin_lock_bh(connd_lock);
+ ksocknal_data.ksnd_connd_connecting--;
+ }
+
+ if (dropped_lock) {
+ if (++nloops < SOCKNAL_RESCHED)
+ continue;
+ spin_unlock_bh(connd_lock);
+ nloops = 0;
+ cond_resched();
+ spin_lock_bh(connd_lock);
+ continue;
+ }
+
+ /* Nothing to do for 'timeout' */
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, &wait);
+ spin_unlock_bh(connd_lock);
+
+ nloops = 0;
+ waitq_timedwait(&wait, TASK_INTERRUPTIBLE, timeout);
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
+ spin_lock_bh(connd_lock);
+ }
+ ksocknal_data.ksnd_connd_running--;
+ spin_unlock_bh(connd_lock);
+
+ ksocknal_thread_fini();
+ return 0;
+}
+
+ksock_conn_t *
+ksocknal_find_timed_out_conn (ksock_peer_t *peer)
+{
+ /* We're called with a shared lock on ksnd_global_lock */
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+
+ list_for_each (ctmp, &peer->ksnp_conns) {
+ int error;
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+ /* Don't need the {get,put}connsock dance to deref ksnc_sock */
+ LASSERT (!conn->ksnc_closing);
+
+ /* SOCK_ERROR will reset error code of socket in
+ * some platform (like Darwin8.x) */
+ error = cfs_sock_error(conn->ksnc_sock);
+ if (error != 0) {
+ ksocknal_conn_addref(conn);
+
+ switch (error) {
+ case ECONNRESET:
+ CNETERR("A connection with %s "
+ "(%u.%u.%u.%u:%d) was reset; "
+ "it may have rebooted.\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ break;
+ case ETIMEDOUT:
+ CNETERR("A connection with %s "
+ "(%u.%u.%u.%u:%d) timed out; the "
+ "network or node may be down.\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ break;
+ default:
+ CNETERR("An unexpected network error %d "
+ "occurred with %s "
+ "(%u.%u.%u.%u:%d\n", error,
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ break;
+ }
+
+ return (conn);
+ }
+
+ if (conn->ksnc_rx_started &&
+ cfs_time_aftereq(cfs_time_current(),
+ conn->ksnc_rx_deadline)) {
+ /* Timed out incomplete incoming message */
+ ksocknal_conn_addref(conn);
+ CNETERR("Timeout receiving from %s (%u.%u.%u.%u:%d), "
+ "state %d wanted %d left %d\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port,
+ conn->ksnc_rx_state,
+ conn->ksnc_rx_nob_wanted,
+ conn->ksnc_rx_nob_left);
+ return (conn);
+ }
+
+ if ((!list_empty(&conn->ksnc_tx_queue) ||
+ cfs_sock_wmem_queued(conn->ksnc_sock) != 0) &&
+ cfs_time_aftereq(cfs_time_current(),
+ conn->ksnc_tx_deadline)) {
+ /* Timed out messages queued for sending or
+ * buffered in the socket's send buffer */
+ ksocknal_conn_addref(conn);
+ CNETERR("Timeout sending data to %s (%u.%u.%u.%u:%d) "
+ "the network or that node may be down.\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ return (conn);
+ }
+ }
+
+ return (NULL);
+}
+
+static inline void
+ksocknal_flush_stale_txs(ksock_peer_t *peer)
+{
+ ksock_tx_t *tx;
+ LIST_HEAD (stale_txs);
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ while (!list_empty (&peer->ksnp_tx_queue)) {
+ tx = list_entry (peer->ksnp_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ if (!cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline))
+ break;
+
+ list_del (&tx->tx_list);
+ list_add_tail (&tx->tx_list, &stale_txs);
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+}
+
+int
+ksocknal_send_keepalive_locked(ksock_peer_t *peer)
+{
+ ksock_sched_t *sched;
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+
+ if (list_empty(&peer->ksnp_conns)) /* last_alive will be updated by create_conn */
+ return 0;
+
+ if (peer->ksnp_proto != &ksocknal_protocol_v3x)
+ return 0;
+
+ if (*ksocknal_tunables.ksnd_keepalive <= 0 ||
+ cfs_time_before(cfs_time_current(),
+ cfs_time_add(peer->ksnp_last_alive,
+ cfs_time_seconds(*ksocknal_tunables.ksnd_keepalive))))
+ return 0;
+
+ if (cfs_time_before(cfs_time_current(),
+ peer->ksnp_send_keepalive))
+ return 0;
+
+ /* retry 10 secs later, so we wouldn't put pressure
+ * on this peer if we failed to send keepalive this time */
+ peer->ksnp_send_keepalive = cfs_time_shift(10);
+
+ conn = ksocknal_find_conn_locked(peer, NULL, 1);
+ if (conn != NULL) {
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_bh(&sched->kss_lock);
+ if (!list_empty(&conn->ksnc_tx_queue)) {
+ spin_unlock_bh(&sched->kss_lock);
+ /* there is an queued ACK, don't need keepalive */
+ return 0;
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ /* cookie = 1 is reserved for keepalive PING */
+ tx = ksocknal_alloc_tx_noop(1, 1);
+ if (tx == NULL) {
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ return -ENOMEM;
+ }
+
+ if (ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id) == 0) {
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ return 1;
+ }
+
+ ksocknal_free_tx(tx);
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ return -EIO;
+}
+
+
+void
+ksocknal_check_peer_timeouts (int idx)
+{
+ struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
+ ksock_peer_t *peer;
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+
+ again:
+ /* NB. We expect to have a look at all the peers and not find any
+ * connections to time out, so we just use a shared lock while we
+ * take a look... */
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ list_for_each_entry(peer, peers, ksnp_list) {
+ cfs_time_t deadline = 0;
+ int resid = 0;
+ int n = 0;
+
+ if (ksocknal_send_keepalive_locked(peer) != 0) {
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ goto again;
+ }
+
+ conn = ksocknal_find_timed_out_conn (peer);
+
+ if (conn != NULL) {
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
+
+ /* NB we won't find this one again, but we can't
+ * just proceed with the next peer, since we dropped
+ * ksnd_global_lock and it might be dead already! */
+ ksocknal_conn_decref(conn);
+ goto again;
+ }
+
+ /* we can't process stale txs right here because we're
+ * holding only shared lock */
+ if (!list_empty (&peer->ksnp_tx_queue)) {
+ ksock_tx_t *tx =
+ list_entry (peer->ksnp_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ if (cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline)) {
+
+ ksocknal_peer_addref(peer);
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_flush_stale_txs(peer);
+
+ ksocknal_peer_decref(peer);
+ goto again;
+ }
+ }
+
+ if (list_empty(&peer->ksnp_zc_req_list))
+ continue;
+
+ spin_lock(&peer->ksnp_lock);
+ list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
+ if (!cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline))
+ break;
+ /* ignore the TX if connection is being closed */
+ if (tx->tx_conn->ksnc_closing)
+ continue;
+ n++;
+ }
+
+ if (n == 0) {
+ spin_unlock(&peer->ksnp_lock);
+ continue;
+ }
+
+ tx = list_entry(peer->ksnp_zc_req_list.next,
+ ksock_tx_t, tx_zc_list);
+ deadline = tx->tx_deadline;
+ resid = tx->tx_resid;
+ conn = tx->tx_conn;
+ ksocknal_conn_addref(conn);
+
+ spin_unlock(&peer->ksnp_lock);
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ CERROR("Total %d stale ZC_REQs for peer %s detected; the "
+ "oldest(%p) timed out %ld secs ago, "
+ "resid: %d, wmem: %d\n",
+ n, libcfs_nid2str(peer->ksnp_id.nid), tx,
+ cfs_duration_sec(cfs_time_current() - deadline),
+ resid, cfs_sock_wmem_queued(conn->ksnc_sock));
+
+ ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
+ ksocknal_conn_decref(conn);
+ goto again;
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+}
+
+int
+ksocknal_reaper (void *arg)
+{
+ wait_queue_t wait;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+ struct list_head enomem_conns;
+ int nenomem_conns;
+ cfs_duration_t timeout;
+ int i;
+ int peer_index = 0;
+ cfs_time_t deadline = cfs_time_current();
+
+ cfs_block_allsigs ();
+
+ INIT_LIST_HEAD(&enomem_conns);
+ init_waitqueue_entry_current (&wait);
+
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+
+ if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
+ conn = list_entry (ksocknal_data. \
+ ksnd_deathrow_conns.next,
+ ksock_conn_t, ksnc_list);
+ list_del (&conn->ksnc_list);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ ksocknal_terminate_conn(conn);
+ ksocknal_conn_decref(conn);
+
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+ continue;
+ }
+
+ if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
+ conn = list_entry (ksocknal_data.ksnd_zombie_conns.\
+ next, ksock_conn_t, ksnc_list);
+ list_del (&conn->ksnc_list);
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ ksocknal_destroy_conn(conn);
+
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+ continue;
+ }
+
+ if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
+ list_add(&enomem_conns,
+ &ksocknal_data.ksnd_enomem_conns);
+ list_del_init(&ksocknal_data.ksnd_enomem_conns);
+ }
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ /* reschedule all the connections that stalled with ENOMEM... */
+ nenomem_conns = 0;
+ while (!list_empty (&enomem_conns)) {
+ conn = list_entry (enomem_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_bh(&sched->kss_lock);
+
+ LASSERT(conn->ksnc_tx_scheduled);
+ conn->ksnc_tx_ready = 1;
+ list_add_tail(&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ wake_up(&sched->kss_waitq);
+
+ spin_unlock_bh(&sched->kss_lock);
+ nenomem_conns++;
+ }
+
+ /* careful with the jiffy wrap... */
+ while ((timeout = cfs_time_sub(deadline,
+ cfs_time_current())) <= 0) {
+ const int n = 4;
+ const int p = 1;
+ int chunk = ksocknal_data.ksnd_peer_hash_size;
+
+ /* Time to check for timeouts on a few more peers: I do
+ * checks every 'p' seconds on a proportion of the peer
+ * table and I need to check every connection 'n' times
+ * within a timeout interval, to ensure I detect a
+ * timeout on any connection within (n+1)/n times the
+ * timeout interval. */
+
+ if (*ksocknal_tunables.ksnd_timeout > n * p)
+ chunk = (chunk * n * p) /
+ *ksocknal_tunables.ksnd_timeout;
+ if (chunk == 0)
+ chunk = 1;
+
+ for (i = 0; i < chunk; i++) {
+ ksocknal_check_peer_timeouts (peer_index);
+ peer_index = (peer_index + 1) %
+ ksocknal_data.ksnd_peer_hash_size;
+ }
+
+ deadline = cfs_time_add(deadline, cfs_time_seconds(p));
+ }
+
+ if (nenomem_conns != 0) {
+ /* Reduce my timeout if I rescheduled ENOMEM conns.
+ * This also prevents me getting woken immediately
+ * if any go back on my enomem list. */
+ timeout = SOCKNAL_ENOMEM_RETRY;
+ }
+ ksocknal_data.ksnd_reaper_waketime =
+ cfs_time_add(cfs_time_current(), timeout);
+
+ set_current_state (TASK_INTERRUPTIBLE);
+ add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+ if (!ksocknal_data.ksnd_shuttingdown &&
+ list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
+ list_empty (&ksocknal_data.ksnd_zombie_conns))
+ waitq_timedwait (&wait, TASK_INTERRUPTIBLE,
+ timeout);
+
+ set_current_state (TASK_RUNNING);
+ remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+ spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
+ }
+
+ spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
+
+ ksocknal_thread_fini();
+ return 0;
+}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
new file mode 100644
index 000000000000..3e08fe2d1489
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
@@ -0,0 +1,1088 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include "socklnd.h"
+
+# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+
+
+enum {
+ SOCKLND_TIMEOUT = 1,
+ SOCKLND_CREDITS,
+ SOCKLND_PEER_TXCREDITS,
+ SOCKLND_PEER_RTRCREDITS,
+ SOCKLND_PEER_TIMEOUT,
+ SOCKLND_NCONNDS,
+ SOCKLND_RECONNECTS_MIN,
+ SOCKLND_RECONNECTS_MAX,
+ SOCKLND_EAGER_ACK,
+ SOCKLND_ZERO_COPY,
+ SOCKLND_TYPED,
+ SOCKLND_BULK_MIN,
+ SOCKLND_RX_BUFFER_SIZE,
+ SOCKLND_TX_BUFFER_SIZE,
+ SOCKLND_NAGLE,
+ SOCKLND_IRQ_AFFINITY,
+ SOCKLND_ROUND_ROBIN,
+ SOCKLND_KEEPALIVE,
+ SOCKLND_KEEPALIVE_IDLE,
+ SOCKLND_KEEPALIVE_COUNT,
+ SOCKLND_KEEPALIVE_INTVL,
+ SOCKLND_BACKOFF_INIT,
+ SOCKLND_BACKOFF_MAX,
+ SOCKLND_PROTOCOL,
+ SOCKLND_ZERO_COPY_RECV,
+ SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
+};
+
+static ctl_table_t ksocknal_ctl_table[] = {
+ {
+ .ctl_name = SOCKLND_TIMEOUT,
+ .procname = "timeout",
+ .data = &ksocknal_tunables.ksnd_timeout,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_CREDITS,
+ .procname = "credits",
+ .data = &ksocknal_tunables.ksnd_credits,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_PEER_TXCREDITS,
+ .procname = "peer_credits",
+ .data = &ksocknal_tunables.ksnd_peertxcredits,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_PEER_RTRCREDITS,
+ .procname = "peer_buffer_credits",
+ .data = &ksocknal_tunables.ksnd_peerrtrcredits,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_PEER_TIMEOUT,
+ .procname = "peer_timeout",
+ .data = &ksocknal_tunables.ksnd_peertimeout,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_NCONNDS,
+ .procname = "nconnds",
+ .data = &ksocknal_tunables.ksnd_nconnds,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_RECONNECTS_MIN,
+ .procname = "min_reconnectms",
+ .data = &ksocknal_tunables.ksnd_min_reconnectms,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_RECONNECTS_MAX,
+ .procname = "max_reconnectms",
+ .data = &ksocknal_tunables.ksnd_max_reconnectms,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_EAGER_ACK,
+ .procname = "eager_ack",
+ .data = &ksocknal_tunables.ksnd_eager_ack,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_ZERO_COPY,
+ .procname = "zero_copy",
+ .data = &ksocknal_tunables.ksnd_zc_min_payload,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_ZERO_COPY_RECV,
+ .procname = "zero_copy_recv",
+ .data = &ksocknal_tunables.ksnd_zc_recv,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+
+ {
+ .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+ .procname = "zero_copy_recv",
+ .data = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_TYPED,
+ .procname = "typed",
+ .data = &ksocknal_tunables.ksnd_typed_conns,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_BULK_MIN,
+ .procname = "min_bulk",
+ .data = &ksocknal_tunables.ksnd_min_bulk,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_RX_BUFFER_SIZE,
+ .procname = "rx_buffer_size",
+ .data = &ksocknal_tunables.ksnd_rx_buffer_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_TX_BUFFER_SIZE,
+ .procname = "tx_buffer_size",
+ .data = &ksocknal_tunables.ksnd_tx_buffer_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_NAGLE,
+ .procname = "nagle",
+ .data = &ksocknal_tunables.ksnd_nagle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_ROUND_ROBIN,
+ .procname = "round_robin",
+ .data = &ksocknal_tunables.ksnd_round_robin,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_KEEPALIVE,
+ .procname = "keepalive",
+ .data = &ksocknal_tunables.ksnd_keepalive,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_KEEPALIVE_IDLE,
+ .procname = "keepalive_idle",
+ .data = &ksocknal_tunables.ksnd_keepalive_idle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_KEEPALIVE_COUNT,
+ .procname = "keepalive_count",
+ .data = &ksocknal_tunables.ksnd_keepalive_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = SOCKLND_KEEPALIVE_INTVL,
+ .procname = "keepalive_intvl",
+ .data = &ksocknal_tunables.ksnd_keepalive_intvl,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+#if SOCKNAL_VERSION_DEBUG
+ {
+ .ctl_name = SOCKLND_PROTOCOL,
+ .procname = "protocol",
+ .data = &ksocknal_tunables.ksnd_protocol,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+#endif
+ {0}
+};
+
+
+ctl_table_t ksocknal_top_ctl_table[] = {
+ {
+ .ctl_name = CTL_SOCKLND,
+ .procname = "socknal",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0555,
+ .child = ksocknal_ctl_table
+ },
+ { 0 }
+};
+
+int
+ksocknal_lib_tunables_init ()
+{
+ if (!*ksocknal_tunables.ksnd_typed_conns) {
+ int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol < 3)
+ rc = 0;
+#endif
+ if (rc != 0) {
+ CERROR("Protocol V3.x MUST have typed connections\n");
+ return rc;
+ }
+ }
+
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+
+ ksocknal_tunables.ksnd_sysctl =
+ cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
+
+ if (ksocknal_tunables.ksnd_sysctl == NULL)
+ CWARN("Can't setup /proc tunables\n");
+
+ return 0;
+}
+
+void
+ksocknal_lib_tunables_fini ()
+{
+ if (ksocknal_tunables.ksnd_sysctl != NULL)
+ unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
+}
+#else
+int
+ksocknal_lib_tunables_init ()
+{
+ return 0;
+}
+
+void
+ksocknal_lib_tunables_fini ()
+{
+}
+#endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
+
+int
+ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
+{
+ int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
+ &conn->ksnc_ipaddr,
+ &conn->ksnc_port);
+
+ /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
+ LASSERT (!conn->ksnc_closing);
+
+ if (rc != 0) {
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return rc;
+ }
+
+ rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
+ &conn->ksnc_myipaddr, NULL);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+int
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
+{
+ int caps = conn->ksnc_sock->sk->sk_route_caps;
+
+ if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+ return 0;
+
+ /* ZC if the socket supports scatter/gather and doesn't need software
+ * checksums */
+ return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
+}
+
+int
+ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ struct socket *sock = conn->ksnc_sock;
+ int nob;
+ int rc;
+
+ if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
+ conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
+ tx->tx_nob == tx->tx_resid && /* frist sending */
+ tx->tx_msg.ksm_csum == 0) /* not checksummed */
+ ksocknal_lib_csum_tx(tx);
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+
+ {
+#if SOCKNAL_SINGLE_FRAG_TX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ unsigned int niov = tx->tx_niov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+ int i;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = tx->tx_iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
+
+ set_fs (KERNEL_DS);
+ rc = sock_sendmsg(sock, &msg, nob);
+ set_fs (oldmm);
+ }
+ return rc;
+}
+
+int
+ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ struct socket *sock = conn->ksnc_sock;
+ lnet_kiov_t *kiov = tx->tx_kiov;
+ int rc;
+ int nob;
+
+ /* Not NOOP message */
+ LASSERT (tx->tx_lnetmsg != NULL);
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
+ /* Zero copy is enabled */
+ struct sock *sk = sock->sk;
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int fragsize = kiov->kiov_len;
+ int msgflg = MSG_DONTWAIT;
+
+ CDEBUG(D_NET, "page %p + offset %x for %d\n",
+ page, offset, kiov->kiov_len);
+
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ fragsize < tx->tx_resid)
+ msgflg |= MSG_MORE;
+
+ if (sk->sk_prot->sendpage != NULL) {
+ rc = sk->sk_prot->sendpage(sk, page,
+ offset, fragsize, msgflg);
+ } else {
+ rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
+ msgflg);
+ }
+ } else {
+#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ unsigned int niov = tx->tx_nkiov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+ int i;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
+
+ set_fs (KERNEL_DS);
+ rc = sock_sendmsg(sock, &msg, nob);
+ set_fs (oldmm);
+
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+ }
+ return rc;
+}
+
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+ int opt = 1;
+ mm_segment_t oldmm = get_fs();
+ struct socket *sock = conn->ksnc_sock;
+
+ /* Remind the socket to ACK eagerly. If I don't, the socket might
+ * think I'm about to send something it could piggy-back the ACK
+ * on, introducing delay in completing zero-copy sends in my
+ * peer. */
+
+ set_fs(KERNEL_DS);
+ sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
+ (char *)&opt, sizeof (opt));
+ set_fs(oldmm);
+}
+
+int
+ksocknal_lib_recv_iov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ unsigned int niov = conn->ksnc_rx_niov;
+#endif
+ struct iovec *iov = conn->ksnc_rx_iov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int nob;
+ int i;
+ int rc;
+ int fragnob;
+ int sum;
+ __u32 saved_csum;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ LASSERT (niov > 0);
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+ /* NB this is just a boolean..........................^ */
+ set_fs (oldmm);
+
+ saved_csum = 0;
+ if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
+ saved_csum = conn->ksnc_msg.ksm_csum;
+ conn->ksnc_msg.ksm_csum = 0;
+ }
+
+ if (saved_csum != 0) {
+ /* accumulate checksum */
+ for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
+ LASSERT (i < niov);
+
+ fragnob = iov[i].iov_len;
+ if (fragnob > sum)
+ fragnob = sum;
+
+ conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
+ iov[i].iov_base, fragnob);
+ }
+ conn->ksnc_msg.ksm_csum = saved_csum;
+ }
+
+ return rc;
+}
+
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+ if (addr == NULL)
+ return;
+
+ vunmap(addr);
+}
+
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+ struct iovec *iov, struct page **pages)
+{
+ void *addr;
+ int nob;
+ int i;
+
+ if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+ return NULL;
+
+ LASSERT (niov <= LNET_MAX_IOV);
+
+ if (niov < 2 ||
+ niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+ return NULL;
+
+ for (nob = i = 0; i < niov; i++) {
+ if ((kiov[i].kiov_offset != 0 && i > 0) ||
+ (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1))
+ return NULL;
+
+ pages[i] = kiov[i].kiov_page;
+ nob += kiov[i].kiov_len;
+ }
+
+ addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+ if (addr == NULL)
+ return NULL;
+
+ iov->iov_base = addr + kiov[0].kiov_offset;
+ iov->iov_len = nob;
+
+ return addr;
+}
+
+int
+ksocknal_lib_recv_kiov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ struct page **pages = NULL;
+ unsigned int niov = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+ unsigned int niov = conn->ksnc_rx_nkiov;
+#endif
+ lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int nob;
+ int i;
+ int rc;
+ void *base;
+ void *addr;
+ int sum;
+ int fragnob;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+ nob = scratchiov[0].iov_len;
+ msg.msg_iovlen = 1;
+
+ } else {
+ for (nob = i = 0; i < niov; i++) {
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ }
+ msg.msg_iovlen = niov;
+ }
+
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+ /* NB this is just a boolean.......................^ */
+ set_fs (oldmm);
+
+ if (conn->ksnc_msg.ksm_csum != 0) {
+ for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
+ LASSERT (i < niov);
+
+ /* Dang! have to kmap again because I have nowhere to stash the
+ * mapped address. But by doing it while the page is still
+ * mapped, the kernel just bumps the map count and returns me
+ * the address it stashed. */
+ base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+ fragnob = kiov[i].kiov_len;
+ if (fragnob > sum)
+ fragnob = sum;
+
+ conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
+ base, fragnob);
+
+ kunmap(kiov[i].kiov_page);
+ }
+ }
+
+ if (addr != NULL) {
+ ksocknal_lib_kiov_vunmap(addr);
+ } else {
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+ }
+
+ return (rc);
+}
+
+void
+ksocknal_lib_csum_tx(ksock_tx_t *tx)
+{
+ int i;
+ __u32 csum;
+ void *base;
+
+ LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
+ LASSERT(tx->tx_conn != NULL);
+ LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
+
+ tx->tx_msg.ksm_csum = 0;
+
+ csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
+ tx->tx_iov[0].iov_len);
+
+ if (tx->tx_kiov != NULL) {
+ for (i = 0; i < tx->tx_nkiov; i++) {
+ base = kmap(tx->tx_kiov[i].kiov_page) +
+ tx->tx_kiov[i].kiov_offset;
+
+ csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+
+ kunmap(tx->tx_kiov[i].kiov_page);
+ }
+ } else {
+ for (i = 1; i < tx->tx_niov; i++)
+ csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
+ tx->tx_iov[i].iov_len);
+ }
+
+ if (*ksocknal_tunables.ksnd_inject_csum_error) {
+ csum++;
+ *ksocknal_tunables.ksnd_inject_csum_error = 0;
+ }
+
+ tx->tx_msg.ksm_csum = csum;
+}
+
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+ mm_segment_t oldmm = get_fs ();
+ struct socket *sock = conn->ksnc_sock;
+ int len;
+ int rc;
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ *txmem = *rxmem = *nagle = 0;
+ return (-ESHUTDOWN);
+ }
+
+ rc = libcfs_sock_getbuf(sock, txmem, rxmem);
+ if (rc == 0) {
+ len = sizeof(*nagle);
+ set_fs(KERNEL_DS);
+ rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
+ (char *)nagle, &len);
+ set_fs(oldmm);
+ }
+
+ ksocknal_connsock_decref(conn);
+
+ if (rc == 0)
+ *nagle = !*nagle;
+ else
+ *txmem = *rxmem = *nagle = 0;
+
+ return (rc);
+}
+
+int
+ksocknal_lib_setup_sock (struct socket *sock)
+{
+ mm_segment_t oldmm = get_fs ();
+ int rc;
+ int option;
+ int keep_idle;
+ int keep_intvl;
+ int keep_count;
+ int do_keepalive;
+ struct linger linger;
+
+ sock->sk->sk_allocation = GFP_NOFS;
+
+ /* Ensure this socket aborts active sends immediately when we close
+ * it. */
+
+ linger.l_onoff = 0;
+ linger.l_linger = 0;
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
+ (char *)&linger, sizeof (linger));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER: %d\n", rc);
+ return (rc);
+ }
+
+ option = -1;
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER2: %d\n", rc);
+ return (rc);
+ }
+
+ if (!*ksocknal_tunables.ksnd_nagle) {
+ option = 1;
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't disable nagle: %d\n", rc);
+ return (rc);
+ }
+ }
+
+ rc = libcfs_sock_setbuf(sock,
+ *ksocknal_tunables.ksnd_tx_buffer_size,
+ *ksocknal_tunables.ksnd_rx_buffer_size);
+ if (rc != 0) {
+ CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
+ *ksocknal_tunables.ksnd_tx_buffer_size,
+ *ksocknal_tunables.ksnd_rx_buffer_size, rc);
+ return (rc);
+ }
+
+/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
+
+ /* snapshot tunables */
+ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
+ keep_count = *ksocknal_tunables.ksnd_keepalive_count;
+ keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
+
+ do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+
+ option = (do_keepalive ? 1 : 0);
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
+ return (rc);
+ }
+
+ if (!do_keepalive)
+ return (0);
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
+ (char *)&keep_idle, sizeof (keep_idle));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
+ (char *)&keep_intvl, sizeof (keep_intvl));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
+ (char *)&keep_count, sizeof (keep_count));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
+ return (rc);
+ }
+
+ return (0);
+}
+
+void
+ksocknal_lib_push_conn (ksock_conn_t *conn)
+{
+ struct sock *sk;
+ struct tcp_sock *tp;
+ int nonagle;
+ int val = 1;
+ int rc;
+ mm_segment_t oldmm;
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) /* being shut down */
+ return;
+
+ sk = conn->ksnc_sock->sk;
+ tp = tcp_sk(sk);
+
+ lock_sock (sk);
+ nonagle = tp->nonagle;
+ tp->nonagle = 1;
+ release_sock (sk);
+
+ oldmm = get_fs ();
+ set_fs (KERNEL_DS);
+
+ rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof (val));
+ LASSERT (rc == 0);
+
+ set_fs (oldmm);
+
+ lock_sock (sk);
+ tp->nonagle = nonagle;
+ release_sock (sk);
+
+ ksocknal_connsock_decref(conn);
+}
+
+extern void ksocknal_read_callback (ksock_conn_t *conn);
+extern void ksocknal_write_callback (ksock_conn_t *conn);
+/*
+ * socket call back in Linux
+ */
+static void
+ksocknal_data_ready (struct sock *sk, int n)
+{
+ ksock_conn_t *conn;
+ ENTRY;
+
+ /* interleave correctly with closing sockets... */
+ LASSERT(!in_irq());
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ conn = sk->sk_user_data;
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
+ LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
+ sk->sk_data_ready (sk, n);
+ } else
+ ksocknal_read_callback(conn);
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ EXIT;
+}
+
+static void
+ksocknal_write_space (struct sock *sk)
+{
+ ksock_conn_t *conn;
+ int wspace;
+ int min_wpace;
+
+ /* interleave correctly with closing sockets... */
+ LASSERT(!in_irq());
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ conn = sk->sk_user_data;
+ wspace = SOCKNAL_WSPACE(sk);
+ min_wpace = SOCKNAL_MIN_WSPACE(sk);
+
+ CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
+ sk, wspace, min_wpace, conn,
+ (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
+ " ready" : " blocked"),
+ (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
+ " scheduled" : " idle"),
+ (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+ " empty" : " queued"));
+
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
+ LASSERT (sk->sk_write_space != &ksocknal_write_space);
+ sk->sk_write_space (sk);
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return;
+ }
+
+ if (wspace >= min_wpace) { /* got enough space */
+ ksocknal_write_callback(conn);
+
+ /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
+ * ENOMEM check in ksocknal_transmit is race-free (think about
+ * it). */
+
+ clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+}
+
+void
+ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
+ conn->ksnc_saved_write_space = sock->sk->sk_write_space;
+}
+
+void
+ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ sock->sk->sk_user_data = conn;
+ sock->sk->sk_data_ready = ksocknal_data_ready;
+ sock->sk->sk_write_space = ksocknal_write_space;
+ return;
+}
+
+void
+ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ /* Remove conn's network callbacks.
+ * NB I _have_ to restore the callback, rather than storing a noop,
+ * since the socket could survive past this module being unloaded!! */
+ sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
+ sock->sk->sk_write_space = conn->ksnc_saved_write_space;
+
+ /* A callback could be in progress already; they hold a read lock
+ * on ksnd_global_lock (to serialise with me) and NOOP if
+ * sk_user_data is NULL. */
+ sock->sk->sk_user_data = NULL;
+
+ return ;
+}
+
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+ int rc = 0;
+ ksock_sched_t *sched;
+
+ sched = conn->ksnc_scheduler;
+ spin_lock_bh(&sched->kss_lock);
+
+ if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
+ !conn->ksnc_tx_ready) {
+ /* SOCK_NOSPACE is set when the socket fills
+ * and cleared in the write_space callback
+ * (which also sets ksnc_tx_ready). If
+ * SOCK_NOSPACE and ksnc_tx_ready are BOTH
+ * zero, I didn't fill the socket and
+ * write_space won't reschedule me, so I
+ * return -ENOMEM to get my caller to retry
+ * after a timeout */
+ rc = -ENOMEM;
+ }
+
+ spin_unlock_bh(&sched->kss_lock);
+
+ return rc;
+}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h
new file mode 100644
index 000000000000..3c135786dc11
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.h
@@ -0,0 +1,91 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_PORTAL_ALLOC
+
+#ifndef __LINUX_SOCKNAL_LIB_H__
+#define __LINUX_SOCKNAL_LIB_H__
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+#include <linux/if.h>
+
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+#include <linux/syscalls.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+
+#include <linux/crc32.h>
+static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len)
+{
+#if 1
+ return crc32_le(crc, p, len);
+#else
+ while (len-- > 0)
+ crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
+ return crc;
+#endif
+}
+
+#define SOCKNAL_WSPACE(sk) sk_stream_wspace(sk)
+#define SOCKNAL_MIN_WSPACE(sk) sk_stream_min_wspace(sk)
+
+/* assume one thread for each connection type */
+#define SOCKNAL_NSCHEDS 3
+#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)
+
+#endif
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
new file mode 100644
index 000000000000..8a474f64abbe
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+static int sock_timeout = 50;
+CFS_MODULE_PARM(sock_timeout, "i", int, 0644,
+ "dead socket timeout (seconds)");
+
+static int credits = 256;
+CFS_MODULE_PARM(credits, "i", int, 0444,
+ "# concurrent sends");
+
+static int peer_credits = 8;
+CFS_MODULE_PARM(peer_credits, "i", int, 0444,
+ "# concurrent sends to 1 peer");
+
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+ "# per-peer router buffer credits");
+
+static int peer_timeout = 180;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+ "Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
+/* Number of daemons in each thread pool which is percpt,
+ * we will estimate reasonable value based on CPUs if it's not set. */
+static unsigned int nscheds;
+CFS_MODULE_PARM(nscheds, "i", int, 0444,
+ "# scheduler daemons in each pool while starting");
+
+static int nconnds = 4;
+CFS_MODULE_PARM(nconnds, "i", int, 0444,
+ "# connection daemons while starting");
+
+static int nconnds_max = 64;
+CFS_MODULE_PARM(nconnds_max, "i", int, 0444,
+ "max # connection daemons");
+
+static int min_reconnectms = 1000;
+CFS_MODULE_PARM(min_reconnectms, "i", int, 0644,
+ "min connection retry interval (mS)");
+
+static int max_reconnectms = 60000;
+CFS_MODULE_PARM(max_reconnectms, "i", int, 0644,
+ "max connection retry interval (mS)");
+
+# define DEFAULT_EAGER_ACK 0
+static int eager_ack = DEFAULT_EAGER_ACK;
+CFS_MODULE_PARM(eager_ack, "i", int, 0644,
+ "send tcp ack packets eagerly");
+
+static int typed_conns = 1;
+CFS_MODULE_PARM(typed_conns, "i", int, 0444,
+ "use different sockets for bulk");
+
+static int min_bulk = (1<<10);
+CFS_MODULE_PARM(min_bulk, "i", int, 0644,
+ "smallest 'large' message");
+
+# define DEFAULT_BUFFER_SIZE 0
+static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
+CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644,
+ "socket tx buffer size (0 for system default)");
+
+static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
+CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644,
+ "socket rx buffer size (0 for system default)");
+
+static int nagle = 0;
+CFS_MODULE_PARM(nagle, "i", int, 0644,
+ "enable NAGLE?");
+
+static int round_robin = 1;
+CFS_MODULE_PARM(round_robin, "i", int, 0644,
+ "Round robin for multiple interfaces");
+
+static int keepalive = 30;
+CFS_MODULE_PARM(keepalive, "i", int, 0644,
+ "# seconds before send keepalive");
+
+static int keepalive_idle = 30;
+CFS_MODULE_PARM(keepalive_idle, "i", int, 0644,
+ "# idle seconds before probe");
+
+#define DEFAULT_KEEPALIVE_COUNT 5
+static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
+CFS_MODULE_PARM(keepalive_count, "i", int, 0644,
+ "# missed probes == dead");
+
+static int keepalive_intvl = 5;
+CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
+ "seconds between probes");
+
+static int enable_csum = 0;
+CFS_MODULE_PARM(enable_csum, "i", int, 0644,
+ "enable check sum");
+
+static int inject_csum_error = 0;
+CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
+ "set non-zero to inject a checksum error");
+
+static int nonblk_zcack = 1;
+CFS_MODULE_PARM(nonblk_zcack, "i", int, 0644,
+ "always send ZC-ACK on non-blocking connection");
+
+static unsigned int zc_min_payload = (16 << 10);
+CFS_MODULE_PARM(zc_min_payload, "i", int, 0644,
+ "minimum payload size to zero copy");
+
+static unsigned int zc_recv = 0;
+CFS_MODULE_PARM(zc_recv, "i", int, 0644,
+ "enable ZC recv for Chelsio driver");
+
+static unsigned int zc_recv_min_nfrags = 16;
+CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644,
+ "minimum # of fragments to enable ZC recv");
+
+
+#if SOCKNAL_VERSION_DEBUG
+static int protocol = 3;
+CFS_MODULE_PARM(protocol, "i", int, 0644,
+ "protocol version");
+#endif
+
+ksock_tunables_t ksocknal_tunables;
+
+int ksocknal_tunables_init(void)
+{
+
+ /* initialize ksocknal_tunables structure */
+ ksocknal_tunables.ksnd_timeout = &sock_timeout;
+ ksocknal_tunables.ksnd_nscheds = &nscheds;
+ ksocknal_tunables.ksnd_nconnds = &nconnds;
+ ksocknal_tunables.ksnd_nconnds_max = &nconnds_max;
+ ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
+ ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
+ ksocknal_tunables.ksnd_eager_ack = &eager_ack;
+ ksocknal_tunables.ksnd_typed_conns = &typed_conns;
+ ksocknal_tunables.ksnd_min_bulk = &min_bulk;
+ ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
+ ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
+ ksocknal_tunables.ksnd_nagle = &nagle;
+ ksocknal_tunables.ksnd_round_robin = &round_robin;
+ ksocknal_tunables.ksnd_keepalive = &keepalive;
+ ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
+ ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
+ ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
+ ksocknal_tunables.ksnd_credits = &credits;
+ ksocknal_tunables.ksnd_peertxcredits = &peer_credits;
+ ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits;
+ ksocknal_tunables.ksnd_peertimeout = &peer_timeout;
+ ksocknal_tunables.ksnd_enable_csum = &enable_csum;
+ ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
+ ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack;
+ ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload;
+ ksocknal_tunables.ksnd_zc_recv = &zc_recv;
+ ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
+
+
+
+#if SOCKNAL_VERSION_DEBUG
+ ksocknal_tunables.ksnd_protocol = &protocol;
+#endif
+
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+ ksocknal_tunables.ksnd_sysctl = NULL;
+#endif
+
+ if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
+ *ksocknal_tunables.ksnd_zc_min_payload = (2 << 10);
+
+ /* initialize platform-sepcific tunables */
+ return ksocknal_lib_tunables_init();
+};
+
+void ksocknal_tunables_fini(void)
+{
+ ksocknal_lib_tunables_fini();
+}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
new file mode 100644
index 000000000000..ec57179f8d2b
--- /dev/null
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
@@ -0,0 +1,797 @@
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+/*
+ * Protocol entries :
+ * pro_send_hello : send hello message
+ * pro_recv_hello : receive hello message
+ * pro_pack : pack message header
+ * pro_unpack : unpack message header
+ * pro_queue_tx_zcack() : Called holding BH lock: kss_lock
+ * return 1 if ACK is piggybacked, otherwise return 0
+ * pro_queue_tx_msg() : Called holding BH lock: kss_lock
+ * return the ACK that piggybacked by my message, or NULL
+ * pro_handle_zcreq() : handler of incoming ZC-REQ
+ * pro_handle_zcack() : handler of incoming ZC-ACK
+ * pro_match_tx() : Called holding glock
+ */
+
+static ksock_tx_t *
+ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
+{
+ /* V1.x, just enqueue it */
+ list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+ return NULL;
+}
+
+void
+ksocknal_next_tx_carrier(ksock_conn_t *conn)
+{
+ ksock_tx_t *tx = conn->ksnc_tx_carrier;
+
+ /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
+ LASSERT (!list_empty(&conn->ksnc_tx_queue));
+ LASSERT (tx != NULL);
+
+ /* Next TX that can carry ZC-ACK or LNet message */
+ if (tx->tx_list.next == &conn->ksnc_tx_queue) {
+ /* no more packets queued */
+ conn->ksnc_tx_carrier = NULL;
+ } else {
+ conn->ksnc_tx_carrier = list_entry(tx->tx_list.next,
+ ksock_tx_t, tx_list);
+ LASSERT (conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
+ }
+}
+
+static int
+ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
+ ksock_tx_t *tx_ack, __u64 cookie)
+{
+ ksock_tx_t *tx = conn->ksnc_tx_carrier;
+
+ LASSERT (tx_ack == NULL ||
+ tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+ /*
+ * Enqueue or piggyback tx_ack / cookie
+ * . no tx can piggyback cookie of tx_ack (or cookie), just
+ * enqueue the tx_ack (if tx_ack != NUL) and return NULL.
+ * . There is tx can piggyback cookie of tx_ack (or cookie),
+ * piggyback the cookie and return the tx.
+ */
+ if (tx == NULL) {
+ if (tx_ack != NULL) {
+ list_add_tail(&tx_ack->tx_list,
+ &conn->ksnc_tx_queue);
+ conn->ksnc_tx_carrier = tx_ack;
+ }
+ return 0;
+ }
+
+ if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
+ /* tx is noop zc-ack, can't piggyback zc-ack cookie */
+ if (tx_ack != NULL)
+ list_add_tail(&tx_ack->tx_list,
+ &conn->ksnc_tx_queue);
+ return 0;
+ }
+
+ LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
+ LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);
+
+ if (tx_ack != NULL)
+ cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
+
+ /* piggyback the zc-ack cookie */
+ tx->tx_msg.ksm_zc_cookies[1] = cookie;
+ /* move on to the next TX which can carry cookie */
+ ksocknal_next_tx_carrier(conn);
+
+ return 1;
+}
+
+static ksock_tx_t *
+ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
+{
+ ksock_tx_t *tx = conn->ksnc_tx_carrier;
+
+ /*
+ * Enqueue tx_msg:
+ * . If there is no NOOP on the connection, just enqueue
+ * tx_msg and return NULL
+ * . If there is NOOP on the connection, piggyback the cookie
+ * and replace the NOOP tx, and return the NOOP tx.
+ */
+ if (tx == NULL) { /* nothing on queue */
+ list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+ conn->ksnc_tx_carrier = tx_msg;
+ return NULL;
+ }
+
+ if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
+ list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
+ return NULL;
+ }
+
+ LASSERT (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+ /* There is a noop zc-ack can be piggybacked */
+ tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
+ ksocknal_next_tx_carrier(conn);
+
+ /* use new_tx to replace the noop zc-ack packet */
+ list_add(&tx_msg->tx_list, &tx->tx_list);
+ list_del(&tx->tx_list);
+
+ return tx;
+}
+
+static int
+ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
+ ksock_tx_t *tx_ack, __u64 cookie)
+{
+ ksock_tx_t *tx;
+
+ if (conn->ksnc_type != SOCKLND_CONN_ACK)
+ return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
+
+ /* non-blocking ZC-ACK (to router) */
+ LASSERT (tx_ack == NULL ||
+ tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+ if ((tx = conn->ksnc_tx_carrier) == NULL) {
+ if (tx_ack != NULL) {
+ list_add_tail(&tx_ack->tx_list,
+ &conn->ksnc_tx_queue);
+ conn->ksnc_tx_carrier = tx_ack;
+ }
+ return 0;
+ }
+
+ /* conn->ksnc_tx_carrier != NULL */
+
+ if (tx_ack != NULL)
+ cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
+
+ if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
+ return 1;
+
+ if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
+ /* replace the keepalive PING with a real ACK */
+ LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
+ tx->tx_msg.ksm_zc_cookies[1] = cookie;
+ return 1;
+ }
+
+ if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
+ cookie == tx->tx_msg.ksm_zc_cookies[1]) {
+ CWARN("%s: duplicated ZC cookie: "LPU64"\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
+ return 1; /* XXX return error in the future */
+ }
+
+ if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
+ /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
+ if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
+ tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
+ tx->tx_msg.ksm_zc_cookies[1] = cookie;
+ } else {
+ tx->tx_msg.ksm_zc_cookies[0] = cookie;
+ }
+
+ if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
+ /* not likely to carry more ACKs, skip it to simplify logic */
+ ksocknal_next_tx_carrier(conn);
+ }
+
+ return 1;
+ }
+
+ /* takes two or more cookies already */
+
+ if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
+ __u64 tmp = 0;
+
+ /* two seperated cookies: (a+2, a) or (a+1, a) */
+ LASSERT (tx->tx_msg.ksm_zc_cookies[0] -
+ tx->tx_msg.ksm_zc_cookies[1] <= 2);
+
+ if (tx->tx_msg.ksm_zc_cookies[0] -
+ tx->tx_msg.ksm_zc_cookies[1] == 2) {
+ if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
+ tmp = cookie;
+ } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
+ tmp = tx->tx_msg.ksm_zc_cookies[1];
+ } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
+ tmp = tx->tx_msg.ksm_zc_cookies[0];
+ }
+
+ if (tmp != 0) {
+ /* range of cookies */
+ tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
+ tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
+ return 1;
+ }
+
+ } else {
+ /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
+ if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
+ cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
+ CWARN("%s: duplicated ZC cookie: "LPU64"\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
+ return 1; /* XXX: return error in the future */
+ }
+
+ if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
+ tx->tx_msg.ksm_zc_cookies[1] = cookie;
+ return 1;
+ }
+
+ if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
+ tx->tx_msg.ksm_zc_cookies[0] = cookie;
+ return 1;
+ }
+ }
+
+ /* failed to piggyback ZC-ACK */
+ if (tx_ack != NULL) {
+ list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
+ /* the next tx can piggyback at least 1 ACK */
+ ksocknal_next_tx_carrier(conn);
+ }
+
+ return 0;
+}
+
+static int
+ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
+{
+ int nob;
+
+#if SOCKNAL_VERSION_DEBUG
+ if (!*ksocknal_tunables.ksnd_typed_conns)
+ return SOCKNAL_MATCH_YES;
+#endif
+
+ if (tx == NULL || tx->tx_lnetmsg == NULL) {
+ /* noop packet */
+ nob = offsetof(ksock_msg_t, ksm_u);
+ } else {
+ nob = tx->tx_lnetmsg->msg_len +
+ ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
+ sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
+ }
+
+ /* default checking for typed connection */
+ switch (conn->ksnc_type) {
+ default:
+ CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
+ LBUG();
+ case SOCKLND_CONN_ANY:
+ return SOCKNAL_MATCH_YES;
+
+ case SOCKLND_CONN_BULK_IN:
+ return SOCKNAL_MATCH_MAY;
+
+ case SOCKLND_CONN_BULK_OUT:
+ if (nob < *ksocknal_tunables.ksnd_min_bulk)
+ return SOCKNAL_MATCH_MAY;
+ else
+ return SOCKNAL_MATCH_YES;
+
+ case SOCKLND_CONN_CONTROL:
+ if (nob >= *ksocknal_tunables.ksnd_min_bulk)
+ return SOCKNAL_MATCH_MAY;
+ else
+ return SOCKNAL_MATCH_YES;
+ }
+}
+
+static int
+ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
+{
+ int nob;
+
+ if (tx == NULL || tx->tx_lnetmsg == NULL)
+ nob = offsetof(ksock_msg_t, ksm_u);
+ else
+ nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
+
+ switch (conn->ksnc_type) {
+ default:
+ CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
+ LBUG();
+ case SOCKLND_CONN_ANY:
+ return SOCKNAL_MATCH_NO;
+
+ case SOCKLND_CONN_ACK:
+ if (nonblk)
+ return SOCKNAL_MATCH_YES;
+ else if (tx == NULL || tx->tx_lnetmsg == NULL)
+ return SOCKNAL_MATCH_MAY;
+ else
+ return SOCKNAL_MATCH_NO;
+
+ case SOCKLND_CONN_BULK_OUT:
+ if (nonblk)
+ return SOCKNAL_MATCH_NO;
+ else if (nob < *ksocknal_tunables.ksnd_min_bulk)
+ return SOCKNAL_MATCH_MAY;
+ else
+ return SOCKNAL_MATCH_YES;
+
+ case SOCKLND_CONN_CONTROL:
+ if (nonblk)
+ return SOCKNAL_MATCH_NO;
+ else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
+ return SOCKNAL_MATCH_MAY;
+ else
+ return SOCKNAL_MATCH_YES;
+ }
+}
+
+/* (Sink) handle incoming ZC request from sender */
+static int
+ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
+{
+ ksock_peer_t *peer = c->ksnc_peer;
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+ int rc;
+
+ read_lock(&ksocknal_data.ksnd_global_lock);
+
+ conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
+ if (conn != NULL) {
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+
+ LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);
+
+ spin_lock_bh(&sched->kss_lock);
+
+ rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
+
+ spin_unlock_bh(&sched->kss_lock);
+
+ if (rc) { /* piggybacked */
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ return 0;
+ }
+ }
+
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ /* ACK connection is not ready, or can't piggyback the ACK */
+ tx = ksocknal_alloc_tx_noop(cookie, !!remote);
+ if (tx == NULL)
+ return -ENOMEM;
+
+ if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0)
+ return 0;
+
+ ksocknal_free_tx(tx);
+ return rc;
+}
+
+/* (Sender) handle ZC_ACK from sink */
+static int
+ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
+{
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_tx_t *tx;
+ ksock_tx_t *tmp;
+ LIST_HEAD (zlist);
+ int count;
+
+ if (cookie1 == 0)
+ cookie1 = cookie2;
+
+ count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
+
+ if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
+ conn->ksnc_proto == &ksocknal_protocol_v3x) {
+ /* keepalive PING for V3.x, just ignore it */
+ return count == 1 ? 0 : -EPROTO;
+ }
+
+ spin_lock(&peer->ksnp_lock);
+
+ list_for_each_entry_safe(tx, tmp,
+ &peer->ksnp_zc_req_list, tx_zc_list) {
+ __u64 c = tx->tx_msg.ksm_zc_cookies[0];
+
+ if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
+ tx->tx_msg.ksm_zc_cookies[0] = 0;
+ list_del(&tx->tx_zc_list);
+ list_add(&tx->tx_zc_list, &zlist);
+
+ if (--count == 0)
+ break;
+ }
+ }
+
+ spin_unlock(&peer->ksnp_lock);
+
+ while (!list_empty(&zlist)) {
+ tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+ list_del(&tx->tx_zc_list);
+ ksocknal_tx_decref(tx);
+ }
+
+ return count == 0 ? 0 : -EPROTO;
+}
+
+static int
+ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
+{
+ socket_t *sock = conn->ksnc_sock;
+ lnet_hdr_t *hdr;
+ lnet_magicversion_t *hmv;
+ int rc;
+ int i;
+
+ CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));
+
+ LIBCFS_ALLOC(hdr, sizeof(*hdr));
+ if (hdr == NULL) {
+ CERROR("Can't allocate lnet_hdr_t\n");
+ return -ENOMEM;
+ }
+
+ hmv = (lnet_magicversion_t *)&hdr->dest_nid;
+
+ /* Re-organize V2.x message header to V1.x (lnet_hdr_t)
+ * header and send out */
+ hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
+ hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
+ hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);
+
+ if (the_lnet.ln_testprotocompat != 0) {
+ /* single-shot proto check */
+ LNET_LOCK();
+ if ((the_lnet.ln_testprotocompat & 1) != 0) {
+ hmv->version_major++; /* just different! */
+ the_lnet.ln_testprotocompat &= ~1;
+ }
+ if ((the_lnet.ln_testprotocompat & 2) != 0) {
+ hmv->magic = LNET_PROTO_MAGIC;
+ the_lnet.ln_testprotocompat &= ~2;
+ }
+ LNET_UNLOCK();
+ }
+
+ hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid);
+ hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid);
+ hdr->type = cpu_to_le32 (LNET_MSG_HELLO);
+ hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
+ hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
+ hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);
+
+ rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),lnet_acceptor_timeout());
+
+ if (rc != 0) {
+ CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ goto out;
+ }
+
+ if (hello->kshm_nips == 0)
+ goto out;
+
+ for (i = 0; i < (int) hello->kshm_nips; i++) {
+ hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
+ }
+
+ rc = libcfs_sock_write(sock, hello->kshm_ips,
+ hello->kshm_nips * sizeof(__u32),
+ lnet_acceptor_timeout());
+ if (rc != 0) {
+ CNETERR("Error %d sending HELLO payload (%d)"
+ " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ }
+out:
+ LIBCFS_FREE(hdr, sizeof(*hdr));
+
+ return rc;
+}
+
+static int
+ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
+{
+ socket_t *sock = conn->ksnc_sock;
+ int rc;
+
+ hello->kshm_magic = LNET_PROTO_MAGIC;
+ hello->kshm_version = conn->ksnc_proto->pro_version;
+
+ if (the_lnet.ln_testprotocompat != 0) {
+ /* single-shot proto check */
+ LNET_LOCK();
+ if ((the_lnet.ln_testprotocompat & 1) != 0) {
+ hello->kshm_version++; /* just different! */
+ the_lnet.ln_testprotocompat &= ~1;
+ }
+ LNET_UNLOCK();
+ }
+
+ rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
+ lnet_acceptor_timeout());
+
+ if (rc != 0) {
+ CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ return rc;
+ }
+
+ if (hello->kshm_nips == 0)
+ return 0;
+
+ rc = libcfs_sock_write(sock, hello->kshm_ips,
+ hello->kshm_nips * sizeof(__u32),
+ lnet_acceptor_timeout());
+ if (rc != 0) {
+ CNETERR("Error %d sending HELLO payload (%d)"
+ " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ }
+
+ return rc;
+}
+
+static int
+ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
+{
+ socket_t *sock = conn->ksnc_sock;
+ lnet_hdr_t *hdr;
+ int rc;
+ int i;
+
+ LIBCFS_ALLOC(hdr, sizeof(*hdr));
+ if (hdr == NULL) {
+ CERROR("Can't allocate lnet_hdr_t\n");
+ return -ENOMEM;
+ }
+
+ rc = libcfs_sock_read(sock, &hdr->src_nid,
+ sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid),
+ timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ goto out;
+ }
+
+ /* ...and check we got what we expected */
+ if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
+ CERROR ("Expecting a HELLO hdr,"
+ " but got type %d from %u.%u.%u.%u\n",
+ le32_to_cpu (hdr->type),
+ HIPQUAD(conn->ksnc_ipaddr));
+ rc = -EPROTO;
+ goto out;
+ }
+
+ hello->kshm_src_nid = le64_to_cpu (hdr->src_nid);
+ hello->kshm_src_pid = le32_to_cpu (hdr->src_pid);
+ hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation);
+ hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type);
+ hello->kshm_nips = le32_to_cpu (hdr->payload_length) /
+ sizeof (__u32);
+
+ if (hello->kshm_nips > LNET_MAX_INTERFACES) {
+ CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
+ hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
+ rc = -EPROTO;
+ goto out;
+ }
+
+ if (hello->kshm_nips == 0)
+ goto out;
+
+ rc = libcfs_sock_read(sock, hello->kshm_ips,
+ hello->kshm_nips * sizeof(__u32), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ goto out;
+ }
+
+ for (i = 0; i < (int) hello->kshm_nips; i++) {
+ hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
+
+ if (hello->kshm_ips[i] == 0) {
+ CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
+ i, HIPQUAD(conn->ksnc_ipaddr));
+ rc = -EPROTO;
+ break;
+ }
+ }
+out:
+ LIBCFS_FREE(hdr, sizeof(*hdr));
+
+ return rc;
+}
+
+static int
+ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
+{
+ socket_t *sock = conn->ksnc_sock;
+ int rc;
+ int i;
+
+ if (hello->kshm_magic == LNET_PROTO_MAGIC)
+ conn->ksnc_flip = 0;
+ else
+ conn->ksnc_flip = 1;
+
+ rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
+ offsetof(ksock_hello_msg_t, kshm_ips) -
+ offsetof(ksock_hello_msg_t, kshm_src_nid),
+ timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return rc;
+ }
+
+ if (conn->ksnc_flip) {
+ __swab32s(&hello->kshm_src_pid);
+ __swab64s(&hello->kshm_src_nid);
+ __swab32s(&hello->kshm_dst_pid);
+ __swab64s(&hello->kshm_dst_nid);
+ __swab64s(&hello->kshm_src_incarnation);
+ __swab64s(&hello->kshm_dst_incarnation);
+ __swab32s(&hello->kshm_ctype);
+ __swab32s(&hello->kshm_nips);
+ }
+
+ if (hello->kshm_nips > LNET_MAX_INTERFACES) {
+ CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
+ hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+
+ if (hello->kshm_nips == 0)
+ return 0;
+
+ rc = libcfs_sock_read(sock, hello->kshm_ips,
+ hello->kshm_nips * sizeof(__u32), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return rc;
+ }
+
+ for (i = 0; i < (int) hello->kshm_nips; i++) {
+ if (conn->ksnc_flip)
+ __swab32s(&hello->kshm_ips[i]);
+
+ if (hello->kshm_ips[i] == 0) {
+ CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
+ i, HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+ }
+
+ return 0;
+}
+
+static void
+ksocknal_pack_msg_v1(ksock_tx_t *tx)
+{
+ /* V1.x has no KSOCK_MSG_NOOP */
+ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+ LASSERT(tx->tx_lnetmsg != NULL);
+
+ tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr;
+ tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t);
+
+ tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
+}
+
+static void
+ksocknal_pack_msg_v2(ksock_tx_t *tx)
+{
+ tx->tx_iov[0].iov_base = (void *)&tx->tx_msg;
+
+ if (tx->tx_lnetmsg != NULL) {
+ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
+
+ tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
+ tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
+ tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
+ } else {
+ LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
+
+ tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
+ tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
+ }
+ /* Don't checksum before start sending, because packet can be piggybacked with ACK */
+}
+
+static void
+ksocknal_unpack_msg_v1(ksock_msg_t *msg)
+{
+ msg->ksm_csum = 0;
+ msg->ksm_type = KSOCK_MSG_LNET;
+ msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0;
+}
+
+static void
+ksocknal_unpack_msg_v2(ksock_msg_t *msg)
+{
+ return; /* Do nothing */
+}
+
+ksock_proto_t ksocknal_protocol_v1x =
+{
+ .pro_version = KSOCK_PROTO_V1,
+ .pro_send_hello = ksocknal_send_hello_v1,
+ .pro_recv_hello = ksocknal_recv_hello_v1,
+ .pro_pack = ksocknal_pack_msg_v1,
+ .pro_unpack = ksocknal_unpack_msg_v1,
+ .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
+ .pro_handle_zcreq = NULL,
+ .pro_handle_zcack = NULL,
+ .pro_queue_tx_zcack = NULL,
+ .pro_match_tx = ksocknal_match_tx
+};
+
+ksock_proto_t ksocknal_protocol_v2x =
+{
+ .pro_version = KSOCK_PROTO_V2,
+ .pro_send_hello = ksocknal_send_hello_v2,
+ .pro_recv_hello = ksocknal_recv_hello_v2,
+ .pro_pack = ksocknal_pack_msg_v2,
+ .pro_unpack = ksocknal_unpack_msg_v2,
+ .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
+ .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
+ .pro_handle_zcreq = ksocknal_handle_zcreq,
+ .pro_handle_zcack = ksocknal_handle_zcack,
+ .pro_match_tx = ksocknal_match_tx
+};
+
+ksock_proto_t ksocknal_protocol_v3x =
+{
+ .pro_version = KSOCK_PROTO_V3,
+ .pro_send_hello = ksocknal_send_hello_v2,
+ .pro_recv_hello = ksocknal_recv_hello_v2,
+ .pro_pack = ksocknal_pack_msg_v2,
+ .pro_unpack = ksocknal_unpack_msg_v2,
+ .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
+ .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
+ .pro_handle_zcreq = ksocknal_handle_zcreq,
+ .pro_handle_zcack = ksocknal_handle_zcack,
+ .pro_match_tx = ksocknal_match_tx_v3
+};
diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile
new file mode 100644
index 000000000000..1bd9ef774208
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_LNET) += lnet.o
+
+lnet-y := api-errno.o api-ni.o config.o lib-me.o lib-msg.o lib-eq.o \
+ lib-md.o lib-ptl.o lib-move.o module.o lo.o router.o \
+ router_proc.o acceptor.o peer.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c
new file mode 100644
index 000000000000..81ef28bbcba0
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/acceptor.c
@@ -0,0 +1,527 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+
+static int accept_port = 988;
+static int accept_backlog = 127;
+static int accept_timeout = 5;
+
+struct {
+ int pta_shutdown;
+ socket_t *pta_sock;
+ struct completion pta_signal;
+} lnet_acceptor_state;
+
+int
+lnet_acceptor_port(void)
+{
+ return accept_port;
+}
+
+static inline int
+lnet_accept_magic(__u32 magic, __u32 constant)
+{
+ return (magic == constant ||
+ magic == __swab32(constant));
+}
+
+
+EXPORT_SYMBOL(lnet_acceptor_port);
+
+static char *accept = "secure";
+
+CFS_MODULE_PARM(accept, "s", charp, 0444,
+ "Accept connections (secure|all|none)");
+CFS_MODULE_PARM(accept_port, "i", int, 0444,
+ "Acceptor's port (same on all nodes)");
+CFS_MODULE_PARM(accept_backlog, "i", int, 0444,
+ "Acceptor's listen backlog");
+CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
+ "Acceptor's timeout (seconds)");
+
+static char *accept_type = NULL;
+
+int
+lnet_acceptor_get_tunables(void)
+{
+ /* Userland acceptor uses 'accept_type' instead of 'accept', due to
+ * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
+ * for compatibility. Hence the trick. */
+ accept_type = accept;
+ return 0;
+}
+
+int
+lnet_acceptor_timeout(void)
+{
+ return accept_timeout;
+}
+EXPORT_SYMBOL(lnet_acceptor_timeout);
+
+void
+lnet_connect_console_error (int rc, lnet_nid_t peer_nid,
+ __u32 peer_ip, int peer_port)
+{
+ switch (rc) {
+ /* "normal" errors */
+ case -ECONNREFUSED:
+ CNETERR("Connection to %s at host %u.%u.%u.%u on port %d was "
+ "refused: check that Lustre is running on that node.\n",
+ libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port);
+ break;
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ CNETERR("Connection to %s at host %u.%u.%u.%u "
+ "was unreachable: the network or that node may "
+ "be down, or Lustre may be misconfigured.\n",
+ libcfs_nid2str(peer_nid), HIPQUAD(peer_ip));
+ break;
+ case -ETIMEDOUT:
+ CNETERR("Connection to %s at host %u.%u.%u.%u on "
+ "port %d took too long: that node may be hung "
+ "or experiencing high load.\n",
+ libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port);
+ break;
+ case -ECONNRESET:
+ LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %u.%u.%u.%u"
+ " on port %d was reset: "
+ "is it running a compatible version of "
+ "Lustre and is %s one of its NIDs?\n",
+ libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port,
+ libcfs_nid2str(peer_nid));
+ break;
+ case -EPROTO:
+ LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at "
+ "host %u.%u.%u.%u on port %d: is it running "
+ "a compatible version of Lustre?\n",
+ libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port);
+ break;
+ case -EADDRINUSE:
+ LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to "
+ "connect to %s at host %u.%u.%u.%u on port "
+ "%d\n", libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port);
+ break;
+ default:
+ LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s"
+ " at host %u.%u.%u.%u on port %d\n", rc,
+ libcfs_nid2str(peer_nid),
+ HIPQUAD(peer_ip), peer_port);
+ break;
+ }
+}
+EXPORT_SYMBOL(lnet_connect_console_error);
+
+int
+lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
+ __u32 local_ip, __u32 peer_ip, int peer_port)
+{
+ lnet_acceptor_connreq_t cr;
+ socket_t *sock;
+ int rc;
+ int port;
+ int fatal;
+
+ CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */
+
+ for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
+ port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
+ --port) {
+ /* Iterate through reserved ports. */
+
+ rc = libcfs_sock_connect(&sock, &fatal,
+ local_ip, port,
+ peer_ip, peer_port);
+ if (rc != 0) {
+ if (fatal)
+ goto failed;
+ continue;
+ }
+
+ CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1);
+
+ cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
+ cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+ cr.acr_nid = peer_nid;
+
+ if (the_lnet.ln_testprotocompat != 0) {
+ /* single-shot proto check */
+ lnet_net_lock(LNET_LOCK_EX);
+ if ((the_lnet.ln_testprotocompat & 4) != 0) {
+ cr.acr_version++;
+ the_lnet.ln_testprotocompat &= ~4;
+ }
+ if ((the_lnet.ln_testprotocompat & 8) != 0) {
+ cr.acr_magic = LNET_PROTO_MAGIC;
+ the_lnet.ln_testprotocompat &= ~8;
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+ }
+
+ rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+ accept_timeout);
+ if (rc != 0)
+ goto failed_sock;
+
+ *sockp = sock;
+ return 0;
+ }
+
+ rc = -EADDRINUSE;
+ goto failed;
+
+ failed_sock:
+ libcfs_sock_release(sock);
+ failed:
+ lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
+ return rc;
+}
+EXPORT_SYMBOL(lnet_connect);
+
+
+/* Below is the code common for both kernel and MT user-space */
+
+int
+lnet_accept(socket_t *sock, __u32 magic)
+{
+ lnet_acceptor_connreq_t cr;
+ __u32 peer_ip;
+ int peer_port;
+ int rc;
+ int flip;
+ lnet_ni_t *ni;
+ char *str;
+
+ LASSERT (sizeof(cr) <= 16); /* not too big for the stack */
+
+ rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+ LASSERT (rc == 0); /* we succeeded before */
+
+ if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
+
+ if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
+ /* future version compatibility!
+ * When LNET unifies protocols over all LNDs, the first
+ * thing sent will be a version query. I send back
+ * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */
+
+ memset (&cr, 0, sizeof(cr));
+ cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
+ cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+ rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+ accept_timeout);
+
+ if (rc != 0)
+ CERROR("Error sending magic+version in response"
+ "to LNET magic from %u.%u.%u.%u: %d\n",
+ HIPQUAD(peer_ip), rc);
+ return -EPROTO;
+ }
+
+ if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC))
+ str = "'old' socknal/tcpnal";
+ else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC))
+ str = "'old' ranal";
+ else
+ str = "unrecognised";
+
+ LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %u.%u.%u.%u"
+ " magic %08x: %s acceptor protocol\n",
+ HIPQUAD(peer_ip), magic, str);
+ return -EPROTO;
+ }
+
+ flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);
+
+ rc = libcfs_sock_read(sock, &cr.acr_version,
+ sizeof(cr.acr_version),
+ accept_timeout);
+ if (rc != 0) {
+ CERROR("Error %d reading connection request version from "
+ "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+ return -EIO;
+ }
+
+ if (flip)
+ __swab32s(&cr.acr_version);
+
+ if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
+ /* future version compatibility!
+ * An acceptor-specific protocol rev will first send a version
+ * query. I send back my current version to tell her I'm
+ * "old". */
+ int peer_version = cr.acr_version;
+
+ memset (&cr, 0, sizeof(cr));
+ cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
+ cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
+
+ rc = libcfs_sock_write(sock, &cr, sizeof(cr),
+ accept_timeout);
+
+ if (rc != 0)
+ CERROR("Error sending magic+version in response"
+ "to version %d from %u.%u.%u.%u: %d\n",
+ peer_version, HIPQUAD(peer_ip), rc);
+ return -EPROTO;
+ }
+
+ rc = libcfs_sock_read(sock, &cr.acr_nid,
+ sizeof(cr) -
+ offsetof(lnet_acceptor_connreq_t, acr_nid),
+ accept_timeout);
+ if (rc != 0) {
+ CERROR("Error %d reading connection request from "
+ "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+ return -EIO;
+ }
+
+ if (flip)
+ __swab64s(&cr.acr_nid);
+
+ ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
+ if (ni == NULL || /* no matching net */
+ ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
+ if (ni != NULL)
+ lnet_ni_decref(ni);
+ LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %u.%u.%u.%u"
+ " for %s: No matching NI\n",
+ HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
+ return -EPERM;
+ }
+
+ if (ni->ni_lnd->lnd_accept == NULL) {
+ /* This catches a request for the loopback LND */
+ lnet_ni_decref(ni);
+ LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %u.%u.%u.%u"
+ " for %s: NI doesn not accept IP connections\n",
+ HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
+ return -EPERM;
+ }
+
+ CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u\n",
+ libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip));
+
+ rc = ni->ni_lnd->lnd_accept(ni, sock);
+
+ lnet_ni_decref(ni);
+ return rc;
+}
+
+int
+lnet_acceptor(void *arg)
+{
+ socket_t *newsock;
+ int rc;
+ __u32 magic;
+ __u32 peer_ip;
+ int peer_port;
+ int secure = (int)((long_ptr_t)arg);
+
+ LASSERT (lnet_acceptor_state.pta_sock == NULL);
+
+ cfs_block_allsigs();
+
+ rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
+ 0, accept_port, accept_backlog);
+ if (rc != 0) {
+ if (rc == -EADDRINUSE)
+ LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
+ " %d: port already in use\n",
+ accept_port);
+ else
+ LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port "
+ "%d: unexpected error %d\n",
+ accept_port, rc);
+
+ lnet_acceptor_state.pta_sock = NULL;
+ } else {
+ LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
+ }
+
+ /* set init status and unblock parent */
+ lnet_acceptor_state.pta_shutdown = rc;
+ complete(&lnet_acceptor_state.pta_signal);
+
+ if (rc != 0)
+ return rc;
+
+ while (!lnet_acceptor_state.pta_shutdown) {
+
+ rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
+ if (rc != 0) {
+ if (rc != -EAGAIN) {
+ CWARN("Accept error %d: pausing...\n", rc);
+ cfs_pause(cfs_time_seconds(1));
+ }
+ continue;
+ }
+
+ /* maybe we're waken up with libcfs_sock_abort_accept() */
+ if (lnet_acceptor_state.pta_shutdown) {
+ libcfs_sock_release(newsock);
+ break;
+ }
+
+ rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
+ if (rc != 0) {
+ CERROR("Can't determine new connection's address\n");
+ goto failed;
+ }
+
+ if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+ CERROR("Refusing connection from %u.%u.%u.%u: "
+ "insecure port %d\n",
+ HIPQUAD(peer_ip), peer_port);
+ goto failed;
+ }
+
+ rc = libcfs_sock_read(newsock, &magic, sizeof(magic),
+ accept_timeout);
+ if (rc != 0) {
+ CERROR("Error %d reading connection request from "
+ "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
+ goto failed;
+ }
+
+ rc = lnet_accept(newsock, magic);
+ if (rc != 0)
+ goto failed;
+
+ continue;
+
+ failed:
+ libcfs_sock_release(newsock);
+ }
+
+ libcfs_sock_release(lnet_acceptor_state.pta_sock);
+ lnet_acceptor_state.pta_sock = NULL;
+
+ CDEBUG(D_NET, "Acceptor stopping\n");
+
+ /* unblock lnet_acceptor_stop() */
+ complete(&lnet_acceptor_state.pta_signal);
+ return 0;
+}
+
+static inline int
+accept2secure(const char *acc, long *sec)
+{
+ if (!strcmp(acc, "secure")) {
+ *sec = 1;
+ return 1;
+ } else if (!strcmp(acc, "all")) {
+ *sec = 0;
+ return 1;
+ } else if (!strcmp(acc, "none")) {
+ return 0;
+ } else {
+ LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
+ acc);
+ return -EINVAL;
+ }
+}
+
+int
+lnet_acceptor_start(void)
+{
+ int rc;
+ long rc2;
+ long secure;
+
+ LASSERT (lnet_acceptor_state.pta_sock == NULL);
+
+ rc = lnet_acceptor_get_tunables();
+ if (rc != 0)
+ return rc;
+
+
+ init_completion(&lnet_acceptor_state.pta_signal);
+ rc = accept2secure(accept_type, &secure);
+ if (rc <= 0) {
+ fini_completion(&lnet_acceptor_state.pta_signal);
+ return rc;
+ }
+
+ if (lnet_count_acceptor_nis() == 0) /* not required */
+ return 0;
+
+ rc2 = PTR_ERR(kthread_run(lnet_acceptor,
+ (void *)(ulong_ptr_t)secure,
+ "acceptor_%03ld", secure));
+ if (IS_ERR_VALUE(rc2)) {
+ CERROR("Can't start acceptor thread: %ld\n", rc2);
+ fini_completion(&lnet_acceptor_state.pta_signal);
+
+ return -ESRCH;
+ }
+
+ /* wait for acceptor to startup */
+ wait_for_completion(&lnet_acceptor_state.pta_signal);
+
+ if (!lnet_acceptor_state.pta_shutdown) {
+ /* started OK */
+ LASSERT(lnet_acceptor_state.pta_sock != NULL);
+ return 0;
+ }
+
+ LASSERT(lnet_acceptor_state.pta_sock == NULL);
+ fini_completion(&lnet_acceptor_state.pta_signal);
+
+ return -ENETDOWN;
+}
+
+void
+lnet_acceptor_stop(void)
+{
+ if (lnet_acceptor_state.pta_sock == NULL) /* not running */
+ return;
+
+ lnet_acceptor_state.pta_shutdown = 1;
+ libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock);
+
+ /* block until acceptor signals exit */
+ wait_for_completion(&lnet_acceptor_state.pta_signal);
+
+ fini_completion(&lnet_acceptor_state.pta_signal);
+}
diff --git a/drivers/staging/lustre/lnet/lnet/api-errno.c b/drivers/staging/lustre/lnet/lnet/api-errno.c
new file mode 100644
index 000000000000..695b27265e23
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/api-errno.c
@@ -0,0 +1,39 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/api-errno.c
+ *
+ * Instantiate the string table of errors
+ */
+
+/* If you change these, you must update the number table in portals/errno.h */
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
new file mode 100644
index 000000000000..e88bee362497
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -0,0 +1,1941 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+#include <linux/log2.h>
+
+#define D_LNI D_CONSOLE
+
+lnet_t the_lnet; /* THE state of the network */
+EXPORT_SYMBOL(the_lnet);
+
+
+static char *ip2nets = "";
+CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
+ "LNET network <- IP table");
+
+static char *networks = "";
+CFS_MODULE_PARM(networks, "s", charp, 0444,
+ "local networks");
+
+static char *routes = "";
+CFS_MODULE_PARM(routes, "s", charp, 0444,
+ "routes to non-local networks");
+
+static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
+ "size of remote network hash table");
+
+char *
+lnet_get_routes(void)
+{
+ return routes;
+}
+
+char *
+lnet_get_networks(void)
+{
+ char *nets;
+ int rc;
+
+ if (*networks != 0 && *ip2nets != 0) {
+ LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
+ "'ip2nets' but not both at once\n");
+ return NULL;
+ }
+
+ if (*ip2nets != 0) {
+ rc = lnet_parse_ip2nets(&nets, ip2nets);
+ return (rc == 0) ? nets : NULL;
+ }
+
+ if (*networks != 0)
+ return networks;
+
+ return "tcp";
+}
+
+void
+lnet_init_locks(void)
+{
+ spin_lock_init(&the_lnet.ln_eq_wait_lock);
+ init_waitqueue_head(&the_lnet.ln_eq_waitq);
+ mutex_init(&the_lnet.ln_lnd_mutex);
+ mutex_init(&the_lnet.ln_api_mutex);
+}
+
+void
+lnet_fini_locks(void)
+{
+}
+
+
+static int
+lnet_create_remote_nets_table(void)
+{
+ int i;
+ struct list_head *hash;
+
+ LASSERT(the_lnet.ln_remote_nets_hash == NULL);
+ LASSERT(the_lnet.ln_remote_nets_hbits > 0);
+ LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+ if (hash == NULL) {
+ CERROR("Failed to create remote nets hash table\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&hash[i]);
+ the_lnet.ln_remote_nets_hash = hash;
+ return 0;
+}
+
+static void
+lnet_destroy_remote_nets_table(void)
+{
+ int i;
+ struct list_head *hash;
+
+ if (the_lnet.ln_remote_nets_hash == NULL)
+ return;
+
+ for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
+ LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
+
+ LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
+ LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+ the_lnet.ln_remote_nets_hash = NULL;
+}
+
+static void
+lnet_destroy_locks(void)
+{
+ if (the_lnet.ln_res_lock != NULL) {
+ cfs_percpt_lock_free(the_lnet.ln_res_lock);
+ the_lnet.ln_res_lock = NULL;
+ }
+
+ if (the_lnet.ln_net_lock != NULL) {
+ cfs_percpt_lock_free(the_lnet.ln_net_lock);
+ the_lnet.ln_net_lock = NULL;
+ }
+
+ lnet_fini_locks();
+}
+
+static int
+lnet_create_locks(void)
+{
+ lnet_init_locks();
+
+ the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
+ if (the_lnet.ln_res_lock == NULL)
+ goto failed;
+
+ the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
+ if (the_lnet.ln_net_lock == NULL)
+ goto failed;
+
+ return 0;
+
+ failed:
+ lnet_destroy_locks();
+ return -ENOMEM;
+}
+
+void lnet_assert_wire_constants (void)
+{
+ /* Wire protocol assertions generated by 'wirecheck'
+ * running on Linux robert.bartonsoftware.com 2.6.8-1.521
+ * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
+ * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
+
+ /* Constants... */
+ CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
+ CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
+ CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
+ CLASSERT (LNET_MSG_ACK == 0);
+ CLASSERT (LNET_MSG_PUT == 1);
+ CLASSERT (LNET_MSG_GET == 2);
+ CLASSERT (LNET_MSG_REPLY == 3);
+ CLASSERT (LNET_MSG_HELLO == 4);
+
+ /* Checks for struct ptl_handle_wire_t */
+ CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
+ CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
+ CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
+ CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
+ CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
+
+ /* Checks for struct lnet_magicversion_t */
+ CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
+ CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
+ CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
+ CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
+ CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
+ CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
+ CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
+
+ /* Checks for struct lnet_hdr_t */
+ CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
+ CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
+
+ /* Ack */
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
+
+ /* Put */
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
+
+ /* Get */
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
+
+ /* Reply */
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
+
+ /* Hello */
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
+ CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
+ CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
+}
+
+lnd_t *
+lnet_find_lnd_by_type (int type)
+{
+ lnd_t *lnd;
+ struct list_head *tmp;
+
+ /* holding lnd mutex */
+ list_for_each (tmp, &the_lnet.ln_lnds) {
+ lnd = list_entry(tmp, lnd_t, lnd_list);
+
+ if ((int)lnd->lnd_type == type)
+ return lnd;
+ }
+
+ return NULL;
+}
+
+void
+lnet_register_lnd (lnd_t *lnd)
+{
+ LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
+ LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
+
+ list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
+ lnd->lnd_refcount = 0;
+
+ CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
+
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+}
+EXPORT_SYMBOL(lnet_register_lnd);
+
+void
+lnet_unregister_lnd (lnd_t *lnd)
+{
+ LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
+ LASSERT (lnd->lnd_refcount == 0);
+
+ list_del (&lnd->lnd_list);
+ CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
+
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+}
+EXPORT_SYMBOL(lnet_unregister_lnd);
+
+void
+lnet_counters_get(lnet_counters_t *counters)
+{
+ lnet_counters_t *ctr;
+ int i;
+
+ memset(counters, 0, sizeof(*counters));
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
+ counters->msgs_max += ctr->msgs_max;
+ counters->msgs_alloc += ctr->msgs_alloc;
+ counters->errors += ctr->errors;
+ counters->send_count += ctr->send_count;
+ counters->recv_count += ctr->recv_count;
+ counters->route_count += ctr->route_count;
+ counters->drop_length += ctr->drop_length;
+ counters->send_length += ctr->send_length;
+ counters->recv_length += ctr->recv_length;
+ counters->route_length += ctr->route_length;
+ counters->drop_length += ctr->drop_length;
+
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+}
+EXPORT_SYMBOL(lnet_counters_get);
+
+void
+lnet_counters_reset(void)
+{
+ lnet_counters_t *counters;
+ int i;
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
+ memset(counters, 0, sizeof(lnet_counters_t));
+
+ lnet_net_unlock(LNET_LOCK_EX);
+}
+EXPORT_SYMBOL(lnet_counters_reset);
+
+#ifdef LNET_USE_LIB_FREELIST
+
+int
+lnet_freelist_init (lnet_freelist_t *fl, int n, int size)
+{
+ char *space;
+
+ LASSERT (n > 0);
+
+ size += offsetof (lnet_freeobj_t, fo_contents);
+
+ LIBCFS_ALLOC(space, n * size);
+ if (space == NULL)
+ return (-ENOMEM);
+
+ INIT_LIST_HEAD (&fl->fl_list);
+ fl->fl_objs = space;
+ fl->fl_nobjs = n;
+ fl->fl_objsize = size;
+
+ do
+ {
+ memset (space, 0, size);
+ list_add ((struct list_head *)space, &fl->fl_list);
+ space += size;
+ } while (--n != 0);
+
+ return (0);
+}
+
+void
+lnet_freelist_fini (lnet_freelist_t *fl)
+{
+ struct list_head *el;
+ int count;
+
+ if (fl->fl_nobjs == 0)
+ return;
+
+ count = 0;
+ for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
+ count++;
+
+ LASSERT (count == fl->fl_nobjs);
+
+ LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+ memset (fl, 0, sizeof (*fl));
+}
+
+#endif /* LNET_USE_LIB_FREELIST */
+
+__u64
+lnet_create_interface_cookie (void)
+{
+ /* NB the interface cookie in wire handles guards against delayed
+ * replies and ACKs appearing valid after reboot. Initialisation time,
+ * even if it's only implemented to millisecond resolution is probably
+ * easily good enough. */
+ struct timeval tv;
+ __u64 cookie;
+ do_gettimeofday(&tv);
+ cookie = tv.tv_sec;
+ cookie *= 1000000;
+ cookie += tv.tv_usec;
+ return cookie;
+}
+
+static char *
+lnet_res_type2str(int type)
+{
+ switch (type) {
+ default:
+ LBUG();
+ case LNET_COOKIE_TYPE_MD:
+ return "MD";
+ case LNET_COOKIE_TYPE_ME:
+ return "ME";
+ case LNET_COOKIE_TYPE_EQ:
+ return "EQ";
+ }
+}
+
+void
+lnet_res_container_cleanup(struct lnet_res_container *rec)
+{
+ int count = 0;
+
+ if (rec->rec_type == 0) /* not set yet, it's uninitialized */
+ return;
+
+ while (!list_empty(&rec->rec_active)) {
+ struct list_head *e = rec->rec_active.next;
+
+ list_del_init(e);
+ if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
+ lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
+
+ } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
+ lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
+
+ } else { /* NB: Active MEs should be attached on portals */
+ LBUG();
+ }
+ count++;
+ }
+
+ if (count > 0) {
+ /* Found alive MD/ME/EQ, user really should unlink/free
+ * all of them before finalize LNet, but if someone didn't,
+ * we have to recycle garbage for him */
+ CERROR("%d active elements on exit of %s container\n",
+ count, lnet_res_type2str(rec->rec_type));
+ }
+
+#ifdef LNET_USE_LIB_FREELIST
+ lnet_freelist_fini(&rec->rec_freelist);
+#endif
+ if (rec->rec_lh_hash != NULL) {
+ LIBCFS_FREE(rec->rec_lh_hash,
+ LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
+ rec->rec_lh_hash = NULL;
+ }
+
+ rec->rec_type = 0; /* mark it as finalized */
+}
+
+int
+lnet_res_container_setup(struct lnet_res_container *rec,
+ int cpt, int type, int objnum, int objsz)
+{
+ int rc = 0;
+ int i;
+
+ LASSERT(rec->rec_type == 0);
+
+ rec->rec_type = type;
+ INIT_LIST_HEAD(&rec->rec_active);
+
+#ifdef LNET_USE_LIB_FREELIST
+ memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
+ rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
+ if (rc != 0)
+ goto out;
+#endif
+ rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
+
+ /* Arbitrary choice of hash table size */
+ LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
+ LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
+ if (rec->rec_lh_hash == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < LNET_LH_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
+
+ return 0;
+
+out:
+ CERROR("Failed to setup %s resource container\n",
+ lnet_res_type2str(type));
+ lnet_res_container_cleanup(rec);
+ return rc;
+}
+
+static void
+lnet_res_containers_destroy(struct lnet_res_container **recs)
+{
+ struct lnet_res_container *rec;
+ int i;
+
+ cfs_percpt_for_each(rec, i, recs)
+ lnet_res_container_cleanup(rec);
+
+ cfs_percpt_free(recs);
+}
+
+static struct lnet_res_container **
+lnet_res_containers_create(int type, int objnum, int objsz)
+{
+ struct lnet_res_container **recs;
+ struct lnet_res_container *rec;
+ int rc;
+ int i;
+
+ recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
+ if (recs == NULL) {
+ CERROR("Failed to allocate %s resource containers\n",
+ lnet_res_type2str(type));
+ return NULL;
+ }
+
+ cfs_percpt_for_each(rec, i, recs) {
+ rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
+ if (rc != 0) {
+ lnet_res_containers_destroy(recs);
+ return NULL;
+ }
+ }
+
+ return recs;
+}
+
+lnet_libhandle_t *
+lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
+{
+ /* ALWAYS called with lnet_res_lock held */
+ struct list_head *head;
+ lnet_libhandle_t *lh;
+ unsigned int hash;
+
+ if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
+ return NULL;
+
+ hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
+ head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
+
+ list_for_each_entry(lh, head, lh_hash_chain) {
+ if (lh->lh_cookie == cookie)
+ return lh;
+ }
+
+ return NULL;
+}
+
+void
+lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
+{
+ /* ALWAYS called with lnet_res_lock held */
+ unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
+ unsigned int hash;
+
+ lh->lh_cookie = rec->rec_lh_cookie;
+ rec->rec_lh_cookie += 1 << ibits;
+
+ hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
+
+ list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
+}
+
+
+int lnet_unprepare(void);
+
+int
+lnet_prepare(lnet_pid_t requested_pid)
+{
+ /* Prepare to bring up the network */
+ struct lnet_res_container **recs;
+ int rc = 0;
+
+ LASSERT (the_lnet.ln_refcount == 0);
+
+ the_lnet.ln_routing = 0;
+
+ LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
+ the_lnet.ln_pid = requested_pid;
+
+ INIT_LIST_HEAD(&the_lnet.ln_test_peers);
+ INIT_LIST_HEAD(&the_lnet.ln_nis);
+ INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
+ INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+ INIT_LIST_HEAD(&the_lnet.ln_routers);
+
+ rc = lnet_create_remote_nets_table();
+ if (rc != 0)
+ goto failed;
+
+ the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
+
+ the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(lnet_counters_t));
+ if (the_lnet.ln_counters == NULL) {
+ CERROR("Failed to allocate counters for LNet\n");
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ rc = lnet_peer_tables_create();
+ if (rc != 0)
+ goto failed;
+
+ rc = lnet_msg_containers_create();
+ if (rc != 0)
+ goto failed;
+
+ rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
+ LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
+ sizeof(lnet_eq_t));
+ if (rc != 0)
+ goto failed;
+
+ recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
+ sizeof(lnet_me_t));
+ if (recs == NULL)
+ goto failed;
+
+ the_lnet.ln_me_containers = recs;
+
+ recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
+ sizeof(lnet_libmd_t));
+ if (recs == NULL)
+ goto failed;
+
+ the_lnet.ln_md_containers = recs;
+
+ rc = lnet_portals_create();
+ if (rc != 0) {
+ CERROR("Failed to create portals for LNet: %d\n", rc);
+ goto failed;
+ }
+
+ return 0;
+
+ failed:
+ lnet_unprepare();
+ return rc;
+}
+
+int
+lnet_unprepare (void)
+{
+ /* NB no LNET_LOCK since this is the last reference. All LND instances
+ * have shut down already, so it is safe to unlink and free all
+ * descriptors, even those that appear committed to a network op (eg MD
+ * with non-zero pending count) */
+
+ lnet_fail_nid(LNET_NID_ANY, 0);
+
+ LASSERT(the_lnet.ln_refcount == 0);
+ LASSERT(list_empty(&the_lnet.ln_test_peers));
+ LASSERT(list_empty(&the_lnet.ln_nis));
+ LASSERT(list_empty(&the_lnet.ln_nis_cpt));
+ LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+
+ lnet_portals_destroy();
+
+ if (the_lnet.ln_md_containers != NULL) {
+ lnet_res_containers_destroy(the_lnet.ln_md_containers);
+ the_lnet.ln_md_containers = NULL;
+ }
+
+ if (the_lnet.ln_me_containers != NULL) {
+ lnet_res_containers_destroy(the_lnet.ln_me_containers);
+ the_lnet.ln_me_containers = NULL;
+ }
+
+ lnet_res_container_cleanup(&the_lnet.ln_eq_container);
+
+ lnet_msg_containers_destroy();
+ lnet_peer_tables_destroy();
+ lnet_rtrpools_free();
+
+ if (the_lnet.ln_counters != NULL) {
+ cfs_percpt_free(the_lnet.ln_counters);
+ the_lnet.ln_counters = NULL;
+ }
+ lnet_destroy_remote_nets_table();
+
+ return 0;
+}
+
+lnet_ni_t *
+lnet_net2ni_locked(__u32 net, int cpt)
+{
+ struct list_head *tmp;
+ lnet_ni_t *ni;
+
+ LASSERT(cpt != LNET_LOCK_EX);
+
+ list_for_each(tmp, &the_lnet.ln_nis) {
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+ if (LNET_NIDNET(ni->ni_nid) == net) {
+ lnet_ni_addref_locked(ni, cpt);
+ return ni;
+ }
+ }
+
+ return NULL;
+}
+
+lnet_ni_t *
+lnet_net2ni(__u32 net)
+{
+ lnet_ni_t *ni;
+
+ lnet_net_lock(0);
+ ni = lnet_net2ni_locked(net, 0);
+ lnet_net_unlock(0);
+
+ return ni;
+}
+EXPORT_SYMBOL(lnet_net2ni);
+
+static unsigned int
+lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
+{
+ __u64 key = nid;
+ unsigned int val;
+
+ LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
+
+ if (number == 1)
+ return 0;
+
+ val = cfs_hash_long(key, LNET_CPT_BITS);
+ /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
+ if (val < number)
+ return val;
+
+ return (unsigned int)(key + val + (val >> 1)) % number;
+}
+
+int
+lnet_cpt_of_nid_locked(lnet_nid_t nid)
+{
+ struct lnet_ni *ni;
+
+ /* must called with hold of lnet_net_lock */
+ if (LNET_CPT_NUMBER == 1)
+ return 0; /* the only one */
+
+ /* take lnet_net_lock(any) would be OK */
+ if (!list_empty(&the_lnet.ln_nis_cpt)) {
+ list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
+ if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
+ continue;
+
+ LASSERT(ni->ni_cpts != NULL);
+ return ni->ni_cpts[lnet_nid_cpt_hash
+ (nid, ni->ni_ncpts)];
+ }
+ }
+
+ return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+}
+
+int
+lnet_cpt_of_nid(lnet_nid_t nid)
+{
+ int cpt;
+ int cpt2;
+
+ if (LNET_CPT_NUMBER == 1)
+ return 0; /* the only one */
+
+ if (list_empty(&the_lnet.ln_nis_cpt))
+ return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+
+ cpt = lnet_net_lock_current();
+ cpt2 = lnet_cpt_of_nid_locked(nid);
+ lnet_net_unlock(cpt);
+
+ return cpt2;
+}
+EXPORT_SYMBOL(lnet_cpt_of_nid);
+
+int
+lnet_islocalnet(__u32 net)
+{
+ struct lnet_ni *ni;
+ int cpt;
+
+ cpt = lnet_net_lock_current();
+
+ ni = lnet_net2ni_locked(net, cpt);
+ if (ni != NULL)
+ lnet_ni_decref_locked(ni, cpt);
+
+ lnet_net_unlock(cpt);
+
+ return ni != NULL;
+}
+
+lnet_ni_t *
+lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
+{
+ struct lnet_ni *ni;
+ struct list_head *tmp;
+
+ LASSERT(cpt != LNET_LOCK_EX);
+
+ list_for_each(tmp, &the_lnet.ln_nis) {
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+ if (ni->ni_nid == nid) {
+ lnet_ni_addref_locked(ni, cpt);
+ return ni;
+ }
+ }
+
+ return NULL;
+}
+
+int
+lnet_islocalnid(lnet_nid_t nid)
+{
+ struct lnet_ni *ni;
+ int cpt;
+
+ cpt = lnet_net_lock_current();
+ ni = lnet_nid2ni_locked(nid, cpt);
+ if (ni != NULL)
+ lnet_ni_decref_locked(ni, cpt);
+ lnet_net_unlock(cpt);
+
+ return ni != NULL;
+}
+
+int
+lnet_count_acceptor_nis (void)
+{
+ /* Return the # of NIs that need the acceptor. */
+ int count = 0;
+ struct list_head *tmp;
+ struct lnet_ni *ni;
+ int cpt;
+
+ cpt = lnet_net_lock_current();
+ list_for_each(tmp, &the_lnet.ln_nis) {
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+ if (ni->ni_lnd->lnd_accept != NULL)
+ count++;
+ }
+
+ lnet_net_unlock(cpt);
+
+ return count;
+}
+
+static int
+lnet_ni_tq_credits(lnet_ni_t *ni)
+{
+ int credits;
+
+ LASSERT(ni->ni_ncpts >= 1);
+
+ if (ni->ni_ncpts == 1)
+ return ni->ni_maxtxcredits;
+
+ credits = ni->ni_maxtxcredits / ni->ni_ncpts;
+ credits = max(credits, 8 * ni->ni_peertxcredits);
+ credits = min(credits, ni->ni_maxtxcredits);
+
+ return credits;
+}
+
+void
+lnet_shutdown_lndnis (void)
+{
+ int i;
+ int islo;
+ lnet_ni_t *ni;
+
+ /* NB called holding the global mutex */
+
+ /* All quiet on the API front */
+ LASSERT(!the_lnet.ln_shutdown);
+ LASSERT(the_lnet.ln_refcount == 0);
+ LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_shutdown = 1; /* flag shutdown */
+
+ /* Unlink NIs from the global table */
+ while (!list_empty(&the_lnet.ln_nis)) {
+ ni = list_entry(the_lnet.ln_nis.next,
+ lnet_ni_t, ni_list);
+ /* move it to zombie list and nobody can find it anymore */
+ list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
+ lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
+
+ if (!list_empty(&ni->ni_cptlist)) {
+ list_del_init(&ni->ni_cptlist);
+ lnet_ni_decref_locked(ni, 0);
+ }
+ }
+
+ /* Drop the cached eqwait NI. */
+ if (the_lnet.ln_eq_waitni != NULL) {
+ lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
+ the_lnet.ln_eq_waitni = NULL;
+ }
+
+ /* Drop the cached loopback NI. */
+ if (the_lnet.ln_loni != NULL) {
+ lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+ the_lnet.ln_loni = NULL;
+ }
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* Clear lazy portals and drop delayed messages which hold refs
+ * on their lnet_msg_t::msg_rxpeer */
+ for (i = 0; i < the_lnet.ln_nportals; i++)
+ LNetClearLazyPortal(i);
+
+ /* Clear the peer table and wait for all peers to go (they hold refs on
+ * their NIs) */
+ lnet_peer_tables_cleanup();
+
+ lnet_net_lock(LNET_LOCK_EX);
+ /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
+ * and shut them down in guaranteed thread context */
+ i = 2;
+ while (!list_empty(&the_lnet.ln_nis_zombie)) {
+ int *ref;
+ int j;
+
+ ni = list_entry(the_lnet.ln_nis_zombie.next,
+ lnet_ni_t, ni_list);
+ list_del_init(&ni->ni_list);
+ cfs_percpt_for_each(ref, j, ni->ni_refs) {
+ if (*ref == 0)
+ continue;
+ /* still busy, add it back to zombie list */
+ list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+ break;
+ }
+
+ while (!list_empty(&ni->ni_list)) {
+ lnet_net_unlock(LNET_LOCK_EX);
+ ++i;
+ if ((i & (-i)) == i) {
+ CDEBUG(D_WARNING,
+ "Waiting for zombie LNI %s\n",
+ libcfs_nid2str(ni->ni_nid));
+ }
+ cfs_pause(cfs_time_seconds(1));
+ lnet_net_lock(LNET_LOCK_EX);
+ continue;
+ }
+
+ ni->ni_lnd->lnd_refcount--;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ islo = ni->ni_lnd->lnd_type == LOLND;
+
+ LASSERT (!in_interrupt ());
+ (ni->ni_lnd->lnd_shutdown)(ni);
+
+ /* can't deref lnd anymore now; it might have unregistered
+ * itself... */
+
+ if (!islo)
+ CDEBUG(D_LNI, "Removed LNI %s\n",
+ libcfs_nid2str(ni->ni_nid));
+
+ lnet_ni_free(ni);
+ lnet_net_lock(LNET_LOCK_EX);
+ }
+
+ the_lnet.ln_shutdown = 0;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ if (the_lnet.ln_network_tokens != NULL) {
+ LIBCFS_FREE(the_lnet.ln_network_tokens,
+ the_lnet.ln_network_tokens_nob);
+ the_lnet.ln_network_tokens = NULL;
+ }
+}
+
+int
+lnet_startup_lndnis (void)
+{
+ lnd_t *lnd;
+ struct lnet_ni *ni;
+ struct lnet_tx_queue *tq;
+ struct list_head nilist;
+ int i;
+ int rc = 0;
+ int lnd_type;
+ int nicount = 0;
+ char *nets = lnet_get_networks();
+
+ INIT_LIST_HEAD(&nilist);
+
+ if (nets == NULL)
+ goto failed;
+
+ rc = lnet_parse_networks(&nilist, nets);
+ if (rc != 0)
+ goto failed;
+
+ while (!list_empty(&nilist)) {
+ ni = list_entry(nilist.next, lnet_ni_t, ni_list);
+ lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+
+ LASSERT (libcfs_isknown_lnd(lnd_type));
+
+ if (lnd_type == CIBLND ||
+ lnd_type == OPENIBLND ||
+ lnd_type == IIBLND ||
+ lnd_type == VIBLND) {
+ CERROR("LND %s obsoleted\n",
+ libcfs_lnd2str(lnd_type));
+ goto failed;
+ }
+
+ LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+ lnd = lnet_find_lnd_by_type(lnd_type);
+
+ if (lnd == NULL) {
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+ rc = request_module("%s",
+ libcfs_lnd2modname(lnd_type));
+ LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
+
+ lnd = lnet_find_lnd_by_type(lnd_type);
+ if (lnd == NULL) {
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+ CERROR("Can't load LND %s, module %s, rc=%d\n",
+ libcfs_lnd2str(lnd_type),
+ libcfs_lnd2modname(lnd_type), rc);
+ goto failed;
+ }
+ }
+
+ lnet_net_lock(LNET_LOCK_EX);
+ lnd->lnd_refcount++;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ ni->ni_lnd = lnd;
+
+ rc = (lnd->lnd_startup)(ni);
+
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+
+ if (rc != 0) {
+ LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
+ "\n",
+ rc, libcfs_lnd2str(lnd->lnd_type));
+ lnet_net_lock(LNET_LOCK_EX);
+ lnd->lnd_refcount--;
+ lnet_net_unlock(LNET_LOCK_EX);
+ goto failed;
+ }
+
+ LASSERT (ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
+
+ list_del(&ni->ni_list);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ /* refcount for ln_nis */
+ lnet_ni_addref_locked(ni, 0);
+ list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
+ if (ni->ni_cpts != NULL) {
+ list_add_tail(&ni->ni_cptlist,
+ &the_lnet.ln_nis_cpt);
+ lnet_ni_addref_locked(ni, 0);
+ }
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ if (lnd->lnd_type == LOLND) {
+ lnet_ni_addref(ni);
+ LASSERT (the_lnet.ln_loni == NULL);
+ the_lnet.ln_loni = ni;
+ continue;
+ }
+
+ if (ni->ni_peertxcredits == 0 ||
+ ni->ni_maxtxcredits == 0) {
+ LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
+ libcfs_lnd2str(lnd->lnd_type),
+ ni->ni_peertxcredits == 0 ?
+ "" : "per-peer ");
+ goto failed;
+ }
+
+ cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
+ tq->tq_credits_min =
+ tq->tq_credits_max =
+ tq->tq_credits = lnet_ni_tq_credits(ni);
+ }
+
+ CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
+ libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+ lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
+ ni->ni_peerrtrcredits, ni->ni_peertimeout);
+
+ nicount++;
+ }
+
+ if (the_lnet.ln_eq_waitni != NULL && nicount > 1) {
+ lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
+ LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
+ "\n",
+ libcfs_lnd2str(lnd_type));
+ goto failed;
+ }
+
+ return 0;
+
+ failed:
+ lnet_shutdown_lndnis();
+
+ while (!list_empty(&nilist)) {
+ ni = list_entry(nilist.next, lnet_ni_t, ni_list);
+ list_del(&ni->ni_list);
+ lnet_ni_free(ni);
+ }
+
+ return -ENETDOWN;
+}
+
+/**
+ * Initialize LNet library.
+ *
+ * Only userspace program needs to call this function - it's automatically
+ * called in the kernel at module loading time. Caller has to call LNetFini()
+ * after a call to LNetInit(), if and only if the latter returned 0. It must
+ * be called exactly once.
+ *
+ * \return 0 on success, and -ve on failures.
+ */
+int
+LNetInit(void)
+{
+ int rc;
+
+ lnet_assert_wire_constants();
+ LASSERT(!the_lnet.ln_init);
+
+ memset(&the_lnet, 0, sizeof(the_lnet));
+
+ /* refer to global cfs_cpt_table for now */
+ the_lnet.ln_cpt_table = cfs_cpt_table;
+ the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_table);
+
+ LASSERT(the_lnet.ln_cpt_number > 0);
+ if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
+ /* we are under risk of consuming all lh_cookie */
+ CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
+ "please change setting of CPT-table and retry\n",
+ the_lnet.ln_cpt_number, LNET_CPT_MAX);
+ return -1;
+ }
+
+ while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
+ the_lnet.ln_cpt_bits++;
+
+ rc = lnet_create_locks();
+ if (rc != 0) {
+ CERROR("Can't create LNet global locks: %d\n", rc);
+ return -1;
+ }
+
+ the_lnet.ln_refcount = 0;
+ the_lnet.ln_init = 1;
+ LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
+ INIT_LIST_HEAD(&the_lnet.ln_lnds);
+ INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
+ INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
+
+ /* The hash table size is the number of bits it takes to express the set
+ * ln_num_routes, minus 1 (better to under estimate than over so we
+ * don't waste memory). */
+ if (rnet_htable_size <= 0)
+ rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+ else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
+ rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
+ the_lnet.ln_remote_nets_hbits = max_t(int, 1,
+ order_base_2(rnet_htable_size) - 1);
+
+ /* All LNDs apart from the LOLND are in separate modules. They
+ * register themselves when their module loads, and unregister
+ * themselves when their module is unloaded. */
+ lnet_register_lnd(&the_lolnd);
+ return 0;
+}
+EXPORT_SYMBOL(LNetInit);
+
+/**
+ * Finalize LNet library.
+ *
+ * Only userspace program needs to call this function. It can be called
+ * at most once.
+ *
+ * \pre LNetInit() called with success.
+ * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
+ */
+void
+LNetFini(void)
+{
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount == 0);
+
+ while (!list_empty(&the_lnet.ln_lnds))
+ lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
+ lnd_t, lnd_list));
+ lnet_destroy_locks();
+
+ the_lnet.ln_init = 0;
+}
+EXPORT_SYMBOL(LNetFini);
+
+/**
+ * Set LNet PID and start LNet interfaces, routing, and forwarding.
+ *
+ * Userspace program should call this after a successful call to LNetInit().
+ * Users must call this function at least once before any other functions.
+ * For each successful call there must be a corresponding call to
+ * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
+ * ignored.
+ *
+ * The PID used by LNet may be different from the one requested.
+ * See LNetGetId().
+ *
+ * \param requested_pid PID requested by the caller.
+ *
+ * \return >= 0 on success, and < 0 error code on failures.
+ */
+int
+LNetNIInit(lnet_pid_t requested_pid)
+{
+ int im_a_router = 0;
+ int rc;
+
+ LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+
+ LASSERT (the_lnet.ln_init);
+ CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
+
+ if (the_lnet.ln_refcount > 0) {
+ rc = the_lnet.ln_refcount++;
+ goto out;
+ }
+
+ lnet_get_tunables();
+
+ if (requested_pid == LNET_PID_ANY) {
+ /* Don't instantiate LNET just for me */
+ rc = -ENETDOWN;
+ goto failed0;
+ }
+
+ rc = lnet_prepare(requested_pid);
+ if (rc != 0)
+ goto failed0;
+
+ rc = lnet_startup_lndnis();
+ if (rc != 0)
+ goto failed1;
+
+ rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
+ if (rc != 0)
+ goto failed2;
+
+ rc = lnet_check_routes();
+ if (rc != 0)
+ goto failed2;
+
+ rc = lnet_rtrpools_alloc(im_a_router);
+ if (rc != 0)
+ goto failed2;
+
+ rc = lnet_acceptor_start();
+ if (rc != 0)
+ goto failed2;
+
+ the_lnet.ln_refcount = 1;
+ /* Now I may use my own API functions... */
+
+ /* NB router checker needs the_lnet.ln_ping_info in
+ * lnet_router_checker -> lnet_update_ni_status_locked */
+ rc = lnet_ping_target_init();
+ if (rc != 0)
+ goto failed3;
+
+ rc = lnet_router_checker_start();
+ if (rc != 0)
+ goto failed4;
+
+ lnet_proc_init();
+ goto out;
+
+ failed4:
+ lnet_ping_target_fini();
+ failed3:
+ the_lnet.ln_refcount = 0;
+ lnet_acceptor_stop();
+ failed2:
+ lnet_destroy_routes();
+ lnet_shutdown_lndnis();
+ failed1:
+ lnet_unprepare();
+ failed0:
+ LASSERT (rc < 0);
+ out:
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+ return rc;
+}
+EXPORT_SYMBOL(LNetNIInit);
+
+/**
+ * Stop LNet interfaces, routing, and forwarding.
+ *
+ * Users must call this function once for each successful call to LNetNIInit().
+ * Once the LNetNIFini() operation has been started, the results of pending
+ * API operations are undefined.
+ *
+ * \return always 0 for current implementation.
+ */
+int
+LNetNIFini()
+{
+ LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (the_lnet.ln_refcount != 1) {
+ the_lnet.ln_refcount--;
+ } else {
+ LASSERT (!the_lnet.ln_niinit_self);
+
+ lnet_proc_fini();
+ lnet_router_checker_stop();
+ lnet_ping_target_fini();
+
+ /* Teardown fns that use my own API functions BEFORE here */
+ the_lnet.ln_refcount = 0;
+
+ lnet_acceptor_stop();
+ lnet_destroy_routes();
+ lnet_shutdown_lndnis();
+ lnet_unprepare();
+ }
+
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(LNetNIFini);
+
+/**
+ * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
+ * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
+ * internal ioctl handler.
+ *
+ * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
+ *
+ * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
+ * The data will be printed to system console. Don't use it excessively.
+ * \param arg A pointer to lnet_process_id_t, process ID of the peer.
+ *
+ * \return Always return 0 when called by users directly (i.e., not via ioctl).
+ */
+int
+LNetCtl(unsigned int cmd, void *arg)
+{
+ struct libcfs_ioctl_data *data = arg;
+ lnet_process_id_t id = {0};
+ lnet_ni_t *ni;
+ int rc;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ switch (cmd) {
+ case IOC_LIBCFS_GET_NI:
+ rc = LNetGetId(data->ioc_count, &id);
+ data->ioc_nid = id.nid;
+ return rc;
+
+ case IOC_LIBCFS_FAIL_NID:
+ return lnet_fail_nid(data->ioc_nid, data->ioc_count);
+
+ case IOC_LIBCFS_ADD_ROUTE:
+ rc = lnet_add_route(data->ioc_net, data->ioc_count,
+ data->ioc_nid);
+ return (rc != 0) ? rc : lnet_check_routes();
+
+ case IOC_LIBCFS_DEL_ROUTE:
+ return lnet_del_route(data->ioc_net, data->ioc_nid);
+
+ case IOC_LIBCFS_GET_ROUTE:
+ return lnet_get_route(data->ioc_count,
+ &data->ioc_net, &data->ioc_count,
+ &data->ioc_nid, &data->ioc_flags);
+ case IOC_LIBCFS_NOTIFY_ROUTER:
+ return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
+ cfs_time_current() -
+ cfs_time_seconds(cfs_time_current_sec() -
+ (time_t)data->ioc_u64[0]));
+
+ case IOC_LIBCFS_PORTALS_COMPATIBILITY:
+ /* This can be removed once lustre stops calling it */
+ return 0;
+
+ case IOC_LIBCFS_LNET_DIST:
+ rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
+ if (rc < 0 && rc != -EHOSTUNREACH)
+ return rc;
+
+ data->ioc_u32[0] = rc;
+ return 0;
+
+ case IOC_LIBCFS_TESTPROTOCOMPAT:
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_testprotocompat = data->ioc_flags;
+ lnet_net_unlock(LNET_LOCK_EX);
+ return 0;
+
+ case IOC_LIBCFS_PING:
+ id.nid = data->ioc_nid;
+ id.pid = data->ioc_u32[0];
+ rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
+ (lnet_process_id_t *)data->ioc_pbuf1,
+ data->ioc_plen1/sizeof(lnet_process_id_t));
+ if (rc < 0)
+ return rc;
+ data->ioc_count = rc;
+ return 0;
+
+ case IOC_LIBCFS_DEBUG_PEER: {
+ /* CAVEAT EMPTOR: this one designed for calling directly; not
+ * via an ioctl */
+ id = *((lnet_process_id_t *) arg);
+
+ lnet_debug_peer(id.nid);
+
+ ni = lnet_net2ni(LNET_NIDNET(id.nid));
+ if (ni == NULL) {
+ CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
+ } else {
+ if (ni->ni_lnd->lnd_ctl == NULL) {
+ CDEBUG(D_WARNING, "No ctl for %s\n",
+ libcfs_id2str(id));
+ } else {
+ (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+ }
+
+ lnet_ni_decref(ni);
+ }
+ return 0;
+ }
+
+ default:
+ ni = lnet_net2ni(data->ioc_net);
+ if (ni == NULL)
+ return -EINVAL;
+
+ if (ni->ni_lnd->lnd_ctl == NULL)
+ rc = -EINVAL;
+ else
+ rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+
+ lnet_ni_decref(ni);
+ return rc;
+ }
+ /* not reached */
+}
+EXPORT_SYMBOL(LNetCtl);
+
+/**
+ * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
+ * all interfaces share a same PID, as requested by LNetNIInit().
+ *
+ * \param index Index of the interface to look up.
+ * \param id On successful return, this location will hold the
+ * lnet_process_id_t ID of the interface.
+ *
+ * \retval 0 If an interface exists at \a index.
+ * \retval -ENOENT If no interface has been found.
+ */
+int
+LNetGetId(unsigned int index, lnet_process_id_t *id)
+{
+ struct lnet_ni *ni;
+ struct list_head *tmp;
+ int cpt;
+ int rc = -ENOENT;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ cpt = lnet_net_lock_current();
+
+ list_for_each(tmp, &the_lnet.ln_nis) {
+ if (index-- != 0)
+ continue;
+
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+ id->nid = ni->ni_nid;
+ id->pid = the_lnet.ln_pid;
+ rc = 0;
+ break;
+ }
+
+ lnet_net_unlock(cpt);
+ return rc;
+}
+EXPORT_SYMBOL(LNetGetId);
+
+/**
+ * Print a string representation of handle \a h into buffer \a str of
+ * \a len bytes.
+ */
+void
+LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
+{
+ snprintf(str, len, LPX64, h.cookie);
+}
+EXPORT_SYMBOL(LNetSnprintHandle);
+
+static int
+lnet_create_ping_info(void)
+{
+ int i;
+ int n;
+ int rc;
+ unsigned int infosz;
+ lnet_ni_t *ni;
+ lnet_process_id_t id;
+ lnet_ping_info_t *pinfo;
+
+ for (n = 0; ; n++) {
+ rc = LNetGetId(n, &id);
+ if (rc == -ENOENT)
+ break;
+
+ LASSERT (rc == 0);
+ }
+
+ infosz = offsetof(lnet_ping_info_t, pi_ni[n]);
+ LIBCFS_ALLOC(pinfo, infosz);
+ if (pinfo == NULL) {
+ CERROR("Can't allocate ping info[%d]\n", n);
+ return -ENOMEM;
+ }
+
+ pinfo->pi_nnis = n;
+ pinfo->pi_pid = the_lnet.ln_pid;
+ pinfo->pi_magic = LNET_PROTO_PING_MAGIC;
+ pinfo->pi_features = LNET_PING_FEAT_NI_STATUS;
+
+ for (i = 0; i < n; i++) {
+ lnet_ni_status_t *ns = &pinfo->pi_ni[i];
+
+ rc = LNetGetId(i, &id);
+ LASSERT (rc == 0);
+
+ ns->ns_nid = id.nid;
+ ns->ns_status = LNET_NI_STATUS_UP;
+
+ lnet_net_lock(0);
+
+ ni = lnet_nid2ni_locked(id.nid, 0);
+ LASSERT(ni != NULL);
+
+ lnet_ni_lock(ni);
+ LASSERT(ni->ni_status == NULL);
+ ni->ni_status = ns;
+ lnet_ni_unlock(ni);
+
+ lnet_ni_decref_locked(ni, 0);
+ lnet_net_unlock(0);
+ }
+
+ the_lnet.ln_ping_info = pinfo;
+ return 0;
+}
+
+static void
+lnet_destroy_ping_info(void)
+{
+ struct lnet_ni *ni;
+
+ lnet_net_lock(0);
+
+ list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
+ lnet_ni_lock(ni);
+ ni->ni_status = NULL;
+ lnet_ni_unlock(ni);
+ }
+
+ lnet_net_unlock(0);
+
+ LIBCFS_FREE(the_lnet.ln_ping_info,
+ offsetof(lnet_ping_info_t,
+ pi_ni[the_lnet.ln_ping_info->pi_nnis]));
+ the_lnet.ln_ping_info = NULL;
+ return;
+}
+
+int
+lnet_ping_target_init(void)
+{
+ lnet_md_t md = {0};
+ lnet_handle_me_t meh;
+ lnet_process_id_t id;
+ int rc;
+ int rc2;
+ int infosz;
+
+ rc = lnet_create_ping_info();
+ if (rc != 0)
+ return rc;
+
+ /* We can have a tiny EQ since we only need to see the unlink event on
+ * teardown, which by definition is the last one! */
+ rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
+ if (rc != 0) {
+ CERROR("Can't allocate ping EQ: %d\n", rc);
+ goto failed_0;
+ }
+
+ memset(&id, 0, sizeof(lnet_process_id_t));
+ id.nid = LNET_NID_ANY;
+ id.pid = LNET_PID_ANY;
+
+ rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
+ LNET_PROTO_PING_MATCHBITS, 0,
+ LNET_UNLINK, LNET_INS_AFTER,
+ &meh);
+ if (rc != 0) {
+ CERROR("Can't create ping ME: %d\n", rc);
+ goto failed_1;
+ }
+
+ /* initialize md content */
+ infosz = offsetof(lnet_ping_info_t,
+ pi_ni[the_lnet.ln_ping_info->pi_nnis]);
+ md.start = the_lnet.ln_ping_info;
+ md.length = infosz;
+ md.threshold = LNET_MD_THRESH_INF;
+ md.max_size = 0;
+ md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
+ LNET_MD_MANAGE_REMOTE;
+ md.user_ptr = NULL;
+ md.eq_handle = the_lnet.ln_ping_target_eq;
+
+ rc = LNetMDAttach(meh, md,
+ LNET_RETAIN,
+ &the_lnet.ln_ping_target_md);
+ if (rc != 0) {
+ CERROR("Can't attach ping MD: %d\n", rc);
+ goto failed_2;
+ }
+
+ return 0;
+
+ failed_2:
+ rc2 = LNetMEUnlink(meh);
+ LASSERT (rc2 == 0);
+ failed_1:
+ rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
+ LASSERT (rc2 == 0);
+ failed_0:
+ lnet_destroy_ping_info();
+ return rc;
+}
+
+void
+lnet_ping_target_fini(void)
+{
+ lnet_event_t event;
+ int rc;
+ int which;
+ int timeout_ms = 1000;
+ sigset_t blocked = cfs_block_allsigs();
+
+ LNetMDUnlink(the_lnet.ln_ping_target_md);
+ /* NB md could be busy; this just starts the unlink */
+
+ for (;;) {
+ rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
+ timeout_ms, &event, &which);
+
+ /* I expect overflow... */
+ LASSERT (rc >= 0 || rc == -EOVERFLOW);
+
+ if (rc == 0) {
+ /* timed out: provide a diagnostic */
+ CWARN("Still waiting for ping MD to unlink\n");
+ timeout_ms *= 2;
+ continue;
+ }
+
+ /* Got a valid event */
+ if (event.unlinked)
+ break;
+ }
+
+ rc = LNetEQFree(the_lnet.ln_ping_target_eq);
+ LASSERT (rc == 0);
+ lnet_destroy_ping_info();
+ cfs_restore_sigs(blocked);
+}
+
+int
+lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
+{
+ lnet_handle_eq_t eqh;
+ lnet_handle_md_t mdh;
+ lnet_event_t event;
+ lnet_md_t md = {0};
+ int which;
+ int unlinked = 0;
+ int replied = 0;
+ const int a_long_time = 60000; /* mS */
+ int infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
+ lnet_ping_info_t *info;
+ lnet_process_id_t tmpid;
+ int i;
+ int nob;
+ int rc;
+ int rc2;
+ sigset_t blocked;
+
+ if (n_ids <= 0 ||
+ id.nid == LNET_NID_ANY ||
+ timeout_ms > 500000 || /* arbitrary limit! */
+ n_ids > 20) /* arbitrary limit! */
+ return -EINVAL;
+
+ if (id.pid == LNET_PID_ANY)
+ id.pid = LUSTRE_SRV_LNET_PID;
+
+ LIBCFS_ALLOC(info, infosz);
+ if (info == NULL)
+ return -ENOMEM;
+
+ /* NB 2 events max (including any unlink event) */
+ rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
+ if (rc != 0) {
+ CERROR("Can't allocate EQ: %d\n", rc);
+ goto out_0;
+ }
+
+ /* initialize md content */
+ md.start = info;
+ md.length = infosz;
+ md.threshold = 2; /*GET/REPLY*/
+ md.max_size = 0;
+ md.options = LNET_MD_TRUNCATE;
+ md.user_ptr = NULL;
+ md.eq_handle = eqh;
+
+ rc = LNetMDBind(md, LNET_UNLINK, &mdh);
+ if (rc != 0) {
+ CERROR("Can't bind MD: %d\n", rc);
+ goto out_1;
+ }
+
+ rc = LNetGet(LNET_NID_ANY, mdh, id,
+ LNET_RESERVED_PORTAL,
+ LNET_PROTO_PING_MATCHBITS, 0);
+
+ if (rc != 0) {
+ /* Don't CERROR; this could be deliberate! */
+
+ rc2 = LNetMDUnlink(mdh);
+ LASSERT (rc2 == 0);
+
+ /* NB must wait for the UNLINK event below... */
+ unlinked = 1;
+ timeout_ms = a_long_time;
+ }
+
+ do {
+ /* MUST block for unlink to complete */
+ if (unlinked)
+ blocked = cfs_block_allsigs();
+
+ rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
+
+ if (unlinked)
+ cfs_restore_sigs(blocked);
+
+ CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
+ (rc2 <= 0) ? -1 : event.type,
+ (rc2 <= 0) ? -1 : event.status,
+ (rc2 > 0 && event.unlinked) ? " unlinked" : "");
+
+ LASSERT (rc2 != -EOVERFLOW); /* can't miss anything */
+
+ if (rc2 <= 0 || event.status != 0) {
+ /* timeout or error */
+ if (!replied && rc == 0)
+ rc = (rc2 < 0) ? rc2 :
+ (rc2 == 0) ? -ETIMEDOUT :
+ event.status;
+
+ if (!unlinked) {
+ /* Ensure completion in finite time... */
+ LNetMDUnlink(mdh);
+ /* No assertion (racing with network) */
+ unlinked = 1;
+ timeout_ms = a_long_time;
+ } else if (rc2 == 0) {
+ /* timed out waiting for unlink */
+ CWARN("ping %s: late network completion\n",
+ libcfs_id2str(id));
+ }
+ } else if (event.type == LNET_EVENT_REPLY) {
+ replied = 1;
+ rc = event.mlength;
+ }
+
+ } while (rc2 <= 0 || !event.unlinked);
+
+ if (!replied) {
+ if (rc >= 0)
+ CWARN("%s: Unexpected rc >= 0 but no reply!\n",
+ libcfs_id2str(id));
+ rc = -EIO;
+ goto out_1;
+ }
+
+ nob = rc;
+ LASSERT (nob >= 0 && nob <= infosz);
+
+ rc = -EPROTO; /* if I can't parse... */
+
+ if (nob < 8) {
+ /* can't check magic/version */
+ CERROR("%s: ping info too short %d\n",
+ libcfs_id2str(id), nob);
+ goto out_1;
+ }
+
+ if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
+ lnet_swap_pinginfo(info);
+ } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
+ CERROR("%s: Unexpected magic %08x\n",
+ libcfs_id2str(id), info->pi_magic);
+ goto out_1;
+ }
+
+ if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
+ CERROR("%s: ping w/o NI status: 0x%x\n",
+ libcfs_id2str(id), info->pi_features);
+ goto out_1;
+ }
+
+ if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
+ CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
+ nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
+ goto out_1;
+ }
+
+ if (info->pi_nnis < n_ids)
+ n_ids = info->pi_nnis;
+
+ if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
+ CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
+ nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
+ goto out_1;
+ }
+
+ rc = -EFAULT; /* If I SEGV... */
+
+ for (i = 0; i < n_ids; i++) {
+ tmpid.pid = info->pi_pid;
+ tmpid.nid = info->pi_ni[i].ns_nid;
+ if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
+ goto out_1;
+ }
+ rc = info->pi_nnis;
+
+ out_1:
+ rc2 = LNetEQFree(eqh);
+ if (rc2 != 0)
+ CERROR("rc2 %d\n", rc2);
+ LASSERT (rc2 == 0);
+
+ out_0:
+ LIBCFS_FREE(info, infosz);
+ return rc;
+}
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
new file mode 100644
index 000000000000..28711e6e8b03
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -0,0 +1,1264 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+typedef struct { /* tmp struct for parsing routes */
+ struct list_head ltb_list; /* stash on lists */
+ int ltb_size; /* allocated size */
+ char ltb_text[0]; /* text buffer */
+} lnet_text_buf_t;
+
+static int lnet_tbnob = 0; /* track text buf allocation */
+#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */
+#define LNET_SINGLE_TEXTBUF_NOB (4<<10)
+
+void
+lnet_syntax(char *name, char *str, int offset, int width)
+{
+ static char dots[LNET_SINGLE_TEXTBUF_NOB];
+ static char dashes[LNET_SINGLE_TEXTBUF_NOB];
+
+ memset(dots, '.', sizeof(dots));
+ dots[sizeof(dots)-1] = 0;
+ memset(dashes, '-', sizeof(dashes));
+ dashes[sizeof(dashes)-1] = 0;
+
+ LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str);
+ LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n",
+ (int)strlen(name), dots, offset, dots,
+ (width < 1) ? 0 : width - 1, dashes);
+}
+
+int
+lnet_issep (char c)
+{
+ switch (c) {
+ case '\n':
+ case '\r':
+ case ';':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int
+lnet_net_unique(__u32 net, struct list_head *nilist)
+{
+ struct list_head *tmp;
+ lnet_ni_t *ni;
+
+ list_for_each (tmp, nilist) {
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+
+ if (LNET_NIDNET(ni->ni_nid) == net)
+ return 0;
+ }
+
+ return 1;
+}
+
+void
+lnet_ni_free(struct lnet_ni *ni)
+{
+ if (ni->ni_refs != NULL)
+ cfs_percpt_free(ni->ni_refs);
+
+ if (ni->ni_tx_queues != NULL)
+ cfs_percpt_free(ni->ni_tx_queues);
+
+ if (ni->ni_cpts != NULL)
+ cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
+
+ LIBCFS_FREE(ni, sizeof(*ni));
+}
+
+lnet_ni_t *
+lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+{
+ struct lnet_tx_queue *tq;
+ struct lnet_ni *ni;
+ int rc;
+ int i;
+
+ if (!lnet_net_unique(net, nilist)) {
+ LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
+ libcfs_net2str(net));
+ return NULL;
+ }
+
+ LIBCFS_ALLOC(ni, sizeof(*ni));
+ if (ni == NULL) {
+ CERROR("Out of memory creating network %s\n",
+ libcfs_net2str(net));
+ return NULL;
+ }
+
+ spin_lock_init(&ni->ni_lock);
+ INIT_LIST_HEAD(&ni->ni_cptlist);
+ ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*ni->ni_refs[0]));
+ if (ni->ni_refs == NULL)
+ goto failed;
+
+ ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*ni->ni_tx_queues[0]));
+ if (ni->ni_tx_queues == NULL)
+ goto failed;
+
+ cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
+ INIT_LIST_HEAD(&tq->tq_delayed);
+
+ if (el == NULL) {
+ ni->ni_cpts = NULL;
+ ni->ni_ncpts = LNET_CPT_NUMBER;
+ } else {
+ rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
+ if (rc <= 0) {
+ CERROR("Failed to set CPTs for NI %s: %d\n",
+ libcfs_net2str(net), rc);
+ goto failed;
+ }
+
+ LASSERT(rc <= LNET_CPT_NUMBER);
+ if (rc == LNET_CPT_NUMBER) {
+ LIBCFS_FREE(ni->ni_cpts, rc * sizeof(ni->ni_cpts[0]));
+ ni->ni_cpts = NULL;
+ }
+
+ ni->ni_ncpts = rc;
+ }
+
+ /* LND will fill in the address part of the NID */
+ ni->ni_nid = LNET_MKNID(net, 0);
+ ni->ni_last_alive = cfs_time_current_sec();
+ list_add_tail(&ni->ni_list, nilist);
+ return ni;
+ failed:
+ lnet_ni_free(ni);
+ return NULL;
+}
+
+int
+lnet_parse_networks(struct list_head *nilist, char *networks)
+{
+ struct cfs_expr_list *el = NULL;
+ int tokensize = strlen(networks) + 1;
+ char *tokens;
+ char *str;
+ char *tmp;
+ struct lnet_ni *ni;
+ __u32 net;
+ int nnets = 0;
+
+ if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) {
+ /* _WAY_ conservative */
+ LCONSOLE_ERROR_MSG(0x112, "Can't parse networks: string too "
+ "long\n");
+ return -EINVAL;
+ }
+
+ LIBCFS_ALLOC(tokens, tokensize);
+ if (tokens == NULL) {
+ CERROR("Can't allocate net tokens\n");
+ return -ENOMEM;
+ }
+
+ the_lnet.ln_network_tokens = tokens;
+ the_lnet.ln_network_tokens_nob = tokensize;
+ memcpy (tokens, networks, tokensize);
+ str = tmp = tokens;
+
+ /* Add in the loopback network */
+ ni = lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, nilist);
+ if (ni == NULL)
+ goto failed;
+
+ while (str != NULL && *str != 0) {
+ char *comma = strchr(str, ',');
+ char *bracket = strchr(str, '(');
+ char *square = strchr(str, '[');
+ char *iface;
+ int niface;
+ int rc;
+
+ /* NB we don't check interface conflicts here; it's the LNDs
+ * responsibility (if it cares at all) */
+
+ if (square != NULL && (comma == NULL || square < comma)) {
+ /* i.e: o2ib0(ib0)[1,2], number between square
+ * brackets are CPTs this NI needs to be bond */
+ if (bracket != NULL && bracket > square) {
+ tmp = square;
+ goto failed_syntax;
+ }
+
+ tmp = strchr(square, ']');
+ if (tmp == NULL) {
+ tmp = square;
+ goto failed_syntax;
+ }
+
+ rc = cfs_expr_list_parse(square, tmp - square + 1,
+ 0, LNET_CPT_NUMBER - 1, &el);
+ if (rc != 0) {
+ tmp = square;
+ goto failed_syntax;
+ }
+
+ while (square <= tmp)
+ *square++ = ' ';
+ }
+
+ if (bracket == NULL ||
+ (comma != NULL && comma < bracket)) {
+
+ /* no interface list specified */
+
+ if (comma != NULL)
+ *comma++ = 0;
+ net = libcfs_str2net(cfs_trimwhite(str));
+
+ if (net == LNET_NIDNET(LNET_NID_ANY)) {
+ LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
+ " type\n");
+ tmp = str;
+ goto failed_syntax;
+ }
+
+ if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
+ lnet_ni_alloc(net, el, nilist) == NULL)
+ goto failed;
+
+ if (el != NULL) {
+ cfs_expr_list_free(el);
+ el = NULL;
+ }
+
+ str = comma;
+ continue;
+ }
+
+ *bracket = 0;
+ net = libcfs_str2net(cfs_trimwhite(str));
+ if (net == LNET_NIDNET(LNET_NID_ANY)) {
+ tmp = str;
+ goto failed_syntax;
+ }
+
+ nnets++;
+ ni = lnet_ni_alloc(net, el, nilist);
+ if (ni == NULL)
+ goto failed;
+
+ if (el != NULL) {
+ cfs_expr_list_free(el);
+ el = NULL;
+ }
+
+ niface = 0;
+ iface = bracket + 1;
+
+ bracket = strchr(iface, ')');
+ if (bracket == NULL) {
+ tmp = iface;
+ goto failed_syntax;
+ }
+
+ *bracket = 0;
+ do {
+ comma = strchr(iface, ',');
+ if (comma != NULL)
+ *comma++ = 0;
+
+ iface = cfs_trimwhite(iface);
+ if (*iface == 0) {
+ tmp = iface;
+ goto failed_syntax;
+ }
+
+ if (niface == LNET_MAX_INTERFACES) {
+ LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+ "for net %s\n",
+ libcfs_net2str(net));
+ goto failed;
+ }
+
+ ni->ni_interfaces[niface++] = iface;
+ iface = comma;
+ } while (iface != NULL);
+
+ str = bracket + 1;
+ comma = strchr(bracket + 1, ',');
+ if (comma != NULL) {
+ *comma = 0;
+ str = cfs_trimwhite(str);
+ if (*str != 0) {
+ tmp = str;
+ goto failed_syntax;
+ }
+ str = comma + 1;
+ continue;
+ }
+
+ str = cfs_trimwhite(str);
+ if (*str != 0) {
+ tmp = str;
+ goto failed_syntax;
+ }
+ }
+
+ LASSERT(!list_empty(nilist));
+ return 0;
+
+ failed_syntax:
+ lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+ failed:
+ while (!list_empty(nilist)) {
+ ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+
+ list_del(&ni->ni_list);
+ lnet_ni_free(ni);
+ }
+
+ if (el != NULL)
+ cfs_expr_list_free(el);
+
+ LIBCFS_FREE(tokens, tokensize);
+ the_lnet.ln_network_tokens = NULL;
+
+ return -EINVAL;
+}
+
+lnet_text_buf_t *
+lnet_new_text_buf (int str_len)
+{
+ lnet_text_buf_t *ltb;
+ int nob;
+
+ /* NB allocate space for the terminating 0 */
+ nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]);
+ if (nob > LNET_SINGLE_TEXTBUF_NOB) {
+ /* _way_ conservative for "route net gateway..." */
+ CERROR("text buffer too big\n");
+ return NULL;
+ }
+
+ if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) {
+ CERROR("Too many text buffers\n");
+ return NULL;
+ }
+
+ LIBCFS_ALLOC(ltb, nob);
+ if (ltb == NULL)
+ return NULL;
+
+ ltb->ltb_size = nob;
+ ltb->ltb_text[0] = 0;
+ lnet_tbnob += nob;
+ return ltb;
+}
+
+void
+lnet_free_text_buf (lnet_text_buf_t *ltb)
+{
+ lnet_tbnob -= ltb->ltb_size;
+ LIBCFS_FREE(ltb, ltb->ltb_size);
+}
+
+void
+lnet_free_text_bufs(struct list_head *tbs)
+{
+ lnet_text_buf_t *ltb;
+
+ while (!list_empty(tbs)) {
+ ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
+
+ list_del(&ltb->ltb_list);
+ lnet_free_text_buf(ltb);
+ }
+}
+
+void
+lnet_print_text_bufs(struct list_head *tbs)
+{
+ struct list_head *tmp;
+ lnet_text_buf_t *ltb;
+
+ list_for_each (tmp, tbs) {
+ ltb = list_entry(tmp, lnet_text_buf_t, ltb_list);
+
+ CDEBUG(D_WARNING, "%s\n", ltb->ltb_text);
+ }
+
+ CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob);
+}
+
+int
+lnet_str2tbs_sep (struct list_head *tbs, char *str)
+{
+ struct list_head pending;
+ char *sep;
+ int nob;
+ int i;
+ lnet_text_buf_t *ltb;
+
+ INIT_LIST_HEAD(&pending);
+
+ /* Split 'str' into separate commands */
+ for (;;) {
+ /* skip leading whitespace */
+ while (cfs_iswhite(*str))
+ str++;
+
+ /* scan for separator or comment */
+ for (sep = str; *sep != 0; sep++)
+ if (lnet_issep(*sep) || *sep == '#')
+ break;
+
+ nob = (int)(sep - str);
+ if (nob > 0) {
+ ltb = lnet_new_text_buf(nob);
+ if (ltb == NULL) {
+ lnet_free_text_bufs(&pending);
+ return -1;
+ }
+
+ for (i = 0; i < nob; i++)
+ if (cfs_iswhite(str[i]))
+ ltb->ltb_text[i] = ' ';
+ else
+ ltb->ltb_text[i] = str[i];
+
+ ltb->ltb_text[nob] = 0;
+
+ list_add_tail(&ltb->ltb_list, &pending);
+ }
+
+ if (*sep == '#') {
+ /* scan for separator */
+ do {
+ sep++;
+ } while (*sep != 0 && !lnet_issep(*sep));
+ }
+
+ if (*sep == 0)
+ break;
+
+ str = sep + 1;
+ }
+
+ list_splice(&pending, tbs->prev);
+ return 0;
+}
+
+int
+lnet_expand1tb (struct list_head *list,
+ char *str, char *sep1, char *sep2,
+ char *item, int itemlen)
+{
+ int len1 = (int)(sep1 - str);
+ int len2 = strlen(sep2 + 1);
+ lnet_text_buf_t *ltb;
+
+ LASSERT (*sep1 == '[');
+ LASSERT (*sep2 == ']');
+
+ ltb = lnet_new_text_buf(len1 + itemlen + len2);
+ if (ltb == NULL)
+ return -ENOMEM;
+
+ memcpy(ltb->ltb_text, str, len1);
+ memcpy(&ltb->ltb_text[len1], item, itemlen);
+ memcpy(&ltb->ltb_text[len1+itemlen], sep2 + 1, len2);
+ ltb->ltb_text[len1 + itemlen + len2] = 0;
+
+ list_add_tail(&ltb->ltb_list, list);
+ return 0;
+}
+
+int
+lnet_str2tbs_expand (struct list_head *tbs, char *str)
+{
+ char num[16];
+ struct list_head pending;
+ char *sep;
+ char *sep2;
+ char *parsed;
+ char *enditem;
+ int lo;
+ int hi;
+ int stride;
+ int i;
+ int nob;
+ int scanned;
+
+ INIT_LIST_HEAD(&pending);
+
+ sep = strchr(str, '[');
+ if (sep == NULL) /* nothing to expand */
+ return 0;
+
+ sep2 = strchr(sep, ']');
+ if (sep2 == NULL)
+ goto failed;
+
+ for (parsed = sep; parsed < sep2; parsed = enditem) {
+
+ enditem = ++parsed;
+ while (enditem < sep2 && *enditem != ',')
+ enditem++;
+
+ if (enditem == parsed) /* no empty items */
+ goto failed;
+
+ if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) {
+
+ if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) {
+
+ /* simple string enumeration */
+ if (lnet_expand1tb(&pending, str, sep, sep2,
+ parsed, (int)(enditem - parsed)) != 0)
+ goto failed;
+
+ continue;
+ }
+
+ stride = 1;
+ }
+
+ /* range expansion */
+
+ if (enditem != parsed + scanned) /* no trailing junk */
+ goto failed;
+
+ if (hi < 0 || lo < 0 || stride < 0 || hi < lo ||
+ (hi - lo) % stride != 0)
+ goto failed;
+
+ for (i = lo; i <= hi; i += stride) {
+
+ snprintf(num, sizeof(num), "%d", i);
+ nob = strlen(num);
+ if (nob + 1 == sizeof(num))
+ goto failed;
+
+ if (lnet_expand1tb(&pending, str, sep, sep2,
+ num, nob) != 0)
+ goto failed;
+ }
+ }
+
+ list_splice(&pending, tbs->prev);
+ return 1;
+
+ failed:
+ lnet_free_text_bufs(&pending);
+ return -1;
+}
+
+int
+lnet_parse_hops (char *str, unsigned int *hops)
+{
+ int len = strlen(str);
+ int nob = len;
+
+ return (sscanf(str, "%u%n", hops, &nob) >= 1 &&
+ nob == len &&
+ *hops > 0 && *hops < 256);
+}
+
+
+int
+lnet_parse_route (char *str, int *im_a_router)
+{
+ /* static scratch buffer OK (single threaded) */
+ static char cmd[LNET_SINGLE_TEXTBUF_NOB];
+
+ struct list_head nets;
+ struct list_head gateways;
+ struct list_head *tmp1;
+ struct list_head *tmp2;
+ __u32 net;
+ lnet_nid_t nid;
+ lnet_text_buf_t *ltb;
+ int rc;
+ char *sep;
+ char *token = str;
+ int ntokens = 0;
+ int myrc = -1;
+ unsigned int hops;
+ int got_hops = 0;
+
+ INIT_LIST_HEAD(&gateways);
+ INIT_LIST_HEAD(&nets);
+
+ /* save a copy of the string for error messages */
+ strncpy(cmd, str, sizeof(cmd) - 1);
+ cmd[sizeof(cmd) - 1] = 0;
+
+ sep = str;
+ for (;;) {
+ /* scan for token start */
+ while (cfs_iswhite(*sep))
+ sep++;
+ if (*sep == 0) {
+ if (ntokens < (got_hops ? 3 : 2))
+ goto token_error;
+ break;
+ }
+
+ ntokens++;
+ token = sep++;
+
+ /* scan for token end */
+ while (*sep != 0 && !cfs_iswhite(*sep))
+ sep++;
+ if (*sep != 0)
+ *sep++ = 0;
+
+ if (ntokens == 1) {
+ tmp2 = &nets; /* expanding nets */
+ } else if (ntokens == 2 &&
+ lnet_parse_hops(token, &hops)) {
+ got_hops = 1; /* got a hop count */
+ continue;
+ } else {
+ tmp2 = &gateways; /* expanding gateways */
+ }
+
+ ltb = lnet_new_text_buf(strlen(token));
+ if (ltb == NULL)
+ goto out;
+
+ strcpy(ltb->ltb_text, token);
+ tmp1 = &ltb->ltb_list;
+ list_add_tail(tmp1, tmp2);
+
+ while (tmp1 != tmp2) {
+ ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
+
+ rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text);
+ if (rc < 0)
+ goto token_error;
+
+ tmp1 = tmp1->next;
+
+ if (rc > 0) { /* expanded! */
+ list_del(&ltb->ltb_list);
+ lnet_free_text_buf(ltb);
+ continue;
+ }
+
+ if (ntokens == 1) {
+ net = libcfs_str2net(ltb->ltb_text);
+ if (net == LNET_NIDNET(LNET_NID_ANY) ||
+ LNET_NETTYP(net) == LOLND)
+ goto token_error;
+ } else {
+ nid = libcfs_str2nid(ltb->ltb_text);
+ if (nid == LNET_NID_ANY ||
+ LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
+ goto token_error;
+ }
+ }
+ }
+
+ if (!got_hops)
+ hops = 1;
+
+ LASSERT (!list_empty(&nets));
+ LASSERT (!list_empty(&gateways));
+
+ list_for_each (tmp1, &nets) {
+ ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list);
+ net = libcfs_str2net(ltb->ltb_text);
+ LASSERT (net != LNET_NIDNET(LNET_NID_ANY));
+
+ list_for_each (tmp2, &gateways) {
+ ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list);
+ nid = libcfs_str2nid(ltb->ltb_text);
+ LASSERT (nid != LNET_NID_ANY);
+
+ if (lnet_islocalnid(nid)) {
+ *im_a_router = 1;
+ continue;
+ }
+
+ rc = lnet_add_route (net, hops, nid);
+ if (rc != 0) {
+ CERROR("Can't create route "
+ "to %s via %s\n",
+ libcfs_net2str(net),
+ libcfs_nid2str(nid));
+ goto out;
+ }
+ }
+ }
+
+ myrc = 0;
+ goto out;
+
+ token_error:
+ lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
+ out:
+ lnet_free_text_bufs(&nets);
+ lnet_free_text_bufs(&gateways);
+ return myrc;
+}
+
+int
+lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
+{
+ lnet_text_buf_t *ltb;
+
+ while (!list_empty(tbs)) {
+ ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list);
+
+ if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) {
+ lnet_free_text_bufs(tbs);
+ return -EINVAL;
+ }
+
+ list_del(&ltb->ltb_list);
+ lnet_free_text_buf(ltb);
+ }
+
+ return 0;
+}
+
+int
+lnet_parse_routes (char *routes, int *im_a_router)
+{
+ struct list_head tbs;
+ int rc = 0;
+
+ *im_a_router = 0;
+
+ INIT_LIST_HEAD(&tbs);
+
+ if (lnet_str2tbs_sep(&tbs, routes) < 0) {
+ CERROR("Error parsing routes\n");
+ rc = -EINVAL;
+ } else {
+ rc = lnet_parse_route_tbs(&tbs, im_a_router);
+ }
+
+ LASSERT (lnet_tbnob == 0);
+ return rc;
+}
+
+int
+lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
+{
+ LIST_HEAD (list);
+ int rc;
+ int i;
+
+ rc = cfs_ip_addr_parse(token, len, &list);
+ if (rc != 0)
+ return rc;
+
+ for (rc = i = 0; !rc && i < nip; i++)
+ rc = cfs_ip_addr_match(ipaddrs[i], &list);
+
+ cfs_ip_addr_free(&list);
+
+ return rc;
+}
+
+int
+lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip)
+{
+ static char tokens[LNET_SINGLE_TEXTBUF_NOB];
+
+ int matched = 0;
+ int ntokens = 0;
+ int len;
+ char *net = NULL;
+ char *sep;
+ char *token;
+ int rc;
+
+ LASSERT (strlen(net_entry) < sizeof(tokens));
+
+ /* work on a copy of the string */
+ strcpy(tokens, net_entry);
+ sep = tokens;
+ for (;;) {
+ /* scan for token start */
+ while (cfs_iswhite(*sep))
+ sep++;
+ if (*sep == 0)
+ break;
+
+ token = sep++;
+
+ /* scan for token end */
+ while (*sep != 0 && !cfs_iswhite(*sep))
+ sep++;
+ if (*sep != 0)
+ *sep++ = 0;
+
+ if (ntokens++ == 0) {
+ net = token;
+ continue;
+ }
+
+ len = strlen(token);
+
+ rc = lnet_match_network_token(token, len, ipaddrs, nip);
+ if (rc < 0) {
+ lnet_syntax("ip2nets", net_entry,
+ (int)(token - tokens), len);
+ return rc;
+ }
+
+ matched |= (rc != 0);
+ }
+
+ if (!matched)
+ return 0;
+
+ strcpy(net_entry, net); /* replace with matched net */
+ return 1;
+}
+
+__u32
+lnet_netspec2net(char *netspec)
+{
+ char *bracket = strchr(netspec, '(');
+ __u32 net;
+
+ if (bracket != NULL)
+ *bracket = 0;
+
+ net = libcfs_str2net(netspec);
+
+ if (bracket != NULL)
+ *bracket = '(';
+
+ return net;
+}
+
+int
+lnet_splitnets(char *source, struct list_head *nets)
+{
+ int offset = 0;
+ int offset2;
+ int len;
+ lnet_text_buf_t *tb;
+ lnet_text_buf_t *tb2;
+ struct list_head *t;
+ char *sep;
+ char *bracket;
+ __u32 net;
+
+ LASSERT (!list_empty(nets));
+ LASSERT (nets->next == nets->prev); /* single entry */
+
+ tb = list_entry(nets->next, lnet_text_buf_t, ltb_list);
+
+ for (;;) {
+ sep = strchr(tb->ltb_text, ',');
+ bracket = strchr(tb->ltb_text, '(');
+
+ if (sep != NULL &&
+ bracket != NULL &&
+ bracket < sep) {
+ /* netspec lists interfaces... */
+
+ offset2 = offset + (int)(bracket - tb->ltb_text);
+ len = strlen(bracket);
+
+ bracket = strchr(bracket + 1, ')');
+
+ if (bracket == NULL ||
+ !(bracket[1] == ',' || bracket[1] == 0)) {
+ lnet_syntax("ip2nets", source, offset2, len);
+ return -EINVAL;
+ }
+
+ sep = (bracket[1] == 0) ? NULL : bracket + 1;
+ }
+
+ if (sep != NULL)
+ *sep++ = 0;
+
+ net = lnet_netspec2net(tb->ltb_text);
+ if (net == LNET_NIDNET(LNET_NID_ANY)) {
+ lnet_syntax("ip2nets", source, offset,
+ strlen(tb->ltb_text));
+ return -EINVAL;
+ }
+
+ list_for_each(t, nets) {
+ tb2 = list_entry(t, lnet_text_buf_t, ltb_list);
+
+ if (tb2 == tb)
+ continue;
+
+ if (net == lnet_netspec2net(tb2->ltb_text)) {
+ /* duplicate network */
+ lnet_syntax("ip2nets", source, offset,
+ strlen(tb->ltb_text));
+ return -EINVAL;
+ }
+ }
+
+ if (sep == NULL)
+ return 0;
+
+ offset += (int)(sep - tb->ltb_text);
+ tb2 = lnet_new_text_buf(strlen(sep));
+ if (tb2 == NULL)
+ return -ENOMEM;
+
+ strcpy(tb2->ltb_text, sep);
+ list_add_tail(&tb2->ltb_list, nets);
+
+ tb = tb2;
+ }
+}
+
+int
+lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
+{
+ static char networks[LNET_SINGLE_TEXTBUF_NOB];
+ static char source[LNET_SINGLE_TEXTBUF_NOB];
+
+ struct list_head raw_entries;
+ struct list_head matched_nets;
+ struct list_head current_nets;
+ struct list_head *t;
+ struct list_head *t2;
+ lnet_text_buf_t *tb;
+ lnet_text_buf_t *tb2;
+ __u32 net1;
+ __u32 net2;
+ int len;
+ int count;
+ int dup;
+ int rc;
+
+ INIT_LIST_HEAD(&raw_entries);
+ if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
+ CERROR("Error parsing ip2nets\n");
+ LASSERT (lnet_tbnob == 0);
+ return -EINVAL;
+ }
+
+ INIT_LIST_HEAD(&matched_nets);
+ INIT_LIST_HEAD(&current_nets);
+ networks[0] = 0;
+ count = 0;
+ len = 0;
+ rc = 0;
+
+ while (!list_empty(&raw_entries)) {
+ tb = list_entry(raw_entries.next, lnet_text_buf_t,
+ ltb_list);
+
+ strncpy(source, tb->ltb_text, sizeof(source)-1);
+ source[sizeof(source)-1] = 0;
+
+ /* replace ltb_text with the network(s) add on match */
+ rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip);
+ if (rc < 0)
+ break;
+
+ list_del(&tb->ltb_list);
+
+ if (rc == 0) { /* no match */
+ lnet_free_text_buf(tb);
+ continue;
+ }
+
+ /* split into separate networks */
+ INIT_LIST_HEAD(&current_nets);
+ list_add(&tb->ltb_list, &current_nets);
+ rc = lnet_splitnets(source, &current_nets);
+ if (rc < 0)
+ break;
+
+ dup = 0;
+ list_for_each (t, &current_nets) {
+ tb = list_entry(t, lnet_text_buf_t, ltb_list);
+ net1 = lnet_netspec2net(tb->ltb_text);
+ LASSERT (net1 != LNET_NIDNET(LNET_NID_ANY));
+
+ list_for_each(t2, &matched_nets) {
+ tb2 = list_entry(t2, lnet_text_buf_t,
+ ltb_list);
+ net2 = lnet_netspec2net(tb2->ltb_text);
+ LASSERT (net2 != LNET_NIDNET(LNET_NID_ANY));
+
+ if (net1 == net2) {
+ dup = 1;
+ break;
+ }
+ }
+
+ if (dup)
+ break;
+ }
+
+ if (dup) {
+ lnet_free_text_bufs(&current_nets);
+ continue;
+ }
+
+ list_for_each_safe(t, t2, &current_nets) {
+ tb = list_entry(t, lnet_text_buf_t, ltb_list);
+
+ list_del(&tb->ltb_list);
+ list_add_tail(&tb->ltb_list, &matched_nets);
+
+ len += snprintf(networks + len, sizeof(networks) - len,
+ "%s%s", (len == 0) ? "" : ",",
+ tb->ltb_text);
+
+ if (len >= sizeof(networks)) {
+ CERROR("Too many matched networks\n");
+ rc = -E2BIG;
+ goto out;
+ }
+ }
+
+ count++;
+ }
+
+ out:
+ lnet_free_text_bufs(&raw_entries);
+ lnet_free_text_bufs(&matched_nets);
+ lnet_free_text_bufs(&current_nets);
+ LASSERT (lnet_tbnob == 0);
+
+ if (rc < 0)
+ return rc;
+
+ *networksp = networks;
+ return count;
+}
+
+void
+lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip)
+{
+ LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs));
+}
+
+int
+lnet_ipaddr_enumerate (__u32 **ipaddrsp)
+{
+ int up;
+ __u32 netmask;
+ __u32 *ipaddrs;
+ __u32 *ipaddrs2;
+ int nip;
+ char **ifnames;
+ int nif = libcfs_ipif_enumerate(&ifnames);
+ int i;
+ int rc;
+
+ if (nif <= 0)
+ return nif;
+
+ LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs));
+ if (ipaddrs == NULL) {
+ CERROR("Can't allocate ipaddrs[%d]\n", nif);
+ libcfs_ipif_free_enumeration(ifnames, nif);
+ return -ENOMEM;
+ }
+
+ for (i = nip = 0; i < nif; i++) {
+ if (!strcmp(ifnames[i], "lo"))
+ continue;
+
+ rc = libcfs_ipif_query(ifnames[i], &up,
+ &ipaddrs[nip], &netmask);
+ if (rc != 0) {
+ CWARN("Can't query interface %s: %d\n",
+ ifnames[i], rc);
+ continue;
+ }
+
+ if (!up) {
+ CWARN("Ignoring interface %s: it's down\n",
+ ifnames[i]);
+ continue;
+ }
+
+ nip++;
+ }
+
+ libcfs_ipif_free_enumeration(ifnames, nif);
+
+ if (nip == nif) {
+ *ipaddrsp = ipaddrs;
+ } else {
+ if (nip > 0) {
+ LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2));
+ if (ipaddrs2 == NULL) {
+ CERROR("Can't allocate ipaddrs[%d]\n", nip);
+ nip = -ENOMEM;
+ } else {
+ memcpy(ipaddrs2, ipaddrs,
+ nip * sizeof(*ipaddrs));
+ *ipaddrsp = ipaddrs2;
+ rc = nip;
+ }
+ }
+ lnet_ipaddr_free_enumeration(ipaddrs, nif);
+ }
+ return nip;
+}
+
+int
+lnet_parse_ip2nets (char **networksp, char *ip2nets)
+{
+ __u32 *ipaddrs;
+ int nip = lnet_ipaddr_enumerate(&ipaddrs);
+ int rc;
+
+ if (nip < 0) {
+ LCONSOLE_ERROR_MSG(0x117, "Error %d enumerating local IP "
+ "interfaces for ip2nets to match\n", nip);
+ return nip;
+ }
+
+ if (nip == 0) {
+ LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces "
+ "for ip2nets to match\n");
+ return -ENOENT;
+ }
+
+ rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
+ lnet_ipaddr_free_enumeration(ipaddrs, nip);
+
+ if (rc < 0) {
+ LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
+ return rc;
+ }
+
+ if (rc == 0) {
+ LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match "
+ "any local IP interfaces\n");
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int
+lnet_set_ip_niaddr (lnet_ni_t *ni)
+{
+ __u32 net = LNET_NIDNET(ni->ni_nid);
+ char **names;
+ int n;
+ __u32 ip;
+ __u32 netmask;
+ int up;
+ int i;
+ int rc;
+
+ /* Convenience for LNDs that use the IP address of a local interface as
+ * the local address part of their NID */
+
+ if (ni->ni_interfaces[0] != NULL) {
+
+ CLASSERT (LNET_MAX_INTERFACES > 1);
+
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Net %s doesn't support multiple interfaces\n",
+ libcfs_net2str(net));
+ return -EPERM;
+ }
+
+ rc = libcfs_ipif_query(ni->ni_interfaces[0],
+ &up, &ip, &netmask);
+ if (rc != 0) {
+ CERROR("Net %s can't query interface %s: %d\n",
+ libcfs_net2str(net), ni->ni_interfaces[0], rc);
+ return -EPERM;
+ }
+
+ if (!up) {
+ CERROR("Net %s can't use interface %s: it's down\n",
+ libcfs_net2str(net), ni->ni_interfaces[0]);
+ return -ENETDOWN;
+ }
+
+ ni->ni_nid = LNET_MKNID(net, ip);
+ return 0;
+ }
+
+ n = libcfs_ipif_enumerate(&names);
+ if (n <= 0) {
+ CERROR("Net %s can't enumerate interfaces: %d\n",
+ libcfs_net2str(net), n);
+ return 0;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+ continue;
+
+ rc = libcfs_ipif_query(names[i], &up, &ip, &netmask);
+
+ if (rc != 0) {
+ CWARN("Net %s can't query interface %s: %d\n",
+ libcfs_net2str(net), names[i], rc);
+ continue;
+ }
+
+ if (!up) {
+ CWARN("Net %s ignoring interface %s (down)\n",
+ libcfs_net2str(net), names[i]);
+ continue;
+ }
+
+ libcfs_ipif_free_enumeration(names, n);
+ ni->ni_nid = LNET_MKNID(net, ip);
+ return 0;
+ }
+
+ CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net));
+ libcfs_ipif_free_enumeration(names, n);
+ return -ENOENT;
+}
+EXPORT_SYMBOL(lnet_set_ip_niaddr);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
new file mode 100644
index 000000000000..78297a7d94e8
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c
@@ -0,0 +1,447 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-eq.c
+ *
+ * Library level Event queue management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+/**
+ * Create an event queue that has room for \a count number of events.
+ *
+ * The event queue is circular and older events will be overwritten by new
+ * ones if they are not removed in time by the user using the functions
+ * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
+ * determine the appropriate size of the event queue to prevent this loss
+ * of events. Note that when EQ handler is specified in \a callback, no
+ * event loss can happen, since the handler is run for each event deposited
+ * into the EQ.
+ *
+ * \param count The number of events to be stored in the event queue. It
+ * will be rounded up to the next power of two.
+ * \param callback A handler function that runs when an event is deposited
+ * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
+ * indicate that no event handler is desired.
+ * \param handle On successful return, this location will hold a handle for
+ * the newly created EQ.
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If an parameter is not valid.
+ * \retval -ENOMEM If memory for the EQ can't be allocated.
+ *
+ * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
+ */
+int
+LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
+ lnet_handle_eq_t *handle)
+{
+ lnet_eq_t *eq;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+ * overflow, they don't skip entries, so the queue has the same
+ * apparent capacity at all times */
+
+ count = cfs_power2_roundup(count);
+
+ if (callback != LNET_EQ_HANDLER_NONE && count != 0) {
+ CWARN("EQ callback is guaranteed to get every event, "
+ "do you still want to set eqcount %d for polling "
+ "event which will have locking overhead? "
+ "Please contact with developer to confirm\n", count);
+ }
+
+ /* count can be 0 if only need callback, we can eliminate
+ * overhead of enqueue event */
+ if (count == 0 && callback == LNET_EQ_HANDLER_NONE)
+ return -EINVAL;
+
+ eq = lnet_eq_alloc();
+ if (eq == NULL)
+ return -ENOMEM;
+
+ if (count != 0) {
+ LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t));
+ if (eq->eq_events == NULL)
+ goto failed;
+ /* NB allocator has set all event sequence numbers to 0,
+ * so all them should be earlier than eq_deq_seq */
+ }
+
+ eq->eq_deq_seq = 1;
+ eq->eq_enq_seq = 1;
+ eq->eq_size = count;
+ eq->eq_callback = callback;
+
+ eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*eq->eq_refs[0]));
+ if (eq->eq_refs == NULL)
+ goto failed;
+
+ /* MUST hold both exclusive lnet_res_lock */
+ lnet_res_lock(LNET_LOCK_EX);
+ /* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
+ * both EQ lookup and poll event with only lnet_eq_wait_lock */
+ lnet_eq_wait_lock();
+
+ lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
+ list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);
+
+ lnet_eq_wait_unlock();
+ lnet_res_unlock(LNET_LOCK_EX);
+
+ lnet_eq2handle(handle, eq);
+ return 0;
+
+failed:
+ if (eq->eq_events != NULL)
+ LIBCFS_FREE(eq->eq_events, count * sizeof(lnet_event_t));
+
+ if (eq->eq_refs != NULL)
+ cfs_percpt_free(eq->eq_refs);
+
+ lnet_eq_free(eq);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(LNetEQAlloc);
+
+/**
+ * Release the resources associated with an event queue if it's idle;
+ * otherwise do nothing and it's up to the user to try again.
+ *
+ * \param eqh A handle for the event queue to be released.
+ *
+ * \retval 0 If the EQ is not in use and freed.
+ * \retval -ENOENT If \a eqh does not point to a valid EQ.
+ * \retval -EBUSY If the EQ is still in use by some MDs.
+ */
+int
+LNetEQFree(lnet_handle_eq_t eqh)
+{
+ struct lnet_eq *eq;
+ lnet_event_t *events = NULL;
+ int **refs = NULL;
+ int *ref;
+ int rc = 0;
+ int size = 0;
+ int i;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ lnet_res_lock(LNET_LOCK_EX);
+ /* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
+ * both EQ lookup and poll event with only lnet_eq_wait_lock */
+ lnet_eq_wait_lock();
+
+ eq = lnet_handle2eq(&eqh);
+ if (eq == NULL) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ cfs_percpt_for_each(ref, i, eq->eq_refs) {
+ LASSERT(*ref >= 0);
+ if (*ref == 0)
+ continue;
+
+ CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n",
+ i, *ref);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* stash for free after lock dropped */
+ events = eq->eq_events;
+ size = eq->eq_size;
+ refs = eq->eq_refs;
+
+ lnet_res_lh_invalidate(&eq->eq_lh);
+ list_del(&eq->eq_list);
+ lnet_eq_free_locked(eq);
+ out:
+ lnet_eq_wait_unlock();
+ lnet_res_unlock(LNET_LOCK_EX);
+
+ if (events != NULL)
+ LIBCFS_FREE(events, size * sizeof(lnet_event_t));
+ if (refs != NULL)
+ cfs_percpt_free(refs);
+
+ return rc;
+}
+EXPORT_SYMBOL(LNetEQFree);
+
+void
+lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev)
+{
+ /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */
+ int index;
+
+ if (eq->eq_size == 0) {
+ LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
+ eq->eq_callback(ev);
+ return;
+ }
+
+ lnet_eq_wait_lock();
+ ev->sequence = eq->eq_enq_seq++;
+
+ LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size));
+ index = ev->sequence & (eq->eq_size - 1);
+
+ eq->eq_events[index] = *ev;
+
+ if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
+ eq->eq_callback(ev);
+
+ /* Wake anyone waiting in LNetEQPoll() */
+ if (waitqueue_active(&the_lnet.ln_eq_waitq))
+ wake_up_all(&the_lnet.ln_eq_waitq);
+ lnet_eq_wait_unlock();
+}
+
+int
+lnet_eq_dequeue_event(lnet_eq_t *eq, lnet_event_t *ev)
+{
+ int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+ lnet_event_t *new_event = &eq->eq_events[new_index];
+ int rc;
+ ENTRY;
+
+ /* must called with lnet_eq_wait_lock hold */
+ if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
+ RETURN(0);
+
+ /* We've got a new event... */
+ *ev = *new_event;
+
+ CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+ new_event, eq->eq_deq_seq, eq->eq_size);
+
+ /* ...but did it overwrite an event we've not seen yet? */
+ if (eq->eq_deq_seq == new_event->sequence) {
+ rc = 1;
+ } else {
+ /* don't complain with CERROR: some EQs are sized small
+ * anyway; if it's important, the caller should complain */
+ CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
+ eq->eq_deq_seq, new_event->sequence);
+ rc = -EOVERFLOW;
+ }
+
+ eq->eq_deq_seq = new_event->sequence + 1;
+ RETURN(rc);
+}
+
+/**
+ * A nonblocking function that can be used to get the next event in an EQ.
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully. The event is removed from the queue.
+ *
+ * \param eventq A handle for the event queue.
+ * \param event On successful return (1 or -EOVERFLOW), this location will
+ * hold the next event in the EQ.
+ *
+ * \retval 0 No pending event in the EQ.
+ * \retval 1 Indicates success.
+ * \retval -ENOENT If \a eventq does not point to a valid EQ.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ has been dropped due to limited space in the EQ.
+ */
+int
+LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event)
+{
+ int which;
+
+ return LNetEQPoll(&eventq, 1, 0,
+ event, &which);
+}
+EXPORT_SYMBOL(LNetEQGet);
+
+/**
+ * Block the calling process until there is an event in the EQ.
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully. This function returns the next event
+ * in the EQ and removes it from the EQ.
+ *
+ * \param eventq A handle for the event queue.
+ * \param event On successful return (1 or -EOVERFLOW), this location will
+ * hold the next event in the EQ.
+ *
+ * \retval 1 Indicates success.
+ * \retval -ENOENT If \a eventq does not point to a valid EQ.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ has been dropped due to limited space in the EQ.
+ */
+int
+LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event)
+{
+ int which;
+
+ return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER,
+ event, &which);
+}
+EXPORT_SYMBOL(LNetEQWait);
+
+
+static int
+lnet_eq_wait_locked(int *timeout_ms)
+{
+ int tms = *timeout_ms;
+ int wait;
+ wait_queue_t wl;
+ cfs_time_t now;
+
+ if (tms == 0)
+ return -1; /* don't want to wait and no new event */
+
+ init_waitqueue_entry_current(&wl);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&the_lnet.ln_eq_waitq, &wl);
+
+ lnet_eq_wait_unlock();
+
+ if (tms < 0) {
+ waitq_wait(&wl, TASK_INTERRUPTIBLE);
+
+ } else {
+ struct timeval tv;
+
+ now = cfs_time_current();
+ waitq_timedwait(&wl, TASK_INTERRUPTIBLE,
+ cfs_time_seconds(tms) / 1000);
+ cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv);
+ tms -= (int)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
+ if (tms < 0) /* no more wait but may have new event */
+ tms = 0;
+ }
+
+ wait = tms != 0; /* might need to call here again */
+ *timeout_ms = tms;
+
+ lnet_eq_wait_lock();
+ remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);
+
+ return wait;
+}
+
+
+
+/**
+ * Block the calling process until there's an event from a set of EQs or
+ * timeout happens.
+ *
+ * If an event handler is associated with the EQ, the handler will run before
+ * this function returns successfully, in which case the corresponding event
+ * is consumed.
+ *
+ * LNetEQPoll() provides a timeout to allow applications to poll, block for a
+ * fixed period, or block indefinitely.
+ *
+ * \param eventqs,neq An array of EQ handles, and size of the array.
+ * \param timeout_ms Time in milliseconds to wait for an event to occur on
+ * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
+ * infinite timeout.
+ * \param event,which On successful return (1 or -EOVERFLOW), \a event will
+ * hold the next event in the EQs, and \a which will contain the index of the
+ * EQ from which the event was taken.
+ *
+ * \retval 0 No pending event in the EQs after timeout.
+ * \retval 1 Indicates success.
+ * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
+ * at least one event between this event and the last event obtained from the
+ * EQ indicated by \a which has been dropped due to limited space in the EQ.
+ * \retval -ENOENT If there's an invalid handle in \a eventqs.
+ */
+int
+LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
+ lnet_event_t *event, int *which)
+{
+ int wait = 1;
+ int rc;
+ int i;
+ ENTRY;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (neq < 1)
+ RETURN(-ENOENT);
+
+ lnet_eq_wait_lock();
+
+ for (;;) {
+ for (i = 0; i < neq; i++) {
+ lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);
+
+ if (eq == NULL) {
+ lnet_eq_wait_unlock();
+ RETURN(-ENOENT);
+ }
+
+ rc = lnet_eq_dequeue_event(eq, event);
+ if (rc != 0) {
+ lnet_eq_wait_unlock();
+ *which = i;
+ RETURN(rc);
+ }
+ }
+
+ if (wait == 0)
+ break;
+
+ /*
+ * return value of lnet_eq_wait_locked:
+ * -1 : did nothing and it's sure no new event
+ * 1 : sleep inside and wait until new event
+ * 0 : don't want to wait anymore, but might have new event
+ * so need to call dequeue again
+ */
+ wait = lnet_eq_wait_locked(&timeout_ms);
+ if (wait < 0) /* no new event */
+ break;
+ }
+
+ lnet_eq_wait_unlock();
+ RETURN(0);
+}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
new file mode 100644
index 000000000000..ae643f26933b
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c
@@ -0,0 +1,451 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-md.c
+ *
+ * Memory Descriptor management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/* must be called with lnet_res_lock held */
+void
+lnet_md_unlink(lnet_libmd_t *md)
+{
+ if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) {
+ /* first unlink attempt... */
+ lnet_me_t *me = md->md_me;
+
+ md->md_flags |= LNET_MD_FLAG_ZOMBIE;
+
+ /* Disassociate from ME (if any), and unlink it if it was created
+ * with LNET_UNLINK */
+ if (me != NULL) {
+ /* detach MD from portal */
+ lnet_ptl_detach_md(me, md);
+ if (me->me_unlink == LNET_UNLINK)
+ lnet_me_unlink(me);
+ }
+
+ /* ensure all future handle lookups fail */
+ lnet_res_lh_invalidate(&md->md_lh);
+ }
+
+ if (md->md_refcount != 0) {
+ CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
+ return;
+ }
+
+ CDEBUG(D_NET, "Unlinking md %p\n", md);
+
+ if (md->md_eq != NULL) {
+ int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
+
+ LASSERT(*md->md_eq->eq_refs[cpt] > 0);
+ (*md->md_eq->eq_refs[cpt])--;
+ }
+
+ LASSERT(!list_empty(&md->md_list));
+ list_del_init(&md->md_list);
+ lnet_md_free_locked(md);
+}
+
+static int
+lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
+{
+ int i;
+ unsigned int niov;
+ int total_length = 0;
+
+ lmd->md_me = NULL;
+ lmd->md_start = umd->start;
+ lmd->md_offset = 0;
+ lmd->md_max_size = umd->max_size;
+ lmd->md_options = umd->options;
+ lmd->md_user_ptr = umd->user_ptr;
+ lmd->md_eq = NULL;
+ lmd->md_threshold = umd->threshold;
+ lmd->md_refcount = 0;
+ lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
+
+ if ((umd->options & LNET_MD_IOVEC) != 0) {
+
+ if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */
+ return -EINVAL;
+
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.iov, umd->start,
+ niov * sizeof (lmd->md_iov.iov[0]));
+
+ for (i = 0; i < (int)niov; i++) {
+ /* We take the base address on trust */
+ if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+ return -EINVAL;
+
+ total_length += lmd->md_iov.iov[i].iov_len;
+ }
+
+ lmd->md_length = total_length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return -EINVAL;
+
+ } else if ((umd->options & LNET_MD_KIOV) != 0) {
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.kiov, umd->start,
+ niov * sizeof (lmd->md_iov.kiov[0]));
+
+ for (i = 0; i < (int)niov; i++) {
+ /* We take the page pointer on trust */
+ if (lmd->md_iov.kiov[i].kiov_offset +
+ lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE )
+ return -EINVAL; /* invalid length */
+
+ total_length += lmd->md_iov.kiov[i].kiov_len;
+ }
+
+ lmd->md_length = total_length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return -EINVAL;
+ } else { /* contiguous */
+ lmd->md_length = umd->length;
+ lmd->md_niov = niov = 1;
+ lmd->md_iov.iov[0].iov_base = umd->start;
+ lmd->md_iov.iov[0].iov_len = umd->length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > (int)umd->length)) // illegal max_size
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* must be called with resource lock held */
+static int
+lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt)
+{
+ struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
+
+ /* NB we are passed an allocated, but inactive md.
+ * if we return success, caller may lnet_md_unlink() it.
+ * otherwise caller may only lnet_md_free() it.
+ */
+ /* This implementation doesn't know how to create START events or
+ * disable END events. Best to LASSERT our caller is compliant so
+ * we find out quickly... */
+ /* TODO - reevaluate what should be here in light of
+ * the removal of the start and end events
+ * maybe there we shouldn't even allow LNET_EQ_NONE!)
+ * LASSERT (eq == NULL);
+ */
+ if (!LNetHandleIsInvalid(eq_handle)) {
+ md->md_eq = lnet_handle2eq(&eq_handle);
+
+ if (md->md_eq == NULL)
+ return -ENOENT;
+
+ (*md->md_eq->eq_refs[cpt])++;
+ }
+
+ lnet_res_lh_initialize(container, &md->md_lh);
+
+ LASSERT(list_empty(&md->md_list));
+ list_add(&md->md_list, &container->rec_active);
+
+ return 0;
+}
+
+/* must be called with lnet_res_lock held */
+void
+lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd)
+{
+ /* NB this doesn't copy out all the iov entries so when a
+ * discontiguous MD is copied out, the target gets to know the
+ * original iov pointer (in start) and the number of entries it had
+ * and that's all.
+ */
+ umd->start = lmd->md_start;
+ umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ?
+ lmd->md_length : lmd->md_niov;
+ umd->threshold = lmd->md_threshold;
+ umd->max_size = lmd->md_max_size;
+ umd->options = lmd->md_options;
+ umd->user_ptr = lmd->md_user_ptr;
+ lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
+}
+
+int
+lnet_md_validate(lnet_md_t *umd)
+{
+ if (umd->start == NULL && umd->length != 0) {
+ CERROR("MD start pointer can not be NULL with length %u\n",
+ umd->length);
+ return -EINVAL;
+ }
+
+ if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
+ umd->length > LNET_MAX_IOV) {
+ CERROR("Invalid option: too many fragments %u, %d max\n",
+ umd->length, LNET_MAX_IOV);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * Create a memory descriptor and attach it to a ME
+ *
+ * \param meh A handle for a ME to associate the new MD with.
+ * \param umd Provides initial values for the user-visible parts of a MD.
+ * Other than its use for initialization, there is no linkage between this
+ * structure and the MD maintained by the LNet.
+ * \param unlink A flag to indicate whether the MD is automatically unlinked
+ * when it becomes inactive, either because the operation threshold drops to
+ * zero or because the available memory becomes less than \a umd.max_size.
+ * (Note that the check for unlinking a MD only occurs after the completion
+ * of a successful operation on the MD.) The value LNET_UNLINK enables auto
+ * unlinking; the value LNET_RETAIN disables it.
+ * \param handle On successful returns, a handle to the newly created MD is
+ * saved here. This handle can be used later in LNetMDUnlink().
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If \a umd is not valid.
+ * \retval -ENOMEM If new MD cannot be allocated.
+ * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
+ * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
+ * calling LNetInvalidateHandle() on it.
+ * \retval -EBUSY If the ME pointed to by \a meh is already associated with
+ * a MD.
+ */
+int
+LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
+ lnet_unlink_t unlink, lnet_handle_md_t *handle)
+{
+ LIST_HEAD (matches);
+ LIST_HEAD (drops);
+ struct lnet_me *me;
+ struct lnet_libmd *md;
+ int cpt;
+ int rc;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (lnet_md_validate(&umd) != 0)
+ return -EINVAL;
+
+ if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
+ CERROR("Invalid option: no MD_OP set\n");
+ return -EINVAL;
+ }
+
+ md = lnet_md_alloc(&umd);
+ if (md == NULL)
+ return -ENOMEM;
+
+ rc = lnet_md_build(md, &umd, unlink);
+ cpt = lnet_cpt_of_cookie(meh.cookie);
+
+ lnet_res_lock(cpt);
+ if (rc != 0)
+ goto failed;
+
+ me = lnet_handle2me(&meh);
+ if (me == NULL)
+ rc = -ENOENT;
+ else if (me->me_md != NULL)
+ rc = -EBUSY;
+ else
+ rc = lnet_md_link(md, umd.eq_handle, cpt);
+
+ if (rc != 0)
+ goto failed;
+
+ /* attach this MD to portal of ME and check if it matches any
+ * blocked msgs on this portal */
+ lnet_ptl_attach_md(me, md, &matches, &drops);
+
+ lnet_md2handle(handle, md);
+
+ lnet_res_unlock(cpt);
+
+ lnet_drop_delayed_msg_list(&drops, "Bad match");
+ lnet_recv_delayed_msg_list(&matches);
+
+ return 0;
+
+ failed:
+ lnet_md_free_locked(md);
+
+ lnet_res_unlock(cpt);
+ return rc;
+}
+EXPORT_SYMBOL(LNetMDAttach);
+
+/**
+ * Create a "free floating" memory descriptor - a MD that is not associated
+ * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
+ *
+ * \param umd,unlink See the discussion for LNetMDAttach().
+ * \param handle On successful returns, a handle to the newly created MD is
+ * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
+ * and LNetGet() operations.
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If \a umd is not valid.
+ * \retval -ENOMEM If new MD cannot be allocated.
+ * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
+ * it's OK to supply a NULL \a umd.eq_handle by calling
+ * LNetInvalidateHandle() on it.
+ */
+int
+LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
+{
+ lnet_libmd_t *md;
+ int cpt;
+ int rc;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (lnet_md_validate(&umd) != 0)
+ return -EINVAL;
+
+ if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0) {
+ CERROR("Invalid option: GET|PUT illegal on active MDs\n");
+ return -EINVAL;
+ }
+
+ md = lnet_md_alloc(&umd);
+ if (md == NULL)
+ return -ENOMEM;
+
+ rc = lnet_md_build(md, &umd, unlink);
+
+ cpt = lnet_res_lock_current();
+ if (rc != 0)
+ goto failed;
+
+ rc = lnet_md_link(md, umd.eq_handle, cpt);
+ if (rc != 0)
+ goto failed;
+
+ lnet_md2handle(handle, md);
+
+ lnet_res_unlock(cpt);
+ return 0;
+
+ failed:
+ lnet_md_free_locked(md);
+
+ lnet_res_unlock(cpt);
+ return rc;
+}
+EXPORT_SYMBOL(LNetMDBind);
+
+/**
+ * Unlink the memory descriptor from any ME it may be linked to and release
+ * the internal resources associated with it.
+ *
+ * This function does not free the memory region associated with the MD;
+ * i.e., the memory the user allocated for this MD. If the ME associated with
+ * this MD is not NULL and was created with auto unlink enabled, the ME is
+ * unlinked as well (see LNetMEAttach()).
+ *
+ * Explicitly unlinking a MD via this function call has the same behavior as
+ * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK
+ * is generated in the latter case.
+ *
+ * An unlinked event can be reported in two ways:
+ * - If there's no pending operations on the MD, it's unlinked immediately
+ * and an LNET_EVENT_UNLINK event is logged before this function returns.
+ * - Otherwise, the MD is only marked for deletion when this function
+ * returns, and the unlinked event will be piggybacked on the event of
+ * the completion of the last operation by setting the unlinked field of
+ * the event. No dedicated LNET_EVENT_UNLINK event is generated.
+ *
+ * Note that in both cases the unlinked field of the event is always set; no
+ * more event will happen on the MD after such an event is logged.
+ *
+ * \param mdh A handle for the MD to be unlinked.
+ *
+ * \retval 0 On success.
+ * \retval -ENOENT If \a mdh does not point to a valid MD object.
+ */
+int
+LNetMDUnlink (lnet_handle_md_t mdh)
+{
+ lnet_event_t ev;
+ lnet_libmd_t *md;
+ int cpt;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ cpt = lnet_cpt_of_cookie(mdh.cookie);
+ lnet_res_lock(cpt);
+
+ md = lnet_handle2md(&mdh);
+ if (md == NULL) {
+ lnet_res_unlock(cpt);
+ return -ENOENT;
+ }
+
+ /* If the MD is busy, lnet_md_unlink just marks it for deletion, and
+ * when the NAL is done, the completion event flags that the MD was
+ * unlinked. Otherwise, we enqueue an event now... */
+
+ if (md->md_eq != NULL &&
+ md->md_refcount == 0) {
+ lnet_build_unlink_event(md, &ev);
+ lnet_eq_enqueue_event(md->md_eq, &ev);
+ }
+
+ lnet_md_unlink(md);
+
+ lnet_res_unlock(cpt);
+ return 0;
+}
+EXPORT_SYMBOL(LNetMDUnlink);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
new file mode 100644
index 000000000000..0081075cabee
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-me.c
@@ -0,0 +1,297 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-me.c
+ *
+ * Match Entry management routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/**
+ * Create and attach a match entry to the match list of \a portal. The new
+ * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
+ * can be used to attach a MD to an empty ME.
+ *
+ * \param portal The portal table index where the ME should be attached.
+ * \param match_id Specifies the match criteria for the process ID of
+ * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
+ * used to wildcard either of the identifiers in the lnet_process_id_t
+ * structure.
+ * \param match_bits,ignore_bits Specify the match criteria to apply
+ * to the match bits in the incoming request. The ignore bits are used
+ * to mask out insignificant bits in the incoming match bits. The resulting
+ * bits are then compared to the ME's match bits to determine if the
+ * incoming request meets the match criteria.
+ * \param unlink Indicates whether the ME should be unlinked when the memory
+ * descriptor associated with it is unlinked (Note that the check for
+ * unlinking a ME only occurs when the memory descriptor is unlinked.).
+ * Valid values are LNET_RETAIN and LNET_UNLINK.
+ * \param pos Indicates whether the new ME should be prepended or
+ * appended to the match list. Allowed constants: LNET_INS_BEFORE,
+ * LNET_INS_AFTER.
+ * \param handle On successful returns, a handle to the newly created ME
+ * object is saved here. This handle can be used later in LNetMEInsert(),
+ * LNetMEUnlink(), or LNetMDAttach() functions.
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If \a portal is invalid.
+ * \retval -ENOMEM If new ME object cannot be allocated.
+ */
+int
+LNetMEAttach(unsigned int portal,
+ lnet_process_id_t match_id,
+ __u64 match_bits, __u64 ignore_bits,
+ lnet_unlink_t unlink, lnet_ins_pos_t pos,
+ lnet_handle_me_t *handle)
+{
+ struct lnet_match_table *mtable;
+ struct lnet_me *me;
+ struct list_head *head;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ if ((int)portal >= the_lnet.ln_nportals)
+ return -EINVAL;
+
+ mtable = lnet_mt_of_attach(portal, match_id,
+ match_bits, ignore_bits, pos);
+ if (mtable == NULL) /* can't match portal type */
+ return -EPERM;
+
+ me = lnet_me_alloc();
+ if (me == NULL)
+ return -ENOMEM;
+
+ lnet_res_lock(mtable->mt_cpt);
+
+ me->me_portal = portal;
+ me->me_match_id = match_id;
+ me->me_match_bits = match_bits;
+ me->me_ignore_bits = ignore_bits;
+ me->me_unlink = unlink;
+ me->me_md = NULL;
+
+ lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
+ &me->me_lh);
+ if (ignore_bits != 0)
+ head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
+ else
+ head = lnet_mt_match_head(mtable, match_id, match_bits);
+
+ me->me_pos = head - &mtable->mt_mhash[0];
+ if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
+ list_add_tail(&me->me_list, head);
+ else
+ list_add(&me->me_list, head);
+
+ lnet_me2handle(handle, me);
+
+ lnet_res_unlock(mtable->mt_cpt);
+ return 0;
+}
+EXPORT_SYMBOL(LNetMEAttach);
+
+/**
+ * Create and a match entry and insert it before or after the ME pointed to by
+ * \a current_meh. The new ME is empty, i.e. not associated with a memory
+ * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
+ *
+ * This function is identical to LNetMEAttach() except for the position
+ * where the new ME is inserted.
+ *
+ * \param current_meh A handle for a ME. The new ME will be inserted
+ * immediately before or immediately after this ME.
+ * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
+ * for LNetMEAttach().
+ *
+ * \retval 0 On success.
+ * \retval -ENOMEM If new ME object cannot be allocated.
+ * \retval -ENOENT If \a current_meh does not point to a valid match entry.
+ */
+int
+LNetMEInsert(lnet_handle_me_t current_meh,
+ lnet_process_id_t match_id,
+ __u64 match_bits, __u64 ignore_bits,
+ lnet_unlink_t unlink, lnet_ins_pos_t pos,
+ lnet_handle_me_t *handle)
+{
+ struct lnet_me *current_me;
+ struct lnet_me *new_me;
+ struct lnet_portal *ptl;
+ int cpt;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ if (pos == LNET_INS_LOCAL)
+ return -EPERM;
+
+ new_me = lnet_me_alloc();
+ if (new_me == NULL)
+ return -ENOMEM;
+
+ cpt = lnet_cpt_of_cookie(current_meh.cookie);
+
+ lnet_res_lock(cpt);
+
+ current_me = lnet_handle2me(&current_meh);
+ if (current_me == NULL) {
+ lnet_me_free_locked(new_me);
+
+ lnet_res_unlock(cpt);
+ return -ENOENT;
+ }
+
+ LASSERT(current_me->me_portal < the_lnet.ln_nportals);
+
+ ptl = the_lnet.ln_portals[current_me->me_portal];
+ if (lnet_ptl_is_unique(ptl)) {
+ /* nosense to insertion on unique portal */
+ lnet_me_free_locked(new_me);
+ lnet_res_unlock(cpt);
+ return -EPERM;
+ }
+
+ new_me->me_pos = current_me->me_pos;
+ new_me->me_portal = current_me->me_portal;
+ new_me->me_match_id = match_id;
+ new_me->me_match_bits = match_bits;
+ new_me->me_ignore_bits = ignore_bits;
+ new_me->me_unlink = unlink;
+ new_me->me_md = NULL;
+
+ lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);
+
+ if (pos == LNET_INS_AFTER)
+ list_add(&new_me->me_list, &current_me->me_list);
+ else
+ list_add_tail(&new_me->me_list, &current_me->me_list);
+
+ lnet_me2handle(handle, new_me);
+
+ lnet_res_unlock(cpt);
+
+ return 0;
+}
+EXPORT_SYMBOL(LNetMEInsert);
+
+/**
+ * Unlink a match entry from its match list.
+ *
+ * This operation also releases any resources associated with the ME. If a
+ * memory descriptor is attached to the ME, then it will be unlinked as well
+ * and an unlink event will be generated. It is an error to use the ME handle
+ * after calling LNetMEUnlink().
+ *
+ * \param meh A handle for the ME to be unlinked.
+ *
+ * \retval 0 On success.
+ * \retval -ENOENT If \a meh does not point to a valid ME.
+ * \see LNetMDUnlink() for the discussion on delivering unlink event.
+ */
+int
+LNetMEUnlink(lnet_handle_me_t meh)
+{
+ lnet_me_t *me;
+ lnet_libmd_t *md;
+ lnet_event_t ev;
+ int cpt;
+
+ LASSERT(the_lnet.ln_init);
+ LASSERT(the_lnet.ln_refcount > 0);
+
+ cpt = lnet_cpt_of_cookie(meh.cookie);
+ lnet_res_lock(cpt);
+
+ me = lnet_handle2me(&meh);
+ if (me == NULL) {
+ lnet_res_unlock(cpt);
+ return -ENOENT;
+ }
+
+ md = me->me_md;
+ if (md != NULL &&
+ md->md_eq != NULL &&
+ md->md_refcount == 0) {
+ lnet_build_unlink_event(md, &ev);
+ lnet_eq_enqueue_event(md->md_eq, &ev);
+ }
+
+ lnet_me_unlink(me);
+
+ lnet_res_unlock(cpt);
+ return 0;
+}
+EXPORT_SYMBOL(LNetMEUnlink);
+
+/* call with lnet_res_lock please */
+void
+lnet_me_unlink(lnet_me_t *me)
+{
+ list_del(&me->me_list);
+
+ if (me->me_md != NULL) {
+ lnet_libmd_t *md = me->me_md;
+
+ /* detach MD from portal of this ME */
+ lnet_ptl_detach_md(me, md);
+ lnet_md_unlink(md);
+ }
+
+ lnet_res_lh_invalidate(&me->me_lh);
+ lnet_me_free_locked(me);
+}
+
+#if 0
+static void
+lib_me_dump(lnet_me_t *me)
+{
+ CWARN("Match Entry %p ("LPX64")\n", me,
+ me->me_lh.lh_cookie);
+
+ CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+ me->me_match_bits, me->me_ignore_bits);
+
+ CWARN("\tMD\t= %p\n", me->md);
+ CWARN("\tprev\t= %p\n",
+ list_entry(me->me_list.prev, lnet_me_t, me_list));
+ CWARN("\tnext\t= %p\n",
+ list_entry(me->me_list.next, lnet_me_t, me_list));
+}
+#endif
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
new file mode 100644
index 000000000000..49b0f1287a69
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -0,0 +1,2441 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-move.c
+ *
+ * Data movement routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+static int local_nid_dist_zero = 1;
+CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444,
+ "Reserved");
+
+int
+lnet_fail_nid (lnet_nid_t nid, unsigned int threshold)
+{
+ lnet_test_peer_t *tp;
+ struct list_head *el;
+ struct list_head *next;
+ struct list_head cull;
+
+ LASSERT (the_lnet.ln_init);
+
+ /* NB: use lnet_net_lock(0) to serialize operations on test peers */
+ if (threshold != 0) {
+ /* Adding a new entry */
+ LIBCFS_ALLOC(tp, sizeof(*tp));
+ if (tp == NULL)
+ return -ENOMEM;
+
+ tp->tp_nid = nid;
+ tp->tp_threshold = threshold;
+
+ lnet_net_lock(0);
+ list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers);
+ lnet_net_unlock(0);
+ return 0;
+ }
+
+ /* removing entries */
+ INIT_LIST_HEAD(&cull);
+
+ lnet_net_lock(0);
+
+ list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
+ tp = list_entry (el, lnet_test_peer_t, tp_list);
+
+ if (tp->tp_threshold == 0 || /* needs culling anyway */
+ nid == LNET_NID_ANY || /* removing all entries */
+ tp->tp_nid == nid) /* matched this one */
+ {
+ list_del (&tp->tp_list);
+ list_add (&tp->tp_list, &cull);
+ }
+ }
+
+ lnet_net_unlock(0);
+
+ while (!list_empty (&cull)) {
+ tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
+
+ list_del (&tp->tp_list);
+ LIBCFS_FREE(tp, sizeof (*tp));
+ }
+ return 0;
+}
+
+static int
+fail_peer (lnet_nid_t nid, int outgoing)
+{
+ lnet_test_peer_t *tp;
+ struct list_head *el;
+ struct list_head *next;
+ struct list_head cull;
+ int fail = 0;
+
+ INIT_LIST_HEAD (&cull);
+
+ /* NB: use lnet_net_lock(0) to serialize operations on test peers */
+ lnet_net_lock(0);
+
+ list_for_each_safe (el, next, &the_lnet.ln_test_peers) {
+ tp = list_entry (el, lnet_test_peer_t, tp_list);
+
+ if (tp->tp_threshold == 0) {
+ /* zombie entry */
+ if (outgoing) {
+ /* only cull zombies on outgoing tests,
+ * since we may be at interrupt priority on
+ * incoming messages. */
+ list_del (&tp->tp_list);
+ list_add (&tp->tp_list, &cull);
+ }
+ continue;
+ }
+
+ if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */
+ nid == tp->tp_nid) { /* fail this peer */
+ fail = 1;
+
+ if (tp->tp_threshold != LNET_MD_THRESH_INF) {
+ tp->tp_threshold--;
+ if (outgoing &&
+ tp->tp_threshold == 0) {
+ /* see above */
+ list_del (&tp->tp_list);
+ list_add (&tp->tp_list, &cull);
+ }
+ }
+ break;
+ }
+ }
+
+ lnet_net_unlock(0);
+
+ while (!list_empty (&cull)) {
+ tp = list_entry (cull.next, lnet_test_peer_t, tp_list);
+ list_del (&tp->tp_list);
+
+ LIBCFS_FREE(tp, sizeof (*tp));
+ }
+
+ return (fail);
+}
+
+unsigned int
+lnet_iov_nob (unsigned int niov, struct iovec *iov)
+{
+ unsigned int nob = 0;
+
+ while (niov-- > 0)
+ nob += (iov++)->iov_len;
+
+ return (nob);
+}
+EXPORT_SYMBOL(lnet_iov_nob);
+
+void
+lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, unsigned int doffset,
+ unsigned int nsiov, struct iovec *siov, unsigned int soffset,
+ unsigned int nob)
+{
+ /* NB diov, siov are READ-ONLY */
+ unsigned int this_nob;
+
+ if (nob == 0)
+ return;
+
+ /* skip complete frags before 'doffset' */
+ LASSERT (ndiov > 0);
+ while (doffset >= diov->iov_len) {
+ doffset -= diov->iov_len;
+ diov++;
+ ndiov--;
+ LASSERT (ndiov > 0);
+ }
+
+ /* skip complete frags before 'soffset' */
+ LASSERT (nsiov > 0);
+ while (soffset >= siov->iov_len) {
+ soffset -= siov->iov_len;
+ siov++;
+ nsiov--;
+ LASSERT (nsiov > 0);
+ }
+
+ do {
+ LASSERT (ndiov > 0);
+ LASSERT (nsiov > 0);
+ this_nob = MIN(diov->iov_len - doffset,
+ siov->iov_len - soffset);
+ this_nob = MIN(this_nob, nob);
+
+ memcpy ((char *)diov->iov_base + doffset,
+ (char *)siov->iov_base + soffset, this_nob);
+ nob -= this_nob;
+
+ if (diov->iov_len > doffset + this_nob) {
+ doffset += this_nob;
+ } else {
+ diov++;
+ ndiov--;
+ doffset = 0;
+ }
+
+ if (siov->iov_len > soffset + this_nob) {
+ soffset += this_nob;
+ } else {
+ siov++;
+ nsiov--;
+ soffset = 0;
+ }
+ } while (nob > 0);
+}
+EXPORT_SYMBOL(lnet_copy_iov2iov);
+
+int
+lnet_extract_iov (int dst_niov, struct iovec *dst,
+ int src_niov, struct iovec *src,
+ unsigned int offset, unsigned int len)
+{
+ /* Initialise 'dst' to the subset of 'src' starting at 'offset',
+ * for exactly 'len' bytes, and return the number of entries.
+ * NB not destructive to 'src' */
+ unsigned int frag_len;
+ unsigned int niov;
+
+ if (len == 0) /* no data => */
+ return (0); /* no frags */
+
+ LASSERT (src_niov > 0);
+ while (offset >= src->iov_len) { /* skip initial frags */
+ offset -= src->iov_len;
+ src_niov--;
+ src++;
+ LASSERT (src_niov > 0);
+ }
+
+ niov = 1;
+ for (;;) {
+ LASSERT (src_niov > 0);
+ LASSERT ((int)niov <= dst_niov);
+
+ frag_len = src->iov_len - offset;
+ dst->iov_base = ((char *)src->iov_base) + offset;
+
+ if (len <= frag_len) {
+ dst->iov_len = len;
+ return (niov);
+ }
+
+ dst->iov_len = frag_len;
+
+ len -= frag_len;
+ dst++;
+ src++;
+ niov++;
+ src_niov--;
+ offset = 0;
+ }
+}
+EXPORT_SYMBOL(lnet_extract_iov);
+
+
+unsigned int
+lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov)
+{
+ unsigned int nob = 0;
+
+ while (niov-- > 0)
+ nob += (kiov++)->kiov_len;
+
+ return (nob);
+}
+EXPORT_SYMBOL(lnet_kiov_nob);
+
+void
+lnet_copy_kiov2kiov (unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
+ unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
+ unsigned int nob)
+{
+ /* NB diov, siov are READ-ONLY */
+ unsigned int this_nob;
+ char *daddr = NULL;
+ char *saddr = NULL;
+
+ if (nob == 0)
+ return;
+
+ LASSERT (!in_interrupt ());
+
+ LASSERT (ndiov > 0);
+ while (doffset >= diov->kiov_len) {
+ doffset -= diov->kiov_len;
+ diov++;
+ ndiov--;
+ LASSERT (ndiov > 0);
+ }
+
+ LASSERT (nsiov > 0);
+ while (soffset >= siov->kiov_len) {
+ soffset -= siov->kiov_len;
+ siov++;
+ nsiov--;
+ LASSERT (nsiov > 0);
+ }
+
+ do {
+ LASSERT (ndiov > 0);
+ LASSERT (nsiov > 0);
+ this_nob = MIN(diov->kiov_len - doffset,
+ siov->kiov_len - soffset);
+ this_nob = MIN(this_nob, nob);
+
+ if (daddr == NULL)
+ daddr = ((char *)kmap(diov->kiov_page)) +
+ diov->kiov_offset + doffset;
+ if (saddr == NULL)
+ saddr = ((char *)kmap(siov->kiov_page)) +
+ siov->kiov_offset + soffset;
+
+ /* Vanishing risk of kmap deadlock when mapping 2 pages.
+ * However in practice at least one of the kiovs will be mapped
+ * kernel pages and the map/unmap will be NOOPs */
+
+ memcpy (daddr, saddr, this_nob);
+ nob -= this_nob;
+
+ if (diov->kiov_len > doffset + this_nob) {
+ daddr += this_nob;
+ doffset += this_nob;
+ } else {
+ kunmap(diov->kiov_page);
+ daddr = NULL;
+ diov++;
+ ndiov--;
+ doffset = 0;
+ }
+
+ if (siov->kiov_len > soffset + this_nob) {
+ saddr += this_nob;
+ soffset += this_nob;
+ } else {
+ kunmap(siov->kiov_page);
+ saddr = NULL;
+ siov++;
+ nsiov--;
+ soffset = 0;
+ }
+ } while (nob > 0);
+
+ if (daddr != NULL)
+ kunmap(diov->kiov_page);
+ if (saddr != NULL)
+ kunmap(siov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_kiov2kiov);
+
+void
+lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset,
+ unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
+ unsigned int nob)
+{
+ /* NB iov, kiov are READ-ONLY */
+ unsigned int this_nob;
+ char *addr = NULL;
+
+ if (nob == 0)
+ return;
+
+ LASSERT (!in_interrupt ());
+
+ LASSERT (niov > 0);
+ while (iovoffset >= iov->iov_len) {
+ iovoffset -= iov->iov_len;
+ iov++;
+ niov--;
+ LASSERT (niov > 0);
+ }
+
+ LASSERT (nkiov > 0);
+ while (kiovoffset >= kiov->kiov_len) {
+ kiovoffset -= kiov->kiov_len;
+ kiov++;
+ nkiov--;
+ LASSERT (nkiov > 0);
+ }
+
+ do {
+ LASSERT (niov > 0);
+ LASSERT (nkiov > 0);
+ this_nob = MIN(iov->iov_len - iovoffset,
+ kiov->kiov_len - kiovoffset);
+ this_nob = MIN(this_nob, nob);
+
+ if (addr == NULL)
+ addr = ((char *)kmap(kiov->kiov_page)) +
+ kiov->kiov_offset + kiovoffset;
+
+ memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob);
+ nob -= this_nob;
+
+ if (iov->iov_len > iovoffset + this_nob) {
+ iovoffset += this_nob;
+ } else {
+ iov++;
+ niov--;
+ iovoffset = 0;
+ }
+
+ if (kiov->kiov_len > kiovoffset + this_nob) {
+ addr += this_nob;
+ kiovoffset += this_nob;
+ } else {
+ kunmap(kiov->kiov_page);
+ addr = NULL;
+ kiov++;
+ nkiov--;
+ kiovoffset = 0;
+ }
+
+ } while (nob > 0);
+
+ if (addr != NULL)
+ kunmap(kiov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_kiov2iov);
+
+void
+lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset,
+ unsigned int niov, struct iovec *iov, unsigned int iovoffset,
+ unsigned int nob)
+{
+ /* NB kiov, iov are READ-ONLY */
+ unsigned int this_nob;
+ char *addr = NULL;
+
+ if (nob == 0)
+ return;
+
+ LASSERT (!in_interrupt ());
+
+ LASSERT (nkiov > 0);
+ while (kiovoffset >= kiov->kiov_len) {
+ kiovoffset -= kiov->kiov_len;
+ kiov++;
+ nkiov--;
+ LASSERT (nkiov > 0);
+ }
+
+ LASSERT (niov > 0);
+ while (iovoffset >= iov->iov_len) {
+ iovoffset -= iov->iov_len;
+ iov++;
+ niov--;
+ LASSERT (niov > 0);
+ }
+
+ do {
+ LASSERT (nkiov > 0);
+ LASSERT (niov > 0);
+ this_nob = MIN(kiov->kiov_len - kiovoffset,
+ iov->iov_len - iovoffset);
+ this_nob = MIN(this_nob, nob);
+
+ if (addr == NULL)
+ addr = ((char *)kmap(kiov->kiov_page)) +
+ kiov->kiov_offset + kiovoffset;
+
+ memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob);
+ nob -= this_nob;
+
+ if (kiov->kiov_len > kiovoffset + this_nob) {
+ addr += this_nob;
+ kiovoffset += this_nob;
+ } else {
+ kunmap(kiov->kiov_page);
+ addr = NULL;
+ kiov++;
+ nkiov--;
+ kiovoffset = 0;
+ }
+
+ if (iov->iov_len > iovoffset + this_nob) {
+ iovoffset += this_nob;
+ } else {
+ iov++;
+ niov--;
+ iovoffset = 0;
+ }
+ } while (nob > 0);
+
+ if (addr != NULL)
+ kunmap(kiov->kiov_page);
+}
+EXPORT_SYMBOL(lnet_copy_iov2kiov);
+
+int
+lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
+ int src_niov, lnet_kiov_t *src,
+ unsigned int offset, unsigned int len)
+{
+ /* Initialise 'dst' to the subset of 'src' starting at 'offset',
+ * for exactly 'len' bytes, and return the number of entries.
+ * NB not destructive to 'src' */
+ unsigned int frag_len;
+ unsigned int niov;
+
+ if (len == 0) /* no data => */
+ return (0); /* no frags */
+
+ LASSERT (src_niov > 0);
+ while (offset >= src->kiov_len) { /* skip initial frags */
+ offset -= src->kiov_len;
+ src_niov--;
+ src++;
+ LASSERT (src_niov > 0);
+ }
+
+ niov = 1;
+ for (;;) {
+ LASSERT (src_niov > 0);
+ LASSERT ((int)niov <= dst_niov);
+
+ frag_len = src->kiov_len - offset;
+ dst->kiov_page = src->kiov_page;
+ dst->kiov_offset = src->kiov_offset + offset;
+
+ if (len <= frag_len) {
+ dst->kiov_len = len;
+ LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE);
+ return (niov);
+ }
+
+ dst->kiov_len = frag_len;
+ LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE);
+
+ len -= frag_len;
+ dst++;
+ src++;
+ niov++;
+ src_niov--;
+ offset = 0;
+ }
+}
+EXPORT_SYMBOL(lnet_extract_kiov);
+
+void
+lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+ unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+ unsigned int niov = 0;
+ struct iovec *iov = NULL;
+ lnet_kiov_t *kiov = NULL;
+ int rc;
+
+ LASSERT (!in_interrupt ());
+ LASSERT (mlen == 0 || msg != NULL);
+
+ if (msg != NULL) {
+ LASSERT(msg->msg_receiving);
+ LASSERT(!msg->msg_sending);
+ LASSERT(rlen == msg->msg_len);
+ LASSERT(mlen <= msg->msg_len);
+ LASSERT(msg->msg_offset == offset);
+ LASSERT(msg->msg_wanted == mlen);
+
+ msg->msg_receiving = 0;
+
+ if (mlen != 0) {
+ niov = msg->msg_niov;
+ iov = msg->msg_iov;
+ kiov = msg->msg_kiov;
+
+ LASSERT (niov > 0);
+ LASSERT ((iov == NULL) != (kiov == NULL));
+ }
+ }
+
+ rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
+ niov, iov, kiov, offset, mlen, rlen);
+ if (rc < 0)
+ lnet_finalize(ni, msg, rc);
+}
+
+void
+lnet_setpayloadbuffer(lnet_msg_t *msg)
+{
+ lnet_libmd_t *md = msg->msg_md;
+
+ LASSERT (msg->msg_len > 0);
+ LASSERT (!msg->msg_routing);
+ LASSERT (md != NULL);
+ LASSERT (msg->msg_niov == 0);
+ LASSERT (msg->msg_iov == NULL);
+ LASSERT (msg->msg_kiov == NULL);
+
+ msg->msg_niov = md->md_niov;
+ if ((md->md_options & LNET_MD_KIOV) != 0)
+ msg->msg_kiov = md->md_iov.kiov;
+ else
+ msg->msg_iov = md->md_iov.iov;
+}
+
+void
+lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
+ unsigned int offset, unsigned int len)
+{
+ msg->msg_type = type;
+ msg->msg_target = target;
+ msg->msg_len = len;
+ msg->msg_offset = offset;
+
+ if (len != 0)
+ lnet_setpayloadbuffer(msg);
+
+ memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr));
+ msg->msg_hdr.type = cpu_to_le32(type);
+ msg->msg_hdr.dest_nid = cpu_to_le64(target.nid);
+ msg->msg_hdr.dest_pid = cpu_to_le32(target.pid);
+ /* src_nid will be set later */
+ msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid);
+ msg->msg_hdr.payload_length = cpu_to_le32(len);
+}
+
+void
+lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ void *priv = msg->msg_private;
+ int rc;
+
+ LASSERT (!in_interrupt ());
+ LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
+ (msg->msg_txcredit && msg->msg_peertxcredit));
+
+ rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
+ if (rc < 0)
+ lnet_finalize(ni, msg, rc);
+}
+
+int
+lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ int rc;
+
+ LASSERT(!msg->msg_sending);
+ LASSERT(msg->msg_receiving);
+ LASSERT(!msg->msg_rx_ready_delay);
+ LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+
+ msg->msg_rx_ready_delay = 1;
+ rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+ &msg->msg_private);
+ if (rc != 0) {
+ CERROR("recv from %s / send to %s aborted: "
+ "eager_recv failed %d\n",
+ libcfs_nid2str(msg->msg_rxpeer->lp_nid),
+ libcfs_id2str(msg->msg_target), rc);
+ LASSERT(rc < 0); /* required by my callers */
+ }
+
+ return rc;
+}
+
+/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */
+void
+lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+{
+ cfs_time_t last_alive = 0;
+
+ LASSERT(lnet_peer_aliveness_enabled(lp));
+ LASSERT(ni->ni_lnd->lnd_query != NULL);
+
+ lnet_net_unlock(lp->lp_cpt);
+ (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ lnet_net_lock(lp->lp_cpt);
+
+ lp->lp_last_query = cfs_time_current();
+
+ if (last_alive != 0) /* NI has updated timestamp */
+ lp->lp_last_alive = last_alive;
+}
+
+/* NB: always called with lnet_net_lock held */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
+{
+ int alive;
+ cfs_time_t deadline;
+
+ LASSERT (lnet_peer_aliveness_enabled(lp));
+
+ /* Trust lnet_notify() if it has more recent aliveness news, but
+ * ignore the initial assumed death (see lnet_peers_start_down()).
+ */
+ if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+ cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ return 0;
+
+ deadline = cfs_time_add(lp->lp_last_alive,
+ cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+ alive = cfs_time_after(deadline, now);
+
+ /* Update obsolete lp_alive except for routers assumed to be dead
+ * initially, because router checker would update aliveness in this
+ * case, and moreover lp_last_alive at peer creation is assumed.
+ */
+ if (alive && !lp->lp_alive &&
+ !(lnet_isrouter(lp) && lp->lp_alive_count == 0))
+ lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+ return alive;
+}
+
+
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the lnet_net_lock */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+ cfs_time_t now = cfs_time_current();
+
+ if (!lnet_peer_aliveness_enabled(lp))
+ return -ENODEV;
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* Peer appears dead, but we should avoid frequent NI queries (at
+ * most once per lnet_queryinterval seconds). */
+ if (lp->lp_last_query != 0) {
+ static const int lnet_queryinterval = 1;
+
+ cfs_time_t next_query =
+ cfs_time_add(lp->lp_last_query,
+ cfs_time_seconds(lnet_queryinterval));
+
+ if (cfs_time_before(now, next_query)) {
+ if (lp->lp_alive)
+ CWARN("Unexpected aliveness of peer %s: "
+ "%d < %d (%d/%d)\n",
+ libcfs_nid2str(lp->lp_nid),
+ (int)now, (int)next_query,
+ lnet_queryinterval,
+ lp->lp_ni->ni_peertimeout);
+ return 0;
+ }
+ }
+
+ /* query NI for latest aliveness news */
+ lnet_ni_query_locked(lp->lp_ni, lp);
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ return 0;
+}
+
+int
+lnet_post_send_locked(lnet_msg_t *msg, int do_send)
+{
+ /* lnet_send is going to lnet_net_unlock immediately after this,
+ * so it sets do_send FALSE and I don't do the unlock/send/lock bit.
+ * I return EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer
+ * appears dead, and 0 if sent or OK to send */
+ struct lnet_peer *lp = msg->msg_txpeer;
+ struct lnet_ni *ni = lp->lp_ni;
+ struct lnet_tx_queue *tq;
+ int cpt;
+
+ /* non-lnet_send() callers have checked before */
+ LASSERT(!do_send || msg->msg_tx_delayed);
+ LASSERT(!msg->msg_receiving);
+ LASSERT(msg->msg_tx_committed);
+
+ cpt = msg->msg_tx_cpt;
+ tq = ni->ni_tx_queues[cpt];
+
+ /* NB 'lp' is always the next hop */
+ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
+ lnet_peer_alive_locked(lp) == 0) {
+ the_lnet.ln_counters[cpt]->drop_count++;
+ the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
+ lnet_net_unlock(cpt);
+
+ CNETERR("Dropping message for %s: peer not alive\n",
+ libcfs_id2str(msg->msg_target));
+ if (do_send)
+ lnet_finalize(ni, msg, -EHOSTUNREACH);
+
+ lnet_net_lock(cpt);
+ return EHOSTUNREACH;
+ }
+
+ if (!msg->msg_peertxcredit) {
+ LASSERT ((lp->lp_txcredits < 0) ==
+ !list_empty(&lp->lp_txq));
+
+ msg->msg_peertxcredit = 1;
+ lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t);
+ lp->lp_txcredits--;
+
+ if (lp->lp_txcredits < lp->lp_mintxcredits)
+ lp->lp_mintxcredits = lp->lp_txcredits;
+
+ if (lp->lp_txcredits < 0) {
+ msg->msg_tx_delayed = 1;
+ list_add_tail(&msg->msg_list, &lp->lp_txq);
+ return EAGAIN;
+ }
+ }
+
+ if (!msg->msg_txcredit) {
+ LASSERT((tq->tq_credits < 0) ==
+ !list_empty(&tq->tq_delayed));
+
+ msg->msg_txcredit = 1;
+ tq->tq_credits--;
+
+ if (tq->tq_credits < tq->tq_credits_min)
+ tq->tq_credits_min = tq->tq_credits;
+
+ if (tq->tq_credits < 0) {
+ msg->msg_tx_delayed = 1;
+ list_add_tail(&msg->msg_list, &tq->tq_delayed);
+ return EAGAIN;
+ }
+ }
+
+ if (do_send) {
+ lnet_net_unlock(cpt);
+ lnet_ni_send(ni, msg);
+ lnet_net_lock(cpt);
+ }
+ return 0;
+}
+
+
+lnet_rtrbufpool_t *
+lnet_msg2bufpool(lnet_msg_t *msg)
+{
+ lnet_rtrbufpool_t *rbp;
+ int cpt;
+
+ LASSERT(msg->msg_rx_committed);
+
+ cpt = msg->msg_rx_cpt;
+ rbp = &the_lnet.ln_rtrpools[cpt][0];
+
+ LASSERT(msg->msg_len <= LNET_MTU);
+ while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_CACHE_SIZE) {
+ rbp++;
+ LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
+ }
+
+ return rbp;
+}
+
+int
+lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
+{
+ /* lnet_parse is going to lnet_net_unlock immediately after this, so it
+ * sets do_recv FALSE and I don't do the unlock/send/lock bit. I
+ * return EAGAIN if msg blocked and 0 if received or OK to receive */
+ lnet_peer_t *lp = msg->msg_rxpeer;
+ lnet_rtrbufpool_t *rbp;
+ lnet_rtrbuf_t *rb;
+
+ LASSERT (msg->msg_iov == NULL);
+ LASSERT (msg->msg_kiov == NULL);
+ LASSERT (msg->msg_niov == 0);
+ LASSERT (msg->msg_routing);
+ LASSERT (msg->msg_receiving);
+ LASSERT (!msg->msg_sending);
+
+ /* non-lnet_parse callers only receive delayed messages */
+ LASSERT(!do_recv || msg->msg_rx_delayed);
+
+ if (!msg->msg_peerrtrcredit) {
+ LASSERT ((lp->lp_rtrcredits < 0) ==
+ !list_empty(&lp->lp_rtrq));
+
+ msg->msg_peerrtrcredit = 1;
+ lp->lp_rtrcredits--;
+ if (lp->lp_rtrcredits < lp->lp_minrtrcredits)
+ lp->lp_minrtrcredits = lp->lp_rtrcredits;
+
+ if (lp->lp_rtrcredits < 0) {
+ /* must have checked eager_recv before here */
+ LASSERT(msg->msg_rx_ready_delay);
+ msg->msg_rx_delayed = 1;
+ list_add_tail(&msg->msg_list, &lp->lp_rtrq);
+ return EAGAIN;
+ }
+ }
+
+ rbp = lnet_msg2bufpool(msg);
+
+ if (!msg->msg_rtrcredit) {
+ LASSERT ((rbp->rbp_credits < 0) ==
+ !list_empty(&rbp->rbp_msgs));
+
+ msg->msg_rtrcredit = 1;
+ rbp->rbp_credits--;
+ if (rbp->rbp_credits < rbp->rbp_mincredits)
+ rbp->rbp_mincredits = rbp->rbp_credits;
+
+ if (rbp->rbp_credits < 0) {
+ /* must have checked eager_recv before here */
+ LASSERT(msg->msg_rx_ready_delay);
+ msg->msg_rx_delayed = 1;
+ list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
+ return EAGAIN;
+ }
+ }
+
+ LASSERT (!list_empty(&rbp->rbp_bufs));
+ rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list);
+ list_del(&rb->rb_list);
+
+ msg->msg_niov = rbp->rbp_npages;
+ msg->msg_kiov = &rb->rb_kiov[0];
+
+ if (do_recv) {
+ int cpt = msg->msg_rx_cpt;
+
+ lnet_net_unlock(cpt);
+ lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+ 0, msg->msg_len, msg->msg_len);
+ lnet_net_lock(cpt);
+ }
+ return 0;
+}
+
+void
+lnet_return_tx_credits_locked(lnet_msg_t *msg)
+{
+ lnet_peer_t *txpeer = msg->msg_txpeer;
+ lnet_msg_t *msg2;
+
+ if (msg->msg_txcredit) {
+ struct lnet_ni *ni = txpeer->lp_ni;
+ struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
+
+ /* give back NI txcredits */
+ msg->msg_txcredit = 0;
+
+ LASSERT((tq->tq_credits < 0) ==
+ !list_empty(&tq->tq_delayed));
+
+ tq->tq_credits++;
+ if (tq->tq_credits <= 0) {
+ msg2 = list_entry(tq->tq_delayed.next,
+ lnet_msg_t, msg_list);
+ list_del(&msg2->msg_list);
+
+ LASSERT(msg2->msg_txpeer->lp_ni == ni);
+ LASSERT(msg2->msg_tx_delayed);
+
+ (void) lnet_post_send_locked(msg2, 1);
+ }
+ }
+
+ if (msg->msg_peertxcredit) {
+ /* give back peer txcredits */
+ msg->msg_peertxcredit = 0;
+
+ LASSERT((txpeer->lp_txcredits < 0) ==
+ !list_empty(&txpeer->lp_txq));
+
+ txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t);
+ LASSERT (txpeer->lp_txqnob >= 0);
+
+ txpeer->lp_txcredits++;
+ if (txpeer->lp_txcredits <= 0) {
+ msg2 = list_entry(txpeer->lp_txq.next,
+ lnet_msg_t, msg_list);
+ list_del(&msg2->msg_list);
+
+ LASSERT(msg2->msg_txpeer == txpeer);
+ LASSERT(msg2->msg_tx_delayed);
+
+ (void) lnet_post_send_locked(msg2, 1);
+ }
+ }
+
+ if (txpeer != NULL) {
+ msg->msg_txpeer = NULL;
+ lnet_peer_decref_locked(txpeer);
+ }
+}
+
+void
+lnet_return_rx_credits_locked(lnet_msg_t *msg)
+{
+ lnet_peer_t *rxpeer = msg->msg_rxpeer;
+ lnet_msg_t *msg2;
+
+ if (msg->msg_rtrcredit) {
+ /* give back global router credits */
+ lnet_rtrbuf_t *rb;
+ lnet_rtrbufpool_t *rbp;
+
+ /* NB If a msg ever blocks for a buffer in rbp_msgs, it stays
+ * there until it gets one allocated, or aborts the wait
+ * itself */
+ LASSERT (msg->msg_kiov != NULL);
+
+ rb = list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]);
+ rbp = rb->rb_pool;
+ LASSERT (rbp == lnet_msg2bufpool(msg));
+
+ msg->msg_kiov = NULL;
+ msg->msg_rtrcredit = 0;
+
+ LASSERT((rbp->rbp_credits < 0) ==
+ !list_empty(&rbp->rbp_msgs));
+ LASSERT((rbp->rbp_credits > 0) ==
+ !list_empty(&rbp->rbp_bufs));
+
+ list_add(&rb->rb_list, &rbp->rbp_bufs);
+ rbp->rbp_credits++;
+ if (rbp->rbp_credits <= 0) {
+ msg2 = list_entry(rbp->rbp_msgs.next,
+ lnet_msg_t, msg_list);
+ list_del(&msg2->msg_list);
+
+ (void) lnet_post_routed_recv_locked(msg2, 1);
+ }
+ }
+
+ if (msg->msg_peerrtrcredit) {
+ /* give back peer router credits */
+ msg->msg_peerrtrcredit = 0;
+
+ LASSERT((rxpeer->lp_rtrcredits < 0) ==
+ !list_empty(&rxpeer->lp_rtrq));
+
+ rxpeer->lp_rtrcredits++;
+ if (rxpeer->lp_rtrcredits <= 0) {
+ msg2 = list_entry(rxpeer->lp_rtrq.next,
+ lnet_msg_t, msg_list);
+ list_del(&msg2->msg_list);
+
+ (void) lnet_post_routed_recv_locked(msg2, 1);
+ }
+ }
+ if (rxpeer != NULL) {
+ msg->msg_rxpeer = NULL;
+ lnet_peer_decref_locked(rxpeer);
+ }
+}
+
+static int
+lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2)
+{
+ lnet_peer_t *p1 = r1->lr_gateway;
+ lnet_peer_t *p2 = r2->lr_gateway;
+
+ if (r1->lr_hops < r2->lr_hops)
+ return 1;
+
+ if (r1->lr_hops > r2->lr_hops)
+ return -1;
+
+ if (p1->lp_txqnob < p2->lp_txqnob)
+ return 1;
+
+ if (p1->lp_txqnob > p2->lp_txqnob)
+ return -1;
+
+ if (p1->lp_txcredits > p2->lp_txcredits)
+ return 1;
+
+ if (p1->lp_txcredits < p2->lp_txcredits)
+ return -1;
+
+ if (r1->lr_seq - r2->lr_seq <= 0)
+ return 1;
+
+ return -1;
+}
+
+static lnet_peer_t *
+lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
+{
+ lnet_remotenet_t *rnet;
+ lnet_route_t *rtr;
+ lnet_route_t *rtr_best;
+ lnet_route_t *rtr_last;
+ struct lnet_peer *lp_best;
+ struct lnet_peer *lp;
+ int rc;
+
+ /* If @rtr_nid is not LNET_NID_ANY, return the gateway with
+ * rtr_nid nid, otherwise find the best gateway I can use */
+
+ rnet = lnet_find_net_locked(LNET_NIDNET(target));
+ if (rnet == NULL)
+ return NULL;
+
+ lp_best = NULL;
+ rtr_best = rtr_last = NULL;
+ list_for_each_entry(rtr, &rnet->lrn_routes, lr_list) {
+ lp = rtr->lr_gateway;
+
+ if (!lp->lp_alive || /* gateway is down */
+ ((lp->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0 &&
+ rtr->lr_downis != 0)) /* NI to target is down */
+ continue;
+
+ if (ni != NULL && lp->lp_ni != ni)
+ continue;
+
+ if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
+ return lp;
+
+ if (lp_best == NULL) {
+ rtr_best = rtr_last = rtr;
+ lp_best = lp;
+ continue;
+ }
+
+ /* no protection on below fields, but it's harmless */
+ if (rtr_last->lr_seq - rtr->lr_seq < 0)
+ rtr_last = rtr;
+
+ rc = lnet_compare_routes(rtr, rtr_best);
+ if (rc < 0)
+ continue;
+
+ rtr_best = rtr;
+ lp_best = lp;
+ }
+
+ /* set sequence number on the best router to the latest sequence + 1
+ * so we can round-robin all routers, it's race and inaccurate but
+ * harmless and functional */
+ if (rtr_best != NULL)
+ rtr_best->lr_seq = rtr_last->lr_seq + 1;
+ return lp_best;
+}
+
+int
+lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
+{
+ lnet_nid_t dst_nid = msg->msg_target.nid;
+ struct lnet_ni *src_ni;
+ struct lnet_ni *local_ni;
+ struct lnet_peer *lp;
+ int cpt;
+ int cpt2;
+ int rc;
+
+ /* NB: rtr_nid is set to LNET_NID_ANY for all current use-cases,
+ * but we might want to use pre-determined router for ACK/REPLY
+ * in the future */
+ /* NB: ni != NULL == interface pre-determined (ACK/REPLY) */
+ LASSERT (msg->msg_txpeer == NULL);
+ LASSERT (!msg->msg_sending);
+ LASSERT (!msg->msg_target_is_router);
+ LASSERT (!msg->msg_receiving);
+
+ msg->msg_sending = 1;
+
+ LASSERT(!msg->msg_tx_committed);
+ cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
+ again:
+ lnet_net_lock(cpt);
+
+ if (the_lnet.ln_shutdown) {
+ lnet_net_unlock(cpt);
+ return -ESHUTDOWN;
+ }
+
+ if (src_nid == LNET_NID_ANY) {
+ src_ni = NULL;
+ } else {
+ src_ni = lnet_nid2ni_locked(src_nid, cpt);
+ if (src_ni == NULL) {
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("Can't send to %s: src %s is not a "
+ "local nid\n", libcfs_nid2str(dst_nid),
+ libcfs_nid2str(src_nid));
+ return -EINVAL;
+ }
+ LASSERT (!msg->msg_routing);
+ }
+
+ /* Is this for someone on a local network? */
+ local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt);
+
+ if (local_ni != NULL) {
+ if (src_ni == NULL) {
+ src_ni = local_ni;
+ src_nid = src_ni->ni_nid;
+ } else if (src_ni == local_ni) {
+ lnet_ni_decref_locked(local_ni, cpt);
+ } else {
+ lnet_ni_decref_locked(local_ni, cpt);
+ lnet_ni_decref_locked(src_ni, cpt);
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("No route to %s via from %s\n",
+ libcfs_nid2str(dst_nid),
+ libcfs_nid2str(src_nid));
+ return -EINVAL;
+ }
+
+ LASSERT(src_nid != LNET_NID_ANY);
+ lnet_msg_commit(msg, cpt);
+
+ if (!msg->msg_routing)
+ msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+
+ if (src_ni == the_lnet.ln_loni) {
+ /* No send credit hassles with LOLND */
+ lnet_net_unlock(cpt);
+ lnet_ni_send(src_ni, msg);
+
+ lnet_net_lock(cpt);
+ lnet_ni_decref_locked(src_ni, cpt);
+ lnet_net_unlock(cpt);
+ return 0;
+ }
+
+ rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
+ /* lp has ref on src_ni; lose mine */
+ lnet_ni_decref_locked(src_ni, cpt);
+ if (rc != 0) {
+ lnet_net_unlock(cpt);
+ LCONSOLE_WARN("Error %d finding peer %s\n", rc,
+ libcfs_nid2str(dst_nid));
+ /* ENOMEM or shutting down */
+ return rc;
+ }
+ LASSERT (lp->lp_ni == src_ni);
+ } else {
+ /* sending to a remote network */
+ lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+ if (lp == NULL) {
+ if (src_ni != NULL)
+ lnet_ni_decref_locked(src_ni, cpt);
+ lnet_net_unlock(cpt);
+
+ LCONSOLE_WARN("No route to %s via %s "
+ "(all routers down)\n",
+ libcfs_id2str(msg->msg_target),
+ libcfs_nid2str(src_nid));
+ return -EHOSTUNREACH;
+ }
+
+ /* rtr_nid is LNET_NID_ANY or NID of pre-determined router,
+ * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't
+ * pre-determined router, this can happen if router table
+ * was changed when we release the lock */
+ if (rtr_nid != lp->lp_nid) {
+ cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
+ if (cpt2 != cpt) {
+ if (src_ni != NULL)
+ lnet_ni_decref_locked(src_ni, cpt);
+ lnet_net_unlock(cpt);
+
+ rtr_nid = lp->lp_nid;
+ cpt = cpt2;
+ goto again;
+ }
+ }
+
+ CDEBUG(D_NET, "Best route to %s via %s for %s %d\n",
+ libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid),
+ lnet_msgtyp2str(msg->msg_type), msg->msg_len);
+
+ if (src_ni == NULL) {
+ src_ni = lp->lp_ni;
+ src_nid = src_ni->ni_nid;
+ } else {
+ LASSERT (src_ni == lp->lp_ni);
+ lnet_ni_decref_locked(src_ni, cpt);
+ }
+
+ lnet_peer_addref_locked(lp);
+
+ LASSERT(src_nid != LNET_NID_ANY);
+ lnet_msg_commit(msg, cpt);
+
+ if (!msg->msg_routing) {
+ /* I'm the source and now I know which NI to send on */
+ msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+ }
+
+ msg->msg_target_is_router = 1;
+ msg->msg_target.nid = lp->lp_nid;
+ msg->msg_target.pid = LUSTRE_SRV_LNET_PID;
+ }
+
+ /* 'lp' is our best choice of peer */
+
+ LASSERT (!msg->msg_peertxcredit);
+ LASSERT (!msg->msg_txcredit);
+ LASSERT (msg->msg_txpeer == NULL);
+
+ msg->msg_txpeer = lp; /* msg takes my ref on lp */
+
+ rc = lnet_post_send_locked(msg, 0);
+ lnet_net_unlock(cpt);
+
+ if (rc == EHOSTUNREACH)
+ return -EHOSTUNREACH;
+
+ if (rc == 0)
+ lnet_ni_send(src_ni, msg);
+
+ return 0;
+}
+
+static void
+lnet_drop_message(lnet_ni_t *ni, int cpt, void *private, unsigned int nob)
+{
+ lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->drop_count++;
+ the_lnet.ln_counters[cpt]->drop_length += nob;
+ lnet_net_unlock(cpt);
+
+ lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
+}
+
+static void
+lnet_recv_put(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+
+ if (msg->msg_wanted != 0)
+ lnet_setpayloadbuffer(msg);
+
+ lnet_build_msg_event(msg, LNET_EVENT_PUT);
+
+ /* Must I ACK? If so I'll grab the ack_wmd out of the header and put
+ * it back into the ACK during lnet_finalize() */
+ msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
+ (msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
+
+ lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
+ msg->msg_offset, msg->msg_wanted, hdr->payload_length);
+}
+
+static int
+lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+ struct lnet_match_info info;
+ int rc;
+
+ /* Convert put fields to host byte order */
+ hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
+ hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
+ hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
+
+ info.mi_id.nid = hdr->src_nid;
+ info.mi_id.pid = hdr->src_pid;
+ info.mi_opc = LNET_MD_OP_PUT;
+ info.mi_portal = hdr->msg.put.ptl_index;
+ info.mi_rlength = hdr->payload_length;
+ info.mi_roffset = hdr->msg.put.offset;
+ info.mi_mbits = hdr->msg.put.match_bits;
+
+ msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
+
+ again:
+ rc = lnet_ptl_match_md(&info, msg);
+ switch (rc) {
+ default:
+ LBUG();
+
+ case LNET_MATCHMD_OK:
+ lnet_recv_put(ni, msg);
+ return 0;
+
+ case LNET_MATCHMD_NONE:
+ if (msg->msg_rx_delayed) /* attached on delayed list */
+ return 0;
+
+ rc = lnet_ni_eager_recv(ni, msg);
+ if (rc == 0)
+ goto again;
+ /* fall through */
+
+ case LNET_MATCHMD_DROP:
+ CNETERR("Dropping PUT from %s portal %d match "LPU64
+ " offset %d length %d: %d\n",
+ libcfs_id2str(info.mi_id), info.mi_portal,
+ info.mi_mbits, info.mi_roffset, info.mi_rlength, rc);
+
+ return ENOENT; /* +ve: OK but no match */
+ }
+}
+
+static int
+lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
+{
+ struct lnet_match_info info;
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+ lnet_handle_wire_t reply_wmd;
+ int rc;
+
+ /* Convert get fields to host byte order */
+ hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits);
+ hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index);
+ hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
+ hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
+
+ info.mi_id.nid = hdr->src_nid;
+ info.mi_id.pid = hdr->src_pid;
+ info.mi_opc = LNET_MD_OP_GET;
+ info.mi_portal = hdr->msg.get.ptl_index;
+ info.mi_rlength = hdr->msg.get.sink_length;
+ info.mi_roffset = hdr->msg.get.src_offset;
+ info.mi_mbits = hdr->msg.get.match_bits;
+
+ rc = lnet_ptl_match_md(&info, msg);
+ if (rc == LNET_MATCHMD_DROP) {
+ CNETERR("Dropping GET from %s portal %d match "LPU64
+ " offset %d length %d\n",
+ libcfs_id2str(info.mi_id), info.mi_portal,
+ info.mi_mbits, info.mi_roffset, info.mi_rlength);
+ return ENOENT; /* +ve: OK but no match */
+ }
+
+ LASSERT(rc == LNET_MATCHMD_OK);
+
+ lnet_build_msg_event(msg, LNET_EVENT_GET);
+
+ reply_wmd = hdr->msg.get.return_wmd;
+
+ lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id,
+ msg->msg_offset, msg->msg_wanted);
+
+ msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
+
+ if (rdma_get) {
+ /* The LND completes the REPLY from her recv procedure */
+ lnet_ni_recv(ni, msg->msg_private, msg, 0,
+ msg->msg_offset, msg->msg_len, msg->msg_len);
+ return 0;
+ }
+
+ lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0);
+ msg->msg_receiving = 0;
+
+ rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
+ if (rc < 0) {
+ /* didn't get as far as lnet_ni_send() */
+ CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
+ libcfs_nid2str(ni->ni_nid),
+ libcfs_id2str(info.mi_id), rc);
+
+ lnet_finalize(ni, msg, rc);
+ }
+
+ return 0;
+}
+
+static int
+lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ void *private = msg->msg_private;
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+ lnet_process_id_t src = {0};
+ lnet_libmd_t *md;
+ int rlength;
+ int mlength;
+ int cpt;
+
+ cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie);
+ lnet_res_lock(cpt);
+
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
+ /* NB handles only looked up by creator (no flips) */
+ md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+ CNETERR("%s: Dropping REPLY from %s for %s "
+ "MD "LPX64"."LPX64"\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+ (md == NULL) ? "invalid" : "inactive",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("REPLY MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
+ lnet_res_unlock(cpt);
+ return ENOENT; /* +ve: OK but no match */
+ }
+
+ LASSERT (md->md_offset == 0);
+
+ rlength = hdr->payload_length;
+ mlength = MIN(rlength, (int)md->md_length);
+
+ if (mlength < rlength &&
+ (md->md_options & LNET_MD_TRUNCATE) == 0) {
+ CNETERR("%s: Dropping REPLY from %s length %d "
+ "for MD "LPX64" would overflow (%d)\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+ rlength, hdr->msg.reply.dst_wmd.wh_object_cookie,
+ mlength);
+ lnet_res_unlock(cpt);
+ return ENOENT; /* +ve: OK but no match */
+ }
+
+ CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md "LPX64"\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+ mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie);
+
+ lnet_msg_attach_md(msg, md, 0, mlength);
+
+ if (mlength != 0)
+ lnet_setpayloadbuffer(msg);
+
+ lnet_res_unlock(cpt);
+
+ lnet_build_msg_event(msg, LNET_EVENT_REPLY);
+
+ lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength);
+ return 0;
+}
+
+static int
+lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+ lnet_process_id_t src = {0};
+ lnet_libmd_t *md;
+ int cpt;
+
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
+ /* Convert ack fields to host byte order */
+ hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
+ hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength);
+
+ cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie);
+ lnet_res_lock(cpt);
+
+ /* NB handles only looked up by creator (no flips) */
+ md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+ /* Don't moan; this is expected */
+ CDEBUG(D_NET,
+ "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+ (md == NULL) ? "invalid" : "inactive",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("Source MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
+ lnet_res_unlock(cpt);
+ return ENOENT; /* +ve! */
+ }
+
+ CDEBUG(D_NET, "%s: ACK from %s into md "LPX64"\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+
+ lnet_msg_attach_md(msg, md, 0, 0);
+
+ lnet_res_unlock(cpt);
+
+ lnet_build_msg_event(msg, LNET_EVENT_ACK);
+
+ lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len);
+ return 0;
+}
+
+static int
+lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+ int rc = 0;
+
+ if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
+ lnet_msg2bufpool(msg)->rbp_credits <= 0) {
+ if (ni->ni_lnd->lnd_eager_recv == NULL) {
+ msg->msg_rx_ready_delay = 1;
+ } else {
+ lnet_net_unlock(msg->msg_rx_cpt);
+ rc = lnet_ni_eager_recv(ni, msg);
+ lnet_net_lock(msg->msg_rx_cpt);
+ }
+ }
+
+ if (rc == 0)
+ rc = lnet_post_routed_recv_locked(msg, 0);
+ return rc;
+}
+
+char *
+lnet_msgtyp2str (int type)
+{
+ switch (type) {
+ case LNET_MSG_ACK:
+ return ("ACK");
+ case LNET_MSG_PUT:
+ return ("PUT");
+ case LNET_MSG_GET:
+ return ("GET");
+ case LNET_MSG_REPLY:
+ return ("REPLY");
+ case LNET_MSG_HELLO:
+ return ("HELLO");
+ default:
+ return ("<UNKNOWN>");
+ }
+}
+EXPORT_SYMBOL(lnet_msgtyp2str);
+
+void
+lnet_print_hdr(lnet_hdr_t * hdr)
+{
+ lnet_process_id_t src = {0};
+ lnet_process_id_t dst = {0};
+ char *type_str = lnet_msgtyp2str (hdr->type);
+
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
+ dst.nid = hdr->dest_nid;
+ dst.pid = hdr->dest_pid;
+
+ CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+ CWARN(" From %s\n", libcfs_id2str(src));
+ CWARN(" To %s\n", libcfs_id2str(dst));
+
+ switch (hdr->type) {
+ default:
+ break;
+
+ case LNET_MSG_PUT:
+ CWARN(" Ptl index %d, ack md "LPX64"."LPX64", "
+ "match bits "LPU64"\n",
+ hdr->msg.put.ptl_index,
+ hdr->msg.put.ack_wmd.wh_interface_cookie,
+ hdr->msg.put.ack_wmd.wh_object_cookie,
+ hdr->msg.put.match_bits);
+ CWARN(" Length %d, offset %d, hdr data "LPX64"\n",
+ hdr->payload_length, hdr->msg.put.offset,
+ hdr->msg.put.hdr_data);
+ break;
+
+ case LNET_MSG_GET:
+ CWARN(" Ptl index %d, return md "LPX64"."LPX64", "
+ "match bits "LPU64"\n", hdr->msg.get.ptl_index,
+ hdr->msg.get.return_wmd.wh_interface_cookie,
+ hdr->msg.get.return_wmd.wh_object_cookie,
+ hdr->msg.get.match_bits);
+ CWARN(" Length %d, src offset %d\n",
+ hdr->msg.get.sink_length,
+ hdr->msg.get.src_offset);
+ break;
+
+ case LNET_MSG_ACK:
+ CWARN(" dst md "LPX64"."LPX64", "
+ "manipulated length %d\n",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie,
+ hdr->msg.ack.mlength);
+ break;
+
+ case LNET_MSG_REPLY:
+ CWARN(" dst md "LPX64"."LPX64", "
+ "length %d\n",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie,
+ hdr->payload_length);
+ }
+
+}
+
+int
+lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
+ void *private, int rdma_req)
+{
+ int rc = 0;
+ int cpt;
+ int for_me;
+ struct lnet_msg *msg;
+ lnet_pid_t dest_pid;
+ lnet_nid_t dest_nid;
+ lnet_nid_t src_nid;
+ __u32 payload_length;
+ __u32 type;
+
+ LASSERT (!in_interrupt ());
+
+ type = le32_to_cpu(hdr->type);
+ src_nid = le64_to_cpu(hdr->src_nid);
+ dest_nid = le64_to_cpu(hdr->dest_nid);
+ dest_pid = le32_to_cpu(hdr->dest_pid);
+ payload_length = le32_to_cpu(hdr->payload_length);
+
+ for_me = (ni->ni_nid == dest_nid);
+ cpt = lnet_cpt_of_nid(from_nid);
+
+ switch (type) {
+ case LNET_MSG_ACK:
+ case LNET_MSG_GET:
+ if (payload_length > 0) {
+ CERROR("%s, src %s: bad %s payload %d (0 expected)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ lnet_msgtyp2str(type), payload_length);
+ return -EPROTO;
+ }
+ break;
+
+ case LNET_MSG_PUT:
+ case LNET_MSG_REPLY:
+ if (payload_length > (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
+ CERROR("%s, src %s: bad %s payload %d "
+ "(%d max expected)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ lnet_msgtyp2str(type),
+ payload_length,
+ for_me ? LNET_MAX_PAYLOAD : LNET_MTU);
+ return -EPROTO;
+ }
+ break;
+
+ default:
+ CERROR("%s, src %s: Bad message type 0x%x\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid), type);
+ return -EPROTO;
+ }
+
+ if (the_lnet.ln_routing &&
+ ni->ni_last_alive != cfs_time_current_sec()) {
+ lnet_ni_lock(ni);
+
+ /* NB: so far here is the only place to set NI status to "up */
+ ni->ni_last_alive = cfs_time_current_sec();
+ if (ni->ni_status != NULL &&
+ ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
+ ni->ni_status->ns_status = LNET_NI_STATUS_UP;
+ lnet_ni_unlock(ni);
+ }
+
+ /* Regard a bad destination NID as a protocol error. Senders should
+ * know what they're doing; if they don't they're misconfigured, buggy
+ * or malicious so we chop them off at the knees :) */
+
+ if (!for_me) {
+ if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) {
+ /* should have gone direct */
+ CERROR ("%s, src %s: Bad dest nid %s "
+ "(should have been sent direct)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ libcfs_nid2str(dest_nid));
+ return -EPROTO;
+ }
+
+ if (lnet_islocalnid(dest_nid)) {
+ /* dest is another local NI; sender should have used
+ * this node's NID on its own network */
+ CERROR ("%s, src %s: Bad dest nid %s "
+ "(it's my nid but on a different network)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ libcfs_nid2str(dest_nid));
+ return -EPROTO;
+ }
+
+ if (rdma_req && type == LNET_MSG_GET) {
+ CERROR ("%s, src %s: Bad optimized GET for %s "
+ "(final destination must be me)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ libcfs_nid2str(dest_nid));
+ return -EPROTO;
+ }
+
+ if (!the_lnet.ln_routing) {
+ CERROR ("%s, src %s: Dropping message for %s "
+ "(routing not enabled)\n",
+ libcfs_nid2str(from_nid),
+ libcfs_nid2str(src_nid),
+ libcfs_nid2str(dest_nid));
+ goto drop;
+ }
+ }
+
+ /* Message looks OK; we're not going to return an error, so we MUST
+ * call back lnd_recv() come what may... */
+
+ if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
+ fail_peer (src_nid, 0)) /* shall we now? */
+ {
+ CERROR("%s, src %s: Dropping %s to simulate failure\n",
+ libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+ lnet_msgtyp2str(type));
+ goto drop;
+ }
+
+ msg = lnet_msg_alloc();
+ if (msg == NULL) {
+ CERROR("%s, src %s: Dropping %s (out of memory)\n",
+ libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+ lnet_msgtyp2str(type));
+ goto drop;
+ }
+
+ /* msg zeroed in lnet_msg_alloc; i.e. flags all clear, pointers NULL etc */
+
+ msg->msg_type = type;
+ msg->msg_private = private;
+ msg->msg_receiving = 1;
+ msg->msg_len = msg->msg_wanted = payload_length;
+ msg->msg_offset = 0;
+ msg->msg_hdr = *hdr;
+ /* for building message event */
+ msg->msg_from = from_nid;
+ if (!for_me) {
+ msg->msg_target.pid = dest_pid;
+ msg->msg_target.nid = dest_nid;
+ msg->msg_routing = 1;
+
+ } else {
+ /* convert common msg->hdr fields to host byteorder */
+ msg->msg_hdr.type = type;
+ msg->msg_hdr.src_nid = src_nid;
+ msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid);
+ msg->msg_hdr.dest_nid = dest_nid;
+ msg->msg_hdr.dest_pid = dest_pid;
+ msg->msg_hdr.payload_length = payload_length;
+ }
+
+ lnet_net_lock(cpt);
+ rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt);
+ if (rc != 0) {
+ lnet_net_unlock(cpt);
+ CERROR("%s, src %s: Dropping %s "
+ "(error %d looking up sender)\n",
+ libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
+ lnet_msgtyp2str(type), rc);
+ lnet_msg_free(msg);
+ goto drop;
+ }
+
+ lnet_msg_commit(msg, cpt);
+
+ if (!for_me) {
+ rc = lnet_parse_forward_locked(ni, msg);
+ lnet_net_unlock(cpt);
+
+ if (rc < 0)
+ goto free_drop;
+ if (rc == 0) {
+ lnet_ni_recv(ni, msg->msg_private, msg, 0,
+ 0, payload_length, payload_length);
+ }
+ return 0;
+ }
+
+ lnet_net_unlock(cpt);
+
+ switch (type) {
+ case LNET_MSG_ACK:
+ rc = lnet_parse_ack(ni, msg);
+ break;
+ case LNET_MSG_PUT:
+ rc = lnet_parse_put(ni, msg);
+ break;
+ case LNET_MSG_GET:
+ rc = lnet_parse_get(ni, msg, rdma_req);
+ break;
+ case LNET_MSG_REPLY:
+ rc = lnet_parse_reply(ni, msg);
+ break;
+ default:
+ LASSERT(0);
+ rc = -EPROTO;
+ goto free_drop; /* prevent an unused label if !kernel */
+ }
+
+ if (rc == 0)
+ return 0;
+
+ LASSERT (rc == ENOENT);
+
+ free_drop:
+ LASSERT(msg->msg_md == NULL);
+ lnet_finalize(ni, msg, rc);
+
+ drop:
+ lnet_drop_message(ni, cpt, private, payload_length);
+ return 0;
+}
+EXPORT_SYMBOL(lnet_parse);
+
+void
+lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
+{
+ while (!list_empty(head)) {
+ lnet_process_id_t id = {0};
+ lnet_msg_t *msg;
+
+ msg = list_entry(head->next, lnet_msg_t, msg_list);
+ list_del(&msg->msg_list);
+
+ id.nid = msg->msg_hdr.src_nid;
+ id.pid = msg->msg_hdr.src_pid;
+
+ LASSERT(msg->msg_md == NULL);
+ LASSERT(msg->msg_rx_delayed);
+ LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
+
+ CWARN("Dropping delayed PUT from %s portal %d match "LPU64
+ " offset %d length %d: %s\n",
+ libcfs_id2str(id),
+ msg->msg_hdr.msg.put.ptl_index,
+ msg->msg_hdr.msg.put.match_bits,
+ msg->msg_hdr.msg.put.offset,
+ msg->msg_hdr.payload_length, reason);
+
+ /* NB I can't drop msg's ref on msg_rxpeer until after I've
+ * called lnet_drop_message(), so I just hang onto msg as well
+ * until that's done */
+
+ lnet_drop_message(msg->msg_rxpeer->lp_ni,
+ msg->msg_rxpeer->lp_cpt,
+ msg->msg_private, msg->msg_len);
+ /*
+ * NB: message will not generate event because w/o attached MD,
+ * but we still should give error code so lnet_msg_decommit()
+ * can skip counters operations and other checks.
+ */
+ lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+ }
+}
+
+void
+lnet_recv_delayed_msg_list(struct list_head *head)
+{
+ while (!list_empty(head)) {
+ lnet_msg_t *msg;
+ lnet_process_id_t id;
+
+ msg = list_entry(head->next, lnet_msg_t, msg_list);
+ list_del(&msg->msg_list);
+
+ /* md won't disappear under me, since each msg
+ * holds a ref on it */
+
+ id.nid = msg->msg_hdr.src_nid;
+ id.pid = msg->msg_hdr.src_pid;
+
+ LASSERT(msg->msg_rx_delayed);
+ LASSERT(msg->msg_md != NULL);
+ LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
+
+ CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
+ "match "LPU64" offset %d length %d.\n",
+ libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index,
+ msg->msg_hdr.msg.put.match_bits,
+ msg->msg_hdr.msg.put.offset,
+ msg->msg_hdr.payload_length);
+
+ lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+ }
+}
+
+/**
+ * Initiate an asynchronous PUT operation.
+ *
+ * There are several events associated with a PUT: completion of the send on
+ * the initiator node (LNET_EVENT_SEND), and when the send completes
+ * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating
+ * that the operation was accepted by the target. The event LNET_EVENT_PUT is
+ * used at the target node to indicate the completion of incoming data
+ * delivery.
+ *
+ * The local events will be logged in the EQ associated with the MD pointed to
+ * by \a mdh handle. Using a MD without an associated EQ results in these
+ * events being discarded. In this case, the caller must have another
+ * mechanism (e.g., a higher level protocol) for determining when it is safe
+ * to modify the memory region associated with the MD.
+ *
+ * Note that LNet does not guarantee the order of LNET_EVENT_SEND and
+ * LNET_EVENT_ACK, though intuitively ACK should happen after SEND.
+ *
+ * \param self Indicates the NID of a local interface through which to send
+ * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself.
+ * \param mdh A handle for the MD that describes the memory to be sent. The MD
+ * must be "free floating" (See LNetMDBind()).
+ * \param ack Controls whether an acknowledgment is requested.
+ * Acknowledgments are only sent when they are requested by the initiating
+ * process and the target MD enables them.
+ * \param target A process identifier for the target process.
+ * \param portal The index in the \a target's portal table.
+ * \param match_bits The match bits to use for MD selection at the target
+ * process.
+ * \param offset The offset into the target MD (only used when the target
+ * MD has the LNET_MD_MANAGE_REMOTE option set).
+ * \param hdr_data 64 bits of user data that can be included in the message
+ * header. This data is written to an event queue entry at the target if an
+ * EQ is present on the matching MD.
+ *
+ * \retval 0 Success, and only in this case events will be generated
+ * and logged to EQ (if it exists).
+ * \retval -EIO Simulated failure.
+ * \retval -ENOMEM Memory allocation failure.
+ * \retval -ENOENT Invalid MD object.
+ *
+ * \see lnet_event_t::hdr_data and lnet_event_kind_t.
+ */
+int
+LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack,
+ lnet_process_id_t target, unsigned int portal,
+ __u64 match_bits, unsigned int offset,
+ __u64 hdr_data)
+{
+ struct lnet_msg *msg;
+ struct lnet_libmd *md;
+ int cpt;
+ int rc;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
+ fail_peer (target.nid, 1)) /* shall we now? */
+ {
+ CERROR("Dropping PUT to %s: simulated failure\n",
+ libcfs_id2str(target));
+ return -EIO;
+ }
+
+ msg = lnet_msg_alloc();
+ if (msg == NULL) {
+ CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n",
+ libcfs_id2str(target));
+ return -ENOMEM;
+ }
+ msg->msg_vmflush = !!memory_pressure_get();
+
+ cpt = lnet_cpt_of_cookie(mdh.cookie);
+ lnet_res_lock(cpt);
+
+ md = lnet_handle2md(&mdh);
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+ CERROR("Dropping PUT ("LPU64":%d:%s): MD (%d) invalid\n",
+ match_bits, portal, libcfs_id2str(target),
+ md == NULL ? -1 : md->md_threshold);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("Source MD also attached to portal %d\n",
+ md->md_me->me_portal);
+ lnet_res_unlock(cpt);
+
+ lnet_msg_free(msg);
+ return -ENOENT;
+ }
+
+ CDEBUG(D_NET, "LNetPut -> %s\n", libcfs_id2str(target));
+
+ lnet_msg_attach_md(msg, md, 0, 0);
+
+ lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length);
+
+ msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits);
+ msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal);
+ msg->msg_hdr.msg.put.offset = cpu_to_le32(offset);
+ msg->msg_hdr.msg.put.hdr_data = hdr_data;
+
+ /* NB handles only looked up by creator (no flips) */
+ if (ack == LNET_ACK_REQ) {
+ msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
+ the_lnet.ln_interface_cookie;
+ msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
+ md->md_lh.lh_cookie;
+ } else {
+ msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
+ LNET_WIRE_HANDLE_COOKIE_NONE;
+ msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
+ LNET_WIRE_HANDLE_COOKIE_NONE;
+ }
+
+ lnet_res_unlock(cpt);
+
+ lnet_build_msg_event(msg, LNET_EVENT_SEND);
+
+ rc = lnet_send(self, msg, LNET_NID_ANY);
+ if (rc != 0) {
+ CNETERR( "Error sending PUT to %s: %d\n",
+ libcfs_id2str(target), rc);
+ lnet_finalize (NULL, msg, rc);
+ }
+
+ /* completion will be signalled by an event */
+ return 0;
+}
+EXPORT_SYMBOL(LNetPut);
+
+lnet_msg_t *
+lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg)
+{
+ /* The LND can DMA direct to the GET md (i.e. no REPLY msg). This
+ * returns a msg for the LND to pass to lnet_finalize() when the sink
+ * data has been received.
+ *
+ * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+ * lnet_finalize() is called on it, so the LND must call this first */
+
+ struct lnet_msg *msg = lnet_msg_alloc();
+ struct lnet_libmd *getmd = getmsg->msg_md;
+ lnet_process_id_t peer_id = getmsg->msg_target;
+ int cpt;
+
+ LASSERT(!getmsg->msg_target_is_router);
+ LASSERT(!getmsg->msg_routing);
+
+ cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie);
+ lnet_res_lock(cpt);
+
+ LASSERT (getmd->md_refcount > 0);
+
+ if (msg == NULL) {
+ CERROR ("%s: Dropping REPLY from %s: can't allocate msg\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id));
+ goto drop;
+ }
+
+ if (getmd->md_threshold == 0) {
+ CERROR ("%s: Dropping REPLY from %s for inactive MD %p\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id),
+ getmd);
+ lnet_res_unlock(cpt);
+ goto drop;
+ }
+
+ LASSERT(getmd->md_offset == 0);
+
+ CDEBUG(D_NET, "%s: Reply from %s md %p\n",
+ libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd);
+
+ /* setup information for lnet_build_msg_event */
+ msg->msg_from = peer_id.nid;
+ msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */
+ msg->msg_hdr.src_nid = peer_id.nid;
+ msg->msg_hdr.payload_length = getmd->md_length;
+ msg->msg_receiving = 1; /* required by lnet_msg_attach_md */
+
+ lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
+ lnet_res_unlock(cpt);
+
+ cpt = lnet_cpt_of_nid(peer_id.nid);
+
+ lnet_net_lock(cpt);
+ lnet_msg_commit(msg, cpt);
+ lnet_net_unlock(cpt);
+
+ lnet_build_msg_event(msg, LNET_EVENT_REPLY);
+
+ return msg;
+
+ drop:
+ cpt = lnet_cpt_of_nid(peer_id.nid);
+
+ lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->drop_count++;
+ the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
+ lnet_net_unlock(cpt);
+
+ if (msg != NULL)
+ lnet_msg_free(msg);
+
+ return NULL;
+}
+EXPORT_SYMBOL(lnet_create_reply_msg);
+
+void
+lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len)
+{
+ /* Set the REPLY length, now the RDMA that elides the REPLY message has
+ * completed and I know it. */
+ LASSERT (reply != NULL);
+ LASSERT (reply->msg_type == LNET_MSG_GET);
+ LASSERT (reply->msg_ev.type == LNET_EVENT_REPLY);
+
+ /* NB I trusted my peer to RDMA. If she tells me she's written beyond
+ * the end of my buffer, I might as well be dead. */
+ LASSERT (len <= reply->msg_ev.mlength);
+
+ reply->msg_ev.mlength = len;
+}
+EXPORT_SYMBOL(lnet_set_reply_msg_len);
+
+/**
+ * Initiate an asynchronous GET operation.
+ *
+ * On the initiator node, an LNET_EVENT_SEND is logged when the GET request
+ * is sent, and an LNET_EVENT_REPLY is logged when the data returned from
+ * the target node in the REPLY has been written to local MD.
+ *
+ * On the target node, an LNET_EVENT_GET is logged when the GET request
+ * arrives and is accepted into a MD.
+ *
+ * \param self,target,portal,match_bits,offset See the discussion in LNetPut().
+ * \param mdh A handle for the MD that describes the memory into which the
+ * requested data will be received. The MD must be "free floating" (See LNetMDBind()).
+ *
+ * \retval 0 Success, and only in this case events will be generated
+ * and logged to EQ (if it exists) of the MD.
+ * \retval -EIO Simulated failure.
+ * \retval -ENOMEM Memory allocation failure.
+ * \retval -ENOENT Invalid MD object.
+ */
+int
+LNetGet(lnet_nid_t self, lnet_handle_md_t mdh,
+ lnet_process_id_t target, unsigned int portal,
+ __u64 match_bits, unsigned int offset)
+{
+ struct lnet_msg *msg;
+ struct lnet_libmd *md;
+ int cpt;
+ int rc;
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */
+ fail_peer (target.nid, 1)) /* shall we now? */
+ {
+ CERROR("Dropping GET to %s: simulated failure\n",
+ libcfs_id2str(target));
+ return -EIO;
+ }
+
+ msg = lnet_msg_alloc();
+ if (msg == NULL) {
+ CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n",
+ libcfs_id2str(target));
+ return -ENOMEM;
+ }
+
+ cpt = lnet_cpt_of_cookie(mdh.cookie);
+ lnet_res_lock(cpt);
+
+ md = lnet_handle2md(&mdh);
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
+ CERROR("Dropping GET ("LPU64":%d:%s): MD (%d) invalid\n",
+ match_bits, portal, libcfs_id2str(target),
+ md == NULL ? -1 : md->md_threshold);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("REPLY MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
+ lnet_res_unlock(cpt);
+
+ lnet_msg_free(msg);
+
+ return -ENOENT;
+ }
+
+ CDEBUG(D_NET, "LNetGet -> %s\n", libcfs_id2str(target));
+
+ lnet_msg_attach_md(msg, md, 0, 0);
+
+ lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0);
+
+ msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits);
+ msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal);
+ msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset);
+ msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length);
+
+ /* NB handles only looked up by creator (no flips) */
+ msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie =
+ the_lnet.ln_interface_cookie;
+ msg->msg_hdr.msg.get.return_wmd.wh_object_cookie =
+ md->md_lh.lh_cookie;
+
+ lnet_res_unlock(cpt);
+
+ lnet_build_msg_event(msg, LNET_EVENT_SEND);
+
+ rc = lnet_send(self, msg, LNET_NID_ANY);
+ if (rc < 0) {
+ CNETERR( "Error sending GET to %s: %d\n",
+ libcfs_id2str(target), rc);
+ lnet_finalize (NULL, msg, rc);
+ }
+
+ /* completion will be signalled by an event */
+ return 0;
+}
+EXPORT_SYMBOL(LNetGet);
+
+/**
+ * Calculate distance to node at \a dstnid.
+ *
+ * \param dstnid Target NID.
+ * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid
+ * is saved here.
+ * \param orderp If not NULL, order of the route to reach \a dstnid is saved
+ * here.
+ *
+ * \retval 0 If \a dstnid belongs to a local interface, and reserved option
+ * local_nid_dist_zero is set, which is the default.
+ * \retval positives Distance to target NID, i.e. number of hops plus one.
+ * \retval -EHOSTUNREACH If \a dstnid is not reachable.
+ */
+int
+LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
+{
+ struct list_head *e;
+ struct lnet_ni *ni;
+ lnet_remotenet_t *rnet;
+ __u32 dstnet = LNET_NIDNET(dstnid);
+ int hops;
+ int cpt;
+ __u32 order = 2;
+ struct list_head *rn_list;
+
+ /* if !local_nid_dist_zero, I don't return a distance of 0 ever
+ * (when lustre sees a distance of 0, it substitutes 0@lo), so I
+ * keep order 0 free for 0@lo and order 1 free for a local NID
+ * match */
+
+ LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_refcount > 0);
+
+ cpt = lnet_net_lock_current();
+
+ list_for_each (e, &the_lnet.ln_nis) {
+ ni = list_entry(e, lnet_ni_t, ni_list);
+
+ if (ni->ni_nid == dstnid) {
+ if (srcnidp != NULL)
+ *srcnidp = dstnid;
+ if (orderp != NULL) {
+ if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND)
+ *orderp = 0;
+ else
+ *orderp = 1;
+ }
+ lnet_net_unlock(cpt);
+
+ return local_nid_dist_zero ? 0 : 1;
+ }
+
+ if (LNET_NIDNET(ni->ni_nid) == dstnet) {
+ if (srcnidp != NULL)
+ *srcnidp = ni->ni_nid;
+ if (orderp != NULL)
+ *orderp = order;
+ lnet_net_unlock(cpt);
+ return 1;
+ }
+
+ order++;
+ }
+
+ rn_list = lnet_net2rnethash(dstnet);
+ list_for_each(e, rn_list) {
+ rnet = list_entry(e, lnet_remotenet_t, lrn_list);
+
+ if (rnet->lrn_net == dstnet) {
+ lnet_route_t *route;
+ lnet_route_t *shortest = NULL;
+
+ LASSERT (!list_empty(&rnet->lrn_routes));
+
+ list_for_each_entry(route, &rnet->lrn_routes,
+ lr_list) {
+ if (shortest == NULL ||
+ route->lr_hops < shortest->lr_hops)
+ shortest = route;
+ }
+
+ LASSERT (shortest != NULL);
+ hops = shortest->lr_hops;
+ if (srcnidp != NULL)
+ *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+ if (orderp != NULL)
+ *orderp = order;
+ lnet_net_unlock(cpt);
+ return hops + 1;
+ }
+ order++;
+ }
+
+ lnet_net_unlock(cpt);
+ return -EHOSTUNREACH;
+}
+EXPORT_SYMBOL(LNetDist);
+
+/**
+ * Set the number of asynchronous messages expected from a target process.
+ *
+ * This function is only meaningful for userspace callers. It's a no-op when
+ * called from kernel.
+ *
+ * Asynchronous messages are those that can come from a target when the
+ * userspace process is not waiting for IO to complete; e.g., AST callbacks
+ * from Lustre servers. Specifying the expected number of such messages
+ * allows them to be eagerly received when user process is not running in
+ * LNet; otherwise network errors may occur.
+ *
+ * \param id Process ID of the target process.
+ * \param nasync Number of asynchronous messages expected from the target.
+ *
+ * \return 0 on success, and an error code otherwise.
+ */
+int
+LNetSetAsync(lnet_process_id_t id, int nasync)
+{
+ return 0;
+}
+EXPORT_SYMBOL(LNetSetAsync);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
new file mode 100644
index 000000000000..8f3a50bd5f69
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c
@@ -0,0 +1,650 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-msg.c
+ *
+ * Message decoding, parsing and finalizing routines
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+void
+lnet_build_unlink_event (lnet_libmd_t *md, lnet_event_t *ev)
+{
+ ENTRY;
+
+ memset(ev, 0, sizeof(*ev));
+
+ ev->status = 0;
+ ev->unlinked = 1;
+ ev->type = LNET_EVENT_UNLINK;
+ lnet_md_deconstruct(md, &ev->md);
+ lnet_md2handle(&ev->md_handle, md);
+ EXIT;
+}
+
+/*
+ * Don't need any lock, must be called after lnet_commit_md
+ */
+void
+lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type)
+{
+ lnet_hdr_t *hdr = &msg->msg_hdr;
+ lnet_event_t *ev = &msg->msg_ev;
+
+ LASSERT(!msg->msg_routing);
+
+ ev->type = ev_type;
+
+ if (ev_type == LNET_EVENT_SEND) {
+ /* event for active message */
+ ev->target.nid = le64_to_cpu(hdr->dest_nid);
+ ev->target.pid = le32_to_cpu(hdr->dest_pid);
+ ev->initiator.nid = LNET_NID_ANY;
+ ev->initiator.pid = the_lnet.ln_pid;
+ ev->sender = LNET_NID_ANY;
+
+ } else {
+ /* event for passive message */
+ ev->target.pid = hdr->dest_pid;
+ ev->target.nid = hdr->dest_nid;
+ ev->initiator.pid = hdr->src_pid;
+ ev->initiator.nid = hdr->src_nid;
+ ev->rlength = hdr->payload_length;
+ ev->sender = msg->msg_from;
+ ev->mlength = msg->msg_wanted;
+ ev->offset = msg->msg_offset;
+ }
+
+ switch (ev_type) {
+ default:
+ LBUG();
+
+ case LNET_EVENT_PUT: /* passive PUT */
+ ev->pt_index = hdr->msg.put.ptl_index;
+ ev->match_bits = hdr->msg.put.match_bits;
+ ev->hdr_data = hdr->msg.put.hdr_data;
+ return;
+
+ case LNET_EVENT_GET: /* passive GET */
+ ev->pt_index = hdr->msg.get.ptl_index;
+ ev->match_bits = hdr->msg.get.match_bits;
+ ev->hdr_data = 0;
+ return;
+
+ case LNET_EVENT_ACK: /* ACK */
+ ev->match_bits = hdr->msg.ack.match_bits;
+ ev->mlength = hdr->msg.ack.mlength;
+ return;
+
+ case LNET_EVENT_REPLY: /* REPLY */
+ return;
+
+ case LNET_EVENT_SEND: /* active message */
+ if (msg->msg_type == LNET_MSG_PUT) {
+ ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
+ ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
+ ev->offset = le32_to_cpu(hdr->msg.put.offset);
+ ev->mlength =
+ ev->rlength = le32_to_cpu(hdr->payload_length);
+ ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);
+
+ } else {
+ LASSERT(msg->msg_type == LNET_MSG_GET);
+ ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
+ ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
+ ev->mlength =
+ ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
+ ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
+ ev->hdr_data = 0;
+ }
+ return;
+ }
+}
+
+void
+lnet_msg_commit(lnet_msg_t *msg, int cpt)
+{
+ struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
+ lnet_counters_t *counters = the_lnet.ln_counters[cpt];
+
+ /* routed message can be committed for both receiving and sending */
+ LASSERT(!msg->msg_tx_committed);
+
+ if (msg->msg_sending) {
+ LASSERT(!msg->msg_receiving);
+
+ msg->msg_tx_cpt = cpt;
+ msg->msg_tx_committed = 1;
+ if (msg->msg_rx_committed) { /* routed message REPLY */
+ LASSERT(msg->msg_onactivelist);
+ return;
+ }
+ } else {
+ LASSERT(!msg->msg_sending);
+ msg->msg_rx_cpt = cpt;
+ msg->msg_rx_committed = 1;
+ }
+
+ LASSERT(!msg->msg_onactivelist);
+ msg->msg_onactivelist = 1;
+ list_add(&msg->msg_activelist, &container->msc_active);
+
+ counters->msgs_alloc++;
+ if (counters->msgs_alloc > counters->msgs_max)
+ counters->msgs_max = counters->msgs_alloc;
+}
+
+static void
+lnet_msg_decommit_tx(lnet_msg_t *msg, int status)
+{
+ lnet_counters_t *counters;
+ lnet_event_t *ev = &msg->msg_ev;
+
+ LASSERT(msg->msg_tx_committed);
+ if (status != 0)
+ goto out;
+
+ counters = the_lnet.ln_counters[msg->msg_tx_cpt];
+ switch (ev->type) {
+ default: /* routed message */
+ LASSERT(msg->msg_routing);
+ LASSERT(msg->msg_rx_committed);
+ LASSERT(ev->type == 0);
+
+ counters->route_length += msg->msg_len;
+ counters->route_count++;
+ goto out;
+
+ case LNET_EVENT_PUT:
+ /* should have been decommitted */
+ LASSERT(!msg->msg_rx_committed);
+ /* overwritten while sending ACK */
+ LASSERT(msg->msg_type == LNET_MSG_ACK);
+ msg->msg_type = LNET_MSG_PUT; /* fix type */
+ break;
+
+ case LNET_EVENT_SEND:
+ LASSERT(!msg->msg_rx_committed);
+ if (msg->msg_type == LNET_MSG_PUT)
+ counters->send_length += msg->msg_len;
+ break;
+
+ case LNET_EVENT_GET:
+ LASSERT(msg->msg_rx_committed);
+ /* overwritten while sending reply, we should never be
+ * here for optimized GET */
+ LASSERT(msg->msg_type == LNET_MSG_REPLY);
+ msg->msg_type = LNET_MSG_GET; /* fix type */
+ break;
+ }
+
+ counters->send_count++;
+ out:
+ lnet_return_tx_credits_locked(msg);
+ msg->msg_tx_committed = 0;
+}
+
+static void
+lnet_msg_decommit_rx(lnet_msg_t *msg, int status)
+{
+ lnet_counters_t *counters;
+ lnet_event_t *ev = &msg->msg_ev;
+
+ LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
+ LASSERT(msg->msg_rx_committed);
+
+ if (status != 0)
+ goto out;
+
+ counters = the_lnet.ln_counters[msg->msg_rx_cpt];
+ switch (ev->type) {
+ default:
+ LASSERT(ev->type == 0);
+ LASSERT(msg->msg_routing);
+ goto out;
+
+ case LNET_EVENT_ACK:
+ LASSERT(msg->msg_type == LNET_MSG_ACK);
+ break;
+
+ case LNET_EVENT_GET:
+ /* type is "REPLY" if it's an optimized GET on passive side,
+ * because optimized GET will never be committed for sending,
+ * so message type wouldn't be changed back to "GET" by
+ * lnet_msg_decommit_tx(), see details in lnet_parse_get() */
+ LASSERT(msg->msg_type == LNET_MSG_REPLY ||
+ msg->msg_type == LNET_MSG_GET);
+ counters->send_length += msg->msg_wanted;
+ break;
+
+ case LNET_EVENT_PUT:
+ LASSERT(msg->msg_type == LNET_MSG_PUT);
+ break;
+
+ case LNET_EVENT_REPLY:
+ /* type is "GET" if it's an optimized GET on active side,
+ * see details in lnet_create_reply_msg() */
+ LASSERT(msg->msg_type == LNET_MSG_GET ||
+ msg->msg_type == LNET_MSG_REPLY);
+ break;
+ }
+
+ counters->recv_count++;
+ if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
+ counters->recv_length += msg->msg_wanted;
+
+ out:
+ lnet_return_rx_credits_locked(msg);
+ msg->msg_rx_committed = 0;
+}
+
+void
+lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status)
+{
+ int cpt2 = cpt;
+
+ LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
+ LASSERT(msg->msg_onactivelist);
+
+ if (msg->msg_tx_committed) { /* always decommit for sending first */
+ LASSERT(cpt == msg->msg_tx_cpt);
+ lnet_msg_decommit_tx(msg, status);
+ }
+
+ if (msg->msg_rx_committed) {
+ /* forwarding msg committed for both receiving and sending */
+ if (cpt != msg->msg_rx_cpt) {
+ lnet_net_unlock(cpt);
+ cpt2 = msg->msg_rx_cpt;
+ lnet_net_lock(cpt2);
+ }
+ lnet_msg_decommit_rx(msg, status);
+ }
+
+ list_del(&msg->msg_activelist);
+ msg->msg_onactivelist = 0;
+
+ the_lnet.ln_counters[cpt2]->msgs_alloc--;
+
+ if (cpt2 != cpt) {
+ lnet_net_unlock(cpt2);
+ lnet_net_lock(cpt);
+ }
+}
+
+void
+lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
+ unsigned int offset, unsigned int mlen)
+{
+ /* NB: @offset and @len are only useful for receiving */
+ /* Here, we attach the MD on lnet_msg and mark it busy and
+ * decrementing its threshold. Come what may, the lnet_msg "owns"
+ * the MD until a call to lnet_msg_detach_md or lnet_finalize()
+ * signals completion. */
+ LASSERT(!msg->msg_routing);
+
+ msg->msg_md = md;
+ if (msg->msg_receiving) { /* commited for receiving */
+ msg->msg_offset = offset;
+ msg->msg_wanted = mlen;
+ }
+
+ md->md_refcount++;
+ if (md->md_threshold != LNET_MD_THRESH_INF) {
+ LASSERT(md->md_threshold > 0);
+ md->md_threshold--;
+ }
+
+ /* build umd in event */
+ lnet_md2handle(&msg->msg_ev.md_handle, md);
+ lnet_md_deconstruct(md, &msg->msg_ev.md);
+}
+
+void
+lnet_msg_detach_md(lnet_msg_t *msg, int status)
+{
+ lnet_libmd_t *md = msg->msg_md;
+ int unlink;
+
+ /* Now it's safe to drop my caller's ref */
+ md->md_refcount--;
+ LASSERT(md->md_refcount >= 0);
+
+ unlink = lnet_md_unlinkable(md);
+ if (md->md_eq != NULL) {
+ msg->msg_ev.status = status;
+ msg->msg_ev.unlinked = unlink;
+ lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
+ }
+
+ if (unlink)
+ lnet_md_unlink(md);
+
+ msg->msg_md = NULL;
+}
+
+static int
+lnet_complete_msg_locked(lnet_msg_t *msg, int cpt)
+{
+ lnet_handle_wire_t ack_wmd;
+ int rc;
+ int status = msg->msg_ev.status;
+
+ LASSERT (msg->msg_onactivelist);
+
+ if (status == 0 && msg->msg_ack) {
+ /* Only send an ACK if the PUT completed successfully */
+
+ lnet_msg_decommit(msg, cpt, 0);
+
+ msg->msg_ack = 0;
+ lnet_net_unlock(cpt);
+
+ LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
+ LASSERT(!msg->msg_routing);
+
+ ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
+
+ lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);
+
+ msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
+ msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
+ msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
+
+ /* NB: we probably want to use NID of msg::msg_from as 3rd
+ * parameter (router NID) if it's routed message */
+ rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);
+
+ lnet_net_lock(cpt);
+ /*
+ * NB: message is committed for sending, we should return
+ * on success because LND will finalize this message later.
+ *
+ * Also, there is possibility that message is commited for
+ * sending and also failed before delivering to LND,
+ * i.e: ENOMEM, in that case we can't fall through either
+ * because CPT for sending can be different with CPT for
+ * receiving, so we should return back to lnet_finalize()
+ * to make sure we are locking the correct partition.
+ */
+ return rc;
+
+ } else if (status == 0 && /* OK so far */
+ (msg->msg_routing && !msg->msg_sending)) {
+ /* not forwarded */
+ LASSERT(!msg->msg_receiving); /* called back recv already */
+ lnet_net_unlock(cpt);
+
+ rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);
+
+ lnet_net_lock(cpt);
+ /*
+ * NB: message is committed for sending, we should return
+ * on success because LND will finalize this message later.
+ *
+ * Also, there is possibility that message is commited for
+ * sending and also failed before delivering to LND,
+ * i.e: ENOMEM, in that case we can't fall through either:
+ * - The rule is message must decommit for sending first if
+ * the it's committed for both sending and receiving
+ * - CPT for sending can be different with CPT for receiving,
+ * so we should return back to lnet_finalize() to make
+ * sure we are locking the correct partition.
+ */
+ return rc;
+ }
+
+ lnet_msg_decommit(msg, cpt, status);
+ lnet_msg_free_locked(msg);
+ return 0;
+}
+
+void
+lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status)
+{
+ struct lnet_msg_container *container;
+ int my_slot;
+ int cpt;
+ int rc;
+ int i;
+
+ LASSERT (!in_interrupt ());
+
+ if (msg == NULL)
+ return;
+#if 0
+ CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
+ lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
+ msg->msg_target_is_router ? "t" : "",
+ msg->msg_routing ? "X" : "",
+ msg->msg_ack ? "A" : "",
+ msg->msg_sending ? "S" : "",
+ msg->msg_receiving ? "R" : "",
+ msg->msg_delayed ? "d" : "",
+ msg->msg_txcredit ? "C" : "",
+ msg->msg_peertxcredit ? "c" : "",
+ msg->msg_rtrcredit ? "F" : "",
+ msg->msg_peerrtrcredit ? "f" : "",
+ msg->msg_onactivelist ? "!" : "",
+ msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
+ msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
+#endif
+ msg->msg_ev.status = status;
+
+ if (msg->msg_md != NULL) {
+ cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);
+
+ lnet_res_lock(cpt);
+ lnet_msg_detach_md(msg, status);
+ lnet_res_unlock(cpt);
+ }
+
+ again:
+ rc = 0;
+ if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
+ /* not commited to network yet */
+ LASSERT(!msg->msg_onactivelist);
+ lnet_msg_free(msg);
+ return;
+ }
+
+ /*
+ * NB: routed message can be commited for both receiving and sending,
+ * we should finalize in LIFO order and keep counters correct.
+ * (finalize sending first then finalize receiving)
+ */
+ cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
+ lnet_net_lock(cpt);
+
+ container = the_lnet.ln_msg_containers[cpt];
+ list_add_tail(&msg->msg_list, &container->msc_finalizing);
+
+ /* Recursion breaker. Don't complete the message here if I am (or
+ * enough other threads are) already completing messages */
+
+ my_slot = -1;
+ for (i = 0; i < container->msc_nfinalizers; i++) {
+ if (container->msc_finalizers[i] == current)
+ break;
+
+ if (my_slot < 0 && container->msc_finalizers[i] == NULL)
+ my_slot = i;
+ }
+
+ if (i < container->msc_nfinalizers || my_slot < 0) {
+ lnet_net_unlock(cpt);
+ return;
+ }
+
+ container->msc_finalizers[my_slot] = current;
+
+ while (!list_empty(&container->msc_finalizing)) {
+ msg = list_entry(container->msc_finalizing.next,
+ lnet_msg_t, msg_list);
+
+ list_del(&msg->msg_list);
+
+ /* NB drops and regains the lnet lock if it actually does
+ * anything, so my finalizing friends can chomp along too */
+ rc = lnet_complete_msg_locked(msg, cpt);
+ if (rc != 0)
+ break;
+ }
+
+ container->msc_finalizers[my_slot] = NULL;
+ lnet_net_unlock(cpt);
+
+ if (rc != 0)
+ goto again;
+}
+EXPORT_SYMBOL(lnet_finalize);
+
+void
+lnet_msg_container_cleanup(struct lnet_msg_container *container)
+{
+ int count = 0;
+
+ if (container->msc_init == 0)
+ return;
+
+ while (!list_empty(&container->msc_active)) {
+ lnet_msg_t *msg = list_entry(container->msc_active.next,
+ lnet_msg_t, msg_activelist);
+
+ LASSERT(msg->msg_onactivelist);
+ msg->msg_onactivelist = 0;
+ list_del(&msg->msg_activelist);
+ lnet_msg_free(msg);
+ count++;
+ }
+
+ if (count > 0)
+ CERROR("%d active msg on exit\n", count);
+
+ if (container->msc_finalizers != NULL) {
+ LIBCFS_FREE(container->msc_finalizers,
+ container->msc_nfinalizers *
+ sizeof(*container->msc_finalizers));
+ container->msc_finalizers = NULL;
+ }
+#ifdef LNET_USE_LIB_FREELIST
+ lnet_freelist_fini(&container->msc_freelist);
+#endif
+ container->msc_init = 0;
+}
+
+int
+lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
+{
+ int rc;
+
+ container->msc_init = 1;
+
+ INIT_LIST_HEAD(&container->msc_active);
+ INIT_LIST_HEAD(&container->msc_finalizing);
+
+#ifdef LNET_USE_LIB_FREELIST
+ memset(&container->msc_freelist, 0, sizeof(lnet_freelist_t));
+
+ rc = lnet_freelist_init(&container->msc_freelist,
+ LNET_FL_MAX_MSGS, sizeof(lnet_msg_t));
+ if (rc != 0) {
+ CERROR("Failed to init freelist for message container\n");
+ lnet_msg_container_cleanup(container);
+ return rc;
+ }
+#else
+ rc = 0;
+#endif
+ /* number of CPUs */
+ container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);
+
+ LIBCFS_CPT_ALLOC(container->msc_finalizers, lnet_cpt_table(), cpt,
+ container->msc_nfinalizers *
+ sizeof(*container->msc_finalizers));
+
+ if (container->msc_finalizers == NULL) {
+ CERROR("Failed to allocate message finalizers\n");
+ lnet_msg_container_cleanup(container);
+ return -ENOMEM;
+ }
+
+ return rc;
+}
+
+void
+lnet_msg_containers_destroy(void)
+{
+ struct lnet_msg_container *container;
+ int i;
+
+ if (the_lnet.ln_msg_containers == NULL)
+ return;
+
+ cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
+ lnet_msg_container_cleanup(container);
+
+ cfs_percpt_free(the_lnet.ln_msg_containers);
+ the_lnet.ln_msg_containers = NULL;
+}
+
+int
+lnet_msg_containers_create(void)
+{
+ struct lnet_msg_container *container;
+ int rc;
+ int i;
+
+ the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*container));
+
+ if (the_lnet.ln_msg_containers == NULL) {
+ CERROR("Failed to allocate cpu-partition data for network\n");
+ return -ENOMEM;
+ }
+
+ cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
+ rc = lnet_msg_container_setup(container, i);
+ if (rc != 0) {
+ lnet_msg_containers_destroy();
+ return rc;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
new file mode 100644
index 000000000000..9b9e7d3139b0
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lib-ptl.c
@@ -0,0 +1,938 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/lib-ptl.c
+ *
+ * portal & match routines
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+/* NB: add /proc interfaces in upcoming patches */
+int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
+CFS_MODULE_PARM(portal_rotor, "i", int, 0644,
+ "redirect PUTs to different cpu-partitions");
+
+static int
+lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
+ __u64 mbits, __u64 ignore_bits)
+{
+ struct lnet_portal *ptl = the_lnet.ln_portals[index];
+ int unique;
+
+ unique = ignore_bits == 0 &&
+ match_id.nid != LNET_NID_ANY &&
+ match_id.pid != LNET_PID_ANY;
+
+ LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
+
+ /* prefer to check w/o any lock */
+ if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
+ goto match;
+
+ /* unset, new portal */
+ lnet_ptl_lock(ptl);
+ /* check again with lock */
+ if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
+ lnet_ptl_unlock(ptl);
+ goto match;
+ }
+
+ /* still not set */
+ if (unique)
+ lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
+ else
+ lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
+
+ lnet_ptl_unlock(ptl);
+
+ return 1;
+
+ match:
+ if ((lnet_ptl_is_unique(ptl) && !unique) ||
+ (lnet_ptl_is_wildcard(ptl) && unique))
+ return 0;
+ return 1;
+}
+
+static void
+lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
+{
+ struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
+ int i;
+
+ /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
+ LASSERT(lnet_ptl_is_wildcard(ptl));
+
+ mtable->mt_enabled = 1;
+
+ ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
+ for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
+ LASSERT(ptl->ptl_mt_maps[i] != cpt);
+ if (ptl->ptl_mt_maps[i] < cpt)
+ break;
+
+ /* swap to order */
+ ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
+ ptl->ptl_mt_maps[i] = cpt;
+ }
+
+ ptl->ptl_mt_nmaps++;
+}
+
+static void
+lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
+{
+ struct lnet_match_table *mtable = ptl->ptl_mtables[cpt];
+ int i;
+
+ /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
+ LASSERT(lnet_ptl_is_wildcard(ptl));
+
+ if (LNET_CPT_NUMBER == 1)
+ return; /* never disable the only match-table */
+
+ mtable->mt_enabled = 0;
+
+ LASSERT(ptl->ptl_mt_nmaps > 0 &&
+ ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);
+
+ /* remove it from mt_maps */
+ ptl->ptl_mt_nmaps--;
+ for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
+ if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
+ ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
+ }
+}
+
+static int
+lnet_try_match_md(lnet_libmd_t *md,
+ struct lnet_match_info *info, struct lnet_msg *msg)
+{
+ /* ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
+ * lnet_match_blocked_msg() relies on this to avoid races */
+ unsigned int offset;
+ unsigned int mlength;
+ lnet_me_t *me = md->md_me;
+
+ /* MD exhausted */
+ if (lnet_md_exhausted(md))
+ return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;
+
+ /* mismatched MD op */
+ if ((md->md_options & info->mi_opc) == 0)
+ return LNET_MATCHMD_NONE;
+
+ /* mismatched ME nid/pid? */
+ if (me->me_match_id.nid != LNET_NID_ANY &&
+ me->me_match_id.nid != info->mi_id.nid)
+ return LNET_MATCHMD_NONE;
+
+ if (me->me_match_id.pid != LNET_PID_ANY &&
+ me->me_match_id.pid != info->mi_id.pid)
+ return LNET_MATCHMD_NONE;
+
+ /* mismatched ME matchbits? */
+ if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
+ return LNET_MATCHMD_NONE;
+
+ /* Hurrah! This _is_ a match; check it out... */
+
+ if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
+ offset = md->md_offset;
+ else
+ offset = info->mi_roffset;
+
+ if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
+ mlength = md->md_max_size;
+ LASSERT(md->md_offset + mlength <= md->md_length);
+ } else {
+ mlength = md->md_length - offset;
+ }
+
+ if (info->mi_rlength <= mlength) { /* fits in allowed space */
+ mlength = info->mi_rlength;
+ } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
+ /* this packet _really_ is too big */
+ CERROR("Matching packet from %s, match "LPU64
+ " length %d too big: %d left, %d allowed\n",
+ libcfs_id2str(info->mi_id), info->mi_mbits,
+ info->mi_rlength, md->md_length - offset, mlength);
+
+ return LNET_MATCHMD_DROP;
+ }
+
+ /* Commit to this ME/MD */
+ CDEBUG(D_NET, "Incoming %s index %x from %s of "
+ "length %d/%d into md "LPX64" [%d] + %d\n",
+ (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
+ info->mi_portal, libcfs_id2str(info->mi_id), mlength,
+ info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
+
+ lnet_msg_attach_md(msg, md, offset, mlength);
+ md->md_offset = offset + mlength;
+
+ if (!lnet_md_exhausted(md))
+ return LNET_MATCHMD_OK;
+
+ /* Auto-unlink NOW, so the ME gets unlinked if required.
+ * We bumped md->md_refcount above so the MD just gets flagged
+ * for unlink when it is finalized. */
+ if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0)
+ lnet_md_unlink(md);
+
+ return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
+}
+
+static struct lnet_match_table *
+lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
+{
+ if (LNET_CPT_NUMBER == 1)
+ return ptl->ptl_mtables[0]; /* the only one */
+
+ /* if it's a unique portal, return match-table hashed by NID */
+ return lnet_ptl_is_unique(ptl) ?
+ ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+}
+
+struct lnet_match_table *
+lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
+ __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
+{
+ struct lnet_portal *ptl;
+ struct lnet_match_table *mtable;
+
+ /* NB: called w/o lock */
+ LASSERT(index < the_lnet.ln_nportals);
+
+ if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
+ return NULL;
+
+ ptl = the_lnet.ln_portals[index];
+
+ mtable = lnet_match2mt(ptl, id, mbits);
+ if (mtable != NULL) /* unique portal or only one match-table */
+ return mtable;
+
+ /* it's a wildcard portal */
+ switch (pos) {
+ default:
+ return NULL;
+ case LNET_INS_BEFORE:
+ case LNET_INS_AFTER:
+ /* posted by no affinity thread, always hash to specific
+ * match-table to avoid buffer stealing which is heavy */
+ return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
+ case LNET_INS_LOCAL:
+ /* posted by cpu-affinity thread */
+ return ptl->ptl_mtables[lnet_cpt_current()];
+ }
+}
+
+static struct lnet_match_table *
+lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
+{
+ struct lnet_match_table *mtable;
+ struct lnet_portal *ptl;
+ int nmaps;
+ int rotor;
+ int routed;
+ int cpt;
+
+ /* NB: called w/o lock */
+ LASSERT(info->mi_portal < the_lnet.ln_nportals);
+ ptl = the_lnet.ln_portals[info->mi_portal];
+
+ LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));
+
+ mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
+ if (mtable != NULL)
+ return mtable;
+
+ /* it's a wildcard portal */
+ routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
+ LNET_NIDNET(msg->msg_hdr.dest_nid);
+
+ if (portal_rotor == LNET_PTL_ROTOR_OFF ||
+ (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
+ cpt = lnet_cpt_current();
+ if (ptl->ptl_mtables[cpt]->mt_enabled)
+ return ptl->ptl_mtables[cpt];
+ }
+
+ rotor = ptl->ptl_rotor++; /* get round-robin factor */
+ if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
+ cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+ else
+ cpt = rotor % LNET_CPT_NUMBER;
+
+ if (!ptl->ptl_mtables[cpt]->mt_enabled) {
+ /* is there any active entry for this portal? */
+ nmaps = ptl->ptl_mt_nmaps;
+ /* map to an active mtable to avoid heavy "stealing" */
+ if (nmaps != 0) {
+ /* NB: there is possibility that ptl_mt_maps is being
+ * changed because we are not under protection of
+ * lnet_ptl_lock, but it shouldn't hurt anything */
+ cpt = ptl->ptl_mt_maps[rotor % nmaps];
+ }
+ }
+
+ return ptl->ptl_mtables[cpt];
+}
+
+static int
+lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
+{
+ __u64 *bmap;
+ int i;
+
+ if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
+ return 0;
+
+ if (pos < 0) { /* check all bits */
+ for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
+ if (mtable->mt_exhausted[i] != (__u64)(-1))
+ return 0;
+ }
+ return 1;
+ }
+
+ LASSERT(pos <= LNET_MT_HASH_IGNORE);
+ /* mtable::mt_mhash[pos] is marked as exhausted or not */
+ bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
+ pos &= (1 << LNET_MT_BITS_U64) - 1;
+
+ return ((*bmap) & (1ULL << pos)) != 0;
+}
+
+static void
+lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
+{
+ __u64 *bmap;
+
+ LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
+ LASSERT(pos <= LNET_MT_HASH_IGNORE);
+
+ /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
+ bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
+ pos &= (1 << LNET_MT_BITS_U64) - 1;
+
+ if (!exhausted)
+ *bmap &= ~(1ULL << pos);
+ else
+ *bmap |= 1ULL << pos;
+}
+
+struct list_head *
+lnet_mt_match_head(struct lnet_match_table *mtable,
+ lnet_process_id_t id, __u64 mbits)
+{
+ struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
+
+ if (lnet_ptl_is_wildcard(ptl)) {
+ return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK];
+ } else {
+ unsigned long hash = mbits + id.nid + id.pid;
+
+ LASSERT(lnet_ptl_is_unique(ptl));
+ hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
+ return &mtable->mt_mhash[hash];
+ }
+}
+
+int
+lnet_mt_match_md(struct lnet_match_table *mtable,
+ struct lnet_match_info *info, struct lnet_msg *msg)
+{
+ struct list_head *head;
+ lnet_me_t *me;
+ lnet_me_t *tmp;
+ int exhausted = 0;
+ int rc;
+
+ /* any ME with ignore bits? */
+ if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
+ head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
+ else
+ head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
+ again:
+ /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
+ if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
+ exhausted = LNET_MATCHMD_EXHAUSTED;
+
+ list_for_each_entry_safe(me, tmp, head, me_list) {
+ /* ME attached but MD not attached yet */
+ if (me->me_md == NULL)
+ continue;
+
+ LASSERT(me == me->me_md->md_me);
+
+ rc = lnet_try_match_md(me->me_md, info, msg);
+ if ((rc & LNET_MATCHMD_EXHAUSTED) == 0)
+ exhausted = 0; /* mlist is not empty */
+
+ if ((rc & LNET_MATCHMD_FINISH) != 0) {
+ /* don't return EXHAUSTED bit because we don't know
+ * whether the mlist is empty or not */
+ return rc & ~LNET_MATCHMD_EXHAUSTED;
+ }
+ }
+
+ if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
+ lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
+ if (!lnet_mt_test_exhausted(mtable, -1))
+ exhausted = 0;
+ }
+
+ if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
+ head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
+ goto again; /* re-check MEs w/o ignore-bits */
+ }
+
+ if (info->mi_opc == LNET_MD_OP_GET ||
+ !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
+ return LNET_MATCHMD_DROP | exhausted;
+
+ return LNET_MATCHMD_NONE | exhausted;
+}
+
+static int
+lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
+{
+ int rc;
+
+ /* message arrived before any buffer posting on this portal,
+ * simply delay or drop this message */
+ if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
+ return 0;
+
+ lnet_ptl_lock(ptl);
+ /* check it again with hold of lock */
+ if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
+ lnet_ptl_unlock(ptl);
+ return 0;
+ }
+
+ if (lnet_ptl_is_lazy(ptl)) {
+ if (msg->msg_rx_ready_delay) {
+ msg->msg_rx_delayed = 1;
+ list_add_tail(&msg->msg_list,
+ &ptl->ptl_msg_delayed);
+ }
+ rc = LNET_MATCHMD_NONE;
+ } else {
+ rc = LNET_MATCHMD_DROP;
+ }
+
+ lnet_ptl_unlock(ptl);
+ return rc;
+}
+
+static int
+lnet_ptl_match_delay(struct lnet_portal *ptl,
+ struct lnet_match_info *info, struct lnet_msg *msg)
+{
+ int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
+ int rc = 0;
+ int i;
+
+ /* steal buffer from other CPTs, and delay it if nothing to steal,
+ * this function is more expensive than a regular match, but we
+ * don't expect it can happen a lot */
+ LASSERT(lnet_ptl_is_wildcard(ptl));
+
+ for (i = 0; i < LNET_CPT_NUMBER; i++) {
+ struct lnet_match_table *mtable;
+ int cpt;
+
+ cpt = (first + i) % LNET_CPT_NUMBER;
+ mtable = ptl->ptl_mtables[cpt];
+ if (i != 0 && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
+ continue;
+
+ lnet_res_lock(cpt);
+ lnet_ptl_lock(ptl);
+
+ if (i == 0) { /* the first try, attach on stealing list */
+ list_add_tail(&msg->msg_list,
+ &ptl->ptl_msg_stealing);
+ }
+
+ if (!list_empty(&msg->msg_list)) { /* on stealing list */
+ rc = lnet_mt_match_md(mtable, info, msg);
+
+ if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 &&
+ mtable->mt_enabled)
+ lnet_ptl_disable_mt(ptl, cpt);
+
+ if ((rc & LNET_MATCHMD_FINISH) != 0)
+ list_del_init(&msg->msg_list);
+
+ } else {
+ /* could be matched by lnet_ptl_attach_md()
+ * which is called by another thread */
+ rc = msg->msg_md == NULL ?
+ LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
+ }
+
+ if (!list_empty(&msg->msg_list) && /* not matched yet */
+ (i == LNET_CPT_NUMBER - 1 || /* the last CPT */
+ ptl->ptl_mt_nmaps == 0 || /* no active CPT */
+ (ptl->ptl_mt_nmaps == 1 && /* the only active CPT */
+ ptl->ptl_mt_maps[0] == cpt))) {
+ /* nothing to steal, delay or drop */
+ list_del_init(&msg->msg_list);
+
+ if (lnet_ptl_is_lazy(ptl)) {
+ msg->msg_rx_delayed = 1;
+ list_add_tail(&msg->msg_list,
+ &ptl->ptl_msg_delayed);
+ rc = LNET_MATCHMD_NONE;
+ } else {
+ rc = LNET_MATCHMD_DROP;
+ }
+ }
+
+ lnet_ptl_unlock(ptl);
+ lnet_res_unlock(cpt);
+
+ if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed)
+ break;
+ }
+
+ return rc;
+}
+
+int
+lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
+{
+ struct lnet_match_table *mtable;
+ struct lnet_portal *ptl;
+ int rc;
+
+ CDEBUG(D_NET, "Request from %s of length %d into portal %d "
+ "MB="LPX64"\n", libcfs_id2str(info->mi_id),
+ info->mi_rlength, info->mi_portal, info->mi_mbits);
+
+ if (info->mi_portal >= the_lnet.ln_nportals) {
+ CERROR("Invalid portal %d not in [0-%d]\n",
+ info->mi_portal, the_lnet.ln_nportals);
+ return LNET_MATCHMD_DROP;
+ }
+
+ ptl = the_lnet.ln_portals[info->mi_portal];
+ rc = lnet_ptl_match_early(ptl, msg);
+ if (rc != 0) /* matched or delayed early message */
+ return rc;
+
+ mtable = lnet_mt_of_match(info, msg);
+ lnet_res_lock(mtable->mt_cpt);
+
+ if (the_lnet.ln_shutdown) {
+ rc = LNET_MATCHMD_DROP;
+ goto out1;
+ }
+
+ rc = lnet_mt_match_md(mtable, info, msg);
+ if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 && mtable->mt_enabled) {
+ lnet_ptl_lock(ptl);
+ lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
+ lnet_ptl_unlock(ptl);
+ }
+
+ if ((rc & LNET_MATCHMD_FINISH) != 0) /* matched or dropping */
+ goto out1;
+
+ if (!msg->msg_rx_ready_delay)
+ goto out1;
+
+ LASSERT(lnet_ptl_is_lazy(ptl));
+ LASSERT(!msg->msg_rx_delayed);
+
+ /* NB: we don't expect "delay" can happen a lot */
+ if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
+ lnet_ptl_lock(ptl);
+
+ msg->msg_rx_delayed = 1;
+ list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
+
+ lnet_ptl_unlock(ptl);
+ lnet_res_unlock(mtable->mt_cpt);
+
+ } else {
+ lnet_res_unlock(mtable->mt_cpt);
+ rc = lnet_ptl_match_delay(ptl, info, msg);
+ }
+
+ if (msg->msg_rx_delayed) {
+ CDEBUG(D_NET,
+ "Delaying %s from %s ptl %d MB "LPX64" off %d len %d\n",
+ info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
+ libcfs_id2str(info->mi_id), info->mi_portal,
+ info->mi_mbits, info->mi_roffset, info->mi_rlength);
+ }
+ goto out0;
+ out1:
+ lnet_res_unlock(mtable->mt_cpt);
+ out0:
+ /* EXHAUSTED bit is only meaningful for internal functions */
+ return rc & ~LNET_MATCHMD_EXHAUSTED;
+}
+
+void
+lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
+{
+ LASSERT(me->me_md == md && md->md_me == me);
+
+ me->me_md = NULL;
+ md->md_me = NULL;
+}
+
+/* called with lnet_res_lock held */
+void
+lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+ struct list_head *matches, struct list_head *drops)
+{
+ struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
+ struct lnet_match_table *mtable;
+ struct list_head *head;
+ lnet_msg_t *tmp;
+ lnet_msg_t *msg;
+ int exhausted = 0;
+ int cpt;
+
+ LASSERT(md->md_refcount == 0); /* a brand new MD */
+
+ me->me_md = md;
+ md->md_me = me;
+
+ cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
+ mtable = ptl->ptl_mtables[cpt];
+
+ if (list_empty(&ptl->ptl_msg_stealing) &&
+ list_empty(&ptl->ptl_msg_delayed) &&
+ !lnet_mt_test_exhausted(mtable, me->me_pos))
+ return;
+
+ lnet_ptl_lock(ptl);
+ head = &ptl->ptl_msg_stealing;
+ again:
+ list_for_each_entry_safe(msg, tmp, head, msg_list) {
+ struct lnet_match_info info;
+ lnet_hdr_t *hdr;
+ int rc;
+
+ LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
+
+ hdr = &msg->msg_hdr;
+ info.mi_id.nid = hdr->src_nid;
+ info.mi_id.pid = hdr->src_pid;
+ info.mi_opc = LNET_MD_OP_PUT;
+ info.mi_portal = hdr->msg.put.ptl_index;
+ info.mi_rlength = hdr->payload_length;
+ info.mi_roffset = hdr->msg.put.offset;
+ info.mi_mbits = hdr->msg.put.match_bits;
+
+ rc = lnet_try_match_md(md, &info, msg);
+
+ exhausted = (rc & LNET_MATCHMD_EXHAUSTED) != 0;
+ if ((rc & LNET_MATCHMD_NONE) != 0) {
+ if (exhausted)
+ break;
+ continue;
+ }
+
+ /* Hurrah! This _is_ a match */
+ LASSERT((rc & LNET_MATCHMD_FINISH) != 0);
+ list_del_init(&msg->msg_list);
+
+ if (head == &ptl->ptl_msg_stealing) {
+ if (exhausted)
+ break;
+ /* stealing thread will handle the message */
+ continue;
+ }
+
+ if ((rc & LNET_MATCHMD_OK) != 0) {
+ list_add_tail(&msg->msg_list, matches);
+
+ CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
+ "match "LPU64" offset %d length %d.\n",
+ libcfs_id2str(info.mi_id),
+ info.mi_portal, info.mi_mbits,
+ info.mi_roffset, info.mi_rlength);
+ } else {
+ list_add_tail(&msg->msg_list, drops);
+ }
+
+ if (exhausted)
+ break;
+ }
+
+ if (!exhausted && head == &ptl->ptl_msg_stealing) {
+ head = &ptl->ptl_msg_delayed;
+ goto again;
+ }
+
+ if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
+ lnet_mt_set_exhausted(mtable, me->me_pos, 0);
+ if (!mtable->mt_enabled)
+ lnet_ptl_enable_mt(ptl, cpt);
+ }
+
+ lnet_ptl_unlock(ptl);
+}
+
+void
+lnet_ptl_cleanup(struct lnet_portal *ptl)
+{
+ struct lnet_match_table *mtable;
+ int i;
+
+ if (ptl->ptl_mtables == NULL) /* uninitialized portal */
+ return;
+
+ LASSERT(list_empty(&ptl->ptl_msg_delayed));
+ LASSERT(list_empty(&ptl->ptl_msg_stealing));
+ cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
+ struct list_head *mhash;
+ lnet_me_t *me;
+ int j;
+
+ if (mtable->mt_mhash == NULL) /* uninitialized match-table */
+ continue;
+
+ mhash = mtable->mt_mhash;
+ /* cleanup ME */
+ for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
+ while (!list_empty(&mhash[j])) {
+ me = list_entry(mhash[j].next,
+ lnet_me_t, me_list);
+ CERROR("Active ME %p on exit\n", me);
+ list_del(&me->me_list);
+ lnet_me_free(me);
+ }
+ }
+ /* the extra entry is for MEs with ignore bits */
+ LIBCFS_FREE(mhash, sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
+ }
+
+ cfs_percpt_free(ptl->ptl_mtables);
+ ptl->ptl_mtables = NULL;
+}
+
+int
+lnet_ptl_setup(struct lnet_portal *ptl, int index)
+{
+ struct lnet_match_table *mtable;
+ struct list_head *mhash;
+ int i;
+ int j;
+
+ ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(struct lnet_match_table));
+ if (ptl->ptl_mtables == NULL) {
+ CERROR("Failed to create match table for portal %d\n", index);
+ return -ENOMEM;
+ }
+
+ ptl->ptl_index = index;
+ INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
+ INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
+ spin_lock_init(&ptl->ptl_lock);
+ cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
+ /* the extra entry is for MEs with ignore bits */
+ LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i,
+ sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
+ if (mhash == NULL) {
+ CERROR("Failed to create match hash for portal %d\n",
+ index);
+ goto failed;
+ }
+
+ memset(&mtable->mt_exhausted[0], -1,
+ sizeof(mtable->mt_exhausted[0]) *
+ LNET_MT_EXHAUSTED_BMAP);
+ mtable->mt_mhash = mhash;
+ for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
+ INIT_LIST_HEAD(&mhash[j]);
+
+ mtable->mt_portal = index;
+ mtable->mt_cpt = i;
+ }
+
+ return 0;
+ failed:
+ lnet_ptl_cleanup(ptl);
+ return -ENOMEM;
+}
+
+void
+lnet_portals_destroy(void)
+{
+ int i;
+
+ if (the_lnet.ln_portals == NULL)
+ return;
+
+ for (i = 0; i < the_lnet.ln_nportals; i++)
+ lnet_ptl_cleanup(the_lnet.ln_portals[i]);
+
+ cfs_array_free(the_lnet.ln_portals);
+ the_lnet.ln_portals = NULL;
+}
+
+int
+lnet_portals_create(void)
+{
+ int size;
+ int i;
+
+ size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);
+
+ the_lnet.ln_nportals = MAX_PORTALS;
+ the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size);
+ if (the_lnet.ln_portals == NULL) {
+ CERROR("Failed to allocate portals table\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < the_lnet.ln_nportals; i++) {
+ if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
+ lnet_portals_destroy();
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Turn on the lazy portal attribute. Use with caution!
+ *
+ * This portal attribute only affects incoming PUT requests to the portal,
+ * and is off by default. By default, if there's no matching MD for an
+ * incoming PUT request, it is simply dropped. With the lazy attribute on,
+ * such requests are queued indefinitely until either a matching MD is
+ * posted to the portal or the lazy attribute is turned off.
+ *
+ * It would prevent dropped requests, however it should be regarded as the
+ * last line of defense - i.e. users must keep a close watch on active
+ * buffers on a lazy portal and once it becomes too low post more buffers as
+ * soon as possible. This is because delayed requests usually have detrimental
+ * effects on underlying network connections. A few delayed requests often
+ * suffice to bring an underlying connection to a complete halt, due to flow
+ * control mechanisms.
+ *
+ * There's also a DOS attack risk. If users don't post match-all MDs on a
+ * lazy portal, a malicious peer can easily stop a service by sending some
+ * PUT requests with match bits that won't match any MD. A routed server is
+ * especially vulnerable since the connections to its neighbor routers are
+ * shared among all clients.
+ *
+ * \param portal Index of the portal to enable the lazy attribute on.
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If \a portal is not a valid index.
+ */
+int
+LNetSetLazyPortal(int portal)
+{
+ struct lnet_portal *ptl;
+
+ if (portal < 0 || portal >= the_lnet.ln_nportals)
+ return -EINVAL;
+
+ CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
+ ptl = the_lnet.ln_portals[portal];
+
+ lnet_res_lock(LNET_LOCK_EX);
+ lnet_ptl_lock(ptl);
+
+ lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
+
+ lnet_ptl_unlock(ptl);
+ lnet_res_unlock(LNET_LOCK_EX);
+
+ return 0;
+}
+EXPORT_SYMBOL(LNetSetLazyPortal);
+
+/**
+ * Turn off the lazy portal attribute. Delayed requests on the portal,
+ * if any, will be all dropped when this function returns.
+ *
+ * \param portal Index of the portal to disable the lazy attribute on.
+ *
+ * \retval 0 On success.
+ * \retval -EINVAL If \a portal is not a valid index.
+ */
+int
+LNetClearLazyPortal(int portal)
+{
+ struct lnet_portal *ptl;
+ LIST_HEAD (zombies);
+
+ if (portal < 0 || portal >= the_lnet.ln_nportals)
+ return -EINVAL;
+
+ ptl = the_lnet.ln_portals[portal];
+
+ lnet_res_lock(LNET_LOCK_EX);
+ lnet_ptl_lock(ptl);
+
+ if (!lnet_ptl_is_lazy(ptl)) {
+ lnet_ptl_unlock(ptl);
+ lnet_res_unlock(LNET_LOCK_EX);
+ return 0;
+ }
+
+ if (the_lnet.ln_shutdown)
+ CWARN("Active lazy portal %d on exit\n", portal);
+ else
+ CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
+
+ /* grab all the blocked messages atomically */
+ list_splice_init(&ptl->ptl_msg_delayed, &zombies);
+
+ lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
+
+ lnet_ptl_unlock(ptl);
+ lnet_res_unlock(LNET_LOCK_EX);
+
+ lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");
+
+ return 0;
+}
+EXPORT_SYMBOL(LNetClearLazyPortal);
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
new file mode 100644
index 000000000000..670dae34107c
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/lo.c
@@ -0,0 +1,120 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+int
+lolnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+ LASSERT (!lntmsg->msg_routing);
+ LASSERT (!lntmsg->msg_target_is_router);
+
+ return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
+}
+
+int
+lolnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+ int delayed, unsigned int niov,
+ struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+ lnet_msg_t *sendmsg = private;
+
+ if (lntmsg != NULL) { /* not discarding */
+ if (sendmsg->msg_iov != NULL) {
+ if (iov != NULL)
+ lnet_copy_iov2iov(niov, iov, offset,
+ sendmsg->msg_niov,
+ sendmsg->msg_iov,
+ sendmsg->msg_offset, mlen);
+ else
+ lnet_copy_iov2kiov(niov, kiov, offset,
+ sendmsg->msg_niov,
+ sendmsg->msg_iov,
+ sendmsg->msg_offset, mlen);
+ } else {
+ if (iov != NULL)
+ lnet_copy_kiov2iov(niov, iov, offset,
+ sendmsg->msg_niov,
+ sendmsg->msg_kiov,
+ sendmsg->msg_offset, mlen);
+ else
+ lnet_copy_kiov2kiov(niov, kiov, offset,
+ sendmsg->msg_niov,
+ sendmsg->msg_kiov,
+ sendmsg->msg_offset, mlen);
+ }
+
+ lnet_finalize(ni, lntmsg, 0);
+ }
+
+ lnet_finalize(ni, sendmsg, 0);
+ return 0;
+}
+
+static int lolnd_instanced;
+
+void
+lolnd_shutdown(lnet_ni_t *ni)
+{
+ CDEBUG (D_NET, "shutdown\n");
+ LASSERT (lolnd_instanced);
+
+ lolnd_instanced = 0;
+}
+
+int
+lolnd_startup (lnet_ni_t *ni)
+{
+ LASSERT (ni->ni_lnd == &the_lolnd);
+ LASSERT (!lolnd_instanced);
+ lolnd_instanced = 1;
+
+ return (0);
+}
+
+lnd_t the_lolnd = {
+ /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
+ /* .lnd_refcount = */ 0,
+ /* .lnd_type = */ LOLND,
+ /* .lnd_startup = */ lolnd_startup,
+ /* .lnd_shutdown = */ lolnd_shutdown,
+ /* .lnt_ctl = */ NULL,
+ /* .lnd_send = */ lolnd_send,
+ /* .lnd_recv = */ lolnd_recv,
+ /* .lnd_eager_recv = */ NULL,
+ /* .lnd_notify = */ NULL,
+ /* .lnd_accept = */ NULL
+};
diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c
new file mode 100644
index 000000000000..c8323854580a
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/module.c
@@ -0,0 +1,154 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+static int config_on_load = 0;
+CFS_MODULE_PARM(config_on_load, "i", int, 0444,
+ "configure network at module load");
+
+static struct mutex lnet_config_mutex;
+
+int
+lnet_configure (void *arg)
+{
+ /* 'arg' only there so I can be passed to cfs_create_thread() */
+ int rc = 0;
+
+ LNET_MUTEX_LOCK(&lnet_config_mutex);
+
+ if (!the_lnet.ln_niinit_self) {
+ rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
+ if (rc >= 0) {
+ the_lnet.ln_niinit_self = 1;
+ rc = 0;
+ }
+ }
+
+ LNET_MUTEX_UNLOCK(&lnet_config_mutex);
+ return rc;
+}
+
+int
+lnet_unconfigure (void)
+{
+ int refcount;
+
+ LNET_MUTEX_LOCK(&lnet_config_mutex);
+
+ if (the_lnet.ln_niinit_self) {
+ the_lnet.ln_niinit_self = 0;
+ LNetNIFini();
+ }
+
+ LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
+ refcount = the_lnet.ln_refcount;
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
+
+ LNET_MUTEX_UNLOCK(&lnet_config_mutex);
+ return (refcount == 0) ? 0 : -EBUSY;
+}
+
+int
+lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data)
+{
+ int rc;
+
+ switch (cmd) {
+ case IOC_LIBCFS_CONFIGURE:
+ return lnet_configure(NULL);
+
+ case IOC_LIBCFS_UNCONFIGURE:
+ return lnet_unconfigure();
+
+ default:
+ /* Passing LNET_PID_ANY only gives me a ref if the net is up
+ * already; I'll need it to ensure the net can't go down while
+ * I'm called into it */
+ rc = LNetNIInit(LNET_PID_ANY);
+ if (rc >= 0) {
+ rc = LNetCtl(cmd, data);
+ LNetNIFini();
+ }
+ return rc;
+ }
+}
+
+DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl);
+
+int
+init_lnet(void)
+{
+ int rc;
+ ENTRY;
+
+ mutex_init(&lnet_config_mutex);
+
+ rc = LNetInit();
+ if (rc != 0) {
+ CERROR("LNetInit: error %d\n", rc);
+ RETURN(rc);
+ }
+
+ rc = libcfs_register_ioctl(&lnet_ioctl_handler);
+ LASSERT (rc == 0);
+
+ if (config_on_load) {
+ /* Have to schedule a separate thread to avoid deadlocking
+ * in modload */
+ (void) kthread_run(lnet_configure, NULL, "lnet_initd");
+ }
+
+ RETURN(0);
+}
+
+void
+fini_lnet(void)
+{
+ int rc;
+
+ rc = libcfs_deregister_ioctl(&lnet_ioctl_handler);
+ LASSERT (rc == 0);
+
+ LNetFini();
+}
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+cfs_module(lnet, "1.0.0", init_lnet, fini_lnet);
diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c
new file mode 100644
index 000000000000..286977691393
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/peer.c
@@ -0,0 +1,337 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/peer.c
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/lnet/lib-lnet.h>
+
+int
+lnet_peer_tables_create(void)
+{
+ struct lnet_peer_table *ptable;
+ struct list_head *hash;
+ int i;
+ int j;
+
+ the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(*ptable));
+ if (the_lnet.ln_peer_tables == NULL) {
+ CERROR("Failed to allocate cpu-partition peer tables\n");
+ return -ENOMEM;
+ }
+
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+ INIT_LIST_HEAD(&ptable->pt_deathrow);
+
+ LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
+ LNET_PEER_HASH_SIZE * sizeof(*hash));
+ if (hash == NULL) {
+ CERROR("Failed to create peer hash table\n");
+ lnet_peer_tables_destroy();
+ return -ENOMEM;
+ }
+
+ for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+ INIT_LIST_HEAD(&hash[j]);
+ ptable->pt_hash = hash; /* sign of initialization */
+ }
+
+ return 0;
+}
+
+void
+lnet_peer_tables_destroy(void)
+{
+ struct lnet_peer_table *ptable;
+ struct list_head *hash;
+ int i;
+ int j;
+
+ if (the_lnet.ln_peer_tables == NULL)
+ return;
+
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+ hash = ptable->pt_hash;
+ if (hash == NULL) /* not intialized */
+ break;
+
+ LASSERT(list_empty(&ptable->pt_deathrow));
+
+ ptable->pt_hash = NULL;
+ for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+ LASSERT(list_empty(&hash[j]));
+
+ LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+ }
+
+ cfs_percpt_free(the_lnet.ln_peer_tables);
+ the_lnet.ln_peer_tables = NULL;
+}
+
+void
+lnet_peer_tables_cleanup(void)
+{
+ struct lnet_peer_table *ptable;
+ int i;
+ int j;
+
+ LASSERT(the_lnet.ln_shutdown); /* i.e. no new peers */
+
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+ lnet_net_lock(i);
+
+ for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
+ struct list_head *peers = &ptable->pt_hash[j];
+
+ while (!list_empty(peers)) {
+ lnet_peer_t *lp = list_entry(peers->next,
+ lnet_peer_t,
+ lp_hashlist);
+ list_del_init(&lp->lp_hashlist);
+ /* lose hash table's ref */
+ lnet_peer_decref_locked(lp);
+ }
+ }
+
+ lnet_net_unlock(i);
+ }
+
+ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+ LIST_HEAD (deathrow);
+ lnet_peer_t *lp;
+
+ lnet_net_lock(i);
+
+ for (j = 3; ptable->pt_number != 0; j++) {
+ lnet_net_unlock(i);
+
+ if ((j & (j - 1)) == 0) {
+ CDEBUG(D_WARNING,
+ "Waiting for %d peers on peer table\n",
+ ptable->pt_number);
+ }
+ cfs_pause(cfs_time_seconds(1) / 2);
+ lnet_net_lock(i);
+ }
+ list_splice_init(&ptable->pt_deathrow, &deathrow);
+
+ lnet_net_unlock(i);
+
+ while (!list_empty(&deathrow)) {
+ lp = list_entry(deathrow.next,
+ lnet_peer_t, lp_hashlist);
+ list_del(&lp->lp_hashlist);
+ LIBCFS_FREE(lp, sizeof(*lp));
+ }
+ }
+}
+
+void
+lnet_destroy_peer_locked(lnet_peer_t *lp)
+{
+ struct lnet_peer_table *ptable;
+
+ LASSERT(lp->lp_refcount == 0);
+ LASSERT(lp->lp_rtr_refcount == 0);
+ LASSERT(list_empty(&lp->lp_txq));
+ LASSERT(list_empty(&lp->lp_hashlist));
+ LASSERT(lp->lp_txqnob == 0);
+
+ ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
+ LASSERT(ptable->pt_number > 0);
+ ptable->pt_number--;
+
+ lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
+ lp->lp_ni = NULL;
+
+ list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+}
+
+lnet_peer_t *
+lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
+{
+ struct list_head *peers;
+ lnet_peer_t *lp;
+
+ LASSERT(!the_lnet.ln_shutdown);
+
+ peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
+ list_for_each_entry(lp, peers, lp_hashlist) {
+ if (lp->lp_nid == nid) {
+ lnet_peer_addref_locked(lp);
+ return lp;
+ }
+ }
+
+ return NULL;
+}
+
+int
+lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
+{
+ struct lnet_peer_table *ptable;
+ lnet_peer_t *lp = NULL;
+ lnet_peer_t *lp2;
+ int cpt2;
+ int rc = 0;
+
+ *lpp = NULL;
+ if (the_lnet.ln_shutdown) /* it's shutting down */
+ return -ESHUTDOWN;
+
+ /* cpt can be LNET_LOCK_EX if it's called from router functions */
+ cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+
+ ptable = the_lnet.ln_peer_tables[cpt2];
+ lp = lnet_find_peer_locked(ptable, nid);
+ if (lp != NULL) {
+ *lpp = lp;
+ return 0;
+ }
+
+ if (!list_empty(&ptable->pt_deathrow)) {
+ lp = list_entry(ptable->pt_deathrow.next,
+ lnet_peer_t, lp_hashlist);
+ list_del(&lp->lp_hashlist);
+ }
+
+ /*
+ * take extra refcount in case another thread has shutdown LNet
+ * and destroyed locks and peer-table before I finish the allocation
+ */
+ ptable->pt_number++;
+ lnet_net_unlock(cpt);
+
+ if (lp != NULL)
+ memset(lp, 0, sizeof(*lp));
+ else
+ LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
+
+ if (lp == NULL) {
+ rc = -ENOMEM;
+ lnet_net_lock(cpt);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&lp->lp_txq);
+ INIT_LIST_HEAD(&lp->lp_rtrq);
+ INIT_LIST_HEAD(&lp->lp_routes);
+
+ lp->lp_notify = 0;
+ lp->lp_notifylnd = 0;
+ lp->lp_notifying = 0;
+ lp->lp_alive_count = 0;
+ lp->lp_timestamp = 0;
+ lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
+ lp->lp_last_alive = cfs_time_current(); /* assumes alive */
+ lp->lp_last_query = 0; /* haven't asked NI yet */
+ lp->lp_ping_timestamp = 0;
+ lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ lp->lp_nid = nid;
+ lp->lp_cpt = cpt2;
+ lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
+ lp->lp_rtr_refcount = 0;
+
+ lnet_net_lock(cpt);
+
+ if (the_lnet.ln_shutdown) {
+ rc = -ESHUTDOWN;
+ goto out;
+ }
+
+ lp2 = lnet_find_peer_locked(ptable, nid);
+ if (lp2 != NULL) {
+ *lpp = lp2;
+ goto out;
+ }
+
+ lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
+ if (lp->lp_ni == NULL) {
+ rc = -EHOSTUNREACH;
+ goto out;
+ }
+
+ lp->lp_txcredits =
+ lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+ lp->lp_rtrcredits =
+ lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+
+ list_add_tail(&lp->lp_hashlist,
+ &ptable->pt_hash[lnet_nid2peerhash(nid)]);
+ ptable->pt_version++;
+ *lpp = lp;
+
+ return 0;
+out:
+ if (lp != NULL)
+ list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+ ptable->pt_number--;
+ return rc;
+}
+
+void
+lnet_debug_peer(lnet_nid_t nid)
+{
+ char *aliveness = "NA";
+ lnet_peer_t *lp;
+ int rc;
+ int cpt;
+
+ cpt = lnet_cpt_of_nid(nid);
+ lnet_net_lock(cpt);
+
+ rc = lnet_nid2peer_locked(&lp, nid, cpt);
+ if (rc != 0) {
+ lnet_net_unlock(cpt);
+ CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
+ return;
+ }
+
+ if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
+ aliveness = lp->lp_alive ? "up" : "down";
+
+ CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
+ libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
+ aliveness, lp->lp_ni->ni_peertxcredits,
+ lp->lp_rtrcredits, lp->lp_minrtrcredits,
+ lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
+
+ lnet_peer_decref_locked(lp);
+
+ lnet_net_unlock(cpt);
+}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
new file mode 100644
index 000000000000..a326ce06bc76
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -0,0 +1,1694 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/lnet/lib-lnet.h>
+
+#if defined(LNET_ROUTER)
+
+#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
+#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
+#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
+#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
+#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
+#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
+
+static char *forwarding = "";
+CFS_MODULE_PARM(forwarding, "s", charp, 0444,
+ "Explicitly enable/disable forwarding between networks");
+
+static int tiny_router_buffers;
+CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444,
+ "# of 0 payload messages to buffer in the router");
+static int small_router_buffers;
+CFS_MODULE_PARM(small_router_buffers, "i", int, 0444,
+ "# of small (1 page) messages to buffer in the router");
+static int large_router_buffers;
+CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
+ "# of large messages to buffer in the router");
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+ "# router buffer credits per peer");
+
+static int auto_down = 1;
+CFS_MODULE_PARM(auto_down, "i", int, 0444,
+ "Automatically mark peers down on comms error");
+
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+ /* NI option overrides LNet default */
+ if (ni->ni_peerrtrcredits > 0)
+ return ni->ni_peerrtrcredits;
+ if (peer_buffer_credits > 0)
+ return peer_buffer_credits;
+
+ /* As an approximation, allow this peer the same number of router
+ * buffers as it is allowed outstanding sends */
+ return ni->ni_peertxcredits;
+}
+
+/* forward ref's */
+static int lnet_router_checker(void *);
+#else
+
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+ return 0;
+}
+
+#endif
+
+static int check_routers_before_use = 0;
+CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444,
+ "Assume routers are down and ping them before use");
+
+static int avoid_asym_router_failure = 1;
+CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644,
+ "Avoid asymmetrical router failures (0 to disable)");
+
+static int dead_router_check_interval = 60;
+CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0644,
+ "Seconds between dead router health checks (<= 0 to disable)");
+
+static int live_router_check_interval = 60;
+CFS_MODULE_PARM(live_router_check_interval, "i", int, 0644,
+ "Seconds between live router health checks (<= 0 to disable)");
+
+static int router_ping_timeout = 50;
+CFS_MODULE_PARM(router_ping_timeout, "i", int, 0644,
+ "Seconds to wait for the reply to a router health query");
+
+int
+lnet_peers_start_down(void)
+{
+ return check_routers_before_use;
+}
+
+void
+lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when)
+{
+ if (cfs_time_before(when, lp->lp_timestamp)) { /* out of date information */
+ CDEBUG(D_NET, "Out of date\n");
+ return;
+ }
+
+ lp->lp_timestamp = when; /* update timestamp */
+ lp->lp_ping_deadline = 0; /* disable ping timeout */
+
+ if (lp->lp_alive_count != 0 && /* got old news */
+ (!lp->lp_alive) == (!alive)) { /* new date for old news */
+ CDEBUG(D_NET, "Old news\n");
+ return;
+ }
+
+ /* Flag that notification is outstanding */
+
+ lp->lp_alive_count++;
+ lp->lp_alive = !(!alive); /* 1 bit! */
+ lp->lp_notify = 1;
+ lp->lp_notifylnd |= notifylnd;
+ if (lp->lp_alive)
+ lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */
+
+ CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
+}
+
+void
+lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp)
+{
+ int alive;
+ int notifylnd;
+
+ /* Notify only in 1 thread at any time to ensure ordered notification.
+ * NB individual events can be missed; the only guarantee is that you
+ * always get the most recent news */
+
+ if (lp->lp_notifying)
+ return;
+
+ lp->lp_notifying = 1;
+
+ while (lp->lp_notify) {
+ alive = lp->lp_alive;
+ notifylnd = lp->lp_notifylnd;
+
+ lp->lp_notifylnd = 0;
+ lp->lp_notify = 0;
+
+ if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
+ lnet_net_unlock(lp->lp_cpt);
+
+ /* A new notification could happen now; I'll handle it
+ * when control returns to me */
+
+ (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+
+ lnet_net_lock(lp->lp_cpt);
+ }
+ }
+
+ lp->lp_notifying = 0;
+}
+
+
+static void
+lnet_rtr_addref_locked(lnet_peer_t *lp)
+{
+ LASSERT(lp->lp_refcount > 0);
+ LASSERT(lp->lp_rtr_refcount >= 0);
+
+ /* lnet_net_lock must be exclusively locked */
+ lp->lp_rtr_refcount++;
+ if (lp->lp_rtr_refcount == 1) {
+ struct list_head *pos;
+
+ /* a simple insertion sort */
+ list_for_each_prev(pos, &the_lnet.ln_routers) {
+ lnet_peer_t *rtr = list_entry(pos, lnet_peer_t,
+ lp_rtr_list);
+
+ if (rtr->lp_nid < lp->lp_nid)
+ break;
+ }
+
+ list_add(&lp->lp_rtr_list, pos);
+ /* addref for the_lnet.ln_routers */
+ lnet_peer_addref_locked(lp);
+ the_lnet.ln_routers_version++;
+ }
+}
+
+static void
+lnet_rtr_decref_locked(lnet_peer_t *lp)
+{
+ LASSERT(lp->lp_refcount > 0);
+ LASSERT(lp->lp_rtr_refcount > 0);
+
+ /* lnet_net_lock must be exclusively locked */
+ lp->lp_rtr_refcount--;
+ if (lp->lp_rtr_refcount == 0) {
+ LASSERT(list_empty(&lp->lp_routes));
+
+ if (lp->lp_rcd != NULL) {
+ list_add(&lp->lp_rcd->rcd_list,
+ &the_lnet.ln_rcd_deathrow);
+ lp->lp_rcd = NULL;
+ }
+
+ list_del(&lp->lp_rtr_list);
+ /* decref for the_lnet.ln_routers */
+ lnet_peer_decref_locked(lp);
+ the_lnet.ln_routers_version++;
+ }
+}
+
+lnet_remotenet_t *
+lnet_find_net_locked (__u32 net)
+{
+ lnet_remotenet_t *rnet;
+ struct list_head *tmp;
+ struct list_head *rn_list;
+
+ LASSERT(!the_lnet.ln_shutdown);
+
+ rn_list = lnet_net2rnethash(net);
+ list_for_each(tmp, rn_list) {
+ rnet = list_entry(tmp, lnet_remotenet_t, lrn_list);
+
+ if (rnet->lrn_net == net)
+ return rnet;
+ }
+ return NULL;
+}
+
+static void lnet_shuffle_seed(void)
+{
+ static int seeded = 0;
+ int lnd_type, seed[2];
+ struct timeval tv;
+ lnet_ni_t *ni;
+ struct list_head *tmp;
+
+ if (seeded)
+ return;
+
+ cfs_get_random_bytes(seed, sizeof(seed));
+
+ /* Nodes with small feet have little entropy
+ * the NID for this node gives the most entropy in the low bits */
+ list_for_each(tmp, &the_lnet.ln_nis) {
+ ni = list_entry(tmp, lnet_ni_t, ni_list);
+ lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+
+ if (lnd_type != LOLND)
+ seed[0] ^= (LNET_NIDADDR(ni->ni_nid) | lnd_type);
+ }
+
+ do_gettimeofday(&tv);
+ cfs_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]);
+ seeded = 1;
+ return;
+}
+
+/* NB expects LNET_LOCK held */
+void
+lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route)
+{
+ unsigned int len = 0;
+ unsigned int offset = 0;
+ struct list_head *e;
+
+ lnet_shuffle_seed();
+
+ list_for_each (e, &rnet->lrn_routes) {
+ len++;
+ }
+
+ /* len+1 positions to add a new entry, also prevents division by 0 */
+ offset = cfs_rand() % (len + 1);
+ list_for_each (e, &rnet->lrn_routes) {
+ if (offset == 0)
+ break;
+ offset--;
+ }
+ list_add(&route->lr_list, e);
+ list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);
+
+ the_lnet.ln_remote_nets_version++;
+ lnet_rtr_addref_locked(route->lr_gateway);
+}
+
+int
+lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
+{
+ struct list_head *e;
+ lnet_remotenet_t *rnet;
+ lnet_remotenet_t *rnet2;
+ lnet_route_t *route;
+ lnet_ni_t *ni;
+ int add_route;
+ int rc;
+
+ CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n",
+ libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+
+ if (gateway == LNET_NID_ANY ||
+ LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
+ net == LNET_NIDNET(LNET_NID_ANY) ||
+ LNET_NETTYP(net) == LOLND ||
+ LNET_NIDNET(gateway) == net ||
+ hops < 1 || hops > 255)
+ return (-EINVAL);
+
+ if (lnet_islocalnet(net)) /* it's a local network */
+ return 0; /* ignore the route entry */
+
+ /* Assume net, route, all new */
+ LIBCFS_ALLOC(route, sizeof(*route));
+ LIBCFS_ALLOC(rnet, sizeof(*rnet));
+ if (route == NULL || rnet == NULL) {
+ CERROR("Out of memory creating route %s %d %s\n",
+ libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+ if (route != NULL)
+ LIBCFS_FREE(route, sizeof(*route));
+ if (rnet != NULL)
+ LIBCFS_FREE(rnet, sizeof(*rnet));
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&rnet->lrn_routes);
+ rnet->lrn_net = net;
+ route->lr_hops = hops;
+ route->lr_net = net;
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
+ if (rc != 0) {
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ LIBCFS_FREE(route, sizeof(*route));
+ LIBCFS_FREE(rnet, sizeof(*rnet));
+
+ if (rc == -EHOSTUNREACH) { /* gateway is not on a local net */
+ return 0; /* ignore the route entry */
+ } else {
+ CERROR("Error %d creating route %s %d %s\n", rc,
+ libcfs_net2str(net), hops,
+ libcfs_nid2str(gateway));
+ }
+ return rc;
+ }
+
+ LASSERT (!the_lnet.ln_shutdown);
+
+ rnet2 = lnet_find_net_locked(net);
+ if (rnet2 == NULL) {
+ /* new network */
+ list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
+ rnet2 = rnet;
+ }
+
+ /* Search for a duplicate route (it's a NOOP if it is) */
+ add_route = 1;
+ list_for_each (e, &rnet2->lrn_routes) {
+ lnet_route_t *route2 = list_entry(e, lnet_route_t, lr_list);
+
+ if (route2->lr_gateway == route->lr_gateway) {
+ add_route = 0;
+ break;
+ }
+
+ /* our lookups must be true */
+ LASSERT (route2->lr_gateway->lp_nid != gateway);
+ }
+
+ if (add_route) {
+ lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
+ lnet_add_route_to_rnet(rnet2, route);
+
+ ni = route->lr_gateway->lp_ni;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* XXX Assume alive */
+ if (ni->ni_lnd->lnd_notify != NULL)
+ (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ }
+
+ /* -1 for notify or !add_route */
+ lnet_peer_decref_locked(route->lr_gateway);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ if (!add_route)
+ LIBCFS_FREE(route, sizeof(*route));
+
+ if (rnet != rnet2)
+ LIBCFS_FREE(rnet, sizeof(*rnet));
+
+ return 0;
+}
+
+int
+lnet_check_routes(void)
+{
+ lnet_remotenet_t *rnet;
+ lnet_route_t *route;
+ lnet_route_t *route2;
+ struct list_head *e1;
+ struct list_head *e2;
+ int cpt;
+ struct list_head *rn_list;
+ int i;
+
+ cpt = lnet_net_lock_current();
+
+ for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+ rn_list = &the_lnet.ln_remote_nets_hash[i];
+ list_for_each(e1, rn_list) {
+ rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+ route2 = NULL;
+ list_for_each(e2, &rnet->lrn_routes) {
+ lnet_nid_t nid1;
+ lnet_nid_t nid2;
+ int net;
+
+ route = list_entry(e2, lnet_route_t,
+ lr_list);
+
+ if (route2 == NULL) {
+ route2 = route;
+ continue;
+ }
+
+ if (route->lr_gateway->lp_ni ==
+ route2->lr_gateway->lp_ni)
+ continue;
+
+ nid1 = route->lr_gateway->lp_nid;
+ nid2 = route2->lr_gateway->lp_nid;
+ net = rnet->lrn_net;
+
+ lnet_net_unlock(cpt);
+
+ CERROR("Routes to %s via %s and %s not "
+ "supported\n",
+ libcfs_net2str(net),
+ libcfs_nid2str(nid1),
+ libcfs_nid2str(nid2));
+ return -EINVAL;
+ }
+ }
+ }
+
+ lnet_net_unlock(cpt);
+ return 0;
+}
+
+int
+lnet_del_route(__u32 net, lnet_nid_t gw_nid)
+{
+ struct lnet_peer *gateway;
+ lnet_remotenet_t *rnet;
+ lnet_route_t *route;
+ struct list_head *e1;
+ struct list_head *e2;
+ int rc = -ENOENT;
+ struct list_head *rn_list;
+ int idx = 0;
+
+ CDEBUG(D_NET, "Del route: net %s : gw %s\n",
+ libcfs_net2str(net), libcfs_nid2str(gw_nid));
+
+ /* NB Caller may specify either all routes via the given gateway
+ * or a specific route entry actual NIDs) */
+
+ lnet_net_lock(LNET_LOCK_EX);
+ if (net == LNET_NIDNET(LNET_NID_ANY))
+ rn_list = &the_lnet.ln_remote_nets_hash[0];
+ else
+ rn_list = lnet_net2rnethash(net);
+
+ again:
+ list_for_each(e1, rn_list) {
+ rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+ if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
+ net == rnet->lrn_net))
+ continue;
+
+ list_for_each(e2, &rnet->lrn_routes) {
+ route = list_entry(e2, lnet_route_t, lr_list);
+
+ gateway = route->lr_gateway;
+ if (!(gw_nid == LNET_NID_ANY ||
+ gw_nid == gateway->lp_nid))
+ continue;
+
+ list_del(&route->lr_list);
+ list_del(&route->lr_gwlist);
+ the_lnet.ln_remote_nets_version++;
+
+ if (list_empty(&rnet->lrn_routes))
+ list_del(&rnet->lrn_list);
+ else
+ rnet = NULL;
+
+ lnet_rtr_decref_locked(gateway);
+ lnet_peer_decref_locked(gateway);
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ LIBCFS_FREE(route, sizeof(*route));
+
+ if (rnet != NULL)
+ LIBCFS_FREE(rnet, sizeof(*rnet));
+
+ rc = 0;
+ lnet_net_lock(LNET_LOCK_EX);
+ goto again;
+ }
+ }
+
+ if (net == LNET_NIDNET(LNET_NID_ANY) &&
+ ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
+ rn_list = &the_lnet.ln_remote_nets_hash[idx];
+ goto again;
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ return rc;
+}
+
+void
+lnet_destroy_routes (void)
+{
+ lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
+}
+
+int
+lnet_get_route(int idx, __u32 *net, __u32 *hops,
+ lnet_nid_t *gateway, __u32 *alive)
+{
+ struct list_head *e1;
+ struct list_head *e2;
+ lnet_remotenet_t *rnet;
+ lnet_route_t *route;
+ int cpt;
+ int i;
+ struct list_head *rn_list;
+
+ cpt = lnet_net_lock_current();
+
+ for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+ rn_list = &the_lnet.ln_remote_nets_hash[i];
+ list_for_each(e1, rn_list) {
+ rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
+
+ list_for_each(e2, &rnet->lrn_routes) {
+ route = list_entry(e2, lnet_route_t,
+ lr_list);
+
+ if (idx-- == 0) {
+ *net = rnet->lrn_net;
+ *hops = route->lr_hops;
+ *gateway = route->lr_gateway->lp_nid;
+ *alive = route->lr_gateway->lp_alive;
+ lnet_net_unlock(cpt);
+ return 0;
+ }
+ }
+ }
+ }
+
+ lnet_net_unlock(cpt);
+ return -ENOENT;
+}
+
+void
+lnet_swap_pinginfo(lnet_ping_info_t *info)
+{
+ int i;
+ lnet_ni_status_t *stat;
+
+ __swab32s(&info->pi_magic);
+ __swab32s(&info->pi_features);
+ __swab32s(&info->pi_pid);
+ __swab32s(&info->pi_nnis);
+ for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
+ stat = &info->pi_ni[i];
+ __swab64s(&stat->ns_nid);
+ __swab32s(&stat->ns_status);
+ }
+ return;
+}
+
+/**
+ * parse router-checker pinginfo, record number of down NIs for remote
+ * networks on that router.
+ */
+static void
+lnet_parse_rc_info(lnet_rc_data_t *rcd)
+{
+ lnet_ping_info_t *info = rcd->rcd_pinginfo;
+ struct lnet_peer *gw = rcd->rcd_gateway;
+ lnet_route_t *rtr;
+
+ if (!gw->lp_alive)
+ return;
+
+ if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
+ lnet_swap_pinginfo(info);
+
+ /* NB always racing with network! */
+ if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
+ CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
+ libcfs_nid2str(gw->lp_nid), info->pi_magic);
+ gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ return;
+ }
+
+ gw->lp_ping_feats = info->pi_features;
+ if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) {
+ CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
+ libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
+ return; /* nothing I can understand */
+ }
+
+ if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0)
+ return; /* can't carry NI status info */
+
+ list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) {
+ int ptl_status = LNET_NI_STATUS_INVALID;
+ int down = 0;
+ int up = 0;
+ int i;
+
+ for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
+ lnet_ni_status_t *stat = &info->pi_ni[i];
+ lnet_nid_t nid = stat->ns_nid;
+
+ if (nid == LNET_NID_ANY) {
+ CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
+ libcfs_nid2str(gw->lp_nid));
+ gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ return;
+ }
+
+ if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
+ continue;
+
+ if (stat->ns_status == LNET_NI_STATUS_DOWN) {
+ if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND)
+ down++;
+ else if (ptl_status != LNET_NI_STATUS_UP)
+ ptl_status = LNET_NI_STATUS_DOWN;
+ continue;
+ }
+
+ if (stat->ns_status == LNET_NI_STATUS_UP) {
+ if (LNET_NIDNET(nid) == rtr->lr_net) {
+ up = 1;
+ break;
+ }
+ /* ptl NIs are considered down only when
+ * they're all down */
+ if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND)
+ ptl_status = LNET_NI_STATUS_UP;
+ continue;
+ }
+
+ CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
+ libcfs_nid2str(gw->lp_nid), stat->ns_status);
+ gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
+ return;
+ }
+
+ if (up) { /* ignore downed NIs if NI for dest network is up */
+ rtr->lr_downis = 0;
+ continue;
+ }
+ rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN);
+ }
+}
+
+static void
+lnet_router_checker_event(lnet_event_t *event)
+{
+ lnet_rc_data_t *rcd = event->md.user_ptr;
+ struct lnet_peer *lp;
+
+ LASSERT(rcd != NULL);
+
+ if (event->unlinked) {
+ LNetInvalidateHandle(&rcd->rcd_mdh);
+ return;
+ }
+
+ LASSERT(event->type == LNET_EVENT_SEND ||
+ event->type == LNET_EVENT_REPLY);
+
+ lp = rcd->rcd_gateway;
+ LASSERT(lp != NULL);
+
+ /* NB: it's called with holding lnet_res_lock, we have a few
+ * places need to hold both locks at the same time, please take
+ * care of lock ordering */
+ lnet_net_lock(lp->lp_cpt);
+ if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
+ /* ignore if no longer a router or rcd is replaced */
+ goto out;
+ }
+
+ if (event->type == LNET_EVENT_SEND) {
+ lp->lp_ping_notsent = 0;
+ if (event->status == 0)
+ goto out;
+ }
+
+ /* LNET_EVENT_REPLY */
+ /* A successful REPLY means the router is up. If _any_ comms
+ * to the router fail I assume it's down (this will happen if
+ * we ping alive routers to try to detect router death before
+ * apps get burned). */
+
+ lnet_notify_locked(lp, 1, (event->status == 0), cfs_time_current());
+ /* The router checker will wake up very shortly and do the
+ * actual notification.
+ * XXX If 'lp' stops being a router before then, it will still
+ * have the notification pending!!! */
+
+ if (avoid_asym_router_failure && event->status == 0)
+ lnet_parse_rc_info(rcd);
+
+ out:
+ lnet_net_unlock(lp->lp_cpt);
+}
+
+void
+lnet_wait_known_routerstate(void)
+{
+ lnet_peer_t *rtr;
+ struct list_head *entry;
+ int all_known;
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+ for (;;) {
+ int cpt = lnet_net_lock_current();
+
+ all_known = 1;
+ list_for_each (entry, &the_lnet.ln_routers) {
+ rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+
+ if (rtr->lp_alive_count == 0) {
+ all_known = 0;
+ break;
+ }
+ }
+
+ lnet_net_unlock(cpt);
+
+ if (all_known)
+ return;
+
+ cfs_pause(cfs_time_seconds(1));
+ }
+}
+
+void
+lnet_update_ni_status_locked(void)
+{
+ lnet_ni_t *ni;
+ long now;
+ int timeout;
+
+ LASSERT(the_lnet.ln_routing);
+
+ timeout = router_ping_timeout +
+ MAX(live_router_check_interval, dead_router_check_interval);
+
+ now = cfs_time_current_sec();
+ list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
+ if (ni->ni_lnd->lnd_type == LOLND)
+ continue;
+
+ if (now < ni->ni_last_alive + timeout)
+ continue;
+
+ lnet_ni_lock(ni);
+ /* re-check with lock */
+ if (now < ni->ni_last_alive + timeout) {
+ lnet_ni_unlock(ni);
+ continue;
+ }
+
+ LASSERT(ni->ni_status != NULL);
+
+ if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
+ CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
+ libcfs_nid2str(ni->ni_nid), timeout);
+ /* NB: so far, this is the only place to set
+ * NI status to "down" */
+ ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
+ }
+ lnet_ni_unlock(ni);
+ }
+}
+
+void
+lnet_destroy_rc_data(lnet_rc_data_t *rcd)
+{
+ LASSERT(list_empty(&rcd->rcd_list));
+ /* detached from network */
+ LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh));
+
+ if (rcd->rcd_gateway != NULL) {
+ int cpt = rcd->rcd_gateway->lp_cpt;
+
+ lnet_net_lock(cpt);
+ lnet_peer_decref_locked(rcd->rcd_gateway);
+ lnet_net_unlock(cpt);
+ }
+
+ if (rcd->rcd_pinginfo != NULL)
+ LIBCFS_FREE(rcd->rcd_pinginfo, LNET_PINGINFO_SIZE);
+
+ LIBCFS_FREE(rcd, sizeof(*rcd));
+}
+
+lnet_rc_data_t *
+lnet_create_rc_data_locked(lnet_peer_t *gateway)
+{
+ lnet_rc_data_t *rcd = NULL;
+ lnet_ping_info_t *pi;
+ int rc;
+ int i;
+
+ lnet_net_unlock(gateway->lp_cpt);
+
+ LIBCFS_ALLOC(rcd, sizeof(*rcd));
+ if (rcd == NULL)
+ goto out;
+
+ LNetInvalidateHandle(&rcd->rcd_mdh);
+ INIT_LIST_HEAD(&rcd->rcd_list);
+
+ LIBCFS_ALLOC(pi, LNET_PINGINFO_SIZE);
+ if (pi == NULL)
+ goto out;
+
+ memset(pi, 0, LNET_PINGINFO_SIZE);
+ for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
+ pi->pi_ni[i].ns_nid = LNET_NID_ANY;
+ pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
+ }
+ rcd->rcd_pinginfo = pi;
+
+ LASSERT (!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
+ rc = LNetMDBind((lnet_md_t){.start = pi,
+ .user_ptr = rcd,
+ .length = LNET_PINGINFO_SIZE,
+ .threshold = LNET_MD_THRESH_INF,
+ .options = LNET_MD_TRUNCATE,
+ .eq_handle = the_lnet.ln_rc_eqh},
+ LNET_UNLINK,
+ &rcd->rcd_mdh);
+ if (rc < 0) {
+ CERROR("Can't bind MD: %d\n", rc);
+ goto out;
+ }
+ LASSERT(rc == 0);
+
+ lnet_net_lock(gateway->lp_cpt);
+ /* router table changed or someone has created rcd for this gateway */
+ if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) {
+ lnet_net_unlock(gateway->lp_cpt);
+ goto out;
+ }
+
+ lnet_peer_addref_locked(gateway);
+ rcd->rcd_gateway = gateway;
+ gateway->lp_rcd = rcd;
+ gateway->lp_ping_notsent = 0;
+
+ return rcd;
+
+ out:
+ if (rcd != NULL) {
+ if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
+ rc = LNetMDUnlink(rcd->rcd_mdh);
+ LASSERT(rc == 0);
+ }
+ lnet_destroy_rc_data(rcd);
+ }
+
+ lnet_net_lock(gateway->lp_cpt);
+ return gateway->lp_rcd;
+}
+
+static int
+lnet_router_check_interval (lnet_peer_t *rtr)
+{
+ int secs;
+
+ secs = rtr->lp_alive ? live_router_check_interval :
+ dead_router_check_interval;
+ if (secs < 0)
+ secs = 0;
+
+ return secs;
+}
+
+static void
+lnet_ping_router_locked (lnet_peer_t *rtr)
+{
+ lnet_rc_data_t *rcd = NULL;
+ cfs_time_t now = cfs_time_current();
+ int secs;
+
+ lnet_peer_addref_locked(rtr);
+
+ if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
+ cfs_time_after(now, rtr->lp_ping_deadline))
+ lnet_notify_locked(rtr, 1, 0, now);
+
+ /* Run any outstanding notifications */
+ lnet_ni_notify_locked(rtr->lp_ni, rtr);
+
+ if (!lnet_isrouter(rtr) ||
+ the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
+ /* router table changed or router checker is shutting down */
+ lnet_peer_decref_locked(rtr);
+ return;
+ }
+
+ rcd = rtr->lp_rcd != NULL ?
+ rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
+
+ if (rcd == NULL)
+ return;
+
+ secs = lnet_router_check_interval(rtr);
+
+ CDEBUG(D_NET,
+ "rtr %s %d: deadline %lu ping_notsent %d alive %d "
+ "alive_count %d lp_ping_timestamp %lu\n",
+ libcfs_nid2str(rtr->lp_nid), secs,
+ rtr->lp_ping_deadline, rtr->lp_ping_notsent,
+ rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
+
+ if (secs != 0 && !rtr->lp_ping_notsent &&
+ cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
+ cfs_time_seconds(secs)))) {
+ int rc;
+ lnet_process_id_t id;
+ lnet_handle_md_t mdh;
+
+ id.nid = rtr->lp_nid;
+ id.pid = LUSTRE_SRV_LNET_PID;
+ CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
+
+ rtr->lp_ping_notsent = 1;
+ rtr->lp_ping_timestamp = now;
+
+ mdh = rcd->rcd_mdh;
+
+ if (rtr->lp_ping_deadline == 0) {
+ rtr->lp_ping_deadline =
+ cfs_time_shift(router_ping_timeout);
+ }
+
+ lnet_net_unlock(rtr->lp_cpt);
+
+ rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
+ LNET_PROTO_PING_MATCHBITS, 0);
+
+ lnet_net_lock(rtr->lp_cpt);
+ if (rc != 0)
+ rtr->lp_ping_notsent = 0; /* no event pending */
+ }
+
+ lnet_peer_decref_locked(rtr);
+ return;
+}
+
+int
+lnet_router_checker_start(void)
+{
+ int rc;
+ int eqsz;
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
+
+ if (check_routers_before_use &&
+ dead_router_check_interval <= 0) {
+ LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be"
+ " set if 'check_routers_before_use' is set"
+ "\n");
+ return -EINVAL;
+ }
+
+ if (!the_lnet.ln_routing &&
+ live_router_check_interval <= 0 &&
+ dead_router_check_interval <= 0)
+ return 0;
+
+ sema_init(&the_lnet.ln_rc_signal, 0);
+ /* EQ size doesn't matter; the callback is guaranteed to get every
+ * event */
+ eqsz = 0;
+ rc = LNetEQAlloc(eqsz, lnet_router_checker_event,
+ &the_lnet.ln_rc_eqh);
+ if (rc != 0) {
+ CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
+ return -ENOMEM;
+ }
+
+ the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
+ rc = PTR_ERR(kthread_run(lnet_router_checker,
+ NULL, "router_checker"));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("Can't start router checker thread: %d\n", rc);
+ /* block until event callback signals exit */
+ down(&the_lnet.ln_rc_signal);
+ rc = LNetEQFree(the_lnet.ln_rc_eqh);
+ LASSERT(rc == 0);
+ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+ return -ENOMEM;
+ }
+
+ if (check_routers_before_use) {
+ /* Note that a helpful side-effect of pinging all known routers
+ * at startup is that it makes them drop stale connections they
+ * may have to a previous instance of me. */
+ lnet_wait_known_routerstate();
+ }
+
+ return 0;
+}
+
+void
+lnet_router_checker_stop (void)
+{
+ int rc;
+
+ if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
+ return;
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+ the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;
+
+ /* block until event callback signals exit */
+ down(&the_lnet.ln_rc_signal);
+ LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);
+
+ rc = LNetEQFree(the_lnet.ln_rc_eqh);
+ LASSERT (rc == 0);
+ return;
+}
+
+static void
+lnet_prune_rc_data(int wait_unlink)
+{
+ lnet_rc_data_t *rcd;
+ lnet_rc_data_t *tmp;
+ lnet_peer_t *lp;
+ struct list_head head;
+ int i = 2;
+
+ if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
+ list_empty(&the_lnet.ln_rcd_deathrow) &&
+ list_empty(&the_lnet.ln_rcd_zombie)))
+ return;
+
+ INIT_LIST_HEAD(&head);
+
+ lnet_net_lock(LNET_LOCK_EX);
+
+ if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
+ /* router checker is stopping, prune all */
+ list_for_each_entry(lp, &the_lnet.ln_routers,
+ lp_rtr_list) {
+ if (lp->lp_rcd == NULL)
+ continue;
+
+ LASSERT(list_empty(&lp->lp_rcd->rcd_list));
+ list_add(&lp->lp_rcd->rcd_list,
+ &the_lnet.ln_rcd_deathrow);
+ lp->lp_rcd = NULL;
+ }
+ }
+
+ /* unlink all RCDs on deathrow list */
+ list_splice_init(&the_lnet.ln_rcd_deathrow, &head);
+
+ if (!list_empty(&head)) {
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ list_for_each_entry(rcd, &head, rcd_list)
+ LNetMDUnlink(rcd->rcd_mdh);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ }
+
+ list_splice_init(&head, &the_lnet.ln_rcd_zombie);
+
+ /* release all zombie RCDs */
+ while (!list_empty(&the_lnet.ln_rcd_zombie)) {
+ list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
+ rcd_list) {
+ if (LNetHandleIsInvalid(rcd->rcd_mdh))
+ list_move(&rcd->rcd_list, &head);
+ }
+
+ wait_unlink = wait_unlink &&
+ !list_empty(&the_lnet.ln_rcd_zombie);
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ while (!list_empty(&head)) {
+ rcd = list_entry(head.next,
+ lnet_rc_data_t, rcd_list);
+ list_del_init(&rcd->rcd_list);
+ lnet_destroy_rc_data(rcd);
+ }
+
+ if (!wait_unlink)
+ return;
+
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
+ "Waiting for rc buffers to unlink\n");
+ cfs_pause(cfs_time_seconds(1) / 4);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ }
+
+ lnet_net_unlock(LNET_LOCK_EX);
+}
+
+
+#if defined(LNET_ROUTER)
+
+static int
+lnet_router_checker(void *arg)
+{
+ lnet_peer_t *rtr;
+ struct list_head *entry;
+
+ cfs_block_allsigs();
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+ while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
+ __u64 version;
+ int cpt;
+ int cpt2;
+
+ cpt = lnet_net_lock_current();
+rescan:
+ version = the_lnet.ln_routers_version;
+
+ list_for_each(entry, &the_lnet.ln_routers) {
+ rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
+
+ cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
+ if (cpt != cpt2) {
+ lnet_net_unlock(cpt);
+ cpt = cpt2;
+ lnet_net_lock(cpt);
+ /* the routers list has changed */
+ if (version != the_lnet.ln_routers_version)
+ goto rescan;
+ }
+
+ lnet_ping_router_locked(rtr);
+
+ /* NB dropped lock */
+ if (version != the_lnet.ln_routers_version) {
+ /* the routers list has changed */
+ goto rescan;
+ }
+ }
+
+ if (the_lnet.ln_routing)
+ lnet_update_ni_status_locked();
+
+ lnet_net_unlock(cpt);
+
+ lnet_prune_rc_data(0); /* don't wait for UNLINK */
+
+ /* Call cfs_pause() here always adds 1 to load average
+ * because kernel counts # active tasks as nr_running
+ * + nr_uninterruptible. */
+ schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,
+ cfs_time_seconds(1));
+ }
+
+ LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING);
+
+ lnet_prune_rc_data(1); /* wait for UNLINK */
+
+ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+ up(&the_lnet.ln_rc_signal);
+ /* The unlink event callback will signal final completion */
+ return 0;
+}
+
+void
+lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
+{
+ int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
+
+ while (--npages >= 0)
+ __free_page(rb->rb_kiov[npages].kiov_page);
+
+ LIBCFS_FREE(rb, sz);
+}
+
+lnet_rtrbuf_t *
+lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
+{
+ int npages = rbp->rbp_npages;
+ int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
+ struct page *page;
+ lnet_rtrbuf_t *rb;
+ int i;
+
+ LIBCFS_CPT_ALLOC(rb, lnet_cpt_table(), cpt, sz);
+ if (rb == NULL)
+ return NULL;
+
+ rb->rb_pool = rbp;
+
+ for (i = 0; i < npages; i++) {
+ page = alloc_pages_node(
+ cfs_cpt_spread_node(lnet_cpt_table(), cpt),
+ __GFP_ZERO | GFP_IOFS, 0);
+ if (page == NULL) {
+ while (--i >= 0)
+ __free_page(rb->rb_kiov[i].kiov_page);
+
+ LIBCFS_FREE(rb, sz);
+ return NULL;
+ }
+
+ rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE;
+ rb->rb_kiov[i].kiov_offset = 0;
+ rb->rb_kiov[i].kiov_page = page;
+ }
+
+ return rb;
+}
+
+void
+lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp)
+{
+ int npages = rbp->rbp_npages;
+ int nbuffers = 0;
+ lnet_rtrbuf_t *rb;
+
+ if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */
+ return;
+
+ LASSERT (list_empty(&rbp->rbp_msgs));
+ LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers);
+
+ while (!list_empty(&rbp->rbp_bufs)) {
+ LASSERT (rbp->rbp_credits > 0);
+
+ rb = list_entry(rbp->rbp_bufs.next,
+ lnet_rtrbuf_t, rb_list);
+ list_del(&rb->rb_list);
+ lnet_destroy_rtrbuf(rb, npages);
+ nbuffers++;
+ }
+
+ LASSERT (rbp->rbp_nbuffers == nbuffers);
+ LASSERT (rbp->rbp_credits == nbuffers);
+
+ rbp->rbp_nbuffers = rbp->rbp_credits = 0;
+}
+
+int
+lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt)
+{
+ lnet_rtrbuf_t *rb;
+ int i;
+
+ if (rbp->rbp_nbuffers != 0) {
+ LASSERT (rbp->rbp_nbuffers == nbufs);
+ return 0;
+ }
+
+ for (i = 0; i < nbufs; i++) {
+ rb = lnet_new_rtrbuf(rbp, cpt);
+
+ if (rb == NULL) {
+ CERROR("Failed to allocate %d router bufs of %d pages\n",
+ nbufs, rbp->rbp_npages);
+ return -ENOMEM;
+ }
+
+ rbp->rbp_nbuffers++;
+ rbp->rbp_credits++;
+ rbp->rbp_mincredits++;
+ list_add(&rb->rb_list, &rbp->rbp_bufs);
+
+ /* No allocation "under fire" */
+ /* Otherwise we'd need code to schedule blocked msgs etc */
+ LASSERT (!the_lnet.ln_routing);
+ }
+
+ LASSERT (rbp->rbp_credits == nbufs);
+ return 0;
+}
+
+void
+lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages)
+{
+ INIT_LIST_HEAD(&rbp->rbp_msgs);
+ INIT_LIST_HEAD(&rbp->rbp_bufs);
+
+ rbp->rbp_npages = npages;
+ rbp->rbp_credits = 0;
+ rbp->rbp_mincredits = 0;
+}
+
+void
+lnet_rtrpools_free(void)
+{
+ lnet_rtrbufpool_t *rtrp;
+ int i;
+
+ if (the_lnet.ln_rtrpools == NULL) /* uninitialized or freed */
+ return;
+
+ cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
+ lnet_rtrpool_free_bufs(&rtrp[0]);
+ lnet_rtrpool_free_bufs(&rtrp[1]);
+ lnet_rtrpool_free_bufs(&rtrp[2]);
+ }
+
+ cfs_percpt_free(the_lnet.ln_rtrpools);
+ the_lnet.ln_rtrpools = NULL;
+}
+
+static int
+lnet_nrb_tiny_calculate(int npages)
+{
+ int nrbs = LNET_NRB_TINY;
+
+ if (tiny_router_buffers < 0) {
+ LCONSOLE_ERROR_MSG(0x10c,
+ "tiny_router_buffers=%d invalid when "
+ "routing enabled\n", tiny_router_buffers);
+ return -1;
+ }
+
+ if (tiny_router_buffers > 0)
+ nrbs = tiny_router_buffers;
+
+ nrbs /= LNET_CPT_NUMBER;
+ return max(nrbs, LNET_NRB_TINY_MIN);
+}
+
+static int
+lnet_nrb_small_calculate(int npages)
+{
+ int nrbs = LNET_NRB_SMALL;
+
+ if (small_router_buffers < 0) {
+ LCONSOLE_ERROR_MSG(0x10c,
+ "small_router_buffers=%d invalid when "
+ "routing enabled\n", small_router_buffers);
+ return -1;
+ }
+
+ if (small_router_buffers > 0)
+ nrbs = small_router_buffers;
+
+ nrbs /= LNET_CPT_NUMBER;
+ return max(nrbs, LNET_NRB_SMALL_MIN);
+}
+
+static int
+lnet_nrb_large_calculate(int npages)
+{
+ int nrbs = LNET_NRB_LARGE;
+
+ if (large_router_buffers < 0) {
+ LCONSOLE_ERROR_MSG(0x10c,
+ "large_router_buffers=%d invalid when "
+ "routing enabled\n", large_router_buffers);
+ return -1;
+ }
+
+ if (large_router_buffers > 0)
+ nrbs = large_router_buffers;
+
+ nrbs /= LNET_CPT_NUMBER;
+ return max(nrbs, LNET_NRB_LARGE_MIN);
+}
+
+int
+lnet_rtrpools_alloc(int im_a_router)
+{
+ lnet_rtrbufpool_t *rtrp;
+ int large_pages = (LNET_MTU + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ int small_pages = 1;
+ int nrb_tiny;
+ int nrb_small;
+ int nrb_large;
+ int rc;
+ int i;
+
+ if (!strcmp(forwarding, "")) {
+ /* not set either way */
+ if (!im_a_router)
+ return 0;
+ } else if (!strcmp(forwarding, "disabled")) {
+ /* explicitly disabled */
+ return 0;
+ } else if (!strcmp(forwarding, "enabled")) {
+ /* explicitly enabled */
+ } else {
+ LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
+ "'enabled' or 'disabled'\n");
+ return -EINVAL;
+ }
+
+ nrb_tiny = lnet_nrb_tiny_calculate(0);
+ if (nrb_tiny < 0)
+ return -EINVAL;
+
+ nrb_small = lnet_nrb_small_calculate(small_pages);
+ if (nrb_small < 0)
+ return -EINVAL;
+
+ nrb_large = lnet_nrb_large_calculate(large_pages);
+ if (nrb_large < 0)
+ return -EINVAL;
+
+ the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
+ LNET_NRBPOOLS *
+ sizeof(lnet_rtrbufpool_t));
+ if (the_lnet.ln_rtrpools == NULL) {
+ LCONSOLE_ERROR_MSG(0x10c,
+ "Failed to initialize router buffe pool\n");
+ return -ENOMEM;
+ }
+
+ cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
+ lnet_rtrpool_init(&rtrp[0], 0);
+ rc = lnet_rtrpool_alloc_bufs(&rtrp[0], nrb_tiny, i);
+ if (rc != 0)
+ goto failed;
+
+ lnet_rtrpool_init(&rtrp[1], small_pages);
+ rc = lnet_rtrpool_alloc_bufs(&rtrp[1], nrb_small, i);
+ if (rc != 0)
+ goto failed;
+
+ lnet_rtrpool_init(&rtrp[2], large_pages);
+ rc = lnet_rtrpool_alloc_bufs(&rtrp[2], nrb_large, i);
+ if (rc != 0)
+ goto failed;
+ }
+
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_routing = 1;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ return 0;
+
+ failed:
+ lnet_rtrpools_free();
+ return rc;
+}
+
+int
+lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
+{
+ struct lnet_peer *lp = NULL;
+ cfs_time_t now = cfs_time_current();
+ int cpt = lnet_cpt_of_nid(nid);
+
+ LASSERT (!in_interrupt ());
+
+ CDEBUG (D_NET, "%s notifying %s: %s\n",
+ (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
+ libcfs_nid2str(nid),
+ alive ? "up" : "down");
+
+ if (ni != NULL &&
+ LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
+ CWARN ("Ignoring notification of %s %s by %s (different net)\n",
+ libcfs_nid2str(nid), alive ? "birth" : "death",
+ libcfs_nid2str(ni->ni_nid));
+ return -EINVAL;
+ }
+
+ /* can't do predictions... */
+ if (cfs_time_after(when, now)) {
+ CWARN ("Ignoring prediction from %s of %s %s "
+ "%ld seconds in the future\n",
+ (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
+ libcfs_nid2str(nid), alive ? "up" : "down",
+ cfs_duration_sec(cfs_time_sub(when, now)));
+ return -EINVAL;
+ }
+
+ if (ni != NULL && !alive && /* LND telling me she's down */
+ !auto_down) { /* auto-down disabled */
+ CDEBUG(D_NET, "Auto-down disabled\n");
+ return 0;
+ }
+
+ lnet_net_lock(cpt);
+
+ if (the_lnet.ln_shutdown) {
+ lnet_net_unlock(cpt);
+ return -ESHUTDOWN;
+ }
+
+ lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
+ if (lp == NULL) {
+ /* nid not found */
+ lnet_net_unlock(cpt);
+ CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
+ return 0;
+ }
+
+ /* We can't fully trust LND on reporting exact peer last_alive
+ * if he notifies us about dead peer. For example ksocklnd can
+ * call us with when == _time_when_the_node_was_booted_ if
+ * no connections were successfully established */
+ if (ni != NULL && !alive && when < lp->lp_last_alive)
+ when = lp->lp_last_alive;
+
+ lnet_notify_locked(lp, ni == NULL, alive, when);
+
+ lnet_ni_notify_locked(ni, lp);
+
+ lnet_peer_decref_locked(lp);
+
+ lnet_net_unlock(cpt);
+ return 0;
+}
+EXPORT_SYMBOL(lnet_notify);
+
+void
+lnet_get_tunables (void)
+{
+ return;
+}
+
+#else
+
+int
+lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
+{
+ return -EOPNOTSUPP;
+}
+
+void
+lnet_router_checker (void)
+{
+ static time_t last = 0;
+ static int running = 0;
+
+ time_t now = cfs_time_current_sec();
+ int interval = now - last;
+ int rc;
+ __u64 version;
+ lnet_peer_t *rtr;
+
+ /* It's no use to call me again within a sec - all intervals and
+ * timeouts are measured in seconds */
+ if (last != 0 && interval < 2)
+ return;
+
+ if (last != 0 &&
+ interval > MAX(live_router_check_interval,
+ dead_router_check_interval))
+ CNETERR("Checker(%d/%d) not called for %d seconds\n",
+ live_router_check_interval, dead_router_check_interval,
+ interval);
+
+ LASSERT(LNET_CPT_NUMBER == 1);
+
+ lnet_net_lock(0);
+ LASSERT(!running); /* recursion check */
+ running = 1;
+ lnet_net_unlock(0);
+
+ last = now;
+
+ if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING)
+ lnet_prune_rc_data(0); /* unlink all rcd and nowait */
+
+ /* consume all pending events */
+ while (1) {
+ int i;
+ lnet_event_t ev;
+
+ /* NB ln_rc_eqh must be the 1st in 'eventqs' otherwise the
+ * recursion breaker in LNetEQPoll would fail */
+ rc = LNetEQPoll(&the_lnet.ln_rc_eqh, 1, 0, &ev, &i);
+ if (rc == 0) /* no event pending */
+ break;
+
+ /* NB a lost SENT prevents me from pinging a router again */
+ if (rc == -EOVERFLOW) {
+ CERROR("Dropped an event!!!\n");
+ abort();
+ }
+
+ LASSERT (rc == 1);
+
+ lnet_router_checker_event(&ev);
+ }
+
+ if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING) {
+ lnet_prune_rc_data(1); /* release rcd */
+ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+ running = 0;
+ return;
+ }
+
+ LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
+
+ lnet_net_lock(0);
+
+ version = the_lnet.ln_routers_version;
+ list_for_each_entry (rtr, &the_lnet.ln_routers, lp_rtr_list) {
+ lnet_ping_router_locked(rtr);
+ LASSERT (version == the_lnet.ln_routers_version);
+ }
+
+ lnet_net_unlock(0);
+
+ running = 0; /* lock only needed for the recursion check */
+ return;
+}
+
+/* NB lnet_peers_start_down depends on me,
+ * so must be called before any peer creation */
+void
+lnet_get_tunables (void)
+{
+ char *s;
+
+ s = getenv("LNET_ROUTER_PING_TIMEOUT");
+ if (s != NULL) router_ping_timeout = atoi(s);
+
+ s = getenv("LNET_LIVE_ROUTER_CHECK_INTERVAL");
+ if (s != NULL) live_router_check_interval = atoi(s);
+
+ s = getenv("LNET_DEAD_ROUTER_CHECK_INTERVAL");
+ if (s != NULL) dead_router_check_interval = atoi(s);
+
+ /* This replaces old lnd_notify mechanism */
+ check_routers_before_use = 1;
+ if (dead_router_check_interval <= 0)
+ dead_router_check_interval = 30;
+}
+
+void
+lnet_rtrpools_free(void)
+{
+}
+
+int
+lnet_rtrpools_alloc(int im_a_arouter)
+{
+ return 0;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c
new file mode 100644
index 000000000000..3084b0c75983
--- /dev/null
+++ b/drivers/staging/lustre/lnet/lnet/router_proc.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+
+#if defined(LNET_ROUTER)
+
+/* This is really lnet_proc.c. You might need to update sanity test 215
+ * if any file format is changed. */
+
+static ctl_table_header_t *lnet_table_header = NULL;
+
+#define CTL_LNET (0x100)
+enum {
+ PSDEV_LNET_STATS = 100,
+ PSDEV_LNET_ROUTES,
+ PSDEV_LNET_ROUTERS,
+ PSDEV_LNET_PEERS,
+ PSDEV_LNET_BUFFERS,
+ PSDEV_LNET_NIS,
+ PSDEV_LNET_PTL_ROTOR,
+};
+
+#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
+/*
+ * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
+ */
+#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
+/* change version, 16 bits or 8 bits */
+#define LNET_PROC_VER_BITS MAX(((MIN(LNET_LOFFT_BITS, 64)) / 4), 8)
+
+#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
+/*
+ * bits for peer hash offset
+ * NB: we don't use the highest bit of *ppos because it's signed
+ */
+#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
+ LNET_PROC_CPT_BITS - \
+ LNET_PROC_VER_BITS - \
+ LNET_PROC_HASH_BITS - 1)
+/* bits for hash index + position */
+#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
+/* bits for peer hash table + hash version */
+#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
+
+#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1)
+#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1)
+#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1)
+#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1)
+
+#define LNET_PROC_CPT_GET(pos) \
+ (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
+
+#define LNET_PROC_VER_GET(pos) \
+ (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
+
+#define LNET_PROC_HASH_GET(pos) \
+ (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
+
+#define LNET_PROC_HOFF_GET(pos) \
+ (int)((pos) & LNET_PROC_HOFF_MASK)
+
+#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \
+ (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \
+ ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \
+ ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
+ ((off) & LNET_PROC_HOFF_MASK))
+
+#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
+
+static int __proc_lnet_stats(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ int rc;
+ lnet_counters_t *ctrs;
+ int len;
+ char *tmpstr;
+ const int tmpsiz = 256; /* 7 %u and 4 LPU64 */
+
+ if (write) {
+ lnet_counters_reset();
+ return 0;
+ }
+
+ /* read */
+
+ LIBCFS_ALLOC(ctrs, sizeof(*ctrs));
+ if (ctrs == NULL)
+ return -ENOMEM;
+
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL) {
+ LIBCFS_FREE(ctrs, sizeof(*ctrs));
+ return -ENOMEM;
+ }
+
+ lnet_counters_get(ctrs);
+
+ len = snprintf(tmpstr, tmpsiz,
+ "%u %u %u %u %u %u %u "LPU64" "LPU64" "
+ LPU64" "LPU64,
+ ctrs->msgs_alloc, ctrs->msgs_max,
+ ctrs->errors,
+ ctrs->send_count, ctrs->recv_count,
+ ctrs->route_count, ctrs->drop_count,
+ ctrs->send_length, ctrs->recv_length,
+ ctrs->route_length, ctrs->drop_length);
+
+ if (pos >= min_t(int, len, strlen(tmpstr)))
+ rc = 0;
+ else
+ rc = cfs_trace_copyout_string(buffer, nob,
+ tmpstr + pos, "\n");
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+ LIBCFS_FREE(ctrs, sizeof(*ctrs));
+ return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_lnet_stats);
+
+int LL_PROC_PROTO(proc_lnet_routes)
+{
+ const int tmpsiz = 256;
+ char *tmpstr;
+ char *s;
+ int rc = 0;
+ int len;
+ int ver;
+ int off;
+
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ CLASSERT(sizeof(loff_t) >= 4);
+
+ off = LNET_PROC_HOFF_GET(*ppos);
+ ver = LNET_PROC_VER_GET(*ppos);
+
+ LASSERT (!write);
+
+ if (*lenp == 0)
+ return 0;
+
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL)
+ return -ENOMEM;
+
+ s = tmpstr; /* points to current position in tmpstr[] */
+
+ if (*ppos == 0) {
+ s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
+ the_lnet.ln_routing ? "enabled" : "disabled");
+ LASSERT (tmpstr + tmpsiz - s > 0);
+
+ s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %7s %s\n",
+ "net", "hops", "state", "router");
+ LASSERT (tmpstr + tmpsiz - s > 0);
+
+ lnet_net_lock(0);
+ ver = (unsigned int)the_lnet.ln_remote_nets_version;
+ lnet_net_unlock(0);
+ *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+ } else {
+ struct list_head *n;
+ struct list_head *r;
+ lnet_route_t *route = NULL;
+ lnet_remotenet_t *rnet = NULL;
+ int skip = off - 1;
+ struct list_head *rn_list;
+ int i;
+
+ lnet_net_lock(0);
+
+ if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
+ lnet_net_unlock(0);
+ LIBCFS_FREE(tmpstr, tmpsiz);
+ return -ESTALE;
+ }
+
+ for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && route == NULL;
+ i++) {
+ rn_list = &the_lnet.ln_remote_nets_hash[i];
+
+ n = rn_list->next;
+
+ while (n != rn_list && route == NULL) {
+ rnet = list_entry(n, lnet_remotenet_t,
+ lrn_list);
+
+ r = rnet->lrn_routes.next;
+
+ while (r != &rnet->lrn_routes) {
+ lnet_route_t *re =
+ list_entry(r, lnet_route_t,
+ lr_list);
+ if (skip == 0) {
+ route = re;
+ break;
+ }
+
+ skip--;
+ r = r->next;
+ }
+
+ n = n->next;
+ }
+ }
+
+ if (route != NULL) {
+ __u32 net = rnet->lrn_net;
+ unsigned int hops = route->lr_hops;
+ lnet_nid_t nid = route->lr_gateway->lp_nid;
+ int alive = route->lr_gateway->lp_alive;
+
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-8s %4u %7s %s\n",
+ libcfs_net2str(net), hops,
+ alive ? "up" : "down",
+ libcfs_nid2str(nid));
+ LASSERT(tmpstr + tmpsiz - s > 0);
+ }
+
+ lnet_net_unlock(0);
+ }
+
+ len = s - tmpstr; /* how many bytes was written */
+
+ if (len > *lenp) { /* linux-supplied buffer is too small */
+ rc = -EINVAL;
+ } else if (len > 0) { /* wrote something */
+ if (copy_to_user(buffer, tmpstr, len))
+ rc = -EFAULT;
+ else {
+ off += 1;
+ *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+ }
+ }
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+
+ if (rc == 0)
+ *lenp = len;
+
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_lnet_routers)
+{
+ int rc = 0;
+ char *tmpstr;
+ char *s;
+ const int tmpsiz = 256;
+ int len;
+ int ver;
+ int off;
+
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ off = LNET_PROC_HOFF_GET(*ppos);
+ ver = LNET_PROC_VER_GET(*ppos);
+
+ LASSERT (!write);
+
+ if (*lenp == 0)
+ return 0;
+
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL)
+ return -ENOMEM;
+
+ s = tmpstr; /* points to current position in tmpstr[] */
+
+ if (*ppos == 0) {
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
+ "ref", "rtr_ref", "alive_cnt", "state",
+ "last_ping", "ping_sent", "deadline",
+ "down_ni", "router");
+ LASSERT(tmpstr + tmpsiz - s > 0);
+
+ lnet_net_lock(0);
+ ver = (unsigned int)the_lnet.ln_routers_version;
+ lnet_net_unlock(0);
+ *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+ } else {
+ struct list_head *r;
+ struct lnet_peer *peer = NULL;
+ int skip = off - 1;
+
+ lnet_net_lock(0);
+
+ if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
+ lnet_net_unlock(0);
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+ return -ESTALE;
+ }
+
+ r = the_lnet.ln_routers.next;
+
+ while (r != &the_lnet.ln_routers) {
+ lnet_peer_t *lp = list_entry(r, lnet_peer_t,
+ lp_rtr_list);
+
+ if (skip == 0) {
+ peer = lp;
+ break;
+ }
+
+ skip--;
+ r = r->next;
+ }
+
+ if (peer != NULL) {
+ lnet_nid_t nid = peer->lp_nid;
+ cfs_time_t now = cfs_time_current();
+ cfs_time_t deadline = peer->lp_ping_deadline;
+ int nrefs = peer->lp_refcount;
+ int nrtrrefs = peer->lp_rtr_refcount;
+ int alive_cnt = peer->lp_alive_count;
+ int alive = peer->lp_alive;
+ int pingsent = !peer->lp_ping_notsent;
+ int last_ping = cfs_duration_sec(cfs_time_sub(now,
+ peer->lp_ping_timestamp));
+ int down_ni = 0;
+ lnet_route_t *rtr;
+
+ if ((peer->lp_ping_feats &
+ LNET_PING_FEAT_NI_STATUS) != 0) {
+ list_for_each_entry(rtr, &peer->lp_routes,
+ lr_gwlist) {
+ /* downis on any route should be the
+ * number of downis on the gateway */
+ if (rtr->lr_downis != 0) {
+ down_ni = rtr->lr_downis;
+ break;
+ }
+ }
+ }
+
+ if (deadline == 0)
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
+ nrefs, nrtrrefs, alive_cnt,
+ alive ? "up" : "down", last_ping,
+ pingsent, "NA", down_ni,
+ libcfs_nid2str(nid));
+ else
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
+ nrefs, nrtrrefs, alive_cnt,
+ alive ? "up" : "down", last_ping,
+ pingsent,
+ cfs_duration_sec(cfs_time_sub(deadline, now)),
+ down_ni, libcfs_nid2str(nid));
+ LASSERT (tmpstr + tmpsiz - s > 0);
+ }
+
+ lnet_net_unlock(0);
+ }
+
+ len = s - tmpstr; /* how many bytes was written */
+
+ if (len > *lenp) { /* linux-supplied buffer is too small */
+ rc = -EINVAL;
+ } else if (len > 0) { /* wrote something */
+ if (copy_to_user(buffer, tmpstr, len))
+ rc = -EFAULT;
+ else {
+ off += 1;
+ *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
+ }
+ }
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+
+ if (rc == 0)
+ *lenp = len;
+
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_lnet_peers)
+{
+ const int tmpsiz = 256;
+ struct lnet_peer_table *ptable;
+ char *tmpstr;
+ char *s;
+ int cpt = LNET_PROC_CPT_GET(*ppos);
+ int ver = LNET_PROC_VER_GET(*ppos);
+ int hash = LNET_PROC_HASH_GET(*ppos);
+ int hoff = LNET_PROC_HOFF_GET(*ppos);
+ int rc = 0;
+ int len;
+
+ CLASSERT(LNET_PROC_HASH_BITS >= LNET_PEER_HASH_BITS);
+ LASSERT(!write);
+
+ if (*lenp == 0)
+ return 0;
+
+ if (cpt >= LNET_CPT_NUMBER) {
+ *lenp = 0;
+ return 0;
+ }
+
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL)
+ return -ENOMEM;
+
+ s = tmpstr; /* points to current position in tmpstr[] */
+
+ if (*ppos == 0) {
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
+ "nid", "refs", "state", "last", "max",
+ "rtr", "min", "tx", "min", "queue");
+ LASSERT (tmpstr + tmpsiz - s > 0);
+
+ hoff++;
+ } else {
+ struct lnet_peer *peer;
+ struct list_head *p;
+ int skip;
+ again:
+ p = NULL;
+ peer = NULL;
+ skip = hoff - 1;
+
+ lnet_net_lock(cpt);
+ ptable = the_lnet.ln_peer_tables[cpt];
+ if (hoff == 1)
+ ver = LNET_PROC_VERSION(ptable->pt_version);
+
+ if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
+ lnet_net_unlock(cpt);
+ LIBCFS_FREE(tmpstr, tmpsiz);
+ return -ESTALE;
+ }
+
+ while (hash < LNET_PEER_HASH_SIZE) {
+ if (p == NULL)
+ p = ptable->pt_hash[hash].next;
+
+ while (p != &ptable->pt_hash[hash]) {
+ lnet_peer_t *lp = list_entry(p, lnet_peer_t,
+ lp_hashlist);
+ if (skip == 0) {
+ peer = lp;
+
+ /* minor optimization: start from idx+1
+ * on next iteration if we've just
+ * drained lp_hashlist */
+ if (lp->lp_hashlist.next ==
+ &ptable->pt_hash[hash]) {
+ hoff = 1;
+ hash++;
+ } else {
+ hoff++;
+ }
+
+ break;
+ }
+
+ skip--;
+ p = lp->lp_hashlist.next;
+ }
+
+ if (peer != NULL)
+ break;
+
+ p = NULL;
+ hoff = 1;
+ hash++;
+ }
+
+ if (peer != NULL) {
+ lnet_nid_t nid = peer->lp_nid;
+ int nrefs = peer->lp_refcount;
+ int lastalive = -1;
+ char *aliveness = "NA";
+ int maxcr = peer->lp_ni->ni_peertxcredits;
+ int txcr = peer->lp_txcredits;
+ int mintxcr = peer->lp_mintxcredits;
+ int rtrcr = peer->lp_rtrcredits;
+ int minrtrcr = peer->lp_minrtrcredits;
+ int txqnob = peer->lp_txqnob;
+
+ if (lnet_isrouter(peer) ||
+ lnet_peer_aliveness_enabled(peer))
+ aliveness = peer->lp_alive ? "up" : "down";
+
+ if (lnet_peer_aliveness_enabled(peer)) {
+ cfs_time_t now = cfs_time_current();
+ cfs_duration_t delta;
+
+ delta = cfs_time_sub(now, peer->lp_last_alive);
+ lastalive = cfs_duration_sec(delta);
+
+ /* No need to mess up peers contents with
+ * arbitrarily long integers - it suffices to
+ * know that lastalive is more than 10000s old
+ */
+ if (lastalive >= 10000)
+ lastalive = 9999;
+ }
+
+ lnet_net_unlock(cpt);
+
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
+ libcfs_nid2str(nid), nrefs, aliveness,
+ lastalive, maxcr, rtrcr, minrtrcr, txcr,
+ mintxcr, txqnob);
+ LASSERT (tmpstr + tmpsiz - s > 0);
+
+ } else { /* peer is NULL */
+ lnet_net_unlock(cpt);
+ }
+
+ if (hash == LNET_PEER_HASH_SIZE) {
+ cpt++;
+ hash = 0;
+ hoff = 1;
+ if (peer == NULL && cpt < LNET_CPT_NUMBER)
+ goto again;
+ }
+ }
+
+ len = s - tmpstr; /* how many bytes was written */
+
+ if (len > *lenp) { /* linux-supplied buffer is too small */
+ rc = -EINVAL;
+ } else if (len > 0) { /* wrote something */
+ if (copy_to_user(buffer, tmpstr, len))
+ rc = -EFAULT;
+ else
+ *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
+ }
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+
+ if (rc == 0)
+ *lenp = len;
+
+ return rc;
+}
+
+static int __proc_lnet_buffers(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ char *s;
+ char *tmpstr;
+ int tmpsiz;
+ int idx;
+ int len;
+ int rc;
+ int i;
+
+ LASSERT(!write);
+
+ /* (4 %d) * 4 * LNET_CPT_NUMBER */
+ tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL)
+ return -ENOMEM;
+
+ s = tmpstr; /* points to current position in tmpstr[] */
+
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%5s %5s %7s %7s\n",
+ "pages", "count", "credits", "min");
+ LASSERT (tmpstr + tmpsiz - s > 0);
+
+ if (the_lnet.ln_rtrpools == NULL)
+ goto out; /* I'm not a router */
+
+ for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
+ lnet_rtrbufpool_t *rbp;
+
+ lnet_net_lock(LNET_LOCK_EX);
+ cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%5d %5d %7d %7d\n",
+ rbp[idx].rbp_npages,
+ rbp[idx].rbp_nbuffers,
+ rbp[idx].rbp_credits,
+ rbp[idx].rbp_mincredits);
+ LASSERT(tmpstr + tmpsiz - s > 0);
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+ }
+
+ out:
+ len = s - tmpstr;
+
+ if (pos >= min_t(int, len, strlen(tmpstr)))
+ rc = 0;
+ else
+ rc = cfs_trace_copyout_string(buffer, nob,
+ tmpstr + pos, NULL);
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+ return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_lnet_buffers);
+
+int LL_PROC_PROTO(proc_lnet_nis)
+{
+ int tmpsiz = 128 * LNET_CPT_NUMBER;
+ int rc = 0;
+ char *tmpstr;
+ char *s;
+ int len;
+
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ LASSERT (!write);
+
+ if (*lenp == 0)
+ return 0;
+
+ LIBCFS_ALLOC(tmpstr, tmpsiz);
+ if (tmpstr == NULL)
+ return -ENOMEM;
+
+ s = tmpstr; /* points to current position in tmpstr[] */
+
+ if (*ppos == 0) {
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
+ "nid", "status", "alive", "refs", "peer",
+ "rtr", "max", "tx", "min");
+ LASSERT (tmpstr + tmpsiz - s > 0);
+ } else {
+ struct list_head *n;
+ lnet_ni_t *ni = NULL;
+ int skip = *ppos - 1;
+
+ lnet_net_lock(0);
+
+ n = the_lnet.ln_nis.next;
+
+ while (n != &the_lnet.ln_nis) {
+ lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);
+
+ if (skip == 0) {
+ ni = a_ni;
+ break;
+ }
+
+ skip--;
+ n = n->next;
+ }
+
+ if (ni != NULL) {
+ struct lnet_tx_queue *tq;
+ char *stat;
+ long now = cfs_time_current_sec();
+ int last_alive = -1;
+ int i;
+ int j;
+
+ if (the_lnet.ln_routing)
+ last_alive = now - ni->ni_last_alive;
+
+ /* @lo forever alive */
+ if (ni->ni_lnd->lnd_type == LOLND)
+ last_alive = 0;
+
+ lnet_ni_lock(ni);
+ LASSERT(ni->ni_status != NULL);
+ stat = (ni->ni_status->ns_status ==
+ LNET_NI_STATUS_UP) ? "up" : "down";
+ lnet_ni_unlock(ni);
+
+ /* we actually output credits information for
+ * TX queue of each partition */
+ cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
+ for (j = 0; ni->ni_cpts != NULL &&
+ j < ni->ni_ncpts; j++) {
+ if (i == ni->ni_cpts[j])
+ break;
+ }
+
+ if (j == ni->ni_ncpts)
+ continue;
+
+ if (i != 0)
+ lnet_net_lock(i);
+
+ s += snprintf(s, tmpstr + tmpsiz - s,
+ "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
+ libcfs_nid2str(ni->ni_nid), stat,
+ last_alive, *ni->ni_refs[i],
+ ni->ni_peertxcredits,
+ ni->ni_peerrtrcredits,
+ tq->tq_credits_max,
+ tq->tq_credits, tq->tq_credits_min);
+ if (i != 0)
+ lnet_net_unlock(i);
+ }
+ LASSERT(tmpstr + tmpsiz - s > 0);
+ }
+
+ lnet_net_unlock(0);
+ }
+
+ len = s - tmpstr; /* how many bytes was written */
+
+ if (len > *lenp) { /* linux-supplied buffer is too small */
+ rc = -EINVAL;
+ } else if (len > 0) { /* wrote something */
+ if (copy_to_user(buffer, tmpstr, len))
+ rc = -EFAULT;
+ else
+ *ppos += 1;
+ }
+
+ LIBCFS_FREE(tmpstr, tmpsiz);
+
+ if (rc == 0)
+ *lenp = len;
+
+ return rc;
+}
+
+struct lnet_portal_rotors {
+ int pr_value;
+ const char *pr_name;
+ const char *pr_desc;
+};
+
+static struct lnet_portal_rotors portal_rotors[] = {
+ {
+ .pr_value = LNET_PTL_ROTOR_OFF,
+ .pr_name = "OFF",
+ .pr_desc = "Turn off message rotor for wildcard portals"
+ },
+ {
+ .pr_value = LNET_PTL_ROTOR_ON,
+ .pr_name = "ON",
+ .pr_desc = "round-robin dispatch all PUT messages for "
+ "wildcard portals"
+ },
+ {
+ .pr_value = LNET_PTL_ROTOR_RR_RT,
+ .pr_name = "RR_RT",
+ .pr_desc = "round-robin dispatch routed PUT message for "
+ "wildcard portals"
+ },
+ {
+ .pr_value = LNET_PTL_ROTOR_HASH_RT,
+ .pr_name = "HASH_RT",
+ .pr_desc = "dispatch routed PUT message by hashing source "
+ "NID for wildcard portals"
+ },
+ {
+ .pr_value = -1,
+ .pr_name = NULL,
+ .pr_desc = NULL
+ },
+};
+
+extern int portal_rotor;
+
+static int __proc_lnet_portal_rotor(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ const int buf_len = 128;
+ char *buf;
+ char *tmp;
+ int rc;
+ int i;
+
+ LIBCFS_ALLOC(buf, buf_len);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ if (!write) {
+ lnet_res_lock(0);
+
+ for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
+ if (portal_rotors[i].pr_value == portal_rotor)
+ break;
+ }
+
+ LASSERT(portal_rotors[i].pr_value == portal_rotor);
+ lnet_res_unlock(0);
+
+ rc = snprintf(buf, buf_len,
+ "{\n\tportals: all\n"
+ "\trotor: %s\n\tdescription: %s\n}",
+ portal_rotors[i].pr_name,
+ portal_rotors[i].pr_desc);
+
+ if (pos >= min_t(int, rc, buf_len)) {
+ rc = 0;
+ } else {
+ rc = cfs_trace_copyout_string(buffer, nob,
+ buf + pos, "\n");
+ }
+ goto out;
+ }
+
+ rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
+ if (rc < 0)
+ goto out;
+
+ tmp = cfs_trimwhite(buf);
+
+ rc = -EINVAL;
+ lnet_res_lock(0);
+ for (i = 0; portal_rotors[i].pr_name != NULL; i++) {
+ if (cfs_strncasecmp(portal_rotors[i].pr_name, tmp,
+ strlen(portal_rotors[i].pr_name)) == 0) {
+ portal_rotor = portal_rotors[i].pr_value;
+ rc = 0;
+ break;
+ }
+ }
+ lnet_res_unlock(0);
+out:
+ LIBCFS_FREE(buf, buf_len);
+ return rc;
+}
+DECLARE_PROC_HANDLER(proc_lnet_portal_rotor);
+
+static ctl_table_t lnet_table[] = {
+ /*
+ * NB No .strategy entries have been provided since sysctl(8) prefers
+ * to go via /proc for portability.
+ */
+ {
+ INIT_CTL_NAME(PSDEV_LNET_STATS)
+ .procname = "stats",
+ .mode = 0644,
+ .proc_handler = &proc_lnet_stats,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_ROUTES)
+ .procname = "routes",
+ .mode = 0444,
+ .proc_handler = &proc_lnet_routes,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_ROUTERS)
+ .procname = "routers",
+ .mode = 0444,
+ .proc_handler = &proc_lnet_routers,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_PEERS)
+ .procname = "peers",
+ .mode = 0444,
+ .proc_handler = &proc_lnet_peers,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_PEERS)
+ .procname = "buffers",
+ .mode = 0444,
+ .proc_handler = &proc_lnet_buffers,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_NIS)
+ .procname = "nis",
+ .mode = 0444,
+ .proc_handler = &proc_lnet_nis,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_PTL_ROTOR)
+ .procname = "portal_rotor",
+ .mode = 0644,
+ .proc_handler = &proc_lnet_portal_rotor,
+ },
+ {
+ INIT_CTL_NAME(0)
+ }
+};
+
+static ctl_table_t top_table[] = {
+ {
+ INIT_CTL_NAME(CTL_LNET)
+ .procname = "lnet",
+ .mode = 0555,
+ .data = NULL,
+ .maxlen = 0,
+ .child = lnet_table,
+ },
+ {
+ INIT_CTL_NAME(0)
+ }
+};
+
+void
+lnet_proc_init(void)
+{
+#ifdef CONFIG_SYSCTL
+ if (lnet_table_header == NULL)
+ lnet_table_header = cfs_register_sysctl_table(top_table, 0);
+#endif
+}
+
+void
+lnet_proc_fini(void)
+{
+#ifdef CONFIG_SYSCTL
+ if (lnet_table_header != NULL)
+ unregister_sysctl_table(lnet_table_header);
+
+ lnet_table_header = NULL;
+#endif
+}
+
+#else
+
+void
+lnet_proc_init(void)
+{
+}
+
+void
+lnet_proc_fini(void)
+{
+}
+
+#endif
diff --git a/drivers/staging/lustre/lnet/selftest/Makefile b/drivers/staging/lustre/lnet/selftest/Makefile
new file mode 100644
index 000000000000..1e40aeea2962
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
+
+lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
+ module.o ping_test.o brw_test.o
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
new file mode 100644
index 000000000000..3bb6fbe23f78
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -0,0 +1,499 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/brw_test.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+
+#include "selftest.h"
+
+static int brw_srv_workitems = SFW_TEST_WI_MAX;
+CFS_MODULE_PARM(brw_srv_workitems, "i", int, 0644, "# BRW server workitems");
+
+static int brw_inject_errors;
+CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
+ "# data errors to inject randomly, zero by default");
+
+static void
+brw_client_fini (sfw_test_instance_t *tsi)
+{
+ srpc_bulk_t *bulk;
+ sfw_test_unit_t *tsu;
+
+ LASSERT (tsi->tsi_is_client);
+
+ list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
+ bulk = tsu->tsu_private;
+ if (bulk == NULL) continue;
+
+ srpc_free_bulk(bulk);
+ tsu->tsu_private = NULL;
+ }
+}
+
+int
+brw_client_init (sfw_test_instance_t *tsi)
+{
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ int flags;
+ int npg;
+ int len;
+ int opc;
+ srpc_bulk_t *bulk;
+ sfw_test_unit_t *tsu;
+
+ LASSERT(sn != NULL);
+ LASSERT(tsi->tsi_is_client);
+
+ if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+ test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;
+
+ opc = breq->blk_opc;
+ flags = breq->blk_flags;
+ npg = breq->blk_npg;
+ /* NB: this is not going to work for variable page size,
+ * but we have to keep it for compatibility */
+ len = npg * PAGE_CACHE_SIZE;
+
+ } else {
+ test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
+
+ /* I should never get this step if it's unknown feature
+ * because make_session will reject unknown feature */
+ LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+ opc = breq->blk_opc;
+ flags = breq->blk_flags;
+ len = breq->blk_len;
+ npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ }
+
+ if (npg > LNET_MAX_IOV || npg <= 0)
+ return -EINVAL;
+
+ if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
+ return -EINVAL;
+
+ if (flags != LST_BRW_CHECK_NONE &&
+ flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
+ return -EINVAL;
+
+ list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
+ bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
+ npg, len, opc == LST_BRW_READ);
+ if (bulk == NULL) {
+ brw_client_fini(tsi);
+ return -ENOMEM;
+ }
+
+ tsu->tsu_private = bulk;
+ }
+
+ return 0;
+}
+
+#define BRW_POISON 0xbeefbeefbeefbeefULL
+#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
+#define BRW_MSIZE sizeof(__u64)
+
+int
+brw_inject_one_error (void)
+{
+ struct timeval tv;
+
+ if (brw_inject_errors <= 0) return 0;
+
+ do_gettimeofday(&tv);
+
+ if ((tv.tv_usec & 1) == 0) return 0;
+
+ return brw_inject_errors--;
+}
+
+void
+brw_fill_page (struct page *pg, int pattern, __u64 magic)
+{
+ char *addr = page_address(pg);
+ int i;
+
+ LASSERT (addr != NULL);
+
+ if (pattern == LST_BRW_CHECK_NONE) return;
+
+ if (magic == BRW_MAGIC)
+ magic += brw_inject_one_error();
+
+ if (pattern == LST_BRW_CHECK_SIMPLE) {
+ memcpy(addr, &magic, BRW_MSIZE);
+ addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+ memcpy(addr, &magic, BRW_MSIZE);
+ return;
+ }
+
+ if (pattern == LST_BRW_CHECK_FULL) {
+ for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++)
+ memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE);
+ return;
+ }
+
+ LBUG ();
+ return;
+}
+
+int
+brw_check_page (struct page *pg, int pattern, __u64 magic)
+{
+ char *addr = page_address(pg);
+ __u64 data = 0; /* make compiler happy */
+ int i;
+
+ LASSERT (addr != NULL);
+
+ if (pattern == LST_BRW_CHECK_NONE)
+ return 0;
+
+ if (pattern == LST_BRW_CHECK_SIMPLE) {
+ data = *((__u64 *) addr);
+ if (data != magic) goto bad_data;
+
+ addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+ data = *((__u64 *) addr);
+ if (data != magic) goto bad_data;
+
+ return 0;
+ }
+
+ if (pattern == LST_BRW_CHECK_FULL) {
+ for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) {
+ data = *(((__u64 *) addr) + i);
+ if (data != magic) goto bad_data;
+ }
+
+ return 0;
+ }
+
+ LBUG ();
+
+bad_data:
+ CERROR ("Bad data in page %p: "LPX64", "LPX64" expected\n",
+ pg, data, magic);
+ return 1;
+}
+
+void
+brw_fill_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
+{
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < bk->bk_niov; i++) {
+ pg = bk->bk_iovs[i].kiov_page;
+ brw_fill_page(pg, pattern, magic);
+ }
+}
+
+int
+brw_check_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
+{
+ int i;
+ struct page *pg;
+
+ for (i = 0; i < bk->bk_niov; i++) {
+ pg = bk->bk_iovs[i].kiov_page;
+ if (brw_check_page(pg, pattern, magic) != 0) {
+ CERROR ("Bulk page %p (%d/%d) is corrupted!\n",
+ pg, i, bk->bk_niov);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+brw_client_prep_rpc (sfw_test_unit_t *tsu,
+ lnet_process_id_t dest, srpc_client_rpc_t **rpcpp)
+{
+ srpc_bulk_t *bulk = tsu->tsu_private;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ srpc_client_rpc_t *rpc;
+ srpc_brw_reqst_t *req;
+ int flags;
+ int npg;
+ int len;
+ int opc;
+ int rc;
+
+ LASSERT(sn != NULL);
+ LASSERT(bulk != NULL);
+
+ if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+ test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;
+
+ opc = breq->blk_opc;
+ flags = breq->blk_flags;
+ npg = breq->blk_npg;
+ len = npg * PAGE_CACHE_SIZE;
+
+ } else {
+ test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
+
+ /* I should never get this step if it's unknown feature
+ * because make_session will reject unknown feature */
+ LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+ opc = breq->blk_opc;
+ flags = breq->blk_flags;
+ len = breq->blk_len;
+ npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ }
+
+ rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
+ if (rc != 0)
+ return rc;
+
+ memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg]));
+ if (opc == LST_BRW_WRITE)
+ brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
+ else
+ brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);
+
+ req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
+ req->brw_flags = flags;
+ req->brw_rw = opc;
+ req->brw_len = len;
+
+ *rpcpp = rpc;
+ return 0;
+}
+
+static void
+brw_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+{
+ __u64 magic = BRW_MAGIC;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ srpc_msg_t *msg = &rpc->crpc_replymsg;
+ srpc_brw_reply_t *reply = &msg->msg_body.brw_reply;
+ srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
+
+ LASSERT (sn != NULL);
+
+ if (rpc->crpc_status != 0) {
+ CERROR ("BRW RPC to %s failed with %d\n",
+ libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
+ if (!tsi->tsi_stopping) /* rpc could have been aborted */
+ atomic_inc(&sn->sn_brw_errors);
+ goto out;
+ }
+
+ if (msg->msg_magic != SRPC_MSG_MAGIC) {
+ __swab64s(&magic);
+ __swab32s(&reply->brw_status);
+ }
+
+ CDEBUG (reply->brw_status ? D_WARNING : D_NET,
+ "BRW RPC to %s finished with brw_status: %d\n",
+ libcfs_id2str(rpc->crpc_dest), reply->brw_status);
+
+ if (reply->brw_status != 0) {
+ atomic_inc(&sn->sn_brw_errors);
+ rpc->crpc_status = -(int)reply->brw_status;
+ goto out;
+ }
+
+ if (reqst->brw_rw == LST_BRW_WRITE) goto out;
+
+ if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) {
+ CERROR ("Bulk data from %s is corrupted!\n",
+ libcfs_id2str(rpc->crpc_dest));
+ atomic_inc(&sn->sn_brw_errors);
+ rpc->crpc_status = -EBADMSG;
+ }
+
+out:
+ return;
+}
+
+void
+brw_server_rpc_done (srpc_server_rpc_t *rpc)
+{
+ srpc_bulk_t *blk = rpc->srpc_bulk;
+
+ if (blk == NULL) return;
+
+ if (rpc->srpc_status != 0)
+ CERROR ("Bulk transfer %s %s has failed: %d\n",
+ blk->bk_sink ? "from" : "to",
+ libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
+ else
+ CDEBUG (D_NET, "Transfered %d pages bulk data %s %s\n",
+ blk->bk_niov, blk->bk_sink ? "from" : "to",
+ libcfs_id2str(rpc->srpc_peer));
+
+ sfw_free_pages(rpc);
+}
+
+int
+brw_bulk_ready (srpc_server_rpc_t *rpc, int status)
+{
+ __u64 magic = BRW_MAGIC;
+ srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
+ srpc_brw_reqst_t *reqst;
+ srpc_msg_t *reqstmsg;
+
+ LASSERT (rpc->srpc_bulk != NULL);
+ LASSERT (rpc->srpc_reqstbuf != NULL);
+
+ reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+ reqst = &reqstmsg->msg_body.brw_reqst;
+
+ if (status != 0) {
+ CERROR ("BRW bulk %s failed for RPC from %s: %d\n",
+ reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
+ libcfs_id2str(rpc->srpc_peer), status);
+ return -EIO;
+ }
+
+ if (reqst->brw_rw == LST_BRW_READ)
+ return 0;
+
+ if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
+ __swab64s(&magic);
+
+ if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) {
+ CERROR ("Bulk data from %s is corrupted!\n",
+ libcfs_id2str(rpc->srpc_peer));
+ reply->brw_status = EBADMSG;
+ }
+
+ return 0;
+}
+
+int
+brw_server_handle(struct srpc_server_rpc *rpc)
+{
+ struct srpc_service *sv = rpc->srpc_scd->scd_svc;
+ srpc_msg_t *replymsg = &rpc->srpc_replymsg;
+ srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+ srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply;
+ srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst;
+ int npg;
+ int rc;
+
+ LASSERT (sv->sv_id == SRPC_SERVICE_BRW);
+
+ if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
+ LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+ __swab32s(&reqst->brw_rw);
+ __swab32s(&reqst->brw_len);
+ __swab32s(&reqst->brw_flags);
+ __swab64s(&reqst->brw_rpyid);
+ __swab64s(&reqst->brw_bulkid);
+ }
+ LASSERT (reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));
+
+ reply->brw_status = 0;
+ rpc->srpc_done = brw_server_rpc_done;
+
+ if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
+ (reqst->brw_flags != LST_BRW_CHECK_NONE &&
+ reqst->brw_flags != LST_BRW_CHECK_FULL &&
+ reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
+ reply->brw_status = EINVAL;
+ return 0;
+ }
+
+ if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+ replymsg->msg_ses_feats = LST_FEATS_MASK;
+ reply->brw_status = EPROTO;
+ return 0;
+ }
+
+ if ((reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) {
+ /* compat with old version */
+ if ((reqst->brw_len & ~CFS_PAGE_MASK) != 0) {
+ reply->brw_status = EINVAL;
+ return 0;
+ }
+ npg = reqst->brw_len >> PAGE_CACHE_SHIFT;
+
+ } else {
+ npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ }
+
+ replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
+
+ if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) {
+ reply->brw_status = EINVAL;
+ return 0;
+ }
+
+ rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
+ reqst->brw_len,
+ reqst->brw_rw == LST_BRW_WRITE);
+ if (rc != 0)
+ return rc;
+
+ if (reqst->brw_rw == LST_BRW_READ)
+ brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
+ else
+ brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
+
+ return 0;
+}
+
+sfw_test_client_ops_t brw_test_client;
+void brw_init_test_client(void)
+{
+ brw_test_client.tso_init = brw_client_init;
+ brw_test_client.tso_fini = brw_client_fini;
+ brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
+ brw_test_client.tso_done_rpc = brw_client_done_rpc;
+};
+
+srpc_service_t brw_test_service;
+void brw_init_test_service(void)
+{
+
+ brw_test_service.sv_id = SRPC_SERVICE_BRW;
+ brw_test_service.sv_name = "brw_test";
+ brw_test_service.sv_handler = brw_server_handle;
+ brw_test_service.sv_bulk_ready = brw_bulk_ready;
+ brw_test_service.sv_wi_total = brw_srv_workitems;
+}
diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
new file mode 100644
index 000000000000..bce3d3bde6b2
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/conctl.c
@@ -0,0 +1,931 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * IOC handle in kernel
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lnetst.h>
+#include "console.h"
+
+int
+lst_session_new_ioctl(lstio_session_new_args_t *args)
+{
+ char *name;
+ int rc;
+
+ if (args->lstio_ses_idp == NULL || /* address for output sid */
+ args->lstio_ses_key == 0 || /* no key is specified */
+ args->lstio_ses_namep == NULL || /* session name */
+ args->lstio_ses_nmlen <= 0 ||
+ args->lstio_ses_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_ses_namep,
+ args->lstio_ses_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_ses_nmlen] = 0;
+
+ rc = lstcon_session_new(name,
+ args->lstio_ses_key,
+ args->lstio_ses_feats,
+ args->lstio_ses_force,
+ args->lstio_ses_timeout,
+ args->lstio_ses_idp);
+
+ LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
+ return rc;
+}
+
+int
+lst_session_end_ioctl(lstio_session_end_args_t *args)
+{
+ if (args->lstio_ses_key != console_session.ses_key)
+ return -EACCES;
+
+ return lstcon_session_end();
+}
+
+int
+lst_session_info_ioctl(lstio_session_info_args_t *args)
+{
+ /* no checking of key */
+
+ if (args->lstio_ses_idp == NULL || /* address for ouput sid */
+ args->lstio_ses_keyp == NULL || /* address for ouput key */
+ args->lstio_ses_featp == NULL || /* address for ouput features */
+ args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */
+ args->lstio_ses_namep == NULL || /* address for ouput name */
+ args->lstio_ses_nmlen <= 0 ||
+ args->lstio_ses_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ return lstcon_session_info(args->lstio_ses_idp,
+ args->lstio_ses_keyp,
+ args->lstio_ses_featp,
+ args->lstio_ses_ndinfo,
+ args->lstio_ses_namep,
+ args->lstio_ses_nmlen);
+}
+
+int
+lst_debug_ioctl(lstio_debug_args_t *args)
+{
+ char *name = NULL;
+ int client = 1;
+ int rc;
+
+ if (args->lstio_dbg_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_dbg_resultp == NULL)
+ return -EINVAL;
+
+ if (args->lstio_dbg_namep != NULL && /* name of batch/group */
+ (args->lstio_dbg_nmlen <= 0 ||
+ args->lstio_dbg_nmlen > LST_NAME_SIZE))
+ return -EINVAL;
+
+ if (args->lstio_dbg_namep != NULL) {
+ LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name, args->lstio_dbg_namep,
+ args->lstio_dbg_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
+
+ return -EFAULT;
+ }
+
+ name[args->lstio_dbg_nmlen] = 0;
+ }
+
+ rc = -EINVAL;
+
+ switch (args->lstio_dbg_type) {
+ case LST_OPC_SESSION:
+ rc = lstcon_session_debug(args->lstio_dbg_timeout,
+ args->lstio_dbg_resultp);
+ break;
+
+ case LST_OPC_BATCHSRV:
+ client = 0;
+ case LST_OPC_BATCHCLI:
+ if (name == NULL)
+ goto out;
+
+ rc = lstcon_batch_debug(args->lstio_dbg_timeout,
+ name, client, args->lstio_dbg_resultp);
+ break;
+
+ case LST_OPC_GROUP:
+ if (name == NULL)
+ goto out;
+
+ rc = lstcon_group_debug(args->lstio_dbg_timeout,
+ name, args->lstio_dbg_resultp);
+ break;
+
+ case LST_OPC_NODES:
+ if (args->lstio_dbg_count <= 0 ||
+ args->lstio_dbg_idsp == NULL)
+ goto out;
+
+ rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
+ args->lstio_dbg_count,
+ args->lstio_dbg_idsp,
+ args->lstio_dbg_resultp);
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ if (name != NULL)
+ LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_group_add_ioctl(lstio_group_add_args_t *args)
+{
+ char *name;
+ int rc;
+
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_namep == NULL||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_grp_namep,
+ args->lstio_grp_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_grp_nmlen);
+ return -EFAULT;
+ }
+
+ name[args->lstio_grp_nmlen] = 0;
+
+ rc = lstcon_group_add(name);
+
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_group_del_ioctl(lstio_group_del_args_t *args)
+{
+ int rc;
+ char *name;
+
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_namep == NULL ||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_grp_namep,
+ args->lstio_grp_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_grp_nmlen] = 0;
+
+ rc = lstcon_group_del(name);
+
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_group_update_ioctl(lstio_group_update_args_t *args)
+{
+ int rc;
+ char *name;
+
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_resultp == NULL ||
+ args->lstio_grp_namep == NULL ||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_grp_namep,
+ args->lstio_grp_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_grp_nmlen] = 0;
+
+ switch (args->lstio_grp_opc) {
+ case LST_GROUP_CLEAN:
+ rc = lstcon_group_clean(name, args->lstio_grp_args);
+ break;
+
+ case LST_GROUP_REFRESH:
+ rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
+ break;
+
+ case LST_GROUP_RMND:
+ if (args->lstio_grp_count <= 0 ||
+ args->lstio_grp_idsp == NULL) {
+ rc = -EINVAL;
+ break;
+ }
+ rc = lstcon_nodes_remove(name, args->lstio_grp_count,
+ args->lstio_grp_idsp,
+ args->lstio_grp_resultp);
+ break;
+
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
+{
+ unsigned feats;
+ int rc;
+ char *name;
+
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_idsp == NULL || /* array of ids */
+ args->lstio_grp_count <= 0 ||
+ args->lstio_grp_resultp == NULL ||
+ args->lstio_grp_featp == NULL ||
+ args->lstio_grp_namep == NULL ||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name, args->lstio_grp_namep,
+ args->lstio_grp_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+ return -EFAULT;
+ }
+
+ name[args->lstio_grp_nmlen] = 0;
+
+ rc = lstcon_nodes_add(name, args->lstio_grp_count,
+ args->lstio_grp_idsp, &feats,
+ args->lstio_grp_resultp);
+
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+ if (rc == 0 &&
+ copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) {
+ return -EINVAL;
+ }
+
+ return rc;
+}
+
+int
+lst_group_list_ioctl(lstio_group_list_args_t *args)
+{
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_idx < 0 ||
+ args->lstio_grp_namep == NULL ||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ return lstcon_group_list(args->lstio_grp_idx,
+ args->lstio_grp_nmlen,
+ args->lstio_grp_namep);
+}
+
+int
+lst_group_info_ioctl(lstio_group_info_args_t *args)
+{
+ char *name;
+ int ndent;
+ int index;
+ int rc;
+
+ if (args->lstio_grp_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_grp_namep == NULL ||
+ args->lstio_grp_nmlen <= 0 ||
+ args->lstio_grp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ if (args->lstio_grp_entp == NULL && /* output: group entry */
+ args->lstio_grp_dentsp == NULL) /* output: node entry */
+ return -EINVAL;
+
+ if (args->lstio_grp_dentsp != NULL) { /* have node entry */
+ if (args->lstio_grp_idxp == NULL || /* node index */
+ args->lstio_grp_ndentp == NULL) /* # of node entry */
+ return -EINVAL;
+
+ if (copy_from_user(&ndent, args->lstio_grp_ndentp,
+ sizeof(ndent)) ||
+ copy_from_user(&index, args->lstio_grp_idxp,
+ sizeof(index)))
+ return -EFAULT;
+
+ if (ndent <= 0 || index < 0)
+ return -EINVAL;
+ }
+
+ LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_grp_namep,
+ args->lstio_grp_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_grp_nmlen] = 0;
+
+ rc = lstcon_group_info(name, args->lstio_grp_entp,
+ &index, &ndent, args->lstio_grp_dentsp);
+
+ LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
+
+ if (rc != 0)
+ return rc;
+
+ if (args->lstio_grp_dentsp != NULL &&
+ (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
+ copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
+ rc = -EFAULT;
+
+ return 0;
+}
+
+int
+lst_batch_add_ioctl(lstio_batch_add_args_t *args)
+{
+ int rc;
+ char *name;
+
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_namep == NULL ||
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_bat_namep,
+ args->lstio_bat_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_bat_nmlen] = 0;
+
+ rc = lstcon_batch_add(name);
+
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_batch_run_ioctl(lstio_batch_run_args_t *args)
+{
+ int rc;
+ char *name;
+
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_namep == NULL ||
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_bat_namep,
+ args->lstio_bat_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_bat_nmlen] = 0;
+
+ rc = lstcon_batch_run(name, args->lstio_bat_timeout,
+ args->lstio_bat_resultp);
+
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_batch_stop_ioctl(lstio_batch_stop_args_t *args)
+{
+ int rc;
+ char *name;
+
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_resultp == NULL ||
+ args->lstio_bat_namep == NULL ||
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_bat_namep,
+ args->lstio_bat_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_bat_nmlen] = 0;
+
+ rc = lstcon_batch_stop(name, args->lstio_bat_force,
+ args->lstio_bat_resultp);
+
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_batch_query_ioctl(lstio_batch_query_args_t *args)
+{
+ char *name;
+ int rc;
+
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_resultp == NULL ||
+ args->lstio_bat_namep == NULL ||
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ if (args->lstio_bat_testidx < 0)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_bat_namep,
+ args->lstio_bat_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_bat_nmlen] = 0;
+
+ rc = lstcon_test_batch_query(name,
+ args->lstio_bat_testidx,
+ args->lstio_bat_client,
+ args->lstio_bat_timeout,
+ args->lstio_bat_resultp);
+
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+ return rc;
+}
+
+int
+lst_batch_list_ioctl(lstio_batch_list_args_t *args)
+{
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_idx < 0 ||
+ args->lstio_bat_namep == NULL ||
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ return lstcon_batch_list(args->lstio_bat_idx,
+ args->lstio_bat_nmlen,
+ args->lstio_bat_namep);
+}
+
+int
+lst_batch_info_ioctl(lstio_batch_info_args_t *args)
+{
+ char *name;
+ int rc;
+ int index;
+ int ndent;
+
+ if (args->lstio_bat_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_bat_namep == NULL || /* batch name */
+ args->lstio_bat_nmlen <= 0 ||
+ args->lstio_bat_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ if (args->lstio_bat_entp == NULL && /* output: batch entry */
+ args->lstio_bat_dentsp == NULL) /* output: node entry */
+ return -EINVAL;
+
+ if (args->lstio_bat_dentsp != NULL) { /* have node entry */
+ if (args->lstio_bat_idxp == NULL || /* node index */
+ args->lstio_bat_ndentp == NULL) /* # of node entry */
+ return -EINVAL;
+
+ if (copy_from_user(&index, args->lstio_bat_idxp,
+ sizeof(index)) ||
+ copy_from_user(&ndent, args->lstio_bat_ndentp,
+ sizeof(ndent)))
+ return -EFAULT;
+
+ if (ndent <= 0 || index < 0)
+ return -EINVAL;
+ }
+
+ LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name,
+ args->lstio_bat_namep, args->lstio_bat_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+ return -EFAULT;
+ }
+
+ name[args->lstio_bat_nmlen] = 0;
+
+ rc = lstcon_batch_info(name,
+ args->lstio_bat_entp, args->lstio_bat_server,
+ args->lstio_bat_testidx, &index, &ndent,
+ args->lstio_bat_dentsp);
+
+ LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
+
+ if (rc != 0)
+ return rc;
+
+ if (args->lstio_bat_dentsp != NULL &&
+ (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
+ copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
+ rc = -EFAULT;
+
+ return rc;
+}
+
+int
+lst_stat_query_ioctl(lstio_stat_args_t *args)
+{
+ int rc;
+ char *name;
+
+ /* TODO: not finished */
+ if (args->lstio_sta_key != console_session.ses_key)
+ return -EACCES;
+
+ if (args->lstio_sta_resultp == NULL ||
+ (args->lstio_sta_namep == NULL &&
+ args->lstio_sta_idsp == NULL) ||
+ args->lstio_sta_nmlen <= 0 ||
+ args->lstio_sta_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ if (args->lstio_sta_idsp != NULL &&
+ args->lstio_sta_count <= 0)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name, args->lstio_sta_namep,
+ args->lstio_sta_nmlen)) {
+ LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
+ return -EFAULT;
+ }
+
+ if (args->lstio_sta_idsp == NULL) {
+ rc = lstcon_group_stat(name, args->lstio_sta_timeout,
+ args->lstio_sta_resultp);
+ } else {
+ rc = lstcon_nodes_stat(args->lstio_sta_count,
+ args->lstio_sta_idsp,
+ args->lstio_sta_timeout,
+ args->lstio_sta_resultp);
+ }
+
+ LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
+
+ return rc;
+}
+
+int lst_test_add_ioctl(lstio_test_args_t *args)
+{
+ char *name;
+ char *srcgrp = NULL;
+ char *dstgrp = NULL;
+ void *param = NULL;
+ int ret = 0;
+ int rc = -ENOMEM;
+
+ if (args->lstio_tes_resultp == NULL ||
+ args->lstio_tes_retp == NULL ||
+ args->lstio_tes_bat_name == NULL || /* no specified batch */
+ args->lstio_tes_bat_nmlen <= 0 ||
+ args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
+ args->lstio_tes_sgrp_name == NULL || /* no source group */
+ args->lstio_tes_sgrp_nmlen <= 0 ||
+ args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
+ args->lstio_tes_dgrp_name == NULL || /* no target group */
+ args->lstio_tes_dgrp_nmlen <= 0 ||
+ args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
+ return -EINVAL;
+
+ if (args->lstio_tes_loop == 0 || /* negative is infinite */
+ args->lstio_tes_concur <= 0 ||
+ args->lstio_tes_dist <= 0 ||
+ args->lstio_tes_span <= 0)
+ return -EINVAL;
+
+ /* have parameter, check if parameter length is valid */
+ if (args->lstio_tes_param != NULL &&
+ (args->lstio_tes_param_len <= 0 ||
+ args->lstio_tes_param_len > PAGE_CACHE_SIZE - sizeof(lstcon_test_t)))
+ return -EINVAL;
+
+ LIBCFS_ALLOC(name, args->lstio_tes_bat_nmlen + 1);
+ if (name == NULL)
+ return rc;
+
+ LIBCFS_ALLOC(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
+ if (srcgrp == NULL)
+ goto out;
+
+ LIBCFS_ALLOC(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
+ if (dstgrp == NULL)
+ goto out;
+
+ if (args->lstio_tes_param != NULL) {
+ LIBCFS_ALLOC(param, args->lstio_tes_param_len);
+ if (param == NULL)
+ goto out;
+ }
+
+ rc = -EFAULT;
+ if (copy_from_user(name,
+ args->lstio_tes_bat_name,
+ args->lstio_tes_bat_nmlen) ||
+ copy_from_user(srcgrp,
+ args->lstio_tes_sgrp_name,
+ args->lstio_tes_sgrp_nmlen) ||
+ copy_from_user(dstgrp,
+ args->lstio_tes_dgrp_name,
+ args->lstio_tes_dgrp_nmlen) ||
+ copy_from_user(param, args->lstio_tes_param,
+ args->lstio_tes_param_len))
+ goto out;
+
+ rc = lstcon_test_add(name,
+ args->lstio_tes_type,
+ args->lstio_tes_loop,
+ args->lstio_tes_concur,
+ args->lstio_tes_dist, args->lstio_tes_span,
+ srcgrp, dstgrp, param, args->lstio_tes_param_len,
+ &ret, args->lstio_tes_resultp);
+
+ if (ret != 0)
+ rc = (copy_to_user(args->lstio_tes_retp, &ret,
+ sizeof(ret))) ? -EFAULT : 0;
+out:
+ if (name != NULL)
+ LIBCFS_FREE(name, args->lstio_tes_bat_nmlen + 1);
+
+ if (srcgrp != NULL)
+ LIBCFS_FREE(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
+
+ if (dstgrp != NULL)
+ LIBCFS_FREE(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
+
+ if (param != NULL)
+ LIBCFS_FREE(param, args->lstio_tes_param_len);
+
+ return rc;
+}
+
+int
+lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data)
+{
+ char *buf;
+ int opc = data->ioc_u32[0];
+ int rc;
+
+ if (cmd != IOC_LIBCFS_LNETST)
+ return -EINVAL;
+
+ if (data->ioc_plen1 > PAGE_CACHE_SIZE)
+ return -EINVAL;
+
+ LIBCFS_ALLOC(buf, data->ioc_plen1);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ /* copy in parameter */
+ if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
+ LIBCFS_FREE(buf, data->ioc_plen1);
+ return -EFAULT;
+ }
+
+ mutex_lock(&console_session.ses_mutex);
+
+ console_session.ses_laststamp = cfs_time_current_sec();
+
+ if (console_session.ses_shutdown) {
+ rc = -ESHUTDOWN;
+ goto out;
+ }
+
+ if (console_session.ses_expired)
+ lstcon_session_end();
+
+ if (opc != LSTIO_SESSION_NEW &&
+ console_session.ses_state == LST_SESSION_NONE) {
+ CDEBUG(D_NET, "LST no active session\n");
+ rc = -ESRCH;
+ goto out;
+ }
+
+ memset(&console_session.ses_trans_stat, 0, sizeof(lstcon_trans_stat_t));
+
+ switch (opc) {
+ case LSTIO_SESSION_NEW:
+ rc = lst_session_new_ioctl((lstio_session_new_args_t *)buf);
+ break;
+ case LSTIO_SESSION_END:
+ rc = lst_session_end_ioctl((lstio_session_end_args_t *)buf);
+ break;
+ case LSTIO_SESSION_INFO:
+ rc = lst_session_info_ioctl((lstio_session_info_args_t *)buf);
+ break;
+ case LSTIO_DEBUG:
+ rc = lst_debug_ioctl((lstio_debug_args_t *)buf);
+ break;
+ case LSTIO_GROUP_ADD:
+ rc = lst_group_add_ioctl((lstio_group_add_args_t *)buf);
+ break;
+ case LSTIO_GROUP_DEL:
+ rc = lst_group_del_ioctl((lstio_group_del_args_t *)buf);
+ break;
+ case LSTIO_GROUP_UPDATE:
+ rc = lst_group_update_ioctl((lstio_group_update_args_t *)buf);
+ break;
+ case LSTIO_NODES_ADD:
+ rc = lst_nodes_add_ioctl((lstio_group_nodes_args_t *)buf);
+ break;
+ case LSTIO_GROUP_LIST:
+ rc = lst_group_list_ioctl((lstio_group_list_args_t *)buf);
+ break;
+ case LSTIO_GROUP_INFO:
+ rc = lst_group_info_ioctl((lstio_group_info_args_t *)buf);
+ break;
+ case LSTIO_BATCH_ADD:
+ rc = lst_batch_add_ioctl((lstio_batch_add_args_t *)buf);
+ break;
+ case LSTIO_BATCH_START:
+ rc = lst_batch_run_ioctl((lstio_batch_run_args_t *)buf);
+ break;
+ case LSTIO_BATCH_STOP:
+ rc = lst_batch_stop_ioctl((lstio_batch_stop_args_t *)buf);
+ break;
+ case LSTIO_BATCH_QUERY:
+ rc = lst_batch_query_ioctl((lstio_batch_query_args_t *)buf);
+ break;
+ case LSTIO_BATCH_LIST:
+ rc = lst_batch_list_ioctl((lstio_batch_list_args_t *)buf);
+ break;
+ case LSTIO_BATCH_INFO:
+ rc = lst_batch_info_ioctl((lstio_batch_info_args_t *)buf);
+ break;
+ case LSTIO_TEST_ADD:
+ rc = lst_test_add_ioctl((lstio_test_args_t *)buf);
+ break;
+ case LSTIO_STAT_QUERY:
+ rc = lst_stat_query_ioctl((lstio_stat_args_t *)buf);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+
+ if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
+ sizeof(lstcon_trans_stat_t)))
+ rc = -EFAULT;
+out:
+ mutex_unlock(&console_session.ses_mutex);
+
+ LIBCFS_FREE(buf, data->ioc_plen1);
+
+ return rc;
+}
+
+EXPORT_SYMBOL(lstcon_ioctl_entry);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
new file mode 100644
index 000000000000..446de0e4672f
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -0,0 +1,1397 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * Console framework rpcs
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ */
+
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include "timer.h"
+#include "conrpc.h"
+#include "console.h"
+
+void lstcon_rpc_stat_reply(lstcon_rpc_trans_t *, srpc_msg_t *,
+ lstcon_node_t *, lstcon_trans_stat_t *);
+
+static void
+lstcon_rpc_done(srpc_client_rpc_t *rpc)
+{
+ lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv;
+
+ LASSERT(crpc != NULL && rpc == crpc->crp_rpc);
+ LASSERT(crpc->crp_posted && !crpc->crp_finished);
+
+ spin_lock(&rpc->crpc_lock);
+
+ if (crpc->crp_trans == NULL) {
+ /* Orphan RPC is not in any transaction,
+ * I'm just a poor body and nobody loves me */
+ spin_unlock(&rpc->crpc_lock);
+
+ /* release it */
+ lstcon_rpc_put(crpc);
+ return;
+ }
+
+ /* not an orphan RPC */
+ crpc->crp_finished = 1;
+
+ if (crpc->crp_stamp == 0) {
+ /* not aborted */
+ LASSERT (crpc->crp_status == 0);
+
+ crpc->crp_stamp = cfs_time_current();
+ crpc->crp_status = rpc->crpc_status;
+ }
+
+ /* wakeup (transaction)thread if I'm the last RPC in the transaction */
+ if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining))
+ wake_up(&crpc->crp_trans->tas_waitq);
+
+ spin_unlock(&rpc->crpc_lock);
+}
+
+int
+lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats,
+ int bulk_npg, int bulk_len, int embedded, lstcon_rpc_t *crpc)
+{
+ crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
+ feats, bulk_npg, bulk_len,
+ lstcon_rpc_done, (void *)crpc);
+ if (crpc->crp_rpc == NULL)
+ return -ENOMEM;
+
+ crpc->crp_trans = NULL;
+ crpc->crp_node = nd;
+ crpc->crp_posted = 0;
+ crpc->crp_finished = 0;
+ crpc->crp_unpacked = 0;
+ crpc->crp_status = 0;
+ crpc->crp_stamp = 0;
+ crpc->crp_embedded = embedded;
+ INIT_LIST_HEAD(&crpc->crp_link);
+
+ atomic_inc(&console_session.ses_rpc_counter);
+
+ return 0;
+}
+
+int
+lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats,
+ int bulk_npg, int bulk_len, lstcon_rpc_t **crpcpp)
+{
+ lstcon_rpc_t *crpc = NULL;
+ int rc;
+
+ spin_lock(&console_session.ses_rpc_lock);
+
+ if (!list_empty(&console_session.ses_rpc_freelist)) {
+ crpc = list_entry(console_session.ses_rpc_freelist.next,
+ lstcon_rpc_t, crp_link);
+ list_del_init(&crpc->crp_link);
+ }
+
+ spin_unlock(&console_session.ses_rpc_lock);
+
+ if (crpc == NULL) {
+ LIBCFS_ALLOC(crpc, sizeof(*crpc));
+ if (crpc == NULL)
+ return -ENOMEM;
+ }
+
+ rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc);
+ if (rc == 0) {
+ *crpcpp = crpc;
+ return 0;
+ }
+
+ LIBCFS_FREE(crpc, sizeof(*crpc));
+
+ return rc;
+}
+
+void
+lstcon_rpc_put(lstcon_rpc_t *crpc)
+{
+ srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk;
+ int i;
+
+ LASSERT (list_empty(&crpc->crp_link));
+
+ for (i = 0; i < bulk->bk_niov; i++) {
+ if (bulk->bk_iovs[i].kiov_page == NULL)
+ continue;
+
+ __free_page(bulk->bk_iovs[i].kiov_page);
+ }
+
+ srpc_client_rpc_decref(crpc->crp_rpc);
+
+ if (crpc->crp_embedded) {
+ /* embedded RPC, don't recycle it */
+ memset(crpc, 0, sizeof(*crpc));
+ crpc->crp_embedded = 1;
+
+ } else {
+ spin_lock(&console_session.ses_rpc_lock);
+
+ list_add(&crpc->crp_link,
+ &console_session.ses_rpc_freelist);
+
+ spin_unlock(&console_session.ses_rpc_lock);
+ }
+
+ /* RPC is not alive now */
+ atomic_dec(&console_session.ses_rpc_counter);
+}
+
+void
+lstcon_rpc_post(lstcon_rpc_t *crpc)
+{
+ lstcon_rpc_trans_t *trans = crpc->crp_trans;
+
+ LASSERT (trans != NULL);
+
+ atomic_inc(&trans->tas_remaining);
+ crpc->crp_posted = 1;
+
+ sfw_post_rpc(crpc->crp_rpc);
+}
+
+static char *
+lstcon_rpc_trans_name(int transop)
+{
+ if (transop == LST_TRANS_SESNEW)
+ return "SESNEW";
+
+ if (transop == LST_TRANS_SESEND)
+ return "SESEND";
+
+ if (transop == LST_TRANS_SESQRY)
+ return "SESQRY";
+
+ if (transop == LST_TRANS_SESPING)
+ return "SESPING";
+
+ if (transop == LST_TRANS_TSBCLIADD)
+ return "TSBCLIADD";
+
+ if (transop == LST_TRANS_TSBSRVADD)
+ return "TSBSRVADD";
+
+ if (transop == LST_TRANS_TSBRUN)
+ return "TSBRUN";
+
+ if (transop == LST_TRANS_TSBSTOP)
+ return "TSBSTOP";
+
+ if (transop == LST_TRANS_TSBCLIQRY)
+ return "TSBCLIQRY";
+
+ if (transop == LST_TRANS_TSBSRVQRY)
+ return "TSBSRVQRY";
+
+ if (transop == LST_TRANS_STATQRY)
+ return "STATQRY";
+
+ return "Unknown";
+}
+
+int
+lstcon_rpc_trans_prep(struct list_head *translist,
+ int transop, lstcon_rpc_trans_t **transpp)
+{
+ lstcon_rpc_trans_t *trans;
+
+ if (translist != NULL) {
+ list_for_each_entry(trans, translist, tas_link) {
+ /* Can't enqueue two private transaction on
+ * the same object */
+ if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE)
+ return -EPERM;
+ }
+ }
+
+ /* create a trans group */
+ LIBCFS_ALLOC(trans, sizeof(*trans));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ trans->tas_opc = transop;
+
+ if (translist == NULL)
+ INIT_LIST_HEAD(&trans->tas_olink);
+ else
+ list_add_tail(&trans->tas_olink, translist);
+
+ list_add_tail(&trans->tas_link, &console_session.ses_trans_list);
+
+ INIT_LIST_HEAD(&trans->tas_rpcs_list);
+ atomic_set(&trans->tas_remaining, 0);
+ init_waitqueue_head(&trans->tas_waitq);
+
+ spin_lock(&console_session.ses_rpc_lock);
+ trans->tas_features = console_session.ses_features;
+ spin_unlock(&console_session.ses_rpc_lock);
+
+ *transpp = trans;
+ return 0;
+}
+
+void
+lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc)
+{
+ list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
+ crpc->crp_trans = trans;
+}
+
+void
+lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error)
+{
+ srpc_client_rpc_t *rpc;
+ lstcon_rpc_t *crpc;
+ lstcon_node_t *nd;
+
+ list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) {
+ rpc = crpc->crp_rpc;
+
+ spin_lock(&rpc->crpc_lock);
+
+ if (!crpc->crp_posted || /* not posted */
+ crpc->crp_stamp != 0) { /* rpc done or aborted already */
+ if (crpc->crp_stamp == 0) {
+ crpc->crp_stamp = cfs_time_current();
+ crpc->crp_status = -EINTR;
+ }
+ spin_unlock(&rpc->crpc_lock);
+ continue;
+ }
+
+ crpc->crp_stamp = cfs_time_current();
+ crpc->crp_status = error;
+
+ spin_unlock(&rpc->crpc_lock);
+
+ sfw_abort_rpc(rpc);
+
+ if (error != ETIMEDOUT)
+ continue;
+
+ nd = crpc->crp_node;
+ if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
+ continue;
+
+ nd->nd_stamp = crpc->crp_stamp;
+ nd->nd_state = LST_NODE_DOWN;
+ }
+}
+
+static int
+lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans)
+{
+ if (console_session.ses_shutdown &&
+ !list_empty(&trans->tas_olink)) /* Not an end session RPC */
+ return 1;
+
+ return (atomic_read(&trans->tas_remaining) == 0) ? 1: 0;
+}
+
+int
+lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout)
+{
+ lstcon_rpc_t *crpc;
+ int rc;
+
+ if (list_empty(&trans->tas_rpcs_list))
+ return 0;
+
+ if (timeout < LST_TRANS_MIN_TIMEOUT)
+ timeout = LST_TRANS_MIN_TIMEOUT;
+
+ CDEBUG(D_NET, "Transaction %s started\n",
+ lstcon_rpc_trans_name(trans->tas_opc));
+
+ /* post all requests */
+ list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) {
+ LASSERT (!crpc->crp_posted);
+
+ lstcon_rpc_post(crpc);
+ }
+
+ mutex_unlock(&console_session.ses_mutex);
+
+ rc = wait_event_interruptible_timeout(trans->tas_waitq,
+ lstcon_rpc_trans_check(trans),
+ cfs_time_seconds(timeout));
+ rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT);
+
+ mutex_lock(&console_session.ses_mutex);
+
+ if (console_session.ses_shutdown)
+ rc = -ESHUTDOWN;
+
+ if (rc != 0 || atomic_read(&trans->tas_remaining) != 0) {
+ /* treat short timeout as canceled */
+ if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2)
+ rc = -EINTR;
+
+ lstcon_rpc_trans_abort(trans, rc);
+ }
+
+ CDEBUG(D_NET, "Transaction %s stopped: %d\n",
+ lstcon_rpc_trans_name(trans->tas_opc), rc);
+
+ lstcon_rpc_trans_stat(trans, lstcon_trans_stat());
+
+ return rc;
+}
+
+int
+lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp)
+{
+ lstcon_node_t *nd = crpc->crp_node;
+ srpc_client_rpc_t *rpc = crpc->crp_rpc;
+ srpc_generic_reply_t *rep;
+
+ LASSERT (nd != NULL && rpc != NULL);
+ LASSERT (crpc->crp_stamp != 0);
+
+ if (crpc->crp_status != 0) {
+ *msgpp = NULL;
+ return crpc->crp_status;
+ }
+
+ *msgpp = &rpc->crpc_replymsg;
+ if (!crpc->crp_unpacked) {
+ sfw_unpack_message(*msgpp);
+ crpc->crp_unpacked = 1;
+ }
+
+ if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp))
+ return 0;
+
+ nd->nd_stamp = crpc->crp_stamp;
+ rep = &(*msgpp)->msg_body.reply;
+
+ if (rep->sid.ses_nid == LNET_NID_ANY)
+ nd->nd_state = LST_NODE_UNKNOWN;
+ else if (lstcon_session_match(rep->sid))
+ nd->nd_state = LST_NODE_ACTIVE;
+ else
+ nd->nd_state = LST_NODE_BUSY;
+
+ return 0;
+}
+
+void
+lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat)
+{
+ lstcon_rpc_t *crpc;
+ srpc_msg_t *rep;
+ int error;
+
+ LASSERT (stat != NULL);
+
+ memset(stat, 0, sizeof(*stat));
+
+ list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+ lstcon_rpc_stat_total(stat, 1);
+
+ LASSERT (crpc->crp_stamp != 0);
+
+ error = lstcon_rpc_get_reply(crpc, &rep);
+ if (error != 0) {
+ lstcon_rpc_stat_failure(stat, 1);
+ if (stat->trs_rpc_errno == 0)
+ stat->trs_rpc_errno = -error;
+
+ continue;
+ }
+
+ lstcon_rpc_stat_success(stat, 1);
+
+ lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat);
+ }
+
+ if (trans->tas_opc == LST_TRANS_SESNEW && stat->trs_fwk_errno == 0) {
+ stat->trs_fwk_errno =
+ lstcon_session_feats_check(trans->tas_features);
+ }
+
+ CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, "
+ "RPC error(%d), Framework error(%d)\n",
+ lstcon_rpc_trans_name(trans->tas_opc),
+ lstcon_rpc_stat_success(stat, 0),
+ lstcon_rpc_stat_failure(stat, 0),
+ lstcon_rpc_stat_total(stat, 0),
+ stat->trs_rpc_errno, stat->trs_fwk_errno);
+
+ return;
+}
+
+int
+lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+ struct list_head *head_up,
+ lstcon_rpc_readent_func_t readent)
+{
+ struct list_head tmp;
+ struct list_head *next;
+ lstcon_rpc_ent_t *ent;
+ srpc_generic_reply_t *rep;
+ lstcon_rpc_t *crpc;
+ srpc_msg_t *msg;
+ lstcon_node_t *nd;
+ cfs_duration_t dur;
+ struct timeval tv;
+ int error;
+
+ LASSERT (head_up != NULL);
+
+ next = head_up;
+
+ list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
+ if (copy_from_user(&tmp, next,
+ sizeof(struct list_head)))
+ return -EFAULT;
+
+ if (tmp.next == head_up)
+ return 0;
+
+ next = tmp.next;
+
+ ent = list_entry(next, lstcon_rpc_ent_t, rpe_link);
+
+ LASSERT (crpc->crp_stamp != 0);
+
+ error = lstcon_rpc_get_reply(crpc, &msg);
+
+ nd = crpc->crp_node;
+
+ dur = (cfs_duration_t)cfs_time_sub(crpc->crp_stamp,
+ (cfs_time_t)console_session.ses_id.ses_stamp);
+ cfs_duration_usec(dur, &tv);
+
+ if (copy_to_user(&ent->rpe_peer,
+ &nd->nd_id, sizeof(lnet_process_id_t)) ||
+ copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) ||
+ copy_to_user(&ent->rpe_state,
+ &nd->nd_state, sizeof(nd->nd_state)) ||
+ copy_to_user(&ent->rpe_rpc_errno, &error,
+ sizeof(error)))
+ return -EFAULT;
+
+ if (error != 0)
+ continue;
+
+ /* RPC is done */
+ rep = (srpc_generic_reply_t *)&msg->msg_body.reply;
+
+ if (copy_to_user(&ent->rpe_sid,
+ &rep->sid, sizeof(lst_sid_t)) ||
+ copy_to_user(&ent->rpe_fwk_errno,
+ &rep->status, sizeof(rep->status)))
+ return -EFAULT;
+
+ if (readent == NULL)
+ continue;
+
+ if ((error = readent(trans->tas_opc, msg, ent)) != 0)
+ return error;
+ }
+
+ return 0;
+}
+
+void
+lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
+{
+ srpc_client_rpc_t *rpc;
+ lstcon_rpc_t *crpc;
+ lstcon_rpc_t *tmp;
+ int count = 0;
+
+ list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list,
+ crp_link) {
+ rpc = crpc->crp_rpc;
+
+ spin_lock(&rpc->crpc_lock);
+
+ /* free it if not posted or finished already */
+ if (!crpc->crp_posted || crpc->crp_finished) {
+ spin_unlock(&rpc->crpc_lock);
+
+ list_del_init(&crpc->crp_link);
+ lstcon_rpc_put(crpc);
+
+ continue;
+ }
+
+ /* rpcs can be still not callbacked (even LNetMDUnlink is called)
+ * because huge timeout for inaccessible network, don't make
+ * user wait for them, just abandon them, they will be recycled
+ * in callback */
+
+ LASSERT (crpc->crp_status != 0);
+
+ crpc->crp_node = NULL;
+ crpc->crp_trans = NULL;
+ list_del_init(&crpc->crp_link);
+ count ++;
+
+ spin_unlock(&rpc->crpc_lock);
+
+ atomic_dec(&trans->tas_remaining);
+ }
+
+ LASSERT (atomic_read(&trans->tas_remaining) == 0);
+
+ list_del(&trans->tas_link);
+ if (!list_empty(&trans->tas_olink))
+ list_del(&trans->tas_olink);
+
+ CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n",
+ lstcon_rpc_trans_name(trans->tas_opc), count);
+
+ LIBCFS_FREE(trans, sizeof(*trans));
+
+ return;
+}
+
+int
+lstcon_sesrpc_prep(lstcon_node_t *nd, int transop,
+ unsigned feats, lstcon_rpc_t **crpc)
+{
+ srpc_mksn_reqst_t *msrq;
+ srpc_rmsn_reqst_t *rsrq;
+ int rc;
+
+ switch (transop) {
+ case LST_TRANS_SESNEW:
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION,
+ feats, 0, 0, crpc);
+ if (rc != 0)
+ return rc;
+
+ msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst;
+ msrq->mksn_sid = console_session.ses_id;
+ msrq->mksn_force = console_session.ses_force;
+ strncpy(msrq->mksn_name, console_session.ses_name,
+ strlen(console_session.ses_name));
+ break;
+
+ case LST_TRANS_SESEND:
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION,
+ feats, 0, 0, crpc);
+ if (rc != 0)
+ return rc;
+
+ rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst;
+ rsrq->rmsn_sid = console_session.ses_id;
+ break;
+
+ default:
+ LBUG();
+ }
+
+ return 0;
+}
+
+int
+lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+{
+ srpc_debug_reqst_t *drq;
+ int rc;
+
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
+ if (rc != 0)
+ return rc;
+
+ drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
+
+ drq->dbg_sid = console_session.ses_id;
+ drq->dbg_flags = 0;
+
+ return rc;
+}
+
+int
+lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
+ lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc)
+{
+ lstcon_batch_t *batch;
+ srpc_batch_reqst_t *brq;
+ int rc;
+
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
+ if (rc != 0)
+ return rc;
+
+ brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst;
+
+ brq->bar_sid = console_session.ses_id;
+ brq->bar_bid = tsb->tsb_id;
+ brq->bar_testidx = tsb->tsb_index;
+ brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN :
+ (transop == LST_TRANS_TSBSTOP ? SRPC_BATCH_OPC_STOP:
+ SRPC_BATCH_OPC_QUERY);
+
+ if (transop != LST_TRANS_TSBRUN &&
+ transop != LST_TRANS_TSBSTOP)
+ return 0;
+
+ LASSERT (tsb->tsb_index == 0);
+
+ batch = (lstcon_batch_t *)tsb;
+ brq->bar_arg = batch->bat_arg;
+
+ return 0;
+}
+
+int
+lstcon_statrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+{
+ srpc_stat_reqst_t *srq;
+ int rc;
+
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
+ if (rc != 0)
+ return rc;
+
+ srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst;
+
+ srq->str_sid = console_session.ses_id;
+ srq->str_type = 0; /* XXX remove it */
+
+ return 0;
+}
+
+lnet_process_id_packed_t *
+lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
+{
+ lnet_process_id_packed_t *pid;
+ int i;
+
+ i = idx / SFW_ID_PER_PAGE;
+
+ LASSERT (i < nkiov);
+
+ pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page);
+
+ return &pid[idx % SFW_ID_PER_PAGE];
+}
+
+int
+lstcon_dstnodes_prep(lstcon_group_t *grp, int idx,
+ int dist, int span, int nkiov, lnet_kiov_t *kiov)
+{
+ lnet_process_id_packed_t *pid;
+ lstcon_ndlink_t *ndl;
+ lstcon_node_t *nd;
+ int start;
+ int end;
+ int i = 0;
+
+ LASSERT (dist >= 1);
+ LASSERT (span >= 1);
+ LASSERT (grp->grp_nnode >= 1);
+
+ if (span > grp->grp_nnode)
+ return -EINVAL;
+
+ start = ((idx / dist) * span) % grp->grp_nnode;
+ end = ((idx / dist) * span + span - 1) % grp->grp_nnode;
+
+ list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
+ nd = ndl->ndl_node;
+ if (i < start) {
+ i ++;
+ continue;
+ }
+
+ if (i > (end >= start ? end: grp->grp_nnode))
+ break;
+
+ pid = lstcon_next_id((i - start), nkiov, kiov);
+ pid->nid = nd->nd_id.nid;
+ pid->pid = nd->nd_id.pid;
+ i++;
+ }
+
+ if (start <= end) /* done */
+ return 0;
+
+ list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) {
+ if (i > grp->grp_nnode + end)
+ break;
+
+ nd = ndl->ndl_node;
+ pid = lstcon_next_id((i - start), nkiov, kiov);
+ pid->nid = nd->nd_id.nid;
+ pid->pid = nd->nd_id.pid;
+ i++;
+ }
+
+ return 0;
+}
+
+int
+lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req)
+{
+ test_ping_req_t *prq = &req->tsr_u.ping;
+
+ prq->png_size = param->png_size;
+ prq->png_flags = param->png_flags;
+ /* TODO dest */
+ return 0;
+}
+
+int
+lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+{
+ test_bulk_req_t *brq = &req->tsr_u.bulk_v0;
+
+ brq->blk_opc = param->blk_opc;
+ brq->blk_npg = (param->blk_size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
+ brq->blk_flags = param->blk_flags;
+
+ return 0;
+}
+
+int
+lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+{
+ test_bulk_req_v1_t *brq = &req->tsr_u.bulk_v1;
+
+ brq->blk_opc = param->blk_opc;
+ brq->blk_flags = param->blk_flags;
+ brq->blk_len = param->blk_size;
+ brq->blk_offset = 0; /* reserved */
+
+ return 0;
+}
+
+int
+lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
+ lstcon_test_t *test, lstcon_rpc_t **crpc)
+{
+ lstcon_group_t *sgrp = test->tes_src_grp;
+ lstcon_group_t *dgrp = test->tes_dst_grp;
+ srpc_test_reqst_t *trq;
+ srpc_bulk_t *bulk;
+ int i;
+ int npg = 0;
+ int nob = 0;
+ int rc = 0;
+
+ if (transop == LST_TRANS_TSBCLIADD) {
+ npg = sfw_id_pages(test->tes_span);
+ nob = (feats & LST_FEAT_BULK_LEN) == 0 ?
+ npg * PAGE_CACHE_SIZE :
+ sizeof(lnet_process_id_packed_t) * test->tes_span;
+ }
+
+ rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc);
+ if (rc != 0)
+ return rc;
+
+ trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst;
+
+ if (transop == LST_TRANS_TSBSRVADD) {
+ int ndist = (sgrp->grp_nnode + test->tes_dist - 1) / test->tes_dist;
+ int nspan = (dgrp->grp_nnode + test->tes_span - 1) / test->tes_span;
+ int nmax = (ndist + nspan - 1) / nspan;
+
+ trq->tsr_ndest = 0;
+ trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
+
+ } else {
+ bulk = &(*crpc)->crp_rpc->crpc_bulk;
+
+ for (i = 0; i < npg; i++) {
+ int len;
+
+ LASSERT(nob > 0);
+
+ len = (feats & LST_FEAT_BULK_LEN) == 0 ?
+ PAGE_CACHE_SIZE : min_t(int, nob, PAGE_CACHE_SIZE);
+ nob -= len;
+
+ bulk->bk_iovs[i].kiov_offset = 0;
+ bulk->bk_iovs[i].kiov_len = len;
+ bulk->bk_iovs[i].kiov_page =
+ alloc_page(GFP_IOFS);
+
+ if (bulk->bk_iovs[i].kiov_page == NULL) {
+ lstcon_rpc_put(*crpc);
+ return -ENOMEM;
+ }
+ }
+
+ bulk->bk_sink = 0;
+
+ LASSERT (transop == LST_TRANS_TSBCLIADD);
+
+ rc = lstcon_dstnodes_prep(test->tes_dst_grp,
+ test->tes_cliidx++,
+ test->tes_dist,
+ test->tes_span,
+ npg, &bulk->bk_iovs[0]);
+ if (rc != 0) {
+ lstcon_rpc_put(*crpc);
+ return rc;
+ }
+
+ trq->tsr_ndest = test->tes_span;
+ trq->tsr_loop = test->tes_loop;
+ }
+
+ trq->tsr_sid = console_session.ses_id;
+ trq->tsr_bid = test->tes_hdr.tsb_id;
+ trq->tsr_concur = test->tes_concur;
+ trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0;
+ trq->tsr_stop_onerr = !!test->tes_stop_onerr;
+
+ switch (test->tes_type) {
+ case LST_TEST_PING:
+ trq->tsr_service = SRPC_SERVICE_PING;
+ rc = lstcon_pingrpc_prep((lst_test_ping_param_t *)
+ &test->tes_param[0], trq);
+ break;
+
+ case LST_TEST_BULK:
+ trq->tsr_service = SRPC_SERVICE_BRW;
+ if ((feats & LST_FEAT_BULK_LEN) == 0) {
+ rc = lstcon_bulkrpc_v0_prep((lst_test_bulk_param_t *)
+ &test->tes_param[0], trq);
+ } else {
+ rc = lstcon_bulkrpc_v1_prep((lst_test_bulk_param_t *)
+ &test->tes_param[0], trq);
+ }
+
+ break;
+ default:
+ LBUG();
+ break;
+ }
+
+ return rc;
+}
+
+int
+lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans,
+ lstcon_node_t *nd, srpc_msg_t *reply)
+{
+ srpc_mksn_reply_t *mksn_rep = &reply->msg_body.mksn_reply;
+ int status = mksn_rep->mksn_status;
+
+ if (status == 0 &&
+ (reply->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+ mksn_rep->mksn_status = EPROTO;
+ status = EPROTO;
+ }
+
+ if (status == EPROTO) {
+ CNETERR("session protocol error from %s: %u\n",
+ libcfs_nid2str(nd->nd_id.nid),
+ reply->msg_ses_feats);
+ }
+
+ if (status != 0)
+ return status;
+
+ if (!trans->tas_feats_updated) {
+ trans->tas_feats_updated = 1;
+ trans->tas_features = reply->msg_ses_feats;
+ }
+
+ if (reply->msg_ses_feats != trans->tas_features) {
+ CNETERR("Framework features %x from %s is different with "
+ "features on this transaction: %x\n",
+ reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid),
+ trans->tas_features);
+ status = mksn_rep->mksn_status = EPROTO;
+ }
+
+ if (status == 0) {
+ /* session timeout on remote node */
+ nd->nd_timeout = mksn_rep->mksn_timeout;
+ }
+
+ return status;
+}
+
+void
+lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg,
+ lstcon_node_t *nd, lstcon_trans_stat_t *stat)
+{
+ srpc_rmsn_reply_t *rmsn_rep;
+ srpc_debug_reply_t *dbg_rep;
+ srpc_batch_reply_t *bat_rep;
+ srpc_test_reply_t *test_rep;
+ srpc_stat_reply_t *stat_rep;
+ int rc = 0;
+
+ switch (trans->tas_opc) {
+ case LST_TRANS_SESNEW:
+ rc = lstcon_sesnew_stat_reply(trans, nd, msg);
+ if (rc == 0) {
+ lstcon_sesop_stat_success(stat, 1);
+ return;
+ }
+
+ lstcon_sesop_stat_failure(stat, 1);
+ break;
+
+ case LST_TRANS_SESEND:
+ rmsn_rep = &msg->msg_body.rmsn_reply;
+ /* ESRCH is not an error for end session */
+ if (rmsn_rep->rmsn_status == 0 ||
+ rmsn_rep->rmsn_status == ESRCH) {
+ lstcon_sesop_stat_success(stat, 1);
+ return;
+ }
+
+ lstcon_sesop_stat_failure(stat, 1);
+ rc = rmsn_rep->rmsn_status;
+ break;
+
+ case LST_TRANS_SESQRY:
+ case LST_TRANS_SESPING:
+ dbg_rep = &msg->msg_body.dbg_reply;
+
+ if (dbg_rep->dbg_status == ESRCH) {
+ lstcon_sesqry_stat_unknown(stat, 1);
+ return;
+ }
+
+ if (lstcon_session_match(dbg_rep->dbg_sid))
+ lstcon_sesqry_stat_active(stat, 1);
+ else
+ lstcon_sesqry_stat_busy(stat, 1);
+ return;
+
+ case LST_TRANS_TSBRUN:
+ case LST_TRANS_TSBSTOP:
+ bat_rep = &msg->msg_body.bat_reply;
+
+ if (bat_rep->bar_status == 0) {
+ lstcon_tsbop_stat_success(stat, 1);
+ return;
+ }
+
+ if (bat_rep->bar_status == EPERM &&
+ trans->tas_opc == LST_TRANS_TSBSTOP) {
+ lstcon_tsbop_stat_success(stat, 1);
+ return;
+ }
+
+ lstcon_tsbop_stat_failure(stat, 1);
+ rc = bat_rep->bar_status;
+ break;
+
+ case LST_TRANS_TSBCLIQRY:
+ case LST_TRANS_TSBSRVQRY:
+ bat_rep = &msg->msg_body.bat_reply;
+
+ if (bat_rep->bar_active != 0)
+ lstcon_tsbqry_stat_run(stat, 1);
+ else
+ lstcon_tsbqry_stat_idle(stat, 1);
+
+ if (bat_rep->bar_status == 0)
+ return;
+
+ lstcon_tsbqry_stat_failure(stat, 1);
+ rc = bat_rep->bar_status;
+ break;
+
+ case LST_TRANS_TSBCLIADD:
+ case LST_TRANS_TSBSRVADD:
+ test_rep = &msg->msg_body.tes_reply;
+
+ if (test_rep->tsr_status == 0) {
+ lstcon_tsbop_stat_success(stat, 1);
+ return;
+ }
+
+ lstcon_tsbop_stat_failure(stat, 1);
+ rc = test_rep->tsr_status;
+ break;
+
+ case LST_TRANS_STATQRY:
+ stat_rep = &msg->msg_body.stat_reply;
+
+ if (stat_rep->str_status == 0) {
+ lstcon_statqry_stat_success(stat, 1);
+ return;
+ }
+
+ lstcon_statqry_stat_failure(stat, 1);
+ rc = stat_rep->str_status;
+ break;
+
+ default:
+ LBUG();
+ }
+
+ if (stat->trs_fwk_errno == 0)
+ stat->trs_fwk_errno = rc;
+
+ return;
+}
+
+int
+lstcon_rpc_trans_ndlist(struct list_head *ndlist,
+ struct list_head *translist, int transop,
+ void *arg, lstcon_rpc_cond_func_t condition,
+ lstcon_rpc_trans_t **transpp)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_ndlink_t *ndl;
+ lstcon_node_t *nd;
+ lstcon_rpc_t *rpc;
+ unsigned feats;
+ int rc;
+
+ /* Creating session RPG for list of nodes */
+
+ rc = lstcon_rpc_trans_prep(translist, transop, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction %d: %d\n", transop, rc);
+ return rc;
+ }
+
+ feats = trans->tas_features;
+ list_for_each_entry(ndl, ndlist, ndl_link) {
+ rc = condition == NULL ? 1 :
+ condition(transop, ndl->ndl_node, arg);
+
+ if (rc == 0)
+ continue;
+
+ if (rc < 0) {
+ CDEBUG(D_NET, "Condition error while creating RPC "
+ " for transaction %d: %d\n", transop, rc);
+ break;
+ }
+
+ nd = ndl->ndl_node;
+
+ switch (transop) {
+ case LST_TRANS_SESNEW:
+ case LST_TRANS_SESEND:
+ rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc);
+ break;
+ case LST_TRANS_SESQRY:
+ case LST_TRANS_SESPING:
+ rc = lstcon_dbgrpc_prep(nd, feats, &rpc);
+ break;
+ case LST_TRANS_TSBCLIADD:
+ case LST_TRANS_TSBSRVADD:
+ rc = lstcon_testrpc_prep(nd, transop, feats,
+ (lstcon_test_t *)arg, &rpc);
+ break;
+ case LST_TRANS_TSBRUN:
+ case LST_TRANS_TSBSTOP:
+ case LST_TRANS_TSBCLIQRY:
+ case LST_TRANS_TSBSRVQRY:
+ rc = lstcon_batrpc_prep(nd, transop, feats,
+ (lstcon_tsb_hdr_t *)arg, &rpc);
+ break;
+ case LST_TRANS_STATQRY:
+ rc = lstcon_statrpc_prep(nd, feats, &rpc);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc != 0) {
+ CERROR("Failed to create RPC for transaction %s: %d\n",
+ lstcon_rpc_trans_name(transop), rc);
+ break;
+ }
+
+ lstcon_rpc_trans_addreq(trans, rpc);
+ }
+
+ if (rc == 0) {
+ *transpp = trans;
+ return 0;
+ }
+
+ lstcon_rpc_trans_destroy(trans);
+
+ return rc;
+}
+
+void
+lstcon_rpc_pinger(void *arg)
+{
+ stt_timer_t *ptimer = (stt_timer_t *)arg;
+ lstcon_rpc_trans_t *trans;
+ lstcon_rpc_t *crpc;
+ srpc_msg_t *rep;
+ srpc_debug_reqst_t *drq;
+ lstcon_ndlink_t *ndl;
+ lstcon_node_t *nd;
+ time_t intv;
+ int count = 0;
+ int rc;
+
+ /* RPC pinger is a special case of transaction,
+ * it's called by timer at 8 seconds interval.
+ */
+ mutex_lock(&console_session.ses_mutex);
+
+ if (console_session.ses_shutdown || console_session.ses_expired) {
+ mutex_unlock(&console_session.ses_mutex);
+ return;
+ }
+
+ if (!console_session.ses_expired &&
+ cfs_time_current_sec() - console_session.ses_laststamp >
+ (time_t)console_session.ses_timeout)
+ console_session.ses_expired = 1;
+
+ trans = console_session.ses_ping;
+
+ LASSERT (trans != NULL);
+
+ list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) {
+ nd = ndl->ndl_node;
+
+ if (console_session.ses_expired) {
+ /* idle console, end session on all nodes */
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ continue;
+
+ rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND,
+ trans->tas_features, &crpc);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ break;
+ }
+
+ lstcon_rpc_trans_addreq(trans, crpc);
+ lstcon_rpc_post(crpc);
+
+ continue;
+ }
+
+ crpc = &nd->nd_ping;
+
+ if (crpc->crp_rpc != NULL) {
+ LASSERT (crpc->crp_trans == trans);
+ LASSERT (!list_empty(&crpc->crp_link));
+
+ spin_lock(&crpc->crp_rpc->crpc_lock);
+
+ LASSERT(crpc->crp_posted);
+
+ if (!crpc->crp_finished) {
+ /* in flight */
+ spin_unlock(&crpc->crp_rpc->crpc_lock);
+ continue;
+ }
+
+ spin_unlock(&crpc->crp_rpc->crpc_lock);
+
+ lstcon_rpc_get_reply(crpc, &rep);
+
+ list_del_init(&crpc->crp_link);
+
+ lstcon_rpc_put(crpc);
+ }
+
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ continue;
+
+ intv = cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+ nd->nd_stamp));
+ if (intv < (time_t)nd->nd_timeout / 2)
+ continue;
+
+ rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG,
+ trans->tas_features, 0, 0, 1, crpc);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ break;
+ }
+
+ drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst;
+
+ drq->dbg_sid = console_session.ses_id;
+ drq->dbg_flags = 0;
+
+ lstcon_rpc_trans_addreq(trans, crpc);
+ lstcon_rpc_post(crpc);
+
+ count ++;
+ }
+
+ if (console_session.ses_expired) {
+ mutex_unlock(&console_session.ses_mutex);
+ return;
+ }
+
+ CDEBUG(D_NET, "Ping %d nodes in session\n", count);
+
+ ptimer->stt_expires = (cfs_time_t)(cfs_time_current_sec() + LST_PING_INTERVAL);
+ stt_add_timer(ptimer);
+
+ mutex_unlock(&console_session.ses_mutex);
+}
+
+int
+lstcon_rpc_pinger_start(void)
+{
+ stt_timer_t *ptimer;
+ int rc;
+
+ LASSERT (list_empty(&console_session.ses_rpc_freelist));
+ LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0);
+
+ rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING,
+ &console_session.ses_ping);
+ if (rc != 0) {
+ CERROR("Failed to create console pinger\n");
+ return rc;
+ }
+
+ ptimer = &console_session.ses_ping_timer;
+ ptimer->stt_expires = (cfs_time_t)(cfs_time_current_sec() + LST_PING_INTERVAL);
+
+ stt_add_timer(ptimer);
+
+ return 0;
+}
+
+void
+lstcon_rpc_pinger_stop(void)
+{
+ LASSERT (console_session.ses_shutdown);
+
+ stt_del_timer(&console_session.ses_ping_timer);
+
+ lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN);
+ lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat());
+ lstcon_rpc_trans_destroy(console_session.ses_ping);
+
+ memset(lstcon_trans_stat(), 0, sizeof(lstcon_trans_stat_t));
+
+ console_session.ses_ping = NULL;
+}
+
+void
+lstcon_rpc_cleanup_wait(void)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_rpc_t *crpc;
+ struct list_head *pacer;
+ struct list_head zlist;
+
+ /* Called with hold of global mutex */
+
+ LASSERT (console_session.ses_shutdown);
+
+ while (!list_empty(&console_session.ses_trans_list)) {
+ list_for_each(pacer, &console_session.ses_trans_list) {
+ trans = list_entry(pacer, lstcon_rpc_trans_t,
+ tas_link);
+
+ CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
+ lstcon_rpc_trans_name(trans->tas_opc));
+
+ wake_up(&trans->tas_waitq);
+ }
+
+ mutex_unlock(&console_session.ses_mutex);
+
+ CWARN("Session is shutting down, "
+ "waiting for termination of transactions\n");
+ cfs_pause(cfs_time_seconds(1));
+
+ mutex_lock(&console_session.ses_mutex);
+ }
+
+ spin_lock(&console_session.ses_rpc_lock);
+
+ lst_wait_until((atomic_read(&console_session.ses_rpc_counter) == 0),
+ console_session.ses_rpc_lock,
+ "Network is not accessable or target is down, "
+ "waiting for %d console RPCs to being recycled\n",
+ atomic_read(&console_session.ses_rpc_counter));
+
+ list_add(&zlist, &console_session.ses_rpc_freelist);
+ list_del_init(&console_session.ses_rpc_freelist);
+
+ spin_unlock(&console_session.ses_rpc_lock);
+
+ while (!list_empty(&zlist)) {
+ crpc = list_entry(zlist.next, lstcon_rpc_t, crp_link);
+
+ list_del(&crpc->crp_link);
+ LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t));
+ }
+}
+
+int
+lstcon_rpc_module_init(void)
+{
+ INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list);
+ console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger;
+ console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer;
+
+ console_session.ses_ping = NULL;
+
+ spin_lock_init(&console_session.ses_rpc_lock);
+ atomic_set(&console_session.ses_rpc_counter, 0);
+ INIT_LIST_HEAD(&console_session.ses_rpc_freelist);
+
+ return 0;
+}
+
+void
+lstcon_rpc_module_fini(void)
+{
+ LASSERT (list_empty(&console_session.ses_rpc_freelist));
+ LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0);
+}
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h
new file mode 100644
index 000000000000..9aba24a2eab9
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.h
@@ -0,0 +1,146 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * /lnet/selftest/conrpc.h
+ *
+ * Console rpc
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ */
+
+#ifndef __LST_CONRPC_H__
+#define __LST_CONRPC_H__
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+#include "rpc.h"
+#include "selftest.h"
+
+/* Console rpc and rpc transaction */
+#define LST_TRANS_TIMEOUT 30
+#define LST_TRANS_MIN_TIMEOUT 3
+
+#define LST_VALIDATE_TIMEOUT(t) MIN(MAX(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
+
+#define LST_PING_INTERVAL 8
+
+struct lstcon_rpc_trans;
+struct lstcon_tsb_hdr;
+struct lstcon_test;
+struct lstcon_node;
+
+typedef struct lstcon_rpc {
+ struct list_head crp_link; /* chain on rpc transaction */
+ srpc_client_rpc_t *crp_rpc; /* client rpc */
+ struct lstcon_node *crp_node; /* destination node */
+ struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */
+
+ unsigned int crp_posted:1; /* rpc is posted */
+ unsigned int crp_finished:1; /* rpc is finished */
+ unsigned int crp_unpacked:1; /* reply is unpacked */
+ /** RPC is embedded in other structure and can't free it */
+ unsigned int crp_embedded:1;
+ int crp_status; /* console rpc errors */
+ cfs_time_t crp_stamp; /* replied time stamp */
+} lstcon_rpc_t;
+
+typedef struct lstcon_rpc_trans {
+ struct list_head tas_olink; /* link chain on owner list */
+ struct list_head tas_link; /* link chain on global list */
+ int tas_opc; /* operation code of transaction */
+ /* features mask is uptodate */
+ unsigned tas_feats_updated;
+ /* test features mask */
+ unsigned tas_features;
+ wait_queue_head_t tas_waitq; /* wait queue head */
+ atomic_t tas_remaining; /* # of un-scheduled rpcs */
+ struct list_head tas_rpcs_list; /* queued requests */
+} lstcon_rpc_trans_t;
+
+#define LST_TRANS_PRIVATE 0x1000
+
+#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01)
+#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02)
+#define LST_TRANS_SESQRY 0x03
+#define LST_TRANS_SESPING 0x04
+
+#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11)
+#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12)
+#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13)
+#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14)
+#define LST_TRANS_TSBCLIQRY 0x15
+#define LST_TRANS_TSBSRVQRY 0x16
+
+#define LST_TRANS_STATQRY 0x21
+
+typedef int (* lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
+typedef int (* lstcon_rpc_readent_func_t)(int, srpc_msg_t *, lstcon_rpc_ent_t *);
+
+int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
+ unsigned version, lstcon_rpc_t **crpc);
+int lstcon_dbgrpc_prep(struct lstcon_node *nd,
+ unsigned version, lstcon_rpc_t **crpc);
+int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
+ struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc);
+int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
+ struct lstcon_test *test, lstcon_rpc_t **crpc);
+int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version,
+ lstcon_rpc_t **crpc);
+void lstcon_rpc_put(lstcon_rpc_t *crpc);
+int lstcon_rpc_trans_prep(struct list_head *translist,
+ int transop, lstcon_rpc_trans_t **transpp);
+int lstcon_rpc_trans_ndlist(struct list_head *ndlist,
+ struct list_head *translist, int transop,
+ void *arg, lstcon_rpc_cond_func_t condition,
+ lstcon_rpc_trans_t **transpp);
+void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans,
+ lstcon_trans_stat_t *stat);
+int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+ struct list_head *head_up,
+ lstcon_rpc_readent_func_t readent);
+void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error);
+void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans);
+void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req);
+int lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout);
+int lstcon_rpc_pinger_start(void);
+void lstcon_rpc_pinger_stop(void);
+void lstcon_rpc_cleanup_wait(void);
+int lstcon_rpc_module_init(void);
+void lstcon_rpc_module_fini(void);
+
+
+#endif
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
new file mode 100644
index 000000000000..78e8d0467267
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -0,0 +1,2071 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * Infrastructure of LST console
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lib-lnet.h>
+#include "console.h"
+#include "conrpc.h"
+
+#define LST_NODE_STATE_COUNTER(nd, p) \
+do { \
+ if ((nd)->nd_state == LST_NODE_ACTIVE) \
+ (p)->nle_nactive ++; \
+ else if ((nd)->nd_state == LST_NODE_BUSY) \
+ (p)->nle_nbusy ++; \
+ else if ((nd)->nd_state == LST_NODE_DOWN) \
+ (p)->nle_ndown ++; \
+ else \
+ (p)->nle_nunknown ++; \
+ (p)->nle_nnode ++; \
+} while (0)
+
+lstcon_session_t console_session;
+
+void
+lstcon_node_get(lstcon_node_t *nd)
+{
+ LASSERT (nd->nd_ref >= 1);
+
+ nd->nd_ref++;
+}
+
+static int
+lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
+{
+ lstcon_ndlink_t *ndl;
+ unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
+
+ LASSERT (id.nid != LNET_NID_ANY);
+
+ list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], ndl_hlink) {
+ if (ndl->ndl_node->nd_id.nid != id.nid ||
+ ndl->ndl_node->nd_id.pid != id.pid)
+ continue;
+
+ lstcon_node_get(ndl->ndl_node);
+ *ndpp = ndl->ndl_node;
+ return 0;
+ }
+
+ if (!create)
+ return -ENOENT;
+
+ LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+ if (*ndpp == NULL)
+ return -ENOMEM;
+
+ ndl = (lstcon_ndlink_t *)(*ndpp + 1);
+
+ ndl->ndl_node = *ndpp;
+
+ ndl->ndl_node->nd_ref = 1;
+ ndl->ndl_node->nd_id = id;
+ ndl->ndl_node->nd_stamp = cfs_time_current();
+ ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
+ ndl->ndl_node->nd_timeout = 0;
+ memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t));
+
+ /* queued in global hash & list, no refcount is taken by
+ * global hash & list, if caller release his refcount,
+ * node will be released */
+ list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]);
+ list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list);
+
+ return 0;
+}
+
+void
+lstcon_node_put(lstcon_node_t *nd)
+{
+ lstcon_ndlink_t *ndl;
+
+ LASSERT (nd->nd_ref > 0);
+
+ if (--nd->nd_ref > 0)
+ return;
+
+ ndl = (lstcon_ndlink_t *)(nd + 1);
+
+ LASSERT (!list_empty(&ndl->ndl_link));
+ LASSERT (!list_empty(&ndl->ndl_hlink));
+
+ /* remove from session */
+ list_del(&ndl->ndl_link);
+ list_del(&ndl->ndl_hlink);
+
+ LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+}
+
+static int
+lstcon_ndlink_find(struct list_head *hash,
+ lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create)
+{
+ unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
+ lstcon_ndlink_t *ndl;
+ lstcon_node_t *nd;
+ int rc;
+
+ if (id.nid == LNET_NID_ANY)
+ return -EINVAL;
+
+ /* search in hash */
+ list_for_each_entry(ndl, &hash[idx], ndl_hlink) {
+ if (ndl->ndl_node->nd_id.nid != id.nid ||
+ ndl->ndl_node->nd_id.pid != id.pid)
+ continue;
+
+ *ndlpp = ndl;
+ return 0;
+ }
+
+ if (create == 0)
+ return -ENOENT;
+
+ /* find or create in session hash */
+ rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0);
+ if (rc != 0)
+ return rc;
+
+ LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t));
+ if (ndl == NULL) {
+ lstcon_node_put(nd);
+ return -ENOMEM;
+ }
+
+ *ndlpp = ndl;
+
+ ndl->ndl_node = nd;
+ INIT_LIST_HEAD(&ndl->ndl_link);
+ list_add_tail(&ndl->ndl_hlink, &hash[idx]);
+
+ return 0;
+}
+
+static void
+lstcon_ndlink_release(lstcon_ndlink_t *ndl)
+{
+ LASSERT (list_empty(&ndl->ndl_link));
+ LASSERT (!list_empty(&ndl->ndl_hlink));
+
+ list_del(&ndl->ndl_hlink); /* delete from hash */
+ lstcon_node_put(ndl->ndl_node);
+
+ LIBCFS_FREE(ndl, sizeof(*ndl));
+}
+
+static int
+lstcon_group_alloc(char *name, lstcon_group_t **grpp)
+{
+ lstcon_group_t *grp;
+ int i;
+
+ LIBCFS_ALLOC(grp, offsetof(lstcon_group_t,
+ grp_ndl_hash[LST_NODE_HASHSIZE]));
+ if (grp == NULL)
+ return -ENOMEM;
+
+ memset(grp, 0, offsetof(lstcon_group_t,
+ grp_ndl_hash[LST_NODE_HASHSIZE]));
+
+ grp->grp_ref = 1;
+ if (name != NULL)
+ strcpy(grp->grp_name, name);
+
+ INIT_LIST_HEAD(&grp->grp_link);
+ INIT_LIST_HEAD(&grp->grp_ndl_list);
+ INIT_LIST_HEAD(&grp->grp_trans_list);
+
+ for (i = 0; i < LST_NODE_HASHSIZE; i++)
+ INIT_LIST_HEAD(&grp->grp_ndl_hash[i]);
+
+ *grpp = grp;
+
+ return 0;
+}
+
+static void
+lstcon_group_addref(lstcon_group_t *grp)
+{
+ grp->grp_ref ++;
+}
+
+static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *);
+
+static void
+lstcon_group_drain(lstcon_group_t *grp, int keep)
+{
+ lstcon_ndlink_t *ndl;
+ lstcon_ndlink_t *tmp;
+
+ list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
+ if ((ndl->ndl_node->nd_state & keep) == 0)
+ lstcon_group_ndlink_release(grp, ndl);
+ }
+}
+
+static void
+lstcon_group_decref(lstcon_group_t *grp)
+{
+ int i;
+
+ if (--grp->grp_ref > 0)
+ return;
+
+ if (!list_empty(&grp->grp_link))
+ list_del(&grp->grp_link);
+
+ lstcon_group_drain(grp, 0);
+
+ for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+ LASSERT (list_empty(&grp->grp_ndl_hash[i]));
+ }
+
+ LIBCFS_FREE(grp, offsetof(lstcon_group_t,
+ grp_ndl_hash[LST_NODE_HASHSIZE]));
+}
+
+static int
+lstcon_group_find(char *name, lstcon_group_t **grpp)
+{
+ lstcon_group_t *grp;
+
+ list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
+ if (strncmp(grp->grp_name, name, LST_NAME_SIZE) != 0)
+ continue;
+
+ lstcon_group_addref(grp); /* +1 ref for caller */
+ *grpp = grp;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static void
+lstcon_group_put(lstcon_group_t *grp)
+{
+ lstcon_group_decref(grp);
+}
+
+static int
+lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id,
+ lstcon_ndlink_t **ndlpp, int create)
+{
+ int rc;
+
+ rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create);
+ if (rc != 0)
+ return rc;
+
+ if (!list_empty(&(*ndlpp)->ndl_link))
+ return 0;
+
+ list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list);
+ grp->grp_nnode ++;
+
+ return 0;
+}
+
+static void
+lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl)
+{
+ list_del_init(&ndl->ndl_link);
+ lstcon_ndlink_release(ndl);
+ grp->grp_nnode --;
+}
+
+static void
+lstcon_group_ndlink_move(lstcon_group_t *old,
+ lstcon_group_t *new, lstcon_ndlink_t *ndl)
+{
+ unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
+ LST_NODE_HASHSIZE;
+
+ list_del(&ndl->ndl_hlink);
+ list_del(&ndl->ndl_link);
+ old->grp_nnode --;
+
+ list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]);
+ list_add_tail(&ndl->ndl_link, &new->grp_ndl_list);
+ new->grp_nnode ++;
+
+ return;
+}
+
+static void
+lstcon_group_move(lstcon_group_t *old, lstcon_group_t *new)
+{
+ lstcon_ndlink_t *ndl;
+
+ while (!list_empty(&old->grp_ndl_list)) {
+ ndl = list_entry(old->grp_ndl_list.next,
+ lstcon_ndlink_t, ndl_link);
+ lstcon_group_ndlink_move(old, new, ndl);
+ }
+}
+
+int
+lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
+ lstcon_group_t *grp = (lstcon_group_t *)arg;
+
+ switch (transop) {
+ case LST_TRANS_SESNEW:
+ if (nd->nd_state == LST_NODE_ACTIVE)
+ return 0;
+ break;
+
+ case LST_TRANS_SESEND:
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ return 0;
+
+ if (grp != NULL && nd->nd_ref > 1)
+ return 0;
+ break;
+
+ case LST_TRANS_SESQRY:
+ break;
+
+ default:
+ LBUG();
+ }
+
+ return 1;
+}
+
+int
+lstcon_sesrpc_readent(int transop, srpc_msg_t *msg,
+ lstcon_rpc_ent_t *ent_up)
+{
+ srpc_debug_reply_t *rep;
+
+ switch (transop) {
+ case LST_TRANS_SESNEW:
+ case LST_TRANS_SESEND:
+ return 0;
+
+ case LST_TRANS_SESQRY:
+ rep = &msg->msg_body.dbg_reply;
+
+ if (copy_to_user(&ent_up->rpe_priv[0],
+ &rep->dbg_timeout, sizeof(int)) ||
+ copy_to_user(&ent_up->rpe_payload[0],
+ &rep->dbg_name, LST_NAME_SIZE))
+ return -EFAULT;
+
+ return 0;
+
+ default:
+ LBUG();
+ }
+
+ return 0;
+}
+
+static int
+lstcon_group_nodes_add(lstcon_group_t *grp,
+ int count, lnet_process_id_t *ids_up,
+ unsigned *featp, struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_ndlink_t *ndl;
+ lstcon_group_t *tmp;
+ lnet_process_id_t id;
+ int i;
+ int rc;
+
+ rc = lstcon_group_alloc(NULL, &tmp);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0 ; i < count; i++) {
+ if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+ rc = -EFAULT;
+ break;
+ }
+
+ /* skip if it's in this group already */
+ rc = lstcon_group_ndlink_find(grp, id, &ndl, 0);
+ if (rc == 0)
+ continue;
+
+ /* add to tmp group */
+ rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1);
+ if (rc != 0) {
+ CERROR("Can't create ndlink, out of memory\n");
+ break;
+ }
+ }
+
+ if (rc != 0) {
+ lstcon_group_put(tmp);
+ return rc;
+ }
+
+ rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
+ &tmp->grp_trans_list, LST_TRANS_SESNEW,
+ tmp, lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ lstcon_group_put(tmp);
+ return rc;
+ }
+
+ /* post all RPCs */
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up,
+ lstcon_sesrpc_readent);
+ *featp = trans->tas_features;
+
+ /* destroy all RPGs */
+ lstcon_rpc_trans_destroy(trans);
+
+ lstcon_group_move(tmp, grp);
+ lstcon_group_put(tmp);
+
+ return rc;
+}
+
+static int
+lstcon_group_nodes_remove(lstcon_group_t *grp,
+ int count, lnet_process_id_t *ids_up,
+ struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_ndlink_t *ndl;
+ lstcon_group_t *tmp;
+ lnet_process_id_t id;
+ int rc;
+ int i;
+
+ /* End session and remove node from the group */
+
+ rc = lstcon_group_alloc(NULL, &tmp);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+ rc = -EFAULT;
+ goto error;
+ }
+
+ /* move node to tmp group */
+ if (lstcon_group_ndlink_find(grp, id, &ndl, 0) == 0)
+ lstcon_group_ndlink_move(grp, tmp, ndl);
+ }
+
+ rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list,
+ &tmp->grp_trans_list, LST_TRANS_SESEND,
+ tmp, lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ goto error;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+ lstcon_rpc_trans_destroy(trans);
+ /* release nodes anyway, because we can't rollback status */
+ lstcon_group_put(tmp);
+
+ return rc;
+error:
+ lstcon_group_move(tmp, grp);
+ lstcon_group_put(tmp);
+
+ return rc;
+}
+
+int
+lstcon_group_add(char *name)
+{
+ lstcon_group_t *grp;
+ int rc;
+
+ rc = (lstcon_group_find(name, &grp) == 0)? -EEXIST: 0;
+ if (rc != 0) {
+ /* find a group with same name */
+ lstcon_group_put(grp);
+ return rc;
+ }
+
+ rc = lstcon_group_alloc(name, &grp);
+ if (rc != 0) {
+ CERROR("Can't allocate descriptor for group %s\n", name);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&grp->grp_link, &console_session.ses_grp_list);
+
+ return rc;
+}
+
+int
+lstcon_nodes_add(char *name, int count, lnet_process_id_t *ids_up,
+ unsigned *featp, struct list_head *result_up)
+{
+ lstcon_group_t *grp;
+ int rc;
+
+ LASSERT (count > 0);
+ LASSERT (ids_up != NULL);
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", name);
+ return rc;
+ }
+
+ if (grp->grp_ref > 2) {
+ /* referred by other threads or test */
+ CDEBUG(D_NET, "Group %s is busy\n", name);
+ lstcon_group_put(grp);
+
+ return -EBUSY;
+ }
+
+ rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up);
+
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_group_del(char *name)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_group_t *grp;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group: %s\n", name);
+ return rc;
+ }
+
+ if (grp->grp_ref > 2) {
+ /* referred by others threads or test */
+ CDEBUG(D_NET, "Group %s is busy\n", name);
+ lstcon_group_put(grp);
+ return -EBUSY;
+ }
+
+ rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+ &grp->grp_trans_list, LST_TRANS_SESEND,
+ grp, lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ lstcon_group_put(grp);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ lstcon_rpc_trans_destroy(trans);
+
+ lstcon_group_put(grp);
+ /* -ref for session, it's destroyed,
+ * status can't be rolled back, destroy group anway */
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_group_clean(char *name, int args)
+{
+ lstcon_group_t *grp = NULL;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", name);
+ return rc;
+ }
+
+ if (grp->grp_ref > 2) {
+ /* referred by test */
+ CDEBUG(D_NET, "Group %s is busy\n", name);
+ lstcon_group_put(grp);
+ return -EBUSY;
+ }
+
+ args = (LST_NODE_ACTIVE | LST_NODE_BUSY |
+ LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args;
+
+ lstcon_group_drain(grp, args);
+
+ lstcon_group_put(grp);
+ /* release empty group */
+ if (list_empty(&grp->grp_ndl_list))
+ lstcon_group_put(grp);
+
+ return 0;
+}
+
+int
+lstcon_nodes_remove(char *name, int count,
+ lnet_process_id_t *ids_up, struct list_head *result_up)
+{
+ lstcon_group_t *grp = NULL;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group: %s\n", name);
+ return rc;
+ }
+
+ if (grp->grp_ref > 2) {
+ /* referred by test */
+ CDEBUG(D_NET, "Group %s is busy\n", name);
+ lstcon_group_put(grp);
+ return -EBUSY;
+ }
+
+ rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up);
+
+ lstcon_group_put(grp);
+ /* release empty group */
+ if (list_empty(&grp->grp_ndl_list))
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_group_refresh(char *name, struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_group_t *grp;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group: %s\n", name);
+ return rc;
+ }
+
+ if (grp->grp_ref > 2) {
+ /* referred by test */
+ CDEBUG(D_NET, "Group %s is busy\n", name);
+ lstcon_group_put(grp);
+ return -EBUSY;
+ }
+
+ /* re-invite all inactive nodes int the group */
+ rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+ &grp->grp_trans_list, LST_TRANS_SESNEW,
+ grp, lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ /* local error, return */
+ CDEBUG(D_NET, "Can't create transaction: %d\n", rc);
+ lstcon_group_put(grp);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+ lstcon_rpc_trans_destroy(trans);
+ /* -ref for me */
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_group_list(int index, int len, char *name_up)
+{
+ lstcon_group_t *grp;
+
+ LASSERT (index >= 0);
+ LASSERT (name_up != NULL);
+
+ list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
+ if (index-- == 0) {
+ return copy_to_user(name_up, grp->grp_name, len) ?
+ -EFAULT : 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+lstcon_nodes_getent(struct list_head *head, int *index_p,
+ int *count_p, lstcon_node_ent_t *dents_up)
+{
+ lstcon_ndlink_t *ndl;
+ lstcon_node_t *nd;
+ int count = 0;
+ int index = 0;
+
+ LASSERT (index_p != NULL && count_p != NULL);
+ LASSERT (dents_up != NULL);
+ LASSERT (*index_p >= 0);
+ LASSERT (*count_p > 0);
+
+ list_for_each_entry(ndl, head, ndl_link) {
+ if (index++ < *index_p)
+ continue;
+
+ if (count >= *count_p)
+ break;
+
+ nd = ndl->ndl_node;
+ if (copy_to_user(&dents_up[count].nde_id,
+ &nd->nd_id, sizeof(nd->nd_id)) ||
+ copy_to_user(&dents_up[count].nde_state,
+ &nd->nd_state, sizeof(nd->nd_state)))
+ return -EFAULT;
+
+ count ++;
+ }
+
+ if (index <= *index_p)
+ return -ENOENT;
+
+ *count_p = count;
+ *index_p = index;
+
+ return 0;
+}
+
+int
+lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p,
+ int *index_p, int *count_p, lstcon_node_ent_t *dents_up)
+{
+ lstcon_ndlist_ent_t *gentp;
+ lstcon_group_t *grp;
+ lstcon_ndlink_t *ndl;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", name);
+ return rc;
+ }
+
+ if (dents_up != 0) {
+ /* verbose query */
+ rc = lstcon_nodes_getent(&grp->grp_ndl_list,
+ index_p, count_p, dents_up);
+ lstcon_group_put(grp);
+
+ return rc;
+ }
+
+ /* non-verbose query */
+ LIBCFS_ALLOC(gentp, sizeof(lstcon_ndlist_ent_t));
+ if (gentp == NULL) {
+ CERROR("Can't allocate ndlist_ent\n");
+ lstcon_group_put(grp);
+
+ return -ENOMEM;
+ }
+
+ memset(gentp, 0, sizeof(lstcon_ndlist_ent_t));
+
+ list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link)
+ LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp);
+
+ rc = copy_to_user(gents_p, gentp,
+ sizeof(lstcon_ndlist_ent_t)) ? -EFAULT: 0;
+
+ LIBCFS_FREE(gentp, sizeof(lstcon_ndlist_ent_t));
+
+ lstcon_group_put(grp);
+
+ return 0;
+}
+
+int
+lstcon_batch_find(char *name, lstcon_batch_t **batpp)
+{
+ lstcon_batch_t *bat;
+
+ list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
+ if (strncmp(bat->bat_name, name, LST_NAME_SIZE) == 0) {
+ *batpp = bat;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+int
+lstcon_batch_add(char *name)
+{
+ lstcon_batch_t *bat;
+ int i;
+ int rc;
+
+ rc = (lstcon_batch_find(name, &bat) == 0)? -EEXIST: 0;
+ if (rc != 0) {
+ CDEBUG(D_NET, "Batch %s already exists\n", name);
+ return rc;
+ }
+
+ LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t));
+ if (bat == NULL) {
+ CERROR("Can't allocate descriptor for batch %s\n", name);
+ return -ENOMEM;
+ }
+
+ LIBCFS_ALLOC(bat->bat_cli_hash,
+ sizeof(struct list_head) * LST_NODE_HASHSIZE);
+ if (bat->bat_cli_hash == NULL) {
+ CERROR("Can't allocate hash for batch %s\n", name);
+ LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+
+ return -ENOMEM;
+ }
+
+ LIBCFS_ALLOC(bat->bat_srv_hash,
+ sizeof(struct list_head) * LST_NODE_HASHSIZE);
+ if (bat->bat_srv_hash == NULL) {
+ CERROR("Can't allocate hash for batch %s\n", name);
+ LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE);
+ LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+
+ return -ENOMEM;
+ }
+
+ strcpy(bat->bat_name, name);
+ bat->bat_hdr.tsb_index = 0;
+ bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie;
+
+ bat->bat_ntest = 0;
+ bat->bat_state = LST_BATCH_IDLE;
+
+ INIT_LIST_HEAD(&bat->bat_cli_list);
+ INIT_LIST_HEAD(&bat->bat_srv_list);
+ INIT_LIST_HEAD(&bat->bat_test_list);
+ INIT_LIST_HEAD(&bat->bat_trans_list);
+
+ for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+ INIT_LIST_HEAD(&bat->bat_cli_hash[i]);
+ INIT_LIST_HEAD(&bat->bat_srv_hash[i]);
+ }
+
+ list_add_tail(&bat->bat_link, &console_session.ses_bat_list);
+
+ return rc;
+}
+
+int
+lstcon_batch_list(int index, int len, char *name_up)
+{
+ lstcon_batch_t *bat;
+
+ LASSERT (name_up != NULL);
+ LASSERT (index >= 0);
+
+ list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
+ if (index-- == 0) {
+ return copy_to_user(name_up,bat->bat_name, len) ?
+ -EFAULT: 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+int
+lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server,
+ int testidx, int *index_p, int *ndent_p,
+ lstcon_node_ent_t *dents_up)
+{
+ lstcon_test_batch_ent_t *entp;
+ struct list_head *clilst;
+ struct list_head *srvlst;
+ lstcon_test_t *test = NULL;
+ lstcon_batch_t *bat;
+ lstcon_ndlink_t *ndl;
+ int rc;
+
+ rc = lstcon_batch_find(name, &bat);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find batch %s\n", name);
+ return -ENOENT;
+ }
+
+ if (testidx > 0) {
+ /* query test, test index start from 1 */
+ list_for_each_entry(test, &bat->bat_test_list, tes_link) {
+ if (testidx-- == 1)
+ break;
+ }
+
+ if (testidx > 0) {
+ CDEBUG(D_NET, "Can't find specified test in batch\n");
+ return -ENOENT;
+ }
+ }
+
+ clilst = (test == NULL) ? &bat->bat_cli_list :
+ &test->tes_src_grp->grp_ndl_list;
+ srvlst = (test == NULL) ? &bat->bat_srv_list :
+ &test->tes_dst_grp->grp_ndl_list;
+
+ if (dents_up != NULL) {
+ rc = lstcon_nodes_getent((server ? srvlst: clilst),
+ index_p, ndent_p, dents_up);
+ return rc;
+ }
+
+ /* non-verbose query */
+ LIBCFS_ALLOC(entp, sizeof(lstcon_test_batch_ent_t));
+ if (entp == NULL)
+ return -ENOMEM;
+
+ memset(entp, 0, sizeof(lstcon_test_batch_ent_t));
+
+ if (test == NULL) {
+ entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
+ entp->u.tbe_batch.bae_state = bat->bat_state;
+
+ } else {
+
+ entp->u.tbe_test.tse_type = test->tes_type;
+ entp->u.tbe_test.tse_loop = test->tes_loop;
+ entp->u.tbe_test.tse_concur = test->tes_concur;
+ }
+
+ list_for_each_entry(ndl, clilst, ndl_link)
+ LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle);
+
+ list_for_each_entry(ndl, srvlst, ndl_link)
+ LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle);
+
+ rc = copy_to_user(ent_up, entp,
+ sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0;
+
+ LIBCFS_FREE(entp, sizeof(lstcon_test_batch_ent_t));
+
+ return rc;
+}
+
+int
+lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
+ switch (transop) {
+ case LST_TRANS_TSBRUN:
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ return -ENETDOWN;
+ break;
+
+ case LST_TRANS_TSBSTOP:
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ return 0;
+ break;
+
+ case LST_TRANS_TSBCLIQRY:
+ case LST_TRANS_TSBSRVQRY:
+ break;
+ }
+
+ return 1;
+}
+
+static int
+lstcon_batch_op(lstcon_batch_t *bat, int transop,
+ struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ int rc;
+
+ rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
+ &bat->bat_trans_list, transop,
+ bat, lstcon_batrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+ lstcon_rpc_trans_destroy(trans);
+
+ return rc;
+}
+
+int
+lstcon_batch_run(char *name, int timeout, struct list_head *result_up)
+{
+ lstcon_batch_t *bat;
+ int rc;
+
+ if (lstcon_batch_find(name, &bat) != 0) {
+ CDEBUG(D_NET, "Can't find batch %s\n", name);
+ return -ENOENT;
+ }
+
+ bat->bat_arg = timeout;
+
+ rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up);
+
+ /* mark batch as running if it's started in any node */
+ if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0) != 0)
+ bat->bat_state = LST_BATCH_RUNNING;
+
+ return rc;
+}
+
+int
+lstcon_batch_stop(char *name, int force, struct list_head *result_up)
+{
+ lstcon_batch_t *bat;
+ int rc;
+
+ if (lstcon_batch_find(name, &bat) != 0) {
+ CDEBUG(D_NET, "Can't find batch %s\n", name);
+ return -ENOENT;
+ }
+
+ bat->bat_arg = force;
+
+ rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up);
+
+ /* mark batch as stopped if all RPCs finished */
+ if (lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0) == 0)
+ bat->bat_state = LST_BATCH_IDLE;
+
+ return rc;
+}
+
+static void
+lstcon_batch_destroy(lstcon_batch_t *bat)
+{
+ lstcon_ndlink_t *ndl;
+ lstcon_test_t *test;
+ int i;
+
+ list_del(&bat->bat_link);
+
+ while (!list_empty(&bat->bat_test_list)) {
+ test = list_entry(bat->bat_test_list.next,
+ lstcon_test_t, tes_link);
+ LASSERT (list_empty(&test->tes_trans_list));
+
+ list_del(&test->tes_link);
+
+ lstcon_group_put(test->tes_src_grp);
+ lstcon_group_put(test->tes_dst_grp);
+
+ LIBCFS_FREE(test, offsetof(lstcon_test_t,
+ tes_param[test->tes_paramlen]));
+ }
+
+ LASSERT (list_empty(&bat->bat_trans_list));
+
+ while (!list_empty(&bat->bat_cli_list)) {
+ ndl = list_entry(bat->bat_cli_list.next,
+ lstcon_ndlink_t, ndl_link);
+ list_del_init(&ndl->ndl_link);
+
+ lstcon_ndlink_release(ndl);
+ }
+
+ while (!list_empty(&bat->bat_srv_list)) {
+ ndl = list_entry(bat->bat_srv_list.next,
+ lstcon_ndlink_t, ndl_link);
+ list_del_init(&ndl->ndl_link);
+
+ lstcon_ndlink_release(ndl);
+ }
+
+ for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+ LASSERT (list_empty(&bat->bat_cli_hash[i]));
+ LASSERT (list_empty(&bat->bat_srv_hash[i]));
+ }
+
+ LIBCFS_FREE(bat->bat_cli_hash,
+ sizeof(struct list_head) * LST_NODE_HASHSIZE);
+ LIBCFS_FREE(bat->bat_srv_hash,
+ sizeof(struct list_head) * LST_NODE_HASHSIZE);
+ LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+}
+
+int
+lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+{
+ lstcon_test_t *test;
+ lstcon_batch_t *batch;
+ lstcon_ndlink_t *ndl;
+ struct list_head *hash;
+ struct list_head *head;
+
+ test = (lstcon_test_t *)arg;
+ LASSERT (test != NULL);
+
+ batch = test->tes_batch;
+ LASSERT (batch != NULL);
+
+ if (test->tes_oneside &&
+ transop == LST_TRANS_TSBSRVADD)
+ return 0;
+
+ if (nd->nd_state != LST_NODE_ACTIVE)
+ return -ENETDOWN;
+
+ if (transop == LST_TRANS_TSBCLIADD) {
+ hash = batch->bat_cli_hash;
+ head = &batch->bat_cli_list;
+
+ } else {
+ LASSERT (transop == LST_TRANS_TSBSRVADD);
+
+ hash = batch->bat_srv_hash;
+ head = &batch->bat_srv_list;
+ }
+
+ LASSERT (nd->nd_id.nid != LNET_NID_ANY);
+
+ if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1) != 0)
+ return -ENOMEM;
+
+ if (list_empty(&ndl->ndl_link))
+ list_add_tail(&ndl->ndl_link, head);
+
+ return 1;
+}
+
+static int
+lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_group_t *grp;
+ int transop;
+ int rc;
+
+ LASSERT (test->tes_src_grp != NULL);
+ LASSERT (test->tes_dst_grp != NULL);
+
+ transop = LST_TRANS_TSBSRVADD;
+ grp = test->tes_dst_grp;
+again:
+ rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list,
+ &test->tes_trans_list, transop,
+ test, lstcon_testrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
+ lstcon_trans_stat()->trs_fwk_errno != 0) {
+ lstcon_rpc_trans_interpreter(trans, result_up, NULL);
+
+ lstcon_rpc_trans_destroy(trans);
+ /* return if any error */
+ CDEBUG(D_NET, "Failed to add test %s, "
+ "RPC error %d, framework error %d\n",
+ transop == LST_TRANS_TSBCLIADD ? "client" : "server",
+ lstcon_trans_stat()->trs_rpc_errno,
+ lstcon_trans_stat()->trs_fwk_errno);
+
+ return rc;
+ }
+
+ lstcon_rpc_trans_destroy(trans);
+
+ if (transop == LST_TRANS_TSBCLIADD)
+ return rc;
+
+ transop = LST_TRANS_TSBCLIADD;
+ grp = test->tes_src_grp;
+ test->tes_cliidx = 0;
+
+ /* requests to test clients */
+ goto again;
+}
+
+int
+lstcon_test_add(char *name, int type, int loop, int concur,
+ int dist, int span, char *src_name, char * dst_name,
+ void *param, int paramlen, int *retp,
+ struct list_head *result_up)
+{
+ lstcon_group_t *src_grp = NULL;
+ lstcon_group_t *dst_grp = NULL;
+ lstcon_test_t *test = NULL;
+ lstcon_batch_t *batch;
+ int rc;
+
+ rc = lstcon_batch_find(name, &batch);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find batch %s\n", name);
+ return rc;
+ }
+
+ if (batch->bat_state != LST_BATCH_IDLE) {
+ CDEBUG(D_NET, "Can't change running batch %s\n", name);
+ return rc;
+ }
+
+ rc = lstcon_group_find(src_name, &src_grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", src_name);
+ goto out;
+ }
+
+ rc = lstcon_group_find(dst_name, &dst_grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", dst_name);
+ goto out;
+ }
+
+ if (dst_grp->grp_userland)
+ *retp = 1;
+
+ LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+ if (!test) {
+ CERROR("Can't allocate test descriptor\n");
+ rc = -ENOMEM;
+
+ goto out;
+ }
+
+ memset(test, 0, offsetof(lstcon_test_t, tes_param[paramlen]));
+ test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id;
+ test->tes_batch = batch;
+ test->tes_type = type;
+ test->tes_oneside = 0; /* TODO */
+ test->tes_loop = loop;
+ test->tes_concur = concur;
+ test->tes_stop_onerr = 1; /* TODO */
+ test->tes_span = span;
+ test->tes_dist = dist;
+ test->tes_cliidx = 0; /* just used for creating RPC */
+ test->tes_src_grp = src_grp;
+ test->tes_dst_grp = dst_grp;
+ INIT_LIST_HEAD(&test->tes_trans_list);
+
+ if (param != NULL) {
+ test->tes_paramlen = paramlen;
+ memcpy(&test->tes_param[0], param, paramlen);
+ }
+
+ rc = lstcon_test_nodes_add(test, result_up);
+
+ if (rc != 0)
+ goto out;
+
+ if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
+ lstcon_trans_stat()->trs_fwk_errno != 0)
+ CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name);
+
+ /* add to test list anyway, so user can check what's going on */
+ list_add_tail(&test->tes_link, &batch->bat_test_list);
+
+ batch->bat_ntest ++;
+ test->tes_hdr.tsb_index = batch->bat_ntest;
+
+ /* hold groups so nobody can change them */
+ return rc;
+out:
+ if (test != NULL)
+ LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+
+ if (dst_grp != NULL)
+ lstcon_group_put(dst_grp);
+
+ if (src_grp != NULL)
+ lstcon_group_put(src_grp);
+
+ return rc;
+}
+
+int
+lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp)
+{
+ lstcon_test_t *test;
+
+ list_for_each_entry(test, &batch->bat_test_list, tes_link) {
+ if (idx == test->tes_hdr.tsb_index) {
+ *testpp = test;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+int
+lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg,
+ lstcon_rpc_ent_t *ent_up)
+{
+ srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+
+ LASSERT (transop == LST_TRANS_TSBCLIQRY ||
+ transop == LST_TRANS_TSBSRVQRY);
+
+ /* positive errno, framework error code */
+ if (copy_to_user(&ent_up->rpe_priv[0],
+ &rep->bar_active, sizeof(rep->bar_active)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int
+lstcon_test_batch_query(char *name, int testidx, int client,
+ int timeout, struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ struct list_head *translist;
+ struct list_head *ndlist;
+ lstcon_tsb_hdr_t *hdr;
+ lstcon_batch_t *batch;
+ lstcon_test_t *test = NULL;
+ int transop;
+ int rc;
+
+ rc = lstcon_batch_find(name, &batch);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find batch: %s\n", name);
+ return rc;
+ }
+
+ if (testidx == 0) {
+ translist = &batch->bat_trans_list;
+ ndlist = &batch->bat_cli_list;
+ hdr = &batch->bat_hdr;
+
+ } else {
+ /* query specified test only */
+ rc = lstcon_test_find(batch, testidx, &test);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find test: %d\n", testidx);
+ return rc;
+ }
+
+ translist = &test->tes_trans_list;
+ ndlist = &test->tes_src_grp->grp_ndl_list;
+ hdr = &test->tes_hdr;
+ }
+
+ transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY;
+
+ rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr,
+ lstcon_batrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, timeout);
+
+ if (testidx == 0 && /* query a batch, not a test */
+ lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) == 0 &&
+ lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0) == 0) {
+ /* all RPCs finished, and no active test */
+ batch->bat_state = LST_BATCH_IDLE;
+ }
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up,
+ lstcon_tsbrpc_readent);
+ lstcon_rpc_trans_destroy(trans);
+
+ return rc;
+}
+
+int
+lstcon_statrpc_readent(int transop, srpc_msg_t *msg,
+ lstcon_rpc_ent_t *ent_up)
+{
+ srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+ sfw_counters_t *sfwk_stat;
+ srpc_counters_t *srpc_stat;
+ lnet_counters_t *lnet_stat;
+
+ if (rep->str_status != 0)
+ return 0;
+
+ sfwk_stat = (sfw_counters_t *)&ent_up->rpe_payload[0];
+ srpc_stat = (srpc_counters_t *)((char *)sfwk_stat + sizeof(*sfwk_stat));
+ lnet_stat = (lnet_counters_t *)((char *)srpc_stat + sizeof(*srpc_stat));
+
+ if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
+ copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) ||
+ copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int
+lstcon_ndlist_stat(struct list_head *ndlist,
+ int timeout, struct list_head *result_up)
+{
+ struct list_head head;
+ lstcon_rpc_trans_t *trans;
+ int rc;
+
+ INIT_LIST_HEAD(&head);
+
+ rc = lstcon_rpc_trans_ndlist(ndlist, &head,
+ LST_TRANS_STATQRY, NULL, NULL, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up,
+ lstcon_statrpc_readent);
+ lstcon_rpc_trans_destroy(trans);
+
+ return rc;
+}
+
+int
+lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up)
+{
+ lstcon_group_t *grp;
+ int rc;
+
+ rc = lstcon_group_find(grp_name, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Can't find group %s\n", grp_name);
+ return rc;
+ }
+
+ rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up);
+
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
+ int timeout, struct list_head *result_up)
+{
+ lstcon_ndlink_t *ndl;
+ lstcon_group_t *tmp;
+ lnet_process_id_t id;
+ int i;
+ int rc;
+
+ rc = lstcon_group_alloc(NULL, &tmp);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0 ; i < count; i++) {
+ if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+ rc = -EFAULT;
+ break;
+ }
+
+ /* add to tmp group */
+ rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2);
+ if (rc != 0) {
+ CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET,
+ "Failed to find or create %s: %d\n",
+ libcfs_id2str(id), rc);
+ break;
+ }
+ }
+
+ if (rc != 0) {
+ lstcon_group_put(tmp);
+ return rc;
+ }
+
+ rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up);
+
+ lstcon_group_put(tmp);
+
+ return rc;
+}
+
+int
+lstcon_debug_ndlist(struct list_head *ndlist,
+ struct list_head *translist,
+ int timeout, struct list_head *result_up)
+{
+ lstcon_rpc_trans_t *trans;
+ int rc;
+
+ rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
+ NULL, lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout));
+
+ rc = lstcon_rpc_trans_interpreter(trans, result_up,
+ lstcon_sesrpc_readent);
+ lstcon_rpc_trans_destroy(trans);
+
+ return rc;
+}
+
+int
+lstcon_session_debug(int timeout, struct list_head *result_up)
+{
+ return lstcon_debug_ndlist(&console_session.ses_ndl_list,
+ NULL, timeout, result_up);
+}
+
+int
+lstcon_batch_debug(int timeout, char *name,
+ int client, struct list_head *result_up)
+{
+ lstcon_batch_t *bat;
+ int rc;
+
+ rc = lstcon_batch_find(name, &bat);
+ if (rc != 0)
+ return -ENOENT;
+
+ rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list :
+ &bat->bat_srv_list,
+ NULL, timeout, result_up);
+
+ return rc;
+}
+
+int
+lstcon_group_debug(int timeout, char *name,
+ struct list_head *result_up)
+{
+ lstcon_group_t *grp;
+ int rc;
+
+ rc = lstcon_group_find(name, &grp);
+ if (rc != 0)
+ return -ENOENT;
+
+ rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
+ timeout, result_up);
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_nodes_debug(int timeout,
+ int count, lnet_process_id_t *ids_up,
+ struct list_head *result_up)
+{
+ lnet_process_id_t id;
+ lstcon_ndlink_t *ndl;
+ lstcon_group_t *grp;
+ int i;
+ int rc;
+
+ rc = lstcon_group_alloc(NULL, &grp);
+ if (rc != 0) {
+ CDEBUG(D_NET, "Out of memory\n");
+ return rc;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (copy_from_user(&id, &ids_up[i], sizeof(id))) {
+ rc = -EFAULT;
+ break;
+ }
+
+ /* node is added to tmp group */
+ rc = lstcon_group_ndlink_find(grp, id, &ndl, 1);
+ if (rc != 0) {
+ CERROR("Can't create node link\n");
+ break;
+ }
+ }
+
+ if (rc != 0) {
+ lstcon_group_put(grp);
+ return rc;
+ }
+
+ rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL,
+ timeout, result_up);
+
+ lstcon_group_put(grp);
+
+ return rc;
+}
+
+int
+lstcon_session_match(lst_sid_t sid)
+{
+ return (console_session.ses_id.ses_nid == sid.ses_nid &&
+ console_session.ses_id.ses_stamp == sid.ses_stamp) ? 1: 0;
+}
+
+static void
+lstcon_new_session_id(lst_sid_t *sid)
+{
+ lnet_process_id_t id;
+
+ LASSERT (console_session.ses_state == LST_SESSION_NONE);
+
+ LNetGetId(1, &id);
+ sid->ses_nid = id.nid;
+ sid->ses_stamp = cfs_time_current();
+}
+
+extern srpc_service_t lstcon_acceptor_service;
+
+int
+lstcon_session_new(char *name, int key, unsigned feats,
+ int timeout, int force, lst_sid_t *sid_up)
+{
+ int rc = 0;
+ int i;
+
+ if (console_session.ses_state != LST_SESSION_NONE) {
+ /* session exists */
+ if (!force) {
+ CNETERR("Session %s already exists\n",
+ console_session.ses_name);
+ return -EEXIST;
+ }
+
+ rc = lstcon_session_end();
+
+ /* lstcon_session_end() only return local error */
+ if (rc != 0)
+ return rc;
+ }
+
+ if ((feats & ~LST_FEATS_MASK) != 0) {
+ CNETERR("Unknown session features %x\n",
+ (feats & ~LST_FEATS_MASK));
+ return -EINVAL;
+ }
+
+ for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
+ LASSERT(list_empty(&console_session.ses_ndl_hash[i]));
+
+ lstcon_new_session_id(&console_session.ses_id);
+
+ console_session.ses_key = key;
+ console_session.ses_state = LST_SESSION_ACTIVE;
+ console_session.ses_force = !!force;
+ console_session.ses_features = feats;
+ console_session.ses_feats_updated = 0;
+ console_session.ses_timeout = (timeout <= 0) ?
+ LST_CONSOLE_TIMEOUT : timeout;
+ strcpy(console_session.ses_name, name);
+
+ rc = lstcon_batch_add(LST_DEFAULT_BATCH);
+ if (rc != 0)
+ return rc;
+
+ rc = lstcon_rpc_pinger_start();
+ if (rc != 0) {
+ lstcon_batch_t *bat = NULL;
+
+ lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
+ lstcon_batch_destroy(bat);
+
+ return rc;
+ }
+
+ if (copy_to_user(sid_up, &console_session.ses_id,
+ sizeof(lst_sid_t)) == 0)
+ return rc;
+
+ lstcon_session_end();
+
+ return -EFAULT;
+}
+
+int
+lstcon_session_info(lst_sid_t *sid_up, int *key_up, unsigned *featp,
+ lstcon_ndlist_ent_t *ndinfo_up, char *name_up, int len)
+{
+ lstcon_ndlist_ent_t *entp;
+ lstcon_ndlink_t *ndl;
+ int rc = 0;
+
+ if (console_session.ses_state != LST_SESSION_ACTIVE)
+ return -ESRCH;
+
+ LIBCFS_ALLOC(entp, sizeof(*entp));
+ if (entp == NULL)
+ return -ENOMEM;
+
+ memset(entp, 0, sizeof(*entp));
+
+ list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link)
+ LST_NODE_STATE_COUNTER(ndl->ndl_node, entp);
+
+ if (copy_to_user(sid_up, &console_session.ses_id,
+ sizeof(lst_sid_t)) ||
+ copy_to_user(key_up, &console_session.ses_key,
+ sizeof(*key_up)) ||
+ copy_to_user(featp, &console_session.ses_features,
+ sizeof(*featp)) ||
+ copy_to_user(ndinfo_up, entp, sizeof(*entp)) ||
+ copy_to_user(name_up, console_session.ses_name, len))
+ rc = -EFAULT;
+
+ LIBCFS_FREE(entp, sizeof(*entp));
+
+ return rc;
+}
+
+int
+lstcon_session_end()
+{
+ lstcon_rpc_trans_t *trans;
+ lstcon_group_t *grp;
+ lstcon_batch_t *bat;
+ int rc = 0;
+
+ LASSERT (console_session.ses_state == LST_SESSION_ACTIVE);
+
+ rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list,
+ NULL, LST_TRANS_SESEND, NULL,
+ lstcon_sesrpc_condition, &trans);
+ if (rc != 0) {
+ CERROR("Can't create transaction: %d\n", rc);
+ return rc;
+ }
+
+ console_session.ses_shutdown = 1;
+
+ lstcon_rpc_pinger_stop();
+
+ lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT);
+
+ lstcon_rpc_trans_destroy(trans);
+ /* User can do nothing even rpc failed, so go on */
+
+ /* waiting for orphan rpcs to die */
+ lstcon_rpc_cleanup_wait();
+
+ console_session.ses_id = LST_INVALID_SID;
+ console_session.ses_state = LST_SESSION_NONE;
+ console_session.ses_key = 0;
+ console_session.ses_force = 0;
+ console_session.ses_feats_updated = 0;
+
+ /* destroy all batches */
+ while (!list_empty(&console_session.ses_bat_list)) {
+ bat = list_entry(console_session.ses_bat_list.next,
+ lstcon_batch_t, bat_link);
+
+ lstcon_batch_destroy(bat);
+ }
+
+ /* destroy all groups */
+ while (!list_empty(&console_session.ses_grp_list)) {
+ grp = list_entry(console_session.ses_grp_list.next,
+ lstcon_group_t, grp_link);
+ LASSERT (grp->grp_ref == 1);
+
+ lstcon_group_put(grp);
+ }
+
+ /* all nodes should be released */
+ LASSERT (list_empty(&console_session.ses_ndl_list));
+
+ console_session.ses_shutdown = 0;
+ console_session.ses_expired = 0;
+
+ return rc;
+}
+
+int
+lstcon_session_feats_check(unsigned feats)
+{
+ int rc = 0;
+
+ if ((feats & ~LST_FEATS_MASK) != 0) {
+ CERROR("Can't support these features: %x\n",
+ (feats & ~LST_FEATS_MASK));
+ return -EPROTO;
+ }
+
+ spin_lock(&console_session.ses_rpc_lock);
+
+ if (!console_session.ses_feats_updated) {
+ console_session.ses_feats_updated = 1;
+ console_session.ses_features = feats;
+ }
+
+ if (console_session.ses_features != feats)
+ rc = -EPROTO;
+
+ spin_unlock(&console_session.ses_rpc_lock);
+
+ if (rc != 0) {
+ CERROR("remote features %x do not match with "
+ "session features %x of console\n",
+ feats, console_session.ses_features);
+ }
+
+ return rc;
+}
+
+static int
+lstcon_acceptor_handle (srpc_server_rpc_t *rpc)
+{
+ srpc_msg_t *rep = &rpc->srpc_replymsg;
+ srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg;
+ srpc_join_reqst_t *jreq = &req->msg_body.join_reqst;
+ srpc_join_reply_t *jrep = &rep->msg_body.join_reply;
+ lstcon_group_t *grp = NULL;
+ lstcon_ndlink_t *ndl;
+ int rc = 0;
+
+ sfw_unpack_message(req);
+
+ mutex_lock(&console_session.ses_mutex);
+
+ jrep->join_sid = console_session.ses_id;
+
+ if (console_session.ses_id.ses_nid == LNET_NID_ANY) {
+ jrep->join_status = ESRCH;
+ goto out;
+ }
+
+ if (lstcon_session_feats_check(req->msg_ses_feats) != 0) {
+ jrep->join_status = EPROTO;
+ goto out;
+ }
+
+ if (jreq->join_sid.ses_nid != LNET_NID_ANY &&
+ !lstcon_session_match(jreq->join_sid)) {
+ jrep->join_status = EBUSY;
+ goto out;
+ }
+
+ if (lstcon_group_find(jreq->join_group, &grp) != 0) {
+ rc = lstcon_group_alloc(jreq->join_group, &grp);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ goto out;
+ }
+
+ list_add_tail(&grp->grp_link,
+ &console_session.ses_grp_list);
+ lstcon_group_addref(grp);
+ }
+
+ if (grp->grp_ref > 2) {
+ /* Group in using */
+ jrep->join_status = EBUSY;
+ goto out;
+ }
+
+ rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0);
+ if (rc == 0) {
+ jrep->join_status = EEXIST;
+ goto out;
+ }
+
+ rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1);
+ if (rc != 0) {
+ CERROR("Out of memory\n");
+ goto out;
+ }
+
+ ndl->ndl_node->nd_state = LST_NODE_ACTIVE;
+ ndl->ndl_node->nd_timeout = console_session.ses_timeout;
+
+ if (grp->grp_userland == 0)
+ grp->grp_userland = 1;
+
+ strcpy(jrep->join_session, console_session.ses_name);
+ jrep->join_timeout = console_session.ses_timeout;
+ jrep->join_status = 0;
+
+out:
+ rep->msg_ses_feats = console_session.ses_features;
+ if (grp != NULL)
+ lstcon_group_put(grp);
+
+ mutex_unlock(&console_session.ses_mutex);
+
+ return rc;
+}
+
+srpc_service_t lstcon_acceptor_service;
+void lstcon_init_acceptor_service(void)
+{
+ /* initialize selftest console acceptor service table */
+ lstcon_acceptor_service.sv_name = "join session";
+ lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle;
+ lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN;
+ lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
+}
+
+extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data);
+
+DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
+
+/* initialize console */
+int
+lstcon_console_init(void)
+{
+ int i;
+ int rc;
+
+ memset(&console_session, 0, sizeof(lstcon_session_t));
+
+ console_session.ses_id = LST_INVALID_SID;
+ console_session.ses_state = LST_SESSION_NONE;
+ console_session.ses_timeout = 0;
+ console_session.ses_force = 0;
+ console_session.ses_expired = 0;
+ console_session.ses_feats_updated = 0;
+ console_session.ses_features = LST_FEATS_MASK;
+ console_session.ses_laststamp = cfs_time_current_sec();
+
+ mutex_init(&console_session.ses_mutex);
+
+ INIT_LIST_HEAD(&console_session.ses_ndl_list);
+ INIT_LIST_HEAD(&console_session.ses_grp_list);
+ INIT_LIST_HEAD(&console_session.ses_bat_list);
+ INIT_LIST_HEAD(&console_session.ses_trans_list);
+
+ LIBCFS_ALLOC(console_session.ses_ndl_hash,
+ sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+ if (console_session.ses_ndl_hash == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < LST_GLOBAL_HASHSIZE; i++)
+ INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]);
+
+
+ /* initialize acceptor service table */
+ lstcon_init_acceptor_service();
+
+ rc = srpc_add_service(&lstcon_acceptor_service);
+ LASSERT (rc != -EBUSY);
+ if (rc != 0) {
+ LIBCFS_FREE(console_session.ses_ndl_hash,
+ sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+ return rc;
+ }
+
+ rc = srpc_service_add_buffers(&lstcon_acceptor_service,
+ lstcon_acceptor_service.sv_wi_total);
+ if (rc != 0) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = libcfs_register_ioctl(&lstcon_ioctl_handler);
+
+ if (rc == 0) {
+ lstcon_rpc_module_init();
+ return 0;
+ }
+
+out:
+ srpc_shutdown_service(&lstcon_acceptor_service);
+ srpc_remove_service(&lstcon_acceptor_service);
+
+ LIBCFS_FREE(console_session.ses_ndl_hash,
+ sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+
+ srpc_wait_service_shutdown(&lstcon_acceptor_service);
+
+ return rc;
+}
+
+int
+lstcon_console_fini(void)
+{
+ int i;
+
+ libcfs_deregister_ioctl(&lstcon_ioctl_handler);
+
+ mutex_lock(&console_session.ses_mutex);
+
+ srpc_shutdown_service(&lstcon_acceptor_service);
+ srpc_remove_service(&lstcon_acceptor_service);
+
+ if (console_session.ses_state != LST_SESSION_NONE)
+ lstcon_session_end();
+
+ lstcon_rpc_module_fini();
+
+ mutex_unlock(&console_session.ses_mutex);
+
+ LASSERT (list_empty(&console_session.ses_ndl_list));
+ LASSERT (list_empty(&console_session.ses_grp_list));
+ LASSERT (list_empty(&console_session.ses_bat_list));
+ LASSERT (list_empty(&console_session.ses_trans_list));
+
+ for (i = 0; i < LST_NODE_HASHSIZE; i++) {
+ LASSERT (list_empty(&console_session.ses_ndl_hash[i]));
+ }
+
+ LIBCFS_FREE(console_session.ses_ndl_hash,
+ sizeof(struct list_head) * LST_GLOBAL_HASHSIZE);
+
+ srpc_wait_service_shutdown(&lstcon_acceptor_service);
+
+ return 0;
+}
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
new file mode 100644
index 000000000000..e61b26687dbb
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -0,0 +1,232 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/console.h
+ *
+ * kernel structure for LST console
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#ifndef __LST_CONSOLE_H__
+#define __LST_CONSOLE_H__
+
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+#include "selftest.h"
+#include "conrpc.h"
+
+typedef struct lstcon_node {
+ lnet_process_id_t nd_id; /* id of the node */
+ int nd_ref; /* reference count */
+ int nd_state; /* state of the node */
+ int nd_timeout; /* session timeout */
+ cfs_time_t nd_stamp; /* timestamp of last replied RPC */
+ struct lstcon_rpc nd_ping; /* ping rpc */
+} lstcon_node_t; /*** node descriptor */
+
+typedef struct {
+ struct list_head ndl_link; /* chain on list */
+ struct list_head ndl_hlink; /* chain on hash */
+ lstcon_node_t *ndl_node; /* pointer to node */
+} lstcon_ndlink_t; /*** node link descriptor */
+
+typedef struct {
+ struct list_head grp_link; /* chain on global group list */
+ int grp_ref; /* reference count */
+ int grp_userland; /* has userland nodes */
+ int grp_nnode; /* # of nodes */
+ char grp_name[LST_NAME_SIZE]; /* group name */
+
+ struct list_head grp_trans_list; /* transaction list */
+ struct list_head grp_ndl_list; /* nodes list */
+ struct list_head grp_ndl_hash[0];/* hash table for nodes */
+} lstcon_group_t; /*** (alias of nodes) group descriptor */
+
+#define LST_BATCH_IDLE 0xB0 /* idle batch */
+#define LST_BATCH_RUNNING 0xB1 /* running batch */
+
+typedef struct lstcon_tsb_hdr {
+ lst_bid_t tsb_id; /* batch ID */
+ int tsb_index; /* test index */
+} lstcon_tsb_hdr_t;
+
+typedef struct {
+ lstcon_tsb_hdr_t bat_hdr; /* test_batch header */
+ struct list_head bat_link; /* chain on session's batches list */
+ int bat_ntest; /* # of test */
+ int bat_state; /* state of the batch */
+ int bat_arg; /* parameter for run|stop, timeout for run, force for stop */
+ char bat_name[LST_NAME_SIZE]; /* name of batch */
+
+ struct list_head bat_test_list; /* list head of tests (lstcon_test_t) */
+ struct list_head bat_trans_list; /* list head of transaction */
+ struct list_head bat_cli_list; /* list head of client nodes (lstcon_node_t) */
+ struct list_head *bat_cli_hash; /* hash table of client nodes */
+ struct list_head bat_srv_list; /* list head of server nodes */
+ struct list_head *bat_srv_hash; /* hash table of server nodes */
+} lstcon_batch_t; /*** (tests ) batch descritptor */
+
+typedef struct lstcon_test {
+ lstcon_tsb_hdr_t tes_hdr; /* test batch header */
+ struct list_head tes_link; /* chain on batch's tests list */
+ lstcon_batch_t *tes_batch; /* pointer to batch */
+
+ int tes_type; /* type of the test, i.e: bulk, ping */
+ int tes_stop_onerr; /* stop on error */
+ int tes_oneside; /* one-sided test */
+ int tes_concur; /* concurrency */
+ int tes_loop; /* loop count */
+ int tes_dist; /* nodes distribution of target group */
+ int tes_span; /* nodes span of target group */
+ int tes_cliidx; /* client index, used for RPC creating */
+
+ struct list_head tes_trans_list; /* transaction list */
+ lstcon_group_t *tes_src_grp; /* group run the test */
+ lstcon_group_t *tes_dst_grp; /* target group */
+
+ int tes_paramlen; /* test parameter length */
+ char tes_param[0]; /* test parameter */
+} lstcon_test_t; /*** a single test descriptor */
+
+#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */
+#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */
+
+#define LST_SESSION_NONE 0x0 /* no session */
+#define LST_SESSION_ACTIVE 0x1 /* working session */
+
+#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */
+
+typedef struct {
+ struct mutex ses_mutex; /* only 1 thread in session */
+ lst_sid_t ses_id; /* global session id */
+ int ses_key; /* local session key */
+ int ses_state; /* state of session */
+ int ses_timeout; /* timeout in seconds */
+ time_t ses_laststamp; /* last operation stamp (seconds) */
+ /** tests features of the session */
+ unsigned ses_features;
+ /** features are synced with remote test nodes */
+ unsigned ses_feats_updated:1;
+ /** force creating */
+ unsigned ses_force:1;
+ /** session is shutting down */
+ unsigned ses_shutdown:1;
+ /** console is timedout */
+ unsigned ses_expired:1;
+ __u64 ses_id_cookie; /* batch id cookie */
+ char ses_name[LST_NAME_SIZE]; /* session name */
+ lstcon_rpc_trans_t *ses_ping; /* session pinger */
+ stt_timer_t ses_ping_timer; /* timer for pinger */
+ lstcon_trans_stat_t ses_trans_stat; /* transaction stats */
+
+ struct list_head ses_trans_list; /* global list of transaction */
+ struct list_head ses_grp_list; /* global list of groups */
+ struct list_head ses_bat_list; /* global list of batches */
+ struct list_head ses_ndl_list; /* global list of nodes */
+ struct list_head *ses_ndl_hash; /* hash table of nodes */
+
+ spinlock_t ses_rpc_lock; /* serialize */
+ atomic_t ses_rpc_counter;/* # of initialized RPCs */
+ struct list_head ses_rpc_freelist; /* idle console rpc */
+} lstcon_session_t; /*** session descriptor */
+
+extern lstcon_session_t console_session;
+
+static inline lstcon_trans_stat_t *
+lstcon_trans_stat(void)
+{
+ return &console_session.ses_trans_stat;
+}
+
+static inline struct list_head *
+lstcon_id2hash (lnet_process_id_t id, struct list_head *hash)
+{
+ unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
+
+ return &hash[idx];
+}
+
+extern int lstcon_session_match(lst_sid_t sid);
+extern int lstcon_session_new(char *name, int key, unsigned version,
+ int timeout, int flags, lst_sid_t *sid_up);
+extern int lstcon_session_info(lst_sid_t *sid_up, int *key, unsigned *verp,
+ lstcon_ndlist_ent_t *entp, char *name_up, int len);
+extern int lstcon_session_end(void);
+extern int lstcon_session_debug(int timeout, struct list_head *result_up);
+extern int lstcon_session_feats_check(unsigned feats);
+extern int lstcon_batch_debug(int timeout, char *name,
+ int client, struct list_head *result_up);
+extern int lstcon_group_debug(int timeout, char *name,
+ struct list_head *result_up);
+extern int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up,
+ struct list_head *result_up);
+extern int lstcon_group_add(char *name);
+extern int lstcon_group_del(char *name);
+extern int lstcon_group_clean(char *name, int args);
+extern int lstcon_group_refresh(char *name, struct list_head *result_up);
+extern int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up,
+ unsigned *featp, struct list_head *result_up);
+extern int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up,
+ struct list_head *result_up);
+extern int lstcon_group_info(char *name, lstcon_ndlist_ent_t *gent_up,
+ int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up);
+extern int lstcon_group_list(int idx, int len, char *name_up);
+extern int lstcon_batch_add(char *name);
+extern int lstcon_batch_run(char *name, int timeout,
+ struct list_head *result_up);
+extern int lstcon_batch_stop(char *name, int force,
+ struct list_head *result_up);
+extern int lstcon_test_batch_query(char *name, int testidx,
+ int client, int timeout,
+ struct list_head *result_up);
+extern int lstcon_batch_del(char *name);
+extern int lstcon_batch_list(int idx, int namelen, char *name_up);
+extern int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up,
+ int server, int testidx, int *index_p,
+ int *ndent_p, lstcon_node_ent_t *dents_up);
+extern int lstcon_group_stat(char *grp_name, int timeout,
+ struct list_head *result_up);
+extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
+ int timeout, struct list_head *result_up);
+extern int lstcon_test_add(char *name, int type, int loop, int concur,
+ int dist, int span, char *src_name, char * dst_name,
+ void *param, int paramlen, int *retp,
+ struct list_head *result_up);
+
+#endif
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
new file mode 100644
index 000000000000..483c78564dae
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -0,0 +1,1814 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/framework.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
+
+static int session_timeout = 100;
+CFS_MODULE_PARM(session_timeout, "i", int, 0444,
+ "test session timeout in seconds (100 by default, 0 == never)");
+
+static int rpc_timeout = 64;
+CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
+ "rpc timeout in seconds (64 by default, 0 == never)");
+
+#define sfw_unpack_id(id) \
+do { \
+ __swab64s(&(id).nid); \
+ __swab32s(&(id).pid); \
+} while (0)
+
+#define sfw_unpack_sid(sid) \
+do { \
+ __swab64s(&(sid).ses_nid); \
+ __swab64s(&(sid).ses_stamp); \
+} while (0)
+
+#define sfw_unpack_fw_counters(fc) \
+do { \
+ __swab32s(&(fc).running_ms); \
+ __swab32s(&(fc).active_batches); \
+ __swab32s(&(fc).zombie_sessions); \
+ __swab32s(&(fc).brw_errors); \
+ __swab32s(&(fc).ping_errors); \
+} while (0)
+
+#define sfw_unpack_rpc_counters(rc) \
+do { \
+ __swab32s(&(rc).errors); \
+ __swab32s(&(rc).rpcs_sent); \
+ __swab32s(&(rc).rpcs_rcvd); \
+ __swab32s(&(rc).rpcs_dropped); \
+ __swab32s(&(rc).rpcs_expired); \
+ __swab64s(&(rc).bulk_get); \
+ __swab64s(&(rc).bulk_put); \
+} while (0)
+
+#define sfw_unpack_lnet_counters(lc) \
+do { \
+ __swab32s(&(lc).errors); \
+ __swab32s(&(lc).msgs_max); \
+ __swab32s(&(lc).msgs_alloc); \
+ __swab32s(&(lc).send_count); \
+ __swab32s(&(lc).recv_count); \
+ __swab32s(&(lc).drop_count); \
+ __swab32s(&(lc).route_count); \
+ __swab64s(&(lc).send_length); \
+ __swab64s(&(lc).recv_length); \
+ __swab64s(&(lc).drop_length); \
+ __swab64s(&(lc).route_length); \
+} while (0)
+
+#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive) != 0)
+#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive) != 0)
+
+struct smoketest_framework {
+ struct list_head fw_zombie_rpcs; /* RPCs to be recycled */
+ struct list_head fw_zombie_sessions; /* stopping sessions */
+ struct list_head fw_tests; /* registered test cases */
+ atomic_t fw_nzombies; /* # zombie sessions */
+ spinlock_t fw_lock; /* serialise */
+ sfw_session_t *fw_session; /* _the_ session */
+ int fw_shuttingdown; /* shutdown in progress */
+ srpc_server_rpc_t *fw_active_srpc; /* running RPC */
+} sfw_data;
+
+/* forward ref's */
+int sfw_stop_batch (sfw_batch_t *tsb, int force);
+void sfw_destroy_session (sfw_session_t *sn);
+
+static inline sfw_test_case_t *
+sfw_find_test_case(int id)
+{
+ sfw_test_case_t *tsc;
+
+ LASSERT (id <= SRPC_SERVICE_MAX_ID);
+ LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+ list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+ if (tsc->tsc_srv_service->sv_id == id)
+ return tsc;
+ }
+
+ return NULL;
+}
+
+static int
+sfw_register_test (srpc_service_t *service, sfw_test_client_ops_t *cliops)
+{
+ sfw_test_case_t *tsc;
+
+ if (sfw_find_test_case(service->sv_id) != NULL) {
+ CERROR ("Failed to register test %s (%d)\n",
+ service->sv_name, service->sv_id);
+ return -EEXIST;
+ }
+
+ LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t));
+ if (tsc == NULL)
+ return -ENOMEM;
+
+ memset(tsc, 0, sizeof(sfw_test_case_t));
+ tsc->tsc_cli_ops = cliops;
+ tsc->tsc_srv_service = service;
+
+ list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
+ return 0;
+}
+
+void
+sfw_add_session_timer (void)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ stt_timer_t *timer = &sn->sn_timer;
+
+ LASSERT (!sfw_data.fw_shuttingdown);
+
+ if (sn == NULL || sn->sn_timeout == 0)
+ return;
+
+ LASSERT (!sn->sn_timer_active);
+
+ sn->sn_timer_active = 1;
+ timer->stt_expires = cfs_time_add(sn->sn_timeout,
+ cfs_time_current_sec());
+ stt_add_timer(timer);
+ return;
+}
+
+int
+sfw_del_session_timer (void)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+
+ if (sn == NULL || !sn->sn_timer_active)
+ return 0;
+
+ LASSERT (sn->sn_timeout != 0);
+
+ if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
+ sn->sn_timer_active = 0;
+ return 0;
+ }
+
+ return EBUSY; /* racing with sfw_session_expired() */
+}
+
+/* called with sfw_data.fw_lock held */
+static void
+sfw_deactivate_session (void)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ int nactive = 0;
+ sfw_batch_t *tsb;
+ sfw_test_case_t *tsc;
+
+ if (sn == NULL) return;
+
+ LASSERT (!sn->sn_timer_active);
+
+ sfw_data.fw_session = NULL;
+ atomic_inc(&sfw_data.fw_nzombies);
+ list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
+
+ spin_unlock(&sfw_data.fw_lock);
+
+ list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) {
+ srpc_abort_service(tsc->tsc_srv_service);
+ }
+
+ spin_lock(&sfw_data.fw_lock);
+
+ list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+ if (sfw_batch_active(tsb)) {
+ nactive++;
+ sfw_stop_batch(tsb, 1);
+ }
+ }
+
+ if (nactive != 0)
+ return; /* wait for active batches to stop */
+
+ list_del_init(&sn->sn_list);
+ spin_unlock(&sfw_data.fw_lock);
+
+ sfw_destroy_session(sn);
+
+ spin_lock(&sfw_data.fw_lock);
+}
+
+
+void
+sfw_session_expired (void *data)
+{
+ sfw_session_t *sn = data;
+
+ spin_lock(&sfw_data.fw_lock);
+
+ LASSERT (sn->sn_timer_active);
+ LASSERT (sn == sfw_data.fw_session);
+
+ CWARN ("Session expired! sid: %s-"LPU64", name: %s\n",
+ libcfs_nid2str(sn->sn_id.ses_nid),
+ sn->sn_id.ses_stamp, &sn->sn_name[0]);
+
+ sn->sn_timer_active = 0;
+ sfw_deactivate_session();
+
+ spin_unlock(&sfw_data.fw_lock);
+}
+
+static inline void
+sfw_init_session(sfw_session_t *sn, lst_sid_t sid,
+ unsigned features, const char *name)
+{
+ stt_timer_t *timer = &sn->sn_timer;
+
+ memset(sn, 0, sizeof(sfw_session_t));
+ INIT_LIST_HEAD(&sn->sn_list);
+ INIT_LIST_HEAD(&sn->sn_batches);
+ atomic_set(&sn->sn_refcount, 1); /* +1 for caller */
+ atomic_set(&sn->sn_brw_errors, 0);
+ atomic_set(&sn->sn_ping_errors, 0);
+ strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name));
+
+ sn->sn_timer_active = 0;
+ sn->sn_id = sid;
+ sn->sn_features = features;
+ sn->sn_timeout = session_timeout;
+ sn->sn_started = cfs_time_current();
+
+ timer->stt_data = sn;
+ timer->stt_func = sfw_session_expired;
+ INIT_LIST_HEAD(&timer->stt_list);
+}
+
+/* completion handler for incoming framework RPCs */
+void
+sfw_server_rpc_done(struct srpc_server_rpc *rpc)
+{
+ struct srpc_service *sv = rpc->srpc_scd->scd_svc;
+ int status = rpc->srpc_status;
+
+ CDEBUG (D_NET,
+ "Incoming framework RPC done: "
+ "service %s, peer %s, status %s:%d\n",
+ sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+ swi_state2str(rpc->srpc_wi.swi_state),
+ status);
+
+ if (rpc->srpc_bulk != NULL)
+ sfw_free_pages(rpc);
+ return;
+}
+
+void
+sfw_client_rpc_fini (srpc_client_rpc_t *rpc)
+{
+ LASSERT (rpc->crpc_bulk.bk_niov == 0);
+ LASSERT (list_empty(&rpc->crpc_list));
+ LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
+
+ CDEBUG (D_NET,
+ "Outgoing framework RPC done: "
+ "service %d, peer %s, status %s:%d:%d\n",
+ rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+ swi_state2str(rpc->crpc_wi.swi_state),
+ rpc->crpc_aborted, rpc->crpc_status);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ /* my callers must finish all RPCs before shutting me down */
+ LASSERT(!sfw_data.fw_shuttingdown);
+ list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
+
+ spin_unlock(&sfw_data.fw_lock);
+}
+
+sfw_batch_t *
+sfw_find_batch (lst_bid_t bid)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ sfw_batch_t *bat;
+
+ LASSERT (sn != NULL);
+
+ list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+ if (bat->bat_id.bat_id == bid.bat_id)
+ return bat;
+ }
+
+ return NULL;
+}
+
+sfw_batch_t *
+sfw_bid2batch (lst_bid_t bid)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ sfw_batch_t *bat;
+
+ LASSERT (sn != NULL);
+
+ bat = sfw_find_batch(bid);
+ if (bat != NULL)
+ return bat;
+
+ LIBCFS_ALLOC(bat, sizeof(sfw_batch_t));
+ if (bat == NULL)
+ return NULL;
+
+ bat->bat_error = 0;
+ bat->bat_session = sn;
+ bat->bat_id = bid;
+ atomic_set(&bat->bat_nactive, 0);
+ INIT_LIST_HEAD(&bat->bat_tests);
+
+ list_add_tail(&bat->bat_list, &sn->sn_batches);
+ return bat;
+}
+
+int
+sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ sfw_counters_t *cnt = &reply->str_fw;
+ sfw_batch_t *bat;
+ struct timeval tv;
+
+ reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+ if (request->str_sid.ses_nid == LNET_NID_ANY) {
+ reply->str_status = EINVAL;
+ return 0;
+ }
+
+ if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
+ reply->str_status = ESRCH;
+ return 0;
+ }
+
+ lnet_counters_get(&reply->str_lnet);
+ srpc_get_counters(&reply->str_rpc);
+
+ /* send over the msecs since the session was started
+ - with 32 bits to send, this is ~49 days */
+ cfs_duration_usec(cfs_time_sub(cfs_time_current(),
+ sn->sn_started), &tv);
+
+ cnt->running_ms = (__u32)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
+ cnt->brw_errors = atomic_read(&sn->sn_brw_errors);
+ cnt->ping_errors = atomic_read(&sn->sn_ping_errors);
+ cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
+
+ cnt->active_batches = 0;
+ list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+ if (atomic_read(&bat->bat_nactive) > 0)
+ cnt->active_batches++;
+ }
+
+ reply->str_status = 0;
+ return 0;
+}
+
+int
+sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ srpc_msg_t *msg = container_of(request, srpc_msg_t,
+ msg_body.mksn_reqst);
+ int cplen = 0;
+
+ if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
+ reply->mksn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+ reply->mksn_status = EINVAL;
+ return 0;
+ }
+
+ if (sn != NULL) {
+ reply->mksn_status = 0;
+ reply->mksn_sid = sn->sn_id;
+ reply->mksn_timeout = sn->sn_timeout;
+
+ if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
+ atomic_inc(&sn->sn_refcount);
+ return 0;
+ }
+
+ if (!request->mksn_force) {
+ reply->mksn_status = EBUSY;
+ cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0],
+ sizeof(reply->mksn_name));
+ if (cplen >= sizeof(reply->mksn_name))
+ return -E2BIG;
+ return 0;
+ }
+ }
+
+ /* reject the request if it requires unknown features
+ * NB: old version will always accept all features because it's not
+ * aware of srpc_msg_t::msg_ses_feats, it's a defect but it's also
+ * harmless because it will return zero feature to console, and it's
+ * console's responsibility to make sure all nodes in a session have
+ * same feature mask. */
+ if ((msg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+ reply->mksn_status = EPROTO;
+ return 0;
+ }
+
+ /* brand new or create by force */
+ LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
+ if (sn == NULL) {
+ CERROR ("Dropping RPC (mksn) under memory pressure.\n");
+ return -ENOMEM;
+ }
+
+ sfw_init_session(sn, request->mksn_sid,
+ msg->msg_ses_feats, &request->mksn_name[0]);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ sfw_deactivate_session();
+ LASSERT(sfw_data.fw_session == NULL);
+ sfw_data.fw_session = sn;
+
+ spin_unlock(&sfw_data.fw_lock);
+
+ reply->mksn_status = 0;
+ reply->mksn_sid = sn->sn_id;
+ reply->mksn_timeout = sn->sn_timeout;
+ return 0;
+}
+
+int
+sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+
+ reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+ if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
+ reply->rmsn_status = EINVAL;
+ return 0;
+ }
+
+ if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
+ reply->rmsn_status = (sn == NULL) ? ESRCH : EBUSY;
+ return 0;
+ }
+
+ if (!atomic_dec_and_test(&sn->sn_refcount)) {
+ reply->rmsn_status = 0;
+ return 0;
+ }
+
+ spin_lock(&sfw_data.fw_lock);
+ sfw_deactivate_session();
+ spin_unlock(&sfw_data.fw_lock);
+
+ reply->rmsn_status = 0;
+ reply->rmsn_sid = LST_INVALID_SID;
+ LASSERT(sfw_data.fw_session == NULL);
+ return 0;
+}
+
+int
+sfw_debug_session (srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+
+ if (sn == NULL) {
+ reply->dbg_status = ESRCH;
+ reply->dbg_sid = LST_INVALID_SID;
+ return 0;
+ }
+
+ reply->dbg_status = 0;
+ reply->dbg_sid = sn->sn_id;
+ reply->dbg_timeout = sn->sn_timeout;
+ if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name))
+ >= sizeof(reply->dbg_name))
+ return -E2BIG;
+
+ return 0;
+}
+
+void
+sfw_test_rpc_fini (srpc_client_rpc_t *rpc)
+{
+ sfw_test_unit_t *tsu = rpc->crpc_priv;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+
+ /* Called with hold of tsi->tsi_lock */
+ LASSERT (list_empty(&rpc->crpc_list));
+ list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
+}
+
+static inline int
+sfw_test_buffers(sfw_test_instance_t *tsi)
+{
+ struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service);
+ struct srpc_service *svc = tsc->tsc_srv_service;
+ int nbuf;
+
+ nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
+ return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
+}
+
+int
+sfw_load_test(struct sfw_test_instance *tsi)
+{
+ struct sfw_test_case *tsc;
+ struct srpc_service *svc;
+ int nbuf;
+ int rc;
+
+ LASSERT(tsi != NULL);
+ tsc = sfw_find_test_case(tsi->tsi_service);
+ nbuf = sfw_test_buffers(tsi);
+ LASSERT(tsc != NULL);
+ svc = tsc->tsc_srv_service;
+
+ if (tsi->tsi_is_client) {
+ tsi->tsi_ops = tsc->tsc_cli_ops;
+ return 0;
+ }
+
+ rc = srpc_service_add_buffers(svc, nbuf);
+ if (rc != 0) {
+ CWARN("Failed to reserve enough buffers: "
+ "service %s, %d needed: %d\n", svc->sv_name, nbuf, rc);
+ /* NB: this error handler is not strictly correct, because
+ * it may release more buffers than already allocated,
+ * but it doesn't matter because request portal should
+ * be lazy portal and will grow buffers if necessary. */
+ srpc_service_remove_buffers(svc, nbuf);
+ return -ENOMEM;
+ }
+
+ CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
+ nbuf * (srpc_serv_is_framework(svc) ?
+ 1 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name);
+ return 0;
+}
+
+void
+sfw_unload_test(struct sfw_test_instance *tsi)
+{
+ struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service);
+
+ LASSERT(tsc != NULL);
+
+ if (tsi->tsi_is_client)
+ return;
+
+ /* shrink buffers, because request portal is lazy portal
+ * which can grow buffers at runtime so we may leave
+ * some buffers behind, but never mind... */
+ srpc_service_remove_buffers(tsc->tsc_srv_service,
+ sfw_test_buffers(tsi));
+ return;
+}
+
+void
+sfw_destroy_test_instance (sfw_test_instance_t *tsi)
+{
+ srpc_client_rpc_t *rpc;
+ sfw_test_unit_t *tsu;
+
+ if (!tsi->tsi_is_client) goto clean;
+
+ tsi->tsi_ops->tso_fini(tsi);
+
+ LASSERT (!tsi->tsi_stopping);
+ LASSERT (list_empty(&tsi->tsi_active_rpcs));
+ LASSERT (!sfw_test_active(tsi));
+
+ while (!list_empty(&tsi->tsi_units)) {
+ tsu = list_entry(tsi->tsi_units.next,
+ sfw_test_unit_t, tsu_list);
+ list_del(&tsu->tsu_list);
+ LIBCFS_FREE(tsu, sizeof(*tsu));
+ }
+
+ while (!list_empty(&tsi->tsi_free_rpcs)) {
+ rpc = list_entry(tsi->tsi_free_rpcs.next,
+ srpc_client_rpc_t, crpc_list);
+ list_del(&rpc->crpc_list);
+ LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+ }
+
+clean:
+ sfw_unload_test(tsi);
+ LIBCFS_FREE(tsi, sizeof(*tsi));
+ return;
+}
+
+void
+sfw_destroy_batch (sfw_batch_t *tsb)
+{
+ sfw_test_instance_t *tsi;
+
+ LASSERT (!sfw_batch_active(tsb));
+ LASSERT (list_empty(&tsb->bat_list));
+
+ while (!list_empty(&tsb->bat_tests)) {
+ tsi = list_entry(tsb->bat_tests.next,
+ sfw_test_instance_t, tsi_list);
+ list_del_init(&tsi->tsi_list);
+ sfw_destroy_test_instance(tsi);
+ }
+
+ LIBCFS_FREE(tsb, sizeof(sfw_batch_t));
+ return;
+}
+
+void
+sfw_destroy_session (sfw_session_t *sn)
+{
+ sfw_batch_t *batch;
+
+ LASSERT (list_empty(&sn->sn_list));
+ LASSERT (sn != sfw_data.fw_session);
+
+ while (!list_empty(&sn->sn_batches)) {
+ batch = list_entry(sn->sn_batches.next,
+ sfw_batch_t, bat_list);
+ list_del_init(&batch->bat_list);
+ sfw_destroy_batch(batch);
+ }
+
+ LIBCFS_FREE(sn, sizeof(*sn));
+ atomic_dec(&sfw_data.fw_nzombies);
+ return;
+}
+
+void
+sfw_unpack_addtest_req(srpc_msg_t *msg)
+{
+ srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+
+ LASSERT (msg->msg_type == SRPC_MSG_TEST_REQST);
+ LASSERT (req->tsr_is_client);
+
+ if (msg->msg_magic == SRPC_MSG_MAGIC)
+ return; /* no flipping needed */
+
+ LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+ if (req->tsr_service == SRPC_SERVICE_BRW) {
+ if ((msg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) {
+ test_bulk_req_t *bulk = &req->tsr_u.bulk_v0;
+
+ __swab32s(&bulk->blk_opc);
+ __swab32s(&bulk->blk_npg);
+ __swab32s(&bulk->blk_flags);
+
+ } else {
+ test_bulk_req_v1_t *bulk = &req->tsr_u.bulk_v1;
+
+ __swab16s(&bulk->blk_opc);
+ __swab16s(&bulk->blk_flags);
+ __swab32s(&bulk->blk_offset);
+ __swab32s(&bulk->blk_len);
+ }
+
+ return;
+ }
+
+ if (req->tsr_service == SRPC_SERVICE_PING) {
+ test_ping_req_t *ping = &req->tsr_u.ping;
+
+ __swab32s(&ping->png_size);
+ __swab32s(&ping->png_flags);
+ return;
+ }
+
+ LBUG ();
+ return;
+}
+
+int
+sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
+{
+ srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg;
+ srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+ srpc_bulk_t *bk = rpc->srpc_bulk;
+ int ndest = req->tsr_ndest;
+ sfw_test_unit_t *tsu;
+ sfw_test_instance_t *tsi;
+ int i;
+ int rc;
+
+ LIBCFS_ALLOC(tsi, sizeof(*tsi));
+ if (tsi == NULL) {
+ CERROR ("Can't allocate test instance for batch: "LPU64"\n",
+ tsb->bat_id.bat_id);
+ return -ENOMEM;
+ }
+
+ memset(tsi, 0, sizeof(*tsi));
+ spin_lock_init(&tsi->tsi_lock);
+ atomic_set(&tsi->tsi_nactive, 0);
+ INIT_LIST_HEAD(&tsi->tsi_units);
+ INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
+ INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
+
+ tsi->tsi_stopping = 0;
+ tsi->tsi_batch = tsb;
+ tsi->tsi_loop = req->tsr_loop;
+ tsi->tsi_concur = req->tsr_concur;
+ tsi->tsi_service = req->tsr_service;
+ tsi->tsi_is_client = !!(req->tsr_is_client);
+ tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
+
+ rc = sfw_load_test(tsi);
+ if (rc != 0) {
+ LIBCFS_FREE(tsi, sizeof(*tsi));
+ return rc;
+ }
+
+ LASSERT (!sfw_batch_active(tsb));
+
+ if (!tsi->tsi_is_client) {
+ /* it's test server, just add it to tsb */
+ list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
+ return 0;
+ }
+
+ LASSERT (bk != NULL);
+ LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
+ LASSERT((unsigned int)bk->bk_len >=
+ sizeof(lnet_process_id_packed_t) * ndest);
+
+ sfw_unpack_addtest_req(msg);
+ memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
+
+ for (i = 0; i < ndest; i++) {
+ lnet_process_id_packed_t *dests;
+ lnet_process_id_packed_t id;
+ int j;
+
+ dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
+ LASSERT (dests != NULL); /* my pages are within KVM always */
+ id = dests[i % SFW_ID_PER_PAGE];
+ if (msg->msg_magic != SRPC_MSG_MAGIC)
+ sfw_unpack_id(id);
+
+ for (j = 0; j < tsi->tsi_concur; j++) {
+ LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t));
+ if (tsu == NULL) {
+ rc = -ENOMEM;
+ CERROR ("Can't allocate tsu for %d\n",
+ tsi->tsi_service);
+ goto error;
+ }
+
+ tsu->tsu_dest.nid = id.nid;
+ tsu->tsu_dest.pid = id.pid;
+ tsu->tsu_instance = tsi;
+ tsu->tsu_private = NULL;
+ list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
+ }
+ }
+
+ rc = tsi->tsi_ops->tso_init(tsi);
+ if (rc == 0) {
+ list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
+ return 0;
+ }
+
+error:
+ LASSERT (rc != 0);
+ sfw_destroy_test_instance(tsi);
+ return rc;
+}
+
+static void
+sfw_test_unit_done (sfw_test_unit_t *tsu)
+{
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ sfw_batch_t *tsb = tsi->tsi_batch;
+ sfw_session_t *sn = tsb->bat_session;
+
+ LASSERT (sfw_test_active(tsi));
+
+ if (!atomic_dec_and_test(&tsi->tsi_nactive))
+ return;
+
+ /* the test instance is done */
+ spin_lock(&tsi->tsi_lock);
+
+ tsi->tsi_stopping = 0;
+
+ spin_unlock(&tsi->tsi_lock);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ if (!atomic_dec_and_test(&tsb->bat_nactive) ||/* tsb still active */
+ sn == sfw_data.fw_session) { /* sn also active */
+ spin_unlock(&sfw_data.fw_lock);
+ return;
+ }
+
+ LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */
+
+ list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+ if (sfw_batch_active(tsb)) {
+ spin_unlock(&sfw_data.fw_lock);
+ return;
+ }
+ }
+
+ list_del_init(&sn->sn_list);
+ spin_unlock(&sfw_data.fw_lock);
+
+ sfw_destroy_session(sn);
+ return;
+}
+
+void
+sfw_test_rpc_done (srpc_client_rpc_t *rpc)
+{
+ sfw_test_unit_t *tsu = rpc->crpc_priv;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ int done = 0;
+
+ tsi->tsi_ops->tso_done_rpc(tsu, rpc);
+
+ spin_lock(&tsi->tsi_lock);
+
+ LASSERT (sfw_test_active(tsi));
+ LASSERT (!list_empty(&rpc->crpc_list));
+
+ list_del_init(&rpc->crpc_list);
+
+ /* batch is stopping or loop is done or get error */
+ if (tsi->tsi_stopping ||
+ tsu->tsu_loop == 0 ||
+ (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr))
+ done = 1;
+
+ /* dec ref for poster */
+ srpc_client_rpc_decref(rpc);
+
+ spin_unlock(&tsi->tsi_lock);
+
+ if (!done) {
+ swi_schedule_workitem(&tsu->tsu_worker);
+ return;
+ }
+
+ sfw_test_unit_done(tsu);
+ return;
+}
+
+int
+sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
+ unsigned features, int nblk, int blklen,
+ srpc_client_rpc_t **rpcpp)
+{
+ srpc_client_rpc_t *rpc = NULL;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+
+ spin_lock(&tsi->tsi_lock);
+
+ LASSERT (sfw_test_active(tsi));
+
+ if (!list_empty(&tsi->tsi_free_rpcs)) {
+ /* pick request from buffer */
+ rpc = list_entry(tsi->tsi_free_rpcs.next,
+ srpc_client_rpc_t, crpc_list);
+ LASSERT (nblk == rpc->crpc_bulk.bk_niov);
+ list_del_init(&rpc->crpc_list);
+ }
+
+ spin_unlock(&tsi->tsi_lock);
+
+ if (rpc == NULL) {
+ rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
+ blklen, sfw_test_rpc_done,
+ sfw_test_rpc_fini, tsu);
+ } else {
+ srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
+ blklen, sfw_test_rpc_done,
+ sfw_test_rpc_fini, tsu);
+ }
+
+ if (rpc == NULL) {
+ CERROR("Can't create rpc for test %d\n", tsi->tsi_service);
+ return -ENOMEM;
+ }
+
+ rpc->crpc_reqstmsg.msg_ses_feats = features;
+ *rpcpp = rpc;
+
+ return 0;
+}
+
+int
+sfw_run_test (swi_workitem_t *wi)
+{
+ sfw_test_unit_t *tsu = wi->swi_workitem.wi_data;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ srpc_client_rpc_t *rpc = NULL;
+
+ LASSERT (wi == &tsu->tsu_worker);
+
+ if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) {
+ LASSERT (rpc == NULL);
+ goto test_done;
+ }
+
+ LASSERT (rpc != NULL);
+
+ spin_lock(&tsi->tsi_lock);
+
+ if (tsi->tsi_stopping) {
+ list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
+ spin_unlock(&tsi->tsi_lock);
+ goto test_done;
+ }
+
+ if (tsu->tsu_loop > 0)
+ tsu->tsu_loop--;
+
+ list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
+ spin_unlock(&tsi->tsi_lock);
+
+ rpc->crpc_timeout = rpc_timeout;
+
+ spin_lock(&rpc->crpc_lock);
+ srpc_post_rpc(rpc);
+ spin_unlock(&rpc->crpc_lock);
+ return 0;
+
+test_done:
+ /*
+ * No one can schedule me now since:
+ * - previous RPC, if any, has done and
+ * - no new RPC is initiated.
+ * - my batch is still active; no one can run it again now.
+ * Cancel pending schedules and prevent future schedule attempts:
+ */
+ swi_exit_workitem(wi);
+ sfw_test_unit_done(tsu);
+ return 1;
+}
+
+int
+sfw_run_batch (sfw_batch_t *tsb)
+{
+ swi_workitem_t *wi;
+ sfw_test_unit_t *tsu;
+ sfw_test_instance_t *tsi;
+
+ if (sfw_batch_active(tsb)) {
+ CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n",
+ tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
+ return 0;
+ }
+
+ list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+ if (!tsi->tsi_is_client) /* skip server instances */
+ continue;
+
+ LASSERT (!tsi->tsi_stopping);
+ LASSERT (!sfw_test_active(tsi));
+
+ atomic_inc(&tsb->bat_nactive);
+
+ list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
+ atomic_inc(&tsi->tsi_nactive);
+ tsu->tsu_loop = tsi->tsi_loop;
+ wi = &tsu->tsu_worker;
+ swi_init_workitem(wi, tsu, sfw_run_test,
+ lst_sched_test[\
+ lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
+ swi_schedule_workitem(wi);
+ }
+ }
+
+ return 0;
+}
+
+int
+sfw_stop_batch (sfw_batch_t *tsb, int force)
+{
+ sfw_test_instance_t *tsi;
+ srpc_client_rpc_t *rpc;
+
+ if (!sfw_batch_active(tsb)) {
+ CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id);
+ return 0;
+ }
+
+ list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+ spin_lock(&tsi->tsi_lock);
+
+ if (!tsi->tsi_is_client ||
+ !sfw_test_active(tsi) || tsi->tsi_stopping) {
+ spin_unlock(&tsi->tsi_lock);
+ continue;
+ }
+
+ tsi->tsi_stopping = 1;
+
+ if (!force) {
+ spin_unlock(&tsi->tsi_lock);
+ continue;
+ }
+
+ /* abort launched rpcs in the test */
+ list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) {
+ spin_lock(&rpc->crpc_lock);
+
+ srpc_abort_rpc(rpc, -EINTR);
+
+ spin_unlock(&rpc->crpc_lock);
+ }
+
+ spin_unlock(&tsi->tsi_lock);
+ }
+
+ return 0;
+}
+
+int
+sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
+{
+ sfw_test_instance_t *tsi;
+
+ if (testidx < 0)
+ return -EINVAL;
+
+ if (testidx == 0) {
+ reply->bar_active = atomic_read(&tsb->bat_nactive);
+ return 0;
+ }
+
+ list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+ if (testidx-- > 1)
+ continue;
+
+ reply->bar_active = atomic_read(&tsi->tsi_nactive);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+void
+sfw_free_pages (srpc_server_rpc_t *rpc)
+{
+ srpc_free_bulk(rpc->srpc_bulk);
+ rpc->srpc_bulk = NULL;
+}
+
+int
+sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
+ int sink)
+{
+ LASSERT(rpc->srpc_bulk == NULL);
+ LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
+
+ rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, len, sink);
+ if (rpc->srpc_bulk == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int
+sfw_add_test (srpc_server_rpc_t *rpc)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
+ srpc_test_reqst_t *request;
+ int rc;
+ sfw_batch_t *bat;
+
+ request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
+ reply->tsr_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+ if (request->tsr_loop == 0 ||
+ request->tsr_concur == 0 ||
+ request->tsr_sid.ses_nid == LNET_NID_ANY ||
+ request->tsr_ndest > SFW_MAX_NDESTS ||
+ (request->tsr_is_client && request->tsr_ndest == 0) ||
+ request->tsr_concur > SFW_MAX_CONCUR ||
+ request->tsr_service > SRPC_SERVICE_MAX_ID ||
+ request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
+ reply->tsr_status = EINVAL;
+ return 0;
+ }
+
+ if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
+ sfw_find_test_case(request->tsr_service) == NULL) {
+ reply->tsr_status = ENOENT;
+ return 0;
+ }
+
+ bat = sfw_bid2batch(request->tsr_bid);
+ if (bat == NULL) {
+ CERROR ("Dropping RPC (%s) from %s under memory pressure.\n",
+ rpc->srpc_scd->scd_svc->sv_name,
+ libcfs_id2str(rpc->srpc_peer));
+ return -ENOMEM;
+ }
+
+ if (sfw_batch_active(bat)) {
+ reply->tsr_status = EBUSY;
+ return 0;
+ }
+
+ if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
+ /* rpc will be resumed later in sfw_bulk_ready */
+ int npg = sfw_id_pages(request->tsr_ndest);
+ int len;
+
+ if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
+ len = npg * PAGE_CACHE_SIZE;
+
+ } else {
+ len = sizeof(lnet_process_id_packed_t) *
+ request->tsr_ndest;
+ }
+
+ return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
+ }
+
+ rc = sfw_add_test_instance(bat, rpc);
+ CDEBUG (rc == 0 ? D_NET : D_WARNING,
+ "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
+ rc == 0 ? "Added" : "Failed to add", request->tsr_service,
+ request->tsr_is_client ? "client" : "server",
+ request->tsr_loop, request->tsr_concur, request->tsr_ndest);
+
+ reply->tsr_status = (rc < 0) ? -rc : rc;
+ return 0;
+}
+
+int
+sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
+{
+ sfw_session_t *sn = sfw_data.fw_session;
+ int rc = 0;
+ sfw_batch_t *bat;
+
+ reply->bar_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
+
+ if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
+ reply->bar_status = ESRCH;
+ return 0;
+ }
+
+ bat = sfw_find_batch(request->bar_bid);
+ if (bat == NULL) {
+ reply->bar_status = ENOENT;
+ return 0;
+ }
+
+ switch (request->bar_opc) {
+ case SRPC_BATCH_OPC_RUN:
+ rc = sfw_run_batch(bat);
+ break;
+
+ case SRPC_BATCH_OPC_STOP:
+ rc = sfw_stop_batch(bat, request->bar_arg);
+ break;
+
+ case SRPC_BATCH_OPC_QUERY:
+ rc = sfw_query_batch(bat, request->bar_testidx, reply);
+ break;
+
+ default:
+ return -EINVAL; /* drop it */
+ }
+
+ reply->bar_status = (rc < 0) ? -rc : rc;
+ return 0;
+}
+
+int
+sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
+{
+ struct srpc_service *sv = rpc->srpc_scd->scd_svc;
+ srpc_msg_t *reply = &rpc->srpc_replymsg;
+ srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg;
+ unsigned features = LST_FEATS_MASK;
+ int rc = 0;
+
+ LASSERT(sfw_data.fw_active_srpc == NULL);
+ LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ if (sfw_data.fw_shuttingdown) {
+ spin_unlock(&sfw_data.fw_lock);
+ return -ESHUTDOWN;
+ }
+
+ /* Remove timer to avoid racing with it or expiring active session */
+ if (sfw_del_session_timer() != 0) {
+ CERROR("Dropping RPC (%s) from %s: racing with expiry timer.",
+ sv->sv_name, libcfs_id2str(rpc->srpc_peer));
+ spin_unlock(&sfw_data.fw_lock);
+ return -EAGAIN;
+ }
+
+ sfw_data.fw_active_srpc = rpc;
+ spin_unlock(&sfw_data.fw_lock);
+
+ sfw_unpack_message(request);
+ LASSERT(request->msg_type == srpc_service2request(sv->sv_id));
+
+ /* rpc module should have checked this */
+ LASSERT(request->msg_version == SRPC_MSG_VERSION);
+
+ if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
+ sv->sv_id != SRPC_SERVICE_DEBUG) {
+ sfw_session_t *sn = sfw_data.fw_session;
+
+ if (sn != NULL &&
+ sn->sn_features != request->msg_ses_feats) {
+ CNETERR("Features of framework RPC don't match "
+ "features of current session: %x/%x\n",
+ request->msg_ses_feats, sn->sn_features);
+ reply->msg_body.reply.status = EPROTO;
+ reply->msg_body.reply.sid = sn->sn_id;
+ goto out;
+ }
+
+ } else if ((request->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+ /* NB: at this point, old version will ignore features and
+ * create new session anyway, so console should be able
+ * to handle this */
+ reply->msg_body.reply.status = EPROTO;
+ goto out;
+ }
+
+ switch(sv->sv_id) {
+ default:
+ LBUG ();
+ case SRPC_SERVICE_TEST:
+ rc = sfw_add_test(rpc);
+ break;
+
+ case SRPC_SERVICE_BATCH:
+ rc = sfw_control_batch(&request->msg_body.bat_reqst,
+ &reply->msg_body.bat_reply);
+ break;
+
+ case SRPC_SERVICE_QUERY_STAT:
+ rc = sfw_get_stats(&request->msg_body.stat_reqst,
+ &reply->msg_body.stat_reply);
+ break;
+
+ case SRPC_SERVICE_DEBUG:
+ rc = sfw_debug_session(&request->msg_body.dbg_reqst,
+ &reply->msg_body.dbg_reply);
+ break;
+
+ case SRPC_SERVICE_MAKE_SESSION:
+ rc = sfw_make_session(&request->msg_body.mksn_reqst,
+ &reply->msg_body.mksn_reply);
+ break;
+
+ case SRPC_SERVICE_REMOVE_SESSION:
+ rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
+ &reply->msg_body.rmsn_reply);
+ break;
+ }
+
+ if (sfw_data.fw_session != NULL)
+ features = sfw_data.fw_session->sn_features;
+ out:
+ reply->msg_ses_feats = features;
+ rpc->srpc_done = sfw_server_rpc_done;
+ spin_lock(&sfw_data.fw_lock);
+
+ if (!sfw_data.fw_shuttingdown)
+ sfw_add_session_timer();
+
+ sfw_data.fw_active_srpc = NULL;
+ spin_unlock(&sfw_data.fw_lock);
+ return rc;
+}
+
+int
+sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
+{
+ struct srpc_service *sv = rpc->srpc_scd->scd_svc;
+ int rc;
+
+ LASSERT(rpc->srpc_bulk != NULL);
+ LASSERT(sv->sv_id == SRPC_SERVICE_TEST);
+ LASSERT(sfw_data.fw_active_srpc == NULL);
+ LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ if (status != 0) {
+ CERROR("Bulk transfer failed for RPC: "
+ "service %s, peer %s, status %d\n",
+ sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
+ spin_unlock(&sfw_data.fw_lock);
+ return -EIO;
+ }
+
+ if (sfw_data.fw_shuttingdown) {
+ spin_unlock(&sfw_data.fw_lock);
+ return -ESHUTDOWN;
+ }
+
+ if (sfw_del_session_timer() != 0) {
+ CERROR("Dropping RPC (%s) from %s: racing with expiry timer",
+ sv->sv_name, libcfs_id2str(rpc->srpc_peer));
+ spin_unlock(&sfw_data.fw_lock);
+ return -EAGAIN;
+ }
+
+ sfw_data.fw_active_srpc = rpc;
+ spin_unlock(&sfw_data.fw_lock);
+
+ rc = sfw_add_test(rpc);
+
+ spin_lock(&sfw_data.fw_lock);
+
+ if (!sfw_data.fw_shuttingdown)
+ sfw_add_session_timer();
+
+ sfw_data.fw_active_srpc = NULL;
+ spin_unlock(&sfw_data.fw_lock);
+ return rc;
+}
+
+srpc_client_rpc_t *
+sfw_create_rpc(lnet_process_id_t peer, int service,
+ unsigned features, int nbulkiov, int bulklen,
+ void (*done)(srpc_client_rpc_t *), void *priv)
+{
+ srpc_client_rpc_t *rpc = NULL;
+
+ spin_lock(&sfw_data.fw_lock);
+
+ LASSERT (!sfw_data.fw_shuttingdown);
+ LASSERT (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+ if (nbulkiov == 0 && !list_empty(&sfw_data.fw_zombie_rpcs)) {
+ rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
+ srpc_client_rpc_t, crpc_list);
+ list_del(&rpc->crpc_list);
+
+ srpc_init_client_rpc(rpc, peer, service, 0, 0,
+ done, sfw_client_rpc_fini, priv);
+ }
+
+ spin_unlock(&sfw_data.fw_lock);
+
+ if (rpc == NULL) {
+ rpc = srpc_create_client_rpc(peer, service,
+ nbulkiov, bulklen, done,
+ nbulkiov != 0 ? NULL :
+ sfw_client_rpc_fini,
+ priv);
+ }
+
+ if (rpc != NULL) /* "session" is concept in framework */
+ rpc->crpc_reqstmsg.msg_ses_feats = features;
+
+ return rpc;
+}
+
+void
+sfw_unpack_message (srpc_msg_t *msg)
+{
+ if (msg->msg_magic == SRPC_MSG_MAGIC)
+ return; /* no flipping needed */
+
+ /* srpc module should guarantee I wouldn't get crap */
+ LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+ if (msg->msg_type == SRPC_MSG_STAT_REQST) {
+ srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst;
+
+ __swab32s(&req->str_type);
+ __swab64s(&req->str_rpyid);
+ sfw_unpack_sid(req->str_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
+ srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+
+ __swab32s(&rep->str_status);
+ sfw_unpack_sid(rep->str_sid);
+ sfw_unpack_fw_counters(rep->str_fw);
+ sfw_unpack_rpc_counters(rep->str_rpc);
+ sfw_unpack_lnet_counters(rep->str_lnet);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
+ srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst;
+
+ __swab64s(&req->mksn_rpyid);
+ __swab32s(&req->mksn_force);
+ sfw_unpack_sid(req->mksn_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
+ srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply;
+
+ __swab32s(&rep->mksn_status);
+ __swab32s(&rep->mksn_timeout);
+ sfw_unpack_sid(rep->mksn_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
+ srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst;
+
+ __swab64s(&req->rmsn_rpyid);
+ sfw_unpack_sid(req->rmsn_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
+ srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply;
+
+ __swab32s(&rep->rmsn_status);
+ sfw_unpack_sid(rep->rmsn_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
+ srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst;
+
+ __swab64s(&req->dbg_rpyid);
+ __swab32s(&req->dbg_flags);
+ sfw_unpack_sid(req->dbg_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
+ srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply;
+
+ __swab32s(&rep->dbg_nbatch);
+ __swab32s(&rep->dbg_timeout);
+ sfw_unpack_sid(rep->dbg_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
+ srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst;
+
+ __swab32s(&req->bar_opc);
+ __swab64s(&req->bar_rpyid);
+ __swab32s(&req->bar_testidx);
+ __swab32s(&req->bar_arg);
+ sfw_unpack_sid(req->bar_sid);
+ __swab64s(&req->bar_bid.bat_id);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
+ srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+
+ __swab32s(&rep->bar_status);
+ sfw_unpack_sid(rep->bar_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_TEST_REQST) {
+ srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+
+ __swab64s(&req->tsr_rpyid);
+ __swab64s(&req->tsr_bulkid);
+ __swab32s(&req->tsr_loop);
+ __swab32s(&req->tsr_ndest);
+ __swab32s(&req->tsr_concur);
+ __swab32s(&req->tsr_service);
+ sfw_unpack_sid(req->tsr_sid);
+ __swab64s(&req->tsr_bid.bat_id);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
+ srpc_test_reply_t *rep = &msg->msg_body.tes_reply;
+
+ __swab32s(&rep->tsr_status);
+ sfw_unpack_sid(rep->tsr_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
+ srpc_join_reqst_t *req = &msg->msg_body.join_reqst;
+
+ __swab64s(&req->join_rpyid);
+ sfw_unpack_sid(req->join_sid);
+ return;
+ }
+
+ if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
+ srpc_join_reply_t *rep = &msg->msg_body.join_reply;
+
+ __swab32s(&rep->join_status);
+ __swab32s(&rep->join_timeout);
+ sfw_unpack_sid(rep->join_sid);
+ return;
+ }
+
+ LBUG ();
+ return;
+}
+
+void
+sfw_abort_rpc (srpc_client_rpc_t *rpc)
+{
+ LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
+ LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
+
+ spin_lock(&rpc->crpc_lock);
+ srpc_abort_rpc(rpc, -EINTR);
+ spin_unlock(&rpc->crpc_lock);
+ return;
+}
+
+void
+sfw_post_rpc (srpc_client_rpc_t *rpc)
+{
+ spin_lock(&rpc->crpc_lock);
+
+ LASSERT (!rpc->crpc_closed);
+ LASSERT (!rpc->crpc_aborted);
+ LASSERT (list_empty(&rpc->crpc_list));
+ LASSERT (!sfw_data.fw_shuttingdown);
+
+ rpc->crpc_timeout = rpc_timeout;
+ srpc_post_rpc(rpc);
+
+ spin_unlock(&rpc->crpc_lock);
+ return;
+}
+
+static srpc_service_t sfw_services[] =
+{
+ {
+ /* sv_id */ SRPC_SERVICE_DEBUG,
+ /* sv_name */ "debug",
+ 0
+ },
+ {
+ /* sv_id */ SRPC_SERVICE_QUERY_STAT,
+ /* sv_name */ "query stats",
+ 0
+ },
+ {
+ /* sv_id */ SRPC_SERVICE_MAKE_SESSION,
+ /* sv_name */ "make session",
+ 0
+ },
+ {
+ /* sv_id */ SRPC_SERVICE_REMOVE_SESSION,
+ /* sv_name */ "remove session",
+ 0
+ },
+ {
+ /* sv_id */ SRPC_SERVICE_BATCH,
+ /* sv_name */ "batch service",
+ 0
+ },
+ {
+ /* sv_id */ SRPC_SERVICE_TEST,
+ /* sv_name */ "test service",
+ 0
+ },
+ {
+ /* sv_id */ 0,
+ /* sv_name */ NULL,
+ 0
+ }
+};
+
+extern sfw_test_client_ops_t ping_test_client;
+extern srpc_service_t ping_test_service;
+extern void ping_init_test_client(void);
+extern void ping_init_test_service(void);
+
+extern sfw_test_client_ops_t brw_test_client;
+extern srpc_service_t brw_test_service;
+extern void brw_init_test_client(void);
+extern void brw_init_test_service(void);
+
+
+int
+sfw_startup (void)
+{
+ int i;
+ int rc;
+ int error;
+ srpc_service_t *sv;
+ sfw_test_case_t *tsc;
+
+
+ if (session_timeout < 0) {
+ CERROR ("Session timeout must be non-negative: %d\n",
+ session_timeout);
+ return -EINVAL;
+ }
+
+ if (rpc_timeout < 0) {
+ CERROR ("RPC timeout must be non-negative: %d\n",
+ rpc_timeout);
+ return -EINVAL;
+ }
+
+ if (session_timeout == 0)
+ CWARN ("Zero session_timeout specified "
+ "- test sessions never expire.\n");
+
+ if (rpc_timeout == 0)
+ CWARN ("Zero rpc_timeout specified "
+ "- test RPC never expire.\n");
+
+ memset(&sfw_data, 0, sizeof(struct smoketest_framework));
+
+ sfw_data.fw_session = NULL;
+ sfw_data.fw_active_srpc = NULL;
+ spin_lock_init(&sfw_data.fw_lock);
+ atomic_set(&sfw_data.fw_nzombies, 0);
+ INIT_LIST_HEAD(&sfw_data.fw_tests);
+ INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
+ INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
+
+ brw_init_test_client();
+ brw_init_test_service();
+ rc = sfw_register_test(&brw_test_service, &brw_test_client);
+ LASSERT (rc == 0);
+
+ ping_init_test_client();
+ ping_init_test_service();
+ rc = sfw_register_test(&ping_test_service, &ping_test_client);
+ LASSERT (rc == 0);
+
+ error = 0;
+ list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+ sv = tsc->tsc_srv_service;
+
+ rc = srpc_add_service(sv);
+ LASSERT (rc != -EBUSY);
+ if (rc != 0) {
+ CWARN ("Failed to add %s service: %d\n",
+ sv->sv_name, rc);
+ error = rc;
+ }
+ }
+
+ for (i = 0; ; i++) {
+ sv = &sfw_services[i];
+ if (sv->sv_name == NULL) break;
+
+ sv->sv_bulk_ready = NULL;
+ sv->sv_handler = sfw_handle_server_rpc;
+ sv->sv_wi_total = SFW_FRWK_WI_MAX;
+ if (sv->sv_id == SRPC_SERVICE_TEST)
+ sv->sv_bulk_ready = sfw_bulk_ready;
+
+ rc = srpc_add_service(sv);
+ LASSERT (rc != -EBUSY);
+ if (rc != 0) {
+ CWARN ("Failed to add %s service: %d\n",
+ sv->sv_name, rc);
+ error = rc;
+ }
+
+ /* about to sfw_shutdown, no need to add buffer */
+ if (error) continue;
+
+ rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
+ if (rc != 0) {
+ CWARN("Failed to reserve enough buffers: "
+ "service %s, %d needed: %d\n",
+ sv->sv_name, sv->sv_wi_total, rc);
+ error = -ENOMEM;
+ }
+ }
+
+ if (error != 0)
+ sfw_shutdown();
+ return error;
+}
+
+void
+sfw_shutdown (void)
+{
+ srpc_service_t *sv;
+ sfw_test_case_t *tsc;
+ int i;
+
+ spin_lock(&sfw_data.fw_lock);
+
+ sfw_data.fw_shuttingdown = 1;
+ lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock,
+ "waiting for active RPC to finish.\n");
+
+ if (sfw_del_session_timer() != 0)
+ lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock,
+ "waiting for session timer to explode.\n");
+
+ sfw_deactivate_session();
+ lst_wait_until(atomic_read(&sfw_data.fw_nzombies) == 0,
+ sfw_data.fw_lock,
+ "waiting for %d zombie sessions to die.\n",
+ atomic_read(&sfw_data.fw_nzombies));
+
+ spin_unlock(&sfw_data.fw_lock);
+
+ for (i = 0; ; i++) {
+ sv = &sfw_services[i];
+ if (sv->sv_name == NULL)
+ break;
+
+ srpc_shutdown_service(sv);
+ srpc_remove_service(sv);
+ }
+
+ list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+ sv = tsc->tsc_srv_service;
+ srpc_shutdown_service(sv);
+ srpc_remove_service(sv);
+ }
+
+ while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
+ srpc_client_rpc_t *rpc;
+
+ rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
+ srpc_client_rpc_t, crpc_list);
+ list_del(&rpc->crpc_list);
+
+ LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+ }
+
+ for (i = 0; ; i++) {
+ sv = &sfw_services[i];
+ if (sv->sv_name == NULL)
+ break;
+
+ srpc_wait_service_shutdown(sv);
+ }
+
+ while (!list_empty(&sfw_data.fw_tests)) {
+ tsc = list_entry(sfw_data.fw_tests.next,
+ sfw_test_case_t, tsc_list);
+
+ srpc_wait_service_shutdown(tsc->tsc_srv_service);
+
+ list_del(&tsc->tsc_list);
+ LIBCFS_FREE(tsc, sizeof(*tsc));
+ }
+
+ return;
+}
diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c
new file mode 100644
index 000000000000..5257e5630a0e
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/module.c
@@ -0,0 +1,169 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+enum {
+ LST_INIT_NONE = 0,
+ LST_INIT_WI_SERIAL,
+ LST_INIT_WI_TEST,
+ LST_INIT_RPC,
+ LST_INIT_FW,
+ LST_INIT_CONSOLE
+};
+
+extern int lstcon_console_init(void);
+extern int lstcon_console_fini(void);
+
+static int lst_init_step = LST_INIT_NONE;
+
+struct cfs_wi_sched *lst_sched_serial;
+struct cfs_wi_sched **lst_sched_test;
+
+void
+lnet_selftest_fini(void)
+{
+ int i;
+
+ switch (lst_init_step) {
+ case LST_INIT_CONSOLE:
+ lstcon_console_fini();
+ case LST_INIT_FW:
+ sfw_shutdown();
+ case LST_INIT_RPC:
+ srpc_shutdown();
+ case LST_INIT_WI_TEST:
+ for (i = 0;
+ i < cfs_cpt_number(lnet_cpt_table()); i++) {
+ if (lst_sched_test[i] == NULL)
+ continue;
+ cfs_wi_sched_destroy(lst_sched_test[i]);
+ }
+ LIBCFS_FREE(lst_sched_test,
+ sizeof(lst_sched_test[0]) *
+ cfs_cpt_number(lnet_cpt_table()));
+ lst_sched_test = NULL;
+
+ case LST_INIT_WI_SERIAL:
+ cfs_wi_sched_destroy(lst_sched_serial);
+ lst_sched_serial = NULL;
+ case LST_INIT_NONE:
+ break;
+ default:
+ LBUG();
+ }
+ return;
+}
+
+void
+lnet_selftest_structure_assertion(void)
+{
+ CLASSERT(sizeof(srpc_msg_t) == 160);
+ CLASSERT(sizeof(srpc_test_reqst_t) == 70);
+ CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_concur) == 72);
+ CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_ndest) == 78);
+ CLASSERT(sizeof(srpc_stat_reply_t) == 136);
+ CLASSERT(sizeof(srpc_stat_reqst_t) == 28);
+}
+
+int
+lnet_selftest_init(void)
+{
+ int nscheds;
+ int rc;
+ int i;
+
+ rc = cfs_wi_sched_create("lst_s", lnet_cpt_table(), CFS_CPT_ANY,
+ 1, &lst_sched_serial);
+ if (rc != 0) {
+ CERROR("Failed to create serial WI scheduler for LST\n");
+ return rc;
+ }
+ lst_init_step = LST_INIT_WI_SERIAL;
+
+ nscheds = cfs_cpt_number(lnet_cpt_table());
+ LIBCFS_ALLOC(lst_sched_test, sizeof(lst_sched_test[0]) * nscheds);
+ if (lst_sched_test == NULL)
+ goto error;
+
+ lst_init_step = LST_INIT_WI_TEST;
+ for (i = 0; i < nscheds; i++) {
+ int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+
+ /* reserve at least one CPU for LND */
+ nthrs = max(nthrs - 1, 1);
+ rc = cfs_wi_sched_create("lst_t", lnet_cpt_table(), i,
+ nthrs, &lst_sched_test[i]);
+ if (rc != 0) {
+ CERROR("Failed to create CPT affinity WI scheduler "
+ "%d for LST\n", i);
+ goto error;
+ }
+ }
+
+ rc = srpc_startup();
+ if (rc != 0) {
+ CERROR("LST can't startup rpc\n");
+ goto error;
+ }
+ lst_init_step = LST_INIT_RPC;
+
+ rc = sfw_startup();
+ if (rc != 0) {
+ CERROR("LST can't startup framework\n");
+ goto error;
+ }
+ lst_init_step = LST_INIT_FW;
+
+ rc = lstcon_console_init();
+ if (rc != 0) {
+ CERROR("LST can't startup console\n");
+ goto error;
+ }
+ lst_init_step = LST_INIT_CONSOLE;
+ return 0;
+error:
+ lnet_selftest_fini();
+ return rc;
+}
+
+
+MODULE_DESCRIPTION("LNet Selftest");
+MODULE_LICENSE("GPL");
+
+cfs_module(lnet, "0.9.0", lnet_selftest_init, lnet_selftest_fini);
diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c
new file mode 100644
index 000000000000..f0f919482b56
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/ping_test.c
@@ -0,0 +1,229 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/conctl.c
+ *
+ * Test client & Server
+ *
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ */
+
+#include "selftest.h"
+
+#define LST_PING_TEST_MAGIC 0xbabeface
+
+int ping_srv_workitems = SFW_TEST_WI_MAX;
+CFS_MODULE_PARM(ping_srv_workitems, "i", int, 0644, "# PING server workitems");
+
+typedef struct {
+ spinlock_t pnd_lock; /* serialize */
+ int pnd_counter; /* sequence counter */
+} lst_ping_data_t;
+
+static lst_ping_data_t lst_ping_data;
+
+static int
+ping_client_init(sfw_test_instance_t *tsi)
+{
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+
+ LASSERT(tsi->tsi_is_client);
+ LASSERT(sn != NULL && (sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+ spin_lock_init(&lst_ping_data.pnd_lock);
+ lst_ping_data.pnd_counter = 0;
+
+ return 0;
+}
+
+static void
+ping_client_fini (sfw_test_instance_t *tsi)
+{
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ int errors;
+
+ LASSERT (sn != NULL);
+ LASSERT (tsi->tsi_is_client);
+
+ errors = atomic_read(&sn->sn_ping_errors);
+ if (errors)
+ CWARN ("%d pings have failed.\n", errors);
+ else
+ CDEBUG (D_NET, "Ping test finished OK.\n");
+}
+
+static int
+ping_client_prep_rpc(sfw_test_unit_t *tsu,
+ lnet_process_id_t dest, srpc_client_rpc_t **rpc)
+{
+ srpc_ping_reqst_t *req;
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ struct timeval tv;
+ int rc;
+
+ LASSERT(sn != NULL);
+ LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);
+
+ rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
+ if (rc != 0)
+ return rc;
+
+ req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
+
+ req->pnr_magic = LST_PING_TEST_MAGIC;
+
+ spin_lock(&lst_ping_data.pnd_lock);
+ req->pnr_seq = lst_ping_data.pnd_counter++;
+ spin_unlock(&lst_ping_data.pnd_lock);
+
+ cfs_fs_timeval(&tv);
+ req->pnr_time_sec = tv.tv_sec;
+ req->pnr_time_usec = tv.tv_usec;
+
+ return rc;
+}
+
+static void
+ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+{
+ sfw_test_instance_t *tsi = tsu->tsu_instance;
+ sfw_session_t *sn = tsi->tsi_batch->bat_session;
+ srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
+ srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
+ struct timeval tv;
+
+ LASSERT (sn != NULL);
+
+ if (rpc->crpc_status != 0) {
+ if (!tsi->tsi_stopping) /* rpc could have been aborted */
+ atomic_inc(&sn->sn_ping_errors);
+ CERROR ("Unable to ping %s (%d): %d\n",
+ libcfs_id2str(rpc->crpc_dest),
+ reqst->pnr_seq, rpc->crpc_status);
+ return;
+ }
+
+ if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
+ __swab32s(&reply->pnr_seq);
+ __swab32s(&reply->pnr_magic);
+ __swab32s(&reply->pnr_status);
+ }
+
+ if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
+ rpc->crpc_status = -EBADMSG;
+ atomic_inc(&sn->sn_ping_errors);
+ CERROR ("Bad magic %u from %s, %u expected.\n",
+ reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
+ LST_PING_TEST_MAGIC);
+ return;
+ }
+
+ if (reply->pnr_seq != reqst->pnr_seq) {
+ rpc->crpc_status = -EBADMSG;
+ atomic_inc(&sn->sn_ping_errors);
+ CERROR ("Bad seq %u from %s, %u expected.\n",
+ reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
+ reqst->pnr_seq);
+ return;
+ }
+
+ cfs_fs_timeval(&tv);
+ CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq,
+ (unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000
+ + (tv.tv_usec - reqst->pnr_time_usec)));
+ return;
+}
+
+static int
+ping_server_handle(struct srpc_server_rpc *rpc)
+{
+ struct srpc_service *sv = rpc->srpc_scd->scd_svc;
+ srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+ srpc_msg_t *replymsg = &rpc->srpc_replymsg;
+ srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst;
+ srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
+
+ LASSERT (sv->sv_id == SRPC_SERVICE_PING);
+
+ if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
+ LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));
+
+ __swab32s(&req->pnr_seq);
+ __swab32s(&req->pnr_magic);
+ __swab64s(&req->pnr_time_sec);
+ __swab64s(&req->pnr_time_usec);
+ }
+ LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));
+
+ if (req->pnr_magic != LST_PING_TEST_MAGIC) {
+ CERROR ("Unexpect magic %08x from %s\n",
+ req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
+ return -EINVAL;
+ }
+
+ rep->pnr_seq = req->pnr_seq;
+ rep->pnr_magic = LST_PING_TEST_MAGIC;
+
+ if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
+ replymsg->msg_ses_feats = LST_FEATS_MASK;
+ rep->pnr_status = EPROTO;
+ return 0;
+ }
+
+ replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
+
+ CDEBUG(D_NET, "Get ping %d from %s\n",
+ req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
+ return 0;
+}
+
+sfw_test_client_ops_t ping_test_client;
+void ping_init_test_client(void)
+{
+ ping_test_client.tso_init = ping_client_init;
+ ping_test_client.tso_fini = ping_client_fini;
+ ping_test_client.tso_prep_rpc = ping_client_prep_rpc;
+ ping_test_client.tso_done_rpc = ping_client_done_rpc;
+}
+
+srpc_service_t ping_test_service;
+void ping_init_test_service(void)
+{
+ ping_test_service.sv_id = SRPC_SERVICE_PING;
+ ping_test_service.sv_name = "ping_test";
+ ping_test_service.sv_handler = ping_server_handle;
+ ping_test_service.sv_wi_total = ping_srv_workitems;
+}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
new file mode 100644
index 000000000000..bc1f38b80486
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -0,0 +1,1666 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/rpc.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ *
+ * 2012-05-13: Liang Zhen <liang@whamcloud.com>
+ * - percpt data for service to improve smp performance
+ * - code cleanup
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+typedef enum {
+ SRPC_STATE_NONE,
+ SRPC_STATE_NI_INIT,
+ SRPC_STATE_EQ_INIT,
+ SRPC_STATE_RUNNING,
+ SRPC_STATE_STOPPING,
+} srpc_state_t;
+
+struct smoketest_rpc {
+ spinlock_t rpc_glock; /* global lock */
+ srpc_service_t *rpc_services[SRPC_SERVICE_MAX_ID + 1];
+ lnet_handle_eq_t rpc_lnet_eq; /* _the_ LNet event queue */
+ srpc_state_t rpc_state;
+ srpc_counters_t rpc_counters;
+ __u64 rpc_matchbits; /* matchbits counter */
+} srpc_data;
+
+static inline int
+srpc_serv_portal(int svc_id)
+{
+ return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
+ SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
+}
+
+/* forward ref's */
+int srpc_handle_rpc (swi_workitem_t *wi);
+
+void srpc_get_counters (srpc_counters_t *cnt)
+{
+ spin_lock(&srpc_data.rpc_glock);
+ *cnt = srpc_data.rpc_counters;
+ spin_unlock(&srpc_data.rpc_glock);
+}
+
+void srpc_set_counters (const srpc_counters_t *cnt)
+{
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters = *cnt;
+ spin_unlock(&srpc_data.rpc_glock);
+}
+
+int
+srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob)
+{
+ nob = min(nob, (int)PAGE_CACHE_SIZE);
+
+ LASSERT(nob > 0);
+ LASSERT(i >= 0 && i < bk->bk_niov);
+
+ bk->bk_iovs[i].kiov_offset = 0;
+ bk->bk_iovs[i].kiov_page = pg;
+ bk->bk_iovs[i].kiov_len = nob;
+ return nob;
+}
+
+void
+srpc_free_bulk (srpc_bulk_t *bk)
+{
+ int i;
+ struct page *pg;
+
+ LASSERT (bk != NULL);
+
+ for (i = 0; i < bk->bk_niov; i++) {
+ pg = bk->bk_iovs[i].kiov_page;
+ if (pg == NULL) break;
+
+ __free_page(pg);
+ }
+
+ LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov]));
+ return;
+}
+
+srpc_bulk_t *
+srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
+{
+ srpc_bulk_t *bk;
+ struct page **pages;
+ int i;
+
+ LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
+
+ LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt,
+ offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+ if (bk == NULL) {
+ CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
+ return NULL;
+ }
+
+ memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+ bk->bk_sink = sink;
+ bk->bk_len = bulk_len;
+ bk->bk_niov = bulk_npg;
+ UNUSED(pages);
+
+ for (i = 0; i < bulk_npg; i++) {
+ struct page *pg;
+ int nob;
+
+ pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt),
+ GFP_IOFS, 0);
+ if (pg == NULL) {
+ CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
+ srpc_free_bulk(bk);
+ return NULL;
+ }
+
+ nob = srpc_add_bulk_page(bk, pg, i, bulk_len);
+ bulk_len -= nob;
+ }
+
+ return bk;
+}
+
+static inline __u64
+srpc_next_id (void)
+{
+ __u64 id;
+
+ spin_lock(&srpc_data.rpc_glock);
+ id = srpc_data.rpc_matchbits++;
+ spin_unlock(&srpc_data.rpc_glock);
+ return id;
+}
+
+void
+srpc_init_server_rpc(struct srpc_server_rpc *rpc,
+ struct srpc_service_cd *scd,
+ struct srpc_buffer *buffer)
+{
+ memset(rpc, 0, sizeof(*rpc));
+ swi_init_workitem(&rpc->srpc_wi, rpc, srpc_handle_rpc,
+ srpc_serv_is_framework(scd->scd_svc) ?
+ lst_sched_serial : lst_sched_test[scd->scd_cpt]);
+
+ rpc->srpc_ev.ev_fired = 1; /* no event expected now */
+
+ rpc->srpc_scd = scd;
+ rpc->srpc_reqstbuf = buffer;
+ rpc->srpc_peer = buffer->buf_peer;
+ rpc->srpc_self = buffer->buf_self;
+ LNetInvalidateHandle(&rpc->srpc_replymdh);
+}
+
+static void
+srpc_service_fini(struct srpc_service *svc)
+{
+ struct srpc_service_cd *scd;
+ struct srpc_server_rpc *rpc;
+ struct srpc_buffer *buf;
+ struct list_head *q;
+ int i;
+
+ if (svc->sv_cpt_data == NULL)
+ return;
+
+ cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
+ while (1) {
+ if (!list_empty(&scd->scd_buf_posted))
+ q = &scd->scd_buf_posted;
+ else if (!list_empty(&scd->scd_buf_blocked))
+ q = &scd->scd_buf_blocked;
+ else
+ break;
+
+ while (!list_empty(q)) {
+ buf = list_entry(q->next,
+ struct srpc_buffer,
+ buf_list);
+ list_del(&buf->buf_list);
+ LIBCFS_FREE(buf, sizeof(*buf));
+ }
+ }
+
+ LASSERT(list_empty(&scd->scd_rpc_active));
+
+ while (!list_empty(&scd->scd_rpc_free)) {
+ rpc = list_entry(scd->scd_rpc_free.next,
+ struct srpc_server_rpc,
+ srpc_list);
+ list_del(&rpc->srpc_list);
+ LIBCFS_FREE(rpc, sizeof(*rpc));
+ }
+ }
+
+ cfs_percpt_free(svc->sv_cpt_data);
+ svc->sv_cpt_data = NULL;
+}
+
+static int
+srpc_service_nrpcs(struct srpc_service *svc)
+{
+ int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
+
+ return srpc_serv_is_framework(svc) ?
+ max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
+}
+
+int srpc_add_buffer(struct swi_workitem *wi);
+
+static int
+srpc_service_init(struct srpc_service *svc)
+{
+ struct srpc_service_cd *scd;
+ struct srpc_server_rpc *rpc;
+ int nrpcs;
+ int i;
+ int j;
+
+ svc->sv_shuttingdown = 0;
+
+ svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
+ sizeof(struct srpc_service_cd));
+ if (svc->sv_cpt_data == NULL)
+ return -ENOMEM;
+
+ svc->sv_ncpts = srpc_serv_is_framework(svc) ?
+ 1 : cfs_cpt_number(lnet_cpt_table());
+ nrpcs = srpc_service_nrpcs(svc);
+
+ cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
+ scd->scd_cpt = i;
+ scd->scd_svc = svc;
+ spin_lock_init(&scd->scd_lock);
+ INIT_LIST_HEAD(&scd->scd_rpc_free);
+ INIT_LIST_HEAD(&scd->scd_rpc_active);
+ INIT_LIST_HEAD(&scd->scd_buf_posted);
+ INIT_LIST_HEAD(&scd->scd_buf_blocked);
+
+ scd->scd_ev.ev_data = scd;
+ scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
+
+ /* NB: don't use lst_sched_serial for adding buffer,
+ * see details in srpc_service_add_buffers() */
+ swi_init_workitem(&scd->scd_buf_wi, scd,
+ srpc_add_buffer, lst_sched_test[i]);
+
+ if (i != 0 && srpc_serv_is_framework(svc)) {
+ /* NB: framework service only needs srpc_service_cd for
+ * one partition, but we allocate for all to make
+ * it easier to implement, it will waste a little
+ * memory but nobody should care about this */
+ continue;
+ }
+
+ for (j = 0; j < nrpcs; j++) {
+ LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
+ i, sizeof(*rpc));
+ if (rpc == NULL) {
+ srpc_service_fini(svc);
+ return -ENOMEM;
+ }
+ list_add(&rpc->srpc_list, &scd->scd_rpc_free);
+ }
+ }
+
+ return 0;
+}
+
+int
+srpc_add_service(struct srpc_service *sv)
+{
+ int id = sv->sv_id;
+
+ LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
+
+ if (srpc_service_init(sv) != 0)
+ return -ENOMEM;
+
+ spin_lock(&srpc_data.rpc_glock);
+
+ LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
+
+ if (srpc_data.rpc_services[id] != NULL) {
+ spin_unlock(&srpc_data.rpc_glock);
+ goto failed;
+ }
+
+ srpc_data.rpc_services[id] = sv;
+ spin_unlock(&srpc_data.rpc_glock);
+
+ CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
+ return 0;
+
+ failed:
+ srpc_service_fini(sv);
+ return -EBUSY;
+}
+
+int
+srpc_remove_service (srpc_service_t *sv)
+{
+ int id = sv->sv_id;
+
+ spin_lock(&srpc_data.rpc_glock);
+
+ if (srpc_data.rpc_services[id] != sv) {
+ spin_unlock(&srpc_data.rpc_glock);
+ return -ENOENT;
+ }
+
+ srpc_data.rpc_services[id] = NULL;
+ spin_unlock(&srpc_data.rpc_glock);
+ return 0;
+}
+
+int
+srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
+ int len, int options, lnet_process_id_t peer,
+ lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+ int rc;
+ lnet_md_t md;
+ lnet_handle_me_t meh;
+
+ rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
+ local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh);
+ if (rc != 0) {
+ CERROR ("LNetMEAttach failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ return -ENOMEM;
+ }
+
+ md.threshold = 1;
+ md.user_ptr = ev;
+ md.start = buf;
+ md.length = len;
+ md.options = options;
+ md.eq_handle = srpc_data.rpc_lnet_eq;
+
+ rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh);
+ if (rc != 0) {
+ CERROR ("LNetMDAttach failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+
+ rc = LNetMEUnlink(meh);
+ LASSERT (rc == 0);
+ return -ENOMEM;
+ }
+
+ CDEBUG (D_NET,
+ "Posted passive RDMA: peer %s, portal %d, matchbits "LPX64"\n",
+ libcfs_id2str(peer), portal, matchbits);
+ return 0;
+}
+
+int
+srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
+ int options, lnet_process_id_t peer, lnet_nid_t self,
+ lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+ int rc;
+ lnet_md_t md;
+
+ md.user_ptr = ev;
+ md.start = buf;
+ md.length = len;
+ md.eq_handle = srpc_data.rpc_lnet_eq;
+ md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1;
+ md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
+
+ rc = LNetMDBind(md, LNET_UNLINK, mdh);
+ if (rc != 0) {
+ CERROR ("LNetMDBind failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ return -ENOMEM;
+ }
+
+ /* this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options.
+ * they're only meaningful for MDs attached to an ME (i.e. passive
+ * buffers... */
+ if ((options & LNET_MD_OP_PUT) != 0) {
+ rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
+ portal, matchbits, 0, 0);
+ } else {
+ LASSERT ((options & LNET_MD_OP_GET) != 0);
+
+ rc = LNetGet(self, *mdh, peer, portal, matchbits, 0);
+ }
+
+ if (rc != 0) {
+ CERROR ("LNet%s(%s, %d, "LPD64") failed: %d\n",
+ ((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get",
+ libcfs_id2str(peer), portal, matchbits, rc);
+
+ /* The forthcoming unlink event will complete this operation
+ * with failure, so fall through and return success here.
+ */
+ rc = LNetMDUnlink(*mdh);
+ LASSERT (rc == 0);
+ } else {
+ CDEBUG (D_NET,
+ "Posted active RDMA: peer %s, portal %u, matchbits "LPX64"\n",
+ libcfs_id2str(peer), portal, matchbits);
+ }
+ return 0;
+}
+
+int
+srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf,
+ int len, lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+ return srpc_post_active_rdma(srpc_serv_portal(service), service,
+ buf, len, LNET_MD_OP_PUT, peer,
+ LNET_NID_ANY, mdh, ev);
+}
+
+int
+srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
+ lnet_handle_md_t *mdh, srpc_event_t *ev)
+{
+ lnet_process_id_t any = {0};
+
+ any.nid = LNET_NID_ANY;
+ any.pid = LNET_PID_ANY;
+
+ return srpc_post_passive_rdma(srpc_serv_portal(service),
+ local, service, buf, len,
+ LNET_MD_OP_PUT, any, mdh, ev);
+}
+
+int
+srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
+{
+ struct srpc_service *sv = scd->scd_svc;
+ struct srpc_msg *msg = &buf->buf_msg;
+ int rc;
+
+ LNetInvalidateHandle(&buf->buf_mdh);
+ list_add(&buf->buf_list, &scd->scd_buf_posted);
+ scd->scd_buf_nposted++;
+ spin_unlock(&scd->scd_lock);
+
+ rc = srpc_post_passive_rqtbuf(sv->sv_id,
+ !srpc_serv_is_framework(sv),
+ msg, sizeof(*msg), &buf->buf_mdh,
+ &scd->scd_ev);
+
+ /* At this point, a RPC (new or delayed) may have arrived in
+ * msg and its event handler has been called. So we must add
+ * buf to scd_buf_posted _before_ dropping scd_lock */
+
+ spin_lock(&scd->scd_lock);
+
+ if (rc == 0) {
+ if (!sv->sv_shuttingdown)
+ return 0;
+
+ spin_unlock(&scd->scd_lock);
+ /* srpc_shutdown_service might have tried to unlink me
+ * when my buf_mdh was still invalid */
+ LNetMDUnlink(buf->buf_mdh);
+ spin_lock(&scd->scd_lock);
+ return 0;
+ }
+
+ scd->scd_buf_nposted--;
+ if (sv->sv_shuttingdown)
+ return rc; /* don't allow to change scd_buf_posted */
+
+ list_del(&buf->buf_list);
+ spin_unlock(&scd->scd_lock);
+
+ LIBCFS_FREE(buf, sizeof(*buf));
+
+ spin_lock(&scd->scd_lock);
+ return rc;
+}
+
+int
+srpc_add_buffer(struct swi_workitem *wi)
+{
+ struct srpc_service_cd *scd = wi->swi_workitem.wi_data;
+ struct srpc_buffer *buf;
+ int rc = 0;
+
+ /* it's called by workitem scheduler threads, these threads
+ * should have been set CPT affinity, so buffers will be posted
+ * on CPT local list of Portal */
+ spin_lock(&scd->scd_lock);
+
+ while (scd->scd_buf_adjust > 0 &&
+ !scd->scd_svc->sv_shuttingdown) {
+ scd->scd_buf_adjust--; /* consume it */
+ scd->scd_buf_posting++;
+
+ spin_unlock(&scd->scd_lock);
+
+ LIBCFS_ALLOC(buf, sizeof(*buf));
+ if (buf == NULL) {
+ CERROR("Failed to add new buf to service: %s\n",
+ scd->scd_svc->sv_name);
+ spin_lock(&scd->scd_lock);
+ rc = -ENOMEM;
+ break;
+ }
+
+ spin_lock(&scd->scd_lock);
+ if (scd->scd_svc->sv_shuttingdown) {
+ spin_unlock(&scd->scd_lock);
+ LIBCFS_FREE(buf, sizeof(*buf));
+
+ spin_lock(&scd->scd_lock);
+ rc = -ESHUTDOWN;
+ break;
+ }
+
+ rc = srpc_service_post_buffer(scd, buf);
+ if (rc != 0)
+ break; /* buf has been freed inside */
+
+ LASSERT(scd->scd_buf_posting > 0);
+ scd->scd_buf_posting--;
+ scd->scd_buf_total++;
+ scd->scd_buf_low = MAX(2, scd->scd_buf_total / 4);
+ }
+
+ if (rc != 0) {
+ scd->scd_buf_err_stamp = cfs_time_current_sec();
+ scd->scd_buf_err = rc;
+
+ LASSERT(scd->scd_buf_posting > 0);
+ scd->scd_buf_posting--;
+ }
+
+ spin_unlock(&scd->scd_lock);
+ return 0;
+}
+
+int
+srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
+{
+ struct srpc_service_cd *scd;
+ int rc = 0;
+ int i;
+
+ LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+
+ scd->scd_buf_err = 0;
+ scd->scd_buf_err_stamp = 0;
+ scd->scd_buf_posting = 0;
+ scd->scd_buf_adjust = nbuffer;
+ /* start to post buffers */
+ swi_schedule_workitem(&scd->scd_buf_wi);
+ spin_unlock(&scd->scd_lock);
+
+ /* framework service only post buffer for one partition */
+ if (srpc_serv_is_framework(sv))
+ break;
+ }
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+ /*
+ * NB: srpc_service_add_buffers() can be called inside
+ * thread context of lst_sched_serial, and we don't normally
+ * allow to sleep inside thread context of WI scheduler
+ * because it will block current scheduler thread from doing
+ * anything else, even worse, it could deadlock if it's
+ * waiting on result from another WI of the same scheduler.
+ * However, it's safe at here because scd_buf_wi is scheduled
+ * by thread in a different WI scheduler (lst_sched_test),
+ * so we don't have any risk of deadlock, though this could
+ * block all WIs pending on lst_sched_serial for a moment
+ * which is not good but not fatal.
+ */
+ lst_wait_until(scd->scd_buf_err != 0 ||
+ (scd->scd_buf_adjust == 0 &&
+ scd->scd_buf_posting == 0),
+ scd->scd_lock, "waiting for adding buffer\n");
+
+ if (scd->scd_buf_err != 0 && rc == 0)
+ rc = scd->scd_buf_err;
+
+ spin_unlock(&scd->scd_lock);
+ }
+
+ return rc;
+}
+
+void
+srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
+{
+ struct srpc_service_cd *scd;
+ int num;
+ int i;
+
+ LASSERT(!sv->sv_shuttingdown);
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+
+ num = scd->scd_buf_total + scd->scd_buf_posting;
+ scd->scd_buf_adjust -= min(nbuffer, num);
+
+ spin_unlock(&scd->scd_lock);
+ }
+}
+
+/* returns 1 if sv has finished, otherwise 0 */
+int
+srpc_finish_service(struct srpc_service *sv)
+{
+ struct srpc_service_cd *scd;
+ struct srpc_server_rpc *rpc;
+ int i;
+
+ LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+ if (!swi_deschedule_workitem(&scd->scd_buf_wi))
+ return 0;
+
+ if (scd->scd_buf_nposted > 0) {
+ CDEBUG(D_NET, "waiting for %d posted buffers to unlink",
+ scd->scd_buf_nposted);
+ spin_unlock(&scd->scd_lock);
+ return 0;
+ }
+
+ if (list_empty(&scd->scd_rpc_active)) {
+ spin_unlock(&scd->scd_lock);
+ continue;
+ }
+
+ rpc = list_entry(scd->scd_rpc_active.next,
+ struct srpc_server_rpc, srpc_list);
+ CNETERR("Active RPC %p on shutdown: sv %s, peer %s, "
+ "wi %s scheduled %d running %d, "
+ "ev fired %d type %d status %d lnet %d\n",
+ rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+ swi_state2str(rpc->srpc_wi.swi_state),
+ rpc->srpc_wi.swi_workitem.wi_scheduled,
+ rpc->srpc_wi.swi_workitem.wi_running,
+ rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
+ rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
+ spin_unlock(&scd->scd_lock);
+ return 0;
+ }
+
+ /* no lock needed from now on */
+ srpc_service_fini(sv);
+ return 1;
+}
+
+/* called with sv->sv_lock held */
+void
+srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf)
+{
+ if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
+ if (srpc_service_post_buffer(scd, buf) != 0) {
+ CWARN("Failed to post %s buffer\n",
+ scd->scd_svc->sv_name);
+ }
+ return;
+ }
+
+ /* service is shutting down, or we want to recycle some buffers */
+ scd->scd_buf_total--;
+
+ if (scd->scd_buf_adjust < 0) {
+ scd->scd_buf_adjust++;
+ if (scd->scd_buf_adjust < 0 &&
+ scd->scd_buf_total == 0 && scd->scd_buf_posting == 0) {
+ CDEBUG(D_INFO,
+ "Try to recyle %d buffers but nothing left\n",
+ scd->scd_buf_adjust);
+ scd->scd_buf_adjust = 0;
+ }
+ }
+
+ spin_unlock(&scd->scd_lock);
+ LIBCFS_FREE(buf, sizeof(*buf));
+ spin_lock(&scd->scd_lock);
+}
+
+void
+srpc_abort_service(struct srpc_service *sv)
+{
+ struct srpc_service_cd *scd;
+ struct srpc_server_rpc *rpc;
+ int i;
+
+ CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
+ sv->sv_id, sv->sv_name);
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+
+ /* schedule in-flight RPCs to notice the abort, NB:
+ * racing with incoming RPCs; complete fix should make test
+ * RPCs carry session ID in its headers */
+ list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
+ rpc->srpc_aborted = 1;
+ swi_schedule_workitem(&rpc->srpc_wi);
+ }
+
+ spin_unlock(&scd->scd_lock);
+ }
+}
+
+void
+srpc_shutdown_service(srpc_service_t *sv)
+{
+ struct srpc_service_cd *scd;
+ struct srpc_server_rpc *rpc;
+ srpc_buffer_t *buf;
+ int i;
+
+ CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
+ sv->sv_id, sv->sv_name);
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
+ spin_lock(&scd->scd_lock);
+
+ sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
+ spin_unlock(&scd->scd_lock);
+
+ cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
+ spin_lock(&scd->scd_lock);
+
+ /* schedule in-flight RPCs to notice the shutdown */
+ list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
+ swi_schedule_workitem(&rpc->srpc_wi);
+
+ spin_unlock(&scd->scd_lock);
+
+ /* OK to traverse scd_buf_posted without lock, since no one
+ * touches scd_buf_posted now */
+ list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
+ LNetMDUnlink(buf->buf_mdh);
+ }
+}
+
+int
+srpc_send_request (srpc_client_rpc_t *rpc)
+{
+ srpc_event_t *ev = &rpc->crpc_reqstev;
+ int rc;
+
+ ev->ev_fired = 0;
+ ev->ev_data = rpc;
+ ev->ev_type = SRPC_REQUEST_SENT;
+
+ rc = srpc_post_active_rqtbuf(rpc->crpc_dest, rpc->crpc_service,
+ &rpc->crpc_reqstmsg, sizeof(srpc_msg_t),
+ &rpc->crpc_reqstmdh, ev);
+ if (rc != 0) {
+ LASSERT (rc == -ENOMEM);
+ ev->ev_fired = 1; /* no more event expected */
+ }
+ return rc;
+}
+
+int
+srpc_prepare_reply (srpc_client_rpc_t *rpc)
+{
+ srpc_event_t *ev = &rpc->crpc_replyev;
+ __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
+ int rc;
+
+ ev->ev_fired = 0;
+ ev->ev_data = rpc;
+ ev->ev_type = SRPC_REPLY_RCVD;
+
+ *id = srpc_next_id();
+
+ rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
+ &rpc->crpc_replymsg, sizeof(srpc_msg_t),
+ LNET_MD_OP_PUT, rpc->crpc_dest,
+ &rpc->crpc_replymdh, ev);
+ if (rc != 0) {
+ LASSERT (rc == -ENOMEM);
+ ev->ev_fired = 1; /* no more event expected */
+ }
+ return rc;
+}
+
+int
+srpc_prepare_bulk (srpc_client_rpc_t *rpc)
+{
+ srpc_bulk_t *bk = &rpc->crpc_bulk;
+ srpc_event_t *ev = &rpc->crpc_bulkev;
+ __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
+ int rc;
+ int opt;
+
+ LASSERT (bk->bk_niov <= LNET_MAX_IOV);
+
+ if (bk->bk_niov == 0) return 0; /* nothing to do */
+
+ opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
+ opt |= LNET_MD_KIOV;
+
+ ev->ev_fired = 0;
+ ev->ev_data = rpc;
+ ev->ev_type = SRPC_BULK_REQ_RCVD;
+
+ *id = srpc_next_id();
+
+ rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
+ &bk->bk_iovs[0], bk->bk_niov, opt,
+ rpc->crpc_dest, &bk->bk_mdh, ev);
+ if (rc != 0) {
+ LASSERT (rc == -ENOMEM);
+ ev->ev_fired = 1; /* no more event expected */
+ }
+ return rc;
+}
+
+int
+srpc_do_bulk (srpc_server_rpc_t *rpc)
+{
+ srpc_event_t *ev = &rpc->srpc_ev;
+ srpc_bulk_t *bk = rpc->srpc_bulk;
+ __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
+ int rc;
+ int opt;
+
+ LASSERT (bk != NULL);
+
+ opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
+ opt |= LNET_MD_KIOV;
+
+ ev->ev_fired = 0;
+ ev->ev_data = rpc;
+ ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
+
+ rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
+ &bk->bk_iovs[0], bk->bk_niov, opt,
+ rpc->srpc_peer, rpc->srpc_self,
+ &bk->bk_mdh, ev);
+ if (rc != 0)
+ ev->ev_fired = 1; /* no more event expected */
+ return rc;
+}
+
+/* only called from srpc_handle_rpc */
+void
+srpc_server_rpc_done(srpc_server_rpc_t *rpc, int status)
+{
+ struct srpc_service_cd *scd = rpc->srpc_scd;
+ struct srpc_service *sv = scd->scd_svc;
+ srpc_buffer_t *buffer;
+
+ LASSERT (status != 0 || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
+
+ rpc->srpc_status = status;
+
+ CDEBUG_LIMIT (status == 0 ? D_NET : D_NETERROR,
+ "Server RPC %p done: service %s, peer %s, status %s:%d\n",
+ rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
+ swi_state2str(rpc->srpc_wi.swi_state), status);
+
+ if (status != 0) {
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters.rpcs_dropped++;
+ spin_unlock(&srpc_data.rpc_glock);
+ }
+
+ if (rpc->srpc_done != NULL)
+ (*rpc->srpc_done) (rpc);
+ LASSERT(rpc->srpc_bulk == NULL);
+
+ spin_lock(&scd->scd_lock);
+
+ if (rpc->srpc_reqstbuf != NULL) {
+ /* NB might drop sv_lock in srpc_service_recycle_buffer, but
+ * sv won't go away for scd_rpc_active must not be empty */
+ srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
+ rpc->srpc_reqstbuf = NULL;
+ }
+
+ list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
+
+ /*
+ * No one can schedule me now since:
+ * - I'm not on scd_rpc_active.
+ * - all LNet events have been fired.
+ * Cancel pending schedules and prevent future schedule attempts:
+ */
+ LASSERT(rpc->srpc_ev.ev_fired);
+ swi_exit_workitem(&rpc->srpc_wi);
+
+ if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
+ buffer = list_entry(scd->scd_buf_blocked.next,
+ srpc_buffer_t, buf_list);
+ list_del(&buffer->buf_list);
+
+ srpc_init_server_rpc(rpc, scd, buffer);
+ list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
+ swi_schedule_workitem(&rpc->srpc_wi);
+ } else {
+ list_add(&rpc->srpc_list, &scd->scd_rpc_free);
+ }
+
+ spin_unlock(&scd->scd_lock);
+ return;
+}
+
+/* handles an incoming RPC */
+int
+srpc_handle_rpc(swi_workitem_t *wi)
+{
+ struct srpc_server_rpc *rpc = wi->swi_workitem.wi_data;
+ struct srpc_service_cd *scd = rpc->srpc_scd;
+ struct srpc_service *sv = scd->scd_svc;
+ srpc_event_t *ev = &rpc->srpc_ev;
+ int rc = 0;
+
+ LASSERT(wi == &rpc->srpc_wi);
+
+ spin_lock(&scd->scd_lock);
+
+ if (sv->sv_shuttingdown || rpc->srpc_aborted) {
+ spin_unlock(&scd->scd_lock);
+
+ if (rpc->srpc_bulk != NULL)
+ LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
+ LNetMDUnlink(rpc->srpc_replymdh);
+
+ if (ev->ev_fired) { /* no more event, OK to finish */
+ srpc_server_rpc_done(rpc, -ESHUTDOWN);
+ return 1;
+ }
+ return 0;
+ }
+
+ spin_unlock(&scd->scd_lock);
+
+ switch (wi->swi_state) {
+ default:
+ LBUG ();
+ case SWI_STATE_NEWBORN: {
+ srpc_msg_t *msg;
+ srpc_generic_reply_t *reply;
+
+ msg = &rpc->srpc_reqstbuf->buf_msg;
+ reply = &rpc->srpc_replymsg.msg_body.reply;
+
+ if (msg->msg_magic == 0) {
+ /* moaned already in srpc_lnet_ev_handler */
+ srpc_server_rpc_done(rpc, EBADMSG);
+ return 1;
+ }
+
+ srpc_unpack_msg_hdr(msg);
+ if (msg->msg_version != SRPC_MSG_VERSION) {
+ CWARN("Version mismatch: %u, %u expected, from %s\n",
+ msg->msg_version, SRPC_MSG_VERSION,
+ libcfs_id2str(rpc->srpc_peer));
+ reply->status = EPROTO;
+ /* drop through and send reply */
+ } else {
+ reply->status = 0;
+ rc = (*sv->sv_handler)(rpc);
+ LASSERT(reply->status == 0 || !rpc->srpc_bulk);
+ if (rc != 0) {
+ srpc_server_rpc_done(rpc, rc);
+ return 1;
+ }
+ }
+
+ wi->swi_state = SWI_STATE_BULK_STARTED;
+
+ if (rpc->srpc_bulk != NULL) {
+ rc = srpc_do_bulk(rpc);
+ if (rc == 0)
+ return 0; /* wait for bulk */
+
+ LASSERT (ev->ev_fired);
+ ev->ev_status = rc;
+ }
+ }
+ case SWI_STATE_BULK_STARTED:
+ LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+
+ if (rpc->srpc_bulk != NULL) {
+ rc = ev->ev_status;
+
+ if (sv->sv_bulk_ready != NULL)
+ rc = (*sv->sv_bulk_ready) (rpc, rc);
+
+ if (rc != 0) {
+ srpc_server_rpc_done(rpc, rc);
+ return 1;
+ }
+ }
+
+ wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
+ rc = srpc_send_reply(rpc);
+ if (rc == 0)
+ return 0; /* wait for reply */
+ srpc_server_rpc_done(rpc, rc);
+ return 1;
+
+ case SWI_STATE_REPLY_SUBMITTED:
+ if (!ev->ev_fired) {
+ CERROR("RPC %p: bulk %p, service %d\n",
+ rpc, rpc->srpc_bulk, sv->sv_id);
+ CERROR("Event: status %d, type %d, lnet %d\n",
+ ev->ev_status, ev->ev_type, ev->ev_lnet);
+ LASSERT (ev->ev_fired);
+ }
+
+ wi->swi_state = SWI_STATE_DONE;
+ srpc_server_rpc_done(rpc, ev->ev_status);
+ return 1;
+ }
+
+ return 0;
+}
+
+void
+srpc_client_rpc_expired (void *data)
+{
+ srpc_client_rpc_t *rpc = data;
+
+ CWARN ("Client RPC expired: service %d, peer %s, timeout %d.\n",
+ rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+ rpc->crpc_timeout);
+
+ spin_lock(&rpc->crpc_lock);
+
+ rpc->crpc_timeout = 0;
+ srpc_abort_rpc(rpc, -ETIMEDOUT);
+
+ spin_unlock(&rpc->crpc_lock);
+
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters.rpcs_expired++;
+ spin_unlock(&srpc_data.rpc_glock);
+}
+
+inline void
+srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc)
+{
+ stt_timer_t *timer = &rpc->crpc_timer;
+
+ if (rpc->crpc_timeout == 0) return;
+
+ INIT_LIST_HEAD(&timer->stt_list);
+ timer->stt_data = rpc;
+ timer->stt_func = srpc_client_rpc_expired;
+ timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
+ cfs_time_current_sec());
+ stt_add_timer(timer);
+ return;
+}
+
+/*
+ * Called with rpc->crpc_lock held.
+ *
+ * Upon exit the RPC expiry timer is not queued and the handler is not
+ * running on any CPU. */
+void
+srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
+{
+ /* timer not planted or already exploded */
+ if (rpc->crpc_timeout == 0)
+ return;
+
+ /* timer sucessfully defused */
+ if (stt_del_timer(&rpc->crpc_timer))
+ return;
+
+ /* timer detonated, wait for it to explode */
+ while (rpc->crpc_timeout != 0) {
+ spin_unlock(&rpc->crpc_lock);
+
+ schedule();
+
+ spin_lock(&rpc->crpc_lock);
+ }
+}
+
+void
+srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status)
+{
+ swi_workitem_t *wi = &rpc->crpc_wi;
+
+ LASSERT(status != 0 || wi->swi_state == SWI_STATE_DONE);
+
+ spin_lock(&rpc->crpc_lock);
+
+ rpc->crpc_closed = 1;
+ if (rpc->crpc_status == 0)
+ rpc->crpc_status = status;
+
+ srpc_del_client_rpc_timer(rpc);
+
+ CDEBUG_LIMIT ((status == 0) ? D_NET : D_NETERROR,
+ "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
+ rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+ swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
+
+ /*
+ * No one can schedule me now since:
+ * - RPC timer has been defused.
+ * - all LNet events have been fired.
+ * - crpc_closed has been set, preventing srpc_abort_rpc from
+ * scheduling me.
+ * Cancel pending schedules and prevent future schedule attempts:
+ */
+ LASSERT (!srpc_event_pending(rpc));
+ swi_exit_workitem(wi);
+
+ spin_unlock(&rpc->crpc_lock);
+
+ (*rpc->crpc_done)(rpc);
+ return;
+}
+
+/* sends an outgoing RPC */
+int
+srpc_send_rpc (swi_workitem_t *wi)
+{
+ int rc = 0;
+ srpc_client_rpc_t *rpc;
+ srpc_msg_t *reply;
+ int do_bulk;
+
+ LASSERT(wi != NULL);
+
+ rpc = wi->swi_workitem.wi_data;
+
+ LASSERT (rpc != NULL);
+ LASSERT (wi == &rpc->crpc_wi);
+
+ reply = &rpc->crpc_replymsg;
+ do_bulk = rpc->crpc_bulk.bk_niov > 0;
+
+ spin_lock(&rpc->crpc_lock);
+
+ if (rpc->crpc_aborted) {
+ spin_unlock(&rpc->crpc_lock);
+ goto abort;
+ }
+
+ spin_unlock(&rpc->crpc_lock);
+
+ switch (wi->swi_state) {
+ default:
+ LBUG ();
+ case SWI_STATE_NEWBORN:
+ LASSERT (!srpc_event_pending(rpc));
+
+ rc = srpc_prepare_reply(rpc);
+ if (rc != 0) {
+ srpc_client_rpc_done(rpc, rc);
+ return 1;
+ }
+
+ rc = srpc_prepare_bulk(rpc);
+ if (rc != 0) break;
+
+ wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
+ rc = srpc_send_request(rpc);
+ break;
+
+ case SWI_STATE_REQUEST_SUBMITTED:
+ /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
+ * order; however, they're processed in a strict order:
+ * rqt, rpy, and bulk. */
+ if (!rpc->crpc_reqstev.ev_fired) break;
+
+ rc = rpc->crpc_reqstev.ev_status;
+ if (rc != 0) break;
+
+ wi->swi_state = SWI_STATE_REQUEST_SENT;
+ /* perhaps more events, fall thru */
+ case SWI_STATE_REQUEST_SENT: {
+ srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service);
+
+ if (!rpc->crpc_replyev.ev_fired) break;
+
+ rc = rpc->crpc_replyev.ev_status;
+ if (rc != 0) break;
+
+ srpc_unpack_msg_hdr(reply);
+ if (reply->msg_type != type ||
+ (reply->msg_magic != SRPC_MSG_MAGIC &&
+ reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
+ CWARN ("Bad message from %s: type %u (%d expected),"
+ " magic %u (%d expected).\n",
+ libcfs_id2str(rpc->crpc_dest),
+ reply->msg_type, type,
+ reply->msg_magic, SRPC_MSG_MAGIC);
+ rc = -EBADMSG;
+ break;
+ }
+
+ if (do_bulk && reply->msg_body.reply.status != 0) {
+ CWARN ("Remote error %d at %s, unlink bulk buffer in "
+ "case peer didn't initiate bulk transfer\n",
+ reply->msg_body.reply.status,
+ libcfs_id2str(rpc->crpc_dest));
+ LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
+ }
+
+ wi->swi_state = SWI_STATE_REPLY_RECEIVED;
+ }
+ case SWI_STATE_REPLY_RECEIVED:
+ if (do_bulk && !rpc->crpc_bulkev.ev_fired) break;
+
+ rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
+
+ /* Bulk buffer was unlinked due to remote error. Clear error
+ * since reply buffer still contains valid data.
+ * NB rpc->crpc_done shouldn't look into bulk data in case of
+ * remote error. */
+ if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
+ rpc->crpc_status == 0 && reply->msg_body.reply.status != 0)
+ rc = 0;
+
+ wi->swi_state = SWI_STATE_DONE;
+ srpc_client_rpc_done(rpc, rc);
+ return 1;
+ }
+
+ if (rc != 0) {
+ spin_lock(&rpc->crpc_lock);
+ srpc_abort_rpc(rpc, rc);
+ spin_unlock(&rpc->crpc_lock);
+ }
+
+abort:
+ if (rpc->crpc_aborted) {
+ LNetMDUnlink(rpc->crpc_reqstmdh);
+ LNetMDUnlink(rpc->crpc_replymdh);
+ LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
+
+ if (!srpc_event_pending(rpc)) {
+ srpc_client_rpc_done(rpc, -EINTR);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+srpc_client_rpc_t *
+srpc_create_client_rpc (lnet_process_id_t peer, int service,
+ int nbulkiov, int bulklen,
+ void (*rpc_done)(srpc_client_rpc_t *),
+ void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+{
+ srpc_client_rpc_t *rpc;
+
+ LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
+ crpc_bulk.bk_iovs[nbulkiov]));
+ if (rpc == NULL)
+ return NULL;
+
+ srpc_init_client_rpc(rpc, peer, service, nbulkiov,
+ bulklen, rpc_done, rpc_fini, priv);
+ return rpc;
+}
+
+/* called with rpc->crpc_lock held */
+void
+srpc_abort_rpc (srpc_client_rpc_t *rpc, int why)
+{
+ LASSERT (why != 0);
+
+ if (rpc->crpc_aborted || /* already aborted */
+ rpc->crpc_closed) /* callback imminent */
+ return;
+
+ CDEBUG (D_NET,
+ "Aborting RPC: service %d, peer %s, state %s, why %d\n",
+ rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
+ swi_state2str(rpc->crpc_wi.swi_state), why);
+
+ rpc->crpc_aborted = 1;
+ rpc->crpc_status = why;
+ swi_schedule_workitem(&rpc->crpc_wi);
+ return;
+}
+
+/* called with rpc->crpc_lock held */
+void
+srpc_post_rpc (srpc_client_rpc_t *rpc)
+{
+ LASSERT (!rpc->crpc_aborted);
+ LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING);
+
+ CDEBUG (D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
+ libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
+ rpc->crpc_timeout);
+
+ srpc_add_client_rpc_timer(rpc);
+ swi_schedule_workitem(&rpc->crpc_wi);
+ return;
+}
+
+
+int
+srpc_send_reply(struct srpc_server_rpc *rpc)
+{
+ srpc_event_t *ev = &rpc->srpc_ev;
+ struct srpc_msg *msg = &rpc->srpc_replymsg;
+ struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
+ struct srpc_service_cd *scd = rpc->srpc_scd;
+ struct srpc_service *sv = scd->scd_svc;
+ __u64 rpyid;
+ int rc;
+
+ LASSERT(buffer != NULL);
+ rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
+
+ spin_lock(&scd->scd_lock);
+
+ if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
+ /* Repost buffer before replying since test client
+ * might send me another RPC once it gets the reply */
+ if (srpc_service_post_buffer(scd, buffer) != 0)
+ CWARN("Failed to repost %s buffer\n", sv->sv_name);
+ rpc->srpc_reqstbuf = NULL;
+ }
+
+ spin_unlock(&scd->scd_lock);
+
+ ev->ev_fired = 0;
+ ev->ev_data = rpc;
+ ev->ev_type = SRPC_REPLY_SENT;
+
+ msg->msg_magic = SRPC_MSG_MAGIC;
+ msg->msg_version = SRPC_MSG_VERSION;
+ msg->msg_type = srpc_service2reply(sv->sv_id);
+
+ rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
+ sizeof(*msg), LNET_MD_OP_PUT,
+ rpc->srpc_peer, rpc->srpc_self,
+ &rpc->srpc_replymdh, ev);
+ if (rc != 0)
+ ev->ev_fired = 1; /* no more event expected */
+ return rc;
+}
+
+/* when in kernel always called with LNET_LOCK() held, and in thread context */
+void
+srpc_lnet_ev_handler(lnet_event_t *ev)
+{
+ struct srpc_service_cd *scd;
+ srpc_event_t *rpcev = ev->md.user_ptr;
+ srpc_client_rpc_t *crpc;
+ srpc_server_rpc_t *srpc;
+ srpc_buffer_t *buffer;
+ srpc_service_t *sv;
+ srpc_msg_t *msg;
+ srpc_msg_type_t type;
+
+ LASSERT (!in_interrupt());
+
+ if (ev->status != 0) {
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters.errors++;
+ spin_unlock(&srpc_data.rpc_glock);
+ }
+
+ rpcev->ev_lnet = ev->type;
+
+ switch (rpcev->ev_type) {
+ default:
+ CERROR("Unknown event: status %d, type %d, lnet %d\n",
+ rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
+ LBUG ();
+ case SRPC_REQUEST_SENT:
+ if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters.rpcs_sent++;
+ spin_unlock(&srpc_data.rpc_glock);
+ }
+ case SRPC_REPLY_RCVD:
+ case SRPC_BULK_REQ_RCVD:
+ crpc = rpcev->ev_data;
+
+ if (rpcev != &crpc->crpc_reqstev &&
+ rpcev != &crpc->crpc_replyev &&
+ rpcev != &crpc->crpc_bulkev) {
+ CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
+ rpcev, crpc, &crpc->crpc_reqstev,
+ &crpc->crpc_replyev, &crpc->crpc_bulkev);
+ CERROR("Bad event: status %d, type %d, lnet %d\n",
+ rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
+ LBUG ();
+ }
+
+ spin_lock(&crpc->crpc_lock);
+
+ LASSERT(rpcev->ev_fired == 0);
+ rpcev->ev_fired = 1;
+ rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+ -EINTR : ev->status;
+ swi_schedule_workitem(&crpc->crpc_wi);
+
+ spin_unlock(&crpc->crpc_lock);
+ break;
+
+ case SRPC_REQUEST_RCVD:
+ scd = rpcev->ev_data;
+ sv = scd->scd_svc;
+
+ LASSERT(rpcev == &scd->scd_ev);
+
+ spin_lock(&scd->scd_lock);
+
+ LASSERT (ev->unlinked);
+ LASSERT (ev->type == LNET_EVENT_PUT ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT (ev->type != LNET_EVENT_UNLINK ||
+ sv->sv_shuttingdown);
+
+ buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg);
+ buffer->buf_peer = ev->initiator;
+ buffer->buf_self = ev->target.nid;
+
+ LASSERT(scd->scd_buf_nposted > 0);
+ scd->scd_buf_nposted--;
+
+ if (sv->sv_shuttingdown) {
+ /* Leave buffer on scd->scd_buf_nposted since
+ * srpc_finish_service needs to traverse it. */
+ spin_unlock(&scd->scd_lock);
+ break;
+ }
+
+ if (scd->scd_buf_err_stamp != 0 &&
+ scd->scd_buf_err_stamp < cfs_time_current_sec()) {
+ /* re-enable adding buffer */
+ scd->scd_buf_err_stamp = 0;
+ scd->scd_buf_err = 0;
+ }
+
+ if (scd->scd_buf_err == 0 && /* adding buffer is enabled */
+ scd->scd_buf_adjust == 0 &&
+ scd->scd_buf_nposted < scd->scd_buf_low) {
+ scd->scd_buf_adjust = MAX(scd->scd_buf_total / 2,
+ SFW_TEST_WI_MIN);
+ swi_schedule_workitem(&scd->scd_buf_wi);
+ }
+
+ list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
+ msg = &buffer->buf_msg;
+ type = srpc_service2request(sv->sv_id);
+
+ if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
+ (msg->msg_type != type &&
+ msg->msg_type != __swab32(type)) ||
+ (msg->msg_magic != SRPC_MSG_MAGIC &&
+ msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
+ CERROR ("Dropping RPC (%s) from %s: "
+ "status %d mlength %d type %u magic %u.\n",
+ sv->sv_name, libcfs_id2str(ev->initiator),
+ ev->status, ev->mlength,
+ msg->msg_type, msg->msg_magic);
+
+ /* NB can't call srpc_service_recycle_buffer here since
+ * it may call LNetM[DE]Attach. The invalid magic tells
+ * srpc_handle_rpc to drop this RPC */
+ msg->msg_magic = 0;
+ }
+
+ if (!list_empty(&scd->scd_rpc_free)) {
+ srpc = list_entry(scd->scd_rpc_free.next,
+ struct srpc_server_rpc,
+ srpc_list);
+ list_del(&srpc->srpc_list);
+
+ srpc_init_server_rpc(srpc, scd, buffer);
+ list_add_tail(&srpc->srpc_list,
+ &scd->scd_rpc_active);
+ swi_schedule_workitem(&srpc->srpc_wi);
+ } else {
+ list_add_tail(&buffer->buf_list,
+ &scd->scd_buf_blocked);
+ }
+
+ spin_unlock(&scd->scd_lock);
+
+ spin_lock(&srpc_data.rpc_glock);
+ srpc_data.rpc_counters.rpcs_rcvd++;
+ spin_unlock(&srpc_data.rpc_glock);
+ break;
+
+ case SRPC_BULK_GET_RPLD:
+ LASSERT (ev->type == LNET_EVENT_SEND ||
+ ev->type == LNET_EVENT_REPLY ||
+ ev->type == LNET_EVENT_UNLINK);
+
+ if (!ev->unlinked)
+ break; /* wait for final event */
+
+ case SRPC_BULK_PUT_SENT:
+ if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
+ spin_lock(&srpc_data.rpc_glock);
+
+ if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
+ srpc_data.rpc_counters.bulk_get += ev->mlength;
+ else
+ srpc_data.rpc_counters.bulk_put += ev->mlength;
+
+ spin_unlock(&srpc_data.rpc_glock);
+ }
+ case SRPC_REPLY_SENT:
+ srpc = rpcev->ev_data;
+ scd = srpc->srpc_scd;
+
+ LASSERT(rpcev == &srpc->srpc_ev);
+
+ spin_lock(&scd->scd_lock);
+
+ rpcev->ev_fired = 1;
+ rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+ -EINTR : ev->status;
+ swi_schedule_workitem(&srpc->srpc_wi);
+
+ spin_unlock(&scd->scd_lock);
+ break;
+ }
+}
+
+
+int
+srpc_startup (void)
+{
+ int rc;
+
+ memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
+ spin_lock_init(&srpc_data.rpc_glock);
+
+ /* 1 second pause to avoid timestamp reuse */
+ cfs_pause(cfs_time_seconds(1));
+ srpc_data.rpc_matchbits = ((__u64) cfs_time_current_sec()) << 48;
+
+ srpc_data.rpc_state = SRPC_STATE_NONE;
+
+ rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
+ if (rc < 0) {
+ CERROR ("LNetNIInit() has failed: %d\n", rc);
+ return rc;
+ }
+
+ srpc_data.rpc_state = SRPC_STATE_NI_INIT;
+
+ LNetInvalidateHandle(&srpc_data.rpc_lnet_eq);
+ rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq);
+ if (rc != 0) {
+ CERROR("LNetEQAlloc() has failed: %d\n", rc);
+ goto bail;
+ }
+
+ rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
+ LASSERT(rc == 0);
+ rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
+ LASSERT(rc == 0);
+
+ srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
+
+ rc = stt_startup();
+
+bail:
+ if (rc != 0)
+ srpc_shutdown();
+ else
+ srpc_data.rpc_state = SRPC_STATE_RUNNING;
+
+ return rc;
+}
+
+void
+srpc_shutdown (void)
+{
+ int i;
+ int rc;
+ int state;
+
+ state = srpc_data.rpc_state;
+ srpc_data.rpc_state = SRPC_STATE_STOPPING;
+
+ switch (state) {
+ default:
+ LBUG ();
+ case SRPC_STATE_RUNNING:
+ spin_lock(&srpc_data.rpc_glock);
+
+ for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
+ srpc_service_t *sv = srpc_data.rpc_services[i];
+
+ LASSERTF (sv == NULL,
+ "service not empty: id %d, name %s\n",
+ i, sv->sv_name);
+ }
+
+ spin_unlock(&srpc_data.rpc_glock);
+
+ stt_shutdown();
+
+ case SRPC_STATE_EQ_INIT:
+ rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
+ rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
+ LASSERT (rc == 0);
+ rc = LNetEQFree(srpc_data.rpc_lnet_eq);
+ LASSERT (rc == 0); /* the EQ should have no user by now */
+
+ case SRPC_STATE_NI_INIT:
+ LNetNIFini();
+ }
+
+ return;
+}
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h
new file mode 100644
index 000000000000..b905d49a351f
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/rpc.h
@@ -0,0 +1,302 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __SELFTEST_RPC_H__
+#define __SELFTEST_RPC_H__
+
+#include <linux/lnet/lnetst.h>
+
+/*
+ * LST wired structures
+ *
+ * XXX: *REPLY == *REQST + 1
+ */
+typedef enum {
+ SRPC_MSG_MKSN_REQST = 0,
+ SRPC_MSG_MKSN_REPLY = 1,
+ SRPC_MSG_RMSN_REQST = 2,
+ SRPC_MSG_RMSN_REPLY = 3,
+ SRPC_MSG_BATCH_REQST = 4,
+ SRPC_MSG_BATCH_REPLY = 5,
+ SRPC_MSG_STAT_REQST = 6,
+ SRPC_MSG_STAT_REPLY = 7,
+ SRPC_MSG_TEST_REQST = 8,
+ SRPC_MSG_TEST_REPLY = 9,
+ SRPC_MSG_DEBUG_REQST = 10,
+ SRPC_MSG_DEBUG_REPLY = 11,
+ SRPC_MSG_BRW_REQST = 12,
+ SRPC_MSG_BRW_REPLY = 13,
+ SRPC_MSG_PING_REQST = 14,
+ SRPC_MSG_PING_REPLY = 15,
+ SRPC_MSG_JOIN_REQST = 16,
+ SRPC_MSG_JOIN_REPLY = 17,
+} srpc_msg_type_t;
+
+
+/* CAVEAT EMPTOR:
+ * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer,
+ * and 2nd field matchbits of bulk buffer if any.
+ *
+ * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field
+ * session id if needed.
+ */
+typedef struct {
+ __u64 rpyid; /* reply buffer matchbits */
+ __u64 bulkid; /* bulk buffer matchbits */
+} WIRE_ATTR srpc_generic_reqst_t;
+
+typedef struct {
+ __u32 status;
+ lst_sid_t sid;
+} WIRE_ATTR srpc_generic_reply_t;
+
+/* FRAMEWORK RPCs */
+typedef struct {
+ __u64 mksn_rpyid; /* reply buffer matchbits */
+ lst_sid_t mksn_sid; /* session id */
+ __u32 mksn_force; /* use brute force */
+ char mksn_name[LST_NAME_SIZE];
+} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */
+
+typedef struct {
+ __u32 mksn_status; /* session status */
+ lst_sid_t mksn_sid; /* session id */
+ __u32 mksn_timeout; /* session timeout */
+ char mksn_name[LST_NAME_SIZE];
+} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */
+
+typedef struct {
+ __u64 rmsn_rpyid; /* reply buffer matchbits */
+ lst_sid_t rmsn_sid; /* session id */
+} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */
+
+typedef struct {
+ __u32 rmsn_status;
+ lst_sid_t rmsn_sid; /* session id */
+} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */
+
+typedef struct {
+ __u64 join_rpyid; /* reply buffer matchbits */
+ lst_sid_t join_sid; /* session id to join */
+ char join_group[LST_NAME_SIZE]; /* group name */
+} WIRE_ATTR srpc_join_reqst_t;
+
+typedef struct {
+ __u32 join_status; /* returned status */
+ lst_sid_t join_sid; /* session id */
+ __u32 join_timeout; /* # seconds' inactivity to expire */
+ char join_session[LST_NAME_SIZE]; /* session name */
+} WIRE_ATTR srpc_join_reply_t;
+
+typedef struct {
+ __u64 dbg_rpyid; /* reply buffer matchbits */
+ lst_sid_t dbg_sid; /* session id */
+ __u32 dbg_flags; /* bitmap of debug */
+} WIRE_ATTR srpc_debug_reqst_t;
+
+typedef struct {
+ __u32 dbg_status; /* returned code */
+ lst_sid_t dbg_sid; /* session id */
+ __u32 dbg_timeout; /* session timeout */
+ __u32 dbg_nbatch; /* # of batches in the node */
+ char dbg_name[LST_NAME_SIZE]; /* session name */
+} WIRE_ATTR srpc_debug_reply_t;
+
+#define SRPC_BATCH_OPC_RUN 1
+#define SRPC_BATCH_OPC_STOP 2
+#define SRPC_BATCH_OPC_QUERY 3
+
+typedef struct {
+ __u64 bar_rpyid; /* reply buffer matchbits */
+ lst_sid_t bar_sid; /* session id */
+ lst_bid_t bar_bid; /* batch id */
+ __u32 bar_opc; /* create/start/stop batch */
+ __u32 bar_testidx; /* index of test */
+ __u32 bar_arg; /* parameters */
+} WIRE_ATTR srpc_batch_reqst_t;
+
+typedef struct {
+ __u32 bar_status; /* status of request */
+ lst_sid_t bar_sid; /* session id */
+ __u32 bar_active; /* # of active tests in batch/test */
+ __u32 bar_time; /* remained time */
+} WIRE_ATTR srpc_batch_reply_t;
+
+typedef struct {
+ __u64 str_rpyid; /* reply buffer matchbits */
+ lst_sid_t str_sid; /* session id */
+ __u32 str_type; /* type of stat */
+} WIRE_ATTR srpc_stat_reqst_t;
+
+typedef struct {
+ __u32 str_status;
+ lst_sid_t str_sid;
+ sfw_counters_t str_fw;
+ srpc_counters_t str_rpc;
+ lnet_counters_t str_lnet;
+} WIRE_ATTR srpc_stat_reply_t;
+
+typedef struct {
+ __u32 blk_opc; /* bulk operation code */
+ __u32 blk_npg; /* # of pages */
+ __u32 blk_flags; /* reserved flags */
+} WIRE_ATTR test_bulk_req_t;
+
+typedef struct {
+ /** bulk operation code */
+ __u16 blk_opc;
+ /** data check flags */
+ __u16 blk_flags;
+ /** data length */
+ __u32 blk_len;
+ /** reserved: offset */
+ __u32 blk_offset;
+} WIRE_ATTR test_bulk_req_v1_t;
+
+typedef struct {
+ __u32 png_size; /* size of ping message */
+ __u32 png_flags; /* reserved flags */
+} WIRE_ATTR test_ping_req_t;
+
+typedef struct {
+ __u64 tsr_rpyid; /* reply buffer matchbits */
+ __u64 tsr_bulkid; /* bulk buffer matchbits */
+ lst_sid_t tsr_sid; /* session id */
+ lst_bid_t tsr_bid; /* batch id */
+ __u32 tsr_service; /* test type: bulk|ping|... */
+ /* test client loop count or # server buffers needed */
+ __u32 tsr_loop;
+ __u32 tsr_concur; /* concurrency of test */
+ __u8 tsr_is_client; /* is test client or not */
+ __u8 tsr_stop_onerr; /* stop on error */
+ __u32 tsr_ndest; /* # of dest nodes */
+
+ union {
+ test_ping_req_t ping;
+ test_bulk_req_t bulk_v0;
+ test_bulk_req_v1_t bulk_v1;
+ } tsr_u;
+} WIRE_ATTR srpc_test_reqst_t;
+
+typedef struct {
+ __u32 tsr_status; /* returned code */
+ lst_sid_t tsr_sid;
+} WIRE_ATTR srpc_test_reply_t;
+
+/* TEST RPCs */
+typedef struct {
+ __u64 pnr_rpyid;
+ __u32 pnr_magic;
+ __u32 pnr_seq;
+ __u64 pnr_time_sec;
+ __u64 pnr_time_usec;
+} WIRE_ATTR srpc_ping_reqst_t;
+
+typedef struct {
+ __u32 pnr_status;
+ __u32 pnr_magic;
+ __u32 pnr_seq;
+} WIRE_ATTR srpc_ping_reply_t;
+
+typedef struct {
+ __u64 brw_rpyid; /* reply buffer matchbits */
+ __u64 brw_bulkid; /* bulk buffer matchbits */
+ __u32 brw_rw; /* read or write */
+ __u32 brw_len; /* bulk data len */
+ __u32 brw_flags; /* bulk data patterns */
+} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */
+
+typedef struct {
+ __u32 brw_status;
+} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */
+
+#define SRPC_MSG_MAGIC 0xeeb0f00d
+#define SRPC_MSG_VERSION 1
+
+typedef struct srpc_msg {
+ /** magic number */
+ __u32 msg_magic;
+ /** message version number */
+ __u32 msg_version;
+ /** type of message body: srpc_msg_type_t */
+ __u32 msg_type;
+ __u32 msg_reserved0;
+ __u32 msg_reserved1;
+ /** test session features */
+ __u32 msg_ses_feats;
+ union {
+ srpc_generic_reqst_t reqst;
+ srpc_generic_reply_t reply;
+
+ srpc_mksn_reqst_t mksn_reqst;
+ srpc_mksn_reply_t mksn_reply;
+ srpc_rmsn_reqst_t rmsn_reqst;
+ srpc_rmsn_reply_t rmsn_reply;
+ srpc_debug_reqst_t dbg_reqst;
+ srpc_debug_reply_t dbg_reply;
+ srpc_batch_reqst_t bat_reqst;
+ srpc_batch_reply_t bat_reply;
+ srpc_stat_reqst_t stat_reqst;
+ srpc_stat_reply_t stat_reply;
+ srpc_test_reqst_t tes_reqst;
+ srpc_test_reply_t tes_reply;
+ srpc_join_reqst_t join_reqst;
+ srpc_join_reply_t join_reply;
+
+ srpc_ping_reqst_t ping_reqst;
+ srpc_ping_reply_t ping_reply;
+ srpc_brw_reqst_t brw_reqst;
+ srpc_brw_reply_t brw_reply;
+ } msg_body;
+} WIRE_ATTR srpc_msg_t;
+
+static inline void
+srpc_unpack_msg_hdr(srpc_msg_t *msg)
+{
+ if (msg->msg_magic == SRPC_MSG_MAGIC)
+ return; /* no flipping needed */
+
+ /* We do not swap the magic number here as it is needed to
+ determine whether the body needs to be swapped. */
+ /* __swab32s(&msg->msg_magic); */
+ __swab32s(&msg->msg_type);
+ __swab32s(&msg->msg_version);
+ __swab32s(&msg->msg_ses_feats);
+ __swab32s(&msg->msg_reserved0);
+ __swab32s(&msg->msg_reserved1);
+}
+
+#endif /* __SELFTEST_RPC_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
new file mode 100644
index 000000000000..8053b0563ff3
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/selftest.h
@@ -0,0 +1,611 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/selftest.h
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+#ifndef __SELFTEST_SELFTEST_H__
+#define __SELFTEST_SELFTEST_H__
+
+#define LNET_ONLY
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lib-types.h>
+#include <linux/lnet/lnetst.h>
+
+#include "rpc.h"
+#include "timer.h"
+
+#ifndef MADE_WITHOUT_COMPROMISE
+#define MADE_WITHOUT_COMPROMISE
+#endif
+
+
+#define SWI_STATE_NEWBORN 0
+#define SWI_STATE_REPLY_SUBMITTED 1
+#define SWI_STATE_REPLY_SENT 2
+#define SWI_STATE_REQUEST_SUBMITTED 3
+#define SWI_STATE_REQUEST_SENT 4
+#define SWI_STATE_REPLY_RECEIVED 5
+#define SWI_STATE_BULK_STARTED 6
+#define SWI_STATE_DONE 10
+
+/* forward refs */
+struct srpc_service;
+struct srpc_service_cd;
+struct sfw_test_unit;
+struct sfw_test_instance;
+
+/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
+ * services, e.g. create/modify session.
+ */
+#define SRPC_SERVICE_DEBUG 0
+#define SRPC_SERVICE_MAKE_SESSION 1
+#define SRPC_SERVICE_REMOVE_SESSION 2
+#define SRPC_SERVICE_BATCH 3
+#define SRPC_SERVICE_TEST 4
+#define SRPC_SERVICE_QUERY_STAT 5
+#define SRPC_SERVICE_JOIN 6
+#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
+/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
+#define SRPC_SERVICE_BRW 11
+#define SRPC_SERVICE_PING 12
+#define SRPC_SERVICE_MAX_ID 12
+
+#define SRPC_REQUEST_PORTAL 50
+/* a lazy portal for framework RPC requests */
+#define SRPC_FRAMEWORK_REQUEST_PORTAL 51
+/* all reply/bulk RDMAs go to this portal */
+#define SRPC_RDMA_PORTAL 52
+
+static inline srpc_msg_type_t
+srpc_service2request (int service)
+{
+ switch (service) {
+ default:
+ LBUG ();
+ case SRPC_SERVICE_DEBUG:
+ return SRPC_MSG_DEBUG_REQST;
+
+ case SRPC_SERVICE_MAKE_SESSION:
+ return SRPC_MSG_MKSN_REQST;
+
+ case SRPC_SERVICE_REMOVE_SESSION:
+ return SRPC_MSG_RMSN_REQST;
+
+ case SRPC_SERVICE_BATCH:
+ return SRPC_MSG_BATCH_REQST;
+
+ case SRPC_SERVICE_TEST:
+ return SRPC_MSG_TEST_REQST;
+
+ case SRPC_SERVICE_QUERY_STAT:
+ return SRPC_MSG_STAT_REQST;
+
+ case SRPC_SERVICE_BRW:
+ return SRPC_MSG_BRW_REQST;
+
+ case SRPC_SERVICE_PING:
+ return SRPC_MSG_PING_REQST;
+
+ case SRPC_SERVICE_JOIN:
+ return SRPC_MSG_JOIN_REQST;
+ }
+}
+
+static inline srpc_msg_type_t
+srpc_service2reply (int service)
+{
+ return srpc_service2request(service) + 1;
+}
+
+typedef enum {
+ SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) received */
+ SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */
+ SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */
+ SRPC_REPLY_RCVD = 4, /* incoming reply received */
+ SRPC_REPLY_SENT = 5, /* outgoing reply sent */
+ SRPC_REQUEST_RCVD = 6, /* incoming request received */
+ SRPC_REQUEST_SENT = 7, /* outgoing request sent */
+} srpc_event_type_t;
+
+/* RPC event */
+typedef struct {
+ srpc_event_type_t ev_type; /* what's up */
+ lnet_event_kind_t ev_lnet; /* LNet event type */
+ int ev_fired; /* LNet event fired? */
+ int ev_status; /* LNet event status */
+ void *ev_data; /* owning server/client RPC */
+} srpc_event_t;
+
+typedef struct {
+ int bk_len; /* len of bulk data */
+ lnet_handle_md_t bk_mdh;
+ int bk_sink; /* sink/source */
+ int bk_niov; /* # iov in bk_iovs */
+ lnet_kiov_t bk_iovs[0];
+} srpc_bulk_t; /* bulk descriptor */
+
+/* message buffer descriptor */
+typedef struct srpc_buffer {
+ struct list_head buf_list; /* chain on srpc_service::*_msgq */
+ srpc_msg_t buf_msg;
+ lnet_handle_md_t buf_mdh;
+ lnet_nid_t buf_self;
+ lnet_process_id_t buf_peer;
+} srpc_buffer_t;
+
+struct swi_workitem;
+typedef int (*swi_action_t) (struct swi_workitem *);
+
+typedef struct swi_workitem {
+ struct cfs_wi_sched *swi_sched;
+ cfs_workitem_t swi_workitem;
+ swi_action_t swi_action;
+ int swi_state;
+} swi_workitem_t;
+
+/* server-side state of a RPC */
+typedef struct srpc_server_rpc {
+ /* chain on srpc_service::*_rpcq */
+ struct list_head srpc_list;
+ struct srpc_service_cd *srpc_scd;
+ swi_workitem_t srpc_wi;
+ srpc_event_t srpc_ev; /* bulk/reply event */
+ lnet_nid_t srpc_self;
+ lnet_process_id_t srpc_peer;
+ srpc_msg_t srpc_replymsg;
+ lnet_handle_md_t srpc_replymdh;
+ srpc_buffer_t *srpc_reqstbuf;
+ srpc_bulk_t *srpc_bulk;
+
+ unsigned int srpc_aborted; /* being given up */
+ int srpc_status;
+ void (*srpc_done)(struct srpc_server_rpc *);
+} srpc_server_rpc_t;
+
+/* client-side state of a RPC */
+typedef struct srpc_client_rpc {
+ struct list_head crpc_list; /* chain on user's lists */
+ spinlock_t crpc_lock; /* serialize */
+ int crpc_service;
+ atomic_t crpc_refcount;
+ int crpc_timeout; /* # seconds to wait for reply */
+ stt_timer_t crpc_timer;
+ swi_workitem_t crpc_wi;
+ lnet_process_id_t crpc_dest;
+
+ void (*crpc_done)(struct srpc_client_rpc *);
+ void (*crpc_fini)(struct srpc_client_rpc *);
+ int crpc_status; /* completion status */
+ void *crpc_priv; /* caller data */
+
+ /* state flags */
+ unsigned int crpc_aborted:1; /* being given up */
+ unsigned int crpc_closed:1; /* completed */
+
+ /* RPC events */
+ srpc_event_t crpc_bulkev; /* bulk event */
+ srpc_event_t crpc_reqstev; /* request event */
+ srpc_event_t crpc_replyev; /* reply event */
+
+ /* bulk, request(reqst), and reply exchanged on wire */
+ srpc_msg_t crpc_reqstmsg;
+ srpc_msg_t crpc_replymsg;
+ lnet_handle_md_t crpc_reqstmdh;
+ lnet_handle_md_t crpc_replymdh;
+ srpc_bulk_t crpc_bulk;
+} srpc_client_rpc_t;
+
+#define srpc_client_rpc_size(rpc) \
+offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
+
+#define srpc_client_rpc_addref(rpc) \
+do { \
+ CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
+ (rpc), libcfs_id2str((rpc)->crpc_dest), \
+ atomic_read(&(rpc)->crpc_refcount)); \
+ LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
+ atomic_inc(&(rpc)->crpc_refcount); \
+} while (0)
+
+#define srpc_client_rpc_decref(rpc) \
+do { \
+ CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
+ (rpc), libcfs_id2str((rpc)->crpc_dest), \
+ atomic_read(&(rpc)->crpc_refcount)); \
+ LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
+ if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
+ srpc_destroy_client_rpc(rpc); \
+} while (0)
+
+#define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \
+ (rpc)->crpc_reqstev.ev_fired == 0 || \
+ (rpc)->crpc_replyev.ev_fired == 0)
+
+/* CPU partition data of srpc service */
+struct srpc_service_cd {
+ /** serialize */
+ spinlock_t scd_lock;
+ /** backref to service */
+ struct srpc_service *scd_svc;
+ /** event buffer */
+ srpc_event_t scd_ev;
+ /** free RPC descriptors */
+ struct list_head scd_rpc_free;
+ /** in-flight RPCs */
+ struct list_head scd_rpc_active;
+ /** workitem for posting buffer */
+ swi_workitem_t scd_buf_wi;
+ /** CPT id */
+ int scd_cpt;
+ /** error code for scd_buf_wi */
+ int scd_buf_err;
+ /** timestamp for scd_buf_err */
+ unsigned long scd_buf_err_stamp;
+ /** total # request buffers */
+ int scd_buf_total;
+ /** # posted request buffers */
+ int scd_buf_nposted;
+ /** in progress of buffer posting */
+ int scd_buf_posting;
+ /** allocate more buffers if scd_buf_nposted < scd_buf_low */
+ int scd_buf_low;
+ /** increase/decrease some buffers */
+ int scd_buf_adjust;
+ /** posted message buffers */
+ struct list_head scd_buf_posted;
+ /** blocked for RPC descriptor */
+ struct list_head scd_buf_blocked;
+};
+
+/* number of server workitems (mini-thread) for testing service */
+#define SFW_TEST_WI_MIN 256
+#define SFW_TEST_WI_MAX 2048
+/* extra buffers for tolerating buggy peers, or unbalanced number
+ * of peers between partitions */
+#define SFW_TEST_WI_EXTRA 64
+
+/* number of server workitems (mini-thread) for framework service */
+#define SFW_FRWK_WI_MIN 16
+#define SFW_FRWK_WI_MAX 256
+
+typedef struct srpc_service {
+ int sv_id; /* service id */
+ const char *sv_name; /* human readable name */
+ int sv_wi_total; /* total server workitems */
+ int sv_shuttingdown;
+ int sv_ncpts;
+ /* percpt data for srpc_service */
+ struct srpc_service_cd **sv_cpt_data;
+ /* Service callbacks:
+ * - sv_handler: process incoming RPC request
+ * - sv_bulk_ready: notify bulk data
+ */
+ int (*sv_handler) (srpc_server_rpc_t *);
+ int (*sv_bulk_ready) (srpc_server_rpc_t *, int);
+} srpc_service_t;
+
+typedef struct {
+ struct list_head sn_list; /* chain on fw_zombie_sessions */
+ lst_sid_t sn_id; /* unique identifier */
+ unsigned int sn_timeout; /* # seconds' inactivity to expire */
+ int sn_timer_active;
+ unsigned int sn_features;
+ stt_timer_t sn_timer;
+ struct list_head sn_batches; /* list of batches */
+ char sn_name[LST_NAME_SIZE];
+ atomic_t sn_refcount;
+ atomic_t sn_brw_errors;
+ atomic_t sn_ping_errors;
+ cfs_time_t sn_started;
+} sfw_session_t;
+
+#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
+ (sid0).ses_stamp == (sid1).ses_stamp)
+
+typedef struct {
+ struct list_head bat_list; /* chain on sn_batches */
+ lst_bid_t bat_id; /* batch id */
+ int bat_error; /* error code of batch */
+ sfw_session_t *bat_session; /* batch's session */
+ atomic_t bat_nactive; /* # of active tests */
+ struct list_head bat_tests; /* test instances */
+} sfw_batch_t;
+
+typedef struct {
+ int (*tso_init)(struct sfw_test_instance *tsi); /* intialize test client */
+ void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
+ int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
+ lnet_process_id_t dest,
+ srpc_client_rpc_t **rpc); /* prep a tests rpc */
+ void (*tso_done_rpc)(struct sfw_test_unit *tsu,
+ srpc_client_rpc_t *rpc); /* done a test rpc */
+} sfw_test_client_ops_t;
+
+typedef struct sfw_test_instance {
+ struct list_head tsi_list; /* chain on batch */
+ int tsi_service; /* test type */
+ sfw_batch_t *tsi_batch; /* batch */
+ sfw_test_client_ops_t *tsi_ops; /* test client operations */
+
+ /* public parameter for all test units */
+ unsigned int tsi_is_client:1; /* is test client */
+ unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */
+ int tsi_concur; /* concurrency */
+ int tsi_loop; /* loop count */
+
+ /* status of test instance */
+ spinlock_t tsi_lock; /* serialize */
+ unsigned int tsi_stopping:1; /* test is stopping */
+ atomic_t tsi_nactive; /* # of active test unit */
+ struct list_head tsi_units; /* test units */
+ struct list_head tsi_free_rpcs; /* free rpcs */
+ struct list_head tsi_active_rpcs; /* active rpcs */
+
+ union {
+ test_ping_req_t ping; /* ping parameter */
+ test_bulk_req_t bulk_v0; /* bulk parameter */
+ test_bulk_req_v1_t bulk_v1; /* bulk v1 parameter */
+ } tsi_u;
+} sfw_test_instance_t;
+
+/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at
+ * the end of pages are not used */
+#define SFW_MAX_CONCUR LST_MAX_CONCUR
+#define SFW_ID_PER_PAGE (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t))
+#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
+#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
+
+typedef struct sfw_test_unit {
+ struct list_head tsu_list; /* chain on lst_test_instance */
+ lnet_process_id_t tsu_dest; /* id of dest node */
+ int tsu_loop; /* loop count of the test */
+ sfw_test_instance_t *tsu_instance; /* pointer to test instance */
+ void *tsu_private; /* private data */
+ swi_workitem_t tsu_worker; /* workitem of the test unit */
+} sfw_test_unit_t;
+
+typedef struct sfw_test_case {
+ struct list_head tsc_list; /* chain on fw_tests */
+ srpc_service_t *tsc_srv_service; /* test service */
+ sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */
+} sfw_test_case_t;
+
+srpc_client_rpc_t *
+sfw_create_rpc(lnet_process_id_t peer, int service,
+ unsigned features, int nbulkiov, int bulklen,
+ void (*done) (srpc_client_rpc_t *), void *priv);
+int sfw_create_test_rpc(sfw_test_unit_t *tsu,
+ lnet_process_id_t peer, unsigned features,
+ int nblk, int blklen, srpc_client_rpc_t **rpc);
+void sfw_abort_rpc(srpc_client_rpc_t *rpc);
+void sfw_post_rpc(srpc_client_rpc_t *rpc);
+void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
+void sfw_unpack_message(srpc_msg_t *msg);
+void sfw_free_pages(srpc_server_rpc_t *rpc);
+void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i);
+int sfw_alloc_pages(srpc_server_rpc_t *rpc, int cpt, int npages, int len,
+ int sink);
+int sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply);
+
+srpc_client_rpc_t *
+srpc_create_client_rpc(lnet_process_id_t peer, int service,
+ int nbulkiov, int bulklen,
+ void (*rpc_done)(srpc_client_rpc_t *),
+ void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
+void srpc_post_rpc(srpc_client_rpc_t *rpc);
+void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
+void srpc_free_bulk(srpc_bulk_t *bk);
+srpc_bulk_t *srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len,
+ int sink);
+int srpc_send_rpc(swi_workitem_t *wi);
+int srpc_send_reply(srpc_server_rpc_t *rpc);
+int srpc_add_service(srpc_service_t *sv);
+int srpc_remove_service(srpc_service_t *sv);
+void srpc_shutdown_service(srpc_service_t *sv);
+void srpc_abort_service(srpc_service_t *sv);
+int srpc_finish_service(srpc_service_t *sv);
+int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
+void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
+void srpc_get_counters(srpc_counters_t *cnt);
+void srpc_set_counters(const srpc_counters_t *cnt);
+
+extern struct cfs_wi_sched *lst_sched_serial;
+extern struct cfs_wi_sched **lst_sched_test;
+
+static inline int
+srpc_serv_is_framework(struct srpc_service *svc)
+{
+ return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID;
+}
+
+static inline int
+swi_wi_action(cfs_workitem_t *wi)
+{
+ swi_workitem_t *swi = container_of(wi, swi_workitem_t, swi_workitem);
+
+ return swi->swi_action(swi);
+}
+
+static inline void
+swi_init_workitem(swi_workitem_t *swi, void *data,
+ swi_action_t action, struct cfs_wi_sched *sched)
+{
+ swi->swi_sched = sched;
+ swi->swi_action = action;
+ swi->swi_state = SWI_STATE_NEWBORN;
+ cfs_wi_init(&swi->swi_workitem, data, swi_wi_action);
+}
+
+static inline void
+swi_schedule_workitem(swi_workitem_t *wi)
+{
+ cfs_wi_schedule(wi->swi_sched, &wi->swi_workitem);
+}
+
+static inline void
+swi_exit_workitem(swi_workitem_t *swi)
+{
+ cfs_wi_exit(swi->swi_sched, &swi->swi_workitem);
+}
+
+static inline int
+swi_deschedule_workitem(swi_workitem_t *swi)
+{
+ return cfs_wi_deschedule(swi->swi_sched, &swi->swi_workitem);
+}
+
+
+int sfw_startup(void);
+int srpc_startup(void);
+void sfw_shutdown(void);
+void srpc_shutdown(void);
+
+static inline void
+srpc_destroy_client_rpc (srpc_client_rpc_t *rpc)
+{
+ LASSERT (rpc != NULL);
+ LASSERT (!srpc_event_pending(rpc));
+ LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
+
+ if (rpc->crpc_fini == NULL) {
+ LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
+ } else {
+ (*rpc->crpc_fini) (rpc);
+ }
+
+ return;
+}
+
+static inline void
+srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer,
+ int service, int nbulkiov, int bulklen,
+ void (*rpc_done)(srpc_client_rpc_t *),
+ void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+{
+ LASSERT (nbulkiov <= LNET_MAX_IOV);
+
+ memset(rpc, 0, offsetof(srpc_client_rpc_t,
+ crpc_bulk.bk_iovs[nbulkiov]));
+
+ INIT_LIST_HEAD(&rpc->crpc_list);
+ swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
+ lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
+ spin_lock_init(&rpc->crpc_lock);
+ atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
+
+ rpc->crpc_dest = peer;
+ rpc->crpc_priv = priv;
+ rpc->crpc_service = service;
+ rpc->crpc_bulk.bk_len = bulklen;
+ rpc->crpc_bulk.bk_niov = nbulkiov;
+ rpc->crpc_done = rpc_done;
+ rpc->crpc_fini = rpc_fini;
+ LNetInvalidateHandle(&rpc->crpc_reqstmdh);
+ LNetInvalidateHandle(&rpc->crpc_replymdh);
+ LNetInvalidateHandle(&rpc->crpc_bulk.bk_mdh);
+
+ /* no event is expected at this point */
+ rpc->crpc_bulkev.ev_fired =
+ rpc->crpc_reqstev.ev_fired =
+ rpc->crpc_replyev.ev_fired = 1;
+
+ rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
+ rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
+ rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
+ return;
+}
+
+static inline const char *
+swi_state2str (int state)
+{
+#define STATE2STR(x) case x: return #x
+ switch(state) {
+ default:
+ LBUG();
+ STATE2STR(SWI_STATE_NEWBORN);
+ STATE2STR(SWI_STATE_REPLY_SUBMITTED);
+ STATE2STR(SWI_STATE_REPLY_SENT);
+ STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
+ STATE2STR(SWI_STATE_REQUEST_SENT);
+ STATE2STR(SWI_STATE_REPLY_RECEIVED);
+ STATE2STR(SWI_STATE_BULK_STARTED);
+ STATE2STR(SWI_STATE_DONE);
+ }
+#undef STATE2STR
+}
+
+#define UNUSED(x) ( (void)(x) )
+
+
+#define selftest_wait_events() cfs_pause(cfs_time_seconds(1) / 10)
+
+
+#define lst_wait_until(cond, lock, fmt, ...) \
+do { \
+ int __I = 2; \
+ while (!(cond)) { \
+ CDEBUG(IS_PO2(++__I) ? D_WARNING : D_NET, \
+ fmt, ## __VA_ARGS__); \
+ spin_unlock(&(lock)); \
+ \
+ selftest_wait_events(); \
+ \
+ spin_lock(&(lock)); \
+ } \
+} while (0)
+
+static inline void
+srpc_wait_service_shutdown(srpc_service_t *sv)
+{
+ int i = 2;
+
+ LASSERT(sv->sv_shuttingdown);
+
+ while (srpc_finish_service(sv) == 0) {
+ i++;
+ CDEBUG (((i & -i) == i) ? D_WARNING : D_NET,
+ "Waiting for %s service to shutdown...\n",
+ sv->sv_name);
+ selftest_wait_events();
+ }
+}
+
+#endif /* __SELFTEST_SELFTEST_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c
new file mode 100644
index 000000000000..2c078550277b
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/timer.c
@@ -0,0 +1,253 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/timer.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "selftest.h"
+
+
+/*
+ * Timers are implemented as a sorted queue of expiry times. The queue
+ * is slotted, with each slot holding timers which expire in a
+ * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
+ * sorted by increasing expiry time. The number of slots is 2**7 (128),
+ * to cover a time period of 1024 seconds into the future before wrapping.
+ */
+#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
+#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL)
+#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
+#define STTIMER_NSLOTS (1 << 7)
+#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
+ (STTIMER_NSLOTS - 1))])
+
+struct st_timer_data {
+ spinlock_t stt_lock;
+ /* start time of the slot processed previously */
+ cfs_time_t stt_prev_slot;
+ struct list_head stt_hash[STTIMER_NSLOTS];
+ int stt_shuttingdown;
+ wait_queue_head_t stt_waitq;
+ int stt_nthreads;
+} stt_data;
+
+void
+stt_add_timer(stt_timer_t *timer)
+{
+ struct list_head *pos;
+
+ spin_lock(&stt_data.stt_lock);
+
+ LASSERT (stt_data.stt_nthreads > 0);
+ LASSERT (!stt_data.stt_shuttingdown);
+ LASSERT (timer->stt_func != NULL);
+ LASSERT (list_empty(&timer->stt_list));
+ LASSERT (cfs_time_after(timer->stt_expires, cfs_time_current_sec()));
+
+ /* a simple insertion sort */
+ list_for_each_prev (pos, STTIMER_SLOT(timer->stt_expires)) {
+ stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list);
+
+ if (cfs_time_aftereq(timer->stt_expires, old->stt_expires))
+ break;
+ }
+ list_add(&timer->stt_list, pos);
+
+ spin_unlock(&stt_data.stt_lock);
+}
+
+/*
+ * The function returns whether it has deactivated a pending timer or not.
+ * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
+ * active timer returns 1.)
+ *
+ * CAVEAT EMPTOR:
+ * When 0 is returned, it is possible that timer->stt_func _is_ running on
+ * another CPU.
+ */
+int
+stt_del_timer (stt_timer_t *timer)
+{
+ int ret = 0;
+
+ spin_lock(&stt_data.stt_lock);
+
+ LASSERT (stt_data.stt_nthreads > 0);
+ LASSERT (!stt_data.stt_shuttingdown);
+
+ if (!list_empty(&timer->stt_list)) {
+ ret = 1;
+ list_del_init(&timer->stt_list);
+ }
+
+ spin_unlock(&stt_data.stt_lock);
+ return ret;
+}
+
+/* called with stt_data.stt_lock held */
+int
+stt_expire_list (struct list_head *slot, cfs_time_t now)
+{
+ int expired = 0;
+ stt_timer_t *timer;
+
+ while (!list_empty(slot)) {
+ timer = list_entry(slot->next, stt_timer_t, stt_list);
+
+ if (cfs_time_after(timer->stt_expires, now))
+ break;
+
+ list_del_init(&timer->stt_list);
+ spin_unlock(&stt_data.stt_lock);
+
+ expired++;
+ (*timer->stt_func) (timer->stt_data);
+
+ spin_lock(&stt_data.stt_lock);
+ }
+
+ return expired;
+}
+
+int
+stt_check_timers (cfs_time_t *last)
+{
+ int expired = 0;
+ cfs_time_t now;
+ cfs_time_t this_slot;
+
+ now = cfs_time_current_sec();
+ this_slot = now & STTIMER_SLOTTIMEMASK;
+
+ spin_lock(&stt_data.stt_lock);
+
+ while (cfs_time_aftereq(this_slot, *last)) {
+ expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
+ this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME);
+ }
+
+ *last = now & STTIMER_SLOTTIMEMASK;
+ spin_unlock(&stt_data.stt_lock);
+ return expired;
+}
+
+
+int
+stt_timer_main (void *arg)
+{
+ int rc = 0;
+ UNUSED(arg);
+
+ SET_BUT_UNUSED(rc);
+
+ cfs_block_allsigs();
+
+ while (!stt_data.stt_shuttingdown) {
+ stt_check_timers(&stt_data.stt_prev_slot);
+
+ rc = wait_event_timeout(stt_data.stt_waitq,
+ stt_data.stt_shuttingdown,
+ cfs_time_seconds(STTIMER_SLOTTIME));
+ }
+
+ spin_lock(&stt_data.stt_lock);
+ stt_data.stt_nthreads--;
+ spin_unlock(&stt_data.stt_lock);
+ return 0;
+}
+
+int
+stt_start_timer_thread (void)
+{
+ task_t *task;
+
+ LASSERT(!stt_data.stt_shuttingdown);
+
+ task = kthread_run(stt_timer_main, NULL, "st_timer");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ spin_lock(&stt_data.stt_lock);
+ stt_data.stt_nthreads++;
+ spin_unlock(&stt_data.stt_lock);
+ return 0;
+}
+
+
+int
+stt_startup (void)
+{
+ int rc = 0;
+ int i;
+
+ stt_data.stt_shuttingdown = 0;
+ stt_data.stt_prev_slot = cfs_time_current_sec() & STTIMER_SLOTTIMEMASK;
+
+ spin_lock_init(&stt_data.stt_lock);
+ for (i = 0; i < STTIMER_NSLOTS; i++)
+ INIT_LIST_HEAD(&stt_data.stt_hash[i]);
+
+ stt_data.stt_nthreads = 0;
+ init_waitqueue_head(&stt_data.stt_waitq);
+ rc = stt_start_timer_thread();
+ if (rc != 0)
+ CERROR ("Can't spawn timer thread: %d\n", rc);
+
+ return rc;
+}
+
+void
+stt_shutdown (void)
+{
+ int i;
+
+ spin_lock(&stt_data.stt_lock);
+
+ for (i = 0; i < STTIMER_NSLOTS; i++)
+ LASSERT (list_empty(&stt_data.stt_hash[i]));
+
+ stt_data.stt_shuttingdown = 1;
+
+ wake_up(&stt_data.stt_waitq);
+ lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock,
+ "waiting for %d threads to terminate\n",
+ stt_data.stt_nthreads);
+
+ spin_unlock(&stt_data.stt_lock);
+}
diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h
new file mode 100644
index 000000000000..56dbfe5ea1e5
--- /dev/null
+++ b/drivers/staging/lustre/lnet/selftest/timer.h
@@ -0,0 +1,53 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/timer.h
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ */
+#ifndef __SELFTEST_TIMER_H__
+#define __SELFTEST_TIMER_H__
+
+typedef struct {
+ struct list_head stt_list;
+ cfs_time_t stt_expires;
+ void (*stt_func) (void *);
+ void *stt_data;
+} stt_timer_t;
+
+void stt_add_timer (stt_timer_t *timer);
+int stt_del_timer (stt_timer_t *timer);
+int stt_startup (void);
+void stt_shutdown (void);
+
+#endif /* __SELFTEST_TIMER_H__ */
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
new file mode 100644
index 000000000000..e0eb8303a50c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/Kconfig
@@ -0,0 +1,51 @@
+config LUSTRE_FS
+ tristate "Lustre file system client support"
+ depends on STAGING && INET && BROKEN
+ select LNET
+ select CRYPTO
+ select CRYPTO_CRC32
+ select CRYPTO_CRC32_PCLMUL if X86
+ select CRYPTO_CRC32C
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ select CRYPTO_SHA512
+ help
+ This option enables Lustre file system client support. Choose Y
+ here if you want to access a Lustre file system cluster. To compile
+ this file system support as a module, choose M here: the module will
+ be called lustre.
+
+ To mount Lustre file systems , you also need to install the user space
+ mount.lustre and other user space commands which can be found in the
+ lustre-client package, available from
+ http://downloads.whamcloud.com/public/lustre/
+
+ Lustre file system is the most popular cluster file system in high
+ performance computing. Source code of both kernel space and user space
+ Lustre components can also be found at
+ http://git.whamcloud.com/?p=fs/lustre-release.git;a=summary
+
+ If unsure, say N.
+
+ See also http://wiki.lustre.org/
+
+config LUSTRE_OBD_MAX_IOCTL_BUFFER
+ int "Lustre obd max ioctl buffer bytes (default 8KB)"
+ depends on LUSTRE_FS
+ default 8192
+ help
+ This option defines the maximum size of buffer in bytes that user space
+ applications can pass to Lustre kernel module through ioctl interface.
+
+ If unsure, use default.
+
+config LUSTRE_DEBUG_EXPENSIVE_CHECK
+ bool "Enable Lustre DEBUG checks"
+ depends on LUSTRE_FS
+ default false
+ help
+ This option is mainly for debug purpose. It enables Lustre code to do
+ expensive checks that may have a performance impact.
+
+ Use with caution. If unsure, say N.
diff --git a/drivers/staging/lustre/lustre/Makefile b/drivers/staging/lustre/lustre/Makefile
new file mode 100644
index 000000000000..3fb94fc12068
--- /dev/null
+++ b/drivers/staging/lustre/lustre/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_LUSTRE_FS) := fid/ lvfs/ obdclass/ ptlrpc/ obdecho/ mgc/ lov/ \
+ osc/ mdc/ lmv/ llite/ fld/ libcfs/
diff --git a/drivers/staging/lustre/lustre/fid/Makefile b/drivers/staging/lustre/lustre/fid/Makefile
new file mode 100644
index 000000000000..b8d6d21b39ff
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += fid.o
+fid-y := fid_handler.o fid_store.o fid_request.o lproc_fid.o fid_lib.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/fid/fid_handler.c b/drivers/staging/lustre/lustre/fid/fid_handler.c
new file mode 100644
index 000000000000..bbbb3cfe57b3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/fid_handler.c
@@ -0,0 +1,661 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/fid_handler.c
+ *
+ * Lustre Sequence Manager
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FID
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <dt_object.h>
+#include <md_object.h>
+#include <obd_support.h>
+#include <lustre_req_layout.h>
+#include <lustre_fid.h>
+#include "fid_internal.h"
+
+int client_fid_init(struct obd_device *obd,
+ struct obd_export *exp, enum lu_cli_type type)
+{
+ struct client_obd *cli = &obd->u.cli;
+ char *prefix;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC_PTR(cli->cl_seq);
+ if (cli->cl_seq == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
+ if (prefix == NULL)
+ GOTO(out_free_seq, rc = -ENOMEM);
+
+ snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name);
+
+ /* Init client side sequence-manager */
+ rc = seq_client_init(cli->cl_seq, exp, type, prefix, NULL);
+ OBD_FREE(prefix, MAX_OBD_NAME + 5);
+ if (rc)
+ GOTO(out_free_seq, rc);
+
+ RETURN(rc);
+out_free_seq:
+ OBD_FREE_PTR(cli->cl_seq);
+ cli->cl_seq = NULL;
+ return rc;
+}
+EXPORT_SYMBOL(client_fid_init);
+
+int client_fid_fini(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ ENTRY;
+
+ if (cli->cl_seq != NULL) {
+ seq_client_fini(cli->cl_seq);
+ OBD_FREE_PTR(cli->cl_seq);
+ cli->cl_seq = NULL;
+ }
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(client_fid_fini);
+
+static void seq_server_proc_fini(struct lu_server_seq *seq);
+
+/* Assigns client to sequence controller node. */
+int seq_server_set_cli(struct lu_server_seq *seq,
+ struct lu_client_seq *cli,
+ const struct lu_env *env)
+{
+ int rc = 0;
+ ENTRY;
+
+ /*
+ * Ask client for new range, assign that range to ->seq_space and write
+ * seq state to backing store should be atomic.
+ */
+ mutex_lock(&seq->lss_mutex);
+
+ if (cli == NULL) {
+ CDEBUG(D_INFO, "%s: Detached sequence client %s\n",
+ seq->lss_name, cli->lcs_name);
+ seq->lss_cli = cli;
+ GOTO(out_up, rc = 0);
+ }
+
+ if (seq->lss_cli != NULL) {
+ CDEBUG(D_HA, "%s: Sequence controller is already "
+ "assigned\n", seq->lss_name);
+ GOTO(out_up, rc = -EEXIST);
+ }
+
+ CDEBUG(D_INFO, "%s: Attached sequence controller %s\n",
+ seq->lss_name, cli->lcs_name);
+
+ seq->lss_cli = cli;
+ cli->lcs_space.lsr_index = seq->lss_site->ss_node_id;
+ EXIT;
+out_up:
+ mutex_unlock(&seq->lss_mutex);
+ return rc;
+}
+EXPORT_SYMBOL(seq_server_set_cli);
+/*
+ * allocate \a w units of sequence from range \a from.
+ */
+static inline void range_alloc(struct lu_seq_range *to,
+ struct lu_seq_range *from,
+ __u64 width)
+{
+ width = min(range_space(from), width);
+ to->lsr_start = from->lsr_start;
+ to->lsr_end = from->lsr_start + width;
+ from->lsr_start += width;
+}
+
+/**
+ * On controller node, allocate new super sequence for regular sequence server.
+ * As this super sequence controller, this node suppose to maintain fld
+ * and update index.
+ * \a out range always has currect mds node number of requester.
+ */
+
+static int __seq_server_alloc_super(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env)
+{
+ struct lu_seq_range *space = &seq->lss_space;
+ int rc;
+ ENTRY;
+
+ LASSERT(range_is_sane(space));
+
+ if (range_is_exhausted(space)) {
+ CERROR("%s: Sequences space is exhausted\n",
+ seq->lss_name);
+ RETURN(-ENOSPC);
+ } else {
+ range_alloc(out, space, seq->lss_width);
+ }
+
+ rc = seq_store_update(env, seq, out, 1 /* sync */);
+
+ LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n",
+ seq->lss_name, rc, PRANGE(out));
+
+ RETURN(rc);
+}
+
+int seq_server_alloc_super(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env)
+{
+ int rc;
+ ENTRY;
+
+ mutex_lock(&seq->lss_mutex);
+ rc = __seq_server_alloc_super(seq, out, env);
+ mutex_unlock(&seq->lss_mutex);
+
+ RETURN(rc);
+}
+
+static int __seq_set_init(const struct lu_env *env,
+ struct lu_server_seq *seq)
+{
+ struct lu_seq_range *space = &seq->lss_space;
+ int rc;
+
+ range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width);
+ range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width);
+
+ rc = seq_store_update(env, seq, NULL, 1);
+
+ return rc;
+}
+
+/*
+ * This function implements new seq allocation algorithm using async
+ * updates to seq file on disk. ref bug 18857 for details.
+ * there are four variable to keep track of this process
+ *
+ * lss_space; - available lss_space
+ * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
+ * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be
+ * not yet committed
+ *
+ * when lss_lowater_set reaches the end it is replaced with hiwater one and
+ * a write operation is initiated to allocate new hiwater range.
+ * if last seq write opearion is still not commited, current operation is
+ * flaged as sync write op.
+ */
+static int range_alloc_set(const struct lu_env *env,
+ struct lu_seq_range *out,
+ struct lu_server_seq *seq)
+{
+ struct lu_seq_range *space = &seq->lss_space;
+ struct lu_seq_range *loset = &seq->lss_lowater_set;
+ struct lu_seq_range *hiset = &seq->lss_hiwater_set;
+ int rc = 0;
+
+ if (range_is_zero(loset))
+ __seq_set_init(env, seq);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
+ loset->lsr_start = loset->lsr_end;
+
+ if (range_is_exhausted(loset)) {
+ /* reached high water mark. */
+ struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev;
+ int obd_num_clients = dev->ld_obd->obd_num_exports;
+ __u64 set_sz;
+
+ /* calculate new seq width based on number of clients */
+ set_sz = max(seq->lss_set_width,
+ obd_num_clients * seq->lss_width);
+ set_sz = min(range_space(space), set_sz);
+
+ /* Switch to hiwater range now */
+ *loset = *hiset;
+ /* allocate new hiwater range */
+ range_alloc(hiset, space, set_sz);
+
+ /* update ondisk seq with new *space */
+ rc = seq_store_update(env, seq, NULL, seq->lss_need_sync);
+ }
+
+ LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset),
+ DRANGE"\n", PRANGE(loset));
+
+ if (rc == 0)
+ range_alloc(out, loset, seq->lss_width);
+
+ RETURN(rc);
+}
+
+static int __seq_server_alloc_meta(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env)
+{
+ struct lu_seq_range *space = &seq->lss_space;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(range_is_sane(space));
+
+ /* Check if available space ends and allocate new super seq */
+ if (range_is_exhausted(space)) {
+ if (!seq->lss_cli) {
+ CERROR("%s: No sequence controller is attached.\n",
+ seq->lss_name);
+ RETURN(-ENODEV);
+ }
+
+ rc = seq_client_alloc_super(seq->lss_cli, env);
+ if (rc) {
+ CERROR("%s: Can't allocate super-sequence, rc %d\n",
+ seq->lss_name, rc);
+ RETURN(rc);
+ }
+
+ /* Saving new range to allocation space. */
+ *space = seq->lss_cli->lcs_space;
+ LASSERT(range_is_sane(space));
+ }
+
+ rc = range_alloc_set(env, out, seq);
+ if (rc != 0) {
+ CERROR("%s: Allocated meta-sequence failed: rc = %d\n",
+ seq->lss_name, rc);
+ RETURN(rc);
+ }
+
+ CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n",
+ seq->lss_name, PRANGE(out));
+
+ RETURN(rc);
+}
+
+int seq_server_alloc_meta(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env)
+{
+ int rc;
+ ENTRY;
+
+ mutex_lock(&seq->lss_mutex);
+ rc = __seq_server_alloc_meta(seq, out, env);
+ mutex_unlock(&seq->lss_mutex);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(seq_server_alloc_meta);
+
+static int seq_server_handle(struct lu_site *site,
+ const struct lu_env *env,
+ __u32 opc, struct lu_seq_range *out)
+{
+ int rc;
+ struct seq_server_site *ss_site;
+ ENTRY;
+
+ ss_site = lu_site2seq(site);
+
+ switch (opc) {
+ case SEQ_ALLOC_META:
+ if (!ss_site->ss_server_seq) {
+ CERROR("Sequence server is not "
+ "initialized\n");
+ RETURN(-EINVAL);
+ }
+ rc = seq_server_alloc_meta(ss_site->ss_server_seq, out, env);
+ break;
+ case SEQ_ALLOC_SUPER:
+ if (!ss_site->ss_control_seq) {
+ CERROR("Sequence controller is not "
+ "initialized\n");
+ RETURN(-EINVAL);
+ }
+ rc = seq_server_alloc_super(ss_site->ss_control_seq, out, env);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ RETURN(rc);
+}
+
+static int seq_req_handle(struct ptlrpc_request *req,
+ const struct lu_env *env,
+ struct seq_thread_info *info)
+{
+ struct lu_seq_range *out, *tmp;
+ struct lu_site *site;
+ int rc = -EPROTO;
+ __u32 *opc;
+ ENTRY;
+
+ LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY));
+ site = req->rq_export->exp_obd->obd_lu_dev->ld_site;
+ LASSERT(site != NULL);
+
+ rc = req_capsule_server_pack(info->sti_pill);
+ if (rc)
+ RETURN(err_serious(rc));
+
+ opc = req_capsule_client_get(info->sti_pill, &RMF_SEQ_OPC);
+ if (opc != NULL) {
+ out = req_capsule_server_get(info->sti_pill, &RMF_SEQ_RANGE);
+ if (out == NULL)
+ RETURN(err_serious(-EPROTO));
+
+ tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE);
+
+ /* seq client passed mdt id, we need to pass that using out
+ * range parameter */
+
+ out->lsr_index = tmp->lsr_index;
+ out->lsr_flags = tmp->lsr_flags;
+ rc = seq_server_handle(site, env, *opc, out);
+ } else
+ rc = err_serious(-EPROTO);
+
+ RETURN(rc);
+}
+
+/* context key constructor/destructor: seq_key_init, seq_key_fini */
+LU_KEY_INIT_FINI(seq, struct seq_thread_info);
+
+/* context key: seq_thread_key */
+LU_CONTEXT_KEY_DEFINE(seq, LCT_MD_THREAD | LCT_DT_THREAD);
+
+static void seq_thread_info_init(struct ptlrpc_request *req,
+ struct seq_thread_info *info)
+{
+ info->sti_pill = &req->rq_pill;
+ /* Init request capsule */
+ req_capsule_init(info->sti_pill, req, RCL_SERVER);
+ req_capsule_set(info->sti_pill, &RQF_SEQ_QUERY);
+}
+
+static void seq_thread_info_fini(struct seq_thread_info *info)
+{
+ req_capsule_fini(info->sti_pill);
+}
+
+int seq_handle(struct ptlrpc_request *req)
+{
+ const struct lu_env *env;
+ struct seq_thread_info *info;
+ int rc;
+
+ env = req->rq_svc_thread->t_env;
+ LASSERT(env != NULL);
+
+ info = lu_context_key_get(&env->le_ctx, &seq_thread_key);
+ LASSERT(info != NULL);
+
+ seq_thread_info_init(req, info);
+ rc = seq_req_handle(req, env, info);
+ /* XXX: we don't need replay but MDT assign transno in any case,
+ * remove it manually before reply*/
+ lustre_msg_set_transno(req->rq_repmsg, 0);
+ seq_thread_info_fini(info);
+
+ return rc;
+}
+EXPORT_SYMBOL(seq_handle);
+
+/*
+ * Entry point for handling FLD RPCs called from MDT.
+ */
+int seq_query(struct com_thread_info *info)
+{
+ return seq_handle(info->cti_pill->rc_req);
+}
+EXPORT_SYMBOL(seq_query);
+
+
+#ifdef LPROCFS
+static int seq_server_proc_init(struct lu_server_seq *seq)
+{
+ int rc;
+ ENTRY;
+
+ seq->lss_proc_dir = lprocfs_register(seq->lss_name,
+ seq_type_proc_dir,
+ NULL, NULL);
+ if (IS_ERR(seq->lss_proc_dir)) {
+ rc = PTR_ERR(seq->lss_proc_dir);
+ RETURN(rc);
+ }
+
+ rc = lprocfs_add_vars(seq->lss_proc_dir,
+ seq_server_proc_list, seq);
+ if (rc) {
+ CERROR("%s: Can't init sequence manager "
+ "proc, rc %d\n", seq->lss_name, rc);
+ GOTO(out_cleanup, rc);
+ }
+
+ RETURN(0);
+
+out_cleanup:
+ seq_server_proc_fini(seq);
+ return rc;
+}
+
+static void seq_server_proc_fini(struct lu_server_seq *seq)
+{
+ ENTRY;
+ if (seq->lss_proc_dir != NULL) {
+ if (!IS_ERR(seq->lss_proc_dir))
+ lprocfs_remove(&seq->lss_proc_dir);
+ seq->lss_proc_dir = NULL;
+ }
+ EXIT;
+}
+#else
+static int seq_server_proc_init(struct lu_server_seq *seq)
+{
+ return 0;
+}
+
+static void seq_server_proc_fini(struct lu_server_seq *seq)
+{
+ return;
+}
+#endif
+
+
+int seq_server_init(struct lu_server_seq *seq,
+ struct dt_device *dev,
+ const char *prefix,
+ enum lu_mgr_type type,
+ struct seq_server_site *ss,
+ const struct lu_env *env)
+{
+ int rc, is_srv = (type == LUSTRE_SEQ_SERVER);
+ ENTRY;
+
+ LASSERT(dev != NULL);
+ LASSERT(prefix != NULL);
+ LASSERT(ss != NULL);
+ LASSERT(ss->ss_lu != NULL);
+
+ seq->lss_cli = NULL;
+ seq->lss_type = type;
+ seq->lss_site = ss;
+ range_init(&seq->lss_space);
+
+ range_init(&seq->lss_lowater_set);
+ range_init(&seq->lss_hiwater_set);
+ seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH;
+
+ mutex_init(&seq->lss_mutex);
+
+ seq->lss_width = is_srv ?
+ LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH;
+
+ snprintf(seq->lss_name, sizeof(seq->lss_name),
+ "%s-%s", (is_srv ? "srv" : "ctl"), prefix);
+
+ rc = seq_store_init(seq, env, dev);
+ if (rc)
+ GOTO(out, rc);
+ /* Request backing store for saved sequence info. */
+ rc = seq_store_read(seq, env);
+ if (rc == -ENODATA) {
+
+ /* Nothing is read, init by default value. */
+ seq->lss_space = is_srv ?
+ LUSTRE_SEQ_ZERO_RANGE:
+ LUSTRE_SEQ_SPACE_RANGE;
+
+ LASSERT(ss != NULL);
+ seq->lss_space.lsr_index = ss->ss_node_id;
+ LCONSOLE_INFO("%s: No data found "
+ "on store. Initialize space\n",
+ seq->lss_name);
+
+ rc = seq_store_update(env, seq, NULL, 0);
+ if (rc) {
+ CERROR("%s: Can't write space data, "
+ "rc %d\n", seq->lss_name, rc);
+ }
+ } else if (rc) {
+ CERROR("%s: Can't read space data, rc %d\n",
+ seq->lss_name, rc);
+ GOTO(out, rc);
+ }
+
+ if (is_srv) {
+ LASSERT(range_is_sane(&seq->lss_space));
+ } else {
+ LASSERT(!range_is_zero(&seq->lss_space) &&
+ range_is_sane(&seq->lss_space));
+ }
+
+ rc = seq_server_proc_init(seq);
+ if (rc)
+ GOTO(out, rc);
+
+ EXIT;
+out:
+ if (rc)
+ seq_server_fini(seq, env);
+ return rc;
+}
+EXPORT_SYMBOL(seq_server_init);
+
+void seq_server_fini(struct lu_server_seq *seq,
+ const struct lu_env *env)
+{
+ ENTRY;
+
+ seq_server_proc_fini(seq);
+ seq_store_fini(seq, env);
+
+ EXIT;
+}
+EXPORT_SYMBOL(seq_server_fini);
+
+int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss)
+{
+ if (ss == NULL)
+ RETURN(0);
+
+ if (ss->ss_server_seq) {
+ seq_server_fini(ss->ss_server_seq, env);
+ OBD_FREE_PTR(ss->ss_server_seq);
+ ss->ss_server_seq = NULL;
+ }
+
+ if (ss->ss_control_seq) {
+ seq_server_fini(ss->ss_control_seq, env);
+ OBD_FREE_PTR(ss->ss_control_seq);
+ ss->ss_control_seq = NULL;
+ }
+
+ if (ss->ss_client_seq) {
+ seq_client_fini(ss->ss_client_seq);
+ OBD_FREE_PTR(ss->ss_client_seq);
+ ss->ss_client_seq = NULL;
+ }
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(seq_site_fini);
+
+proc_dir_entry_t *seq_type_proc_dir = NULL;
+
+static int __init fid_mod_init(void)
+{
+ seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME,
+ proc_lustre_root,
+ NULL, NULL);
+ if (IS_ERR(seq_type_proc_dir))
+ return PTR_ERR(seq_type_proc_dir);
+
+ LU_CONTEXT_KEY_INIT(&seq_thread_key);
+ lu_context_key_register(&seq_thread_key);
+ return 0;
+}
+
+static void __exit fid_mod_exit(void)
+{
+ lu_context_key_degister(&seq_thread_key);
+ if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) {
+ lprocfs_remove(&seq_type_proc_dir);
+ seq_type_proc_dir = NULL;
+ }
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre FID Module");
+MODULE_LICENSE("GPL");
+
+cfs_module(fid, "0.1.0", fid_mod_init, fid_mod_exit);
diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h
new file mode 100644
index 000000000000..407a7435583f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/fid_internal.h
@@ -0,0 +1,84 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/fid_internal.h
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+#ifndef __FID_INTERNAL_H
+#define __FID_INTERNAL_H
+
+#include <lustre/lustre_idl.h>
+#include <dt_object.h>
+
+#include <linux/libcfs/libcfs.h>
+
+struct seq_thread_info {
+ struct req_capsule *sti_pill;
+ struct lu_seq_range sti_space;
+ struct lu_buf sti_buf;
+};
+
+enum {
+ SEQ_TXN_STORE_CREDITS = 20
+};
+
+extern struct lu_context_key seq_thread_key;
+
+int seq_client_alloc_super(struct lu_client_seq *seq,
+ const struct lu_env *env);
+/* Store API functions. */
+int seq_store_init(struct lu_server_seq *seq,
+ const struct lu_env *env,
+ struct dt_device *dt);
+
+void seq_store_fini(struct lu_server_seq *seq,
+ const struct lu_env *env);
+
+int seq_store_read(struct lu_server_seq *seq,
+ const struct lu_env *env);
+
+int seq_store_update(const struct lu_env *env, struct lu_server_seq *seq,
+ struct lu_seq_range *out, int sync);
+
+#ifdef LPROCFS
+extern struct lprocfs_vars seq_server_proc_list[];
+extern struct lprocfs_vars seq_client_proc_list[];
+#endif
+
+
+extern proc_dir_entry_t *seq_type_proc_dir;
+
+#endif /* __FID_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
new file mode 100644
index 000000000000..eaff51a555fb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/fid_lib.c
@@ -0,0 +1,97 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/fid_lib.c
+ *
+ * Miscellaneous fid functions.
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FID
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <lu_object.h>
+#include <lustre_fid.h>
+
+/**
+ * A cluster-wide range from which fid-sequences are granted to servers and
+ * then clients.
+ *
+ * Fid namespace:
+ * <pre>
+ * Normal FID: seq:64 [2^33,2^64-1] oid:32 ver:32
+ * IGIF : 0:32, ino:32 gen:32 0:32
+ * IDIF : 0:31, 1:1, ost-index:16, objd:48 0:32
+ * </pre>
+ *
+ * The first 0x400 sequences of normal FID are reserved for special purpose.
+ * FID_SEQ_START + 1 is for local file id generation.
+ * FID_SEQ_START + 2 is for .lustre directory and its objects
+ */
+const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
+ FID_SEQ_NORMAL,
+ (__u64)~0ULL
+};
+EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE);
+
+/* Zero range, used for init and other purposes. */
+const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE = {
+ 0,
+ 0
+};
+EXPORT_SYMBOL(LUSTRE_SEQ_ZERO_RANGE);
+
+/* Lustre Big Fs Lock fid. */
+const struct lu_fid LUSTRE_BFL_FID = { .f_seq = FID_SEQ_SPECIAL,
+ .f_oid = FID_OID_SPECIAL_BFL,
+ .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LUSTRE_BFL_FID);
+
+/** Special fid for ".lustre" directory */
+const struct lu_fid LU_DOT_LUSTRE_FID = { .f_seq = FID_SEQ_DOT_LUSTRE,
+ .f_oid = FID_OID_DOT_LUSTRE,
+ .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LU_DOT_LUSTRE_FID);
+
+/** Special fid for "fid" special object in .lustre */
+const struct lu_fid LU_OBF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE,
+ .f_oid = FID_OID_DOT_LUSTRE_OBF,
+ .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LU_OBF_FID);
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
new file mode 100644
index 000000000000..fcaaca7e2e01
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -0,0 +1,522 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/fid_request.c
+ *
+ * Lustre Sequence Manager
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FID
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <dt_object.h>
+#include <md_object.h>
+#include <obd_support.h>
+#include <lustre_req_layout.h>
+#include <lustre_fid.h>
+/* mdc RPC locks */
+#include <lustre_mdc.h>
+#include "fid_internal.h"
+
+static int seq_client_rpc(struct lu_client_seq *seq,
+ struct lu_seq_range *output, __u32 opc,
+ const char *opcname)
+{
+ struct obd_export *exp = seq->lcs_exp;
+ struct ptlrpc_request *req;
+ struct lu_seq_range *out, *in;
+ __u32 *op;
+ unsigned int debug_mask;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
+ LUSTRE_MDS_VERSION, SEQ_QUERY);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ /* Init operation code */
+ op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC);
+ *op = opc;
+
+ /* Zero out input range, this is not recovery yet. */
+ in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE);
+ range_init(in);
+
+ ptlrpc_request_set_replen(req);
+
+ in->lsr_index = seq->lcs_space.lsr_index;
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ fld_range_set_mdt(in);
+ else
+ fld_range_set_ost(in);
+
+ if (opc == SEQ_ALLOC_SUPER) {
+ req->rq_request_portal = SEQ_CONTROLLER_PORTAL;
+ req->rq_reply_portal = MDC_REPLY_PORTAL;
+ /* During allocating super sequence for data object,
+ * the current thread might hold the export of MDT0(MDT0
+ * precreating objects on this OST), and it will send the
+ * request to MDT0 here, so we can not keep resending the
+ * request here, otherwise if MDT0 is failed(umounted),
+ * it can not release the export of MDT0 */
+ if (seq->lcs_type == LUSTRE_SEQ_DATA)
+ req->rq_no_delay = req->rq_no_resend = 1;
+ debug_mask = D_CONSOLE;
+ } else {
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ req->rq_request_portal = SEQ_METADATA_PORTAL;
+ else
+ req->rq_request_portal = SEQ_DATA_PORTAL;
+ debug_mask = D_INFO;
+ }
+
+ ptlrpc_at_set_req_timeout(req);
+
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ rc = ptlrpc_queue_wait(req);
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ if (rc)
+ GOTO(out_req, rc);
+
+ out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE);
+ *output = *out;
+
+ if (!range_is_sane(output)) {
+ CERROR("%s: Invalid range received from server: "
+ DRANGE"\n", seq->lcs_name, PRANGE(output));
+ GOTO(out_req, rc = -EINVAL);
+ }
+
+ if (range_is_exhausted(output)) {
+ CERROR("%s: Range received from server is exhausted: "
+ DRANGE"]\n", seq->lcs_name, PRANGE(output));
+ GOTO(out_req, rc = -EINVAL);
+ }
+
+ CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"]\n",
+ seq->lcs_name, opcname, PRANGE(output));
+
+ EXIT;
+out_req:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+/* Request sequence-controller node to allocate new super-sequence. */
+int seq_client_alloc_super(struct lu_client_seq *seq,
+ const struct lu_env *env)
+{
+ int rc;
+ ENTRY;
+
+ mutex_lock(&seq->lcs_mutex);
+
+ if (seq->lcs_srv) {
+ LASSERT(env != NULL);
+ rc = seq_server_alloc_super(seq->lcs_srv, &seq->lcs_space,
+ env);
+ } else {
+ /* Check whether the connection to seq controller has been
+ * setup (lcs_exp != NULL) */
+ if (seq->lcs_exp == NULL) {
+ mutex_unlock(&seq->lcs_mutex);
+ RETURN(-EINPROGRESS);
+ }
+
+ rc = seq_client_rpc(seq, &seq->lcs_space,
+ SEQ_ALLOC_SUPER, "super");
+ }
+ mutex_unlock(&seq->lcs_mutex);
+ RETURN(rc);
+}
+
+/* Request sequence-controller node to allocate new meta-sequence. */
+static int seq_client_alloc_meta(const struct lu_env *env,
+ struct lu_client_seq *seq)
+{
+ int rc;
+ ENTRY;
+
+ if (seq->lcs_srv) {
+ LASSERT(env != NULL);
+ rc = seq_server_alloc_meta(seq->lcs_srv, &seq->lcs_space, env);
+ } else {
+ do {
+ /* If meta server return -EINPROGRESS or EAGAIN,
+ * it means meta server might not be ready to
+ * allocate super sequence from sequence controller
+ * (MDT0)yet */
+ rc = seq_client_rpc(seq, &seq->lcs_space,
+ SEQ_ALLOC_META, "meta");
+ } while (rc == -EINPROGRESS || rc == -EAGAIN);
+ }
+ RETURN(rc);
+}
+
+/* Allocate new sequence for client. */
+static int seq_client_alloc_seq(const struct lu_env *env,
+ struct lu_client_seq *seq, seqno_t *seqnr)
+{
+ int rc;
+ ENTRY;
+
+ LASSERT(range_is_sane(&seq->lcs_space));
+
+ if (range_is_exhausted(&seq->lcs_space)) {
+ rc = seq_client_alloc_meta(env, seq);
+ if (rc) {
+ CERROR("%s: Can't allocate new meta-sequence,"
+ "rc %d\n", seq->lcs_name, rc);
+ RETURN(rc);
+ } else {
+ CDEBUG(D_INFO, "%s: New range - "DRANGE"\n",
+ seq->lcs_name, PRANGE(&seq->lcs_space));
+ }
+ } else {
+ rc = 0;
+ }
+
+ LASSERT(!range_is_exhausted(&seq->lcs_space));
+ *seqnr = seq->lcs_space.lsr_start;
+ seq->lcs_space.lsr_start += 1;
+
+ CDEBUG(D_INFO, "%s: Allocated sequence ["LPX64"]\n", seq->lcs_name,
+ *seqnr);
+
+ RETURN(rc);
+}
+
+static int seq_fid_alloc_prep(struct lu_client_seq *seq,
+ wait_queue_t *link)
+{
+ if (seq->lcs_update) {
+ add_wait_queue(&seq->lcs_waitq, link);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&seq->lcs_mutex);
+
+ waitq_wait(link, TASK_UNINTERRUPTIBLE);
+
+ mutex_lock(&seq->lcs_mutex);
+ remove_wait_queue(&seq->lcs_waitq, link);
+ set_current_state(TASK_RUNNING);
+ return -EAGAIN;
+ }
+ ++seq->lcs_update;
+ mutex_unlock(&seq->lcs_mutex);
+ return 0;
+}
+
+static void seq_fid_alloc_fini(struct lu_client_seq *seq)
+{
+ LASSERT(seq->lcs_update == 1);
+ mutex_lock(&seq->lcs_mutex);
+ --seq->lcs_update;
+ wake_up(&seq->lcs_waitq);
+}
+
+/**
+ * Allocate the whole seq to the caller.
+ **/
+int seq_client_get_seq(const struct lu_env *env,
+ struct lu_client_seq *seq, seqno_t *seqnr)
+{
+ wait_queue_t link;
+ int rc;
+
+ LASSERT(seqnr != NULL);
+ mutex_lock(&seq->lcs_mutex);
+ init_waitqueue_entry_current(&link);
+
+ while (1) {
+ rc = seq_fid_alloc_prep(seq, &link);
+ if (rc == 0)
+ break;
+ }
+
+ rc = seq_client_alloc_seq(env, seq, seqnr);
+ if (rc) {
+ CERROR("%s: Can't allocate new sequence, "
+ "rc %d\n", seq->lcs_name, rc);
+ seq_fid_alloc_fini(seq);
+ mutex_unlock(&seq->lcs_mutex);
+ return rc;
+ }
+
+ CDEBUG(D_INFO, "%s: allocate sequence "
+ "[0x%16.16"LPF64"x]\n", seq->lcs_name, *seqnr);
+
+ /* Since the caller require the whole seq,
+ * so marked this seq to be used */
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ seq->lcs_fid.f_oid = LUSTRE_METADATA_SEQ_MAX_WIDTH;
+ else
+ seq->lcs_fid.f_oid = LUSTRE_DATA_SEQ_MAX_WIDTH;
+
+ seq->lcs_fid.f_seq = *seqnr;
+ seq->lcs_fid.f_ver = 0;
+ /*
+ * Inform caller that sequence switch is performed to allow it
+ * to setup FLD for it.
+ */
+ seq_fid_alloc_fini(seq);
+ mutex_unlock(&seq->lcs_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL(seq_client_get_seq);
+
+/* Allocate new fid on passed client @seq and save it to @fid. */
+int seq_client_alloc_fid(const struct lu_env *env,
+ struct lu_client_seq *seq, struct lu_fid *fid)
+{
+ wait_queue_t link;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+ LASSERT(fid != NULL);
+
+ init_waitqueue_entry_current(&link);
+ mutex_lock(&seq->lcs_mutex);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
+ seq->lcs_fid.f_oid = seq->lcs_width;
+
+ while (1) {
+ seqno_t seqnr;
+
+ if (!fid_is_zero(&seq->lcs_fid) &&
+ fid_oid(&seq->lcs_fid) < seq->lcs_width) {
+ /* Just bump last allocated fid and return to caller. */
+ seq->lcs_fid.f_oid += 1;
+ rc = 0;
+ break;
+ }
+
+ rc = seq_fid_alloc_prep(seq, &link);
+ if (rc)
+ continue;
+
+ rc = seq_client_alloc_seq(env, seq, &seqnr);
+ if (rc) {
+ CERROR("%s: Can't allocate new sequence, "
+ "rc %d\n", seq->lcs_name, rc);
+ seq_fid_alloc_fini(seq);
+ mutex_unlock(&seq->lcs_mutex);
+ RETURN(rc);
+ }
+
+ CDEBUG(D_INFO, "%s: Switch to sequence "
+ "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);
+
+ seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
+ seq->lcs_fid.f_seq = seqnr;
+ seq->lcs_fid.f_ver = 0;
+
+ /*
+ * Inform caller that sequence switch is performed to allow it
+ * to setup FLD for it.
+ */
+ rc = 1;
+
+ seq_fid_alloc_fini(seq);
+ break;
+ }
+
+ *fid = seq->lcs_fid;
+ mutex_unlock(&seq->lcs_mutex);
+
+ CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid));
+ RETURN(rc);
+}
+EXPORT_SYMBOL(seq_client_alloc_fid);
+
+/*
+ * Finish the current sequence due to disconnect.
+ * See mdc_import_event()
+ */
+void seq_client_flush(struct lu_client_seq *seq)
+{
+ wait_queue_t link;
+
+ LASSERT(seq != NULL);
+ init_waitqueue_entry_current(&link);
+ mutex_lock(&seq->lcs_mutex);
+
+ while (seq->lcs_update) {
+ add_wait_queue(&seq->lcs_waitq, &link);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&seq->lcs_mutex);
+
+ waitq_wait(&link, TASK_UNINTERRUPTIBLE);
+
+ mutex_lock(&seq->lcs_mutex);
+ remove_wait_queue(&seq->lcs_waitq, &link);
+ set_current_state(TASK_RUNNING);
+ }
+
+ fid_zero(&seq->lcs_fid);
+ /**
+ * this id shld not be used for seq range allocation.
+ * set to -1 for dgb check.
+ */
+
+ seq->lcs_space.lsr_index = -1;
+
+ range_init(&seq->lcs_space);
+ mutex_unlock(&seq->lcs_mutex);
+}
+EXPORT_SYMBOL(seq_client_flush);
+
+static void seq_client_proc_fini(struct lu_client_seq *seq);
+
+#ifdef LPROCFS
+static int seq_client_proc_init(struct lu_client_seq *seq)
+{
+ int rc;
+ ENTRY;
+
+ seq->lcs_proc_dir = lprocfs_register(seq->lcs_name,
+ seq_type_proc_dir,
+ NULL, NULL);
+
+ if (IS_ERR(seq->lcs_proc_dir)) {
+ CERROR("%s: LProcFS failed in seq-init\n",
+ seq->lcs_name);
+ rc = PTR_ERR(seq->lcs_proc_dir);
+ RETURN(rc);
+ }
+
+ rc = lprocfs_add_vars(seq->lcs_proc_dir,
+ seq_client_proc_list, seq);
+ if (rc) {
+ CERROR("%s: Can't init sequence manager "
+ "proc, rc %d\n", seq->lcs_name, rc);
+ GOTO(out_cleanup, rc);
+ }
+
+ RETURN(0);
+
+out_cleanup:
+ seq_client_proc_fini(seq);
+ return rc;
+}
+
+static void seq_client_proc_fini(struct lu_client_seq *seq)
+{
+ ENTRY;
+ if (seq->lcs_proc_dir) {
+ if (!IS_ERR(seq->lcs_proc_dir))
+ lprocfs_remove(&seq->lcs_proc_dir);
+ seq->lcs_proc_dir = NULL;
+ }
+ EXIT;
+}
+#else
+static int seq_client_proc_init(struct lu_client_seq *seq)
+{
+ return 0;
+}
+
+static void seq_client_proc_fini(struct lu_client_seq *seq)
+{
+ return;
+}
+#endif
+
+int seq_client_init(struct lu_client_seq *seq,
+ struct obd_export *exp,
+ enum lu_cli_type type,
+ const char *prefix,
+ struct lu_server_seq *srv)
+{
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+ LASSERT(prefix != NULL);
+
+ seq->lcs_srv = srv;
+ seq->lcs_type = type;
+
+ mutex_init(&seq->lcs_mutex);
+ if (type == LUSTRE_SEQ_METADATA)
+ seq->lcs_width = LUSTRE_METADATA_SEQ_MAX_WIDTH;
+ else
+ seq->lcs_width = LUSTRE_DATA_SEQ_MAX_WIDTH;
+
+ init_waitqueue_head(&seq->lcs_waitq);
+ /* Make sure that things are clear before work is started. */
+ seq_client_flush(seq);
+
+ if (exp != NULL)
+ seq->lcs_exp = class_export_get(exp);
+ else if (type == LUSTRE_SEQ_METADATA)
+ LASSERT(seq->lcs_srv != NULL);
+
+ snprintf(seq->lcs_name, sizeof(seq->lcs_name),
+ "cli-%s", prefix);
+
+ rc = seq_client_proc_init(seq);
+ if (rc)
+ seq_client_fini(seq);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(seq_client_init);
+
+void seq_client_fini(struct lu_client_seq *seq)
+{
+ ENTRY;
+
+ seq_client_proc_fini(seq);
+
+ if (seq->lcs_exp != NULL) {
+ class_export_put(seq->lcs_exp);
+ seq->lcs_exp = NULL;
+ }
+
+ seq->lcs_srv = NULL;
+ EXIT;
+}
+EXPORT_SYMBOL(seq_client_fini);
diff --git a/drivers/staging/lustre/lustre/fid/fid_store.c b/drivers/staging/lustre/lustre/fid/fid_store.c
new file mode 100644
index 000000000000..a90e6e37d689
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/fid_store.c
@@ -0,0 +1,259 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/fid_store.c
+ *
+ * Lustre Sequence Manager
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FID
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <dt_object.h>
+#include <md_object.h>
+#include <obd_support.h>
+#include <lustre_req_layout.h>
+#include <lustre_fid.h>
+#include "fid_internal.h"
+
+
+static struct lu_buf *seq_store_buf(struct seq_thread_info *info)
+{
+ struct lu_buf *buf;
+
+ buf = &info->sti_buf;
+ buf->lb_buf = &info->sti_space;
+ buf->lb_len = sizeof(info->sti_space);
+ return buf;
+}
+
+struct seq_update_callback {
+ struct dt_txn_commit_cb suc_cb;
+ struct lu_server_seq *suc_seq;
+};
+
+void seq_update_cb(struct lu_env *env, struct thandle *th,
+ struct dt_txn_commit_cb *cb, int err)
+{
+ struct seq_update_callback *ccb;
+
+ ccb = container_of0(cb, struct seq_update_callback, suc_cb);
+
+ LASSERT(ccb->suc_seq != NULL);
+
+ ccb->suc_seq->lss_need_sync = 0;
+ OBD_FREE_PTR(ccb);
+}
+
+int seq_update_cb_add(struct thandle *th, struct lu_server_seq *seq)
+{
+ struct seq_update_callback *ccb;
+ struct dt_txn_commit_cb *dcb;
+ int rc;
+
+ OBD_ALLOC_PTR(ccb);
+ if (ccb == NULL)
+ return -ENOMEM;
+
+ ccb->suc_seq = seq;
+ seq->lss_need_sync = 1;
+
+ dcb = &ccb->suc_cb;
+ dcb->dcb_func = seq_update_cb;
+ INIT_LIST_HEAD(&dcb->dcb_linkage);
+ strncpy(dcb->dcb_name, "seq_update_cb", MAX_COMMIT_CB_STR_LEN);
+ dcb->dcb_name[MAX_COMMIT_CB_STR_LEN - 1] = '\0';
+
+ rc = dt_trans_cb_add(th, dcb);
+ if (rc)
+ OBD_FREE_PTR(ccb);
+ return rc;
+}
+
+/* This function implies that caller takes care about locking. */
+int seq_store_update(const struct lu_env *env, struct lu_server_seq *seq,
+ struct lu_seq_range *out, int sync)
+{
+ struct dt_device *dt_dev = lu2dt_dev(seq->lss_obj->do_lu.lo_dev);
+ struct seq_thread_info *info;
+ struct thandle *th;
+ loff_t pos = 0;
+ int rc;
+
+ info = lu_context_key_get(&env->le_ctx, &seq_thread_key);
+ LASSERT(info != NULL);
+
+ th = dt_trans_create(env, dt_dev);
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
+
+ rc = dt_declare_record_write(env, seq->lss_obj,
+ sizeof(struct lu_seq_range), 0, th);
+ if (rc)
+ GOTO(exit, rc);
+
+ if (out != NULL) {
+ rc = fld_declare_server_create(env,
+ seq->lss_site->ss_server_fld,
+ out, th);
+ if (rc)
+ GOTO(exit, rc);
+ }
+
+ rc = dt_trans_start_local(env, dt_dev, th);
+ if (rc)
+ GOTO(exit, rc);
+
+ /* Store ranges in le format. */
+ range_cpu_to_le(&info->sti_space, &seq->lss_space);
+
+ rc = dt_record_write(env, seq->lss_obj, seq_store_buf(info), &pos, th);
+ if (rc) {
+ CERROR("%s: Can't write space data, rc %d\n",
+ seq->lss_name, rc);
+ GOTO(exit, rc);
+ } else if (out != NULL) {
+ rc = fld_server_create(env, seq->lss_site->ss_server_fld, out,
+ th);
+ if (rc) {
+ CERROR("%s: Can't Update fld database, rc %d\n",
+ seq->lss_name, rc);
+ GOTO(exit, rc);
+ }
+ }
+ /* next sequence update will need sync until this update is committed
+ * in case of sync operation this is not needed obviously */
+ if (!sync)
+ /* if callback can't be added then sync always */
+ sync = !!seq_update_cb_add(th, seq);
+
+ th->th_sync |= sync;
+exit:
+ dt_trans_stop(env, dt_dev, th);
+ return rc;
+}
+
+/*
+ * This function implies that caller takes care about locking or locking is not
+ * needed (init time).
+ */
+int seq_store_read(struct lu_server_seq *seq,
+ const struct lu_env *env)
+{
+ struct seq_thread_info *info;
+ loff_t pos = 0;
+ int rc;
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &seq_thread_key);
+ LASSERT(info != NULL);
+
+ rc = seq->lss_obj->do_body_ops->dbo_read(env, seq->lss_obj,
+ seq_store_buf(info),
+ &pos, BYPASS_CAPA);
+
+ if (rc == sizeof(info->sti_space)) {
+ range_le_to_cpu(&seq->lss_space, &info->sti_space);
+ CDEBUG(D_INFO, "%s: Space - "DRANGE"\n",
+ seq->lss_name, PRANGE(&seq->lss_space));
+ rc = 0;
+ } else if (rc == 0) {
+ rc = -ENODATA;
+ } else if (rc > 0) {
+ CERROR("%s: Read only %d bytes of %d\n", seq->lss_name,
+ rc, (int)sizeof(info->sti_space));
+ rc = -EIO;
+ }
+
+ RETURN(rc);
+}
+
+int seq_store_init(struct lu_server_seq *seq,
+ const struct lu_env *env,
+ struct dt_device *dt)
+{
+ struct dt_object *dt_obj;
+ struct lu_fid fid;
+ struct lu_attr attr;
+ struct dt_object_format dof;
+ const char *name;
+ int rc;
+ ENTRY;
+
+ name = seq->lss_type == LUSTRE_SEQ_SERVER ?
+ LUSTRE_SEQ_SRV_NAME : LUSTRE_SEQ_CTL_NAME;
+
+ if (seq->lss_type == LUSTRE_SEQ_SERVER)
+ lu_local_obj_fid(&fid, FID_SEQ_SRV_OID);
+ else
+ lu_local_obj_fid(&fid, FID_SEQ_CTL_OID);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.la_valid = LA_MODE;
+ attr.la_mode = S_IFREG | 0666;
+ dof.dof_type = DFT_REGULAR;
+
+ dt_obj = dt_find_or_create(env, dt, &fid, &dof, &attr);
+ if (!IS_ERR(dt_obj)) {
+ seq->lss_obj = dt_obj;
+ rc = 0;
+ } else {
+ CERROR("%s: Can't find \"%s\" obj %d\n",
+ seq->lss_name, name, (int)PTR_ERR(dt_obj));
+ rc = PTR_ERR(dt_obj);
+ }
+
+ RETURN(rc);
+}
+
+void seq_store_fini(struct lu_server_seq *seq,
+ const struct lu_env *env)
+{
+ ENTRY;
+
+ if (seq->lss_obj != NULL) {
+ if (!IS_ERR(seq->lss_obj))
+ lu_object_put(env, &seq->lss_obj->do_lu);
+ seq->lss_obj = NULL;
+ }
+
+ EXIT;
+}
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
new file mode 100644
index 000000000000..af817a867f8b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -0,0 +1,222 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fid/lproc_fid.c
+ *
+ * Lustre Sequence Manager
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FID
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <dt_object.h>
+#include <md_object.h>
+#include <obd_support.h>
+#include <lustre_req_layout.h>
+#include <lustre_fid.h>
+#include "fid_internal.h"
+
+#ifdef LPROCFS
+/*
+ * Note: this function is only used for testing, it is no safe for production
+ * use.
+ */
+static int
+lprocfs_fid_write_common(const char *buffer, unsigned long count,
+ struct lu_seq_range *range)
+{
+ struct lu_seq_range tmp;
+ int rc;
+ ENTRY;
+
+ LASSERT(range != NULL);
+
+ rc = sscanf(buffer, "[%llx - %llx]\n",
+ (long long unsigned *)&tmp.lsr_start,
+ (long long unsigned *)&tmp.lsr_end);
+ if (rc != 2 || !range_is_sane(&tmp) || range_is_zero(&tmp))
+ RETURN(-EINVAL);
+ *range = tmp;
+ RETURN(0);
+}
+
+/* Client side procfs stuff */
+static ssize_t
+lprocfs_fid_space_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct lu_client_seq *seq = ((struct seq_file *)file->private_data)->private;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ mutex_lock(&seq->lcs_mutex);
+ rc = lprocfs_fid_write_common(buffer, count, &seq->lcs_space);
+
+ if (rc == 0) {
+ CDEBUG(D_INFO, "%s: Space: "DRANGE"\n",
+ seq->lcs_name, PRANGE(&seq->lcs_space));
+ }
+
+ mutex_unlock(&seq->lcs_mutex);
+
+ RETURN(count);
+}
+
+static int
+lprocfs_fid_space_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ mutex_lock(&seq->lcs_mutex);
+ rc = seq_printf(m, "["LPX64" - "LPX64"]:%x:%s\n", PRANGE(&seq->lcs_space));
+ mutex_unlock(&seq->lcs_mutex);
+
+ RETURN(rc);
+}
+
+static ssize_t
+lprocfs_fid_width_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct lu_client_seq *seq = ((struct seq_file *)file->private_data)->private;
+ __u64 max;
+ int rc, val;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ RETURN(rc);
+
+ mutex_lock(&seq->lcs_mutex);
+ if (seq->lcs_type == LUSTRE_SEQ_DATA)
+ max = LUSTRE_DATA_SEQ_MAX_WIDTH;
+ else
+ max = LUSTRE_METADATA_SEQ_MAX_WIDTH;
+
+ if (val <= max && val > 0) {
+ seq->lcs_width = val;
+
+ if (rc == 0) {
+ CDEBUG(D_INFO, "%s: Sequence size: "LPU64"\n",
+ seq->lcs_name, seq->lcs_width);
+ }
+ }
+
+ mutex_unlock(&seq->lcs_mutex);
+
+ RETURN(count);
+}
+
+static int
+lprocfs_fid_width_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ mutex_lock(&seq->lcs_mutex);
+ rc = seq_printf(m, LPU64"\n", seq->lcs_width);
+ mutex_unlock(&seq->lcs_mutex);
+
+ RETURN(rc);
+}
+
+static int
+lprocfs_fid_fid_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ mutex_lock(&seq->lcs_mutex);
+ rc = seq_printf(m, DFID"\n", PFID(&seq->lcs_fid));
+ mutex_unlock(&seq->lcs_mutex);
+
+ RETURN(rc);
+}
+
+static int
+lprocfs_fid_server_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_seq *seq = (struct lu_client_seq *)m->private;
+ struct client_obd *cli;
+ int rc;
+ ENTRY;
+
+ LASSERT(seq != NULL);
+
+ if (seq->lcs_exp != NULL) {
+ cli = &seq->lcs_exp->exp_obd->u.cli;
+ rc = seq_printf(m, "%s\n", cli->cl_target_uuid.uuid);
+ } else {
+ rc = seq_printf(m, "%s\n", seq->lcs_srv->lss_name);
+ }
+ RETURN(rc);
+}
+
+struct lprocfs_vars seq_server_proc_list[] = {
+};
+
+LPROC_SEQ_FOPS(lprocfs_fid_space);
+LPROC_SEQ_FOPS(lprocfs_fid_width);
+LPROC_SEQ_FOPS_RO(lprocfs_fid_server);
+LPROC_SEQ_FOPS_RO(lprocfs_fid_fid);
+
+struct lprocfs_vars seq_client_proc_list[] = {
+ { "space", &lprocfs_fid_space_fops },
+ { "width", &lprocfs_fid_width_fops },
+ { "server", &lprocfs_fid_server_fops },
+ { "fid", &lprocfs_fid_fid_fops },
+ { NULL }
+};
+#endif
diff --git a/drivers/staging/lustre/lustre/fld/Makefile b/drivers/staging/lustre/lustre/fld/Makefile
new file mode 100644
index 000000000000..e7f2881a1d9e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += fld.o
+fld-y := fld_handler.o fld_request.o fld_cache.o fld_index.o lproc_fld.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
new file mode 100644
index 000000000000..347f2ae83bc8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -0,0 +1,566 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_cache.c
+ *
+ * FLD (Fids Location Database)
+ *
+ * Author: Pravin Shelar <pravin.shelar@sun.com>
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+# include <linux/jbd.h>
+# include <asm/div64.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_req_layout.h>
+#include <lustre_fld.h>
+#include "fld_internal.h"
+
+/**
+ * create fld cache.
+ */
+struct fld_cache *fld_cache_init(const char *name,
+ int cache_size, int cache_threshold)
+{
+ struct fld_cache *cache;
+ ENTRY;
+
+ LASSERT(name != NULL);
+ LASSERT(cache_threshold < cache_size);
+
+ OBD_ALLOC_PTR(cache);
+ if (cache == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ INIT_LIST_HEAD(&cache->fci_entries_head);
+ INIT_LIST_HEAD(&cache->fci_lru);
+
+ cache->fci_cache_count = 0;
+ rwlock_init(&cache->fci_lock);
+
+ strlcpy(cache->fci_name, name,
+ sizeof(cache->fci_name));
+
+ cache->fci_cache_size = cache_size;
+ cache->fci_threshold = cache_threshold;
+
+ /* Init fld cache info. */
+ memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));
+
+ CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
+ cache->fci_name, cache_size, cache_threshold);
+
+ RETURN(cache);
+}
+
+/**
+ * destroy fld cache.
+ */
+void fld_cache_fini(struct fld_cache *cache)
+{
+ __u64 pct;
+ ENTRY;
+
+ LASSERT(cache != NULL);
+ fld_cache_flush(cache);
+
+ if (cache->fci_stat.fst_count > 0) {
+ pct = cache->fci_stat.fst_cache * 100;
+ do_div(pct, cache->fci_stat.fst_count);
+ } else {
+ pct = 0;
+ }
+
+ CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
+ CDEBUG(D_INFO, " Total reqs: "LPU64"\n", cache->fci_stat.fst_count);
+ CDEBUG(D_INFO, " Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache);
+ CDEBUG(D_INFO, " Cache hits: "LPU64"%%\n", pct);
+
+ OBD_FREE_PTR(cache);
+
+ EXIT;
+}
+
+/**
+ * delete given node from list.
+ */
+void fld_cache_entry_delete(struct fld_cache *cache,
+ struct fld_cache_entry *node)
+{
+ list_del(&node->fce_list);
+ list_del(&node->fce_lru);
+ cache->fci_cache_count--;
+ OBD_FREE_PTR(node);
+}
+
+/**
+ * fix list by checking new entry with NEXT entry in order.
+ */
+static void fld_fix_new_list(struct fld_cache *cache)
+{
+ struct fld_cache_entry *f_curr;
+ struct fld_cache_entry *f_next;
+ struct lu_seq_range *c_range;
+ struct lu_seq_range *n_range;
+ struct list_head *head = &cache->fci_entries_head;
+ ENTRY;
+
+restart_fixup:
+
+ list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
+ c_range = &f_curr->fce_range;
+ n_range = &f_next->fce_range;
+
+ LASSERT(range_is_sane(c_range));
+ if (&f_next->fce_list == head)
+ break;
+
+ if (c_range->lsr_flags != n_range->lsr_flags)
+ continue;
+
+ LASSERTF(c_range->lsr_start <= n_range->lsr_start,
+ "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
+ PRANGE(c_range), PRANGE(n_range));
+
+ /* check merge possibility with next range */
+ if (c_range->lsr_end == n_range->lsr_start) {
+ if (c_range->lsr_index != n_range->lsr_index)
+ continue;
+ n_range->lsr_start = c_range->lsr_start;
+ fld_cache_entry_delete(cache, f_curr);
+ continue;
+ }
+
+ /* check if current range overlaps with next range. */
+ if (n_range->lsr_start < c_range->lsr_end) {
+ if (c_range->lsr_index == n_range->lsr_index) {
+ n_range->lsr_start = c_range->lsr_start;
+ n_range->lsr_end = max(c_range->lsr_end,
+ n_range->lsr_end);
+ fld_cache_entry_delete(cache, f_curr);
+ } else {
+ if (n_range->lsr_end <= c_range->lsr_end) {
+ *n_range = *c_range;
+ fld_cache_entry_delete(cache, f_curr);
+ } else
+ n_range->lsr_start = c_range->lsr_end;
+ }
+
+ /* we could have overlap over next
+ * range too. better restart. */
+ goto restart_fixup;
+ }
+
+ /* kill duplicates */
+ if (c_range->lsr_start == n_range->lsr_start &&
+ c_range->lsr_end == n_range->lsr_end)
+ fld_cache_entry_delete(cache, f_curr);
+ }
+
+ EXIT;
+}
+
+/**
+ * add node to fld cache
+ */
+static inline void fld_cache_entry_add(struct fld_cache *cache,
+ struct fld_cache_entry *f_new,
+ struct list_head *pos)
+{
+ list_add(&f_new->fce_list, pos);
+ list_add(&f_new->fce_lru, &cache->fci_lru);
+
+ cache->fci_cache_count++;
+ fld_fix_new_list(cache);
+}
+
+/**
+ * Check if cache needs to be shrunk. If so - do it.
+ * Remove one entry in list and so on until cache is shrunk enough.
+ */
+static int fld_cache_shrink(struct fld_cache *cache)
+{
+ struct fld_cache_entry *flde;
+ struct list_head *curr;
+ int num = 0;
+ ENTRY;
+
+ LASSERT(cache != NULL);
+
+ if (cache->fci_cache_count < cache->fci_cache_size)
+ RETURN(0);
+
+ curr = cache->fci_lru.prev;
+
+ while (cache->fci_cache_count + cache->fci_threshold >
+ cache->fci_cache_size && curr != &cache->fci_lru) {
+
+ flde = list_entry(curr, struct fld_cache_entry, fce_lru);
+ curr = curr->prev;
+ fld_cache_entry_delete(cache, flde);
+ num++;
+ }
+
+ CDEBUG(D_INFO, "%s: FLD cache - Shrunk by "
+ "%d entries\n", cache->fci_name, num);
+
+ RETURN(0);
+}
+
+/**
+ * kill all fld cache entries.
+ */
+void fld_cache_flush(struct fld_cache *cache)
+{
+ ENTRY;
+
+ write_lock(&cache->fci_lock);
+ cache->fci_cache_size = 0;
+ fld_cache_shrink(cache);
+ write_unlock(&cache->fci_lock);
+
+ EXIT;
+}
+
+/**
+ * punch hole in existing range. divide this range and add new
+ * entry accordingly.
+ */
+
+void fld_cache_punch_hole(struct fld_cache *cache,
+ struct fld_cache_entry *f_curr,
+ struct fld_cache_entry *f_new)
+{
+ const struct lu_seq_range *range = &f_new->fce_range;
+ const seqno_t new_start = range->lsr_start;
+ const seqno_t new_end = range->lsr_end;
+ struct fld_cache_entry *fldt;
+
+ ENTRY;
+ OBD_ALLOC_GFP(fldt, sizeof *fldt, GFP_ATOMIC);
+ if (!fldt) {
+ OBD_FREE_PTR(f_new);
+ EXIT;
+ /* overlap is not allowed, so dont mess up list. */
+ return;
+ }
+ /* break f_curr RANGE into three RANGES:
+ * f_curr, f_new , fldt
+ */
+
+ /* f_new = *range */
+
+ /* fldt */
+ fldt->fce_range.lsr_start = new_end;
+ fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
+ fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;
+
+ /* f_curr */
+ f_curr->fce_range.lsr_end = new_start;
+
+ /* add these two entries to list */
+ fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
+ fld_cache_entry_add(cache, fldt, &f_new->fce_list);
+
+ /* no need to fixup */
+ EXIT;
+}
+
+/**
+ * handle range overlap in fld cache.
+ */
+static void fld_cache_overlap_handle(struct fld_cache *cache,
+ struct fld_cache_entry *f_curr,
+ struct fld_cache_entry *f_new)
+{
+ const struct lu_seq_range *range = &f_new->fce_range;
+ const seqno_t new_start = range->lsr_start;
+ const seqno_t new_end = range->lsr_end;
+ const mdsno_t mdt = range->lsr_index;
+
+ /* this is overlap case, these case are checking overlapping with
+ * prev range only. fixup will handle overlaping with next range. */
+
+ if (f_curr->fce_range.lsr_index == mdt) {
+ f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
+ new_start);
+
+ f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
+ new_end);
+
+ OBD_FREE_PTR(f_new);
+ fld_fix_new_list(cache);
+
+ } else if (new_start <= f_curr->fce_range.lsr_start &&
+ f_curr->fce_range.lsr_end <= new_end) {
+ /* case 1: new range completely overshadowed existing range.
+ * e.g. whole range migrated. update fld cache entry */
+
+ f_curr->fce_range = *range;
+ OBD_FREE_PTR(f_new);
+ fld_fix_new_list(cache);
+
+ } else if (f_curr->fce_range.lsr_start < new_start &&
+ new_end < f_curr->fce_range.lsr_end) {
+ /* case 2: new range fit within existing range. */
+
+ fld_cache_punch_hole(cache, f_curr, f_new);
+
+ } else if (new_end <= f_curr->fce_range.lsr_end) {
+ /* case 3: overlap:
+ * [new_start [c_start new_end) c_end)
+ */
+
+ LASSERT(new_start <= f_curr->fce_range.lsr_start);
+
+ f_curr->fce_range.lsr_start = new_end;
+ fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);
+
+ } else if (f_curr->fce_range.lsr_start <= new_start) {
+ /* case 4: overlap:
+ * [c_start [new_start c_end) new_end)
+ */
+
+ LASSERT(f_curr->fce_range.lsr_end <= new_end);
+
+ f_curr->fce_range.lsr_end = new_start;
+ fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
+ } else
+ CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
+ PRANGE(range),PRANGE(&f_curr->fce_range));
+}
+
+struct fld_cache_entry
+*fld_cache_entry_create(const struct lu_seq_range *range)
+{
+ struct fld_cache_entry *f_new;
+
+ LASSERT(range_is_sane(range));
+
+ OBD_ALLOC_PTR(f_new);
+ if (!f_new)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ f_new->fce_range = *range;
+ RETURN(f_new);
+}
+
+/**
+ * Insert FLD entry in FLD cache.
+ *
+ * This function handles all cases of merging and breaking up of
+ * ranges.
+ */
+int fld_cache_insert_nolock(struct fld_cache *cache,
+ struct fld_cache_entry *f_new)
+{
+ struct fld_cache_entry *f_curr;
+ struct fld_cache_entry *n;
+ struct list_head *head;
+ struct list_head *prev = NULL;
+ const seqno_t new_start = f_new->fce_range.lsr_start;
+ const seqno_t new_end = f_new->fce_range.lsr_end;
+ __u32 new_flags = f_new->fce_range.lsr_flags;
+ ENTRY;
+
+ /*
+ * Duplicate entries are eliminated in insert op.
+ * So we don't need to search new entry before starting
+ * insertion loop.
+ */
+
+ if (!cache->fci_no_shrink)
+ fld_cache_shrink(cache);
+
+ head = &cache->fci_entries_head;
+
+ list_for_each_entry_safe(f_curr, n, head, fce_list) {
+ /* add list if next is end of list */
+ if (new_end < f_curr->fce_range.lsr_start ||
+ (new_end == f_curr->fce_range.lsr_start &&
+ new_flags != f_curr->fce_range.lsr_flags))
+ break;
+
+ prev = &f_curr->fce_list;
+ /* check if this range is to left of new range. */
+ if (new_start < f_curr->fce_range.lsr_end &&
+ new_flags == f_curr->fce_range.lsr_flags) {
+ fld_cache_overlap_handle(cache, f_curr, f_new);
+ goto out;
+ }
+ }
+
+ if (prev == NULL)
+ prev = head;
+
+ CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
+ /* Add new entry to cache and lru list. */
+ fld_cache_entry_add(cache, f_new, prev);
+out:
+ RETURN(0);
+}
+
+int fld_cache_insert(struct fld_cache *cache,
+ const struct lu_seq_range *range)
+{
+ struct fld_cache_entry *flde;
+ int rc;
+
+ flde = fld_cache_entry_create(range);
+ if (IS_ERR(flde))
+ RETURN(PTR_ERR(flde));
+
+ write_lock(&cache->fci_lock);
+ rc = fld_cache_insert_nolock(cache, flde);
+ write_unlock(&cache->fci_lock);
+ if (rc)
+ OBD_FREE_PTR(flde);
+
+ RETURN(rc);
+}
+
+void fld_cache_delete_nolock(struct fld_cache *cache,
+ const struct lu_seq_range *range)
+{
+ struct fld_cache_entry *flde;
+ struct fld_cache_entry *tmp;
+ struct list_head *head;
+
+ head = &cache->fci_entries_head;
+ list_for_each_entry_safe(flde, tmp, head, fce_list) {
+ /* add list if next is end of list */
+ if (range->lsr_start == flde->fce_range.lsr_start ||
+ (range->lsr_end == flde->fce_range.lsr_end &&
+ range->lsr_flags == flde->fce_range.lsr_flags)) {
+ fld_cache_entry_delete(cache, flde);
+ break;
+ }
+ }
+}
+
+/**
+ * Delete FLD entry in FLD cache.
+ *
+ */
+void fld_cache_delete(struct fld_cache *cache,
+ const struct lu_seq_range *range)
+{
+ write_lock(&cache->fci_lock);
+ fld_cache_delete_nolock(cache, range);
+ write_unlock(&cache->fci_lock);
+}
+
+struct fld_cache_entry
+*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
+ struct lu_seq_range *range)
+{
+ struct fld_cache_entry *flde;
+ struct fld_cache_entry *got = NULL;
+ struct list_head *head;
+
+ head = &cache->fci_entries_head;
+ list_for_each_entry(flde, head, fce_list) {
+ if (range->lsr_start == flde->fce_range.lsr_start ||
+ (range->lsr_end == flde->fce_range.lsr_end &&
+ range->lsr_flags == flde->fce_range.lsr_flags)) {
+ got = flde;
+ break;
+ }
+ }
+
+ RETURN(got);
+}
+
+/**
+ * lookup \a seq sequence for range in fld cache.
+ */
+struct fld_cache_entry
+*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
+{
+ struct fld_cache_entry *got = NULL;
+ ENTRY;
+
+ read_lock(&cache->fci_lock);
+ got = fld_cache_entry_lookup_nolock(cache, range);
+ read_unlock(&cache->fci_lock);
+ RETURN(got);
+}
+
+/**
+ * lookup \a seq sequence for range in fld cache.
+ */
+int fld_cache_lookup(struct fld_cache *cache,
+ const seqno_t seq, struct lu_seq_range *range)
+{
+ struct fld_cache_entry *flde;
+ struct fld_cache_entry *prev = NULL;
+ struct list_head *head;
+ ENTRY;
+
+ read_lock(&cache->fci_lock);
+ head = &cache->fci_entries_head;
+
+ cache->fci_stat.fst_count++;
+ list_for_each_entry(flde, head, fce_list) {
+ if (flde->fce_range.lsr_start > seq) {
+ if (prev != NULL)
+ *range = prev->fce_range;
+ break;
+ }
+
+ prev = flde;
+ if (range_within(&flde->fce_range, seq)) {
+ *range = flde->fce_range;
+
+ cache->fci_stat.fst_cache++;
+ read_unlock(&cache->fci_lock);
+ RETURN(0);
+ }
+ }
+ read_unlock(&cache->fci_lock);
+ RETURN(-ENOENT);
+}
diff --git a/drivers/staging/lustre/lustre/fld/fld_handler.c b/drivers/staging/lustre/lustre/fld/fld_handler.c
new file mode 100644
index 000000000000..d2707ae4ad57
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_handler.c
@@ -0,0 +1,447 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_handler.c
+ *
+ * FLD (Fids Location Database)
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Author: WangDi <wangdi@clusterfs.com>
+ * Author: Pravin Shelar <pravin.shelar@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+# include <linux/jbd.h>
+# include <asm/div64.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <md_object.h>
+#include <lustre_fid.h>
+#include <lustre_req_layout.h>
+#include "fld_internal.h"
+#include <lustre_fid.h>
+
+
+/* context key constructor/destructor: fld_key_init, fld_key_fini */
+LU_KEY_INIT_FINI(fld, struct fld_thread_info);
+
+/* context key: fld_thread_key */
+LU_CONTEXT_KEY_DEFINE(fld, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
+
+proc_dir_entry_t *fld_type_proc_dir = NULL;
+
+static int __init fld_mod_init(void)
+{
+ fld_type_proc_dir = lprocfs_register(LUSTRE_FLD_NAME,
+ proc_lustre_root,
+ NULL, NULL);
+ if (IS_ERR(fld_type_proc_dir))
+ return PTR_ERR(fld_type_proc_dir);
+
+ LU_CONTEXT_KEY_INIT(&fld_thread_key);
+ lu_context_key_register(&fld_thread_key);
+ return 0;
+}
+
+static void __exit fld_mod_exit(void)
+{
+ lu_context_key_degister(&fld_thread_key);
+ if (fld_type_proc_dir != NULL && !IS_ERR(fld_type_proc_dir)) {
+ lprocfs_remove(&fld_type_proc_dir);
+ fld_type_proc_dir = NULL;
+ }
+}
+
+int fld_declare_server_create(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ struct lu_seq_range *range,
+ struct thandle *th)
+{
+ int rc;
+
+ rc = fld_declare_index_create(env, fld, range, th);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(fld_declare_server_create);
+
+/**
+ * Insert FLD index entry and update FLD cache.
+ *
+ * This function is called from the sequence allocator when a super-sequence
+ * is granted to a server.
+ */
+int fld_server_create(const struct lu_env *env, struct lu_server_fld *fld,
+ struct lu_seq_range *range, struct thandle *th)
+{
+ int rc;
+
+ mutex_lock(&fld->lsf_lock);
+ rc = fld_index_create(env, fld, range, th);
+ mutex_unlock(&fld->lsf_lock);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(fld_server_create);
+
+/**
+ * Lookup mds by seq, returns a range for given seq.
+ *
+ * If that entry is not cached in fld cache, request is sent to super
+ * sequence controller node (MDT0). All other MDT[1...N] and client
+ * cache fld entries, but this cache is not persistent.
+ */
+int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+ seqno_t seq, struct lu_seq_range *range)
+{
+ struct lu_seq_range *erange;
+ struct fld_thread_info *info;
+ int rc;
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ LASSERT(info != NULL);
+ erange = &info->fti_lrange;
+
+ /* Lookup it in the cache. */
+ rc = fld_cache_lookup(fld->lsf_cache, seq, erange);
+ if (rc == 0) {
+ if (unlikely(fld_range_type(erange) != fld_range_type(range) &&
+ !fld_range_is_any(range))) {
+ CERROR("%s: FLD cache range "DRANGE" does not match"
+ "requested flag %x: rc = %d\n", fld->lsf_name,
+ PRANGE(erange), range->lsr_flags, -EIO);
+ RETURN(-EIO);
+ }
+ *range = *erange;
+ RETURN(0);
+ }
+
+ if (fld->lsf_obj) {
+ /* On server side, all entries should be in cache.
+ * If we can not find it in cache, just return error */
+ CERROR("%s: Cannot find sequence "LPX64": rc = %d\n",
+ fld->lsf_name, seq, -EIO);
+ RETURN(-EIO);
+ } else {
+ LASSERT(fld->lsf_control_exp);
+ /* send request to mdt0 i.e. super seq. controller.
+ * This is temporary solution, long term solution is fld
+ * replication on all mdt servers.
+ */
+ range->lsr_start = seq;
+ rc = fld_client_rpc(fld->lsf_control_exp,
+ range, FLD_LOOKUP);
+ if (rc == 0)
+ fld_cache_insert(fld->lsf_cache, range);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(fld_server_lookup);
+
+/**
+ * All MDT server handle fld lookup operation. But only MDT0 has fld index.
+ * if entry is not found in cache we need to forward lookup request to MDT0
+ */
+
+static int fld_server_handle(struct lu_server_fld *fld,
+ const struct lu_env *env,
+ __u32 opc, struct lu_seq_range *range,
+ struct fld_thread_info *info)
+{
+ int rc;
+ ENTRY;
+
+ switch (opc) {
+ case FLD_LOOKUP:
+ rc = fld_server_lookup(env, fld, range->lsr_start, range);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ CDEBUG(D_INFO, "%s: FLD req handle: error %d (opc: %d, range: "
+ DRANGE"\n", fld->lsf_name, rc, opc, PRANGE(range));
+
+ RETURN(rc);
+
+}
+
+static int fld_req_handle(struct ptlrpc_request *req,
+ struct fld_thread_info *info)
+{
+ struct obd_export *exp = req->rq_export;
+ struct lu_site *site = exp->exp_obd->obd_lu_dev->ld_site;
+ struct lu_seq_range *in;
+ struct lu_seq_range *out;
+ int rc;
+ __u32 *opc;
+ ENTRY;
+
+ rc = req_capsule_server_pack(info->fti_pill);
+ if (rc)
+ RETURN(err_serious(rc));
+
+ opc = req_capsule_client_get(info->fti_pill, &RMF_FLD_OPC);
+ if (opc != NULL) {
+ in = req_capsule_client_get(info->fti_pill, &RMF_FLD_MDFLD);
+ if (in == NULL)
+ RETURN(err_serious(-EPROTO));
+ out = req_capsule_server_get(info->fti_pill, &RMF_FLD_MDFLD);
+ if (out == NULL)
+ RETURN(err_serious(-EPROTO));
+ *out = *in;
+
+ /* For old 2.0 client, the 'lsr_flags' is uninitialized.
+ * Set it as 'LU_SEQ_RANGE_MDT' by default. */
+ if (!(exp_connect_flags(exp) & OBD_CONNECT_64BITHASH) &&
+ !(exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) &&
+ !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) &&
+ !exp->exp_libclient)
+ fld_range_set_mdt(out);
+
+ rc = fld_server_handle(lu_site2seq(site)->ss_server_fld,
+ req->rq_svc_thread->t_env,
+ *opc, out, info);
+ } else {
+ rc = err_serious(-EPROTO);
+ }
+
+ RETURN(rc);
+}
+
+static void fld_thread_info_init(struct ptlrpc_request *req,
+ struct fld_thread_info *info)
+{
+ info->fti_pill = &req->rq_pill;
+ /* Init request capsule. */
+ req_capsule_init(info->fti_pill, req, RCL_SERVER);
+ req_capsule_set(info->fti_pill, &RQF_FLD_QUERY);
+}
+
+static void fld_thread_info_fini(struct fld_thread_info *info)
+{
+ req_capsule_fini(info->fti_pill);
+}
+
+static int fld_handle(struct ptlrpc_request *req)
+{
+ struct fld_thread_info *info;
+ const struct lu_env *env;
+ int rc;
+
+ env = req->rq_svc_thread->t_env;
+ LASSERT(env != NULL);
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ LASSERT(info != NULL);
+
+ fld_thread_info_init(req, info);
+ rc = fld_req_handle(req, info);
+ fld_thread_info_fini(info);
+
+ return rc;
+}
+
+/*
+ * Entry point for handling FLD RPCs called from MDT.
+ */
+int fld_query(struct com_thread_info *info)
+{
+ return fld_handle(info->cti_pill->rc_req);
+}
+EXPORT_SYMBOL(fld_query);
+
+/*
+ * Returns true, if fid is local to this server node.
+ *
+ * WARNING: this function is *not* guaranteed to return false if fid is
+ * remote: it makes an educated conservative guess only.
+ *
+ * fid_is_local() is supposed to be used in assertion checks only.
+ */
+int fid_is_local(const struct lu_env *env,
+ struct lu_site *site, const struct lu_fid *fid)
+{
+ int result;
+ struct seq_server_site *ss_site;
+ struct lu_seq_range *range;
+ struct fld_thread_info *info;
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ range = &info->fti_lrange;
+
+ result = 1; /* conservatively assume fid is local */
+ ss_site = lu_site2seq(site);
+ if (ss_site->ss_client_fld != NULL) {
+ int rc;
+
+ rc = fld_cache_lookup(ss_site->ss_client_fld->lcf_cache,
+ fid_seq(fid), range);
+ if (rc == 0)
+ result = (range->lsr_index == ss_site->ss_node_id);
+ }
+ return result;
+}
+EXPORT_SYMBOL(fid_is_local);
+
+static void fld_server_proc_fini(struct lu_server_fld *fld);
+
+#ifdef LPROCFS
+static int fld_server_proc_init(struct lu_server_fld *fld)
+{
+ int rc = 0;
+ ENTRY;
+
+ fld->lsf_proc_dir = lprocfs_register(fld->lsf_name,
+ fld_type_proc_dir,
+ fld_server_proc_list, fld);
+ if (IS_ERR(fld->lsf_proc_dir)) {
+ rc = PTR_ERR(fld->lsf_proc_dir);
+ RETURN(rc);
+ }
+
+ rc = lprocfs_seq_create(fld->lsf_proc_dir, "fldb", 0444,
+ &fld_proc_seq_fops, fld);
+ if (rc) {
+ lprocfs_remove(&fld->lsf_proc_dir);
+ fld->lsf_proc_dir = NULL;
+ }
+
+ RETURN(rc);
+}
+
+static void fld_server_proc_fini(struct lu_server_fld *fld)
+{
+ ENTRY;
+ if (fld->lsf_proc_dir != NULL) {
+ if (!IS_ERR(fld->lsf_proc_dir))
+ lprocfs_remove(&fld->lsf_proc_dir);
+ fld->lsf_proc_dir = NULL;
+ }
+ EXIT;
+}
+#else
+static int fld_server_proc_init(struct lu_server_fld *fld)
+{
+ return 0;
+}
+
+static void fld_server_proc_fini(struct lu_server_fld *fld)
+{
+ return;
+}
+#endif
+
+int fld_server_init(const struct lu_env *env, struct lu_server_fld *fld,
+ struct dt_device *dt, const char *prefix, int mds_node_id,
+ int type)
+{
+ int cache_size, cache_threshold;
+ int rc;
+ ENTRY;
+
+ snprintf(fld->lsf_name, sizeof(fld->lsf_name),
+ "srv-%s", prefix);
+
+ cache_size = FLD_SERVER_CACHE_SIZE /
+ sizeof(struct fld_cache_entry);
+
+ cache_threshold = cache_size *
+ FLD_SERVER_CACHE_THRESHOLD / 100;
+
+ mutex_init(&fld->lsf_lock);
+ fld->lsf_cache = fld_cache_init(fld->lsf_name,
+ cache_size, cache_threshold);
+ if (IS_ERR(fld->lsf_cache)) {
+ rc = PTR_ERR(fld->lsf_cache);
+ fld->lsf_cache = NULL;
+ GOTO(out, rc);
+ }
+
+ if (!mds_node_id && type == LU_SEQ_RANGE_MDT) {
+ rc = fld_index_init(env, fld, dt);
+ if (rc)
+ GOTO(out, rc);
+ } else {
+ fld->lsf_obj = NULL;
+ }
+
+ rc = fld_server_proc_init(fld);
+ if (rc)
+ GOTO(out, rc);
+
+ fld->lsf_control_exp = NULL;
+
+ GOTO(out, rc);
+
+out:
+ if (rc)
+ fld_server_fini(env, fld);
+ return rc;
+}
+EXPORT_SYMBOL(fld_server_init);
+
+void fld_server_fini(const struct lu_env *env, struct lu_server_fld *fld)
+{
+ ENTRY;
+
+ fld_server_proc_fini(fld);
+ fld_index_fini(env, fld);
+
+ if (fld->lsf_cache != NULL) {
+ if (!IS_ERR(fld->lsf_cache))
+ fld_cache_fini(fld->lsf_cache);
+ fld->lsf_cache = NULL;
+ }
+
+ EXIT;
+}
+EXPORT_SYMBOL(fld_server_fini);
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre FLD");
+MODULE_LICENSE("GPL");
+
+cfs_module(mdd, "0.1.0", fld_mod_init, fld_mod_exit);
diff --git a/drivers/staging/lustre/lustre/fld/fld_index.c b/drivers/staging/lustre/lustre/fld/fld_index.c
new file mode 100644
index 000000000000..ec68a54c23bd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_index.c
@@ -0,0 +1,426 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_index.c
+ *
+ * Author: WangDi <wangdi@clusterfs.com>
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+# include <linux/jbd.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_mdc.h>
+#include <lustre_fid.h>
+#include <lustre_fld.h>
+#include "fld_internal.h"
+
+const char fld_index_name[] = "fld";
+
+static const struct lu_seq_range IGIF_FLD_RANGE = {
+ .lsr_start = FID_SEQ_IGIF,
+ .lsr_end = FID_SEQ_IGIF_MAX + 1,
+ .lsr_index = 0,
+ .lsr_flags = LU_SEQ_RANGE_MDT
+};
+
+static const struct lu_seq_range DOT_LUSTRE_FLD_RANGE = {
+ .lsr_start = FID_SEQ_DOT_LUSTRE,
+ .lsr_end = FID_SEQ_DOT_LUSTRE + 1,
+ .lsr_index = 0,
+ .lsr_flags = LU_SEQ_RANGE_MDT
+};
+
+static const struct lu_seq_range ROOT_FLD_RANGE = {
+ .lsr_start = FID_SEQ_ROOT,
+ .lsr_end = FID_SEQ_ROOT + 1,
+ .lsr_index = 0,
+ .lsr_flags = LU_SEQ_RANGE_MDT
+};
+
+const struct dt_index_features fld_index_features = {
+ .dif_flags = DT_IND_UPDATE,
+ .dif_keysize_min = sizeof(seqno_t),
+ .dif_keysize_max = sizeof(seqno_t),
+ .dif_recsize_min = sizeof(struct lu_seq_range),
+ .dif_recsize_max = sizeof(struct lu_seq_range),
+ .dif_ptrsize = 4
+};
+
+extern struct lu_context_key fld_thread_key;
+
+int fld_declare_index_create(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ const struct lu_seq_range *new_range,
+ struct thandle *th)
+{
+ struct lu_seq_range *tmp;
+ struct lu_seq_range *range;
+ struct fld_thread_info *info;
+ int rc = 0;
+
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ range = &info->fti_lrange;
+ tmp = &info->fti_irange;
+ memset(range, 0, sizeof(*range));
+
+ rc = fld_index_lookup(env, fld, new_range->lsr_start, range);
+ if (rc == 0) {
+ /* In case of duplicate entry, the location must be same */
+ LASSERT((range_compare_loc(new_range, range) == 0));
+ GOTO(out, rc = -EEXIST);
+ }
+
+ if (rc != -ENOENT) {
+ CERROR("%s: lookup range "DRANGE" error: rc = %d\n",
+ fld->lsf_name, PRANGE(range), rc);
+ GOTO(out, rc);
+ }
+
+ /* Check for merge case, since the fld entry can only be increamental,
+ * so we will only check whether it can be merged from the left. */
+ if (new_range->lsr_start == range->lsr_end && range->lsr_end != 0 &&
+ range_compare_loc(new_range, range) == 0) {
+ range_cpu_to_be(tmp, range);
+ rc = dt_declare_delete(env, fld->lsf_obj,
+ (struct dt_key *)&tmp->lsr_start, th);
+ if (rc) {
+ CERROR("%s: declare record "DRANGE" failed: rc = %d\n",
+ fld->lsf_name, PRANGE(range), rc);
+ GOTO(out, rc);
+ }
+ memcpy(tmp, new_range, sizeof(*new_range));
+ tmp->lsr_start = range->lsr_start;
+ } else {
+ memcpy(tmp, new_range, sizeof(*new_range));
+ }
+
+ range_cpu_to_be(tmp, tmp);
+ rc = dt_declare_insert(env, fld->lsf_obj, (struct dt_rec *)tmp,
+ (struct dt_key *)&tmp->lsr_start, th);
+out:
+ RETURN(rc);
+}
+
+/**
+ * insert range in fld store.
+ *
+ * \param range range to be inserted
+ * \param th transaction for this operation as it could compound
+ * transaction.
+ *
+ * \retval 0 success
+ * \retval -ve error
+ *
+ * The whole fld index insertion is protected by seq->lss_mutex (see
+ * seq_server_alloc_super), i.e. only one thread will access fldb each
+ * time, so we do not need worry the fld file and cache will being
+ * changed between declare and create.
+ * Because the fld entry can only be increamental, so we will only check
+ * whether it can be merged from the left.
+ **/
+int fld_index_create(const struct lu_env *env, struct lu_server_fld *fld,
+ const struct lu_seq_range *new_range, struct thandle *th)
+{
+ struct lu_seq_range *range;
+ struct lu_seq_range *tmp;
+ struct fld_thread_info *info;
+ int rc = 0;
+ int deleted = 0;
+ struct fld_cache_entry *flde;
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+
+ LASSERT(mutex_is_locked(&fld->lsf_lock));
+
+ range = &info->fti_lrange;
+ memset(range, 0, sizeof(*range));
+ tmp = &info->fti_irange;
+ rc = fld_index_lookup(env, fld, new_range->lsr_start, range);
+ if (rc != -ENOENT) {
+ rc = rc == 0 ? -EEXIST : rc;
+ GOTO(out, rc);
+ }
+
+ if (new_range->lsr_start == range->lsr_end && range->lsr_end != 0 &&
+ range_compare_loc(new_range, range) == 0) {
+ range_cpu_to_be(tmp, range);
+ rc = dt_delete(env, fld->lsf_obj,
+ (struct dt_key *)&tmp->lsr_start, th,
+ BYPASS_CAPA);
+ if (rc != 0)
+ GOTO(out, rc);
+ memcpy(tmp, new_range, sizeof(*new_range));
+ tmp->lsr_start = range->lsr_start;
+ deleted = 1;
+ } else {
+ memcpy(tmp, new_range, sizeof(*new_range));
+ }
+
+ range_cpu_to_be(tmp, tmp);
+ rc = dt_insert(env, fld->lsf_obj, (struct dt_rec *)tmp,
+ (struct dt_key *)&tmp->lsr_start, th, BYPASS_CAPA, 1);
+ if (rc != 0) {
+ CERROR("%s: insert range "DRANGE" failed: rc = %d\n",
+ fld->lsf_name, PRANGE(new_range), rc);
+ GOTO(out, rc);
+ }
+
+ flde = fld_cache_entry_create(new_range);
+ if (IS_ERR(flde))
+ GOTO(out, rc = PTR_ERR(flde));
+
+ write_lock(&fld->lsf_cache->fci_lock);
+ if (deleted)
+ fld_cache_delete_nolock(fld->lsf_cache, new_range);
+ rc = fld_cache_insert_nolock(fld->lsf_cache, flde);
+ write_unlock(&fld->lsf_cache->fci_lock);
+ if (rc)
+ OBD_FREE_PTR(flde);
+out:
+ RETURN(rc);
+}
+
+/**
+ * lookup range for a seq passed. note here we only care about the start/end,
+ * caller should handle the attached location data (flags, index).
+ *
+ * \param seq seq for lookup.
+ * \param range result of lookup.
+ *
+ * \retval 0 found, \a range is the matched range;
+ * \retval -ENOENT not found, \a range is the left-side range;
+ * \retval -ve other error;
+ */
+int fld_index_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+ seqno_t seq, struct lu_seq_range *range)
+{
+ struct lu_seq_range *fld_rec;
+ struct fld_thread_info *info;
+ int rc;
+
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ fld_rec = &info->fti_rec;
+
+ rc = fld_cache_lookup(fld->lsf_cache, seq, fld_rec);
+ if (rc == 0) {
+ *range = *fld_rec;
+ if (range_within(range, seq))
+ rc = 0;
+ else
+ rc = -ENOENT;
+ }
+
+ CDEBUG(D_INFO, "%s: lookup seq = "LPX64" range : "DRANGE" rc = %d\n",
+ fld->lsf_name, seq, PRANGE(range), rc);
+
+ RETURN(rc);
+}
+
+int fld_insert_entry(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ const struct lu_seq_range *range)
+{
+ struct thandle *th;
+ int rc;
+ ENTRY;
+
+ th = dt_trans_create(env, lu2dt_dev(fld->lsf_obj->do_lu.lo_dev));
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
+
+ rc = fld_declare_index_create(env, fld, range, th);
+ if (rc != 0) {
+ if (rc == -EEXIST)
+ rc = 0;
+ GOTO(out, rc);
+ }
+
+ rc = dt_trans_start_local(env, lu2dt_dev(fld->lsf_obj->do_lu.lo_dev),
+ th);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = fld_index_create(env, fld, range, th);
+ if (rc == -EEXIST)
+ rc = 0;
+out:
+ dt_trans_stop(env, lu2dt_dev(fld->lsf_obj->do_lu.lo_dev), th);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(fld_insert_entry);
+
+static int fld_insert_special_entries(const struct lu_env *env,
+ struct lu_server_fld *fld)
+{
+ int rc;
+
+ rc = fld_insert_entry(env, fld, &IGIF_FLD_RANGE);
+ if (rc != 0)
+ RETURN(rc);
+
+ rc = fld_insert_entry(env, fld, &DOT_LUSTRE_FLD_RANGE);
+ if (rc != 0)
+ RETURN(rc);
+
+ rc = fld_insert_entry(env, fld, &ROOT_FLD_RANGE);
+
+ RETURN(rc);
+}
+
+int fld_index_init(const struct lu_env *env, struct lu_server_fld *fld,
+ struct dt_device *dt)
+{
+ struct dt_object *dt_obj = NULL;
+ struct lu_fid fid;
+ struct lu_attr *attr = NULL;
+ struct lu_seq_range *range = NULL;
+ struct fld_thread_info *info;
+ struct dt_object_format dof;
+ struct dt_it *it;
+ const struct dt_it_ops *iops;
+ int rc;
+ ENTRY;
+
+ info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+ LASSERT(info != NULL);
+
+ lu_local_obj_fid(&fid, FLD_INDEX_OID);
+ OBD_ALLOC_PTR(attr);
+ if (attr == NULL)
+ RETURN(-ENOMEM);
+
+ memset(attr, 0, sizeof(*attr));
+ attr->la_valid = LA_MODE;
+ attr->la_mode = S_IFREG | 0666;
+ dof.dof_type = DFT_INDEX;
+ dof.u.dof_idx.di_feat = &fld_index_features;
+
+ dt_obj = dt_find_or_create(env, dt, &fid, &dof, attr);
+ if (IS_ERR(dt_obj)) {
+ rc = PTR_ERR(dt_obj);
+ CERROR("%s: Can't find \"%s\" obj %d\n", fld->lsf_name,
+ fld_index_name, rc);
+ dt_obj = NULL;
+ GOTO(out, rc);
+ }
+
+ fld->lsf_obj = dt_obj;
+ rc = dt_obj->do_ops->do_index_try(env, dt_obj, &fld_index_features);
+ if (rc != 0) {
+ CERROR("%s: File \"%s\" is not an index: rc = %d!\n",
+ fld->lsf_name, fld_index_name, rc);
+ GOTO(out, rc);
+ }
+
+ range = &info->fti_rec;
+ /* Load fld entry to cache */
+ iops = &dt_obj->do_index_ops->dio_it;
+ it = iops->init(env, dt_obj, 0, NULL);
+ if (IS_ERR(it))
+ GOTO(out, rc = PTR_ERR(it));
+
+ rc = iops->load(env, it, 0);
+ if (rc < 0)
+ GOTO(out_it_fini, rc);
+
+ if (rc > 0) {
+ /* Load FLD entry into server cache */
+ do {
+ rc = iops->rec(env, it, (struct dt_rec *)range, 0);
+ if (rc != 0)
+ GOTO(out_it_put, rc);
+ LASSERT(range != NULL);
+ range_be_to_cpu(range, range);
+ rc = fld_cache_insert(fld->lsf_cache, range);
+ if (rc != 0)
+ GOTO(out_it_put, rc);
+ rc = iops->next(env, it);
+ } while (rc == 0);
+ }
+
+ /* Note: fld_insert_entry will detect whether these
+ * special entries already exist inside FLDB */
+ mutex_lock(&fld->lsf_lock);
+ rc = fld_insert_special_entries(env, fld);
+ mutex_unlock(&fld->lsf_lock);
+ if (rc != 0) {
+ CERROR("%s: insert special entries failed!: rc = %d\n",
+ fld->lsf_name, rc);
+ GOTO(out_it_put, rc);
+ }
+
+out_it_put:
+ iops->put(env, it);
+out_it_fini:
+ iops->fini(env, it);
+out:
+ if (attr != NULL)
+ OBD_FREE_PTR(attr);
+
+ if (rc != 0) {
+ if (dt_obj != NULL)
+ lu_object_put(env, &dt_obj->do_lu);
+ fld->lsf_obj = NULL;
+ }
+ RETURN(rc);
+}
+
+void fld_index_fini(const struct lu_env *env, struct lu_server_fld *fld)
+{
+ ENTRY;
+ if (fld->lsf_obj != NULL) {
+ if (!IS_ERR(fld->lsf_obj))
+ lu_object_put(env, &fld->lsf_obj->do_lu);
+ fld->lsf_obj = NULL;
+ }
+ EXIT;
+}
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
new file mode 100644
index 000000000000..9fa9e01cdb67
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -0,0 +1,223 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_internal.h
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Author: Tom WangDi <wangdi@clusterfs.com>
+ */
+#ifndef __FLD_INTERNAL_H
+#define __FLD_INTERNAL_H
+
+#include <lustre/lustre_idl.h>
+#include <dt_object.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre_req_layout.h>
+#include <lustre_fld.h>
+
+enum {
+ LUSTRE_FLD_INIT = 1 << 0,
+ LUSTRE_FLD_RUN = 1 << 1
+};
+
+struct fld_stats {
+ __u64 fst_count;
+ __u64 fst_cache;
+ __u64 fst_inflight;
+};
+
+typedef int (*fld_hash_func_t) (struct lu_client_fld *, __u64);
+
+typedef struct lu_fld_target *
+(*fld_scan_func_t) (struct lu_client_fld *, __u64);
+
+struct lu_fld_hash {
+ const char *fh_name;
+ fld_hash_func_t fh_hash_func;
+ fld_scan_func_t fh_scan_func;
+};
+
+struct fld_cache_entry {
+ struct list_head fce_lru;
+ struct list_head fce_list;
+ /**
+ * fld cache entries are sorted on range->lsr_start field. */
+ struct lu_seq_range fce_range;
+};
+
+struct fld_cache {
+ /**
+ * Cache guard, protects fci_hash mostly because others immutable after
+ * init is finished.
+ */
+ rwlock_t fci_lock;
+
+ /**
+ * Cache shrink threshold */
+ int fci_threshold;
+
+ /**
+ * Prefered number of cached entries */
+ int fci_cache_size;
+
+ /**
+ * Current number of cached entries. Protected by \a fci_lock */
+ int fci_cache_count;
+
+ /**
+ * LRU list fld entries. */
+ struct list_head fci_lru;
+
+ /**
+ * sorted fld entries. */
+ struct list_head fci_entries_head;
+
+ /**
+ * Cache statistics. */
+ struct fld_stats fci_stat;
+
+ /**
+ * Cache name used for debug and messages. */
+ char fci_name[80];
+ unsigned int fci_no_shrink:1;
+};
+
+enum fld_op {
+ FLD_CREATE = 0,
+ FLD_DELETE = 1,
+ FLD_LOOKUP = 2
+};
+
+enum {
+ /* 4M of FLD cache will not hurt client a lot. */
+ FLD_SERVER_CACHE_SIZE = (4 * 0x100000),
+
+ /* 1M of FLD cache will not hurt client a lot. */
+ FLD_CLIENT_CACHE_SIZE = (1 * 0x100000)
+};
+
+enum {
+ /* Cache threshold is 10 percent of size. */
+ FLD_SERVER_CACHE_THRESHOLD = 10,
+
+ /* Cache threshold is 10 percent of size. */
+ FLD_CLIENT_CACHE_THRESHOLD = 10
+};
+
+extern struct lu_fld_hash fld_hash[];
+
+
+struct fld_thread_info {
+ struct req_capsule *fti_pill;
+ __u64 fti_key;
+ struct lu_seq_range fti_rec;
+ struct lu_seq_range fti_lrange;
+ struct lu_seq_range fti_irange;
+};
+
+extern struct lu_context_key fld_thread_key;
+
+int fld_index_init(const struct lu_env *env, struct lu_server_fld *fld,
+ struct dt_device *dt);
+
+void fld_index_fini(const struct lu_env *env, struct lu_server_fld *fld);
+
+int fld_declare_index_create(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ const struct lu_seq_range *new,
+ struct thandle *th);
+
+int fld_index_create(const struct lu_env *env, struct lu_server_fld *fld,
+ const struct lu_seq_range *new, struct thandle *th);
+
+int fld_index_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+ seqno_t seq, struct lu_seq_range *range);
+
+int fld_client_rpc(struct obd_export *exp,
+ struct lu_seq_range *range, __u32 fld_op);
+
+#ifdef LPROCFS
+extern struct lprocfs_vars fld_server_proc_list[];
+extern struct lprocfs_vars fld_client_proc_list[];
+#endif
+
+
+struct fld_cache *fld_cache_init(const char *name,
+ int cache_size, int cache_threshold);
+
+void fld_cache_fini(struct fld_cache *cache);
+
+void fld_cache_flush(struct fld_cache *cache);
+
+int fld_cache_insert(struct fld_cache *cache,
+ const struct lu_seq_range *range);
+
+struct fld_cache_entry
+*fld_cache_entry_create(const struct lu_seq_range *range);
+
+int fld_cache_insert_nolock(struct fld_cache *cache,
+ struct fld_cache_entry *f_new);
+void fld_cache_delete(struct fld_cache *cache,
+ const struct lu_seq_range *range);
+void fld_cache_delete_nolock(struct fld_cache *cache,
+ const struct lu_seq_range *range);
+int fld_cache_lookup(struct fld_cache *cache,
+ const seqno_t seq, struct lu_seq_range *range);
+
+struct fld_cache_entry*
+fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range);
+void fld_cache_entry_delete(struct fld_cache *cache,
+ struct fld_cache_entry *node);
+void fld_dump_cache_entries(struct fld_cache *cache);
+
+struct fld_cache_entry
+*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
+ struct lu_seq_range *range);
+int fld_write_range(const struct lu_env *env, struct dt_object *dt,
+ const struct lu_seq_range *range, struct thandle *th);
+
+static inline const char *
+fld_target_name(struct lu_fld_target *tar)
+{
+ if (tar->ft_srv != NULL)
+ return tar->ft_srv->lsf_name;
+
+ return (const char *)tar->ft_exp->exp_obd->obd_name;
+}
+
+extern proc_dir_entry_t *fld_type_proc_dir;
+extern struct file_operations fld_proc_seq_fops;
+#endif /* __FLD_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
new file mode 100644
index 000000000000..e9f07398b68a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -0,0 +1,519 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_request.c
+ *
+ * FLD (Fids Location Database)
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+# include <linux/jbd.h>
+# include <asm/div64.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_req_layout.h>
+#include <lustre_fld.h>
+#include <lustre_mdc.h>
+#include "fld_internal.h"
+
+/* TODO: these 3 functions are copies of flow-control code from mdc_lib.c
+ * It should be common thing. The same about mdc RPC lock */
+static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
+{
+ int rc;
+ ENTRY;
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = list_empty(&mcw->mcw_entry);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ RETURN(rc);
+};
+
+static void fld_enter_request(struct client_obd *cli)
+{
+ struct mdc_cache_waiter mcw;
+ struct l_wait_info lwi = { 0 };
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+ list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
+ init_waitqueue_head(&mcw.mcw_waitq);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
+ } else {
+ cli->cl_r_in_flight++;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ }
+}
+
+static void fld_exit_request(struct client_obd *cli)
+{
+ struct list_head *l, *tmp;
+ struct mdc_cache_waiter *mcw;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_r_in_flight--;
+ list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
+
+ if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+ /* No free request slots anymore */
+ break;
+ }
+
+ mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
+ list_del_init(&mcw->mcw_entry);
+ cli->cl_r_in_flight++;
+ wake_up(&mcw->mcw_waitq);
+ }
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+}
+
+static int fld_rrb_hash(struct lu_client_fld *fld,
+ seqno_t seq)
+{
+ LASSERT(fld->lcf_count > 0);
+ return do_div(seq, fld->lcf_count);
+}
+
+static struct lu_fld_target *
+fld_rrb_scan(struct lu_client_fld *fld, seqno_t seq)
+{
+ struct lu_fld_target *target;
+ int hash;
+ ENTRY;
+
+ /* Because almost all of special sequence located in MDT0,
+ * it should go to index 0 directly, instead of calculating
+ * hash again, and also if other MDTs is not being connected,
+ * the fld lookup requests(for seq on MDT0) should not be
+ * blocked because of other MDTs */
+ if (fid_seq_is_norm(seq))
+ hash = fld_rrb_hash(fld, seq);
+ else
+ hash = 0;
+
+ list_for_each_entry(target, &fld->lcf_targets, ft_chain) {
+ if (target->ft_idx == hash)
+ RETURN(target);
+ }
+
+ CERROR("%s: Can't find target by hash %d (seq "LPX64"). "
+ "Targets (%d):\n", fld->lcf_name, hash, seq,
+ fld->lcf_count);
+
+ list_for_each_entry(target, &fld->lcf_targets, ft_chain) {
+ const char *srv_name = target->ft_srv != NULL ?
+ target->ft_srv->lsf_name : "<null>";
+ const char *exp_name = target->ft_exp != NULL ?
+ (char *)target->ft_exp->exp_obd->obd_uuid.uuid :
+ "<null>";
+
+ CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: "LPU64"\n",
+ target->ft_exp, exp_name, target->ft_srv,
+ srv_name, target->ft_idx);
+ }
+
+ /*
+ * If target is not found, there is logical error anyway, so here is
+ * LBUG() to catch this situation.
+ */
+ LBUG();
+ RETURN(NULL);
+}
+
+struct lu_fld_hash fld_hash[] = {
+ {
+ .fh_name = "RRB",
+ .fh_hash_func = fld_rrb_hash,
+ .fh_scan_func = fld_rrb_scan
+ },
+ {
+ 0,
+ }
+};
+
+static struct lu_fld_target *
+fld_client_get_target(struct lu_client_fld *fld, seqno_t seq)
+{
+ struct lu_fld_target *target;
+ ENTRY;
+
+ LASSERT(fld->lcf_hash != NULL);
+
+ spin_lock(&fld->lcf_lock);
+ target = fld->lcf_hash->fh_scan_func(fld, seq);
+ spin_unlock(&fld->lcf_lock);
+
+ if (target != NULL) {
+ CDEBUG(D_INFO, "%s: Found target (idx "LPU64
+ ") by seq "LPX64"\n", fld->lcf_name,
+ target->ft_idx, seq);
+ }
+
+ RETURN(target);
+}
+
+/*
+ * Add export to FLD. This is usually done by CMM and LMV as they are main users
+ * of FLD module.
+ */
+int fld_client_add_target(struct lu_client_fld *fld,
+ struct lu_fld_target *tar)
+{
+ const char *name;
+ struct lu_fld_target *target, *tmp;
+ ENTRY;
+
+ LASSERT(tar != NULL);
+ name = fld_target_name(tar);
+ LASSERT(name != NULL);
+ LASSERT(tar->ft_srv != NULL || tar->ft_exp != NULL);
+
+ if (fld->lcf_flags != LUSTRE_FLD_INIT) {
+ CERROR("%s: Attempt to add target %s (idx "LPU64") "
+ "on fly - skip it\n", fld->lcf_name, name,
+ tar->ft_idx);
+ RETURN(0);
+ } else {
+ CDEBUG(D_INFO, "%s: Adding target %s (idx "
+ LPU64")\n", fld->lcf_name, name, tar->ft_idx);
+ }
+
+ OBD_ALLOC_PTR(target);
+ if (target == NULL)
+ RETURN(-ENOMEM);
+
+ spin_lock(&fld->lcf_lock);
+ list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) {
+ if (tmp->ft_idx == tar->ft_idx) {
+ spin_unlock(&fld->lcf_lock);
+ OBD_FREE_PTR(target);
+ CERROR("Target %s exists in FLD and known as %s:#"LPU64"\n",
+ name, fld_target_name(tmp), tmp->ft_idx);
+ RETURN(-EEXIST);
+ }
+ }
+
+ target->ft_exp = tar->ft_exp;
+ if (target->ft_exp != NULL)
+ class_export_get(target->ft_exp);
+ target->ft_srv = tar->ft_srv;
+ target->ft_idx = tar->ft_idx;
+
+ list_add_tail(&target->ft_chain,
+ &fld->lcf_targets);
+
+ fld->lcf_count++;
+ spin_unlock(&fld->lcf_lock);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(fld_client_add_target);
+
+/* Remove export from FLD */
+int fld_client_del_target(struct lu_client_fld *fld, __u64 idx)
+{
+ struct lu_fld_target *target, *tmp;
+ ENTRY;
+
+ spin_lock(&fld->lcf_lock);
+ list_for_each_entry_safe(target, tmp,
+ &fld->lcf_targets, ft_chain) {
+ if (target->ft_idx == idx) {
+ fld->lcf_count--;
+ list_del(&target->ft_chain);
+ spin_unlock(&fld->lcf_lock);
+
+ if (target->ft_exp != NULL)
+ class_export_put(target->ft_exp);
+
+ OBD_FREE_PTR(target);
+ RETURN(0);
+ }
+ }
+ spin_unlock(&fld->lcf_lock);
+ RETURN(-ENOENT);
+}
+EXPORT_SYMBOL(fld_client_del_target);
+
+#ifdef LPROCFS
+static int fld_client_proc_init(struct lu_client_fld *fld)
+{
+ int rc;
+ ENTRY;
+
+ fld->lcf_proc_dir = lprocfs_register(fld->lcf_name,
+ fld_type_proc_dir,
+ NULL, NULL);
+
+ if (IS_ERR(fld->lcf_proc_dir)) {
+ CERROR("%s: LProcFS failed in fld-init\n",
+ fld->lcf_name);
+ rc = PTR_ERR(fld->lcf_proc_dir);
+ RETURN(rc);
+ }
+
+ rc = lprocfs_add_vars(fld->lcf_proc_dir,
+ fld_client_proc_list, fld);
+ if (rc) {
+ CERROR("%s: Can't init FLD proc, rc %d\n",
+ fld->lcf_name, rc);
+ GOTO(out_cleanup, rc);
+ }
+
+ RETURN(0);
+
+out_cleanup:
+ fld_client_proc_fini(fld);
+ return rc;
+}
+
+void fld_client_proc_fini(struct lu_client_fld *fld)
+{
+ ENTRY;
+ if (fld->lcf_proc_dir) {
+ if (!IS_ERR(fld->lcf_proc_dir))
+ lprocfs_remove(&fld->lcf_proc_dir);
+ fld->lcf_proc_dir = NULL;
+ }
+ EXIT;
+}
+#else
+static int fld_client_proc_init(struct lu_client_fld *fld)
+{
+ return 0;
+}
+
+void fld_client_proc_fini(struct lu_client_fld *fld)
+{
+ return;
+}
+#endif
+
+EXPORT_SYMBOL(fld_client_proc_fini);
+
+static inline int hash_is_sane(int hash)
+{
+ return (hash >= 0 && hash < ARRAY_SIZE(fld_hash));
+}
+
+int fld_client_init(struct lu_client_fld *fld,
+ const char *prefix, int hash)
+{
+ int cache_size, cache_threshold;
+ int rc;
+ ENTRY;
+
+ LASSERT(fld != NULL);
+
+ snprintf(fld->lcf_name, sizeof(fld->lcf_name),
+ "cli-%s", prefix);
+
+ if (!hash_is_sane(hash)) {
+ CERROR("%s: Wrong hash function %#x\n",
+ fld->lcf_name, hash);
+ RETURN(-EINVAL);
+ }
+
+ fld->lcf_count = 0;
+ spin_lock_init(&fld->lcf_lock);
+ fld->lcf_hash = &fld_hash[hash];
+ fld->lcf_flags = LUSTRE_FLD_INIT;
+ INIT_LIST_HEAD(&fld->lcf_targets);
+
+ cache_size = FLD_CLIENT_CACHE_SIZE /
+ sizeof(struct fld_cache_entry);
+
+ cache_threshold = cache_size *
+ FLD_CLIENT_CACHE_THRESHOLD / 100;
+
+ fld->lcf_cache = fld_cache_init(fld->lcf_name,
+ cache_size, cache_threshold);
+ if (IS_ERR(fld->lcf_cache)) {
+ rc = PTR_ERR(fld->lcf_cache);
+ fld->lcf_cache = NULL;
+ GOTO(out, rc);
+ }
+
+ rc = fld_client_proc_init(fld);
+ if (rc)
+ GOTO(out, rc);
+ EXIT;
+out:
+ if (rc)
+ fld_client_fini(fld);
+ else
+ CDEBUG(D_INFO, "%s: Using \"%s\" hash\n",
+ fld->lcf_name, fld->lcf_hash->fh_name);
+ return rc;
+}
+EXPORT_SYMBOL(fld_client_init);
+
+void fld_client_fini(struct lu_client_fld *fld)
+{
+ struct lu_fld_target *target, *tmp;
+ ENTRY;
+
+ spin_lock(&fld->lcf_lock);
+ list_for_each_entry_safe(target, tmp,
+ &fld->lcf_targets, ft_chain) {
+ fld->lcf_count--;
+ list_del(&target->ft_chain);
+ if (target->ft_exp != NULL)
+ class_export_put(target->ft_exp);
+ OBD_FREE_PTR(target);
+ }
+ spin_unlock(&fld->lcf_lock);
+
+ if (fld->lcf_cache != NULL) {
+ if (!IS_ERR(fld->lcf_cache))
+ fld_cache_fini(fld->lcf_cache);
+ fld->lcf_cache = NULL;
+ }
+
+ EXIT;
+}
+EXPORT_SYMBOL(fld_client_fini);
+
+int fld_client_rpc(struct obd_export *exp,
+ struct lu_seq_range *range, __u32 fld_op)
+{
+ struct ptlrpc_request *req;
+ struct lu_seq_range *prange;
+ __u32 *op;
+ int rc;
+ struct obd_import *imp;
+ ENTRY;
+
+ LASSERT(exp != NULL);
+
+ imp = class_exp2cliimp(exp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION,
+ FLD_QUERY);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
+ *op = fld_op;
+
+ prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD);
+ *prange = *range;
+
+ ptlrpc_request_set_replen(req);
+ req->rq_request_portal = FLD_REQUEST_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ if (fld_op == FLD_LOOKUP &&
+ imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
+ req->rq_allow_replay = 1;
+
+ if (fld_op != FLD_LOOKUP)
+ mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ fld_enter_request(&exp->exp_obd->u.cli);
+ rc = ptlrpc_queue_wait(req);
+ fld_exit_request(&exp->exp_obd->u.cli);
+ if (fld_op != FLD_LOOKUP)
+ mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ if (rc)
+ GOTO(out_req, rc);
+
+ prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
+ if (prange == NULL)
+ GOTO(out_req, rc = -EFAULT);
+ *range = *prange;
+ EXIT;
+out_req:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+int fld_client_lookup(struct lu_client_fld *fld, seqno_t seq, mdsno_t *mds,
+ __u32 flags, const struct lu_env *env)
+{
+ struct lu_seq_range res = { 0 };
+ struct lu_fld_target *target;
+ int rc;
+ ENTRY;
+
+ fld->lcf_flags |= LUSTRE_FLD_RUN;
+
+ rc = fld_cache_lookup(fld->lcf_cache, seq, &res);
+ if (rc == 0) {
+ *mds = res.lsr_index;
+ RETURN(0);
+ }
+
+ /* Can not find it in the cache */
+ target = fld_client_get_target(fld, seq);
+ LASSERT(target != NULL);
+
+ CDEBUG(D_INFO, "%s: Lookup fld entry (seq: "LPX64") on "
+ "target %s (idx "LPU64")\n", fld->lcf_name, seq,
+ fld_target_name(target), target->ft_idx);
+
+ res.lsr_start = seq;
+ fld_range_set_type(&res, flags);
+ if (target->ft_srv != NULL) {
+ LASSERT(env != NULL);
+ rc = fld_server_lookup(env, target->ft_srv, seq, &res);
+ } else {
+ rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP);
+ }
+
+ if (rc == 0) {
+ *mds = res.lsr_index;
+
+ fld_cache_insert(fld->lcf_cache, &res);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(fld_client_lookup);
+
+void fld_client_flush(struct lu_client_fld *fld)
+{
+ fld_cache_flush(fld->lcf_cache);
+}
+EXPORT_SYMBOL(fld_client_flush);
diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c
new file mode 100644
index 000000000000..c1bd80339e67
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/lproc_fld.c
@@ -0,0 +1,373 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/lproc_fld.c
+ *
+ * FLD (FIDs Location Database)
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Di Wang <di.wang@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <dt_object.h>
+#include <md_object.h>
+#include <obd_support.h>
+#include <lustre_req_layout.h>
+#include <lustre_fld.h>
+#include <lustre_fid.h>
+#include "fld_internal.h"
+
+#ifdef LPROCFS
+static int
+fld_proc_targets_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_fld *fld = (struct lu_client_fld *)m->private;
+ struct lu_fld_target *target;
+ ENTRY;
+
+ LASSERT(fld != NULL);
+
+ spin_lock(&fld->lcf_lock);
+ list_for_each_entry(target,
+ &fld->lcf_targets, ft_chain)
+ seq_printf(m, "%s\n", fld_target_name(target));
+ spin_unlock(&fld->lcf_lock);
+
+ RETURN(0);
+}
+
+static int
+fld_proc_hash_seq_show(struct seq_file *m, void *unused)
+{
+ struct lu_client_fld *fld = (struct lu_client_fld *)m->private;
+ ENTRY;
+
+ LASSERT(fld != NULL);
+
+ spin_lock(&fld->lcf_lock);
+ seq_printf(m, "%s\n", fld->lcf_hash->fh_name);
+ spin_unlock(&fld->lcf_lock);
+
+ RETURN(0);
+}
+
+static ssize_t
+fld_proc_hash_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct lu_client_fld *fld = ((struct seq_file *)file->private_data)->private;
+ struct lu_fld_hash *hash = NULL;
+ int i;
+ ENTRY;
+
+ LASSERT(fld != NULL);
+
+ for (i = 0; fld_hash[i].fh_name != NULL; i++) {
+ if (count != strlen(fld_hash[i].fh_name))
+ continue;
+
+ if (!strncmp(fld_hash[i].fh_name, buffer, count)) {
+ hash = &fld_hash[i];
+ break;
+ }
+ }
+
+ if (hash != NULL) {
+ spin_lock(&fld->lcf_lock);
+ fld->lcf_hash = hash;
+ spin_unlock(&fld->lcf_lock);
+
+ CDEBUG(D_INFO, "%s: Changed hash to \"%s\"\n",
+ fld->lcf_name, hash->fh_name);
+ }
+
+ RETURN(count);
+}
+
+static ssize_t
+fld_proc_cache_flush_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ struct lu_client_fld *fld = file->private_data;
+ ENTRY;
+
+ LASSERT(fld != NULL);
+
+ fld_cache_flush(fld->lcf_cache);
+
+ CDEBUG(D_INFO, "%s: Lookup cache is flushed\n", fld->lcf_name);
+
+ RETURN(count);
+}
+
+static int fld_proc_cache_flush_open(struct inode *inode, struct file *file)
+{
+ file->private_data = PDE_DATA(inode);
+ return 0;
+}
+
+static int fld_proc_cache_flush_release(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+ return 0;
+}
+
+struct file_operations fld_proc_cache_flush_fops = {
+ .owner = THIS_MODULE,
+ .open = fld_proc_cache_flush_open,
+ .write = fld_proc_cache_flush_write,
+ .release = fld_proc_cache_flush_release,
+};
+
+struct fld_seq_param {
+ struct lu_env fsp_env;
+ struct dt_it *fsp_it;
+ struct lu_server_fld *fsp_fld;
+ unsigned int fsp_stop:1;
+};
+
+static void *fldb_seq_start(struct seq_file *p, loff_t *pos)
+{
+ struct fld_seq_param *param = p->private;
+ struct lu_server_fld *fld;
+ struct dt_object *obj;
+ const struct dt_it_ops *iops;
+
+ if (param == NULL || param->fsp_stop)
+ return NULL;
+
+ fld = param->fsp_fld;
+ obj = fld->lsf_obj;
+ LASSERT(obj != NULL);
+ iops = &obj->do_index_ops->dio_it;
+
+ iops->load(&param->fsp_env, param->fsp_it, *pos);
+
+ *pos = be64_to_cpu(*(__u64 *)iops->key(&param->fsp_env, param->fsp_it));
+ return param;
+}
+
+static void fldb_seq_stop(struct seq_file *p, void *v)
+{
+ struct fld_seq_param *param = p->private;
+ const struct dt_it_ops *iops;
+ struct lu_server_fld *fld;
+ struct dt_object *obj;
+
+ if (param == NULL)
+ return;
+
+ fld = param->fsp_fld;
+ obj = fld->lsf_obj;
+ LASSERT(obj != NULL);
+ iops = &obj->do_index_ops->dio_it;
+
+ iops->put(&param->fsp_env, param->fsp_it);
+}
+
+static void *fldb_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct fld_seq_param *param = p->private;
+ struct lu_server_fld *fld;
+ struct dt_object *obj;
+ const struct dt_it_ops *iops;
+ int rc;
+
+ if (param == NULL || param->fsp_stop)
+ return NULL;
+
+ fld = param->fsp_fld;
+ obj = fld->lsf_obj;
+ LASSERT(obj != NULL);
+ iops = &obj->do_index_ops->dio_it;
+
+ rc = iops->next(&param->fsp_env, param->fsp_it);
+ if (rc > 0) {
+ param->fsp_stop = 1;
+ return NULL;
+ }
+
+ *pos = be64_to_cpu(*(__u64 *)iops->key(&param->fsp_env, param->fsp_it));
+ return param;
+}
+
+static int fldb_seq_show(struct seq_file *p, void *v)
+{
+ struct fld_seq_param *param = p->private;
+ struct lu_server_fld *fld;
+ struct dt_object *obj;
+ const struct dt_it_ops *iops;
+ struct fld_thread_info *info;
+ struct lu_seq_range *fld_rec;
+ int rc;
+
+ if (param == NULL || param->fsp_stop)
+ return 0;
+
+ fld = param->fsp_fld;
+ obj = fld->lsf_obj;
+ LASSERT(obj != NULL);
+ iops = &obj->do_index_ops->dio_it;
+
+ info = lu_context_key_get(&param->fsp_env.le_ctx,
+ &fld_thread_key);
+ fld_rec = &info->fti_rec;
+ rc = iops->rec(&param->fsp_env, param->fsp_it,
+ (struct dt_rec *)fld_rec, 0);
+ if (rc != 0) {
+ CERROR("%s:read record error: rc %d\n",
+ fld->lsf_name, rc);
+ } else if (fld_rec->lsr_start != 0) {
+ range_be_to_cpu(fld_rec, fld_rec);
+ rc = seq_printf(p, DRANGE"\n", PRANGE(fld_rec));
+ }
+
+ return rc;
+}
+
+struct seq_operations fldb_sops = {
+ .start = fldb_seq_start,
+ .stop = fldb_seq_stop,
+ .next = fldb_seq_next,
+ .show = fldb_seq_show,
+};
+
+static int fldb_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ struct lu_server_fld *fld = (struct lu_server_fld *)PDE_DATA(inode);
+ struct dt_object *obj;
+ const struct dt_it_ops *iops;
+ struct fld_seq_param *param = NULL;
+ int env_init = 0;
+ int rc;
+
+ rc = seq_open(file, &fldb_sops);
+ if (rc)
+ GOTO(out, rc);
+
+ obj = fld->lsf_obj;
+ if (obj == NULL) {
+ seq = file->private_data;
+ seq->private = NULL;
+ return 0;
+ }
+
+ OBD_ALLOC_PTR(param);
+ if (param == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = lu_env_init(&param->fsp_env, LCT_MD_THREAD);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ env_init = 1;
+ iops = &obj->do_index_ops->dio_it;
+ param->fsp_it = iops->init(&param->fsp_env, obj, 0, NULL);
+ if (IS_ERR(param->fsp_it))
+ GOTO(out, rc = PTR_ERR(param->fsp_it));
+
+ param->fsp_fld = fld;
+ param->fsp_stop = 0;
+
+ seq = file->private_data;
+ seq->private = param;
+out:
+ if (rc != 0) {
+ if (env_init == 1)
+ lu_env_fini(&param->fsp_env);
+ if (param != NULL)
+ OBD_FREE_PTR(param);
+ }
+ return rc;
+}
+
+static int fldb_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct fld_seq_param *param;
+ struct lu_server_fld *fld;
+ struct dt_object *obj;
+ const struct dt_it_ops *iops;
+
+ param = seq->private;
+ if (param == NULL) {
+ lprocfs_seq_release(inode, file);
+ return 0;
+ }
+
+ fld = param->fsp_fld;
+ obj = fld->lsf_obj;
+ LASSERT(obj != NULL);
+ iops = &obj->do_index_ops->dio_it;
+
+ LASSERT(iops != NULL);
+ LASSERT(obj != NULL);
+ LASSERT(param->fsp_it != NULL);
+ iops->fini(&param->fsp_env, param->fsp_it);
+ lu_env_fini(&param->fsp_env);
+ OBD_FREE_PTR(param);
+ lprocfs_seq_release(inode, file);
+
+ return 0;
+}
+
+struct lprocfs_vars fld_server_proc_list[] = {
+ { NULL }};
+
+LPROC_SEQ_FOPS_RO(fld_proc_targets);
+LPROC_SEQ_FOPS(fld_proc_hash);
+
+struct lprocfs_vars fld_client_proc_list[] = {
+ { "targets", &fld_proc_targets_fops },
+ { "hash", &fld_proc_hash_fops },
+ { "cache_flush", &fld_proc_cache_flush_fops },
+ { NULL }};
+
+struct file_operations fld_proc_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = fldb_seq_open,
+ .read = seq_read,
+ .release = fldb_seq_release,
+};
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
new file mode 100644
index 000000000000..4bb68801d3a9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -0,0 +1,3279 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef _LUSTRE_CL_OBJECT_H
+#define _LUSTRE_CL_OBJECT_H
+
+/** \defgroup clio clio
+ *
+ * Client objects implement io operations and cache pages.
+ *
+ * Examples: lov and osc are implementations of cl interface.
+ *
+ * Big Theory Statement.
+ *
+ * Layered objects.
+ *
+ * Client implementation is based on the following data-types:
+ *
+ * - cl_object
+ *
+ * - cl_page
+ *
+ * - cl_lock represents an extent lock on an object.
+ *
+ * - cl_io represents high-level i/o activity such as whole read/write
+ * system call, or write-out of pages from under the lock being
+ * canceled. cl_io has sub-ios that can be stopped and resumed
+ * independently, thus achieving high degree of transfer
+ * parallelism. Single cl_io can be advanced forward by
+ * the multiple threads (although in the most usual case of
+ * read/write system call it is associated with the single user
+ * thread, that issued the system call).
+ *
+ * - cl_req represents a collection of pages for a transfer. cl_req is
+ * constructed by req-forming engine that tries to saturate
+ * transport with large and continuous transfers.
+ *
+ * Terminology
+ *
+ * - to avoid confusion high-level I/O operation like read or write system
+ * call is referred to as "an io", whereas low-level I/O operation, like
+ * RPC, is referred to as "a transfer"
+ *
+ * - "generic code" means generic (not file system specific) code in the
+ * hosting environment. "cl-code" means code (mostly in cl_*.c files) that
+ * is not layer specific.
+ *
+ * Locking.
+ *
+ * - i_mutex
+ * - PG_locked
+ * - cl_object_header::coh_page_guard
+ * - cl_object_header::coh_lock_guard
+ * - lu_site::ls_guard
+ *
+ * See the top comment in cl_object.c for the description of overall locking and
+ * reference-counting design.
+ *
+ * See comments below for the description of i/o, page, and dlm-locking
+ * design.
+ *
+ * @{
+ */
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+#include <lvfs.h>
+# include <linux/mutex.h>
+# include <linux/radix-tree.h>
+
+struct inode;
+
+struct cl_device;
+struct cl_device_operations;
+
+struct cl_object;
+struct cl_object_page_operations;
+struct cl_object_lock_operations;
+
+struct cl_page;
+struct cl_page_slice;
+struct cl_lock;
+struct cl_lock_slice;
+
+struct cl_lock_operations;
+struct cl_page_operations;
+
+struct cl_io;
+struct cl_io_slice;
+
+struct cl_req;
+struct cl_req_slice;
+
+/**
+ * Operations for each data device in the client stack.
+ *
+ * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops
+ */
+struct cl_device_operations {
+ /**
+ * Initialize cl_req. This method is called top-to-bottom on all
+ * devices in the stack to get them a chance to allocate layer-private
+ * data, and to attach them to the cl_req by calling
+ * cl_req_slice_add().
+ *
+ * \see osc_req_init(), lov_req_init(), lovsub_req_init()
+ * \see ccc_req_init()
+ */
+ int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
+};
+
+/**
+ * Device in the client stack.
+ *
+ * \see ccc_device, lov_device, lovsub_device, osc_device
+ */
+struct cl_device {
+ /** Super-class. */
+ struct lu_device cd_lu_dev;
+ /** Per-layer operation vector. */
+ const struct cl_device_operations *cd_ops;
+};
+
+/** \addtogroup cl_object cl_object
+ * @{ */
+/**
+ * "Data attributes" of cl_object. Data attributes can be updated
+ * independently for a sub-object, and top-object's attributes are calculated
+ * from sub-objects' ones.
+ */
+struct cl_attr {
+ /** Object size, in bytes */
+ loff_t cat_size;
+ /**
+ * Known minimal size, in bytes.
+ *
+ * This is only valid when at least one DLM lock is held.
+ */
+ loff_t cat_kms;
+ /** Modification time. Measured in seconds since epoch. */
+ time_t cat_mtime;
+ /** Access time. Measured in seconds since epoch. */
+ time_t cat_atime;
+ /** Change time. Measured in seconds since epoch. */
+ time_t cat_ctime;
+ /**
+ * Blocks allocated to this cl_object on the server file system.
+ *
+ * \todo XXX An interface for block size is needed.
+ */
+ __u64 cat_blocks;
+ /**
+ * User identifier for quota purposes.
+ */
+ uid_t cat_uid;
+ /**
+ * Group identifier for quota purposes.
+ */
+ gid_t cat_gid;
+};
+
+/**
+ * Fields in cl_attr that are being set.
+ */
+enum cl_attr_valid {
+ CAT_SIZE = 1 << 0,
+ CAT_KMS = 1 << 1,
+ CAT_MTIME = 1 << 3,
+ CAT_ATIME = 1 << 4,
+ CAT_CTIME = 1 << 5,
+ CAT_BLOCKS = 1 << 6,
+ CAT_UID = 1 << 7,
+ CAT_GID = 1 << 8
+};
+
+/**
+ * Sub-class of lu_object with methods common for objects on the client
+ * stacks.
+ *
+ * cl_object: represents a regular file system object, both a file and a
+ * stripe. cl_object is based on lu_object: it is identified by a fid,
+ * layered, cached, hashed, and lrued. Important distinction with the server
+ * side, where md_object and dt_object are used, is that cl_object "fans out"
+ * at the lov/sns level: depending on the file layout, single file is
+ * represented as a set of "sub-objects" (stripes). At the implementation
+ * level, struct lov_object contains an array of cl_objects. Each sub-object
+ * is a full-fledged cl_object, having its fid, living in the lru and hash
+ * table.
+ *
+ * This leads to the next important difference with the server side: on the
+ * client, it's quite usual to have objects with the different sequence of
+ * layers. For example, typical top-object is composed of the following
+ * layers:
+ *
+ * - vvp
+ * - lov
+ *
+ * whereas its sub-objects are composed of
+ *
+ * - lovsub
+ * - osc
+ *
+ * layers. Here "lovsub" is a mostly dummy layer, whose purpose is to keep
+ * track of the object-subobject relationship.
+ *
+ * Sub-objects are not cached independently: when top-object is about to
+ * be discarded from the memory, all its sub-objects are torn-down and
+ * destroyed too.
+ *
+ * \see ccc_object, lov_object, lovsub_object, osc_object
+ */
+struct cl_object {
+ /** super class */
+ struct lu_object co_lu;
+ /** per-object-layer operations */
+ const struct cl_object_operations *co_ops;
+ /** offset of page slice in cl_page buffer */
+ int co_slice_off;
+};
+
+/**
+ * Description of the client object configuration. This is used for the
+ * creation of a new client object that is identified by a more state than
+ * fid.
+ */
+struct cl_object_conf {
+ /** Super-class. */
+ struct lu_object_conf coc_lu;
+ union {
+ /**
+ * Object layout. This is consumed by lov.
+ */
+ struct lustre_md *coc_md;
+ /**
+ * Description of particular stripe location in the
+ * cluster. This is consumed by osc.
+ */
+ struct lov_oinfo *coc_oinfo;
+ } u;
+ /**
+ * VFS inode. This is consumed by vvp.
+ */
+ struct inode *coc_inode;
+ /**
+ * Layout lock handle.
+ */
+ struct ldlm_lock *coc_lock;
+ /**
+ * Operation to handle layout, OBJECT_CONF_XYZ.
+ */
+ int coc_opc;
+};
+
+enum {
+ /** configure layout, set up a new stripe, must be called while
+ * holding layout lock. */
+ OBJECT_CONF_SET = 0,
+ /** invalidate the current stripe configuration due to losing
+ * layout lock. */
+ OBJECT_CONF_INVALIDATE = 1,
+ /** wait for old layout to go away so that new layout can be
+ * set up. */
+ OBJECT_CONF_WAIT = 2
+};
+
+/**
+ * Operations implemented for each cl object layer.
+ *
+ * \see vvp_ops, lov_ops, lovsub_ops, osc_ops
+ */
+struct cl_object_operations {
+ /**
+ * Initialize page slice for this layer. Called top-to-bottom through
+ * every object layer when a new cl_page is instantiated. Layer
+ * keeping private per-page data, or requiring its own page operations
+ * vector should allocate these data here, and attach then to the page
+ * by calling cl_page_slice_add(). \a vmpage is locked (in the VM
+ * sense). Optional.
+ *
+ * \retval NULL success.
+ *
+ * \retval ERR_PTR(errno) failure code.
+ *
+ * \retval valid-pointer pointer to already existing referenced page
+ * to be used instead of newly created.
+ */
+ int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+ /**
+ * Initialize lock slice for this layer. Called top-to-bottom through
+ * every object layer when a new cl_lock is instantiated. Layer
+ * keeping private per-lock data, or requiring its own lock operations
+ * vector should allocate these data here, and attach then to the lock
+ * by calling cl_lock_slice_add(). Mandatory.
+ */
+ int (*coo_lock_init)(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *io);
+ /**
+ * Initialize io state for a given layer.
+ *
+ * called top-to-bottom once per io existence to initialize io
+ * state. If layer wants to keep some state for this type of io, it
+ * has to embed struct cl_io_slice in lu_env::le_ses, and register
+ * slice with cl_io_slice_add(). It is guaranteed that all threads
+ * participating in this io share the same session.
+ */
+ int (*coo_io_init)(const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io);
+ /**
+ * Fill portion of \a attr that this layer controls. This method is
+ * called top-to-bottom through all object layers.
+ *
+ * \pre cl_object_header::coh_attr_guard of the top-object is locked.
+ *
+ * \return 0: to continue
+ * \return +ve: to stop iterating through layers (but 0 is returned
+ * from enclosing cl_object_attr_get())
+ * \return -ve: to signal error
+ */
+ int (*coo_attr_get)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr);
+ /**
+ * Update attributes.
+ *
+ * \a valid is a bitmask composed from enum #cl_attr_valid, and
+ * indicating what attributes are to be set.
+ *
+ * \pre cl_object_header::coh_attr_guard of the top-object is locked.
+ *
+ * \return the same convention as for
+ * cl_object_operations::coo_attr_get() is used.
+ */
+ int (*coo_attr_set)(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid);
+ /**
+ * Update object configuration. Called top-to-bottom to modify object
+ * configuration.
+ *
+ * XXX error conditions and handling.
+ */
+ int (*coo_conf_set)(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf);
+ /**
+ * Glimpse ast. Executed when glimpse ast arrives for a lock on this
+ * object. Layers are supposed to fill parts of \a lvb that will be
+ * shipped to the glimpse originator as a glimpse result.
+ *
+ * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+ * \see osc_object_glimpse()
+ */
+ int (*coo_glimpse)(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb);
+};
+
+/**
+ * Extended header for client object.
+ */
+struct cl_object_header {
+ /** Standard lu_object_header. cl_object::co_lu::lo_header points
+ * here. */
+ struct lu_object_header coh_lu;
+ /** \name locks
+ * \todo XXX move locks below to the separate cache-lines, they are
+ * mostly useless otherwise.
+ */
+ /** @{ */
+ /** Lock protecting page tree. */
+ spinlock_t coh_page_guard;
+ /** Lock protecting lock list. */
+ spinlock_t coh_lock_guard;
+ /** @} locks */
+ /** Radix tree of cl_page's, cached for this object. */
+ struct radix_tree_root coh_tree;
+ /** # of pages in radix tree. */
+ unsigned long coh_pages;
+ /** List of cl_lock's granted for this object. */
+ struct list_head coh_locks;
+
+ /**
+ * Parent object. It is assumed that an object has a well-defined
+ * parent, but not a well-defined child (there may be multiple
+ * sub-objects, for the same top-object). cl_object_header::coh_parent
+ * field allows certain code to be written generically, without
+ * limiting possible cl_object layouts unduly.
+ */
+ struct cl_object_header *coh_parent;
+ /**
+ * Protects consistency between cl_attr of parent object and
+ * attributes of sub-objects, that the former is calculated ("merged")
+ * from.
+ *
+ * \todo XXX this can be read/write lock if needed.
+ */
+ spinlock_t coh_attr_guard;
+ /**
+ * Size of cl_page + page slices
+ */
+ unsigned short coh_page_bufsize;
+ /**
+ * Number of objects above this one: 0 for a top-object, 1 for its
+ * sub-object, etc.
+ */
+ unsigned char coh_nesting;
+};
+
+/**
+ * Helper macro: iterate over all layers of the object \a obj, assigning every
+ * layer top-to-bottom to \a slice.
+ */
+#define cl_object_for_each(slice, obj) \
+ list_for_each_entry((slice), \
+ &(obj)->co_lu.lo_header->loh_layers, \
+ co_lu.lo_linkage)
+/**
+ * Helper macro: iterate over all layers of the object \a obj, assigning every
+ * layer bottom-to-top to \a slice.
+ */
+#define cl_object_for_each_reverse(slice, obj) \
+ list_for_each_entry_reverse((slice), \
+ &(obj)->co_lu.lo_header->loh_layers, \
+ co_lu.lo_linkage)
+/** @} cl_object */
+
+#ifndef pgoff_t
+#define pgoff_t unsigned long
+#endif
+
+#define CL_PAGE_EOF ((pgoff_t)~0ull)
+
+/** \addtogroup cl_page cl_page
+ * @{ */
+
+/** \struct cl_page
+ * Layered client page.
+ *
+ * cl_page: represents a portion of a file, cached in the memory. All pages
+ * of the given file are of the same size, and are kept in the radix tree
+ * hanging off the cl_object. cl_page doesn't fan out, but as sub-objects
+ * of the top-level file object are first class cl_objects, they have their
+ * own radix trees of pages and hence page is implemented as a sequence of
+ * struct cl_pages's, linked into double-linked list through
+ * cl_page::cp_parent and cl_page::cp_child pointers, each residing in the
+ * corresponding radix tree at the corresponding logical offset.
+ *
+ * cl_page is associated with VM page of the hosting environment (struct
+ * page in Linux kernel, for example), struct page. It is assumed, that this
+ * association is implemented by one of cl_page layers (top layer in the
+ * current design) that
+ *
+ * - intercepts per-VM-page call-backs made by the environment (e.g.,
+ * memory pressure),
+ *
+ * - translates state (page flag bits) and locking between lustre and
+ * environment.
+ *
+ * The association between cl_page and struct page is immutable and
+ * established when cl_page is created.
+ *
+ * cl_page can be "owned" by a particular cl_io (see below), guaranteeing
+ * this io an exclusive access to this page w.r.t. other io attempts and
+ * various events changing page state (such as transfer completion, or
+ * eviction of the page from the memory). Note, that in general cl_io
+ * cannot be identified with a particular thread, and page ownership is not
+ * exactly equal to the current thread holding a lock on the page. Layer
+ * implementing association between cl_page and struct page has to implement
+ * ownership on top of available synchronization mechanisms.
+ *
+ * While lustre client maintains the notion of an page ownership by io,
+ * hosting MM/VM usually has its own page concurrency control
+ * mechanisms. For example, in Linux, page access is synchronized by the
+ * per-page PG_locked bit-lock, and generic kernel code (generic_file_*())
+ * takes care to acquire and release such locks as necessary around the
+ * calls to the file system methods (->readpage(), ->prepare_write(),
+ * ->commit_write(), etc.). This leads to the situation when there are two
+ * different ways to own a page in the client:
+ *
+ * - client code explicitly and voluntary owns the page (cl_page_own());
+ *
+ * - VM locks a page and then calls the client, that has "to assume"
+ * the ownership from the VM (cl_page_assume()).
+ *
+ * Dual methods to release ownership are cl_page_disown() and
+ * cl_page_unassume().
+ *
+ * cl_page is reference counted (cl_page::cp_ref). When reference counter
+ * drops to 0, the page is returned to the cache, unless it is in
+ * cl_page_state::CPS_FREEING state, in which case it is immediately
+ * destroyed.
+ *
+ * The general logic guaranteeing the absence of "existential races" for
+ * pages is the following:
+ *
+ * - there are fixed known ways for a thread to obtain a new reference
+ * to a page:
+ *
+ * - by doing a lookup in the cl_object radix tree, protected by the
+ * spin-lock;
+ *
+ * - by starting from VM-locked struct page and following some
+ * hosting environment method (e.g., following ->private pointer in
+ * the case of Linux kernel), see cl_vmpage_page();
+ *
+ * - when the page enters cl_page_state::CPS_FREEING state, all these
+ * ways are severed with the proper synchronization
+ * (cl_page_delete());
+ *
+ * - entry into cl_page_state::CPS_FREEING is serialized by the VM page
+ * lock;
+ *
+ * - no new references to the page in cl_page_state::CPS_FREEING state
+ * are allowed (checked in cl_page_get()).
+ *
+ * Together this guarantees that when last reference to a
+ * cl_page_state::CPS_FREEING page is released, it is safe to destroy the
+ * page, as neither references to it can be acquired at that point, nor
+ * ones exist.
+ *
+ * cl_page is a state machine. States are enumerated in enum
+ * cl_page_state. Possible state transitions are enumerated in
+ * cl_page_state_set(). State transition process (i.e., actual changing of
+ * cl_page::cp_state field) is protected by the lock on the underlying VM
+ * page.
+ *
+ * Linux Kernel implementation.
+ *
+ * Binding between cl_page and struct page (which is a typedef for
+ * struct page) is implemented in the vvp layer. cl_page is attached to the
+ * ->private pointer of the struct page, together with the setting of
+ * PG_private bit in page->flags, and acquiring additional reference on the
+ * struct page (much like struct buffer_head, or any similar file system
+ * private data structures).
+ *
+ * PG_locked lock is used to implement both ownership and transfer
+ * synchronization, that is, page is VM-locked in CPS_{OWNED,PAGE{IN,OUT}}
+ * states. No additional references are acquired for the duration of the
+ * transfer.
+ *
+ * \warning *THIS IS NOT* the behavior expected by the Linux kernel, where
+ * write-out is "protected" by the special PG_writeback bit.
+ */
+
+/**
+ * States of cl_page. cl_page.c assumes particular order here.
+ *
+ * The page state machine is rather crude, as it doesn't recognize finer page
+ * states like "dirty" or "up to date". This is because such states are not
+ * always well defined for the whole stack (see, for example, the
+ * implementation of the read-ahead, that hides page up-to-dateness to track
+ * cache hits accurately). Such sub-states are maintained by the layers that
+ * are interested in them.
+ */
+enum cl_page_state {
+ /**
+ * Page is in the cache, un-owned. Page leaves cached state in the
+ * following cases:
+ *
+ * - [cl_page_state::CPS_OWNED] io comes across the page and
+ * owns it;
+ *
+ * - [cl_page_state::CPS_PAGEOUT] page is dirty, the
+ * req-formation engine decides that it wants to include this page
+ * into an cl_req being constructed, and yanks it from the cache;
+ *
+ * - [cl_page_state::CPS_FREEING] VM callback is executed to
+ * evict the page form the memory;
+ *
+ * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
+ */
+ CPS_CACHED,
+ /**
+ * Page is exclusively owned by some cl_io. Page may end up in this
+ * state as a result of
+ *
+ * - io creating new page and immediately owning it;
+ *
+ * - [cl_page_state::CPS_CACHED] io finding existing cached page
+ * and owning it;
+ *
+ * - [cl_page_state::CPS_OWNED] io finding existing owned page
+ * and waiting for owner to release the page;
+ *
+ * Page leaves owned state in the following cases:
+ *
+ * - [cl_page_state::CPS_CACHED] io decides to leave the page in
+ * the cache, doing nothing;
+ *
+ * - [cl_page_state::CPS_PAGEIN] io starts read transfer for
+ * this page;
+ *
+ * - [cl_page_state::CPS_PAGEOUT] io starts immediate write
+ * transfer for this page;
+ *
+ * - [cl_page_state::CPS_FREEING] io decides to destroy this
+ * page (e.g., as part of truncate or extent lock cancellation).
+ *
+ * \invariant cl_page::cp_owner != NULL && cl_page::cp_req == NULL
+ */
+ CPS_OWNED,
+ /**
+ * Page is being written out, as a part of a transfer. This state is
+ * entered when req-formation logic decided that it wants this page to
+ * be sent through the wire _now_. Specifically, it means that once
+ * this state is achieved, transfer completion handler (with either
+ * success or failure indication) is guaranteed to be executed against
+ * this page independently of any locks and any scheduling decisions
+ * made by the hosting environment (that effectively means that the
+ * page is never put into cl_page_state::CPS_PAGEOUT state "in
+ * advance". This property is mentioned, because it is important when
+ * reasoning about possible dead-locks in the system). The page can
+ * enter this state as a result of
+ *
+ * - [cl_page_state::CPS_OWNED] an io requesting an immediate
+ * write-out of this page, or
+ *
+ * - [cl_page_state::CPS_CACHED] req-forming engine deciding
+ * that it has enough dirty pages cached to issue a "good"
+ * transfer.
+ *
+ * The page leaves cl_page_state::CPS_PAGEOUT state when the transfer
+ * is completed---it is moved into cl_page_state::CPS_CACHED state.
+ *
+ * Underlying VM page is locked for the duration of transfer.
+ *
+ * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
+ */
+ CPS_PAGEOUT,
+ /**
+ * Page is being read in, as a part of a transfer. This is quite
+ * similar to the cl_page_state::CPS_PAGEOUT state, except that
+ * read-in is always "immediate"---there is no such thing a sudden
+ * construction of read cl_req from cached, presumably not up to date,
+ * pages.
+ *
+ * Underlying VM page is locked for the duration of transfer.
+ *
+ * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
+ */
+ CPS_PAGEIN,
+ /**
+ * Page is being destroyed. This state is entered when client decides
+ * that page has to be deleted from its host object, as, e.g., a part
+ * of truncate.
+ *
+ * Once this state is reached, there is no way to escape it.
+ *
+ * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req == NULL
+ */
+ CPS_FREEING,
+ CPS_NR
+};
+
+enum cl_page_type {
+ /** Host page, the page is from the host inode which the cl_page
+ * belongs to. */
+ CPT_CACHEABLE = 1,
+
+ /** Transient page, the transient cl_page is used to bind a cl_page
+ * to vmpage which is not belonging to the same object of cl_page.
+ * it is used in DirectIO, lockless IO and liblustre. */
+ CPT_TRANSIENT,
+};
+
+/**
+ * Flags maintained for every cl_page.
+ */
+enum cl_page_flags {
+ /**
+ * Set when pagein completes. Used for debugging (read completes at
+ * most once for a page).
+ */
+ CPF_READ_COMPLETED = 1 << 0
+};
+
+/**
+ * Fields are protected by the lock on struct page, except for atomics and
+ * immutables.
+ *
+ * \invariant Data type invariants are in cl_page_invariant(). Basically:
+ * cl_page::cp_parent and cl_page::cp_child are a well-formed double-linked
+ * list, consistent with the parent/child pointers in the cl_page::cp_obj and
+ * cl_page::cp_owner (when set).
+ */
+struct cl_page {
+ /** Reference counter. */
+ atomic_t cp_ref;
+ /** An object this page is a part of. Immutable after creation. */
+ struct cl_object *cp_obj;
+ /** Logical page index within the object. Immutable after creation. */
+ pgoff_t cp_index;
+ /** List of slices. Immutable after creation. */
+ struct list_head cp_layers;
+ /** Parent page, NULL for top-level page. Immutable after creation. */
+ struct cl_page *cp_parent;
+ /** Lower-layer page. NULL for bottommost page. Immutable after
+ * creation. */
+ struct cl_page *cp_child;
+ /**
+ * Page state. This field is const to avoid accidental update, it is
+ * modified only internally within cl_page.c. Protected by a VM lock.
+ */
+ const enum cl_page_state cp_state;
+ /** Linkage of pages within group. Protected by cl_page::cp_mutex. */
+ struct list_head cp_batch;
+ /** Mutex serializing membership of a page in a batch. */
+ struct mutex cp_mutex;
+ /** Linkage of pages within cl_req. */
+ struct list_head cp_flight;
+ /** Transfer error. */
+ int cp_error;
+
+ /**
+ * Page type. Only CPT_TRANSIENT is used so far. Immutable after
+ * creation.
+ */
+ enum cl_page_type cp_type;
+
+ /**
+ * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
+ * by sub-io. Protected by a VM lock.
+ */
+ struct cl_io *cp_owner;
+ /**
+ * Debug information, the task is owning the page.
+ */
+ task_t *cp_task;
+ /**
+ * Owning IO request in cl_page_state::CPS_PAGEOUT and
+ * cl_page_state::CPS_PAGEIN states. This field is maintained only in
+ * the top-level pages. Protected by a VM lock.
+ */
+ struct cl_req *cp_req;
+ /** List of references to this page, for debugging. */
+ struct lu_ref cp_reference;
+ /** Link to an object, for debugging. */
+ struct lu_ref_link *cp_obj_ref;
+ /** Link to a queue, for debugging. */
+ struct lu_ref_link *cp_queue_ref;
+ /** Per-page flags from enum cl_page_flags. Protected by a VM lock. */
+ unsigned cp_flags;
+ /** Assigned if doing a sync_io */
+ struct cl_sync_io *cp_sync_io;
+};
+
+/**
+ * Per-layer part of cl_page.
+ *
+ * \see ccc_page, lov_page, osc_page
+ */
+struct cl_page_slice {
+ struct cl_page *cpl_page;
+ /**
+ * Object slice corresponding to this page slice. Immutable after
+ * creation.
+ */
+ struct cl_object *cpl_obj;
+ const struct cl_page_operations *cpl_ops;
+ /** Linkage into cl_page::cp_layers. Immutable after creation. */
+ struct list_head cpl_linkage;
+};
+
+/**
+ * Lock mode. For the client extent locks.
+ *
+ * \warning: cl_lock_mode_match() assumes particular ordering here.
+ * \ingroup cl_lock
+ */
+enum cl_lock_mode {
+ /**
+ * Mode of a lock that protects no data, and exists only as a
+ * placeholder. This is used for `glimpse' requests. A phantom lock
+ * might get promoted to real lock at some point.
+ */
+ CLM_PHANTOM,
+ CLM_READ,
+ CLM_WRITE,
+ CLM_GROUP
+};
+
+/**
+ * Requested transfer type.
+ * \ingroup cl_req
+ */
+enum cl_req_type {
+ CRT_READ,
+ CRT_WRITE,
+ CRT_NR
+};
+
+/**
+ * Per-layer page operations.
+ *
+ * Methods taking an \a io argument are for the activity happening in the
+ * context of given \a io. Page is assumed to be owned by that io, except for
+ * the obvious cases (like cl_page_operations::cpo_own()).
+ *
+ * \see vvp_page_ops, lov_page_ops, osc_page_ops
+ */
+struct cl_page_operations {
+ /**
+ * cl_page<->struct page methods. Only one layer in the stack has to
+ * implement these. Current code assumes that this functionality is
+ * provided by the topmost layer, see cl_page_disown0() as an example.
+ */
+
+ /**
+ * \return the underlying VM page. Optional.
+ */
+ struct page *(*cpo_vmpage)(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+ /**
+ * Called when \a io acquires this page into the exclusive
+ * ownership. When this method returns, it is guaranteed that the is
+ * not owned by other io, and no transfer is going on against
+ * it. Optional.
+ *
+ * \see cl_page_own()
+ * \see vvp_page_own(), lov_page_own()
+ */
+ int (*cpo_own)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io, int nonblock);
+ /** Called when ownership it yielded. Optional.
+ *
+ * \see cl_page_disown()
+ * \see vvp_page_disown()
+ */
+ void (*cpo_disown)(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io);
+ /**
+ * Called for a page that is already "owned" by \a io from VM point of
+ * view. Optional.
+ *
+ * \see cl_page_assume()
+ * \see vvp_page_assume(), lov_page_assume()
+ */
+ void (*cpo_assume)(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io);
+ /** Dual to cl_page_operations::cpo_assume(). Optional. Called
+ * bottom-to-top when IO releases a page without actually unlocking
+ * it.
+ *
+ * \see cl_page_unassume()
+ * \see vvp_page_unassume()
+ */
+ void (*cpo_unassume)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+ /**
+ * Announces whether the page contains valid data or not by \a uptodate.
+ *
+ * \see cl_page_export()
+ * \see vvp_page_export()
+ */
+ void (*cpo_export)(const struct lu_env *env,
+ const struct cl_page_slice *slice, int uptodate);
+ /**
+ * Unmaps page from the user space (if it is mapped).
+ *
+ * \see cl_page_unmap()
+ * \see vvp_page_unmap()
+ */
+ int (*cpo_unmap)(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io);
+ /**
+ * Checks whether underlying VM page is locked (in the suitable
+ * sense). Used for assertions.
+ *
+ * \retval -EBUSY: page is protected by a lock of a given mode;
+ * \retval -ENODATA: page is not protected by a lock;
+ * \retval 0: this layer cannot decide. (Should never happen.)
+ */
+ int (*cpo_is_vmlocked)(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+ /**
+ * Page destruction.
+ */
+
+ /**
+ * Called when page is truncated from the object. Optional.
+ *
+ * \see cl_page_discard()
+ * \see vvp_page_discard(), osc_page_discard()
+ */
+ void (*cpo_discard)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+ /**
+ * Called when page is removed from the cache, and is about to being
+ * destroyed. Optional.
+ *
+ * \see cl_page_delete()
+ * \see vvp_page_delete(), osc_page_delete()
+ */
+ void (*cpo_delete)(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+ /** Destructor. Frees resources and slice itself. */
+ void (*cpo_fini)(const struct lu_env *env,
+ struct cl_page_slice *slice);
+
+ /**
+ * Checks whether the page is protected by a cl_lock. This is a
+ * per-layer method, because certain layers have ways to check for the
+ * lock much more efficiently than through the generic locks scan, or
+ * implement locking mechanisms separate from cl_lock, e.g.,
+ * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
+ * being canceled, or scheduled for cancellation as soon as the last
+ * user goes away, too.
+ *
+ * \retval -EBUSY: page is protected by a lock of a given mode;
+ * \retval -ENODATA: page is not protected by a lock;
+ * \retval 0: this layer cannot decide.
+ *
+ * \see cl_page_is_under_lock()
+ */
+ int (*cpo_is_under_lock)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+
+ /**
+ * Optional debugging helper. Prints given page slice.
+ *
+ * \see cl_page_print()
+ */
+ int (*cpo_print)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t p);
+ /**
+ * \name transfer
+ *
+ * Transfer methods. See comment on cl_req for a description of
+ * transfer formation and life-cycle.
+ *
+ * @{
+ */
+ /**
+ * Request type dependent vector of operations.
+ *
+ * Transfer operations depend on transfer mode (cl_req_type). To avoid
+ * passing transfer mode to each and every of these methods, and to
+ * avoid branching on request type inside of the methods, separate
+ * methods for cl_req_type:CRT_READ and cl_req_type:CRT_WRITE are
+ * provided. That is, method invocation usually looks like
+ *
+ * slice->cp_ops.io[req->crq_type].cpo_method(env, slice, ...);
+ */
+ struct {
+ /**
+ * Called when a page is submitted for a transfer as a part of
+ * cl_page_list.
+ *
+ * \return 0 : page is eligible for submission;
+ * \return -EALREADY : skip this page;
+ * \return -ve : error.
+ *
+ * \see cl_page_prep()
+ */
+ int (*cpo_prep)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+ /**
+ * Completion handler. This is guaranteed to be eventually
+ * fired after cl_page_operations::cpo_prep() or
+ * cl_page_operations::cpo_make_ready() call.
+ *
+ * This method can be called in a non-blocking context. It is
+ * guaranteed however, that the page involved and its object
+ * are pinned in memory (and, hence, calling cl_page_put() is
+ * safe).
+ *
+ * \see cl_page_completion()
+ */
+ void (*cpo_completion)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret);
+ /**
+ * Called when cached page is about to be added to the
+ * cl_req as a part of req formation.
+ *
+ * \return 0 : proceed with this page;
+ * \return -EAGAIN : skip this page;
+ * \return -ve : error.
+ *
+ * \see cl_page_make_ready()
+ */
+ int (*cpo_make_ready)(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+ /**
+ * Announce that this page is to be written out
+ * opportunistically, that is, page is dirty, it is not
+ * necessary to start write-out transfer right now, but
+ * eventually page has to be written out.
+ *
+ * Main caller of this is the write path (see
+ * vvp_io_commit_write()), using this method to build a
+ * "transfer cache" from which large transfers are then
+ * constructed by the req-formation engine.
+ *
+ * \todo XXX it would make sense to add page-age tracking
+ * semantics here, and to oblige the req-formation engine to
+ * send the page out not later than it is too old.
+ *
+ * \see cl_page_cache_add()
+ */
+ int (*cpo_cache_add)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+ } io[CRT_NR];
+ /**
+ * Tell transfer engine that only [to, from] part of a page should be
+ * transmitted.
+ *
+ * This is used for immediate transfers.
+ *
+ * \todo XXX this is not very good interface. It would be much better
+ * if all transfer parameters were supplied as arguments to
+ * cl_io_operations::cio_submit() call, but it is not clear how to do
+ * this for page queues.
+ *
+ * \see cl_page_clip()
+ */
+ void (*cpo_clip)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int from, int to);
+ /**
+ * \pre the page was queued for transferring.
+ * \post page is removed from client's pending list, or -EBUSY
+ * is returned if it has already been in transferring.
+ *
+ * This is one of seldom page operation which is:
+ * 0. called from top level;
+ * 1. don't have vmpage locked;
+ * 2. every layer should synchronize execution of its ->cpo_cancel()
+ * with completion handlers. Osc uses client obd lock for this
+ * purpose. Based on there is no vvp_page_cancel and
+ * lov_page_cancel(), cpo_cancel is defacto protected by client lock.
+ *
+ * \see osc_page_cancel().
+ */
+ int (*cpo_cancel)(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+ /**
+ * Write out a page by kernel. This is only called by ll_writepage
+ * right now.
+ *
+ * \see cl_page_flush()
+ */
+ int (*cpo_flush)(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+ /** @} transfer */
+};
+
+/**
+ * Helper macro, dumping detailed information about \a page into a log.
+ */
+#define CL_PAGE_DEBUG(mask, env, page, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ cl_page_print(env, &msgdata, lu_cdebug_printer, page); \
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
+} while (0)
+
+/**
+ * Helper macro, dumping shorter information about \a page into a log.
+ */
+#define CL_PAGE_HEADER(mask, env, page, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
+} while (0)
+
+static inline int __page_in_use(const struct cl_page *page, int refc)
+{
+ if (page->cp_type == CPT_CACHEABLE)
+ ++refc;
+ LASSERT(atomic_read(&page->cp_ref) > 0);
+ return (atomic_read(&page->cp_ref) > refc);
+}
+#define cl_page_in_use(pg) __page_in_use(pg, 1)
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
+/** @} cl_page */
+
+/** \addtogroup cl_lock cl_lock
+ * @{ */
+/** \struct cl_lock
+ *
+ * Extent locking on the client.
+ *
+ * LAYERING
+ *
+ * The locking model of the new client code is built around
+ *
+ * struct cl_lock
+ *
+ * data-type representing an extent lock on a regular file. cl_lock is a
+ * layered object (much like cl_object and cl_page), it consists of a header
+ * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
+ * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
+ *
+ * All locks for a given object are linked into cl_object_header::coh_locks
+ * list (protected by cl_object_header::coh_lock_guard spin-lock) through
+ * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
+ * sort it in starting lock offset, or use altogether different data structure
+ * like a tree.
+ *
+ * Typical cl_lock consists of the two layers:
+ *
+ * - vvp_lock (vvp specific data), and
+ * - lov_lock (lov specific data).
+ *
+ * lov_lock contains an array of sub-locks. Each of these sub-locks is a
+ * normal cl_lock: it has a header (struct cl_lock) and a list of layers:
+ *
+ * - lovsub_lock, and
+ * - osc_lock
+ *
+ * Each sub-lock is associated with a cl_object (representing stripe
+ * sub-object or the file to which top-level cl_lock is associated to), and is
+ * linked into that cl_object::coh_locks. In this respect cl_lock is similar to
+ * cl_object (that at lov layer also fans out into multiple sub-objects), and
+ * is different from cl_page, that doesn't fan out (there is usually exactly
+ * one osc_page for every vvp_page). We shall call vvp-lov portion of the lock
+ * a "top-lock" and its lovsub-osc portion a "sub-lock".
+ *
+ * LIFE CYCLE
+ *
+ * cl_lock is reference counted. When reference counter drops to 0, lock is
+ * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
+ * lock is destroyed when last reference is released. Referencing between
+ * top-lock and its sub-locks is described in the lov documentation module.
+ *
+ * STATE MACHINE
+ *
+ * Also, cl_lock is a state machine. This requires some clarification. One of
+ * the goals of client IO re-write was to make IO path non-blocking, or at
+ * least to make it easier to make it non-blocking in the future. Here
+ * `non-blocking' means that when a system call (read, write, truncate)
+ * reaches a situation where it has to wait for a communication with the
+ * server, it should --instead of waiting-- remember its current state and
+ * switch to some other work. E.g,. instead of waiting for a lock enqueue,
+ * client should proceed doing IO on the next stripe, etc. Obviously this is
+ * rather radical redesign, and it is not planned to be fully implemented at
+ * this time, instead we are putting some infrastructure in place, that would
+ * make it easier to do asynchronous non-blocking IO easier in the
+ * future. Specifically, where old locking code goes to sleep (waiting for
+ * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
+ * enqueue reply comes, its completion handler signals that lock state-machine
+ * is ready to transit to the next state. There is some generic code in
+ * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
+ * this cl_lock.c code, it looks like locking is done in normal blocking
+ * fashion, and it the same time it is possible to switch to the non-blocking
+ * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
+ * functions).
+ *
+ * For a description of state machine states and transitions see enum
+ * cl_lock_state.
+ *
+ * There are two ways to restrict a set of states which lock might move to:
+ *
+ * - placing a "hold" on a lock guarantees that lock will not be moved
+ * into cl_lock_state::CLS_FREEING state until hold is released. Hold
+ * can be only acquired on a lock that is not in
+ * cl_lock_state::CLS_FREEING. All holds on a lock are counted in
+ * cl_lock::cll_holds. Hold protects lock from cancellation and
+ * destruction. Requests to cancel and destroy a lock on hold will be
+ * recorded, but only honored when last hold on a lock is released;
+ *
+ * - placing a "user" on a lock guarantees that lock will not leave
+ * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
+ * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
+ * states, once it enters this set. That is, if a user is added onto a
+ * lock in a state not from this set, it doesn't immediately enforce
+ * lock to move to this set, but once lock enters this set it will
+ * remain there until all users are removed. Lock users are counted in
+ * cl_lock::cll_users.
+ *
+ * User is used to assure that lock is not canceled or destroyed while
+ * it is being enqueued, or actively used by some IO.
+ *
+ * Currently, a user always comes with a hold (cl_lock_invariant()
+ * checks that a number of holds is not less than a number of users).
+ *
+ * CONCURRENCY
+ *
+ * This is how lock state-machine operates. struct cl_lock contains a mutex
+ * cl_lock::cll_guard that protects struct fields.
+ *
+ * - mutex is taken, and cl_lock::cll_state is examined.
+ *
+ * - for every state there are possible target states where lock can move
+ * into. They are tried in order. Attempts to move into next state are
+ * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
+ *
+ * - if the transition can be performed immediately, state is changed,
+ * and mutex is released.
+ *
+ * - if the transition requires blocking, _try() function returns
+ * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
+ * sleep, waiting for possibility of lock state change. It is woken
+ * up when some event occurs, that makes lock state change possible
+ * (e.g., the reception of the reply from the server), and repeats
+ * the loop.
+ *
+ * Top-lock and sub-lock has separate mutexes and the latter has to be taken
+ * first to avoid dead-lock.
+ *
+ * To see an example of interaction of all these issues, take a look at the
+ * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
+ * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
+ * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
+ * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
+ * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
+ * done in parallel, rather than one after another (this is used for glimpse
+ * locks, that cannot dead-lock).
+ *
+ * INTERFACE AND USAGE
+ *
+ * struct cl_lock_operations provide a number of call-backs that are invoked
+ * when events of interest occurs. Layers can intercept and handle glimpse,
+ * blocking, cancel ASTs and a reception of the reply from the server.
+ *
+ * One important difference with the old client locking model is that new
+ * client has a representation for the top-lock, whereas in the old code only
+ * sub-locks existed as real data structures and file-level locks are
+ * represented by "request sets" that are created and destroyed on each and
+ * every lock creation.
+ *
+ * Top-locks are cached, and can be found in the cache by the system calls. It
+ * is possible that top-lock is in cache, but some of its sub-locks were
+ * canceled and destroyed. In that case top-lock has to be enqueued again
+ * before it can be used.
+ *
+ * Overall process of the locking during IO operation is as following:
+ *
+ * - once parameters for IO are setup in cl_io, cl_io_operations::cio_lock()
+ * is called on each layer. Responsibility of this method is to add locks,
+ * needed by a given layer into cl_io.ci_lockset.
+ *
+ * - once locks for all layers were collected, they are sorted to avoid
+ * dead-locks (cl_io_locks_sort()), and enqueued.
+ *
+ * - when all locks are acquired, IO is performed;
+ *
+ * - locks are released into cache.
+ *
+ * Striping introduces major additional complexity into locking. The
+ * fundamental problem is that it is generally unsafe to actively use (hold)
+ * two locks on the different OST servers at the same time, as this introduces
+ * inter-server dependency and can lead to cascading evictions.
+ *
+ * Basic solution is to sub-divide large read/write IOs into smaller pieces so
+ * that no multi-stripe locks are taken (note that this design abandons POSIX
+ * read/write semantics). Such pieces ideally can be executed concurrently. At
+ * the same time, certain types of IO cannot be sub-divived, without
+ * sacrificing correctness. This includes:
+ *
+ * - O_APPEND write, where [0, EOF] lock has to be taken, to guarantee
+ * atomicity;
+ *
+ * - ftruncate(fd, offset), where [offset, EOF] lock has to be taken.
+ *
+ * Also, in the case of read(fd, buf, count) or write(fd, buf, count), where
+ * buf is a part of memory mapped Lustre file, a lock or locks protecting buf
+ * has to be held together with the usual lock on [offset, offset + count].
+ *
+ * As multi-stripe locks have to be allowed, it makes sense to cache them, so
+ * that, for example, a sequence of O_APPEND writes can proceed quickly
+ * without going down to the individual stripes to do lock matching. On the
+ * other hand, multi-stripe locks shouldn't be used by normal read/write
+ * calls. To achieve this, every layer can implement ->clo_fits_into() method,
+ * that is called by lock matching code (cl_lock_lookup()), and that can be
+ * used to selectively disable matching of certain locks for certain IOs. For
+ * exmaple, lov layer implements lov_lock_fits_into() that allow multi-stripe
+ * locks to be matched only for truncates and O_APPEND writes.
+ *
+ * Interaction with DLM
+ *
+ * In the expected setup, cl_lock is ultimately backed up by a collection of
+ * DLM locks (struct ldlm_lock). Association between cl_lock and DLM lock is
+ * implemented in osc layer, that also matches DLM events (ASTs, cancellation,
+ * etc.) into cl_lock_operation calls. See struct osc_lock for a more detailed
+ * description of interaction with DLM.
+ */
+
+/**
+ * Lock description.
+ */
+struct cl_lock_descr {
+ /** Object this lock is granted for. */
+ struct cl_object *cld_obj;
+ /** Index of the first page protected by this lock. */
+ pgoff_t cld_start;
+ /** Index of the last page (inclusive) protected by this lock. */
+ pgoff_t cld_end;
+ /** Group ID, for group lock */
+ __u64 cld_gid;
+ /** Lock mode. */
+ enum cl_lock_mode cld_mode;
+ /**
+ * flags to enqueue lock. A combination of bit-flags from
+ * enum cl_enq_flags.
+ */
+ __u32 cld_enq_flags;
+};
+
+#define DDESCR "%s(%d):[%lu, %lu]"
+#define PDESCR(descr) \
+ cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \
+ (descr)->cld_start, (descr)->cld_end
+
+const char *cl_lock_mode_name(const enum cl_lock_mode mode);
+
+/**
+ * Lock state-machine states.
+ *
+ * \htmlonly
+ * <pre>
+ *
+ * Possible state transitions:
+ *
+ * +------------------>NEW
+ * | |
+ * | | cl_enqueue_try()
+ * | |
+ * | cl_unuse_try() V
+ * | +--------------QUEUING (*)
+ * | | |
+ * | | | cl_enqueue_try()
+ * | | |
+ * | | cl_unuse_try() V
+ * sub-lock | +-------------ENQUEUED (*)
+ * canceled | | |
+ * | | | cl_wait_try()
+ * | | |
+ * | | (R)
+ * | | |
+ * | | V
+ * | | HELD<---------+
+ * | | | |
+ * | | | | cl_use_try()
+ * | | cl_unuse_try() | |
+ * | | | |
+ * | | V ---+
+ * | +------------>INTRANSIT (D) <--+
+ * | | |
+ * | cl_unuse_try() | | cached lock found
+ * | | | cl_use_try()
+ * | | |
+ * | V |
+ * +------------------CACHED---------+
+ * |
+ * (C)
+ * |
+ * V
+ * FREEING
+ *
+ * Legend:
+ *
+ * In states marked with (*) transition to the same state (i.e., a loop
+ * in the diagram) is possible.
+ *
+ * (R) is the point where Receive call-back is invoked: it allows layers
+ * to handle arrival of lock reply.
+ *
+ * (C) is the point where Cancellation call-back is invoked.
+ *
+ * (D) is the transit state which means the lock is changing.
+ *
+ * Transition to FREEING state is possible from any other state in the
+ * diagram in case of unrecoverable error.
+ * </pre>
+ * \endhtmlonly
+ *
+ * These states are for individual cl_lock object. Top-lock and its sub-locks
+ * can be in the different states. Another way to say this is that we have
+ * nested state-machines.
+ *
+ * Separate QUEUING and ENQUEUED states are needed to support non-blocking
+ * operation for locks with multiple sub-locks. Imagine lock on a file F, that
+ * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
+ * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
+ * its completion and at last enqueue lock for S2, and wait for its
+ * completion. In that case, top-lock is in QUEUING state while S0, S1 are
+ * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
+ * that in this case, sub-locks move from state to state, and top-lock remains
+ * in the same state).
+ */
+enum cl_lock_state {
+ /**
+ * Lock that wasn't yet enqueued
+ */
+ CLS_NEW,
+ /**
+ * Enqueue is in progress, blocking for some intermediate interaction
+ * with the other side.
+ */
+ CLS_QUEUING,
+ /**
+ * Lock is fully enqueued, waiting for server to reply when it is
+ * granted.
+ */
+ CLS_ENQUEUED,
+ /**
+ * Lock granted, actively used by some IO.
+ */
+ CLS_HELD,
+ /**
+ * This state is used to mark the lock is being used, or unused.
+ * We need this state because the lock may have several sublocks,
+ * so it's impossible to have an atomic way to bring all sublocks
+ * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
+ * at unuse case.
+ * If a thread is referring to a lock, and it sees the lock is in this
+ * state, it must wait for the lock.
+ * See state diagram for details.
+ */
+ CLS_INTRANSIT,
+ /**
+ * Lock granted, not used.
+ */
+ CLS_CACHED,
+ /**
+ * Lock is being destroyed.
+ */
+ CLS_FREEING,
+ CLS_NR
+};
+
+enum cl_lock_flags {
+ /**
+ * lock has been cancelled. This flag is never cleared once set (by
+ * cl_lock_cancel0()).
+ */
+ CLF_CANCELLED = 1 << 0,
+ /** cancellation is pending for this lock. */
+ CLF_CANCELPEND = 1 << 1,
+ /** destruction is pending for this lock. */
+ CLF_DOOMED = 1 << 2,
+ /** from enqueue RPC reply upcall. */
+ CLF_FROM_UPCALL= 1 << 3,
+};
+
+/**
+ * Lock closure.
+ *
+ * Lock closure is a collection of locks (both top-locks and sub-locks) that
+ * might be updated in a result of an operation on a certain lock (which lock
+ * this is a closure of).
+ *
+ * Closures are needed to guarantee dead-lock freedom in the presence of
+ *
+ * - nested state-machines (top-lock state-machine composed of sub-lock
+ * state-machines), and
+ *
+ * - shared sub-locks.
+ *
+ * Specifically, many operations, such as lock enqueue, wait, unlock,
+ * etc. start from a top-lock, and then operate on a sub-locks of this
+ * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
+ * of such operation, this change has to be propagated to all top-locks that
+ * share this sub-lock. Obviously, no natural lock ordering (e.g.,
+ * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
+ * to be used. Lock closure systematizes this try-and-repeat logic.
+ */
+struct cl_lock_closure {
+ /**
+ * Lock that is mutexed when closure construction is started. When
+ * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
+ * origin is released before waiting.
+ */
+ struct cl_lock *clc_origin;
+ /**
+ * List of enclosed locks, so far. Locks are linked here through
+ * cl_lock::cll_inclosure.
+ */
+ struct list_head clc_list;
+ /**
+ * True iff closure is in a `wait' mode. This determines what
+ * cl_lock_enclosure() does when a lock L to be added to the closure
+ * is currently mutexed by some other thread.
+ *
+ * If cl_lock_closure::clc_wait is not set, then closure construction
+ * fails with CLO_REPEAT immediately.
+ *
+ * In wait mode, cl_lock_enclosure() waits until next attempt to build
+ * a closure might succeed. To this end it releases an origin mutex
+ * (cl_lock_closure::clc_origin), that has to be the only lock mutex
+ * owned by the current thread, and then waits on L mutex (by grabbing
+ * it and immediately releasing), before returning CLO_REPEAT to the
+ * caller.
+ */
+ int clc_wait;
+ /** Number of locks in the closure. */
+ int clc_nr;
+};
+
+/**
+ * Layered client lock.
+ */
+struct cl_lock {
+ /** Reference counter. */
+ atomic_t cll_ref;
+ /** List of slices. Immutable after creation. */
+ struct list_head cll_layers;
+ /**
+ * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
+ * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
+ */
+ struct list_head cll_linkage;
+ /**
+ * Parameters of this lock. Protected by
+ * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
+ * cl_lock::cll_guard. Modified only on lock creation and in
+ * cl_lock_modify().
+ */
+ struct cl_lock_descr cll_descr;
+ /** Protected by cl_lock::cll_guard. */
+ enum cl_lock_state cll_state;
+ /** signals state changes. */
+ wait_queue_head_t cll_wq;
+ /**
+ * Recursive lock, most fields in cl_lock{} are protected by this.
+ *
+ * Locking rules: this mutex is never held across network
+ * communication, except when lock is being canceled.
+ *
+ * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
+ * on a top-lock. Other direction is implemented through a
+ * try-lock-repeat loop. Mutices of unrelated locks can be taken only
+ * by try-locking.
+ *
+ * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
+ */
+ struct mutex cll_guard;
+ task_t *cll_guarder;
+ int cll_depth;
+
+ /**
+ * the owner for INTRANSIT state
+ */
+ task_t *cll_intransit_owner;
+ int cll_error;
+ /**
+ * Number of holds on a lock. A hold prevents a lock from being
+ * canceled and destroyed. Protected by cl_lock::cll_guard.
+ *
+ * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
+ */
+ int cll_holds;
+ /**
+ * Number of lock users. Valid in cl_lock_state::CLS_HELD state
+ * only. Lock user pins lock in CLS_HELD state. Protected by
+ * cl_lock::cll_guard.
+ *
+ * \see cl_wait(), cl_unuse().
+ */
+ int cll_users;
+ /**
+ * Flag bit-mask. Values from enum cl_lock_flags. Updates are
+ * protected by cl_lock::cll_guard.
+ */
+ unsigned long cll_flags;
+ /**
+ * A linkage into a list of locks in a closure.
+ *
+ * \see cl_lock_closure
+ */
+ struct list_head cll_inclosure;
+ /**
+ * Confict lock at queuing time.
+ */
+ struct cl_lock *cll_conflict;
+ /**
+ * A list of references to this lock, for debugging.
+ */
+ struct lu_ref cll_reference;
+ /**
+ * A list of holds on this lock, for debugging.
+ */
+ struct lu_ref cll_holders;
+ /**
+ * A reference for cl_lock::cll_descr::cld_obj. For debugging.
+ */
+ struct lu_ref_link *cll_obj_ref;
+#ifdef CONFIG_LOCKDEP
+ /* "dep_map" name is assumed by lockdep.h macros. */
+ struct lockdep_map dep_map;
+#endif
+};
+
+/**
+ * Per-layer part of cl_lock
+ *
+ * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ */
+struct cl_lock_slice {
+ struct cl_lock *cls_lock;
+ /** Object slice corresponding to this lock slice. Immutable after
+ * creation. */
+ struct cl_object *cls_obj;
+ const struct cl_lock_operations *cls_ops;
+ /** Linkage into cl_lock::cll_layers. Immutable after creation. */
+ struct list_head cls_linkage;
+};
+
+/**
+ * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
+ *
+ * NOTE: lov_subresult() depends on ordering here.
+ */
+enum cl_lock_transition {
+ /** operation cannot be completed immediately. Wait for state change. */
+ CLO_WAIT = 1,
+ /** operation had to release lock mutex, restart. */
+ CLO_REPEAT = 2,
+ /** lower layer re-enqueued. */
+ CLO_REENQUEUED = 3,
+};
+
+/**
+ *
+ * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
+ */
+struct cl_lock_operations {
+ /**
+ * \name statemachine
+ *
+ * State machine transitions. These 3 methods are called to transfer
+ * lock from one state to another, as described in the commentary
+ * above enum #cl_lock_state.
+ *
+ * \retval 0 this layer has nothing more to do to before
+ * transition to the target state happens;
+ *
+ * \retval CLO_REPEAT method had to release and re-acquire cl_lock
+ * mutex, repeat invocation of transition method
+ * across all layers;
+ *
+ * \retval CLO_WAIT this layer cannot move to the target state
+ * immediately, as it has to wait for certain event
+ * (e.g., the communication with the server). It
+ * is guaranteed, that when the state transfer
+ * becomes possible, cl_lock::cll_wq wait-queue
+ * is signaled. Caller can wait for this event by
+ * calling cl_lock_state_wait();
+ *
+ * \retval -ve failure, abort state transition, move the lock
+ * into cl_lock_state::CLS_FREEING state, and set
+ * cl_lock::cll_error.
+ *
+ * Once all layers voted to agree to transition (by returning 0), lock
+ * is moved into corresponding target state. All state transition
+ * methods are optional.
+ */
+ /** @{ */
+ /**
+ * Attempts to enqueue the lock. Called top-to-bottom.
+ *
+ * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+ * \see osc_lock_enqueue()
+ */
+ int (*clo_enqueue)(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *io, __u32 enqflags);
+ /**
+ * Attempts to wait for enqueue result. Called top-to-bottom.
+ *
+ * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
+ */
+ int (*clo_wait)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /**
+ * Attempts to unlock the lock. Called bottom-to-top. In addition to
+ * usual return values of lock state-machine methods, this can return
+ * -ESTALE to indicate that lock cannot be returned to the cache, and
+ * has to be re-initialized.
+ * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
+ *
+ * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
+ */
+ int (*clo_unuse)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /**
+ * Notifies layer that cached lock is started being used.
+ *
+ * \pre lock->cll_state == CLS_CACHED
+ *
+ * \see lov_lock_use(), osc_lock_use()
+ */
+ int (*clo_use)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /** @} statemachine */
+ /**
+ * A method invoked when lock state is changed (as a result of state
+ * transition). This is used, for example, to track when the state of
+ * a sub-lock changes, to propagate this change to the corresponding
+ * top-lock. Optional
+ *
+ * \see lovsub_lock_state()
+ */
+ void (*clo_state)(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state st);
+ /**
+ * Returns true, iff given lock is suitable for the given io, idea
+ * being, that there are certain "unsafe" locks, e.g., ones acquired
+ * for O_APPEND writes, that we don't want to re-use for a normal
+ * write, to avoid the danger of cascading evictions. Optional. Runs
+ * under cl_object_header::coh_lock_guard.
+ *
+ * XXX this should take more information about lock needed by
+ * io. Probably lock description or something similar.
+ *
+ * \see lov_fits_into()
+ */
+ int (*clo_fits_into)(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io);
+ /**
+ * \name ast
+ * Asynchronous System Traps. All of then are optional, all are
+ * executed bottom-to-top.
+ */
+ /** @{ */
+
+ /**
+ * Cancellation callback. Cancel a lock voluntarily, or under
+ * the request of server.
+ */
+ void (*clo_cancel)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /**
+ * Lock weighting ast. Executed to estimate how precious this lock
+ * is. The sum of results across all layers is used to determine
+ * whether lock worth keeping in cache given present memory usage.
+ *
+ * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
+ */
+ unsigned long (*clo_weigh)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /** @} ast */
+
+ /**
+ * \see lovsub_lock_closure()
+ */
+ int (*clo_closure)(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_lock_closure *closure);
+ /**
+ * Executed bottom-to-top when lock description changes (e.g., as a
+ * result of server granting more generous lock than was requested).
+ *
+ * \see lovsub_lock_modify()
+ */
+ int (*clo_modify)(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *updated);
+ /**
+ * Notifies layers (bottom-to-top) that lock is going to be
+ * destroyed. Responsibility of layers is to prevent new references on
+ * this lock from being acquired once this method returns.
+ *
+ * This can be called multiple times due to the races.
+ *
+ * \see cl_lock_delete()
+ * \see osc_lock_delete(), lovsub_lock_delete()
+ */
+ void (*clo_delete)(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+ /**
+ * Destructor. Frees resources and the slice.
+ *
+ * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+ * \see osc_lock_fini()
+ */
+ void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
+ /**
+ * Optional debugging helper. Prints given lock slice.
+ */
+ int (*clo_print)(const struct lu_env *env,
+ void *cookie, lu_printer_t p,
+ const struct cl_lock_slice *slice);
+};
+
+#define CL_LOCK_DEBUG(mask, env, lock, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ cl_lock_print(env, &msgdata, lu_cdebug_printer, lock); \
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
+} while (0)
+
+#define CL_LOCK_ASSERT(expr, env, lock) do { \
+ if (likely(expr)) \
+ break; \
+ \
+ CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr); \
+ LBUG(); \
+} while (0)
+
+/** @} cl_lock */
+
+/** \addtogroup cl_page_list cl_page_list
+ * Page list used to perform collective operations on a group of pages.
+ *
+ * Pages are added to the list one by one. cl_page_list acquires a reference
+ * for every page in it. Page list is used to perform collective operations on
+ * pages:
+ *
+ * - submit pages for an immediate transfer,
+ *
+ * - own pages on behalf of certain io (waiting for each page in turn),
+ *
+ * - discard pages.
+ *
+ * When list is finalized, it releases references on all pages it still has.
+ *
+ * \todo XXX concurrency control.
+ *
+ * @{
+ */
+struct cl_page_list {
+ unsigned pl_nr;
+ struct list_head pl_pages;
+ task_t *pl_owner;
+};
+
+/**
+ * A 2-queue of pages. A convenience data-type for common use case, 2-queue
+ * contains an incoming page list and an outgoing page list.
+ */
+struct cl_2queue {
+ struct cl_page_list c2_qin;
+ struct cl_page_list c2_qout;
+};
+
+/** @} cl_page_list */
+
+/** \addtogroup cl_io cl_io
+ * @{ */
+/** \struct cl_io
+ * I/O
+ *
+ * cl_io represents a high level I/O activity like
+ * read(2)/write(2)/truncate(2) system call, or cancellation of an extent
+ * lock.
+ *
+ * cl_io is a layered object, much like cl_{object,page,lock} but with one
+ * important distinction. We want to minimize number of calls to the allocator
+ * in the fast path, e.g., in the case of read(2) when everything is cached:
+ * client already owns the lock over region being read, and data are cached
+ * due to read-ahead. To avoid allocation of cl_io layers in such situations,
+ * per-layer io state is stored in the session, associated with the io, see
+ * struct {vvp,lov,osc}_io for example. Sessions allocation is amortized
+ * by using free-lists, see cl_env_get().
+ *
+ * There is a small predefined number of possible io types, enumerated in enum
+ * cl_io_type.
+ *
+ * cl_io is a state machine, that can be advanced concurrently by the multiple
+ * threads. It is up to these threads to control the concurrency and,
+ * specifically, to detect when io is done, and its state can be safely
+ * released.
+ *
+ * For read/write io overall execution plan is as following:
+ *
+ * (0) initialize io state through all layers;
+ *
+ * (1) loop: prepare chunk of work to do
+ *
+ * (2) call all layers to collect locks they need to process current chunk
+ *
+ * (3) sort all locks to avoid dead-locks, and acquire them
+ *
+ * (4) process the chunk: call per-page methods
+ * (cl_io_operations::cio_read_page() for read,
+ * cl_io_operations::cio_prepare_write(),
+ * cl_io_operations::cio_commit_write() for write)
+ *
+ * (5) release locks
+ *
+ * (6) repeat loop.
+ *
+ * To implement the "parallel IO mode", lov layer creates sub-io's (lazily to
+ * address allocation efficiency issues mentioned above), and returns with the
+ * special error condition from per-page method when current sub-io has to
+ * block. This causes io loop to be repeated, and lov switches to the next
+ * sub-io in its cl_io_operations::cio_iter_init() implementation.
+ */
+
+/** IO types */
+enum cl_io_type {
+ /** read system call */
+ CIT_READ,
+ /** write system call */
+ CIT_WRITE,
+ /** truncate, utime system calls */
+ CIT_SETATTR,
+ /**
+ * page fault handling
+ */
+ CIT_FAULT,
+ /**
+ * fsync system call handling
+ * To write out a range of file
+ */
+ CIT_FSYNC,
+ /**
+ * Miscellaneous io. This is used for occasional io activity that
+ * doesn't fit into other types. Currently this is used for:
+ *
+ * - cancellation of an extent lock. This io exists as a context
+ * to write dirty pages from under the lock being canceled back
+ * to the server;
+ *
+ * - VM induced page write-out. An io context for writing page out
+ * for memory cleansing;
+ *
+ * - glimpse. An io context to acquire glimpse lock.
+ *
+ * - grouplock. An io context to acquire group lock.
+ *
+ * CIT_MISC io is used simply as a context in which locks and pages
+ * are manipulated. Such io has no internal "process", that is,
+ * cl_io_loop() is never called for it.
+ */
+ CIT_MISC,
+ CIT_OP_NR
+};
+
+/**
+ * States of cl_io state machine
+ */
+enum cl_io_state {
+ /** Not initialized. */
+ CIS_ZERO,
+ /** Initialized. */
+ CIS_INIT,
+ /** IO iteration started. */
+ CIS_IT_STARTED,
+ /** Locks taken. */
+ CIS_LOCKED,
+ /** Actual IO is in progress. */
+ CIS_IO_GOING,
+ /** IO for the current iteration finished. */
+ CIS_IO_FINISHED,
+ /** Locks released. */
+ CIS_UNLOCKED,
+ /** Iteration completed. */
+ CIS_IT_ENDED,
+ /** cl_io finalized. */
+ CIS_FINI
+};
+
+/**
+ * IO state private for a layer.
+ *
+ * This is usually embedded into layer session data, rather than allocated
+ * dynamically.
+ *
+ * \see vvp_io, lov_io, osc_io, ccc_io
+ */
+struct cl_io_slice {
+ struct cl_io *cis_io;
+ /** corresponding object slice. Immutable after creation. */
+ struct cl_object *cis_obj;
+ /** io operations. Immutable after creation. */
+ const struct cl_io_operations *cis_iop;
+ /**
+ * linkage into a list of all slices for a given cl_io, hanging off
+ * cl_io::ci_layers. Immutable after creation.
+ */
+ struct list_head cis_linkage;
+};
+
+
+/**
+ * Per-layer io operations.
+ * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
+ */
+struct cl_io_operations {
+ /**
+ * Vector of io state transition methods for every io type.
+ *
+ * \see cl_page_operations::io
+ */
+ struct {
+ /**
+ * Prepare io iteration at a given layer.
+ *
+ * Called top-to-bottom at the beginning of each iteration of
+ * "io loop" (if it makes sense for this type of io). Here
+ * layer selects what work it will do during this iteration.
+ *
+ * \see cl_io_operations::cio_iter_fini()
+ */
+ int (*cio_iter_init) (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Finalize io iteration.
+ *
+ * Called bottom-to-top at the end of each iteration of "io
+ * loop". Here layers can decide whether IO has to be
+ * continued.
+ *
+ * \see cl_io_operations::cio_iter_init()
+ */
+ void (*cio_iter_fini) (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Collect locks for the current iteration of io.
+ *
+ * Called top-to-bottom to collect all locks necessary for
+ * this iteration. This methods shouldn't actually enqueue
+ * anything, instead it should post a lock through
+ * cl_io_lock_add(). Once all locks are collected, they are
+ * sorted and enqueued in the proper order.
+ */
+ int (*cio_lock) (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Finalize unlocking.
+ *
+ * Called bottom-to-top to finish layer specific unlocking
+ * functionality, after generic code released all locks
+ * acquired by cl_io_operations::cio_lock().
+ */
+ void (*cio_unlock)(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Start io iteration.
+ *
+ * Once all locks are acquired, called top-to-bottom to
+ * commence actual IO. In the current implementation,
+ * top-level vvp_io_{read,write}_start() does all the work
+ * synchronously by calling generic_file_*(), so other layers
+ * are called when everything is done.
+ */
+ int (*cio_start)(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Called top-to-bottom at the end of io loop. Here layer
+ * might wait for an unfinished asynchronous io.
+ */
+ void (*cio_end) (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ /**
+ * Called bottom-to-top to notify layers that read/write IO
+ * iteration finished, with \a nob bytes transferred.
+ */
+ void (*cio_advance)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ size_t nob);
+ /**
+ * Called once per io, bottom-to-top to release io resources.
+ */
+ void (*cio_fini) (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+ } op[CIT_OP_NR];
+ struct {
+ /**
+ * Submit pages from \a queue->c2_qin for IO, and move
+ * successfully submitted pages into \a queue->c2_qout. Return
+ * non-zero if failed to submit even the single page. If
+ * submission failed after some pages were moved into \a
+ * queue->c2_qout, completion callback with non-zero ioret is
+ * executed on them.
+ */
+ int (*cio_submit)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ enum cl_req_type crt,
+ struct cl_2queue *queue);
+ } req_op[CRT_NR];
+ /**
+ * Read missing page.
+ *
+ * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
+ * method, when it hits not-up-to-date page in the range. Optional.
+ *
+ * \pre io->ci_type == CIT_READ
+ */
+ int (*cio_read_page)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ const struct cl_page_slice *page);
+ /**
+ * Prepare write of a \a page. Called bottom-to-top by a top-level
+ * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
+ * get data from user-level buffer.
+ *
+ * \pre io->ci_type == CIT_WRITE
+ *
+ * \see vvp_io_prepare_write(), lov_io_prepare_write(),
+ * osc_io_prepare_write().
+ */
+ int (*cio_prepare_write)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ const struct cl_page_slice *page,
+ unsigned from, unsigned to);
+ /**
+ *
+ * \pre io->ci_type == CIT_WRITE
+ *
+ * \see vvp_io_commit_write(), lov_io_commit_write(),
+ * osc_io_commit_write().
+ */
+ int (*cio_commit_write)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ const struct cl_page_slice *page,
+ unsigned from, unsigned to);
+ /**
+ * Optional debugging helper. Print given io slice.
+ */
+ int (*cio_print)(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_io_slice *slice);
+};
+
+/**
+ * Flags to lock enqueue procedure.
+ * \ingroup cl_lock
+ */
+enum cl_enq_flags {
+ /**
+ * instruct server to not block, if conflicting lock is found. Instead
+ * -EWOULDBLOCK is returned immediately.
+ */
+ CEF_NONBLOCK = 0x00000001,
+ /**
+ * take lock asynchronously (out of order), as it cannot
+ * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing.
+ */
+ CEF_ASYNC = 0x00000002,
+ /**
+ * tell the server to instruct (though a flag in the blocking ast) an
+ * owner of the conflicting lock, that it can drop dirty pages
+ * protected by this lock, without sending them to the server.
+ */
+ CEF_DISCARD_DATA = 0x00000004,
+ /**
+ * tell the sub layers that it must be a `real' lock. This is used for
+ * mmapped-buffer locks and glimpse locks that must be never converted
+ * into lockless mode.
+ *
+ * \see vvp_mmap_locks(), cl_glimpse_lock().
+ */
+ CEF_MUST = 0x00000008,
+ /**
+ * tell the sub layers that never request a `real' lock. This flag is
+ * not used currently.
+ *
+ * cl_io::ci_lockreq and CEF_{MUST,NEVER} flags specify lockless
+ * conversion policy: ci_lockreq describes generic information of lock
+ * requirement for this IO, especially for locks which belong to the
+ * object doing IO; however, lock itself may have precise requirements
+ * that are described by the enqueue flags.
+ */
+ CEF_NEVER = 0x00000010,
+ /**
+ * for async glimpse lock.
+ */
+ CEF_AGL = 0x00000020,
+ /**
+ * mask of enq_flags.
+ */
+ CEF_MASK = 0x0000003f,
+};
+
+/**
+ * Link between lock and io. Intermediate structure is needed, because the
+ * same lock can be part of multiple io's simultaneously.
+ */
+struct cl_io_lock_link {
+ /** linkage into one of cl_lockset lists. */
+ struct list_head cill_linkage;
+ struct cl_lock_descr cill_descr;
+ struct cl_lock *cill_lock;
+ /** optional destructor */
+ void (*cill_fini)(const struct lu_env *env,
+ struct cl_io_lock_link *link);
+};
+
+/**
+ * Lock-set represents a collection of locks, that io needs at a
+ * time. Generally speaking, client tries to avoid holding multiple locks when
+ * possible, because
+ *
+ * - holding extent locks over multiple ost's introduces the danger of
+ * "cascading timeouts";
+ *
+ * - holding multiple locks over the same ost is still dead-lock prone,
+ * see comment in osc_lock_enqueue(),
+ *
+ * but there are certain situations where this is unavoidable:
+ *
+ * - O_APPEND writes have to take [0, EOF] lock for correctness;
+ *
+ * - truncate has to take [new-size, EOF] lock for correctness;
+ *
+ * - SNS has to take locks across full stripe for correctness;
+ *
+ * - in the case when user level buffer, supplied to {read,write}(file0),
+ * is a part of a memory mapped lustre file, client has to take a dlm
+ * locks on file0, and all files that back up the buffer (or a part of
+ * the buffer, that is being processed in the current chunk, in any
+ * case, there are situations where at least 2 locks are necessary).
+ *
+ * In such cases we at least try to take locks in the same consistent
+ * order. To this end, all locks are first collected, then sorted, and then
+ * enqueued.
+ */
+struct cl_lockset {
+ /** locks to be acquired. */
+ struct list_head cls_todo;
+ /** locks currently being processed. */
+ struct list_head cls_curr;
+ /** locks acquired. */
+ struct list_head cls_done;
+};
+
+/**
+ * Lock requirements(demand) for IO. It should be cl_io_lock_req,
+ * but 'req' is always to be thought as 'request' :-)
+ */
+enum cl_io_lock_dmd {
+ /** Always lock data (e.g., O_APPEND). */
+ CILR_MANDATORY = 0,
+ /** Layers are free to decide between local and global locking. */
+ CILR_MAYBE,
+ /** Never lock: there is no cache (e.g., liblustre). */
+ CILR_NEVER
+};
+
+enum cl_fsync_mode {
+ /** start writeback, do not wait for them to finish */
+ CL_FSYNC_NONE = 0,
+ /** start writeback and wait for them to finish */
+ CL_FSYNC_LOCAL = 1,
+ /** discard all of dirty pages in a specific file range */
+ CL_FSYNC_DISCARD = 2,
+ /** start writeback and make sure they have reached storage before
+ * return. OST_SYNC RPC must be issued and finished */
+ CL_FSYNC_ALL = 3
+};
+
+struct cl_io_rw_common {
+ loff_t crw_pos;
+ size_t crw_count;
+ int crw_nonblock;
+};
+
+
+/**
+ * State for io.
+ *
+ * cl_io is shared by all threads participating in this IO (in current
+ * implementation only one thread advances IO, but parallel IO design and
+ * concurrent copy_*_user() require multiple threads acting on the same IO. It
+ * is up to these threads to serialize their activities, including updates to
+ * mutable cl_io fields.
+ */
+struct cl_io {
+ /** type of this IO. Immutable after creation. */
+ enum cl_io_type ci_type;
+ /** current state of cl_io state machine. */
+ enum cl_io_state ci_state;
+ /** main object this io is against. Immutable after creation. */
+ struct cl_object *ci_obj;
+ /**
+ * Upper layer io, of which this io is a part of. Immutable after
+ * creation.
+ */
+ struct cl_io *ci_parent;
+ /** List of slices. Immutable after creation. */
+ struct list_head ci_layers;
+ /** list of locks (to be) acquired by this io. */
+ struct cl_lockset ci_lockset;
+ /** lock requirements, this is just a help info for sublayers. */
+ enum cl_io_lock_dmd ci_lockreq;
+ union {
+ struct cl_rd_io {
+ struct cl_io_rw_common rd;
+ } ci_rd;
+ struct cl_wr_io {
+ struct cl_io_rw_common wr;
+ int wr_append;
+ int wr_sync;
+ } ci_wr;
+ struct cl_io_rw_common ci_rw;
+ struct cl_setattr_io {
+ struct ost_lvb sa_attr;
+ unsigned int sa_valid;
+ struct obd_capa *sa_capa;
+ } ci_setattr;
+ struct cl_fault_io {
+ /** page index within file. */
+ pgoff_t ft_index;
+ /** bytes valid byte on a faulted page. */
+ int ft_nob;
+ /** writable page? for nopage() only */
+ int ft_writable;
+ /** page of an executable? */
+ int ft_executable;
+ /** page_mkwrite() */
+ int ft_mkwrite;
+ /** resulting page */
+ struct cl_page *ft_page;
+ } ci_fault;
+ struct cl_fsync_io {
+ loff_t fi_start;
+ loff_t fi_end;
+ struct obd_capa *fi_capa;
+ /** file system level fid */
+ struct lu_fid *fi_fid;
+ enum cl_fsync_mode fi_mode;
+ /* how many pages were written/discarded */
+ unsigned int fi_nr_written;
+ } ci_fsync;
+ } u;
+ struct cl_2queue ci_queue;
+ size_t ci_nob;
+ int ci_result;
+ unsigned int ci_continue:1,
+ /**
+ * This io has held grouplock, to inform sublayers that
+ * don't do lockless i/o.
+ */
+ ci_no_srvlock:1,
+ /**
+ * The whole IO need to be restarted because layout has been changed
+ */
+ ci_need_restart:1,
+ /**
+ * to not refresh layout - the IO issuer knows that the layout won't
+ * change(page operations, layout change causes all page to be
+ * discarded), or it doesn't matter if it changes(sync).
+ */
+ ci_ignore_layout:1,
+ /**
+ * Check if layout changed after the IO finishes. Mainly for HSM
+ * requirement. If IO occurs to openning files, it doesn't need to
+ * verify layout because HSM won't release openning files.
+ * Right now, only two opertaions need to verify layout: glimpse
+ * and setattr.
+ */
+ ci_verify_layout:1;
+ /**
+ * Number of pages owned by this IO. For invariant checking.
+ */
+ unsigned ci_owned_nr;
+};
+
+/** @} cl_io */
+
+/** \addtogroup cl_req cl_req
+ * @{ */
+/** \struct cl_req
+ * Transfer.
+ *
+ * There are two possible modes of transfer initiation on the client:
+ *
+ * - immediate transfer: this is started when a high level io wants a page
+ * or a collection of pages to be transferred right away. Examples:
+ * read-ahead, synchronous read in the case of non-page aligned write,
+ * page write-out as a part of extent lock cancellation, page write-out
+ * as a part of memory cleansing. Immediate transfer can be both
+ * cl_req_type::CRT_READ and cl_req_type::CRT_WRITE;
+ *
+ * - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens
+ * when io wants to transfer a page to the server some time later, when
+ * it can be done efficiently. Example: pages dirtied by the write(2)
+ * path.
+ *
+ * In any case, transfer takes place in the form of a cl_req, which is a
+ * representation for a network RPC.
+ *
+ * Pages queued for an opportunistic transfer are cached until it is decided
+ * that efficient RPC can be composed of them. This decision is made by "a
+ * req-formation engine", currently implemented as a part of osc
+ * layer. Req-formation depends on many factors: the size of the resulting
+ * RPC, whether or not multi-object RPCs are supported by the server,
+ * max-rpc-in-flight limitations, size of the dirty cache, etc.
+ *
+ * For the immediate transfer io submits a cl_page_list, that req-formation
+ * engine slices into cl_req's, possibly adding cached pages to some of
+ * the resulting req's.
+ *
+ * Whenever a page from cl_page_list is added to a newly constructed req, its
+ * cl_page_operations::cpo_prep() layer methods are called. At that moment,
+ * page state is atomically changed from cl_page_state::CPS_OWNED to
+ * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner
+ * is zeroed, and cl_page::cp_req is set to the
+ * req. cl_page_operations::cpo_prep() method at the particular layer might
+ * return -EALREADY to indicate that it does not need to submit this page
+ * at all. This is possible, for example, if page, submitted for read,
+ * became up-to-date in the meantime; and for write, the page don't have
+ * dirty bit marked. \see cl_io_submit_rw()
+ *
+ * Whenever a cached page is added to a newly constructed req, its
+ * cl_page_operations::cpo_make_ready() layer methods are called. At that
+ * moment, page state is atomically changed from cl_page_state::CPS_CACHED to
+ * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to
+ * req. cl_page_operations::cpo_make_ready() method at the particular layer
+ * might return -EAGAIN to indicate that this page is not eligible for the
+ * transfer right now.
+ *
+ * FUTURE
+ *
+ * Plan is to divide transfers into "priority bands" (indicated when
+ * submitting cl_page_list, and queuing a page for the opportunistic transfer)
+ * and allow glueing of cached pages to immediate transfers only within single
+ * band. This would make high priority transfers (like lock cancellation or
+ * memory pressure induced write-out) really high priority.
+ *
+ */
+
+/**
+ * Per-transfer attributes.
+ */
+struct cl_req_attr {
+ /** Generic attributes for the server consumption. */
+ struct obdo *cra_oa;
+ /** Capability. */
+ struct obd_capa *cra_capa;
+ /** Jobid */
+ char cra_jobid[JOBSTATS_JOBID_SIZE];
+};
+
+/**
+ * Transfer request operations definable at every layer.
+ *
+ * Concurrency: transfer formation engine synchronizes calls to all transfer
+ * methods.
+ */
+struct cl_req_operations {
+ /**
+ * Invoked top-to-bottom by cl_req_prep() when transfer formation is
+ * complete (all pages are added).
+ *
+ * \see osc_req_prep()
+ */
+ int (*cro_prep)(const struct lu_env *env,
+ const struct cl_req_slice *slice);
+ /**
+ * Called top-to-bottom to fill in \a oa fields. This is called twice
+ * with different flags, see bug 10150 and osc_build_req().
+ *
+ * \param obj an object from cl_req which attributes are to be set in
+ * \a oa.
+ *
+ * \param oa struct obdo where attributes are placed
+ *
+ * \param flags \a oa fields to be filled.
+ */
+ void (*cro_attr_set)(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, obd_valid flags);
+ /**
+ * Called top-to-bottom from cl_req_completion() to notify layers that
+ * transfer completed. Has to free all state allocated by
+ * cl_device_operations::cdo_req_init().
+ */
+ void (*cro_completion)(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret);
+};
+
+/**
+ * A per-object state that (potentially multi-object) transfer request keeps.
+ */
+struct cl_req_obj {
+ /** object itself */
+ struct cl_object *ro_obj;
+ /** reference to cl_req_obj::ro_obj. For debugging. */
+ struct lu_ref_link *ro_obj_ref;
+ /* something else? Number of pages for a given object? */
+};
+
+/**
+ * Transfer request.
+ *
+ * Transfer requests are not reference counted, because IO sub-system owns
+ * them exclusively and knows when to free them.
+ *
+ * Life cycle.
+ *
+ * cl_req is created by cl_req_alloc() that calls
+ * cl_device_operations::cdo_req_init() device methods to allocate per-req
+ * state in every layer.
+ *
+ * Then pages are added (cl_req_page_add()), req keeps track of all objects it
+ * contains pages for.
+ *
+ * Once all pages were collected, cl_page_operations::cpo_prep() method is
+ * called top-to-bottom. At that point layers can modify req, let it pass, or
+ * deny it completely. This is to support things like SNS that have transfer
+ * ordering requirements invisible to the individual req-formation engine.
+ *
+ * On transfer completion (or transfer timeout, or failure to initiate the
+ * transfer of an allocated req), cl_req_operations::cro_completion() method
+ * is called, after execution of cl_page_operations::cpo_completion() of all
+ * req's pages.
+ */
+struct cl_req {
+ enum cl_req_type crq_type;
+ /** A list of pages being transfered */
+ struct list_head crq_pages;
+ /** Number of pages in cl_req::crq_pages */
+ unsigned crq_nrpages;
+ /** An array of objects which pages are in ->crq_pages */
+ struct cl_req_obj *crq_o;
+ /** Number of elements in cl_req::crq_objs[] */
+ unsigned crq_nrobjs;
+ struct list_head crq_layers;
+};
+
+/**
+ * Per-layer state for request.
+ */
+struct cl_req_slice {
+ struct cl_req *crs_req;
+ struct cl_device *crs_dev;
+ struct list_head crs_linkage;
+ const struct cl_req_operations *crs_ops;
+};
+
+/* @} cl_req */
+
+enum cache_stats_item {
+ /** how many cache lookups were performed */
+ CS_lookup = 0,
+ /** how many times cache lookup resulted in a hit */
+ CS_hit,
+ /** how many entities are in the cache right now */
+ CS_total,
+ /** how many entities in the cache are actively used (and cannot be
+ * evicted) right now */
+ CS_busy,
+ /** how many entities were created at all */
+ CS_create,
+ CS_NR
+};
+
+#define CS_NAMES { "lookup", "hit", "total", "busy", "create" }
+
+/**
+ * Stats for a generic cache (similar to inode, lu_object, etc. caches).
+ */
+struct cache_stats {
+ const char *cs_name;
+ atomic_t cs_stats[CS_NR];
+};
+
+/** These are not exported so far */
+void cache_stats_init (struct cache_stats *cs, const char *name);
+
+/**
+ * Client-side site. This represents particular client stack. "Global"
+ * variables should (directly or indirectly) be added here to allow multiple
+ * clients to co-exist in the single address space.
+ */
+struct cl_site {
+ struct lu_site cs_lu;
+ /**
+ * Statistical counters. Atomics do not scale, something better like
+ * per-cpu counters is needed.
+ *
+ * These are exported as /proc/fs/lustre/llite/.../site
+ *
+ * When interpreting keep in mind that both sub-locks (and sub-pages)
+ * and top-locks (and top-pages) are accounted here.
+ */
+ struct cache_stats cs_pages;
+ struct cache_stats cs_locks;
+ atomic_t cs_pages_state[CPS_NR];
+ atomic_t cs_locks_state[CLS_NR];
+};
+
+int cl_site_init (struct cl_site *s, struct cl_device *top);
+void cl_site_fini (struct cl_site *s);
+void cl_stack_fini(const struct lu_env *env, struct cl_device *cl);
+
+/**
+ * Output client site statistical counters into a buffer. Suitable for
+ * ll_rd_*()-style functions.
+ */
+int cl_site_stats_print(const struct cl_site *site, struct seq_file *m);
+
+/**
+ * \name helpers
+ *
+ * Type conversion and accessory functions.
+ */
+/** @{ */
+
+static inline struct cl_site *lu2cl_site(const struct lu_site *site)
+{
+ return container_of(site, struct cl_site, cs_lu);
+}
+
+static inline int lu_device_is_cl(const struct lu_device *d)
+{
+ return d->ld_type->ldt_tags & LU_DEVICE_CL;
+}
+
+static inline struct cl_device *lu2cl_dev(const struct lu_device *d)
+{
+ LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d));
+ return container_of0(d, struct cl_device, cd_lu_dev);
+}
+
+static inline struct lu_device *cl2lu_dev(struct cl_device *d)
+{
+ return &d->cd_lu_dev;
+}
+
+static inline struct cl_object *lu2cl(const struct lu_object *o)
+{
+ LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
+ return container_of0(o, struct cl_object, co_lu);
+}
+
+static inline const struct cl_object_conf *
+lu2cl_conf(const struct lu_object_conf *conf)
+{
+ return container_of0(conf, struct cl_object_conf, coc_lu);
+}
+
+static inline struct cl_object *cl_object_next(const struct cl_object *obj)
+{
+ return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
+}
+
+static inline struct cl_device *cl_object_device(const struct cl_object *o)
+{
+ LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->co_lu.lo_dev));
+ return container_of0(o->co_lu.lo_dev, struct cl_device, cd_lu_dev);
+}
+
+static inline struct cl_object_header *luh2coh(const struct lu_object_header *h)
+{
+ return container_of0(h, struct cl_object_header, coh_lu);
+}
+
+static inline struct cl_site *cl_object_site(const struct cl_object *obj)
+{
+ return lu2cl_site(obj->co_lu.lo_dev->ld_site);
+}
+
+static inline
+struct cl_object_header *cl_object_header(const struct cl_object *obj)
+{
+ return luh2coh(obj->co_lu.lo_header);
+}
+
+static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t)
+{
+ return lu_device_init(&d->cd_lu_dev, t);
+}
+
+static inline void cl_device_fini(struct cl_device *d)
+{
+ lu_device_fini(&d->cd_lu_dev);
+}
+
+void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
+ struct cl_object *obj,
+ const struct cl_page_operations *ops);
+void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
+ struct cl_object *obj,
+ const struct cl_lock_operations *ops);
+void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
+ struct cl_object *obj, const struct cl_io_operations *ops);
+void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
+ struct cl_device *dev,
+ const struct cl_req_operations *ops);
+/** @} helpers */
+
+/** \defgroup cl_object cl_object
+ * @{ */
+struct cl_object *cl_object_top (struct cl_object *o);
+struct cl_object *cl_object_find(const struct lu_env *env, struct cl_device *cd,
+ const struct lu_fid *fid,
+ const struct cl_object_conf *c);
+
+int cl_object_header_init(struct cl_object_header *h);
+void cl_object_header_fini(struct cl_object_header *h);
+void cl_object_put (const struct lu_env *env, struct cl_object *o);
+void cl_object_get (struct cl_object *o);
+void cl_object_attr_lock (struct cl_object *o);
+void cl_object_attr_unlock(struct cl_object *o);
+int cl_object_attr_get (const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr);
+int cl_object_attr_set (const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid);
+int cl_object_glimpse (const struct lu_env *env, struct cl_object *obj,
+ struct ost_lvb *lvb);
+int cl_conf_set (const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf);
+void cl_object_prune (const struct lu_env *env, struct cl_object *obj);
+void cl_object_kill (const struct lu_env *env, struct cl_object *obj);
+int cl_object_has_locks (struct cl_object *obj);
+
+/**
+ * Returns true, iff \a o0 and \a o1 are slices of the same object.
+ */
+static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
+{
+ return cl_object_header(o0) == cl_object_header(o1);
+}
+
+static inline void cl_object_page_init(struct cl_object *clob, int size)
+{
+ clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
+ cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+}
+
+static inline void *cl_object_page_slice(struct cl_object *clob,
+ struct cl_page *page)
+{
+ return (void *)((char *)page + clob->co_slice_off);
+}
+
+/** @} cl_object */
+
+/** \defgroup cl_page cl_page
+ * @{ */
+enum {
+ CLP_GANG_OKAY = 0,
+ CLP_GANG_RESCHED,
+ CLP_GANG_AGAIN,
+ CLP_GANG_ABORT
+};
+
+/* callback of cl_page_gang_lookup() */
+typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *,
+ struct cl_page *, void *);
+int cl_page_gang_lookup (const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_io *io,
+ pgoff_t start, pgoff_t end,
+ cl_page_gang_cb_t cb, void *cbdata);
+struct cl_page *cl_page_lookup (struct cl_object_header *hdr,
+ pgoff_t index);
+struct cl_page *cl_page_find (const struct lu_env *env,
+ struct cl_object *obj,
+ pgoff_t idx, struct page *vmpage,
+ enum cl_page_type type);
+struct cl_page *cl_page_find_sub (const struct lu_env *env,
+ struct cl_object *obj,
+ pgoff_t idx, struct page *vmpage,
+ struct cl_page *parent);
+void cl_page_get (struct cl_page *page);
+void cl_page_put (const struct lu_env *env,
+ struct cl_page *page);
+void cl_page_print (const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct cl_page *pg);
+void cl_page_header_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct cl_page *pg);
+struct page *cl_page_vmpage (const struct lu_env *env,
+ struct cl_page *page);
+struct cl_page *cl_vmpage_page (struct page *vmpage, struct cl_object *obj);
+struct cl_page *cl_page_top (struct cl_page *page);
+
+const struct cl_page_slice *cl_page_at(const struct cl_page *page,
+ const struct lu_device_type *dtype);
+
+/**
+ * \name ownership
+ *
+ * Functions dealing with the ownership of page by io.
+ */
+/** @{ */
+
+int cl_page_own (const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page);
+int cl_page_own_try (const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page);
+void cl_page_assume (const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page);
+void cl_page_unassume (const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg);
+void cl_page_disown (const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page);
+int cl_page_is_owned (const struct cl_page *pg, const struct cl_io *io);
+
+/** @} ownership */
+
+/**
+ * \name transfer
+ *
+ * Functions dealing with the preparation of a page for a transfer, and
+ * tracking transfer state.
+ */
+/** @{ */
+int cl_page_prep (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg, enum cl_req_type crt);
+void cl_page_completion (const struct lu_env *env,
+ struct cl_page *pg, enum cl_req_type crt, int ioret);
+int cl_page_make_ready (const struct lu_env *env, struct cl_page *pg,
+ enum cl_req_type crt);
+int cl_page_cache_add (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg, enum cl_req_type crt);
+void cl_page_clip (const struct lu_env *env, struct cl_page *pg,
+ int from, int to);
+int cl_page_cancel (const struct lu_env *env, struct cl_page *page);
+int cl_page_flush (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg);
+
+/** @} transfer */
+
+
+/**
+ * \name helper routines
+ * Functions to discard, delete and export a cl_page.
+ */
+/** @{ */
+void cl_page_discard (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg);
+void cl_page_delete (const struct lu_env *env, struct cl_page *pg);
+int cl_page_unmap (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg);
+int cl_page_is_vmlocked (const struct lu_env *env,
+ const struct cl_page *pg);
+void cl_page_export (const struct lu_env *env,
+ struct cl_page *pg, int uptodate);
+int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page);
+loff_t cl_offset (const struct cl_object *obj, pgoff_t idx);
+pgoff_t cl_index (const struct cl_object *obj, loff_t offset);
+int cl_page_size (const struct cl_object *obj);
+int cl_pages_prune (const struct lu_env *env, struct cl_object *obj);
+
+void cl_lock_print (const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_lock *lock);
+void cl_lock_descr_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct cl_lock_descr *descr);
+/* @} helper */
+
+/** @} cl_page */
+
+/** \defgroup cl_lock cl_lock
+ * @{ */
+
+struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source);
+struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source);
+struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source);
+struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
+ struct cl_object *obj, pgoff_t index,
+ struct cl_lock *except, int pending,
+ int canceld);
+static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_page *page,
+ struct cl_lock *except,
+ int pending, int canceld)
+{
+ LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
+ return cl_lock_at_pgoff(env, obj, page->cp_index, except,
+ pending, canceld);
+}
+
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+ const struct lu_device_type *dtype);
+
+void cl_lock_get (struct cl_lock *lock);
+void cl_lock_get_trust (struct cl_lock *lock);
+void cl_lock_put (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_hold_add (const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source);
+void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source);
+void cl_lock_unhold (const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source);
+void cl_lock_release (const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source);
+void cl_lock_user_add (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_user_del (const struct lu_env *env, struct cl_lock *lock);
+
+enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
+ struct cl_lock *lock);
+void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
+ enum cl_lock_state state);
+int cl_lock_is_intransit(struct cl_lock *lock);
+
+int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
+ int keep_mutex);
+
+/** \name statemachine statemachine
+ * Interface to lock state machine consists of 3 parts:
+ *
+ * - "try" functions that attempt to effect a state transition. If state
+ * transition is not possible right now (e.g., if it has to wait for some
+ * asynchronous event to occur), these functions return
+ * cl_lock_transition::CLO_WAIT.
+ *
+ * - "non-try" functions that implement synchronous blocking interface on
+ * top of non-blocking "try" functions. These functions repeatedly call
+ * corresponding "try" versions, and if state transition is not possible
+ * immediately, wait for lock state change.
+ *
+ * - methods from cl_lock_operations, called by "try" functions. Lock can
+ * be advanced to the target state only when all layers voted that they
+ * are ready for this transition. "Try" functions call methods under lock
+ * mutex. If a layer had to release a mutex, it re-acquires it and returns
+ * cl_lock_transition::CLO_REPEAT, causing "try" function to call all
+ * layers again.
+ *
+ * TRY NON-TRY METHOD FINAL STATE
+ *
+ * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
+ *
+ * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD
+ *
+ * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED
+ *
+ * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD
+ *
+ * @{ */
+
+int cl_enqueue (const struct lu_env *env, struct cl_lock *lock,
+ struct cl_io *io, __u32 flags);
+int cl_wait (const struct lu_env *env, struct cl_lock *lock);
+void cl_unuse (const struct lu_env *env, struct cl_lock *lock);
+int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_io *io, __u32 flags);
+int cl_unuse_try (const struct lu_env *env, struct cl_lock *lock);
+int cl_wait_try (const struct lu_env *env, struct cl_lock *lock);
+int cl_use_try (const struct lu_env *env, struct cl_lock *lock, int atomic);
+
+/** @} statemachine */
+
+void cl_lock_signal (const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_state_wait (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_state_set (const struct lu_env *env, struct cl_lock *lock,
+ enum cl_lock_state state);
+int cl_queue_match (const struct list_head *queue,
+ const struct cl_lock_descr *need);
+
+void cl_lock_mutex_get (const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_mutex_try (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_mutex_put (const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_is_mutexed (struct cl_lock *lock);
+int cl_lock_nr_mutexed (const struct lu_env *env);
+int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_ext_match (const struct cl_lock_descr *has,
+ const struct cl_lock_descr *need);
+int cl_lock_descr_match(const struct cl_lock_descr *has,
+ const struct cl_lock_descr *need);
+int cl_lock_mode_match (enum cl_lock_mode has, enum cl_lock_mode need);
+int cl_lock_modify (const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_lock_descr *desc);
+
+void cl_lock_closure_init (const struct lu_env *env,
+ struct cl_lock_closure *closure,
+ struct cl_lock *origin, int wait);
+void cl_lock_closure_fini (struct cl_lock_closure *closure);
+int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_lock_closure *closure);
+void cl_lock_disclosure (const struct lu_env *env,
+ struct cl_lock_closure *closure);
+int cl_lock_enclosure (const struct lu_env *env, struct cl_lock *lock,
+ struct cl_lock_closure *closure);
+
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_error (const struct lu_env *env, struct cl_lock *lock, int error);
+void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
+
+unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
+
+/** @} cl_lock */
+
+/** \defgroup cl_io cl_io
+ * @{ */
+
+int cl_io_init (const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, struct cl_object *obj);
+int cl_io_sub_init (const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, struct cl_object *obj);
+int cl_io_rw_init (const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, loff_t pos, size_t count);
+int cl_io_loop (const struct lu_env *env, struct cl_io *io);
+
+void cl_io_fini (const struct lu_env *env, struct cl_io *io);
+int cl_io_iter_init (const struct lu_env *env, struct cl_io *io);
+void cl_io_iter_fini (const struct lu_env *env, struct cl_io *io);
+int cl_io_lock (const struct lu_env *env, struct cl_io *io);
+void cl_io_unlock (const struct lu_env *env, struct cl_io *io);
+int cl_io_start (const struct lu_env *env, struct cl_io *io);
+void cl_io_end (const struct lu_env *env, struct cl_io *io);
+int cl_io_lock_add (const struct lu_env *env, struct cl_io *io,
+ struct cl_io_lock_link *link);
+int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock_descr *descr);
+int cl_io_read_page (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page);
+int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, unsigned from, unsigned to);
+int cl_io_commit_write (const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, unsigned from, unsigned to);
+int cl_io_submit_rw (const struct lu_env *env, struct cl_io *io,
+ enum cl_req_type iot, struct cl_2queue *queue);
+int cl_io_submit_sync (const struct lu_env *env, struct cl_io *io,
+ enum cl_req_type iot, struct cl_2queue *queue,
+ long timeout);
+void cl_io_rw_advance (const struct lu_env *env, struct cl_io *io,
+ size_t nob);
+int cl_io_cancel (const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue);
+int cl_io_is_going (const struct lu_env *env);
+
+/**
+ * True, iff \a io is an O_APPEND write(2).
+ */
+static inline int cl_io_is_append(const struct cl_io *io)
+{
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
+}
+
+static inline int cl_io_is_sync_write(const struct cl_io *io)
+{
+ return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync;
+}
+
+static inline int cl_io_is_mkwrite(const struct cl_io *io)
+{
+ return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
+}
+
+/**
+ * True, iff \a io is a truncate(2).
+ */
+static inline int cl_io_is_trunc(const struct cl_io *io)
+{
+ return io->ci_type == CIT_SETATTR &&
+ (io->u.ci_setattr.sa_valid & ATTR_SIZE);
+}
+
+struct cl_io *cl_io_top(struct cl_io *io);
+
+void cl_io_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_io *io);
+
+#define CL_IO_SLICE_CLEAN(foo_io, base) \
+do { \
+ typeof(foo_io) __foo_io = (foo_io); \
+ \
+ CLASSERT(offsetof(typeof(*__foo_io), base) == 0); \
+ memset(&__foo_io->base + 1, 0, \
+ (sizeof *__foo_io) - sizeof __foo_io->base); \
+} while (0)
+
+/** @} cl_io */
+
+/** \defgroup cl_page_list cl_page_list
+ * @{ */
+
+/**
+ * Last page in the page list.
+ */
+static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
+{
+ LASSERT(plist->pl_nr > 0);
+ return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
+}
+
+/**
+ * Iterate over pages in a page list.
+ */
+#define cl_page_list_for_each(page, list) \
+ list_for_each_entry((page), &(list)->pl_pages, cp_batch)
+
+/**
+ * Iterate over pages in a page list, taking possible removals into account.
+ */
+#define cl_page_list_for_each_safe(page, temp, list) \
+ list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch)
+
+void cl_page_list_init (struct cl_page_list *plist);
+void cl_page_list_add (struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_move (struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page);
+void cl_page_list_splice (struct cl_page_list *list,
+ struct cl_page_list *head);
+void cl_page_list_del (const struct lu_env *env,
+ struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_disown (const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+int cl_page_list_own (const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_assume (const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_discard(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+int cl_page_list_unmap (const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini (const struct lu_env *env, struct cl_page_list *plist);
+
+void cl_2queue_init (struct cl_2queue *queue);
+void cl_2queue_add (struct cl_2queue *queue, struct cl_page *page);
+void cl_2queue_disown (const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_assume (const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_discard (const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_fini (const struct lu_env *env, struct cl_2queue *queue);
+void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
+
+/** @} cl_page_list */
+
+/** \defgroup cl_req cl_req
+ * @{ */
+struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
+ enum cl_req_type crt, int nr_objects);
+
+void cl_req_page_add (const struct lu_env *env, struct cl_req *req,
+ struct cl_page *page);
+void cl_req_page_done (const struct lu_env *env, struct cl_page *page);
+int cl_req_prep (const struct lu_env *env, struct cl_req *req);
+void cl_req_attr_set (const struct lu_env *env, struct cl_req *req,
+ struct cl_req_attr *attr, obd_valid flags);
+void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret);
+
+/** \defgroup cl_sync_io cl_sync_io
+ * @{ */
+
+/**
+ * Anchor for synchronous transfer. This is allocated on a stack by thread
+ * doing synchronous transfer, and a pointer to this structure is set up in
+ * every page submitted for transfer. Transfer completion routine updates
+ * anchor and wakes up waiting thread when transfer is complete.
+ */
+struct cl_sync_io {
+ /** number of pages yet to be transferred. */
+ atomic_t csi_sync_nr;
+ /** error code. */
+ int csi_sync_rc;
+ /** barrier of destroy this structure */
+ atomic_t csi_barrier;
+ /** completion to be signaled when transfer is complete. */
+ wait_queue_head_t csi_waitq;
+};
+
+void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
+int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, struct cl_sync_io *anchor,
+ long timeout);
+void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+
+/** @} cl_sync_io */
+
+/** @} cl_req */
+
+/** \defgroup cl_env cl_env
+ *
+ * lu_env handling for a client.
+ *
+ * lu_env is an environment within which lustre code executes. Its major part
+ * is lu_context---a fast memory allocation mechanism that is used to conserve
+ * precious kernel stack space. Originally lu_env was designed for a server,
+ * where
+ *
+ * - there is a (mostly) fixed number of threads, and
+ *
+ * - call chains have no non-lustre portions inserted between lustre code.
+ *
+ * On a client both these assumtpion fails, because every user thread can
+ * potentially execute lustre code as part of a system call, and lustre calls
+ * into VFS or MM that call back into lustre.
+ *
+ * To deal with that, cl_env wrapper functions implement the following
+ * optimizations:
+ *
+ * - allocation and destruction of environment is amortized by caching no
+ * longer used environments instead of destroying them;
+ *
+ * - there is a notion of "current" environment, attached to the kernel
+ * data structure representing current thread Top-level lustre code
+ * allocates an environment and makes it current, then calls into
+ * non-lustre code, that in turn calls lustre back. Low-level lustre
+ * code thus called can fetch environment created by the top-level code
+ * and reuse it, avoiding additional environment allocation.
+ * Right now, three interfaces can attach the cl_env to running thread:
+ * - cl_env_get
+ * - cl_env_implant
+ * - cl_env_reexit(cl_env_reenter had to be called priorly)
+ *
+ * \see lu_env, lu_context, lu_context_key
+ * @{ */
+
+struct cl_env_nest {
+ int cen_refcheck;
+ void *cen_cookie;
+};
+
+struct lu_env *cl_env_peek (int *refcheck);
+struct lu_env *cl_env_get (int *refcheck);
+struct lu_env *cl_env_alloc (int *refcheck, __u32 tags);
+struct lu_env *cl_env_nested_get (struct cl_env_nest *nest);
+void cl_env_put (struct lu_env *env, int *refcheck);
+void cl_env_nested_put (struct cl_env_nest *nest, struct lu_env *env);
+void *cl_env_reenter (void);
+void cl_env_reexit (void *cookie);
+void cl_env_implant (struct lu_env *env, int *refcheck);
+void cl_env_unplant (struct lu_env *env, int *refcheck);
+
+/** @} cl_env */
+
+/*
+ * Misc
+ */
+void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr);
+void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb);
+
+struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
+ struct lu_device_type *ldt,
+ struct lu_device *next);
+/** @} clio */
+
+int cl_global_init(void);
+void cl_global_fini(void);
+
+#endif /* _LINUX_CL_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/dt_object.h b/drivers/staging/lustre/lustre/include/dt_object.h
new file mode 100644
index 000000000000..e116bb21b529
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/dt_object.h
@@ -0,0 +1,1498 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_DT_OBJECT_H
+#define __LUSTRE_DT_OBJECT_H
+
+/** \defgroup dt dt
+ * Sub-class of lu_object with methods common for "data" objects in OST stack.
+ *
+ * Data objects behave like regular files: you can read/write them, get and
+ * set their attributes. Implementation of dt interface is supposed to
+ * implement some form of garbage collection, normally reference counting
+ * (nlink) based one.
+ *
+ * Examples: osd (lustre/osd) is an implementation of dt interface.
+ * @{
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+
+#include <linux/libcfs/libcfs.h>
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+
+struct thandle;
+struct dt_device;
+struct dt_object;
+struct dt_index_features;
+struct niobuf_local;
+struct niobuf_remote;
+struct ldlm_enqueue_info;
+
+typedef enum {
+ MNTOPT_USERXATTR = 0x00000001,
+ MNTOPT_ACL = 0x00000002,
+} mntopt_t;
+
+struct dt_device_param {
+ unsigned ddp_max_name_len;
+ unsigned ddp_max_nlink;
+ unsigned ddp_block_shift;
+ mntopt_t ddp_mntopts;
+ unsigned ddp_max_ea_size;
+ void *ddp_mnt; /* XXX: old code can retrieve mnt -bzzz */
+ int ddp_mount_type;
+ unsigned long long ddp_maxbytes;
+ /* percentage of available space to reserve for grant error margin */
+ int ddp_grant_reserved;
+ /* per-inode space consumption */
+ short ddp_inodespace;
+ /* per-fragment grant overhead to be used by client for grant
+ * calculation */
+ int ddp_grant_frag;
+};
+
+/**
+ * Per-transaction commit callback function
+ */
+struct dt_txn_commit_cb;
+typedef void (*dt_cb_t)(struct lu_env *env, struct thandle *th,
+ struct dt_txn_commit_cb *cb, int err);
+/**
+ * Special per-transaction callback for cases when just commit callback
+ * is needed and per-device callback are not convenient to use
+ */
+#define TRANS_COMMIT_CB_MAGIC 0xa0a00a0a
+#define MAX_COMMIT_CB_STR_LEN 32
+
+struct dt_txn_commit_cb {
+ struct list_head dcb_linkage;
+ dt_cb_t dcb_func;
+ __u32 dcb_magic;
+ char dcb_name[MAX_COMMIT_CB_STR_LEN];
+};
+
+/**
+ * Operations on dt device.
+ */
+struct dt_device_operations {
+ /**
+ * Return device-wide statistics.
+ */
+ int (*dt_statfs)(const struct lu_env *env,
+ struct dt_device *dev, struct obd_statfs *osfs);
+ /**
+ * Create transaction, described by \a param.
+ */
+ struct thandle *(*dt_trans_create)(const struct lu_env *env,
+ struct dt_device *dev);
+ /**
+ * Start transaction, described by \a param.
+ */
+ int (*dt_trans_start)(const struct lu_env *env,
+ struct dt_device *dev, struct thandle *th);
+ /**
+ * Finish previously started transaction.
+ */
+ int (*dt_trans_stop)(const struct lu_env *env,
+ struct thandle *th);
+ /**
+ * Add commit callback to the transaction.
+ */
+ int (*dt_trans_cb_add)(struct thandle *th,
+ struct dt_txn_commit_cb *dcb);
+ /**
+ * Return fid of root index object.
+ */
+ int (*dt_root_get)(const struct lu_env *env,
+ struct dt_device *dev, struct lu_fid *f);
+ /**
+ * Return device configuration data.
+ */
+ void (*dt_conf_get)(const struct lu_env *env,
+ const struct dt_device *dev,
+ struct dt_device_param *param);
+ /**
+ * handling device state, mostly for tests
+ */
+ int (*dt_sync)(const struct lu_env *env, struct dt_device *dev);
+ int (*dt_ro)(const struct lu_env *env, struct dt_device *dev);
+ /**
+ * Start a transaction commit asynchronously
+ *
+ * \param env environment
+ * \param dev dt_device to start commit on
+ *
+ * \return 0 success, negative value if error
+ */
+ int (*dt_commit_async)(const struct lu_env *env,
+ struct dt_device *dev);
+ /**
+ * Initialize capability context.
+ */
+ int (*dt_init_capa_ctxt)(const struct lu_env *env,
+ struct dt_device *dev,
+ int mode, unsigned long timeout,
+ __u32 alg, struct lustre_capa_key *keys);
+};
+
+struct dt_index_features {
+ /** required feature flags from enum dt_index_flags */
+ __u32 dif_flags;
+ /** minimal required key size */
+ size_t dif_keysize_min;
+ /** maximal required key size, 0 if no limit */
+ size_t dif_keysize_max;
+ /** minimal required record size */
+ size_t dif_recsize_min;
+ /** maximal required record size, 0 if no limit */
+ size_t dif_recsize_max;
+ /** pointer size for record */
+ size_t dif_ptrsize;
+};
+
+enum dt_index_flags {
+ /** index supports variable sized keys */
+ DT_IND_VARKEY = 1 << 0,
+ /** index supports variable sized records */
+ DT_IND_VARREC = 1 << 1,
+ /** index can be modified */
+ DT_IND_UPDATE = 1 << 2,
+ /** index supports records with non-unique (duplicate) keys */
+ DT_IND_NONUNQ = 1 << 3,
+ /**
+ * index support fixed-size keys sorted with natural numerical way
+ * and is able to return left-side value if no exact value found
+ */
+ DT_IND_RANGE = 1 << 4,
+};
+
+/**
+ * Features, required from index to support file system directories (mapping
+ * names to fids).
+ */
+extern const struct dt_index_features dt_directory_features;
+extern const struct dt_index_features dt_otable_features;
+extern const struct dt_index_features dt_lfsck_features;
+
+/* index features supported by the accounting objects */
+extern const struct dt_index_features dt_acct_features;
+
+/* index features supported by the quota global indexes */
+extern const struct dt_index_features dt_quota_glb_features;
+
+/* index features supported by the quota slave indexes */
+extern const struct dt_index_features dt_quota_slv_features;
+
+/**
+ * This is a general purpose dt allocation hint.
+ * It now contains the parent object.
+ * It can contain any allocation hint in the future.
+ */
+struct dt_allocation_hint {
+ struct dt_object *dah_parent;
+ __u32 dah_mode;
+};
+
+/**
+ * object type specifier.
+ */
+
+enum dt_format_type {
+ DFT_REGULAR,
+ DFT_DIR,
+ /** for mknod */
+ DFT_NODE,
+ /** for special index */
+ DFT_INDEX,
+ /** for symbolic link */
+ DFT_SYM,
+};
+
+/**
+ * object format specifier.
+ */
+struct dt_object_format {
+ /** type for dt object */
+ enum dt_format_type dof_type;
+ union {
+ struct dof_regular {
+ int striped;
+ } dof_reg;
+ struct dof_dir {
+ } dof_dir;
+ struct dof_node {
+ } dof_node;
+ /**
+ * special index need feature as parameter to create
+ * special idx
+ */
+ struct dof_index {
+ const struct dt_index_features *di_feat;
+ } dof_idx;
+ } u;
+};
+
+enum dt_format_type dt_mode_to_dft(__u32 mode);
+
+typedef __u64 dt_obj_version_t;
+
+/**
+ * Per-dt-object operations.
+ */
+struct dt_object_operations {
+ void (*do_read_lock)(const struct lu_env *env,
+ struct dt_object *dt, unsigned role);
+ void (*do_write_lock)(const struct lu_env *env,
+ struct dt_object *dt, unsigned role);
+ void (*do_read_unlock)(const struct lu_env *env,
+ struct dt_object *dt);
+ void (*do_write_unlock)(const struct lu_env *env,
+ struct dt_object *dt);
+ int (*do_write_locked)(const struct lu_env *env,
+ struct dt_object *dt);
+ /**
+ * Note: following ->do_{x,}attr_{set,get}() operations are very
+ * similar to ->moo_{x,}attr_{set,get}() operations in struct
+ * md_object_operations (see md_object.h). These operations are not in
+ * lu_object_operations, because ->do_{x,}attr_set() versions take
+ * transaction handle as an argument (this transaction is started by
+ * caller). We might factor ->do_{x,}attr_get() into
+ * lu_object_operations, but that would break existing symmetry.
+ */
+
+ /**
+ * Return standard attributes.
+ *
+ * precondition: lu_object_exists(&dt->do_lu);
+ */
+ int (*do_attr_get)(const struct lu_env *env,
+ struct dt_object *dt, struct lu_attr *attr,
+ struct lustre_capa *capa);
+ /**
+ * Set standard attributes.
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_declare_attr_set)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct lu_attr *attr,
+ struct thandle *handle);
+ int (*do_attr_set)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct lu_attr *attr,
+ struct thandle *handle,
+ struct lustre_capa *capa);
+ /**
+ * Return a value of an extended attribute.
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, const char *name,
+ struct lustre_capa *capa);
+ /**
+ * Set value of an extended attribute.
+ *
+ * \a fl - flags from enum lu_xattr_flags
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_declare_xattr_set)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct lu_buf *buf,
+ const char *name, int fl,
+ struct thandle *handle);
+ int (*do_xattr_set)(const struct lu_env *env,
+ struct dt_object *dt, const struct lu_buf *buf,
+ const char *name, int fl, struct thandle *handle,
+ struct lustre_capa *capa);
+ /**
+ * Delete existing extended attribute.
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_declare_xattr_del)(const struct lu_env *env,
+ struct dt_object *dt,
+ const char *name, struct thandle *handle);
+ int (*do_xattr_del)(const struct lu_env *env,
+ struct dt_object *dt,
+ const char *name, struct thandle *handle,
+ struct lustre_capa *capa);
+ /**
+ * Place list of existing extended attributes into \a buf (which has
+ * length len).
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_xattr_list)(const struct lu_env *env,
+ struct dt_object *dt, struct lu_buf *buf,
+ struct lustre_capa *capa);
+ /**
+ * Init allocation hint using parent object and child mode.
+ * (1) The \a parent might be NULL if this is a partial creation for
+ * remote object.
+ * (2) The type of child is in \a child_mode.
+ * (3) The result hint is stored in \a ah;
+ */
+ void (*do_ah_init)(const struct lu_env *env,
+ struct dt_allocation_hint *ah,
+ struct dt_object *parent,
+ struct dt_object *child,
+ umode_t child_mode);
+ /**
+ * Create new object on this device.
+ *
+ * precondition: !dt_object_exists(dt);
+ * postcondition: ergo(result == 0, dt_object_exists(dt));
+ */
+ int (*do_declare_create)(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lu_attr *attr,
+ struct dt_allocation_hint *hint,
+ struct dt_object_format *dof,
+ struct thandle *th);
+ int (*do_create)(const struct lu_env *env, struct dt_object *dt,
+ struct lu_attr *attr,
+ struct dt_allocation_hint *hint,
+ struct dt_object_format *dof,
+ struct thandle *th);
+
+ /**
+ Destroy object on this device
+ * precondition: !dt_object_exists(dt);
+ * postcondition: ergo(result == 0, dt_object_exists(dt));
+ */
+ int (*do_declare_destroy)(const struct lu_env *env,
+ struct dt_object *dt,
+ struct thandle *th);
+ int (*do_destroy)(const struct lu_env *env, struct dt_object *dt,
+ struct thandle *th);
+
+ /**
+ * Announce that this object is going to be used as an index. This
+ * operation check that object supports indexing operations and
+ * installs appropriate dt_index_operations vector on success.
+ *
+ * Also probes for features. Operation is successful if all required
+ * features are supported.
+ */
+ int (*do_index_try)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_index_features *feat);
+ /**
+ * Add nlink of the object
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_declare_ref_add)(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th);
+ int (*do_ref_add)(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th);
+ /**
+ * Del nlink of the object
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_declare_ref_del)(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th);
+ int (*do_ref_del)(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th);
+
+ struct obd_capa *(*do_capa_get)(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lustre_capa *old,
+ __u64 opc);
+ int (*do_object_sync)(const struct lu_env *, struct dt_object *);
+ /**
+ * Get object info of next level. Currently, only get inode from osd.
+ * This is only used by quota b=16542
+ * precondition: dt_object_exists(dt);
+ */
+ int (*do_data_get)(const struct lu_env *env, struct dt_object *dt,
+ void **data);
+
+ /**
+ * Lock object.
+ */
+ int (*do_object_lock)(const struct lu_env *env, struct dt_object *dt,
+ struct lustre_handle *lh,
+ struct ldlm_enqueue_info *einfo,
+ void *policy);
+};
+
+/**
+ * Per-dt-object operations on "file body".
+ */
+struct dt_body_operations {
+ /**
+ * precondition: dt_object_exists(dt);
+ */
+ ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, loff_t *pos,
+ struct lustre_capa *capa);
+ /**
+ * precondition: dt_object_exists(dt);
+ */
+ ssize_t (*dbo_declare_write)(const struct lu_env *env,
+ struct dt_object *dt,
+ const loff_t size, loff_t pos,
+ struct thandle *handle);
+ ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
+ const struct lu_buf *buf, loff_t *pos,
+ struct thandle *handle, struct lustre_capa *capa,
+ int ignore_quota);
+ /*
+ * methods for zero-copy IO
+ */
+
+ /*
+ * precondition: dt_object_exists(dt);
+ * returns:
+ * < 0 - error code
+ * = 0 - illegal
+ * > 0 - number of local buffers prepared
+ */
+ int (*dbo_bufs_get)(const struct lu_env *env, struct dt_object *dt,
+ loff_t pos, ssize_t len, struct niobuf_local *lb,
+ int rw, struct lustre_capa *capa);
+ /*
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dbo_bufs_put)(const struct lu_env *env, struct dt_object *dt,
+ struct niobuf_local *lb, int nr);
+ /*
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dbo_write_prep)(const struct lu_env *env, struct dt_object *dt,
+ struct niobuf_local *lb, int nr);
+ /*
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dbo_declare_write_commit)(const struct lu_env *env,
+ struct dt_object *dt,
+ struct niobuf_local *,
+ int, struct thandle *);
+ /*
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dbo_write_commit)(const struct lu_env *env, struct dt_object *dt,
+ struct niobuf_local *, int, struct thandle *);
+ /*
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dbo_read_prep)(const struct lu_env *env, struct dt_object *dt,
+ struct niobuf_local *lnb, int nr);
+ int (*dbo_fiemap_get)(const struct lu_env *env, struct dt_object *dt,
+ struct ll_user_fiemap *fm);
+ /**
+ * Punch object's content
+ * precondition: regular object, not index
+ */
+ int (*dbo_declare_punch)(const struct lu_env *, struct dt_object *,
+ __u64, __u64, struct thandle *th);
+ int (*dbo_punch)(const struct lu_env *env, struct dt_object *dt,
+ __u64 start, __u64 end, struct thandle *th,
+ struct lustre_capa *capa);
+};
+
+/**
+ * Incomplete type of index record.
+ */
+struct dt_rec;
+
+/**
+ * Incomplete type of index key.
+ */
+struct dt_key;
+
+/**
+ * Incomplete type of dt iterator.
+ */
+struct dt_it;
+
+/**
+ * Per-dt-object operations on object as index.
+ */
+struct dt_index_operations {
+ /**
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt,
+ struct dt_rec *rec, const struct dt_key *key,
+ struct lustre_capa *capa);
+ /**
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dio_declare_insert)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_rec *rec,
+ const struct dt_key *key,
+ struct thandle *handle);
+ int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
+ const struct dt_rec *rec, const struct dt_key *key,
+ struct thandle *handle, struct lustre_capa *capa,
+ int ignore_quota);
+ /**
+ * precondition: dt_object_exists(dt);
+ */
+ int (*dio_declare_delete)(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_key *key,
+ struct thandle *handle);
+ int (*dio_delete)(const struct lu_env *env, struct dt_object *dt,
+ const struct dt_key *key, struct thandle *handle,
+ struct lustre_capa *capa);
+ /**
+ * Iterator interface
+ */
+ struct dt_it_ops {
+ /**
+ * Allocate and initialize new iterator.
+ *
+ * precondition: dt_object_exists(dt);
+ */
+ struct dt_it *(*init)(const struct lu_env *env,
+ struct dt_object *dt,
+ __u32 attr,
+ struct lustre_capa *capa);
+ void (*fini)(const struct lu_env *env,
+ struct dt_it *di);
+ int (*get)(const struct lu_env *env,
+ struct dt_it *di,
+ const struct dt_key *key);
+ void (*put)(const struct lu_env *env,
+ struct dt_it *di);
+ int (*next)(const struct lu_env *env,
+ struct dt_it *di);
+ struct dt_key *(*key)(const struct lu_env *env,
+ const struct dt_it *di);
+ int (*key_size)(const struct lu_env *env,
+ const struct dt_it *di);
+ int (*rec)(const struct lu_env *env,
+ const struct dt_it *di,
+ struct dt_rec *rec,
+ __u32 attr);
+ __u64 (*store)(const struct lu_env *env,
+ const struct dt_it *di);
+ int (*load)(const struct lu_env *env,
+ const struct dt_it *di, __u64 hash);
+ int (*key_rec)(const struct lu_env *env,
+ const struct dt_it *di, void* key_rec);
+ } dio_it;
+};
+
+enum dt_otable_it_valid {
+ DOIV_ERROR_HANDLE = 0x0001,
+};
+
+enum dt_otable_it_flags {
+ /* Exit when fail. */
+ DOIF_FAILOUT = 0x0001,
+
+ /* Reset iteration position to the device beginning. */
+ DOIF_RESET = 0x0002,
+
+ /* There is up layer component uses the iteration. */
+ DOIF_OUTUSED = 0x0004,
+};
+
+/* otable based iteration needs to use the common DT interation APIs.
+ * To initialize the iteration, it needs call dio_it::init() firstly.
+ * Here is how the otable based iteration should prepare arguments to
+ * call dt_it_ops::init().
+ *
+ * For otable based iteration, the 32-bits 'attr' for dt_it_ops::init()
+ * is composed of two parts:
+ * low 16-bits is for valid bits, high 16-bits is for flags bits. */
+#define DT_OTABLE_IT_FLAGS_SHIFT 16
+#define DT_OTABLE_IT_FLAGS_MASK 0xffff0000
+
+struct dt_device {
+ struct lu_device dd_lu_dev;
+ const struct dt_device_operations *dd_ops;
+
+ /**
+ * List of dt_txn_callback (see below). This is not protected in any
+ * way, because callbacks are supposed to be added/deleted only during
+ * single-threaded start-up shut-down procedures.
+ */
+ struct list_head dd_txn_callbacks;
+};
+
+int dt_device_init(struct dt_device *dev, struct lu_device_type *t);
+void dt_device_fini(struct dt_device *dev);
+
+static inline int lu_device_is_dt(const struct lu_device *d)
+{
+ return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
+}
+
+static inline struct dt_device * lu2dt_dev(struct lu_device *l)
+{
+ LASSERT(lu_device_is_dt(l));
+ return container_of0(l, struct dt_device, dd_lu_dev);
+}
+
+struct dt_object {
+ struct lu_object do_lu;
+ const struct dt_object_operations *do_ops;
+ const struct dt_body_operations *do_body_ops;
+ const struct dt_index_operations *do_index_ops;
+};
+
+/*
+ * In-core representation of per-device local object OID storage
+ */
+struct local_oid_storage {
+ /* all initialized llog systems on this node linked by this */
+ struct list_head los_list;
+
+ /* how many handle's reference this los has */
+ atomic_t los_refcount;
+ struct dt_device *los_dev;
+ struct dt_object *los_obj;
+
+ /* data used to generate new fids */
+ struct mutex los_id_lock;
+ __u64 los_seq;
+ __u32 los_last_oid;
+};
+
+static inline struct dt_object *lu2dt(struct lu_object *l)
+{
+ LASSERT(l == NULL || IS_ERR(l) || lu_device_is_dt(l->lo_dev));
+ return container_of0(l, struct dt_object, do_lu);
+}
+
+int dt_object_init(struct dt_object *obj,
+ struct lu_object_header *h, struct lu_device *d);
+
+void dt_object_fini(struct dt_object *obj);
+
+static inline int dt_object_exists(const struct dt_object *dt)
+{
+ return lu_object_exists(&dt->do_lu);
+}
+
+static inline int dt_object_remote(const struct dt_object *dt)
+{
+ return lu_object_remote(&dt->do_lu);
+}
+
+static inline struct dt_object *lu2dt_obj(struct lu_object *o)
+{
+ LASSERT(ergo(o != NULL, lu_device_is_dt(o->lo_dev)));
+ return container_of0(o, struct dt_object, do_lu);
+}
+
+/**
+ * This is the general purpose transaction handle.
+ * 1. Transaction Life Cycle
+ * This transaction handle is allocated upon starting a new transaction,
+ * and deallocated after this transaction is committed.
+ * 2. Transaction Nesting
+ * We do _NOT_ support nested transaction. So, every thread should only
+ * have one active transaction, and a transaction only belongs to one
+ * thread. Due to this, transaction handle need no reference count.
+ * 3. Transaction & dt_object locking
+ * dt_object locks should be taken inside transaction.
+ * 4. Transaction & RPC
+ * No RPC request should be issued inside transaction.
+ */
+struct thandle {
+ /** the dt device on which the transactions are executed */
+ struct dt_device *th_dev;
+
+ /** context for this transaction, tag is LCT_TX_HANDLE */
+ struct lu_context th_ctx;
+
+ /** additional tags (layers can add in declare) */
+ __u32 th_tags;
+
+ /** the last operation result in this transaction.
+ * this value is used in recovery */
+ __s32 th_result;
+
+ /** whether we need sync commit */
+ unsigned int th_sync:1;
+
+ /* local transation, no need to inform other layers */
+ unsigned int th_local:1;
+
+ /* In DNE, one transaction can be disassemblied into
+ * updates on several different MDTs, and these updates
+ * will be attached to th_remote_update_list per target.
+ * Only single thread will access the list, no need lock
+ */
+ struct list_head th_remote_update_list;
+ struct update_request *th_current_request;
+};
+
+/**
+ * Transaction call-backs.
+ *
+ * These are invoked by osd (or underlying transaction engine) when
+ * transaction changes state.
+ *
+ * Call-backs are used by upper layers to modify transaction parameters and to
+ * perform some actions on for each transaction state transition. Typical
+ * example is mdt registering call-back to write into last-received file
+ * before each transaction commit.
+ */
+struct dt_txn_callback {
+ int (*dtc_txn_start)(const struct lu_env *env,
+ struct thandle *txn, void *cookie);
+ int (*dtc_txn_stop)(const struct lu_env *env,
+ struct thandle *txn, void *cookie);
+ void (*dtc_txn_commit)(struct thandle *txn, void *cookie);
+ void *dtc_cookie;
+ __u32 dtc_tag;
+ struct list_head dtc_linkage;
+};
+
+void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb);
+void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb);
+
+int dt_txn_hook_start(const struct lu_env *env,
+ struct dt_device *dev, struct thandle *txn);
+int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn);
+void dt_txn_hook_commit(struct thandle *txn);
+
+int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj);
+
+/**
+ * Callback function used for parsing path.
+ * \see llo_store_resolve
+ */
+typedef int (*dt_entry_func_t)(const struct lu_env *env,
+ const char *name,
+ void *pvt);
+
+#define DT_MAX_PATH 1024
+
+int dt_path_parser(const struct lu_env *env,
+ char *local, dt_entry_func_t entry_func,
+ void *data);
+
+struct dt_object *
+dt_store_resolve(const struct lu_env *env, struct dt_device *dt,
+ const char *path, struct lu_fid *fid);
+
+struct dt_object *dt_store_open(const struct lu_env *env,
+ struct dt_device *dt,
+ const char *dirname,
+ const char *filename,
+ struct lu_fid *fid);
+
+struct dt_object *dt_find_or_create(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object_format *dof,
+ struct lu_attr *attr);
+
+struct dt_object *dt_locate_at(const struct lu_env *env,
+ struct dt_device *dev,
+ const struct lu_fid *fid,
+ struct lu_device *top_dev);
+static inline struct dt_object *
+dt_locate(const struct lu_env *env, struct dt_device *dev,
+ const struct lu_fid *fid)
+{
+ return dt_locate_at(env, dev, fid, dev->dd_lu_dev.ld_site->ls_top_dev);
+}
+
+
+int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev,
+ const struct lu_fid *first_fid,
+ struct local_oid_storage **los);
+void local_oid_storage_fini(const struct lu_env *env,
+ struct local_oid_storage *los);
+int local_object_fid_generate(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct lu_fid *fid);
+int local_object_declare_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o,
+ struct lu_attr *attr,
+ struct dt_object_format *dof,
+ struct thandle *th);
+int local_object_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o,
+ struct lu_attr *attr, struct dt_object_format *dof,
+ struct thandle *th);
+struct dt_object *local_file_find_or_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *parent,
+ const char *name, __u32 mode);
+struct dt_object *local_file_find_or_create_with_fid(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object *parent,
+ const char *name,
+ __u32 mode);
+struct dt_object *
+local_index_find_or_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *parent,
+ const char *name, __u32 mode,
+ const struct dt_index_features *ft);
+struct dt_object *
+local_index_find_or_create_with_fid(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object *parent,
+ const char *name, __u32 mode,
+ const struct dt_index_features *ft);
+int local_object_unlink(const struct lu_env *env, struct dt_device *dt,
+ struct dt_object *parent, const char *name);
+
+static inline int dt_object_lock(const struct lu_env *env,
+ struct dt_object *o, struct lustre_handle *lh,
+ struct ldlm_enqueue_info *einfo,
+ void *policy)
+{
+ LASSERT(o);
+ LASSERT(o->do_ops);
+ LASSERT(o->do_ops->do_object_lock);
+ return o->do_ops->do_object_lock(env, o, lh, einfo, policy);
+}
+
+int dt_lookup_dir(const struct lu_env *env, struct dt_object *dir,
+ const char *name, struct lu_fid *fid);
+
+static inline int dt_object_sync(const struct lu_env *env,
+ struct dt_object *o)
+{
+ LASSERT(o);
+ LASSERT(o->do_ops);
+ LASSERT(o->do_ops->do_object_sync);
+ return o->do_ops->do_object_sync(env, o);
+}
+
+int dt_declare_version_set(const struct lu_env *env, struct dt_object *o,
+ struct thandle *th);
+void dt_version_set(const struct lu_env *env, struct dt_object *o,
+ dt_obj_version_t version, struct thandle *th);
+dt_obj_version_t dt_version_get(const struct lu_env *env, struct dt_object *o);
+
+
+int dt_read(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, loff_t *pos);
+int dt_record_read(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, loff_t *pos);
+int dt_record_write(const struct lu_env *env, struct dt_object *dt,
+ const struct lu_buf *buf, loff_t *pos, struct thandle *th);
+typedef int (*dt_index_page_build_t)(const struct lu_env *env,
+ union lu_page *lp, int nob,
+ const struct dt_it_ops *iops,
+ struct dt_it *it, __u32 attr, void *arg);
+int dt_index_walk(const struct lu_env *env, struct dt_object *obj,
+ const struct lu_rdpg *rdpg, dt_index_page_build_t filler,
+ void *arg);
+int dt_index_read(const struct lu_env *env, struct dt_device *dev,
+ struct idx_info *ii, const struct lu_rdpg *rdpg);
+
+static inline struct thandle *dt_trans_create(const struct lu_env *env,
+ struct dt_device *d)
+{
+ LASSERT(d->dd_ops->dt_trans_create);
+ return d->dd_ops->dt_trans_create(env, d);
+}
+
+static inline int dt_trans_start(const struct lu_env *env,
+ struct dt_device *d, struct thandle *th)
+{
+ LASSERT(d->dd_ops->dt_trans_start);
+ return d->dd_ops->dt_trans_start(env, d, th);
+}
+
+/* for this transaction hooks shouldn't be called */
+static inline int dt_trans_start_local(const struct lu_env *env,
+ struct dt_device *d, struct thandle *th)
+{
+ LASSERT(d->dd_ops->dt_trans_start);
+ th->th_local = 1;
+ return d->dd_ops->dt_trans_start(env, d, th);
+}
+
+static inline int dt_trans_stop(const struct lu_env *env,
+ struct dt_device *d, struct thandle *th)
+{
+ LASSERT(d->dd_ops->dt_trans_stop);
+ return d->dd_ops->dt_trans_stop(env, th);
+}
+
+static inline int dt_trans_cb_add(struct thandle *th,
+ struct dt_txn_commit_cb *dcb)
+{
+ LASSERT(th->th_dev->dd_ops->dt_trans_cb_add);
+ dcb->dcb_magic = TRANS_COMMIT_CB_MAGIC;
+ return th->th_dev->dd_ops->dt_trans_cb_add(th, dcb);
+}
+/** @} dt */
+
+
+static inline int dt_declare_record_write(const struct lu_env *env,
+ struct dt_object *dt,
+ int size, loff_t pos,
+ struct thandle *th)
+{
+ int rc;
+
+ LASSERTF(dt != NULL, "dt is NULL when we want to write record\n");
+ LASSERT(th != NULL);
+ LASSERT(dt->do_body_ops);
+ LASSERT(dt->do_body_ops->dbo_declare_write);
+ rc = dt->do_body_ops->dbo_declare_write(env, dt, size, pos, th);
+ return rc;
+}
+
+static inline int dt_declare_create(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lu_attr *attr,
+ struct dt_allocation_hint *hint,
+ struct dt_object_format *dof,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_create);
+ return dt->do_ops->do_declare_create(env, dt, attr, hint, dof, th);
+}
+
+static inline int dt_create(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lu_attr *attr,
+ struct dt_allocation_hint *hint,
+ struct dt_object_format *dof,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_create);
+ return dt->do_ops->do_create(env, dt, attr, hint, dof, th);
+}
+
+static inline int dt_declare_destroy(const struct lu_env *env,
+ struct dt_object *dt,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_destroy);
+ return dt->do_ops->do_declare_destroy(env, dt, th);
+}
+
+static inline int dt_destroy(const struct lu_env *env,
+ struct dt_object *dt,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_destroy);
+ return dt->do_ops->do_destroy(env, dt, th);
+}
+
+static inline void dt_read_lock(const struct lu_env *env,
+ struct dt_object *dt,
+ unsigned role)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_read_lock);
+ dt->do_ops->do_read_lock(env, dt, role);
+}
+
+static inline void dt_write_lock(const struct lu_env *env,
+ struct dt_object *dt,
+ unsigned role)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_write_lock);
+ dt->do_ops->do_write_lock(env, dt, role);
+}
+
+static inline void dt_read_unlock(const struct lu_env *env,
+ struct dt_object *dt)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_read_unlock);
+ dt->do_ops->do_read_unlock(env, dt);
+}
+
+static inline void dt_write_unlock(const struct lu_env *env,
+ struct dt_object *dt)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_write_unlock);
+ dt->do_ops->do_write_unlock(env, dt);
+}
+
+static inline int dt_write_locked(const struct lu_env *env,
+ struct dt_object *dt)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_write_locked);
+ return dt->do_ops->do_write_locked(env, dt);
+}
+
+static inline int dt_attr_get(const struct lu_env *env, struct dt_object *dt,
+ struct lu_attr *la, void *arg)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_attr_get);
+ return dt->do_ops->do_attr_get(env, dt, la, arg);
+}
+
+static inline int dt_declare_attr_set(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct lu_attr *la,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_attr_set);
+ return dt->do_ops->do_declare_attr_set(env, dt, la, th);
+}
+
+static inline int dt_attr_set(const struct lu_env *env, struct dt_object *dt,
+ const struct lu_attr *la, struct thandle *th,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_attr_set);
+ return dt->do_ops->do_attr_set(env, dt, la, th, capa);
+}
+
+static inline int dt_declare_ref_add(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_ref_add);
+ return dt->do_ops->do_declare_ref_add(env, dt, th);
+}
+
+static inline int dt_ref_add(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_ref_add);
+ return dt->do_ops->do_ref_add(env, dt, th);
+}
+
+static inline int dt_declare_ref_del(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_ref_del);
+ return dt->do_ops->do_declare_ref_del(env, dt, th);
+}
+
+static inline int dt_ref_del(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_ref_del);
+ return dt->do_ops->do_ref_del(env, dt, th);
+}
+
+static inline struct obd_capa *dt_capa_get(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lustre_capa *old, __u64 opc)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_ref_del);
+ return dt->do_ops->do_capa_get(env, dt, old, opc);
+}
+
+static inline int dt_bufs_get(const struct lu_env *env, struct dt_object *d,
+ struct niobuf_remote *rnb,
+ struct niobuf_local *lnb, int rw,
+ struct lustre_capa *capa)
+{
+ LASSERT(d);
+ LASSERT(d->do_body_ops);
+ LASSERT(d->do_body_ops->dbo_bufs_get);
+ return d->do_body_ops->dbo_bufs_get(env, d, rnb->offset,
+ rnb->len, lnb, rw, capa);
+}
+
+static inline int dt_bufs_put(const struct lu_env *env, struct dt_object *d,
+ struct niobuf_local *lnb, int n)
+{
+ LASSERT(d);
+ LASSERT(d->do_body_ops);
+ LASSERT(d->do_body_ops->dbo_bufs_put);
+ return d->do_body_ops->dbo_bufs_put(env, d, lnb, n);
+}
+
+static inline int dt_write_prep(const struct lu_env *env, struct dt_object *d,
+ struct niobuf_local *lnb, int n)
+{
+ LASSERT(d);
+ LASSERT(d->do_body_ops);
+ LASSERT(d->do_body_ops->dbo_write_prep);
+ return d->do_body_ops->dbo_write_prep(env, d, lnb, n);
+}
+
+static inline int dt_declare_write_commit(const struct lu_env *env,
+ struct dt_object *d,
+ struct niobuf_local *lnb,
+ int n, struct thandle *th)
+{
+ LASSERTF(d != NULL, "dt is NULL when we want to declare write\n");
+ LASSERT(th != NULL);
+ return d->do_body_ops->dbo_declare_write_commit(env, d, lnb, n, th);
+}
+
+
+static inline int dt_write_commit(const struct lu_env *env,
+ struct dt_object *d, struct niobuf_local *lnb,
+ int n, struct thandle *th)
+{
+ LASSERT(d);
+ LASSERT(d->do_body_ops);
+ LASSERT(d->do_body_ops->dbo_write_commit);
+ return d->do_body_ops->dbo_write_commit(env, d, lnb, n, th);
+}
+
+static inline int dt_read_prep(const struct lu_env *env, struct dt_object *d,
+ struct niobuf_local *lnb, int n)
+{
+ LASSERT(d);
+ LASSERT(d->do_body_ops);
+ LASSERT(d->do_body_ops->dbo_read_prep);
+ return d->do_body_ops->dbo_read_prep(env, d, lnb, n);
+}
+
+static inline int dt_declare_punch(const struct lu_env *env,
+ struct dt_object *dt, __u64 start,
+ __u64 end, struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_body_ops);
+ LASSERT(dt->do_body_ops->dbo_declare_punch);
+ return dt->do_body_ops->dbo_declare_punch(env, dt, start, end, th);
+}
+
+static inline int dt_punch(const struct lu_env *env, struct dt_object *dt,
+ __u64 start, __u64 end, struct thandle *th,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_body_ops);
+ LASSERT(dt->do_body_ops->dbo_punch);
+ return dt->do_body_ops->dbo_punch(env, dt, start, end, th, capa);
+}
+
+static inline int dt_fiemap_get(const struct lu_env *env, struct dt_object *d,
+ struct ll_user_fiemap *fm)
+{
+ LASSERT(d);
+ if (d->do_body_ops == NULL)
+ return -EPROTO;
+ if (d->do_body_ops->dbo_fiemap_get == NULL)
+ return -EOPNOTSUPP;
+ return d->do_body_ops->dbo_fiemap_get(env, d, fm);
+}
+
+static inline int dt_statfs(const struct lu_env *env, struct dt_device *dev,
+ struct obd_statfs *osfs)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_statfs);
+ return dev->dd_ops->dt_statfs(env, dev, osfs);
+}
+
+static inline int dt_root_get(const struct lu_env *env, struct dt_device *dev,
+ struct lu_fid *f)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_root_get);
+ return dev->dd_ops->dt_root_get(env, dev, f);
+}
+
+static inline void dt_conf_get(const struct lu_env *env,
+ const struct dt_device *dev,
+ struct dt_device_param *param)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_conf_get);
+ return dev->dd_ops->dt_conf_get(env, dev, param);
+}
+
+static inline int dt_sync(const struct lu_env *env, struct dt_device *dev)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_sync);
+ return dev->dd_ops->dt_sync(env, dev);
+}
+
+static inline int dt_ro(const struct lu_env *env, struct dt_device *dev)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_ro);
+ return dev->dd_ops->dt_ro(env, dev);
+}
+
+static inline int dt_declare_insert(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_rec *rec,
+ const struct dt_key *key,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_index_ops);
+ LASSERT(dt->do_index_ops->dio_declare_insert);
+ return dt->do_index_ops->dio_declare_insert(env, dt, rec, key, th);
+}
+
+static inline int dt_insert(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_rec *rec,
+ const struct dt_key *key,
+ struct thandle *th,
+ struct lustre_capa *capa,
+ int noquota)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_index_ops);
+ LASSERT(dt->do_index_ops->dio_insert);
+ return dt->do_index_ops->dio_insert(env, dt, rec, key, th,
+ capa, noquota);
+}
+
+static inline int dt_declare_xattr_del(const struct lu_env *env,
+ struct dt_object *dt,
+ const char *name,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_xattr_del);
+ return dt->do_ops->do_declare_xattr_del(env, dt, name, th);
+}
+
+static inline int dt_xattr_del(const struct lu_env *env,
+ struct dt_object *dt, const char *name,
+ struct thandle *th,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_xattr_del);
+ return dt->do_ops->do_xattr_del(env, dt, name, th, capa);
+}
+
+static inline int dt_declare_xattr_set(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct lu_buf *buf,
+ const char *name, int fl,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_declare_xattr_set);
+ return dt->do_ops->do_declare_xattr_set(env, dt, buf, name, fl, th);
+}
+
+static inline int dt_xattr_set(const struct lu_env *env,
+ struct dt_object *dt, const struct lu_buf *buf,
+ const char *name, int fl, struct thandle *th,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_xattr_set);
+ return dt->do_ops->do_xattr_set(env, dt, buf, name, fl, th, capa);
+}
+
+static inline int dt_xattr_get(const struct lu_env *env,
+ struct dt_object *dt, struct lu_buf *buf,
+ const char *name, struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_xattr_get);
+ return dt->do_ops->do_xattr_get(env, dt, buf, name, capa);
+}
+
+static inline int dt_xattr_list(const struct lu_env *env,
+ struct dt_object *dt, struct lu_buf *buf,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_ops);
+ LASSERT(dt->do_ops->do_xattr_list);
+ return dt->do_ops->do_xattr_list(env, dt, buf, capa);
+}
+
+static inline int dt_declare_delete(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_key *key,
+ struct thandle *th)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_index_ops);
+ LASSERT(dt->do_index_ops->dio_declare_delete);
+ return dt->do_index_ops->dio_declare_delete(env, dt, key, th);
+}
+
+static inline int dt_delete(const struct lu_env *env,
+ struct dt_object *dt,
+ const struct dt_key *key,
+ struct thandle *th,
+ struct lustre_capa *capa)
+{
+ LASSERT(dt);
+ LASSERT(dt->do_index_ops);
+ LASSERT(dt->do_index_ops->dio_delete);
+ return dt->do_index_ops->dio_delete(env, dt, key, th, capa);
+}
+
+static inline int dt_commit_async(const struct lu_env *env,
+ struct dt_device *dev)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_commit_async);
+ return dev->dd_ops->dt_commit_async(env, dev);
+}
+
+static inline int dt_init_capa_ctxt(const struct lu_env *env,
+ struct dt_device *dev,
+ int mode, unsigned long timeout,
+ __u32 alg, struct lustre_capa_key *keys)
+{
+ LASSERT(dev);
+ LASSERT(dev->dd_ops);
+ LASSERT(dev->dd_ops->dt_init_capa_ctxt);
+ return dev->dd_ops->dt_init_capa_ctxt(env, dev, mode,
+ timeout, alg, keys);
+}
+
+static inline int dt_lookup(const struct lu_env *env,
+ struct dt_object *dt,
+ struct dt_rec *rec,
+ const struct dt_key *key,
+ struct lustre_capa *capa)
+{
+ int ret;
+
+ LASSERT(dt);
+ LASSERT(dt->do_index_ops);
+ LASSERT(dt->do_index_ops->dio_lookup);
+
+ ret = dt->do_index_ops->dio_lookup(env, dt, rec, key, capa);
+ if (ret > 0)
+ ret = 0;
+ else if (ret == 0)
+ ret = -ENOENT;
+ return ret;
+}
+
+#define LU221_BAD_TIME (0x80000000U + 24 * 3600)
+
+struct dt_find_hint {
+ struct lu_fid *dfh_fid;
+ struct dt_device *dfh_dt;
+ struct dt_object *dfh_o;
+};
+
+struct dt_thread_info {
+ char dti_buf[DT_MAX_PATH];
+ struct dt_find_hint dti_dfh;
+ struct lu_attr dti_attr;
+ struct lu_fid dti_fid;
+ struct dt_object_format dti_dof;
+ struct lustre_mdt_attrs dti_lma;
+ struct lu_buf dti_lb;
+ loff_t dti_off;
+};
+
+extern struct lu_context_key dt_key;
+
+static inline struct dt_thread_info *dt_info(const struct lu_env *env)
+{
+ struct dt_thread_info *dti;
+
+ dti = lu_context_key_get(&env->le_ctx, &dt_key);
+ LASSERT(dti);
+ return dti;
+}
+
+int dt_global_init(void);
+void dt_global_fini(void);
+
+# ifdef LPROCFS
+int lprocfs_dt_rd_blksize(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytestotal(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytesfree(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytesavail(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_dt_rd_filestotal(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_dt_rd_filesfree(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+# endif /* LPROCFS */
+
+#endif /* __LUSTRE_DT_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
new file mode 100644
index 000000000000..dfdb8aa4e035
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -0,0 +1,124 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/interval_tree.h
+ *
+ * Author: Huang Wei <huangwei@clusterfs.com>
+ * Author: Jay Xiong <jinshan.xiong@sun.com>
+ */
+
+#ifndef _INTERVAL_H__
+#define _INTERVAL_H__
+
+#include <linux/libcfs/libcfs.h> /* LASSERT. */
+
+struct interval_node {
+ struct interval_node *in_left;
+ struct interval_node *in_right;
+ struct interval_node *in_parent;
+ unsigned in_color:1,
+ in_intree:1, /** set if the node is in tree */
+ in_res1:30;
+ __u8 in_res2[4]; /** tags, 8-bytes aligned */
+ __u64 in_max_high;
+ struct interval_node_extent {
+ __u64 start;
+ __u64 end;
+ } in_extent;
+};
+
+enum interval_iter {
+ INTERVAL_ITER_CONT = 1,
+ INTERVAL_ITER_STOP = 2
+};
+
+static inline int interval_is_intree(struct interval_node *node)
+{
+ return node->in_intree == 1;
+}
+
+static inline __u64 interval_low(struct interval_node *node)
+{
+ return node->in_extent.start;
+}
+
+static inline __u64 interval_high(struct interval_node *node)
+{
+ return node->in_extent.end;
+}
+
+static inline void interval_set(struct interval_node *node,
+ __u64 start, __u64 end)
+{
+ LASSERT(start <= end);
+ node->in_extent.start = start;
+ node->in_extent.end = end;
+ node->in_max_high = end;
+}
+
+/* Rules to write an interval callback.
+ * - the callback returns INTERVAL_ITER_STOP when it thinks the iteration
+ * should be stopped. It will then cause the iteration function to return
+ * immediately with return value INTERVAL_ITER_STOP.
+ * - callbacks for interval_iterate and interval_iterate_reverse: Every
+ * nodes in the tree will be set to @node before the callback being called
+ * - callback for interval_search: Only overlapped node will be set to @node
+ * before the callback being called.
+ */
+typedef enum interval_iter (*interval_callback_t)(struct interval_node *node,
+ void *args);
+
+struct interval_node *interval_insert(struct interval_node *node,
+ struct interval_node **root);
+void interval_erase(struct interval_node *node, struct interval_node **root);
+
+/* Search the extents in the tree and call @func for each overlapped
+ * extents. */
+enum interval_iter interval_search(struct interval_node *root,
+ struct interval_node_extent *ex,
+ interval_callback_t func, void *data);
+
+/* Iterate every node in the tree - by reverse order or regular order. */
+enum interval_iter interval_iterate(struct interval_node *root,
+ interval_callback_t func, void *data);
+enum interval_iter interval_iterate_reverse(struct interval_node *root,
+ interval_callback_t func,void *data);
+
+void interval_expand(struct interval_node *root,
+ struct interval_node_extent *ext,
+ struct interval_node_extent *limiter);
+int interval_is_overlapped(struct interval_node *root,
+ struct interval_node_extent *ex);
+struct interval_node *interval_find(struct interval_node *root,
+ struct interval_node_extent *ex);
+#endif
diff --git a/drivers/staging/lustre/lustre/include/ioctl.h b/drivers/staging/lustre/lustre/include/ioctl.h
new file mode 100644
index 000000000000..227c261b2ae9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/ioctl.h
@@ -0,0 +1,106 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _IOWR
+
+/* On i386 and x86_64, _ASM_I386_IOCTL_H is defined by the kernel's ioctl.h,
+ * and on newer kernels this header is shared as _ASM_GENERIC_IOCTL_H.
+ *
+ * We can avoid any problems with the kernel header being included again by
+ * defining _ASM_I386_IOCTL_H here so that a later occurence of <asm/ioctl.h>
+ * does not include the kernel's ioctl.h after this one. b=14746 */
+#define _ASM_I386_IOCTL_H
+#define _ASM_GENERIC_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms. The i386 ioctl numbering scheme doesn't really enforce
+ * a type field. De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here. Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS 8
+#define _IOC_TYPEBITS 8
+#define _IOC_SIZEBITS 14
+#define _IOC_DIRBITS 2
+
+#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT 0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE 0U
+#define _IOC_WRITE 1U
+#define _IOC_READ 2U
+
+#define _IOC(dir,type,nr,size) (((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
+
+#endif /* _IOWR */
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
new file mode 100644
index 000000000000..9d4011f2908b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lclient.h
@@ -0,0 +1,437 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Definitions shared between vvp and liblustre, and other clients in the
+ * future.
+ *
+ * Author: Oleg Drokin <oleg.drokin@sun.com>
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#ifndef LCLIENT_H
+#define LCLIENT_H
+
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 1);
+}
+
+/**
+ * Locking policy for setattr.
+ */
+enum ccc_setattr_lock_type {
+ /** Locking is done by server */
+ SETATTR_NOLOCK,
+ /** Extent lock is enqueued */
+ SETATTR_EXTENT_LOCK,
+ /** Existing local extent lock is used */
+ SETATTR_MATCH_LOCK
+};
+
+
+/**
+ * IO state private to vvp or slp layers.
+ */
+struct ccc_io {
+ /** super class */
+ struct cl_io_slice cui_cl;
+ struct cl_io_lock_link cui_link;
+ /**
+ * I/O vector information to or from which read/write is going.
+ */
+ struct iovec *cui_iov;
+ unsigned long cui_nrsegs;
+ /**
+ * Total iov count for left IO.
+ */
+ unsigned long cui_tot_nrsegs;
+ /**
+ * Old length for iov that was truncated partially.
+ */
+ size_t cui_iov_olen;
+ /**
+ * Total size for the left IO.
+ */
+ size_t cui_tot_count;
+
+ union {
+ struct {
+ enum ccc_setattr_lock_type cui_local_lock;
+ } setattr;
+ } u;
+ /**
+ * True iff io is processing glimpse right now.
+ */
+ int cui_glimpse;
+ /**
+ * Layout version when this IO is initialized
+ */
+ __u32 cui_layout_gen;
+ /**
+ * File descriptor against which IO is done.
+ */
+ struct ll_file_data *cui_fd;
+ struct kiocb *cui_iocb;
+};
+
+/**
+ * True, if \a io is a normal io, False for other (sendfile, splice*).
+ * must be impementated in arch specific code.
+ */
+int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
+
+extern struct lu_context_key ccc_key;
+extern struct lu_context_key ccc_session_key;
+
+struct ccc_thread_info {
+ struct cl_lock_descr cti_descr;
+ struct cl_io cti_io;
+ struct cl_attr cti_attr;
+};
+
+static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
+{
+ struct ccc_thread_info *info;
+
+ info = lu_context_key_get(&env->le_ctx, &ccc_key);
+ LASSERT(info != NULL);
+ return info;
+}
+
+static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
+{
+ struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
+ memset(attr, 0, sizeof(*attr));
+ return attr;
+}
+
+static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
+{
+ struct cl_io *io = &ccc_env_info(env)->cti_io;
+ memset(io, 0, sizeof(*io));
+ return io;
+}
+
+struct ccc_session {
+ struct ccc_io cs_ios;
+};
+
+static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
+{
+ struct ccc_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &ccc_session_key);
+ LASSERT(ses != NULL);
+ return ses;
+}
+
+static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
+{
+ return &ccc_env_session(env)->cs_ios;
+}
+
+/**
+ * ccc-private object state.
+ */
+struct ccc_object {
+ struct cl_object_header cob_header;
+ struct cl_object cob_cl;
+ struct inode *cob_inode;
+
+ /**
+ * A list of dirty pages pending IO in the cache. Used by
+ * SOM. Protected by ll_inode_info::lli_lock.
+ *
+ * \see ccc_page::cpg_pending_linkage
+ */
+ struct list_head cob_pending_list;
+
+ /**
+ * Access this counter is protected by inode->i_sem. Now that
+ * the lifetime of transient pages must be covered by inode sem,
+ * we don't need to hold any lock..
+ */
+ int cob_transient_pages;
+ /**
+ * Number of outstanding mmaps on this file.
+ *
+ * \see ll_vm_open(), ll_vm_close().
+ */
+ atomic_t cob_mmap_cnt;
+
+ /**
+ * various flags
+ * cob_discard_page_warned
+ * if pages belonging to this object are discarded when a client
+ * is evicted, some debug info will be printed, this flag will be set
+ * during processing the first discarded page, then avoid flooding
+ * debug message for lots of discarded pages.
+ *
+ * \see ll_dirty_page_discard_warn.
+ */
+ unsigned int cob_discard_page_warned:1;
+};
+
+/**
+ * ccc-private page state.
+ */
+struct ccc_page {
+ struct cl_page_slice cpg_cl;
+ int cpg_defer_uptodate;
+ int cpg_ra_used;
+ int cpg_write_queued;
+ /**
+ * Non-empty iff this page is already counted in
+ * ccc_object::cob_pending_list. Protected by
+ * ccc_object::cob_pending_guard. This list is only used as a flag,
+ * that is, never iterated through, only checked for list_empty(), but
+ * having a list is useful for debugging.
+ */
+ struct list_head cpg_pending_linkage;
+ /** VM page */
+ struct page *cpg_page;
+};
+
+static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
+{
+ return container_of(slice, struct ccc_page, cpg_cl);
+}
+
+struct cl_page *ccc_vmpage_page_transient(struct page *vmpage);
+
+struct ccc_device {
+ struct cl_device cdv_cl;
+ struct super_block *cdv_sb;
+ struct cl_device *cdv_next;
+};
+
+struct ccc_lock {
+ struct cl_lock_slice clk_cl;
+};
+
+struct ccc_req {
+ struct cl_req_slice crq_cl;
+};
+
+void *ccc_key_init (const struct lu_context *ctx,
+ struct lu_context_key *key);
+void ccc_key_fini (const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+void *ccc_session_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key);
+void ccc_session_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+
+int ccc_device_init (const struct lu_env *env,
+ struct lu_device *d,
+ const char *name, struct lu_device *next);
+struct lu_device *ccc_device_fini (const struct lu_env *env,
+ struct lu_device *d);
+struct lu_device *ccc_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg,
+ const struct lu_device_operations *luops,
+ const struct cl_device_operations *clops);
+struct lu_device *ccc_device_free (const struct lu_env *env,
+ struct lu_device *d);
+struct lu_object *ccc_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev,
+ const struct cl_object_operations *clops,
+ const struct lu_object_operations *luops);
+
+int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+int ccc_global_init(struct lu_device_type *device_type);
+void ccc_global_fini(struct lu_device_type *device_type);
+int ccc_object_init0(const struct lu_env *env,struct ccc_object *vob,
+ const struct cl_object_conf *conf);
+int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf);
+void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
+int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io,
+ const struct cl_lock_operations *lkops);
+int ccc_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid);
+int ccc_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb);
+int ccc_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf);
+struct page *ccc_page_vmpage(const struct lu_env *env,
+ const struct cl_page_slice *slice);
+int ccc_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io);
+int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
+void ccc_transient_page_verify(const struct cl_page *page);
+int ccc_transient_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io, int nonblock);
+void ccc_transient_page_assume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+void ccc_transient_page_unassume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+void ccc_transient_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+void ccc_transient_page_discard(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+int ccc_transient_page_prep(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io);
+void ccc_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+void ccc_lock_fini(const struct lu_env *env,struct cl_lock_slice *slice);
+int ccc_lock_enqueue(const struct lu_env *env,const struct cl_lock_slice *slice,
+ struct cl_io *io, __u32 enqflags);
+int ccc_lock_unuse(const struct lu_env *env,const struct cl_lock_slice *slice);
+int ccc_lock_wait(const struct lu_env *env,const struct cl_lock_slice *slice);
+int ccc_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io);
+void ccc_lock_state(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state state);
+
+void ccc_io_fini(const struct lu_env *env, const struct cl_io_slice *ios);
+int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ pgoff_t start, pgoff_t end);
+int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ loff_t start, loff_t end);
+void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
+void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
+ size_t nob);
+void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
+ struct cl_io *io);
+int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, loff_t start, size_t count, int *exceed);
+void ccc_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret);
+void ccc_req_attr_set(const struct lu_env *env,const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *oa, obd_valid flags);
+
+struct lu_device *ccc2lu_dev (struct ccc_device *vdv);
+struct lu_object *ccc2lu (struct ccc_object *vob);
+struct ccc_device *lu2ccc_dev (const struct lu_device *d);
+struct ccc_device *cl2ccc_dev (const struct cl_device *d);
+struct ccc_object *lu2ccc (const struct lu_object *obj);
+struct ccc_object *cl2ccc (const struct cl_object *obj);
+struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice);
+struct ccc_io *cl2ccc_io (const struct lu_env *env,
+ const struct cl_io_slice *slice);
+struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice);
+struct page *cl2vm_page (const struct cl_page_slice *slice);
+struct inode *ccc_object_inode(const struct cl_object *obj);
+struct ccc_object *cl_inode2ccc (struct inode *inode);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr,
+ struct obd_capa *capa);
+
+struct cl_page *ccc_vmpage_page_transient(struct page *vmpage);
+int ccc_object_invariant(const struct cl_object *obj);
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u16 ll_dirent_type_get(struct lu_dirent *ent);
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
+# define CLOBINVRNT(env, clob, expr) \
+ ((void)sizeof(env), (void)sizeof(clob), (void)sizeof !!(expr))
+
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+ struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data);
+
+struct ccc_grouplock {
+ struct lu_env *cg_env;
+ struct cl_io *cg_io;
+ struct cl_lock *cg_lock;
+ unsigned long cg_gid;
+};
+
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+ struct ccc_grouplock *cg);
+void cl_put_grouplock(struct ccc_grouplock *cg);
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering. */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+/**
+ * Data structure managing a client's cached clean pages. An LRU of
+ * pages is maintained, along with other statistics.
+ */
+struct cl_client_cache {
+ atomic_t ccc_users; /* # of users (OSCs) of this data */
+ struct list_head ccc_lru; /* LRU list of cached clean pages */
+ spinlock_t ccc_lru_lock; /* lock for list */
+ atomic_t ccc_lru_left; /* # of LRU entries available */
+ unsigned long ccc_lru_max; /* Max # of LRU entries possible */
+ unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */
+};
+
+#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h b/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h
new file mode 100644
index 000000000000..586692272d78
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h
@@ -0,0 +1,58 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lprocfs_status.h
+ *
+ * Top level header file for LProc SNMP
+ *
+ * Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _LINUX_LPROCFS_SNMP_H
+#define _LINUX_LPROCFS_SNMP_H
+
+#ifndef _LPROCFS_SNMP_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#include <linux/smp.h>
+#include <linux/rwsem.h>
+#include <linux/libcfs/libcfs.h>
+#include <linux/statfs.h>
+
+
+#endif /* LPROCFS_SNMP_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_acl.h b/drivers/staging/lustre/lustre/include/linux/lustre_acl.h
new file mode 100644
index 000000000000..ff4fc4ff2894
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_acl.h
@@ -0,0 +1,66 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_acl.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_LINUX_ACL_H
+#define _LUSTRE_LINUX_ACL_H
+
+#ifndef _LUSTRE_ACL_H
+#error Shoud not include direectly. use #include <lustre_acl.h> instead
+#endif
+
+# include <linux/fs.h>
+# include <linux/dcache.h>
+# ifdef CONFIG_FS_POSIX_ACL
+# include <linux/posix_acl_xattr.h>
+# define LUSTRE_POSIX_ACL_MAX_ENTRIES 32
+# define LUSTRE_POSIX_ACL_MAX_SIZE \
+ (sizeof(posix_acl_xattr_header) + \
+ LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(posix_acl_xattr_entry))
+# endif /* CONFIG_FS_POSIX_ACL */
+# include <linux/lustre_intent.h>
+# include <linux/xattr.h> /* XATTR_{REPLACE,CREATE} */
+
+#ifndef LUSTRE_POSIX_ACL_MAX_SIZE
+# define LUSTRE_POSIX_ACL_MAX_SIZE 0
+#endif
+
+#endif /* _LUSTRE_LINUX_ACL_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_common.h b/drivers/staging/lustre/lustre/include/linux/lustre_common.h
new file mode 100644
index 000000000000..d1783a33d8ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_common.h
@@ -0,0 +1,22 @@
+#ifndef LUSTRE_COMMON_H
+#define LUSTRE_COMMON_H
+
+#include <linux/sched.h>
+
+static inline int cfs_cleanup_group_info(void)
+{
+ struct group_info *ginfo;
+
+ ginfo = groups_alloc(0);
+ if (!ginfo)
+ return -ENOMEM;
+
+ set_current_groups(ginfo);
+ put_group_info(ginfo);
+
+ return 0;
+}
+
+#define ll_inode_blksize(a) (1<<(a)->i_blkbits)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
new file mode 100644
index 000000000000..dff04688945b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -0,0 +1,349 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_COMPAT25_H
+#define _LINUX_COMPAT25_H
+
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+
+#include <linux/lustre_patchless_compat.h>
+
+# define LOCK_FS_STRUCT(fs) spin_lock(&(fs)->lock)
+# define UNLOCK_FS_STRUCT(fs) spin_unlock(&(fs)->lock)
+
+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+ struct dentry *dentry)
+{
+ struct path path;
+ struct path old_pwd;
+
+ path.mnt = mnt;
+ path.dentry = dentry;
+ LOCK_FS_STRUCT(fs);
+ old_pwd = fs->pwd;
+ path_get(&path);
+ fs->pwd = path;
+ UNLOCK_FS_STRUCT(fs);
+
+ if (old_pwd.dentry)
+ path_put(&old_pwd);
+}
+
+
+/*
+ * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
+ * ATTR_* attributes (see bug 13828)
+ */
+#define ATTR_BLOCKS (1 << 27)
+
+#define current_ngroups current_cred()->group_info->ngroups
+#define current_groups current_cred()->group_info->small_block
+
+/*
+ * OBD need working random driver, thus all our
+ * initialization routines must be called after device
+ * driver initialization
+ */
+#ifndef MODULE
+#undef module_init
+#define module_init(a) late_initcall(a)
+#endif
+
+
+#define LTIME_S(time) (time.tv_sec)
+
+#define ll_permission(inode,mask,nd) inode_permission(inode,mask)
+
+# define ll_generic_permission(inode, mask, flags, check_acl) \
+ generic_permission(inode, mask)
+
+#define ll_blkdev_put(a, b) blkdev_put(a, b)
+
+#define ll_dentry_open(a,b,c) dentry_open(a,b,c)
+
+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
+ vfs_symlink(dir, dentry, path)
+
+
+#define ll_generic_file_llseek_size(file, offset, origin, maxbytes, eof) \
+ generic_file_llseek_size(file, offset, origin, maxbytes, eof);
+
+/* inode_dio_wait(i) use as-is for write lock */
+# define inode_dio_write_done(i) do {} while (0) /* for write unlock */
+# define inode_dio_read(i) atomic_inc(&(i)->i_dio_count)
+/* inode_dio_done(i) use as-is for read unlock */
+
+#define TREE_READ_LOCK_IRQ(mapping) spin_lock_irq(&(mapping)->tree_lock)
+#define TREE_READ_UNLOCK_IRQ(mapping) spin_unlock_irq(&(mapping)->tree_lock)
+
+static inline
+int ll_unregister_blkdev(unsigned int dev, const char *name)
+{
+ unregister_blkdev(dev, name);
+ return 0;
+}
+
+#define ll_invalidate_bdev(a,b) invalidate_bdev((a))
+
+#ifndef FS_HAS_FIEMAP
+#define FS_HAS_FIEMAP (0)
+#endif
+
+
+
+/* add a lustre compatible layer for crypto API */
+#include <linux/crypto.h>
+#define ll_crypto_hash crypto_hash
+#define ll_crypto_cipher crypto_blkcipher
+#define ll_crypto_alloc_hash(name, type, mask) crypto_alloc_hash(name, type, mask)
+#define ll_crypto_hash_setkey(tfm, key, keylen) crypto_hash_setkey(tfm, key, keylen)
+#define ll_crypto_hash_init(desc) crypto_hash_init(desc)
+#define ll_crypto_hash_update(desc, sl, bytes) crypto_hash_update(desc, sl, bytes)
+#define ll_crypto_hash_final(desc, out) crypto_hash_final(desc, out)
+#define ll_crypto_blkcipher_setkey(tfm, key, keylen) \
+ crypto_blkcipher_setkey(tfm, key, keylen)
+#define ll_crypto_blkcipher_set_iv(tfm, src, len) \
+ crypto_blkcipher_set_iv(tfm, src, len)
+#define ll_crypto_blkcipher_get_iv(tfm, dst, len) \
+ crypto_blkcipher_get_iv(tfm, dst, len)
+#define ll_crypto_blkcipher_encrypt(desc, dst, src, bytes) \
+ crypto_blkcipher_encrypt(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_decrypt(desc, dst, src, bytes) \
+ crypto_blkcipher_decrypt(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_encrypt_iv(desc, dst, src, bytes) \
+ crypto_blkcipher_encrypt_iv(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_decrypt_iv(desc, dst, src, bytes) \
+ crypto_blkcipher_decrypt_iv(desc, dst, src, bytes)
+
+static inline
+struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char *name,
+ u32 type, u32 mask)
+{
+ struct ll_crypto_cipher *rtn = crypto_alloc_blkcipher(name, type, mask);
+
+ return (rtn == NULL ? ERR_PTR(-ENOMEM) : rtn);
+}
+
+static inline int ll_crypto_hmac(struct ll_crypto_hash *tfm,
+ u8 *key, unsigned int *keylen,
+ struct scatterlist *sg,
+ unsigned int size, u8 *result)
+{
+ struct hash_desc desc;
+ int rv;
+ desc.tfm = tfm;
+ desc.flags = 0;
+ rv = crypto_hash_setkey(desc.tfm, key, *keylen);
+ if (rv) {
+ CERROR("failed to hash setkey: %d\n", rv);
+ return rv;
+ }
+ return crypto_hash_digest(&desc, sg, size, result);
+}
+static inline
+unsigned int ll_crypto_tfm_alg_max_keysize(struct crypto_blkcipher *tfm)
+{
+ return crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher.max_keysize;
+}
+static inline
+unsigned int ll_crypto_tfm_alg_min_keysize(struct crypto_blkcipher *tfm)
+{
+ return crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher.min_keysize;
+}
+
+#define ll_crypto_hash_blocksize(tfm) crypto_hash_blocksize(tfm)
+#define ll_crypto_hash_digestsize(tfm) crypto_hash_digestsize(tfm)
+#define ll_crypto_blkcipher_ivsize(tfm) crypto_blkcipher_ivsize(tfm)
+#define ll_crypto_blkcipher_blocksize(tfm) crypto_blkcipher_blocksize(tfm)
+#define ll_crypto_free_hash(tfm) crypto_free_hash(tfm)
+#define ll_crypto_free_blkcipher(tfm) crypto_free_blkcipher(tfm)
+
+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
+#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
+ vfs_rename(old,old_dir,new,new_dir)
+
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#define cfs_bio_io_error(a,b) bio_io_error((a))
+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
+
+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
+#define cfs_path_put(nd) path_put(&(nd)->path)
+
+
+#ifndef SLAB_DESTROY_BY_RCU
+#define SLAB_DESTROY_BY_RCU 0
+#endif
+
+
+
+static inline int
+ll_quota_on(struct super_block *sb, int off, int ver, char *name, int remount)
+{
+ int rc;
+
+ if (sb->s_qcop->quota_on) {
+ struct path path;
+
+ rc = kern_path(name, LOOKUP_FOLLOW, &path);
+ if (!rc)
+ return rc;
+ rc = sb->s_qcop->quota_on(sb, off, ver
+ , &path
+ );
+ path_put(&path);
+ return rc;
+ }
+ else
+ return -ENOSYS;
+}
+
+static inline int ll_quota_off(struct super_block *sb, int off, int remount)
+{
+ if (sb->s_qcop->quota_off) {
+ return sb->s_qcop->quota_off(sb, off
+ );
+ }
+ else
+ return -ENOSYS;
+}
+
+
+# define ll_vfs_dq_init dquot_initialize
+# define ll_vfs_dq_drop dquot_drop
+# define ll_vfs_dq_transfer dquot_transfer
+# define ll_vfs_dq_off(sb, remount) dquot_suspend(sb, -1)
+
+
+
+
+
+#define queue_max_phys_segments(rq) queue_max_segments(rq)
+#define queue_max_hw_segments(rq) queue_max_segments(rq)
+
+#define ll_kmap_atomic(a, b) kmap_atomic(a)
+#define ll_kunmap_atomic(a, b) kunmap_atomic(a)
+
+
+#define ll_d_hlist_node hlist_node
+#define ll_d_hlist_empty(list) hlist_empty(list)
+#define ll_d_hlist_entry(ptr, type, name) hlist_entry(ptr.first, type, name)
+#define ll_d_hlist_for_each(tmp, i_dentry) hlist_for_each(tmp, i_dentry)
+#define ll_d_hlist_for_each_entry(dentry, p, i_dentry, alias) \
+ p = NULL; hlist_for_each_entry(dentry, i_dentry, alias)
+
+
+#define bio_hw_segments(q, bio) 0
+
+
+#define ll_pagevec_init(pv, cold) do {} while (0)
+#define ll_pagevec_add(pv, pg) (0)
+#define ll_pagevec_lru_add_file(pv) do {} while (0)
+
+
+#ifndef QUOTA_OK
+# define QUOTA_OK 0
+#endif
+#ifndef NO_QUOTA
+# define NO_QUOTA (-EDQUOT)
+#endif
+
+#ifndef SEEK_DATA
+#define SEEK_DATA 3 /* seek to the next data */
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE 4 /* seek to the next hole */
+#endif
+
+#ifndef FMODE_UNSIGNED_OFFSET
+#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
+#endif
+
+#if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit)
+# define ext2_set_bit __test_and_set_bit_le
+# define ext2_clear_bit __test_and_clear_bit_le
+# define ext2_test_bit test_bit_le
+# define ext2_find_first_zero_bit find_first_zero_bit_le
+# define ext2_find_next_zero_bit find_next_zero_bit_le
+#endif
+
+#ifdef ATTR_TIMES_SET
+# define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+#else
+# define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET)
+#endif
+
+
+
+/*
+ * After 3.1, kernel's nameidata.intent.open.flags is different
+ * with lustre's lookup_intent.it_flags, as lustre's it_flags'
+ * lower bits equal to FMODE_xxx while kernel doesn't transliterate
+ * lower bits of nameidata.intent.open.flags to FMODE_xxx.
+ * */
+#include <linux/version.h>
+static inline int ll_namei_to_lookup_intent_flag(int flag)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+ flag = (flag & ~O_ACCMODE) | OPEN_FMODE(flag);
+#endif
+ return flag;
+}
+
+# define ll_mrf_ret void
+# define LL_MRF_RETURN(rc)
+
+#include <linux/fs.h>
+
+# define ll_umode_t umode_t
+
+#include <linux/dcache.h>
+
+# define ll_dirty_inode(inode, flag) (inode)->i_sb->s_op->dirty_inode((inode), flag)
+
+#endif /* _COMPAT25_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_debug.h b/drivers/staging/lustre/lustre/include/linux/lustre_debug.h
new file mode 100644
index 000000000000..11deac7248ae
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_debug.h
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_DEBUG_H
+#define _LINUX_LUSTRE_DEBUG_H
+
+#ifndef _LUSTRE_DEBUG_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
+ CDEBUG(mask, "page %p map %p index %lu flags %lx count %u priv %0lx: "\
+ fmt, page, page->mapping, page->index, (long)page->flags, \
+ page_count(page), page_private(page), ## arg)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h b/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h
new file mode 100644
index 000000000000..207df03f6149
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h
@@ -0,0 +1,46 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_DLM_H__
+#define _LINUX_LUSTRE_DLM_H__
+
+#ifndef _LUSTRE_DLM_H__
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+# include <linux/proc_fs.h>
+# include <asm/processor.h>
+# include <linux/bit_spinlock.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h b/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h
new file mode 100644
index 000000000000..6c7260957383
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h
@@ -0,0 +1,181 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_fsfilt.h
+ *
+ * Filesystem interface helper.
+ */
+
+#ifndef _LINUX_LUSTRE_FSFILT_H
+#define _LINUX_LUSTRE_FSFILT_H
+
+#ifndef _LUSTRE_FSFILT_H
+#error Do not #include this file directly. #include <lustre_fsfilt.h> instead
+#endif
+
+
+#include <obd.h>
+#include <obd_class.h>
+
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+ void *data, int error);
+
+struct fsfilt_operations {
+ struct list_head fs_list;
+ module_t *fs_owner;
+ char *fs_type;
+ char *(* fs_getlabel)(struct super_block *sb);
+ void *(* fs_start)(struct inode *inode, int op, void *desc_private,
+ int logs);
+ int (* fs_commit)(struct inode *inode, void *handle,int force_sync);
+ int (* fs_map_inode_pages)(struct inode *inode, struct page **page,
+ int pages, unsigned long *blocks,
+ int create, struct mutex *sem);
+ int (* fs_write_record)(struct file *, void *, int size, loff_t *,
+ int force_sync);
+ int (* fs_read_record)(struct file *, void *, int size, loff_t *);
+ int (* fs_setup)(struct super_block *sb);
+};
+
+extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
+extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
+extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
+
+static inline char *fsfilt_get_label(struct obd_device *obd,
+ struct super_block *sb)
+{
+ if (obd->obd_fsops->fs_getlabel == NULL)
+ return NULL;
+ if (obd->obd_fsops->fs_getlabel(sb)[0] == '\0')
+ return NULL;
+
+ return obd->obd_fsops->fs_getlabel(sb);
+}
+
+#define FSFILT_OP_UNLINK 1
+#define FSFILT_OP_CANCEL_UNLINK 10
+
+#define __fsfilt_check_slow(obd, start, msg) \
+do { \
+ if (cfs_time_before(jiffies, start + 15 * HZ)) \
+ break; \
+ else if (cfs_time_before(jiffies, start + 30 * HZ)) \
+ CDEBUG(D_VFSTRACE, "%s: slow %s %lus\n", obd->obd_name, \
+ msg, (jiffies-start) / HZ); \
+ else if (cfs_time_before(jiffies, start + DISK_TIMEOUT * HZ)) \
+ CWARN("%s: slow %s %lus\n", obd->obd_name, msg, \
+ (jiffies - start) / HZ); \
+ else \
+ CERROR("%s: slow %s %lus\n", obd->obd_name, msg, \
+ (jiffies - start) / HZ); \
+} while (0)
+
+#define fsfilt_check_slow(obd, start, msg) \
+do { \
+ __fsfilt_check_slow(obd, start, msg); \
+ start = jiffies; \
+} while (0)
+
+static inline void *fsfilt_start_log(struct obd_device *obd,
+ struct inode *inode, int op,
+ struct obd_trans_info *oti, int logs)
+{
+ unsigned long now = jiffies;
+ void *parent_handle = oti ? oti->oti_handle : NULL;
+ void *handle;
+
+ handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs);
+ CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle);
+
+ if (oti != NULL) {
+ if (parent_handle == NULL) {
+ oti->oti_handle = handle;
+ } else if (handle != parent_handle) {
+ CERROR("mismatch: parent %p, handle %p, oti %p\n",
+ parent_handle, handle, oti);
+ LBUG();
+ }
+ }
+ fsfilt_check_slow(obd, now, "journal start");
+ return handle;
+}
+
+static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
+ void *handle, int force_sync)
+{
+ unsigned long now = jiffies;
+ int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
+ CDEBUG(D_INFO, "committing handle %p\n", handle);
+
+ fsfilt_check_slow(obd, now, "journal start");
+
+ return rc;
+}
+
+static inline int fsfilt_map_inode_pages(struct obd_device *obd,
+ struct inode *inode,
+ struct page **page, int pages,
+ unsigned long *blocks,
+ int create, struct mutex *mutex)
+{
+ return obd->obd_fsops->fs_map_inode_pages(inode, page, pages, blocks,
+ create, mutex);
+}
+
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+ void *buf, loff_t size, loff_t *offs)
+{
+ return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+ void *buf, loff_t size, loff_t *offs,
+ int force_sync)
+{
+ return obd->obd_fsops->fs_write_record(file, buf, size,offs,force_sync);
+}
+
+static inline int fsfilt_setup(struct obd_device *obd, struct super_block *fs)
+{
+ if (obd->obd_fsops->fs_setup)
+ return obd->obd_fsops->fs_setup(fs);
+ return 0;
+}
+
+
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_handles.h b/drivers/staging/lustre/lustre/include/linux/lustre_handles.h
new file mode 100644
index 000000000000..ecf184051252
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_handles.h
@@ -0,0 +1,53 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_LUSTRE_HANDLES_H_
+#define __LINUX_LUSTRE_HANDLES_H_
+
+#ifndef __LUSTRE_HANDLES_H_
+#error Do not #include this file directly. #include <lustre_handles.h> instead
+#endif
+
+#include <asm/types.h>
+#include <asm/atomic.h>
+#include <linux/list.h>
+#include <linux/version.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/rcupdate.h> /* for rcu_head{} */
+typedef struct rcu_head cfs_rcu_head_t;
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_intent.h b/drivers/staging/lustre/lustre/include/linux/lustre_intent.h
new file mode 100644
index 000000000000..b10ddfa7df29
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_intent.h
@@ -0,0 +1,62 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LUSTRE_INTENT_H
+#define LUSTRE_INTENT_H
+
+/* intent IT_XXX are defined in lustre/include/obd.h */
+struct lustre_intent_data {
+ int it_disposition;
+ int it_status;
+ __u64 it_lock_handle;
+ __u64 it_lock_bits;
+ int it_lock_mode;
+ int it_remote_lock_mode;
+ __u64 it_remote_lock_handle;
+ void *it_data;
+ unsigned int it_lock_set:1;
+};
+
+struct lookup_intent {
+ int it_op;
+ int it_flags;
+ int it_create_mode;
+ union {
+ struct lustre_intent_data lustre;
+ } d;
+};
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lib.h b/drivers/staging/lustre/lustre/include/linux/lustre_lib.h
new file mode 100644
index 000000000000..b2f755acadf6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lib.h
@@ -0,0 +1,87 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_lib.h
+ *
+ * Basic Lustre library routines.
+ */
+
+#ifndef _LINUX_LUSTRE_LIB_H
+#define _LINUX_LUSTRE_LIB_H
+
+#ifndef _LUSTRE_LIB_H
+#error Do not #include this file directly. #include <lustre_lib.h> instead
+#endif
+
+# include <linux/rwsem.h>
+# include <linux/sched.h>
+# include <linux/signal.h>
+# include <linux/types.h>
+# include <linux/lustre_compat25.h>
+# include <linux/lustre_common.h>
+
+#ifndef LP_POISON
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
+#endif
+
+/* This macro is only for compatibility reasons with older Linux Lustre user
+ * tools. New ioctls should NOT use this macro as the ioctl "size". Instead
+ * the ioctl should get a "size" argument which is the actual data type used
+ * by the ioctl, to ensure the ioctl interface is versioned correctly. */
+#define OBD_IOC_DATA_TYPE long
+
+#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \
+ sigmask(SIGTERM) | sigmask(SIGQUIT) | \
+ sigmask(SIGALRM))
+
+/* initialize ost_lvb according to inode */
+static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb)
+{
+ lvb->lvb_size = i_size_read(inode);
+ lvb->lvb_blocks = inode->i_blocks;
+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+ lvb->lvb_atime = LTIME_S(inode->i_atime);
+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+}
+
+#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
new file mode 100644
index 000000000000..c95dff900b58
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
@@ -0,0 +1,100 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LL_H
+#define _LINUX_LL_H
+
+#ifndef _LL_H
+#error Do not #include this file directly. #include <lustre_lite.h> instead
+#endif
+
+
+#include <linux/version.h>
+
+#include <asm/statfs.h>
+
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/proc_fs.h>
+
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_ha.h>
+
+#include <linux/rbtree.h>
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/pagemap.h>
+
+/* lprocfs.c */
+enum {
+ LPROC_LL_DIRTY_HITS = 0,
+ LPROC_LL_DIRTY_MISSES,
+ LPROC_LL_READ_BYTES,
+ LPROC_LL_WRITE_BYTES,
+ LPROC_LL_BRW_READ,
+ LPROC_LL_BRW_WRITE,
+ LPROC_LL_OSC_READ,
+ LPROC_LL_OSC_WRITE,
+ LPROC_LL_IOCTL,
+ LPROC_LL_OPEN,
+ LPROC_LL_RELEASE,
+ LPROC_LL_MAP,
+ LPROC_LL_LLSEEK,
+ LPROC_LL_FSYNC,
+ LPROC_LL_READDIR,
+ LPROC_LL_SETATTR,
+ LPROC_LL_TRUNC,
+ LPROC_LL_FLOCK,
+ LPROC_LL_GETATTR,
+ LPROC_LL_CREATE,
+ LPROC_LL_LINK,
+ LPROC_LL_UNLINK,
+ LPROC_LL_SYMLINK,
+ LPROC_LL_MKDIR,
+ LPROC_LL_RMDIR,
+ LPROC_LL_MKNOD,
+ LPROC_LL_RENAME,
+ LPROC_LL_STAFS,
+ LPROC_LL_ALLOC_INODE,
+ LPROC_LL_SETXATTR,
+ LPROC_LL_GETXATTR,
+ LPROC_LL_LISTXATTR,
+ LPROC_LL_REMOVEXATTR,
+ LPROC_LL_INODE_PERM,
+ LPROC_LL_FILE_OPCODES
+};
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_log.h b/drivers/staging/lustre/lustre/include/linux/lustre_log.h
new file mode 100644
index 000000000000..e9c8e56737d2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_log.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_log.h
+ *
+ * Generic infrastructure for managing a collection of logs.
+ * These logs are used for:
+ * - orphan recovery: OST adds record on create
+ * - mtime/size consistency: the OST adds a record on first write
+ * - open/unlinked objects: OST adds a record on destroy
+ *
+ * - mds unlink log: the MDS adds an entry upon delete
+ *
+ * - raid1 replication log between OST's
+ * - MDS replication logs
+ */
+
+#ifndef _LINUX_LUSTRE_LOG_H
+#define _LINUX_LUSTRE_LOG_H
+
+#ifndef _LUSTRE_LOG_H
+#error Do not #include this file directly. #include <lustre_log.h> instead
+#endif
+
+#define LUSTRE_LOG_SERVER
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_net.h b/drivers/staging/lustre/lustre/include/linux/lustre_net.h
new file mode 100644
index 000000000000..2d7c425d7012
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_net.h
@@ -0,0 +1,50 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_NET_H
+#define _LINUX_LUSTRE_NET_H
+
+#ifndef _LUSTRE_NET_H
+#error Do not #include this file directly. #include <lustre_net.h> instead
+#endif
+
+#include <linux/version.h>
+#include <linux/workqueue.h>
+
+/* XXX Liang: should be moved to other header instead of here */
+#ifndef WITH_GROUP_INFO
+#define WITH_GROUP_INFO
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
new file mode 100644
index 000000000000..f0508084e8c5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
@@ -0,0 +1,85 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LUSTRE_PATCHLESS_COMPAT_H
+#define LUSTRE_PATCHLESS_COMPAT_H
+
+#include <linux/fs.h>
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/hash.h>
+
+
+#define ll_delete_from_page_cache(page) delete_from_page_cache(page)
+
+static inline void
+truncate_complete_page(struct address_space *mapping, struct page *page)
+{
+ if (page->mapping != mapping)
+ return;
+
+ if (PagePrivate(page))
+ page->mapping->a_ops->invalidatepage(page, 0);
+
+ cancel_dirty_page(page, PAGE_SIZE);
+ ClearPageMappedToDisk(page);
+ ll_delete_from_page_cache(page);
+}
+
+# define d_refcount(d) ((d)->d_count)
+
+#ifdef ATTR_OPEN
+# define ATTR_FROM_OPEN ATTR_OPEN
+#else
+# ifndef ATTR_FROM_OPEN
+# define ATTR_FROM_OPEN 0
+# endif
+#endif /* ATTR_OPEN */
+
+#ifndef ATTR_RAW
+#define ATTR_RAW 0
+#endif
+
+#ifndef ATTR_CTIME_SET
+/*
+ * set ATTR_CTIME_SET to a high value to avoid any risk of collision with other
+ * ATTR_* attributes (see bug 13828)
+ */
+#define ATTR_CTIME_SET (1 << 28)
+#endif
+
+#endif /* LUSTRE_PATCHLESS_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_quota.h b/drivers/staging/lustre/lustre/include/linux/lustre_quota.h
new file mode 100644
index 000000000000..421866b004cf
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_quota.h
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_QUOTA_H
+#define _LINUX_LUSTRE_QUOTA_H
+
+#ifndef _LUSTRE_QUOTA_H
+#error Do not #include this file directly. #include <lustre_quota.h> instead
+#endif
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+
+#endif /* _LUSTRE_QUOTA_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
new file mode 100644
index 000000000000..ebaf92977f7f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
@@ -0,0 +1,67 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_user.h
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _LINUX_LUSTRE_USER_H
+#define _LINUX_LUSTRE_USER_H
+
+# include <linux/version.h>
+# include <linux/quota.h>
+
+/*
+ * asm-x86_64/processor.h on some SLES 9 distros seems to use
+ * kernel-only typedefs. fortunately skipping it altogether is ok
+ * (for now).
+ */
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <linux/string.h>
+
+#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
+ defined(__craynv) || defined (__mips64__) || defined(__powerpc64__)
+typedef struct stat lstat_t;
+#define lstat_f lstat
+#define HAVE_LOV_USER_MDS_DATA
+#else
+typedef struct stat64 lstat_t;
+#define lstat_f lstat64
+#define HAVE_LOV_USER_MDS_DATA
+#endif
+
+#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lvfs.h b/drivers/staging/lustre/lustre/include/linux/lvfs.h
new file mode 100644
index 000000000000..b4db6cb581bd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lvfs.h
@@ -0,0 +1,134 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lvfs.h
+ *
+ * lustre VFS/process permission interface
+ */
+
+#ifndef __LINUX_LVFS_H__
+#define __LINUX_LVFS_H__
+
+#ifndef __LVFS_H__
+#error Do not #include this file directly. #include <lvfs.h> instead
+#endif
+
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/lvfs_linux.h>
+
+#define LLOG_LVFS
+
+/* simple.c */
+
+struct lvfs_ucred {
+ __u32 luc_uid;
+ __u32 luc_gid;
+ __u32 luc_fsuid;
+ __u32 luc_fsgid;
+ kernel_cap_t luc_cap;
+ __u32 luc_umask;
+ struct group_info *luc_ginfo;
+ struct md_identity *luc_identity;
+};
+
+struct lvfs_callback_ops {
+ struct dentry *(*l_fid2dentry)(__u64 id_ino, __u32 gen, __u64 gr, void *data);
+};
+
+#define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA
+#define OBD_CTXT_DEBUG /* development-only debugging */
+struct lvfs_run_ctxt {
+ struct vfsmount *pwdmnt;
+ struct dentry *pwd;
+ mm_segment_t fs;
+ struct lvfs_ucred luc;
+ int ngroups;
+ struct lvfs_callback_ops cb_ops;
+ struct group_info *group_info;
+ struct dt_device *dt;
+#ifdef OBD_CTXT_DEBUG
+ __u32 magic;
+#endif
+};
+
+#ifdef OBD_CTXT_DEBUG
+#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
+#else
+#define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
+#endif
+
+
+int lustre_rename(struct dentry *dir, struct vfsmount *mnt, char *oldname,
+ char *newname);
+
+static inline void l_dput(struct dentry *de)
+{
+ if (!de || IS_ERR(de))
+ return;
+ //shrink_dcache_parent(de);
+ LASSERT(d_refcount(de) > 0);
+ dput(de);
+}
+
+/* We need to hold the inode semaphore over the dcache lookup itself, or we
+ * run the risk of entering the filesystem lookup path concurrently on SMP
+ * systems, and instantiating two inodes for the same entry. We still
+ * protect against concurrent addition/removal races with the DLM locking.
+ */
+static inline struct dentry *ll_lookup_one_len(const char *fid_name,
+ struct dentry *dparent,
+ int fid_namelen)
+{
+ struct dentry *dchild;
+
+ mutex_lock(&dparent->d_inode->i_mutex);
+ dchild = lookup_one_len(fid_name, dparent, fid_namelen);
+ mutex_unlock(&dparent->d_inode->i_mutex);
+
+ if (IS_ERR(dchild) || dchild->d_inode == NULL)
+ return dchild;
+
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("bad inode returned %lu/%u\n",
+ dchild->d_inode->i_ino, dchild->d_inode->i_generation);
+ dput(dchild);
+ dchild = ERR_PTR(-ENOENT);
+ }
+ return dchild;
+}
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h b/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h
new file mode 100644
index 000000000000..140a60f1f0c9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h
@@ -0,0 +1,66 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LVFS_LINUX_H__
+#define __LVFS_LINUX_H__
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+
+#include <lvfs.h>
+
+#define l_file file
+#define l_dentry dentry
+
+#define l_filp_open filp_open
+
+struct lvfs_run_ctxt;
+struct l_file *l_dentry_open(struct lvfs_run_ctxt *, struct l_dentry *,
+ int flags);
+
+struct l_linux_dirent {
+ struct list_head lld_list;
+ ino_t lld_ino;
+ unsigned long lld_off;
+ char lld_name[LL_FID_NAMELEN];
+};
+struct l_readdir_callback {
+ struct l_linux_dirent *lrc_dirent;
+ struct list_head *lrc_list;
+};
+
+#endif /* __LVFS_LINUX_H__ */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
new file mode 100644
index 000000000000..2c36c0d19d06
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd.h
@@ -0,0 +1,128 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_OBD_H
+#define __LINUX_OBD_H
+
+#ifndef __OBD_H
+#error Do not #include this file directly. #include <obd.h> instead
+#endif
+
+#include <obd_support.h>
+
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+# include <linux/lustre_intent.h>
+
+struct ll_iattr {
+ struct iattr iattr;
+ unsigned int ia_attr_flags;
+};
+
+#define CLIENT_OBD_LIST_LOCK_DEBUG 1
+
+typedef struct {
+ spinlock_t lock;
+
+ unsigned long time;
+ struct task_struct *task;
+ const char *func;
+ int line;
+} client_obd_lock_t;
+
+static inline void __client_obd_list_lock(client_obd_lock_t *lock,
+ const char *func, int line)
+{
+ unsigned long cur = jiffies;
+ while (1) {
+ if (spin_trylock(&lock->lock)) {
+ LASSERT(lock->task == NULL);
+ lock->task = current;
+ lock->func = func;
+ lock->line = line;
+ lock->time = jiffies;
+ break;
+ }
+
+ if ((jiffies - cur > 5 * HZ) &&
+ (jiffies - lock->time > 5 * HZ)) {
+ struct task_struct *task = lock->task;
+
+ if (task == NULL)
+ continue;
+
+ LCONSOLE_WARN("%s:%d: lock %p was acquired"
+ " by <%s:%d:%s:%d> for %lu seconds.\n",
+ current->comm, current->pid,
+ lock, task->comm, task->pid,
+ lock->func, lock->line,
+ (jiffies - lock->time) / HZ);
+ LCONSOLE_WARN("====== for process holding the "
+ "lock =====\n");
+ libcfs_debug_dumpstack(task);
+ LCONSOLE_WARN("====== for current process =====\n");
+ libcfs_debug_dumpstack(NULL);
+ LCONSOLE_WARN("====== end =======\n");
+ cfs_pause(1000 * HZ);
+ }
+ cpu_relax();
+ }
+}
+
+#define client_obd_list_lock(lock) \
+ __client_obd_list_lock(lock, __FUNCTION__, __LINE__)
+
+static inline void client_obd_list_unlock(client_obd_lock_t *lock)
+{
+ LASSERT(lock->task != NULL);
+ lock->task = NULL;
+ lock->time = jiffies;
+ spin_unlock(&lock->lock);
+}
+
+
+static inline void client_obd_list_lock_init(client_obd_lock_t *lock)
+{
+ spin_lock_init(&lock->lock);
+}
+
+static inline void client_obd_list_lock_done(client_obd_lock_t *lock)
+{}
+
+#endif /* __LINUX_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd_class.h b/drivers/staging/lustre/lustre/include/linux/obd_class.h
new file mode 100644
index 000000000000..021ead6639fc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd_class.h
@@ -0,0 +1,58 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_CLASS_OBD_H
+#define __LINUX_CLASS_OBD_H
+
+#ifndef __CLASS_OBD_H
+#error Do not #include this file directly. #include <obd_class.h> instead
+#endif
+
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+
+/* obdo.c */
+void obdo_from_la(struct obdo *dst, struct lu_attr *la, __u64 valid);
+void la_from_obdo(struct lu_attr *la, struct obdo *dst, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#define ll_inode_flags(inode) (inode->i_flags)
+
+
+#endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd_support.h b/drivers/staging/lustre/lustre/include/linux/obd_support.h
new file mode 100644
index 000000000000..9166503408aa
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd_support.h
@@ -0,0 +1,63 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_OBD_SUPPORT
+#define _LINUX_OBD_SUPPORT
+
+#ifndef _OBD_SUPPORT
+#error Do not #include this file directly. #include <obd_support.h> instead
+#endif
+
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
+#include <asm/processor.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+
+
+# include <linux/types.h>
+# include <linux/blkdev.h>
+# include <lvfs.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
new file mode 100644
index 000000000000..e770d0260576
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -0,0 +1,1043 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lprocfs_status.h
+ *
+ * Top level header file for LProc SNMP
+ *
+ * Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _LPROCFS_SNMP_H
+#define _LPROCFS_SNMP_H
+
+#include <linux/lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <linux/libcfs/params_tree.h>
+
+struct lprocfs_vars {
+ const char *name;
+ struct file_operations *fops;
+ void *data;
+ /**
+ * /proc file mode.
+ */
+ mode_t proc_mode;
+};
+
+struct lprocfs_static_vars {
+ struct lprocfs_vars *module_vars;
+ struct lprocfs_vars *obd_vars;
+};
+
+/* if we find more consumers this could be generalized */
+#define OBD_HIST_MAX 32
+struct obd_histogram {
+ spinlock_t oh_lock;
+ unsigned long oh_buckets[OBD_HIST_MAX];
+};
+
+enum {
+ BRW_R_PAGES = 0,
+ BRW_W_PAGES,
+ BRW_R_RPC_HIST,
+ BRW_W_RPC_HIST,
+ BRW_R_IO_TIME,
+ BRW_W_IO_TIME,
+ BRW_R_DISCONT_PAGES,
+ BRW_W_DISCONT_PAGES,
+ BRW_R_DISCONT_BLOCKS,
+ BRW_W_DISCONT_BLOCKS,
+ BRW_R_DISK_IOSIZE,
+ BRW_W_DISK_IOSIZE,
+ BRW_R_DIO_FRAGS,
+ BRW_W_DIO_FRAGS,
+ BRW_LAST,
+};
+
+struct brw_stats {
+ struct obd_histogram hist[BRW_LAST];
+};
+
+enum {
+ RENAME_SAMEDIR_SIZE = 0,
+ RENAME_CROSSDIR_SRC_SIZE,
+ RENAME_CROSSDIR_TGT_SIZE,
+ RENAME_LAST,
+};
+
+struct rename_stats {
+ struct obd_histogram hist[RENAME_LAST];
+};
+
+/* An lprocfs counter can be configured using the enum bit masks below.
+ *
+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
+ * protects this counter from concurrent updates. If not specified,
+ * lprocfs an internal per-counter lock variable. External locks are
+ * not used to protect counter increments, but are used to protect
+ * counter readout and resets.
+ *
+ * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples,
+ * (i.e. counter can be incremented by more than "1"). When specified,
+ * the counter maintains min, max and sum in addition to a simple
+ * invocation count. This allows averages to be be computed.
+ * If not specified, the counter is an increment-by-1 counter.
+ * min, max, sum, etc. are not maintained.
+ *
+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
+ * squares (for multi-valued counter samples only). This allows
+ * external computation of standard deviation, but involves a 64-bit
+ * multiply per counter increment.
+ */
+
+enum {
+ LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
+ LPROCFS_CNTR_AVGMINMAX = 0x0002,
+ LPROCFS_CNTR_STDDEV = 0x0004,
+
+ /* counter data type */
+ LPROCFS_TYPE_REGS = 0x0100,
+ LPROCFS_TYPE_BYTES = 0x0200,
+ LPROCFS_TYPE_PAGES = 0x0400,
+ LPROCFS_TYPE_CYCLE = 0x0800,
+};
+
+#define LC_MIN_INIT ((~(__u64)0) >> 1)
+
+struct lprocfs_counter_header {
+ unsigned int lc_config;
+ const char *lc_name; /* must be static */
+ const char *lc_units; /* must be static */
+};
+
+struct lprocfs_counter {
+ __s64 lc_count;
+ __s64 lc_min;
+ __s64 lc_max;
+ __s64 lc_sumsquare;
+ /*
+ * Every counter has lc_array_sum[0], while lc_array_sum[1] is only
+ * for irq context counter, i.e. stats with
+ * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need
+ * lc_array_sum[1]
+ */
+ __s64 lc_array_sum[1];
+};
+#define lc_sum lc_array_sum[0]
+#define lc_sum_irq lc_array_sum[1]
+
+struct lprocfs_percpu {
+#ifndef __GNUC__
+ __s64 pad;
+#endif
+ struct lprocfs_counter lp_cntr[0];
+};
+
+#define LPROCFS_GET_NUM_CPU 0x0001
+#define LPROCFS_GET_SMP_ID 0x0002
+
+enum lprocfs_stats_flags {
+ LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */
+ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
+ * area and need locking */
+ LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */
+};
+
+enum lprocfs_fields_flags {
+ LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
+ LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
+ LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
+ LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
+ LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
+ LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
+ LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
+};
+
+struct lprocfs_stats {
+ /* # of counters */
+ unsigned short ls_num;
+ /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */
+ unsigned short ls_biggest_alloc_num;
+ enum lprocfs_stats_flags ls_flags;
+ /* Lock used when there are no percpu stats areas; For percpu stats,
+ * it is used to protect ls_biggest_alloc_num change */
+ spinlock_t ls_lock;
+
+ /* has ls_num of counter headers */
+ struct lprocfs_counter_header *ls_cnt_header;
+ struct lprocfs_percpu *ls_percpu[0];
+};
+
+#define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC)
+
+/* Pack all opcodes down into a single monotonically increasing index */
+static inline int opcode_offset(__u32 opc) {
+ if (opc < OST_LAST_OPC) {
+ /* OST opcode */
+ return (opc - OST_FIRST_OPC);
+ } else if (opc < MDS_LAST_OPC) {
+ /* MDS opcode */
+ return (opc - MDS_FIRST_OPC +
+ OPC_RANGE(OST));
+ } else if (opc < LDLM_LAST_OPC) {
+ /* LDLM Opcode */
+ return (opc - LDLM_FIRST_OPC +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < MGS_LAST_OPC) {
+ /* MGS Opcode */
+ return (opc - MGS_FIRST_OPC +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < OBD_LAST_OPC) {
+ /* OBD Ping */
+ return (opc - OBD_FIRST_OPC +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < LLOG_LAST_OPC) {
+ /* LLOG Opcode */
+ return (opc - LLOG_FIRST_OPC +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < QUOTA_LAST_OPC) {
+ /* LQUOTA Opcode */
+ return (opc - QUOTA_FIRST_OPC +
+ OPC_RANGE(LLOG) +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < SEQ_LAST_OPC) {
+ /* SEQ opcode */
+ return (opc - SEQ_FIRST_OPC +
+ OPC_RANGE(QUOTA) +
+ OPC_RANGE(LLOG) +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < SEC_LAST_OPC) {
+ /* SEC opcode */
+ return (opc - SEC_FIRST_OPC +
+ OPC_RANGE(SEQ) +
+ OPC_RANGE(QUOTA) +
+ OPC_RANGE(LLOG) +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < FLD_LAST_OPC) {
+ /* FLD opcode */
+ return (opc - FLD_FIRST_OPC +
+ OPC_RANGE(SEC) +
+ OPC_RANGE(SEQ) +
+ OPC_RANGE(QUOTA) +
+ OPC_RANGE(LLOG) +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else if (opc < UPDATE_LAST_OPC) {
+ /* update opcode */
+ return (opc - UPDATE_FIRST_OPC +
+ OPC_RANGE(FLD) +
+ OPC_RANGE(SEC) +
+ OPC_RANGE(SEQ) +
+ OPC_RANGE(QUOTA) +
+ OPC_RANGE(LLOG) +
+ OPC_RANGE(OBD) +
+ OPC_RANGE(MGS) +
+ OPC_RANGE(LDLM) +
+ OPC_RANGE(MDS) +
+ OPC_RANGE(OST));
+ } else {
+ /* Unknown Opcode */
+ return -1;
+ }
+}
+
+
+#define LUSTRE_MAX_OPCODES (OPC_RANGE(OST) + \
+ OPC_RANGE(MDS) + \
+ OPC_RANGE(LDLM) + \
+ OPC_RANGE(MGS) + \
+ OPC_RANGE(OBD) + \
+ OPC_RANGE(LLOG) + \
+ OPC_RANGE(SEC) + \
+ OPC_RANGE(SEQ) + \
+ OPC_RANGE(SEC) + \
+ OPC_RANGE(FLD) + \
+ OPC_RANGE(UPDATE))
+
+#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
+ OPC_RANGE(EXTRA))
+
+enum {
+ PTLRPC_REQWAIT_CNTR = 0,
+ PTLRPC_REQQDEPTH_CNTR,
+ PTLRPC_REQACTIVE_CNTR,
+ PTLRPC_TIMEOUT,
+ PTLRPC_REQBUF_AVAIL_CNTR,
+ PTLRPC_LAST_CNTR
+};
+
+#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
+
+enum {
+ LDLM_GLIMPSE_ENQUEUE = 0,
+ LDLM_PLAIN_ENQUEUE,
+ LDLM_EXTENT_ENQUEUE,
+ LDLM_FLOCK_ENQUEUE,
+ LDLM_IBITS_ENQUEUE,
+ MDS_REINT_SETATTR,
+ MDS_REINT_CREATE,
+ MDS_REINT_LINK,
+ MDS_REINT_UNLINK,
+ MDS_REINT_RENAME,
+ MDS_REINT_OPEN,
+ MDS_REINT_SETXATTR,
+ BRW_READ_BYTES,
+ BRW_WRITE_BYTES,
+ EXTRA_LAST_OPC
+};
+
+#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
+/* class_obd.c */
+extern proc_dir_entry_t *proc_lustre_root;
+
+struct obd_device;
+struct obd_histogram;
+
+/* Days / hours / mins / seconds format */
+struct dhms {
+ int d,h,m,s;
+};
+static inline void s2dhms(struct dhms *ts, time_t secs)
+{
+ ts->d = secs / 86400;
+ secs = secs % 86400;
+ ts->h = secs / 3600;
+ secs = secs % 3600;
+ ts->m = secs / 60;
+ ts->s = secs % 60;
+}
+#define DHMS_FMT "%dd%dh%02dm%02ds"
+#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
+
+#define JOBSTATS_JOBID_VAR_MAX_LEN 20
+#define JOBSTATS_DISABLE "disable"
+#define JOBSTATS_PROCNAME_UID "procname_uid"
+
+typedef void (*cntr_init_callback)(struct lprocfs_stats *stats);
+
+struct obd_job_stats {
+ cfs_hash_t *ojs_hash;
+ struct list_head ojs_list;
+ rwlock_t ojs_lock; /* protect the obj_list */
+ cntr_init_callback ojs_cntr_init_fn;
+ int ojs_cntr_num;
+ int ojs_cleanup_interval;
+ time_t ojs_last_cleanup;
+};
+
+#ifdef LPROCFS
+
+extern int lprocfs_stats_alloc_one(struct lprocfs_stats *stats,
+ unsigned int cpuid);
+/*
+ * \return value
+ * < 0 : on error (only possible for opc as LPROCFS_GET_SMP_ID)
+ */
+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int opc,
+ unsigned long *flags)
+{
+ int rc = 0;
+
+ switch (opc) {
+ default:
+ LBUG();
+
+ case LPROCFS_GET_SMP_ID:
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ spin_lock_irqsave(&stats->ls_lock, *flags);
+ else
+ spin_lock(&stats->ls_lock);
+ return 0;
+ } else {
+ unsigned int cpuid = get_cpu();
+
+ if (unlikely(stats->ls_percpu[cpuid] == NULL)) {
+ rc = lprocfs_stats_alloc_one(stats, cpuid);
+ if (rc < 0) {
+ put_cpu();
+ return rc;
+ }
+ }
+ return cpuid;
+ }
+
+ case LPROCFS_GET_NUM_CPU:
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ spin_lock_irqsave(&stats->ls_lock, *flags);
+ else
+ spin_lock(&stats->ls_lock);
+ return 1;
+ } else {
+ return stats->ls_biggest_alloc_num;
+ }
+ }
+}
+
+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats, int opc,
+ unsigned long *flags)
+{
+ switch (opc) {
+ default:
+ LBUG();
+
+ case LPROCFS_GET_SMP_ID:
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
+ spin_unlock_irqrestore(&stats->ls_lock,
+ *flags);
+ } else {
+ spin_unlock(&stats->ls_lock);
+ }
+ } else {
+ put_cpu();
+ }
+ return;
+
+ case LPROCFS_GET_NUM_CPU:
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
+ spin_unlock_irqrestore(&stats->ls_lock,
+ *flags);
+ } else {
+ spin_unlock(&stats->ls_lock);
+ }
+ }
+ return;
+ }
+}
+
+static inline unsigned int
+lprocfs_stats_counter_size(struct lprocfs_stats *stats)
+{
+ unsigned int percpusize;
+
+ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
+
+ /* irq safe stats need lc_array_sum[1] */
+ if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ percpusize += stats->ls_num * sizeof(__s64);
+
+ if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0)
+ percpusize = L1_CACHE_ALIGN(percpusize);
+
+ return percpusize;
+}
+
+static inline struct lprocfs_counter *
+lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid,
+ int index)
+{
+ struct lprocfs_counter *cntr;
+
+ cntr = &stats->ls_percpu[cpuid]->lp_cntr[index];
+
+ if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ cntr = (void *)cntr + index * sizeof(__s64);
+
+ return cntr;
+}
+
+/* Two optimized LPROCFS counter increment functions are provided:
+ * lprocfs_counter_incr(cntr, value) - optimized for by-one counters
+ * lprocfs_counter_add(cntr) - use for multi-valued counters
+ * Counter data layout allows config flag, counter lock and the
+ * count itself to reside within a single cache line.
+ */
+
+extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
+ long amount);
+extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
+ long amount);
+
+#define lprocfs_counter_incr(stats, idx) \
+ lprocfs_counter_add(stats, idx, 1)
+#define lprocfs_counter_decr(stats, idx) \
+ lprocfs_counter_sub(stats, idx, 1)
+
+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+ struct lprocfs_counter_header *header,
+ enum lprocfs_stats_flags flags,
+ enum lprocfs_fields_flags field);
+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
+ int idx,
+ enum lprocfs_fields_flags field)
+{
+ int i;
+ unsigned int num_cpu;
+ unsigned long flags = 0;
+ __u64 ret = 0;
+
+ LASSERT(stats != NULL);
+
+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+ for (i = 0; i < num_cpu; i++) {
+ if (stats->ls_percpu[i] == NULL)
+ continue;
+ ret += lprocfs_read_helper(
+ lprocfs_stats_counter_get(stats, i, idx),
+ &stats->ls_cnt_header[idx], stats->ls_flags,
+ field);
+ }
+ lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
+ return ret;
+}
+
+extern struct lprocfs_stats *
+lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
+extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
+extern void lprocfs_free_stats(struct lprocfs_stats **stats);
+extern void lprocfs_init_ops_stats(int num_private_stats,
+ struct lprocfs_stats *stats);
+extern void lprocfs_init_mps_stats(int num_private_stats,
+ struct lprocfs_stats *stats);
+extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+ unsigned int num_private_stats);
+extern int lprocfs_alloc_md_stats(struct obd_device *obddev,
+ unsigned int num_private_stats);
+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+ unsigned conf, const char *name,
+ const char *units);
+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
+extern void lprocfs_free_md_stats(struct obd_device *obddev);
+struct obd_export;
+struct nid_stat;
+extern int lprocfs_add_clear_entry(struct obd_device * obd,
+ proc_dir_entry_t *entry);
+extern int lprocfs_exp_setup(struct obd_export *exp,
+ lnet_nid_t *peer_nid, int *newnid);
+extern int lprocfs_exp_cleanup(struct obd_export *exp);
+extern proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+ char *name,
+ void *data,
+ struct file_operations *fops);
+extern struct proc_dir_entry *
+lprocfs_add_symlink(const char *name, struct proc_dir_entry *parent,
+ const char *format, ...);
+extern void lprocfs_free_per_client_stats(struct obd_device *obd);
+extern int
+lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_nid_stats_clear_read(struct seq_file *m, void *data);
+
+extern int lprocfs_register_stats(proc_dir_entry_t *root, const char *name,
+ struct lprocfs_stats *stats);
+
+/* lprocfs_status.c */
+extern int lprocfs_add_vars(proc_dir_entry_t *root,
+ struct lprocfs_vars *var,
+ void *data);
+
+extern proc_dir_entry_t *lprocfs_register(const char *name,
+ proc_dir_entry_t *parent,
+ struct lprocfs_vars *list,
+ void *data);
+
+extern void lprocfs_remove(proc_dir_entry_t **root);
+extern void lprocfs_remove_proc_entry(const char *name,
+ struct proc_dir_entry *parent);
+
+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
+extern int lprocfs_obd_cleanup(struct obd_device *obd);
+
+extern int lprocfs_seq_create(proc_dir_entry_t *parent, const char *name,
+ mode_t mode,
+ const struct file_operations *seq_fops,
+ void *data);
+extern int lprocfs_obd_seq_create(struct obd_device *dev, const char *name,
+ mode_t mode,
+ const struct file_operations *seq_fops,
+ void *data);
+
+/* Generic callbacks */
+
+extern int lprocfs_rd_u64(struct seq_file *m, void *data);
+extern int lprocfs_rd_atomic(struct seq_file *m, void *data);
+extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_rd_uint(struct seq_file *m, void *data);
+extern int lprocfs_wr_uint(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_rd_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_name(struct seq_file *m, void *data);
+extern int lprocfs_rd_server_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_conn_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_import(struct seq_file *m, void *data);
+extern int lprocfs_rd_state(struct seq_file *m, void *data);
+extern int lprocfs_rd_connect_flags(struct seq_file *m, void *data);
+extern int lprocfs_rd_num_exports(struct seq_file *m, void *data);
+extern int lprocfs_rd_numrefs(struct seq_file *m, void *data);
+
+struct adaptive_timeout;
+extern int lprocfs_at_hist_helper(struct seq_file *m,
+ struct adaptive_timeout *at);
+extern int lprocfs_rd_timeouts(struct seq_file *m, void *data);
+extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+ size_t count, loff_t *off);
+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
+ size_t count, loff_t *off);
+extern int lprocfs_wr_import(struct file *file, const char *buffer,
+ size_t count, loff_t *off);
+extern int lprocfs_rd_pinger_recov(struct seq_file *m, void *n);
+extern int lprocfs_wr_pinger_recov(struct file *file, const char *buffer,
+ size_t count, loff_t *off);
+
+/* Statfs helpers */
+extern int lprocfs_rd_blksize(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytestotal(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytesfree(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytesavail(struct seq_file *m, void *data);
+extern int lprocfs_rd_filestotal(struct seq_file *m, void *data);
+extern int lprocfs_rd_filesfree(struct seq_file *m, void *data);
+
+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
+ int *val);
+extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
+ int *val, int mult);
+extern int lprocfs_seq_read_frac_helper(struct seq_file *m, long val, int mult);
+extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
+ long val, int mult);
+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
+ __u64 *val);
+extern int lprocfs_write_frac_u64_helper(const char *buffer,
+ unsigned long count,
+ __u64 *val, int mult);
+char *lprocfs_find_named_value(const char *buffer, const char *name,
+ unsigned long *count);
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_clear(struct obd_histogram *oh);
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
+
+void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
+ struct lprocfs_counter *cnt);
+
+extern int lprocfs_single_release(cfs_inode_t *, struct file *);
+extern int lprocfs_seq_release(cfs_inode_t *, struct file *);
+
+/* You must use these macros when you want to refer to
+ * the import in a client obd_device for a lprocfs entry */
+#define LPROCFS_CLIMP_CHECK(obd) do { \
+ typecheck(struct obd_device *, obd); \
+ down_read(&(obd)->u.cli.cl_sem); \
+ if ((obd)->u.cli.cl_import == NULL) { \
+ up_read(&(obd)->u.cli.cl_sem); \
+ return -ENODEV; \
+ } \
+} while(0)
+#define LPROCFS_CLIMP_EXIT(obd) \
+ up_read(&(obd)->u.cli.cl_sem);
+
+
+/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only
+ proc entries; otherwise, you will define name##_seq_write function also for
+ a read-write proc entry, and then call LPROC_SEQ_SEQ instead. Finally,
+ call lprocfs_obd_seq_create(obd, filename, 0444, &name#_fops, data); */
+#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
+static int name##_single_open(cfs_inode_t *inode, struct file *file) \
+{ \
+ return single_open(file, name##_seq_show, PDE_DATA(inode)); \
+} \
+struct file_operations name##_fops = { \
+ .owner = THIS_MODULE, \
+ .open = name##_single_open, \
+ .read = seq_read, \
+ .write = custom_seq_write, \
+ .llseek = seq_lseek, \
+ .release = lprocfs_single_release, \
+}
+
+#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
+#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
+
+#define LPROC_SEQ_FOPS_RO_TYPE(name, type) \
+ static int name##_##type##_seq_show(struct seq_file *m, void *v)\
+ { \
+ return lprocfs_rd_##type(m, m->private); \
+ } \
+ LPROC_SEQ_FOPS_RO(name##_##type)
+
+#define LPROC_SEQ_FOPS_RW_TYPE(name, type) \
+ static int name##_##type##_seq_show(struct seq_file *m, void *v)\
+ { \
+ return lprocfs_rd_##type(m, m->private); \
+ } \
+ static ssize_t name##_##type##_seq_write(struct file *file, \
+ const char *buffer, size_t count, loff_t *off) \
+ { \
+ struct seq_file *seq = file->private_data; \
+ return lprocfs_wr_##type(file, buffer, \
+ count, seq->private); \
+ } \
+ LPROC_SEQ_FOPS(name##_##type);
+
+#define LPROC_SEQ_FOPS_WR_ONLY(name, type) \
+ static ssize_t name##_##type##_write(struct file *file, \
+ const char *buffer, size_t count, loff_t *off) \
+ { \
+ return lprocfs_wr_##type(file, buffer, count, off); \
+ } \
+ static int name##_##type##_open(cfs_inode_t *inode, struct file *file) \
+ { \
+ return single_open(file, NULL, PDE_DATA(inode)); \
+ } \
+ struct file_operations name##_##type##_fops = { \
+ .open = name##_##type##_open, \
+ .write = name##_##type##_write, \
+ .release = lprocfs_single_release, \
+ };
+
+/* lprocfs_jobstats.c */
+int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
+ int event, long amount);
+void lprocfs_job_stats_fini(struct obd_device *obd);
+int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
+ cntr_init_callback fn);
+int lprocfs_rd_job_interval(struct seq_file *m, void *data);
+int lprocfs_wr_job_interval(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+
+/* lproc_ptlrpc.c */
+struct ptlrpc_request;
+extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
+
+/* lproc_status.c */
+int lprocfs_obd_rd_max_pages_per_rpc(struct seq_file *m, void *data);
+int lprocfs_obd_wr_max_pages_per_rpc(struct file *file, const char *buffer,
+ size_t count, loff_t *off);
+
+/* all quota proc functions */
+extern int lprocfs_quota_rd_bunit(char *page, char **start,
+ loff_t off, int count,
+ int *eof, void *data);
+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_btune(char *page, char **start,
+ loff_t off, int count,
+ int *eof, void *data);
+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_iunit(char *page, char **start,
+ loff_t off, int count,
+ int *eof, void *data);
+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_itune(char *page, char **start,
+ loff_t off, int count,
+ int *eof, void *data);
+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_type(char *page, char **start, loff_t off, int count,
+ int *eof, void *data);
+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_seconds(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_qs(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_boundary_factor(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_bunit(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_iunit(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, loff_t off,
+ int count, int *eof, void *data);
+extern int lprocfs_quota_wr_qs_factor(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data);
+
+
+
+#else
+/* LPROCFS is not defined */
+
+#define proc_lustre_root NULL
+
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
+ int index, long amount)
+{ return; }
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
+ int index)
+{ return; }
+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
+ int index, long amount)
+{ return; }
+static inline void lprocfs_counter_decr(struct lprocfs_stats *stats,
+ int index)
+{ return; }
+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
+ int index, unsigned conf,
+ const char *name, const char *units)
+{ return; }
+
+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
+ enum lprocfs_fields_flags field)
+{ return 0; }
+
+/* NB: we return !NULL to satisfy error checker */
+static inline struct lprocfs_stats *
+lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags)
+{ return (struct lprocfs_stats *)1; }
+static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
+{ return; }
+static inline int lprocfs_register_stats(proc_dir_entry_t *root,
+ const char *name,
+ struct lprocfs_stats *stats)
+{ return 0; }
+static inline void lprocfs_init_ops_stats(int num_private_stats,
+ struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_init_mps_stats(int num_private_stats,
+ struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
+{ return; }
+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+ unsigned int num_private_stats)
+{ return 0; }
+static inline int lprocfs_alloc_md_stats(struct obd_device *obddev,
+ unsigned int num_private_stats)
+{ return 0; }
+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
+{ return; }
+static inline void lprocfs_free_md_stats(struct obd_device *obddev)
+{ return; }
+
+struct obd_export;
+static inline int lprocfs_add_clear_entry(struct obd_export *exp)
+{ return 0; }
+static inline int lprocfs_exp_setup(struct obd_export *exp,lnet_nid_t *peer_nid,
+ int *newnid)
+{ return 0; }
+static inline int lprocfs_exp_cleanup(struct obd_export *exp)
+{ return 0; }
+static inline proc_dir_entry_t *
+lprocfs_add_simple(struct proc_dir_entry *root, char *name,
+ void *data, struct file_operations *fops)
+{return 0; }
+static inline struct proc_dir_entry *
+lprocfs_add_symlink(const char *name, struct proc_dir_entry *parent,
+ const char *format, ...)
+{return NULL; }
+static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
+{ return; }
+static inline
+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{return count;}
+static inline
+int lprocfs_nid_stats_clear_read(struct seq_file *m, void *data)
+{ return 0; }
+
+static inline proc_dir_entry_t *
+lprocfs_register(const char *name, proc_dir_entry_t *parent,
+ struct lprocfs_vars *list, void *data)
+{ return NULL; }
+static inline int lprocfs_add_vars(proc_dir_entry_t *root,
+ struct lprocfs_vars *var,
+ void *data)
+{ return 0; }
+static inline void lprocfs_remove(proc_dir_entry_t **root)
+{ return; }
+static inline void lprocfs_remove_proc_entry(const char *name,
+ struct proc_dir_entry *parent)
+{ return; }
+static inline int lprocfs_obd_setup(struct obd_device *dev,
+ struct lprocfs_vars *list)
+{ return 0; }
+static inline int lprocfs_obd_cleanup(struct obd_device *dev)
+{ return 0; }
+static inline int lprocfs_rd_u64(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_name(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_server_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_conn_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_import(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
+{ return 0; }
+static inline int lprocfs_rd_state(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_connect_flags(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_num_exports(struct seq_file *m, void *data)
+{ return 0; }
+extern inline int lprocfs_rd_numrefs(struct seq_file *m, void *data)
+{ return 0; }
+struct adaptive_timeout;
+static inline int lprocfs_at_hist_helper(struct seq_file *m,
+ struct adaptive_timeout *at)
+{ return 0; }
+static inline int lprocfs_rd_timeouts(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_wr_timeouts(struct file *file,
+ const char *buffer,
+ unsigned long count, void *data)
+{ return 0; }
+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_import(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_pinger_recov(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{ return 0; }
+
+/* Statfs helpers */
+static inline
+int lprocfs_rd_blksize(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytestotal(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytesfree(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytesavail(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_filestotal(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_filesfree(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
+{ return; }
+static inline
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
+{ return; }
+static inline
+void lprocfs_oh_clear(struct obd_histogram *oh)
+{ return; }
+static inline
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
+{ return 0; }
+static inline
+void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
+ struct lprocfs_counter *cnt)
+{ return; }
+static inline
+__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
+ enum lprocfs_fields_flags field)
+{ return (__u64)0; }
+
+#define LPROC_SEQ_FOPS_RO(name)
+#define LPROC_SEQ_FOPS(name)
+#define LPROC_SEQ_FOPS_RO_TYPE(name, type)
+#define LPROC_SEQ_FOPS_RW_TYPE(name, type)
+#define LPROC_SEQ_FOPS_WR_ONLY(name, type)
+
+/* lprocfs_jobstats.c */
+static inline
+int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, int event,
+ long amount)
+{ return 0; }
+static inline
+void lprocfs_job_stats_fini(struct obd_device *obd)
+{ return; }
+static inline
+int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
+ cntr_init_callback fn)
+{ return 0; }
+
+
+/* lproc_ptlrpc.c */
+#define target_print_req NULL
+
+#endif /* LPROCFS */
+
+#endif /* LPROCFS_SNMP_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
new file mode 100644
index 000000000000..d40ad81b4eb2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -0,0 +1,1346 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_LU_OBJECT_H
+#define __LUSTRE_LU_OBJECT_H
+
+#include <stdarg.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lu_ref.h>
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+struct lprocfs_stats;
+
+/** \defgroup lu lu
+ * lu_* data-types represent server-side entities shared by data and meta-data
+ * stacks.
+ *
+ * Design goals:
+ *
+ * -# support for layering.
+ *
+ * Server side object is split into layers, one per device in the
+ * corresponding device stack. Individual layer is represented by struct
+ * lu_object. Compound layered object --- by struct lu_object_header. Most
+ * interface functions take lu_object as an argument and operate on the
+ * whole compound object. This decision was made due to the following
+ * reasons:
+ *
+ * - it's envisaged that lu_object will be used much more often than
+ * lu_object_header;
+ *
+ * - we want lower (non-top) layers to be able to initiate operations
+ * on the whole object.
+ *
+ * Generic code supports layering more complex than simple stacking, e.g.,
+ * it is possible that at some layer object "spawns" multiple sub-objects
+ * on the lower layer.
+ *
+ * -# fid-based identification.
+ *
+ * Compound object is uniquely identified by its fid. Objects are indexed
+ * by their fids (hash table is used for index).
+ *
+ * -# caching and life-cycle management.
+ *
+ * Object's life-time is controlled by reference counting. When reference
+ * count drops to 0, object is returned to cache. Cached objects still
+ * retain their identity (i.e., fid), and can be recovered from cache.
+ *
+ * Objects are kept in the global LRU list, and lu_site_purge() function
+ * can be used to reclaim given number of unused objects from the tail of
+ * the LRU.
+ *
+ * -# avoiding recursion.
+ *
+ * Generic code tries to replace recursion through layers by iterations
+ * where possible. Additionally to the end of reducing stack consumption,
+ * data, when practically possible, are allocated through lu_context_key
+ * interface rather than on stack.
+ * @{
+ */
+
+struct lu_site;
+struct lu_object;
+struct lu_device;
+struct lu_object_header;
+struct lu_context;
+struct lu_env;
+
+/**
+ * Operations common for data and meta-data devices.
+ */
+struct lu_device_operations {
+ /**
+ * Allocate object for the given device (without lower-layer
+ * parts). This is called by lu_object_operations::loo_object_init()
+ * from the parent layer, and should setup at least lu_object::lo_dev
+ * and lu_object::lo_ops fields of resulting lu_object.
+ *
+ * Object creation protocol.
+ *
+ * Due to design goal of avoiding recursion, object creation (see
+ * lu_object_alloc()) is somewhat involved:
+ *
+ * - first, lu_device_operations::ldo_object_alloc() method of the
+ * top-level device in the stack is called. It should allocate top
+ * level object (including lu_object_header), but without any
+ * lower-layer sub-object(s).
+ *
+ * - then lu_object_alloc() sets fid in the header of newly created
+ * object.
+ *
+ * - then lu_object_operations::loo_object_init() is called. It has
+ * to allocate lower-layer object(s). To do this,
+ * lu_object_operations::loo_object_init() calls ldo_object_alloc()
+ * of the lower-layer device(s).
+ *
+ * - for all new objects allocated by
+ * lu_object_operations::loo_object_init() (and inserted into object
+ * stack), lu_object_operations::loo_object_init() is called again
+ * repeatedly, until no new objects are created.
+ *
+ * \post ergo(!IS_ERR(result), result->lo_dev == d &&
+ * result->lo_ops != NULL);
+ */
+ struct lu_object *(*ldo_object_alloc)(const struct lu_env *env,
+ const struct lu_object_header *h,
+ struct lu_device *d);
+ /**
+ * process config specific for device.
+ */
+ int (*ldo_process_config)(const struct lu_env *env,
+ struct lu_device *, struct lustre_cfg *);
+ int (*ldo_recovery_complete)(const struct lu_env *,
+ struct lu_device *);
+
+ /**
+ * initialize local objects for device. this method called after layer has
+ * been initialized (after LCFG_SETUP stage) and before it starts serving
+ * user requests.
+ */
+
+ int (*ldo_prepare)(const struct lu_env *,
+ struct lu_device *parent,
+ struct lu_device *dev);
+
+};
+
+/**
+ * For lu_object_conf flags
+ */
+typedef enum {
+ /* This is a new object to be allocated, or the file
+ * corresponding to the object does not exists. */
+ LOC_F_NEW = 0x00000001,
+} loc_flags_t;
+
+/**
+ * Object configuration, describing particulars of object being created. On
+ * server this is not used, as server objects are full identified by fid. On
+ * client configuration contains struct lustre_md.
+ */
+struct lu_object_conf {
+ /**
+ * Some hints for obj find and alloc.
+ */
+ loc_flags_t loc_flags;
+};
+
+/**
+ * Type of "printer" function used by lu_object_operations::loo_object_print()
+ * method.
+ *
+ * Printer function is needed to provide some flexibility in (semi-)debugging
+ * output: possible implementations: printk, CDEBUG, sysfs/seq_file
+ */
+typedef int (*lu_printer_t)(const struct lu_env *env,
+ void *cookie, const char *format, ...)
+ __attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Operations specific for particular lu_object.
+ */
+struct lu_object_operations {
+
+ /**
+ * Allocate lower-layer parts of the object by calling
+ * lu_device_operations::ldo_object_alloc() of the corresponding
+ * underlying device.
+ *
+ * This method is called once for each object inserted into object
+ * stack. It's responsibility of this method to insert lower-layer
+ * object(s) it create into appropriate places of object stack.
+ */
+ int (*loo_object_init)(const struct lu_env *env,
+ struct lu_object *o,
+ const struct lu_object_conf *conf);
+ /**
+ * Called (in top-to-bottom order) during object allocation after all
+ * layers were allocated and initialized. Can be used to perform
+ * initialization depending on lower layers.
+ */
+ int (*loo_object_start)(const struct lu_env *env,
+ struct lu_object *o);
+ /**
+ * Called before lu_object_operations::loo_object_free() to signal
+ * that object is being destroyed. Dual to
+ * lu_object_operations::loo_object_init().
+ */
+ void (*loo_object_delete)(const struct lu_env *env,
+ struct lu_object *o);
+ /**
+ * Dual to lu_device_operations::ldo_object_alloc(). Called when
+ * object is removed from memory.
+ */
+ void (*loo_object_free)(const struct lu_env *env,
+ struct lu_object *o);
+ /**
+ * Called when last active reference to the object is released (and
+ * object returns to the cache). This method is optional.
+ */
+ void (*loo_object_release)(const struct lu_env *env,
+ struct lu_object *o);
+ /**
+ * Optional debugging helper. Print given object.
+ */
+ int (*loo_object_print)(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o);
+ /**
+ * Optional debugging method. Returns true iff method is internally
+ * consistent.
+ */
+ int (*loo_object_invariant)(const struct lu_object *o);
+};
+
+/**
+ * Type of lu_device.
+ */
+struct lu_device_type;
+
+/**
+ * Device: a layer in the server side abstraction stacking.
+ */
+struct lu_device {
+ /**
+ * reference count. This is incremented, in particular, on each object
+ * created at this layer.
+ *
+ * \todo XXX which means that atomic_t is probably too small.
+ */
+ atomic_t ld_ref;
+ /**
+ * Pointer to device type. Never modified once set.
+ */
+ struct lu_device_type *ld_type;
+ /**
+ * Operation vector for this device.
+ */
+ const struct lu_device_operations *ld_ops;
+ /**
+ * Stack this device belongs to.
+ */
+ struct lu_site *ld_site;
+ struct proc_dir_entry *ld_proc_entry;
+
+ /** \todo XXX: temporary back pointer into obd. */
+ struct obd_device *ld_obd;
+ /**
+ * A list of references to this object, for debugging.
+ */
+ struct lu_ref ld_reference;
+ /**
+ * Link the device to the site.
+ **/
+ struct list_head ld_linkage;
+};
+
+struct lu_device_type_operations;
+
+/**
+ * Tag bits for device type. They are used to distinguish certain groups of
+ * device types.
+ */
+enum lu_device_tag {
+ /** this is meta-data device */
+ LU_DEVICE_MD = (1 << 0),
+ /** this is data device */
+ LU_DEVICE_DT = (1 << 1),
+ /** data device in the client stack */
+ LU_DEVICE_CL = (1 << 2)
+};
+
+/**
+ * Type of device.
+ */
+struct lu_device_type {
+ /**
+ * Tag bits. Taken from enum lu_device_tag. Never modified once set.
+ */
+ __u32 ldt_tags;
+ /**
+ * Name of this class. Unique system-wide. Never modified once set.
+ */
+ char *ldt_name;
+ /**
+ * Operations for this type.
+ */
+ const struct lu_device_type_operations *ldt_ops;
+ /**
+ * \todo XXX: temporary pointer to associated obd_type.
+ */
+ struct obd_type *ldt_obd_type;
+ /**
+ * \todo XXX: temporary: context tags used by obd_*() calls.
+ */
+ __u32 ldt_ctx_tags;
+ /**
+ * Number of existing device type instances.
+ */
+ unsigned ldt_device_nr;
+ /**
+ * Linkage into a global list of all device types.
+ *
+ * \see lu_device_types.
+ */
+ struct list_head ldt_linkage;
+};
+
+/**
+ * Operations on a device type.
+ */
+struct lu_device_type_operations {
+ /**
+ * Allocate new device.
+ */
+ struct lu_device *(*ldto_device_alloc)(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *lcfg);
+ /**
+ * Free device. Dual to
+ * lu_device_type_operations::ldto_device_alloc(). Returns pointer to
+ * the next device in the stack.
+ */
+ struct lu_device *(*ldto_device_free)(const struct lu_env *,
+ struct lu_device *);
+
+ /**
+ * Initialize the devices after allocation
+ */
+ int (*ldto_device_init)(const struct lu_env *env,
+ struct lu_device *, const char *,
+ struct lu_device *);
+ /**
+ * Finalize device. Dual to
+ * lu_device_type_operations::ldto_device_init(). Returns pointer to
+ * the next device in the stack.
+ */
+ struct lu_device *(*ldto_device_fini)(const struct lu_env *env,
+ struct lu_device *);
+ /**
+ * Initialize device type. This is called on module load.
+ */
+ int (*ldto_init)(struct lu_device_type *t);
+ /**
+ * Finalize device type. Dual to
+ * lu_device_type_operations::ldto_init(). Called on module unload.
+ */
+ void (*ldto_fini)(struct lu_device_type *t);
+ /**
+ * Called when the first device is created.
+ */
+ void (*ldto_start)(struct lu_device_type *t);
+ /**
+ * Called when number of devices drops to 0.
+ */
+ void (*ldto_stop)(struct lu_device_type *t);
+};
+
+static inline int lu_device_is_md(const struct lu_device *d)
+{
+ return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD);
+}
+
+/**
+ * Flags for the object layers.
+ */
+enum lu_object_flags {
+ /**
+ * this flags is set if lu_object_operations::loo_object_init() has
+ * been called for this layer. Used by lu_object_alloc().
+ */
+ LU_OBJECT_ALLOCATED = (1 << 0)
+};
+
+/**
+ * Common object attributes.
+ */
+struct lu_attr {
+ /** size in bytes */
+ __u64 la_size;
+ /** modification time in seconds since Epoch */
+ obd_time la_mtime;
+ /** access time in seconds since Epoch */
+ obd_time la_atime;
+ /** change time in seconds since Epoch */
+ obd_time la_ctime;
+ /** 512-byte blocks allocated to object */
+ __u64 la_blocks;
+ /** permission bits and file type */
+ __u32 la_mode;
+ /** owner id */
+ __u32 la_uid;
+ /** group id */
+ __u32 la_gid;
+ /** object flags */
+ __u32 la_flags;
+ /** number of persistent references to this object */
+ __u32 la_nlink;
+ /** blk bits of the object*/
+ __u32 la_blkbits;
+ /** blk size of the object*/
+ __u32 la_blksize;
+ /** real device */
+ __u32 la_rdev;
+ /**
+ * valid bits
+ *
+ * \see enum la_valid
+ */
+ __u64 la_valid;
+};
+
+/** Bit-mask of valid attributes */
+enum la_valid {
+ LA_ATIME = 1 << 0,
+ LA_MTIME = 1 << 1,
+ LA_CTIME = 1 << 2,
+ LA_SIZE = 1 << 3,
+ LA_MODE = 1 << 4,
+ LA_UID = 1 << 5,
+ LA_GID = 1 << 6,
+ LA_BLOCKS = 1 << 7,
+ LA_TYPE = 1 << 8,
+ LA_FLAGS = 1 << 9,
+ LA_NLINK = 1 << 10,
+ LA_RDEV = 1 << 11,
+ LA_BLKSIZE = 1 << 12,
+ LA_KILL_SUID = 1 << 13,
+ LA_KILL_SGID = 1 << 14,
+};
+
+/**
+ * Layer in the layered object.
+ */
+struct lu_object {
+ /**
+ * Header for this object.
+ */
+ struct lu_object_header *lo_header;
+ /**
+ * Device for this layer.
+ */
+ struct lu_device *lo_dev;
+ /**
+ * Operations for this object.
+ */
+ const struct lu_object_operations *lo_ops;
+ /**
+ * Linkage into list of all layers.
+ */
+ struct list_head lo_linkage;
+ /**
+ * Depth. Top level layer depth is 0.
+ */
+ int lo_depth;
+ /**
+ * Flags from enum lu_object_flags.
+ */
+ __u32 lo_flags;
+ /**
+ * Link to the device, for debugging.
+ */
+ struct lu_ref_link *lo_dev_ref;
+};
+
+enum lu_object_header_flags {
+ /**
+ * Don't keep this object in cache. Object will be destroyed as soon
+ * as last reference to it is released. This flag cannot be cleared
+ * once set.
+ */
+ LU_OBJECT_HEARD_BANSHEE = 0,
+ /**
+ * Mark this object has already been taken out of cache.
+ */
+ LU_OBJECT_UNHASHED = 1
+};
+
+enum lu_object_header_attr {
+ LOHA_EXISTS = 1 << 0,
+ LOHA_REMOTE = 1 << 1,
+ /**
+ * UNIX file type is stored in S_IFMT bits.
+ */
+ LOHA_FT_START = 001 << 12, /**< S_IFIFO */
+ LOHA_FT_END = 017 << 12, /**< S_IFMT */
+};
+
+/**
+ * "Compound" object, consisting of multiple layers.
+ *
+ * Compound object with given fid is unique with given lu_site.
+ *
+ * Note, that object does *not* necessary correspond to the real object in the
+ * persistent storage: object is an anchor for locking and method calling, so
+ * it is created for things like not-yet-existing child created by mkdir or
+ * create calls. lu_object_operations::loo_exists() can be used to check
+ * whether object is backed by persistent storage entity.
+ */
+struct lu_object_header {
+ /**
+ * Object flags from enum lu_object_header_flags. Set and checked
+ * atomically.
+ */
+ unsigned long loh_flags;
+ /**
+ * Object reference count. Protected by lu_site::ls_guard.
+ */
+ atomic_t loh_ref;
+ /**
+ * Fid, uniquely identifying this object.
+ */
+ struct lu_fid loh_fid;
+ /**
+ * Common object attributes, cached for efficiency. From enum
+ * lu_object_header_attr.
+ */
+ __u32 loh_attr;
+ /**
+ * Linkage into per-site hash table. Protected by lu_site::ls_guard.
+ */
+ struct hlist_node loh_hash;
+ /**
+ * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
+ */
+ struct list_head loh_lru;
+ /**
+ * Linkage into list of layers. Never modified once set (except lately
+ * during object destruction). No locking is necessary.
+ */
+ struct list_head loh_layers;
+ /**
+ * A list of references to this object, for debugging.
+ */
+ struct lu_ref loh_reference;
+};
+
+struct fld;
+
+struct lu_site_bkt_data {
+ /**
+ * number of busy object on this bucket
+ */
+ long lsb_busy;
+ /**
+ * LRU list, updated on each access to object. Protected by
+ * bucket lock of lu_site::ls_obj_hash.
+ *
+ * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
+ * moved to the lu_site::ls_lru.prev (this is due to the non-existence
+ * of list_for_each_entry_safe_reverse()).
+ */
+ struct list_head lsb_lru;
+ /**
+ * Wait-queue signaled when an object in this site is ultimately
+ * destroyed (lu_object_free()). It is used by lu_object_find() to
+ * wait before re-trying when object in the process of destruction is
+ * found in the hash table.
+ *
+ * \see htable_lookup().
+ */
+ wait_queue_head_t lsb_marche_funebre;
+};
+
+enum {
+ LU_SS_CREATED = 0,
+ LU_SS_CACHE_HIT,
+ LU_SS_CACHE_MISS,
+ LU_SS_CACHE_RACE,
+ LU_SS_CACHE_DEATH_RACE,
+ LU_SS_LRU_PURGED,
+ LU_SS_LAST_STAT
+};
+
+/**
+ * lu_site is a "compartment" within which objects are unique, and LRU
+ * discipline is maintained.
+ *
+ * lu_site exists so that multiple layered stacks can co-exist in the same
+ * address space.
+ *
+ * lu_site has the same relation to lu_device as lu_object_header to
+ * lu_object.
+ */
+struct lu_site {
+ /**
+ * objects hash table
+ */
+ cfs_hash_t *ls_obj_hash;
+ /**
+ * index of bucket on hash table while purging
+ */
+ int ls_purge_start;
+ /**
+ * Top-level device for this stack.
+ */
+ struct lu_device *ls_top_dev;
+ /**
+ * Bottom-level device for this stack
+ */
+ struct lu_device *ls_bottom_dev;
+ /**
+ * Linkage into global list of sites.
+ */
+ struct list_head ls_linkage;
+ /**
+ * List for lu device for this site, protected
+ * by ls_ld_lock.
+ **/
+ struct list_head ls_ld_linkage;
+ spinlock_t ls_ld_lock;
+
+ /**
+ * lu_site stats
+ */
+ struct lprocfs_stats *ls_stats;
+ /**
+ * XXX: a hack! fld has to find md_site via site, remove when possible
+ */
+ struct seq_server_site *ld_seq_site;
+};
+
+static inline struct lu_site_bkt_data *
+lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
+{
+ cfs_hash_bd_t bd;
+
+ cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
+ return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
+}
+
+/** \name ctors
+ * Constructors/destructors.
+ * @{
+ */
+
+int lu_site_init (struct lu_site *s, struct lu_device *d);
+void lu_site_fini (struct lu_site *s);
+int lu_site_init_finish (struct lu_site *s);
+void lu_stack_fini (const struct lu_env *env, struct lu_device *top);
+void lu_device_get (struct lu_device *d);
+void lu_device_put (struct lu_device *d);
+int lu_device_init (struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini (struct lu_device *d);
+int lu_object_header_init(struct lu_object_header *h);
+void lu_object_header_fini(struct lu_object_header *h);
+int lu_object_init (struct lu_object *o,
+ struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini (struct lu_object *o);
+void lu_object_add_top (struct lu_object_header *h, struct lu_object *o);
+void lu_object_add (struct lu_object *before, struct lu_object *o);
+
+void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d);
+void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
+
+/**
+ * Helpers to initialize and finalize device types.
+ */
+
+int lu_device_type_init(struct lu_device_type *ldt);
+void lu_device_type_fini(struct lu_device_type *ldt);
+void lu_types_stop(void);
+
+/** @} ctors */
+
+/** \name caching
+ * Caching and reference counting.
+ * @{
+ */
+
+/**
+ * Acquire additional reference to the given object. This function is used to
+ * attain additional reference. To acquire initial reference use
+ * lu_object_find().
+ */
+static inline void lu_object_get(struct lu_object *o)
+{
+ LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
+ atomic_inc(&o->lo_header->loh_ref);
+}
+
+/**
+ * Return true of object will not be cached after last reference to it is
+ * released.
+ */
+static inline int lu_object_is_dying(const struct lu_object_header *h)
+{
+ return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
+}
+
+void lu_object_put(const struct lu_env *env, struct lu_object *o);
+void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o);
+void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
+
+int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr);
+
+void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
+ lu_printer_t printer);
+struct lu_object *lu_object_find(const struct lu_env *env,
+ struct lu_device *dev, const struct lu_fid *f,
+ const struct lu_object_conf *conf);
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf);
+struct lu_object *lu_object_find_slice(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf);
+/** @} caching */
+
+/** \name helpers
+ * Helpers.
+ * @{
+ */
+
+/**
+ * First (topmost) sub-object of given compound object
+ */
+static inline struct lu_object *lu_object_top(struct lu_object_header *h)
+{
+ LASSERT(!list_empty(&h->loh_layers));
+ return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+}
+
+/**
+ * Next sub-object in the layering
+ */
+static inline struct lu_object *lu_object_next(const struct lu_object *o)
+{
+ return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
+}
+
+/**
+ * Pointer to the fid of this object.
+ */
+static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
+{
+ return &o->lo_header->loh_fid;
+}
+
+/**
+ * return device operations vector for this object
+ */
+static const inline struct lu_device_operations *
+lu_object_ops(const struct lu_object *o)
+{
+ return o->lo_dev->ld_ops;
+}
+
+/**
+ * Given a compound object, find its slice, corresponding to the device type
+ * \a dtype.
+ */
+struct lu_object *lu_object_locate(struct lu_object_header *h,
+ const struct lu_device_type *dtype);
+
+/**
+ * Printer function emitting messages through libcfs_debug_msg().
+ */
+int lu_cdebug_printer(const struct lu_env *env,
+ void *cookie, const char *format, ...);
+
+/**
+ * Print object description followed by a user-supplied message.
+ */
+#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
+} while (0)
+
+/**
+ * Print short object description followed by a user-supplied message.
+ */
+#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
+do { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
+ \
+ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
+ (object)->lo_header); \
+ lu_cdebug_printer(env, &msgdata, "\n"); \
+ CDEBUG(mask, format , ## __VA_ARGS__); \
+ } \
+} while (0)
+
+void lu_object_print (const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct lu_object *o);
+void lu_object_header_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct lu_object_header *hdr);
+
+/**
+ * Check object consistency.
+ */
+int lu_object_invariant(const struct lu_object *o);
+
+
+/**
+ * Check whether object exists, no matter on local or remote storage.
+ * Note: LOHA_EXISTS will be set once some one created the object,
+ * and it does not needs to be committed to storage.
+ */
+#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS)
+
+/**
+ * Check whether object on the remote storage.
+ */
+#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
+
+static inline int lu_object_assert_exists(const struct lu_object *o)
+{
+ return lu_object_exists(o);
+}
+
+static inline int lu_object_assert_not_exists(const struct lu_object *o)
+{
+ return !lu_object_exists(o);
+}
+
+/**
+ * Attr of this object.
+ */
+static inline __u32 lu_object_attr(const struct lu_object *o)
+{
+ LASSERT(lu_object_exists(o) != 0);
+ return o->lo_header->loh_attr;
+}
+
+static inline struct lu_ref_link *lu_object_ref_add(struct lu_object *o,
+ const char *scope,
+ const void *source)
+{
+ return lu_ref_add(&o->lo_header->loh_reference, scope, source);
+}
+
+static inline void lu_object_ref_del(struct lu_object *o,
+ const char *scope, const void *source)
+{
+ lu_ref_del(&o->lo_header->loh_reference, scope, source);
+}
+
+static inline void lu_object_ref_del_at(struct lu_object *o,
+ struct lu_ref_link *link,
+ const char *scope, const void *source)
+{
+ lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source);
+}
+
+/** input params, should be filled out by mdt */
+struct lu_rdpg {
+ /** hash */
+ __u64 rp_hash;
+ /** count in bytes */
+ unsigned int rp_count;
+ /** number of pages */
+ unsigned int rp_npages;
+ /** requested attr */
+ __u32 rp_attrs;
+ /** pointers to pages */
+ struct page **rp_pages;
+};
+
+enum lu_xattr_flags {
+ LU_XATTR_REPLACE = (1 << 0),
+ LU_XATTR_CREATE = (1 << 1)
+};
+
+/** @} helpers */
+
+/** \name lu_context
+ * @{ */
+
+/** For lu_context health-checks */
+enum lu_context_state {
+ LCS_INITIALIZED = 1,
+ LCS_ENTERED,
+ LCS_LEFT,
+ LCS_FINALIZED
+};
+
+/**
+ * lu_context. Execution context for lu_object methods. Currently associated
+ * with thread.
+ *
+ * All lu_object methods, except device and device type methods (called during
+ * system initialization and shutdown) are executed "within" some
+ * lu_context. This means, that pointer to some "current" lu_context is passed
+ * as an argument to all methods.
+ *
+ * All service ptlrpc threads create lu_context as part of their
+ * initialization. It is possible to create "stand-alone" context for other
+ * execution environments (like system calls).
+ *
+ * lu_object methods mainly use lu_context through lu_context_key interface
+ * that allows each layer to associate arbitrary pieces of data with each
+ * context (see pthread_key_create(3) for similar interface).
+ *
+ * On a client, lu_context is bound to a thread, see cl_env_get().
+ *
+ * \see lu_context_key
+ */
+struct lu_context {
+ /**
+ * lu_context is used on the client side too. Yet we don't want to
+ * allocate values of server-side keys for the client contexts and
+ * vice versa.
+ *
+ * To achieve this, set of tags in introduced. Contexts and keys are
+ * marked with tags. Key value are created only for context whose set
+ * of tags has non-empty intersection with one for key. Tags are taken
+ * from enum lu_context_tag.
+ */
+ __u32 lc_tags;
+ enum lu_context_state lc_state;
+ /**
+ * Pointer to the home service thread. NULL for other execution
+ * contexts.
+ */
+ struct ptlrpc_thread *lc_thread;
+ /**
+ * Pointer to an array with key values. Internal implementation
+ * detail.
+ */
+ void **lc_value;
+ /**
+ * Linkage into a list of all remembered contexts. Only
+ * `non-transient' contexts, i.e., ones created for service threads
+ * are placed here.
+ */
+ struct list_head lc_remember;
+ /**
+ * Version counter used to skip calls to lu_context_refill() when no
+ * keys were registered.
+ */
+ unsigned lc_version;
+ /**
+ * Debugging cookie.
+ */
+ unsigned lc_cookie;
+};
+
+/**
+ * lu_context_key interface. Similar to pthread_key.
+ */
+
+enum lu_context_tag {
+ /**
+ * Thread on md server
+ */
+ LCT_MD_THREAD = 1 << 0,
+ /**
+ * Thread on dt server
+ */
+ LCT_DT_THREAD = 1 << 1,
+ /**
+ * Context for transaction handle
+ */
+ LCT_TX_HANDLE = 1 << 2,
+ /**
+ * Thread on client
+ */
+ LCT_CL_THREAD = 1 << 3,
+ /**
+ * A per-request session on a server, and a per-system-call session on
+ * a client.
+ */
+ LCT_SESSION = 1 << 4,
+ /**
+ * A per-request data on OSP device
+ */
+ LCT_OSP_THREAD = 1 << 5,
+ /**
+ * MGS device thread
+ */
+ LCT_MG_THREAD = 1 << 6,
+ /**
+ * Context for local operations
+ */
+ LCT_LOCAL = 1 << 7,
+ /**
+ * Set when at least one of keys, having values in this context has
+ * non-NULL lu_context_key::lct_exit() method. This is used to
+ * optimize lu_context_exit() call.
+ */
+ LCT_HAS_EXIT = 1 << 28,
+ /**
+ * Don't add references for modules creating key values in that context.
+ * This is only for contexts used internally by lu_object framework.
+ */
+ LCT_NOREF = 1 << 29,
+ /**
+ * Key is being prepared for retiring, don't create new values for it.
+ */
+ LCT_QUIESCENT = 1 << 30,
+ /**
+ * Context should be remembered.
+ */
+ LCT_REMEMBER = 1 << 31,
+ /**
+ * Contexts usable in cache shrinker thread.
+ */
+ LCT_SHRINKER = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+};
+
+/**
+ * Key. Represents per-context value slot.
+ *
+ * Keys are usually registered when module owning the key is initialized, and
+ * de-registered when module is unloaded. Once key is registered, all new
+ * contexts with matching tags, will get key value. "Old" contexts, already
+ * initialized at the time of key registration, can be forced to get key value
+ * by calling lu_context_refill().
+ *
+ * Every key value is counted in lu_context_key::lct_used and acquires a
+ * reference on an owning module. This means, that all key values have to be
+ * destroyed before module can be unloaded. This is usually achieved by
+ * stopping threads started by the module, that created contexts in their
+ * entry functions. Situation is complicated by the threads shared by multiple
+ * modules, like ptlrpcd daemon on a client. To work around this problem,
+ * contexts, created in such threads, are `remembered' (see
+ * LCT_REMEMBER)---i.e., added into a global list. When module is preparing
+ * for unloading it does the following:
+ *
+ * - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT)
+ * preventing new key values from being allocated in the new contexts,
+ * and
+ *
+ * - scans a list of remembered contexts, destroying values of module
+ * keys, thus releasing references to the module.
+ *
+ * This is done by lu_context_key_quiesce(). If module is re-activated
+ * before key has been de-registered, lu_context_key_revive() call clears
+ * `quiescent' marker.
+ *
+ * lu_context code doesn't provide any internal synchronization for these
+ * activities---it's assumed that startup (including threads start-up) and
+ * shutdown are serialized by some external means.
+ *
+ * \see lu_context
+ */
+struct lu_context_key {
+ /**
+ * Set of tags for which values of this key are to be instantiated.
+ */
+ __u32 lct_tags;
+ /**
+ * Value constructor. This is called when new value is created for a
+ * context. Returns pointer to new value of error pointer.
+ */
+ void *(*lct_init)(const struct lu_context *ctx,
+ struct lu_context_key *key);
+ /**
+ * Value destructor. Called when context with previously allocated
+ * value of this slot is destroyed. \a data is a value that was returned
+ * by a matching call to lu_context_key::lct_init().
+ */
+ void (*lct_fini)(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+ /**
+ * Optional method called on lu_context_exit() for all allocated
+ * keys. Can be used by debugging code checking that locks are
+ * released, etc.
+ */
+ void (*lct_exit)(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+ /**
+ * Internal implementation detail: index within lu_context::lc_value[]
+ * reserved for this key.
+ */
+ int lct_index;
+ /**
+ * Internal implementation detail: number of values created for this
+ * key.
+ */
+ atomic_t lct_used;
+ /**
+ * Internal implementation detail: module for this key.
+ */
+ module_t *lct_owner;
+ /**
+ * References to this key. For debugging.
+ */
+ struct lu_ref lct_reference;
+};
+
+#define LU_KEY_INIT(mod, type) \
+ static void* mod##_key_init(const struct lu_context *ctx, \
+ struct lu_context_key *key) \
+ { \
+ type *value; \
+ \
+ CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value)); \
+ \
+ OBD_ALLOC_PTR(value); \
+ if (value == NULL) \
+ value = ERR_PTR(-ENOMEM); \
+ \
+ return value; \
+ } \
+ struct __##mod##__dummy_init {;} /* semicolon catcher */
+
+#define LU_KEY_FINI(mod, type) \
+ static void mod##_key_fini(const struct lu_context *ctx, \
+ struct lu_context_key *key, void* data) \
+ { \
+ type *info = data; \
+ \
+ OBD_FREE_PTR(info); \
+ } \
+ struct __##mod##__dummy_fini {;} /* semicolon catcher */
+
+#define LU_KEY_INIT_FINI(mod, type) \
+ LU_KEY_INIT(mod,type); \
+ LU_KEY_FINI(mod,type)
+
+#define LU_CONTEXT_KEY_DEFINE(mod, tags) \
+ struct lu_context_key mod##_thread_key = { \
+ .lct_tags = tags, \
+ .lct_init = mod##_key_init, \
+ .lct_fini = mod##_key_fini \
+ }
+
+#define LU_CONTEXT_KEY_INIT(key) \
+do { \
+ (key)->lct_owner = THIS_MODULE; \
+} while (0)
+
+int lu_context_key_register(struct lu_context_key *key);
+void lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get (const struct lu_context *ctx,
+ const struct lu_context_key *key);
+void lu_context_key_quiesce (struct lu_context_key *key);
+void lu_context_key_revive (struct lu_context_key *key);
+
+
+/*
+ * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
+ * owning module.
+ */
+
+#define LU_KEY_INIT_GENERIC(mod) \
+ static void mod##_key_init_generic(struct lu_context_key *k, ...) \
+ { \
+ struct lu_context_key *key = k; \
+ va_list args; \
+ \
+ va_start(args, k); \
+ do { \
+ LU_CONTEXT_KEY_INIT(key); \
+ key = va_arg(args, struct lu_context_key *); \
+ } while (key != NULL); \
+ va_end(args); \
+ }
+
+#define LU_TYPE_INIT(mod, ...) \
+ LU_KEY_INIT_GENERIC(mod) \
+ static int mod##_type_init(struct lu_device_type *t) \
+ { \
+ mod##_key_init_generic(__VA_ARGS__, NULL); \
+ return lu_context_key_register_many(__VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_init {;}
+
+#define LU_TYPE_FINI(mod, ...) \
+ static void mod##_type_fini(struct lu_device_type *t) \
+ { \
+ lu_context_key_degister_many(__VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_fini {;}
+
+#define LU_TYPE_START(mod, ...) \
+ static void mod##_type_start(struct lu_device_type *t) \
+ { \
+ lu_context_key_revive_many(__VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_start {;}
+
+#define LU_TYPE_STOP(mod, ...) \
+ static void mod##_type_stop(struct lu_device_type *t) \
+ { \
+ lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
+ } \
+ struct __##mod##_dummy_type_stop {;}
+
+
+
+#define LU_TYPE_INIT_FINI(mod, ...) \
+ LU_TYPE_INIT(mod, __VA_ARGS__); \
+ LU_TYPE_FINI(mod, __VA_ARGS__); \
+ LU_TYPE_START(mod, __VA_ARGS__); \
+ LU_TYPE_STOP(mod, __VA_ARGS__)
+
+int lu_context_init (struct lu_context *ctx, __u32 tags);
+void lu_context_fini (struct lu_context *ctx);
+void lu_context_enter (struct lu_context *ctx);
+void lu_context_exit (struct lu_context *ctx);
+int lu_context_refill(struct lu_context *ctx);
+
+/*
+ * Helper functions to operate on multiple keys. These are used by the default
+ * device type operations, defined by LU_TYPE_INIT_FINI().
+ */
+
+int lu_context_key_register_many(struct lu_context_key *k, ...);
+void lu_context_key_degister_many(struct lu_context_key *k, ...);
+void lu_context_key_revive_many (struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+
+/*
+ * update/clear ctx/ses tags.
+ */
+void lu_context_tags_update(__u32 tags);
+void lu_context_tags_clear(__u32 tags);
+void lu_session_tags_update(__u32 tags);
+void lu_session_tags_clear(__u32 tags);
+
+/**
+ * Environment.
+ */
+struct lu_env {
+ /**
+ * "Local" context, used to store data instead of stack.
+ */
+ struct lu_context le_ctx;
+ /**
+ * "Session" context for per-request data.
+ */
+ struct lu_context *le_ses;
+};
+
+int lu_env_init (struct lu_env *env, __u32 tags);
+void lu_env_fini (struct lu_env *env);
+int lu_env_refill(struct lu_env *env);
+int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags);
+
+/** @} lu_context */
+
+/**
+ * Output site statistical counters into a buffer. Suitable for
+ * ll_rd_*()-style functions.
+ */
+int lu_site_stats_print(const struct lu_site *s, struct seq_file *m);
+
+/**
+ * Common name structure to be passed around for various name related methods.
+ */
+struct lu_name {
+ const char *ln_name;
+ int ln_namelen;
+};
+
+/**
+ * Common buffer structure to be passed around for various xattr_{s,g}et()
+ * methods.
+ */
+struct lu_buf {
+ void *lb_buf;
+ ssize_t lb_len;
+};
+
+#define DLUBUF "(%p %zu)"
+#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len
+/**
+ * One-time initializers, called at obdclass module initialization, not
+ * exported.
+ */
+
+/**
+ * Initialization of global lu_* data.
+ */
+int lu_global_init(void);
+
+/**
+ * Dual to lu_global_init().
+ */
+void lu_global_fini(void);
+
+struct lu_kmem_descr {
+ struct kmem_cache **ckd_cache;
+ const char *ckd_name;
+ const size_t ckd_size;
+};
+
+int lu_kmem_init(struct lu_kmem_descr *caches);
+void lu_kmem_fini(struct lu_kmem_descr *caches);
+
+void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
+ const struct lu_fid *fid);
+struct lu_object *lu_object_anon(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_object_conf *conf);
+
+/** null buffer */
+extern struct lu_buf LU_BUF_NULL;
+
+void lu_buf_free(struct lu_buf *buf);
+void lu_buf_alloc(struct lu_buf *buf, int size);
+void lu_buf_realloc(struct lu_buf *buf, int size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, int len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, int len);
+
+/** @} lu */
+#endif /* __LUSTRE_LU_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_ref.h b/drivers/staging/lustre/lustre/include/lu_ref.h
new file mode 100644
index 000000000000..624c19be1524
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_ref.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LUSTRE_LU_REF_H
+#define __LUSTRE_LU_REF_H
+
+#include <linux/list.h>
+
+/** \defgroup lu_ref lu_ref
+ *
+ * An interface to track references between objects. Mostly for debugging.
+ *
+ * Suppose there is a reference counted data-structure struct foo. To track
+ * who acquired references to instance of struct foo, add lu_ref field to it:
+ *
+ * \code
+ * struct foo {
+ * atomic_t foo_refcount;
+ * struct lu_ref foo_reference;
+ * ...
+ * };
+ * \endcode
+ *
+ * foo::foo_reference has to be initialized by calling
+ * lu_ref_init(). Typically there will be functions or macros to increment and
+ * decrement foo::foo_refcount, let's say they are foo_get(struct foo *foo)
+ * and foo_put(struct foo *foo), respectively.
+ *
+ * Whenever foo_get() is called to acquire a reference on a foo, lu_ref_add()
+ * has to be called to insert into foo::foo_reference a record, describing
+ * acquired reference. Dually, lu_ref_del() removes matching record. Typical
+ * usages are:
+ *
+ * \code
+ * struct bar *bar;
+ *
+ * // bar owns a reference to foo.
+ * bar->bar_foo = foo_get(foo);
+ * lu_ref_add(&foo->foo_reference, "bar", bar);
+ *
+ * ...
+ *
+ * // reference from bar to foo is released.
+ * lu_ref_del(&foo->foo_reference, "bar", bar);
+ * foo_put(bar->bar_foo);
+ *
+ *
+ * // current thread acquired a temporary reference to foo.
+ * foo_get(foo);
+ * lu_ref_add(&foo->reference, __FUNCTION__, current);
+ *
+ * ...
+ *
+ * // temporary reference is released.
+ * lu_ref_del(&foo->reference, __FUNCTION__, current);
+ * foo_put(foo);
+ * \endcode
+ *
+ * \e Et \e cetera. Often it makes sense to include lu_ref_add() and
+ * lu_ref_del() calls into foo_get() and foo_put(). When an instance of struct
+ * foo is destroyed, lu_ref_fini() has to be called that checks that no
+ * pending references remain. lu_ref_print() can be used to dump a list of
+ * pending references, while hunting down a leak.
+ *
+ * For objects to which a large number of references can be acquired,
+ * lu_ref_del() can become cpu consuming, as it has to scan the list of
+ * references. To work around this, remember result of lu_ref_add() (usually
+ * in the same place where pointer to struct foo is stored), and use
+ * lu_ref_del_at():
+ *
+ * \code
+ * // There is a large number of bar's for a single foo.
+ * bar->bar_foo = foo_get(foo);
+ * bar->bar_foo_ref = lu_ref_add(&foo->foo_reference, "bar", bar);
+ *
+ * ...
+ *
+ * // reference from bar to foo is released.
+ * lu_ref_del_at(&foo->foo_reference, bar->bar_foo_ref, "bar", bar);
+ * foo_put(bar->bar_foo);
+ * \endcode
+ *
+ * lu_ref interface degrades gracefully in case of memory shortages.
+ *
+ * @{
+ */
+
+
+struct lu_ref {};
+
+static inline void lu_ref_init(struct lu_ref *ref)
+{
+}
+
+static inline void lu_ref_fini(struct lu_ref *ref)
+{
+}
+
+static inline struct lu_ref_link *lu_ref_add(struct lu_ref *ref,
+ const char *scope,
+ const void *source)
+{
+ return NULL;
+}
+
+static inline struct lu_ref_link *lu_ref_add_atomic(struct lu_ref *ref,
+ const char *scope,
+ const void *source)
+{
+ return NULL;
+}
+
+static inline void lu_ref_del(struct lu_ref *ref, const char *scope,
+ const void *source)
+{
+}
+
+static inline void lu_ref_set_at(struct lu_ref *ref, struct lu_ref_link *link,
+ const char *scope, const void *source0,
+ const void *source1)
+{
+}
+
+static inline void lu_ref_del_at(struct lu_ref *ref, struct lu_ref_link *link,
+ const char *scope, const void *source)
+{
+}
+
+static inline int lu_ref_global_init(void)
+{
+ return 0;
+}
+
+static inline void lu_ref_global_fini(void)
+{
+}
+
+static inline void lu_ref_print(const struct lu_ref *ref)
+{
+}
+
+static inline void lu_ref_print_all(void)
+{
+}
+
+/** @} lu */
+
+#endif /* __LUSTRE_LU_REF_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_target.h b/drivers/staging/lustre/lustre/include/lu_target.h
new file mode 100644
index 000000000000..8d48cf4e27ee
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_target.h
@@ -0,0 +1,91 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_LU_TARGET_H
+#define _LUSTRE_LU_TARGET_H
+
+#include <dt_object.h>
+#include <lustre_disk.h>
+
+struct lu_target {
+ struct obd_device *lut_obd;
+ struct dt_device *lut_bottom;
+ /** last_rcvd file */
+ struct dt_object *lut_last_rcvd;
+ /* transaction callbacks */
+ struct dt_txn_callback lut_txn_cb;
+ /** server data in last_rcvd file */
+ struct lr_server_data lut_lsd;
+ /** Server last transaction number */
+ __u64 lut_last_transno;
+ /** Lock protecting last transaction number */
+ spinlock_t lut_translock;
+ /** Lock protecting client bitmap */
+ spinlock_t lut_client_bitmap_lock;
+ /** Bitmap of known clients */
+ unsigned long *lut_client_bitmap;
+};
+
+typedef void (*tgt_cb_t)(struct lu_target *lut, __u64 transno,
+ void *data, int err);
+struct tgt_commit_cb {
+ tgt_cb_t tgt_cb_func;
+ void *tgt_cb_data;
+};
+
+void tgt_boot_epoch_update(struct lu_target *lut);
+int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *lut,
+ struct obd_export *exp, __u64 transno);
+int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp);
+int tgt_init(const struct lu_env *env, struct lu_target *lut,
+ struct obd_device *obd, struct dt_device *dt);
+void tgt_fini(const struct lu_env *env, struct lu_target *lut);
+int tgt_client_alloc(struct obd_export *exp);
+void tgt_client_free(struct obd_export *exp);
+int tgt_client_del(const struct lu_env *env, struct obd_export *exp);
+int tgt_client_add(const struct lu_env *env, struct obd_export *exp, int);
+int tgt_client_new(const struct lu_env *env, struct obd_export *exp);
+int tgt_client_data_read(const struct lu_env *env, struct lu_target *tg,
+ struct lsd_client_data *lcd, loff_t *off, int index);
+int tgt_client_data_write(const struct lu_env *env, struct lu_target *tg,
+ struct lsd_client_data *lcd, loff_t *off, struct thandle *th);
+int tgt_server_data_read(const struct lu_env *env, struct lu_target *tg);
+int tgt_server_data_write(const struct lu_env *env, struct lu_target *tg,
+ struct thandle *th);
+int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg, int sync);
+int tgt_truncate_last_rcvd(const struct lu_env *env, struct lu_target *tg, loff_t off);
+
+#endif /* __LUSTRE_LU_TARGET_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/libiam.h b/drivers/staging/lustre/lustre/include/lustre/libiam.h
new file mode 100644
index 000000000000..e8e0b084a6bc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/libiam.h
@@ -0,0 +1,145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/libiam.h
+ *
+ * iam user level library
+ *
+ * Author: Wang Di <wangdi@clusterfs.com>
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+/*
+ * lustre/libiam.h
+ */
+
+#ifndef __IAM_ULIB_H__
+#define __IAM_ULIB_H__
+
+/** \defgroup libiam libiam
+ *
+ * @{
+ */
+
+
+#define DX_FMT_NAME_LEN 16
+
+enum iam_fmt_t {
+ FMT_LFIX,
+ FMT_LVAR
+};
+
+struct iam_uapi_info {
+ __u16 iui_keysize;
+ __u16 iui_recsize;
+ __u16 iui_ptrsize;
+ __u16 iui_height;
+ char iui_fmt_name[DX_FMT_NAME_LEN];
+};
+
+/*
+ * Creat an iam file, but do NOT open it.
+ * Return 0 if success, else -1.
+ */
+int iam_creat(char *filename, enum iam_fmt_t fmt,
+ int blocksize, int keysize, int recsize, int ptrsize);
+
+/*
+ * Open an iam file, but do NOT creat it if the file doesn't exist.
+ * Please use iam_creat for creating the file before use iam_open.
+ * Return file id (fd) if success, else -1.
+ */
+int iam_open(char *filename, struct iam_uapi_info *ua);
+
+/*
+ * Close file opened by iam_open.
+ */
+int iam_close(int fd);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_insert(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *keybuf,
+ int rec_need_convert, char *recbuf);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_lookup(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *key_buf,
+ int *keysize, char *save_key,
+ int rec_need_convert, char *rec_buf,
+ int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_delete(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *keybuf,
+ int rec_need_convert, char *recbuf);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_start(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *key_buf,
+ int *keysize, char *save_key,
+ int rec_need_convert, char *rec_buf,
+ int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_next(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *key_buf,
+ int *keysize, char *save_key,
+ int rec_need_convert, char *rec_buf,
+ int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_stop(int fd, struct iam_uapi_info *ua,
+ int key_need_convert, char *keybuf,
+ int rec_need_convert, char *recbuf);
+
+/*
+ * Change iam file mode.
+ */
+int iam_polymorph(char *filename, unsigned long mode);
+
+/** @} libiam */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h b/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h
new file mode 100644
index 000000000000..707eb74fdf68
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h
@@ -0,0 +1,43 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/*
+ * NOTE: This file is DEPRECATED! Please include lustreapi.h directly
+ * instead of this file. This file will be removed from a future version
+ * of lustre!
+ */
+
+#ifndef _LIBLUSTREAPI_H_
+#define _LIBLUSTREAPI_H_
+
+#include <lustre/lustreapi.h>
+#warning "Including liblustreapi.h is deprecated. Include lustreapi.h directly."
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
new file mode 100644
index 000000000000..ad253c6deadd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/ll_fiemap.h
+ *
+ * FIEMAP data structures and flags. This header file will be used until
+ * fiemap.h is available in the upstream kernel.
+ *
+ * Author: Kalpak Shah <kalpak.shah@sun.com>
+ * Author: Andreas Dilger <adilger@sun.com>
+ */
+
+#ifndef _LUSTRE_FIEMAP_H
+#define _LUSTRE_FIEMAP_H
+
+
+
+struct ll_fiemap_extent {
+ __u64 fe_logical; /* logical offset in bytes for the start of
+ * the extent from the beginning of the file */
+ __u64 fe_physical; /* physical offset in bytes for the start
+ * of the extent from the beginning of the disk */
+ __u64 fe_length; /* length in bytes for this extent */
+ __u64 fe_reserved64[2];
+ __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
+ __u32 fe_device; /* device number for this extent */
+ __u32 fe_reserved[2];
+};
+
+struct ll_user_fiemap {
+ __u64 fm_start; /* logical offset (inclusive) at
+ * which to start mapping (in) */
+ __u64 fm_length; /* logical length of mapping which
+ * userspace wants (in) */
+ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
+ __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
+ __u32 fm_extent_count; /* size of fm_extents array (in) */
+ __u32 fm_reserved;
+ struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
+};
+
+#define FIEMAP_MAX_OFFSET (~0ULL)
+
+#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
+#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
+
+#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
+#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
+#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
+ * Sets EXTENT_UNKNOWN. */
+#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
+ * while fs is unmounted */
+#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
+ * Sets EXTENT_NO_DIRECT. */
+#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
+ * block aligned. */
+#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
+ * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
+ * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
+ * no data (i.e. zero). */
+#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
+ * support extents. Result
+ * merged for efficiency. */
+
+
+static inline size_t fiemap_count_to_size(size_t extent_count)
+{
+ return (sizeof(struct ll_user_fiemap) + extent_count *
+ sizeof(struct ll_fiemap_extent));
+}
+
+static inline unsigned fiemap_size_to_count(size_t array_size)
+{
+ return ((array_size - sizeof(struct ll_user_fiemap)) /
+ sizeof(struct ll_fiemap_extent));
+}
+
+#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
+
+#ifdef FIEMAP_FLAGS_COMPAT
+#undef FIEMAP_FLAGS_COMPAT
+#endif
+
+/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
+#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */
+#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely.
+ * Sets NO_DIRECT flag */
+
+#endif /* _LUSTRE_FIEMAP_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h b/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h
new file mode 100644
index 000000000000..93a3d7db3010
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h
@@ -0,0 +1,2 @@
+#define BUILD_VERSION "v2_3_64_0-g6e62c21-CHANGED-3.9.0"
+#define LUSTRE_RELEASE 3.9.0_g6e62c21
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
new file mode 100644
index 000000000000..8825460f12ac
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -0,0 +1,3653 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/lustre_idl.h
+ *
+ * Lustre wire protocol definitions.
+ */
+
+/** \defgroup lustreidl lustreidl
+ *
+ * Lustre wire protocol definitions.
+ *
+ * ALL structs passing over the wire should be declared here. Structs
+ * that are used in interfaces with userspace should go in lustre_user.h.
+ *
+ * All structs being declared here should be built from simple fixed-size
+ * types (__u8, __u16, __u32, __u64) or be built from other types or
+ * structs also declared in this file. Similarly, all flags and magic
+ * values in those structs should also be declared here. This ensures
+ * that the Lustre wire protocol is not influenced by external dependencies.
+ *
+ * The only other acceptable items in this file are VERY SIMPLE accessor
+ * functions to avoid callers grubbing inside the structures, and the
+ * prototypes of the swabber functions for each struct. Nothing that
+ * depends on external functions or definitions should be in here.
+ *
+ * Structs must be properly aligned to put 64-bit values on an 8-byte
+ * boundary. Any structs being added here must also be added to
+ * utils/wirecheck.c and "make newwiretest" run to regenerate the
+ * utils/wiretest.c sources. This allows us to verify that wire structs
+ * have the proper alignment/size on all architectures.
+ *
+ * DO NOT CHANGE any of the structs, flags, values declared here and used
+ * in released Lustre versions. Some structs may have padding fields that
+ * can be used. Some structs might allow addition at the end (verify this
+ * in the code to ensure that new/old clients that see this larger struct
+ * do not fail, otherwise you need to implement protocol compatibility).
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
+ * implemented either here, inline (trivial implementations) or in
+ * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other"
+ * endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ *
+ * @{
+ */
+
+#ifndef _LUSTRE_IDL_H_
+#define _LUSTRE_IDL_H_
+
+#if !defined(LASSERT) && !defined(LPU64)
+#include <linux/libcfs/libcfs.h> /* for LASSERT, LPUX64, etc */
+#endif
+
+/* Defn's shared with user-space. */
+#include <lustre/lustre_user.h>
+
+/*
+ * GENERAL STUFF
+ */
+/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
+ * FOO_REPLY_PORTAL is for incoming replies on the FOO
+ * FOO_BULK_PORTAL is for incoming bulk on the FOO
+ */
+
+#define CONNMGR_REQUEST_PORTAL 1
+#define CONNMGR_REPLY_PORTAL 2
+//#define OSC_REQUEST_PORTAL 3
+#define OSC_REPLY_PORTAL 4
+//#define OSC_BULK_PORTAL 5
+#define OST_IO_PORTAL 6
+#define OST_CREATE_PORTAL 7
+#define OST_BULK_PORTAL 8
+//#define MDC_REQUEST_PORTAL 9
+#define MDC_REPLY_PORTAL 10
+//#define MDC_BULK_PORTAL 11
+#define MDS_REQUEST_PORTAL 12
+//#define MDS_REPLY_PORTAL 13
+#define MDS_BULK_PORTAL 14
+#define LDLM_CB_REQUEST_PORTAL 15
+#define LDLM_CB_REPLY_PORTAL 16
+#define LDLM_CANCEL_REQUEST_PORTAL 17
+#define LDLM_CANCEL_REPLY_PORTAL 18
+//#define PTLBD_REQUEST_PORTAL 19
+//#define PTLBD_REPLY_PORTAL 20
+//#define PTLBD_BULK_PORTAL 21
+#define MDS_SETATTR_PORTAL 22
+#define MDS_READPAGE_PORTAL 23
+#define MDS_MDS_PORTAL 24
+
+#define MGC_REPLY_PORTAL 25
+#define MGS_REQUEST_PORTAL 26
+#define MGS_REPLY_PORTAL 27
+#define OST_REQUEST_PORTAL 28
+#define FLD_REQUEST_PORTAL 29
+#define SEQ_METADATA_PORTAL 30
+#define SEQ_DATA_PORTAL 31
+#define SEQ_CONTROLLER_PORTAL 32
+#define MGS_BULK_PORTAL 33
+
+/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */
+
+/* packet types */
+#define PTL_RPC_MSG_REQUEST 4711
+#define PTL_RPC_MSG_ERR 4712
+#define PTL_RPC_MSG_REPLY 4713
+
+/* DON'T use swabbed values of MAGIC as magic! */
+#define LUSTRE_MSG_MAGIC_V1 0x0BD00BD0
+#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
+
+#define LUSTRE_MSG_MAGIC_V1_SWABBED 0xD00BD00B
+#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
+
+#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
+
+#define PTLRPC_MSG_VERSION 0x00000003
+#define LUSTRE_VERSION_MASK 0xffff0000
+#define LUSTRE_OBD_VERSION 0x00010000
+#define LUSTRE_MDS_VERSION 0x00020000
+#define LUSTRE_OST_VERSION 0x00030000
+#define LUSTRE_DLM_VERSION 0x00040000
+#define LUSTRE_LOG_VERSION 0x00050000
+#define LUSTRE_MGS_VERSION 0x00060000
+
+typedef __u32 mdsno_t;
+typedef __u64 seqno_t;
+typedef __u64 obd_id;
+typedef __u64 obd_seq;
+typedef __s64 obd_time;
+typedef __u64 obd_size;
+typedef __u64 obd_off;
+typedef __u64 obd_blocks;
+typedef __u64 obd_valid;
+typedef __u32 obd_blksize;
+typedef __u32 obd_mode;
+typedef __u32 obd_uid;
+typedef __u32 obd_gid;
+typedef __u32 obd_flag;
+typedef __u32 obd_count;
+
+/**
+ * Describes a range of sequence, lsr_start is included but lsr_end is
+ * not in the range.
+ * Same structure is used in fld module where lsr_index field holds mdt id
+ * of the home mdt.
+ */
+struct lu_seq_range {
+ __u64 lsr_start;
+ __u64 lsr_end;
+ __u32 lsr_index;
+ __u32 lsr_flags;
+};
+
+#define LU_SEQ_RANGE_MDT 0x0
+#define LU_SEQ_RANGE_OST 0x1
+#define LU_SEQ_RANGE_ANY 0x3
+
+#define LU_SEQ_RANGE_MASK 0x3
+
+static inline unsigned fld_range_type(const struct lu_seq_range *range)
+{
+ return range->lsr_flags & LU_SEQ_RANGE_MASK;
+}
+
+static inline int fld_range_is_ost(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_OST;
+}
+
+static inline int fld_range_is_mdt(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_MDT;
+}
+
+/**
+ * This all range is only being used when fld client sends fld query request,
+ * but it does not know whether the seq is MDT or OST, so it will send req
+ * with ALL type, which means either seq type gotten from lookup can be
+ * expected.
+ */
+static inline unsigned fld_range_is_any(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_ANY;
+}
+
+static inline void fld_range_set_type(struct lu_seq_range *range,
+ unsigned flags)
+{
+ LASSERT(!(flags & ~LU_SEQ_RANGE_MASK));
+ range->lsr_flags |= flags;
+}
+
+static inline void fld_range_set_mdt(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_MDT);
+}
+
+static inline void fld_range_set_ost(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_OST);
+}
+
+static inline void fld_range_set_any(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_ANY);
+}
+
+/**
+ * returns width of given range \a r
+ */
+
+static inline __u64 range_space(const struct lu_seq_range *range)
+{
+ return range->lsr_end - range->lsr_start;
+}
+
+/**
+ * initialize range to zero
+ */
+
+static inline void range_init(struct lu_seq_range *range)
+{
+ range->lsr_start = range->lsr_end = range->lsr_index = 0;
+}
+
+/**
+ * check if given seq id \a s is within given range \a r
+ */
+
+static inline int range_within(const struct lu_seq_range *range,
+ __u64 s)
+{
+ return s >= range->lsr_start && s < range->lsr_end;
+}
+
+static inline int range_is_sane(const struct lu_seq_range *range)
+{
+ return (range->lsr_end >= range->lsr_start);
+}
+
+static inline int range_is_zero(const struct lu_seq_range *range)
+{
+ return (range->lsr_start == 0 && range->lsr_end == 0);
+}
+
+static inline int range_is_exhausted(const struct lu_seq_range *range)
+
+{
+ return range_space(range) == 0;
+}
+
+/* return 0 if two range have the same location */
+static inline int range_compare_loc(const struct lu_seq_range *r1,
+ const struct lu_seq_range *r2)
+{
+ return r1->lsr_index != r2->lsr_index ||
+ r1->lsr_flags != r2->lsr_flags;
+}
+
+#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x):%x:%s"
+
+#define PRANGE(range) \
+ (range)->lsr_start, \
+ (range)->lsr_end, \
+ (range)->lsr_index, \
+ fld_range_is_mdt(range) ? "mdt" : "ost"
+
+
+/** \defgroup lu_fid lu_fid
+ * @{ */
+
+/**
+ * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat.
+ * Deprecated since HSM and SOM attributes are now stored in separate on-disk
+ * xattr.
+ */
+enum lma_compat {
+ LMAC_HSM = 0x00000001,
+ LMAC_SOM = 0x00000002,
+};
+
+/**
+ * Masks for all features that should be supported by a Lustre version to
+ * access a specific file.
+ * This information is stored in lustre_mdt_attrs::lma_incompat.
+ */
+enum lma_incompat {
+ LMAI_RELEASED = 0x0000001, /* file is released */
+ LMAI_AGENT = 0x00000002, /* agent inode */
+ LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
+ is on the remote MDT */
+};
+#define LMA_INCOMPAT_SUPP (LMAI_AGENT | LMAI_REMOTE_PARENT)
+
+extern void lustre_lma_swab(struct lustre_mdt_attrs *lma);
+extern void lustre_lma_init(struct lustre_mdt_attrs *lma,
+ const struct lu_fid *fid, __u32 incompat);
+/**
+ * SOM on-disk attributes stored in a separate xattr.
+ */
+struct som_attrs {
+ /** Bitfield for supported data in this structure. For future use. */
+ __u32 som_compat;
+
+ /** Incompat feature list. The supported feature mask is availabe in
+ * SOM_INCOMPAT_SUPP */
+ __u32 som_incompat;
+
+ /** IO Epoch SOM attributes belongs to */
+ __u64 som_ioepoch;
+ /** total file size in objects */
+ __u64 som_size;
+ /** total fs blocks in objects */
+ __u64 som_blocks;
+ /** mds mount id the size is valid for */
+ __u64 som_mountid;
+};
+extern void lustre_som_swab(struct som_attrs *attrs);
+
+#define SOM_INCOMPAT_SUPP 0x0
+
+/**
+ * HSM on-disk attributes stored in a separate xattr.
+ */
+struct hsm_attrs {
+ /** Bitfield for supported data in this structure. For future use. */
+ __u32 hsm_compat;
+
+ /** HSM flags, see hsm_flags enum below */
+ __u32 hsm_flags;
+ /** backend archive id associated with the file */
+ __u64 hsm_arch_id;
+ /** version associated with the last archiving, if any */
+ __u64 hsm_arch_ver;
+};
+extern void lustre_hsm_swab(struct hsm_attrs *attrs);
+
+/**
+ * fid constants
+ */
+enum {
+ /** LASTID file has zero OID */
+ LUSTRE_FID_LASTID_OID = 0UL,
+ /** initial fid id value */
+ LUSTRE_FID_INIT_OID = 1UL
+};
+
+/** returns fid object sequence */
+static inline __u64 fid_seq(const struct lu_fid *fid)
+{
+ return fid->f_seq;
+}
+
+/** returns fid object id */
+static inline __u32 fid_oid(const struct lu_fid *fid)
+{
+ return fid->f_oid;
+}
+
+/** returns fid object version */
+static inline __u32 fid_ver(const struct lu_fid *fid)
+{
+ return fid->f_ver;
+}
+
+static inline void fid_zero(struct lu_fid *fid)
+{
+ memset(fid, 0, sizeof(*fid));
+}
+
+static inline obd_id fid_ver_oid(const struct lu_fid *fid)
+{
+ return ((__u64)fid_ver(fid) << 32 | fid_oid(fid));
+}
+
+/**
+ * Note that reserved SEQ numbers below 12 will conflict with ldiskfs
+ * inodes in the IGIF namespace, so these reserved SEQ numbers can be
+ * used for other purposes and not risk collisions with existing inodes.
+ *
+ * Different FID Format
+ * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
+ */
+enum fid_seq {
+ FID_SEQ_OST_MDT0 = 0,
+ FID_SEQ_LLOG = 1, /* unnamed llogs */
+ FID_SEQ_ECHO = 2,
+ FID_SEQ_OST_MDT1 = 3,
+ FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */
+ FID_SEQ_LLOG_NAME = 10, /* named llogs */
+ FID_SEQ_RSVD = 11,
+ FID_SEQ_IGIF = 12,
+ FID_SEQ_IGIF_MAX = 0x0ffffffffULL,
+ FID_SEQ_IDIF = 0x100000000ULL,
+ FID_SEQ_IDIF_MAX = 0x1ffffffffULL,
+ /* Normal FID sequence starts from this value, i.e. 1<<33 */
+ FID_SEQ_START = 0x200000000ULL,
+ /* sequence for local pre-defined FIDs listed in local_oid */
+ FID_SEQ_LOCAL_FILE = 0x200000001ULL,
+ FID_SEQ_DOT_LUSTRE = 0x200000002ULL,
+ /* sequence is used for local named objects FIDs generated
+ * by local_object_storage library */
+ FID_SEQ_LOCAL_NAME = 0x200000003ULL,
+ /* Because current FLD will only cache the fid sequence, instead
+ * of oid on the client side, if the FID needs to be exposed to
+ * clients sides, it needs to make sure all of fids under one
+ * sequence will be located in one MDT. */
+ FID_SEQ_SPECIAL = 0x200000004ULL,
+ FID_SEQ_QUOTA = 0x200000005ULL,
+ FID_SEQ_QUOTA_GLB = 0x200000006ULL,
+ FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */
+ FID_SEQ_NORMAL = 0x200000400ULL,
+ FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL
+};
+
+#define OBIF_OID_MAX_BITS 32
+#define OBIF_MAX_OID (1ULL << OBIF_OID_MAX_BITS)
+#define OBIF_OID_MASK ((1ULL << OBIF_OID_MAX_BITS) - 1)
+#define IDIF_OID_MAX_BITS 48
+#define IDIF_MAX_OID (1ULL << IDIF_OID_MAX_BITS)
+#define IDIF_OID_MASK ((1ULL << IDIF_OID_MAX_BITS) - 1)
+
+/** OID for FID_SEQ_SPECIAL */
+enum special_oid {
+ /* Big Filesystem Lock to serialize rename operations */
+ FID_OID_SPECIAL_BFL = 1UL,
+};
+
+/** OID for FID_SEQ_DOT_LUSTRE */
+enum dot_lustre_oid {
+ FID_OID_DOT_LUSTRE = 1UL,
+ FID_OID_DOT_LUSTRE_OBF = 2UL,
+};
+
+static inline int fid_seq_is_mdt0(obd_seq seq)
+{
+ return (seq == FID_SEQ_OST_MDT0);
+}
+
+static inline int fid_seq_is_mdt(const __u64 seq)
+{
+ return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
+};
+
+static inline int fid_seq_is_echo(obd_seq seq)
+{
+ return (seq == FID_SEQ_ECHO);
+}
+
+static inline int fid_is_echo(const struct lu_fid *fid)
+{
+ return fid_seq_is_echo(fid_seq(fid));
+}
+
+static inline int fid_seq_is_llog(obd_seq seq)
+{
+ return (seq == FID_SEQ_LLOG);
+}
+
+static inline int fid_is_llog(const struct lu_fid *fid)
+{
+ /* file with OID == 0 is not llog but contains last oid */
+ return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
+}
+
+static inline int fid_seq_is_rsvd(const __u64 seq)
+{
+ return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD);
+};
+
+static inline int fid_seq_is_special(const __u64 seq)
+{
+ return seq == FID_SEQ_SPECIAL;
+};
+
+static inline int fid_seq_is_local_file(const __u64 seq)
+{
+ return seq == FID_SEQ_LOCAL_FILE ||
+ seq == FID_SEQ_LOCAL_NAME;
+};
+
+static inline int fid_seq_is_root(const __u64 seq)
+{
+ return seq == FID_SEQ_ROOT;
+}
+
+static inline int fid_seq_is_dot(const __u64 seq)
+{
+ return seq == FID_SEQ_DOT_LUSTRE;
+}
+
+static inline int fid_seq_is_default(const __u64 seq)
+{
+ return seq == FID_SEQ_LOV_DEFAULT;
+}
+
+static inline int fid_is_mdt0(const struct lu_fid *fid)
+{
+ return fid_seq_is_mdt0(fid_seq(fid));
+}
+
+static inline void lu_root_fid(struct lu_fid *fid)
+{
+ fid->f_seq = FID_SEQ_ROOT;
+ fid->f_oid = 1;
+ fid->f_ver = 0;
+}
+
+/**
+ * Check if a fid is igif or not.
+ * \param fid the fid to be tested.
+ * \return true if the fid is a igif; otherwise false.
+ */
+static inline int fid_seq_is_igif(const __u64 seq)
+{
+ return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
+}
+
+static inline int fid_is_igif(const struct lu_fid *fid)
+{
+ return fid_seq_is_igif(fid_seq(fid));
+}
+
+/**
+ * Check if a fid is idif or not.
+ * \param fid the fid to be tested.
+ * \return true if the fid is a idif; otherwise false.
+ */
+static inline int fid_seq_is_idif(const __u64 seq)
+{
+ return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
+}
+
+static inline int fid_is_idif(const struct lu_fid *fid)
+{
+ return fid_seq_is_idif(fid_seq(fid));
+}
+
+static inline int fid_is_local_file(const struct lu_fid *fid)
+{
+ return fid_seq_is_local_file(fid_seq(fid));
+}
+
+static inline int fid_seq_is_norm(const __u64 seq)
+{
+ return (seq >= FID_SEQ_NORMAL);
+}
+
+static inline int fid_is_norm(const struct lu_fid *fid)
+{
+ return fid_seq_is_norm(fid_seq(fid));
+}
+
+/* convert an OST objid into an IDIF FID SEQ number */
+static inline obd_seq fid_idif_seq(obd_id id, __u32 ost_idx)
+{
+ return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
+}
+
+/* convert a packed IDIF FID into an OST objid */
+static inline obd_id fid_idif_id(obd_seq seq, __u32 oid, __u32 ver)
+{
+ return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
+}
+
+/* extract ost index from IDIF FID */
+static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
+{
+ LASSERT(fid_is_idif(fid));
+ return (fid_seq(fid) >> 16) & 0xffff;
+}
+
+/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
+static inline obd_seq ostid_seq(const struct ost_id *ostid)
+{
+ if (fid_seq_is_mdt0(ostid->oi.oi_seq))
+ return FID_SEQ_OST_MDT0;
+
+ if (fid_seq_is_default(ostid->oi.oi_seq))
+ return FID_SEQ_LOV_DEFAULT;
+
+ if (fid_is_idif(&ostid->oi_fid))
+ return FID_SEQ_OST_MDT0;
+
+ return fid_seq(&ostid->oi_fid);
+}
+
+/* extract OST objid from a wire ost_id (id/seq) pair */
+static inline obd_id ostid_id(const struct ost_id *ostid)
+{
+ if (fid_seq_is_mdt0(ostid_seq(ostid)))
+ return ostid->oi.oi_id & IDIF_OID_MASK;
+
+ if (fid_is_idif(&ostid->oi_fid))
+ return fid_idif_id(fid_seq(&ostid->oi_fid),
+ fid_oid(&ostid->oi_fid), 0);
+
+ return fid_oid(&ostid->oi_fid);
+}
+
+static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
+{
+ if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
+ oi->oi.oi_seq = seq;
+ } else {
+ oi->oi_fid.f_seq = seq;
+ /* Note: if f_oid + f_ver is zero, we need init it
+ * to be 1, otherwise, ostid_seq will treat this
+ * as old ostid (oi_seq == 0) */
+ if (oi->oi_fid.f_oid == 0 && oi->oi_fid.f_ver == 0)
+ oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
+ }
+}
+
+static inline void ostid_set_seq_mdt0(struct ost_id *oi)
+{
+ ostid_set_seq(oi, FID_SEQ_OST_MDT0);
+}
+
+static inline void ostid_set_seq_echo(struct ost_id *oi)
+{
+ ostid_set_seq(oi, FID_SEQ_ECHO);
+}
+
+static inline void ostid_set_seq_llog(struct ost_id *oi)
+{
+ ostid_set_seq(oi, FID_SEQ_LLOG);
+}
+
+/**
+ * Note: we need check oi_seq to decide where to set oi_id,
+ * so oi_seq should always be set ahead of oi_id.
+ */
+static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
+{
+ if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ if (oid >= IDIF_MAX_OID) {
+ CERROR("Bad "LPU64" to set "DOSTID"\n",
+ oid, POSTID(oi));
+ return;
+ }
+ oi->oi.oi_id = oid;
+ } else {
+ if (oid > OBIF_MAX_OID) {
+ CERROR("Bad "LPU64" to set "DOSTID"\n",
+ oid, POSTID(oi));
+ return;
+ }
+ oi->oi_fid.f_oid = oid;
+ }
+}
+
+static inline void ostid_inc_id(struct ost_id *oi)
+{
+ if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
+ CERROR("Bad inc "DOSTID"\n", POSTID(oi));
+ return;
+ }
+ oi->oi.oi_id++;
+ } else {
+ oi->oi_fid.f_oid++;
+ }
+}
+
+static inline void ostid_dec_id(struct ost_id *oi)
+{
+ if (fid_seq_is_mdt0(ostid_seq(oi)))
+ oi->oi.oi_id--;
+ else
+ oi->oi_fid.f_oid--;
+}
+
+/**
+ * Unpack an OST object id/seq (group) into a FID. This is needed for
+ * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
+ * FIDs. Note that if an id/seq is already in FID/IDIF format it will
+ * be passed through unchanged. Only legacy OST objects in "group 0"
+ * will be mapped into the IDIF namespace so that they can fit into the
+ * struct lu_fid fields without loss. For reference see:
+ * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs
+ */
+static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
+ __u32 ost_idx)
+{
+ if (ost_idx > 0xffff) {
+ CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
+ ost_idx);
+ return -EBADF;
+ }
+
+ if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+ /* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
+ * that we map into the IDIF namespace. It allows up to 2^48
+ * objects per OST, as this is the object namespace that has
+ * been in production for years. This can handle create rates
+ * of 1M objects/s/OST for 9 years, or combinations thereof. */
+ if (ostid_id(ostid) >= IDIF_MAX_OID) {
+ CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n",
+ POSTID(ostid), ost_idx);
+ return -EBADF;
+ }
+ fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+ /* truncate to 32 bits by assignment */
+ fid->f_oid = ostid_id(ostid);
+ /* in theory, not currently used */
+ fid->f_ver = ostid_id(ostid) >> 48;
+ } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+ /* This is either an IDIF object, which identifies objects across
+ * all OSTs, or a regular FID. The IDIF namespace maps legacy
+ * OST objects into the FID namespace. In both cases, we just
+ * pass the FID through, no conversion needed. */
+ if (ostid->oi_fid.f_ver != 0) {
+ CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n",
+ POSTID(ostid), ost_idx);
+ return -EBADF;
+ }
+ *fid = ostid->oi_fid;
+ }
+
+ return 0;
+}
+
+/* pack any OST FID into an ostid (id/seq) for the wire/disk */
+static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
+{
+ if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+ CERROR("bad IGIF, "DFID"\n", PFID(fid));
+ return -EBADF;
+ }
+
+ if (fid_is_idif(fid)) {
+ ostid_set_seq_mdt0(ostid);
+ ostid_set_id(ostid, fid_idif_id(fid_seq(fid), fid_oid(fid),
+ fid_ver(fid)));
+ } else {
+ ostid->oi_fid = *fid;
+ }
+
+ return 0;
+}
+
+/* Check whether the fid is for LAST_ID */
+static inline int fid_is_last_id(const struct lu_fid *fid)
+{
+ return (fid_oid(fid) == 0);
+}
+
+/**
+ * Get inode number from a igif.
+ * \param fid a igif to get inode number from.
+ * \return inode number for the igif.
+ */
+static inline ino_t lu_igif_ino(const struct lu_fid *fid)
+{
+ return fid_seq(fid);
+}
+
+extern void lustre_swab_ost_id(struct ost_id *oid);
+
+/**
+ * Get inode generation from a igif.
+ * \param fid a igif to get inode generation from.
+ * \return inode generation for the igif.
+ */
+static inline __u32 lu_igif_gen(const struct lu_fid *fid)
+{
+ return fid_oid(fid);
+}
+
+/**
+ * Build igif from the inode number/generation.
+ */
+static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
+{
+ fid->f_seq = ino;
+ fid->f_oid = gen;
+ fid->f_ver = 0;
+}
+
+/*
+ * Fids are transmitted across network (in the sender byte-ordering),
+ * and stored on disk in big-endian order.
+ */
+static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
+{
+ /* check that all fields are converted */
+ CLASSERT(sizeof *src ==
+ sizeof fid_seq(src) +
+ sizeof fid_oid(src) + sizeof fid_ver(src));
+ dst->f_seq = cpu_to_le64(fid_seq(src));
+ dst->f_oid = cpu_to_le32(fid_oid(src));
+ dst->f_ver = cpu_to_le32(fid_ver(src));
+}
+
+static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
+{
+ /* check that all fields are converted */
+ CLASSERT(sizeof *src ==
+ sizeof fid_seq(src) +
+ sizeof fid_oid(src) + sizeof fid_ver(src));
+ dst->f_seq = le64_to_cpu(fid_seq(src));
+ dst->f_oid = le32_to_cpu(fid_oid(src));
+ dst->f_ver = le32_to_cpu(fid_ver(src));
+}
+
+static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
+{
+ /* check that all fields are converted */
+ CLASSERT(sizeof *src ==
+ sizeof fid_seq(src) +
+ sizeof fid_oid(src) + sizeof fid_ver(src));
+ dst->f_seq = cpu_to_be64(fid_seq(src));
+ dst->f_oid = cpu_to_be32(fid_oid(src));
+ dst->f_ver = cpu_to_be32(fid_ver(src));
+}
+
+static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
+{
+ /* check that all fields are converted */
+ CLASSERT(sizeof *src ==
+ sizeof fid_seq(src) +
+ sizeof fid_oid(src) + sizeof fid_ver(src));
+ dst->f_seq = be64_to_cpu(fid_seq(src));
+ dst->f_oid = be32_to_cpu(fid_oid(src));
+ dst->f_ver = be32_to_cpu(fid_ver(src));
+}
+
+static inline int fid_is_sane(const struct lu_fid *fid)
+{
+ return fid != NULL &&
+ ((fid_seq(fid) >= FID_SEQ_START && fid_ver(fid) == 0) ||
+ fid_is_igif(fid) || fid_is_idif(fid) ||
+ fid_seq_is_rsvd(fid_seq(fid)));
+}
+
+static inline int fid_is_zero(const struct lu_fid *fid)
+{
+ return fid_seq(fid) == 0 && fid_oid(fid) == 0;
+}
+
+extern void lustre_swab_lu_fid(struct lu_fid *fid);
+extern void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+
+static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
+{
+ /* Check that there is no alignment padding. */
+ CLASSERT(sizeof *f0 ==
+ sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver);
+ return memcmp(f0, f1, sizeof *f0) == 0;
+}
+
+#define __diff_normalize(val0, val1) \
+({ \
+ typeof(val0) __val0 = (val0); \
+ typeof(val1) __val1 = (val1); \
+ \
+ (__val0 == __val1 ? 0 : __val0 > __val1 ? +1 : -1); \
+})
+
+static inline int lu_fid_cmp(const struct lu_fid *f0,
+ const struct lu_fid *f1)
+{
+ return
+ __diff_normalize(fid_seq(f0), fid_seq(f1)) ?:
+ __diff_normalize(fid_oid(f0), fid_oid(f1)) ?:
+ __diff_normalize(fid_ver(f0), fid_ver(f1));
+}
+
+static inline void ostid_cpu_to_le(struct ost_id *src_oi,
+ struct ost_id *dst_oi)
+{
+ if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
+ dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
+ dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
+ } else {
+ fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
+ }
+}
+
+static inline void ostid_le_to_cpu(struct ost_id *src_oi,
+ struct ost_id *dst_oi)
+{
+ if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
+ dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
+ dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+ } else {
+ fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
+ }
+}
+
+/** @} lu_fid */
+
+/** \defgroup lu_dir lu_dir
+ * @{ */
+
+/**
+ * Enumeration of possible directory entry attributes.
+ *
+ * Attributes follow directory entry header in the order they appear in this
+ * enumeration.
+ */
+enum lu_dirent_attrs {
+ LUDA_FID = 0x0001,
+ LUDA_TYPE = 0x0002,
+ LUDA_64BITHASH = 0x0004,
+
+ /* The following attrs are used for MDT interanl only,
+ * not visible to client */
+
+ /* Verify the dirent consistency */
+ LUDA_VERIFY = 0x8000,
+ /* Only check but not repair the dirent inconsistency */
+ LUDA_VERIFY_DRYRUN = 0x4000,
+ /* The dirent has been repaired, or to be repaired (dryrun). */
+ LUDA_REPAIR = 0x2000,
+ /* The system is upgraded, has beed or to be repaired (dryrun). */
+ LUDA_UPGRADE = 0x1000,
+ /* Ignore this record, go to next directly. */
+ LUDA_IGNORE = 0x0800,
+};
+
+#define LU_DIRENT_ATTRS_MASK 0xf800
+
+/**
+ * Layout of readdir pages, as transmitted on wire.
+ */
+struct lu_dirent {
+ /** valid if LUDA_FID is set. */
+ struct lu_fid lde_fid;
+ /** a unique entry identifier: a hash or an offset. */
+ __u64 lde_hash;
+ /** total record length, including all attributes. */
+ __u16 lde_reclen;
+ /** name length */
+ __u16 lde_namelen;
+ /** optional variable size attributes following this entry.
+ * taken from enum lu_dirent_attrs.
+ */
+ __u32 lde_attrs;
+ /** name is followed by the attributes indicated in ->ldp_attrs, in
+ * their natural order. After the last attribute, padding bytes are
+ * added to make ->lde_reclen a multiple of 8.
+ */
+ char lde_name[0];
+};
+
+/*
+ * Definitions of optional directory entry attributes formats.
+ *
+ * Individual attributes do not have their length encoded in a generic way. It
+ * is assumed that consumer of an attribute knows its format. This means that
+ * it is impossible to skip over an unknown attribute, except by skipping over all
+ * remaining attributes (by using ->lde_reclen), which is not too
+ * constraining, because new server versions will append new attributes at
+ * the end of an entry.
+ */
+
+/**
+ * Fid directory attribute: a fid of an object referenced by the entry. This
+ * will be almost always requested by the client and supplied by the server.
+ *
+ * Aligned to 8 bytes.
+ */
+/* To have compatibility with 1.8, lets have fid in lu_dirent struct. */
+
+/**
+ * File type.
+ *
+ * Aligned to 2 bytes.
+ */
+struct luda_type {
+ __u16 lt_type;
+};
+
+struct lu_dirpage {
+ __u64 ldp_hash_start;
+ __u64 ldp_hash_end;
+ __u32 ldp_flags;
+ __u32 ldp_pad0;
+ struct lu_dirent ldp_entries[0];
+};
+
+enum lu_dirpage_flags {
+ /**
+ * dirpage contains no entry.
+ */
+ LDF_EMPTY = 1 << 0,
+ /**
+ * last entry's lde_hash equals ldp_hash_end.
+ */
+ LDF_COLLIDE = 1 << 1
+};
+
+static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
+{
+ if (le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
+ return NULL;
+ else
+ return dp->ldp_entries;
+}
+
+static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
+{
+ struct lu_dirent *next;
+
+ if (le16_to_cpu(ent->lde_reclen) != 0)
+ next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
+ else
+ next = NULL;
+
+ return next;
+}
+
+static inline int lu_dirent_calc_size(int namelen, __u16 attr)
+{
+ int size;
+
+ if (attr & LUDA_TYPE) {
+ const unsigned align = sizeof(struct luda_type) - 1;
+ size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
+ size += sizeof(struct luda_type);
+ } else
+ size = sizeof(struct lu_dirent) + namelen;
+
+ return (size + 7) & ~7;
+}
+
+static inline int lu_dirent_size(struct lu_dirent *ent)
+{
+ if (le16_to_cpu(ent->lde_reclen) == 0) {
+ return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
+ le32_to_cpu(ent->lde_attrs));
+ }
+ return le16_to_cpu(ent->lde_reclen);
+}
+
+#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
+
+/**
+ * MDS_READPAGE page size
+ *
+ * This is the directory page size packed in MDS_READPAGE RPC.
+ * It's different than PAGE_CACHE_SIZE because the client needs to
+ * access the struct lu_dirpage header packed at the beginning of
+ * the "page" and without this there isn't any way to know find the
+ * lu_dirpage header is if client and server PAGE_CACHE_SIZE differ.
+ */
+#define LU_PAGE_SHIFT 12
+#define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT)
+#define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1))
+
+#define LU_PAGE_COUNT (1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT))
+
+/** @} lu_dir */
+
+struct lustre_handle {
+ __u64 cookie;
+};
+#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
+
+static inline int lustre_handle_is_used(struct lustre_handle *lh)
+{
+ return lh->cookie != 0ull;
+}
+
+static inline int lustre_handle_equal(const struct lustre_handle *lh1,
+ const struct lustre_handle *lh2)
+{
+ return lh1->cookie == lh2->cookie;
+}
+
+static inline void lustre_handle_copy(struct lustre_handle *tgt,
+ struct lustre_handle *src)
+{
+ tgt->cookie = src->cookie;
+}
+
+/* flags for lm_flags */
+#define MSGHDR_AT_SUPPORT 0x1
+#define MSGHDR_CKSUM_INCOMPAT18 0x2
+
+#define lustre_msg lustre_msg_v2
+/* we depend on this structure to be 8-byte aligned */
+/* this type is only endian-adjusted in lustre_unpack_msg() */
+struct lustre_msg_v2 {
+ __u32 lm_bufcount;
+ __u32 lm_secflvr;
+ __u32 lm_magic;
+ __u32 lm_repsize;
+ __u32 lm_cksum;
+ __u32 lm_flags;
+ __u32 lm_padding_2;
+ __u32 lm_padding_3;
+ __u32 lm_buflens[0];
+};
+
+/* without gss, ptlrpc_body is put at the first buffer. */
+#define PTLRPC_NUM_VERSIONS 4
+#define JOBSTATS_JOBID_SIZE 32 /* 32 bytes string */
+struct ptlrpc_body_v3 {
+ struct lustre_handle pb_handle;
+ __u32 pb_type;
+ __u32 pb_version;
+ __u32 pb_opc;
+ __u32 pb_status;
+ __u64 pb_last_xid;
+ __u64 pb_last_seen;
+ __u64 pb_last_committed;
+ __u64 pb_transno;
+ __u32 pb_flags;
+ __u32 pb_op_flags;
+ __u32 pb_conn_cnt;
+ __u32 pb_timeout; /* for req, the deadline, for rep, the service est */
+ __u32 pb_service_time; /* for rep, actual service time */
+ __u32 pb_limit;
+ __u64 pb_slv;
+ /* VBR: pre-versions */
+ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ /* padding for future needs */
+ __u64 pb_padding[4];
+ char pb_jobid[JOBSTATS_JOBID_SIZE];
+};
+#define ptlrpc_body ptlrpc_body_v3
+
+struct ptlrpc_body_v2 {
+ struct lustre_handle pb_handle;
+ __u32 pb_type;
+ __u32 pb_version;
+ __u32 pb_opc;
+ __u32 pb_status;
+ __u64 pb_last_xid;
+ __u64 pb_last_seen;
+ __u64 pb_last_committed;
+ __u64 pb_transno;
+ __u32 pb_flags;
+ __u32 pb_op_flags;
+ __u32 pb_conn_cnt;
+ __u32 pb_timeout; /* for req, the deadline, for rep, the service est */
+ __u32 pb_service_time; /* for rep, actual service time, also used for
+ net_latency of req */
+ __u32 pb_limit;
+ __u64 pb_slv;
+ /* VBR: pre-versions */
+ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ /* padding for future needs */
+ __u64 pb_padding[4];
+};
+
+extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
+
+/* message body offset for lustre_msg_v2 */
+/* ptlrpc body offset in all request/reply messages */
+#define MSG_PTLRPC_BODY_OFF 0
+
+/* normal request/reply message record offset */
+#define REQ_REC_OFF 1
+#define REPLY_REC_OFF 1
+
+/* ldlm request message body offset */
+#define DLM_LOCKREQ_OFF 1 /* lockreq offset */
+#define DLM_REQ_REC_OFF 2 /* normal dlm request record offset */
+
+/* ldlm intent lock message body offset */
+#define DLM_INTENT_IT_OFF 2 /* intent lock it offset */
+#define DLM_INTENT_REC_OFF 3 /* intent lock record offset */
+
+/* ldlm reply message body offset */
+#define DLM_LOCKREPLY_OFF 1 /* lockrep offset */
+#define DLM_REPLY_REC_OFF 2 /* reply record offset */
+
+/** only use in req->rq_{req,rep}_swab_mask */
+#define MSG_PTLRPC_HEADER_OFF 31
+
+/* Flags that are operation-specific go in the top 16 bits. */
+#define MSG_OP_FLAG_MASK 0xffff0000
+#define MSG_OP_FLAG_SHIFT 16
+
+/* Flags that apply to all requests are in the bottom 16 bits */
+#define MSG_GEN_FLAG_MASK 0x0000ffff
+#define MSG_LAST_REPLAY 0x0001
+#define MSG_RESENT 0x0002
+#define MSG_REPLAY 0x0004
+/* #define MSG_AT_SUPPORT 0x0008
+ * This was used in early prototypes of adaptive timeouts, and while there
+ * shouldn't be any users of that code there also isn't a need for using this
+ * bits. Defer usage until at least 1.10 to avoid potential conflict. */
+#define MSG_DELAY_REPLAY 0x0010
+#define MSG_VERSION_REPLAY 0x0020
+#define MSG_REQ_REPLAY_DONE 0x0040
+#define MSG_LOCK_REPLAY_DONE 0x0080
+
+/*
+ * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
+ */
+
+#define MSG_CONNECT_RECOVERING 0x00000001
+#define MSG_CONNECT_RECONNECT 0x00000002
+#define MSG_CONNECT_REPLAYABLE 0x00000004
+//#define MSG_CONNECT_PEER 0x8
+#define MSG_CONNECT_LIBCLIENT 0x00000010
+#define MSG_CONNECT_INITIAL 0x00000020
+#define MSG_CONNECT_ASYNC 0x00000040
+#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */
+#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */
+
+/* Connect flags */
+#define OBD_CONNECT_RDONLY 0x1ULL /*client has read-only access*/
+#define OBD_CONNECT_INDEX 0x2ULL /*connect specific LOV idx */
+#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */
+#define OBD_CONNECT_GRANT 0x8ULL /*OSC gets grant at connect */
+#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for cli */
+#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */
+#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO req portal */
+#define OBD_CONNECT_ACL 0x80ULL /*access control lists */
+#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attr */
+#define OBD_CONNECT_CROW 0x200ULL /*MDS+OST create obj on write*/
+#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */
+#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends init transno */
+#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/
+#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated.
+ *We do not support JOIN FILE
+ *anymore, reserve this flags
+ *just for preventing such bit
+ *to be reused.*/
+#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/
+#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/
+#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /*Remote client */
+#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /*Remote client by force */
+#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */
+#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */
+#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */
+#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */
+#define OBD_CONNECT_SOM 0x800000ULL /*Size on MDS */
+#define OBD_CONNECT_AT 0x1000000ULL /*client uses AT */
+#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. */
+#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */
+#define OBD_CONNECT_REAL 0x8000000ULL /*real connection */
+#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*Not used since 2.4 */
+#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos*/
+#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */
+#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */
+#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */
+#define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */
+#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */
+#define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */
+#define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */
+#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client uses layout lock */
+#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits
+ * directory hash */
+#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */
+#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */
+#define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */
+#define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */
+#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
+ * RPC error properly */
+#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
+ * finer space reservation */
+#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8
+ * policy and 2.x server */
+#define OBD_CONNECT_LVB_TYPE 0x400000000000ULL /* variable type of LVB */
+#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */
+#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
+#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */
+#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
+/* XXX README XXX:
+ * Please DO NOT add flag values here before first ensuring that this same
+ * flag value is not in use on some other branch. Please clear any such
+ * changes with senior engineers before starting to use a new flag. Then,
+ * submit a small patch against EVERY branch that ONLY adds the new flag,
+ * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
+ * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
+ * can be approved and landed easily to reserve the flag for future use. */
+
+/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
+ * connection. It is a temporary bug fix for Imperative Recovery interop
+ * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
+ * 2.2 clients/servers is no longer needed. LU-1252/LU-1644. */
+#define OBD_CONNECT_MNE_SWAB OBD_CONNECT_MDS_MDS
+
+#define OCD_HAS_FLAG(ocd, flg) \
+ (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
+
+
+#define LRU_RESIZE_CONNECT_FLAG OBD_CONNECT_LRU_RESIZE
+
+#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
+ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
+ OBD_CONNECT_IBITS | \
+ OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | \
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
+ OBD_CONNECT_RMT_CLIENT | \
+ OBD_CONNECT_RMT_CLIENT_FORCE | \
+ OBD_CONNECT_BRW_SIZE | OBD_CONNECT_MDS_CAPA | \
+ OBD_CONNECT_OSS_CAPA | OBD_CONNECT_MDS_MDS | \
+ OBD_CONNECT_FID | LRU_RESIZE_CONNECT_FLAG | \
+ OBD_CONNECT_VBR | OBD_CONNECT_LOV_V3 | \
+ OBD_CONNECT_SOM | OBD_CONNECT_FULL20 | \
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_JOBSTATS | \
+ OBD_CONNECT_EINPROGRESS | \
+ OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
+ OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\
+ OBD_CONNECT_PINGLESS)
+#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
+ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
+ OBD_CONNECT_BRW_SIZE | OBD_CONNECT_OSS_CAPA | \
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
+ LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
+ OBD_CONNECT_RMT_CLIENT | \
+ OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \
+ OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
+ OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
+ OBD_CONNECT_MAX_EASIZE | \
+ OBD_CONNECT_EINPROGRESS | \
+ OBD_CONNECT_JOBSTATS | \
+ OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_LVB_TYPE|\
+ OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_FID | \
+ OBD_CONNECT_PINGLESS)
+#define ECHO_CONNECT_SUPPORTED (0)
+#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
+ OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \
+ OBD_CONNECT_MNE_SWAB | OBD_CONNECT_PINGLESS)
+
+/* Features required for this version of the client to work with server */
+#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
+ OBD_CONNECT_FULL20)
+
+#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
+ ((patch)<<8) + (fix))
+#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255)
+#define OBD_OCD_VERSION_MINOR(version) ((int)((version)>>16)&255)
+#define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255)
+#define OBD_OCD_VERSION_FIX(version) ((int)(version)&255)
+
+/* This structure is used for both request and reply.
+ *
+ * If we eventually have separate connect data for different types, which we
+ * almost certainly will, then perhaps we stick a union in here. */
+struct obd_connect_data_v1 {
+ __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
+ __u32 ocd_version; /* lustre release version number */
+ __u32 ocd_grant; /* initial cache grant amount (bytes) */
+ __u32 ocd_index; /* LOV index to connect to */
+ __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */
+ __u64 ocd_ibits_known; /* inode bits this client understands */
+ __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
+ __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
+ __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
+ __u32 ocd_unused; /* also fix lustre_swab_connect */
+ __u64 ocd_transno; /* first transno from client to be replayed */
+ __u32 ocd_group; /* MDS group on OST */
+ __u32 ocd_cksum_types; /* supported checksum algorithms */
+ __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
+ __u32 ocd_instance; /* also fix lustre_swab_connect */
+ __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
+};
+
+struct obd_connect_data {
+ __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
+ __u32 ocd_version; /* lustre release version number */
+ __u32 ocd_grant; /* initial cache grant amount (bytes) */
+ __u32 ocd_index; /* LOV index to connect to */
+ __u32 ocd_brw_size; /* Maximum BRW size in bytes */
+ __u64 ocd_ibits_known; /* inode bits this client understands */
+ __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
+ __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
+ __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
+ __u32 ocd_unused; /* also fix lustre_swab_connect */
+ __u64 ocd_transno; /* first transno from client to be replayed */
+ __u32 ocd_group; /* MDS group on OST */
+ __u32 ocd_cksum_types; /* supported checksum algorithms */
+ __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
+ __u32 ocd_instance; /* instance # of this target */
+ __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
+ /* Fields after ocd_maxbytes are only accessible by the receiver
+ * if the corresponding flag in ocd_connect_flags is set. Accessing
+ * any field after ocd_maxbytes on the receiver without a valid flag
+ * may result in out-of-bound memory access and kernel oops. */
+ __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding6; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding7; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding8; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 padding9; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingA; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingB; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingC; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingD; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingE; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 paddingF; /* added 2.1.0. also fix lustre_swab_connect */
+};
+/* XXX README XXX:
+ * Please DO NOT use any fields here before first ensuring that this same
+ * field is not in use on some other branch. Please clear any such changes
+ * with senior engineers before starting to use a new field. Then, submit
+ * a small patch against EVERY branch that ONLY adds the new field along with
+ * the matching OBD_CONNECT flag, so that can be approved and landed easily to
+ * reserve the flag for future use. */
+
+
+extern void lustre_swab_connect(struct obd_connect_data *ocd);
+
+/*
+ * Supported checksum algorithms. Up to 32 checksum types are supported.
+ * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
+ * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
+ * algorithm and also the OBD_FL_CKSUM* flags.
+ */
+typedef enum {
+ OBD_CKSUM_CRC32 = 0x00000001,
+ OBD_CKSUM_ADLER = 0x00000002,
+ OBD_CKSUM_CRC32C= 0x00000004,
+} cksum_type_t;
+
+/*
+ * OST requests: OBDO & OBD request records
+ */
+
+/* opcodes */
+typedef enum {
+ OST_REPLY = 0, /* reply ? */
+ OST_GETATTR = 1,
+ OST_SETATTR = 2,
+ OST_READ = 3,
+ OST_WRITE = 4,
+ OST_CREATE = 5,
+ OST_DESTROY = 6,
+ OST_GET_INFO = 7,
+ OST_CONNECT = 8,
+ OST_DISCONNECT = 9,
+ OST_PUNCH = 10,
+ OST_OPEN = 11,
+ OST_CLOSE = 12,
+ OST_STATFS = 13,
+ OST_SYNC = 16,
+ OST_SET_INFO = 17,
+ OST_QUOTACHECK = 18,
+ OST_QUOTACTL = 19,
+ OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
+ OST_LAST_OPC
+} ost_cmd_t;
+#define OST_FIRST_OPC OST_REPLY
+
+enum obdo_flags {
+ OBD_FL_INLINEDATA = 0x00000001,
+ OBD_FL_OBDMDEXISTS = 0x00000002,
+ OBD_FL_DELORPHAN = 0x00000004, /* if set in o_flags delete orphans */
+ OBD_FL_NORPC = 0x00000008, /* set in o_flags do in OSC not OST */
+ OBD_FL_IDONLY = 0x00000010, /* set in o_flags only adjust obj id*/
+ OBD_FL_RECREATE_OBJS= 0x00000020, /* recreate missing obj */
+ OBD_FL_DEBUG_CHECK = 0x00000040, /* echo client/server debug check */
+ OBD_FL_NO_USRQUOTA = 0x00000100, /* the object's owner is over quota */
+ OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */
+ OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */
+ OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */
+ OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */
+ OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */
+ OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
+ OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */
+ OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */
+ OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
+ OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client.
+ * XXX: obsoleted - reserved for old
+ * clients prior than 2.2 */
+ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
+ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */
+
+ /* Note that while these checksum values are currently separate bits,
+ * in 2.x we can actually allow all values from 1-31 if we wanted. */
+ OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
+ OBD_FL_CKSUM_CRC32C,
+
+ /* mask for local-only flag, which won't be sent over network */
+ OBD_FL_LOCAL_MASK = 0xF0000000,
+};
+
+#define LOV_MAGIC_V1 0x0BD10BD0
+#define LOV_MAGIC LOV_MAGIC_V1
+#define LOV_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_MAGIC_V3 0x0BD30BD0
+
+/*
+ * magic for fully defined striping
+ * the idea is that we should have different magics for striping "hints"
+ * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct
+ * lov_mds_md_v[13]). at the moment the magics are used in wire protocol,
+ * we can't just change it w/o long way preparation, but we still need a
+ * mechanism to allow LOD to differentiate hint versus ready striping.
+ * so, at the moment we do a trick: MDT knows what to expect from request
+ * depending on the case (replay uses ready striping, non-replay req uses
+ * hints), so MDT replaces magic with appropriate one and now LOD can
+ * easily understand what's inside -bzzz
+ */
+#define LOV_MAGIC_V1_DEF 0x0CD10BD0
+#define LOV_MAGIC_V3_DEF 0x0CD30BD0
+
+#define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */
+#define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */
+#define LOV_PATTERN_FIRST 0x100 /* first stripe is not in round-robin */
+#define LOV_PATTERN_CMOBD 0x200
+
+#define lov_ost_data lov_ost_data_v1
+struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/
+ struct ost_id l_ost_oi; /* OST object ID */
+ __u32 l_ost_gen; /* generation of this l_ost_idx */
+ __u32 l_ost_idx; /* OST index in LOV (lov_tgt_desc->tgts) */
+};
+
+#define lov_mds_md lov_mds_md_v1
+struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */
+ __u32 lmm_magic; /* magic number = LOV_MAGIC_V1 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ struct ost_id lmm_oi; /* LOV object ID */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ /* lmm_stripe_count used to be __u32 */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ __u16 lmm_layout_gen; /* layout generation number */
+ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+/**
+ * Sigh, because pre-2.4 uses
+ * struct lov_mds_md_v1 {
+ * ........
+ * __u64 lmm_object_id;
+ * __u64 lmm_object_seq;
+ * ......
+ * }
+ * to identify the LOV(MDT) object, and lmm_object_seq will
+ * be normal_fid, which make it hard to combine these conversion
+ * to ostid_to FID. so we will do lmm_oi/fid conversion separately
+ *
+ * We can tell the lmm_oi by this way,
+ * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
+ * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
+ * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
+ * lmm_oi.f_ver = 0
+ *
+ * But currently lmm_oi/lsm_oi does not have any "real" usages,
+ * except for printing some information, and the user can always
+ * get the real FID from LMA, besides this multiple case check might
+ * make swab more complicate. So we will keep using id/seq for lmm_oi.
+ */
+
+static inline void fid_to_lmm_oi(const struct lu_fid *fid,
+ struct ost_id *oi)
+{
+ oi->oi.oi_id = fid_oid(fid);
+ oi->oi.oi_seq = fid_seq(fid);
+}
+
+static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
+{
+ oi->oi.oi_seq = seq;
+}
+
+static inline __u64 lmm_oi_id(struct ost_id *oi)
+{
+ return oi->oi.oi_id;
+}
+
+static inline __u64 lmm_oi_seq(struct ost_id *oi)
+{
+ return oi->oi.oi_seq;
+}
+
+static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
+ struct ost_id *src_oi)
+{
+ dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
+ dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+}
+
+static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
+ struct ost_id *src_oi)
+{
+ dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
+ dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
+}
+
+/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
+
+#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
+#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
+
+#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
+#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default"
+#define XATTR_USER_PREFIX "user."
+#define XATTR_TRUSTED_PREFIX "trusted."
+#define XATTR_SECURITY_PREFIX "security."
+#define XATTR_LUSTRE_PREFIX "lustre."
+
+#define XATTR_NAME_LOV "trusted.lov"
+#define XATTR_NAME_LMA "trusted.lma"
+#define XATTR_NAME_LMV "trusted.lmv"
+#define XATTR_NAME_LINK "trusted.link"
+#define XATTR_NAME_FID "trusted.fid"
+#define XATTR_NAME_VERSION "trusted.version"
+#define XATTR_NAME_SOM "trusted.som"
+#define XATTR_NAME_HSM "trusted.hsm"
+#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace"
+
+struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */
+ __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ struct ost_id lmm_oi; /* LOV object ID */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ /* lmm_stripe_count used to be __u32 */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ __u16 lmm_layout_gen; /* layout generation number */
+ char lmm_pool_name[LOV_MAXPOOLNAME]; /* must be 32bit aligned */
+ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+#define OBD_MD_FLID (0x00000001ULL) /* object ID */
+#define OBD_MD_FLATIME (0x00000002ULL) /* access time */
+#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */
+#define OBD_MD_FLCTIME (0x00000008ULL) /* change time */
+#define OBD_MD_FLSIZE (0x00000010ULL) /* size */
+#define OBD_MD_FLBLOCKS (0x00000020ULL) /* allocated blocks count */
+#define OBD_MD_FLBLKSZ (0x00000040ULL) /* block size */
+#define OBD_MD_FLMODE (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
+#define OBD_MD_FLTYPE (0x00000100ULL) /* object type (mode & S_IFMT) */
+#define OBD_MD_FLUID (0x00000200ULL) /* user ID */
+#define OBD_MD_FLGID (0x00000400ULL) /* group ID */
+#define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */
+#define OBD_MD_FLNLINK (0x00002000ULL) /* link count */
+#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */
+/*#define OBD_MD_FLINLINE (0x00008000ULL) inline data. used until 1.6.5 */
+#define OBD_MD_FLRDEV (0x00010000ULL) /* device number */
+#define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */
+#define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */
+#define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */
+#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
+#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
+/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
+#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */
+#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
+#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
+#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */
+ /* ->mds if epoch opens or closes */
+#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */
+#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */
+#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
+
+#define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */
+#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */
+#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */
+
+/* OBD_MD_MDTIDX is used to get MDT index, but it is never been used overwire,
+ * and it is already obsolete since 2.3 */
+/* #define OBD_MD_MDTIDX (0x0000000800000000ULL) */
+
+#define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */
+#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */
+#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */
+#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */
+#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */
+#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */
+#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */
+#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */
+#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */
+#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
+ * under lock */
+#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */
+
+#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */
+#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */
+#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */
+#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
+
+#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
+
+#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
+ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \
+ OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \
+ OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \
+ OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP)
+
+/* don't forget obdo_fid which is way down at the bottom so it can
+ * come after the definition of llog_cookie */
+
+enum hss_valid {
+ HSS_SETMASK = 0x01,
+ HSS_CLEARMASK = 0x02,
+ HSS_ARCHIVE_ID = 0x04,
+};
+
+struct hsm_state_set {
+ __u32 hss_valid;
+ __u32 hss_archive_id;
+ __u64 hss_setmask;
+ __u64 hss_clearmask;
+};
+
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
+
+extern void lustre_swab_obd_statfs (struct obd_statfs *os);
+
+/* ost_body.data values for OST_BRW */
+
+#define OBD_BRW_READ 0x01
+#define OBD_BRW_WRITE 0x02
+#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE)
+#define OBD_BRW_SYNC 0x08 /* this page is a part of synchronous
+ * transfer and is not accounted in
+ * the grant. */
+#define OBD_BRW_CHECK 0x10
+#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */
+#define OBD_BRW_GRANTED 0x40 /* the ost manages this */
+#define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */
+#define OBD_BRW_NOQUOTA 0x100
+#define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */
+#define OBD_BRW_ASYNC 0x400 /* Server may delay commit to disk */
+#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */
+#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
+#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+
+#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+
+#define OST_MIN_PRECREATE 32
+#define OST_MAX_PRECREATE 20000
+
+struct obd_ioobj {
+ struct ost_id ioo_oid; /* object ID, if multi-obj BRW */
+ __u32 ioo_max_brw; /* low 16 bits were o_mode before 2.4,
+ * now (PTLRPC_BULK_OPS_COUNT - 1) in
+ * high 16 bits in 2.4 and later */
+ __u32 ioo_bufcnt; /* number of niobufs for this object */
+};
+
+#define IOOBJ_MAX_BRW_BITS 16
+#define IOOBJ_TYPE_MASK ((1U << IOOBJ_MAX_BRW_BITS) - 1)
+#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
+#define ioobj_max_brw_set(ioo, num) \
+do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
+
+extern void lustre_swab_obd_ioobj (struct obd_ioobj *ioo);
+
+/* multiple of 8 bytes => can array */
+struct niobuf_remote {
+ __u64 offset;
+ __u32 len;
+ __u32 flags;
+};
+
+extern void lustre_swab_niobuf_remote (struct niobuf_remote *nbr);
+
+/* lock value block communicated between the filter and llite */
+
+/* OST_LVB_ERR_INIT is needed because the return code in rc is
+ * negative, i.e. because ((MASK + rc) & MASK) != MASK. */
+#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL
+#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL
+#define OST_LVB_IS_ERR(blocks) \
+ ((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK)
+#define OST_LVB_SET_ERR(blocks, rc) \
+ do { blocks = OST_LVB_ERR_INIT + rc; } while (0)
+#define OST_LVB_GET_ERR(blocks) (int)(blocks - OST_LVB_ERR_INIT)
+
+struct ost_lvb_v1 {
+ __u64 lvb_size;
+ obd_time lvb_mtime;
+ obd_time lvb_atime;
+ obd_time lvb_ctime;
+ __u64 lvb_blocks;
+};
+
+extern void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
+
+struct ost_lvb {
+ __u64 lvb_size;
+ obd_time lvb_mtime;
+ obd_time lvb_atime;
+ obd_time lvb_ctime;
+ __u64 lvb_blocks;
+ __u32 lvb_mtime_ns;
+ __u32 lvb_atime_ns;
+ __u32 lvb_ctime_ns;
+ __u32 lvb_padding;
+};
+
+extern void lustre_swab_ost_lvb(struct ost_lvb *lvb);
+
+/*
+ * lquota data structures
+ */
+
+#ifndef QUOTABLOCK_BITS
+#define QUOTABLOCK_BITS 10
+#endif
+
+#ifndef QUOTABLOCK_SIZE
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+#endif
+
+#ifndef toqb
+#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
+#endif
+
+/* The lquota_id structure is an union of all the possible identifier types that
+ * can be used with quota, this includes:
+ * - 64-bit user ID
+ * - 64-bit group ID
+ * - a FID which can be used for per-directory quota in the future */
+union lquota_id {
+ struct lu_fid qid_fid; /* FID for per-directory quota */
+ __u64 qid_uid; /* user identifier */
+ __u64 qid_gid; /* group identifier */
+};
+
+/* quotactl management */
+struct obd_quotactl {
+ __u32 qc_cmd;
+ __u32 qc_type; /* see Q_* flag below */
+ __u32 qc_id;
+ __u32 qc_stat;
+ struct obd_dqinfo qc_dqinfo;
+ struct obd_dqblk qc_dqblk;
+};
+
+extern void lustre_swab_obd_quotactl(struct obd_quotactl *q);
+
+#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
+#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
+#define Q_GETOINFO 0x800102 /* get obd quota info */
+#define Q_GETOQUOTA 0x800103 /* get obd quotas */
+#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
+
+#define Q_COPY(out, in, member) (out)->member = (in)->member
+
+#define QCTL_COPY(out, in) \
+do { \
+ Q_COPY(out, in, qc_cmd); \
+ Q_COPY(out, in, qc_type); \
+ Q_COPY(out, in, qc_id); \
+ Q_COPY(out, in, qc_stat); \
+ Q_COPY(out, in, qc_dqinfo); \
+ Q_COPY(out, in, qc_dqblk); \
+} while (0)
+
+/* Body of quota request used for quota acquire/release RPCs between quota
+ * master (aka QMT) and slaves (ak QSD). */
+struct quota_body {
+ struct lu_fid qb_fid; /* FID of global index packing the pool ID
+ * and type (data or metadata) as well as
+ * the quota type (user or group). */
+ union lquota_id qb_id; /* uid or gid or directory FID */
+ __u32 qb_flags; /* see below */
+ __u32 qb_padding;
+ __u64 qb_count; /* acquire/release count (kbytes/inodes) */
+ __u64 qb_usage; /* current slave usage (kbytes/inodes) */
+ __u64 qb_slv_ver; /* slave index file version */
+ struct lustre_handle qb_lockh; /* per-ID lock handle */
+ struct lustre_handle qb_glb_lockh; /* global lock handle */
+ __u64 qb_padding1[4];
+};
+
+/* When the quota_body is used in the reply of quota global intent
+ * lock (IT_QUOTA_CONN) reply, qb_fid contains slave index file FID. */
+#define qb_slv_fid qb_fid
+/* qb_usage is the current qunit (in kbytes/inodes) when quota_body is used in
+ * quota reply */
+#define qb_qunit qb_usage
+
+#define QUOTA_DQACQ_FL_ACQ 0x1 /* acquire quota */
+#define QUOTA_DQACQ_FL_PREACQ 0x2 /* pre-acquire */
+#define QUOTA_DQACQ_FL_REL 0x4 /* release quota */
+#define QUOTA_DQACQ_FL_REPORT 0x8 /* report usage */
+
+extern void lustre_swab_quota_body(struct quota_body *b);
+
+/* Quota types currently supported */
+enum {
+ LQUOTA_TYPE_USR = 0x00, /* maps to USRQUOTA */
+ LQUOTA_TYPE_GRP = 0x01, /* maps to GRPQUOTA */
+ LQUOTA_TYPE_MAX
+};
+
+/* There are 2 different resource types on which a quota limit can be enforced:
+ * - inodes on the MDTs
+ * - blocks on the OSTs */
+enum {
+ LQUOTA_RES_MD = 0x01, /* skip 0 to avoid null oid in FID */
+ LQUOTA_RES_DT = 0x02,
+ LQUOTA_LAST_RES,
+ LQUOTA_FIRST_RES = LQUOTA_RES_MD
+};
+#define LQUOTA_NR_RES (LQUOTA_LAST_RES - LQUOTA_FIRST_RES + 1)
+
+/*
+ * Space accounting support
+ * Format of an accounting record, providing disk usage information for a given
+ * user or group
+ */
+struct lquota_acct_rec { /* 16 bytes */
+ __u64 bspace; /* current space in use */
+ __u64 ispace; /* current # inodes in use */
+};
+
+/*
+ * Global quota index support
+ * Format of a global record, providing global quota settings for a given quota
+ * identifier
+ */
+struct lquota_glb_rec { /* 32 bytes */
+ __u64 qbr_hardlimit; /* quota hard limit, in #inodes or kbytes */
+ __u64 qbr_softlimit; /* quota soft limit, in #inodes or kbytes */
+ __u64 qbr_time; /* grace time, in seconds */
+ __u64 qbr_granted; /* how much is granted to slaves, in #inodes or
+ * kbytes */
+};
+
+/*
+ * Slave index support
+ * Format of a slave record, recording how much space is granted to a given
+ * slave
+ */
+struct lquota_slv_rec { /* 8 bytes */
+ __u64 qsr_granted; /* space granted to the slave for the key=ID,
+ * in #inodes or kbytes */
+};
+
+/* Data structures associated with the quota locks */
+
+/* Glimpse descriptor used for the index & per-ID quota locks */
+struct ldlm_gl_lquota_desc {
+ union lquota_id gl_id; /* quota ID subject to the glimpse */
+ __u64 gl_flags; /* see LQUOTA_FL* below */
+ __u64 gl_ver; /* new index version */
+ __u64 gl_hardlimit; /* new hardlimit or qunit value */
+ __u64 gl_softlimit; /* new softlimit */
+ __u64 gl_time;
+ __u64 gl_pad2;
+};
+#define gl_qunit gl_hardlimit /* current qunit value used when
+ * glimpsing per-ID quota locks */
+
+/* quota glimpse flags */
+#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */
+
+/* LVB used with quota (global and per-ID) locks */
+struct lquota_lvb {
+ __u64 lvb_flags; /* see LQUOTA_FL* above */
+ __u64 lvb_id_may_rel; /* space that might be released later */
+ __u64 lvb_id_rel; /* space released by the slave for this ID */
+ __u64 lvb_id_qunit; /* current qunit value */
+ __u64 lvb_pad1;
+};
+
+extern void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+
+/* LVB used with global quota lock */
+#define lvb_glb_ver lvb_id_may_rel /* current version of the global index */
+
+/* op codes */
+typedef enum {
+ QUOTA_DQACQ = 601,
+ QUOTA_DQREL = 602,
+ QUOTA_LAST_OPC
+} quota_cmd_t;
+#define QUOTA_FIRST_OPC QUOTA_DQACQ
+
+/*
+ * MDS REQ RECORDS
+ */
+
+/* opcodes */
+typedef enum {
+ MDS_GETATTR = 33,
+ MDS_GETATTR_NAME = 34,
+ MDS_CLOSE = 35,
+ MDS_REINT = 36,
+ MDS_READPAGE = 37,
+ MDS_CONNECT = 38,
+ MDS_DISCONNECT = 39,
+ MDS_GETSTATUS = 40,
+ MDS_STATFS = 41,
+ MDS_PIN = 42,
+ MDS_UNPIN = 43,
+ MDS_SYNC = 44,
+ MDS_DONE_WRITING = 45,
+ MDS_SET_INFO = 46,
+ MDS_QUOTACHECK = 47,
+ MDS_QUOTACTL = 48,
+ MDS_GETXATTR = 49,
+ MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
+ MDS_WRITEPAGE = 51,
+ MDS_IS_SUBDIR = 52,
+ MDS_GET_INFO = 53,
+ MDS_HSM_STATE_GET = 54,
+ MDS_HSM_STATE_SET = 55,
+ MDS_HSM_ACTION = 56,
+ MDS_HSM_PROGRESS = 57,
+ MDS_HSM_REQUEST = 58,
+ MDS_HSM_CT_REGISTER = 59,
+ MDS_HSM_CT_UNREGISTER = 60,
+ MDS_SWAP_LAYOUTS = 61,
+ MDS_LAST_OPC
+} mds_cmd_t;
+
+#define MDS_FIRST_OPC MDS_GETATTR
+
+
+/* opcodes for object update */
+typedef enum {
+ UPDATE_OBJ = 1000,
+ UPDATE_LAST_OPC
+} update_cmd_t;
+
+#define UPDATE_FIRST_OPC UPDATE_OBJ
+
+/*
+ * Do not exceed 63
+ */
+
+typedef enum {
+ REINT_SETATTR = 1,
+ REINT_CREATE = 2,
+ REINT_LINK = 3,
+ REINT_UNLINK = 4,
+ REINT_RENAME = 5,
+ REINT_OPEN = 6,
+ REINT_SETXATTR = 7,
+ REINT_RMENTRY = 8,
+// REINT_WRITE = 9,
+ REINT_MAX
+} mds_reint_t, mdt_reint_t;
+
+extern void lustre_swab_generic_32s (__u32 *val);
+
+/* the disposition of the intent outlines what was executed */
+#define DISP_IT_EXECD 0x00000001
+#define DISP_LOOKUP_EXECD 0x00000002
+#define DISP_LOOKUP_NEG 0x00000004
+#define DISP_LOOKUP_POS 0x00000008
+#define DISP_OPEN_CREATE 0x00000010
+#define DISP_OPEN_OPEN 0x00000020
+#define DISP_ENQ_COMPLETE 0x00400000
+#define DISP_ENQ_OPEN_REF 0x00800000
+#define DISP_ENQ_CREATE_REF 0x01000000
+#define DISP_OPEN_LOCK 0x02000000
+
+/* INODE LOCK PARTS */
+#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
+#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
+#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */
+#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */
+#define MDS_INODELOCK_PERM 0x000010 /* for permission */
+
+#define MDS_INODELOCK_MAXSHIFT 4
+/* This FULL lock is useful to take on unlink sort of operations */
+#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
+
+extern void lustre_swab_ll_fid (struct ll_fid *fid);
+
+/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
+ * but was moved into name[1] along with the OID to avoid consuming the
+ * name[2,3] fields that need to be used for the quota id (also a FID). */
+enum {
+ LUSTRE_RES_ID_SEQ_OFF = 0,
+ LUSTRE_RES_ID_VER_OID_OFF = 1,
+ LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */
+ LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2,
+ LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3,
+ LUSTRE_RES_ID_HSH_OFF = 3
+};
+
+#define MDS_STATUS_CONN 1
+#define MDS_STATUS_LOV 2
+
+/* mdt_thread_info.mti_flags. */
+enum md_op_flags {
+ /* The flag indicates Size-on-MDS attributes are changed. */
+ MF_SOM_CHANGE = (1 << 0),
+ /* Flags indicates an epoch opens or closes. */
+ MF_EPOCH_OPEN = (1 << 1),
+ MF_EPOCH_CLOSE = (1 << 2),
+ MF_MDC_CANCEL_FID1 = (1 << 3),
+ MF_MDC_CANCEL_FID2 = (1 << 4),
+ MF_MDC_CANCEL_FID3 = (1 << 5),
+ MF_MDC_CANCEL_FID4 = (1 << 6),
+ /* There is a pending attribute update. */
+ MF_SOM_AU = (1 << 7),
+ /* Cancel OST locks while getattr OST attributes. */
+ MF_GETATTR_LOCK = (1 << 8),
+ MF_GET_MDT_IDX = (1 << 9),
+};
+
+#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
+
+#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1
+
+/* these should be identical to their EXT4_*_FL counterparts, they are
+ * redefined here only to avoid dragging in fs/ext4/ext4.h */
+#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */
+#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */
+#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */
+#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */
+
+/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
+ * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire
+ * protocol equivalents of LDISKFS_*_FL values stored on disk, while
+ * the S_* flags are kernel-internal values that change between kernel
+ * versions. These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS.
+ * See b=16526 for a full history. */
+static inline int ll_ext_to_inode_flags(int flags)
+{
+ return (((flags & LUSTRE_SYNC_FL) ? S_SYNC : 0) |
+ ((flags & LUSTRE_NOATIME_FL) ? S_NOATIME : 0) |
+ ((flags & LUSTRE_APPEND_FL) ? S_APPEND : 0) |
+#if defined(S_DIRSYNC)
+ ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) |
+#endif
+ ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
+}
+
+static inline int ll_inode_to_ext_flags(int iflags)
+{
+ return (((iflags & S_SYNC) ? LUSTRE_SYNC_FL : 0) |
+ ((iflags & S_NOATIME) ? LUSTRE_NOATIME_FL : 0) |
+ ((iflags & S_APPEND) ? LUSTRE_APPEND_FL : 0) |
+#if defined(S_DIRSYNC)
+ ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) |
+#endif
+ ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
+}
+
+struct mdt_body {
+ struct lu_fid fid1;
+ struct lu_fid fid2;
+ struct lustre_handle handle;
+ __u64 valid;
+ __u64 size; /* Offset, in the case of MDS_READPAGE */
+ obd_time mtime;
+ obd_time atime;
+ obd_time ctime;
+ __u64 blocks; /* XID, in the case of MDS_READPAGE */
+ __u64 ioepoch;
+ __u64 unused1; /* was "ino" until 2.4.0 */
+ __u32 fsuid;
+ __u32 fsgid;
+ __u32 capability;
+ __u32 mode;
+ __u32 uid;
+ __u32 gid;
+ __u32 flags; /* from vfs for pin/unpin, LUSTRE_BFLAG close */
+ __u32 rdev;
+ __u32 nlink; /* #bytes to read in the case of MDS_READPAGE */
+ __u32 unused2; /* was "generation" until 2.4.0 */
+ __u32 suppgid;
+ __u32 eadatasize;
+ __u32 aclsize;
+ __u32 max_mdsize;
+ __u32 max_cookiesize;
+ __u32 uid_h; /* high 32-bits of uid, for FUID */
+ __u32 gid_h; /* high 32-bits of gid, for FUID */
+ __u32 padding_5; /* also fix lustre_swab_mdt_body */
+ __u64 padding_6;
+ __u64 padding_7;
+ __u64 padding_8;
+ __u64 padding_9;
+ __u64 padding_10;
+}; /* 216 */
+
+extern void lustre_swab_mdt_body (struct mdt_body *b);
+
+struct mdt_ioepoch {
+ struct lustre_handle handle;
+ __u64 ioepoch;
+ __u32 flags;
+ __u32 padding;
+};
+
+extern void lustre_swab_mdt_ioepoch (struct mdt_ioepoch *b);
+
+/* permissions for md_perm.mp_perm */
+enum {
+ CFS_SETUID_PERM = 0x01,
+ CFS_SETGID_PERM = 0x02,
+ CFS_SETGRP_PERM = 0x04,
+ CFS_RMTACL_PERM = 0x08,
+ CFS_RMTOWN_PERM = 0x10
+};
+
+/* inode access permission for remote user, the inode info are omitted,
+ * for client knows them. */
+struct mdt_remote_perm {
+ __u32 rp_uid;
+ __u32 rp_gid;
+ __u32 rp_fsuid;
+ __u32 rp_fsuid_h;
+ __u32 rp_fsgid;
+ __u32 rp_fsgid_h;
+ __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */
+ __u32 rp_padding;
+};
+
+extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p);
+
+struct mdt_rec_setattr {
+ __u32 sa_opcode;
+ __u32 sa_cap;
+ __u32 sa_fsuid;
+ __u32 sa_fsuid_h;
+ __u32 sa_fsgid;
+ __u32 sa_fsgid_h;
+ __u32 sa_suppgid;
+ __u32 sa_suppgid_h;
+ __u32 sa_padding_1;
+ __u32 sa_padding_1_h;
+ struct lu_fid sa_fid;
+ __u64 sa_valid;
+ __u32 sa_uid;
+ __u32 sa_gid;
+ __u64 sa_size;
+ __u64 sa_blocks;
+ obd_time sa_mtime;
+ obd_time sa_atime;
+ obd_time sa_ctime;
+ __u32 sa_attr_flags;
+ __u32 sa_mode;
+ __u32 sa_bias; /* some operation flags */
+ __u32 sa_padding_3;
+ __u32 sa_padding_4;
+ __u32 sa_padding_5;
+};
+
+extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
+
+/*
+ * Attribute flags used in mdt_rec_setattr::sa_valid.
+ * The kernel's #defines for ATTR_* should not be used over the network
+ * since the client and MDS may run different kernels (see bug 13828)
+ * Therefore, we should only use MDS_ATTR_* attributes for sa_valid.
+ */
+#define MDS_ATTR_MODE 0x1ULL /* = 1 */
+#define MDS_ATTR_UID 0x2ULL /* = 2 */
+#define MDS_ATTR_GID 0x4ULL /* = 4 */
+#define MDS_ATTR_SIZE 0x8ULL /* = 8 */
+#define MDS_ATTR_ATIME 0x10ULL /* = 16 */
+#define MDS_ATTR_MTIME 0x20ULL /* = 32 */
+#define MDS_ATTR_CTIME 0x40ULL /* = 64 */
+#define MDS_ATTR_ATIME_SET 0x80ULL /* = 128 */
+#define MDS_ATTR_MTIME_SET 0x100ULL /* = 256 */
+#define MDS_ATTR_FORCE 0x200ULL /* = 512, Not a change, but a change it */
+#define MDS_ATTR_ATTR_FLAG 0x400ULL /* = 1024 */
+#define MDS_ATTR_KILL_SUID 0x800ULL /* = 2048 */
+#define MDS_ATTR_KILL_SGID 0x1000ULL /* = 4096 */
+#define MDS_ATTR_CTIME_SET 0x2000ULL /* = 8192 */
+#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, ie O_TRUNC */
+#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */
+
+#ifndef FMODE_READ
+#define FMODE_READ 00000001
+#define FMODE_WRITE 00000002
+#endif
+
+#define MDS_FMODE_CLOSED 00000000
+#define MDS_FMODE_EXEC 00000004
+/* IO Epoch is opened on a closed file. */
+#define MDS_FMODE_EPOCH 01000000
+/* IO Epoch is opened on a file truncate. */
+#define MDS_FMODE_TRUNC 02000000
+/* Size-on-MDS Attribute Update is pending. */
+#define MDS_FMODE_SOM 04000000
+
+#define MDS_OPEN_CREATED 00000010
+#define MDS_OPEN_CROSS 00000020
+
+#define MDS_OPEN_CREAT 00000100
+#define MDS_OPEN_EXCL 00000200
+#define MDS_OPEN_TRUNC 00001000
+#define MDS_OPEN_APPEND 00002000
+#define MDS_OPEN_SYNC 00010000
+#define MDS_OPEN_DIRECTORY 00200000
+
+#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
+#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
+#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
+#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
+ * We do not support JOIN FILE
+ * anymore, reserve this flags
+ * just for preventing such bit
+ * to be reused. */
+
+#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
+#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
+#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */
+#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */
+#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or
+ * hsm restore) */
+#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created
+ unlinked */
+
+/* permission for create non-directory file */
+#define MAY_CREATE (1 << 7)
+/* permission for create directory file */
+#define MAY_LINK (1 << 8)
+/* permission for delete from the directory */
+#define MAY_UNLINK (1 << 9)
+/* source's permission for rename */
+#define MAY_RENAME_SRC (1 << 10)
+/* target's permission for rename */
+#define MAY_RENAME_TAR (1 << 11)
+/* part (parent's) VTX permission check */
+#define MAY_VTX_PART (1 << 12)
+/* full VTX permission check */
+#define MAY_VTX_FULL (1 << 13)
+/* lfs rgetfacl permission check */
+#define MAY_RGETFACL (1 << 14)
+
+enum {
+ MDS_CHECK_SPLIT = 1 << 0,
+ MDS_CROSS_REF = 1 << 1,
+ MDS_VTX_BYPASS = 1 << 2,
+ MDS_PERM_BYPASS = 1 << 3,
+ MDS_SOM = 1 << 4,
+ MDS_QUOTA_IGNORE = 1 << 5,
+ MDS_CLOSE_CLEANUP = 1 << 6,
+ MDS_KEEP_ORPHAN = 1 << 7,
+ MDS_RECOV_OPEN = 1 << 8,
+ MDS_DATA_MODIFIED = 1 << 9,
+ MDS_CREATE_VOLATILE = 1 << 10,
+ MDS_OWNEROVERRIDE = 1 << 11,
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_create {
+ __u32 cr_opcode;
+ __u32 cr_cap;
+ __u32 cr_fsuid;
+ __u32 cr_fsuid_h;
+ __u32 cr_fsgid;
+ __u32 cr_fsgid_h;
+ __u32 cr_suppgid1;
+ __u32 cr_suppgid1_h;
+ __u32 cr_suppgid2;
+ __u32 cr_suppgid2_h;
+ struct lu_fid cr_fid1;
+ struct lu_fid cr_fid2;
+ struct lustre_handle cr_old_handle; /* handle in case of open replay */
+ obd_time cr_time;
+ __u64 cr_rdev;
+ __u64 cr_ioepoch;
+ __u64 cr_padding_1; /* rr_blocks */
+ __u32 cr_mode;
+ __u32 cr_bias;
+ /* use of helpers set/get_mrc_cr_flags() is needed to access
+ * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to
+ * extend cr_flags size without breaking 1.8 compat */
+ __u32 cr_flags_l; /* for use with open, low 32 bits */
+ __u32 cr_flags_h; /* for use with open, high 32 bits */
+ __u32 cr_umask; /* umask for create */
+ __u32 cr_padding_4; /* rr_padding_4 */
+};
+
+static inline void set_mrc_cr_flags(struct mdt_rec_create *mrc, __u64 flags)
+{
+ mrc->cr_flags_l = (__u32)(flags & 0xFFFFFFFFUll);
+ mrc->cr_flags_h = (__u32)(flags >> 32);
+}
+
+static inline __u64 get_mrc_cr_flags(struct mdt_rec_create *mrc)
+{
+ return ((__u64)(mrc->cr_flags_l) | ((__u64)mrc->cr_flags_h << 32));
+}
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_link {
+ __u32 lk_opcode;
+ __u32 lk_cap;
+ __u32 lk_fsuid;
+ __u32 lk_fsuid_h;
+ __u32 lk_fsgid;
+ __u32 lk_fsgid_h;
+ __u32 lk_suppgid1;
+ __u32 lk_suppgid1_h;
+ __u32 lk_suppgid2;
+ __u32 lk_suppgid2_h;
+ struct lu_fid lk_fid1;
+ struct lu_fid lk_fid2;
+ obd_time lk_time;
+ __u64 lk_padding_1; /* rr_atime */
+ __u64 lk_padding_2; /* rr_ctime */
+ __u64 lk_padding_3; /* rr_size */
+ __u64 lk_padding_4; /* rr_blocks */
+ __u32 lk_bias;
+ __u32 lk_padding_5; /* rr_mode */
+ __u32 lk_padding_6; /* rr_flags */
+ __u32 lk_padding_7; /* rr_padding_2 */
+ __u32 lk_padding_8; /* rr_padding_3 */
+ __u32 lk_padding_9; /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_unlink {
+ __u32 ul_opcode;
+ __u32 ul_cap;
+ __u32 ul_fsuid;
+ __u32 ul_fsuid_h;
+ __u32 ul_fsgid;
+ __u32 ul_fsgid_h;
+ __u32 ul_suppgid1;
+ __u32 ul_suppgid1_h;
+ __u32 ul_suppgid2;
+ __u32 ul_suppgid2_h;
+ struct lu_fid ul_fid1;
+ struct lu_fid ul_fid2;
+ obd_time ul_time;
+ __u64 ul_padding_2; /* rr_atime */
+ __u64 ul_padding_3; /* rr_ctime */
+ __u64 ul_padding_4; /* rr_size */
+ __u64 ul_padding_5; /* rr_blocks */
+ __u32 ul_bias;
+ __u32 ul_mode;
+ __u32 ul_padding_6; /* rr_flags */
+ __u32 ul_padding_7; /* rr_padding_2 */
+ __u32 ul_padding_8; /* rr_padding_3 */
+ __u32 ul_padding_9; /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_rename {
+ __u32 rn_opcode;
+ __u32 rn_cap;
+ __u32 rn_fsuid;
+ __u32 rn_fsuid_h;
+ __u32 rn_fsgid;
+ __u32 rn_fsgid_h;
+ __u32 rn_suppgid1;
+ __u32 rn_suppgid1_h;
+ __u32 rn_suppgid2;
+ __u32 rn_suppgid2_h;
+ struct lu_fid rn_fid1;
+ struct lu_fid rn_fid2;
+ obd_time rn_time;
+ __u64 rn_padding_1; /* rr_atime */
+ __u64 rn_padding_2; /* rr_ctime */
+ __u64 rn_padding_3; /* rr_size */
+ __u64 rn_padding_4; /* rr_blocks */
+ __u32 rn_bias; /* some operation flags */
+ __u32 rn_mode; /* cross-ref rename has mode */
+ __u32 rn_padding_5; /* rr_flags */
+ __u32 rn_padding_6; /* rr_padding_2 */
+ __u32 rn_padding_7; /* rr_padding_3 */
+ __u32 rn_padding_8; /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_setxattr {
+ __u32 sx_opcode;
+ __u32 sx_cap;
+ __u32 sx_fsuid;
+ __u32 sx_fsuid_h;
+ __u32 sx_fsgid;
+ __u32 sx_fsgid_h;
+ __u32 sx_suppgid1;
+ __u32 sx_suppgid1_h;
+ __u32 sx_suppgid2;
+ __u32 sx_suppgid2_h;
+ struct lu_fid sx_fid;
+ __u64 sx_padding_1; /* These three are rr_fid2 */
+ __u32 sx_padding_2;
+ __u32 sx_padding_3;
+ __u64 sx_valid;
+ obd_time sx_time;
+ __u64 sx_padding_5; /* rr_ctime */
+ __u64 sx_padding_6; /* rr_size */
+ __u64 sx_padding_7; /* rr_blocks */
+ __u32 sx_size;
+ __u32 sx_flags;
+ __u32 sx_padding_8; /* rr_flags */
+ __u32 sx_padding_9; /* rr_padding_2 */
+ __u32 sx_padding_10; /* rr_padding_3 */
+ __u32 sx_padding_11; /* rr_padding_4 */
+};
+
+/*
+ * mdt_rec_reint is the template for all mdt_reint_xxx structures.
+ * Do NOT change the size of various members, otherwise the value
+ * will be broken in lustre_swab_mdt_rec_reint().
+ *
+ * If you add new members in other mdt_reint_xxx structres and need to use the
+ * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also.
+ */
+struct mdt_rec_reint {
+ __u32 rr_opcode;
+ __u32 rr_cap;
+ __u32 rr_fsuid;
+ __u32 rr_fsuid_h;
+ __u32 rr_fsgid;
+ __u32 rr_fsgid_h;
+ __u32 rr_suppgid1;
+ __u32 rr_suppgid1_h;
+ __u32 rr_suppgid2;
+ __u32 rr_suppgid2_h;
+ struct lu_fid rr_fid1;
+ struct lu_fid rr_fid2;
+ obd_time rr_mtime;
+ obd_time rr_atime;
+ obd_time rr_ctime;
+ __u64 rr_size;
+ __u64 rr_blocks;
+ __u32 rr_bias;
+ __u32 rr_mode;
+ __u32 rr_flags;
+ __u32 rr_flags_h;
+ __u32 rr_umask;
+ __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
+};
+
+extern void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+
+struct lmv_desc {
+ __u32 ld_tgt_count; /* how many MDS's */
+ __u32 ld_active_tgt_count; /* how many active */
+ __u32 ld_default_stripe_count; /* how many objects are used */
+ __u32 ld_pattern; /* default MEA_MAGIC_* */
+ __u64 ld_default_hash_size;
+ __u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */
+ __u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */
+ __u32 ld_qos_maxage; /* in second */
+ __u32 ld_padding_3; /* also fix lustre_swab_lmv_desc */
+ __u32 ld_padding_4; /* also fix lustre_swab_lmv_desc */
+ struct obd_uuid ld_uuid;
+};
+
+extern void lustre_swab_lmv_desc (struct lmv_desc *ld);
+
+/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
+struct lmv_stripe_md {
+ __u32 mea_magic;
+ __u32 mea_count;
+ __u32 mea_master;
+ __u32 mea_padding;
+ char mea_pool_name[LOV_MAXPOOLNAME];
+ struct lu_fid mea_ids[0];
+};
+
+extern void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea);
+
+/* lmv structures */
+#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
+#define MEA_MAGIC_ALL_CHARS 0xb222a11c
+#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
+
+#define MAX_HASH_SIZE_32 0x7fffffffUL
+#define MAX_HASH_SIZE 0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
+
+enum fld_rpc_opc {
+ FLD_QUERY = 900,
+ FLD_LAST_OPC,
+ FLD_FIRST_OPC = FLD_QUERY
+};
+
+enum seq_rpc_opc {
+ SEQ_QUERY = 700,
+ SEQ_LAST_OPC,
+ SEQ_FIRST_OPC = SEQ_QUERY
+};
+
+enum seq_op {
+ SEQ_ALLOC_SUPER = 0,
+ SEQ_ALLOC_META = 1
+};
+
+/*
+ * LOV data structures
+ */
+
+#define LOV_MAX_UUID_BUFFER_SIZE 8192
+/* The size of the buffer the lov/mdc reserves for the
+ * array of UUIDs returned by the MDS. With the current
+ * protocol, this will limit the max number of OSTs per LOV */
+
+#define LOV_DESC_MAGIC 0xB0CCDE5C
+
+/* LOV settings descriptor (should only contain static info) */
+struct lov_desc {
+ __u32 ld_tgt_count; /* how many OBD's */
+ __u32 ld_active_tgt_count; /* how many active */
+ __u32 ld_default_stripe_count; /* how many objects are used */
+ __u32 ld_pattern; /* default PATTERN_RAID0 */
+ __u64 ld_default_stripe_size; /* in bytes */
+ __u64 ld_default_stripe_offset; /* in bytes */
+ __u32 ld_padding_0; /* unused */
+ __u32 ld_qos_maxage; /* in second */
+ __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */
+ __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */
+ struct obd_uuid ld_uuid;
+};
+
+#define ld_magic ld_active_tgt_count /* for swabbing from llogs */
+
+extern void lustre_swab_lov_desc (struct lov_desc *ld);
+
+/*
+ * LDLM requests:
+ */
+/* opcodes -- MUST be distinct from OST/MDS opcodes */
+typedef enum {
+ LDLM_ENQUEUE = 101,
+ LDLM_CONVERT = 102,
+ LDLM_CANCEL = 103,
+ LDLM_BL_CALLBACK = 104,
+ LDLM_CP_CALLBACK = 105,
+ LDLM_GL_CALLBACK = 106,
+ LDLM_SET_INFO = 107,
+ LDLM_LAST_OPC
+} ldlm_cmd_t;
+#define LDLM_FIRST_OPC LDLM_ENQUEUE
+
+#define RES_NAME_SIZE 4
+struct ldlm_res_id {
+ __u64 name[RES_NAME_SIZE];
+};
+
+extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id);
+
+static inline int ldlm_res_eq(const struct ldlm_res_id *res0,
+ const struct ldlm_res_id *res1)
+{
+ return !memcmp(res0, res1, sizeof(*res0));
+}
+
+/* lock types */
+typedef enum {
+ LCK_MINMODE = 0,
+ LCK_EX = 1,
+ LCK_PW = 2,
+ LCK_PR = 4,
+ LCK_CW = 8,
+ LCK_CR = 16,
+ LCK_NL = 32,
+ LCK_GROUP = 64,
+ LCK_COS = 128,
+ LCK_MAXMODE
+} ldlm_mode_t;
+
+#define LCK_MODE_NUM 8
+
+typedef enum {
+ LDLM_PLAIN = 10,
+ LDLM_EXTENT = 11,
+ LDLM_FLOCK = 12,
+ LDLM_IBITS = 13,
+ LDLM_MAX_TYPE
+} ldlm_type_t;
+
+#define LDLM_MIN_TYPE LDLM_PLAIN
+
+struct ldlm_extent {
+ __u64 start;
+ __u64 end;
+ __u64 gid;
+};
+
+static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
+ struct ldlm_extent *ex2)
+{
+ return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
+}
+
+/* check if @ex1 contains @ex2 */
+static inline int ldlm_extent_contain(struct ldlm_extent *ex1,
+ struct ldlm_extent *ex2)
+{
+ return (ex1->start <= ex2->start) && (ex1->end >= ex2->end);
+}
+
+struct ldlm_inodebits {
+ __u64 bits;
+};
+
+struct ldlm_flock_wire {
+ __u64 lfw_start;
+ __u64 lfw_end;
+ __u64 lfw_owner;
+ __u32 lfw_padding;
+ __u32 lfw_pid;
+};
+
+/* it's important that the fields of the ldlm_extent structure match
+ * the first fields of the ldlm_flock structure because there is only
+ * one ldlm_swab routine to process the ldlm_policy_data_t union. if
+ * this ever changes we will need to swab the union differently based
+ * on the resource type. */
+
+typedef union {
+ struct ldlm_extent l_extent;
+ struct ldlm_flock_wire l_flock;
+ struct ldlm_inodebits l_inodebits;
+} ldlm_wire_policy_data_t;
+
+extern void lustre_swab_ldlm_policy_data (ldlm_wire_policy_data_t *d);
+
+union ldlm_gl_desc {
+ struct ldlm_gl_lquota_desc lquota_desc;
+};
+
+extern void lustre_swab_gl_desc(union ldlm_gl_desc *);
+
+struct ldlm_intent {
+ __u64 opc;
+};
+
+extern void lustre_swab_ldlm_intent (struct ldlm_intent *i);
+
+struct ldlm_resource_desc {
+ ldlm_type_t lr_type;
+ __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */
+ struct ldlm_res_id lr_name;
+};
+
+extern void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r);
+
+struct ldlm_lock_desc {
+ struct ldlm_resource_desc l_resource;
+ ldlm_mode_t l_req_mode;
+ ldlm_mode_t l_granted_mode;
+ ldlm_wire_policy_data_t l_policy_data;
+};
+
+extern void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l);
+
+#define LDLM_LOCKREQ_HANDLES 2
+#define LDLM_ENQUEUE_CANCEL_OFF 1
+
+struct ldlm_request {
+ __u32 lock_flags;
+ __u32 lock_count;
+ struct ldlm_lock_desc lock_desc;
+ struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
+};
+
+extern void lustre_swab_ldlm_request (struct ldlm_request *rq);
+
+/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
+ * Otherwise, 2 are available. */
+#define ldlm_request_bufsize(count,type) \
+({ \
+ int _avail = LDLM_LOCKREQ_HANDLES; \
+ _avail -= (type == LDLM_ENQUEUE ? LDLM_ENQUEUE_CANCEL_OFF : 0); \
+ sizeof(struct ldlm_request) + \
+ (count > _avail ? count - _avail : 0) * \
+ sizeof(struct lustre_handle); \
+})
+
+struct ldlm_reply {
+ __u32 lock_flags;
+ __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */
+ struct ldlm_lock_desc lock_desc;
+ struct lustre_handle lock_handle;
+ __u64 lock_policy_res1;
+ __u64 lock_policy_res2;
+};
+
+extern void lustre_swab_ldlm_reply (struct ldlm_reply *r);
+
+#define ldlm_flags_to_wire(flags) ((__u32)(flags))
+#define ldlm_flags_from_wire(flags) ((__u64)(flags))
+
+/*
+ * Opcodes for mountconf (mgs and mgc)
+ */
+typedef enum {
+ MGS_CONNECT = 250,
+ MGS_DISCONNECT,
+ MGS_EXCEPTION, /* node died, etc. */
+ MGS_TARGET_REG, /* whenever target starts up */
+ MGS_TARGET_DEL,
+ MGS_SET_INFO,
+ MGS_CONFIG_READ,
+ MGS_LAST_OPC
+} mgs_cmd_t;
+#define MGS_FIRST_OPC MGS_CONNECT
+
+#define MGS_PARAM_MAXLEN 1024
+#define KEY_SET_INFO "set_info"
+
+struct mgs_send_param {
+ char mgs_param[MGS_PARAM_MAXLEN];
+};
+
+/* We pass this info to the MGS so it can write config logs */
+#define MTI_NAME_MAXLEN 64
+#define MTI_PARAM_MAXLEN 4096
+#define MTI_NIDS_MAX 32
+struct mgs_target_info {
+ __u32 mti_lustre_ver;
+ __u32 mti_stripe_index;
+ __u32 mti_config_ver;
+ __u32 mti_flags;
+ __u32 mti_nid_count;
+ __u32 mti_instance; /* Running instance of target */
+ char mti_fsname[MTI_NAME_MAXLEN];
+ char mti_svname[MTI_NAME_MAXLEN];
+ char mti_uuid[sizeof(struct obd_uuid)];
+ __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/
+ char mti_params[MTI_PARAM_MAXLEN];
+};
+extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+
+struct mgs_nidtbl_entry {
+ __u64 mne_version; /* table version of this entry */
+ __u32 mne_instance; /* target instance # */
+ __u32 mne_index; /* target index */
+ __u32 mne_length; /* length of this entry - by bytes */
+ __u8 mne_type; /* target type LDD_F_SV_TYPE_OST/MDT */
+ __u8 mne_nid_type; /* type of nid(mbz). for ipv6. */
+ __u8 mne_nid_size; /* size of each NID, by bytes */
+ __u8 mne_nid_count; /* # of NIDs in buffer */
+ union {
+ lnet_nid_t nids[0]; /* variable size buffer for NIDs. */
+ } u;
+};
+extern void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+
+struct mgs_config_body {
+ char mcb_name[MTI_NAME_MAXLEN]; /* logname */
+ __u64 mcb_offset; /* next index of config log to request */
+ __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */
+ __u8 mcb_reserved;
+ __u8 mcb_bits; /* bits unit size of config log */
+ __u32 mcb_units; /* # of units for bulk transfer */
+};
+extern void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+
+struct mgs_config_res {
+ __u64 mcr_offset; /* index of last config log */
+ __u64 mcr_size; /* size of the log */
+};
+extern void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+
+/* Config marker flags (in config log) */
+#define CM_START 0x01
+#define CM_END 0x02
+#define CM_SKIP 0x04
+#define CM_UPGRADE146 0x08
+#define CM_EXCLUDE 0x10
+#define CM_START_SKIP (CM_START | CM_SKIP)
+
+struct cfg_marker {
+ __u32 cm_step; /* aka config version */
+ __u32 cm_flags;
+ __u32 cm_vers; /* lustre release version number */
+ __u32 cm_padding; /* 64 bit align */
+ obd_time cm_createtime; /*when this record was first created */
+ obd_time cm_canceltime; /*when this record is no longer valid*/
+ char cm_tgtname[MTI_NAME_MAXLEN];
+ char cm_comment[MTI_NAME_MAXLEN];
+};
+
+extern void lustre_swab_cfg_marker(struct cfg_marker *marker,
+ int swab, int size);
+
+/*
+ * Opcodes for multiple servers.
+ */
+
+typedef enum {
+ OBD_PING = 400,
+ OBD_LOG_CANCEL,
+ OBD_QC_CALLBACK,
+ OBD_IDX_READ,
+ OBD_LAST_OPC
+} obd_cmd_t;
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/** Identifier for a single log object */
+struct llog_logid {
+ struct ost_id lgl_oi;
+ __u32 lgl_ogen;
+} __attribute__((packed));
+
+/** Records written to the CATALOGS list */
+#define CATLIST "CATALOGS"
+struct llog_catid {
+ struct llog_logid lci_logid;
+ __u32 lci_padding1;
+ __u32 lci_padding2;
+ __u32 lci_padding3;
+} __attribute__((packed));
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+#define LLOG_OP_MAGIC 0x10600000
+#define LLOG_OP_MASK 0xfff00000
+
+typedef enum {
+ LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000,
+ OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00,
+ /* OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, never used */
+ MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) |
+ REINT_UNLINK, /* obsolete after 2.5.0 */
+ MDS_UNLINK64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
+ REINT_UNLINK,
+ /* MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */
+ MDS_SETATTR64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
+ REINT_SETATTR,
+ OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000,
+ /* PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */
+ LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000,
+ /* LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, obsolete 1.8.0 */
+ CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000,
+ CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000,
+ LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539,
+ LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b,
+} llog_op_type;
+
+#define LLOG_REC_HDR_NEEDS_SWABBING(r) \
+ (((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC))
+
+/** Log record header - stored in little endian order.
+ * Each record must start with this struct, end with a llog_rec_tail,
+ * and be a multiple of 256 bits in size.
+ */
+struct llog_rec_hdr {
+ __u32 lrh_len;
+ __u32 lrh_index;
+ __u32 lrh_type;
+ __u32 lrh_id;
+};
+
+struct llog_rec_tail {
+ __u32 lrt_len;
+ __u32 lrt_index;
+};
+
+/* Where data follow just after header */
+#define REC_DATA(ptr) \
+ ((void *)((char *)ptr + sizeof(struct llog_rec_hdr)))
+
+#define REC_DATA_LEN(rec) \
+ (rec->lrh_len - sizeof(struct llog_rec_hdr) - \
+ sizeof(struct llog_rec_tail))
+
+struct llog_logid_rec {
+ struct llog_rec_hdr lid_hdr;
+ struct llog_logid lid_id;
+ __u32 lid_padding1;
+ __u64 lid_padding2;
+ __u64 lid_padding3;
+ struct llog_rec_tail lid_tail;
+} __attribute__((packed));
+
+struct llog_unlink_rec {
+ struct llog_rec_hdr lur_hdr;
+ obd_id lur_oid;
+ obd_count lur_oseq;
+ obd_count lur_count;
+ struct llog_rec_tail lur_tail;
+} __attribute__((packed));
+
+struct llog_unlink64_rec {
+ struct llog_rec_hdr lur_hdr;
+ struct lu_fid lur_fid;
+ obd_count lur_count; /* to destroy the lost precreated */
+ __u32 lur_padding1;
+ __u64 lur_padding2;
+ __u64 lur_padding3;
+ struct llog_rec_tail lur_tail;
+} __attribute__((packed));
+
+struct llog_setattr64_rec {
+ struct llog_rec_hdr lsr_hdr;
+ struct ost_id lsr_oi;
+ __u32 lsr_uid;
+ __u32 lsr_uid_h;
+ __u32 lsr_gid;
+ __u32 lsr_gid_h;
+ __u64 lsr_padding;
+ struct llog_rec_tail lsr_tail;
+} __attribute__((packed));
+
+struct llog_size_change_rec {
+ struct llog_rec_hdr lsc_hdr;
+ struct ll_fid lsc_fid;
+ __u32 lsc_ioepoch;
+ __u32 lsc_padding1;
+ __u64 lsc_padding2;
+ __u64 lsc_padding3;
+ struct llog_rec_tail lsc_tail;
+} __attribute__((packed));
+
+#define CHANGELOG_MAGIC 0xca103000
+
+/** \a changelog_rec_type's that can't be masked */
+#define CHANGELOG_MINMASK (1 << CL_MARK)
+/** bits covering all \a changelog_rec_type's */
+#define CHANGELOG_ALLMASK 0XFFFFFFFF
+/** default \a changelog_rec_type mask */
+#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK & ~(1 << CL_ATIME | 1 << CL_CLOSE)
+
+/* changelog llog name, needed by client replicators */
+#define CHANGELOG_CATALOG "changelog_catalog"
+
+struct changelog_setinfo {
+ __u64 cs_recno;
+ __u32 cs_id;
+} __attribute__((packed));
+
+/** changelog record */
+struct llog_changelog_rec {
+ struct llog_rec_hdr cr_hdr;
+ struct changelog_rec cr;
+ struct llog_rec_tail cr_tail; /**< for_sizezof_only */
+} __attribute__((packed));
+
+struct llog_changelog_ext_rec {
+ struct llog_rec_hdr cr_hdr;
+ struct changelog_ext_rec cr;
+ struct llog_rec_tail cr_tail; /**< for_sizezof_only */
+} __attribute__((packed));
+
+#define CHANGELOG_USER_PREFIX "cl"
+
+struct llog_changelog_user_rec {
+ struct llog_rec_hdr cur_hdr;
+ __u32 cur_id;
+ __u32 cur_padding;
+ __u64 cur_endrec;
+ struct llog_rec_tail cur_tail;
+} __attribute__((packed));
+
+/* Old llog gen for compatibility */
+struct llog_gen {
+ __u64 mnt_cnt;
+ __u64 conn_cnt;
+} __attribute__((packed));
+
+struct llog_gen_rec {
+ struct llog_rec_hdr lgr_hdr;
+ struct llog_gen lgr_gen;
+ __u64 padding1;
+ __u64 padding2;
+ __u64 padding3;
+ struct llog_rec_tail lgr_tail;
+};
+
+/* On-disk header structure of each log object, stored in little endian order */
+#define LLOG_CHUNK_SIZE 8192
+#define LLOG_HEADER_SIZE (96)
+#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
+
+/* flags for the logs */
+enum llog_flag {
+ LLOG_F_ZAP_WHEN_EMPTY = 0x1,
+ LLOG_F_IS_CAT = 0x2,
+ LLOG_F_IS_PLAIN = 0x4,
+};
+
+struct llog_log_hdr {
+ struct llog_rec_hdr llh_hdr;
+ obd_time llh_timestamp;
+ __u32 llh_count;
+ __u32 llh_bitmap_offset;
+ __u32 llh_size;
+ __u32 llh_flags;
+ __u32 llh_cat_idx;
+ /* for a catalog the first plain slot is next to it */
+ struct obd_uuid llh_tgtuuid;
+ __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23];
+ __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+ struct llog_rec_tail llh_tail;
+} __attribute__((packed));
+
+#define LLOG_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
+ llh->llh_bitmap_offset - \
+ sizeof(llh->llh_tail)) * 8)
+
+/** log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+ struct llog_logid lgc_lgl;
+ __u32 lgc_subsys;
+ __u32 lgc_index;
+ __u32 lgc_padding;
+} __attribute__((packed));
+
+/** llog protocol */
+enum llogd_rpc_ops {
+ LLOG_ORIGIN_HANDLE_CREATE = 501,
+ LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502,
+ LLOG_ORIGIN_HANDLE_READ_HEADER = 503,
+ LLOG_ORIGIN_HANDLE_WRITE_REC = 504,
+ LLOG_ORIGIN_HANDLE_CLOSE = 505,
+ LLOG_ORIGIN_CONNECT = 506,
+ LLOG_CATINFO = 507, /* deprecated */
+ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508,
+ LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/
+ LLOG_LAST_OPC,
+ LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE
+};
+
+struct llogd_body {
+ struct llog_logid lgd_logid;
+ __u32 lgd_ctxt_idx;
+ __u32 lgd_llh_flags;
+ __u32 lgd_index;
+ __u32 lgd_saved_index;
+ __u32 lgd_len;
+ __u64 lgd_cur_offset;
+} __attribute__((packed));
+
+struct llogd_conn_body {
+ struct llog_gen lgdc_gen;
+ struct llog_logid lgdc_logid;
+ __u32 lgdc_ctxt_idx;
+} __attribute__((packed));
+
+/* Note: 64-bit types are 64-bit aligned in structure */
+struct obdo {
+ obd_valid o_valid; /* hot fields in this obdo */
+ struct ost_id o_oi;
+ obd_id o_parent_seq;
+ obd_size o_size; /* o_size-o_blocks == ost_lvb */
+ obd_time o_mtime;
+ obd_time o_atime;
+ obd_time o_ctime;
+ obd_blocks o_blocks; /* brw: cli sent cached bytes */
+ obd_size o_grant;
+
+ /* 32-bit fields start here: keep an even number of them via padding */
+ obd_blksize o_blksize; /* optimal IO blocksize */
+ obd_mode o_mode; /* brw: cli sent cache remain */
+ obd_uid o_uid;
+ obd_gid o_gid;
+ obd_flag o_flags;
+ obd_count o_nlink; /* brw: checksum */
+ obd_count o_parent_oid;
+ obd_count o_misc; /* brw: o_dropped */
+
+ __u64 o_ioepoch; /* epoch in ost writes */
+ __u32 o_stripe_idx; /* holds stripe idx */
+ __u32 o_parent_ver;
+ struct lustre_handle o_handle; /* brw: lock handle to prolong
+ * locks */
+ struct llog_cookie o_lcookie; /* destroy: unlink cookie from
+ * MDS */
+ __u32 o_uid_h;
+ __u32 o_gid_h;
+
+ __u64 o_data_version; /* getattr: sum of iversion for
+ * each stripe.
+ * brw: grant space consumed on
+ * the client for the write */
+ __u64 o_padding_4;
+ __u64 o_padding_5;
+ __u64 o_padding_6;
+};
+
+#define o_dirty o_blocks
+#define o_undirty o_mode
+#define o_dropped o_misc
+#define o_cksum o_nlink
+#define o_grant_used o_data_version
+
+static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
+ struct obdo *wobdo, struct obdo *lobdo)
+{
+ memcpy(wobdo, lobdo, sizeof(*lobdo));
+ wobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
+ if (ocd == NULL)
+ return;
+
+ if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
+ fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) {
+ /* Currently OBD_FL_OSTID will only be used when 2.4 echo
+ * client communicate with pre-2.4 server */
+ wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid);
+ wobdo->o_oi.oi.oi_seq = fid_seq(&lobdo->o_oi.oi_fid);
+ }
+}
+
+static inline void lustre_get_wire_obdo(struct obd_connect_data *ocd,
+ struct obdo *lobdo, struct obdo *wobdo)
+{
+ obd_flag local_flags = 0;
+
+ if (lobdo->o_valid & OBD_MD_FLFLAGS)
+ local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK;
+
+ LASSERT(!(wobdo->o_flags & OBD_FL_LOCAL_MASK));
+
+ memcpy(lobdo, wobdo, sizeof(*lobdo));
+ if (local_flags != 0) {
+ lobdo->o_valid |= OBD_MD_FLFLAGS;
+ lobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
+ lobdo->o_flags |= local_flags;
+ }
+ if (ocd == NULL)
+ return;
+
+ if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
+ fid_seq_is_echo(wobdo->o_oi.oi.oi_seq)) {
+ /* see above */
+ lobdo->o_oi.oi_fid.f_seq = wobdo->o_oi.oi.oi_seq;
+ lobdo->o_oi.oi_fid.f_oid = wobdo->o_oi.oi.oi_id;
+ lobdo->o_oi.oi_fid.f_ver = 0;
+ }
+}
+
+extern void lustre_swab_obdo (struct obdo *o);
+
+/* request structure for OST's */
+struct ost_body {
+ struct obdo oa;
+};
+
+/* Key for FIEMAP to be used in get_info calls */
+struct ll_fiemap_info_key {
+ char name[8];
+ struct obdo oa;
+ struct ll_user_fiemap fiemap;
+};
+
+extern void lustre_swab_ost_body (struct ost_body *b);
+extern void lustre_swab_ost_last_id(obd_id *id);
+extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
+
+extern void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count);
+extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+
+/* llog_swab.c */
+extern void lustre_swab_llogd_body (struct llogd_body *d);
+extern void lustre_swab_llog_hdr (struct llog_log_hdr *h);
+extern void lustre_swab_llogd_conn_body (struct llogd_conn_body *d);
+extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
+extern void lustre_swab_llog_id(struct llog_logid *lid);
+
+struct lustre_cfg;
+extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
+
+/* Functions for dumping PTLRPC fields */
+void dump_rniobuf(struct niobuf_remote *rnb);
+void dump_ioo(struct obd_ioobj *nb);
+void dump_obdo(struct obdo *oa);
+void dump_ost_body(struct ost_body *ob);
+void dump_rcs(__u32 *rc);
+
+#define IDX_INFO_MAGIC 0x3D37CC37
+
+/* Index file transfer through the network. The server serializes the index into
+ * a byte stream which is sent to the client via a bulk transfer */
+struct idx_info {
+ __u32 ii_magic;
+
+ /* reply: see idx_info_flags below */
+ __u32 ii_flags;
+
+ /* request & reply: number of lu_idxpage (to be) transferred */
+ __u16 ii_count;
+ __u16 ii_pad0;
+
+ /* request: requested attributes passed down to the iterator API */
+ __u32 ii_attrs;
+
+ /* request & reply: index file identifier (FID) */
+ struct lu_fid ii_fid;
+
+ /* reply: version of the index file before starting to walk the index.
+ * Please note that the version can be modified at any time during the
+ * transfer */
+ __u64 ii_version;
+
+ /* request: hash to start with:
+ * reply: hash of the first entry of the first lu_idxpage and hash
+ * of the entry to read next if any */
+ __u64 ii_hash_start;
+ __u64 ii_hash_end;
+
+ /* reply: size of keys in lu_idxpages, minimal one if II_FL_VARKEY is
+ * set */
+ __u16 ii_keysize;
+
+ /* reply: size of records in lu_idxpages, minimal one if II_FL_VARREC
+ * is set */
+ __u16 ii_recsize;
+
+ __u32 ii_pad1;
+ __u64 ii_pad2;
+ __u64 ii_pad3;
+};
+extern void lustre_swab_idx_info(struct idx_info *ii);
+
+#define II_END_OFF MDS_DIR_END_OFF /* all entries have been read */
+
+/* List of flags used in idx_info::ii_flags */
+enum idx_info_flags {
+ II_FL_NOHASH = 1 << 0, /* client doesn't care about hash value */
+ II_FL_VARKEY = 1 << 1, /* keys can be of variable size */
+ II_FL_VARREC = 1 << 2, /* records can be of variable size */
+ II_FL_NONUNQ = 1 << 3, /* index supports non-unique keys */
+};
+
+#define LIP_MAGIC 0x8A6D6B6C
+
+/* 4KB (= LU_PAGE_SIZE) container gathering key/record pairs */
+struct lu_idxpage {
+ /* 16-byte header */
+ __u32 lip_magic;
+ __u16 lip_flags;
+ __u16 lip_nr; /* number of entries in the container */
+ __u64 lip_pad0; /* additional padding for future use */
+
+ /* key/record pairs are stored in the remaining 4080 bytes.
+ * depending upon the flags in idx_info::ii_flags, each key/record
+ * pair might be preceded by:
+ * - a hash value
+ * - the key size (II_FL_VARKEY is set)
+ * - the record size (II_FL_VARREC is set)
+ *
+ * For the time being, we only support fixed-size key & record. */
+ char lip_entries[0];
+};
+extern void lustre_swab_lip_header(struct lu_idxpage *lip);
+
+#define LIP_HDR_SIZE (offsetof(struct lu_idxpage, lip_entries))
+
+/* Gather all possible type associated with a 4KB container */
+union lu_page {
+ struct lu_dirpage lp_dir; /* for MDS_READPAGE */
+ struct lu_idxpage lp_idx; /* for OBD_IDX_READ */
+ char lp_array[LU_PAGE_SIZE];
+};
+
+/* security opcodes */
+typedef enum {
+ SEC_CTX_INIT = 801,
+ SEC_CTX_INIT_CONT = 802,
+ SEC_CTX_FINI = 803,
+ SEC_LAST_OPC,
+ SEC_FIRST_OPC = SEC_CTX_INIT
+} sec_cmd_t;
+
+/*
+ * capa related definitions
+ */
+#define CAPA_HMAC_MAX_LEN 64
+#define CAPA_HMAC_KEY_MAX_LEN 56
+
+/* NB take care when changing the sequence of elements this struct,
+ * because the offset info is used in find_capa() */
+struct lustre_capa {
+ struct lu_fid lc_fid; /** fid */
+ __u64 lc_opc; /** operations allowed */
+ __u64 lc_uid; /** file owner */
+ __u64 lc_gid; /** file group */
+ __u32 lc_flags; /** HMAC algorithm & flags */
+ __u32 lc_keyid; /** key# used for the capability */
+ __u32 lc_timeout; /** capa timeout value (sec) */
+ __u32 lc_expiry; /** expiry time (sec) */
+ __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa(struct lustre_capa *c);
+
+/** lustre_capa::lc_opc */
+enum {
+ CAPA_OPC_BODY_WRITE = 1<<0, /**< write object data */
+ CAPA_OPC_BODY_READ = 1<<1, /**< read object data */
+ CAPA_OPC_INDEX_LOOKUP = 1<<2, /**< lookup object fid */
+ CAPA_OPC_INDEX_INSERT = 1<<3, /**< insert object fid */
+ CAPA_OPC_INDEX_DELETE = 1<<4, /**< delete object fid */
+ CAPA_OPC_OSS_WRITE = 1<<5, /**< write oss object data */
+ CAPA_OPC_OSS_READ = 1<<6, /**< read oss object data */
+ CAPA_OPC_OSS_TRUNC = 1<<7, /**< truncate oss object */
+ CAPA_OPC_OSS_DESTROY = 1<<8, /**< destroy oss object */
+ CAPA_OPC_META_WRITE = 1<<9, /**< write object meta data */
+ CAPA_OPC_META_READ = 1<<10, /**< read object meta data */
+};
+
+#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
+#define CAPA_OPC_MDS_ONLY \
+ (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
+ CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
+#define CAPA_OPC_OSS_ONLY \
+ (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \
+ CAPA_OPC_OSS_DESTROY)
+#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
+#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
+
+/* MDS capability covers object capability for operations of body r/w
+ * (dir readpage/sendpage), index lookup/insert/delete and meta data r/w,
+ * while OSS capability only covers object capability for operations of
+ * oss data(file content) r/w/truncate.
+ */
+static inline int capa_for_mds(struct lustre_capa *c)
+{
+ return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) != 0;
+}
+
+static inline int capa_for_oss(struct lustre_capa *c)
+{
+ return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) == 0;
+}
+
+/* lustre_capa::lc_hmac_alg */
+enum {
+ CAPA_HMAC_ALG_SHA1 = 1, /**< sha1 algorithm */
+ CAPA_HMAC_ALG_MAX,
+};
+
+#define CAPA_FL_MASK 0x00ffffff
+#define CAPA_HMAC_ALG_MASK 0xff000000
+
+struct lustre_capa_key {
+ __u64 lk_seq; /**< mds# */
+ __u32 lk_keyid; /**< key# */
+ __u32 lk_padding;
+ __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /**< key */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+
+/** The link ea holds 1 \a link_ea_entry for each hardlink */
+#define LINK_EA_MAGIC 0x11EAF1DFUL
+struct link_ea_header {
+ __u32 leh_magic;
+ __u32 leh_reccount;
+ __u64 leh_len; /* total size */
+ /* future use */
+ __u32 padding1;
+ __u32 padding2;
+};
+
+/** Hardlink data is name and parent fid.
+ * Stored in this crazy struct for maximum packing and endian-neutrality
+ */
+struct link_ea_entry {
+ /** __u16 stored big-endian, unaligned */
+ unsigned char lee_reclen[2];
+ unsigned char lee_parent_fid[sizeof(struct lu_fid)];
+ char lee_name[0];
+}__attribute__((packed));
+
+/** fid2path request/reply structure */
+struct getinfo_fid2path {
+ struct lu_fid gf_fid;
+ __u64 gf_recno;
+ __u32 gf_linkno;
+ __u32 gf_pathlen;
+ char gf_path[0];
+} __attribute__((packed));
+
+void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+
+enum {
+ LAYOUT_INTENT_ACCESS = 0,
+ LAYOUT_INTENT_READ = 1,
+ LAYOUT_INTENT_WRITE = 2,
+ LAYOUT_INTENT_GLIMPSE = 3,
+ LAYOUT_INTENT_TRUNC = 4,
+ LAYOUT_INTENT_RELEASE = 5,
+ LAYOUT_INTENT_RESTORE = 6
+};
+
+/* enqueue layout lock with intent */
+struct layout_intent {
+ __u32 li_opc; /* intent operation for enqueue, read, write etc */
+ __u32 li_flags;
+ __u64 li_start;
+ __u64 li_end;
+};
+
+void lustre_swab_layout_intent(struct layout_intent *li);
+
+/**
+ * On the wire version of hsm_progress structure.
+ *
+ * Contains the userspace hsm_progress and some internal fields.
+ */
+struct hsm_progress_kernel {
+ /* Field taken from struct hsm_progress */
+ lustre_fid hpk_fid;
+ __u64 hpk_cookie;
+ struct hsm_extent hpk_extent;
+ __u16 hpk_flags;
+ __u16 hpk_errval; /* positive val */
+ __u32 hpk_padding1;
+ /* Additional fields */
+ __u64 hpk_data_version;
+ __u64 hpk_padding2;
+} __attribute__((packed));
+
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_current_action(struct hsm_current_action *action);
+extern void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
+extern void lustre_swab_hsm_request(struct hsm_request *hr);
+
+/**
+ * These are object update opcode under UPDATE_OBJ, which is currently
+ * being used by cross-ref operations between MDT.
+ *
+ * During the cross-ref operation, the Master MDT, which the client send the
+ * request to, will disassembly the operation into object updates, then OSP
+ * will send these updates to the remote MDT to be executed.
+ *
+ * Update request format
+ * magic: UPDATE_BUFFER_MAGIC_V1
+ * Count: How many updates in the req.
+ * bufs[0] : following are packets of object.
+ * update[0]:
+ * type: object_update_op, the op code of update
+ * fid: The object fid of the update.
+ * lens/bufs: other parameters of the update.
+ * update[1]:
+ * type: object_update_op, the op code of update
+ * fid: The object fid of the update.
+ * lens/bufs: other parameters of the update.
+ * ..........
+ * update[7]: type: object_update_op, the op code of update
+ * fid: The object fid of the update.
+ * lens/bufs: other parameters of the update.
+ * Current 8 maxim updates per object update request.
+ *
+ *******************************************************************
+ * update reply format:
+ *
+ * ur_version: UPDATE_REPLY_V1
+ * ur_count: The count of the reply, which is usually equal
+ * to the number of updates in the request.
+ * ur_lens: The reply lengths of each object update.
+ *
+ * replies: 1st update reply [4bytes_ret: other body]
+ * 2nd update reply [4bytes_ret: other body]
+ * .....
+ * nth update reply [4bytes_ret: other body]
+ *
+ * For each reply of the update, the format would be
+ * result(4 bytes):Other stuff
+ */
+
+#define UPDATE_MAX_OPS 10
+#define UPDATE_BUFFER_MAGIC_V1 0xBDDE0001
+#define UPDATE_BUFFER_MAGIC UPDATE_BUFFER_MAGIC_V1
+#define UPDATE_BUF_COUNT 8
+enum object_update_op {
+ OBJ_CREATE = 1,
+ OBJ_DESTROY = 2,
+ OBJ_REF_ADD = 3,
+ OBJ_REF_DEL = 4,
+ OBJ_ATTR_SET = 5,
+ OBJ_ATTR_GET = 6,
+ OBJ_XATTR_SET = 7,
+ OBJ_XATTR_GET = 8,
+ OBJ_INDEX_LOOKUP = 9,
+ OBJ_INDEX_INSERT = 10,
+ OBJ_INDEX_DELETE = 11,
+ OBJ_LAST
+};
+
+struct update {
+ __u32 u_type;
+ __u32 u_batchid;
+ struct lu_fid u_fid;
+ __u32 u_lens[UPDATE_BUF_COUNT];
+ __u32 u_bufs[0];
+};
+
+struct update_buf {
+ __u32 ub_magic;
+ __u32 ub_count;
+ __u32 ub_bufs[0];
+};
+
+#define UPDATE_REPLY_V1 0x00BD0001
+struct update_reply {
+ __u32 ur_version;
+ __u32 ur_count;
+ __u32 ur_lens[0];
+};
+
+void lustre_swab_update_buf(struct update_buf *ub);
+void lustre_swab_update_reply_buf(struct update_reply *ur);
+
+/** layout swap request structure
+ * fid1 and fid2 are in mdt_body
+ */
+struct mdc_swap_layouts {
+ __u64 msl_flags;
+} __packed;
+
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
+
+#endif
+/** @} lustreidl */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h
new file mode 100644
index 000000000000..1c87a61a7fc1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h
@@ -0,0 +1,95 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre/lustre_lfsck_user.h
+ *
+ * Lustre LFSCK userspace interfaces.
+ *
+ * Author: Fan Yong <yong.fan@whamcloud.com>
+ */
+
+#ifndef _LUSTRE_LFSCK_USER_H
+# define _LUSTRE_LFSCK_USER_H
+
+enum lfsck_param_flags {
+ /* Reset LFSCK iterator position to the device beginning. */
+ LPF_RESET = 0x0001,
+
+ /* Exit when fail. */
+ LPF_FAILOUT = 0x0002,
+
+ /* Dryrun mode, only check without modification */
+ LPF_DRYRUN = 0x0004,
+};
+
+enum lfsck_type {
+ /* For MDT-OST consistency check/repair. */
+ LT_LAYOUT = 0x0001,
+
+ /* For MDT-MDT consistency check/repair. */
+ LT_DNE = 0x0002,
+
+ /* For FID-in-dirent and linkEA consistency check/repair. */
+ LT_NAMESPACE = 0x0004,
+};
+
+#define LFSCK_VERSION_V1 1
+#define LFSCK_VERSION_V2 2
+
+#define LFSCK_TYPES_ALL ((__u16)(~0))
+#define LFSCK_TYPES_DEF ((__u16)0)
+#define LFSCK_TYPES_SUPPORTED LT_NAMESPACE
+
+#define LFSCK_SPEED_NO_LIMIT 0
+#define LFSCK_SPEED_LIMIT_DEF LFSCK_SPEED_NO_LIMIT
+
+enum lfsck_start_valid {
+ LSV_SPEED_LIMIT = 0x00000001,
+ LSV_ERROR_HANDLE = 0x00000002,
+ LSV_DRYRUN = 0x00000004,
+};
+
+/* Arguments for starting lfsck. */
+struct lfsck_start {
+ /* Which arguments are valid, see 'enum lfsck_start_valid'. */
+ __u32 ls_valid;
+
+ /* How many items can be scanned at most per second. */
+ __u32 ls_speed_limit;
+
+ /* For compatibility between user space tools and kernel service. */
+ __u16 ls_version;
+
+ /* Which LFSCK components to be (have been) started. */
+ __u16 ls_active;
+
+ /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */
+ __u16 ls_flags;
+
+ /* For 64-bits aligned. */
+ __u16 ls_padding;
+};
+
+#endif /* _LUSTRE_LFSCK_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
new file mode 100644
index 000000000000..7e9f57507f04
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -0,0 +1,1145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/lustre_user.h
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _LUSTRE_USER_H
+#define _LUSTRE_USER_H
+
+/** \defgroup lustreuser lustreuser
+ *
+ * @{
+ */
+
+#include <lustre/ll_fiemap.h>
+#include <linux/lustre_user.h>
+
+/* for statfs() */
+#define LL_SUPER_MAGIC 0x0BD00BD0
+
+#ifndef FSFILT_IOC_GETFLAGS
+#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long)
+#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long)
+#define FSFILT_IOC_GETVERSION _IOR('f', 3, long)
+#define FSFILT_IOC_SETVERSION _IOW('f', 4, long)
+#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long)
+#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long)
+#define FSFILT_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap)
+#endif
+
+/* FIEMAP flags supported by Lustre */
+#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
+
+enum obd_statfs_state {
+ OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */
+ OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */
+ OS_STATE_RDONLY_1 = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
+ OS_STATE_RDONLY_2 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
+ OS_STATE_RDONLY_3 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
+};
+
+struct obd_statfs {
+ __u64 os_type;
+ __u64 os_blocks;
+ __u64 os_bfree;
+ __u64 os_bavail;
+ __u64 os_files;
+ __u64 os_ffree;
+ __u8 os_fsid[40];
+ __u32 os_bsize;
+ __u32 os_namelen;
+ __u64 os_maxbytes;
+ __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
+ __u32 os_fprecreated; /* objs available now to the caller */
+ /* used in QoS code to find preferred
+ * OSTs */
+ __u32 os_spare2;
+ __u32 os_spare3;
+ __u32 os_spare4;
+ __u32 os_spare5;
+ __u32 os_spare6;
+ __u32 os_spare7;
+ __u32 os_spare8;
+ __u32 os_spare9;
+};
+
+/**
+ * File IDentifier.
+ *
+ * FID is a cluster-wide unique identifier of a file or an object (stripe).
+ * FIDs are never reused.
+ **/
+struct lu_fid {
+ /**
+ * FID sequence. Sequence is a unit of migration: all files (objects)
+ * with FIDs from a given sequence are stored on the same server.
+ * Lustre should support 2^64 objects, so even if each sequence
+ * has only a single object we can still enumerate 2^64 objects.
+ **/
+ __u64 f_seq;
+ /* FID number within sequence. */
+ __u32 f_oid;
+ /**
+ * FID version, used to distinguish different versions (in the sense
+ * of snapshots, etc.) of the same file system object. Not currently
+ * used.
+ **/
+ __u32 f_ver;
+};
+
+struct filter_fid {
+ struct lu_fid ff_parent; /* ff_parent.f_ver == file stripe number */
+};
+
+/* keep this one for compatibility */
+struct filter_fid_old {
+ struct lu_fid ff_parent;
+ __u64 ff_objid;
+ __u64 ff_seq;
+};
+
+/* Userspace should treat lu_fid as opaque, and only use the following methods
+ * to print or parse them. Other functions (e.g. compare, swab) could be moved
+ * here from lustre_idl.h if needed. */
+typedef struct lu_fid lustre_fid;
+
+/**
+ * Following struct for object attributes, that will be kept inode's EA.
+ * Introduced in 2.0 release (please see b15993, for details)
+ * Added to all objects since Lustre 2.4 as contains self FID
+ */
+struct lustre_mdt_attrs {
+ /**
+ * Bitfield for supported data in this structure. From enum lma_compat.
+ * lma_self_fid and lma_flags are always available.
+ */
+ __u32 lma_compat;
+ /**
+ * Per-file incompat feature list. Lustre version should support all
+ * flags set in this field. The supported feature mask is available in
+ * LMA_INCOMPAT_SUPP.
+ */
+ __u32 lma_incompat;
+ /** FID of this inode */
+ struct lu_fid lma_self_fid;
+};
+
+/**
+ * Prior to 2.4, the LMA structure also included SOM attributes which has since
+ * been moved to a dedicated xattr
+ * lma_flags was also removed because of lma_compat/incompat fields.
+ */
+#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
+
+/**
+ * OST object IDentifier.
+ */
+struct ost_id {
+ union {
+ struct ostid {
+ __u64 oi_id;
+ __u64 oi_seq;
+ } oi;
+ struct lu_fid oi_fid;
+ };
+};
+
+#define DOSTID LPX64":"LPU64
+#define POSTID(oi) ostid_seq(oi), ostid_id(oi)
+
+/*
+ * The ioctl naming rules:
+ * LL_* - works on the currently opened filehandle instead of parent dir
+ * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
+ * *_MDC_* - gets/sets data related to MDC
+ * *_LOV_* - gets/sets data related to OSC/LOV
+ * *FILE* - called on parent dir and passes in a filename
+ * *STRIPE* - set/get lov_user_md
+ * *INFO - set/get lov_user_mds_data
+ */
+/* see <lustre_lib.h> for ioctl numberss 101-150 */
+#define LL_IOC_GETFLAGS _IOR ('f', 151, long)
+#define LL_IOC_SETFLAGS _IOW ('f', 152, long)
+#define LL_IOC_CLRFLAGS _IOW ('f', 153, long)
+/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
+#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
+/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
+#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
+/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
+#define LL_IOC_LOV_SETEA _IOW ('f', 156, long)
+#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long)
+#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long)
+/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
+#define LL_IOC_QUOTACHECK _IOW ('f', 160, int)
+/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
+#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
+#define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
+#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
+#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
+#define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
+#define LL_IOC_RMTACL _IOW ('f', 167, long)
+#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long)
+#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
+#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
+#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
+#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
+#define LL_IOC_PATH2FID _IOR ('f', 173, long)
+#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
+#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
+
+/* see <lustre_lib.h> for ioctl numbers 177-210 */
+
+#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
+#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
+#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
+#define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *)
+#define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *)
+#define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request)
+#define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request)
+#define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version)
+#define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \
+ struct lustre_swap_layouts)
+#define LL_IOC_HSM_ACTION _IOR('f', 220, \
+ struct hsm_current_action)
+/* see <lustre_lib.h> for ioctl numbers 221-232 */
+
+#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
+#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
+#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
+
+#define LL_STATFS_LMV 1
+#define LL_STATFS_LOV 2
+#define LL_STATFS_NODELAY 4
+
+#define IOC_MDC_TYPE 'i'
+#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
+#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
+#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
+
+/* Keep these for backward compartability. */
+#define LL_IOC_OBD_STATFS IOC_OBD_STATFS
+#define IOC_MDC_GETSTRIPE IOC_MDC_GETFILESTRIPE
+
+
+#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
+
+/* Hopefully O_LOV_DELAY_CREATE does not conflict with standard O_xxx flags.
+ * Previously it was defined as 0100000000 and conflicts with FMODE_NONOTIFY
+ * which was added since kernel 2.6.36, so we redefine it as 020000000.
+ * To be compatible with old version's statically linked binary, finally we
+ * define it as (020000000 | 0100000000).
+ * */
+#define O_LOV_DELAY_CREATE 0120000000
+
+#define LL_FILE_IGNORE_LOCK 0x00000001
+#define LL_FILE_GROUP_LOCKED 0x00000002
+#define LL_FILE_READAHEA 0x00000004
+#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
+#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
+#define LL_FILE_RMTACL 0x00000020
+
+#define LOV_USER_MAGIC_V1 0x0BD10BD0
+#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
+
+#define LMV_MAGIC_V1 0x0CD10CD0 /*normal stripe lmv magic */
+#define LMV_USER_MAGIC 0x0CD20CD0 /*default lmv magic*/
+
+#define LOV_PATTERN_RAID0 0x001
+#define LOV_PATTERN_RAID1 0x002
+#define LOV_PATTERN_FIRST 0x100
+
+#define LOV_MAXPOOLNAME 16
+#define LOV_POOLNAMEF "%.16s"
+
+#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
+#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
+#define LOV_MAX_STRIPE_COUNT_OLD 160
+/* This calculation is crafted so that input of 4096 will result in 160
+ * which in turn is equal to old maximal stripe count.
+ * XXX: In fact this is too simpified for now, what it also need is to get
+ * ea_type argument to clearly know how much space each stripe consumes.
+ *
+ * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
+ * allocation that is sufficient for the current generation of systems.
+ *
+ * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
+#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
+#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
+#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
+
+#define lov_user_ost_data lov_user_ost_data_v1
+struct lov_user_ost_data_v1 { /* per-stripe data structure */
+ struct ost_id l_ost_oi; /* OST object ID */
+ __u32 l_ost_gen; /* generation of this OST index */
+ __u32 l_ost_idx; /* OST index in LOV */
+} __attribute__((packed));
+
+#define lov_user_md lov_user_md_v1
+struct lov_user_md_v1 { /* LOV EA user data (host-endian) */
+ __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ struct ost_id lmm_oi; /* LOV object ID */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ union {
+ __u16 lmm_stripe_offset; /* starting stripe offset in
+ * lmm_objects, use when writing */
+ __u16 lmm_layout_gen; /* layout generation number
+ * used when reading */
+ };
+ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed, __may_alias__));
+
+struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
+ __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
+ __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+ struct ost_id lmm_oi; /* LOV object ID */
+ __u32 lmm_stripe_size; /* size of stripe in bytes */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ union {
+ __u16 lmm_stripe_offset; /* starting stripe offset in
+ * lmm_objects, use when writing */
+ __u16 lmm_layout_gen; /* layout generation number
+ * used when reading */
+ };
+ char lmm_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed));
+
+/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
+ * use this. It is unsafe to #define those values in this header as it
+ * is possible the application has already #included <sys/stat.h>. */
+#ifdef HAVE_LOV_USER_MDS_DATA
+#define lov_user_mds_data lov_user_mds_data_v1
+struct lov_user_mds_data_v1 {
+ lstat_t lmd_st; /* MDS stat struct */
+ struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
+} __attribute__((packed));
+
+struct lov_user_mds_data_v3 {
+ lstat_t lmd_st; /* MDS stat struct */
+ struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */
+} __attribute__((packed));
+#endif
+
+/* keep this to be the same size as lov_user_ost_data_v1 */
+struct lmv_user_mds_data {
+ struct lu_fid lum_fid;
+ __u32 lum_padding;
+ __u32 lum_mds;
+};
+
+/* lum_type */
+enum {
+ LMV_STRIPE_TYPE = 0,
+ LMV_DEFAULT_TYPE = 1,
+};
+
+#define lmv_user_md lmv_user_md_v1
+struct lmv_user_md_v1 {
+ __u32 lum_magic; /* must be the first field */
+ __u32 lum_stripe_count; /* dirstripe count */
+ __u32 lum_stripe_offset; /* MDT idx for default dirstripe */
+ __u32 lum_hash_type; /* Dir stripe policy */
+ __u32 lum_type; /* LMV type: default or normal */
+ __u32 lum_padding1;
+ __u32 lum_padding2;
+ __u32 lum_padding3;
+ char lum_pool_name[LOV_MAXPOOLNAME];
+ struct lmv_user_mds_data lum_objects[0];
+};
+
+static inline int lmv_user_md_size(int stripes, int lmm_magic)
+{
+ return sizeof(struct lmv_user_md) +
+ stripes * sizeof(struct lmv_user_mds_data);
+}
+
+extern void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
+struct ll_recreate_obj {
+ __u64 lrc_id;
+ __u32 lrc_ost_idx;
+};
+
+struct ll_fid {
+ __u64 id; /* holds object id */
+ __u32 generation; /* holds object generation */
+ __u32 f_type; /* holds object type or stripe idx when passing it to
+ * OST for saving into EA. */
+};
+
+#define UUID_MAX 40
+struct obd_uuid {
+ char uuid[UUID_MAX];
+};
+
+static inline int obd_uuid_equals(const struct obd_uuid *u1,
+ const struct obd_uuid *u2)
+{
+ return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
+}
+
+static inline int obd_uuid_empty(struct obd_uuid *uuid)
+{
+ return uuid->uuid[0] == '\0';
+}
+
+static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
+{
+ strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
+ uuid->uuid[sizeof(*uuid) - 1] = '\0';
+}
+
+/* For printf's only, make sure uuid is terminated */
+static inline char *obd_uuid2str(struct obd_uuid *uuid)
+{
+ if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
+ /* Obviously not safe, but for printfs, no real harm done...
+ we're always null-terminated, even in a race. */
+ static char temp[sizeof(*uuid)];
+ memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
+ temp[sizeof(*uuid) - 1] = '\0';
+ return temp;
+ }
+ return (char *)(uuid->uuid);
+}
+
+/* Extract fsname from uuid (or target name) of a target
+ e.g. (myfs-OST0007_UUID -> myfs)
+ see also deuuidify. */
+static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
+{
+ char *p;
+
+ strncpy(buf, uuid, buflen - 1);
+ buf[buflen - 1] = '\0';
+ p = strrchr(buf, '-');
+ if (p)
+ *p = '\0';
+}
+
+/* printf display format
+ e.g. printf("file FID is "DFID"\n", PFID(fid)); */
+#define DFID_NOBRACE LPX64":0x%x:0x%x"
+#define DFID "["DFID_NOBRACE"]"
+#define PFID(fid) \
+ (fid)->f_seq, \
+ (fid)->f_oid, \
+ (fid)->f_ver
+
+/* scanf input parse format -- strip '[' first.
+ e.g. sscanf(fidstr, SFID, RFID(&fid)); */
+/* #define SFID "0x"LPX64i":0x"LPSZX":0x"LPSZX""
+liblustreapi.c:2893: warning: format '%lx' expects type 'long unsigned int *', but argument 4 has type 'unsigned int *'
+liblustreapi.c:2893: warning: format '%lx' expects type 'long unsigned int *', but argument 5 has type 'unsigned int *'
+*/
+#define SFID "0x"LPX64i":0x%x:0x%x"
+#define RFID(fid) \
+ &((fid)->f_seq), \
+ &((fid)->f_oid), \
+ &((fid)->f_ver)
+
+
+/********* Quotas **********/
+
+/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
+#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */
+#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */
+#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
+#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
+#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
+#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
+/* lustre-specific control commands */
+#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */
+#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */
+
+#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */
+
+struct if_quotacheck {
+ char obd_type[16];
+ struct obd_uuid obd_uuid;
+};
+
+#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
+
+/* permission */
+#define N_PERMS_MAX 64
+
+struct perm_downcall_data {
+ __u64 pdd_nid;
+ __u32 pdd_perm;
+ __u32 pdd_padding;
+};
+
+struct identity_downcall_data {
+ __u32 idd_magic;
+ __u32 idd_err;
+ __u32 idd_uid;
+ __u32 idd_gid;
+ __u32 idd_nperms;
+ __u32 idd_ngroups;
+ struct perm_downcall_data idd_perms[N_PERMS_MAX];
+ __u32 idd_groups[0];
+};
+
+/* for non-mapped uid/gid */
+#define NOBODY_UID 99
+#define NOBODY_GID 99
+
+#define INVALID_ID (-1)
+
+enum {
+ RMT_LSETFACL = 1,
+ RMT_LGETFACL = 2,
+ RMT_RSETFACL = 3,
+ RMT_RGETFACL = 4
+};
+
+#ifdef NEED_QUOTA_DEFS
+#ifndef QIF_BLIMITS
+#define QIF_BLIMITS 1
+#define QIF_SPACE 2
+#define QIF_ILIMITS 4
+#define QIF_INODES 8
+#define QIF_BTIME 16
+#define QIF_ITIME 32
+#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
+#define QIF_USAGE (QIF_SPACE | QIF_INODES)
+#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
+#define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
+#endif
+
+#endif /* !__KERNEL__ */
+
+/* lustre volatile file support
+ * file name header: .^L^S^T^R:volatile"
+ */
+#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
+#define LUSTRE_VOLATILE_HDR_LEN 14
+/* hdr + MDT index */
+#define LUSTRE_VOLATILE_IDX LUSTRE_VOLATILE_HDR":%.4X:"
+
+typedef enum lustre_quota_version {
+ LUSTRE_QUOTA_V2 = 1
+} lustre_quota_version_t;
+
+/* XXX: same as if_dqinfo struct in kernel */
+struct obd_dqinfo {
+ __u64 dqi_bgrace;
+ __u64 dqi_igrace;
+ __u32 dqi_flags;
+ __u32 dqi_valid;
+};
+
+/* XXX: same as if_dqblk struct in kernel, plus one padding */
+struct obd_dqblk {
+ __u64 dqb_bhardlimit;
+ __u64 dqb_bsoftlimit;
+ __u64 dqb_curspace;
+ __u64 dqb_ihardlimit;
+ __u64 dqb_isoftlimit;
+ __u64 dqb_curinodes;
+ __u64 dqb_btime;
+ __u64 dqb_itime;
+ __u32 dqb_valid;
+ __u32 dqb_padding;
+};
+
+enum {
+ QC_GENERAL = 0,
+ QC_MDTIDX = 1,
+ QC_OSTIDX = 2,
+ QC_UUID = 3
+};
+
+struct if_quotactl {
+ __u32 qc_cmd;
+ __u32 qc_type;
+ __u32 qc_id;
+ __u32 qc_stat;
+ __u32 qc_valid;
+ __u32 qc_idx;
+ struct obd_dqinfo qc_dqinfo;
+ struct obd_dqblk qc_dqblk;
+ char obd_type[16];
+ struct obd_uuid obd_uuid;
+};
+
+/* swap layout flags */
+#define SWAP_LAYOUTS_CHECK_DV1 (1 << 0)
+#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
+#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
+#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
+struct lustre_swap_layouts {
+ __u64 sl_flags;
+ __u32 sl_fd;
+ __u32 sl_gid;
+ __u64 sl_dv1;
+ __u64 sl_dv2;
+};
+
+
+/********* Changelogs **********/
+/** Changelog record types */
+enum changelog_rec_type {
+ CL_MARK = 0,
+ CL_CREATE = 1, /* namespace */
+ CL_MKDIR = 2, /* namespace */
+ CL_HARDLINK = 3, /* namespace */
+ CL_SOFTLINK = 4, /* namespace */
+ CL_MKNOD = 5, /* namespace */
+ CL_UNLINK = 6, /* namespace */
+ CL_RMDIR = 7, /* namespace */
+ CL_RENAME = 8, /* namespace */
+ CL_EXT = 9, /* namespace extended record (2nd half of rename) */
+ CL_OPEN = 10, /* not currently used */
+ CL_CLOSE = 11, /* may be written to log only with mtime change */
+ CL_LAYOUT = 12, /* file layout/striping modified */
+ CL_TRUNC = 13,
+ CL_SETATTR = 14,
+ CL_XATTR = 15,
+ CL_HSM = 16, /* HSM specific events, see flags */
+ CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
+ CL_CTIME = 18,
+ CL_ATIME = 19,
+ CL_LAST
+};
+
+static inline const char *changelog_type2str(int type) {
+ static const char *changelog_str[] = {
+ "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
+ "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
+ "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME",
+ };
+
+ if (type >= 0 && type < CL_LAST)
+ return changelog_str[type];
+ return NULL;
+}
+
+/* per-record flags */
+#define CLF_VERSION 0x1000
+#define CLF_EXT_VERSION 0x2000
+#define CLF_FLAGSHIFT 12
+#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1)
+#define CLF_VERMASK (~CLF_FLAGMASK)
+/* Anything under the flagmask may be per-type (if desired) */
+/* Flags for unlink */
+#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
+#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
+ /* HSM cleaning needed */
+/* Flags for rename */
+#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */
+
+/* Flags for HSM */
+/* 12b used (from high weight to low weight):
+ * 2b for flags
+ * 3b for event
+ * 7b for error code
+ */
+#define CLF_HSM_ERR_L 0 /* HSM return code, 7 bits */
+#define CLF_HSM_ERR_H 6
+#define CLF_HSM_EVENT_L 7 /* HSM event, 3 bits, see enum hsm_event */
+#define CLF_HSM_EVENT_H 9
+#define CLF_HSM_FLAG_L 10 /* HSM flags, 2 bits, 1 used, 1 spare */
+#define CLF_HSM_FLAG_H 11
+#define CLF_HSM_SPARE_L 12 /* 4 spare bits */
+#define CLF_HSM_SPARE_H 15
+#define CLF_HSM_LAST 15
+
+/* Remove bits higher than _h, then extract the value
+ * between _h and _l by shifting lower weigth to bit 0. */
+#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \
+ >> (CLF_HSM_LAST - _h + _l))
+
+#define CLF_HSM_SUCCESS 0x00
+#define CLF_HSM_MAXERROR 0x7E
+#define CLF_HSM_ERROVERFLOW 0x7F
+
+#define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */
+
+/* 3 bits field => 8 values allowed */
+enum hsm_event {
+ HE_ARCHIVE = 0,
+ HE_RESTORE = 1,
+ HE_CANCEL = 2,
+ HE_RELEASE = 3,
+ HE_REMOVE = 4,
+ HE_STATE = 5,
+ HE_SPARE1 = 6,
+ HE_SPARE2 = 7,
+};
+
+static inline enum hsm_event hsm_get_cl_event(__u16 flags)
+{
+ return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);
+}
+
+static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
+{
+ *flags |= (he << CLF_HSM_EVENT_L);
+}
+
+static inline __u16 hsm_get_cl_flags(int flags)
+{
+ return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
+}
+
+static inline void hsm_set_cl_flags(int *flags, int bits)
+{
+ *flags |= (bits << CLF_HSM_FLAG_L);
+}
+
+static inline int hsm_get_cl_error(int flags)
+{
+ return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
+}
+
+static inline void hsm_set_cl_error(int *flags, int error)
+{
+ *flags |= (error << CLF_HSM_ERR_L);
+}
+
+#define CR_MAXSIZE cfs_size_round(2*NAME_MAX + 1 + sizeof(struct changelog_rec))
+
+struct changelog_rec {
+ __u16 cr_namelen;
+ __u16 cr_flags; /**< (flags&CLF_FLAGMASK)|CLF_VERSION */
+ __u32 cr_type; /**< \a changelog_rec_type */
+ __u64 cr_index; /**< changelog record number */
+ __u64 cr_prev; /**< last index for this target fid */
+ __u64 cr_time;
+ union {
+ lustre_fid cr_tfid; /**< target fid */
+ __u32 cr_markerflags; /**< CL_MARK flags */
+ };
+ lustre_fid cr_pfid; /**< parent fid */
+ char cr_name[0]; /**< last element */
+} __attribute__((packed));
+
+/* changelog_ext_rec is 2*sizeof(lu_fid) bigger than changelog_rec, to save
+ * space, only rename uses changelog_ext_rec, while others use changelog_rec to
+ * store records.
+ */
+struct changelog_ext_rec {
+ __u16 cr_namelen;
+ __u16 cr_flags; /**< (flags & CLF_FLAGMASK) |
+ CLF_EXT_VERSION */
+ __u32 cr_type; /**< \a changelog_rec_type */
+ __u64 cr_index; /**< changelog record number */
+ __u64 cr_prev; /**< last index for this target fid */
+ __u64 cr_time;
+ union {
+ lustre_fid cr_tfid; /**< target fid */
+ __u32 cr_markerflags; /**< CL_MARK flags */
+ };
+ lustre_fid cr_pfid; /**< target parent fid */
+ lustre_fid cr_sfid; /**< source fid, or zero */
+ lustre_fid cr_spfid; /**< source parent fid, or zero */
+ char cr_name[0]; /**< last element */
+} __attribute__((packed));
+
+#define CHANGELOG_REC_EXTENDED(rec) \
+ (((rec)->cr_flags & CLF_VERMASK) == CLF_EXT_VERSION)
+
+static inline int changelog_rec_size(struct changelog_rec *rec)
+{
+ return CHANGELOG_REC_EXTENDED(rec) ? sizeof(struct changelog_ext_rec):
+ sizeof(*rec);
+}
+
+static inline char *changelog_rec_name(struct changelog_rec *rec)
+{
+ return CHANGELOG_REC_EXTENDED(rec) ?
+ ((struct changelog_ext_rec *)rec)->cr_name: rec->cr_name;
+}
+
+static inline int changelog_rec_snamelen(struct changelog_ext_rec *rec)
+{
+ return rec->cr_namelen - strlen(rec->cr_name) - 1;
+}
+
+static inline char *changelog_rec_sname(struct changelog_ext_rec *rec)
+{
+ return rec->cr_name + strlen(rec->cr_name) + 1;
+}
+
+struct ioc_changelog {
+ __u64 icc_recno;
+ __u32 icc_mdtindex;
+ __u32 icc_id;
+ __u32 icc_flags;
+};
+
+enum changelog_message_type {
+ CL_RECORD = 10, /* message is a changelog_rec */
+ CL_EOF = 11, /* at end of current changelog */
+};
+
+/********* Misc **********/
+
+struct ioc_data_version {
+ __u64 idv_version;
+ __u64 idv_flags; /* See LL_DV_xxx */
+};
+#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling
+ version. Dirty caches are left unchanged. */
+
+#ifndef offsetof
+# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+#define dot_lustre_name ".lustre"
+
+
+/********* HSM **********/
+
+/** HSM per-file state
+ * See HSM_FLAGS below.
+ */
+enum hsm_states {
+ HS_EXISTS = 0x00000001,
+ HS_DIRTY = 0x00000002,
+ HS_RELEASED = 0x00000004,
+ HS_ARCHIVED = 0x00000008,
+ HS_NORELEASE = 0x00000010,
+ HS_NOARCHIVE = 0x00000020,
+ HS_LOST = 0x00000040,
+};
+
+/* HSM user-setable flags. */
+#define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
+
+/* Other HSM flags. */
+#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
+
+/*
+ * All HSM-related possible flags that could be applied to a file.
+ * This should be kept in sync with hsm_states.
+ */
+#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
+
+/**
+ * HSM request progress state
+ */
+enum hsm_progress_states {
+ HPS_WAITING = 1,
+ HPS_RUNNING = 2,
+ HPS_DONE = 3,
+};
+#define HPS_NONE 0
+
+static inline char *hsm_progress_state2name(enum hsm_progress_states s)
+{
+ switch (s) {
+ case HPS_WAITING: return "waiting";
+ case HPS_RUNNING: return "running";
+ case HPS_DONE: return "done";
+ default: return "unknown";
+ }
+}
+
+struct hsm_extent {
+ __u64 offset;
+ __u64 length;
+} __attribute__((packed));
+
+/**
+ * Current HSM states of a Lustre file.
+ *
+ * This structure purpose is to be sent to user-space mainly. It describes the
+ * current HSM flags and in-progress action.
+ */
+struct hsm_user_state {
+ /** Current HSM states, from enum hsm_states. */
+ __u32 hus_states;
+ __u32 hus_archive_id;
+ /** The current undergoing action, if there is one */
+ __u32 hus_in_progress_state;
+ __u32 hus_in_progress_action;
+ struct hsm_extent hus_in_progress_location;
+ char hus_extended_info[];
+};
+
+struct hsm_state_set_ioc {
+ struct lu_fid hssi_fid;
+ __u64 hssi_setmask;
+ __u64 hssi_clearmask;
+};
+
+/*
+ * This structure describes the current in-progress action for a file.
+ * it is retuned to user space and send over the wire
+ */
+struct hsm_current_action {
+ /** The current undergoing action, if there is one */
+ /* state is one of hsm_progress_states */
+ __u32 hca_state;
+ /* action is one of hsm_user_action */
+ __u32 hca_action;
+ struct hsm_extent hca_location;
+};
+
+/***** HSM user requests ******/
+/* User-generated (lfs/ioctl) request types */
+enum hsm_user_action {
+ HUA_NONE = 1, /* no action (noop) */
+ HUA_ARCHIVE = 10, /* copy to hsm */
+ HUA_RESTORE = 11, /* prestage */
+ HUA_RELEASE = 12, /* drop ost objects */
+ HUA_REMOVE = 13, /* remove from archive */
+ HUA_CANCEL = 14 /* cancel a request */
+};
+
+static inline char *hsm_user_action2name(enum hsm_user_action a)
+{
+ switch (a) {
+ case HUA_NONE: return "NOOP";
+ case HUA_ARCHIVE: return "ARCHIVE";
+ case HUA_RESTORE: return "RESTORE";
+ case HUA_RELEASE: return "RELEASE";
+ case HUA_REMOVE: return "REMOVE";
+ case HUA_CANCEL: return "CANCEL";
+ default: return "UNKNOWN";
+ }
+}
+
+/*
+ * List of hr_flags (bit field)
+ */
+#define HSM_FORCE_ACTION 0x0001
+/* used by CT, connot be set by user */
+#define HSM_GHOST_COPY 0x0002
+
+/**
+ * Contains all the fixed part of struct hsm_user_request.
+ *
+ */
+struct hsm_request {
+ __u32 hr_action; /* enum hsm_user_action */
+ __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */
+ __u64 hr_flags; /* request flags */
+ __u32 hr_itemcount; /* item count in hur_user_item vector */
+ __u32 hr_data_len;
+};
+
+struct hsm_user_item {
+ lustre_fid hui_fid;
+ struct hsm_extent hui_extent;
+} __attribute__((packed));
+
+struct hsm_user_request {
+ struct hsm_request hur_request;
+ struct hsm_user_item hur_user_item[0];
+ /* extra data blob at end of struct (after all
+ * hur_user_items), only use helpers to access it
+ */
+} __attribute__((packed));
+
+/** Return pointer to data field in a hsm user request */
+static inline void *hur_data(struct hsm_user_request *hur)
+{
+ return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
+}
+
+/** Compute the current length of the provided hsm_user_request. */
+static inline int hur_len(struct hsm_user_request *hur)
+{
+ return offsetof(struct hsm_user_request,
+ hur_user_item[hur->hur_request.hr_itemcount]) +
+ hur->hur_request.hr_data_len;
+}
+
+/****** HSM RPCs to copytool *****/
+/* Message types the copytool may receive */
+enum hsm_message_type {
+ HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
+};
+
+/* Actions the copytool may be instructed to take for a given action_item */
+enum hsm_copytool_action {
+ HSMA_NONE = 10, /* no action */
+ HSMA_ARCHIVE = 20, /* arbitrary offset */
+ HSMA_RESTORE = 21,
+ HSMA_REMOVE = 22,
+ HSMA_CANCEL = 23
+};
+
+static inline char *hsm_copytool_action2name(enum hsm_copytool_action a)
+{
+ switch (a) {
+ case HSMA_NONE: return "NOOP";
+ case HSMA_ARCHIVE: return "ARCHIVE";
+ case HSMA_RESTORE: return "RESTORE";
+ case HSMA_REMOVE: return "REMOVE";
+ case HSMA_CANCEL: return "CANCEL";
+ default: return "UNKNOWN";
+ }
+}
+
+/* Copytool item action description */
+struct hsm_action_item {
+ __u32 hai_len; /* valid size of this struct */
+ __u32 hai_action; /* hsm_copytool_action, but use known size */
+ lustre_fid hai_fid; /* Lustre FID to operated on */
+ lustre_fid hai_dfid; /* fid used for data access */
+ struct hsm_extent hai_extent; /* byte range to operate on */
+ __u64 hai_cookie; /* action cookie from coordinator */
+ __u64 hai_gid; /* grouplock id */
+ char hai_data[0]; /* variable length */
+} __attribute__((packed));
+
+/*
+ * helper function which print in hexa the first bytes of
+ * hai opaque field
+ * \param hai [IN] record to print
+ * \param buffer [OUT] output buffer
+ * \param len [IN] max buffer len
+ * \retval buffer
+ */
+static inline char *hai_dump_data_field(struct hsm_action_item *hai,
+ char *buffer, int len)
+{
+ int i, sz, data_len;
+ char *ptr;
+
+ ptr = buffer;
+ sz = len;
+ data_len = hai->hai_len - sizeof(*hai);
+ for (i = 0 ; (i < data_len) && (sz > 0) ; i++)
+ {
+ int cnt;
+
+ cnt = snprintf(ptr, sz, "%.2X",
+ (unsigned char)hai->hai_data[i]);
+ ptr += cnt;
+ sz -= cnt;
+ }
+ *ptr = '\0';
+ return buffer;
+}
+
+/* Copytool action list */
+#define HAL_VERSION 1
+#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
+struct hsm_action_list {
+ __u32 hal_version;
+ __u32 hal_count; /* number of hai's to follow */
+ __u64 hal_compound_id; /* returned by coordinator */
+ __u64 hal_flags;
+ __u32 hal_archive_id; /* which archive backend */
+ __u32 padding1;
+ char hal_fsname[0]; /* null-terminated */
+ /* struct hsm_action_item[hal_count] follows, aligned on 8-byte
+ boundaries. See hai_zero */
+} __attribute__((packed));
+
+#ifndef HAVE_CFS_SIZE_ROUND
+static inline int cfs_size_round (int val)
+{
+ return (val + 7) & (~0x7);
+}
+#define HAVE_CFS_SIZE_ROUND
+#endif
+
+/* Return pointer to first hai in action list */
+static inline struct hsm_action_item * hai_zero(struct hsm_action_list *hal)
+{
+ return (struct hsm_action_item *)(hal->hal_fsname +
+ cfs_size_round(strlen(hal-> \
+ hal_fsname)));
+}
+/* Return pointer to next hai */
+static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai)
+{
+ return (struct hsm_action_item *)((char *)hai +
+ cfs_size_round(hai->hai_len));
+}
+
+/* Return size of an hsm_action_list */
+static inline int hal_size(struct hsm_action_list *hal)
+{
+ int i, sz;
+ struct hsm_action_item *hai;
+
+ sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname));
+ hai = hai_zero(hal);
+ for (i = 0 ; i < hal->hal_count ; i++) {
+ sz += cfs_size_round(hai->hai_len);
+ hai = hai_next(hai);
+ }
+ return(sz);
+}
+
+/* Copytool progress reporting */
+#define HP_FLAG_COMPLETED 0x01
+#define HP_FLAG_RETRY 0x02
+
+struct hsm_progress {
+ lustre_fid hp_fid;
+ __u64 hp_cookie;
+ struct hsm_extent hp_extent;
+ __u16 hp_flags;
+ __u16 hp_errval; /* positive val */
+ __u32 padding;
+};
+
+/**
+ * Use by copytool during any hsm request they handled.
+ * This structure is initialized by llapi_hsm_copy_start()
+ * which is an helper over the ioctl() interface
+ * Store Lustre, internal use only, data.
+ */
+struct hsm_copy {
+ __u64 hc_data_version;
+ __u16 hc_flags;
+ __u16 hc_errval; /* positive val */
+ __u32 padding;
+ struct hsm_action_item hc_hai;
+};
+
+/** @} lustreuser */
+
+#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustreapi.h b/drivers/staging/lustre/lustre/include/lustre/lustreapi.h
new file mode 100644
index 000000000000..63da66506639
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustreapi.h
@@ -0,0 +1,310 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTREAPI_H_
+#define _LUSTREAPI_H_
+
+/** \defgroup llapi llapi
+ *
+ * @{
+ */
+
+#include <lustre/lustre_user.h>
+
+typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
+
+/* lustreapi message severity level */
+enum llapi_message_level {
+ LLAPI_MSG_OFF = 0,
+ LLAPI_MSG_FATAL = 1,
+ LLAPI_MSG_ERROR = 2,
+ LLAPI_MSG_WARN = 3,
+ LLAPI_MSG_NORMAL = 4,
+ LLAPI_MSG_INFO = 5,
+ LLAPI_MSG_DEBUG = 6,
+ LLAPI_MSG_MAX
+};
+
+/* the bottom three bits reserved for llapi_message_level */
+#define LLAPI_MSG_MASK 0x00000007
+#define LLAPI_MSG_NO_ERRNO 0x00000010
+
+extern void llapi_msg_set_level(int level);
+extern void llapi_error(int level, int rc, char *fmt, ...);
+#define llapi_err_noerrno(level, fmt, a...) \
+ llapi_error((level) | LLAPI_MSG_NO_ERRNO, 0, fmt, ## a)
+extern void llapi_printf(int level, char *fmt, ...);
+extern int llapi_file_create(const char *name, unsigned long long stripe_size,
+ int stripe_offset, int stripe_count,
+ int stripe_pattern);
+extern int llapi_file_open(const char *name, int flags, int mode,
+ unsigned long long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern);
+extern int llapi_file_create_pool(const char *name,
+ unsigned long long stripe_size,
+ int stripe_offset, int stripe_count,
+ int stripe_pattern, char *pool_name);
+extern int llapi_file_open_pool(const char *name, int flags, int mode,
+ unsigned long long stripe_size,
+ int stripe_offset, int stripe_count,
+ int stripe_pattern, char *pool_name);
+extern int llapi_poollist(const char *name);
+extern int llapi_get_poollist(const char *name, char **poollist, int list_size,
+ char *buffer, int buffer_size);
+extern int llapi_get_poolmembers(const char *poolname, char **members,
+ int list_size, char *buffer, int buffer_size);
+extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
+#define HAVE_LLAPI_FILE_LOOKUP
+extern int llapi_file_lookup(int dirfd, const char *name);
+
+#define VERBOSE_COUNT 0x1
+#define VERBOSE_SIZE 0x2
+#define VERBOSE_OFFSET 0x4
+#define VERBOSE_POOL 0x8
+#define VERBOSE_DETAIL 0x10
+#define VERBOSE_OBJID 0x20
+#define VERBOSE_GENERATION 0x40
+#define VERBOSE_MDTINDEX 0x80
+#define VERBOSE_ALL (VERBOSE_COUNT | VERBOSE_SIZE | VERBOSE_OFFSET | \
+ VERBOSE_POOL | VERBOSE_OBJID | VERBOSE_GENERATION)
+
+struct find_param {
+ unsigned int maxdepth;
+ time_t atime;
+ time_t mtime;
+ time_t ctime;
+ int asign; /* cannot be bitfields due to using pointers to */
+ int csign; /* access them during argument parsing. */
+ int msign;
+ int type;
+ int size_sign:2, /* these need to be signed values */
+ stripesize_sign:2,
+ stripecount_sign:2;
+ unsigned long long size;
+ unsigned long long size_units;
+ uid_t uid;
+ gid_t gid;
+
+ unsigned long zeroend:1,
+ recursive:1,
+ exclude_pattern:1,
+ exclude_type:1,
+ exclude_obd:1,
+ exclude_mdt:1,
+ exclude_gid:1,
+ exclude_uid:1,
+ check_gid:1, /* group ID */
+ check_uid:1, /* user ID */
+ check_pool:1, /* LOV pool name */
+ check_size:1, /* file size */
+ exclude_pool:1,
+ exclude_size:1,
+ exclude_atime:1,
+ exclude_mtime:1,
+ exclude_ctime:1,
+ get_lmv:1, /* get MDT list from LMV */
+ raw:1, /* do not fill in defaults */
+ check_stripesize:1, /* LOV stripe size */
+ exclude_stripesize:1,
+ check_stripecount:1, /* LOV stripe count */
+ exclude_stripecount:1;
+
+ int verbose;
+ int quiet;
+
+ /* regular expression */
+ char *pattern;
+
+ char *print_fmt;
+
+ struct obd_uuid *obduuid;
+ int num_obds;
+ int num_alloc_obds;
+ int obdindex;
+ int *obdindexes;
+
+ struct obd_uuid *mdtuuid;
+ int num_mdts;
+ int num_alloc_mdts;
+ int mdtindex;
+ int *mdtindexes;
+ int file_mdtindex;
+
+ int lumlen;
+ struct lov_user_mds_data *lmd;
+
+ char poolname[LOV_MAXPOOLNAME + 1];
+
+ int fp_lmv_count;
+ struct lmv_user_md *fp_lmv_md;
+
+ unsigned long long stripesize;
+ unsigned long long stripesize_units;
+ unsigned long long stripecount;
+
+ /* In-process parameters. */
+ unsigned long got_uuids:1,
+ obds_printed:1,
+ have_fileinfo:1; /* file attrs and LOV xattr */
+ unsigned int depth;
+ dev_t st_dev;
+};
+
+extern int llapi_ostlist(char *path, struct find_param *param);
+extern int llapi_uuid_match(char *real_uuid, char *search_uuid);
+extern int llapi_getstripe(char *path, struct find_param *param);
+extern int llapi_find(char *path, struct find_param *param);
+
+extern int llapi_file_fget_mdtidx(int fd, int *mdtidx);
+extern int llapi_dir_create_pool(const char *name, int flags, int stripe_offset,
+ int stripe_count, int stripe_pattern,
+ char *poolname);
+int llapi_direntry_remove(char *dname);
+extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
+ struct obd_statfs *stat_buf,
+ struct obd_uuid *uuid_buf);
+extern int llapi_ping(char *obd_type, char *obd_name);
+extern int llapi_target_check(int num_types, char **obd_types, char *dir);
+extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
+extern int llapi_file_get_lmv_uuid(const char *path, struct obd_uuid *lmv_uuid);
+extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid);
+extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
+extern int llapi_lmv_get_uuids(int fd, struct obd_uuid *uuidp, int *mdt_count);
+extern int llapi_is_lustre_mnttype(const char *type);
+extern int llapi_search_ost(char *fsname, char *poolname, char *ostname);
+extern int llapi_get_obd_count(char *mnt, int *count, int is_mdt);
+extern int parse_size(char *optarg, unsigned long long *size,
+ unsigned long long *size_units, int bytes_spec);
+extern int llapi_search_mounts(const char *pathname, int index,
+ char *mntdir, char *fsname);
+extern int llapi_search_fsname(const char *pathname, char *fsname);
+extern int llapi_getname(const char *path, char *buf, size_t size);
+
+extern void llapi_ping_target(char *obd_type, char *obd_name,
+ char *obd_uuid, void *args);
+
+extern int llapi_search_rootpath(char *pathname, const char *fsname);
+
+struct mntent;
+#define HAVE_LLAPI_IS_LUSTRE_MNT
+extern int llapi_is_lustre_mnt(struct mntent *mnt);
+extern int llapi_quotachown(char *path, int flag);
+extern int llapi_quotacheck(char *mnt, int check_type);
+extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
+extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl);
+extern int llapi_target_iterate(int type_num, char **obd_type, void *args,
+ llapi_cb_t cb);
+extern int llapi_get_connect_flags(const char *mnt, __u64 *flags);
+extern int llapi_lsetfacl(int argc, char *argv[]);
+extern int llapi_lgetfacl(int argc, char *argv[]);
+extern int llapi_rsetfacl(int argc, char *argv[]);
+extern int llapi_rgetfacl(int argc, char *argv[]);
+extern int llapi_cp(int argc, char *argv[]);
+extern int llapi_ls(int argc, char *argv[]);
+extern int llapi_fid2path(const char *device, const char *fidstr, char *path,
+ int pathlen, long long *recno, int *linkno);
+extern int llapi_path2fid(const char *path, lustre_fid *fid);
+extern int llapi_fd2fid(const int fd, lustre_fid *fid);
+
+extern int llapi_get_version(char *buffer, int buffer_size, char **version);
+extern int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags);
+extern int llapi_hsm_state_get(const char *path, struct hsm_user_state *hus);
+extern int llapi_hsm_state_set(const char *path, __u64 setmask, __u64 clearmask,
+ __u32 archive_id);
+
+extern int llapi_create_volatile_idx(char *directory, int idx, int mode);
+static inline int llapi_create_volatile(char *directory, int mode)
+{
+ return llapi_create_volatile_idx(directory, -1, mode);
+}
+
+
+extern int llapi_fswap_layouts(const int fd1, const int fd2,
+ __u64 dv1, __u64 dv2, __u64 flags);
+extern int llapi_swap_layouts(const char *path1, const char *path2,
+ __u64 dv1, __u64 dv2, __u64 flags);
+
+/* Changelog interface. priv is private state, managed internally
+ by these functions */
+#define CHANGELOG_FLAG_FOLLOW 0x01 /* Not yet implemented */
+#define CHANGELOG_FLAG_BLOCK 0x02 /* Blocking IO makes sense in case of
+ slow user parsing of the records, but it also prevents us from cleaning
+ up if the records are not consumed. */
+
+/* Records received are in extentded format now, though most of them are still
+ * written in disk in changelog_rec format (to save space and time), it's
+ * converted to extented format in the lustre api to ease changelog analysis. */
+#define HAVE_CHANGELOG_EXTEND_REC 1
+
+extern int llapi_changelog_start(void **priv, int flags, const char *mdtname,
+ long long startrec);
+extern int llapi_changelog_fini(void **priv);
+extern int llapi_changelog_recv(void *priv, struct changelog_ext_rec **rech);
+extern int llapi_changelog_free(struct changelog_ext_rec **rech);
+/* Allow records up to endrec to be destroyed; requires registered id. */
+extern int llapi_changelog_clear(const char *mdtname, const char *idstr,
+ long long endrec);
+
+/* HSM copytool interface.
+ * priv is private state, managed internally by these functions
+ */
+struct hsm_copytool_private;
+extern int llapi_hsm_copytool_start(struct hsm_copytool_private **priv,
+ char *fsname, int flags,
+ int archive_count, int *archives);
+extern int llapi_hsm_copytool_fini(struct hsm_copytool_private **priv);
+extern int llapi_hsm_copytool_recv(struct hsm_copytool_private *priv,
+ struct hsm_action_list **hal, int *msgsize);
+extern int llapi_hsm_copytool_free(struct hsm_action_list **hal);
+extern int llapi_hsm_copy_start(char *mnt, struct hsm_copy *copy,
+ const struct hsm_action_item *hai);
+extern int llapi_hsm_copy_end(char *mnt, struct hsm_copy *copy,
+ const struct hsm_progress *hp);
+extern int llapi_hsm_progress(char *mnt, struct hsm_progress *hp);
+extern int llapi_hsm_import(const char *dst, int archive, struct stat *st,
+ unsigned long long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern,
+ char *pool_name, lustre_fid *newfid);
+
+/* HSM user interface */
+extern struct hsm_user_request *llapi_hsm_user_request_alloc(int itemcount,
+ int data_len);
+extern int llapi_hsm_request(char *mnt, struct hsm_user_request *request);
+extern int llapi_hsm_current_action(const char *path,
+ struct hsm_current_action *hca);
+/** @} llapi */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h
new file mode 100644
index 000000000000..5cfb87b180c3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -0,0 +1,42 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_acl.h
+ */
+
+#ifndef _LUSTRE_ACL_H
+#define _LUSTRE_ACL_H
+
+#include <linux/lustre_acl.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_capa.h b/drivers/staging/lustre/lustre/include/lustre_capa.h
new file mode 100644
index 000000000000..d77bffc0b59d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_capa.h
@@ -0,0 +1,305 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_capa.h
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ */
+
+#ifndef __LINUX_CAPA_H_
+#define __LINUX_CAPA_H_
+
+/** \defgroup capa capa
+ *
+ * @{
+ */
+
+/*
+ * capability
+ */
+#include <linux/crypto.h>
+#include <lustre/lustre_idl.h>
+
+#define CAPA_TIMEOUT 1800 /* sec, == 30 min */
+#define CAPA_KEY_TIMEOUT (24 * 60 * 60) /* sec, == 1 days */
+
+struct capa_hmac_alg {
+ const char *ha_name;
+ int ha_len;
+ int ha_keylen;
+};
+
+#define DEF_CAPA_HMAC_ALG(name, type, len, keylen) \
+[CAPA_HMAC_ALG_ ## type] = { \
+ .ha_name = name, \
+ .ha_len = len, \
+ .ha_keylen = keylen, \
+}
+
+struct client_capa {
+ struct inode *inode;
+ struct list_head lli_list; /* link to lli_oss_capas */
+};
+
+struct target_capa {
+ struct hlist_node c_hash; /* link to capa hash */
+};
+
+struct obd_capa {
+ struct list_head c_list; /* link to capa_list */
+
+ struct lustre_capa c_capa; /* capa */
+ atomic_t c_refc; /* ref count */
+ cfs_time_t c_expiry; /* jiffies */
+ spinlock_t c_lock; /* protect capa content */
+ int c_site;
+
+ union {
+ struct client_capa cli;
+ struct target_capa tgt;
+ } u;
+};
+
+enum {
+ CAPA_SITE_CLIENT = 0,
+ CAPA_SITE_SERVER,
+ CAPA_SITE_MAX
+};
+
+static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
+{
+ return &capa->lc_fid;
+}
+
+static inline __u64 capa_opc(struct lustre_capa *capa)
+{
+ return capa->lc_opc;
+}
+
+static inline __u64 capa_uid(struct lustre_capa *capa)
+{
+ return capa->lc_uid;
+}
+
+static inline __u64 capa_gid(struct lustre_capa *capa)
+{
+ return capa->lc_gid;
+}
+
+static inline __u32 capa_flags(struct lustre_capa *capa)
+{
+ return capa->lc_flags & 0xffffff;
+}
+
+static inline __u32 capa_alg(struct lustre_capa *capa)
+{
+ return (capa->lc_flags >> 24);
+}
+
+static inline __u32 capa_keyid(struct lustre_capa *capa)
+{
+ return capa->lc_keyid;
+}
+
+static inline __u64 capa_key_seq(struct lustre_capa_key *key)
+{
+ return key->lk_seq;
+}
+
+static inline __u32 capa_key_keyid(struct lustre_capa_key *key)
+{
+ return key->lk_keyid;
+}
+
+static inline __u32 capa_timeout(struct lustre_capa *capa)
+{
+ return capa->lc_timeout;
+}
+
+static inline __u32 capa_expiry(struct lustre_capa *capa)
+{
+ return capa->lc_expiry;
+}
+
+void _debug_capa(struct lustre_capa *, struct libcfs_debug_msg_data *,
+ const char *fmt, ... );
+#define DEBUG_CAPA(level, capa, fmt, args...) \
+do { \
+ if (((level) & D_CANTMASK) != 0 || \
+ ((libcfs_debug & (level)) != 0 && \
+ (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \
+ _debug_capa((capa), &msgdata, fmt, ##args); \
+ } \
+} while (0)
+
+#define DEBUG_CAPA_KEY(level, k, fmt, args...) \
+do { \
+CDEBUG(level, fmt " capability key@%p seq "LPU64" keyid %u\n", \
+ ##args, k, capa_key_seq(k), capa_key_keyid(k)); \
+} while (0)
+
+typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *);
+
+/* obdclass/capa.c */
+extern struct list_head capa_list[];
+extern spinlock_t capa_lock;
+extern int capa_count[];
+extern struct kmem_cache *capa_cachep;
+
+struct hlist_head *init_capa_hash(void);
+void cleanup_capa_hash(struct hlist_head *hash);
+
+struct obd_capa *capa_add(struct hlist_head *hash,
+ struct lustre_capa *capa);
+struct obd_capa *capa_lookup(struct hlist_head *hash,
+ struct lustre_capa *capa, int alive);
+
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key);
+int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
+int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
+void capa_cpy(void *dst, struct obd_capa *ocapa);
+static inline struct obd_capa *alloc_capa(int site)
+{
+ struct obd_capa *ocapa;
+
+ if (unlikely(site != CAPA_SITE_CLIENT && site != CAPA_SITE_SERVER))
+ return ERR_PTR(-EINVAL);
+
+ OBD_SLAB_ALLOC_PTR(ocapa, capa_cachep);
+ if (unlikely(!ocapa))
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&ocapa->c_list);
+ atomic_set(&ocapa->c_refc, 1);
+ spin_lock_init(&ocapa->c_lock);
+ ocapa->c_site = site;
+ if (ocapa->c_site == CAPA_SITE_CLIENT)
+ INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+ else
+ INIT_HLIST_NODE(&ocapa->u.tgt.c_hash);
+
+ return ocapa;
+}
+
+static inline struct obd_capa *capa_get(struct obd_capa *ocapa)
+{
+ if (!ocapa)
+ return NULL;
+
+ atomic_inc(&ocapa->c_refc);
+ return ocapa;
+}
+
+static inline void capa_put(struct obd_capa *ocapa)
+{
+ if (!ocapa)
+ return;
+
+ if (atomic_read(&ocapa->c_refc) == 0) {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc is 0 for");
+ LBUG();
+ }
+
+ if (atomic_dec_and_test(&ocapa->c_refc)) {
+ LASSERT(list_empty(&ocapa->c_list));
+ if (ocapa->c_site == CAPA_SITE_CLIENT) {
+ LASSERT(list_empty(&ocapa->u.cli.lli_list));
+ } else {
+ struct hlist_node *hnode;
+
+ hnode = &ocapa->u.tgt.c_hash;
+ LASSERT(!hnode->next && !hnode->pprev);
+ }
+ OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+ }
+}
+
+static inline int open_flags_to_accmode(int flags)
+{
+ int mode = flags;
+
+ if ((mode + 1) & O_ACCMODE)
+ mode++;
+ if (mode & O_TRUNC)
+ mode |= 2;
+
+ return mode;
+}
+
+static inline __u64 capa_open_opc(int mode)
+{
+ return mode & FMODE_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_READ;
+}
+
+static inline void set_capa_expiry(struct obd_capa *ocapa)
+{
+ cfs_time_t expiry = cfs_time_sub((cfs_time_t)ocapa->c_capa.lc_expiry,
+ cfs_time_current_sec());
+ ocapa->c_expiry = cfs_time_add(cfs_time_current(),
+ cfs_time_seconds(expiry));
+}
+
+static inline int capa_is_expired_sec(struct lustre_capa *capa)
+{
+ return (capa->lc_expiry - cfs_time_current_sec() <= 0);
+}
+
+static inline int capa_is_expired(struct obd_capa *ocapa)
+{
+ return cfs_time_beforeq(ocapa->c_expiry, cfs_time_current());
+}
+
+static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc)
+{
+ return (capa_opc(capa) & opc) == opc;
+}
+
+struct filter_capa_key {
+ struct list_head k_list;
+ struct lustre_capa_key k_key;
+};
+
+enum {
+ LC_ID_NONE = 0,
+ LC_ID_PLAIN = 1,
+ LC_ID_CONVERT = 2
+};
+
+#define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT)
+
+/** @} capa */
+
+#endif /* __LINUX_CAPA_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
new file mode 100644
index 000000000000..f12429f38215
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -0,0 +1,299 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_CFG_H
+#define _LUSTRE_CFG_H
+
+/** \defgroup cfg cfg
+ *
+ * @{
+ */
+
+/*
+ * 1cf6
+ * lcfG
+ */
+#define LUSTRE_CFG_VERSION 0x1cf60001
+#define LUSTRE_CFG_MAX_BUFCOUNT 8
+
+#define LCFG_HDR_SIZE(count) \
+ cfs_size_round(offsetof (struct lustre_cfg, lcfg_buflens[(count)]))
+
+/** If the LCFG_REQUIRED bit is set in a configuration command,
+ * then the client is required to understand this parameter
+ * in order to mount the filesystem. If it does not understand
+ * a REQUIRED command the client mount will fail. */
+#define LCFG_REQUIRED 0x0001000
+
+enum lcfg_command_type {
+ LCFG_ATTACH = 0x00cf001, /**< create a new obd instance */
+ LCFG_DETACH = 0x00cf002, /**< destroy obd instance */
+ LCFG_SETUP = 0x00cf003, /**< call type-specific setup */
+ LCFG_CLEANUP = 0x00cf004, /**< call type-specific cleanup */
+ LCFG_ADD_UUID = 0x00cf005, /**< add a nid to a niduuid */
+ LCFG_DEL_UUID = 0x00cf006, /**< remove a nid from a niduuid */
+ LCFG_MOUNTOPT = 0x00cf007, /**< create a profile (mdc, osc) */
+ LCFG_DEL_MOUNTOPT = 0x00cf008, /**< destroy a profile */
+ LCFG_SET_TIMEOUT = 0x00cf009, /**< set obd_timeout */
+ LCFG_SET_UPCALL = 0x00cf00a, /**< deprecated */
+ LCFG_ADD_CONN = 0x00cf00b, /**< add a failover niduuid to an obd */
+ LCFG_DEL_CONN = 0x00cf00c, /**< remove a failover niduuid */
+ LCFG_LOV_ADD_OBD = 0x00cf00d, /**< add an osc to a lov */
+ LCFG_LOV_DEL_OBD = 0x00cf00e, /**< remove an osc from a lov */
+ LCFG_PARAM = 0x00cf00f, /**< set a proc parameter */
+ LCFG_MARKER = 0x00cf010, /**< metadata about next cfg rec */
+ LCFG_LOG_START = 0x00ce011, /**< mgc only, process a cfg log */
+ LCFG_LOG_END = 0x00ce012, /**< stop processing updates */
+ LCFG_LOV_ADD_INA = 0x00ce013, /**< like LOV_ADD_OBD, inactive */
+ LCFG_ADD_MDC = 0x00cf014, /**< add an mdc to a lmv */
+ LCFG_DEL_MDC = 0x00cf015, /**< remove an mdc from a lmv */
+ LCFG_SPTLRPC_CONF = 0x00ce016, /**< security */
+ LCFG_POOL_NEW = 0x00ce020, /**< create an ost pool name */
+ LCFG_POOL_ADD = 0x00ce021, /**< add an ost to a pool */
+ LCFG_POOL_REM = 0x00ce022, /**< remove an ost from a pool */
+ LCFG_POOL_DEL = 0x00ce023, /**< destroy an ost pool name */
+ LCFG_SET_LDLM_TIMEOUT = 0x00ce030, /**< set ldlm_timeout */
+ LCFG_PRE_CLEANUP = 0x00cf031, /**< call type-specific pre
+ * cleanup cleanup */
+};
+
+struct lustre_cfg_bufs {
+ void *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT];
+ __u32 lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT];
+ __u32 lcfg_bufcount;
+};
+
+struct lustre_cfg {
+ __u32 lcfg_version;
+ __u32 lcfg_command;
+
+ __u32 lcfg_num;
+ __u32 lcfg_flags;
+ __u64 lcfg_nid;
+ __u32 lcfg_nal; /* not used any more */
+
+ __u32 lcfg_bufcount;
+ __u32 lcfg_buflens[0];
+};
+
+enum cfg_record_type {
+ PORTALS_CFG_TYPE = 1,
+ LUSTRE_CFG_TYPE = 123,
+};
+
+#define LUSTRE_CFG_BUFLEN(lcfg, idx) \
+ ((lcfg)->lcfg_bufcount <= (idx) \
+ ? 0 \
+ : (lcfg)->lcfg_buflens[(idx)])
+
+static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs,
+ __u32 index,
+ void *buf,
+ __u32 buflen)
+{
+ if (index >= LUSTRE_CFG_MAX_BUFCOUNT)
+ return;
+ if (bufs == NULL)
+ return;
+
+ if (bufs->lcfg_bufcount <= index)
+ bufs->lcfg_bufcount = index + 1;
+
+ bufs->lcfg_buf[index] = buf;
+ bufs->lcfg_buflen[index] = buflen;
+}
+
+static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs,
+ __u32 index,
+ char *str)
+{
+ lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0);
+}
+
+static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, char *name)
+{
+ memset((bufs), 0, sizeof(*bufs));
+ if (name)
+ lustre_cfg_bufs_set_string(bufs, 0, name);
+}
+
+static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
+{
+ int i;
+ int offset;
+ int bufcount;
+ LASSERT (lcfg != NULL);
+ LASSERT (index >= 0);
+
+ bufcount = lcfg->lcfg_bufcount;
+ if (index >= bufcount)
+ return NULL;
+
+ offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
+ for (i = 0; i < index; i++)
+ offset += cfs_size_round(lcfg->lcfg_buflens[i]);
+ return (char *)lcfg + offset;
+}
+
+static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
+ struct lustre_cfg *lcfg)
+{
+ int i;
+ bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
+ for (i = 0; i < bufs->lcfg_bufcount; i++) {
+ bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i];
+ bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i);
+ }
+}
+
+static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
+{
+ char *s;
+
+ if (lcfg->lcfg_buflens[index] == 0)
+ return NULL;
+
+ s = lustre_cfg_buf(lcfg, index);
+ if (s == NULL)
+ return NULL;
+
+ /*
+ * make sure it's NULL terminated, even if this kills a char
+ * of data. Try to use the padding first though.
+ */
+ if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
+ int last = min((int)lcfg->lcfg_buflens[index],
+ cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
+ char lost = s[last];
+ s[last] = '\0';
+ if (lost != '\0') {
+ CWARN("Truncated buf %d to '%s' (lost '%c'...)\n",
+ index, s, lost);
+ }
+ }
+ return s;
+}
+
+static inline int lustre_cfg_len(__u32 bufcount, __u32 *buflens)
+{
+ int i;
+ int len;
+ ENTRY;
+
+ len = LCFG_HDR_SIZE(bufcount);
+ for (i = 0; i < bufcount; i++)
+ len += cfs_size_round(buflens[i]);
+
+ RETURN(cfs_size_round(len));
+}
+
+
+#include <obd_support.h>
+
+static inline struct lustre_cfg *lustre_cfg_new(int cmd,
+ struct lustre_cfg_bufs *bufs)
+{
+ struct lustre_cfg *lcfg;
+ char *ptr;
+ int i;
+
+ ENTRY;
+
+ OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount,
+ bufs->lcfg_buflen));
+ if (!lcfg)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ lcfg->lcfg_version = LUSTRE_CFG_VERSION;
+ lcfg->lcfg_command = cmd;
+ lcfg->lcfg_bufcount = bufs->lcfg_bufcount;
+
+ ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
+ for (i = 0; i < lcfg->lcfg_bufcount; i++) {
+ lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i];
+ LOGL((char *)bufs->lcfg_buf[i], bufs->lcfg_buflen[i], ptr);
+ }
+ RETURN(lcfg);
+}
+
+static inline void lustre_cfg_free(struct lustre_cfg *lcfg)
+{
+ int len;
+
+ len = lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens);
+
+ OBD_FREE(lcfg, len);
+ EXIT;
+ return;
+}
+
+static inline int lustre_cfg_sanity_check(void *buf, int len)
+{
+ struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
+ ENTRY;
+ if (!lcfg)
+ RETURN(-EINVAL);
+
+ /* check that the first bits of the struct are valid */
+ if (len < LCFG_HDR_SIZE(0))
+ RETURN(-EINVAL);
+
+ if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
+ RETURN(-EINVAL);
+
+ if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
+ RETURN(-EINVAL);
+
+ /* check that the buflens are valid */
+ if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount))
+ RETURN(-EINVAL);
+
+ /* make sure all the pointers point inside the data */
+ if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens))
+ RETURN(-EINVAL);
+
+ RETURN(0);
+}
+
+#include <lustre/lustre_user.h>
+
+#ifndef INVALID_UID
+#define INVALID_UID (-1)
+#endif
+
+/** @} cfg */
+
+#endif // _LUSTRE_CFG_H
diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h
new file mode 100644
index 000000000000..3d9e4462af43
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_debug.h
@@ -0,0 +1,76 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_DEBUG_H
+#define _LUSTRE_DEBUG_H
+
+/** \defgroup debug debug
+ *
+ * @{
+ */
+
+#include <lustre_net.h>
+#include <obd.h>
+
+#include <linux/lustre_debug.h>
+
+#define ASSERT_MAX_SIZE_MB 60000ULL
+#define ASSERT_PAGE_INDEX(index, OP) \
+do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_CACHE_SHIFT)) { \
+ CERROR("bad page index %lu > %llu\n", index, \
+ ASSERT_MAX_SIZE_MB << (20 - PAGE_CACHE_SHIFT)); \
+ libcfs_debug = ~0UL; \
+ OP; \
+}} while(0)
+
+#define ASSERT_FILE_OFFSET(offset, OP) \
+do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \
+ CERROR("bad file offset %llu > %llu\n", offset, \
+ ASSERT_MAX_SIZE_MB << 20); \
+ libcfs_debug = ~0UL; \
+ OP; \
+}} while(0)
+
+/* lib/debug.c */
+void dump_lniobuf(struct niobuf_local *lnb);
+int dump_req(struct ptlrpc_request *req);
+void dump_lsm(int level, struct lov_stripe_md *lsm);
+int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
+int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
+
+/** @} debug */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
new file mode 100644
index 000000000000..8db6086ea4ea
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -0,0 +1,543 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_disk.h
+ *
+ * Lustre disk format definitions.
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_DISK_H
+#define _LUSTRE_DISK_H
+
+/** \defgroup disk disk
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+
+/****************** on-disk files *********************/
+
+#define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */
+#define MOUNT_CONFIGS_DIR "CONFIGS"
+#define CONFIGS_FILE "mountdata"
+/** Persistent mount data are stored on the disk in this file. */
+#define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/"CONFIGS_FILE
+#define LAST_RCVD "last_rcvd"
+#define LOV_OBJID "lov_objid"
+#define LOV_OBJSEQ "lov_objseq"
+#define HEALTH_CHECK "health_check"
+#define CAPA_KEYS "capa_keys"
+#define CHANGELOG_USERS "changelog_users"
+#define MGS_NIDTBL_DIR "NIDTBL_VERSIONS"
+#define QMT_DIR "quota_master"
+#define QSD_DIR "quota_slave"
+
+/****************** persistent mount data *********************/
+
+#define LDD_F_SV_TYPE_MDT 0x0001
+#define LDD_F_SV_TYPE_OST 0x0002
+#define LDD_F_SV_TYPE_MGS 0x0004
+#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \
+ LDD_F_SV_TYPE_OST | \
+ LDD_F_SV_TYPE_MGS)
+#define LDD_F_SV_ALL 0x0008
+/** need an index assignment */
+#define LDD_F_NEED_INDEX 0x0010
+/** never registered */
+#define LDD_F_VIRGIN 0x0020
+/** update the config logs for this server */
+#define LDD_F_UPDATE 0x0040
+/** rewrite the LDD */
+#define LDD_F_REWRITE_LDD 0x0080
+/** regenerate config logs for this fs or server */
+#define LDD_F_WRITECONF 0x0100
+/** COMPAT_14 */
+#define LDD_F_UPGRADE14 0x0200
+/** process as lctl conf_param */
+#define LDD_F_PARAM 0x0400
+/** all nodes are specified as service nodes */
+#define LDD_F_NO_PRIMNODE 0x1000
+/** IR enable flag */
+#define LDD_F_IR_CAPABLE 0x2000
+/** the MGS refused to register the target. */
+#define LDD_F_ERROR 0x4000
+
+/* opc for target register */
+#define LDD_F_OPC_REG 0x10000000
+#define LDD_F_OPC_UNREG 0x20000000
+#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_MASK 0xf0000000
+
+#define LDD_F_ONDISK_MASK (LDD_F_SV_TYPE_MASK)
+
+#define LDD_F_MASK 0xFFFF
+
+enum ldd_mount_type {
+ LDD_MT_EXT3 = 0,
+ LDD_MT_LDISKFS,
+ LDD_MT_SMFS,
+ LDD_MT_REISERFS,
+ LDD_MT_LDISKFS2,
+ LDD_MT_ZFS,
+ LDD_MT_LAST
+};
+
+static inline char *mt_str(enum ldd_mount_type mt)
+{
+ static char *mount_type_string[] = {
+ "ext3",
+ "ldiskfs",
+ "smfs",
+ "reiserfs",
+ "ldiskfs2",
+ "zfs",
+ };
+ return mount_type_string[mt];
+}
+
+static inline char *mt_type(enum ldd_mount_type mt)
+{
+ static char *mount_type_string[] = {
+ "osd-ldiskfs",
+ "osd-ldiskfs",
+ "osd-smfs",
+ "osd-reiserfs",
+ "osd-ldiskfs",
+ "osd-zfs",
+ };
+ return mount_type_string[mt];
+}
+
+#define LDD_INCOMPAT_SUPP 0
+#define LDD_ROCOMPAT_SUPP 0
+
+#define LDD_MAGIC 0x1dd00001
+
+/* On-disk configuration file. In host-endian order. */
+struct lustre_disk_data {
+ __u32 ldd_magic;
+ __u32 ldd_feature_compat; /* compatible feature flags */
+ __u32 ldd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 ldd_feature_incompat;/* incompatible feature flags */
+
+ __u32 ldd_config_ver; /* config rewrite count - not used */
+ __u32 ldd_flags; /* LDD_SV_TYPE */
+ __u32 ldd_svindex; /* server index (0001), must match
+ svname */
+ __u32 ldd_mount_type; /* target fs type LDD_MT_* */
+ char ldd_fsname[64]; /* filesystem this server is part of,
+ MTI_NAME_MAXLEN */
+ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
+ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
+
+/*200*/ char ldd_userdata[1024 - 200]; /* arbitrary user string */
+/*1024*/__u8 ldd_padding[4096 - 1024];
+/*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */
+/*8192*/char ldd_params[4096]; /* key=value pairs */
+};
+
+
+#define IS_MDT(data) ((data)->lsi_flags & LDD_F_SV_TYPE_MDT)
+#define IS_OST(data) ((data)->lsi_flags & LDD_F_SV_TYPE_OST)
+#define IS_MGS(data) ((data)->lsi_flags & LDD_F_SV_TYPE_MGS)
+#define IS_SERVER(data) ((data)->lsi_flags & (LDD_F_SV_TYPE_MGS | \
+ LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST))
+#define MT_STR(data) mt_str((data)->ldd_mount_type)
+
+/* Make the mdt/ost server obd name based on the filesystem name */
+static inline int server_make_name(__u32 flags, __u16 index, char *fs,
+ char *name)
+{
+ if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
+ if (!(flags & LDD_F_SV_ALL))
+ sprintf(name, "%.8s%c%s%04x", fs,
+ (flags & LDD_F_VIRGIN) ? ':' :
+ ((flags & LDD_F_WRITECONF) ? '=' : '-'),
+ (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
+ index);
+ } else if (flags & LDD_F_SV_TYPE_MGS) {
+ sprintf(name, "MGS");
+ } else {
+ CERROR("unknown server type %#x\n", flags);
+ return 1;
+ }
+ return 0;
+}
+
+/****************** mount command *********************/
+
+/* The lmd is only used internally by Lustre; mount simply passes
+ everything as string options */
+
+#define LMD_MAGIC 0xbdacbd03
+
+/* gleaned from the mount command - no persistent info here */
+struct lustre_mount_data {
+ __u32 lmd_magic;
+ __u32 lmd_flags; /* lustre mount flags */
+ int lmd_mgs_failnodes; /* mgs failover node count */
+ int lmd_exclude_count;
+ int lmd_recovery_time_soft;
+ int lmd_recovery_time_hard;
+ char *lmd_dev; /* device name */
+ char *lmd_profile; /* client only */
+ char *lmd_mgssec; /* sptlrpc flavor to mgs */
+ char *lmd_opts; /* lustre mount options (as opposed to
+ _device_ mount options) */
+ char *lmd_params; /* lustre params */
+ __u32 *lmd_exclude; /* array of OSTs to ignore */
+ char *lmd_mgs; /* MGS nid */
+ char *lmd_osd_type; /* OSD type */
+};
+
+#define LMD_FLG_SERVER 0x0001 /* Mounting a server */
+#define LMD_FLG_CLIENT 0x0002 /* Mounting a client */
+#define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */
+#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers,
+ no other services */
+#define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, reusing
+ existing MGS services */
+#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */
+#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */
+#define LMD_FLG_NOSCRUB 0x0100 /* Do not trigger scrub automatically */
+#define LMD_FLG_MGS 0x0200 /* Also start MGS along with server */
+#define LMD_FLG_IAM 0x0400 /* IAM dir */
+#define LMD_FLG_NO_PRIMNODE 0x0800 /* all nodes are service nodes */
+#define LMD_FLG_VIRGIN 0x1000 /* the service registers first time */
+#define LMD_FLG_UPDATE 0x2000 /* update parameters */
+
+#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
+
+
+/****************** last_rcvd file *********************/
+
+/** version recovery epoch */
+#define LR_EPOCH_BITS 32
+#define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
+#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
+#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */
+
+#define LR_SERVER_SIZE 512
+#define LR_CLIENT_START 8192
+#define LR_CLIENT_SIZE 128
+#if LR_CLIENT_START < LR_SERVER_SIZE
+#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
+#endif
+
+/*
+ * This limit is arbitrary (131072 clients on x86), but it is convenient to use
+ * 2^n * PAGE_CACHE_SIZE * 8 for the number of bits that fit an order-n allocation.
+ * If we need more than 131072 clients (order-2 allocation on x86) then this
+ * should become an array of single-page pointers that are allocated on demand.
+ */
+#if (128 * 1024UL) > (PAGE_CACHE_SIZE * 8)
+#define LR_MAX_CLIENTS (128 * 1024UL)
+#else
+#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
+#endif
+
+/** COMPAT_146: this is an OST (temporary) */
+#define OBD_COMPAT_OST 0x00000002
+/** COMPAT_146: this is an MDT (temporary) */
+#define OBD_COMPAT_MDT 0x00000004
+/** 2.0 server, interop flag to show server version is changed */
+#define OBD_COMPAT_20 0x00000008
+
+/** MDS handles LOV_OBJID file */
+#define OBD_ROCOMPAT_LOVOBJID 0x00000001
+
+/** OST handles group subdirs */
+#define OBD_INCOMPAT_GROUPS 0x00000001
+/** this is an OST */
+#define OBD_INCOMPAT_OST 0x00000002
+/** this is an MDT */
+#define OBD_INCOMPAT_MDT 0x00000004
+/** common last_rvcd format */
+#define OBD_INCOMPAT_COMMON_LR 0x00000008
+/** FID is enabled */
+#define OBD_INCOMPAT_FID 0x00000010
+/** Size-on-MDS is enabled */
+#define OBD_INCOMPAT_SOM 0x00000020
+/** filesystem using iam format to store directory entries */
+#define OBD_INCOMPAT_IAM_DIR 0x00000040
+/** LMA attribute contains per-inode incompatible flags */
+#define OBD_INCOMPAT_LMA 0x00000080
+/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16
+ * bits are now used to store a generation. Once we start changing the layout
+ * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count
+ * will be confused by interpreting stripe_count | gen << 16 as the actual
+ * stripe count */
+#define OBD_INCOMPAT_LMM_VER 0x00000100
+/** multiple OI files for MDT */
+#define OBD_INCOMPAT_MULTI_OI 0x00000200
+
+/* Data stored per server at the head of the last_rcvd file. In le32 order.
+ This should be common to filter_internal.h, lustre_mds.h */
+struct lr_server_data {
+ __u8 lsd_uuid[40]; /* server UUID */
+ __u64 lsd_last_transno; /* last completed transaction ID */
+ __u64 lsd_compat14; /* reserved - compat with old last_rcvd */
+ __u64 lsd_mount_count; /* incarnation number */
+ __u32 lsd_feature_compat; /* compatible feature flags */
+ __u32 lsd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 lsd_feature_incompat;/* incompatible feature flags */
+ __u32 lsd_server_size; /* size of server data area */
+ __u32 lsd_client_start; /* start of per-client data area */
+ __u16 lsd_client_size; /* size of per-client data area */
+ __u16 lsd_subdir_count; /* number of subdirectories for objects */
+ __u64 lsd_catalog_oid; /* recovery catalog object id */
+ __u32 lsd_catalog_ogen; /* recovery catalog inode generation */
+ __u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */
+ __u32 lsd_osd_index; /* index number of OST in LOV */
+ __u32 lsd_padding1; /* was lsd_mdt_index, unused in 2.4.0 */
+ __u32 lsd_start_epoch; /* VBR: start epoch from last boot */
+ /** transaction values since lsd_trans_table_time */
+ __u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
+ /** start point of transno table below */
+ __u32 lsd_trans_table_time; /* time of first slot in table above */
+ __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
+ __u8 lsd_padding[LR_SERVER_SIZE - 288];
+};
+
+/* Data stored per client in the last_rcvd file. In le32 order. */
+struct lsd_client_data {
+ __u8 lcd_uuid[40]; /* client UUID */
+ __u64 lcd_last_transno; /* last completed transaction ID */
+ __u64 lcd_last_xid; /* xid for the last transaction */
+ __u32 lcd_last_result; /* result from last RPC */
+ __u32 lcd_last_data; /* per-op data (disposition for open &c.) */
+ /* for MDS_CLOSE requests */
+ __u64 lcd_last_close_transno; /* last completed transaction ID */
+ __u64 lcd_last_close_xid; /* xid for the last transaction */
+ __u32 lcd_last_close_result; /* result from last RPC */
+ __u32 lcd_last_close_data; /* per-op data */
+ /* VBR: last versions */
+ __u64 lcd_pre_versions[4];
+ __u32 lcd_last_epoch;
+ /** orphans handling for delayed export rely on that */
+ __u32 lcd_first_epoch;
+ __u8 lcd_padding[LR_CLIENT_SIZE - 128];
+};
+
+/* bug20354: the lcd_uuid for export of clients may be wrong */
+static inline void check_lcd(char *obd_name, int index,
+ struct lsd_client_data *lcd)
+{
+ int length = sizeof(lcd->lcd_uuid);
+ if (strnlen((char*)lcd->lcd_uuid, length) == length) {
+ lcd->lcd_uuid[length - 1] = '\0';
+
+ LCONSOLE_ERROR("the client UUID (%s) on %s for exports"
+ "stored in last_rcvd(index = %d) is bad!\n",
+ lcd->lcd_uuid, obd_name, index);
+ }
+}
+
+/* last_rcvd handling */
+static inline void lsd_le_to_cpu(struct lr_server_data *buf,
+ struct lr_server_data *lsd)
+{
+ int i;
+ memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
+ lsd->lsd_last_transno = le64_to_cpu(buf->lsd_last_transno);
+ lsd->lsd_compat14 = le64_to_cpu(buf->lsd_compat14);
+ lsd->lsd_mount_count = le64_to_cpu(buf->lsd_mount_count);
+ lsd->lsd_feature_compat = le32_to_cpu(buf->lsd_feature_compat);
+ lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
+ lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
+ lsd->lsd_server_size = le32_to_cpu(buf->lsd_server_size);
+ lsd->lsd_client_start = le32_to_cpu(buf->lsd_client_start);
+ lsd->lsd_client_size = le16_to_cpu(buf->lsd_client_size);
+ lsd->lsd_subdir_count = le16_to_cpu(buf->lsd_subdir_count);
+ lsd->lsd_catalog_oid = le64_to_cpu(buf->lsd_catalog_oid);
+ lsd->lsd_catalog_ogen = le32_to_cpu(buf->lsd_catalog_ogen);
+ memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
+ lsd->lsd_osd_index = le32_to_cpu(buf->lsd_osd_index);
+ lsd->lsd_padding1 = le32_to_cpu(buf->lsd_padding1);
+ lsd->lsd_start_epoch = le32_to_cpu(buf->lsd_start_epoch);
+ for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
+ lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
+ lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
+ lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
+}
+
+static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
+ struct lr_server_data *buf)
+{
+ int i;
+ memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
+ buf->lsd_last_transno = cpu_to_le64(lsd->lsd_last_transno);
+ buf->lsd_compat14 = cpu_to_le64(lsd->lsd_compat14);
+ buf->lsd_mount_count = cpu_to_le64(lsd->lsd_mount_count);
+ buf->lsd_feature_compat = cpu_to_le32(lsd->lsd_feature_compat);
+ buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
+ buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
+ buf->lsd_server_size = cpu_to_le32(lsd->lsd_server_size);
+ buf->lsd_client_start = cpu_to_le32(lsd->lsd_client_start);
+ buf->lsd_client_size = cpu_to_le16(lsd->lsd_client_size);
+ buf->lsd_subdir_count = cpu_to_le16(lsd->lsd_subdir_count);
+ buf->lsd_catalog_oid = cpu_to_le64(lsd->lsd_catalog_oid);
+ buf->lsd_catalog_ogen = cpu_to_le32(lsd->lsd_catalog_ogen);
+ memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
+ buf->lsd_osd_index = cpu_to_le32(lsd->lsd_osd_index);
+ buf->lsd_padding1 = cpu_to_le32(lsd->lsd_padding1);
+ buf->lsd_start_epoch = cpu_to_le32(lsd->lsd_start_epoch);
+ for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
+ buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
+ buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
+ buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
+}
+
+static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
+ struct lsd_client_data *lcd)
+{
+ memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
+ lcd->lcd_last_transno = le64_to_cpu(buf->lcd_last_transno);
+ lcd->lcd_last_xid = le64_to_cpu(buf->lcd_last_xid);
+ lcd->lcd_last_result = le32_to_cpu(buf->lcd_last_result);
+ lcd->lcd_last_data = le32_to_cpu(buf->lcd_last_data);
+ lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
+ lcd->lcd_last_close_xid = le64_to_cpu(buf->lcd_last_close_xid);
+ lcd->lcd_last_close_result = le32_to_cpu(buf->lcd_last_close_result);
+ lcd->lcd_last_close_data = le32_to_cpu(buf->lcd_last_close_data);
+ lcd->lcd_pre_versions[0] = le64_to_cpu(buf->lcd_pre_versions[0]);
+ lcd->lcd_pre_versions[1] = le64_to_cpu(buf->lcd_pre_versions[1]);
+ lcd->lcd_pre_versions[2] = le64_to_cpu(buf->lcd_pre_versions[2]);
+ lcd->lcd_pre_versions[3] = le64_to_cpu(buf->lcd_pre_versions[3]);
+ lcd->lcd_last_epoch = le32_to_cpu(buf->lcd_last_epoch);
+ lcd->lcd_first_epoch = le32_to_cpu(buf->lcd_first_epoch);
+}
+
+static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
+ struct lsd_client_data *buf)
+{
+ memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
+ buf->lcd_last_transno = cpu_to_le64(lcd->lcd_last_transno);
+ buf->lcd_last_xid = cpu_to_le64(lcd->lcd_last_xid);
+ buf->lcd_last_result = cpu_to_le32(lcd->lcd_last_result);
+ buf->lcd_last_data = cpu_to_le32(lcd->lcd_last_data);
+ buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
+ buf->lcd_last_close_xid = cpu_to_le64(lcd->lcd_last_close_xid);
+ buf->lcd_last_close_result = cpu_to_le32(lcd->lcd_last_close_result);
+ buf->lcd_last_close_data = cpu_to_le32(lcd->lcd_last_close_data);
+ buf->lcd_pre_versions[0] = cpu_to_le64(lcd->lcd_pre_versions[0]);
+ buf->lcd_pre_versions[1] = cpu_to_le64(lcd->lcd_pre_versions[1]);
+ buf->lcd_pre_versions[2] = cpu_to_le64(lcd->lcd_pre_versions[2]);
+ buf->lcd_pre_versions[3] = cpu_to_le64(lcd->lcd_pre_versions[3]);
+ buf->lcd_last_epoch = cpu_to_le32(lcd->lcd_last_epoch);
+ buf->lcd_first_epoch = cpu_to_le32(lcd->lcd_first_epoch);
+}
+
+static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
+{
+ return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ?
+ lcd->lcd_last_transno : lcd->lcd_last_close_transno);
+}
+
+static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
+{
+ return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ?
+ lcd->lcd_last_xid : lcd->lcd_last_close_xid);
+}
+
+/****************** superblock additional info *********************/
+
+struct ll_sb_info;
+
+struct lustre_sb_info {
+ int lsi_flags;
+ struct obd_device *lsi_mgc; /* mgc obd */
+ struct lustre_mount_data *lsi_lmd; /* mount command info */
+ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
+ struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/
+ struct vfsmount *lsi_srv_mnt; /* the one server mount */
+ atomic_t lsi_mounts; /* references to the srv_mnt */
+ char lsi_svname[MTI_NAME_MAXLEN];
+ char lsi_osd_obdname[64];
+ char lsi_osd_uuid[64];
+ struct obd_export *lsi_osd_exp;
+ char lsi_osd_type[16];
+ char lsi_fstype[16];
+ struct backing_dev_info lsi_bdi; /* each client mountpoint needs
+ own backing_dev_info */
+};
+
+#define LSI_UMOUNT_FAILOVER 0x00200000
+#define LSI_BDI_INITIALIZED 0x00400000
+
+#define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info))
+#define s2lsi_nocast(sb) ((sb)->s_fs_info)
+
+#define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile)
+#define get_mount_flags(sb) (s2lsi(sb)->lsi_lmd->lmd_flags)
+#define get_mntdev_name(sb) (s2lsi(sb)->lsi_lmd->lmd_dev)
+
+
+/****************** mount lookup info *********************/
+
+struct lustre_mount_info {
+ char *lmi_name;
+ struct super_block *lmi_sb;
+ struct vfsmount *lmi_mnt;
+ struct list_head lmi_list_chain;
+};
+
+/****************** prototypes *********************/
+
+/* obd_mount.c */
+int server_name2fsname(const char *svname, char *fsname, const char **endptr);
+int server_name2index(const char *svname, __u32 *idx, const char **endptr);
+int server_name2svname(const char *label, char *svname, const char **endptr,
+ size_t svsize);
+
+int lustre_put_lsi(struct super_block *sb);
+int lustre_start_simple(char *obdname, char *type, char *uuid,
+ char *s1, char *s2, char *s3, char *s4);
+int lustre_start_mgc(struct super_block *sb);
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
+ struct vfsmount *mnt));
+void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
+int lustre_common_put_super(struct super_block *sb);
+
+
+int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
+
+/** @} disk */
+
+#endif // _LUSTRE_DISK_H
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
new file mode 100644
index 000000000000..317f928fc151
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -0,0 +1,1671 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/** \defgroup LDLM Lustre Distributed Lock Manager
+ *
+ * Lustre DLM is based on VAX DLM.
+ * Its two main roles are:
+ * - To provide locking assuring consistency of data on all Lustre nodes.
+ * - To allow clients to cache state protected by a lock by holding the
+ * lock until a conflicting lock is requested or it is expired by the LRU.
+ *
+ * @{
+ */
+
+#ifndef _LUSTRE_DLM_H__
+#define _LUSTRE_DLM_H__
+
+#include <linux/lustre_dlm.h>
+
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_handles.h>
+#include <interval_tree.h> /* for interval_node{}, ldlm_extent */
+#include <lu_ref.h>
+
+struct obd_ops;
+struct obd_device;
+
+#define OBD_LDLM_DEVICENAME "ldlm"
+
+#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
+#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000))
+#define LDLM_CTIME_AGE_LIMIT (10)
+#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
+
+/**
+ * LDLM non-error return states
+ */
+typedef enum {
+ ELDLM_OK = 0,
+
+ ELDLM_LOCK_CHANGED = 300,
+ ELDLM_LOCK_ABORTED = 301,
+ ELDLM_LOCK_REPLACED = 302,
+ ELDLM_NO_LOCK_DATA = 303,
+ ELDLM_LOCK_WOULDBLOCK = 304,
+
+ ELDLM_NAMESPACE_EXISTS = 400,
+ ELDLM_BAD_NAMESPACE = 401
+} ldlm_error_t;
+
+/**
+ * LDLM namespace type.
+ * The "client" type is actually an indication that this is a narrow local view
+ * into complete namespace on the server. Such namespaces cannot make any
+ * decisions about lack of conflicts or do any autonomous lock granting without
+ * first speaking to a server.
+ */
+typedef enum {
+ LDLM_NAMESPACE_SERVER = 1 << 0,
+ LDLM_NAMESPACE_CLIENT = 1 << 1
+} ldlm_side_t;
+
+/**
+ * Declaration of flags sent through the wire.
+ **/
+#define LDLM_FL_LOCK_CHANGED 0x000001 /* extent, mode, or resource changed */
+
+/**
+ * If the server returns one of these flags, then the lock was put on that list.
+ * If the client sends one of these flags (during recovery ONLY!), it wants the
+ * lock added to the specified list, no questions asked.
+ */
+#define LDLM_FL_BLOCK_GRANTED 0x000002
+#define LDLM_FL_BLOCK_CONV 0x000004
+#define LDLM_FL_BLOCK_WAIT 0x000008
+
+/* Used to be LDLM_FL_CBPENDING 0x000010 moved to non-wire flags */
+
+#define LDLM_FL_AST_SENT 0x000020 /* blocking or cancel packet was
+ * queued for sending. */
+/* Used to be LDLM_FL_WAIT_NOREPROC 0x000040 moved to non-wire flags */
+/* Used to be LDLM_FL_CANCEL 0x000080 moved to non-wire flags */
+
+/**
+ * Lock is being replayed. This could probably be implied by the fact that one
+ * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous.
+ */
+#define LDLM_FL_REPLAY 0x000100
+
+#define LDLM_FL_INTENT_ONLY 0x000200 /* Don't grant lock, just do intent. */
+
+/* Used to be LDLM_FL_LOCAL_ONLY 0x000400 moved to non-wire flags */
+/* Used to be LDLM_FL_FAILED 0x000800 moved to non-wire flags */
+
+#define LDLM_FL_HAS_INTENT 0x001000 /* lock request has intent */
+
+/* Used to be LDLM_FL_CANCELING 0x002000 moved to non-wire flags */
+/* Used to be LDLM_FL_LOCAL 0x004000 moved to non-wire flags */
+
+#define LDLM_FL_DISCARD_DATA 0x010000 /* discard (no writeback) on cancel */
+
+#define LDLM_FL_NO_TIMEOUT 0x020000 /* Blocked by group lock - wait
+ * indefinitely */
+
+/** file & record locking */
+#define LDLM_FL_BLOCK_NOWAIT 0x040000 /* Server told not to wait if blocked.
+ * For AGL, OST will not send glimpse
+ * callback. */
+#define LDLM_FL_TEST_LOCK 0x080000 // return blocking lock
+
+/* Used to be LDLM_FL_LVB_READY 0x100000 moved to non-wire flags */
+/* Used to be LDLM_FL_KMS_IGNORE 0x200000 moved to non-wire flags */
+/* Used to be LDLM_FL_NO_LRU 0x400000 moved to non-wire flags */
+
+/* Immediatelly cancel such locks when they block some other locks. Send
+ * cancel notification to original lock holder, but expect no reply. This is
+ * for clients (like liblustre) that cannot be expected to reliably response
+ * to blocking AST. */
+#define LDLM_FL_CANCEL_ON_BLOCK 0x800000
+
+/* Flags flags inherited from parent lock when doing intents. */
+#define LDLM_INHERIT_FLAGS (LDLM_FL_CANCEL_ON_BLOCK)
+
+/* Used to be LDLM_FL_CP_REQD 0x1000000 moved to non-wire flags */
+/* Used to be LDLM_FL_CLEANED 0x2000000 moved to non-wire flags */
+/* Used to be LDLM_FL_ATOMIC_CB 0x4000000 moved to non-wire flags */
+/* Used to be LDLM_FL_BL_AST 0x10000000 moved to non-wire flags */
+/* Used to be LDLM_FL_BL_DONE 0x20000000 moved to non-wire flags */
+
+/* measure lock contention and return -EUSERS if locking contention is high */
+#define LDLM_FL_DENY_ON_CONTENTION 0x40000000
+
+/* These are flags that are mapped into the flags and ASTs of blocking locks */
+#define LDLM_AST_DISCARD_DATA 0x80000000 /* Add FL_DISCARD to blocking ASTs */
+
+/* Flags sent in AST lock_flags to be mapped into the receiving lock. */
+#define LDLM_AST_FLAGS (LDLM_FL_DISCARD_DATA)
+
+/*
+ * --------------------------------------------------------------------------
+ * NOTE! Starting from this point, that is, LDLM_FL_* flags with values above
+ * 0x80000000 will not be sent over the wire.
+ * --------------------------------------------------------------------------
+ */
+
+/**
+ * Declaration of flags not sent through the wire.
+ **/
+
+/**
+ * Used for marking lock as a target for -EINTR while cp_ast sleep
+ * emulation + race with upcoming bl_ast.
+ */
+#define LDLM_FL_FAIL_LOC 0x100000000ULL
+
+/**
+ * Used while processing the unused list to know that we have already
+ * handled this lock and decided to skip it.
+ */
+#define LDLM_FL_SKIPPED 0x200000000ULL
+/* this lock is being destroyed */
+#define LDLM_FL_CBPENDING 0x400000000ULL
+/* not a real flag, not saved in lock */
+#define LDLM_FL_WAIT_NOREPROC 0x800000000ULL
+/* cancellation callback already run */
+#define LDLM_FL_CANCEL 0x1000000000ULL
+#define LDLM_FL_LOCAL_ONLY 0x2000000000ULL
+/* don't run the cancel callback under ldlm_cli_cancel_unused */
+#define LDLM_FL_FAILED 0x4000000000ULL
+/* lock cancel has already been sent */
+#define LDLM_FL_CANCELING 0x8000000000ULL
+/* local lock (ie, no srv/cli split) */
+#define LDLM_FL_LOCAL 0x10000000000ULL
+/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that
+ * the LVB filling happens _after_ the lock has been granted, so another thread
+ * can match it before the LVB has been updated. As a dirty hack, we set
+ * LDLM_FL_LVB_READY only after we've done the LVB poop.
+ * this is only needed on LOV/OSC now, where LVB is actually used and callers
+ * must set it in input flags.
+ *
+ * The proper fix is to do the granting inside of the completion AST, which can
+ * be replaced with a LVB-aware wrapping function for OSC locks. That change is
+ * pretty high-risk, though, and would need a lot more testing. */
+#define LDLM_FL_LVB_READY 0x20000000000ULL
+/* A lock contributes to the known minimum size (KMS) calculation until it has
+ * finished the part of its cancelation that performs write back on its dirty
+ * pages. It can remain on the granted list during this whole time. Threads
+ * racing to update the KMS after performing their writeback need to know to
+ * exclude each other's locks from the calculation as they walk the granted
+ * list. */
+#define LDLM_FL_KMS_IGNORE 0x40000000000ULL
+/* completion AST to be executed */
+#define LDLM_FL_CP_REQD 0x80000000000ULL
+/* cleanup_resource has already handled the lock */
+#define LDLM_FL_CLEANED 0x100000000000ULL
+/* optimization hint: LDLM can run blocking callback from current context
+ * w/o involving separate thread. in order to decrease cs rate */
+#define LDLM_FL_ATOMIC_CB 0x200000000000ULL
+
+/* It may happen that a client initiates two operations, e.g. unlink and
+ * mkdir, such that the server sends a blocking AST for conflicting
+ * locks to this client for the first operation, whereas the second
+ * operation has canceled this lock and is waiting for rpc_lock which is
+ * taken by the first operation. LDLM_FL_BL_AST is set by
+ * ldlm_callback_handler() in the lock to prevent the Early Lock Cancel
+ * (ELC) code from cancelling it.
+ *
+ * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock
+ * cache is dropped to let ldlm_callback_handler() return EINVAL to the
+ * server. It is used when ELC RPC is already prepared and is waiting
+ * for rpc_lock, too late to send a separate CANCEL RPC. */
+#define LDLM_FL_BL_AST 0x400000000000ULL
+#define LDLM_FL_BL_DONE 0x800000000000ULL
+/* Don't put lock into the LRU list, so that it is not canceled due to aging.
+ * Used by MGC locks, they are cancelled only at unmount or by callback. */
+#define LDLM_FL_NO_LRU 0x1000000000000ULL
+
+/**
+ * The blocking callback is overloaded to perform two functions. These flags
+ * indicate which operation should be performed.
+ */
+#define LDLM_CB_BLOCKING 1
+#define LDLM_CB_CANCELING 2
+
+/**
+ * \name Lock Compatibility Matrix.
+ *
+ * A lock has both a type (extent, flock, inode bits, or plain) and a mode.
+ * Lock types are described in their respective implementation files:
+ * ldlm_{extent,flock,inodebits,plain}.c.
+ *
+ * There are six lock modes along with a compatibility matrix to indicate if
+ * two locks are compatible.
+ *
+ * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock
+ * on the parent.
+ * - PW: Protective Write (normal write) mode. When a client requests a write
+ * lock from an OST, a lock with PW mode will be issued.
+ * - PR: Protective Read (normal read) mode. When a client requests a read from
+ * an OST, a lock with PR mode will be issued. Also, if the client opens a
+ * file for execution, it is granted a lock with PR mode.
+ * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client
+ * requests a write lock during a file open operation.
+ * - CR Concurrent Read mode. When a client performs a path lookup, MDS grants
+ * an inodebit lock with the CR mode on the intermediate path component.
+ * - NL Null mode.
+ *
+ * <PRE>
+ * NL CR CW PR PW EX
+ * NL 1 1 1 1 1 1
+ * CR 1 1 1 1 1 0
+ * CW 1 1 1 0 0 0
+ * PR 1 1 0 1 0 0
+ * PW 1 1 0 0 0 0
+ * EX 1 0 0 0 0 0
+ * </PRE>
+ */
+/** @{ */
+#define LCK_COMPAT_EX LCK_NL
+#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR)
+#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR)
+#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW)
+#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW)
+#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX | LCK_GROUP)
+#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL)
+#define LCK_COMPAT_COS (LCK_COS)
+/** @} Lock Compatibility Matrix */
+
+extern ldlm_mode_t lck_compat_array[];
+
+static inline void lockmode_verify(ldlm_mode_t mode)
+{
+ LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE);
+}
+
+static inline int lockmode_compat(ldlm_mode_t exist_mode, ldlm_mode_t new_mode)
+{
+ return (lck_compat_array[exist_mode] & new_mode);
+}
+
+/*
+ *
+ * cluster name spaces
+ *
+ */
+
+#define DLM_OST_NAMESPACE 1
+#define DLM_MDS_NAMESPACE 2
+
+/* XXX
+ - do we just separate this by security domains and use a prefix for
+ multiple namespaces in the same domain?
+ -
+*/
+
+/**
+ * Locking rules for LDLM:
+ *
+ * lr_lock
+ *
+ * lr_lock
+ * waiting_locks_spinlock
+ *
+ * lr_lock
+ * led_lock
+ *
+ * lr_lock
+ * ns_lock
+ *
+ * lr_lvb_mutex
+ * lr_lock
+ *
+ */
+
+struct ldlm_pool;
+struct ldlm_lock;
+struct ldlm_resource;
+struct ldlm_namespace;
+
+/**
+ * Operations on LDLM pools.
+ * LDLM pool is a pool of locks in the namespace without any implicitly
+ * specified limits.
+ * Locks in the pool are organized in LRU.
+ * Local memory pressure or server instructions (e.g. mempressure on server)
+ * can trigger freeing of locks from the pool
+ */
+struct ldlm_pool_ops {
+ /** Recalculate pool \a pl usage */
+ int (*po_recalc)(struct ldlm_pool *pl);
+ /** Cancel at least \a nr locks from pool \a pl */
+ int (*po_shrink)(struct ldlm_pool *pl, int nr,
+ unsigned int gfp_mask);
+ int (*po_setup)(struct ldlm_pool *pl, int limit);
+};
+
+/** One second for pools thread check interval. Each pool has own period. */
+#define LDLM_POOLS_THREAD_PERIOD (1)
+
+/** ~6% margin for modest pools. See ldlm_pool.c for details. */
+#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4)
+
+/** Default recalc period for server side pools in sec. */
+#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1)
+
+/** Default recalc period for client side pools in sec. */
+#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10)
+
+/**
+ * LDLM pool structure to track granted locks.
+ * For purposes of determining when to release locks on e.g. memory pressure.
+ * This feature is commonly referred to as lru_resize.
+ */
+struct ldlm_pool {
+ /** Pool proc directory. */
+ proc_dir_entry_t *pl_proc_dir;
+ /** Pool name, must be long enough to hold compound proc entry name. */
+ char pl_name[100];
+ /** Lock for protecting SLV/CLV updates. */
+ spinlock_t pl_lock;
+ /** Number of allowed locks in in pool, both, client and server side. */
+ atomic_t pl_limit;
+ /** Number of granted locks in */
+ atomic_t pl_granted;
+ /** Grant rate per T. */
+ atomic_t pl_grant_rate;
+ /** Cancel rate per T. */
+ atomic_t pl_cancel_rate;
+ /** Server lock volume (SLV). Protected by pl_lock. */
+ __u64 pl_server_lock_volume;
+ /** Current biggest client lock volume. Protected by pl_lock. */
+ __u64 pl_client_lock_volume;
+ /** Lock volume factor. SLV on client is calculated as following:
+ * server_slv * lock_volume_factor. */
+ atomic_t pl_lock_volume_factor;
+ /** Time when last SLV from server was obtained. */
+ time_t pl_recalc_time;
+ /** Recalculation period for pool. */
+ time_t pl_recalc_period;
+ /** Recalculation and shrink operations. */
+ struct ldlm_pool_ops *pl_ops;
+ /** Number of planned locks for next period. */
+ int pl_grant_plan;
+ /** Pool statistics. */
+ struct lprocfs_stats *pl_stats;
+};
+
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
+ void *req_cookie, ldlm_mode_t mode, __u64 flags,
+ void *data);
+
+typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+
+/**
+ * LVB operations.
+ * LVB is Lock Value Block. This is a special opaque (to LDLM) value that could
+ * be associated with an LDLM lock and transferred from client to server and
+ * back.
+ *
+ * Currently LVBs are used by:
+ * - OSC-OST code to maintain current object size/times
+ * - layout lock code to return the layout when the layout lock is granted
+ */
+struct ldlm_valblock_ops {
+ int (*lvbo_init)(struct ldlm_resource *res);
+ int (*lvbo_update)(struct ldlm_resource *res,
+ struct ptlrpc_request *r,
+ int increase);
+ int (*lvbo_free)(struct ldlm_resource *res);
+ /* Return size of lvb data appropriate RPC size can be reserved */
+ int (*lvbo_size)(struct ldlm_lock *lock);
+ /* Called to fill in lvb data to RPC buffer @buf */
+ int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen);
+};
+
+/**
+ * LDLM pools related, type of lock pool in the namespace.
+ * Greedy means release cached locks aggressively
+ */
+typedef enum {
+ LDLM_NAMESPACE_GREEDY = 1 << 0,
+ LDLM_NAMESPACE_MODEST = 1 << 1
+} ldlm_appetite_t;
+
+/**
+ * Default values for the "max_nolock_size", "contention_time" and
+ * "contended_locks" namespace tunables.
+ */
+#define NS_DEFAULT_MAX_NOLOCK_BYTES 0
+#define NS_DEFAULT_CONTENTION_SECONDS 2
+#define NS_DEFAULT_CONTENDED_LOCKS 32
+
+struct ldlm_ns_bucket {
+ /** back pointer to namespace */
+ struct ldlm_namespace *nsb_namespace;
+ /**
+ * Estimated lock callback time. Used by adaptive timeout code to
+ * avoid spurious client evictions due to unresponsiveness when in
+ * fact the network or overall system load is at fault
+ */
+ struct adaptive_timeout nsb_at_estimate;
+};
+
+enum {
+ /** LDLM namespace lock stats */
+ LDLM_NSS_LOCKS = 0,
+ LDLM_NSS_LAST
+};
+
+typedef enum {
+ /** invalide type */
+ LDLM_NS_TYPE_UNKNOWN = 0,
+ /** mdc namespace */
+ LDLM_NS_TYPE_MDC,
+ /** mds namespace */
+ LDLM_NS_TYPE_MDT,
+ /** osc namespace */
+ LDLM_NS_TYPE_OSC,
+ /** ost namespace */
+ LDLM_NS_TYPE_OST,
+ /** mgc namespace */
+ LDLM_NS_TYPE_MGC,
+ /** mgs namespace */
+ LDLM_NS_TYPE_MGT,
+} ldlm_ns_type_t;
+
+/**
+ * LDLM Namespace.
+ *
+ * Namespace serves to contain locks related to a particular service.
+ * There are two kinds of namespaces:
+ * - Server namespace has knowledge of all locks and is therefore authoritative
+ * to make decisions like what locks could be granted and what conflicts
+ * exist during new lock enqueue.
+ * - Client namespace only has limited knowledge about locks in the namespace,
+ * only seeing locks held by the client.
+ *
+ * Every Lustre service has one server namespace present on the server serving
+ * that service. Every client connected to the service has a client namespace
+ * for it.
+ * Every lock obtained by client in that namespace is actually represented by
+ * two in-memory locks. One on the server and one on the client. The locks are
+ * linked by a special cookie by which one node can tell to the other which lock
+ * it actually means during communications. Such locks are called remote locks.
+ * The locks held by server only without any reference to a client are called
+ * local locks.
+ */
+struct ldlm_namespace {
+ /** Backward link to OBD, required for LDLM pool to store new SLV. */
+ struct obd_device *ns_obd;
+
+ /** Flag indicating if namespace is on client instead of server */
+ ldlm_side_t ns_client;
+
+ /** Resource hash table for namespace. */
+ cfs_hash_t *ns_rs_hash;
+
+ /** serialize */
+ spinlock_t ns_lock;
+
+ /** big refcount (by bucket) */
+ atomic_t ns_bref;
+
+ /**
+ * Namespace connect flags supported by server (may be changed via
+ * /proc, LRU resize may be disabled/enabled).
+ */
+ __u64 ns_connect_flags;
+
+ /** Client side original connect flags supported by server. */
+ __u64 ns_orig_connect_flags;
+
+ /* namespace proc dir entry */
+ struct proc_dir_entry *ns_proc_dir_entry;
+
+ /**
+ * Position in global namespace list linking all namespaces on
+ * the node.
+ */
+ struct list_head ns_list_chain;
+
+ /**
+ * List of unused locks for this namespace. This list is also called
+ * LRU lock list.
+ * Unused locks are locks with zero reader/writer reference counts.
+ * This list is only used on clients for lock caching purposes.
+ * When we want to release some locks voluntarily or if server wants
+ * us to release some locks due to e.g. memory pressure, we take locks
+ * to release from the head of this list.
+ * Locks are linked via l_lru field in \see struct ldlm_lock.
+ */
+ struct list_head ns_unused_list;
+ /** Number of locks in the LRU list above */
+ int ns_nr_unused;
+
+ /**
+ * Maximum number of locks permitted in the LRU. If 0, means locks
+ * are managed by pools and there is no preset limit, rather it is all
+ * controlled by available memory on this client and on server.
+ */
+ unsigned int ns_max_unused;
+ /** Maximum allowed age (last used time) for locks in the LRU */
+ unsigned int ns_max_age;
+ /**
+ * Server only: number of times we evicted clients due to lack of reply
+ * to ASTs.
+ */
+ unsigned int ns_timeouts;
+ /**
+ * Number of seconds since the file change time after which the
+ * MDT will return an UPDATE lock along with a LOOKUP lock.
+ * This allows the client to start caching negative dentries
+ * for a directory and may save an RPC for a later stat.
+ */
+ unsigned int ns_ctime_age_limit;
+
+ /**
+ * Used to rate-limit ldlm_namespace_dump calls.
+ * \see ldlm_namespace_dump. Increased by 10 seconds every time
+ * it is called.
+ */
+ cfs_time_t ns_next_dump;
+
+ /** "policy" function that does actual lock conflict determination */
+ ldlm_res_policy ns_policy;
+
+ /**
+ * LVB operations for this namespace.
+ * \see struct ldlm_valblock_ops
+ */
+ struct ldlm_valblock_ops *ns_lvbo;
+
+ /**
+ * Used by filter code to store pointer to OBD of the service.
+ * Should be dropped in favor of \a ns_obd
+ */
+ void *ns_lvbp;
+
+ /**
+ * Wait queue used by __ldlm_namespace_free. Gets woken up every time
+ * a resource is removed.
+ */
+ wait_queue_head_t ns_waitq;
+ /** LDLM pool structure for this namespace */
+ struct ldlm_pool ns_pool;
+ /** Definition of how eagerly unused locks will be released from LRU */
+ ldlm_appetite_t ns_appetite;
+
+ /**
+ * If more than \a ns_contended_locks are found, the resource is
+ * considered to be contended. Lock enqueues might specify that no
+ * contended locks should be granted
+ */
+ unsigned ns_contended_locks;
+
+ /**
+ * The resources in this namespace remember contended state during
+ * \a ns_contention_time, in seconds.
+ */
+ unsigned ns_contention_time;
+
+ /**
+ * Limit size of contended extent locks, in bytes.
+ * If extended lock is requested for more then this many bytes and
+ * caller instructs us not to grant contended locks, we would disregard
+ * such a request.
+ */
+ unsigned ns_max_nolock_size;
+
+ /** Limit of parallel AST RPC count. */
+ unsigned ns_max_parallel_ast;
+
+ /** Callback to cancel locks before replaying it during recovery. */
+ ldlm_cancel_for_recovery ns_cancel_for_recovery;
+
+ /** LDLM lock stats */
+ struct lprocfs_stats *ns_stats;
+
+ /**
+ * Flag to indicate namespace is being freed. Used to determine if
+ * recalculation of LDLM pool statistics should be skipped.
+ */
+ unsigned ns_stopping:1;
+};
+
+/**
+ * Returns 1 if namespace \a ns is a client namespace.
+ */
+static inline int ns_is_client(struct ldlm_namespace *ns)
+{
+ LASSERT(ns != NULL);
+ LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
+ LDLM_NAMESPACE_SERVER)));
+ LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
+ ns->ns_client == LDLM_NAMESPACE_SERVER);
+ return ns->ns_client == LDLM_NAMESPACE_CLIENT;
+}
+
+/**
+ * Returns 1 if namespace \a ns is a server namespace.
+ */
+static inline int ns_is_server(struct ldlm_namespace *ns)
+{
+ LASSERT(ns != NULL);
+ LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
+ LDLM_NAMESPACE_SERVER)));
+ LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
+ ns->ns_client == LDLM_NAMESPACE_SERVER);
+ return ns->ns_client == LDLM_NAMESPACE_SERVER;
+}
+
+/**
+ * Returns 1 if namespace \a ns supports early lock cancel (ELC).
+ */
+static inline int ns_connect_cancelset(struct ldlm_namespace *ns)
+{
+ LASSERT(ns != NULL);
+ return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET);
+}
+
+/**
+ * Returns 1 if this namespace supports lru_resize.
+ */
+static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
+{
+ LASSERT(ns != NULL);
+ return !!(ns->ns_connect_flags & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline void ns_register_cancel(struct ldlm_namespace *ns,
+ ldlm_cancel_for_recovery arg)
+{
+ LASSERT(ns != NULL);
+ ns->ns_cancel_for_recovery = arg;
+}
+
+struct ldlm_lock;
+
+/** Type for blocking callback function of a lock. */
+typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *new, void *data,
+ int flag);
+/** Type for completion callback function of a lock. */
+typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags,
+ void *data);
+/** Type for glimpse callback function of a lock. */
+typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
+/** Type for weight callback function of a lock. */
+typedef unsigned long (*ldlm_weigh_callback)(struct ldlm_lock *lock);
+
+/** Work list for sending GL ASTs to multiple locks. */
+struct ldlm_glimpse_work {
+ struct ldlm_lock *gl_lock; /* lock to glimpse */
+ struct list_head gl_list; /* linkage to other gl work structs */
+ __u32 gl_flags;/* see LDLM_GL_WORK_* below */
+ union ldlm_gl_desc *gl_desc; /* glimpse descriptor to be packed in
+ * glimpse callback request */
+};
+
+/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */
+#define LDLM_GL_WORK_NOFREE 0x1
+
+/** Interval node data for each LDLM_EXTENT lock. */
+struct ldlm_interval {
+ struct interval_node li_node; /* node for tree management */
+ struct list_head li_group; /* the locks which have the same
+ * policy - group of the policy */
+};
+#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node)
+
+/**
+ * Interval tree for extent locks.
+ * The interval tree must be accessed under the resource lock.
+ * Interval trees are used for granted extent locks to speed up conflicts
+ * lookup. See ldlm/interval_tree.c for more details.
+ */
+struct ldlm_interval_tree {
+ /** Tree size. */
+ int lit_size;
+ ldlm_mode_t lit_mode; /* lock mode */
+ struct interval_node *lit_root; /* actual ldlm_interval */
+};
+
+/** Whether to track references to exports by LDLM locks. */
+#define LUSTRE_TRACKS_LOCK_EXP_REFS (0)
+
+/** Cancel flags. */
+typedef enum {
+ LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */
+ LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */
+ LCF_BL_AST = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST
+ * in the same RPC */
+} ldlm_cancel_flags_t;
+
+struct ldlm_flock {
+ __u64 start;
+ __u64 end;
+ __u64 owner;
+ __u64 blocking_owner;
+ struct obd_export *blocking_export;
+ /* Protected by the hash lock */
+ __u32 blocking_refs;
+ __u32 pid;
+};
+
+typedef union {
+ struct ldlm_extent l_extent;
+ struct ldlm_flock l_flock;
+ struct ldlm_inodebits l_inodebits;
+} ldlm_policy_data_t;
+
+void ldlm_convert_policy_to_wire(ldlm_type_t type,
+ const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy);
+void ldlm_convert_policy_to_local(struct obd_export *exp, ldlm_type_t type,
+ const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+
+enum lvb_type {
+ LVB_T_NONE = 0,
+ LVB_T_OST = 1,
+ LVB_T_LQUOTA = 2,
+ LVB_T_LAYOUT = 3,
+};
+
+/**
+ * LDLM lock structure
+ *
+ * Represents a single LDLM lock and its state in memory. Each lock is
+ * associated with a single ldlm_resource, the object which is being
+ * locked. There may be multiple ldlm_locks on a single resource,
+ * depending on the lock type and whether the locks are conflicting or
+ * not.
+ */
+struct ldlm_lock {
+ /**
+ * Local lock handle.
+ * When remote side wants to tell us about a lock, they address
+ * it by this opaque handle. The handle does not hold a
+ * reference on the ldlm_lock, so it can be safely passed to
+ * other threads or nodes. When the lock needs to be accessed
+ * from the handle, it is looked up again in the lock table, and
+ * may no longer exist.
+ *
+ * Must be first in the structure.
+ */
+ struct portals_handle l_handle;
+ /**
+ * Lock reference count.
+ * This is how many users have pointers to actual structure, so that
+ * we do not accidentally free lock structure that is in use.
+ */
+ atomic_t l_refc;
+ /**
+ * Internal spinlock protects l_resource. We should hold this lock
+ * first before taking res_lock.
+ */
+ spinlock_t l_lock;
+ /**
+ * Pointer to actual resource this lock is in.
+ * ldlm_lock_change_resource() can change this.
+ */
+ struct ldlm_resource *l_resource;
+ /**
+ * List item for client side LRU list.
+ * Protected by ns_lock in struct ldlm_namespace.
+ */
+ struct list_head l_lru;
+ /**
+ * Linkage to resource's lock queues according to current lock state.
+ * (could be granted, waiting or converting)
+ * Protected by lr_lock in struct ldlm_resource.
+ */
+ struct list_head l_res_link;
+ /**
+ * Tree node for ldlm_extent.
+ */
+ struct ldlm_interval *l_tree_node;
+ /**
+ * Per export hash of locks.
+ * Protected by per-bucket exp->exp_lock_hash locks.
+ */
+ struct hlist_node l_exp_hash;
+ /**
+ * Per export hash of flock locks.
+ * Protected by per-bucket exp->exp_flock_hash locks.
+ */
+ struct hlist_node l_exp_flock_hash;
+ /**
+ * Requested mode.
+ * Protected by lr_lock.
+ */
+ ldlm_mode_t l_req_mode;
+ /**
+ * Granted mode, also protected by lr_lock.
+ */
+ ldlm_mode_t l_granted_mode;
+ /** Lock completion handler pointer. Called when lock is granted. */
+ ldlm_completion_callback l_completion_ast;
+ /**
+ * Lock blocking AST handler pointer.
+ * It plays two roles:
+ * - as a notification of an attempt to queue a conflicting lock (once)
+ * - as a notification when the lock is being cancelled.
+ *
+ * As such it's typically called twice: once for the initial conflict
+ * and then once more when the last user went away and the lock is
+ * cancelled (could happen recursively).
+ */
+ ldlm_blocking_callback l_blocking_ast;
+ /**
+ * Lock glimpse handler.
+ * Glimpse handler is used to obtain LVB updates from a client by
+ * server
+ */
+ ldlm_glimpse_callback l_glimpse_ast;
+
+ /** XXX apparently unused "weight" handler. To be removed? */
+ ldlm_weigh_callback l_weigh_ast;
+
+ /**
+ * Lock export.
+ * This is a pointer to actual client export for locks that were granted
+ * to clients. Used server-side.
+ */
+ struct obd_export *l_export;
+ /**
+ * Lock connection export.
+ * Pointer to server export on a client.
+ */
+ struct obd_export *l_conn_export;
+
+ /**
+ * Remote lock handle.
+ * If the lock is remote, this is the handle of the other side lock
+ * (l_handle)
+ */
+ struct lustre_handle l_remote_handle;
+
+ /**
+ * Representation of private data specific for a lock type.
+ * Examples are: extent range for extent lock or bitmask for ibits locks
+ */
+ ldlm_policy_data_t l_policy_data;
+
+ /**
+ * Lock state flags.
+ * Like whenever we receive any blocking requests for this lock, etc.
+ * Protected by lr_lock.
+ */
+ __u64 l_flags;
+ /**
+ * Lock r/w usage counters.
+ * Protected by lr_lock.
+ */
+ __u32 l_readers;
+ __u32 l_writers;
+ /**
+ * If the lock is granted, a process sleeps on this waitq to learn when
+ * it's no longer in use. If the lock is not granted, a process sleeps
+ * on this waitq to learn when it becomes granted.
+ */
+ wait_queue_head_t l_waitq;
+
+ /**
+ * Seconds. It will be updated if there is any activity related to
+ * the lock, e.g. enqueue the lock or send blocking AST.
+ */
+ cfs_time_t l_last_activity;
+
+ /**
+ * Time last used by e.g. being matched by lock match.
+ * Jiffies. Should be converted to time if needed.
+ */
+ cfs_time_t l_last_used;
+
+ /** Originally requested extent for the extent lock. */
+ struct ldlm_extent l_req_extent;
+
+ unsigned int l_failed:1,
+ /**
+ * Set for locks that were removed from class hash table and will be
+ * destroyed when last reference to them is released. Set by
+ * ldlm_lock_destroy_internal().
+ *
+ * Protected by lock and resource locks.
+ */
+ l_destroyed:1,
+ /*
+ * it's set in lock_res_and_lock() and unset in unlock_res_and_lock().
+ *
+ * NB: compared with check_res_locked(), checking this bit is cheaper.
+ * Also, spin_is_locked() is deprecated for kernel code; one reason is
+ * because it works only for SMP so user needs to add extra macros like
+ * LASSERT_SPIN_LOCKED for uniprocessor kernels.
+ */
+ l_res_locked:1,
+ /*
+ * It's set once we call ldlm_add_waiting_lock_res_locked()
+ * to start the lock-timeout timer and it will never be reset.
+ *
+ * Protected by lock_res_and_lock().
+ */
+ l_waited:1,
+ /** Flag whether this is a server namespace lock. */
+ l_ns_srv:1;
+
+ /*
+ * Client-side-only members.
+ */
+
+ enum lvb_type l_lvb_type;
+
+ /**
+ * Temporary storage for a LVB received during an enqueue operation.
+ */
+ __u32 l_lvb_len;
+ void *l_lvb_data;
+
+ /** Private storage for lock user. Opaque to LDLM. */
+ void *l_ast_data;
+
+ /*
+ * Server-side-only members.
+ */
+
+ /**
+ * Connection cookie for the client originating the operation.
+ * Used by Commit on Share (COS) code. Currently only used for
+ * inodebits locks on MDS.
+ */
+ __u64 l_client_cookie;
+
+ /**
+ * List item for locks waiting for cancellation from clients.
+ * The lists this could be linked into are:
+ * waiting_locks_list (protected by waiting_locks_spinlock),
+ * then if the lock timed out, it is moved to
+ * expired_lock_thread.elt_expired_locks for further processing.
+ * Protected by elt_lock.
+ */
+ struct list_head l_pending_chain;
+
+ /**
+ * Set when lock is sent a blocking AST. Time in seconds when timeout
+ * is reached and client holding this lock could be evicted.
+ * This timeout could be further extended by e.g. certain IO activity
+ * under this lock.
+ * \see ost_rw_prolong_locks
+ */
+ cfs_time_t l_callback_timeout;
+
+ /** Local PID of process which created this lock. */
+ __u32 l_pid;
+
+ /**
+ * Number of times blocking AST was sent for this lock.
+ * This is for debugging. Valid values are 0 and 1, if there is an
+ * attempt to send blocking AST more than once, an assertion would be
+ * hit. \see ldlm_work_bl_ast_lock
+ */
+ int l_bl_ast_run;
+ /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */
+ struct list_head l_bl_ast;
+ /** List item ldlm_add_ast_work_item() for case of completion ASTs. */
+ struct list_head l_cp_ast;
+ /** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */
+ struct list_head l_rk_ast;
+
+ /**
+ * Pointer to a conflicting lock that caused blocking AST to be sent
+ * for this lock
+ */
+ struct ldlm_lock *l_blocking_lock;
+
+ /**
+ * Protected by lr_lock, linkages to "skip lists".
+ * For more explanations of skip lists see ldlm/ldlm_inodebits.c
+ */
+ struct list_head l_sl_mode;
+ struct list_head l_sl_policy;
+
+ /** Reference tracking structure to debug leaked locks. */
+ struct lu_ref l_reference;
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ /* Debugging stuff for bug 20498, for tracking export references. */
+ /** number of export references taken */
+ int l_exp_refs_nr;
+ /** link all locks referencing one export */
+ struct list_head l_exp_refs_link;
+ /** referenced export object */
+ struct obd_export *l_exp_refs_target;
+#endif
+ /**
+ * export blocking dlm lock list, protected by
+ * l_export->exp_bl_list_lock.
+ * Lock order of waiting_lists_spinlock, exp_bl_list_lock and res lock
+ * is: res lock -> exp_bl_list_lock -> wanting_lists_spinlock.
+ */
+ struct list_head l_exp_list;
+};
+
+/**
+ * LDLM resource description.
+ * Basically, resource is a representation for a single object.
+ * Object has a name which is currently 4 64-bit integers. LDLM user is
+ * responsible for creation of a mapping between objects it wants to be
+ * protected and resource names.
+ *
+ * A resource can only hold locks of a single lock type, though there may be
+ * multiple ldlm_locks on a single resource, depending on the lock type and
+ * whether the locks are conflicting or not.
+ */
+struct ldlm_resource {
+ struct ldlm_ns_bucket *lr_ns_bucket;
+
+ /**
+ * List item for list in namespace hash.
+ * protected by ns_lock
+ */
+ struct hlist_node lr_hash;
+
+ /** Spinlock to protect locks under this resource. */
+ spinlock_t lr_lock;
+
+ /**
+ * protected by lr_lock
+ * @{ */
+ /** List of locks in granted state */
+ struct list_head lr_granted;
+ /** List of locks waiting to change their granted mode (converted) */
+ struct list_head lr_converting;
+ /**
+ * List of locks that could not be granted due to conflicts and
+ * that are waiting for conflicts to go away */
+ struct list_head lr_waiting;
+ /** @} */
+
+ /* XXX No longer needed? Remove ASAP */
+ ldlm_mode_t lr_most_restr;
+
+ /** Type of locks this resource can hold. Only one type per resource. */
+ ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
+
+ /** Resource name */
+ struct ldlm_res_id lr_name;
+ /** Reference count for this resource */
+ atomic_t lr_refcount;
+
+ /**
+ * Interval trees (only for extent locks) for all modes of this resource
+ */
+ struct ldlm_interval_tree lr_itree[LCK_MODE_NUM];
+
+ /**
+ * Server-side-only lock value block elements.
+ * To serialize lvbo_init.
+ */
+ struct mutex lr_lvb_mutex;
+ int lr_lvb_len;
+ /** protected by lr_lock */
+ void *lr_lvb_data;
+
+ /** When the resource was considered as contended. */
+ cfs_time_t lr_contention_time;
+ /** List of references to this resource. For debugging. */
+ struct lu_ref lr_reference;
+
+ struct inode *lr_lvb_inode;
+};
+
+static inline bool ldlm_has_layout(struct ldlm_lock *lock)
+{
+ return lock->l_resource->lr_type == LDLM_IBITS &&
+ lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT;
+}
+
+static inline char *
+ldlm_ns_name(struct ldlm_namespace *ns)
+{
+ return ns->ns_rs_hash->hs_name;
+}
+
+static inline struct ldlm_namespace *
+ldlm_res_to_ns(struct ldlm_resource *res)
+{
+ return res->lr_ns_bucket->nsb_namespace;
+}
+
+static inline struct ldlm_namespace *
+ldlm_lock_to_ns(struct ldlm_lock *lock)
+{
+ return ldlm_res_to_ns(lock->l_resource);
+}
+
+static inline char *
+ldlm_lock_to_ns_name(struct ldlm_lock *lock)
+{
+ return ldlm_ns_name(ldlm_lock_to_ns(lock));
+}
+
+static inline struct adaptive_timeout *
+ldlm_lock_to_ns_at(struct ldlm_lock *lock)
+{
+ return &lock->l_resource->lr_ns_bucket->nsb_at_estimate;
+}
+
+static inline int ldlm_lvbo_init(struct ldlm_resource *res)
+{
+ struct ldlm_namespace *ns = ldlm_res_to_ns(res);
+
+ if (ns->ns_lvbo != NULL && ns->ns_lvbo->lvbo_init != NULL)
+ return ns->ns_lvbo->lvbo_init(res);
+
+ return 0;
+}
+
+static inline int ldlm_lvbo_size(struct ldlm_lock *lock)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ if (ns->ns_lvbo != NULL && ns->ns_lvbo->lvbo_size != NULL)
+ return ns->ns_lvbo->lvbo_size(lock);
+
+ return 0;
+}
+
+static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ if (ns->ns_lvbo != NULL) {
+ LASSERT(ns->ns_lvbo->lvbo_fill != NULL);
+ return ns->ns_lvbo->lvbo_fill(lock, buf, len);
+ }
+ return 0;
+}
+
+struct ldlm_ast_work {
+ struct ldlm_lock *w_lock;
+ int w_blocking;
+ struct ldlm_lock_desc w_desc;
+ struct list_head w_list;
+ int w_flags;
+ void *w_data;
+ int w_datalen;
+};
+
+/**
+ * Common ldlm_enqueue parameters
+ */
+struct ldlm_enqueue_info {
+ __u32 ei_type; /** Type of the lock being enqueued. */
+ __u32 ei_mode; /** Mode of the lock being enqueued. */
+ void *ei_cb_bl; /** blocking lock callback */
+ void *ei_cb_cp; /** lock completion callback */
+ void *ei_cb_gl; /** lock glimpse callback */
+ void *ei_cb_wg; /** lock weigh callback */
+ void *ei_cbdata; /** Data to be passed into callbacks. */
+};
+
+extern struct obd_ops ldlm_obd_ops;
+
+extern char *ldlm_lockname[];
+extern char *ldlm_typename[];
+extern char *ldlm_it2str(int it);
+
+/**
+ * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
+ * For the cases where we do not have actual lock to print along
+ * with a debugging message that is ldlm-related
+ */
+#define LDLM_DEBUG_NOLOCK(format, a...) \
+ CDEBUG(D_DLMTRACE, "### " format "\n" , ##a)
+
+/**
+ * Support function for lock information printing into debug logs.
+ * \see LDLM_DEBUG
+ */
+#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do { \
+ CFS_CHECK_STACK(msgdata, mask, cdls); \
+ \
+ if (((mask) & D_CANTMASK) != 0 || \
+ ((libcfs_debug & (mask)) != 0 && \
+ (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
+ _ldlm_lock_debug(lock, msgdata, fmt, ##a); \
+} while(0)
+
+void _ldlm_lock_debug(struct ldlm_lock *lock,
+ struct libcfs_debug_msg_data *data,
+ const char *fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Rate-limited version of lock printing function.
+ */
+#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \
+ static cfs_debug_limit_state_t _ldlm_cdls; \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \
+ ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt , ##a);\
+} while (0)
+
+#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
+#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+
+/** Non-rate-limited lock printing function for debugging purposes. */
+#define LDLM_DEBUG(lock, fmt, a...) do { \
+ if (likely(lock != NULL)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL); \
+ ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock, \
+ "### " fmt , ##a); \
+ } else { \
+ LDLM_DEBUG_NOLOCK("no dlm lock: " fmt, ##a); \
+ } \
+} while (0)
+
+typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags,
+ int first_enq, ldlm_error_t *err,
+ struct list_head *work_list);
+
+/**
+ * Return values for lock iterators.
+ * Also used during deciding of lock grants and cancellations.
+ */
+#define LDLM_ITER_CONTINUE 1 /* keep iterating */
+#define LDLM_ITER_STOP 2 /* stop iterating */
+
+typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
+typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
+
+/** \defgroup ldlm_iterator Lock iterators
+ *
+ * LDLM provides for a way to iterate through every lock on a resource or
+ * namespace or every resource in a namespace.
+ * @{ */
+int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
+ void *closure);
+void ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
+ void *closure);
+int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
+ ldlm_iterator_t iter, void *data);
+/** @} ldlm_iterator */
+
+int ldlm_replay_locks(struct obd_import *imp);
+
+/* ldlm_flock.c */
+int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
+
+/* ldlm_extent.c */
+__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
+
+struct ldlm_callback_suite {
+ ldlm_completion_callback lcs_completion;
+ ldlm_blocking_callback lcs_blocking;
+ ldlm_glimpse_callback lcs_glimpse;
+ ldlm_weigh_callback lcs_weigh;
+};
+
+/* ldlm_lockd.c */
+int ldlm_del_waiting_lock(struct ldlm_lock *lock);
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout);
+int ldlm_get_ref(void);
+void ldlm_put_ref(void);
+int ldlm_init_export(struct obd_export *exp);
+void ldlm_destroy_export(struct obd_export *exp);
+struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req);
+
+/* ldlm_lock.c */
+void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg);
+void ldlm_lock2handle(const struct ldlm_lock *lock,
+ struct lustre_handle *lockh);
+struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags);
+void ldlm_cancel_callback(struct ldlm_lock *);
+int ldlm_lock_remove_from_lru(struct ldlm_lock *);
+int ldlm_lock_set_data(struct lustre_handle *, void *);
+
+/**
+ * Obtain a lock reference by its handle.
+ */
+static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h)
+{
+ return __ldlm_handle2lock(h, 0);
+}
+
+#define LDLM_LOCK_REF_DEL(lock) \
+ lu_ref_del(&lock->l_reference, "handle", current)
+
+static inline struct ldlm_lock *
+ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags)
+{
+ struct ldlm_lock *lock;
+
+ lock = __ldlm_handle2lock(h, flags);
+ if (lock != NULL)
+ LDLM_LOCK_REF_DEL(lock);
+ return lock;
+}
+
+/**
+ * Update Lock Value Block Operations (LVBO) on a resource taking into account
+ * data from reqest \a r
+ */
+static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
+ struct ptlrpc_request *r, int increase)
+{
+ if (ldlm_res_to_ns(res)->ns_lvbo &&
+ ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) {
+ return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r,
+ increase);
+ }
+ return 0;
+}
+
+int ldlm_error2errno(ldlm_error_t error);
+ldlm_error_t ldlm_errno2error(int err_no); /* don't call it `errno': this
+ * confuses user-space. */
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void ldlm_dump_export_locks(struct obd_export *exp);
+#endif
+
+/**
+ * Release a temporary lock reference obtained by ldlm_handle2lock() or
+ * __ldlm_handle2lock().
+ */
+#define LDLM_LOCK_PUT(lock) \
+do { \
+ LDLM_LOCK_REF_DEL(lock); \
+ /*LDLM_DEBUG((lock), "put");*/ \
+ ldlm_lock_put(lock); \
+} while (0)
+
+/**
+ * Release a lock reference obtained by some other means (see
+ * LDLM_LOCK_PUT()).
+ */
+#define LDLM_LOCK_RELEASE(lock) \
+do { \
+ /*LDLM_DEBUG((lock), "put");*/ \
+ ldlm_lock_put(lock); \
+} while (0)
+
+#define LDLM_LOCK_GET(lock) \
+({ \
+ ldlm_lock_get(lock); \
+ /*LDLM_DEBUG((lock), "get");*/ \
+ lock; \
+})
+
+#define ldlm_lock_list_put(head, member, count) \
+({ \
+ struct ldlm_lock *_lock, *_next; \
+ int c = count; \
+ list_for_each_entry_safe(_lock, _next, head, member) { \
+ if (c-- == 0) \
+ break; \
+ list_del_init(&_lock->member); \
+ LDLM_LOCK_RELEASE(_lock); \
+ } \
+ LASSERT(c <= 0); \
+})
+
+struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
+void ldlm_lock_put(struct ldlm_lock *lock);
+void ldlm_lock_destroy(struct ldlm_lock *lock);
+void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
+void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
+int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
+void ldlm_lock_fail_match(struct ldlm_lock *lock);
+void ldlm_lock_allow_match(struct ldlm_lock *lock);
+void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
+ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
+ const struct ldlm_res_id *, ldlm_type_t type,
+ ldlm_policy_data_t *, ldlm_mode_t mode,
+ struct lustre_handle *, int unref);
+ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+ __u64 *bits);
+struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
+ __u32 *flags);
+void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode);
+void ldlm_lock_cancel(struct ldlm_lock *lock);
+void ldlm_reprocess_all(struct ldlm_resource *res);
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns);
+void ldlm_lock_dump_handle(int level, struct lustre_handle *);
+void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
+
+/* resource.c */
+struct ldlm_namespace *
+ldlm_namespace_new(struct obd_device *obd, char *name,
+ ldlm_side_t client, ldlm_appetite_t apt,
+ ldlm_ns_type_t ns_type);
+int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
+void ldlm_namespace_free(struct ldlm_namespace *ns,
+ struct obd_import *imp, int force);
+void ldlm_namespace_register(struct ldlm_namespace *ns, ldlm_side_t client);
+void ldlm_namespace_unregister(struct ldlm_namespace *ns, ldlm_side_t client);
+void ldlm_namespace_move_locked(struct ldlm_namespace *ns, ldlm_side_t client);
+struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client);
+void ldlm_namespace_get(struct ldlm_namespace *ns);
+void ldlm_namespace_put(struct ldlm_namespace *ns);
+int ldlm_proc_setup(void);
+#ifdef LPROCFS
+void ldlm_proc_cleanup(void);
+#else
+static inline void ldlm_proc_cleanup(void) {}
+#endif
+
+/* resource.c - internal */
+struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
+ struct ldlm_resource *parent,
+ const struct ldlm_res_id *,
+ ldlm_type_t type, int create);
+struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
+int ldlm_resource_putref(struct ldlm_resource *res);
+void ldlm_resource_add_lock(struct ldlm_resource *res,
+ struct list_head *head,
+ struct ldlm_lock *lock);
+void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
+void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
+void ldlm_dump_all_namespaces(ldlm_side_t client, int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
+int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
+ const struct ldlm_res_id *);
+
+#define LDLM_RESOURCE_ADDREF(res) do { \
+ lu_ref_add_atomic(&(res)->lr_reference, __FUNCTION__, current); \
+} while (0)
+
+#define LDLM_RESOURCE_DELREF(res) do { \
+ lu_ref_del(&(res)->lr_reference, __FUNCTION__, current); \
+} while (0)
+
+/* ldlm_request.c */
+int ldlm_expired_completion_wait(void *data);
+/** \defgroup ldlm_local_ast Default AST handlers for local locks
+ * These AST handlers are typically used for server-side local locks and are
+ * also used by client-side lock handlers to perform minimum level base
+ * processing.
+ * @{ */
+int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock);
+int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag);
+int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp);
+int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data);
+int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
+/** @} ldlm_local_ast */
+
+/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users.
+ * These are typically used by client and server (*_local versions)
+ * to obtain and release locks.
+ * @{ */
+int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
+ struct ldlm_enqueue_info *einfo,
+ const struct ldlm_res_id *res_id,
+ ldlm_policy_data_t const *policy, __u64 *flags,
+ void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
+ struct lustre_handle *lockh, int async);
+int ldlm_prep_enqueue_req(struct obd_export *exp,
+ struct ptlrpc_request *req,
+ struct list_head *cancels,
+ int count);
+int ldlm_prep_elc_req(struct obd_export *exp,
+ struct ptlrpc_request *req,
+ int version, int opc, int canceloff,
+ struct list_head *cancels, int count);
+
+struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, int lvb_len);
+int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req,
+ const struct ldlm_request *dlm_req,
+ const struct ldlm_callback_suite *cbs);
+int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
+ ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode,
+ __u64 *flags, void *lvb, __u32 lvb_len,
+ struct lustre_handle *lockh, int rc);
+int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_type_t type, ldlm_policy_data_t *policy,
+ ldlm_mode_t mode, __u64 *flags,
+ ldlm_blocking_callback blocking,
+ ldlm_completion_callback completion,
+ ldlm_glimpse_callback glimpse,
+ void *data, __u32 lvb_len, enum lvb_type lvb_type,
+ const __u64 *client_cookie,
+ struct lustre_handle *lockh);
+int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
+ void *data, __u32 data_len);
+int ldlm_cli_convert(struct lustre_handle *, int new_mode, __u32 *flags);
+int ldlm_cli_update_pool(struct ptlrpc_request *req);
+int ldlm_cli_cancel(struct lustre_handle *lockh,
+ ldlm_cancel_flags_t cancel_flags);
+int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
+ ldlm_cancel_flags_t flags, void *opaque);
+int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags,
+ void *opaque);
+int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *head,
+ int count, ldlm_cancel_flags_t flags);
+int ldlm_cancel_resource_local(struct ldlm_resource *res,
+ struct list_head *cancels,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode, int lock_flags,
+ ldlm_cancel_flags_t cancel_flags, void *opaque);
+int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
+ ldlm_cancel_flags_t flags);
+int ldlm_cli_cancel_list(struct list_head *head, int count,
+ struct ptlrpc_request *req, ldlm_cancel_flags_t flags);
+/** @} ldlm_cli_api */
+
+/* mds/handler.c */
+/* This has to be here because recursive inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
+
+
+/* ioctls for trying requests */
+#define IOC_LDLM_TYPE 'f'
+#define IOC_LDLM_MIN_NR 40
+
+#define IOC_LDLM_TEST _IOWR('f', 40, long)
+#define IOC_LDLM_DUMP _IOWR('f', 41, long)
+#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long)
+#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long)
+#define IOC_LDLM_MAX_NR 43
+
+/**
+ * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more
+ * than one lock_res is dead-lock safe.
+ */
+enum lock_res_type {
+ LRT_NORMAL,
+ LRT_NEW
+};
+
+/** Lock resource. */
+static inline void lock_res(struct ldlm_resource *res)
+{
+ spin_lock(&res->lr_lock);
+}
+
+/** Lock resource with a way to instruct lockdep code about nestedness-safe. */
+static inline void lock_res_nested(struct ldlm_resource *res,
+ enum lock_res_type mode)
+{
+ spin_lock_nested(&res->lr_lock, mode);
+}
+
+/** Unlock resource. */
+static inline void unlock_res(struct ldlm_resource *res)
+{
+ spin_unlock(&res->lr_lock);
+}
+
+/** Check if resource is already locked, assert if not. */
+static inline void check_res_locked(struct ldlm_resource *res)
+{
+ LASSERT(spin_is_locked(&res->lr_lock));
+}
+
+struct ldlm_resource * lock_res_and_lock(struct ldlm_lock *lock);
+void unlock_res_and_lock(struct ldlm_lock *lock);
+
+/* ldlm_pool.c */
+/** \defgroup ldlm_pools Various LDLM pool related functions
+ * There are not used outside of ldlm.
+ * @{
+ */
+void ldlm_pools_recalc(ldlm_side_t client);
+int ldlm_pools_init(void);
+void ldlm_pools_fini(void);
+
+int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
+ int idx, ldlm_side_t client);
+int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
+ unsigned int gfp_mask);
+void ldlm_pool_fini(struct ldlm_pool *pl);
+int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
+int ldlm_pool_recalc(struct ldlm_pool *pl);
+__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
+__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
+__u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
+__u32 ldlm_pool_get_limit(struct ldlm_pool *pl);
+void ldlm_pool_set_slv(struct ldlm_pool *pl, __u64 slv);
+void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv);
+void ldlm_pool_set_limit(struct ldlm_pool *pl, __u32 limit);
+void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
+void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
+/** @} */
+
+#endif
+/** @} LDLM */
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
new file mode 100644
index 000000000000..b94f76a3301b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -0,0 +1,95 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_idmap.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_EACL_H
+#define _LUSTRE_EACL_H
+
+/** \defgroup eacl eacl
+ *
+ * @{
+ */
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+#include <linux/posix_acl_xattr.h>
+
+typedef struct {
+ __u16 e_tag;
+ __u16 e_perm;
+ __u32 e_id;
+ __u32 e_stat;
+} ext_acl_xattr_entry;
+
+typedef struct {
+ __u32 a_count;
+ ext_acl_xattr_entry a_entries[0];
+} ext_acl_xattr_header;
+
+#define CFS_ACL_XATTR_SIZE(count, prefix) \
+ (sizeof(prefix ## _header) + (count) * sizeof(prefix ## _entry))
+
+#define CFS_ACL_XATTR_COUNT(size, prefix) \
+ (((size) - sizeof(prefix ## _header)) / sizeof(prefix ## _entry))
+
+
+extern ext_acl_xattr_header *
+lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size);
+extern int
+lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, int size,
+ posix_acl_xattr_header **out);
+extern void
+lustre_posix_acl_xattr_free(posix_acl_xattr_header *header, int size);
+extern void
+lustre_ext_acl_xattr_free(ext_acl_xattr_header *header);
+extern int
+lustre_acl_xattr_merge2posix(posix_acl_xattr_header *posix_header, int size,
+ ext_acl_xattr_header *ext_header,
+ posix_acl_xattr_header **out);
+extern ext_acl_xattr_header *
+lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
+ ext_acl_xattr_header *ext_header);
+
+#endif /* CONFIG_FS_POSIX_ACL */
+
+/** @} eacl */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h
new file mode 100644
index 000000000000..d61c020a4643
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_export.h
@@ -0,0 +1,389 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup obd_export PortalRPC export definitions
+ *
+ * @{
+ */
+
+#ifndef __EXPORT_H
+#define __EXPORT_H
+
+/** \defgroup export export
+ *
+ * @{
+ */
+
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+
+struct mds_client_data;
+struct mdt_client_data;
+struct mds_idmap_table;
+struct mdt_idmap_table;
+
+/**
+ * Target-specific export data
+ */
+struct tg_export_data {
+ /** Protects led_lcd below */
+ struct mutex ted_lcd_lock;
+ /** Per-client data for each export */
+ struct lsd_client_data *ted_lcd;
+ /** Offset of record in last_rcvd file */
+ loff_t ted_lr_off;
+ /** Client index in last_rcvd file */
+ int ted_lr_idx;
+};
+
+/**
+ * MDT-specific export data
+ */
+struct mdt_export_data {
+ struct tg_export_data med_ted;
+ /** List of all files opened by client on this MDT */
+ struct list_head med_open_head;
+ spinlock_t med_open_lock; /* med_open_head, mfd_list */
+ /** Bitmask of all ibit locks this MDT understands */
+ __u64 med_ibits_known;
+ struct mutex med_idmap_mutex;
+ struct lustre_idmap_table *med_idmap;
+};
+
+struct ec_export_data { /* echo client */
+ struct list_head eced_locks;
+};
+
+/* In-memory access to client data from OST struct */
+/** Filter (oss-side) specific import data */
+struct filter_export_data {
+ struct tg_export_data fed_ted;
+ spinlock_t fed_lock; /**< protects fed_mod_list */
+ long fed_dirty; /* in bytes */
+ long fed_grant; /* in bytes */
+ struct list_head fed_mod_list; /* files being modified */
+ int fed_mod_count;/* items in fed_writing list */
+ long fed_pending; /* bytes just being written */
+ __u32 fed_group;
+ __u8 fed_pagesize; /* log2 of client page size */
+};
+
+struct mgs_export_data {
+ struct list_head med_clients; /* mgc fs client via this exp */
+ spinlock_t med_lock; /* protect med_clients */
+};
+
+/**
+ * per-NID statistics structure.
+ * It tracks access patterns to this export on a per-client-NID basis
+ */
+struct nid_stat {
+ lnet_nid_t nid;
+ struct hlist_node nid_hash;
+ struct list_head nid_list;
+ struct obd_device *nid_obd;
+ struct proc_dir_entry *nid_proc;
+ struct lprocfs_stats *nid_stats;
+ struct lprocfs_stats *nid_ldlm_stats;
+ atomic_t nid_exp_ref_count; /* for obd_nid_stats_hash
+ exp_nid_stats */
+};
+
+#define nidstat_getref(nidstat) \
+do { \
+ atomic_inc(&(nidstat)->nid_exp_ref_count); \
+} while(0)
+
+#define nidstat_putref(nidstat) \
+do { \
+ atomic_dec(&(nidstat)->nid_exp_ref_count); \
+ LASSERTF(atomic_read(&(nidstat)->nid_exp_ref_count) >= 0, \
+ "stat %p nid_exp_ref_count < 0\n", nidstat); \
+} while(0)
+
+enum obd_option {
+ OBD_OPT_FORCE = 0x0001,
+ OBD_OPT_FAILOVER = 0x0002,
+ OBD_OPT_ABORT_RECOV = 0x0004,
+};
+
+/**
+ * Export structure. Represents target-side of connection in portals.
+ * Also used in Lustre to connect between layers on the same node when
+ * there is no network-connection in-between.
+ * For every connected client there is an export structure on the server
+ * attached to the same obd device.
+ */
+struct obd_export {
+ /**
+ * Export handle, it's id is provided to client on connect
+ * Subsequent client RPCs contain this handle id to identify
+ * what export they are talking to.
+ */
+ struct portals_handle exp_handle;
+ atomic_t exp_refcount;
+ /**
+ * Set of counters below is to track where export references are
+ * kept. The exp_rpc_count is used for reconnect handling also,
+ * the cb_count and locks_count are for debug purposes only for now.
+ * The sum of them should be less than exp_refcount by 3
+ */
+ atomic_t exp_rpc_count; /* RPC references */
+ atomic_t exp_cb_count; /* Commit callback references */
+ /** Number of queued replay requests to be processes */
+ atomic_t exp_replay_count;
+ atomic_t exp_locks_count; /** Lock references */
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ struct list_head exp_locks_list;
+ spinlock_t exp_locks_list_guard;
+#endif
+ /** UUID of client connected to this export */
+ struct obd_uuid exp_client_uuid;
+ /** To link all exports on an obd device */
+ struct list_head exp_obd_chain;
+ struct hlist_node exp_uuid_hash; /** uuid-export hash*/
+ struct hlist_node exp_nid_hash; /** nid-export hash */
+ /**
+ * All exports eligible for ping evictor are linked into a list
+ * through this field in "most time since last request on this export"
+ * order
+ * protected by obd_dev_lock
+ */
+ struct list_head exp_obd_chain_timed;
+ /** Obd device of this export */
+ struct obd_device *exp_obd;
+ /**
+ * "reverse" import to send requests (e.g. from ldlm) back to client
+ * exp_lock protect its change
+ */
+ struct obd_import *exp_imp_reverse;
+ struct nid_stat *exp_nid_stats;
+ struct lprocfs_stats *exp_md_stats;
+ /** Active connetion */
+ struct ptlrpc_connection *exp_connection;
+ /** Connection count value from last succesful reconnect rpc */
+ __u32 exp_conn_cnt;
+ /** Hash list of all ldlm locks granted on this export */
+ cfs_hash_t *exp_lock_hash;
+ /**
+ * Hash list for Posix lock deadlock detection, added with
+ * ldlm_lock::l_exp_flock_hash.
+ */
+ cfs_hash_t *exp_flock_hash;
+ struct list_head exp_outstanding_replies;
+ struct list_head exp_uncommitted_replies;
+ spinlock_t exp_uncommitted_replies_lock;
+ /** Last committed transno for this export */
+ __u64 exp_last_committed;
+ /** When was last request received */
+ cfs_time_t exp_last_request_time;
+ /** On replay all requests waiting for replay are linked here */
+ struct list_head exp_req_replay_queue;
+ /**
+ * protects exp_flags, exp_outstanding_replies and the change
+ * of exp_imp_reverse
+ */
+ spinlock_t exp_lock;
+ /** Compatibility flags for this export are embedded into
+ * exp_connect_data */
+ struct obd_connect_data exp_connect_data;
+ enum obd_option exp_flags;
+ unsigned long exp_failed:1,
+ exp_in_recovery:1,
+ exp_disconnected:1,
+ exp_connecting:1,
+ /** VBR: export missed recovery */
+ exp_delayed:1,
+ /** VBR: failed version checking */
+ exp_vbr_failed:1,
+ exp_req_replay_needed:1,
+ exp_lock_replay_needed:1,
+ exp_need_sync:1,
+ exp_flvr_changed:1,
+ exp_flvr_adapt:1,
+ exp_libclient:1, /* liblustre client? */
+ /* client timed out and tried to reconnect,
+ * but couldn't because of active rpcs */
+ exp_abort_active_req:1,
+ /* if to swap nidtbl entries for 2.2 clients.
+ * Only used by the MGS to fix LU-1644. */
+ exp_need_mne_swab:1;
+ /* also protected by exp_lock */
+ enum lustre_sec_part exp_sp_peer;
+ struct sptlrpc_flavor exp_flvr; /* current */
+ struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */
+ cfs_time_t exp_flvr_expire[2]; /* seconds */
+
+ /** protects exp_hp_rpcs */
+ spinlock_t exp_rpc_lock;
+ struct list_head exp_hp_rpcs; /* (potential) HP RPCs */
+
+ /** blocking dlm lock list, protected by exp_bl_list_lock */
+ struct list_head exp_bl_list;
+ spinlock_t exp_bl_list_lock;
+
+ /** Target specific data */
+ union {
+ struct tg_export_data eu_target_data;
+ struct mdt_export_data eu_mdt_data;
+ struct filter_export_data eu_filter_data;
+ struct ec_export_data eu_ec_data;
+ struct mgs_export_data eu_mgs_data;
+ } u;
+};
+
+#define exp_target_data u.eu_target_data
+#define exp_mdt_data u.eu_mdt_data
+#define exp_filter_data u.eu_filter_data
+#define exp_ec_data u.eu_ec_data
+
+static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp)
+{
+ return &exp->exp_connect_data.ocd_connect_flags;
+}
+
+static inline __u64 exp_connect_flags(struct obd_export *exp)
+{
+ return *exp_connect_flags_ptr(exp);
+}
+
+static inline int exp_max_brw_size(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE)
+ return exp->exp_connect_data.ocd_brw_size;
+
+ return ONE_MB_BRW_SIZE;
+}
+
+static inline int exp_connect_multibulk(struct obd_export *exp)
+{
+ return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE;
+}
+
+static inline int exp_expired(struct obd_export *exp, cfs_duration_t age)
+{
+ LASSERT(exp->exp_delayed);
+ return cfs_time_before(cfs_time_add(exp->exp_last_request_time, age),
+ cfs_time_current_sec());
+}
+
+static inline int exp_connect_cancelset(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET);
+}
+
+static inline int exp_connect_lru_resize(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline int exp_connect_rmtclient(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_RMT_CLIENT);
+}
+
+static inline int client_is_remote(struct obd_export *exp)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+
+ return !!(imp->imp_connect_data.ocd_connect_flags &
+ OBD_CONNECT_RMT_CLIENT);
+}
+
+static inline int exp_connect_vbr(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ LASSERT(exp->exp_connection);
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
+}
+
+static inline int exp_connect_som(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_SOM);
+}
+
+static inline int exp_connect_umask(struct obd_export *exp)
+{
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK);
+}
+
+static inline int imp_connect_lru_resize(struct obd_import *imp)
+{
+ struct obd_connect_data *ocd;
+
+ LASSERT(imp != NULL);
+ ocd = &imp->imp_connect_data;
+ return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline int exp_connect_layout(struct obd_export *exp)
+{
+ return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK);
+}
+
+static inline bool exp_connect_lvb_type(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE)
+ return true;
+ else
+ return false;
+}
+
+static inline bool imp_connect_lvb_type(struct obd_import *imp)
+{
+ struct obd_connect_data *ocd;
+
+ LASSERT(imp != NULL);
+ ocd = &imp->imp_connect_data;
+ if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE)
+ return true;
+ else
+ return false;
+}
+
+extern struct obd_export *class_conn2export(struct lustre_handle *conn);
+extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
+
+/** @} export */
+
+#endif /* __EXPORT_H */
+/** @} obd_export */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
new file mode 100644
index 000000000000..7d20cba07287
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -0,0 +1,762 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_fid.h
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#ifndef __LINUX_FID_H
+#define __LINUX_FID_H
+
+/** \defgroup fid fid
+ *
+ * @{
+ *
+ * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
+ * describes the FID namespace and interoperability requirements for FIDs.
+ * The important parts of that document are included here for reference.
+ *
+ * FID
+ * File IDentifier generated by client from range allocated by the SEQuence
+ * service and stored in struct lu_fid. The FID is composed of three parts:
+ * SEQuence, ObjectID, and VERsion. The SEQ component is a filesystem
+ * unique 64-bit integer, and only one client is ever assigned any SEQ value.
+ * The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved
+ * for system use. The OID component is a 32-bit value generated by the
+ * client on a per-SEQ basis to allow creating many unique FIDs without
+ * communication with the server. The VER component is a 32-bit value that
+ * distinguishes between different FID instantiations, such as snapshots or
+ * separate subtrees within the filesystem. FIDs with the same VER field
+ * are considered part of the same namespace.
+ *
+ * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and
+ * MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while
+ * OSTs use 64-bit Lustre object IDs and generation numbers.
+ *
+ * NEW filesystems are those formatted since the introduction of FIDs.
+ *
+ * IGIF
+ * Inode and Generation In FID, a surrogate FID used to globally identify
+ * an existing object on OLD formatted MDT file system. This would only be
+ * used on MDT0 in a DNE filesystem, because there cannot be more than one
+ * MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1]
+ * range, where inode number is stored in SEQ, and inode generation is in OID.
+ * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem,
+ * which is the maximum possible for an ldiskfs backend. It also assumes
+ * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible
+ * to clients, which has always been true.
+ *
+ * IDIF
+ * object ID In FID, a surrogate FID used to globally identify an existing
+ * OST object on OLD formatted OST file system. Belongs to a sequence in
+ * [2^32, 2^33 - 1]. Sequence number is calculated as:
+ *
+ * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff)
+ *
+ * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object
+ * ID. The generation of unique SEQ values per OST allows the IDIF FIDs to
+ * be identified in the FLD correctly. The OID field is calculated as:
+ *
+ * objid & 0xffffffff
+ *
+ * that is, it consists of lower 32 bits of object ID. For objects within
+ * the IDIF range, object ID extraction will be:
+ *
+ * o_id = (fid->f_seq & 0x7fff) << 16 | fid->f_oid;
+ * o_seq = 0; // formerly group number
+ *
+ * NOTE: This assumes that no more than 2^48-1 objects have ever been created
+ * on any OST, and that no more than 65535 OSTs are in use. Both are very
+ * reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming
+ * a maximum creation rate of 1M objects per second for a maximum of 9 years,
+ * or combinations thereof.
+ *
+ * OST_MDT0
+ * Surrogate FID used to identify an existing object on OLD formatted OST
+ * filesystem. Belongs to the reserved SEQuence 0, and is used prior to
+ * the introduction of FID-on-OST, at which point IDIF will be used to
+ * identify objects as residing on a specific OST.
+ *
+ * LLOG
+ * For Lustre Log objects the object sequence 1 is used. This is compatible
+ * with both OLD and NEW namespaces, as this SEQ number is in the
+ * ext3/ldiskfs reserved inode range and does not conflict with IGIF
+ * sequence numbers.
+ *
+ * ECHO
+ * For testing OST IO performance the object sequence 2 is used. This is
+ * compatible with both OLD and NEW namespaces, as this SEQ number is in
+ * the ext3/ldiskfs reserved inode range and does not conflict with IGIF
+ * sequence numbers.
+ *
+ * OST_MDT1 .. OST_MAX
+ * For testing with multiple MDTs the object sequence 3 through 9 is used,
+ * allowing direct mapping of MDTs 1 through 7 respectively, for a total
+ * of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group"
+ * mappings. However, this SEQ range is only for testing prior to any
+ * production DNE release, as the objects in this range conflict across all
+ * OSTs, as the OST index is not part of the FID. For production DNE usage,
+ * OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs.
+ *
+ * DLM OST objid to IDIF mapping
+ * For compatibility with existing OLD OST network protocol structures, the
+ * FID must map onto the o_id and o_seq in a manner that ensures existing
+ * objects are identified consistently for IO, as well as onto the LDLM
+ * namespace to ensure IDIFs there is only a single resource name for any
+ * object in the DLM. The OLD OST object DLM resource mapping is:
+ *
+ * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases
+ *
+ * The NEW OST object DLM resource mapping is the same for both MDT and OST:
+ *
+ * resource[] = {SEQ, OID, VER, HASH};
+ *
+ * NOTE: for mapping IDIF values to DLM resource names the o_id may be
+ * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible
+ * for the o_id numbers to overlap FID SEQ numbers in the resource. However,
+ * in all production releases the OLD o_seq field is always zero, and all
+ * valid FID OID values are non-zero, so the lock resources will not collide.
+ * Even so, the MDT and OST resources are also in different LDLM namespaces.
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_req_layout.h>
+#include <lustre_mdt.h>
+#include <obd.h>
+
+
+struct lu_site;
+struct lu_context;
+
+/* Whole sequences space range and zero range definitions */
+extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE;
+extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE;
+extern const struct lu_fid LUSTRE_BFL_FID;
+extern const struct lu_fid LU_OBF_FID;
+extern const struct lu_fid LU_DOT_LUSTRE_FID;
+
+enum {
+ /*
+ * This is how may metadata FIDs may be allocated in one sequence(128k)
+ */
+ LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL,
+
+ /*
+ * This is how many data FIDs could be allocated in one sequence(4B - 1)
+ */
+ LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL,
+
+ /*
+ * How many sequences to allocate to a client at once.
+ */
+ LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL,
+
+ /*
+ * seq allocation pool size.
+ */
+ LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000,
+
+ /*
+ * This is how many sequences may be in one super-sequence allocated to
+ * MDTs.
+ */
+ LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH)
+};
+
+enum {
+ /** 2^6 FIDs for OI containers */
+ OSD_OI_FID_OID_BITS = 6,
+ /** reserve enough FIDs in case we want more in the future */
+ OSD_OI_FID_OID_BITS_MAX = 10,
+};
+
+/** special OID for local objects */
+enum local_oid {
+ /** \see fld_mod_init */
+ FLD_INDEX_OID = 3UL,
+ /** \see fid_mod_init */
+ FID_SEQ_CTL_OID = 4UL,
+ FID_SEQ_SRV_OID = 5UL,
+ /** \see mdd_mod_init */
+ MDD_ROOT_INDEX_OID = 6UL, /* deprecated in 2.4 */
+ MDD_ORPHAN_OID = 7UL, /* deprecated in 2.4 */
+ MDD_LOV_OBJ_OID = 8UL,
+ MDD_CAPA_KEYS_OID = 9UL,
+ /** \see mdt_mod_init */
+ LAST_RECV_OID = 11UL,
+ OSD_FS_ROOT_OID = 13UL,
+ ACCT_USER_OID = 15UL,
+ ACCT_GROUP_OID = 16UL,
+ LFSCK_BOOKMARK_OID = 17UL,
+ OTABLE_IT_OID = 18UL,
+ /* These two definitions are obsolete
+ * OFD_GROUP0_LAST_OID = 20UL,
+ * OFD_GROUP4K_LAST_OID = 20UL+4096,
+ */
+ OFD_LAST_GROUP_OID = 4117UL,
+ LLOG_CATALOGS_OID = 4118UL,
+ MGS_CONFIGS_OID = 4119UL,
+ OFD_HEALTH_CHECK_OID = 4120UL,
+ MDD_LOV_OBJ_OSEQ = 4121UL,
+ LFSCK_NAMESPACE_OID = 4122UL,
+ REMOTE_PARENT_DIR_OID = 4123UL,
+};
+
+static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
+{
+ fid->f_seq = FID_SEQ_LOCAL_FILE;
+ fid->f_oid = oid;
+ fid->f_ver = 0;
+}
+
+static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid)
+{
+ fid->f_seq = FID_SEQ_LOCAL_NAME;
+ fid->f_oid = oid;
+ fid->f_ver = 0;
+}
+
+/* For new FS (>= 2.4), the root FID will be changed to
+ * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4),
+ * the root FID will still be IGIF */
+static inline int fid_is_root(const struct lu_fid *fid)
+{
+ return unlikely((fid_seq(fid) == FID_SEQ_ROOT &&
+ fid_oid(fid) == 1));
+}
+
+static inline int fid_is_dot_lustre(const struct lu_fid *fid)
+{
+ return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
+ fid_oid(fid) == FID_OID_DOT_LUSTRE);
+}
+
+static inline int fid_is_obf(const struct lu_fid *fid)
+{
+ return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
+ fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF);
+}
+
+static inline int fid_is_otable_it(const struct lu_fid *fid)
+{
+ return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
+ fid_oid(fid) == OTABLE_IT_OID);
+}
+
+static inline int fid_is_acct(const struct lu_fid *fid)
+{
+ return fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
+ (fid_oid(fid) == ACCT_USER_OID ||
+ fid_oid(fid) == ACCT_GROUP_OID);
+}
+
+static inline int fid_is_quota(const struct lu_fid *fid)
+{
+ return fid_seq(fid) == FID_SEQ_QUOTA ||
+ fid_seq(fid) == FID_SEQ_QUOTA_GLB;
+}
+
+static inline int fid_is_namespace_visible(const struct lu_fid *fid)
+{
+ const __u64 seq = fid_seq(fid);
+
+ /* Here, we cannot distinguish whether the normal FID is for OST
+ * object or not. It is caller's duty to check more if needed. */
+ return (!fid_is_last_id(fid) &&
+ (fid_seq_is_norm(seq) || fid_seq_is_igif(seq))) ||
+ fid_is_root(fid) || fid_is_dot_lustre(fid);
+}
+
+static inline int fid_seq_in_fldb(__u64 seq)
+{
+ return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) ||
+ fid_seq_is_root(seq) || fid_seq_is_dot(seq);
+}
+
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+{
+ if (fid_seq_is_mdt0(seq)) {
+ fid->f_seq = fid_idif_seq(0, 0);
+ } else {
+ LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
+ fid_seq_is_idif(seq), LPX64"\n", seq);
+ fid->f_seq = seq;
+ }
+ fid->f_oid = 0;
+ fid->f_ver = 0;
+}
+
+enum lu_mgr_type {
+ LUSTRE_SEQ_SERVER,
+ LUSTRE_SEQ_CONTROLLER
+};
+
+struct lu_server_seq;
+
+/* Client sequence manager interface. */
+struct lu_client_seq {
+ /* Sequence-controller export. */
+ struct obd_export *lcs_exp;
+ struct mutex lcs_mutex;
+
+ /*
+ * Range of allowed for allocation sequeces. When using lu_client_seq on
+ * clients, this contains meta-sequence range. And for servers this
+ * contains super-sequence range.
+ */
+ struct lu_seq_range lcs_space;
+
+ /* Seq related proc */
+ proc_dir_entry_t *lcs_proc_dir;
+
+ /* This holds last allocated fid in last obtained seq */
+ struct lu_fid lcs_fid;
+
+ /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */
+ enum lu_cli_type lcs_type;
+
+ /*
+ * Service uuid, passed from MDT + seq name to form unique seq name to
+ * use it with procfs.
+ */
+ char lcs_name[80];
+
+ /*
+ * Sequence width, that is how many objects may be allocated in one
+ * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH.
+ */
+ __u64 lcs_width;
+
+ /* Seq-server for direct talking */
+ struct lu_server_seq *lcs_srv;
+
+ /* wait queue for fid allocation and update indicator */
+ wait_queue_head_t lcs_waitq;
+ int lcs_update;
+};
+
+/* server sequence manager interface */
+struct lu_server_seq {
+ /* Available sequences space */
+ struct lu_seq_range lss_space;
+
+ /* keeps highwater in lsr_end for seq allocation algorithm */
+ struct lu_seq_range lss_lowater_set;
+ struct lu_seq_range lss_hiwater_set;
+
+ /*
+ * Device for server side seq manager needs (saving sequences to backing
+ * store).
+ */
+ struct dt_device *lss_dev;
+
+ /* /seq file object device */
+ struct dt_object *lss_obj;
+
+ /* Seq related proc */
+ proc_dir_entry_t *lss_proc_dir;
+
+ /* LUSTRE_SEQ_SERVER or LUSTRE_SEQ_CONTROLLER */
+ enum lu_mgr_type lss_type;
+
+ /* Client interafce to request controller */
+ struct lu_client_seq *lss_cli;
+
+ /* Mutex for protecting allocation */
+ struct mutex lss_mutex;
+
+ /*
+ * Service uuid, passed from MDT + seq name to form unique seq name to
+ * use it with procfs.
+ */
+ char lss_name[80];
+
+ /*
+ * Allocation chunks for super and meta sequences. Default values are
+ * LUSTRE_SEQ_SUPER_WIDTH and LUSTRE_SEQ_META_WIDTH.
+ */
+ __u64 lss_width;
+
+ /*
+ * minimum lss_alloc_set size that should be allocated from
+ * lss_space
+ */
+ __u64 lss_set_width;
+
+ /* sync is needed for update operation */
+ __u32 lss_need_sync;
+
+ /**
+ * Pointer to site object, required to access site fld.
+ */
+ struct seq_server_site *lss_site;
+};
+
+int seq_query(struct com_thread_info *info);
+int seq_handle(struct ptlrpc_request *req);
+
+/* Server methods */
+int seq_server_init(struct lu_server_seq *seq,
+ struct dt_device *dev,
+ const char *prefix,
+ enum lu_mgr_type type,
+ struct seq_server_site *ss,
+ const struct lu_env *env);
+
+void seq_server_fini(struct lu_server_seq *seq,
+ const struct lu_env *env);
+
+int seq_server_alloc_super(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env);
+
+int seq_server_alloc_meta(struct lu_server_seq *seq,
+ struct lu_seq_range *out,
+ const struct lu_env *env);
+
+int seq_server_set_cli(struct lu_server_seq *seq,
+ struct lu_client_seq *cli,
+ const struct lu_env *env);
+
+/* Client methods */
+int seq_client_init(struct lu_client_seq *seq,
+ struct obd_export *exp,
+ enum lu_cli_type type,
+ const char *prefix,
+ struct lu_server_seq *srv);
+
+void seq_client_fini(struct lu_client_seq *seq);
+
+void seq_client_flush(struct lu_client_seq *seq);
+
+int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq,
+ struct lu_fid *fid);
+int seq_client_get_seq(const struct lu_env *env, struct lu_client_seq *seq,
+ seqno_t *seqnr);
+int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss);
+/* Fids common stuff */
+int fid_is_local(const struct lu_env *env,
+ struct lu_site *site, const struct lu_fid *fid);
+
+int client_fid_init(struct obd_device *obd, struct obd_export *exp,
+ enum lu_cli_type type);
+int client_fid_fini(struct obd_device *obd);
+
+/* fid locking */
+
+struct ldlm_namespace;
+
+/*
+ * Build (DLM) resource name from FID.
+ *
+ * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
+ * but was moved into name[1] along with the OID to avoid consuming the
+ * renaming name[2,3] fields that need to be used for the quota identifier.
+ */
+static inline struct ldlm_res_id *
+fid_build_reg_res_name(const struct lu_fid *f,
+ struct ldlm_res_id *name)
+{
+ memset(name, 0, sizeof *name);
+ name->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(f);
+ name->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(f);
+ return name;
+}
+
+/*
+ * Build (DLM) resource identifier from global quota FID and quota ID.
+ */
+static inline struct ldlm_res_id *
+fid_build_quota_resid(const struct lu_fid *glb_fid, union lquota_id *qid,
+ struct ldlm_res_id *res)
+{
+ fid_build_reg_res_name(glb_fid, res);
+ res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid);
+ res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid);
+ return res;
+}
+
+/*
+ * Extract global FID and quota ID from resource name
+ */
+static inline void fid_extract_quota_resid(struct ldlm_res_id *res,
+ struct lu_fid *glb_fid,
+ union lquota_id *qid)
+{
+ glb_fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF];
+ glb_fid->f_oid = (__u32)res->name[LUSTRE_RES_ID_VER_OID_OFF];
+ glb_fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
+
+ qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF];
+ qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF];
+ qid->qid_fid.f_ver =
+ (__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32);
+}
+
+/*
+ * Return true if resource is for object identified by fid.
+ */
+static inline int fid_res_name_eq(const struct lu_fid *f,
+ const struct ldlm_res_id *name)
+{
+ return name->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(f) &&
+ name->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(f);
+}
+
+/* reverse function of fid_build_reg_res_name() */
+static inline void fid_build_from_res_name(struct lu_fid *f,
+ const struct ldlm_res_id *name)
+{
+ fid_zero(f);
+ f->f_seq = name->name[LUSTRE_RES_ID_SEQ_OFF];
+ f->f_oid = name->name[LUSTRE_RES_ID_VER_OID_OFF] & 0xffffffff;
+ f->f_ver = name->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32;
+ LASSERT(fid_res_name_eq(f, name));
+}
+
+static inline struct ldlm_res_id *
+fid_build_pdo_res_name(const struct lu_fid *f,
+ unsigned int hash,
+ struct ldlm_res_id *name)
+{
+ fid_build_reg_res_name(f, name);
+ name->name[LUSTRE_RES_ID_HSH_OFF] = hash;
+ return name;
+}
+
+/**
+ * Build DLM resource name from object id & seq, which will be removed
+ * finally, when we replace ost_id with FID in data stack.
+ *
+ * Currently, resid from the old client, whose res[0] = object_id,
+ * res[1] = object_seq, is just oposite with Metatdata
+ * resid, where, res[0] = fid->f_seq, res[1] = fid->f_oid.
+ * To unifiy the resid identification, we will reverse the data
+ * resid to keep it same with Metadata resid, i.e.
+ *
+ * For resid from the old client,
+ * res[0] = objid, res[1] = 0, still keep the original order,
+ * for compatiblity.
+ *
+ * For new resid
+ * res will be built from normal FID directly, i.e. res[0] = f_seq,
+ * res[1] = f_oid + f_ver.
+ */
+static inline void ostid_build_res_name(struct ost_id *oi,
+ struct ldlm_res_id *name)
+{
+ memset(name, 0, sizeof *name);
+ if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi);
+ name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi);
+ } else {
+ fid_build_reg_res_name((struct lu_fid *)oi, name);
+ }
+}
+
+static inline void ostid_res_name_to_id(struct ost_id *oi,
+ struct ldlm_res_id *name)
+{
+ if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
+ /* old resid */
+ ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
+ ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
+ } else {
+ /* new resid */
+ fid_build_from_res_name((struct lu_fid *)oi, name);
+ }
+}
+
+/**
+ * Return true if the resource is for the object identified by this id & group.
+ */
+static inline int ostid_res_name_eq(struct ost_id *oi,
+ struct ldlm_res_id *name)
+{
+ /* Note: it is just a trick here to save some effort, probably the
+ * correct way would be turn them into the FID and compare */
+ if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) &&
+ name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi);
+ } else {
+ return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) &&
+ name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi);
+ }
+}
+
+/* The same as osc_build_res_name() */
+static inline void ost_fid_build_resid(const struct lu_fid *fid,
+ struct ldlm_res_id *resname)
+{
+ if (fid_is_mdt0(fid) || fid_is_idif(fid)) {
+ struct ost_id oi;
+ oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */
+ if (fid_to_ostid(fid, &oi) != 0)
+ return;
+ ostid_build_res_name(&oi, resname);
+ } else {
+ fid_build_reg_res_name(fid, resname);
+ }
+}
+
+static inline void ost_fid_from_resid(struct lu_fid *fid,
+ const struct ldlm_res_id *name)
+{
+ if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
+ /* old resid */
+ struct ost_id oi;
+ ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
+ ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
+ ostid_to_fid(fid, &oi, 0);
+ } else {
+ /* new resid */
+ fid_build_from_res_name(fid, name);
+ }
+}
+
+/**
+ * Flatten 128-bit FID values into a 64-bit value for use as an inode number.
+ * For non-IGIF FIDs this starts just over 2^32, and continues without
+ * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ
+ * into the range where there may not be many OID values in use, to minimize
+ * the risk of conflict.
+ *
+ * Suppose LUSTRE_SEQ_MAX_WIDTH less than (1 << 24) which is currently true,
+ * the time between re-used inode numbers is very long - 2^40 SEQ numbers,
+ * or about 2^40 client mounts, if clients create less than 2^24 files/mount.
+ */
+static inline __u64 fid_flatten(const struct lu_fid *fid)
+{
+ __u64 ino;
+ __u64 seq;
+
+ if (fid_is_igif(fid)) {
+ ino = lu_igif_ino(fid);
+ RETURN(ino);
+ }
+
+ seq = fid_seq(fid);
+
+ ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid);
+
+ RETURN(ino ? ino : fid_oid(fid));
+}
+
+static inline __u32 fid_hash(const struct lu_fid *f, int bits)
+{
+ /* all objects with same id and different versions will belong to same
+ * collisions list. */
+ return cfs_hash_long(fid_flatten(f), bits);
+}
+
+/**
+ * map fid to 32 bit value for ino on 32bit systems. */
+static inline __u32 fid_flatten32(const struct lu_fid *fid)
+{
+ __u32 ino;
+ __u64 seq;
+
+ if (fid_is_igif(fid)) {
+ ino = lu_igif_ino(fid);
+ RETURN(ino);
+ }
+
+ seq = fid_seq(fid) - FID_SEQ_START;
+
+ /* Map the high bits of the OID into higher bits of the inode number so
+ * that inodes generated at about the same time have a reduced chance
+ * of collisions. This will give a period of 2^12 = 1024 unique clients
+ * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects
+ * (from OID), or up to 128M inodes without collisions for new files. */
+ ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) +
+ (seq >> (64 - (40-8)) & 0xffffff00) +
+ (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8);
+
+ RETURN(ino ? ino : fid_oid(fid));
+}
+
+static inline int lu_fid_diff(struct lu_fid *fid1, struct lu_fid *fid2)
+{
+ LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
+ PFID(fid1), PFID(fid2));
+
+ if (fid_is_idif(fid1) && fid_is_idif(fid2))
+ return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) -
+ fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver);
+
+ return fid_oid(fid1) - fid_oid(fid2);
+}
+
+#define LUSTRE_SEQ_SRV_NAME "seq_srv"
+#define LUSTRE_SEQ_CTL_NAME "seq_ctl"
+
+/* Range common stuff */
+static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+ dst->lsr_start = cpu_to_le64(src->lsr_start);
+ dst->lsr_end = cpu_to_le64(src->lsr_end);
+ dst->lsr_index = cpu_to_le32(src->lsr_index);
+ dst->lsr_flags = cpu_to_le32(src->lsr_flags);
+}
+
+static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+ dst->lsr_start = le64_to_cpu(src->lsr_start);
+ dst->lsr_end = le64_to_cpu(src->lsr_end);
+ dst->lsr_index = le32_to_cpu(src->lsr_index);
+ dst->lsr_flags = le32_to_cpu(src->lsr_flags);
+}
+
+static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+ dst->lsr_start = cpu_to_be64(src->lsr_start);
+ dst->lsr_end = cpu_to_be64(src->lsr_end);
+ dst->lsr_index = cpu_to_be32(src->lsr_index);
+ dst->lsr_flags = cpu_to_be32(src->lsr_flags);
+}
+
+static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+ dst->lsr_start = be64_to_cpu(src->lsr_start);
+ dst->lsr_end = be64_to_cpu(src->lsr_end);
+ dst->lsr_index = be32_to_cpu(src->lsr_index);
+ dst->lsr_flags = be32_to_cpu(src->lsr_flags);
+}
+
+/** @} fid */
+
+#endif /* __LINUX_FID_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
new file mode 100644
index 000000000000..11e034a65b17
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -0,0 +1,202 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_FLD_H
+#define __LINUX_FLD_H
+
+/** \defgroup fld fld
+ *
+ * @{
+ */
+
+#include <lustre/lustre_idl.h>
+#include <lustre_mdt.h>
+#include <dt_object.h>
+
+#include <linux/libcfs/libcfs.h>
+
+struct lu_client_fld;
+struct lu_server_fld;
+struct lu_fld_hash;
+struct fld_cache;
+
+extern const struct dt_index_features fld_index_features;
+extern const char fld_index_name[];
+
+/*
+ * FLD (Fid Location Database) interface.
+ */
+enum {
+ LUSTRE_CLI_FLD_HASH_DHT = 0,
+ LUSTRE_CLI_FLD_HASH_RRB
+};
+
+
+struct lu_fld_target {
+ struct list_head ft_chain;
+ struct obd_export *ft_exp;
+ struct lu_server_fld *ft_srv;
+ __u64 ft_idx;
+};
+
+struct lu_server_fld {
+ /**
+ * Fld dir proc entry. */
+ proc_dir_entry_t *lsf_proc_dir;
+
+ /**
+ * /fld file object device */
+ struct dt_object *lsf_obj;
+
+ /**
+ * super sequence controller export, needed to forward fld
+ * lookup request. */
+ struct obd_export *lsf_control_exp;
+
+ /**
+ * Client FLD cache. */
+ struct fld_cache *lsf_cache;
+
+ /**
+ * Protect index modifications */
+ struct mutex lsf_lock;
+
+ /**
+ * Fld service name in form "fld-srv-lustre-MDTXXX" */
+ char lsf_name[80];
+
+};
+
+struct lu_client_fld {
+ /**
+ * Client side proc entry. */
+ proc_dir_entry_t *lcf_proc_dir;
+
+ /**
+ * List of exports client FLD knows about. */
+ struct list_head lcf_targets;
+
+ /**
+ * Current hash to be used to chose an export. */
+ struct lu_fld_hash *lcf_hash;
+
+ /**
+ * Exports count. */
+ int lcf_count;
+
+ /**
+ * Lock protecting exports list and fld_hash. */
+ spinlock_t lcf_lock;
+
+ /**
+ * Client FLD cache. */
+ struct fld_cache *lcf_cache;
+
+ /**
+ * Client fld proc entry name. */
+ char lcf_name[80];
+
+ const struct lu_context *lcf_ctx;
+
+ int lcf_flags;
+};
+
+/**
+ * number of blocks to reserve for particular operations. Should be function of
+ * ... something. Stub for now.
+ */
+enum {
+ /* one insert operation can involve two delete and one insert */
+ FLD_TXN_INDEX_INSERT_CREDITS = 60,
+ FLD_TXN_INDEX_DELETE_CREDITS = 20,
+};
+
+int fld_query(struct com_thread_info *info);
+
+/* Server methods */
+int fld_server_init(const struct lu_env *env, struct lu_server_fld *fld,
+ struct dt_device *dt, const char *prefix, int mds_node_id,
+ int type);
+
+void fld_server_fini(const struct lu_env *env, struct lu_server_fld *fld);
+
+int fld_declare_server_create(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ struct lu_seq_range *new,
+ struct thandle *th);
+
+int fld_server_create(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ struct lu_seq_range *add_range,
+ struct thandle *th);
+
+int fld_insert_entry(const struct lu_env *env,
+ struct lu_server_fld *fld,
+ const struct lu_seq_range *range);
+
+int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+ seqno_t seq, struct lu_seq_range *range);
+
+/* Client methods */
+int fld_client_init(struct lu_client_fld *fld,
+ const char *prefix, int hash);
+
+void fld_client_fini(struct lu_client_fld *fld);
+
+void fld_client_flush(struct lu_client_fld *fld);
+
+int fld_client_lookup(struct lu_client_fld *fld, seqno_t seq, mdsno_t *mds,
+ __u32 flags, const struct lu_env *env);
+
+int fld_client_create(struct lu_client_fld *fld,
+ struct lu_seq_range *range,
+ const struct lu_env *env);
+
+int fld_client_delete(struct lu_client_fld *fld,
+ seqno_t seq,
+ const struct lu_env *env);
+
+int fld_client_add_target(struct lu_client_fld *fld,
+ struct lu_fld_target *tar);
+
+int fld_client_del_target(struct lu_client_fld *fld,
+ __u64 idx);
+
+void fld_client_proc_fini(struct lu_client_fld *fld);
+
+/** @} fld */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_fsfilt.h b/drivers/staging/lustre/lustre/include/lustre_fsfilt.h
new file mode 100644
index 000000000000..9dcc332cb2f3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fsfilt.h
@@ -0,0 +1,48 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_fsfilt.h
+ *
+ * Filesystem interface helper.
+ */
+
+#ifndef _LUSTRE_FSFILT_H
+#define _LUSTRE_FSFILT_H
+
+#include <linux/lustre_fsfilt.h>
+
+#define LU221_BAD_TIME (0x80000000U + 24 * 3600)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
new file mode 100644
index 000000000000..105f6d61eef0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -0,0 +1,67 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_HA_H
+#define _LUSTRE_HA_H
+
+/** \defgroup ha ha
+ *
+ * @{
+ */
+
+struct obd_import;
+struct obd_export;
+struct obd_device;
+struct ptlrpc_request;
+
+
+int ptlrpc_replay(struct obd_import *imp);
+int ptlrpc_resend(struct obd_import *imp);
+void ptlrpc_free_committed(struct obd_import *imp);
+void ptlrpc_wake_delayed(struct obd_import *imp);
+int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async);
+int ptlrpc_set_import_active(struct obd_import *imp, int active);
+void ptlrpc_activate_import(struct obd_import *imp);
+void ptlrpc_deactivate_import(struct obd_import *imp);
+void ptlrpc_invalidate_import(struct obd_import *imp);
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
+int ptlrpc_check_suspend(void);
+void ptlrpc_activate_timeouts(struct obd_import *imp);
+void ptlrpc_deactivate_timeouts(struct obd_import *imp);
+
+/** @} ha */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
new file mode 100644
index 000000000000..fcd40f33426a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -0,0 +1,93 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_HANDLES_H_
+#define __LUSTRE_HANDLES_H_
+
+/** \defgroup handles handles
+ *
+ * @{
+ */
+
+#include <linux/lustre_handles.h>
+
+#include <linux/libcfs/libcfs.h>
+
+
+struct portals_handle_ops {
+ void (*hop_addref)(void *object);
+ void (*hop_free)(void *object, int size);
+};
+
+/* These handles are most easily used by having them appear at the very top of
+ * whatever object that you want to make handles for. ie:
+ *
+ * struct ldlm_lock {
+ * struct portals_handle handle;
+ * ...
+ * };
+ *
+ * Now you're able to assign the results of cookie2handle directly to an
+ * ldlm_lock. If it's not at the top, you'll want to use container_of()
+ * to compute the start of the structure based on the handle field. */
+struct portals_handle {
+ struct list_head h_link;
+ __u64 h_cookie;
+ struct portals_handle_ops *h_ops;
+
+ /* newly added fields to handle the RCU issue. -jxiong */
+ cfs_rcu_head_t h_rcu;
+ spinlock_t h_lock;
+ unsigned int h_size:31;
+ unsigned int h_in:1;
+};
+#define RCU2HANDLE(rcu) container_of(rcu, struct portals_handle, h_rcu)
+
+/* handles.c */
+
+/* Add a handle to the hash table */
+void class_handle_hash(struct portals_handle *,
+ struct portals_handle_ops *ops);
+void class_handle_unhash(struct portals_handle *);
+void class_handle_hash_back(struct portals_handle *);
+void *class_handle2object(__u64 cookie);
+void class_handle_free_cb(cfs_rcu_head_t *);
+int class_handle_init(void);
+void class_handle_cleanup(void);
+
+/** @} handles */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_idmap.h b/drivers/staging/lustre/lustre/include/lustre_idmap.h
new file mode 100644
index 000000000000..084bdd6ab4db
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_idmap.h
@@ -0,0 +1,104 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_idmap.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_IDMAP_H
+#define _LUSTRE_IDMAP_H
+
+/** \defgroup idmap idmap
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+
+#define CFS_NGROUPS_PER_BLOCK ((int)(PAGE_CACHE_SIZE / sizeof(gid_t)))
+
+#define CFS_GROUP_AT(gi, i) \
+ ((gi)->blocks[(i) / CFS_NGROUPS_PER_BLOCK][(i) % CFS_NGROUPS_PER_BLOCK])
+
+enum {
+ CFS_IC_NOTHING = 0, /* convert nothing */
+ CFS_IC_ALL = 1, /* convert all items */
+ CFS_IC_MAPPED = 2, /* convert mapped uid/gid */
+ CFS_IC_UNMAPPED = 3 /* convert unmapped uid/gid */
+};
+
+#define CFS_IDMAP_NOTFOUND (-1)
+
+#define CFS_IDMAP_HASHSIZE 32
+
+enum lustre_idmap_idx {
+ RMT_UIDMAP_IDX,
+ LCL_UIDMAP_IDX,
+ RMT_GIDMAP_IDX,
+ LCL_GIDMAP_IDX,
+ CFS_IDMAP_N_HASHES
+};
+
+struct lustre_idmap_table {
+ spinlock_t lit_lock;
+ struct list_head lit_idmaps[CFS_IDMAP_N_HASHES][CFS_IDMAP_HASHSIZE];
+};
+
+struct lu_ucred;
+
+extern void lustre_groups_from_list(group_info_t *ginfo, gid_t *glist);
+extern void lustre_groups_sort(group_info_t *group_info);
+extern int lustre_in_group_p(struct lu_ucred *mu, gid_t grp);
+
+extern int lustre_idmap_add(struct lustre_idmap_table *t,
+ uid_t ruid, uid_t luid,
+ gid_t rgid, gid_t lgid);
+extern int lustre_idmap_del(struct lustre_idmap_table *t,
+ uid_t ruid, uid_t luid,
+ gid_t rgid, gid_t lgid);
+extern int lustre_idmap_lookup_uid(struct lu_ucred *mu,
+ struct lustre_idmap_table *t,
+ int reverse, uid_t uid);
+extern int lustre_idmap_lookup_gid(struct lu_ucred *mu,
+ struct lustre_idmap_table *t,
+ int reverse, gid_t gid);
+extern struct lustre_idmap_table *lustre_idmap_init(void);
+extern void lustre_idmap_fini(struct lustre_idmap_table *t);
+
+/** @} idmap */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
new file mode 100644
index 000000000000..3a5dd6a94c08
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -0,0 +1,367 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup obd_import PtlRPC import definitions
+ * Imports are client-side representation of remote obd target.
+ *
+ * @{
+ */
+
+#ifndef __IMPORT_H
+#define __IMPORT_H
+
+/** \defgroup export export
+ *
+ * @{
+ */
+
+#include <lustre_handles.h>
+#include <lustre/lustre_idl.h>
+
+
+/**
+ * Adaptive Timeout stuff
+ *
+ * @{
+ */
+#define D_ADAPTTO D_OTHER
+#define AT_BINS 4 /* "bin" means "N seconds of history" */
+#define AT_FLG_NOHIST 0x1 /* use last reported value only */
+
+struct adaptive_timeout {
+ time_t at_binstart; /* bin start time */
+ unsigned int at_hist[AT_BINS]; /* timeout history bins */
+ unsigned int at_flags;
+ unsigned int at_current; /* current timeout value */
+ unsigned int at_worst_ever; /* worst-ever timeout value */
+ time_t at_worst_time; /* worst-ever timeout timestamp */
+ spinlock_t at_lock;
+};
+
+struct ptlrpc_at_array {
+ struct list_head *paa_reqs_array; /** array to hold requests */
+ __u32 paa_size; /** the size of array */
+ __u32 paa_count; /** the total count of reqs */
+ time_t paa_deadline; /** the earliest deadline of reqs */
+ __u32 *paa_reqs_count; /** the count of reqs in each entry */
+};
+
+#define IMP_AT_MAX_PORTALS 8
+struct imp_at {
+ int iat_portal[IMP_AT_MAX_PORTALS];
+ struct adaptive_timeout iat_net_latency;
+ struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+};
+
+
+/** @} */
+
+/** Possible import states */
+enum lustre_imp_state {
+ LUSTRE_IMP_CLOSED = 1,
+ LUSTRE_IMP_NEW = 2,
+ LUSTRE_IMP_DISCON = 3,
+ LUSTRE_IMP_CONNECTING = 4,
+ LUSTRE_IMP_REPLAY = 5,
+ LUSTRE_IMP_REPLAY_LOCKS = 6,
+ LUSTRE_IMP_REPLAY_WAIT = 7,
+ LUSTRE_IMP_RECOVER = 8,
+ LUSTRE_IMP_FULL = 9,
+ LUSTRE_IMP_EVICTED = 10,
+};
+
+/** Returns test string representation of numeric import state \a state */
+static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
+{
+ static char* import_state_names[] = {
+ "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
+ "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+ "RECOVER", "FULL", "EVICTED",
+ };
+
+ LASSERT (state <= LUSTRE_IMP_EVICTED);
+ return import_state_names[state];
+}
+
+/**
+ * List of import event types
+ */
+enum obd_import_event {
+ IMP_EVENT_DISCON = 0x808001,
+ IMP_EVENT_INACTIVE = 0x808002,
+ IMP_EVENT_INVALIDATE = 0x808003,
+ IMP_EVENT_ACTIVE = 0x808004,
+ IMP_EVENT_OCD = 0x808005,
+ IMP_EVENT_DEACTIVATE = 0x808006,
+ IMP_EVENT_ACTIVATE = 0x808007,
+};
+
+/**
+ * Definition of import connection structure
+ */
+struct obd_import_conn {
+ /** Item for linking connections together */
+ struct list_head oic_item;
+ /** Pointer to actual PortalRPC connection */
+ struct ptlrpc_connection *oic_conn;
+ /** uuid of remote side */
+ struct obd_uuid oic_uuid;
+ /**
+ * Time (64 bit jiffies) of last connection attempt on this connection
+ */
+ __u64 oic_last_attempt;
+};
+
+/* state history */
+#define IMP_STATE_HIST_LEN 16
+struct import_state_hist {
+ enum lustre_imp_state ish_state;
+ time_t ish_time;
+};
+
+/**
+ * Defintion of PortalRPC import structure.
+ * Imports are representing client-side view to remote target.
+ */
+struct obd_import {
+ /** Local handle (== id) for this import. */
+ struct portals_handle imp_handle;
+ /** Reference counter */
+ atomic_t imp_refcount;
+ struct lustre_handle imp_dlm_handle; /* client's ldlm export */
+ /** Currently active connection */
+ struct ptlrpc_connection *imp_connection;
+ /** PortalRPC client structure for this import */
+ struct ptlrpc_client *imp_client;
+ /** List element for linking into pinger chain */
+ struct list_head imp_pinger_chain;
+ /** List element for linking into chain for destruction */
+ struct list_head imp_zombie_chain;
+
+ /**
+ * Lists of requests that are retained for replay, waiting for a reply,
+ * or waiting for recovery to complete, respectively.
+ * @{
+ */
+ struct list_head imp_replay_list;
+ struct list_head imp_sending_list;
+ struct list_head imp_delayed_list;
+ /** @} */
+
+ /** obd device for this import */
+ struct obd_device *imp_obd;
+
+ /**
+ * some seciruty-related fields
+ * @{
+ */
+ struct ptlrpc_sec *imp_sec;
+ struct mutex imp_sec_mutex;
+ cfs_time_t imp_sec_expire;
+ /** @} */
+
+ /** Wait queue for those who need to wait for recovery completion */
+ wait_queue_head_t imp_recovery_waitq;
+
+ /** Number of requests currently in-flight */
+ atomic_t imp_inflight;
+ /** Number of requests currently unregistering */
+ atomic_t imp_unregistering;
+ /** Number of replay requests inflight */
+ atomic_t imp_replay_inflight;
+ /** Number of currently happening import invalidations */
+ atomic_t imp_inval_count;
+ /** Numbner of request timeouts */
+ atomic_t imp_timeouts;
+ /** Current import state */
+ enum lustre_imp_state imp_state;
+ /** History of import states */
+ struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN];
+ int imp_state_hist_idx;
+ /** Current import generation. Incremented on every reconnect */
+ int imp_generation;
+ /** Incremented every time we send reconnection request */
+ __u32 imp_conn_cnt;
+ /**
+ * \see ptlrpc_free_committed remembers imp_generation value here
+ * after a check to save on unnecessary replay list iterations
+ */
+ int imp_last_generation_checked;
+ /** Last tranno we replayed */
+ __u64 imp_last_replay_transno;
+ /** Last transno committed on remote side */
+ __u64 imp_peer_committed_transno;
+ /**
+ * \see ptlrpc_free_committed remembers last_transno since its last
+ * check here and if last_transno did not change since last run of
+ * ptlrpc_free_committed and import generation is the same, we can
+ * skip looking for requests to remove from replay list as optimisation
+ */
+ __u64 imp_last_transno_checked;
+ /**
+ * Remote export handle. This is how remote side knows what export
+ * we are talking to. Filled from response to connect request
+ */
+ struct lustre_handle imp_remote_handle;
+ /** When to perform next ping. time in jiffies. */
+ cfs_time_t imp_next_ping;
+ /** When we last succesfully connected. time in 64bit jiffies */
+ __u64 imp_last_success_conn;
+
+ /** List of all possible connection for import. */
+ struct list_head imp_conn_list;
+ /**
+ * Current connection. \a imp_connection is imp_conn_current->oic_conn
+ */
+ struct obd_import_conn *imp_conn_current;
+
+ /** Protects flags, level, generation, conn_cnt, *_list */
+ spinlock_t imp_lock;
+
+ /* flags */
+ unsigned long imp_no_timeout:1, /* timeouts are disabled */
+ imp_invalid:1, /* evicted */
+ /* administratively disabled */
+ imp_deactive:1,
+ /* try to recover the import */
+ imp_replayable:1,
+ /* don't run recovery (timeout instead) */
+ imp_dlm_fake:1,
+ /* use 1/2 timeout on MDS' OSCs */
+ imp_server_timeout:1,
+ /* VBR: imp in delayed recovery */
+ imp_delayed_recovery:1,
+ /* VBR: if gap was found then no lock replays
+ */
+ imp_no_lock_replay:1,
+ /* recovery by versions was failed */
+ imp_vbr_failed:1,
+ /* force an immidiate ping */
+ imp_force_verify:1,
+ /* force a scheduled ping */
+ imp_force_next_verify:1,
+ /* pingable */
+ imp_pingable:1,
+ /* resend for replay */
+ imp_resend_replay:1,
+ /* disable normal recovery, for test only. */
+ imp_no_pinger_recover:1,
+ /* need IR MNE swab */
+ imp_need_mne_swab:1,
+ /* import must be reconnected instead of
+ * chouse new connection */
+ imp_force_reconnect:1,
+ /* import has tried to connect with server */
+ imp_connect_tried:1;
+ __u32 imp_connect_op;
+ struct obd_connect_data imp_connect_data;
+ __u64 imp_connect_flags_orig;
+ int imp_connect_error;
+
+ __u32 imp_msg_magic;
+ __u32 imp_msghdr_flags; /* adjusted based on server capability */
+
+ struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */
+
+ struct imp_at imp_at; /* adaptive timeout data */
+ time_t imp_last_reply_time; /* for health check */
+};
+
+typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
+ int event, void *event_arg, void *cb_data);
+
+/**
+ * Structure for import observer.
+ * It is possible to register "observer" on an import and every time
+ * something happens to an import (like connect/evict/disconnect)
+ * obderver will get its callback called with event type
+ */
+struct obd_import_observer {
+ struct list_head oio_chain;
+ obd_import_callback oio_cb;
+ void *oio_cb_data;
+};
+
+void class_observe_import(struct obd_import *imp, obd_import_callback cb,
+ void *cb_data);
+void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
+ void *cb_data);
+void class_notify_import_observers(struct obd_import *imp, int event,
+ void *event_arg);
+
+/* import.c */
+static inline unsigned int at_est2timeout(unsigned int val)
+{
+ /* add an arbitrary minimum: 125% +5 sec */
+ return (val + (val >> 2) + 5);
+}
+
+static inline unsigned int at_timeout2est(unsigned int val)
+{
+ /* restore estimate value from timeout: e=4/5(t-5) */
+ LASSERT(val);
+ return (max((val << 2) / 5, 5U) - 4);
+}
+
+static inline void at_reset(struct adaptive_timeout *at, int val) {
+ at->at_current = val;
+ at->at_worst_ever = val;
+ at->at_worst_time = cfs_time_current_sec();
+}
+static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
+ memset(at, 0, sizeof(*at));
+ spin_lock_init(&at->at_lock);
+ at->at_flags = flags;
+ at_reset(at, val);
+}
+extern unsigned int at_min;
+static inline int at_get(struct adaptive_timeout *at) {
+ return (at->at_current > at_min) ? at->at_current : at_min;
+}
+int at_measured(struct adaptive_timeout *at, unsigned int val);
+int import_at_get_index(struct obd_import *imp, int portal);
+extern unsigned int at_max;
+#define AT_OFF (at_max == 0)
+
+/* genops.c */
+struct obd_export;
+extern struct obd_import *class_exp2cliimp(struct obd_export *);
+extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
+
+/** @} import */
+
+#endif /* __IMPORT_H */
+
+/** @} obd_import */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
new file mode 100644
index 000000000000..bdfc5391c6d2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -0,0 +1,667 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_lib.h
+ *
+ * Basic Lustre library routines.
+ */
+
+#ifndef _LUSTRE_LIB_H
+#define _LUSTRE_LIB_H
+
+/** \defgroup lib lib
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_ver.h>
+#include <lustre_cfg.h>
+#include <linux/lustre_lib.h>
+
+/* target.c */
+struct ptlrpc_request;
+struct obd_export;
+struct lu_target;
+struct l_wait_info;
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lvfs.h>
+
+
+int target_pack_pool_reply(struct ptlrpc_request *req);
+int do_set_info_async(struct obd_import *imp,
+ int opcode, int version,
+ obd_count keylen, void *key,
+ obd_count vallen, void *val,
+ struct ptlrpc_request_set *set);
+
+#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
+#define OBD_MAX_IOCTL_BUFFER CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
+
+void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
+
+/* client.c */
+
+int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+
+struct md_open_data;
+struct obd_client_handle {
+ struct lustre_handle och_fh;
+ struct lu_fid och_fid;
+ struct md_open_data *och_mod;
+ __u32 och_magic;
+ int och_flags;
+};
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
+/* statfs_pack.c */
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
+
+/* l_lock.c */
+struct lustre_lock {
+ int l_depth;
+ task_t *l_owner;
+ struct semaphore l_sem;
+ spinlock_t l_spin;
+};
+
+void l_lock_init(struct lustre_lock *);
+void l_lock(struct lustre_lock *);
+void l_unlock(struct lustre_lock *);
+int l_has_lock(struct lustre_lock *);
+
+/*
+ * For md echo client
+ */
+enum md_echo_cmd {
+ ECHO_MD_CREATE = 1, /* Open/Create file on MDT */
+ ECHO_MD_MKDIR = 2, /* Mkdir on MDT */
+ ECHO_MD_DESTROY = 3, /* Unlink file on MDT */
+ ECHO_MD_RMDIR = 4, /* Rmdir on MDT */
+ ECHO_MD_LOOKUP = 5, /* Lookup on MDT */
+ ECHO_MD_GETATTR = 6, /* Getattr on MDT */
+ ECHO_MD_SETATTR = 7, /* Setattr on MDT */
+ ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */
+};
+
+/*
+ * OBD IOCTLS
+ */
+#define OBD_IOCTL_VERSION 0x00010004
+
+struct obd_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+
+ union {
+ __u64 ioc_cookie;
+ __u64 ioc_u64_1;
+ };
+ union {
+ __u32 ioc_conn1;
+ __u32 ioc_u32_1;
+ };
+ union {
+ __u32 ioc_conn2;
+ __u32 ioc_u32_2;
+ };
+
+ struct obdo ioc_obdo1;
+ struct obdo ioc_obdo2;
+
+ obd_size ioc_count;
+ obd_off ioc_offset;
+ __u32 ioc_dev;
+ __u32 ioc_command;
+
+ __u64 ioc_nid;
+ __u32 ioc_nal;
+ __u32 ioc_type;
+
+ /* buffers the kernel will treat as user pointers */
+ __u32 ioc_plen1;
+ char *ioc_pbuf1;
+ __u32 ioc_plen2;
+ char *ioc_pbuf2;
+
+ /* inline buffers for various arguments */
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+ __u32 ioc_inllen3;
+ char *ioc_inlbuf3;
+ __u32 ioc_inllen4;
+ char *ioc_inlbuf4;
+
+ char ioc_bulk[0];
+};
+
+struct obd_ioctl_hdr {
+ __u32 ioc_len;
+ __u32 ioc_version;
+};
+
+static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
+{
+ int len = cfs_size_round(sizeof(struct obd_ioctl_data));
+ len += cfs_size_round(data->ioc_inllen1);
+ len += cfs_size_round(data->ioc_inllen2);
+ len += cfs_size_round(data->ioc_inllen3);
+ len += cfs_size_round(data->ioc_inllen4);
+ return len;
+}
+
+
+static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
+{
+ if (data->ioc_len > (1<<30)) {
+ CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen1 > (1<<30)) {
+ CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen2 > (1<<30)) {
+ CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen3 > (1<<30)) {
+ CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inllen4 > (1<<30)) {
+ CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+ CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+ CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
+ CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+ CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_pbuf1 && !data->ioc_plen1) {
+ CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_pbuf2 && !data->ioc_plen2) {
+ CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
+ return 1;
+ }
+ if (data->ioc_plen1 && !data->ioc_pbuf1) {
+ CERROR("OBD ioctl: plen1 set but NULL pointer\n");
+ return 1;
+ }
+ if (data->ioc_plen2 && !data->ioc_pbuf2) {
+ CERROR("OBD ioctl: plen2 set but NULL pointer\n");
+ return 1;
+ }
+ if (obd_ioctl_packlen(data) > data->ioc_len) {
+ CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
+ obd_ioctl_packlen(data), data->ioc_len);
+ return 1;
+ }
+ return 0;
+}
+
+
+#include <obd_support.h>
+
+/* function defined in lustre/obdclass/<platform>/<platform>-module.c */
+int obd_ioctl_getdata(char **buf, int *len, void *arg);
+int obd_ioctl_popdata(void *arg, void *data, int len);
+
+static inline void obd_ioctl_freedata(char *buf, int len)
+{
+ ENTRY;
+
+ OBD_FREE_LARGE(buf, len);
+ EXIT;
+ return;
+}
+
+/*
+ * BSD ioctl description:
+ * #define IOC_V1 _IOR(g, n1, long)
+ * #define IOC_V2 _IOW(g, n2, long)
+ *
+ * ioctl(f, IOC_V1, arg);
+ * arg will be treated as a long value,
+ *
+ * ioctl(f, IOC_V2, arg)
+ * arg will be treated as a pointer, bsd will call
+ * copyin(buf, arg, sizeof(long))
+ *
+ * To make BSD ioctl handles argument correctly and simplely,
+ * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data
+ * for us. Does this change affect Linux? (XXX Liang)
+ */
+#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
+
+
+#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME])
+#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
+
+#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 )
+#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE)
+
+#define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
+/* see also <lustre/lustre_user.h> for ioctls 151-153 */
+/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
+#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
+#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
+#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+/* see <lustre/lustre_user.h> for ioctls 157-159 */
+/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
+#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int)
+/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
+#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
+#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
+/* see also <lustre/lustre_user.h> for ioctls 163-176 */
+#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LLOG_CATINFO is deprecated */
+#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE)
+
+#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE)
+
+/* <lustre/lustre_user.h> defines ioctl number 218-219 */
+#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t)
+
+#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data)
+#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data)
+
+#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PAUSE_LFSCK _IOW('f', 232, OBD_IOC_DATA_TYPE)
+
+/* XXX _IOWR('f', 250, long) has been defined in
+ * libcfs/include/libcfs/libcfs_private.h for debug, don't use it
+ */
+
+/* Until such time as we get_info the per-stripe maximum from the OST,
+ * we define this to be 2T - 4k, which is the ext3 maxbytes. */
+#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
+
+/* Special values for remove LOV EA from disk */
+#define LOVEA_DELETE_VALUES(size, count, offset) (size == 0 && count == 0 && \
+ offset == (typeof(offset))(-1))
+
+/* #define POISON_BULK 0 */
+
+/*
+ * l_wait_event is a flexible sleeping function, permitting simple caller
+ * configuration of interrupt and timeout sensitivity along with actions to
+ * be performed in the event of either exception.
+ *
+ * The first form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
+ * intr_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * l_wait_event() makes the current process wait on 'waitq' until 'condition'
+ * is TRUE or a "killable" signal (SIGTERM, SIKGILL, SIGINT) is pending. It
+ * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before
+ * 'condition' becomes true, it optionally calls the specified 'intr_handler'
+ * if not NULL, and returns -EINTR.
+ *
+ * If a non-zero timeout is specified, signals are ignored until the timeout
+ * has expired. At this time, if 'timeout_handler' is not NULL it is called.
+ * If it returns FALSE l_wait_event() continues to wait as described above with
+ * signals enabled. Otherwise it returns -ETIMEDOUT.
+ *
+ * LWI_INTR(intr_handler, callback_data) is shorthand for
+ * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data)
+ *
+ * The second form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * This form is the same as the first except that it COMPLETELY IGNORES
+ * SIGNALS. The caller must therefore beware that if 'timeout' is zero, or if
+ * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that
+ * can unblock the current process is 'condition' becoming TRUE.
+ *
+ * Another form of usage is:
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval,
+ * timeout_handler);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ * This is the same as previous case, but condition is checked once every
+ * 'interval' jiffies (if non-zero).
+ *
+ * Subtle synchronization point: this macro does *not* necessary takes
+ * wait-queue spin-lock before returning, and, hence, following idiom is safe
+ * ONLY when caller provides some external locking:
+ *
+ * Thread1 Thread2
+ *
+ * l_wait_event(&obj->wq, ....); (1)
+ *
+ * wake_up(&obj->wq): (2)
+ * spin_lock(&q->lock); (2.1)
+ * __wake_up_common(q, ...); (2.2)
+ * spin_unlock(&q->lock, flags); (2.3)
+ *
+ * OBD_FREE_PTR(obj); (3)
+ *
+ * As l_wait_event() may "short-cut" execution and return without taking
+ * wait-queue spin-lock, some additional synchronization is necessary to
+ * guarantee that step (3) can begin only after (2.3) finishes.
+ *
+ * XXX nikita: some ptlrpc daemon threads have races of that sort.
+ *
+ */
+static inline int back_to_sleep(void *arg)
+{
+ return 0;
+}
+
+#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1))
+
+struct l_wait_info {
+ cfs_duration_t lwi_timeout;
+ cfs_duration_t lwi_interval;
+ int lwi_allow_intr;
+ int (*lwi_on_timeout)(void *);
+ void (*lwi_on_signal)(void *);
+ void *lwi_cb_data;
+};
+
+/* NB: LWI_TIMEOUT ignores signals completely */
+#define LWI_TIMEOUT(time, cb, data) \
+((struct l_wait_info) { \
+ .lwi_timeout = time, \
+ .lwi_on_timeout = cb, \
+ .lwi_cb_data = data, \
+ .lwi_interval = 0, \
+ .lwi_allow_intr = 0 \
+})
+
+#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data) \
+((struct l_wait_info) { \
+ .lwi_timeout = time, \
+ .lwi_on_timeout = cb, \
+ .lwi_cb_data = data, \
+ .lwi_interval = interval, \
+ .lwi_allow_intr = 0 \
+})
+
+#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \
+((struct l_wait_info) { \
+ .lwi_timeout = time, \
+ .lwi_on_timeout = time_cb, \
+ .lwi_on_signal = sig_cb, \
+ .lwi_cb_data = data, \
+ .lwi_interval = 0, \
+ .lwi_allow_intr = 0 \
+})
+
+#define LWI_TIMEOUT_INTR_ALL(time, time_cb, sig_cb, data) \
+((struct l_wait_info) { \
+ .lwi_timeout = time, \
+ .lwi_on_timeout = time_cb, \
+ .lwi_on_signal = sig_cb, \
+ .lwi_cb_data = data, \
+ .lwi_interval = 0, \
+ .lwi_allow_intr = 1 \
+})
+
+#define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data)
+
+
+/*
+ * wait for @condition to become true, but no longer than timeout, specified
+ * by @info.
+ */
+#define __l_wait_event(wq, condition, info, ret, l_add_wait) \
+do { \
+ wait_queue_t __wait; \
+ cfs_duration_t __timeout = info->lwi_timeout; \
+ sigset_t __blocked; \
+ int __allow_intr = info->lwi_allow_intr; \
+ \
+ ret = 0; \
+ if (condition) \
+ break; \
+ \
+ init_waitqueue_entry_current(&__wait); \
+ l_add_wait(&wq, &__wait); \
+ \
+ /* Block all signals (just the non-fatal ones if no timeout). */ \
+ if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr)) \
+ __blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); \
+ else \
+ __blocked = cfs_block_sigsinv(0); \
+ \
+ for (;;) { \
+ unsigned __wstate; \
+ \
+ __wstate = info->lwi_on_signal != NULL && \
+ (__timeout == 0 || __allow_intr) ? \
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; \
+ \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ \
+ if (condition) \
+ break; \
+ \
+ if (__timeout == 0) { \
+ waitq_wait(&__wait, __wstate); \
+ } else { \
+ cfs_duration_t interval = info->lwi_interval? \
+ min_t(cfs_duration_t, \
+ info->lwi_interval,__timeout):\
+ __timeout; \
+ cfs_duration_t remaining = waitq_timedwait(&__wait,\
+ __wstate, \
+ interval); \
+ __timeout = cfs_time_sub(__timeout, \
+ cfs_time_sub(interval, remaining));\
+ if (__timeout == 0) { \
+ if (info->lwi_on_timeout == NULL || \
+ info->lwi_on_timeout(info->lwi_cb_data)) { \
+ ret = -ETIMEDOUT; \
+ break; \
+ } \
+ /* Take signals after the timeout expires. */ \
+ if (info->lwi_on_signal != NULL) \
+ (void)cfs_block_sigsinv(LUSTRE_FATAL_SIGS);\
+ } \
+ } \
+ \
+ if (condition) \
+ break; \
+ if (cfs_signal_pending()) { \
+ if (info->lwi_on_signal != NULL && \
+ (__timeout == 0 || __allow_intr)) { \
+ if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
+ info->lwi_on_signal(info->lwi_cb_data);\
+ ret = -EINTR; \
+ break; \
+ } \
+ /* We have to do this here because some signals */ \
+ /* are not blockable - ie from strace(1). */ \
+ /* In these cases we want to schedule_timeout() */ \
+ /* again, because we don't want that to return */ \
+ /* -EINTR when the RPC actually succeeded. */ \
+ /* the recalc_sigpending() below will deliver the */ \
+ /* signal properly. */ \
+ cfs_clear_sigpending(); \
+ } \
+ } \
+ \
+ cfs_restore_sigs(__blocked); \
+ \
+ set_current_state(TASK_RUNNING); \
+ remove_wait_queue(&wq, &__wait); \
+} while (0)
+
+
+
+#define l_wait_event(wq, condition, info) \
+({ \
+ int __ret; \
+ struct l_wait_info *__info = (info); \
+ \
+ __l_wait_event(wq, condition, __info, \
+ __ret, add_wait_queue); \
+ __ret; \
+})
+
+#define l_wait_event_exclusive(wq, condition, info) \
+({ \
+ int __ret; \
+ struct l_wait_info *__info = (info); \
+ \
+ __l_wait_event(wq, condition, __info, \
+ __ret, add_wait_queue_exclusive); \
+ __ret; \
+})
+
+#define l_wait_event_exclusive_head(wq, condition, info) \
+({ \
+ int __ret; \
+ struct l_wait_info *__info = (info); \
+ \
+ __l_wait_event(wq, condition, __info, \
+ __ret, add_wait_queue_exclusive_head); \
+ __ret; \
+})
+
+#define l_wait_condition(wq, condition) \
+({ \
+ struct l_wait_info lwi = { 0 }; \
+ l_wait_event(wq, condition, &lwi); \
+})
+
+#define l_wait_condition_exclusive(wq, condition) \
+({ \
+ struct l_wait_info lwi = { 0 }; \
+ l_wait_event_exclusive(wq, condition, &lwi); \
+})
+
+#define l_wait_condition_exclusive_head(wq, condition) \
+({ \
+ struct l_wait_info lwi = { 0 }; \
+ l_wait_event_exclusive_head(wq, condition, &lwi); \
+})
+
+#define LIBLUSTRE_CLIENT (0)
+
+/** @} lib */
+
+#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h
new file mode 100644
index 000000000000..5790be913bf6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_linkea.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: di wang <di.wang@intel.com>
+ */
+
+struct linkea_data {
+ /**
+ * Buffer to keep link EA body.
+ */
+ struct lu_buf *ld_buf;
+ /**
+ * The matched header, entry and its lenght in the EA
+ */
+ struct link_ea_header *ld_leh;
+ struct link_ea_entry *ld_lee;
+ int ld_reclen;
+};
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf);
+int linkea_init(struct linkea_data *ldata);
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+ struct lu_name *lname, struct lu_fid *pfid);
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid);
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname);
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid);
+
+#define LINKEA_NEXT_ENTRY(ldata) \
+ (struct link_ea_entry *)((char *)ldata.ld_lee + ldata.ld_reclen)
+
+#define LINKEA_FIRST_ENTRY(ldata) \
+ (struct link_ea_entry *)(ldata.ld_leh + 1)
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
new file mode 100644
index 000000000000..25f8bfaccef3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lite.h
@@ -0,0 +1,147 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LL_H
+#define _LL_H
+
+/** \defgroup lite lite
+ *
+ * @{
+ */
+
+#include <linux/lustre_lite.h>
+
+#include <obd_class.h>
+#include <obd_ost.h>
+#include <lustre_net.h>
+#include <lustre_mds.h>
+#include <lustre_ha.h>
+
+/* 4UL * 1024 * 1024 */
+#define LL_MAX_BLKSIZE_BITS (22)
+#define LL_MAX_BLKSIZE (1UL<<LL_MAX_BLKSIZE_BITS)
+
+#include <lustre/lustre_user.h>
+
+
+struct lustre_rw_params {
+ int lrp_lock_mode;
+ ldlm_policy_data_t lrp_policy;
+ obd_flag lrp_brw_flags;
+ int lrp_ast_flags;
+};
+
+/*
+ * XXX nikita: this function lives in the header because it is used by both
+ * llite kernel module and liblustre library, and there is no (?) better place
+ * to put it in.
+ */
+static inline void lustre_build_lock_params(int cmd, unsigned long open_flags,
+ __u64 connect_flags,
+ loff_t pos, ssize_t len,
+ struct lustre_rw_params *params)
+{
+ params->lrp_lock_mode = (cmd == OBD_BRW_READ) ? LCK_PR : LCK_PW;
+ params->lrp_brw_flags = 0;
+
+ params->lrp_policy.l_extent.start = pos;
+ params->lrp_policy.l_extent.end = pos + len - 1;
+ /*
+ * for now O_APPEND always takes local locks.
+ */
+ if (cmd == OBD_BRW_WRITE && (open_flags & O_APPEND)) {
+ params->lrp_policy.l_extent.start = 0;
+ params->lrp_policy.l_extent.end = OBD_OBJECT_EOF;
+ } else if (LIBLUSTRE_CLIENT && (connect_flags & OBD_CONNECT_SRVLOCK)) {
+ /*
+ * liblustre: OST-side locking for all non-O_APPEND
+ * reads/writes.
+ */
+ params->lrp_lock_mode = LCK_NL;
+ params->lrp_brw_flags = OBD_BRW_SRVLOCK;
+ } else {
+ /*
+ * nothing special for the kernel. In the future llite may use
+ * OST-side locks for small writes into highly contended
+ * files.
+ */
+ }
+ params->lrp_ast_flags = (open_flags & O_NONBLOCK) ?
+ LDLM_FL_BLOCK_NOWAIT : 0;
+}
+
+/*
+ * This is embedded into liblustre and llite super-blocks to keep track of
+ * connect flags (capabilities) supported by all imports given mount is
+ * connected to.
+ */
+struct lustre_client_ocd {
+ /*
+ * This is conjunction of connect_flags across all imports (LOVs) this
+ * mount is connected to. This field is updated by cl_ocd_update()
+ * under ->lco_lock.
+ */
+ __u64 lco_flags;
+ struct mutex lco_lock;
+ struct obd_export *lco_md_exp;
+ struct obd_export *lco_dt_exp;
+};
+
+/*
+ * Chain of hash overflow pages.
+ */
+struct ll_dir_chain {
+ /* XXX something. Later */
+};
+
+static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
+{
+}
+
+static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
+{
+}
+
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
+{
+ if (BITS_PER_LONG == 32 && hash64)
+ hash >>= 32;
+ return ~0UL - hash;
+}
+
+/** @} lite */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
new file mode 100644
index 000000000000..714ab378e431
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -0,0 +1,576 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_log.h
+ *
+ * Generic infrastructure for managing a collection of logs.
+ * These logs are used for:
+ *
+ * - orphan recovery: OST adds record on create
+ * - mtime/size consistency: the OST adds a record on first write
+ * - open/unlinked objects: OST adds a record on destroy
+ *
+ * - mds unlink log: the MDS adds an entry upon delete
+ *
+ * - raid1 replication log between OST's
+ * - MDS replication logs
+ */
+
+#ifndef _LUSTRE_LOG_H
+#define _LUSTRE_LOG_H
+
+/** \defgroup log log
+ *
+ * @{
+ */
+
+#include <linux/lustre_log.h>
+
+#include <obd_class.h>
+#include <obd_ost.h>
+#include <lustre/lustre_idl.h>
+#include <dt_object.h>
+
+#define LOG_NAME_LIMIT(logname, name) \
+ snprintf(logname, sizeof(logname), "LOGS/%s", name)
+#define LLOG_EEMPTY 4711
+
+enum llog_open_param {
+ LLOG_OPEN_EXISTS = 0x0000,
+ LLOG_OPEN_NEW = 0x0001,
+};
+
+struct plain_handle_data {
+ struct list_head phd_entry;
+ struct llog_handle *phd_cat_handle;
+ struct llog_cookie phd_cookie; /* cookie of this log in its cat */
+};
+
+struct cat_handle_data {
+ struct list_head chd_head;
+ struct llog_handle *chd_current_log; /* currently open log */
+ struct llog_handle *chd_next_log; /* llog to be used next */
+};
+
+static inline void logid_to_fid(struct llog_logid *id, struct lu_fid *fid)
+{
+ /* For compatibility purposes we identify pre-OSD (~< 2.3.51 MDS)
+ * logid's by non-zero ogen (inode generation) and convert them
+ * into IGIF */
+ if (id->lgl_ogen == 0) {
+ fid->f_seq = id->lgl_oi.oi.oi_seq;
+ fid->f_oid = id->lgl_oi.oi.oi_id;
+ fid->f_ver = 0;
+ } else {
+ lu_igif_build(fid, id->lgl_oi.oi.oi_id, id->lgl_ogen);
+ }
+}
+
+static inline void fid_to_logid(struct lu_fid *fid, struct llog_logid *id)
+{
+ id->lgl_oi.oi.oi_seq = fid->f_seq;
+ id->lgl_oi.oi.oi_id = fid->f_oid;
+ id->lgl_ogen = 0;
+}
+
+static inline void logid_set_id(struct llog_logid *log_id, __u64 id)
+{
+ log_id->lgl_oi.oi.oi_id = id;
+}
+
+static inline __u64 logid_id(struct llog_logid *log_id)
+{
+ return log_id->lgl_oi.oi.oi_id;
+}
+
+struct llog_handle;
+
+/* llog.c - general API */
+int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
+ int flags, struct obd_uuid *uuid);
+int llog_copy_handler(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data);
+int llog_process(const struct lu_env *env, struct llog_handle *loghandle,
+ llog_cb_t cb, void *data, void *catdata);
+int llog_process_or_fork(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ llog_cb_t cb, void *data, void *catdata, bool fork);
+int llog_reverse_process(const struct lu_env *env,
+ struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata);
+int llog_cancel_rec(const struct lu_env *env, struct llog_handle *loghandle,
+ int index);
+int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_handle **lgh, struct llog_logid *logid,
+ char *name, enum llog_open_param open_param);
+int llog_close(const struct lu_env *env, struct llog_handle *cathandle);
+int llog_get_size(struct llog_handle *loghandle);
+
+/* llog_process flags */
+#define LLOG_FLAG_NODEAMON 0x0001
+
+/* llog_cat.c - catalog api */
+struct llog_process_data {
+ /**
+ * Any useful data needed while processing catalog. This is
+ * passed later to process callback.
+ */
+ void *lpd_data;
+ /**
+ * Catalog process callback function, called for each record
+ * in catalog.
+ */
+ llog_cb_t lpd_cb;
+ /**
+ * Start processing the catalog from startcat/startidx
+ */
+ int lpd_startcat;
+ int lpd_startidx;
+};
+
+struct llog_process_cat_data {
+ /**
+ * Temporary stored first_idx while scanning log.
+ */
+ int lpcd_first_idx;
+ /**
+ * Temporary stored last_idx while scanning log.
+ */
+ int lpcd_last_idx;
+};
+
+int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle);
+int llog_cat_add_rec(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ void *buf, struct thandle *th);
+int llog_cat_declare_add_rec(const struct lu_env *env,
+ struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct thandle *th);
+int llog_cat_add(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ void *buf);
+int llog_cat_cancel_records(const struct lu_env *env,
+ struct llog_handle *cathandle, int count,
+ struct llog_cookie *cookies);
+int llog_cat_process_or_fork(const struct lu_env *env,
+ struct llog_handle *cat_llh, llog_cb_t cb,
+ void *data, int startcat, int startidx, bool fork);
+int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
+ llog_cb_t cb, void *data, int startcat, int startidx);
+int llog_cat_reverse_process(const struct lu_env *env,
+ struct llog_handle *cat_llh, llog_cb_t cb,
+ void *data);
+int llog_cat_init_and_process(const struct lu_env *env,
+ struct llog_handle *llh);
+
+/* llog_obd.c */
+int llog_setup(const struct lu_env *env, struct obd_device *obd,
+ struct obd_llog_group *olg, int index,
+ struct obd_device *disk_obd, struct llog_operations *op);
+int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt);
+int llog_cleanup(const struct lu_env *env, struct llog_ctxt *);
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags);
+int llog_obd_add(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies);
+int llog_cancel(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct lov_stripe_md *lsm, int count,
+ struct llog_cookie *cookies, int flags);
+
+int obd_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *disk_obd, int *idx);
+
+int obd_llog_finish(struct obd_device *obd, int count);
+
+/* llog_ioctl.c */
+int llog_ioctl(const struct lu_env *env, struct llog_ctxt *ctxt, int cmd,
+ struct obd_ioctl_data *data);
+
+/* llog_net.c */
+int llog_initiator_connect(struct llog_ctxt *ctxt);
+
+struct llog_operations {
+ int (*lop_destroy)(const struct lu_env *env,
+ struct llog_handle *handle);
+ int (*lop_next_block)(const struct lu_env *env, struct llog_handle *h,
+ int *curr_idx, int next_idx, __u64 *offset,
+ void *buf, int len);
+ int (*lop_prev_block)(const struct lu_env *env, struct llog_handle *h,
+ int prev_idx, void *buf, int len);
+ int (*lop_read_header)(const struct lu_env *env,
+ struct llog_handle *handle);
+ int (*lop_setup)(const struct lu_env *env, struct obd_device *obd,
+ struct obd_llog_group *olg, int ctxt_idx,
+ struct obd_device *disk_obd);
+ int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp,
+ int flags);
+ int (*lop_cleanup)(const struct lu_env *env, struct llog_ctxt *ctxt);
+ int (*lop_cancel)(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct lov_stripe_md *lsm, int count,
+ struct llog_cookie *cookies, int flags);
+ int (*lop_connect)(struct llog_ctxt *ctxt, struct llog_logid *logid,
+ struct llog_gen *gen, struct obd_uuid *uuid);
+ /**
+ * Any llog file must be opened first using llog_open(). Llog can be
+ * opened by name, logid or without both, in last case the new logid
+ * will be generated.
+ */
+ int (*lop_open)(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_logid *logid, char *name,
+ enum llog_open_param);
+ /**
+ * Opened llog may not exist and this must be checked where needed using
+ * the llog_exist() call.
+ */
+ int (*lop_exist)(struct llog_handle *lgh);
+ /**
+ * Close llog file and calls llog_free_handle() implicitly.
+ * Any opened llog must be closed by llog_close() call.
+ */
+ int (*lop_close)(const struct lu_env *env, struct llog_handle *handle);
+ /**
+ * Create new llog file. The llog must be opened.
+ * Must be used only for local llog operations.
+ */
+ int (*lop_declare_create)(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct thandle *th);
+ int (*lop_create)(const struct lu_env *env, struct llog_handle *handle,
+ struct thandle *th);
+ /**
+ * write new record in llog. It appends records usually but can edit
+ * existing records too.
+ */
+ int (*lop_declare_write_rec)(const struct lu_env *env,
+ struct llog_handle *lgh,
+ struct llog_rec_hdr *rec,
+ int idx, struct thandle *th);
+ int (*lop_write_rec)(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec,
+ struct llog_cookie *cookie, int cookiecount,
+ void *buf, int idx, struct thandle *th);
+ /**
+ * Add new record in llog catalog. Does the same as llog_write_rec()
+ * but using llog catalog.
+ */
+ int (*lop_declare_add)(const struct lu_env *env,
+ struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct thandle *th);
+ int (*lop_add)(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct llog_cookie *cookie,
+ void *buf, struct thandle *th);
+ /* Old llog_add version, used in MDS-LOV-OSC now and will gone with
+ * LOD/OSP replacement */
+ int (*lop_obd_add)(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies);
+};
+
+/* In-memory descriptor for a log object or log catalog */
+struct llog_handle {
+ struct rw_semaphore lgh_lock;
+ spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */
+ struct llog_logid lgh_id; /* id of this log */
+ struct llog_log_hdr *lgh_hdr;
+ struct file *lgh_file;
+ struct dt_object *lgh_obj;
+ int lgh_last_idx;
+ int lgh_cur_idx; /* used during llog_process */
+ __u64 lgh_cur_offset; /* used during llog_process */
+ struct llog_ctxt *lgh_ctxt;
+ union {
+ struct plain_handle_data phd;
+ struct cat_handle_data chd;
+ } u;
+ char *lgh_name;
+ void *private_data;
+ struct llog_operations *lgh_logops;
+ atomic_t lgh_refcount;
+};
+
+/* llog_lvfs.c */
+extern struct llog_operations llog_lvfs_ops;
+
+/* llog_osd.c */
+extern struct llog_operations llog_osd_ops;
+int llog_osd_get_cat_list(const struct lu_env *env, struct dt_device *d,
+ int idx, int count,
+ struct llog_catid *idarray);
+int llog_osd_put_cat_list(const struct lu_env *env, struct dt_device *d,
+ int idx, int count,
+ struct llog_catid *idarray);
+
+#define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001
+#define LLOG_CTXT_FLAG_STOP 0x00000002
+
+struct llog_ctxt {
+ int loc_idx; /* my index the obd array of ctxt's */
+ struct obd_device *loc_obd; /* points back to the containing obd*/
+ struct obd_llog_group *loc_olg; /* group containing that ctxt */
+ struct obd_export *loc_exp; /* parent "disk" export (e.g. MDS) */
+ struct obd_import *loc_imp; /* to use in RPC's: can be backward
+ pointing import */
+ struct llog_operations *loc_logops;
+ struct llog_handle *loc_handle;
+ struct mutex loc_mutex; /* protect loc_imp */
+ atomic_t loc_refcount;
+ long loc_flags; /* flags, see above defines */
+ struct dt_object *loc_dir;
+};
+
+#define LLOG_PROC_BREAK 0x0001
+#define LLOG_DEL_RECORD 0x0002
+
+static inline int llog_obd2ops(struct llog_ctxt *ctxt,
+ struct llog_operations **lop)
+{
+ if (ctxt == NULL)
+ return -ENOTCONN;
+
+ *lop = ctxt->loc_logops;
+ if (*lop == NULL)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static inline int llog_handle2ops(struct llog_handle *loghandle,
+ struct llog_operations **lop)
+{
+ if (loghandle == NULL || loghandle->lgh_logops == NULL)
+ return -EINVAL;
+
+ *lop = loghandle->lgh_logops;
+ return 0;
+}
+
+static inline int llog_data_len(int len)
+{
+ return cfs_size_round(len);
+}
+
+static inline struct llog_ctxt *llog_ctxt_get(struct llog_ctxt *ctxt)
+{
+ atomic_inc(&ctxt->loc_refcount);
+ CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt,
+ atomic_read(&ctxt->loc_refcount));
+ return ctxt;
+}
+
+static inline void llog_ctxt_put(struct llog_ctxt *ctxt)
+{
+ if (ctxt == NULL)
+ return;
+ LASSERT_ATOMIC_GT_LT(&ctxt->loc_refcount, 0, LI_POISON);
+ CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", ctxt,
+ atomic_read(&ctxt->loc_refcount) - 1);
+ __llog_ctxt_put(NULL, ctxt);
+}
+
+static inline void llog_group_init(struct obd_llog_group *olg, int group)
+{
+ init_waitqueue_head(&olg->olg_waitq);
+ spin_lock_init(&olg->olg_lock);
+ mutex_init(&olg->olg_cat_processing);
+ olg->olg_seq = group;
+}
+
+static inline int llog_group_set_ctxt(struct obd_llog_group *olg,
+ struct llog_ctxt *ctxt, int index)
+{
+ LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+
+ spin_lock(&olg->olg_lock);
+ if (olg->olg_ctxts[index] != NULL) {
+ spin_unlock(&olg->olg_lock);
+ return -EEXIST;
+ }
+ olg->olg_ctxts[index] = ctxt;
+ spin_unlock(&olg->olg_lock);
+ return 0;
+}
+
+static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg,
+ int index)
+{
+ struct llog_ctxt *ctxt;
+
+ LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+
+ spin_lock(&olg->olg_lock);
+ if (olg->olg_ctxts[index] == NULL)
+ ctxt = NULL;
+ else
+ ctxt = llog_ctxt_get(olg->olg_ctxts[index]);
+ spin_unlock(&olg->olg_lock);
+ return ctxt;
+}
+
+static inline void llog_group_clear_ctxt(struct obd_llog_group *olg, int index)
+{
+ LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+ spin_lock(&olg->olg_lock);
+ olg->olg_ctxts[index] = NULL;
+ spin_unlock(&olg->olg_lock);
+}
+
+static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
+ int index)
+{
+ return llog_group_get_ctxt(&obd->obd_olg, index);
+}
+
+static inline int llog_group_ctxt_null(struct obd_llog_group *olg, int index)
+{
+ return (olg->olg_ctxts[index] == NULL);
+}
+
+static inline int llog_ctxt_null(struct obd_device *obd, int index)
+{
+ return (llog_group_ctxt_null(&obd->obd_olg, index));
+}
+
+static inline int llog_destroy(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(handle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_destroy == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_destroy(env, handle);
+ RETURN(rc);
+}
+
+static inline int llog_next_block(const struct lu_env *env,
+ struct llog_handle *loghandle, int *cur_idx,
+ int next_idx, __u64 *cur_offset, void *buf,
+ int len)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(loghandle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_next_block == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_next_block(env, loghandle, cur_idx, next_idx,
+ cur_offset, buf, len);
+ RETURN(rc);
+}
+
+static inline int llog_prev_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int prev_idx, void *buf, int len)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(loghandle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_prev_block == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_prev_block(env, loghandle, prev_idx, buf, len);
+ RETURN(rc);
+}
+
+static inline int llog_connect(struct llog_ctxt *ctxt,
+ struct llog_logid *logid, struct llog_gen *gen,
+ struct obd_uuid *uuid)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_obd2ops(ctxt, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_connect == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_connect(ctxt, logid, gen, uuid);
+ RETURN(rc);
+}
+
+/* llog.c */
+int llog_exist(struct llog_handle *loghandle);
+int llog_declare_create(const struct lu_env *env,
+ struct llog_handle *loghandle, struct thandle *th);
+int llog_create(const struct lu_env *env, struct llog_handle *handle,
+ struct thandle *th);
+int llog_declare_write_rec(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, int idx,
+ struct thandle *th);
+int llog_write_rec(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+ int numcookies, void *buf, int idx, struct thandle *th);
+int llog_add(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+ void *buf, struct thandle *th);
+int llog_declare_add(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct thandle *th);
+int lustre_process_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg);
+int lustre_end_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg);
+int llog_open_create(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_handle **res, struct llog_logid *logid,
+ char *name);
+int llog_erase(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_logid *logid, char *name);
+int llog_write(const struct lu_env *env, struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ int cookiecount, void *buf, int idx);
+
+/** @} log */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
new file mode 100644
index 000000000000..fb1561a809b9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -0,0 +1,176 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_mdc.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_MDC_H
+#define _LUSTRE_MDC_H
+
+/** \defgroup mdc mdc
+ *
+ * @{
+ */
+
+# include <linux/fs.h>
+# include <linux/dcache.h>
+# ifdef CONFIG_FS_POSIX_ACL
+# include <linux/posix_acl_xattr.h>
+# endif /* CONFIG_FS_POSIX_ACL */
+# include <linux/lustre_intent.h>
+#include <lustre_handles.h>
+#include <linux/libcfs/libcfs.h>
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_export.h>
+
+struct ptlrpc_client;
+struct obd_export;
+struct ptlrpc_request;
+struct obd_device;
+
+struct mdc_rpc_lock {
+ struct mutex rpcl_mutex;
+ struct lookup_intent *rpcl_it;
+ int rpcl_fakes;
+};
+
+#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
+
+static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
+{
+ mutex_init(&lck->rpcl_mutex);
+ lck->rpcl_it = NULL;
+}
+
+static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
+ struct lookup_intent *it)
+{
+ ENTRY;
+
+ if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP))
+ return;
+
+ /* This would normally block until the existing request finishes.
+ * If fail_loc is set it will block until the regular request is
+ * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set
+ * it will only be cleared when all fake requests are finished.
+ * Only when all fake requests are finished can normal requests
+ * be sent, to ensure they are recoverable again. */
+ again:
+ mutex_lock(&lck->rpcl_mutex);
+
+ if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
+ lck->rpcl_it = MDC_FAKE_RPCL_IT;
+ lck->rpcl_fakes++;
+ mutex_unlock(&lck->rpcl_mutex);
+ return;
+ }
+
+ /* This will only happen when the CFS_FAIL_CHECK() was
+ * just turned off but there are still requests in progress.
+ * Wait until they finish. It doesn't need to be efficient
+ * in this extremely rare case, just have low overhead in
+ * the common case when it isn't true. */
+ while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
+ mutex_unlock(&lck->rpcl_mutex);
+ schedule_timeout(cfs_time_seconds(1) / 4);
+ goto again;
+ }
+
+ LASSERT(lck->rpcl_it == NULL);
+ lck->rpcl_it = it;
+}
+
+static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
+ struct lookup_intent *it)
+{
+ if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP))
+ goto out;
+
+ if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
+ mutex_lock(&lck->rpcl_mutex);
+
+ LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
+ lck->rpcl_fakes--;
+
+ if (lck->rpcl_fakes == 0)
+ lck->rpcl_it = NULL;
+
+ } else {
+ LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
+ lck->rpcl_it = NULL;
+ }
+
+ mutex_unlock(&lck->rpcl_mutex);
+ out:
+ EXIT;
+}
+
+static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
+ struct mdt_body *body)
+{
+ if (body->valid & OBD_MD_FLMODEASIZE) {
+ if (exp->exp_obd->u.cli.cl_max_mds_easize < body->max_mdsize)
+ exp->exp_obd->u.cli.cl_max_mds_easize =
+ body->max_mdsize;
+ if (exp->exp_obd->u.cli.cl_max_mds_cookiesize <
+ body->max_cookiesize)
+ exp->exp_obd->u.cli.cl_max_mds_cookiesize =
+ body->max_cookiesize;
+ }
+}
+
+
+struct mdc_cache_waiter {
+ struct list_head mcw_entry;
+ wait_queue_head_t mcw_waitq;
+};
+
+/* mdc/mdc_locks.c */
+int it_disposition(struct lookup_intent *it, int flag);
+void it_clear_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+int it_open_error(int phase, struct lookup_intent *it);
+
+/** @} mdc */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
new file mode 100644
index 000000000000..b386f87471e3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -0,0 +1,81 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_mds.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_MDS_H
+#define _LUSTRE_MDS_H
+
+/** \defgroup mds mds
+ *
+ * @{
+ */
+
+#include <lustre_handles.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_export.h>
+
+struct mds_group_info {
+ struct obd_uuid *uuid;
+ int group;
+};
+
+struct mds_capa_info {
+ struct obd_uuid *uuid;
+ struct lustre_capa_key *capa;
+};
+
+#define MDD_OBD_NAME "mdd_obd"
+#define MDD_OBD_UUID "mdd_obd_uuid"
+
+static inline int md_should_create(__u64 flags)
+{
+ return !(flags & MDS_OPEN_DELAY_CREATE ||
+ !(flags & FMODE_WRITE));
+}
+
+/* these are local flags, used only on the client, private */
+#define M_CHECK_STALE 0200000000
+
+/** @} mds */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdt.h b/drivers/staging/lustre/lustre/include/lustre_mdt.h
new file mode 100644
index 000000000000..dba26a6cfa38
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mdt.h
@@ -0,0 +1,84 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_MDT_H
+#define __LINUX_MDT_H
+
+/** \defgroup mdt mdt
+ *
+ * @{
+ */
+
+#include <lustre/lustre_idl.h>
+#include <lustre_req_layout.h>
+#include <md_object.h>
+#include <dt_object.h>
+#include <linux/libcfs/libcfs.h>
+
+/*
+ * Common thread info for mdt, seq and fld
+ */
+struct com_thread_info {
+ /*
+ * for req-layout interface.
+ */
+ struct req_capsule *cti_pill;
+};
+
+enum {
+ ESERIOUS = 0x0001000
+};
+
+static inline int err_serious(int rc)
+{
+ LASSERT(rc < 0);
+ LASSERT(-rc < ESERIOUS);
+ return -(-rc | ESERIOUS);
+}
+
+static inline int clear_serious(int rc)
+{
+ if (rc < 0)
+ rc = -(-rc & ~ESERIOUS);
+ return rc;
+}
+
+static inline int is_serious(int rc)
+{
+ return (rc < 0 && -rc & ESERIOUS);
+}
+
+/** @} mdt */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
new file mode 100644
index 000000000000..293dd90e5b6c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -0,0 +1,3451 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup PtlRPC Portal RPC and networking module.
+ *
+ * PortalRPC is the layer used by rest of lustre code to achieve network
+ * communications: establish connections with corresponding export and import
+ * states, listen for a service, send and receive RPCs.
+ * PortalRPC also includes base recovery framework: packet resending and
+ * replaying, reconnections, pinger.
+ *
+ * PortalRPC utilizes LNet as its transport layer.
+ *
+ * @{
+ */
+
+
+#ifndef _LUSTRE_NET_H
+#define _LUSTRE_NET_H
+
+/** \defgroup net net
+ *
+ * @{
+ */
+
+#include <linux/lustre_net.h>
+
+#include <linux/libcfs/libcfs.h>
+// #include <obd.h>
+#include <linux/lnet/lnet.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_ha.h>
+#include <lustre_sec.h>
+#include <lustre_import.h>
+#include <lprocfs_status.h>
+#include <lu_object.h>
+#include <lustre_req_layout.h>
+
+#include <obd_support.h>
+#include <lustre_ver.h>
+
+/* MD flags we _always_ use */
+#define PTLRPC_MD_OPTIONS 0
+
+/**
+ * Max # of bulk operations in one request.
+ * In order for the client and server to properly negotiate the maximum
+ * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
+ * value. The client is free to limit the actual RPC size for any bulk
+ * transfer via cl_max_pages_per_rpc to some non-power-of-two value. */
+#define PTLRPC_BULK_OPS_BITS 2
+#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS)
+/**
+ * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
+ * should not be used on the server at all. Otherwise, it imposes a
+ * protocol limitation on the maximum RPC size that can be used by any
+ * RPC sent to that server in the future. Instead, the server should
+ * use the negotiated per-client ocd_brw_size to determine the bulk
+ * RPC count. */
+#define PTLRPC_BULK_OPS_MASK (~((__u64)PTLRPC_BULK_OPS_COUNT - 1))
+
+/**
+ * Define maxima for bulk I/O.
+ *
+ * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT
+ * of LNET_MTU sized RDMA transfers. Clients and servers negotiate the
+ * currently supported maximum between peers at connect via ocd_brw_size.
+ */
+#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
+#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS)
+#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+
+#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS)
+#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
+#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE
+#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS)
+
+/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
+# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
+# error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
+# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
+# error "PTLRPC_MAX_BRW_SIZE too big"
+# endif
+# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT)
+# error "PTLRPC_MAX_BRW_PAGES too big"
+# endif
+
+#define PTLRPC_NTHRS_INIT 2
+
+/**
+ * Buffer Constants
+ *
+ * Constants determine how memory is used to buffer incoming service requests.
+ *
+ * ?_NBUFS # buffers to allocate when growing the pool
+ * ?_BUFSIZE # bytes in a single request buffer
+ * ?_MAXREQSIZE # maximum request service will receive
+ *
+ * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk
+ * of ?_NBUFS is added to the pool.
+ *
+ * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are
+ * considered full when less than ?_MAXREQSIZE is left in them.
+ */
+/**
+ * Thread Constants
+ *
+ * Constants determine how threads are created for ptlrpc service.
+ *
+ * ?_NTHRS_INIT # threads to create for each service partition on
+ * initializing. If it's non-affinity service and
+ * there is only one partition, it's the overall #
+ * threads for the service while initializing.
+ * ?_NTHRS_BASE # threads should be created at least for each
+ * ptlrpc partition to keep the service healthy.
+ * It's the low-water mark of threads upper-limit
+ * for each partition.
+ * ?_THR_FACTOR # threads can be added on threads upper-limit for
+ * each CPU core. This factor is only for reference,
+ * we might decrease value of factor if number of cores
+ * per CPT is above a limit.
+ * ?_NTHRS_MAX # overall threads can be created for a service,
+ * it's a soft limit because if service is running
+ * on machine with hundreds of cores and tens of
+ * CPU partitions, we need to guarantee each partition
+ * has ?_NTHRS_BASE threads, which means total threads
+ * will be ?_NTHRS_BASE * number_of_cpts which can
+ * exceed ?_NTHRS_MAX.
+ *
+ * Examples
+ *
+ * #define MDS_NTHRS_INIT 2
+ * #define MDS_NTHRS_BASE 64
+ * #define MDS_NTHRS_FACTOR 8
+ * #define MDS_NTHRS_MAX 1024
+ *
+ * Example 1):
+ * ---------------------------------------------------------------------
+ * Server(A) has 16 cores, user configured it to 4 partitions so each
+ * partition has 4 cores, then actual number of service threads on each
+ * partition is:
+ * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96
+ *
+ * Total number of threads for the service is:
+ * 96 * partitions(4) = 384
+ *
+ * Example 2):
+ * ---------------------------------------------------------------------
+ * Server(B) has 32 cores, user configured it to 4 partitions so each
+ * partition has 8 cores, then actual number of service threads on each
+ * partition is:
+ * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128
+ *
+ * Total number of threads for the service is:
+ * 128 * partitions(4) = 512
+ *
+ * Example 3):
+ * ---------------------------------------------------------------------
+ * Server(B) has 96 cores, user configured it to 8 partitions so each
+ * partition has 12 cores, then actual number of service threads on each
+ * partition is:
+ * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160
+ *
+ * Total number of threads for the service is:
+ * 160 * partitions(8) = 1280
+ *
+ * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number
+ * as upper limit of threads number for each partition:
+ * MDS_NTHRS_MAX(1024) / partitions(8) = 128
+ *
+ * Example 4):
+ * ---------------------------------------------------------------------
+ * Server(C) have a thousand of cores and user configured it to 32 partitions
+ * MDS_NTHRS_BASE(64) * 32 = 2048
+ *
+ * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need
+ * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads
+ * to keep service healthy, so total number of threads will just be 2048.
+ *
+ * NB: we don't suggest to choose server with that many cores because backend
+ * filesystem itself, buffer cache, or underlying network stack might
+ * have some SMP scalability issues at that large scale.
+ *
+ * If user already has a fat machine with hundreds or thousands of cores,
+ * there are two choices for configuration:
+ * a) create CPU table from subset of all CPUs and run Lustre on
+ * top of this subset
+ * b) bind service threads on a few partitions, see modparameters of
+ * MDS and OSS for details
+*
+ * NB: these calculations (and examples below) are simplified to help
+ * understanding, the real implementation is a little more complex,
+ * please see ptlrpc_server_nthreads_check() for details.
+ *
+ */
+
+ /*
+ * LDLM threads constants:
+ *
+ * Given 8 as factor and 24 as base threads number
+ *
+ * example 1)
+ * On 4-core machine we will have 24 + 8 * 4 = 56 threads.
+ *
+ * example 2)
+ * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56
+ * threads for each partition and total threads number will be 112.
+ *
+ * example 3)
+ * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24)
+ * threads for each partition to keep service healthy, so total threads
+ * number should be 24 * 8 = 192.
+ *
+ * So with these constants, threads number will be at the similar level
+ * of old versions, unless target machine has over a hundred cores
+ */
+#define LDLM_THR_FACTOR 8
+#define LDLM_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define LDLM_NTHRS_BASE 24
+#define LDLM_NTHRS_MAX (num_online_cpus() == 1 ? 64 : 128)
+
+#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT
+#define LDLM_CLIENT_NBUFS 1
+#define LDLM_SERVER_NBUFS 64
+#define LDLM_BUFSIZE (8 * 1024)
+#define LDLM_MAXREQSIZE (5 * 1024)
+#define LDLM_MAXREPSIZE (1024)
+
+ /*
+ * MDS threads constants:
+ *
+ * Please see examples in "Thread Constants", MDS threads number will be at
+ * the comparable level of old versions, unless the server has many cores.
+ */
+#ifndef MDS_MAX_THREADS
+#define MDS_MAX_THREADS 1024
+#define MDS_MAX_OTHR_THREADS 256
+
+#else /* MDS_MAX_THREADS */
+#if MDS_MAX_THREADS < PTLRPC_NTHRS_INIT
+#undef MDS_MAX_THREADS
+#define MDS_MAX_THREADS PTLRPC_NTHRS_INIT
+#endif
+#define MDS_MAX_OTHR_THREADS max(PTLRPC_NTHRS_INIT, MDS_MAX_THREADS / 2)
+#endif
+
+/* default service */
+#define MDS_THR_FACTOR 8
+#define MDS_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_NTHRS_MAX MDS_MAX_THREADS
+#define MDS_NTHRS_BASE min(64, MDS_NTHRS_MAX)
+
+/* read-page service */
+#define MDS_RDPG_THR_FACTOR 4
+#define MDS_RDPG_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_RDPG_NTHRS_MAX MDS_MAX_OTHR_THREADS
+#define MDS_RDPG_NTHRS_BASE min(48, MDS_RDPG_NTHRS_MAX)
+
+/* these should be removed when we remove setattr service in the future */
+#define MDS_SETA_THR_FACTOR 4
+#define MDS_SETA_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_SETA_NTHRS_MAX MDS_MAX_OTHR_THREADS
+#define MDS_SETA_NTHRS_BASE min(48, MDS_SETA_NTHRS_MAX)
+
+/* non-affinity threads */
+#define MDS_OTHR_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define MDS_OTHR_NTHRS_MAX MDS_MAX_OTHR_THREADS
+
+#define MDS_NBUFS 64
+
+/**
+ * Assume file name length = FNAME_MAX = 256 (true for ext3).
+ * path name length = PATH_MAX = 4096
+ * LOV MD size max = EA_MAX = 24 * 2000
+ * (NB: 24 is size of lov_ost_data)
+ * LOV LOGCOOKIE size max = 32 * 2000
+ * (NB: 32 is size of llog_cookie)
+ * symlink: FNAME_MAX + PATH_MAX <- largest
+ * link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create)
+ * rename: FNAME_MAX + FNAME_MAX
+ * open: FNAME_MAX + EA_MAX
+ *
+ * MDS_MAXREQSIZE ~= 4736 bytes =
+ * lustre_msg + ldlm_request + mdt_body + mds_rec_create + FNAME_MAX + PATH_MAX
+ * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header
+ *
+ * Realistic size is about 512 bytes (20 character name + 128 char symlink),
+ * except in the open case where there are a large number of OSTs in a LOV.
+ */
+#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */
+#define MDS_MAXREPSIZE (9 * 1024) /* >= 8300 */
+
+/**
+ * MDS incoming request with LOV EA
+ * 24 = sizeof(struct lov_ost_data), i.e: replay of opencreate
+ */
+#define MDS_LOV_MAXREQSIZE max(MDS_MAXREQSIZE, \
+ 362 + LOV_MAX_STRIPE_COUNT * 24)
+/**
+ * MDS outgoing reply with LOV EA
+ *
+ * NB: max reply size Lustre 2.4+ client can get from old MDS is:
+ * LOV_MAX_STRIPE_COUNT * (llog_cookie + lov_ost_data) + extra bytes
+ *
+ * but 2.4 or later MDS will never send reply with llog_cookie to any
+ * version client. This macro is defined for server side reply buffer size.
+ */
+#define MDS_LOV_MAXREPSIZE MDS_LOV_MAXREQSIZE
+
+/**
+ * This is the size of a maximum REINT_SETXATTR request:
+ *
+ * lustre_msg 56 (32 + 4 x 5 + 4)
+ * ptlrpc_body 184
+ * mdt_rec_setxattr 136
+ * lustre_capa 120
+ * name 256 (XATTR_NAME_MAX)
+ * value 65536 (XATTR_SIZE_MAX)
+ */
+#define MDS_EA_MAXREQSIZE 66288
+
+/**
+ * These are the maximum request and reply sizes (rounded up to 1 KB
+ * boundaries) for the "regular" MDS_REQUEST_PORTAL and MDS_REPLY_PORTAL.
+ */
+#define MDS_REG_MAXREQSIZE (((max(MDS_EA_MAXREQSIZE, \
+ MDS_LOV_MAXREQSIZE) + 1023) >> 10) << 10)
+#define MDS_REG_MAXREPSIZE MDS_REG_MAXREQSIZE
+
+/**
+ * The update request includes all of updates from the create, which might
+ * include linkea (4K maxim), together with other updates, we set it to 9K:
+ * lustre_msg + ptlrpc_body + UPDATE_BUF_SIZE (8K)
+ */
+#define MDS_OUT_MAXREQSIZE (9 * 1024)
+#define MDS_OUT_MAXREPSIZE MDS_MAXREPSIZE
+
+/** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */
+#define MDS_BUFSIZE max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+ 8 * 1024)
+
+/**
+ * MDS_REG_BUFSIZE should at least be MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD.
+ * However, we need to allocate a much larger buffer for it because LNet
+ * requires each MD(rqbd) has at least MDS_REQ_MAXREQSIZE bytes left to avoid
+ * dropping of maximum-sized incoming request. So if MDS_REG_BUFSIZE is only a
+ * little larger than MDS_REG_MAXREQSIZE, then it can only fit in one request
+ * even there are about MDS_REG_MAX_REQSIZE bytes left in a rqbd, and memory
+ * utilization is very low.
+ *
+ * In the meanwhile, size of rqbd can't be too large, because rqbd can't be
+ * reused until all requests fit in it have been processed and released,
+ * which means one long blocked request can prevent the rqbd be reused.
+ * Now we set request buffer size to 160 KB, so even each rqbd is unlinked
+ * from LNet with unused 65 KB, buffer utilization will be about 59%.
+ * Please check LU-2432 for details.
+ */
+#define MDS_REG_BUFSIZE max(MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+ 160 * 1024)
+
+/**
+ * MDS_OUT_BUFSIZE = max_out_reqsize + max sptlrpc payload (~1K) which is
+ * about 10K, for the same reason as MDS_REG_BUFSIZE, we also give some
+ * extra bytes to each request buffer to improve buffer utilization rate.
+ */
+#define MDS_OUT_BUFSIZE max(MDS_OUT_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+ 24 * 1024)
+
+/** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc */
+#define FLD_MAXREQSIZE (160)
+
+/** FLD_MAXREPSIZE == lustre_msg + ptlrpc_body */
+#define FLD_MAXREPSIZE (152)
+#define FLD_BUFSIZE (1 << 12)
+
+/**
+ * SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range +
+ * __u32 padding */
+#define SEQ_MAXREQSIZE (160)
+
+/** SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */
+#define SEQ_MAXREPSIZE (152)
+#define SEQ_BUFSIZE (1 << 12)
+
+/** MGS threads must be >= 3, see bug 22458 comment #28 */
+#define MGS_NTHRS_INIT (PTLRPC_NTHRS_INIT + 1)
+#define MGS_NTHRS_MAX 32
+
+#define MGS_NBUFS 64
+#define MGS_BUFSIZE (8 * 1024)
+#define MGS_MAXREQSIZE (7 * 1024)
+#define MGS_MAXREPSIZE (9 * 1024)
+
+ /*
+ * OSS threads constants:
+ *
+ * Given 8 as factor and 64 as base threads number
+ *
+ * example 1):
+ * On 8-core server configured to 2 partitions, we will have
+ * 64 + 8 * 4 = 96 threads for each partition, 192 total threads.
+ *
+ * example 2):
+ * On 32-core machine configured to 4 partitions, we will have
+ * 64 + 8 * 8 = 112 threads for each partition, so total threads number
+ * will be 112 * 4 = 448.
+ *
+ * example 3):
+ * On 64-core machine configured to 4 partitions, we will have
+ * 64 + 16 * 8 = 192 threads for each partition, so total threads number
+ * will be 192 * 4 = 768 which is above limit OSS_NTHRS_MAX(512), so we
+ * cut off the value to OSS_NTHRS_MAX(512) / 4 which is 128 threads
+ * for each partition.
+ *
+ * So we can see that with these constants, threads number wil be at the
+ * similar level of old versions, unless the server has many cores.
+ */
+ /* depress threads factor for VM with small memory size */
+#define OSS_THR_FACTOR min_t(int, 8, \
+ NUM_CACHEPAGES >> (28 - PAGE_CACHE_SHIFT))
+#define OSS_NTHRS_INIT (PTLRPC_NTHRS_INIT + 1)
+#define OSS_NTHRS_BASE 64
+#define OSS_NTHRS_MAX 512
+
+/* threads for handling "create" request */
+#define OSS_CR_THR_FACTOR 1
+#define OSS_CR_NTHRS_INIT PTLRPC_NTHRS_INIT
+#define OSS_CR_NTHRS_BASE 8
+#define OSS_CR_NTHRS_MAX 64
+
+/**
+ * OST_IO_MAXREQSIZE ~=
+ * lustre_msg + ptlrpc_body + obdo + obd_ioobj +
+ * DT_MAX_BRW_PAGES * niobuf_remote
+ *
+ * - single object with 16 pages is 512 bytes
+ * - OST_IO_MAXREQSIZE must be at least 1 page of cookies plus some spillover
+ * - Must be a multiple of 1024
+ * - actual size is about 18K
+ */
+#define _OST_MAXREQSIZE_SUM (sizeof(struct lustre_msg) + \
+ sizeof(struct ptlrpc_body) + \
+ sizeof(struct obdo) + \
+ sizeof(struct obd_ioobj) + \
+ sizeof(struct niobuf_remote) * DT_MAX_BRW_PAGES)
+/**
+ * FIEMAP request can be 4K+ for now
+ */
+#define OST_MAXREQSIZE (5 * 1024)
+#define OST_IO_MAXREQSIZE max_t(int, OST_MAXREQSIZE, \
+ (((_OST_MAXREQSIZE_SUM - 1) | (1024 - 1)) + 1))
+
+#define OST_MAXREPSIZE (9 * 1024)
+#define OST_IO_MAXREPSIZE OST_MAXREPSIZE
+
+#define OST_NBUFS 64
+/** OST_BUFSIZE = max_reqsize + max sptlrpc payload size */
+#define OST_BUFSIZE max_t(int, OST_MAXREQSIZE + 1024, 16 * 1024)
+/**
+ * OST_IO_MAXREQSIZE is 18K, giving extra 46K can increase buffer utilization
+ * rate of request buffer, please check comment of MDS_LOV_BUFSIZE for details.
+ */
+#define OST_IO_BUFSIZE max_t(int, OST_IO_MAXREQSIZE + 1024, 64 * 1024)
+
+/* Macro to hide a typecast. */
+#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
+
+/**
+ * Structure to single define portal connection.
+ */
+struct ptlrpc_connection {
+ /** linkage for connections hash table */
+ struct hlist_node c_hash;
+ /** Our own lnet nid for this connection */
+ lnet_nid_t c_self;
+ /** Remote side nid for this connection */
+ lnet_process_id_t c_peer;
+ /** UUID of the other side */
+ struct obd_uuid c_remote_uuid;
+ /** reference counter for this connection */
+ atomic_t c_refcount;
+};
+
+/** Client definition for PortalRPC */
+struct ptlrpc_client {
+ /** What lnet portal does this client send messages to by default */
+ __u32 cli_request_portal;
+ /** What portal do we expect replies on */
+ __u32 cli_reply_portal;
+ /** Name of the client */
+ char *cli_name;
+};
+
+/** state flags of requests */
+/* XXX only ones left are those used by the bulk descs as well! */
+#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */
+#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */
+
+#define REQ_MAX_ACK_LOCKS 8
+
+union ptlrpc_async_args {
+ /**
+ * Scratchpad for passing args to completion interpreter. Users
+ * cast to the struct of their choosing, and CLASSERT that this is
+ * big enough. For _tons_ of context, OBD_ALLOC a struct and store
+ * a pointer to it here. The pointer_arg ensures this struct is at
+ * least big enough for that.
+ */
+ void *pointer_arg[11];
+ __u64 space[7];
+};
+
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *);
+
+/**
+ * Definition of request set structure.
+ * Request set is a list of requests (not necessary to the same target) that
+ * once populated with RPCs could be sent in parallel.
+ * There are two kinds of request sets. General purpose and with dedicated
+ * serving thread. Example of the latter is ptlrpcd set.
+ * For general purpose sets once request set started sending it is impossible
+ * to add new requests to such set.
+ * Provides a way to call "completion callbacks" when all requests in the set
+ * returned.
+ */
+struct ptlrpc_request_set {
+ atomic_t set_refcount;
+ /** number of in queue requests */
+ atomic_t set_new_count;
+ /** number of uncompleted requests */
+ atomic_t set_remaining;
+ /** wait queue to wait on for request events */
+ wait_queue_head_t set_waitq;
+ wait_queue_head_t *set_wakeup_ptr;
+ /** List of requests in the set */
+ struct list_head set_requests;
+ /**
+ * List of completion callbacks to be called when the set is completed
+ * This is only used if \a set_interpret is NULL.
+ * Links struct ptlrpc_set_cbdata.
+ */
+ struct list_head set_cblist;
+ /** Completion callback, if only one. */
+ set_interpreter_func set_interpret;
+ /** opaq argument passed to completion \a set_interpret callback. */
+ void *set_arg;
+ /**
+ * Lock for \a set_new_requests manipulations
+ * locked so that any old caller can communicate requests to
+ * the set holder who can then fold them into the lock-free set
+ */
+ spinlock_t set_new_req_lock;
+ /** List of new yet unsent requests. Only used with ptlrpcd now. */
+ struct list_head set_new_requests;
+
+ /** rq_status of requests that have been freed already */
+ int set_rc;
+ /** Additional fields used by the flow control extension */
+ /** Maximum number of RPCs in flight */
+ int set_max_inflight;
+ /** Callback function used to generate RPCs */
+ set_producer_func set_producer;
+ /** opaq argument passed to the producer callback */
+ void *set_producer_arg;
+};
+
+/**
+ * Description of a single ptrlrpc_set callback
+ */
+struct ptlrpc_set_cbdata {
+ /** List linkage item */
+ struct list_head psc_item;
+ /** Pointer to interpreting function */
+ set_interpreter_func psc_interpret;
+ /** Opaq argument to pass to the callback */
+ void *psc_data;
+};
+
+struct ptlrpc_bulk_desc;
+struct ptlrpc_service_part;
+struct ptlrpc_service;
+
+/**
+ * ptlrpc callback & work item stuff
+ */
+struct ptlrpc_cb_id {
+ void (*cbid_fn)(lnet_event_t *ev); /* specific callback fn */
+ void *cbid_arg; /* additional arg */
+};
+
+/** Maximum number of locks to fit into reply state */
+#define RS_MAX_LOCKS 8
+#define RS_DEBUG 0
+
+/**
+ * Structure to define reply state on the server
+ * Reply state holds various reply message information. Also for "difficult"
+ * replies (rep-ack case) we store the state after sending reply and wait
+ * for the client to acknowledge the reception. In these cases locks could be
+ * added to the state for replay/failover consistency guarantees.
+ */
+struct ptlrpc_reply_state {
+ /** Callback description */
+ struct ptlrpc_cb_id rs_cb_id;
+ /** Linkage for list of all reply states in a system */
+ struct list_head rs_list;
+ /** Linkage for list of all reply states on same export */
+ struct list_head rs_exp_list;
+ /** Linkage for list of all reply states for same obd */
+ struct list_head rs_obd_list;
+#if RS_DEBUG
+ struct list_head rs_debug_list;
+#endif
+ /** A spinlock to protect the reply state flags */
+ spinlock_t rs_lock;
+ /** Reply state flags */
+ unsigned long rs_difficult:1; /* ACK/commit stuff */
+ unsigned long rs_no_ack:1; /* no ACK, even for
+ difficult requests */
+ unsigned long rs_scheduled:1; /* being handled? */
+ unsigned long rs_scheduled_ever:1;/* any schedule attempts? */
+ unsigned long rs_handled:1; /* been handled yet? */
+ unsigned long rs_on_net:1; /* reply_out_callback pending? */
+ unsigned long rs_prealloc:1; /* rs from prealloc list */
+ unsigned long rs_committed:1;/* the transaction was committed
+ and the rs was dispatched
+ by ptlrpc_commit_replies */
+ /** Size of the state */
+ int rs_size;
+ /** opcode */
+ __u32 rs_opc;
+ /** Transaction number */
+ __u64 rs_transno;
+ /** xid */
+ __u64 rs_xid;
+ struct obd_export *rs_export;
+ struct ptlrpc_service_part *rs_svcpt;
+ /** Lnet metadata handle for the reply */
+ lnet_handle_md_t rs_md_h;
+ atomic_t rs_refcount;
+
+ /** Context for the sevice thread */
+ struct ptlrpc_svc_ctx *rs_svc_ctx;
+ /** Reply buffer (actually sent to the client), encoded if needed */
+ struct lustre_msg *rs_repbuf; /* wrapper */
+ /** Size of the reply buffer */
+ int rs_repbuf_len; /* wrapper buf length */
+ /** Size of the reply message */
+ int rs_repdata_len; /* wrapper msg length */
+ /**
+ * Actual reply message. Its content is encrupted (if needed) to
+ * produce reply buffer for actual sending. In simple case
+ * of no network encryption we jus set \a rs_repbuf to \a rs_msg
+ */
+ struct lustre_msg *rs_msg; /* reply message */
+
+ /** Number of locks awaiting client ACK */
+ int rs_nlocks;
+ /** Handles of locks awaiting client reply ACK */
+ struct lustre_handle rs_locks[RS_MAX_LOCKS];
+ /** Lock modes of locks in \a rs_locks */
+ ldlm_mode_t rs_modes[RS_MAX_LOCKS];
+};
+
+struct ptlrpc_thread;
+
+/** RPC stages */
+enum rq_phase {
+ RQ_PHASE_NEW = 0xebc0de00,
+ RQ_PHASE_RPC = 0xebc0de01,
+ RQ_PHASE_BULK = 0xebc0de02,
+ RQ_PHASE_INTERPRET = 0xebc0de03,
+ RQ_PHASE_COMPLETE = 0xebc0de04,
+ RQ_PHASE_UNREGISTERING = 0xebc0de05,
+ RQ_PHASE_UNDEFINED = 0xebc0de06
+};
+
+/** Type of request interpreter call-back */
+typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *arg, int rc);
+
+/**
+ * Definition of request pool structure.
+ * The pool is used to store empty preallocated requests for the case
+ * when we would actually need to send something without performing
+ * any allocations (to avoid e.g. OOM).
+ */
+struct ptlrpc_request_pool {
+ /** Locks the list */
+ spinlock_t prp_lock;
+ /** list of ptlrpc_request structs */
+ struct list_head prp_req_list;
+ /** Maximum message size that would fit into a rquest from this pool */
+ int prp_rq_size;
+ /** Function to allocate more requests for this pool */
+ void (*prp_populate)(struct ptlrpc_request_pool *, int);
+};
+
+struct lu_context;
+struct lu_env;
+
+struct ldlm_lock;
+
+/**
+ * \defgroup nrs Network Request Scheduler
+ * @{
+ */
+struct ptlrpc_nrs_policy;
+struct ptlrpc_nrs_resource;
+struct ptlrpc_nrs_request;
+
+/**
+ * NRS control operations.
+ *
+ * These are common for all policies.
+ */
+enum ptlrpc_nrs_ctl {
+ /**
+ * Not a valid opcode.
+ */
+ PTLRPC_NRS_CTL_INVALID,
+ /**
+ * Activate the policy.
+ */
+ PTLRPC_NRS_CTL_START,
+ /**
+ * Reserved for multiple primary policies, which may be a possibility
+ * in the future.
+ */
+ PTLRPC_NRS_CTL_STOP,
+ /**
+ * Policies can start using opcodes from this value and onwards for
+ * their own purposes; the assigned value itself is arbitrary.
+ */
+ PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
+};
+
+/**
+ * ORR policy operations
+ */
+enum nrs_ctl_orr {
+ NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+ NRS_CTL_ORR_WR_QUANTUM,
+ NRS_CTL_ORR_RD_OFF_TYPE,
+ NRS_CTL_ORR_WR_OFF_TYPE,
+ NRS_CTL_ORR_RD_SUPP_REQ,
+ NRS_CTL_ORR_WR_SUPP_REQ,
+};
+
+/**
+ * NRS policy operations.
+ *
+ * These determine the behaviour of a policy, and are called in response to
+ * NRS core events.
+ */
+struct ptlrpc_nrs_pol_ops {
+ /**
+ * Called during policy registration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being initialized
+ */
+ int (*op_policy_init) (struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called during policy unregistration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being unregistered/finalized
+ */
+ void (*op_policy_fini) (struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when activating a policy via lprocfs; policies allocate and
+ * initialize their resources here; this operation is optional.
+ *
+ * \param[in,out] policy The policy being started
+ *
+ * \see nrs_policy_start_locked()
+ */
+ int (*op_policy_start) (struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when deactivating a policy via lprocfs; policies deallocate
+ * their resources here; this operation is optional
+ *
+ * \param[in,out] policy The policy being stopped
+ *
+ * \see nrs_policy_stop0()
+ */
+ void (*op_policy_stop) (struct ptlrpc_nrs_policy *policy);
+ /**
+ * Used for policy-specific operations; i.e. not generic ones like
+ * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
+ * to an ioctl; this operation is optional.
+ *
+ * \param[in,out] policy The policy carrying out operation \a opc
+ * \param[in] opc The command operation being carried out
+ * \param[in,out] arg An generic buffer for communication between the
+ * user and the control operation
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \see ptlrpc_nrs_policy_control()
+ */
+ int (*op_policy_ctl) (struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg);
+
+ /**
+ * Called when obtaining references to the resources of the resource
+ * hierarchy for a request that has arrived for handling at the PTLRPC
+ * service. Policies should return -ve for requests they do not wish
+ * to handle. This operation is mandatory.
+ *
+ * \param[in,out] policy The policy we're getting resources for.
+ * \param[in,out] nrq The request we are getting resources for.
+ * \param[in] parent The parent resource of the resource being
+ * requested; set to NULL if none.
+ * \param[out] resp The resource is to be returned here; the
+ * fallback policy in an NRS head should
+ * \e always return a non-NULL pointer value.
+ * \param[in] moving_req When set, signifies that this is an attempt
+ * to obtain resources for a request being moved
+ * to the high-priority NRS head by
+ * ldlm_lock_reorder_req().
+ * This implies two things:
+ * 1. We are under obd_export::exp_rpc_lock and
+ * so should not sleep.
+ * 2. We should not perform non-idempotent or can
+ * skip performing idempotent operations that
+ * were carried out when resources were first
+ * taken for the request when it was initialized
+ * in ptlrpc_nrs_req_initialize().
+ *
+ * \retval 0, +ve The level of the returned resource in the resource
+ * hierarchy; currently only 0 (for a non-leaf resource)
+ * and 1 (for a leaf resource) are supported by the
+ * framework.
+ * \retval -ve error
+ *
+ * \see ptlrpc_nrs_req_initialize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ int (*op_res_get) (struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req);
+ /**
+ * Called when releasing references taken for resources in the resource
+ * hierarchy for the request; this operation is optional.
+ *
+ * \param[in,out] policy The policy the resource belongs to
+ * \param[in] res The resource to be freed
+ *
+ * \see ptlrpc_nrs_req_finalize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ void (*op_res_put) (struct ptlrpc_nrs_policy *policy,
+ const struct ptlrpc_nrs_resource *res);
+
+ /**
+ * Obtains a request for handling from the policy, and optionally
+ * removes the request from the policy; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy to poll
+ * \param[in] peek When set, signifies that we just want to
+ * examine the request, and not handle it, so the
+ * request is not removed from the policy.
+ * \param[in] force When set, it will force a policy to return a
+ * request if it has one queued.
+ *
+ * \retval NULL No request available for handling
+ * \retval valid-pointer The request polled for handling
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ */
+ struct ptlrpc_nrs_request *
+ (*op_req_get) (struct ptlrpc_nrs_policy *policy, bool peek,
+ bool force);
+ /**
+ * Called when attempting to add a request to a policy for later
+ * handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy on which to enqueue \a nrq
+ * \param[in,out] nrq The request to enqueue
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ *
+ * \see ptlrpc_nrs_req_add_nolock()
+ */
+ int (*op_req_enqueue) (struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Removes a request from the policy's set of pending requests. Normally
+ * called after a request has been polled successfully from the policy
+ * for handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy the request \a nrq belongs to
+ * \param[in,out] nrq The request to dequeue
+ *
+ * \see ptlrpc_nrs_req_del_nolock()
+ */
+ void (*op_req_dequeue) (struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Called after the request being carried out. Could be used for
+ * job/resource control; this operation is optional.
+ *
+ * \param[in,out] policy The policy which is stopping to handle request
+ * \a nrq
+ * \param[in,out] nrq The request
+ *
+ * \pre spin_is_locked(&svcpt->scp_req_lock)
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+ void (*op_req_stop) (struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Registers the policy's lprocfs interface with a PTLRPC service.
+ *
+ * \param[in] svc The service
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ */
+ int (*op_lprocfs_init) (struct ptlrpc_service *svc);
+ /**
+ * Unegisters the policy's lprocfs interface with a PTLRPC service.
+ *
+ * In cases of failed policy registration in
+ * \e ptlrpc_nrs_policy_register(), this function may be called for a
+ * service which has not registered the policy successfully, so
+ * implementations of this method should make sure their operations are
+ * safe in such cases.
+ *
+ * \param[in] svc The service
+ */
+ void (*op_lprocfs_fini) (struct ptlrpc_service *svc);
+};
+
+/**
+ * Policy flags
+ */
+enum nrs_policy_flags {
+ /**
+ * Fallback policy, use this flag only on a single supported policy per
+ * service. The flag cannot be used on policies that use
+ * \e PTLRPC_NRS_FL_REG_EXTERN
+ */
+ PTLRPC_NRS_FL_FALLBACK = (1 << 0),
+ /**
+ * Start policy immediately after registering.
+ */
+ PTLRPC_NRS_FL_REG_START = (1 << 1),
+ /**
+ * This is a policy registering from a module different to the one NRS
+ * core ships in (currently ptlrpc).
+ */
+ PTLRPC_NRS_FL_REG_EXTERN = (1 << 2),
+};
+
+/**
+ * NRS queue type.
+ *
+ * Denotes whether an NRS instance is for handling normal or high-priority
+ * RPCs, or whether an operation pertains to one or both of the NRS instances
+ * in a service.
+ */
+enum ptlrpc_nrs_queue_type {
+ PTLRPC_NRS_QUEUE_REG = (1 << 0),
+ PTLRPC_NRS_QUEUE_HP = (1 << 1),
+ PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
+};
+
+/**
+ * NRS head
+ *
+ * A PTLRPC service has at least one NRS head instance for handling normal
+ * priority RPCs, and may optionally have a second NRS head instance for
+ * handling high-priority RPCs. Each NRS head maintains a list of available
+ * policies, of which one and only one policy is acting as the fallback policy,
+ * and optionally a different policy may be acting as the primary policy. For
+ * all RPCs handled by this NRS head instance, NRS core will first attempt to
+ * enqueue the RPC using the primary policy (if any). The fallback policy is
+ * used in the following cases:
+ * - when there was no primary policy in the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
+ * was initialized.
+ * - when the primary policy that was at the
+ * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ * RPC was initialized, denoted it did not wish, or for some other reason was
+ * not able to handle the request, by returning a non-valid NRS resource
+ * reference.
+ * - when the primary policy that was at the
+ * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ * RPC was initialized, fails later during the request enqueueing stage.
+ *
+ * \see nrs_resource_get_safe()
+ * \see nrs_request_enqueue()
+ */
+struct ptlrpc_nrs {
+ spinlock_t nrs_lock;
+ /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
+ /**
+ * List of registered policies
+ */
+ struct list_head nrs_policy_list;
+ /**
+ * List of policies with queued requests. Policies that have any
+ * outstanding requests are queued here, and this list is queried
+ * in a round-robin manner from NRS core when obtaining a request
+ * for handling. This ensures that requests from policies that at some
+ * point transition away from the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
+ */
+ struct list_head nrs_policy_queued;
+ /**
+ * Service partition for this NRS head
+ */
+ struct ptlrpc_service_part *nrs_svcpt;
+ /**
+ * Primary policy, which is the preferred policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_primary;
+ /**
+ * Fallback policy, which is the backup policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_fallback;
+ /**
+ * This NRS head handles either HP or regular requests
+ */
+ enum ptlrpc_nrs_queue_type nrs_queue_type;
+ /**
+ * # queued requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_queued;
+ /**
+ * # scheduled requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_started;
+ /**
+ * # policies on this NRS
+ */
+ unsigned nrs_num_pols;
+ /**
+ * This NRS head is in progress of starting a policy
+ */
+ unsigned nrs_policy_starting:1;
+ /**
+ * In progress of shutting down the whole NRS head; used during
+ * unregistration
+ */
+ unsigned nrs_stopping:1;
+};
+
+#define NRS_POL_NAME_MAX 16
+
+struct ptlrpc_nrs_pol_desc;
+
+/**
+ * Service compatibility predicate; this determines whether a policy is adequate
+ * for handling RPCs of a particular PTLRPC service.
+ *
+ * XXX:This should give the same result during policy registration and
+ * unregistration, and for all partitions of a service; so the result should not
+ * depend on temporal service or other properties, that may influence the
+ * result.
+ */
+typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc);
+
+struct ptlrpc_nrs_pol_conf {
+ /**
+ * Human-readable policy name
+ */
+ char nc_name[NRS_POL_NAME_MAX];
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *nc_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t nc_compat;
+ /**
+ * Set for policies that support a single ptlrpc service, i.e. ones that
+ * have \a pd_compat set to nrs_policy_compat_one(). The variable value
+ * depicts the name of the single service that such policies are
+ * compatible with.
+ */
+ const char *nc_compat_svc_name;
+ /**
+ * Owner module for this policy descriptor; policies registering from a
+ * different module to the one the NRS framework is held within
+ * (currently ptlrpc), should set this field to THIS_MODULE.
+ */
+ module_t *nc_owner;
+ /**
+ * Policy registration flags; a bitmast of \e nrs_policy_flags
+ */
+ unsigned nc_flags;
+};
+
+/**
+ * NRS policy registering descriptor
+ *
+ * Is used to hold a description of a policy that can be passed to NRS core in
+ * order to register the policy with NRS heads in different PTLRPC services.
+ */
+struct ptlrpc_nrs_pol_desc {
+ /**
+ * Human-readable policy name
+ */
+ char pd_name[NRS_POL_NAME_MAX];
+ /**
+ * Link into nrs_core::nrs_policies
+ */
+ struct list_head pd_list;
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *pd_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t pd_compat;
+ /**
+ * Set for policies that are compatible with only one PTLRPC service.
+ *
+ * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
+ */
+ const char *pd_compat_svc_name;
+ /**
+ * Owner module for this policy descriptor.
+ *
+ * We need to hold a reference to the module whenever we might make use
+ * of any of the module's contents, i.e.
+ * - If one or more instances of the policy are at a state where they
+ * might be handling a request, i.e.
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
+ * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
+ * is taken on the module when
+ * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
+ * becomes 0, so that we hold only one reference to the module maximum
+ * at any time.
+ *
+ * We do not need to hold a reference to the module, even though we
+ * might use code and data from the module, in the following cases:
+ * - During external policy registration, because this should happen in
+ * the module's init() function, in which case the module is safe from
+ * removal because a reference is being held on the module by the
+ * kernel, and iirc kmod (and I guess module-init-tools also) will
+ * serialize any racing processes properly anyway.
+ * - During external policy unregistration, because this should happen
+ * in a module's exit() function, and any attempts to start a policy
+ * instance would need to take a reference on the module, and this is
+ * not possible once we have reached the point where the exit()
+ * handler is called.
+ * - During service registration and unregistration, as service setup
+ * and cleanup, and policy registration, unregistration and policy
+ * instance starting, are serialized by \e nrs_core::nrs_mutex, so
+ * as long as users adhere to the convention of registering policies
+ * in init() and unregistering them in module exit() functions, there
+ * should not be a race between these operations.
+ * - During any policy-specific lprocfs operations, because a reference
+ * is held by the kernel on a proc entry that has been entered by a
+ * syscall, so as long as proc entries are removed during unregistration time,
+ * then unregistration and lprocfs operations will be properly
+ * serialized.
+ */
+ module_t *pd_owner;
+ /**
+ * Bitmask of \e nrs_policy_flags
+ */
+ unsigned pd_flags;
+ /**
+ * # of references on this descriptor
+ */
+ atomic_t pd_refs;
+};
+
+/**
+ * NRS policy state
+ *
+ * Policies transition from one state to the other during their lifetime
+ */
+enum ptlrpc_nrs_pol_state {
+ /**
+ * Not a valid policy state.
+ */
+ NRS_POL_STATE_INVALID,
+ /**
+ * Policies are at this state either at the start of their life, or
+ * transition here when the user selects a different policy to act
+ * as the primary one.
+ */
+ NRS_POL_STATE_STOPPED,
+ /**
+ * Policy is progress of stopping
+ */
+ NRS_POL_STATE_STOPPING,
+ /**
+ * Policy is in progress of starting
+ */
+ NRS_POL_STATE_STARTING,
+ /**
+ * A policy is in this state in two cases:
+ * - it is the fallback policy, which is always in this state.
+ * - it has been activated by the user; i.e. it is the primary policy,
+ */
+ NRS_POL_STATE_STARTED,
+};
+
+/**
+ * NRS policy information
+ *
+ * Used for obtaining information for the status of a policy via lprocfs
+ */
+struct ptlrpc_nrs_pol_info {
+ /**
+ * Policy name
+ */
+ char pi_name[NRS_POL_NAME_MAX];
+ /**
+ * Current policy state
+ */
+ enum ptlrpc_nrs_pol_state pi_state;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pi_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pi_req_started;
+ /**
+ * Is this a fallback policy?
+ */
+ unsigned pi_fallback:1;
+};
+
+/**
+ * NRS policy
+ *
+ * There is one instance of this for each policy in each NRS head of each
+ * PTLRPC service partition.
+ */
+struct ptlrpc_nrs_policy {
+ /**
+ * Linkage into the NRS head's list of policies,
+ * ptlrpc_nrs:nrs_policy_list
+ */
+ struct list_head pol_list;
+ /**
+ * Linkage into the NRS head's list of policies with enqueued
+ * requests ptlrpc_nrs:nrs_policy_queued
+ */
+ struct list_head pol_list_queued;
+ /**
+ * Current state of this policy
+ */
+ enum ptlrpc_nrs_pol_state pol_state;
+ /**
+ * Bitmask of nrs_policy_flags
+ */
+ unsigned pol_flags;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pol_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pol_req_started;
+ /**
+ * Usage Reference count taken on the policy instance
+ */
+ long pol_ref;
+ /**
+ * The NRS head this policy has been created at
+ */
+ struct ptlrpc_nrs *pol_nrs;
+ /**
+ * Private policy data; varies by policy type
+ */
+ void *pol_private;
+ /**
+ * Policy descriptor for this policy instance.
+ */
+ struct ptlrpc_nrs_pol_desc *pol_desc;
+};
+
+/**
+ * NRS resource
+ *
+ * Resources are embedded into two types of NRS entities:
+ * - Inside NRS policies, in the policy's private data in
+ * ptlrpc_nrs_policy::pol_private
+ * - In objects that act as prime-level scheduling entities in different NRS
+ * policies; e.g. on a policy that performs round robin or similar order
+ * scheduling across client NIDs, there would be one NRS resource per unique
+ * client NID. On a policy which performs round robin scheduling across
+ * backend filesystem objects, there would be one resource associated with
+ * each of the backend filesystem objects partaking in the scheduling
+ * performed by the policy.
+ *
+ * NRS resources share a parent-child relationship, in which resources embedded
+ * in policy instances are the parent entities, with all scheduling entities
+ * a policy schedules across being the children, thus forming a simple resource
+ * hierarchy. This hierarchy may be extended with one or more levels in the
+ * future if the ability to have more than one primary policy is added.
+ *
+ * Upon request initialization, references to the then active NRS policies are
+ * taken and used to later handle the dispatching of the request with one of
+ * these policies.
+ *
+ * \see nrs_resource_get_safe()
+ * \see ptlrpc_nrs_req_add()
+ */
+struct ptlrpc_nrs_resource {
+ /**
+ * This NRS resource's parent; is NULL for resources embedded in NRS
+ * policy instances; i.e. those are top-level ones.
+ */
+ struct ptlrpc_nrs_resource *res_parent;
+ /**
+ * The policy associated with this resource.
+ */
+ struct ptlrpc_nrs_policy *res_policy;
+};
+
+enum {
+ NRS_RES_FALLBACK,
+ NRS_RES_PRIMARY,
+ NRS_RES_MAX
+};
+
+/* \name fifo
+ *
+ * FIFO policy
+ *
+ * This policy is a logical wrapper around previous, non-NRS functionality.
+ * It dispatches RPCs in the same order as they arrive from the network. This
+ * policy is currently used as the fallback policy, and the only enabled policy
+ * on all NRS heads of all PTLRPC service partitions.
+ * @{
+ */
+
+/**
+ * Private data structure for the FIFO policy
+ */
+struct nrs_fifo_head {
+ /**
+ * Resource object for policy instance.
+ */
+ struct ptlrpc_nrs_resource fh_res;
+ /**
+ * List of queued requests.
+ */
+ struct list_head fh_list;
+ /**
+ * For debugging purposes.
+ */
+ __u64 fh_sequence;
+};
+
+struct nrs_fifo_req {
+ struct list_head fr_list;
+ __u64 fr_sequence;
+};
+
+/** @} fifo */
+
+/**
+ * \name CRR-N
+ *
+ * CRR-N, Client Round Robin over NIDs
+ * @{
+ */
+
+/**
+ * private data structure for CRR-N NRS
+ */
+struct nrs_crrn_net {
+ struct ptlrpc_nrs_resource cn_res;
+ cfs_binheap_t *cn_binheap;
+ cfs_hash_t *cn_cli_hash;
+ /**
+ * Used when a new scheduling round commences, in order to synchronize
+ * all clients with the new round number.
+ */
+ __u64 cn_round;
+ /**
+ * Determines the relevant ordering amongst request batches within a
+ * scheduling round.
+ */
+ __u64 cn_sequence;
+ /**
+ * Round Robin quantum; the maximum number of RPCs that each request
+ * batch for each client can have in a scheduling round.
+ */
+ __u16 cn_quantum;
+};
+
+/**
+ * Object representing a client in CRR-N, as identified by its NID
+ */
+struct nrs_crrn_client {
+ struct ptlrpc_nrs_resource cc_res;
+ struct hlist_node cc_hnode;
+ lnet_nid_t cc_nid;
+ /**
+ * The round number against which this client is currently scheduling
+ * requests.
+ */
+ __u64 cc_round;
+ /**
+ * The sequence number used for requests scheduled by this client during
+ * the current round number.
+ */
+ __u64 cc_sequence;
+ atomic_t cc_ref;
+ /**
+ * Round Robin quantum; the maximum number of RPCs the client is allowed
+ * to schedule in a single batch of each round.
+ */
+ __u16 cc_quantum;
+ /**
+ * # of pending requests for this client, on all existing rounds
+ */
+ __u16 cc_active;
+};
+
+/**
+ * CRR-N NRS request definition
+ */
+struct nrs_crrn_req {
+ /**
+ * Round number for this request; shared with all other requests in the
+ * same batch.
+ */
+ __u64 cr_round;
+ /**
+ * Sequence number for this request; shared with all other requests in
+ * the same batch.
+ */
+ __u64 cr_sequence;
+};
+
+/**
+ * CRR-N policy operations.
+ */
+enum nrs_ctl_crr {
+ /**
+ * Read the RR quantum size of a CRR-N policy.
+ */
+ NRS_CTL_CRRN_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+ /**
+ * Write the RR quantum size of a CRR-N policy.
+ */
+ NRS_CTL_CRRN_WR_QUANTUM,
+};
+
+/** @} CRR-N */
+
+/**
+ * \name ORR/TRR
+ *
+ * ORR/TRR (Object-based Round Robin/Target-based Round Robin) NRS policies
+ * @{
+ */
+
+/**
+ * Lower and upper byte offsets of a brw RPC
+ */
+struct nrs_orr_req_range {
+ __u64 or_start;
+ __u64 or_end;
+};
+
+/**
+ * RPC types supported by the ORR/TRR policies
+ */
+enum nrs_orr_supp {
+ NOS_OST_READ = (1 << 0),
+ NOS_OST_WRITE = (1 << 1),
+ NOS_OST_RW = (NOS_OST_READ | NOS_OST_WRITE),
+ /**
+ * Default value for policies.
+ */
+ NOS_DFLT = NOS_OST_READ
+};
+
+/**
+ * As unique keys for grouping RPCs together, we use the object's OST FID for
+ * the ORR policy, and the OST index for the TRR policy.
+ *
+ * XXX: We waste some space for TRR policy instances by using a union, but it
+ * allows to consolidate some of the code between ORR and TRR, and these
+ * policies will probably eventually merge into one anyway.
+ */
+struct nrs_orr_key {
+ union {
+ /** object FID for ORR */
+ struct lu_fid ok_fid;
+ /** OST index for TRR */
+ __u32 ok_idx;
+ };
+};
+
+/**
+ * The largest base string for unique hash/slab object names is
+ * "nrs_orr_reg_", so 13 characters. We add 3 to this to be used for the CPT
+ * id number, so this _should_ be more than enough for the maximum number of
+ * CPTs on any system. If it does happen that this statement is incorrect,
+ * nrs_orr_genobjname() will inevitably yield a non-unique name and cause
+ * kmem_cache_create() to complain (on Linux), so the erroneous situation
+ * will hopefully not go unnoticed.
+ */
+#define NRS_ORR_OBJ_NAME_MAX (sizeof("nrs_orr_reg_") + 3)
+
+/**
+ * private data structure for ORR and TRR NRS
+ */
+struct nrs_orr_data {
+ struct ptlrpc_nrs_resource od_res;
+ cfs_binheap_t *od_binheap;
+ cfs_hash_t *od_obj_hash;
+ struct kmem_cache *od_cache;
+ /**
+ * Used when a new scheduling round commences, in order to synchronize
+ * all object or OST batches with the new round number.
+ */
+ __u64 od_round;
+ /**
+ * Determines the relevant ordering amongst request batches within a
+ * scheduling round.
+ */
+ __u64 od_sequence;
+ /**
+ * RPC types that are currently supported.
+ */
+ enum nrs_orr_supp od_supp;
+ /**
+ * Round Robin quantum; the maxium number of RPCs that each request
+ * batch for each object or OST can have in a scheduling round.
+ */
+ __u16 od_quantum;
+ /**
+ * Whether to use physical disk offsets or logical file offsets.
+ */
+ bool od_physical;
+ /**
+ * XXX: We need to provide a persistently allocated string to hold
+ * unique object names for this policy, since in currently supported
+ * versions of Linux by Lustre, kmem_cache_create() just sets a pointer
+ * to the name string provided. kstrdup() is used in the version of
+ * kmeme_cache_create() in current Linux mainline, so we may be able to
+ * remove this in the future.
+ */
+ char od_objname[NRS_ORR_OBJ_NAME_MAX];
+};
+
+/**
+ * Represents a backend-fs object or OST in the ORR and TRR policies
+ * respectively
+ */
+struct nrs_orr_object {
+ struct ptlrpc_nrs_resource oo_res;
+ struct hlist_node oo_hnode;
+ /**
+ * The round number against which requests are being scheduled for this
+ * object or OST
+ */
+ __u64 oo_round;
+ /**
+ * The sequence number used for requests scheduled for this object or
+ * OST during the current round number.
+ */
+ __u64 oo_sequence;
+ /**
+ * The key of the object or OST for which this structure instance is
+ * scheduling RPCs
+ */
+ struct nrs_orr_key oo_key;
+ atomic_t oo_ref;
+ /**
+ * Round Robin quantum; the maximum number of RPCs that are allowed to
+ * be scheduled for the object or OST in a single batch of each round.
+ */
+ __u16 oo_quantum;
+ /**
+ * # of pending requests for this object or OST, on all existing rounds
+ */
+ __u16 oo_active;
+};
+
+/**
+ * ORR/TRR NRS request definition
+ */
+struct nrs_orr_req {
+ /**
+ * The offset range this request covers
+ */
+ struct nrs_orr_req_range or_range;
+ /**
+ * Round number for this request; shared with all other requests in the
+ * same batch.
+ */
+ __u64 or_round;
+ /**
+ * Sequence number for this request; shared with all other requests in
+ * the same batch.
+ */
+ __u64 or_sequence;
+ /**
+ * For debugging purposes.
+ */
+ struct nrs_orr_key or_key;
+ /**
+ * An ORR policy instance has filled in request information while
+ * enqueueing the request on the service partition's regular NRS head.
+ */
+ unsigned int or_orr_set:1;
+ /**
+ * A TRR policy instance has filled in request information while
+ * enqueueing the request on the service partition's regular NRS head.
+ */
+ unsigned int or_trr_set:1;
+ /**
+ * Request offset ranges have been filled in with logical offset
+ * values.
+ */
+ unsigned int or_logical_set:1;
+ /**
+ * Request offset ranges have been filled in with physical offset
+ * values.
+ */
+ unsigned int or_physical_set:1;
+};
+
+/** @} ORR/TRR */
+
+/**
+ * NRS request
+ *
+ * Instances of this object exist embedded within ptlrpc_request; the main
+ * purpose of this object is to hold references to the request's resources
+ * for the lifetime of the request, and to hold properties that policies use
+ * use for determining the request's scheduling priority.
+ * */
+struct ptlrpc_nrs_request {
+ /**
+ * The request's resource hierarchy.
+ */
+ struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
+ /**
+ * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
+ * policy that was used to enqueue the request.
+ *
+ * \see nrs_request_enqueue()
+ */
+ unsigned nr_res_idx;
+ unsigned nr_initialized:1;
+ unsigned nr_enqueued:1;
+ unsigned nr_started:1;
+ unsigned nr_finalized:1;
+ cfs_binheap_node_t nr_node;
+
+ /**
+ * Policy-specific fields, used for determining a request's scheduling
+ * priority, and other supporting functionality.
+ */
+ union {
+ /**
+ * Fields for the FIFO policy
+ */
+ struct nrs_fifo_req fifo;
+ /**
+ * CRR-N request defintion
+ */
+ struct nrs_crrn_req crr;
+ /** ORR and TRR share the same request definition */
+ struct nrs_orr_req orr;
+ } nr_u;
+ /**
+ * Externally-registering policies may want to use this to allocate
+ * their own request properties.
+ */
+ void *ext;
+};
+
+/** @} nrs */
+
+/**
+ * Basic request prioritization operations structure.
+ * The whole idea is centered around locks and RPCs that might affect locks.
+ * When a lock is contended we try to give priority to RPCs that might lead
+ * to fastest release of that lock.
+ * Currently only implemented for OSTs only in a way that makes all
+ * IO and truncate RPCs that are coming from a locked region where a lock is
+ * contended a priority over other requests.
+ */
+struct ptlrpc_hpreq_ops {
+ /**
+ * Check if the lock handle of the given lock is the same as
+ * taken from the request.
+ */
+ int (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *);
+ /**
+ * Check if the request is a high priority one.
+ */
+ int (*hpreq_check)(struct ptlrpc_request *);
+ /**
+ * Called after the request has been handled.
+ */
+ void (*hpreq_fini)(struct ptlrpc_request *);
+};
+
+/**
+ * Represents remote procedure call.
+ *
+ * This is a staple structure used by everybody wanting to send a request
+ * in Lustre.
+ */
+struct ptlrpc_request {
+ /* Request type: one of PTL_RPC_MSG_* */
+ int rq_type;
+ /** Result of request processing */
+ int rq_status;
+ /**
+ * Linkage item through which this request is included into
+ * sending/delayed lists on client and into rqbd list on server
+ */
+ struct list_head rq_list;
+ /**
+ * Server side list of incoming unserved requests sorted by arrival
+ * time. Traversed from time to time to notice about to expire
+ * requests and sent back "early replies" to clients to let them
+ * know server is alive and well, just very busy to service their
+ * requests in time
+ */
+ struct list_head rq_timed_list;
+ /** server-side history, used for debuging purposes. */
+ struct list_head rq_history_list;
+ /** server-side per-export list */
+ struct list_head rq_exp_list;
+ /** server-side hp handlers */
+ struct ptlrpc_hpreq_ops *rq_ops;
+
+ /** initial thread servicing this request */
+ struct ptlrpc_thread *rq_svc_thread;
+
+ /** history sequence # */
+ __u64 rq_history_seq;
+ /** \addtogroup nrs
+ * @{
+ */
+ /** stub for NRS request */
+ struct ptlrpc_nrs_request rq_nrq;
+ /** @} nrs */
+ /** the index of service's srv_at_array into which request is linked */
+ time_t rq_at_index;
+ /** Lock to protect request flags and some other important bits, like
+ * rq_list
+ */
+ spinlock_t rq_lock;
+ /** client-side flags are serialized by rq_lock */
+ unsigned int rq_intr:1, rq_replied:1, rq_err:1,
+ rq_timedout:1, rq_resend:1, rq_restart:1,
+ /**
+ * when ->rq_replay is set, request is kept by the client even
+ * after server commits corresponding transaction. This is
+ * used for operations that require sequence of multiple
+ * requests to be replayed. The only example currently is file
+ * open/close. When last request in such a sequence is
+ * committed, ->rq_replay is cleared on all requests in the
+ * sequence.
+ */
+ rq_replay:1,
+ rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
+ rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
+ rq_early:1, rq_must_unlink:1,
+ rq_memalloc:1, /* req originated from "kswapd" */
+ /* server-side flags */
+ rq_packed_final:1, /* packed final reply */
+ rq_hp:1, /* high priority RPC */
+ rq_at_linked:1, /* link into service's srv_at_array */
+ rq_reply_truncate:1,
+ rq_committed:1,
+ /* whether the "rq_set" is a valid one */
+ rq_invalid_rqset:1,
+ rq_generation_set:1,
+ /* do not resend request on -EINPROGRESS */
+ rq_no_retry_einprogress:1,
+ /* allow the req to be sent if the import is in recovery
+ * status */
+ rq_allow_replay:1;
+
+ unsigned int rq_nr_resend;
+
+ enum rq_phase rq_phase; /* one of RQ_PHASE_* */
+ enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
+ atomic_t rq_refcount;/* client-side refcount for SENT race,
+ server-side refcounf for multiple replies */
+
+ /** Portal to which this request would be sent */
+ short rq_request_portal; /* XXX FIXME bug 249 */
+ /** Portal where to wait for reply and where reply would be sent */
+ short rq_reply_portal; /* XXX FIXME bug 249 */
+
+ /**
+ * client-side:
+ * !rq_truncate : # reply bytes actually received,
+ * rq_truncate : required repbuf_len for resend
+ */
+ int rq_nob_received;
+ /** Request length */
+ int rq_reqlen;
+ /** Reply length */
+ int rq_replen;
+ /** Request message - what client sent */
+ struct lustre_msg *rq_reqmsg;
+ /** Reply message - server response */
+ struct lustre_msg *rq_repmsg;
+ /** Transaction number */
+ __u64 rq_transno;
+ /** xid */
+ __u64 rq_xid;
+ /**
+ * List item to for replay list. Not yet commited requests get linked
+ * there.
+ * Also see \a rq_replay comment above.
+ */
+ struct list_head rq_replay_list;
+
+ /**
+ * security and encryption data
+ * @{ */
+ struct ptlrpc_cli_ctx *rq_cli_ctx; /**< client's half ctx */
+ struct ptlrpc_svc_ctx *rq_svc_ctx; /**< server's half ctx */
+ struct list_head rq_ctx_chain; /**< link to waited ctx */
+
+ struct sptlrpc_flavor rq_flvr; /**< for client & server */
+ enum lustre_sec_part rq_sp_from;
+
+ /* client/server security flags */
+ unsigned int
+ rq_ctx_init:1, /* context initiation */
+ rq_ctx_fini:1, /* context destroy */
+ rq_bulk_read:1, /* request bulk read */
+ rq_bulk_write:1, /* request bulk write */
+ /* server authentication flags */
+ rq_auth_gss:1, /* authenticated by gss */
+ rq_auth_remote:1, /* authed as remote user */
+ rq_auth_usr_root:1, /* authed as root */
+ rq_auth_usr_mdt:1, /* authed as mdt */
+ rq_auth_usr_ost:1, /* authed as ost */
+ /* security tfm flags */
+ rq_pack_udesc:1,
+ rq_pack_bulk:1,
+ /* doesn't expect reply FIXME */
+ rq_no_reply:1,
+ rq_pill_init:1; /* pill initialized */
+
+ uid_t rq_auth_uid; /* authed uid */
+ uid_t rq_auth_mapped_uid; /* authed uid mapped to */
+
+ /* (server side), pointed directly into req buffer */
+ struct ptlrpc_user_desc *rq_user_desc;
+
+ /* various buffer pointers */
+ struct lustre_msg *rq_reqbuf; /* req wrapper */
+ char *rq_repbuf; /* rep buffer */
+ struct lustre_msg *rq_repdata; /* rep wrapper msg */
+ struct lustre_msg *rq_clrbuf; /* only in priv mode */
+ int rq_reqbuf_len; /* req wrapper buf len */
+ int rq_reqdata_len; /* req wrapper msg len */
+ int rq_repbuf_len; /* rep buffer len */
+ int rq_repdata_len; /* rep wrapper msg len */
+ int rq_clrbuf_len; /* only in priv mode */
+ int rq_clrdata_len; /* only in priv mode */
+
+ /** early replies go to offset 0, regular replies go after that */
+ unsigned int rq_reply_off;
+
+ /** @} */
+
+ /** Fields that help to see if request and reply were swabbed or not */
+ __u32 rq_req_swab_mask;
+ __u32 rq_rep_swab_mask;
+
+ /** What was import generation when this request was sent */
+ int rq_import_generation;
+ enum lustre_imp_state rq_send_state;
+
+ /** how many early replies (for stats) */
+ int rq_early_count;
+
+ /** client+server request */
+ lnet_handle_md_t rq_req_md_h;
+ struct ptlrpc_cb_id rq_req_cbid;
+ /** optional time limit for send attempts */
+ cfs_duration_t rq_delay_limit;
+ /** time request was first queued */
+ cfs_time_t rq_queued_time;
+
+ /* server-side... */
+ /** request arrival time */
+ struct timeval rq_arrival_time;
+ /** separated reply state */
+ struct ptlrpc_reply_state *rq_reply_state;
+ /** incoming request buffer */
+ struct ptlrpc_request_buffer_desc *rq_rqbd;
+
+ /** client-only incoming reply */
+ lnet_handle_md_t rq_reply_md_h;
+ wait_queue_head_t rq_reply_waitq;
+ struct ptlrpc_cb_id rq_reply_cbid;
+
+ /** our LNet NID */
+ lnet_nid_t rq_self;
+ /** Peer description (the other side) */
+ lnet_process_id_t rq_peer;
+ /** Server-side, export on which request was received */
+ struct obd_export *rq_export;
+ /** Client side, import where request is being sent */
+ struct obd_import *rq_import;
+
+ /** Replay callback, called after request is replayed at recovery */
+ void (*rq_replay_cb)(struct ptlrpc_request *);
+ /**
+ * Commit callback, called when request is committed and about to be
+ * freed.
+ */
+ void (*rq_commit_cb)(struct ptlrpc_request *);
+ /** Opaq data for replay and commit callbacks. */
+ void *rq_cb_data;
+
+ /** For bulk requests on client only: bulk descriptor */
+ struct ptlrpc_bulk_desc *rq_bulk;
+
+ /** client outgoing req */
+ /**
+ * when request/reply sent (secs), or time when request should be sent
+ */
+ time_t rq_sent;
+ /** time for request really sent out */
+ time_t rq_real_sent;
+
+ /** when request must finish. volatile
+ * so that servers' early reply updates to the deadline aren't
+ * kept in per-cpu cache */
+ volatile time_t rq_deadline;
+ /** when req reply unlink must finish. */
+ time_t rq_reply_deadline;
+ /** when req bulk unlink must finish. */
+ time_t rq_bulk_deadline;
+ /**
+ * service time estimate (secs)
+ * If the requestsis not served by this time, it is marked as timed out.
+ */
+ int rq_timeout;
+
+ /** Multi-rpc bits */
+ /** Per-request waitq introduced by bug 21938 for recovery waiting */
+ wait_queue_head_t rq_set_waitq;
+ /** Link item for request set lists */
+ struct list_head rq_set_chain;
+ /** Link back to the request set */
+ struct ptlrpc_request_set *rq_set;
+ /** Async completion handler, called when reply is received */
+ ptlrpc_interpterer_t rq_interpret_reply;
+ /** Async completion context */
+ union ptlrpc_async_args rq_async_args;
+
+ /** Pool if request is from preallocated list */
+ struct ptlrpc_request_pool *rq_pool;
+
+ struct lu_context rq_session;
+ struct lu_context rq_recov_session;
+
+ /** request format description */
+ struct req_capsule rq_pill;
+};
+
+/**
+ * Call completion handler for rpc if any, return it's status or original
+ * rc if there was no handler defined for this request.
+ */
+static inline int ptlrpc_req_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req, int rc)
+{
+ if (req->rq_interpret_reply != NULL) {
+ req->rq_status = req->rq_interpret_reply(env, req,
+ &req->rq_async_args,
+ rc);
+ return req->rq_status;
+ }
+ return rc;
+}
+
+/** \addtogroup nrs
+ * @{
+ */
+int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf);
+int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf);
+void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req);
+void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_pol_info *info);
+
+/*
+ * Can the request be moved from the regular NRS head to the high-priority NRS
+ * head (of the same PTLRPC service partition), if any?
+ *
+ * For a reliable result, this should be checked under svcpt->scp_req lock.
+ */
+static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req)
+{
+ struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
+
+ /**
+ * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the
+ * request has been enqueued first, and ptlrpc_nrs_request::nr_started
+ * to make sure it has not been scheduled yet (analogous to previous
+ * (non-NRS) checking of !list_empty(&ptlrpc_request::rq_list).
+ */
+ return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp;
+}
+/** @} nrs */
+
+/**
+ * Returns 1 if request buffer at offset \a index was already swabbed
+ */
+static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
+{
+ LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
+ return req->rq_req_swab_mask & (1 << index);
+}
+
+/**
+ * Returns 1 if request reply buffer at offset \a index was already swabbed
+ */
+static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
+{
+ LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
+ return req->rq_rep_swab_mask & (1 << index);
+}
+
+/**
+ * Returns 1 if request needs to be swabbed into local cpu byteorder
+ */
+static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req)
+{
+ return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+/**
+ * Returns 1 if request reply needs to be swabbed into local cpu byteorder
+ */
+static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
+{
+ return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+/**
+ * Mark request buffer at offset \a index that it was already swabbed
+ */
+static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
+{
+ LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
+ LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
+ req->rq_req_swab_mask |= 1 << index;
+}
+
+/**
+ * Mark request reply buffer at offset \a index that it was already swabbed
+ */
+static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
+{
+ LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
+ LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
+ req->rq_rep_swab_mask |= 1 << index;
+}
+
+/**
+ * Convert numerical request phase value \a phase into text string description
+ */
+static inline const char *
+ptlrpc_phase2str(enum rq_phase phase)
+{
+ switch (phase) {
+ case RQ_PHASE_NEW:
+ return "New";
+ case RQ_PHASE_RPC:
+ return "Rpc";
+ case RQ_PHASE_BULK:
+ return "Bulk";
+ case RQ_PHASE_INTERPRET:
+ return "Interpret";
+ case RQ_PHASE_COMPLETE:
+ return "Complete";
+ case RQ_PHASE_UNREGISTERING:
+ return "Unregistering";
+ default:
+ return "?Phase?";
+ }
+}
+
+/**
+ * Convert numerical request phase of the request \a req into text stringi
+ * description
+ */
+static inline const char *
+ptlrpc_rqphase2str(struct ptlrpc_request *req)
+{
+ return ptlrpc_phase2str(req->rq_phase);
+}
+
+/**
+ * Debugging functions and helpers to print request structure into debug log
+ * @{
+ */
+/* Spare the preprocessor, spoil the bugs. */
+#define FLAG(field, str) (field ? str : "")
+
+/** Convert bit flags into a string */
+#define DEBUG_REQ_FLAGS(req) \
+ ptlrpc_rqphase2str(req), \
+ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
+ FLAG(req->rq_err, "E"), \
+ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
+ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
+ FLAG(req->rq_no_resend, "N"), \
+ FLAG(req->rq_waiting, "W"), \
+ FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
+ FLAG(req->rq_committed, "M")
+
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+
+void _debug_req(struct ptlrpc_request *req,
+ struct libcfs_debug_msg_data *data, const char *fmt, ...)
+ __attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Helper that decides if we need to print request accordig to current debug
+ * level settings
+ */
+#define debug_req(msgdata, mask, cdls, req, fmt, a...) \
+do { \
+ CFS_CHECK_STACK(msgdata, mask, cdls); \
+ \
+ if (((mask) & D_CANTMASK) != 0 || \
+ ((libcfs_debug & (mask)) != 0 && \
+ (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \
+ _debug_req((req), msgdata, fmt, ##a); \
+} while(0)
+
+/**
+ * This is the debug print function you need to use to print request sturucture
+ * content into lustre debug log.
+ * for most callers (level is a constant) this is resolved at compile time */
+#define DEBUG_REQ(level, req, fmt, args...) \
+do { \
+ if ((level) & (D_ERROR | D_WARNING)) { \
+ static cfs_debug_limit_state_t cdls; \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
+ debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\
+ } else { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \
+ debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \
+ } \
+} while (0)
+/** @} */
+
+/**
+ * Structure that defines a single page of a bulk transfer
+ */
+struct ptlrpc_bulk_page {
+ /** Linkage to list of pages in a bulk */
+ struct list_head bp_link;
+ /**
+ * Number of bytes in a page to transfer starting from \a bp_pageoffset
+ */
+ int bp_buflen;
+ /** offset within a page */
+ int bp_pageoffset;
+ /** The page itself */
+ struct page *bp_page;
+};
+
+#define BULK_GET_SOURCE 0
+#define BULK_PUT_SINK 1
+#define BULK_GET_SINK 2
+#define BULK_PUT_SOURCE 3
+
+/**
+ * Definition of bulk descriptor.
+ * Bulks are special "Two phase" RPCs where initial request message
+ * is sent first and it is followed bt a transfer (o receiving) of a large
+ * amount of data to be settled into pages referenced from the bulk descriptors.
+ * Bulks transfers (the actual data following the small requests) are done
+ * on separate LNet portals.
+ * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs.
+ * Another user is readpage for MDT.
+ */
+struct ptlrpc_bulk_desc {
+ /** completed with failure */
+ unsigned long bd_failure:1;
+ /** {put,get}{source,sink} */
+ unsigned long bd_type:2;
+ /** client side */
+ unsigned long bd_registered:1;
+ /** For serialization with callback */
+ spinlock_t bd_lock;
+ /** Import generation when request for this bulk was sent */
+ int bd_import_generation;
+ /** LNet portal for this bulk */
+ __u32 bd_portal;
+ /** Server side - export this bulk created for */
+ struct obd_export *bd_export;
+ /** Client side - import this bulk was sent on */
+ struct obd_import *bd_import;
+ /** Back pointer to the request */
+ struct ptlrpc_request *bd_req;
+ wait_queue_head_t bd_waitq; /* server side only WQ */
+ int bd_iov_count; /* # entries in bd_iov */
+ int bd_max_iov; /* allocated size of bd_iov */
+ int bd_nob; /* # bytes covered */
+ int bd_nob_transferred; /* # bytes GOT/PUT */
+
+ __u64 bd_last_xid;
+
+ struct ptlrpc_cb_id bd_cbid; /* network callback info */
+ lnet_nid_t bd_sender; /* stash event::sender */
+ int bd_md_count; /* # valid entries in bd_mds */
+ int bd_md_max_brw; /* max entries in bd_mds */
+ /** array of associated MDs */
+ lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT];
+
+ /*
+ * encrypt iov, size is either 0 or bd_iov_count.
+ */
+ lnet_kiov_t *bd_enc_iov;
+
+ lnet_kiov_t bd_iov[0];
+};
+
+enum {
+ SVC_STOPPED = 1 << 0,
+ SVC_STOPPING = 1 << 1,
+ SVC_STARTING = 1 << 2,
+ SVC_RUNNING = 1 << 3,
+ SVC_EVENT = 1 << 4,
+ SVC_SIGNAL = 1 << 5,
+};
+
+#define PTLRPC_THR_NAME_LEN 32
+/**
+ * Definition of server service thread structure
+ */
+struct ptlrpc_thread {
+ /**
+ * List of active threads in svc->srv_threads
+ */
+ struct list_head t_link;
+ /**
+ * thread-private data (preallocated memory)
+ */
+ void *t_data;
+ __u32 t_flags;
+ /**
+ * service thread index, from ptlrpc_start_threads
+ */
+ unsigned int t_id;
+ /**
+ * service thread pid
+ */
+ pid_t t_pid;
+ /**
+ * put watchdog in the structure per thread b=14840
+ */
+ struct lc_watchdog *t_watchdog;
+ /**
+ * the svc this thread belonged to b=18582
+ */
+ struct ptlrpc_service_part *t_svcpt;
+ wait_queue_head_t t_ctl_waitq;
+ struct lu_env *t_env;
+ char t_name[PTLRPC_THR_NAME_LEN];
+};
+
+static inline int thread_is_init(struct ptlrpc_thread *thread)
+{
+ return thread->t_flags == 0;
+}
+
+static inline int thread_is_stopped(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_STOPPED);
+}
+
+static inline int thread_is_stopping(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_STOPPING);
+}
+
+static inline int thread_is_starting(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_STARTING);
+}
+
+static inline int thread_is_running(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_RUNNING);
+}
+
+static inline int thread_is_event(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_EVENT);
+}
+
+static inline int thread_is_signal(struct ptlrpc_thread *thread)
+{
+ return !!(thread->t_flags & SVC_SIGNAL);
+}
+
+static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+ thread->t_flags &= ~flags;
+}
+
+static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+ thread->t_flags = flags;
+}
+
+static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+ thread->t_flags |= flags;
+}
+
+static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread,
+ __u32 flags)
+{
+ if (thread->t_flags & flags) {
+ thread->t_flags &= ~flags;
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Request buffer descriptor structure.
+ * This is a structure that contains one posted request buffer for service.
+ * Once data land into a buffer, event callback creates actual request and
+ * notifies wakes one of the service threads to process new incoming request.
+ * More than one request can fit into the buffer.
+ */
+struct ptlrpc_request_buffer_desc {
+ /** Link item for rqbds on a service */
+ struct list_head rqbd_list;
+ /** History of requests for this buffer */
+ struct list_head rqbd_reqs;
+ /** Back pointer to service for which this buffer is registered */
+ struct ptlrpc_service_part *rqbd_svcpt;
+ /** LNet descriptor */
+ lnet_handle_md_t rqbd_md_h;
+ int rqbd_refcount;
+ /** The buffer itself */
+ char *rqbd_buffer;
+ struct ptlrpc_cb_id rqbd_cbid;
+ /**
+ * This "embedded" request structure is only used for the
+ * last request to fit into the buffer
+ */
+ struct ptlrpc_request rqbd_req;
+};
+
+typedef int (*svc_handler_t)(struct ptlrpc_request *req);
+
+struct ptlrpc_service_ops {
+ /**
+ * if non-NULL called during thread creation (ptlrpc_start_thread())
+ * to initialize service specific per-thread state.
+ */
+ int (*so_thr_init)(struct ptlrpc_thread *thr);
+ /**
+ * if non-NULL called during thread shutdown (ptlrpc_main()) to
+ * destruct state created by ->srv_init().
+ */
+ void (*so_thr_done)(struct ptlrpc_thread *thr);
+ /**
+ * Handler function for incoming requests for this service
+ */
+ int (*so_req_handler)(struct ptlrpc_request *req);
+ /**
+ * function to determine priority of the request, it's called
+ * on every new request
+ */
+ int (*so_hpreq_handler)(struct ptlrpc_request *);
+ /**
+ * service-specific print fn
+ */
+ void (*so_req_printer)(void *, struct ptlrpc_request *);
+};
+
+#ifndef __cfs_cacheline_aligned
+/* NB: put it here for reducing patche dependence */
+# define __cfs_cacheline_aligned
+#endif
+
+/**
+ * How many high priority requests to serve before serving one normal
+ * priority request
+ */
+#define PTLRPC_SVC_HP_RATIO 10
+
+/**
+ * Definition of PortalRPC service.
+ * The service is listening on a particular portal (like tcp port)
+ * and perform actions for a specific server like IO service for OST
+ * or general metadata service for MDS.
+ */
+struct ptlrpc_service {
+ /** serialize /proc operations */
+ spinlock_t srv_lock;
+ /** most often accessed fields */
+ /** chain thru all services */
+ struct list_head srv_list;
+ /** service operations table */
+ struct ptlrpc_service_ops srv_ops;
+ /** only statically allocated strings here; we don't clean them */
+ char *srv_name;
+ /** only statically allocated strings here; we don't clean them */
+ char *srv_thread_name;
+ /** service thread list */
+ struct list_head srv_threads;
+ /** threads # should be created for each partition on initializing */
+ int srv_nthrs_cpt_init;
+ /** limit of threads number for each partition */
+ int srv_nthrs_cpt_limit;
+ /** Root of /proc dir tree for this service */
+ proc_dir_entry_t *srv_procroot;
+ /** Pointer to statistic data for this service */
+ struct lprocfs_stats *srv_stats;
+ /** # hp per lp reqs to handle */
+ int srv_hpreq_ratio;
+ /** biggest request to receive */
+ int srv_max_req_size;
+ /** biggest reply to send */
+ int srv_max_reply_size;
+ /** size of individual buffers */
+ int srv_buf_size;
+ /** # buffers to allocate in 1 group */
+ int srv_nbuf_per_group;
+ /** Local portal on which to receive requests */
+ __u32 srv_req_portal;
+ /** Portal on the client to send replies to */
+ __u32 srv_rep_portal;
+ /**
+ * Tags for lu_context associated with this thread, see struct
+ * lu_context.
+ */
+ __u32 srv_ctx_tags;
+ /** soft watchdog timeout multiplier */
+ int srv_watchdog_factor;
+ /** under unregister_service */
+ unsigned srv_is_stopping:1;
+
+ /** max # request buffers in history per partition */
+ int srv_hist_nrqbds_cpt_max;
+ /** number of CPTs this service bound on */
+ int srv_ncpts;
+ /** CPTs array this service bound on */
+ __u32 *srv_cpts;
+ /** 2^srv_cptab_bits >= cfs_cpt_numbert(srv_cptable) */
+ int srv_cpt_bits;
+ /** CPT table this service is running over */
+ struct cfs_cpt_table *srv_cptable;
+ /**
+ * partition data for ptlrpc service
+ */
+ struct ptlrpc_service_part *srv_parts[0];
+};
+
+/**
+ * Definition of PortalRPC service partition data.
+ * Although a service only has one instance of it right now, but we
+ * will have multiple instances very soon (instance per CPT).
+ *
+ * it has four locks:
+ * \a scp_lock
+ * serialize operations on rqbd and requests waiting for preprocess
+ * \a scp_req_lock
+ * serialize operations active requests sent to this portal
+ * \a scp_at_lock
+ * serialize adaptive timeout stuff
+ * \a scp_rep_lock
+ * serialize operations on RS list (reply states)
+ *
+ * We don't have any use-case to take two or more locks at the same time
+ * for now, so there is no lock order issue.
+ */
+struct ptlrpc_service_part {
+ /** back reference to owner */
+ struct ptlrpc_service *scp_service __cfs_cacheline_aligned;
+ /* CPT id, reserved */
+ int scp_cpt;
+ /** always increasing number */
+ int scp_thr_nextid;
+ /** # of starting threads */
+ int scp_nthrs_starting;
+ /** # of stopping threads, reserved for shrinking threads */
+ int scp_nthrs_stopping;
+ /** # running threads */
+ int scp_nthrs_running;
+ /** service threads list */
+ struct list_head scp_threads;
+
+ /**
+ * serialize the following fields, used for protecting
+ * rqbd list and incoming requests waiting for preprocess,
+ * threads starting & stopping are also protected by this lock.
+ */
+ spinlock_t scp_lock __cfs_cacheline_aligned;
+ /** total # req buffer descs allocated */
+ int scp_nrqbds_total;
+ /** # posted request buffers for receiving */
+ int scp_nrqbds_posted;
+ /** in progress of allocating rqbd */
+ int scp_rqbd_allocating;
+ /** # incoming reqs */
+ int scp_nreqs_incoming;
+ /** request buffers to be reposted */
+ struct list_head scp_rqbd_idle;
+ /** req buffers receiving */
+ struct list_head scp_rqbd_posted;
+ /** incoming reqs */
+ struct list_head scp_req_incoming;
+ /** timeout before re-posting reqs, in tick */
+ cfs_duration_t scp_rqbd_timeout;
+ /**
+ * all threads sleep on this. This wait-queue is signalled when new
+ * incoming request arrives and when difficult reply has to be handled.
+ */
+ wait_queue_head_t scp_waitq;
+
+ /** request history */
+ struct list_head scp_hist_reqs;
+ /** request buffer history */
+ struct list_head scp_hist_rqbds;
+ /** # request buffers in history */
+ int scp_hist_nrqbds;
+ /** sequence number for request */
+ __u64 scp_hist_seq;
+ /** highest seq culled from history */
+ __u64 scp_hist_seq_culled;
+
+ /**
+ * serialize the following fields, used for processing requests
+ * sent to this portal
+ */
+ spinlock_t scp_req_lock __cfs_cacheline_aligned;
+ /** # reqs in either of the NRS heads below */
+ /** # reqs being served */
+ int scp_nreqs_active;
+ /** # HPreqs being served */
+ int scp_nhreqs_active;
+ /** # hp requests handled */
+ int scp_hreq_count;
+
+ /** NRS head for regular requests */
+ struct ptlrpc_nrs scp_nrs_reg;
+ /** NRS head for HP requests; this is only valid for services that can
+ * handle HP requests */
+ struct ptlrpc_nrs *scp_nrs_hp;
+
+ /** AT stuff */
+ /** @{ */
+ /**
+ * serialize the following fields, used for changes on
+ * adaptive timeout
+ */
+ spinlock_t scp_at_lock __cfs_cacheline_aligned;
+ /** estimated rpc service time */
+ struct adaptive_timeout scp_at_estimate;
+ /** reqs waiting for replies */
+ struct ptlrpc_at_array scp_at_array;
+ /** early reply timer */
+ timer_list_t scp_at_timer;
+ /** debug */
+ cfs_time_t scp_at_checktime;
+ /** check early replies */
+ unsigned scp_at_check;
+ /** @} */
+
+ /**
+ * serialize the following fields, used for processing
+ * replies for this portal
+ */
+ spinlock_t scp_rep_lock __cfs_cacheline_aligned;
+ /** all the active replies */
+ struct list_head scp_rep_active;
+ /** List of free reply_states */
+ struct list_head scp_rep_idle;
+ /** waitq to run, when adding stuff to srv_free_rs_list */
+ wait_queue_head_t scp_rep_waitq;
+ /** # 'difficult' replies */
+ atomic_t scp_nreps_difficult;
+};
+
+#define ptlrpc_service_for_each_part(part, i, svc) \
+ for (i = 0; \
+ i < (svc)->srv_ncpts && \
+ (svc)->srv_parts != NULL && \
+ ((part) = (svc)->srv_parts[i]) != NULL; i++)
+
+/**
+ * Declaration of ptlrpcd control structure
+ */
+struct ptlrpcd_ctl {
+ /**
+ * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
+ */
+ unsigned long pc_flags;
+ /**
+ * Thread lock protecting structure fields.
+ */
+ spinlock_t pc_lock;
+ /**
+ * Start completion.
+ */
+ struct completion pc_starting;
+ /**
+ * Stop completion.
+ */
+ struct completion pc_finishing;
+ /**
+ * Thread requests set.
+ */
+ struct ptlrpc_request_set *pc_set;
+ /**
+ * Thread name used in cfs_daemonize()
+ */
+ char pc_name[16];
+ /**
+ * Environment for request interpreters to run in.
+ */
+ struct lu_env pc_env;
+ /**
+ * Index of ptlrpcd thread in the array.
+ */
+ int pc_index;
+ /**
+ * Number of the ptlrpcd's partners.
+ */
+ int pc_npartners;
+ /**
+ * Pointer to the array of partners' ptlrpcd_ctl structure.
+ */
+ struct ptlrpcd_ctl **pc_partners;
+ /**
+ * Record the partner index to be processed next.
+ */
+ int pc_cursor;
+};
+
+/* Bits for pc_flags */
+enum ptlrpcd_ctl_flags {
+ /**
+ * Ptlrpc thread start flag.
+ */
+ LIOD_START = 1 << 0,
+ /**
+ * Ptlrpc thread stop flag.
+ */
+ LIOD_STOP = 1 << 1,
+ /**
+ * Ptlrpc thread force flag (only stop force so far).
+ * This will cause aborting any inflight rpcs handled
+ * by thread if LIOD_STOP is specified.
+ */
+ LIOD_FORCE = 1 << 2,
+ /**
+ * This is a recovery ptlrpc thread.
+ */
+ LIOD_RECOVERY = 1 << 3,
+ /**
+ * The ptlrpcd is bound to some CPU core.
+ */
+ LIOD_BIND = 1 << 4,
+};
+
+/**
+ * \addtogroup nrs
+ * @{
+ *
+ * Service compatibility function; the policy is compatible with all services.
+ *
+ * \param[in] svc The service the policy is attempting to register with.
+ * \param[in] desc The policy descriptor
+ *
+ * \retval true The policy is compatible with the service
+ *
+ * \see ptlrpc_nrs_pol_desc::pd_compat()
+ */
+static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc)
+{
+ return true;
+}
+
+/**
+ * Service compatibility function; the policy is compatible with only a specific
+ * service which is identified by its human-readable name at
+ * ptlrpc_service::srv_name.
+ *
+ * \param[in] svc The service the policy is attempting to register with.
+ * \param[in] desc The policy descriptor
+ *
+ * \retval false The policy is not compatible with the service
+ * \retval true The policy is compatible with the service
+ *
+ * \see ptlrpc_nrs_pol_desc::pd_compat()
+ */
+static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc)
+{
+ LASSERT(desc->pd_compat_svc_name != NULL);
+ return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0;
+}
+
+/** @} nrs */
+
+/* ptlrpc/events.c */
+extern lnet_handle_eq_t ptlrpc_eq_h;
+extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
+ lnet_process_id_t *peer, lnet_nid_t *self);
+/**
+ * These callbacks are invoked by LNet when something happened to
+ * underlying buffer
+ * @{
+ */
+extern void request_out_callback(lnet_event_t *ev);
+extern void reply_in_callback(lnet_event_t *ev);
+extern void client_bulk_callback(lnet_event_t *ev);
+extern void request_in_callback(lnet_event_t *ev);
+extern void reply_out_callback(lnet_event_t *ev);
+/** @} */
+
+/* ptlrpc/connection.c */
+struct ptlrpc_connection *ptlrpc_connection_get(lnet_process_id_t peer,
+ lnet_nid_t self,
+ struct obd_uuid *uuid);
+int ptlrpc_connection_put(struct ptlrpc_connection *c);
+struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
+int ptlrpc_connection_init(void);
+void ptlrpc_connection_fini(void);
+extern lnet_pid_t ptl_get_pid(void);
+
+/* ptlrpc/niobuf.c */
+/**
+ * Actual interfacing with LNet to put/get/register/unregister stuff
+ * @{
+ */
+
+int ptlrpc_register_bulk(struct ptlrpc_request *req);
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async);
+
+static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *desc;
+ int rc;
+
+ LASSERT(req != NULL);
+ desc = req->rq_bulk;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+ req->rq_bulk_deadline > cfs_time_current_sec())
+ return 1;
+
+ if (!desc)
+ return 0;
+
+ spin_lock(&desc->bd_lock);
+ rc = desc->bd_md_count;
+ spin_unlock(&desc->bd_lock);
+ return rc;
+}
+
+#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01
+#define PTLRPC_REPLY_EARLY 0x02
+int ptlrpc_send_reply(struct ptlrpc_request *req, int flags);
+int ptlrpc_reply(struct ptlrpc_request *req);
+int ptlrpc_send_error(struct ptlrpc_request *req, int difficult);
+int ptlrpc_error(struct ptlrpc_request *req);
+void ptlrpc_resend_req(struct ptlrpc_request *request);
+int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
+int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd);
+/** @} */
+
+/* ptlrpc/client.c */
+/**
+ * Client-side portals API. Everything to send requests, receive replies,
+ * request queues, request management, etc.
+ * @{
+ */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+ struct ptlrpc_client *);
+void ptlrpc_cleanup_client(struct obd_import *imp);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
+
+int ptlrpc_queue_wait(struct ptlrpc_request *req);
+int ptlrpc_replay_req(struct ptlrpc_request *req);
+int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
+void ptlrpc_restart_req(struct ptlrpc_request *req);
+void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_cleanup_imp(struct obd_import *imp);
+void ptlrpc_abort_set(struct ptlrpc_request_set *set);
+
+struct ptlrpc_request_set *ptlrpc_prep_set(void);
+struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
+ void *arg);
+int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
+ set_interpreter_func fn, void *data);
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
+int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set);
+int ptlrpc_set_wait(struct ptlrpc_request_set *);
+int ptlrpc_expired_set(void *data);
+void ptlrpc_interrupted_set(void *data);
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
+void ptlrpc_set_destroy(struct ptlrpc_request_set *);
+void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+ struct ptlrpc_request *req);
+
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
+
+struct ptlrpc_request_pool *
+ptlrpc_init_rq_pool(int, int,
+ void (*populate_pool)(struct ptlrpc_request_pool *, int));
+
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
+struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
+ const struct req_format *format);
+struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
+ struct ptlrpc_request_pool *,
+ const struct req_format *format);
+void ptlrpc_request_free(struct ptlrpc_request *request);
+int ptlrpc_request_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode);
+struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
+ const struct req_format *format,
+ __u32 version, int opcode);
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode, char **bufs,
+ struct ptlrpc_cli_ctx *ctx);
+struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
+ int opcode, int count, __u32 *lengths,
+ char **bufs);
+struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp,
+ __u32 version, int opcode,
+ int count, __u32 *lengths, char **bufs,
+ struct ptlrpc_request_pool *pool);
+void ptlrpc_req_finished(struct ptlrpc_request *request);
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
+ unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal);
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
+static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
+{
+ __ptlrpc_free_bulk(bulk, 1);
+}
+static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
+{
+ __ptlrpc_free_bulk(bulk, 0);
+}
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len, int);
+static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset,
+ int len)
+{
+ __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
+}
+
+static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset,
+ int len)
+{
+ __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
+}
+
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+ struct obd_import *imp);
+__u64 ptlrpc_next_xid(void);
+__u64 ptlrpc_sample_next_xid(void);
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
+
+/* Set of routines to run a function in ptlrpcd context */
+void *ptlrpcd_alloc_work(struct obd_import *imp,
+ int (*cb)(const struct lu_env *, void *), void *data);
+void ptlrpcd_destroy_work(void *handler);
+int ptlrpcd_queue_work(void *handler);
+
+/** @} */
+struct ptlrpc_service_buf_conf {
+ /* nbufs is buffers # to allocate when growing the pool */
+ unsigned int bc_nbufs;
+ /* buffer size to post */
+ unsigned int bc_buf_size;
+ /* portal to listed for requests on */
+ unsigned int bc_req_portal;
+ /* portal of where to send replies to */
+ unsigned int bc_rep_portal;
+ /* maximum request size to be accepted for this service */
+ unsigned int bc_req_max_size;
+ /* maximum reply size this service can ever send */
+ unsigned int bc_rep_max_size;
+};
+
+struct ptlrpc_service_thr_conf {
+ /* threadname should be 8 characters or less - 6 will be added on */
+ char *tc_thr_name;
+ /* threads increasing factor for each CPU */
+ unsigned int tc_thr_factor;
+ /* service threads # to start on each partition while initializing */
+ unsigned int tc_nthrs_init;
+ /*
+ * low water of threads # upper-limit on each partition while running,
+ * service availability may be impacted if threads number is lower
+ * than this value. It can be ZERO if the service doesn't require
+ * CPU affinity or there is only one partition.
+ */
+ unsigned int tc_nthrs_base;
+ /* "soft" limit for total threads number */
+ unsigned int tc_nthrs_max;
+ /* user specified threads number, it will be validated due to
+ * other members of this structure. */
+ unsigned int tc_nthrs_user;
+ /* set NUMA node affinity for service threads */
+ unsigned int tc_cpu_affinity;
+ /* Tags for lu_context associated with service thread */
+ __u32 tc_ctx_tags;
+};
+
+struct ptlrpc_service_cpt_conf {
+ struct cfs_cpt_table *cc_cptable;
+ /* string pattern to describe CPTs for a service */
+ char *cc_pattern;
+};
+
+struct ptlrpc_service_conf {
+ /* service name */
+ char *psc_name;
+ /* soft watchdog timeout multiplifier to print stuck service traces */
+ unsigned int psc_watchdog_factor;
+ /* buffer information */
+ struct ptlrpc_service_buf_conf psc_buf;
+ /* thread information */
+ struct ptlrpc_service_thr_conf psc_thr;
+ /* CPU partition information */
+ struct ptlrpc_service_cpt_conf psc_cpt;
+ /* function table */
+ struct ptlrpc_service_ops psc_ops;
+};
+
+/* ptlrpc/service.c */
+/**
+ * Server-side services API. Register/unregister service, request state
+ * management, service thread management
+ *
+ * @{
+ */
+void ptlrpc_save_lock(struct ptlrpc_request *req,
+ struct lustre_handle *lock, int mode, int no_ack);
+void ptlrpc_commit_replies(struct obd_export *exp);
+void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs);
+void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs);
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req);
+struct ptlrpc_service *ptlrpc_register_service(
+ struct ptlrpc_service_conf *conf,
+ struct proc_dir_entry *proc_entry);
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
+
+int ptlrpc_start_threads(struct ptlrpc_service *svc);
+int ptlrpc_unregister_service(struct ptlrpc_service *service);
+int liblustre_check_services(void *arg);
+void ptlrpc_daemonize(char *name);
+int ptlrpc_service_health_check(struct ptlrpc_service *);
+void ptlrpc_server_drop_request(struct ptlrpc_request *req);
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+ struct obd_export *export);
+
+int ptlrpc_hr_init(void);
+void ptlrpc_hr_fini(void);
+
+/** @} */
+
+/* ptlrpc/import.c */
+/**
+ * Import API
+ * @{
+ */
+int ptlrpc_connect_import(struct obd_import *imp);
+int ptlrpc_init_import(struct obd_import *imp);
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+void deuuidify(char *uuid, const char *prefix, char **uuid_start,
+ int *uuid_len);
+
+/* ptlrpc/pack_generic.c */
+int ptlrpc_reconnect_import(struct obd_import *imp);
+/** @} */
+
+/**
+ * ptlrpc msg buffer and swab interface
+ *
+ * @{
+ */
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+ int index);
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+ int index);
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
+
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
+ char **bufs);
+int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count,
+ __u32 *lens, char **bufs);
+int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens,
+ char **bufs);
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+ __u32 *lens, char **bufs, int flags);
+#define LPRFL_EARLY_REPLY 1
+int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens,
+ char **bufs, int flags);
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+ unsigned int newlen, int move_data);
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
+int __lustre_unpack_msg(struct lustre_msg *m, int len);
+int lustre_msg_hdr_size(__u32 magic, int count);
+int lustre_msg_size(__u32 magic, int count, __u32 *lengths);
+int lustre_msg_size_v2(int count, __u32 *lengths);
+int lustre_packed_msg_size(struct lustre_msg *msg);
+int lustre_msg_early_size(void);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
+void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
+int lustre_msg_buflen(struct lustre_msg *m, int n);
+void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len);
+int lustre_msg_bufcount(struct lustre_msg *m);
+char *lustre_msg_string(struct lustre_msg *m, int n, int max_len);
+__u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
+void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
+__u32 lustre_msg_get_flags(struct lustre_msg *msg);
+void lustre_msg_add_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_set_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_clear_flags(struct lustre_msg *msg, int flags);
+__u32 lustre_msg_get_op_flags(struct lustre_msg *msg);
+void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags);
+struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
+__u32 lustre_msg_get_type(struct lustre_msg *msg);
+__u32 lustre_msg_get_version(struct lustre_msg *msg);
+void lustre_msg_add_version(struct lustre_msg *msg, int version);
+__u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u64 lustre_msg_get_last_xid(struct lustre_msg *msg);
+__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
+__u64 lustre_msg_get_transno(struct lustre_msg *msg);
+__u64 lustre_msg_get_slv(struct lustre_msg *msg);
+__u32 lustre_msg_get_limit(struct lustre_msg *msg);
+void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv);
+void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit);
+int lustre_msg_get_status(struct lustre_msg *msg);
+__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg);
+int lustre_msg_is_v1(struct lustre_msg *msg);
+__u32 lustre_msg_get_magic(struct lustre_msg *msg);
+__u32 lustre_msg_get_timeout(struct lustre_msg *msg);
+__u32 lustre_msg_get_service_time(struct lustre_msg *msg);
+char *lustre_msg_get_jobid(struct lustre_msg *msg);
+__u32 lustre_msg_get_cksum(struct lustre_msg *msg);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg, int compat18);
+#else
+# warning "remove checksum compatibility support for b1_8"
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg);
+#endif
+void lustre_msg_set_handle(struct lustre_msg *msg,struct lustre_handle *handle);
+void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
+void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid);
+void lustre_msg_set_last_committed(struct lustre_msg *msg,__u64 last_committed);
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
+void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
+void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
+void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
+void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *sizes);
+void ptlrpc_request_set_replen(struct ptlrpc_request *req);
+void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
+void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
+void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
+void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+
+static inline void
+lustre_shrink_reply(struct ptlrpc_request *req, int segment,
+ unsigned int newlen, int move_data)
+{
+ LASSERT(req->rq_reply_state);
+ LASSERT(req->rq_repmsg);
+ req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
+ newlen, move_data);
+}
+/** @} */
+
+/** Change request phase of \a req to \a new_phase */
+static inline void
+ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
+{
+ if (req->rq_phase == new_phase)
+ return;
+
+ if (new_phase == RQ_PHASE_UNREGISTERING) {
+ req->rq_next_phase = req->rq_phase;
+ if (req->rq_import)
+ atomic_inc(&req->rq_import->imp_unregistering);
+ }
+
+ if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ if (req->rq_import)
+ atomic_dec(&req->rq_import->imp_unregistering);
+ }
+
+ DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"",
+ ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase));
+
+ req->rq_phase = new_phase;
+}
+
+/**
+ * Returns true if request \a req got early reply and hard deadline is not met
+ */
+static inline int
+ptlrpc_client_early(struct ptlrpc_request *req)
+{
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 0;
+ return req->rq_early;
+}
+
+/**
+ * Returns true if we got real reply from server for this request
+ */
+static inline int
+ptlrpc_client_replied(struct ptlrpc_request *req)
+{
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 0;
+ return req->rq_replied;
+}
+
+/** Returns true if request \a req is in process of receiving server reply */
+static inline int
+ptlrpc_client_recv(struct ptlrpc_request *req)
+{
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 1;
+ return req->rq_receiving_reply;
+}
+
+static inline int
+ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
+{
+ int rc;
+
+ spin_lock(&req->rq_lock);
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec()) {
+ spin_unlock(&req->rq_lock);
+ return 1;
+ }
+ rc = req->rq_receiving_reply || req->rq_must_unlink;
+ spin_unlock(&req->rq_lock);
+ return rc;
+}
+
+static inline void
+ptlrpc_client_wake_req(struct ptlrpc_request *req)
+{
+ if (req->rq_set == NULL)
+ wake_up(&req->rq_reply_waitq);
+ else
+ wake_up(&req->rq_set->set_waitq);
+}
+
+static inline void
+ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
+{
+ LASSERT(atomic_read(&rs->rs_refcount) > 0);
+ atomic_inc(&rs->rs_refcount);
+}
+
+static inline void
+ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
+{
+ LASSERT(atomic_read(&rs->rs_refcount) > 0);
+ if (atomic_dec_and_test(&rs->rs_refcount))
+ lustre_free_reply_state(rs);
+}
+
+/* Should only be called once per req */
+static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req)
+{
+ if (req->rq_reply_state == NULL)
+ return; /* shouldn't occur */
+ ptlrpc_rs_decref(req->rq_reply_state);
+ req->rq_reply_state = NULL;
+ req->rq_repmsg = NULL;
+}
+
+static inline __u32 lustre_request_magic(struct ptlrpc_request *req)
+{
+ return lustre_msg_get_magic(req->rq_reqmsg);
+}
+
+static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req)
+{
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return req->rq_reqmsg->lm_repsize;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n",
+ req->rq_reqmsg->lm_magic);
+ return -EFAULT;
+ }
+}
+
+static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req)
+{
+ if (req->rq_delay_limit != 0 &&
+ cfs_time_before(cfs_time_add(req->rq_queued_time,
+ cfs_time_seconds(req->rq_delay_limit)),
+ cfs_time_current())) {
+ return 1;
+ }
+ return 0;
+}
+
+static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
+{
+ if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) {
+ spin_lock(&req->rq_lock);
+ req->rq_no_resend = 1;
+ spin_unlock(&req->rq_lock);
+ }
+ return req->rq_no_resend;
+}
+
+static inline int
+ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
+{
+ int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
+
+ return svcpt->scp_service->srv_watchdog_factor *
+ max_t(int, at, obd_timeout);
+}
+
+static inline struct ptlrpc_service *
+ptlrpc_req2svc(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_rqbd != NULL);
+ return req->rq_rqbd->rqbd_svcpt->scp_service;
+}
+
+/* ldlm/ldlm_lib.c */
+/**
+ * Target client logic
+ * @{
+ */
+int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
+int client_obd_cleanup(struct obd_device *obddev);
+int client_connect_import(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *obd,
+ struct obd_uuid *cluuid, struct obd_connect_data *,
+ void *localdata);
+int client_disconnect_export(struct obd_export *exp);
+int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority);
+int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
+int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
+ struct obd_uuid *uuid);
+int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
+void client_destroy_import(struct obd_import *imp);
+/** @} */
+
+
+/* ptlrpc/pinger.c */
+/**
+ * Pinger API (client side only)
+ * @{
+ */
+enum timeout_event {
+ TIMEOUT_GRANT = 1
+};
+struct timeout_item;
+typedef int (*timeout_cb_t)(struct timeout_item *, void *);
+int ptlrpc_pinger_add_import(struct obd_import *imp);
+int ptlrpc_pinger_del_import(struct obd_import *imp);
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data,
+ struct list_head *obd_list);
+int ptlrpc_del_timeout_client(struct list_head *obd_list,
+ enum timeout_event event);
+struct ptlrpc_request * ptlrpc_prep_ping(struct obd_import *imp);
+int ptlrpc_obd_ping(struct obd_device *obd);
+cfs_time_t ptlrpc_suspend_wakeup_time(void);
+void ping_evictor_start(void);
+void ping_evictor_stop(void);
+int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req);
+void ptlrpc_pinger_ir_up(void);
+void ptlrpc_pinger_ir_down(void);
+/** @} */
+int ptlrpc_pinger_suppress_pings(void);
+
+/* ptlrpc daemon bind policy */
+typedef enum {
+ /* all ptlrpcd threads are free mode */
+ PDB_POLICY_NONE = 1,
+ /* all ptlrpcd threads are bound mode */
+ PDB_POLICY_FULL = 2,
+ /* <free1 bound1> <free2 bound2> ... <freeN boundN> */
+ PDB_POLICY_PAIR = 3,
+ /* <free1 bound1> <bound1 free2> ... <freeN boundN> <boundN free1>,
+ * means each ptlrpcd[X] has two partners: thread[X-1] and thread[X+1].
+ * If kernel supports NUMA, pthrpcd threads are binded and
+ * grouped by NUMA node */
+ PDB_POLICY_NEIGHBOR = 4,
+} pdb_policy_t;
+
+/* ptlrpc daemon load policy
+ * It is caller's duty to specify how to push the async RPC into some ptlrpcd
+ * queue, but it is not enforced, affected by "ptlrpcd_bind_policy". If it is
+ * "PDB_POLICY_FULL", then the RPC will be processed by the selected ptlrpcd,
+ * Otherwise, the RPC may be processed by the selected ptlrpcd or its partner,
+ * depends on which is scheduled firstly, to accelerate the RPC processing. */
+typedef enum {
+ /* on the same CPU core as the caller */
+ PDL_POLICY_SAME = 1,
+ /* within the same CPU partition, but not the same core as the caller */
+ PDL_POLICY_LOCAL = 2,
+ /* round-robin on all CPU cores, but not the same core as the caller */
+ PDL_POLICY_ROUND = 3,
+ /* the specified CPU core is preferred, but not enforced */
+ PDL_POLICY_PREFERRED = 4,
+} pdl_policy_t;
+
+/* ptlrpc/ptlrpcd.c */
+void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
+void ptlrpcd_free(struct ptlrpcd_ctl *pc);
+void ptlrpcd_wake(struct ptlrpc_request *req);
+void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx);
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set);
+int ptlrpcd_addref(void);
+void ptlrpcd_decref(void);
+
+/* ptlrpc/lproc_ptlrpc.c */
+/**
+ * procfs output related functions
+ * @{
+ */
+const char* ll_opcode2str(__u32 opcode);
+#ifdef LPROCFS
+void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);
+void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes);
+#else
+static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {}
+static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {}
+static inline void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) {}
+#endif
+/** @} */
+
+/* ptlrpc/llog_server.c */
+int llog_origin_handle_open(struct ptlrpc_request *req);
+int llog_origin_handle_destroy(struct ptlrpc_request *req);
+int llog_origin_handle_prev_block(struct ptlrpc_request *req);
+int llog_origin_handle_next_block(struct ptlrpc_request *req);
+int llog_origin_handle_read_header(struct ptlrpc_request *req);
+int llog_origin_handle_close(struct ptlrpc_request *req);
+int llog_origin_handle_cancel(struct ptlrpc_request *req);
+
+/* ptlrpc/llog_client.c */
+extern struct llog_operations llog_client_ops;
+
+/** @} net */
+
+#endif
+/** @} PtlRPC */
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
new file mode 100644
index 000000000000..ed654684cb64
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_param.h
+ *
+ * User-settable parameter keys
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_PARAM_H
+#define _LUSTRE_PARAM_H
+
+/** \defgroup param param
+ *
+ * @{
+ */
+
+/* For interoperability */
+struct cfg_interop_param {
+ char *old_param;
+ char *new_param;
+};
+
+/* obd_config.c */
+int class_find_param(char *buf, char *key, char **valp);
+struct cfg_interop_param *class_find_old_param(const char *param,
+ struct cfg_interop_param *ptr);
+int class_get_next_param(char **params, char *copy);
+int class_match_param(char *buf, char *key, char **valp);
+int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh);
+int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
+int class_parse_net(char *buf, __u32 *net, char **endh);
+int class_match_nid(char *buf, char *key, lnet_nid_t nid);
+int class_match_net(char *buf, char *key, __u32 net);
+/* obd_mount.c */
+int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
+ char *s1, char *s2, char *s3, char *s4);
+
+
+
+/****************** User-settable parameter keys *********************/
+/* e.g.
+ tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
+ lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
+ ... testfs-MDT0000.lov.stripesize=4M
+ ... testfs-OST0000.ost.client_cache_seconds=15
+ ... testfs.sys.timeout=<secs>
+ ... testfs.llite.max_read_ahead_mb=16
+*/
+
+/* System global or special params not handled in obd's proc
+ * See mgs_write_log_sys()
+ */
+#define PARAM_TIMEOUT "timeout=" /* global */
+#define PARAM_LDLM_TIMEOUT "ldlm_timeout=" /* global */
+#define PARAM_AT_MIN "at_min=" /* global */
+#define PARAM_AT_MAX "at_max=" /* global */
+#define PARAM_AT_EXTRA "at_extra=" /* global */
+#define PARAM_AT_EARLY_MARGIN "at_early_margin=" /* global */
+#define PARAM_AT_HISTORY "at_history=" /* global */
+#define PARAM_JOBID_VAR "jobid_var=" /* global */
+#define PARAM_MGSNODE "mgsnode=" /* only at mounttime */
+#define PARAM_FAILNODE "failover.node=" /* add failover nid */
+#define PARAM_FAILMODE "failover.mode=" /* initial mount only */
+#define PARAM_ACTIVE "active=" /* activate/deactivate */
+#define PARAM_NETWORK "network=" /* bind on nid */
+#define PARAM_ID_UPCALL "identity_upcall=" /* identity upcall */
+
+/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
+#define PARAM_OST "ost."
+#define PARAM_OSC "osc."
+#define PARAM_MDT "mdt."
+#define PARAM_MDD "mdd."
+#define PARAM_MDC "mdc."
+#define PARAM_LLITE "llite."
+#define PARAM_LOV "lov."
+#define PARAM_LOD "lod."
+#define PARAM_OSP "osp."
+#define PARAM_SYS "sys." /* global */
+#define PARAM_SRPC "srpc."
+#define PARAM_SRPC_FLVR "srpc.flavor."
+#define PARAM_SRPC_UDESC "srpc.udesc.cli2mdt"
+#define PARAM_SEC "security."
+#define PARAM_QUOTA "quota." /* global */
+
+/** @} param */
+
+#endif /* _LUSTRE_PARAM_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_quota.h b/drivers/staging/lustre/lustre/include/lustre_quota.h
new file mode 100644
index 000000000000..1c3041f50049
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_quota.h
@@ -0,0 +1,239 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LUSTRE_QUOTA_H
+#define _LUSTRE_QUOTA_H
+
+/** \defgroup quota quota
+ *
+ */
+
+#include <linux/lustre_quota.h>
+
+#include <dt_object.h>
+#include <lustre_fid.h>
+#include <lustre_dlm.h>
+
+#ifndef MAX_IQ_TIME
+#define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */
+#endif
+
+#ifndef MAX_DQ_TIME
+#define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */
+#endif
+
+struct lquota_id_info;
+struct lquota_trans;
+
+/* Gather all quota record type in an union that can be used to read any records
+ * from disk. All fields of these records must be 64-bit aligned, otherwise the
+ * OSD layer may swab them incorrectly. */
+union lquota_rec {
+ struct lquota_glb_rec lqr_glb_rec;
+ struct lquota_slv_rec lqr_slv_rec;
+ struct lquota_acct_rec lqr_acct_rec;
+};
+
+/* Index features supported by the global index objects
+ * Only used for migration purpose and should be removed once on-disk migration
+ * is no longer needed */
+extern struct dt_index_features dt_quota_iusr_features;
+extern struct dt_index_features dt_quota_busr_features;
+extern struct dt_index_features dt_quota_igrp_features;
+extern struct dt_index_features dt_quota_bgrp_features;
+
+/* Name used in the configuration logs to identify the default metadata pool
+ * (composed of all the MDTs, with pool ID 0) and the default data pool (all
+ * the OSTs, with pool ID 0 too). */
+#define QUOTA_METAPOOL_NAME "mdt="
+#define QUOTA_DATAPOOL_NAME "ost="
+
+/*
+ * Quota Master Target support
+ */
+
+/* Request handlers for quota master operations.
+ * This is used by the MDT to pass quota/lock requests to the quota master
+ * target. This won't be needed any more once the QMT is a real target and
+ * does not rely any more on the MDT service threads and namespace. */
+struct qmt_handlers {
+ /* Handle quotactl request from client. */
+ int (*qmth_quotactl)(const struct lu_env *, struct lu_device *,
+ struct obd_quotactl *);
+
+ /* Handle dqacq/dqrel request from slave. */
+ int (*qmth_dqacq)(const struct lu_env *, struct lu_device *,
+ struct ptlrpc_request *);
+
+ /* LDLM intent policy associated with quota locks */
+ int (*qmth_intent_policy)(const struct lu_env *, struct lu_device *,
+ struct ptlrpc_request *, struct ldlm_lock **,
+ int);
+
+ /* Initialize LVB of ldlm resource associated with quota objects */
+ int (*qmth_lvbo_init)(struct lu_device *, struct ldlm_resource *);
+
+ /* Update LVB of ldlm resource associated with quota objects */
+ int (*qmth_lvbo_update)(struct lu_device *, struct ldlm_resource *,
+ struct ptlrpc_request *, int);
+
+ /* Return size of LVB to be packed in ldlm message */
+ int (*qmth_lvbo_size)(struct lu_device *, struct ldlm_lock *);
+
+ /* Fill request buffer with lvb */
+ int (*qmth_lvbo_fill)(struct lu_device *, struct ldlm_lock *, void *,
+ int);
+
+ /* Free lvb associated with ldlm resource */
+ int (*qmth_lvbo_free)(struct lu_device *, struct ldlm_resource *);
+};
+
+/* actual handlers are defined in lustre/quota/qmt_handler.c */
+extern struct qmt_handlers qmt_hdls;
+
+/*
+ * Quota enforcement support on slaves
+ */
+
+struct qsd_instance;
+
+/* The quota slave feature is implemented under the form of a library.
+ * The API is the following:
+ *
+ * - qsd_init(): the user (mostly the OSD layer) should first allocate a qsd
+ * instance via qsd_init(). This creates all required structures
+ * to manage quota enforcement for this target and performs all
+ * low-level initialization which does not involve any lustre
+ * object. qsd_init() should typically be called when the OSD
+ * is being set up.
+ *
+ * - qsd_prepare(): This sets up on-disk objects associated with the quota slave
+ * feature and initiates the quota reintegration procedure if
+ * needed. qsd_prepare() should typically be called when
+ * ->ldo_prepare is invoked.
+ *
+ * - qsd_start(): a qsd instance should be started once recovery is completed
+ * (i.e. when ->ldo_recovery_complete is called). This is used
+ * to notify the qsd layer that quota should now be enforced
+ * again via the qsd_op_begin/end functions. The last step of the
+ * reintegration prodecure (namely usage reconciliation) will be
+ * completed during start.
+ *
+ * - qsd_fini(): is used to release a qsd_instance structure allocated with
+ * qsd_init(). This releases all quota slave objects and frees the
+ * structures associated with the qsd_instance.
+ *
+ * - qsd_op_begin(): is used to enforce quota, it must be called in the
+ * declaration of each operation. qsd_op_end() should then be
+ * invoked later once all operations have been completed in
+ * order to release/adjust the quota space.
+ * Running qsd_op_begin() before qsd_start() isn't fatal and
+ * will return success.
+ * Once qsd_start() has been run, qsd_op_begin() will block
+ * until the reintegration procedure is completed.
+ *
+ * - qsd_op_end(): performs the post operation quota processing. This must be
+ * called after the operation transaction stopped.
+ * While qsd_op_begin() must be invoked each time a new
+ * operation is declared, qsd_op_end() should be called only
+ * once for the whole transaction.
+ *
+ * - qsd_op_adjust(): triggers pre-acquire/release if necessary.
+ *
+ * Below are the function prototypes to be used by OSD layer to manage quota
+ * enforcement. Arguments are documented where each function is defined. */
+
+struct qsd_instance *qsd_init(const struct lu_env *, char *, struct dt_device *,
+ proc_dir_entry_t *);
+int qsd_prepare(const struct lu_env *, struct qsd_instance *);
+int qsd_start(const struct lu_env *, struct qsd_instance *);
+void qsd_fini(const struct lu_env *, struct qsd_instance *);
+int qsd_op_begin(const struct lu_env *, struct qsd_instance *,
+ struct lquota_trans *, struct lquota_id_info *, int *);
+void qsd_op_end(const struct lu_env *, struct qsd_instance *,
+ struct lquota_trans *);
+void qsd_op_adjust(const struct lu_env *, struct qsd_instance *,
+ union lquota_id *, int);
+/* This is exported for the ldiskfs quota migration only,
+ * see convert_quota_file() */
+int lquota_disk_write_glb(const struct lu_env *, struct dt_object *,
+ __u64, struct lquota_glb_rec *);
+
+/*
+ * Quota information attached to a transaction
+ */
+
+struct lquota_entry;
+
+struct lquota_id_info {
+ /* quota identifier */
+ union lquota_id lqi_id;
+
+ /* USRQUOTA or GRPQUOTA for now, could be expanded for
+ * directory quota or other types later. */
+ int lqi_type;
+
+ /* inodes or kbytes to be consumed or released, it could
+ * be negative when releasing space. */
+ long long lqi_space;
+
+ /* quota slave entry structure associated with this ID */
+ struct lquota_entry *lqi_qentry;
+
+ /* whether we are reporting blocks or inodes */
+ bool lqi_is_blk;
+};
+
+/* Since we enforce only inode quota in meta pool (MDTs), and block quota in
+ * data pool (OSTs), there are at most 4 quota ids being enforced in a single
+ * transaction, which is chown transaction:
+ * original uid and gid, new uid and gid.
+ *
+ * This value might need to be revised when directory quota is added. */
+#define QUOTA_MAX_TRANSIDS 4
+
+/* all qids involved in a single transaction */
+struct lquota_trans {
+ unsigned short lqt_id_cnt;
+ struct lquota_id_info lqt_ids[QUOTA_MAX_TRANSIDS];
+};
+
+/* flags for quota local enforcement */
+#define QUOTA_FL_OVER_USRQUOTA 0x01
+#define QUOTA_FL_OVER_GRPQUOTA 0x02
+#define QUOTA_FL_SYNC 0x04
+
+#define IS_LQUOTA_RES(res) \
+ (res->lr_name.name[LUSTRE_RES_ID_SEQ_OFF] == FID_SEQ_QUOTA || \
+ res->lr_name.name[LUSTRE_RES_ID_SEQ_OFF] == FID_SEQ_QUOTA_GLB)
+
+/* helper function used by MDT & OFD to retrieve quota accounting information
+ * on slave */
+int lquotactl_slv(const struct lu_env *, struct dt_device *,
+ struct obd_quotactl *);
+/** @} quota */
+#endif /* _LUSTRE_QUOTA_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
new file mode 100644
index 000000000000..f4d3820865f1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -0,0 +1,334 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_req_layout.h
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_REQ_LAYOUT_H__
+#define _LUSTRE_REQ_LAYOUT_H__
+
+/** \defgroup req_layout req_layout
+ *
+ * @{
+ */
+
+struct req_msg_field;
+struct req_format;
+struct req_capsule;
+
+struct ptlrpc_request;
+
+enum req_location {
+ RCL_CLIENT,
+ RCL_SERVER,
+ RCL_NR
+};
+
+/* Maximal number of fields (buffers) in a request message. */
+#define REQ_MAX_FIELD_NR 9
+
+struct req_capsule {
+ struct ptlrpc_request *rc_req;
+ const struct req_format *rc_fmt;
+ enum req_location rc_loc;
+ __u32 rc_area[RCL_NR][REQ_MAX_FIELD_NR];
+};
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+/* struct ptlrpc_request, lustre_msg* */
+#include <lustre_net.h>
+
+void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req,
+ enum req_location location);
+void req_capsule_fini(struct req_capsule *pill);
+
+void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt);
+void req_capsule_client_dump(struct req_capsule *pill);
+void req_capsule_server_dump(struct req_capsule *pill);
+void req_capsule_init_area(struct req_capsule *pill);
+int req_capsule_filled_sizes(struct req_capsule *pill, enum req_location loc);
+int req_capsule_server_pack(struct req_capsule *pill);
+
+void *req_capsule_client_get(struct req_capsule *pill,
+ const struct req_msg_field *field);
+void *req_capsule_client_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber);
+void *req_capsule_client_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len);
+void *req_capsule_server_get(struct req_capsule *pill,
+ const struct req_msg_field *field);
+void *req_capsule_server_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len);
+void *req_capsule_server_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber);
+void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len, void *swabber);
+const void *req_capsule_other_get(struct req_capsule *pill,
+ const struct req_msg_field *field);
+
+void req_capsule_set_size(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc, int size);
+int req_capsule_get_size(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc);
+int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
+int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+ enum req_location loc);
+void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt);
+
+int req_capsule_has_field(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc);
+int req_capsule_field_present(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc);
+void req_capsule_shrink(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen,
+ enum req_location loc);
+int req_capsule_server_grow(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen);
+int req_layout_init(void);
+void req_layout_fini(void);
+
+/* __REQ_LAYOUT_USER__ */
+#endif
+
+extern struct req_format RQF_OBD_PING;
+extern struct req_format RQF_OBD_SET_INFO;
+extern struct req_format RQF_SEC_CTX;
+extern struct req_format RQF_OBD_IDX_READ;
+/* MGS req_format */
+extern struct req_format RQF_MGS_TARGET_REG;
+extern struct req_format RQF_MGS_SET_INFO;
+extern struct req_format RQF_MGS_CONFIG_READ;
+/* fid/fld req_format */
+extern struct req_format RQF_SEQ_QUERY;
+extern struct req_format RQF_FLD_QUERY;
+/* MDS req_format */
+extern struct req_format RQF_MDS_CONNECT;
+extern struct req_format RQF_MDS_DISCONNECT;
+extern struct req_format RQF_MDS_STATFS;
+extern struct req_format RQF_MDS_GETSTATUS;
+extern struct req_format RQF_MDS_SYNC;
+extern struct req_format RQF_MDS_GETXATTR;
+extern struct req_format RQF_MDS_GETATTR;
+extern struct req_format RQF_UPDATE_OBJ;
+
+/*
+ * This is format of direct (non-intent) MDS_GETATTR_NAME request.
+ */
+extern struct req_format RQF_MDS_GETATTR_NAME;
+extern struct req_format RQF_MDS_CLOSE;
+extern struct req_format RQF_MDS_PIN;
+extern struct req_format RQF_MDS_UNPIN;
+extern struct req_format RQF_MDS_CONNECT;
+extern struct req_format RQF_MDS_DISCONNECT;
+extern struct req_format RQF_MDS_GET_INFO;
+extern struct req_format RQF_MDS_READPAGE;
+extern struct req_format RQF_MDS_WRITEPAGE;
+extern struct req_format RQF_MDS_IS_SUBDIR;
+extern struct req_format RQF_MDS_DONE_WRITING;
+extern struct req_format RQF_MDS_REINT;
+extern struct req_format RQF_MDS_REINT_CREATE;
+extern struct req_format RQF_MDS_REINT_CREATE_RMT_ACL;
+extern struct req_format RQF_MDS_REINT_CREATE_SLAVE;
+extern struct req_format RQF_MDS_REINT_CREATE_SYM;
+extern struct req_format RQF_MDS_REINT_OPEN;
+extern struct req_format RQF_MDS_REINT_UNLINK;
+extern struct req_format RQF_MDS_REINT_LINK;
+extern struct req_format RQF_MDS_REINT_RENAME;
+extern struct req_format RQF_MDS_REINT_SETATTR;
+extern struct req_format RQF_MDS_REINT_SETXATTR;
+extern struct req_format RQF_MDS_QUOTACHECK;
+extern struct req_format RQF_MDS_QUOTACTL;
+extern struct req_format RQF_QC_CALLBACK;
+extern struct req_format RQF_QUOTA_DQACQ;
+extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+/* MDS hsm formats */
+extern struct req_format RQF_MDS_HSM_STATE_GET;
+extern struct req_format RQF_MDS_HSM_STATE_SET;
+extern struct req_format RQF_MDS_HSM_ACTION;
+extern struct req_format RQF_MDS_HSM_PROGRESS;
+extern struct req_format RQF_MDS_HSM_CT_REGISTER;
+extern struct req_format RQF_MDS_HSM_CT_UNREGISTER;
+extern struct req_format RQF_MDS_HSM_REQUEST;
+/* OST req_format */
+extern struct req_format RQF_OST_CONNECT;
+extern struct req_format RQF_OST_DISCONNECT;
+extern struct req_format RQF_OST_QUOTACHECK;
+extern struct req_format RQF_OST_QUOTACTL;
+extern struct req_format RQF_OST_GETATTR;
+extern struct req_format RQF_OST_SETATTR;
+extern struct req_format RQF_OST_CREATE;
+extern struct req_format RQF_OST_PUNCH;
+extern struct req_format RQF_OST_SYNC;
+extern struct req_format RQF_OST_DESTROY;
+extern struct req_format RQF_OST_BRW_READ;
+extern struct req_format RQF_OST_BRW_WRITE;
+extern struct req_format RQF_OST_STATFS;
+extern struct req_format RQF_OST_SET_GRANT_INFO;
+extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO_LAST_ID;
+extern struct req_format RQF_OST_GET_INFO_LAST_FID;
+extern struct req_format RQF_OST_SET_INFO_LAST_FID;
+extern struct req_format RQF_OST_GET_INFO_FIEMAP;
+
+/* LDLM req_format */
+extern struct req_format RQF_LDLM_ENQUEUE;
+extern struct req_format RQF_LDLM_ENQUEUE_LVB;
+extern struct req_format RQF_LDLM_CONVERT;
+extern struct req_format RQF_LDLM_INTENT;
+extern struct req_format RQF_LDLM_INTENT_BASIC;
+extern struct req_format RQF_LDLM_INTENT_LAYOUT;
+extern struct req_format RQF_LDLM_INTENT_GETATTR;
+extern struct req_format RQF_LDLM_INTENT_OPEN;
+extern struct req_format RQF_LDLM_INTENT_CREATE;
+extern struct req_format RQF_LDLM_INTENT_UNLINK;
+extern struct req_format RQF_LDLM_INTENT_QUOTA;
+extern struct req_format RQF_LDLM_CANCEL;
+extern struct req_format RQF_LDLM_CALLBACK;
+extern struct req_format RQF_LDLM_CP_CALLBACK;
+extern struct req_format RQF_LDLM_BL_CALLBACK;
+extern struct req_format RQF_LDLM_GL_CALLBACK;
+extern struct req_format RQF_LDLM_GL_DESC_CALLBACK;
+/* LOG req_format */
+extern struct req_format RQF_LOG_CANCEL;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER;
+extern struct req_format RQF_LLOG_ORIGIN_CONNECT;
+
+extern struct req_msg_field RMF_GENERIC_DATA;
+extern struct req_msg_field RMF_PTLRPC_BODY;
+extern struct req_msg_field RMF_MDT_BODY;
+extern struct req_msg_field RMF_MDT_EPOCH;
+extern struct req_msg_field RMF_OBD_STATFS;
+extern struct req_msg_field RMF_NAME;
+extern struct req_msg_field RMF_SYMTGT;
+extern struct req_msg_field RMF_TGTUUID;
+extern struct req_msg_field RMF_CLUUID;
+extern struct req_msg_field RMF_SETINFO_VAL;
+extern struct req_msg_field RMF_SETINFO_KEY;
+extern struct req_msg_field RMF_GETINFO_VAL;
+extern struct req_msg_field RMF_GETINFO_VALLEN;
+extern struct req_msg_field RMF_GETINFO_KEY;
+extern struct req_msg_field RMF_IDX_INFO;
+
+/*
+ * connection handle received in MDS_CONNECT request.
+ */
+extern struct req_msg_field RMF_CONN;
+extern struct req_msg_field RMF_CONNECT_DATA;
+extern struct req_msg_field RMF_DLM_REQ;
+extern struct req_msg_field RMF_DLM_REP;
+extern struct req_msg_field RMF_DLM_LVB;
+extern struct req_msg_field RMF_DLM_GL_DESC;
+extern struct req_msg_field RMF_LDLM_INTENT;
+extern struct req_msg_field RMF_LAYOUT_INTENT;
+extern struct req_msg_field RMF_MDT_MD;
+extern struct req_msg_field RMF_REC_REINT;
+extern struct req_msg_field RMF_EADATA;
+extern struct req_msg_field RMF_ACL;
+extern struct req_msg_field RMF_LOGCOOKIES;
+extern struct req_msg_field RMF_CAPA1;
+extern struct req_msg_field RMF_CAPA2;
+extern struct req_msg_field RMF_OBD_QUOTACHECK;
+extern struct req_msg_field RMF_OBD_QUOTACTL;
+extern struct req_msg_field RMF_QUOTA_BODY;
+extern struct req_msg_field RMF_STRING;
+extern struct req_msg_field RMF_SWAP_LAYOUTS;
+extern struct req_msg_field RMF_MDS_HSM_PROGRESS;
+extern struct req_msg_field RMF_MDS_HSM_REQUEST;
+extern struct req_msg_field RMF_MDS_HSM_USER_ITEM;
+extern struct req_msg_field RMF_MDS_HSM_ARCHIVE;
+extern struct req_msg_field RMF_HSM_USER_STATE;
+extern struct req_msg_field RMF_HSM_STATE_SET;
+extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION;
+extern struct req_msg_field RMF_MDS_HSM_REQUEST;
+
+/* seq-mgr fields */
+extern struct req_msg_field RMF_SEQ_OPC;
+extern struct req_msg_field RMF_SEQ_RANGE;
+extern struct req_msg_field RMF_FID_SPACE;
+
+/* FLD fields */
+extern struct req_msg_field RMF_FLD_OPC;
+extern struct req_msg_field RMF_FLD_MDFLD;
+
+extern struct req_msg_field RMF_LLOGD_BODY;
+extern struct req_msg_field RMF_LLOG_LOG_HDR;
+extern struct req_msg_field RMF_LLOGD_CONN_BODY;
+
+extern struct req_msg_field RMF_MGS_TARGET_INFO;
+extern struct req_msg_field RMF_MGS_SEND_PARAM;
+
+extern struct req_msg_field RMF_OST_BODY;
+extern struct req_msg_field RMF_OBD_IOOBJ;
+extern struct req_msg_field RMF_OBD_ID;
+extern struct req_msg_field RMF_FID;
+extern struct req_msg_field RMF_NIOBUF_REMOTE;
+extern struct req_msg_field RMF_RCS;
+extern struct req_msg_field RMF_FIEMAP_KEY;
+extern struct req_msg_field RMF_FIEMAP_VAL;
+extern struct req_msg_field RMF_OST_ID;
+
+/* MGS config read message format */
+extern struct req_msg_field RMF_MGS_CONFIG_BODY;
+extern struct req_msg_field RMF_MGS_CONFIG_RES;
+
+/* generic uint32 */
+extern struct req_msg_field RMF_U32;
+
+/* OBJ update format */
+extern struct req_msg_field RMF_UPDATE;
+extern struct req_msg_field RMF_UPDATE_REPLY;
+/** @} req_layout */
+
+#endif /* _LUSTRE_REQ_LAYOUT_H__ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
new file mode 100644
index 000000000000..9e0908e1c4d6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -0,0 +1,1145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_SEC_H_
+#define _LUSTRE_SEC_H_
+
+/** \defgroup sptlrpc sptlrpc
+ *
+ * @{
+ */
+
+/*
+ * to avoid include
+ */
+struct obd_import;
+struct obd_export;
+struct ptlrpc_request;
+struct ptlrpc_reply_state;
+struct ptlrpc_bulk_desc;
+struct brw_page;
+/* Linux specific */
+struct key;
+struct seq_file;
+
+/*
+ * forward declaration
+ */
+struct ptlrpc_sec_policy;
+struct ptlrpc_sec_cops;
+struct ptlrpc_sec_sops;
+struct ptlrpc_sec;
+struct ptlrpc_svc_ctx;
+struct ptlrpc_cli_ctx;
+struct ptlrpc_ctx_ops;
+
+/**
+ * \addtogroup flavor flavor
+ *
+ * RPC flavor is represented by a 32 bits integer. Currently the high 12 bits
+ * are unused, must be set to 0 for future expansion.
+ * <pre>
+ * ------------------------------------------------------------------------
+ * | 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech) | 4b (policy) |
+ * ------------------------------------------------------------------------
+ * </pre>
+ *
+ * @{
+ */
+
+/*
+ * flavor constants
+ */
+enum sptlrpc_policy {
+ SPTLRPC_POLICY_NULL = 0,
+ SPTLRPC_POLICY_PLAIN = 1,
+ SPTLRPC_POLICY_GSS = 2,
+ SPTLRPC_POLICY_MAX,
+};
+
+enum sptlrpc_mech_null {
+ SPTLRPC_MECH_NULL = 0,
+ SPTLRPC_MECH_NULL_MAX,
+};
+
+enum sptlrpc_mech_plain {
+ SPTLRPC_MECH_PLAIN = 0,
+ SPTLRPC_MECH_PLAIN_MAX,
+};
+
+enum sptlrpc_mech_gss {
+ SPTLRPC_MECH_GSS_NULL = 0,
+ SPTLRPC_MECH_GSS_KRB5 = 1,
+ SPTLRPC_MECH_GSS_MAX,
+};
+
+enum sptlrpc_service_type {
+ SPTLRPC_SVC_NULL = 0, /**< no security */
+ SPTLRPC_SVC_AUTH = 1, /**< authentication only */
+ SPTLRPC_SVC_INTG = 2, /**< integrity */
+ SPTLRPC_SVC_PRIV = 3, /**< privacy */
+ SPTLRPC_SVC_MAX,
+};
+
+enum sptlrpc_bulk_type {
+ SPTLRPC_BULK_DEFAULT = 0, /**< follow rpc flavor */
+ SPTLRPC_BULK_HASH = 1, /**< hash integrity */
+ SPTLRPC_BULK_MAX,
+};
+
+enum sptlrpc_bulk_service {
+ SPTLRPC_BULK_SVC_NULL = 0, /**< no security */
+ SPTLRPC_BULK_SVC_AUTH = 1, /**< authentication only */
+ SPTLRPC_BULK_SVC_INTG = 2, /**< integrity */
+ SPTLRPC_BULK_SVC_PRIV = 3, /**< privacy */
+ SPTLRPC_BULK_SVC_MAX,
+};
+
+/*
+ * compose/extract macros
+ */
+#define FLVR_POLICY_OFFSET (0)
+#define FLVR_MECH_OFFSET (4)
+#define FLVR_SVC_OFFSET (8)
+#define FLVR_BULK_TYPE_OFFSET (12)
+#define FLVR_BULK_SVC_OFFSET (16)
+
+#define MAKE_FLVR(policy, mech, svc, btype, bsvc) \
+ (((__u32)(policy) << FLVR_POLICY_OFFSET) | \
+ ((__u32)(mech) << FLVR_MECH_OFFSET) | \
+ ((__u32)(svc) << FLVR_SVC_OFFSET) | \
+ ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) | \
+ ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET))
+
+/*
+ * extraction
+ */
+#define SPTLRPC_FLVR_POLICY(flavor) \
+ ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_MECH(flavor) \
+ ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_SVC(flavor) \
+ ((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_TYPE(flavor) \
+ ((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_SVC(flavor) \
+ ((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF)
+
+#define SPTLRPC_FLVR_BASE(flavor) \
+ ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF)
+#define SPTLRPC_FLVR_BASE_SUB(flavor) \
+ ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF)
+
+/*
+ * gss subflavors
+ */
+#define MAKE_BASE_SUBFLVR(mech, svc) \
+ ((__u32)(mech) | \
+ ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET)))
+
+#define SPTLRPC_SUBFLVR_KRB5N \
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL)
+#define SPTLRPC_SUBFLVR_KRB5A \
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH)
+#define SPTLRPC_SUBFLVR_KRB5I \
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG)
+#define SPTLRPC_SUBFLVR_KRB5P \
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV)
+
+/*
+ * "end user" flavors
+ */
+#define SPTLRPC_FLVR_NULL \
+ MAKE_FLVR(SPTLRPC_POLICY_NULL, \
+ SPTLRPC_MECH_NULL, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_PLAIN \
+ MAKE_FLVR(SPTLRPC_POLICY_PLAIN, \
+ SPTLRPC_MECH_PLAIN, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_HASH, \
+ SPTLRPC_BULK_SVC_INTG)
+#define SPTLRPC_FLVR_KRB5N \
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_KRB5A \
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_AUTH, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_KRB5I \
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_INTG, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_INTG)
+#define SPTLRPC_FLVR_KRB5P \
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_PRIV, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_PRIV)
+
+#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL
+
+#define SPTLRPC_FLVR_INVALID ((__u32) 0xFFFFFFFF)
+#define SPTLRPC_FLVR_ANY ((__u32) 0xFFF00000)
+
+/**
+ * extract the useful part from wire flavor
+ */
+#define WIRE_FLVR(wflvr) (((__u32) (wflvr)) & 0x000FFFFF)
+
+/** @} flavor */
+
+static inline void flvr_set_svc(__u32 *flvr, __u32 svc)
+{
+ LASSERT(svc < SPTLRPC_SVC_MAX);
+ *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+ SPTLRPC_FLVR_MECH(*flvr),
+ svc,
+ SPTLRPC_FLVR_BULK_TYPE(*flvr),
+ SPTLRPC_FLVR_BULK_SVC(*flvr));
+}
+
+static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc)
+{
+ LASSERT(svc < SPTLRPC_BULK_SVC_MAX);
+ *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+ SPTLRPC_FLVR_MECH(*flvr),
+ SPTLRPC_FLVR_SVC(*flvr),
+ SPTLRPC_FLVR_BULK_TYPE(*flvr),
+ svc);
+}
+
+struct bulk_spec_hash {
+ __u8 hash_alg;
+};
+
+/**
+ * Full description of flavors being used on a ptlrpc connection, include
+ * both regular RPC and bulk transfer parts.
+ */
+struct sptlrpc_flavor {
+ /**
+ * wire flavor, should be renamed to sf_wire.
+ */
+ __u32 sf_rpc;
+ /**
+ * general flags of PTLRPC_SEC_FL_*
+ */
+ __u32 sf_flags;
+ /**
+ * rpc flavor specification
+ */
+ union {
+ /* nothing for now */
+ } u_rpc;
+ /**
+ * bulk flavor specification
+ */
+ union {
+ struct bulk_spec_hash hash;
+ } u_bulk;
+};
+
+/**
+ * identify the RPC is generated from what part of Lustre. It's encoded into
+ * RPC requests and to be checked by ptlrpc service.
+ */
+enum lustre_sec_part {
+ LUSTRE_SP_CLI = 0,
+ LUSTRE_SP_MDT,
+ LUSTRE_SP_OST,
+ LUSTRE_SP_MGC,
+ LUSTRE_SP_MGS,
+ LUSTRE_SP_ANY = 0xFF
+};
+
+const char *sptlrpc_part2name(enum lustre_sec_part sp);
+enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd);
+
+/**
+ * A rule specifies a flavor to be used by a ptlrpc connection between
+ * two Lustre parts.
+ */
+struct sptlrpc_rule {
+ __u32 sr_netid; /* LNET network ID */
+ __u8 sr_from; /* sec_part */
+ __u8 sr_to; /* sec_part */
+ __u16 sr_padding;
+ struct sptlrpc_flavor sr_flvr;
+};
+
+/**
+ * A set of rules in memory.
+ *
+ * Rules are generated and stored on MGS, and propagated to MDT, OST,
+ * and client when needed.
+ */
+struct sptlrpc_rule_set {
+ int srs_nslot;
+ int srs_nrule;
+ struct sptlrpc_rule *srs_rules;
+};
+
+int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr);
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr);
+
+static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set)
+{
+ memset(set, 0, sizeof(*set));
+}
+
+void sptlrpc_rule_set_free(struct sptlrpc_rule_set *set);
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *set);
+int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *set,
+ struct sptlrpc_rule *rule);
+int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf);
+void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *set);
+
+int sptlrpc_process_config(struct lustre_cfg *lcfg);
+void sptlrpc_conf_log_start(const char *logname);
+void sptlrpc_conf_log_stop(const char *logname);
+void sptlrpc_conf_log_update_begin(const char *logname);
+void sptlrpc_conf_log_update_end(const char *logname);
+void sptlrpc_conf_client_adapt(struct obd_device *obd);
+int sptlrpc_conf_target_get_rules(struct obd_device *obd,
+ struct sptlrpc_rule_set *rset,
+ int initial);
+void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *flavor);
+
+/* The maximum length of security payload. 1024 is enough for Kerberos 5,
+ * and should be enough for other future mechanisms but not sure.
+ * Only used by pre-allocated request/reply pool.
+ */
+#define SPTLRPC_MAX_PAYLOAD (1024)
+
+
+struct vfs_cred {
+ uint32_t vc_uid;
+ uint32_t vc_gid;
+};
+
+struct ptlrpc_ctx_ops {
+ /**
+ * To determine whether it's suitable to use the \a ctx for \a vcred.
+ */
+ int (*match) (struct ptlrpc_cli_ctx *ctx,
+ struct vfs_cred *vcred);
+
+ /**
+ * To bring the \a ctx uptodate.
+ */
+ int (*refresh) (struct ptlrpc_cli_ctx *ctx);
+
+ /**
+ * Validate the \a ctx.
+ */
+ int (*validate) (struct ptlrpc_cli_ctx *ctx);
+
+ /**
+ * Force the \a ctx to die.
+ */
+ void (*die) (struct ptlrpc_cli_ctx *ctx,
+ int grace);
+ int (*display) (struct ptlrpc_cli_ctx *ctx,
+ char *buf, int bufsize);
+
+ /**
+ * Sign the request message using \a ctx.
+ *
+ * \pre req->rq_reqmsg point to request message.
+ * \pre req->rq_reqlen is the request message length.
+ * \post req->rq_reqbuf point to request message with signature.
+ * \post req->rq_reqdata_len is set to the final request message size.
+ *
+ * \see null_ctx_sign(), plain_ctx_sign(), gss_cli_ctx_sign().
+ */
+ int (*sign) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+
+ /**
+ * Verify the reply message using \a ctx.
+ *
+ * \pre req->rq_repdata point to reply message with signature.
+ * \pre req->rq_repdata_len is the total reply message length.
+ * \post req->rq_repmsg point to reply message without signature.
+ * \post req->rq_replen is the reply message length.
+ *
+ * \see null_ctx_verify(), plain_ctx_verify(), gss_cli_ctx_verify().
+ */
+ int (*verify) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+
+ /**
+ * Encrypt the request message using \a ctx.
+ *
+ * \pre req->rq_reqmsg point to request message in clear text.
+ * \pre req->rq_reqlen is the request message length.
+ * \post req->rq_reqbuf point to request message.
+ * \post req->rq_reqdata_len is set to the final request message size.
+ *
+ * \see gss_cli_ctx_seal().
+ */
+ int (*seal) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+
+ /**
+ * Decrypt the reply message using \a ctx.
+ *
+ * \pre req->rq_repdata point to encrypted reply message.
+ * \pre req->rq_repdata_len is the total cipher text length.
+ * \post req->rq_repmsg point to reply message in clear text.
+ * \post req->rq_replen is the reply message length in clear text.
+ *
+ * \see gss_cli_ctx_unseal().
+ */
+ int (*unseal) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+
+ /**
+ * Wrap bulk request data. This is called before wrapping RPC
+ * request message.
+ *
+ * \pre bulk buffer is descripted by desc->bd_iov and
+ * desc->bd_iov_count. note for read it's just buffer, no data
+ * need to be sent; for write it contains data in clear text.
+ * \post when necessary, ptlrpc_bulk_sec_desc was properly prepared
+ * (usually inside of RPC request message).
+ * - encryption: cipher text bulk buffer is descripted by
+ * desc->bd_enc_iov and desc->bd_iov_count (currently assume iov
+ * count remains the same).
+ * - otherwise: bulk buffer is still desc->bd_iov and
+ * desc->bd_iov_count.
+ *
+ * \return 0: success.
+ * \return -ev: error code.
+ *
+ * \see plain_cli_wrap_bulk(), gss_cli_ctx_wrap_bulk().
+ */
+ int (*wrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+ /**
+ * Unwrap bulk reply data. This is called after wrapping RPC
+ * reply message.
+ *
+ * \pre bulk buffer is descripted by desc->bd_iov/desc->bd_enc_iov and
+ * desc->bd_iov_count, according to wrap_bulk().
+ * \post final bulk data in clear text is placed in buffer described
+ * by desc->bd_iov and desc->bd_iov_count.
+ * \return +ve nob of actual bulk data in clear text.
+ * \return -ve error code.
+ *
+ * \see plain_cli_unwrap_bulk(), gss_cli_ctx_unwrap_bulk().
+ */
+ int (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+};
+
+#define PTLRPC_CTX_NEW_BIT (0) /* newly created */
+#define PTLRPC_CTX_UPTODATE_BIT (1) /* uptodate */
+#define PTLRPC_CTX_DEAD_BIT (2) /* mark expired gracefully */
+#define PTLRPC_CTX_ERROR_BIT (3) /* fatal error (refresh, etc.) */
+#define PTLRPC_CTX_CACHED_BIT (8) /* in ctx cache (hash etc.) */
+#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */
+
+#define PTLRPC_CTX_NEW (1 << PTLRPC_CTX_NEW_BIT)
+#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT)
+#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT)
+#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT)
+#define PTLRPC_CTX_CACHED (1 << PTLRPC_CTX_CACHED_BIT)
+#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT)
+
+#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_NEW_BIT | \
+ PTLRPC_CTX_UPTODATE | \
+ PTLRPC_CTX_DEAD | \
+ PTLRPC_CTX_ERROR)
+
+struct ptlrpc_cli_ctx {
+ struct hlist_node cc_cache; /* linked into ctx cache */
+ atomic_t cc_refcount;
+ struct ptlrpc_sec *cc_sec;
+ struct ptlrpc_ctx_ops *cc_ops;
+ cfs_time_t cc_expire; /* in seconds */
+ unsigned int cc_early_expire:1;
+ unsigned long cc_flags;
+ struct vfs_cred cc_vcred;
+ spinlock_t cc_lock;
+ struct list_head cc_req_list; /* waiting reqs linked here */
+ struct list_head cc_gc_chain; /* linked to gc chain */
+};
+
+/**
+ * client side policy operation vector.
+ */
+struct ptlrpc_sec_cops {
+ /**
+ * Given an \a imp, create and initialize a ptlrpc_sec structure.
+ * \param ctx service context:
+ * - regular import: \a ctx should be NULL;
+ * - reverse import: \a ctx is obtained from incoming request.
+ * \param flavor specify what flavor to use.
+ *
+ * When necessary, policy module is responsible for taking reference
+ * on the import.
+ *
+ * \see null_create_sec(), plain_create_sec(), gss_sec_create_kr().
+ */
+ struct ptlrpc_sec * (*create_sec) (struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ struct sptlrpc_flavor *flavor);
+
+ /**
+ * Destructor of ptlrpc_sec. When called, refcount has been dropped
+ * to 0 and all contexts has been destroyed.
+ *
+ * \see null_destroy_sec(), plain_destroy_sec(), gss_sec_destroy_kr().
+ */
+ void (*destroy_sec) (struct ptlrpc_sec *sec);
+
+ /**
+ * Notify that this ptlrpc_sec is going to die. Optionally, policy
+ * module is supposed to set sec->ps_dying and whatever necessary
+ * actions.
+ *
+ * \see plain_kill_sec(), gss_sec_kill().
+ */
+ void (*kill_sec) (struct ptlrpc_sec *sec);
+
+ /**
+ * Given \a vcred, lookup and/or create its context. The policy module
+ * is supposed to maintain its own context cache.
+ * XXX currently \a create and \a remove_dead is always 1, perhaps
+ * should be removed completely.
+ *
+ * \see null_lookup_ctx(), plain_lookup_ctx(), gss_sec_lookup_ctx_kr().
+ */
+ struct ptlrpc_cli_ctx * (*lookup_ctx) (struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create,
+ int remove_dead);
+
+ /**
+ * Called then the reference of \a ctx dropped to 0. The policy module
+ * is supposed to destroy this context or whatever else according to
+ * its cache maintainance mechamism.
+ *
+ * \param sync if zero, we shouldn't wait for the context being
+ * destroyed completely.
+ *
+ * \see plain_release_ctx(), gss_sec_release_ctx_kr().
+ */
+ void (*release_ctx) (struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx,
+ int sync);
+
+ /**
+ * Flush the context cache.
+ *
+ * \param uid context of which user, -1 means all contexts.
+ * \param grace if zero, the PTLRPC_CTX_UPTODATE_BIT of affected
+ * contexts should be cleared immediately.
+ * \param force if zero, only idle contexts will be flushed.
+ *
+ * \see plain_flush_ctx_cache(), gss_sec_flush_ctx_cache_kr().
+ */
+ int (*flush_ctx_cache)
+ (struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace,
+ int force);
+
+ /**
+ * Called periodically by garbage collector to remove dead contexts
+ * from cache.
+ *
+ * \see gss_sec_gc_ctx_kr().
+ */
+ void (*gc_ctx) (struct ptlrpc_sec *sec);
+
+ /**
+ * Given an context \a ctx, install a corresponding reverse service
+ * context on client side.
+ * XXX currently it's only used by GSS module, maybe we should remove
+ * this from general API.
+ */
+ int (*install_rctx)(struct obd_import *imp,
+ struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+
+ /**
+ * To allocate request buffer for \a req.
+ *
+ * \pre req->rq_reqmsg == NULL.
+ * \pre req->rq_reqbuf == NULL, otherwise it must be pre-allocated,
+ * we are not supposed to free it.
+ * \post if success, req->rq_reqmsg point to a buffer with size
+ * at least \a lustre_msg_size.
+ *
+ * \see null_alloc_reqbuf(), plain_alloc_reqbuf(), gss_alloc_reqbuf().
+ */
+ int (*alloc_reqbuf)(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int lustre_msg_size);
+
+ /**
+ * To free request buffer for \a req.
+ *
+ * \pre req->rq_reqbuf != NULL.
+ *
+ * \see null_free_reqbuf(), plain_free_reqbuf(), gss_free_reqbuf().
+ */
+ void (*free_reqbuf) (struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req);
+
+ /**
+ * To allocate reply buffer for \a req.
+ *
+ * \pre req->rq_repbuf == NULL.
+ * \post if success, req->rq_repbuf point to a buffer with size
+ * req->rq_repbuf_len, the size should be large enough to receive
+ * reply which be transformed from \a lustre_msg_size of clear text.
+ *
+ * \see null_alloc_repbuf(), plain_alloc_repbuf(), gss_alloc_repbuf().
+ */
+ int (*alloc_repbuf)(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int lustre_msg_size);
+
+ /**
+ * To free reply buffer for \a req.
+ *
+ * \pre req->rq_repbuf != NULL.
+ * \post req->rq_repbuf == NULL.
+ * \post req->rq_repbuf_len == 0.
+ *
+ * \see null_free_repbuf(), plain_free_repbuf(), gss_free_repbuf().
+ */
+ void (*free_repbuf) (struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req);
+
+ /**
+ * To expand the request buffer of \a req, thus the \a segment in
+ * the request message pointed by req->rq_reqmsg can accommodate
+ * at least \a newsize of data.
+ *
+ * \pre req->rq_reqmsg->lm_buflens[segment] < newsize.
+ *
+ * \see null_enlarge_reqbuf(), plain_enlarge_reqbuf(),
+ * gss_enlarge_reqbuf().
+ */
+ int (*enlarge_reqbuf)
+ (struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize);
+ /*
+ * misc
+ */
+ int (*display) (struct ptlrpc_sec *sec,
+ struct seq_file *seq);
+};
+
+/**
+ * server side policy operation vector.
+ */
+struct ptlrpc_sec_sops {
+ /**
+ * verify an incoming request.
+ *
+ * \pre request message is pointed by req->rq_reqbuf, size is
+ * req->rq_reqdata_len; and the message has been unpacked to
+ * host byte order.
+ *
+ * \retval SECSVC_OK success, req->rq_reqmsg point to request message
+ * in clear text, size is req->rq_reqlen; req->rq_svc_ctx is set;
+ * req->rq_sp_from is decoded from request.
+ * \retval SECSVC_COMPLETE success, the request has been fully
+ * processed, and reply message has been prepared; req->rq_sp_from is
+ * decoded from request.
+ * \retval SECSVC_DROP failed, this request should be dropped.
+ *
+ * \see null_accept(), plain_accept(), gss_svc_accept_kr().
+ */
+ int (*accept) (struct ptlrpc_request *req);
+
+ /**
+ * Perform security transformation upon reply message.
+ *
+ * \pre reply message is pointed by req->rq_reply_state->rs_msg, size
+ * is req->rq_replen.
+ * \post req->rs_repdata_len is the final message size.
+ * \post req->rq_reply_off is set.
+ *
+ * \see null_authorize(), plain_authorize(), gss_svc_authorize().
+ */
+ int (*authorize) (struct ptlrpc_request *req);
+
+ /**
+ * Invalidate server context \a ctx.
+ *
+ * \see gss_svc_invalidate_ctx().
+ */
+ void (*invalidate_ctx)
+ (struct ptlrpc_svc_ctx *ctx);
+
+ /**
+ * Allocate a ptlrpc_reply_state.
+ *
+ * \param msgsize size of the reply message in clear text.
+ * \pre if req->rq_reply_state != NULL, then it's pre-allocated, we
+ * should simply use it; otherwise we'll responsible for allocating
+ * a new one.
+ * \post req->rq_reply_state != NULL;
+ * \post req->rq_reply_state->rs_msg != NULL;
+ *
+ * \see null_alloc_rs(), plain_alloc_rs(), gss_svc_alloc_rs().
+ */
+ int (*alloc_rs) (struct ptlrpc_request *req,
+ int msgsize);
+
+ /**
+ * Free a ptlrpc_reply_state.
+ */
+ void (*free_rs) (struct ptlrpc_reply_state *rs);
+
+ /**
+ * Release the server context \a ctx.
+ *
+ * \see gss_svc_free_ctx().
+ */
+ void (*free_ctx) (struct ptlrpc_svc_ctx *ctx);
+
+ /**
+ * Install a reverse context based on the server context \a ctx.
+ *
+ * \see gss_svc_install_rctx_kr().
+ */
+ int (*install_rctx)(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx);
+
+ /**
+ * Prepare buffer for incoming bulk write.
+ *
+ * \pre desc->bd_iov and desc->bd_iov_count describes the buffer
+ * intended to receive the write.
+ *
+ * \see gss_svc_prep_bulk().
+ */
+ int (*prep_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+ /**
+ * Unwrap the bulk write data.
+ *
+ * \see plain_svc_unwrap_bulk(), gss_svc_unwrap_bulk().
+ */
+ int (*unwrap_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+ /**
+ * Wrap the bulk read data.
+ *
+ * \see plain_svc_wrap_bulk(), gss_svc_wrap_bulk().
+ */
+ int (*wrap_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+};
+
+struct ptlrpc_sec_policy {
+ module_t *sp_owner;
+ char *sp_name;
+ __u16 sp_policy; /* policy number */
+ struct ptlrpc_sec_cops *sp_cops; /* client ops */
+ struct ptlrpc_sec_sops *sp_sops; /* server ops */
+};
+
+#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */
+#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */
+#define PTLRPC_SEC_FL_UDESC 0x0004 /* ship udesc */
+#define PTLRPC_SEC_FL_BULK 0x0008 /* intensive bulk i/o expected */
+#define PTLRPC_SEC_FL_PAG 0x0010 /* PAG mode */
+
+/**
+ * The ptlrpc_sec represents the client side ptlrpc security facilities,
+ * each obd_import (both regular and reverse import) must associate with
+ * a ptlrpc_sec.
+ *
+ * \see sptlrpc_import_sec_adapt().
+ */
+struct ptlrpc_sec {
+ struct ptlrpc_sec_policy *ps_policy;
+ atomic_t ps_refcount;
+ /** statistic only */
+ atomic_t ps_nctx;
+ /** unique identifier */
+ int ps_id;
+ struct sptlrpc_flavor ps_flvr;
+ enum lustre_sec_part ps_part;
+ /** after set, no more new context will be created */
+ unsigned int ps_dying:1;
+ /** owning import */
+ struct obd_import *ps_import;
+ spinlock_t ps_lock;
+
+ /*
+ * garbage collection
+ */
+ struct list_head ps_gc_list;
+ cfs_time_t ps_gc_interval; /* in seconds */
+ cfs_time_t ps_gc_next; /* in seconds */
+};
+
+static inline int sec_is_reverse(struct ptlrpc_sec *sec)
+{
+ return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE);
+}
+
+static inline int sec_is_rootonly(struct ptlrpc_sec *sec)
+{
+ return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY);
+}
+
+
+struct ptlrpc_svc_ctx {
+ atomic_t sc_refcount;
+ struct ptlrpc_sec_policy *sc_policy;
+};
+
+/*
+ * user identity descriptor
+ */
+#define LUSTRE_MAX_GROUPS (128)
+
+struct ptlrpc_user_desc {
+ __u32 pud_uid;
+ __u32 pud_gid;
+ __u32 pud_fsuid;
+ __u32 pud_fsgid;
+ __u32 pud_cap;
+ __u32 pud_ngroups;
+ __u32 pud_groups[0];
+};
+
+/*
+ * bulk flavors
+ */
+enum sptlrpc_bulk_hash_alg {
+ BULK_HASH_ALG_NULL = 0,
+ BULK_HASH_ALG_ADLER32,
+ BULK_HASH_ALG_CRC32,
+ BULK_HASH_ALG_MD5,
+ BULK_HASH_ALG_SHA1,
+ BULK_HASH_ALG_SHA256,
+ BULK_HASH_ALG_SHA384,
+ BULK_HASH_ALG_SHA512,
+ BULK_HASH_ALG_MAX
+};
+
+const char * sptlrpc_get_hash_name(__u8 hash_alg);
+__u8 sptlrpc_get_hash_alg(const char *algname);
+
+enum {
+ BSD_FL_ERR = 1,
+};
+
+struct ptlrpc_bulk_sec_desc {
+ __u8 bsd_version; /* 0 */
+ __u8 bsd_type; /* SPTLRPC_BULK_XXX */
+ __u8 bsd_svc; /* SPTLRPC_BULK_SVC_XXXX */
+ __u8 bsd_flags; /* flags */
+ __u32 bsd_nob; /* nob of bulk data */
+ __u8 bsd_data[0]; /* policy-specific token */
+};
+
+
+/*
+ * lprocfs
+ */
+struct proc_dir_entry;
+extern struct proc_dir_entry *sptlrpc_proc_root;
+
+/*
+ * round size up to next power of 2, for slab allocation.
+ * @size must be sane (can't overflow after round up)
+ */
+static inline int size_roundup_power2(int size)
+{
+ size--;
+ size |= size >> 1;
+ size |= size >> 2;
+ size |= size >> 4;
+ size |= size >> 8;
+ size |= size >> 16;
+ size++;
+ return size;
+}
+
+/*
+ * internal support libraries
+ */
+void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
+ int segment, int newsize);
+
+/*
+ * security policies
+ */
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
+
+__u32 sptlrpc_name2flavor_base(const char *name);
+const char *sptlrpc_flavor2name_base(__u32 flvr);
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+ char *buf, int bufsize);
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize);
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize);
+
+static inline
+struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
+{
+ __module_get(policy->sp_owner);
+ return policy;
+}
+
+static inline
+void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
+{
+ module_put(policy->sp_owner);
+}
+
+/*
+ * client credential
+ */
+static inline
+unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx)
+{
+ return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
+}
+
+static inline
+int cli_ctx_is_ready(struct ptlrpc_cli_ctx *ctx)
+{
+ return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
+}
+
+static inline
+int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
+{
+ return (cli_ctx_status(ctx) != 0);
+}
+
+static inline
+int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+ return ((ctx->cc_flags & PTLRPC_CTX_UPTODATE) != 0);
+}
+
+static inline
+int cli_ctx_is_error(struct ptlrpc_cli_ctx *ctx)
+{
+ return ((ctx->cc_flags & PTLRPC_CTX_ERROR) != 0);
+}
+
+static inline
+int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
+{
+ return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
+}
+
+static inline
+int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
+{
+ return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
+}
+
+/*
+ * sec get/put
+ */
+struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec);
+void sptlrpc_sec_put(struct ptlrpc_sec *sec);
+
+/*
+ * internal apis which only used by policy impelentation
+ */
+int sptlrpc_get_next_secid(void);
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
+
+/*
+ * exported client context api
+ */
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+
+/*
+ * exported client context wrap/buffers
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req);
+int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
+ int segment, int newsize);
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+ struct ptlrpc_request **req_ret);
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req);
+
+void sptlrpc_request_out_callback(struct ptlrpc_request *req);
+
+/*
+ * exported higher interface of import & request
+ */
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ struct sptlrpc_flavor *flvr);
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp);
+void sptlrpc_import_sec_put(struct obd_import *imp);
+
+int sptlrpc_import_check_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp);
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync);
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout);
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
+
+int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule);
+
+/* gc */
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec);
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec);
+void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx);
+
+/* misc */
+const char * sec2target_str(struct ptlrpc_sec *sec);
+int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev);
+
+/*
+ * server side
+ */
+enum secsvc_accept_res {
+ SECSVC_OK = 0,
+ SECSVC_COMPLETE,
+ SECSVC_DROP,
+};
+
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req);
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req);
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs);
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req);
+
+int sptlrpc_target_export_check(struct obd_export *exp,
+ struct ptlrpc_request *req);
+void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
+ struct sptlrpc_rule_set *rset);
+
+/*
+ * reverse context
+ */
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx);
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_cli_ctx *ctx);
+
+/* bulk security api */
+int sptlrpc_enc_pool_add_user(void);
+int sptlrpc_enc_pool_del_user(void);
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc);
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc,
+ int nob);
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+/* bulk helpers (internal use only by policies) */
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed);
+
+/* user descriptor helpers */
+static inline int sptlrpc_user_desc_size(int ngroups)
+{
+ return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
+}
+
+int sptlrpc_current_user_desc_size(void);
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
+int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed);
+
+
+#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN)
+#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE)
+
+enum {
+ LUSTRE_SEC_NONE = 0,
+ LUSTRE_SEC_REMOTE = 1,
+ LUSTRE_SEC_SPECIFY = 2,
+ LUSTRE_SEC_ALL = 3
+};
+
+/** @} sptlrpc */
+
+#endif /* _LUSTRE_SEC_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_update.h b/drivers/staging/lustre/lustre/include/lustre_update.h
new file mode 100644
index 000000000000..84defce0f623
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_update.h
@@ -0,0 +1,189 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.htm
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre_update.h
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#ifndef _LUSTRE_UPDATE_H
+#define _LUSTRE_UPDATE_H
+
+#define UPDATE_BUFFER_SIZE 8192
+struct update_request {
+ struct dt_device *ur_dt;
+ struct list_head ur_list; /* attached itself to thandle */
+ int ur_flags;
+ int ur_rc; /* request result */
+ int ur_batchid; /* Current batch(trans) id */
+ struct update_buf *ur_buf; /* Holding the update req */
+};
+
+static inline unsigned long update_size(struct update *update)
+{
+ unsigned long size;
+ int i;
+
+ size = cfs_size_round(offsetof(struct update, u_bufs[0]));
+ for (i = 0; i < UPDATE_BUF_COUNT; i++)
+ size += cfs_size_round(update->u_lens[i]);
+
+ return size;
+}
+
+static inline void *update_param_buf(struct update *update, int index,
+ int *size)
+{
+ int i;
+ void *ptr;
+
+ if (index >= UPDATE_BUF_COUNT)
+ return NULL;
+
+ ptr = (char *)update + cfs_size_round(offsetof(struct update,
+ u_bufs[0]));
+ for (i = 0; i < index; i++) {
+ LASSERT(update->u_lens[i] > 0);
+ ptr += cfs_size_round(update->u_lens[i]);
+ }
+
+ if (size != NULL)
+ *size = update->u_lens[index];
+
+ return ptr;
+}
+
+static inline unsigned long update_buf_size(struct update_buf *buf)
+{
+ unsigned long size;
+ int i = 0;
+
+ size = cfs_size_round(offsetof(struct update_buf, ub_bufs[0]));
+ for (i = 0; i < buf->ub_count; i++) {
+ struct update *update;
+
+ update = (struct update *)((char *)buf + size);
+ size += update_size(update);
+ }
+ LASSERT(size <= UPDATE_BUFFER_SIZE);
+ return size;
+}
+
+static inline void *update_buf_get(struct update_buf *buf, int index, int *size)
+{
+ int count = buf->ub_count;
+ void *ptr;
+ int i = 0;
+
+ if (index >= count)
+ return NULL;
+
+ ptr = (char *)buf + cfs_size_round(offsetof(struct update_buf,
+ ub_bufs[0]));
+ for (i = 0; i < index; i++)
+ ptr += update_size((struct update *)ptr);
+
+ if (size != NULL)
+ *size = update_size((struct update *)ptr);
+
+ return ptr;
+}
+
+static inline void update_init_reply_buf(struct update_reply *reply, int count)
+{
+ reply->ur_version = UPDATE_REPLY_V1;
+ reply->ur_count = count;
+}
+
+static inline void *update_get_buf_internal(struct update_reply *reply,
+ int index, int *size)
+{
+ char *ptr;
+ int count = reply->ur_count;
+ int i;
+
+ if (index >= count)
+ return NULL;
+
+ ptr = (char *)reply + cfs_size_round(offsetof(struct update_reply,
+ ur_lens[count]));
+ for (i = 0; i < index; i++) {
+ LASSERT(reply->ur_lens[i] > 0);
+ ptr += cfs_size_round(reply->ur_lens[i]);
+ }
+
+ if (size != NULL)
+ *size = reply->ur_lens[index];
+
+ return ptr;
+}
+
+static inline void update_insert_reply(struct update_reply *reply, void *data,
+ int data_len, int index, int rc)
+{
+ char *ptr;
+
+ ptr = update_get_buf_internal(reply, index, NULL);
+ LASSERT(ptr != NULL);
+
+ *(int *)ptr = cpu_to_le32(rc);
+ ptr += sizeof(int);
+ if (data_len > 0) {
+ LASSERT(data != NULL);
+ memcpy(ptr, data, data_len);
+ }
+ reply->ur_lens[index] = data_len + sizeof(int);
+}
+
+static inline int update_get_reply_buf(struct update_reply *reply, void **buf,
+ int index)
+{
+ char *ptr;
+ int size = 0;
+ int result;
+
+ ptr = update_get_buf_internal(reply, index, &size);
+ result = *(int *)ptr;
+
+ if (result < 0)
+ return result;
+
+ LASSERT((ptr != NULL && size >= sizeof(int)));
+ *buf = ptr + sizeof(int);
+ return size - sizeof(int);
+}
+
+static inline int update_get_reply_result(struct update_reply *reply,
+ void **buf, int index)
+{
+ void *ptr;
+ int size;
+
+ ptr = update_get_buf_internal(reply, index, &size);
+ LASSERT(ptr != NULL && size > sizeof(int));
+ return *(int *)ptr;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_ver.h b/drivers/staging/lustre/lustre/include/lustre_ver.h
new file mode 100644
index 000000000000..dc187b8f741f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_ver.h
@@ -0,0 +1,24 @@
+#ifndef _LUSTRE_VER_H_
+#define _LUSTRE_VER_H_
+/* This file automatically generated from lustre/include/lustre_ver.h.in,
+ * based on parameters in lustre/autoconf/lustre-version.ac.
+ * Changes made directly to this file will be lost. */
+
+#define LUSTRE_MAJOR 2
+#define LUSTRE_MINOR 3
+#define LUSTRE_PATCH 64
+#define LUSTRE_FIX 0
+#define LUSTRE_VERSION_STRING "2.3.64"
+
+#define LUSTRE_VERSION_CODE OBD_OCD_VERSION(LUSTRE_MAJOR,LUSTRE_MINOR,LUSTRE_PATCH,LUSTRE_FIX)
+
+/* liblustre clients are only allowed to connect if their LUSTRE_FIX mismatches
+ * by this amount (set in lustre/autoconf/lustre-version.ac). */
+#define LUSTRE_VERSION_ALLOWED_OFFSET OBD_OCD_VERSION(0, 0, 1, 32)
+
+/* If lustre version of client and servers it connects to differs by more
+ * than this amount, client would issue a warning.
+ * (set in lustre/autoconf/lustre-version.ac) */
+#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lvfs.h b/drivers/staging/lustre/lustre/include/lvfs.h
new file mode 100644
index 000000000000..28f1a6b76f73
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lvfs.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lvfs.h
+ *
+ * lustre VFS/process permission interface
+ */
+
+#ifndef __LVFS_H__
+#define __LVFS_H__
+
+#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lvfs.h>
+
+#include <linux/libcfs/lucache.h>
+
+
+/* lvfs_common.c */
+struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data);
+
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+ struct lvfs_ucred *cred);
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+ struct lvfs_ucred *cred);
+#endif
diff --git a/drivers/staging/lustre/lustre/include/md_object.h b/drivers/staging/lustre/lustre/include/md_object.h
new file mode 100644
index 000000000000..92d6420b21da
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/md_object.h
@@ -0,0 +1,908 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/md_object.h
+ *
+ * Extention of lu_object.h for metadata objects
+ */
+
+#ifndef _LUSTRE_MD_OBJECT_H
+#define _LUSTRE_MD_OBJECT_H
+
+/** \defgroup md md
+ * Sub-class of lu_object with methods common for "meta-data" objects in MDT
+ * stack.
+ *
+ * Meta-data objects implement namespace operations: you can link, unlink
+ * them, and treat them as directories.
+ *
+ * Examples: mdt, cmm, and mdt are implementations of md interface.
+ * @{
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <dt_object.h>
+
+struct md_device;
+struct md_device_operations;
+struct md_object;
+struct obd_export;
+
+enum {
+ UCRED_INVALID = -1,
+ UCRED_INIT = 0,
+ UCRED_OLD = 1,
+ UCRED_NEW = 2
+};
+
+enum {
+ MD_CAPAINFO_MAX = 5
+};
+
+/** there are at most 5 fids in one operation, see rename, NOTE the last one
+ * is a temporary one used for is_subdir() */
+struct md_capainfo {
+ __u32 mc_auth;
+ __u32 mc_padding;
+ struct lu_fid mc_fid[MD_CAPAINFO_MAX];
+ struct lustre_capa *mc_capa[MD_CAPAINFO_MAX];
+};
+
+struct md_quota {
+ struct obd_export *mq_exp;
+};
+
+/**
+ * Implemented in mdd/mdd_handler.c.
+ *
+ * XXX should be moved into separate .h/.c together with all md security
+ * related definitions.
+ */
+struct md_capainfo *md_capainfo(const struct lu_env *env);
+struct md_quota *md_quota(const struct lu_env *env);
+
+/** metadata attributes */
+enum ma_valid {
+ MA_INODE = (1 << 0),
+ MA_LOV = (1 << 1),
+ MA_COOKIE = (1 << 2),
+ MA_FLAGS = (1 << 3),
+ MA_LMV = (1 << 4),
+ MA_ACL_DEF = (1 << 5),
+ MA_LOV_DEF = (1 << 6),
+ MA_LAY_GEN = (1 << 7),
+ MA_HSM = (1 << 8),
+ MA_SOM = (1 << 9),
+ MA_PFID = (1 << 10)
+};
+
+typedef enum {
+ MDL_MINMODE = 0,
+ MDL_EX = 1,
+ MDL_PW = 2,
+ MDL_PR = 4,
+ MDL_CW = 8,
+ MDL_CR = 16,
+ MDL_NL = 32,
+ MDL_GROUP = 64,
+ MDL_MAXMODE
+} mdl_mode_t;
+
+typedef enum {
+ MDT_NUL_LOCK = 0,
+ MDT_REG_LOCK = (1 << 0),
+ MDT_PDO_LOCK = (1 << 1)
+} mdl_type_t;
+
+/* memory structure for hsm attributes
+ * for fields description see the on disk structure hsm_attrs
+ * which is defined in lustre_idl.h
+ */
+struct md_hsm {
+ __u32 mh_compat;
+ __u32 mh_flags;
+ __u64 mh_arch_id;
+ __u64 mh_arch_ver;
+};
+
+#define IOEPOCH_INVAL 0
+
+/* memory structure for som attributes
+ * for fields description see the on disk structure som_attrs
+ * which is defined in lustre_idl.h
+ */
+struct md_som_data {
+ __u32 msd_compat;
+ __u32 msd_incompat;
+ __u64 msd_ioepoch;
+ __u64 msd_size;
+ __u64 msd_blocks;
+ __u64 msd_mountid;
+};
+
+struct md_attr {
+ __u64 ma_valid;
+ __u64 ma_need;
+ __u64 ma_attr_flags;
+ struct lu_attr ma_attr;
+ struct lu_fid ma_pfid;
+ struct md_hsm ma_hsm;
+ struct lov_mds_md *ma_lmm;
+ struct lmv_stripe_md *ma_lmv;
+ void *ma_acl;
+ struct llog_cookie *ma_cookie;
+ struct lustre_capa *ma_capa;
+ struct md_som_data *ma_som;
+ int ma_lmm_size;
+ int ma_lmv_size;
+ int ma_acl_size;
+ int ma_cookie_size;
+ __u16 ma_layout_gen;
+};
+
+/** Additional parameters for create */
+struct md_op_spec {
+ union {
+ /** symlink target */
+ const char *sp_symname;
+ /** parent FID for cross-ref mkdir */
+ const struct lu_fid *sp_pfid;
+ /** eadata for regular files */
+ struct md_spec_reg {
+ /** lov objs exist already */
+ const struct lu_fid *fid;
+ const void *eadata;
+ int eadatalen;
+ } sp_ea;
+ } u;
+
+ /** Create flag from client: such as MDS_OPEN_CREAT, and others. */
+ __u64 sp_cr_flags;
+
+ /** don't create lov objects or llog cookie - this replay */
+ unsigned int no_create:1,
+ sp_cr_lookup:1, /* do lookup sanity check or not. */
+ sp_rm_entry:1; /* only remove name entry */
+
+ /** Current lock mode for parent dir where create is performing. */
+ mdl_mode_t sp_cr_mode;
+
+ /** to create directory */
+ const struct dt_index_features *sp_feat;
+};
+
+/**
+ * Operations implemented for each md object (both directory and leaf).
+ */
+struct md_object_operations {
+ int (*moo_permission)(const struct lu_env *env,
+ struct md_object *pobj, struct md_object *cobj,
+ struct md_attr *attr, int mask);
+
+ int (*moo_attr_get)(const struct lu_env *env, struct md_object *obj,
+ struct md_attr *attr);
+
+ int (*moo_attr_set)(const struct lu_env *env, struct md_object *obj,
+ const struct md_attr *attr);
+
+ int (*moo_xattr_get)(const struct lu_env *env, struct md_object *obj,
+ struct lu_buf *buf, const char *name);
+
+ int (*moo_xattr_list)(const struct lu_env *env, struct md_object *obj,
+ struct lu_buf *buf);
+
+ int (*moo_xattr_set)(const struct lu_env *env, struct md_object *obj,
+ const struct lu_buf *buf, const char *name,
+ int fl);
+
+ int (*moo_xattr_del)(const struct lu_env *env, struct md_object *obj,
+ const char *name);
+
+ /** This method is used to swap the layouts between 2 objects */
+ int (*moo_swap_layouts)(const struct lu_env *env,
+ struct md_object *obj1, struct md_object *obj2,
+ __u64 flags);
+
+ /** \retval number of bytes actually read upon success */
+ int (*moo_readpage)(const struct lu_env *env, struct md_object *obj,
+ const struct lu_rdpg *rdpg);
+
+ int (*moo_readlink)(const struct lu_env *env, struct md_object *obj,
+ struct lu_buf *buf);
+ int (*moo_changelog)(const struct lu_env *env,
+ enum changelog_rec_type type, int flags,
+ struct md_object *obj);
+ /** part of cross-ref operation */
+ int (*moo_object_create)(const struct lu_env *env,
+ struct md_object *obj,
+ const struct md_op_spec *spec,
+ struct md_attr *ma);
+
+ int (*moo_ref_add)(const struct lu_env *env,
+ struct md_object *obj,
+ const struct md_attr *ma);
+
+ int (*moo_ref_del)(const struct lu_env *env,
+ struct md_object *obj,
+ struct md_attr *ma);
+
+ int (*moo_open)(const struct lu_env *env,
+ struct md_object *obj, int flag);
+
+ int (*moo_close)(const struct lu_env *env, struct md_object *obj,
+ struct md_attr *ma, int mode);
+
+ int (*moo_capa_get)(const struct lu_env *, struct md_object *,
+ struct lustre_capa *, int renewal);
+
+ int (*moo_object_sync)(const struct lu_env *, struct md_object *);
+
+ int (*moo_file_lock)(const struct lu_env *env, struct md_object *obj,
+ struct lov_mds_md *lmm, struct ldlm_extent *extent,
+ struct lustre_handle *lockh);
+ int (*moo_file_unlock)(const struct lu_env *env, struct md_object *obj,
+ struct lov_mds_md *lmm,
+ struct lustre_handle *lockh);
+ int (*moo_object_lock)(const struct lu_env *env, struct md_object *obj,
+ struct lustre_handle *lh,
+ struct ldlm_enqueue_info *einfo,
+ void *policy);
+};
+
+/**
+ * Operations implemented for each directory object.
+ */
+struct md_dir_operations {
+ int (*mdo_is_subdir) (const struct lu_env *env, struct md_object *obj,
+ const struct lu_fid *fid, struct lu_fid *sfid);
+
+ int (*mdo_lookup)(const struct lu_env *env, struct md_object *obj,
+ const struct lu_name *lname, struct lu_fid *fid,
+ struct md_op_spec *spec);
+
+ mdl_mode_t (*mdo_lock_mode)(const struct lu_env *env,
+ struct md_object *obj,
+ mdl_mode_t mode);
+
+ int (*mdo_create)(const struct lu_env *env, struct md_object *pobj,
+ const struct lu_name *lname, struct md_object *child,
+ struct md_op_spec *spec,
+ struct md_attr *ma);
+
+ /** This method is used for creating data object for this meta object*/
+ int (*mdo_create_data)(const struct lu_env *env, struct md_object *p,
+ struct md_object *o,
+ const struct md_op_spec *spec,
+ struct md_attr *ma);
+
+ int (*mdo_rename)(const struct lu_env *env, struct md_object *spobj,
+ struct md_object *tpobj, const struct lu_fid *lf,
+ const struct lu_name *lsname, struct md_object *tobj,
+ const struct lu_name *ltname, struct md_attr *ma);
+
+ int (*mdo_link)(const struct lu_env *env, struct md_object *tgt_obj,
+ struct md_object *src_obj, const struct lu_name *lname,
+ struct md_attr *ma);
+
+ int (*mdo_unlink)(const struct lu_env *env, struct md_object *pobj,
+ struct md_object *cobj, const struct lu_name *lname,
+ struct md_attr *ma, int no_name);
+
+ /** This method is used to compare a requested layout to an existing
+ * layout (struct lov_mds_md_v1/3 vs struct lov_mds_md_v1/3) */
+ int (*mdo_lum_lmm_cmp)(const struct lu_env *env,
+ struct md_object *cobj,
+ const struct md_op_spec *spec,
+ struct md_attr *ma);
+
+ /** partial ops for cross-ref case */
+ int (*mdo_name_insert)(const struct lu_env *env,
+ struct md_object *obj,
+ const struct lu_name *lname,
+ const struct lu_fid *fid,
+ const struct md_attr *ma);
+
+ int (*mdo_name_remove)(const struct lu_env *env,
+ struct md_object *obj,
+ const struct lu_name *lname,
+ const struct md_attr *ma);
+
+ int (*mdo_rename_tgt)(const struct lu_env *env, struct md_object *pobj,
+ struct md_object *tobj, const struct lu_fid *fid,
+ const struct lu_name *lname, struct md_attr *ma);
+};
+
+struct md_device_operations {
+ /** meta-data device related handlers. */
+ int (*mdo_root_get)(const struct lu_env *env, struct md_device *m,
+ struct lu_fid *f);
+
+ int (*mdo_maxsize_get)(const struct lu_env *env, struct md_device *m,
+ int *md_size, int *cookie_size);
+
+ int (*mdo_statfs)(const struct lu_env *env, struct md_device *m,
+ struct obd_statfs *sfs);
+
+ int (*mdo_init_capa_ctxt)(const struct lu_env *env, struct md_device *m,
+ int mode, unsigned long timeout, __u32 alg,
+ struct lustre_capa_key *keys);
+
+ int (*mdo_update_capa_key)(const struct lu_env *env,
+ struct md_device *m,
+ struct lustre_capa_key *key);
+
+ int (*mdo_llog_ctxt_get)(const struct lu_env *env,
+ struct md_device *m, int idx, void **h);
+
+ int (*mdo_iocontrol)(const struct lu_env *env, struct md_device *m,
+ unsigned int cmd, int len, void *data);
+};
+
+enum md_upcall_event {
+ /** Sync the md layer*/
+ MD_LOV_SYNC = (1 << 0),
+ /** Just for split, no need trans, for replay */
+ MD_NO_TRANS = (1 << 1),
+ MD_LOV_CONFIG = (1 << 2),
+ /** Trigger quota recovery */
+ MD_LOV_QUOTA = (1 << 3)
+};
+
+struct md_upcall {
+ /** this lock protects upcall using against its removal
+ * read lock is for usage the upcall, write - for init/fini */
+ struct rw_semaphore mu_upcall_sem;
+ /** device to call, upper layer normally */
+ struct md_device *mu_upcall_dev;
+ /** upcall function */
+ int (*mu_upcall)(const struct lu_env *env, struct md_device *md,
+ enum md_upcall_event ev, void *data);
+};
+
+struct md_device {
+ struct lu_device md_lu_dev;
+ const struct md_device_operations *md_ops;
+ struct md_upcall md_upcall;
+};
+
+static inline void md_upcall_init(struct md_device *m, void *upcl)
+{
+ init_rwsem(&m->md_upcall.mu_upcall_sem);
+ m->md_upcall.mu_upcall_dev = NULL;
+ m->md_upcall.mu_upcall = upcl;
+}
+
+static inline void md_upcall_dev_set(struct md_device *m, struct md_device *up)
+{
+ down_write(&m->md_upcall.mu_upcall_sem);
+ m->md_upcall.mu_upcall_dev = up;
+ up_write(&m->md_upcall.mu_upcall_sem);
+}
+
+static inline void md_upcall_fini(struct md_device *m)
+{
+ down_write(&m->md_upcall.mu_upcall_sem);
+ m->md_upcall.mu_upcall_dev = NULL;
+ m->md_upcall.mu_upcall = NULL;
+ up_write(&m->md_upcall.mu_upcall_sem);
+}
+
+static inline int md_do_upcall(const struct lu_env *env, struct md_device *m,
+ enum md_upcall_event ev, void *data)
+{
+ int rc = 0;
+ down_read(&m->md_upcall.mu_upcall_sem);
+ if (m->md_upcall.mu_upcall_dev != NULL &&
+ m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall != NULL) {
+ rc = m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall(env,
+ m->md_upcall.mu_upcall_dev,
+ ev, data);
+ }
+ up_read(&m->md_upcall.mu_upcall_sem);
+ return rc;
+}
+
+struct md_object {
+ struct lu_object mo_lu;
+ const struct md_object_operations *mo_ops;
+ const struct md_dir_operations *mo_dir_ops;
+};
+
+/**
+ * seq-server site.
+ */
+struct seq_server_site {
+ struct lu_site *ss_lu;
+ /**
+ * mds number of this site.
+ */
+ mdsno_t ss_node_id;
+ /**
+ * Fid location database
+ */
+ struct lu_server_fld *ss_server_fld;
+ struct lu_client_fld *ss_client_fld;
+
+ /**
+ * Server Seq Manager
+ */
+ struct lu_server_seq *ss_server_seq;
+
+ /**
+ * Controller Seq Manager
+ */
+ struct lu_server_seq *ss_control_seq;
+ struct obd_export *ss_control_exp;
+
+ /**
+ * Client Seq Manager
+ */
+ struct lu_client_seq *ss_client_seq;
+};
+
+static inline struct md_device *lu2md_dev(const struct lu_device *d)
+{
+ LASSERT(IS_ERR(d) || lu_device_is_md(d));
+ return container_of0(d, struct md_device, md_lu_dev);
+}
+
+static inline struct lu_device *md2lu_dev(struct md_device *d)
+{
+ return &d->md_lu_dev;
+}
+
+static inline struct md_object *lu2md(const struct lu_object *o)
+{
+ LASSERT(o == NULL || IS_ERR(o) || lu_device_is_md(o->lo_dev));
+ return container_of0(o, struct md_object, mo_lu);
+}
+
+static inline struct md_object *md_object_next(const struct md_object *obj)
+{
+ return (obj ? lu2md(lu_object_next(&obj->mo_lu)) : NULL);
+}
+
+static inline struct md_device *md_obj2dev(const struct md_object *o)
+{
+ LASSERT(o == NULL || IS_ERR(o) || lu_device_is_md(o->mo_lu.lo_dev));
+ return container_of0(o->mo_lu.lo_dev, struct md_device, md_lu_dev);
+}
+
+static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
+{
+ return s->ld_seq_site;
+}
+
+static inline int md_device_init(struct md_device *md, struct lu_device_type *t)
+{
+ return lu_device_init(&md->md_lu_dev, t);
+}
+
+static inline void md_device_fini(struct md_device *md)
+{
+ lu_device_fini(&md->md_lu_dev);
+}
+
+static inline struct md_object *md_object_find_slice(const struct lu_env *env,
+ struct md_device *md,
+ const struct lu_fid *f)
+{
+ return lu2md(lu_object_find_slice(env, md2lu_dev(md), f, NULL));
+}
+
+
+/** md operations */
+static inline int mo_permission(const struct lu_env *env,
+ struct md_object *p,
+ struct md_object *c,
+ struct md_attr *at,
+ int mask)
+{
+ LASSERT(c->mo_ops->moo_permission);
+ return c->mo_ops->moo_permission(env, p, c, at, mask);
+}
+
+static inline int mo_attr_get(const struct lu_env *env,
+ struct md_object *m,
+ struct md_attr *at)
+{
+ LASSERT(m->mo_ops->moo_attr_get);
+ return m->mo_ops->moo_attr_get(env, m, at);
+}
+
+static inline int mo_readlink(const struct lu_env *env,
+ struct md_object *m,
+ struct lu_buf *buf)
+{
+ LASSERT(m->mo_ops->moo_readlink);
+ return m->mo_ops->moo_readlink(env, m, buf);
+}
+
+static inline int mo_changelog(const struct lu_env *env,
+ enum changelog_rec_type type,
+ int flags, struct md_object *m)
+{
+ LASSERT(m->mo_ops->moo_changelog);
+ return m->mo_ops->moo_changelog(env, type, flags, m);
+}
+
+static inline int mo_attr_set(const struct lu_env *env,
+ struct md_object *m,
+ const struct md_attr *at)
+{
+ LASSERT(m->mo_ops->moo_attr_set);
+ return m->mo_ops->moo_attr_set(env, m, at);
+}
+
+static inline int mo_xattr_get(const struct lu_env *env,
+ struct md_object *m,
+ struct lu_buf *buf,
+ const char *name)
+{
+ LASSERT(m->mo_ops->moo_xattr_get);
+ return m->mo_ops->moo_xattr_get(env, m, buf, name);
+}
+
+static inline int mo_xattr_del(const struct lu_env *env,
+ struct md_object *m,
+ const char *name)
+{
+ LASSERT(m->mo_ops->moo_xattr_del);
+ return m->mo_ops->moo_xattr_del(env, m, name);
+}
+
+static inline int mo_xattr_set(const struct lu_env *env,
+ struct md_object *m,
+ const struct lu_buf *buf,
+ const char *name,
+ int flags)
+{
+ LASSERT(m->mo_ops->moo_xattr_set);
+ return m->mo_ops->moo_xattr_set(env, m, buf, name, flags);
+}
+
+static inline int mo_xattr_list(const struct lu_env *env,
+ struct md_object *m,
+ struct lu_buf *buf)
+{
+ LASSERT(m->mo_ops->moo_xattr_list);
+ return m->mo_ops->moo_xattr_list(env, m, buf);
+}
+
+static inline int mo_swap_layouts(const struct lu_env *env,
+ struct md_object *o1,
+ struct md_object *o2, __u64 flags)
+{
+ LASSERT(o1->mo_ops->moo_swap_layouts);
+ LASSERT(o2->mo_ops->moo_swap_layouts);
+ if (o1->mo_ops->moo_swap_layouts != o2->mo_ops->moo_swap_layouts)
+ return -EPERM;
+ return o1->mo_ops->moo_swap_layouts(env, o1, o2, flags);
+}
+
+static inline int mo_open(const struct lu_env *env,
+ struct md_object *m,
+ int flags)
+{
+ LASSERT(m->mo_ops->moo_open);
+ return m->mo_ops->moo_open(env, m, flags);
+}
+
+static inline int mo_close(const struct lu_env *env,
+ struct md_object *m,
+ struct md_attr *ma,
+ int mode)
+{
+ LASSERT(m->mo_ops->moo_close);
+ return m->mo_ops->moo_close(env, m, ma, mode);
+}
+
+static inline int mo_readpage(const struct lu_env *env,
+ struct md_object *m,
+ const struct lu_rdpg *rdpg)
+{
+ LASSERT(m->mo_ops->moo_readpage);
+ return m->mo_ops->moo_readpage(env, m, rdpg);
+}
+
+static inline int mo_object_create(const struct lu_env *env,
+ struct md_object *m,
+ const struct md_op_spec *spc,
+ struct md_attr *at)
+{
+ LASSERT(m->mo_ops->moo_object_create);
+ return m->mo_ops->moo_object_create(env, m, spc, at);
+}
+
+static inline int mo_ref_add(const struct lu_env *env,
+ struct md_object *m,
+ const struct md_attr *ma)
+{
+ LASSERT(m->mo_ops->moo_ref_add);
+ return m->mo_ops->moo_ref_add(env, m, ma);
+}
+
+static inline int mo_ref_del(const struct lu_env *env,
+ struct md_object *m,
+ struct md_attr *ma)
+{
+ LASSERT(m->mo_ops->moo_ref_del);
+ return m->mo_ops->moo_ref_del(env, m, ma);
+}
+
+static inline int mo_capa_get(const struct lu_env *env,
+ struct md_object *m,
+ struct lustre_capa *c,
+ int renewal)
+{
+ LASSERT(m->mo_ops->moo_capa_get);
+ return m->mo_ops->moo_capa_get(env, m, c, renewal);
+}
+
+static inline int mo_object_sync(const struct lu_env *env, struct md_object *m)
+{
+ LASSERT(m->mo_ops->moo_object_sync);
+ return m->mo_ops->moo_object_sync(env, m);
+}
+
+static inline int mo_file_lock(const struct lu_env *env, struct md_object *m,
+ struct lov_mds_md *lmm,
+ struct ldlm_extent *extent,
+ struct lustre_handle *lockh)
+{
+ LASSERT(m->mo_ops->moo_file_lock);
+ return m->mo_ops->moo_file_lock(env, m, lmm, extent, lockh);
+}
+
+static inline int mo_file_unlock(const struct lu_env *env, struct md_object *m,
+ struct lov_mds_md *lmm,
+ struct lustre_handle *lockh)
+{
+ LASSERT(m->mo_ops->moo_file_unlock);
+ return m->mo_ops->moo_file_unlock(env, m, lmm, lockh);
+}
+
+static inline int mo_object_lock(const struct lu_env *env,
+ struct md_object *m,
+ struct lustre_handle *lh,
+ struct ldlm_enqueue_info *einfo,
+ void *policy)
+{
+ LASSERT(m->mo_ops->moo_object_lock);
+ return m->mo_ops->moo_object_lock(env, m, lh, einfo, policy);
+}
+
+static inline int mdo_lookup(const struct lu_env *env,
+ struct md_object *p,
+ const struct lu_name *lname,
+ struct lu_fid *f,
+ struct md_op_spec *spec)
+{
+ LASSERT(p->mo_dir_ops->mdo_lookup);
+ return p->mo_dir_ops->mdo_lookup(env, p, lname, f, spec);
+}
+
+static inline mdl_mode_t mdo_lock_mode(const struct lu_env *env,
+ struct md_object *mo,
+ mdl_mode_t lm)
+{
+ if (mo->mo_dir_ops->mdo_lock_mode == NULL)
+ return MDL_MINMODE;
+ return mo->mo_dir_ops->mdo_lock_mode(env, mo, lm);
+}
+
+static inline int mdo_create(const struct lu_env *env,
+ struct md_object *p,
+ const struct lu_name *lchild_name,
+ struct md_object *c,
+ struct md_op_spec *spc,
+ struct md_attr *at)
+{
+ LASSERT(p->mo_dir_ops->mdo_create);
+ return p->mo_dir_ops->mdo_create(env, p, lchild_name, c, spc, at);
+}
+
+static inline int mdo_create_data(const struct lu_env *env,
+ struct md_object *p,
+ struct md_object *c,
+ const struct md_op_spec *spec,
+ struct md_attr *ma)
+{
+ LASSERT(c->mo_dir_ops->mdo_create_data);
+ return c->mo_dir_ops->mdo_create_data(env, p, c, spec, ma);
+}
+
+static inline int mdo_rename(const struct lu_env *env,
+ struct md_object *sp,
+ struct md_object *tp,
+ const struct lu_fid *lf,
+ const struct lu_name *lsname,
+ struct md_object *t,
+ const struct lu_name *ltname,
+ struct md_attr *ma)
+{
+ LASSERT(tp->mo_dir_ops->mdo_rename);
+ return tp->mo_dir_ops->mdo_rename(env, sp, tp, lf, lsname, t, ltname,
+ ma);
+}
+
+static inline int mdo_is_subdir(const struct lu_env *env,
+ struct md_object *mo,
+ const struct lu_fid *fid,
+ struct lu_fid *sfid)
+{
+ LASSERT(mo->mo_dir_ops->mdo_is_subdir);
+ return mo->mo_dir_ops->mdo_is_subdir(env, mo, fid, sfid);
+}
+
+static inline int mdo_link(const struct lu_env *env,
+ struct md_object *p,
+ struct md_object *s,
+ const struct lu_name *lname,
+ struct md_attr *ma)
+{
+ LASSERT(s->mo_dir_ops->mdo_link);
+ return s->mo_dir_ops->mdo_link(env, p, s, lname, ma);
+}
+
+static inline int mdo_unlink(const struct lu_env *env,
+ struct md_object *p,
+ struct md_object *c,
+ const struct lu_name *lname,
+ struct md_attr *ma, int no_name)
+{
+ LASSERT(p->mo_dir_ops->mdo_unlink);
+ return p->mo_dir_ops->mdo_unlink(env, p, c, lname, ma, no_name);
+}
+
+static inline int mdo_lum_lmm_cmp(const struct lu_env *env,
+ struct md_object *c,
+ const struct md_op_spec *spec,
+ struct md_attr *ma)
+{
+ LASSERT(c->mo_dir_ops->mdo_lum_lmm_cmp);
+ return c->mo_dir_ops->mdo_lum_lmm_cmp(env, c, spec, ma);
+}
+
+static inline int mdo_name_insert(const struct lu_env *env,
+ struct md_object *p,
+ const struct lu_name *lname,
+ const struct lu_fid *f,
+ const struct md_attr *ma)
+{
+ LASSERT(p->mo_dir_ops->mdo_name_insert);
+ return p->mo_dir_ops->mdo_name_insert(env, p, lname, f, ma);
+}
+
+static inline int mdo_name_remove(const struct lu_env *env,
+ struct md_object *p,
+ const struct lu_name *lname,
+ const struct md_attr *ma)
+{
+ LASSERT(p->mo_dir_ops->mdo_name_remove);
+ return p->mo_dir_ops->mdo_name_remove(env, p, lname, ma);
+}
+
+static inline int mdo_rename_tgt(const struct lu_env *env,
+ struct md_object *p,
+ struct md_object *t,
+ const struct lu_fid *lf,
+ const struct lu_name *lname,
+ struct md_attr *ma)
+{
+ if (t) {
+ LASSERT(t->mo_dir_ops->mdo_rename_tgt);
+ return t->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma);
+ } else {
+ LASSERT(p->mo_dir_ops->mdo_rename_tgt);
+ return p->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma);
+ }
+}
+
+/**
+ * Used in MDD/OUT layer for object lock rule
+ **/
+enum mdd_object_role {
+ MOR_SRC_PARENT,
+ MOR_SRC_CHILD,
+ MOR_TGT_PARENT,
+ MOR_TGT_CHILD,
+ MOR_TGT_ORPHAN
+};
+
+struct dt_device;
+/**
+ * Structure to hold object information. This is used to create object
+ * \pre llod_dir exist
+ */
+struct lu_local_obj_desc {
+ const char *llod_dir;
+ const char *llod_name;
+ __u32 llod_oid;
+ int llod_is_index;
+ const struct dt_index_features *llod_feat;
+ struct list_head llod_linkage;
+};
+
+int lustre_buf2som(void *buf, int rc, struct md_som_data *msd);
+int lustre_buf2hsm(void *buf, int rc, struct md_hsm *mh);
+void lustre_hsm2buf(void *buf, struct md_hsm *mh);
+
+struct lu_ucred {
+ __u32 uc_valid;
+ __u32 uc_o_uid;
+ __u32 uc_o_gid;
+ __u32 uc_o_fsuid;
+ __u32 uc_o_fsgid;
+ __u32 uc_uid;
+ __u32 uc_gid;
+ __u32 uc_fsuid;
+ __u32 uc_fsgid;
+ __u32 uc_suppgids[2];
+ cfs_cap_t uc_cap;
+ __u32 uc_umask;
+ group_info_t *uc_ginfo;
+ struct md_identity *uc_identity;
+};
+
+struct lu_ucred *lu_ucred(const struct lu_env *env);
+
+struct lu_ucred *lu_ucred_check(const struct lu_env *env);
+
+struct lu_ucred *lu_ucred_assert(const struct lu_env *env);
+
+int lu_ucred_global_init(void);
+
+void lu_ucred_global_fini(void);
+
+#define md_cap_t(x) (x)
+
+#define MD_CAP_TO_MASK(x) (1 << (x))
+
+#define md_cap_raised(c, flag) (md_cap_t(c) & MD_CAP_TO_MASK(flag))
+
+/* capable() is copied from linux kernel! */
+static inline int md_capable(struct lu_ucred *uc, cfs_cap_t cap)
+{
+ if (md_cap_raised(uc->uc_cap, cap))
+ return 1;
+ return 0;
+}
+
+/** @} md */
+#endif /* _LINUX_MD_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
new file mode 100644
index 000000000000..0a251fdfe167
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -0,0 +1,1677 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __OBD_H
+#define __OBD_H
+
+#include <linux/obd.h>
+
+#define IOC_OSC_TYPE 'h'
+#define IOC_OSC_MIN_NR 20
+#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR 50
+
+#define IOC_MDC_TYPE 'i'
+#define IOC_MDC_MIN_NR 20
+#define IOC_MDC_MAX_NR 50
+
+#include <lustre/lustre_idl.h>
+#include <lu_ref.h>
+#include <lustre_lib.h>
+#include <lustre_export.h>
+#include <lustre_fld.h>
+#include <lustre_capa.h>
+
+#include <linux/libcfs/bitmap.h>
+
+
+#define MAX_OBD_DEVICES 8192
+
+struct osc_async_rc {
+ int ar_rc;
+ int ar_force_sync;
+ __u64 ar_min_xid;
+};
+
+struct lov_oinfo { /* per-stripe data structure */
+ struct ost_id loi_oi; /* object ID/Sequence on the target OST */
+ int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */
+ int loi_ost_gen; /* generation of this loi_ost_idx */
+
+ unsigned long loi_kms_valid:1;
+ __u64 loi_kms; /* known minimum size */
+ struct ost_lvb loi_lvb;
+ struct osc_async_rc loi_ar;
+};
+
+static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms)
+{
+ oinfo->loi_kms = kms;
+ oinfo->loi_kms_valid = 1;
+}
+
+static inline void loi_init(struct lov_oinfo *loi)
+{
+}
+
+struct lov_stripe_md {
+ atomic_t lsm_refc;
+ spinlock_t lsm_lock;
+ pid_t lsm_lock_owner; /* debugging */
+
+ /* maximum possible file size, might change as OSTs status changes,
+ * e.g. disconnected, deactivated */
+ __u64 lsm_maxbytes;
+ struct {
+ /* Public members. */
+ struct ost_id lw_object_oi; /* lov object id/seq */
+
+ /* LOV-private members start here -- only for use in lov/. */
+ __u32 lw_magic;
+ __u32 lw_stripe_size; /* size of the stripe */
+ __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */
+ __u16 lw_stripe_count; /* number of objects being striped over */
+ __u16 lw_layout_gen; /* generation of the layout */
+ char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+ } lsm_wire;
+
+ struct lov_oinfo *lsm_oinfo[0];
+};
+
+#define lsm_oi lsm_wire.lw_object_oi
+#define lsm_magic lsm_wire.lw_magic
+#define lsm_layout_gen lsm_wire.lw_layout_gen
+#define lsm_stripe_size lsm_wire.lw_stripe_size
+#define lsm_pattern lsm_wire.lw_pattern
+#define lsm_stripe_count lsm_wire.lw_stripe_count
+#define lsm_pool_name lsm_wire.lw_pool_name
+
+struct obd_info;
+
+typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
+
+/* obd info for a particular level (lov, osc). */
+struct obd_info {
+ /* Lock policy. It keeps an extent which is specific for a particular
+ * OSC. (e.g. lov_prep_enqueue_set initialises extent of the policy,
+ * and osc_enqueue passes it into ldlm_lock_match & ldlm_cli_enqueue. */
+ ldlm_policy_data_t oi_policy;
+ /* Flags used for set request specific flags:
+ - while lock handling, the flags obtained on the enqueue
+ request are set here.
+ - while stats, the flags used for control delay/resend.
+ - while setattr, the flags used for distinguish punch operation
+ */
+ __u64 oi_flags;
+ /* Lock handle specific for every OSC lock. */
+ struct lustre_handle *oi_lockh;
+ /* lsm data specific for every OSC. */
+ struct lov_stripe_md *oi_md;
+ /* obdo data specific for every OSC, if needed at all. */
+ struct obdo *oi_oa;
+ /* statfs data specific for every OSC, if needed at all. */
+ struct obd_statfs *oi_osfs;
+ /* An update callback which is called to update some data on upper
+ * level. E.g. it is used for update lsm->lsm_oinfo at every recieved
+ * request in osc level for enqueue requests. It is also possible to
+ * update some caller data from LOV layer if needed. */
+ obd_enqueue_update_f oi_cb_up;
+ /* oss capability, its type is obd_capa in client to avoid copy.
+ * in contrary its type is lustre_capa in OSS. */
+ void *oi_capa;
+ /* transfer jobid from ost_sync() to filter_sync()... */
+ char *oi_jobid;
+};
+
+/* compare all relevant fields. */
+static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1,
+ struct lov_stripe_md *m2)
+{
+ /*
+ * ->lsm_wire contains padding, but it should be zeroed out during
+ * allocation.
+ */
+ return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire);
+}
+
+static inline int lov_lum_lsm_cmp(struct lov_user_md *lum,
+ struct lov_stripe_md *lsm)
+{
+ if (lsm->lsm_magic != lum->lmm_magic)
+ return 1;
+ if ((lsm->lsm_stripe_count != 0) && (lum->lmm_stripe_count != 0) &&
+ (lsm->lsm_stripe_count != lum->lmm_stripe_count))
+ return 2;
+ if ((lsm->lsm_stripe_size != 0) && (lum->lmm_stripe_size != 0) &&
+ (lsm->lsm_stripe_size != lum->lmm_stripe_size))
+ return 3;
+ if ((lsm->lsm_pattern != 0) && (lum->lmm_pattern != 0) &&
+ (lsm->lsm_pattern != lum->lmm_pattern))
+ return 4;
+ if ((lsm->lsm_magic == LOV_MAGIC_V3) &&
+ (strncmp(lsm->lsm_pool_name,
+ ((struct lov_user_md_v3 *)lum)->lmm_pool_name,
+ LOV_MAXPOOLNAME) != 0))
+ return 5;
+ return 0;
+}
+
+static inline int lov_lum_swab_if_needed(struct lov_user_md_v3 *lumv3,
+ int *lmm_magic,
+ struct lov_user_md *lum)
+{
+ if (lum && copy_from_user(lumv3, lum,sizeof(struct lov_user_md_v1)))
+ return -EFAULT;
+
+ *lmm_magic = lumv3->lmm_magic;
+
+ if (*lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lumv3);
+ *lmm_magic = LOV_USER_MAGIC_V1;
+ } else if (*lmm_magic == LOV_USER_MAGIC_V3) {
+ if (lum && copy_from_user(lumv3, lum, sizeof(*lumv3)))
+ return -EFAULT;
+ } else if (*lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
+ if (lum && copy_from_user(lumv3, lum, sizeof(*lumv3)))
+ return -EFAULT;
+ lustre_swab_lov_user_md_v3(lumv3);
+ *lmm_magic = LOV_USER_MAGIC_V3;
+ } else if (*lmm_magic != LOV_USER_MAGIC_V1) {
+ CDEBUG(D_IOCTL,
+ "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
+ *lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
+
+struct obd_type {
+ struct list_head typ_chain;
+ struct obd_ops *typ_dt_ops;
+ struct md_ops *typ_md_ops;
+ proc_dir_entry_t *typ_procroot;
+ char *typ_name;
+ int typ_refcnt;
+ struct lu_device_type *typ_lu;
+ spinlock_t obd_type_lock;
+};
+
+struct brw_page {
+ obd_off off;
+ struct page *pg;
+ int count;
+ obd_flag flag;
+};
+
+/* Individual type definitions */
+
+struct ost_server_data;
+
+struct osd_properties {
+ size_t osd_max_ea_size;
+};
+
+#define OBT_MAGIC 0xBDDECEAE
+/* hold common fields for "target" device */
+struct obd_device_target {
+ __u32 obt_magic;
+ __u32 obt_instance;
+ struct super_block *obt_sb;
+ /** last_rcvd file */
+ struct file *obt_rcvd_filp;
+ __u64 obt_mount_count;
+ struct rw_semaphore obt_rwsem;
+ struct vfsmount *obt_vfsmnt;
+ struct file *obt_health_check_filp;
+ struct osd_properties obt_osd_properties;
+ struct obd_job_stats obt_jobstats;
+};
+
+/* llog contexts */
+enum llog_ctxt_id {
+ LLOG_CONFIG_ORIG_CTXT = 0,
+ LLOG_CONFIG_REPL_CTXT,
+ LLOG_MDS_OST_ORIG_CTXT,
+ LLOG_MDS_OST_REPL_CTXT,
+ LLOG_SIZE_ORIG_CTXT,
+ LLOG_SIZE_REPL_CTXT,
+ LLOG_RD1_ORIG_CTXT,
+ LLOG_RD1_REPL_CTXT,
+ LLOG_TEST_ORIG_CTXT,
+ LLOG_TEST_REPL_CTXT,
+ LLOG_LOVEA_ORIG_CTXT,
+ LLOG_LOVEA_REPL_CTXT,
+ LLOG_CHANGELOG_ORIG_CTXT, /**< changelog generation on mdd */
+ LLOG_CHANGELOG_REPL_CTXT, /**< changelog access on clients */
+ LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */
+ LLOG_MAX_CTXTS
+};
+
+#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */
+
+struct filter_subdirs {
+ struct dentry *dentry[FILTER_SUBDIR_COUNT];
+};
+
+
+struct filter_ext {
+ __u64 fe_start;
+ __u64 fe_end;
+};
+
+struct filter_obd {
+ /* NB this field MUST be first */
+ struct obd_device_target fo_obt;
+ const char *fo_fstype;
+
+ int fo_group_count;
+ struct dentry *fo_dentry_O;
+ struct dentry **fo_dentry_O_groups;
+ struct filter_subdirs *fo_dentry_O_sub;
+ struct mutex fo_init_lock; /* group initialization lock*/
+ int fo_committed_group;
+
+ spinlock_t fo_objidlock; /* protect fo_lastobjid */
+
+ unsigned long fo_destroys_in_progress;
+ struct mutex fo_create_locks[FILTER_SUBDIR_COUNT];
+
+ struct list_head fo_export_list;
+ int fo_subdir_count;
+
+ obd_size fo_tot_dirty; /* protected by obd_osfs_lock */
+ obd_size fo_tot_granted; /* all values in bytes */
+ obd_size fo_tot_pending;
+ int fo_tot_granted_clients;
+
+ obd_size fo_readcache_max_filesize;
+ spinlock_t fo_flags_lock;
+ unsigned int fo_read_cache:1, /**< enable read-only cache */
+ fo_writethrough_cache:1,/**< read cache writes */
+ fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/
+ fo_raid_degraded:1;/**< RAID device degraded */
+
+ struct obd_import *fo_mdc_imp;
+ struct obd_uuid fo_mdc_uuid;
+ struct lustre_handle fo_mdc_conn;
+ struct file **fo_last_objid_files;
+ __u64 *fo_last_objids; /* last created objid for groups,
+ * protected by fo_objidlock */
+
+ struct mutex fo_alloc_lock;
+
+ atomic_t fo_r_in_flight;
+ atomic_t fo_w_in_flight;
+
+ /*
+ * per-filter pool of kiobuf's allocated by filter_common_setup() and
+ * torn down by filter_cleanup().
+ *
+ * This pool contains kiobuf used by
+ * filter_{prep,commit}rw_{read,write}() and is shared by all OST
+ * threads.
+ *
+ * Locking: protected by internal lock of cfs_hash, pool can be
+ * found from this hash table by t_id of ptlrpc_thread.
+ */
+ struct cfs_hash *fo_iobuf_hash;
+
+ struct brw_stats fo_filter_stats;
+
+ int fo_fmd_max_num; /* per exp filter_mod_data */
+ int fo_fmd_max_age; /* jiffies to fmd expiry */
+ unsigned long fo_syncjournal:1, /* sync journal on writes */
+ fo_sync_lock_cancel:2;/* sync on lock cancel */
+
+
+ /* sptlrpc stuff */
+ rwlock_t fo_sptlrpc_lock;
+ struct sptlrpc_rule_set fo_sptlrpc_rset;
+
+ /* capability related */
+ unsigned int fo_fl_oss_capa;
+ struct list_head fo_capa_keys;
+ struct hlist_head *fo_capa_hash;
+ int fo_sec_level;
+};
+
+struct timeout_item {
+ enum timeout_event ti_event;
+ cfs_time_t ti_timeout;
+ timeout_cb_t ti_cb;
+ void *ti_cb_data;
+ struct list_head ti_obd_list;
+ struct list_head ti_chain;
+};
+
+#define OSC_MAX_RIF_DEFAULT 8
+#define MDS_OSC_MAX_RIF_DEFAULT 50
+#define OSC_MAX_RIF_MAX 256
+#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
+#define OSC_DEFAULT_RESENDS 10
+
+/* possible values for fo_sync_lock_cancel */
+enum {
+ NEVER_SYNC_ON_CANCEL = 0,
+ BLOCKING_SYNC_ON_CANCEL = 1,
+ ALWAYS_SYNC_ON_CANCEL = 2,
+ NUM_SYNC_ON_CANCEL_STATES
+};
+
+#define MDC_MAX_RIF_DEFAULT 8
+#define MDC_MAX_RIF_MAX 512
+
+struct mdc_rpc_lock;
+struct obd_import;
+struct client_obd {
+ struct rw_semaphore cl_sem;
+ struct obd_uuid cl_target_uuid;
+ struct obd_import *cl_import; /* ptlrpc connection state */
+ int cl_conn_count;
+ /* max_mds_easize is purely a performance thing so we don't have to
+ * call obd_size_diskmd() all the time. */
+ int cl_default_mds_easize;
+ int cl_max_mds_easize;
+ int cl_max_mds_cookiesize;
+
+ enum lustre_sec_part cl_sp_me;
+ enum lustre_sec_part cl_sp_to;
+ struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */
+
+ /* the grant values are protected by loi_list_lock below */
+ long cl_dirty; /* all _dirty_ in bytes */
+ long cl_dirty_max; /* allowed w/o rpc */
+ long cl_dirty_transit; /* dirty synchronous */
+ long cl_avail_grant; /* bytes of credit for ost */
+ long cl_lost_grant; /* lost credits (trunc) */
+
+ /* since we allocate grant by blocks, we don't know how many grant will
+ * be used to add a page into cache. As a solution, we reserve maximum
+ * grant before trying to dirty a page and unreserve the rest.
+ * See osc_{reserve|unreserve}_grant for details. */
+ long cl_reserved_grant;
+ struct list_head cl_cache_waiters; /* waiting for cache/grant */
+ cfs_time_t cl_next_shrink_grant; /* jiffies */
+ struct list_head cl_grant_shrink_list; /* Timeout event list */
+ int cl_grant_shrink_interval; /* seconds */
+
+ /* A chunk is an optimal size used by osc_extent to determine
+ * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */
+ int cl_chunkbits;
+ int cl_chunk;
+ int cl_extent_tax; /* extent overhead, by bytes */
+
+ /* keep track of objects that have lois that contain pages which
+ * have been queued for async brw. this lock also protects the
+ * lists of osc_client_pages that hang off of the loi */
+ /*
+ * ->cl_loi_list_lock protects consistency of
+ * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
+ * ->ap_completion() call-backs are executed under this lock. As we
+ * cannot guarantee that these call-backs never block on all platforms
+ * (as a matter of fact they do block on Mac OS X), type of
+ * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux
+ * and blocking mutex on Mac OS X. (Alternative is to make this lock
+ * blocking everywhere, but we don't want to slow down fast-path of
+ * our main platform.)
+ *
+ * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
+ * with client_obd_list_{un,}lock() and
+ * client_obd_list_lock_{init,done}() functions.
+ *
+ * NB by Jinshan: though field names are still _loi_, but actually
+ * osc_object{}s are in the list.
+ */
+ client_obd_lock_t cl_loi_list_lock;
+ struct list_head cl_loi_ready_list;
+ struct list_head cl_loi_hp_ready_list;
+ struct list_head cl_loi_write_list;
+ struct list_head cl_loi_read_list;
+ int cl_r_in_flight;
+ int cl_w_in_flight;
+ /* just a sum of the loi/lop pending numbers to be exported by /proc */
+ atomic_t cl_pending_w_pages;
+ atomic_t cl_pending_r_pages;
+ __u32 cl_max_pages_per_rpc;
+ int cl_max_rpcs_in_flight;
+ struct obd_histogram cl_read_rpc_hist;
+ struct obd_histogram cl_write_rpc_hist;
+ struct obd_histogram cl_read_page_hist;
+ struct obd_histogram cl_write_page_hist;
+ struct obd_histogram cl_read_offset_hist;
+ struct obd_histogram cl_write_offset_hist;
+
+ /* lru for osc caching pages */
+ struct cl_client_cache *cl_cache;
+ struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */
+ atomic_t *cl_lru_left;
+ atomic_t cl_lru_busy;
+ atomic_t cl_lru_shrinkers;
+ atomic_t cl_lru_in_list;
+ struct list_head cl_lru_list; /* lru page list */
+ client_obd_lock_t cl_lru_list_lock; /* page list protector */
+
+ /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
+ atomic_t cl_destroy_in_flight;
+ wait_queue_head_t cl_destroy_waitq;
+
+ struct mdc_rpc_lock *cl_rpc_lock;
+ struct mdc_rpc_lock *cl_close_lock;
+
+ /* mgc datastruct */
+ struct semaphore cl_mgc_sem;
+ struct vfsmount *cl_mgc_vfsmnt;
+ struct dentry *cl_mgc_configs_dir;
+ atomic_t cl_mgc_refcount;
+ struct obd_export *cl_mgc_mgsexp;
+
+ /* checksumming for data sent over the network */
+ unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */
+ /* supported checksum types that are worked out at connect time */
+ __u32 cl_supp_cksum_types;
+ /* checksum algorithm to be used */
+ cksum_type_t cl_cksum_type;
+
+ /* also protected by the poorly named _loi_list_lock lock above */
+ struct osc_async_rc cl_ar;
+
+ /* used by quotacheck when the servers are older than 2.4 */
+ int cl_qchk_stat; /* quotacheck stat of the peer */
+#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
+#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0)
+#warning "please consider removing quotacheck compatibility code"
+#endif
+
+ /* sequence manager */
+ struct lu_client_seq *cl_seq;
+
+ atomic_t cl_resends; /* resend count */
+
+ /* ptlrpc work for writeback in ptlrpcd context */
+ void *cl_writeback_work;
+ /* hash tables for osc_quota_info */
+ cfs_hash_t *cl_quota_hash[MAXQUOTAS];
+};
+#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
+
+struct obd_id_info {
+ __u32 idx;
+ obd_id *data;
+};
+
+/* */
+
+struct echo_obd {
+ struct obd_device_target eo_obt;
+ struct obdo eo_oa;
+ spinlock_t eo_lock;
+ __u64 eo_lastino;
+ struct lustre_handle eo_nl_lock;
+ atomic_t eo_prep;
+};
+
+struct ost_obd {
+ struct ptlrpc_service *ost_service;
+ struct ptlrpc_service *ost_create_service;
+ struct ptlrpc_service *ost_io_service;
+ struct ptlrpc_service *ost_seq_service;
+ struct mutex ost_health_mutex;
+};
+
+struct echo_client_obd {
+ struct obd_export *ec_exp; /* the local connection to osc/lov */
+ spinlock_t ec_lock;
+ struct list_head ec_objects;
+ struct list_head ec_locks;
+ int ec_nstripes;
+ __u64 ec_unique;
+};
+
+struct lov_qos_oss {
+ struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */
+ struct list_head lqo_oss_list; /* link to lov_qos */
+ __u64 lqo_bavail; /* total bytes avail on OSS */
+ __u64 lqo_penalty; /* current penalty */
+ __u64 lqo_penalty_per_obj;/* penalty decrease every obj*/
+ time_t lqo_used; /* last used time, seconds */
+ __u32 lqo_ost_count; /* number of osts on this oss */
+};
+
+struct ltd_qos {
+ struct lov_qos_oss *ltq_oss; /* oss info */
+ __u64 ltq_penalty; /* current penalty */
+ __u64 ltq_penalty_per_obj; /* penalty decrease every obj*/
+ __u64 ltq_weight; /* net weighting */
+ time_t ltq_used; /* last used time, seconds */
+ unsigned int ltq_usable:1; /* usable for striping */
+};
+
+/* Generic subset of OSTs */
+struct ost_pool {
+ __u32 *op_array; /* array of index of
+ lov_obd->lov_tgts */
+ unsigned int op_count; /* number of OSTs in the array */
+ unsigned int op_size; /* allocated size of lp_array */
+ struct rw_semaphore op_rw_sem; /* to protect ost_pool use */
+};
+
+/* Round-robin allocator data */
+struct lov_qos_rr {
+ __u32 lqr_start_idx; /* start index of new inode */
+ __u32 lqr_offset_idx; /* aliasing for start_idx */
+ int lqr_start_count; /* reseed counter */
+ struct ost_pool lqr_pool; /* round-robin optimized list */
+ unsigned long lqr_dirty:1; /* recalc round-robin list */
+};
+
+/* allow statfs data caching for 1 second */
+#define OBD_STATFS_CACHE_SECONDS 1
+
+struct lov_statfs_data {
+ struct obd_info lsd_oi;
+ struct obd_statfs lsd_statfs;
+};
+/* Stripe placement optimization */
+struct lov_qos {
+ struct list_head lq_oss_list; /* list of OSSs that targets use */
+ struct rw_semaphore lq_rw_sem;
+ __u32 lq_active_oss_count;
+ unsigned int lq_prio_free; /* priority for free space */
+ unsigned int lq_threshold_rr;/* priority for rr */
+ struct lov_qos_rr lq_rr; /* round robin qos data */
+ unsigned long lq_dirty:1, /* recalc qos data */
+ lq_same_space:1,/* the ost's all have approx.
+ the same space avail */
+ lq_reset:1, /* zero current penalties */
+ lq_statfs_in_progress:1; /* statfs op in
+ progress */
+ /* qos statfs data */
+ struct lov_statfs_data *lq_statfs_data;
+ wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs
+ * requests completion */
+};
+
+struct lov_tgt_desc {
+ struct list_head ltd_kill;
+ struct obd_uuid ltd_uuid;
+ struct obd_device *ltd_obd;
+ struct obd_export *ltd_exp;
+ struct ltd_qos ltd_qos; /* qos info per target */
+ __u32 ltd_gen;
+ __u32 ltd_index; /* index in lov_obd->tgts */
+ unsigned long ltd_active:1,/* is this target up for requests */
+ ltd_activate:1,/* should target be activated */
+ ltd_reap:1; /* should this target be deleted */
+};
+
+/* Pool metadata */
+#define pool_tgt_size(_p) _p->pool_obds.op_size
+#define pool_tgt_count(_p) _p->pool_obds.op_count
+#define pool_tgt_array(_p) _p->pool_obds.op_array
+#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
+
+struct pool_desc {
+ char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
+ struct ost_pool pool_obds; /* pool members */
+ atomic_t pool_refcount; /* pool ref. counter */
+ struct lov_qos_rr pool_rr; /* round robin qos */
+ struct hlist_node pool_hash; /* access by poolname */
+ struct list_head pool_list; /* serial access */
+ proc_dir_entry_t *pool_proc_entry; /* file in /proc */
+ struct obd_device *pool_lobd; /* obd of the lov/lod to which
+ * this pool belongs */
+};
+
+struct lov_obd {
+ struct lov_desc desc;
+ struct lov_tgt_desc **lov_tgts; /* sparse array */
+ struct ost_pool lov_packed; /* all OSTs in a packed
+ array */
+ struct mutex lov_lock;
+ struct obd_connect_data lov_ocd;
+ atomic_t lov_refcount;
+ __u32 lov_tgt_count; /* how many OBD's */
+ __u32 lov_active_tgt_count; /* how many active */
+ __u32 lov_death_row;/* tgts scheduled to be deleted */
+ __u32 lov_tgt_size; /* size of tgts array */
+ int lov_connects;
+ int lov_pool_count;
+ cfs_hash_t *lov_pools_hash_body; /* used for key access */
+ struct list_head lov_pool_list; /* used for sequential access */
+ proc_dir_entry_t *lov_pool_proc_entry;
+ enum lustre_sec_part lov_sp_me;
+
+ /* Cached LRU pages from upper layer */
+ void *lov_cache;
+
+ struct rw_semaphore lov_notify_lock;
+};
+
+struct lmv_tgt_desc {
+ struct obd_uuid ltd_uuid;
+ struct obd_export *ltd_exp;
+ int ltd_idx;
+ struct mutex ltd_fid_mutex;
+ unsigned long ltd_active:1; /* target up for requests */
+};
+
+enum placement_policy {
+ PLACEMENT_CHAR_POLICY = 0,
+ PLACEMENT_NID_POLICY = 1,
+ PLACEMENT_INVAL_POLICY = 2,
+ PLACEMENT_MAX_POLICY
+};
+
+typedef enum placement_policy placement_policy_t;
+
+struct lmv_obd {
+ int refcount;
+ struct lu_client_fld lmv_fld;
+ spinlock_t lmv_lock;
+ placement_policy_t lmv_placement;
+ struct lmv_desc desc;
+ struct obd_uuid cluuid;
+ struct obd_export *exp;
+
+ struct mutex init_mutex;
+ int connected;
+ int max_easize;
+ int max_def_easize;
+ int max_cookiesize;
+ int server_timeout;
+
+ int tgts_size; /* size of tgts array */
+ struct lmv_tgt_desc **tgts;
+
+ struct obd_connect_data conn_data;
+};
+
+struct niobuf_local {
+ __u64 lnb_file_offset;
+ __u32 lnb_page_offset;
+ __u32 len;
+ __u32 flags;
+ struct page *page;
+ struct dentry *dentry;
+ int lnb_grant_used;
+ int rc;
+};
+
+#define LUSTRE_FLD_NAME "fld"
+#define LUSTRE_SEQ_NAME "seq"
+
+#define LUSTRE_MDD_NAME "mdd"
+#define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs"
+#define LUSTRE_OSD_ZFS_NAME "osd-zfs"
+#define LUSTRE_VVP_NAME "vvp"
+#define LUSTRE_LMV_NAME "lmv"
+#define LUSTRE_SLP_NAME "slp"
+#define LUSTRE_LOD_NAME "lod"
+#define LUSTRE_OSP_NAME "osp"
+#define LUSTRE_LWP_NAME "lwp"
+
+/* obd device type names */
+ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
+#define LUSTRE_MDS_NAME "mds"
+#define LUSTRE_MDT_NAME "mdt"
+#define LUSTRE_MDC_NAME "mdc"
+#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */
+#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */
+#define LUSTRE_OSC_NAME "osc"
+#define LUSTRE_LOV_NAME "lov"
+#define LUSTRE_MGS_NAME "mgs"
+#define LUSTRE_MGC_NAME "mgc"
+
+#define LUSTRE_ECHO_NAME "obdecho"
+#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
+#define LUSTRE_QMT_NAME "qmt"
+
+/* Constant obd names (post-rename) */
+#define LUSTRE_MDS_OBDNAME "MDS"
+#define LUSTRE_OSS_OBDNAME "OSS"
+#define LUSTRE_MGS_OBDNAME "MGS"
+#define LUSTRE_MGC_OBDNAME "MGC"
+
+static inline int is_osp_on_mdt(char *name)
+{
+ char *ptr;
+
+ ptr = strrchr(name, '-');
+ if (ptr == NULL) {
+ CERROR("%s is not a obdname\n", name);
+ return 0;
+ }
+
+ /* 1.8 OSC/OSP name on MDT is fsname-OSTxxxx-osc */
+ if (strncmp(ptr + 1, "osc", 3) == 0)
+ return 1;
+
+ if (strncmp(ptr + 1, "MDT", 3) != 0)
+ return 0;
+
+ while (*(--ptr) != '-' && ptr != name);
+
+ if (ptr == name)
+ return 0;
+
+ if (strncmp(ptr + 1, LUSTRE_OSP_NAME, strlen(LUSTRE_OSP_NAME)) != 0 &&
+ strncmp(ptr + 1, LUSTRE_OSC_NAME, strlen(LUSTRE_OSC_NAME)) != 0)
+ return 0;
+
+ return 1;
+}
+
+/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
+#define N_LOCAL_TEMP_PAGE 0x10000000
+
+struct obd_trans_info {
+ __u64 oti_transno;
+ __u64 oti_xid;
+ /* Only used on the server side for tracking acks. */
+ struct oti_req_ack_lock {
+ struct lustre_handle lock;
+ __u32 mode;
+ } oti_ack_locks[4];
+ void *oti_handle;
+ struct llog_cookie oti_onecookie;
+ struct llog_cookie *oti_logcookies;
+ int oti_numcookies;
+ /** synchronous write is needed */
+ unsigned long oti_sync_write:1;
+
+ /* initial thread handling transaction */
+ struct ptlrpc_thread * oti_thread;
+ __u32 oti_conn_cnt;
+ /** VBR: versions */
+ __u64 oti_pre_version;
+ /** JobID */
+ char *oti_jobid;
+
+ struct obd_uuid *oti_ost_uuid;
+};
+
+static inline void oti_init(struct obd_trans_info *oti,
+ struct ptlrpc_request *req)
+{
+ if (oti == NULL)
+ return;
+ memset(oti, 0, sizeof(*oti));
+
+ if (req == NULL)
+ return;
+
+ oti->oti_xid = req->rq_xid;
+ /** VBR: take versions from request */
+ if (req->rq_reqmsg != NULL &&
+ lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
+ __u64 *pre_version = lustre_msg_get_versions(req->rq_reqmsg);
+ oti->oti_pre_version = pre_version ? pre_version[0] : 0;
+ oti->oti_transno = lustre_msg_get_transno(req->rq_reqmsg);
+ }
+
+ /** called from mds_create_objects */
+ if (req->rq_repmsg != NULL)
+ oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
+ oti->oti_thread = req->rq_svc_thread;
+ if (req->rq_reqmsg != NULL)
+ oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
+}
+
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
+{
+ if (!oti)
+ return;
+
+ if (num_cookies == 1)
+ oti->oti_logcookies = &oti->oti_onecookie;
+ else
+ OBD_ALLOC_LARGE(oti->oti_logcookies,
+ num_cookies * sizeof(oti->oti_onecookie));
+
+ oti->oti_numcookies = num_cookies;
+}
+
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+ if (!oti || !oti->oti_logcookies)
+ return;
+
+ if (oti->oti_logcookies == &oti->oti_onecookie)
+ LASSERT(oti->oti_numcookies == 1);
+ else
+ OBD_FREE_LARGE(oti->oti_logcookies,
+ oti->oti_numcookies*sizeof(oti->oti_onecookie));
+ oti->oti_logcookies = NULL;
+ oti->oti_numcookies = 0;
+}
+
+/*
+ * Events signalled through obd_notify() upcall-chain.
+ */
+enum obd_notify_event {
+ /* target added */
+ OBD_NOTIFY_CREATE,
+ /* Device connect start */
+ OBD_NOTIFY_CONNECT,
+ /* Device activated */
+ OBD_NOTIFY_ACTIVE,
+ /* Device deactivated */
+ OBD_NOTIFY_INACTIVE,
+ /* Device disconnected */
+ OBD_NOTIFY_DISCON,
+ /* Connect data for import were changed */
+ OBD_NOTIFY_OCD,
+ /* Sync request */
+ OBD_NOTIFY_SYNC_NONBLOCK,
+ OBD_NOTIFY_SYNC,
+ /* Configuration event */
+ OBD_NOTIFY_CONFIG,
+ /* Administratively deactivate/activate event */
+ OBD_NOTIFY_DEACTIVATE,
+ OBD_NOTIFY_ACTIVATE
+};
+
+/*
+ * Data structure used to pass obd_notify()-event to non-obd listeners (llite
+ * and liblustre being main examples).
+ */
+struct obd_notify_upcall {
+ int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data);
+ /* Opaque datum supplied by upper layer listener */
+ void *onu_owner;
+};
+
+struct target_recovery_data {
+ svc_handler_t trd_recovery_handler;
+ pid_t trd_processing_task;
+ struct completion trd_starting;
+ struct completion trd_finishing;
+};
+
+struct obd_llog_group {
+ int olg_seq;
+ struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS];
+ wait_queue_head_t olg_waitq;
+ spinlock_t olg_lock;
+ struct mutex olg_cat_processing;
+};
+
+/* corresponds to one of the obd's */
+#define OBD_DEVICE_MAGIC 0XAB5CD6EF
+#define OBD_DEV_BY_DEVNAME 0xffffd0de
+
+struct obd_device {
+ struct obd_type *obd_type;
+ __u32 obd_magic;
+
+ /* common and UUID name of this device */
+ char obd_name[MAX_OBD_NAME];
+ struct obd_uuid obd_uuid;
+
+ struct lu_device *obd_lu_dev;
+
+ int obd_minor;
+ /* bitfield modification is protected by obd_dev_lock */
+ unsigned long obd_attached:1, /* finished attach */
+ obd_set_up:1, /* finished setup */
+ obd_recovering:1, /* there are recoverable clients */
+ obd_abort_recovery:1,/* recovery expired */
+ obd_version_recov:1, /* obd uses version checking */
+ obd_replayable:1, /* recovery is enabled; inform clients */
+ obd_no_transno:1, /* no committed-transno notification */
+ obd_no_recov:1, /* fail instead of retry messages */
+ obd_stopping:1, /* started cleanup */
+ obd_starting:1, /* started setup */
+ obd_force:1, /* cleanup with > 0 obd refcount */
+ obd_fail:1, /* cleanup with failover */
+ obd_async_recov:1, /* allow asynchronous orphan cleanup */
+ obd_no_conn:1, /* deny new connections */
+ obd_inactive:1, /* device active/inactive
+ * (for /proc/status only!!) */
+ obd_no_ir:1, /* no imperative recovery. */
+ obd_process_conf:1; /* device is processing mgs config */
+ /* use separate field as it is set in interrupt to don't mess with
+ * protection of other bits using _bh lock */
+ unsigned long obd_recovery_expired:1;
+ /* uuid-export hash body */
+ cfs_hash_t *obd_uuid_hash;
+ /* nid-export hash body */
+ cfs_hash_t *obd_nid_hash;
+ /* nid stats body */
+ cfs_hash_t *obd_nid_stats_hash;
+ struct list_head obd_nid_stats;
+ atomic_t obd_refcount;
+ wait_queue_head_t obd_refcount_waitq;
+ struct list_head obd_exports;
+ struct list_head obd_unlinked_exports;
+ struct list_head obd_delayed_exports;
+ int obd_num_exports;
+ spinlock_t obd_nid_lock;
+ struct ldlm_namespace *obd_namespace;
+ struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
+ /* a spinlock is OK for what we do now, may need a semaphore later */
+ spinlock_t obd_dev_lock; /* protect OBD bitfield above */
+ struct mutex obd_dev_mutex;
+ __u64 obd_last_committed;
+ struct fsfilt_operations *obd_fsops;
+ spinlock_t obd_osfs_lock;
+ struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */
+ __u64 obd_osfs_age;
+ struct lvfs_run_ctxt obd_lvfs_ctxt;
+ struct obd_llog_group obd_olg; /* default llog group */
+ struct obd_device *obd_observer;
+ struct rw_semaphore obd_observer_link_sem;
+ struct obd_notify_upcall obd_upcall;
+ struct obd_export *obd_self_export;
+ /* list of exports in LRU order, for ping evictor, with obd_dev_lock */
+ struct list_head obd_exports_timed;
+ time_t obd_eviction_timer; /* for ping evictor */
+
+ int obd_max_recoverable_clients;
+ atomic_t obd_connected_clients;
+ int obd_stale_clients;
+ int obd_delayed_clients;
+ /* this lock protects all recovery list_heads, timer and
+ * obd_next_recovery_transno value */
+ spinlock_t obd_recovery_task_lock;
+ __u64 obd_next_recovery_transno;
+ int obd_replayed_requests;
+ int obd_requests_queued_for_recovery;
+ wait_queue_head_t obd_next_transno_waitq;
+ /* protected by obd_recovery_task_lock */
+ timer_list_t obd_recovery_timer;
+ time_t obd_recovery_start; /* seconds */
+ time_t obd_recovery_end; /* seconds, for lprocfs_status */
+ int obd_recovery_time_hard;
+ int obd_recovery_timeout;
+ int obd_recovery_ir_factor;
+
+ /* new recovery stuff from CMD2 */
+ struct target_recovery_data obd_recovery_data;
+ int obd_replayed_locks;
+ atomic_t obd_req_replay_clients;
+ atomic_t obd_lock_replay_clients;
+ /* all lists are protected by obd_recovery_task_lock */
+ struct list_head obd_req_replay_queue;
+ struct list_head obd_lock_replay_queue;
+ struct list_head obd_final_req_queue;
+ int obd_recovery_stage;
+
+ union {
+ struct obd_device_target obt;
+ struct filter_obd filter;
+ struct client_obd cli;
+ struct ost_obd ost;
+ struct echo_client_obd echo_client;
+ struct echo_obd echo;
+ struct lov_obd lov;
+ struct lmv_obd lmv;
+ } u;
+ /* Fields used by LProcFS */
+ unsigned int obd_cntr_base;
+ struct lprocfs_stats *obd_stats;
+
+ unsigned int md_cntr_base;
+ struct lprocfs_stats *md_stats;
+
+ proc_dir_entry_t *obd_proc_entry;
+ void *obd_proc_private; /* type private PDEs */
+ proc_dir_entry_t *obd_proc_exports_entry;
+ proc_dir_entry_t *obd_svc_procroot;
+ struct lprocfs_stats *obd_svc_stats;
+ atomic_t obd_evict_inprogress;
+ wait_queue_head_t obd_evict_inprogress_waitq;
+ struct list_head obd_evict_list; /* protected with pet_lock */
+
+ /**
+ * Ldlm pool part. Save last calculated SLV and Limit.
+ */
+ rwlock_t obd_pool_lock;
+ int obd_pool_limit;
+ __u64 obd_pool_slv;
+
+ /**
+ * A list of outstanding class_incref()'s against this obd. For
+ * debugging.
+ */
+ struct lu_ref obd_reference;
+
+ int obd_conn_inprogress;
+};
+
+#define OBD_LLOG_FL_SENDNOW 0x0001
+#define OBD_LLOG_FL_EXIT 0x0002
+
+enum obd_cleanup_stage {
+/* Special case hack for MDS LOVs */
+ OBD_CLEANUP_EARLY,
+/* can be directly mapped to .ldto_device_fini() */
+ OBD_CLEANUP_EXPORTS,
+};
+
+/* get/set_info keys */
+#define KEY_ASYNC "async"
+#define KEY_BLOCKSIZE_BITS "blocksize_bits"
+#define KEY_BLOCKSIZE "blocksize"
+#define KEY_CAPA_KEY "capa_key"
+#define KEY_CHANGELOG_CLEAR "changelog_clear"
+#define KEY_FID2PATH "fid2path"
+#define KEY_CHECKSUM "checksum"
+#define KEY_CLEAR_FS "clear_fs"
+#define KEY_CONN_DATA "conn_data"
+#define KEY_EVICT_BY_NID "evict_by_nid"
+#define KEY_FIEMAP "fiemap"
+#define KEY_FLUSH_CTX "flush_ctx"
+#define KEY_GRANT_SHRINK "grant_shrink"
+#define KEY_HSM_COPYTOOL_SEND "hsm_send"
+#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+#define KEY_INIT_RECOV "initial_recov"
+#define KEY_INTERMDS "inter_mds"
+#define KEY_LAST_ID "last_id"
+#define KEY_LAST_FID "last_fid"
+#define KEY_LOCK_TO_STRIPE "lock_to_stripe"
+#define KEY_LOVDESC "lovdesc"
+#define KEY_LOV_IDX "lov_idx"
+#define KEY_MAX_EASIZE "max_easize"
+#define KEY_MDS_CONN "mds_conn"
+#define KEY_MGSSEC "mgssec"
+#define KEY_NEXT_ID "next_id"
+#define KEY_READ_ONLY "read-only"
+#define KEY_REGISTER_TARGET "register_target"
+#define KEY_SET_FS "set_fs"
+#define KEY_TGT_COUNT "tgt_count"
+/* KEY_SET_INFO in lustre_idl.h */
+#define KEY_SPTLRPC_CONF "sptlrpc_conf"
+#define KEY_CONNECT_FLAG "connect_flags"
+#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel"
+
+#define KEY_CACHE_SET "cache_set"
+#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
+#define KEY_CHANGELOG_INDEX "changelog_index"
+
+struct lu_context;
+
+/* /!\ must be coherent with include/linux/namei.h on patched kernel */
+#define IT_OPEN (1 << 0)
+#define IT_CREAT (1 << 1)
+#define IT_READDIR (1 << 2)
+#define IT_GETATTR (1 << 3)
+#define IT_LOOKUP (1 << 4)
+#define IT_UNLINK (1 << 5)
+#define IT_TRUNC (1 << 6)
+#define IT_GETXATTR (1 << 7)
+#define IT_EXEC (1 << 8)
+#define IT_PIN (1 << 9)
+#define IT_LAYOUT (1 << 10)
+#define IT_QUOTA_DQACQ (1 << 11)
+#define IT_QUOTA_CONN (1 << 12)
+
+static inline int it_to_lock_mode(struct lookup_intent *it)
+{
+ /* CREAT needs to be tested before open (both could be set) */
+ if (it->it_op & IT_CREAT)
+ return LCK_CW;
+ else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+ IT_LAYOUT))
+ return LCK_CR;
+
+ LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
+ return -EINVAL;
+}
+
+struct md_op_data {
+ struct lu_fid op_fid1; /* operation fid1 (usualy parent) */
+ struct lu_fid op_fid2; /* operation fid2 (usualy child) */
+ struct lu_fid op_fid3; /* 2 extra fids to find conflicting */
+ struct lu_fid op_fid4; /* to the operation locks. */
+ mdsno_t op_mds; /* what mds server open will go to */
+ struct lustre_handle op_handle;
+ obd_time op_mod_time;
+ const char *op_name;
+ int op_namelen;
+ __u32 op_mode;
+ struct lmv_stripe_md *op_mea1;
+ struct lmv_stripe_md *op_mea2;
+ __u32 op_suppgids[2];
+ __u32 op_fsuid;
+ __u32 op_fsgid;
+ cfs_cap_t op_cap;
+ void *op_data;
+
+ /* iattr fields and blocks. */
+ struct iattr op_attr;
+ unsigned int op_attr_flags;
+ __u64 op_valid;
+ loff_t op_attr_blocks;
+
+ /* Size-on-MDS epoch and flags. */
+ __u64 op_ioepoch;
+ __u32 op_flags;
+
+ /* Capa fields */
+ struct obd_capa *op_capa1;
+ struct obd_capa *op_capa2;
+
+ /* Various operation flags. */
+ __u32 op_bias;
+
+ /* Operation type */
+ __u32 op_opc;
+
+ /* Used by readdir */
+ __u64 op_offset;
+
+ /* Used by readdir */
+ __u32 op_npages;
+
+ /* used to transfer info between the stacks of MD client
+ * see enum op_cli_flags */
+ __u32 op_cli_flags;
+};
+
+enum op_cli_flags {
+ CLI_SET_MEA = 1 << 0,
+ CLI_RM_ENTRY = 1 << 1,
+};
+
+struct md_enqueue_info;
+/* metadata stat-ahead */
+typedef int (* md_enqueue_cb_t)(struct ptlrpc_request *req,
+ struct md_enqueue_info *minfo,
+ int rc);
+
+/* seq client type */
+enum lu_cli_type {
+ LUSTRE_SEQ_METADATA = 1,
+ LUSTRE_SEQ_DATA
+};
+
+struct md_enqueue_info {
+ struct md_op_data mi_data;
+ struct lookup_intent mi_it;
+ struct lustre_handle mi_lockh;
+ struct inode *mi_dir;
+ md_enqueue_cb_t mi_cb;
+ __u64 mi_cbdata;
+ unsigned int mi_generation;
+};
+
+struct obd_ops {
+ module_t *o_owner;
+ int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg);
+ int (*o_get_info)(const struct lu_env *env, struct obd_export *,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm);
+ int (*o_set_info_async)(const struct lu_env *, struct obd_export *,
+ __u32 keylen, void *key,
+ __u32 vallen, void *val,
+ struct ptlrpc_request_set *set);
+ int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
+ int (*o_detach)(struct obd_device *dev);
+ int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
+ int (*o_precleanup)(struct obd_device *dev,
+ enum obd_cleanup_stage cleanup_stage);
+ int (*o_cleanup)(struct obd_device *dev);
+ int (*o_process_config)(struct obd_device *dev, obd_count len,
+ void *data);
+ int (*o_postrecov)(struct obd_device *dev);
+ int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority);
+ int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
+ /* connect to the target device with given connection
+ * data. @ocd->ocd_connect_flags is modified to reflect flags actually
+ * granted by the target, which are guaranteed to be a subset of flags
+ * asked for. If @ocd == NULL, use default parameters. */
+ int (*o_connect)(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *src,
+ struct obd_uuid *cluuid, struct obd_connect_data *ocd,
+ void *localdata);
+ int (*o_reconnect)(const struct lu_env *env,
+ struct obd_export *exp, struct obd_device *src,
+ struct obd_uuid *cluuid,
+ struct obd_connect_data *ocd,
+ void *localdata);
+ int (*o_disconnect)(struct obd_export *exp);
+
+ /* Initialize/finalize fids infrastructure. */
+ int (*o_fid_init)(struct obd_device *obd,
+ struct obd_export *exp, enum lu_cli_type type);
+ int (*o_fid_fini)(struct obd_device *obd);
+
+ /* Allocate new fid according to passed @hint. */
+ int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
+ struct md_op_data *op_data);
+
+ /*
+ * Object with @fid is getting deleted, we may want to do something
+ * about this.
+ */
+ int (*o_statfs)(const struct lu_env *, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age, __u32 flags);
+ int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
+ __u64 max_age, struct ptlrpc_request_set *set);
+ int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
+ struct lov_stripe_md *mem_src);
+ int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
+ struct lov_mds_md *disk_src, int disk_len);
+ int (*o_preallocate)(struct lustre_handle *, obd_count *req,
+ obd_id *ids);
+ /* FIXME: add fid capability support for create & destroy! */
+ int (*o_precreate)(struct obd_export *exp);
+ int (*o_create)(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti);
+ int (*o_create_async)(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_stripe_md **ea,
+ struct obd_trans_info *oti);
+ int (*o_destroy)(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md *ea,
+ struct obd_trans_info *oti, struct obd_export *md_exp,
+ void *capa);
+ int (*o_setattr)(const struct lu_env *, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti);
+ int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset);
+ int (*o_getattr)(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo);
+ int (*o_getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
+ struct ptlrpc_request_set *set);
+ int (*o_brw)(int rw, struct obd_export *exp, struct obd_info *oinfo,
+ obd_count oa_bufs, struct brw_page *pgarr,
+ struct obd_trans_info *oti);
+ int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
+ struct ost_lvb *lvb, int kms_only);
+ int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink);
+ int (*o_punch)(const struct lu_env *, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset);
+ int (*o_sync)(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, obd_size start, obd_size end,
+ struct ptlrpc_request_set *set);
+ int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+ struct lov_stripe_md *src, obd_size start,
+ obd_size end, struct obd_trans_info *oti);
+ int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+ struct lustre_handle *srconn, struct lov_stripe_md *src,
+ obd_size start, obd_size end, struct obd_trans_info *);
+ int (*o_iterate)(struct lustre_handle *conn,
+ int (*)(obd_id, obd_seq, void *),
+ obd_id *startid, obd_seq seq, void *data);
+ int (*o_preprw)(const struct lu_env *env, int cmd,
+ struct obd_export *exp, struct obdo *oa, int objcount,
+ struct obd_ioobj *obj, struct niobuf_remote *remote,
+ int *nr_pages, struct niobuf_local *local,
+ struct obd_trans_info *oti, struct lustre_capa *capa);
+ int (*o_commitrw)(const struct lu_env *env, int cmd,
+ struct obd_export *exp, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *remote, int pages,
+ struct niobuf_local *local,
+ struct obd_trans_info *oti, int rc);
+ int (*o_enqueue)(struct obd_export *, struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct ptlrpc_request_set *rqset);
+ int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
+ ldlm_iterator_t it, void *data);
+ int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
+ ldlm_iterator_t it, void *data);
+ int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
+ __u32 mode, struct lustre_handle *);
+ int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
+ ldlm_cancel_flags_t flags, void *opaque);
+ int (*o_init_export)(struct obd_export *exp);
+ int (*o_destroy_export)(struct obd_export *exp);
+ int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
+ int cmd, obd_off *);
+
+ /* llog related obd_methods */
+ int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp,
+ struct obd_device *disk_obd, int *idx);
+ int (*o_llog_finish)(struct obd_device *obd, int count);
+ int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *);
+
+ /* metadata-only methods */
+ int (*o_pin)(struct obd_export *, const struct lu_fid *fid,
+ struct obd_capa *, struct obd_client_handle *, int flag);
+ int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
+
+ int (*o_import_event)(struct obd_device *, struct obd_import *,
+ enum obd_import_event);
+
+ int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
+ enum obd_notify_event ev, void *data);
+
+ int (*o_health_check)(const struct lu_env *env, struct obd_device *);
+ struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
+
+ /* quota methods */
+ int (*o_quotacheck)(struct obd_device *, struct obd_export *,
+ struct obd_quotactl *);
+ int (*o_quotactl)(struct obd_device *, struct obd_export *,
+ struct obd_quotactl *);
+
+ int (*o_ping)(const struct lu_env *, struct obd_export *exp);
+
+ /* pools methods */
+ int (*o_pool_new)(struct obd_device *obd, char *poolname);
+ int (*o_pool_del)(struct obd_device *obd, char *poolname);
+ int (*o_pool_add)(struct obd_device *obd, char *poolname,
+ char *ostname);
+ int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+ char *ostname);
+ void (*o_getref)(struct obd_device *obd);
+ void (*o_putref)(struct obd_device *obd);
+ /*
+ * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
+ * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
+ * Also, add a wrapper function in include/linux/obd_class.h. */
+};
+
+enum {
+ LUSTRE_OPC_MKDIR = (1 << 0),
+ LUSTRE_OPC_SYMLINK = (1 << 1),
+ LUSTRE_OPC_MKNOD = (1 << 2),
+ LUSTRE_OPC_CREATE = (1 << 3),
+ LUSTRE_OPC_ANY = (1 << 4)
+};
+
+/* lmv structures */
+#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
+#define MEA_MAGIC_ALL_CHARS 0xb222a11c
+#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
+
+#define MAX_HASH_SIZE_32 0x7fffffffUL
+#define MAX_HASH_SIZE 0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
+
+struct lustre_md {
+ struct mdt_body *body;
+ struct lov_stripe_md *lsm;
+ struct lmv_stripe_md *mea;
+#ifdef CONFIG_FS_POSIX_ACL
+ struct posix_acl *posix_acl;
+#endif
+ struct mdt_remote_perm *remote_perm;
+ struct obd_capa *mds_capa;
+ struct obd_capa *oss_capa;
+};
+
+struct md_open_data {
+ struct obd_client_handle *mod_och;
+ struct ptlrpc_request *mod_open_req;
+ struct ptlrpc_request *mod_close_req;
+ atomic_t mod_refcount;
+};
+
+struct lookup_intent;
+
+struct md_ops {
+ int (*m_getstatus)(struct obd_export *, struct lu_fid *,
+ struct obd_capa **);
+ int (*m_null_inode)(struct obd_export *, const struct lu_fid *);
+ int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
+ ldlm_iterator_t, void *);
+ int (*m_close)(struct obd_export *, struct md_op_data *,
+ struct md_open_data *, struct ptlrpc_request **);
+ int (*m_create)(struct obd_export *, struct md_op_data *,
+ const void *, int, int, __u32, __u32, cfs_cap_t,
+ __u64, struct ptlrpc_request **);
+ int (*m_done_writing)(struct obd_export *, struct md_op_data *,
+ struct md_open_data *);
+ int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
+ struct lookup_intent *, struct md_op_data *,
+ struct lustre_handle *, void *, int,
+ struct ptlrpc_request **, __u64);
+ int (*m_getattr)(struct obd_export *, struct md_op_data *,
+ struct ptlrpc_request **);
+ int (*m_getattr_name)(struct obd_export *, struct md_op_data *,
+ struct ptlrpc_request **);
+ int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
+ void *, int, struct lookup_intent *, int,
+ struct ptlrpc_request **,
+ ldlm_blocking_callback, __u64);
+ int (*m_link)(struct obd_export *, struct md_op_data *,
+ struct ptlrpc_request **);
+ int (*m_rename)(struct obd_export *, struct md_op_data *,
+ const char *, int, const char *, int,
+ struct ptlrpc_request **);
+ int (*m_is_subdir)(struct obd_export *, const struct lu_fid *,
+ const struct lu_fid *,
+ struct ptlrpc_request **);
+ int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
+ int , void *, int, struct ptlrpc_request **,
+ struct md_open_data **mod);
+ int (*m_sync)(struct obd_export *, const struct lu_fid *,
+ struct obd_capa *, struct ptlrpc_request **);
+ int (*m_readpage)(struct obd_export *, struct md_op_data *,
+ struct page **, struct ptlrpc_request **);
+
+ int (*m_unlink)(struct obd_export *, struct md_op_data *,
+ struct ptlrpc_request **);
+
+ int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
+ struct obd_capa *, obd_valid, const char *,
+ const char *, int, int, int, __u32,
+ struct ptlrpc_request **);
+
+ int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
+ struct obd_capa *, obd_valid, const char *,
+ const char *, int, int, int,
+ struct ptlrpc_request **);
+
+ int (*m_init_ea_size)(struct obd_export *, int, int, int);
+
+ int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
+ struct obd_export *, struct obd_export *,
+ struct lustre_md *);
+
+ int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
+
+ int (*m_set_open_replay_data)(struct obd_export *,
+ struct obd_client_handle *,
+ struct ptlrpc_request *);
+ int (*m_clear_open_replay_data)(struct obd_export *,
+ struct obd_client_handle *);
+ int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
+
+ ldlm_mode_t (*m_lock_match)(struct obd_export *, __u64,
+ const struct lu_fid *, ldlm_type_t,
+ ldlm_policy_data_t *, ldlm_mode_t,
+ struct lustre_handle *);
+
+ int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
+ ldlm_policy_data_t *, ldlm_mode_t,
+ ldlm_cancel_flags_t flags, void *opaque);
+ int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
+ renew_capa_cb_t cb);
+ int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *,
+ const struct req_msg_field *, struct obd_capa **);
+
+ int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
+ struct obd_capa *, __u32,
+ struct ptlrpc_request **);
+
+ int (*m_intent_getattr_async)(struct obd_export *,
+ struct md_enqueue_info *,
+ struct ldlm_enqueue_info *);
+
+ int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
+ struct lu_fid *, __u64 *bits);
+
+ /*
+ * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
+ * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
+ * wrapper function in include/linux/obd_class.h.
+ */
+};
+
+struct lsm_operations {
+ void (*lsm_free)(struct lov_stripe_md *);
+ int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
+ struct obd_export *md_exp);
+ void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *,
+ obd_off *);
+ void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
+ obd_off *);
+ int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
+ __u16 *stripe_count);
+ int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
+ struct lov_mds_md *lmm);
+};
+
+extern const struct lsm_operations lsm_v1_ops;
+extern const struct lsm_operations lsm_v3_ops;
+static inline const struct lsm_operations *lsm_op_find(int magic)
+{
+ switch(magic) {
+ case LOV_MAGIC_V1:
+ return &lsm_v1_ops;
+ case LOV_MAGIC_V3:
+ return &lsm_v3_ops;
+ default:
+ CERROR("Cannot recognize lsm_magic %08x\n", magic);
+ return NULL;
+ }
+}
+
+/* Requests for obd_extent_calc() */
+#define OBD_CALC_STRIPE_START 1
+#define OBD_CALC_STRIPE_END 2
+
+static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo)
+{
+ return oinfo->oi_capa;
+}
+
+static inline struct md_open_data *obd_mod_alloc(void)
+{
+ struct md_open_data *mod;
+ OBD_ALLOC_PTR(mod);
+ if (mod == NULL)
+ return NULL;
+ atomic_set(&mod->mod_refcount, 1);
+ return mod;
+}
+
+#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount)
+#define obd_mod_put(mod) \
+({ \
+ if (atomic_dec_and_test(&(mod)->mod_refcount)) { \
+ if ((mod)->mod_open_req) \
+ ptlrpc_req_finished((mod)->mod_open_req); \
+ OBD_FREE_PTR(mod); \
+ } \
+})
+
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent);
+
+/* return 1 if client should be resend request */
+static inline int client_should_resend(int resend, struct client_obd *cli)
+{
+ return atomic_read(&cli->cl_resends) ?
+ atomic_read(&cli->cl_resends) > resend : 1;
+}
+
+/**
+ * Return device name for this device
+ *
+ * XXX: lu_device is declared before obd_device, while a pointer pointing
+ * back to obd_device in lu_device, so this helper function defines here
+ * instead of in lu_object.h
+ */
+static inline const char *lu_dev_name(const struct lu_device *lu_dev)
+{
+ return lu_dev->ld_obd->obd_name;
+}
+
+static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
+{
+ const char *start;
+ char *end;
+
+ if (strncmp(name, LUSTRE_VOLATILE_HDR, LUSTRE_VOLATILE_HDR_LEN) != 0)
+ return false;
+
+ /* caller does not care of idx */
+ if (idx == NULL)
+ return true;
+
+ /* volatile file, the MDT can be set from name */
+ /* name format is LUSTRE_VOLATILE_HDR:[idx]: */
+ /* if no MDT is specified, use std way */
+ if (namelen < LUSTRE_VOLATILE_HDR_LEN + 2)
+ goto bad_format;
+ /* test for no MDT idx case */
+ if ((*(name + LUSTRE_VOLATILE_HDR_LEN) == ':') &&
+ (*(name + LUSTRE_VOLATILE_HDR_LEN + 1) == ':')) {
+ *idx = -1;
+ return true;
+ }
+ /* we have an idx, read it */
+ start = name + LUSTRE_VOLATILE_HDR_LEN + 1;
+ *idx = strtoul(start, &end, 0);
+ /* error cases:
+ * no digit, no trailing :, negative value
+ */
+ if (((*idx == 0) && (end == start)) ||
+ (*end != ':') || (*idx < 0))
+ goto bad_format;
+
+ return true;
+bad_format:
+ /* bad format of mdt idx, we cannot return an error
+ * to caller so we use hash algo */
+ CERROR("Bad volatile file name format: %s\n",
+ name + LUSTRE_VOLATILE_HDR_LEN);
+ return false;
+}
+
+static inline int cli_brw_size(struct obd_device *obd)
+{
+ LASSERT(obd != NULL);
+ return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+}
+
+#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_cache.h b/drivers/staging/lustre/lustre/include/obd_cache.h
new file mode 100644
index 000000000000..c8249fbb0d72
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_cache.h
@@ -0,0 +1,39 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_CACHE_H__
+#define _OBD_CACHE_H__
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
new file mode 100644
index 000000000000..5f740f1743ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -0,0 +1,176 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __OBD_CKSUM
+#define __OBD_CKSUM
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+
+static inline unsigned char cksum_obd2cfs(cksum_type_t cksum_type)
+{
+ switch (cksum_type) {
+ case OBD_CKSUM_CRC32:
+ return CFS_HASH_ALG_CRC32;
+ case OBD_CKSUM_ADLER:
+ return CFS_HASH_ALG_ADLER32;
+ case OBD_CKSUM_CRC32C:
+ return CFS_HASH_ALG_CRC32C;
+ default:
+ CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
+ LBUG();
+ }
+ return 0;
+}
+
+/* The OBD_FL_CKSUM_* flags is packed into 5 bits of o_flags, since there can
+ * only be a single checksum type per RPC.
+ *
+ * The OBD_CHECKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask
+ * since they need to represent the full range of checksum algorithms that
+ * both the client and server can understand.
+ *
+ * In case of an unsupported types/flags we fall back to ADLER
+ * because that is supported by all clients since 1.8
+ *
+ * In case multiple algorithms are supported the best one is used. */
+static inline obd_flag cksum_type_pack(cksum_type_t cksum_type)
+{
+ unsigned int performance = 0, tmp;
+ obd_flag flag = OBD_FL_CKSUM_ADLER;
+
+ if (cksum_type & OBD_CKSUM_CRC32) {
+ tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32));
+ if (tmp > performance) {
+ performance = tmp;
+ flag = OBD_FL_CKSUM_CRC32;
+ }
+ }
+ if (cksum_type & OBD_CKSUM_CRC32C) {
+ tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C));
+ if (tmp > performance) {
+ performance = tmp;
+ flag = OBD_FL_CKSUM_CRC32C;
+ }
+ }
+ if (cksum_type & OBD_CKSUM_ADLER) {
+ tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER));
+ if (tmp > performance) {
+ performance = tmp;
+ flag = OBD_FL_CKSUM_ADLER;
+ }
+ }
+ if (unlikely(cksum_type && !(cksum_type & (OBD_CKSUM_CRC32C |
+ OBD_CKSUM_CRC32 |
+ OBD_CKSUM_ADLER))))
+ CWARN("unknown cksum type %x\n", cksum_type);
+
+ return flag;
+}
+
+static inline cksum_type_t cksum_type_unpack(obd_flag o_flags)
+{
+ switch (o_flags & OBD_FL_CKSUM_ALL) {
+ case OBD_FL_CKSUM_CRC32C:
+ return OBD_CKSUM_CRC32C;
+ case OBD_FL_CKSUM_CRC32:
+ return OBD_CKSUM_CRC32;
+ default:
+ break;
+ }
+
+ return OBD_CKSUM_ADLER;
+}
+
+/* Return a bitmask of the checksum types supported on this system.
+ * 1.8 supported ADLER it is base and not depend on hw
+ * Client uses all available local algos
+ */
+static inline cksum_type_t cksum_types_supported_client(void)
+{
+ cksum_type_t ret = OBD_CKSUM_ADLER;
+
+ CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n",
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)),
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)),
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)));
+
+ if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) > 0)
+ ret |= OBD_CKSUM_CRC32C;
+ if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) > 0)
+ ret |= OBD_CKSUM_CRC32;
+
+ return ret;
+}
+
+/* Server uses algos that perform at 50% or better of the Adler */
+static inline cksum_type_t cksum_types_supported_server(void)
+{
+ int base_speed;
+ cksum_type_t ret = OBD_CKSUM_ADLER;
+
+ CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n",
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)),
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)),
+ cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)));
+
+ base_speed = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)) / 2;
+
+ if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) >=
+ base_speed)
+ ret |= OBD_CKSUM_CRC32C;
+ if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) >=
+ base_speed)
+ ret |= OBD_CKSUM_CRC32;
+
+ return ret;
+}
+
+
+/* Select the best checksum algorithm among those supplied in the cksum_types
+ * input.
+ *
+ * Currently, calling cksum_type_pack() with a mask will return the fastest
+ * checksum type due to its benchmarking at libcfs module load.
+ * Caution is advised, however, since what is fastest on a single client may
+ * not be the fastest or most efficient algorithm on the server. */
+static inline cksum_type_t cksum_type_select(cksum_type_t cksum_types)
+{
+ return cksum_type_unpack(cksum_type_pack(cksum_types));
+}
+
+/* Checksum algorithm names. Must be defined in the same order as the
+ * OBD_CKSUM_* flags. */
+#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"}
+
+#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
new file mode 100644
index 000000000000..de5c5853647f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -0,0 +1,2281 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef __CLASS_OBD_H
+#define __CLASS_OBD_H
+
+
+#include <obd_support.h>
+#include <lustre_import.h>
+#include <lustre_net.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lprocfs_status.h>
+
+#include <linux/obd_class.h>
+
+#define OBD_STATFS_NODELAY 0x0001 /* requests should be send without delay
+ * and resends for avoid deadlocks */
+#define OBD_STATFS_FROM_CACHE 0x0002 /* the statfs callback should not update
+ * obd_osfs_age */
+#define OBD_STATFS_PTLRPCD 0x0004 /* requests will be sent via ptlrpcd
+ * instead of a specific set. This
+ * means that we cannot rely on the set
+ * interpret routine to be called.
+ * lov_statfs_fini() must thus be called
+ * by the request interpret routine */
+#define OBD_STATFS_FOR_MDT0 0x0008 /* The statfs is only for retrieving
+ * information from MDT0. */
+#define OBD_FL_PUNCH 0x00000001 /* To indicate it is punch operation */
+
+/* OBD Device Declarations */
+extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
+extern rwlock_t obd_dev_lock;
+
+/* OBD Operations Declarations */
+extern struct obd_device *class_conn2obd(struct lustre_handle *);
+extern struct obd_device *class_exp2obd(struct obd_export *);
+extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
+extern int lustre_get_jobid(char *jobid);
+
+struct lu_device_type;
+
+/* genops.c */
+struct obd_export *class_conn2export(struct lustre_handle *);
+int class_register_type(struct obd_ops *, struct md_ops *,
+ struct lprocfs_vars *, const char *nm,
+ struct lu_device_type *ldt);
+int class_unregister_type(const char *nm);
+
+struct obd_device *class_newdev(const char *type_name, const char *name);
+void class_release_dev(struct obd_device *obd);
+
+int class_name2dev(const char *name);
+struct obd_device *class_name2obd(const char *name);
+int class_uuid2dev(struct obd_uuid *uuid);
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
+void class_obd_list(void);
+struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
+ const char * typ_name,
+ struct obd_uuid *grp_uuid);
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
+ int *next);
+struct obd_device * class_num2obd(int num);
+int get_devices_count(void);
+
+int class_notify_sptlrpc_conf(const char *fsname, int namelen);
+
+char *obd_export_nid2str(struct obd_export *exp);
+
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
+int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep);
+
+int obd_zombie_impexp_init(void);
+void obd_zombie_impexp_stop(void);
+void obd_zombie_impexp_cull(void);
+void obd_zombie_barrier(void);
+void obd_exports_barrier(struct obd_device *obd);
+int kuc_len(int payload_len);
+struct kuc_hdr * kuc_ptr(void *p);
+int kuc_ispayload(void *p);
+void *kuc_alloc(int payload_len, int transport, int type);
+void kuc_free(void *p, int payload_len);
+
+struct llog_handle;
+struct llog_rec_hdr;
+typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *,
+ struct llog_rec_hdr *, void *);
+/* obd_config.c */
+struct lustre_cfg *lustre_cfg_rename(struct lustre_cfg *cfg,
+ const char *new_name);
+int class_process_config(struct lustre_cfg *lcfg);
+int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
+ struct lustre_cfg *lcfg, void *data);
+int class_attach(struct lustre_cfg *lcfg);
+int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg);
+struct obd_device *class_incref(struct obd_device *obd,
+ const char *scope, const void *source);
+void class_decref(struct obd_device *obd,
+ const char *scope, const void *source);
+void dump_exports(struct obd_device *obd, int locks);
+int class_config_llog_handler(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data);
+int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_add_uuid(const char *uuid, __u64 nid);
+
+/*obdecho*/
+#ifdef LPROCFS
+extern void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+#define CFG_F_START 0x01 /* Set when we start updating from a log */
+#define CFG_F_MARKER 0x02 /* We are within a maker */
+#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */
+#define CFG_F_COMPAT146 0x08 /* Allow old-style logs */
+#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */
+
+/* Passed as data param to class_config_parse_llog */
+struct config_llog_instance {
+ char *cfg_obdname;
+ void *cfg_instance;
+ struct super_block *cfg_sb;
+ struct obd_uuid cfg_uuid;
+ llog_cb_t cfg_callback;
+ int cfg_last_idx; /* for partial llog processing */
+ int cfg_flags;
+};
+int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+ char *name, struct config_llog_instance *cfg);
+int class_config_dump_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+ char *name, struct config_llog_instance *cfg);
+
+enum {
+ CONFIG_T_CONFIG = 0,
+ CONFIG_T_SPTLRPC = 1,
+ CONFIG_T_RECOVER = 2,
+ CONFIG_T_MAX = 3
+};
+
+/* list of active configuration logs */
+struct config_llog_data {
+ struct ldlm_res_id cld_resid;
+ struct config_llog_instance cld_cfg;
+ struct list_head cld_list_chain;
+ atomic_t cld_refcount;
+ struct config_llog_data *cld_sptlrpc;/* depended sptlrpc log */
+ struct config_llog_data *cld_recover; /* imperative recover log */
+ struct obd_export *cld_mgcexp;
+ struct mutex cld_lock;
+ int cld_type;
+ unsigned int cld_stopping:1, /* we were told to stop
+ * watching */
+ cld_lostlock:1; /* lock not requeued */
+ char cld_logname[0];
+};
+
+struct lustre_profile {
+ struct list_head lp_list;
+ char *lp_profile;
+ char *lp_dt;
+ char *lp_md;
+};
+
+struct lustre_profile *class_get_profile(const char * prof);
+void class_del_profile(const char *prof);
+void class_del_profiles(void);
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+
+void __class_export_add_lock_ref(struct obd_export *, struct ldlm_lock *);
+void __class_export_del_lock_ref(struct obd_export *, struct ldlm_lock *);
+extern void (*class_export_dump_hook)(struct obd_export *);
+
+#else
+
+#define __class_export_add_lock_ref(exp, lock) do {} while(0)
+#define __class_export_del_lock_ref(exp, lock) do {} while(0)
+
+#endif
+
+#define class_export_rpc_inc(exp) \
+({ \
+ atomic_inc(&(exp)->exp_rpc_count); \
+ CDEBUG(D_INFO, "RPC GETting export %p : new rpc_count %d\n", \
+ (exp), atomic_read(&(exp)->exp_rpc_count)); \
+})
+
+#define class_export_rpc_dec(exp) \
+({ \
+ LASSERT_ATOMIC_POS(&exp->exp_rpc_count); \
+ atomic_dec(&(exp)->exp_rpc_count); \
+ CDEBUG(D_INFO, "RPC PUTting export %p : new rpc_count %d\n", \
+ (exp), atomic_read(&(exp)->exp_rpc_count)); \
+})
+
+#define class_export_lock_get(exp, lock) \
+({ \
+ atomic_inc(&(exp)->exp_locks_count); \
+ __class_export_add_lock_ref(exp, lock); \
+ CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", \
+ (exp), atomic_read(&(exp)->exp_locks_count)); \
+ class_export_get(exp); \
+})
+
+#define class_export_lock_put(exp, lock) \
+({ \
+ LASSERT_ATOMIC_POS(&exp->exp_locks_count); \
+ atomic_dec(&(exp)->exp_locks_count); \
+ __class_export_del_lock_ref(exp, lock); \
+ CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n", \
+ (exp), atomic_read(&(exp)->exp_locks_count)); \
+ class_export_put(exp); \
+})
+
+#define class_export_cb_get(exp) \
+({ \
+ atomic_inc(&(exp)->exp_cb_count); \
+ CDEBUG(D_INFO, "callback GETting export %p : new cb_count %d\n",\
+ (exp), atomic_read(&(exp)->exp_cb_count)); \
+ class_export_get(exp); \
+})
+
+#define class_export_cb_put(exp) \
+({ \
+ LASSERT_ATOMIC_POS(&exp->exp_cb_count); \
+ atomic_dec(&(exp)->exp_cb_count); \
+ CDEBUG(D_INFO, "callback PUTting export %p : new cb_count %d\n",\
+ (exp), atomic_read(&(exp)->exp_cb_count)); \
+ class_export_put(exp); \
+})
+
+/* genops.c */
+struct obd_export *class_export_get(struct obd_export *exp);
+void class_export_put(struct obd_export *exp);
+struct obd_export *class_new_export(struct obd_device *obddev,
+ struct obd_uuid *cluuid);
+void class_unlink_export(struct obd_export *exp);
+
+struct obd_import *class_import_get(struct obd_import *);
+void class_import_put(struct obd_import *);
+struct obd_import *class_new_import(struct obd_device *obd);
+void class_destroy_import(struct obd_import *exp);
+
+struct obd_type *class_search_type(const char *name);
+struct obd_type *class_get_type(const char *name);
+void class_put_type(struct obd_type *type);
+int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_uuid *cluuid);
+int class_disconnect(struct obd_export *exp);
+void class_fail_export(struct obd_export *exp);
+int class_connected_export(struct obd_export *exp);
+void class_disconnect_exports(struct obd_device *obddev);
+int class_manual_cleanup(struct obd_device *obd);
+void class_disconnect_stale_exports(struct obd_device *,
+ int (*test_export)(struct obd_export *));
+static inline enum obd_option exp_flags_from_obd(struct obd_device *obd)
+{
+ return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
+ (obd->obd_force ? OBD_OPT_FORCE : 0) |
+ (obd->obd_abort_recovery ? OBD_OPT_ABORT_RECOV : 0) |
+ 0);
+}
+
+
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr,
+ unsigned int ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid);
+void obdo_from_md(struct obdo *oa, struct md_op_data *op_data,
+ unsigned int valid);
+
+void obdo_cpu_to_le(struct obdo *dobdo, struct obdo *sobdo);
+void obdo_le_to_cpu(struct obdo *dobdo, struct obdo *sobdo);
+
+#define OBT(dev) (dev)->obd_type
+#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->o_ ## op
+#define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op
+#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
+
+/* Ensure obd_setup: used for cleanup which must be called
+ while obd is stopping */
+#define OBD_CHECK_DEV(obd) \
+do { \
+ if (!(obd)) { \
+ CERROR("NULL device\n"); \
+ RETURN(-ENODEV); \
+ } \
+} while (0)
+
+/* ensure obd_setup and !obd_stopping */
+#define OBD_CHECK_DEV_ACTIVE(obd) \
+do { \
+ OBD_CHECK_DEV(obd); \
+ if (!(obd)->obd_set_up || (obd)->obd_stopping) { \
+ CERROR("Device %d not setup\n", \
+ (obd)->obd_minor); \
+ RETURN(-ENODEV); \
+ } \
+} while (0)
+
+
+#ifdef LPROCFS
+#define OBD_COUNTER_OFFSET(op) \
+ ((offsetof(struct obd_ops, o_ ## op) - \
+ offsetof(struct obd_ops, o_iocontrol)) \
+ / sizeof(((struct obd_ops *)(0))->o_iocontrol))
+
+#define OBD_COUNTER_INCREMENT(obdx, op) \
+ if ((obdx)->obd_stats != NULL) { \
+ unsigned int coffset; \
+ coffset = (unsigned int)((obdx)->obd_cntr_base) + \
+ OBD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < (obdx)->obd_stats->ls_num); \
+ lprocfs_counter_incr((obdx)->obd_stats, coffset); \
+ }
+
+#define EXP_COUNTER_INCREMENT(export, op) \
+ if ((export)->exp_obd->obd_stats != NULL) { \
+ unsigned int coffset; \
+ coffset = (unsigned int)((export)->exp_obd->obd_cntr_base) + \
+ OBD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < (export)->exp_obd->obd_stats->ls_num); \
+ lprocfs_counter_incr((export)->exp_obd->obd_stats, coffset); \
+ if ((export)->exp_nid_stats != NULL && \
+ (export)->exp_nid_stats->nid_stats != NULL) \
+ lprocfs_counter_incr( \
+ (export)->exp_nid_stats->nid_stats, coffset);\
+ }
+
+#define MD_COUNTER_OFFSET(op) \
+ ((offsetof(struct md_ops, m_ ## op) - \
+ offsetof(struct md_ops, m_getstatus)) \
+ / sizeof(((struct md_ops *)(0))->m_getstatus))
+
+#define MD_COUNTER_INCREMENT(obdx, op) \
+ if ((obd)->md_stats != NULL) { \
+ unsigned int coffset; \
+ coffset = (unsigned int)((obdx)->md_cntr_base) + \
+ MD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < (obdx)->md_stats->ls_num); \
+ lprocfs_counter_incr((obdx)->md_stats, coffset); \
+ }
+
+#define EXP_MD_COUNTER_INCREMENT(export, op) \
+ if ((export)->exp_obd->obd_stats != NULL) { \
+ unsigned int coffset; \
+ coffset = (unsigned int)((export)->exp_obd->md_cntr_base) + \
+ MD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < (export)->exp_obd->md_stats->ls_num); \
+ lprocfs_counter_incr((export)->exp_obd->md_stats, coffset); \
+ if ((export)->exp_md_stats != NULL) \
+ lprocfs_counter_incr( \
+ (export)->exp_md_stats, coffset); \
+ }
+
+#else
+#define OBD_COUNTER_OFFSET(op)
+#define OBD_COUNTER_INCREMENT(obd, op)
+#define EXP_COUNTER_INCREMENT(exp, op)
+#define MD_COUNTER_INCREMENT(obd, op)
+#define EXP_MD_COUNTER_INCREMENT(exp, op)
+#endif
+
+static inline int lprocfs_nid_ldlm_stats_init(struct nid_stat* tmp)
+{
+ /* Always add in ldlm_stats */
+ tmp->nid_ldlm_stats = lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC
+ ,LPROCFS_STATS_FLAG_NOPERCPU);
+ if (tmp->nid_ldlm_stats == NULL)
+ return -ENOMEM;
+
+ lprocfs_init_ldlm_stats(tmp->nid_ldlm_stats);
+
+ return lprocfs_register_stats(tmp->nid_proc, "ldlm_stats",
+ tmp->nid_ldlm_stats);
+}
+
+#define OBD_CHECK_MD_OP(obd, op, err) \
+do { \
+ if (!OBT(obd) || !MDP((obd), op)) { \
+ if (err) \
+ CERROR("md_" #op ": dev %s/%d no operation\n", \
+ obd->obd_name, obd->obd_minor); \
+ RETURN(err); \
+ } \
+} while (0)
+
+#define EXP_CHECK_MD_OP(exp, op) \
+do { \
+ if ((exp) == NULL) { \
+ CERROR("obd_" #op ": NULL export\n"); \
+ RETURN(-ENODEV); \
+ } \
+ if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \
+ CERROR("obd_" #op ": cleaned up obd\n"); \
+ RETURN(-EOPNOTSUPP); \
+ } \
+ if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \
+ CERROR("obd_" #op ": dev %s/%d no operation\n", \
+ (exp)->exp_obd->obd_name, \
+ (exp)->exp_obd->obd_minor); \
+ RETURN(-EOPNOTSUPP); \
+ } \
+} while (0)
+
+
+#define OBD_CHECK_DT_OP(obd, op, err) \
+do { \
+ if (!OBT(obd) || !OBP((obd), op)) { \
+ if (err) \
+ CERROR("obd_" #op ": dev %d no operation\n", \
+ obd->obd_minor); \
+ RETURN(err); \
+ } \
+} while (0)
+
+#define EXP_CHECK_DT_OP(exp, op) \
+do { \
+ if ((exp) == NULL) { \
+ CERROR("obd_" #op ": NULL export\n"); \
+ RETURN(-ENODEV); \
+ } \
+ if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \
+ CERROR("obd_" #op ": cleaned up obd\n"); \
+ RETURN(-EOPNOTSUPP); \
+ } \
+ if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \
+ CERROR("obd_" #op ": dev %d no operation\n", \
+ (exp)->exp_obd->obd_minor); \
+ RETURN(-EOPNOTSUPP); \
+ } \
+} while (0)
+
+#define CTXT_CHECK_OP(ctxt, op, err) \
+do { \
+ if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \
+ if (err) \
+ CERROR("lop_" #op ": dev %d no operation\n", \
+ ctxt->loc_obd->obd_minor); \
+ RETURN(err); \
+ } \
+} while (0)
+
+static inline int class_devno_max(void)
+{
+ return MAX_OBD_DEVICES;
+}
+
+static inline int obd_get_info(const struct lu_env *env,
+ struct obd_export *exp, __u32 keylen,
+ void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, get_info);
+ EXP_COUNTER_INCREMENT(exp, get_info);
+
+ rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val,
+ lsm);
+ RETURN(rc);
+}
+
+static inline int obd_set_info_async(const struct lu_env *env,
+ struct obd_export *exp, obd_count keylen,
+ void *key, obd_count vallen, void *val,
+ struct ptlrpc_request_set *set)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, set_info_async);
+ EXP_COUNTER_INCREMENT(exp, set_info_async);
+
+ rc = OBP(exp->exp_obd, set_info_async)(env, exp, keylen, key, vallen,
+ val, set);
+ RETURN(rc);
+}
+
+/*
+ * obd-lu integration.
+ *
+ * Functionality is being moved into new lu_device-based layering, but some
+ * pieces of configuration process are still based on obd devices.
+ *
+ * Specifically, lu_device_type_operations::ldto_device_alloc() methods fully
+ * subsume ->o_setup() methods of obd devices they replace. The same for
+ * lu_device_operations::ldo_process_config() and ->o_process_config(). As a
+ * result, obd_setup() and obd_process_config() branch and call one XOR
+ * another.
+ *
+ * Yet neither lu_device_type_operations::ldto_device_fini() nor
+ * lu_device_type_operations::ldto_device_free() fully implement the
+ * functionality of ->o_precleanup() and ->o_cleanup() they override. Hence,
+ * obd_precleanup() and obd_cleanup() call both lu_device and obd operations.
+ */
+
+#define DECLARE_LU_VARS(ldt, d) \
+ struct lu_device_type *ldt; \
+ struct lu_device *d
+
+static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
+{
+ int rc;
+ DECLARE_LU_VARS(ldt, d);
+ ENTRY;
+
+ ldt = obd->obd_type->typ_lu;
+ if (ldt != NULL) {
+ struct lu_context session_ctx;
+ struct lu_env env;
+ lu_context_init(&session_ctx, LCT_SESSION);
+ session_ctx.lc_thread = NULL;
+ lu_context_enter(&session_ctx);
+
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (rc == 0) {
+ env.le_ses = &session_ctx;
+ d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg);
+ lu_env_fini(&env);
+ if (!IS_ERR(d)) {
+ obd->obd_lu_dev = d;
+ d->ld_obd = obd;
+ rc = 0;
+ } else
+ rc = PTR_ERR(d);
+ }
+ lu_context_exit(&session_ctx);
+ lu_context_fini(&session_ctx);
+
+ } else {
+ OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, setup);
+ rc = OBP(obd, setup)(obd, cfg);
+ }
+ RETURN(rc);
+}
+
+static inline int obd_precleanup(struct obd_device *obd,
+ enum obd_cleanup_stage cleanup_stage)
+{
+ int rc;
+ DECLARE_LU_VARS(ldt, d);
+ ENTRY;
+
+ OBD_CHECK_DEV(obd);
+ ldt = obd->obd_type->typ_lu;
+ d = obd->obd_lu_dev;
+ if (ldt != NULL && d != NULL) {
+ if (cleanup_stage == OBD_CLEANUP_EXPORTS) {
+ struct lu_env env;
+
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (rc == 0) {
+ ldt->ldt_ops->ldto_device_fini(&env, d);
+ lu_env_fini(&env);
+ }
+ }
+ }
+ OBD_CHECK_DT_OP(obd, precleanup, 0);
+ OBD_COUNTER_INCREMENT(obd, precleanup);
+
+ rc = OBP(obd, precleanup)(obd, cleanup_stage);
+ RETURN(rc);
+}
+
+static inline int obd_cleanup(struct obd_device *obd)
+{
+ int rc;
+ DECLARE_LU_VARS(ldt, d);
+ ENTRY;
+
+ OBD_CHECK_DEV(obd);
+
+ ldt = obd->obd_type->typ_lu;
+ d = obd->obd_lu_dev;
+ if (ldt != NULL && d != NULL) {
+ struct lu_env env;
+
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (rc == 0) {
+ ldt->ldt_ops->ldto_device_free(&env, d);
+ lu_env_fini(&env);
+ obd->obd_lu_dev = NULL;
+ }
+ }
+ OBD_CHECK_DT_OP(obd, cleanup, 0);
+ OBD_COUNTER_INCREMENT(obd, cleanup);
+
+ rc = OBP(obd, cleanup)(obd);
+ RETURN(rc);
+}
+
+static inline void obd_cleanup_client_import(struct obd_device *obd)
+{
+ ENTRY;
+
+ /* If we set up but never connected, the
+ client import will not have been cleaned. */
+ down_write(&obd->u.cli.cl_sem);
+ if (obd->u.cli.cl_import) {
+ struct obd_import *imp;
+ imp = obd->u.cli.cl_import;
+ CDEBUG(D_CONFIG, "%s: client import never connected\n",
+ obd->obd_name);
+ ptlrpc_invalidate_import(imp);
+ if (imp->imp_rq_pool) {
+ ptlrpc_free_rq_pool(imp->imp_rq_pool);
+ imp->imp_rq_pool = NULL;
+ }
+ client_destroy_import(imp);
+ obd->u.cli.cl_import = NULL;
+ }
+ up_write(&obd->u.cli.cl_sem);
+
+ EXIT;
+}
+
+static inline int
+obd_process_config(struct obd_device *obd, int datalen, void *data)
+{
+ int rc;
+ DECLARE_LU_VARS(ldt, d);
+ ENTRY;
+
+ OBD_CHECK_DEV(obd);
+
+ obd->obd_process_conf = 1;
+ ldt = obd->obd_type->typ_lu;
+ d = obd->obd_lu_dev;
+ if (ldt != NULL && d != NULL) {
+ struct lu_env env;
+
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (rc == 0) {
+ rc = d->ld_ops->ldo_process_config(&env, d, data);
+ lu_env_fini(&env);
+ }
+ } else {
+ OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP);
+ rc = OBP(obd, process_config)(obd, datalen, data);
+ }
+ OBD_COUNTER_INCREMENT(obd, process_config);
+ obd->obd_process_conf = 0;
+
+ RETURN(rc);
+}
+
+/* Pack an in-memory MD struct for storage on disk.
+ * Returns +ve size of packed MD (0 for free), or -ve error.
+ *
+ * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
+ * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
+ * If @*disk_tgt == NULL, it will be allocated
+ */
+static inline int obd_packmd(struct obd_export *exp,
+ struct lov_mds_md **disk_tgt,
+ struct lov_stripe_md *mem_src)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, packmd);
+ EXP_COUNTER_INCREMENT(exp, packmd);
+
+ rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
+ RETURN(rc);
+}
+
+static inline int obd_size_diskmd(struct obd_export *exp,
+ struct lov_stripe_md *mem_src)
+{
+ return obd_packmd(exp, NULL, mem_src);
+}
+
+/* helper functions */
+static inline int obd_alloc_diskmd(struct obd_export *exp,
+ struct lov_mds_md **disk_tgt)
+{
+ LASSERT(disk_tgt);
+ LASSERT(*disk_tgt == NULL);
+ return obd_packmd(exp, disk_tgt, NULL);
+}
+
+static inline int obd_free_diskmd(struct obd_export *exp,
+ struct lov_mds_md **disk_tgt)
+{
+ LASSERT(disk_tgt);
+ LASSERT(*disk_tgt);
+ /*
+ * LU-2590, for caller's convenience, *disk_tgt could be host
+ * endianness, it needs swab to LE if necessary, while just
+ * lov_mds_md header needs it for figuring out how much memory
+ * needs to be freed.
+ */
+ if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
+ (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) ||
+ ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3)))
+ lustre_swab_lov_mds_md(*disk_tgt);
+ return obd_packmd(exp, disk_tgt, NULL);
+}
+
+/* Unpack an MD struct from disk to in-memory format.
+ * Returns +ve size of unpacked MD (0 for free), or -ve error.
+ *
+ * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
+ * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
+ * If @*mem_tgt == NULL, it will be allocated
+ */
+static inline int obd_unpackmd(struct obd_export *exp,
+ struct lov_stripe_md **mem_tgt,
+ struct lov_mds_md *disk_src,
+ int disk_len)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, unpackmd);
+ EXP_COUNTER_INCREMENT(exp, unpackmd);
+
+ rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
+ RETURN(rc);
+}
+
+/* helper functions */
+static inline int obd_alloc_memmd(struct obd_export *exp,
+ struct lov_stripe_md **mem_tgt)
+{
+ LASSERT(mem_tgt);
+ LASSERT(*mem_tgt == NULL);
+ return obd_unpackmd(exp, mem_tgt, NULL, 0);
+}
+
+static inline int obd_free_memmd(struct obd_export *exp,
+ struct lov_stripe_md **mem_tgt)
+{
+ int rc;
+
+ LASSERT(mem_tgt);
+ LASSERT(*mem_tgt);
+ rc = obd_unpackmd(exp, mem_tgt, NULL, 0);
+ *mem_tgt = NULL;
+ return rc;
+}
+
+static inline int obd_precreate(struct obd_export *exp)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, precreate);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, precreate);
+
+ rc = OBP(exp->exp_obd, precreate)(exp);
+ RETURN(rc);
+}
+
+static inline int obd_create_async(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, create_async);
+ EXP_COUNTER_INCREMENT(exp, create_async);
+
+ rc = OBP(exp->exp_obd, create_async)(exp, oinfo, ea, oti);
+ RETURN(rc);
+}
+
+static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *obdo, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, create);
+ EXP_COUNTER_INCREMENT(exp, create);
+
+ rc = OBP(exp->exp_obd, create)(env, exp, obdo, ea, oti);
+ RETURN(rc);
+}
+
+static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *obdo, struct lov_stripe_md *ea,
+ struct obd_trans_info *oti,
+ struct obd_export *md_exp, void *capa)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, destroy);
+ EXP_COUNTER_INCREMENT(exp, destroy);
+
+ rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, ea, oti, md_exp, capa);
+ RETURN(rc);
+}
+
+static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, getattr);
+ EXP_COUNTER_INCREMENT(exp, getattr);
+
+ rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo);
+ RETURN(rc);
+}
+
+static inline int obd_getattr_async(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct ptlrpc_request_set *set)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, getattr_async);
+ EXP_COUNTER_INCREMENT(exp, getattr_async);
+
+ rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set);
+ RETURN(rc);
+}
+
+static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct obd_trans_info *oti)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, setattr);
+ EXP_COUNTER_INCREMENT(exp, setattr);
+
+ rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti);
+ RETURN(rc);
+}
+
+/* This performs all the requests set init/wait/destroy actions. */
+static inline int obd_setattr_rqset(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct obd_trans_info *oti)
+{
+ struct ptlrpc_request_set *set = NULL;
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, setattr_async);
+ EXP_COUNTER_INCREMENT(exp, setattr_async);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ RETURN(rc);
+}
+
+/* This adds all the requests into @set if @set != NULL, otherwise
+ all requests are sent asynchronously without waiting for response. */
+static inline int obd_setattr_async(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct ptlrpc_request_set *set)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, setattr_async);
+ EXP_COUNTER_INCREMENT(exp, setattr_async);
+
+ rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+ RETURN(rc);
+}
+
+static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DEV_ACTIVE(obd);
+ OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, add_conn);
+
+ rc = OBP(obd, add_conn)(imp, uuid, priority);
+ RETURN(rc);
+}
+
+static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DEV_ACTIVE(obd);
+ OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, del_conn);
+
+ rc = OBP(obd, del_conn)(imp, uuid);
+ RETURN(rc);
+}
+
+static inline struct obd_uuid *obd_get_uuid(struct obd_export *exp)
+{
+ struct obd_uuid *uuid;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, get_uuid, NULL);
+ EXP_COUNTER_INCREMENT(exp, get_uuid);
+
+ uuid = OBP(exp->exp_obd, get_uuid)(exp);
+ RETURN(uuid);
+}
+
+/** Create a new /a exp on device /a obd for the uuid /a cluuid
+ * @param exp New export handle
+ * @param d Connect data, supported flags are set, flags also understood
+ * by obd are returned.
+ */
+static inline int obd_connect(const struct lu_env *env,
+ struct obd_export **exp,struct obd_device *obd,
+ struct obd_uuid *cluuid,
+ struct obd_connect_data *data,
+ void *localdata)
+{
+ int rc;
+ __u64 ocf = data ? data->ocd_connect_flags : 0; /* for post-condition
+ * check */
+ ENTRY;
+
+ OBD_CHECK_DEV_ACTIVE(obd);
+ OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, connect);
+
+ rc = OBP(obd, connect)(env, exp, obd, cluuid, data, localdata);
+ /* check that only subset is granted */
+ LASSERT(ergo(data != NULL, (data->ocd_connect_flags & ocf) ==
+ data->ocd_connect_flags));
+ RETURN(rc);
+}
+
+static inline int obd_reconnect(const struct lu_env *env,
+ struct obd_export *exp,
+ struct obd_device *obd,
+ struct obd_uuid *cluuid,
+ struct obd_connect_data *d,
+ void *localdata)
+{
+ int rc;
+ __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition
+ * check */
+
+ ENTRY;
+
+ OBD_CHECK_DEV_ACTIVE(obd);
+ OBD_CHECK_DT_OP(obd, reconnect, 0);
+ OBD_COUNTER_INCREMENT(obd, reconnect);
+
+ rc = OBP(obd, reconnect)(env, exp, obd, cluuid, d, localdata);
+ /* check that only subset is granted */
+ LASSERT(ergo(d != NULL,
+ (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
+ RETURN(rc);
+}
+
+static inline int obd_disconnect(struct obd_export *exp)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, disconnect);
+ EXP_COUNTER_INCREMENT(exp, disconnect);
+
+ rc = OBP(exp->exp_obd, disconnect)(exp);
+ RETURN(rc);
+}
+
+static inline int obd_fid_init(struct obd_device *obd, struct obd_export *exp,
+ enum lu_cli_type type)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, fid_init, 0);
+ OBD_COUNTER_INCREMENT(obd, fid_init);
+
+ rc = OBP(obd, fid_init)(obd, exp, type);
+ RETURN(rc);
+}
+
+static inline int obd_fid_fini(struct obd_device *obd)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, fid_fini, 0);
+ OBD_COUNTER_INCREMENT(obd, fid_fini);
+
+ rc = OBP(obd, fid_fini)(obd);
+ RETURN(rc);
+}
+
+static inline int obd_fid_alloc(struct obd_export *exp,
+ struct lu_fid *fid,
+ struct md_op_data *op_data)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, fid_alloc);
+ EXP_COUNTER_INCREMENT(exp, fid_alloc);
+
+ rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, op_data);
+ RETURN(rc);
+}
+
+static inline int obd_ping(const struct lu_env *env, struct obd_export *exp)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, ping, 0);
+ EXP_COUNTER_INCREMENT(exp, ping);
+
+ rc = OBP(exp->exp_obd, ping)(env, exp);
+ RETURN(rc);
+}
+
+static inline int obd_pool_new(struct obd_device *obd, char *poolname)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_new);
+
+ rc = OBP(obd, pool_new)(obd, poolname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_del(struct obd_device *obd, char *poolname)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_del);
+
+ rc = OBP(obd, pool_del)(obd, poolname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_add);
+
+ rc = OBP(obd, pool_add)(obd, poolname, ostname);
+ RETURN(rc);
+}
+
+static inline int obd_pool_rem(struct obd_device *obd, char *poolname, char *ostname)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, pool_rem);
+
+ rc = OBP(obd, pool_rem)(obd, poolname, ostname);
+ RETURN(rc);
+}
+
+static inline void obd_getref(struct obd_device *obd)
+{
+ ENTRY;
+ if (OBT(obd) && OBP(obd, getref)) {
+ OBD_COUNTER_INCREMENT(obd, getref);
+ OBP(obd, getref)(obd);
+ }
+ EXIT;
+}
+
+static inline void obd_putref(struct obd_device *obd)
+{
+ ENTRY;
+ if (OBT(obd) && OBP(obd, putref)) {
+ OBD_COUNTER_INCREMENT(obd, putref);
+ OBP(obd, putref)(obd);
+ }
+ EXIT;
+}
+
+static inline int obd_init_export(struct obd_export *exp)
+{
+ int rc = 0;
+
+ ENTRY;
+ if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
+ OBP((exp)->exp_obd, init_export))
+ rc = OBP(exp->exp_obd, init_export)(exp);
+ RETURN(rc);
+}
+
+static inline int obd_destroy_export(struct obd_export *exp)
+{
+ ENTRY;
+ if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
+ OBP((exp)->exp_obd, destroy_export))
+ OBP(exp->exp_obd, destroy_export)(exp);
+ RETURN(0);
+}
+
+static inline int obd_extent_calc(struct obd_export *exp,
+ struct lov_stripe_md *md,
+ int cmd, obd_off *offset)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_DT_OP(exp, extent_calc);
+ rc = OBP(exp->exp_obd, extent_calc)(exp, md, cmd, offset);
+ RETURN(rc);
+}
+
+static inline struct dentry *
+obd_lvfs_fid2dentry(struct obd_export *exp, struct ost_id *oi, __u32 gen)
+{
+ struct lvfs_run_ctxt *ctxt = &exp->exp_obd->obd_lvfs_ctxt;
+ LASSERT(exp->exp_obd);
+
+ return ctxt->cb_ops.l_fid2dentry(ostid_id(oi), gen, ostid_seq(oi),
+ exp->exp_obd);
+}
+
+/* @max_age is the oldest time in jiffies that we accept using a cached data.
+ * If the cache is older than @max_age we will get a new value from the
+ * target. Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
+static inline int obd_statfs_async(struct obd_export *exp,
+ struct obd_info *oinfo,
+ __u64 max_age,
+ struct ptlrpc_request_set *rqset)
+{
+ int rc = 0;
+ struct obd_device *obd;
+ ENTRY;
+
+ if (exp == NULL || exp->exp_obd == NULL)
+ RETURN(-EINVAL);
+
+ obd = exp->exp_obd;
+ OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, statfs);
+
+ CDEBUG(D_SUPER, "%s: osfs %p age "LPU64", max_age "LPU64"\n",
+ obd->obd_name, &obd->obd_osfs, obd->obd_osfs_age, max_age);
+ if (cfs_time_before_64(obd->obd_osfs_age, max_age)) {
+ rc = OBP(obd, statfs_async)(exp, oinfo, max_age, rqset);
+ } else {
+ CDEBUG(D_SUPER,"%s: use %p cache blocks "LPU64"/"LPU64
+ " objects "LPU64"/"LPU64"\n",
+ obd->obd_name, &obd->obd_osfs,
+ obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
+ obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
+ spin_lock(&obd->obd_osfs_lock);
+ memcpy(oinfo->oi_osfs, &obd->obd_osfs, sizeof(*oinfo->oi_osfs));
+ spin_unlock(&obd->obd_osfs_lock);
+ oinfo->oi_flags |= OBD_STATFS_FROM_CACHE;
+ if (oinfo->oi_cb_up)
+ oinfo->oi_cb_up(oinfo, 0);
+ }
+ RETURN(rc);
+}
+
+static inline int obd_statfs_rqset(struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age,
+ __u32 flags)
+{
+ struct ptlrpc_request_set *set = NULL;
+ struct obd_info oinfo = { { { 0 } } };
+ int rc = 0;
+ ENTRY;
+
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ oinfo.oi_osfs = osfs;
+ oinfo.oi_flags = flags;
+ rc = obd_statfs_async(exp, &oinfo, max_age, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ RETURN(rc);
+}
+
+/* @max_age is the oldest time in jiffies that we accept using a cached data.
+ * If the cache is older than @max_age we will get a new value from the
+ * target. Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
+static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age,
+ __u32 flags)
+{
+ int rc = 0;
+ struct obd_device *obd = exp->exp_obd;
+ ENTRY;
+
+ if (obd == NULL)
+ RETURN(-EINVAL);
+
+ OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(obd, statfs);
+
+ CDEBUG(D_SUPER, "osfs "LPU64", max_age "LPU64"\n",
+ obd->obd_osfs_age, max_age);
+ if (cfs_time_before_64(obd->obd_osfs_age, max_age)) {
+ rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
+ if (rc == 0) {
+ spin_lock(&obd->obd_osfs_lock);
+ memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+ obd->obd_osfs_age = cfs_time_current_64();
+ spin_unlock(&obd->obd_osfs_lock);
+ }
+ } else {
+ CDEBUG(D_SUPER, "%s: use %p cache blocks "LPU64"/"LPU64
+ " objects "LPU64"/"LPU64"\n",
+ obd->obd_name, &obd->obd_osfs,
+ obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
+ obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
+ spin_lock(&obd->obd_osfs_lock);
+ memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+ spin_unlock(&obd->obd_osfs_lock);
+ }
+ RETURN(rc);
+}
+
+static inline int obd_sync_rqset(struct obd_export *exp, struct obd_info *oinfo,
+ obd_size start, obd_size end)
+{
+ struct ptlrpc_request_set *set = NULL;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+ EXP_COUNTER_INCREMENT(exp, sync);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ rc = OBP(exp->exp_obd, sync)(NULL, exp, oinfo, start, end, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ RETURN(rc);
+}
+
+static inline int obd_sync(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, obd_size start, obd_size end,
+ struct ptlrpc_request_set *set)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+ EXP_COUNTER_INCREMENT(exp, sync);
+
+ rc = OBP(exp->exp_obd, sync)(env, exp, oinfo, start, end, set);
+ RETURN(rc);
+}
+
+static inline int obd_punch_rqset(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct obd_trans_info *oti)
+{
+ struct ptlrpc_request_set *set = NULL;
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, punch);
+ EXP_COUNTER_INCREMENT(exp, punch);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ rc = OBP(exp->exp_obd, punch)(NULL, exp, oinfo, oti, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ RETURN(rc);
+}
+
+static inline int obd_punch(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, punch);
+ EXP_COUNTER_INCREMENT(exp, punch);
+
+ rc = OBP(exp->exp_obd, punch)(env, exp, oinfo, oti, rqset);
+ RETURN(rc);
+}
+
+static inline int obd_brw(int cmd, struct obd_export *exp,
+ struct obd_info *oinfo, obd_count oa_bufs,
+ struct brw_page *pg, struct obd_trans_info *oti)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, brw);
+ EXP_COUNTER_INCREMENT(exp, brw);
+
+ if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) {
+ CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
+ "or OBD_BRW_CHECK\n");
+ LBUG();
+ }
+
+ rc = OBP(exp->exp_obd, brw)(cmd, exp, oinfo, oa_bufs, pg, oti);
+ RETURN(rc);
+}
+
+static inline int obd_preprw(const struct lu_env *env, int cmd,
+ struct obd_export *exp, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *remote, int *pages,
+ struct niobuf_local *local,
+ struct obd_trans_info *oti,
+ struct lustre_capa *capa)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, preprw);
+ EXP_COUNTER_INCREMENT(exp, preprw);
+
+ rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
+ pages, local, oti, capa);
+ RETURN(rc);
+}
+
+static inline int obd_commitrw(const struct lu_env *env, int cmd,
+ struct obd_export *exp, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *rnb, int pages,
+ struct niobuf_local *local,
+ struct obd_trans_info *oti, int rc)
+{
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, commitrw);
+ EXP_COUNTER_INCREMENT(exp, commitrw);
+
+ rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
+ rnb, pages, local, oti, rc);
+ RETURN(rc);
+}
+
+static inline int obd_merge_lvb(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ struct ost_lvb *lvb, int kms_only)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, merge_lvb);
+ EXP_COUNTER_INCREMENT(exp, merge_lvb);
+
+ rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only);
+ RETURN(rc);
+}
+
+static inline int obd_adjust_kms(struct obd_export *exp,
+ struct lov_stripe_md *lsm, obd_off size,
+ int shrink)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, adjust_kms);
+ EXP_COUNTER_INCREMENT(exp, adjust_kms);
+
+ rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
+ RETURN(rc);
+}
+
+static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
+ int len, void *karg, void *uarg)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, iocontrol);
+ EXP_COUNTER_INCREMENT(exp, iocontrol);
+
+ rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg);
+ RETURN(rc);
+}
+
+static inline int obd_enqueue_rqset(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo)
+{
+ struct ptlrpc_request_set *set = NULL;
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, enqueue);
+ EXP_COUNTER_INCREMENT(exp, enqueue);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ rc = OBP(exp->exp_obd, enqueue)(exp, oinfo, einfo, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ RETURN(rc);
+}
+
+static inline int obd_enqueue(struct obd_export *exp,
+ struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct ptlrpc_request_set *set)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, enqueue);
+ EXP_COUNTER_INCREMENT(exp, enqueue);
+
+ rc = OBP(exp->exp_obd, enqueue)(exp, oinfo, einfo, set);
+ RETURN(rc);
+}
+
+static inline int obd_change_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ ldlm_iterator_t it, void *data)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, change_cbdata);
+ EXP_COUNTER_INCREMENT(exp, change_cbdata);
+
+ rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data);
+ RETURN(rc);
+}
+
+static inline int obd_find_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ ldlm_iterator_t it, void *data)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, find_cbdata);
+ EXP_COUNTER_INCREMENT(exp, find_cbdata);
+
+ rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
+ RETURN(rc);
+}
+
+static inline int obd_cancel(struct obd_export *exp,
+ struct lov_stripe_md *ea, __u32 mode,
+ struct lustre_handle *lockh)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, cancel);
+ EXP_COUNTER_INCREMENT(exp, cancel);
+
+ rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh);
+ RETURN(rc);
+}
+
+static inline int obd_cancel_unused(struct obd_export *exp,
+ struct lov_stripe_md *ea,
+ ldlm_cancel_flags_t flags,
+ void *opaque)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, cancel_unused);
+ EXP_COUNTER_INCREMENT(exp, cancel_unused);
+
+ rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque);
+ RETURN(rc);
+}
+
+static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct obd_client_handle *handle,
+ int flag)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, pin);
+ EXP_COUNTER_INCREMENT(exp, pin);
+
+ rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag);
+ RETURN(rc);
+}
+
+static inline int obd_unpin(struct obd_export *exp,
+ struct obd_client_handle *handle, int flag)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, unpin);
+ EXP_COUNTER_INCREMENT(exp, unpin);
+
+ rc = OBP(exp->exp_obd, unpin)(exp, handle, flag);
+ RETURN(rc);
+}
+
+
+static inline void obd_import_event(struct obd_device *obd,
+ struct obd_import *imp,
+ enum obd_import_event event)
+{
+ ENTRY;
+ if (!obd) {
+ CERROR("NULL device\n");
+ EXIT;
+ return;
+ }
+ if (obd->obd_set_up && OBP(obd, import_event)) {
+ OBD_COUNTER_INCREMENT(obd, import_event);
+ OBP(obd, import_event)(obd, imp, event);
+ }
+ EXIT;
+}
+
+static inline int obd_llog_connect(struct obd_export *exp,
+ struct llogd_conn_body *body)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, llog_connect, 0);
+ EXP_COUNTER_INCREMENT(exp, llog_connect);
+
+ rc = OBP(exp->exp_obd, llog_connect)(exp, body);
+ RETURN(rc);
+}
+
+
+static inline int obd_notify(struct obd_device *obd,
+ struct obd_device *watched,
+ enum obd_notify_event ev,
+ void *data)
+{
+ int rc;
+ ENTRY;
+ OBD_CHECK_DEV(obd);
+
+ /* the check for async_recov is a complete hack - I'm hereby
+ overloading the meaning to also mean "this was called from
+ mds_postsetup". I know that my mds is able to handle notifies
+ by this point, and it needs to get them to execute mds_postrecov. */
+ if (!obd->obd_set_up && !obd->obd_async_recov) {
+ CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
+ RETURN(-EINVAL);
+ }
+
+ if (!OBP(obd, notify)) {
+ CDEBUG(D_HA, "obd %s has no notify handler\n", obd->obd_name);
+ RETURN(-ENOSYS);
+ }
+
+ OBD_COUNTER_INCREMENT(obd, notify);
+ rc = OBP(obd, notify)(obd, watched, ev, data);
+ RETURN(rc);
+}
+
+static inline int obd_notify_observer(struct obd_device *observer,
+ struct obd_device *observed,
+ enum obd_notify_event ev,
+ void *data)
+{
+ int rc1;
+ int rc2;
+
+ struct obd_notify_upcall *onu;
+
+ if (observer->obd_observer)
+ rc1 = obd_notify(observer->obd_observer, observed, ev, data);
+ else
+ rc1 = 0;
+ /*
+ * Also, call non-obd listener, if any
+ */
+ onu = &observer->obd_upcall;
+ if (onu->onu_upcall != NULL)
+ rc2 = onu->onu_upcall(observer, observed, ev,
+ onu->onu_owner, NULL);
+ else
+ rc2 = 0;
+
+ return rc1 ? rc1 : rc2;
+}
+
+static inline int obd_quotacheck(struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, quotacheck);
+ EXP_COUNTER_INCREMENT(exp, quotacheck);
+
+ rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
+ RETURN(rc);
+}
+
+static inline int obd_quotactl(struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, quotactl);
+ EXP_COUNTER_INCREMENT(exp, quotactl);
+
+ rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl);
+ RETURN(rc);
+}
+
+static inline int obd_health_check(const struct lu_env *env,
+ struct obd_device *obd)
+{
+ /* returns: 0 on healthy
+ * >0 on unhealthy + reason code/flag
+ * however the only suppored reason == 1 right now
+ * We'll need to define some better reasons
+ * or flags in the future.
+ * <0 on error
+ */
+ int rc;
+ ENTRY;
+
+ /* don't use EXP_CHECK_DT_OP, because NULL method is normal here */
+ if (obd == NULL || !OBT(obd)) {
+ CERROR("cleaned up obd\n");
+ RETURN(-EOPNOTSUPP);
+ }
+ if (!obd->obd_set_up || obd->obd_stopping)
+ RETURN(0);
+ if (!OBP(obd, health_check))
+ RETURN(0);
+
+ rc = OBP(obd, health_check)(env, obd);
+ RETURN(rc);
+}
+
+static inline int obd_register_observer(struct obd_device *obd,
+ struct obd_device *observer)
+{
+ ENTRY;
+ OBD_CHECK_DEV(obd);
+ down_write(&obd->obd_observer_link_sem);
+ if (obd->obd_observer && observer) {
+ up_write(&obd->obd_observer_link_sem);
+ RETURN(-EALREADY);
+ }
+ obd->obd_observer = observer;
+ up_write(&obd->obd_observer_link_sem);
+ RETURN(0);
+}
+
+static inline int obd_pin_observer(struct obd_device *obd,
+ struct obd_device **observer)
+{
+ ENTRY;
+ down_read(&obd->obd_observer_link_sem);
+ if (!obd->obd_observer) {
+ *observer = NULL;
+ up_read(&obd->obd_observer_link_sem);
+ RETURN(-ENOENT);
+ }
+ *observer = obd->obd_observer;
+ RETURN(0);
+}
+
+static inline int obd_unpin_observer(struct obd_device *obd)
+{
+ ENTRY;
+ up_read(&obd->obd_observer_link_sem);
+ RETURN(0);
+}
+
+#if 0
+static inline int obd_register_page_removal_cb(struct obd_export *exp,
+ obd_page_removal_cb_t cb,
+ obd_pin_extent_cb pin_cb)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, register_page_removal_cb, 0);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, register_page_removal_cb);
+
+ rc = OBP(exp->exp_obd, register_page_removal_cb)(exp, cb, pin_cb);
+ RETURN(rc);
+}
+
+static inline int obd_unregister_page_removal_cb(struct obd_export *exp,
+ obd_page_removal_cb_t cb)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, unregister_page_removal_cb, 0);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, unregister_page_removal_cb);
+
+ rc = OBP(exp->exp_obd, unregister_page_removal_cb)(exp, cb);
+ RETURN(rc);
+}
+
+static inline int obd_register_lock_cancel_cb(struct obd_export *exp,
+ obd_lock_cancel_cb cb)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, register_lock_cancel_cb, 0);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, register_lock_cancel_cb);
+
+ rc = OBP(exp->exp_obd, register_lock_cancel_cb)(exp, cb);
+ RETURN(rc);
+}
+
+static inline int obd_unregister_lock_cancel_cb(struct obd_export *exp,
+ obd_lock_cancel_cb cb)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_DT_OP(exp->exp_obd, unregister_lock_cancel_cb, 0);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, unregister_lock_cancel_cb);
+
+ rc = OBP(exp->exp_obd, unregister_lock_cancel_cb)(exp, cb);
+ RETURN(rc);
+}
+#endif
+
+/* metadata helpers */
+static inline int md_getstatus(struct obd_export *exp,
+ struct lu_fid *fid, struct obd_capa **pc)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_MD_OP(exp, getstatus);
+ EXP_MD_COUNTER_INCREMENT(exp, getstatus);
+ rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc);
+ RETURN(rc);
+}
+
+static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, getattr);
+ EXP_MD_COUNTER_INCREMENT(exp, getattr);
+ rc = MDP(exp->exp_obd, getattr)(exp, op_data, request);
+ RETURN(rc);
+}
+
+static inline int md_null_inode(struct obd_export *exp,
+ const struct lu_fid *fid)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, null_inode);
+ EXP_MD_COUNTER_INCREMENT(exp, null_inode);
+ rc = MDP(exp->exp_obd, null_inode)(exp, fid);
+ RETURN(rc);
+}
+
+static inline int md_find_cbdata(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, find_cbdata);
+ EXP_MD_COUNTER_INCREMENT(exp, find_cbdata);
+ rc = MDP(exp->exp_obd, find_cbdata)(exp, fid, it, data);
+ RETURN(rc);
+}
+
+static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_open_data *mod,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, close);
+ EXP_MD_COUNTER_INCREMENT(exp, close);
+ rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request);
+ RETURN(rc);
+}
+
+static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
+ const void *data, int datalen, int mode, __u32 uid,
+ __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, create);
+ EXP_MD_COUNTER_INCREMENT(exp, create);
+ rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode,
+ uid, gid, cap_effective, rdev, request);
+ RETURN(rc);
+}
+
+static inline int md_done_writing(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct md_open_data *mod)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, done_writing);
+ EXP_MD_COUNTER_INCREMENT(exp, done_writing);
+ rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
+ RETURN(rc);
+}
+
+static inline int md_enqueue(struct obd_export *exp,
+ struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it,
+ struct md_op_data *op_data,
+ struct lustre_handle *lockh,
+ void *lmm, int lmmsize,
+ struct ptlrpc_request **req,
+ int extra_lock_flags)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, enqueue);
+ EXP_MD_COUNTER_INCREMENT(exp, enqueue);
+ rc = MDP(exp->exp_obd, enqueue)(exp, einfo, it, op_data, lockh,
+ lmm, lmmsize, req, extra_lock_flags);
+ RETURN(rc);
+}
+
+static inline int md_getattr_name(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, getattr_name);
+ EXP_MD_COUNTER_INCREMENT(exp, getattr_name);
+ rc = MDP(exp->exp_obd, getattr_name)(exp, op_data, request);
+ RETURN(rc);
+}
+
+static inline int md_intent_lock(struct obd_export *exp,
+ struct md_op_data *op_data, void *lmm,
+ int lmmsize, struct lookup_intent *it,
+ int lookup_flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, intent_lock);
+ EXP_MD_COUNTER_INCREMENT(exp, intent_lock);
+ rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
+ it, lookup_flags, reqp, cb_blocking,
+ extra_lock_flags);
+ RETURN(rc);
+}
+
+static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, link);
+ EXP_MD_COUNTER_INCREMENT(exp, link);
+ rc = MDP(exp->exp_obd, link)(exp, op_data, request);
+ RETURN(rc);
+}
+
+static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new,
+ int newlen, struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, rename);
+ EXP_MD_COUNTER_INCREMENT(exp, rename);
+ rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new,
+ newlen, request);
+ RETURN(rc);
+}
+
+static inline int md_is_subdir(struct obd_export *exp,
+ const struct lu_fid *pfid,
+ const struct lu_fid *cfid,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, is_subdir);
+ EXP_MD_COUNTER_INCREMENT(exp, is_subdir);
+ rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
+ RETURN(rc);
+}
+
+static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request,
+ struct md_open_data **mod)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, setattr);
+ EXP_MD_COUNTER_INCREMENT(exp, setattr);
+ rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
+ ea2, ea2len, request, mod);
+ RETURN(rc);
+}
+
+static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, sync);
+ EXP_MD_COUNTER_INCREMENT(exp, sync);
+ rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request);
+ RETURN(rc);
+}
+
+static inline int md_readpage(struct obd_export *exp, struct md_op_data *opdata,
+ struct page **pages,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, readpage);
+ EXP_MD_COUNTER_INCREMENT(exp, readpage);
+ rc = MDP(exp->exp_obd, readpage)(exp, opdata, pages, request);
+ RETURN(rc);
+}
+
+static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, unlink);
+ EXP_MD_COUNTER_INCREMENT(exp, unlink);
+ rc = MDP(exp->exp_obd, unlink)(exp, op_data, request);
+ RETURN(rc);
+}
+
+static inline int md_get_lustre_md(struct obd_export *exp,
+ struct ptlrpc_request *req,
+ struct obd_export *dt_exp,
+ struct obd_export *md_exp,
+ struct lustre_md *md)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, get_lustre_md);
+ EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md);
+ RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, dt_exp, md_exp, md));
+}
+
+static inline int md_free_lustre_md(struct obd_export *exp,
+ struct lustre_md *md)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, free_lustre_md);
+ EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md);
+ RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md));
+}
+
+static inline int md_setxattr(struct obd_export *exp,
+ const struct lu_fid *fid, struct obd_capa *oc,
+ obd_valid valid, const char *name,
+ const char *input, int input_size,
+ int output_size, int flags, __u32 suppgid,
+ struct ptlrpc_request **request)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, setxattr);
+ EXP_MD_COUNTER_INCREMENT(exp, setxattr);
+ RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input,
+ input_size, output_size, flags,
+ suppgid, request));
+}
+
+static inline int md_getxattr(struct obd_export *exp,
+ const struct lu_fid *fid, struct obd_capa *oc,
+ obd_valid valid, const char *name,
+ const char *input, int input_size,
+ int output_size, int flags,
+ struct ptlrpc_request **request)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, getxattr);
+ EXP_MD_COUNTER_INCREMENT(exp, getxattr);
+ RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input,
+ input_size, output_size, flags,
+ request));
+}
+
+static inline int md_set_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och,
+ struct ptlrpc_request *open_req)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, set_open_replay_data);
+ EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data);
+ RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req));
+}
+
+static inline int md_clear_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, clear_open_replay_data);
+ EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data);
+ RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och));
+}
+
+static inline int md_set_lock_data(struct obd_export *exp,
+ __u64 *lockh, void *data, __u64 *bits)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, set_lock_data);
+ EXP_MD_COUNTER_INCREMENT(exp, set_lock_data);
+ RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data, bits));
+}
+
+static inline int md_cancel_unused(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags,
+ void *opaque)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_MD_OP(exp, cancel_unused);
+ EXP_MD_COUNTER_INCREMENT(exp, cancel_unused);
+
+ rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, policy, mode,
+ flags, opaque);
+ RETURN(rc);
+}
+
+static inline ldlm_mode_t md_lock_match(struct obd_export *exp, __u64 flags,
+ const struct lu_fid *fid,
+ ldlm_type_t type,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode,
+ struct lustre_handle *lockh)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, lock_match);
+ EXP_MD_COUNTER_INCREMENT(exp, lock_match);
+ RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type,
+ policy, mode, lockh));
+}
+
+static inline int md_init_ea_size(struct obd_export *exp, int easize,
+ int def_asize, int cookiesize)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, init_ea_size);
+ EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
+ RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
+ cookiesize));
+}
+
+static inline int md_get_remote_perm(struct obd_export *exp,
+ const struct lu_fid *fid,
+ struct obd_capa *oc, __u32 suppgid,
+ struct ptlrpc_request **request)
+{
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, get_remote_perm);
+ EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm);
+ RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, suppgid,
+ request));
+}
+
+static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+ renew_capa_cb_t cb)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, renew_capa);
+ EXP_MD_COUNTER_INCREMENT(exp, renew_capa);
+ rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb);
+ RETURN(rc);
+}
+
+static inline int md_unpack_capa(struct obd_export *exp,
+ struct ptlrpc_request *req,
+ const struct req_msg_field *field,
+ struct obd_capa **oc)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, unpack_capa);
+ EXP_MD_COUNTER_INCREMENT(exp, unpack_capa);
+ rc = MDP(exp->exp_obd, unpack_capa)(exp, req, field, oc);
+ RETURN(rc);
+}
+
+static inline int md_intent_getattr_async(struct obd_export *exp,
+ struct md_enqueue_info *minfo,
+ struct ldlm_enqueue_info *einfo)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, intent_getattr_async);
+ EXP_MD_COUNTER_INCREMENT(exp, intent_getattr_async);
+ rc = MDP(exp->exp_obd, intent_getattr_async)(exp, minfo, einfo);
+ RETURN(rc);
+}
+
+static inline int md_revalidate_lock(struct obd_export *exp,
+ struct lookup_intent *it,
+ struct lu_fid *fid, __u64 *bits)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, revalidate_lock);
+ EXP_MD_COUNTER_INCREMENT(exp, revalidate_lock);
+ rc = MDP(exp->exp_obd, revalidate_lock)(exp, it, fid, bits);
+ RETURN(rc);
+}
+
+
+/* OBD Metadata Support */
+
+extern int obd_init_caches(void);
+extern void obd_cleanup_caches(void);
+
+/* support routines */
+extern struct kmem_cache *obdo_cachep;
+
+#define OBDO_ALLOC(ptr) \
+do { \
+ OBD_SLAB_ALLOC_PTR_GFP((ptr), obdo_cachep, __GFP_IO); \
+} while(0)
+
+#define OBDO_FREE(ptr) \
+do { \
+ OBD_SLAB_FREE_PTR((ptr), obdo_cachep); \
+} while(0)
+
+
+static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
+{
+ /* something here */
+}
+
+static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
+{
+ /* something here */
+}
+
+typedef int (*register_lwp_cb)(void *data);
+
+struct lwp_register_item {
+ struct obd_export **lri_exp;
+ register_lwp_cb lri_cb_func;
+ void *lri_cb_data;
+ struct list_head lri_list;
+ char lri_name[MTI_NAME_MAXLEN];
+};
+
+/* I'm as embarrassed about this as you are.
+ *
+ * <shaver> // XXX do not look into _superhack with remaining eye
+ * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
+extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+
+/* obd_mount.c */
+
+/* sysctl.c */
+extern void obd_sysctl_init (void);
+extern void obd_sysctl_clean (void);
+
+/* uuid.c */
+typedef __u8 class_uuid_t[16];
+void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+
+/* lustre_peer.c */
+int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index);
+int class_add_uuid(const char *uuid, __u64 nid);
+int class_del_uuid (const char *uuid);
+int class_check_uuid(struct obd_uuid *uuid, __u64 nid);
+void class_init_uuidlist(void);
+void class_exit_uuidlist(void);
+
+/* mea.c */
+int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen);
+int raw_name2idx(int hashtype, int count, const char *name, int namelen);
+
+/* prng.c */
+#define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
+
+#endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_lov.h b/drivers/staging/lustre/lustre/include/obd_lov.h
new file mode 100644
index 000000000000..d82f3341d0a8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_lov.h
@@ -0,0 +1,126 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_LOV_H__
+#define _OBD_LOV_H__
+
+#define LOV_DEFAULT_STRIPE_SIZE (1 << LNET_MTU_BITS)
+
+static inline int lov_stripe_md_size(__u16 stripes)
+{
+ return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
+}
+
+static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic)
+{
+ if (lmm_magic == LOV_MAGIC_V3)
+ return sizeof(struct lov_mds_md_v3) +
+ stripes * sizeof(struct lov_ost_data_v1);
+ else
+ return sizeof(struct lov_mds_md_v1) +
+ stripes * sizeof(struct lov_ost_data_v1);
+}
+
+struct lov_version_size {
+ __u32 lvs_magic;
+ size_t lvs_lmm_size;
+ size_t lvs_lod_size;
+};
+
+static inline __u32 lov_mds_md_stripecnt(int ea_size, __u32 lmm_magic)
+{
+ static const struct lov_version_size lmm_ver_size[] = {
+ { .lvs_magic = LOV_MAGIC_V3,
+ .lvs_lmm_size = sizeof(struct lov_mds_md_v3),
+ .lvs_lod_size = sizeof(struct lov_ost_data_v1) },
+ { .lvs_magic = LOV_MAGIC_V1,
+ .lvs_lmm_size = sizeof(struct lov_mds_md_v1),
+ .lvs_lod_size = sizeof(struct lov_ost_data_v1)} };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lmm_ver_size); i++) {
+ if (lmm_magic == lmm_ver_size[i].lvs_magic) {
+ if (ea_size <= lmm_ver_size[i].lvs_lmm_size)
+ return 0;
+ return (ea_size - lmm_ver_size[i].lvs_lmm_size) /
+ lmm_ver_size[i].lvs_lod_size;
+ }
+ }
+
+ /* Invalid LOV magic, so no stripes could fit */
+ return 0;
+}
+
+/* lov_do_div64(a, b) returns a % b, and a = a / b.
+ * The 32-bit code is LOV-specific due to knowing about stripe limits in
+ * order to reduce the divisor to a 32-bit number. If the divisor is
+ * already a 32-bit value the compiler handles this directly. */
+#if BITS_PER_LONG > 32
+# define lov_do_div64(n,base) ({ \
+ uint64_t __base = (base); \
+ uint64_t __rem; \
+ __rem = ((uint64_t)(n)) % __base; \
+ (n) = ((uint64_t)(n)) / __base; \
+ __rem; \
+ })
+#else
+# define lov_do_div64(n,base) ({ \
+ uint64_t __rem; \
+ if ((sizeof(base) > 4) && (((base) & 0xffffffff00000000ULL) != 0)) { \
+ int __remainder; \
+ LASSERTF(!((base) & (LOV_MIN_STRIPE_SIZE - 1)), "64 bit lov " \
+ "division %llu / %llu\n", (n), (uint64_t)(base)); \
+ __remainder = (n) & (LOV_MIN_STRIPE_SIZE - 1); \
+ (n) >>= LOV_MIN_STRIPE_BITS; \
+ __rem = do_div(n, (base) >> LOV_MIN_STRIPE_BITS); \
+ __rem <<= LOV_MIN_STRIPE_BITS; \
+ __rem += __remainder; \
+ } else { \
+ __rem = do_div(n, base); \
+ } \
+ __rem; \
+ })
+#endif
+
+#define IOC_LOV_TYPE 'g'
+#define IOC_LOV_MIN_NR 50
+#define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long)
+#define IOC_LOV_MAX_NR 50
+
+#define QOS_DEFAULT_THRESHOLD 10 /* MB */
+#define QOS_DEFAULT_MAXAGE 5 /* Seconds */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_ost.h b/drivers/staging/lustre/lustre/include/obd_ost.h
new file mode 100644
index 000000000000..af89843c312b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_ost.h
@@ -0,0 +1,96 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/obd_ost.h
+ *
+ * Data structures for object storage targets and client: OST & OSC's
+ *
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_OST_H
+#define _LUSTRE_OST_H
+
+#include <obd_class.h>
+
+struct osc_brw_async_args {
+ struct obdo *aa_oa;
+ int aa_requested_nob;
+ int aa_nio_count;
+ obd_count aa_page_count;
+ int aa_resends;
+ struct brw_page **aa_ppga;
+ struct client_obd *aa_cli;
+ struct list_head aa_oaps;
+ struct list_head aa_exts;
+ struct obd_capa *aa_ocapa;
+ struct cl_req *aa_clerq;
+};
+
+#define osc_grant_args osc_brw_async_args
+struct osc_async_args {
+ struct obd_info *aa_oi;
+};
+
+struct osc_setattr_args {
+ struct obdo *sa_oa;
+ obd_enqueue_update_f sa_upcall;
+ void *sa_cookie;
+};
+
+struct osc_fsync_args {
+ struct obd_info *fa_oi;
+ obd_enqueue_update_f fa_upcall;
+ void *fa_cookie;
+};
+
+struct osc_enqueue_args {
+ struct obd_export *oa_exp;
+ __u64 *oa_flags;
+ obd_enqueue_update_f oa_upcall;
+ void *oa_cookie;
+ struct ost_lvb *oa_lvb;
+ struct lustre_handle *oa_lockh;
+ struct ldlm_enqueue_info *oa_ei;
+ unsigned int oa_agl:1;
+};
+
+#if 0
+int osc_extent_blocking_cb(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *new, void *data,
+ int flag);
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
new file mode 100644
index 000000000000..b5d40afc3599
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -0,0 +1,851 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_SUPPORT
+#define _OBD_SUPPORT
+
+#include <linux/libcfs/libcfs.h>
+#include <lvfs.h>
+#include <lprocfs_status.h>
+
+#include <linux/obd_support.h>
+
+/* global variables */
+extern struct lprocfs_stats *obd_memory;
+enum {
+ OBD_MEMORY_STAT = 0,
+ OBD_MEMORY_PAGES_STAT = 1,
+ OBD_STATS_NUM,
+};
+
+extern unsigned int obd_debug_peer_on_timeout;
+extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_dump_on_eviction;
+/* obd_timeout should only be used for recovery, not for
+ networking / disk / timings affected by load (use Adaptive Timeouts) */
+extern unsigned int obd_timeout; /* seconds */
+extern unsigned int ldlm_timeout; /* seconds */
+extern unsigned int obd_timeout_set;
+extern unsigned int ldlm_timeout_set;
+extern unsigned int at_min;
+extern unsigned int at_max;
+extern unsigned int at_history;
+extern int at_early_margin;
+extern int at_extra;
+extern unsigned int obd_sync_filter;
+extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_dirty_pages;
+extern atomic_t obd_dirty_transit_pages;
+extern unsigned int obd_alloc_fail_rate;
+extern char obd_jobid_var[];
+
+/* lvfs.c */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+ size_t size, const char *file, int line);
+
+/* Some hash init argument constants */
+#define HASH_POOLS_BKT_BITS 3
+#define HASH_POOLS_CUR_BITS 3
+#define HASH_POOLS_MAX_BITS 7
+#define HASH_UUID_BKT_BITS 5
+#define HASH_UUID_CUR_BITS 7
+#define HASH_UUID_MAX_BITS 12
+#define HASH_NID_BKT_BITS 5
+#define HASH_NID_CUR_BITS 7
+#define HASH_NID_MAX_BITS 12
+#define HASH_NID_STATS_BKT_BITS 5
+#define HASH_NID_STATS_CUR_BITS 7
+#define HASH_NID_STATS_MAX_BITS 12
+#define HASH_LQE_BKT_BITS 5
+#define HASH_LQE_CUR_BITS 7
+#define HASH_LQE_MAX_BITS 12
+#define HASH_CONN_BKT_BITS 5
+#define HASH_CONN_CUR_BITS 5
+#define HASH_CONN_MAX_BITS 15
+#define HASH_EXP_LOCK_BKT_BITS 5
+#define HASH_EXP_LOCK_CUR_BITS 7
+#define HASH_EXP_LOCK_MAX_BITS 16
+#define HASH_CL_ENV_BKT_BITS 5
+#define HASH_CL_ENV_BITS 10
+#define HASH_JOB_STATS_BKT_BITS 5
+#define HASH_JOB_STATS_CUR_BITS 7
+#define HASH_JOB_STATS_MAX_BITS 12
+
+/* Timeout definitions */
+#define OBD_TIMEOUT_DEFAULT 100
+#define LDLM_TIMEOUT_DEFAULT 20
+#define MDS_LDLM_TIMEOUT_DEFAULT 6
+/* Time to wait for all clients to reconnect during recovery (hard limit) */
+#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9)
+/* Time to wait for all clients to reconnect during recovery (soft limit) */
+/* Should be very conservative; must catch the first reconnect after reboot */
+#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3)
+/* Change recovery-small 26b time if you change this */
+#define PING_INTERVAL max(obd_timeout / 4, 1U)
+/* a bit more than maximal journal commit time in seconds */
+#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U)
+/* Client may skip 1 ping; we must wait at least 2.5. But for multiple
+ * failover targets the client only pings one server at a time, and pings
+ * can be lost on a loaded network. Since eviction has serious consequences,
+ * and there's no urgent need to evict a client just because it's idle, we
+ * should be very conservative here. */
+#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
+#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */
+#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
+ /* Max connect interval for nonresponsive servers; ~50s to avoid building up
+ connect requests in the LND queues, but within obd_timeout so we don't
+ miss the recovery window */
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
+#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */
+/* In general this should be low to have quick detection of a system
+ running on a backup server. (If it's too low, import_select_connection
+ will increase the timeout anyhow.) */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
+#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
+ INITIAL_CONNECT_TIMEOUT)
+/* The min time a target should wait for clients to reconnect in recovery */
+#define OBD_RECOVERY_TIME_MIN (2*RECONNECT_DELAY_MAX)
+#define OBD_IR_FACTOR_MIN 1
+#define OBD_IR_FACTOR_MAX 10
+#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX/2)
+/* default timeout for the MGS to become IR_FULL */
+#define OBD_IR_MGS_TIMEOUT (4*obd_timeout)
+#define LONG_UNLINK 300 /* Unlink should happen before now */
+
+/**
+ * Time interval of shrink, if the client is "idle" more than this interval,
+ * then the ll_grant thread will return the requested grant space to filter
+ */
+#define GRANT_SHRINK_INTERVAL 1200/*20 minutes*/
+
+#define OBD_FAIL_MDS 0x100
+#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101
+#define OBD_FAIL_MDS_GETATTR_NET 0x102
+#define OBD_FAIL_MDS_GETATTR_PACK 0x103
+#define OBD_FAIL_MDS_READPAGE_NET 0x104
+#define OBD_FAIL_MDS_READPAGE_PACK 0x105
+#define OBD_FAIL_MDS_SENDPAGE 0x106
+#define OBD_FAIL_MDS_REINT_NET 0x107
+#define OBD_FAIL_MDS_REINT_UNPACK 0x108
+#define OBD_FAIL_MDS_REINT_SETATTR 0x109
+#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a
+#define OBD_FAIL_MDS_REINT_CREATE 0x10b
+#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c
+#define OBD_FAIL_MDS_REINT_UNLINK 0x10d
+#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e
+#define OBD_FAIL_MDS_REINT_LINK 0x10f
+#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110
+#define OBD_FAIL_MDS_REINT_RENAME 0x111
+#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112
+#define OBD_FAIL_MDS_OPEN_NET 0x113
+#define OBD_FAIL_MDS_OPEN_PACK 0x114
+#define OBD_FAIL_MDS_CLOSE_NET 0x115
+#define OBD_FAIL_MDS_CLOSE_PACK 0x116
+#define OBD_FAIL_MDS_CONNECT_NET 0x117
+#define OBD_FAIL_MDS_CONNECT_PACK 0x118
+#define OBD_FAIL_MDS_REINT_NET_REP 0x119
+#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a
+#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b
+#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c
+#define OBD_FAIL_MDS_STATFS_PACK 0x11d
+#define OBD_FAIL_MDS_STATFS_NET 0x11e
+#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
+#define OBD_FAIL_MDS_PIN_NET 0x120
+#define OBD_FAIL_MDS_UNPIN_NET 0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123
+#define OBD_FAIL_MDS_SYNC_NET 0x124
+#define OBD_FAIL_MDS_SYNC_PACK 0x125
+#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126
+#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127
+#define OBD_FAIL_MDS_ALLOC_OBDO 0x128
+#define OBD_FAIL_MDS_PAUSE_OPEN 0x129
+#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a
+#define OBD_FAIL_MDS_OPEN_CREATE 0x12b
+#define OBD_FAIL_MDS_OST_SETATTR 0x12c
+#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d
+#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e
+#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
+#define OBD_FAIL_MDS_GETXATTR_NET 0x130
+#define OBD_FAIL_MDS_GETXATTR_PACK 0x131
+#define OBD_FAIL_MDS_SETXATTR_NET 0x132
+#define OBD_FAIL_MDS_SETXATTR 0x133
+#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134
+#define OBD_FAIL_MDS_FS_SETUP 0x135
+#define OBD_FAIL_MDS_RESEND 0x136
+#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137
+#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138
+#define OBD_FAIL_MDS_OSC_PRECREATE 0x139
+#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
+#define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
+#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c
+#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d
+#define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e
+#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
+#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141
+#define OBD_FAIL_MDS_REINT_DELAY 0x142
+#define OBD_FAIL_MDS_READLINK_EPROTO 0x143
+#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
+#define OBD_FAIL_MDS_PDO_LOCK 0x145
+#define OBD_FAIL_MDS_PDO_LOCK2 0x146
+#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147
+#define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148
+#define OBD_FAIL_MDS_HSM_STATE_GET_NET 0x149
+#define OBD_FAIL_MDS_HSM_STATE_SET_NET 0x14a
+#define OBD_FAIL_MDS_HSM_PROGRESS_NET 0x14b
+#define OBD_FAIL_MDS_HSM_REQUEST_NET 0x14c
+#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET 0x14d
+#define OBD_FAIL_MDS_HSM_CT_UNREGISTER_NET 0x14e
+#define OBD_FAIL_MDS_SWAP_LAYOUTS_NET 0x14f
+#define OBD_FAIL_MDS_HSM_ACTION_NET 0x150
+#define OBD_FAIL_MDS_CHANGELOG_INIT 0x151
+
+/* layout lock */
+#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170
+#define OBD_FAIL_MDS_NO_LL_OPEN 0x171
+#define OBD_FAIL_MDS_LL_BLOCK 0x172
+
+/* CMD */
+#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180
+#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x181
+#define OBD_FAIL_MDS_SET_INFO_NET 0x182
+#define OBD_FAIL_MDS_WRITEPAGE_NET 0x183
+#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184
+#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185
+#define OBD_FAIL_MDS_GET_INFO_NET 0x186
+#define OBD_FAIL_MDS_DQACQ_NET 0x187
+
+/* OI scrub */
+#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
+#define OBD_FAIL_OSD_SCRUB_CRASH 0x191
+#define OBD_FAIL_OSD_SCRUB_FATAL 0x192
+#define OBD_FAIL_OSD_FID_MAPPING 0x193
+#define OBD_FAIL_OSD_LMA_INCOMPAT 0x194
+
+#define OBD_FAIL_OST 0x200
+#define OBD_FAIL_OST_CONNECT_NET 0x201
+#define OBD_FAIL_OST_DISCONNECT_NET 0x202
+#define OBD_FAIL_OST_GET_INFO_NET 0x203
+#define OBD_FAIL_OST_CREATE_NET 0x204
+#define OBD_FAIL_OST_DESTROY_NET 0x205
+#define OBD_FAIL_OST_GETATTR_NET 0x206
+#define OBD_FAIL_OST_SETATTR_NET 0x207
+#define OBD_FAIL_OST_OPEN_NET 0x208
+#define OBD_FAIL_OST_CLOSE_NET 0x209
+#define OBD_FAIL_OST_BRW_NET 0x20a
+#define OBD_FAIL_OST_PUNCH_NET 0x20b
+#define OBD_FAIL_OST_STATFS_NET 0x20c
+#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d
+#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e
+#define OBD_FAIL_OST_BRW_READ_BULK 0x20f
+#define OBD_FAIL_OST_SYNC_NET 0x210
+#define OBD_FAIL_OST_ALL_REPLY_NET 0x211
+#define OBD_FAIL_OST_ALL_REQUEST_NET 0x212
+#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213
+#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
+#define OBD_FAIL_OST_ENOSPC 0x215
+#define OBD_FAIL_OST_EROFS 0x216
+#define OBD_FAIL_OST_ENOENT 0x217
+#define OBD_FAIL_OST_QUOTACHECK_NET 0x218
+#define OBD_FAIL_OST_QUOTACTL_NET 0x219
+#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a
+#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b
+#define OBD_FAIL_OST_BRW_SIZE 0x21c
+#define OBD_FAIL_OST_DROP_REQ 0x21d
+#define OBD_FAIL_OST_SETATTR_CREDITS 0x21e
+#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f
+#define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220
+#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+#define OBD_FAIL_OST_PAUSE_CREATE 0x223
+#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224
+#define OBD_FAIL_OST_CONNECT_NET2 0x225
+#define OBD_FAIL_OST_NOMEM 0x226
+#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227
+#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228
+#define OBD_FAIL_OST_ENOINO 0x229
+#define OBD_FAIL_OST_DQACQ_NET 0x230
+#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231
+
+#define OBD_FAIL_LDLM 0x300
+#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
+#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302
+#define OBD_FAIL_LDLM_CONVERT_NET 0x303
+#define OBD_FAIL_LDLM_CANCEL_NET 0x304
+#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
+#define OBD_FAIL_LDLM_CP_CALLBACK_NET 0x306
+#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307
+#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
+#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
+#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a
+#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
+#define OBD_FAIL_LDLM_REPLY 0x30c
+#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d
+#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
+#define OBD_FAIL_LDLM_GLIMPSE 0x30f
+#define OBD_FAIL_LDLM_CANCEL_RACE 0x310
+#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311
+#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
+#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314
+#define OBD_FAIL_LDLM_CP_CB_WAIT 0x315
+#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316
+#define OBD_FAIL_LDLM_INTR_CP_AST 0x317
+#define OBD_FAIL_LDLM_CP_BL_RACE 0x318
+#define OBD_FAIL_LDLM_NEW_LOCK 0x319
+#define OBD_FAIL_LDLM_AGL_DELAY 0x31a
+#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b
+#define OBD_FAIL_LDLM_OST_LVB 0x31c
+
+/* LOCKLESS IO */
+#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
+
+#define OBD_FAIL_OSC 0x400
+#define OBD_FAIL_OSC_BRW_READ_BULK 0x401
+#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402
+#define OBD_FAIL_OSC_LOCK_BL_AST 0x403
+#define OBD_FAIL_OSC_LOCK_CP_AST 0x404
+#define OBD_FAIL_OSC_MATCH 0x405
+#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406
+#define OBD_FAIL_OSC_SHUTDOWN 0x407
+#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
+#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
+#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a
+#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
+#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE 0x40d
+#define OBD_FAIL_OSC_OBJECT_CONTENTION 0x40e
+#define OBD_FAIL_OSC_CP_CANCEL_RACE 0x40f
+#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410
+#define OBD_FAIL_OSC_NO_GRANT 0x411
+#define OBD_FAIL_OSC_DELAY_SETTIME 0x412
+
+#define OBD_FAIL_PTLRPC 0x500
+#define OBD_FAIL_PTLRPC_ACK 0x501
+#define OBD_FAIL_PTLRPC_RQBD 0x502
+#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503
+#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504
+#define OBD_FAIL_PTLRPC_DROP_RPC 0x505
+#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506
+#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508
+#define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a
+#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
+#define OBD_FAIL_PTLRPC_IMP_DEACTIVE 0x50d
+#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e
+#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
+#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511
+#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512
+#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
+#define OBD_FAIL_PTLRPC_FINISH_REPLAY 0x514
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515
+#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516
+#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517
+
+#define OBD_FAIL_OBD_PING_NET 0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
+#define OBD_FAIL_OBD_LOGD_NET 0x602
+#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603
+#define OBD_FAIL_OBD_DQACQ 0x604
+#define OBD_FAIL_OBD_LLOG_SETUP 0x605
+#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606
+#define OBD_FAIL_OBD_IDX_READ_NET 0x607
+#define OBD_FAIL_OBD_IDX_READ_BREAK 0x608
+#define OBD_FAIL_OBD_NO_LRU 0x609
+
+#define OBD_FAIL_TGT_REPLY_NET 0x700
+#define OBD_FAIL_TGT_CONN_RACE 0x701
+#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702
+#define OBD_FAIL_TGT_DELAY_CONNECT 0x703
+#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+#define OBD_FAIL_TGT_DELAY_PRECREATE 0x705
+#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706
+#define OBD_FAIL_TGT_REPLAY_DROP 0x707
+#define OBD_FAIL_TGT_FAKE_EXP 0x708
+#define OBD_FAIL_TGT_REPLAY_DELAY 0x709
+#define OBD_FAIL_TGT_LAST_REPLAY 0x710
+#define OBD_FAIL_TGT_CLIENT_ADD 0x711
+#define OBD_FAIL_TGT_RCVG_FLAG 0x712
+
+#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
+#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
+#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803
+#define OBD_FAIL_MDC_RPCS_SEM 0x804
+#define OBD_FAIL_MDC_LIGHTWEIGHT 0x805
+
+#define OBD_FAIL_MGS 0x900
+#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901
+#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902
+#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903
+#define OBD_FAIL_MGS_PAUSE_REQ 0x904
+#define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905
+
+#define OBD_FAIL_QUOTA_DQACQ_NET 0xA01
+#define OBD_FAIL_QUOTA_EDQUOT 0xA02
+#define OBD_FAIL_QUOTA_DELAY_REINT 0xA03
+#define OBD_FAIL_QUOTA_RECOVERABLE_ERR 0xA04
+
+#define OBD_FAIL_LPROC_REMOVE 0xB00
+
+#define OBD_FAIL_GENERAL_ALLOC 0xC00
+
+#define OBD_FAIL_SEQ 0x1000
+#define OBD_FAIL_SEQ_QUERY_NET 0x1001
+#define OBD_FAIL_SEQ_EXHAUST 0x1002
+
+#define OBD_FAIL_FLD 0x1100
+#define OBD_FAIL_FLD_QUERY_NET 0x1101
+
+#define OBD_FAIL_SEC_CTX 0x1200
+#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201
+#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202
+#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203
+#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
+
+#define OBD_FAIL_LLOG 0x1300
+#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308
+#define OBD_FAIL_LLOG_CATINFO_NET 0x1309
+#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310
+#define OBD_FAIL_SEQ_ALLOC 0x1311
+
+#define OBD_FAIL_LLITE 0x1400
+#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401
+#define OBD_FAIL_LOCK_STATE_WAIT_INTR 0x1402
+#define OBD_FAIL_LOV_INIT 0x1403
+#define OBD_FAIL_GLIMPSE_DELAY 0x1404
+
+#define OBD_FAIL_FID_INDIR 0x1501
+#define OBD_FAIL_FID_INLMA 0x1502
+#define OBD_FAIL_FID_IGIF 0x1504
+#define OBD_FAIL_FID_LOOKUP 0x1505
+#define OBD_FAIL_FID_NOLMA 0x1506
+
+/* LFSCK */
+#define OBD_FAIL_LFSCK_DELAY1 0x1600
+#define OBD_FAIL_LFSCK_DELAY2 0x1601
+#define OBD_FAIL_LFSCK_DELAY3 0x1602
+#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
+#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
+#define OBD_FAIL_LFSCK_FATAL1 0x1608
+#define OBD_FAIL_LFSCK_FATAL2 0x1609
+#define OBD_FAIL_LFSCK_CRASH 0x160a
+#define OBD_FAIL_LFSCK_NO_AUTO 0x160b
+#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
+
+/* UPDATE */
+#define OBD_FAIL_UPDATE_OBJ_NET 0x1700
+#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+
+
+/* Assign references to moved code to reduce code changes */
+#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id)
+#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id)
+#define OBD_FAIL_CHECK_VALUE(id, value) CFS_FAIL_CHECK_VALUE(id, value)
+#define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value)
+#define OBD_FAIL_CHECK_RESET(id, value) CFS_FAIL_CHECK_RESET(id, value)
+#define OBD_FAIL_RETURN(id, ret) CFS_FAIL_RETURN(id, ret)
+#define OBD_FAIL_TIMEOUT(id, secs) CFS_FAIL_TIMEOUT(id, secs)
+#define OBD_FAIL_TIMEOUT_MS(id, ms) CFS_FAIL_TIMEOUT_MS(id, ms)
+#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs)
+#define OBD_RACE(id) CFS_RACE(id)
+#define OBD_FAIL_ONCE CFS_FAIL_ONCE
+#define OBD_FAILED CFS_FAILED
+
+extern atomic_t libcfs_kmemory;
+
+#ifdef LPROCFS
+#define obd_memory_add(size) \
+ lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sub(size) \
+ lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sum() \
+ lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \
+ LPROCFS_FIELDS_FLAGS_SUM)
+#define obd_pages_add(order) \
+ lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT, \
+ (long)(1 << (order)))
+#define obd_pages_sub(order) \
+ lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT, \
+ (long)(1 << (order)))
+#define obd_pages_sum() \
+ lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT, \
+ LPROCFS_FIELDS_FLAGS_SUM)
+
+extern void obd_update_maxusage(void);
+extern __u64 obd_memory_max(void);
+extern __u64 obd_pages_max(void);
+
+#else
+
+extern __u64 obd_alloc;
+extern __u64 obd_pages;
+
+extern __u64 obd_max_alloc;
+extern __u64 obd_max_pages;
+
+static inline void obd_memory_add(long size)
+{
+ obd_alloc += size;
+ if (obd_alloc > obd_max_alloc)
+ obd_max_alloc = obd_alloc;
+}
+
+static inline void obd_memory_sub(long size)
+{
+ obd_alloc -= size;
+}
+
+static inline void obd_pages_add(int order)
+{
+ obd_pages += 1<< order;
+ if (obd_pages > obd_max_pages)
+ obd_max_pages = obd_pages;
+}
+
+static inline void obd_pages_sub(int order)
+{
+ obd_pages -= 1<< order;
+}
+
+#define obd_memory_sum() (obd_alloc)
+#define obd_pages_sum() (obd_pages)
+
+#define obd_memory_max() (obd_max_alloc)
+#define obd_pages_max() (obd_max_pages)
+
+#endif
+
+#define OBD_DEBUG_MEMUSAGE (1)
+
+#if OBD_DEBUG_MEMUSAGE
+#define OBD_ALLOC_POST(ptr, size, name) \
+ obd_memory_add(size); \
+ CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \
+ (int)(size), ptr)
+
+#define OBD_FREE_PRE(ptr, size, name) \
+ LASSERT(ptr); \
+ obd_memory_sub(size); \
+ CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \
+ (int)(size), ptr); \
+ POISON(ptr, 0x5a, size)
+
+#else /* !OBD_DEBUG_MEMUSAGE */
+
+#define OBD_ALLOC_POST(ptr, size, name) ((void)0)
+#define OBD_FREE_PRE(ptr, size, name) ((void)0)
+
+#endif /* !OBD_DEBUG_MEMUSAGE */
+
+#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC)
+
+#define OBD_ALLOC_FAIL_BITS 24
+#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1)
+#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100)
+
+#if defined(LUSTRE_UTILS) /* this version is for utils only */
+#define __OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, flags) \
+do { \
+ (ptr) = (cptab) == NULL ? \
+ kmalloc(size, flags) : \
+ kmalloc_node(size, flags, cfs_cpt_spread_node(cptab, cpt)); \
+ if (unlikely((ptr) == NULL)) { \
+ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
+ (int)(size), __FILE__, __LINE__); \
+ } else { \
+ memset(ptr, 0, size); \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n", \
+ (int)(size), ptr); \
+ } \
+} while (0)
+
+#else /* this version is for the kernel and liblustre */
+#define OBD_FREE_RTN0(ptr) \
+({ \
+ kfree(ptr); \
+ (ptr) = NULL; \
+ 0; \
+})
+
+#define __OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, flags) \
+do { \
+ (ptr) = (cptab) == NULL ? \
+ kmalloc(size, flags | __GFP_ZERO) : \
+ kmalloc_node(size, flags | __GFP_ZERO, \
+ cfs_cpt_spread_node(cptab, cpt)); \
+ if (likely((ptr) != NULL && \
+ (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 || \
+ !obd_alloc_fail(ptr, #ptr, "km", size, \
+ __FILE__, __LINE__) || \
+ OBD_FREE_RTN0(ptr)))){ \
+ OBD_ALLOC_POST(ptr, size, "kmalloced"); \
+ } \
+} while (0)
+#endif
+
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \
+ __OBD_MALLOC_VERBOSE(ptr, NULL, 0, size, gfp_mask)
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, __GFP_IO)
+#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_IOFS)
+#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr))
+#define OBD_ALLOC_PTR_WAIT(ptr) OBD_ALLOC_WAIT(ptr, sizeof *(ptr))
+
+#define OBD_CPT_ALLOC_GFP(ptr, cptab, cpt, size, gfp_mask) \
+ __OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, gfp_mask)
+
+#define OBD_CPT_ALLOC(ptr, cptab, cpt, size) \
+ OBD_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO)
+
+#define OBD_CPT_ALLOC_PTR(ptr, cptab, cpt) \
+ OBD_CPT_ALLOC(ptr, cptab, cpt, sizeof *(ptr))
+
+# define __OBD_VMALLOC_VEROBSE(ptr, cptab, cpt, size) \
+do { \
+ (ptr) = cptab == NULL ? \
+ vzalloc(size) : \
+ vzalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); \
+ if (unlikely((ptr) == NULL)) { \
+ CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n", \
+ (int)(size)); \
+ CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+ obd_memory_sum(), atomic_read(&libcfs_kmemory)); \
+ } else { \
+ OBD_ALLOC_POST(ptr, size, "vmalloced"); \
+ } \
+} while(0)
+
+# define OBD_VMALLOC(ptr, size) \
+ __OBD_VMALLOC_VEROBSE(ptr, NULL, 0, size)
+# define OBD_CPT_VMALLOC(ptr, cptab, cpt, size) \
+ __OBD_VMALLOC_VEROBSE(ptr, cptab, cpt, size)
+
+
+/* Allocations above this size are considered too big and could not be done
+ * atomically.
+ *
+ * Be very careful when changing this value, especially when decreasing it,
+ * since vmalloc in Linux doesn't perform well on multi-cores system, calling
+ * vmalloc in critical path would hurt peformance badly. See LU-66.
+ */
+#define OBD_ALLOC_BIG (4 * PAGE_CACHE_SIZE)
+
+#define OBD_ALLOC_LARGE(ptr, size) \
+do { \
+ if (size > OBD_ALLOC_BIG) \
+ OBD_VMALLOC(ptr, size); \
+ else \
+ OBD_ALLOC(ptr, size); \
+} while (0)
+
+#define OBD_CPT_ALLOC_LARGE(ptr, cptab, cpt, size) \
+do { \
+ if (size > OBD_ALLOC_BIG) \
+ OBD_CPT_VMALLOC(ptr, cptab, cpt, size); \
+ else \
+ OBD_CPT_ALLOC(ptr, cptab, cpt, size); \
+} while (0)
+
+#define OBD_FREE_LARGE(ptr, size) \
+do { \
+ if (size > OBD_ALLOC_BIG) \
+ OBD_VFREE(ptr, size); \
+ else \
+ OBD_FREE(ptr, size); \
+} while (0)
+
+
+#ifdef CONFIG_DEBUG_SLAB
+#define POISON(ptr, c, s) do {} while (0)
+#define POISON_PTR(ptr) ((void)0)
+#else
+#define POISON(ptr, c, s) memset(ptr, c, s)
+#define POISON_PTR(ptr) (ptr) = (void *)0xdeadbeef
+#endif
+
+#ifdef POISON_BULK
+#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_CACHE_SIZE); \
+ kunmap(page); } while (0)
+#else
+#define POISON_PAGE(page, val) do { } while (0)
+#endif
+
+#define OBD_FREE(ptr, size) \
+do { \
+ OBD_FREE_PRE(ptr, size, "kfreed"); \
+ kfree(ptr); \
+ POISON_PTR(ptr); \
+} while(0)
+
+
+#define OBD_FREE_RCU(ptr, size, handle) \
+do { \
+ struct portals_handle *__h = (handle); \
+ \
+ LASSERT(handle != NULL); \
+ __h->h_cookie = (unsigned long)(ptr); \
+ __h->h_size = (size); \
+ call_rcu(&__h->h_rcu, class_handle_free_cb); \
+ POISON_PTR(ptr); \
+} while(0)
+
+
+#define OBD_VFREE(ptr, size) \
+ do { \
+ OBD_FREE_PRE(ptr, size, "vfreed"); \
+ vfree(ptr); \
+ POISON_PTR(ptr); \
+ } while (0)
+
+/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
+#define OBD_SLAB_FREE_RTN0(ptr, slab) \
+({ \
+ kmem_cache_free((slab), (ptr)); \
+ (ptr) = NULL; \
+ 0; \
+})
+
+#define __OBD_SLAB_ALLOC_VERBOSE(ptr, slab, cptab, cpt, size, type) \
+do { \
+ LASSERT(ergo((type) != GFP_ATOMIC, !in_interrupt())); \
+ (ptr) = (cptab) == NULL ? \
+ kmem_cache_alloc(slab, type | __GFP_ZERO) : \
+ kmem_cache_alloc_node(slab, type | __GFP_ZERO, \
+ cfs_cpt_spread_node(cptab, cpt)); \
+ if (likely((ptr) != NULL && \
+ (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 || \
+ !obd_alloc_fail(ptr, #ptr, "slab-", size, \
+ __FILE__, __LINE__) || \
+ OBD_SLAB_FREE_RTN0(ptr, slab)))) { \
+ OBD_ALLOC_POST(ptr, size, "slab-alloced"); \
+ } \
+} while(0)
+
+#define OBD_SLAB_ALLOC_GFP(ptr, slab, size, flags) \
+ __OBD_SLAB_ALLOC_VERBOSE(ptr, slab, NULL, 0, size, flags)
+#define OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, size, flags) \
+ __OBD_SLAB_ALLOC_VERBOSE(ptr, slab, cptab, cpt, size, flags)
+
+#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr))
+
+#define OBD_SLAB_FREE(ptr, slab, size) \
+do { \
+ OBD_FREE_PRE(ptr, size, "slab-freed"); \
+ kmem_cache_free(slab, ptr); \
+ POISON_PTR(ptr); \
+} while(0)
+
+#define OBD_SLAB_ALLOC(ptr, slab, size) \
+ OBD_SLAB_ALLOC_GFP(ptr, slab, size, __GFP_IO)
+
+#define OBD_SLAB_CPT_ALLOC(ptr, slab, cptab, cpt, size) \
+ OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, size, __GFP_IO)
+
+#define OBD_SLAB_ALLOC_PTR(ptr, slab) \
+ OBD_SLAB_ALLOC(ptr, slab, sizeof *(ptr))
+
+#define OBD_SLAB_CPT_ALLOC_PTR(ptr, slab, cptab, cpt) \
+ OBD_SLAB_CPT_ALLOC(ptr, slab, cptab, cpt, sizeof *(ptr))
+
+#define OBD_SLAB_ALLOC_PTR_GFP(ptr, slab, flags) \
+ OBD_SLAB_ALLOC_GFP(ptr, slab, sizeof *(ptr), flags)
+
+#define OBD_SLAB_CPT_ALLOC_PTR_GFP(ptr, slab, cptab, cpt, flags) \
+ OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, sizeof *(ptr), flags)
+
+#define OBD_SLAB_FREE_PTR(ptr, slab) \
+ OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr))
+
+#define KEY_IS(str) \
+ (keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
+
+/* Wrapper for contiguous page frame allocation */
+#define __OBD_PAGE_ALLOC_VERBOSE(ptr, cptab, cpt, gfp_mask) \
+do { \
+ (ptr) = (cptab) == NULL ? \
+ alloc_page(gfp_mask) : \
+ alloc_pages_node(cfs_cpt_spread_node(cptab, cpt), gfp_mask, 0);\
+ if (unlikely((ptr) == NULL)) { \
+ CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\
+ "failed\n", (int)1, \
+ (__u64)(1 << PAGE_CACHE_SHIFT)); \
+ CERROR(LPU64" total bytes and "LPU64" total pages " \
+ "("LPU64" bytes) allocated by Lustre, " \
+ "%d total bytes by LNET\n", \
+ obd_memory_sum(), \
+ obd_pages_sum() << PAGE_CACHE_SHIFT, \
+ obd_pages_sum(), \
+ atomic_read(&libcfs_kmemory)); \
+ } else { \
+ obd_pages_add(0); \
+ CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / " \
+ LPU64" bytes at %p.\n", \
+ (int)1, \
+ (__u64)(1 << PAGE_CACHE_SHIFT), ptr); \
+ } \
+} while (0)
+
+#define OBD_PAGE_ALLOC(ptr, gfp_mask) \
+ __OBD_PAGE_ALLOC_VERBOSE(ptr, NULL, 0, gfp_mask)
+#define OBD_PAGE_CPT_ALLOC(ptr, cptab, cpt, gfp_mask) \
+ __OBD_PAGE_ALLOC_VERBOSE(ptr, cptab, cpt, gfp_mask)
+
+#define OBD_PAGE_FREE(ptr) \
+do { \
+ LASSERT(ptr); \
+ obd_pages_sub(0); \
+ CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \
+ "at %p.\n", \
+ (int)1, (__u64)(1 << PAGE_CACHE_SHIFT), \
+ ptr); \
+ __free_page(ptr); \
+ (ptr) = (void *)0xdeadbeef; \
+} while (0)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/lclient/glimpse.c
new file mode 100644
index 000000000000..7f3974be1f92
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lclient/glimpse.c
@@ -0,0 +1,274 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * glimpse code shared between vvp and liblustre (and other Lustre clients in
+ * the future).
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Oleg Drokin <oleg.drokin@sun.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <obd.h>
+
+# include <lustre_dlm.h>
+# include <lustre_lite.h>
+# include <lustre_mdc.h>
+# include <linux/pagemap.h>
+# include <linux/file.h>
+
+#include "cl_object.h"
+#include "lclient.h"
+# include "../llite/llite_internal.h"
+
+static const struct cl_lock_descr whole_file = {
+ .cld_start = 0,
+ .cld_end = CL_PAGE_EOF,
+ .cld_mode = CLM_READ
+};
+
+/*
+ * Check whether file has possible unwriten pages.
+ *
+ * \retval 1 file is mmap-ed or has dirty pages
+ * 0 otherwise
+ */
+blkcnt_t dirty_cnt(struct inode *inode)
+{
+ blkcnt_t cnt = 0;
+ struct ccc_object *vob = cl_inode2ccc(inode);
+ void *results[1];
+
+ if (inode->i_mapping != NULL)
+ cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+ results, 0, 1,
+ PAGECACHE_TAG_DIRTY);
+ if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0)
+ cnt = 1;
+
+ return (cnt > 0) ? 1 : 0;
+}
+
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode, struct cl_object *clob, int agl)
+{
+ struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
+ struct cl_inode_info *lli = cl_i2info(inode);
+ const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
+ struct ccc_io *cio = ccc_env_io(env);
+ struct cl_lock *lock;
+ int result;
+
+ ENTRY;
+ result = 0;
+ if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
+ CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid));
+ if (lli->lli_has_smd) {
+ /* NOTE: this looks like DLM lock request, but it may
+ * not be one. Due to CEF_ASYNC flag (translated
+ * to LDLM_FL_HAS_INTENT by osc), this is
+ * glimpse request, that won't revoke any
+ * conflicting DLM locks held. Instead,
+ * ll_glimpse_callback() will be called on each
+ * client holding a DLM lock against this file,
+ * and resulting size will be returned for each
+ * stripe. DLM lock on [0, EOF] is acquired only
+ * if there were no conflicting locks. If there
+ * were conflicting locks, enqueuing or waiting
+ * fails with -ENAVAIL, but valid inode
+ * attributes are returned anyway. */
+ *descr = whole_file;
+ descr->cld_obj = clob;
+ descr->cld_mode = CLM_PHANTOM;
+ descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
+ if (agl)
+ descr->cld_enq_flags |= CEF_AGL;
+ cio->cui_glimpse = 1;
+ /*
+ * CEF_ASYNC is used because glimpse sub-locks cannot
+ * deadlock (because they never conflict with other
+ * locks) and, hence, can be enqueued out-of-order.
+ *
+ * CEF_MUST protects glimpse lock from conversion into
+ * a lockless mode.
+ */
+ lock = cl_lock_request(env, io, descr, "glimpse",
+ current);
+ cio->cui_glimpse = 0;
+
+ if (lock == NULL)
+ RETURN(0);
+
+ if (IS_ERR(lock))
+ RETURN(PTR_ERR(lock));
+
+ LASSERT(agl == 0);
+ result = cl_wait(env, lock);
+ if (result == 0) {
+ cl_merge_lvb(env, inode);
+ if (cl_isize_read(inode) > 0 &&
+ inode->i_blocks == 0) {
+ /*
+ * LU-417: Add dirty pages block count
+ * lest i_blocks reports 0, some "cp" or
+ * "tar" may think it's a completely
+ * sparse file and skip it.
+ */
+ inode->i_blocks = dirty_cnt(inode);
+ }
+ cl_unuse(env, lock);
+ }
+ cl_lock_release(env, lock, "glimpse", current);
+ } else {
+ CDEBUG(D_DLMTRACE, "No objects for inode\n");
+ cl_merge_lvb(env, inode);
+ }
+ }
+
+ RETURN(result);
+}
+
+static int cl_io_get(struct inode *inode, struct lu_env **envout,
+ struct cl_io **ioout, int *refcheck)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_inode_info *lli = cl_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ int result;
+
+ if (S_ISREG(cl_inode_mode(inode))) {
+ env = cl_env_get(refcheck);
+ if (!IS_ERR(env)) {
+ io = ccc_env_thread_io(env);
+ io->ci_obj = clob;
+ *envout = env;
+ *ioout = io;
+ result = +1;
+ } else
+ result = PTR_ERR(env);
+ } else
+ result = 0;
+ return result;
+}
+
+int cl_glimpse_size0(struct inode *inode, int agl)
+{
+ /*
+ * We don't need ast_flags argument to cl_glimpse_size(), because
+ * osc_lock_enqueue() takes care of the possible deadlock that said
+ * argument was introduced to avoid.
+ */
+ /*
+ * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
+ * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
+ * blocking anyway.
+ */
+ struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
+ int result;
+ int refcheck;
+
+ ENTRY;
+
+ result = cl_io_get(inode, &env, &io, &refcheck);
+ if (result > 0) {
+ again:
+ io->ci_verify_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (result > 0)
+ /*
+ * nothing to do for this io. This currently happens
+ * when stripe sub-object's are not yet created.
+ */
+ result = io->ci_result;
+ else if (result == 0)
+ result = cl_glimpse_lock(env, io, inode, io->ci_obj,
+ agl);
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
+ cl_io_fini(env, io);
+ if (unlikely(io->ci_need_restart))
+ goto again;
+ cl_env_put(env, &refcheck);
+ }
+ RETURN(result);
+}
+
+int cl_local_size(struct inode *inode)
+{
+ struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
+ struct ccc_thread_info *cti;
+ struct cl_object *clob;
+ struct cl_lock_descr *descr;
+ struct cl_lock *lock;
+ int result;
+ int refcheck;
+
+ ENTRY;
+
+ if (!cl_i2info(inode)->lli_has_smd)
+ RETURN(0);
+
+ result = cl_io_get(inode, &env, &io, &refcheck);
+ if (result <= 0)
+ RETURN(result);
+
+ clob = io->ci_obj;
+ result = cl_io_init(env, io, CIT_MISC, clob);
+ if (result > 0)
+ result = io->ci_result;
+ else if (result == 0) {
+ cti = ccc_env_info(env);
+ descr = &cti->cti_descr;
+
+ *descr = whole_file;
+ descr->cld_obj = clob;
+ lock = cl_lock_peek(env, io, descr, "localsize", current);
+ if (lock != NULL) {
+ cl_merge_lvb(env, inode);
+ cl_unuse(env, lock);
+ cl_lock_release(env, lock, "localsize", current);
+ result = 0;
+ } else
+ result = -ENODATA;
+ }
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
new file mode 100644
index 000000000000..4a0166687f07
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
@@ -0,0 +1,1325 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/fs.h>
+# include <linux/sched.h>
+# include <linux/mm.h>
+# include <linux/quotaops.h>
+# include <linux/highmem.h>
+# include <linux/pagemap.h>
+# include <linux/rbtree.h>
+
+#include <obd.h>
+#include <obd_support.h>
+#include <lustre_fid.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include <lustre_mdc.h>
+#include <cl_object.h>
+
+#include <lclient.h>
+
+#include "../llite/llite_internal.h"
+
+const struct cl_req_operations ccc_req_ops;
+
+/*
+ * ccc_ prefix stands for "Common Client Code".
+ */
+
+static struct kmem_cache *ccc_lock_kmem;
+static struct kmem_cache *ccc_object_kmem;
+static struct kmem_cache *ccc_thread_kmem;
+static struct kmem_cache *ccc_session_kmem;
+static struct kmem_cache *ccc_req_kmem;
+
+static struct lu_kmem_descr ccc_caches[] = {
+ {
+ .ckd_cache = &ccc_lock_kmem,
+ .ckd_name = "ccc_lock_kmem",
+ .ckd_size = sizeof (struct ccc_lock)
+ },
+ {
+ .ckd_cache = &ccc_object_kmem,
+ .ckd_name = "ccc_object_kmem",
+ .ckd_size = sizeof (struct ccc_object)
+ },
+ {
+ .ckd_cache = &ccc_thread_kmem,
+ .ckd_name = "ccc_thread_kmem",
+ .ckd_size = sizeof (struct ccc_thread_info),
+ },
+ {
+ .ckd_cache = &ccc_session_kmem,
+ .ckd_name = "ccc_session_kmem",
+ .ckd_size = sizeof (struct ccc_session)
+ },
+ {
+ .ckd_cache = &ccc_req_kmem,
+ .ckd_name = "ccc_req_kmem",
+ .ckd_size = sizeof (struct ccc_req)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+void *ccc_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct ccc_thread_info *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, ccc_thread_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+void ccc_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct ccc_thread_info *info = data;
+ OBD_SLAB_FREE_PTR(info, ccc_thread_kmem);
+}
+
+void *ccc_session_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct ccc_session *session;
+
+ OBD_SLAB_ALLOC_PTR_GFP(session, ccc_session_kmem, __GFP_IO);
+ if (session == NULL)
+ session = ERR_PTR(-ENOMEM);
+ return session;
+}
+
+void ccc_session_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct ccc_session *session = data;
+ OBD_SLAB_FREE_PTR(session, ccc_session_kmem);
+}
+
+struct lu_context_key ccc_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = ccc_key_init,
+ .lct_fini = ccc_key_fini
+};
+
+struct lu_context_key ccc_session_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = ccc_session_key_init,
+ .lct_fini = ccc_session_key_fini
+};
+
+
+/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */
+// LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key);
+
+int ccc_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ struct ccc_device *vdv;
+ int rc;
+ ENTRY;
+
+ vdv = lu2ccc_dev(d);
+ vdv->cdv_next = lu2cl_dev(next);
+
+ LASSERT(d->ld_site != NULL && next->ld_type != NULL);
+ next->ld_site = d->ld_site;
+ rc = next->ld_type->ldt_ops->ldto_device_init(
+ env, next, next->ld_type->ldt_name, NULL);
+ if (rc == 0) {
+ lu_device_get(next);
+ lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+ }
+ RETURN(rc);
+}
+
+struct lu_device *ccc_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ return cl2lu_dev(lu2ccc_dev(d)->cdv_next);
+}
+
+struct lu_device *ccc_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg,
+ const struct lu_device_operations *luops,
+ const struct cl_device_operations *clops)
+{
+ struct ccc_device *vdv;
+ struct lu_device *lud;
+ struct cl_site *site;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC_PTR(vdv);
+ if (vdv == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ lud = &vdv->cdv_cl.cd_lu_dev;
+ cl_device_init(&vdv->cdv_cl, t);
+ ccc2lu_dev(vdv)->ld_ops = luops;
+ vdv->cdv_cl.cd_ops = clops;
+
+ OBD_ALLOC_PTR(site);
+ if (site != NULL) {
+ rc = cl_site_init(site, &vdv->cdv_cl);
+ if (rc == 0)
+ rc = lu_site_init_finish(&site->cs_lu);
+ else {
+ LASSERT(lud->ld_site == NULL);
+ CERROR("Cannot init lu_site, rc %d.\n", rc);
+ OBD_FREE_PTR(site);
+ }
+ } else
+ rc = -ENOMEM;
+ if (rc != 0) {
+ ccc_device_free(env, lud);
+ lud = ERR_PTR(rc);
+ }
+ RETURN(lud);
+}
+
+struct lu_device *ccc_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct ccc_device *vdv = lu2ccc_dev(d);
+ struct cl_site *site = lu2cl_site(d->ld_site);
+ struct lu_device *next = cl2lu_dev(vdv->cdv_next);
+
+ if (d->ld_site != NULL) {
+ cl_site_fini(site);
+ OBD_FREE_PTR(site);
+ }
+ cl_device_fini(lu2cl_dev(d));
+ OBD_FREE_PTR(vdv);
+ return next;
+}
+
+int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct ccc_req *vrq;
+ int result;
+
+ OBD_SLAB_ALLOC_PTR_GFP(vrq, ccc_req_kmem, __GFP_IO);
+ if (vrq != NULL) {
+ cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+/**
+ * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
+ * fails. Access to this environment is serialized by ccc_inode_fini_guard
+ * mutex.
+ */
+static struct lu_env *ccc_inode_fini_env = NULL;
+
+/**
+ * A mutex serializing calls to slp_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+static DEFINE_MUTEX(ccc_inode_fini_guard);
+static int dummy_refcheck;
+
+int ccc_global_init(struct lu_device_type *device_type)
+{
+ int result;
+
+ result = lu_kmem_init(ccc_caches);
+ if (result)
+ return result;
+
+ result = lu_device_type_init(device_type);
+ if (result)
+ goto out_kmem;
+
+ ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
+ LCT_REMEMBER|LCT_NOREF);
+ if (IS_ERR(ccc_inode_fini_env)) {
+ result = PTR_ERR(ccc_inode_fini_env);
+ goto out_device;
+ }
+
+ ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
+ return 0;
+out_device:
+ lu_device_type_fini(device_type);
+out_kmem:
+ lu_kmem_fini(ccc_caches);
+ return result;
+}
+
+void ccc_global_fini(struct lu_device_type *device_type)
+{
+ if (ccc_inode_fini_env != NULL) {
+ cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
+ ccc_inode_fini_env = NULL;
+ }
+ lu_device_type_fini(device_type);
+ lu_kmem_fini(ccc_caches);
+}
+
+/*****************************************************************************
+ *
+ * Object operations.
+ *
+ */
+
+struct lu_object *ccc_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *unused,
+ struct lu_device *dev,
+ const struct cl_object_operations *clops,
+ const struct lu_object_operations *luops)
+{
+ struct ccc_object *vob;
+ struct lu_object *obj;
+
+ OBD_SLAB_ALLOC_PTR_GFP(vob, ccc_object_kmem, __GFP_IO);
+ if (vob != NULL) {
+ struct cl_object_header *hdr;
+
+ obj = ccc2lu(vob);
+ hdr = &vob->cob_header;
+ cl_object_header_init(hdr);
+ lu_object_init(obj, &hdr->coh_lu, dev);
+ lu_object_add_top(&hdr->coh_lu, obj);
+
+ vob->cob_cl.co_ops = clops;
+ obj->lo_ops = luops;
+ } else
+ obj = NULL;
+ return obj;
+}
+
+int ccc_object_init0(const struct lu_env *env,
+ struct ccc_object *vob,
+ const struct cl_object_conf *conf)
+{
+ vob->cob_inode = conf->coc_inode;
+ vob->cob_transient_pages = 0;
+ cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page));
+ return 0;
+}
+
+int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct ccc_device *dev = lu2ccc_dev(obj->lo_dev);
+ struct ccc_object *vob = lu2ccc(obj);
+ struct lu_object *below;
+ struct lu_device *under;
+ int result;
+
+ under = &dev->cdv_next->cd_lu_dev;
+ below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+ if (below != NULL) {
+ const struct cl_object_conf *cconf;
+
+ cconf = lu2cl_conf(conf);
+ INIT_LIST_HEAD(&vob->cob_pending_list);
+ lu_object_add(obj, below);
+ result = ccc_object_init0(env, vob, cconf);
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+void ccc_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct ccc_object *vob = lu2ccc(obj);
+
+ lu_object_fini(obj);
+ lu_object_header_fini(obj->lo_header);
+ OBD_SLAB_FREE_PTR(vob, ccc_object_kmem);
+}
+
+int ccc_lock_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *unused,
+ const struct cl_lock_operations *lkops)
+{
+ struct ccc_lock *clk;
+ int result;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+
+ OBD_SLAB_ALLOC_PTR_GFP(clk, ccc_lock_kmem, __GFP_IO);
+ if (clk != NULL) {
+ cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+int ccc_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid)
+{
+ return 0;
+}
+
+int ccc_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb)
+{
+ struct inode *inode = ccc_object_inode(obj);
+
+ ENTRY;
+ lvb->lvb_mtime = cl_inode_mtime(inode);
+ lvb->lvb_atime = cl_inode_atime(inode);
+ lvb->lvb_ctime = cl_inode_ctime(inode);
+ /*
+ * LU-417: Add dirty pages block count lest i_blocks reports 0, some
+ * "cp" or "tar" on remote node may think it's a completely sparse file
+ * and skip it.
+ */
+ if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
+ lvb->lvb_blocks = dirty_cnt(inode);
+ RETURN(0);
+}
+
+
+
+int ccc_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf)
+{
+ /* TODO: destroy all pages attached to this object. */
+ return 0;
+}
+
+static void ccc_object_size_lock(struct cl_object *obj)
+{
+ struct inode *inode = ccc_object_inode(obj);
+
+ cl_isize_lock(inode);
+ cl_object_attr_lock(obj);
+}
+
+static void ccc_object_size_unlock(struct cl_object *obj)
+{
+ struct inode *inode = ccc_object_inode(obj);
+
+ cl_object_attr_unlock(obj);
+ cl_isize_unlock(inode);
+}
+
+/*****************************************************************************
+ *
+ * Page operations.
+ *
+ */
+
+struct page *ccc_page_vmpage(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ return cl2vm_page(slice);
+}
+
+int ccc_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct ccc_io *cio = ccc_env_io(env);
+ struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr;
+ struct cl_page *page = slice->cpl_page;
+
+ int result;
+
+ ENTRY;
+
+ if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+ io->ci_type == CIT_FAULT) {
+ if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)
+ result = -EBUSY;
+ else {
+ desc->cld_start = page->cp_index;
+ desc->cld_end = page->cp_index;
+ desc->cld_obj = page->cp_obj;
+ desc->cld_mode = CLM_READ;
+ result = cl_queue_match(&io->ci_lockset.cls_done,
+ desc) ? -EBUSY : 0;
+ }
+ } else
+ result = 0;
+ RETURN(result);
+}
+
+int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice)
+{
+ /*
+ * Cached read?
+ */
+ LBUG();
+ return 0;
+}
+
+void ccc_transient_page_verify(const struct cl_page *page)
+{
+}
+
+int ccc_transient_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused,
+ int nonblock)
+{
+ ccc_transient_page_verify(slice->cpl_page);
+ return 0;
+}
+
+void ccc_transient_page_assume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ ccc_transient_page_verify(slice->cpl_page);
+}
+
+void ccc_transient_page_unassume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ ccc_transient_page_verify(slice->cpl_page);
+}
+
+void ccc_transient_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ ccc_transient_page_verify(slice->cpl_page);
+}
+
+void ccc_transient_page_discard(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct cl_page *page = slice->cpl_page;
+
+ ccc_transient_page_verify(slice->cpl_page);
+
+ /*
+ * For transient pages, remove it from the radix tree.
+ */
+ cl_page_delete(env, page);
+}
+
+int ccc_transient_page_prep(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ ENTRY;
+ /* transient page should always be sent. */
+ RETURN(0);
+}
+
+/*****************************************************************************
+ *
+ * Lock operations.
+ *
+ */
+
+void ccc_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
+}
+
+void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
+{
+ struct ccc_lock *clk = cl2ccc_lock(slice);
+ OBD_SLAB_FREE_PTR(clk, ccc_lock_kmem);
+}
+
+int ccc_lock_enqueue(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *unused, __u32 enqflags)
+{
+ CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
+ return 0;
+}
+
+int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
+{
+ CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
+ return 0;
+}
+
+int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
+{
+ CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
+ return 0;
+}
+
+/**
+ * Implementation of cl_lock_operations::clo_fits_into() methods for ccc
+ * layer. This function is executed every time io finds an existing lock in
+ * the lock cache while creating new lock. This function has to decide whether
+ * cached lock "fits" into io.
+ *
+ * \param slice lock to be checked
+ * \param io IO that wants a lock.
+ *
+ * \see lov_lock_fits_into().
+ */
+int ccc_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io)
+{
+ const struct cl_lock *lock = slice->cls_lock;
+ const struct cl_lock_descr *descr = &lock->cll_descr;
+ const struct ccc_io *cio = ccc_env_io(env);
+ int result;
+
+ ENTRY;
+ /*
+ * Work around DLM peculiarity: it assumes that glimpse
+ * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
+ * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
+ * sure that glimpse doesn't get CLM_WRITE top-lock, so that it
+ * doesn't enqueue CLM_WRITE sub-locks.
+ */
+ if (cio->cui_glimpse)
+ result = descr->cld_mode != CLM_WRITE;
+
+ /*
+ * Also, don't match incomplete write locks for read, otherwise read
+ * would enqueue missing sub-locks in the write mode.
+ */
+ else if (need->cld_mode != descr->cld_mode)
+ result = lock->cll_state >= CLS_ENQUEUED;
+ else
+ result = 1;
+ RETURN(result);
+}
+
+/**
+ * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
+ * whenever lock state changes. Transfers object attributes, that might be
+ * updated as a result of lock acquiring into inode.
+ */
+void ccc_lock_state(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state state)
+{
+ struct cl_lock *lock = slice->cls_lock;
+ ENTRY;
+
+ /*
+ * Refresh inode attributes when the lock is moving into CLS_HELD
+ * state, and only when this is a result of real enqueue, rather than
+ * of finding lock in the cache.
+ */
+ if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
+ struct cl_object *obj;
+ struct inode *inode;
+
+ obj = slice->cls_obj;
+ inode = ccc_object_inode(obj);
+
+ /* vmtruncate() sets the i_size
+ * under both a DLM lock and the
+ * ll_inode_size_lock(). If we don't get the
+ * ll_inode_size_lock() here we can match the DLM lock and
+ * reset i_size. generic_file_write can then trust the
+ * stale i_size when doing appending writes and effectively
+ * cancel the result of the truncate. Getting the
+ * ll_inode_size_lock() after the enqueue maintains the DLM
+ * -> ll_inode_size_lock() acquiring order. */
+ if (lock->cll_descr.cld_start == 0 &&
+ lock->cll_descr.cld_end == CL_PAGE_EOF)
+ cl_merge_lvb(env, inode);
+ }
+ EXIT;
+}
+
+/*****************************************************************************
+ *
+ * io operations.
+ *
+ */
+
+void ccc_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+
+ CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+}
+
+int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ pgoff_t start, pgoff_t end)
+{
+ struct ccc_io *cio = ccc_env_io(env);
+ struct cl_lock_descr *descr = &cio->cui_link.cill_descr;
+ struct cl_object *obj = io->ci_obj;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
+
+ memset(&cio->cui_link, 0, sizeof cio->cui_link);
+
+ if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ descr->cld_mode = CLM_GROUP;
+ descr->cld_gid = cio->cui_fd->fd_grouplock.cg_gid;
+ } else {
+ descr->cld_mode = mode;
+ }
+ descr->cld_obj = obj;
+ descr->cld_start = start;
+ descr->cld_end = end;
+ descr->cld_enq_flags = enqflags;
+
+ cl_io_lock_add(env, io, &cio->cui_link);
+ RETURN(0);
+}
+
+void ccc_io_update_iov(const struct lu_env *env,
+ struct ccc_io *cio, struct cl_io *io)
+{
+ int i;
+ size_t size = io->u.ci_rw.crw_count;
+
+ cio->cui_iov_olen = 0;
+ if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
+ return;
+
+ for (i = 0; i < cio->cui_tot_nrsegs; i++) {
+ struct iovec *iv = &cio->cui_iov[i];
+
+ if (iv->iov_len < size)
+ size -= iv->iov_len;
+ else {
+ if (iv->iov_len > size) {
+ cio->cui_iov_olen = iv->iov_len;
+ iv->iov_len = size;
+ }
+ break;
+ }
+ }
+
+ cio->cui_nrsegs = i + 1;
+ LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
+ "tot_nrsegs: %lu, nrsegs: %lu\n",
+ cio->cui_tot_nrsegs, cio->cui_nrsegs);
+}
+
+int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ loff_t start, loff_t end)
+{
+ struct cl_object *obj = io->ci_obj;
+ return ccc_io_one_lock_index(env, io, enqflags, mode,
+ cl_index(obj, start), cl_index(obj, end));
+}
+
+void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ CLOBINVRNT(env, ios->cis_io->ci_obj,
+ ccc_object_invariant(ios->cis_io->ci_obj));
+}
+
+void ccc_io_advance(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ size_t nob)
+{
+ struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = ios->cis_io->ci_obj;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+
+ if (!cl_is_normalio(env, io))
+ return;
+
+ LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
+ LASSERT(cio->cui_tot_count >= nob);
+
+ cio->cui_iov += cio->cui_nrsegs;
+ cio->cui_tot_nrsegs -= cio->cui_nrsegs;
+ cio->cui_tot_count -= nob;
+
+ /* update the iov */
+ if (cio->cui_iov_olen > 0) {
+ struct iovec *iv;
+
+ cio->cui_iov--;
+ cio->cui_tot_nrsegs++;
+ iv = &cio->cui_iov[0];
+ if (io->ci_continue) {
+ iv->iov_base += iv->iov_len;
+ LASSERT(cio->cui_iov_olen > iv->iov_len);
+ iv->iov_len = cio->cui_iov_olen - iv->iov_len;
+ } else {
+ /* restore the iov_len, in case of restart io. */
+ iv->iov_len = cio->cui_iov_olen;
+ }
+ cio->cui_iov_olen = 0;
+ }
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, loff_t start, size_t count, int *exceed)
+{
+ struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct inode *inode = ccc_object_inode(obj);
+ loff_t pos = start + count - 1;
+ loff_t kms;
+ int result;
+
+ /*
+ * Consistency guarantees: following possibilities exist for the
+ * relation between region being accessed and real file size at this
+ * moment:
+ *
+ * (A): the region is completely inside of the file;
+ *
+ * (B-x): x bytes of region are inside of the file, the rest is
+ * outside;
+ *
+ * (C): the region is completely outside of the file.
+ *
+ * This classification is stable under DLM lock already acquired by
+ * the caller, because to change the class, other client has to take
+ * DLM lock conflicting with our lock. Also, any updates to ->i_size
+ * by other threads on this client are serialized by
+ * ll_inode_size_lock(). This guarantees that short reads are handled
+ * correctly in the face of concurrent writes and truncates.
+ */
+ ccc_object_size_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ kms = attr->cat_kms;
+ if (pos > kms) {
+ /*
+ * A glimpse is necessary to determine whether we
+ * return a short read (B) or some zeroes at the end
+ * of the buffer (C)
+ */
+ ccc_object_size_unlock(obj);
+ result = cl_glimpse_lock(env, io, inode, obj, 0);
+ if (result == 0 && exceed != NULL) {
+ /* If objective page index exceed end-of-file
+ * page index, return directly. Do not expect
+ * kernel will check such case correctly.
+ * linux-2.6.18-128.1.1 miss to do that.
+ * --bug 17336 */
+ loff_t size = cl_isize_read(inode);
+ unsigned long cur_index = start >> PAGE_CACHE_SHIFT;
+
+ if ((size == 0 && cur_index != 0) ||
+ (((size - 1) >> PAGE_CACHE_SHIFT) < cur_index))
+ *exceed = 1;
+ }
+ return result;
+ } else {
+ /*
+ * region is within kms and, hence, within real file
+ * size (A). We need to increase i_size to cover the
+ * read region so that generic_file_read() will do its
+ * job, but that doesn't mean the kms size is
+ * _correct_, it is only the _minimum_ size. If
+ * someone does a stat they will get the correct size
+ * which will always be >= the kms value here.
+ * b=11081
+ */
+ if (cl_isize_read(inode) < kms) {
+ cl_isize_write_nolock(inode, kms);
+ CDEBUG(D_VFSTRACE,
+ DFID" updating i_size "LPU64"\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ (__u64)cl_isize_read(inode));
+
+ }
+ }
+ }
+ ccc_object_size_unlock(obj);
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Transfer operations.
+ *
+ */
+
+void ccc_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct ccc_req *vrq;
+
+ if (ioret > 0)
+ cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
+
+ vrq = cl2ccc_req(slice);
+ OBD_SLAB_FREE_PTR(vrq, ccc_req_kmem);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for ccc
+ * layer. ccc is responsible for
+ *
+ * - o_[mac]time
+ *
+ * - o_mode
+ *
+ * - o_parent_seq
+ *
+ * - o_[ug]id
+ *
+ * - o_parent_oid
+ *
+ * - o_parent_ver
+ *
+ * - o_ioepoch,
+ *
+ * and capability.
+ */
+void ccc_req_attr_set(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, obd_valid flags)
+{
+ struct inode *inode;
+ struct obdo *oa;
+ obd_flag valid_flags;
+
+ oa = attr->cra_oa;
+ inode = ccc_object_inode(obj);
+ valid_flags = OBD_MD_FLTYPE;
+
+ if ((flags & OBD_MD_FLOSSCAPA) != 0) {
+ LASSERT(attr->cra_capa == NULL);
+ attr->cra_capa = cl_capa_lookup(inode,
+ slice->crs_req->crq_type);
+ }
+
+ if (slice->crs_req->crq_type == CRT_WRITE) {
+ if (flags & OBD_MD_FLEPOCH) {
+ oa->o_valid |= OBD_MD_FLEPOCH;
+ oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch;
+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLUID | OBD_MD_FLGID;
+ }
+ }
+ obdo_from_inode(oa, inode, valid_flags & flags);
+ obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid);
+ memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid,
+ JOBSTATS_JOBID_SIZE);
+}
+
+const struct cl_req_operations ccc_req_ops = {
+ .cro_attr_set = ccc_req_attr_set,
+ .cro_completion = ccc_req_completion
+};
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr,
+ struct obd_capa *capa)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ int result;
+ int refcheck;
+
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ io = ccc_env_thread_io(env);
+ io->ci_obj = cl_i2info(inode)->lli_clob;
+
+ io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
+ io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
+ io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
+ io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+ io->u.ci_setattr.sa_valid = attr->ia_valid;
+ io->u.ci_setattr.sa_capa = capa;
+
+again:
+ if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
+ struct ccc_io *cio = ccc_env_io(env);
+
+ if (attr->ia_valid & ATTR_FILE)
+ /* populate the file descriptor for ftruncate to honor
+ * group lock - see LU-787 */
+ cio->cui_fd = cl_iattr2fd(inode, attr);
+
+ result = cl_io_loop(env, io);
+ } else {
+ result = io->ci_result;
+ }
+ cl_io_fini(env, io);
+ if (unlikely(io->ci_need_restart))
+ goto again;
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ */
+
+struct lu_device *ccc2lu_dev(struct ccc_device *vdv)
+{
+ return &vdv->cdv_cl.cd_lu_dev;
+}
+
+struct ccc_device *lu2ccc_dev(const struct lu_device *d)
+{
+ return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev);
+}
+
+struct ccc_device *cl2ccc_dev(const struct cl_device *d)
+{
+ return container_of0(d, struct ccc_device, cdv_cl);
+}
+
+struct lu_object *ccc2lu(struct ccc_object *vob)
+{
+ return &vob->cob_cl.co_lu;
+}
+
+struct ccc_object *lu2ccc(const struct lu_object *obj)
+{
+ return container_of0(obj, struct ccc_object, cob_cl.co_lu);
+}
+
+struct ccc_object *cl2ccc(const struct cl_object *obj)
+{
+ return container_of0(obj, struct ccc_object, cob_cl);
+}
+
+struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice)
+{
+ return container_of(slice, struct ccc_lock, clk_cl);
+}
+
+struct ccc_io *cl2ccc_io(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct ccc_io *cio;
+
+ cio = container_of(slice, struct ccc_io, cui_cl);
+ LASSERT(cio == ccc_env_io(env));
+ return cio;
+}
+
+struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice)
+{
+ return container_of0(slice, struct ccc_req, crq_cl);
+}
+
+struct page *cl2vm_page(const struct cl_page_slice *slice)
+{
+ return cl2ccc_page(slice)->cpg_page;
+}
+
+/*****************************************************************************
+ *
+ * Accessors.
+ *
+ */
+int ccc_object_invariant(const struct cl_object *obj)
+{
+ struct inode *inode = ccc_object_inode(obj);
+ struct cl_inode_info *lli = cl_i2info(inode);
+
+ return (S_ISREG(cl_inode_mode(inode)) ||
+ /* i_mode of unlinked inode is zeroed. */
+ cl_inode_mode(inode) == 0) && lli->lli_clob == obj;
+}
+
+struct inode *ccc_object_inode(const struct cl_object *obj)
+{
+ return cl2ccc(obj)->cob_inode;
+}
+
+/**
+ * Returns a pointer to cl_page associated with \a vmpage, without acquiring
+ * additional reference to the resulting page. This is an unsafe version of
+ * cl_vmpage_page() that can only be used under vmpage lock.
+ */
+struct cl_page *ccc_vmpage_page_transient(struct page *vmpage)
+{
+ KLASSERT(PageLocked(vmpage));
+ return (struct cl_page *)vmpage->private;
+}
+
+/**
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
+ *
+ * \param inode regular file inode
+ * \param md new file metadata from MDS
+ * - allocates cl_object if necessary,
+ * - updated layout, if object was already here.
+ */
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
+{
+ struct lu_env *env;
+ struct cl_inode_info *lli;
+ struct cl_object *clob;
+ struct lu_site *site;
+ struct lu_fid *fid;
+ struct cl_object_conf conf = {
+ .coc_inode = inode,
+ .u = {
+ .coc_md = md
+ }
+ };
+ int result = 0;
+ int refcheck;
+
+ LASSERT(md->body->valid & OBD_MD_FLID);
+ LASSERT(S_ISREG(cl_inode_mode(inode)));
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ site = cl_i2sbi(inode)->ll_site;
+ lli = cl_i2info(inode);
+ fid = &lli->lli_fid;
+ LASSERT(fid_is_sane(fid));
+
+ if (lli->lli_clob == NULL) {
+ /* clob is slave of inode, empty lli_clob means for new inode,
+ * there is no clob in cache with the given fid, so it is
+ * unnecessary to perform lookup-alloc-lookup-insert, just
+ * alloc and insert directly. */
+ LASSERT(inode->i_state & I_NEW);
+ conf.coc_lu.loc_flags = LOC_F_NEW;
+ clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
+ fid, &conf);
+ if (!IS_ERR(clob)) {
+ /*
+ * No locking is necessary, as new inode is
+ * locked by I_NEW bit.
+ */
+ lli->lli_clob = clob;
+ lli->lli_has_smd = md->lsm != NULL;
+ lu_object_ref_add(&clob->co_lu, "inode", inode);
+ } else
+ result = PTR_ERR(clob);
+ } else {
+ result = cl_conf_set(env, lli->lli_clob, &conf);
+ }
+
+ cl_env_put(env, &refcheck);
+
+ if (result != 0)
+ CERROR("Failure to initialize cl object "DFID": %d\n",
+ PFID(fid), result);
+ return result;
+}
+
+/**
+ * Wait for others drop their references of the object at first, then we drop
+ * the last one, which will lead to the object be destroyed immediately.
+ * Must be called after cl_object_kill() against this object.
+ *
+ * The reason we want to do this is: destroying top object will wait for sub
+ * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
+ * to initiate top object destroying which may deadlock. See bz22520.
+ */
+static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
+{
+ struct lu_object_header *header = obj->co_lu.lo_header;
+ wait_queue_t waiter;
+
+ if (unlikely(atomic_read(&header->loh_ref) != 1)) {
+ struct lu_site *site = obj->co_lu.lo_dev->ld_site;
+ struct lu_site_bkt_data *bkt;
+
+ bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+
+ init_waitqueue_entry_current(&waiter);
+ add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&header->loh_ref) == 1)
+ break;
+ waitq_wait(&waiter, TASK_UNINTERRUPTIBLE);
+ }
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+ }
+
+ cl_object_put(env, obj);
+}
+
+void cl_inode_fini(struct inode *inode)
+{
+ struct lu_env *env;
+ struct cl_inode_info *lli = cl_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ int refcheck;
+ int emergency;
+
+ if (clob != NULL) {
+ void *cookie;
+
+ cookie = cl_env_reenter();
+ env = cl_env_get(&refcheck);
+ emergency = IS_ERR(env);
+ if (emergency) {
+ mutex_lock(&ccc_inode_fini_guard);
+ LASSERT(ccc_inode_fini_env != NULL);
+ cl_env_implant(ccc_inode_fini_env, &refcheck);
+ env = ccc_inode_fini_env;
+ }
+ /*
+ * cl_object cache is a slave to inode cache (which, in turn
+ * is a slave to dentry cache), don't keep cl_object in memory
+ * when its master is evicted.
+ */
+ cl_object_kill(env, clob);
+ lu_object_ref_del(&clob->co_lu, "inode", inode);
+ cl_object_put_last(env, clob);
+ lli->lli_clob = NULL;
+ if (emergency) {
+ cl_env_unplant(ccc_inode_fini_env, &refcheck);
+ mutex_unlock(&ccc_inode_fini_guard);
+ } else
+ cl_env_put(env, &refcheck);
+ cl_env_reexit(cookie);
+ }
+}
+
+/**
+ * return IF_* type for given lu_dirent entry.
+ * IF_* flag shld be converted to particular OS file type in
+ * platform llite module.
+ */
+__u16 ll_dirent_type_get(struct lu_dirent *ent)
+{
+ __u16 type = 0;
+ struct luda_type *lt;
+ int len = 0;
+
+ if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
+ const unsigned align = sizeof(struct luda_type) - 1;
+
+ len = le16_to_cpu(ent->lde_namelen);
+ len = (len + align) & ~align;
+ lt = (void *)ent->lde_name + len;
+ type = IFTODT(le16_to_cpu(lt->lt_type));
+ }
+ return type;
+}
+
+/**
+ * build inode number from passed @fid */
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
+{
+ if (BITS_PER_LONG == 32 || api32)
+ RETURN(fid_flatten32(fid));
+ else
+ RETURN(fid_flatten(fid));
+}
+
+/**
+ * build inode generation from passed @fid. If our FID overflows the 32-bit
+ * inode number then return a non-zero generation to distinguish them. */
+__u32 cl_fid_build_gen(const struct lu_fid *fid)
+{
+ __u32 gen;
+ ENTRY;
+
+ if (fid_is_igif(fid)) {
+ gen = lu_igif_gen(fid);
+ RETURN(gen);
+ }
+
+ gen = (fid_flatten(fid) >> 32);
+ RETURN(gen);
+}
+
+/* lsm is unreliable after hsm implementation as layout can be changed at
+ * any time. This is only to support old, non-clio-ized interfaces. It will
+ * cause deadlock if clio operations are called with this extra layout refcount
+ * because in case the layout changed during the IO, ll_layout_refresh() will
+ * have to wait for the refcount to become zero to destroy the older layout.
+ *
+ * Notice that the lsm returned by this function may not be valid unless called
+ * inside layout lock - MDS_INODELOCK_LAYOUT. */
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
+{
+ return lov_lsm_get(cl_i2info(inode)->lli_clob);
+}
+
+void inline ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
+{
+ lov_lsm_put(cl_i2info(inode)->lli_clob, lsm);
+}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
new file mode 100644
index 000000000000..8ecbef92753d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
@@ -0,0 +1,194 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ */
+#include <obd_class.h>
+#include <obd_support.h>
+#include <obd.h>
+#include <cl_object.h>
+#include <lclient.h>
+
+#include <lustre_lite.h>
+
+
+/* Initialize the default and maximum LOV EA and cookie sizes. This allows
+ * us to make MDS RPCs with large enough reply buffers to hold the
+ * maximum-sized (= maximum striped) EA and cookie without having to
+ * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
+{
+ struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
+ __u32 valsize = sizeof(struct lov_desc);
+ int rc, easize, def_easize, cookiesize;
+ struct lov_desc desc;
+ __u16 stripes;
+ ENTRY;
+
+ rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
+ &valsize, &desc, NULL);
+ if (rc)
+ RETURN(rc);
+
+ stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT);
+ lsm.lsm_stripe_count = stripes;
+ easize = obd_size_diskmd(dt_exp, &lsm);
+
+ lsm.lsm_stripe_count = desc.ld_default_stripe_count;
+ def_easize = obd_size_diskmd(dt_exp, &lsm);
+
+ cookiesize = stripes * sizeof(struct llog_cookie);
+
+ CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
+ easize, cookiesize);
+
+ rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize);
+ RETURN(rc);
+}
+
+/**
+ * This function is used as an upcall-callback hooked by liblustre and llite
+ * clients into obd_notify() listeners chain to handle notifications about
+ * change of import connect_flags. See llu_fsswop_mount() and
+ * lustre_common_fill_super().
+ */
+int cl_ocd_update(struct obd_device *host,
+ struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data)
+{
+ struct lustre_client_ocd *lco;
+ struct client_obd *cli;
+ __u64 flags;
+ int result;
+
+ ENTRY;
+ if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ cli = &watched->u.cli;
+ lco = owner;
+ flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
+ CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n",
+ lco->lco_flags, flags);
+ mutex_lock(&lco->lco_lock);
+ lco->lco_flags &= flags;
+ /* for each osc event update ea size */
+ if (lco->lco_dt_exp)
+ cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
+
+ mutex_unlock(&lco->lco_lock);
+ result = 0;
+ } else {
+ CERROR("unexpected notification from %s %s!\n",
+ watched->obd_type->typ_name,
+ watched->obd_name);
+ result = -EINVAL;
+ }
+ RETURN(result);
+}
+
+#define GROUPLOCK_SCOPE "grouplock"
+
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+ struct ccc_grouplock *cg)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_lock *lock;
+ struct cl_lock_descr *descr;
+ __u32 enqflags;
+ int refcheck;
+ int rc;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ io = ccc_env_thread_io(env);
+ io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
+
+ rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (rc) {
+ LASSERT(rc < 0);
+ cl_env_put(env, &refcheck);
+ return rc;
+ }
+
+ descr = &ccc_env_info(env)->cti_descr;
+ descr->cld_obj = obj;
+ descr->cld_start = 0;
+ descr->cld_end = CL_PAGE_EOF;
+ descr->cld_gid = gid;
+ descr->cld_mode = CLM_GROUP;
+
+ enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
+ descr->cld_enq_flags = enqflags;
+
+ lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
+ if (IS_ERR(lock)) {
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+ return PTR_ERR(lock);
+ }
+
+ cg->cg_env = cl_env_get(&refcheck);
+ cg->cg_io = io;
+ cg->cg_lock = lock;
+ cg->cg_gid = gid;
+ LASSERT(cg->cg_env == env);
+
+ cl_env_unplant(env, &refcheck);
+ return 0;
+}
+
+void cl_put_grouplock(struct ccc_grouplock *cg)
+{
+ struct lu_env *env = cg->cg_env;
+ struct cl_io *io = cg->cg_io;
+ struct cl_lock *lock = cg->cg_lock;
+ int refcheck;
+
+ LASSERT(cg->cg_env);
+ LASSERT(cg->cg_gid);
+
+ cl_env_implant(env, &refcheck);
+ cl_env_put(env, &refcheck);
+
+ cl_unuse(env, lock);
+ cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
+ cl_io_fini(env, io);
+ cl_env_put(env, NULL);
+}
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
new file mode 100644
index 000000000000..ce90c7e3c488
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -0,0 +1,764 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/interval_tree.c
+ *
+ * Interval tree library used by ldlm extent lock code
+ *
+ * Author: Huang Wei <huangwei@clusterfs.com>
+ * Author: Jay Xiong <jinshan.xiong@sun.com>
+ */
+# include <lustre_dlm.h>
+#include <obd_support.h>
+#include <interval_tree.h>
+
+enum {
+ INTERVAL_RED = 0,
+ INTERVAL_BLACK = 1
+};
+
+static inline int node_is_left_child(struct interval_node *node)
+{
+ LASSERT(node->in_parent != NULL);
+ return node == node->in_parent->in_left;
+}
+
+static inline int node_is_right_child(struct interval_node *node)
+{
+ LASSERT(node->in_parent != NULL);
+ return node == node->in_parent->in_right;
+}
+
+static inline int node_is_red(struct interval_node *node)
+{
+ return node->in_color == INTERVAL_RED;
+}
+
+static inline int node_is_black(struct interval_node *node)
+{
+ return node->in_color == INTERVAL_BLACK;
+}
+
+static inline int extent_compare(struct interval_node_extent *e1,
+ struct interval_node_extent *e2)
+{
+ int rc;
+ if (e1->start == e2->start) {
+ if (e1->end < e2->end)
+ rc = -1;
+ else if (e1->end > e2->end)
+ rc = 1;
+ else
+ rc = 0;
+ } else {
+ if (e1->start < e2->start)
+ rc = -1;
+ else
+ rc = 1;
+ }
+ return rc;
+}
+
+static inline int extent_equal(struct interval_node_extent *e1,
+ struct interval_node_extent *e2)
+{
+ return (e1->start == e2->start) && (e1->end == e2->end);
+}
+
+static inline int extent_overlapped(struct interval_node_extent *e1,
+ struct interval_node_extent *e2)
+{
+ return (e1->start <= e2->end) && (e2->start <= e1->end);
+}
+
+static inline int node_compare(struct interval_node *n1,
+ struct interval_node *n2)
+{
+ return extent_compare(&n1->in_extent, &n2->in_extent);
+}
+
+static inline int node_equal(struct interval_node *n1,
+ struct interval_node *n2)
+{
+ return extent_equal(&n1->in_extent, &n2->in_extent);
+}
+
+static inline __u64 max_u64(__u64 x, __u64 y)
+{
+ return x > y ? x : y;
+}
+
+static inline __u64 min_u64(__u64 x, __u64 y)
+{
+ return x < y ? x : y;
+}
+
+#define interval_for_each(node, root) \
+for (node = interval_first(root); node != NULL; \
+ node = interval_next(node))
+
+#define interval_for_each_reverse(node, root) \
+for (node = interval_last(root); node != NULL; \
+ node = interval_prev(node))
+
+static struct interval_node *interval_first(struct interval_node *node)
+{
+ ENTRY;
+
+ if (!node)
+ RETURN(NULL);
+ while (node->in_left)
+ node = node->in_left;
+ RETURN(node);
+}
+
+static struct interval_node *interval_last(struct interval_node *node)
+{
+ ENTRY;
+
+ if (!node)
+ RETURN(NULL);
+ while (node->in_right)
+ node = node->in_right;
+ RETURN(node);
+}
+
+static struct interval_node *interval_next(struct interval_node *node)
+{
+ ENTRY;
+
+ if (!node)
+ RETURN(NULL);
+ if (node->in_right)
+ RETURN(interval_first(node->in_right));
+ while (node->in_parent && node_is_right_child(node))
+ node = node->in_parent;
+ RETURN(node->in_parent);
+}
+
+static struct interval_node *interval_prev(struct interval_node *node)
+{
+ ENTRY;
+
+ if (!node)
+ RETURN(NULL);
+
+ if (node->in_left)
+ RETURN(interval_last(node->in_left));
+
+ while (node->in_parent && node_is_left_child(node))
+ node = node->in_parent;
+
+ RETURN(node->in_parent);
+}
+
+enum interval_iter interval_iterate(struct interval_node *root,
+ interval_callback_t func,
+ void *data)
+{
+ struct interval_node *node;
+ enum interval_iter rc = INTERVAL_ITER_CONT;
+ ENTRY;
+
+ interval_for_each(node, root) {
+ rc = func(node, data);
+ if (rc == INTERVAL_ITER_STOP)
+ break;
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(interval_iterate);
+
+enum interval_iter interval_iterate_reverse(struct interval_node *root,
+ interval_callback_t func,
+ void *data)
+{
+ struct interval_node *node;
+ enum interval_iter rc = INTERVAL_ITER_CONT;
+ ENTRY;
+
+ interval_for_each_reverse(node, root) {
+ rc = func(node, data);
+ if (rc == INTERVAL_ITER_STOP)
+ break;
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(interval_iterate_reverse);
+
+/* try to find a node with same interval in the tree,
+ * if found, return the pointer to the node, otherwise return NULL*/
+struct interval_node *interval_find(struct interval_node *root,
+ struct interval_node_extent *ex)
+{
+ struct interval_node *walk = root;
+ int rc;
+ ENTRY;
+
+ while (walk) {
+ rc = extent_compare(ex, &walk->in_extent);
+ if (rc == 0)
+ break;
+ else if (rc < 0)
+ walk = walk->in_left;
+ else
+ walk = walk->in_right;
+ }
+
+ RETURN(walk);
+}
+EXPORT_SYMBOL(interval_find);
+
+static void __rotate_change_maxhigh(struct interval_node *node,
+ struct interval_node *rotate)
+{
+ __u64 left_max, right_max;
+
+ rotate->in_max_high = node->in_max_high;
+ left_max = node->in_left ? node->in_left->in_max_high : 0;
+ right_max = node->in_right ? node->in_right->in_max_high : 0;
+ node->in_max_high = max_u64(interval_high(node),
+ max_u64(left_max,right_max));
+}
+
+/* The left rotation "pivots" around the link from node to node->right, and
+ * - node will be linked to node->right's left child, and
+ * - node->right's left child will be linked to node's right child. */
+static void __rotate_left(struct interval_node *node,
+ struct interval_node **root)
+{
+ struct interval_node *right = node->in_right;
+ struct interval_node *parent = node->in_parent;
+
+ node->in_right = right->in_left;
+ if (node->in_right)
+ right->in_left->in_parent = node;
+
+ right->in_left = node;
+ right->in_parent = parent;
+ if (parent) {
+ if (node_is_left_child(node))
+ parent->in_left = right;
+ else
+ parent->in_right = right;
+ } else {
+ *root = right;
+ }
+ node->in_parent = right;
+
+ /* update max_high for node and right */
+ __rotate_change_maxhigh(node, right);
+}
+
+/* The right rotation "pivots" around the link from node to node->left, and
+ * - node will be linked to node->left's right child, and
+ * - node->left's right child will be linked to node's left child. */
+static void __rotate_right(struct interval_node *node,
+ struct interval_node **root)
+{
+ struct interval_node *left = node->in_left;
+ struct interval_node *parent = node->in_parent;
+
+ node->in_left = left->in_right;
+ if (node->in_left)
+ left->in_right->in_parent = node;
+ left->in_right = node;
+
+ left->in_parent = parent;
+ if (parent) {
+ if (node_is_right_child(node))
+ parent->in_right = left;
+ else
+ parent->in_left = left;
+ } else {
+ *root = left;
+ }
+ node->in_parent = left;
+
+ /* update max_high for node and left */
+ __rotate_change_maxhigh(node, left);
+}
+
+#define interval_swap(a, b) do { \
+ struct interval_node *c = a; a = b; b = c; \
+} while (0)
+
+/*
+ * Operations INSERT and DELETE, when run on a tree with n keys,
+ * take O(logN) time.Because they modify the tree, the result
+ * may violate the red-black properties.To restore these properties,
+ * we must change the colors of some of the nodes in the tree
+ * and also change the pointer structure.
+ */
+static void interval_insert_color(struct interval_node *node,
+ struct interval_node **root)
+{
+ struct interval_node *parent, *gparent;
+ ENTRY;
+
+ while ((parent = node->in_parent) && node_is_red(parent)) {
+ gparent = parent->in_parent;
+ /* Parent is RED, so gparent must not be NULL */
+ if (node_is_left_child(parent)) {
+ struct interval_node *uncle;
+ uncle = gparent->in_right;
+ if (uncle && node_is_red(uncle)) {
+ uncle->in_color = INTERVAL_BLACK;
+ parent->in_color = INTERVAL_BLACK;
+ gparent->in_color = INTERVAL_RED;
+ node = gparent;
+ continue;
+ }
+
+ if (parent->in_right == node) {
+ __rotate_left(parent, root);
+ interval_swap(node, parent);
+ }
+
+ parent->in_color = INTERVAL_BLACK;
+ gparent->in_color = INTERVAL_RED;
+ __rotate_right(gparent, root);
+ } else {
+ struct interval_node *uncle;
+ uncle = gparent->in_left;
+ if (uncle && node_is_red(uncle)) {
+ uncle->in_color = INTERVAL_BLACK;
+ parent->in_color = INTERVAL_BLACK;
+ gparent->in_color = INTERVAL_RED;
+ node = gparent;
+ continue;
+ }
+
+ if (node_is_left_child(node)) {
+ __rotate_right(parent, root);
+ interval_swap(node, parent);
+ }
+
+ parent->in_color = INTERVAL_BLACK;
+ gparent->in_color = INTERVAL_RED;
+ __rotate_left(gparent, root);
+ }
+ }
+
+ (*root)->in_color = INTERVAL_BLACK;
+ EXIT;
+}
+
+struct interval_node *interval_insert(struct interval_node *node,
+ struct interval_node **root)
+
+{
+ struct interval_node **p, *parent = NULL;
+ ENTRY;
+
+ LASSERT(!interval_is_intree(node));
+ p = root;
+ while (*p) {
+ parent = *p;
+ if (node_equal(parent, node))
+ RETURN(parent);
+
+ /* max_high field must be updated after each iteration */
+ if (parent->in_max_high < interval_high(node))
+ parent->in_max_high = interval_high(node);
+
+ if (node_compare(node, parent) < 0)
+ p = &parent->in_left;
+ else
+ p = &parent->in_right;
+ }
+
+ /* link node into the tree */
+ node->in_parent = parent;
+ node->in_color = INTERVAL_RED;
+ node->in_left = node->in_right = NULL;
+ *p = node;
+
+ interval_insert_color(node, root);
+ node->in_intree = 1;
+
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(interval_insert);
+
+static inline int node_is_black_or_0(struct interval_node *node)
+{
+ return !node || node_is_black(node);
+}
+
+static void interval_erase_color(struct interval_node *node,
+ struct interval_node *parent,
+ struct interval_node **root)
+{
+ struct interval_node *tmp;
+ ENTRY;
+
+ while (node_is_black_or_0(node) && node != *root) {
+ if (parent->in_left == node) {
+ tmp = parent->in_right;
+ if (node_is_red(tmp)) {
+ tmp->in_color = INTERVAL_BLACK;
+ parent->in_color = INTERVAL_RED;
+ __rotate_left(parent, root);
+ tmp = parent->in_right;
+ }
+ if (node_is_black_or_0(tmp->in_left) &&
+ node_is_black_or_0(tmp->in_right)) {
+ tmp->in_color = INTERVAL_RED;
+ node = parent;
+ parent = node->in_parent;
+ } else {
+ if (node_is_black_or_0(tmp->in_right)) {
+ struct interval_node *o_left;
+ if ((o_left = tmp->in_left))
+ o_left->in_color = INTERVAL_BLACK;
+ tmp->in_color = INTERVAL_RED;
+ __rotate_right(tmp, root);
+ tmp = parent->in_right;
+ }
+ tmp->in_color = parent->in_color;
+ parent->in_color = INTERVAL_BLACK;
+ if (tmp->in_right)
+ tmp->in_right->in_color = INTERVAL_BLACK;
+ __rotate_left(parent, root);
+ node = *root;
+ break;
+ }
+ } else {
+ tmp = parent->in_left;
+ if (node_is_red(tmp)) {
+ tmp->in_color = INTERVAL_BLACK;
+ parent->in_color = INTERVAL_RED;
+ __rotate_right(parent, root);
+ tmp = parent->in_left;
+ }
+ if (node_is_black_or_0(tmp->in_left) &&
+ node_is_black_or_0(tmp->in_right)) {
+ tmp->in_color = INTERVAL_RED;
+ node = parent;
+ parent = node->in_parent;
+ } else {
+ if (node_is_black_or_0(tmp->in_left)) {
+ struct interval_node *o_right;
+ if ((o_right = tmp->in_right))
+ o_right->in_color = INTERVAL_BLACK;
+ tmp->in_color = INTERVAL_RED;
+ __rotate_left(tmp, root);
+ tmp = parent->in_left;
+ }
+ tmp->in_color = parent->in_color;
+ parent->in_color = INTERVAL_BLACK;
+ if (tmp->in_left)
+ tmp->in_left->in_color = INTERVAL_BLACK;
+ __rotate_right(parent, root);
+ node = *root;
+ break;
+ }
+ }
+ }
+ if (node)
+ node->in_color = INTERVAL_BLACK;
+ EXIT;
+}
+
+/*
+ * if the @max_high value of @node is changed, this function traverse a path
+ * from node up to the root to update max_high for the whole tree.
+ */
+static void update_maxhigh(struct interval_node *node,
+ __u64 old_maxhigh)
+{
+ __u64 left_max, right_max;
+ ENTRY;
+
+ while (node) {
+ left_max = node->in_left ? node->in_left->in_max_high : 0;
+ right_max = node->in_right ? node->in_right->in_max_high : 0;
+ node->in_max_high = max_u64(interval_high(node),
+ max_u64(left_max, right_max));
+
+ if (node->in_max_high >= old_maxhigh)
+ break;
+ node = node->in_parent;
+ }
+ EXIT;
+}
+
+void interval_erase(struct interval_node *node,
+ struct interval_node **root)
+{
+ struct interval_node *child, *parent;
+ int color;
+ ENTRY;
+
+ LASSERT(interval_is_intree(node));
+ node->in_intree = 0;
+ if (!node->in_left) {
+ child = node->in_right;
+ } else if (!node->in_right) {
+ child = node->in_left;
+ } else { /* Both left and right child are not NULL */
+ struct interval_node *old = node;
+
+ node = interval_next(node);
+ child = node->in_right;
+ parent = node->in_parent;
+ color = node->in_color;
+
+ if (child)
+ child->in_parent = parent;
+ if (parent == old)
+ parent->in_right = child;
+ else
+ parent->in_left = child;
+
+ node->in_color = old->in_color;
+ node->in_right = old->in_right;
+ node->in_left = old->in_left;
+ node->in_parent = old->in_parent;
+
+ if (old->in_parent) {
+ if (node_is_left_child(old))
+ old->in_parent->in_left = node;
+ else
+ old->in_parent->in_right = node;
+ } else {
+ *root = node;
+ }
+
+ old->in_left->in_parent = node;
+ if (old->in_right)
+ old->in_right->in_parent = node;
+ update_maxhigh(child ? : parent, node->in_max_high);
+ update_maxhigh(node, old->in_max_high);
+ if (parent == old)
+ parent = node;
+ goto color;
+ }
+ parent = node->in_parent;
+ color = node->in_color;
+
+ if (child)
+ child->in_parent = parent;
+ if (parent) {
+ if (node_is_left_child(node))
+ parent->in_left = child;
+ else
+ parent->in_right = child;
+ } else {
+ *root = child;
+ }
+
+ update_maxhigh(child ? : parent, node->in_max_high);
+
+color:
+ if (color == INTERVAL_BLACK)
+ interval_erase_color(child, parent, root);
+ EXIT;
+}
+EXPORT_SYMBOL(interval_erase);
+
+static inline int interval_may_overlap(struct interval_node *node,
+ struct interval_node_extent *ext)
+{
+ return (ext->start <= node->in_max_high &&
+ ext->end >= interval_low(node));
+}
+
+/*
+ * This function finds all intervals that overlap interval ext,
+ * and calls func to handle resulted intervals one by one.
+ * in lustre, this function will find all conflicting locks in
+ * the granted queue and add these locks to the ast work list.
+ *
+ * {
+ * if (node == NULL)
+ * return 0;
+ * if (ext->end < interval_low(node)) {
+ * interval_search(node->in_left, ext, func, data);
+ * } else if (interval_may_overlap(node, ext)) {
+ * if (extent_overlapped(ext, &node->in_extent))
+ * func(node, data);
+ * interval_search(node->in_left, ext, func, data);
+ * interval_search(node->in_right, ext, func, data);
+ * }
+ * return 0;
+ * }
+ *
+ */
+enum interval_iter interval_search(struct interval_node *node,
+ struct interval_node_extent *ext,
+ interval_callback_t func,
+ void *data)
+{
+ struct interval_node *parent;
+ enum interval_iter rc = INTERVAL_ITER_CONT;
+
+ LASSERT(ext != NULL);
+ LASSERT(func != NULL);
+
+ while (node) {
+ if (ext->end < interval_low(node)) {
+ if (node->in_left) {
+ node = node->in_left;
+ continue;
+ }
+ } else if (interval_may_overlap(node, ext)) {
+ if (extent_overlapped(ext, &node->in_extent)) {
+ rc = func(node, data);
+ if (rc == INTERVAL_ITER_STOP)
+ break;
+ }
+
+ if (node->in_left) {
+ node = node->in_left;
+ continue;
+ }
+ if (node->in_right) {
+ node = node->in_right;
+ continue;
+ }
+ }
+
+ parent = node->in_parent;
+ while (parent) {
+ if (node_is_left_child(node) &&
+ parent->in_right) {
+ /* If we ever got the left, it means that the
+ * parent met ext->end<interval_low(parent), or
+ * may_overlap(parent). If the former is true,
+ * we needn't go back. So stop early and check
+ * may_overlap(parent) after this loop. */
+ node = parent->in_right;
+ break;
+ }
+ node = parent;
+ parent = parent->in_parent;
+ }
+ if (parent == NULL || !interval_may_overlap(parent, ext))
+ break;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(interval_search);
+
+static enum interval_iter interval_overlap_cb(struct interval_node *n,
+ void *args)
+{
+ *(int *)args = 1;
+ return INTERVAL_ITER_STOP;
+}
+
+int interval_is_overlapped(struct interval_node *root,
+ struct interval_node_extent *ext)
+{
+ int has = 0;
+ (void)interval_search(root, ext, interval_overlap_cb, &has);
+ return has;
+}
+EXPORT_SYMBOL(interval_is_overlapped);
+
+/* Don't expand to low. Expanding downwards is expensive, and meaningless to
+ * some extents, because programs seldom do IO backward.
+ *
+ * The recursive algorithm of expanding low:
+ * expand_low {
+ * struct interval_node *tmp;
+ * static __u64 res = 0;
+ *
+ * if (root == NULL)
+ * return res;
+ * if (root->in_max_high < low) {
+ * res = max_u64(root->in_max_high + 1, res);
+ * return res;
+ * } else if (low < interval_low(root)) {
+ * interval_expand_low(root->in_left, low);
+ * return res;
+ * }
+ *
+ * if (interval_high(root) < low)
+ * res = max_u64(interval_high(root) + 1, res);
+ * interval_expand_low(root->in_left, low);
+ * interval_expand_low(root->in_right, low);
+ *
+ * return res;
+ * }
+ *
+ * It's much easy to eliminate the recursion, see interval_search for
+ * an example. -jay
+ */
+static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
+{
+ /* we only concern the empty tree right now. */
+ if (root == NULL)
+ return 0;
+ return low;
+}
+
+static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
+{
+ __u64 result = ~0;
+
+ while (node != NULL) {
+ if (node->in_max_high < high)
+ break;
+
+ if (interval_low(node) > high) {
+ result = interval_low(node) - 1;
+ node = node->in_left;
+ } else {
+ node = node->in_right;
+ }
+ }
+
+ return result;
+}
+
+/* expanding the extent based on @ext. */
+void interval_expand(struct interval_node *root,
+ struct interval_node_extent *ext,
+ struct interval_node_extent *limiter)
+{
+ /* The assertion of interval_is_overlapped is expensive because we may
+ * travel many nodes to find the overlapped node. */
+ LASSERT(interval_is_overlapped(root, ext) == 0);
+ if (!limiter || limiter->start < ext->start)
+ ext->start = interval_expand_low(root, ext->start);
+ if (!limiter || limiter->end > ext->end)
+ ext->end = interval_expand_high(root, ext->end);
+ LASSERT(interval_is_overlapped(root, ext) == 0);
+}
+EXPORT_SYMBOL(interval_expand);
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
new file mode 100644
index 000000000000..853409aa945d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -0,0 +1,76 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+#include <linux/libcfs/libcfs.h>
+
+#include <lustre_dlm.h>
+#include <lustre_lib.h>
+
+/**
+ * Lock a lock and its resource.
+ *
+ * LDLM locking uses resource to serialize access to locks
+ * but there is a case when we change resource of lock upon
+ * enqueue reply. We rely on lock->l_resource = new_res
+ * being an atomic operation.
+ */
+struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
+{
+ /* on server-side resource of lock doesn't change */
+ if (!lock->l_ns_srv)
+ spin_lock(&lock->l_lock);
+
+ lock_res(lock->l_resource);
+
+ lock->l_res_locked = 1;
+ return lock->l_resource;
+}
+EXPORT_SYMBOL(lock_res_and_lock);
+
+/**
+ * Unlock a lock and its resource previously locked with lock_res_and_lock
+ */
+void unlock_res_and_lock(struct ldlm_lock *lock)
+{
+ /* on server-side resource of lock doesn't change */
+ lock->l_res_locked = 0;
+
+ unlock_res(lock->l_resource);
+ if (!lock->l_ns_srv)
+ spin_unlock(&lock->l_lock);
+}
+EXPORT_SYMBOL(unlock_res_and_lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
new file mode 100644
index 000000000000..f7432f78e396
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -0,0 +1,242 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_extent.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+/**
+ * This file contains implementation of EXTENT lock type
+ *
+ * EXTENT lock type is for locking a contiguous range of values, represented
+ * by 64-bit starting and ending offsets (inclusive). There are several extent
+ * lock modes, some of which may be mutually incompatible. Extent locks are
+ * considered incompatible if their modes are incompatible and their extents
+ * intersect. See the lock mode compatibility matrix in lustre_dlm.h.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+# include <linux/libcfs/libcfs.h>
+
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+
+#include "ldlm_internal.h"
+
+
+/* When a lock is cancelled by a client, the KMS may undergo change if this
+ * is the "highest lock". This function returns the new KMS value.
+ * Caller must hold lr_lock already.
+ *
+ * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! */
+__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
+{
+ struct ldlm_resource *res = lock->l_resource;
+ struct list_head *tmp;
+ struct ldlm_lock *lck;
+ __u64 kms = 0;
+ ENTRY;
+
+ /* don't let another thread in ldlm_extent_shift_kms race in
+ * just after we finish and take our lock into account in its
+ * calculation of the kms */
+ lock->l_flags |= LDLM_FL_KMS_IGNORE;
+
+ list_for_each(tmp, &res->lr_granted) {
+ lck = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ if (lck->l_flags & LDLM_FL_KMS_IGNORE)
+ continue;
+
+ if (lck->l_policy_data.l_extent.end >= old_kms)
+ RETURN(old_kms);
+
+ /* This extent _has_ to be smaller than old_kms (checked above)
+ * so kms can only ever be smaller or the same as old_kms. */
+ if (lck->l_policy_data.l_extent.end + 1 > kms)
+ kms = lck->l_policy_data.l_extent.end + 1;
+ }
+ LASSERTF(kms <= old_kms, "kms "LPU64" old_kms "LPU64"\n", kms, old_kms);
+
+ RETURN(kms);
+}
+EXPORT_SYMBOL(ldlm_extent_shift_kms);
+
+struct kmem_cache *ldlm_interval_slab;
+struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
+{
+ struct ldlm_interval *node;
+ ENTRY;
+
+ LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
+ OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);
+ if (node == NULL)
+ RETURN(NULL);
+
+ INIT_LIST_HEAD(&node->li_group);
+ ldlm_interval_attach(node, lock);
+ RETURN(node);
+}
+
+void ldlm_interval_free(struct ldlm_interval *node)
+{
+ if (node) {
+ LASSERT(list_empty(&node->li_group));
+ LASSERT(!interval_is_intree(&node->li_node));
+ OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
+ }
+}
+
+/* interval tree, for LDLM_EXTENT. */
+void ldlm_interval_attach(struct ldlm_interval *n,
+ struct ldlm_lock *l)
+{
+ LASSERT(l->l_tree_node == NULL);
+ LASSERT(l->l_resource->lr_type == LDLM_EXTENT);
+
+ list_add_tail(&l->l_sl_policy, &n->li_group);
+ l->l_tree_node = n;
+}
+
+struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l)
+{
+ struct ldlm_interval *n = l->l_tree_node;
+
+ if (n == NULL)
+ return NULL;
+
+ LASSERT(!list_empty(&n->li_group));
+ l->l_tree_node = NULL;
+ list_del_init(&l->l_sl_policy);
+
+ return (list_empty(&n->li_group) ? n : NULL);
+}
+
+static inline int lock_mode_to_index(ldlm_mode_t mode)
+{
+ int index;
+
+ LASSERT(mode != 0);
+ LASSERT(IS_PO2(mode));
+ for (index = -1; mode; index++, mode >>= 1) ;
+ LASSERT(index < LCK_MODE_NUM);
+ return index;
+}
+
+/** Add newly granted lock into interval tree for the resource. */
+void ldlm_extent_add_lock(struct ldlm_resource *res,
+ struct ldlm_lock *lock)
+{
+ struct interval_node *found, **root;
+ struct ldlm_interval *node;
+ struct ldlm_extent *extent;
+ int idx;
+
+ LASSERT(lock->l_granted_mode == lock->l_req_mode);
+
+ node = lock->l_tree_node;
+ LASSERT(node != NULL);
+ LASSERT(!interval_is_intree(&node->li_node));
+
+ idx = lock_mode_to_index(lock->l_granted_mode);
+ LASSERT(lock->l_granted_mode == 1 << idx);
+ LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode);
+
+ /* node extent initialize */
+ extent = &lock->l_policy_data.l_extent;
+ interval_set(&node->li_node, extent->start, extent->end);
+
+ root = &res->lr_itree[idx].lit_root;
+ found = interval_insert(&node->li_node, root);
+ if (found) { /* The policy group found. */
+ struct ldlm_interval *tmp = ldlm_interval_detach(lock);
+ LASSERT(tmp != NULL);
+ ldlm_interval_free(tmp);
+ ldlm_interval_attach(to_ldlm_interval(found), lock);
+ }
+ res->lr_itree[idx].lit_size++;
+
+ /* even though we use interval tree to manage the extent lock, we also
+ * add the locks into grant list, for debug purpose, .. */
+ ldlm_resource_add_lock(res, &res->lr_granted, lock);
+}
+
+/** Remove cancelled lock from resource interval tree. */
+void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
+{
+ struct ldlm_resource *res = lock->l_resource;
+ struct ldlm_interval *node = lock->l_tree_node;
+ struct ldlm_interval_tree *tree;
+ int idx;
+
+ if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */
+ return;
+
+ idx = lock_mode_to_index(lock->l_granted_mode);
+ LASSERT(lock->l_granted_mode == 1 << idx);
+ tree = &res->lr_itree[idx];
+
+ LASSERT(tree->lit_root != NULL); /* assure the tree is not null */
+
+ tree->lit_size--;
+ node = ldlm_interval_detach(lock);
+ if (node) {
+ interval_erase(&node->li_node, &tree->lit_root);
+ ldlm_interval_free(node);
+ }
+}
+
+void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ memset(lpolicy, 0, sizeof(*lpolicy));
+ lpolicy->l_extent.start = wpolicy->l_extent.start;
+ lpolicy->l_extent.end = wpolicy->l_extent.end;
+ lpolicy->l_extent.gid = wpolicy->l_extent.gid;
+}
+
+void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy)
+{
+ memset(wpolicy, 0, sizeof(*wpolicy));
+ wpolicy->l_extent.start = lpolicy->l_extent.start;
+ wpolicy->l_extent.end = lpolicy->l_extent.end;
+ wpolicy->l_extent.gid = lpolicy->l_extent.gid;
+}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
new file mode 100644
index 000000000000..f100a84bde73
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -0,0 +1,849 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003 Hewlett-Packard Development Company LP.
+ * Developed under the sponsorship of the US Government under
+ * Subcontract No. B514193
+ *
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/**
+ * This file implements POSIX lock type for Lustre.
+ * Its policy properties are start and end of extent and PID.
+ *
+ * These locks are only done through MDS due to POSIX semantics requiring
+ * e.g. that locks could be only partially released and as such split into
+ * two parts, and also that two adjacent locks from the same process may be
+ * merged into a single wider lock.
+ *
+ * Lock modes are mapped like this:
+ * PR and PW for READ and WRITE locks
+ * NL to request a releasing of a portion of the lock
+ *
+ * These flock locks never timeout.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <linux/list.h>
+
+#include "ldlm_internal.h"
+
+int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag);
+
+/**
+ * list_for_remaining_safe - iterate over the remaining entries in a list
+ * and safeguard against removal of a list entry.
+ * \param pos the &struct list_head to use as a loop counter. pos MUST
+ * have been initialized prior to using it in this macro.
+ * \param n another &struct list_head to use as temporary storage
+ * \param head the head for your list.
+ */
+#define list_for_remaining_safe(pos, n, head) \
+ for (n = pos->next; pos != (head); pos = n, n = pos->next)
+
+static inline int
+ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new)
+{
+ return((new->l_policy_data.l_flock.owner ==
+ lock->l_policy_data.l_flock.owner) &&
+ (new->l_export == lock->l_export));
+}
+
+static inline int
+ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new)
+{
+ return((new->l_policy_data.l_flock.start <=
+ lock->l_policy_data.l_flock.end) &&
+ (new->l_policy_data.l_flock.end >=
+ lock->l_policy_data.l_flock.start));
+}
+
+static inline int ldlm_flock_blocking_link(struct ldlm_lock *req,
+ struct ldlm_lock *lock)
+{
+ int rc = 0;
+
+ /* For server only */
+ if (req->l_export == NULL)
+ return 0;
+
+ if (unlikely(req->l_export->exp_flock_hash == NULL)) {
+ rc = ldlm_init_flock_export(req->l_export);
+ if (rc)
+ goto error;
+ }
+
+ LASSERT(hlist_unhashed(&req->l_exp_flock_hash));
+
+ req->l_policy_data.l_flock.blocking_owner =
+ lock->l_policy_data.l_flock.owner;
+ req->l_policy_data.l_flock.blocking_export =
+ lock->l_export;
+ req->l_policy_data.l_flock.blocking_refs = 0;
+
+ cfs_hash_add(req->l_export->exp_flock_hash,
+ &req->l_policy_data.l_flock.owner,
+ &req->l_exp_flock_hash);
+error:
+ return rc;
+}
+
+static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req)
+{
+ /* For server only */
+ if (req->l_export == NULL)
+ return;
+
+ check_res_locked(req->l_resource);
+ if (req->l_export->exp_flock_hash != NULL &&
+ !hlist_unhashed(&req->l_exp_flock_hash))
+ cfs_hash_del(req->l_export->exp_flock_hash,
+ &req->l_policy_data.l_flock.owner,
+ &req->l_exp_flock_hash);
+}
+
+static inline void
+ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, __u64 flags)
+{
+ ENTRY;
+
+ LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%llx)",
+ mode, flags);
+
+ /* Safe to not lock here, since it should be empty anyway */
+ LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
+
+ list_del_init(&lock->l_res_link);
+ if (flags == LDLM_FL_WAIT_NOREPROC &&
+ !(lock->l_flags & LDLM_FL_FAILED)) {
+ /* client side - set a flag to prevent sending a CANCEL */
+ lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
+
+ /* when reaching here, it is under lock_res_and_lock(). Thus,
+ need call the nolock version of ldlm_lock_decref_internal*/
+ ldlm_lock_decref_internal_nolock(lock, mode);
+ }
+
+ ldlm_lock_destroy_nolock(lock);
+ EXIT;
+}
+
+/**
+ * POSIX locks deadlock detection code.
+ *
+ * Given a new lock \a req and an existing lock \a bl_lock it conflicts
+ * with, we need to iterate through all blocked POSIX locks for this
+ * export and see if there is a deadlock condition arising. (i.e. when
+ * one client holds a lock on something and want a lock on something
+ * else and at the same time another client has the opposite situation).
+ */
+static int
+ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
+{
+ struct obd_export *req_exp = req->l_export;
+ struct obd_export *bl_exp = bl_lock->l_export;
+ __u64 req_owner = req->l_policy_data.l_flock.owner;
+ __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner;
+
+ /* For server only */
+ if (req_exp == NULL)
+ return 0;
+
+ class_export_get(bl_exp);
+ while (1) {
+ struct obd_export *bl_exp_new;
+ struct ldlm_lock *lock = NULL;
+ struct ldlm_flock *flock;
+
+ if (bl_exp->exp_flock_hash != NULL)
+ lock = cfs_hash_lookup(bl_exp->exp_flock_hash,
+ &bl_owner);
+ if (lock == NULL)
+ break;
+
+ flock = &lock->l_policy_data.l_flock;
+ LASSERT(flock->owner == bl_owner);
+ bl_owner = flock->blocking_owner;
+ bl_exp_new = class_export_get(flock->blocking_export);
+ class_export_put(bl_exp);
+
+ cfs_hash_put(bl_exp->exp_flock_hash, &lock->l_exp_flock_hash);
+ bl_exp = bl_exp_new;
+
+ if (bl_owner == req_owner && bl_exp == req_exp) {
+ class_export_put(bl_exp);
+ return 1;
+ }
+ }
+ class_export_put(bl_exp);
+
+ return 0;
+}
+
+/**
+ * Process a granting attempt for flock lock.
+ * Must be called under ns lock held.
+ *
+ * This function looks for any conflicts for \a lock in the granted or
+ * waiting queues. The lock is granted if no conflicts are found in
+ * either queue.
+ *
+ * It is also responsible for splitting a lock if a portion of the lock
+ * is released.
+ *
+ * If \a first_enq is 0 (ie, called from ldlm_reprocess_queue):
+ * - blocking ASTs have already been sent
+ *
+ * If \a first_enq is 1 (ie, called from ldlm_lock_enqueue):
+ * - blocking ASTs have not been sent yet, so list of conflicting locks
+ * would be collected and ASTs sent.
+ */
+int
+ldlm_process_flock_lock(struct ldlm_lock *req, __u64 *flags, int first_enq,
+ ldlm_error_t *err, struct list_head *work_list)
+{
+ struct ldlm_resource *res = req->l_resource;
+ struct ldlm_namespace *ns = ldlm_res_to_ns(res);
+ struct list_head *tmp;
+ struct list_head *ownlocks = NULL;
+ struct ldlm_lock *lock = NULL;
+ struct ldlm_lock *new = req;
+ struct ldlm_lock *new2 = NULL;
+ ldlm_mode_t mode = req->l_req_mode;
+ int local = ns_is_client(ns);
+ int added = (mode == LCK_NL);
+ int overlaps = 0;
+ int splitted = 0;
+ const struct ldlm_callback_suite null_cbs = { NULL };
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_DLMTRACE, "flags %#llx owner "LPU64" pid %u mode %u start "
+ LPU64" end "LPU64"\n", *flags,
+ new->l_policy_data.l_flock.owner,
+ new->l_policy_data.l_flock.pid, mode,
+ req->l_policy_data.l_flock.start,
+ req->l_policy_data.l_flock.end);
+
+ *err = ELDLM_OK;
+
+ if (local) {
+ /* No blocking ASTs are sent to the clients for
+ * Posix file & record locks */
+ req->l_blocking_ast = NULL;
+ } else {
+ /* Called on the server for lock cancels. */
+ req->l_blocking_ast = ldlm_flock_blocking_ast;
+ }
+
+reprocess:
+ if ((*flags == LDLM_FL_WAIT_NOREPROC) || (mode == LCK_NL)) {
+ /* This loop determines where this processes locks start
+ * in the resource lr_granted list. */
+ list_for_each(tmp, &res->lr_granted) {
+ lock = list_entry(tmp, struct ldlm_lock,
+ l_res_link);
+ if (ldlm_same_flock_owner(lock, req)) {
+ ownlocks = tmp;
+ break;
+ }
+ }
+ } else {
+ lockmode_verify(mode);
+
+ /* This loop determines if there are existing locks
+ * that conflict with the new lock request. */
+ list_for_each(tmp, &res->lr_granted) {
+ lock = list_entry(tmp, struct ldlm_lock,
+ l_res_link);
+
+ if (ldlm_same_flock_owner(lock, req)) {
+ if (!ownlocks)
+ ownlocks = tmp;
+ continue;
+ }
+
+ /* locks are compatible, overlap doesn't matter */
+ if (lockmode_compat(lock->l_granted_mode, mode))
+ continue;
+
+ if (!ldlm_flocks_overlap(lock, req))
+ continue;
+
+ if (!first_enq)
+ RETURN(LDLM_ITER_CONTINUE);
+
+ if (*flags & LDLM_FL_BLOCK_NOWAIT) {
+ ldlm_flock_destroy(req, mode, *flags);
+ *err = -EAGAIN;
+ RETURN(LDLM_ITER_STOP);
+ }
+
+ if (*flags & LDLM_FL_TEST_LOCK) {
+ ldlm_flock_destroy(req, mode, *flags);
+ req->l_req_mode = lock->l_granted_mode;
+ req->l_policy_data.l_flock.pid =
+ lock->l_policy_data.l_flock.pid;
+ req->l_policy_data.l_flock.start =
+ lock->l_policy_data.l_flock.start;
+ req->l_policy_data.l_flock.end =
+ lock->l_policy_data.l_flock.end;
+ *flags |= LDLM_FL_LOCK_CHANGED;
+ RETURN(LDLM_ITER_STOP);
+ }
+
+ if (ldlm_flock_deadlock(req, lock)) {
+ ldlm_flock_destroy(req, mode, *flags);
+ *err = -EDEADLK;
+ RETURN(LDLM_ITER_STOP);
+ }
+
+ rc = ldlm_flock_blocking_link(req, lock);
+ if (rc) {
+ ldlm_flock_destroy(req, mode, *flags);
+ *err = rc;
+ RETURN(LDLM_ITER_STOP);
+ }
+ ldlm_resource_add_lock(res, &res->lr_waiting, req);
+ *flags |= LDLM_FL_BLOCK_GRANTED;
+ RETURN(LDLM_ITER_STOP);
+ }
+ }
+
+ if (*flags & LDLM_FL_TEST_LOCK) {
+ ldlm_flock_destroy(req, mode, *flags);
+ req->l_req_mode = LCK_NL;
+ *flags |= LDLM_FL_LOCK_CHANGED;
+ RETURN(LDLM_ITER_STOP);
+ }
+
+ /* In case we had slept on this lock request take it off of the
+ * deadlock detection hash list. */
+ ldlm_flock_blocking_unlink(req);
+
+ /* Scan the locks owned by this process that overlap this request.
+ * We may have to merge or split existing locks. */
+
+ if (!ownlocks)
+ ownlocks = &res->lr_granted;
+
+ list_for_remaining_safe(ownlocks, tmp, &res->lr_granted) {
+ lock = list_entry(ownlocks, struct ldlm_lock, l_res_link);
+
+ if (!ldlm_same_flock_owner(lock, new))
+ break;
+
+ if (lock->l_granted_mode == mode) {
+ /* If the modes are the same then we need to process
+ * locks that overlap OR adjoin the new lock. The extra
+ * logic condition is necessary to deal with arithmetic
+ * overflow and underflow. */
+ if ((new->l_policy_data.l_flock.start >
+ (lock->l_policy_data.l_flock.end + 1))
+ && (lock->l_policy_data.l_flock.end !=
+ OBD_OBJECT_EOF))
+ continue;
+
+ if ((new->l_policy_data.l_flock.end <
+ (lock->l_policy_data.l_flock.start - 1))
+ && (lock->l_policy_data.l_flock.start != 0))
+ break;
+
+ if (new->l_policy_data.l_flock.start <
+ lock->l_policy_data.l_flock.start) {
+ lock->l_policy_data.l_flock.start =
+ new->l_policy_data.l_flock.start;
+ } else {
+ new->l_policy_data.l_flock.start =
+ lock->l_policy_data.l_flock.start;
+ }
+
+ if (new->l_policy_data.l_flock.end >
+ lock->l_policy_data.l_flock.end) {
+ lock->l_policy_data.l_flock.end =
+ new->l_policy_data.l_flock.end;
+ } else {
+ new->l_policy_data.l_flock.end =
+ lock->l_policy_data.l_flock.end;
+ }
+
+ if (added) {
+ ldlm_flock_destroy(lock, mode, *flags);
+ } else {
+ new = lock;
+ added = 1;
+ }
+ continue;
+ }
+
+ if (new->l_policy_data.l_flock.start >
+ lock->l_policy_data.l_flock.end)
+ continue;
+
+ if (new->l_policy_data.l_flock.end <
+ lock->l_policy_data.l_flock.start)
+ break;
+
+ ++overlaps;
+
+ if (new->l_policy_data.l_flock.start <=
+ lock->l_policy_data.l_flock.start) {
+ if (new->l_policy_data.l_flock.end <
+ lock->l_policy_data.l_flock.end) {
+ lock->l_policy_data.l_flock.start =
+ new->l_policy_data.l_flock.end + 1;
+ break;
+ }
+ ldlm_flock_destroy(lock, lock->l_req_mode, *flags);
+ continue;
+ }
+ if (new->l_policy_data.l_flock.end >=
+ lock->l_policy_data.l_flock.end) {
+ lock->l_policy_data.l_flock.end =
+ new->l_policy_data.l_flock.start - 1;
+ continue;
+ }
+
+ /* split the existing lock into two locks */
+
+ /* if this is an F_UNLCK operation then we could avoid
+ * allocating a new lock and use the req lock passed in
+ * with the request but this would complicate the reply
+ * processing since updates to req get reflected in the
+ * reply. The client side replays the lock request so
+ * it must see the original lock data in the reply. */
+
+ /* XXX - if ldlm_lock_new() can sleep we should
+ * release the lr_lock, allocate the new lock,
+ * and restart processing this lock. */
+ if (!new2) {
+ unlock_res_and_lock(req);
+ new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK,
+ lock->l_granted_mode, &null_cbs,
+ NULL, 0, LVB_T_NONE);
+ lock_res_and_lock(req);
+ if (!new2) {
+ ldlm_flock_destroy(req, lock->l_granted_mode,
+ *flags);
+ *err = -ENOLCK;
+ RETURN(LDLM_ITER_STOP);
+ }
+ goto reprocess;
+ }
+
+ splitted = 1;
+
+ new2->l_granted_mode = lock->l_granted_mode;
+ new2->l_policy_data.l_flock.pid =
+ new->l_policy_data.l_flock.pid;
+ new2->l_policy_data.l_flock.owner =
+ new->l_policy_data.l_flock.owner;
+ new2->l_policy_data.l_flock.start =
+ lock->l_policy_data.l_flock.start;
+ new2->l_policy_data.l_flock.end =
+ new->l_policy_data.l_flock.start - 1;
+ lock->l_policy_data.l_flock.start =
+ new->l_policy_data.l_flock.end + 1;
+ new2->l_conn_export = lock->l_conn_export;
+ if (lock->l_export != NULL) {
+ new2->l_export = class_export_lock_get(lock->l_export, new2);
+ if (new2->l_export->exp_lock_hash &&
+ hlist_unhashed(&new2->l_exp_hash))
+ cfs_hash_add(new2->l_export->exp_lock_hash,
+ &new2->l_remote_handle,
+ &new2->l_exp_hash);
+ }
+ if (*flags == LDLM_FL_WAIT_NOREPROC)
+ ldlm_lock_addref_internal_nolock(new2,
+ lock->l_granted_mode);
+
+ /* insert new2 at lock */
+ ldlm_resource_add_lock(res, ownlocks, new2);
+ LDLM_LOCK_RELEASE(new2);
+ break;
+ }
+
+ /* if new2 is created but never used, destroy it*/
+ if (splitted == 0 && new2 != NULL)
+ ldlm_lock_destroy_nolock(new2);
+
+ /* At this point we're granting the lock request. */
+ req->l_granted_mode = req->l_req_mode;
+
+ /* Add req to the granted queue before calling ldlm_reprocess_all(). */
+ if (!added) {
+ list_del_init(&req->l_res_link);
+ /* insert new lock before ownlocks in list. */
+ ldlm_resource_add_lock(res, ownlocks, req);
+ }
+
+ if (*flags != LDLM_FL_WAIT_NOREPROC) {
+ /* The only one possible case for client-side calls flock
+ * policy function is ldlm_flock_completion_ast inside which
+ * carries LDLM_FL_WAIT_NOREPROC flag. */
+ CERROR("Illegal parameter for client-side-only module.\n");
+ LBUG();
+ }
+
+ /* In case we're reprocessing the requested lock we can't destroy
+ * it until after calling ldlm_add_ast_work_item() above so that laawi()
+ * can bump the reference count on \a req. Otherwise \a req
+ * could be freed before the completion AST can be sent. */
+ if (added)
+ ldlm_flock_destroy(req, mode, *flags);
+
+ ldlm_resource_dump(D_INFO, res);
+ RETURN(LDLM_ITER_CONTINUE);
+}
+
+struct ldlm_flock_wait_data {
+ struct ldlm_lock *fwd_lock;
+ int fwd_generation;
+};
+
+static void
+ldlm_flock_interrupted_wait(void *data)
+{
+ struct ldlm_lock *lock;
+ ENTRY;
+
+ lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
+
+ /* take lock off the deadlock detection hash list. */
+ lock_res_and_lock(lock);
+ ldlm_flock_blocking_unlink(lock);
+
+ /* client side - set flag to prevent lock from being put on LRU list */
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ unlock_res_and_lock(lock);
+
+ EXIT;
+}
+
+/**
+ * Flock completion callback function.
+ *
+ * \param lock [in,out]: A lock to be handled
+ * \param flags [in]: flags
+ * \param *data [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg
+ *
+ * \retval 0 : success
+ * \retval <0 : failure
+ */
+int
+ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
+{
+ struct file_lock *getlk = lock->l_ast_data;
+ struct obd_device *obd;
+ struct obd_import *imp = NULL;
+ struct ldlm_flock_wait_data fwd;
+ struct l_wait_info lwi;
+ ldlm_error_t err;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
+ flags, data, getlk);
+
+ /* Import invalidation. We need to actually release the lock
+ * references being held, so that it can go away. No point in
+ * holding the lock even if app still believes it has it, since
+ * server already dropped it anyway. Only for granted locks too. */
+ if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
+ (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
+ if (lock->l_req_mode == lock->l_granted_mode &&
+ lock->l_granted_mode != LCK_NL &&
+ NULL == data)
+ ldlm_lock_decref_internal(lock, lock->l_req_mode);
+
+ /* Need to wake up the waiter if we were evicted */
+ wake_up(&lock->l_waitq);
+ RETURN(0);
+ }
+
+ LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
+
+ if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_BLOCK_CONV))) {
+ if (NULL == data)
+ /* mds granted the lock in the reply */
+ goto granted;
+ /* CP AST RPC: lock get granted, wake it up */
+ wake_up(&lock->l_waitq);
+ RETURN(0);
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+ "sleeping");
+ fwd.fwd_lock = lock;
+ obd = class_exp2obd(lock->l_conn_export);
+
+ /* if this is a local lock, there is no import */
+ if (NULL != obd)
+ imp = obd->u.cli.cl_import;
+
+ if (NULL != imp) {
+ spin_lock(&imp->imp_lock);
+ fwd.fwd_generation = imp->imp_generation;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd);
+
+ /* Go to sleep until the lock is granted. */
+ rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi);
+
+ if (rc) {
+ LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
+ rc);
+ RETURN(rc);
+ }
+
+granted:
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
+
+ if (lock->l_destroyed) {
+ LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
+ RETURN(0);
+ }
+
+ if (lock->l_flags & LDLM_FL_FAILED) {
+ LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
+ RETURN(-EIO);
+ }
+
+ if (rc) {
+ LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
+ rc);
+ RETURN(rc);
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue granted");
+
+ lock_res_and_lock(lock);
+
+ /* take lock off the deadlock detection hash list. */
+ ldlm_flock_blocking_unlink(lock);
+
+ /* ldlm_lock_enqueue() has already placed lock on the granted list. */
+ list_del_init(&lock->l_res_link);
+
+ if (flags & LDLM_FL_TEST_LOCK) {
+ /* fcntl(F_GETLK) request */
+ /* The old mode was saved in getlk->fl_type so that if the mode
+ * in the lock changes we can decref the appropriate refcount.*/
+ ldlm_flock_destroy(lock, flock_type(getlk),
+ LDLM_FL_WAIT_NOREPROC);
+ switch (lock->l_granted_mode) {
+ case LCK_PR:
+ flock_set_type(getlk, F_RDLCK);
+ break;
+ case LCK_PW:
+ flock_set_type(getlk, F_WRLCK);
+ break;
+ default:
+ flock_set_type(getlk, F_UNLCK);
+ }
+ flock_set_pid(getlk, (pid_t)lock->l_policy_data.l_flock.pid);
+ flock_set_start(getlk,
+ (loff_t)lock->l_policy_data.l_flock.start);
+ flock_set_end(getlk,
+ (loff_t)lock->l_policy_data.l_flock.end);
+ } else {
+ __u64 noreproc = LDLM_FL_WAIT_NOREPROC;
+
+ /* We need to reprocess the lock to do merges or splits
+ * with existing locks owned by this process. */
+ ldlm_process_flock_lock(lock, &noreproc, 1, &err, NULL);
+ }
+ unlock_res_and_lock(lock);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_flock_completion_ast);
+
+int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ ENTRY;
+
+ LASSERT(lock);
+ LASSERT(flag == LDLM_CB_CANCELING);
+
+ /* take lock off the deadlock detection hash list. */
+ lock_res_and_lock(lock);
+ ldlm_flock_blocking_unlink(lock);
+ unlock_res_and_lock(lock);
+ RETURN(0);
+}
+
+void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ memset(lpolicy, 0, sizeof(*lpolicy));
+ lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
+ lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
+ lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
+ /* Compat code, old clients had no idea about owner field and
+ * relied solely on pid for ownership. Introduced in LU-104, 2.1,
+ * April 2011 */
+ lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid;
+}
+
+
+void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ memset(lpolicy, 0, sizeof(*lpolicy));
+ lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
+ lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
+ lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
+ lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner;
+}
+
+void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy)
+{
+ memset(wpolicy, 0, sizeof(*wpolicy));
+ wpolicy->l_flock.lfw_start = lpolicy->l_flock.start;
+ wpolicy->l_flock.lfw_end = lpolicy->l_flock.end;
+ wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid;
+ wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner;
+}
+
+/*
+ * Export handle<->flock hash operations.
+ */
+static unsigned
+ldlm_export_flock_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_u64_hash(*(__u64 *)key, mask);
+}
+
+static void *
+ldlm_export_flock_key(struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
+ return &lock->l_policy_data.l_flock.owner;
+}
+
+static int
+ldlm_export_flock_keycmp(const void *key, struct hlist_node *hnode)
+{
+ return !memcmp(ldlm_export_flock_key(hnode), key, sizeof(__u64));
+}
+
+static void *
+ldlm_export_flock_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
+}
+
+static void
+ldlm_export_flock_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+ struct ldlm_flock *flock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
+ LDLM_LOCK_GET(lock);
+
+ flock = &lock->l_policy_data.l_flock;
+ LASSERT(flock->blocking_export != NULL);
+ class_export_get(flock->blocking_export);
+ flock->blocking_refs++;
+}
+
+static void
+ldlm_export_flock_put(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+ struct ldlm_flock *flock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash);
+ LDLM_LOCK_RELEASE(lock);
+
+ flock = &lock->l_policy_data.l_flock;
+ LASSERT(flock->blocking_export != NULL);
+ class_export_put(flock->blocking_export);
+ if (--flock->blocking_refs == 0) {
+ flock->blocking_owner = 0;
+ flock->blocking_export = NULL;
+ }
+}
+
+static cfs_hash_ops_t ldlm_export_flock_ops = {
+ .hs_hash = ldlm_export_flock_hash,
+ .hs_key = ldlm_export_flock_key,
+ .hs_keycmp = ldlm_export_flock_keycmp,
+ .hs_object = ldlm_export_flock_object,
+ .hs_get = ldlm_export_flock_get,
+ .hs_put = ldlm_export_flock_put,
+ .hs_put_locked = ldlm_export_flock_put,
+};
+
+int ldlm_init_flock_export(struct obd_export *exp)
+{
+ exp->exp_flock_hash =
+ cfs_hash_create(obd_uuid2str(&exp->exp_client_uuid),
+ HASH_EXP_LOCK_CUR_BITS,
+ HASH_EXP_LOCK_MAX_BITS,
+ HASH_EXP_LOCK_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
+ &ldlm_export_flock_ops,
+ CFS_HASH_DEFAULT | CFS_HASH_NBLK_CHANGE);
+ if (!exp->exp_flock_hash)
+ RETURN(-ENOMEM);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_init_flock_export);
+
+void ldlm_destroy_flock_export(struct obd_export *exp)
+{
+ ENTRY;
+ if (exp->exp_flock_hash) {
+ cfs_hash_putref(exp->exp_flock_hash);
+ exp->exp_flock_hash = NULL;
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_destroy_flock_export);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
new file mode 100644
index 000000000000..574b2ff43b74
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_inodebits.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+/**
+ * This file contains implementation of IBITS lock type
+ *
+ * IBITS lock type contains a bit mask determining various properties of an
+ * object. The meanings of specific bits are specific to the caller and are
+ * opaque to LDLM code.
+ *
+ * Locks with intersecting bitmasks and conflicting lock modes (e.g. LCK_PW)
+ * are considered conflicting. See the lock mode compatibility matrix
+ * in lustre_dlm.h.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+
+#include "ldlm_internal.h"
+
+
+void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ memset(lpolicy, 0, sizeof(*lpolicy));
+ lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits;
+}
+
+void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy)
+{
+ memset(wpolicy, 0, sizeof(*wpolicy));
+ wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits;
+}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
new file mode 100644
index 000000000000..141a957462f1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -0,0 +1,276 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define MAX_STRING_SIZE 128
+
+extern atomic_t ldlm_srv_namespace_nr;
+extern atomic_t ldlm_cli_namespace_nr;
+extern struct mutex ldlm_srv_namespace_lock;
+extern struct list_head ldlm_srv_namespace_list;
+extern struct mutex ldlm_cli_namespace_lock;
+extern struct list_head ldlm_cli_namespace_list;
+
+static inline atomic_t *ldlm_namespace_nr(ldlm_side_t client)
+{
+ return client == LDLM_NAMESPACE_SERVER ?
+ &ldlm_srv_namespace_nr : &ldlm_cli_namespace_nr;
+}
+
+static inline struct list_head *ldlm_namespace_list(ldlm_side_t client)
+{
+ return client == LDLM_NAMESPACE_SERVER ?
+ &ldlm_srv_namespace_list : &ldlm_cli_namespace_list;
+}
+
+static inline struct mutex *ldlm_namespace_lock(ldlm_side_t client)
+{
+ return client == LDLM_NAMESPACE_SERVER ?
+ &ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock;
+}
+
+/* ldlm_request.c */
+/* Cancel lru flag, it indicates we cancel aged locks. */
+enum {
+ LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */
+ LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
+ LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
+ LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */
+ LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither
+ * sending nor waiting for any rpcs) */
+};
+
+int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
+ ldlm_cancel_flags_t sync, int flags);
+int ldlm_cancel_lru_local(struct ldlm_namespace *ns,
+ struct list_head *cancels, int count, int max,
+ ldlm_cancel_flags_t cancel_flags, int flags);
+extern int ldlm_enqueue_min;
+int ldlm_get_enq_timeout(struct ldlm_lock *lock);
+
+/* ldlm_resource.c */
+int ldlm_resource_putref_locked(struct ldlm_resource *res);
+void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
+ struct ldlm_lock *new);
+void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
+ struct obd_import *imp, int force);
+void ldlm_namespace_free_post(struct ldlm_namespace *ns);
+/* ldlm_lock.c */
+
+struct ldlm_cb_set_arg {
+ struct ptlrpc_request_set *set;
+ int type; /* LDLM_{CP,BL,GL}_CALLBACK */
+ atomic_t restart;
+ struct list_head *list;
+ union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */
+};
+
+typedef enum {
+ LDLM_WORK_BL_AST,
+ LDLM_WORK_CP_AST,
+ LDLM_WORK_REVOKE_AST,
+ LDLM_WORK_GL_AST
+} ldlm_desc_ast_t;
+
+void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list);
+int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
+ enum req_location loc, void *data, int size);
+struct ldlm_lock *
+ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *,
+ ldlm_type_t type, ldlm_mode_t,
+ const struct ldlm_callback_suite *cbs,
+ void *data, __u32 lvb_len, enum lvb_type lvb_type);
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **,
+ void *cookie, __u64 *flags);
+void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode);
+void ldlm_lock_addref_internal_nolock(struct ldlm_lock *, __u32 mode);
+void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode);
+void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode);
+void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
+ struct list_head *work_list);
+int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
+ ldlm_desc_ast_t ast_type);
+int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq);
+int ldlm_lock_remove_from_lru(struct ldlm_lock *lock);
+int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock);
+void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock);
+void ldlm_lock_add_to_lru(struct ldlm_lock *lock);
+void ldlm_lock_touch_in_lru(struct ldlm_lock *lock);
+void ldlm_lock_destroy_nolock(struct ldlm_lock *lock);
+
+void ldlm_cancel_locks_for_export(struct obd_export *export);
+
+/* ldlm_lockd.c */
+int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock);
+int ldlm_bl_to_thread_list(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ struct list_head *cancels, int count,
+ ldlm_cancel_flags_t cancel_flags);
+
+void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld, struct ldlm_lock *lock);
+
+
+/* ldlm_extent.c */
+void ldlm_extent_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock);
+void ldlm_extent_unlink_lock(struct ldlm_lock *lock);
+
+/* ldlm_flock.c */
+int ldlm_process_flock_lock(struct ldlm_lock *req, __u64 *flags,
+ int first_enq, ldlm_error_t *err,
+ struct list_head *work_list);
+int ldlm_init_flock_export(struct obd_export *exp);
+void ldlm_destroy_flock_export(struct obd_export *exp);
+
+/* l_lock.c */
+void l_check_ns_lock(struct ldlm_namespace *ns);
+void l_check_no_ns_lock(struct ldlm_namespace *ns);
+
+extern proc_dir_entry_t *ldlm_svc_proc_dir;
+extern proc_dir_entry_t *ldlm_type_proc_dir;
+
+struct ldlm_state {
+ struct ptlrpc_service *ldlm_cb_service;
+ struct ptlrpc_service *ldlm_cancel_service;
+ struct ptlrpc_client *ldlm_client;
+ struct ptlrpc_connection *ldlm_server_conn;
+ struct ldlm_bl_pool *ldlm_bl_pool;
+};
+
+/* interval tree, for LDLM_EXTENT. */
+extern struct kmem_cache *ldlm_interval_slab; /* slab cache for ldlm_interval */
+extern void ldlm_interval_attach(struct ldlm_interval *n, struct ldlm_lock *l);
+extern struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l);
+extern struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock);
+extern void ldlm_interval_free(struct ldlm_interval *node);
+/* this function must be called with res lock held */
+static inline struct ldlm_extent *
+ldlm_interval_extent(struct ldlm_interval *node)
+{
+ struct ldlm_lock *lock;
+ LASSERT(!list_empty(&node->li_group));
+
+ lock = list_entry(node->li_group.next, struct ldlm_lock,
+ l_sl_policy);
+ return &lock->l_policy_data.l_extent;
+}
+
+int ldlm_init(void);
+void ldlm_exit(void);
+
+enum ldlm_policy_res {
+ LDLM_POLICY_CANCEL_LOCK,
+ LDLM_POLICY_KEEP_LOCK,
+ LDLM_POLICY_SKIP_LOCK
+};
+
+typedef enum ldlm_policy_res ldlm_policy_res_t;
+
+#define LDLM_POOL_PROC_READER_SEQ_SHOW(var, type) \
+ static int lprocfs_##var##_seq_show(struct seq_file *m, void *v) \
+ { \
+ struct ldlm_pool *pl = m->private; \
+ type tmp; \
+ \
+ spin_lock(&pl->pl_lock); \
+ tmp = pl->pl_##var; \
+ spin_unlock(&pl->pl_lock); \
+ \
+ return lprocfs_rd_uint(m, &tmp); \
+ } \
+ struct __##var##__dummy_read {;} /* semicolon catcher */
+
+#define LDLM_POOL_PROC_WRITER(var, type) \
+ int lprocfs_wr_##var(struct file *file, const char *buffer, \
+ unsigned long count, void *data) \
+ { \
+ struct ldlm_pool *pl = data; \
+ type tmp; \
+ int rc; \
+ \
+ rc = lprocfs_wr_uint(file, buffer, count, &tmp); \
+ if (rc < 0) { \
+ CERROR("Can't parse user input, rc = %d\n", rc); \
+ return rc; \
+ } \
+ \
+ spin_lock(&pl->pl_lock); \
+ pl->pl_##var = tmp; \
+ spin_unlock(&pl->pl_lock); \
+ \
+ return rc; \
+ } \
+ struct __##var##__dummy_write {;} /* semicolon catcher */
+
+static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
+{
+ int ret = 0;
+
+ lock_res_and_lock(lock);
+ if (((lock->l_req_mode == lock->l_granted_mode) &&
+ !(lock->l_flags & LDLM_FL_CP_REQD)) ||
+ (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL)))
+ ret = 1;
+ unlock_res_and_lock(lock);
+
+ return ret;
+}
+
+typedef void (*ldlm_policy_wire_to_local_t)(const ldlm_wire_policy_data_t *,
+ ldlm_policy_data_t *);
+
+typedef void (*ldlm_policy_local_to_wire_t)(const ldlm_policy_data_t *,
+ ldlm_wire_policy_data_t *);
+
+void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy);
+void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy);
+void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy);
+void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy);
+
+void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
new file mode 100644
index 000000000000..42df53072dc3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -0,0 +1,868 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/**
+ * This file deals with various client/target related logic including recovery.
+ *
+ * TODO: This code more logically belongs in the ptlrpc module than in ldlm and
+ * should be moved.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+# include <linux/libcfs/libcfs.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+#include "ldlm_internal.h"
+
+/* @priority: If non-zero, move the selected connection to the list head.
+ * @create: If zero, only search in existing connections.
+ */
+static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority, int create)
+{
+ struct ptlrpc_connection *ptlrpc_conn;
+ struct obd_import_conn *imp_conn = NULL, *item;
+ int rc = 0;
+ ENTRY;
+
+ if (!create && !priority) {
+ CDEBUG(D_HA, "Nothing to do\n");
+ RETURN(-EINVAL);
+ }
+
+ ptlrpc_conn = ptlrpc_uuid_to_connection(uuid);
+ if (!ptlrpc_conn) {
+ CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid);
+ RETURN (-ENOENT);
+ }
+
+ if (create) {
+ OBD_ALLOC(imp_conn, sizeof(*imp_conn));
+ if (!imp_conn) {
+ GOTO(out_put, rc = -ENOMEM);
+ }
+ }
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
+ if (obd_uuid_equals(uuid, &item->oic_uuid)) {
+ if (priority) {
+ list_del(&item->oic_item);
+ list_add(&item->oic_item,
+ &imp->imp_conn_list);
+ item->oic_last_attempt = 0;
+ }
+ CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n",
+ imp, imp->imp_obd->obd_name, uuid->uuid,
+ (priority ? ", moved to head" : ""));
+ spin_unlock(&imp->imp_lock);
+ GOTO(out_free, rc = 0);
+ }
+ }
+ /* No existing import connection found for \a uuid. */
+ if (create) {
+ imp_conn->oic_conn = ptlrpc_conn;
+ imp_conn->oic_uuid = *uuid;
+ imp_conn->oic_last_attempt = 0;
+ if (priority)
+ list_add(&imp_conn->oic_item, &imp->imp_conn_list);
+ else
+ list_add_tail(&imp_conn->oic_item,
+ &imp->imp_conn_list);
+ CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
+ imp, imp->imp_obd->obd_name, uuid->uuid,
+ (priority ? "head" : "tail"));
+ } else {
+ spin_unlock(&imp->imp_lock);
+ GOTO(out_free, rc = -ENOENT);
+ }
+
+ spin_unlock(&imp->imp_lock);
+ RETURN(0);
+out_free:
+ if (imp_conn)
+ OBD_FREE(imp_conn, sizeof(*imp_conn));
+out_put:
+ ptlrpc_connection_put(ptlrpc_conn);
+ RETURN(rc);
+}
+
+int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid)
+{
+ return import_set_conn(imp, uuid, 1, 0);
+}
+
+int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+ int priority)
+{
+ return import_set_conn(imp, uuid, priority, 1);
+}
+EXPORT_SYMBOL(client_import_add_conn);
+
+int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
+{
+ struct obd_import_conn *imp_conn;
+ struct obd_export *dlmexp;
+ int rc = -ENOENT;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ if (list_empty(&imp->imp_conn_list)) {
+ LASSERT(!imp->imp_connection);
+ GOTO(out, rc);
+ }
+
+ list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
+ if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid))
+ continue;
+ LASSERT(imp_conn->oic_conn);
+
+ if (imp_conn == imp->imp_conn_current) {
+ LASSERT(imp_conn->oic_conn == imp->imp_connection);
+
+ if (imp->imp_state != LUSTRE_IMP_CLOSED &&
+ imp->imp_state != LUSTRE_IMP_DISCON) {
+ CERROR("can't remove current connection\n");
+ GOTO(out, rc = -EBUSY);
+ }
+
+ ptlrpc_connection_put(imp->imp_connection);
+ imp->imp_connection = NULL;
+
+ dlmexp = class_conn2export(&imp->imp_dlm_handle);
+ if (dlmexp && dlmexp->exp_connection) {
+ LASSERT(dlmexp->exp_connection ==
+ imp_conn->oic_conn);
+ ptlrpc_connection_put(dlmexp->exp_connection);
+ dlmexp->exp_connection = NULL;
+ }
+ }
+
+ list_del(&imp_conn->oic_item);
+ ptlrpc_connection_put(imp_conn->oic_conn);
+ OBD_FREE(imp_conn, sizeof(*imp_conn));
+ CDEBUG(D_HA, "imp %p@%s: remove connection %s\n",
+ imp, imp->imp_obd->obd_name, uuid->uuid);
+ rc = 0;
+ break;
+ }
+out:
+ spin_unlock(&imp->imp_lock);
+ if (rc == -ENOENT)
+ CERROR("connection %s not found\n", uuid->uuid);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(client_import_del_conn);
+
+/**
+ * Find conn UUID by peer NID. \a peer is a server NID. This function is used
+ * to find a conn uuid of \a imp which can reach \a peer.
+ */
+int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
+ struct obd_uuid *uuid)
+{
+ struct obd_import_conn *conn;
+ int rc = -ENOENT;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ /* Check if conn UUID does have this peer NID. */
+ if (class_check_uuid(&conn->oic_uuid, peer)) {
+ *uuid = conn->oic_uuid;
+ rc = 0;
+ break;
+ }
+ }
+ spin_unlock(&imp->imp_lock);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(client_import_find_conn);
+
+void client_destroy_import(struct obd_import *imp)
+{
+ /* Drop security policy instance after all RPCs have finished/aborted
+ * to let all busy contexts be released. */
+ class_import_get(imp);
+ class_destroy_import(imp);
+ sptlrpc_import_sec_put(imp);
+ class_import_put(imp);
+}
+EXPORT_SYMBOL(client_destroy_import);
+
+/**
+ * Check whether or not the OSC is on MDT.
+ * In the config log,
+ * osc on MDT
+ * setup 0:{fsname}-OSTxxxx-osc[-MDTxxxx] 1:lustre-OST0000_UUID 2:NID
+ * osc on client
+ * setup 0:{fsname}-OSTxxxx-osc 1:lustre-OST0000_UUID 2:NID
+ *
+ **/
+static int osc_on_mdt(char *obdname)
+{
+ char *ptr;
+
+ ptr = strrchr(obdname, '-');
+ if (ptr == NULL)
+ return 0;
+
+ if (strncmp(ptr + 1, "MDT", 3) == 0)
+ return 1;
+
+ return 0;
+}
+
+/* Configure an RPC client OBD device.
+ *
+ * lcfg parameters:
+ * 1 - client UUID
+ * 2 - server UUID
+ * 3 - inactive-on-startup
+ */
+int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
+{
+ struct client_obd *cli = &obddev->u.cli;
+ struct obd_import *imp;
+ struct obd_uuid server_uuid;
+ int rq_portal, rp_portal, connect_op;
+ char *name = obddev->obd_type->typ_name;
+ ldlm_ns_type_t ns_type = LDLM_NS_TYPE_UNKNOWN;
+ int rc;
+ char *cli_name = lustre_cfg_buf(lcfg, 0);
+ ENTRY;
+
+ /* In a more perfect world, we would hang a ptlrpc_client off of
+ * obd_type and just use the values from there. */
+ if (!strcmp(name, LUSTRE_OSC_NAME) ||
+ (!(strcmp(name, LUSTRE_OSP_NAME)) &&
+ (is_osp_on_mdt(cli_name) &&
+ strstr(lustre_cfg_buf(lcfg, 1), "OST") != NULL))) {
+ /* OSC or OSP_on_MDT for OSTs */
+ rq_portal = OST_REQUEST_PORTAL;
+ rp_portal = OSC_REPLY_PORTAL;
+ connect_op = OST_CONNECT;
+ cli->cl_sp_me = LUSTRE_SP_CLI;
+ cli->cl_sp_to = LUSTRE_SP_OST;
+ ns_type = LDLM_NS_TYPE_OSC;
+ } else if (!strcmp(name, LUSTRE_MDC_NAME) ||
+ !strcmp(name, LUSTRE_LWP_NAME) ||
+ (!strcmp(name, LUSTRE_OSP_NAME) &&
+ (is_osp_on_mdt(cli_name) &&
+ strstr(lustre_cfg_buf(lcfg, 1), "OST") == NULL))) {
+ /* MDC or OSP_on_MDT for other MDTs */
+ rq_portal = MDS_REQUEST_PORTAL;
+ rp_portal = MDC_REPLY_PORTAL;
+ connect_op = MDS_CONNECT;
+ cli->cl_sp_me = LUSTRE_SP_CLI;
+ cli->cl_sp_to = LUSTRE_SP_MDT;
+ ns_type = LDLM_NS_TYPE_MDC;
+ } else if (!strcmp(name, LUSTRE_MGC_NAME)) {
+ rq_portal = MGS_REQUEST_PORTAL;
+ rp_portal = MGC_REPLY_PORTAL;
+ connect_op = MGS_CONNECT;
+ cli->cl_sp_me = LUSTRE_SP_MGC;
+ cli->cl_sp_to = LUSTRE_SP_MGS;
+ cli->cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_INVALID;
+ ns_type = LDLM_NS_TYPE_MGC;
+ } else {
+ CERROR("unknown client OBD type \"%s\", can't setup\n",
+ name);
+ RETURN(-EINVAL);
+ }
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
+ CERROR("requires a TARGET UUID\n");
+ RETURN(-EINVAL);
+ }
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) > 37) {
+ CERROR("client UUID must be less than 38 characters\n");
+ RETURN(-EINVAL);
+ }
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) {
+ CERROR("setup requires a SERVER UUID\n");
+ RETURN(-EINVAL);
+ }
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 2) > 37) {
+ CERROR("target UUID must be less than 38 characters\n");
+ RETURN(-EINVAL);
+ }
+
+ init_rwsem(&cli->cl_sem);
+ sema_init(&cli->cl_mgc_sem, 1);
+ cli->cl_conn_count = 0;
+ memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
+ min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
+ sizeof(server_uuid)));
+
+ cli->cl_dirty = 0;
+ cli->cl_avail_grant = 0;
+ /* FIXME: Should limit this for the sum of all cl_dirty_max. */
+ cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
+ if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > num_physpages / 8)
+ cli->cl_dirty_max = num_physpages << (PAGE_CACHE_SHIFT - 3);
+ INIT_LIST_HEAD(&cli->cl_cache_waiters);
+ INIT_LIST_HEAD(&cli->cl_loi_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_write_list);
+ INIT_LIST_HEAD(&cli->cl_loi_read_list);
+ client_obd_list_lock_init(&cli->cl_loi_list_lock);
+ atomic_set(&cli->cl_pending_w_pages, 0);
+ atomic_set(&cli->cl_pending_r_pages, 0);
+ cli->cl_r_in_flight = 0;
+ cli->cl_w_in_flight = 0;
+
+ spin_lock_init(&cli->cl_read_rpc_hist.oh_lock);
+ spin_lock_init(&cli->cl_write_rpc_hist.oh_lock);
+ spin_lock_init(&cli->cl_read_page_hist.oh_lock);
+ spin_lock_init(&cli->cl_write_page_hist.oh_lock);
+ spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
+ spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
+
+ /* lru for osc. */
+ INIT_LIST_HEAD(&cli->cl_lru_osc);
+ atomic_set(&cli->cl_lru_shrinkers, 0);
+ atomic_set(&cli->cl_lru_busy, 0);
+ atomic_set(&cli->cl_lru_in_list, 0);
+ INIT_LIST_HEAD(&cli->cl_lru_list);
+ client_obd_list_lock_init(&cli->cl_lru_list_lock);
+
+ init_waitqueue_head(&cli->cl_destroy_waitq);
+ atomic_set(&cli->cl_destroy_in_flight, 0);
+ /* Turn on checksumming by default. */
+ cli->cl_checksum = 1;
+ /*
+ * The supported checksum types will be worked out at connect time
+ * Set cl_chksum* to CRC32 for now to avoid returning screwed info
+ * through procfs.
+ */
+ cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+ atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
+
+ /* This value may be reduced at connect time in
+ * ptlrpc_connect_interpret() . We initialize it to only
+ * 1MB until we know what the performance looks like.
+ * In the future this should likely be increased. LU-1431 */
+ cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
+ LNET_MTU >> PAGE_CACHE_SHIFT);
+
+ if (!strcmp(name, LUSTRE_MDC_NAME)) {
+ cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
+ } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 2;
+ } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 3;
+ } else if (num_physpages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 4;
+ } else {
+ if (osc_on_mdt(obddev->obd_name))
+ cli->cl_max_rpcs_in_flight = MDS_OSC_MAX_RIF_DEFAULT;
+ else
+ cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
+ }
+ rc = ldlm_get_ref();
+ if (rc) {
+ CERROR("ldlm_get_ref failed: %d\n", rc);
+ GOTO(err, rc);
+ }
+
+ ptlrpc_init_client(rq_portal, rp_portal, name,
+ &obddev->obd_ldlm_client);
+
+ imp = class_new_import(obddev);
+ if (imp == NULL)
+ GOTO(err_ldlm, rc = -ENOENT);
+ imp->imp_client = &obddev->obd_ldlm_client;
+ imp->imp_connect_op = connect_op;
+ memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
+ LUSTRE_CFG_BUFLEN(lcfg, 1));
+ class_import_put(imp);
+
+ rc = client_import_add_conn(imp, &server_uuid, 1);
+ if (rc) {
+ CERROR("can't add initial connection\n");
+ GOTO(err_import, rc);
+ }
+
+ cli->cl_import = imp;
+ /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
+ cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
+ cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
+ if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
+ CDEBUG(D_HA, "marking %s %s->%s as inactive\n",
+ name, obddev->obd_name,
+ cli->cl_target_uuid.uuid);
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
+ }
+ }
+
+ obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name,
+ LDLM_NAMESPACE_CLIENT,
+ LDLM_NAMESPACE_GREEDY,
+ ns_type);
+ if (obddev->obd_namespace == NULL) {
+ CERROR("Unable to create client namespace - %s\n",
+ obddev->obd_name);
+ GOTO(err_import, rc = -ENOMEM);
+ }
+
+ cli->cl_qchk_stat = CL_NOT_QUOTACHECKED;
+
+ RETURN(rc);
+
+err_import:
+ class_destroy_import(imp);
+err_ldlm:
+ ldlm_put_ref();
+err:
+ RETURN(rc);
+
+}
+EXPORT_SYMBOL(client_obd_setup);
+
+int client_obd_cleanup(struct obd_device *obddev)
+{
+ ENTRY;
+
+ ldlm_namespace_free_post(obddev->obd_namespace);
+ obddev->obd_namespace = NULL;
+
+ LASSERT(obddev->u.cli.cl_import == NULL);
+
+ ldlm_put_ref();
+ RETURN(0);
+}
+EXPORT_SYMBOL(client_obd_cleanup);
+
+/* ->o_connect() method for client side (OSC and MDC and MGC) */
+int client_connect_import(const struct lu_env *env,
+ struct obd_export **exp,
+ struct obd_device *obd, struct obd_uuid *cluuid,
+ struct obd_connect_data *data, void *localdata)
+{
+ struct client_obd *cli = &obd->u.cli;
+ struct obd_import *imp = cli->cl_import;
+ struct obd_connect_data *ocd;
+ struct lustre_handle conn = { 0 };
+ int rc;
+ ENTRY;
+
+ *exp = NULL;
+ down_write(&cli->cl_sem);
+ if (cli->cl_conn_count > 0 )
+ GOTO(out_sem, rc = -EALREADY);
+
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc)
+ GOTO(out_sem, rc);
+
+ cli->cl_conn_count++;
+ *exp = class_conn2export(&conn);
+
+ LASSERT(obd->obd_namespace);
+
+ imp->imp_dlm_handle = conn;
+ rc = ptlrpc_init_import(imp);
+ if (rc != 0)
+ GOTO(out_ldlm, rc);
+
+ ocd = &imp->imp_connect_data;
+ if (data) {
+ *ocd = *data;
+ imp->imp_connect_flags_orig = data->ocd_connect_flags;
+ }
+
+ rc = ptlrpc_connect_import(imp);
+ if (rc != 0) {
+ LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
+ GOTO(out_ldlm, rc);
+ }
+ LASSERT((*exp)->exp_connection);
+
+ if (data) {
+ LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
+ ocd->ocd_connect_flags, "old "LPX64", new "LPX64"\n",
+ data->ocd_connect_flags, ocd->ocd_connect_flags);
+ data->ocd_connect_flags = ocd->ocd_connect_flags;
+ }
+
+ ptlrpc_pinger_add_import(imp);
+
+ EXIT;
+
+ if (rc) {
+out_ldlm:
+ cli->cl_conn_count--;
+ class_disconnect(*exp);
+ *exp = NULL;
+ }
+out_sem:
+ up_write(&cli->cl_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(client_connect_import);
+
+int client_disconnect_export(struct obd_export *exp)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct client_obd *cli;
+ struct obd_import *imp;
+ int rc = 0, err;
+ ENTRY;
+
+ if (!obd) {
+ CERROR("invalid export for disconnect: exp %p cookie "LPX64"\n",
+ exp, exp ? exp->exp_handle.h_cookie : -1);
+ RETURN(-EINVAL);
+ }
+
+ cli = &obd->u.cli;
+ imp = cli->cl_import;
+
+ down_write(&cli->cl_sem);
+ CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
+ cli->cl_conn_count);
+
+ if (!cli->cl_conn_count) {
+ CERROR("disconnecting disconnected device (%s)\n",
+ obd->obd_name);
+ GOTO(out_disconnect, rc = -EINVAL);
+ }
+
+ cli->cl_conn_count--;
+ if (cli->cl_conn_count)
+ GOTO(out_disconnect, rc = 0);
+
+ /* Mark import deactivated now, so we don't try to reconnect if any
+ * of the cleanup RPCs fails (e.g. LDLM cancel, etc). We don't
+ * fully deactivate the import, or that would drop all requests. */
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
+
+ /* Some non-replayable imports (MDS's OSCs) are pinged, so just
+ * delete it regardless. (It's safe to delete an import that was
+ * never added.) */
+ (void)ptlrpc_pinger_del_import(imp);
+
+ if (obd->obd_namespace != NULL) {
+ /* obd_force == local only */
+ ldlm_cli_cancel_unused(obd->obd_namespace, NULL,
+ obd->obd_force ? LCF_LOCAL : 0, NULL);
+ ldlm_namespace_free_prior(obd->obd_namespace, imp, obd->obd_force);
+ }
+
+ /* There's no need to hold sem while disconnecting an import,
+ * and it may actually cause deadlock in GSS. */
+ up_write(&cli->cl_sem);
+ rc = ptlrpc_disconnect_import(imp, 0);
+ down_write(&cli->cl_sem);
+
+ ptlrpc_invalidate_import(imp);
+
+ EXIT;
+
+out_disconnect:
+ /* Use server style - class_disconnect should be always called for
+ * o_disconnect. */
+ err = class_disconnect(exp);
+ if (!rc && err)
+ rc = err;
+
+ up_write(&cli->cl_sem);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(client_disconnect_export);
+
+
+/**
+ * Packs current SLV and Limit into \a req.
+ */
+int target_pack_pool_reply(struct ptlrpc_request *req)
+{
+ struct obd_device *obd;
+ ENTRY;
+
+ /* Check that we still have all structures alive as this may
+ * be some late RPC at shutdown time. */
+ if (unlikely(!req->rq_export || !req->rq_export->exp_obd ||
+ !exp_connect_lru_resize(req->rq_export))) {
+ lustre_msg_set_slv(req->rq_repmsg, 0);
+ lustre_msg_set_limit(req->rq_repmsg, 0);
+ RETURN(0);
+ }
+
+ /* OBD is alive here as export is alive, which we checked above. */
+ obd = req->rq_export->exp_obd;
+
+ read_lock(&obd->obd_pool_lock);
+ lustre_msg_set_slv(req->rq_repmsg, obd->obd_pool_slv);
+ lustre_msg_set_limit(req->rq_repmsg, obd->obd_pool_limit);
+ read_unlock(&obd->obd_pool_lock);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(target_pack_pool_reply);
+
+int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
+{
+ if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
+ DEBUG_REQ(D_ERROR, req, "dropping reply");
+ return (-ECOMM);
+ }
+
+ if (unlikely(rc)) {
+ DEBUG_REQ(D_NET, req, "processing error (%d)", rc);
+ req->rq_status = rc;
+ return (ptlrpc_send_error(req, 1));
+ } else {
+ DEBUG_REQ(D_NET, req, "sending reply");
+ }
+
+ return (ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT));
+}
+
+void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
+{
+ struct ptlrpc_service_part *svcpt;
+ int netrc;
+ struct ptlrpc_reply_state *rs;
+ struct obd_export *exp;
+ ENTRY;
+
+ if (req->rq_no_reply) {
+ EXIT;
+ return;
+ }
+
+ svcpt = req->rq_rqbd->rqbd_svcpt;
+ rs = req->rq_reply_state;
+ if (rs == NULL || !rs->rs_difficult) {
+ /* no notifiers */
+ target_send_reply_msg (req, rc, fail_id);
+ EXIT;
+ return;
+ }
+
+ /* must be an export if locks saved */
+ LASSERT (req->rq_export != NULL);
+ /* req/reply consistent */
+ LASSERT(rs->rs_svcpt == svcpt);
+
+ /* "fresh" reply */
+ LASSERT (!rs->rs_scheduled);
+ LASSERT (!rs->rs_scheduled_ever);
+ LASSERT (!rs->rs_handled);
+ LASSERT (!rs->rs_on_net);
+ LASSERT (rs->rs_export == NULL);
+ LASSERT (list_empty(&rs->rs_obd_list));
+ LASSERT (list_empty(&rs->rs_exp_list));
+
+ exp = class_export_get (req->rq_export);
+
+ /* disable reply scheduling while I'm setting up */
+ rs->rs_scheduled = 1;
+ rs->rs_on_net = 1;
+ rs->rs_xid = req->rq_xid;
+ rs->rs_transno = req->rq_transno;
+ rs->rs_export = exp;
+ rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ spin_lock(&exp->exp_uncommitted_replies_lock);
+ CDEBUG(D_NET, "rs transno = "LPU64", last committed = "LPU64"\n",
+ rs->rs_transno, exp->exp_last_committed);
+ if (rs->rs_transno > exp->exp_last_committed) {
+ /* not committed already */
+ list_add_tail(&rs->rs_obd_list,
+ &exp->exp_uncommitted_replies);
+ }
+ spin_unlock(&exp->exp_uncommitted_replies_lock);
+
+ spin_lock(&exp->exp_lock);
+ list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
+ spin_unlock(&exp->exp_lock);
+
+ netrc = target_send_reply_msg(req, rc, fail_id);
+
+ spin_lock(&svcpt->scp_rep_lock);
+
+ atomic_inc(&svcpt->scp_nreps_difficult);
+
+ if (netrc != 0) {
+ /* error sending: reply is off the net. Also we need +1
+ * reply ref until ptlrpc_handle_rs() is done
+ * with the reply state (if the send was successful, there
+ * would have been +1 ref for the net, which
+ * reply_out_callback leaves alone) */
+ rs->rs_on_net = 0;
+ ptlrpc_rs_addref(rs);
+ }
+
+ spin_lock(&rs->rs_lock);
+ if (rs->rs_transno <= exp->exp_last_committed ||
+ (!rs->rs_on_net && !rs->rs_no_ack) ||
+ list_empty(&rs->rs_exp_list) || /* completed already */
+ list_empty(&rs->rs_obd_list)) {
+ CDEBUG(D_HA, "Schedule reply immediately\n");
+ ptlrpc_dispatch_difficult_reply(rs);
+ } else {
+ list_add(&rs->rs_list, &svcpt->scp_rep_active);
+ rs->rs_scheduled = 0; /* allow notifier to schedule */
+ }
+ spin_unlock(&rs->rs_lock);
+ spin_unlock(&svcpt->scp_rep_lock);
+ EXIT;
+}
+EXPORT_SYMBOL(target_send_reply);
+
+ldlm_mode_t lck_compat_array[] = {
+ [LCK_EX] LCK_COMPAT_EX,
+ [LCK_PW] LCK_COMPAT_PW,
+ [LCK_PR] LCK_COMPAT_PR,
+ [LCK_CW] LCK_COMPAT_CW,
+ [LCK_CR] LCK_COMPAT_CR,
+ [LCK_NL] LCK_COMPAT_NL,
+ [LCK_GROUP] LCK_COMPAT_GROUP,
+ [LCK_COS] LCK_COMPAT_COS,
+};
+
+/**
+ * Rather arbitrary mapping from LDLM error codes to errno values. This should
+ * not escape to the user level.
+ */
+int ldlm_error2errno(ldlm_error_t error)
+{
+ int result;
+
+ switch (error) {
+ case ELDLM_OK:
+ result = 0;
+ break;
+ case ELDLM_LOCK_CHANGED:
+ result = -ESTALE;
+ break;
+ case ELDLM_LOCK_ABORTED:
+ result = -ENAVAIL;
+ break;
+ case ELDLM_LOCK_REPLACED:
+ result = -ESRCH;
+ break;
+ case ELDLM_NO_LOCK_DATA:
+ result = -ENOENT;
+ break;
+ case ELDLM_NAMESPACE_EXISTS:
+ result = -EEXIST;
+ break;
+ case ELDLM_BAD_NAMESPACE:
+ result = -EBADF;
+ break;
+ default:
+ if (((int)error) < 0) /* cast to signed type */
+ result = error; /* as ldlm_error_t can be unsigned */
+ else {
+ CERROR("Invalid DLM result code: %d\n", error);
+ result = -EPROTO;
+ }
+ }
+ return result;
+}
+EXPORT_SYMBOL(ldlm_error2errno);
+
+/**
+ * Dual to ldlm_error2errno(): maps errno values back to ldlm_error_t.
+ */
+ldlm_error_t ldlm_errno2error(int err_no)
+{
+ int error;
+
+ switch (err_no) {
+ case 0:
+ error = ELDLM_OK;
+ break;
+ case -ESTALE:
+ error = ELDLM_LOCK_CHANGED;
+ break;
+ case -ENAVAIL:
+ error = ELDLM_LOCK_ABORTED;
+ break;
+ case -ESRCH:
+ error = ELDLM_LOCK_REPLACED;
+ break;
+ case -ENOENT:
+ error = ELDLM_NO_LOCK_DATA;
+ break;
+ case -EEXIST:
+ error = ELDLM_NAMESPACE_EXISTS;
+ break;
+ case -EBADF:
+ error = ELDLM_BAD_NAMESPACE;
+ break;
+ default:
+ error = err_no;
+ }
+ return error;
+}
+EXPORT_SYMBOL(ldlm_errno2error);
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void ldlm_dump_export_locks(struct obd_export *exp)
+{
+ spin_lock(&exp->exp_locks_list_guard);
+ if (!list_empty(&exp->exp_locks_list)) {
+ struct ldlm_lock *lock;
+
+ CERROR("dumping locks for export %p,"
+ "ignore if the unmount doesn't hang\n", exp);
+ list_for_each_entry(lock, &exp->exp_locks_list,
+ l_exp_refs_link)
+ LDLM_ERROR(lock, "lock:");
+ }
+ spin_unlock(&exp->exp_locks_list_guard);
+}
+#endif
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
new file mode 100644
index 000000000000..33b76a1e5dec
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -0,0 +1,2429 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_lock.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/lustre_intent.h>
+
+#include <obd_class.h>
+#include "ldlm_internal.h"
+
+/* lock types */
+char *ldlm_lockname[] = {
+ [0] "--",
+ [LCK_EX] "EX",
+ [LCK_PW] "PW",
+ [LCK_PR] "PR",
+ [LCK_CW] "CW",
+ [LCK_CR] "CR",
+ [LCK_NL] "NL",
+ [LCK_GROUP] "GROUP",
+ [LCK_COS] "COS"
+};
+EXPORT_SYMBOL(ldlm_lockname);
+
+char *ldlm_typename[] = {
+ [LDLM_PLAIN] "PLN",
+ [LDLM_EXTENT] "EXT",
+ [LDLM_FLOCK] "FLK",
+ [LDLM_IBITS] "IBT",
+};
+EXPORT_SYMBOL(ldlm_typename);
+
+static ldlm_policy_wire_to_local_t ldlm_policy_wire18_to_local[] = {
+ [LDLM_PLAIN - LDLM_MIN_TYPE] ldlm_plain_policy_wire_to_local,
+ [LDLM_EXTENT - LDLM_MIN_TYPE] ldlm_extent_policy_wire_to_local,
+ [LDLM_FLOCK - LDLM_MIN_TYPE] ldlm_flock_policy_wire18_to_local,
+ [LDLM_IBITS - LDLM_MIN_TYPE] ldlm_ibits_policy_wire_to_local,
+};
+
+static ldlm_policy_wire_to_local_t ldlm_policy_wire21_to_local[] = {
+ [LDLM_PLAIN - LDLM_MIN_TYPE] ldlm_plain_policy_wire_to_local,
+ [LDLM_EXTENT - LDLM_MIN_TYPE] ldlm_extent_policy_wire_to_local,
+ [LDLM_FLOCK - LDLM_MIN_TYPE] ldlm_flock_policy_wire21_to_local,
+ [LDLM_IBITS - LDLM_MIN_TYPE] ldlm_ibits_policy_wire_to_local,
+};
+
+static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = {
+ [LDLM_PLAIN - LDLM_MIN_TYPE] ldlm_plain_policy_local_to_wire,
+ [LDLM_EXTENT - LDLM_MIN_TYPE] ldlm_extent_policy_local_to_wire,
+ [LDLM_FLOCK - LDLM_MIN_TYPE] ldlm_flock_policy_local_to_wire,
+ [LDLM_IBITS - LDLM_MIN_TYPE] ldlm_ibits_policy_local_to_wire,
+};
+
+/**
+ * Converts lock policy from local format to on the wire lock_desc format
+ */
+void ldlm_convert_policy_to_wire(ldlm_type_t type,
+ const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy)
+{
+ ldlm_policy_local_to_wire_t convert;
+
+ convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE];
+
+ convert(lpolicy, wpolicy);
+}
+
+/**
+ * Converts lock policy from on the wire lock_desc format to local format
+ */
+void ldlm_convert_policy_to_local(struct obd_export *exp, ldlm_type_t type,
+ const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ ldlm_policy_wire_to_local_t convert;
+ int new_client;
+
+ /** some badness for 2.0.0 clients, but 2.0.0 isn't supported */
+ new_client = (exp_connect_flags(exp) & OBD_CONNECT_FULL20) != 0;
+ if (new_client)
+ convert = ldlm_policy_wire21_to_local[type - LDLM_MIN_TYPE];
+ else
+ convert = ldlm_policy_wire18_to_local[type - LDLM_MIN_TYPE];
+
+ convert(wpolicy, lpolicy);
+}
+
+char *ldlm_it2str(int it)
+{
+ switch (it) {
+ case IT_OPEN:
+ return "open";
+ case IT_CREAT:
+ return "creat";
+ case (IT_OPEN | IT_CREAT):
+ return "open|creat";
+ case IT_READDIR:
+ return "readdir";
+ case IT_GETATTR:
+ return "getattr";
+ case IT_LOOKUP:
+ return "lookup";
+ case IT_UNLINK:
+ return "unlink";
+ case IT_GETXATTR:
+ return "getxattr";
+ case IT_LAYOUT:
+ return "layout";
+ default:
+ CERROR("Unknown intent %d\n", it);
+ return "UNKNOWN";
+ }
+}
+EXPORT_SYMBOL(ldlm_it2str);
+
+extern struct kmem_cache *ldlm_lock_slab;
+
+
+void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
+{
+ ns->ns_policy = arg;
+}
+EXPORT_SYMBOL(ldlm_register_intent);
+
+/*
+ * REFCOUNTED LOCK OBJECTS
+ */
+
+
+/**
+ * Get a reference on a lock.
+ *
+ * Lock refcounts, during creation:
+ * - one special one for allocation, dec'd only once in destroy
+ * - one for being a lock that's in-use
+ * - one for the addref associated with a new lock
+ */
+struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
+{
+ atomic_inc(&lock->l_refc);
+ return lock;
+}
+EXPORT_SYMBOL(ldlm_lock_get);
+
+/**
+ * Release lock reference.
+ *
+ * Also frees the lock if it was last reference.
+ */
+void ldlm_lock_put(struct ldlm_lock *lock)
+{
+ ENTRY;
+
+ LASSERT(lock->l_resource != LP_POISON);
+ LASSERT(atomic_read(&lock->l_refc) > 0);
+ if (atomic_dec_and_test(&lock->l_refc)) {
+ struct ldlm_resource *res;
+
+ LDLM_DEBUG(lock,
+ "final lock_put on destroyed lock, freeing it.");
+
+ res = lock->l_resource;
+ LASSERT(lock->l_destroyed);
+ LASSERT(list_empty(&lock->l_res_link));
+ LASSERT(list_empty(&lock->l_pending_chain));
+
+ lprocfs_counter_decr(ldlm_res_to_ns(res)->ns_stats,
+ LDLM_NSS_LOCKS);
+ lu_ref_del(&res->lr_reference, "lock", lock);
+ ldlm_resource_putref(res);
+ lock->l_resource = NULL;
+ if (lock->l_export) {
+ class_export_lock_put(lock->l_export, lock);
+ lock->l_export = NULL;
+ }
+
+ if (lock->l_lvb_data != NULL)
+ OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
+
+ ldlm_interval_free(ldlm_interval_detach(lock));
+ lu_ref_fini(&lock->l_reference);
+ OBD_FREE_RCU(lock, sizeof(*lock), &lock->l_handle);
+ }
+
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_lock_put);
+
+/**
+ * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked.
+ */
+int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
+{
+ int rc = 0;
+ if (!list_empty(&lock->l_lru)) {
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
+ list_del_init(&lock->l_lru);
+ if (lock->l_flags & LDLM_FL_SKIPPED)
+ lock->l_flags &= ~LDLM_FL_SKIPPED;
+ LASSERT(ns->ns_nr_unused > 0);
+ ns->ns_nr_unused--;
+ rc = 1;
+ }
+ return rc;
+}
+
+/**
+ * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
+ */
+int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+ int rc;
+
+ ENTRY;
+ if (lock->l_ns_srv) {
+ LASSERT(list_empty(&lock->l_lru));
+ RETURN(0);
+ }
+
+ spin_lock(&ns->ns_lock);
+ rc = ldlm_lock_remove_from_lru_nolock(lock);
+ spin_unlock(&ns->ns_lock);
+ EXIT;
+ return rc;
+}
+
+/**
+ * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked.
+ */
+void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ lock->l_last_used = cfs_time_current();
+ LASSERT(list_empty(&lock->l_lru));
+ LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
+ list_add_tail(&lock->l_lru, &ns->ns_unused_list);
+ LASSERT(ns->ns_nr_unused >= 0);
+ ns->ns_nr_unused++;
+}
+
+/**
+ * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks
+ * first.
+ */
+void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ ENTRY;
+ spin_lock(&ns->ns_lock);
+ ldlm_lock_add_to_lru_nolock(lock);
+ spin_unlock(&ns->ns_lock);
+ EXIT;
+}
+
+/**
+ * Moves LDLM lock \a lock that is already in namespace LRU to the tail of
+ * the LRU. Performs necessary LRU locking
+ */
+void ldlm_lock_touch_in_lru(struct ldlm_lock *lock)
+{
+ struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+ ENTRY;
+ if (lock->l_ns_srv) {
+ LASSERT(list_empty(&lock->l_lru));
+ EXIT;
+ return;
+ }
+
+ spin_lock(&ns->ns_lock);
+ if (!list_empty(&lock->l_lru)) {
+ ldlm_lock_remove_from_lru_nolock(lock);
+ ldlm_lock_add_to_lru_nolock(lock);
+ }
+ spin_unlock(&ns->ns_lock);
+ EXIT;
+}
+
+/**
+ * Helper to destroy a locked lock.
+ *
+ * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock
+ * Must be called with l_lock and lr_lock held.
+ *
+ * Does not actually free the lock data, but rather marks the lock as
+ * destroyed by setting l_destroyed field in the lock to 1. Destroys a
+ * handle->lock association too, so that the lock can no longer be found
+ * and removes the lock from LRU list. Actual lock freeing occurs when
+ * last lock reference goes away.
+ *
+ * Original comment (of some historical value):
+ * This used to have a 'strict' flag, which recovery would use to mark an
+ * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I
+ * shall explain why it's gone: with the new hash table scheme, once you call
+ * ldlm_lock_destroy, you can never drop your final references on this lock.
+ * Because it's not in the hash table anymore. -phil
+ */
+int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
+{
+ ENTRY;
+
+ if (lock->l_readers || lock->l_writers) {
+ LDLM_ERROR(lock, "lock still has references");
+ LBUG();
+ }
+
+ if (!list_empty(&lock->l_res_link)) {
+ LDLM_ERROR(lock, "lock still on resource");
+ LBUG();
+ }
+
+ if (lock->l_destroyed) {
+ LASSERT(list_empty(&lock->l_lru));
+ EXIT;
+ return 0;
+ }
+ lock->l_destroyed = 1;
+
+ if (lock->l_export && lock->l_export->exp_lock_hash) {
+ /* NB: it's safe to call cfs_hash_del() even lock isn't
+ * in exp_lock_hash. */
+ /* In the function below, .hs_keycmp resolves to
+ * ldlm_export_lock_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ cfs_hash_del(lock->l_export->exp_lock_hash,
+ &lock->l_remote_handle, &lock->l_exp_hash);
+ }
+
+ ldlm_lock_remove_from_lru(lock);
+ class_handle_unhash(&lock->l_handle);
+
+#if 0
+ /* Wake anyone waiting for this lock */
+ /* FIXME: I should probably add yet another flag, instead of using
+ * l_export to only call this on clients */
+ if (lock->l_export)
+ class_export_put(lock->l_export);
+ lock->l_export = NULL;
+ if (lock->l_export && lock->l_completion_ast)
+ lock->l_completion_ast(lock, 0);
+#endif
+ EXIT;
+ return 1;
+}
+
+/**
+ * Destroys a LDLM lock \a lock. Performs necessary locking first.
+ */
+void ldlm_lock_destroy(struct ldlm_lock *lock)
+{
+ int first;
+ ENTRY;
+ lock_res_and_lock(lock);
+ first = ldlm_lock_destroy_internal(lock);
+ unlock_res_and_lock(lock);
+
+ /* drop reference from hashtable only for first destroy */
+ if (first) {
+ lu_ref_del(&lock->l_reference, "hash", lock);
+ LDLM_LOCK_RELEASE(lock);
+ }
+ EXIT;
+}
+
+/**
+ * Destroys a LDLM lock \a lock that is already locked.
+ */
+void ldlm_lock_destroy_nolock(struct ldlm_lock *lock)
+{
+ int first;
+ ENTRY;
+ first = ldlm_lock_destroy_internal(lock);
+ /* drop reference from hashtable only for first destroy */
+ if (first) {
+ lu_ref_del(&lock->l_reference, "hash", lock);
+ LDLM_LOCK_RELEASE(lock);
+ }
+ EXIT;
+}
+
+/* this is called by portals_handle2object with the handle lock taken */
+static void lock_handle_addref(void *lock)
+{
+ LDLM_LOCK_GET((struct ldlm_lock *)lock);
+}
+
+static void lock_handle_free(void *lock, int size)
+{
+ LASSERT(size == sizeof(struct ldlm_lock));
+ OBD_SLAB_FREE(lock, ldlm_lock_slab, size);
+}
+
+struct portals_handle_ops lock_handle_ops = {
+ .hop_addref = lock_handle_addref,
+ .hop_free = lock_handle_free,
+};
+
+/**
+ *
+ * Allocate and initialize new lock structure.
+ *
+ * usage: pass in a resource on which you have done ldlm_resource_get
+ * new lock will take over the refcount.
+ * returns: lock with refcount 2 - one for current caller and one for remote
+ */
+static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
+{
+ struct ldlm_lock *lock;
+ ENTRY;
+
+ if (resource == NULL)
+ LBUG();
+
+ OBD_SLAB_ALLOC_PTR_GFP(lock, ldlm_lock_slab, __GFP_IO);
+ if (lock == NULL)
+ RETURN(NULL);
+
+ spin_lock_init(&lock->l_lock);
+ lock->l_resource = resource;
+ lu_ref_add(&resource->lr_reference, "lock", lock);
+
+ atomic_set(&lock->l_refc, 2);
+ INIT_LIST_HEAD(&lock->l_res_link);
+ INIT_LIST_HEAD(&lock->l_lru);
+ INIT_LIST_HEAD(&lock->l_pending_chain);
+ INIT_LIST_HEAD(&lock->l_bl_ast);
+ INIT_LIST_HEAD(&lock->l_cp_ast);
+ INIT_LIST_HEAD(&lock->l_rk_ast);
+ init_waitqueue_head(&lock->l_waitq);
+ lock->l_blocking_lock = NULL;
+ INIT_LIST_HEAD(&lock->l_sl_mode);
+ INIT_LIST_HEAD(&lock->l_sl_policy);
+ INIT_HLIST_NODE(&lock->l_exp_hash);
+ INIT_HLIST_NODE(&lock->l_exp_flock_hash);
+
+ lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
+ LDLM_NSS_LOCKS);
+ INIT_LIST_HEAD(&lock->l_handle.h_link);
+ class_handle_hash(&lock->l_handle, &lock_handle_ops);
+
+ lu_ref_init(&lock->l_reference);
+ lu_ref_add(&lock->l_reference, "hash", lock);
+ lock->l_callback_timeout = 0;
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ INIT_LIST_HEAD(&lock->l_exp_refs_link);
+ lock->l_exp_refs_nr = 0;
+ lock->l_exp_refs_target = NULL;
+#endif
+ INIT_LIST_HEAD(&lock->l_exp_list);
+
+ RETURN(lock);
+}
+
+/**
+ * Moves LDLM lock \a lock to another resource.
+ * This is used on client when server returns some other lock than requested
+ * (typically as a result of intent operation)
+ */
+int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ const struct ldlm_res_id *new_resid)
+{
+ struct ldlm_resource *oldres = lock->l_resource;
+ struct ldlm_resource *newres;
+ int type;
+ ENTRY;
+
+ LASSERT(ns_is_client(ns));
+
+ lock_res_and_lock(lock);
+ if (memcmp(new_resid, &lock->l_resource->lr_name,
+ sizeof(lock->l_resource->lr_name)) == 0) {
+ /* Nothing to do */
+ unlock_res_and_lock(lock);
+ RETURN(0);
+ }
+
+ LASSERT(new_resid->name[0] != 0);
+
+ /* This function assumes that the lock isn't on any lists */
+ LASSERT(list_empty(&lock->l_res_link));
+
+ type = oldres->lr_type;
+ unlock_res_and_lock(lock);
+
+ newres = ldlm_resource_get(ns, NULL, new_resid, type, 1);
+ if (newres == NULL)
+ RETURN(-ENOMEM);
+
+ lu_ref_add(&newres->lr_reference, "lock", lock);
+ /*
+ * To flip the lock from the old to the new resource, lock, oldres and
+ * newres have to be locked. Resource spin-locks are nested within
+ * lock->l_lock, and are taken in the memory address order to avoid
+ * dead-locks.
+ */
+ spin_lock(&lock->l_lock);
+ oldres = lock->l_resource;
+ if (oldres < newres) {
+ lock_res(oldres);
+ lock_res_nested(newres, LRT_NEW);
+ } else {
+ lock_res(newres);
+ lock_res_nested(oldres, LRT_NEW);
+ }
+ LASSERT(memcmp(new_resid, &oldres->lr_name,
+ sizeof oldres->lr_name) != 0);
+ lock->l_resource = newres;
+ unlock_res(oldres);
+ unlock_res_and_lock(lock);
+
+ /* ...and the flowers are still standing! */
+ lu_ref_del(&oldres->lr_reference, "lock", lock);
+ ldlm_resource_putref(oldres);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_lock_change_resource);
+
+/** \defgroup ldlm_handles LDLM HANDLES
+ * Ways to get hold of locks without any addresses.
+ * @{
+ */
+
+/**
+ * Fills in handle for LDLM lock \a lock into supplied \a lockh
+ * Does not take any references.
+ */
+void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh)
+{
+ lockh->cookie = lock->l_handle.h_cookie;
+}
+EXPORT_SYMBOL(ldlm_lock2handle);
+
+/**
+ * Obtain a lock reference by handle.
+ *
+ * if \a flags: atomically get the lock and set the flags.
+ * Return NULL if flag already set
+ */
+struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
+ __u64 flags)
+{
+ struct ldlm_lock *lock;
+ ENTRY;
+
+ LASSERT(handle);
+
+ lock = class_handle2object(handle->cookie);
+ if (lock == NULL)
+ RETURN(NULL);
+
+ /* It's unlikely but possible that someone marked the lock as
+ * destroyed after we did handle2object on it */
+ if (flags == 0 && !lock->l_destroyed) {
+ lu_ref_add(&lock->l_reference, "handle", current);
+ RETURN(lock);
+ }
+
+ lock_res_and_lock(lock);
+
+ LASSERT(lock->l_resource != NULL);
+
+ lu_ref_add_atomic(&lock->l_reference, "handle", current);
+ if (unlikely(lock->l_destroyed)) {
+ unlock_res_and_lock(lock);
+ CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
+ LDLM_LOCK_PUT(lock);
+ RETURN(NULL);
+ }
+
+ if (flags && (lock->l_flags & flags)) {
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+ RETURN(NULL);
+ }
+
+ if (flags)
+ lock->l_flags |= flags;
+
+ unlock_res_and_lock(lock);
+ RETURN(lock);
+}
+EXPORT_SYMBOL(__ldlm_handle2lock);
+/** @} ldlm_handles */
+
+/**
+ * Fill in "on the wire" representation for given LDLM lock into supplied
+ * lock descriptor \a desc structure.
+ */
+void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
+{
+ struct obd_export *exp = lock->l_export ?: lock->l_conn_export;
+
+ /* INODEBITS_INTEROP: If the other side does not support
+ * inodebits, reply with a plain lock descriptor. */
+ if ((lock->l_resource->lr_type == LDLM_IBITS) &&
+ (exp && !(exp_connect_flags(exp) & OBD_CONNECT_IBITS))) {
+ /* Make sure all the right bits are set in this lock we
+ are going to pass to client */
+ LASSERTF(lock->l_policy_data.l_inodebits.bits ==
+ (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
+ MDS_INODELOCK_LAYOUT),
+ "Inappropriate inode lock bits during "
+ "conversion " LPU64 "\n",
+ lock->l_policy_data.l_inodebits.bits);
+
+ ldlm_res2desc(lock->l_resource, &desc->l_resource);
+ desc->l_resource.lr_type = LDLM_PLAIN;
+
+ /* Convert "new" lock mode to something old client can
+ understand */
+ if ((lock->l_req_mode == LCK_CR) ||
+ (lock->l_req_mode == LCK_CW))
+ desc->l_req_mode = LCK_PR;
+ else
+ desc->l_req_mode = lock->l_req_mode;
+ if ((lock->l_granted_mode == LCK_CR) ||
+ (lock->l_granted_mode == LCK_CW)) {
+ desc->l_granted_mode = LCK_PR;
+ } else {
+ /* We never grant PW/EX locks to clients */
+ LASSERT((lock->l_granted_mode != LCK_PW) &&
+ (lock->l_granted_mode != LCK_EX));
+ desc->l_granted_mode = lock->l_granted_mode;
+ }
+
+ /* We do not copy policy here, because there is no
+ policy for plain locks */
+ } else {
+ ldlm_res2desc(lock->l_resource, &desc->l_resource);
+ desc->l_req_mode = lock->l_req_mode;
+ desc->l_granted_mode = lock->l_granted_mode;
+ ldlm_convert_policy_to_wire(lock->l_resource->lr_type,
+ &lock->l_policy_data,
+ &desc->l_policy_data);
+ }
+}
+EXPORT_SYMBOL(ldlm_lock2desc);
+
+/**
+ * Add a lock to list of conflicting locks to send AST to.
+ *
+ * Only add if we have not sent a blocking AST to the lock yet.
+ */
+void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
+ struct list_head *work_list)
+{
+ if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) {
+ LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
+ lock->l_flags |= LDLM_FL_AST_SENT;
+ /* If the enqueuing client said so, tell the AST recipient to
+ * discard dirty data, rather than writing back. */
+ if (new->l_flags & LDLM_AST_DISCARD_DATA)
+ lock->l_flags |= LDLM_FL_DISCARD_DATA;
+ LASSERT(list_empty(&lock->l_bl_ast));
+ list_add(&lock->l_bl_ast, work_list);
+ LDLM_LOCK_GET(lock);
+ LASSERT(lock->l_blocking_lock == NULL);
+ lock->l_blocking_lock = LDLM_LOCK_GET(new);
+ }
+}
+
+/**
+ * Add a lock to list of just granted locks to send completion AST to.
+ */
+void ldlm_add_cp_work_item(struct ldlm_lock *lock, struct list_head *work_list)
+{
+ if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
+ lock->l_flags |= LDLM_FL_CP_REQD;
+ LDLM_DEBUG(lock, "lock granted; sending completion AST.");
+ LASSERT(list_empty(&lock->l_cp_ast));
+ list_add(&lock->l_cp_ast, work_list);
+ LDLM_LOCK_GET(lock);
+ }
+}
+
+/**
+ * Aggregator function to add AST work items into a list. Determines
+ * what sort of an AST work needs to be done and calls the proper
+ * adding function.
+ * Must be called with lr_lock held.
+ */
+void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
+ struct list_head *work_list)
+{
+ ENTRY;
+ check_res_locked(lock->l_resource);
+ if (new)
+ ldlm_add_bl_work_item(lock, new, work_list);
+ else
+ ldlm_add_cp_work_item(lock, work_list);
+ EXIT;
+}
+
+/**
+ * Add specified reader/writer reference to LDLM lock with handle \a lockh.
+ * r/w reference type is determined by \a mode
+ * Calls ldlm_lock_addref_internal.
+ */
+void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock;
+
+ lock = ldlm_handle2lock(lockh);
+ LASSERT(lock != NULL);
+ ldlm_lock_addref_internal(lock, mode);
+ LDLM_LOCK_PUT(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_addref);
+
+/**
+ * Helper function.
+ * Add specified reader/writer reference to LDLM lock \a lock.
+ * r/w reference type is determined by \a mode
+ * Removes lock from LRU if it is there.
+ * Assumes the LDLM lock is already locked.
+ */
+void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
+{
+ ldlm_lock_remove_from_lru(lock);
+ if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
+ lock->l_readers++;
+ lu_ref_add_atomic(&lock->l_reference, "reader", lock);
+ }
+ if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
+ lock->l_writers++;
+ lu_ref_add_atomic(&lock->l_reference, "writer", lock);
+ }
+ LDLM_LOCK_GET(lock);
+ lu_ref_add_atomic(&lock->l_reference, "user", lock);
+ LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
+}
+
+/**
+ * Attempts to add reader/writer reference to a lock with handle \a lockh, and
+ * fails if lock is already LDLM_FL_CBPENDING or destroyed.
+ *
+ * \retval 0 success, lock was addref-ed
+ *
+ * \retval -EAGAIN lock is being canceled.
+ */
+int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock;
+ int result;
+
+ result = -EAGAIN;
+ lock = ldlm_handle2lock(lockh);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ if (lock->l_readers != 0 || lock->l_writers != 0 ||
+ !(lock->l_flags & LDLM_FL_CBPENDING)) {
+ ldlm_lock_addref_internal_nolock(lock, mode);
+ result = 0;
+ }
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+ }
+ return result;
+}
+EXPORT_SYMBOL(ldlm_lock_addref_try);
+
+/**
+ * Add specified reader/writer reference to LDLM lock \a lock.
+ * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work.
+ * Only called for local locks.
+ */
+void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
+{
+ lock_res_and_lock(lock);
+ ldlm_lock_addref_internal_nolock(lock, mode);
+ unlock_res_and_lock(lock);
+}
+
+/**
+ * Removes reader/writer reference for LDLM lock \a lock.
+ * Assumes LDLM lock is already locked.
+ * only called in ldlm_flock_destroy and for local locks.
+ * Does NOT add lock to LRU if no r/w references left to accomodate flock locks
+ * that cannot be placed in LRU.
+ */
+void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
+{
+ LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
+ if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
+ LASSERT(lock->l_readers > 0);
+ lu_ref_del(&lock->l_reference, "reader", lock);
+ lock->l_readers--;
+ }
+ if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
+ LASSERT(lock->l_writers > 0);
+ lu_ref_del(&lock->l_reference, "writer", lock);
+ lock->l_writers--;
+ }
+
+ lu_ref_del(&lock->l_reference, "user", lock);
+ LDLM_LOCK_RELEASE(lock); /* matches the LDLM_LOCK_GET() in addref */
+}
+
+/**
+ * Removes reader/writer reference for LDLM lock \a lock.
+ * Locks LDLM lock first.
+ * If the lock is determined to be client lock on a client and r/w refcount
+ * drops to zero and the lock is not blocked, the lock is added to LRU lock
+ * on the namespace.
+ * For blocked LDLM locks if r/w count drops to zero, blocking_ast is called.
+ */
+void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
+{
+ struct ldlm_namespace *ns;
+ ENTRY;
+
+ lock_res_and_lock(lock);
+
+ ns = ldlm_lock_to_ns(lock);
+
+ ldlm_lock_decref_internal_nolock(lock, mode);
+
+ if (lock->l_flags & LDLM_FL_LOCAL &&
+ !lock->l_readers && !lock->l_writers) {
+ /* If this is a local lock on a server namespace and this was
+ * the last reference, cancel the lock. */
+ CDEBUG(D_INFO, "forcing cancel of local lock\n");
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ }
+
+ if (!lock->l_readers && !lock->l_writers &&
+ (lock->l_flags & LDLM_FL_CBPENDING)) {
+ /* If we received a blocked AST and this was the last reference,
+ * run the callback. */
+ if (lock->l_ns_srv && lock->l_export)
+ CERROR("FL_CBPENDING set on non-local lock--just a "
+ "warning\n");
+
+ LDLM_DEBUG(lock, "final decref done on cbpending lock");
+
+ LDLM_LOCK_GET(lock); /* dropped by bl thread */
+ ldlm_lock_remove_from_lru(lock);
+ unlock_res_and_lock(lock);
+
+ if (lock->l_flags & LDLM_FL_FAIL_LOC)
+ OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
+
+ if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
+ ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
+ ldlm_handle_bl_callback(ns, NULL, lock);
+ } else if (ns_is_client(ns) &&
+ !lock->l_readers && !lock->l_writers &&
+ !(lock->l_flags & LDLM_FL_NO_LRU) &&
+ !(lock->l_flags & LDLM_FL_BL_AST)) {
+
+ LDLM_DEBUG(lock, "add lock into lru list");
+
+ /* If this is a client-side namespace and this was the last
+ * reference, put it on the LRU. */
+ ldlm_lock_add_to_lru(lock);
+ unlock_res_and_lock(lock);
+
+ if (lock->l_flags & LDLM_FL_FAIL_LOC)
+ OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
+
+ /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
+ * are not supported by the server, otherwise, it is done on
+ * enqueue. */
+ if (!exp_connect_cancelset(lock->l_conn_export) &&
+ !ns_connect_lru_resize(ns))
+ ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
+ } else {
+ LDLM_DEBUG(lock, "do not add lock into lru list");
+ unlock_res_and_lock(lock);
+ }
+
+ EXIT;
+}
+
+/**
+ * Decrease reader/writer refcount for LDLM lock with handle \a lockh
+ */
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+ LASSERTF(lock != NULL, "Non-existing lock: "LPX64"\n", lockh->cookie);
+ ldlm_lock_decref_internal(lock, mode);
+ LDLM_LOCK_PUT(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_decref);
+
+/**
+ * Decrease reader/writer refcount for LDLM lock with handle
+ * \a lockh and mark it for subsequent cancellation once r/w refcount
+ * drops to zero instead of putting into LRU.
+ *
+ * Typical usage is for GROUP locks which we cannot allow to be cached.
+ */
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
+{
+ struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
+ ENTRY;
+
+ LASSERT(lock != NULL);
+
+ LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ unlock_res_and_lock(lock);
+ ldlm_lock_decref_internal(lock, mode);
+ LDLM_LOCK_PUT(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_decref_and_cancel);
+
+struct sl_insert_point {
+ struct list_head *res_link;
+ struct list_head *mode_link;
+ struct list_head *policy_link;
+};
+
+/**
+ * Finds a position to insert the new lock into granted lock list.
+ *
+ * Used for locks eligible for skiplist optimization.
+ *
+ * Parameters:
+ * queue [input]: the granted list where search acts on;
+ * req [input]: the lock whose position to be located;
+ * prev [output]: positions within 3 lists to insert @req to
+ * Return Value:
+ * filled @prev
+ * NOTE: called by
+ * - ldlm_grant_lock_with_skiplist
+ */
+static void search_granted_lock(struct list_head *queue,
+ struct ldlm_lock *req,
+ struct sl_insert_point *prev)
+{
+ struct list_head *tmp;
+ struct ldlm_lock *lock, *mode_end, *policy_end;
+ ENTRY;
+
+ list_for_each(tmp, queue) {
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ mode_end = list_entry(lock->l_sl_mode.prev,
+ struct ldlm_lock, l_sl_mode);
+
+ if (lock->l_req_mode != req->l_req_mode) {
+ /* jump to last lock of mode group */
+ tmp = &mode_end->l_res_link;
+ continue;
+ }
+
+ /* suitable mode group is found */
+ if (lock->l_resource->lr_type == LDLM_PLAIN) {
+ /* insert point is last lock of the mode group */
+ prev->res_link = &mode_end->l_res_link;
+ prev->mode_link = &mode_end->l_sl_mode;
+ prev->policy_link = &req->l_sl_policy;
+ EXIT;
+ return;
+ } else if (lock->l_resource->lr_type == LDLM_IBITS) {
+ for (;;) {
+ policy_end =
+ list_entry(lock->l_sl_policy.prev,
+ struct ldlm_lock,
+ l_sl_policy);
+
+ if (lock->l_policy_data.l_inodebits.bits ==
+ req->l_policy_data.l_inodebits.bits) {
+ /* insert point is last lock of
+ * the policy group */
+ prev->res_link =
+ &policy_end->l_res_link;
+ prev->mode_link =
+ &policy_end->l_sl_mode;
+ prev->policy_link =
+ &policy_end->l_sl_policy;
+ EXIT;
+ return;
+ }
+
+ if (policy_end == mode_end)
+ /* done with mode group */
+ break;
+
+ /* go to next policy group within mode group */
+ tmp = policy_end->l_res_link.next;
+ lock = list_entry(tmp, struct ldlm_lock,
+ l_res_link);
+ } /* loop over policy groups within the mode group */
+
+ /* insert point is last lock of the mode group,
+ * new policy group is started */
+ prev->res_link = &mode_end->l_res_link;
+ prev->mode_link = &mode_end->l_sl_mode;
+ prev->policy_link = &req->l_sl_policy;
+ EXIT;
+ return;
+ } else {
+ LDLM_ERROR(lock,"is not LDLM_PLAIN or LDLM_IBITS lock");
+ LBUG();
+ }
+ }
+
+ /* insert point is last lock on the queue,
+ * new mode group and new policy group are started */
+ prev->res_link = queue->prev;
+ prev->mode_link = &req->l_sl_mode;
+ prev->policy_link = &req->l_sl_policy;
+ EXIT;
+ return;
+}
+
+/**
+ * Add a lock into resource granted list after a position described by
+ * \a prev.
+ */
+static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
+ struct sl_insert_point *prev)
+{
+ struct ldlm_resource *res = lock->l_resource;
+ ENTRY;
+
+ check_res_locked(res);
+
+ ldlm_resource_dump(D_INFO, res);
+ LDLM_DEBUG(lock, "About to add lock:");
+
+ if (lock->l_destroyed) {
+ CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+ return;
+ }
+
+ LASSERT(list_empty(&lock->l_res_link));
+ LASSERT(list_empty(&lock->l_sl_mode));
+ LASSERT(list_empty(&lock->l_sl_policy));
+
+ /*
+ * lock->link == prev->link means lock is first starting the group.
+ * Don't re-add to itself to suppress kernel warnings.
+ */
+ if (&lock->l_res_link != prev->res_link)
+ list_add(&lock->l_res_link, prev->res_link);
+ if (&lock->l_sl_mode != prev->mode_link)
+ list_add(&lock->l_sl_mode, prev->mode_link);
+ if (&lock->l_sl_policy != prev->policy_link)
+ list_add(&lock->l_sl_policy, prev->policy_link);
+
+ EXIT;
+}
+
+/**
+ * Add a lock to granted list on a resource maintaining skiplist
+ * correctness.
+ */
+static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock)
+{
+ struct sl_insert_point prev;
+ ENTRY;
+
+ LASSERT(lock->l_req_mode == lock->l_granted_mode);
+
+ search_granted_lock(&lock->l_resource->lr_granted, lock, &prev);
+ ldlm_granted_list_add_lock(lock, &prev);
+ EXIT;
+}
+
+/**
+ * Perform lock granting bookkeeping.
+ *
+ * Includes putting the lock into granted list and updating lock mode.
+ * NOTE: called by
+ * - ldlm_lock_enqueue
+ * - ldlm_reprocess_queue
+ * - ldlm_lock_convert
+ *
+ * must be called with lr_lock held
+ */
+void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
+{
+ struct ldlm_resource *res = lock->l_resource;
+ ENTRY;
+
+ check_res_locked(res);
+
+ lock->l_granted_mode = lock->l_req_mode;
+ if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS)
+ ldlm_grant_lock_with_skiplist(lock);
+ else if (res->lr_type == LDLM_EXTENT)
+ ldlm_extent_add_lock(res, lock);
+ else
+ ldlm_resource_add_lock(res, &res->lr_granted, lock);
+
+ if (lock->l_granted_mode < res->lr_most_restr)
+ res->lr_most_restr = lock->l_granted_mode;
+
+ if (work_list && lock->l_completion_ast != NULL)
+ ldlm_add_ast_work_item(lock, NULL, work_list);
+
+ ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock);
+ EXIT;
+}
+
+/**
+ * Search for a lock with given properties in a queue.
+ *
+ * \retval a referenced lock or NULL. See the flag descriptions below, in the
+ * comment above ldlm_lock_match
+ */
+static struct ldlm_lock *search_queue(struct list_head *queue,
+ ldlm_mode_t *mode,
+ ldlm_policy_data_t *policy,
+ struct ldlm_lock *old_lock,
+ __u64 flags, int unref)
+{
+ struct ldlm_lock *lock;
+ struct list_head *tmp;
+
+ list_for_each(tmp, queue) {
+ ldlm_mode_t match;
+
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ if (lock == old_lock)
+ break;
+
+ /* llite sometimes wants to match locks that will be
+ * canceled when their users drop, but we allow it to match
+ * if it passes in CBPENDING and the lock still has users.
+ * this is generally only going to be used by children
+ * whose parents already hold a lock so forward progress
+ * can still happen. */
+ if (lock->l_flags & LDLM_FL_CBPENDING &&
+ !(flags & LDLM_FL_CBPENDING))
+ continue;
+ if (!unref && lock->l_flags & LDLM_FL_CBPENDING &&
+ lock->l_readers == 0 && lock->l_writers == 0)
+ continue;
+
+ if (!(lock->l_req_mode & *mode))
+ continue;
+ match = lock->l_req_mode;
+
+ if (lock->l_resource->lr_type == LDLM_EXTENT &&
+ (lock->l_policy_data.l_extent.start >
+ policy->l_extent.start ||
+ lock->l_policy_data.l_extent.end < policy->l_extent.end))
+ continue;
+
+ if (unlikely(match == LCK_GROUP) &&
+ lock->l_resource->lr_type == LDLM_EXTENT &&
+ lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
+ continue;
+
+ /* We match if we have existing lock with same or wider set
+ of bits. */
+ if (lock->l_resource->lr_type == LDLM_IBITS &&
+ ((lock->l_policy_data.l_inodebits.bits &
+ policy->l_inodebits.bits) !=
+ policy->l_inodebits.bits))
+ continue;
+
+ if (!unref &&
+ (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED ||
+ lock->l_failed))
+ continue;
+
+ if ((flags & LDLM_FL_LOCAL_ONLY) &&
+ !(lock->l_flags & LDLM_FL_LOCAL))
+ continue;
+
+ if (flags & LDLM_FL_TEST_LOCK) {
+ LDLM_LOCK_GET(lock);
+ ldlm_lock_touch_in_lru(lock);
+ } else {
+ ldlm_lock_addref_internal_nolock(lock, match);
+ }
+ *mode = match;
+ return lock;
+ }
+
+ return NULL;
+}
+
+void ldlm_lock_fail_match_locked(struct ldlm_lock *lock)
+{
+ if (!lock->l_failed) {
+ lock->l_failed = 1;
+ wake_up_all(&lock->l_waitq);
+ }
+}
+EXPORT_SYMBOL(ldlm_lock_fail_match_locked);
+
+void ldlm_lock_fail_match(struct ldlm_lock *lock)
+{
+ lock_res_and_lock(lock);
+ ldlm_lock_fail_match_locked(lock);
+ unlock_res_and_lock(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_fail_match);
+
+/**
+ * Mark lock as "matchable" by OST.
+ *
+ * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB
+ * is not yet valid.
+ * Assumes LDLM lock is already locked.
+ */
+void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
+{
+ lock->l_flags |= LDLM_FL_LVB_READY;
+ wake_up_all(&lock->l_waitq);
+}
+EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
+
+/**
+ * Mark lock as "matchable" by OST.
+ * Locks the lock and then \see ldlm_lock_allow_match_locked
+ */
+void ldlm_lock_allow_match(struct ldlm_lock *lock)
+{
+ lock_res_and_lock(lock);
+ ldlm_lock_allow_match_locked(lock);
+ unlock_res_and_lock(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_allow_match);
+
+/**
+ * Attempt to find a lock with specified properties.
+ *
+ * Typically returns a reference to matched lock unless LDLM_FL_TEST_LOCK is
+ * set in \a flags
+ *
+ * Can be called in two ways:
+ *
+ * If 'ns' is NULL, then lockh describes an existing lock that we want to look
+ * for a duplicate of.
+ *
+ * Otherwise, all of the fields must be filled in, to match against.
+ *
+ * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
+ * server (ie, connh is NULL)
+ * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
+ * list will be considered
+ * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
+ * to be canceled can still be matched as long as they still have reader
+ * or writer refernces
+ * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
+ * just tell us if we would have matched.
+ *
+ * \retval 1 if it finds an already-existing lock that is compatible; in this
+ * case, lockh is filled in with a addref()ed lock
+ *
+ * We also check security context, and if that fails we simply return 0 (to
+ * keep caller code unchanged), the context failure will be discovered by
+ * caller sometime later.
+ */
+ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
+ const struct ldlm_res_id *res_id, ldlm_type_t type,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ struct lustre_handle *lockh, int unref)
+{
+ struct ldlm_resource *res;
+ struct ldlm_lock *lock, *old_lock = NULL;
+ int rc = 0;
+ ENTRY;
+
+ if (ns == NULL) {
+ old_lock = ldlm_handle2lock(lockh);
+ LASSERT(old_lock);
+
+ ns = ldlm_lock_to_ns(old_lock);
+ res_id = &old_lock->l_resource->lr_name;
+ type = old_lock->l_resource->lr_type;
+ mode = old_lock->l_req_mode;
+ }
+
+ res = ldlm_resource_get(ns, NULL, res_id, type, 0);
+ if (res == NULL) {
+ LASSERT(old_lock == NULL);
+ RETURN(0);
+ }
+
+ LDLM_RESOURCE_ADDREF(res);
+ lock_res(res);
+
+ lock = search_queue(&res->lr_granted, &mode, policy, old_lock,
+ flags, unref);
+ if (lock != NULL)
+ GOTO(out, rc = 1);
+ if (flags & LDLM_FL_BLOCK_GRANTED)
+ GOTO(out, rc = 0);
+ lock = search_queue(&res->lr_converting, &mode, policy, old_lock,
+ flags, unref);
+ if (lock != NULL)
+ GOTO(out, rc = 1);
+ lock = search_queue(&res->lr_waiting, &mode, policy, old_lock,
+ flags, unref);
+ if (lock != NULL)
+ GOTO(out, rc = 1);
+
+ EXIT;
+ out:
+ unlock_res(res);
+ LDLM_RESOURCE_DELREF(res);
+ ldlm_resource_putref(res);
+
+ if (lock) {
+ ldlm_lock2handle(lock, lockh);
+ if ((flags & LDLM_FL_LVB_READY) &&
+ (!(lock->l_flags & LDLM_FL_LVB_READY))) {
+ struct l_wait_info lwi;
+ if (lock->l_completion_ast) {
+ int err = lock->l_completion_ast(lock,
+ LDLM_FL_WAIT_NOREPROC,
+ NULL);
+ if (err) {
+ if (flags & LDLM_FL_TEST_LOCK)
+ LDLM_LOCK_RELEASE(lock);
+ else
+ ldlm_lock_decref_internal(lock,
+ mode);
+ rc = 0;
+ goto out2;
+ }
+ }
+
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout),
+ NULL, LWI_ON_SIGNAL_NOOP, NULL);
+
+ /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
+ l_wait_event(lock->l_waitq,
+ lock->l_flags & LDLM_FL_LVB_READY ||
+ lock->l_destroyed || lock->l_failed,
+ &lwi);
+ if (!(lock->l_flags & LDLM_FL_LVB_READY)) {
+ if (flags & LDLM_FL_TEST_LOCK)
+ LDLM_LOCK_RELEASE(lock);
+ else
+ ldlm_lock_decref_internal(lock, mode);
+ rc = 0;
+ }
+ }
+ }
+ out2:
+ if (rc) {
+ LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
+ (type == LDLM_PLAIN || type == LDLM_IBITS) ?
+ res_id->name[2] : policy->l_extent.start,
+ (type == LDLM_PLAIN || type == LDLM_IBITS) ?
+ res_id->name[3] : policy->l_extent.end);
+
+ /* check user's security context */
+ if (lock->l_conn_export &&
+ sptlrpc_import_check_ctx(
+ class_exp2cliimp(lock->l_conn_export))) {
+ if (!(flags & LDLM_FL_TEST_LOCK))
+ ldlm_lock_decref_internal(lock, mode);
+ rc = 0;
+ }
+
+ if (flags & LDLM_FL_TEST_LOCK)
+ LDLM_LOCK_RELEASE(lock);
+
+ } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/
+ LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
+ LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
+ type, mode, res_id->name[0], res_id->name[1],
+ (type == LDLM_PLAIN || type == LDLM_IBITS) ?
+ res_id->name[2] :policy->l_extent.start,
+ (type == LDLM_PLAIN || type == LDLM_IBITS) ?
+ res_id->name[3] : policy->l_extent.end);
+ }
+ if (old_lock)
+ LDLM_LOCK_PUT(old_lock);
+
+ return rc ? mode : 0;
+}
+EXPORT_SYMBOL(ldlm_lock_match);
+
+ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+ __u64 *bits)
+{
+ struct ldlm_lock *lock;
+ ldlm_mode_t mode = 0;
+ ENTRY;
+
+ lock = ldlm_handle2lock(lockh);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED ||
+ lock->l_failed)
+ GOTO(out, mode);
+
+ if (lock->l_flags & LDLM_FL_CBPENDING &&
+ lock->l_readers == 0 && lock->l_writers == 0)
+ GOTO(out, mode);
+
+ if (bits)
+ *bits = lock->l_policy_data.l_inodebits.bits;
+ mode = lock->l_granted_mode;
+ ldlm_lock_addref_internal_nolock(lock, mode);
+ }
+
+ EXIT;
+
+out:
+ if (lock != NULL) {
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+ }
+ return mode;
+}
+EXPORT_SYMBOL(ldlm_revalidate_lock_handle);
+
+/** The caller must guarantee that the buffer is large enough. */
+int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
+ enum req_location loc, void *data, int size)
+{
+ void *lvb;
+ ENTRY;
+
+ LASSERT(data != NULL);
+ LASSERT(size >= 0);
+
+ switch (lock->l_lvb_type) {
+ case LVB_T_OST:
+ if (size == sizeof(struct ost_lvb)) {
+ if (loc == RCL_CLIENT)
+ lvb = req_capsule_client_swab_get(pill,
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb);
+ else
+ lvb = req_capsule_server_swab_get(pill,
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb);
+ if (unlikely(lvb == NULL)) {
+ LDLM_ERROR(lock, "no LVB");
+ RETURN(-EPROTO);
+ }
+
+ memcpy(data, lvb, size);
+ } else if (size == sizeof(struct ost_lvb_v1)) {
+ struct ost_lvb *olvb = data;
+
+ if (loc == RCL_CLIENT)
+ lvb = req_capsule_client_swab_get(pill,
+ &RMF_DLM_LVB,
+ lustre_swab_ost_lvb_v1);
+ else
+ lvb = req_capsule_server_sized_swab_get(pill,
+ &RMF_DLM_LVB, size,
+ lustre_swab_ost_lvb_v1);
+ if (unlikely(lvb == NULL)) {
+ LDLM_ERROR(lock, "no LVB");
+ RETURN(-EPROTO);
+ }
+
+ memcpy(data, lvb, size);
+ olvb->lvb_mtime_ns = 0;
+ olvb->lvb_atime_ns = 0;
+ olvb->lvb_ctime_ns = 0;
+ } else {
+ LDLM_ERROR(lock, "Replied unexpected ost LVB size %d",
+ size);
+ RETURN(-EINVAL);
+ }
+ break;
+ case LVB_T_LQUOTA:
+ if (size == sizeof(struct lquota_lvb)) {
+ if (loc == RCL_CLIENT)
+ lvb = req_capsule_client_swab_get(pill,
+ &RMF_DLM_LVB,
+ lustre_swab_lquota_lvb);
+ else
+ lvb = req_capsule_server_swab_get(pill,
+ &RMF_DLM_LVB,
+ lustre_swab_lquota_lvb);
+ if (unlikely(lvb == NULL)) {
+ LDLM_ERROR(lock, "no LVB");
+ RETURN(-EPROTO);
+ }
+
+ memcpy(data, lvb, size);
+ } else {
+ LDLM_ERROR(lock, "Replied unexpected lquota LVB size %d",
+ size);
+ RETURN(-EINVAL);
+ }
+ break;
+ case LVB_T_LAYOUT:
+ if (size == 0)
+ break;
+
+ if (loc == RCL_CLIENT)
+ lvb = req_capsule_client_get(pill, &RMF_DLM_LVB);
+ else
+ lvb = req_capsule_server_get(pill, &RMF_DLM_LVB);
+ if (unlikely(lvb == NULL)) {
+ LDLM_ERROR(lock, "no LVB");
+ RETURN(-EPROTO);
+ }
+
+ memcpy(data, lvb, size);
+ break;
+ default:
+ LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type);
+ libcfs_debug_dumpstack(NULL);
+ RETURN(-EINVAL);
+ }
+
+ RETURN(0);
+}
+
+/**
+ * Create and fill in new LDLM lock with specified properties.
+ * Returns a referenced lock
+ */
+struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_type_t type,
+ ldlm_mode_t mode,
+ const struct ldlm_callback_suite *cbs,
+ void *data, __u32 lvb_len,
+ enum lvb_type lvb_type)
+{
+ struct ldlm_lock *lock;
+ struct ldlm_resource *res;
+ ENTRY;
+
+ res = ldlm_resource_get(ns, NULL, res_id, type, 1);
+ if (res == NULL)
+ RETURN(NULL);
+
+ lock = ldlm_lock_new(res);
+
+ if (lock == NULL)
+ RETURN(NULL);
+
+ lock->l_req_mode = mode;
+ lock->l_ast_data = data;
+ lock->l_pid = current_pid();
+ lock->l_ns_srv = !!ns_is_server(ns);
+ if (cbs) {
+ lock->l_blocking_ast = cbs->lcs_blocking;
+ lock->l_completion_ast = cbs->lcs_completion;
+ lock->l_glimpse_ast = cbs->lcs_glimpse;
+ lock->l_weigh_ast = cbs->lcs_weigh;
+ }
+
+ lock->l_tree_node = NULL;
+ /* if this is the extent lock, allocate the interval tree node */
+ if (type == LDLM_EXTENT) {
+ if (ldlm_interval_alloc(lock) == NULL)
+ GOTO(out, 0);
+ }
+
+ if (lvb_len) {
+ lock->l_lvb_len = lvb_len;
+ OBD_ALLOC(lock->l_lvb_data, lvb_len);
+ if (lock->l_lvb_data == NULL)
+ GOTO(out, 0);
+ }
+
+ lock->l_lvb_type = lvb_type;
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK))
+ GOTO(out, 0);
+
+ RETURN(lock);
+
+out:
+ ldlm_lock_destroy(lock);
+ LDLM_LOCK_RELEASE(lock);
+ return NULL;
+}
+
+/**
+ * Enqueue (request) a lock.
+ *
+ * Does not block. As a result of enqueue the lock would be put
+ * into granted or waiting list.
+ *
+ * If namespace has intent policy sent and the lock has LDLM_FL_HAS_INTENT flag
+ * set, skip all the enqueueing and delegate lock processing to intent policy
+ * function.
+ */
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
+ struct ldlm_lock **lockp,
+ void *cookie, __u64 *flags)
+{
+ struct ldlm_lock *lock = *lockp;
+ struct ldlm_resource *res = lock->l_resource;
+ int local = ns_is_client(ldlm_res_to_ns(res));
+ ldlm_error_t rc = ELDLM_OK;
+ struct ldlm_interval *node = NULL;
+ ENTRY;
+
+ lock->l_last_activity = cfs_time_current_sec();
+ /* policies are not executed on the client or during replay */
+ if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
+ && !local && ns->ns_policy) {
+ rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
+ NULL);
+ if (rc == ELDLM_LOCK_REPLACED) {
+ /* The lock that was returned has already been granted,
+ * and placed into lockp. If it's not the same as the
+ * one we passed in, then destroy the old one and our
+ * work here is done. */
+ if (lock != *lockp) {
+ ldlm_lock_destroy(lock);
+ LDLM_LOCK_RELEASE(lock);
+ }
+ *flags |= LDLM_FL_LOCK_CHANGED;
+ RETURN(0);
+ } else if (rc != ELDLM_OK ||
+ (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
+ ldlm_lock_destroy(lock);
+ RETURN(rc);
+ }
+ }
+
+ /* For a replaying lock, it might be already in granted list. So
+ * unlinking the lock will cause the interval node to be freed, we
+ * have to allocate the interval node early otherwise we can't regrant
+ * this lock in the future. - jay */
+ if (!local && (*flags & LDLM_FL_REPLAY) && res->lr_type == LDLM_EXTENT)
+ OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);
+
+ lock_res_and_lock(lock);
+ if (local && lock->l_req_mode == lock->l_granted_mode) {
+ /* The server returned a blocked lock, but it was granted
+ * before we got a chance to actually enqueue it. We don't
+ * need to do anything else. */
+ *flags &= ~(LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
+ GOTO(out, ELDLM_OK);
+ }
+
+ ldlm_resource_unlink_lock(lock);
+ if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
+ if (node == NULL) {
+ ldlm_lock_destroy_nolock(lock);
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&node->li_group);
+ ldlm_interval_attach(node, lock);
+ node = NULL;
+ }
+
+ /* Some flags from the enqueue want to make it into the AST, via the
+ * lock's l_flags. */
+ lock->l_flags |= *flags & LDLM_AST_DISCARD_DATA;
+
+ /* This distinction between local lock trees is very important; a client
+ * namespace only has information about locks taken by that client, and
+ * thus doesn't have enough information to decide for itself if it can
+ * be granted (below). In this case, we do exactly what the server
+ * tells us to do, as dictated by the 'flags'.
+ *
+ * We do exactly the same thing during recovery, when the server is
+ * more or less trusting the clients not to lie.
+ *
+ * FIXME (bug 268): Detect obvious lies by checking compatibility in
+ * granted/converting queues. */
+ if (local) {
+ if (*flags & LDLM_FL_BLOCK_CONV)
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
+ else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
+ ldlm_resource_add_lock(res, &res->lr_waiting, lock);
+ else
+ ldlm_grant_lock(lock, NULL);
+ GOTO(out, ELDLM_OK);
+ } else {
+ CERROR("This is client-side-only module, cannot handle "
+ "LDLM_NAMESPACE_SERVER resource type lock.\n");
+ LBUG();
+ }
+
+out:
+ unlock_res_and_lock(lock);
+ if (node)
+ OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
+ return rc;
+}
+
+
+/**
+ * Process a call to blocking AST callback for a lock in ast_work list
+ */
+static int
+ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
+{
+ struct ldlm_cb_set_arg *arg = opaq;
+ struct ldlm_lock_desc d;
+ int rc;
+ struct ldlm_lock *lock;
+ ENTRY;
+
+ if (list_empty(arg->list))
+ RETURN(-ENOENT);
+
+ lock = list_entry(arg->list->next, struct ldlm_lock, l_bl_ast);
+
+ /* nobody should touch l_bl_ast */
+ lock_res_and_lock(lock);
+ list_del_init(&lock->l_bl_ast);
+
+ LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+ LASSERT(lock->l_bl_ast_run == 0);
+ LASSERT(lock->l_blocking_lock);
+ lock->l_bl_ast_run++;
+ unlock_res_and_lock(lock);
+
+ ldlm_lock2desc(lock->l_blocking_lock, &d);
+
+ rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING);
+ LDLM_LOCK_RELEASE(lock->l_blocking_lock);
+ lock->l_blocking_lock = NULL;
+ LDLM_LOCK_RELEASE(lock);
+
+ RETURN(rc);
+}
+
+/**
+ * Process a call to completion AST callback for a lock in ast_work list
+ */
+static int
+ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
+{
+ struct ldlm_cb_set_arg *arg = opaq;
+ int rc = 0;
+ struct ldlm_lock *lock;
+ ldlm_completion_callback completion_callback;
+ ENTRY;
+
+ if (list_empty(arg->list))
+ RETURN(-ENOENT);
+
+ lock = list_entry(arg->list->next, struct ldlm_lock, l_cp_ast);
+
+ /* It's possible to receive a completion AST before we've set
+ * the l_completion_ast pointer: either because the AST arrived
+ * before the reply, or simply because there's a small race
+ * window between receiving the reply and finishing the local
+ * enqueue. (bug 842)
+ *
+ * This can't happen with the blocking_ast, however, because we
+ * will never call the local blocking_ast until we drop our
+ * reader/writer reference, which we won't do until we get the
+ * reply and finish enqueueing. */
+
+ /* nobody should touch l_cp_ast */
+ lock_res_and_lock(lock);
+ list_del_init(&lock->l_cp_ast);
+ LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
+ /* save l_completion_ast since it can be changed by
+ * mds_intent_policy(), see bug 14225 */
+ completion_callback = lock->l_completion_ast;
+ lock->l_flags &= ~LDLM_FL_CP_REQD;
+ unlock_res_and_lock(lock);
+
+ if (completion_callback != NULL)
+ rc = completion_callback(lock, 0, (void *)arg);
+ LDLM_LOCK_RELEASE(lock);
+
+ RETURN(rc);
+}
+
+/**
+ * Process a call to revocation AST callback for a lock in ast_work list
+ */
+static int
+ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
+{
+ struct ldlm_cb_set_arg *arg = opaq;
+ struct ldlm_lock_desc desc;
+ int rc;
+ struct ldlm_lock *lock;
+ ENTRY;
+
+ if (list_empty(arg->list))
+ RETURN(-ENOENT);
+
+ lock = list_entry(arg->list->next, struct ldlm_lock, l_rk_ast);
+ list_del_init(&lock->l_rk_ast);
+
+ /* the desc just pretend to exclusive */
+ ldlm_lock2desc(lock, &desc);
+ desc.l_req_mode = LCK_EX;
+ desc.l_granted_mode = 0;
+
+ rc = lock->l_blocking_ast(lock, &desc, (void*)arg, LDLM_CB_BLOCKING);
+ LDLM_LOCK_RELEASE(lock);
+
+ RETURN(rc);
+}
+
+/**
+ * Process a call to glimpse AST callback for a lock in ast_work list
+ */
+int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
+{
+ struct ldlm_cb_set_arg *arg = opaq;
+ struct ldlm_glimpse_work *gl_work;
+ struct ldlm_lock *lock;
+ int rc = 0;
+ ENTRY;
+
+ if (list_empty(arg->list))
+ RETURN(-ENOENT);
+
+ gl_work = list_entry(arg->list->next, struct ldlm_glimpse_work,
+ gl_list);
+ list_del_init(&gl_work->gl_list);
+
+ lock = gl_work->gl_lock;
+
+ /* transfer the glimpse descriptor to ldlm_cb_set_arg */
+ arg->gl_desc = gl_work->gl_desc;
+
+ /* invoke the actual glimpse callback */
+ if (lock->l_glimpse_ast(lock, (void*)arg) == 0)
+ rc = 1;
+
+ LDLM_LOCK_RELEASE(lock);
+
+ if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0)
+ OBD_FREE_PTR(gl_work);
+
+ RETURN(rc);
+}
+
+/**
+ * Process list of locks in need of ASTs being sent.
+ *
+ * Used on server to send multiple ASTs together instead of sending one by
+ * one.
+ */
+int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
+ ldlm_desc_ast_t ast_type)
+{
+ struct ldlm_cb_set_arg *arg;
+ set_producer_func work_ast_lock;
+ int rc;
+
+ if (list_empty(rpc_list))
+ RETURN(0);
+
+ OBD_ALLOC_PTR(arg);
+ if (arg == NULL)
+ RETURN(-ENOMEM);
+
+ atomic_set(&arg->restart, 0);
+ arg->list = rpc_list;
+
+ switch (ast_type) {
+ case LDLM_WORK_BL_AST:
+ arg->type = LDLM_BL_CALLBACK;
+ work_ast_lock = ldlm_work_bl_ast_lock;
+ break;
+ case LDLM_WORK_CP_AST:
+ arg->type = LDLM_CP_CALLBACK;
+ work_ast_lock = ldlm_work_cp_ast_lock;
+ break;
+ case LDLM_WORK_REVOKE_AST:
+ arg->type = LDLM_BL_CALLBACK;
+ work_ast_lock = ldlm_work_revoke_ast_lock;
+ break;
+ case LDLM_WORK_GL_AST:
+ arg->type = LDLM_GL_CALLBACK;
+ work_ast_lock = ldlm_work_gl_ast_lock;
+ break;
+ default:
+ LBUG();
+ }
+
+ /* We create a ptlrpc request set with flow control extension.
+ * This request set will use the work_ast_lock function to produce new
+ * requests and will send a new request each time one completes in order
+ * to keep the number of requests in flight to ns_max_parallel_ast */
+ arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX,
+ work_ast_lock, arg);
+ if (arg->set == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ ptlrpc_set_wait(arg->set);
+ ptlrpc_set_destroy(arg->set);
+
+ rc = atomic_read(&arg->restart) ? -ERESTART : 0;
+ GOTO(out, rc);
+out:
+ OBD_FREE_PTR(arg);
+ return rc;
+}
+
+static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
+{
+ ldlm_reprocess_all(res);
+ return LDLM_ITER_CONTINUE;
+}
+
+static int ldlm_reprocess_res(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ int rc;
+
+ rc = reprocess_one_queue(res, arg);
+
+ return rc == LDLM_ITER_STOP;
+}
+
+/**
+ * Iterate through all resources on a namespace attempting to grant waiting
+ * locks.
+ */
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
+{
+ ENTRY;
+
+ if (ns != NULL) {
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_reprocess_res, NULL);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_reprocess_all_ns);
+
+/**
+ * Try to grant all waiting locks on a resource.
+ *
+ * Calls ldlm_reprocess_queue on converting and waiting queues.
+ *
+ * Typically called after some resource locks are cancelled to see
+ * if anything could be granted as a result of the cancellation.
+ */
+void ldlm_reprocess_all(struct ldlm_resource *res)
+{
+ LIST_HEAD(rpc_list);
+
+ ENTRY;
+ if (!ns_is_client(ldlm_res_to_ns(res))) {
+ CERROR("This is client-side-only module, cannot handle "
+ "LDLM_NAMESPACE_SERVER resource type lock.\n");
+ LBUG();
+ }
+ EXIT;
+}
+
+/**
+ * Helper function to call blocking AST for LDLM lock \a lock in a
+ * "cancelling" mode.
+ */
+void ldlm_cancel_callback(struct ldlm_lock *lock)
+{
+ check_res_locked(lock->l_resource);
+ if (!(lock->l_flags & LDLM_FL_CANCEL)) {
+ lock->l_flags |= LDLM_FL_CANCEL;
+ if (lock->l_blocking_ast) {
+ unlock_res_and_lock(lock);
+ lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
+ LDLM_CB_CANCELING);
+ lock_res_and_lock(lock);
+ } else {
+ LDLM_DEBUG(lock, "no blocking ast");
+ }
+ }
+ lock->l_flags |= LDLM_FL_BL_DONE;
+}
+
+/**
+ * Remove skiplist-enabled LDLM lock \a req from granted list
+ */
+void ldlm_unlink_lock_skiplist(struct ldlm_lock *req)
+{
+ if (req->l_resource->lr_type != LDLM_PLAIN &&
+ req->l_resource->lr_type != LDLM_IBITS)
+ return;
+
+ list_del_init(&req->l_sl_policy);
+ list_del_init(&req->l_sl_mode);
+}
+
+/**
+ * Attempts to cancel LDLM lock \a lock that has no reader/writer references.
+ */
+void ldlm_lock_cancel(struct ldlm_lock *lock)
+{
+ struct ldlm_resource *res;
+ struct ldlm_namespace *ns;
+ ENTRY;
+
+ lock_res_and_lock(lock);
+
+ res = lock->l_resource;
+ ns = ldlm_res_to_ns(res);
+
+ /* Please do not, no matter how tempting, remove this LBUG without
+ * talking to me first. -phik */
+ if (lock->l_readers || lock->l_writers) {
+ LDLM_ERROR(lock, "lock still has references");
+ LBUG();
+ }
+
+ if (lock->l_waited)
+ ldlm_del_waiting_lock(lock);
+
+ /* Releases cancel callback. */
+ ldlm_cancel_callback(lock);
+
+ /* Yes, second time, just in case it was added again while we were
+ running with no res lock in ldlm_cancel_callback */
+ if (lock->l_waited)
+ ldlm_del_waiting_lock(lock);
+
+ ldlm_resource_unlink_lock(lock);
+ ldlm_lock_destroy_nolock(lock);
+
+ if (lock->l_granted_mode == lock->l_req_mode)
+ ldlm_pool_del(&ns->ns_pool, lock);
+
+ /* Make sure we will not be called again for same lock what is possible
+ * if not to zero out lock->l_granted_mode */
+ lock->l_granted_mode = LCK_MINMODE;
+ unlock_res_and_lock(lock);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_lock_cancel);
+
+/**
+ * Set opaque data into the lock that only makes sense to upper layer.
+ */
+int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
+{
+ struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+ int rc = -EINVAL;
+ ENTRY;
+
+ if (lock) {
+ if (lock->l_ast_data == NULL)
+ lock->l_ast_data = data;
+ if (lock->l_ast_data == data)
+ rc = 0;
+ LDLM_LOCK_PUT(lock);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_lock_set_data);
+
+struct export_cl_data {
+ struct obd_export *ecl_exp;
+ int ecl_loop;
+};
+
+/**
+ * Iterator function for ldlm_cancel_locks_for_export.
+ * Cancels passed locks.
+ */
+int ldlm_cancel_locks_for_export_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+
+{
+ struct export_cl_data *ecl = (struct export_cl_data *)data;
+ struct obd_export *exp = ecl->ecl_exp;
+ struct ldlm_lock *lock = cfs_hash_object(hs, hnode);
+ struct ldlm_resource *res;
+
+ res = ldlm_resource_getref(lock->l_resource);
+ LDLM_LOCK_GET(lock);
+
+ LDLM_DEBUG(lock, "export %p", exp);
+ ldlm_res_lvbo_update(res, NULL, 1);
+ ldlm_lock_cancel(lock);
+ ldlm_reprocess_all(res);
+ ldlm_resource_putref(res);
+ LDLM_LOCK_RELEASE(lock);
+
+ ecl->ecl_loop++;
+ if ((ecl->ecl_loop & -ecl->ecl_loop) == ecl->ecl_loop) {
+ CDEBUG(D_INFO,
+ "Cancel lock %p for export %p (loop %d), still have "
+ "%d locks left on hash table.\n",
+ lock, exp, ecl->ecl_loop,
+ atomic_read(&hs->hs_count));
+ }
+
+ return 0;
+}
+
+/**
+ * Cancel all locks for given export.
+ *
+ * Typically called on client disconnection/eviction
+ */
+void ldlm_cancel_locks_for_export(struct obd_export *exp)
+{
+ struct export_cl_data ecl = {
+ .ecl_exp = exp,
+ .ecl_loop = 0,
+ };
+
+ cfs_hash_for_each_empty(exp->exp_lock_hash,
+ ldlm_cancel_locks_for_export_cb, &ecl);
+}
+
+/**
+ * Downgrade an exclusive lock.
+ *
+ * A fast variant of ldlm_lock_convert for convertion of exclusive
+ * locks. The convertion is always successful.
+ * Used by Commit on Sharing (COS) code.
+ *
+ * \param lock A lock to convert
+ * \param new_mode new lock mode
+ */
+void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode)
+{
+ ENTRY;
+
+ LASSERT(lock->l_granted_mode & (LCK_PW | LCK_EX));
+ LASSERT(new_mode == LCK_COS);
+
+ lock_res_and_lock(lock);
+ ldlm_resource_unlink_lock(lock);
+ /*
+ * Remove the lock from pool as it will be added again in
+ * ldlm_grant_lock() called below.
+ */
+ ldlm_pool_del(&ldlm_lock_to_ns(lock)->ns_pool, lock);
+
+ lock->l_req_mode = new_mode;
+ ldlm_grant_lock(lock, NULL);
+ unlock_res_and_lock(lock);
+ ldlm_reprocess_all(lock->l_resource);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_lock_downgrade);
+
+/**
+ * Attempt to convert already granted lock to a different mode.
+ *
+ * While lock conversion is not currently used, future client-side
+ * optimizations could take advantage of it to avoid discarding cached
+ * pages on a file.
+ */
+struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
+ __u32 *flags)
+{
+ LIST_HEAD(rpc_list);
+ struct ldlm_resource *res;
+ struct ldlm_namespace *ns;
+ int granted = 0;
+ struct ldlm_interval *node;
+ ENTRY;
+
+ /* Just return if mode is unchanged. */
+ if (new_mode == lock->l_granted_mode) {
+ *flags |= LDLM_FL_BLOCK_GRANTED;
+ RETURN(lock->l_resource);
+ }
+
+ /* I can't check the type of lock here because the bitlock of lock
+ * is not held here, so do the allocation blindly. -jay */
+ OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);
+ if (node == NULL) /* Actually, this causes EDEADLOCK to be returned */
+ RETURN(NULL);
+
+ LASSERTF((new_mode == LCK_PW && lock->l_granted_mode == LCK_PR),
+ "new_mode %u, granted %u\n", new_mode, lock->l_granted_mode);
+
+ lock_res_and_lock(lock);
+
+ res = lock->l_resource;
+ ns = ldlm_res_to_ns(res);
+
+ lock->l_req_mode = new_mode;
+ if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) {
+ ldlm_resource_unlink_lock(lock);
+ } else {
+ ldlm_resource_unlink_lock(lock);
+ if (res->lr_type == LDLM_EXTENT) {
+ /* FIXME: ugly code, I have to attach the lock to a
+ * interval node again since perhaps it will be granted
+ * soon */
+ INIT_LIST_HEAD(&node->li_group);
+ ldlm_interval_attach(node, lock);
+ node = NULL;
+ }
+ }
+
+ /*
+ * Remove old lock from the pool before adding the lock with new
+ * mode below in ->policy()
+ */
+ ldlm_pool_del(&ns->ns_pool, lock);
+
+ /* If this is a local resource, put it on the appropriate list. */
+ if (ns_is_client(ldlm_res_to_ns(res))) {
+ if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
+ ldlm_resource_add_lock(res, &res->lr_converting, lock);
+ } else {
+ /* This should never happen, because of the way the
+ * server handles conversions. */
+ LDLM_ERROR(lock, "Erroneous flags %x on local lock\n",
+ *flags);
+ LBUG();
+
+ ldlm_grant_lock(lock, &rpc_list);
+ granted = 1;
+ /* FIXME: completion handling not with lr_lock held ! */
+ if (lock->l_completion_ast)
+ lock->l_completion_ast(lock, 0, NULL);
+ }
+ } else {
+ CERROR("This is client-side-only module, cannot handle "
+ "LDLM_NAMESPACE_SERVER resource type lock.\n");
+ LBUG();
+ }
+ unlock_res_and_lock(lock);
+
+ if (granted)
+ ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_CP_AST);
+ if (node)
+ OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
+ RETURN(res);
+}
+EXPORT_SYMBOL(ldlm_lock_convert);
+
+/**
+ * Print lock with lock handle \a lockh description into debug log.
+ *
+ * Used when printing all locks on a resource for debug purposes.
+ */
+void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
+{
+ struct ldlm_lock *lock;
+
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ lock = ldlm_handle2lock(lockh);
+ if (lock == NULL)
+ return;
+
+ LDLM_DEBUG_LIMIT(level, lock, "###");
+
+ LDLM_LOCK_PUT(lock);
+}
+EXPORT_SYMBOL(ldlm_lock_dump_handle);
+
+/**
+ * Print lock information with custom message into debug log.
+ * Helper function.
+ */
+void _ldlm_lock_debug(struct ldlm_lock *lock,
+ struct libcfs_debug_msg_data *msgdata,
+ const char *fmt, ...)
+{
+ va_list args;
+ struct obd_export *exp = lock->l_export;
+ struct ldlm_resource *resource = lock->l_resource;
+ char *nid = "local";
+
+ va_start(args, fmt);
+
+ if (exp && exp->exp_connection) {
+ nid = libcfs_nid2str(exp->exp_connection->c_peer.nid);
+ } else if (exp && exp->exp_obd != NULL) {
+ struct obd_import *imp = exp->exp_obd->u.cli.cl_import;
+ nid = libcfs_nid2str(imp->imp_connection->c_peer.nid);
+ }
+
+ if (resource == NULL) {
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" nid: %s "
+ "remote: "LPX64" expref: %d pid: %u timeout: %lu "
+ "lvb_type: %d\n",
+ lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_flags, nid, lock->l_remote_handle.cookie,
+ exp ? atomic_read(&exp->exp_refcount) : -99,
+ lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type);
+ va_end(args);
+ return;
+ }
+
+ switch (resource->lr_type) {
+ case LDLM_EXTENT:
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64
+ "] (req "LPU64"->"LPU64") flags: "LPX64" nid: %s remote:"
+ " "LPX64" expref: %d pid: %u timeout: %lu lvb_type: %d\n",
+ ldlm_lock_to_ns_name(lock), lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ resource->lr_name.name[0],
+ resource->lr_name.name[1],
+ atomic_read(&resource->lr_refcount),
+ ldlm_typename[resource->lr_type],
+ lock->l_policy_data.l_extent.start,
+ lock->l_policy_data.l_extent.end,
+ lock->l_req_extent.start, lock->l_req_extent.end,
+ lock->l_flags, nid, lock->l_remote_handle.cookie,
+ exp ? atomic_read(&exp->exp_refcount) : -99,
+ lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type);
+ break;
+
+ case LDLM_FLOCK:
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "
+ "["LPU64"->"LPU64"] flags: "LPX64" nid: %s remote: "LPX64
+ " expref: %d pid: %u timeout: %lu\n",
+ ldlm_lock_to_ns_name(lock), lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ resource->lr_name.name[0],
+ resource->lr_name.name[1],
+ atomic_read(&resource->lr_refcount),
+ ldlm_typename[resource->lr_type],
+ lock->l_policy_data.l_flock.pid,
+ lock->l_policy_data.l_flock.start,
+ lock->l_policy_data.l_flock.end,
+ lock->l_flags, nid, lock->l_remote_handle.cookie,
+ exp ? atomic_read(&exp->exp_refcount) : -99,
+ lock->l_pid, lock->l_callback_timeout);
+ break;
+
+ case LDLM_IBITS:
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "
+ "flags: "LPX64" nid: %s remote: "LPX64" expref: %d "
+ "pid: %u timeout: %lu lvb_type: %d\n",
+ ldlm_lock_to_ns_name(lock),
+ lock, lock->l_handle.h_cookie,
+ atomic_read (&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ resource->lr_name.name[0],
+ resource->lr_name.name[1],
+ lock->l_policy_data.l_inodebits.bits,
+ atomic_read(&resource->lr_refcount),
+ ldlm_typename[resource->lr_type],
+ lock->l_flags, nid, lock->l_remote_handle.cookie,
+ exp ? atomic_read(&exp->exp_refcount) : -99,
+ lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type);
+ break;
+
+ default:
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s flags: "LPX64" "
+ "nid: %s remote: "LPX64" expref: %d pid: %u timeout: %lu"
+ "lvb_type: %d\n",
+ ldlm_lock_to_ns_name(lock),
+ lock, lock->l_handle.h_cookie,
+ atomic_read (&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ resource->lr_name.name[0],
+ resource->lr_name.name[1],
+ atomic_read(&resource->lr_refcount),
+ ldlm_typename[resource->lr_type],
+ lock->l_flags, nid, lock->l_remote_handle.cookie,
+ exp ? atomic_read(&exp->exp_refcount) : -99,
+ lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type);
+ break;
+ }
+ va_end(args);
+}
+EXPORT_SYMBOL(_ldlm_lock_debug);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
new file mode 100644
index 000000000000..324d5e4286dc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -0,0 +1,1238 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_lockd.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+# include <linux/libcfs/libcfs.h>
+
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <linux/list.h>
+#include "ldlm_internal.h"
+
+static int ldlm_num_threads;
+CFS_MODULE_PARM(ldlm_num_threads, "i", int, 0444,
+ "number of DLM service threads to start");
+
+static char *ldlm_cpts;
+CFS_MODULE_PARM(ldlm_cpts, "s", charp, 0444,
+ "CPU partitions ldlm threads should run on");
+
+extern struct kmem_cache *ldlm_resource_slab;
+extern struct kmem_cache *ldlm_lock_slab;
+static struct mutex ldlm_ref_mutex;
+static int ldlm_refcount;
+
+struct ldlm_cb_async_args {
+ struct ldlm_cb_set_arg *ca_set_arg;
+ struct ldlm_lock *ca_lock;
+};
+
+/* LDLM state */
+
+static struct ldlm_state *ldlm_state;
+
+inline cfs_time_t round_timeout(cfs_time_t timeout)
+{
+ return cfs_time_seconds((int)cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1);
+}
+
+/* timeout for initial callback (AST) reply (bz10399) */
+static inline unsigned int ldlm_get_rq_timeout(void)
+{
+ /* Non-AT value */
+ unsigned int timeout = min(ldlm_timeout, obd_timeout / 3);
+
+ return timeout < 1 ? 1 : timeout;
+}
+
+#define ELT_STOPPED 0
+#define ELT_READY 1
+#define ELT_TERMINATE 2
+
+struct ldlm_bl_pool {
+ spinlock_t blp_lock;
+
+ /*
+ * blp_prio_list is used for callbacks that should be handled
+ * as a priority. It is used for LDLM_FL_DISCARD_DATA requests.
+ * see bug 13843
+ */
+ struct list_head blp_prio_list;
+
+ /*
+ * blp_list is used for all other callbacks which are likely
+ * to take longer to process.
+ */
+ struct list_head blp_list;
+
+ wait_queue_head_t blp_waitq;
+ struct completion blp_comp;
+ atomic_t blp_num_threads;
+ atomic_t blp_busy_threads;
+ int blp_min_threads;
+ int blp_max_threads;
+};
+
+struct ldlm_bl_work_item {
+ struct list_head blwi_entry;
+ struct ldlm_namespace *blwi_ns;
+ struct ldlm_lock_desc blwi_ld;
+ struct ldlm_lock *blwi_lock;
+ struct list_head blwi_head;
+ int blwi_count;
+ struct completion blwi_comp;
+ ldlm_cancel_flags_t blwi_flags;
+ int blwi_mem_pressure;
+};
+
+
+int ldlm_del_waiting_lock(struct ldlm_lock *lock)
+{
+ RETURN(0);
+}
+
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout)
+{
+ RETURN(0);
+}
+
+
+
+/**
+ * Callback handler for receiving incoming blocking ASTs.
+ *
+ * This can only happen on client side.
+ */
+void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld, struct ldlm_lock *lock)
+{
+ int do_ast;
+ ENTRY;
+
+ LDLM_DEBUG(lock, "client blocking AST callback handler");
+
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_CBPENDING;
+
+ if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
+ lock->l_flags |= LDLM_FL_CANCEL;
+
+ do_ast = (!lock->l_readers && !lock->l_writers);
+ unlock_res_and_lock(lock);
+
+ if (do_ast) {
+ CDEBUG(D_DLMTRACE, "Lock %p already unused, calling callback (%p)\n",
+ lock, lock->l_blocking_ast);
+ if (lock->l_blocking_ast != NULL)
+ lock->l_blocking_ast(lock, ld, lock->l_ast_data,
+ LDLM_CB_BLOCKING);
+ } else {
+ CDEBUG(D_DLMTRACE, "Lock %p is referenced, will be cancelled later\n",
+ lock);
+ }
+
+ LDLM_DEBUG(lock, "client blocking callback handler END");
+ LDLM_LOCK_RELEASE(lock);
+ EXIT;
+}
+
+/**
+ * Callback handler for receiving incoming completion ASTs.
+ *
+ * This only can happen on client side.
+ */
+static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
+ struct ldlm_namespace *ns,
+ struct ldlm_request *dlm_req,
+ struct ldlm_lock *lock)
+{
+ int lvb_len;
+ LIST_HEAD(ast_list);
+ int rc = 0;
+ ENTRY;
+
+ LDLM_DEBUG(lock, "client completion callback handler START");
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
+ int to = cfs_time_seconds(1);
+ while (to > 0) {
+ schedule_timeout_and_set_state(
+ TASK_INTERRUPTIBLE, to);
+ if (lock->l_granted_mode == lock->l_req_mode ||
+ lock->l_destroyed)
+ break;
+ }
+ }
+
+ lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
+ if (lvb_len < 0) {
+ LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len);
+ GOTO(out, rc = lvb_len);
+ } else if (lvb_len > 0) {
+ if (lock->l_lvb_len > 0) {
+ /* for extent lock, lvb contains ost_lvb{}. */
+ LASSERT(lock->l_lvb_data != NULL);
+
+ if (unlikely(lock->l_lvb_len < lvb_len)) {
+ LDLM_ERROR(lock, "Replied LVB is larger than "
+ "expectation, expected = %d, "
+ "replied = %d",
+ lock->l_lvb_len, lvb_len);
+ GOTO(out, rc = -EINVAL);
+ }
+ } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has
+ * variable length */
+ void *lvb_data;
+
+ OBD_ALLOC(lvb_data, lvb_len);
+ if (lvb_data == NULL) {
+ LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ lock_res_and_lock(lock);
+ LASSERT(lock->l_lvb_data == NULL);
+ lock->l_lvb_data = lvb_data;
+ lock->l_lvb_len = lvb_len;
+ unlock_res_and_lock(lock);
+ }
+ }
+
+ lock_res_and_lock(lock);
+ if (lock->l_destroyed ||
+ lock->l_granted_mode == lock->l_req_mode) {
+ /* bug 11300: the lock has already been granted */
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "Double grant race happened");
+ GOTO(out, rc = 0);
+ }
+
+ /* If we receive the completion AST before the actual enqueue returned,
+ * then we might need to switch lock modes, resources, or extents. */
+ if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
+ lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
+ LDLM_DEBUG(lock, "completion AST, new lock mode");
+ }
+
+ if (lock->l_resource->lr_type != LDLM_PLAIN) {
+ ldlm_convert_policy_to_local(req->rq_export,
+ dlm_req->lock_desc.l_resource.lr_type,
+ &dlm_req->lock_desc.l_policy_data,
+ &lock->l_policy_data);
+ LDLM_DEBUG(lock, "completion AST, new policy data");
+ }
+
+ ldlm_resource_unlink_lock(lock);
+ if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
+ &lock->l_resource->lr_name,
+ sizeof(lock->l_resource->lr_name)) != 0) {
+ unlock_res_and_lock(lock);
+ rc = ldlm_lock_change_resource(ns, lock,
+ &dlm_req->lock_desc.l_resource.lr_name);
+ if (rc < 0) {
+ LDLM_ERROR(lock, "Failed to allocate resource");
+ GOTO(out, rc);
+ }
+ LDLM_DEBUG(lock, "completion AST, new resource");
+ CERROR("change resource!\n");
+ lock_res_and_lock(lock);
+ }
+
+ if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
+ ldlm_lock_remove_from_lru(lock);
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
+ LDLM_DEBUG(lock, "completion AST includes blocking AST");
+ }
+
+ if (lock->l_lvb_len > 0) {
+ rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
+ lock->l_lvb_data, lvb_len);
+ if (rc < 0) {
+ unlock_res_and_lock(lock);
+ GOTO(out, rc);
+ }
+ }
+
+ ldlm_grant_lock(lock, &ast_list);
+ unlock_res_and_lock(lock);
+
+ LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
+
+ /* Let Enqueue to call osc_lock_upcall() and initialize
+ * l_ast_data */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2);
+
+ ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST);
+
+ LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)",
+ lock);
+ GOTO(out, rc);
+
+out:
+ if (rc < 0) {
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_FAILED;
+ unlock_res_and_lock(lock);
+ wake_up(&lock->l_waitq);
+ }
+ LDLM_LOCK_RELEASE(lock);
+}
+
+/**
+ * Callback handler for receiving incoming glimpse ASTs.
+ *
+ * This only can happen on client side. After handling the glimpse AST
+ * we also consider dropping the lock here if it is unused locally for a
+ * long time.
+ */
+static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
+ struct ldlm_namespace *ns,
+ struct ldlm_request *dlm_req,
+ struct ldlm_lock *lock)
+{
+ int rc = -ENOSYS;
+ ENTRY;
+
+ LDLM_DEBUG(lock, "client glimpse AST callback handler");
+
+ if (lock->l_glimpse_ast != NULL)
+ rc = lock->l_glimpse_ast(lock, req);
+
+ if (req->rq_repmsg != NULL) {
+ ptlrpc_reply(req);
+ } else {
+ req->rq_status = rc;
+ ptlrpc_error(req);
+ }
+
+ lock_res_and_lock(lock);
+ if (lock->l_granted_mode == LCK_PW &&
+ !lock->l_readers && !lock->l_writers &&
+ cfs_time_after(cfs_time_current(),
+ cfs_time_add(lock->l_last_used,
+ cfs_time_seconds(10)))) {
+ unlock_res_and_lock(lock);
+ if (ldlm_bl_to_thread_lock(ns, NULL, lock))
+ ldlm_handle_bl_callback(ns, NULL, lock);
+
+ EXIT;
+ return;
+ }
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_RELEASE(lock);
+ EXIT;
+}
+
+static int ldlm_callback_reply(struct ptlrpc_request *req, int rc)
+{
+ if (req->rq_no_reply)
+ return 0;
+
+ req->rq_status = rc;
+ if (!req->rq_packed_final) {
+ rc = lustre_pack_reply(req, 1, NULL, NULL);
+ if (rc)
+ return rc;
+ }
+ return ptlrpc_reply(req);
+}
+
+static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
+ ldlm_cancel_flags_t cancel_flags)
+{
+ struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
+ ENTRY;
+
+ spin_lock(&blp->blp_lock);
+ if (blwi->blwi_lock &&
+ blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+ /* add LDLM_FL_DISCARD_DATA requests to the priority list */
+ list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
+ } else {
+ /* other blocking callbacks are added to the regular list */
+ list_add_tail(&blwi->blwi_entry, &blp->blp_list);
+ }
+ spin_unlock(&blp->blp_lock);
+
+ wake_up(&blp->blp_waitq);
+
+ /* can not check blwi->blwi_flags as blwi could be already freed in
+ LCF_ASYNC mode */
+ if (!(cancel_flags & LCF_ASYNC))
+ wait_for_completion(&blwi->blwi_comp);
+
+ RETURN(0);
+}
+
+static inline void init_blwi(struct ldlm_bl_work_item *blwi,
+ struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ struct list_head *cancels, int count,
+ struct ldlm_lock *lock,
+ ldlm_cancel_flags_t cancel_flags)
+{
+ init_completion(&blwi->blwi_comp);
+ INIT_LIST_HEAD(&blwi->blwi_head);
+
+ if (memory_pressure_get())
+ blwi->blwi_mem_pressure = 1;
+
+ blwi->blwi_ns = ns;
+ blwi->blwi_flags = cancel_flags;
+ if (ld != NULL)
+ blwi->blwi_ld = *ld;
+ if (count) {
+ list_add(&blwi->blwi_head, cancels);
+ list_del_init(cancels);
+ blwi->blwi_count = count;
+ } else {
+ blwi->blwi_lock = lock;
+ }
+}
+
+/**
+ * Queues a list of locks \a cancels containing \a count locks
+ * for later processing by a blocking thread. If \a count is zero,
+ * then the lock referenced as \a lock is queued instead.
+ *
+ * The blocking thread would then call ->l_blocking_ast callback in the lock.
+ * If list addition fails an error is returned and caller is supposed to
+ * call ->l_blocking_ast itself.
+ */
+static int ldlm_bl_to_thread(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock,
+ struct list_head *cancels, int count,
+ ldlm_cancel_flags_t cancel_flags)
+{
+ ENTRY;
+
+ if (cancels && count == 0)
+ RETURN(0);
+
+ if (cancel_flags & LCF_ASYNC) {
+ struct ldlm_bl_work_item *blwi;
+
+ OBD_ALLOC(blwi, sizeof(*blwi));
+ if (blwi == NULL)
+ RETURN(-ENOMEM);
+ init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags);
+
+ RETURN(__ldlm_bl_to_thread(blwi, cancel_flags));
+ } else {
+ /* if it is synchronous call do minimum mem alloc, as it could
+ * be triggered from kernel shrinker
+ */
+ struct ldlm_bl_work_item blwi;
+
+ memset(&blwi, 0, sizeof(blwi));
+ init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags);
+ RETURN(__ldlm_bl_to_thread(&blwi, cancel_flags));
+ }
+}
+
+
+int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock)
+{
+ return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC);
+}
+
+int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
+ struct list_head *cancels, int count,
+ ldlm_cancel_flags_t cancel_flags)
+{
+ return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
+}
+
+/* Setinfo coming from Server (eg MDT) to Client (eg MDC)! */
+static int ldlm_handle_setinfo(struct ptlrpc_request *req)
+{
+ struct obd_device *obd = req->rq_export->exp_obd;
+ char *key;
+ void *val;
+ int keylen, vallen;
+ int rc = -ENOSYS;
+ ENTRY;
+
+ DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name);
+
+ req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO);
+
+ key = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+ if (key == NULL) {
+ DEBUG_REQ(D_IOCTL, req, "no set_info key");
+ RETURN(-EFAULT);
+ }
+ keylen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_KEY,
+ RCL_CLIENT);
+ val = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
+ if (val == NULL) {
+ DEBUG_REQ(D_IOCTL, req, "no set_info val");
+ RETURN(-EFAULT);
+ }
+ vallen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_VAL,
+ RCL_CLIENT);
+
+ /* We are responsible for swabbing contents of val */
+
+ if (KEY_IS(KEY_HSM_COPYTOOL_SEND))
+ /* Pass it on to mdc (the "export" in this case) */
+ rc = obd_set_info_async(req->rq_svc_thread->t_env,
+ req->rq_export,
+ sizeof(KEY_HSM_COPYTOOL_SEND),
+ KEY_HSM_COPYTOOL_SEND,
+ vallen, val, NULL);
+ else
+ DEBUG_REQ(D_WARNING, req, "ignoring unknown key %s", key);
+
+ return rc;
+}
+
+static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
+ const char *msg, int rc,
+ struct lustre_handle *handle)
+{
+ DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
+ "%s: [nid %s] [rc %d] [lock "LPX64"]",
+ msg, libcfs_id2str(req->rq_peer), rc,
+ handle ? handle->cookie : 0);
+ if (req->rq_no_reply)
+ CWARN("No reply was sent, maybe cause bug 21636.\n");
+ else if (rc)
+ CWARN("Send reply failed, maybe cause bug 21636.\n");
+}
+
+static int ldlm_handle_qc_callback(struct ptlrpc_request *req)
+{
+ struct obd_quotactl *oqctl;
+ struct client_obd *cli = &req->rq_export->exp_obd->u.cli;
+
+ oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ if (oqctl == NULL) {
+ CERROR("Can't unpack obd_quotactl\n");
+ RETURN(-EPROTO);
+ }
+
+ cli->cl_qchk_stat = oqctl->qc_stat;
+ return 0;
+}
+
+/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
+static int ldlm_callback_handler(struct ptlrpc_request *req)
+{
+ struct ldlm_namespace *ns;
+ struct ldlm_request *dlm_req;
+ struct ldlm_lock *lock;
+ int rc;
+ ENTRY;
+
+ /* Requests arrive in sender's byte order. The ptlrpc service
+ * handler has already checked and, if necessary, byte-swapped the
+ * incoming request message body, but I am responsible for the
+ * message buffers. */
+
+ /* do nothing for sec context finalize */
+ if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI)
+ RETURN(0);
+
+ req_capsule_init(&req->rq_pill, req, RCL_SERVER);
+
+ if (req->rq_export == NULL) {
+ rc = ldlm_callback_reply(req, -ENOTCONN);
+ ldlm_callback_errmsg(req, "Operate on unconnected server",
+ rc, NULL);
+ RETURN(0);
+ }
+
+ LASSERT(req->rq_export != NULL);
+ LASSERT(req->rq_export->exp_obd != NULL);
+
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case LDLM_BL_CALLBACK:
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ RETURN(0);
+ break;
+ case LDLM_CP_CALLBACK:
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
+ RETURN(0);
+ break;
+ case LDLM_GL_CALLBACK:
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET))
+ RETURN(0);
+ break;
+ case LDLM_SET_INFO:
+ rc = ldlm_handle_setinfo(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */
+ CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n");
+ req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
+ RETURN(0);
+ rc = llog_origin_handle_cancel(req);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
+ RETURN(0);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case LLOG_ORIGIN_HANDLE_CREATE:
+ req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
+ RETURN(0);
+ rc = llog_origin_handle_open(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
+ req_capsule_set(&req->rq_pill,
+ &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
+ RETURN(0);
+ rc = llog_origin_handle_next_block(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case LLOG_ORIGIN_HANDLE_READ_HEADER:
+ req_capsule_set(&req->rq_pill,
+ &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
+ RETURN(0);
+ rc = llog_origin_handle_read_header(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case LLOG_ORIGIN_HANDLE_CLOSE:
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
+ RETURN(0);
+ rc = llog_origin_handle_close(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ case OBD_QC_CALLBACK:
+ req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET))
+ RETURN(0);
+ rc = ldlm_handle_qc_callback(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
+ default:
+ CERROR("unknown opcode %u\n",
+ lustre_msg_get_opc(req->rq_reqmsg));
+ ldlm_callback_reply(req, -EPROTO);
+ RETURN(0);
+ }
+
+ ns = req->rq_export->exp_obd->obd_namespace;
+ LASSERT(ns != NULL);
+
+ req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);
+
+ dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+ if (dlm_req == NULL) {
+ rc = ldlm_callback_reply(req, -EPROTO);
+ ldlm_callback_errmsg(req, "Operate without parameter", rc,
+ NULL);
+ RETURN(0);
+ }
+
+ /* Force a known safe race, send a cancel to the server for a lock
+ * which the server has already started a blocking callback on. */
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
+ rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0);
+ if (rc < 0)
+ CERROR("ldlm_cli_cancel: %d\n", rc);
+ }
+
+ lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0);
+ if (!lock) {
+ CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock "
+ "disappeared\n", dlm_req->lock_handle[0].cookie);
+ rc = ldlm_callback_reply(req, -EINVAL);
+ ldlm_callback_errmsg(req, "Operate with invalid parameter", rc,
+ &dlm_req->lock_handle[0]);
+ RETURN(0);
+ }
+
+ if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
+ OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
+
+ /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
+ lock_res_and_lock(lock);
+ lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
+ LDLM_AST_FLAGS);
+ if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
+ /* If somebody cancels lock and cache is already dropped,
+ * or lock is failed before cp_ast received on client,
+ * we can tell the server we have no lock. Otherwise, we
+ * should send cancel after dropping the cache. */
+ if (((lock->l_flags & LDLM_FL_CANCELING) &&
+ (lock->l_flags & LDLM_FL_BL_DONE)) ||
+ (lock->l_flags & LDLM_FL_FAILED)) {
+ LDLM_DEBUG(lock, "callback on lock "
+ LPX64" - lock disappeared\n",
+ dlm_req->lock_handle[0].cookie);
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_RELEASE(lock);
+ rc = ldlm_callback_reply(req, -EINVAL);
+ ldlm_callback_errmsg(req, "Operate on stale lock", rc,
+ &dlm_req->lock_handle[0]);
+ RETURN(0);
+ }
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
+ ldlm_lock_remove_from_lru(lock);
+ lock->l_flags |= LDLM_FL_BL_AST;
+ }
+ unlock_res_and_lock(lock);
+
+ /* We want the ost thread to get this reply so that it can respond
+ * to ost requests (write cache writeback) that might be triggered
+ * in the callback.
+ *
+ * But we'd also like to be able to indicate in the reply that we're
+ * cancelling right now, because it's unused, or have an intent result
+ * in the reply, so we might have to push the responsibility for sending
+ * the reply down into the AST handlers, alas. */
+
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case LDLM_BL_CALLBACK:
+ CDEBUG(D_INODE, "blocking ast\n");
+ req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
+ if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
+ rc = ldlm_callback_reply(req, 0);
+ if (req->rq_no_reply || rc)
+ ldlm_callback_errmsg(req, "Normal process", rc,
+ &dlm_req->lock_handle[0]);
+ }
+ if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock))
+ ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock);
+ break;
+ case LDLM_CP_CALLBACK:
+ CDEBUG(D_INODE, "completion ast\n");
+ req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK);
+ ldlm_callback_reply(req, 0);
+ ldlm_handle_cp_callback(req, ns, dlm_req, lock);
+ break;
+ case LDLM_GL_CALLBACK:
+ CDEBUG(D_INODE, "glimpse ast\n");
+ req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK);
+ ldlm_handle_gl_callback(req, ns, dlm_req, lock);
+ break;
+ default:
+ LBUG(); /* checked above */
+ }
+
+ RETURN(0);
+}
+
+
+static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
+{
+ struct ldlm_bl_work_item *blwi = NULL;
+ static unsigned int num_bl = 0;
+
+ spin_lock(&blp->blp_lock);
+ /* process a request from the blp_list at least every blp_num_threads */
+ if (!list_empty(&blp->blp_list) &&
+ (list_empty(&blp->blp_prio_list) || num_bl == 0))
+ blwi = list_entry(blp->blp_list.next,
+ struct ldlm_bl_work_item, blwi_entry);
+ else
+ if (!list_empty(&blp->blp_prio_list))
+ blwi = list_entry(blp->blp_prio_list.next,
+ struct ldlm_bl_work_item,
+ blwi_entry);
+
+ if (blwi) {
+ if (++num_bl >= atomic_read(&blp->blp_num_threads))
+ num_bl = 0;
+ list_del(&blwi->blwi_entry);
+ }
+ spin_unlock(&blp->blp_lock);
+
+ return blwi;
+}
+
+/* This only contains temporary data until the thread starts */
+struct ldlm_bl_thread_data {
+ char bltd_name[CFS_CURPROC_COMM_MAX];
+ struct ldlm_bl_pool *bltd_blp;
+ struct completion bltd_comp;
+ int bltd_num;
+};
+
+static int ldlm_bl_thread_main(void *arg);
+
+static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
+{
+ struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
+ task_t *task;
+
+ init_completion(&bltd.bltd_comp);
+ bltd.bltd_num = atomic_read(&blp->blp_num_threads);
+ snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1,
+ "ldlm_bl_%02d", bltd.bltd_num);
+ task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name);
+ if (IS_ERR(task)) {
+ CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
+ atomic_read(&blp->blp_num_threads), PTR_ERR(task));
+ return PTR_ERR(task);
+ }
+ wait_for_completion(&bltd.bltd_comp);
+
+ return 0;
+}
+
+/**
+ * Main blocking requests processing thread.
+ *
+ * Callers put locks into its queue by calling ldlm_bl_to_thread.
+ * This thread in the end ends up doing actual call to ->l_blocking_ast
+ * for queued locks.
+ */
+static int ldlm_bl_thread_main(void *arg)
+{
+ struct ldlm_bl_pool *blp;
+ ENTRY;
+
+ {
+ struct ldlm_bl_thread_data *bltd = arg;
+
+ blp = bltd->bltd_blp;
+
+ atomic_inc(&blp->blp_num_threads);
+ atomic_inc(&blp->blp_busy_threads);
+
+ complete(&bltd->bltd_comp);
+ /* cannot use bltd after this, it is only on caller's stack */
+ }
+
+ while (1) {
+ struct l_wait_info lwi = { 0 };
+ struct ldlm_bl_work_item *blwi = NULL;
+ int busy;
+
+ blwi = ldlm_bl_get_work(blp);
+
+ if (blwi == NULL) {
+ atomic_dec(&blp->blp_busy_threads);
+ l_wait_event_exclusive(blp->blp_waitq,
+ (blwi = ldlm_bl_get_work(blp)) != NULL,
+ &lwi);
+ busy = atomic_inc_return(&blp->blp_busy_threads);
+ } else {
+ busy = atomic_read(&blp->blp_busy_threads);
+ }
+
+ if (blwi->blwi_ns == NULL)
+ /* added by ldlm_cleanup() */
+ break;
+
+ /* Not fatal if racy and have a few too many threads */
+ if (unlikely(busy < blp->blp_max_threads &&
+ busy >= atomic_read(&blp->blp_num_threads) &&
+ !blwi->blwi_mem_pressure))
+ /* discard the return value, we tried */
+ ldlm_bl_thread_start(blp);
+
+ if (blwi->blwi_mem_pressure)
+ memory_pressure_set();
+
+ if (blwi->blwi_count) {
+ int count;
+ /* The special case when we cancel locks in LRU
+ * asynchronously, we pass the list of locks here.
+ * Thus locks are marked LDLM_FL_CANCELING, but NOT
+ * canceled locally yet. */
+ count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
+ blwi->blwi_count,
+ LCF_BL_AST);
+ ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
+ blwi->blwi_flags);
+ } else {
+ ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
+ blwi->blwi_lock);
+ }
+ if (blwi->blwi_mem_pressure)
+ memory_pressure_clr();
+
+ if (blwi->blwi_flags & LCF_ASYNC)
+ OBD_FREE(blwi, sizeof(*blwi));
+ else
+ complete(&blwi->blwi_comp);
+ }
+
+ atomic_dec(&blp->blp_busy_threads);
+ atomic_dec(&blp->blp_num_threads);
+ complete(&blp->blp_comp);
+ RETURN(0);
+}
+
+
+static int ldlm_setup(void);
+static int ldlm_cleanup(void);
+
+int ldlm_get_ref(void)
+{
+ int rc = 0;
+ ENTRY;
+ mutex_lock(&ldlm_ref_mutex);
+ if (++ldlm_refcount == 1) {
+ rc = ldlm_setup();
+ if (rc)
+ ldlm_refcount--;
+ }
+ mutex_unlock(&ldlm_ref_mutex);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_get_ref);
+
+void ldlm_put_ref(void)
+{
+ ENTRY;
+ mutex_lock(&ldlm_ref_mutex);
+ if (ldlm_refcount == 1) {
+ int rc = ldlm_cleanup();
+ if (rc)
+ CERROR("ldlm_cleanup failed: %d\n", rc);
+ else
+ ldlm_refcount--;
+ } else {
+ ldlm_refcount--;
+ }
+ mutex_unlock(&ldlm_ref_mutex);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_put_ref);
+
+/*
+ * Export handle<->lock hash operations.
+ */
+static unsigned
+ldlm_export_lock_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_u64_hash(((struct lustre_handle *)key)->cookie, mask);
+}
+
+static void *
+ldlm_export_lock_key(struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
+ return &lock->l_remote_handle;
+}
+
+static void
+ldlm_export_lock_keycpy(struct hlist_node *hnode, void *key)
+{
+ struct ldlm_lock *lock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
+ lock->l_remote_handle = *(struct lustre_handle *)key;
+}
+
+static int
+ldlm_export_lock_keycmp(const void *key, struct hlist_node *hnode)
+{
+ return lustre_handle_equal(ldlm_export_lock_key(hnode), key);
+}
+
+static void *
+ldlm_export_lock_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
+}
+
+static void
+ldlm_export_lock_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
+ LDLM_LOCK_GET(lock);
+}
+
+static void
+ldlm_export_lock_put(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_lock *lock;
+
+ lock = hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
+ LDLM_LOCK_RELEASE(lock);
+}
+
+static cfs_hash_ops_t ldlm_export_lock_ops = {
+ .hs_hash = ldlm_export_lock_hash,
+ .hs_key = ldlm_export_lock_key,
+ .hs_keycmp = ldlm_export_lock_keycmp,
+ .hs_keycpy = ldlm_export_lock_keycpy,
+ .hs_object = ldlm_export_lock_object,
+ .hs_get = ldlm_export_lock_get,
+ .hs_put = ldlm_export_lock_put,
+ .hs_put_locked = ldlm_export_lock_put,
+};
+
+int ldlm_init_export(struct obd_export *exp)
+{
+ ENTRY;
+
+ exp->exp_lock_hash =
+ cfs_hash_create(obd_uuid2str(&exp->exp_client_uuid),
+ HASH_EXP_LOCK_CUR_BITS,
+ HASH_EXP_LOCK_MAX_BITS,
+ HASH_EXP_LOCK_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
+ &ldlm_export_lock_ops,
+ CFS_HASH_DEFAULT | CFS_HASH_REHASH_KEY |
+ CFS_HASH_NBLK_CHANGE);
+
+ if (!exp->exp_lock_hash)
+ RETURN(-ENOMEM);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_init_export);
+
+void ldlm_destroy_export(struct obd_export *exp)
+{
+ ENTRY;
+ cfs_hash_putref(exp->exp_lock_hash);
+ exp->exp_lock_hash = NULL;
+
+ ldlm_destroy_flock_export(exp);
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_destroy_export);
+
+static int ldlm_setup(void)
+{
+ static struct ptlrpc_service_conf conf;
+ struct ldlm_bl_pool *blp = NULL;
+ int rc = 0;
+ int i;
+ ENTRY;
+
+ if (ldlm_state != NULL)
+ RETURN(-EALREADY);
+
+ OBD_ALLOC(ldlm_state, sizeof(*ldlm_state));
+ if (ldlm_state == NULL)
+ RETURN(-ENOMEM);
+
+#ifdef LPROCFS
+ rc = ldlm_proc_setup();
+ if (rc != 0)
+ GOTO(out, rc);
+#endif
+
+ memset(&conf, 0, sizeof(conf));
+ conf = (typeof(conf)) {
+ .psc_name = "ldlm_cbd",
+ .psc_watchdog_factor = 2,
+ .psc_buf = {
+ .bc_nbufs = LDLM_CLIENT_NBUFS,
+ .bc_buf_size = LDLM_BUFSIZE,
+ .bc_req_max_size = LDLM_MAXREQSIZE,
+ .bc_rep_max_size = LDLM_MAXREPSIZE,
+ .bc_req_portal = LDLM_CB_REQUEST_PORTAL,
+ .bc_rep_portal = LDLM_CB_REPLY_PORTAL,
+ },
+ .psc_thr = {
+ .tc_thr_name = "ldlm_cb",
+ .tc_thr_factor = LDLM_THR_FACTOR,
+ .tc_nthrs_init = LDLM_NTHRS_INIT,
+ .tc_nthrs_base = LDLM_NTHRS_BASE,
+ .tc_nthrs_max = LDLM_NTHRS_MAX,
+ .tc_nthrs_user = ldlm_num_threads,
+ .tc_cpu_affinity = 1,
+ .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD,
+ },
+ .psc_cpt = {
+ .cc_pattern = ldlm_cpts,
+ },
+ .psc_ops = {
+ .so_req_handler = ldlm_callback_handler,
+ },
+ };
+ ldlm_state->ldlm_cb_service = \
+ ptlrpc_register_service(&conf, ldlm_svc_proc_dir);
+ if (IS_ERR(ldlm_state->ldlm_cb_service)) {
+ CERROR("failed to start service\n");
+ rc = PTR_ERR(ldlm_state->ldlm_cb_service);
+ ldlm_state->ldlm_cb_service = NULL;
+ GOTO(out, rc);
+ }
+
+
+ OBD_ALLOC(blp, sizeof(*blp));
+ if (blp == NULL)
+ GOTO(out, rc = -ENOMEM);
+ ldlm_state->ldlm_bl_pool = blp;
+
+ spin_lock_init(&blp->blp_lock);
+ INIT_LIST_HEAD(&blp->blp_list);
+ INIT_LIST_HEAD(&blp->blp_prio_list);
+ init_waitqueue_head(&blp->blp_waitq);
+ atomic_set(&blp->blp_num_threads, 0);
+ atomic_set(&blp->blp_busy_threads, 0);
+
+ if (ldlm_num_threads == 0) {
+ blp->blp_min_threads = LDLM_NTHRS_INIT;
+ blp->blp_max_threads = LDLM_NTHRS_MAX;
+ } else {
+ blp->blp_min_threads = blp->blp_max_threads = \
+ min_t(int, LDLM_NTHRS_MAX, max_t(int, LDLM_NTHRS_INIT,
+ ldlm_num_threads));
+ }
+
+ for (i = 0; i < blp->blp_min_threads; i++) {
+ rc = ldlm_bl_thread_start(blp);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+
+ rc = ldlm_pools_init();
+ if (rc) {
+ CERROR("Failed to initialize LDLM pools: %d\n", rc);
+ GOTO(out, rc);
+ }
+ RETURN(0);
+
+ out:
+ ldlm_cleanup();
+ RETURN(rc);
+}
+
+static int ldlm_cleanup(void)
+{
+ ENTRY;
+
+ if (!list_empty(ldlm_namespace_list(LDLM_NAMESPACE_SERVER)) ||
+ !list_empty(ldlm_namespace_list(LDLM_NAMESPACE_CLIENT))) {
+ CERROR("ldlm still has namespaces; clean these up first.\n");
+ ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
+ ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
+ RETURN(-EBUSY);
+ }
+
+ ldlm_pools_fini();
+
+ if (ldlm_state->ldlm_bl_pool != NULL) {
+ struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
+
+ while (atomic_read(&blp->blp_num_threads) > 0) {
+ struct ldlm_bl_work_item blwi = { .blwi_ns = NULL };
+
+ init_completion(&blp->blp_comp);
+
+ spin_lock(&blp->blp_lock);
+ list_add_tail(&blwi.blwi_entry, &blp->blp_list);
+ wake_up(&blp->blp_waitq);
+ spin_unlock(&blp->blp_lock);
+
+ wait_for_completion(&blp->blp_comp);
+ }
+
+ OBD_FREE(blp, sizeof(*blp));
+ }
+
+ if (ldlm_state->ldlm_cb_service != NULL)
+ ptlrpc_unregister_service(ldlm_state->ldlm_cb_service);
+
+ ldlm_proc_cleanup();
+
+
+ OBD_FREE(ldlm_state, sizeof(*ldlm_state));
+ ldlm_state = NULL;
+
+ RETURN(0);
+}
+
+int ldlm_init(void)
+{
+ mutex_init(&ldlm_ref_mutex);
+ mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
+ mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
+ ldlm_resource_slab = kmem_cache_create("ldlm_resources",
+ sizeof(struct ldlm_resource), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (ldlm_resource_slab == NULL)
+ return -ENOMEM;
+
+ ldlm_lock_slab = kmem_cache_create("ldlm_locks",
+ sizeof(struct ldlm_lock), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
+ if (ldlm_lock_slab == NULL) {
+ kmem_cache_destroy(ldlm_resource_slab);
+ return -ENOMEM;
+ }
+
+ ldlm_interval_slab = kmem_cache_create("interval_node",
+ sizeof(struct ldlm_interval),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (ldlm_interval_slab == NULL) {
+ kmem_cache_destroy(ldlm_resource_slab);
+ kmem_cache_destroy(ldlm_lock_slab);
+ return -ENOMEM;
+ }
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ class_export_dump_hook = ldlm_dump_export_locks;
+#endif
+ return 0;
+}
+
+void ldlm_exit(void)
+{
+ if (ldlm_refcount)
+ CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
+ kmem_cache_destroy(ldlm_resource_slab);
+ /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call
+ * synchronize_rcu() to wait a grace period elapsed, so that
+ * ldlm_lock_free() get a chance to be called. */
+ synchronize_rcu();
+ kmem_cache_destroy(ldlm_lock_slab);
+ kmem_cache_destroy(ldlm_interval_slab);
+}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
new file mode 100644
index 000000000000..ec29e28624fe
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_plain.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+/**
+ * This file contains implementation of PLAIN lock type.
+ *
+ * PLAIN locks are the simplest form of LDLM locking, and are used when
+ * there only needs to be a single lock on a resource. This avoids some
+ * of the complexity of EXTENT and IBITS lock types, but doesn't allow
+ * different "parts" of a resource to be locked concurrently. Example
+ * use cases for PLAIN locks include locking of MGS configuration logs
+ * and (as of Lustre 2.4) quota records.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+
+#include "ldlm_internal.h"
+
+
+void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
+ ldlm_policy_data_t *lpolicy)
+{
+ /* No policy for plain locks */
+}
+
+void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
+ ldlm_wire_policy_data_t *wpolicy)
+{
+ /* No policy for plain locks */
+}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
new file mode 100644
index 000000000000..b3b60288e5f5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -0,0 +1,1384 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_pool.c
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+/*
+ * Idea of this code is rather simple. Each second, for each server namespace
+ * we have SLV - server lock volume which is calculated on current number of
+ * granted locks, grant speed for past period, etc - that is, locking load.
+ * This SLV number may be thought as a flow definition for simplicity. It is
+ * sent to clients with each occasion to let them know what is current load
+ * situation on the server. By default, at the beginning, SLV on server is
+ * set max value which is calculated as the following: allow to one client
+ * have all locks of limit ->pl_limit for 10h.
+ *
+ * Next, on clients, number of cached locks is not limited artificially in any
+ * way as it was before. Instead, client calculates CLV, that is, client lock
+ * volume for each lock and compares it with last SLV from the server. CLV is
+ * calculated as the number of locks in LRU * lock live time in seconds. If
+ * CLV > SLV - lock is canceled.
+ *
+ * Client has LVF, that is, lock volume factor which regulates how much sensitive
+ * client should be about last SLV from server. The higher LVF is the more locks
+ * will be canceled on client. Default value for it is 1. Setting LVF to 2 means
+ * that client will cancel locks 2 times faster.
+ *
+ * Locks on a client will be canceled more intensively in these cases:
+ * (1) if SLV is smaller, that is, load is higher on the server;
+ * (2) client has a lot of locks (the more locks are held by client, the bigger
+ * chances that some of them should be canceled);
+ * (3) client has old locks (taken some time ago);
+ *
+ * Thus, according to flow paradigm that we use for better understanding SLV,
+ * CLV is the volume of particle in flow described by SLV. According to this,
+ * if flow is getting thinner, more and more particles become outside of it and
+ * as particles are locks, they should be canceled.
+ *
+ * General idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com). Andreas
+ * Dilger (adilger@clusterfs.com) proposed few nice ideas like using LVF and many
+ * cleanups. Flow definition to allow more easy understanding of the logic belongs
+ * to Nikita Danilov (nikita@clusterfs.com) as well as many cleanups and fixes.
+ * And design and implementation are done by Yury Umanets (umka@clusterfs.com).
+ *
+ * Glossary for terms used:
+ *
+ * pl_limit - Number of allowed locks in pool. Applies to server and client
+ * side (tunable);
+ *
+ * pl_granted - Number of granted locks (calculated);
+ * pl_grant_rate - Number of granted locks for last T (calculated);
+ * pl_cancel_rate - Number of canceled locks for last T (calculated);
+ * pl_grant_speed - Grant speed (GR - CR) for last T (calculated);
+ * pl_grant_plan - Planned number of granted locks for next T (calculated);
+ * pl_server_lock_volume - Current server lock volume (calculated);
+ *
+ * As it may be seen from list above, we have few possible tunables which may
+ * affect behavior much. They all may be modified via proc. However, they also
+ * give a possibility for constructing few pre-defined behavior policies. If
+ * none of predefines is suitable for a working pattern being used, new one may
+ * be "constructed" via proc tunables.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+# include <lustre_dlm.h>
+
+#include <cl_object.h>
+
+#include <obd_class.h>
+#include <obd_support.h>
+#include "ldlm_internal.h"
+
+
+/*
+ * 50 ldlm locks for 1MB of RAM.
+ */
+#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_CACHE_SHIFT)) * 50)
+
+/*
+ * Maximal possible grant step plan in %.
+ */
+#define LDLM_POOL_MAX_GSP (30)
+
+/*
+ * Minimal possible grant step plan in %.
+ */
+#define LDLM_POOL_MIN_GSP (1)
+
+/*
+ * This controls the speed of reaching LDLM_POOL_MAX_GSP
+ * with increasing thread period.
+ */
+#define LDLM_POOL_GSP_STEP_SHIFT (2)
+
+/*
+ * LDLM_POOL_GSP% of all locks is default GP.
+ */
+#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100)
+
+/*
+ * Max age for locks on clients.
+ */
+#define LDLM_POOL_MAX_AGE (36000)
+
+/*
+ * The granularity of SLV calculation.
+ */
+#define LDLM_POOL_SLV_SHIFT (10)
+
+extern proc_dir_entry_t *ldlm_ns_proc_dir;
+
+static inline __u64 dru(__u64 val, __u32 shift, int round_up)
+{
+ return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift;
+}
+
+static inline __u64 ldlm_pool_slv_max(__u32 L)
+{
+ /*
+ * Allow to have all locks for 1 client for 10 hrs.
+ * Formula is the following: limit * 10h / 1 client.
+ */
+ __u64 lim = (__u64)L * LDLM_POOL_MAX_AGE / 1;
+ return lim;
+}
+
+static inline __u64 ldlm_pool_slv_min(__u32 L)
+{
+ return 1;
+}
+
+enum {
+ LDLM_POOL_FIRST_STAT = 0,
+ LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,
+ LDLM_POOL_GRANT_STAT,
+ LDLM_POOL_CANCEL_STAT,
+ LDLM_POOL_GRANT_RATE_STAT,
+ LDLM_POOL_CANCEL_RATE_STAT,
+ LDLM_POOL_GRANT_PLAN_STAT,
+ LDLM_POOL_SLV_STAT,
+ LDLM_POOL_SHRINK_REQTD_STAT,
+ LDLM_POOL_SHRINK_FREED_STAT,
+ LDLM_POOL_RECALC_STAT,
+ LDLM_POOL_TIMING_STAT,
+ LDLM_POOL_LAST_STAT
+};
+
+static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl)
+{
+ return container_of(pl, struct ldlm_namespace, ns_pool);
+}
+
+/**
+ * Calculates suggested grant_step in % of available locks for passed
+ * \a period. This is later used in grant_plan calculations.
+ */
+static inline int ldlm_pool_t2gsp(unsigned int t)
+{
+ /*
+ * This yields 1% grant step for anything below LDLM_POOL_GSP_STEP
+ * and up to 30% for anything higher than LDLM_POOL_GSP_STEP.
+ *
+ * How this will affect execution is the following:
+ *
+ * - for thread period 1s we will have grant_step 1% which good from
+ * pov of taking some load off from server and push it out to clients.
+ * This is like that because 1% for grant_step means that server will
+ * not allow clients to get lots of locks in short period of time and
+ * keep all old locks in their caches. Clients will always have to
+ * get some locks back if they want to take some new;
+ *
+ * - for thread period 10s (which is default) we will have 23% which
+ * means that clients will have enough of room to take some new locks
+ * without getting some back. All locks from this 23% which were not
+ * taken by clients in current period will contribute in SLV growing.
+ * SLV growing means more locks cached on clients until limit or grant
+ * plan is reached.
+ */
+ return LDLM_POOL_MAX_GSP -
+ ((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
+ (t >> LDLM_POOL_GSP_STEP_SHIFT));
+}
+
+/**
+ * Recalculates next grant limit on passed \a pl.
+ *
+ * \pre ->pl_lock is locked.
+ */
+static void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl)
+{
+ int granted, grant_step, limit;
+
+ limit = ldlm_pool_get_limit(pl);
+ granted = atomic_read(&pl->pl_granted);
+
+ grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
+ grant_step = ((limit - granted) * grant_step) / 100;
+ pl->pl_grant_plan = granted + grant_step;
+ limit = (limit * 5) >> 2;
+ if (pl->pl_grant_plan > limit)
+ pl->pl_grant_plan = limit;
+}
+
+/**
+ * Recalculates next SLV on passed \a pl.
+ *
+ * \pre ->pl_lock is locked.
+ */
+static void ldlm_pool_recalc_slv(struct ldlm_pool *pl)
+{
+ int granted;
+ int grant_plan;
+ int round_up;
+ __u64 slv;
+ __u64 slv_factor;
+ __u64 grant_usage;
+ __u32 limit;
+
+ slv = pl->pl_server_lock_volume;
+ grant_plan = pl->pl_grant_plan;
+ limit = ldlm_pool_get_limit(pl);
+ granted = atomic_read(&pl->pl_granted);
+ round_up = granted < limit;
+
+ grant_usage = max_t(int, limit - (granted - grant_plan), 1);
+
+ /*
+ * Find out SLV change factor which is the ratio of grant usage
+ * from limit. SLV changes as fast as the ratio of grant plan
+ * consumption. The more locks from grant plan are not consumed
+ * by clients in last interval (idle time), the faster grows
+ * SLV. And the opposite, the more grant plan is over-consumed
+ * (load time) the faster drops SLV.
+ */
+ slv_factor = (grant_usage << LDLM_POOL_SLV_SHIFT);
+ do_div(slv_factor, limit);
+ slv = slv * slv_factor;
+ slv = dru(slv, LDLM_POOL_SLV_SHIFT, round_up);
+
+ if (slv > ldlm_pool_slv_max(limit)) {
+ slv = ldlm_pool_slv_max(limit);
+ } else if (slv < ldlm_pool_slv_min(limit)) {
+ slv = ldlm_pool_slv_min(limit);
+ }
+
+ pl->pl_server_lock_volume = slv;
+}
+
+/**
+ * Recalculates next stats on passed \a pl.
+ *
+ * \pre ->pl_lock is locked.
+ */
+static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
+{
+ int grant_plan = pl->pl_grant_plan;
+ __u64 slv = pl->pl_server_lock_volume;
+ int granted = atomic_read(&pl->pl_granted);
+ int grant_rate = atomic_read(&pl->pl_grant_rate);
+ int cancel_rate = atomic_read(&pl->pl_cancel_rate);
+
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
+ slv);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
+ granted);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
+ grant_rate);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
+ grant_plan);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
+ cancel_rate);
+}
+
+/**
+ * Sets current SLV into obd accessible via ldlm_pl2ns(pl)->ns_obd.
+ */
+static void ldlm_srv_pool_push_slv(struct ldlm_pool *pl)
+{
+ struct obd_device *obd;
+
+ /*
+ * Set new SLV in obd field for using it later without accessing the
+ * pool. This is required to avoid race between sending reply to client
+ * with new SLV and cleanup server stack in which we can't guarantee
+ * that namespace is still alive. We know only that obd is alive as
+ * long as valid export is alive.
+ */
+ obd = ldlm_pl2ns(pl)->ns_obd;
+ LASSERT(obd != NULL);
+ write_lock(&obd->obd_pool_lock);
+ obd->obd_pool_slv = pl->pl_server_lock_volume;
+ write_unlock(&obd->obd_pool_lock);
+}
+
+/**
+ * Recalculates all pool fields on passed \a pl.
+ *
+ * \pre ->pl_lock is not locked.
+ */
+static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
+{
+ time_t recalc_interval_sec;
+ ENTRY;
+
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period)
+ RETURN(0);
+
+ spin_lock(&pl->pl_lock);
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period) {
+ spin_unlock(&pl->pl_lock);
+ RETURN(0);
+ }
+ /*
+ * Recalc SLV after last period. This should be done
+ * _before_ recalculating new grant plan.
+ */
+ ldlm_pool_recalc_slv(pl);
+
+ /*
+ * Make sure that pool informed obd of last SLV changes.
+ */
+ ldlm_srv_pool_push_slv(pl);
+
+ /*
+ * Update grant_plan for new period.
+ */
+ ldlm_pool_recalc_grant_plan(pl);
+
+ pl->pl_recalc_time = cfs_time_current_sec();
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+ recalc_interval_sec);
+ spin_unlock(&pl->pl_lock);
+ RETURN(0);
+}
+
+/**
+ * This function is used on server side as main entry point for memory
+ * pressure handling. It decreases SLV on \a pl according to passed
+ * \a nr and \a gfp_mask.
+ *
+ * Our goal here is to decrease SLV such a way that clients hold \a nr
+ * locks smaller in next 10h.
+ */
+static int ldlm_srv_pool_shrink(struct ldlm_pool *pl,
+ int nr, unsigned int gfp_mask)
+{
+ __u32 limit;
+
+ /*
+ * VM is asking how many entries may be potentially freed.
+ */
+ if (nr == 0)
+ return atomic_read(&pl->pl_granted);
+
+ /*
+ * Client already canceled locks but server is already in shrinker
+ * and can't cancel anything. Let's catch this race.
+ */
+ if (atomic_read(&pl->pl_granted) == 0)
+ RETURN(0);
+
+ spin_lock(&pl->pl_lock);
+
+ /*
+ * We want shrinker to possibly cause cancellation of @nr locks from
+ * clients or grant approximately @nr locks smaller next intervals.
+ *
+ * This is why we decreased SLV by @nr. This effect will only be as
+ * long as one re-calc interval (1s these days) and this should be
+ * enough to pass this decreased SLV to all clients. On next recalc
+ * interval pool will either increase SLV if locks load is not high
+ * or will keep on same level or even decrease again, thus, shrinker
+ * decreased SLV will affect next recalc intervals and this way will
+ * make locking load lower.
+ */
+ if (nr < pl->pl_server_lock_volume) {
+ pl->pl_server_lock_volume = pl->pl_server_lock_volume - nr;
+ } else {
+ limit = ldlm_pool_get_limit(pl);
+ pl->pl_server_lock_volume = ldlm_pool_slv_min(limit);
+ }
+
+ /*
+ * Make sure that pool informed obd of last SLV changes.
+ */
+ ldlm_srv_pool_push_slv(pl);
+ spin_unlock(&pl->pl_lock);
+
+ /*
+ * We did not really free any memory here so far, it only will be
+ * freed later may be, so that we return 0 to not confuse VM.
+ */
+ return 0;
+}
+
+/**
+ * Setup server side pool \a pl with passed \a limit.
+ */
+static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit)
+{
+ struct obd_device *obd;
+
+ obd = ldlm_pl2ns(pl)->ns_obd;
+ LASSERT(obd != NULL && obd != LP_POISON);
+ LASSERT(obd->obd_type != LP_POISON);
+ write_lock(&obd->obd_pool_lock);
+ obd->obd_pool_limit = limit;
+ write_unlock(&obd->obd_pool_lock);
+
+ ldlm_pool_set_limit(pl, limit);
+ return 0;
+}
+
+/**
+ * Sets SLV and Limit from ldlm_pl2ns(pl)->ns_obd tp passed \a pl.
+ */
+static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
+{
+ struct obd_device *obd;
+
+ /*
+ * Get new SLV and Limit from obd which is updated with coming
+ * RPCs.
+ */
+ obd = ldlm_pl2ns(pl)->ns_obd;
+ LASSERT(obd != NULL);
+ read_lock(&obd->obd_pool_lock);
+ pl->pl_server_lock_volume = obd->obd_pool_slv;
+ ldlm_pool_set_limit(pl, obd->obd_pool_limit);
+ read_unlock(&obd->obd_pool_lock);
+}
+
+/**
+ * Recalculates client size pool \a pl according to current SLV and Limit.
+ */
+static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
+{
+ time_t recalc_interval_sec;
+ ENTRY;
+
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period)
+ RETURN(0);
+
+ spin_lock(&pl->pl_lock);
+ /*
+ * Check if we need to recalc lists now.
+ */
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period) {
+ spin_unlock(&pl->pl_lock);
+ RETURN(0);
+ }
+
+ /*
+ * Make sure that pool knows last SLV and Limit from obd.
+ */
+ ldlm_cli_pool_pop_slv(pl);
+
+ pl->pl_recalc_time = cfs_time_current_sec();
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+ recalc_interval_sec);
+ spin_unlock(&pl->pl_lock);
+
+ /*
+ * Do not cancel locks in case lru resize is disabled for this ns.
+ */
+ if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
+ RETURN(0);
+
+ /*
+ * In the time of canceling locks on client we do not need to maintain
+ * sharp timing, we only want to cancel locks asap according to new SLV.
+ * It may be called when SLV has changed much, this is why we do not
+ * take into account pl->pl_recalc_time here.
+ */
+ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC,
+ LDLM_CANCEL_LRUR));
+}
+
+/**
+ * This function is main entry point for memory pressure handling on client
+ * side. Main goal of this function is to cancel some number of locks on
+ * passed \a pl according to \a nr and \a gfp_mask.
+ */
+static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
+ int nr, unsigned int gfp_mask)
+{
+ struct ldlm_namespace *ns;
+ int canceled = 0, unused;
+
+ ns = ldlm_pl2ns(pl);
+
+ /*
+ * Do not cancel locks in case lru resize is disabled for this ns.
+ */
+ if (!ns_connect_lru_resize(ns))
+ RETURN(0);
+
+ /*
+ * Make sure that pool knows last SLV and Limit from obd.
+ */
+ ldlm_cli_pool_pop_slv(pl);
+
+ spin_lock(&ns->ns_lock);
+ unused = ns->ns_nr_unused;
+ spin_unlock(&ns->ns_lock);
+
+ if (nr) {
+ canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC,
+ LDLM_CANCEL_SHRINK);
+ }
+ /*
+ * Return the number of potentially reclaimable locks.
+ */
+ return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+}
+
+struct ldlm_pool_ops ldlm_srv_pool_ops = {
+ .po_recalc = ldlm_srv_pool_recalc,
+ .po_shrink = ldlm_srv_pool_shrink,
+ .po_setup = ldlm_srv_pool_setup
+};
+
+struct ldlm_pool_ops ldlm_cli_pool_ops = {
+ .po_recalc = ldlm_cli_pool_recalc,
+ .po_shrink = ldlm_cli_pool_shrink
+};
+
+/**
+ * Pool recalc wrapper. Will call either client or server pool recalc callback
+ * depending what pool \a pl is used.
+ */
+int ldlm_pool_recalc(struct ldlm_pool *pl)
+{
+ time_t recalc_interval_sec;
+ int count;
+
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec <= 0)
+ goto recalc;
+
+ spin_lock(&pl->pl_lock);
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec > 0) {
+ /*
+ * Update pool statistics every 1s.
+ */
+ ldlm_pool_recalc_stats(pl);
+
+ /*
+ * Zero out all rates and speed for the last period.
+ */
+ atomic_set(&pl->pl_grant_rate, 0);
+ atomic_set(&pl->pl_cancel_rate, 0);
+ }
+ spin_unlock(&pl->pl_lock);
+
+ recalc:
+ if (pl->pl_ops->po_recalc != NULL) {
+ count = pl->pl_ops->po_recalc(pl);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
+ count);
+ return count;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ldlm_pool_recalc);
+
+/**
+ * Pool shrink wrapper. Will call either client or server pool recalc callback
+ * depending what pool \a pl is used.
+ */
+int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
+ unsigned int gfp_mask)
+{
+ int cancel = 0;
+
+ if (pl->pl_ops->po_shrink != NULL) {
+ cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask);
+ if (nr > 0) {
+ lprocfs_counter_add(pl->pl_stats,
+ LDLM_POOL_SHRINK_REQTD_STAT,
+ nr);
+ lprocfs_counter_add(pl->pl_stats,
+ LDLM_POOL_SHRINK_FREED_STAT,
+ cancel);
+ CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks, "
+ "shrunk %d\n", pl->pl_name, nr, cancel);
+ }
+ }
+ return cancel;
+}
+EXPORT_SYMBOL(ldlm_pool_shrink);
+
+/**
+ * Pool setup wrapper. Will call either client or server pool recalc callback
+ * depending what pool \a pl is used.
+ *
+ * Sets passed \a limit into pool \a pl.
+ */
+int ldlm_pool_setup(struct ldlm_pool *pl, int limit)
+{
+ if (pl->pl_ops->po_setup != NULL)
+ return(pl->pl_ops->po_setup(pl, limit));
+ return 0;
+}
+EXPORT_SYMBOL(ldlm_pool_setup);
+
+static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
+{
+ int granted, grant_rate, cancel_rate, grant_step;
+ int grant_speed, grant_plan, lvf;
+ struct ldlm_pool *pl = m->private;
+ __u64 slv, clv;
+ __u32 limit;
+
+ spin_lock(&pl->pl_lock);
+ slv = pl->pl_server_lock_volume;
+ clv = pl->pl_client_lock_volume;
+ limit = ldlm_pool_get_limit(pl);
+ grant_plan = pl->pl_grant_plan;
+ granted = atomic_read(&pl->pl_granted);
+ grant_rate = atomic_read(&pl->pl_grant_rate);
+ cancel_rate = atomic_read(&pl->pl_cancel_rate);
+ grant_speed = grant_rate - cancel_rate;
+ lvf = atomic_read(&pl->pl_lock_volume_factor);
+ grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
+ spin_unlock(&pl->pl_lock);
+
+ seq_printf(m, "LDLM pool state (%s):\n"
+ " SLV: "LPU64"\n"
+ " CLV: "LPU64"\n"
+ " LVF: %d\n",
+ pl->pl_name, slv, clv, lvf);
+
+ if (ns_is_server(ldlm_pl2ns(pl))) {
+ seq_printf(m, " GSP: %d%%\n"
+ " GP: %d\n",
+ grant_step, grant_plan);
+ }
+ seq_printf(m, " GR: %d\n" " CR: %d\n" " GS: %d\n"
+ " G: %d\n" " L: %d\n",
+ grant_rate, cancel_rate, grant_speed,
+ granted, limit);
+
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(lprocfs_pool_state);
+
+static int lprocfs_grant_speed_seq_show(struct seq_file *m, void *unused)
+{
+ struct ldlm_pool *pl = m->private;
+ int grant_speed;
+
+ spin_lock(&pl->pl_lock);
+ /* serialize with ldlm_pool_recalc */
+ grant_speed = atomic_read(&pl->pl_grant_rate) -
+ atomic_read(&pl->pl_cancel_rate);
+ spin_unlock(&pl->pl_lock);
+ return lprocfs_rd_uint(m, &grant_speed);
+}
+
+LDLM_POOL_PROC_READER_SEQ_SHOW(grant_plan, int);
+LPROC_SEQ_FOPS_RO(lprocfs_grant_plan);
+
+LDLM_POOL_PROC_READER_SEQ_SHOW(recalc_period, int);
+LDLM_POOL_PROC_WRITER(recalc_period, int);
+static ssize_t lprocfs_recalc_period_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+
+ return lprocfs_wr_recalc_period(file, buf, len, seq->private);
+}
+LPROC_SEQ_FOPS(lprocfs_recalc_period);
+
+LPROC_SEQ_FOPS_RO_TYPE(ldlm_pool, u64);
+LPROC_SEQ_FOPS_RO_TYPE(ldlm_pool, atomic);
+LPROC_SEQ_FOPS_RW_TYPE(ldlm_pool_rw, atomic);
+
+LPROC_SEQ_FOPS_RO(lprocfs_grant_speed);
+
+#define LDLM_POOL_ADD_VAR(name, var, ops) \
+ do { \
+ snprintf(var_name, MAX_STRING_SIZE, #name); \
+ pool_vars[0].data = var; \
+ pool_vars[0].fops = ops; \
+ lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);\
+ } while (0)
+
+static int ldlm_pool_proc_init(struct ldlm_pool *pl)
+{
+ struct ldlm_namespace *ns = ldlm_pl2ns(pl);
+ struct proc_dir_entry *parent_ns_proc;
+ struct lprocfs_vars pool_vars[2];
+ char *var_name = NULL;
+ int rc = 0;
+ ENTRY;
+
+ OBD_ALLOC(var_name, MAX_STRING_SIZE + 1);
+ if (!var_name)
+ RETURN(-ENOMEM);
+
+ parent_ns_proc = ns->ns_proc_dir_entry;
+ if (parent_ns_proc == NULL) {
+ CERROR("%s: proc entry is not initialized\n",
+ ldlm_ns_name(ns));
+ GOTO(out_free_name, rc = -EINVAL);
+ }
+ pl->pl_proc_dir = lprocfs_register("pool", parent_ns_proc,
+ NULL, NULL);
+ if (IS_ERR(pl->pl_proc_dir)) {
+ CERROR("LProcFS failed in ldlm-pool-init\n");
+ rc = PTR_ERR(pl->pl_proc_dir);
+ GOTO(out_free_name, rc);
+ }
+
+ var_name[MAX_STRING_SIZE] = '\0';
+ memset(pool_vars, 0, sizeof(pool_vars));
+ pool_vars[0].name = var_name;
+
+ LDLM_POOL_ADD_VAR("server_lock_volume", &pl->pl_server_lock_volume,
+ &ldlm_pool_u64_fops);
+ LDLM_POOL_ADD_VAR("limit", &pl->pl_limit, &ldlm_pool_rw_atomic_fops);
+ LDLM_POOL_ADD_VAR("granted", &pl->pl_granted, &ldlm_pool_atomic_fops);
+ LDLM_POOL_ADD_VAR("grant_speed", pl, &lprocfs_grant_speed_fops);
+ LDLM_POOL_ADD_VAR("cancel_rate", &pl->pl_cancel_rate,
+ &ldlm_pool_atomic_fops);
+ LDLM_POOL_ADD_VAR("grant_rate", &pl->pl_grant_rate,
+ &ldlm_pool_atomic_fops);
+ LDLM_POOL_ADD_VAR("grant_plan", pl, &lprocfs_grant_plan_fops);
+ LDLM_POOL_ADD_VAR("recalc_period", pl, &lprocfs_recalc_period_fops);
+ LDLM_POOL_ADD_VAR("lock_volume_factor", &pl->pl_lock_volume_factor,
+ &ldlm_pool_rw_atomic_fops);
+ LDLM_POOL_ADD_VAR("state", pl, &lprocfs_pool_state_fops);
+
+ pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT -
+ LDLM_POOL_FIRST_STAT, 0);
+ if (!pl->pl_stats)
+ GOTO(out_free_name, rc = -ENOMEM);
+
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "granted", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "grant", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "cancel", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "grant_rate", "locks/s");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "cancel_rate", "locks/s");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "grant_plan", "locks/s");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "slv", "slv");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "shrink_request", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "shrink_freed", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "recalc_freed", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "recalc_timing", "sec");
+ rc = lprocfs_register_stats(pl->pl_proc_dir, "stats", pl->pl_stats);
+
+ EXIT;
+out_free_name:
+ OBD_FREE(var_name, MAX_STRING_SIZE + 1);
+ return rc;
+}
+
+static void ldlm_pool_proc_fini(struct ldlm_pool *pl)
+{
+ if (pl->pl_stats != NULL) {
+ lprocfs_free_stats(&pl->pl_stats);
+ pl->pl_stats = NULL;
+ }
+ if (pl->pl_proc_dir != NULL) {
+ lprocfs_remove(&pl->pl_proc_dir);
+ pl->pl_proc_dir = NULL;
+ }
+}
+
+int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
+ int idx, ldlm_side_t client)
+{
+ int rc;
+ ENTRY;
+
+ spin_lock_init(&pl->pl_lock);
+ atomic_set(&pl->pl_granted, 0);
+ pl->pl_recalc_time = cfs_time_current_sec();
+ atomic_set(&pl->pl_lock_volume_factor, 1);
+
+ atomic_set(&pl->pl_grant_rate, 0);
+ atomic_set(&pl->pl_cancel_rate, 0);
+ pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L);
+
+ snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d",
+ ldlm_ns_name(ns), idx);
+
+ if (client == LDLM_NAMESPACE_SERVER) {
+ pl->pl_ops = &ldlm_srv_pool_ops;
+ ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L);
+ pl->pl_recalc_period = LDLM_POOL_SRV_DEF_RECALC_PERIOD;
+ pl->pl_server_lock_volume = ldlm_pool_slv_max(LDLM_POOL_HOST_L);
+ } else {
+ ldlm_pool_set_limit(pl, 1);
+ pl->pl_server_lock_volume = 0;
+ pl->pl_ops = &ldlm_cli_pool_ops;
+ pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
+ }
+ pl->pl_client_lock_volume = 0;
+ rc = ldlm_pool_proc_init(pl);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_DLMTRACE, "Lock pool %s is initialized\n", pl->pl_name);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_pool_init);
+
+void ldlm_pool_fini(struct ldlm_pool *pl)
+{
+ ENTRY;
+ ldlm_pool_proc_fini(pl);
+
+ /*
+ * Pool should not be used after this point. We can't free it here as
+ * it lives in struct ldlm_namespace, but still interested in catching
+ * any abnormal using cases.
+ */
+ POISON(pl, 0x5a, sizeof(*pl));
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_pool_fini);
+
+/**
+ * Add new taken ldlm lock \a lock into pool \a pl accounting.
+ */
+void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
+{
+ /*
+ * FLOCK locks are special in a sense that they are almost never
+ * cancelled, instead special kind of lock is used to drop them.
+ * also there is no LRU for flock locks, so no point in tracking
+ * them anyway.
+ */
+ if (lock->l_resource->lr_type == LDLM_FLOCK)
+ return;
+
+ atomic_inc(&pl->pl_granted);
+ atomic_inc(&pl->pl_grant_rate);
+ lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
+ /*
+ * Do not do pool recalc for client side as all locks which
+ * potentially may be canceled has already been packed into
+ * enqueue/cancel rpc. Also we do not want to run out of stack
+ * with too long call paths.
+ */
+ if (ns_is_server(ldlm_pl2ns(pl)))
+ ldlm_pool_recalc(pl);
+}
+EXPORT_SYMBOL(ldlm_pool_add);
+
+/**
+ * Remove ldlm lock \a lock from pool \a pl accounting.
+ */
+void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
+{
+ /*
+ * Filter out FLOCK locks. Read above comment in ldlm_pool_add().
+ */
+ if (lock->l_resource->lr_type == LDLM_FLOCK)
+ return;
+
+ LASSERT(atomic_read(&pl->pl_granted) > 0);
+ atomic_dec(&pl->pl_granted);
+ atomic_inc(&pl->pl_cancel_rate);
+
+ lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
+
+ if (ns_is_server(ldlm_pl2ns(pl)))
+ ldlm_pool_recalc(pl);
+}
+EXPORT_SYMBOL(ldlm_pool_del);
+
+/**
+ * Returns current \a pl SLV.
+ *
+ * \pre ->pl_lock is not locked.
+ */
+__u64 ldlm_pool_get_slv(struct ldlm_pool *pl)
+{
+ __u64 slv;
+ spin_lock(&pl->pl_lock);
+ slv = pl->pl_server_lock_volume;
+ spin_unlock(&pl->pl_lock);
+ return slv;
+}
+EXPORT_SYMBOL(ldlm_pool_get_slv);
+
+/**
+ * Sets passed \a slv to \a pl.
+ *
+ * \pre ->pl_lock is not locked.
+ */
+void ldlm_pool_set_slv(struct ldlm_pool *pl, __u64 slv)
+{
+ spin_lock(&pl->pl_lock);
+ pl->pl_server_lock_volume = slv;
+ spin_unlock(&pl->pl_lock);
+}
+EXPORT_SYMBOL(ldlm_pool_set_slv);
+
+/**
+ * Returns current \a pl CLV.
+ *
+ * \pre ->pl_lock is not locked.
+ */
+__u64 ldlm_pool_get_clv(struct ldlm_pool *pl)
+{
+ __u64 slv;
+ spin_lock(&pl->pl_lock);
+ slv = pl->pl_client_lock_volume;
+ spin_unlock(&pl->pl_lock);
+ return slv;
+}
+EXPORT_SYMBOL(ldlm_pool_get_clv);
+
+/**
+ * Sets passed \a clv to \a pl.
+ *
+ * \pre ->pl_lock is not locked.
+ */
+void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv)
+{
+ spin_lock(&pl->pl_lock);
+ pl->pl_client_lock_volume = clv;
+ spin_unlock(&pl->pl_lock);
+}
+EXPORT_SYMBOL(ldlm_pool_set_clv);
+
+/**
+ * Returns current \a pl limit.
+ */
+__u32 ldlm_pool_get_limit(struct ldlm_pool *pl)
+{
+ return atomic_read(&pl->pl_limit);
+}
+EXPORT_SYMBOL(ldlm_pool_get_limit);
+
+/**
+ * Sets passed \a limit to \a pl.
+ */
+void ldlm_pool_set_limit(struct ldlm_pool *pl, __u32 limit)
+{
+ atomic_set(&pl->pl_limit, limit);
+}
+EXPORT_SYMBOL(ldlm_pool_set_limit);
+
+/**
+ * Returns current LVF from \a pl.
+ */
+__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl)
+{
+ return atomic_read(&pl->pl_lock_volume_factor);
+}
+EXPORT_SYMBOL(ldlm_pool_get_lvf);
+
+static int ldlm_pool_granted(struct ldlm_pool *pl)
+{
+ return atomic_read(&pl->pl_granted);
+}
+
+static struct ptlrpc_thread *ldlm_pools_thread;
+static struct shrinker *ldlm_pools_srv_shrinker;
+static struct shrinker *ldlm_pools_cli_shrinker;
+static struct completion ldlm_pools_comp;
+
+/*
+ * Cancel \a nr locks from all namespaces (if possible). Returns number of
+ * cached locks after shrink is finished. All namespaces are asked to
+ * cancel approximately equal amount of locks to keep balancing.
+ */
+static int ldlm_pools_shrink(ldlm_side_t client, int nr,
+ unsigned int gfp_mask)
+{
+ int total = 0, cached = 0, nr_ns;
+ struct ldlm_namespace *ns;
+ void *cookie;
+
+ if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
+ !(gfp_mask & __GFP_FS))
+ return -1;
+
+ CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
+ nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
+
+ cookie = cl_env_reenter();
+
+ /*
+ * Find out how many resources we may release.
+ */
+ for (nr_ns = atomic_read(ldlm_namespace_nr(client));
+ nr_ns > 0; nr_ns--)
+ {
+ mutex_lock(ldlm_namespace_lock(client));
+ if (list_empty(ldlm_namespace_list(client))) {
+ mutex_unlock(ldlm_namespace_lock(client));
+ cl_env_reexit(cookie);
+ return 0;
+ }
+ ns = ldlm_namespace_first_locked(client);
+ ldlm_namespace_get(ns);
+ ldlm_namespace_move_locked(ns, client);
+ mutex_unlock(ldlm_namespace_lock(client));
+ total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
+ ldlm_namespace_put(ns);
+ }
+
+ if (nr == 0 || total == 0) {
+ cl_env_reexit(cookie);
+ return total;
+ }
+
+ /*
+ * Shrink at least ldlm_namespace_nr(client) namespaces.
+ */
+ for (nr_ns = atomic_read(ldlm_namespace_nr(client));
+ nr_ns > 0; nr_ns--)
+ {
+ int cancel, nr_locks;
+
+ /*
+ * Do not call shrink under ldlm_namespace_lock(client)
+ */
+ mutex_lock(ldlm_namespace_lock(client));
+ if (list_empty(ldlm_namespace_list(client))) {
+ mutex_unlock(ldlm_namespace_lock(client));
+ /*
+ * If list is empty, we can't return any @cached > 0,
+ * that probably would cause needless shrinker
+ * call.
+ */
+ cached = 0;
+ break;
+ }
+ ns = ldlm_namespace_first_locked(client);
+ ldlm_namespace_get(ns);
+ ldlm_namespace_move_locked(ns, client);
+ mutex_unlock(ldlm_namespace_lock(client));
+
+ nr_locks = ldlm_pool_granted(&ns->ns_pool);
+ cancel = 1 + nr_locks * nr / total;
+ ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
+ cached += ldlm_pool_granted(&ns->ns_pool);
+ ldlm_namespace_put(ns);
+ }
+ cl_env_reexit(cookie);
+ /* we only decrease the SLV in server pools shrinker, return -1 to
+ * kernel to avoid needless loop. LU-1128 */
+ return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+}
+
+static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER,
+ shrink_param(sc, nr_to_scan),
+ shrink_param(sc, gfp_mask));
+}
+
+static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT,
+ shrink_param(sc, nr_to_scan),
+ shrink_param(sc, gfp_mask));
+}
+
+void ldlm_pools_recalc(ldlm_side_t client)
+{
+ __u32 nr_l = 0, nr_p = 0, l;
+ struct ldlm_namespace *ns;
+ int nr, equal = 0;
+
+ /*
+ * No need to setup pool limit for client pools.
+ */
+ if (client == LDLM_NAMESPACE_SERVER) {
+ /*
+ * Check all modest namespaces first.
+ */
+ mutex_lock(ldlm_namespace_lock(client));
+ list_for_each_entry(ns, ldlm_namespace_list(client),
+ ns_list_chain)
+ {
+ if (ns->ns_appetite != LDLM_NAMESPACE_MODEST)
+ continue;
+
+ l = ldlm_pool_granted(&ns->ns_pool);
+ if (l == 0)
+ l = 1;
+
+ /*
+ * Set the modest pools limit equal to their avg granted
+ * locks + ~6%.
+ */
+ l += dru(l, LDLM_POOLS_MODEST_MARGIN_SHIFT, 0);
+ ldlm_pool_setup(&ns->ns_pool, l);
+ nr_l += l;
+ nr_p++;
+ }
+
+ /*
+ * Make sure that modest namespaces did not eat more that 2/3
+ * of limit.
+ */
+ if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) {
+ CWARN("\"Modest\" pools eat out 2/3 of server locks "
+ "limit (%d of %lu). This means that you have too "
+ "many clients for this amount of server RAM. "
+ "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L);
+ equal = 1;
+ }
+
+ /*
+ * The rest is given to greedy namespaces.
+ */
+ list_for_each_entry(ns, ldlm_namespace_list(client),
+ ns_list_chain)
+ {
+ if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY)
+ continue;
+
+ if (equal) {
+ /*
+ * In the case 2/3 locks are eaten out by
+ * modest pools, we re-setup equal limit
+ * for _all_ pools.
+ */
+ l = LDLM_POOL_HOST_L /
+ atomic_read(
+ ldlm_namespace_nr(client));
+ } else {
+ /*
+ * All the rest of greedy pools will have
+ * all locks in equal parts.
+ */
+ l = (LDLM_POOL_HOST_L - nr_l) /
+ (atomic_read(
+ ldlm_namespace_nr(client)) -
+ nr_p);
+ }
+ ldlm_pool_setup(&ns->ns_pool, l);
+ }
+ mutex_unlock(ldlm_namespace_lock(client));
+ }
+
+ /*
+ * Recalc at least ldlm_namespace_nr(client) namespaces.
+ */
+ for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) {
+ int skip;
+ /*
+ * Lock the list, get first @ns in the list, getref, move it
+ * to the tail, unlock and call pool recalc. This way we avoid
+ * calling recalc under @ns lock what is really good as we get
+ * rid of potential deadlock on client nodes when canceling
+ * locks synchronously.
+ */
+ mutex_lock(ldlm_namespace_lock(client));
+ if (list_empty(ldlm_namespace_list(client))) {
+ mutex_unlock(ldlm_namespace_lock(client));
+ break;
+ }
+ ns = ldlm_namespace_first_locked(client);
+
+ spin_lock(&ns->ns_lock);
+ /*
+ * skip ns which is being freed, and we don't want to increase
+ * its refcount again, not even temporarily. bz21519 & LU-499.
+ */
+ if (ns->ns_stopping) {
+ skip = 1;
+ } else {
+ skip = 0;
+ ldlm_namespace_get(ns);
+ }
+ spin_unlock(&ns->ns_lock);
+
+ ldlm_namespace_move_locked(ns, client);
+ mutex_unlock(ldlm_namespace_lock(client));
+
+ /*
+ * After setup is done - recalc the pool.
+ */
+ if (!skip) {
+ ldlm_pool_recalc(&ns->ns_pool);
+ ldlm_namespace_put(ns);
+ }
+ }
+}
+EXPORT_SYMBOL(ldlm_pools_recalc);
+
+static int ldlm_pools_thread_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+ ENTRY;
+
+ thread_set_flags(thread, SVC_RUNNING);
+ wake_up(&thread->t_ctl_waitq);
+
+ CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n",
+ "ldlm_poold", current_pid());
+
+ while (1) {
+ struct l_wait_info lwi;
+
+ /*
+ * Recal all pools on this tick.
+ */
+ ldlm_pools_recalc(LDLM_NAMESPACE_SERVER);
+ ldlm_pools_recalc(LDLM_NAMESPACE_CLIENT);
+
+ /*
+ * Wait until the next check time, or until we're
+ * stopped.
+ */
+ lwi = LWI_TIMEOUT(cfs_time_seconds(LDLM_POOLS_THREAD_PERIOD),
+ NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopping(thread) ||
+ thread_is_event(thread),
+ &lwi);
+
+ if (thread_test_and_clear_flags(thread, SVC_STOPPING))
+ break;
+ else
+ thread_test_and_clear_flags(thread, SVC_EVENT);
+ }
+
+ thread_set_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+
+ CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n",
+ "ldlm_poold", current_pid());
+
+ complete_and_exit(&ldlm_pools_comp, 0);
+}
+
+static int ldlm_pools_thread_start(void)
+{
+ struct l_wait_info lwi = { 0 };
+ task_t *task;
+ ENTRY;
+
+ if (ldlm_pools_thread != NULL)
+ RETURN(-EALREADY);
+
+ OBD_ALLOC_PTR(ldlm_pools_thread);
+ if (ldlm_pools_thread == NULL)
+ RETURN(-ENOMEM);
+
+ init_completion(&ldlm_pools_comp);
+ init_waitqueue_head(&ldlm_pools_thread->t_ctl_waitq);
+
+ task = kthread_run(ldlm_pools_thread_main, ldlm_pools_thread,
+ "ldlm_poold");
+ if (IS_ERR(task)) {
+ CERROR("Can't start pool thread, error %ld\n", PTR_ERR(task));
+ OBD_FREE(ldlm_pools_thread, sizeof(*ldlm_pools_thread));
+ ldlm_pools_thread = NULL;
+ RETURN(PTR_ERR(task));
+ }
+ l_wait_event(ldlm_pools_thread->t_ctl_waitq,
+ thread_is_running(ldlm_pools_thread), &lwi);
+ RETURN(0);
+}
+
+static void ldlm_pools_thread_stop(void)
+{
+ ENTRY;
+
+ if (ldlm_pools_thread == NULL) {
+ EXIT;
+ return;
+ }
+
+ thread_set_flags(ldlm_pools_thread, SVC_STOPPING);
+ wake_up(&ldlm_pools_thread->t_ctl_waitq);
+
+ /*
+ * Make sure that pools thread is finished before freeing @thread.
+ * This fixes possible race and oops due to accessing freed memory
+ * in pools thread.
+ */
+ wait_for_completion(&ldlm_pools_comp);
+ OBD_FREE_PTR(ldlm_pools_thread);
+ ldlm_pools_thread = NULL;
+ EXIT;
+}
+
+int ldlm_pools_init(void)
+{
+ int rc;
+ ENTRY;
+
+ rc = ldlm_pools_thread_start();
+ if (rc == 0) {
+ ldlm_pools_srv_shrinker =
+ set_shrinker(DEFAULT_SEEKS,
+ ldlm_pools_srv_shrink);
+ ldlm_pools_cli_shrinker =
+ set_shrinker(DEFAULT_SEEKS,
+ ldlm_pools_cli_shrink);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_pools_init);
+
+void ldlm_pools_fini(void)
+{
+ if (ldlm_pools_srv_shrinker != NULL) {
+ remove_shrinker(ldlm_pools_srv_shrinker);
+ ldlm_pools_srv_shrinker = NULL;
+ }
+ if (ldlm_pools_cli_shrinker != NULL) {
+ remove_shrinker(ldlm_pools_cli_shrinker);
+ ldlm_pools_cli_shrinker = NULL;
+ }
+ ldlm_pools_thread_stop();
+}
+EXPORT_SYMBOL(ldlm_pools_fini);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
new file mode 100644
index 000000000000..1a690edaba03
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -0,0 +1,2333 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/**
+ * This file contains Asynchronous System Trap (AST) handlers and related
+ * LDLM request-processing routines.
+ *
+ * An AST is a callback issued on a lock when its state is changed. There are
+ * several different types of ASTs (callbacks) registered for each lock:
+ *
+ * - completion AST: when a lock is enqueued by some process, but cannot be
+ * granted immediately due to other conflicting locks on the same resource,
+ * the completion AST is sent to notify the caller when the lock is
+ * eventually granted
+ *
+ * - blocking AST: when a lock is granted to some process, if another process
+ * enqueues a conflicting (blocking) lock on a resource, a blocking AST is
+ * sent to notify the holder(s) of the lock(s) of the conflicting lock
+ * request. The lock holder(s) must release their lock(s) on that resource in
+ * a timely manner or be evicted by the server.
+ *
+ * - glimpse AST: this is used when a process wants information about a lock
+ * (i.e. the lock value block (LVB)) but does not necessarily require holding
+ * the lock. If the resource is locked, the lock holder(s) are sent glimpse
+ * ASTs and the LVB is returned to the caller, and lock holder(s) may CANCEL
+ * their lock(s) if they are idle. If the resource is not locked, the server
+ * may grant the lock.
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <obd.h>
+
+#include "ldlm_internal.h"
+
+int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
+CFS_MODULE_PARM(ldlm_enqueue_min, "i", int, 0644,
+ "lock enqueue timeout minimum");
+
+/* in client side, whether the cached locks will be canceled before replay */
+unsigned int ldlm_cancel_unused_locks_before_replay = 1;
+
+static void interrupted_completion_wait(void *data)
+{
+}
+
+struct lock_wait_data {
+ struct ldlm_lock *lwd_lock;
+ __u32 lwd_conn_cnt;
+};
+
+struct ldlm_async_args {
+ struct lustre_handle lock_handle;
+};
+
+int ldlm_expired_completion_wait(void *data)
+{
+ struct lock_wait_data *lwd = data;
+ struct ldlm_lock *lock = lwd->lwd_lock;
+ struct obd_import *imp;
+ struct obd_device *obd;
+
+ ENTRY;
+ if (lock->l_conn_export == NULL) {
+ static cfs_time_t next_dump = 0, last_dump = 0;
+
+ if (ptlrpc_check_suspend())
+ RETURN(0);
+
+ LCONSOLE_WARN("lock timed out (enqueued at "CFS_TIME_T", "
+ CFS_DURATION_T"s ago)\n",
+ lock->l_last_activity,
+ cfs_time_sub(cfs_time_current_sec(),
+ lock->l_last_activity));
+ LDLM_DEBUG(lock, "lock timed out (enqueued at "CFS_TIME_T", "
+ CFS_DURATION_T"s ago); not entering recovery in "
+ "server code, just going back to sleep",
+ lock->l_last_activity,
+ cfs_time_sub(cfs_time_current_sec(),
+ lock->l_last_activity));
+ if (cfs_time_after(cfs_time_current(), next_dump)) {
+ last_dump = next_dump;
+ next_dump = cfs_time_shift(300);
+ ldlm_namespace_dump(D_DLMTRACE,
+ ldlm_lock_to_ns(lock));
+ if (last_dump == 0)
+ libcfs_debug_dumplog();
+ }
+ RETURN(0);
+ }
+
+ obd = lock->l_conn_export->exp_obd;
+ imp = obd->u.cli.cl_import;
+ ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
+ LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
+ CFS_DURATION_T"s ago), entering recovery for %s@%s",
+ lock->l_last_activity,
+ cfs_time_sub(cfs_time_current_sec(), lock->l_last_activity),
+ obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_expired_completion_wait);
+
+/* We use the same basis for both server side and client side functions
+ from a single node. */
+int ldlm_get_enq_timeout(struct ldlm_lock *lock)
+{
+ int timeout = at_get(ldlm_lock_to_ns_at(lock));
+ if (AT_OFF)
+ return obd_timeout / 2;
+ /* Since these are non-updating timeouts, we should be conservative.
+ It would be nice to have some kind of "early reply" mechanism for
+ lock callbacks too... */
+ timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
+ return max(timeout, ldlm_enqueue_min);
+}
+EXPORT_SYMBOL(ldlm_get_enq_timeout);
+
+/**
+ * Helper function for ldlm_completion_ast(), updating timings when lock is
+ * actually granted.
+ */
+static int ldlm_completion_tail(struct ldlm_lock *lock)
+{
+ long delay;
+ int result;
+
+ if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
+ LDLM_DEBUG(lock, "client-side enqueue: destroyed");
+ result = -EIO;
+ } else {
+ delay = cfs_time_sub(cfs_time_current_sec(),
+ lock->l_last_activity);
+ LDLM_DEBUG(lock, "client-side enqueue: granted after "
+ CFS_DURATION_T"s", delay);
+
+ /* Update our time estimate */
+ at_measured(ldlm_lock_to_ns_at(lock),
+ delay);
+ result = 0;
+ }
+ return result;
+}
+
+/**
+ * Implementation of ->l_completion_ast() for a client, that doesn't wait
+ * until lock is granted. Suitable for locks enqueued through ptlrpcd, of
+ * other threads that cannot block for long.
+ */
+int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data)
+{
+ ENTRY;
+
+ if (flags == LDLM_FL_WAIT_NOREPROC) {
+ LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
+ RETURN(0);
+ }
+
+ if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_BLOCK_CONV))) {
+ wake_up(&lock->l_waitq);
+ RETURN(ldlm_completion_tail(lock));
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+ "going forward");
+ ldlm_reprocess_all(lock->l_resource);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_completion_ast_async);
+
+/**
+ * Generic LDLM "completion" AST. This is called in several cases:
+ *
+ * - when a reply to an ENQUEUE RPC is received from the server
+ * (ldlm_cli_enqueue_fini()). Lock might be granted or not granted at
+ * this point (determined by flags);
+ *
+ * - when LDLM_CP_CALLBACK RPC comes to client to notify it that lock has
+ * been granted;
+ *
+ * - when ldlm_lock_match(LDLM_FL_LVB_READY) is about to wait until lock
+ * gets correct lvb;
+ *
+ * - to force all locks when resource is destroyed (cleanup_resource());
+ *
+ * - during lock conversion (not used currently).
+ *
+ * If lock is not granted in the first case, this function waits until second
+ * or penultimate cases happen in some other thread.
+ *
+ */
+int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
+{
+ /* XXX ALLOCATE - 160 bytes */
+ struct lock_wait_data lwd;
+ struct obd_device *obd;
+ struct obd_import *imp = NULL;
+ struct l_wait_info lwi;
+ __u32 timeout;
+ int rc = 0;
+ ENTRY;
+
+ if (flags == LDLM_FL_WAIT_NOREPROC) {
+ LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
+ goto noreproc;
+ }
+
+ if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_BLOCK_CONV))) {
+ wake_up(&lock->l_waitq);
+ RETURN(0);
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+ "sleeping");
+
+noreproc:
+
+ obd = class_exp2obd(lock->l_conn_export);
+
+ /* if this is a local lock, then there is no import */
+ if (obd != NULL) {
+ imp = obd->u.cli.cl_import;
+ }
+
+ /* Wait a long time for enqueue - server may have to callback a
+ lock from another client. Server will evict the other client if it
+ doesn't respond reasonably, and then give us the lock. */
+ timeout = ldlm_get_enq_timeout(lock) * 2;
+
+ lwd.lwd_lock = lock;
+
+ if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+ LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
+ lwi = LWI_INTR(interrupted_completion_wait, &lwd);
+ } else {
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
+ ldlm_expired_completion_wait,
+ interrupted_completion_wait, &lwd);
+ }
+
+ if (imp != NULL) {
+ spin_lock(&imp->imp_lock);
+ lwd.lwd_conn_cnt = imp->imp_conn_cnt;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ if (ns_is_client(ldlm_lock_to_ns(lock)) &&
+ OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
+ OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
+ lock->l_flags |= LDLM_FL_FAIL_LOC;
+ rc = -EINTR;
+ } else {
+ /* Go to sleep until the lock is granted or cancelled. */
+ rc = l_wait_event(lock->l_waitq,
+ is_granted_or_cancelled(lock), &lwi);
+ }
+
+ if (rc) {
+ LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
+ rc);
+ RETURN(rc);
+ }
+
+ RETURN(ldlm_completion_tail(lock));
+}
+EXPORT_SYMBOL(ldlm_completion_ast);
+
+/**
+ * A helper to build a blocking AST function
+ *
+ * Perform a common operation for blocking ASTs:
+ * defferred lock cancellation.
+ *
+ * \param lock the lock blocking or canceling AST was called on
+ * \retval 0
+ * \see mdt_blocking_ast
+ * \see ldlm_blocking_ast
+ */
+int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock)
+{
+ int do_ast;
+ ENTRY;
+
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ do_ast = (!lock->l_readers && !lock->l_writers);
+ unlock_res_and_lock(lock);
+
+ if (do_ast) {
+ struct lustre_handle lockh;
+ int rc;
+
+ LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel");
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ if (rc < 0)
+ CERROR("ldlm_cli_cancel: %d\n", rc);
+ } else {
+ LDLM_DEBUG(lock, "Lock still has references, will be "
+ "cancelled later");
+ }
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_blocking_ast_nocheck);
+
+/**
+ * Server blocking AST
+ *
+ * ->l_blocking_ast() callback for LDLM locks acquired by server-side
+ * OBDs.
+ *
+ * \param lock the lock which blocks a request or cancelling lock
+ * \param desc unused
+ * \param data unused
+ * \param flag indicates whether this cancelling or blocking callback
+ * \retval 0
+ * \see ldlm_blocking_ast_nocheck
+ */
+int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ ENTRY;
+
+ if (flag == LDLM_CB_CANCELING) {
+ /* Don't need to do anything here. */
+ RETURN(0);
+ }
+
+ lock_res_and_lock(lock);
+ /* Get this: if ldlm_blocking_ast is racing with intent_policy, such
+ * that ldlm_blocking_ast is called just before intent_policy method
+ * takes the lr_lock, then by the time we get the lock, we might not
+ * be the correct blocking function anymore. So check, and return
+ * early, if so. */
+ if (lock->l_blocking_ast != ldlm_blocking_ast) {
+ unlock_res_and_lock(lock);
+ RETURN(0);
+ }
+ RETURN(ldlm_blocking_ast_nocheck(lock));
+}
+EXPORT_SYMBOL(ldlm_blocking_ast);
+
+/**
+ * ->l_glimpse_ast() for DLM extent locks acquired on the server-side. See
+ * comment in filter_intent_policy() on why you may need this.
+ */
+int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp)
+{
+ /*
+ * Returning -ELDLM_NO_LOCK_DATA actually works, but the reason for
+ * that is rather subtle: with OST-side locking, it may so happen that
+ * _all_ extent locks are held by the OST. If client wants to obtain
+ * current file size it calls ll{,u}_glimpse_size(), and (as locks are
+ * on the server), dummy glimpse callback fires and does
+ * nothing. Client still receives correct file size due to the
+ * following fragment in filter_intent_policy():
+ *
+ * rc = l->l_glimpse_ast(l, NULL); // this will update the LVB
+ * if (rc != 0 && res->lr_namespace->ns_lvbo &&
+ * res->lr_namespace->ns_lvbo->lvbo_update) {
+ * res->lr_namespace->ns_lvbo->lvbo_update(res, NULL, 0, 1);
+ * }
+ *
+ * that is, after glimpse_ast() fails, filter_lvbo_update() runs, and
+ * returns correct file size to the client.
+ */
+ return -ELDLM_NO_LOCK_DATA;
+}
+EXPORT_SYMBOL(ldlm_glimpse_ast);
+
+/**
+ * Enqueue a local lock (typically on a server).
+ */
+int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_type_t type, ldlm_policy_data_t *policy,
+ ldlm_mode_t mode, __u64 *flags,
+ ldlm_blocking_callback blocking,
+ ldlm_completion_callback completion,
+ ldlm_glimpse_callback glimpse,
+ void *data, __u32 lvb_len, enum lvb_type lvb_type,
+ const __u64 *client_cookie,
+ struct lustre_handle *lockh)
+{
+ struct ldlm_lock *lock;
+ int err;
+ const struct ldlm_callback_suite cbs = { .lcs_completion = completion,
+ .lcs_blocking = blocking,
+ .lcs_glimpse = glimpse,
+ };
+ ENTRY;
+
+ LASSERT(!(*flags & LDLM_FL_REPLAY));
+ if (unlikely(ns_is_client(ns))) {
+ CERROR("Trying to enqueue local lock in a shadow namespace\n");
+ LBUG();
+ }
+
+ lock = ldlm_lock_create(ns, res_id, type, mode, &cbs, data, lvb_len,
+ lvb_type);
+ if (unlikely(!lock))
+ GOTO(out_nolock, err = -ENOMEM);
+
+ ldlm_lock2handle(lock, lockh);
+
+ /* NB: we don't have any lock now (lock_res_and_lock)
+ * because it's a new lock */
+ ldlm_lock_addref_internal_nolock(lock, mode);
+ lock->l_flags |= LDLM_FL_LOCAL;
+ if (*flags & LDLM_FL_ATOMIC_CB)
+ lock->l_flags |= LDLM_FL_ATOMIC_CB;
+
+ if (policy != NULL)
+ lock->l_policy_data = *policy;
+ if (client_cookie != NULL)
+ lock->l_client_cookie = *client_cookie;
+ if (type == LDLM_EXTENT)
+ lock->l_req_extent = policy->l_extent;
+
+ err = ldlm_lock_enqueue(ns, &lock, policy, flags);
+ if (unlikely(err != ELDLM_OK))
+ GOTO(out, err);
+
+ if (policy != NULL)
+ *policy = lock->l_policy_data;
+
+ if (lock->l_completion_ast)
+ lock->l_completion_ast(lock, *flags, NULL);
+
+ LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created");
+ EXIT;
+ out:
+ LDLM_LOCK_RELEASE(lock);
+ out_nolock:
+ return err;
+}
+EXPORT_SYMBOL(ldlm_cli_enqueue_local);
+
+static void failed_lock_cleanup(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock, int mode)
+{
+ int need_cancel = 0;
+
+ /* Set a flag to prevent us from sending a CANCEL (bug 407) */
+ lock_res_and_lock(lock);
+ /* Check that lock is not granted or failed, we might race. */
+ if ((lock->l_req_mode != lock->l_granted_mode) &&
+ !(lock->l_flags & LDLM_FL_FAILED)) {
+ /* Make sure that this lock will not be found by raced
+ * bl_ast and -EINVAL reply is sent to server anyways.
+ * bug 17645 */
+ lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
+ LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
+ need_cancel = 1;
+ }
+ unlock_res_and_lock(lock);
+
+ if (need_cancel)
+ LDLM_DEBUG(lock,
+ "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
+ "LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
+ else
+ LDLM_DEBUG(lock, "lock was granted or failed in race");
+
+ ldlm_lock_decref_internal(lock, mode);
+
+ /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
+ * from llite/file.c/ll_file_flock(). */
+ /* This code makes for the fact that we do not have blocking handler on
+ * a client for flock locks. As such this is the place where we must
+ * completely kill failed locks. (interrupted and those that
+ * were waiting to be granted when server evicted us. */
+ if (lock->l_resource->lr_type == LDLM_FLOCK) {
+ lock_res_and_lock(lock);
+ ldlm_resource_unlink_lock(lock);
+ ldlm_lock_destroy_nolock(lock);
+ unlock_res_and_lock(lock);
+ }
+}
+
+/**
+ * Finishing portion of client lock enqueue code.
+ *
+ * Called after receiving reply from server.
+ */
+int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
+ ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode,
+ __u64 *flags, void *lvb, __u32 lvb_len,
+ struct lustre_handle *lockh,int rc)
+{
+ struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+ int is_replay = *flags & LDLM_FL_REPLAY;
+ struct ldlm_lock *lock;
+ struct ldlm_reply *reply;
+ int cleanup_phase = 1;
+ int size = 0;
+ ENTRY;
+
+ lock = ldlm_handle2lock(lockh);
+ /* ldlm_cli_enqueue is holding a reference on this lock. */
+ if (!lock) {
+ LASSERT(type == LDLM_FLOCK);
+ RETURN(-ENOLCK);
+ }
+
+ LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len),
+ "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len);
+
+ if (rc != ELDLM_OK) {
+ LASSERT(!is_replay);
+ LDLM_DEBUG(lock, "client-side enqueue END (%s)",
+ rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED");
+
+ if (rc != ELDLM_LOCK_ABORTED)
+ GOTO(cleanup, rc);
+ }
+
+ /* Before we return, swab the reply */
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
+ if (reply == NULL)
+ GOTO(cleanup, rc = -EPROTO);
+
+ if (lvb_len != 0) {
+ LASSERT(lvb != NULL);
+
+ size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
+ RCL_SERVER);
+ if (size < 0) {
+ LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", size);
+ GOTO(cleanup, rc = size);
+ } else if (unlikely(size > lvb_len)) {
+ LDLM_ERROR(lock, "Replied LVB is larger than "
+ "expectation, expected = %d, replied = %d",
+ lvb_len, size);
+ GOTO(cleanup, rc = -EINVAL);
+ }
+ }
+
+ if (rc == ELDLM_LOCK_ABORTED) {
+ if (lvb_len != 0)
+ rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
+ lvb, size);
+ GOTO(cleanup, rc = (rc != 0 ? rc : ELDLM_LOCK_ABORTED));
+ }
+
+ /* lock enqueued on the server */
+ cleanup_phase = 0;
+
+ lock_res_and_lock(lock);
+ /* Key change rehash lock in per-export hash with new key */
+ if (exp->exp_lock_hash) {
+ /* In the function below, .hs_keycmp resolves to
+ * ldlm_export_lock_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ cfs_hash_rehash_key(exp->exp_lock_hash,
+ &lock->l_remote_handle,
+ &reply->lock_handle,
+ &lock->l_exp_hash);
+ } else {
+ lock->l_remote_handle = reply->lock_handle;
+ }
+
+ *flags = ldlm_flags_from_wire(reply->lock_flags);
+ lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
+ LDLM_INHERIT_FLAGS);
+ /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
+ * to wait with no timeout as well */
+ lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
+ LDLM_FL_NO_TIMEOUT);
+ unlock_res_and_lock(lock);
+
+ CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%llx\n",
+ lock, reply->lock_handle.cookie, *flags);
+
+ /* If enqueue returned a blocked lock but the completion handler has
+ * already run, then it fixed up the resource and we don't need to do it
+ * again. */
+ if ((*flags) & LDLM_FL_LOCK_CHANGED) {
+ int newmode = reply->lock_desc.l_req_mode;
+ LASSERT(!is_replay);
+ if (newmode && newmode != lock->l_req_mode) {
+ LDLM_DEBUG(lock, "server returned different mode %s",
+ ldlm_lockname[newmode]);
+ lock->l_req_mode = newmode;
+ }
+
+ if (memcmp(reply->lock_desc.l_resource.lr_name.name,
+ lock->l_resource->lr_name.name,
+ sizeof(struct ldlm_res_id))) {
+ CDEBUG(D_INFO, "remote intent success, locking "
+ "(%ld,%ld,%ld) instead of "
+ "(%ld,%ld,%ld)\n",
+ (long)reply->lock_desc.l_resource.lr_name.name[0],
+ (long)reply->lock_desc.l_resource.lr_name.name[1],
+ (long)reply->lock_desc.l_resource.lr_name.name[2],
+ (long)lock->l_resource->lr_name.name[0],
+ (long)lock->l_resource->lr_name.name[1],
+ (long)lock->l_resource->lr_name.name[2]);
+
+ rc = ldlm_lock_change_resource(ns, lock,
+ &reply->lock_desc.l_resource.lr_name);
+ if (rc || lock->l_resource == NULL)
+ GOTO(cleanup, rc = -ENOMEM);
+ LDLM_DEBUG(lock, "client-side enqueue, new resource");
+ }
+ if (with_policy)
+ if (!(type == LDLM_IBITS &&
+ !(exp_connect_flags(exp) & OBD_CONNECT_IBITS)))
+ /* We assume lock type cannot change on server*/
+ ldlm_convert_policy_to_local(exp,
+ lock->l_resource->lr_type,
+ &reply->lock_desc.l_policy_data,
+ &lock->l_policy_data);
+ if (type != LDLM_PLAIN)
+ LDLM_DEBUG(lock,"client-side enqueue, new policy data");
+ }
+
+ if ((*flags) & LDLM_FL_AST_SENT ||
+ /* Cancel extent locks as soon as possible on a liblustre client,
+ * because it cannot handle asynchronous ASTs robustly (see
+ * bug 7311). */
+ (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
+ }
+
+ /* If the lock has already been granted by a completion AST, don't
+ * clobber the LVB with an older one. */
+ if (lvb_len != 0) {
+ /* We must lock or a racing completion might update lvb without
+ * letting us know and we'll clobber the correct value.
+ * Cannot unlock after the check either, a that still leaves
+ * a tiny window for completion to get in */
+ lock_res_and_lock(lock);
+ if (lock->l_req_mode != lock->l_granted_mode)
+ rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
+ lock->l_lvb_data, size);
+ unlock_res_and_lock(lock);
+ if (rc < 0) {
+ cleanup_phase = 1;
+ GOTO(cleanup, rc);
+ }
+ }
+
+ if (!is_replay) {
+ rc = ldlm_lock_enqueue(ns, &lock, NULL, flags);
+ if (lock->l_completion_ast != NULL) {
+ int err = lock->l_completion_ast(lock, *flags, NULL);
+ if (!rc)
+ rc = err;
+ if (rc)
+ cleanup_phase = 1;
+ }
+ }
+
+ if (lvb_len && lvb != NULL) {
+ /* Copy the LVB here, and not earlier, because the completion
+ * AST (if any) can override what we got in the reply */
+ memcpy(lvb, lock->l_lvb_data, lvb_len);
+ }
+
+ LDLM_DEBUG(lock, "client-side enqueue END");
+ EXIT;
+cleanup:
+ if (cleanup_phase == 1 && rc)
+ failed_lock_cleanup(ns, lock, mode);
+ /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
+ LDLM_LOCK_PUT(lock);
+ LDLM_LOCK_RELEASE(lock);
+ return rc;
+}
+EXPORT_SYMBOL(ldlm_cli_enqueue_fini);
+
+/**
+ * Estimate number of lock handles that would fit into request of given
+ * size. PAGE_SIZE-512 is to allow TCP/IP and LNET headers to fit into
+ * a single page on the send/receive side. XXX: 512 should be changed to
+ * more adequate value.
+ */
+static inline int ldlm_req_handles_avail(int req_size, int off)
+{
+ int avail;
+
+ avail = min_t(int, LDLM_MAXREQSIZE, PAGE_CACHE_SIZE - 512) - req_size;
+ if (likely(avail >= 0))
+ avail /= (int)sizeof(struct lustre_handle);
+ else
+ avail = 0;
+ avail += LDLM_LOCKREQ_HANDLES - off;
+
+ return avail;
+}
+
+static inline int ldlm_capsule_handles_avail(struct req_capsule *pill,
+ enum req_location loc,
+ int off)
+{
+ int size = req_capsule_msg_size(pill, loc);
+ return ldlm_req_handles_avail(size, off);
+}
+
+static inline int ldlm_format_handles_avail(struct obd_import *imp,
+ const struct req_format *fmt,
+ enum req_location loc, int off)
+{
+ int size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
+ return ldlm_req_handles_avail(size, off);
+}
+
+/**
+ * Cancel LRU locks and pack them into the enqueue request. Pack there the given
+ * \a count locks in \a cancels.
+ *
+ * This is to be called by functions preparing their own requests that
+ * might contain lists of locks to cancel in addition to actual operation
+ * that needs to be performed.
+ */
+int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
+ int version, int opc, int canceloff,
+ struct list_head *cancels, int count)
+{
+ struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+ struct req_capsule *pill = &req->rq_pill;
+ struct ldlm_request *dlm = NULL;
+ int flags, avail, to_free, pack = 0;
+ LIST_HEAD(head);
+ int rc;
+ ENTRY;
+
+ if (cancels == NULL)
+ cancels = &head;
+ if (ns_connect_cancelset(ns)) {
+ /* Estimate the amount of available space in the request. */
+ req_capsule_filled_sizes(pill, RCL_CLIENT);
+ avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
+
+ flags = ns_connect_lru_resize(ns) ?
+ LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ to_free = !ns_connect_lru_resize(ns) &&
+ opc == LDLM_ENQUEUE ? 1 : 0;
+
+ /* Cancel LRU locks here _only_ if the server supports
+ * EARLY_CANCEL. Otherwise we have to send extra CANCEL
+ * RPC, which will make us slower. */
+ if (avail > count)
+ count += ldlm_cancel_lru_local(ns, cancels, to_free,
+ avail - count, 0, flags);
+ if (avail > count)
+ pack = count;
+ else
+ pack = avail;
+ req_capsule_set_size(pill, &RMF_DLM_REQ, RCL_CLIENT,
+ ldlm_request_bufsize(pack, opc));
+ }
+
+ rc = ptlrpc_request_pack(req, version, opc);
+ if (rc) {
+ ldlm_lock_list_put(cancels, l_bl_ast, count);
+ RETURN(rc);
+ }
+
+ if (ns_connect_cancelset(ns)) {
+ if (canceloff) {
+ dlm = req_capsule_client_get(pill, &RMF_DLM_REQ);
+ LASSERT(dlm);
+ /* Skip first lock handler in ldlm_request_pack(),
+ * this method will incrment @lock_count according
+ * to the lock handle amount actually written to
+ * the buffer. */
+ dlm->lock_count = canceloff;
+ }
+ /* Pack into the request @pack lock handles. */
+ ldlm_cli_cancel_list(cancels, pack, req, 0);
+ /* Prepare and send separate cancel RPC for others. */
+ ldlm_cli_cancel_list(cancels, count - pack, NULL, 0);
+ } else {
+ ldlm_lock_list_put(cancels, l_bl_ast, count);
+ }
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_prep_elc_req);
+
+int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req,
+ struct list_head *cancels, int count)
+{
+ return ldlm_prep_elc_req(exp, req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
+ LDLM_ENQUEUE_CANCEL_OFF, cancels, count);
+}
+EXPORT_SYMBOL(ldlm_prep_enqueue_req);
+
+struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, int lvb_len)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
+ if (req == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(ERR_PTR(rc));
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
+ ptlrpc_request_set_replen(req);
+ RETURN(req);
+}
+EXPORT_SYMBOL(ldlm_enqueue_pack);
+
+/**
+ * Client-side lock enqueue.
+ *
+ * If a request has some specific initialisation it is passed in \a reqp,
+ * otherwise it is created in ldlm_cli_enqueue.
+ *
+ * Supports sync and async requests, pass \a async flag accordingly. If a
+ * request was created in ldlm_cli_enqueue and it is the async request,
+ * pass it to the caller in \a reqp.
+ */
+int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
+ struct ldlm_enqueue_info *einfo,
+ const struct ldlm_res_id *res_id,
+ ldlm_policy_data_t const *policy, __u64 *flags,
+ void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
+ struct lustre_handle *lockh, int async)
+{
+ struct ldlm_namespace *ns;
+ struct ldlm_lock *lock;
+ struct ldlm_request *body;
+ int is_replay = *flags & LDLM_FL_REPLAY;
+ int req_passed_in = 1;
+ int rc, err;
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ LASSERT(exp != NULL);
+
+ ns = exp->exp_obd->obd_namespace;
+
+ /* If we're replaying this lock, just check some invariants.
+ * If we're creating a new lock, get everything all setup nice. */
+ if (is_replay) {
+ lock = ldlm_handle2lock_long(lockh, 0);
+ LASSERT(lock != NULL);
+ LDLM_DEBUG(lock, "client-side enqueue START");
+ LASSERT(exp == lock->l_conn_export);
+ } else {
+ const struct ldlm_callback_suite cbs = {
+ .lcs_completion = einfo->ei_cb_cp,
+ .lcs_blocking = einfo->ei_cb_bl,
+ .lcs_glimpse = einfo->ei_cb_gl,
+ .lcs_weigh = einfo->ei_cb_wg
+ };
+ lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
+ einfo->ei_mode, &cbs, einfo->ei_cbdata,
+ lvb_len, lvb_type);
+ if (lock == NULL)
+ RETURN(-ENOMEM);
+ /* for the local lock, add the reference */
+ ldlm_lock_addref_internal(lock, einfo->ei_mode);
+ ldlm_lock2handle(lock, lockh);
+ if (policy != NULL) {
+ /* INODEBITS_INTEROP: If the server does not support
+ * inodebits, we will request a plain lock in the
+ * descriptor (ldlm_lock2desc() below) but use an
+ * inodebits lock internally with both bits set.
+ */
+ if (einfo->ei_type == LDLM_IBITS &&
+ !(exp_connect_flags(exp) &
+ OBD_CONNECT_IBITS))
+ lock->l_policy_data.l_inodebits.bits =
+ MDS_INODELOCK_LOOKUP |
+ MDS_INODELOCK_UPDATE;
+ else
+ lock->l_policy_data = *policy;
+ }
+
+ if (einfo->ei_type == LDLM_EXTENT)
+ lock->l_req_extent = policy->l_extent;
+ LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n",
+ *flags);
+ }
+
+ lock->l_conn_export = exp;
+ lock->l_export = NULL;
+ lock->l_blocking_ast = einfo->ei_cb_bl;
+ lock->l_flags |= (*flags & LDLM_FL_NO_LRU);
+
+ /* lock not sent to server yet */
+
+ if (reqp == NULL || *reqp == NULL) {
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_LDLM_ENQUEUE,
+ LUSTRE_DLM_VERSION,
+ LDLM_ENQUEUE);
+ if (req == NULL) {
+ failed_lock_cleanup(ns, lock, einfo->ei_mode);
+ LDLM_LOCK_RELEASE(lock);
+ RETURN(-ENOMEM);
+ }
+ req_passed_in = 0;
+ if (reqp)
+ *reqp = req;
+ } else {
+ int len;
+
+ req = *reqp;
+ len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ,
+ RCL_CLIENT);
+ LASSERTF(len >= sizeof(*body), "buflen[%d] = %d, not %d\n",
+ DLM_LOCKREQ_OFF, len, (int)sizeof(*body));
+ }
+
+ /* Dump lock data into the request buffer */
+ body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+ ldlm_lock2desc(lock, &body->lock_desc);
+ body->lock_flags = ldlm_flags_to_wire(*flags);
+ body->lock_handle[0] = *lockh;
+
+ /* Continue as normal. */
+ if (!req_passed_in) {
+ if (lvb_len > 0)
+ req_capsule_extend(&req->rq_pill,
+ &RQF_LDLM_ENQUEUE_LVB);
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
+ lvb_len);
+ ptlrpc_request_set_replen(req);
+ }
+
+ /*
+ * Liblustre client doesn't get extent locks, except for O_APPEND case
+ * where [0, OBD_OBJECT_EOF] lock is taken, or truncate, where
+ * [i_size, OBD_OBJECT_EOF] lock is taken.
+ */
+ LASSERT(ergo(LIBLUSTRE_CLIENT, einfo->ei_type != LDLM_EXTENT ||
+ policy->l_extent.end == OBD_OBJECT_EOF));
+
+ if (async) {
+ LASSERT(reqp != NULL);
+ RETURN(0);
+ }
+
+ LDLM_DEBUG(lock, "sending request");
+
+ rc = ptlrpc_queue_wait(req);
+
+ err = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, policy ? 1 : 0,
+ einfo->ei_mode, flags, lvb, lvb_len,
+ lockh, rc);
+
+ /* If ldlm_cli_enqueue_fini did not find the lock, we need to free
+ * one reference that we took */
+ if (err == -ENOLCK)
+ LDLM_LOCK_RELEASE(lock);
+ else
+ rc = err;
+
+ if (!req_passed_in && req != NULL) {
+ ptlrpc_req_finished(req);
+ if (reqp)
+ *reqp = NULL;
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_cli_enqueue);
+
+static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
+ __u32 *flags)
+{
+ struct ldlm_resource *res;
+ int rc;
+ ENTRY;
+ if (ns_is_client(ldlm_lock_to_ns(lock))) {
+ CERROR("Trying to cancel local lock\n");
+ LBUG();
+ }
+ LDLM_DEBUG(lock, "client-side local convert");
+
+ res = ldlm_lock_convert(lock, new_mode, flags);
+ if (res) {
+ ldlm_reprocess_all(res);
+ rc = 0;
+ } else {
+ rc = EDEADLOCK;
+ }
+ LDLM_DEBUG(lock, "client-side local convert handler END");
+ LDLM_LOCK_PUT(lock);
+ RETURN(rc);
+}
+
+/* FIXME: one of ldlm_cli_convert or the server side should reject attempted
+ * conversion of locks which are on the waiting or converting queue */
+/* Caller of this code is supposed to take care of lock readers/writers
+ accounting */
+int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, __u32 *flags)
+{
+ struct ldlm_request *body;
+ struct ldlm_reply *reply;
+ struct ldlm_lock *lock;
+ struct ldlm_resource *res;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ lock = ldlm_handle2lock(lockh);
+ if (!lock) {
+ LBUG();
+ RETURN(-EINVAL);
+ }
+ *flags = 0;
+
+ if (lock->l_conn_export == NULL)
+ RETURN(ldlm_cli_convert_local(lock, new_mode, flags));
+
+ LDLM_DEBUG(lock, "client-side convert");
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(lock->l_conn_export),
+ &RQF_LDLM_CONVERT, LUSTRE_DLM_VERSION,
+ LDLM_CONVERT);
+ if (req == NULL) {
+ LDLM_LOCK_PUT(lock);
+ RETURN(-ENOMEM);
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+ body->lock_handle[0] = lock->l_remote_handle;
+
+ body->lock_desc.l_req_mode = new_mode;
+ body->lock_flags = ldlm_flags_to_wire(*flags);
+
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc != ELDLM_OK)
+ GOTO(out, rc);
+
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
+ if (reply == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ if (req->rq_status)
+ GOTO(out, rc = req->rq_status);
+
+ res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
+ if (res != NULL) {
+ ldlm_reprocess_all(res);
+ /* Go to sleep until the lock is granted. */
+ /* FIXME: or cancelled. */
+ if (lock->l_completion_ast) {
+ rc = lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC,
+ NULL);
+ if (rc)
+ GOTO(out, rc);
+ }
+ } else {
+ rc = EDEADLOCK;
+ }
+ EXIT;
+ out:
+ LDLM_LOCK_PUT(lock);
+ ptlrpc_req_finished(req);
+ return rc;
+}
+EXPORT_SYMBOL(ldlm_cli_convert);
+
+/**
+ * Cancel locks locally.
+ * Returns:
+ * \retval LDLM_FL_LOCAL_ONLY if there is no need for a CANCEL RPC to the server
+ * \retval LDLM_FL_CANCELING otherwise;
+ * \retval LDLM_FL_BL_AST if there is a need for a separate CANCEL RPC.
+ */
+static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
+{
+ __u64 rc = LDLM_FL_LOCAL_ONLY;
+ ENTRY;
+
+ if (lock->l_conn_export) {
+ bool local_only;
+
+ LDLM_DEBUG(lock, "client-side cancel");
+ /* Set this flag to prevent others from getting new references*/
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ local_only = !!(lock->l_flags &
+ (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
+ ldlm_cancel_callback(lock);
+ rc = (lock->l_flags & LDLM_FL_BL_AST) ?
+ LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+ unlock_res_and_lock(lock);
+
+ if (local_only) {
+ CDEBUG(D_DLMTRACE, "not sending request (at caller's "
+ "instruction)\n");
+ rc = LDLM_FL_LOCAL_ONLY;
+ }
+ ldlm_lock_cancel(lock);
+ } else {
+ if (ns_is_client(ldlm_lock_to_ns(lock))) {
+ LDLM_ERROR(lock, "Trying to cancel local lock");
+ LBUG();
+ }
+ LDLM_DEBUG(lock, "server-side local cancel");
+ ldlm_lock_cancel(lock);
+ ldlm_reprocess_all(lock->l_resource);
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * Pack \a count locks in \a head into ldlm_request buffer of request \a req.
+ */
+static void ldlm_cancel_pack(struct ptlrpc_request *req,
+ struct list_head *head, int count)
+{
+ struct ldlm_request *dlm;
+ struct ldlm_lock *lock;
+ int max, packed = 0;
+ ENTRY;
+
+ dlm = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+ LASSERT(dlm != NULL);
+
+ /* Check the room in the request buffer. */
+ max = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT) -
+ sizeof(struct ldlm_request);
+ max /= sizeof(struct lustre_handle);
+ max += LDLM_LOCKREQ_HANDLES;
+ LASSERT(max >= dlm->lock_count + count);
+
+ /* XXX: it would be better to pack lock handles grouped by resource.
+ * so that the server cancel would call filter_lvbo_update() less
+ * frequently. */
+ list_for_each_entry(lock, head, l_bl_ast) {
+ if (!count--)
+ break;
+ LASSERT(lock->l_conn_export);
+ /* Pack the lock handle to the given request buffer. */
+ LDLM_DEBUG(lock, "packing");
+ dlm->lock_handle[dlm->lock_count++] = lock->l_remote_handle;
+ packed++;
+ }
+ CDEBUG(D_DLMTRACE, "%d locks packed\n", packed);
+ EXIT;
+}
+
+/**
+ * Prepare and send a batched cancel RPC. It will include \a count lock
+ * handles of locks given in \a cancels list. */
+int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *cancels,
+ int count, ldlm_cancel_flags_t flags)
+{
+ struct ptlrpc_request *req = NULL;
+ struct obd_import *imp;
+ int free, sent = 0;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(exp != NULL);
+ LASSERT(count > 0);
+
+ CFS_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL, cfs_fail_val);
+
+ if (CFS_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_RACE))
+ RETURN(count);
+
+ free = ldlm_format_handles_avail(class_exp2cliimp(exp),
+ &RQF_LDLM_CANCEL, RCL_CLIENT, 0);
+ if (count > free)
+ count = free;
+
+ while (1) {
+ imp = class_exp2cliimp(exp);
+ if (imp == NULL || imp->imp_invalid) {
+ CDEBUG(D_DLMTRACE,
+ "skipping cancel on invalid import %p\n", imp);
+ RETURN(count);
+ }
+
+ req = ptlrpc_request_alloc(imp, &RQF_LDLM_CANCEL);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ req_capsule_filled_sizes(&req->rq_pill, RCL_CLIENT);
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT,
+ ldlm_request_bufsize(count, LDLM_CANCEL));
+
+ rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CANCEL);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(out, rc);
+ }
+
+ req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
+ req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ ldlm_cancel_pack(req, cancels, count);
+
+ ptlrpc_request_set_replen(req);
+ if (flags & LCF_ASYNC) {
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+ sent = count;
+ GOTO(out, 0);
+ } else {
+ rc = ptlrpc_queue_wait(req);
+ }
+ if (rc == ESTALE) {
+ CDEBUG(D_DLMTRACE, "client/server (nid %s) "
+ "out of sync -- not fatal\n",
+ libcfs_nid2str(req->rq_import->
+ imp_connection->c_peer.nid));
+ rc = 0;
+ } else if (rc == -ETIMEDOUT && /* check there was no reconnect*/
+ req->rq_import_generation == imp->imp_generation) {
+ ptlrpc_req_finished(req);
+ continue;
+ } else if (rc != ELDLM_OK) {
+ /* -ESHUTDOWN is common on umount */
+ CDEBUG_LIMIT(rc == -ESHUTDOWN ? D_DLMTRACE : D_ERROR,
+ "Got rc %d from cancel RPC: "
+ "canceling anyway\n", rc);
+ break;
+ }
+ sent = count;
+ break;
+ }
+
+ ptlrpc_req_finished(req);
+ EXIT;
+out:
+ return sent ? sent : rc;
+}
+EXPORT_SYMBOL(ldlm_cli_cancel_req);
+
+static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp)
+{
+ LASSERT(imp != NULL);
+ return &imp->imp_obd->obd_namespace->ns_pool;
+}
+
+/**
+ * Update client's OBD pool related fields with new SLV and Limit from \a req.
+ */
+int ldlm_cli_update_pool(struct ptlrpc_request *req)
+{
+ struct obd_device *obd;
+ __u64 new_slv;
+ __u32 new_limit;
+ ENTRY;
+ if (unlikely(!req->rq_import || !req->rq_import->imp_obd ||
+ !imp_connect_lru_resize(req->rq_import)))
+ {
+ /*
+ * Do nothing for corner cases.
+ */
+ RETURN(0);
+ }
+
+ /* In some cases RPC may contain SLV and limit zeroed out. This
+ * is the case when server does not support LRU resize feature.
+ * This is also possible in some recovery cases when server-side
+ * reqs have no reference to the OBD export and thus access to
+ * server-side namespace is not possible. */
+ if (lustre_msg_get_slv(req->rq_repmsg) == 0 ||
+ lustre_msg_get_limit(req->rq_repmsg) == 0) {
+ DEBUG_REQ(D_HA, req, "Zero SLV or Limit found "
+ "(SLV: "LPU64", Limit: %u)",
+ lustre_msg_get_slv(req->rq_repmsg),
+ lustre_msg_get_limit(req->rq_repmsg));
+ RETURN(0);
+ }
+
+ new_limit = lustre_msg_get_limit(req->rq_repmsg);
+ new_slv = lustre_msg_get_slv(req->rq_repmsg);
+ obd = req->rq_import->imp_obd;
+
+ /* Set new SLV and limit in OBD fields to make them accessible
+ * to the pool thread. We do not access obd_namespace and pool
+ * directly here as there is no reliable way to make sure that
+ * they are still alive at cleanup time. Evil races are possible
+ * which may cause Oops at that time. */
+ write_lock(&obd->obd_pool_lock);
+ obd->obd_pool_slv = new_slv;
+ obd->obd_pool_limit = new_limit;
+ write_unlock(&obd->obd_pool_lock);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_cli_update_pool);
+
+/**
+ * Client side lock cancel.
+ *
+ * Lock must not have any readers or writers by this time.
+ */
+int ldlm_cli_cancel(struct lustre_handle *lockh,
+ ldlm_cancel_flags_t cancel_flags)
+{
+ struct obd_export *exp;
+ int avail, flags, count = 1;
+ __u64 rc = 0;
+ struct ldlm_namespace *ns;
+ struct ldlm_lock *lock;
+ LIST_HEAD(cancels);
+ ENTRY;
+
+ /* concurrent cancels on the same handle can happen */
+ lock = ldlm_handle2lock_long(lockh, LDLM_FL_CANCELING);
+ if (lock == NULL) {
+ LDLM_DEBUG_NOLOCK("lock is already being destroyed\n");
+ RETURN(0);
+ }
+
+ rc = ldlm_cli_cancel_local(lock);
+ if (rc == LDLM_FL_LOCAL_ONLY) {
+ LDLM_LOCK_RELEASE(lock);
+ RETURN(0);
+ }
+ /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
+ * RPC which goes to canceld portal, so we can cancel other LRU locks
+ * here and send them all as one LDLM_CANCEL RPC. */
+ LASSERT(list_empty(&lock->l_bl_ast));
+ list_add(&lock->l_bl_ast, &cancels);
+
+ exp = lock->l_conn_export;
+ if (exp_connect_cancelset(exp)) {
+ avail = ldlm_format_handles_avail(class_exp2cliimp(exp),
+ &RQF_LDLM_CANCEL,
+ RCL_CLIENT, 0);
+ LASSERT(avail > 0);
+
+ ns = ldlm_lock_to_ns(lock);
+ flags = ns_connect_lru_resize(ns) ?
+ LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1,
+ LCF_BL_AST, flags);
+ }
+ ldlm_cli_cancel_list(&cancels, count, NULL, cancel_flags);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_cli_cancel);
+
+/**
+ * Locally cancel up to \a count locks in list \a cancels.
+ * Return the number of cancelled locks.
+ */
+int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
+ ldlm_cancel_flags_t flags)
+{
+ LIST_HEAD(head);
+ struct ldlm_lock *lock, *next;
+ int left = 0, bl_ast = 0;
+ __u64 rc;
+
+ left = count;
+ list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
+ if (left-- == 0)
+ break;
+
+ if (flags & LCF_LOCAL) {
+ rc = LDLM_FL_LOCAL_ONLY;
+ ldlm_lock_cancel(lock);
+ } else {
+ rc = ldlm_cli_cancel_local(lock);
+ }
+ /* Until we have compound requests and can send LDLM_CANCEL
+ * requests batched with generic RPCs, we need to send cancels
+ * with the LDLM_FL_BL_AST flag in a separate RPC from
+ * the one being generated now. */
+ if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) {
+ LDLM_DEBUG(lock, "Cancel lock separately");
+ list_del_init(&lock->l_bl_ast);
+ list_add(&lock->l_bl_ast, &head);
+ bl_ast++;
+ continue;
+ }
+ if (rc == LDLM_FL_LOCAL_ONLY) {
+ /* CANCEL RPC should not be sent to server. */
+ list_del_init(&lock->l_bl_ast);
+ LDLM_LOCK_RELEASE(lock);
+ count--;
+ }
+ }
+ if (bl_ast > 0) {
+ count -= bl_ast;
+ ldlm_cli_cancel_list(&head, bl_ast, NULL, 0);
+ }
+
+ RETURN(count);
+}
+EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
+
+/**
+ * Cancel as many locks as possible w/o sending any RPCs (e.g. to write back
+ * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g.
+ * readahead requests, ...)
+ */
+static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
+ ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
+ lock_res_and_lock(lock);
+
+ /* don't check added & count since we want to process all locks
+ * from unused list */
+ switch (lock->l_resource->lr_type) {
+ case LDLM_EXTENT:
+ case LDLM_IBITS:
+ if (cb && cb(lock))
+ break;
+ default:
+ result = LDLM_POLICY_SKIP_LOCK;
+ lock->l_flags |= LDLM_FL_SKIPPED;
+ break;
+ }
+
+ unlock_res_and_lock(lock);
+ RETURN(result);
+}
+
+/**
+ * Callback function for LRU-resize policy. Decides whether to keep
+ * \a lock in LRU for current \a LRU size \a unused, added in current
+ * scan \a added and number of locks to be preferably canceled \a count.
+ *
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ *
+ * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
+ */
+static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ cfs_time_t cur = cfs_time_current();
+ struct ldlm_pool *pl = &ns->ns_pool;
+ __u64 slv, lvf, lv;
+ cfs_time_t la;
+
+ /* Stop LRU processing when we reach past @count or have checked all
+ * locks in LRU. */
+ if (count && added >= count)
+ return LDLM_POLICY_KEEP_LOCK;
+
+ slv = ldlm_pool_get_slv(pl);
+ lvf = ldlm_pool_get_lvf(pl);
+ la = cfs_duration_sec(cfs_time_sub(cur,
+ lock->l_last_used));
+ lv = lvf * la * unused;
+
+ /* Inform pool about current CLV to see it via proc. */
+ ldlm_pool_set_clv(pl, lv);
+
+ /* Stop when SLV is not yet come from server or lv is smaller than
+ * it is. */
+ return (slv == 0 || lv < slv) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+}
+
+/**
+ * Callback function for proc used policy. Makes decision whether to keep
+ * \a lock in LRU for current \a LRU size \a unused, added in current scan \a
+ * added and number of locks to be preferably canceled \a count.
+ *
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ *
+ * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
+ */
+static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ /* Stop LRU processing when we reach past @count or have checked all
+ * locks in LRU. */
+ return (added >= count) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+}
+
+/**
+ * Callback function for aged policy. Makes decision whether to keep \a lock in
+ * LRU for current LRU size \a unused, added in current scan \a added and
+ * number of locks to be preferably canceled \a count.
+ *
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ *
+ * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
+ */
+static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ /* Stop LRU processing if young lock is found and we reach past count */
+ return ((added >= count) &&
+ cfs_time_before(cfs_time_current(),
+ cfs_time_add(lock->l_last_used,
+ ns->ns_max_age))) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+}
+
+/**
+ * Callback function for default policy. Makes decision whether to keep \a lock
+ * in LRU for current LRU size \a unused, added in current scan \a added and
+ * number of locks to be preferably canceled \a count.
+ *
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ *
+ * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
+ */
+static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ /* Stop LRU processing when we reach past count or have checked all
+ * locks in LRU. */
+ return (added >= count) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+}
+
+typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *,
+ struct ldlm_lock *, int,
+ int, int);
+
+static ldlm_cancel_lru_policy_t
+ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
+{
+ if (flags & LDLM_CANCEL_NO_WAIT)
+ return ldlm_cancel_no_wait_policy;
+
+ if (ns_connect_lru_resize(ns)) {
+ if (flags & LDLM_CANCEL_SHRINK)
+ /* We kill passed number of old locks. */
+ return ldlm_cancel_passed_policy;
+ else if (flags & LDLM_CANCEL_LRUR)
+ return ldlm_cancel_lrur_policy;
+ else if (flags & LDLM_CANCEL_PASSED)
+ return ldlm_cancel_passed_policy;
+ } else {
+ if (flags & LDLM_CANCEL_AGED)
+ return ldlm_cancel_aged_policy;
+ }
+
+ return ldlm_cancel_default_policy;
+}
+
+/**
+ * - Free space in LRU for \a count new locks,
+ * redundant unused locks are canceled locally;
+ * - also cancel locally unused aged locks;
+ * - do not cancel more than \a max locks;
+ * - GET the found locks and add them into the \a cancels list.
+ *
+ * A client lock can be added to the l_bl_ast list only when it is
+ * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing
+ * CANCEL. There are the following use cases:
+ * ldlm_cancel_resource_local(), ldlm_cancel_lru_local() and
+ * ldlm_cli_cancel(), which check and set this flag properly. As any
+ * attempt to cancel a lock rely on this flag, l_bl_ast list is accessed
+ * later without any special locking.
+ *
+ * Calling policies for enabled LRU resize:
+ * ----------------------------------------
+ * flags & LDLM_CANCEL_LRUR - use LRU resize policy (SLV from server) to
+ * cancel not more than \a count locks;
+ *
+ * flags & LDLM_CANCEL_PASSED - cancel \a count number of old locks (located at
+ * the beginning of LRU list);
+ *
+ * flags & LDLM_CANCEL_SHRINK - cancel not more than \a count locks according to
+ * memory pressre policy function;
+ *
+ * flags & LDLM_CANCEL_AGED - cancel \a count locks according to "aged policy".
+ *
+ * flags & LDLM_CANCEL_NO_WAIT - cancel as many unused locks as possible
+ * (typically before replaying locks) w/o
+ * sending any RPCs or waiting for any
+ * outstanding RPC to complete.
+ */
+static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, struct list_head *cancels,
+ int count, int max, int flags)
+{
+ ldlm_cancel_lru_policy_t pf;
+ struct ldlm_lock *lock, *next;
+ int added = 0, unused, remained;
+ ENTRY;
+
+ spin_lock(&ns->ns_lock);
+ unused = ns->ns_nr_unused;
+ remained = unused;
+
+ if (!ns_connect_lru_resize(ns))
+ count += unused - ns->ns_max_unused;
+
+ pf = ldlm_cancel_lru_policy(ns, flags);
+ LASSERT(pf != NULL);
+
+ while (!list_empty(&ns->ns_unused_list)) {
+ ldlm_policy_res_t result;
+
+ /* all unused locks */
+ if (remained-- <= 0)
+ break;
+
+ /* For any flags, stop scanning if @max is reached. */
+ if (max && added >= max)
+ break;
+
+ list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
+ l_lru) {
+ /* No locks which got blocking requests. */
+ LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+
+ if (flags & LDLM_CANCEL_NO_WAIT &&
+ lock->l_flags & LDLM_FL_SKIPPED)
+ /* already processed */
+ continue;
+
+ /* Somebody is already doing CANCEL. No need for this
+ * lock in LRU, do not traverse it again. */
+ if (!(lock->l_flags & LDLM_FL_CANCELING))
+ break;
+
+ ldlm_lock_remove_from_lru_nolock(lock);
+ }
+ if (&lock->l_lru == &ns->ns_unused_list)
+ break;
+
+ LDLM_LOCK_GET(lock);
+ spin_unlock(&ns->ns_lock);
+ lu_ref_add(&lock->l_reference, __FUNCTION__, current);
+
+ /* Pass the lock through the policy filter and see if it
+ * should stay in LRU.
+ *
+ * Even for shrinker policy we stop scanning if
+ * we find a lock that should stay in the cache.
+ * We should take into account lock age anyway
+ * as a new lock is a valuable resource even if
+ * it has a low weight.
+ *
+ * That is, for shrinker policy we drop only
+ * old locks, but additionally choose them by
+ * their weight. Big extent locks will stay in
+ * the cache. */
+ result = pf(ns, lock, unused, added, count);
+ if (result == LDLM_POLICY_KEEP_LOCK) {
+ lu_ref_del(&lock->l_reference,
+ __FUNCTION__, current);
+ LDLM_LOCK_RELEASE(lock);
+ spin_lock(&ns->ns_lock);
+ break;
+ }
+ if (result == LDLM_POLICY_SKIP_LOCK) {
+ lu_ref_del(&lock->l_reference,
+ __func__, current);
+ LDLM_LOCK_RELEASE(lock);
+ spin_lock(&ns->ns_lock);
+ continue;
+ }
+
+ lock_res_and_lock(lock);
+ /* Check flags again under the lock. */
+ if ((lock->l_flags & LDLM_FL_CANCELING) ||
+ (ldlm_lock_remove_from_lru(lock) == 0)) {
+ /* Another thread is removing lock from LRU, or
+ * somebody is already doing CANCEL, or there
+ * is a blocking request which will send cancel
+ * by itself, or the lock is no longer unused. */
+ unlock_res_and_lock(lock);
+ lu_ref_del(&lock->l_reference,
+ __FUNCTION__, current);
+ LDLM_LOCK_RELEASE(lock);
+ spin_lock(&ns->ns_lock);
+ continue;
+ }
+ LASSERT(!lock->l_readers && !lock->l_writers);
+
+ /* If we have chosen to cancel this lock voluntarily, we
+ * better send cancel notification to server, so that it
+ * frees appropriate state. This might lead to a race
+ * where while we are doing cancel here, server is also
+ * silently cancelling this lock. */
+ lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
+
+ /* Setting the CBPENDING flag is a little misleading,
+ * but prevents an important race; namely, once
+ * CBPENDING is set, the lock can accumulate no more
+ * readers/writers. Since readers and writers are
+ * already zero here, ldlm_lock_decref() won't see
+ * this flag and call l_blocking_ast */
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
+
+ /* We can't re-add to l_lru as it confuses the
+ * refcounting in ldlm_lock_remove_from_lru() if an AST
+ * arrives after we drop lr_lock below. We use l_bl_ast
+ * and can't use l_pending_chain as it is used both on
+ * server and client nevertheless bug 5666 says it is
+ * used only on server */
+ LASSERT(list_empty(&lock->l_bl_ast));
+ list_add(&lock->l_bl_ast, cancels);
+ unlock_res_and_lock(lock);
+ lu_ref_del(&lock->l_reference, __FUNCTION__, current);
+ spin_lock(&ns->ns_lock);
+ added++;
+ unused--;
+ }
+ spin_unlock(&ns->ns_lock);
+ RETURN(added);
+}
+
+int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
+ int count, int max, ldlm_cancel_flags_t cancel_flags,
+ int flags)
+{
+ int added;
+ added = ldlm_prepare_lru_list(ns, cancels, count, max, flags);
+ if (added <= 0)
+ return added;
+ return ldlm_cli_cancel_list_local(cancels, added, cancel_flags);
+}
+
+/**
+ * Cancel at least \a nr locks from given namespace LRU.
+ *
+ * When called with LCF_ASYNC the blocking callback will be handled
+ * in a thread and this function will return after the thread has been
+ * asked to call the callback. When called with LCF_ASYNC the blocking
+ * callback will be performed in this function.
+ */
+int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
+ ldlm_cancel_flags_t cancel_flags,
+ int flags)
+{
+ LIST_HEAD(cancels);
+ int count, rc;
+ ENTRY;
+
+ /* Just prepare the list of locks, do not actually cancel them yet.
+ * Locks are cancelled later in a separate thread. */
+ count = ldlm_prepare_lru_list(ns, &cancels, nr, 0, flags);
+ rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count, cancel_flags);
+ if (rc == 0)
+ RETURN(count);
+
+ RETURN(0);
+}
+
+/**
+ * Find and cancel locally unused locks found on resource, matched to the
+ * given policy, mode. GET the found locks and add them into the \a cancels
+ * list.
+ */
+int ldlm_cancel_resource_local(struct ldlm_resource *res,
+ struct list_head *cancels,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode, int lock_flags,
+ ldlm_cancel_flags_t cancel_flags, void *opaque)
+{
+ struct ldlm_lock *lock;
+ int count = 0;
+ ENTRY;
+
+ lock_res(res);
+ list_for_each_entry(lock, &res->lr_granted, l_res_link) {
+ if (opaque != NULL && lock->l_ast_data != opaque) {
+ LDLM_ERROR(lock, "data %p doesn't match opaque %p",
+ lock->l_ast_data, opaque);
+ //LBUG();
+ continue;
+ }
+
+ if (lock->l_readers || lock->l_writers)
+ continue;
+
+ /* If somebody is already doing CANCEL, or blocking AST came,
+ * skip this lock. */
+ if (lock->l_flags & LDLM_FL_BL_AST ||
+ lock->l_flags & LDLM_FL_CANCELING)
+ continue;
+
+ if (lockmode_compat(lock->l_granted_mode, mode))
+ continue;
+
+ /* If policy is given and this is IBITS lock, add to list only
+ * those locks that match by policy. */
+ if (policy && (lock->l_resource->lr_type == LDLM_IBITS) &&
+ !(lock->l_policy_data.l_inodebits.bits &
+ policy->l_inodebits.bits))
+ continue;
+
+ /* See CBPENDING comment in ldlm_cancel_lru */
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
+ lock_flags;
+
+ LASSERT(list_empty(&lock->l_bl_ast));
+ list_add(&lock->l_bl_ast, cancels);
+ LDLM_LOCK_GET(lock);
+ count++;
+ }
+ unlock_res(res);
+
+ RETURN(ldlm_cli_cancel_list_local(cancels, count, cancel_flags));
+}
+EXPORT_SYMBOL(ldlm_cancel_resource_local);
+
+/**
+ * Cancel client-side locks from a list and send/prepare cancel RPCs to the
+ * server.
+ * If \a req is NULL, send CANCEL request to server with handles of locks
+ * in the \a cancels. If EARLY_CANCEL is not supported, send CANCEL requests
+ * separately per lock.
+ * If \a req is not NULL, put handles of locks in \a cancels into the request
+ * buffer at the offset \a off.
+ * Destroy \a cancels at the end.
+ */
+int ldlm_cli_cancel_list(struct list_head *cancels, int count,
+ struct ptlrpc_request *req, ldlm_cancel_flags_t flags)
+{
+ struct ldlm_lock *lock;
+ int res = 0;
+ ENTRY;
+
+ if (list_empty(cancels) || count == 0)
+ RETURN(0);
+
+ /* XXX: requests (both batched and not) could be sent in parallel.
+ * Usually it is enough to have just 1 RPC, but it is possible that
+ * there are too many locks to be cancelled in LRU or on a resource.
+ * It would also speed up the case when the server does not support
+ * the feature. */
+ while (count > 0) {
+ LASSERT(!list_empty(cancels));
+ lock = list_entry(cancels->next, struct ldlm_lock,
+ l_bl_ast);
+ LASSERT(lock->l_conn_export);
+
+ if (exp_connect_cancelset(lock->l_conn_export)) {
+ res = count;
+ if (req)
+ ldlm_cancel_pack(req, cancels, count);
+ else
+ res = ldlm_cli_cancel_req(lock->l_conn_export,
+ cancels, count,
+ flags);
+ } else {
+ res = ldlm_cli_cancel_req(lock->l_conn_export,
+ cancels, 1, flags);
+ }
+
+ if (res < 0) {
+ CDEBUG_LIMIT(res == -ESHUTDOWN ? D_DLMTRACE : D_ERROR,
+ "ldlm_cli_cancel_list: %d\n", res);
+ res = count;
+ }
+
+ count -= res;
+ ldlm_lock_list_put(cancels, l_bl_ast, res);
+ }
+ LASSERT(count == 0);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_cli_cancel_list);
+
+/**
+ * Cancel all locks on a resource that have 0 readers/writers.
+ *
+ * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying
+ * to notify the server. */
+int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags,
+ void *opaque)
+{
+ struct ldlm_resource *res;
+ LIST_HEAD(cancels);
+ int count;
+ int rc;
+ ENTRY;
+
+ res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
+ if (res == NULL) {
+ /* This is not a problem. */
+ CDEBUG(D_INFO, "No resource "LPU64"\n", res_id->name[0]);
+ RETURN(0);
+ }
+
+ LDLM_RESOURCE_ADDREF(res);
+ count = ldlm_cancel_resource_local(res, &cancels, policy, mode,
+ 0, flags | LCF_BL_AST, opaque);
+ rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags);
+ if (rc != ELDLM_OK)
+ CERROR("ldlm_cli_cancel_unused_resource: %d\n", rc);
+
+ LDLM_RESOURCE_DELREF(res);
+ ldlm_resource_putref(res);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ldlm_cli_cancel_unused_resource);
+
+struct ldlm_cli_cancel_arg {
+ int lc_flags;
+ void *lc_opaque;
+};
+
+static int ldlm_cli_hash_cancel_unused(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ struct ldlm_cli_cancel_arg *lc = arg;
+ int rc;
+
+ rc = ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
+ NULL, LCK_MINMODE,
+ lc->lc_flags, lc->lc_opaque);
+ if (rc != 0) {
+ CERROR("ldlm_cli_cancel_unused ("LPU64"): %d\n",
+ res->lr_name.name[0], rc);
+ }
+ /* must return 0 for hash iteration */
+ return 0;
+}
+
+/**
+ * Cancel all locks on a namespace (or a specific resource, if given)
+ * that have 0 readers/writers.
+ *
+ * If flags & LCF_LOCAL, throw the locks away without trying
+ * to notify the server. */
+int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_cancel_flags_t flags, void *opaque)
+{
+ struct ldlm_cli_cancel_arg arg = {
+ .lc_flags = flags,
+ .lc_opaque = opaque,
+ };
+
+ ENTRY;
+
+ if (ns == NULL)
+ RETURN(ELDLM_OK);
+
+ if (res_id != NULL) {
+ RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, NULL,
+ LCK_MINMODE, flags,
+ opaque));
+ } else {
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_cli_hash_cancel_unused, &arg);
+ RETURN(ELDLM_OK);
+ }
+}
+EXPORT_SYMBOL(ldlm_cli_cancel_unused);
+
+/* Lock iterators. */
+
+int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
+ void *closure)
+{
+ struct list_head *tmp, *next;
+ struct ldlm_lock *lock;
+ int rc = LDLM_ITER_CONTINUE;
+
+ ENTRY;
+
+ if (!res)
+ RETURN(LDLM_ITER_CONTINUE);
+
+ lock_res(res);
+ list_for_each_safe(tmp, next, &res->lr_granted) {
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ if (iter(lock, closure) == LDLM_ITER_STOP)
+ GOTO(out, rc = LDLM_ITER_STOP);
+ }
+
+ list_for_each_safe(tmp, next, &res->lr_converting) {
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ if (iter(lock, closure) == LDLM_ITER_STOP)
+ GOTO(out, rc = LDLM_ITER_STOP);
+ }
+
+ list_for_each_safe(tmp, next, &res->lr_waiting) {
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+ if (iter(lock, closure) == LDLM_ITER_STOP)
+ GOTO(out, rc = LDLM_ITER_STOP);
+ }
+ out:
+ unlock_res(res);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_resource_foreach);
+
+struct iter_helper_data {
+ ldlm_iterator_t iter;
+ void *closure;
+};
+
+static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure)
+{
+ struct iter_helper_data *helper = closure;
+ return helper->iter(lock, helper->closure);
+}
+
+static int ldlm_res_iter_helper(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+
+ return ldlm_resource_foreach(res, ldlm_iter_helper, arg) ==
+ LDLM_ITER_STOP;
+}
+
+void ldlm_namespace_foreach(struct ldlm_namespace *ns,
+ ldlm_iterator_t iter, void *closure)
+
+{
+ struct iter_helper_data helper = { iter: iter, closure: closure };
+
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_res_iter_helper, &helper);
+
+}
+EXPORT_SYMBOL(ldlm_namespace_foreach);
+
+/* non-blocking function to manipulate a lock whose cb_data is being put away.
+ * return 0: find no resource
+ * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE.
+ * < 0: errors
+ */
+int ldlm_resource_iterate(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_iterator_t iter, void *data)
+{
+ struct ldlm_resource *res;
+ int rc;
+ ENTRY;
+
+ if (ns == NULL) {
+ CERROR("must pass in namespace\n");
+ LBUG();
+ }
+
+ res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
+ if (res == NULL)
+ RETURN(0);
+
+ LDLM_RESOURCE_ADDREF(res);
+ rc = ldlm_resource_foreach(res, iter, data);
+ LDLM_RESOURCE_DELREF(res);
+ ldlm_resource_putref(res);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_resource_iterate);
+
+/* Lock replay */
+
+static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
+{
+ struct list_head *list = closure;
+
+ /* we use l_pending_chain here, because it's unused on clients. */
+ LASSERTF(list_empty(&lock->l_pending_chain),
+ "lock %p next %p prev %p\n",
+ lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev);
+ /* bug 9573: don't replay locks left after eviction, or
+ * bug 17614: locks being actively cancelled. Get a reference
+ * on a lock so that it does not disapear under us (e.g. due to cancel)
+ */
+ if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) {
+ list_add(&lock->l_pending_chain, list);
+ LDLM_LOCK_GET(lock);
+ }
+
+ return LDLM_ITER_CONTINUE;
+}
+
+static int replay_lock_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ struct ldlm_async_args *aa, int rc)
+{
+ struct ldlm_lock *lock;
+ struct ldlm_reply *reply;
+ struct obd_export *exp;
+
+ ENTRY;
+ atomic_dec(&req->rq_import->imp_replay_inflight);
+ if (rc != ELDLM_OK)
+ GOTO(out, rc);
+
+
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
+ if (reply == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ lock = ldlm_handle2lock(&aa->lock_handle);
+ if (!lock) {
+ CERROR("received replay ack for unknown local cookie "LPX64
+ " remote cookie "LPX64 " from server %s id %s\n",
+ aa->lock_handle.cookie, reply->lock_handle.cookie,
+ req->rq_export->exp_client_uuid.uuid,
+ libcfs_id2str(req->rq_peer));
+ GOTO(out, rc = -ESTALE);
+ }
+
+ /* Key change rehash lock in per-export hash with new key */
+ exp = req->rq_export;
+ if (exp && exp->exp_lock_hash) {
+ /* In the function below, .hs_keycmp resolves to
+ * ldlm_export_lock_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ cfs_hash_rehash_key(exp->exp_lock_hash,
+ &lock->l_remote_handle,
+ &reply->lock_handle,
+ &lock->l_exp_hash);
+ } else {
+ lock->l_remote_handle = reply->lock_handle;
+ }
+
+ LDLM_DEBUG(lock, "replayed lock:");
+ ptlrpc_import_recovery_state_machine(req->rq_import);
+ LDLM_LOCK_PUT(lock);
+out:
+ if (rc != ELDLM_OK)
+ ptlrpc_connect_import(req->rq_import);
+
+ RETURN(rc);
+}
+
+static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
+{
+ struct ptlrpc_request *req;
+ struct ldlm_async_args *aa;
+ struct ldlm_request *body;
+ int flags;
+ ENTRY;
+
+
+ /* Bug 11974: Do not replay a lock which is actively being canceled */
+ if (lock->l_flags & LDLM_FL_CANCELING) {
+ LDLM_DEBUG(lock, "Not replaying canceled lock:");
+ RETURN(0);
+ }
+
+ /* If this is reply-less callback lock, we cannot replay it, since
+ * server might have long dropped it, but notification of that event was
+ * lost by network. (and server granted conflicting lock already) */
+ if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+ LDLM_DEBUG(lock, "Not replaying reply-less lock:");
+ ldlm_lock_cancel(lock);
+ RETURN(0);
+ }
+
+ /*
+ * If granted mode matches the requested mode, this lock is granted.
+ *
+ * If they differ, but we have a granted mode, then we were granted
+ * one mode and now want another: ergo, converting.
+ *
+ * If we haven't been granted anything and are on a resource list,
+ * then we're blocked/waiting.
+ *
+ * If we haven't been granted anything and we're NOT on a resource list,
+ * then we haven't got a reply yet and don't have a known disposition.
+ * This happens whenever a lock enqueue is the request that triggers
+ * recovery.
+ */
+ if (lock->l_granted_mode == lock->l_req_mode)
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED;
+ else if (lock->l_granted_mode)
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV;
+ else if (!list_empty(&lock->l_res_link))
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
+ else
+ flags = LDLM_FL_REPLAY;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LDLM_ENQUEUE,
+ LUSTRE_DLM_VERSION, LDLM_ENQUEUE);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ /* We're part of recovery, so don't wait for it. */
+ req->rq_send_state = LUSTRE_IMP_REPLAY_LOCKS;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+ ldlm_lock2desc(lock, &body->lock_desc);
+ body->lock_flags = ldlm_flags_to_wire(flags);
+
+ ldlm_lock2handle(lock, &body->lock_handle[0]);
+ if (lock->l_lvb_len > 0)
+ req_capsule_extend(&req->rq_pill, &RQF_LDLM_ENQUEUE_LVB);
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
+ lock->l_lvb_len);
+ ptlrpc_request_set_replen(req);
+ /* notify the server we've replayed all requests.
+ * also, we mark the request to be put on a dedicated
+ * queue to be processed after all request replayes.
+ * bug 6063 */
+ lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
+
+ LDLM_DEBUG(lock, "replaying lock:");
+
+ atomic_inc(&req->rq_import->imp_replay_inflight);
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ aa->lock_handle = body->lock_handle[0];
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+
+ RETURN(0);
+}
+
+/**
+ * Cancel as many unused locks as possible before replay. since we are
+ * in recovery, we can't wait for any outstanding RPCs to send any RPC
+ * to the server.
+ *
+ * Called only in recovery before replaying locks. there is no need to
+ * replay locks that are unused. since the clients may hold thousands of
+ * cached unused locks, dropping the unused locks can greatly reduce the
+ * load on the servers at recovery time.
+ */
+static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
+{
+ int canceled;
+ LIST_HEAD(cancels);
+
+ CDEBUG(D_DLMTRACE, "Dropping as many unused locks as possible before"
+ "replay for namespace %s (%d)\n",
+ ldlm_ns_name(ns), ns->ns_nr_unused);
+
+ /* We don't need to care whether or not LRU resize is enabled
+ * because the LDLM_CANCEL_NO_WAIT policy doesn't use the
+ * count parameter */
+ canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0,
+ LCF_LOCAL, LDLM_CANCEL_NO_WAIT);
+
+ CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
+ canceled, ldlm_ns_name(ns));
+}
+
+int ldlm_replay_locks(struct obd_import *imp)
+{
+ struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
+ LIST_HEAD(list);
+ struct ldlm_lock *lock, *next;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+
+ /* don't replay locks if import failed recovery */
+ if (imp->imp_vbr_failed)
+ RETURN(0);
+
+ /* ensure this doesn't fall to 0 before all have been queued */
+ atomic_inc(&imp->imp_replay_inflight);
+
+ if (ldlm_cancel_unused_locks_before_replay)
+ ldlm_cancel_unused_locks_for_replay(ns);
+
+ ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
+
+ list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
+ list_del_init(&lock->l_pending_chain);
+ if (rc) {
+ LDLM_LOCK_RELEASE(lock);
+ continue; /* or try to do the rest? */
+ }
+ rc = replay_one_lock(imp, lock);
+ LDLM_LOCK_RELEASE(lock);
+ }
+
+ atomic_dec(&imp->imp_replay_inflight);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ldlm_replay_locks);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
new file mode 100644
index 000000000000..9052dc5e7ad2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -0,0 +1,1409 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_resource.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Peter Braam <braam@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+# include <lustre_dlm.h>
+
+#include <lustre_fid.h>
+#include <obd_class.h>
+#include "ldlm_internal.h"
+
+struct kmem_cache *ldlm_resource_slab, *ldlm_lock_slab;
+
+atomic_t ldlm_srv_namespace_nr = ATOMIC_INIT(0);
+atomic_t ldlm_cli_namespace_nr = ATOMIC_INIT(0);
+
+struct mutex ldlm_srv_namespace_lock;
+LIST_HEAD(ldlm_srv_namespace_list);
+
+struct mutex ldlm_cli_namespace_lock;
+LIST_HEAD(ldlm_cli_namespace_list);
+
+proc_dir_entry_t *ldlm_type_proc_dir = NULL;
+proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
+proc_dir_entry_t *ldlm_svc_proc_dir = NULL;
+
+extern unsigned int ldlm_cancel_unused_locks_before_replay;
+
+/* during debug dump certain amount of granted locks for one resource to avoid
+ * DDOS. */
+unsigned int ldlm_dump_granted_max = 256;
+
+#ifdef LPROCFS
+static ssize_t lprocfs_wr_dump_ns(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
+ ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
+ RETURN(count);
+}
+LPROC_SEQ_FOPS_WR_ONLY(ldlm, dump_ns);
+
+LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
+LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
+
+int ldlm_proc_setup(void)
+{
+ int rc;
+ struct lprocfs_vars list[] = {
+ { "dump_namespaces", &ldlm_dump_ns_fops, 0, 0222 },
+ { "dump_granted_max", &ldlm_rw_uint_fops,
+ &ldlm_dump_granted_max },
+ { "cancel_unused_locks_before_replay", &ldlm_rw_uint_fops,
+ &ldlm_cancel_unused_locks_before_replay },
+ { NULL }};
+ ENTRY;
+ LASSERT(ldlm_ns_proc_dir == NULL);
+
+ ldlm_type_proc_dir = lprocfs_register(OBD_LDLM_DEVICENAME,
+ proc_lustre_root,
+ NULL, NULL);
+ if (IS_ERR(ldlm_type_proc_dir)) {
+ CERROR("LProcFS failed in ldlm-init\n");
+ rc = PTR_ERR(ldlm_type_proc_dir);
+ GOTO(err, rc);
+ }
+
+ ldlm_ns_proc_dir = lprocfs_register("namespaces",
+ ldlm_type_proc_dir,
+ NULL, NULL);
+ if (IS_ERR(ldlm_ns_proc_dir)) {
+ CERROR("LProcFS failed in ldlm-init\n");
+ rc = PTR_ERR(ldlm_ns_proc_dir);
+ GOTO(err_type, rc);
+ }
+
+ ldlm_svc_proc_dir = lprocfs_register("services",
+ ldlm_type_proc_dir,
+ NULL, NULL);
+ if (IS_ERR(ldlm_svc_proc_dir)) {
+ CERROR("LProcFS failed in ldlm-init\n");
+ rc = PTR_ERR(ldlm_svc_proc_dir);
+ GOTO(err_ns, rc);
+ }
+
+ rc = lprocfs_add_vars(ldlm_type_proc_dir, list, NULL);
+
+ RETURN(0);
+
+err_ns:
+ lprocfs_remove(&ldlm_ns_proc_dir);
+err_type:
+ lprocfs_remove(&ldlm_type_proc_dir);
+err:
+ ldlm_svc_proc_dir = NULL;
+ ldlm_type_proc_dir = NULL;
+ ldlm_ns_proc_dir = NULL;
+ RETURN(rc);
+}
+
+void ldlm_proc_cleanup(void)
+{
+ if (ldlm_svc_proc_dir)
+ lprocfs_remove(&ldlm_svc_proc_dir);
+
+ if (ldlm_ns_proc_dir)
+ lprocfs_remove(&ldlm_ns_proc_dir);
+
+ if (ldlm_type_proc_dir)
+ lprocfs_remove(&ldlm_type_proc_dir);
+
+ ldlm_svc_proc_dir = NULL;
+ ldlm_type_proc_dir = NULL;
+ ldlm_ns_proc_dir = NULL;
+}
+
+static int lprocfs_ns_resources_seq_show(struct seq_file *m, void *v)
+{
+ struct ldlm_namespace *ns = m->private;
+ __u64 res = 0;
+ cfs_hash_bd_t bd;
+ int i;
+
+ /* result is not strictly consistant */
+ cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, i)
+ res += cfs_hash_bd_count_get(&bd);
+ return lprocfs_rd_u64(m, &res);
+}
+LPROC_SEQ_FOPS_RO(lprocfs_ns_resources);
+
+static int lprocfs_ns_locks_seq_show(struct seq_file *m, void *v)
+{
+ struct ldlm_namespace *ns = m->private;
+ __u64 locks;
+
+ locks = lprocfs_stats_collector(ns->ns_stats, LDLM_NSS_LOCKS,
+ LPROCFS_FIELDS_FLAGS_SUM);
+ return lprocfs_rd_u64(m, &locks);
+}
+LPROC_SEQ_FOPS_RO(lprocfs_ns_locks);
+
+static int lprocfs_lru_size_seq_show(struct seq_file *m, void *v)
+{
+ struct ldlm_namespace *ns = m->private;
+ __u32 *nr = &ns->ns_max_unused;
+
+ if (ns_connect_lru_resize(ns))
+ nr = &ns->ns_nr_unused;
+ return lprocfs_rd_uint(m, nr);
+}
+
+static ssize_t lprocfs_lru_size_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ldlm_namespace *ns = ((struct seq_file *)file->private_data)->private;
+ char dummy[MAX_STRING_SIZE + 1], *end;
+ unsigned long tmp;
+ int lru_resize;
+
+ dummy[MAX_STRING_SIZE] = '\0';
+ if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
+ return -EFAULT;
+
+ if (strncmp(dummy, "clear", 5) == 0) {
+ CDEBUG(D_DLMTRACE,
+ "dropping all unused locks from namespace %s\n",
+ ldlm_ns_name(ns));
+ if (ns_connect_lru_resize(ns)) {
+ int canceled, unused = ns->ns_nr_unused;
+
+ /* Try to cancel all @ns_nr_unused locks. */
+ canceled = ldlm_cancel_lru(ns, unused, 0,
+ LDLM_CANCEL_PASSED);
+ if (canceled < unused) {
+ CDEBUG(D_DLMTRACE,
+ "not all requested locks are canceled, "
+ "requested: %d, canceled: %d\n", unused,
+ canceled);
+ return -EINVAL;
+ }
+ } else {
+ tmp = ns->ns_max_unused;
+ ns->ns_max_unused = 0;
+ ldlm_cancel_lru(ns, 0, 0, LDLM_CANCEL_PASSED);
+ ns->ns_max_unused = tmp;
+ }
+ return count;
+ }
+
+ tmp = simple_strtoul(dummy, &end, 0);
+ if (dummy == end) {
+ CERROR("invalid value written\n");
+ return -EINVAL;
+ }
+ lru_resize = (tmp == 0);
+
+ if (ns_connect_lru_resize(ns)) {
+ if (!lru_resize)
+ ns->ns_max_unused = (unsigned int)tmp;
+
+ if (tmp > ns->ns_nr_unused)
+ tmp = ns->ns_nr_unused;
+ tmp = ns->ns_nr_unused - tmp;
+
+ CDEBUG(D_DLMTRACE,
+ "changing namespace %s unused locks from %u to %u\n",
+ ldlm_ns_name(ns), ns->ns_nr_unused,
+ (unsigned int)tmp);
+ ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_CANCEL_PASSED);
+
+ if (!lru_resize) {
+ CDEBUG(D_DLMTRACE,
+ "disable lru_resize for namespace %s\n",
+ ldlm_ns_name(ns));
+ ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
+ }
+ } else {
+ CDEBUG(D_DLMTRACE,
+ "changing namespace %s max_unused from %u to %u\n",
+ ldlm_ns_name(ns), ns->ns_max_unused,
+ (unsigned int)tmp);
+ ns->ns_max_unused = (unsigned int)tmp;
+ ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_CANCEL_PASSED);
+
+ /* Make sure that LRU resize was originally supported before
+ * turning it on here. */
+ if (lru_resize &&
+ (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
+ CDEBUG(D_DLMTRACE,
+ "enable lru_resize for namespace %s\n",
+ ldlm_ns_name(ns));
+ ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+ }
+ }
+
+ return count;
+}
+LPROC_SEQ_FOPS(lprocfs_lru_size);
+
+static int lprocfs_elc_seq_show(struct seq_file *m, void *v)
+{
+ struct ldlm_namespace *ns = m->private;
+ unsigned int supp = ns_connect_cancelset(ns);
+
+ return lprocfs_rd_uint(m, &supp);
+}
+
+static ssize_t lprocfs_elc_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ldlm_namespace *ns = ((struct seq_file *)file->private_data)->private;
+ unsigned int supp = -1;
+ int rc;
+
+ rc = lprocfs_wr_uint(file, buffer, count, &supp);
+ if (rc < 0)
+ return rc;
+
+ if (supp == 0)
+ ns->ns_connect_flags &= ~OBD_CONNECT_CANCELSET;
+ else if (ns->ns_orig_connect_flags & OBD_CONNECT_CANCELSET)
+ ns->ns_connect_flags |= OBD_CONNECT_CANCELSET;
+ return count;
+}
+LPROC_SEQ_FOPS(lprocfs_elc);
+
+void ldlm_namespace_proc_unregister(struct ldlm_namespace *ns)
+{
+ if (ns->ns_proc_dir_entry == NULL)
+ CERROR("dlm namespace %s has no procfs dir?\n",
+ ldlm_ns_name(ns));
+ else
+ lprocfs_remove(&ns->ns_proc_dir_entry);
+
+ if (ns->ns_stats != NULL)
+ lprocfs_free_stats(&ns->ns_stats);
+}
+
+#define LDLM_NS_ADD_VAR(name, var, ops) \
+ do { \
+ snprintf(lock_name, MAX_STRING_SIZE, name); \
+ lock_vars[0].data = var; \
+ lock_vars[0].fops = ops; \
+ lprocfs_add_vars(ns_pde, lock_vars, 0); \
+ } while (0)
+
+int ldlm_namespace_proc_register(struct ldlm_namespace *ns)
+{
+ struct lprocfs_vars lock_vars[2];
+ char lock_name[MAX_STRING_SIZE + 1];
+ proc_dir_entry_t *ns_pde;
+
+ LASSERT(ns != NULL);
+ LASSERT(ns->ns_rs_hash != NULL);
+
+ if (ns->ns_proc_dir_entry != NULL) {
+ ns_pde = ns->ns_proc_dir_entry;
+ } else {
+ ns_pde = proc_mkdir(ldlm_ns_name(ns), ldlm_ns_proc_dir);
+ if (ns_pde == NULL)
+ return -ENOMEM;
+ ns->ns_proc_dir_entry = ns_pde;
+ }
+
+ ns->ns_stats = lprocfs_alloc_stats(LDLM_NSS_LAST, 0);
+ if (ns->ns_stats == NULL)
+ return -ENOMEM;
+
+ lprocfs_counter_init(ns->ns_stats, LDLM_NSS_LOCKS,
+ LPROCFS_CNTR_AVGMINMAX, "locks", "locks");
+
+ lock_name[MAX_STRING_SIZE] = '\0';
+
+ memset(lock_vars, 0, sizeof(lock_vars));
+ lock_vars[0].name = lock_name;
+
+ LDLM_NS_ADD_VAR("resource_count", ns, &lprocfs_ns_resources_fops);
+ LDLM_NS_ADD_VAR("lock_count", ns, &lprocfs_ns_locks_fops);
+
+ if (ns_is_client(ns)) {
+ LDLM_NS_ADD_VAR("lock_unused_count", &ns->ns_nr_unused,
+ &ldlm_uint_fops);
+ LDLM_NS_ADD_VAR("lru_size", ns, &lprocfs_lru_size_fops);
+ LDLM_NS_ADD_VAR("lru_max_age", &ns->ns_max_age,
+ &ldlm_rw_uint_fops);
+ LDLM_NS_ADD_VAR("early_lock_cancel", ns, &lprocfs_elc_fops);
+ } else {
+ LDLM_NS_ADD_VAR("ctime_age_limit", &ns->ns_ctime_age_limit,
+ &ldlm_rw_uint_fops);
+ LDLM_NS_ADD_VAR("lock_timeouts", &ns->ns_timeouts,
+ &ldlm_uint_fops);
+ LDLM_NS_ADD_VAR("max_nolock_bytes", &ns->ns_max_nolock_size,
+ &ldlm_rw_uint_fops);
+ LDLM_NS_ADD_VAR("contention_seconds", &ns->ns_contention_time,
+ &ldlm_rw_uint_fops);
+ LDLM_NS_ADD_VAR("contended_locks", &ns->ns_contended_locks,
+ &ldlm_rw_uint_fops);
+ LDLM_NS_ADD_VAR("max_parallel_ast", &ns->ns_max_parallel_ast,
+ &ldlm_rw_uint_fops);
+ }
+ return 0;
+}
+#undef MAX_STRING_SIZE
+#else /* LPROCFS */
+
+#define ldlm_namespace_proc_unregister(ns) ({;})
+#define ldlm_namespace_proc_register(ns) ({0;})
+
+#endif /* LPROCFS */
+
+static unsigned ldlm_res_hop_hash(cfs_hash_t *hs,
+ const void *key, unsigned mask)
+{
+ const struct ldlm_res_id *id = key;
+ unsigned val = 0;
+ unsigned i;
+
+ for (i = 0; i < RES_NAME_SIZE; i++)
+ val += id->name[i];
+ return val & mask;
+}
+
+static unsigned ldlm_res_hop_fid_hash(cfs_hash_t *hs,
+ const void *key, unsigned mask)
+{
+ const struct ldlm_res_id *id = key;
+ struct lu_fid fid;
+ __u32 hash;
+ __u32 val;
+
+ fid.f_seq = id->name[LUSTRE_RES_ID_SEQ_OFF];
+ fid.f_oid = (__u32)id->name[LUSTRE_RES_ID_VER_OID_OFF];
+ fid.f_ver = (__u32)(id->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
+
+ hash = fid_flatten32(&fid);
+ hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */
+ if (id->name[LUSTRE_RES_ID_HSH_OFF] != 0) {
+ val = id->name[LUSTRE_RES_ID_HSH_OFF];
+ hash += (val >> 5) + (val << 11);
+ } else {
+ val = fid_oid(&fid);
+ }
+ hash = cfs_hash_long(hash, hs->hs_bkt_bits);
+ /* give me another random factor */
+ hash -= cfs_hash_long((unsigned long)hs, val % 11 + 3);
+
+ hash <<= hs->hs_cur_bits - hs->hs_bkt_bits;
+ hash |= ldlm_res_hop_hash(hs, key, CFS_HASH_NBKT(hs) - 1);
+
+ return hash & mask;
+}
+
+static void *ldlm_res_hop_key(struct hlist_node *hnode)
+{
+ struct ldlm_resource *res;
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ return &res->lr_name;
+}
+
+static int ldlm_res_hop_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct ldlm_resource *res;
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ return ldlm_res_eq((const struct ldlm_res_id *)key,
+ (const struct ldlm_res_id *)&res->lr_name);
+}
+
+static void *ldlm_res_hop_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct ldlm_resource, lr_hash);
+}
+
+static void ldlm_res_hop_get_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_resource *res;
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ ldlm_resource_getref(res);
+}
+
+static void ldlm_res_hop_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_resource *res;
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ /* cfs_hash_for_each_nolock is the only chance we call it */
+ ldlm_resource_putref_locked(res);
+}
+
+static void ldlm_res_hop_put(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ldlm_resource *res;
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ ldlm_resource_putref(res);
+}
+
+cfs_hash_ops_t ldlm_ns_hash_ops = {
+ .hs_hash = ldlm_res_hop_hash,
+ .hs_key = ldlm_res_hop_key,
+ .hs_keycmp = ldlm_res_hop_keycmp,
+ .hs_keycpy = NULL,
+ .hs_object = ldlm_res_hop_object,
+ .hs_get = ldlm_res_hop_get_locked,
+ .hs_put_locked = ldlm_res_hop_put_locked,
+ .hs_put = ldlm_res_hop_put
+};
+
+cfs_hash_ops_t ldlm_ns_fid_hash_ops = {
+ .hs_hash = ldlm_res_hop_fid_hash,
+ .hs_key = ldlm_res_hop_key,
+ .hs_keycmp = ldlm_res_hop_keycmp,
+ .hs_keycpy = NULL,
+ .hs_object = ldlm_res_hop_object,
+ .hs_get = ldlm_res_hop_get_locked,
+ .hs_put_locked = ldlm_res_hop_put_locked,
+ .hs_put = ldlm_res_hop_put
+};
+
+typedef struct {
+ ldlm_ns_type_t nsd_type;
+ /** hash bucket bits */
+ unsigned nsd_bkt_bits;
+ /** hash bits */
+ unsigned nsd_all_bits;
+ /** hash operations */
+ cfs_hash_ops_t *nsd_hops;
+} ldlm_ns_hash_def_t;
+
+ldlm_ns_hash_def_t ldlm_ns_hash_defs[] =
+{
+ {
+ .nsd_type = LDLM_NS_TYPE_MDC,
+ .nsd_bkt_bits = 11,
+ .nsd_all_bits = 16,
+ .nsd_hops = &ldlm_ns_fid_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_MDT,
+ .nsd_bkt_bits = 14,
+ .nsd_all_bits = 21,
+ .nsd_hops = &ldlm_ns_fid_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_OSC,
+ .nsd_bkt_bits = 8,
+ .nsd_all_bits = 12,
+ .nsd_hops = &ldlm_ns_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_OST,
+ .nsd_bkt_bits = 11,
+ .nsd_all_bits = 17,
+ .nsd_hops = &ldlm_ns_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_MGC,
+ .nsd_bkt_bits = 4,
+ .nsd_all_bits = 4,
+ .nsd_hops = &ldlm_ns_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_MGT,
+ .nsd_bkt_bits = 4,
+ .nsd_all_bits = 4,
+ .nsd_hops = &ldlm_ns_hash_ops,
+ },
+ {
+ .nsd_type = LDLM_NS_TYPE_UNKNOWN,
+ },
+};
+
+/**
+ * Create and initialize new empty namespace.
+ */
+struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
+ ldlm_side_t client,
+ ldlm_appetite_t apt,
+ ldlm_ns_type_t ns_type)
+{
+ struct ldlm_namespace *ns = NULL;
+ struct ldlm_ns_bucket *nsb;
+ ldlm_ns_hash_def_t *nsd;
+ cfs_hash_bd_t bd;
+ int idx;
+ int rc;
+ ENTRY;
+
+ LASSERT(obd != NULL);
+
+ rc = ldlm_get_ref();
+ if (rc) {
+ CERROR("ldlm_get_ref failed: %d\n", rc);
+ RETURN(NULL);
+ }
+
+ for (idx = 0;;idx++) {
+ nsd = &ldlm_ns_hash_defs[idx];
+ if (nsd->nsd_type == LDLM_NS_TYPE_UNKNOWN) {
+ CERROR("Unknown type %d for ns %s\n", ns_type, name);
+ GOTO(out_ref, NULL);
+ }
+
+ if (nsd->nsd_type == ns_type)
+ break;
+ }
+
+ OBD_ALLOC_PTR(ns);
+ if (!ns)
+ GOTO(out_ref, NULL);
+
+ ns->ns_rs_hash = cfs_hash_create(name,
+ nsd->nsd_all_bits, nsd->nsd_all_bits,
+ nsd->nsd_bkt_bits, sizeof(*nsb),
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ nsd->nsd_hops,
+ CFS_HASH_DEPTH |
+ CFS_HASH_BIGNAME |
+ CFS_HASH_SPIN_BKTLOCK |
+ CFS_HASH_NO_ITEMREF);
+ if (ns->ns_rs_hash == NULL)
+ GOTO(out_ns, NULL);
+
+ cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, idx) {
+ nsb = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
+ at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0);
+ nsb->nsb_namespace = ns;
+ }
+
+ ns->ns_obd = obd;
+ ns->ns_appetite = apt;
+ ns->ns_client = client;
+
+ INIT_LIST_HEAD(&ns->ns_list_chain);
+ INIT_LIST_HEAD(&ns->ns_unused_list);
+ spin_lock_init(&ns->ns_lock);
+ atomic_set(&ns->ns_bref, 0);
+ init_waitqueue_head(&ns->ns_waitq);
+
+ ns->ns_max_nolock_size = NS_DEFAULT_MAX_NOLOCK_BYTES;
+ ns->ns_contention_time = NS_DEFAULT_CONTENTION_SECONDS;
+ ns->ns_contended_locks = NS_DEFAULT_CONTENDED_LOCKS;
+
+ ns->ns_max_parallel_ast = LDLM_DEFAULT_PARALLEL_AST_LIMIT;
+ ns->ns_nr_unused = 0;
+ ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
+ ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE;
+ ns->ns_ctime_age_limit = LDLM_CTIME_AGE_LIMIT;
+ ns->ns_timeouts = 0;
+ ns->ns_orig_connect_flags = 0;
+ ns->ns_connect_flags = 0;
+ ns->ns_stopping = 0;
+ rc = ldlm_namespace_proc_register(ns);
+ if (rc != 0) {
+ CERROR("Can't initialize ns proc, rc %d\n", rc);
+ GOTO(out_hash, rc);
+ }
+
+ idx = atomic_read(ldlm_namespace_nr(client));
+ rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client);
+ if (rc) {
+ CERROR("Can't initialize lock pool, rc %d\n", rc);
+ GOTO(out_proc, rc);
+ }
+
+ ldlm_namespace_register(ns, client);
+ RETURN(ns);
+out_proc:
+ ldlm_namespace_proc_unregister(ns);
+ ldlm_namespace_cleanup(ns, 0);
+out_hash:
+ cfs_hash_putref(ns->ns_rs_hash);
+out_ns:
+ OBD_FREE_PTR(ns);
+out_ref:
+ ldlm_put_ref();
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(ldlm_namespace_new);
+
+extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
+
+/**
+ * Cancel and destroy all locks on a resource.
+ *
+ * If flags contains FL_LOCAL_ONLY, don't try to tell the server, just
+ * clean up. This is currently only used for recovery, and we make
+ * certain assumptions as a result--notably, that we shouldn't cancel
+ * locks with refs.
+ */
+static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
+ __u64 flags)
+{
+ struct list_head *tmp;
+ int rc = 0, client = ns_is_client(ldlm_res_to_ns(res));
+ bool local_only = !!(flags & LDLM_FL_LOCAL_ONLY);
+
+ do {
+ struct ldlm_lock *lock = NULL;
+
+ /* First, we look for non-cleaned-yet lock
+ * all cleaned locks are marked by CLEANED flag. */
+ lock_res(res);
+ list_for_each(tmp, q) {
+ lock = list_entry(tmp, struct ldlm_lock,
+ l_res_link);
+ if (lock->l_flags & LDLM_FL_CLEANED) {
+ lock = NULL;
+ continue;
+ }
+ LDLM_LOCK_GET(lock);
+ lock->l_flags |= LDLM_FL_CLEANED;
+ break;
+ }
+
+ if (lock == NULL) {
+ unlock_res(res);
+ break;
+ }
+
+ /* Set CBPENDING so nothing in the cancellation path
+ * can match this lock. */
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ lock->l_flags |= LDLM_FL_FAILED;
+ lock->l_flags |= flags;
+
+ /* ... without sending a CANCEL message for local_only. */
+ if (local_only)
+ lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+
+ if (local_only && (lock->l_readers || lock->l_writers)) {
+ /* This is a little bit gross, but much better than the
+ * alternative: pretend that we got a blocking AST from
+ * the server, so that when the lock is decref'd, it
+ * will go away ... */
+ unlock_res(res);
+ LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
+ if (lock->l_completion_ast)
+ lock->l_completion_ast(lock, 0, NULL);
+ LDLM_LOCK_RELEASE(lock);
+ continue;
+ }
+
+ if (client) {
+ struct lustre_handle lockh;
+
+ unlock_res(res);
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ if (rc)
+ CERROR("ldlm_cli_cancel: %d\n", rc);
+ } else {
+ ldlm_resource_unlink_lock(lock);
+ unlock_res(res);
+ LDLM_DEBUG(lock, "Freeing a lock still held by a "
+ "client node");
+ ldlm_lock_destroy(lock);
+ }
+ LDLM_LOCK_RELEASE(lock);
+ } while (1);
+}
+
+static int ldlm_resource_clean(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ __u64 flags = *(__u64 *)arg;
+
+ cleanup_resource(res, &res->lr_granted, flags);
+ cleanup_resource(res, &res->lr_converting, flags);
+ cleanup_resource(res, &res->lr_waiting, flags);
+
+ return 0;
+}
+
+static int ldlm_resource_complain(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+
+ lock_res(res);
+ CERROR("Namespace %s resource refcount nonzero "
+ "(%d) after lock cleanup; forcing "
+ "cleanup.\n",
+ ldlm_ns_name(ldlm_res_to_ns(res)),
+ atomic_read(&res->lr_refcount) - 1);
+
+ CERROR("Resource: %p ("LPU64"/"LPU64"/"LPU64"/"
+ LPU64") (rc: %d)\n", res,
+ res->lr_name.name[0], res->lr_name.name[1],
+ res->lr_name.name[2], res->lr_name.name[3],
+ atomic_read(&res->lr_refcount) - 1);
+
+ ldlm_resource_dump(D_ERROR, res);
+ unlock_res(res);
+ return 0;
+}
+
+/**
+ * Cancel and destroy all locks in the namespace.
+ *
+ * Typically used during evictions when server notified client that it was
+ * evicted and all of its state needs to be destroyed.
+ * Also used during shutdown.
+ */
+int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags)
+{
+ if (ns == NULL) {
+ CDEBUG(D_INFO, "NULL ns, skipping cleanup\n");
+ return ELDLM_OK;
+ }
+
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, &flags);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, NULL);
+ return ELDLM_OK;
+}
+EXPORT_SYMBOL(ldlm_namespace_cleanup);
+
+/**
+ * Attempts to free namespace.
+ *
+ * Only used when namespace goes away, like during an unmount.
+ */
+static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force)
+{
+ ENTRY;
+
+ /* At shutdown time, don't call the cancellation callback */
+ ldlm_namespace_cleanup(ns, force ? LDLM_FL_LOCAL_ONLY : 0);
+
+ if (atomic_read(&ns->ns_bref) > 0) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ int rc;
+ CDEBUG(D_DLMTRACE,
+ "dlm namespace %s free waiting on refcount %d\n",
+ ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
+force_wait:
+ if (force)
+ lwi = LWI_TIMEOUT(obd_timeout * HZ / 4, NULL, NULL);
+
+ rc = l_wait_event(ns->ns_waitq,
+ atomic_read(&ns->ns_bref) == 0, &lwi);
+
+ /* Forced cleanups should be able to reclaim all references,
+ * so it's safe to wait forever... we can't leak locks... */
+ if (force && rc == -ETIMEDOUT) {
+ LCONSOLE_ERROR("Forced cleanup waiting for %s "
+ "namespace with %d resources in use, "
+ "(rc=%d)\n", ldlm_ns_name(ns),
+ atomic_read(&ns->ns_bref), rc);
+ GOTO(force_wait, rc);
+ }
+
+ if (atomic_read(&ns->ns_bref)) {
+ LCONSOLE_ERROR("Cleanup waiting for %s namespace "
+ "with %d resources in use, (rc=%d)\n",
+ ldlm_ns_name(ns),
+ atomic_read(&ns->ns_bref), rc);
+ RETURN(ELDLM_NAMESPACE_EXISTS);
+ }
+ CDEBUG(D_DLMTRACE, "dlm namespace %s free done waiting\n",
+ ldlm_ns_name(ns));
+ }
+
+ RETURN(ELDLM_OK);
+}
+
+/**
+ * Performs various cleanups for passed \a ns to make it drop refc and be
+ * ready for freeing. Waits for refc == 0.
+ *
+ * The following is done:
+ * (0) Unregister \a ns from its list to make inaccessible for potential
+ * users like pools thread and others;
+ * (1) Clear all locks in \a ns.
+ */
+void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
+ struct obd_import *imp,
+ int force)
+{
+ int rc;
+ ENTRY;
+ if (!ns) {
+ EXIT;
+ return;
+ }
+
+ spin_lock(&ns->ns_lock);
+ ns->ns_stopping = 1;
+ spin_unlock(&ns->ns_lock);
+
+ /*
+ * Can fail with -EINTR when force == 0 in which case try harder.
+ */
+ rc = __ldlm_namespace_free(ns, force);
+ if (rc != ELDLM_OK) {
+ if (imp) {
+ ptlrpc_disconnect_import(imp, 0);
+ ptlrpc_invalidate_import(imp);
+ }
+
+ /*
+ * With all requests dropped and the import inactive
+ * we are gaurenteed all reference will be dropped.
+ */
+ rc = __ldlm_namespace_free(ns, 1);
+ LASSERT(rc == 0);
+ }
+ EXIT;
+}
+
+/**
+ * Performs freeing memory structures related to \a ns. This is only done
+ * when ldlm_namespce_free_prior() successfully removed all resources
+ * referencing \a ns and its refc == 0.
+ */
+void ldlm_namespace_free_post(struct ldlm_namespace *ns)
+{
+ ENTRY;
+ if (!ns) {
+ EXIT;
+ return;
+ }
+
+ /* Make sure that nobody can find this ns in its list. */
+ ldlm_namespace_unregister(ns, ns->ns_client);
+ /* Fini pool _before_ parent proc dir is removed. This is important as
+ * ldlm_pool_fini() removes own proc dir which is child to @dir.
+ * Removing it after @dir may cause oops. */
+ ldlm_pool_fini(&ns->ns_pool);
+
+ ldlm_namespace_proc_unregister(ns);
+ cfs_hash_putref(ns->ns_rs_hash);
+ /* Namespace \a ns should be not on list at this time, otherwise
+ * this will cause issues related to using freed \a ns in poold
+ * thread. */
+ LASSERT(list_empty(&ns->ns_list_chain));
+ OBD_FREE_PTR(ns);
+ ldlm_put_ref();
+ EXIT;
+}
+
+/**
+ * Cleanup the resource, and free namespace.
+ * bug 12864:
+ * Deadlock issue:
+ * proc1: destroy import
+ * class_disconnect_export(grab cl_sem) ->
+ * -> ldlm_namespace_free ->
+ * -> lprocfs_remove(grab _lprocfs_lock).
+ * proc2: read proc info
+ * lprocfs_fops_read(grab _lprocfs_lock) ->
+ * -> osc_rd_active, etc(grab cl_sem).
+ *
+ * So that I have to split the ldlm_namespace_free into two parts - the first
+ * part ldlm_namespace_free_prior is used to cleanup the resource which is
+ * being used; the 2nd part ldlm_namespace_free_post is used to unregister the
+ * lprocfs entries, and then free memory. It will be called w/o cli->cl_sem
+ * held.
+ */
+void ldlm_namespace_free(struct ldlm_namespace *ns,
+ struct obd_import *imp,
+ int force)
+{
+ ldlm_namespace_free_prior(ns, imp, force);
+ ldlm_namespace_free_post(ns);
+}
+EXPORT_SYMBOL(ldlm_namespace_free);
+
+void ldlm_namespace_get(struct ldlm_namespace *ns)
+{
+ atomic_inc(&ns->ns_bref);
+}
+EXPORT_SYMBOL(ldlm_namespace_get);
+
+void ldlm_namespace_put(struct ldlm_namespace *ns)
+{
+ if (atomic_dec_and_lock(&ns->ns_bref, &ns->ns_lock)) {
+ wake_up(&ns->ns_waitq);
+ spin_unlock(&ns->ns_lock);
+ }
+}
+EXPORT_SYMBOL(ldlm_namespace_put);
+
+/** Register \a ns in the list of namespaces */
+void ldlm_namespace_register(struct ldlm_namespace *ns, ldlm_side_t client)
+{
+ mutex_lock(ldlm_namespace_lock(client));
+ LASSERT(list_empty(&ns->ns_list_chain));
+ list_add(&ns->ns_list_chain, ldlm_namespace_list(client));
+ atomic_inc(ldlm_namespace_nr(client));
+ mutex_unlock(ldlm_namespace_lock(client));
+}
+
+/** Unregister \a ns from the list of namespaces. */
+void ldlm_namespace_unregister(struct ldlm_namespace *ns, ldlm_side_t client)
+{
+ mutex_lock(ldlm_namespace_lock(client));
+ LASSERT(!list_empty(&ns->ns_list_chain));
+ /* Some asserts and possibly other parts of the code are still
+ * using list_empty(&ns->ns_list_chain). This is why it is
+ * important to use list_del_init() here. */
+ list_del_init(&ns->ns_list_chain);
+ atomic_dec(ldlm_namespace_nr(client));
+ mutex_unlock(ldlm_namespace_lock(client));
+}
+
+/** Should be called with ldlm_namespace_lock(client) taken. */
+void ldlm_namespace_move_locked(struct ldlm_namespace *ns, ldlm_side_t client)
+{
+ LASSERT(!list_empty(&ns->ns_list_chain));
+ LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
+ list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client));
+}
+
+/** Should be called with ldlm_namespace_lock(client) taken. */
+struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client)
+{
+ LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
+ LASSERT(!list_empty(ldlm_namespace_list(client)));
+ return container_of(ldlm_namespace_list(client)->next,
+ struct ldlm_namespace, ns_list_chain);
+}
+
+/** Create and initialize new resource. */
+static struct ldlm_resource *ldlm_resource_new(void)
+{
+ struct ldlm_resource *res;
+ int idx;
+
+ OBD_SLAB_ALLOC_PTR_GFP(res, ldlm_resource_slab, __GFP_IO);
+ if (res == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&res->lr_granted);
+ INIT_LIST_HEAD(&res->lr_converting);
+ INIT_LIST_HEAD(&res->lr_waiting);
+
+ /* Initialize interval trees for each lock mode. */
+ for (idx = 0; idx < LCK_MODE_NUM; idx++) {
+ res->lr_itree[idx].lit_size = 0;
+ res->lr_itree[idx].lit_mode = 1 << idx;
+ res->lr_itree[idx].lit_root = NULL;
+ }
+
+ atomic_set(&res->lr_refcount, 1);
+ spin_lock_init(&res->lr_lock);
+ lu_ref_init(&res->lr_reference);
+
+ /* The creator of the resource must unlock the mutex after LVB
+ * initialization. */
+ mutex_init(&res->lr_lvb_mutex);
+ mutex_lock(&res->lr_lvb_mutex);
+
+ return res;
+}
+
+/**
+ * Return a reference to resource with given name, creating it if necessary.
+ * Args: namespace with ns_lock unlocked
+ * Locks: takes and releases NS hash-lock and res->lr_lock
+ * Returns: referenced, unlocked ldlm_resource or NULL
+ */
+struct ldlm_resource *
+ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+ const struct ldlm_res_id *name, ldlm_type_t type, int create)
+{
+ struct hlist_node *hnode;
+ struct ldlm_resource *res;
+ cfs_hash_bd_t bd;
+ __u64 version;
+
+ LASSERT(ns != NULL);
+ LASSERT(parent == NULL);
+ LASSERT(ns->ns_rs_hash != NULL);
+ LASSERT(name->name[0] != 0);
+
+ cfs_hash_bd_get_and_lock(ns->ns_rs_hash, (void *)name, &bd, 0);
+ hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
+ if (hnode != NULL) {
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ /* Synchronize with regard to resource creation. */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ mutex_lock(&res->lr_lvb_mutex);
+ mutex_unlock(&res->lr_lvb_mutex);
+ }
+
+ if (unlikely(res->lr_lvb_len < 0)) {
+ ldlm_resource_putref(res);
+ res = NULL;
+ }
+ return res;
+ }
+
+ version = cfs_hash_bd_version_get(&bd);
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
+
+ if (create == 0)
+ return NULL;
+
+ LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
+ "type: %d\n", type);
+ res = ldlm_resource_new();
+ if (!res)
+ return NULL;
+
+ res->lr_ns_bucket = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
+ res->lr_name = *name;
+ res->lr_type = type;
+ res->lr_most_restr = LCK_NL;
+
+ cfs_hash_bd_lock(ns->ns_rs_hash, &bd, 1);
+ hnode = (version == cfs_hash_bd_version_get(&bd)) ? NULL :
+ cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
+
+ if (hnode != NULL) {
+ /* Someone won the race and already added the resource. */
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
+ /* Clean lu_ref for failed resource. */
+ lu_ref_fini(&res->lr_reference);
+ /* We have taken lr_lvb_mutex. Drop it. */
+ mutex_unlock(&res->lr_lvb_mutex);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
+
+ res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ /* Synchronize with regard to resource creation. */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ mutex_lock(&res->lr_lvb_mutex);
+ mutex_unlock(&res->lr_lvb_mutex);
+ }
+
+ if (unlikely(res->lr_lvb_len < 0)) {
+ ldlm_resource_putref(res);
+ res = NULL;
+ }
+ return res;
+ }
+ /* We won! Let's add the resource. */
+ cfs_hash_bd_add_locked(ns->ns_rs_hash, &bd, &res->lr_hash);
+ if (cfs_hash_bd_count_get(&bd) == 1)
+ ldlm_namespace_get(ns);
+
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ int rc;
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
+ rc = ns->ns_lvbo->lvbo_init(res);
+ if (rc < 0) {
+ CERROR("lvbo_init failed for resource "
+ LPU64": rc %d\n", name->name[0], rc);
+ if (res->lr_lvb_data) {
+ OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
+ res->lr_lvb_data = NULL;
+ }
+ res->lr_lvb_len = rc;
+ mutex_unlock(&res->lr_lvb_mutex);
+ ldlm_resource_putref(res);
+ return NULL;
+ }
+ }
+
+ /* We create resource with locked lr_lvb_mutex. */
+ mutex_unlock(&res->lr_lvb_mutex);
+
+ return res;
+}
+EXPORT_SYMBOL(ldlm_resource_get);
+
+struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
+{
+ LASSERT(res != NULL);
+ LASSERT(res != LP_POISON);
+ atomic_inc(&res->lr_refcount);
+ CDEBUG(D_INFO, "getref res: %p count: %d\n", res,
+ atomic_read(&res->lr_refcount));
+ return res;
+}
+
+static void __ldlm_resource_putref_final(cfs_hash_bd_t *bd,
+ struct ldlm_resource *res)
+{
+ struct ldlm_ns_bucket *nsb = res->lr_ns_bucket;
+
+ if (!list_empty(&res->lr_granted)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
+
+ if (!list_empty(&res->lr_converting)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
+
+ if (!list_empty(&res->lr_waiting)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
+
+ cfs_hash_bd_del_locked(nsb->nsb_namespace->ns_rs_hash,
+ bd, &res->lr_hash);
+ lu_ref_fini(&res->lr_reference);
+ if (cfs_hash_bd_count_get(bd) == 0)
+ ldlm_namespace_put(nsb->nsb_namespace);
+}
+
+/* Returns 1 if the resource was freed, 0 if it remains. */
+int ldlm_resource_putref(struct ldlm_resource *res)
+{
+ struct ldlm_namespace *ns = ldlm_res_to_ns(res);
+ cfs_hash_bd_t bd;
+
+ LASSERT_ATOMIC_GT_LT(&res->lr_refcount, 0, LI_POISON);
+ CDEBUG(D_INFO, "putref res: %p count: %d\n",
+ res, atomic_read(&res->lr_refcount) - 1);
+
+ cfs_hash_bd_get(ns->ns_rs_hash, &res->lr_name, &bd);
+ if (cfs_hash_bd_dec_and_lock(ns->ns_rs_hash, &bd, &res->lr_refcount)) {
+ __ldlm_resource_putref_final(&bd, res);
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_free)
+ ns->ns_lvbo->lvbo_free(res);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ldlm_resource_putref);
+
+/* Returns 1 if the resource was freed, 0 if it remains. */
+int ldlm_resource_putref_locked(struct ldlm_resource *res)
+{
+ struct ldlm_namespace *ns = ldlm_res_to_ns(res);
+
+ LASSERT_ATOMIC_GT_LT(&res->lr_refcount, 0, LI_POISON);
+ CDEBUG(D_INFO, "putref res: %p count: %d\n",
+ res, atomic_read(&res->lr_refcount) - 1);
+
+ if (atomic_dec_and_test(&res->lr_refcount)) {
+ cfs_hash_bd_t bd;
+
+ cfs_hash_bd_get(ldlm_res_to_ns(res)->ns_rs_hash,
+ &res->lr_name, &bd);
+ __ldlm_resource_putref_final(&bd, res);
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
+ /* NB: ns_rs_hash is created with CFS_HASH_NO_ITEMREF,
+ * so we should never be here while calling cfs_hash_del,
+ * cfs_hash_for_each_nolock is the only case we can get
+ * here, which is safe to release cfs_hash_bd_lock.
+ */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_free)
+ ns->ns_lvbo->lvbo_free(res);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
+
+ cfs_hash_bd_lock(ns->ns_rs_hash, &bd, 1);
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Add a lock into a given resource into specified lock list.
+ */
+void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
+ struct ldlm_lock *lock)
+{
+ check_res_locked(res);
+
+ LDLM_DEBUG(lock, "About to add this lock:\n");
+
+ if (lock->l_destroyed) {
+ CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+ return;
+ }
+
+ LASSERT(list_empty(&lock->l_res_link));
+
+ list_add_tail(&lock->l_res_link, head);
+}
+
+/**
+ * Insert a lock into resource after specified lock.
+ *
+ * Obtain resource description from the lock we are inserting after.
+ */
+void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
+ struct ldlm_lock *new)
+{
+ struct ldlm_resource *res = original->l_resource;
+
+ check_res_locked(res);
+
+ ldlm_resource_dump(D_INFO, res);
+ LDLM_DEBUG(new, "About to insert this lock after %p:\n", original);
+
+ if (new->l_destroyed) {
+ CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+ goto out;
+ }
+
+ LASSERT(list_empty(&new->l_res_link));
+
+ list_add(&new->l_res_link, &original->l_res_link);
+ out:;
+}
+
+void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
+{
+ int type = lock->l_resource->lr_type;
+
+ check_res_locked(lock->l_resource);
+ if (type == LDLM_IBITS || type == LDLM_PLAIN)
+ ldlm_unlink_lock_skiplist(lock);
+ else if (type == LDLM_EXTENT)
+ ldlm_extent_unlink_lock(lock);
+ list_del_init(&lock->l_res_link);
+}
+EXPORT_SYMBOL(ldlm_resource_unlink_lock);
+
+void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
+{
+ desc->lr_type = res->lr_type;
+ desc->lr_name = res->lr_name;
+}
+
+/**
+ * Print information about all locks in all namespaces on this node to debug
+ * log.
+ */
+void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
+{
+ struct list_head *tmp;
+
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ mutex_lock(ldlm_namespace_lock(client));
+
+ list_for_each(tmp, ldlm_namespace_list(client)) {
+ struct ldlm_namespace *ns;
+ ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
+ ldlm_namespace_dump(level, ns);
+ }
+
+ mutex_unlock(ldlm_namespace_lock(client));
+}
+EXPORT_SYMBOL(ldlm_dump_all_namespaces);
+
+static int ldlm_res_hash_dump(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ int level = (int)(unsigned long)arg;
+
+ lock_res(res);
+ ldlm_resource_dump(level, res);
+ unlock_res(res);
+
+ return 0;
+}
+
+/**
+ * Print information about all locks in this namespace on this node to debug
+ * log.
+ */
+void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
+{
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ CDEBUG(level, "--- Namespace: %s (rc: %d, side: %s)\n",
+ ldlm_ns_name(ns), atomic_read(&ns->ns_bref),
+ ns_is_client(ns) ? "client" : "server");
+
+ if (cfs_time_before(cfs_time_current(), ns->ns_next_dump))
+ return;
+
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_res_hash_dump,
+ (void *)(unsigned long)level);
+ spin_lock(&ns->ns_lock);
+ ns->ns_next_dump = cfs_time_shift(10);
+ spin_unlock(&ns->ns_lock);
+}
+EXPORT_SYMBOL(ldlm_namespace_dump);
+
+/**
+ * Print information about all locks in this resource to debug log.
+ */
+void ldlm_resource_dump(int level, struct ldlm_resource *res)
+{
+ struct ldlm_lock *lock;
+ unsigned int granted = 0;
+
+ CLASSERT(RES_NAME_SIZE == 4);
+
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
+ ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],
+ res->lr_name.name[2], res->lr_name.name[3],
+ atomic_read(&res->lr_refcount));
+
+ if (!list_empty(&res->lr_granted)) {
+ CDEBUG(level, "Granted locks (in reverse order):\n");
+ list_for_each_entry_reverse(lock, &res->lr_granted,
+ l_res_link) {
+ LDLM_DEBUG_LIMIT(level, lock, "###");
+ if (!(level & D_CANTMASK) &&
+ ++granted > ldlm_dump_granted_max) {
+ CDEBUG(level, "only dump %d granted locks to "
+ "avoid DDOS.\n", granted);
+ break;
+ }
+ }
+ }
+ if (!list_empty(&res->lr_converting)) {
+ CDEBUG(level, "Converting locks:\n");
+ list_for_each_entry(lock, &res->lr_converting, l_res_link)
+ LDLM_DEBUG_LIMIT(level, lock, "###");
+ }
+ if (!list_empty(&res->lr_waiting)) {
+ CDEBUG(level, "Waiting locks:\n");
+ list_for_each_entry(lock, &res->lr_waiting, l_res_link)
+ LDLM_DEBUG_LIMIT(level, lock, "###");
+ }
+}
diff --git a/drivers/staging/lustre/lustre/libcfs/Makefile b/drivers/staging/lustre/lustre/libcfs/Makefile
new file mode 100644
index 000000000000..bf5c563dcacc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/Makefile
@@ -0,0 +1,21 @@
+obj-$(CONFIG_LUSTRE_FS) += libcfs.o
+
+libcfs-linux-objs := linux-tracefile.o linux-debug.o
+libcfs-linux-objs += linux-prim.o linux-cpu.o
+libcfs-linux-objs += linux-tcpip.o
+libcfs-linux-objs += linux-proc.o linux-curproc.o
+libcfs-linux-objs += linux-module.o
+libcfs-linux-objs += linux-crypto.o
+libcfs-linux-objs += linux-crypto-adler.o
+
+libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
+
+libcfs-all-objs := debug.o fail.o nidstrings.o module.o tracefile.o \
+ watchdog.o libcfs_string.o hash.o kernel_user_comm.o \
+ prng.o workitem.o upcall_cache.o libcfs_cpu.o \
+ libcfs_mem.o libcfs_lock.o
+
+libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
+
+ccflags-y := -I$(src)/../include
+ccflags-y += -I$(src)/
diff --git a/drivers/staging/lustre/lustre/libcfs/debug.c b/drivers/staging/lustre/lustre/libcfs/debug.c
new file mode 100644
index 000000000000..5a87b0832074
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/debug.c
@@ -0,0 +1,476 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/debug.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include "tracefile.h"
+
+static char debug_file_name[1024];
+
+unsigned int libcfs_subsystem_debug = ~0;
+CFS_MODULE_PARM(libcfs_subsystem_debug, "i", int, 0644,
+ "Lustre kernel debug subsystem mask");
+EXPORT_SYMBOL(libcfs_subsystem_debug);
+
+unsigned int libcfs_debug = (D_CANTMASK |
+ D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
+CFS_MODULE_PARM(libcfs_debug, "i", int, 0644,
+ "Lustre kernel debug mask");
+EXPORT_SYMBOL(libcfs_debug);
+
+unsigned int libcfs_debug_mb = 0;
+CFS_MODULE_PARM(libcfs_debug_mb, "i", uint, 0644,
+ "Total debug buffer size.");
+EXPORT_SYMBOL(libcfs_debug_mb);
+
+unsigned int libcfs_printk = D_CANTMASK;
+CFS_MODULE_PARM(libcfs_printk, "i", uint, 0644,
+ "Lustre kernel debug console mask");
+EXPORT_SYMBOL(libcfs_printk);
+
+unsigned int libcfs_console_ratelimit = 1;
+CFS_MODULE_PARM(libcfs_console_ratelimit, "i", uint, 0644,
+ "Lustre kernel debug console ratelimit (0 to disable)");
+EXPORT_SYMBOL(libcfs_console_ratelimit);
+
+unsigned int libcfs_console_max_delay;
+CFS_MODULE_PARM(libcfs_console_max_delay, "l", uint, 0644,
+ "Lustre kernel debug console max delay (jiffies)");
+EXPORT_SYMBOL(libcfs_console_max_delay);
+
+unsigned int libcfs_console_min_delay;
+CFS_MODULE_PARM(libcfs_console_min_delay, "l", uint, 0644,
+ "Lustre kernel debug console min delay (jiffies)");
+EXPORT_SYMBOL(libcfs_console_min_delay);
+
+unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
+CFS_MODULE_PARM(libcfs_console_backoff, "i", uint, 0644,
+ "Lustre kernel debug console backoff factor");
+EXPORT_SYMBOL(libcfs_console_backoff);
+
+unsigned int libcfs_debug_binary = 1;
+EXPORT_SYMBOL(libcfs_debug_binary);
+
+unsigned int libcfs_stack = 3 * THREAD_SIZE / 4;
+EXPORT_SYMBOL(libcfs_stack);
+
+unsigned int portal_enter_debugger;
+EXPORT_SYMBOL(portal_enter_debugger);
+
+unsigned int libcfs_catastrophe;
+EXPORT_SYMBOL(libcfs_catastrophe);
+
+unsigned int libcfs_watchdog_ratelimit = 300;
+EXPORT_SYMBOL(libcfs_watchdog_ratelimit);
+
+unsigned int libcfs_panic_on_lbug = 1;
+CFS_MODULE_PARM(libcfs_panic_on_lbug, "i", uint, 0644,
+ "Lustre kernel panic on LBUG");
+EXPORT_SYMBOL(libcfs_panic_on_lbug);
+
+atomic_t libcfs_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(libcfs_kmemory);
+
+static wait_queue_head_t debug_ctlwq;
+
+char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
+
+/* We need to pass a pointer here, but elsewhere this must be a const */
+char *libcfs_debug_file_path;
+CFS_MODULE_PARM(libcfs_debug_file_path, "s", charp, 0644,
+ "Path for dumping debug logs, "
+ "set 'NONE' to prevent log dumping");
+
+int libcfs_panic_in_progress;
+
+/* libcfs_debug_token2mask() expects the returned
+ * string in lower-case */
+const char *
+libcfs_debug_subsys2str(int subsys)
+{
+ switch (1 << subsys) {
+ default:
+ return NULL;
+ case S_UNDEFINED:
+ return "undefined";
+ case S_MDC:
+ return "mdc";
+ case S_MDS:
+ return "mds";
+ case S_OSC:
+ return "osc";
+ case S_OST:
+ return "ost";
+ case S_CLASS:
+ return "class";
+ case S_LOG:
+ return "log";
+ case S_LLITE:
+ return "llite";
+ case S_RPC:
+ return "rpc";
+ case S_LNET:
+ return "lnet";
+ case S_LND:
+ return "lnd";
+ case S_PINGER:
+ return "pinger";
+ case S_FILTER:
+ return "filter";
+ case S_ECHO:
+ return "echo";
+ case S_LDLM:
+ return "ldlm";
+ case S_LOV:
+ return "lov";
+ case S_LQUOTA:
+ return "lquota";
+ case S_OSD:
+ return "osd";
+ case S_LMV:
+ return "lmv";
+ case S_SEC:
+ return "sec";
+ case S_GSS:
+ return "gss";
+ case S_MGC:
+ return "mgc";
+ case S_MGS:
+ return "mgs";
+ case S_FID:
+ return "fid";
+ case S_FLD:
+ return "fld";
+ }
+}
+
+/* libcfs_debug_token2mask() expects the returned
+ * string in lower-case */
+const char *
+libcfs_debug_dbg2str(int debug)
+{
+ switch (1 << debug) {
+ default:
+ return NULL;
+ case D_TRACE:
+ return "trace";
+ case D_INODE:
+ return "inode";
+ case D_SUPER:
+ return "super";
+ case D_EXT2:
+ return "ext2";
+ case D_MALLOC:
+ return "malloc";
+ case D_CACHE:
+ return "cache";
+ case D_INFO:
+ return "info";
+ case D_IOCTL:
+ return "ioctl";
+ case D_NETERROR:
+ return "neterror";
+ case D_NET:
+ return "net";
+ case D_WARNING:
+ return "warning";
+ case D_BUFFS:
+ return "buffs";
+ case D_OTHER:
+ return "other";
+ case D_DENTRY:
+ return "dentry";
+ case D_NETTRACE:
+ return "nettrace";
+ case D_PAGE:
+ return "page";
+ case D_DLMTRACE:
+ return "dlmtrace";
+ case D_ERROR:
+ return "error";
+ case D_EMERG:
+ return "emerg";
+ case D_HA:
+ return "ha";
+ case D_RPCTRACE:
+ return "rpctrace";
+ case D_VFSTRACE:
+ return "vfstrace";
+ case D_READA:
+ return "reada";
+ case D_MMAP:
+ return "mmap";
+ case D_CONFIG:
+ return "config";
+ case D_CONSOLE:
+ return "console";
+ case D_QUOTA:
+ return "quota";
+ case D_SEC:
+ return "sec";
+ case D_LFSCK:
+ return "lfsck";
+ }
+}
+
+int
+libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
+{
+ const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+ libcfs_debug_dbg2str;
+ int len = 0;
+ const char *token;
+ int i;
+
+ if (mask == 0) { /* "0" */
+ if (size > 0)
+ str[0] = '0';
+ len = 1;
+ } else { /* space-separated tokens */
+ for (i = 0; i < 32; i++) {
+ if ((mask & (1 << i)) == 0)
+ continue;
+
+ token = fn(i);
+ if (token == NULL) /* unused bit */
+ continue;
+
+ if (len > 0) { /* separator? */
+ if (len < size)
+ str[len] = ' ';
+ len++;
+ }
+
+ while (*token != 0) {
+ if (len < size)
+ str[len] = *token;
+ token++;
+ len++;
+ }
+ }
+ }
+
+ /* terminate 'str' */
+ if (len < size)
+ str[len] = 0;
+ else
+ str[size - 1] = 0;
+
+ return len;
+}
+
+int
+libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
+{
+ const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+ libcfs_debug_dbg2str;
+ int m = 0;
+ int matched;
+ int n;
+ int t;
+
+ /* Allow a number for backwards compatibility */
+
+ for (n = strlen(str); n > 0; n--)
+ if (!isspace(str[n-1]))
+ break;
+ matched = n;
+
+ if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 &&
+ matched == n) {
+ /* don't print warning for lctl set_param debug=0 or -1 */
+ if (m != 0 && m != -1)
+ CWARN("You are trying to use a numerical value for the "
+ "mask - this will be deprecated in a future "
+ "release.\n");
+ *mask = m;
+ return 0;
+ }
+
+ return cfs_str2mask(str, fn, mask, is_subsys ? 0 : D_CANTMASK,
+ 0xffffffff);
+}
+
+/**
+ * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
+ */
+void libcfs_debug_dumplog_internal(void *arg)
+{
+ DECL_JOURNAL_DATA;
+
+ PUSH_JOURNAL;
+
+ if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
+ snprintf(debug_file_name, sizeof(debug_file_name) - 1,
+ "%s.%ld." LPLD, libcfs_debug_file_path_arr,
+ cfs_time_current_sec(), (long_ptr_t)arg);
+ printk(KERN_ALERT "LustreError: dumping log to %s\n",
+ debug_file_name);
+ cfs_tracefile_dump_all_pages(debug_file_name);
+ libcfs_run_debug_log_upcall(debug_file_name);
+ }
+ POP_JOURNAL;
+}
+
+int libcfs_debug_dumplog_thread(void *arg)
+{
+ libcfs_debug_dumplog_internal(arg);
+ wake_up(&debug_ctlwq);
+ return 0;
+}
+
+void libcfs_debug_dumplog(void)
+{
+ wait_queue_t wait;
+ task_t *dumper;
+ ENTRY;
+
+ /* we're being careful to ensure that the kernel thread is
+ * able to set our state to running as it exits before we
+ * get to schedule() */
+ init_waitqueue_entry_current(&wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&debug_ctlwq, &wait);
+
+ dumper = kthread_run(libcfs_debug_dumplog_thread,
+ (void *)(long)current_pid(),
+ "libcfs_debug_dumper");
+ if (IS_ERR(dumper))
+ printk(KERN_ERR "LustreError: cannot start log dump thread:"
+ " %ld\n", PTR_ERR(dumper));
+ else
+ waitq_wait(&wait, TASK_INTERRUPTIBLE);
+
+ /* be sure to teardown if cfs_create_thread() failed */
+ remove_wait_queue(&debug_ctlwq, &wait);
+ set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL(libcfs_debug_dumplog);
+
+int libcfs_debug_init(unsigned long bufsize)
+{
+ int rc = 0;
+ unsigned int max = libcfs_debug_mb;
+
+ init_waitqueue_head(&debug_ctlwq);
+
+ if (libcfs_console_max_delay <= 0 || /* not set by user or */
+ libcfs_console_min_delay <= 0 || /* set to invalid values */
+ libcfs_console_min_delay >= libcfs_console_max_delay) {
+ libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
+ libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
+ }
+
+ if (libcfs_debug_file_path != NULL) {
+ memset(libcfs_debug_file_path_arr, 0, PATH_MAX);
+ strncpy(libcfs_debug_file_path_arr,
+ libcfs_debug_file_path, PATH_MAX-1);
+ }
+
+ /* If libcfs_debug_mb is set to an invalid value or uninitialized
+ * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES */
+ if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
+ max = TCD_MAX_PAGES;
+ } else {
+ max = (max / num_possible_cpus());
+ max = (max << (20 - PAGE_CACHE_SHIFT));
+ }
+ rc = cfs_tracefile_init(max);
+
+ if (rc == 0)
+ libcfs_register_panic_notifier();
+
+ return rc;
+}
+
+int libcfs_debug_cleanup(void)
+{
+ libcfs_unregister_panic_notifier();
+ cfs_tracefile_exit();
+ return 0;
+}
+
+int libcfs_debug_clear_buffer(void)
+{
+ cfs_trace_flush_pages();
+ return 0;
+}
+
+/* Debug markers, although printed by S_LNET
+ * should not be be marked as such. */
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+int libcfs_debug_mark_buffer(const char *text)
+{
+ CDEBUG(D_TRACE,"***************************************************\n");
+ LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
+ CDEBUG(D_TRACE,"***************************************************\n");
+
+ return 0;
+}
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_LNET
+
+void libcfs_debug_set_level(unsigned int debug_level)
+{
+ printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n",
+ debug_level);
+ libcfs_debug = debug_level;
+}
+
+EXPORT_SYMBOL(libcfs_debug_set_level);
+
+long libcfs_log_return(struct libcfs_debug_msg_data *msgdata, long rc)
+{
+ libcfs_debug_msg(msgdata, "Process leaving (rc=%lu : %ld : %lx)\n",
+ rc, rc, rc);
+ return rc;
+}
+EXPORT_SYMBOL(libcfs_log_return);
+
+void libcfs_log_goto(struct libcfs_debug_msg_data *msgdata, const char *label,
+ long_ptr_t rc)
+{
+ libcfs_debug_msg(msgdata, "Process leaving via %s (rc=" LPLU " : " LPLD
+ " : " LPLX ")\n", label, (ulong_ptr_t)rc, rc, rc);
+}
+EXPORT_SYMBOL(libcfs_log_goto);
diff --git a/drivers/staging/lustre/lustre/libcfs/fail.c b/drivers/staging/lustre/lustre/libcfs/fail.c
new file mode 100644
index 000000000000..c54448d69008
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/fail.c
@@ -0,0 +1,137 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
+ * CA 94065 USA or visit www.oracle.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Oracle Corporation, Inc.
+ */
+
+#include <linux/libcfs/libcfs.h>
+
+unsigned long cfs_fail_loc = 0;
+unsigned int cfs_fail_val = 0;
+wait_queue_head_t cfs_race_waitq;
+int cfs_race_state;
+
+EXPORT_SYMBOL(cfs_fail_loc);
+EXPORT_SYMBOL(cfs_fail_val);
+EXPORT_SYMBOL(cfs_race_waitq);
+EXPORT_SYMBOL(cfs_race_state);
+
+int __cfs_fail_check_set(__u32 id, __u32 value, int set)
+{
+ static atomic_t cfs_fail_count = ATOMIC_INIT(0);
+
+ LASSERT(!(id & CFS_FAIL_ONCE));
+
+ if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) ==
+ (CFS_FAILED | CFS_FAIL_ONCE)) {
+ atomic_set(&cfs_fail_count, 0); /* paranoia */
+ return 0;
+ }
+
+ /* Fail 1/cfs_fail_val times */
+ if (cfs_fail_loc & CFS_FAIL_RAND) {
+ if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0)
+ return 0;
+ }
+
+ /* Skip the first cfs_fail_val, then fail */
+ if (cfs_fail_loc & CFS_FAIL_SKIP) {
+ if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val)
+ return 0;
+ }
+
+ /* check cfs_fail_val... */
+ if (set == CFS_FAIL_LOC_VALUE) {
+ if (cfs_fail_val != -1 && cfs_fail_val != value)
+ return 0;
+ }
+
+ /* Fail cfs_fail_val times, overridden by FAIL_ONCE */
+ if (cfs_fail_loc & CFS_FAIL_SOME &&
+ (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) {
+ int count = atomic_inc_return(&cfs_fail_count);
+
+ if (count >= cfs_fail_val) {
+ set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
+ atomic_set(&cfs_fail_count, 0);
+ /* we are lost race to increase */
+ if (count > cfs_fail_val)
+ return 0;
+ }
+ }
+
+ if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
+ (value & CFS_FAIL_ONCE))
+ set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
+ /* Lost race to set CFS_FAILED_BIT. */
+ if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
+ /* If CFS_FAIL_ONCE is valid, only one process can fail,
+ * otherwise multi-process can fail at the same time. */
+ if (cfs_fail_loc & CFS_FAIL_ONCE)
+ return 0;
+ }
+
+ switch (set) {
+ case CFS_FAIL_LOC_NOSET:
+ case CFS_FAIL_LOC_VALUE:
+ break;
+ case CFS_FAIL_LOC_ORSET:
+ cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE);
+ break;
+ case CFS_FAIL_LOC_RESET:
+ cfs_fail_loc = value;
+ break;
+ default:
+ LASSERTF(0, "called with bad set %u\n", set);
+ break;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(__cfs_fail_check_set);
+
+int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+ int ret = 0;
+
+ ret = __cfs_fail_check_set(id, value, set);
+ if (ret) {
+ CERROR("cfs_fail_timeout id %x sleeping for %dms\n",
+ id, ms);
+ schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
+ cfs_time_seconds(ms) / 1000);
+ set_current_state(TASK_RUNNING);
+ CERROR("cfs_fail_timeout id %x awake\n", id);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(__cfs_fail_timeout_set);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
new file mode 100644
index 000000000000..98c76dfac3dd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -0,0 +1,2123 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/hash.c
+ *
+ * Implement a hash class for hash process in lustre system.
+ *
+ * Author: YuZhangyong <yzy@clusterfs.com>
+ *
+ * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
+ * - Simplified API and improved documentation
+ * - Added per-hash feature flags:
+ * * CFS_HASH_DEBUG additional validation
+ * * CFS_HASH_REHASH dynamic rehashing
+ * - Added per-hash statistics
+ * - General performance enhancements
+ *
+ * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
+ * - move all stuff to libcfs
+ * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
+ * - ignore hs_rwlock if without CFS_HASH_REHASH setting
+ * - buckets are allocated one by one(intead of contiguous memory),
+ * to avoid unnecessary cacheline conflict
+ *
+ * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
+ * - "bucket" is a group of hlist_head now, user can speicify bucket size
+ * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share
+ * one lock for reducing memory overhead.
+ *
+ * - support lockless hash, caller will take care of locks:
+ * avoid lock overhead for hash tables that are already protected
+ * by locking in the caller for another reason
+ *
+ * - support both spin_lock/rwlock for bucket:
+ * overhead of spinlock contention is lower than read/write
+ * contention of rwlock, so using spinlock to serialize operations on
+ * bucket is more reasonable for those frequently changed hash tables
+ *
+ * - support one-single lock mode:
+ * one lock to protect all hash operations to avoid overhead of
+ * multiple locks if hash table is always small
+ *
+ * - removed a lot of unnecessary addref & decref on hash element:
+ * addref & decref are atomic operations in many use-cases which
+ * are expensive.
+ *
+ * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
+ * some lustre use-cases require these functions to be strictly
+ * non-blocking, we need to schedule required rehash on a different
+ * thread on those cases.
+ *
+ * - safer rehash on large hash table
+ * In old implementation, rehash function will exclusively lock the
+ * hash table and finish rehash in one batch, it's dangerous on SMP
+ * system because rehash millions of elements could take long time.
+ * New implemented rehash can release lock and relax CPU in middle
+ * of rehash, it's safe for another thread to search/change on the
+ * hash table even it's in rehasing.
+ *
+ * - support two different refcount modes
+ * . hash table has refcount on element
+ * . hash table doesn't change refcount on adding/removing element
+ *
+ * - support long name hash table (for param-tree)
+ *
+ * - fix a bug for cfs_hash_rehash_key:
+ * in old implementation, cfs_hash_rehash_key could screw up the
+ * hash-table because @key is overwritten without any protection.
+ * Now we need user to define hs_keycpy for those rehash enabled
+ * hash tables, cfs_hash_rehash_key will overwrite hash-key
+ * inside lock by calling hs_keycpy.
+ *
+ * - better hash iteration:
+ * Now we support both locked iteration & lockless iteration of hash
+ * table. Also, user can break the iteration by return 1 in callback.
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/seq_file.h>
+
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+static unsigned int warn_on_depth = 8;
+CFS_MODULE_PARM(warn_on_depth, "i", uint, 0644,
+ "warning when hash depth is high.");
+#endif
+
+struct cfs_wi_sched *cfs_sched_rehash;
+
+static inline void
+cfs_hash_nl_lock(cfs_hash_lock_t *lock, int exclusive) {}
+
+static inline void
+cfs_hash_nl_unlock(cfs_hash_lock_t *lock, int exclusive) {}
+
+static inline void
+cfs_hash_spin_lock(cfs_hash_lock_t *lock, int exclusive)
+{
+ spin_lock(&lock->spin);
+}
+
+static inline void
+cfs_hash_spin_unlock(cfs_hash_lock_t *lock, int exclusive)
+{
+ spin_unlock(&lock->spin);
+}
+
+static inline void
+cfs_hash_rw_lock(cfs_hash_lock_t *lock, int exclusive)
+{
+ if (!exclusive)
+ read_lock(&lock->rw);
+ else
+ write_lock(&lock->rw);
+}
+
+static inline void
+cfs_hash_rw_unlock(cfs_hash_lock_t *lock, int exclusive)
+{
+ if (!exclusive)
+ read_unlock(&lock->rw);
+ else
+ write_unlock(&lock->rw);
+}
+
+/** No lock hash */
+static cfs_hash_lock_ops_t cfs_hash_nl_lops =
+{
+ .hs_lock = cfs_hash_nl_lock,
+ .hs_unlock = cfs_hash_nl_unlock,
+ .hs_bkt_lock = cfs_hash_nl_lock,
+ .hs_bkt_unlock = cfs_hash_nl_unlock,
+};
+
+/** no bucket lock, one spinlock to protect everything */
+static cfs_hash_lock_ops_t cfs_hash_nbl_lops =
+{
+ .hs_lock = cfs_hash_spin_lock,
+ .hs_unlock = cfs_hash_spin_unlock,
+ .hs_bkt_lock = cfs_hash_nl_lock,
+ .hs_bkt_unlock = cfs_hash_nl_unlock,
+};
+
+/** spin bucket lock, rehash is enabled */
+static cfs_hash_lock_ops_t cfs_hash_bkt_spin_lops =
+{
+ .hs_lock = cfs_hash_rw_lock,
+ .hs_unlock = cfs_hash_rw_unlock,
+ .hs_bkt_lock = cfs_hash_spin_lock,
+ .hs_bkt_unlock = cfs_hash_spin_unlock,
+};
+
+/** rw bucket lock, rehash is enabled */
+static cfs_hash_lock_ops_t cfs_hash_bkt_rw_lops =
+{
+ .hs_lock = cfs_hash_rw_lock,
+ .hs_unlock = cfs_hash_rw_unlock,
+ .hs_bkt_lock = cfs_hash_rw_lock,
+ .hs_bkt_unlock = cfs_hash_rw_unlock,
+};
+
+/** spin bucket lock, rehash is disabled */
+static cfs_hash_lock_ops_t cfs_hash_nr_bkt_spin_lops =
+{
+ .hs_lock = cfs_hash_nl_lock,
+ .hs_unlock = cfs_hash_nl_unlock,
+ .hs_bkt_lock = cfs_hash_spin_lock,
+ .hs_bkt_unlock = cfs_hash_spin_unlock,
+};
+
+/** rw bucket lock, rehash is disabled */
+static cfs_hash_lock_ops_t cfs_hash_nr_bkt_rw_lops =
+{
+ .hs_lock = cfs_hash_nl_lock,
+ .hs_unlock = cfs_hash_nl_unlock,
+ .hs_bkt_lock = cfs_hash_rw_lock,
+ .hs_bkt_unlock = cfs_hash_rw_unlock,
+};
+
+static void
+cfs_hash_lock_setup(cfs_hash_t *hs)
+{
+ if (cfs_hash_with_no_lock(hs)) {
+ hs->hs_lops = &cfs_hash_nl_lops;
+
+ } else if (cfs_hash_with_no_bktlock(hs)) {
+ hs->hs_lops = &cfs_hash_nbl_lops;
+ spin_lock_init(&hs->hs_lock.spin);
+
+ } else if (cfs_hash_with_rehash(hs)) {
+ rwlock_init(&hs->hs_lock.rw);
+
+ if (cfs_hash_with_rw_bktlock(hs))
+ hs->hs_lops = &cfs_hash_bkt_rw_lops;
+ else if (cfs_hash_with_spin_bktlock(hs))
+ hs->hs_lops = &cfs_hash_bkt_spin_lops;
+ else
+ LBUG();
+ } else {
+ if (cfs_hash_with_rw_bktlock(hs))
+ hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
+ else if (cfs_hash_with_spin_bktlock(hs))
+ hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
+ else
+ LBUG();
+ }
+}
+
+/**
+ * Simple hash head without depth tracking
+ * new element is always added to head of hlist
+ */
+typedef struct {
+ struct hlist_head hh_head; /**< entries list */
+} cfs_hash_head_t;
+
+static int
+cfs_hash_hh_hhead_size(cfs_hash_t *hs)
+{
+ return sizeof(cfs_hash_head_t);
+}
+
+static struct hlist_head *
+cfs_hash_hh_hhead(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ cfs_hash_head_t *head = (cfs_hash_head_t *)&bd->bd_bucket->hsb_head[0];
+
+ return &head[bd->bd_offset].hh_head;
+}
+
+static int
+cfs_hash_hh_hnode_add(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
+ return -1; /* unknown depth */
+}
+
+static int
+cfs_hash_hh_hnode_del(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ hlist_del_init(hnode);
+ return -1; /* unknown depth */
+}
+
+/**
+ * Simple hash head with depth tracking
+ * new element is always added to head of hlist
+ */
+typedef struct {
+ struct hlist_head hd_head; /**< entries list */
+ unsigned int hd_depth; /**< list length */
+} cfs_hash_head_dep_t;
+
+static int
+cfs_hash_hd_hhead_size(cfs_hash_t *hs)
+{
+ return sizeof(cfs_hash_head_dep_t);
+}
+
+static struct hlist_head *
+cfs_hash_hd_hhead(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ cfs_hash_head_dep_t *head;
+
+ head = (cfs_hash_head_dep_t *)&bd->bd_bucket->hsb_head[0];
+ return &head[bd->bd_offset].hd_head;
+}
+
+static int
+cfs_hash_hd_hnode_add(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ cfs_hash_head_dep_t *hh = container_of(cfs_hash_hd_hhead(hs, bd),
+ cfs_hash_head_dep_t, hd_head);
+ hlist_add_head(hnode, &hh->hd_head);
+ return ++hh->hd_depth;
+}
+
+static int
+cfs_hash_hd_hnode_del(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ cfs_hash_head_dep_t *hh = container_of(cfs_hash_hd_hhead(hs, bd),
+ cfs_hash_head_dep_t, hd_head);
+ hlist_del_init(hnode);
+ return --hh->hd_depth;
+}
+
+/**
+ * double links hash head without depth tracking
+ * new element is always added to tail of hlist
+ */
+typedef struct {
+ struct hlist_head dh_head; /**< entries list */
+ struct hlist_node *dh_tail; /**< the last entry */
+} cfs_hash_dhead_t;
+
+static int
+cfs_hash_dh_hhead_size(cfs_hash_t *hs)
+{
+ return sizeof(cfs_hash_dhead_t);
+}
+
+static struct hlist_head *
+cfs_hash_dh_hhead(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ cfs_hash_dhead_t *head;
+
+ head = (cfs_hash_dhead_t *)&bd->bd_bucket->hsb_head[0];
+ return &head[bd->bd_offset].dh_head;
+}
+
+static int
+cfs_hash_dh_hnode_add(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ cfs_hash_dhead_t *dh = container_of(cfs_hash_dh_hhead(hs, bd),
+ cfs_hash_dhead_t, dh_head);
+
+ if (dh->dh_tail != NULL) /* not empty */
+ hlist_add_after(dh->dh_tail, hnode);
+ else /* empty list */
+ hlist_add_head(hnode, &dh->dh_head);
+ dh->dh_tail = hnode;
+ return -1; /* unknown depth */
+}
+
+static int
+cfs_hash_dh_hnode_del(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnd)
+{
+ cfs_hash_dhead_t *dh = container_of(cfs_hash_dh_hhead(hs, bd),
+ cfs_hash_dhead_t, dh_head);
+
+ if (hnd->next == NULL) { /* it's the tail */
+ dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
+ container_of(hnd->pprev, struct hlist_node, next);
+ }
+ hlist_del_init(hnd);
+ return -1; /* unknown depth */
+}
+
+/**
+ * double links hash head with depth tracking
+ * new element is always added to tail of hlist
+ */
+typedef struct {
+ struct hlist_head dd_head; /**< entries list */
+ struct hlist_node *dd_tail; /**< the last entry */
+ unsigned int dd_depth; /**< list length */
+} cfs_hash_dhead_dep_t;
+
+static int
+cfs_hash_dd_hhead_size(cfs_hash_t *hs)
+{
+ return sizeof(cfs_hash_dhead_dep_t);
+}
+
+static struct hlist_head *
+cfs_hash_dd_hhead(cfs_hash_t *hs, cfs_hash_bd_t *bd)
+{
+ cfs_hash_dhead_dep_t *head;
+
+ head = (cfs_hash_dhead_dep_t *)&bd->bd_bucket->hsb_head[0];
+ return &head[bd->bd_offset].dd_head;
+}
+
+static int
+cfs_hash_dd_hnode_add(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ cfs_hash_dhead_dep_t *dh = container_of(cfs_hash_dd_hhead(hs, bd),
+ cfs_hash_dhead_dep_t, dd_head);
+
+ if (dh->dd_tail != NULL) /* not empty */
+ hlist_add_after(dh->dd_tail, hnode);
+ else /* empty list */
+ hlist_add_head(hnode, &dh->dd_head);
+ dh->dd_tail = hnode;
+ return ++dh->dd_depth;
+}
+
+static int
+cfs_hash_dd_hnode_del(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnd)
+{
+ cfs_hash_dhead_dep_t *dh = container_of(cfs_hash_dd_hhead(hs, bd),
+ cfs_hash_dhead_dep_t, dd_head);
+
+ if (hnd->next == NULL) { /* it's the tail */
+ dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
+ container_of(hnd->pprev, struct hlist_node, next);
+ }
+ hlist_del_init(hnd);
+ return --dh->dd_depth;
+}
+
+static cfs_hash_hlist_ops_t cfs_hash_hh_hops = {
+ .hop_hhead = cfs_hash_hh_hhead,
+ .hop_hhead_size = cfs_hash_hh_hhead_size,
+ .hop_hnode_add = cfs_hash_hh_hnode_add,
+ .hop_hnode_del = cfs_hash_hh_hnode_del,
+};
+
+static cfs_hash_hlist_ops_t cfs_hash_hd_hops = {
+ .hop_hhead = cfs_hash_hd_hhead,
+ .hop_hhead_size = cfs_hash_hd_hhead_size,
+ .hop_hnode_add = cfs_hash_hd_hnode_add,
+ .hop_hnode_del = cfs_hash_hd_hnode_del,
+};
+
+static cfs_hash_hlist_ops_t cfs_hash_dh_hops = {
+ .hop_hhead = cfs_hash_dh_hhead,
+ .hop_hhead_size = cfs_hash_dh_hhead_size,
+ .hop_hnode_add = cfs_hash_dh_hnode_add,
+ .hop_hnode_del = cfs_hash_dh_hnode_del,
+};
+
+static cfs_hash_hlist_ops_t cfs_hash_dd_hops = {
+ .hop_hhead = cfs_hash_dd_hhead,
+ .hop_hhead_size = cfs_hash_dd_hhead_size,
+ .hop_hnode_add = cfs_hash_dd_hnode_add,
+ .hop_hnode_del = cfs_hash_dd_hnode_del,
+};
+
+static void
+cfs_hash_hlist_setup(cfs_hash_t *hs)
+{
+ if (cfs_hash_with_add_tail(hs)) {
+ hs->hs_hops = cfs_hash_with_depth(hs) ?
+ &cfs_hash_dd_hops : &cfs_hash_dh_hops;
+ } else {
+ hs->hs_hops = cfs_hash_with_depth(hs) ?
+ &cfs_hash_hd_hops : &cfs_hash_hh_hops;
+ }
+}
+
+static void
+cfs_hash_bd_from_key(cfs_hash_t *hs, cfs_hash_bucket_t **bkts,
+ unsigned int bits, const void *key, cfs_hash_bd_t *bd)
+{
+ unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);
+
+ LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);
+
+ bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
+ bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
+}
+
+void
+cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd)
+{
+ /* NB: caller should hold hs->hs_rwlock if REHASH is set */
+ if (likely(hs->hs_rehash_buckets == NULL)) {
+ cfs_hash_bd_from_key(hs, hs->hs_buckets,
+ hs->hs_cur_bits, key, bd);
+ } else {
+ LASSERT(hs->hs_rehash_bits != 0);
+ cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+ hs->hs_rehash_bits, key, bd);
+ }
+}
+EXPORT_SYMBOL(cfs_hash_bd_get);
+
+static inline void
+cfs_hash_bd_dep_record(cfs_hash_t *hs, cfs_hash_bd_t *bd, int dep_cur)
+{
+ if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
+ return;
+
+ bd->bd_bucket->hsb_depmax = dep_cur;
+# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+ if (likely(warn_on_depth == 0 ||
+ max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
+ return;
+
+ spin_lock(&hs->hs_dep_lock);
+ hs->hs_dep_max = dep_cur;
+ hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
+ hs->hs_dep_off = bd->bd_offset;
+ hs->hs_dep_bits = hs->hs_cur_bits;
+ spin_unlock(&hs->hs_dep_lock);
+
+ cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi);
+# endif
+}
+
+void
+cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ int rc;
+
+ rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
+ cfs_hash_bd_dep_record(hs, bd, rc);
+ bd->bd_bucket->hsb_version++;
+ if (unlikely(bd->bd_bucket->hsb_version == 0))
+ bd->bd_bucket->hsb_version++;
+ bd->bd_bucket->hsb_count++;
+
+ if (cfs_hash_with_counter(hs))
+ atomic_inc(&hs->hs_count);
+ if (!cfs_hash_with_no_itemref(hs))
+ cfs_hash_get(hs, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_bd_add_locked);
+
+void
+cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode)
+{
+ hs->hs_hops->hop_hnode_del(hs, bd, hnode);
+
+ LASSERT(bd->bd_bucket->hsb_count > 0);
+ bd->bd_bucket->hsb_count--;
+ bd->bd_bucket->hsb_version++;
+ if (unlikely(bd->bd_bucket->hsb_version == 0))
+ bd->bd_bucket->hsb_version++;
+
+ if (cfs_hash_with_counter(hs)) {
+ LASSERT(atomic_read(&hs->hs_count) > 0);
+ atomic_dec(&hs->hs_count);
+ }
+ if (!cfs_hash_with_no_itemref(hs))
+ cfs_hash_put_locked(hs, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_bd_del_locked);
+
+void
+cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
+ cfs_hash_bd_t *bd_new, struct hlist_node *hnode)
+{
+ cfs_hash_bucket_t *obkt = bd_old->bd_bucket;
+ cfs_hash_bucket_t *nbkt = bd_new->bd_bucket;
+ int rc;
+
+ if (cfs_hash_bd_compare(bd_old, bd_new) == 0)
+ return;
+
+ /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
+ * in cfs_hash_bd_del/add_locked */
+ hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
+ rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
+ cfs_hash_bd_dep_record(hs, bd_new, rc);
+
+ LASSERT(obkt->hsb_count > 0);
+ obkt->hsb_count--;
+ obkt->hsb_version++;
+ if (unlikely(obkt->hsb_version == 0))
+ obkt->hsb_version++;
+ nbkt->hsb_count++;
+ nbkt->hsb_version++;
+ if (unlikely(nbkt->hsb_version == 0))
+ nbkt->hsb_version++;
+}
+EXPORT_SYMBOL(cfs_hash_bd_move_locked);
+
+enum {
+ /** always set, for sanity (avoid ZERO intent) */
+ CFS_HS_LOOKUP_MASK_FIND = 1 << 0,
+ /** return entry with a ref */
+ CFS_HS_LOOKUP_MASK_REF = 1 << 1,
+ /** add entry if not existing */
+ CFS_HS_LOOKUP_MASK_ADD = 1 << 2,
+ /** delete entry, ignore other masks */
+ CFS_HS_LOOKUP_MASK_DEL = 1 << 3,
+};
+
+typedef enum cfs_hash_lookup_intent {
+ /** return item w/o refcount */
+ CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND,
+ /** return item with refcount */
+ CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND |
+ CFS_HS_LOOKUP_MASK_REF),
+ /** return item w/o refcount if existed, otherwise add */
+ CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND |
+ CFS_HS_LOOKUP_MASK_ADD),
+ /** return item with refcount if existed, otherwise add */
+ CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
+ CFS_HS_LOOKUP_MASK_ADD),
+ /** delete if existed */
+ CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
+ CFS_HS_LOOKUP_MASK_DEL)
+} cfs_hash_lookup_intent_t;
+
+static struct hlist_node *
+cfs_hash_bd_lookup_intent(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ const void *key, struct hlist_node *hnode,
+ cfs_hash_lookup_intent_t intent)
+
+{
+ struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
+ struct hlist_node *ehnode;
+ struct hlist_node *match;
+ int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0;
+
+ /* with this function, we can avoid a lot of useless refcount ops,
+ * which are expensive atomic operations most time. */
+ match = intent_add ? NULL : hnode;
+ hlist_for_each(ehnode, hhead) {
+ if (!cfs_hash_keycmp(hs, key, ehnode))
+ continue;
+
+ if (match != NULL && match != ehnode) /* can't match */
+ continue;
+
+ /* match and ... */
+ if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) {
+ cfs_hash_bd_del_locked(hs, bd, ehnode);
+ return ehnode;
+ }
+
+ /* caller wants refcount? */
+ if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0)
+ cfs_hash_get(hs, ehnode);
+ return ehnode;
+ }
+ /* no match item */
+ if (!intent_add)
+ return NULL;
+
+ LASSERT(hnode != NULL);
+ cfs_hash_bd_add_locked(hs, bd, hnode);
+ return hnode;
+}
+
+struct hlist_node *
+cfs_hash_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, const void *key)
+{
+ return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
+ CFS_HS_LOOKUP_IT_FIND);
+}
+EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
+
+struct hlist_node *
+cfs_hash_bd_peek_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, const void *key)
+{
+ return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
+ CFS_HS_LOOKUP_IT_PEEK);
+}
+EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
+
+struct hlist_node *
+cfs_hash_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ const void *key, struct hlist_node *hnode,
+ int noref)
+{
+ return cfs_hash_bd_lookup_intent(hs, bd, key, hnode,
+ CFS_HS_LOOKUP_IT_ADD |
+ (!noref * CFS_HS_LOOKUP_MASK_REF));
+}
+EXPORT_SYMBOL(cfs_hash_bd_findadd_locked);
+
+struct hlist_node *
+cfs_hash_bd_finddel_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ const void *key, struct hlist_node *hnode)
+{
+ /* hnode can be NULL, we find the first item with @key */
+ return cfs_hash_bd_lookup_intent(hs, bd, key, hnode,
+ CFS_HS_LOOKUP_IT_FINDDEL);
+}
+EXPORT_SYMBOL(cfs_hash_bd_finddel_locked);
+
+static void
+cfs_hash_multi_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ unsigned n, int excl)
+{
+ cfs_hash_bucket_t *prev = NULL;
+ int i;
+
+ /**
+ * bds must be ascendantly ordered by bd->bd_bucket->hsb_index.
+ * NB: it's possible that several bds point to the same bucket but
+ * have different bd::bd_offset, so need take care of deadlock.
+ */
+ cfs_hash_for_each_bd(bds, n, i) {
+ if (prev == bds[i].bd_bucket)
+ continue;
+
+ LASSERT(prev == NULL ||
+ prev->hsb_index < bds[i].bd_bucket->hsb_index);
+ cfs_hash_bd_lock(hs, &bds[i], excl);
+ prev = bds[i].bd_bucket;
+ }
+}
+
+static void
+cfs_hash_multi_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ unsigned n, int excl)
+{
+ cfs_hash_bucket_t *prev = NULL;
+ int i;
+
+ cfs_hash_for_each_bd(bds, n, i) {
+ if (prev != bds[i].bd_bucket) {
+ cfs_hash_bd_unlock(hs, &bds[i], excl);
+ prev = bds[i].bd_bucket;
+ }
+ }
+}
+
+static struct hlist_node *
+cfs_hash_multi_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ unsigned n, const void *key)
+{
+ struct hlist_node *ehnode;
+ unsigned i;
+
+ cfs_hash_for_each_bd(bds, n, i) {
+ ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
+ CFS_HS_LOOKUP_IT_FIND);
+ if (ehnode != NULL)
+ return ehnode;
+ }
+ return NULL;
+}
+
+static struct hlist_node *
+cfs_hash_multi_bd_findadd_locked(cfs_hash_t *hs,
+ cfs_hash_bd_t *bds, unsigned n, const void *key,
+ struct hlist_node *hnode, int noref)
+{
+ struct hlist_node *ehnode;
+ int intent;
+ unsigned i;
+
+ LASSERT(hnode != NULL);
+ intent = CFS_HS_LOOKUP_IT_PEEK | (!noref * CFS_HS_LOOKUP_MASK_REF);
+
+ cfs_hash_for_each_bd(bds, n, i) {
+ ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
+ NULL, intent);
+ if (ehnode != NULL)
+ return ehnode;
+ }
+
+ if (i == 1) { /* only one bucket */
+ cfs_hash_bd_add_locked(hs, &bds[0], hnode);
+ } else {
+ cfs_hash_bd_t mybd;
+
+ cfs_hash_bd_get(hs, key, &mybd);
+ cfs_hash_bd_add_locked(hs, &mybd, hnode);
+ }
+
+ return hnode;
+}
+
+static struct hlist_node *
+cfs_hash_multi_bd_finddel_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ unsigned n, const void *key,
+ struct hlist_node *hnode)
+{
+ struct hlist_node *ehnode;
+ unsigned i;
+
+ cfs_hash_for_each_bd(bds, n, i) {
+ ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
+ CFS_HS_LOOKUP_IT_FINDDEL);
+ if (ehnode != NULL)
+ return ehnode;
+ }
+ return NULL;
+}
+
+static void
+cfs_hash_bd_order(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
+{
+ int rc;
+
+ if (bd2->bd_bucket == NULL)
+ return;
+
+ if (bd1->bd_bucket == NULL) {
+ *bd1 = *bd2;
+ bd2->bd_bucket = NULL;
+ return;
+ }
+
+ rc = cfs_hash_bd_compare(bd1, bd2);
+ if (rc == 0) {
+ bd2->bd_bucket = NULL;
+
+ } else if (rc > 0) { /* swab bd1 and bd2 */
+ cfs_hash_bd_t tmp;
+
+ tmp = *bd2;
+ *bd2 = *bd1;
+ *bd1 = tmp;
+ }
+}
+
+void
+cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds)
+{
+ /* NB: caller should hold hs_lock.rw if REHASH is set */
+ cfs_hash_bd_from_key(hs, hs->hs_buckets,
+ hs->hs_cur_bits, key, &bds[0]);
+ if (likely(hs->hs_rehash_buckets == NULL)) {
+ /* no rehash or not rehashing */
+ bds[1].bd_bucket = NULL;
+ return;
+ }
+
+ LASSERT(hs->hs_rehash_bits != 0);
+ cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+ hs->hs_rehash_bits, key, &bds[1]);
+
+ cfs_hash_bd_order(&bds[0], &bds[1]);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_get);
+
+void
+cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl)
+{
+ cfs_hash_multi_bd_lock(hs, bds, 2, excl);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_lock);
+
+void
+cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl)
+{
+ cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_unlock);
+
+struct hlist_node *
+cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ const void *key)
+{
+ return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_lookup_locked);
+
+struct hlist_node *
+cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ const void *key, struct hlist_node *hnode,
+ int noref)
+{
+ return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
+ hnode, noref);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_findadd_locked);
+
+struct hlist_node *
+cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds,
+ const void *key, struct hlist_node *hnode)
+{
+ return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_dual_bd_finddel_locked);
+
+static void
+cfs_hash_buckets_free(cfs_hash_bucket_t **buckets,
+ int bkt_size, int prev_size, int size)
+{
+ int i;
+
+ for (i = prev_size; i < size; i++) {
+ if (buckets[i] != NULL)
+ LIBCFS_FREE(buckets[i], bkt_size);
+ }
+
+ LIBCFS_FREE(buckets, sizeof(buckets[0]) * size);
+}
+
+/*
+ * Create or grow bucket memory. Return old_buckets if no allocation was
+ * needed, the newly allocated buckets if allocation was needed and
+ * successful, and NULL on error.
+ */
+static cfs_hash_bucket_t **
+cfs_hash_buckets_realloc(cfs_hash_t *hs, cfs_hash_bucket_t **old_bkts,
+ unsigned int old_size, unsigned int new_size)
+{
+ cfs_hash_bucket_t **new_bkts;
+ int i;
+
+ LASSERT(old_size == 0 || old_bkts != NULL);
+
+ if (old_bkts != NULL && old_size == new_size)
+ return old_bkts;
+
+ LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size);
+ if (new_bkts == NULL)
+ return NULL;
+
+ if (old_bkts != NULL) {
+ memcpy(new_bkts, old_bkts,
+ min(old_size, new_size) * sizeof(*old_bkts));
+ }
+
+ for (i = old_size; i < new_size; i++) {
+ struct hlist_head *hhead;
+ cfs_hash_bd_t bd;
+
+ LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs));
+ if (new_bkts[i] == NULL) {
+ cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
+ old_size, new_size);
+ return NULL;
+ }
+
+ new_bkts[i]->hsb_index = i;
+ new_bkts[i]->hsb_version = 1; /* shouldn't be zero */
+ new_bkts[i]->hsb_depmax = -1; /* unknown */
+ bd.bd_bucket = new_bkts[i];
+ cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
+ INIT_HLIST_HEAD(hhead);
+
+ if (cfs_hash_with_no_lock(hs) ||
+ cfs_hash_with_no_bktlock(hs))
+ continue;
+
+ if (cfs_hash_with_rw_bktlock(hs))
+ rwlock_init(&new_bkts[i]->hsb_lock.rw);
+ else if (cfs_hash_with_spin_bktlock(hs))
+ spin_lock_init(&new_bkts[i]->hsb_lock.spin);
+ else
+ LBUG(); /* invalid use-case */
+ }
+ return new_bkts;
+}
+
+/**
+ * Initialize new libcfs hash, where:
+ * @name - Descriptive hash name
+ * @cur_bits - Initial hash table size, in bits
+ * @max_bits - Maximum allowed hash table resize, in bits
+ * @ops - Registered hash table operations
+ * @flags - CFS_HASH_REHASH enable synamic hash resizing
+ * - CFS_HASH_SORT enable chained hash sort
+ */
+static int cfs_hash_rehash_worker(cfs_workitem_t *wi);
+
+#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
+static int cfs_hash_dep_print(cfs_workitem_t *wi)
+{
+ cfs_hash_t *hs = container_of(wi, cfs_hash_t, hs_dep_wi);
+ int dep;
+ int bkt;
+ int off;
+ int bits;
+
+ spin_lock(&hs->hs_dep_lock);
+ dep = hs->hs_dep_max;
+ bkt = hs->hs_dep_bkt;
+ off = hs->hs_dep_off;
+ bits = hs->hs_dep_bits;
+ spin_unlock(&hs->hs_dep_lock);
+
+ LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
+ hs->hs_name, bits, dep, bkt, off);
+ spin_lock(&hs->hs_dep_lock);
+ hs->hs_dep_bits = 0; /* mark as workitem done */
+ spin_unlock(&hs->hs_dep_lock);
+ return 0;
+}
+
+static void cfs_hash_depth_wi_init(cfs_hash_t *hs)
+{
+ spin_lock_init(&hs->hs_dep_lock);
+ cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print);
+}
+
+static void cfs_hash_depth_wi_cancel(cfs_hash_t *hs)
+{
+ if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi))
+ return;
+
+ spin_lock(&hs->hs_dep_lock);
+ while (hs->hs_dep_bits != 0) {
+ spin_unlock(&hs->hs_dep_lock);
+ cond_resched();
+ spin_lock(&hs->hs_dep_lock);
+ }
+ spin_unlock(&hs->hs_dep_lock);
+}
+
+#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */
+
+static inline void cfs_hash_depth_wi_init(cfs_hash_t *hs) {}
+static inline void cfs_hash_depth_wi_cancel(cfs_hash_t *hs) {}
+
+#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
+
+cfs_hash_t *
+cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
+ unsigned bkt_bits, unsigned extra_bytes,
+ unsigned min_theta, unsigned max_theta,
+ cfs_hash_ops_t *ops, unsigned flags)
+{
+ cfs_hash_t *hs;
+ int len;
+
+ ENTRY;
+
+ CLASSERT(CFS_HASH_THETA_BITS < 15);
+
+ LASSERT(name != NULL);
+ LASSERT(ops != NULL);
+ LASSERT(ops->hs_key);
+ LASSERT(ops->hs_hash);
+ LASSERT(ops->hs_object);
+ LASSERT(ops->hs_keycmp);
+ LASSERT(ops->hs_get != NULL);
+ LASSERT(ops->hs_put_locked != NULL);
+
+ if ((flags & CFS_HASH_REHASH) != 0)
+ flags |= CFS_HASH_COUNTER; /* must have counter */
+
+ LASSERT(cur_bits > 0);
+ LASSERT(cur_bits >= bkt_bits);
+ LASSERT(max_bits >= cur_bits && max_bits < 31);
+ LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits));
+ LASSERT(ergo((flags & CFS_HASH_REHASH) != 0,
+ (flags & CFS_HASH_NO_LOCK) == 0));
+ LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0,
+ ops->hs_keycpy != NULL));
+
+ len = (flags & CFS_HASH_BIGNAME) == 0 ?
+ CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
+ LIBCFS_ALLOC(hs, offsetof(cfs_hash_t, hs_name[len]));
+ if (hs == NULL)
+ RETURN(NULL);
+
+ strncpy(hs->hs_name, name, len);
+ hs->hs_name[len - 1] = '\0';
+ hs->hs_flags = flags;
+
+ atomic_set(&hs->hs_refcount, 1);
+ atomic_set(&hs->hs_count, 0);
+
+ cfs_hash_lock_setup(hs);
+ cfs_hash_hlist_setup(hs);
+
+ hs->hs_cur_bits = (__u8)cur_bits;
+ hs->hs_min_bits = (__u8)cur_bits;
+ hs->hs_max_bits = (__u8)max_bits;
+ hs->hs_bkt_bits = (__u8)bkt_bits;
+
+ hs->hs_ops = ops;
+ hs->hs_extra_bytes = extra_bytes;
+ hs->hs_rehash_bits = 0;
+ cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker);
+ cfs_hash_depth_wi_init(hs);
+
+ if (cfs_hash_with_rehash(hs))
+ __cfs_hash_set_theta(hs, min_theta, max_theta);
+
+ hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
+ CFS_HASH_NBKT(hs));
+ if (hs->hs_buckets != NULL)
+ return hs;
+
+ LIBCFS_FREE(hs, offsetof(cfs_hash_t, hs_name[len]));
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(cfs_hash_create);
+
+/**
+ * Cleanup libcfs hash @hs.
+ */
+static void
+cfs_hash_destroy(cfs_hash_t *hs)
+{
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ cfs_hash_bd_t bd;
+ int i;
+ ENTRY;
+
+ LASSERT(hs != NULL);
+ LASSERT(!cfs_hash_is_exiting(hs) &&
+ !cfs_hash_is_iterating(hs));
+
+ /**
+ * prohibit further rehashes, don't need any lock because
+ * I'm the only (last) one can change it.
+ */
+ hs->hs_exiting = 1;
+ if (cfs_hash_with_rehash(hs))
+ cfs_hash_rehash_cancel(hs);
+
+ cfs_hash_depth_wi_cancel(hs);
+ /* rehash should be done/canceled */
+ LASSERT(hs->hs_buckets != NULL &&
+ hs->hs_rehash_buckets == NULL);
+
+ cfs_hash_for_each_bucket(hs, &bd, i) {
+ struct hlist_head *hhead;
+
+ LASSERT(bd.bd_bucket != NULL);
+ /* no need to take this lock, just for consistent code */
+ cfs_hash_bd_lock(hs, &bd, 1);
+
+ cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+ hlist_for_each_safe(hnode, pos, hhead) {
+ LASSERTF(!cfs_hash_with_assert_empty(hs),
+ "hash %s bucket %u(%u) is not "
+ " empty: %u items left\n",
+ hs->hs_name, bd.bd_bucket->hsb_index,
+ bd.bd_offset, bd.bd_bucket->hsb_count);
+ /* can't assert key valicate, because we
+ * can interrupt rehash */
+ cfs_hash_bd_del_locked(hs, &bd, hnode);
+ cfs_hash_exit(hs, hnode);
+ }
+ }
+ LASSERT(bd.bd_bucket->hsb_count == 0);
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ cond_resched();
+ }
+
+ LASSERT(atomic_read(&hs->hs_count) == 0);
+
+ cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
+ 0, CFS_HASH_NBKT(hs));
+ i = cfs_hash_with_bigname(hs) ?
+ CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN;
+ LIBCFS_FREE(hs, offsetof(cfs_hash_t, hs_name[i]));
+
+ EXIT;
+}
+
+cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs)
+{
+ if (atomic_inc_not_zero(&hs->hs_refcount))
+ return hs;
+ return NULL;
+}
+EXPORT_SYMBOL(cfs_hash_getref);
+
+void cfs_hash_putref(cfs_hash_t *hs)
+{
+ if (atomic_dec_and_test(&hs->hs_refcount))
+ cfs_hash_destroy(hs);
+}
+EXPORT_SYMBOL(cfs_hash_putref);
+
+static inline int
+cfs_hash_rehash_bits(cfs_hash_t *hs)
+{
+ if (cfs_hash_with_no_lock(hs) ||
+ !cfs_hash_with_rehash(hs))
+ return -EOPNOTSUPP;
+
+ if (unlikely(cfs_hash_is_exiting(hs)))
+ return -ESRCH;
+
+ if (unlikely(cfs_hash_is_rehashing(hs)))
+ return -EALREADY;
+
+ if (unlikely(cfs_hash_is_iterating(hs)))
+ return -EAGAIN;
+
+ /* XXX: need to handle case with max_theta != 2.0
+ * and the case with min_theta != 0.5 */
+ if ((hs->hs_cur_bits < hs->hs_max_bits) &&
+ (__cfs_hash_theta(hs) > hs->hs_max_theta))
+ return hs->hs_cur_bits + 1;
+
+ if (!cfs_hash_with_shrink(hs))
+ return 0;
+
+ if ((hs->hs_cur_bits > hs->hs_min_bits) &&
+ (__cfs_hash_theta(hs) < hs->hs_min_theta))
+ return hs->hs_cur_bits - 1;
+
+ return 0;
+}
+
+/**
+ * don't allow inline rehash if:
+ * - user wants non-blocking change (add/del) on hash table
+ * - too many elements
+ */
+static inline int
+cfs_hash_rehash_inline(cfs_hash_t *hs)
+{
+ return !cfs_hash_with_nblk_change(hs) &&
+ atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
+}
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key. The registered
+ * ops->hs_get function will be called when the item is added.
+ */
+void
+cfs_hash_add(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
+{
+ cfs_hash_bd_t bd;
+ int bits;
+
+ LASSERT(hlist_unhashed(hnode));
+
+ cfs_hash_lock(hs, 0);
+ cfs_hash_bd_get_and_lock(hs, key, &bd, 1);
+
+ cfs_hash_key_validate(hs, key, hnode);
+ cfs_hash_bd_add_locked(hs, &bd, hnode);
+
+ cfs_hash_bd_unlock(hs, &bd, 1);
+
+ bits = cfs_hash_rehash_bits(hs);
+ cfs_hash_unlock(hs, 0);
+ if (bits > 0)
+ cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+}
+EXPORT_SYMBOL(cfs_hash_add);
+
+static struct hlist_node *
+cfs_hash_find_or_add(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode, int noref)
+{
+ struct hlist_node *ehnode;
+ cfs_hash_bd_t bds[2];
+ int bits = 0;
+
+ LASSERT(hlist_unhashed(hnode));
+
+ cfs_hash_lock(hs, 0);
+ cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
+
+ cfs_hash_key_validate(hs, key, hnode);
+ ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
+ hnode, noref);
+ cfs_hash_dual_bd_unlock(hs, bds, 1);
+
+ if (ehnode == hnode) /* new item added */
+ bits = cfs_hash_rehash_bits(hs);
+ cfs_hash_unlock(hs, 0);
+ if (bits > 0)
+ cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+
+ return ehnode;
+}
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key. The registered
+ * ops->hs_get function will be called if the item was added.
+ * Returns 0 on success or -EALREADY on key collisions.
+ */
+int
+cfs_hash_add_unique(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
+{
+ return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
+ -EALREADY : 0;
+}
+EXPORT_SYMBOL(cfs_hash_add_unique);
+
+/**
+ * Add item @hnode to libcfs hash @hs using @key. If this @key
+ * already exists in the hash then ops->hs_get will be called on the
+ * conflicting entry and that entry will be returned to the caller.
+ * Otherwise ops->hs_get is called on the item which was added.
+ */
+void *
+cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key,
+ struct hlist_node *hnode)
+{
+ hnode = cfs_hash_find_or_add(hs, key, hnode, 0);
+
+ return cfs_hash_object(hs, hnode);
+}
+EXPORT_SYMBOL(cfs_hash_findadd_unique);
+
+/**
+ * Delete item @hnode from the libcfs hash @hs using @key. The @key
+ * is required to ensure the correct hash bucket is locked since there
+ * is no direct linkage from the item to the bucket. The object
+ * removed from the hash will be returned and obs->hs_put is called
+ * on the removed object.
+ */
+void *
+cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
+{
+ void *obj = NULL;
+ int bits = 0;
+ cfs_hash_bd_t bds[2];
+
+ cfs_hash_lock(hs, 0);
+ cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
+
+ /* NB: do nothing if @hnode is not in hash table */
+ if (hnode == NULL || !hlist_unhashed(hnode)) {
+ if (bds[1].bd_bucket == NULL && hnode != NULL) {
+ cfs_hash_bd_del_locked(hs, &bds[0], hnode);
+ } else {
+ hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
+ key, hnode);
+ }
+ }
+
+ if (hnode != NULL) {
+ obj = cfs_hash_object(hs, hnode);
+ bits = cfs_hash_rehash_bits(hs);
+ }
+
+ cfs_hash_dual_bd_unlock(hs, bds, 1);
+ cfs_hash_unlock(hs, 0);
+ if (bits > 0)
+ cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
+
+ return obj;
+}
+EXPORT_SYMBOL(cfs_hash_del);
+
+/**
+ * Delete item given @key in libcfs hash @hs. The first @key found in
+ * the hash will be removed, if the key exists multiple times in the hash
+ * @hs this function must be called once per key. The removed object
+ * will be returned and ops->hs_put is called on the removed object.
+ */
+void *
+cfs_hash_del_key(cfs_hash_t *hs, const void *key)
+{
+ return cfs_hash_del(hs, key, NULL);
+}
+EXPORT_SYMBOL(cfs_hash_del_key);
+
+/**
+ * Lookup an item using @key in the libcfs hash @hs and return it.
+ * If the @key is found in the hash hs->hs_get() is called and the
+ * matching objects is returned. It is the callers responsibility
+ * to call the counterpart ops->hs_put using the cfs_hash_put() macro
+ * when when finished with the object. If the @key was not found
+ * in the hash @hs NULL is returned.
+ */
+void *
+cfs_hash_lookup(cfs_hash_t *hs, const void *key)
+{
+ void *obj = NULL;
+ struct hlist_node *hnode;
+ cfs_hash_bd_t bds[2];
+
+ cfs_hash_lock(hs, 0);
+ cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+ hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
+ if (hnode != NULL)
+ obj = cfs_hash_object(hs, hnode);
+
+ cfs_hash_dual_bd_unlock(hs, bds, 0);
+ cfs_hash_unlock(hs, 0);
+
+ return obj;
+}
+EXPORT_SYMBOL(cfs_hash_lookup);
+
+static void
+cfs_hash_for_each_enter(cfs_hash_t *hs)
+{
+ LASSERT(!cfs_hash_is_exiting(hs));
+
+ if (!cfs_hash_with_rehash(hs))
+ return;
+ /*
+ * NB: it's race on cfs_has_t::hs_iterating, but doesn't matter
+ * because it's just an unreliable signal to rehash-thread,
+ * rehash-thread will try to finsih rehash ASAP when seeing this.
+ */
+ hs->hs_iterating = 1;
+
+ cfs_hash_lock(hs, 1);
+ hs->hs_iterators++;
+
+ /* NB: iteration is mostly called by service thread,
+ * we tend to cancel pending rehash-requst, instead of
+ * blocking service thread, we will relaunch rehash request
+ * after iteration */
+ if (cfs_hash_is_rehashing(hs))
+ cfs_hash_rehash_cancel_locked(hs);
+ cfs_hash_unlock(hs, 1);
+}
+
+static void
+cfs_hash_for_each_exit(cfs_hash_t *hs)
+{
+ int remained;
+ int bits;
+
+ if (!cfs_hash_with_rehash(hs))
+ return;
+ cfs_hash_lock(hs, 1);
+ remained = --hs->hs_iterators;
+ bits = cfs_hash_rehash_bits(hs);
+ cfs_hash_unlock(hs, 1);
+ /* NB: it's race on cfs_has_t::hs_iterating, see above */
+ if (remained == 0)
+ hs->hs_iterating = 0;
+ if (bits > 0) {
+ cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
+ CFS_HASH_LOOP_HOG);
+ }
+}
+
+/**
+ * For each item in the libcfs hash @hs call the passed callback @func
+ * and pass to it as an argument each hash item and the private @data.
+ *
+ * a) the function may sleep!
+ * b) during the callback:
+ * . the bucket lock is held so the callback must never sleep.
+ * . if @removal_safe is true, use can remove current item by
+ * cfs_hash_bd_del_locked
+ */
+static __u64
+cfs_hash_for_each_tight(cfs_hash_t *hs, cfs_hash_for_each_cb_t func,
+ void *data, int remove_safe)
+{
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ cfs_hash_bd_t bd;
+ __u64 count = 0;
+ int excl = !!remove_safe;
+ int loop = 0;
+ int i;
+ ENTRY;
+
+ cfs_hash_for_each_enter(hs);
+
+ cfs_hash_lock(hs, 0);
+ LASSERT(!cfs_hash_is_rehashing(hs));
+
+ cfs_hash_for_each_bucket(hs, &bd, i) {
+ struct hlist_head *hhead;
+
+ cfs_hash_bd_lock(hs, &bd, excl);
+ if (func == NULL) { /* only glimpse size */
+ count += bd.bd_bucket->hsb_count;
+ cfs_hash_bd_unlock(hs, &bd, excl);
+ continue;
+ }
+
+ cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+ hlist_for_each_safe(hnode, pos, hhead) {
+ cfs_hash_bucket_validate(hs, &bd, hnode);
+ count++;
+ loop++;
+ if (func(hs, &bd, hnode, data)) {
+ cfs_hash_bd_unlock(hs, &bd, excl);
+ goto out;
+ }
+ }
+ }
+ cfs_hash_bd_unlock(hs, &bd, excl);
+ if (loop < CFS_HASH_LOOP_HOG)
+ continue;
+ loop = 0;
+ cfs_hash_unlock(hs, 0);
+ cond_resched();
+ cfs_hash_lock(hs, 0);
+ }
+ out:
+ cfs_hash_unlock(hs, 0);
+
+ cfs_hash_for_each_exit(hs);
+ RETURN(count);
+}
+
+typedef struct {
+ cfs_hash_cond_opt_cb_t func;
+ void *arg;
+} cfs_hash_cond_arg_t;
+
+static int
+cfs_hash_cond_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+{
+ cfs_hash_cond_arg_t *cond = data;
+
+ if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
+ cfs_hash_bd_del_locked(hs, bd, hnode);
+ return 0;
+}
+
+/**
+ * Delete item from the libcfs hash @hs when @func return true.
+ * The write lock being hold during loop for each bucket to avoid
+ * any object be reference.
+ */
+void
+cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t func, void *data)
+{
+ cfs_hash_cond_arg_t arg = {
+ .func = func,
+ .arg = data,
+ };
+
+ cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
+}
+EXPORT_SYMBOL(cfs_hash_cond_del);
+
+void
+cfs_hash_for_each(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ cfs_hash_for_each_tight(hs, func, data, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each);
+
+void
+cfs_hash_for_each_safe(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ cfs_hash_for_each_tight(hs, func, data, 1);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_safe);
+
+static int
+cfs_hash_peek(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+{
+ *(int *)data = 0;
+ return 1; /* return 1 to break the loop */
+}
+
+int
+cfs_hash_is_empty(cfs_hash_t *hs)
+{
+ int empty = 1;
+
+ cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
+ return empty;
+}
+EXPORT_SYMBOL(cfs_hash_is_empty);
+
+__u64
+cfs_hash_size_get(cfs_hash_t *hs)
+{
+ return cfs_hash_with_counter(hs) ?
+ atomic_read(&hs->hs_count) :
+ cfs_hash_for_each_tight(hs, NULL, NULL, 0);
+}
+EXPORT_SYMBOL(cfs_hash_size_get);
+
+/*
+ * cfs_hash_for_each_relax:
+ * Iterate the hash table and call @func on each item without
+ * any lock. This function can't guarantee to finish iteration
+ * if these features are enabled:
+ *
+ * a. if rehash_key is enabled, an item can be moved from
+ * one bucket to another bucket
+ * b. user can remove non-zero-ref item from hash-table,
+ * so the item can be removed from hash-table, even worse,
+ * it's possible that user changed key and insert to another
+ * hash bucket.
+ * there's no way for us to finish iteration correctly on previous
+ * two cases, so iteration has to be stopped on change.
+ */
+static int
+cfs_hash_for_each_relax(cfs_hash_t *hs, cfs_hash_for_each_cb_t func, void *data)
+{
+ struct hlist_node *hnode;
+ struct hlist_node *tmp;
+ cfs_hash_bd_t bd;
+ __u32 version;
+ int count = 0;
+ int stop_on_change;
+ int rc;
+ int i;
+ ENTRY;
+
+ stop_on_change = cfs_hash_with_rehash_key(hs) ||
+ !cfs_hash_with_no_itemref(hs) ||
+ CFS_HOP(hs, put_locked) == NULL;
+ cfs_hash_lock(hs, 0);
+ LASSERT(!cfs_hash_is_rehashing(hs));
+
+ cfs_hash_for_each_bucket(hs, &bd, i) {
+ struct hlist_head *hhead;
+
+ cfs_hash_bd_lock(hs, &bd, 0);
+ version = cfs_hash_bd_version_get(&bd);
+
+ cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+ for (hnode = hhead->first; hnode != NULL;) {
+ cfs_hash_bucket_validate(hs, &bd, hnode);
+ cfs_hash_get(hs, hnode);
+ cfs_hash_bd_unlock(hs, &bd, 0);
+ cfs_hash_unlock(hs, 0);
+
+ rc = func(hs, &bd, hnode, data);
+ if (stop_on_change)
+ cfs_hash_put(hs, hnode);
+ cond_resched();
+ count++;
+
+ cfs_hash_lock(hs, 0);
+ cfs_hash_bd_lock(hs, &bd, 0);
+ if (!stop_on_change) {
+ tmp = hnode->next;
+ cfs_hash_put_locked(hs, hnode);
+ hnode = tmp;
+ } else { /* bucket changed? */
+ if (version !=
+ cfs_hash_bd_version_get(&bd))
+ break;
+ /* safe to continue because no change */
+ hnode = hnode->next;
+ }
+ if (rc) /* callback wants to break iteration */
+ break;
+ }
+ }
+ cfs_hash_bd_unlock(hs, &bd, 0);
+ }
+ cfs_hash_unlock(hs, 0);
+
+ return count;
+}
+
+int
+cfs_hash_for_each_nolock(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ ENTRY;
+
+ if (cfs_hash_with_no_lock(hs) ||
+ cfs_hash_with_rehash_key(hs) ||
+ !cfs_hash_with_no_itemref(hs))
+ RETURN(-EOPNOTSUPP);
+
+ if (CFS_HOP(hs, get) == NULL ||
+ (CFS_HOP(hs, put) == NULL &&
+ CFS_HOP(hs, put_locked) == NULL))
+ RETURN(-EOPNOTSUPP);
+
+ cfs_hash_for_each_enter(hs);
+ cfs_hash_for_each_relax(hs, func, data);
+ cfs_hash_for_each_exit(hs);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_nolock);
+
+/**
+ * For each hash bucket in the libcfs hash @hs call the passed callback
+ * @func until all the hash buckets are empty. The passed callback @func
+ * or the previously registered callback hs->hs_put must remove the item
+ * from the hash. You may either use the cfs_hash_del() or hlist_del()
+ * functions. No rwlocks will be held during the callback @func it is
+ * safe to sleep if needed. This function will not terminate until the
+ * hash is empty. Note it is still possible to concurrently add new
+ * items in to the hash. It is the callers responsibility to ensure
+ * the required locking is in place to prevent concurrent insertions.
+ */
+int
+cfs_hash_for_each_empty(cfs_hash_t *hs,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ unsigned i = 0;
+ ENTRY;
+
+ if (cfs_hash_with_no_lock(hs))
+ return -EOPNOTSUPP;
+
+ if (CFS_HOP(hs, get) == NULL ||
+ (CFS_HOP(hs, put) == NULL &&
+ CFS_HOP(hs, put_locked) == NULL))
+ return -EOPNOTSUPP;
+
+ cfs_hash_for_each_enter(hs);
+ while (cfs_hash_for_each_relax(hs, func, data)) {
+ CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
+ hs->hs_name, i++);
+ }
+ cfs_hash_for_each_exit(hs);
+ RETURN(0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_empty);
+
+void
+cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ struct hlist_head *hhead;
+ struct hlist_node *hnode;
+ cfs_hash_bd_t bd;
+
+ cfs_hash_for_each_enter(hs);
+ cfs_hash_lock(hs, 0);
+ if (hindex >= CFS_HASH_NHLIST(hs))
+ goto out;
+
+ cfs_hash_bd_index_set(hs, hindex, &bd);
+
+ cfs_hash_bd_lock(hs, &bd, 0);
+ hhead = cfs_hash_bd_hhead(hs, &bd);
+ hlist_for_each(hnode, hhead) {
+ if (func(hs, &bd, hnode, data))
+ break;
+ }
+ cfs_hash_bd_unlock(hs, &bd, 0);
+ out:
+ cfs_hash_unlock(hs, 0);
+ cfs_hash_for_each_exit(hs);
+}
+
+EXPORT_SYMBOL(cfs_hash_hlist_for_each);
+
+/*
+ * For each item in the libcfs hash @hs which matches the @key call
+ * the passed callback @func and pass to it as an argument each hash
+ * item and the private @data. During the callback the bucket lock
+ * is held so the callback must never sleep.
+ */
+void
+cfs_hash_for_each_key(cfs_hash_t *hs, const void *key,
+ cfs_hash_for_each_cb_t func, void *data)
+{
+ struct hlist_node *hnode;
+ cfs_hash_bd_t bds[2];
+ unsigned i;
+
+ cfs_hash_lock(hs, 0);
+
+ cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
+
+ cfs_hash_for_each_bd(bds, 2, i) {
+ struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);
+
+ hlist_for_each(hnode, hlist) {
+ cfs_hash_bucket_validate(hs, &bds[i], hnode);
+
+ if (cfs_hash_keycmp(hs, key, hnode)) {
+ if (func(hs, &bds[i], hnode, data))
+ break;
+ }
+ }
+ }
+
+ cfs_hash_dual_bd_unlock(hs, bds, 0);
+ cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_for_each_key);
+
+/**
+ * Rehash the libcfs hash @hs to the given @bits. This can be used
+ * to grow the hash size when excessive chaining is detected, or to
+ * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH
+ * flag is set in @hs the libcfs hash may be dynamically rehashed
+ * during addition or removal if the hash's theta value exceeds
+ * either the hs->hs_min_theta or hs->max_theta values. By default
+ * these values are tuned to keep the chained hash depth small, and
+ * this approach assumes a reasonably uniform hashing function. The
+ * theta thresholds for @hs are tunable via cfs_hash_set_theta().
+ */
+void
+cfs_hash_rehash_cancel_locked(cfs_hash_t *hs)
+{
+ int i;
+
+ /* need hold cfs_hash_lock(hs, 1) */
+ LASSERT(cfs_hash_with_rehash(hs) &&
+ !cfs_hash_with_no_lock(hs));
+
+ if (!cfs_hash_is_rehashing(hs))
+ return;
+
+ if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) {
+ hs->hs_rehash_bits = 0;
+ return;
+ }
+
+ for (i = 2; cfs_hash_is_rehashing(hs); i++) {
+ cfs_hash_unlock(hs, 1);
+ /* raise console warning while waiting too long */
+ CDEBUG(IS_PO2(i >> 3) ? D_WARNING : D_INFO,
+ "hash %s is still rehashing, rescheded %d\n",
+ hs->hs_name, i - 1);
+ cond_resched();
+ cfs_hash_lock(hs, 1);
+ }
+}
+EXPORT_SYMBOL(cfs_hash_rehash_cancel_locked);
+
+void
+cfs_hash_rehash_cancel(cfs_hash_t *hs)
+{
+ cfs_hash_lock(hs, 1);
+ cfs_hash_rehash_cancel_locked(hs);
+ cfs_hash_unlock(hs, 1);
+}
+EXPORT_SYMBOL(cfs_hash_rehash_cancel);
+
+int
+cfs_hash_rehash(cfs_hash_t *hs, int do_rehash)
+{
+ int rc;
+
+ LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));
+
+ cfs_hash_lock(hs, 1);
+
+ rc = cfs_hash_rehash_bits(hs);
+ if (rc <= 0) {
+ cfs_hash_unlock(hs, 1);
+ return rc;
+ }
+
+ hs->hs_rehash_bits = rc;
+ if (!do_rehash) {
+ /* launch and return */
+ cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi);
+ cfs_hash_unlock(hs, 1);
+ return 0;
+ }
+
+ /* rehash right now */
+ cfs_hash_unlock(hs, 1);
+
+ return cfs_hash_rehash_worker(&hs->hs_rehash_wi);
+}
+EXPORT_SYMBOL(cfs_hash_rehash);
+
+static int
+cfs_hash_rehash_bd(cfs_hash_t *hs, cfs_hash_bd_t *old)
+{
+ cfs_hash_bd_t new;
+ struct hlist_head *hhead;
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ void *key;
+ int c = 0;
+
+ /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */
+ cfs_hash_bd_for_each_hlist(hs, old, hhead) {
+ hlist_for_each_safe(hnode, pos, hhead) {
+ key = cfs_hash_key(hs, hnode);
+ LASSERT(key != NULL);
+ /* Validate hnode is in the correct bucket. */
+ cfs_hash_bucket_validate(hs, old, hnode);
+ /*
+ * Delete from old hash bucket; move to new bucket.
+ * ops->hs_key must be defined.
+ */
+ cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
+ hs->hs_rehash_bits, key, &new);
+ cfs_hash_bd_move_locked(hs, old, &new, hnode);
+ c++;
+ }
+ }
+
+ return c;
+}
+
+static int
+cfs_hash_rehash_worker(cfs_workitem_t *wi)
+{
+ cfs_hash_t *hs = container_of(wi, cfs_hash_t, hs_rehash_wi);
+ cfs_hash_bucket_t **bkts;
+ cfs_hash_bd_t bd;
+ unsigned int old_size;
+ unsigned int new_size;
+ int bsize;
+ int count = 0;
+ int rc = 0;
+ int i;
+
+ LASSERT (hs != NULL && cfs_hash_with_rehash(hs));
+
+ cfs_hash_lock(hs, 0);
+ LASSERT(cfs_hash_is_rehashing(hs));
+
+ old_size = CFS_HASH_NBKT(hs);
+ new_size = CFS_HASH_RH_NBKT(hs);
+
+ cfs_hash_unlock(hs, 0);
+
+ /*
+ * don't need hs::hs_rwlock for hs::hs_buckets,
+ * because nobody can change bkt-table except me.
+ */
+ bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
+ old_size, new_size);
+ cfs_hash_lock(hs, 1);
+ if (bkts == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (bkts == hs->hs_buckets) {
+ bkts = NULL; /* do nothing */
+ goto out;
+ }
+
+ rc = __cfs_hash_theta(hs);
+ if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
+ /* free the new allocated bkt-table */
+ old_size = new_size;
+ new_size = CFS_HASH_NBKT(hs);
+ rc = -EALREADY;
+ goto out;
+ }
+
+ LASSERT(hs->hs_rehash_buckets == NULL);
+ hs->hs_rehash_buckets = bkts;
+
+ rc = 0;
+ cfs_hash_for_each_bucket(hs, &bd, i) {
+ if (cfs_hash_is_exiting(hs)) {
+ rc = -ESRCH;
+ /* someone wants to destroy the hash, abort now */
+ if (old_size < new_size) /* OK to free old bkt-table */
+ break;
+ /* it's shrinking, need free new bkt-table */
+ hs->hs_rehash_buckets = NULL;
+ old_size = new_size;
+ new_size = CFS_HASH_NBKT(hs);
+ goto out;
+ }
+
+ count += cfs_hash_rehash_bd(hs, &bd);
+ if (count < CFS_HASH_LOOP_HOG ||
+ cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
+ continue;
+ }
+
+ count = 0;
+ cfs_hash_unlock(hs, 1);
+ cond_resched();
+ cfs_hash_lock(hs, 1);
+ }
+
+ hs->hs_rehash_count++;
+
+ bkts = hs->hs_buckets;
+ hs->hs_buckets = hs->hs_rehash_buckets;
+ hs->hs_rehash_buckets = NULL;
+
+ hs->hs_cur_bits = hs->hs_rehash_bits;
+ out:
+ hs->hs_rehash_bits = 0;
+ if (rc == -ESRCH) /* never be scheduled again */
+ cfs_wi_exit(cfs_sched_rehash, wi);
+ bsize = cfs_hash_bkt_size(hs);
+ cfs_hash_unlock(hs, 1);
+ /* can't refer to @hs anymore because it could be destroyed */
+ if (bkts != NULL)
+ cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
+ if (rc != 0)
+ CDEBUG(D_INFO, "early quit of of rehashing: %d\n", rc);
+ /* return 1 only if cfs_wi_exit is called */
+ return rc == -ESRCH;
+}
+
+/**
+ * Rehash the object referenced by @hnode in the libcfs hash @hs. The
+ * @old_key must be provided to locate the objects previous location
+ * in the hash, and the @new_key will be used to reinsert the object.
+ * Use this function instead of a cfs_hash_add() + cfs_hash_del()
+ * combo when it is critical that there is no window in time where the
+ * object is missing from the hash. When an object is being rehashed
+ * the registered cfs_hash_get() and cfs_hash_put() functions will
+ * not be called.
+ */
+void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key,
+ void *new_key, struct hlist_node *hnode)
+{
+ cfs_hash_bd_t bds[3];
+ cfs_hash_bd_t old_bds[2];
+ cfs_hash_bd_t new_bd;
+
+ LASSERT(!hlist_unhashed(hnode));
+
+ cfs_hash_lock(hs, 0);
+
+ cfs_hash_dual_bd_get(hs, old_key, old_bds);
+ cfs_hash_bd_get(hs, new_key, &new_bd);
+
+ bds[0] = old_bds[0];
+ bds[1] = old_bds[1];
+ bds[2] = new_bd;
+
+ /* NB: bds[0] and bds[1] are ordered already */
+ cfs_hash_bd_order(&bds[1], &bds[2]);
+ cfs_hash_bd_order(&bds[0], &bds[1]);
+
+ cfs_hash_multi_bd_lock(hs, bds, 3, 1);
+ if (likely(old_bds[1].bd_bucket == NULL)) {
+ cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
+ } else {
+ cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
+ cfs_hash_bd_add_locked(hs, &new_bd, hnode);
+ }
+ /* overwrite key inside locks, otherwise may screw up with
+ * other operations, i.e: rehash */
+ cfs_hash_keycpy(hs, new_key, hnode);
+
+ cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
+ cfs_hash_unlock(hs, 0);
+}
+EXPORT_SYMBOL(cfs_hash_rehash_key);
+
+int cfs_hash_debug_header(struct seq_file *m)
+{
+ return seq_printf(m, "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%8s%8s%8s%s\n",
+ CFS_HASH_BIGNAME_LEN,
+ "name", "cur", "min", "max", "theta", "t-min", "t-max",
+ "flags", "rehash", "count", "maxdep", "maxdepb",
+ " distribution");
+}
+EXPORT_SYMBOL(cfs_hash_debug_header);
+
+static cfs_hash_bucket_t **
+cfs_hash_full_bkts(cfs_hash_t *hs)
+{
+ /* NB: caller should hold hs->hs_rwlock if REHASH is set */
+ if (hs->hs_rehash_buckets == NULL)
+ return hs->hs_buckets;
+
+ LASSERT(hs->hs_rehash_bits != 0);
+ return hs->hs_rehash_bits > hs->hs_cur_bits ?
+ hs->hs_rehash_buckets : hs->hs_buckets;
+}
+
+static unsigned int
+cfs_hash_full_nbkt(cfs_hash_t *hs)
+{
+ /* NB: caller should hold hs->hs_rwlock if REHASH is set */
+ if (hs->hs_rehash_buckets == NULL)
+ return CFS_HASH_NBKT(hs);
+
+ LASSERT(hs->hs_rehash_bits != 0);
+ return hs->hs_rehash_bits > hs->hs_cur_bits ?
+ CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
+}
+
+int cfs_hash_debug_str(cfs_hash_t *hs, struct seq_file *m)
+{
+ int dist[8] = { 0, };
+ int maxdep = -1;
+ int maxdepb = -1;
+ int total = 0;
+ int theta;
+ int i;
+
+ cfs_hash_lock(hs, 0);
+ theta = __cfs_hash_theta(hs);
+
+ seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ",
+ CFS_HASH_BIGNAME_LEN, hs->hs_name,
+ 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
+ 1 << hs->hs_max_bits,
+ __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
+ __cfs_hash_theta_int(hs->hs_min_theta),
+ __cfs_hash_theta_frac(hs->hs_min_theta),
+ __cfs_hash_theta_int(hs->hs_max_theta),
+ __cfs_hash_theta_frac(hs->hs_max_theta),
+ hs->hs_flags, hs->hs_rehash_count);
+
+ /*
+ * The distribution is a summary of the chained hash depth in
+ * each of the libcfs hash buckets. Each buckets hsb_count is
+ * divided by the hash theta value and used to generate a
+ * histogram of the hash distribution. A uniform hash will
+ * result in all hash buckets being close to the average thus
+ * only the first few entries in the histogram will be non-zero.
+ * If you hash function results in a non-uniform hash the will
+ * be observable by outlier bucks in the distribution histogram.
+ *
+ * Uniform hash distribution: 128/128/0/0/0/0/0/0
+ * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1
+ */
+ for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
+ cfs_hash_bd_t bd;
+
+ bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
+ cfs_hash_bd_lock(hs, &bd, 0);
+ if (maxdep < bd.bd_bucket->hsb_depmax) {
+ maxdep = bd.bd_bucket->hsb_depmax;
+ maxdepb = ffz(~maxdep);
+ }
+ total += bd.bd_bucket->hsb_count;
+ dist[min(__cfs_fls(bd.bd_bucket->hsb_count/max(theta,1)),7)]++;
+ cfs_hash_bd_unlock(hs, &bd, 0);
+ }
+
+ seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
+ for (i = 0; i < 8; i++)
+ seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/');
+
+ cfs_hash_unlock(hs, 0);
+
+ return 0;
+}
+EXPORT_SYMBOL(cfs_hash_debug_str);
diff --git a/drivers/staging/lustre/lustre/libcfs/heap.c b/drivers/staging/lustre/lustre/libcfs/heap.c
new file mode 100644
index 000000000000..147e4fe4762d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/heap.c
@@ -0,0 +1,475 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ */
+/*
+ * libcfs/libcfs/heap.c
+ *
+ * Author: Eric Barton <eeb@whamcloud.com>
+ * Liang Zhen <liang@whamcloud.com>
+ */
+/** \addtogroup heap
+ *
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+#define CBH_ALLOC(ptr, h) \
+do { \
+ if ((h)->cbh_flags & CBH_FLAG_ATOMIC_GROW) \
+ LIBCFS_CPT_ALLOC_GFP((ptr), h->cbh_cptab, h->cbh_cptid, \
+ CBH_NOB, GFP_ATOMIC); \
+ else \
+ LIBCFS_CPT_ALLOC((ptr), h->cbh_cptab, h->cbh_cptid, \
+ CBH_NOB); \
+} while (0)
+
+#define CBH_FREE(ptr) LIBCFS_FREE(ptr, CBH_NOB)
+
+/**
+ * Grows the capacity of a binary heap so that it can handle a larger number of
+ * \e cfs_binheap_node_t objects.
+ *
+ * \param[in] h The binary heap
+ *
+ * \retval 0 Successfully grew the heap
+ * \retval -ENOMEM OOM error
+ */
+static int
+cfs_binheap_grow(cfs_binheap_t *h)
+{
+ cfs_binheap_node_t ***frag1 = NULL;
+ cfs_binheap_node_t **frag2;
+ int hwm = h->cbh_hwm;
+
+ /* need a whole new chunk of pointers */
+ LASSERT((h->cbh_hwm & CBH_MASK) == 0);
+
+ if (hwm == 0) {
+ /* first use of single indirect */
+ CBH_ALLOC(h->cbh_elements1, h);
+ if (h->cbh_elements1 == NULL)
+ return -ENOMEM;
+
+ goto out;
+ }
+
+ hwm -= CBH_SIZE;
+ if (hwm < CBH_SIZE * CBH_SIZE) {
+ /* not filled double indirect */
+ CBH_ALLOC(frag2, h);
+ if (frag2 == NULL)
+ return -ENOMEM;
+
+ if (hwm == 0) {
+ /* first use of double indirect */
+ CBH_ALLOC(h->cbh_elements2, h);
+ if (h->cbh_elements2 == NULL) {
+ CBH_FREE(frag2);
+ return -ENOMEM;
+ }
+ }
+
+ h->cbh_elements2[hwm >> CBH_SHIFT] = frag2;
+ goto out;
+ }
+
+ hwm -= CBH_SIZE * CBH_SIZE;
+#if (CBH_SHIFT * 3 < 32)
+ if (hwm >= CBH_SIZE * CBH_SIZE * CBH_SIZE) {
+ /* filled triple indirect */
+ return -ENOMEM;
+ }
+#endif
+ CBH_ALLOC(frag2, h);
+ if (frag2 == NULL)
+ return -ENOMEM;
+
+ if (((hwm >> CBH_SHIFT) & CBH_MASK) == 0) {
+ /* first use of this 2nd level index */
+ CBH_ALLOC(frag1, h);
+ if (frag1 == NULL) {
+ CBH_FREE(frag2);
+ return -ENOMEM;
+ }
+ }
+
+ if (hwm == 0) {
+ /* first use of triple indirect */
+ CBH_ALLOC(h->cbh_elements3, h);
+ if (h->cbh_elements3 == NULL) {
+ CBH_FREE(frag2);
+ CBH_FREE(frag1);
+ return -ENOMEM;
+ }
+ }
+
+ if (frag1 != NULL) {
+ LASSERT(h->cbh_elements3[hwm >> (2 * CBH_SHIFT)] == NULL);
+ h->cbh_elements3[hwm >> (2 * CBH_SHIFT)] = frag1;
+ } else {
+ frag1 = h->cbh_elements3[hwm >> (2 * CBH_SHIFT)];
+ LASSERT(frag1 != NULL);
+ }
+
+ frag1[(hwm >> CBH_SHIFT) & CBH_MASK] = frag2;
+
+ out:
+ h->cbh_hwm += CBH_SIZE;
+ return 0;
+}
+
+/**
+ * Creates and initializes a binary heap instance.
+ *
+ * \param[in] ops The operations to be used
+ * \param[in] flags The heap flags
+ * \parm[in] count The initial heap capacity in # of elements
+ * \param[in] arg An optional private argument
+ * \param[in] cptab The CPT table this heap instance will operate over
+ * \param[in] cptid The CPT id of \a cptab this heap instance will operate over
+ *
+ * \retval valid-pointer A newly-created and initialized binary heap object
+ * \retval NULL error
+ */
+cfs_binheap_t *
+cfs_binheap_create(cfs_binheap_ops_t *ops, unsigned int flags,
+ unsigned count, void *arg, struct cfs_cpt_table *cptab,
+ int cptid)
+{
+ cfs_binheap_t *h;
+
+ LASSERT(ops != NULL);
+ LASSERT(ops->hop_compare != NULL);
+ LASSERT(cptab != NULL);
+ LASSERT(cptid == CFS_CPT_ANY ||
+ (cptid >= 0 && cptid < cptab->ctb_nparts));
+
+ LIBCFS_CPT_ALLOC(h, cptab, cptid, sizeof(*h));
+ if (h == NULL)
+ return NULL;
+
+ h->cbh_ops = ops;
+ h->cbh_nelements = 0;
+ h->cbh_hwm = 0;
+ h->cbh_private = arg;
+ h->cbh_flags = flags & (~CBH_FLAG_ATOMIC_GROW);
+ h->cbh_cptab = cptab;
+ h->cbh_cptid = cptid;
+
+ while (h->cbh_hwm < count) { /* preallocate */
+ if (cfs_binheap_grow(h) != 0) {
+ cfs_binheap_destroy(h);
+ return NULL;
+ }
+ }
+
+ h->cbh_flags |= flags & CBH_FLAG_ATOMIC_GROW;
+
+ return h;
+}
+EXPORT_SYMBOL(cfs_binheap_create);
+
+/**
+ * Releases all resources associated with a binary heap instance.
+ *
+ * Deallocates memory for all indirection levels and the binary heap object
+ * itself.
+ *
+ * \param[in] h The binary heap object
+ */
+void
+cfs_binheap_destroy(cfs_binheap_t *h)
+{
+ int idx0;
+ int idx1;
+ int n;
+
+ LASSERT(h != NULL);
+
+ n = h->cbh_hwm;
+
+ if (n > 0) {
+ CBH_FREE(h->cbh_elements1);
+ n -= CBH_SIZE;
+ }
+
+ if (n > 0) {
+ for (idx0 = 0; idx0 < CBH_SIZE && n > 0; idx0++) {
+ CBH_FREE(h->cbh_elements2[idx0]);
+ n -= CBH_SIZE;
+ }
+
+ CBH_FREE(h->cbh_elements2);
+ }
+
+ if (n > 0) {
+ for (idx0 = 0; idx0 < CBH_SIZE && n > 0; idx0++) {
+
+ for (idx1 = 0; idx1 < CBH_SIZE && n > 0; idx1++) {
+ CBH_FREE(h->cbh_elements3[idx0][idx1]);
+ n -= CBH_SIZE;
+ }
+
+ CBH_FREE(h->cbh_elements3[idx0]);
+ }
+
+ CBH_FREE(h->cbh_elements3);
+ }
+
+ LIBCFS_FREE(h, sizeof(*h));
+}
+EXPORT_SYMBOL(cfs_binheap_destroy);
+
+/**
+ * Obtains a double pointer to a heap element, given its index into the binary
+ * tree.
+ *
+ * \param[in] h The binary heap instance
+ * \param[in] idx The requested node's index
+ *
+ * \retval valid-pointer A double pointer to a heap pointer entry
+ */
+static cfs_binheap_node_t **
+cfs_binheap_pointer(cfs_binheap_t *h, unsigned int idx)
+{
+ if (idx < CBH_SIZE)
+ return &(h->cbh_elements1[idx]);
+
+ idx -= CBH_SIZE;
+ if (idx < CBH_SIZE * CBH_SIZE)
+ return &(h->cbh_elements2[idx >> CBH_SHIFT][idx & CBH_MASK]);
+
+ idx -= CBH_SIZE * CBH_SIZE;
+ return &(h->cbh_elements3[idx >> (2 * CBH_SHIFT)]\
+ [(idx >> CBH_SHIFT) & CBH_MASK]\
+ [idx & CBH_MASK]);
+}
+
+/**
+ * Obtains a pointer to a heap element, given its index into the binary tree.
+ *
+ * \param[in] h The binary heap
+ * \param[in] idx The requested node's index
+ *
+ * \retval valid-pointer The requested heap node
+ * \retval NULL Supplied index is out of bounds
+ */
+cfs_binheap_node_t *
+cfs_binheap_find(cfs_binheap_t *h, unsigned int idx)
+{
+ if (idx >= h->cbh_nelements)
+ return NULL;
+
+ return *cfs_binheap_pointer(h, idx);
+}
+EXPORT_SYMBOL(cfs_binheap_find);
+
+/**
+ * Moves a node upwards, towards the root of the binary tree.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ *
+ * \retval 1 The position of \a e in the tree was changed at least once
+ * \retval 0 The position of \a e in the tree was not changed
+ */
+static int
+cfs_binheap_bubble(cfs_binheap_t *h, cfs_binheap_node_t *e)
+{
+ unsigned int cur_idx = e->chn_index;
+ cfs_binheap_node_t **cur_ptr;
+ unsigned int parent_idx;
+ cfs_binheap_node_t **parent_ptr;
+ int did_sth = 0;
+
+ cur_ptr = cfs_binheap_pointer(h, cur_idx);
+ LASSERT(*cur_ptr == e);
+
+ while (cur_idx > 0) {
+ parent_idx = (cur_idx - 1) >> 1;
+
+ parent_ptr = cfs_binheap_pointer(h, parent_idx);
+ LASSERT((*parent_ptr)->chn_index == parent_idx);
+
+ if (h->cbh_ops->hop_compare(*parent_ptr, e))
+ break;
+
+ (*parent_ptr)->chn_index = cur_idx;
+ *cur_ptr = *parent_ptr;
+ cur_ptr = parent_ptr;
+ cur_idx = parent_idx;
+ did_sth = 1;
+ }
+
+ e->chn_index = cur_idx;
+ *cur_ptr = e;
+
+ return did_sth;
+}
+
+/**
+ * Moves a node downwards, towards the last level of the binary tree.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ *
+ * \retval 1 The position of \a e in the tree was changed at least once
+ * \retval 0 The position of \a e in the tree was not changed
+ */
+static int
+cfs_binheap_sink(cfs_binheap_t *h, cfs_binheap_node_t *e)
+{
+ unsigned int n = h->cbh_nelements;
+ unsigned int child_idx;
+ cfs_binheap_node_t **child_ptr;
+ cfs_binheap_node_t *child;
+ unsigned int child2_idx;
+ cfs_binheap_node_t **child2_ptr;
+ cfs_binheap_node_t *child2;
+ unsigned int cur_idx;
+ cfs_binheap_node_t **cur_ptr;
+ int did_sth = 0;
+
+ cur_idx = e->chn_index;
+ cur_ptr = cfs_binheap_pointer(h, cur_idx);
+ LASSERT(*cur_ptr == e);
+
+ while (cur_idx < n) {
+ child_idx = (cur_idx << 1) + 1;
+ if (child_idx >= n)
+ break;
+
+ child_ptr = cfs_binheap_pointer(h, child_idx);
+ child = *child_ptr;
+
+ child2_idx = child_idx + 1;
+ if (child2_idx < n) {
+ child2_ptr = cfs_binheap_pointer(h, child2_idx);
+ child2 = *child2_ptr;
+
+ if (h->cbh_ops->hop_compare(child2, child)) {
+ child_idx = child2_idx;
+ child_ptr = child2_ptr;
+ child = child2;
+ }
+ }
+
+ LASSERT(child->chn_index == child_idx);
+
+ if (h->cbh_ops->hop_compare(e, child))
+ break;
+
+ child->chn_index = cur_idx;
+ *cur_ptr = child;
+ cur_ptr = child_ptr;
+ cur_idx = child_idx;
+ did_sth = 1;
+ }
+
+ e->chn_index = cur_idx;
+ *cur_ptr = e;
+
+ return did_sth;
+}
+
+/**
+ * Sort-inserts a node into the binary heap.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ *
+ * \retval 0 Element inserted successfully
+ * \retval != 0 error
+ */
+int
+cfs_binheap_insert(cfs_binheap_t *h, cfs_binheap_node_t *e)
+{
+ cfs_binheap_node_t **new_ptr;
+ unsigned int new_idx = h->cbh_nelements;
+ int rc;
+
+ if (new_idx == h->cbh_hwm) {
+ rc = cfs_binheap_grow(h);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (h->cbh_ops->hop_enter) {
+ rc = h->cbh_ops->hop_enter(h, e);
+ if (rc != 0)
+ return rc;
+ }
+
+ e->chn_index = new_idx;
+ new_ptr = cfs_binheap_pointer(h, new_idx);
+ h->cbh_nelements++;
+ *new_ptr = e;
+
+ cfs_binheap_bubble(h, e);
+
+ return 0;
+}
+EXPORT_SYMBOL(cfs_binheap_insert);
+
+/**
+ * Removes a node from the binary heap.
+ *
+ * \param[in] h The heap
+ * \param[in] e The node
+ */
+void
+cfs_binheap_remove(cfs_binheap_t *h, cfs_binheap_node_t *e)
+{
+ unsigned int n = h->cbh_nelements;
+ unsigned int cur_idx = e->chn_index;
+ cfs_binheap_node_t **cur_ptr;
+ cfs_binheap_node_t *last;
+
+ LASSERT(cur_idx != CBH_POISON);
+ LASSERT(cur_idx < n);
+
+ cur_ptr = cfs_binheap_pointer(h, cur_idx);
+ LASSERT(*cur_ptr == e);
+
+ n--;
+ last = *cfs_binheap_pointer(h, n);
+ h->cbh_nelements = n;
+ if (last == e)
+ return;
+
+ last->chn_index = cur_idx;
+ *cur_ptr = last;
+ if (!cfs_binheap_bubble(h, *cur_ptr))
+ cfs_binheap_sink(h, *cur_ptr);
+
+ e->chn_index = CBH_POISON;
+ if (h->cbh_ops->hop_exit)
+ h->cbh_ops->hop_exit(h, e);
+}
+EXPORT_SYMBOL(cfs_binheap_remove);
+
+/** @} heap */
diff --git a/drivers/staging/lustre/lustre/libcfs/kernel_user_comm.c b/drivers/staging/lustre/lustre/libcfs/kernel_user_comm.c
new file mode 100644
index 000000000000..d6d3b2e0f307
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/kernel_user_comm.c
@@ -0,0 +1,346 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ *
+ * Kernel <-> userspace communication routines.
+ * Using pipes for all arches.
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#define D_KUC D_OTHER
+
+#include <linux/libcfs/libcfs.h>
+
+#ifdef LUSTRE_UTILS
+/* This is the userspace side. */
+
+/** Start the userspace side of a KUC pipe.
+ * @param link Private descriptor for pipe/socket.
+ * @param groups KUC broadcast group to listen to
+ * (can be null for unicast to this pid)
+ */
+int libcfs_ukuc_start(lustre_kernelcomm *link, int group)
+{
+ int pfd[2];
+
+ if (pipe(pfd) < 0)
+ return -errno;
+
+ memset(link, 0, sizeof(*link));
+ link->lk_rfd = pfd[0];
+ link->lk_wfd = pfd[1];
+ link->lk_group = group;
+ link->lk_uid = getpid();
+ return 0;
+}
+
+int libcfs_ukuc_stop(lustre_kernelcomm *link)
+{
+ if (link->lk_wfd > 0)
+ close(link->lk_wfd);
+ return close(link->lk_rfd);
+}
+
+#define lhsz sizeof(*kuch)
+
+/** Read a message from the link.
+ * Allocates memory, returns handle
+ *
+ * @param link Private descriptor for pipe/socket.
+ * @param buf Buffer to read into, must include size for kuc_hdr
+ * @param maxsize Maximum message size allowed
+ * @param transport Only listen to messages on this transport
+ * (and the generic transport)
+ */
+int libcfs_ukuc_msg_get(lustre_kernelcomm *link, char *buf, int maxsize,
+ int transport)
+{
+ struct kuc_hdr *kuch;
+ int rc = 0;
+
+ memset(buf, 0, maxsize);
+
+ CDEBUG(D_KUC, "Waiting for message from kernel on fd %d\n",
+ link->lk_rfd);
+
+ while (1) {
+ /* Read header first to get message size */
+ rc = read(link->lk_rfd, buf, lhsz);
+ if (rc <= 0) {
+ rc = -errno;
+ break;
+ }
+ kuch = (struct kuc_hdr *)buf;
+
+ CDEBUG(D_KUC, "Received message mg=%x t=%d m=%d l=%d\n",
+ kuch->kuc_magic, kuch->kuc_transport, kuch->kuc_msgtype,
+ kuch->kuc_msglen);
+
+ if (kuch->kuc_magic != KUC_MAGIC) {
+ CERROR("bad message magic %x != %x\n",
+ kuch->kuc_magic, KUC_MAGIC);
+ rc = -EPROTO;
+ break;
+ }
+
+ if (kuch->kuc_msglen > maxsize) {
+ rc = -EMSGSIZE;
+ break;
+ }
+
+ /* Read payload */
+ rc = read(link->lk_rfd, buf + lhsz, kuch->kuc_msglen - lhsz);
+ if (rc < 0) {
+ rc = -errno;
+ break;
+ }
+ if (rc < (kuch->kuc_msglen - lhsz)) {
+ CERROR("short read: got %d of %d bytes\n",
+ rc, kuch->kuc_msglen);
+ rc = -EPROTO;
+ break;
+ }
+
+ if (kuch->kuc_transport == transport ||
+ kuch->kuc_transport == KUC_TRANSPORT_GENERIC) {
+ return 0;
+ }
+ /* Drop messages for other transports */
+ }
+ return rc;
+}
+
+#else /* LUSTRE_UTILS */
+/* This is the kernel side (liblustre as well). */
+
+/**
+ * libcfs_kkuc_msg_put - send an message from kernel to userspace
+ * @param fp to send the message to
+ * @param payload Payload data. First field of payload is always
+ * struct kuc_hdr
+ */
+int libcfs_kkuc_msg_put(struct file *filp, void *payload)
+{
+ struct kuc_hdr *kuch = (struct kuc_hdr *)payload;
+ ssize_t count = kuch->kuc_msglen;
+ loff_t offset = 0;
+ mm_segment_t fs;
+ int rc = -ENOSYS;
+
+ if (filp == NULL || IS_ERR(filp))
+ return -EBADF;
+
+ if (kuch->kuc_magic != KUC_MAGIC) {
+ CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic);
+ return -ENOSYS;
+ }
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ while (count > 0) {
+ rc = vfs_write(filp, (void __force __user *)payload,
+ count, &offset);
+ if (rc < 0)
+ break;
+ count -= rc;
+ payload += rc;
+ rc = 0;
+ }
+ set_fs(fs);
+
+ if (rc < 0)
+ CWARN("message send failed (%d)\n", rc);
+ else
+ CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp);
+
+ return rc;
+}
+EXPORT_SYMBOL(libcfs_kkuc_msg_put);
+
+/* Broadcast groups are global across all mounted filesystems;
+ * i.e. registering for a group on 1 fs will get messages for that
+ * group from any fs */
+/** A single group reigstration has a uid and a file pointer */
+struct kkuc_reg {
+ struct list_head kr_chain;
+ int kr_uid;
+ struct file *kr_fp;
+ __u32 kr_data;
+};
+static struct list_head kkuc_groups[KUC_GRP_MAX+1] = {};
+/* Protect message sending against remove and adds */
+static DECLARE_RWSEM(kg_sem);
+
+/** Add a receiver to a broadcast group
+ * @param filp pipe to write into
+ * @param uid identidier for this receiver
+ * @param group group number
+ */
+int libcfs_kkuc_group_add(struct file *filp, int uid, int group, __u32 data)
+{
+ struct kkuc_reg *reg;
+
+ if (group > KUC_GRP_MAX) {
+ CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+ return -EINVAL;
+ }
+
+ /* fput in group_rem */
+ if (filp == NULL)
+ return -EBADF;
+
+ /* freed in group_rem */
+ reg = kmalloc(sizeof(*reg), 0);
+ if (reg == NULL)
+ return -ENOMEM;
+
+ reg->kr_fp = filp;
+ reg->kr_uid = uid;
+ reg->kr_data = data;
+
+ down_write(&kg_sem);
+ if (kkuc_groups[group].next == NULL)
+ INIT_LIST_HEAD(&kkuc_groups[group]);
+ list_add(&reg->kr_chain, &kkuc_groups[group]);
+ up_write(&kg_sem);
+
+ CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group);
+
+ return 0;
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_add);
+
+int libcfs_kkuc_group_rem(int uid, int group)
+{
+ struct kkuc_reg *reg, *next;
+ ENTRY;
+
+ if (kkuc_groups[group].next == NULL)
+ RETURN(0);
+
+ if (uid == 0) {
+ /* Broadcast a shutdown message */
+ struct kuc_hdr lh;
+
+ lh.kuc_magic = KUC_MAGIC;
+ lh.kuc_transport = KUC_TRANSPORT_GENERIC;
+ lh.kuc_msgtype = KUC_MSG_SHUTDOWN;
+ lh.kuc_msglen = sizeof(lh);
+ libcfs_kkuc_group_put(group, &lh);
+ }
+
+ down_write(&kg_sem);
+ list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) {
+ if ((uid == 0) || (uid == reg->kr_uid)) {
+ list_del(&reg->kr_chain);
+ CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n",
+ reg->kr_uid, reg->kr_fp, group);
+ if (reg->kr_fp != NULL)
+ fput(reg->kr_fp);
+ kfree(reg);
+ }
+ }
+ up_write(&kg_sem);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_rem);
+
+int libcfs_kkuc_group_put(int group, void *payload)
+{
+ struct kkuc_reg *reg;
+ int rc = 0;
+ int one_success = 0;
+ ENTRY;
+
+ down_read(&kg_sem);
+ list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
+ if (reg->kr_fp != NULL) {
+ rc = libcfs_kkuc_msg_put(reg->kr_fp, payload);
+ if (rc == 0)
+ one_success = 1;
+ else if (rc == -EPIPE) {
+ fput(reg->kr_fp);
+ reg->kr_fp = NULL;
+ }
+ }
+ }
+ up_read(&kg_sem);
+
+ /* don't return an error if the message has been delivered
+ * at least to one agent */
+ if (one_success)
+ rc = 0;
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_put);
+
+/**
+ * Calls a callback function for each link of the given kuc group.
+ * @param group the group to call the function on.
+ * @param cb_func the function to be called.
+ * @param cb_arg iextra argument to be passed to the callback function.
+ */
+int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func,
+ void *cb_arg)
+{
+ struct kkuc_reg *reg;
+ int rc = 0;
+ ENTRY;
+
+ if (group > KUC_GRP_MAX) {
+ CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+ RETURN(-EINVAL);
+ }
+
+ /* no link for this group */
+ if (kkuc_groups[group].next == NULL)
+ RETURN(0);
+
+ down_read(&kg_sem);
+ list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
+ if (reg->kr_fp != NULL) {
+ rc = cb_func(reg->kr_data, cb_arg);
+ }
+ }
+ up_read(&kg_sem);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_foreach);
+
+#endif /* LUSTRE_UTILS */
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c
new file mode 100644
index 000000000000..8e88eb59dd51
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c
@@ -0,0 +1,204 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+/** Global CPU partition table */
+struct cfs_cpt_table *cfs_cpt_table __read_mostly = NULL;
+EXPORT_SYMBOL(cfs_cpt_table);
+
+#ifndef HAVE_LIBCFS_CPT
+
+#define CFS_CPU_VERSION_MAGIC 0xbabecafe
+
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+ struct cfs_cpt_table *cptab;
+
+ if (ncpt != 1) {
+ CERROR("Can't support cpu partition number %d\n", ncpt);
+ return NULL;
+ }
+
+ LIBCFS_ALLOC(cptab, sizeof(*cptab));
+ if (cptab != NULL) {
+ cptab->ctb_version = CFS_CPU_VERSION_MAGIC;
+ cptab->ctb_nparts = ncpt;
+ }
+
+ return cptab;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+ LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC);
+
+ LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+void
+cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
+
+void
+cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
+{
+}
+EXPORT_SYMBOL(cfs_cpt_clear);
+
+int
+cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
+{
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_spread_node);
+
+int
+cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
+{
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_current);
+
+int
+cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
+{
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_of_cpu);
+
+int
+cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
+{
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_bind);
+
+void
+cfs_cpu_fini(void)
+{
+ if (cfs_cpt_table != NULL) {
+ cfs_cpt_table_free(cfs_cpt_table);
+ cfs_cpt_table = NULL;
+ }
+}
+
+int
+cfs_cpu_init(void)
+{
+ cfs_cpt_table = cfs_cpt_table_alloc(1);
+
+ return cfs_cpt_table != NULL ? 0 : -1;
+}
+
+#endif /* HAVE_LIBCFS_CPT */
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c b/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c
new file mode 100644
index 000000000000..8d6c4adf2ee6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c
@@ -0,0 +1,192 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+
+/** destroy cpu-partition lock, see libcfs_private.h for more detail */
+void
+cfs_percpt_lock_free(struct cfs_percpt_lock *pcl)
+{
+ LASSERT(pcl->pcl_locks != NULL);
+ LASSERT(!pcl->pcl_locked);
+
+ cfs_percpt_free(pcl->pcl_locks);
+ LIBCFS_FREE(pcl, sizeof(*pcl));
+}
+EXPORT_SYMBOL(cfs_percpt_lock_free);
+
+/**
+ * create cpu-partition lock, see libcfs_private.h for more detail.
+ *
+ * cpu-partition lock is designed for large-scale SMP system, so we need to
+ * reduce cacheline conflict as possible as we can, that's the
+ * reason we always allocate cacheline-aligned memory block.
+ */
+struct cfs_percpt_lock *
+cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab)
+{
+ struct cfs_percpt_lock *pcl;
+ spinlock_t *lock;
+ int i;
+
+ /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */
+ LIBCFS_ALLOC(pcl, sizeof(*pcl));
+ if (pcl == NULL)
+ return NULL;
+
+ pcl->pcl_cptab = cptab;
+ pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock));
+ if (pcl->pcl_locks == NULL) {
+ LIBCFS_FREE(pcl, sizeof(*pcl));
+ return NULL;
+ }
+
+ cfs_percpt_for_each(lock, i, pcl->pcl_locks)
+ spin_lock_init(lock);
+
+ return pcl;
+}
+EXPORT_SYMBOL(cfs_percpt_lock_alloc);
+
+/**
+ * lock a CPU partition
+ *
+ * \a index != CFS_PERCPT_LOCK_EX
+ * hold private lock indexed by \a index
+ *
+ * \a index == CFS_PERCPT_LOCK_EX
+ * exclusively lock @pcl and nobody can take private lock
+ */
+void
+cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
+{
+ int ncpt = cfs_cpt_number(pcl->pcl_cptab);
+ int i;
+
+ LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
+
+ if (ncpt == 1) {
+ index = 0;
+ } else { /* serialize with exclusive lock */
+ while (pcl->pcl_locked)
+ cpu_relax();
+ }
+
+ if (likely(index != CFS_PERCPT_LOCK_EX)) {
+ spin_lock(pcl->pcl_locks[index]);
+ return;
+ }
+
+ /* exclusive lock request */
+ for (i = 0; i < ncpt; i++) {
+ spin_lock(pcl->pcl_locks[i]);
+ if (i == 0) {
+ LASSERT(!pcl->pcl_locked);
+ /* nobody should take private lock after this
+ * so I wouldn't starve for too long time */
+ pcl->pcl_locked = 1;
+ }
+ }
+}
+EXPORT_SYMBOL(cfs_percpt_lock);
+
+/** unlock a CPU partition */
+void
+cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
+{
+ int ncpt = cfs_cpt_number(pcl->pcl_cptab);
+ int i;
+
+ index = ncpt == 1 ? 0 : index;
+
+ if (likely(index != CFS_PERCPT_LOCK_EX)) {
+ spin_unlock(pcl->pcl_locks[index]);
+ return;
+ }
+
+ for (i = ncpt - 1; i >= 0; i--) {
+ if (i == 0) {
+ LASSERT(pcl->pcl_locked);
+ pcl->pcl_locked = 0;
+ }
+ spin_unlock(pcl->pcl_locks[i]);
+ }
+}
+EXPORT_SYMBOL(cfs_percpt_unlock);
+
+
+/** free cpu-partition refcount */
+void
+cfs_percpt_atomic_free(atomic_t **refs)
+{
+ cfs_percpt_free(refs);
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_free);
+
+/** allocate cpu-partition refcount with initial value @init_val */
+atomic_t **
+cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val)
+{
+ atomic_t **refs;
+ atomic_t *ref;
+ int i;
+
+ refs = cfs_percpt_alloc(cptab, sizeof(*ref));
+ if (refs == NULL)
+ return NULL;
+
+ cfs_percpt_for_each(ref, i, refs)
+ atomic_set(ref, init_val);
+ return refs;
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_alloc);
+
+/** return sum of cpu-partition refs */
+int
+cfs_percpt_atomic_summary(atomic_t **refs)
+{
+ atomic_t *ref;
+ int i;
+ int val = 0;
+
+ cfs_percpt_for_each(ref, i, refs)
+ val += atomic_read(ref);
+
+ return val;
+}
+EXPORT_SYMBOL(cfs_percpt_atomic_summary);
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c b/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c
new file mode 100644
index 000000000000..879137303482
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c
@@ -0,0 +1,205 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+struct cfs_var_array {
+ unsigned int va_count; /* # of buffers */
+ unsigned int va_size; /* size of each var */
+ struct cfs_cpt_table *va_cptab; /* cpu partition table */
+ void *va_ptrs[0]; /* buffer addresses */
+};
+
+/*
+ * free per-cpu data, see more detail in cfs_percpt_free
+ */
+void
+cfs_percpt_free(void *vars)
+{
+ struct cfs_var_array *arr;
+ int i;
+
+ arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+ for (i = 0; i < arr->va_count; i++) {
+ if (arr->va_ptrs[i] != NULL)
+ LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
+ }
+
+ LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
+ va_ptrs[arr->va_count]));
+}
+EXPORT_SYMBOL(cfs_percpt_free);
+
+/*
+ * allocate per cpu-partition variables, returned value is an array of pointers,
+ * variable can be indexed by CPU partition ID, i.e:
+ *
+ * arr = cfs_percpt_alloc(cfs_cpu_pt, size);
+ * then caller can access memory block for CPU 0 by arr[0],
+ * memory block for CPU 1 by arr[1]...
+ * memory block for CPU N by arr[N]...
+ *
+ * cacheline aligned.
+ */
+void *
+cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
+{
+ struct cfs_var_array *arr;
+ int count;
+ int i;
+
+ count = cfs_cpt_number(cptab);
+
+ LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
+ if (arr == NULL)
+ return NULL;
+
+ arr->va_size = size = L1_CACHE_ALIGN(size);
+ arr->va_count = count;
+ arr->va_cptab = cptab;
+
+ for (i = 0; i < count; i++) {
+ LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size);
+ if (arr->va_ptrs[i] == NULL) {
+ cfs_percpt_free((void *)&arr->va_ptrs[0]);
+ return NULL;
+ }
+ }
+
+ return (void *)&arr->va_ptrs[0];
+}
+EXPORT_SYMBOL(cfs_percpt_alloc);
+
+/*
+ * return number of CPUs (or number of elements in per-cpu data)
+ * according to cptab of @vars
+ */
+int
+cfs_percpt_number(void *vars)
+{
+ struct cfs_var_array *arr;
+
+ arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+ return arr->va_count;
+}
+EXPORT_SYMBOL(cfs_percpt_number);
+
+/*
+ * return memory block shadowed from current CPU
+ */
+void *
+cfs_percpt_current(void *vars)
+{
+ struct cfs_var_array *arr;
+ int cpt;
+
+ arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+ cpt = cfs_cpt_current(arr->va_cptab, 0);
+ if (cpt < 0)
+ return NULL;
+
+ return arr->va_ptrs[cpt];
+}
+EXPORT_SYMBOL(cfs_percpt_current);
+
+void *
+cfs_percpt_index(void *vars, int idx)
+{
+ struct cfs_var_array *arr;
+
+ arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+ LASSERT(idx >= 0 && idx < arr->va_count);
+ return arr->va_ptrs[idx];
+}
+EXPORT_SYMBOL(cfs_percpt_index);
+
+/*
+ * free variable array, see more detail in cfs_array_alloc
+ */
+void
+cfs_array_free(void *vars)
+{
+ struct cfs_var_array *arr;
+ int i;
+
+ arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
+
+ for (i = 0; i < arr->va_count; i++) {
+ if (arr->va_ptrs[i] == NULL)
+ continue;
+
+ LIBCFS_FREE(arr->va_ptrs[i], arr->va_size);
+ }
+ LIBCFS_FREE(arr, offsetof(struct cfs_var_array,
+ va_ptrs[arr->va_count]));
+}
+EXPORT_SYMBOL(cfs_array_free);
+
+/*
+ * allocate a variable array, returned value is an array of pointers.
+ * Caller can specify length of array by @count, @size is size of each
+ * memory block in array.
+ */
+void *
+cfs_array_alloc(int count, unsigned int size)
+{
+ struct cfs_var_array *arr;
+ int i;
+
+ LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
+ if (arr == NULL)
+ return NULL;
+
+ arr->va_count = count;
+ arr->va_size = size;
+
+ for (i = 0; i < count; i++) {
+ LIBCFS_ALLOC(arr->va_ptrs[i], size);
+
+ if (arr->va_ptrs[i] == NULL) {
+ cfs_array_free((void *)&arr->va_ptrs[0]);
+ return NULL;
+ }
+ }
+
+ return (void *)&arr->va_ptrs[0];
+}
+EXPORT_SYMBOL(cfs_array_alloc);
diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_string.c b/drivers/staging/lustre/lustre/libcfs/libcfs_string.c
new file mode 100644
index 000000000000..9edccc99683e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/libcfs_string.c
@@ -0,0 +1,647 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * String manipulation functions.
+ *
+ * libcfs/libcfs/libcfs_string.c
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ */
+
+#include <linux/libcfs/libcfs.h>
+
+/* non-0 = don't match */
+int cfs_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+ if (s1 == NULL || s2 == NULL)
+ return 1;
+
+ if (n == 0)
+ return 0;
+
+ while (n-- != 0 && tolower(*s1) == tolower(*s2)) {
+ if (n == 0 || *s1 == '\0' || *s2 == '\0')
+ break;
+ s1++;
+ s2++;
+ }
+
+ return tolower(*(unsigned char *)s1) - tolower(*(unsigned char *)s2);
+}
+EXPORT_SYMBOL(cfs_strncasecmp);
+
+/* Convert a text string to a bitmask */
+int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
+ int *oldmask, int minmask, int allmask)
+{
+ const char *debugstr;
+ char op = 0;
+ int newmask = minmask, i, len, found = 0;
+ ENTRY;
+
+ /* <str> must be a list of tokens separated by whitespace
+ * and optionally an operator ('+' or '-'). If an operator
+ * appears first in <str>, '*oldmask' is used as the starting point
+ * (relative), otherwise minmask is used (absolute). An operator
+ * applies to all following tokens up to the next operator. */
+ while (*str != 0) {
+ while (isspace(*str))
+ str++;
+ if (*str == 0)
+ break;
+ if (*str == '+' || *str == '-') {
+ op = *str++;
+ if (!found)
+ /* only if first token is relative */
+ newmask = *oldmask;
+ while (isspace(*str))
+ str++;
+ if (*str == 0) /* trailing op */
+ return -EINVAL;
+ }
+
+ /* find token length */
+ for (len = 0; str[len] != 0 && !isspace(str[len]) &&
+ str[len] != '+' && str[len] != '-'; len++);
+
+ /* match token */
+ found = 0;
+ for (i = 0; i < 32; i++) {
+ debugstr = bit2str(i);
+ if (debugstr != NULL &&
+ strlen(debugstr) == len &&
+ cfs_strncasecmp(str, debugstr, len) == 0) {
+ if (op == '-')
+ newmask &= ~(1 << i);
+ else
+ newmask |= (1 << i);
+ found = 1;
+ break;
+ }
+ }
+ if (!found && len == 3 &&
+ (cfs_strncasecmp(str, "ALL", len) == 0)) {
+ if (op == '-')
+ newmask = minmask;
+ else
+ newmask = allmask;
+ found = 1;
+ }
+ if (!found) {
+ CWARN("unknown mask '%.*s'.\n"
+ "mask usage: [+|-]<all|type> ...\n", len, str);
+ return -EINVAL;
+ }
+ str += len;
+ }
+
+ *oldmask = newmask;
+ return 0;
+}
+EXPORT_SYMBOL(cfs_str2mask);
+
+/* Duplicate a string in a platform-independent way */
+char *cfs_strdup(const char *str, u_int32_t flags)
+{
+ size_t lenz; /* length of str + zero byte */
+ char *dup_str;
+
+ lenz = strlen(str) + 1;
+
+ dup_str = kmalloc(lenz, flags);
+ if (dup_str == NULL)
+ return NULL;
+
+ memcpy(dup_str, str, lenz);
+
+ return dup_str;
+}
+EXPORT_SYMBOL(cfs_strdup);
+
+/**
+ * cfs_{v}snprintf() return the actual size that is printed rather than
+ * the size that would be printed in standard functions.
+ */
+/* safe vsnprintf */
+int cfs_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i;
+
+ LASSERT(size > 0);
+ i = vsnprintf(buf, size, fmt, args);
+
+ return (i >= size ? size - 1 : i);
+}
+EXPORT_SYMBOL(cfs_vsnprintf);
+
+/* safe snprintf */
+int cfs_snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = cfs_vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return i;
+}
+EXPORT_SYMBOL(cfs_snprintf);
+
+/* get the first string out of @str */
+char *cfs_firststr(char *str, size_t size)
+{
+ size_t i = 0;
+ char *end;
+
+ /* trim leading spaces */
+ while (i < size && *str && isspace(*str)) {
+ ++i;
+ ++str;
+ }
+
+ /* string with all spaces */
+ if (*str == '\0')
+ goto out;
+
+ end = str;
+ while (i < size && *end != '\0' && !isspace(*end)) {
+ ++i;
+ ++end;
+ }
+
+ *end= '\0';
+out:
+ return str;
+}
+EXPORT_SYMBOL(cfs_firststr);
+
+char *
+cfs_trimwhite(char *str)
+{
+ char *end;
+
+ while (cfs_iswhite(*str))
+ str++;
+
+ end = str + strlen(str);
+ while (end > str) {
+ if (!cfs_iswhite(end[-1]))
+ break;
+ end--;
+ }
+
+ *end = 0;
+ return str;
+}
+EXPORT_SYMBOL(cfs_trimwhite);
+
+/**
+ * Extracts tokens from strings.
+ *
+ * Looks for \a delim in string \a next, sets \a res to point to
+ * substring before the delimiter, sets \a next right after the found
+ * delimiter.
+ *
+ * \retval 1 if \a res points to a string of non-whitespace characters
+ * \retval 0 otherwise
+ */
+int
+cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
+{
+ char *end;
+
+ if (next->ls_str == NULL)
+ return 0;
+
+ /* skip leading white spaces */
+ while (next->ls_len) {
+ if (!cfs_iswhite(*next->ls_str))
+ break;
+ next->ls_str++;
+ next->ls_len--;
+ }
+
+ if (next->ls_len == 0) /* whitespaces only */
+ return 0;
+
+ if (*next->ls_str == delim) {
+ /* first non-writespace is the delimiter */
+ return 0;
+ }
+
+ res->ls_str = next->ls_str;
+ end = memchr(next->ls_str, delim, next->ls_len);
+ if (end == NULL) {
+ /* there is no the delimeter in the string */
+ end = next->ls_str + next->ls_len;
+ next->ls_str = NULL;
+ } else {
+ next->ls_str = end + 1;
+ next->ls_len -= (end - res->ls_str + 1);
+ }
+
+ /* skip ending whitespaces */
+ while (--end != res->ls_str) {
+ if (!cfs_iswhite(*end))
+ break;
+ }
+
+ res->ls_len = end - res->ls_str + 1;
+ return 1;
+}
+EXPORT_SYMBOL(cfs_gettok);
+
+/**
+ * Converts string to integer.
+ *
+ * Accepts decimal and hexadecimal number recordings.
+ *
+ * \retval 1 if first \a nob chars of \a str convert to decimal or
+ * hexadecimal integer in the range [\a min, \a max]
+ * \retval 0 otherwise
+ */
+int
+cfs_str2num_check(char *str, int nob, unsigned *num,
+ unsigned min, unsigned max)
+{
+ char *endp;
+
+ str = cfs_trimwhite(str);
+ *num = strtoul(str, &endp, 0);
+ if (endp == str)
+ return 0;
+
+ for (; endp < str + nob; endp++) {
+ if (!cfs_iswhite(*endp))
+ return 0;
+ }
+
+ return (*num >= min && *num <= max);
+}
+EXPORT_SYMBOL(cfs_str2num_check);
+
+/**
+ * Parses \<range_expr\> token of the syntax. If \a bracketed is false,
+ * \a src should only have a single token which can be \<number\> or \*
+ *
+ * \retval pointer to allocated range_expr and initialized
+ * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a
+ `* src parses to
+ * \<number\> |
+ * \<number\> '-' \<number\> |
+ * \<number\> '-' \<number\> '/' \<number\>
+ * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or
+ * -ENOMEM will be returned.
+ */
+int
+cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+ int bracketed, struct cfs_range_expr **expr)
+{
+ struct cfs_range_expr *re;
+ struct cfs_lstr tok;
+
+ LIBCFS_ALLOC(re, sizeof(*re));
+ if (re == NULL)
+ return -ENOMEM;
+
+ if (src->ls_len == 1 && src->ls_str[0] == '*') {
+ re->re_lo = min;
+ re->re_hi = max;
+ re->re_stride = 1;
+ goto out;
+ }
+
+ if (cfs_str2num_check(src->ls_str, src->ls_len,
+ &re->re_lo, min, max)) {
+ /* <number> is parsed */
+ re->re_hi = re->re_lo;
+ re->re_stride = 1;
+ goto out;
+ }
+
+ if (!bracketed || !cfs_gettok(src, '-', &tok))
+ goto failed;
+
+ if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+ &re->re_lo, min, max))
+ goto failed;
+
+ /* <number> - */
+ if (cfs_str2num_check(src->ls_str, src->ls_len,
+ &re->re_hi, min, max)) {
+ /* <number> - <number> is parsed */
+ re->re_stride = 1;
+ goto out;
+ }
+
+ /* go to check <number> '-' <number> '/' <number> */
+ if (cfs_gettok(src, '/', &tok)) {
+ if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+ &re->re_hi, min, max))
+ goto failed;
+
+ /* <number> - <number> / ... */
+ if (cfs_str2num_check(src->ls_str, src->ls_len,
+ &re->re_stride, min, max)) {
+ /* <number> - <number> / <number> is parsed */
+ goto out;
+ }
+ }
+
+ out:
+ *expr = re;
+ return 0;
+
+ failed:
+ LIBCFS_FREE(re, sizeof(*re));
+ return -EINVAL;
+}
+EXPORT_SYMBOL(cfs_range_expr_parse);
+
+/**
+ * Matches value (\a value) against ranges expression list \a expr_list.
+ *
+ * \retval 1 if \a value matches
+ * \retval 0 otherwise
+ */
+int
+cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
+{
+ struct cfs_range_expr *expr;
+
+ list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+ if (value >= expr->re_lo && value <= expr->re_hi &&
+ ((value - expr->re_lo) % expr->re_stride) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(cfs_expr_list_match);
+
+/**
+ * Convert express list (\a expr_list) to an array of all matched values
+ *
+ * \retval N N is total number of all matched values
+ * \retval 0 if expression list is empty
+ * \retval < 0 for failure
+ */
+int
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+{
+ struct cfs_range_expr *expr;
+ __u32 *val;
+ int count = 0;
+ int i;
+
+ list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+ for (i = expr->re_lo; i <= expr->re_hi; i++) {
+ if (((i - expr->re_lo) % expr->re_stride) == 0)
+ count++;
+ }
+ }
+
+ if (count == 0) /* empty expression list */
+ return 0;
+
+ if (count > max) {
+ CERROR("Number of values %d exceeds max allowed %d\n",
+ max, count);
+ return -EINVAL;
+ }
+
+ LIBCFS_ALLOC(val, sizeof(val[0]) * count);
+ if (val == NULL)
+ return -ENOMEM;
+
+ count = 0;
+ list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+ for (i = expr->re_lo; i <= expr->re_hi; i++) {
+ if (((i - expr->re_lo) % expr->re_stride) == 0)
+ val[count++] = i;
+ }
+ }
+
+ *valpp = val;
+ return count;
+}
+EXPORT_SYMBOL(cfs_expr_list_values);
+
+/**
+ * Frees cfs_range_expr structures of \a expr_list.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free(struct cfs_expr_list *expr_list)
+{
+ while (!list_empty(&expr_list->el_exprs)) {
+ struct cfs_range_expr *expr;
+
+ expr = list_entry(expr_list->el_exprs.next,
+ struct cfs_range_expr, re_link),
+ list_del(&expr->re_link);
+ LIBCFS_FREE(expr, sizeof(*expr));
+ }
+
+ LIBCFS_FREE(expr_list, sizeof(*expr_list));
+}
+EXPORT_SYMBOL(cfs_expr_list_free);
+
+void
+cfs_expr_list_print(struct cfs_expr_list *expr_list)
+{
+ struct cfs_range_expr *expr;
+
+ list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+ CDEBUG(D_WARNING, "%d-%d/%d\n",
+ expr->re_lo, expr->re_hi, expr->re_stride);
+ }
+}
+EXPORT_SYMBOL(cfs_expr_list_print);
+
+/**
+ * Parses \<cfs_expr_list\> token of the syntax.
+ *
+ * \retval 1 if \a str parses to \<number\> | \<expr_list\>
+ * \retval 0 otherwise
+ */
+int
+cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+ struct cfs_expr_list **elpp)
+{
+ struct cfs_expr_list *expr_list;
+ struct cfs_range_expr *expr;
+ struct cfs_lstr src;
+ int rc;
+
+ LIBCFS_ALLOC(expr_list, sizeof(*expr_list));
+ if (expr_list == NULL)
+ return -ENOMEM;
+
+ src.ls_str = str;
+ src.ls_len = len;
+
+ INIT_LIST_HEAD(&expr_list->el_exprs);
+
+ if (src.ls_str[0] == '[' &&
+ src.ls_str[src.ls_len - 1] == ']') {
+ src.ls_str++;
+ src.ls_len -= 2;
+
+ rc = -EINVAL;
+ while (src.ls_str != NULL) {
+ struct cfs_lstr tok;
+
+ if (!cfs_gettok(&src, ',', &tok)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
+ if (rc != 0)
+ break;
+
+ list_add_tail(&expr->re_link,
+ &expr_list->el_exprs);
+ }
+ } else {
+ rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
+ if (rc == 0) {
+ list_add_tail(&expr->re_link,
+ &expr_list->el_exprs);
+ }
+ }
+
+ if (rc != 0)
+ cfs_expr_list_free(expr_list);
+ else
+ *elpp = expr_list;
+
+ return rc;
+}
+EXPORT_SYMBOL(cfs_expr_list_parse);
+
+/**
+ * Frees cfs_expr_list structures of \a list.
+ *
+ * For each struct cfs_expr_list structure found on \a list it frees
+ * range_expr list attached to it and frees the cfs_expr_list itself.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free_list(struct list_head *list)
+{
+ struct cfs_expr_list *el;
+
+ while (!list_empty(list)) {
+ el = list_entry(list->next,
+ struct cfs_expr_list, el_link);
+ list_del(&el->el_link);
+ cfs_expr_list_free(el);
+ }
+}
+EXPORT_SYMBOL(cfs_expr_list_free_list);
+
+int
+cfs_ip_addr_parse(char *str, int len, struct list_head *list)
+{
+ struct cfs_expr_list *el;
+ struct cfs_lstr src;
+ int rc;
+ int i;
+
+ src.ls_str = str;
+ src.ls_len = len;
+ i = 0;
+
+ while (src.ls_str != NULL) {
+ struct cfs_lstr res;
+
+ if (!cfs_gettok(&src, '.', &res)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el);
+ if (rc != 0)
+ goto out;
+
+ list_add_tail(&el->el_link, list);
+ i++;
+ }
+
+ if (i == 4)
+ return 0;
+
+ rc = -EINVAL;
+ out:
+ cfs_expr_list_free_list(list);
+
+ return rc;
+}
+EXPORT_SYMBOL(cfs_ip_addr_parse);
+
+/**
+ * Matches address (\a addr) against address set encoded in \a list.
+ *
+ * \retval 1 if \a addr matches
+ * \retval 0 otherwise
+ */
+int
+cfs_ip_addr_match(__u32 addr, struct list_head *list)
+{
+ struct cfs_expr_list *el;
+ int i = 0;
+
+ list_for_each_entry_reverse(el, list, el_link) {
+ if (!cfs_expr_list_match(addr & 0xff, el))
+ return 0;
+ addr >>= 8;
+ i++;
+ }
+
+ return i == 4;
+}
+EXPORT_SYMBOL(cfs_ip_addr_match);
+
+void
+cfs_ip_addr_free(struct list_head *list)
+{
+ cfs_expr_list_free_list(list);
+}
+EXPORT_SYMBOL(cfs_ip_addr_free);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c
new file mode 100644
index 000000000000..95142d190971
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c
@@ -0,0 +1,1085 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: liang@whamcloud.com
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/libcfs/libcfs.h>
+
+#ifdef CONFIG_SMP
+
+/**
+ * modparam for setting number of partitions
+ *
+ * 0 : estimate best value based on cores or NUMA nodes
+ * 1 : disable multiple partitions
+ * >1 : specify number of partitions
+ */
+static int cpu_npartitions;
+CFS_MODULE_PARM(cpu_npartitions, "i", int, 0444, "# of CPU partitions");
+
+/**
+ * modparam for setting CPU partitions patterns:
+ *
+ * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
+ * number in bracket is processor ID (core or HT)
+ *
+ * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
+ * are NUMA node ID, number before bracket is CPU partition ID.
+ *
+ * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
+ */
+static char *cpu_pattern = "";
+CFS_MODULE_PARM(cpu_pattern, "s", charp, 0444, "CPU partitions pattern");
+
+struct cfs_cpt_data {
+ /* serialize hotplug etc */
+ spinlock_t cpt_lock;
+ /* reserved for hotplug */
+ unsigned long cpt_version;
+ /* mutex to protect cpt_cpumask */
+ struct semaphore cpt_mutex;
+ /* scratch buffer for set/unset_node */
+ cpumask_t *cpt_cpumask;
+};
+
+static struct cfs_cpt_data cpt_data;
+
+void
+cfs_cpu_core_siblings(int cpu, cpumask_t *mask)
+{
+ /* return cpumask of cores in the same socket */
+ cpumask_copy(mask, topology_core_cpumask(cpu));
+}
+EXPORT_SYMBOL(cfs_cpu_core_siblings);
+
+/* return number of cores in the same socket of \a cpu */
+int
+cfs_cpu_core_nsiblings(int cpu)
+{
+ int num;
+
+ down(&cpt_data.cpt_mutex);
+
+ cfs_cpu_core_siblings(cpu, cpt_data.cpt_cpumask);
+ num = cpus_weight(*cpt_data.cpt_cpumask);
+
+ up(&cpt_data.cpt_mutex);
+
+ return num;
+}
+EXPORT_SYMBOL(cfs_cpu_core_nsiblings);
+
+/* return cpumask of HTs in the same core */
+void
+cfs_cpu_ht_siblings(int cpu, cpumask_t *mask)
+{
+ cpumask_copy(mask, topology_thread_cpumask(cpu));
+}
+EXPORT_SYMBOL(cfs_cpu_ht_siblings);
+
+/* return number of HTs in the same core of \a cpu */
+int
+cfs_cpu_ht_nsiblings(int cpu)
+{
+ int num;
+
+ down(&cpt_data.cpt_mutex);
+
+ cfs_cpu_ht_siblings(cpu, cpt_data.cpt_cpumask);
+ num = cpus_weight(*cpt_data.cpt_cpumask);
+
+ up(&cpt_data.cpt_mutex);
+
+ return num;
+}
+EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
+
+void
+cfs_node_to_cpumask(int node, cpumask_t *mask)
+{
+ cpumask_copy(mask, cpumask_of_node(node));
+}
+EXPORT_SYMBOL(cfs_node_to_cpumask);
+
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+ int i;
+
+ if (cptab->ctb_cpu2cpt != NULL) {
+ LIBCFS_FREE(cptab->ctb_cpu2cpt,
+ num_possible_cpus() *
+ sizeof(cptab->ctb_cpu2cpt[0]));
+ }
+
+ for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
+ struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
+
+ if (part->cpt_nodemask != NULL) {
+ LIBCFS_FREE(part->cpt_nodemask,
+ sizeof(*part->cpt_nodemask));
+ }
+
+ if (part->cpt_cpumask != NULL)
+ LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+ }
+
+ if (cptab->ctb_parts != NULL) {
+ LIBCFS_FREE(cptab->ctb_parts,
+ cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
+ }
+
+ if (cptab->ctb_nodemask != NULL)
+ LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
+ if (cptab->ctb_cpumask != NULL)
+ LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+
+ LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+ struct cfs_cpt_table *cptab;
+ int i;
+
+ LIBCFS_ALLOC(cptab, sizeof(*cptab));
+ if (cptab == NULL)
+ return NULL;
+
+ cptab->ctb_nparts = ncpt;
+
+ LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
+ LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
+
+ if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL)
+ goto failed;
+
+ LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
+ num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+ if (cptab->ctb_cpu2cpt == NULL)
+ goto failed;
+
+ memset(cptab->ctb_cpu2cpt, -1,
+ num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
+
+ LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
+ if (cptab->ctb_parts == NULL)
+ goto failed;
+
+ for (i = 0; i < ncpt; i++) {
+ struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
+
+ LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
+ LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
+ if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
+ goto failed;
+ }
+
+ spin_lock(&cpt_data.cpt_lock);
+ /* Reserved for hotplug */
+ cptab->ctb_version = cpt_data.cpt_version;
+ spin_unlock(&cpt_data.cpt_lock);
+
+ return cptab;
+
+ failed:
+ cfs_cpt_table_free(cptab);
+ return NULL;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+int
+cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+ char *tmp = buf;
+ int rc = 0;
+ int i;
+ int j;
+
+ for (i = 0; i < cptab->ctb_nparts; i++) {
+ if (len > 0) {
+ rc = snprintf(tmp, len, "%d\t: ", i);
+ len -= rc;
+ }
+
+ if (len <= 0) {
+ rc = -EFBIG;
+ goto out;
+ }
+
+ tmp += rc;
+ for_each_cpu_mask(j, *cptab->ctb_parts[i].cpt_cpumask) {
+ rc = snprintf(tmp, len, "%d ", j);
+ len -= rc;
+ if (len <= 0) {
+ rc = -EFBIG;
+ goto out;
+ }
+ tmp += rc;
+ }
+
+ *tmp = '\n';
+ tmp++;
+ len--;
+ }
+
+ out:
+ if (rc < 0)
+ return rc;
+
+ return tmp - buf;
+}
+EXPORT_SYMBOL(cfs_cpt_table_print);
+
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+ return cptab->ctb_nparts;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ return cpt == CFS_CPT_ANY ?
+ cpus_weight(*cptab->ctb_cpumask) :
+ cpus_weight(*cptab->ctb_parts[cpt].cpt_cpumask);
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ return cpt == CFS_CPT_ANY ?
+ any_online_cpu(*cptab->ctb_cpumask) != NR_CPUS :
+ any_online_cpu(*cptab->ctb_parts[cpt].cpt_cpumask) != NR_CPUS;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+cpumask_t *
+cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ return cpt == CFS_CPT_ANY ?
+ cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
+}
+EXPORT_SYMBOL(cfs_cpt_cpumask);
+
+nodemask_t *
+cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+{
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ return cpt == CFS_CPT_ANY ?
+ cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+ int node;
+
+ LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
+
+ if (cpu < 0 || cpu >= NR_CPUS || !cpu_online(cpu)) {
+ CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
+ return 0;
+ }
+
+ if (cptab->ctb_cpu2cpt[cpu] != -1) {
+ CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
+ cpu, cptab->ctb_cpu2cpt[cpu]);
+ return 0;
+ }
+
+ cptab->ctb_cpu2cpt[cpu] = cpt;
+
+ LASSERT(!cpu_isset(cpu, *cptab->ctb_cpumask));
+ LASSERT(!cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
+
+ cpu_set(cpu, *cptab->ctb_cpumask);
+ cpu_set(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
+
+ node = cpu_to_node(cpu);
+
+ /* first CPU of @node in this CPT table */
+ if (!node_isset(node, *cptab->ctb_nodemask))
+ node_set(node, *cptab->ctb_nodemask);
+
+ /* first CPU of @node in this partition */
+ if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
+ node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+ int node;
+ int i;
+
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ if (cpu < 0 || cpu >= NR_CPUS) {
+ CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
+ return;
+ }
+
+ if (cpt == CFS_CPT_ANY) {
+ /* caller doesn't know the partition ID */
+ cpt = cptab->ctb_cpu2cpt[cpu];
+ if (cpt < 0) { /* not set in this CPT-table */
+ CDEBUG(D_INFO, "Try to unset cpu %d which is "
+ "not in CPT-table %p\n", cpt, cptab);
+ return;
+ }
+
+ } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
+ CDEBUG(D_INFO,
+ "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+ return;
+ }
+
+ LASSERT(cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
+ LASSERT(cpu_isset(cpu, *cptab->ctb_cpumask));
+
+ cpu_clear(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
+ cpu_clear(cpu, *cptab->ctb_cpumask);
+ cptab->ctb_cpu2cpt[cpu] = -1;
+
+ node = cpu_to_node(cpu);
+
+ LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
+ LASSERT(node_isset(node, *cptab->ctb_nodemask));
+
+ for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask) {
+ /* this CPT has other CPU belonging to this node? */
+ if (cpu_to_node(i) == node)
+ break;
+ }
+
+ if (i == NR_CPUS)
+ node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+ for_each_cpu_mask(i, *cptab->ctb_cpumask) {
+ /* this CPT-table has other CPU belonging to this node? */
+ if (cpu_to_node(i) == node)
+ break;
+ }
+
+ if (i == NR_CPUS)
+ node_clear(node, *cptab->ctb_nodemask);
+
+ return;
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+ int i;
+
+ if (cpus_weight(*mask) == 0 || any_online_cpu(*mask) == NR_CPUS) {
+ CDEBUG(D_INFO, "No online CPU is found in the CPU mask "
+ "for CPU partition %d\n", cpt);
+ return 0;
+ }
+
+ for_each_cpu_mask(i, *mask) {
+ if (!cfs_cpt_set_cpu(cptab, cpt, i))
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+ int i;
+
+ for_each_cpu_mask(i, *mask)
+ cfs_cpt_unset_cpu(cptab, cpt, i);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+ cpumask_t *mask;
+ int rc;
+
+ if (node < 0 || node >= MAX_NUMNODES) {
+ CDEBUG(D_INFO,
+ "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+ return 0;
+ }
+
+ down(&cpt_data.cpt_mutex);
+
+ mask = cpt_data.cpt_cpumask;
+ cfs_node_to_cpumask(node, mask);
+
+ rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
+
+ up(&cpt_data.cpt_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+ cpumask_t *mask;
+
+ if (node < 0 || node >= MAX_NUMNODES) {
+ CDEBUG(D_INFO,
+ "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+ return;
+ }
+
+ down(&cpt_data.cpt_mutex);
+
+ mask = cpt_data.cpt_cpumask;
+ cfs_node_to_cpumask(node, mask);
+
+ cfs_cpt_unset_cpumask(cptab, cpt, mask);
+
+ up(&cpt_data.cpt_mutex);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+ int i;
+
+ for_each_node_mask(i, *mask) {
+ if (!cfs_cpt_set_node(cptab, cpt, i))
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+void
+cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+ int i;
+
+ for_each_node_mask(i, *mask)
+ cfs_cpt_unset_node(cptab, cpt, i);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
+
+void
+cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
+{
+ int last;
+ int i;
+
+ if (cpt == CFS_CPT_ANY) {
+ last = cptab->ctb_nparts - 1;
+ cpt = 0;
+ } else {
+ last = cpt;
+ }
+
+ for (; cpt <= last; cpt++) {
+ for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask)
+ cfs_cpt_unset_cpu(cptab, cpt, i);
+ }
+}
+EXPORT_SYMBOL(cfs_cpt_clear);
+
+int
+cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
+{
+ nodemask_t *mask;
+ int weight;
+ int rotor;
+ int node;
+
+ /* convert CPU partition ID to HW node id */
+
+ if (cpt < 0 || cpt >= cptab->ctb_nparts) {
+ mask = cptab->ctb_nodemask;
+ rotor = cptab->ctb_spread_rotor++;
+ } else {
+ mask = cptab->ctb_parts[cpt].cpt_nodemask;
+ rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
+ }
+
+ weight = nodes_weight(*mask);
+ LASSERT(weight > 0);
+
+ rotor %= weight;
+
+ for_each_node_mask(node, *mask) {
+ if (rotor-- == 0)
+ return node;
+ }
+
+ LBUG();
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_spread_node);
+
+int
+cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
+{
+ int cpu = smp_processor_id();
+ int cpt = cptab->ctb_cpu2cpt[cpu];
+
+ if (cpt < 0) {
+ if (!remap)
+ return cpt;
+
+ /* don't return negative value for safety of upper layer,
+ * instead we shadow the unknown cpu to a valid partition ID */
+ cpt = cpu % cptab->ctb_nparts;
+ }
+
+ return cpt;
+}
+EXPORT_SYMBOL(cfs_cpt_current);
+
+int
+cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
+{
+ LASSERT(cpu >= 0 && cpu < NR_CPUS);
+
+ return cptab->ctb_cpu2cpt[cpu];
+}
+EXPORT_SYMBOL(cfs_cpt_of_cpu);
+
+int
+cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
+{
+ cpumask_t *cpumask;
+ nodemask_t *nodemask;
+ int rc;
+ int i;
+
+ LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+ if (cpt == CFS_CPT_ANY) {
+ cpumask = cptab->ctb_cpumask;
+ nodemask = cptab->ctb_nodemask;
+ } else {
+ cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
+ nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
+ }
+
+ if (any_online_cpu(*cpumask) == NR_CPUS) {
+ CERROR("No online CPU found in CPU partition %d, did someone "
+ "do CPU hotplug on system? You might need to reload "
+ "Lustre modules to keep system working well.\n", cpt);
+ return -EINVAL;
+ }
+
+ for_each_online_cpu(i) {
+ if (cpu_isset(i, *cpumask))
+ continue;
+
+ rc = set_cpus_allowed_ptr(current, cpumask);
+ set_mems_allowed(*nodemask);
+ if (rc == 0)
+ schedule(); /* switch to allowed CPU */
+
+ return rc;
+ }
+
+ /* don't need to set affinity because all online CPUs are covered */
+ return 0;
+}
+EXPORT_SYMBOL(cfs_cpt_bind);
+
+/**
+ * Choose max to \a number CPUs from \a node and set them in \a cpt.
+ * We always prefer to choose CPU in the same core/socket.
+ */
+static int
+cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
+ cpumask_t *node, int number)
+{
+ cpumask_t *socket = NULL;
+ cpumask_t *core = NULL;
+ int rc = 0;
+ int cpu;
+
+ LASSERT(number > 0);
+
+ if (number >= cpus_weight(*node)) {
+ while (!cpus_empty(*node)) {
+ cpu = first_cpu(*node);
+
+ rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
+ if (!rc)
+ return -EINVAL;
+ cpu_clear(cpu, *node);
+ }
+ return 0;
+ }
+
+ /* allocate scratch buffer */
+ LIBCFS_ALLOC(socket, cpumask_size());
+ LIBCFS_ALLOC(core, cpumask_size());
+ if (socket == NULL || core == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ while (!cpus_empty(*node)) {
+ cpu = first_cpu(*node);
+
+ /* get cpumask for cores in the same socket */
+ cfs_cpu_core_siblings(cpu, socket);
+ cpus_and(*socket, *socket, *node);
+
+ LASSERT(!cpus_empty(*socket));
+
+ while (!cpus_empty(*socket)) {
+ int i;
+
+ /* get cpumask for hts in the same core */
+ cfs_cpu_ht_siblings(cpu, core);
+ cpus_and(*core, *core, *node);
+
+ LASSERT(!cpus_empty(*core));
+
+ for_each_cpu_mask(i, *core) {
+ cpu_clear(i, *socket);
+ cpu_clear(i, *node);
+
+ rc = cfs_cpt_set_cpu(cptab, cpt, i);
+ if (!rc) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (--number == 0)
+ goto out;
+ }
+ cpu = first_cpu(*socket);
+ }
+ }
+
+ out:
+ if (socket != NULL)
+ LIBCFS_FREE(socket, cpumask_size());
+ if (core != NULL)
+ LIBCFS_FREE(core, cpumask_size());
+ return rc;
+}
+
+#define CPT_WEIGHT_MIN 4u
+
+static unsigned int
+cfs_cpt_num_estimate(void)
+{
+ unsigned nnode = num_online_nodes();
+ unsigned ncpu = num_online_cpus();
+ unsigned ncpt;
+
+ if (ncpu <= CPT_WEIGHT_MIN) {
+ ncpt = 1;
+ goto out;
+ }
+
+ /* generate reasonable number of CPU partitions based on total number
+ * of CPUs, Preferred N should be power2 and match this condition:
+ * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 */
+ for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) {}
+
+ if (ncpt <= nnode) { /* fat numa system */
+ while (nnode > ncpt)
+ nnode >>= 1;
+
+ } else { /* ncpt > nnode */
+ while ((nnode << 1) <= ncpt)
+ nnode <<= 1;
+ }
+
+ ncpt = nnode;
+
+ out:
+#if (BITS_PER_LONG == 32)
+ /* config many CPU partitions on 32-bit system could consume
+ * too much memory */
+ ncpt = min(2U, ncpt);
+#endif
+ while (ncpu % ncpt != 0)
+ ncpt--; /* worst case is 1 */
+
+ return ncpt;
+}
+
+static struct cfs_cpt_table *
+cfs_cpt_table_create(int ncpt)
+{
+ struct cfs_cpt_table *cptab = NULL;
+ cpumask_t *mask = NULL;
+ int cpt = 0;
+ int num;
+ int rc;
+ int i;
+
+ rc = cfs_cpt_num_estimate();
+ if (ncpt <= 0)
+ ncpt = rc;
+
+ if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
+ CWARN("CPU partition number %d is larger than suggested "
+ "value (%d), your system may have performance"
+ "issue or run out of memory while under pressure\n",
+ ncpt, rc);
+ }
+
+ if (num_online_cpus() % ncpt != 0) {
+ CERROR("CPU number %d is not multiple of cpu_npartition %d, "
+ "please try different cpu_npartitions value or"
+ "set pattern string by cpu_pattern=STRING\n",
+ (int)num_online_cpus(), ncpt);
+ goto failed;
+ }
+
+ cptab = cfs_cpt_table_alloc(ncpt);
+ if (cptab == NULL) {
+ CERROR("Failed to allocate CPU map(%d)\n", ncpt);
+ goto failed;
+ }
+
+ num = num_online_cpus() / ncpt;
+ if (num == 0) {
+ CERROR("CPU changed while setting CPU partition\n");
+ goto failed;
+ }
+
+ LIBCFS_ALLOC(mask, cpumask_size());
+ if (mask == NULL) {
+ CERROR("Failed to allocate scratch cpumask\n");
+ goto failed;
+ }
+
+ for_each_online_node(i) {
+ cfs_node_to_cpumask(i, mask);
+
+ while (!cpus_empty(*mask)) {
+ struct cfs_cpu_partition *part;
+ int n;
+
+ if (cpt >= ncpt)
+ goto failed;
+
+ part = &cptab->ctb_parts[cpt];
+
+ n = num - cpus_weight(*part->cpt_cpumask);
+ LASSERT(n > 0);
+
+ rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
+ if (rc < 0)
+ goto failed;
+
+ LASSERT(num >= cpus_weight(*part->cpt_cpumask));
+ if (num == cpus_weight(*part->cpt_cpumask))
+ cpt++;
+ }
+ }
+
+ if (cpt != ncpt ||
+ num != cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
+ CERROR("Expect %d(%d) CPU partitions but got %d(%d), "
+ "CPU hotplug/unplug while setting?\n",
+ cptab->ctb_nparts, num, cpt,
+ cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask));
+ goto failed;
+ }
+
+ LIBCFS_FREE(mask, cpumask_size());
+
+ return cptab;
+
+ failed:
+ CERROR("Failed to setup CPU-partition-table with %d "
+ "CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
+ ncpt, num_online_nodes(), num_online_cpus());
+
+ if (mask != NULL)
+ LIBCFS_FREE(mask, cpumask_size());
+
+ if (cptab != NULL)
+ cfs_cpt_table_free(cptab);
+
+ return NULL;
+}
+
+static struct cfs_cpt_table *
+cfs_cpt_table_create_pattern(char *pattern)
+{
+ struct cfs_cpt_table *cptab;
+ char *str = pattern;
+ int node = 0;
+ int high;
+ int ncpt;
+ int c;
+
+ for (ncpt = 0;; ncpt++) { /* quick scan bracket */
+ str = strchr(str, '[');
+ if (str == NULL)
+ break;
+ str++;
+ }
+
+ str = cfs_trimwhite(pattern);
+ if (*str == 'n' || *str == 'N') {
+ pattern = str + 1;
+ node = 1;
+ }
+
+ if (ncpt == 0 ||
+ (node && ncpt > num_online_nodes()) ||
+ (!node && ncpt > num_online_cpus())) {
+ CERROR("Invalid pattern %s, or too many partitions %d\n",
+ pattern, ncpt);
+ return NULL;
+ }
+
+ high = node ? MAX_NUMNODES - 1 : NR_CPUS - 1;
+
+ cptab = cfs_cpt_table_alloc(ncpt);
+ if (cptab == NULL) {
+ CERROR("Failed to allocate cpu partition table\n");
+ return NULL;
+ }
+
+ for (str = cfs_trimwhite(pattern), c = 0;; c++) {
+ struct cfs_range_expr *range;
+ struct cfs_expr_list *el;
+ char *bracket = strchr(str, '[');
+ int cpt;
+ int rc;
+ int i;
+ int n;
+
+ if (bracket == NULL) {
+ if (*str != 0) {
+ CERROR("Invalid pattern %s\n", str);
+ goto failed;
+ } else if (c != ncpt) {
+ CERROR("expect %d partitions but found %d\n",
+ ncpt, c);
+ goto failed;
+ }
+ break;
+ }
+
+ if (sscanf(str, "%u%n", &cpt, &n) < 1) {
+ CERROR("Invalid cpu pattern %s\n", str);
+ goto failed;
+ }
+
+ if (cpt < 0 || cpt >= ncpt) {
+ CERROR("Invalid partition id %d, total partitions %d\n",
+ cpt, ncpt);
+ goto failed;
+ }
+
+ if (cfs_cpt_weight(cptab, cpt) != 0) {
+ CERROR("Partition %d has already been set.\n", cpt);
+ goto failed;
+ }
+
+ str = cfs_trimwhite(str + n);
+ if (str != bracket) {
+ CERROR("Invalid pattern %s\n", str);
+ goto failed;
+ }
+
+ bracket = strchr(str, ']');
+ if (bracket == NULL) {
+ CERROR("missing right bracket for cpt %d, %s\n",
+ cpt, str);
+ goto failed;
+ }
+
+ if (cfs_expr_list_parse(str, (bracket - str) + 1,
+ 0, high, &el) != 0) {
+ CERROR("Can't parse number range: %s\n", str);
+ goto failed;
+ }
+
+ list_for_each_entry(range, &el->el_exprs, re_link) {
+ for (i = range->re_lo; i <= range->re_hi; i++) {
+ if ((i - range->re_lo) % range->re_stride != 0)
+ continue;
+
+ rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
+ cfs_cpt_set_cpu(cptab, cpt, i);
+ if (!rc) {
+ cfs_expr_list_free(el);
+ goto failed;
+ }
+ }
+ }
+
+ cfs_expr_list_free(el);
+
+ if (!cfs_cpt_online(cptab, cpt)) {
+ CERROR("No online CPU is found on partition %d\n", cpt);
+ goto failed;
+ }
+
+ str = cfs_trimwhite(bracket + 1);
+ }
+
+ return cptab;
+
+ failed:
+ cfs_cpt_table_free(cptab);
+ return NULL;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int
+cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ spin_lock(&cpt_data.cpt_lock);
+ cpt_data.cpt_version++;
+ spin_unlock(&cpt_data.cpt_lock);
+ default:
+ CWARN("Lustre: can't support CPU hotplug well now, "
+ "performance and stability could be impacted"
+ "[CPU %u notify: %lx]\n", cpu, action);
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cfs_cpu_notifier = {
+ .notifier_call = cfs_cpu_notify,
+ .priority = 0
+};
+
+#endif
+
+void
+cfs_cpu_fini(void)
+{
+ if (cfs_cpt_table != NULL)
+ cfs_cpt_table_free(cfs_cpt_table);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_hotcpu_notifier(&cfs_cpu_notifier);
+#endif
+ if (cpt_data.cpt_cpumask != NULL)
+ LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
+}
+
+int
+cfs_cpu_init(void)
+{
+ LASSERT(cfs_cpt_table == NULL);
+
+ memset(&cpt_data, 0, sizeof(cpt_data));
+
+ LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
+ if (cpt_data.cpt_cpumask == NULL) {
+ CERROR("Failed to allocate scratch buffer\n");
+ return -1;
+ }
+
+ spin_lock_init(&cpt_data.cpt_lock);
+ sema_init(&cpt_data.cpt_mutex, 1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ register_hotcpu_notifier(&cfs_cpu_notifier);
+#endif
+
+ if (*cpu_pattern != 0) {
+ cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
+ if (cfs_cpt_table == NULL) {
+ CERROR("Failed to create cptab from pattern %s\n",
+ cpu_pattern);
+ goto failed;
+ }
+
+ } else {
+ cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
+ if (cfs_cpt_table == NULL) {
+ CERROR("Failed to create ptable with npartitions %d\n",
+ cpu_npartitions);
+ goto failed;
+ }
+ }
+
+ spin_lock(&cpt_data.cpt_lock);
+ if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
+ spin_unlock(&cpt_data.cpt_lock);
+ CERROR("CPU hotplug/unplug during setup\n");
+ goto failed;
+ }
+ spin_unlock(&cpt_data.cpt_lock);
+
+ LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
+ num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
+ return 0;
+
+ failed:
+ cfs_cpu_fini();
+ return -1;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c
new file mode 100644
index 000000000000..20b2d61d9ff2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c
@@ -0,0 +1,144 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/*
+ * This is crypto api shash wrappers to zlib_adler32.
+ */
+
+#include <linux/module.h>
+#include <linux/zutil.h>
+#include <crypto/internal/hash.h>
+
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+
+static u32 __adler32(u32 cksum, unsigned char const *p, size_t len)
+{
+ return zlib_adler32(cksum, p, len);
+}
+
+static int adler32_cra_init(struct crypto_tfm *tfm)
+{
+ u32 *key = crypto_tfm_ctx(tfm);
+
+ *key = 1;
+
+ return 0;
+}
+
+static int adler32_setkey(struct crypto_shash *hash, const u8 *key,
+ unsigned int keylen)
+{
+ u32 *mctx = crypto_shash_ctx(hash);
+
+ if (keylen != sizeof(u32)) {
+ crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ *mctx = *(u32 *)key;
+ return 0;
+}
+
+static int adler32_init(struct shash_desc *desc)
+{
+ u32 *mctx = crypto_shash_ctx(desc->tfm);
+ u32 *cksump = shash_desc_ctx(desc);
+
+ *cksump = *mctx;
+
+ return 0;
+}
+
+static int adler32_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ u32 *cksump = shash_desc_ctx(desc);
+
+ *cksump = __adler32(*cksump, data, len);
+ return 0;
+}
+static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ *(u32 *)out = __adler32(*cksump, data, len);
+ return 0;
+}
+
+static int adler32_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __adler32_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int adler32_final(struct shash_desc *desc, u8 *out)
+{
+ u32 *cksump = shash_desc_ctx(desc);
+
+ *(u32 *)out = *cksump;
+ return 0;
+}
+
+static int adler32_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len,
+ out);
+}
+static struct shash_alg alg = {
+ .setkey = adler32_setkey,
+ .init = adler32_init,
+ .update = adler32_update,
+ .final = adler32_final,
+ .finup = adler32_finup,
+ .digest = adler32_digest,
+ .descsize = sizeof(u32),
+ .digestsize = CHKSUM_DIGEST_SIZE,
+ .base = {
+ .cra_name = "adler32",
+ .cra_driver_name = "adler32-zlib",
+ .cra_priority = 100,
+ .cra_blocksize = CHKSUM_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(u32),
+ .cra_module = THIS_MODULE,
+ .cra_init = adler32_cra_init,
+ }
+};
+
+
+int cfs_crypto_adler32_register(void)
+{
+ return crypto_register_shash(&alg);
+}
+EXPORT_SYMBOL(cfs_crypto_adler32_register);
+
+void cfs_crypto_adler32_unregister(void)
+{
+ crypto_unregister_shash(&alg);
+}
+EXPORT_SYMBOL(cfs_crypto_adler32_unregister);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c
new file mode 100644
index 000000000000..8e35777b4da5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c
@@ -0,0 +1,289 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/linux/linux-crypto.h>
+/**
+ * Array of hash algorithm speed in MByte per second
+ */
+static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
+
+
+
+static int cfs_crypto_hash_alloc(unsigned char alg_id,
+ const struct cfs_crypto_hash_type **type,
+ struct hash_desc *desc, unsigned char *key,
+ unsigned int key_len)
+{
+ int err = 0;
+
+ *type = cfs_crypto_hash_type(alg_id);
+
+ if (*type == NULL) {
+ CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
+ alg_id, CFS_HASH_ALG_MAX);
+ return -EINVAL;
+ }
+ desc->tfm = crypto_alloc_hash((*type)->cht_name, 0, 0);
+
+ if (desc->tfm == NULL)
+ return -EINVAL;
+
+ if (IS_ERR(desc->tfm)) {
+ CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n",
+ (*type)->cht_name);
+ return PTR_ERR(desc->tfm);
+ }
+
+ desc->flags = 0;
+
+ /** Shash have different logic for initialization then digest
+ * shash: crypto_hash_setkey, crypto_hash_init
+ * digest: crypto_digest_init, crypto_digest_setkey
+ * Skip this function for digest, because we use shash logic at
+ * cfs_crypto_hash_alloc.
+ */
+ if (key != NULL) {
+ err = crypto_hash_setkey(desc->tfm, key, key_len);
+ } else if ((*type)->cht_key != 0) {
+ err = crypto_hash_setkey(desc->tfm,
+ (unsigned char *)&((*type)->cht_key),
+ (*type)->cht_size);
+ }
+
+ if (err != 0) {
+ crypto_free_hash(desc->tfm);
+ return err;
+ }
+
+ CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
+ (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_name,
+ (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_driver_name,
+ cfs_crypto_hash_speeds[alg_id]);
+
+ return crypto_hash_init(desc);
+}
+
+int cfs_crypto_hash_digest(unsigned char alg_id,
+ const void *buf, unsigned int buf_len,
+ unsigned char *key, unsigned int key_len,
+ unsigned char *hash, unsigned int *hash_len)
+{
+ struct scatterlist sl;
+ struct hash_desc hdesc;
+ int err;
+ const struct cfs_crypto_hash_type *type;
+
+ if (buf == NULL || buf_len == 0 || hash_len == NULL)
+ return -EINVAL;
+
+ err = cfs_crypto_hash_alloc(alg_id, &type, &hdesc, key, key_len);
+ if (err != 0)
+ return err;
+
+ if (hash == NULL || *hash_len < type->cht_size) {
+ *hash_len = type->cht_size;
+ crypto_free_hash(hdesc.tfm);
+ return -ENOSPC;
+ }
+ sg_init_one(&sl, (void *)buf, buf_len);
+
+ hdesc.flags = 0;
+ err = crypto_hash_digest(&hdesc, &sl, sl.length, hash);
+ crypto_free_hash(hdesc.tfm);
+
+ return err;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_digest);
+
+struct cfs_crypto_hash_desc *
+ cfs_crypto_hash_init(unsigned char alg_id,
+ unsigned char *key, unsigned int key_len)
+{
+
+ struct hash_desc *hdesc;
+ int err;
+ const struct cfs_crypto_hash_type *type;
+
+ hdesc = kmalloc(sizeof(*hdesc), 0);
+ if (hdesc == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ err = cfs_crypto_hash_alloc(alg_id, &type, hdesc, key, key_len);
+
+ if (err) {
+ kfree(hdesc);
+ return ERR_PTR(err);
+ }
+ return (struct cfs_crypto_hash_desc *)hdesc;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_init);
+
+int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc,
+ struct page *page, unsigned int offset,
+ unsigned int len)
+{
+ struct scatterlist sl;
+
+ sg_init_table(&sl, 1);
+ sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK);
+
+ return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update_page);
+
+int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
+ const void *buf, unsigned int buf_len)
+{
+ struct scatterlist sl;
+
+ sg_init_one(&sl, (void *)buf, buf_len);
+
+ return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update);
+
+/* If hash_len pointer is NULL - destroy descriptor. */
+int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
+ unsigned char *hash, unsigned int *hash_len)
+{
+ int err;
+ int size = crypto_hash_digestsize(((struct hash_desc *)hdesc)->tfm);
+
+ if (hash_len == NULL) {
+ crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
+ kfree(hdesc);
+ return 0;
+ }
+ if (hash == NULL || *hash_len < size) {
+ *hash_len = size;
+ return -ENOSPC;
+ }
+ err = crypto_hash_final((struct hash_desc *) hdesc, hash);
+
+ if (err < 0) {
+ /* May be caller can fix error */
+ return err;
+ }
+ crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
+ kfree(hdesc);
+ return err;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_final);
+
+static void cfs_crypto_performance_test(unsigned char alg_id,
+ const unsigned char *buf,
+ unsigned int buf_len)
+{
+ unsigned long start, end;
+ int bcount, err = 0;
+ int sec = 1; /* do test only 1 sec */
+ unsigned char hash[64];
+ unsigned int hash_len = 64;
+
+ for (start = jiffies, end = start + sec * HZ, bcount = 0;
+ time_before(jiffies, end); bcount++) {
+ err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0,
+ hash, &hash_len);
+ if (err)
+ break;
+
+ }
+ end = jiffies;
+
+ if (err) {
+ cfs_crypto_hash_speeds[alg_id] = -1;
+ CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n",
+ cfs_crypto_hash_name(alg_id), err);
+ } else {
+ unsigned long tmp;
+ tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
+ 1000) / (1024 * 1024);
+ cfs_crypto_hash_speeds[alg_id] = (int)tmp;
+ }
+ CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n",
+ cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]);
+}
+
+int cfs_crypto_hash_speed(unsigned char hash_alg)
+{
+ if (hash_alg < CFS_HASH_ALG_MAX)
+ return cfs_crypto_hash_speeds[hash_alg];
+ else
+ return -1;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_speed);
+
+/**
+ * Do performance test for all hash algorithms.
+ */
+static int cfs_crypto_test_hashes(void)
+{
+ unsigned char i;
+ unsigned char *data;
+ unsigned int j;
+ /* Data block size for testing hash. Maximum
+ * kmalloc size for 2.6.18 kernel is 128K */
+ unsigned int data_len = 1 * 128 * 1024;
+
+ data = kmalloc(data_len, 0);
+ if (data == NULL) {
+ CERROR("Failed to allocate mem\n");
+ return -ENOMEM;
+ }
+
+ for (j = 0; j < data_len; j++)
+ data[j] = j & 0xff;
+
+ for (i = 0; i < CFS_HASH_ALG_MAX; i++)
+ cfs_crypto_performance_test(i, data, data_len);
+
+ kfree(data);
+ return 0;
+}
+
+static int adler32;
+
+int cfs_crypto_register(void)
+{
+ adler32 = cfs_crypto_adler32_register();
+
+ /* check all algorithms and do performance test */
+ cfs_crypto_test_hashes();
+ return 0;
+}
+void cfs_crypto_unregister(void)
+{
+ if (adler32 == 0)
+ cfs_crypto_adler32_unregister();
+
+ return;
+}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c
new file mode 100644
index 000000000000..f236510a2f3f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c
@@ -0,0 +1,339 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/linux/linux-curproc.c
+ *
+ * Lustre curproc API implementation for Linux kernel
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/fs_struct.h>
+
+#include <linux/compat.h>
+#include <linux/thread_info.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for Linux kernel.
+ */
+
+int cfs_curproc_groups_nr(void)
+{
+ int nr;
+
+ task_lock(current);
+ nr = current_cred()->group_info->ngroups;
+ task_unlock(current);
+ return nr;
+}
+
+void cfs_curproc_groups_dump(gid_t *array, int size)
+{
+ task_lock(current);
+ size = min_t(int, size, current_cred()->group_info->ngroups);
+ memcpy(array, current_cred()->group_info->blocks[0], size * sizeof(__u32));
+ task_unlock(current);
+}
+
+
+int current_is_in_group(gid_t gid)
+{
+ return in_group_p(gid);
+}
+
+/* Currently all the CFS_CAP_* defines match CAP_* ones. */
+#define cfs_cap_pack(cap) (cap)
+#define cfs_cap_unpack(cap) (cap)
+
+void cfs_cap_raise(cfs_cap_t cap)
+{
+ struct cred *cred;
+ if ((cred = prepare_creds())) {
+ cap_raise(cred->cap_effective, cfs_cap_unpack(cap));
+ commit_creds(cred);
+ }
+}
+
+void cfs_cap_lower(cfs_cap_t cap)
+{
+ struct cred *cred;
+ if ((cred = prepare_creds())) {
+ cap_lower(cred->cap_effective, cfs_cap_unpack(cap));
+ commit_creds(cred);
+ }
+}
+
+int cfs_cap_raised(cfs_cap_t cap)
+{
+ return cap_raised(current_cap(), cfs_cap_unpack(cap));
+}
+
+void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap)
+{
+#if defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x19980330
+ *cap = cfs_cap_pack(kcap);
+#elif defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x20071026
+ *cap = cfs_cap_pack(kcap[0]);
+#elif defined(_KERNEL_CAPABILITY_VERSION) && _KERNEL_CAPABILITY_VERSION == 0x20080522
+ /* XXX lost high byte */
+ *cap = cfs_cap_pack(kcap.cap[0]);
+#else
+ #error "need correct _KERNEL_CAPABILITY_VERSION "
+#endif
+}
+
+void cfs_kernel_cap_unpack(kernel_cap_t *kcap, cfs_cap_t cap)
+{
+#if defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x19980330
+ *kcap = cfs_cap_unpack(cap);
+#elif defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x20071026
+ (*kcap)[0] = cfs_cap_unpack(cap);
+#elif defined(_KERNEL_CAPABILITY_VERSION) && _KERNEL_CAPABILITY_VERSION == 0x20080522
+ kcap->cap[0] = cfs_cap_unpack(cap);
+#else
+ #error "need correct _KERNEL_CAPABILITY_VERSION "
+#endif
+}
+
+cfs_cap_t cfs_curproc_cap_pack(void)
+{
+ cfs_cap_t cap;
+ cfs_kernel_cap_pack(current_cap(), &cap);
+ return cap;
+}
+
+void cfs_curproc_cap_unpack(cfs_cap_t cap)
+{
+ struct cred *cred;
+ if ((cred = prepare_creds())) {
+ cfs_kernel_cap_unpack(&cred->cap_effective, cap);
+ commit_creds(cred);
+ }
+}
+
+int cfs_capable(cfs_cap_t cap)
+{
+ return capable(cfs_cap_unpack(cap));
+}
+
+/* Check if task is running in 32-bit API mode, for the purpose of
+ * userspace binary interfaces. On 32-bit Linux this is (unfortunately)
+ * always true, even if the application is using LARGEFILE64 and 64-bit
+ * APIs, because Linux provides no way for the filesystem to know if it
+ * is called via 32-bit or 64-bit APIs. Other clients may vary. On
+ * 64-bit systems, this will only be true if the binary is calling a
+ * 32-bit system call. */
+int current_is_32bit(void)
+{
+ return is_compat_task();
+}
+
+static int cfs_access_process_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, int write)
+{
+ /* Just copied from kernel for the kernels which doesn't
+ * have access_process_vm() exported */
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *old_buf = buf;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return 0;
+
+ down_read(&mm->mmap_sem);
+ /* ignore errors, just check how much was sucessfully transfered */
+ while (len) {
+ int bytes, rc, offset;
+ void *maddr;
+
+ rc = get_user_pages(tsk, mm, addr, 1,
+ write, 1, &page, &vma);
+ if (rc <= 0)
+ break;
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1);
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset;
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ page_cache_release(page);
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+ return buf - old_buf;
+}
+
+/* Read the environment variable of current process specified by @key. */
+int cfs_get_environ(const char *key, char *value, int *val_len)
+{
+ struct mm_struct *mm;
+ char *buffer, *tmp_buf = NULL;
+ int buf_len = PAGE_CACHE_SIZE;
+ int key_len = strlen(key);
+ unsigned long addr;
+ int rc;
+ ENTRY;
+
+ buffer = kmalloc(buf_len, GFP_USER);
+ if (!buffer)
+ RETURN(-ENOMEM);
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ kfree(buffer);
+ RETURN(-EINVAL);
+ }
+
+ /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
+ * which is already holding mmap_sem for writes. If some other
+ * thread gets the write lock in the meantime, this thread will
+ * block, but at least it won't deadlock on itself. LU-1735 */
+ if (down_read_trylock(&mm->mmap_sem) == 0)
+ return -EDEADLK;
+ up_read(&mm->mmap_sem);
+
+ addr = mm->env_start;
+ while (addr < mm->env_end) {
+ int this_len, retval, scan_len;
+ char *env_start, *env_end;
+
+ memset(buffer, 0, buf_len);
+
+ this_len = min_t(int, mm->env_end - addr, buf_len);
+ retval = cfs_access_process_vm(current, addr, buffer,
+ this_len, 0);
+ if (retval != this_len)
+ break;
+
+ addr += retval;
+
+ /* Parse the buffer to find out the specified key/value pair.
+ * The "key=value" entries are separated by '\0'. */
+ env_start = buffer;
+ scan_len = this_len;
+ while (scan_len) {
+ char *entry;
+ int entry_len;
+
+ env_end = memscan(env_start, '\0', scan_len);
+ LASSERT(env_end >= env_start &&
+ env_end <= env_start + scan_len);
+
+ /* The last entry of this buffer cross the buffer
+ * boundary, reread it in next cycle. */
+ if (unlikely(env_end - env_start == scan_len)) {
+ /* This entry is too large to fit in buffer */
+ if (unlikely(scan_len == this_len)) {
+ CERROR("Too long env variable.\n");
+ GOTO(out, rc = -EINVAL);
+ }
+ addr -= scan_len;
+ break;
+ }
+
+ entry = env_start;
+ entry_len = env_end - env_start;
+
+ /* Key length + length of '=' */
+ if (entry_len > key_len + 1 &&
+ !memcmp(entry, key, key_len)) {
+ entry += key_len + 1;
+ entry_len -= key_len + 1;
+ /* The 'value' buffer passed in is too small.*/
+ if (entry_len >= *val_len)
+ GOTO(out, rc = -EOVERFLOW);
+
+ memcpy(value, entry, entry_len);
+ *val_len = entry_len;
+ GOTO(out, rc = 0);
+ }
+
+ scan_len -= (env_end - env_start + 1);
+ env_start = env_end + 1;
+ }
+ }
+ GOTO(out, rc = -ENOENT);
+
+out:
+ mmput(mm);
+ kfree((void *)buffer);
+ if (tmp_buf)
+ kfree((void *)tmp_buf);
+ return rc;
+}
+EXPORT_SYMBOL(cfs_get_environ);
+
+EXPORT_SYMBOL(cfs_curproc_groups_nr);
+EXPORT_SYMBOL(cfs_curproc_groups_dump);
+EXPORT_SYMBOL(current_is_in_group);
+EXPORT_SYMBOL(cfs_cap_raise);
+EXPORT_SYMBOL(cfs_cap_lower);
+EXPORT_SYMBOL(cfs_cap_raised);
+EXPORT_SYMBOL(cfs_curproc_cap_pack);
+EXPORT_SYMBOL(cfs_curproc_cap_unpack);
+EXPORT_SYMBOL(cfs_capable);
+EXPORT_SYMBOL(current_is_32bit);
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c
new file mode 100644
index 000000000000..e2c195b8dd53
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c
@@ -0,0 +1,264 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/linux/linux-debug.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/version.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+
+#include "tracefile.h"
+
+#include <linux/kallsyms.h>
+
+char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
+char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
+
+/**
+ * Upcall function once a Lustre log has been dumped.
+ *
+ * \param file path of the dumped log
+ */
+void libcfs_run_debug_log_upcall(char *file)
+{
+ char *argv[3];
+ int rc;
+ char *envp[] = {
+ "HOME=/",
+ "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+ NULL};
+ ENTRY;
+
+ argv[0] = lnet_debug_log_upcall;
+
+ LASSERTF(file != NULL, "called on a null filename\n");
+ argv[1] = file; //only need to pass the path of the file
+
+ argv[2] = NULL;
+
+ rc = USERMODEHELPER(argv[0], argv, envp);
+ if (rc < 0 && rc != -ENOENT) {
+ CERROR("Error %d invoking LNET debug log upcall %s %s; "
+ "check /proc/sys/lnet/debug_log_upcall\n",
+ rc, argv[0], argv[1]);
+ } else {
+ CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
+ argv[0], argv[1]);
+ }
+
+ EXIT;
+}
+
+void libcfs_run_upcall(char **argv)
+{
+ int rc;
+ int argc;
+ char *envp[] = {
+ "HOME=/",
+ "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+ NULL};
+ ENTRY;
+
+ argv[0] = lnet_upcall;
+ argc = 1;
+ while (argv[argc] != NULL)
+ argc++;
+
+ LASSERT(argc >= 2);
+
+ rc = USERMODEHELPER(argv[0], argv, envp);
+ if (rc < 0 && rc != -ENOENT) {
+ CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; "
+ "check /proc/sys/lnet/upcall\n",
+ rc, argv[0], argv[1],
+ argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+ argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+ argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+ argc < 6 ? "" : ",...");
+ } else {
+ CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
+ argv[0], argv[1],
+ argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+ argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+ argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+ argc < 6 ? "" : ",...");
+ }
+}
+
+void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata)
+{
+ char *argv[6];
+ char buf[32];
+
+ ENTRY;
+ snprintf (buf, sizeof buf, "%d", msgdata->msg_line);
+
+ argv[1] = "LBUG";
+ argv[2] = (char *)msgdata->msg_file;
+ argv[3] = (char *)msgdata->msg_fn;
+ argv[4] = buf;
+ argv[5] = NULL;
+
+ libcfs_run_upcall (argv);
+}
+
+/* coverity[+kill] */
+void lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
+{
+ libcfs_catastrophe = 1;
+ libcfs_debug_msg(msgdata, "LBUG\n");
+
+ if (in_interrupt()) {
+ panic("LBUG in interrupt.\n");
+ /* not reached */
+ }
+
+ libcfs_debug_dumpstack(NULL);
+ if (!libcfs_panic_on_lbug)
+ libcfs_debug_dumplog();
+ libcfs_run_lbug_upcall(msgdata);
+ if (libcfs_panic_on_lbug)
+ panic("LBUG");
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ while (1)
+ schedule();
+}
+
+
+#include <linux/nmi.h>
+#include <asm/stacktrace.h>
+
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk(" <%s> ", name);
+ return 0;
+}
+
+# define RELIABLE reliable
+# define DUMP_TRACE_CONST const
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ char fmt[32];
+ touch_nmi_watchdog();
+ sprintf(fmt, " [<%016lx>] %s%%s\n", addr, RELIABLE ? "": "? ");
+ __print_symbol(fmt, addr);
+}
+
+static DUMP_TRACE_CONST struct stacktrace_ops print_trace_ops = {
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+ .walk_stack = print_context_stack,
+};
+
+void libcfs_debug_dumpstack(struct task_struct *tsk)
+{
+ /* dump_stack() */
+ /* show_trace() */
+ if (tsk == NULL)
+ tsk = current;
+ printk("Pid: %d, comm: %.20s\n", tsk->pid, tsk->comm);
+ /* show_trace_log_lvl() */
+ printk("\nCall Trace:\n");
+ dump_trace(tsk, NULL, NULL,
+ 0,
+ &print_trace_ops, NULL);
+ printk("\n");
+}
+
+task_t *libcfs_current(void)
+{
+ CWARN("current task struct is %p\n", current);
+ return current;
+}
+
+static int panic_notifier(struct notifier_block *self, unsigned long unused1,
+ void *unused2)
+{
+ if (libcfs_panic_in_progress)
+ return 0;
+
+ libcfs_panic_in_progress = 1;
+ mb();
+
+ return 0;
+}
+
+static struct notifier_block libcfs_panic_notifier = {
+ notifier_call : panic_notifier,
+ next : NULL,
+ priority : 10000
+};
+
+void libcfs_register_panic_notifier(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier);
+}
+
+void libcfs_unregister_panic_notifier(void)
+{
+ atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier);
+}
+
+EXPORT_SYMBOL(libcfs_debug_dumpstack);
+EXPORT_SYMBOL(libcfs_current);
+
+
+EXPORT_SYMBOL(libcfs_run_upcall);
+EXPORT_SYMBOL(libcfs_run_lbug_upcall);
+EXPORT_SYMBOL(lbug_with_loc);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c
new file mode 100644
index 000000000000..2c7d4a3d660f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c
@@ -0,0 +1,183 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+#define LNET_MINOR 240
+
+int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
+{
+ struct libcfs_ioctl_hdr *hdr;
+ struct libcfs_ioctl_data *data;
+ int err;
+ ENTRY;
+
+ hdr = (struct libcfs_ioctl_hdr *)buf;
+ data = (struct libcfs_ioctl_data *)buf;
+
+ err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
+ if (err)
+ RETURN(err);
+
+ if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
+ CERROR("PORTALS: version mismatch kernel vs application\n");
+ RETURN(-EINVAL);
+ }
+
+ if (hdr->ioc_len + buf >= end) {
+ CERROR("PORTALS: user buffer exceeds kernel buffer\n");
+ RETURN(-EINVAL);
+ }
+
+
+ if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
+ CERROR("PORTALS: user buffer too small for ioctl\n");
+ RETURN(-EINVAL);
+ }
+
+ err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
+ if (err)
+ RETURN(err);
+
+ if (libcfs_ioctl_is_invalid(data)) {
+ CERROR("PORTALS: ioctl not correctly formatted\n");
+ RETURN(-EINVAL);
+ }
+
+ if (data->ioc_inllen1)
+ data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+ if (data->ioc_inllen2)
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+ cfs_size_round(data->ioc_inllen1);
+
+ RETURN(0);
+}
+
+int libcfs_ioctl_popdata(void *arg, void *data, int size)
+{
+ if (copy_to_user((char *)arg, data, size))
+ return -EFAULT;
+ return 0;
+}
+
+extern struct cfs_psdev_ops libcfs_psdev_ops;
+
+static int
+libcfs_psdev_open(struct inode * inode, struct file * file)
+{
+ struct libcfs_device_userstate **pdu = NULL;
+ int rc = 0;
+
+ if (!inode)
+ return (-EINVAL);
+ pdu = (struct libcfs_device_userstate **)&file->private_data;
+ if (libcfs_psdev_ops.p_open != NULL)
+ rc = libcfs_psdev_ops.p_open(0, (void *)pdu);
+ else
+ return (-EPERM);
+ return rc;
+}
+
+/* called when closing /dev/device */
+static int
+libcfs_psdev_release(struct inode * inode, struct file * file)
+{
+ struct libcfs_device_userstate *pdu;
+ int rc = 0;
+
+ if (!inode)
+ return (-EINVAL);
+ pdu = file->private_data;
+ if (libcfs_psdev_ops.p_close != NULL)
+ rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
+ else
+ rc = -EPERM;
+ return rc;
+}
+
+static long libcfs_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct cfs_psdev_file pfile;
+ int rc = 0;
+
+ if (current_fsuid() != 0)
+ return -EACCES;
+
+ if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+ _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
+ _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
+ CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+ _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+ return (-EINVAL);
+ }
+
+ /* Handle platform-dependent IOC requests */
+ switch (cmd) {
+ case IOC_LIBCFS_PANIC:
+ if (!cfs_capable(CFS_CAP_SYS_BOOT))
+ return (-EPERM);
+ panic("debugctl-invoked panic");
+ return (0);
+ case IOC_LIBCFS_MEMHOG:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ return -EPERM;
+ /* go thought */
+ }
+
+ pfile.off = 0;
+ pfile.private_data = file->private_data;
+ if (libcfs_psdev_ops.p_ioctl != NULL)
+ rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+ else
+ rc = -EPERM;
+ return (rc);
+}
+
+static struct file_operations libcfs_fops = {
+ unlocked_ioctl: libcfs_ioctl,
+ open : libcfs_psdev_open,
+ release : libcfs_psdev_release
+};
+
+psdev_t libcfs_dev = {
+ LNET_MINOR,
+ "lnet",
+ &libcfs_fops
+};
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c
new file mode 100644
index 000000000000..b652a79a4811
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c
@@ -0,0 +1,259 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs_struct.h>
+#include <linux/sched.h>
+
+#include <linux/libcfs/libcfs.h>
+
+#if defined(CONFIG_KGDB)
+#include <asm/kgdb.h>
+#endif
+
+#define LINUX_WAITQ(w) ((wait_queue_t *) w)
+#define LINUX_WAITQ_HEAD(w) ((wait_queue_head_t *) w)
+
+void
+init_waitqueue_entry_current(wait_queue_t *link)
+{
+ init_waitqueue_entry(LINUX_WAITQ(link), current);
+}
+EXPORT_SYMBOL(init_waitqueue_entry_current);
+
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be waken up again and again, even user only needs a few of them to be
+ * active most time. This is not good for performance because cache can
+ * be polluted by different threads.
+ *
+ * LIFO list can resolve this problem because we always wakeup the most
+ * recent active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+void
+add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&LINUX_WAITQ_HEAD(waitq)->lock, flags);
+ __add_wait_queue_exclusive(LINUX_WAITQ_HEAD(waitq), LINUX_WAITQ(link));
+ spin_unlock_irqrestore(&LINUX_WAITQ_HEAD(waitq)->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_head);
+
+void
+waitq_wait(wait_queue_t *link, cfs_task_state_t state)
+{
+ schedule();
+}
+EXPORT_SYMBOL(waitq_wait);
+
+int64_t
+waitq_timedwait(wait_queue_t *link, cfs_task_state_t state,
+ int64_t timeout)
+{
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(waitq_timedwait);
+
+void
+schedule_timeout_and_set_state(cfs_task_state_t state, int64_t timeout)
+{
+ set_current_state(state);
+ schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_and_set_state);
+
+/* deschedule for a bit... */
+void
+cfs_pause(cfs_duration_t ticks)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(ticks);
+}
+EXPORT_SYMBOL(cfs_pause);
+
+void cfs_init_timer(timer_list_t *t)
+{
+ init_timer(t);
+}
+EXPORT_SYMBOL(cfs_init_timer);
+
+void cfs_timer_init(timer_list_t *t, cfs_timer_func_t *func, void *arg)
+{
+ init_timer(t);
+ t->function = func;
+ t->data = (unsigned long)arg;
+}
+EXPORT_SYMBOL(cfs_timer_init);
+
+void cfs_timer_done(timer_list_t *t)
+{
+ return;
+}
+EXPORT_SYMBOL(cfs_timer_done);
+
+void cfs_timer_arm(timer_list_t *t, cfs_time_t deadline)
+{
+ mod_timer(t, deadline);
+}
+EXPORT_SYMBOL(cfs_timer_arm);
+
+void cfs_timer_disarm(timer_list_t *t)
+{
+ del_timer(t);
+}
+EXPORT_SYMBOL(cfs_timer_disarm);
+
+int cfs_timer_is_armed(timer_list_t *t)
+{
+ return timer_pending(t);
+}
+EXPORT_SYMBOL(cfs_timer_is_armed);
+
+cfs_time_t cfs_timer_deadline(timer_list_t *t)
+{
+ return t->expires;
+}
+EXPORT_SYMBOL(cfs_timer_deadline);
+
+void cfs_enter_debugger(void)
+{
+#if defined(CONFIG_KGDB)
+// BREAKPOINT();
+#else
+ /* nothing */
+#endif
+}
+
+
+sigset_t
+cfs_block_allsigs(void)
+{
+ unsigned long flags;
+ sigset_t old;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ old = current->blocked;
+ sigfillset(&current->blocked);
+ recalc_sigpending();
+ SIGNAL_MASK_UNLOCK(current, flags);
+
+ return old;
+}
+
+sigset_t cfs_block_sigs(unsigned long sigs)
+{
+ unsigned long flags;
+ sigset_t old;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ old = current->blocked;
+ sigaddsetmask(&current->blocked, sigs);
+ recalc_sigpending();
+ SIGNAL_MASK_UNLOCK(current, flags);
+ return old;
+}
+
+/* Block all signals except for the @sigs */
+sigset_t cfs_block_sigsinv(unsigned long sigs)
+{
+ unsigned long flags;
+ sigset_t old;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ old = current->blocked;
+ sigaddsetmask(&current->blocked, ~sigs);
+ recalc_sigpending();
+ SIGNAL_MASK_UNLOCK(current, flags);
+
+ return old;
+}
+
+void
+cfs_restore_sigs (sigset_t old)
+{
+ unsigned long flags;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ current->blocked = old;
+ recalc_sigpending();
+ SIGNAL_MASK_UNLOCK(current, flags);
+}
+
+int
+cfs_signal_pending(void)
+{
+ return signal_pending(current);
+}
+
+void
+cfs_clear_sigpending(void)
+{
+ unsigned long flags;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ clear_tsk_thread_flag(current, TIF_SIGPENDING);
+ SIGNAL_MASK_UNLOCK(current, flags);
+}
+
+int
+libcfs_arch_init(void)
+{
+ return 0;
+}
+
+void
+libcfs_arch_cleanup(void)
+{
+ return;
+}
+
+EXPORT_SYMBOL(libcfs_arch_init);
+EXPORT_SYMBOL(libcfs_arch_cleanup);
+EXPORT_SYMBOL(cfs_enter_debugger);
+EXPORT_SYMBOL(cfs_block_allsigs);
+EXPORT_SYMBOL(cfs_block_sigs);
+EXPORT_SYMBOL(cfs_block_sigsinv);
+EXPORT_SYMBOL(cfs_restore_sigs);
+EXPORT_SYMBOL(cfs_signal_pending);
+EXPORT_SYMBOL(cfs_clear_sigpending);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-proc.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-proc.c
new file mode 100644
index 000000000000..522b28e99e41
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-proc.c
@@ -0,0 +1,580 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/linux/linux-proc.c
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include <asm/div64.h>
+#include "tracefile.h"
+
+#ifdef CONFIG_SYSCTL
+static ctl_table_header_t *lnet_table_header = NULL;
+#endif
+extern char lnet_upcall[1024];
+/**
+ * The path of debug log dump upcall script.
+ */
+extern char lnet_debug_log_upcall[1024];
+
+#define CTL_LNET (0x100)
+enum {
+ PSDEV_DEBUG = 1, /* control debugging */
+ PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
+ PSDEV_PRINTK, /* force all messages to console */
+ PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */
+ PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */
+ PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */
+ PSDEV_CONSOLE_BACKOFF, /* delay increase factor */
+ PSDEV_DEBUG_PATH, /* crashdump log location */
+ PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
+ PSDEV_CPT_TABLE, /* information about cpu partitions */
+ PSDEV_LNET_UPCALL, /* User mode upcall script */
+ PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */
+ PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */
+ PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */
+ PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */
+ PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */
+ PSDEV_LNET_DEBUG_MB, /* size of debug buffer */
+ PSDEV_LNET_DEBUG_LOG_UPCALL, /* debug log upcall script */
+ PSDEV_LNET_WATCHDOG_RATELIMIT, /* ratelimit watchdog messages */
+ PSDEV_LNET_FORCE_LBUG, /* hook to force an LBUG */
+ PSDEV_LNET_FAIL_LOC, /* control test failures instrumentation */
+ PSDEV_LNET_FAIL_VAL, /* userdata for fail loc */
+};
+
+int
+proc_call_handler(void *data, int write,
+ loff_t *ppos, void *buffer, size_t *lenp,
+ int (*handler)(void *data, int write,
+ loff_t pos, void *buffer, int len))
+{
+ int rc = handler(data, write, *ppos, buffer, *lenp);
+
+ if (rc < 0)
+ return rc;
+
+ if (write) {
+ *ppos += *lenp;
+ } else {
+ *lenp = rc;
+ *ppos += rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(proc_call_handler);
+
+static int __proc_dobitmasks(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ const int tmpstrlen = 512;
+ char *tmpstr;
+ int rc;
+ unsigned int *mask = data;
+ int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
+ int is_printk = (mask == &libcfs_printk) ? 1 : 0;
+
+ rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
+ if (rc < 0)
+ return rc;
+
+ if (!write) {
+ libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
+ rc = strlen(tmpstr);
+
+ if (pos >= rc) {
+ rc = 0;
+ } else {
+ rc = cfs_trace_copyout_string(buffer, nob,
+ tmpstr + pos, "\n");
+ }
+ } else {
+ rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
+ if (rc < 0) {
+ cfs_trace_free_string_buffer(tmpstr, tmpstrlen);
+ return rc;
+ }
+
+ rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
+ /* Always print LBUG/LASSERT to console, so keep this mask */
+ if (is_printk)
+ *mask |= D_EMERG;
+ }
+
+ cfs_trace_free_string_buffer(tmpstr, tmpstrlen);
+ return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_dobitmasks)
+
+static int min_watchdog_ratelimit = 0; /* disable ratelimiting */
+static int max_watchdog_ratelimit = (24*60*60); /* limit to once per day */
+
+static int __proc_dump_kernel(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ if (!write)
+ return 0;
+
+ return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
+}
+
+DECLARE_PROC_HANDLER(proc_dump_kernel)
+
+static int __proc_daemon_file(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ if (!write) {
+ int len = strlen(cfs_tracefile);
+
+ if (pos >= len)
+ return 0;
+
+ return cfs_trace_copyout_string(buffer, nob,
+ cfs_tracefile + pos, "\n");
+ }
+
+ return cfs_trace_daemon_command_usrstr(buffer, nob);
+}
+
+DECLARE_PROC_HANDLER(proc_daemon_file)
+
+static int __proc_debug_mb(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ if (!write) {
+ char tmpstr[32];
+ int len = snprintf(tmpstr, sizeof(tmpstr), "%d",
+ cfs_trace_get_debug_mb());
+
+ if (pos >= len)
+ return 0;
+
+ return cfs_trace_copyout_string(buffer, nob, tmpstr + pos,
+ "\n");
+ }
+
+ return cfs_trace_set_debug_mb_usrstr(buffer, nob);
+}
+
+DECLARE_PROC_HANDLER(proc_debug_mb)
+
+int LL_PROC_PROTO(proc_console_max_delay_cs)
+{
+ int rc, max_delay_cs;
+ ctl_table_t dummy = *table;
+ cfs_duration_t d;
+
+ dummy.data = &max_delay_cs;
+ dummy.proc_handler = &proc_dointvec;
+
+ if (!write) { /* read */
+ max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100);
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ return rc;
+ }
+
+ /* write */
+ max_delay_cs = 0;
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ if (rc < 0)
+ return rc;
+ if (max_delay_cs <= 0)
+ return -EINVAL;
+
+ d = cfs_time_seconds(max_delay_cs) / 100;
+ if (d == 0 || d < libcfs_console_min_delay)
+ return -EINVAL;
+ libcfs_console_max_delay = d;
+
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_console_min_delay_cs)
+{
+ int rc, min_delay_cs;
+ ctl_table_t dummy = *table;
+ cfs_duration_t d;
+
+ dummy.data = &min_delay_cs;
+ dummy.proc_handler = &proc_dointvec;
+
+ if (!write) { /* read */
+ min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100);
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ return rc;
+ }
+
+ /* write */
+ min_delay_cs = 0;
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ if (rc < 0)
+ return rc;
+ if (min_delay_cs <= 0)
+ return -EINVAL;
+
+ d = cfs_time_seconds(min_delay_cs) / 100;
+ if (d == 0 || d > libcfs_console_max_delay)
+ return -EINVAL;
+ libcfs_console_min_delay = d;
+
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_console_backoff)
+{
+ int rc, backoff;
+ ctl_table_t dummy = *table;
+
+ dummy.data = &backoff;
+ dummy.proc_handler = &proc_dointvec;
+
+ if (!write) { /* read */
+ backoff= libcfs_console_backoff;
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ return rc;
+ }
+
+ /* write */
+ backoff = 0;
+ rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos);
+ if (rc < 0)
+ return rc;
+ if (backoff <= 0)
+ return -EINVAL;
+
+ libcfs_console_backoff = backoff;
+
+ return rc;
+}
+
+int LL_PROC_PROTO(libcfs_force_lbug)
+{
+ if (write)
+ LBUG();
+ return 0;
+}
+
+int LL_PROC_PROTO(proc_fail_loc)
+{
+ int rc;
+ long old_fail_loc = cfs_fail_loc;
+
+ rc = ll_proc_dolongvec(table, write, filp, buffer, lenp, ppos);
+ if (old_fail_loc != cfs_fail_loc)
+ wake_up(&cfs_race_waitq);
+ return rc;
+}
+
+static int __proc_cpt_table(void *data, int write,
+ loff_t pos, void *buffer, int nob)
+{
+ char *buf = NULL;
+ int len = 4096;
+ int rc = 0;
+
+ if (write)
+ return -EPERM;
+
+ LASSERT(cfs_cpt_table != NULL);
+
+ while (1) {
+ LIBCFS_ALLOC(buf, len);
+ if (buf == NULL)
+ return -ENOMEM;
+
+ rc = cfs_cpt_table_print(cfs_cpt_table, buf, len);
+ if (rc >= 0)
+ break;
+
+ LIBCFS_FREE(buf, len);
+ if (rc == -EFBIG) {
+ len <<= 1;
+ continue;
+ }
+ goto out;
+ }
+
+ if (pos >= rc) {
+ rc = 0;
+ goto out;
+ }
+
+ rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
+ out:
+ if (buf != NULL)
+ LIBCFS_FREE(buf, len);
+ return rc;
+}
+DECLARE_PROC_HANDLER(proc_cpt_table)
+
+static ctl_table_t lnet_table[] = {
+ /*
+ * NB No .strategy entries have been provided since sysctl(8) prefers
+ * to go via /proc for portability.
+ */
+ {
+ INIT_CTL_NAME(PSDEV_DEBUG)
+ .procname = "debug",
+ .data = &libcfs_debug,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dobitmasks,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_SUBSYSTEM_DEBUG)
+ .procname = "subsystem_debug",
+ .data = &libcfs_subsystem_debug,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dobitmasks,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_PRINTK)
+ .procname = "printk",
+ .data = &libcfs_printk,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dobitmasks,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_CONSOLE_RATELIMIT)
+ .procname = "console_ratelimit",
+ .data = &libcfs_console_ratelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME(PSDEV_CONSOLE_MAX_DELAY_CS)
+ .procname = "console_max_delay_centisecs",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_console_max_delay_cs
+ },
+ {
+ INIT_CTL_NAME(PSDEV_CONSOLE_MIN_DELAY_CS)
+ .procname = "console_min_delay_centisecs",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_console_min_delay_cs
+ },
+ {
+ INIT_CTL_NAME(PSDEV_CONSOLE_BACKOFF)
+ .procname = "console_backoff",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_console_backoff
+ },
+
+ {
+ INIT_CTL_NAME(PSDEV_DEBUG_PATH)
+ .procname = "debug_path",
+ .data = libcfs_debug_file_path_arr,
+ .maxlen = sizeof(libcfs_debug_file_path_arr),
+ .mode = 0644,
+ .proc_handler = &proc_dostring,
+ },
+
+ {
+ INIT_CTL_NAME(PSDEV_CPT_TABLE)
+ .procname = "cpu_partition_table",
+ .maxlen = 128,
+ .mode = 0444,
+ .proc_handler = &proc_cpt_table,
+ },
+
+ {
+ INIT_CTL_NAME(PSDEV_LNET_UPCALL)
+ .procname = "upcall",
+ .data = lnet_upcall,
+ .maxlen = sizeof(lnet_upcall),
+ .mode = 0644,
+ .proc_handler = &proc_dostring,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_DEBUG_LOG_UPCALL)
+ .procname = "debug_log_upcall",
+ .data = lnet_debug_log_upcall,
+ .maxlen = sizeof(lnet_debug_log_upcall),
+ .mode = 0644,
+ .proc_handler = &proc_dostring,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_MEMUSED)
+ .procname = "lnet_memused",
+ .data = (int *)&libcfs_kmemory.counter,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ INIT_STRATEGY(&sysctl_intvec)
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_CATASTROPHE)
+ .procname = "catastrophe",
+ .data = &libcfs_catastrophe,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ INIT_STRATEGY(&sysctl_intvec)
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_PANIC_ON_LBUG)
+ .procname = "panic_on_lbug",
+ .data = &libcfs_panic_on_lbug,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ INIT_STRATEGY(&sysctl_intvec)
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_DUMP_KERNEL)
+ .procname = "dump_kernel",
+ .maxlen = 256,
+ .mode = 0200,
+ .proc_handler = &proc_dump_kernel,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_DAEMON_FILE)
+ .procname = "daemon_file",
+ .mode = 0644,
+ .maxlen = 256,
+ .proc_handler = &proc_daemon_file,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_DEBUG_MB)
+ .procname = "debug_mb",
+ .mode = 0644,
+ .proc_handler = &proc_debug_mb,
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_WATCHDOG_RATELIMIT)
+ .procname = "watchdog_ratelimit",
+ .data = &libcfs_watchdog_ratelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &min_watchdog_ratelimit,
+ .extra2 = &max_watchdog_ratelimit,
+ },
+ { INIT_CTL_NAME(PSDEV_LNET_FORCE_LBUG)
+ .procname = "force_lbug",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0200,
+ .proc_handler = &libcfs_force_lbug
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_FAIL_LOC)
+ .procname = "fail_loc",
+ .data = &cfs_fail_loc,
+ .maxlen = sizeof(cfs_fail_loc),
+ .mode = 0644,
+ .proc_handler = &proc_fail_loc
+ },
+ {
+ INIT_CTL_NAME(PSDEV_LNET_FAIL_VAL)
+ .procname = "fail_val",
+ .data = &cfs_fail_val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME(0)
+ }
+};
+
+#ifdef CONFIG_SYSCTL
+static ctl_table_t top_table[] = {
+ {
+ INIT_CTL_NAME(CTL_LNET)
+ .procname = "lnet",
+ .mode = 0555,
+ .data = NULL,
+ .maxlen = 0,
+ .child = lnet_table,
+ },
+ {
+ INIT_CTL_NAME(0)
+ }
+};
+#endif
+
+int insert_proc(void)
+{
+#ifdef CONFIG_SYSCTL
+ if (lnet_table_header == NULL)
+ lnet_table_header = cfs_register_sysctl_table(top_table, 0);
+#endif
+ return 0;
+}
+
+void remove_proc(void)
+{
+#ifdef CONFIG_SYSCTL
+ if (lnet_table_header != NULL)
+ unregister_sysctl_table(lnet_table_header);
+
+ lnet_table_header = NULL;
+#endif
+}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
new file mode 100644
index 000000000000..855c7e87d96f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
@@ -0,0 +1,659 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/libcfs.h>
+
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/file.h>
+/* For sys_open & sys_close */
+#include <linux/syscalls.h>
+
+int
+libcfs_sock_ioctl(int cmd, unsigned long arg)
+{
+ mm_segment_t oldmm = get_fs();
+ struct socket *sock;
+ int rc;
+ struct file *sock_filp;
+
+ rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return rc;
+ }
+
+ sock_filp = sock_alloc_file(sock, 0, NULL);
+ if (IS_ERR(sock_filp)) {
+ sock_release(sock);
+ rc = PTR_ERR(sock_filp);
+ goto out;
+ }
+
+ set_fs(KERNEL_DS);
+ if (sock_filp->f_op->unlocked_ioctl)
+ rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg);
+ set_fs(oldmm);
+
+ fput(sock_filp);
+out:
+ return rc;
+}
+
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+ struct ifreq ifr;
+ int nob;
+ int rc;
+ __u32 val;
+
+ nob = strnlen(name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ CERROR("Interface name %s too long\n", name);
+ return -EINVAL;
+ }
+
+ CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+
+ strcpy(ifr.ifr_name, name);
+ rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get flags for interface %s\n", name);
+ return rc;
+ }
+
+ if ((ifr.ifr_flags & IFF_UP) == 0) {
+ CDEBUG(D_NET, "Interface %s down\n", name);
+ *up = 0;
+ *ip = *mask = 0;
+ return 0;
+ }
+
+ *up = 1;
+
+ strcpy(ifr.ifr_name, name);
+ ifr.ifr_addr.sa_family = AF_INET;
+ rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get IP address for interface %s\n", name);
+ return rc;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *ip = ntohl(val);
+
+ strcpy(ifr.ifr_name, name);
+ ifr.ifr_addr.sa_family = AF_INET;
+ rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get netmask for interface %s\n", name);
+ return rc;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
+ *mask = ntohl(val);
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_query);
+
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+ /* Allocate and fill in 'names', returning # interfaces/error */
+ char **names;
+ int toobig;
+ int nalloc;
+ int nfound;
+ struct ifreq *ifr;
+ struct ifconf ifc;
+ int rc;
+ int nob;
+ int i;
+
+
+ nalloc = 16; /* first guess at max interfaces */
+ toobig = 0;
+ for (;;) {
+ if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) {
+ toobig = 1;
+ nalloc = PAGE_CACHE_SIZE/sizeof(*ifr);
+ CWARN("Too many interfaces: only enumerating first %d\n",
+ nalloc);
+ }
+
+ LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+ if (ifr == NULL) {
+ CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+ rc = -ENOMEM;
+ goto out0;
+ }
+
+ ifc.ifc_buf = (char *)ifr;
+ ifc.ifc_len = nalloc * sizeof(*ifr);
+
+ rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
+
+ if (rc < 0) {
+ CERROR ("Error %d enumerating interfaces\n", rc);
+ goto out1;
+ }
+
+ LASSERT (rc == 0);
+
+ nfound = ifc.ifc_len/sizeof(*ifr);
+ LASSERT (nfound <= nalloc);
+
+ if (nfound < nalloc || toobig)
+ break;
+
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ nalloc *= 2;
+ }
+
+ if (nfound == 0)
+ goto out1;
+
+ LIBCFS_ALLOC(names, nfound * sizeof(*names));
+ if (names == NULL) {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ /* NULL out all names[i] */
+ memset (names, 0, nfound * sizeof(*names));
+
+ for (i = 0; i < nfound; i++) {
+
+ nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ /* no space for terminating NULL */
+ CERROR("interface name %.*s too long (%d max)\n",
+ nob, ifr[i].ifr_name, IFNAMSIZ);
+ rc = -ENAMETOOLONG;
+ goto out2;
+ }
+
+ LIBCFS_ALLOC(names[i], IFNAMSIZ);
+ if (names[i] == NULL) {
+ rc = -ENOMEM;
+ goto out2;
+ }
+
+ memcpy(names[i], ifr[i].ifr_name, nob);
+ names[i][nob] = 0;
+ }
+
+ *namesp = names;
+ rc = nfound;
+
+ out2:
+ if (rc < 0)
+ libcfs_ipif_free_enumeration(names, nfound);
+ out1:
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ out0:
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_enumerate);
+
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+ int i;
+
+ LASSERT (n > 0);
+
+ for (i = 0; i < n && names[i] != NULL; i++)
+ LIBCFS_FREE(names[i], IFNAMSIZ);
+
+ LIBCFS_FREE(names, n * sizeof(*names));
+}
+
+EXPORT_SYMBOL(libcfs_ipif_free_enumeration);
+
+int
+libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ mm_segment_t oldmm = get_fs();
+ long ticks = timeout * HZ;
+ unsigned long then;
+ struct timeval tv;
+
+ LASSERT (nob > 0);
+ /* Caller may pass a zero timeout if she thinks the socket buffer is
+ * empty enough to take the whole message immediately */
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
+ };
+
+ if (timeout != 0) {
+ /* Set send timeout to remaining time */
+ tv = (struct timeval) {
+ .tv_sec = ticks / HZ,
+ .tv_usec = ((ticks % HZ) * 1000000) / HZ
+ };
+ set_fs(KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
+ (char *)&tv, sizeof(tv));
+ set_fs(oldmm);
+ if (rc != 0) {
+ CERROR("Can't set socket send timeout "
+ "%ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+ }
+
+ set_fs (KERNEL_DS);
+ then = jiffies;
+ rc = sock_sendmsg (sock, &msg, iov.iov_len);
+ ticks -= jiffies - then;
+ set_fs (oldmm);
+
+ if (rc == nob)
+ return 0;
+
+ if (rc < 0)
+ return rc;
+
+ if (rc == 0) {
+ CERROR ("Unexpected zero rc\n");
+ return (-ECONNABORTED);
+ }
+
+ if (ticks <= 0)
+ return -EAGAIN;
+
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+ }
+
+ return (0);
+}
+EXPORT_SYMBOL(libcfs_sock_write);
+
+int
+libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ mm_segment_t oldmm = get_fs();
+ long ticks = timeout * HZ;
+ unsigned long then;
+ struct timeval tv;
+
+ LASSERT (nob > 0);
+ LASSERT (ticks > 0);
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+
+ /* Set receive timeout to remaining time */
+ tv = (struct timeval) {
+ .tv_sec = ticks / HZ,
+ .tv_usec = ((ticks % HZ) * 1000000) / HZ
+ };
+ set_fs(KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
+ (char *)&tv, sizeof(tv));
+ set_fs(oldmm);
+ if (rc != 0) {
+ CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+
+ set_fs(KERNEL_DS);
+ then = jiffies;
+ rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
+ ticks -= jiffies - then;
+ set_fs(oldmm);
+
+ if (rc < 0)
+ return rc;
+
+ if (rc == 0)
+ return -ECONNRESET;
+
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+
+ if (nob == 0)
+ return 0;
+
+ if (ticks <= 0)
+ return -ETIMEDOUT;
+ }
+}
+
+EXPORT_SYMBOL(libcfs_sock_read);
+
+static int
+libcfs_sock_create (struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port)
+{
+ struct sockaddr_in locaddr;
+ struct socket *sock;
+ int rc;
+ int option;
+ mm_segment_t oldmm = get_fs();
+
+ /* All errors are fatal except bind failure if the port is in use */
+ *fatal = 1;
+
+ rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+ *sockp = sock;
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ option = 1;
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
+ goto failed;
+ }
+
+ if (local_ip != 0 || local_port != 0) {
+ memset(&locaddr, 0, sizeof(locaddr));
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons(local_port);
+ locaddr.sin_addr.s_addr = (local_ip == 0) ?
+ INADDR_ANY : htonl(local_ip);
+
+ rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
+ sizeof(locaddr));
+ if (rc == -EADDRINUSE) {
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *fatal = 0;
+ goto failed;
+ }
+ if (rc != 0) {
+ CERROR("Error trying to bind to port %d: %d\n",
+ local_port, rc);
+ goto failed;
+ }
+ }
+
+ return 0;
+
+ failed:
+ sock_release(sock);
+ return rc;
+}
+
+int
+libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
+{
+ mm_segment_t oldmm = get_fs();
+ int option;
+ int rc;
+
+ if (txbufsize != 0) {
+ option = txbufsize;
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ if (rxbufsize != 0) {
+ option = rxbufsize;
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_setbuf);
+
+int
+libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
+{
+ struct sockaddr_in sin;
+ int len = sizeof (sin);
+ int rc;
+
+ rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len,
+ remote ? 2 : 0);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock %s IP/port\n",
+ rc, remote ? "peer" : "local");
+ return rc;
+ }
+
+ if (ip != NULL)
+ *ip = ntohl (sin.sin_addr.s_addr);
+
+ if (port != NULL)
+ *port = ntohs (sin.sin_port);
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_getaddr);
+
+int
+libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
+{
+
+ if (txbufsize != NULL) {
+ *txbufsize = sock->sk->sk_sndbuf;
+ }
+
+ if (rxbufsize != NULL) {
+ *rxbufsize = sock->sk->sk_rcvbuf;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_getbuf);
+
+int
+libcfs_sock_listen (struct socket **sockp,
+ __u32 local_ip, int local_port, int backlog)
+{
+ int fatal;
+ int rc;
+
+ rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
+ if (rc != 0) {
+ if (!fatal)
+ CERROR("Can't create socket: port %d already in use\n",
+ local_port);
+ return rc;
+ }
+
+ rc = (*sockp)->ops->listen(*sockp, backlog);
+ if (rc == 0)
+ return 0;
+
+ CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
+ sock_release(*sockp);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_listen);
+
+int
+libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
+{
+ wait_queue_t wait;
+ struct socket *newsock;
+ int rc;
+
+ init_waitqueue_entry(&wait, current);
+
+ /* XXX this should add a ref to sock->ops->owner, if
+ * TCP could be a module */
+ rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
+ if (rc) {
+ CERROR("Can't allocate socket\n");
+ return rc;
+ }
+
+ newsock->ops = sock->ops;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(cfs_sk_sleep(sock->sk), &wait);
+
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ if (rc == -EAGAIN) {
+ /* Nothing ready, so wait for activity */
+ schedule();
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ }
+
+ remove_wait_queue(cfs_sk_sleep(sock->sk), &wait);
+ set_current_state(TASK_RUNNING);
+
+ if (rc != 0)
+ goto failed;
+
+ *newsockp = newsock;
+ return 0;
+
+ failed:
+ sock_release(newsock);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_accept);
+
+void
+libcfs_sock_abort_accept (struct socket *sock)
+{
+ wake_up_all(cfs_sk_sleep(sock->sk));
+}
+
+EXPORT_SYMBOL(libcfs_sock_abort_accept);
+
+int
+libcfs_sock_connect (struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port)
+{
+ struct sockaddr_in srvaddr;
+ int rc;
+
+ rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
+ if (rc != 0)
+ return rc;
+
+ memset (&srvaddr, 0, sizeof (srvaddr));
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons(peer_port);
+ srvaddr.sin_addr.s_addr = htonl(peer_ip);
+
+ rc = (*sockp)->ops->connect(*sockp,
+ (struct sockaddr *)&srvaddr, sizeof(srvaddr),
+ 0);
+ if (rc == 0)
+ return 0;
+
+ /* EADDRNOTAVAIL probably means we're already connected to the same
+ * peer/port on the same local port on a differently typed
+ * connection. Let our caller retry with a different local
+ * port... */
+ *fatal = !(rc == -EADDRNOTAVAIL);
+
+ CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+
+ sock_release(*sockp);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_connect);
+
+void
+libcfs_sock_release (struct socket *sock)
+{
+ sock_release(sock);
+}
+
+EXPORT_SYMBOL(libcfs_sock_release);
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
new file mode 100644
index 000000000000..6f563436a255
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c
@@ -0,0 +1,275 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include <linux/libcfs/libcfs.h>
+#include "tracefile.h"
+
+/* percents to share the total debug memory for each type */
+static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
+ 80, /* 80% pages for CFS_TCD_TYPE_PROC */
+ 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
+ 10 /* 10% pages for CFS_TCD_TYPE_IRQ */
+};
+
+char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
+
+struct rw_semaphore cfs_tracefile_sem;
+
+int cfs_tracefile_init_arch()
+{
+ int i;
+ int j;
+ struct cfs_trace_cpu_data *tcd;
+
+ init_rwsem(&cfs_tracefile_sem);
+
+ /* initialize trace_data */
+ memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
+ for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
+ cfs_trace_data[i] =
+ kmalloc(sizeof(union cfs_trace_data_union) *
+ num_possible_cpus(), GFP_KERNEL);
+ if (cfs_trace_data[i] == NULL)
+ goto out;
+
+ }
+
+ /* arch related info initialized */
+ cfs_tcd_for_each(tcd, i, j) {
+ spin_lock_init(&tcd->tcd_lock);
+ tcd->tcd_pages_factor = pages_factor[i];
+ tcd->tcd_type = i;
+ tcd->tcd_cpu = j;
+ }
+
+ for (i = 0; i < num_possible_cpus(); i++)
+ for (j = 0; j < 3; j++) {
+ cfs_trace_console_buffers[i][j] =
+ kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
+ GFP_KERNEL);
+
+ if (cfs_trace_console_buffers[i][j] == NULL)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ cfs_tracefile_fini_arch();
+ printk(KERN_ERR "lnet: Not enough memory\n");
+ return -ENOMEM;
+}
+
+void cfs_tracefile_fini_arch()
+{
+ int i;
+ int j;
+
+ for (i = 0; i < num_possible_cpus(); i++)
+ for (j = 0; j < 3; j++)
+ if (cfs_trace_console_buffers[i][j] != NULL) {
+ kfree(cfs_trace_console_buffers[i][j]);
+ cfs_trace_console_buffers[i][j] = NULL;
+ }
+
+ for (i = 0; cfs_trace_data[i] != NULL; i++) {
+ kfree(cfs_trace_data[i]);
+ cfs_trace_data[i] = NULL;
+ }
+
+ fini_rwsem(&cfs_tracefile_sem);
+}
+
+void cfs_tracefile_read_lock()
+{
+ down_read(&cfs_tracefile_sem);
+}
+
+void cfs_tracefile_read_unlock()
+{
+ up_read(&cfs_tracefile_sem);
+}
+
+void cfs_tracefile_write_lock()
+{
+ down_write(&cfs_tracefile_sem);
+}
+
+void cfs_tracefile_write_unlock()
+{
+ up_write(&cfs_tracefile_sem);
+}
+
+cfs_trace_buf_type_t cfs_trace_buf_idx_get()
+{
+ if (in_irq())
+ return CFS_TCD_TYPE_IRQ;
+ else if (in_softirq())
+ return CFS_TCD_TYPE_SOFTIRQ;
+ else
+ return CFS_TCD_TYPE_PROC;
+}
+
+/*
+ * The walking argument indicates the locking comes from all tcd types
+ * iterator and we must lock it and dissable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details.
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_lock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ spin_lock_irq(&tcd->tcd_lock);
+ else
+ spin_lock(&tcd->tcd_lock);
+ return 1;
+}
+
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_unlock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ spin_unlock_irq(&tcd->tcd_lock);
+ else
+ spin_unlock(&tcd->tcd_lock);
+}
+
+int cfs_tcd_owns_tage(struct cfs_trace_cpu_data *tcd,
+ struct cfs_trace_page *tage)
+{
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ return tcd->tcd_cpu == tage->cpu;
+}
+
+void
+cfs_set_ptldebug_header(struct ptldebug_header *header,
+ struct libcfs_debug_msg_data *msgdata,
+ unsigned long stack)
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+
+ header->ph_subsys = msgdata->msg_subsys;
+ header->ph_mask = msgdata->msg_mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_type = cfs_trace_buf_idx_get();
+ header->ph_sec = (__u32)tv.tv_sec;
+ header->ph_usec = tv.tv_usec;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = msgdata->msg_line;
+ header->ph_extern_pid = 0;
+ return;
+}
+
+static char *
+dbghdr_to_err_string(struct ptldebug_header *hdr)
+{
+ switch (hdr->ph_subsys) {
+
+ case S_LND:
+ case S_LNET:
+ return "LNetError";
+ default:
+ return "LustreError";
+ }
+}
+
+static char *
+dbghdr_to_info_string(struct ptldebug_header *hdr)
+{
+ switch (hdr->ph_subsys) {
+
+ case S_LND:
+ case S_LNET:
+ return "LNet";
+ default:
+ return "Lustre";
+ }
+}
+
+void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+ const char *buf, int len, const char *file,
+ const char *fn)
+{
+ char *prefix = "Lustre", *ptype = NULL;
+
+ if ((mask & D_EMERG) != 0) {
+ prefix = dbghdr_to_err_string(hdr);
+ ptype = KERN_EMERG;
+ } else if ((mask & D_ERROR) != 0) {
+ prefix = dbghdr_to_err_string(hdr);
+ ptype = KERN_ERR;
+ } else if ((mask & D_WARNING) != 0) {
+ prefix = dbghdr_to_info_string(hdr);
+ ptype = KERN_WARNING;
+ } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
+ prefix = dbghdr_to_info_string(hdr);
+ ptype = KERN_INFO;
+ }
+
+ if ((mask & D_CONSOLE) != 0) {
+ printk("%s%s: %.*s", ptype, prefix, len, buf);
+ } else {
+ printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num,
+ fn, len, buf);
+ }
+ return;
+}
+
+int cfs_trace_max_debug_mb(void)
+{
+ int total_mb = (num_physpages >> (20 - PAGE_SHIFT));
+
+ return MAX(512, (total_mb * 80)/100);
+}
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.h b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.h
new file mode 100644
index 000000000000..ba84e4ffddd1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.h
@@ -0,0 +1,48 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_LINUX_TRACEFILE_H__
+#define __LIBCFS_LINUX_TRACEFILE_H__
+
+/**
+ * three types of trace_data in linux
+ */
+typedef enum {
+ CFS_TCD_TYPE_PROC = 0,
+ CFS_TCD_TYPE_SOFTIRQ,
+ CFS_TCD_TYPE_IRQ,
+ CFS_TCD_TYPE_MAX
+} cfs_trace_buf_type_t;
+
+#endif
diff --git a/drivers/staging/lustre/lustre/libcfs/lwt.c b/drivers/staging/lustre/lustre/libcfs/lwt.c
new file mode 100644
index 000000000000..b631f7dde8e7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/lwt.c
@@ -0,0 +1,266 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/lwt.c
+ *
+ * Author: Eric Barton <eeb@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+#if LWT_SUPPORT
+
+#if !KLWT_SUPPORT
+int lwt_enabled;
+lwt_cpu_t lwt_cpus[NR_CPUS];
+#endif
+
+int lwt_pages_per_cpu;
+
+/* NB only root is allowed to retrieve LWT info; it's an open door into the
+ * kernel... */
+
+int
+lwt_lookup_string (int *size, char *knl_ptr,
+ char *user_ptr, int user_size)
+{
+ int maxsize = 128;
+
+ /* knl_ptr was retrieved from an LWT snapshot and the caller wants to
+ * turn it into a string. NB we can crash with an access violation
+ * trying to determine the string length, so we're trusting our
+ * caller... */
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ return (-EPERM);
+
+ if (user_size > 0 &&
+ maxsize > user_size)
+ maxsize = user_size;
+
+ *size = strnlen (knl_ptr, maxsize - 1) + 1;
+
+ if (user_ptr != NULL) {
+ if (user_size < 4)
+ return (-EINVAL);
+
+ if (copy_to_user (user_ptr, knl_ptr, *size))
+ return (-EFAULT);
+
+ /* Did I truncate the string? */
+ if (knl_ptr[*size - 1] != 0)
+ copy_to_user (user_ptr + *size - 4, "...", 4);
+ }
+
+ return (0);
+}
+
+int
+lwt_control (int enable, int clear)
+{
+ lwt_page_t *p;
+ int i;
+ int j;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ return (-EPERM);
+
+ if (!enable) {
+ LWT_EVENT(0,0,0,0);
+ lwt_enabled = 0;
+ mb();
+ /* give people some time to stop adding traces */
+ schedule_timeout(10);
+ }
+
+ for (i = 0; i < num_online_cpus(); i++) {
+ p = lwt_cpus[i].lwtc_current_page;
+
+ if (p == NULL)
+ return (-ENODATA);
+
+ if (!clear)
+ continue;
+
+ for (j = 0; j < lwt_pages_per_cpu; j++) {
+ memset (p->lwtp_events, 0, PAGE_CACHE_SIZE);
+
+ p = list_entry (p->lwtp_list.next,
+ lwt_page_t, lwtp_list);
+ }
+ }
+
+ if (enable) {
+ lwt_enabled = 1;
+ mb();
+ LWT_EVENT(0,0,0,0);
+ }
+
+ return (0);
+}
+
+int
+lwt_snapshot (cfs_cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size)
+{
+ const int events_per_page = PAGE_CACHE_SIZE / sizeof(lwt_event_t);
+ const int bytes_per_page = events_per_page * sizeof(lwt_event_t);
+ lwt_page_t *p;
+ int i;
+ int j;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ return (-EPERM);
+
+ *ncpu = num_online_cpus();
+ *total_size = num_online_cpus() * lwt_pages_per_cpu *
+ bytes_per_page;
+ *now = get_cycles();
+
+ if (user_ptr == NULL)
+ return (0);
+
+ for (i = 0; i < num_online_cpus(); i++) {
+ p = lwt_cpus[i].lwtc_current_page;
+
+ if (p == NULL)
+ return (-ENODATA);
+
+ for (j = 0; j < lwt_pages_per_cpu; j++) {
+ if (copy_to_user(user_ptr, p->lwtp_events,
+ bytes_per_page))
+ return (-EFAULT);
+
+ user_ptr = ((char *)user_ptr) + bytes_per_page;
+ p = list_entry(p->lwtp_list.next,
+ lwt_page_t, lwtp_list);
+ }
+ }
+
+ return (0);
+}
+
+int
+lwt_init ()
+{
+ int i;
+ int j;
+
+ for (i = 0; i < num_online_cpus(); i++)
+ if (lwt_cpus[i].lwtc_current_page != NULL)
+ return (-EALREADY);
+
+ LASSERT (!lwt_enabled);
+
+ /* NULL pointers, zero scalars */
+ memset (lwt_cpus, 0, sizeof (lwt_cpus));
+ lwt_pages_per_cpu =
+ LWT_MEMORY / (num_online_cpus() * PAGE_CACHE_SIZE);
+
+ for (i = 0; i < num_online_cpus(); i++)
+ for (j = 0; j < lwt_pages_per_cpu; j++) {
+ struct page *page = alloc_page (GFP_KERNEL);
+ lwt_page_t *lwtp;
+
+ if (page == NULL) {
+ CERROR ("Can't allocate page\n");
+ lwt_fini ();
+ return (-ENOMEM);
+ }
+
+ LIBCFS_ALLOC(lwtp, sizeof (*lwtp));
+ if (lwtp == NULL) {
+ CERROR ("Can't allocate lwtp\n");
+ __free_page(page);
+ lwt_fini ();
+ return (-ENOMEM);
+ }
+
+ lwtp->lwtp_page = page;
+ lwtp->lwtp_events = page_address(page);
+ memset (lwtp->lwtp_events, 0, PAGE_CACHE_SIZE);
+
+ if (j == 0) {
+ INIT_LIST_HEAD (&lwtp->lwtp_list);
+ lwt_cpus[i].lwtc_current_page = lwtp;
+ } else {
+ list_add (&lwtp->lwtp_list,
+ &lwt_cpus[i].lwtc_current_page->lwtp_list);
+ }
+ }
+
+ lwt_enabled = 1;
+ mb();
+
+ LWT_EVENT(0,0,0,0);
+
+ return (0);
+}
+
+void
+lwt_fini ()
+{
+ int i;
+
+ lwt_control(0, 0);
+
+ for (i = 0; i < num_online_cpus(); i++)
+ while (lwt_cpus[i].lwtc_current_page != NULL) {
+ lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page;
+
+ if (list_empty (&lwtp->lwtp_list)) {
+ lwt_cpus[i].lwtc_current_page = NULL;
+ } else {
+ lwt_cpus[i].lwtc_current_page =
+ list_entry (lwtp->lwtp_list.next,
+ lwt_page_t, lwtp_list);
+
+ list_del (&lwtp->lwtp_list);
+ }
+
+ __free_page (lwtp->lwtp_page);
+ LIBCFS_FREE (lwtp, sizeof (*lwtp));
+ }
+}
+
+EXPORT_SYMBOL(lwt_enabled);
+EXPORT_SYMBOL(lwt_cpus);
+
+EXPORT_SYMBOL(lwt_init);
+EXPORT_SYMBOL(lwt_fini);
+EXPORT_SYMBOL(lwt_lookup_string);
+EXPORT_SYMBOL(lwt_control);
+EXPORT_SYMBOL(lwt_snapshot);
+#endif
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
new file mode 100644
index 000000000000..3372537c6f3b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -0,0 +1,498 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/libcfs/libcfs_crypto.h>
+#include <linux/lnet/lib-lnet.h>
+#include <linux/lnet/lnet.h>
+#include "tracefile.h"
+
+void
+kportal_memhog_free (struct libcfs_device_userstate *ldu)
+{
+ struct page **level0p = &ldu->ldu_memhog_root_page;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ if (*level0p != NULL) {
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+
+ while (count1 < PAGE_CACHE_SIZE/sizeof(struct page *) &&
+ *level1p != NULL) {
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+
+ while (count2 < PAGE_CACHE_SIZE/sizeof(struct page *) &&
+ *level2p != NULL) {
+
+ __free_page(*level2p);
+ ldu->ldu_memhog_pages--;
+ level2p++;
+ count2++;
+ }
+
+ __free_page(*level1p);
+ ldu->ldu_memhog_pages--;
+ level1p++;
+ count1++;
+ }
+
+ __free_page(*level0p);
+ ldu->ldu_memhog_pages--;
+
+ *level0p = NULL;
+ }
+
+ LASSERT (ldu->ldu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags)
+{
+ struct page **level0p;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ LASSERT (ldu->ldu_memhog_pages == 0);
+ LASSERT (ldu->ldu_memhog_root_page == NULL);
+
+ if (npages < 0)
+ return -EINVAL;
+
+ if (npages == 0)
+ return 0;
+
+ level0p = &ldu->ldu_memhog_root_page;
+ *level0p = alloc_page(flags);
+ if (*level0p == NULL)
+ return -ENOMEM;
+ ldu->ldu_memhog_pages++;
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+ memset(level1p, 0, PAGE_CACHE_SIZE);
+
+ while (ldu->ldu_memhog_pages < npages &&
+ count1 < PAGE_CACHE_SIZE/sizeof(struct page *)) {
+
+ if (cfs_signal_pending())
+ return (-EINTR);
+
+ *level1p = alloc_page(flags);
+ if (*level1p == NULL)
+ return -ENOMEM;
+ ldu->ldu_memhog_pages++;
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+ memset(level2p, 0, PAGE_CACHE_SIZE);
+
+ while (ldu->ldu_memhog_pages < npages &&
+ count2 < PAGE_CACHE_SIZE/sizeof(struct page *)) {
+
+ if (cfs_signal_pending())
+ return (-EINTR);
+
+ *level2p = alloc_page(flags);
+ if (*level2p == NULL)
+ return (-ENOMEM);
+ ldu->ldu_memhog_pages++;
+
+ level2p++;
+ count2++;
+ }
+
+ level1p++;
+ count1++;
+ }
+
+ return 0;
+}
+
+/* called when opening /dev/device */
+static int libcfs_psdev_open(unsigned long flags, void *args)
+{
+ struct libcfs_device_userstate *ldu;
+ ENTRY;
+
+ try_module_get(THIS_MODULE);
+
+ LIBCFS_ALLOC(ldu, sizeof(*ldu));
+ if (ldu != NULL) {
+ ldu->ldu_memhog_pages = 0;
+ ldu->ldu_memhog_root_page = NULL;
+ }
+ *(struct libcfs_device_userstate **)args = ldu;
+
+ RETURN(0);
+}
+
+/* called when closing /dev/device */
+static int libcfs_psdev_release(unsigned long flags, void *args)
+{
+ struct libcfs_device_userstate *ldu;
+ ENTRY;
+
+ ldu = (struct libcfs_device_userstate *)args;
+ if (ldu != NULL) {
+ kportal_memhog_free(ldu);
+ LIBCFS_FREE(ldu, sizeof(*ldu));
+ }
+
+ module_put(THIS_MODULE);
+ RETURN(0);
+}
+
+static struct rw_semaphore ioctl_list_sem;
+static struct list_head ioctl_list;
+
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
+{
+ int rc = 0;
+
+ down_write(&ioctl_list_sem);
+ if (!list_empty(&hand->item))
+ rc = -EBUSY;
+ else
+ list_add_tail(&hand->item, &ioctl_list);
+ up_write(&ioctl_list_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(libcfs_register_ioctl);
+
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
+{
+ int rc = 0;
+
+ down_write(&ioctl_list_sem);
+ if (list_empty(&hand->item))
+ rc = -ENOENT;
+ else
+ list_del_init(&hand->item);
+ up_write(&ioctl_list_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
+
+static int libcfs_ioctl_int(struct cfs_psdev_file *pfile,unsigned long cmd,
+ void *arg, struct libcfs_ioctl_data *data)
+{
+ int err = -EINVAL;
+ ENTRY;
+
+ switch (cmd) {
+ case IOC_LIBCFS_CLEAR_DEBUG:
+ libcfs_debug_clear_buffer();
+ RETURN(0);
+ /*
+ * case IOC_LIBCFS_PANIC:
+ * Handled in arch/cfs_module.c
+ */
+ case IOC_LIBCFS_MARK_DEBUG:
+ if (data->ioc_inlbuf1 == NULL ||
+ data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
+ RETURN(-EINVAL);
+ libcfs_debug_mark_buffer(data->ioc_inlbuf1);
+ RETURN(0);
+#if LWT_SUPPORT
+ case IOC_LIBCFS_LWT_CONTROL:
+ err = lwt_control ((data->ioc_flags & 1) != 0,
+ (data->ioc_flags & 2) != 0);
+ break;
+
+ case IOC_LIBCFS_LWT_SNAPSHOT: {
+ cfs_cycles_t now;
+ int ncpu;
+ int total_size;
+
+ err = lwt_snapshot (&now, &ncpu, &total_size,
+ data->ioc_pbuf1, data->ioc_plen1);
+ data->ioc_u64[0] = now;
+ data->ioc_u32[0] = ncpu;
+ data->ioc_u32[1] = total_size;
+
+ /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */
+ data->ioc_u32[2] = sizeof(lwt_event_t);
+ data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where);
+
+ if (err == 0 &&
+ libcfs_ioctl_popdata(arg, data, sizeof (*data)))
+ err = -EFAULT;
+ break;
+ }
+
+ case IOC_LIBCFS_LWT_LOOKUP_STRING:
+ err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
+ data->ioc_pbuf2, data->ioc_plen2);
+ if (err == 0 &&
+ libcfs_ioctl_popdata(arg, data, sizeof (*data)))
+ err = -EFAULT;
+ break;
+#endif
+ case IOC_LIBCFS_MEMHOG:
+ if (pfile->private_data == NULL) {
+ err = -EINVAL;
+ } else {
+ kportal_memhog_free(pfile->private_data);
+ /* XXX The ioc_flags is not GFP flags now, need to be fixed */
+ err = kportal_memhog_alloc(pfile->private_data,
+ data->ioc_count,
+ data->ioc_flags);
+ if (err != 0)
+ kportal_memhog_free(pfile->private_data);
+ }
+ break;
+
+ case IOC_LIBCFS_PING_TEST: {
+ extern void (kping_client)(struct libcfs_ioctl_data *);
+ void (*ping)(struct libcfs_ioctl_data *);
+
+ CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
+ data->ioc_count, libcfs_nid2str(data->ioc_nid),
+ libcfs_nid2str(data->ioc_nid));
+ ping = symbol_get(kping_client);
+ if (!ping)
+ CERROR("symbol_get failed\n");
+ else {
+ ping(data);
+ symbol_put(kping_client);
+ }
+ RETURN(0);
+ }
+
+ default: {
+ struct libcfs_ioctl_handler *hand;
+ err = -EINVAL;
+ down_read(&ioctl_list_sem);
+ list_for_each_entry(hand, &ioctl_list, item) {
+ err = hand->handle_ioctl(cmd, data);
+ if (err != -EINVAL) {
+ if (err == 0)
+ err = libcfs_ioctl_popdata(arg,
+ data, sizeof (*data));
+ break;
+ }
+ }
+ up_read(&ioctl_list_sem);
+ break;
+ }
+ }
+
+ RETURN(err);
+}
+
+static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
+{
+ char *buf;
+ struct libcfs_ioctl_data *data;
+ int err = 0;
+ ENTRY;
+
+ LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS);
+ if (buf == NULL)
+ RETURN(-ENOMEM);
+
+ /* 'cmd' and permissions get checked in our arch-specific caller */
+ if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) {
+ CERROR("PORTALS ioctl: data error\n");
+ GOTO(out, err = -EINVAL);
+ }
+ data = (struct libcfs_ioctl_data *)buf;
+
+ err = libcfs_ioctl_int(pfile, cmd, arg, data);
+
+out:
+ LIBCFS_FREE(buf, 1024);
+ RETURN(err);
+}
+
+
+struct cfs_psdev_ops libcfs_psdev_ops = {
+ libcfs_psdev_open,
+ libcfs_psdev_release,
+ NULL,
+ NULL,
+ libcfs_ioctl
+};
+
+extern int insert_proc(void);
+extern void remove_proc(void);
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+extern psdev_t libcfs_dev;
+extern struct rw_semaphore cfs_tracefile_sem;
+extern struct mutex cfs_trace_thread_mutex;
+extern struct cfs_wi_sched *cfs_sched_rehash;
+
+extern void libcfs_init_nidstrings(void);
+extern int libcfs_arch_init(void);
+extern void libcfs_arch_cleanup(void);
+
+static int init_libcfs_module(void)
+{
+ int rc;
+
+ libcfs_arch_init();
+ libcfs_init_nidstrings();
+ init_rwsem(&cfs_tracefile_sem);
+ mutex_init(&cfs_trace_thread_mutex);
+ init_rwsem(&ioctl_list_sem);
+ INIT_LIST_HEAD(&ioctl_list);
+ init_waitqueue_head(&cfs_race_waitq);
+
+ rc = libcfs_debug_init(5 * 1024 * 1024);
+ if (rc < 0) {
+ printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc);
+ return (rc);
+ }
+
+ rc = cfs_cpu_init();
+ if (rc != 0)
+ goto cleanup_debug;
+
+#if LWT_SUPPORT
+ rc = lwt_init();
+ if (rc != 0) {
+ CERROR("lwt_init: error %d\n", rc);
+ goto cleanup_debug;
+ }
+#endif
+ rc = misc_register(&libcfs_dev);
+ if (rc) {
+ CERROR("misc_register: error %d\n", rc);
+ goto cleanup_lwt;
+ }
+
+ rc = cfs_wi_startup();
+ if (rc) {
+ CERROR("initialize workitem: error %d\n", rc);
+ goto cleanup_deregister;
+ }
+
+ /* max to 4 threads, should be enough for rehash */
+ rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4);
+ rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY,
+ rc, &cfs_sched_rehash);
+ if (rc != 0) {
+ CERROR("Startup workitem scheduler: error: %d\n", rc);
+ goto cleanup_deregister;
+ }
+
+ rc = cfs_crypto_register();
+ if (rc) {
+ CERROR("cfs_crypto_regster: error %d\n", rc);
+ goto cleanup_wi;
+ }
+
+
+ rc = insert_proc();
+ if (rc) {
+ CERROR("insert_proc: error %d\n", rc);
+ goto cleanup_crypto;
+ }
+
+ CDEBUG (D_OTHER, "portals setup OK\n");
+ return 0;
+ cleanup_crypto:
+ cfs_crypto_unregister();
+ cleanup_wi:
+ cfs_wi_shutdown();
+ cleanup_deregister:
+ misc_deregister(&libcfs_dev);
+ cleanup_lwt:
+#if LWT_SUPPORT
+ lwt_fini();
+#endif
+ cleanup_debug:
+ libcfs_debug_cleanup();
+ return rc;
+}
+
+static void exit_libcfs_module(void)
+{
+ int rc;
+
+ remove_proc();
+
+ CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
+ atomic_read(&libcfs_kmemory));
+
+ if (cfs_sched_rehash != NULL) {
+ cfs_wi_sched_destroy(cfs_sched_rehash);
+ cfs_sched_rehash = NULL;
+ }
+
+ cfs_crypto_unregister();
+ cfs_wi_shutdown();
+
+ rc = misc_deregister(&libcfs_dev);
+ if (rc)
+ CERROR("misc_deregister error %d\n", rc);
+
+#if LWT_SUPPORT
+ lwt_fini();
+#endif
+ cfs_cpu_fini();
+
+ if (atomic_read(&libcfs_kmemory) != 0)
+ CERROR("Portals memory leaked: %d bytes\n",
+ atomic_read(&libcfs_kmemory));
+
+ rc = libcfs_debug_cleanup();
+ if (rc)
+ printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n",
+ rc);
+
+ fini_rwsem(&ioctl_list_sem);
+ fini_rwsem(&cfs_tracefile_sem);
+
+ libcfs_arch_cleanup();
+}
+
+cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module);
diff --git a/drivers/staging/lustre/lustre/libcfs/nidstrings.c b/drivers/staging/lustre/lustre/libcfs/nidstrings.c
new file mode 100644
index 000000000000..ccfd1078a906
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/nidstrings.c
@@ -0,0 +1,867 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/nidstrings.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/lnet.h>
+
+/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
+ * consistent in all conversion functions. Some code fragments are copied
+ * around for the sake of clarity...
+ */
+
+/* CAVEAT EMPTOR! Racey temporary buffer allocation!
+ * Choose the number of nidstrings to support the MAXIMUM expected number of
+ * concurrent users. If there are more, the returned string will be volatile.
+ * NB this number must allow for a process to be descheduled for a timeslice
+ * between getting its string and using it.
+ */
+
+static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
+static int libcfs_nidstring_idx = 0;
+
+static spinlock_t libcfs_nidstring_lock;
+
+void libcfs_init_nidstrings (void)
+{
+ spin_lock_init(&libcfs_nidstring_lock);
+}
+
+# define NIDSTR_LOCK(f) spin_lock_irqsave(&libcfs_nidstring_lock, f)
+# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f)
+
+static char *
+libcfs_next_nidstring (void)
+{
+ char *str;
+ unsigned long flags;
+
+ NIDSTR_LOCK(flags);
+
+ str = libcfs_nidstrings[libcfs_nidstring_idx++];
+ if (libcfs_nidstring_idx ==
+ sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0]))
+ libcfs_nidstring_idx = 0;
+
+ NIDSTR_UNLOCK(flags);
+ return str;
+}
+
+static int libcfs_lo_str2addr(const char *str, int nob, __u32 *addr);
+static void libcfs_ip_addr2str(__u32 addr, char *str);
+static int libcfs_ip_str2addr(const char *str, int nob, __u32 *addr);
+static void libcfs_decnum_addr2str(__u32 addr, char *str);
+static void libcfs_hexnum_addr2str(__u32 addr, char *str);
+static int libcfs_num_str2addr(const char *str, int nob, __u32 *addr);
+static int libcfs_num_parse(char *str, int len, struct list_head *list);
+static int libcfs_num_match(__u32 addr, struct list_head *list);
+
+struct netstrfns {
+ int nf_type;
+ char *nf_name;
+ char *nf_modname;
+ void (*nf_addr2str)(__u32 addr, char *str);
+ int (*nf_str2addr)(const char *str, int nob, __u32 *addr);
+ int (*nf_parse_addrlist)(char *str, int len,
+ struct list_head *list);
+ int (*nf_match_addr)(__u32 addr, struct list_head *list);
+};
+
+static struct netstrfns libcfs_netstrfns[] = {
+ {/* .nf_type */ LOLND,
+ /* .nf_name */ "lo",
+ /* .nf_modname */ "klolnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_lo_str2addr,
+ /* .nf_parse_addr*/ libcfs_num_parse,
+ /* .nf_match_addr*/ libcfs_num_match},
+ {/* .nf_type */ SOCKLND,
+ /* .nf_name */ "tcp",
+ /* .nf_modname */ "ksocklnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ O2IBLND,
+ /* .nf_name */ "o2ib",
+ /* .nf_modname */ "ko2iblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ CIBLND,
+ /* .nf_name */ "cib",
+ /* .nf_modname */ "kciblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ OPENIBLND,
+ /* .nf_name */ "openib",
+ /* .nf_modname */ "kopeniblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ IIBLND,
+ /* .nf_name */ "iib",
+ /* .nf_modname */ "kiiblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ VIBLND,
+ /* .nf_name */ "vib",
+ /* .nf_modname */ "kviblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ RALND,
+ /* .nf_name */ "ra",
+ /* .nf_modname */ "kralnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ QSWLND,
+ /* .nf_name */ "elan",
+ /* .nf_modname */ "kqswlnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr,
+ /* .nf_parse_addrlist*/ libcfs_num_parse,
+ /* .nf_match_addr*/ libcfs_num_match},
+ {/* .nf_type */ GMLND,
+ /* .nf_name */ "gm",
+ /* .nf_modname */ "kgmlnd",
+ /* .nf_addr2str */ libcfs_hexnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr,
+ /* .nf_parse_addrlist*/ libcfs_num_parse,
+ /* .nf_match_addr*/ libcfs_num_match},
+ {/* .nf_type */ MXLND,
+ /* .nf_name */ "mx",
+ /* .nf_modname */ "kmxlnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr,
+ /* .nf_parse_addrlist*/ cfs_ip_addr_parse,
+ /* .nf_match_addr*/ cfs_ip_addr_match},
+ {/* .nf_type */ PTLLND,
+ /* .nf_name */ "ptl",
+ /* .nf_modname */ "kptllnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr,
+ /* .nf_parse_addrlist*/ libcfs_num_parse,
+ /* .nf_match_addr*/ libcfs_num_match},
+ {/* .nf_type */ GNILND,
+ /* .nf_name */ "gni",
+ /* .nf_modname */ "kgnilnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr,
+ /* .nf_parse_addrlist*/ libcfs_num_parse,
+ /* .nf_match_addr*/ libcfs_num_match},
+ /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */
+ {/* .nf_type */ -1},
+};
+
+const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]);
+
+int
+libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
+{
+ *addr = 0;
+ return 1;
+}
+
+void
+libcfs_ip_addr2str(__u32 addr, char *str)
+{
+#if 0 /* never lookup */
+#endif
+ snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u",
+ (addr >> 24) & 0xff, (addr >> 16) & 0xff,
+ (addr >> 8) & 0xff, addr & 0xff);
+}
+
+/* CAVEAT EMPTOR XscanfX
+ * I use "%n" at the end of a sscanf format to detect trailing junk. However
+ * sscanf may return immediately if it sees the terminating '0' in a string, so
+ * I initialise the %n variable to the expected length. If sscanf sets it;
+ * fine, if it doesn't, then the scan ended at the end of the string, which is
+ * fine too :) */
+
+int
+libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
+{
+ int a;
+ int b;
+ int c;
+ int d;
+ int n = nob; /* XscanfX */
+
+ /* numeric IP? */
+ if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
+ n == nob &&
+ (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
+ (c & ~0xff) == 0 && (d & ~0xff) == 0) {
+ *addr = ((a<<24)|(b<<16)|(c<<8)|d);
+ return 1;
+ }
+
+ return 0;
+}
+
+void
+libcfs_decnum_addr2str(__u32 addr, char *str)
+{
+ snprintf(str, LNET_NIDSTR_SIZE, "%u", addr);
+}
+
+void
+libcfs_hexnum_addr2str(__u32 addr, char *str)
+{
+ snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr);
+}
+
+int
+libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
+{
+ int n;
+
+ n = nob;
+ if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
+ return 1;
+
+ n = nob;
+ if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
+ return 1;
+
+ n = nob;
+ if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
+ return 1;
+
+ return 0;
+}
+
+struct netstrfns *
+libcfs_lnd2netstrfns(int lnd)
+{
+ int i;
+
+ if (lnd >= 0)
+ for (i = 0; i < libcfs_nnetstrfns; i++)
+ if (lnd == libcfs_netstrfns[i].nf_type)
+ return &libcfs_netstrfns[i];
+
+ return NULL;
+}
+
+struct netstrfns *
+libcfs_namenum2netstrfns(const char *name)
+{
+ struct netstrfns *nf;
+ int i;
+
+ for (i = 0; i < libcfs_nnetstrfns; i++) {
+ nf = &libcfs_netstrfns[i];
+ if (nf->nf_type >= 0 &&
+ !strncmp(name, nf->nf_name, strlen(nf->nf_name)))
+ return nf;
+ }
+ return NULL;
+}
+
+struct netstrfns *
+libcfs_name2netstrfns(const char *name)
+{
+ int i;
+
+ for (i = 0; i < libcfs_nnetstrfns; i++)
+ if (libcfs_netstrfns[i].nf_type >= 0 &&
+ !strcmp(libcfs_netstrfns[i].nf_name, name))
+ return &libcfs_netstrfns[i];
+
+ return NULL;
+}
+
+int
+libcfs_isknown_lnd(int type)
+{
+ return libcfs_lnd2netstrfns(type) != NULL;
+}
+
+char *
+libcfs_lnd2modname(int lnd)
+{
+ struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
+
+ return (nf == NULL) ? NULL : nf->nf_modname;
+}
+
+char *
+libcfs_lnd2str(int lnd)
+{
+ char *str;
+ struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
+
+ if (nf != NULL)
+ return nf->nf_name;
+
+ str = libcfs_next_nidstring();
+ snprintf(str, LNET_NIDSTR_SIZE, "?%u?", lnd);
+ return str;
+}
+
+int
+libcfs_str2lnd(const char *str)
+{
+ struct netstrfns *nf = libcfs_name2netstrfns(str);
+
+ if (nf != NULL)
+ return nf->nf_type;
+
+ return -1;
+}
+
+char *
+libcfs_net2str(__u32 net)
+{
+ int lnd = LNET_NETTYP(net);
+ int num = LNET_NETNUM(net);
+ struct netstrfns *nf = libcfs_lnd2netstrfns(lnd);
+ char *str = libcfs_next_nidstring();
+
+ if (nf == NULL)
+ snprintf(str, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num);
+ else if (num == 0)
+ snprintf(str, LNET_NIDSTR_SIZE, "%s", nf->nf_name);
+ else
+ snprintf(str, LNET_NIDSTR_SIZE, "%s%u", nf->nf_name, num);
+
+ return str;
+}
+
+char *
+libcfs_nid2str(lnet_nid_t nid)
+{
+ __u32 addr = LNET_NIDADDR(nid);
+ __u32 net = LNET_NIDNET(nid);
+ int lnd = LNET_NETTYP(net);
+ int nnum = LNET_NETNUM(net);
+ struct netstrfns *nf;
+ char *str;
+ int nob;
+
+ if (nid == LNET_NID_ANY)
+ return "<?>";
+
+ nf = libcfs_lnd2netstrfns(lnd);
+ str = libcfs_next_nidstring();
+
+ if (nf == NULL)
+ snprintf(str, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum);
+ else {
+ nf->nf_addr2str(addr, str);
+ nob = strlen(str);
+ if (nnum == 0)
+ snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s",
+ nf->nf_name);
+ else
+ snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s%u",
+ nf->nf_name, nnum);
+ }
+
+ return str;
+}
+
+static struct netstrfns *
+libcfs_str2net_internal(const char *str, __u32 *net)
+{
+ struct netstrfns *uninitialized_var(nf);
+ int nob;
+ int netnum;
+ int i;
+
+ for (i = 0; i < libcfs_nnetstrfns; i++) {
+ nf = &libcfs_netstrfns[i];
+ if (nf->nf_type >= 0 &&
+ !strncmp(str, nf->nf_name, strlen(nf->nf_name)))
+ break;
+ }
+
+ if (i == libcfs_nnetstrfns)
+ return NULL;
+
+ nob = strlen(nf->nf_name);
+
+ if (strlen(str) == (unsigned int)nob) {
+ netnum = 0;
+ } else {
+ if (nf->nf_type == LOLND) /* net number not allowed */
+ return NULL;
+
+ str += nob;
+ i = strlen(str);
+ if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
+ i != (int)strlen(str))
+ return NULL;
+ }
+
+ *net = LNET_MKNET(nf->nf_type, netnum);
+ return nf;
+}
+
+__u32
+libcfs_str2net(const char *str)
+{
+ __u32 net;
+
+ if (libcfs_str2net_internal(str, &net) != NULL)
+ return net;
+
+ return LNET_NIDNET(LNET_NID_ANY);
+}
+
+lnet_nid_t
+libcfs_str2nid(const char *str)
+{
+ const char *sep = strchr(str, '@');
+ struct netstrfns *nf;
+ __u32 net;
+ __u32 addr;
+
+ if (sep != NULL) {
+ nf = libcfs_str2net_internal(sep + 1, &net);
+ if (nf == NULL)
+ return LNET_NID_ANY;
+ } else {
+ sep = str + strlen(str);
+ net = LNET_MKNET(SOCKLND, 0);
+ nf = libcfs_lnd2netstrfns(SOCKLND);
+ LASSERT (nf != NULL);
+ }
+
+ if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
+ return LNET_NID_ANY;
+
+ return LNET_MKNID(net, addr);
+}
+
+char *
+libcfs_id2str(lnet_process_id_t id)
+{
+ char *str = libcfs_next_nidstring();
+
+ if (id.pid == LNET_PID_ANY) {
+ snprintf(str, LNET_NIDSTR_SIZE,
+ "LNET_PID_ANY-%s", libcfs_nid2str(id.nid));
+ return str;
+ }
+
+ snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s",
+ ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "",
+ (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid));
+ return str;
+}
+
+int
+libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
+{
+ if (!strcmp(str, "*")) {
+ *nidp = LNET_NID_ANY;
+ return 1;
+ }
+
+ *nidp = libcfs_str2nid(str);
+ return *nidp != LNET_NID_ANY;
+}
+
+/**
+ * Nid range list syntax.
+ * \verbatim
+ *
+ * <nidlist> :== <nidrange> [ ' ' <nidrange> ]
+ * <nidrange> :== <addrrange> '@' <net>
+ * <addrrange> :== '*' |
+ * <ipaddr_range> |
+ * <cfs_expr_list>
+ * <ipaddr_range> :== <cfs_expr_list>.<cfs_expr_list>.<cfs_expr_list>.
+ * <cfs_expr_list>
+ * <cfs_expr_list> :== <number> |
+ * <expr_list>
+ * <expr_list> :== '[' <range_expr> [ ',' <range_expr>] ']'
+ * <range_expr> :== <number> |
+ * <number> '-' <number> |
+ * <number> '-' <number> '/' <number>
+ * <net> :== <netname> | <netname><number>
+ * <netname> :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" |
+ * "vib" | "ra" | "elan" | "mx" | "ptl"
+ * \endverbatim
+ */
+
+/**
+ * Structure to represent \<nidrange\> token of the syntax.
+ *
+ * One of this is created for each \<net\> parsed.
+ */
+struct nidrange {
+ /**
+ * Link to list of this structures which is built on nid range
+ * list parsing.
+ */
+ struct list_head nr_link;
+ /**
+ * List head for addrrange::ar_link.
+ */
+ struct list_head nr_addrranges;
+ /**
+ * Flag indicating that *@<net> is found.
+ */
+ int nr_all;
+ /**
+ * Pointer to corresponding element of libcfs_netstrfns.
+ */
+ struct netstrfns *nr_netstrfns;
+ /**
+ * Number of network. E.g. 5 if \<net\> is "elan5".
+ */
+ int nr_netnum;
+};
+
+/**
+ * Structure to represent \<addrrange\> token of the syntax.
+ */
+struct addrrange {
+ /**
+ * Link to nidrange::nr_addrranges.
+ */
+ struct list_head ar_link;
+ /**
+ * List head for cfs_expr_list::el_list.
+ */
+ struct list_head ar_numaddr_ranges;
+};
+
+/**
+ * Nf_parse_addrlist method for networks using numeric addresses.
+ *
+ * Examples of such networks are gm and elan.
+ *
+ * \retval 0 if \a str parsed to numeric address
+ * \retval errno otherwise
+ */
+static int
+libcfs_num_parse(char *str, int len, struct list_head *list)
+{
+ struct cfs_expr_list *el;
+ int rc;
+
+ rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el);
+ if (rc == 0)
+ list_add_tail(&el->el_link, list);
+
+ return rc;
+}
+
+/**
+ * Parses \<addrrange\> token on the syntax.
+ *
+ * Allocates struct addrrange and links to \a nidrange via
+ * (nidrange::nr_addrranges)
+ *
+ * \retval 1 if \a src parses to '*' | \<ipaddr_range\> | \<cfs_expr_list\>
+ * \retval 0 otherwise
+ */
+static int
+parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange)
+{
+ struct addrrange *addrrange;
+
+ if (src->ls_len == 1 && src->ls_str[0] == '*') {
+ nidrange->nr_all = 1;
+ return 1;
+ }
+
+ LIBCFS_ALLOC(addrrange, sizeof(struct addrrange));
+ if (addrrange == NULL)
+ return 0;
+ list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges);
+ INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges);
+
+ return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str,
+ src->ls_len,
+ &addrrange->ar_numaddr_ranges);
+}
+
+/**
+ * Finds or creates struct nidrange.
+ *
+ * Checks if \a src is a valid network name, looks for corresponding
+ * nidrange on the ist of nidranges (\a nidlist), creates new struct
+ * nidrange if it is not found.
+ *
+ * \retval pointer to struct nidrange matching network specified via \a src
+ * \retval NULL if \a src does not match any network
+ */
+static struct nidrange *
+add_nidrange(const struct cfs_lstr *src,
+ struct list_head *nidlist)
+{
+ struct netstrfns *nf;
+ struct nidrange *nr;
+ int endlen;
+ unsigned netnum;
+
+ if (src->ls_len >= LNET_NIDSTR_SIZE)
+ return NULL;
+
+ nf = libcfs_namenum2netstrfns(src->ls_str);
+ if (nf == NULL)
+ return NULL;
+ endlen = src->ls_len - strlen(nf->nf_name);
+ if (endlen == 0)
+ /* network name only, e.g. "elan" or "tcp" */
+ netnum = 0;
+ else {
+ /* e.g. "elan25" or "tcp23", refuse to parse if
+ * network name is not appended with decimal or
+ * hexadecimal number */
+ if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name),
+ endlen, &netnum, 0, MAX_NUMERIC_VALUE))
+ return NULL;
+ }
+
+ list_for_each_entry(nr, nidlist, nr_link) {
+ if (nr->nr_netstrfns != nf)
+ continue;
+ if (nr->nr_netnum != netnum)
+ continue;
+ return nr;
+ }
+
+ LIBCFS_ALLOC(nr, sizeof(struct nidrange));
+ if (nr == NULL)
+ return NULL;
+ list_add_tail(&nr->nr_link, nidlist);
+ INIT_LIST_HEAD(&nr->nr_addrranges);
+ nr->nr_netstrfns = nf;
+ nr->nr_all = 0;
+ nr->nr_netnum = netnum;
+
+ return nr;
+}
+
+/**
+ * Parses \<nidrange\> token of the syntax.
+ *
+ * \retval 1 if \a src parses to \<addrrange\> '@' \<net\>
+ * \retval 0 otherwise
+ */
+static int
+parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
+{
+ struct cfs_lstr addrrange;
+ struct cfs_lstr net;
+ struct cfs_lstr tmp;
+ struct nidrange *nr;
+
+ tmp = *src;
+ if (cfs_gettok(src, '@', &addrrange) == 0)
+ goto failed;
+
+ if (cfs_gettok(src, '@', &net) == 0 || src->ls_str != NULL)
+ goto failed;
+
+ nr = add_nidrange(&net, nidlist);
+ if (nr == NULL)
+ goto failed;
+
+ if (parse_addrange(&addrrange, nr) != 0)
+ goto failed;
+
+ return 1;
+ failed:
+ CWARN("can't parse nidrange: \"%.*s\"\n", tmp.ls_len, tmp.ls_str);
+ return 0;
+}
+
+/**
+ * Frees addrrange structures of \a list.
+ *
+ * For each struct addrrange structure found on \a list it frees
+ * cfs_expr_list list attached to it and frees the addrrange itself.
+ *
+ * \retval none
+ */
+static void
+free_addrranges(struct list_head *list)
+{
+ while (!list_empty(list)) {
+ struct addrrange *ar;
+
+ ar = list_entry(list->next, struct addrrange, ar_link);
+
+ cfs_expr_list_free_list(&ar->ar_numaddr_ranges);
+ list_del(&ar->ar_link);
+ LIBCFS_FREE(ar, sizeof(struct addrrange));
+ }
+}
+
+/**
+ * Frees nidrange strutures of \a list.
+ *
+ * For each struct nidrange structure found on \a list it frees
+ * addrrange list attached to it and frees the nidrange itself.
+ *
+ * \retval none
+ */
+void
+cfs_free_nidlist(struct list_head *list)
+{
+ struct list_head *pos, *next;
+ struct nidrange *nr;
+
+ list_for_each_safe(pos, next, list) {
+ nr = list_entry(pos, struct nidrange, nr_link);
+ free_addrranges(&nr->nr_addrranges);
+ list_del(pos);
+ LIBCFS_FREE(nr, sizeof(struct nidrange));
+ }
+}
+
+/**
+ * Parses nid range list.
+ *
+ * Parses with rigorous syntax and overflow checking \a str into
+ * \<nidrange\> [ ' ' \<nidrange\> ], compiles \a str into set of
+ * structures and links that structure to \a nidlist. The resulting
+ * list can be used to match a NID againts set of NIDS defined by \a
+ * str.
+ * \see cfs_match_nid
+ *
+ * \retval 1 on success
+ * \retval 0 otherwise
+ */
+int
+cfs_parse_nidlist(char *str, int len, struct list_head *nidlist)
+{
+ struct cfs_lstr src;
+ struct cfs_lstr res;
+ int rc;
+ ENTRY;
+
+ src.ls_str = str;
+ src.ls_len = len;
+ INIT_LIST_HEAD(nidlist);
+ while (src.ls_str) {
+ rc = cfs_gettok(&src, ' ', &res);
+ if (rc == 0) {
+ cfs_free_nidlist(nidlist);
+ RETURN(0);
+ }
+ rc = parse_nidrange(&res, nidlist);
+ if (rc == 0) {
+ cfs_free_nidlist(nidlist);
+ RETURN(0);
+ }
+ }
+ RETURN(1);
+}
+
+/*
+ * Nf_match_addr method for networks using numeric addresses
+ *
+ * \retval 1 on match
+ * \retval 0 otherwise
+ */
+static int
+libcfs_num_match(__u32 addr, struct list_head *numaddr)
+{
+ struct cfs_expr_list *el;
+
+ LASSERT(!list_empty(numaddr));
+ el = list_entry(numaddr->next, struct cfs_expr_list, el_link);
+
+ return cfs_expr_list_match(addr, el);
+}
+
+/**
+ * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist).
+ *
+ * \see cfs_parse_nidlist()
+ *
+ * \retval 1 on match
+ * \retval 0 otherwises
+ */
+int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist)
+{
+ struct nidrange *nr;
+ struct addrrange *ar;
+ ENTRY;
+
+ list_for_each_entry(nr, nidlist, nr_link) {
+ if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid)))
+ continue;
+ if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid)))
+ continue;
+ if (nr->nr_all)
+ RETURN(1);
+ list_for_each_entry(ar, &nr->nr_addrranges, ar_link)
+ if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid),
+ &ar->ar_numaddr_ranges))
+ RETURN(1);
+ }
+ RETURN(0);
+}
+
+
+EXPORT_SYMBOL(libcfs_isknown_lnd);
+EXPORT_SYMBOL(libcfs_lnd2modname);
+EXPORT_SYMBOL(libcfs_lnd2str);
+EXPORT_SYMBOL(libcfs_str2lnd);
+EXPORT_SYMBOL(libcfs_net2str);
+EXPORT_SYMBOL(libcfs_nid2str);
+EXPORT_SYMBOL(libcfs_str2net);
+EXPORT_SYMBOL(libcfs_str2nid);
+EXPORT_SYMBOL(libcfs_id2str);
+EXPORT_SYMBOL(libcfs_str2anynid);
+EXPORT_SYMBOL(cfs_free_nidlist);
+EXPORT_SYMBOL(cfs_parse_nidlist);
+EXPORT_SYMBOL(cfs_match_nid);
diff --git a/drivers/staging/lustre/lustre/libcfs/prng.c b/drivers/staging/lustre/lustre/libcfs/prng.c
new file mode 100644
index 000000000000..69224d84bc4b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/prng.c
@@ -0,0 +1,139 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/prng.c
+ *
+ * concatenation of following two 16-bit multiply with carry generators
+ * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16,
+ * number and carry packed within the same 32 bit integer.
+ * algorithm recommended by Marsaglia
+*/
+
+#include <linux/libcfs/libcfs.h>
+
+/*
+From: George Marsaglia <geo@stat.fsu.edu>
+Newsgroups: sci.math
+Subject: Re: A RANDOM NUMBER GENERATOR FOR C
+Date: Tue, 30 Sep 1997 05:29:35 -0700
+
+ * You may replace the two constants 36969 and 18000 by any
+ * pair of distinct constants from this list:
+ * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584
+ * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243
+ * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974
+ * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114
+ * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088
+ * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834
+ * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013
+ * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083
+ * (or any other 16-bit constants k for which both k*2^16-1
+ * and k*2^15-1 are prime) */
+
+#define RANDOM_CONST_A 18030
+#define RANDOM_CONST_B 29013
+
+static unsigned int seed_x = 521288629;
+static unsigned int seed_y = 362436069;
+
+/**
+ * cfs_rand - creates new seeds
+ *
+ * First it creates new seeds from the previous seeds. Then it generates a
+ * new psuedo random number for use.
+ *
+ * Returns a pseudo-random 32-bit integer
+ */
+unsigned int cfs_rand(void)
+{
+ seed_x = RANDOM_CONST_A * (seed_x & 65535) + (seed_x >> 16);
+ seed_y = RANDOM_CONST_B * (seed_y & 65535) + (seed_y >> 16);
+
+ return ((seed_x << 16) + (seed_y & 65535));
+}
+EXPORT_SYMBOL(cfs_rand);
+
+/**
+ * cfs_srand - sets the inital seed
+ * @seed1 : (seed_x) should have the most entropy in the low bits of the word
+ * @seed2 : (seed_y) should have the most entropy in the high bits of the word
+ *
+ * Replaces the original seeds with new values. Used to generate a new pseudo
+ * random numbers.
+ */
+void cfs_srand(unsigned int seed1, unsigned int seed2)
+{
+ if (seed1)
+ seed_x = seed1; /* use default seeds if parameter is 0 */
+ if (seed2)
+ seed_y = seed2;
+}
+EXPORT_SYMBOL(cfs_srand);
+
+/**
+ * cfs_get_random_bytes - generate a bunch of random numbers
+ * @buf : buffer to fill with random numbers
+ * @size: size of passed in buffer
+ *
+ * Fills a buffer with random bytes
+ */
+void cfs_get_random_bytes(void *buf, int size)
+{
+ int *p = buf;
+ int rem, tmp;
+
+ LASSERT(size >= 0);
+
+ rem = min((int)((unsigned long)buf & (sizeof(int) - 1)), size);
+ if (rem) {
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp ^= cfs_rand();
+ memcpy(buf, &tmp, rem);
+ p = buf + rem;
+ size -= rem;
+ }
+
+ while (size >= sizeof(int)) {
+ get_random_bytes(&tmp, sizeof(tmp));
+ *p = cfs_rand() ^ tmp;
+ size -= sizeof(int);
+ p++;
+ }
+ buf = p;
+ if (size) {
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp ^= cfs_rand();
+ memcpy(buf, &tmp, size);
+ }
+}
+EXPORT_SYMBOL(cfs_get_random_bytes);
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c
new file mode 100644
index 000000000000..439e71dfae33
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/tracefile.c
@@ -0,0 +1,1195 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/tracefile.c
+ *
+ * Author: Zach Brown <zab@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+#include "tracefile.h"
+
+#include <linux/libcfs/libcfs.h>
+
+/* XXX move things up to the top, comment */
+union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
+
+char cfs_tracefile[TRACEFILE_NAME_SIZE];
+long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
+static struct tracefiled_ctl trace_tctl;
+struct mutex cfs_trace_thread_mutex;
+static int thread_running = 0;
+
+atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
+
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+ struct cfs_trace_cpu_data *tcd);
+
+static inline struct cfs_trace_page *
+cfs_tage_from_list(struct list_head *list)
+{
+ return list_entry(list, struct cfs_trace_page, linkage);
+}
+
+static struct cfs_trace_page *cfs_tage_alloc(int gfp)
+{
+ struct page *page;
+ struct cfs_trace_page *tage;
+
+ /* My caller is trying to free memory */
+ if (!in_interrupt() && memory_pressure_get())
+ return NULL;
+
+ /*
+ * Don't spam console with allocation failures: they will be reported
+ * by upper layer anyway.
+ */
+ gfp |= __GFP_NOWARN;
+ page = alloc_page(gfp);
+ if (page == NULL)
+ return NULL;
+
+ tage = kmalloc(sizeof(*tage), gfp);
+ if (tage == NULL) {
+ __free_page(page);
+ return NULL;
+ }
+
+ tage->page = page;
+ atomic_inc(&cfs_tage_allocated);
+ return tage;
+}
+
+static void cfs_tage_free(struct cfs_trace_page *tage)
+{
+ __LASSERT(tage != NULL);
+ __LASSERT(tage->page != NULL);
+
+ __free_page(tage->page);
+ kfree(tage);
+ atomic_dec(&cfs_tage_allocated);
+}
+
+static void cfs_tage_to_tail(struct cfs_trace_page *tage,
+ struct list_head *queue)
+{
+ __LASSERT(tage != NULL);
+ __LASSERT(queue != NULL);
+
+ list_move_tail(&tage->linkage, queue);
+}
+
+int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, int gfp,
+ struct list_head *stock)
+{
+ int i;
+
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
+ struct cfs_trace_page *tage;
+
+ tage = cfs_tage_alloc(gfp);
+ if (tage == NULL)
+ break;
+ list_add_tail(&tage->linkage, stock);
+ }
+ return i;
+}
+
+/* return a page that has 'len' bytes left at the end */
+static struct cfs_trace_page *
+cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
+{
+ struct cfs_trace_page *tage;
+
+ if (tcd->tcd_cur_pages > 0) {
+ __LASSERT(!list_empty(&tcd->tcd_pages));
+ tage = cfs_tage_from_list(tcd->tcd_pages.prev);
+ if (tage->used + len <= PAGE_CACHE_SIZE)
+ return tage;
+ }
+
+ if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
+ if (tcd->tcd_cur_stock_pages > 0) {
+ tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
+ --tcd->tcd_cur_stock_pages;
+ list_del_init(&tage->linkage);
+ } else {
+ tage = cfs_tage_alloc(GFP_ATOMIC);
+ if (unlikely(tage == NULL)) {
+ if ((!memory_pressure_get() ||
+ in_interrupt()) && printk_ratelimit())
+ printk(KERN_WARNING
+ "cannot allocate a tage (%ld)\n",
+ tcd->tcd_cur_pages);
+ return NULL;
+ }
+ }
+
+ tage->used = 0;
+ tage->cpu = smp_processor_id();
+ tage->type = tcd->tcd_type;
+ list_add_tail(&tage->linkage, &tcd->tcd_pages);
+ tcd->tcd_cur_pages++;
+
+ if (tcd->tcd_cur_pages > 8 && thread_running) {
+ struct tracefiled_ctl *tctl = &trace_tctl;
+ /*
+ * wake up tracefiled to process some pages.
+ */
+ wake_up(&tctl->tctl_waitq);
+ }
+ return tage;
+ }
+ return NULL;
+}
+
+static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
+{
+ int pgcount = tcd->tcd_cur_pages / 10;
+ struct page_collection pc;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ if (printk_ratelimit())
+ printk(KERN_WARNING "debug daemon buffer overflowed; "
+ "discarding 10%% of pages (%d of %ld)\n",
+ pgcount + 1, tcd->tcd_cur_pages);
+
+ INIT_LIST_HEAD(&pc.pc_pages);
+ spin_lock_init(&pc.pc_lock);
+
+ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+ if (pgcount-- == 0)
+ break;
+
+ list_move_tail(&tage->linkage, &pc.pc_pages);
+ tcd->tcd_cur_pages--;
+ }
+ put_pages_on_tcd_daemon_list(&pc, tcd);
+}
+
+/* return a page that has 'len' bytes left at the end */
+static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
+ unsigned long len)
+{
+ struct cfs_trace_page *tage;
+
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ if (len > PAGE_CACHE_SIZE) {
+ printk(KERN_ERR
+ "cowardly refusing to write %lu bytes in a page\n", len);
+ return NULL;
+ }
+
+ tage = cfs_trace_get_tage_try(tcd, len);
+ if (tage != NULL)
+ return tage;
+ if (thread_running)
+ cfs_tcd_shrink(tcd);
+ if (tcd->tcd_cur_pages > 0) {
+ tage = cfs_tage_from_list(tcd->tcd_pages.next);
+ tage->used = 0;
+ cfs_tage_to_tail(tage, &tcd->tcd_pages);
+ }
+ return tage;
+}
+
+int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
+ const char *format, ...)
+{
+ va_list args;
+ int rc;
+
+ va_start(args, format);
+ rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
+ va_end(args);
+
+ return rc;
+}
+EXPORT_SYMBOL(libcfs_debug_msg);
+
+int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
+ const char *format1, va_list args,
+ const char *format2, ...)
+{
+ struct cfs_trace_cpu_data *tcd = NULL;
+ struct ptldebug_header header = {0};
+ struct cfs_trace_page *tage;
+ /* string_buf is used only if tcd != NULL, and is always set then */
+ char *string_buf = NULL;
+ char *debug_buf;
+ int known_size;
+ int needed = 85; /* average message length */
+ int max_nob;
+ va_list ap;
+ int depth;
+ int i;
+ int remain;
+ int mask = msgdata->msg_mask;
+ char *file = (char *)msgdata->msg_file;
+ cfs_debug_limit_state_t *cdls = msgdata->msg_cdls;
+
+ if (strchr(file, '/'))
+ file = strrchr(file, '/') + 1;
+
+ tcd = cfs_trace_get_tcd();
+
+ /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
+ * pins us to a particular CPU. This avoids an smp_processor_id()
+ * warning on Linux when debugging is enabled. */
+ cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
+
+ if (tcd == NULL) /* arch may not log in IRQ context */
+ goto console;
+
+ if (tcd->tcd_cur_pages == 0)
+ header.ph_flags |= PH_FLAG_FIRST_RECORD;
+
+ if (tcd->tcd_shutting_down) {
+ cfs_trace_put_tcd(tcd);
+ tcd = NULL;
+ goto console;
+ }
+
+ depth = __current_nesting_level();
+ known_size = strlen(file) + 1 + depth;
+ if (msgdata->msg_fn)
+ known_size += strlen(msgdata->msg_fn) + 1;
+
+ if (libcfs_debug_binary)
+ known_size += sizeof(header);
+
+ /*/
+ * '2' used because vsnprintf return real size required for output
+ * _without_ terminating NULL.
+ * if needed is to small for this format.
+ */
+ for (i = 0; i < 2; i++) {
+ tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
+ if (tage == NULL) {
+ if (needed + known_size > PAGE_CACHE_SIZE)
+ mask |= D_ERROR;
+
+ cfs_trace_put_tcd(tcd);
+ tcd = NULL;
+ goto console;
+ }
+
+ string_buf = (char *)page_address(tage->page) +
+ tage->used + known_size;
+
+ max_nob = PAGE_CACHE_SIZE - tage->used - known_size;
+ if (max_nob <= 0) {
+ printk(KERN_EMERG "negative max_nob: %d\n",
+ max_nob);
+ mask |= D_ERROR;
+ cfs_trace_put_tcd(tcd);
+ tcd = NULL;
+ goto console;
+ }
+
+ needed = 0;
+ if (format1) {
+ va_copy(ap, args);
+ needed = vsnprintf(string_buf, max_nob, format1, ap);
+ va_end(ap);
+ }
+
+ if (format2) {
+ remain = max_nob - needed;
+ if (remain < 0)
+ remain = 0;
+
+ va_start(ap, format2);
+ needed += vsnprintf(string_buf + needed, remain,
+ format2, ap);
+ va_end(ap);
+ }
+
+ if (needed < max_nob) /* well. printing ok.. */
+ break;
+ }
+
+ if (*(string_buf+needed-1) != '\n')
+ printk(KERN_INFO "format at %s:%d:%s doesn't end in "
+ "newline\n", file, msgdata->msg_line, msgdata->msg_fn);
+
+ header.ph_len = known_size + needed;
+ debug_buf = (char *)page_address(tage->page) + tage->used;
+
+ if (libcfs_debug_binary) {
+ memcpy(debug_buf, &header, sizeof(header));
+ tage->used += sizeof(header);
+ debug_buf += sizeof(header);
+ }
+
+ /* indent message according to the nesting level */
+ while (depth-- > 0) {
+ *(debug_buf++) = '.';
+ ++ tage->used;
+ }
+
+ strcpy(debug_buf, file);
+ tage->used += strlen(file) + 1;
+ debug_buf += strlen(file) + 1;
+
+ if (msgdata->msg_fn) {
+ strcpy(debug_buf, msgdata->msg_fn);
+ tage->used += strlen(msgdata->msg_fn) + 1;
+ debug_buf += strlen(msgdata->msg_fn) + 1;
+ }
+
+ __LASSERT(debug_buf == string_buf);
+
+ tage->used += needed;
+ __LASSERT (tage->used <= PAGE_CACHE_SIZE);
+
+console:
+ if ((mask & libcfs_printk) == 0) {
+ /* no console output requested */
+ if (tcd != NULL)
+ cfs_trace_put_tcd(tcd);
+ return 1;
+ }
+
+ if (cdls != NULL) {
+ if (libcfs_console_ratelimit &&
+ cdls->cdls_next != 0 && /* not first time ever */
+ !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
+ /* skipping a console message */
+ cdls->cdls_count++;
+ if (tcd != NULL)
+ cfs_trace_put_tcd(tcd);
+ return 1;
+ }
+
+ if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
+ libcfs_console_max_delay
+ + cfs_time_seconds(10))) {
+ /* last timeout was a long time ago */
+ cdls->cdls_delay /= libcfs_console_backoff * 4;
+ } else {
+ cdls->cdls_delay *= libcfs_console_backoff;
+
+ if (cdls->cdls_delay < libcfs_console_min_delay)
+ cdls->cdls_delay = libcfs_console_min_delay;
+ else if (cdls->cdls_delay > libcfs_console_max_delay)
+ cdls->cdls_delay = libcfs_console_max_delay;
+ }
+
+ /* ensure cdls_next is never zero after it's been seen */
+ cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
+ }
+
+ if (tcd != NULL) {
+ cfs_print_to_console(&header, mask, string_buf, needed, file,
+ msgdata->msg_fn);
+ cfs_trace_put_tcd(tcd);
+ } else {
+ string_buf = cfs_trace_get_console_buffer();
+
+ needed = 0;
+ if (format1 != NULL) {
+ va_copy(ap, args);
+ needed = vsnprintf(string_buf,
+ CFS_TRACE_CONSOLE_BUFFER_SIZE,
+ format1, ap);
+ va_end(ap);
+ }
+ if (format2 != NULL) {
+ remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
+ if (remain > 0) {
+ va_start(ap, format2);
+ needed += vsnprintf(string_buf+needed, remain,
+ format2, ap);
+ va_end(ap);
+ }
+ }
+ cfs_print_to_console(&header, mask,
+ string_buf, needed, file, msgdata->msg_fn);
+
+ cfs_trace_put_console_buffer(string_buf);
+ }
+
+ if (cdls != NULL && cdls->cdls_count != 0) {
+ string_buf = cfs_trace_get_console_buffer();
+
+ needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
+ "Skipped %d previous similar message%s\n",
+ cdls->cdls_count,
+ (cdls->cdls_count > 1) ? "s" : "");
+
+ cfs_print_to_console(&header, mask,
+ string_buf, needed, file, msgdata->msg_fn);
+
+ cfs_trace_put_console_buffer(string_buf);
+ cdls->cdls_count = 0;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(libcfs_debug_vmsg2);
+
+void
+cfs_trace_assertion_failed(const char *str,
+ struct libcfs_debug_msg_data *msgdata)
+{
+ struct ptldebug_header hdr;
+
+ libcfs_panic_in_progress = 1;
+ libcfs_catastrophe = 1;
+ mb();
+
+ cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
+
+ cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
+ msgdata->msg_file, msgdata->msg_fn);
+
+ panic("Lustre debug assertion failure\n");
+
+ /* not reached */
+}
+
+static void
+panic_collect_pages(struct page_collection *pc)
+{
+ /* Do the collect_pages job on a single CPU: assumes that all other
+ * CPUs have been stopped during a panic. If this isn't true for some
+ * arch, this will have to be implemented separately in each arch. */
+ int i;
+ int j;
+ struct cfs_trace_cpu_data *tcd;
+
+ INIT_LIST_HEAD(&pc->pc_pages);
+
+ cfs_tcd_for_each(tcd, i, j) {
+ list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+ tcd->tcd_cur_pages = 0;
+
+ if (pc->pc_want_daemon_pages) {
+ list_splice_init(&tcd->tcd_daemon_pages,
+ &pc->pc_pages);
+ tcd->tcd_cur_daemon_pages = 0;
+ }
+ }
+}
+
+static void collect_pages_on_all_cpus(struct page_collection *pc)
+{
+ struct cfs_trace_cpu_data *tcd;
+ int i, cpu;
+
+ spin_lock(&pc->pc_lock);
+ cfs_for_each_possible_cpu(cpu) {
+ cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+ list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+ tcd->tcd_cur_pages = 0;
+ if (pc->pc_want_daemon_pages) {
+ list_splice_init(&tcd->tcd_daemon_pages,
+ &pc->pc_pages);
+ tcd->tcd_cur_daemon_pages = 0;
+ }
+ }
+ }
+ spin_unlock(&pc->pc_lock);
+}
+
+static void collect_pages(struct page_collection *pc)
+{
+ INIT_LIST_HEAD(&pc->pc_pages);
+
+ if (libcfs_panic_in_progress)
+ panic_collect_pages(pc);
+ else
+ collect_pages_on_all_cpus(pc);
+}
+
+static void put_pages_back_on_all_cpus(struct page_collection *pc)
+{
+ struct cfs_trace_cpu_data *tcd;
+ struct list_head *cur_head;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ int i, cpu;
+
+ spin_lock(&pc->pc_lock);
+ cfs_for_each_possible_cpu(cpu) {
+ cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+ cur_head = tcd->tcd_pages.next;
+
+ list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
+ linkage) {
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ if (tage->cpu != cpu || tage->type != i)
+ continue;
+
+ cfs_tage_to_tail(tage, cur_head);
+ tcd->tcd_cur_pages++;
+ }
+ }
+ }
+ spin_unlock(&pc->pc_lock);
+}
+
+static void put_pages_back(struct page_collection *pc)
+{
+ if (!libcfs_panic_in_progress)
+ put_pages_back_on_all_cpus(pc);
+}
+
+/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
+ * we have a good amount of data at all times for dumping during an LBUG, even
+ * if we have been steadily writing (and otherwise discarding) pages via the
+ * debug daemon. */
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+ struct cfs_trace_cpu_data *tcd)
+{
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+
+ spin_lock(&pc->pc_lock);
+ list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
+ continue;
+
+ cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
+ tcd->tcd_cur_daemon_pages++;
+
+ if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
+ struct cfs_trace_page *victim;
+
+ __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
+ victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
+
+ __LASSERT_TAGE_INVARIANT(victim);
+
+ list_del(&victim->linkage);
+ cfs_tage_free(victim);
+ tcd->tcd_cur_daemon_pages--;
+ }
+ }
+ spin_unlock(&pc->pc_lock);
+}
+
+static void put_pages_on_daemon_list(struct page_collection *pc)
+{
+ struct cfs_trace_cpu_data *tcd;
+ int i, cpu;
+
+ cfs_for_each_possible_cpu(cpu) {
+ cfs_tcd_for_each_type_lock(tcd, i, cpu)
+ put_pages_on_tcd_daemon_list(pc, tcd);
+ }
+}
+
+void cfs_trace_debug_print(void)
+{
+ struct page_collection pc;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+
+ spin_lock_init(&pc.pc_lock);
+
+ pc.pc_want_daemon_pages = 1;
+ collect_pages(&pc);
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+ char *p, *file, *fn;
+ struct page *page;
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ page = tage->page;
+ p = page_address(page);
+ while (p < ((char *)page_address(page) + tage->used)) {
+ struct ptldebug_header *hdr;
+ int len;
+ hdr = (void *)p;
+ p += sizeof(*hdr);
+ file = p;
+ p += strlen(file) + 1;
+ fn = p;
+ p += strlen(fn) + 1;
+ len = hdr->ph_len - (int)(p - (char *)hdr);
+
+ cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
+
+ p += len;
+ }
+
+ list_del(&tage->linkage);
+ cfs_tage_free(tage);
+ }
+}
+
+int cfs_tracefile_dump_all_pages(char *filename)
+{
+ struct page_collection pc;
+ struct file *filp;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ int rc;
+
+ DECL_MMSPACE;
+
+ cfs_tracefile_write_lock();
+
+ filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ filp = NULL;
+ printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
+ filename, rc);
+ goto out;
+ }
+
+ spin_lock_init(&pc.pc_lock);
+ pc.pc_want_daemon_pages = 1;
+ collect_pages(&pc);
+ if (list_empty(&pc.pc_pages)) {
+ rc = 0;
+ goto close;
+ }
+
+ /* ok, for now, just write the pages. in the future we'll be building
+ * iobufs with the pages and calling generic_direct_IO */
+ MMSPACE_OPEN;
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ rc = filp_write(filp, page_address(tage->page),
+ tage->used, filp_poff(filp));
+ if (rc != (int)tage->used) {
+ printk(KERN_WARNING "wanted to write %u but wrote "
+ "%d\n", tage->used, rc);
+ put_pages_back(&pc);
+ __LASSERT(list_empty(&pc.pc_pages));
+ break;
+ }
+ list_del(&tage->linkage);
+ cfs_tage_free(tage);
+ }
+ MMSPACE_CLOSE;
+ rc = filp_fsync(filp);
+ if (rc)
+ printk(KERN_ERR "sync returns %d\n", rc);
+close:
+ filp_close(filp, NULL);
+out:
+ cfs_tracefile_write_unlock();
+ return rc;
+}
+
+void cfs_trace_flush_pages(void)
+{
+ struct page_collection pc;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+
+ spin_lock_init(&pc.pc_lock);
+
+ pc.pc_want_daemon_pages = 1;
+ collect_pages(&pc);
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ list_del(&tage->linkage);
+ cfs_tage_free(tage);
+ }
+}
+
+int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+ const char *usr_buffer, int usr_buffer_nob)
+{
+ int nob;
+
+ if (usr_buffer_nob > knl_buffer_nob)
+ return -EOVERFLOW;
+
+ if (copy_from_user((void *)knl_buffer,
+ (void *)usr_buffer, usr_buffer_nob))
+ return -EFAULT;
+
+ nob = strnlen(knl_buffer, usr_buffer_nob);
+ while (nob-- >= 0) /* strip trailing whitespace */
+ if (!isspace(knl_buffer[nob]))
+ break;
+
+ if (nob < 0) /* empty string */
+ return -EINVAL;
+
+ if (nob == knl_buffer_nob) /* no space to terminate */
+ return -EOVERFLOW;
+
+ knl_buffer[nob + 1] = 0; /* terminate */
+ return 0;
+}
+EXPORT_SYMBOL(cfs_trace_copyin_string);
+
+int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+ const char *knl_buffer, char *append)
+{
+ /* NB if 'append' != NULL, it's a single character to append to the
+ * copied out string - usually "\n", for /proc entries and "" (i.e. a
+ * terminating zero byte) for sysctl entries */
+ int nob = strlen(knl_buffer);
+
+ if (nob > usr_buffer_nob)
+ nob = usr_buffer_nob;
+
+ if (copy_to_user(usr_buffer, knl_buffer, nob))
+ return -EFAULT;
+
+ if (append != NULL && nob < usr_buffer_nob) {
+ if (copy_to_user(usr_buffer + nob, append, 1))
+ return -EFAULT;
+
+ nob++;
+ }
+
+ return nob;
+}
+EXPORT_SYMBOL(cfs_trace_copyout_string);
+
+int cfs_trace_allocate_string_buffer(char **str, int nob)
+{
+ if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */
+ return -EINVAL;
+
+ *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO);
+ if (*str == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void cfs_trace_free_string_buffer(char *str, int nob)
+{
+ kfree(str);
+}
+
+int cfs_trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
+{
+ char *str;
+ int rc;
+
+ rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
+ if (rc != 0)
+ return rc;
+
+ rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
+ usr_str, usr_str_nob);
+ if (rc != 0)
+ goto out;
+
+ if (str[0] != '/') {
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = cfs_tracefile_dump_all_pages(str);
+out:
+ cfs_trace_free_string_buffer(str, usr_str_nob + 1);
+ return rc;
+}
+
+int cfs_trace_daemon_command(char *str)
+{
+ int rc = 0;
+
+ cfs_tracefile_write_lock();
+
+ if (strcmp(str, "stop") == 0) {
+ cfs_tracefile_write_unlock();
+ cfs_trace_stop_thread();
+ cfs_tracefile_write_lock();
+ memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
+
+ } else if (strncmp(str, "size=", 5) == 0) {
+ cfs_tracefile_size = simple_strtoul(str + 5, NULL, 0);
+ if (cfs_tracefile_size < 10 || cfs_tracefile_size > 20480)
+ cfs_tracefile_size = CFS_TRACEFILE_SIZE;
+ else
+ cfs_tracefile_size <<= 20;
+
+ } else if (strlen(str) >= sizeof(cfs_tracefile)) {
+ rc = -ENAMETOOLONG;
+ } else if (str[0] != '/') {
+ rc = -EINVAL;
+ } else {
+ strcpy(cfs_tracefile, str);
+
+ printk(KERN_INFO
+ "Lustre: debug daemon will attempt to start writing "
+ "to %s (%lukB max)\n", cfs_tracefile,
+ (long)(cfs_tracefile_size >> 10));
+
+ cfs_trace_start_thread();
+ }
+
+ cfs_tracefile_write_unlock();
+ return rc;
+}
+
+int cfs_trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
+{
+ char *str;
+ int rc;
+
+ rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
+ if (rc != 0)
+ return rc;
+
+ rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
+ usr_str, usr_str_nob);
+ if (rc == 0)
+ rc = cfs_trace_daemon_command(str);
+
+ cfs_trace_free_string_buffer(str, usr_str_nob + 1);
+ return rc;
+}
+
+int cfs_trace_set_debug_mb(int mb)
+{
+ int i;
+ int j;
+ int pages;
+ int limit = cfs_trace_max_debug_mb();
+ struct cfs_trace_cpu_data *tcd;
+
+ if (mb < num_possible_cpus()) {
+ printk(KERN_WARNING
+ "Lustre: %d MB is too small for debug buffer size, "
+ "setting it to %d MB.\n", mb, num_possible_cpus());
+ mb = num_possible_cpus();
+ }
+
+ if (mb > limit) {
+ printk(KERN_WARNING
+ "Lustre: %d MB is too large for debug buffer size, "
+ "setting it to %d MB.\n", mb, limit);
+ mb = limit;
+ }
+
+ mb /= num_possible_cpus();
+ pages = mb << (20 - PAGE_CACHE_SHIFT);
+
+ cfs_tracefile_write_lock();
+
+ cfs_tcd_for_each(tcd, i, j)
+ tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
+
+ cfs_tracefile_write_unlock();
+
+ return 0;
+}
+
+int cfs_trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
+{
+ char str[32];
+ int rc;
+
+ rc = cfs_trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
+ if (rc < 0)
+ return rc;
+
+ return cfs_trace_set_debug_mb(simple_strtoul(str, NULL, 0));
+}
+
+int cfs_trace_get_debug_mb(void)
+{
+ int i;
+ int j;
+ struct cfs_trace_cpu_data *tcd;
+ int total_pages = 0;
+
+ cfs_tracefile_read_lock();
+
+ cfs_tcd_for_each(tcd, i, j)
+ total_pages += tcd->tcd_max_pages;
+
+ cfs_tracefile_read_unlock();
+
+ return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1;
+}
+
+static int tracefiled(void *arg)
+{
+ struct page_collection pc;
+ struct tracefiled_ctl *tctl = arg;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ struct file *filp;
+ int last_loop = 0;
+ int rc;
+
+ DECL_MMSPACE;
+
+ /* we're started late enough that we pick up init's fs context */
+ /* this is so broken in uml? what on earth is going on? */
+
+ spin_lock_init(&pc.pc_lock);
+ complete(&tctl->tctl_start);
+
+ while (1) {
+ wait_queue_t __wait;
+
+ pc.pc_want_daemon_pages = 0;
+ collect_pages(&pc);
+ if (list_empty(&pc.pc_pages))
+ goto end_loop;
+
+ filp = NULL;
+ cfs_tracefile_read_lock();
+ if (cfs_tracefile[0] != 0) {
+ filp = filp_open(cfs_tracefile,
+ O_CREAT | O_RDWR | O_LARGEFILE,
+ 0600);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ filp = NULL;
+ printk(KERN_WARNING "couldn't open %s: "
+ "%d\n", cfs_tracefile, rc);
+ }
+ }
+ cfs_tracefile_read_unlock();
+ if (filp == NULL) {
+ put_pages_on_daemon_list(&pc);
+ __LASSERT(list_empty(&pc.pc_pages));
+ goto end_loop;
+ }
+
+ MMSPACE_OPEN;
+
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
+ linkage) {
+ static loff_t f_pos;
+
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ if (f_pos >= (off_t)cfs_tracefile_size)
+ f_pos = 0;
+ else if (f_pos > (off_t)filp_size(filp))
+ f_pos = filp_size(filp);
+
+ rc = filp_write(filp, page_address(tage->page),
+ tage->used, &f_pos);
+ if (rc != (int)tage->used) {
+ printk(KERN_WARNING "wanted to write %u "
+ "but wrote %d\n", tage->used, rc);
+ put_pages_back(&pc);
+ __LASSERT(list_empty(&pc.pc_pages));
+ }
+ }
+ MMSPACE_CLOSE;
+
+ filp_close(filp, NULL);
+ put_pages_on_daemon_list(&pc);
+ if (!list_empty(&pc.pc_pages)) {
+ int i;
+
+ printk(KERN_ALERT "Lustre: trace pages aren't "
+ " empty\n");
+ printk(KERN_ERR "total cpus(%d): ",
+ num_possible_cpus());
+ for (i = 0; i < num_possible_cpus(); i++)
+ if (cpu_online(i))
+ printk(KERN_ERR "%d(on) ", i);
+ else
+ printk(KERN_ERR "%d(off) ", i);
+ printk(KERN_ERR "\n");
+
+ i = 0;
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
+ linkage)
+ printk(KERN_ERR "page %d belongs to cpu "
+ "%d\n", ++i, tage->cpu);
+ printk(KERN_ERR "There are %d pages unwritten\n",
+ i);
+ }
+ __LASSERT(list_empty(&pc.pc_pages));
+end_loop:
+ if (atomic_read(&tctl->tctl_shutdown)) {
+ if (last_loop == 0) {
+ last_loop = 1;
+ continue;
+ } else {
+ break;
+ }
+ }
+ init_waitqueue_entry_current(&__wait);
+ add_wait_queue(&tctl->tctl_waitq, &__wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ waitq_timedwait(&__wait, TASK_INTERRUPTIBLE,
+ cfs_time_seconds(1));
+ remove_wait_queue(&tctl->tctl_waitq, &__wait);
+ }
+ complete(&tctl->tctl_stop);
+ return 0;
+}
+
+int cfs_trace_start_thread(void)
+{
+ struct tracefiled_ctl *tctl = &trace_tctl;
+ int rc = 0;
+
+ mutex_lock(&cfs_trace_thread_mutex);
+ if (thread_running)
+ goto out;
+
+ init_completion(&tctl->tctl_start);
+ init_completion(&tctl->tctl_stop);
+ init_waitqueue_head(&tctl->tctl_waitq);
+ atomic_set(&tctl->tctl_shutdown, 0);
+
+ if (IS_ERR(kthread_run(tracefiled, tctl, "ktracefiled"))) {
+ rc = -ECHILD;
+ goto out;
+ }
+
+ wait_for_completion(&tctl->tctl_start);
+ thread_running = 1;
+out:
+ mutex_unlock(&cfs_trace_thread_mutex);
+ return rc;
+}
+
+void cfs_trace_stop_thread(void)
+{
+ struct tracefiled_ctl *tctl = &trace_tctl;
+
+ mutex_lock(&cfs_trace_thread_mutex);
+ if (thread_running) {
+ printk(KERN_INFO
+ "Lustre: shutting down debug daemon thread...\n");
+ atomic_set(&tctl->tctl_shutdown, 1);
+ wait_for_completion(&tctl->tctl_stop);
+ thread_running = 0;
+ }
+ mutex_unlock(&cfs_trace_thread_mutex);
+}
+
+int cfs_tracefile_init(int max_pages)
+{
+ struct cfs_trace_cpu_data *tcd;
+ int i;
+ int j;
+ int rc;
+ int factor;
+
+ rc = cfs_tracefile_init_arch();
+ if (rc != 0)
+ return rc;
+
+ cfs_tcd_for_each(tcd, i, j) {
+ /* tcd_pages_factor is initialized int tracefile_init_arch. */
+ factor = tcd->tcd_pages_factor;
+ INIT_LIST_HEAD(&tcd->tcd_pages);
+ INIT_LIST_HEAD(&tcd->tcd_stock_pages);
+ INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+ tcd->tcd_cur_pages = 0;
+ tcd->tcd_cur_stock_pages = 0;
+ tcd->tcd_cur_daemon_pages = 0;
+ tcd->tcd_max_pages = (max_pages * factor) / 100;
+ LASSERT(tcd->tcd_max_pages > 0);
+ tcd->tcd_shutting_down = 0;
+ }
+
+ return 0;
+}
+
+static void trace_cleanup_on_all_cpus(void)
+{
+ struct cfs_trace_cpu_data *tcd;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ int i, cpu;
+
+ cfs_for_each_possible_cpu(cpu) {
+ cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+ tcd->tcd_shutting_down = 1;
+
+ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
+ linkage) {
+ __LASSERT_TAGE_INVARIANT(tage);
+
+ list_del(&tage->linkage);
+ cfs_tage_free(tage);
+ }
+
+ tcd->tcd_cur_pages = 0;
+ }
+ }
+}
+
+static void cfs_trace_cleanup(void)
+{
+ struct page_collection pc;
+
+ INIT_LIST_HEAD(&pc.pc_pages);
+ spin_lock_init(&pc.pc_lock);
+
+ trace_cleanup_on_all_cpus();
+
+ cfs_tracefile_fini_arch();
+}
+
+void cfs_tracefile_exit(void)
+{
+ cfs_trace_stop_thread();
+ cfs_trace_cleanup();
+}
diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.h b/drivers/staging/lustre/lustre/libcfs/tracefile.h
new file mode 100644
index 000000000000..7e8d17c12b5b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/tracefile.h
@@ -0,0 +1,340 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LIBCFS_TRACEFILE_H__
+#define __LIBCFS_TRACEFILE_H__
+
+#include <linux/libcfs/libcfs.h>
+
+#include "linux/linux-tracefile.h"
+
+/* trace file lock routines */
+
+#define TRACEFILE_NAME_SIZE 1024
+extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
+extern long long cfs_tracefile_size;
+
+extern void libcfs_run_debug_log_upcall(char *file);
+
+int cfs_tracefile_init_arch(void);
+void cfs_tracefile_fini_arch(void);
+
+void cfs_tracefile_read_lock(void);
+void cfs_tracefile_read_unlock(void);
+void cfs_tracefile_write_lock(void);
+void cfs_tracefile_write_unlock(void);
+
+int cfs_tracefile_dump_all_pages(char *filename);
+void cfs_trace_debug_print(void);
+void cfs_trace_flush_pages(void);
+int cfs_trace_start_thread(void);
+void cfs_trace_stop_thread(void);
+int cfs_tracefile_init(int max_pages);
+void cfs_tracefile_exit(void);
+
+
+
+int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+ const char *usr_buffer, int usr_buffer_nob);
+int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+ const char *knl_str, char *append);
+int cfs_trace_allocate_string_buffer(char **str, int nob);
+void cfs_trace_free_string_buffer(char *str, int nob);
+int cfs_trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob);
+int cfs_trace_daemon_command(char *str);
+int cfs_trace_daemon_command_usrstr(void *usr_str, int usr_str_nob);
+int cfs_trace_set_debug_mb(int mb);
+int cfs_trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob);
+int cfs_trace_get_debug_mb(void);
+
+extern void libcfs_debug_dumplog_internal(void *arg);
+extern void libcfs_register_panic_notifier(void);
+extern void libcfs_unregister_panic_notifier(void);
+extern int libcfs_panic_in_progress;
+extern int cfs_trace_max_debug_mb(void);
+
+#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
+#define CFS_TRACEFILE_SIZE (500 << 20)
+
+#ifdef LUSTRE_TRACEFILE_PRIVATE
+
+/*
+ * Private declare for tracefile
+ */
+#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
+
+#define CFS_TRACEFILE_SIZE (500 << 20)
+
+/* Size of a buffer for sprinting console messages if we can't get a page
+ * from system */
+#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024
+
+union cfs_trace_data_union {
+ struct cfs_trace_cpu_data {
+ /*
+ * Even though this structure is meant to be per-CPU, locking
+ * is needed because in some places the data may be accessed
+ * from other CPUs. This lock is directly used in trace_get_tcd
+ * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and
+ * tcd_for_each_type_lock
+ */
+ spinlock_t tcd_lock;
+ unsigned long tcd_lock_flags;
+
+ /*
+ * pages with trace records not yet processed by tracefiled.
+ */
+ struct list_head tcd_pages;
+ /* number of pages on ->tcd_pages */
+ unsigned long tcd_cur_pages;
+
+ /*
+ * pages with trace records already processed by
+ * tracefiled. These pages are kept in memory, so that some
+ * portion of log can be written in the event of LBUG. This
+ * list is maintained in LRU order.
+ *
+ * Pages are moved to ->tcd_daemon_pages by tracefiled()
+ * (put_pages_on_daemon_list()). LRU pages from this list are
+ * discarded when list grows too large.
+ */
+ struct list_head tcd_daemon_pages;
+ /* number of pages on ->tcd_daemon_pages */
+ unsigned long tcd_cur_daemon_pages;
+
+ /*
+ * Maximal number of pages allowed on ->tcd_pages and
+ * ->tcd_daemon_pages each.
+ * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
+ * implementation.
+ */
+ unsigned long tcd_max_pages;
+
+ /*
+ * preallocated pages to write trace records into. Pages from
+ * ->tcd_stock_pages are moved to ->tcd_pages by
+ * portals_debug_msg().
+ *
+ * This list is necessary, because on some platforms it's
+ * impossible to perform efficient atomic page allocation in a
+ * non-blockable context.
+ *
+ * Such platforms fill ->tcd_stock_pages "on occasion", when
+ * tracing code is entered in blockable context.
+ *
+ * trace_get_tage_try() tries to get a page from
+ * ->tcd_stock_pages first and resorts to atomic page
+ * allocation only if this queue is empty. ->tcd_stock_pages
+ * is replenished when tracing code is entered in blocking
+ * context (darwin-tracefile.c:trace_get_tcd()). We try to
+ * maintain TCD_STOCK_PAGES (40 by default) pages in this
+ * queue. Atomic allocation is only required if more than
+ * TCD_STOCK_PAGES pagesful are consumed by trace records all
+ * emitted in non-blocking contexts. Which is quite unlikely.
+ */
+ struct list_head tcd_stock_pages;
+ /* number of pages on ->tcd_stock_pages */
+ unsigned long tcd_cur_stock_pages;
+
+ unsigned short tcd_shutting_down;
+ unsigned short tcd_cpu;
+ unsigned short tcd_type;
+ /* The factors to share debug memory. */
+ unsigned short tcd_pages_factor;
+ } tcd;
+ char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
+};
+
+#define TCD_MAX_TYPES 8
+extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS];
+
+#define cfs_tcd_for_each(tcd, i, j) \
+ for (i = 0; cfs_trace_data[i] != NULL; i++) \
+ for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \
+ j < num_possible_cpus(); \
+ j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
+
+#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \
+ for (i = 0; cfs_trace_data[i] && \
+ (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \
+ cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
+
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+struct page_collection {
+ struct list_head pc_pages;
+ /*
+ * spin-lock protecting ->pc_pages. It is taken by smp_call_function()
+ * call-back functions. XXX nikita: Which is horrible: all processors
+ * receive NMI at the same time only to be serialized by this
+ * lock. Probably ->pc_pages should be replaced with an array of
+ * NR_CPUS elements accessed locklessly.
+ */
+ spinlock_t pc_lock;
+ /*
+ * if this flag is set, collect_pages() will spill both
+ * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
+ * only ->tcd_pages are spilled.
+ */
+ int pc_want_daemon_pages;
+};
+
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+struct tracefiled_ctl {
+ struct completion tctl_start;
+ struct completion tctl_stop;
+ wait_queue_head_t tctl_waitq;
+ pid_t tctl_pid;
+ atomic_t tctl_shutdown;
+};
+
+/*
+ * small data-structure for each page owned by tracefiled.
+ */
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+struct cfs_trace_page {
+ /*
+ * page itself
+ */
+ struct page *page;
+ /*
+ * linkage into one of the lists in trace_data_union or
+ * page_collection
+ */
+ struct list_head linkage;
+ /*
+ * number of bytes used within this page
+ */
+ unsigned int used;
+ /*
+ * cpu that owns this page
+ */
+ unsigned short cpu;
+ /*
+ * type(context) of this page
+ */
+ unsigned short type;
+};
+
+extern void cfs_set_ptldebug_header(struct ptldebug_header *header,
+ struct libcfs_debug_msg_data *m,
+ unsigned long stack);
+extern void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+ const char *buf, int len, const char *file,
+ const char *fn);
+
+extern int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
+extern void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
+
+/**
+ * trace_buf_type_t, trace_buf_idx_get() and trace_console_buffers[][]
+ * are not public libcfs API; they should be defined in
+ * platform-specific tracefile include files
+ * (see, for example, linux-tracefile.h).
+ */
+
+extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
+extern cfs_trace_buf_type_t cfs_trace_buf_idx_get(void);
+
+static inline char *
+cfs_trace_get_console_buffer(void)
+{
+ unsigned int i = get_cpu();
+ unsigned int j = cfs_trace_buf_idx_get();
+
+ return cfs_trace_console_buffers[i][j];
+}
+
+static inline void
+cfs_trace_put_console_buffer(char *buffer)
+{
+ put_cpu();
+}
+
+static inline struct cfs_trace_cpu_data *
+cfs_trace_get_tcd(void)
+{
+ struct cfs_trace_cpu_data *tcd =
+ &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
+
+ cfs_trace_lock_tcd(tcd, 0);
+
+ return tcd;
+}
+
+static inline void
+cfs_trace_put_tcd (struct cfs_trace_cpu_data *tcd)
+{
+ cfs_trace_unlock_tcd(tcd, 0);
+
+ put_cpu();
+}
+
+int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, int gfp,
+ struct list_head *stock);
+
+
+int cfs_tcd_owns_tage(struct cfs_trace_cpu_data *tcd,
+ struct cfs_trace_page *tage);
+
+extern void cfs_trace_assertion_failed(const char *str,
+ struct libcfs_debug_msg_data *m);
+
+/* ASSERTION that is safe to use within the debug system */
+#define __LASSERT(cond) \
+do { \
+ if (unlikely(!(cond))) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
+ cfs_trace_assertion_failed("ASSERTION("#cond") failed", \
+ &msgdata); \
+ } \
+} while (0)
+
+#define __LASSERT_TAGE_INVARIANT(tage) \
+do { \
+ __LASSERT(tage != NULL); \
+ __LASSERT(tage->page != NULL); \
+ __LASSERT(tage->used <= PAGE_CACHE_SIZE); \
+ __LASSERT(page_count(tage->page) > 0); \
+} while (0)
+
+#endif /* LUSTRE_TRACEFILE_PRIVATE */
+
+#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/drivers/staging/lustre/lustre/libcfs/upcall_cache.c b/drivers/staging/lustre/lustre/libcfs/upcall_cache.c
new file mode 100644
index 000000000000..18c68c3493b8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/upcall_cache.c
@@ -0,0 +1,462 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/upcall_cache.c
+ *
+ * Supplementary groups cache.
+ */
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/lucache.h>
+
+static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache,
+ __u64 key, void *args)
+{
+ struct upcall_cache_entry *entry;
+
+ LIBCFS_ALLOC(entry, sizeof(*entry));
+ if (!entry)
+ return NULL;
+
+ UC_CACHE_SET_NEW(entry);
+ INIT_LIST_HEAD(&entry->ue_hash);
+ entry->ue_key = key;
+ atomic_set(&entry->ue_refcount, 0);
+ init_waitqueue_head(&entry->ue_waitq);
+ if (cache->uc_ops->init_entry)
+ cache->uc_ops->init_entry(entry, args);
+ return entry;
+}
+
+/* protected by cache lock */
+static void free_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ if (cache->uc_ops->free_entry)
+ cache->uc_ops->free_entry(cache, entry);
+
+ list_del(&entry->ue_hash);
+ CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
+ entry, entry->ue_key);
+ LIBCFS_FREE(entry, sizeof(*entry));
+}
+
+static inline int upcall_compare(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ __u64 key, void *args)
+{
+ if (entry->ue_key != key)
+ return -1;
+
+ if (cache->uc_ops->upcall_compare)
+ return cache->uc_ops->upcall_compare(cache, entry, key, args);
+
+ return 0;
+}
+
+static inline int downcall_compare(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ __u64 key, void *args)
+{
+ if (entry->ue_key != key)
+ return -1;
+
+ if (cache->uc_ops->downcall_compare)
+ return cache->uc_ops->downcall_compare(cache, entry, key, args);
+
+ return 0;
+}
+
+static inline void get_entry(struct upcall_cache_entry *entry)
+{
+ atomic_inc(&entry->ue_refcount);
+}
+
+static inline void put_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ if (atomic_dec_and_test(&entry->ue_refcount) &&
+ (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
+ free_entry(cache, entry);
+ }
+}
+
+static int check_unlink_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ if (UC_CACHE_IS_VALID(entry) &&
+ cfs_time_before(cfs_time_current(), entry->ue_expire))
+ return 0;
+
+ if (UC_CACHE_IS_ACQUIRING(entry)) {
+ if (entry->ue_acquire_expire == 0 ||
+ cfs_time_before(cfs_time_current(),
+ entry->ue_acquire_expire))
+ return 0;
+
+ UC_CACHE_SET_EXPIRED(entry);
+ wake_up_all(&entry->ue_waitq);
+ } else if (!UC_CACHE_IS_INVALID(entry)) {
+ UC_CACHE_SET_EXPIRED(entry);
+ }
+
+ list_del_init(&entry->ue_hash);
+ if (!atomic_read(&entry->ue_refcount))
+ free_entry(cache, entry);
+ return 1;
+}
+
+static inline int refresh_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ LASSERT(cache->uc_ops->do_upcall);
+ return cache->uc_ops->do_upcall(cache, entry);
+}
+
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+ __u64 key, void *args)
+{
+ struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
+ struct list_head *head;
+ wait_queue_t wait;
+ int rc, found;
+ ENTRY;
+
+ LASSERT(cache);
+
+ head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+find_again:
+ found = 0;
+ spin_lock(&cache->uc_lock);
+ list_for_each_entry_safe(entry, next, head, ue_hash) {
+ /* check invalid & expired items */
+ if (check_unlink_entry(cache, entry))
+ continue;
+ if (upcall_compare(cache, entry, key, args) == 0) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ if (!new) {
+ spin_unlock(&cache->uc_lock);
+ new = alloc_entry(cache, key, args);
+ if (!new) {
+ CERROR("fail to alloc entry\n");
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+ goto find_again;
+ } else {
+ list_add(&new->ue_hash, head);
+ entry = new;
+ }
+ } else {
+ if (new) {
+ free_entry(cache, new);
+ new = NULL;
+ }
+ list_move(&entry->ue_hash, head);
+ }
+ get_entry(entry);
+
+ /* acquire for new one */
+ if (UC_CACHE_IS_NEW(entry)) {
+ UC_CACHE_SET_ACQUIRING(entry);
+ UC_CACHE_CLEAR_NEW(entry);
+ spin_unlock(&cache->uc_lock);
+ rc = refresh_entry(cache, entry);
+ spin_lock(&cache->uc_lock);
+ entry->ue_acquire_expire =
+ cfs_time_shift(cache->uc_acquire_expire);
+ if (rc < 0) {
+ UC_CACHE_CLEAR_ACQUIRING(entry);
+ UC_CACHE_SET_INVALID(entry);
+ wake_up_all(&entry->ue_waitq);
+ if (unlikely(rc == -EREMCHG)) {
+ put_entry(cache, entry);
+ GOTO(out, entry = ERR_PTR(rc));
+ }
+ }
+ }
+ /* someone (and only one) is doing upcall upon this item,
+ * wait it to complete */
+ if (UC_CACHE_IS_ACQUIRING(entry)) {
+ long expiry = (entry == new) ?
+ cfs_time_seconds(cache->uc_acquire_expire) :
+ MAX_SCHEDULE_TIMEOUT;
+ long left;
+
+ init_waitqueue_entry_current(&wait);
+ add_wait_queue(&entry->ue_waitq, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock(&cache->uc_lock);
+
+ left = waitq_timedwait(&wait, TASK_INTERRUPTIBLE,
+ expiry);
+
+ spin_lock(&cache->uc_lock);
+ remove_wait_queue(&entry->ue_waitq, &wait);
+ if (UC_CACHE_IS_ACQUIRING(entry)) {
+ /* we're interrupted or upcall failed in the middle */
+ rc = left > 0 ? -EINTR : -ETIMEDOUT;
+ CERROR("acquire for key "LPU64": error %d\n",
+ entry->ue_key, rc);
+ put_entry(cache, entry);
+ GOTO(out, entry = ERR_PTR(rc));
+ }
+ }
+
+ /* invalid means error, don't need to try again */
+ if (UC_CACHE_IS_INVALID(entry)) {
+ put_entry(cache, entry);
+ GOTO(out, entry = ERR_PTR(-EIDRM));
+ }
+
+ /* check expired
+ * We can't refresh the existing one because some
+ * memory might be shared by multiple processes.
+ */
+ if (check_unlink_entry(cache, entry)) {
+ /* if expired, try again. but if this entry is
+ * created by me but too quickly turn to expired
+ * without any error, should at least give a
+ * chance to use it once.
+ */
+ if (entry != new) {
+ put_entry(cache, entry);
+ spin_unlock(&cache->uc_lock);
+ new = NULL;
+ goto find_again;
+ }
+ }
+
+ /* Now we know it's good */
+out:
+ spin_unlock(&cache->uc_lock);
+ RETURN(entry);
+}
+EXPORT_SYMBOL(upcall_cache_get_entry);
+
+void upcall_cache_put_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ ENTRY;
+
+ if (!entry) {
+ EXIT;
+ return;
+ }
+
+ LASSERT(atomic_read(&entry->ue_refcount) > 0);
+ spin_lock(&cache->uc_lock);
+ put_entry(cache, entry);
+ spin_unlock(&cache->uc_lock);
+ EXIT;
+}
+EXPORT_SYMBOL(upcall_cache_put_entry);
+
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+ void *args)
+{
+ struct upcall_cache_entry *entry = NULL;
+ struct list_head *head;
+ int found = 0, rc = 0;
+ ENTRY;
+
+ LASSERT(cache);
+
+ head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+
+ spin_lock(&cache->uc_lock);
+ list_for_each_entry(entry, head, ue_hash) {
+ if (downcall_compare(cache, entry, key, args) == 0) {
+ found = 1;
+ get_entry(entry);
+ break;
+ }
+ }
+
+ if (!found) {
+ CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
+ cache->uc_name, key);
+ /* haven't found, it's possible */
+ spin_unlock(&cache->uc_lock);
+ RETURN(-EINVAL);
+ }
+
+ if (err) {
+ CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
+ cache->uc_name, entry->ue_key, err);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (!UC_CACHE_IS_ACQUIRING(entry)) {
+ CDEBUG(D_RPCTRACE,"%s: found uptodate entry %p (key "LPU64")\n",
+ cache->uc_name, entry, entry->ue_key);
+ GOTO(out, rc = 0);
+ }
+
+ if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
+ CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
+ cache->uc_name, entry, entry->ue_key);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ spin_unlock(&cache->uc_lock);
+ if (cache->uc_ops->parse_downcall)
+ rc = cache->uc_ops->parse_downcall(cache, entry, args);
+ spin_lock(&cache->uc_lock);
+ if (rc)
+ GOTO(out, rc);
+
+ entry->ue_expire = cfs_time_shift(cache->uc_entry_expire);
+ UC_CACHE_SET_VALID(entry);
+ CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
+ cache->uc_name, entry, entry->ue_key);
+out:
+ if (rc) {
+ UC_CACHE_SET_INVALID(entry);
+ list_del_init(&entry->ue_hash);
+ }
+ UC_CACHE_CLEAR_ACQUIRING(entry);
+ spin_unlock(&cache->uc_lock);
+ wake_up_all(&entry->ue_waitq);
+ put_entry(cache, entry);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(upcall_cache_downcall);
+
+static void cache_flush(struct upcall_cache *cache, int force)
+{
+ struct upcall_cache_entry *entry, *next;
+ int i;
+ ENTRY;
+
+ spin_lock(&cache->uc_lock);
+ for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
+ list_for_each_entry_safe(entry, next,
+ &cache->uc_hashtable[i], ue_hash) {
+ if (!force && atomic_read(&entry->ue_refcount)) {
+ UC_CACHE_SET_EXPIRED(entry);
+ continue;
+ }
+ LASSERT(!atomic_read(&entry->ue_refcount));
+ free_entry(cache, entry);
+ }
+ }
+ spin_unlock(&cache->uc_lock);
+ EXIT;
+}
+
+void upcall_cache_flush_idle(struct upcall_cache *cache)
+{
+ cache_flush(cache, 0);
+}
+EXPORT_SYMBOL(upcall_cache_flush_idle);
+
+void upcall_cache_flush_all(struct upcall_cache *cache)
+{
+ cache_flush(cache, 1);
+}
+EXPORT_SYMBOL(upcall_cache_flush_all);
+
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
+{
+ struct list_head *head;
+ struct upcall_cache_entry *entry;
+ int found = 0;
+ ENTRY;
+
+ head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+
+ spin_lock(&cache->uc_lock);
+ list_for_each_entry(entry, head, ue_hash) {
+ if (upcall_compare(cache, entry, key, args) == 0) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ CWARN("%s: flush entry %p: key "LPU64", ref %d, fl %x, "
+ "cur %lu, ex %ld/%ld\n",
+ cache->uc_name, entry, entry->ue_key,
+ atomic_read(&entry->ue_refcount), entry->ue_flags,
+ cfs_time_current_sec(), entry->ue_acquire_expire,
+ entry->ue_expire);
+ UC_CACHE_SET_EXPIRED(entry);
+ if (!atomic_read(&entry->ue_refcount))
+ free_entry(cache, entry);
+ }
+ spin_unlock(&cache->uc_lock);
+}
+EXPORT_SYMBOL(upcall_cache_flush_one);
+
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+ struct upcall_cache_ops *ops)
+{
+ struct upcall_cache *cache;
+ int i;
+ ENTRY;
+
+ LIBCFS_ALLOC(cache, sizeof(*cache));
+ if (!cache)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ spin_lock_init(&cache->uc_lock);
+ rwlock_init(&cache->uc_upcall_rwlock);
+ for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&cache->uc_hashtable[i]);
+ strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1);
+ /* upcall pathname proc tunable */
+ strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1);
+ cache->uc_entry_expire = 20 * 60;
+ cache->uc_acquire_expire = 30;
+ cache->uc_ops = ops;
+
+ RETURN(cache);
+}
+EXPORT_SYMBOL(upcall_cache_init);
+
+void upcall_cache_cleanup(struct upcall_cache *cache)
+{
+ if (!cache)
+ return;
+ upcall_cache_flush_all(cache);
+ LIBCFS_FREE(cache, sizeof(*cache));
+}
+EXPORT_SYMBOL(upcall_cache_cleanup);
diff --git a/drivers/staging/lustre/lustre/libcfs/watchdog.c b/drivers/staging/lustre/lustre/libcfs/watchdog.c
new file mode 100644
index 000000000000..7c385ada3e10
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/watchdog.c
@@ -0,0 +1,516 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/watchdog.c
+ *
+ * Author: Jacob Berkman <jacob@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+#include "tracefile.h"
+
+struct lc_watchdog {
+ spinlock_t lcw_lock; /* check or change lcw_list */
+ int lcw_refcount; /* must hold lcw_pending_timers_lock */
+ timer_list_t lcw_timer; /* kernel timer */
+ struct list_head lcw_list; /* chain on pending list */
+ cfs_time_t lcw_last_touched; /* last touched stamp */
+ task_t *lcw_task; /* owner task */
+ void (*lcw_callback)(pid_t, void *);
+ void *lcw_data;
+
+ pid_t lcw_pid;
+
+ enum {
+ LC_WATCHDOG_DISABLED,
+ LC_WATCHDOG_ENABLED,
+ LC_WATCHDOG_EXPIRED
+ } lcw_state;
+};
+
+#ifdef WITH_WATCHDOG
+/*
+ * The dispatcher will complete lcw_start_completion when it starts,
+ * and lcw_stop_completion when it exits.
+ * Wake lcw_event_waitq to signal timer callback dispatches.
+ */
+static struct completion lcw_start_completion;
+static struct completion lcw_stop_completion;
+static wait_queue_head_t lcw_event_waitq;
+
+/*
+ * Set this and wake lcw_event_waitq to stop the dispatcher.
+ */
+enum {
+ LCW_FLAG_STOP = 0
+};
+static unsigned long lcw_flags = 0;
+
+/*
+ * Number of outstanding watchdogs.
+ * When it hits 1, we start the dispatcher.
+ * When it hits 0, we stop the dispatcher.
+ */
+static __u32 lcw_refcount = 0;
+static DEFINE_MUTEX(lcw_refcount_mutex);
+
+/*
+ * List of timers that have fired that need their callbacks run by the
+ * dispatcher.
+ */
+/* BH lock! */
+static DEFINE_SPINLOCK(lcw_pending_timers_lock);
+static struct list_head lcw_pending_timers = LIST_HEAD_INIT(lcw_pending_timers);
+
+/* Last time a watchdog expired */
+static cfs_time_t lcw_last_watchdog_time;
+static int lcw_recent_watchdog_count;
+
+static void
+lcw_dump(struct lc_watchdog *lcw)
+{
+ ENTRY;
+ rcu_read_lock();
+ if (lcw->lcw_task == NULL) {
+ LCONSOLE_WARN("Process " LPPID " was not found in the task "
+ "list; watchdog callback may be incomplete\n",
+ (int)lcw->lcw_pid);
+ } else {
+ libcfs_debug_dumpstack(lcw->lcw_task);
+ }
+
+ rcu_read_unlock();
+ EXIT;
+}
+
+static void lcw_cb(ulong_ptr_t data)
+{
+ struct lc_watchdog *lcw = (struct lc_watchdog *)data;
+ ENTRY;
+
+ if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
+ EXIT;
+ return;
+ }
+
+ lcw->lcw_state = LC_WATCHDOG_EXPIRED;
+
+ spin_lock_bh(&lcw->lcw_lock);
+ LASSERT(list_empty(&lcw->lcw_list));
+
+ spin_lock_bh(&lcw_pending_timers_lock);
+ lcw->lcw_refcount++; /* +1 for pending list */
+ list_add(&lcw->lcw_list, &lcw_pending_timers);
+ wake_up(&lcw_event_waitq);
+
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ spin_unlock_bh(&lcw->lcw_lock);
+ EXIT;
+}
+
+static int is_watchdog_fired(void)
+{
+ int rc;
+
+ if (test_bit(LCW_FLAG_STOP, &lcw_flags))
+ return 1;
+
+ spin_lock_bh(&lcw_pending_timers_lock);
+ rc = !list_empty(&lcw_pending_timers);
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ return rc;
+}
+
+static void lcw_dump_stack(struct lc_watchdog *lcw)
+{
+ cfs_time_t current_time;
+ cfs_duration_t delta_time;
+ struct timeval timediff;
+
+ current_time = cfs_time_current();
+ delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
+ cfs_duration_usec(delta_time, &timediff);
+
+ /*
+ * Check to see if we should throttle the watchdog timer to avoid
+ * too many dumps going to the console thus triggering an NMI.
+ */
+ delta_time = cfs_duration_sec(cfs_time_sub(current_time,
+ lcw_last_watchdog_time));
+
+ if (delta_time < libcfs_watchdog_ratelimit &&
+ lcw_recent_watchdog_count > 3) {
+ LCONSOLE_WARN("Service thread pid %u was inactive for "
+ "%lu.%.02lus. Watchdog stack traces are limited "
+ "to 3 per %d seconds, skipping this one.\n",
+ (int)lcw->lcw_pid,
+ timediff.tv_sec,
+ timediff.tv_usec / 10000,
+ libcfs_watchdog_ratelimit);
+ } else {
+ if (delta_time < libcfs_watchdog_ratelimit) {
+ lcw_recent_watchdog_count++;
+ } else {
+ memcpy(&lcw_last_watchdog_time, &current_time,
+ sizeof(current_time));
+ lcw_recent_watchdog_count = 0;
+ }
+
+ LCONSOLE_WARN("Service thread pid %u was inactive for "
+ "%lu.%.02lus. The thread might be hung, or it "
+ "might only be slow and will resume later. "
+ "Dumping the stack trace for debugging purposes:"
+ "\n",
+ (int)lcw->lcw_pid,
+ timediff.tv_sec,
+ timediff.tv_usec / 10000);
+ lcw_dump(lcw);
+ }
+}
+
+static int lcw_dispatch_main(void *data)
+{
+ int rc = 0;
+ struct lc_watchdog *lcw;
+ LIST_HEAD (zombies);
+
+ ENTRY;
+
+ complete(&lcw_start_completion);
+
+ while (1) {
+ int dumplog = 1;
+
+ cfs_wait_event_interruptible(lcw_event_waitq,
+ is_watchdog_fired(), rc);
+ CDEBUG(D_INFO, "Watchdog got woken up...\n");
+ if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
+ CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n");
+
+ spin_lock_bh(&lcw_pending_timers_lock);
+ rc = !list_empty(&lcw_pending_timers);
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ if (rc) {
+ CERROR("pending timers list was not empty at "
+ "time of watchdog dispatch shutdown\n");
+ }
+ break;
+ }
+
+ spin_lock_bh(&lcw_pending_timers_lock);
+ while (!list_empty(&lcw_pending_timers)) {
+ int is_dumplog;
+
+ lcw = list_entry(lcw_pending_timers.next,
+ struct lc_watchdog, lcw_list);
+ /* +1 ref for callback to make sure lwc wouldn't be
+ * deleted after releasing lcw_pending_timers_lock */
+ lcw->lcw_refcount++;
+ spin_unlock_bh(&lcw_pending_timers_lock);
+
+ /* lock ordering */
+ spin_lock_bh(&lcw->lcw_lock);
+ spin_lock_bh(&lcw_pending_timers_lock);
+
+ if (list_empty(&lcw->lcw_list)) {
+ /* already removed from pending list */
+ lcw->lcw_refcount--; /* -1 ref for callback */
+ if (lcw->lcw_refcount == 0)
+ list_add(&lcw->lcw_list, &zombies);
+ spin_unlock_bh(&lcw->lcw_lock);
+ /* still hold lcw_pending_timers_lock */
+ continue;
+ }
+
+ list_del_init(&lcw->lcw_list);
+ lcw->lcw_refcount--; /* -1 ref for pending list */
+
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ spin_unlock_bh(&lcw->lcw_lock);
+
+ CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
+ lcw->lcw_pid);
+ lcw_dump_stack(lcw);
+
+ is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
+ if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
+ (dumplog || !is_dumplog)) {
+ lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
+ if (dumplog && is_dumplog)
+ dumplog = 0;
+ }
+
+ spin_lock_bh(&lcw_pending_timers_lock);
+ lcw->lcw_refcount--; /* -1 ref for callback */
+ if (lcw->lcw_refcount == 0)
+ list_add(&lcw->lcw_list, &zombies);
+ }
+ spin_unlock_bh(&lcw_pending_timers_lock);
+
+ while (!list_empty(&zombies)) {
+ lcw = list_entry(lcw_pending_timers.next,
+ struct lc_watchdog, lcw_list);
+ list_del(&lcw->lcw_list);
+ LIBCFS_FREE(lcw, sizeof(*lcw));
+ }
+ }
+
+ complete(&lcw_stop_completion);
+
+ RETURN(rc);
+}
+
+static void lcw_dispatch_start(void)
+{
+ task_t *task;
+
+ ENTRY;
+ LASSERT(lcw_refcount == 1);
+
+ init_completion(&lcw_stop_completion);
+ init_completion(&lcw_start_completion);
+ init_waitqueue_head(&lcw_event_waitq);
+
+ CDEBUG(D_INFO, "starting dispatch thread\n");
+ task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd");
+ if (IS_ERR(task)) {
+ CERROR("error spawning watchdog dispatch thread: %ld\n",
+ PTR_ERR(task));
+ EXIT;
+ return;
+ }
+ wait_for_completion(&lcw_start_completion);
+ CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
+
+ EXIT;
+}
+
+static void lcw_dispatch_stop(void)
+{
+ ENTRY;
+ LASSERT(lcw_refcount == 0);
+
+ CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
+
+ set_bit(LCW_FLAG_STOP, &lcw_flags);
+ wake_up(&lcw_event_waitq);
+
+ wait_for_completion(&lcw_stop_completion);
+
+ CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
+
+ EXIT;
+}
+
+struct lc_watchdog *lc_watchdog_add(int timeout,
+ void (*callback)(pid_t, void *),
+ void *data)
+{
+ struct lc_watchdog *lcw = NULL;
+ ENTRY;
+
+ LIBCFS_ALLOC(lcw, sizeof(*lcw));
+ if (lcw == NULL) {
+ CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+
+ spin_lock_init(&lcw->lcw_lock);
+ lcw->lcw_refcount = 1; /* refcount for owner */
+ lcw->lcw_task = current;
+ lcw->lcw_pid = current_pid();
+ lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
+ lcw->lcw_data = data;
+ lcw->lcw_state = LC_WATCHDOG_DISABLED;
+
+ INIT_LIST_HEAD(&lcw->lcw_list);
+ cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
+
+ mutex_lock(&lcw_refcount_mutex);
+ if (++lcw_refcount == 1)
+ lcw_dispatch_start();
+ mutex_unlock(&lcw_refcount_mutex);
+
+ /* Keep this working in case we enable them by default */
+ if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
+ lcw->lcw_last_touched = cfs_time_current();
+ cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
+ cfs_time_current());
+ }
+
+ RETURN(lcw);
+}
+EXPORT_SYMBOL(lc_watchdog_add);
+
+static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
+{
+ cfs_time_t newtime = cfs_time_current();;
+
+ if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
+ struct timeval timediff;
+ cfs_time_t delta_time = cfs_time_sub(newtime,
+ lcw->lcw_last_touched);
+ cfs_duration_usec(delta_time, &timediff);
+
+ LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
+ "This indicates the system was overloaded (too "
+ "many service threads, or there were not enough "
+ "hardware resources).\n",
+ lcw->lcw_pid,
+ message,
+ timediff.tv_sec,
+ timediff.tv_usec / 10000);
+ }
+ lcw->lcw_last_touched = newtime;
+}
+
+static void lc_watchdog_del_pending(struct lc_watchdog *lcw)
+{
+ spin_lock_bh(&lcw->lcw_lock);
+ if (unlikely(!list_empty(&lcw->lcw_list))) {
+ spin_lock_bh(&lcw_pending_timers_lock);
+ list_del_init(&lcw->lcw_list);
+ lcw->lcw_refcount--; /* -1 ref for pending list */
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ }
+
+ spin_unlock_bh(&lcw->lcw_lock);
+}
+
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
+{
+ ENTRY;
+ LASSERT(lcw != NULL);
+
+ lc_watchdog_del_pending(lcw);
+
+ lcw_update_time(lcw, "resumed");
+ lcw->lcw_state = LC_WATCHDOG_ENABLED;
+
+ cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
+ cfs_time_seconds(timeout));
+
+ EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_touch);
+
+void lc_watchdog_disable(struct lc_watchdog *lcw)
+{
+ ENTRY;
+ LASSERT(lcw != NULL);
+
+ lc_watchdog_del_pending(lcw);
+
+ lcw_update_time(lcw, "completed");
+ lcw->lcw_state = LC_WATCHDOG_DISABLED;
+
+ EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_disable);
+
+void lc_watchdog_delete(struct lc_watchdog *lcw)
+{
+ int dead;
+
+ ENTRY;
+ LASSERT(lcw != NULL);
+
+ cfs_timer_disarm(&lcw->lcw_timer);
+
+ lcw_update_time(lcw, "stopped");
+
+ spin_lock_bh(&lcw->lcw_lock);
+ spin_lock_bh(&lcw_pending_timers_lock);
+ if (unlikely(!list_empty(&lcw->lcw_list))) {
+ list_del_init(&lcw->lcw_list);
+ lcw->lcw_refcount--; /* -1 ref for pending list */
+ }
+
+ lcw->lcw_refcount--; /* -1 ref for owner */
+ dead = lcw->lcw_refcount == 0;
+ spin_unlock_bh(&lcw_pending_timers_lock);
+ spin_unlock_bh(&lcw->lcw_lock);
+
+ if (dead)
+ LIBCFS_FREE(lcw, sizeof(*lcw));
+
+ mutex_lock(&lcw_refcount_mutex);
+ if (--lcw_refcount == 0)
+ lcw_dispatch_stop();
+ mutex_unlock(&lcw_refcount_mutex);
+
+ EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_delete);
+
+/*
+ * Provided watchdog handlers
+ */
+
+void lc_watchdog_dumplog(pid_t pid, void *data)
+{
+ libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid));
+}
+EXPORT_SYMBOL(lc_watchdog_dumplog);
+
+#else /* !defined(WITH_WATCHDOG) */
+
+struct lc_watchdog *lc_watchdog_add(int timeout,
+ void (*callback)(pid_t pid, void *),
+ void *data)
+{
+ static struct lc_watchdog watchdog;
+ return &watchdog;
+}
+EXPORT_SYMBOL(lc_watchdog_add);
+
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_touch);
+
+void lc_watchdog_disable(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_disable);
+
+void lc_watchdog_delete(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_delete);
+
+#endif
diff --git a/drivers/staging/lustre/lustre/libcfs/workitem.c b/drivers/staging/lustre/lustre/libcfs/workitem.c
new file mode 100644
index 000000000000..b533666c1900
--- /dev/null
+++ b/drivers/staging/lustre/lustre/libcfs/workitem.c
@@ -0,0 +1,475 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * libcfs/libcfs/workitem.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ * Liang Zhen <zhen.liang@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/libcfs/libcfs.h>
+
+#define CFS_WS_NAME_LEN 16
+
+typedef struct cfs_wi_sched {
+ struct list_head ws_list; /* chain on global list */
+ /** serialised workitems */
+ spinlock_t ws_lock;
+ /** where schedulers sleep */
+ wait_queue_head_t ws_waitq;
+ /** concurrent workitems */
+ struct list_head ws_runq;
+ /** rescheduled running-workitems, a workitem can be rescheduled
+ * while running in wi_action(), but we don't to execute it again
+ * unless it returns from wi_action(), so we put it on ws_rerunq
+ * while rescheduling, and move it to runq after it returns
+ * from wi_action() */
+ struct list_head ws_rerunq;
+ /** CPT-table for this scheduler */
+ struct cfs_cpt_table *ws_cptab;
+ /** CPT id for affinity */
+ int ws_cpt;
+ /** number of scheduled workitems */
+ int ws_nscheduled;
+ /** started scheduler thread, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_nthreads:30;
+ /** shutting down, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_stopping:1;
+ /** serialize starting thread, protected by cfs_wi_data::wi_glock */
+ unsigned int ws_starting:1;
+ /** scheduler name */
+ char ws_name[CFS_WS_NAME_LEN];
+} cfs_wi_sched_t;
+
+struct cfs_workitem_data {
+ /** serialize */
+ spinlock_t wi_glock;
+ /** list of all schedulers */
+ struct list_head wi_scheds;
+ /** WI module is initialized */
+ int wi_init;
+ /** shutting down the whole WI module */
+ int wi_stopping;
+} cfs_wi_data;
+
+static inline void
+cfs_wi_sched_lock(cfs_wi_sched_t *sched)
+{
+ spin_lock(&sched->ws_lock);
+}
+
+static inline void
+cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
+{
+ spin_unlock(&sched->ws_lock);
+}
+
+static inline int
+cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
+{
+ cfs_wi_sched_lock(sched);
+ if (sched->ws_stopping) {
+ cfs_wi_sched_unlock(sched);
+ return 0;
+ }
+
+ if (!list_empty(&sched->ws_runq)) {
+ cfs_wi_sched_unlock(sched);
+ return 0;
+ }
+ cfs_wi_sched_unlock(sched);
+ return 1;
+}
+
+
+/* XXX:
+ * 0. it only works when called from wi->wi_action.
+ * 1. when it returns no one shall try to schedule the workitem.
+ */
+void
+cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
+
+ cfs_wi_sched_lock(sched);
+
+ LASSERT(wi->wi_running);
+ if (wi->wi_scheduled) { /* cancel pending schedules */
+ LASSERT(!list_empty(&wi->wi_list));
+ list_del_init(&wi->wi_list);
+
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
+ }
+
+ LASSERT(list_empty(&wi->wi_list));
+
+ wi->wi_scheduled = 1; /* LBUG future schedule attempts */
+ cfs_wi_sched_unlock(sched);
+
+ return;
+}
+EXPORT_SYMBOL(cfs_wi_exit);
+
+/**
+ * cancel schedule request of workitem \a wi
+ */
+int
+cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+ int rc;
+
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
+
+ /*
+ * return 0 if it's running already, otherwise return 1, which
+ * means the workitem will not be scheduled and will not have
+ * any race with wi_action.
+ */
+ cfs_wi_sched_lock(sched);
+
+ rc = !(wi->wi_running);
+
+ if (wi->wi_scheduled) { /* cancel pending schedules */
+ LASSERT(!list_empty(&wi->wi_list));
+ list_del_init(&wi->wi_list);
+
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
+
+ wi->wi_scheduled = 0;
+ }
+
+ LASSERT (list_empty(&wi->wi_list));
+
+ cfs_wi_sched_unlock(sched);
+ return rc;
+}
+EXPORT_SYMBOL(cfs_wi_deschedule);
+
+/*
+ * Workitem scheduled with (serial == 1) is strictly serialised not only with
+ * itself, but also with others scheduled this way.
+ *
+ * Now there's only one static serialised queue, but in the future more might
+ * be added, and even dynamic creation of serialised queues might be supported.
+ */
+void
+cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+ LASSERT(!in_interrupt()); /* because we use plain spinlock */
+ LASSERT(!sched->ws_stopping);
+
+ cfs_wi_sched_lock(sched);
+
+ if (!wi->wi_scheduled) {
+ LASSERT (list_empty(&wi->wi_list));
+
+ wi->wi_scheduled = 1;
+ sched->ws_nscheduled++;
+ if (!wi->wi_running) {
+ list_add_tail(&wi->wi_list, &sched->ws_runq);
+ wake_up(&sched->ws_waitq);
+ } else {
+ list_add(&wi->wi_list, &sched->ws_rerunq);
+ }
+ }
+
+ LASSERT (!list_empty(&wi->wi_list));
+ cfs_wi_sched_unlock(sched);
+ return;
+}
+EXPORT_SYMBOL(cfs_wi_schedule);
+
+
+static int
+cfs_wi_scheduler (void *arg)
+{
+ struct cfs_wi_sched *sched = (cfs_wi_sched_t *)arg;
+
+ cfs_block_allsigs();
+
+ /* CPT affinity scheduler? */
+ if (sched->ws_cptab != NULL)
+ cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+
+ LASSERT(sched->ws_starting == 1);
+ sched->ws_starting--;
+ sched->ws_nthreads++;
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ cfs_wi_sched_lock(sched);
+
+ while (!sched->ws_stopping) {
+ int nloops = 0;
+ int rc;
+ cfs_workitem_t *wi;
+
+ while (!list_empty(&sched->ws_runq) &&
+ nloops < CFS_WI_RESCHED) {
+ wi = list_entry(sched->ws_runq.next,
+ cfs_workitem_t, wi_list);
+ LASSERT(wi->wi_scheduled && !wi->wi_running);
+
+ list_del_init(&wi->wi_list);
+
+ LASSERT(sched->ws_nscheduled > 0);
+ sched->ws_nscheduled--;
+
+ wi->wi_running = 1;
+ wi->wi_scheduled = 0;
+
+
+ cfs_wi_sched_unlock(sched);
+ nloops++;
+
+ rc = (*wi->wi_action) (wi);
+
+ cfs_wi_sched_lock(sched);
+ if (rc != 0) /* WI should be dead, even be freed! */
+ continue;
+
+ wi->wi_running = 0;
+ if (list_empty(&wi->wi_list))
+ continue;
+
+ LASSERT(wi->wi_scheduled);
+ /* wi is rescheduled, should be on rerunq now, we
+ * move it to runq so it can run action now */
+ list_move_tail(&wi->wi_list, &sched->ws_runq);
+ }
+
+ if (!list_empty(&sched->ws_runq)) {
+ cfs_wi_sched_unlock(sched);
+ /* don't sleep because some workitems still
+ * expect me to come back soon */
+ cond_resched();
+ cfs_wi_sched_lock(sched);
+ continue;
+ }
+
+ cfs_wi_sched_unlock(sched);
+ cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
+ !cfs_wi_sched_cansleep(sched), rc);
+ cfs_wi_sched_lock(sched);
+ }
+
+ cfs_wi_sched_unlock(sched);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ sched->ws_nthreads--;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ return 0;
+}
+
+
+void
+cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
+{
+ int i;
+
+ LASSERT(cfs_wi_data.wi_init);
+ LASSERT(!cfs_wi_data.wi_stopping);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ if (sched->ws_stopping) {
+ CDEBUG(D_INFO, "%s is in progress of stopping\n",
+ sched->ws_name);
+ spin_unlock(&cfs_wi_data.wi_glock);
+ return;
+ }
+
+ LASSERT(!list_empty(&sched->ws_list));
+ sched->ws_stopping = 1;
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ i = 2;
+ wake_up_all(&sched->ws_waitq);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ while (sched->ws_nthreads > 0) {
+ CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
+ "waiting for %d threads of WI sched[%s] to terminate\n",
+ sched->ws_nthreads, sched->ws_name);
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+ cfs_pause(cfs_time_seconds(1) / 20);
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+
+ list_del(&sched->ws_list);
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+ LASSERT(sched->ws_nscheduled == 0);
+
+ LIBCFS_FREE(sched, sizeof(*sched));
+}
+EXPORT_SYMBOL(cfs_wi_sched_destroy);
+
+int
+cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
+ int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
+{
+ struct cfs_wi_sched *sched;
+ int rc;
+
+ LASSERT(cfs_wi_data.wi_init);
+ LASSERT(!cfs_wi_data.wi_stopping);
+ LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
+ (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
+
+ LIBCFS_ALLOC(sched, sizeof(*sched));
+ if (sched == NULL)
+ return -ENOMEM;
+
+ strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
+ sched->ws_cptab = cptab;
+ sched->ws_cpt = cpt;
+
+ spin_lock_init(&sched->ws_lock);
+ init_waitqueue_head(&sched->ws_waitq);
+ INIT_LIST_HEAD(&sched->ws_runq);
+ INIT_LIST_HEAD(&sched->ws_rerunq);
+ INIT_LIST_HEAD(&sched->ws_list);
+
+ rc = 0;
+ while (nthrs > 0) {
+ char name[16];
+ task_t *task;
+ spin_lock(&cfs_wi_data.wi_glock);
+ while (sched->ws_starting > 0) {
+ spin_unlock(&cfs_wi_data.wi_glock);
+ schedule();
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+
+ sched->ws_starting++;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
+ snprintf(name, sizeof(name), "%s_%02d_%02d",
+ sched->ws_name, sched->ws_cpt,
+ sched->ws_nthreads);
+ } else {
+ snprintf(name, sizeof(name), "%s_%02d",
+ sched->ws_name, sched->ws_nthreads);
+ }
+
+ task = kthread_run(cfs_wi_scheduler, sched, name);
+ if (!IS_ERR(task)) {
+ nthrs--;
+ continue;
+ }
+ rc = PTR_ERR(task);
+
+ CERROR("Failed to create thread for WI scheduler %s: %d\n",
+ name, rc);
+
+ spin_lock(&cfs_wi_data.wi_glock);
+
+ /* make up for cfs_wi_sched_destroy */
+ list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+ sched->ws_starting--;
+
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ cfs_wi_sched_destroy(sched);
+ return rc;
+ }
+ spin_lock(&cfs_wi_data.wi_glock);
+ list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ *sched_pp = sched;
+ return 0;
+}
+EXPORT_SYMBOL(cfs_wi_sched_create);
+
+int
+cfs_wi_startup(void)
+{
+ memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
+
+ spin_lock_init(&cfs_wi_data.wi_glock);
+ INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
+ cfs_wi_data.wi_init = 1;
+
+ return 0;
+}
+
+void
+cfs_wi_shutdown (void)
+{
+ struct cfs_wi_sched *sched;
+
+ spin_lock(&cfs_wi_data.wi_glock);
+ cfs_wi_data.wi_stopping = 1;
+ spin_unlock(&cfs_wi_data.wi_glock);
+
+ /* nobody should contend on this list */
+ list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+ sched->ws_stopping = 1;
+ wake_up_all(&sched->ws_waitq);
+ }
+
+ list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
+ spin_lock(&cfs_wi_data.wi_glock);
+
+ while (sched->ws_nthreads != 0) {
+ spin_unlock(&cfs_wi_data.wi_glock);
+ cfs_pause(cfs_time_seconds(1) / 20);
+ spin_lock(&cfs_wi_data.wi_glock);
+ }
+ spin_unlock(&cfs_wi_data.wi_glock);
+ }
+ while (!list_empty(&cfs_wi_data.wi_scheds)) {
+ sched = list_entry(cfs_wi_data.wi_scheds.next,
+ struct cfs_wi_sched, ws_list);
+ list_del(&sched->ws_list);
+ LIBCFS_FREE(sched, sizeof(*sched));
+ }
+
+ cfs_wi_data.wi_stopping = 0;
+ cfs_wi_data.wi_init = 0;
+}
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
new file mode 100644
index 000000000000..dff0c0486e77
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -0,0 +1,13 @@
+obj-$(CONFIG_LUSTRE_FS) += lustre.o
+obj-$(CONFIG_LUSTRE_FS) += llite_lloop.o
+lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
+ rw.o lproc_llite.o namei.o symlink.o llite_mmap.o \
+ xattr.o remote_perm.o llite_rmtacl.o llite_capa.o \
+ rw26.o super25.o statahead.o \
+ ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
+ vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o
+
+llite_lloop-y := lloop.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
new file mode 100644
index 000000000000..7d6abfff9740
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -0,0 +1,675 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/quotaops.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+
+#include "llite_internal.h"
+
+static void free_dentry_data(struct rcu_head *head)
+{
+ struct ll_dentry_data *lld;
+
+ lld = container_of(head, struct ll_dentry_data, lld_rcu_head);
+ OBD_FREE_PTR(lld);
+}
+
+/* should NOT be called with the dcache lock, see fs/dcache.c */
+static void ll_release(struct dentry *de)
+{
+ struct ll_dentry_data *lld;
+ ENTRY;
+ LASSERT(de != NULL);
+ lld = ll_d2d(de);
+ if (lld == NULL) /* NFS copies the de->d_op methods (bug 4655) */
+ RETURN_EXIT;
+
+ if (lld->lld_it) {
+ ll_intent_release(lld->lld_it);
+ OBD_FREE(lld->lld_it, sizeof(*lld->lld_it));
+ }
+ LASSERT(lld->lld_cwd_count == 0);
+ LASSERT(lld->lld_mnt_count == 0);
+ de->d_fsdata = NULL;
+ call_rcu(&lld->lld_rcu_head, free_dentry_data);
+
+ EXIT;
+}
+
+/* Compare if two dentries are the same. Don't match if the existing dentry
+ * is marked invalid. Returns 1 if different, 0 if the same.
+ *
+ * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
+ * an AST before calling d_revalidate_it(). The dentry still exists (marked
+ * INVALID) so d_lookup() matches it, but we have no lock on it (so
+ * lock_match() fails) and we spin around real_lookup(). */
+int ll_dcompare(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
+{
+ ENTRY;
+
+ if (len != name->len)
+ RETURN(1);
+
+ if (memcmp(str, name->name, len))
+ RETURN(1);
+
+ CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n",
+ name->len, name->name, dentry, dentry->d_flags,
+ d_refcount(dentry));
+
+ /* mountpoint is always valid */
+ if (d_mountpoint((struct dentry *)dentry))
+ RETURN(0);
+
+ if (d_lustre_invalid(dentry))
+ RETURN(1);
+
+ RETURN(0);
+}
+
+static inline int return_if_equal(struct ldlm_lock *lock, void *data)
+{
+ if ((lock->l_flags &
+ (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) ==
+ (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA))
+ return LDLM_ITER_CONTINUE;
+ return LDLM_ITER_STOP;
+}
+
+/* find any ldlm lock of the inode in mdc and lov
+ * return 0 not find
+ * 1 find one
+ * < 0 error */
+static int find_cbdata(struct inode *inode)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct lov_stripe_md *lsm;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(inode);
+ rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
+ return_if_equal, NULL);
+ if (rc != 0)
+ RETURN(rc);
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL)
+ RETURN(rc);
+
+ rc = obd_find_cbdata(sbi->ll_dt_exp, lsm, return_if_equal, NULL);
+ ccc_inode_lsm_put(inode, lsm);
+
+ RETURN(rc);
+}
+
+/**
+ * Called when last reference to a dentry is dropped and dcache wants to know
+ * whether or not it should cache it:
+ * - return 1 to delete the dentry immediately
+ * - return 0 to cache the dentry
+ * Should NOT be called with the dcache lock, see fs/dcache.c
+ */
+static int ll_ddelete(const struct dentry *de)
+{
+ ENTRY;
+ LASSERT(de);
+
+ CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
+ d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
+ de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
+ d_unhashed((struct dentry *)de) ? "" : "hashed,",
+ list_empty(&de->d_subdirs) ? "" : "subdirs");
+
+ /* kernel >= 2.6.38 last refcount is decreased after this function. */
+ LASSERT(d_refcount(de) == 1);
+
+ /* Disable this piece of code temproarily because this is called
+ * inside dcache_lock so it's not appropriate to do lots of work
+ * here. ATTENTION: Before this piece of code enabling, LU-2487 must be
+ * resolved. */
+#if 0
+ /* if not ldlm lock for this inode, set i_nlink to 0 so that
+ * this inode can be recycled later b=20433 */
+ if (de->d_inode && !find_cbdata(de->d_inode))
+ clear_nlink(de->d_inode);
+#endif
+
+ if (d_lustre_invalid((struct dentry *)de))
+ RETURN(1);
+ RETURN(0);
+}
+
+static int ll_set_dd(struct dentry *de)
+{
+ ENTRY;
+ LASSERT(de != NULL);
+
+ CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
+ de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
+ d_refcount(de));
+
+ if (de->d_fsdata == NULL) {
+ struct ll_dentry_data *lld;
+
+ OBD_ALLOC_PTR(lld);
+ if (likely(lld != NULL)) {
+ spin_lock(&de->d_lock);
+ if (likely(de->d_fsdata == NULL))
+ de->d_fsdata = lld;
+ else
+ OBD_FREE_PTR(lld);
+ spin_unlock(&de->d_lock);
+ } else {
+ RETURN(-ENOMEM);
+ }
+ }
+
+ RETURN(0);
+}
+
+int ll_dops_init(struct dentry *de, int block, int init_sa)
+{
+ struct ll_dentry_data *lld = ll_d2d(de);
+ int rc = 0;
+
+ if (lld == NULL && block != 0) {
+ rc = ll_set_dd(de);
+ if (rc)
+ return rc;
+
+ lld = ll_d2d(de);
+ }
+
+ if (lld != NULL && init_sa != 0)
+ lld->lld_sa_generation = 0;
+
+ /* kernel >= 2.6.38 d_op is set in d_alloc() */
+ LASSERT(de->d_op == &ll_d_ops);
+ return rc;
+}
+
+void ll_intent_drop_lock(struct lookup_intent *it)
+{
+ if (it->it_op && it->d.lustre.it_lock_mode) {
+ struct lustre_handle handle;
+
+ handle.cookie = it->d.lustre.it_lock_handle;
+
+ CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
+ " from it %p\n", handle.cookie, it);
+ ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode);
+
+ /* bug 494: intent_release may be called multiple times, from
+ * this thread and we don't want to double-decref this lock */
+ it->d.lustre.it_lock_mode = 0;
+ if (it->d.lustre.it_remote_lock_mode != 0) {
+ handle.cookie = it->d.lustre.it_remote_lock_handle;
+
+ CDEBUG(D_DLMTRACE, "releasing remote lock with cookie"
+ LPX64" from it %p\n", handle.cookie, it);
+ ldlm_lock_decref(&handle,
+ it->d.lustre.it_remote_lock_mode);
+ it->d.lustre.it_remote_lock_mode = 0;
+ }
+ }
+}
+
+void ll_intent_release(struct lookup_intent *it)
+{
+ ENTRY;
+
+ CDEBUG(D_INFO, "intent %p released\n", it);
+ ll_intent_drop_lock(it);
+ /* We are still holding extra reference on a request, need to free it */
+ if (it_disposition(it, DISP_ENQ_OPEN_REF))
+ ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */
+ if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
+ ptlrpc_req_finished(it->d.lustre.it_data);
+ if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
+ * to lookup */
+ ptlrpc_req_finished(it->d.lustre.it_data);
+
+ it->d.lustre.it_disposition = 0;
+ it->d.lustre.it_data = NULL;
+ EXIT;
+}
+
+void ll_invalidate_aliases(struct inode *inode)
+{
+ struct dentry *dentry;
+ struct ll_d_hlist_node *p;
+ ENTRY;
+
+ LASSERT(inode != NULL);
+
+ CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
+ inode->i_ino, inode->i_generation, inode);
+
+ ll_lock_dcache(inode);
+ ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
+ "inode %p flags %d\n", dentry->d_name.len,
+ dentry->d_name.name, dentry, dentry->d_parent,
+ dentry->d_inode, dentry->d_flags);
+
+ if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
+ CERROR("called on root (?) dentry=%p, inode=%p "
+ "ino=%lu\n", dentry, inode, inode->i_ino);
+ lustre_dump_dentry(dentry, 1);
+ libcfs_debug_dumpstack(NULL);
+ }
+
+ d_lustre_invalidate(dentry, 0);
+ }
+ ll_unlock_dcache(inode);
+
+ EXIT;
+}
+
+int ll_revalidate_it_finish(struct ptlrpc_request *request,
+ struct lookup_intent *it,
+ struct dentry *de)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (!request)
+ RETURN(0);
+
+ if (it_disposition(it, DISP_LOOKUP_NEG))
+ RETURN(-ENOENT);
+
+ rc = ll_prep_inode(&de->d_inode, request, NULL, it);
+
+ RETURN(rc);
+}
+
+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
+{
+ LASSERT(it != NULL);
+ LASSERT(dentry != NULL);
+
+ if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
+ struct inode *inode = dentry->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
+
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+ inode, inode->i_ino, inode->i_generation);
+ ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
+ }
+
+ /* drop lookup or getattr locks immediately */
+ if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
+ /* on 2.6 there are situation when several lookups and
+ * revalidations may be requested during single operation.
+ * therefore, we don't release intent here -bzzz */
+ ll_intent_drop_lock(it);
+ }
+}
+
+void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
+{
+ struct lookup_intent *it = *itp;
+
+ if (!it || it->it_op == IT_GETXATTR)
+ it = *itp = deft;
+
+}
+
+int ll_revalidate_it(struct dentry *de, int lookup_flags,
+ struct lookup_intent *it)
+{
+ struct md_op_data *op_data;
+ struct ptlrpc_request *req = NULL;
+ struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
+ struct obd_export *exp;
+ struct inode *parent = de->d_parent->d_inode;
+ int rc;
+
+ ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
+ LL_IT2STR(it));
+
+ if (de->d_inode == NULL) {
+ __u64 ibits;
+
+ /* We can only use negative dentries if this is stat or lookup,
+ for opens and stuff we do need to query server. */
+ /* If there is IT_CREAT in intent op set, then we must throw
+ away this negative dentry and actually do the request to
+ kernel to create whatever needs to be created (if possible)*/
+ if (it && (it->it_op & IT_CREAT))
+ RETURN(0);
+
+ if (d_lustre_invalid(de))
+ RETURN(0);
+
+ ibits = MDS_INODELOCK_UPDATE;
+ rc = ll_have_md_lock(parent, &ibits, LCK_MINMODE);
+ GOTO(out_sa, rc);
+ }
+
+ /* Never execute intents for mount points.
+ * Attributes will be fixed up in ll_inode_revalidate_it */
+ if (d_mountpoint(de))
+ GOTO(out_sa, rc = 1);
+
+ /* need to get attributes in case root got changed from other client */
+ if (de == de->d_sb->s_root) {
+ rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
+ if (rc == 0)
+ rc = 1;
+ GOTO(out_sa, rc);
+ }
+
+ exp = ll_i2mdexp(de->d_inode);
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
+ ll_frob_intent(&it, &lookup_it);
+ LASSERT(it);
+
+ if (it->it_op == IT_LOOKUP && !d_lustre_invalid(de))
+ RETURN(1);
+
+ if (it->it_op == IT_OPEN) {
+ struct inode *inode = de->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
+ __u64 ibits;
+
+ /*
+ * We used to check for MDS_INODELOCK_OPEN here, but in fact
+ * just having LOOKUP lock is enough to justify inode is the
+ * same. And if inode is the same and we have suitable
+ * openhandle, then there is no point in doing another OPEN RPC
+ * just to throw away newly received openhandle. There are no
+ * security implications too, if file owner or access mode is
+ * change, LOOKUP lock is revoked.
+ */
+
+
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+ /* Check for the proper lock. */
+ ibits = MDS_INODELOCK_LOOKUP;
+ if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE))
+ goto do_lock;
+ mutex_lock(&lli->lli_och_mutex);
+ if (*och_p) { /* Everything is open already, do nothing */
+ /*(*och_usecount)++; Do not let them steal our open
+ handle from under us */
+ SET_BUT_UNUSED(och_usecount);
+ /* XXX The code above was my original idea, but in case
+ we have the handle, but we cannot use it due to later
+ checks (e.g. O_CREAT|O_EXCL flags set), nobody
+ would decrement counter increased here. So we just
+ hope the lock won't be invalidated in between. But
+ if it would be, we'll reopen the open request to
+ MDS later during file open path */
+ mutex_unlock(&lli->lli_och_mutex);
+ RETURN(1);
+ } else {
+ mutex_unlock(&lli->lli_och_mutex);
+ }
+ }
+
+ if (it->it_op == IT_GETATTR) {
+ rc = ll_statahead_enter(parent, &de, 0);
+ if (rc == 1)
+ goto mark;
+ else if (rc != -EAGAIN && rc != 0)
+ GOTO(out, rc = 0);
+ }
+
+do_lock:
+ op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
+ de->d_name.name, de->d_name.len,
+ 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ if (!IS_POSIXACL(parent) || !exp_connect_umask(exp))
+ it->it_create_mode &= ~current_umask();
+ it->it_create_mode |= M_CHECK_STALE;
+ rc = md_intent_lock(exp, op_data, NULL, 0, it,
+ lookup_flags,
+ &req, ll_md_blocking_ast, 0);
+ it->it_create_mode &= ~M_CHECK_STALE;
+ ll_finish_md_op_data(op_data);
+
+ /* If req is NULL, then md_intent_lock only tried to do a lock match;
+ * if all was well, it will return 1 if it found locks, 0 otherwise. */
+ if (req == NULL && rc >= 0) {
+ if (!rc)
+ goto do_lookup;
+ GOTO(out, rc);
+ }
+
+ if (rc < 0) {
+ if (rc != -ESTALE) {
+ CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
+ "%d\n", rc, it->d.lustre.it_status);
+ }
+ GOTO(out, rc = 0);
+ }
+
+revalidate_finish:
+ rc = ll_revalidate_it_finish(req, it, de);
+ if (rc != 0) {
+ if (rc != -ESTALE && rc != -ENOENT)
+ ll_intent_release(it);
+ GOTO(out, rc = 0);
+ }
+
+ if ((it->it_op & IT_OPEN) && de->d_inode &&
+ !S_ISREG(de->d_inode->i_mode) &&
+ !S_ISDIR(de->d_inode->i_mode)) {
+ ll_release_openhandle(de, it);
+ }
+ rc = 1;
+
+out:
+ /* We do not free request as it may be reused during following lookup
+ * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
+ * be freed in ll_lookup_it or in ll_intent_release. But if
+ * request was not completed, we need to free it. (bug 5154, 9903) */
+ if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
+ ptlrpc_req_finished(req);
+ if (rc == 0) {
+ /* mdt may grant layout lock for the newly created file, so
+ * release the lock to avoid leaking */
+ ll_intent_drop_lock(it);
+ ll_invalidate_aliases(de->d_inode);
+ } else {
+ __u64 bits = 0;
+ __u64 matched_bits = 0;
+
+ CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
+ "inode %p refc %d\n", de->d_name.len,
+ de->d_name.name, de, de->d_parent, de->d_inode,
+ d_refcount(de));
+
+ ll_set_lock_data(exp, de->d_inode, it, &bits);
+
+ /* Note: We have to match both LOOKUP and PERM lock
+ * here to make sure the dentry is valid and no one
+ * changing the permission.
+ * But if the client connects < 2.4 server, which will
+ * only grant LOOKUP lock, so we can only Match LOOKUP
+ * lock for old server */
+ if (exp_connect_flags(ll_i2mdexp(de->d_inode)) &&
+ OBD_CONNECT_LVB_TYPE)
+ matched_bits =
+ MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
+ else
+ matched_bits = MDS_INODELOCK_LOOKUP;
+
+ if (((bits & matched_bits) == matched_bits) &&
+ d_lustre_invalid(de))
+ d_lustre_revalidate(de);
+ ll_lookup_finish_locks(it, de);
+ }
+
+mark:
+ if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
+ ll_statahead_mark(parent, de);
+ RETURN(rc);
+
+ /*
+ * This part is here to combat evil-evil race in real_lookup on 2.6
+ * kernels. The race details are: We enter do_lookup() looking for some
+ * name, there is nothing in dcache for this name yet and d_lookup()
+ * returns NULL. We proceed to real_lookup(), and while we do this,
+ * another process does open on the same file we looking up (most simple
+ * reproducer), open succeeds and the dentry is added. Now back to
+ * us. In real_lookup() we do d_lookup() again and suddenly find the
+ * dentry, so we call d_revalidate on it, but there is no lock, so
+ * without this code we would return 0, but unpatched real_lookup just
+ * returns -ENOENT in such a case instead of retrying the lookup. Once
+ * this is dealt with in real_lookup(), all of this ugly mess can go and
+ * we can just check locks in ->d_revalidate without doing any RPCs
+ * ever.
+ */
+do_lookup:
+ if (it != &lookup_it) {
+ /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
+ if (it->it_op == IT_GETATTR)
+ lookup_it.it_op = IT_GETATTR;
+ ll_lookup_finish_locks(it, de);
+ it = &lookup_it;
+ }
+
+ /* Do real lookup here. */
+ op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
+ de->d_name.len, 0, (it->it_op & IT_CREAT ?
+ LUSTRE_OPC_CREATE :
+ LUSTRE_OPC_ANY), NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ rc = md_intent_lock(exp, op_data, NULL, 0, it, 0, &req,
+ ll_md_blocking_ast, 0);
+ if (rc >= 0) {
+ struct mdt_body *mdt_body;
+ struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
+ mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+
+ if (de->d_inode)
+ fid = *ll_inode2fid(de->d_inode);
+
+ /* see if we got same inode, if not - return error */
+ if (lu_fid_eq(&fid, &mdt_body->fid1)) {
+ ll_finish_md_op_data(op_data);
+ op_data = NULL;
+ goto revalidate_finish;
+ }
+ ll_intent_release(it);
+ }
+ ll_finish_md_op_data(op_data);
+ GOTO(out, rc = 0);
+
+out_sa:
+ /*
+ * For rc == 1 case, should not return directly to prevent losing
+ * statahead windows; for rc == 0 case, the "lookup" will be done later.
+ */
+ if (it != NULL && it->it_op == IT_GETATTR && rc == 1)
+ ll_statahead_enter(parent, &de, 1);
+ goto mark;
+}
+
+/*
+ * Always trust cached dentries. Update statahead window if necessary.
+ */
+int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
+{
+ struct inode *parent = dentry->d_parent->d_inode;
+ int unplug = 0;
+
+ ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%s,flags=%u\n",
+ dentry->d_name.name, flags);
+
+ if (!(flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) &&
+ ll_need_statahead(parent, dentry) > 0) {
+ if (flags & LOOKUP_RCU)
+ RETURN(-ECHILD);
+
+ if (dentry->d_inode == NULL)
+ unplug = 1;
+ do_statahead_enter(parent, &dentry, unplug);
+ ll_statahead_mark(parent, dentry);
+ }
+
+ RETURN(1);
+}
+
+
+void ll_d_iput(struct dentry *de, struct inode *inode)
+{
+ LASSERT(inode);
+ if (!find_cbdata(inode))
+ clear_nlink(inode);
+ iput(inode);
+}
+
+struct dentry_operations ll_d_ops = {
+ .d_revalidate = ll_revalidate_nd,
+ .d_release = ll_release,
+ .d_delete = ll_ddelete,
+ .d_iput = ll_d_iput,
+ .d_compare = ll_dcompare,
+};
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
new file mode 100644
index 000000000000..23c61fe81965
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -0,0 +1,1978 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/dir.c
+ *
+ * Directory code for lustre client.
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/mm.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/buffer_head.h> // for wait_on_buffer
+#include <linux/pagevec.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_fid.h>
+#include "llite_internal.h"
+
+/*
+ * (new) readdir implementation overview.
+ *
+ * Original lustre readdir implementation cached exact copy of raw directory
+ * pages on the client. These pages were indexed in client page cache by
+ * logical offset in the directory file. This design, while very simple and
+ * intuitive had some inherent problems:
+ *
+ * . it implies that byte offset to the directory entry serves as a
+ * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
+ * ext3/htree directory entries may move due to splits, and more
+ * importantly,
+ *
+ * . it is incompatible with the design of split directories for cmd3,
+ * that assumes that names are distributed across nodes based on their
+ * hash, and so readdir should be done in hash order.
+ *
+ * New readdir implementation does readdir in hash order, and uses hash of a
+ * file name as a telldir/seekdir cookie. This led to number of complications:
+ *
+ * . hash is not unique, so it cannot be used to index cached directory
+ * pages on the client (note, that it requires a whole pageful of hash
+ * collided entries to cause two pages to have identical hashes);
+ *
+ * . hash is not unique, so it cannot, strictly speaking, be used as an
+ * entry cookie. ext3/htree has the same problem and lustre implementation
+ * mimics their solution: seekdir(hash) positions directory at the first
+ * entry with the given hash.
+ *
+ * Client side.
+ *
+ * 0. caching
+ *
+ * Client caches directory pages using hash of the first entry as an index. As
+ * noted above hash is not unique, so this solution doesn't work as is:
+ * special processing is needed for "page hash chains" (i.e., sequences of
+ * pages filled with entries all having the same hash value).
+ *
+ * First, such chains have to be detected. To this end, server returns to the
+ * client the hash of the first entry on the page next to one returned. When
+ * client detects that this hash is the same as hash of the first entry on the
+ * returned page, page hash collision has to be handled. Pages in the
+ * hash chain, except first one, are termed "overflow pages".
+ *
+ * Solution to index uniqueness problem is to not cache overflow
+ * pages. Instead, when page hash collision is detected, all overflow pages
+ * from emerging chain are immediately requested from the server and placed in
+ * a special data structure (struct ll_dir_chain). This data structure is used
+ * by ll_readdir() to process entries from overflow pages. When readdir
+ * invocation finishes, overflow pages are discarded. If page hash collision
+ * chain weren't completely processed, next call to readdir will again detect
+ * page hash collision, again read overflow pages in, process next portion of
+ * entries and again discard the pages. This is not as wasteful as it looks,
+ * because, given reasonable hash, page hash collisions are extremely rare.
+ *
+ * 1. directory positioning
+ *
+ * When seekdir(hash) is called, original
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ * Server.
+ *
+ * identification of and access to overflow pages
+ *
+ * page format
+ *
+ * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
+ * a header lu_dirpage which describes the start/end hash, and whether this
+ * page is empty (contains no dir entry) or hash collide with next page.
+ * After client receives reply, several pages will be integrated into dir page
+ * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the
+ * lu_dirpage for this integrated page will be adjusted. See
+ * lmv_adjust_dirpages().
+ *
+ */
+
+/* returns the page unlocked, but with a reference */
+static int ll_dir_filler(void *_hash, struct page *page0)
+{
+ struct inode *inode = page0->mapping->host;
+ int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
+ struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
+ struct ptlrpc_request *request;
+ struct mdt_body *body;
+ struct md_op_data *op_data;
+ __u64 hash = *((__u64 *)_hash);
+ struct page **page_pool;
+ struct page *page;
+ struct lu_dirpage *dp;
+ int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT;
+ int nrdpgs = 0; /* number of pages read actually */
+ int npages;
+ int i;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash "LPU64"\n",
+ inode->i_ino, inode->i_generation, inode, hash);
+
+ LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
+
+ OBD_ALLOC(page_pool, sizeof(page) * max_pages);
+ if (page_pool != NULL) {
+ page_pool[0] = page0;
+ } else {
+ page_pool = &page0;
+ max_pages = 1;
+ }
+ for (npages = 1; npages < max_pages; npages++) {
+ page = page_cache_alloc_cold(inode->i_mapping);
+ if (!page)
+ break;
+ page_pool[npages] = page;
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ op_data->op_npages = npages;
+ op_data->op_offset = hash;
+ rc = md_readpage(exp, op_data, page_pool, &request);
+ ll_finish_md_op_data(op_data);
+ if (rc == 0) {
+ body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ /* Checked by mdc_readpage() */
+ LASSERT(body != NULL);
+
+ if (body->valid & OBD_MD_FLSIZE)
+ cl_isize_write(inode, body->size);
+
+ nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1)
+ >> PAGE_CACHE_SHIFT;
+ SetPageUptodate(page0);
+ }
+ unlock_page(page0);
+ ptlrpc_req_finished(request);
+
+ CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
+
+ ll_pagevec_init(&lru_pvec, 0);
+ for (i = 1; i < npages; i++) {
+ unsigned long offset;
+ int ret;
+
+ page = page_pool[i];
+
+ if (rc < 0 || i >= nrdpgs) {
+ page_cache_release(page);
+ continue;
+ }
+
+ SetPageUptodate(page);
+
+ dp = kmap(page);
+ hash = le64_to_cpu(dp->ldp_hash_start);
+ kunmap(page);
+
+ offset = hash_x_index(hash, hash64);
+
+ prefetchw(&page->flags);
+ ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
+ GFP_KERNEL);
+ if (ret == 0) {
+ unlock_page(page);
+ if (ll_pagevec_add(&lru_pvec, page) == 0)
+ ll_pagevec_lru_add_file(&lru_pvec);
+ } else {
+ CDEBUG(D_VFSTRACE, "page %lu add to page cache failed:"
+ " %d\n", offset, ret);
+ }
+ page_cache_release(page);
+ }
+ ll_pagevec_lru_add_file(&lru_pvec);
+
+ if (page_pool != &page0)
+ OBD_FREE(page_pool, sizeof(struct page *) * max_pages);
+ EXIT;
+ return rc;
+}
+
+static void ll_check_page(struct inode *dir, struct page *page)
+{
+ /* XXX: check page format later */
+ SetPageChecked(page);
+}
+
+void ll_release_page(struct page *page, int remove)
+{
+ kunmap(page);
+ if (remove) {
+ lock_page(page);
+ if (likely(page->mapping != NULL))
+ truncate_complete_page(page->mapping, page);
+ unlock_page(page);
+ }
+ page_cache_release(page);
+}
+
+/*
+ * Find, kmap and return page that contains given hash.
+ */
+static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
+ __u64 *start, __u64 *end)
+{
+ int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
+ struct address_space *mapping = dir->i_mapping;
+ /*
+ * Complement of hash is used as an index so that
+ * radix_tree_gang_lookup() can be used to find a page with starting
+ * hash _smaller_ than one we are looking for.
+ */
+ unsigned long offset = hash_x_index(*hash, hash64);
+ struct page *page;
+ int found;
+
+ TREE_READ_LOCK_IRQ(mapping);
+ found = radix_tree_gang_lookup(&mapping->page_tree,
+ (void **)&page, offset, 1);
+ if (found > 0) {
+ struct lu_dirpage *dp;
+
+ page_cache_get(page);
+ TREE_READ_UNLOCK_IRQ(mapping);
+ /*
+ * In contrast to find_lock_page() we are sure that directory
+ * page cannot be truncated (while DLM lock is held) and,
+ * hence, can avoid restart.
+ *
+ * In fact, page cannot be locked here at all, because
+ * ll_dir_filler() does synchronous io.
+ */
+ wait_on_page_locked(page);
+ if (PageUptodate(page)) {
+ dp = kmap(page);
+ if (BITS_PER_LONG == 32 && hash64) {
+ *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+ *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+ *hash = *hash >> 32;
+ } else {
+ *start = le64_to_cpu(dp->ldp_hash_start);
+ *end = le64_to_cpu(dp->ldp_hash_end);
+ }
+ LASSERTF(*start <= *hash, "start = "LPX64",end = "
+ LPX64",hash = "LPX64"\n", *start, *end, *hash);
+ CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash "LPU64"\n",
+ offset, *start, *end, *hash);
+ if (*hash > *end) {
+ ll_release_page(page, 0);
+ page = NULL;
+ } else if (*end != *start && *hash == *end) {
+ /*
+ * upon hash collision, remove this page,
+ * otherwise put page reference, and
+ * ll_get_dir_page() will issue RPC to fetch
+ * the page we want.
+ */
+ ll_release_page(page,
+ le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+ page = NULL;
+ }
+ } else {
+ page_cache_release(page);
+ page = ERR_PTR(-EIO);
+ }
+
+ } else {
+ TREE_READ_UNLOCK_IRQ(mapping);
+ page = NULL;
+ }
+ return page;
+}
+
+struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
+ struct ll_dir_chain *chain)
+{
+ ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
+ struct address_space *mapping = dir->i_mapping;
+ struct lustre_handle lockh;
+ struct lu_dirpage *dp;
+ struct page *page;
+ ldlm_mode_t mode;
+ int rc;
+ __u64 start = 0;
+ __u64 end = 0;
+ __u64 lhash = hash;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
+
+ mode = LCK_PR;
+ rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
+ ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
+ if (!rc) {
+ struct ldlm_enqueue_info einfo = {.ei_type = LDLM_IBITS,
+ .ei_mode = mode,
+ .ei_cb_bl =
+ ll_md_blocking_ast,
+ .ei_cb_cp =
+ ldlm_completion_ast,
+ .ei_cb_gl = NULL,
+ .ei_cb_wg = NULL,
+ .ei_cbdata = NULL};
+ struct lookup_intent it = { .it_op = IT_READDIR };
+ struct ptlrpc_request *request;
+ struct md_op_data *op_data;
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ return (void *)op_data;
+
+ rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
+ op_data, &lockh, NULL, 0, NULL, 0);
+
+ ll_finish_md_op_data(op_data);
+
+ request = (struct ptlrpc_request *)it.d.lustre.it_data;
+ if (request)
+ ptlrpc_req_finished(request);
+ if (rc < 0) {
+ CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n",
+ PFID(ll_inode2fid(dir)), hash, rc);
+ return ERR_PTR(rc);
+ }
+
+ CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
+ dir, dir->i_ino, dir->i_generation);
+ md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
+ &it.d.lustre.it_lock_handle, dir, NULL);
+ } else {
+ /* for cross-ref object, l_ast_data of the lock may not be set,
+ * we reset it here */
+ md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
+ dir, NULL);
+ }
+ ldlm_lock_dump_handle(D_OTHER, &lockh);
+
+ mutex_lock(&lli->lli_readdir_mutex);
+ page = ll_dir_page_locate(dir, &lhash, &start, &end);
+ if (IS_ERR(page)) {
+ CERROR("dir page locate: "DFID" at "LPU64": rc %ld\n",
+ PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
+ GOTO(out_unlock, page);
+ } else if (page != NULL) {
+ /*
+ * XXX nikita: not entirely correct handling of a corner case:
+ * suppose hash chain of entries with hash value HASH crosses
+ * border between pages P0 and P1. First both P0 and P1 are
+ * cached, seekdir() is called for some entry from the P0 part
+ * of the chain. Later P0 goes out of cache. telldir(HASH)
+ * happens and finds P1, as it starts with matching hash
+ * value. Remaining entries from P0 part of the chain are
+ * skipped. (Is that really a bug?)
+ *
+ * Possible solutions: 0. don't cache P1 is such case, handle
+ * it as an "overflow" page. 1. invalidate all pages at
+ * once. 2. use HASH|1 as an index for P1.
+ */
+ GOTO(hash_collision, page);
+ }
+
+ page = read_cache_page(mapping, hash_x_index(hash, hash64),
+ ll_dir_filler, &lhash);
+ if (IS_ERR(page)) {
+ CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
+ PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
+ GOTO(out_unlock, page);
+ }
+
+ wait_on_page_locked(page);
+ (void)kmap(page);
+ if (!PageUptodate(page)) {
+ CERROR("page not updated: "DFID" at "LPU64": rc %d\n",
+ PFID(ll_inode2fid(dir)), hash, -5);
+ goto fail;
+ }
+ if (!PageChecked(page))
+ ll_check_page(dir, page);
+ if (PageError(page)) {
+ CERROR("page error: "DFID" at "LPU64": rc %d\n",
+ PFID(ll_inode2fid(dir)), hash, -5);
+ goto fail;
+ }
+hash_collision:
+ dp = page_address(page);
+ if (BITS_PER_LONG == 32 && hash64) {
+ start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+ end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+ lhash = hash >> 32;
+ } else {
+ start = le64_to_cpu(dp->ldp_hash_start);
+ end = le64_to_cpu(dp->ldp_hash_end);
+ lhash = hash;
+ }
+ if (end == start) {
+ LASSERT(start == lhash);
+ CWARN("Page-wide hash collision: "LPU64"\n", end);
+ if (BITS_PER_LONG == 32 && hash64)
+ CWARN("Real page-wide hash collision at ["LPU64" "LPU64
+ "] with hash "LPU64"\n",
+ le64_to_cpu(dp->ldp_hash_start),
+ le64_to_cpu(dp->ldp_hash_end), hash);
+ /*
+ * Fetch whole overflow chain...
+ *
+ * XXX not yet.
+ */
+ goto fail;
+ }
+out_unlock:
+ mutex_unlock(&lli->lli_readdir_mutex);
+ ldlm_lock_decref(&lockh, mode);
+ return page;
+
+fail:
+ ll_release_page(page, 1);
+ page = ERR_PTR(-EIO);
+ goto out_unlock;
+}
+
+int ll_dir_read(struct inode *inode, __u64 *_pos, void *cookie,
+ filldir_t filldir)
+{
+ struct ll_inode_info *info = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ __u64 pos = *_pos;
+ int api32 = ll_need_32bit_api(sbi);
+ int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
+ struct page *page;
+ struct ll_dir_chain chain;
+ int done = 0;
+ int rc = 0;
+ ENTRY;
+
+ ll_dir_chain_init(&chain);
+
+ page = ll_get_dir_page(inode, pos, &chain);
+
+ while (rc == 0 && !done) {
+ struct lu_dirpage *dp;
+ struct lu_dirent *ent;
+
+ if (!IS_ERR(page)) {
+ /*
+ * If page is empty (end of directory is reached),
+ * use this value.
+ */
+ __u64 hash = MDS_DIR_END_OFF;
+ __u64 next;
+
+ dp = page_address(page);
+ for (ent = lu_dirent_start(dp); ent != NULL && !done;
+ ent = lu_dirent_next(ent)) {
+ __u16 type;
+ int namelen;
+ struct lu_fid fid;
+ __u64 lhash;
+ __u64 ino;
+
+ /*
+ * XXX: implement correct swabbing here.
+ */
+
+ hash = le64_to_cpu(ent->lde_hash);
+ if (hash < pos)
+ /*
+ * Skip until we find target hash
+ * value.
+ */
+ continue;
+
+ namelen = le16_to_cpu(ent->lde_namelen);
+ if (namelen == 0)
+ /*
+ * Skip dummy record.
+ */
+ continue;
+
+ if (api32 && hash64)
+ lhash = hash >> 32;
+ else
+ lhash = hash;
+ fid_le_to_cpu(&fid, &ent->lde_fid);
+ ino = cl_fid_build_ino(&fid, api32);
+ type = ll_dirent_type_get(ent);
+ /* For 'll_nfs_get_name_filldir()', it will try
+ * to access the 'ent' through its 'lde_name',
+ * so the parameter 'name' for 'filldir()' must
+ * be part of the 'ent'. */
+ done = filldir(cookie, ent->lde_name, namelen,
+ lhash, ino, type);
+ }
+ next = le64_to_cpu(dp->ldp_hash_end);
+ if (!done) {
+ pos = next;
+ if (pos == MDS_DIR_END_OFF) {
+ /*
+ * End of directory reached.
+ */
+ done = 1;
+ ll_release_page(page, 0);
+ } else if (1 /* chain is exhausted*/) {
+ /*
+ * Normal case: continue to the next
+ * page.
+ */
+ ll_release_page(page,
+ le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ next = pos;
+ page = ll_get_dir_page(inode, pos,
+ &chain);
+ } else {
+ /*
+ * go into overflow page.
+ */
+ LASSERT(le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ ll_release_page(page, 1);
+ }
+ } else {
+ pos = hash;
+ ll_release_page(page, 0);
+ }
+ } else {
+ rc = PTR_ERR(page);
+ CERROR("error reading dir "DFID" at %lu: rc %d\n",
+ PFID(&info->lli_fid), (unsigned long)pos, rc);
+ }
+ }
+
+ *_pos = pos;
+ ll_dir_chain_fini(&chain);
+ RETURN(rc);
+}
+
+static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ __u64 pos = lfd->lfd_pos;
+ int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
+ int api32 = ll_need_32bit_api(sbi);
+ int rc;
+ struct path path;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu "
+ " 32bit_api %d\n", inode->i_ino, inode->i_generation,
+ inode, (unsigned long)pos, i_size_read(inode), api32);
+
+ if (pos == MDS_DIR_END_OFF)
+ /*
+ * end-of-file.
+ */
+ GOTO(out, rc = 0);
+
+ rc = ll_dir_read(inode, &pos, cookie, filldir);
+ lfd->lfd_pos = pos;
+ if (pos == MDS_DIR_END_OFF) {
+ if (api32)
+ filp->f_pos = LL_DIR_END_OFF_32BIT;
+ else
+ filp->f_pos = LL_DIR_END_OFF;
+ } else {
+ if (api32 && hash64)
+ filp->f_pos = pos >> 32;
+ else
+ filp->f_pos = pos;
+ }
+ filp->f_version = inode->i_version;
+ path.mnt = filp->f_path.mnt;
+ path.dentry = filp->f_dentry;
+ touch_atime(&path);
+
+out:
+ if (!rc)
+ ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
+
+ RETURN(rc);
+}
+
+int ll_send_mgc_param(struct obd_export *mgc, char *string)
+{
+ struct mgs_send_param *msp;
+ int rc = 0;
+
+ OBD_ALLOC_PTR(msp);
+ if (!msp)
+ return -ENOMEM;
+
+ strncpy(msp->mgs_param, string, MGS_PARAM_MAXLEN);
+ rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
+ sizeof(struct mgs_send_param), msp, NULL);
+ if (rc)
+ CERROR("Failed to set parameter: %d\n", rc);
+ OBD_FREE_PTR(msp);
+
+ return rc;
+}
+
+int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
+ char *filename)
+{
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int mode;
+ int err;
+
+ ENTRY;
+
+ mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
+ strlen(filename), mode, LUSTRE_OPC_MKDIR,
+ lump);
+ if (IS_ERR(op_data))
+ GOTO(err_exit, err = PTR_ERR(op_data));
+
+ op_data->op_cli_flags |= CLI_SET_MEA;
+ err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
+ current_fsuid(), current_fsgid(),
+ cfs_curproc_cap_pack(), 0, &request);
+ ll_finish_md_op_data(op_data);
+ if (err)
+ GOTO(err_exit, err);
+err_exit:
+ ptlrpc_req_finished(request);
+ return err;
+}
+
+int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
+ int set_default)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct md_op_data *op_data;
+ struct ptlrpc_request *req = NULL;
+ int rc = 0;
+ struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ int lum_size;
+ ENTRY;
+
+ if (lump != NULL) {
+ /*
+ * This is coming from userspace, so should be in
+ * local endian. But the MDS would like it in little
+ * endian, so we swab it before we send it.
+ */
+ switch (lump->lmm_magic) {
+ case LOV_USER_MAGIC_V1: {
+ if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
+ lustre_swab_lov_user_md_v1(lump);
+ lum_size = sizeof(struct lov_user_md_v1);
+ break;
+ }
+ case LOV_USER_MAGIC_V3: {
+ if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
+ lustre_swab_lov_user_md_v3(
+ (struct lov_user_md_v3 *)lump);
+ lum_size = sizeof(struct lov_user_md_v3);
+ break;
+ }
+ default: {
+ CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
+ " %#08x != %#08x nor %#08x\n",
+ lump->lmm_magic, LOV_USER_MAGIC_V1,
+ LOV_USER_MAGIC_V3);
+ RETURN(-EINVAL);
+ }
+ }
+ } else {
+ lum_size = sizeof(struct lov_user_md_v1);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ if (lump != NULL && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC))
+ op_data->op_cli_flags |= CLI_SET_MEA;
+
+ /* swabbing is done in lov_setstripe() on server side */
+ rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
+ NULL, 0, &req, NULL);
+ ll_finish_md_op_data(op_data);
+ ptlrpc_req_finished(req);
+ if (rc) {
+ if (rc != -EPERM && rc != -EACCES)
+ CERROR("mdc_setattr fails: rc = %d\n", rc);
+ }
+
+ /* In the following we use the fact that LOV_USER_MAGIC_V1 and
+ LOV_USER_MAGIC_V3 have the same initial fields so we do not
+ need the make the distiction between the 2 versions */
+ if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
+ char *param = NULL;
+ char *buf;
+
+ OBD_ALLOC(param, MGS_PARAM_MAXLEN);
+ if (param == NULL)
+ GOTO(end, rc = -ENOMEM);
+
+ buf = param;
+ /* Get fsname and assume devname to be -MDT0000. */
+ ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
+ strcat(buf, "-MDT0000.lov");
+ buf += strlen(buf);
+
+ /* Set root stripesize */
+ sprintf(buf, ".stripesize=%u",
+ lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
+ rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
+ if (rc)
+ GOTO(end, rc);
+
+ /* Set root stripecount */
+ sprintf(buf, ".stripecount=%hd",
+ lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
+ rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
+ if (rc)
+ GOTO(end, rc);
+
+ /* Set root stripeoffset */
+ sprintf(buf, ".stripeoffset=%hd",
+ lump ? le16_to_cpu(lump->lmm_stripe_offset) :
+ (typeof(lump->lmm_stripe_offset))(-1));
+ rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
+
+end:
+ if (param != NULL)
+ OBD_FREE(param, MGS_PARAM_MAXLEN);
+ }
+ RETURN(rc);
+}
+
+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
+ int *lmm_size, struct ptlrpc_request **request)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct mdt_body *body;
+ struct lov_mds_md *lmm = NULL;
+ struct ptlrpc_request *req = NULL;
+ int rc, lmmsize;
+ struct md_op_data *op_data;
+
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
+ if (rc)
+ RETURN(rc);
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
+ 0, lmmsize, LUSTRE_OPC_ANY,
+ NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
+ rc = md_getattr(sbi->ll_md_exp, op_data, &req);
+ ll_finish_md_op_data(op_data);
+ if (rc < 0) {
+ CDEBUG(D_INFO, "md_getattr failed on inode "
+ "%lu/%u: rc %d\n", inode->i_ino,
+ inode->i_generation, rc);
+ GOTO(out, rc);
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+
+ lmmsize = body->eadatasize;
+
+ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+ lmmsize == 0) {
+ GOTO(out, rc = -ENODATA);
+ }
+
+ lmm = req_capsule_server_sized_get(&req->rq_pill,
+ &RMF_MDT_MD, lmmsize);
+ LASSERT(lmm != NULL);
+
+ /*
+ * This is coming from the MDS, so is probably in
+ * little endian. We convert it to host endian before
+ * passing it to userspace.
+ */
+ /* We don't swab objects for directories */
+ switch (le32_to_cpu(lmm->lmm_magic)) {
+ case LOV_MAGIC_V1:
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ break;
+ case LOV_MAGIC_V3:
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC))
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ break;
+ default:
+ CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
+ rc = -EPROTO;
+ }
+out:
+ *lmmp = lmm;
+ *lmm_size = lmmsize;
+ *request = req;
+ return rc;
+}
+
+/*
+ * Get MDT index for the inode.
+ */
+int ll_get_mdt_idx(struct inode *inode)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct md_op_data *op_data;
+ int rc, mdtidx;
+ ENTRY;
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0,
+ 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_flags |= MF_GET_MDT_IDX;
+ rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
+ mdtidx = op_data->op_mds;
+ ll_finish_md_op_data(op_data);
+ if (rc < 0) {
+ CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+ RETURN(rc);
+ }
+ return mdtidx;
+}
+
+/**
+ * Generic handler to do any pre-copy work.
+ *
+ * It send a first hsm_progress (with extent length == 0) to coordinator as a
+ * first information for it that real work has started.
+ *
+ * Moreover, for a ARCHIVE request, it will sample the file data version and
+ * store it in \a copy.
+ *
+ * \return 0 on success.
+ */
+static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct hsm_progress_kernel hpk;
+ int rc;
+ ENTRY;
+
+ /* Forge a hsm_progress based on data from copy. */
+ hpk.hpk_fid = copy->hc_hai.hai_fid;
+ hpk.hpk_cookie = copy->hc_hai.hai_cookie;
+ hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
+ hpk.hpk_extent.length = 0;
+ hpk.hpk_flags = 0;
+ hpk.hpk_errval = 0;
+ hpk.hpk_data_version = 0;
+
+
+ /* For archive request, we need to read the current file version. */
+ if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
+ struct inode *inode;
+ __u64 data_version = 0;
+
+ /* Get inode for this fid */
+ inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
+ if (IS_ERR(inode)) {
+ hpk.hpk_flags |= HP_FLAG_RETRY;
+ /* hpk_errval is >= 0 */
+ hpk.hpk_errval = -PTR_ERR(inode);
+ GOTO(progress, rc = PTR_ERR(inode));
+ }
+
+ /* Read current file data version */
+ rc = ll_data_version(inode, &data_version, 1);
+ iput(inode);
+ if (rc != 0) {
+ CDEBUG(D_HSM, "Could not read file data version of "
+ DFID" (rc = %d). Archive request ("
+ LPX64") could not be done.\n",
+ PFID(&copy->hc_hai.hai_fid), rc,
+ copy->hc_hai.hai_cookie);
+ hpk.hpk_flags |= HP_FLAG_RETRY;
+ /* hpk_errval must be >= 0 */
+ hpk.hpk_errval = -rc;
+ GOTO(progress, rc);
+ }
+
+ /* Store it the hsm_copy for later copytool use.
+ * Always modified even if no lsm. */
+ copy->hc_data_version = data_version;
+ }
+
+progress:
+ rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
+ &hpk, NULL);
+
+ RETURN(rc);
+}
+
+/**
+ * Generic handler to do any post-copy work.
+ *
+ * It will send the last hsm_progress update to coordinator to inform it
+ * that copy is finished and whether it was successful or not.
+ *
+ * Moreover,
+ * - for ARCHIVE request, it will sample the file data version and compare it
+ * with the version saved in ll_ioc_copy_start(). If they do not match, copy
+ * will be considered as failed.
+ * - for RESTORE request, it will sample the file data version and send it to
+ * coordinator which is useful if the file was imported as 'released'.
+ *
+ * \return 0 on success.
+ */
+static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct hsm_progress_kernel hpk;
+ int rc;
+ ENTRY;
+
+ /* If you modify the logic here, also check llapi_hsm_copy_end(). */
+ /* Take care: copy->hc_hai.hai_action, len, gid and data are not
+ * initialized if copy_end was called with copy == NULL.
+ */
+
+ /* Forge a hsm_progress based on data from copy. */
+ hpk.hpk_fid = copy->hc_hai.hai_fid;
+ hpk.hpk_cookie = copy->hc_hai.hai_cookie;
+ hpk.hpk_extent = copy->hc_hai.hai_extent;
+ hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
+ hpk.hpk_errval = copy->hc_errval;
+ hpk.hpk_data_version = 0;
+
+ /* For archive request, we need to check the file data was not changed.
+ *
+ * For restore request, we need to send the file data version, this is
+ * useful when the file was created using hsm_import.
+ */
+ if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
+ (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
+ (copy->hc_errval == 0)) {
+ struct inode *inode;
+ __u64 data_version = 0;
+
+ /* Get lsm for this fid */
+ inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
+ if (IS_ERR(inode)) {
+ hpk.hpk_flags |= HP_FLAG_RETRY;
+ /* hpk_errval must be >= 0 */
+ hpk.hpk_errval = -PTR_ERR(inode);
+ GOTO(progress, rc = PTR_ERR(inode));
+ }
+
+ rc = ll_data_version(inode, &data_version,
+ copy->hc_hai.hai_action == HSMA_ARCHIVE);
+ iput(inode);
+ if (rc) {
+ CDEBUG(D_HSM, "Could not read file data version. "
+ "Request could not be confirmed.\n");
+ if (hpk.hpk_errval == 0)
+ hpk.hpk_errval = -rc;
+ GOTO(progress, rc);
+ }
+
+ /* Store it the hsm_copy for later copytool use.
+ * Always modified even if no lsm. */
+ hpk.hpk_data_version = data_version;
+
+ /* File could have been stripped during archiving, so we need
+ * to check anyway. */
+ if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
+ (copy->hc_data_version != data_version)) {
+ CDEBUG(D_HSM, "File data version mismatched. "
+ "File content was changed during archiving. "
+ DFID", start:"LPX64" current:"LPX64"\n",
+ PFID(&copy->hc_hai.hai_fid),
+ copy->hc_data_version, data_version);
+ /* File was changed, send error to cdt. Do not ask for
+ * retry because if a file is modified frequently,
+ * the cdt will loop on retried archive requests.
+ * The policy engine will ask for a new archive later
+ * when the file will not be modified for some tunable
+ * time */
+ /* we do not notify caller */
+ hpk.hpk_flags &= ~HP_FLAG_RETRY;
+ /* hpk_errval must be >= 0 */
+ hpk.hpk_errval = EBUSY;
+ }
+
+ }
+
+progress:
+ rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
+ &hpk, NULL);
+
+ RETURN(rc);
+}
+
+
+static int copy_and_ioctl(int cmd, struct obd_export *exp, void *data, int len)
+{
+ void *ptr;
+ int rc;
+
+ OBD_ALLOC(ptr, len);
+ if (ptr == NULL)
+ return -ENOMEM;
+ if (copy_from_user(ptr, data, len)) {
+ OBD_FREE(ptr, len);
+ return -EFAULT;
+ }
+ rc = obd_iocontrol(cmd, exp, len, data, NULL);
+ OBD_FREE(ptr, len);
+ return rc;
+}
+
+static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
+{
+ int cmd = qctl->qc_cmd;
+ int type = qctl->qc_type;
+ int id = qctl->qc_id;
+ int valid = qctl->qc_valid;
+ int rc = 0;
+ ENTRY;
+
+ switch (cmd) {
+ case LUSTRE_Q_INVALIDATE:
+ case LUSTRE_Q_FINVALIDATE:
+ case Q_QUOTAON:
+ case Q_QUOTAOFF:
+ case Q_SETQUOTA:
+ case Q_SETINFO:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+ sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ RETURN(-EPERM);
+ break;
+ case Q_GETQUOTA:
+ if (((type == USRQUOTA && current_euid() != id) ||
+ (type == GRPQUOTA && !in_egroup_p(id))) &&
+ (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+ sbi->ll_flags & LL_SBI_RMT_CLIENT))
+ RETURN(-EPERM);
+ break;
+ case Q_GETINFO:
+ break;
+ default:
+ CERROR("unsupported quotactl op: %#x\n", cmd);
+ RETURN(-ENOTTY);
+ }
+
+ if (valid != QC_GENERAL) {
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ RETURN(-EOPNOTSUPP);
+
+ if (cmd == Q_GETINFO)
+ qctl->qc_cmd = Q_GETOINFO;
+ else if (cmd == Q_GETQUOTA)
+ qctl->qc_cmd = Q_GETOQUOTA;
+ else
+ RETURN(-EINVAL);
+
+ switch (valid) {
+ case QC_MDTIDX:
+ rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
+ sizeof(*qctl), qctl, NULL);
+ break;
+ case QC_OSTIDX:
+ rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
+ sizeof(*qctl), qctl, NULL);
+ break;
+ case QC_UUID:
+ rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
+ sizeof(*qctl), qctl, NULL);
+ if (rc == -EAGAIN)
+ rc = obd_iocontrol(OBD_IOC_QUOTACTL,
+ sbi->ll_dt_exp,
+ sizeof(*qctl), qctl, NULL);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc)
+ RETURN(rc);
+
+ qctl->qc_cmd = cmd;
+ } else {
+ struct obd_quotactl *oqctl;
+
+ OBD_ALLOC_PTR(oqctl);
+ if (oqctl == NULL)
+ RETURN(-ENOMEM);
+
+ QCTL_COPY(oqctl, qctl);
+ rc = obd_quotactl(sbi->ll_md_exp, oqctl);
+ if (rc) {
+ if (rc != -EALREADY && cmd == Q_QUOTAON) {
+ oqctl->qc_cmd = Q_QUOTAOFF;
+ obd_quotactl(sbi->ll_md_exp, oqctl);
+ }
+ OBD_FREE_PTR(oqctl);
+ RETURN(rc);
+ }
+ /* If QIF_SPACE is not set, client should collect the
+ * space usage from OSSs by itself */
+ if (cmd == Q_GETQUOTA &&
+ !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
+ !oqctl->qc_dqblk.dqb_curspace) {
+ struct obd_quotactl *oqctl_tmp;
+
+ OBD_ALLOC_PTR(oqctl_tmp);
+ if (oqctl_tmp == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ oqctl_tmp->qc_cmd = Q_GETOQUOTA;
+ oqctl_tmp->qc_id = oqctl->qc_id;
+ oqctl_tmp->qc_type = oqctl->qc_type;
+
+ /* collect space usage from OSTs */
+ oqctl_tmp->qc_dqblk.dqb_curspace = 0;
+ rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
+ if (!rc || rc == -EREMOTEIO) {
+ oqctl->qc_dqblk.dqb_curspace =
+ oqctl_tmp->qc_dqblk.dqb_curspace;
+ oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
+ }
+
+ /* collect space & inode usage from MDTs */
+ oqctl_tmp->qc_dqblk.dqb_curspace = 0;
+ oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
+ rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
+ if (!rc || rc == -EREMOTEIO) {
+ oqctl->qc_dqblk.dqb_curspace +=
+ oqctl_tmp->qc_dqblk.dqb_curspace;
+ oqctl->qc_dqblk.dqb_curinodes =
+ oqctl_tmp->qc_dqblk.dqb_curinodes;
+ oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
+ } else {
+ oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
+ }
+
+ OBD_FREE_PTR(oqctl_tmp);
+ }
+out:
+ QCTL_COPY(qctl, oqctl);
+ OBD_FREE_PTR(oqctl);
+ }
+
+ RETURN(rc);
+}
+
+static char *
+ll_getname(const char __user *filename)
+{
+ int ret = 0, len;
+ char *tmp = __getname();
+
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+
+ len = strncpy_from_user(tmp, filename, PATH_MAX);
+ if (len == 0)
+ ret = -ENOENT;
+ else if (len > PATH_MAX)
+ ret = -ENAMETOOLONG;
+
+ if (ret) {
+ __putname(tmp);
+ tmp = ERR_PTR(ret);
+ }
+ return tmp;
+}
+
+#define ll_putname(filename) __putname(filename)
+
+static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obd_ioctl_data *data;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
+ inode->i_ino, inode->i_generation, inode, cmd);
+
+ /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
+ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
+ return -ENOTTY;
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
+ switch(cmd) {
+ case FSFILT_IOC_GETFLAGS:
+ case FSFILT_IOC_SETFLAGS:
+ RETURN(ll_iocontrol(inode, file, cmd, arg));
+ case FSFILT_IOC_GETVERSION_OLD:
+ case FSFILT_IOC_GETVERSION:
+ RETURN(put_user(inode->i_generation, (int *)arg));
+ /* We need to special case any other ioctls we want to handle,
+ * to send them to the MDS/OST as appropriate and to properly
+ * network encode the arg field.
+ case FSFILT_IOC_SETVERSION_OLD:
+ case FSFILT_IOC_SETVERSION:
+ */
+ case LL_IOC_GET_MDTIDX: {
+ int mdtidx;
+
+ mdtidx = ll_get_mdt_idx(inode);
+ if (mdtidx < 0)
+ RETURN(mdtidx);
+
+ if (put_user((int)mdtidx, (int*)arg))
+ RETURN(-EFAULT);
+
+ return 0;
+ }
+ case IOC_MDC_LOOKUP: {
+ struct ptlrpc_request *request = NULL;
+ int namelen, len = 0;
+ char *buf = NULL;
+ char *filename;
+ struct md_op_data *op_data;
+
+ rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
+ if (rc)
+ RETURN(rc);
+ data = (void *)buf;
+
+ filename = data->ioc_inlbuf1;
+ namelen = strlen(filename);
+
+ if (namelen < 1) {
+ CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
+ GOTO(out_free, rc = -EINVAL);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen,
+ 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ GOTO(out_free, rc = PTR_ERR(op_data));
+
+ op_data->op_valid = OBD_MD_FLID;
+ rc = md_getattr_name(sbi->ll_md_exp, op_data, &request);
+ ll_finish_md_op_data(op_data);
+ if (rc < 0) {
+ CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+ GOTO(out_free, rc);
+ }
+ ptlrpc_req_finished(request);
+ EXIT;
+out_free:
+ obd_ioctl_freedata(buf, len);
+ return rc;
+ }
+ case LL_IOC_LMV_SETSTRIPE: {
+ struct lmv_user_md *lum;
+ char *buf = NULL;
+ char *filename;
+ int namelen = 0;
+ int lumlen = 0;
+ int len;
+ int rc;
+
+ rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
+ if (rc)
+ RETURN(rc);
+
+ data = (void *)buf;
+ if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL ||
+ data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0)
+ GOTO(lmv_out_free, rc = -EINVAL);
+
+ filename = data->ioc_inlbuf1;
+ namelen = data->ioc_inllen1;
+
+ if (namelen < 1) {
+ CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
+ GOTO(lmv_out_free, rc = -EINVAL);
+ }
+ lum = (struct lmv_user_md *)data->ioc_inlbuf2;
+ lumlen = data->ioc_inllen2;
+
+ if (lum->lum_magic != LMV_USER_MAGIC ||
+ lumlen != sizeof(*lum)) {
+ CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
+ filename, lum->lum_magic, lumlen, -EFAULT);
+ GOTO(lmv_out_free, rc = -EINVAL);
+ }
+
+ /**
+ * ll_dir_setdirstripe will be used to set dir stripe
+ * mdc_create--->mdt_reint_create (with dirstripe)
+ */
+ rc = ll_dir_setdirstripe(inode, lum, filename);
+lmv_out_free:
+ obd_ioctl_freedata(buf, len);
+ RETURN(rc);
+
+ }
+ case LL_IOC_LOV_SETSTRIPE: {
+ struct lov_user_md_v3 lumv3;
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+ struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+ struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+
+ int set_default = 0;
+
+ LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
+ LASSERT(sizeof(lumv3.lmm_objects[0]) ==
+ sizeof(lumv3p->lmm_objects[0]));
+ /* first try with v1 which is smaller than v3 */
+ if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
+ RETURN(-EFAULT);
+
+ if ((lumv1->lmm_magic == LOV_USER_MAGIC_V3) ) {
+ if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
+ RETURN(-EFAULT);
+ }
+
+ if (inode->i_sb->s_root == file->f_dentry)
+ set_default = 1;
+
+ /* in v1 and v3 cases lumv1 points to data */
+ rc = ll_dir_setstripe(inode, lumv1, set_default);
+
+ RETURN(rc);
+ }
+ case LL_IOC_LMV_GETSTRIPE: {
+ struct lmv_user_md *lump = (struct lmv_user_md *)arg;
+ struct lmv_user_md lum;
+ struct lmv_user_md *tmp;
+ int lum_size;
+ int rc = 0;
+ int mdtindex;
+
+ if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md)))
+ RETURN(-EFAULT);
+
+ if (lum.lum_magic != LMV_MAGIC_V1)
+ RETURN(-EINVAL);
+
+ lum_size = lmv_user_md_size(1, LMV_MAGIC_V1);
+ OBD_ALLOC(tmp, lum_size);
+ if (tmp == NULL)
+ GOTO(free_lmv, rc = -ENOMEM);
+
+ memcpy(tmp, &lum, sizeof(lum));
+ tmp->lum_type = LMV_STRIPE_TYPE;
+ tmp->lum_stripe_count = 1;
+ mdtindex = ll_get_mdt_idx(inode);
+ if (mdtindex < 0)
+ GOTO(free_lmv, rc = -ENOMEM);
+
+ tmp->lum_stripe_offset = mdtindex;
+ tmp->lum_objects[0].lum_mds = mdtindex;
+ memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode),
+ sizeof(struct lu_fid));
+ if (copy_to_user((void *)arg, tmp, lum_size))
+ GOTO(free_lmv, rc = -EFAULT);
+free_lmv:
+ if (tmp)
+ OBD_FREE(tmp, lum_size);
+ RETURN(rc);
+ }
+ case LL_IOC_REMOVE_ENTRY: {
+ char *filename = NULL;
+ int namelen = 0;
+ int rc;
+
+ /* Here is a little hack to avoid sending REINT_RMENTRY to
+ * unsupported server, which might crash the server(LU-2730),
+ * Because both LVB_TYPE and REINT_RMENTRY will be supported
+ * on 2.4, we use OBD_CONNECT_LVB_TYPE to detect whether the
+ * server will support REINT_RMENTRY XXX*/
+ if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_LVB_TYPE))
+ return -ENOTSUPP;
+
+ filename = ll_getname((const char *)arg);
+ if (IS_ERR(filename))
+ RETURN(PTR_ERR(filename));
+
+ namelen = strlen(filename);
+ if (namelen < 1)
+ GOTO(out_rmdir, rc = -EINVAL);
+
+ rc = ll_rmdir_entry(inode, filename, namelen);
+out_rmdir:
+ if (filename)
+ ll_putname(filename);
+ RETURN(rc);
+ }
+ case LL_IOC_LOV_SWAP_LAYOUTS:
+ RETURN(-EPERM);
+ case LL_IOC_OBD_STATFS:
+ RETURN(ll_obd_statfs(inode, (void *)arg));
+ case LL_IOC_LOV_GETSTRIPE:
+ case LL_IOC_MDC_GETINFO:
+ case IOC_MDC_GETFILEINFO:
+ case IOC_MDC_GETFILESTRIPE: {
+ struct ptlrpc_request *request = NULL;
+ struct lov_user_md *lump;
+ struct lov_mds_md *lmm = NULL;
+ struct mdt_body *body;
+ char *filename = NULL;
+ int lmmsize;
+
+ if (cmd == IOC_MDC_GETFILEINFO ||
+ cmd == IOC_MDC_GETFILESTRIPE) {
+ filename = ll_getname((const char *)arg);
+ if (IS_ERR(filename))
+ RETURN(PTR_ERR(filename));
+
+ rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
+ &lmmsize, &request);
+ } else {
+ rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+ }
+
+ if (request) {
+ body = req_capsule_server_get(&request->rq_pill,
+ &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+ } else {
+ GOTO(out_req, rc);
+ }
+
+ if (rc < 0) {
+ if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
+ cmd == LL_IOC_MDC_GETINFO))
+ GOTO(skip_lmm, rc = 0);
+ else
+ GOTO(out_req, rc);
+ }
+
+ if (cmd == IOC_MDC_GETFILESTRIPE ||
+ cmd == LL_IOC_LOV_GETSTRIPE) {
+ lump = (struct lov_user_md *)arg;
+ } else {
+ struct lov_user_mds_data *lmdp;
+ lmdp = (struct lov_user_mds_data *)arg;
+ lump = &lmdp->lmd_lmm;
+ }
+ if (copy_to_user(lump, lmm, lmmsize)) {
+ if (copy_to_user(lump, lmm, sizeof(*lump)))
+ GOTO(out_req, rc = -EFAULT);
+ rc = -EOVERFLOW;
+ }
+ skip_lmm:
+ if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
+ struct lov_user_mds_data *lmdp;
+ lstat_t st = { 0 };
+
+ st.st_dev = inode->i_sb->s_dev;
+ st.st_mode = body->mode;
+ st.st_nlink = body->nlink;
+ st.st_uid = body->uid;
+ st.st_gid = body->gid;
+ st.st_rdev = body->rdev;
+ st.st_size = body->size;
+ st.st_blksize = PAGE_CACHE_SIZE;
+ st.st_blocks = body->blocks;
+ st.st_atime = body->atime;
+ st.st_mtime = body->mtime;
+ st.st_ctime = body->ctime;
+ st.st_ino = inode->i_ino;
+
+ lmdp = (struct lov_user_mds_data *)arg;
+ if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st)))
+ GOTO(out_req, rc = -EFAULT);
+ }
+
+ EXIT;
+ out_req:
+ ptlrpc_req_finished(request);
+ if (filename)
+ ll_putname(filename);
+ return rc;
+ }
+ case IOC_LOV_GETINFO: {
+ struct lov_user_mds_data *lumd;
+ struct lov_stripe_md *lsm;
+ struct lov_user_md *lum;
+ struct lov_mds_md *lmm;
+ int lmmsize;
+ lstat_t st;
+
+ lumd = (struct lov_user_mds_data *)arg;
+ lum = &lumd->lmd_lmm;
+
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
+ if (rc)
+ RETURN(rc);
+
+ OBD_ALLOC_LARGE(lmm, lmmsize);
+ if (copy_from_user(lmm, lum, lmmsize))
+ GOTO(free_lmm, rc = -EFAULT);
+
+ switch (lmm->lmm_magic) {
+ case LOV_USER_MAGIC_V1:
+ if (LOV_USER_MAGIC_V1 == cpu_to_le32(LOV_USER_MAGIC_V1))
+ break;
+ /* swab objects first so that stripes num will be sane */
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ break;
+ case LOV_USER_MAGIC_V3:
+ if (LOV_USER_MAGIC_V3 == cpu_to_le32(LOV_USER_MAGIC_V3))
+ break;
+ /* swab objects first so that stripes num will be sane */
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ break;
+ default:
+ GOTO(free_lmm, rc = -EINVAL);
+ }
+
+ rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
+ if (rc < 0)
+ GOTO(free_lmm, rc = -ENOMEM);
+
+ /* Perform glimpse_size operation. */
+ memset(&st, 0, sizeof(st));
+
+ rc = ll_glimpse_ioctl(sbi, lsm, &st);
+ if (rc)
+ GOTO(free_lsm, rc);
+
+ if (copy_to_user(&lumd->lmd_st, &st, sizeof(st)))
+ GOTO(free_lsm, rc = -EFAULT);
+
+ EXIT;
+ free_lsm:
+ obd_free_memmd(sbi->ll_dt_exp, &lsm);
+ free_lmm:
+ OBD_FREE_LARGE(lmm, lmmsize);
+ return rc;
+ }
+ case OBD_IOC_LLOG_CATINFO: {
+ RETURN(-EOPNOTSUPP);
+ }
+ case OBD_IOC_QUOTACHECK: {
+ struct obd_quotactl *oqctl;
+ int error = 0;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+ sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ RETURN(-EPERM);
+
+ OBD_ALLOC_PTR(oqctl);
+ if (!oqctl)
+ RETURN(-ENOMEM);
+ oqctl->qc_type = arg;
+ rc = obd_quotacheck(sbi->ll_md_exp, oqctl);
+ if (rc < 0) {
+ CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc);
+ error = rc;
+ }
+
+ rc = obd_quotacheck(sbi->ll_dt_exp, oqctl);
+ if (rc < 0)
+ CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc);
+
+ OBD_FREE_PTR(oqctl);
+ return error ?: rc;
+ }
+ case OBD_IOC_POLL_QUOTACHECK: {
+ struct if_quotacheck *check;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+ sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ RETURN(-EPERM);
+
+ OBD_ALLOC_PTR(check);
+ if (!check)
+ RETURN(-ENOMEM);
+
+ rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check,
+ NULL);
+ if (rc) {
+ CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
+ if (copy_to_user((void *)arg, check,
+ sizeof(*check)))
+ CDEBUG(D_QUOTA, "copy_to_user failed\n");
+ GOTO(out_poll, rc);
+ }
+
+ rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check,
+ NULL);
+ if (rc) {
+ CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
+ if (copy_to_user((void *)arg, check,
+ sizeof(*check)))
+ CDEBUG(D_QUOTA, "copy_to_user failed\n");
+ GOTO(out_poll, rc);
+ }
+ out_poll:
+ OBD_FREE_PTR(check);
+ RETURN(rc);
+ }
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+ case LL_IOC_QUOTACTL_18: {
+ /* copy the old 1.x quota struct for internal use, then copy
+ * back into old format struct. For 1.8 compatibility. */
+ struct if_quotactl_18 *qctl_18;
+ struct if_quotactl *qctl_20;
+
+ OBD_ALLOC_PTR(qctl_18);
+ if (!qctl_18)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC_PTR(qctl_20);
+ if (!qctl_20)
+ GOTO(out_quotactl_18, rc = -ENOMEM);
+
+ if (copy_from_user(qctl_18, (void *)arg, sizeof(*qctl_18)))
+ GOTO(out_quotactl_20, rc = -ENOMEM);
+
+ QCTL_COPY(qctl_20, qctl_18);
+ qctl_20->qc_idx = 0;
+
+ /* XXX: dqb_valid was borrowed as a flag to mark that
+ * only mds quota is wanted */
+ if (qctl_18->qc_cmd == Q_GETQUOTA &&
+ qctl_18->qc_dqblk.dqb_valid) {
+ qctl_20->qc_valid = QC_MDTIDX;
+ qctl_20->qc_dqblk.dqb_valid = 0;
+ } else if (qctl_18->obd_uuid.uuid[0] != '\0') {
+ qctl_20->qc_valid = QC_UUID;
+ qctl_20->obd_uuid = qctl_18->obd_uuid;
+ } else {
+ qctl_20->qc_valid = QC_GENERAL;
+ }
+
+ rc = quotactl_ioctl(sbi, qctl_20);
+
+ if (rc == 0) {
+ QCTL_COPY(qctl_18, qctl_20);
+ qctl_18->obd_uuid = qctl_20->obd_uuid;
+
+ if (copy_to_user((void *)arg, qctl_18,
+ sizeof(*qctl_18)))
+ rc = -EFAULT;
+ }
+
+ out_quotactl_20:
+ OBD_FREE_PTR(qctl_20);
+ out_quotactl_18:
+ OBD_FREE_PTR(qctl_18);
+ RETURN(rc);
+ }
+#else
+#warning "remove old LL_IOC_QUOTACTL_18 compatibility code"
+#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) */
+ case LL_IOC_QUOTACTL: {
+ struct if_quotactl *qctl;
+
+ OBD_ALLOC_PTR(qctl);
+ if (!qctl)
+ RETURN(-ENOMEM);
+
+ if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
+ GOTO(out_quotactl, rc = -EFAULT);
+
+ rc = quotactl_ioctl(sbi, qctl);
+
+ if (rc == 0 && copy_to_user((void *)arg,qctl,sizeof(*qctl)))
+ rc = -EFAULT;
+
+ out_quotactl:
+ OBD_FREE_PTR(qctl);
+ RETURN(rc);
+ }
+ case OBD_IOC_GETDTNAME:
+ case OBD_IOC_GETMDNAME:
+ RETURN(ll_get_obd_name(inode, cmd, arg));
+ case LL_IOC_FLUSHCTX:
+ RETURN(ll_flush_ctx(inode));
+#ifdef CONFIG_FS_POSIX_ACL
+ case LL_IOC_RMTACL: {
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
+ inode == inode->i_sb->s_root->d_inode) {
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+
+ LASSERT(fd != NULL);
+ rc = rct_add(&sbi->ll_rct, current_pid(), arg);
+ if (!rc)
+ fd->fd_flags |= LL_FILE_RMTACL;
+ RETURN(rc);
+ } else
+ RETURN(0);
+ }
+#endif
+ case LL_IOC_GETOBDCOUNT: {
+ int count, vallen;
+ struct obd_export *exp;
+
+ if (copy_from_user(&count, (int *)arg, sizeof(int)))
+ RETURN(-EFAULT);
+
+ /* get ost count when count is zero, get mdt count otherwise */
+ exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
+ vallen = sizeof(count);
+ rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
+ KEY_TGT_COUNT, &vallen, &count, NULL);
+ if (rc) {
+ CERROR("get target count failed: %d\n", rc);
+ RETURN(rc);
+ }
+
+ if (copy_to_user((int *)arg, &count, sizeof(int)))
+ RETURN(-EFAULT);
+
+ RETURN(0);
+ }
+ case LL_IOC_PATH2FID:
+ if (copy_to_user((void *)arg, ll_inode2fid(inode),
+ sizeof(struct lu_fid)))
+ RETURN(-EFAULT);
+ RETURN(0);
+ case LL_IOC_GET_CONNECT_FLAGS: {
+ RETURN(obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL, (void*)arg));
+ }
+ case OBD_IOC_CHANGELOG_SEND:
+ case OBD_IOC_CHANGELOG_CLEAR:
+ rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
+ sizeof(struct ioc_changelog));
+ RETURN(rc);
+ case OBD_IOC_FID2PATH:
+ RETURN(ll_fid2path(inode, (void *)arg));
+ case LL_IOC_HSM_REQUEST: {
+ struct hsm_user_request *hur;
+ int totalsize;
+
+ OBD_ALLOC_PTR(hur);
+ if (hur == NULL)
+ RETURN(-ENOMEM);
+
+ /* We don't know the true size yet; copy the fixed-size part */
+ if (copy_from_user(hur, (void *)arg, sizeof(*hur))) {
+ OBD_FREE_PTR(hur);
+ RETURN(-EFAULT);
+ }
+
+ /* Compute the whole struct size */
+ totalsize = hur_len(hur);
+ OBD_FREE_PTR(hur);
+ OBD_ALLOC_LARGE(hur, totalsize);
+ if (hur == NULL)
+ RETURN(-ENOMEM);
+
+ /* Copy the whole struct */
+ if (copy_from_user(hur, (void *)arg, totalsize)) {
+ OBD_FREE_LARGE(hur, totalsize);
+ RETURN(-EFAULT);
+ }
+
+ rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
+ hur, NULL);
+
+ OBD_FREE_LARGE(hur, totalsize);
+
+ RETURN(rc);
+ }
+ case LL_IOC_HSM_PROGRESS: {
+ struct hsm_progress_kernel hpk;
+ struct hsm_progress hp;
+
+ if (copy_from_user(&hp, (void *)arg, sizeof(hp)))
+ RETURN(-EFAULT);
+
+ hpk.hpk_fid = hp.hp_fid;
+ hpk.hpk_cookie = hp.hp_cookie;
+ hpk.hpk_extent = hp.hp_extent;
+ hpk.hpk_flags = hp.hp_flags;
+ hpk.hpk_errval = hp.hp_errval;
+ hpk.hpk_data_version = 0;
+
+ /* File may not exist in Lustre; all progress
+ * reported to Lustre root */
+ rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
+ NULL);
+ RETURN(rc);
+ }
+ case LL_IOC_HSM_CT_START:
+ rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
+ sizeof(struct lustre_kernelcomm));
+ RETURN(rc);
+
+ case LL_IOC_HSM_COPY_START: {
+ struct hsm_copy *copy;
+ int rc;
+
+ OBD_ALLOC_PTR(copy);
+ if (copy == NULL)
+ RETURN(-ENOMEM);
+ if (copy_from_user(copy, (char *)arg, sizeof(*copy))) {
+ OBD_FREE_PTR(copy);
+ RETURN(-EFAULT);
+ }
+
+ rc = ll_ioc_copy_start(inode->i_sb, copy);
+ if (copy_to_user((char *)arg, copy, sizeof(*copy)))
+ rc = -EFAULT;
+
+ OBD_FREE_PTR(copy);
+ RETURN(rc);
+ }
+ case LL_IOC_HSM_COPY_END: {
+ struct hsm_copy *copy;
+ int rc;
+
+ OBD_ALLOC_PTR(copy);
+ if (copy == NULL)
+ RETURN(-ENOMEM);
+ if (copy_from_user(copy, (char *)arg, sizeof(*copy))) {
+ OBD_FREE_PTR(copy);
+ RETURN(-EFAULT);
+ }
+
+ rc = ll_ioc_copy_end(inode->i_sb, copy);
+ if (copy_to_user((char *)arg, copy, sizeof(*copy)))
+ rc = -EFAULT;
+
+ OBD_FREE_PTR(copy);
+ RETURN(rc);
+ }
+ default:
+ RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
+ (void *)arg));
+ }
+}
+
+static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ int api32 = ll_need_32bit_api(sbi);
+ loff_t ret = -EINVAL;
+ ENTRY;
+
+ mutex_lock(&inode->i_mutex);
+ switch (origin) {
+ case SEEK_SET:
+ break;
+ case SEEK_CUR:
+ offset += file->f_pos;
+ break;
+ case SEEK_END:
+ if (offset > 0)
+ GOTO(out, ret);
+ if (api32)
+ offset += LL_DIR_END_OFF_32BIT;
+ else
+ offset += LL_DIR_END_OFF;
+ break;
+ default:
+ GOTO(out, ret);
+ }
+
+ if (offset >= 0 &&
+ ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
+ (!api32 && offset <= LL_DIR_END_OFF))) {
+ if (offset != file->f_pos) {
+ if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
+ (!api32 && offset == LL_DIR_END_OFF))
+ fd->lfd_pos = MDS_DIR_END_OFF;
+ else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
+ fd->lfd_pos = offset << 32;
+ else
+ fd->lfd_pos = offset;
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ ret = offset;
+ }
+ GOTO(out, ret);
+
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+int ll_dir_open(struct inode *inode, struct file *file)
+{
+ ENTRY;
+ RETURN(ll_file_open(inode, file));
+}
+
+int ll_dir_release(struct inode *inode, struct file *file)
+{
+ ENTRY;
+ RETURN(ll_file_release(inode, file));
+}
+
+struct file_operations ll_dir_operations = {
+ .llseek = ll_dir_seek,
+ .open = ll_dir_open,
+ .release = ll_dir_release,
+ .read = generic_read_dir,
+ .readdir = ll_readdir,
+ .unlocked_ioctl = ll_dir_ioctl,
+ .fsync = ll_fsync,
+};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
new file mode 100644
index 000000000000..ed1e3f7b4e58
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -0,0 +1,3198 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/file.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+#include <lustre_dlm.h>
+#include <lustre_lite.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include "llite_internal.h"
+#include <lustre/ll_fiemap.h>
+
+#include "cl_object.h"
+
+struct ll_file_data *ll_file_data_get(void)
+{
+ struct ll_file_data *fd;
+
+ OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
+ fd->fd_write_failed = false;
+ return fd;
+}
+
+static void ll_file_data_put(struct ll_file_data *fd)
+{
+ if (fd != NULL)
+ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
+}
+
+void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
+ struct lustre_handle *fh)
+{
+ op_data->op_fid1 = ll_i2info(inode)->lli_fid;
+ op_data->op_attr.ia_mode = inode->i_mode;
+ op_data->op_attr.ia_atime = inode->i_atime;
+ op_data->op_attr.ia_mtime = inode->i_mtime;
+ op_data->op_attr.ia_ctime = inode->i_ctime;
+ op_data->op_attr.ia_size = i_size_read(inode);
+ op_data->op_attr_blocks = inode->i_blocks;
+ ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
+ ll_inode_to_ext_flags(inode->i_flags);
+ op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
+ if (fh)
+ op_data->op_handle = *fh;
+ op_data->op_capa1 = ll_mdscapa_get(inode);
+
+ if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
+ op_data->op_bias |= MDS_DATA_MODIFIED;
+}
+
+/**
+ * Closes the IO epoch and packs all the attributes into @op_data for
+ * the CLOSE rpc.
+ */
+static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
+ struct obd_client_handle *och)
+{
+ ENTRY;
+
+ op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET |
+ ATTR_CTIME | ATTR_CTIME_SET;
+
+ if (!(och->och_flags & FMODE_WRITE))
+ goto out;
+
+ if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
+ op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ else
+ ll_ioepoch_close(inode, op_data, &och, 0);
+
+out:
+ ll_pack_inode2opdata(inode, op_data, &och->och_fh);
+ ll_prep_md_op_data(op_data, inode, NULL, NULL,
+ 0, 0, LUSTRE_OPC_ANY, NULL);
+ EXIT;
+}
+
+static int ll_close_inode_openhandle(struct obd_export *md_exp,
+ struct inode *inode,
+ struct obd_client_handle *och)
+{
+ struct obd_export *exp = ll_i2mdexp(inode);
+ struct md_op_data *op_data;
+ struct ptlrpc_request *req = NULL;
+ struct obd_device *obd = class_exp2obd(exp);
+ int epoch_close = 1;
+ int rc;
+ ENTRY;
+
+ if (obd == NULL) {
+ /*
+ * XXX: in case of LMV, is this correct to access
+ * ->exp_handle?
+ */
+ CERROR("Invalid MDC connection handle "LPX64"\n",
+ ll_i2mdexp(inode)->exp_handle.h_cookie);
+ GOTO(out, rc = 0);
+ }
+
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL)
+ GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
+
+ ll_prepare_close(inode, op_data, och);
+ epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
+ rc = md_close(md_exp, op_data, och->och_mod, &req);
+ if (rc == -EAGAIN) {
+ /* This close must have the epoch closed. */
+ LASSERT(epoch_close);
+ /* MDS has instructed us to obtain Size-on-MDS attribute from
+ * OSTs and send setattr to back to MDS. */
+ rc = ll_som_update(inode, op_data);
+ if (rc) {
+ CERROR("inode %lu mdc Size-on-MDS update failed: "
+ "rc = %d\n", inode->i_ino, rc);
+ rc = 0;
+ }
+ } else if (rc) {
+ CERROR("inode %lu mdc close failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+
+ /* DATA_MODIFIED flag was successfully sent on close, cancel data
+ * modification flag. */
+ if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags &= ~LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ }
+
+ ll_finish_md_op_data(op_data);
+
+ if (rc == 0) {
+ rc = ll_objects_destroy(req, inode);
+ if (rc)
+ CERROR("inode %lu ll_objects destroy: rc = %d\n",
+ inode->i_ino, rc);
+ }
+
+ EXIT;
+out:
+
+ if (exp_connect_som(exp) && !epoch_close &&
+ S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
+ ll_queue_done_writing(inode, LLIF_DONE_WRITING);
+ } else {
+ md_clear_open_replay_data(md_exp, och);
+ /* Free @och if it is not waiting for DONE_WRITING. */
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE_PTR(och);
+ }
+ if (req) /* This is close request */
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+int ll_md_real_close(struct inode *inode, int flags)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle **och_p;
+ struct obd_client_handle *och;
+ __u64 *och_usecount;
+ int rc = 0;
+ ENTRY;
+
+ if (flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ LASSERT(flags & FMODE_READ);
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
+ mutex_lock(&lli->lli_och_mutex);
+ if (*och_usecount) { /* There are still users of this handle, so
+ skip freeing it. */
+ mutex_unlock(&lli->lli_och_mutex);
+ RETURN(0);
+ }
+ och=*och_p;
+ *och_p = NULL;
+ mutex_unlock(&lli->lli_och_mutex);
+
+ if (och) { /* There might be a race and somebody have freed this och
+ already */
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
+ inode, och);
+ }
+
+ RETURN(rc);
+}
+
+int ll_md_close(struct obd_export *md_exp, struct inode *inode,
+ struct file *file)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
+ ENTRY;
+
+ /* clear group lock, if present */
+ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
+ ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+
+ /* Let's see if we have good enough OPEN lock on the file and if
+ we can skip talking to MDS */
+ if (file->f_dentry->d_inode) { /* Can this ever be false? */
+ int lockmode;
+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+ struct lustre_handle lockh;
+ struct inode *inode = file->f_dentry->d_inode;
+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
+
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_omode & FMODE_WRITE) {
+ lockmode = LCK_CW;
+ LASSERT(lli->lli_open_fd_write_count);
+ lli->lli_open_fd_write_count--;
+ } else if (fd->fd_omode & FMODE_EXEC) {
+ lockmode = LCK_PR;
+ LASSERT(lli->lli_open_fd_exec_count);
+ lli->lli_open_fd_exec_count--;
+ } else {
+ lockmode = LCK_CR;
+ LASSERT(lli->lli_open_fd_read_count);
+ lli->lli_open_fd_read_count--;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
+ LDLM_IBITS, &policy, lockmode,
+ &lockh)) {
+ rc = ll_md_real_close(file->f_dentry->d_inode,
+ fd->fd_omode);
+ }
+ } else {
+ CERROR("Releasing a file %p with negative dentry %p. Name %s",
+ file, file->f_dentry, file->f_dentry->d_name.name);
+ }
+
+ LUSTRE_FPRIVATE(file) = NULL;
+ ll_file_data_put(fd);
+ ll_capa_close(inode);
+
+ RETURN(rc);
+}
+
+/* While this returns an error code, fput() the caller does not, so we need
+ * to make every effort to clean up all of our state here. Also, applications
+ * rarely check close errors and even if an error is returned they will not
+ * re-try the close call.
+ */
+int ll_file_release(struct inode *inode, struct file *file)
+{
+ struct ll_file_data *fd;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
+ inode->i_generation, inode);
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
+ inode == inode->i_sb->s_root->d_inode) {
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+
+ LASSERT(fd != NULL);
+ if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
+ fd->fd_flags &= ~LL_FILE_RMTACL;
+ rct_del(&sbi->ll_rct, current_pid());
+ et_search_free(&sbi->ll_et, current_pid());
+ }
+ }
+#endif
+
+ if (inode->i_sb->s_root != file->f_dentry)
+ ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
+ fd = LUSTRE_FPRIVATE(file);
+ LASSERT(fd != NULL);
+
+ /* The last ref on @file, maybe not the the owner pid of statahead.
+ * Different processes can open the same dir, "ll_opendir_key" means:
+ * it is me that should stop the statahead thread. */
+ if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
+ lli->lli_opendir_pid != 0)
+ ll_stop_statahead(inode, lli->lli_opendir_key);
+
+ if (inode->i_sb->s_root == file->f_dentry) {
+ LUSTRE_FPRIVATE(file) = NULL;
+ ll_file_data_put(fd);
+ RETURN(0);
+ }
+
+ if (!S_ISDIR(inode->i_mode)) {
+ lov_read_and_clear_async_rc(lli->lli_clob);
+ lli->lli_async_rc = 0;
+ }
+
+ rc = ll_md_close(sbi->ll_md_exp, inode, file);
+
+ if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
+ libcfs_debug_dumplog();
+
+ RETURN(rc);
+}
+
+static int ll_intent_file_open(struct file *file, void *lmm,
+ int lmmsize, struct lookup_intent *itp)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
+ struct dentry *parent = file->f_dentry->d_parent;
+ const char *name = file->f_dentry->d_name.name;
+ const int len = file->f_dentry->d_name.len;
+ struct md_op_data *op_data;
+ struct ptlrpc_request *req;
+ __u32 opc = LUSTRE_OPC_ANY;
+ int rc;
+ ENTRY;
+
+ if (!parent)
+ RETURN(-ENOENT);
+
+ /* Usually we come here only for NFSD, and we want open lock.
+ But we can also get here with pre 2.6.15 patchless kernels, and in
+ that case that lock is also ok */
+ /* We can also get here if there was cached open handle in revalidate_it
+ * but it disappeared while we were getting from there to ll_file_open.
+ * But this means this file was closed and immediatelly opened which
+ * makes a good candidate for using OPEN lock */
+ /* If lmmsize & lmm are not 0, we are just setting stripe info
+ * parameters. No need for the open lock */
+ if (lmm == NULL && lmmsize == 0) {
+ itp->it_flags |= MDS_OPEN_LOCK;
+ if (itp->it_flags & FMODE_WRITE)
+ opc = LUSTRE_OPC_CREATE;
+ }
+
+ op_data = ll_prep_md_op_data(NULL, parent->d_inode,
+ file->f_dentry->d_inode, name, len,
+ O_RDWR, opc, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ itp->it_flags |= MDS_OPEN_BY_FID;
+ rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
+ 0 /*unused */, &req, ll_md_blocking_ast, 0);
+ ll_finish_md_op_data(op_data);
+ if (rc == -ESTALE) {
+ /* reason for keep own exit path - don`t flood log
+ * with messages with -ESTALE errors.
+ */
+ if (!it_disposition(itp, DISP_OPEN_OPEN) ||
+ it_open_error(DISP_OPEN_OPEN, itp))
+ GOTO(out, rc);
+ ll_release_openhandle(file->f_dentry, itp);
+ GOTO(out, rc);
+ }
+
+ if (it_disposition(itp, DISP_LOOKUP_NEG))
+ GOTO(out, rc = -ENOENT);
+
+ if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
+ CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
+ if (!rc && itp->d.lustre.it_lock_mode)
+ ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
+ itp, NULL);
+
+out:
+ ptlrpc_req_finished(itp->d.lustre.it_data);
+ it_clear_disposition(itp, DISP_ENQ_COMPLETE);
+ ll_intent_drop_lock(itp);
+
+ RETURN(rc);
+}
+
+/**
+ * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
+ * not believe attributes if a few ioepoch holders exist. Attributes for
+ * previous ioepoch if new one is opened are also skipped by MDS.
+ */
+void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
+{
+ if (ioepoch && lli->lli_ioepoch != ioepoch) {
+ lli->lli_ioepoch = ioepoch;
+ CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
+ ioepoch, PFID(&lli->lli_fid));
+ }
+}
+
+static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
+ struct lookup_intent *it, struct obd_client_handle *och)
+{
+ struct ptlrpc_request *req = it->d.lustre.it_data;
+ struct mdt_body *body;
+
+ LASSERT(och);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL); /* reply already checked out */
+
+ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
+ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+ och->och_fid = lli->lli_fid;
+ och->och_flags = it->it_flags;
+ ll_ioepoch_open(lli, body->ioepoch);
+
+ return md_set_open_replay_data(md_exp, och, req);
+}
+
+int ll_local_open(struct file *file, struct lookup_intent *it,
+ struct ll_file_data *fd, struct obd_client_handle *och)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ ENTRY;
+
+ LASSERT(!LUSTRE_FPRIVATE(file));
+
+ LASSERT(fd != NULL);
+
+ if (och) {
+ struct ptlrpc_request *req = it->d.lustre.it_data;
+ struct mdt_body *body;
+ int rc;
+
+ rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
+ if (rc)
+ RETURN(rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if ((it->it_flags & FMODE_WRITE) &&
+ (body->valid & OBD_MD_FLSIZE))
+ CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
+ lli->lli_ioepoch, PFID(&lli->lli_fid));
+ }
+
+ LUSTRE_FPRIVATE(file) = fd;
+ ll_readahead_init(inode, &fd->fd_ras);
+ fd->fd_omode = it->it_flags;
+ RETURN(0);
+}
+
+/* Open a file, and (for the very first open) create objects on the OSTs at
+ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
+ * creation or open until ll_lov_setstripe() ioctl is called.
+ *
+ * If we already have the stripe MD locally then we don't request it in
+ * md_open(), by passing a lmm_size = 0.
+ *
+ * It is up to the application to ensure no other processes open this file
+ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
+ * used. We might be able to avoid races of that sort by getting lli_open_sem
+ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
+ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
+ */
+int ll_file_open(struct inode *inode, struct file *file)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lookup_intent *it, oit = { .it_op = IT_OPEN,
+ .it_flags = file->f_flags };
+ struct obd_client_handle **och_p = NULL;
+ __u64 *och_usecount = NULL;
+ struct ll_file_data *fd;
+ int rc = 0, opendir_set = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
+ inode->i_generation, inode, file->f_flags);
+
+ it = file->private_data; /* XXX: compat macro */
+ file->private_data = NULL; /* prevent ll_local_open assertion */
+
+ fd = ll_file_data_get();
+ if (fd == NULL)
+ GOTO(out_och_free, rc = -ENOMEM);
+
+ fd->fd_file = file;
+ if (S_ISDIR(inode->i_mode)) {
+ spin_lock(&lli->lli_sa_lock);
+ if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
+ lli->lli_opendir_pid == 0) {
+ lli->lli_opendir_key = fd;
+ lli->lli_opendir_pid = current_pid();
+ opendir_set = 1;
+ }
+ spin_unlock(&lli->lli_sa_lock);
+ }
+
+ if (inode->i_sb->s_root == file->f_dentry) {
+ LUSTRE_FPRIVATE(file) = fd;
+ RETURN(0);
+ }
+
+ if (!it || !it->d.lustre.it_disposition) {
+ /* Convert f_flags into access mode. We cannot use file->f_mode,
+ * because everything but O_ACCMODE mask was stripped from
+ * there */
+ if ((oit.it_flags + 1) & O_ACCMODE)
+ oit.it_flags++;
+ if (file->f_flags & O_TRUNC)
+ oit.it_flags |= FMODE_WRITE;
+
+ /* kernel only call f_op->open in dentry_open. filp_open calls
+ * dentry_open after call to open_namei that checks permissions.
+ * Only nfsd_open call dentry_open directly without checking
+ * permissions and because of that this code below is safe. */
+ if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
+ oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
+
+ /* We do not want O_EXCL here, presumably we opened the file
+ * already? XXX - NFS implications? */
+ oit.it_flags &= ~O_EXCL;
+
+ /* bug20584, if "it_flags" contains O_CREAT, the file will be
+ * created if necessary, then "IT_CREAT" should be set to keep
+ * consistent with it */
+ if (oit.it_flags & O_CREAT)
+ oit.it_op |= IT_CREAT;
+
+ it = &oit;
+ }
+
+restart:
+ /* Let's see if we have file open on MDS already. */
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
+ mutex_lock(&lli->lli_och_mutex);
+ if (*och_p) { /* Open handle is present */
+ if (it_disposition(it, DISP_OPEN_OPEN)) {
+ /* Well, there's extra open request that we do not need,
+ let's close it somehow. This will decref request. */
+ rc = it_open_error(DISP_OPEN_OPEN, it);
+ if (rc) {
+ mutex_unlock(&lli->lli_och_mutex);
+ GOTO(out_openerr, rc);
+ }
+
+ ll_release_openhandle(file->f_dentry, it);
+ }
+ (*och_usecount)++;
+
+ rc = ll_local_open(file, it, fd, NULL);
+ if (rc) {
+ (*och_usecount)--;
+ mutex_unlock(&lli->lli_och_mutex);
+ GOTO(out_openerr, rc);
+ }
+ } else {
+ LASSERT(*och_usecount == 0);
+ if (!it->d.lustre.it_disposition) {
+ /* We cannot just request lock handle now, new ELC code
+ means that one of other OPEN locks for this file
+ could be cancelled, and since blocking ast handler
+ would attempt to grab och_mutex as well, that would
+ result in a deadlock */
+ mutex_unlock(&lli->lli_och_mutex);
+ it->it_create_mode |= M_CHECK_STALE;
+ rc = ll_intent_file_open(file, NULL, 0, it);
+ it->it_create_mode &= ~M_CHECK_STALE;
+ if (rc)
+ GOTO(out_openerr, rc);
+
+ goto restart;
+ }
+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
+ if (!*och_p)
+ GOTO(out_och_free, rc = -ENOMEM);
+
+ (*och_usecount)++;
+
+ /* md_intent_lock() didn't get a request ref if there was an
+ * open error, so don't do cleanup on the request here
+ * (bug 3430) */
+ /* XXX (green): Should not we bail out on any error here, not
+ * just open error? */
+ rc = it_open_error(DISP_OPEN_OPEN, it);
+ if (rc)
+ GOTO(out_och_free, rc);
+
+ LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
+
+ rc = ll_local_open(file, it, fd, *och_p);
+ if (rc)
+ GOTO(out_och_free, rc);
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ fd = NULL;
+
+ /* Must do this outside lli_och_mutex lock to prevent deadlock where
+ different kind of OPEN lock for this same inode gets cancelled
+ by ldlm_cancel_lru */
+ if (!S_ISREG(inode->i_mode))
+ GOTO(out_och_free, rc);
+
+ ll_capa_open(inode);
+
+ if (!lli->lli_has_smd) {
+ if (file->f_flags & O_LOV_DELAY_CREATE ||
+ !(file->f_mode & FMODE_WRITE)) {
+ CDEBUG(D_INODE, "object creation was delayed\n");
+ GOTO(out_och_free, rc);
+ }
+ }
+ file->f_flags &= ~O_LOV_DELAY_CREATE;
+ GOTO(out_och_free, rc);
+
+out_och_free:
+ if (rc) {
+ if (och_p && *och_p) {
+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
+ *och_p = NULL; /* OBD_FREE writes some magic there */
+ (*och_usecount)--;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+out_openerr:
+ if (opendir_set != 0)
+ ll_stop_statahead(inode, lli->lli_opendir_key);
+ if (fd != NULL)
+ ll_file_data_put(fd);
+ } else {
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
+ }
+
+ if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
+ ptlrpc_req_finished(it->d.lustre.it_data);
+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
+ }
+
+ return rc;
+}
+
+/* Fills the obdo with the attributes for the lsm */
+static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
+ struct obd_capa *capa, struct obdo *obdo,
+ __u64 ioepoch, int sync)
+{
+ struct ptlrpc_request_set *set;
+ struct obd_info oinfo = { { { 0 } } };
+ int rc;
+
+ ENTRY;
+
+ LASSERT(lsm != NULL);
+
+ oinfo.oi_md = lsm;
+ oinfo.oi_oa = obdo;
+ oinfo.oi_oa->o_oi = lsm->lsm_oi;
+ oinfo.oi_oa->o_mode = S_IFREG;
+ oinfo.oi_oa->o_ioepoch = ioepoch;
+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
+ OBD_MD_FLDATAVERSION;
+ oinfo.oi_capa = capa;
+ if (sync) {
+ oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
+ oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
+ }
+
+ set = ptlrpc_prep_set();
+ if (set == NULL) {
+ CERROR("can't allocate ptlrpc set\n");
+ rc = -ENOMEM;
+ } else {
+ rc = obd_getattr_async(exp, &oinfo, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+ }
+ if (rc == 0)
+ oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME | OBD_MD_FLSIZE |
+ OBD_MD_FLDATAVERSION);
+ RETURN(rc);
+}
+
+/**
+ * Performs the getattr on the inode and updates its fields.
+ * If @sync != 0, perform the getattr under the server-side lock.
+ */
+int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
+ __u64 ioepoch, int sync)
+{
+ struct obd_capa *capa = ll_mdscapa_get(inode);
+ struct lov_stripe_md *lsm;
+ int rc;
+ ENTRY;
+
+ lsm = ccc_inode_lsm_get(inode);
+ rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
+ capa, obdo, ioepoch, sync);
+ capa_put(capa);
+ if (rc == 0) {
+ struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
+
+ obdo_refresh_inode(inode, obdo, obdo->o_valid);
+ CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
+ " blksize %lu\n", POSTID(oi), i_size_read(inode),
+ (unsigned long long)inode->i_blocks,
+ (unsigned long)ll_inode_blksize(inode));
+ }
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(rc);
+}
+
+int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_object *obj = lli->lli_clob;
+ struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct ost_lvb lvb;
+ int rc = 0;
+
+ ENTRY;
+
+ ll_inode_size_lock(inode);
+ /* merge timestamps the most recently obtained from mds with
+ timestamps obtained from osts */
+ LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
+ LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
+ LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
+ inode_init_lvb(inode, &lvb);
+
+ cl_object_attr_lock(obj);
+ rc = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+
+ if (rc == 0) {
+ if (lvb.lvb_atime < attr->cat_atime)
+ lvb.lvb_atime = attr->cat_atime;
+ if (lvb.lvb_ctime < attr->cat_ctime)
+ lvb.lvb_ctime = attr->cat_ctime;
+ if (lvb.lvb_mtime < attr->cat_mtime)
+ lvb.lvb_mtime = attr->cat_mtime;
+
+ CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
+ PFID(&lli->lli_fid), attr->cat_size);
+ cl_isize_write_nolock(inode, attr->cat_size);
+
+ inode->i_blocks = attr->cat_blocks;
+
+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
+ }
+ ll_inode_size_unlock(inode);
+
+ RETURN(rc);
+}
+
+int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
+ lstat_t *st)
+{
+ struct obdo obdo = { 0 };
+ int rc;
+
+ rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
+ if (rc == 0) {
+ st->st_size = obdo.o_size;
+ st->st_blocks = obdo.o_blocks;
+ st->st_mtime = obdo.o_mtime;
+ st->st_atime = obdo.o_atime;
+ st->st_ctime = obdo.o_ctime;
+ }
+ return rc;
+}
+
+void ll_io_init(struct cl_io *io, const struct file *file, int write)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+
+ io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
+ if (write) {
+ io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
+ io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
+ file->f_flags & O_DIRECT ||
+ IS_SYNC(inode);
+ }
+ io->ci_obj = ll_i2info(inode)->lli_clob;
+ io->ci_lockreq = CILR_MAYBE;
+ if (ll_file_nolock(file)) {
+ io->ci_lockreq = CILR_NEVER;
+ io->ci_no_srvlock = 1;
+ } else if (file->f_flags & O_APPEND) {
+ io->ci_lockreq = CILR_MANDATORY;
+ }
+}
+
+static ssize_t
+ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
+ struct file *file, enum cl_io_type iot,
+ loff_t *ppos, size_t count)
+{
+ struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct cl_io *io;
+ ssize_t result;
+ ENTRY;
+
+restart:
+ io = ccc_env_thread_io(env);
+ ll_io_init(io, file, iot == CIT_WRITE);
+
+ if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
+ struct vvp_io *vio = vvp_env_io(env);
+ struct ccc_io *cio = ccc_env_io(env);
+ int write_mutex_locked = 0;
+
+ cio->cui_fd = LUSTRE_FPRIVATE(file);
+ vio->cui_io_subtype = args->via_io_subtype;
+
+ switch (vio->cui_io_subtype) {
+ case IO_NORMAL:
+ cio->cui_iov = args->u.normal.via_iov;
+ cio->cui_nrsegs = args->u.normal.via_nrsegs;
+ cio->cui_tot_nrsegs = cio->cui_nrsegs;
+ cio->cui_iocb = args->u.normal.via_iocb;
+ if ((iot == CIT_WRITE) &&
+ !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ if (mutex_lock_interruptible(&lli->
+ lli_write_mutex))
+ GOTO(out, result = -ERESTARTSYS);
+ write_mutex_locked = 1;
+ } else if (iot == CIT_READ) {
+ down_read(&lli->lli_trunc_sem);
+ }
+ break;
+ case IO_SENDFILE:
+ vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
+ vio->u.sendfile.cui_target = args->u.sendfile.via_target;
+ break;
+ case IO_SPLICE:
+ vio->u.splice.cui_pipe = args->u.splice.via_pipe;
+ vio->u.splice.cui_flags = args->u.splice.via_flags;
+ break;
+ default:
+ CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
+ LBUG();
+ }
+ result = cl_io_loop(env, io);
+ if (write_mutex_locked)
+ mutex_unlock(&lli->lli_write_mutex);
+ else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
+ up_read(&lli->lli_trunc_sem);
+ } else {
+ /* cl_io_rw_init() handled IO */
+ result = io->ci_result;
+ }
+
+ if (io->ci_nob > 0) {
+ result = io->ci_nob;
+ *ppos = io->u.ci_wr.wr.crw_pos;
+ }
+ GOTO(out, result);
+out:
+ cl_io_fini(env, io);
+ /* If any bit been read/written (result != 0), we just return
+ * short read/write instead of restart io. */
+ if (result == 0 && io->ci_need_restart) {
+ CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
+ iot == CIT_READ ? "read" : "write",
+ file->f_dentry->d_name.name, *ppos, count);
+ LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
+ goto restart;
+ }
+
+ if (iot == CIT_READ) {
+ if (result >= 0)
+ ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
+ LPROC_LL_READ_BYTES, result);
+ } else if (iot == CIT_WRITE) {
+ if (result >= 0) {
+ ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
+ LPROC_LL_WRITE_BYTES, result);
+ fd->fd_write_failed = false;
+ } else if (result != -ERESTARTSYS) {
+ fd->fd_write_failed = true;
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
+ */
+static int ll_file_get_iov_count(const struct iovec *iov,
+ unsigned long *nr_segs, size_t *count)
+{
+ size_t cnt = 0;
+ unsigned long seg;
+
+ for (seg = 0; seg < *nr_segs; seg++) {
+ const struct iovec *iv = &iov[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ cnt += iv->iov_len;
+ if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
+ return -EINVAL;
+ if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+ continue;
+ if (seg == 0)
+ return -EFAULT;
+ *nr_segs = seg;
+ cnt -= iv->iov_len; /* This segment is no good */
+ break;
+ }
+ *count = cnt;
+ return 0;
+}
+
+static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct lu_env *env;
+ struct vvp_io_args *args;
+ size_t count;
+ ssize_t result;
+ int refcheck;
+ ENTRY;
+
+ result = ll_file_get_iov_count(iov, &nr_segs, &count);
+ if (result)
+ RETURN(result);
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ args = vvp_env_args(env, IO_NORMAL);
+ args->u.normal.via_iov = (struct iovec *)iov;
+ args->u.normal.via_nrsegs = nr_segs;
+ args->u.normal.via_iocb = iocb;
+
+ result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
+ &iocb->ki_pos, count);
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
+ loff_t *ppos)
+{
+ struct lu_env *env;
+ struct iovec *local_iov;
+ struct kiocb *kiocb;
+ ssize_t result;
+ int refcheck;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ local_iov = &vvp_env_info(env)->vti_local_iov;
+ kiocb = &vvp_env_info(env)->vti_kiocb;
+ local_iov->iov_base = (void __user *)buf;
+ local_iov->iov_len = count;
+ init_sync_kiocb(kiocb, file);
+ kiocb->ki_pos = *ppos;
+ kiocb->ki_left = count;
+
+ result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
+ *ppos = kiocb->ki_pos;
+
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+/*
+ * Write to a file (through the page cache).
+ */
+static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct lu_env *env;
+ struct vvp_io_args *args;
+ size_t count;
+ ssize_t result;
+ int refcheck;
+ ENTRY;
+
+ result = ll_file_get_iov_count(iov, &nr_segs, &count);
+ if (result)
+ RETURN(result);
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ args = vvp_env_args(env, IO_NORMAL);
+ args->u.normal.via_iov = (struct iovec *)iov;
+ args->u.normal.via_nrsegs = nr_segs;
+ args->u.normal.via_iocb = iocb;
+
+ result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
+ &iocb->ki_pos, count);
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
+ loff_t *ppos)
+{
+ struct lu_env *env;
+ struct iovec *local_iov;
+ struct kiocb *kiocb;
+ ssize_t result;
+ int refcheck;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ local_iov = &vvp_env_info(env)->vti_local_iov;
+ kiocb = &vvp_env_info(env)->vti_kiocb;
+ local_iov->iov_base = (void __user *)buf;
+ local_iov->iov_len = count;
+ init_sync_kiocb(kiocb, file);
+ kiocb->ki_pos = *ppos;
+ kiocb->ki_left = count;
+
+ result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
+ *ppos = kiocb->ki_pos;
+
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+
+
+/*
+ * Send file content (through pagecache) somewhere with helper
+ */
+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t count,
+ unsigned int flags)
+{
+ struct lu_env *env;
+ struct vvp_io_args *args;
+ ssize_t result;
+ int refcheck;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ args = vvp_env_args(env, IO_SPLICE);
+ args->u.splice.via_pipe = pipe;
+ args->u.splice.via_flags = flags;
+
+ result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
+ cl_env_put(env, &refcheck);
+ RETURN(result);
+}
+
+static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
+ obd_count ost_idx)
+{
+ struct obd_export *exp = ll_i2dtexp(inode);
+ struct obd_trans_info oti = { 0 };
+ struct obdo *oa = NULL;
+ int lsm_size;
+ int rc = 0;
+ struct lov_stripe_md *lsm = NULL, *lsm2;
+ ENTRY;
+
+ OBDO_ALLOC(oa);
+ if (oa == NULL)
+ RETURN(-ENOMEM);
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL)
+ GOTO(out, rc = -ENOENT);
+
+ lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
+ (lsm->lsm_stripe_count));
+
+ OBD_ALLOC_LARGE(lsm2, lsm_size);
+ if (lsm2 == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ oa->o_oi = *oi;
+ oa->o_nlink = ost_idx;
+ oa->o_flags |= OBD_FL_RECREATE_OBJS;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+ memcpy(lsm2, lsm, lsm_size);
+ ll_inode_size_lock(inode);
+ rc = obd_create(NULL, exp, oa, &lsm2, &oti);
+ ll_inode_size_unlock(inode);
+
+ OBD_FREE_LARGE(lsm2, lsm_size);
+ GOTO(out, rc);
+out:
+ ccc_inode_lsm_put(inode, lsm);
+ OBDO_FREE(oa);
+ return rc;
+}
+
+static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
+{
+ struct ll_recreate_obj ucreat;
+ struct ost_id oi;
+ ENTRY;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ RETURN(-EPERM);
+
+ if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
+ sizeof(ucreat)))
+ RETURN(-EFAULT);
+
+ ostid_set_seq_mdt0(&oi);
+ ostid_set_id(&oi, ucreat.lrc_id);
+ RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
+}
+
+static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
+{
+ struct lu_fid fid;
+ struct ost_id oi;
+ obd_count ost_idx;
+ ENTRY;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ RETURN(-EPERM);
+
+ if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
+ RETURN(-EFAULT);
+
+ fid_to_ostid(&fid, &oi);
+ ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
+ RETURN(ll_lov_recreate(inode, &oi, ost_idx));
+}
+
+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
+ int flags, struct lov_user_md *lum, int lum_size)
+{
+ struct lov_stripe_md *lsm = NULL;
+ struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
+ int rc = 0;
+ ENTRY;
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm != NULL) {
+ ccc_inode_lsm_put(inode, lsm);
+ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
+ inode->i_ino);
+ RETURN(-EEXIST);
+ }
+
+ ll_inode_size_lock(inode);
+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
+ if (rc)
+ GOTO(out, rc);
+ rc = oit.d.lustre.it_status;
+ if (rc < 0)
+ GOTO(out_req_free, rc);
+
+ ll_release_openhandle(file->f_dentry, &oit);
+
+ out:
+ ll_inode_size_unlock(inode);
+ ll_intent_release(&oit);
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(rc);
+out_req_free:
+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
+ goto out;
+}
+
+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
+ struct lov_mds_md **lmmp, int *lmm_size,
+ struct ptlrpc_request **request)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct mdt_body *body;
+ struct lov_mds_md *lmm = NULL;
+ struct ptlrpc_request *req = NULL;
+ struct md_op_data *op_data;
+ int rc, lmmsize;
+
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
+ if (rc)
+ RETURN(rc);
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
+ strlen(filename), lmmsize,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
+ rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
+ ll_finish_md_op_data(op_data);
+ if (rc < 0) {
+ CDEBUG(D_INFO, "md_getattr_name failed "
+ "on %s: rc %d\n", filename, rc);
+ GOTO(out, rc);
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL); /* checked by mdc_getattr_name */
+
+ lmmsize = body->eadatasize;
+
+ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+ lmmsize == 0) {
+ GOTO(out, rc = -ENODATA);
+ }
+
+ lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
+ LASSERT(lmm != NULL);
+
+ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
+ GOTO(out, rc = -EPROTO);
+ }
+
+ /*
+ * This is coming from the MDS, so is probably in
+ * little endian. We convert it to host endian before
+ * passing it to userspace.
+ */
+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
+ /* if function called for directory - we should
+ * avoid swab not existent lsm objects */
+ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
+ lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
+ if (S_ISREG(body->mode))
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v1 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
+ lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
+ if (S_ISREG(body->mode))
+ lustre_swab_lov_user_md_objects(
+ ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+ ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
+ }
+ }
+
+out:
+ *lmmp = lmm;
+ *lmm_size = lmmsize;
+ *request = req;
+ return rc;
+}
+
+static int ll_lov_setea(struct inode *inode, struct file *file,
+ unsigned long arg)
+{
+ int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+ struct lov_user_md *lump;
+ int lum_size = sizeof(struct lov_user_md) +
+ sizeof(struct lov_user_ost_data);
+ int rc;
+ ENTRY;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ RETURN(-EPERM);
+
+ OBD_ALLOC_LARGE(lump, lum_size);
+ if (lump == NULL)
+ RETURN(-ENOMEM);
+
+ if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
+ OBD_FREE_LARGE(lump, lum_size);
+ RETURN(-EFAULT);
+ }
+
+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
+
+ OBD_FREE_LARGE(lump, lum_size);
+ RETURN(rc);
+}
+
+static int ll_lov_setstripe(struct inode *inode, struct file *file,
+ unsigned long arg)
+{
+ struct lov_user_md_v3 lumv3;
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
+ struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
+ struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
+ int lum_size, rc;
+ int flags = FMODE_WRITE;
+ ENTRY;
+
+ /* first try with v1 which is smaller than v3 */
+ lum_size = sizeof(struct lov_user_md_v1);
+ if (copy_from_user(lumv1, lumv1p, lum_size))
+ RETURN(-EFAULT);
+
+ if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
+ lum_size = sizeof(struct lov_user_md_v3);
+ if (copy_from_user(&lumv3, lumv3p, lum_size))
+ RETURN(-EFAULT);
+ }
+
+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
+ if (rc == 0) {
+ struct lov_stripe_md *lsm;
+ __u32 gen;
+
+ put_user(0, &lumv1p->lmm_stripe_count);
+
+ ll_layout_refresh(inode, &gen);
+ lsm = ccc_inode_lsm_get(inode);
+ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
+ 0, lsm, (void *)arg);
+ ccc_inode_lsm_put(inode, lsm);
+ }
+ RETURN(rc);
+}
+
+static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
+{
+ struct lov_stripe_md *lsm;
+ int rc = -ENODATA;
+ ENTRY;
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm != NULL)
+ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
+ lsm, (void *)arg);
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(rc);
+}
+
+int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ccc_grouplock grouplock;
+ int rc;
+ ENTRY;
+
+ if (ll_file_nolock(file))
+ RETURN(-EOPNOTSUPP);
+
+ spin_lock(&lli->lli_lock);
+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
+ CWARN("group lock already existed with gid %lu\n",
+ fd->fd_grouplock.cg_gid);
+ spin_unlock(&lli->lli_lock);
+ RETURN(-EINVAL);
+ }
+ LASSERT(fd->fd_grouplock.cg_lock == NULL);
+ spin_unlock(&lli->lli_lock);
+
+ rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
+ arg, (file->f_flags & O_NONBLOCK), &grouplock);
+ if (rc)
+ RETURN(rc);
+
+ spin_lock(&lli->lli_lock);
+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
+ spin_unlock(&lli->lli_lock);
+ CERROR("another thread just won the race\n");
+ cl_put_grouplock(&grouplock);
+ RETURN(-EINVAL);
+ }
+
+ fd->fd_flags |= LL_FILE_GROUP_LOCKED;
+ fd->fd_grouplock = grouplock;
+ spin_unlock(&lli->lli_lock);
+
+ CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
+ RETURN(0);
+}
+
+int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ccc_grouplock grouplock;
+ ENTRY;
+
+ spin_lock(&lli->lli_lock);
+ if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ spin_unlock(&lli->lli_lock);
+ CWARN("no group lock held\n");
+ RETURN(-EINVAL);
+ }
+ LASSERT(fd->fd_grouplock.cg_lock != NULL);
+
+ if (fd->fd_grouplock.cg_gid != arg) {
+ CWARN("group lock %lu doesn't match current id %lu\n",
+ arg, fd->fd_grouplock.cg_gid);
+ spin_unlock(&lli->lli_lock);
+ RETURN(-EINVAL);
+ }
+
+ grouplock = fd->fd_grouplock;
+ memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
+ fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
+ spin_unlock(&lli->lli_lock);
+
+ cl_put_grouplock(&grouplock);
+ CDEBUG(D_INFO, "group lock %lu released\n", arg);
+ RETURN(0);
+}
+
+/**
+ * Close inode open handle
+ *
+ * \param dentry [in] dentry which contains the inode
+ * \param it [in,out] intent which contains open info and result
+ *
+ * \retval 0 success
+ * \retval <0 failure
+ */
+int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
+{
+ struct inode *inode = dentry->d_inode;
+ struct obd_client_handle *och;
+ int rc;
+ ENTRY;
+
+ LASSERT(inode);
+
+ /* Root ? Do nothing. */
+ if (dentry->d_inode->i_sb->s_root == dentry)
+ RETURN(0);
+
+ /* No open handle to close? Move away */
+ if (!it_disposition(it, DISP_OPEN_OPEN))
+ RETURN(0);
+
+ LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
+
+ OBD_ALLOC(och, sizeof(*och));
+ if (!och)
+ GOTO(out, rc = -ENOMEM);
+
+ ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
+ ll_i2info(inode), it, och);
+
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
+ inode, och);
+ out:
+ /* this one is in place of ll_file_open */
+ if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
+ ptlrpc_req_finished(it->d.lustre.it_data);
+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
+ }
+ RETURN(rc);
+}
+
+/**
+ * Get size for inode for which FIEMAP mapping is requested.
+ * Make the FIEMAP get_info call and returns the result.
+ */
+int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
+ int num_bytes)
+{
+ struct obd_export *exp = ll_i2dtexp(inode);
+ struct lov_stripe_md *lsm = NULL;
+ struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
+ int vallen = num_bytes;
+ int rc;
+ ENTRY;
+
+ /* Checks for fiemap flags */
+ if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
+ fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
+ return -EBADR;
+ }
+
+ /* Check for FIEMAP_FLAG_SYNC */
+ if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
+ rc = filemap_fdatawrite(inode->i_mapping);
+ if (rc)
+ return rc;
+ }
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL)
+ return -ENOENT;
+
+ /* If the stripe_count > 1 and the application does not understand
+ * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
+ */
+ if (lsm->lsm_stripe_count > 1 &&
+ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
+ GOTO(out, rc = -EOPNOTSUPP);
+
+ fm_key.oa.o_oi = lsm->lsm_oi;
+ fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+
+ obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
+ obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
+ /* If filesize is 0, then there would be no objects for mapping */
+ if (fm_key.oa.o_size == 0) {
+ fiemap->fm_mapped_extents = 0;
+ GOTO(out, rc = 0);
+ }
+
+ memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
+
+ rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
+ fiemap, lsm);
+ if (rc)
+ CERROR("obd_get_info failed: rc = %d\n", rc);
+
+out:
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(rc);
+}
+
+int ll_fid2path(struct inode *inode, void *arg)
+{
+ struct obd_export *exp = ll_i2mdexp(inode);
+ struct getinfo_fid2path *gfout, *gfin;
+ int outsize, rc;
+ ENTRY;
+
+ if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
+ !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
+ RETURN(-EPERM);
+
+ /* Need to get the buflen */
+ OBD_ALLOC_PTR(gfin);
+ if (gfin == NULL)
+ RETURN(-ENOMEM);
+ if (copy_from_user(gfin, arg, sizeof(*gfin))) {
+ OBD_FREE_PTR(gfin);
+ RETURN(-EFAULT);
+ }
+
+ outsize = sizeof(*gfout) + gfin->gf_pathlen;
+ OBD_ALLOC(gfout, outsize);
+ if (gfout == NULL) {
+ OBD_FREE_PTR(gfin);
+ RETURN(-ENOMEM);
+ }
+ memcpy(gfout, gfin, sizeof(*gfout));
+ OBD_FREE_PTR(gfin);
+
+ /* Call mdc_iocontrol */
+ rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
+ if (rc)
+ GOTO(gf_free, rc);
+
+ if (copy_to_user(arg, gfout, outsize))
+ rc = -EFAULT;
+
+gf_free:
+ OBD_FREE(gfout, outsize);
+ RETURN(rc);
+}
+
+static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
+{
+ struct ll_user_fiemap *fiemap_s;
+ size_t num_bytes, ret_bytes;
+ unsigned int extent_count;
+ int rc = 0;
+
+ /* Get the extent count so we can calculate the size of
+ * required fiemap buffer */
+ if (get_user(extent_count,
+ &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
+ RETURN(-EFAULT);
+ num_bytes = sizeof(*fiemap_s) + (extent_count *
+ sizeof(struct ll_fiemap_extent));
+
+ OBD_ALLOC_LARGE(fiemap_s, num_bytes);
+ if (fiemap_s == NULL)
+ RETURN(-ENOMEM);
+
+ /* get the fiemap value */
+ if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
+ sizeof(*fiemap_s)))
+ GOTO(error, rc = -EFAULT);
+
+ /* If fm_extent_count is non-zero, read the first extent since
+ * it is used to calculate end_offset and device from previous
+ * fiemap call. */
+ if (extent_count) {
+ if (copy_from_user(&fiemap_s->fm_extents[0],
+ (char __user *)arg + sizeof(*fiemap_s),
+ sizeof(struct ll_fiemap_extent)))
+ GOTO(error, rc = -EFAULT);
+ }
+
+ rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
+ if (rc)
+ GOTO(error, rc);
+
+ ret_bytes = sizeof(struct ll_user_fiemap);
+
+ if (extent_count != 0)
+ ret_bytes += (fiemap_s->fm_mapped_extents *
+ sizeof(struct ll_fiemap_extent));
+
+ if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
+ rc = -EFAULT;
+
+error:
+ OBD_FREE_LARGE(fiemap_s, num_bytes);
+ RETURN(rc);
+}
+
+/*
+ * Read the data_version for inode.
+ *
+ * This value is computed using stripe object version on OST.
+ * Version is computed using server side locking.
+ *
+ * @param extent_lock Take extent lock. Not needed if a process is already
+ * holding the OST object group locks.
+ */
+int ll_data_version(struct inode *inode, __u64 *data_version,
+ int extent_lock)
+{
+ struct lov_stripe_md *lsm = NULL;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obdo *obdo = NULL;
+ int rc;
+ ENTRY;
+
+ /* If no stripe, we consider version is 0. */
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL) {
+ *data_version = 0;
+ CDEBUG(D_INODE, "No object for inode\n");
+ RETURN(0);
+ }
+
+ OBD_ALLOC_PTR(obdo);
+ if (obdo == NULL) {
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(-ENOMEM);
+ }
+
+ rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
+ if (!rc) {
+ if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
+ rc = -EOPNOTSUPP;
+ else
+ *data_version = obdo->o_data_version;
+ }
+
+ OBD_FREE_PTR(obdo);
+ ccc_inode_lsm_put(inode, lsm);
+
+ RETURN(rc);
+}
+
+struct ll_swap_stack {
+ struct iattr ia1, ia2;
+ __u64 dv1, dv2;
+ struct inode *inode1, *inode2;
+ bool check_dv1, check_dv2;
+};
+
+static int ll_swap_layouts(struct file *file1, struct file *file2,
+ struct lustre_swap_layouts *lsl)
+{
+ struct mdc_swap_layouts msl;
+ struct md_op_data *op_data;
+ __u32 gid;
+ __u64 dv;
+ struct ll_swap_stack *llss = NULL;
+ int rc;
+
+ OBD_ALLOC_PTR(llss);
+ if (llss == NULL)
+ RETURN(-ENOMEM);
+
+ llss->inode1 = file1->f_dentry->d_inode;
+ llss->inode2 = file2->f_dentry->d_inode;
+
+ if (!S_ISREG(llss->inode2->i_mode))
+ GOTO(free, rc = -EINVAL);
+
+ if (ll_permission(llss->inode1, MAY_WRITE, NULL) ||
+ ll_permission(llss->inode2, MAY_WRITE, NULL))
+ GOTO(free, rc = -EPERM);
+
+ if (llss->inode2->i_sb != llss->inode1->i_sb)
+ GOTO(free, rc = -EXDEV);
+
+ /* we use 2 bool because it is easier to swap than 2 bits */
+ if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
+ llss->check_dv1 = true;
+
+ if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
+ llss->check_dv2 = true;
+
+ /* we cannot use lsl->sl_dvX directly because we may swap them */
+ llss->dv1 = lsl->sl_dv1;
+ llss->dv2 = lsl->sl_dv2;
+
+ rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
+ if (rc == 0) /* same file, done! */
+ GOTO(free, rc = 0);
+
+ if (rc < 0) { /* sequentialize it */
+ swap(llss->inode1, llss->inode2);
+ swap(file1, file2);
+ swap(llss->dv1, llss->dv2);
+ swap(llss->check_dv1, llss->check_dv2);
+ }
+
+ gid = lsl->sl_gid;
+ if (gid != 0) { /* application asks to flush dirty cache */
+ rc = ll_get_grouplock(llss->inode1, file1, gid);
+ if (rc < 0)
+ GOTO(free, rc);
+
+ rc = ll_get_grouplock(llss->inode2, file2, gid);
+ if (rc < 0) {
+ ll_put_grouplock(llss->inode1, file1, gid);
+ GOTO(free, rc);
+ }
+ }
+
+ /* to be able to restore mtime and atime after swap
+ * we need to first save them */
+ if (lsl->sl_flags &
+ (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
+ llss->ia1.ia_mtime = llss->inode1->i_mtime;
+ llss->ia1.ia_atime = llss->inode1->i_atime;
+ llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
+ llss->ia2.ia_mtime = llss->inode2->i_mtime;
+ llss->ia2.ia_atime = llss->inode2->i_atime;
+ llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
+ }
+
+ /* ultimate check, before swaping the layouts we check if
+ * dataversion has changed (if requested) */
+ if (llss->check_dv1) {
+ rc = ll_data_version(llss->inode1, &dv, 0);
+ if (rc)
+ GOTO(putgl, rc);
+ if (dv != llss->dv1)
+ GOTO(putgl, rc = -EAGAIN);
+ }
+
+ if (llss->check_dv2) {
+ rc = ll_data_version(llss->inode2, &dv, 0);
+ if (rc)
+ GOTO(putgl, rc);
+ if (dv != llss->dv2)
+ GOTO(putgl, rc = -EAGAIN);
+ }
+
+ /* struct md_op_data is used to send the swap args to the mdt
+ * only flags is missing, so we use struct mdc_swap_layouts
+ * through the md_op_data->op_data */
+ /* flags from user space have to be converted before they are send to
+ * server, no flag is sent today, they are only used on the client */
+ msl.msl_flags = 0;
+ rc = -ENOMEM;
+ op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
+ 0, LUSTRE_OPC_ANY, &msl);
+ if (op_data != NULL) {
+ rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS,
+ ll_i2mdexp(llss->inode1),
+ sizeof(*op_data), op_data, NULL);
+ ll_finish_md_op_data(op_data);
+ }
+
+putgl:
+ if (gid != 0) {
+ ll_put_grouplock(llss->inode2, file2, gid);
+ ll_put_grouplock(llss->inode1, file1, gid);
+ }
+
+ /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
+ if (rc != 0)
+ GOTO(free, rc);
+
+ /* clear useless flags */
+ if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
+ llss->ia1.ia_valid &= ~ATTR_MTIME;
+ llss->ia2.ia_valid &= ~ATTR_MTIME;
+ }
+
+ if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
+ llss->ia1.ia_valid &= ~ATTR_ATIME;
+ llss->ia2.ia_valid &= ~ATTR_ATIME;
+ }
+
+ /* update time if requested */
+ rc = 0;
+ if (llss->ia2.ia_valid != 0) {
+ mutex_lock(&llss->inode1->i_mutex);
+ rc = ll_setattr(file1->f_dentry, &llss->ia2);
+ mutex_unlock(&llss->inode1->i_mutex);
+ }
+
+ if (llss->ia1.ia_valid != 0) {
+ int rc1;
+
+ mutex_lock(&llss->inode2->i_mutex);
+ rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
+ mutex_unlock(&llss->inode2->i_mutex);
+ if (rc == 0)
+ rc = rc1;
+ }
+
+free:
+ if (llss != NULL)
+ OBD_FREE_PTR(llss);
+
+ RETURN(rc);
+}
+
+long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ int flags, rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
+ inode->i_generation, inode, cmd);
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
+
+ /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
+ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
+ RETURN(-ENOTTY);
+
+ switch(cmd) {
+ case LL_IOC_GETFLAGS:
+ /* Get the current value of the file flags */
+ return put_user(fd->fd_flags, (int *)arg);
+ case LL_IOC_SETFLAGS:
+ case LL_IOC_CLRFLAGS:
+ /* Set or clear specific file flags */
+ /* XXX This probably needs checks to ensure the flags are
+ * not abused, and to handle any flag side effects.
+ */
+ if (get_user(flags, (int *) arg))
+ RETURN(-EFAULT);
+
+ if (cmd == LL_IOC_SETFLAGS) {
+ if ((flags & LL_FILE_IGNORE_LOCK) &&
+ !(file->f_flags & O_DIRECT)) {
+ CERROR("%s: unable to disable locking on "
+ "non-O_DIRECT file\n", current->comm);
+ RETURN(-EINVAL);
+ }
+
+ fd->fd_flags |= flags;
+ } else {
+ fd->fd_flags &= ~flags;
+ }
+ RETURN(0);
+ case LL_IOC_LOV_SETSTRIPE:
+ RETURN(ll_lov_setstripe(inode, file, arg));
+ case LL_IOC_LOV_SETEA:
+ RETURN(ll_lov_setea(inode, file, arg));
+ case LL_IOC_LOV_SWAP_LAYOUTS: {
+ struct file *file2;
+ struct lustre_swap_layouts lsl;
+
+ if (copy_from_user(&lsl, (char *)arg,
+ sizeof(struct lustre_swap_layouts)))
+ RETURN(-EFAULT);
+
+ if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+ RETURN(-EPERM);
+
+ file2 = fget(lsl.sl_fd);
+ if (file2 == NULL)
+ RETURN(-EBADF);
+
+ rc = -EPERM;
+ if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+ rc = ll_swap_layouts(file, file2, &lsl);
+ fput(file2);
+ RETURN(rc);
+ }
+ case LL_IOC_LOV_GETSTRIPE:
+ RETURN(ll_lov_getstripe(inode, arg));
+ case LL_IOC_RECREATE_OBJ:
+ RETURN(ll_lov_recreate_obj(inode, arg));
+ case LL_IOC_RECREATE_FID:
+ RETURN(ll_lov_recreate_fid(inode, arg));
+ case FSFILT_IOC_FIEMAP:
+ RETURN(ll_ioctl_fiemap(inode, arg));
+ case FSFILT_IOC_GETFLAGS:
+ case FSFILT_IOC_SETFLAGS:
+ RETURN(ll_iocontrol(inode, file, cmd, arg));
+ case FSFILT_IOC_GETVERSION_OLD:
+ case FSFILT_IOC_GETVERSION:
+ RETURN(put_user(inode->i_generation, (int *)arg));
+ case LL_IOC_GROUP_LOCK:
+ RETURN(ll_get_grouplock(inode, file, arg));
+ case LL_IOC_GROUP_UNLOCK:
+ RETURN(ll_put_grouplock(inode, file, arg));
+ case IOC_OBD_STATFS:
+ RETURN(ll_obd_statfs(inode, (void *)arg));
+
+ /* We need to special case any other ioctls we want to handle,
+ * to send them to the MDS/OST as appropriate and to properly
+ * network encode the arg field.
+ case FSFILT_IOC_SETVERSION_OLD:
+ case FSFILT_IOC_SETVERSION:
+ */
+ case LL_IOC_FLUSHCTX:
+ RETURN(ll_flush_ctx(inode));
+ case LL_IOC_PATH2FID: {
+ if (copy_to_user((void *)arg, ll_inode2fid(inode),
+ sizeof(struct lu_fid)))
+ RETURN(-EFAULT);
+
+ RETURN(0);
+ }
+ case OBD_IOC_FID2PATH:
+ RETURN(ll_fid2path(inode, (void *)arg));
+ case LL_IOC_DATA_VERSION: {
+ struct ioc_data_version idv;
+ int rc;
+
+ if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
+ RETURN(-EFAULT);
+
+ rc = ll_data_version(inode, &idv.idv_version,
+ !(idv.idv_flags & LL_DV_NOFLUSH));
+
+ if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
+ RETURN(-EFAULT);
+
+ RETURN(rc);
+ }
+
+ case LL_IOC_GET_MDTIDX: {
+ int mdtidx;
+
+ mdtidx = ll_get_mdt_idx(inode);
+ if (mdtidx < 0)
+ RETURN(mdtidx);
+
+ if (put_user((int)mdtidx, (int*)arg))
+ RETURN(-EFAULT);
+
+ RETURN(0);
+ }
+ case OBD_IOC_GETDTNAME:
+ case OBD_IOC_GETMDNAME:
+ RETURN(ll_get_obd_name(inode, cmd, arg));
+ case LL_IOC_HSM_STATE_GET: {
+ struct md_op_data *op_data;
+ struct hsm_user_state *hus;
+ int rc;
+
+ OBD_ALLOC_PTR(hus);
+ if (hus == NULL)
+ RETURN(-ENOMEM);
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, hus);
+ if (op_data == NULL) {
+ OBD_FREE_PTR(hus);
+ RETURN(-ENOMEM);
+ }
+
+ rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+ op_data, NULL);
+
+ if (copy_to_user((void *)arg, hus, sizeof(*hus)))
+ rc = -EFAULT;
+
+ ll_finish_md_op_data(op_data);
+ OBD_FREE_PTR(hus);
+ RETURN(rc);
+ }
+ case LL_IOC_HSM_STATE_SET: {
+ struct md_op_data *op_data;
+ struct hsm_state_set *hss;
+ int rc;
+
+ OBD_ALLOC_PTR(hss);
+ if (hss == NULL)
+ RETURN(-ENOMEM);
+ if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
+ OBD_FREE_PTR(hss);
+ RETURN(-EFAULT);
+ }
+
+ /* Non-root users are forbidden to set or clear flags which are
+ * NOT defined in HSM_USER_MASK. */
+ if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK)
+ && !cfs_capable(CFS_CAP_SYS_ADMIN)) {
+ OBD_FREE_PTR(hss);
+ RETURN(-EPERM);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, hss);
+ if (op_data == NULL) {
+ OBD_FREE_PTR(hss);
+ RETURN(-ENOMEM);
+ }
+
+ rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+ op_data, NULL);
+
+ ll_finish_md_op_data(op_data);
+
+ OBD_FREE_PTR(hss);
+ RETURN(rc);
+ }
+ case LL_IOC_HSM_ACTION: {
+ struct md_op_data *op_data;
+ struct hsm_current_action *hca;
+ int rc;
+
+ OBD_ALLOC_PTR(hca);
+ if (hca == NULL)
+ RETURN(-ENOMEM);
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, hca);
+ if (op_data == NULL) {
+ OBD_FREE_PTR(hca);
+ RETURN(-ENOMEM);
+ }
+
+ rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
+ op_data, NULL);
+
+ if (copy_to_user((char *)arg, hca, sizeof(*hca)))
+ rc = -EFAULT;
+
+ ll_finish_md_op_data(op_data);
+ OBD_FREE_PTR(hca);
+ RETURN(rc);
+ }
+ default: {
+ int err;
+
+ if (LLIOC_STOP ==
+ ll_iocontrol_call(inode, file, cmd, arg, &err))
+ RETURN(err);
+
+ RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
+ (void *)arg));
+ }
+ }
+}
+
+
+loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ loff_t retval, eof = 0;
+
+ ENTRY;
+ retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
+ (origin == SEEK_CUR) ? file->f_pos : 0);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
+ inode->i_ino, inode->i_generation, inode, retval, retval,
+ origin);
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
+
+ if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
+ retval = ll_glimpse_size(inode);
+ if (retval != 0)
+ RETURN(retval);
+ eof = i_size_read(inode);
+ }
+
+ retval = ll_generic_file_llseek_size(file, offset, origin,
+ ll_file_maxbytes(inode), eof);
+ RETURN(retval);
+}
+
+int ll_flush(struct file *file, fl_owner_t id)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ int rc, err;
+
+ LASSERT(!S_ISDIR(inode->i_mode));
+
+ /* catch async errors that were recorded back when async writeback
+ * failed for pages in this mapping. */
+ rc = lli->lli_async_rc;
+ lli->lli_async_rc = 0;
+ err = lov_read_and_clear_async_rc(lli->lli_clob);
+ if (rc == 0)
+ rc = err;
+
+ /* The application has been told write failure already.
+ * Do not report failure again. */
+ if (fd->fd_write_failed)
+ return 0;
+ return rc ? -EIO : 0;
+}
+
+/**
+ * Called to make sure a portion of file has been written out.
+ * if @local_only is not true, it will send OST_SYNC RPCs to ost.
+ *
+ * Return how many pages have been written.
+ */
+int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
+ enum cl_fsync_mode mode, int ignore_layout)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct obd_capa *capa = NULL;
+ struct cl_fsync_io *fio;
+ int result;
+ ENTRY;
+
+ if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
+ mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
+ RETURN(-EINVAL);
+
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
+
+ io = ccc_env_thread_io(env);
+ io->ci_obj = cl_i2info(inode)->lli_clob;
+ io->ci_ignore_layout = ignore_layout;
+
+ /* initialize parameters for sync */
+ fio = &io->u.ci_fsync;
+ fio->fi_capa = capa;
+ fio->fi_start = start;
+ fio->fi_end = end;
+ fio->fi_fid = ll_inode2fid(inode);
+ fio->fi_mode = mode;
+ fio->fi_nr_written = 0;
+
+ if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
+ result = cl_io_loop(env, io);
+ else
+ result = io->ci_result;
+ if (result == 0)
+ result = fio->fi_nr_written;
+ cl_io_fini(env, io);
+ cl_env_nested_put(&nest, env);
+
+ capa_put(capa);
+
+ RETURN(result);
+}
+
+/*
+ * When dentry is provided (the 'else' case), *file->f_dentry may be
+ * null and dentry must be used directly rather than pulled from
+ * *file->f_dentry as is done otherwise.
+ */
+
+int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ptlrpc_request *req;
+ struct obd_capa *oc;
+ int rc, err;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
+ inode->i_generation, inode);
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
+
+ rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ mutex_lock(&inode->i_mutex);
+
+ /* catch async errors that were recorded back when async writeback
+ * failed for pages in this mapping. */
+ if (!S_ISDIR(inode->i_mode)) {
+ err = lli->lli_async_rc;
+ lli->lli_async_rc = 0;
+ if (rc == 0)
+ rc = err;
+ err = lov_read_and_clear_async_rc(lli->lli_clob);
+ if (rc == 0)
+ rc = err;
+ }
+
+ oc = ll_mdscapa_get(inode);
+ err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
+ &req);
+ capa_put(oc);
+ if (!rc)
+ rc = err;
+ if (!err)
+ ptlrpc_req_finished(req);
+
+ if (datasync && S_ISREG(inode->i_mode)) {
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+
+ err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
+ CL_FSYNC_ALL, 0);
+ if (rc == 0 && err < 0)
+ rc = err;
+ if (rc < 0)
+ fd->fd_write_failed = true;
+ else
+ fd->fd_write_failed = false;
+ }
+
+ mutex_unlock(&inode->i_mutex);
+ RETURN(rc);
+}
+
+int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
+ .ei_cb_cp =ldlm_flock_completion_ast,
+ .ei_cbdata = file_lock };
+ struct md_op_data *op_data;
+ struct lustre_handle lockh = {0};
+ ldlm_policy_data_t flock = {{0}};
+ int flags = 0;
+ int rc;
+ int rc2 = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
+ inode->i_ino, file_lock);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
+
+ if (file_lock->fl_flags & FL_FLOCK) {
+ LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
+ /* flocks are whole-file locks */
+ flock.l_flock.end = OFFSET_MAX;
+ /* For flocks owner is determined by the local file desctiptor*/
+ flock.l_flock.owner = (unsigned long)file_lock->fl_file;
+ } else if (file_lock->fl_flags & FL_POSIX) {
+ flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
+ flock.l_flock.start = file_lock->fl_start;
+ flock.l_flock.end = file_lock->fl_end;
+ } else {
+ RETURN(-EINVAL);
+ }
+ flock.l_flock.pid = file_lock->fl_pid;
+
+ /* Somewhat ugly workaround for svc lockd.
+ * lockd installs custom fl_lmops->lm_compare_owner that checks
+ * for the fl_owner to be the same (which it always is on local node
+ * I guess between lockd processes) and then compares pid.
+ * As such we assign pid to the owner field to make it all work,
+ * conflict with normal locks is unlikely since pid space and
+ * pointer space for current->files are not intersecting */
+ if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
+ flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
+
+ switch (file_lock->fl_type) {
+ case F_RDLCK:
+ einfo.ei_mode = LCK_PR;
+ break;
+ case F_UNLCK:
+ /* An unlock request may or may not have any relation to
+ * existing locks so we may not be able to pass a lock handle
+ * via a normal ldlm_lock_cancel() request. The request may even
+ * unlock a byte range in the middle of an existing lock. In
+ * order to process an unlock request we need all of the same
+ * information that is given with a normal read or write record
+ * lock request. To avoid creating another ldlm unlock (cancel)
+ * message we'll treat a LCK_NL flock request as an unlock. */
+ einfo.ei_mode = LCK_NL;
+ break;
+ case F_WRLCK:
+ einfo.ei_mode = LCK_PW;
+ break;
+ default:
+ CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
+ file_lock->fl_type);
+ RETURN (-ENOTSUPP);
+ }
+
+ switch (cmd) {
+ case F_SETLKW:
+#ifdef F_SETLKW64
+ case F_SETLKW64:
+#endif
+ flags = 0;
+ break;
+ case F_SETLK:
+#ifdef F_SETLK64
+ case F_SETLK64:
+#endif
+ flags = LDLM_FL_BLOCK_NOWAIT;
+ break;
+ case F_GETLK:
+#ifdef F_GETLK64
+ case F_GETLK64:
+#endif
+ flags = LDLM_FL_TEST_LOCK;
+ /* Save the old mode so that if the mode in the lock changes we
+ * can decrement the appropriate reader or writer refcount. */
+ file_lock->fl_type = einfo.ei_mode;
+ break;
+ default:
+ CERROR("unknown fcntl lock command: %d\n", cmd);
+ RETURN (-EINVAL);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
+ "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
+ flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
+
+ rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
+ op_data, &lockh, &flock, 0, NULL /* req */, flags);
+
+ if ((file_lock->fl_flags & FL_FLOCK) &&
+ (rc == 0 || file_lock->fl_type == F_UNLCK))
+ rc2 = flock_lock_file_wait(file, file_lock);
+ if ((file_lock->fl_flags & FL_POSIX) &&
+ (rc == 0 || file_lock->fl_type == F_UNLCK) &&
+ !(flags & LDLM_FL_TEST_LOCK))
+ rc2 = posix_lock_file_wait(file, file_lock);
+
+ if (rc2 && file_lock->fl_type != F_UNLCK) {
+ einfo.ei_mode = LCK_NL;
+ md_enqueue(sbi->ll_md_exp, &einfo, NULL,
+ op_data, &lockh, &flock, 0, NULL /* req */, flags);
+ rc = rc2;
+ }
+
+ ll_finish_md_op_data(op_data);
+
+ RETURN(rc);
+}
+
+int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
+{
+ ENTRY;
+
+ RETURN(-ENOSYS);
+}
+
+/**
+ * test if some locks matching bits and l_req_mode are acquired
+ * - bits can be in different locks
+ * - if found clear the common lock bits in *bits
+ * - the bits not found, are kept in *bits
+ * \param inode [IN]
+ * \param bits [IN] searched lock bits [IN]
+ * \param l_req_mode [IN] searched lock mode
+ * \retval boolean, true iff all bits are found
+ */
+int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
+{
+ struct lustre_handle lockh;
+ ldlm_policy_data_t policy;
+ ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
+ (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
+ struct lu_fid *fid;
+ __u64 flags;
+ int i;
+ ENTRY;
+
+ if (!inode)
+ RETURN(0);
+
+ fid = &ll_i2info(inode)->lli_fid;
+ CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
+ ldlm_lockname[mode]);
+
+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
+ for (i = 0; i < MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
+ policy.l_inodebits.bits = *bits & (1 << i);
+ if (policy.l_inodebits.bits == 0)
+ continue;
+
+ if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
+ &policy, mode, &lockh)) {
+ struct ldlm_lock *lock;
+
+ lock = ldlm_handle2lock(&lockh);
+ if (lock) {
+ *bits &=
+ ~(lock->l_policy_data.l_inodebits.bits);
+ LDLM_LOCK_PUT(lock);
+ } else {
+ *bits &= ~policy.l_inodebits.bits;
+ }
+ }
+ }
+ RETURN(*bits == 0);
+}
+
+ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
+ struct lustre_handle *lockh, __u64 flags)
+{
+ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
+ struct lu_fid *fid;
+ ldlm_mode_t rc;
+ ENTRY;
+
+ fid = &ll_i2info(inode)->lli_fid;
+ CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
+
+ rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
+ fid, LDLM_IBITS, &policy,
+ LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
+ RETURN(rc);
+}
+
+static int ll_inode_revalidate_fini(struct inode *inode, int rc)
+{
+ /* Already unlinked. Just update nlink and return success */
+ if (rc == -ENOENT) {
+ clear_nlink(inode);
+ /* This path cannot be hit for regular files unless in
+ * case of obscure races, so no need to to validate
+ * size. */
+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return 0;
+ } else if (rc != 0) {
+ CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
+ }
+
+ return rc;
+}
+
+int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
+ __u64 ibits)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ptlrpc_request *req = NULL;
+ struct obd_export *exp;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(inode != NULL);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
+ inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
+
+ exp = ll_i2mdexp(inode);
+
+ /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
+ * But under CMD case, it caused some lock issues, should be fixed
+ * with new CMD ibits lock. See bug 12718 */
+ if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
+ struct lookup_intent oit = { .it_op = IT_GETATTR };
+ struct md_op_data *op_data;
+
+ if (ibits == MDS_INODELOCK_LOOKUP)
+ oit.it_op = IT_LOOKUP;
+
+ /* Call getattr by fid, so do not provide name at all. */
+ op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
+ dentry->d_inode, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ oit.it_create_mode |= M_CHECK_STALE;
+ rc = md_intent_lock(exp, op_data, NULL, 0,
+ /* we are not interested in name
+ based lookup */
+ &oit, 0, &req,
+ ll_md_blocking_ast, 0);
+ ll_finish_md_op_data(op_data);
+ oit.it_create_mode &= ~M_CHECK_STALE;
+ if (rc < 0) {
+ rc = ll_inode_revalidate_fini(inode, rc);
+ GOTO (out, rc);
+ }
+
+ rc = ll_revalidate_it_finish(req, &oit, dentry);
+ if (rc != 0) {
+ ll_intent_release(&oit);
+ GOTO(out, rc);
+ }
+
+ /* Unlinked? Unhash dentry, so it is not picked up later by
+ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
+ here to preserve get_cwd functionality on 2.6.
+ Bug 10503 */
+ if (!dentry->d_inode->i_nlink)
+ d_lustre_invalidate(dentry, 0);
+
+ ll_lookup_finish_locks(&oit, dentry);
+ } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
+ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
+ obd_valid valid = OBD_MD_FLGETATTR;
+ struct md_op_data *op_data;
+ int ealen = 0;
+
+ if (S_ISREG(inode->i_mode)) {
+ rc = ll_get_max_mdsize(sbi, &ealen);
+ if (rc)
+ RETURN(rc);
+ valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
+ 0, ealen, LUSTRE_OPC_ANY,
+ NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_valid = valid;
+ /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
+ * capa for this inode. Because we only keep capas of dirs
+ * fresh. */
+ rc = md_getattr(sbi->ll_md_exp, op_data, &req);
+ ll_finish_md_op_data(op_data);
+ if (rc) {
+ rc = ll_inode_revalidate_fini(inode, rc);
+ RETURN(rc);
+ }
+
+ rc = ll_prep_inode(&inode, req, NULL, NULL);
+ }
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
+ __u64 ibits)
+{
+ struct inode *inode = dentry->d_inode;
+ int rc;
+ ENTRY;
+
+ rc = __ll_inode_revalidate_it(dentry, it, ibits);
+ if (rc != 0)
+ RETURN(rc);
+
+ /* if object isn't regular file, don't validate size */
+ if (!S_ISREG(inode->i_mode)) {
+ LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
+ LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
+ LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
+ } else {
+ rc = ll_glimpse_size(inode);
+ }
+ RETURN(rc);
+}
+
+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
+ struct lookup_intent *it, struct kstat *stat)
+{
+ struct inode *inode = de->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int res = 0;
+
+ res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
+ MDS_INODELOCK_LOOKUP);
+ ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
+
+ if (res)
+ return res;
+
+ stat->dev = inode->i_sb->s_dev;
+ if (ll_need_32bit_api(sbi))
+ stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
+ else
+ stat->ino = inode->i_ino;
+ stat->mode = inode->i_mode;
+ stat->nlink = inode->i_nlink;
+ stat->uid = inode->i_uid;
+ stat->gid = inode->i_gid;
+ stat->rdev = inode->i_rdev;
+ stat->atime = inode->i_atime;
+ stat->mtime = inode->i_mtime;
+ stat->ctime = inode->i_ctime;
+ stat->blksize = 1 << inode->i_blkbits;
+
+ stat->size = i_size_read(inode);
+ stat->blocks = inode->i_blocks;
+
+ return 0;
+}
+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
+{
+ struct lookup_intent it = { .it_op = IT_GETATTR };
+
+ return ll_getattr_it(mnt, de, &it, stat);
+}
+
+
+struct posix_acl * ll_get_acl(struct inode *inode, int type)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct posix_acl *acl = NULL;
+ ENTRY;
+
+ spin_lock(&lli->lli_lock);
+ /* VFS' acl_permission_check->check_acl will release the refcount */
+ acl = posix_acl_dup(lli->lli_posix_acl);
+ spin_unlock(&lli->lli_lock);
+
+ RETURN(acl);
+}
+
+
+int ll_inode_permission(struct inode *inode, int mask)
+{
+ int rc = 0;
+ ENTRY;
+
+#ifdef MAY_NOT_BLOCK
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+#endif
+
+ /* as root inode are NOT getting validated in lookup operation,
+ * need to do it before permission check. */
+
+ if (inode == inode->i_sb->s_root->d_inode) {
+ struct lookup_intent it = { .it_op = IT_LOOKUP };
+
+ rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
+ MDS_INODELOCK_LOOKUP);
+ if (rc)
+ RETURN(rc);
+ }
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
+ inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
+
+ if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
+ return lustre_check_remote_perm(inode, mask);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
+ rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
+
+ RETURN(rc);
+}
+
+#define READ_METHOD aio_read
+#define READ_FUNCTION ll_file_aio_read
+#define WRITE_METHOD aio_write
+#define WRITE_FUNCTION ll_file_aio_write
+
+/* -o localflock - only provides locally consistent flock locks */
+struct file_operations ll_file_operations = {
+ .read = ll_file_read,
+ .READ_METHOD = READ_FUNCTION,
+ .write = ll_file_write,
+ .WRITE_METHOD = WRITE_FUNCTION,
+ .unlocked_ioctl = ll_file_ioctl,
+ .open = ll_file_open,
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
+ .splice_read = ll_file_splice_read,
+ .fsync = ll_fsync,
+ .flush = ll_flush
+};
+
+struct file_operations ll_file_operations_flock = {
+ .read = ll_file_read,
+ .READ_METHOD = READ_FUNCTION,
+ .write = ll_file_write,
+ .WRITE_METHOD = WRITE_FUNCTION,
+ .unlocked_ioctl = ll_file_ioctl,
+ .open = ll_file_open,
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
+ .splice_read = ll_file_splice_read,
+ .fsync = ll_fsync,
+ .flush = ll_flush,
+ .flock = ll_file_flock,
+ .lock = ll_file_flock
+};
+
+/* These are for -o noflock - to return ENOSYS on flock calls */
+struct file_operations ll_file_operations_noflock = {
+ .read = ll_file_read,
+ .READ_METHOD = READ_FUNCTION,
+ .write = ll_file_write,
+ .WRITE_METHOD = WRITE_FUNCTION,
+ .unlocked_ioctl = ll_file_ioctl,
+ .open = ll_file_open,
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
+ .splice_read = ll_file_splice_read,
+ .fsync = ll_fsync,
+ .flush = ll_flush,
+ .flock = ll_file_noflock,
+ .lock = ll_file_noflock
+};
+
+struct inode_operations ll_file_inode_operations = {
+ .setattr = ll_setattr,
+ .getattr = ll_getattr,
+ .permission = ll_inode_permission,
+ .setxattr = ll_setxattr,
+ .getxattr = ll_getxattr,
+ .listxattr = ll_listxattr,
+ .removexattr = ll_removexattr,
+ .get_acl = ll_get_acl,
+};
+
+/* dynamic ioctl number support routins */
+static struct llioc_ctl_data {
+ struct rw_semaphore ioc_sem;
+ struct list_head ioc_head;
+} llioc = {
+ __RWSEM_INITIALIZER(llioc.ioc_sem),
+ LIST_HEAD_INIT(llioc.ioc_head)
+};
+
+
+struct llioc_data {
+ struct list_head iocd_list;
+ unsigned int iocd_size;
+ llioc_callback_t iocd_cb;
+ unsigned int iocd_count;
+ unsigned int iocd_cmd[0];
+};
+
+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
+{
+ unsigned int size;
+ struct llioc_data *in_data = NULL;
+ ENTRY;
+
+ if (cb == NULL || cmd == NULL ||
+ count > LLIOC_MAX_CMD || count < 0)
+ RETURN(NULL);
+
+ size = sizeof(*in_data) + count * sizeof(unsigned int);
+ OBD_ALLOC(in_data, size);
+ if (in_data == NULL)
+ RETURN(NULL);
+
+ memset(in_data, 0, sizeof(*in_data));
+ in_data->iocd_size = size;
+ in_data->iocd_cb = cb;
+ in_data->iocd_count = count;
+ memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
+
+ down_write(&llioc.ioc_sem);
+ list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
+ up_write(&llioc.ioc_sem);
+
+ RETURN(in_data);
+}
+
+void ll_iocontrol_unregister(void *magic)
+{
+ struct llioc_data *tmp;
+
+ if (magic == NULL)
+ return;
+
+ down_write(&llioc.ioc_sem);
+ list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
+ if (tmp == magic) {
+ unsigned int size = tmp->iocd_size;
+
+ list_del(&tmp->iocd_list);
+ up_write(&llioc.ioc_sem);
+
+ OBD_FREE(tmp, size);
+ return;
+ }
+ }
+ up_write(&llioc.ioc_sem);
+
+ CWARN("didn't find iocontrol register block with magic: %p\n", magic);
+}
+
+EXPORT_SYMBOL(ll_iocontrol_register);
+EXPORT_SYMBOL(ll_iocontrol_unregister);
+
+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg, int *rcp)
+{
+ enum llioc_iter ret = LLIOC_CONT;
+ struct llioc_data *data;
+ int rc = -EINVAL, i;
+
+ down_read(&llioc.ioc_sem);
+ list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
+ for (i = 0; i < data->iocd_count; i++) {
+ if (cmd != data->iocd_cmd[i])
+ continue;
+
+ ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
+ break;
+ }
+
+ if (ret == LLIOC_STOP)
+ break;
+ }
+ up_read(&llioc.ioc_sem);
+
+ if (rcp)
+ *rcp = rc;
+ return ret;
+}
+
+int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ int result;
+ ENTRY;
+
+ if (lli->lli_clob == NULL)
+ RETURN(0);
+
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ result = cl_conf_set(env, lli->lli_clob, conf);
+ cl_env_nested_put(&nest, env);
+
+ if (conf->coc_opc == OBJECT_CONF_SET) {
+ struct ldlm_lock *lock = conf->coc_lock;
+
+ LASSERT(lock != NULL);
+ LASSERT(ldlm_has_layout(lock));
+ if (result == 0) {
+ /* it can only be allowed to match after layout is
+ * applied to inode otherwise false layout would be
+ * seen. Applying layout shoud happen before dropping
+ * the intent lock. */
+ ldlm_lock_allow_match(lock);
+ }
+ }
+ RETURN(result);
+}
+
+/* Fetch layout from MDT with getxattr request, if it's not ready yet */
+static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
+
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obd_capa *oc;
+ struct ptlrpc_request *req;
+ struct mdt_body *body;
+ void *lvbdata;
+ void *lmm;
+ int lmmsize;
+ int rc;
+ ENTRY;
+
+ if (lock->l_lvb_data != NULL)
+ RETURN(0);
+
+ /* if layout lock was granted right away, the layout is returned
+ * within DLM_LVB of dlm reply; otherwise if the lock was ever
+ * blocked and then granted via completion ast, we have to fetch
+ * layout here. Please note that we can't use the LVB buffer in
+ * completion AST because it doesn't have a large enough buffer */
+ oc = ll_mdscapa_get(inode);
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
+ if (rc == 0)
+ rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
+ lmmsize, 0, &req);
+ capa_put(oc);
+ if (rc < 0)
+ RETURN(rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL || body->eadatasize > lmmsize)
+ GOTO(out, rc = -EPROTO);
+
+ lmmsize = body->eadatasize;
+ if (lmmsize == 0) /* empty layout */
+ GOTO(out, rc = 0);
+
+ lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
+ if (lmm == NULL)
+ GOTO(out, rc = -EFAULT);
+
+ OBD_ALLOC_LARGE(lvbdata, lmmsize);
+ if (lvbdata == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ memcpy(lvbdata, lmm, lmmsize);
+ lock_res_and_lock(lock);
+ if (lock->l_lvb_data == NULL) {
+ lock->l_lvb_data = lvbdata;
+ lock->l_lvb_len = lmmsize;
+ lvbdata = NULL;
+ }
+ unlock_res_and_lock(lock);
+
+ if (lvbdata != NULL)
+ OBD_FREE_LARGE(lvbdata, lmmsize);
+ EXIT;
+
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+/**
+ * Apply the layout to the inode. Layout lock is held and will be released
+ * in this function.
+ */
+static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
+ struct inode *inode, __u32 *gen, bool reconf)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ldlm_lock *lock;
+ struct lustre_md md = { NULL };
+ struct cl_object_conf conf;
+ int rc = 0;
+ bool lvb_ready;
+ bool wait_layout = false;
+ ENTRY;
+
+ LASSERT(lustre_handle_is_used(lockh));
+
+ lock = ldlm_handle2lock(lockh);
+ LASSERT(lock != NULL);
+ LASSERT(ldlm_has_layout(lock));
+
+ LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
+ inode, PFID(&lli->lli_fid), reconf);
+
+ /* in case this is a caching lock and reinstate with new inode */
+ md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
+
+ lock_res_and_lock(lock);
+ lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
+ unlock_res_and_lock(lock);
+ /* checking lvb_ready is racy but this is okay. The worst case is
+ * that multi processes may configure the file on the same time. */
+ if (lvb_ready || !reconf) {
+ rc = -ENODATA;
+ if (lvb_ready) {
+ /* layout_gen must be valid if layout lock is not
+ * cancelled and stripe has already set */
+ *gen = lli->lli_layout_gen;
+ rc = 0;
+ }
+ GOTO(out, rc);
+ }
+
+ rc = ll_layout_fetch(inode, lock);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /* for layout lock, lmm is returned in lock's lvb.
+ * lvb_data is immutable if the lock is held so it's safe to access it
+ * without res lock. See the description in ldlm_lock_decref_internal()
+ * for the condition to free lvb_data of layout lock */
+ if (lock->l_lvb_data != NULL) {
+ rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
+ lock->l_lvb_data, lock->l_lvb_len);
+ if (rc >= 0) {
+ *gen = LL_LAYOUT_GEN_EMPTY;
+ if (md.lsm != NULL)
+ *gen = md.lsm->lsm_layout_gen;
+ rc = 0;
+ } else {
+ CERROR("%s: file "DFID" unpackmd error: %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid), rc);
+ }
+ }
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /* set layout to file. Unlikely this will fail as old layout was
+ * surely eliminated */
+ memset(&conf, 0, sizeof conf);
+ conf.coc_opc = OBJECT_CONF_SET;
+ conf.coc_inode = inode;
+ conf.coc_lock = lock;
+ conf.u.coc_md = &md;
+ rc = ll_layout_conf(inode, &conf);
+
+ if (md.lsm != NULL)
+ obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
+
+ /* refresh layout failed, need to wait */
+ wait_layout = rc == -EBUSY;
+ EXIT;
+
+out:
+ LDLM_LOCK_PUT(lock);
+ ldlm_lock_decref(lockh, mode);
+
+ /* wait for IO to complete if it's still being used. */
+ if (wait_layout) {
+ CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ inode, PFID(&lli->lli_fid));
+
+ memset(&conf, 0, sizeof conf);
+ conf.coc_opc = OBJECT_CONF_WAIT;
+ conf.coc_inode = inode;
+ rc = ll_layout_conf(inode, &conf);
+ if (rc == 0)
+ rc = -EAGAIN;
+
+ CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
+ PFID(&lli->lli_fid), rc);
+ }
+ RETURN(rc);
+}
+
+/**
+ * This function checks if there exists a LAYOUT lock on the client side,
+ * or enqueues it if it doesn't have one in cache.
+ *
+ * This function will not hold layout lock so it may be revoked any time after
+ * this function returns. Any operations depend on layout should be redone
+ * in that case.
+ *
+ * This function should be called before lov_io_init() to get an uptodate
+ * layout version, the caller should save the version number and after IO
+ * is finished, this function should be called again to verify that layout
+ * is not changed during IO time.
+ */
+int ll_layout_refresh(struct inode *inode, __u32 *gen)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct md_op_data *op_data;
+ struct lookup_intent it;
+ struct lustre_handle lockh;
+ ldlm_mode_t mode;
+ struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
+ .ei_mode = LCK_CR,
+ .ei_cb_bl = ll_md_blocking_ast,
+ .ei_cb_cp = ldlm_completion_ast,
+ .ei_cbdata = NULL };
+ int rc;
+ ENTRY;
+
+ *gen = lli->lli_layout_gen;
+ if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
+ RETURN(0);
+
+ /* sanity checks */
+ LASSERT(fid_is_sane(ll_inode2fid(inode)));
+ LASSERT(S_ISREG(inode->i_mode));
+
+ /* mostly layout lock is caching on the local side, so try to match
+ * it before grabbing layout lock mutex. */
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
+ if (mode != 0) { /* hit cached lock */
+ rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
+ if (rc == 0)
+ RETURN(0);
+
+ /* better hold lli_layout_mutex to try again otherwise
+ * it will have starvation problem. */
+ }
+
+ /* take layout lock mutex to enqueue layout lock exclusively. */
+ mutex_lock(&lli->lli_layout_mutex);
+
+again:
+ /* try again. Maybe somebody else has done this. */
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
+ if (mode != 0) { /* hit cached lock */
+ rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
+ if (rc == -EAGAIN)
+ goto again;
+
+ mutex_unlock(&lli->lli_layout_mutex);
+ RETURN(rc);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
+ 0, 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data)) {
+ mutex_unlock(&lli->lli_layout_mutex);
+ RETURN(PTR_ERR(op_data));
+ }
+
+ /* have to enqueue one */
+ memset(&it, 0, sizeof(it));
+ it.it_op = IT_LAYOUT;
+ lockh.cookie = 0ULL;
+
+ LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), inode,
+ PFID(&lli->lli_fid));
+
+ rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
+ NULL, 0, NULL, 0);
+ if (it.d.lustre.it_data != NULL)
+ ptlrpc_req_finished(it.d.lustre.it_data);
+ it.d.lustre.it_data = NULL;
+
+ ll_finish_md_op_data(op_data);
+
+ mode = it.d.lustre.it_lock_mode;
+ it.d.lustre.it_lock_mode = 0;
+ ll_intent_drop_lock(&it);
+
+ if (rc == 0) {
+ /* set lock data in case this is a new lock */
+ ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
+ rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
+ if (rc == -EAGAIN)
+ goto again;
+ }
+ mutex_unlock(&lli->lli_layout_mutex);
+
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_capa.c b/drivers/staging/lustre/lustre/llite/llite_capa.c
new file mode 100644
index 000000000000..b6fd9593325a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_capa.c
@@ -0,0 +1,661 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/llite_capa.c
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/file.h>
+#include <linux/kmod.h>
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+/* for obd_capa.c_list, client capa might stay in three places:
+ * 1. ll_capa_list.
+ * 2. ll_idle_capas.
+ * 3. stand alone: just allocated.
+ */
+
+/* capas for oss writeback and those failed to renew */
+static LIST_HEAD(ll_idle_capas);
+static struct ptlrpc_thread ll_capa_thread;
+static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
+
+/* llite capa renewal timer */
+struct timer_list ll_capa_timer;
+/* for debug: indicate whether capa on llite is enabled or not */
+static atomic_t ll_capa_debug = ATOMIC_INIT(0);
+static unsigned long long ll_capa_renewed = 0;
+static unsigned long long ll_capa_renewal_noent = 0;
+static unsigned long long ll_capa_renewal_failed = 0;
+static unsigned long long ll_capa_renewal_retries = 0;
+
+static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
+{
+ if (cfs_time_before(expiry, ll_capa_timer.expires) ||
+ !timer_pending(&ll_capa_timer)) {
+ mod_timer(&ll_capa_timer, expiry);
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
+ }
+}
+
+static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
+{
+ return cfs_time_sub(ocapa->c_expiry,
+ cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
+}
+
+static inline int capa_is_to_expire(struct obd_capa *ocapa)
+{
+ return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
+}
+
+static inline int have_expired_capa(void)
+{
+ struct obd_capa *ocapa = NULL;
+ int expired = 0;
+
+ /* if ll_capa_list has client capa to expire or ll_idle_capas has
+ * expired capa, return 1.
+ */
+ spin_lock(&capa_lock);
+ if (!list_empty(ll_capa_list)) {
+ ocapa = list_entry(ll_capa_list->next, struct obd_capa,
+ c_list);
+ expired = capa_is_to_expire(ocapa);
+ if (!expired)
+ update_capa_timer(ocapa, capa_renewal_time(ocapa));
+ } else if (!list_empty(&ll_idle_capas)) {
+ ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
+ c_list);
+ expired = capa_is_expired(ocapa);
+ if (!expired)
+ update_capa_timer(ocapa, ocapa->c_expiry);
+ }
+ spin_unlock(&capa_lock);
+
+ if (expired)
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
+ return expired;
+}
+
+static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
+{
+ struct obd_capa *tmp;
+ struct list_head *before = NULL;
+
+ /* TODO: client capa is sorted by expiry, this could be optimized */
+ list_for_each_entry_reverse(tmp, head, c_list) {
+ if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
+ before = &tmp->c_list;
+ break;
+ }
+ }
+
+ LASSERT(&ocapa->c_list != before);
+ list_add(&ocapa->c_list, before ?: head);
+}
+
+static inline int obd_capa_open_count(struct obd_capa *oc)
+{
+ struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
+ return atomic_read(&lli->lli_open_count);
+}
+
+static void ll_delete_capa(struct obd_capa *ocapa)
+{
+ struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
+
+ if (capa_for_mds(&ocapa->c_capa)) {
+ LASSERT(lli->lli_mds_capa == ocapa);
+ lli->lli_mds_capa = NULL;
+ } else if (capa_for_oss(&ocapa->c_capa)) {
+ list_del_init(&ocapa->u.cli.lli_list);
+ }
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
+ list_del_init(&ocapa->c_list);
+ capa_count[CAPA_SITE_CLIENT]--;
+ /* release the ref when alloc */
+ capa_put(ocapa);
+}
+
+/* three places where client capa is deleted:
+ * 1. capa_thread_main(), main place to delete expired capa.
+ * 2. ll_clear_inode_capas() in ll_clear_inode().
+ * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost().
+ */
+static int capa_thread_main(void *unused)
+{
+ struct obd_capa *ocapa, *tmp, *next;
+ struct inode *inode = NULL;
+ struct l_wait_info lwi = { 0 };
+ int rc;
+ ENTRY;
+
+ thread_set_flags(&ll_capa_thread, SVC_RUNNING);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+
+ while (1) {
+ l_wait_event(ll_capa_thread.t_ctl_waitq,
+ !thread_is_running(&ll_capa_thread) ||
+ have_expired_capa(),
+ &lwi);
+
+ if (!thread_is_running(&ll_capa_thread))
+ break;
+
+ next = NULL;
+
+ spin_lock(&capa_lock);
+ list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
+ __u64 ibits;
+
+ LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
+
+ if (!capa_is_to_expire(ocapa)) {
+ next = ocapa;
+ break;
+ }
+
+ list_del_init(&ocapa->c_list);
+
+ /* for MDS capability, only renew those which belong to
+ * dir, or its inode is opened, or client holds LOOKUP
+ * lock.
+ */
+ /* ibits may be changed by ll_have_md_lock() so we have
+ * to set it each time */
+ ibits = MDS_INODELOCK_LOOKUP;
+ if (capa_for_mds(&ocapa->c_capa) &&
+ !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
+ obd_capa_open_count(ocapa) == 0 &&
+ !ll_have_md_lock(ocapa->u.cli.inode,
+ &ibits, LCK_MINMODE)) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "skip renewal for");
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
+
+ /* for OSS capability, only renew those whose inode is
+ * opened.
+ */
+ if (capa_for_oss(&ocapa->c_capa) &&
+ obd_capa_open_count(ocapa) == 0) {
+ /* oss capa with open count == 0 won't renew,
+ * move to idle list */
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
+
+ /* NB iput() is in ll_update_capa() */
+ inode = igrab(ocapa->u.cli.inode);
+ if (inode == NULL) {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "igrab failed for");
+ continue;
+ }
+
+ capa_get(ocapa);
+ ll_capa_renewed++;
+ spin_unlock(&capa_lock);
+ rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
+ ll_update_capa);
+ spin_lock(&capa_lock);
+ if (rc) {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "renew failed: %d", rc);
+ ll_capa_renewal_failed++;
+ }
+ }
+
+ if (next)
+ update_capa_timer(next, capa_renewal_time(next));
+
+ list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
+ c_list) {
+ if (!capa_is_expired(ocapa)) {
+ if (!next)
+ update_capa_timer(ocapa,
+ ocapa->c_expiry);
+ break;
+ }
+
+ if (atomic_read(&ocapa->c_refc) > 1) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "expired(c_refc %d), don't release",
+ atomic_read(&ocapa->c_refc));
+ /* don't try to renew any more */
+ list_del_init(&ocapa->c_list);
+ continue;
+ }
+
+ /* expired capa is released. */
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
+ ll_delete_capa(ocapa);
+ }
+
+ spin_unlock(&capa_lock);
+ }
+
+ thread_set_flags(&ll_capa_thread, SVC_STOPPED);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ RETURN(0);
+}
+
+void ll_capa_timer_callback(unsigned long unused)
+{
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+}
+
+int ll_capa_thread_start(void)
+{
+ task_t *task;
+ ENTRY;
+
+ init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
+
+ task = kthread_run(capa_thread_main, NULL, "ll_capa");
+ if (IS_ERR(task)) {
+ CERROR("cannot start expired capa thread: rc %ld\n",
+ PTR_ERR(task));
+ RETURN(PTR_ERR(task));
+ }
+ wait_event(ll_capa_thread.t_ctl_waitq,
+ thread_is_running(&ll_capa_thread));
+
+ RETURN(0);
+}
+
+void ll_capa_thread_stop(void)
+{
+ thread_set_flags(&ll_capa_thread, SVC_STOPPING);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ wait_event(ll_capa_thread.t_ctl_waitq,
+ thread_is_stopped(&ll_capa_thread));
+}
+
+struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+ int found = 0;
+
+ ENTRY;
+
+ if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
+ RETURN(NULL);
+
+ LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
+ opc == CAPA_OPC_OSS_TRUNC);
+
+ spin_lock(&capa_lock);
+ list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+ if (capa_is_expired(ocapa))
+ continue;
+ if ((opc & CAPA_OPC_OSS_WRITE) &&
+ capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
+ found = 1;
+ break;
+ } else if ((opc & CAPA_OPC_OSS_READ) &&
+ capa_opc_supported(&ocapa->c_capa,
+ CAPA_OPC_OSS_READ)) {
+ found = 1;
+ break;
+ } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
+ capa_opc_supported(&ocapa->c_capa, opc)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+ ll_inode2fid(inode)));
+ LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+ capa_get(ocapa);
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+ } else {
+ ocapa = NULL;
+
+ if (atomic_read(&ll_capa_debug)) {
+ CERROR("no capability for "DFID" opc "LPX64"\n",
+ PFID(&lli->lli_fid), opc);
+ atomic_set(&ll_capa_debug, 0);
+ }
+ }
+ spin_unlock(&capa_lock);
+
+ RETURN(ocapa);
+}
+EXPORT_SYMBOL(ll_osscapa_get);
+
+struct obd_capa *ll_mdscapa_get(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+ ENTRY;
+
+ LASSERT(inode != NULL);
+
+ if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
+ RETURN(NULL);
+
+ spin_lock(&capa_lock);
+ ocapa = capa_get(lli->lli_mds_capa);
+ spin_unlock(&capa_lock);
+ if (!ocapa && atomic_read(&ll_capa_debug)) {
+ CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
+ atomic_set(&ll_capa_debug, 0);
+ }
+
+ RETURN(ocapa);
+}
+
+static struct obd_capa *do_add_mds_capa(struct inode *inode,
+ struct obd_capa *ocapa)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *old = lli->lli_mds_capa;
+ struct lustre_capa *capa = &ocapa->c_capa;
+
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ lli->lli_mds_capa = ocapa;
+ capa_count[CAPA_SITE_CLIENT]++;
+
+ DEBUG_CAPA(D_SEC, capa, "add MDS");
+ } else {
+ spin_lock(&old->c_lock);
+ old->c_capa = *capa;
+ spin_unlock(&old->c_lock);
+
+ DEBUG_CAPA(D_SEC, capa, "update MDS");
+
+ capa_put(ocapa);
+ ocapa = old;
+ }
+ return ocapa;
+}
+
+static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+
+ /* inside capa_lock */
+ list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+ if ((capa_opc(&ocapa->c_capa) & opc) != opc)
+ continue;
+
+ LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+ ll_inode2fid(inode)));
+ LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+ return ocapa;
+ }
+
+ return NULL;
+}
+
+static inline void inode_add_oss_capa(struct inode *inode,
+ struct obd_capa *ocapa)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *tmp;
+ struct list_head *next = NULL;
+
+ /* capa is sorted in lli_oss_capas so lookup can always find the
+ * latest one */
+ list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
+ if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
+ next = &tmp->u.cli.lli_list;
+ break;
+ }
+ }
+ LASSERT(&ocapa->u.cli.lli_list != next);
+ list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
+}
+
+static struct obd_capa *do_add_oss_capa(struct inode *inode,
+ struct obd_capa *ocapa)
+{
+ struct obd_capa *old;
+ struct lustre_capa *capa = &ocapa->c_capa;
+
+ LASSERTF(S_ISREG(inode->i_mode),
+ "inode has oss capa, but not regular file, mode: %d\n",
+ inode->i_mode);
+
+ /* FIXME: can't replace it so easily with fine-grained opc */
+ old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+ capa_count[CAPA_SITE_CLIENT]++;
+
+ DEBUG_CAPA(D_SEC, capa, "add OSS");
+ } else {
+ spin_lock(&old->c_lock);
+ old->c_capa = *capa;
+ spin_unlock(&old->c_lock);
+
+ DEBUG_CAPA(D_SEC, capa, "update OSS");
+
+ capa_put(ocapa);
+ ocapa = old;
+ }
+
+ inode_add_oss_capa(inode, ocapa);
+ return ocapa;
+}
+
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
+{
+ spin_lock(&capa_lock);
+ ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
+ do_add_oss_capa(inode, ocapa);
+
+ /* truncate capa won't renew */
+ if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
+ set_capa_expiry(ocapa);
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, ll_capa_list);
+
+ update_capa_timer(ocapa, capa_renewal_time(ocapa));
+ }
+
+ spin_unlock(&capa_lock);
+
+ atomic_set(&ll_capa_debug, 1);
+ return ocapa;
+}
+
+static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
+{
+ /* NB: set a fake expiry for this capa to prevent it renew too soon */
+ oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
+}
+
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
+{
+ struct inode *inode = ocapa->u.cli.inode;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ocapa);
+
+ if (IS_ERR(capa)) {
+ /* set error code */
+ rc = PTR_ERR(capa);
+ spin_lock(&capa_lock);
+ if (rc == -ENOENT) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "renewal canceled because object removed");
+ ll_capa_renewal_noent++;
+ } else {
+ ll_capa_renewal_failed++;
+
+ /* failed capa won't be renewed any longer, but if -EIO,
+ * client might be doing recovery, retry in 2 min. */
+ if (rc == -EIO && !capa_is_expired(ocapa)) {
+ delay_capa_renew(ocapa, 120);
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "renewal failed: -EIO, "
+ "retry in 2 mins");
+ ll_capa_renewal_retries++;
+ GOTO(retry, rc);
+ } else {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "renewal failed(rc: %d) for", rc);
+ }
+ }
+
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, &ll_idle_capas);
+ spin_unlock(&capa_lock);
+
+ capa_put(ocapa);
+ iput(inode);
+ RETURN(rc);
+ }
+
+ spin_lock(&ocapa->c_lock);
+ LASSERT(!memcmp(&ocapa->c_capa, capa,
+ offsetof(struct lustre_capa, lc_opc)));
+ ocapa->c_capa = *capa;
+ set_capa_expiry(ocapa);
+ spin_unlock(&ocapa->c_lock);
+
+ spin_lock(&capa_lock);
+ if (capa_for_oss(capa))
+ inode_add_oss_capa(inode, ocapa);
+ DEBUG_CAPA(D_SEC, capa, "renew");
+ EXIT;
+retry:
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, ll_capa_list);
+ update_capa_timer(ocapa, capa_renewal_time(ocapa));
+ spin_unlock(&capa_lock);
+
+ capa_put(ocapa);
+ iput(inode);
+ return rc;
+}
+
+void ll_capa_open(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
+ == 0)
+ return;
+
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ atomic_inc(&lli->lli_open_count);
+}
+
+void ll_capa_close(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
+ == 0)
+ return;
+
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ atomic_dec(&lli->lli_open_count);
+}
+
+/* delete CAPA_OPC_OSS_TRUNC only */
+void ll_truncate_free_capa(struct obd_capa *ocapa)
+{
+ if (!ocapa)
+ return;
+
+ LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
+
+ /* release ref when find */
+ capa_put(ocapa);
+ if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
+ spin_lock(&capa_lock);
+ ll_delete_capa(ocapa);
+ spin_unlock(&capa_lock);
+ }
+}
+
+void ll_clear_inode_capas(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa, *tmp;
+
+ spin_lock(&capa_lock);
+ ocapa = lli->lli_mds_capa;
+ if (ocapa)
+ ll_delete_capa(ocapa);
+
+ list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
+ u.cli.lli_list)
+ ll_delete_capa(ocapa);
+ spin_unlock(&capa_lock);
+}
+
+void ll_print_capa_stat(struct ll_sb_info *sbi)
+{
+ if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
+ LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
+ "Fid capabilities renewal ENOENT: %llu\n"
+ "Fid capabilities failed to renew: %llu\n"
+ "Fid capabilities renewal retries: %llu\n",
+ ll_capa_renewed, ll_capa_renewal_noent,
+ ll_capa_renewal_failed, ll_capa_renewal_retries);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
new file mode 100644
index 000000000000..00b2b38d4c97
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -0,0 +1,412 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/llite_close.c
+ *
+ * Lustre Lite routines to issue a secondary close after writeback
+ */
+
+#include <linux/module.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+/** records that a write is in flight */
+void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
+{
+ struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+
+ ENTRY;
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_SOM_DIRTY;
+ if (page != NULL && list_empty(&page->cpg_pending_linkage))
+ list_add(&page->cpg_pending_linkage,
+ &club->cob_pending_list);
+ spin_unlock(&lli->lli_lock);
+ EXIT;
+}
+
+/** records that a write has completed */
+void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
+{
+ struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+ int rc = 0;
+
+ ENTRY;
+ spin_lock(&lli->lli_lock);
+ if (page != NULL && !list_empty(&page->cpg_pending_linkage)) {
+ list_del_init(&page->cpg_pending_linkage);
+ rc = 1;
+ }
+ spin_unlock(&lli->lli_lock);
+ if (rc)
+ ll_queue_done_writing(club->cob_inode, 0);
+ EXIT;
+}
+
+/** Queues DONE_WRITING if
+ * - done writing is allowed;
+ * - inode has no no dirty pages; */
+void ll_queue_done_writing(struct inode *inode, unsigned long flags)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ ENTRY;
+
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= flags;
+
+ if ((lli->lli_flags & LLIF_DONE_WRITING) &&
+ list_empty(&club->cob_pending_list)) {
+ struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
+
+ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
+ CWARN("ino %lu/%u(flags %u) som valid it just after "
+ "recovery\n",
+ inode->i_ino, inode->i_generation,
+ lli->lli_flags);
+ /* DONE_WRITING is allowed and inode has no dirty page. */
+ spin_lock(&lcq->lcq_lock);
+
+ LASSERT(list_empty(&lli->lli_close_list));
+ CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
+ inode->i_ino, inode->i_generation);
+ list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
+
+ /* Avoid a concurrent insertion into the close thread queue:
+ * an inode is already in the close thread, open(), write(),
+ * close() happen, epoch is closed as the inode is marked as
+ * LLIF_EPOCH_PENDING. When pages are written inode should not
+ * be inserted into the queue again, clear this flag to avoid
+ * it. */
+ lli->lli_flags &= ~LLIF_DONE_WRITING;
+
+ wake_up(&lcq->lcq_waitq);
+ spin_unlock(&lcq->lcq_lock);
+ }
+ spin_unlock(&lli->lli_lock);
+ EXIT;
+}
+
+/** Pack SOM attributes info @opdata for CLOSE, DONE_WRITING rpc. */
+void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ ENTRY;
+
+ op_data->op_flags |= MF_SOM_CHANGE;
+ /* Check if Size-on-MDS attributes are valid. */
+ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
+ CERROR("ino %lu/%u(flags %u) som valid it just after "
+ "recovery\n", inode->i_ino, inode->i_generation,
+ lli->lli_flags);
+
+ if (!cl_local_size(inode)) {
+ /* Send Size-on-MDS Attributes if valid. */
+ op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET |
+ ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS;
+ }
+ EXIT;
+}
+
+/** Closes ioepoch and packs Size-on-MDS attribute if needed into @op_data. */
+void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
+ struct obd_client_handle **och, unsigned long flags)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ ENTRY;
+
+ spin_lock(&lli->lli_lock);
+ if (!(list_empty(&club->cob_pending_list))) {
+ if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
+ LASSERT(*och != NULL);
+ LASSERT(lli->lli_pending_och == NULL);
+ /* Inode is dirty and there is no pending write done
+ * request yet, DONE_WRITE is to be sent later. */
+ lli->lli_flags |= LLIF_EPOCH_PENDING;
+ lli->lli_pending_och = *och;
+ spin_unlock(&lli->lli_lock);
+
+ inode = igrab(inode);
+ LASSERT(inode);
+ GOTO(out, 0);
+ }
+ if (flags & LLIF_DONE_WRITING) {
+ /* Some pages are still dirty, it is early to send
+ * DONE_WRITE. Wait untill all pages will be flushed
+ * and try DONE_WRITE again later. */
+ LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
+ lli->lli_flags |= LLIF_DONE_WRITING;
+ spin_unlock(&lli->lli_lock);
+
+ inode = igrab(inode);
+ LASSERT(inode);
+ GOTO(out, 0);
+ }
+ }
+ CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID"\n",
+ ll_i2info(inode)->lli_ioepoch, PFID(&lli->lli_fid));
+ op_data->op_flags |= MF_EPOCH_CLOSE;
+
+ if (flags & LLIF_DONE_WRITING) {
+ LASSERT(lli->lli_flags & LLIF_SOM_DIRTY);
+ LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
+ *och = lli->lli_pending_och;
+ lli->lli_pending_och = NULL;
+ lli->lli_flags &= ~LLIF_EPOCH_PENDING;
+ } else {
+ /* Pack Size-on-MDS inode attributes only if they has changed */
+ if (!(lli->lli_flags & LLIF_SOM_DIRTY)) {
+ spin_unlock(&lli->lli_lock);
+ GOTO(out, 0);
+ }
+
+ /* There is a pending DONE_WRITE -- close epoch with no
+ * attribute change. */
+ if (lli->lli_flags & LLIF_EPOCH_PENDING) {
+ spin_unlock(&lli->lli_lock);
+ GOTO(out, 0);
+ }
+ }
+
+ LASSERT(list_empty(&club->cob_pending_list));
+ lli->lli_flags &= ~LLIF_SOM_DIRTY;
+ spin_unlock(&lli->lli_lock);
+ ll_done_writing_attr(inode, op_data);
+
+ EXIT;
+out:
+ return;
+}
+
+/**
+ * Cliens updates SOM attributes on MDS (including llog cookies):
+ * obd_getattr with no lock and md_setattr.
+ */
+int ll_som_update(struct inode *inode, struct md_op_data *op_data)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ptlrpc_request *request = NULL;
+ __u32 old_flags;
+ struct obdo *oa;
+ int rc;
+ ENTRY;
+
+ LASSERT(op_data != NULL);
+ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
+ CERROR("ino %lu/%u(flags %u) som valid it just after "
+ "recovery\n", inode->i_ino, inode->i_generation,
+ lli->lli_flags);
+
+ OBDO_ALLOC(oa);
+ if (!oa) {
+ CERROR("can't allocate memory for Size-on-MDS update.\n");
+ RETURN(-ENOMEM);
+ }
+
+ old_flags = op_data->op_flags;
+ op_data->op_flags = MF_SOM_CHANGE;
+
+ /* If inode is already in another epoch, skip getattr from OSTs. */
+ if (lli->lli_ioepoch == op_data->op_ioepoch) {
+ rc = ll_inode_getattr(inode, oa, op_data->op_ioepoch,
+ old_flags & MF_GETATTR_LOCK);
+ if (rc) {
+ oa->o_valid = 0;
+ if (rc != -ENOENT)
+ CERROR("inode_getattr failed (%d): unable to "
+ "send a Size-on-MDS attribute update "
+ "for inode %lu/%u\n", rc, inode->i_ino,
+ inode->i_generation);
+ } else {
+ CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
+ PFID(&lli->lli_fid));
+ }
+ /* Install attributes into op_data. */
+ md_from_obdo(op_data, oa, oa->o_valid);
+ }
+
+ rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data,
+ NULL, 0, NULL, 0, &request, NULL);
+ ptlrpc_req_finished(request);
+
+ OBDO_FREE(oa);
+ RETURN(rc);
+}
+
+/**
+ * Closes the ioepoch and packs all the attributes into @op_data for
+ * DONE_WRITING rpc.
+ */
+static void ll_prepare_done_writing(struct inode *inode,
+ struct md_op_data *op_data,
+ struct obd_client_handle **och)
+{
+ ll_ioepoch_close(inode, op_data, och, LLIF_DONE_WRITING);
+ /* If there is no @och, we do not do D_W yet. */
+ if (*och == NULL)
+ return;
+
+ ll_pack_inode2opdata(inode, op_data, &(*och)->och_fh);
+ ll_prep_md_op_data(op_data, inode, NULL, NULL,
+ 0, 0, LUSTRE_OPC_ANY, NULL);
+}
+
+/** Send a DONE_WRITING rpc. */
+static void ll_done_writing(struct inode *inode)
+{
+ struct obd_client_handle *och = NULL;
+ struct md_op_data *op_data;
+ int rc;
+ ENTRY;
+
+ LASSERT(exp_connect_som(ll_i2mdexp(inode)));
+
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL) {
+ CERROR("can't allocate op_data\n");
+ EXIT;
+ return;
+ }
+
+ ll_prepare_done_writing(inode, op_data, &och);
+ /* If there is no @och, we do not do D_W yet. */
+ if (och == NULL)
+ GOTO(out, 0);
+
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
+ if (rc == -EAGAIN) {
+ /* MDS has instructed us to obtain Size-on-MDS attribute from
+ * OSTs and send setattr to back to MDS. */
+ rc = ll_som_update(inode, op_data);
+ } else if (rc) {
+ CERROR("inode %lu mdc done_writing failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+out:
+ ll_finish_md_op_data(op_data);
+ if (och) {
+ md_clear_open_replay_data(ll_i2sbi(inode)->ll_md_exp, och);
+ OBD_FREE_PTR(och);
+ }
+ EXIT;
+}
+
+static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
+{
+ struct ll_inode_info *lli = NULL;
+
+ spin_lock(&lcq->lcq_lock);
+
+ if (!list_empty(&lcq->lcq_head)) {
+ lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
+ lli_close_list);
+ list_del_init(&lli->lli_close_list);
+ } else if (atomic_read(&lcq->lcq_stop))
+ lli = ERR_PTR(-EALREADY);
+
+ spin_unlock(&lcq->lcq_lock);
+ return lli;
+}
+
+static int ll_close_thread(void *arg)
+{
+ struct ll_close_queue *lcq = arg;
+ ENTRY;
+
+ complete(&lcq->lcq_comp);
+
+ while (1) {
+ struct l_wait_info lwi = { 0 };
+ struct ll_inode_info *lli;
+ struct inode *inode;
+
+ l_wait_event_exclusive(lcq->lcq_waitq,
+ (lli = ll_close_next_lli(lcq)) != NULL,
+ &lwi);
+ if (IS_ERR(lli))
+ break;
+
+ inode = ll_info2i(lli);
+ CDEBUG(D_INFO, "done_writting for inode %lu/%u\n",
+ inode->i_ino, inode->i_generation);
+ ll_done_writing(inode);
+ iput(inode);
+ }
+
+ CDEBUG(D_INFO, "ll_close exiting\n");
+ complete(&lcq->lcq_comp);
+ RETURN(0);
+}
+
+int ll_close_thread_start(struct ll_close_queue **lcq_ret)
+{
+ struct ll_close_queue *lcq;
+ task_t *task;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CLOSE_THREAD))
+ return -EINTR;
+
+ OBD_ALLOC(lcq, sizeof(*lcq));
+ if (lcq == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&lcq->lcq_lock);
+ INIT_LIST_HEAD(&lcq->lcq_head);
+ init_waitqueue_head(&lcq->lcq_waitq);
+ init_completion(&lcq->lcq_comp);
+
+ task = kthread_run(ll_close_thread, lcq, "ll_close");
+ if (IS_ERR(task)) {
+ OBD_FREE(lcq, sizeof(*lcq));
+ return PTR_ERR(task);
+ }
+
+ wait_for_completion(&lcq->lcq_comp);
+ *lcq_ret = lcq;
+ return 0;
+}
+
+void ll_close_thread_shutdown(struct ll_close_queue *lcq)
+{
+ init_completion(&lcq->lcq_comp);
+ atomic_inc(&lcq->lcq_stop);
+ wake_up(&lcq->lcq_waitq);
+ wait_for_completion(&lcq->lcq_comp);
+ OBD_FREE(lcq, sizeof(*lcq));
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
new file mode 100644
index 000000000000..992cd203ca1a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -0,0 +1,1576 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LLITE_INTERNAL_H
+#define LLITE_INTERNAL_H
+#include <lustre_debug.h>
+#include <lustre_ver.h>
+#include <lustre_disk.h> /* for s2sbi */
+#include <lustre_eacl.h>
+
+/* for struct cl_lock_descr and struct cl_io */
+#include <cl_object.h>
+#include <lclient.h>
+#include <lustre_mdc.h>
+#include <linux/lustre_intent.h>
+
+#ifndef FMODE_EXEC
+#define FMODE_EXEC 0
+#endif
+
+#ifndef VM_FAULT_RETRY
+#define VM_FAULT_RETRY 0
+#endif
+
+/* Kernel 3.1 kills LOOKUP_CONTINUE, LOOKUP_PARENT is equivalent to it.
+ * seem kernel commit 49084c3bb2055c401f3493c13edae14d49128ca0 */
+#ifndef LOOKUP_CONTINUE
+#define LOOKUP_CONTINUE LOOKUP_PARENT
+#endif
+
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF 0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
+
+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
+#define LUSTRE_FPRIVATE(file) ((file)->private_data)
+
+struct ll_dentry_data {
+ int lld_cwd_count;
+ int lld_mnt_count;
+ struct obd_client_handle lld_cwd_och;
+ struct obd_client_handle lld_mnt_och;
+ struct lookup_intent *lld_it;
+ unsigned int lld_sa_generation;
+ unsigned int lld_invalid:1;
+ struct rcu_head lld_rcu_head;
+};
+
+#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
+
+extern struct file_operations ll_pgcache_seq_fops;
+
+#define LLI_INODE_MAGIC 0x111d0de5
+#define LLI_INODE_DEAD 0xdeadd00d
+
+/* remote client permission cache */
+#define REMOTE_PERM_HASHSIZE 16
+
+struct ll_getname_data {
+ char *lgd_name; /* points to a buffer with NAME_MAX+1 size */
+ struct lu_fid lgd_fid; /* target fid we are looking for */
+ int lgd_found; /* inode matched? */
+};
+
+/* llite setxid/access permission for user on remote client */
+struct ll_remote_perm {
+ struct hlist_node lrp_list;
+ uid_t lrp_uid;
+ gid_t lrp_gid;
+ uid_t lrp_fsuid;
+ gid_t lrp_fsgid;
+ int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
+ is access permission with
+ lrp_fsuid/lrp_fsgid. */
+};
+
+enum lli_flags {
+ /* MDS has an authority for the Size-on-MDS attributes. */
+ LLIF_MDS_SIZE_LOCK = (1 << 0),
+ /* Epoch close is postponed. */
+ LLIF_EPOCH_PENDING = (1 << 1),
+ /* DONE WRITING is allowed. */
+ LLIF_DONE_WRITING = (1 << 2),
+ /* Sizeon-on-MDS attributes are changed. An attribute update needs to
+ * be sent to MDS. */
+ LLIF_SOM_DIRTY = (1 << 3),
+ /* File is contented */
+ LLIF_CONTENDED = (1 << 4),
+ /* Truncate uses server lock for this file */
+ LLIF_SRVLOCK = (1 << 5),
+ /* File data is modified. */
+ LLIF_DATA_MODIFIED = (1 << 6),
+};
+
+struct ll_inode_info {
+ __u32 lli_inode_magic;
+ __u32 lli_flags;
+ __u64 lli_ioepoch;
+
+ spinlock_t lli_lock;
+ struct posix_acl *lli_posix_acl;
+
+ struct hlist_head *lli_remote_perms;
+ struct mutex lli_rmtperm_mutex;
+
+ /* identifying fields for both metadata and data stacks. */
+ struct lu_fid lli_fid;
+ /* Parent fid for accessing default stripe data on parent directory
+ * for allocating OST objects after a mknod() and later open-by-FID. */
+ struct lu_fid lli_pfid;
+
+ struct list_head lli_close_list;
+ struct list_head lli_oss_capas;
+ /* open count currently used by capability only, indicate whether
+ * capability needs renewal */
+ atomic_t lli_open_count;
+ struct obd_capa *lli_mds_capa;
+ cfs_time_t lli_rmtperm_time;
+
+ /* handle is to be sent to MDS later on done_writing and setattr.
+ * Open handle data are needed for the recovery to reconstruct
+ * the inode state on the MDS. XXX: recovery is not ready yet. */
+ struct obd_client_handle *lli_pending_och;
+
+ /* We need all three because every inode may be opened in different
+ * modes */
+ struct obd_client_handle *lli_mds_read_och;
+ struct obd_client_handle *lli_mds_write_och;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_read_count;
+ __u64 lli_open_fd_write_count;
+ __u64 lli_open_fd_exec_count;
+ /* Protects access to och pointers and their usage counters */
+ struct mutex lli_och_mutex;
+
+ struct inode lli_vfs_inode;
+
+ /* the most recent timestamps obtained from mds */
+ struct ost_lvb lli_lvb;
+ spinlock_t lli_agl_lock;
+
+ /* Try to make the d::member and f::member are aligned. Before using
+ * these members, make clear whether it is directory or not. */
+ union {
+ /* for directory */
+ struct {
+ /* serialize normal readdir and statahead-readdir. */
+ struct mutex d_readdir_mutex;
+
+ /* metadata statahead */
+ /* since parent-child threads can share the same @file
+ * struct, "opendir_key" is the token when dir close for
+ * case of parent exit before child -- it is me should
+ * cleanup the dir readahead. */
+ void *d_opendir_key;
+ struct ll_statahead_info *d_sai;
+ struct posix_acl *d_def_acl;
+ /* protect statahead stuff. */
+ spinlock_t d_sa_lock;
+ /* "opendir_pid" is the token when lookup/revalid
+ * -- I am the owner of dir statahead. */
+ pid_t d_opendir_pid;
+ } d;
+
+#define lli_readdir_mutex u.d.d_readdir_mutex
+#define lli_opendir_key u.d.d_opendir_key
+#define lli_sai u.d.d_sai
+#define lli_def_acl u.d.d_def_acl
+#define lli_sa_lock u.d.d_sa_lock
+#define lli_opendir_pid u.d.d_opendir_pid
+
+ /* for non-directory */
+ struct {
+ struct semaphore f_size_sem;
+ void *f_size_sem_owner;
+ char *f_symlink_name;
+ __u64 f_maxbytes;
+ /*
+ * struct rw_semaphore {
+ * signed long count; // align d.d_def_acl
+ * spinlock_t wait_lock; // align d.d_sa_lock
+ * struct list_head wait_list;
+ * }
+ */
+ struct rw_semaphore f_trunc_sem;
+ struct mutex f_write_mutex;
+
+ struct rw_semaphore f_glimpse_sem;
+ cfs_time_t f_glimpse_time;
+ struct list_head f_agl_list;
+ __u64 f_agl_index;
+
+ /* for writepage() only to communicate to fsync */
+ int f_async_rc;
+
+ /* volatile file criteria is based on file name, this
+ * flag is used to keep the test result, so the strcmp
+ * is done only once
+ */
+ bool f_volatile;
+ /*
+ * whenever a process try to read/write the file, the
+ * jobid of the process will be saved here, and it'll
+ * be packed into the write PRC when flush later.
+ *
+ * so the read/write statistics for jobid will not be
+ * accurate if the file is shared by different jobs.
+ */
+ char f_jobid[JOBSTATS_JOBID_SIZE];
+ } f;
+
+#define lli_size_sem u.f.f_size_sem
+#define lli_size_sem_owner u.f.f_size_sem_owner
+#define lli_symlink_name u.f.f_symlink_name
+#define lli_maxbytes u.f.f_maxbytes
+#define lli_trunc_sem u.f.f_trunc_sem
+#define lli_write_mutex u.f.f_write_mutex
+#define lli_glimpse_sem u.f.f_glimpse_sem
+#define lli_glimpse_time u.f.f_glimpse_time
+#define lli_agl_list u.f.f_agl_list
+#define lli_agl_index u.f.f_agl_index
+#define lli_async_rc u.f.f_async_rc
+#define lli_jobid u.f.f_jobid
+#define lli_volatile u.f.f_volatile
+
+ } u;
+
+ /* XXX: For following frequent used members, although they maybe special
+ * used for non-directory object, it is some time-wasting to check
+ * whether the object is directory or not before using them. On the
+ * other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
+ * the "ll_inode_info" size even if moving those members into u.f.
+ * So keep them out side.
+ *
+ * In the future, if more members are added only for directory,
+ * some of the following members can be moved into u.f.
+ */
+ bool lli_has_smd;
+ struct cl_object *lli_clob;
+
+ /* mutex to request for layout lock exclusively. */
+ struct mutex lli_layout_mutex;
+ /* valid only inside LAYOUT ibits lock, protected by lli_layout_mutex */
+ __u32 lli_layout_gen;
+};
+
+/*
+ * Locking to guarantee consistency of non-atomic updates to long long i_size,
+ * consistency between file size and KMS.
+ *
+ * Implemented by ->lli_size_sem and ->lsm_lock, nested in that order.
+ */
+
+void ll_inode_size_lock(struct inode *inode);
+void ll_inode_size_unlock(struct inode *inode);
+
+// FIXME: replace the name of this with LL_I to conform to kernel stuff
+// static inline struct ll_inode_info *LL_I(struct inode *inode)
+static inline struct ll_inode_info *ll_i2info(struct inode *inode)
+{
+ return container_of(inode, struct ll_inode_info, lli_vfs_inode);
+}
+
+/* default to about 40meg of readahead on a given system. That much tied
+ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
+#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT))
+
+/* default to read-ahead full files smaller than 2MB on the second read */
+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT))
+
+enum ra_stat {
+ RA_STAT_HIT = 0,
+ RA_STAT_MISS,
+ RA_STAT_DISTANT_READPAGE,
+ RA_STAT_MISS_IN_WINDOW,
+ RA_STAT_FAILED_GRAB_PAGE,
+ RA_STAT_FAILED_MATCH,
+ RA_STAT_DISCARDED,
+ RA_STAT_ZERO_LEN,
+ RA_STAT_ZERO_WINDOW,
+ RA_STAT_EOF,
+ RA_STAT_MAX_IN_FLIGHT,
+ RA_STAT_WRONG_GRAB_PAGE,
+ _NR_RA_STAT,
+};
+
+struct ll_ra_info {
+ atomic_t ra_cur_pages;
+ unsigned long ra_max_pages;
+ unsigned long ra_max_pages_per_file;
+ unsigned long ra_max_read_ahead_whole_pages;
+};
+
+/* ra_io_arg will be filled in the beginning of ll_readahead with
+ * ras_lock, then the following ll_read_ahead_pages will read RA
+ * pages according to this arg, all the items in this structure are
+ * counted by page index.
+ */
+struct ra_io_arg {
+ unsigned long ria_start; /* start offset of read-ahead*/
+ unsigned long ria_end; /* end offset of read-ahead*/
+ /* If stride read pattern is detected, ria_stoff means where
+ * stride read is started. Note: for normal read-ahead, the
+ * value here is meaningless, and also it will not be accessed*/
+ pgoff_t ria_stoff;
+ /* ria_length and ria_pages are the length and pages length in the
+ * stride I/O mode. And they will also be used to check whether
+ * it is stride I/O read-ahead in the read-ahead pages*/
+ unsigned long ria_length;
+ unsigned long ria_pages;
+};
+
+/* LL_HIST_MAX=32 causes an overflow */
+#define LL_HIST_MAX 28
+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
+#define LL_PROCESS_HIST_MAX 10
+struct per_process_info {
+ pid_t pid;
+ struct obd_histogram pp_r_hist;
+ struct obd_histogram pp_w_hist;
+};
+
+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
+struct ll_rw_extents_info {
+ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
+};
+
+#define LL_OFFSET_HIST_MAX 100
+struct ll_rw_process_info {
+ pid_t rw_pid;
+ int rw_op;
+ loff_t rw_range_start;
+ loff_t rw_range_end;
+ loff_t rw_last_file_pos;
+ loff_t rw_offset;
+ size_t rw_smallest_extent;
+ size_t rw_largest_extent;
+ struct ll_file_data *rw_last_file;
+};
+
+enum stats_track_type {
+ STATS_TRACK_ALL = 0, /* track all processes */
+ STATS_TRACK_PID, /* track process with this pid */
+ STATS_TRACK_PPID, /* track processes with this ppid */
+ STATS_TRACK_GID, /* track processes with this gid */
+ STATS_TRACK_LAST,
+};
+
+/* flags for sbi->ll_flags */
+#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
+#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
+#define LL_SBI_FLOCK 0x04
+#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
+#define LL_SBI_ACL 0x10 /* support ACL */
+#define LL_SBI_RMT_CLIENT 0x40 /* remote client */
+#define LL_SBI_MDS_CAPA 0x80 /* support mds capa */
+#define LL_SBI_OSS_CAPA 0x100 /* support oss capa */
+#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
+#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
+#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
+#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
+#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
+#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
+#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
+#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
+#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
+
+#define LL_SBI_FLAGS { \
+ "nolck", \
+ "checksum", \
+ "flock", \
+ "xattr", \
+ "acl", \
+ "rmt_client", \
+ "mds_capa", \
+ "oss_capa", \
+ "flock", \
+ "lru_resize", \
+ "lazy_statfs", \
+ "som", \
+ "32bit_api", \
+ "64bit_hash", \
+ "agl", \
+ "verbose", \
+ "layout", \
+ "user_fid2path" }
+
+/* default value for ll_sb_info->contention_time */
+#define SBI_DEFAULT_CONTENTION_SECONDS 60
+/* default value for lockless_truncate_enable */
+#define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1
+#define RCE_HASHES 32
+
+struct rmtacl_ctl_entry {
+ struct list_head rce_list;
+ pid_t rce_key; /* hash key */
+ int rce_ops; /* acl operation type */
+};
+
+struct rmtacl_ctl_table {
+ spinlock_t rct_lock;
+ struct list_head rct_entries[RCE_HASHES];
+};
+
+#define EE_HASHES 32
+
+struct eacl_entry {
+ struct list_head ee_list;
+ pid_t ee_key; /* hash key */
+ struct lu_fid ee_fid;
+ int ee_type; /* ACL type for ACCESS or DEFAULT */
+ ext_acl_xattr_header *ee_acl;
+};
+
+struct eacl_table {
+ spinlock_t et_lock;
+ struct list_head et_entries[EE_HASHES];
+};
+
+struct ll_sb_info {
+ struct list_head ll_list;
+ /* this protects pglist and ra_info. It isn't safe to
+ * grab from interrupt contexts */
+ spinlock_t ll_lock;
+ spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
+ spinlock_t ll_process_lock; /* ll_rw_process_info */
+ struct obd_uuid ll_sb_uuid;
+ struct obd_export *ll_md_exp;
+ struct obd_export *ll_dt_exp;
+ struct proc_dir_entry* ll_proc_root;
+ struct lu_fid ll_root_fid; /* root object fid */
+
+ int ll_flags;
+ int ll_umounting:1;
+ struct list_head ll_conn_chain; /* per-conn chain of SBs */
+ struct lustre_client_ocd ll_lco;
+
+ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
+ struct ll_close_queue *ll_lcq;
+
+ struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
+
+ struct cl_client_cache ll_cache;
+
+ struct lprocfs_stats *ll_ra_stats;
+
+ struct ll_ra_info ll_ra_info;
+ unsigned int ll_namelen;
+ struct file_operations *ll_fop;
+
+ /* =0 - hold lock over whole read/write
+ * >0 - max. chunk to be read/written w/o lock re-acquiring */
+ unsigned long ll_max_rw_chunk;
+ unsigned int ll_md_brw_size; /* used by readdir */
+
+ struct lu_site *ll_site;
+ struct cl_device *ll_cl;
+ /* Statistics */
+ struct ll_rw_extents_info ll_rw_extents_info;
+ int ll_extent_process_count;
+ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
+ unsigned int ll_offset_process_count;
+ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
+ unsigned int ll_rw_offset_entry_count;
+ int ll_stats_track_id;
+ enum stats_track_type ll_stats_track_type;
+ int ll_rw_stats_on;
+
+ /* metadata stat-ahead */
+ unsigned int ll_sa_max; /* max statahead RPCs */
+ atomic_t ll_sa_total; /* statahead thread started
+ * count */
+ atomic_t ll_sa_wrong; /* statahead thread stopped for
+ * low hit ratio */
+ atomic_t ll_agl_total; /* AGL thread started count */
+
+ dev_t ll_sdev_orig; /* save s_dev before assign for
+ * clustred nfs */
+ struct rmtacl_ctl_table ll_rct;
+ struct eacl_table ll_et;
+};
+
+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
+
+struct ll_ra_read {
+ pgoff_t lrr_start;
+ pgoff_t lrr_count;
+ struct task_struct *lrr_reader;
+ struct list_head lrr_linkage;
+};
+
+/*
+ * per file-descriptor read-ahead data.
+ */
+struct ll_readahead_state {
+ spinlock_t ras_lock;
+ /*
+ * index of the last page that read(2) needed and that wasn't in the
+ * cache. Used by ras_update() to detect seeks.
+ *
+ * XXX nikita: if access seeks into cached region, Lustre doesn't see
+ * this.
+ */
+ unsigned long ras_last_readpage;
+ /*
+ * number of pages read after last read-ahead window reset. As window
+ * is reset on each seek, this is effectively a number of consecutive
+ * accesses. Maybe ->ras_accessed_in_window is better name.
+ *
+ * XXX nikita: window is also reset (by ras_update()) when Lustre
+ * believes that memory pressure evicts read-ahead pages. In that
+ * case, it probably doesn't make sense to expand window to
+ * PTLRPC_MAX_BRW_PAGES on the third access.
+ */
+ unsigned long ras_consecutive_pages;
+ /*
+ * number of read requests after the last read-ahead window reset
+ * As window is reset on each seek, this is effectively the number
+ * on consecutive read request and is used to trigger read-ahead.
+ */
+ unsigned long ras_consecutive_requests;
+ /*
+ * Parameters of current read-ahead window. Handled by
+ * ras_update(). On the initial access to the file or after a seek,
+ * window is reset to 0. After 3 consecutive accesses, window is
+ * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
+ * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
+ */
+ unsigned long ras_window_start, ras_window_len;
+ /*
+ * Where next read-ahead should start at. This lies within read-ahead
+ * window. Read-ahead window is read in pieces rather than at once
+ * because: 1. lustre limits total number of pages under read-ahead by
+ * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
+ * not covered by DLM lock.
+ */
+ unsigned long ras_next_readahead;
+ /*
+ * Total number of ll_file_read requests issued, reads originating
+ * due to mmap are not counted in this total. This value is used to
+ * trigger full file read-ahead after multiple reads to a small file.
+ */
+ unsigned long ras_requests;
+ /*
+ * Page index with respect to the current request, these value
+ * will not be accurate when dealing with reads issued via mmap.
+ */
+ unsigned long ras_request_index;
+ /*
+ * list of struct ll_ra_read's one per read(2) call current in
+ * progress against this file descriptor. Used by read-ahead code,
+ * protected by ->ras_lock.
+ */
+ struct list_head ras_read_beads;
+ /*
+ * The following 3 items are used for detecting the stride I/O
+ * mode.
+ * In stride I/O mode,
+ * ...............|-----data-----|****gap*****|--------|******|....
+ * offset |-stride_pages-|-stride_gap-|
+ * ras_stride_offset = offset;
+ * ras_stride_length = stride_pages + stride_gap;
+ * ras_stride_pages = stride_pages;
+ * Note: all these three items are counted by pages.
+ */
+ unsigned long ras_stride_length;
+ unsigned long ras_stride_pages;
+ pgoff_t ras_stride_offset;
+ /*
+ * number of consecutive stride request count, and it is similar as
+ * ras_consecutive_requests, but used for stride I/O mode.
+ * Note: only more than 2 consecutive stride request are detected,
+ * stride read-ahead will be enable
+ */
+ unsigned long ras_consecutive_stride_requests;
+};
+
+extern struct kmem_cache *ll_file_data_slab;
+struct lustre_handle;
+struct ll_file_data {
+ struct ll_readahead_state fd_ras;
+ int fd_omode;
+ struct ccc_grouplock fd_grouplock;
+ __u64 lfd_pos;
+ __u32 fd_flags;
+ struct file *fd_file;
+ /* Indicate whether need to report failure when close.
+ * true: failure is known, not report again.
+ * false: unknown failure, should report. */
+ bool fd_write_failed;
+};
+
+struct lov_stripe_md;
+
+extern spinlock_t inode_lock;
+
+extern struct proc_dir_entry *proc_lustre_fs_root;
+
+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
+{
+ return &lli->lli_vfs_inode;
+}
+
+struct it_cb_data {
+ struct inode *icbd_parent;
+ struct dentry **icbd_childp;
+ obd_id hash;
+};
+
+__u32 ll_i2suppgid(struct inode *i);
+void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
+
+static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
+{
+#if BITS_PER_LONG == 32
+ return 1;
+#else
+ return unlikely(current_is_32bit() || (sbi->ll_flags & LL_SBI_32BIT_API));
+#endif
+}
+
+#define LLAP_MAGIC 98764321
+
+extern struct kmem_cache *ll_async_page_slab;
+extern size_t ll_async_page_slab_size;
+
+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
+struct ll_ra_read *ll_ra_read_get(struct file *f);
+
+/* llite/lproc_llite.c */
+#ifdef LPROCFS
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc);
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc){return 0;}
+static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
+static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
+static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+
+/* llite/dir.c */
+void ll_release_page(struct page *page, int remove);
+extern struct file_operations ll_dir_operations;
+extern struct inode_operations ll_dir_inode_operations;
+struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
+ struct ll_dir_chain *chain);
+int ll_dir_read(struct inode *inode, __u64 *_pos, void *cookie,
+ filldir_t filldir);
+
+int ll_get_mdt_idx(struct inode *inode);
+/* llite/namei.c */
+int ll_objects_destroy(struct ptlrpc_request *request,
+ struct inode *dir);
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+ struct lustre_md *lic);
+int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+ void *data, int flag);
+struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
+int ll_rmdir_entry(struct inode *dir, char *name, int namelen);
+
+/* llite/rw.c */
+int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
+int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
+int ll_writepage(struct page *page, struct writeback_control *wbc);
+int ll_writepages(struct address_space *, struct writeback_control *wbc);
+void ll_removepage(struct page *page);
+int ll_readpage(struct file *file, struct page *page);
+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
+int ll_file_punch(struct inode *, loff_t, int);
+ssize_t ll_file_lockless_io(struct file *, char *, size_t, loff_t *, int);
+void ll_clear_file_contended(struct inode*);
+int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
+int ll_readahead(const struct lu_env *env, struct cl_io *io,
+ struct ll_readahead_state *ras, struct address_space *mapping,
+ struct cl_page_list *queue, int flags);
+
+/* llite/file.c */
+extern struct file_operations ll_file_operations;
+extern struct file_operations ll_file_operations_flock;
+extern struct file_operations ll_file_operations_noflock;
+extern struct inode_operations ll_file_inode_operations;
+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64);
+extern int ll_have_md_lock(struct inode *inode, __u64 *bits,
+ ldlm_mode_t l_req_mode);
+extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
+ struct lustre_handle *lockh, __u64 flags);
+int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64 bits);
+int ll_revalidate_nd(struct dentry *dentry, unsigned int flags);
+int ll_file_open(struct inode *inode, struct file *file);
+int ll_file_release(struct inode *inode, struct file *file);
+int ll_glimpse_ioctl(struct ll_sb_info *sbi,
+ struct lov_stripe_md *lsm, lstat_t *st);
+void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch);
+int ll_local_open(struct file *file,
+ struct lookup_intent *it, struct ll_file_data *fd,
+ struct obd_client_handle *och);
+int ll_release_openhandle(struct dentry *, struct lookup_intent *);
+int ll_md_close(struct obd_export *md_exp, struct inode *inode,
+ struct file *file);
+int ll_md_real_close(struct inode *inode, int flags);
+void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
+ struct obd_client_handle **och, unsigned long flags);
+void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data);
+int ll_som_update(struct inode *inode, struct md_op_data *op_data);
+int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
+ __u64 ioepoch, int sync);
+int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
+ struct md_open_data **mod);
+void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
+ struct lustre_handle *fh);
+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+ struct ll_file_data *file, loff_t pos,
+ size_t count, int rw);
+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
+ struct lookup_intent *it, struct kstat *stat);
+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
+struct ll_file_data *ll_file_data_get(void);
+struct posix_acl * ll_get_acl(struct inode *inode, int type);
+
+int ll_inode_permission(struct inode *inode, int mask);
+
+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
+ int flags, struct lov_user_md *lum,
+ int lum_size);
+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
+ struct lov_mds_md **lmm, int *lmm_size,
+ struct ptlrpc_request **request);
+int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
+ int set_default);
+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
+ int *lmm_size, struct ptlrpc_request **request);
+int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
+int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
+ int num_bytes);
+int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
+int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg);
+int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
+int ll_fid2path(struct inode *inode, void *arg);
+int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+
+/* llite/dcache.c */
+
+int ll_dops_init(struct dentry *de, int block, int init_sa);
+extern struct dentry_operations ll_d_ops;
+void ll_intent_drop_lock(struct lookup_intent *);
+void ll_intent_release(struct lookup_intent *);
+void ll_invalidate_aliases(struct inode *);
+void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
+int ll_dcompare(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *d_name);
+int ll_revalidate_it_finish(struct ptlrpc_request *request,
+ struct lookup_intent *it, struct dentry *de);
+
+/* llite/llite_lib.c */
+extern struct super_operations lustre_super_operations;
+
+char *ll_read_opt(const char *opt, char *data);
+void ll_lli_init(struct ll_inode_info *lli);
+int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
+void ll_put_super(struct super_block *sb);
+void ll_kill_super(struct super_block *sb);
+struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
+struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
+void ll_clear_inode(struct inode *inode);
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+int ll_statfs(struct dentry *de, struct kstatfs *sfs);
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+ __u64 max_age, __u32 flags);
+void ll_update_inode(struct inode *inode, struct lustre_md *md);
+void ll_read_inode2(struct inode *inode, void *opaque);
+void ll_delete_inode(struct inode *inode);
+int ll_iocontrol(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+int ll_flush_ctx(struct inode *inode);
+void ll_umount_begin(struct super_block *sb);
+int ll_remount_fs(struct super_block *sb, int *flags, char *data);
+int ll_show_options(struct seq_file *seq, struct dentry *dentry);
+void ll_dirty_page_discard_warn(struct page *page, int ioret);
+int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
+ struct super_block *, struct lookup_intent *);
+void lustre_dump_dentry(struct dentry *, int recur);
+void lustre_dump_inode(struct inode *);
+int ll_obd_statfs(struct inode *inode, void *arg);
+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
+int ll_process_config(struct lustre_cfg *lcfg);
+struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
+ struct inode *i1, struct inode *i2,
+ const char *name, int namelen,
+ int mode, __u32 opc, void *data);
+void ll_finish_md_op_data(struct md_op_data *op_data);
+int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
+char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
+
+/* llite/llite_nfs.c */
+extern struct export_operations lustre_export_operations;
+__u32 get_uuid2int(const char *name, int len);
+struct inode *search_inode_for_lustre(struct super_block *sb,
+ const struct lu_fid *fid);
+
+/* llite/special.c */
+extern struct inode_operations ll_special_inode_operations;
+extern struct file_operations ll_special_chr_inode_fops;
+extern struct file_operations ll_special_chr_file_fops;
+extern struct file_operations ll_special_blk_inode_fops;
+extern struct file_operations ll_special_fifo_inode_fops;
+extern struct file_operations ll_special_fifo_file_fops;
+extern struct file_operations ll_special_sock_inode_fops;
+
+/* llite/symlink.c */
+extern struct inode_operations ll_fast_symlink_inode_operations;
+
+/* llite/llite_close.c */
+struct ll_close_queue {
+ spinlock_t lcq_lock;
+ struct list_head lcq_head;
+ wait_queue_head_t lcq_waitq;
+ struct completion lcq_comp;
+ atomic_t lcq_stop;
+};
+
+struct ccc_object *cl_inode2ccc(struct inode *inode);
+
+
+void vvp_write_pending (struct ccc_object *club, struct ccc_page *page);
+void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
+
+/* specific achitecture can implement only part of this list */
+enum vvp_io_subtype {
+ /** normal IO */
+ IO_NORMAL,
+ /** io called from .sendfile */
+ IO_SENDFILE,
+ /** io started from splice_{read|write} */
+ IO_SPLICE
+};
+
+/* IO subtypes */
+struct vvp_io {
+ /** io subtype */
+ enum vvp_io_subtype cui_io_subtype;
+
+ union {
+ struct {
+ read_actor_t cui_actor;
+ void *cui_target;
+ } sendfile;
+ struct {
+ struct pipe_inode_info *cui_pipe;
+ unsigned int cui_flags;
+ } splice;
+ struct vvp_fault_io {
+ /**
+ * Inode modification time that is checked across DLM
+ * lock request.
+ */
+ time_t ft_mtime;
+ struct vm_area_struct *ft_vma;
+ /**
+ * locked page returned from vvp_io
+ */
+ struct page *ft_vmpage;
+ struct vm_fault_api {
+ /**
+ * kernel fault info
+ */
+ struct vm_fault *ft_vmf;
+ /**
+ * fault API used bitflags for return code.
+ */
+ unsigned int ft_flags;
+ } fault;
+ } fault;
+ } u;
+ /**
+ * Read-ahead state used by read and page-fault IO contexts.
+ */
+ struct ll_ra_read cui_bead;
+ /**
+ * Set when cui_bead has been initialized.
+ */
+ int cui_ra_window_set;
+ /**
+ * Partially truncated page, that vvp_io_trunc_start() keeps locked
+ * across truncate.
+ */
+ struct cl_page *cui_partpage;
+};
+
+/**
+ * IO arguments for various VFS I/O interfaces.
+ */
+struct vvp_io_args {
+ /** normal/sendfile/splice */
+ enum vvp_io_subtype via_io_subtype;
+
+ union {
+ struct {
+ struct kiocb *via_iocb;
+ struct iovec *via_iov;
+ unsigned long via_nrsegs;
+ } normal;
+ struct {
+ read_actor_t via_actor;
+ void *via_target;
+ } sendfile;
+ struct {
+ struct pipe_inode_info *via_pipe;
+ unsigned int via_flags;
+ } splice;
+ } u;
+};
+
+struct ll_cl_context {
+ void *lcc_cookie;
+ struct cl_io *lcc_io;
+ struct cl_page *lcc_page;
+ struct lu_env *lcc_env;
+ int lcc_refcheck;
+ int lcc_created;
+};
+
+struct vvp_thread_info {
+ struct ost_lvb vti_lvb;
+ struct cl_2queue vti_queue;
+ struct iovec vti_local_iov;
+ struct vvp_io_args vti_args;
+ struct ra_io_arg vti_ria;
+ struct kiocb vti_kiocb;
+ struct ll_cl_context vti_io_ctx;
+};
+
+static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+{
+ extern struct lu_context_key vvp_key;
+ struct vvp_thread_info *info;
+
+ info = lu_context_key_get(&env->le_ctx, &vvp_key);
+ LASSERT(info != NULL);
+ return info;
+}
+
+static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
+ enum vvp_io_subtype type)
+{
+ struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;
+
+ ret->via_io_subtype = type;
+
+ return ret;
+}
+
+struct vvp_session {
+ struct vvp_io vs_ios;
+};
+
+static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+{
+ extern struct lu_context_key vvp_session_key;
+ struct vvp_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &vvp_session_key);
+ LASSERT(ses != NULL);
+ return ses;
+}
+
+static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
+{
+ return &vvp_env_session(env)->vs_ios;
+}
+
+void ll_queue_done_writing(struct inode *inode, unsigned long flags);
+void ll_close_thread_shutdown(struct ll_close_queue *lcq);
+int ll_close_thread_start(struct ll_close_queue **lcq_ret);
+
+/* llite/llite_mmap.c */
+typedef struct rb_root rb_root_t;
+typedef struct rb_node rb_node_t;
+
+struct ll_lock_tree_node;
+struct ll_lock_tree {
+ rb_root_t lt_root;
+ struct list_head lt_locked_list;
+ struct ll_file_data *lt_fd;
+};
+
+int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
+int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
+struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
+ __u64 end, ldlm_mode_t mode);
+void policy_from_vma(ldlm_policy_data_t *policy,
+ struct vm_area_struct *vma, unsigned long addr, size_t count);
+struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
+ size_t count);
+
+static inline void ll_invalidate_page(struct page *vmpage)
+{
+ struct address_space *mapping = vmpage->mapping;
+ loff_t offset = vmpage->index << PAGE_CACHE_SHIFT;
+
+ LASSERT(PageLocked(vmpage));
+ if (mapping == NULL)
+ return;
+
+ ll_teardown_mmaps(mapping, offset, offset + PAGE_CACHE_SIZE);
+ truncate_complete_page(mapping, vmpage);
+}
+
+#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
+
+/* don't need an addref as the sb_info should be holding one */
+static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
+{
+ return ll_s2sbi(sb)->ll_dt_exp;
+}
+
+/* don't need an addref as the sb_info should be holding one */
+static inline struct obd_export *ll_s2mdexp(struct super_block *sb)
+{
+ return ll_s2sbi(sb)->ll_md_exp;
+}
+
+static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
+{
+ struct obd_device *obd = sbi->ll_md_exp->exp_obd;
+ if (obd == NULL)
+ LBUG();
+ return &obd->u.cli;
+}
+
+// FIXME: replace the name of this with LL_SB to conform to kernel stuff
+static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
+{
+ return ll_s2sbi(inode->i_sb);
+}
+
+static inline struct obd_export *ll_i2dtexp(struct inode *inode)
+{
+ return ll_s2dtexp(inode->i_sb);
+}
+
+static inline struct obd_export *ll_i2mdexp(struct inode *inode)
+{
+ return ll_s2mdexp(inode->i_sb);
+}
+
+static inline struct lu_fid *ll_inode2fid(struct inode *inode)
+{
+ struct lu_fid *fid;
+
+ LASSERT(inode != NULL);
+ fid = &ll_i2info(inode)->lli_fid;
+
+ return fid;
+}
+
+static inline int ll_mds_max_easize(struct super_block *sb)
+{
+ return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
+}
+
+static inline __u64 ll_file_maxbytes(struct inode *inode)
+{
+ return ll_i2info(inode)->lli_maxbytes;
+}
+
+/* llite/xattr.c */
+int ll_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags);
+ssize_t ll_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size);
+ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
+int ll_removexattr(struct dentry *dentry, const char *name);
+
+/* llite/remote_perm.c */
+extern struct kmem_cache *ll_remote_perm_cachep;
+extern struct kmem_cache *ll_rmtperm_hash_cachep;
+
+struct hlist_head *alloc_rmtperm_hash(void);
+void free_rmtperm_hash(struct hlist_head *hash);
+int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
+int lustre_check_remote_perm(struct inode *inode, int mask);
+
+/* llite/llite_capa.c */
+extern timer_list_t ll_capa_timer;
+
+int ll_capa_thread_start(void);
+void ll_capa_thread_stop(void);
+void ll_capa_timer_callback(unsigned long unused);
+
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa);
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
+
+void ll_capa_open(struct inode *inode);
+void ll_capa_close(struct inode *inode);
+
+struct obd_capa *ll_mdscapa_get(struct inode *inode);
+struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc);
+
+void ll_truncate_free_capa(struct obd_capa *ocapa);
+void ll_clear_inode_capas(struct inode *inode);
+void ll_print_capa_stat(struct ll_sb_info *sbi);
+
+/* llite/llite_cl.c */
+extern struct lu_device_type vvp_device_type;
+
+/**
+ * Common IO arguments for various VFS I/O interfaces.
+ */
+int cl_sb_init(struct super_block *sb);
+int cl_sb_fini(struct super_block *sb);
+enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma);
+void ll_io_init(struct cl_io *io, const struct file *file, int write);
+
+void ras_update(struct ll_sb_info *sbi, struct inode *inode,
+ struct ll_readahead_state *ras, unsigned long index,
+ unsigned hit);
+void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
+int ll_is_file_contended(struct file *file);
+void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);
+
+/* llite/llite_rmtacl.c */
+#ifdef CONFIG_FS_POSIX_ACL
+obd_valid rce_ops2valid(int ops);
+struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key);
+int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops);
+int rct_del(struct rmtacl_ctl_table *rct, pid_t key);
+void rct_init(struct rmtacl_ctl_table *rct);
+void rct_fini(struct rmtacl_ctl_table *rct);
+
+void ee_free(struct eacl_entry *ee);
+int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
+ ext_acl_xattr_header *header);
+struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
+ struct lu_fid *fid, int type);
+void et_search_free(struct eacl_table *et, pid_t key);
+void et_init(struct eacl_table *et);
+void et_fini(struct eacl_table *et);
+#endif
+
+/* statahead.c */
+
+#define LL_SA_RPC_MIN 2
+#define LL_SA_RPC_DEF 32
+#define LL_SA_RPC_MAX 8192
+
+#define LL_SA_CACHE_BIT 5
+#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
+#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
+
+/* per inode struct, for dir only */
+struct ll_statahead_info {
+ struct inode *sai_inode;
+ atomic_t sai_refcount; /* when access this struct, hold
+ * refcount */
+ unsigned int sai_generation; /* generation for statahead */
+ unsigned int sai_max; /* max ahead of lookup */
+ __u64 sai_sent; /* stat requests sent count */
+ __u64 sai_replied; /* stat requests which received
+ * reply */
+ __u64 sai_index; /* index of statahead entry */
+ __u64 sai_index_wait; /* index of entry which is the
+ * caller is waiting for */
+ __u64 sai_hit; /* hit count */
+ __u64 sai_miss; /* miss count:
+ * for "ls -al" case, it includes
+ * hidden dentry miss;
+ * for "ls -l" case, it does not
+ * include hidden dentry miss.
+ * "sai_miss_hidden" is used for
+ * the later case.
+ */
+ unsigned int sai_consecutive_miss; /* consecutive miss */
+ unsigned int sai_miss_hidden;/* "ls -al", but first dentry
+ * is not a hidden one */
+ unsigned int sai_skip_hidden;/* skipped hidden dentry count */
+ unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
+ * hidden entries */
+ sai_in_readpage:1,/* statahead is in readdir()*/
+ sai_agl_valid:1;/* AGL is valid for the dir */
+ wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
+ struct ptlrpc_thread sai_thread; /* stat-ahead thread */
+ struct ptlrpc_thread sai_agl_thread; /* AGL thread */
+ struct list_head sai_entries; /* entry list */
+ struct list_head sai_entries_received; /* entries returned */
+ struct list_head sai_entries_stated; /* entries stated */
+ struct list_head sai_entries_agl; /* AGL entries to be sent */
+ struct list_head sai_cache[LL_SA_CACHE_SIZE];
+ spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
+ atomic_t sai_cache_count; /* entry count in cache */
+};
+
+int do_statahead_enter(struct inode *dir, struct dentry **dentry,
+ int only_unplug);
+void ll_stop_statahead(struct inode *dir, void *key);
+
+static inline int ll_glimpse_size(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
+
+ down_read(&lli->lli_glimpse_sem);
+ rc = cl_glimpse_size(inode);
+ lli->lli_glimpse_time = cfs_time_current();
+ up_read(&lli->lli_glimpse_sem);
+ return rc;
+}
+
+static inline void
+ll_statahead_mark(struct inode *dir, struct dentry *dentry)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = lli->lli_sai;
+ struct ll_dentry_data *ldd = ll_d2d(dentry);
+
+ /* not the same process, don't mark */
+ if (lli->lli_opendir_pid != current_pid())
+ return;
+
+ if (sai != NULL && ldd != NULL)
+ ldd->lld_sa_generation = sai->sai_generation;
+}
+
+static inline int
+ll_need_statahead(struct inode *dir, struct dentry *dentryp)
+{
+ struct ll_inode_info *lli;
+ struct ll_dentry_data *ldd;
+
+ if (ll_i2sbi(dir)->ll_sa_max == 0)
+ return -EAGAIN;
+
+ lli = ll_i2info(dir);
+ /* not the same process, don't statahead */
+ if (lli->lli_opendir_pid != current_pid())
+ return -EAGAIN;
+
+ /* statahead has been stopped */
+ if (lli->lli_opendir_key == NULL)
+ return -EAGAIN;
+
+ ldd = ll_d2d(dentryp);
+ /*
+ * When stats a dentry, the system trigger more than once "revalidate"
+ * or "lookup", for "getattr", for "getxattr", and maybe for others.
+ * Under patchless client mode, the operation intent is not accurate,
+ * which maybe misguide the statahead thread. For example:
+ * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
+ * have the same operation intent -- "IT_GETATTR".
+ * In fact, one dentry should has only one chance to interact with the
+ * statahead thread, otherwise the statahead windows will be confused.
+ * The solution is as following:
+ * Assign "lld_sa_generation" with "sai_generation" when a dentry
+ * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
+ * will bypass interacting with statahead thread for checking:
+ * "lld_sa_generation == lli_sai->sai_generation"
+ */
+ if (ldd && lli->lli_sai &&
+ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
+ return -EAGAIN;
+
+ return 1;
+}
+
+static inline int
+ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
+{
+ int ret;
+
+ ret = ll_need_statahead(dir, *dentryp);
+ if (ret <= 0)
+ return ret;
+
+ return do_statahead_enter(dir, dentryp, only_unplug);
+}
+
+/* llite ioctl register support rountine */
+enum llioc_iter {
+ LLIOC_CONT = 0,
+ LLIOC_STOP
+};
+
+#define LLIOC_MAX_CMD 256
+
+/*
+ * Rules to write a callback function:
+ *
+ * Parameters:
+ * @magic: Dynamic ioctl call routine will feed this vaule with the pointer
+ * returned to ll_iocontrol_register. Callback functions should use this
+ * data to check the potential collasion of ioctl cmd. If collasion is
+ * found, callback function should return LLIOC_CONT.
+ * @rcp: The result of ioctl command.
+ *
+ * Return values:
+ * If @magic matches the pointer returned by ll_iocontrol_data, the
+ * callback should return LLIOC_STOP; return LLIOC_STOP otherwise.
+ */
+typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
+ struct file *file, unsigned int cmd, unsigned long arg,
+ void *magic, int *rcp);
+
+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg, int *rcp);
+
+/* export functions */
+/* Register ioctl block dynamatically for a regular file.
+ *
+ * @cmd: the array of ioctl command set
+ * @count: number of commands in the @cmd
+ * @cb: callback function, it will be called if an ioctl command is found to
+ * belong to the command list @cmd.
+ *
+ * Return vaule:
+ * A magic pointer will be returned if success;
+ * otherwise, NULL will be returned.
+ * */
+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
+void ll_iocontrol_unregister(void *magic);
+
+
+/* lclient compat stuff */
+#define cl_inode_info ll_inode_info
+#define cl_i2info(info) ll_i2info(info)
+#define cl_inode_mode(inode) ((inode)->i_mode)
+#define cl_i2sbi ll_i2sbi
+
+static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
+ const struct iattr *attr)
+{
+ LASSERT(attr->ia_valid & ATTR_FILE);
+ return LUSTRE_FPRIVATE(attr->ia_file);
+}
+
+static inline void cl_isize_lock(struct inode *inode)
+{
+ ll_inode_size_lock(inode);
+}
+
+static inline void cl_isize_unlock(struct inode *inode)
+{
+ ll_inode_size_unlock(inode);
+}
+
+static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
+{
+ LASSERT(down_trylock(&ll_i2info(inode)->lli_size_sem) != 0);
+ i_size_write(inode, kms);
+}
+
+static inline void cl_isize_write(struct inode *inode, loff_t kms)
+{
+ ll_inode_size_lock(inode);
+ i_size_write(inode, kms);
+ ll_inode_size_unlock(inode);
+}
+
+#define cl_isize_read(inode) i_size_read(inode)
+
+static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
+{
+ return ll_merge_lvb(env, inode);
+}
+
+#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
+#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
+#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)
+
+struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt);
+
+int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
+ enum cl_fsync_mode mode, int ignore_layout);
+
+/** direct write pages */
+struct ll_dio_pages {
+ /** page array to be written. we don't support
+ * partial pages except the last one. */
+ struct page **ldp_pages;
+ /* offset of each page */
+ loff_t *ldp_offsets;
+ /** if ldp_offsets is NULL, it means a sequential
+ * pages to be written, then this is the file offset
+ * of the * first page. */
+ loff_t ldp_start_offset;
+ /** how many bytes are to be written. */
+ size_t ldp_size;
+ /** # of pages in the array. */
+ int ldp_nr;
+};
+
+static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
+ int rc)
+{
+ int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
+ LPROC_LL_OSC_WRITE;
+
+ ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+}
+
+extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct ll_dio_pages *pv);
+
+static inline int ll_file_nolock(const struct file *file)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct inode *inode = file->f_dentry->d_inode;
+
+ LASSERT(fd != NULL);
+ return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
+ (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
+}
+
+static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
+ struct lookup_intent *it, __u64 *bits)
+{
+ if (!it->d.lustre.it_lock_set) {
+ struct lustre_handle handle;
+
+ /* If this inode is a remote object, it will get two
+ * separate locks in different namespaces, Master MDT,
+ * where the name entry is, will grant LOOKUP lock,
+ * remote MDT, where the object is, will grant
+ * UPDATE|PERM lock. The inode will be attched to both
+ * LOOKUP and PERM locks, so revoking either locks will
+ * case the dcache being cleared */
+ if (it->d.lustre.it_remote_lock_mode) {
+ handle.cookie = it->d.lustre.it_remote_lock_handle;
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p"
+ "(%lu/%u) for remote lock "LPX64"\n", inode,
+ inode->i_ino, inode->i_generation,
+ handle.cookie);
+ md_set_lock_data(exp, &handle.cookie, inode, NULL);
+ }
+
+ handle.cookie = it->d.lustre.it_lock_handle;
+
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)"
+ " for lock "LPX64"\n", inode, inode->i_ino,
+ inode->i_generation, handle.cookie);
+
+ md_set_lock_data(exp, &handle.cookie, inode,
+ &it->d.lustre.it_lock_bits);
+ it->d.lustre.it_lock_set = 1;
+ }
+
+ if (bits != NULL)
+ *bits = it->d.lustre.it_lock_bits;
+}
+
+static inline void ll_lock_dcache(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+}
+
+static inline void ll_unlock_dcache(struct inode *inode)
+{
+ spin_unlock(&inode->i_lock);
+}
+
+static inline int d_lustre_invalid(const struct dentry *dentry)
+{
+ struct ll_dentry_data *lld = ll_d2d(dentry);
+
+ return (lld == NULL) || lld->lld_invalid;
+}
+
+static inline void __d_lustre_invalidate(struct dentry *dentry)
+{
+ struct ll_dentry_data *lld = ll_d2d(dentry);
+
+ if (lld != NULL)
+ lld->lld_invalid = 1;
+}
+
+/*
+ * Mark dentry INVALID, if dentry refcount is zero (this is normally case for
+ * ll_md_blocking_ast), unhash this dentry, and let dcache to reclaim it later;
+ * else dput() of the last refcount will unhash this dentry and kill it.
+ */
+static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
+{
+ CDEBUG(D_DENTRY, "invalidate dentry %.*s (%p) parent %p inode %p "
+ "refc %d\n", dentry->d_name.len, dentry->d_name.name, dentry,
+ dentry->d_parent, dentry->d_inode, d_refcount(dentry));
+
+ spin_lock_nested(&dentry->d_lock,
+ nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
+ __d_lustre_invalidate(dentry);
+ if (d_refcount(dentry) == 0)
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void d_lustre_revalidate(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ LASSERT(ll_d2d(dentry) != NULL);
+ ll_d2d(dentry)->lld_invalid = 0;
+ spin_unlock(&dentry->d_lock);
+}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+/* Compatibility for old (1.8) compiled userspace quota code */
+struct if_quotactl_18 {
+ __u32 qc_cmd;
+ __u32 qc_type;
+ __u32 qc_id;
+ __u32 qc_stat;
+ struct obd_dqinfo qc_dqinfo;
+ struct obd_dqblk qc_dqblk;
+ char obd_type[16];
+ struct obd_uuid obd_uuid;
+};
+#define LL_IOC_QUOTACTL_18 _IOWR('f', 162, struct if_quotactl_18 *)
+/* End compatibility for old (1.8) compiled userspace quota code */
+#else
+#warning "remove old LL_IOC_QUOTACTL_18 compatibility code"
+#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) */
+
+enum {
+ LL_LAYOUT_GEN_NONE = ((__u32)-2), /* layout lock was cancelled */
+ LL_LAYOUT_GEN_EMPTY = ((__u32)-1) /* for empty layout */
+};
+
+int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
+int ll_layout_refresh(struct inode *inode, __u32 *gen);
+
+#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
new file mode 100644
index 000000000000..2311b20ee99a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -0,0 +1,2408 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/llite_lib.c
+ *
+ * Lustre Light Super operations
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/mm.h>
+
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include <lustre_log.h>
+#include <cl_object.h>
+#include <obd_cksum.h>
+#include "llite_internal.h"
+
+struct kmem_cache *ll_file_data_slab;
+
+LIST_HEAD(ll_super_blocks);
+DEFINE_SPINLOCK(ll_sb_lock);
+
+#ifndef MS_HAS_NEW_AOPS
+extern struct address_space_operations ll_aops;
+#else
+extern struct address_space_operations_ext ll_aops;
+#endif
+
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
+static struct ll_sb_info *ll_init_sbi(void)
+{
+ struct ll_sb_info *sbi = NULL;
+ unsigned long pages;
+ unsigned long lru_page_max;
+ struct sysinfo si;
+ class_uuid_t uuid;
+ int i;
+ ENTRY;
+
+ OBD_ALLOC(sbi, sizeof(*sbi));
+ if (!sbi)
+ RETURN(NULL);
+
+ spin_lock_init(&sbi->ll_lock);
+ mutex_init(&sbi->ll_lco.lco_lock);
+ spin_lock_init(&sbi->ll_pp_extent_lock);
+ spin_lock_init(&sbi->ll_process_lock);
+ sbi->ll_rw_stats_on = 0;
+
+ si_meminfo(&si);
+ pages = si.totalram - si.totalhigh;
+ if (pages >> (20 - PAGE_CACHE_SHIFT) < 512) {
+ lru_page_max = pages / 2;
+ } else {
+ lru_page_max = (pages / 4) * 3;
+ }
+
+ /* initialize lru data */
+ atomic_set(&sbi->ll_cache.ccc_users, 0);
+ sbi->ll_cache.ccc_lru_max = lru_page_max;
+ atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
+ spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
+ INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
+
+ sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
+ SBI_DEFAULT_READAHEAD_MAX);
+ sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
+ sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
+ SBI_DEFAULT_READAHEAD_WHOLE_MAX;
+ INIT_LIST_HEAD(&sbi->ll_conn_chain);
+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
+
+ ll_generate_random_uuid(uuid);
+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
+ CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
+
+ spin_lock(&ll_sb_lock);
+ list_add_tail(&sbi->ll_list, &ll_super_blocks);
+ spin_unlock(&ll_sb_lock);
+
+ sbi->ll_flags |= LL_SBI_VERBOSE;
+ sbi->ll_flags |= LL_SBI_CHECKSUM;
+
+ sbi->ll_flags |= LL_SBI_LRU_RESIZE;
+
+ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
+ pp_r_hist.oh_lock);
+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
+ pp_w_hist.oh_lock);
+ }
+
+ /* metadata statahead is enabled by default */
+ sbi->ll_sa_max = LL_SA_RPC_DEF;
+ atomic_set(&sbi->ll_sa_total, 0);
+ atomic_set(&sbi->ll_sa_wrong, 0);
+ atomic_set(&sbi->ll_agl_total, 0);
+ sbi->ll_flags |= LL_SBI_AGL_ENABLED;
+
+ RETURN(sbi);
+}
+
+void ll_free_sbi(struct super_block *sb)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ ENTRY;
+
+ if (sbi != NULL) {
+ spin_lock(&ll_sb_lock);
+ list_del(&sbi->ll_list);
+ spin_unlock(&ll_sb_lock);
+ OBD_FREE(sbi, sizeof(*sbi));
+ }
+ EXIT;
+}
+
+static struct dentry_operations ll_d_root_ops = {
+ .d_compare = ll_dcompare,
+ .d_revalidate = ll_revalidate_nd,
+};
+
+static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
+ struct vfsmount *mnt)
+{
+ struct inode *root = 0;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_device *obd;
+ struct obd_capa *oc = NULL;
+ struct obd_statfs *osfs = NULL;
+ struct ptlrpc_request *request = NULL;
+ struct obd_connect_data *data = NULL;
+ struct obd_uuid *uuid;
+ struct md_op_data *op_data;
+ struct lustre_md lmd;
+ obd_valid valid;
+ int size, err, checksum;
+ ENTRY;
+
+ obd = class_name2obd(md);
+ if (!obd) {
+ CERROR("MD %s: not setup or attached\n", md);
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC_PTR(data);
+ if (data == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC_PTR(osfs);
+ if (osfs == NULL) {
+ OBD_FREE_PTR(data);
+ RETURN(-ENOMEM);
+ }
+
+ if (proc_lustre_fs_root) {
+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
+ dt, md);
+ if (err < 0)
+ CERROR("could not register mount in /proc/fs/lustre\n");
+ }
+
+ /* indicate the features supported by this client */
+ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
+ OBD_CONNECT_ATTRFID |
+ OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
+ OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA |
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
+ OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
+ OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR |
+ OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH|
+ OBD_CONNECT_EINPROGRESS |
+ OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
+ OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
+
+ if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
+ data->ocd_connect_flags |= OBD_CONNECT_SOM;
+
+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+#ifdef CONFIG_FS_POSIX_ACL
+ data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
+#endif
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
+ /* flag mdc connection as lightweight, only used for test
+ * purpose, use with care */
+ data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
+
+ data->ocd_ibits_known = MDS_INODELOCK_FULL;
+ data->ocd_version = LUSTRE_VERSION_CODE;
+
+ if (sb->s_flags & MS_RDONLY)
+ data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
+ data->ocd_connect_flags |= OBD_CONNECT_XATTR;
+
+#ifdef HAVE_MS_FLOCK_LOCK
+ /* force vfs to use lustre handler for flock() calls - bug 10743 */
+ sb->s_flags |= MS_FLOCK_LOCK;
+#endif
+#ifdef MS_HAS_NEW_AOPS
+ sb->s_flags |= MS_HAS_NEW_AOPS;
+#endif
+
+ if (sbi->ll_flags & LL_SBI_FLOCK)
+ sbi->ll_fop = &ll_file_operations_flock;
+ else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
+ sbi->ll_fop = &ll_file_operations;
+ else
+ sbi->ll_fop = &ll_file_operations_noflock;
+
+ /* real client */
+ data->ocd_connect_flags |= OBD_CONNECT_REAL;
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
+
+ data->ocd_brw_size = MD_MAX_BRW_SIZE;
+
+ err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, data, NULL);
+ if (err == -EBUSY) {
+ LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing "
+ "recovery, of which this client is not a "
+ "part. Please wait for recovery to complete,"
+ " abort, or time out.\n", md);
+ GOTO(out, err);
+ } else if (err) {
+ CERROR("cannot connect to %s: rc = %d\n", md, err);
+ GOTO(out, err);
+ }
+
+ sbi->ll_md_exp->exp_connect_data = *data;
+
+ err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
+ LUSTRE_SEQ_METADATA);
+ if (err) {
+ CERROR("%s: Can't init metadata layer FID infrastructure, "
+ "rc = %d\n", sbi->ll_md_exp->exp_obd->obd_name, err);
+ GOTO(out_md, err);
+ }
+
+ /* For mount, we only need fs info from MDT0, and also in DNE, it
+ * can make sure the client can be mounted as long as MDT0 is
+ * avaible */
+ err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_FOR_MDT0);
+ if (err)
+ GOTO(out_md_fid, err);
+
+ /* This needs to be after statfs to ensure connect has finished.
+ * Note that "data" does NOT contain the valid connect reply.
+ * If connecting to a 1.8 server there will be no LMV device, so
+ * we can access the MDC export directly and exp_connect_flags will
+ * be non-zero, but if accessing an upgraded 2.1 server it will
+ * have the correct flags filled in.
+ * XXX: fill in the LMV exp_connect_flags from MDC(s). */
+ valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
+ if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
+ valid != CLIENT_CONNECT_MDT_REQD) {
+ char *buf;
+
+ OBD_ALLOC_WAIT(buf, PAGE_CACHE_SIZE);
+ obd_connect_flags2str(buf, PAGE_CACHE_SIZE,
+ valid ^ CLIENT_CONNECT_MDT_REQD, ",");
+ LCONSOLE_ERROR_MSG(0x170, "Server %s does not support "
+ "feature(s) needed for correct operation "
+ "of this client (%s). Please upgrade "
+ "server or downgrade client.\n",
+ sbi->ll_md_exp->exp_obd->obd_name, buf);
+ OBD_FREE(buf, PAGE_CACHE_SIZE);
+ GOTO(out_md_fid, err = -EPROTO);
+ }
+
+ size = sizeof(*data);
+ err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
+ KEY_CONN_DATA, &size, data, NULL);
+ if (err) {
+ CERROR("%s: Get connect data failed: rc = %d\n",
+ sbi->ll_md_exp->exp_obd->obd_name, err);
+ GOTO(out_md_fid, err);
+ }
+
+ LASSERT(osfs->os_bsize);
+ sb->s_blocksize = osfs->os_bsize;
+ sb->s_blocksize_bits = log2(osfs->os_bsize);
+ sb->s_magic = LL_SUPER_MAGIC;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sbi->ll_namelen = osfs->os_namelen;
+ sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
+
+ if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
+ !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
+ LCONSOLE_INFO("Disabling user_xattr feature because "
+ "it is not supported on the server\n");
+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
+#ifdef MS_POSIXACL
+ sb->s_flags |= MS_POSIXACL;
+#endif
+ sbi->ll_flags |= LL_SBI_ACL;
+ } else {
+ LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
+#ifdef MS_POSIXACL
+ sb->s_flags &= ~MS_POSIXACL;
+#endif
+ sbi->ll_flags &= ~LL_SBI_ACL;
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) {
+ if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
+ sbi->ll_flags |= LL_SBI_RMT_CLIENT;
+ LCONSOLE_INFO("client is set as remote by default.\n");
+ }
+ } else {
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+ sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
+ LCONSOLE_INFO("client claims to be remote, but server "
+ "rejected, forced to be local.\n");
+ }
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) {
+ LCONSOLE_INFO("client enabled MDS capability!\n");
+ sbi->ll_flags |= LL_SBI_MDS_CAPA;
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) {
+ LCONSOLE_INFO("client enabled OSS capability!\n");
+ sbi->ll_flags |= LL_SBI_OSS_CAPA;
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
+ sbi->ll_flags |= LL_SBI_64BIT_HASH;
+
+ if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+ sbi->ll_md_brw_size = data->ocd_brw_size;
+ else
+ sbi->ll_md_brw_size = PAGE_CACHE_SIZE;
+
+ if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
+ LCONSOLE_INFO("Layout lock feature supported.\n");
+ sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
+ }
+
+ obd = class_name2obd(dt);
+ if (!obd) {
+ CERROR("DT %s: not setup or attached\n", dt);
+ GOTO(out_md_fid, err = -ENODEV);
+ }
+
+ data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
+ OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
+ OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
+ OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
+ OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
+ OBD_CONNECT_MAXBYTES |
+ OBD_CONNECT_EINPROGRESS |
+ OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
+ OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
+
+ if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
+ data->ocd_connect_flags |= OBD_CONNECT_SOM;
+
+ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
+ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
+ * disabled by default, because it can still be enabled on the
+ * fly via /proc. As a consequence, we still need to come to an
+ * agreement on the supported algorithms at connect time */
+ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
+ data->ocd_cksum_types = OBD_CKSUM_ADLER;
+ else
+ data->ocd_cksum_types = cksum_types_supported_client();
+ }
+
+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
+
+ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
+ "ocd_grant: %d\n", data->ocd_connect_flags,
+ data->ocd_version, data->ocd_grant);
+
+ obd->obd_upcall.onu_owner = &sbi->ll_lco;
+ obd->obd_upcall.onu_upcall = cl_ocd_update;
+
+ data->ocd_brw_size = DT_MAX_BRW_SIZE;
+
+ err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data,
+ NULL);
+ if (err == -EBUSY) {
+ LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing "
+ "recovery, of which this client is not a "
+ "part. Please wait for recovery to "
+ "complete, abort, or time out.\n", dt);
+ GOTO(out_md, err);
+ } else if (err) {
+ CERROR("%s: Cannot connect to %s: rc = %d\n",
+ sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
+ GOTO(out_md, err);
+ }
+
+ sbi->ll_dt_exp->exp_connect_data = *data;
+
+ err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
+ LUSTRE_SEQ_METADATA);
+ if (err) {
+ CERROR("%s: Can't init data layer FID infrastructure, "
+ "rc = %d\n", sbi->ll_dt_exp->exp_obd->obd_name, err);
+ GOTO(out_dt, err);
+ }
+
+ mutex_lock(&sbi->ll_lco.lco_lock);
+ sbi->ll_lco.lco_flags = data->ocd_connect_flags;
+ sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
+ sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
+ mutex_unlock(&sbi->ll_lco.lco_lock);
+
+ fid_zero(&sbi->ll_root_fid);
+ err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, &oc);
+ if (err) {
+ CERROR("cannot mds_connect: rc = %d\n", err);
+ GOTO(out_lock_cn_cb, err);
+ }
+ if (!fid_is_sane(&sbi->ll_root_fid)) {
+ CERROR("%s: Invalid root fid "DFID" during mount\n",
+ sbi->ll_md_exp->exp_obd->obd_name,
+ PFID(&sbi->ll_root_fid));
+ GOTO(out_lock_cn_cb, err = -EINVAL);
+ }
+ CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
+
+ sb->s_op = &lustre_super_operations;
+#if THREAD_SIZE >= 8192 /*b=17630*/
+ sb->s_export_op = &lustre_export_operations;
+#endif
+
+ /* make root inode
+ * XXX: move this to after cbd setup? */
+ valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA;
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ valid |= OBD_MD_FLRMTPERM;
+ else if (sbi->ll_flags & LL_SBI_ACL)
+ valid |= OBD_MD_FLACL;
+
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL)
+ GOTO(out_lock_cn_cb, err = -ENOMEM);
+
+ op_data->op_fid1 = sbi->ll_root_fid;
+ op_data->op_mode = 0;
+ op_data->op_capa1 = oc;
+ op_data->op_valid = valid;
+
+ err = md_getattr(sbi->ll_md_exp, op_data, &request);
+ if (oc)
+ capa_put(oc);
+ OBD_FREE_PTR(op_data);
+ if (err) {
+ CERROR("%s: md_getattr failed for root: rc = %d\n",
+ sbi->ll_md_exp->exp_obd->obd_name, err);
+ GOTO(out_lock_cn_cb, err);
+ }
+
+ err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
+ sbi->ll_md_exp, &lmd);
+ if (err) {
+ CERROR("failed to understand root inode md: rc = %d\n", err);
+ ptlrpc_req_finished(request);
+ GOTO(out_lock_cn_cb, err);
+ }
+
+ LASSERT(fid_is_sane(&sbi->ll_root_fid));
+ root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
+ sbi->ll_flags & LL_SBI_32BIT_API),
+ &lmd);
+ md_free_lustre_md(sbi->ll_md_exp, &lmd);
+ ptlrpc_req_finished(request);
+
+ if (root == NULL || IS_ERR(root)) {
+ if (lmd.lsm)
+ obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
+#ifdef CONFIG_FS_POSIX_ACL
+ if (lmd.posix_acl) {
+ posix_acl_release(lmd.posix_acl);
+ lmd.posix_acl = NULL;
+ }
+#endif
+ err = IS_ERR(root) ? PTR_ERR(root) : -EBADF;
+ root = NULL;
+ CERROR("lustre_lite: bad iget4 for root\n");
+ GOTO(out_root, err);
+ }
+
+ err = ll_close_thread_start(&sbi->ll_lcq);
+ if (err) {
+ CERROR("cannot start close thread: rc %d\n", err);
+ GOTO(out_root, err);
+ }
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+ rct_init(&sbi->ll_rct);
+ et_init(&sbi->ll_et);
+ }
+#endif
+
+ checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
+ err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
+ KEY_CHECKSUM, sizeof(checksum), &checksum,
+ NULL);
+ cl_sb_init(sb);
+
+ err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
+ KEY_CACHE_SET, sizeof(sbi->ll_cache),
+ &sbi->ll_cache, NULL);
+
+ sb->s_root = d_make_root(root);
+ if (sb->s_root == NULL) {
+ CERROR("%s: can't make root dentry\n",
+ ll_get_fsname(sb, NULL, 0));
+ GOTO(out_root, err = -ENOMEM);
+ }
+
+ /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
+ d_set_d_op(sb->s_root, &ll_d_root_ops);
+ sb->s_d_op = &ll_d_ops;
+
+ sbi->ll_sdev_orig = sb->s_dev;
+
+ /* We set sb->s_dev equal on all lustre clients in order to support
+ * NFS export clustering. NFSD requires that the FSID be the same
+ * on all clients. */
+ /* s_dev is also used in lt_compare() to compare two fs, but that is
+ * only a node-local comparison. */
+ uuid = obd_get_uuid(sbi->ll_md_exp);
+ if (uuid != NULL)
+ sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
+
+ if (data != NULL)
+ OBD_FREE_PTR(data);
+ if (osfs != NULL)
+ OBD_FREE_PTR(osfs);
+
+ RETURN(err);
+out_root:
+ if (root)
+ iput(root);
+out_lock_cn_cb:
+ obd_fid_fini(sbi->ll_dt_exp->exp_obd);
+out_dt:
+ obd_disconnect(sbi->ll_dt_exp);
+ sbi->ll_dt_exp = NULL;
+ /* Make sure all OScs are gone, since cl_cache is accessing sbi. */
+ obd_zombie_barrier();
+out_md_fid:
+ obd_fid_fini(sbi->ll_md_exp->exp_obd);
+out_md:
+ obd_disconnect(sbi->ll_md_exp);
+ sbi->ll_md_exp = NULL;
+out:
+ if (data != NULL)
+ OBD_FREE_PTR(data);
+ if (osfs != NULL)
+ OBD_FREE_PTR(osfs);
+ lprocfs_unregister_mountpoint(sbi);
+ return err;
+}
+
+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
+{
+ int size, rc;
+
+ *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL);
+ size = sizeof(int);
+ rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
+ KEY_MAX_EASIZE, &size, lmmsize, NULL);
+ if (rc)
+ CERROR("Get max mdsize error rc %d \n", rc);
+
+ RETURN(rc);
+}
+
+void ll_dump_inode(struct inode *inode)
+{
+ struct ll_d_hlist_node *tmp;
+ int dentry_count = 0;
+
+ LASSERT(inode != NULL);
+
+ ll_d_hlist_for_each(tmp, &inode->i_dentry)
+ dentry_count++;
+
+ CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
+ inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino,
+ inode->i_mode, atomic_read(&inode->i_count), dentry_count);
+}
+
+void lustre_dump_dentry(struct dentry *dentry, int recur)
+{
+ struct list_head *tmp;
+ int subdirs = 0;
+
+ LASSERT(dentry != NULL);
+
+ list_for_each(tmp, &dentry->d_subdirs)
+ subdirs++;
+
+ CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
+ " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
+ dentry->d_name.len, dentry->d_name.name,
+ dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
+ dentry->d_parent, dentry->d_inode, d_refcount(dentry),
+ dentry->d_flags, dentry->d_fsdata, subdirs);
+ if (dentry->d_inode != NULL)
+ ll_dump_inode(dentry->d_inode);
+
+ if (recur == 0)
+ return;
+
+ list_for_each(tmp, &dentry->d_subdirs) {
+ struct dentry *d = list_entry(tmp, struct dentry, d_u.d_child);
+ lustre_dump_dentry(d, recur - 1);
+ }
+}
+
+void client_common_put_super(struct super_block *sb)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ ENTRY;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+ et_fini(&sbi->ll_et);
+ rct_fini(&sbi->ll_rct);
+ }
+#endif
+
+ ll_close_thread_shutdown(sbi->ll_lcq);
+
+ cl_sb_fini(sb);
+
+ list_del(&sbi->ll_conn_chain);
+
+ obd_fid_fini(sbi->ll_dt_exp->exp_obd);
+ obd_disconnect(sbi->ll_dt_exp);
+ sbi->ll_dt_exp = NULL;
+ /* wait till all OSCs are gone, since cl_cache is accessing sbi.
+ * see LU-2543. */
+ obd_zombie_barrier();
+
+ lprocfs_unregister_mountpoint(sbi);
+
+ obd_fid_fini(sbi->ll_md_exp->exp_obd);
+ obd_disconnect(sbi->ll_md_exp);
+ sbi->ll_md_exp = NULL;
+
+ EXIT;
+}
+
+void ll_kill_super(struct super_block *sb)
+{
+ struct ll_sb_info *sbi;
+
+ ENTRY;
+
+ /* not init sb ?*/
+ if (!(sb->s_flags & MS_ACTIVE))
+ return;
+
+ sbi = ll_s2sbi(sb);
+ /* we need restore s_dev from changed for clustred NFS before put_super
+ * because new kernels have cached s_dev and change sb->s_dev in
+ * put_super not affected real removing devices */
+ if (sbi) {
+ sb->s_dev = sbi->ll_sdev_orig;
+ sbi->ll_umounting = 1;
+ }
+ EXIT;
+}
+
+char *ll_read_opt(const char *opt, char *data)
+{
+ char *value;
+ char *retval;
+ ENTRY;
+
+ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
+ if (strncmp(opt, data, strlen(opt)))
+ RETURN(NULL);
+ if ((value = strchr(data, '=')) == NULL)
+ RETURN(NULL);
+
+ value++;
+ OBD_ALLOC(retval, strlen(value) + 1);
+ if (!retval) {
+ CERROR("out of memory!\n");
+ RETURN(NULL);
+ }
+
+ memcpy(retval, value, strlen(value)+1);
+ CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
+ RETURN(retval);
+}
+
+static inline int ll_set_opt(const char *opt, char *data, int fl)
+{
+ if (strncmp(opt, data, strlen(opt)) != 0)
+ return(0);
+ else
+ return(fl);
+}
+
+/* non-client-specific mount options are parsed in lmd_parse */
+static int ll_options(char *options, int *flags)
+{
+ int tmp;
+ char *s1 = options, *s2;
+ ENTRY;
+
+ if (!options)
+ RETURN(0);
+
+ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
+
+ while (*s1) {
+ CDEBUG(D_SUPER, "next opt=%s\n", s1);
+ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 5, 50, 0)
+ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
+ if (tmp) {
+ /* Ignore deprecated mount option. The client will
+ * always try to mount with ACL support, whether this
+ * is used depends on whether server supports it. */
+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
+ "mount option 'acl'.\n");
+ goto next;
+ }
+ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
+ if (tmp) {
+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
+ "mount option 'noacl'.\n");
+ goto next;
+ }
+#else
+#warning "{no}acl options have been deprecated since 1.8, please remove them"
+#endif
+ tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+
+ tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
+ tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
+ if (tmp) {
+ *flags &= ~tmp;
+ goto next;
+ }
+ LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
+ s1);
+ RETURN(-EINVAL);
+
+next:
+ /* Find next opt */
+ s2 = strchr(s1, ',');
+ if (s2 == NULL)
+ break;
+ s1 = s2 + 1;
+ }
+ RETURN(0);
+}
+
+void ll_lli_init(struct ll_inode_info *lli)
+{
+ lli->lli_inode_magic = LLI_INODE_MAGIC;
+ lli->lli_flags = 0;
+ lli->lli_ioepoch = 0;
+ lli->lli_maxbytes = MAX_LFS_FILESIZE;
+ spin_lock_init(&lli->lli_lock);
+ lli->lli_posix_acl = NULL;
+ lli->lli_remote_perms = NULL;
+ mutex_init(&lli->lli_rmtperm_mutex);
+ /* Do not set lli_fid, it has been initialized already. */
+ fid_zero(&lli->lli_pfid);
+ INIT_LIST_HEAD(&lli->lli_close_list);
+ INIT_LIST_HEAD(&lli->lli_oss_capas);
+ atomic_set(&lli->lli_open_count, 0);
+ lli->lli_mds_capa = NULL;
+ lli->lli_rmtperm_time = 0;
+ lli->lli_pending_och = NULL;
+ lli->lli_mds_read_och = NULL;
+ lli->lli_mds_write_och = NULL;
+ lli->lli_mds_exec_och = NULL;
+ lli->lli_open_fd_read_count = 0;
+ lli->lli_open_fd_write_count = 0;
+ lli->lli_open_fd_exec_count = 0;
+ mutex_init(&lli->lli_och_mutex);
+ spin_lock_init(&lli->lli_agl_lock);
+ lli->lli_has_smd = false;
+ lli->lli_layout_gen = LL_LAYOUT_GEN_NONE;
+ lli->lli_clob = NULL;
+
+ LASSERT(lli->lli_vfs_inode.i_mode != 0);
+ if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
+ mutex_init(&lli->lli_readdir_mutex);
+ lli->lli_opendir_key = NULL;
+ lli->lli_sai = NULL;
+ lli->lli_def_acl = NULL;
+ spin_lock_init(&lli->lli_sa_lock);
+ lli->lli_opendir_pid = 0;
+ } else {
+ sema_init(&lli->lli_size_sem, 1);
+ lli->lli_size_sem_owner = NULL;
+ lli->lli_symlink_name = NULL;
+ init_rwsem(&lli->lli_trunc_sem);
+ mutex_init(&lli->lli_write_mutex);
+ init_rwsem(&lli->lli_glimpse_sem);
+ lli->lli_glimpse_time = 0;
+ INIT_LIST_HEAD(&lli->lli_agl_list);
+ lli->lli_agl_index = 0;
+ lli->lli_async_rc = 0;
+ lli->lli_volatile = false;
+ }
+ mutex_init(&lli->lli_layout_mutex);
+}
+
+static inline int ll_bdi_register(struct backing_dev_info *bdi)
+{
+ static atomic_t ll_bdi_num = ATOMIC_INIT(0);
+
+ bdi->name = "lustre";
+ return bdi_register(bdi, NULL, "lustre-%d",
+ atomic_inc_return(&ll_bdi_num));
+}
+
+int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
+{
+ struct lustre_profile *lprof = NULL;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct ll_sb_info *sbi;
+ char *dt = NULL, *md = NULL;
+ char *profilenm = get_profile_name(sb);
+ struct config_llog_instance *cfg;
+ /* %p for void* in printf needs 16+2 characters: 0xffffffffffffffff */
+ const int instlen = sizeof(cfg->cfg_instance) * 2 + 2;
+ int err;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+
+ OBD_ALLOC_PTR(cfg);
+ if (cfg == NULL)
+ RETURN(-ENOMEM);
+
+ try_module_get(THIS_MODULE);
+
+ /* client additional sb info */
+ lsi->lsi_llsbi = sbi = ll_init_sbi();
+ if (!sbi) {
+ module_put(THIS_MODULE);
+ OBD_FREE_PTR(cfg);
+ RETURN(-ENOMEM);
+ }
+
+ err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
+ if (err)
+ GOTO(out_free, err);
+
+ err = bdi_init(&lsi->lsi_bdi);
+ if (err)
+ GOTO(out_free, err);
+ lsi->lsi_flags |= LSI_BDI_INITIALIZED;
+ lsi->lsi_bdi.capabilities = BDI_CAP_MAP_COPY;
+ err = ll_bdi_register(&lsi->lsi_bdi);
+ if (err)
+ GOTO(out_free, err);
+
+ sb->s_bdi = &lsi->lsi_bdi;
+
+ /* Generate a string unique to this super, in case some joker tries
+ to mount the same fs at two mount points.
+ Use the address of the super itself.*/
+ cfg->cfg_instance = sb;
+ cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
+ cfg->cfg_callback = class_config_llog_handler;
+ /* set up client obds */
+ err = lustre_process_log(sb, profilenm, cfg);
+ if (err < 0) {
+ CERROR("Unable to process log: %d\n", err);
+ GOTO(out_free, err);
+ }
+
+ /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
+ lprof = class_get_profile(profilenm);
+ if (lprof == NULL) {
+ LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
+ " read from the MGS. Does that filesystem "
+ "exist?\n", profilenm);
+ GOTO(out_free, err = -EINVAL);
+ }
+ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
+ lprof->lp_md, lprof->lp_dt);
+
+ OBD_ALLOC(dt, strlen(lprof->lp_dt) + instlen + 2);
+ if (!dt)
+ GOTO(out_free, err = -ENOMEM);
+ sprintf(dt, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
+
+ OBD_ALLOC(md, strlen(lprof->lp_md) + instlen + 2);
+ if (!md)
+ GOTO(out_free, err = -ENOMEM);
+ sprintf(md, "%s-%p", lprof->lp_md, cfg->cfg_instance);
+
+ /* connections, registrations, sb setup */
+ err = client_common_fill_super(sb, md, dt, mnt);
+
+out_free:
+ if (md)
+ OBD_FREE(md, strlen(lprof->lp_md) + instlen + 2);
+ if (dt)
+ OBD_FREE(dt, strlen(lprof->lp_dt) + instlen + 2);
+ if (err)
+ ll_put_super(sb);
+ else if (sbi->ll_flags & LL_SBI_VERBOSE)
+ LCONSOLE_WARN("Mounted %s\n", profilenm);
+
+ OBD_FREE_PTR(cfg);
+ RETURN(err);
+} /* ll_fill_super */
+
+void ll_put_super(struct super_block *sb)
+{
+ struct config_llog_instance cfg;
+ struct obd_device *obd;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ char *profilenm = get_profile_name(sb);
+ int next, force = 1;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
+
+ ll_print_capa_stat(sbi);
+
+ cfg.cfg_instance = sb;
+ lustre_end_log(sb, profilenm, &cfg);
+
+ if (sbi->ll_md_exp) {
+ obd = class_exp2obd(sbi->ll_md_exp);
+ if (obd)
+ force = obd->obd_force;
+ }
+
+ /* We need to set force before the lov_disconnect in
+ lustre_common_put_super, since l_d cleans up osc's as well. */
+ if (force) {
+ next = 0;
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
+ &next)) != NULL) {
+ obd->obd_force = force;
+ }
+ }
+
+ if (sbi->ll_lcq) {
+ /* Only if client_common_fill_super succeeded */
+ client_common_put_super(sb);
+ }
+
+ next = 0;
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
+ class_manual_cleanup(obd);
+ }
+
+ if (sbi->ll_flags & LL_SBI_VERBOSE)
+ LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
+
+ if (profilenm)
+ class_del_profile(profilenm);
+
+ if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
+ bdi_destroy(&lsi->lsi_bdi);
+ lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
+ }
+
+ ll_free_sbi(sb);
+ lsi->lsi_llsbi = NULL;
+
+ lustre_common_put_super(sb);
+
+ module_put(THIS_MODULE);
+
+ EXIT;
+} /* client_put_super */
+
+struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
+{
+ struct inode *inode = NULL;
+
+ /* NOTE: we depend on atomic igrab() -bzzz */
+ lock_res_and_lock(lock);
+ if (lock->l_resource->lr_lvb_inode) {
+ struct ll_inode_info * lli;
+ lli = ll_i2info(lock->l_resource->lr_lvb_inode);
+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
+ inode = igrab(lock->l_resource->lr_lvb_inode);
+ } else {
+ inode = lock->l_resource->lr_lvb_inode;
+ LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO :
+ D_WARNING, lock, "lr_lvb_inode %p is "
+ "bogus: magic %08x",
+ lock->l_resource->lr_lvb_inode,
+ lli->lli_inode_magic);
+ inode = NULL;
+ }
+ }
+ unlock_res_and_lock(lock);
+ return inode;
+}
+
+struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
+{
+ struct inode *inode = NULL;
+ /* NOTE: we depend on atomic igrab() -bzzz */
+ lock_res_and_lock(lock);
+ if (lock->l_ast_data) {
+ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
+ inode = igrab(lock->l_ast_data);
+ } else {
+ inode = lock->l_ast_data;
+ LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO :
+ D_WARNING, lock, "l_ast_data %p is "
+ "bogus: magic %08x", lock->l_ast_data,
+ lli->lli_inode_magic);
+ inode = NULL;
+ }
+ }
+ unlock_res_and_lock(lock);
+ return inode;
+}
+
+void ll_clear_inode(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
+ inode->i_generation, inode);
+
+ if (S_ISDIR(inode->i_mode)) {
+ /* these should have been cleared in ll_file_release */
+ LASSERT(lli->lli_opendir_key == NULL);
+ LASSERT(lli->lli_sai == NULL);
+ LASSERT(lli->lli_opendir_pid == 0);
+ }
+
+ ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
+ md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
+
+ LASSERT(!lli->lli_open_fd_write_count);
+ LASSERT(!lli->lli_open_fd_read_count);
+ LASSERT(!lli->lli_open_fd_exec_count);
+
+ if (lli->lli_mds_write_och)
+ ll_md_real_close(inode, FMODE_WRITE);
+ if (lli->lli_mds_exec_och)
+ ll_md_real_close(inode, FMODE_EXEC);
+ if (lli->lli_mds_read_och)
+ ll_md_real_close(inode, FMODE_READ);
+
+ if (S_ISLNK(inode->i_mode) && lli->lli_symlink_name) {
+ OBD_FREE(lli->lli_symlink_name,
+ strlen(lli->lli_symlink_name) + 1);
+ lli->lli_symlink_name = NULL;
+ }
+
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+ LASSERT(lli->lli_posix_acl == NULL);
+ if (lli->lli_remote_perms) {
+ free_rmtperm_hash(lli->lli_remote_perms);
+ lli->lli_remote_perms = NULL;
+ }
+ }
+#ifdef CONFIG_FS_POSIX_ACL
+ else if (lli->lli_posix_acl) {
+ LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
+ LASSERT(lli->lli_remote_perms == NULL);
+ posix_acl_release(lli->lli_posix_acl);
+ lli->lli_posix_acl = NULL;
+ }
+#endif
+ lli->lli_inode_magic = LLI_INODE_DEAD;
+
+ ll_clear_inode_capas(inode);
+ if (!S_ISDIR(inode->i_mode))
+ LASSERT(list_empty(&lli->lli_agl_list));
+
+ /*
+ * XXX This has to be done before lsm is freed below, because
+ * cl_object still uses inode lsm.
+ */
+ cl_inode_fini(inode);
+ lli->lli_has_smd = false;
+
+ EXIT;
+}
+
+int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
+ struct md_open_data **mod)
+{
+ struct lustre_md md;
+ struct inode *inode = dentry->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *request = NULL;
+ int rc, ia_valid;
+ ENTRY;
+
+ op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
+ &request, mod);
+ if (rc) {
+ ptlrpc_req_finished(request);
+ if (rc == -ENOENT) {
+ clear_nlink(inode);
+ /* Unlinked special device node? Or just a race?
+ * Pretend we done everything. */
+ if (!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode)) {
+ ia_valid = op_data->op_attr.ia_valid;
+ op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
+ rc = simple_setattr(dentry, &op_data->op_attr);
+ op_data->op_attr.ia_valid = ia_valid;
+ }
+ } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
+ CERROR("md_setattr fails: rc = %d\n", rc);
+ }
+ RETURN(rc);
+ }
+
+ rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
+ sbi->ll_md_exp, &md);
+ if (rc) {
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+ }
+
+ ia_valid = op_data->op_attr.ia_valid;
+ /* inode size will be in ll_setattr_ost, can't do it now since dirty
+ * cache is not cleared yet. */
+ op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
+ rc = simple_setattr(dentry, &op_data->op_attr);
+ op_data->op_attr.ia_valid = ia_valid;
+
+ /* Extract epoch data if obtained. */
+ op_data->op_handle = md.body->handle;
+ op_data->op_ioepoch = md.body->ioepoch;
+
+ ll_update_inode(inode, &md);
+ ptlrpc_req_finished(request);
+
+ RETURN(rc);
+}
+
+/* Close IO epoch and send Size-on-MDS attribute update. */
+static int ll_setattr_done_writing(struct inode *inode,
+ struct md_op_data *op_data,
+ struct md_open_data *mod)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(op_data != NULL);
+ if (!S_ISREG(inode->i_mode))
+ RETURN(0);
+
+ CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
+ op_data->op_ioepoch, PFID(&lli->lli_fid));
+
+ op_data->op_flags = MF_EPOCH_CLOSE;
+ ll_done_writing_attr(inode, op_data);
+ ll_pack_inode2opdata(inode, op_data, NULL);
+
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
+ if (rc == -EAGAIN) {
+ /* MDS has instructed us to obtain Size-on-MDS attribute
+ * from OSTs and send setattr to back to MDS. */
+ rc = ll_som_update(inode, op_data);
+ } else if (rc) {
+ CERROR("inode %lu mdc truncate failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+ RETURN(rc);
+}
+
+static int ll_setattr_ost(struct inode *inode, struct iattr *attr)
+{
+ struct obd_capa *capa;
+ int rc;
+
+ if (attr->ia_valid & ATTR_SIZE)
+ capa = ll_osscapa_get(inode, CAPA_OPC_OSS_TRUNC);
+ else
+ capa = ll_mdscapa_get(inode);
+
+ rc = cl_setattr_ost(inode, attr, capa);
+
+ if (attr->ia_valid & ATTR_SIZE)
+ ll_truncate_free_capa(capa);
+ else
+ capa_put(capa);
+
+ return rc;
+}
+
+
+/* If this inode has objects allocated to it (lsm != NULL), then the OST
+ * object(s) determine the file size and mtime. Otherwise, the MDS will
+ * keep these values until such a time that objects are allocated for it.
+ * We do the MDS operations first, as it is checking permissions for us.
+ * We don't to the MDS RPC if there is nothing that we want to store there,
+ * otherwise there is no harm in updating mtime/atime on the MDS if we are
+ * going to do an RPC anyways.
+ *
+ * If we are doing a truncate, we will send the mtime and ctime updates
+ * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
+ * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
+ * at the same time.
+ */
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct md_op_data *op_data = NULL;
+ struct md_open_data *mod = NULL;
+ int rc = 0, rc1 = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "%s: setattr inode %p/fid:"DFID" from %llu to %llu, "
+ "valid %x\n", ll_get_fsname(inode->i_sb, NULL, 0), inode,
+ PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
+ attr->ia_valid);
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ /* Check new size against VFS/VM file size limit and rlimit */
+ rc = inode_newsize_ok(inode, attr->ia_size);
+ if (rc)
+ RETURN(rc);
+
+ /* The maximum Lustre file size is variable, based on the
+ * OST maximum object size and number of stripes. This
+ * needs another check in addition to the VFS check above. */
+ if (attr->ia_size > ll_file_maxbytes(inode)) {
+ CDEBUG(D_INODE,"file "DFID" too large %llu > "LPU64"\n",
+ PFID(&lli->lli_fid), attr->ia_size,
+ ll_file_maxbytes(inode));
+ RETURN(-EFBIG);
+ }
+
+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ }
+
+ /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
+ if (attr->ia_valid & TIMES_SET_FLAGS) {
+ if (current_fsuid() != inode->i_uid &&
+ !cfs_capable(CFS_CAP_FOWNER))
+ RETURN(-EPERM);
+ }
+
+ /* We mark all of the fields "set" so MDS/OST does not re-set them */
+ if (attr->ia_valid & ATTR_CTIME) {
+ attr->ia_ctime = CFS_CURRENT_TIME;
+ attr->ia_valid |= ATTR_CTIME_SET;
+ }
+ if (!(attr->ia_valid & ATTR_ATIME_SET) &&
+ (attr->ia_valid & ATTR_ATIME)) {
+ attr->ia_atime = CFS_CURRENT_TIME;
+ attr->ia_valid |= ATTR_ATIME_SET;
+ }
+ if (!(attr->ia_valid & ATTR_MTIME_SET) &&
+ (attr->ia_valid & ATTR_MTIME)) {
+ attr->ia_mtime = CFS_CURRENT_TIME;
+ attr->ia_valid |= ATTR_MTIME_SET;
+ }
+
+ if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
+ LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
+ cfs_time_current_sec());
+
+ /* If we are changing file size, file content is modified, flag it. */
+ if (attr->ia_valid & ATTR_SIZE) {
+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ }
+
+ /* We always do an MDS RPC, even if we're only changing the size;
+ * only the MDS knows whether truncate() should fail with -ETXTBUSY */
+
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL)
+ RETURN(-ENOMEM);
+
+ if (!S_ISDIR(inode->i_mode)) {
+ if (attr->ia_valid & ATTR_SIZE)
+ inode_dio_write_done(inode);
+ mutex_unlock(&inode->i_mutex);
+ down_write(&lli->lli_trunc_sem);
+ }
+
+ memcpy(&op_data->op_attr, attr, sizeof(*attr));
+
+ /* Open epoch for truncate. */
+ if (exp_connect_som(ll_i2mdexp(inode)) &&
+ (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
+ op_data->op_flags = MF_EPOCH_OPEN;
+
+ rc = ll_md_setattr(dentry, op_data, &mod);
+ if (rc)
+ GOTO(out, rc);
+
+ /* RPC to MDT is sent, cancel data modification flag */
+ if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags &= ~LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ }
+
+ ll_ioepoch_open(lli, op_data->op_ioepoch);
+ if (!S_ISREG(inode->i_mode))
+ GOTO(out, rc = 0);
+
+ if (attr->ia_valid & (ATTR_SIZE |
+ ATTR_ATIME | ATTR_ATIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET))
+ /* For truncate and utimes sending attributes to OSTs, setting
+ * mtime/atime to the past will be performed under PW [0:EOF]
+ * extent lock (new_size:EOF for truncate). It may seem
+ * excessive to send mtime/atime updates to OSTs when not
+ * setting times to past, but it is necessary due to possible
+ * time de-synchronization between MDT inode and OST objects */
+ rc = ll_setattr_ost(inode, attr);
+ EXIT;
+out:
+ if (op_data) {
+ if (op_data->op_ioepoch) {
+ rc1 = ll_setattr_done_writing(inode, op_data, mod);
+ if (!rc)
+ rc = rc1;
+ }
+ ll_finish_md_op_data(op_data);
+ }
+ if (!S_ISDIR(inode->i_mode)) {
+ up_write(&lli->lli_trunc_sem);
+ mutex_lock(&inode->i_mutex);
+ if (attr->ia_valid & ATTR_SIZE)
+ inode_dio_wait(inode);
+ }
+
+ ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ?
+ LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1);
+
+ return rc;
+}
+
+int ll_setattr(struct dentry *de, struct iattr *attr)
+{
+ int mode = de->d_inode->i_mode;
+
+ if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
+ (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+
+ if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
+ (ATTR_SIZE|ATTR_MODE)) &&
+ (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
+ (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+ !(attr->ia_mode & S_ISGID))))
+ attr->ia_valid |= ATTR_FORCE;
+
+ if ((mode & S_ISUID) &&
+ !(attr->ia_mode & S_ISUID) &&
+ !(attr->ia_valid & ATTR_KILL_SUID))
+ attr->ia_valid |= ATTR_KILL_SUID;
+
+ if (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+ !(attr->ia_mode & S_ISGID) &&
+ !(attr->ia_valid & ATTR_KILL_SGID))
+ attr->ia_valid |= ATTR_KILL_SGID;
+
+ return ll_setattr_raw(de, attr);
+}
+
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+ __u64 max_age, __u32 flags)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_statfs obd_osfs;
+ int rc;
+ ENTRY;
+
+ rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
+ if (rc) {
+ CERROR("md_statfs fails: rc = %d\n", rc);
+ RETURN(rc);
+ }
+
+ osfs->os_type = sb->s_magic;
+
+ CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
+ osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
+
+ if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
+ flags |= OBD_STATFS_NODELAY;
+
+ rc = obd_statfs_rqset(sbi->ll_dt_exp, &obd_osfs, max_age, flags);
+ if (rc) {
+ CERROR("obd_statfs fails: rc = %d\n", rc);
+ RETURN(rc);
+ }
+
+ CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
+ obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
+ obd_osfs.os_files);
+
+ osfs->os_bsize = obd_osfs.os_bsize;
+ osfs->os_blocks = obd_osfs.os_blocks;
+ osfs->os_bfree = obd_osfs.os_bfree;
+ osfs->os_bavail = obd_osfs.os_bavail;
+
+ /* If we don't have as many objects free on the OST as inodes
+ * on the MDS, we reduce the total number of inodes to
+ * compensate, so that the "inodes in use" number is correct.
+ */
+ if (obd_osfs.os_ffree < osfs->os_ffree) {
+ osfs->os_files = (osfs->os_files - osfs->os_ffree) +
+ obd_osfs.os_ffree;
+ osfs->os_ffree = obd_osfs.os_ffree;
+ }
+
+ RETURN(rc);
+}
+int ll_statfs(struct dentry *de, struct kstatfs *sfs)
+{
+ struct super_block *sb = de->d_sb;
+ struct obd_statfs osfs;
+ int rc;
+
+ CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64());
+ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
+
+ /* Some amount of caching on the client is allowed */
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ 0);
+ if (rc)
+ return rc;
+
+ statfs_unpack(sfs, &osfs);
+
+ /* We need to downshift for all 32-bit kernels, because we can't
+ * tell if the kernel is being called via sys_statfs64() or not.
+ * Stop before overflowing f_bsize - in which case it is better
+ * to just risk EOVERFLOW if caller is using old sys_statfs(). */
+ if (sizeof(long) < 8) {
+ while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
+ sfs->f_bsize <<= 1;
+
+ osfs.os_blocks >>= 1;
+ osfs.os_bfree >>= 1;
+ osfs.os_bavail >>= 1;
+ }
+ }
+
+ sfs->f_blocks = osfs.os_blocks;
+ sfs->f_bfree = osfs.os_bfree;
+ sfs->f_bavail = osfs.os_bavail;
+
+ return 0;
+}
+
+void ll_inode_size_lock(struct inode *inode)
+{
+ struct ll_inode_info *lli;
+
+ LASSERT(!S_ISDIR(inode->i_mode));
+
+ lli = ll_i2info(inode);
+ LASSERT(lli->lli_size_sem_owner != current);
+ down(&lli->lli_size_sem);
+ LASSERT(lli->lli_size_sem_owner == NULL);
+ lli->lli_size_sem_owner = current;
+}
+
+void ll_inode_size_unlock(struct inode *inode)
+{
+ struct ll_inode_info *lli;
+
+ lli = ll_i2info(inode);
+ LASSERT(lli->lli_size_sem_owner == current);
+ lli->lli_size_sem_owner = NULL;
+ up(&lli->lli_size_sem);
+}
+
+void ll_update_inode(struct inode *inode, struct lustre_md *md)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct mdt_body *body = md->body;
+ struct lov_stripe_md *lsm = md->lsm;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+ if (lsm != NULL) {
+ if (!lli->lli_has_smd &&
+ !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
+ cl_file_inode_init(inode, md);
+
+ lli->lli_maxbytes = lsm->lsm_maxbytes;
+ if (lli->lli_maxbytes > MAX_LFS_FILESIZE)
+ lli->lli_maxbytes = MAX_LFS_FILESIZE;
+ }
+
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+ if (body->valid & OBD_MD_FLRMTPERM)
+ ll_update_remote_perm(inode, md->remote_perm);
+ }
+#ifdef CONFIG_FS_POSIX_ACL
+ else if (body->valid & OBD_MD_FLACL) {
+ spin_lock(&lli->lli_lock);
+ if (lli->lli_posix_acl)
+ posix_acl_release(lli->lli_posix_acl);
+ lli->lli_posix_acl = md->posix_acl;
+ spin_unlock(&lli->lli_lock);
+ }
+#endif
+ inode->i_ino = cl_fid_build_ino(&body->fid1,
+ sbi->ll_flags & LL_SBI_32BIT_API);
+ inode->i_generation = cl_fid_build_gen(&body->fid1);
+
+ if (body->valid & OBD_MD_FLATIME) {
+ if (body->atime > LTIME_S(inode->i_atime))
+ LTIME_S(inode->i_atime) = body->atime;
+ lli->lli_lvb.lvb_atime = body->atime;
+ }
+ if (body->valid & OBD_MD_FLMTIME) {
+ if (body->mtime > LTIME_S(inode->i_mtime)) {
+ CDEBUG(D_INODE, "setting ino %lu mtime from %lu "
+ "to "LPU64"\n", inode->i_ino,
+ LTIME_S(inode->i_mtime), body->mtime);
+ LTIME_S(inode->i_mtime) = body->mtime;
+ }
+ lli->lli_lvb.lvb_mtime = body->mtime;
+ }
+ if (body->valid & OBD_MD_FLCTIME) {
+ if (body->ctime > LTIME_S(inode->i_ctime))
+ LTIME_S(inode->i_ctime) = body->ctime;
+ lli->lli_lvb.lvb_ctime = body->ctime;
+ }
+ if (body->valid & OBD_MD_FLMODE)
+ inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
+ if (body->valid & OBD_MD_FLTYPE)
+ inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
+ LASSERT(inode->i_mode != 0);
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS);
+ } else {
+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
+ }
+ if (body->valid & OBD_MD_FLUID)
+ inode->i_uid = body->uid;
+ if (body->valid & OBD_MD_FLGID)
+ inode->i_gid = body->gid;
+ if (body->valid & OBD_MD_FLFLAGS)
+ inode->i_flags = ll_ext_to_inode_flags(body->flags);
+ if (body->valid & OBD_MD_FLNLINK)
+ set_nlink(inode, body->nlink);
+ if (body->valid & OBD_MD_FLRDEV)
+ inode->i_rdev = old_decode_dev(body->rdev);
+
+ if (body->valid & OBD_MD_FLID) {
+ /* FID shouldn't be changed! */
+ if (fid_is_sane(&lli->lli_fid)) {
+ LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
+ "Trying to change FID "DFID
+ " to the "DFID", inode %lu/%u(%p)\n",
+ PFID(&lli->lli_fid), PFID(&body->fid1),
+ inode->i_ino, inode->i_generation, inode);
+ } else
+ lli->lli_fid = body->fid1;
+ }
+
+ LASSERT(fid_seq(&lli->lli_fid) != 0);
+
+ if (body->valid & OBD_MD_FLSIZE) {
+ if (exp_connect_som(ll_i2mdexp(inode)) &&
+ S_ISREG(inode->i_mode)) {
+ struct lustre_handle lockh;
+ ldlm_mode_t mode;
+
+ /* As it is possible a blocking ast has been processed
+ * by this time, we need to check there is an UPDATE
+ * lock on the client and set LLIF_MDS_SIZE_LOCK holding
+ * it. */
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
+ &lockh, LDLM_FL_CBPENDING);
+ if (mode) {
+ if (lli->lli_flags & (LLIF_DONE_WRITING |
+ LLIF_EPOCH_PENDING |
+ LLIF_SOM_DIRTY)) {
+ CERROR("ino %lu flags %u still has "
+ "size authority! do not trust "
+ "the size got from MDS\n",
+ inode->i_ino, lli->lli_flags);
+ } else {
+ /* Use old size assignment to avoid
+ * deadlock bz14138 & bz14326 */
+ i_size_write(inode, body->size);
+ lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
+ }
+ ldlm_lock_decref(&lockh, mode);
+ }
+ } else {
+ /* Use old size assignment to avoid
+ * deadlock bz14138 & bz14326 */
+ i_size_write(inode, body->size);
+
+ CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
+ inode->i_ino, (unsigned long long)body->size);
+ }
+
+ if (body->valid & OBD_MD_FLBLOCKS)
+ inode->i_blocks = body->blocks;
+ }
+
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ LASSERT(md->mds_capa);
+ ll_add_capa(inode, md->mds_capa);
+ }
+ if (body->valid & OBD_MD_FLOSSCAPA) {
+ LASSERT(md->oss_capa);
+ ll_add_capa(inode, md->oss_capa);
+ }
+}
+
+void ll_read_inode2(struct inode *inode, void *opaque)
+{
+ struct lustre_md *md = opaque;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(&lli->lli_fid), inode);
+
+ LASSERT(!lli->lli_has_smd);
+
+ /* Core attributes from the MDS first. This is a new inode, and
+ * the VFS doesn't zero times in the core inode so we have to do
+ * it ourselves. They will be overwritten by either MDS or OST
+ * attributes - we just need to make sure they aren't newer. */
+ LTIME_S(inode->i_mtime) = 0;
+ LTIME_S(inode->i_atime) = 0;
+ LTIME_S(inode->i_ctime) = 0;
+ inode->i_rdev = 0;
+ ll_update_inode(inode, md);
+
+ /* OIDEBUG(inode); */
+
+ /* initializing backing dev info. */
+ inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi;
+
+
+ if (S_ISREG(inode->i_mode)) {
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ inode->i_op = &ll_file_inode_operations;
+ inode->i_fop = sbi->ll_fop;
+ inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
+ EXIT;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &ll_dir_inode_operations;
+ inode->i_fop = &ll_dir_operations;
+ EXIT;
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = &ll_fast_symlink_inode_operations;
+ EXIT;
+ } else {
+ inode->i_op = &ll_special_inode_operations;
+
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+
+ EXIT;
+ }
+}
+
+void ll_delete_inode(struct inode *inode)
+{
+ struct cl_inode_info *lli = cl_i2info(inode);
+ ENTRY;
+
+ if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL)
+ /* discard all dirty pages before truncating them, required by
+ * osc_extent implementation at LU-1030. */
+ cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
+ CL_FSYNC_DISCARD, 1);
+
+ truncate_inode_pages(&inode->i_data, 0);
+
+ /* Workaround for LU-118 */
+ if (inode->i_data.nrpages) {
+ TREE_READ_LOCK_IRQ(&inode->i_data);
+ TREE_READ_UNLOCK_IRQ(&inode->i_data);
+ LASSERTF(inode->i_data.nrpages == 0,
+ "inode=%lu/%u(%p) nrpages=%lu, see "
+ "http://jira.whamcloud.com/browse/LU-118\n",
+ inode->i_ino, inode->i_generation, inode,
+ inode->i_data.nrpages);
+ }
+ /* Workaround end */
+
+ ll_clear_inode(inode);
+ clear_inode(inode);
+
+ EXIT;
+}
+
+int ll_iocontrol(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *req = NULL;
+ int rc, flags = 0;
+ ENTRY;
+
+ switch(cmd) {
+ case FSFILT_IOC_GETFLAGS: {
+ struct mdt_body *body;
+ struct md_op_data *op_data;
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
+ 0, 0, LUSTRE_OPC_ANY,
+ NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_valid = OBD_MD_FLFLAGS;
+ rc = md_getattr(sbi->ll_md_exp, op_data, &req);
+ ll_finish_md_op_data(op_data);
+ if (rc) {
+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+ RETURN(-abs(rc));
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+
+ flags = body->flags;
+
+ ptlrpc_req_finished(req);
+
+ RETURN(put_user(flags, (int *)arg));
+ }
+ case FSFILT_IOC_SETFLAGS: {
+ struct lov_stripe_md *lsm;
+ struct obd_info oinfo = { { { 0 } } };
+ struct md_op_data *op_data;
+
+ if (get_user(flags, (int *)arg))
+ RETURN(-EFAULT);
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+ op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
+ rc = md_setattr(sbi->ll_md_exp, op_data,
+ NULL, 0, NULL, 0, &req, NULL);
+ ll_finish_md_op_data(op_data);
+ ptlrpc_req_finished(req);
+ if (rc)
+ RETURN(rc);
+
+ inode->i_flags = ll_ext_to_inode_flags(flags);
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL)
+ RETURN(0);
+
+ OBDO_ALLOC(oinfo.oi_oa);
+ if (!oinfo.oi_oa) {
+ ccc_inode_lsm_put(inode, lsm);
+ RETURN(-ENOMEM);
+ }
+ oinfo.oi_md = lsm;
+ oinfo.oi_oa->o_oi = lsm->lsm_oi;
+ oinfo.oi_oa->o_flags = flags;
+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS |
+ OBD_MD_FLGROUP;
+ oinfo.oi_capa = ll_mdscapa_get(inode);
+ obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid);
+ rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
+ capa_put(oinfo.oi_capa);
+ OBDO_FREE(oinfo.oi_oa);
+ ccc_inode_lsm_put(inode, lsm);
+
+ if (rc && rc != -EPERM && rc != -EACCES)
+ CERROR("osc_setattr_async fails: rc = %d\n", rc);
+
+ RETURN(rc);
+ }
+ default:
+ RETURN(-ENOSYS);
+ }
+
+ RETURN(0);
+}
+
+int ll_flush_ctx(struct inode *inode)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ CDEBUG(D_SEC, "flush context for user %d\n", current_uid());
+
+ obd_set_info_async(NULL, sbi->ll_md_exp,
+ sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
+ 0, NULL, NULL);
+ obd_set_info_async(NULL, sbi->ll_dt_exp,
+ sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
+ 0, NULL, NULL);
+ return 0;
+}
+
+/* umount -f client means force down, don't save state */
+void ll_umount_begin(struct super_block *sb)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_device *obd;
+ struct obd_ioctl_data *ioc_data;
+ ENTRY;
+
+
+ CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
+ sb->s_count, atomic_read(&sb->s_active));
+
+ obd = class_exp2obd(sbi->ll_md_exp);
+ if (obd == NULL) {
+ CERROR("Invalid MDC connection handle "LPX64"\n",
+ sbi->ll_md_exp->exp_handle.h_cookie);
+ EXIT;
+ return;
+ }
+ obd->obd_force = 1;
+
+ obd = class_exp2obd(sbi->ll_dt_exp);
+ if (obd == NULL) {
+ CERROR("Invalid LOV connection handle "LPX64"\n",
+ sbi->ll_dt_exp->exp_handle.h_cookie);
+ EXIT;
+ return;
+ }
+ obd->obd_force = 1;
+
+ OBD_ALLOC_PTR(ioc_data);
+ if (ioc_data) {
+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
+ sizeof *ioc_data, ioc_data, NULL);
+
+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
+ sizeof *ioc_data, ioc_data, NULL);
+
+ OBD_FREE_PTR(ioc_data);
+ }
+
+ /* Really, we'd like to wait until there are no requests outstanding,
+ * and then continue. For now, we just invalidate the requests,
+ * schedule() and sleep one second if needed, and hope.
+ */
+ schedule();
+
+ EXIT;
+}
+
+int ll_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ char *profilenm = get_profile_name(sb);
+ int err;
+ __u32 read_only;
+
+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+ read_only = *flags & MS_RDONLY;
+ err = obd_set_info_async(NULL, sbi->ll_md_exp,
+ sizeof(KEY_READ_ONLY),
+ KEY_READ_ONLY, sizeof(read_only),
+ &read_only, NULL);
+ if (err) {
+ LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
+ profilenm, read_only ?
+ "read-only" : "read-write", err);
+ return err;
+ }
+
+ if (read_only)
+ sb->s_flags |= MS_RDONLY;
+ else
+ sb->s_flags &= ~MS_RDONLY;
+
+ if (sbi->ll_flags & LL_SBI_VERBOSE)
+ LCONSOLE_WARN("Remounted %s %s\n", profilenm,
+ read_only ? "read-only" : "read-write");
+ }
+ return 0;
+}
+
+int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
+ struct super_block *sb, struct lookup_intent *it)
+{
+ struct ll_sb_info *sbi = NULL;
+ struct lustre_md md;
+ int rc;
+ ENTRY;
+
+ LASSERT(*inode || sb);
+ sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
+ rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
+ sbi->ll_md_exp, &md);
+ if (rc)
+ RETURN(rc);
+
+ if (*inode) {
+ ll_update_inode(*inode, &md);
+ } else {
+ LASSERT(sb != NULL);
+
+ /*
+ * At this point server returns to client's same fid as client
+ * generated for creating. So using ->fid1 is okay here.
+ */
+ LASSERT(fid_is_sane(&md.body->fid1));
+
+ *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
+ sbi->ll_flags & LL_SBI_32BIT_API),
+ &md);
+ if (*inode == NULL || IS_ERR(*inode)) {
+#ifdef CONFIG_FS_POSIX_ACL
+ if (md.posix_acl) {
+ posix_acl_release(md.posix_acl);
+ md.posix_acl = NULL;
+ }
+#endif
+ rc = IS_ERR(*inode) ? PTR_ERR(*inode) : -ENOMEM;
+ *inode = NULL;
+ CERROR("new_inode -fatal: rc %d\n", rc);
+ GOTO(out, rc);
+ }
+ }
+
+ /* Handling piggyback layout lock.
+ * Layout lock can be piggybacked by getattr and open request.
+ * The lsm can be applied to inode only if it comes with a layout lock
+ * otherwise correct layout may be overwritten, for example:
+ * 1. proc1: mdt returns a lsm but not granting layout
+ * 2. layout was changed by another client
+ * 3. proc2: refresh layout and layout lock granted
+ * 4. proc1: to apply a stale layout */
+ if (it != NULL && it->d.lustre.it_lock_mode != 0) {
+ struct lustre_handle lockh;
+ struct ldlm_lock *lock;
+
+ lockh.cookie = it->d.lustre.it_lock_handle;
+ lock = ldlm_handle2lock(&lockh);
+ LASSERT(lock != NULL);
+ if (ldlm_has_layout(lock)) {
+ struct cl_object_conf conf;
+
+ memset(&conf, 0, sizeof(conf));
+ conf.coc_opc = OBJECT_CONF_SET;
+ conf.coc_inode = *inode;
+ conf.coc_lock = lock;
+ conf.u.coc_md = &md;
+ (void)ll_layout_conf(*inode, &conf);
+ }
+ LDLM_LOCK_PUT(lock);
+ }
+
+out:
+ if (md.lsm != NULL)
+ obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
+ md_free_lustre_md(sbi->ll_md_exp, &md);
+ RETURN(rc);
+}
+
+int ll_obd_statfs(struct inode *inode, void *arg)
+{
+ struct ll_sb_info *sbi = NULL;
+ struct obd_export *exp;
+ char *buf = NULL;
+ struct obd_ioctl_data *data = NULL;
+ __u32 type;
+ __u32 flags;
+ int len = 0, rc;
+
+ if (!inode || !(sbi = ll_i2sbi(inode)))
+ GOTO(out_statfs, rc = -EINVAL);
+
+ rc = obd_ioctl_getdata(&buf, &len, arg);
+ if (rc)
+ GOTO(out_statfs, rc);
+
+ data = (void*)buf;
+ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
+ !data->ioc_pbuf1 || !data->ioc_pbuf2)
+ GOTO(out_statfs, rc = -EINVAL);
+
+ if (data->ioc_inllen1 != sizeof(__u32) ||
+ data->ioc_inllen2 != sizeof(__u32) ||
+ data->ioc_plen1 != sizeof(struct obd_statfs) ||
+ data->ioc_plen2 != sizeof(struct obd_uuid))
+ GOTO(out_statfs, rc = -EINVAL);
+
+ memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
+ if (type & LL_STATFS_LMV)
+ exp = sbi->ll_md_exp;
+ else if (type & LL_STATFS_LOV)
+ exp = sbi->ll_dt_exp;
+ else
+ GOTO(out_statfs, rc = -ENODEV);
+
+ flags = (type & LL_STATFS_NODELAY) ? OBD_STATFS_NODELAY : 0;
+ rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, &flags);
+ if (rc)
+ GOTO(out_statfs, rc);
+out_statfs:
+ if (buf)
+ obd_ioctl_freedata(buf, len);
+ return rc;
+}
+
+int ll_process_config(struct lustre_cfg *lcfg)
+{
+ char *ptr;
+ void *sb;
+ struct lprocfs_static_vars lvars;
+ unsigned long x;
+ int rc = 0;
+
+ lprocfs_llite_init_vars(&lvars);
+
+ /* The instance name contains the sb: lustre-client-aacfe000 */
+ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
+ if (!ptr || !*(++ptr))
+ return -EINVAL;
+ if (sscanf(ptr, "%lx", &x) != 1)
+ return -EINVAL;
+ sb = (void *)x;
+ /* This better be a real Lustre superblock! */
+ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
+
+ /* Note we have not called client_common_fill_super yet, so
+ proc fns must be able to handle that! */
+ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
+ lcfg, sb);
+ if (rc > 0)
+ rc = 0;
+ return(rc);
+}
+
+/* this function prepares md_op_data hint for passing ot down to MD stack. */
+struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data,
+ struct inode *i1, struct inode *i2,
+ const char *name, int namelen,
+ int mode, __u32 opc, void *data)
+{
+ LASSERT(i1 != NULL);
+
+ if (namelen > ll_i2sbi(i1)->ll_namelen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ if (op_data == NULL)
+ OBD_ALLOC_PTR(op_data);
+
+ if (op_data == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ ll_i2gids(op_data->op_suppgids, i1, i2);
+ op_data->op_fid1 = *ll_inode2fid(i1);
+ op_data->op_capa1 = ll_mdscapa_get(i1);
+
+ if (i2) {
+ op_data->op_fid2 = *ll_inode2fid(i2);
+ op_data->op_capa2 = ll_mdscapa_get(i2);
+ } else {
+ fid_zero(&op_data->op_fid2);
+ op_data->op_capa2 = NULL;
+ }
+
+ op_data->op_name = name;
+ op_data->op_namelen = namelen;
+ op_data->op_mode = mode;
+ op_data->op_mod_time = cfs_time_current_sec();
+ op_data->op_fsuid = current_fsuid();
+ op_data->op_fsgid = current_fsgid();
+ op_data->op_cap = cfs_curproc_cap_pack();
+ op_data->op_bias = 0;
+ op_data->op_cli_flags = 0;
+ if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) &&
+ filename_is_volatile(name, namelen, NULL))
+ op_data->op_bias |= MDS_CREATE_VOLATILE;
+ op_data->op_opc = opc;
+ op_data->op_mds = 0;
+ op_data->op_data = data;
+
+ /* If the file is being opened after mknod() (normally due to NFS)
+ * try to use the default stripe data from parent directory for
+ * allocating OST objects. Try to pass the parent FID to MDS. */
+ if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
+ !ll_i2info(i2)->lli_has_smd) {
+ struct ll_inode_info *lli = ll_i2info(i2);
+
+ spin_lock(&lli->lli_lock);
+ if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
+ op_data->op_fid1 = lli->lli_pfid;
+ spin_unlock(&lli->lli_lock);
+ /** We ignore parent's capability temporary. */
+ }
+
+ /* When called by ll_setattr_raw, file is i1. */
+ if (LLIF_DATA_MODIFIED & ll_i2info(i1)->lli_flags)
+ op_data->op_bias |= MDS_DATA_MODIFIED;
+
+ return op_data;
+}
+
+void ll_finish_md_op_data(struct md_op_data *op_data)
+{
+ capa_put(op_data->op_capa1);
+ capa_put(op_data->op_capa2);
+ OBD_FREE_PTR(op_data);
+}
+
+int ll_show_options(struct seq_file *seq, struct dentry *dentry)
+{
+ struct ll_sb_info *sbi;
+
+ LASSERT((seq != NULL) && (dentry != NULL));
+ sbi = ll_s2sbi(dentry->d_sb);
+
+ if (sbi->ll_flags & LL_SBI_NOLCK)
+ seq_puts(seq, ",nolock");
+
+ if (sbi->ll_flags & LL_SBI_FLOCK)
+ seq_puts(seq, ",flock");
+
+ if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
+ seq_puts(seq, ",localflock");
+
+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
+ seq_puts(seq, ",user_xattr");
+
+ if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
+ seq_puts(seq, ",lazystatfs");
+
+ if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
+ seq_puts(seq, ",user_fid2path");
+
+ RETURN(0);
+}
+
+/**
+ * Get obd name by cmd, and copy out to user space
+ */
+int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obd_device *obd;
+ ENTRY;
+
+ if (cmd == OBD_IOC_GETDTNAME)
+ obd = class_exp2obd(sbi->ll_dt_exp);
+ else if (cmd == OBD_IOC_GETMDNAME)
+ obd = class_exp2obd(sbi->ll_md_exp);
+ else
+ RETURN(-EINVAL);
+
+ if (!obd)
+ RETURN(-ENOENT);
+
+ if (copy_to_user((void *)arg, obd->obd_name,
+ strlen(obd->obd_name) + 1))
+ RETURN(-EFAULT);
+
+ RETURN(0);
+}
+
+/**
+ * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
+ * fsname will be returned in this buffer; otherwise, a static buffer will be
+ * used to store the fsname and returned to caller.
+ */
+char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
+{
+ static char fsname_static[MTI_NAME_MAXLEN];
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ char *ptr;
+ int len;
+
+ if (buf == NULL) {
+ /* this means the caller wants to use static buffer
+ * and it doesn't care about race. Usually this is
+ * in error reporting path */
+ buf = fsname_static;
+ buflen = sizeof(fsname_static);
+ }
+
+ len = strlen(lsi->lsi_lmd->lmd_profile);
+ ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
+ if (ptr && (strcmp(ptr, "-client") == 0))
+ len -= 7;
+
+ if (unlikely(len >= buflen))
+ len = buflen - 1;
+ strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+
+static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize)
+{
+ char *path = NULL;
+
+ struct path p;
+
+ p.dentry = dentry;
+ p.mnt = current->fs->root.mnt;
+ path_get(&p);
+ path = d_path(&p, buf, bufsize);
+ path_put(&p);
+
+ return path;
+}
+
+void ll_dirty_page_discard_warn(struct page *page, int ioret)
+{
+ char *buf, *path = NULL;
+ struct dentry *dentry = NULL;
+ struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+
+ /* this can be called inside spin lock so use GFP_ATOMIC. */
+ buf = (char *)__get_free_page(GFP_ATOMIC);
+ if (buf != NULL) {
+ dentry = d_find_alias(page->mapping->host);
+ if (dentry != NULL)
+ path = ll_d_path(dentry, buf, PAGE_SIZE);
+ }
+
+ CWARN("%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
+ "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
+ s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
+ PFID(&obj->cob_header.coh_lu.loh_fid),
+ (path && !IS_ERR(path)) ? path : "", ioret);
+
+ if (dentry != NULL)
+ dput(dentry);
+
+ if (buf != NULL)
+ free_page((unsigned long)buf);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
new file mode 100644
index 000000000000..d9590d85634a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -0,0 +1,507 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+#include <linux/lustre_compat25.h>
+
+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
+ int *type);
+
+static struct vm_operations_struct ll_file_vm_ops;
+
+void policy_from_vma(ldlm_policy_data_t *policy,
+ struct vm_area_struct *vma, unsigned long addr,
+ size_t count)
+{
+ policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+ (vma->vm_pgoff << PAGE_CACHE_SHIFT);
+ policy->l_extent.end = (policy->l_extent.start + count - 1) |
+ ~CFS_PAGE_MASK;
+}
+
+struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
+ size_t count)
+{
+ struct vm_area_struct *vma, *ret = NULL;
+ ENTRY;
+
+ /* mmap_sem must have been held by caller. */
+ LASSERT(!down_write_trylock(&mm->mmap_sem));
+
+ for(vma = find_vma(mm, addr);
+ vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
+ vma->vm_flags & VM_SHARED) {
+ ret = vma;
+ break;
+ }
+ }
+ RETURN(ret);
+}
+
+/**
+ * API independent part for page fault initialization.
+ * \param vma - virtual memory area addressed to page fault
+ * \param env - corespondent lu_env to processing
+ * \param nest - nested level
+ * \param index - page index corespondent to fault.
+ * \parm ra_flags - vma readahead flags.
+ *
+ * \return allocated and initialized env for fault operation.
+ * \retval EINVAL if env can't allocated
+ * \return other error codes from cl_io_init.
+ */
+struct cl_io *ll_fault_io_init(struct vm_area_struct *vma,
+ struct lu_env **env_ret,
+ struct cl_env_nest *nest,
+ pgoff_t index, unsigned long *ra_flags)
+{
+ struct file *file = vma->vm_file;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct cl_io *io;
+ struct cl_fault_io *fio;
+ struct lu_env *env;
+ ENTRY;
+
+ *env_ret = NULL;
+ if (ll_file_nolock(file))
+ RETURN(ERR_PTR(-EOPNOTSUPP));
+
+ /*
+ * page fault can be called when lustre IO is
+ * already active for the current thread, e.g., when doing read/write
+ * against user level buffer mapped from Lustre buffer. To avoid
+ * stomping on existing context, optionally force an allocation of a new
+ * one.
+ */
+ env = cl_env_nested_get(nest);
+ if (IS_ERR(env))
+ RETURN(ERR_PTR(-EINVAL));
+
+ *env_ret = env;
+
+ io = ccc_env_thread_io(env);
+ io->ci_obj = ll_i2info(inode)->lli_clob;
+ LASSERT(io->ci_obj != NULL);
+
+ fio = &io->u.ci_fault;
+ fio->ft_index = index;
+ fio->ft_executable = vma->vm_flags&VM_EXEC;
+
+ /*
+ * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
+ * the kernel will not read other pages not covered by ldlm in
+ * filemap_nopage. we do our readahead in ll_readpage.
+ */
+ if (ra_flags != NULL)
+ *ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
+ vma->vm_flags &= ~VM_SEQ_READ;
+ vma->vm_flags |= VM_RAND_READ;
+
+ CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
+ fio->ft_index, fio->ft_executable);
+
+ if (cl_io_init(env, io, CIT_FAULT, io->ci_obj) == 0) {
+ struct ccc_io *cio = ccc_env_io(env);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+
+ LASSERT(cio->cui_cl.cis_io == io);
+
+ /* mmap lock must be MANDATORY
+ * it has to cache pages. */
+ io->ci_lockreq = CILR_MANDATORY;
+
+ cio->cui_fd = fd;
+ }
+
+ return io;
+}
+
+/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
+static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
+ bool *retry)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct vvp_io *vio;
+ struct cl_env_nest nest;
+ int result;
+ sigset_t set;
+ struct inode *inode;
+ struct ll_inode_info *lli;
+ ENTRY;
+
+ LASSERT(vmpage != NULL);
+
+ io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
+ if (IS_ERR(io))
+ GOTO(out, result = PTR_ERR(io));
+
+ result = io->ci_result;
+ if (result < 0)
+ GOTO(out, result);
+
+ io->u.ci_fault.ft_mkwrite = 1;
+ io->u.ci_fault.ft_writable = 1;
+
+ vio = vvp_env_io(env);
+ vio->u.fault.ft_vma = vma;
+ vio->u.fault.ft_vmpage = vmpage;
+
+ set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
+
+ /* we grab lli_trunc_sem to exclude truncate case.
+ * Otherwise, we could add dirty pages into osc cache
+ * while truncate is on-going. */
+ inode = ccc_object_inode(io->ci_obj);
+ lli = ll_i2info(inode);
+ down_read(&lli->lli_trunc_sem);
+
+ result = cl_io_loop(env, io);
+
+ up_read(&lli->lli_trunc_sem);
+
+ cfs_restore_sigs(set);
+
+ if (result == 0) {
+ struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ lock_page(vmpage);
+ if (vmpage->mapping == NULL) {
+ unlock_page(vmpage);
+
+ /* page was truncated and lock was cancelled, return
+ * ENODATA so that VM_FAULT_NOPAGE will be returned
+ * to handle_mm_fault(). */
+ if (result == 0)
+ result = -ENODATA;
+ } else if (!PageDirty(vmpage)) {
+ /* race, the page has been cleaned by ptlrpcd after
+ * it was unlocked, it has to be added into dirty
+ * cache again otherwise this soon-to-dirty page won't
+ * consume any grants, even worse if this page is being
+ * transferred because it will break RPC checksum.
+ */
+ unlock_page(vmpage);
+
+ CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has "
+ "been written out, retry.\n",
+ vmpage, vmpage->index);
+
+ *retry = true;
+ result = -EAGAIN;
+ }
+
+ if (result == 0) {
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ }
+ }
+ EXIT;
+
+out:
+ cl_io_fini(env, io);
+ cl_env_nested_put(&nest, env);
+
+ CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
+
+ LASSERT(ergo(result == 0, PageLocked(vmpage)));
+ return(result);
+}
+
+
+
+static inline int to_fault_error(int result)
+{
+ switch(result) {
+ case 0:
+ result = VM_FAULT_LOCKED;
+ break;
+ case -EFAULT:
+ result = VM_FAULT_NOPAGE;
+ break;
+ case -ENOMEM:
+ result = VM_FAULT_OOM;
+ break;
+ default:
+ result = VM_FAULT_SIGBUS;
+ break;
+ }
+ return result;
+}
+
+/**
+ * Lustre implementation of a vm_operations_struct::fault() method, called by
+ * VM to server page fault (both in kernel and user space).
+ *
+ * \param vma - is virtiual area struct related to page fault
+ * \param vmf - structure which describe type and address where hit fault
+ *
+ * \return allocated and filled _locked_ page for address
+ * \retval VM_FAULT_ERROR on general error
+ * \retval NOPAGE_OOM not have memory for allocate new page
+ */
+static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct vvp_io *vio = NULL;
+ struct page *vmpage;
+ unsigned long ra_flags;
+ struct cl_env_nest nest;
+ int result;
+ int fault_ret = 0;
+ ENTRY;
+
+ io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
+ if (IS_ERR(io))
+ RETURN(to_fault_error(PTR_ERR(io)));
+
+ result = io->ci_result;
+ if (result == 0) {
+ vio = vvp_env_io(env);
+ vio->u.fault.ft_vma = vma;
+ vio->u.fault.ft_vmpage = NULL;
+ vio->u.fault.fault.ft_vmf = vmf;
+
+ result = cl_io_loop(env, io);
+
+ fault_ret = vio->u.fault.fault.ft_flags;
+ vmpage = vio->u.fault.ft_vmpage;
+ if (result != 0 && vmpage != NULL) {
+ page_cache_release(vmpage);
+ vmf->page = NULL;
+ }
+ }
+ cl_io_fini(env, io);
+ cl_env_nested_put(&nest, env);
+
+ vma->vm_flags |= ra_flags;
+ if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
+ fault_ret |= to_fault_error(result);
+
+ CDEBUG(D_MMAP, "%s fault %d/%d\n",
+ current->comm, fault_ret, result);
+ RETURN(fault_ret);
+}
+
+static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int count = 0;
+ bool printed = false;
+ int result;
+ sigset_t set;
+
+ /* Only SIGKILL and SIGTERM is allowed for fault/nopage/mkwrite
+ * so that it can be killed by admin but not cause segfault by
+ * other signals. */
+ set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
+
+restart:
+ result = ll_fault0(vma, vmf);
+ LASSERT(!(result & VM_FAULT_LOCKED));
+ if (result == 0) {
+ struct page *vmpage = vmf->page;
+
+ /* check if this page has been truncated */
+ lock_page(vmpage);
+ if (unlikely(vmpage->mapping == NULL)) { /* unlucky */
+ unlock_page(vmpage);
+ page_cache_release(vmpage);
+ vmf->page = NULL;
+
+ if (!printed && ++count > 16) {
+ CWARN("the page is under heavy contention,"
+ "maybe your app(%s) needs revising :-)\n",
+ current->comm);
+ printed = true;
+ }
+
+ goto restart;
+ }
+
+ result |= VM_FAULT_LOCKED;
+ }
+ cfs_restore_sigs(set);
+ return result;
+}
+
+static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int count = 0;
+ bool printed = false;
+ bool retry;
+ int result;
+
+ do {
+ retry = false;
+ result = ll_page_mkwrite0(vma, vmf->page, &retry);
+
+ if (!printed && ++count > 16) {
+ CWARN("app(%s): the page %lu of file %lu is under heavy"
+ " contention.\n",
+ current->comm, vmf->pgoff,
+ vma->vm_file->f_dentry->d_inode->i_ino);
+ printed = true;
+ }
+ } while (retry);
+
+ switch(result) {
+ case 0:
+ LASSERT(PageLocked(vmf->page));
+ result = VM_FAULT_LOCKED;
+ break;
+ case -ENODATA:
+ case -EFAULT:
+ result = VM_FAULT_NOPAGE;
+ break;
+ case -ENOMEM:
+ result = VM_FAULT_OOM;
+ break;
+ case -EAGAIN:
+ result = VM_FAULT_RETRY;
+ break;
+ default:
+ result = VM_FAULT_SIGBUS;
+ break;
+ }
+
+ return result;
+}
+
+/**
+ * To avoid cancel the locks covering mmapped region for lock cache pressure,
+ * we track the mapped vma count in ccc_object::cob_mmap_cnt.
+ */
+static void ll_vm_open(struct vm_area_struct * vma)
+{
+ struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ struct ccc_object *vob = cl_inode2ccc(inode);
+
+ ENTRY;
+ LASSERT(vma->vm_file);
+ LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+ atomic_inc(&vob->cob_mmap_cnt);
+ EXIT;
+}
+
+/**
+ * Dual to ll_vm_open().
+ */
+static void ll_vm_close(struct vm_area_struct *vma)
+{
+ struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ struct ccc_object *vob = cl_inode2ccc(inode);
+
+ ENTRY;
+ LASSERT(vma->vm_file);
+ atomic_dec(&vob->cob_mmap_cnt);
+ LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+ EXIT;
+}
+
+
+/* return the user space pointer that maps to a file offset via a vma */
+static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
+{
+ return vma->vm_start + (byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT));
+
+}
+
+/* XXX put nice comment here. talk about __free_pte -> dirty pages and
+ * nopage's reference passing to the pte */
+int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
+{
+ int rc = -ENOENT;
+ ENTRY;
+
+ LASSERTF(last > first, "last "LPU64" first "LPU64"\n", last, first);
+ if (mapping_mapped(mapping)) {
+ rc = 0;
+ unmap_mapping_range(mapping, first + PAGE_CACHE_SIZE - 1,
+ last - first + 1, 0);
+ }
+
+ RETURN(rc);
+}
+
+static struct vm_operations_struct ll_file_vm_ops = {
+ .fault = ll_fault,
+ .page_mkwrite = ll_page_mkwrite,
+ .open = ll_vm_open,
+ .close = ll_vm_close,
+};
+
+int ll_file_mmap(struct file *file, struct vm_area_struct * vma)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int rc;
+ ENTRY;
+
+ if (ll_file_nolock(file))
+ RETURN(-EOPNOTSUPP);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_MAP, 1);
+ rc = generic_file_mmap(file, vma);
+ if (rc == 0) {
+ vma->vm_ops = &ll_file_vm_ops;
+ vma->vm_ops->open(vma);
+ /* update the inode's size and mtime */
+ rc = ll_glimpse_size(inode);
+ }
+
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
new file mode 100644
index 000000000000..28cc41e90581
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -0,0 +1,319 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/llite/llite_nfs.c
+ *
+ * NFS export of Lustre Light File System
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Author: Huang Hua <huanghua@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+#include <lustre_lite.h>
+#include "llite_internal.h"
+#include <linux/exportfs.h>
+
+__u32 get_uuid2int(const char *name, int len)
+{
+ __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
+ while (len--) {
+ __u32 key = key1 + (key0 ^ (*name++ * 7152373));
+ if (key & 0x80000000) key -= 0x7fffffff;
+ key1 = key0;
+ key0 = key;
+ }
+ return (key0 << 1);
+}
+
+static int ll_nfs_test_inode(struct inode *inode, void *opaque)
+{
+ return lu_fid_eq(&ll_i2info(inode)->lli_fid,
+ (struct lu_fid *)opaque);
+}
+
+struct inode *search_inode_for_lustre(struct super_block *sb,
+ const struct lu_fid *fid)
+{
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct ptlrpc_request *req = NULL;
+ struct inode *inode = NULL;
+ int eadatalen = 0;
+ unsigned long hash = cl_fid_build_ino(fid,
+ ll_need_32bit_api(sbi));
+ struct md_op_data *op_data;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_INFO, "searching inode for:(%lu,"DFID")\n", hash, PFID(fid));
+
+ inode = ilookup5(sb, hash, ll_nfs_test_inode, (void *)fid);
+ if (inode)
+ RETURN(inode);
+
+ rc = ll_get_max_mdsize(sbi, &eadatalen);
+ if (rc)
+ RETURN(ERR_PTR(rc));
+
+ /* Because inode is NULL, ll_prep_md_op_data can not
+ * be used here. So we allocate op_data ourselves */
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ op_data->op_fid1 = *fid;
+ op_data->op_mode = eadatalen;
+ op_data->op_valid = OBD_MD_FLEASIZE;
+
+ /* mds_fid2dentry ignores f_type */
+ rc = md_getattr(sbi->ll_md_exp, op_data, &req);
+ OBD_FREE_PTR(op_data);
+ if (rc) {
+ CERROR("can't get object attrs, fid "DFID", rc %d\n",
+ PFID(fid), rc);
+ RETURN(ERR_PTR(rc));
+ }
+ rc = ll_prep_inode(&inode, req, sb, NULL);
+ ptlrpc_req_finished(req);
+ if (rc)
+ RETURN(ERR_PTR(rc));
+
+ RETURN(inode);
+}
+
+struct lustre_nfs_fid {
+ struct lu_fid lnf_child;
+ struct lu_fid lnf_parent;
+};
+
+static struct dentry *
+ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *parent)
+{
+ struct inode *inode;
+ struct dentry *result;
+ ENTRY;
+
+ CDEBUG(D_INFO, "Get dentry for fid: "DFID"\n", PFID(fid));
+ if (!fid_is_sane(fid))
+ RETURN(ERR_PTR(-ESTALE));
+
+ inode = search_inode_for_lustre(sb, fid);
+ if (IS_ERR(inode))
+ RETURN(ERR_PTR(PTR_ERR(inode)));
+
+ if (is_bad_inode(inode)) {
+ /* we didn't find the right inode.. */
+ iput(inode);
+ RETURN(ERR_PTR(-ESTALE));
+ }
+
+ /**
+ * It is an anonymous dentry without OST objects created yet.
+ * We have to find the parent to tell MDS how to init lov objects.
+ */
+ if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd &&
+ parent != NULL) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ spin_lock(&lli->lli_lock);
+ lli->lli_pfid = *parent;
+ spin_unlock(&lli->lli_lock);
+ }
+
+ result = d_obtain_alias(inode);
+ if (IS_ERR(result))
+ RETURN(result);
+
+ ll_dops_init(result, 1, 0);
+
+ RETURN(result);
+}
+
+#define LUSTRE_NFS_FID 0x97
+
+/**
+ * \a connectable - is nfsd will connect himself or this should be done
+ * at lustre
+ *
+ * The return value is file handle type:
+ * 1 -- contains child file handle;
+ * 2 -- contains child file handle and parent file handle;
+ * 255 -- error.
+ */
+static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
+ struct inode *parent)
+{
+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
+ ENTRY;
+
+ CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%d\n",
+ inode->i_ino, PFID(ll_inode2fid(inode)), *plen,
+ (int)sizeof(struct lustre_nfs_fid));
+
+ if (*plen < sizeof(struct lustre_nfs_fid) / 4)
+ RETURN(255);
+
+ nfs_fid->lnf_child = *ll_inode2fid(inode);
+ nfs_fid->lnf_parent = *ll_inode2fid(parent);
+ *plen = sizeof(struct lustre_nfs_fid) / 4;
+
+ RETURN(LUSTRE_NFS_FID);
+}
+
+static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen,
+ loff_t hash, u64 ino, unsigned type)
+{
+ /* It is hack to access lde_fid for comparison with lgd_fid.
+ * So the input 'name' must be part of the 'lu_dirent'. */
+ struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name);
+ struct ll_getname_data *lgd = cookie;
+ struct lu_fid fid;
+
+ fid_le_to_cpu(&fid, &lde->lde_fid);
+ if (lu_fid_eq(&fid, &lgd->lgd_fid)) {
+ memcpy(lgd->lgd_name, name, namelen);
+ lgd->lgd_name[namelen] = 0;
+ lgd->lgd_found = 1;
+ }
+ return lgd->lgd_found;
+}
+
+static int ll_get_name(struct dentry *dentry, char *name,
+ struct dentry *child)
+{
+ struct inode *dir = dentry->d_inode;
+ struct ll_getname_data lgd;
+ __u64 offset = 0;
+ int rc;
+ ENTRY;
+
+ if (!dir || !S_ISDIR(dir->i_mode))
+ GOTO(out, rc = -ENOTDIR);
+
+ if (!dir->i_fop)
+ GOTO(out, rc = -EINVAL);
+
+ lgd.lgd_name = name;
+ lgd.lgd_fid = ll_i2info(child->d_inode)->lli_fid;
+ lgd.lgd_found = 0;
+
+ mutex_lock(&dir->i_mutex);
+ rc = ll_dir_read(dir, &offset, &lgd, ll_nfs_get_name_filldir);
+ mutex_unlock(&dir->i_mutex);
+ if (!rc && !lgd.lgd_found)
+ rc = -ENOENT;
+ EXIT;
+
+out:
+ return rc;
+}
+
+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
+
+ if (fh_type != LUSTRE_NFS_FID)
+ RETURN(ERR_PTR(-EPROTO));
+
+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent));
+}
+
+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
+
+ if (fh_type != LUSTRE_NFS_FID)
+ RETURN(ERR_PTR(-EPROTO));
+
+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL));
+}
+
+static struct dentry *ll_get_parent(struct dentry *dchild)
+{
+ struct ptlrpc_request *req = NULL;
+ struct inode *dir = dchild->d_inode;
+ struct ll_sb_info *sbi;
+ struct dentry *result = NULL;
+ struct mdt_body *body;
+ static char dotdot[] = "..";
+ struct md_op_data *op_data;
+ int rc;
+ int lmmsize;
+ ENTRY;
+
+ LASSERT(dir && S_ISDIR(dir->i_mode));
+
+ sbi = ll_s2sbi(dir->i_sb);
+
+ CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n",
+ dir->i_ino, PFID(ll_inode2fid(dir)));
+
+ rc = ll_get_max_mdsize(sbi, &lmmsize);
+ if (rc != 0)
+ RETURN(ERR_PTR(rc));
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
+ strlen(dotdot), lmmsize,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN((void *)op_data);
+
+ rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
+ ll_finish_md_op_data(op_data);
+ if (rc) {
+ CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+ RETURN(ERR_PTR(rc));
+ }
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body->valid & OBD_MD_FLID);
+
+ CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n",
+ PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+
+ result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
+
+ ptlrpc_req_finished(req);
+ RETURN(result);
+}
+
+struct export_operations lustre_export_operations = {
+ .get_parent = ll_get_parent,
+ .encode_fh = ll_encode_fh,
+ .get_name = ll_get_name,
+ .fh_to_dentry = ll_fh_to_dentry,
+ .fh_to_parent = ll_fh_to_parent,
+};
diff --git a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c b/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
new file mode 100644
index 000000000000..4c610369cb9b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
@@ -0,0 +1,301 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/llite_rmtacl.c
+ *
+ * Lustre Remote User Access Control List.
+ *
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+#include <lustre_lite.h>
+#include <lustre_eacl.h>
+#include "llite_internal.h"
+
+static inline __u32 rce_hashfunc(uid_t id)
+{
+ return id & (RCE_HASHES - 1);
+}
+
+static inline __u32 ee_hashfunc(uid_t id)
+{
+ return id & (EE_HASHES - 1);
+}
+
+obd_valid rce_ops2valid(int ops)
+{
+ switch (ops) {
+ case RMT_LSETFACL:
+ return OBD_MD_FLRMTLSETFACL;
+ case RMT_LGETFACL:
+ return OBD_MD_FLRMTLGETFACL;
+ case RMT_RSETFACL:
+ return OBD_MD_FLRMTRSETFACL;
+ case RMT_RGETFACL:
+ return OBD_MD_FLRMTRGETFACL;
+ default:
+ return 0;
+ }
+}
+
+static struct rmtacl_ctl_entry *rce_alloc(pid_t key, int ops)
+{
+ struct rmtacl_ctl_entry *rce;
+
+ OBD_ALLOC_PTR(rce);
+ if (!rce)
+ return NULL;
+
+ INIT_LIST_HEAD(&rce->rce_list);
+ rce->rce_key = key;
+ rce->rce_ops = ops;
+
+ return rce;
+}
+
+static void rce_free(struct rmtacl_ctl_entry *rce)
+{
+ if (!list_empty(&rce->rce_list))
+ list_del(&rce->rce_list);
+
+ OBD_FREE_PTR(rce);
+}
+
+static struct rmtacl_ctl_entry *__rct_search(struct rmtacl_ctl_table *rct,
+ pid_t key)
+{
+ struct rmtacl_ctl_entry *rce;
+ struct list_head *head = &rct->rct_entries[rce_hashfunc(key)];
+
+ list_for_each_entry(rce, head, rce_list)
+ if (rce->rce_key == key)
+ return rce;
+
+ return NULL;
+}
+
+struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key)
+{
+ struct rmtacl_ctl_entry *rce;
+
+ spin_lock(&rct->rct_lock);
+ rce = __rct_search(rct, key);
+ spin_unlock(&rct->rct_lock);
+ return rce;
+}
+
+int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops)
+{
+ struct rmtacl_ctl_entry *rce, *e;
+
+ rce = rce_alloc(key, ops);
+ if (rce == NULL)
+ return -ENOMEM;
+
+ spin_lock(&rct->rct_lock);
+ e = __rct_search(rct, key);
+ if (unlikely(e != NULL)) {
+ CWARN("Unexpected stale rmtacl_entry found: "
+ "[key: %d] [ops: %d]\n", (int)key, ops);
+ rce_free(e);
+ }
+ list_add_tail(&rce->rce_list, &rct->rct_entries[rce_hashfunc(key)]);
+ spin_unlock(&rct->rct_lock);
+
+ return 0;
+}
+
+int rct_del(struct rmtacl_ctl_table *rct, pid_t key)
+{
+ struct rmtacl_ctl_entry *rce;
+
+ spin_lock(&rct->rct_lock);
+ rce = __rct_search(rct, key);
+ if (rce)
+ rce_free(rce);
+ spin_unlock(&rct->rct_lock);
+
+ return rce ? 0 : -ENOENT;
+}
+
+void rct_init(struct rmtacl_ctl_table *rct)
+{
+ int i;
+
+ spin_lock_init(&rct->rct_lock);
+ for (i = 0; i < RCE_HASHES; i++)
+ INIT_LIST_HEAD(&rct->rct_entries[i]);
+}
+
+void rct_fini(struct rmtacl_ctl_table *rct)
+{
+ struct rmtacl_ctl_entry *rce;
+ int i;
+
+ spin_lock(&rct->rct_lock);
+ for (i = 0; i < RCE_HASHES; i++)
+ while (!list_empty(&rct->rct_entries[i])) {
+ rce = list_entry(rct->rct_entries[i].next,
+ struct rmtacl_ctl_entry, rce_list);
+ rce_free(rce);
+ }
+ spin_unlock(&rct->rct_lock);
+}
+
+
+static struct eacl_entry *ee_alloc(pid_t key, struct lu_fid *fid, int type,
+ ext_acl_xattr_header *header)
+{
+ struct eacl_entry *ee;
+
+ OBD_ALLOC_PTR(ee);
+ if (!ee)
+ return NULL;
+
+ INIT_LIST_HEAD(&ee->ee_list);
+ ee->ee_key = key;
+ ee->ee_fid = *fid;
+ ee->ee_type = type;
+ ee->ee_acl = header;
+
+ return ee;
+}
+
+void ee_free(struct eacl_entry *ee)
+{
+ if (!list_empty(&ee->ee_list))
+ list_del(&ee->ee_list);
+
+ if (ee->ee_acl)
+ lustre_ext_acl_xattr_free(ee->ee_acl);
+
+ OBD_FREE_PTR(ee);
+}
+
+static struct eacl_entry *__et_search_del(struct eacl_table *et, pid_t key,
+ struct lu_fid *fid, int type)
+{
+ struct eacl_entry *ee;
+ struct list_head *head = &et->et_entries[ee_hashfunc(key)];
+
+ LASSERT(fid != NULL);
+ list_for_each_entry(ee, head, ee_list)
+ if (ee->ee_key == key) {
+ if (lu_fid_eq(&ee->ee_fid, fid) &&
+ ee->ee_type == type) {
+ list_del_init(&ee->ee_list);
+ return ee;
+ }
+ }
+
+ return NULL;
+}
+
+struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
+ struct lu_fid *fid, int type)
+{
+ struct eacl_entry *ee;
+
+ spin_lock(&et->et_lock);
+ ee = __et_search_del(et, key, fid, type);
+ spin_unlock(&et->et_lock);
+ return ee;
+}
+
+void et_search_free(struct eacl_table *et, pid_t key)
+{
+ struct eacl_entry *ee, *next;
+ struct list_head *head = &et->et_entries[ee_hashfunc(key)];
+
+ spin_lock(&et->et_lock);
+ list_for_each_entry_safe(ee, next, head, ee_list)
+ if (ee->ee_key == key)
+ ee_free(ee);
+
+ spin_unlock(&et->et_lock);
+}
+
+int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
+ ext_acl_xattr_header *header)
+{
+ struct eacl_entry *ee, *e;
+
+ ee = ee_alloc(key, fid, type, header);
+ if (ee == NULL)
+ return -ENOMEM;
+
+ spin_lock(&et->et_lock);
+ e = __et_search_del(et, key, fid, type);
+ if (unlikely(e != NULL)) {
+ CWARN("Unexpected stale eacl_entry found: "
+ "[key: %d] [fid: "DFID"] [type: %d]\n",
+ (int)key, PFID(fid), type);
+ ee_free(e);
+ }
+ list_add_tail(&ee->ee_list, &et->et_entries[ee_hashfunc(key)]);
+ spin_unlock(&et->et_lock);
+
+ return 0;
+}
+
+void et_init(struct eacl_table *et)
+{
+ int i;
+
+ spin_lock_init(&et->et_lock);
+ for (i = 0; i < EE_HASHES; i++)
+ INIT_LIST_HEAD(&et->et_entries[i]);
+}
+
+void et_fini(struct eacl_table *et)
+{
+ struct eacl_entry *ee;
+ int i;
+
+ spin_lock(&et->et_lock);
+ for (i = 0; i < EE_HASHES; i++)
+ while (!list_empty(&et->et_entries[i])) {
+ ee = list_entry(et->et_entries[i].next,
+ struct eacl_entry, ee_list);
+ ee_free(ee);
+ }
+ spin_unlock(&et->et_lock);
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
new file mode 100644
index 000000000000..9d4c17ea8808
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -0,0 +1,867 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/*
+ * linux/drivers/block/loop.c
+ *
+ * Written by Theodore Ts'o, 3/29/93
+ *
+ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
+ * permitted under the GNU General Public License.
+ *
+ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
+ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
+ *
+ * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
+ *
+ * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
+ *
+ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
+ *
+ * Loadable modules and other fixes by AK, 1998
+ *
+ * Maximum number of loop devices now dynamic via max_loop module parameter.
+ * Russell Kroll <rkroll@exploits.org> 19990701
+ *
+ * Maximum number of loop devices when compiled-in now selectable by passing
+ * max_loop=<1-255> to the kernel on boot.
+ * Erik I. Bols?, <eriki@himolde.no>, Oct 31, 1999
+ *
+ * Completely rewrite request handling to be make_request_fn style and
+ * non blocking, pushing work to a helper thread. Lots of fixes from
+ * Al Viro too.
+ * Jens Axboe <axboe@suse.de>, Nov 2000
+ *
+ * Support up to 256 loop devices
+ * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
+ *
+ * Support for falling back on the write file operation when the address space
+ * operations prepare_write and/or commit_write are not available on the
+ * backing filesystem.
+ * Anton Altaparmakov, 16 Feb 2005
+ *
+ * Still To Fix:
+ * - Advisory locking is ignored here.
+ * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/init.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h> /* for invalidate_bdev() */
+#include <linux/completion.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/swap.h>
+#include <linux/pagevec.h>
+
+#include <asm/uaccess.h>
+
+#include <lustre_lib.h>
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+#define LLOOP_MAX_SEGMENTS LNET_MAX_IOV
+
+/* Possible states of device */
+enum {
+ LLOOP_UNBOUND,
+ LLOOP_BOUND,
+ LLOOP_RUNDOWN,
+};
+
+struct lloop_device {
+ int lo_number;
+ int lo_refcnt;
+ loff_t lo_offset;
+ loff_t lo_sizelimit;
+ int lo_flags;
+ int (*ioctl)(struct lloop_device *, int cmd,
+ unsigned long arg);
+
+ struct file *lo_backing_file;
+ struct block_device *lo_device;
+ unsigned lo_blocksize;
+
+ int old_gfp_mask;
+
+ spinlock_t lo_lock;
+ struct bio *lo_bio;
+ struct bio *lo_biotail;
+ int lo_state;
+ struct semaphore lo_sem;
+ struct mutex lo_ctl_mutex;
+ atomic_t lo_pending;
+ wait_queue_head_t lo_bh_wait;
+
+ struct request_queue *lo_queue;
+
+ const struct lu_env *lo_env;
+ struct cl_io lo_io;
+ struct ll_dio_pages lo_pvec;
+
+ /* data to handle bio for lustre. */
+ struct lo_request_data {
+ struct page *lrd_pages[LLOOP_MAX_SEGMENTS];
+ loff_t lrd_offsets[LLOOP_MAX_SEGMENTS];
+ } lo_requests[1];
+};
+
+/*
+ * Loop flags
+ */
+enum {
+ LO_FLAGS_READ_ONLY = 1,
+};
+
+static int lloop_major;
+#define MAX_LOOP_DEFAULT 16
+static int max_loop = MAX_LOOP_DEFAULT;
+static struct lloop_device *loop_dev;
+static struct gendisk **disks;
+static struct mutex lloop_mutex;
+static void *ll_iocontrol_magic = NULL;
+
+static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
+{
+ loff_t size, offset, loopsize;
+
+ /* Compute loopsize in bytes */
+ size = i_size_read(file->f_mapping->host);
+ offset = lo->lo_offset;
+ loopsize = size - offset;
+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
+ loopsize = lo->lo_sizelimit;
+
+ /*
+ * Unfortunately, if we want to do I/O on the device,
+ * the number of 512-byte sectors has to fit into a sector_t.
+ */
+ return loopsize >> 9;
+}
+
+static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
+{
+ const struct lu_env *env = lo->lo_env;
+ struct cl_io *io = &lo->lo_io;
+ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+ pgoff_t offset;
+ int ret;
+ int i;
+ int rw;
+ obd_count page_count = 0;
+ struct bio_vec *bvec;
+ struct bio *bio;
+ ssize_t bytes;
+
+ struct ll_dio_pages *pvec = &lo->lo_pvec;
+ struct page **pages = pvec->ldp_pages;
+ loff_t *offsets = pvec->ldp_offsets;
+
+ truncate_inode_pages(inode->i_mapping, 0);
+
+ /* initialize the IO */
+ memset(io, 0, sizeof(*io));
+ io->ci_obj = obj;
+ ret = cl_io_init(env, io, CIT_MISC, obj);
+ if (ret)
+ return io->ci_result;
+ io->ci_lockreq = CILR_NEVER;
+
+ LASSERT(head != NULL);
+ rw = head->bi_rw;
+ for (bio = head; bio != NULL; bio = bio->bi_next) {
+ LASSERT(rw == bio->bi_rw);
+
+ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
+ bio_for_each_segment(bvec, bio, i) {
+ BUG_ON(bvec->bv_offset != 0);
+ BUG_ON(bvec->bv_len != PAGE_CACHE_SIZE);
+
+ pages[page_count] = bvec->bv_page;
+ offsets[page_count] = offset;
+ page_count++;
+ offset += bvec->bv_len;
+ }
+ LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
+ }
+
+ ll_stats_ops_tally(ll_i2sbi(inode),
+ (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
+ page_count);
+
+ pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
+ pvec->ldp_nr = page_count;
+
+ /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
+ * write those pages into OST. Even worse case is that more pages
+ * would be asked to write out to swap space, and then finally get here
+ * again.
+ * Unfortunately this is NOT easy to fix.
+ * Thoughts on solution:
+ * 0. Define a reserved pool for cl_pages, which could be a list of
+ * pre-allocated cl_pages;
+ * 1. Define a new operation in cl_object_operations{}, says clo_depth,
+ * which measures how many layers for this lustre object. Generally
+ * speaking, the depth would be 2, one for llite, and one for lovsub.
+ * However, for SNS, there will be more since we need additional page
+ * to store parity;
+ * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
+ * pool. Afterwards, the clio would allocate the pages from reserved
+ * pool, this guarantees we neeedn't allocate the cl_pages from
+ * generic cl_page slab cache.
+ * Of course, if there is NOT enough pages in the pool, we might
+ * be asked to write less pages once, this purely depends on
+ * implementation. Anyway, we should be careful to avoid deadlocking.
+ */
+ mutex_lock(&inode->i_mutex);
+ bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+ mutex_unlock(&inode->i_mutex);
+ cl_io_fini(env, io);
+ return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
+}
+
+/*
+ * Add bio to back of pending list
+ */
+static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&lo->lo_lock, flags);
+ if (lo->lo_biotail) {
+ lo->lo_biotail->bi_next = bio;
+ lo->lo_biotail = bio;
+ } else
+ lo->lo_bio = lo->lo_biotail = bio;
+ spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+ atomic_inc(&lo->lo_pending);
+ if (waitqueue_active(&lo->lo_bh_wait))
+ wake_up(&lo->lo_bh_wait);
+}
+
+/*
+ * Grab first pending buffer
+ */
+static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
+{
+ struct bio *first;
+ struct bio **bio;
+ unsigned int count = 0;
+ unsigned int page_count = 0;
+ int rw;
+
+ spin_lock_irq(&lo->lo_lock);
+ first = lo->lo_bio;
+ if (unlikely(first == NULL)) {
+ spin_unlock_irq(&lo->lo_lock);
+ return 0;
+ }
+
+ /* TODO: need to split the bio, too bad. */
+ LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS);
+
+ rw = first->bi_rw;
+ bio = &lo->lo_bio;
+ while (*bio && (*bio)->bi_rw == rw) {
+ CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n",
+ (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size,
+ page_count, (*bio)->bi_vcnt);
+ if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
+ break;
+
+
+ page_count += (*bio)->bi_vcnt;
+ count++;
+ bio = &(*bio)->bi_next;
+ }
+ if (*bio) {
+ /* Some of bios can't be mergable. */
+ lo->lo_bio = *bio;
+ *bio = NULL;
+ } else {
+ /* Hit the end of queue */
+ lo->lo_biotail = NULL;
+ lo->lo_bio = NULL;
+ }
+ *req = first;
+ spin_unlock_irq(&lo->lo_lock);
+ return count;
+}
+
+static ll_mrf_ret
+loop_make_request(struct request_queue *q, struct bio *old_bio)
+{
+ struct lloop_device *lo = q->queuedata;
+ int rw = bio_rw(old_bio);
+ int inactive;
+
+ if (!lo)
+ goto err;
+
+ CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
+ (unsigned long long)old_bio->bi_sector, old_bio->bi_size);
+
+ spin_lock_irq(&lo->lo_lock);
+ inactive = (lo->lo_state != LLOOP_BOUND);
+ spin_unlock_irq(&lo->lo_lock);
+ if (inactive)
+ goto err;
+
+ if (rw == WRITE) {
+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
+ goto err;
+ } else if (rw == READA) {
+ rw = READ;
+ } else if (rw != READ) {
+ CERROR("lloop: unknown command (%x)\n", rw);
+ goto err;
+ }
+ loop_add_bio(lo, old_bio);
+ LL_MRF_RETURN(0);
+err:
+ cfs_bio_io_error(old_bio, old_bio->bi_size);
+ LL_MRF_RETURN(0);
+}
+
+
+static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
+{
+ int ret;
+ ret = do_bio_lustrebacked(lo, bio);
+ while (bio) {
+ struct bio *tmp = bio->bi_next;
+ bio->bi_next = NULL;
+ cfs_bio_endio(bio, bio->bi_size, ret);
+ bio = tmp;
+ }
+}
+
+static inline int loop_active(struct lloop_device *lo)
+{
+ return atomic_read(&lo->lo_pending) ||
+ (lo->lo_state == LLOOP_RUNDOWN);
+}
+
+/*
+ * worker thread that handles reads/writes to file backed loop devices,
+ * to avoid blocking in our make_request_fn.
+ */
+static int loop_thread(void *data)
+{
+ struct lloop_device *lo = data;
+ struct bio *bio;
+ unsigned int count;
+ unsigned long times = 0;
+ unsigned long total_count = 0;
+
+ struct lu_env *env;
+ int refcheck;
+ int ret = 0;
+
+ set_user_nice(current, -20);
+
+ lo->lo_state = LLOOP_BOUND;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ GOTO(out, ret = PTR_ERR(env));
+
+ lo->lo_env = env;
+ memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec));
+ lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages;
+ lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets;
+
+ /*
+ * up sem, we are running
+ */
+ up(&lo->lo_sem);
+
+ for (;;) {
+ wait_event(lo->lo_bh_wait, loop_active(lo));
+ if (!atomic_read(&lo->lo_pending)) {
+ int exiting = 0;
+ spin_lock_irq(&lo->lo_lock);
+ exiting = (lo->lo_state == LLOOP_RUNDOWN);
+ spin_unlock_irq(&lo->lo_lock);
+ if (exiting)
+ break;
+ }
+
+ bio = NULL;
+ count = loop_get_bio(lo, &bio);
+ if (!count) {
+ CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
+ continue;
+ }
+
+ total_count += count;
+ if (total_count < count) { /* overflow */
+ total_count = count;
+ times = 1;
+ } else {
+ times++;
+ }
+ if ((times & 127) == 0) {
+ CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n",
+ total_count, times, total_count / times);
+ }
+
+ LASSERT(bio != NULL);
+ LASSERT(count <= atomic_read(&lo->lo_pending));
+ loop_handle_bio(lo, bio);
+ atomic_sub(count, &lo->lo_pending);
+ }
+ cl_env_put(env, &refcheck);
+
+out:
+ up(&lo->lo_sem);
+ return ret;
+}
+
+static int loop_set_fd(struct lloop_device *lo, struct file *unused,
+ struct block_device *bdev, struct file *file)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ int lo_flags = 0;
+ int error;
+ loff_t size;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ error = -EBUSY;
+ if (lo->lo_state != LLOOP_UNBOUND)
+ goto out;
+
+ mapping = file->f_mapping;
+ inode = mapping->host;
+
+ error = -EINVAL;
+ if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
+ goto out;
+
+ if (!(file->f_mode & FMODE_WRITE))
+ lo_flags |= LO_FLAGS_READ_ONLY;
+
+ size = get_loop_size(lo, file);
+
+ if ((loff_t)(sector_t)size != size) {
+ error = -EFBIG;
+ goto out;
+ }
+
+ /* remove all pages in cache so as dirty pages not to be existent. */
+ truncate_inode_pages(mapping, 0);
+
+ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+
+ lo->lo_blocksize = PAGE_CACHE_SIZE;
+ lo->lo_device = bdev;
+ lo->lo_flags = lo_flags;
+ lo->lo_backing_file = file;
+ lo->ioctl = NULL;
+ lo->lo_sizelimit = 0;
+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+
+ lo->lo_bio = lo->lo_biotail = NULL;
+
+ /*
+ * set queue make_request_fn, and add limits based on lower level
+ * device
+ */
+ blk_queue_make_request(lo->lo_queue, loop_make_request);
+ lo->lo_queue->queuedata = lo;
+
+ /* queue parameters */
+ CLASSERT(PAGE_CACHE_SIZE < (1 << (sizeof(unsigned short) * 8)));
+ blk_queue_logical_block_size(lo->lo_queue,
+ (unsigned short)PAGE_CACHE_SIZE);
+ blk_queue_max_hw_sectors(lo->lo_queue,
+ LLOOP_MAX_SEGMENTS << (PAGE_CACHE_SHIFT - 9));
+ blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
+
+ set_capacity(disks[lo->lo_number], size);
+ bd_set_size(bdev, size << 9);
+
+ set_blocksize(bdev, lo->lo_blocksize);
+
+ kthread_run(loop_thread, lo, "lloop%d", lo->lo_number);
+ down(&lo->lo_sem);
+ return 0;
+
+out:
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return error;
+}
+
+static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
+ int count)
+{
+ struct file *filp = lo->lo_backing_file;
+ int gfp = lo->old_gfp_mask;
+
+ if (lo->lo_state != LLOOP_BOUND)
+ return -ENXIO;
+
+ if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
+ return -EBUSY;
+
+ if (filp == NULL)
+ return -EINVAL;
+
+ spin_lock_irq(&lo->lo_lock);
+ lo->lo_state = LLOOP_RUNDOWN;
+ spin_unlock_irq(&lo->lo_lock);
+ wake_up(&lo->lo_bh_wait);
+
+ down(&lo->lo_sem);
+ lo->lo_backing_file = NULL;
+ lo->ioctl = NULL;
+ lo->lo_device = NULL;
+ lo->lo_offset = 0;
+ lo->lo_sizelimit = 0;
+ lo->lo_flags = 0;
+ ll_invalidate_bdev(bdev, 0);
+ set_capacity(disks[lo->lo_number], 0);
+ bd_set_size(bdev, 0);
+ mapping_set_gfp_mask(filp->f_mapping, gfp);
+ lo->lo_state = LLOOP_UNBOUND;
+ fput(filp);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+static int lo_open(struct block_device *bdev, fmode_t mode)
+{
+ struct lloop_device *lo = bdev->bd_disk->private_data;
+
+ mutex_lock(&lo->lo_ctl_mutex);
+ lo->lo_refcnt++;
+ mutex_unlock(&lo->lo_ctl_mutex);
+
+ return 0;
+}
+
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+ struct lloop_device *lo = disk->private_data;
+
+ mutex_lock(&lo->lo_ctl_mutex);
+ --lo->lo_refcnt;
+ mutex_unlock(&lo->lo_ctl_mutex);
+}
+
+/* lloop device node's ioctl function. */
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct lloop_device *lo = bdev->bd_disk->private_data;
+ struct inode *inode = NULL;
+ int err = 0;
+
+ mutex_lock(&lloop_mutex);
+ switch (cmd) {
+ case LL_IOC_LLOOP_DETACH: {
+ err = loop_clr_fd(lo, bdev, 2);
+ if (err == 0)
+ ll_blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
+ break;
+ }
+
+ case LL_IOC_LLOOP_INFO: {
+ struct lu_fid fid;
+
+ LASSERT(lo->lo_backing_file != NULL);
+ if (inode == NULL)
+ inode = lo->lo_backing_file->f_dentry->d_inode;
+ if (lo->lo_state == LLOOP_BOUND)
+ fid = ll_i2info(inode)->lli_fid;
+ else
+ fid_zero(&fid);
+
+ if (copy_to_user((struct lu_fid *)arg, &fid, sizeof(fid)))
+ err = -EFAULT;
+ break;
+ }
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+ mutex_unlock(&lloop_mutex);
+
+ return err;
+}
+
+static struct block_device_operations lo_fops = {
+ .owner = THIS_MODULE,
+ .open = lo_open,
+ .release = lo_release,
+ .ioctl = lo_ioctl,
+};
+
+/* dynamic iocontrol callback.
+ * This callback is registered in lloop_init and will be called by
+ * ll_iocontrol_call.
+ *
+ * This is a llite regular file ioctl function. It takes the responsibility
+ * of attaching or detaching a file by a lloop's device numner.
+ */
+static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
+ unsigned int cmd, unsigned long arg,
+ void *magic, int *rcp)
+{
+ struct lloop_device *lo = NULL;
+ struct block_device *bdev = NULL;
+ int err = 0;
+ dev_t dev;
+
+ if (magic != ll_iocontrol_magic)
+ return LLIOC_CONT;
+
+ if (disks == NULL)
+ GOTO(out1, err = -ENODEV);
+
+ CWARN("Enter llop_ioctl\n");
+
+ mutex_lock(&lloop_mutex);
+ switch (cmd) {
+ case LL_IOC_LLOOP_ATTACH: {
+ struct lloop_device *lo_free = NULL;
+ int i;
+
+ for (i = 0; i < max_loop; i++, lo = NULL) {
+ lo = &loop_dev[i];
+ if (lo->lo_state == LLOOP_UNBOUND) {
+ if (!lo_free)
+ lo_free = lo;
+ continue;
+ }
+ if (lo->lo_backing_file->f_dentry->d_inode ==
+ file->f_dentry->d_inode)
+ break;
+ }
+ if (lo || !lo_free)
+ GOTO(out, err = -EBUSY);
+
+ lo = lo_free;
+ dev = MKDEV(lloop_major, lo->lo_number);
+
+ /* quit if the used pointer is writable */
+ if (put_user((long)old_encode_dev(dev), (long*)arg))
+ GOTO(out, err = -EFAULT);
+
+ bdev = blkdev_get_by_dev(dev, file->f_mode, NULL);
+ if (IS_ERR(bdev))
+ GOTO(out, err = PTR_ERR(bdev));
+
+ get_file(file);
+ err = loop_set_fd(lo, NULL, bdev, file);
+ if (err) {
+ fput(file);
+ ll_blkdev_put(bdev, 0);
+ }
+
+ break;
+ }
+
+ case LL_IOC_LLOOP_DETACH_BYDEV: {
+ int minor;
+
+ dev = old_decode_dev(arg);
+ if (MAJOR(dev) != lloop_major)
+ GOTO(out, err = -EINVAL);
+
+ minor = MINOR(dev);
+ if (minor > max_loop - 1)
+ GOTO(out, err = -EINVAL);
+
+ lo = &loop_dev[minor];
+ if (lo->lo_state != LLOOP_BOUND)
+ GOTO(out, err = -EINVAL);
+
+ bdev = lo->lo_device;
+ err = loop_clr_fd(lo, bdev, 1);
+ if (err == 0)
+ ll_blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
+
+ break;
+ }
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+out:
+ mutex_unlock(&lloop_mutex);
+out1:
+ if (rcp)
+ *rcp = err;
+ return LLIOC_STOP;
+}
+
+static int __init lloop_init(void)
+{
+ int i;
+ unsigned int cmdlist[] = {
+ LL_IOC_LLOOP_ATTACH,
+ LL_IOC_LLOOP_DETACH_BYDEV,
+ };
+
+ if (max_loop < 1 || max_loop > 256) {
+ max_loop = MAX_LOOP_DEFAULT;
+ CWARN("lloop: invalid max_loop (must be between"
+ " 1 and 256), using default (%u)\n", max_loop);
+ }
+
+ lloop_major = register_blkdev(0, "lloop");
+ if (lloop_major < 0)
+ return -EIO;
+
+ CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n",
+ lloop_major, max_loop);
+
+ ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
+ if (ll_iocontrol_magic == NULL)
+ goto out_mem1;
+
+ OBD_ALLOC_WAIT(loop_dev, max_loop * sizeof(*loop_dev));
+ if (!loop_dev)
+ goto out_mem1;
+
+ OBD_ALLOC_WAIT(disks, max_loop * sizeof(*disks));
+ if (!disks)
+ goto out_mem2;
+
+ for (i = 0; i < max_loop; i++) {
+ disks[i] = alloc_disk(1);
+ if (!disks[i])
+ goto out_mem3;
+ }
+
+ mutex_init(&lloop_mutex);
+
+ for (i = 0; i < max_loop; i++) {
+ struct lloop_device *lo = &loop_dev[i];
+ struct gendisk *disk = disks[i];
+
+ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
+ if (!lo->lo_queue)
+ goto out_mem4;
+
+ mutex_init(&lo->lo_ctl_mutex);
+ sema_init(&lo->lo_sem, 0);
+ init_waitqueue_head(&lo->lo_bh_wait);
+ lo->lo_number = i;
+ spin_lock_init(&lo->lo_lock);
+ disk->major = lloop_major;
+ disk->first_minor = i;
+ disk->fops = &lo_fops;
+ sprintf(disk->disk_name, "lloop%d", i);
+ disk->private_data = lo;
+ disk->queue = lo->lo_queue;
+ }
+
+ /* We cannot fail after we call this, so another loop!*/
+ for (i = 0; i < max_loop; i++)
+ add_disk(disks[i]);
+ return 0;
+
+out_mem4:
+ while (i--)
+ blk_cleanup_queue(loop_dev[i].lo_queue);
+ i = max_loop;
+out_mem3:
+ while (i--)
+ put_disk(disks[i]);
+ OBD_FREE(disks, max_loop * sizeof(*disks));
+out_mem2:
+ OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
+out_mem1:
+ unregister_blkdev(lloop_major, "lloop");
+ ll_iocontrol_unregister(ll_iocontrol_magic);
+ CERROR("lloop: ran out of memory\n");
+ return -ENOMEM;
+}
+
+static void lloop_exit(void)
+{
+ int i;
+
+ ll_iocontrol_unregister(ll_iocontrol_magic);
+ for (i = 0; i < max_loop; i++) {
+ del_gendisk(disks[i]);
+ blk_cleanup_queue(loop_dev[i].lo_queue);
+ put_disk(disks[i]);
+ }
+ if (ll_unregister_blkdev(lloop_major, "lloop"))
+ CWARN("lloop: cannot unregister blkdev\n");
+ else
+ CDEBUG(D_CONFIG, "unregistered lloop major %d\n", lloop_major);
+
+ OBD_FREE(disks, max_loop * sizeof(*disks));
+ OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
+}
+
+module_init(lloop_init);
+module_exit(lloop_exit);
+
+CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum of lloop_device");
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre virtual block device");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
new file mode 100644
index 000000000000..6a82505c7933
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -0,0 +1,1370 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/version.h>
+#include <lustre_lite.h>
+#include <lprocfs_status.h>
+#include <linux/seq_file.h>
+#include <obd_support.h>
+
+#include "llite_internal.h"
+
+struct proc_dir_entry *proc_lustre_fs_root;
+
+#ifdef LPROCFS
+/* /proc/lustre/llite mount point registration */
+extern struct file_operations vvp_dump_pgcache_file_ops;
+struct file_operations ll_rw_extents_stats_fops;
+struct file_operations ll_rw_extents_stats_pp_fops;
+struct file_operations ll_rw_offset_stats_fops;
+
+static int ll_blksize_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, "%u\n", osfs.os_bsize);
+
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_blksize);
+
+static int ll_kbytestotal_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_blocks;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_kbytestotal);
+
+static int ll_kbytesfree_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bfree;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_kbytesfree);
+
+static int ll_kbytesavail_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bavail;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_kbytesavail);
+
+static int ll_filestotal_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, LPU64"\n", osfs.os_files);
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_filestotal);
+
+static int ll_filesfree_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+ struct obd_statfs osfs;
+ int rc;
+
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, LPU64"\n", osfs.os_ffree);
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_filesfree);
+
+static int ll_client_type_seq_show(struct seq_file *m, void *v)
+{
+ struct ll_sb_info *sbi = ll_s2sbi((struct super_block *)m->private);
+ int rc;
+
+ LASSERT(sbi != NULL);
+
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ rc = seq_printf(m, "remote client\n");
+ else
+ rc = seq_printf(m, "local client\n");
+
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(ll_client_type);
+
+static int ll_fstype_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+
+ LASSERT(sb != NULL);
+ return seq_printf(m, "%s\n", sb->s_type->name);
+}
+LPROC_SEQ_FOPS_RO(ll_fstype);
+
+static int ll_sb_uuid_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = (struct super_block *)m->private;
+
+ LASSERT(sb != NULL);
+ return seq_printf(m, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
+}
+LPROC_SEQ_FOPS_RO(ll_sb_uuid);
+
+static int ll_site_stats_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+
+ /*
+ * See description of statistical counters in struct cl_site, and
+ * struct lu_site.
+ */
+ return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site), m);
+}
+LPROC_SEQ_FOPS_RO(ll_site_stats);
+
+static int ll_max_readahead_mb_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ long pages_number;
+ int mult;
+
+ spin_lock(&sbi->ll_lock);
+ pages_number = sbi->ll_ra_info.ra_max_pages;
+ spin_unlock(&sbi->ll_lock);
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+}
+
+static ssize_t ll_max_readahead_mb_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int mult, rc, pages_number;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ if (pages_number < 0 || pages_number > num_physpages / 2) {
+ CERROR("can't set file readahead more than %lu MB\n",
+ num_physpages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/
+ return -ERANGE;
+ }
+
+ spin_lock(&sbi->ll_lock);
+ sbi->ll_ra_info.ra_max_pages = pages_number;
+ spin_unlock(&sbi->ll_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_max_readahead_mb);
+
+static int ll_max_readahead_per_file_mb_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ long pages_number;
+ int mult;
+
+ spin_lock(&sbi->ll_lock);
+ pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
+ spin_unlock(&sbi->ll_lock);
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+}
+
+static ssize_t ll_max_readahead_per_file_mb_seq_write(struct file *file,
+ const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int mult, rc, pages_number;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ if (pages_number < 0 ||
+ pages_number > sbi->ll_ra_info.ra_max_pages) {
+ CERROR("can't set file readahead more than"
+ "max_read_ahead_mb %lu MB\n",
+ sbi->ll_ra_info.ra_max_pages);
+ return -ERANGE;
+ }
+
+ spin_lock(&sbi->ll_lock);
+ sbi->ll_ra_info.ra_max_pages_per_file = pages_number;
+ spin_unlock(&sbi->ll_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_max_readahead_per_file_mb);
+
+static int ll_max_read_ahead_whole_mb_seq_show(struct seq_file *m, void *unused)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ long pages_number;
+ int mult;
+
+ spin_lock(&sbi->ll_lock);
+ pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
+ spin_unlock(&sbi->ll_lock);
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ return lprocfs_seq_read_frac_helper(m, pages_number, mult);
+}
+
+static ssize_t ll_max_read_ahead_whole_mb_seq_write(struct file *file,
+ const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int mult, rc, pages_number;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ /* Cap this at the current max readahead window size, the readahead
+ * algorithm does this anyway so it's pointless to set it larger. */
+ if (pages_number < 0 ||
+ pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
+ CERROR("can't set max_read_ahead_whole_mb more than "
+ "max_read_ahead_per_file_mb: %lu\n",
+ sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_CACHE_SHIFT));
+ return -ERANGE;
+ }
+
+ spin_lock(&sbi->ll_lock);
+ sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number;
+ spin_unlock(&sbi->ll_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_max_read_ahead_whole_mb);
+
+static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct cl_client_cache *cache = &sbi->ll_cache;
+ int shift = 20 - PAGE_CACHE_SHIFT;
+ int max_cached_mb;
+ int unused_mb;
+
+ max_cached_mb = cache->ccc_lru_max >> shift;
+ unused_mb = atomic_read(&cache->ccc_lru_left) >> shift;
+ return seq_printf(m,
+ "users: %d\n"
+ "max_cached_mb: %d\n"
+ "used_mb: %d\n"
+ "unused_mb: %d\n"
+ "reclaim_count: %u\n",
+ atomic_read(&cache->ccc_users),
+ max_cached_mb,
+ max_cached_mb - unused_mb,
+ unused_mb,
+ cache->ccc_lru_shrinkers);
+}
+
+static ssize_t ll_max_cached_mb_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct cl_client_cache *cache = &sbi->ll_cache;
+ int mult, rc, pages_number;
+ int diff = 0;
+ int nrpages = 0;
+ ENTRY;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ buffer = lprocfs_find_named_value(buffer, "max_cached_mb:", &count);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ RETURN(rc);
+
+ if (pages_number < 0 || pages_number > num_physpages) {
+ CERROR("%s: can't set max cache more than %lu MB\n",
+ ll_get_fsname(sb, NULL, 0),
+ num_physpages >> (20 - PAGE_CACHE_SHIFT));
+ RETURN(-ERANGE);
+ }
+
+ if (sbi->ll_dt_exp == NULL)
+ RETURN(-ENODEV);
+
+ spin_lock(&sbi->ll_lock);
+ diff = pages_number - cache->ccc_lru_max;
+ spin_unlock(&sbi->ll_lock);
+
+ /* easy - add more LRU slots. */
+ if (diff >= 0) {
+ atomic_add(diff, &cache->ccc_lru_left);
+ GOTO(out, rc = 0);
+ }
+
+ diff = -diff;
+ while (diff > 0) {
+ int tmp;
+
+ /* reduce LRU budget from free slots. */
+ do {
+ int ov, nv;
+
+ ov = atomic_read(&cache->ccc_lru_left);
+ if (ov == 0)
+ break;
+
+ nv = ov > diff ? ov - diff : 0;
+ rc = cfs_atomic_cmpxchg(&cache->ccc_lru_left, ov, nv);
+ if (likely(ov == rc)) {
+ diff -= ov - nv;
+ nrpages += ov - nv;
+ break;
+ }
+ } while (1);
+
+ if (diff <= 0)
+ break;
+
+ /* difficult - have to ask OSCs to drop LRU slots. */
+ tmp = diff << 1;
+ rc = obd_set_info_async(NULL, sbi->ll_dt_exp,
+ sizeof(KEY_CACHE_LRU_SHRINK),
+ KEY_CACHE_LRU_SHRINK,
+ sizeof(tmp), &tmp, NULL);
+ if (rc < 0)
+ break;
+ }
+
+out:
+ if (rc >= 0) {
+ spin_lock(&sbi->ll_lock);
+ cache->ccc_lru_max = pages_number;
+ spin_unlock(&sbi->ll_lock);
+ rc = count;
+ } else {
+ atomic_add(nrpages, &cache->ccc_lru_left);
+ }
+ return rc;
+}
+LPROC_SEQ_FOPS(ll_max_cached_mb);
+
+static int ll_checksum_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ return seq_printf(m, "%u\n", (sbi->ll_flags & LL_SBI_CHECKSUM) ? 1 : 0);
+}
+
+static ssize_t ll_checksum_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ if (!sbi->ll_dt_exp)
+ /* Not set up yet */
+ return -EAGAIN;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+ if (val)
+ sbi->ll_flags |= LL_SBI_CHECKSUM;
+ else
+ sbi->ll_flags &= ~LL_SBI_CHECKSUM;
+
+ rc = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
+ KEY_CHECKSUM, sizeof(val), &val, NULL);
+ if (rc)
+ CWARN("Failed to set OSC checksum flags: %d\n", rc);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_checksum);
+
+static int ll_max_rw_chunk_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+
+ return seq_printf(m, "%lu\n", ll_s2sbi(sb)->ll_max_rw_chunk);
+}
+
+static ssize_t ll_max_rw_chunk_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ int rc, val;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+ ll_s2sbi(sb)->ll_max_rw_chunk = val;
+ return count;
+}
+LPROC_SEQ_FOPS(ll_max_rw_chunk);
+
+static int ll_rd_track_id(struct seq_file *m, enum stats_track_type type)
+{
+ struct super_block *sb = m->private;
+
+ if (ll_s2sbi(sb)->ll_stats_track_type == type) {
+ return seq_printf(m, "%d\n",
+ ll_s2sbi(sb)->ll_stats_track_id);
+
+ } else if (ll_s2sbi(sb)->ll_stats_track_type == STATS_TRACK_ALL) {
+ return seq_printf(m, "0 (all)\n");
+ } else {
+ return seq_printf(m, "untracked\n");
+ }
+}
+
+static int ll_wr_track_id(const char *buffer, unsigned long count, void *data,
+ enum stats_track_type type)
+{
+ struct super_block *sb = data;
+ int rc, pid;
+
+ rc = lprocfs_write_helper(buffer, count, &pid);
+ if (rc)
+ return rc;
+ ll_s2sbi(sb)->ll_stats_track_id = pid;
+ if (pid == 0)
+ ll_s2sbi(sb)->ll_stats_track_type = STATS_TRACK_ALL;
+ else
+ ll_s2sbi(sb)->ll_stats_track_type = type;
+ lprocfs_clear_stats(ll_s2sbi(sb)->ll_stats);
+ return count;
+}
+
+static int ll_track_pid_seq_show(struct seq_file *m, void *v)
+{
+ return ll_rd_track_id(m, STATS_TRACK_PID);
+}
+
+static ssize_t ll_track_pid_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ return ll_wr_track_id(buffer, count, seq->private, STATS_TRACK_PID);
+}
+LPROC_SEQ_FOPS(ll_track_pid);
+
+static int ll_track_ppid_seq_show(struct seq_file *m, void *v)
+{
+ return ll_rd_track_id(m, STATS_TRACK_PPID);
+}
+
+static ssize_t ll_track_ppid_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ return ll_wr_track_id(buffer, count, seq->private, STATS_TRACK_PPID);
+}
+LPROC_SEQ_FOPS(ll_track_ppid);
+
+static int ll_track_gid_seq_show(struct seq_file *m, void *v)
+{
+ return ll_rd_track_id(m, STATS_TRACK_GID);
+}
+
+static ssize_t ll_track_gid_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ return ll_wr_track_id(buffer, count, seq->private, STATS_TRACK_GID);
+}
+LPROC_SEQ_FOPS(ll_track_gid);
+
+static int ll_statahead_max_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ return seq_printf(m, "%u\n", sbi->ll_sa_max);
+}
+
+static ssize_t ll_statahead_max_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val >= 0 && val <= LL_SA_RPC_MAX)
+ sbi->ll_sa_max = val;
+ else
+ CERROR("Bad statahead_max value %d. Valid values are in the "
+ "range [0, %d]\n", val, LL_SA_RPC_MAX);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_statahead_max);
+
+static int ll_statahead_agl_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ return seq_printf(m, "%u\n",
+ sbi->ll_flags & LL_SBI_AGL_ENABLED ? 1 : 0);
+}
+
+static ssize_t ll_statahead_agl_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val)
+ sbi->ll_flags |= LL_SBI_AGL_ENABLED;
+ else
+ sbi->ll_flags &= ~LL_SBI_AGL_ENABLED;
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_statahead_agl);
+
+static int ll_statahead_stats_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ return seq_printf(m,
+ "statahead total: %u\n"
+ "statahead wrong: %u\n"
+ "agl total: %u\n",
+ atomic_read(&sbi->ll_sa_total),
+ atomic_read(&sbi->ll_sa_wrong),
+ atomic_read(&sbi->ll_agl_total));
+}
+LPROC_SEQ_FOPS_RO(ll_statahead_stats);
+
+static int ll_lazystatfs_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ return seq_printf(m, "%u\n",
+ (sbi->ll_flags & LL_SBI_LAZYSTATFS) ? 1 : 0);
+}
+
+static ssize_t ll_lazystatfs_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val)
+ sbi->ll_flags |= LL_SBI_LAZYSTATFS;
+ else
+ sbi->ll_flags &= ~LL_SBI_LAZYSTATFS;
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_lazystatfs);
+
+static int ll_maxea_size_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ unsigned int ealen;
+ int rc;
+
+ rc = ll_get_max_mdsize(sbi, &ealen);
+ if (rc)
+ return rc;
+
+ return seq_printf(m, "%u\n", ealen);
+}
+LPROC_SEQ_FOPS_RO(ll_maxea_size);
+
+static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
+{
+ const char *str[] = LL_SBI_FLAGS;
+ struct super_block *sb = m->private;
+ int flags = ll_s2sbi(sb)->ll_flags;
+ int i = 0;
+
+ while (flags != 0) {
+ if (ARRAY_SIZE(str) <= i) {
+ CERROR("%s: Revise array LL_SBI_FLAGS to match sbi "
+ "flags please.\n", ll_get_fsname(sb, NULL, 0));
+ return -EINVAL;
+ }
+
+ if (flags & 0x1)
+ seq_printf(m, "%s ", str[i]);
+ flags >>= 1;
+ ++i;
+ }
+ seq_printf(m, "\b\n");
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(ll_sbi_flags);
+
+static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
+ { "uuid", &ll_sb_uuid_fops, 0, 0 },
+ //{ "mntpt_path", ll_rd_path, 0, 0 },
+ { "fstype", &ll_fstype_fops, 0, 0 },
+ { "site", &ll_site_stats_fops, 0, 0 },
+ { "blocksize", &ll_blksize_fops, 0, 0 },
+ { "kbytestotal", &ll_kbytestotal_fops, 0, 0 },
+ { "kbytesfree", &ll_kbytesfree_fops, 0, 0 },
+ { "kbytesavail", &ll_kbytesavail_fops, 0, 0 },
+ { "filestotal", &ll_filestotal_fops, 0, 0 },
+ { "filesfree", &ll_filesfree_fops, 0, 0 },
+ { "client_type", &ll_client_type_fops, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+ { "max_read_ahead_mb", &ll_max_readahead_mb_fops, 0 },
+ { "max_read_ahead_per_file_mb", &ll_max_readahead_per_file_mb_fops, 0 },
+ { "max_read_ahead_whole_mb", &ll_max_read_ahead_whole_mb_fops, 0 },
+ { "max_cached_mb", &ll_max_cached_mb_fops, 0 },
+ { "checksum_pages", &ll_checksum_fops, 0 },
+ { "max_rw_chunk", &ll_max_rw_chunk_fops, 0 },
+ { "stats_track_pid", &ll_track_pid_fops, 0 },
+ { "stats_track_ppid", &ll_track_ppid_fops, 0 },
+ { "stats_track_gid", &ll_track_gid_fops, 0 },
+ { "statahead_max", &ll_statahead_max_fops, 0 },
+ { "statahead_agl", &ll_statahead_agl_fops, 0 },
+ { "statahead_stats", &ll_statahead_stats_fops, 0, 0 },
+ { "lazystatfs", &ll_lazystatfs_fops, 0 },
+ { "max_easize", &ll_maxea_size_fops, 0, 0 },
+ { "sbi_flags", &ll_sbi_flags_fops, 0, 0 },
+ { 0 }
+};
+
+#define MAX_STRING_SIZE 128
+
+struct llite_file_opcode {
+ __u32 opcode;
+ __u32 type;
+ const char *opname;
+} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
+ /* file operation */
+ { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
+ { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
+ { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ "read_bytes" },
+ { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ "write_bytes" },
+ { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+ "brw_read" },
+ { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+ "brw_write" },
+ { LPROC_LL_OSC_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ "osc_read" },
+ { LPROC_LL_OSC_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+ "osc_write" },
+ { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
+ { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
+ { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
+ { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" },
+ { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" },
+ { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" },
+ { LPROC_LL_READDIR, LPROCFS_TYPE_REGS, "readdir" },
+ /* inode operation */
+ { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" },
+ { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" },
+ { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" },
+ { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" },
+ /* dir inode operation */
+ { LPROC_LL_CREATE, LPROCFS_TYPE_REGS, "create" },
+ { LPROC_LL_LINK, LPROCFS_TYPE_REGS, "link" },
+ { LPROC_LL_UNLINK, LPROCFS_TYPE_REGS, "unlink" },
+ { LPROC_LL_SYMLINK, LPROCFS_TYPE_REGS, "symlink" },
+ { LPROC_LL_MKDIR, LPROCFS_TYPE_REGS, "mkdir" },
+ { LPROC_LL_RMDIR, LPROCFS_TYPE_REGS, "rmdir" },
+ { LPROC_LL_MKNOD, LPROCFS_TYPE_REGS, "mknod" },
+ { LPROC_LL_RENAME, LPROCFS_TYPE_REGS, "rename" },
+ /* special inode operation */
+ { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" },
+ { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
+ { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" },
+ { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" },
+ { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" },
+ { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" },
+ { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" },
+};
+
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
+{
+ if (!sbi->ll_stats)
+ return;
+ if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
+ lprocfs_counter_add(sbi->ll_stats, op, count);
+ else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
+ sbi->ll_stats_track_id == current->pid)
+ lprocfs_counter_add(sbi->ll_stats, op, count);
+ else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
+ sbi->ll_stats_track_id == current->parent->pid)
+ lprocfs_counter_add(sbi->ll_stats, op, count);
+ else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
+ sbi->ll_stats_track_id == current_gid())
+ lprocfs_counter_add(sbi->ll_stats, op, count);
+}
+EXPORT_SYMBOL(ll_stats_ops_tally);
+
+static const char *ra_stat_string[] = {
+ [RA_STAT_HIT] = "hits",
+ [RA_STAT_MISS] = "misses",
+ [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
+ [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
+ [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
+ [RA_STAT_FAILED_MATCH] = "failed lock match",
+ [RA_STAT_DISCARDED] = "read but discarded",
+ [RA_STAT_ZERO_LEN] = "zero length file",
+ [RA_STAT_ZERO_WINDOW] = "zero size window",
+ [RA_STAT_EOF] = "read-ahead to EOF",
+ [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
+ [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(llite, name);
+LPROC_SEQ_FOPS_RO_TYPE(llite, uuid);
+
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc)
+{
+ struct lprocfs_vars lvars[2];
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_device *obd;
+ proc_dir_entry_t *dir;
+ char name[MAX_STRING_SIZE + 1], *ptr;
+ int err, id, len, rc;
+ ENTRY;
+
+ memset(lvars, 0, sizeof(lvars));
+
+ name[MAX_STRING_SIZE] = '\0';
+ lvars[0].name = name;
+
+ LASSERT(sbi != NULL);
+ LASSERT(mdc != NULL);
+ LASSERT(osc != NULL);
+
+ /* Get fsname */
+ len = strlen(lsi->lsi_lmd->lmd_profile);
+ ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
+ if (ptr && (strcmp(ptr, "-client") == 0))
+ len -= 7;
+
+ /* Mount info */
+ snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len,
+ lsi->lsi_lmd->lmd_profile, sb);
+
+ sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
+ if (IS_ERR(sbi->ll_proc_root)) {
+ err = PTR_ERR(sbi->ll_proc_root);
+ sbi->ll_proc_root = NULL;
+ RETURN(err);
+ }
+
+ rc = lprocfs_seq_create(sbi->ll_proc_root, "dump_page_cache", 0444,
+ &vvp_dump_pgcache_file_ops, sbi);
+ if (rc)
+ CWARN("Error adding the dump_page_cache file\n");
+
+ rc = lprocfs_seq_create(sbi->ll_proc_root, "extents_stats", 0644,
+ &ll_rw_extents_stats_fops, sbi);
+ if (rc)
+ CWARN("Error adding the extent_stats file\n");
+
+ rc = lprocfs_seq_create(sbi->ll_proc_root, "extents_stats_per_process",
+ 0644, &ll_rw_extents_stats_pp_fops, sbi);
+ if (rc)
+ CWARN("Error adding the extents_stats_per_process file\n");
+
+ rc = lprocfs_seq_create(sbi->ll_proc_root, "offset_stats", 0644,
+ &ll_rw_offset_stats_fops, sbi);
+ if (rc)
+ CWARN("Error adding the offset_stats file\n");
+
+ /* File operations stats */
+ sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
+ LPROCFS_STATS_FLAG_NONE);
+ if (sbi->ll_stats == NULL)
+ GOTO(out, err = -ENOMEM);
+ /* do counter init */
+ for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
+ __u32 type = llite_opcode_table[id].type;
+ void *ptr = NULL;
+ if (type & LPROCFS_TYPE_REGS)
+ ptr = "regs";
+ else if (type & LPROCFS_TYPE_BYTES)
+ ptr = "bytes";
+ else if (type & LPROCFS_TYPE_PAGES)
+ ptr = "pages";
+ lprocfs_counter_init(sbi->ll_stats,
+ llite_opcode_table[id].opcode,
+ (type & LPROCFS_CNTR_AVGMINMAX),
+ llite_opcode_table[id].opname, ptr);
+ }
+ err = lprocfs_register_stats(sbi->ll_proc_root, "stats", sbi->ll_stats);
+ if (err)
+ GOTO(out, err);
+
+ sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
+ LPROCFS_STATS_FLAG_NONE);
+ if (sbi->ll_ra_stats == NULL)
+ GOTO(out, err = -ENOMEM);
+
+ for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++)
+ lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
+ ra_stat_string[id], "pages");
+ err = lprocfs_register_stats(sbi->ll_proc_root, "read_ahead_stats",
+ sbi->ll_ra_stats);
+ if (err)
+ GOTO(out, err);
+
+
+ err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_llite_obd_vars, sb);
+ if (err)
+ GOTO(out, err);
+
+ /* MDC info */
+ obd = class_name2obd(mdc);
+
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ LASSERT(obd->obd_type->typ_name != NULL);
+
+ dir = proc_mkdir(obd->obd_type->typ_name, sbi->ll_proc_root);
+ if (dir == NULL)
+ GOTO(out, err = -ENOMEM);
+
+ snprintf(name, MAX_STRING_SIZE, "common_name");
+ lvars[0].fops = &llite_name_fops;
+ err = lprocfs_add_vars(dir, lvars, obd);
+ if (err)
+ GOTO(out, err);
+
+ snprintf(name, MAX_STRING_SIZE, "uuid");
+ lvars[0].fops = &llite_uuid_fops;
+ err = lprocfs_add_vars(dir, lvars, obd);
+ if (err)
+ GOTO(out, err);
+
+ /* OSC */
+ obd = class_name2obd(osc);
+
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ LASSERT(obd->obd_type->typ_name != NULL);
+
+ dir = proc_mkdir(obd->obd_type->typ_name, sbi->ll_proc_root);
+ if (dir == NULL)
+ GOTO(out, err = -ENOMEM);
+
+ snprintf(name, MAX_STRING_SIZE, "common_name");
+ lvars[0].fops = &llite_name_fops;
+ err = lprocfs_add_vars(dir, lvars, obd);
+ if (err)
+ GOTO(out, err);
+
+ snprintf(name, MAX_STRING_SIZE, "uuid");
+ lvars[0].fops = &llite_uuid_fops;
+ err = lprocfs_add_vars(dir, lvars, obd);
+out:
+ if (err) {
+ lprocfs_remove(&sbi->ll_proc_root);
+ lprocfs_free_stats(&sbi->ll_ra_stats);
+ lprocfs_free_stats(&sbi->ll_stats);
+ }
+ RETURN(err);
+}
+
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi)
+{
+ if (sbi->ll_proc_root) {
+ lprocfs_remove(&sbi->ll_proc_root);
+ lprocfs_free_stats(&sbi->ll_ra_stats);
+ lprocfs_free_stats(&sbi->ll_stats);
+ }
+}
+#undef MAX_STRING_SIZE
+
+#define pct(a,b) (b ? a * 100 / b : 0)
+
+static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
+ struct seq_file *seq, int which)
+{
+ unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
+ unsigned long start, end, r, w;
+ char *unitp = "KMGTPEZY";
+ int i, units = 10;
+ struct per_process_info *pp_info = &io_extents->pp_extents[which];
+
+ read_cum = 0;
+ write_cum = 0;
+ start = 0;
+
+ for(i = 0; i < LL_HIST_MAX; i++) {
+ read_tot += pp_info->pp_r_hist.oh_buckets[i];
+ write_tot += pp_info->pp_w_hist.oh_buckets[i];
+ }
+
+ for(i = 0; i < LL_HIST_MAX; i++) {
+ r = pp_info->pp_r_hist.oh_buckets[i];
+ w = pp_info->pp_w_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ end = 1 << (i + LL_HIST_START - units);
+ seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | "
+ "%14lu %4lu %4lu\n", start, *unitp, end, *unitp,
+ (i == LL_HIST_MAX - 1) ? '+' : ' ',
+ r, pct(r, read_tot), pct(read_cum, read_tot),
+ w, pct(w, write_tot), pct(write_cum, write_tot));
+ start = end;
+ if (start == 1<<10) {
+ start = 1;
+ units += 10;
+ unitp++;
+ }
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+}
+
+static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
+{
+ struct timeval now;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+ int k;
+
+ do_gettimeofday(&now);
+
+ if (!sbi->ll_rw_stats_on) {
+ seq_printf(seq, "disabled\n"
+ "write anything in this file to activate, "
+ "then 0 or \"[D/d]isabled\" to deactivate\n");
+ return 0;
+ }
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+ seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+ seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+ "extents", "calls", "%", "cum%",
+ "calls", "%", "cum%");
+ spin_lock(&sbi->ll_pp_extent_lock);
+ for (k = 0; k < LL_PROCESS_HIST_MAX; k++) {
+ if (io_extents->pp_extents[k].pid != 0) {
+ seq_printf(seq, "\nPID: %d\n",
+ io_extents->pp_extents[k].pid);
+ ll_display_extents_info(io_extents, seq, k);
+ }
+ }
+ spin_unlock(&sbi->ll_pp_extent_lock);
+ return 0;
+}
+
+static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
+ const char *buf, size_t len,
+ loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+ int i;
+ int value = 1, rc = 0;
+
+ rc = lprocfs_write_helper(buf, len, &value);
+ if (rc < 0 && (strcmp(buf, "disabled") == 0 ||
+ strcmp(buf, "Disabled") == 0))
+ value = 0;
+
+ if (value == 0)
+ sbi->ll_rw_stats_on = 0;
+ else
+ sbi->ll_rw_stats_on = 1;
+
+ spin_lock(&sbi->ll_pp_extent_lock);
+ for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+ io_extents->pp_extents[i].pid = 0;
+ lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
+ lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
+ }
+ spin_unlock(&sbi->ll_pp_extent_lock);
+ return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
+
+static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct timeval now;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+ do_gettimeofday(&now);
+
+ if (!sbi->ll_rw_stats_on) {
+ seq_printf(seq, "disabled\n"
+ "write anything in this file to activate, "
+ "then 0 or \"[D/d]isabled\" to deactivate\n");
+ return 0;
+ }
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+
+ seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+ seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+ "extents", "calls", "%", "cum%",
+ "calls", "%", "cum%");
+ spin_lock(&sbi->ll_lock);
+ ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
+ spin_unlock(&sbi->ll_lock);
+
+ return 0;
+}
+
+static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+ int i;
+ int value = 1, rc = 0;
+
+ rc = lprocfs_write_helper(buf, len, &value);
+ if (rc < 0 && (strcmp(buf, "disabled") == 0 ||
+ strcmp(buf, "Disabled") == 0))
+ value = 0;
+
+ if (value == 0)
+ sbi->ll_rw_stats_on = 0;
+ else
+ sbi->ll_rw_stats_on = 1;
+ spin_lock(&sbi->ll_pp_extent_lock);
+ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
+ io_extents->pp_extents[i].pid = 0;
+ lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
+ lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
+ }
+ spin_unlock(&sbi->ll_pp_extent_lock);
+
+ return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_extents_stats);
+
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+ struct ll_file_data *file, loff_t pos,
+ size_t count, int rw)
+{
+ int i, cur = -1;
+ struct ll_rw_process_info *process;
+ struct ll_rw_process_info *offset;
+ int *off_count = &sbi->ll_rw_offset_entry_count;
+ int *process_count = &sbi->ll_offset_process_count;
+ struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+ if(!sbi->ll_rw_stats_on)
+ return;
+ process = sbi->ll_rw_process_info;
+ offset = sbi->ll_rw_offset_info;
+
+ spin_lock(&sbi->ll_pp_extent_lock);
+ /* Extent statistics */
+ for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+ if(io_extents->pp_extents[i].pid == pid) {
+ cur = i;
+ break;
+ }
+ }
+
+ if (cur == -1) {
+ /* new process */
+ sbi->ll_extent_process_count =
+ (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
+ cur = sbi->ll_extent_process_count;
+ io_extents->pp_extents[cur].pid = pid;
+ lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
+ lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
+ }
+
+ for(i = 0; (count >= (1 << LL_HIST_START << i)) &&
+ (i < (LL_HIST_MAX - 1)); i++);
+ if (rw == 0) {
+ io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+ io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+ } else {
+ io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+ io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+ }
+ spin_unlock(&sbi->ll_pp_extent_lock);
+
+ spin_lock(&sbi->ll_process_lock);
+ /* Offset statistics */
+ for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+ if (process[i].rw_pid == pid) {
+ if (process[i].rw_last_file != file) {
+ process[i].rw_range_start = pos;
+ process[i].rw_last_file_pos = pos + count;
+ process[i].rw_smallest_extent = count;
+ process[i].rw_largest_extent = count;
+ process[i].rw_offset = 0;
+ process[i].rw_last_file = file;
+ spin_unlock(&sbi->ll_process_lock);
+ return;
+ }
+ if (process[i].rw_last_file_pos != pos) {
+ *off_count =
+ (*off_count + 1) % LL_OFFSET_HIST_MAX;
+ offset[*off_count].rw_op = process[i].rw_op;
+ offset[*off_count].rw_pid = pid;
+ offset[*off_count].rw_range_start =
+ process[i].rw_range_start;
+ offset[*off_count].rw_range_end =
+ process[i].rw_last_file_pos;
+ offset[*off_count].rw_smallest_extent =
+ process[i].rw_smallest_extent;
+ offset[*off_count].rw_largest_extent =
+ process[i].rw_largest_extent;
+ offset[*off_count].rw_offset =
+ process[i].rw_offset;
+ process[i].rw_op = rw;
+ process[i].rw_range_start = pos;
+ process[i].rw_smallest_extent = count;
+ process[i].rw_largest_extent = count;
+ process[i].rw_offset = pos -
+ process[i].rw_last_file_pos;
+ }
+ if(process[i].rw_smallest_extent > count)
+ process[i].rw_smallest_extent = count;
+ if(process[i].rw_largest_extent < count)
+ process[i].rw_largest_extent = count;
+ process[i].rw_last_file_pos = pos + count;
+ spin_unlock(&sbi->ll_process_lock);
+ return;
+ }
+ }
+ *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
+ process[*process_count].rw_pid = pid;
+ process[*process_count].rw_op = rw;
+ process[*process_count].rw_range_start = pos;
+ process[*process_count].rw_last_file_pos = pos + count;
+ process[*process_count].rw_smallest_extent = count;
+ process[*process_count].rw_largest_extent = count;
+ process[*process_count].rw_offset = 0;
+ process[*process_count].rw_last_file = file;
+ spin_unlock(&sbi->ll_process_lock);
+}
+
+static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct timeval now;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
+ struct ll_rw_process_info *process = sbi->ll_rw_process_info;
+ int i;
+
+ do_gettimeofday(&now);
+
+ if (!sbi->ll_rw_stats_on) {
+ seq_printf(seq, "disabled\n"
+ "write anything in this file to activate, "
+ "then 0 or \"[D/d]isabled\" to deactivate\n");
+ return 0;
+ }
+ spin_lock(&sbi->ll_process_lock);
+
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+ seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
+ "R/W", "PID", "RANGE START", "RANGE END",
+ "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
+ /* We stored the discontiguous offsets here; print them first */
+ for(i = 0; i < LL_OFFSET_HIST_MAX; i++) {
+ if (offset[i].rw_pid != 0)
+ seq_printf(seq,"%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
+ offset[i].rw_op ? 'W' : 'R',
+ offset[i].rw_pid,
+ offset[i].rw_range_start,
+ offset[i].rw_range_end,
+ (unsigned long)offset[i].rw_smallest_extent,
+ (unsigned long)offset[i].rw_largest_extent,
+ offset[i].rw_offset);
+ }
+ /* Then print the current offsets for each process */
+ for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+ if (process[i].rw_pid != 0)
+ seq_printf(seq,"%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
+ process[i].rw_op ? 'W' : 'R',
+ process[i].rw_pid,
+ process[i].rw_range_start,
+ process[i].rw_last_file_pos,
+ (unsigned long)process[i].rw_smallest_extent,
+ (unsigned long)process[i].rw_largest_extent,
+ process[i].rw_offset);
+ }
+ spin_unlock(&sbi->ll_process_lock);
+
+ return 0;
+}
+
+static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct ll_sb_info *sbi = seq->private;
+ struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
+ struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
+ int value = 1, rc = 0;
+
+ rc = lprocfs_write_helper(buf, len, &value);
+
+ if (rc < 0 && (strcmp(buf, "disabled") == 0 ||
+ strcmp(buf, "Disabled") == 0))
+ value = 0;
+
+ if (value == 0)
+ sbi->ll_rw_stats_on = 0;
+ else
+ sbi->ll_rw_stats_on = 1;
+
+ spin_lock(&sbi->ll_process_lock);
+ sbi->ll_offset_process_count = 0;
+ sbi->ll_rw_offset_entry_count = 0;
+ memset(process_info, 0, sizeof(struct ll_rw_process_info) *
+ LL_PROCESS_HIST_MAX);
+ memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
+ LL_OFFSET_HIST_MAX);
+ spin_unlock(&sbi->ll_process_lock);
+
+ return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_offset_stats);
+
+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = NULL;
+ lvars->obd_vars = lprocfs_llite_obd_vars;
+}
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
new file mode 100644
index 000000000000..58d59aa12619
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -0,0 +1,1279 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/quotaops.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/security.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <obd_support.h>
+#include <lustre_fid.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include "llite_internal.h"
+
+static int ll_create_it(struct inode *, struct dentry *,
+ int, struct lookup_intent *);
+
+/*
+ * Check if we have something mounted at the named dchild.
+ * In such a case there would always be dentry present.
+ */
+static int ll_d_mountpoint(struct dentry *dparent, struct dentry *dchild,
+ struct qstr *name)
+{
+ int mounted = 0;
+
+ if (unlikely(dchild)) {
+ mounted = d_mountpoint(dchild);
+ } else if (dparent) {
+ dchild = d_lookup(dparent, name);
+ if (dchild) {
+ mounted = d_mountpoint(dchild);
+ dput(dchild);
+ }
+ }
+ return mounted;
+}
+
+int ll_unlock(__u32 mode, struct lustre_handle *lockh)
+{
+ ENTRY;
+
+ ldlm_lock_decref(lockh, mode);
+
+ RETURN(0);
+}
+
+
+/* called from iget5_locked->find_inode() under inode_lock spinlock */
+static int ll_test_inode(struct inode *inode, void *opaque)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lustre_md *md = opaque;
+
+ if (unlikely(!(md->body->valid & OBD_MD_FLID))) {
+ CERROR("MDS body missing FID\n");
+ return 0;
+ }
+
+ if (!lu_fid_eq(&lli->lli_fid, &md->body->fid1))
+ return 0;
+
+ return 1;
+}
+
+static int ll_set_inode(struct inode *inode, void *opaque)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct mdt_body *body = ((struct lustre_md *)opaque)->body;
+
+ if (unlikely(!(body->valid & OBD_MD_FLID))) {
+ CERROR("MDS body missing FID\n");
+ return -EINVAL;
+ }
+
+ lli->lli_fid = body->fid1;
+ if (unlikely(!(body->valid & OBD_MD_FLTYPE))) {
+ CERROR("Can not initialize inode "DFID" without object type: "
+ "valid = "LPX64"\n", PFID(&lli->lli_fid), body->valid);
+ return -EINVAL;
+ }
+
+ inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mode & S_IFMT);
+ if (unlikely(inode->i_mode == 0)) {
+ CERROR("Invalid inode "DFID" type\n", PFID(&lli->lli_fid));
+ return -EINVAL;
+ }
+
+ ll_lli_init(lli);
+
+ return 0;
+}
+
+
+/*
+ * Get an inode by inode number (already instantiated by the intent lookup).
+ * Returns inode or NULL
+ */
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+ struct lustre_md *md)
+{
+ struct inode *inode;
+ ENTRY;
+
+ LASSERT(hash != 0);
+ inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
+
+ if (inode) {
+ if (inode->i_state & I_NEW) {
+ int rc = 0;
+
+ ll_read_inode2(inode, md);
+ if (S_ISREG(inode->i_mode) &&
+ ll_i2info(inode)->lli_clob == NULL) {
+ CDEBUG(D_INODE,
+ "%s: apply lsm %p to inode "DFID".\n",
+ ll_get_fsname(sb, NULL, 0), md->lsm,
+ PFID(ll_inode2fid(inode)));
+ rc = cl_file_inode_init(inode, md);
+ }
+ if (rc != 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ inode = ERR_PTR(rc);
+ } else
+ unlock_new_inode(inode);
+ } else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
+ ll_update_inode(inode, md);
+ CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
+ inode, PFID(&md->body->fid1));
+ }
+ RETURN(inode);
+}
+
+static void ll_invalidate_negative_children(struct inode *dir)
+{
+ struct dentry *dentry, *tmp_subdir;
+ struct ll_d_hlist_node *p;
+
+ ll_lock_dcache(dir);
+ ll_d_hlist_for_each_entry(dentry, p, &dir->i_dentry, d_alias) {
+ spin_lock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_subdirs)) {
+ struct dentry *child;
+
+ list_for_each_entry_safe(child, tmp_subdir,
+ &dentry->d_subdirs,
+ d_u.d_child) {
+ if (child->d_inode == NULL)
+ d_lustre_invalidate(child, 1);
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ ll_unlock_dcache(dir);
+}
+
+int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ int rc;
+ struct lustre_handle lockh;
+ ENTRY;
+
+ switch (flag) {
+ case LDLM_CB_BLOCKING:
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ if (rc < 0) {
+ CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+ RETURN(rc);
+ }
+ break;
+ case LDLM_CB_CANCELING: {
+ struct inode *inode = ll_inode_from_resource_lock(lock);
+ struct ll_inode_info *lli;
+ __u64 bits = lock->l_policy_data.l_inodebits.bits;
+ struct lu_fid *fid;
+ ldlm_mode_t mode = lock->l_req_mode;
+
+ /* Inode is set to lock->l_resource->lr_lvb_inode
+ * for mdc - bug 24555 */
+ LASSERT(lock->l_ast_data == NULL);
+
+ /* Invalidate all dentries associated with this inode */
+ if (inode == NULL)
+ break;
+
+ LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+ /* For OPEN locks we differentiate between lock modes
+ * LCK_CR, LCK_CW, LCK_PR - bug 22891 */
+ if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
+ MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM))
+ ll_have_md_lock(inode, &bits, LCK_MINMODE);
+
+ if (bits & MDS_INODELOCK_OPEN)
+ ll_have_md_lock(inode, &bits, mode);
+
+ fid = ll_inode2fid(inode);
+ if (lock->l_resource->lr_name.name[0] != fid_seq(fid) ||
+ lock->l_resource->lr_name.name[1] != fid_oid(fid) ||
+ lock->l_resource->lr_name.name[2] != fid_ver(fid)) {
+ LDLM_ERROR(lock, "data mismatch with object "
+ DFID" (%p)", PFID(fid), inode);
+ }
+
+ if (bits & MDS_INODELOCK_OPEN) {
+ int flags = 0;
+ switch (lock->l_req_mode) {
+ case LCK_CW:
+ flags = FMODE_WRITE;
+ break;
+ case LCK_PR:
+ flags = FMODE_EXEC;
+ break;
+ case LCK_CR:
+ flags = FMODE_READ;
+ break;
+ default:
+ CERROR("Unexpected lock mode for OPEN lock "
+ "%d, inode %ld\n", lock->l_req_mode,
+ inode->i_ino);
+ }
+ ll_md_real_close(inode, flags);
+ }
+
+ lli = ll_i2info(inode);
+ if (bits & MDS_INODELOCK_LAYOUT) {
+ struct cl_object_conf conf = { { 0 } };
+
+ conf.coc_opc = OBJECT_CONF_INVALIDATE;
+ conf.coc_inode = inode;
+ rc = ll_layout_conf(inode, &conf);
+ if (rc)
+ CDEBUG(D_INODE, "invaliding layout %d.\n", rc);
+ }
+
+ if (bits & MDS_INODELOCK_UPDATE)
+ lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
+
+ if (S_ISDIR(inode->i_mode) &&
+ (bits & MDS_INODELOCK_UPDATE)) {
+ CDEBUG(D_INODE, "invalidating inode %lu\n",
+ inode->i_ino);
+ truncate_inode_pages(inode->i_mapping, 0);
+ ll_invalidate_negative_children(inode);
+ }
+
+ if (inode->i_sb->s_root &&
+ inode != inode->i_sb->s_root->d_inode &&
+ (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)))
+ ll_invalidate_aliases(inode);
+ iput(inode);
+ break;
+ }
+ default:
+ LBUG();
+ }
+
+ RETURN(0);
+}
+
+__u32 ll_i2suppgid(struct inode *i)
+{
+ if (current_is_in_group(i->i_gid))
+ return (__u32)i->i_gid;
+ else
+ return (__u32)(-1);
+}
+
+/* Pack the required supplementary groups into the supplied groups array.
+ * If we don't need to use the groups from the target inode(s) then we
+ * instead pack one or more groups from the user's supplementary group
+ * array in case it might be useful. Not needed if doing an MDS-side upcall. */
+void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
+{
+#if 0
+ int i;
+#endif
+
+ LASSERT(i1 != NULL);
+ LASSERT(suppgids != NULL);
+
+ suppgids[0] = ll_i2suppgid(i1);
+
+ if (i2)
+ suppgids[1] = ll_i2suppgid(i2);
+ else
+ suppgids[1] = -1;
+
+#if 0
+ for (i = 0; i < current_ngroups; i++) {
+ if (suppgids[0] == -1) {
+ if (current_groups[i] != suppgids[1])
+ suppgids[0] = current_groups[i];
+ continue;
+ }
+ if (suppgids[1] == -1) {
+ if (current_groups[i] != suppgids[0])
+ suppgids[1] = current_groups[i];
+ continue;
+ }
+ break;
+ }
+#endif
+}
+
+/*
+ * try to reuse three types of dentry:
+ * 1. unhashed alias, this one is unhashed by d_invalidate (but it may be valid
+ * by concurrent .revalidate).
+ * 2. INVALID alias (common case for no valid ldlm lock held, but this flag may
+ * be cleared by others calling d_lustre_revalidate).
+ * 3. DISCONNECTED alias.
+ */
+static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
+{
+ struct dentry *alias, *discon_alias, *invalid_alias;
+ struct ll_d_hlist_node *p;
+
+ if (ll_d_hlist_empty(&inode->i_dentry))
+ return NULL;
+
+ discon_alias = invalid_alias = NULL;
+
+ ll_lock_dcache(inode);
+ ll_d_hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+ LASSERT(alias != dentry);
+
+ spin_lock(&alias->d_lock);
+ if (alias->d_flags & DCACHE_DISCONNECTED)
+ /* LASSERT(last_discon == NULL); LU-405, bz 20055 */
+ discon_alias = alias;
+ else if (alias->d_parent == dentry->d_parent &&
+ alias->d_name.hash == dentry->d_name.hash &&
+ alias->d_name.len == dentry->d_name.len &&
+ memcmp(alias->d_name.name, dentry->d_name.name,
+ dentry->d_name.len) == 0)
+ invalid_alias = alias;
+ spin_unlock(&alias->d_lock);
+
+ if (invalid_alias)
+ break;
+ }
+ alias = invalid_alias ?: discon_alias ?: NULL;
+ if (alias) {
+ spin_lock(&alias->d_lock);
+ dget_dlock(alias);
+ spin_unlock(&alias->d_lock);
+ }
+ ll_unlock_dcache(inode);
+
+ return alias;
+}
+
+/*
+ * Similar to d_splice_alias(), but lustre treats invalid alias
+ * similar to DCACHE_DISCONNECTED, and tries to use it anyway.
+ */
+struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
+{
+ struct dentry *new;
+
+ if (inode) {
+ new = ll_find_alias(inode, de);
+ if (new) {
+ ll_dops_init(new, 1, 1);
+ d_move(new, de);
+ iput(inode);
+ CDEBUG(D_DENTRY,
+ "Reuse dentry %p inode %p refc %d flags %#x\n",
+ new, new->d_inode, d_refcount(new), new->d_flags);
+ return new;
+ }
+ }
+ ll_dops_init(de, 1, 1);
+ __d_lustre_invalidate(de);
+ d_add(de, inode);
+ CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
+ de, de->d_inode, d_refcount(de), de->d_flags);
+ return de;
+}
+
+int ll_lookup_it_finish(struct ptlrpc_request *request,
+ struct lookup_intent *it, void *data)
+{
+ struct it_cb_data *icbd = data;
+ struct dentry **de = icbd->icbd_childp;
+ struct inode *parent = icbd->icbd_parent;
+ struct inode *inode = NULL;
+ __u64 bits = 0;
+ int rc;
+ ENTRY;
+
+ /* NB 1 request reference will be taken away by ll_intent_lock()
+ * when I return */
+ CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
+ it->d.lustre.it_disposition);
+ if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+ rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
+ if (rc)
+ RETURN(rc);
+
+ ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
+
+ /* We used to query real size from OSTs here, but actually
+ this is not needed. For stat() calls size would be updated
+ from subsequent do_revalidate()->ll_inode_revalidate_it() in
+ 2.4 and
+ vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
+ Everybody else who needs correct file size would call
+ ll_glimpse_size or some equivalent themselves anyway.
+ Also see bug 7198. */
+ }
+
+ /* Only hash *de if it is unhashed (new dentry).
+ * Atoimc_open may passin hashed dentries for open.
+ */
+ if (d_unhashed(*de))
+ *de = ll_splice_alias(inode, *de);
+
+ if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+ /* we have lookup look - unhide dentry */
+ if (bits & MDS_INODELOCK_LOOKUP)
+ d_lustre_revalidate(*de);
+ } else if (!it_disposition(it, DISP_OPEN_CREATE)) {
+ /* If file created on server, don't depend on parent UPDATE
+ * lock to unhide it. It is left hidden and next lookup can
+ * find it in ll_splice_alias.
+ */
+ /* Check that parent has UPDATE lock. */
+ struct lookup_intent parent_it = {
+ .it_op = IT_GETATTR,
+ .d.lustre.it_lock_handle = 0 };
+
+ if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it,
+ &ll_i2info(parent)->lli_fid, NULL)) {
+ d_lustre_revalidate(*de);
+ ll_intent_release(&parent_it);
+ }
+ }
+
+ RETURN(0);
+}
+
+static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
+ struct lookup_intent *it, int lookup_flags)
+{
+ struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
+ struct dentry *save = dentry, *retval;
+ struct ptlrpc_request *req = NULL;
+ struct md_op_data *op_data;
+ struct it_cb_data icbd;
+ __u32 opc;
+ int rc;
+ ENTRY;
+
+ if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
+ RETURN(ERR_PTR(-ENAMETOOLONG));
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, parent->i_ino,
+ parent->i_generation, parent, LL_IT2STR(it));
+
+ if (d_mountpoint(dentry))
+ CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
+
+ ll_frob_intent(&it, &lookup_it);
+
+ /* As do_lookup is called before follow_mount, root dentry may be left
+ * not valid, revalidate it here. */
+ if (parent->i_sb->s_root && (parent->i_sb->s_root->d_inode == parent) &&
+ (it->it_op & (IT_OPEN | IT_CREAT))) {
+ rc = ll_inode_revalidate_it(parent->i_sb->s_root, it,
+ MDS_INODELOCK_LOOKUP);
+ if (rc)
+ RETURN(ERR_PTR(rc));
+ }
+
+ if (it->it_op == IT_GETATTR) {
+ rc = ll_statahead_enter(parent, &dentry, 0);
+ if (rc == 1) {
+ if (dentry == save)
+ GOTO(out, retval = NULL);
+ GOTO(out, retval = dentry);
+ }
+ }
+
+ icbd.icbd_childp = &dentry;
+ icbd.icbd_parent = parent;
+
+ if (it->it_op & IT_CREAT ||
+ (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT))
+ opc = LUSTRE_OPC_CREATE;
+ else
+ opc = LUSTRE_OPC_ANY;
+
+ op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
+ dentry->d_name.len, lookup_flags, opc,
+ NULL);
+ if (IS_ERR(op_data))
+ RETURN((void *)op_data);
+
+ /* enforce umask if acl disabled or MDS doesn't support umask */
+ if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
+ it->it_create_mode &= ~current_umask();
+
+ rc = md_intent_lock(ll_i2mdexp(parent), op_data, NULL, 0, it,
+ lookup_flags, &req, ll_md_blocking_ast, 0);
+ ll_finish_md_op_data(op_data);
+ if (rc < 0)
+ GOTO(out, retval = ERR_PTR(rc));
+
+ rc = ll_lookup_it_finish(req, it, &icbd);
+ if (rc != 0) {
+ ll_intent_release(it);
+ GOTO(out, retval = ERR_PTR(rc));
+ }
+
+ if ((it->it_op & IT_OPEN) && dentry->d_inode &&
+ !S_ISREG(dentry->d_inode->i_mode) &&
+ !S_ISDIR(dentry->d_inode->i_mode)) {
+ ll_release_openhandle(dentry, it);
+ }
+ ll_lookup_finish_locks(it, dentry);
+
+ if (dentry == save)
+ GOTO(out, retval = NULL);
+ else
+ GOTO(out, retval = dentry);
+ out:
+ if (req)
+ ptlrpc_req_finished(req);
+ if (it->it_op == IT_GETATTR && (retval == NULL || retval == dentry))
+ ll_statahead_mark(parent, dentry);
+ return retval;
+}
+
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
+ struct dentry *de;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),flags=%u\n",
+ dentry->d_name.len, dentry->d_name.name, parent->i_ino,
+ parent->i_generation, parent, flags);
+
+ /* Optimize away (CREATE && !OPEN). Let .create handle the race. */
+ if ((flags & LOOKUP_CREATE ) && !(flags & LOOKUP_OPEN)) {
+ ll_dops_init(dentry, 1, 1);
+ __d_lustre_invalidate(dentry);
+ d_add(dentry, NULL);
+ return NULL;
+ }
+
+ if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
+ itp = NULL;
+ else
+ itp = &it;
+ de = ll_lookup_it(parent, dentry, itp, 0);
+
+ if (itp != NULL)
+ ll_intent_release(itp);
+
+ return de;
+}
+
+/*
+ * For cached negative dentry and new dentry, handle lookup/create/open
+ * together.
+ */
+static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned open_flags,
+ umode_t mode, int *opened)
+{
+ struct lookup_intent *it;
+ struct dentry *de;
+ long long lookup_flags = LOOKUP_OPEN;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),file %p,"
+ "open_flags %x,mode %x opened %d\n",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+ dir->i_generation, dir, file, open_flags, mode, *opened);
+
+ OBD_ALLOC(it, sizeof(*it));
+ if (!it)
+ RETURN(-ENOMEM);
+
+ it->it_op = IT_OPEN;
+ if (mode) {
+ it->it_op |= IT_CREAT;
+ lookup_flags |= LOOKUP_CREATE;
+ }
+ it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
+ it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
+
+ /* Dentry added to dcache tree in ll_lookup_it */
+ de = ll_lookup_it(dir, dentry, it, lookup_flags);
+ if (IS_ERR(de))
+ rc = PTR_ERR(de);
+ else if (de != NULL)
+ dentry = de;
+
+ if (!rc) {
+ if (it_disposition(it, DISP_OPEN_CREATE)) {
+ /* Dentry instantiated in ll_create_it. */
+ rc = ll_create_it(dir, dentry, mode, it);
+ if (rc) {
+ /* We dget in ll_splice_alias. */
+ if (de != NULL)
+ dput(de);
+ goto out_release;
+ }
+
+ *opened |= FILE_CREATED;
+ }
+ if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) {
+ /* Open dentry. */
+ if (S_ISFIFO(dentry->d_inode->i_mode)) {
+ /* We cannot call open here as it would
+ * deadlock.
+ */
+ if (it_disposition(it, DISP_ENQ_OPEN_REF))
+ ptlrpc_req_finished(
+ (struct ptlrpc_request *)
+ it->d.lustre.it_data);
+ rc = finish_no_open(file, de);
+ } else {
+ file->private_data = it;
+ rc = finish_open(file, dentry, NULL, opened);
+ /* We dget in ll_splice_alias. finish_open takes
+ * care of dget for fd open.
+ */
+ if (de != NULL)
+ dput(de);
+ }
+ } else {
+ rc = finish_no_open(file, de);
+ }
+ }
+
+out_release:
+ ll_intent_release(it);
+ OBD_FREE(it, sizeof(*it));
+
+ RETURN(rc);
+}
+
+
+/* We depend on "mode" being set with the proper file type/umask by now */
+static struct inode *ll_create_node(struct inode *dir, const char *name,
+ int namelen, const void *data, int datalen,
+ int mode, __u64 extra,
+ struct lookup_intent *it)
+{
+ struct inode *inode = NULL;
+ struct ptlrpc_request *request = NULL;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int rc;
+ ENTRY;
+
+ LASSERT(it && it->d.lustre.it_disposition);
+
+ LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
+ request = it->d.lustre.it_data;
+ it_clear_disposition(it, DISP_ENQ_CREATE_REF);
+ rc = ll_prep_inode(&inode, request, dir->i_sb, it);
+ if (rc)
+ GOTO(out, inode = ERR_PTR(rc));
+
+ LASSERT(ll_d_hlist_empty(&inode->i_dentry));
+
+ /* We asked for a lock on the directory, but were granted a
+ * lock on the inode. Since we finally have an inode pointer,
+ * stuff it in the lock. */
+ CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
+ inode, inode->i_ino, inode->i_generation);
+ ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
+ EXIT;
+ out:
+ ptlrpc_req_finished(request);
+ return inode;
+}
+
+/*
+ * By the time this is called, we already have created the directory cache
+ * entry for the new file, but it is so far negative - it has no inode.
+ *
+ * We defer creating the OBD object(s) until open, to keep the intent and
+ * non-intent code paths similar, and also because we do not have the MDS
+ * inode number before calling ll_create_node() (which is needed for LOV),
+ * so we would need to do yet another RPC to the MDS to store the LOV EA
+ * data on the MDS. If needed, we would pass the PACKED lmm as data and
+ * lmm_size in datalen (the MDS still has code which will handle that).
+ *
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
+ struct lookup_intent *it)
+{
+ struct inode *inode;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+ dir->i_generation, dir, LL_IT2STR(it));
+
+ rc = it_open_error(DISP_OPEN_CREATE, it);
+ if (rc)
+ RETURN(rc);
+
+ inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
+ NULL, 0, mode, 0, it);
+ if (IS_ERR(inode))
+ RETURN(PTR_ERR(inode));
+
+ if (filename_is_volatile(dentry->d_name.name, dentry->d_name.len, NULL))
+ ll_i2info(inode)->lli_volatile = true;
+
+ d_instantiate(dentry, inode);
+ RETURN(0);
+}
+
+static void ll_update_times(struct ptlrpc_request *request,
+ struct inode *inode)
+{
+ struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
+ &RMF_MDT_BODY);
+
+ LASSERT(body);
+ if (body->valid & OBD_MD_FLMTIME &&
+ body->mtime > LTIME_S(inode->i_mtime)) {
+ CDEBUG(D_INODE, "setting ino %lu mtime from %lu to "LPU64"\n",
+ inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
+ LTIME_S(inode->i_mtime) = body->mtime;
+ }
+ if (body->valid & OBD_MD_FLCTIME &&
+ body->ctime > LTIME_S(inode->i_ctime))
+ LTIME_S(inode->i_ctime) = body->ctime;
+}
+
+static int ll_new_node(struct inode *dir, struct qstr *name,
+ const char *tgt, int mode, int rdev,
+ struct dentry *dchild, __u32 opc)
+{
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ struct inode *inode = NULL;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ int tgt_len = 0;
+ int err;
+
+ ENTRY;
+ if (unlikely(tgt != NULL))
+ tgt_len = strlen(tgt) + 1;
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name,
+ name->len, 0, opc, NULL);
+ if (IS_ERR(op_data))
+ GOTO(err_exit, err = PTR_ERR(op_data));
+
+ err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
+ current_fsuid(), current_fsgid(),
+ cfs_curproc_cap_pack(), rdev, &request);
+ ll_finish_md_op_data(op_data);
+ if (err)
+ GOTO(err_exit, err);
+
+ ll_update_times(request, dir);
+
+ if (dchild) {
+ err = ll_prep_inode(&inode, request, dchild->d_sb, NULL);
+ if (err)
+ GOTO(err_exit, err);
+
+ d_instantiate(dchild, inode);
+ }
+ EXIT;
+err_exit:
+ ptlrpc_req_finished(request);
+
+ return err;
+}
+
+static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode,
+ unsigned rdev, struct dentry *dchild)
+{
+ int err;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p) mode %o dev %x\n",
+ name->len, name->name, dir->i_ino, dir->i_generation, dir,
+ mode, rdev);
+
+ if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
+ mode &= ~current_umask();
+
+ switch (mode & S_IFMT) {
+ case 0:
+ mode |= S_IFREG; /* for mode = 0 case, fallthrough */
+ case S_IFREG:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ err = ll_new_node(dir, name, NULL, mode, rdev, dchild,
+ LUSTRE_OPC_MKNOD);
+ break;
+ case S_IFDIR:
+ err = -EPERM;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err)
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1);
+
+ RETURN(err);
+}
+
+/*
+ * Plain create. Intent create is handled in atomic_open.
+ */
+static int ll_create_nd(struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool want_excl)
+{
+ int rc;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),"
+ "flags=%u, excl=%d\n",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+ dir->i_generation, dir, mode, want_excl);
+
+ rc = ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry);
+
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s, unhashed %d\n",
+ dentry->d_name.len, dentry->d_name.name, d_unhashed(dentry));
+
+ return rc;
+}
+
+static int ll_symlink_generic(struct inode *dir, struct qstr *name,
+ const char *tgt, struct dentry *dchild)
+{
+ int err;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),target=%.*s\n",
+ name->len, name->name, dir->i_ino, dir->i_generation,
+ dir, 3000, tgt);
+
+ err = ll_new_node(dir, name, (char *)tgt, S_IFLNK | S_IRWXUGO,
+ 0, dchild, LUSTRE_OPC_SYMLINK);
+
+ if (!err)
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1);
+
+ RETURN(err);
+}
+
+static int ll_link_generic(struct inode *src, struct inode *dir,
+ struct qstr *name, struct dentry *dchild)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ int err;
+
+ ENTRY;
+ CDEBUG(D_VFSTRACE,
+ "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
+ src->i_ino, src->i_generation, src, dir->i_ino,
+ dir->i_generation, dir, name->len, name->name);
+
+ op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len,
+ 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ err = md_link(sbi->ll_md_exp, op_data, &request);
+ ll_finish_md_op_data(op_data);
+ if (err)
+ GOTO(out, err);
+
+ ll_update_times(request, dir);
+ ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1);
+ EXIT;
+out:
+ ptlrpc_req_finished(request);
+ RETURN(err);
+}
+
+static int ll_mkdir_generic(struct inode *dir, struct qstr *name,
+ int mode, struct dentry *dchild)
+
+{
+ int err;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ name->len, name->name, dir->i_ino, dir->i_generation, dir);
+
+ if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
+ mode &= ~current_umask();
+ mode = (mode & (S_IRWXUGO|S_ISVTX)) | S_IFDIR;
+ err = ll_new_node(dir, name, NULL, mode, 0, dchild, LUSTRE_OPC_MKDIR);
+
+ if (!err)
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
+
+ RETURN(err);
+}
+
+/* Try to find the child dentry by its name.
+ If found, put the result fid into @fid. */
+static void ll_get_child_fid(struct inode * dir, struct qstr *name,
+ struct lu_fid *fid)
+{
+ struct dentry *parent, *child;
+
+ parent = ll_d_hlist_entry(dir->i_dentry, struct dentry, d_alias);
+ child = d_lookup(parent, name);
+ if (child) {
+ if (child->d_inode)
+ *fid = *ll_inode2fid(child->d_inode);
+ dput(child);
+ }
+}
+
+static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
+ struct dentry *dchild, struct qstr *name)
+{
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ name->len, name->name, dir->i_ino, dir->i_generation, dir);
+
+ if (unlikely(ll_d_mountpoint(dparent, dchild, name)))
+ RETURN(-EBUSY);
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len,
+ S_IFDIR, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ ll_get_child_fid(dir, name, &op_data->op_fid3);
+ op_data->op_fid2 = op_data->op_fid3;
+ rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
+ ll_finish_md_op_data(op_data);
+ if (rc == 0) {
+ ll_update_times(request, dir);
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1);
+ }
+
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+}
+
+/**
+ * Remove dir entry
+ **/
+int ll_rmdir_entry(struct inode *dir, char *name, int namelen)
+{
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ namelen, name, dir->i_ino, dir->i_generation, dir);
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, name, strlen(name),
+ S_IFDIR, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+ op_data->op_cli_flags |= CLI_RM_ENTRY;
+ rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
+ ll_finish_md_op_data(op_data);
+ if (rc == 0) {
+ ll_update_times(request, dir);
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1);
+ }
+
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+}
+
+int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
+{
+ struct mdt_body *body;
+ struct lov_mds_md *eadata;
+ struct lov_stripe_md *lsm = NULL;
+ struct obd_trans_info oti = { 0 };
+ struct obdo *oa;
+ struct obd_capa *oc = NULL;
+ int rc;
+ ENTRY;
+
+ /* req is swabbed so this is safe */
+ body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ if (!(body->valid & OBD_MD_FLEASIZE))
+ RETURN(0);
+
+ if (body->eadatasize == 0) {
+ CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
+ GOTO(out, rc = -EPROTO);
+ }
+
+ /* The MDS sent back the EA because we unlinked the last reference
+ * to this file. Use this EA to unlink the objects on the OST.
+ * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
+ * check it is complete and sensible. */
+ eadata = req_capsule_server_sized_get(&request->rq_pill, &RMF_MDT_MD,
+ body->eadatasize);
+ LASSERT(eadata != NULL);
+
+ rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
+ if (rc < 0) {
+ CERROR("obd_unpackmd: %d\n", rc);
+ GOTO(out, rc);
+ }
+ LASSERT(rc >= sizeof(*lsm));
+
+ OBDO_ALLOC(oa);
+ if (oa == NULL)
+ GOTO(out_free_memmd, rc = -ENOMEM);
+
+ oa->o_oi = lsm->lsm_oi;
+ oa->o_mode = body->mode & S_IFMT;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
+
+ if (body->valid & OBD_MD_FLCOOKIE) {
+ oa->o_valid |= OBD_MD_FLCOOKIE;
+ oti.oti_logcookies =
+ req_capsule_server_sized_get(&request->rq_pill,
+ &RMF_LOGCOOKIES,
+ sizeof(struct llog_cookie) *
+ lsm->lsm_stripe_count);
+ if (oti.oti_logcookies == NULL) {
+ oa->o_valid &= ~OBD_MD_FLCOOKIE;
+ body->valid &= ~OBD_MD_FLCOOKIE;
+ }
+ }
+
+ if (body->valid & OBD_MD_FLOSSCAPA) {
+ rc = md_unpack_capa(ll_i2mdexp(dir), request, &RMF_CAPA2, &oc);
+ if (rc)
+ GOTO(out_free_memmd, rc);
+ }
+
+ rc = obd_destroy(NULL, ll_i2dtexp(dir), oa, lsm, &oti,
+ ll_i2mdexp(dir), oc);
+ capa_put(oc);
+ if (rc)
+ CERROR("obd destroy objid "DOSTID" error %d\n",
+ POSTID(&lsm->lsm_oi), rc);
+out_free_memmd:
+ obd_free_memmd(ll_i2dtexp(dir), &lsm);
+ OBDO_FREE(oa);
+out:
+ return rc;
+}
+
+/* ll_unlink_generic() doesn't update the inode with the new link count.
+ * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
+ * is any lock existing. They will recycle dentries and inodes based upon locks
+ * too. b=20433 */
+static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
+ struct dentry *dchild, struct qstr *name)
+{
+ struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data;
+ int rc;
+ ENTRY;
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ name->len, name->name, dir->i_ino, dir->i_generation, dir);
+
+ /*
+ * XXX: unlink bind mountpoint maybe call to here,
+ * just check it as vfs_unlink does.
+ */
+ if (unlikely(ll_d_mountpoint(dparent, dchild, name)))
+ RETURN(-EBUSY);
+
+ op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name,
+ name->len, 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ ll_get_child_fid(dir, name, &op_data->op_fid3);
+ op_data->op_fid2 = op_data->op_fid3;
+ rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
+ ll_finish_md_op_data(op_data);
+ if (rc)
+ GOTO(out, rc);
+
+ ll_update_times(request, dir);
+ ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
+
+ rc = ll_objects_destroy(request, dir);
+ out:
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+}
+
+static int ll_rename_generic(struct inode *src, struct dentry *src_dparent,
+ struct dentry *src_dchild, struct qstr *src_name,
+ struct inode *tgt, struct dentry *tgt_dparent,
+ struct dentry *tgt_dchild, struct qstr *tgt_name)
+{
+ struct ptlrpc_request *request = NULL;
+ struct ll_sb_info *sbi = ll_i2sbi(src);
+ struct md_op_data *op_data;
+ int err;
+ ENTRY;
+ CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
+ "tgt_dir=%lu/%u(%p)\n", src_name->len, src_name->name,
+ src->i_ino, src->i_generation, src, tgt_name->len,
+ tgt_name->name, tgt->i_ino, tgt->i_generation, tgt);
+
+ if (unlikely(ll_d_mountpoint(src_dparent, src_dchild, src_name) ||
+ ll_d_mountpoint(tgt_dparent, tgt_dchild, tgt_name)))
+ RETURN(-EBUSY);
+
+ op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ ll_get_child_fid(src, src_name, &op_data->op_fid3);
+ ll_get_child_fid(tgt, tgt_name, &op_data->op_fid4);
+ err = md_rename(sbi->ll_md_exp, op_data,
+ src_name->name, src_name->len,
+ tgt_name->name, tgt_name->len, &request);
+ ll_finish_md_op_data(op_data);
+ if (!err) {
+ ll_update_times(request, src);
+ ll_update_times(request, tgt);
+ ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
+ err = ll_objects_destroy(request, src);
+ }
+
+ ptlrpc_req_finished(request);
+
+ RETURN(err);
+}
+
+static int ll_mknod(struct inode *dir, struct dentry *dchild, ll_umode_t mode,
+ dev_t rdev)
+{
+ return ll_mknod_generic(dir, &dchild->d_name, mode,
+ old_encode_dev(rdev), dchild);
+}
+
+static int ll_unlink(struct inode * dir, struct dentry *dentry)
+{
+ return ll_unlink_generic(dir, NULL, dentry, &dentry->d_name);
+}
+
+static int ll_mkdir(struct inode *dir, struct dentry *dentry, ll_umode_t mode)
+{
+ return ll_mkdir_generic(dir, &dentry->d_name, mode, dentry);
+}
+
+static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ return ll_rmdir_generic(dir, NULL, dentry, &dentry->d_name);
+}
+
+static int ll_symlink(struct inode *dir, struct dentry *dentry,
+ const char *oldname)
+{
+ return ll_symlink_generic(dir, &dentry->d_name, oldname, dentry);
+}
+
+static int ll_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry)
+{
+ return ll_link_generic(old_dentry->d_inode, dir, &new_dentry->d_name,
+ new_dentry);
+}
+
+static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ int err;
+ err = ll_rename_generic(old_dir, NULL,
+ old_dentry, &old_dentry->d_name,
+ new_dir, NULL, new_dentry,
+ &new_dentry->d_name);
+ if (!err) {
+ d_move(old_dentry, new_dentry);
+ }
+ return err;
+}
+
+struct inode_operations ll_dir_inode_operations = {
+ .mknod = ll_mknod,
+ .atomic_open = ll_atomic_open,
+ .lookup = ll_lookup_nd,
+ .create = ll_create_nd,
+ /* We need all these non-raw things for NFSD, to not patch it. */
+ .unlink = ll_unlink,
+ .mkdir = ll_mkdir,
+ .rmdir = ll_rmdir,
+ .symlink = ll_symlink,
+ .link = ll_link,
+ .rename = ll_rename,
+ .setattr = ll_setattr,
+ .getattr = ll_getattr,
+ .permission = ll_inode_permission,
+ .setxattr = ll_setxattr,
+ .getxattr = ll_getxattr,
+ .listxattr = ll_listxattr,
+ .removexattr = ll_removexattr,
+ .get_acl = ll_get_acl,
+};
+
+struct inode_operations ll_special_inode_operations = {
+ .setattr = ll_setattr,
+ .getattr = ll_getattr,
+ .permission = ll_inode_permission,
+ .setxattr = ll_setxattr,
+ .getxattr = ll_getxattr,
+ .listxattr = ll_listxattr,
+ .removexattr = ll_removexattr,
+ .get_acl = ll_get_acl,
+};
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
new file mode 100644
index 000000000000..68b2dc4a7b62
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/remote_perm.c
@@ -0,0 +1,333 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/remote_perm.c
+ *
+ * Lustre Permission Cache for Remote Client
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include "llite_internal.h"
+
+struct kmem_cache *ll_remote_perm_cachep = NULL;
+struct kmem_cache *ll_rmtperm_hash_cachep = NULL;
+
+static inline struct ll_remote_perm *alloc_ll_remote_perm(void)
+{
+ struct ll_remote_perm *lrp;
+
+ OBD_SLAB_ALLOC_PTR_GFP(lrp, ll_remote_perm_cachep, GFP_KERNEL);
+ if (lrp)
+ INIT_HLIST_NODE(&lrp->lrp_list);
+ return lrp;
+}
+
+static inline void free_ll_remote_perm(struct ll_remote_perm *lrp)
+{
+ if (!lrp)
+ return;
+
+ if (!hlist_unhashed(&lrp->lrp_list))
+ hlist_del(&lrp->lrp_list);
+ OBD_SLAB_FREE(lrp, ll_remote_perm_cachep, sizeof(*lrp));
+}
+
+struct hlist_head *alloc_rmtperm_hash(void)
+{
+ struct hlist_head *hash;
+ int i;
+
+ OBD_SLAB_ALLOC_GFP(hash, ll_rmtperm_hash_cachep,
+ REMOTE_PERM_HASHSIZE * sizeof(*hash),
+ GFP_IOFS);
+ if (!hash)
+ return NULL;
+
+ for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
+ INIT_HLIST_HEAD(hash + i);
+
+ return hash;
+}
+
+void free_rmtperm_hash(struct hlist_head *hash)
+{
+ int i;
+ struct ll_remote_perm *lrp;
+ struct hlist_node *next;
+
+ if(!hash)
+ return;
+
+ for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
+ hlist_for_each_entry_safe(lrp, next, hash + i,
+ lrp_list)
+ free_ll_remote_perm(lrp);
+ OBD_SLAB_FREE(hash, ll_rmtperm_hash_cachep,
+ REMOTE_PERM_HASHSIZE * sizeof(*hash));
+}
+
+static inline int remote_perm_hashfunc(uid_t uid)
+{
+ return uid & (REMOTE_PERM_HASHSIZE - 1);
+}
+
+/* NB: setxid permission is not checked here, instead it's done on
+ * MDT when client get remote permission. */
+static int do_check_remote_perm(struct ll_inode_info *lli, int mask)
+{
+ struct hlist_head *head;
+ struct ll_remote_perm *lrp;
+ int found = 0, rc;
+ ENTRY;
+
+ if (!lli->lli_remote_perms)
+ RETURN(-ENOENT);
+
+ head = lli->lli_remote_perms + remote_perm_hashfunc(current_uid());
+
+ spin_lock(&lli->lli_lock);
+ hlist_for_each_entry(lrp, head, lrp_list) {
+ if (lrp->lrp_uid != current_uid())
+ continue;
+ if (lrp->lrp_gid != current_gid())
+ continue;
+ if (lrp->lrp_fsuid != current_fsuid())
+ continue;
+ if (lrp->lrp_fsgid != current_fsgid())
+ continue;
+ found = 1;
+ break;
+ }
+
+ if (!found)
+ GOTO(out, rc = -ENOENT);
+
+ CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n",
+ lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
+ lrp->lrp_access_perm);
+ rc = ((lrp->lrp_access_perm & mask) == mask) ? 0 : -EACCES;
+
+out:
+ spin_unlock(&lli->lli_lock);
+ return rc;
+}
+
+int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_remote_perm *lrp = NULL, *tmp = NULL;
+ struct hlist_head *head, *perm_hash = NULL;
+ ENTRY;
+
+ LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);
+
+#if 0
+ if (perm->rp_uid != current->uid ||
+ perm->rp_gid != current->gid ||
+ perm->rp_fsuid != current->fsuid ||
+ perm->rp_fsgid != current->fsgid) {
+ /* user might setxid in this small period */
+ CDEBUG(D_SEC,
+ "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
+ perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
+ perm->rp_fsgid, current->uid, current->gid,
+ current->fsuid, current->fsgid);
+ RETURN(-EAGAIN);
+ }
+#endif
+
+ if (!lli->lli_remote_perms) {
+ perm_hash = alloc_rmtperm_hash();
+ if (perm_hash == NULL) {
+ CERROR("alloc lli_remote_perms failed!\n");
+ RETURN(-ENOMEM);
+ }
+ }
+
+ spin_lock(&lli->lli_lock);
+
+ if (!lli->lli_remote_perms)
+ lli->lli_remote_perms = perm_hash;
+ else if (perm_hash)
+ free_rmtperm_hash(perm_hash);
+
+ head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);
+
+again:
+ hlist_for_each_entry(tmp, head, lrp_list) {
+ if (tmp->lrp_uid != perm->rp_uid)
+ continue;
+ if (tmp->lrp_gid != perm->rp_gid)
+ continue;
+ if (tmp->lrp_fsuid != perm->rp_fsuid)
+ continue;
+ if (tmp->lrp_fsgid != perm->rp_fsgid)
+ continue;
+ if (lrp)
+ free_ll_remote_perm(lrp);
+ lrp = tmp;
+ break;
+ }
+
+ if (!lrp) {
+ spin_unlock(&lli->lli_lock);
+ lrp = alloc_ll_remote_perm();
+ if (!lrp) {
+ CERROR("alloc memory for ll_remote_perm failed!\n");
+ RETURN(-ENOMEM);
+ }
+ spin_lock(&lli->lli_lock);
+ goto again;
+ }
+
+ lrp->lrp_access_perm = perm->rp_access_perm;
+ if (lrp != tmp) {
+ lrp->lrp_uid = perm->rp_uid;
+ lrp->lrp_gid = perm->rp_gid;
+ lrp->lrp_fsuid = perm->rp_fsuid;
+ lrp->lrp_fsgid = perm->rp_fsgid;
+ hlist_add_head(&lrp->lrp_list, head);
+ }
+ lli->lli_rmtperm_time = cfs_time_current();
+ spin_unlock(&lli->lli_lock);
+
+ CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
+ lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
+ lrp->lrp_access_perm);
+
+ RETURN(0);
+}
+
+int lustre_check_remote_perm(struct inode *inode, int mask)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *req = NULL;
+ struct mdt_remote_perm *perm;
+ struct obd_capa *oc;
+ cfs_time_t save;
+ int i = 0, rc;
+ ENTRY;
+
+ do {
+ save = lli->lli_rmtperm_time;
+ rc = do_check_remote_perm(lli, mask);
+ if (!rc || (rc != -ENOENT && i))
+ break;
+
+ might_sleep();
+
+ mutex_lock(&lli->lli_rmtperm_mutex);
+ /* check again */
+ if (save != lli->lli_rmtperm_time) {
+ rc = do_check_remote_perm(lli, mask);
+ if (!rc || (rc != -ENOENT && i)) {
+ mutex_unlock(&lli->lli_rmtperm_mutex);
+ break;
+ }
+ }
+
+ if (i++ > 5) {
+ CERROR("check remote perm falls in dead loop!\n");
+ LBUG();
+ }
+
+ oc = ll_mdscapa_get(inode);
+ rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ ll_i2suppgid(inode), &req);
+ capa_put(oc);
+ if (rc) {
+ mutex_unlock(&lli->lli_rmtperm_mutex);
+ break;
+ }
+
+ perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL,
+ lustre_swab_mdt_remote_perm);
+ if (unlikely(perm == NULL)) {
+ mutex_unlock(&lli->lli_rmtperm_mutex);
+ rc = -EPROTO;
+ break;
+ }
+
+ rc = ll_update_remote_perm(inode, perm);
+ mutex_unlock(&lli->lli_rmtperm_mutex);
+ if (rc == -ENOMEM)
+ break;
+
+ ptlrpc_req_finished(req);
+ req = NULL;
+ } while (1);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+#if 0 /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock,
+ * because it will fail sanity test 48.
+ */
+void ll_free_remote_perms(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct hlist_head *hash = lli->lli_remote_perms;
+ struct ll_remote_perm *lrp;
+ struct hlist_node *node, *next;
+ int i;
+
+ LASSERT(hash);
+
+ spin_lock(&lli->lli_lock);
+
+ for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) {
+ hlist_for_each_entry_safe(lrp, node, next, hash + i,
+ lrp_list)
+ free_ll_remote_perm(lrp);
+ }
+
+ spin_unlock(&lli->lli_lock);
+}
+#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
new file mode 100644
index 000000000000..fac117889011
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -0,0 +1,1314 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/rw.c
+ *
+ * Lustre Lite I/O page cache routines shared by different kernel revs
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/writeback.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+/* current_is_kswapd() */
+#include <linux/swap.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre_lite.h>
+#include <obd_cksum.h>
+#include "llite_internal.h"
+#include <linux/lustre_compat25.h>
+
+/**
+ * Finalizes cl-data before exiting typical address_space operation. Dual to
+ * ll_cl_init().
+ */
+static void ll_cl_fini(struct ll_cl_context *lcc)
+{
+ struct lu_env *env = lcc->lcc_env;
+ struct cl_io *io = lcc->lcc_io;
+ struct cl_page *page = lcc->lcc_page;
+
+ LASSERT(lcc->lcc_cookie == current);
+ LASSERT(env != NULL);
+
+ if (page != NULL) {
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+
+ if (io && lcc->lcc_created) {
+ cl_io_end(env, io);
+ cl_io_unlock(env, io);
+ cl_io_iter_fini(env, io);
+ cl_io_fini(env, io);
+ }
+ cl_env_put(env, &lcc->lcc_refcheck);
+}
+
+/**
+ * Initializes common cl-data at the typical address_space operation entry
+ * point.
+ */
+static struct ll_cl_context *ll_cl_init(struct file *file,
+ struct page *vmpage, int create)
+{
+ struct ll_cl_context *lcc;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_object *clob;
+ struct ccc_io *cio;
+
+ int refcheck;
+ int result = 0;
+
+ clob = ll_i2info(vmpage->mapping->host)->lli_clob;
+ LASSERT(clob != NULL);
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return ERR_PTR(PTR_ERR(env));
+
+ lcc = &vvp_env_info(env)->vti_io_ctx;
+ memset(lcc, 0, sizeof(*lcc));
+ lcc->lcc_env = env;
+ lcc->lcc_refcheck = refcheck;
+ lcc->lcc_cookie = current;
+
+ cio = ccc_env_io(env);
+ io = cio->cui_cl.cis_io;
+ if (io == NULL && create) {
+ struct inode *inode = vmpage->mapping->host;
+ loff_t pos;
+
+ if (mutex_trylock(&inode->i_mutex)) {
+ mutex_unlock(&(inode)->i_mutex);
+
+ /* this is too bad. Someone is trying to write the
+ * page w/o holding inode mutex. This means we can
+ * add dirty pages into cache during truncate */
+ CERROR("Proc %s is dirting page w/o inode lock, this"
+ "will break truncate.\n", current->comm);
+ libcfs_debug_dumpstack(NULL);
+ LBUG();
+ return ERR_PTR(-EIO);
+ }
+
+ /*
+ * Loop-back driver calls ->prepare_write() and ->sendfile()
+ * methods directly, bypassing file system ->write() operation,
+ * so cl_io has to be created here.
+ */
+ io = ccc_env_thread_io(env);
+ ll_io_init(io, file, 1);
+
+ /* No lock at all for this kind of IO - we can't do it because
+ * we have held page lock, it would cause deadlock.
+ * XXX: This causes poor performance to loop device - One page
+ * per RPC.
+ * In order to get better performance, users should use
+ * lloop driver instead.
+ */
+ io->ci_lockreq = CILR_NEVER;
+
+ pos = (vmpage->index << PAGE_CACHE_SHIFT);
+
+ /* Create a temp IO to serve write. */
+ result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
+ if (result == 0) {
+ cio->cui_fd = LUSTRE_FPRIVATE(file);
+ cio->cui_iov = NULL;
+ cio->cui_nrsegs = 0;
+ result = cl_io_iter_init(env, io);
+ if (result == 0) {
+ result = cl_io_lock(env, io);
+ if (result == 0)
+ result = cl_io_start(env, io);
+ }
+ } else
+ result = io->ci_result;
+ lcc->lcc_created = 1;
+ }
+
+ lcc->lcc_io = io;
+ if (io == NULL)
+ result = -EIO;
+ if (result == 0) {
+ struct cl_page *page;
+
+ LASSERT(io != NULL);
+ LASSERT(io->ci_state == CIS_IO_GOING);
+ LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file));
+ page = cl_page_find(env, clob, vmpage->index, vmpage,
+ CPT_CACHEABLE);
+ if (!IS_ERR(page)) {
+ lcc->lcc_page = page;
+ lu_ref_add(&page->cp_reference, "cl_io", io);
+ result = 0;
+ } else
+ result = PTR_ERR(page);
+ }
+ if (result) {
+ ll_cl_fini(lcc);
+ lcc = ERR_PTR(result);
+ }
+
+ CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
+ vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
+ env, io);
+ return lcc;
+}
+
+static struct ll_cl_context *ll_cl_get(void)
+{
+ struct ll_cl_context *lcc;
+ struct lu_env *env;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ LASSERT(!IS_ERR(env));
+ lcc = &vvp_env_info(env)->vti_io_ctx;
+ LASSERT(env == lcc->lcc_env);
+ LASSERT(current == lcc->lcc_cookie);
+ cl_env_put(env, &refcheck);
+
+ /* env has got in ll_cl_init, so it is still usable. */
+ return lcc;
+}
+
+/**
+ * ->prepare_write() address space operation called by generic_file_write()
+ * for every page during write.
+ */
+int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
+ unsigned to)
+{
+ struct ll_cl_context *lcc;
+ int result;
+ ENTRY;
+
+ lcc = ll_cl_init(file, vmpage, 1);
+ if (!IS_ERR(lcc)) {
+ struct lu_env *env = lcc->lcc_env;
+ struct cl_io *io = lcc->lcc_io;
+ struct cl_page *page = lcc->lcc_page;
+
+ cl_page_assume(env, io, page);
+
+ result = cl_io_prepare_write(env, io, page, from, to);
+ if (result == 0) {
+ /*
+ * Add a reference, so that page is not evicted from
+ * the cache until ->commit_write() is called.
+ */
+ cl_page_get(page);
+ lu_ref_add(&page->cp_reference, "prepare_write",
+ current);
+ } else {
+ cl_page_unassume(env, io, page);
+ ll_cl_fini(lcc);
+ }
+ /* returning 0 in prepare assumes commit must be called
+ * afterwards */
+ } else {
+ result = PTR_ERR(lcc);
+ }
+ RETURN(result);
+}
+
+int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
+ unsigned to)
+{
+ struct ll_cl_context *lcc;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
+ int result = 0;
+ ENTRY;
+
+ lcc = ll_cl_get();
+ env = lcc->lcc_env;
+ page = lcc->lcc_page;
+ io = lcc->lcc_io;
+
+ LASSERT(cl_page_is_owned(page, io));
+ LASSERT(from <= to);
+ if (from != to) /* handle short write case. */
+ result = cl_io_commit_write(env, io, page, from, to);
+ if (cl_page_is_owned(page, io))
+ cl_page_unassume(env, io, page);
+
+ /*
+ * Release reference acquired by ll_prepare_write().
+ */
+ lu_ref_del(&page->cp_reference, "prepare_write", current);
+ cl_page_put(env, page);
+ ll_cl_fini(lcc);
+ RETURN(result);
+}
+
+struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt)
+{
+ __u64 opc;
+
+ opc = crt == CRT_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
+ return ll_osscapa_get(inode, opc);
+}
+
+static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
+
+/**
+ * Get readahead pages from the filesystem readahead pool of the client for a
+ * thread.
+ *
+ * /param sbi superblock for filesystem readahead state ll_ra_info
+ * /param ria per-thread readahead state
+ * /param pages number of pages requested for readahead for the thread.
+ *
+ * WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
+ * It should work well if the ra_max_pages is much greater than the single
+ * file's read-ahead window, and not too many threads contending for
+ * these readahead pages.
+ *
+ * TODO: There may be a 'global sync problem' if many threads are trying
+ * to get an ra budget that is larger than the remaining readahead pages
+ * and reach here at exactly the same time. They will compute /a ret to
+ * consume the remaining pages, but will fail at atomic_add_return() and
+ * get a zero ra window, although there is still ra space remaining. - Jay */
+
+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
+ struct ra_io_arg *ria,
+ unsigned long pages)
+{
+ struct ll_ra_info *ra = &sbi->ll_ra_info;
+ long ret;
+ ENTRY;
+
+ /* If read-ahead pages left are less than 1M, do not do read-ahead,
+ * otherwise it will form small read RPC(< 1M), which hurt server
+ * performance a lot. */
+ ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
+ if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages))
+ GOTO(out, ret = 0);
+
+ /* If the non-strided (ria_pages == 0) readahead window
+ * (ria_start + ret) has grown across an RPC boundary, then trim
+ * readahead size by the amount beyond the RPC so it ends on an
+ * RPC boundary. If the readahead window is already ending on
+ * an RPC boundary (beyond_rpc == 0), or smaller than a full
+ * RPC (beyond_rpc < ret) the readahead size is unchanged.
+ * The (beyond_rpc != 0) check is skipped since the conditional
+ * branch is more expensive than subtracting zero from the result.
+ *
+ * Strided read is left unaligned to avoid small fragments beyond
+ * the RPC boundary from needing an extra read RPC. */
+ if (ria->ria_pages == 0) {
+ long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;
+ if (/* beyond_rpc != 0 && */ beyond_rpc < ret)
+ ret -= beyond_rpc;
+ }
+
+ if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
+ atomic_sub(ret, &ra->ra_cur_pages);
+ ret = 0;
+ }
+
+out:
+ RETURN(ret);
+}
+
+void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
+{
+ struct ll_ra_info *ra = &sbi->ll_ra_info;
+ atomic_sub(len, &ra->ra_cur_pages);
+}
+
+static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
+{
+ LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
+ lprocfs_counter_incr(sbi->ll_ra_stats, which);
+}
+
+void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+ ll_ra_stats_inc_sbi(sbi, which);
+}
+
+#define RAS_CDEBUG(ras) \
+ CDEBUG(D_READA, \
+ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
+ "csr %lu sf %lu sp %lu sl %lu \n", \
+ ras->ras_last_readpage, ras->ras_consecutive_requests, \
+ ras->ras_consecutive_pages, ras->ras_window_start, \
+ ras->ras_window_len, ras->ras_next_readahead, \
+ ras->ras_requests, ras->ras_request_index, \
+ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
+ ras->ras_stride_pages, ras->ras_stride_length)
+
+static int index_in_window(unsigned long index, unsigned long point,
+ unsigned long before, unsigned long after)
+{
+ unsigned long start = point - before, end = point + after;
+
+ if (start > point)
+ start = 0;
+ if (end < point)
+ end = ~0;
+
+ return start <= index && index <= end;
+}
+
+static struct ll_readahead_state *ll_ras_get(struct file *f)
+{
+ struct ll_file_data *fd;
+
+ fd = LUSTRE_FPRIVATE(f);
+ return &fd->fd_ras;
+}
+
+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
+{
+ struct ll_readahead_state *ras;
+
+ ras = ll_ras_get(f);
+
+ spin_lock(&ras->ras_lock);
+ ras->ras_requests++;
+ ras->ras_request_index = 0;
+ ras->ras_consecutive_requests++;
+ rar->lrr_reader = current;
+
+ list_add(&rar->lrr_linkage, &ras->ras_read_beads);
+ spin_unlock(&ras->ras_lock);
+}
+
+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
+{
+ struct ll_readahead_state *ras;
+
+ ras = ll_ras_get(f);
+
+ spin_lock(&ras->ras_lock);
+ list_del_init(&rar->lrr_linkage);
+ spin_unlock(&ras->ras_lock);
+}
+
+static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
+{
+ struct ll_ra_read *scan;
+
+ list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
+ if (scan->lrr_reader == current)
+ return scan;
+ }
+ return NULL;
+}
+
+struct ll_ra_read *ll_ra_read_get(struct file *f)
+{
+ struct ll_readahead_state *ras;
+ struct ll_ra_read *bead;
+
+ ras = ll_ras_get(f);
+
+ spin_lock(&ras->ras_lock);
+ bead = ll_ra_read_get_locked(ras);
+ spin_unlock(&ras->ras_lock);
+ return bead;
+}
+
+static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, struct cl_page *page,
+ struct page *vmpage)
+{
+ struct ccc_page *cp;
+ int rc;
+
+ ENTRY;
+
+ rc = 0;
+ cl_page_assume(env, io, page);
+ lu_ref_add(&page->cp_reference, "ra", current);
+ cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
+ if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
+ rc = cl_page_is_under_lock(env, io, page);
+ if (rc == -EBUSY) {
+ cp->cpg_defer_uptodate = 1;
+ cp->cpg_ra_used = 0;
+ cl_page_list_add(queue, page);
+ rc = 1;
+ } else {
+ cl_page_delete(env, page);
+ rc = -ENOLCK;
+ }
+ } else {
+ /* skip completed pages */
+ cl_page_unassume(env, io, page);
+ }
+ lu_ref_del(&page->cp_reference, "ra", current);
+ cl_page_put(env, page);
+ RETURN(rc);
+}
+
+/**
+ * Initiates read-ahead of a page with given index.
+ *
+ * \retval +ve: page was added to \a queue.
+ *
+ * \retval -ENOLCK: there is no extent lock for this part of a file, stop
+ * read-ahead.
+ *
+ * \retval -ve, 0: page wasn't added to \a queue for other reason.
+ */
+static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue,
+ pgoff_t index, struct address_space *mapping)
+{
+ struct page *vmpage;
+ struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
+ struct cl_page *page;
+ enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
+ unsigned int gfp_mask;
+ int rc = 0;
+ const char *msg = NULL;
+
+ ENTRY;
+
+ gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
+#ifdef __GFP_NOWARN
+ gfp_mask |= __GFP_NOWARN;
+#endif
+ vmpage = grab_cache_page_nowait(mapping, index);
+ if (vmpage != NULL) {
+ /* Check if vmpage was truncated or reclaimed */
+ if (vmpage->mapping == mapping) {
+ page = cl_page_find(env, clob, vmpage->index,
+ vmpage, CPT_CACHEABLE);
+ if (!IS_ERR(page)) {
+ rc = cl_read_ahead_page(env, io, queue,
+ page, vmpage);
+ if (rc == -ENOLCK) {
+ which = RA_STAT_FAILED_MATCH;
+ msg = "lock match failed";
+ }
+ } else {
+ which = RA_STAT_FAILED_GRAB_PAGE;
+ msg = "cl_page_find failed";
+ }
+ } else {
+ which = RA_STAT_WRONG_GRAB_PAGE;
+ msg = "g_c_p_n returned invalid page";
+ }
+ if (rc != 1)
+ unlock_page(vmpage);
+ page_cache_release(vmpage);
+ } else {
+ which = RA_STAT_FAILED_GRAB_PAGE;
+ msg = "g_c_p_n failed";
+ }
+ if (msg != NULL) {
+ ll_ra_stats_inc(mapping, which);
+ CDEBUG(D_READA, "%s\n", msg);
+ }
+ RETURN(rc);
+}
+
+#define RIA_DEBUG(ria) \
+ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
+ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
+ ria->ria_pages)
+
+/* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't
+ * know what the actual RPC size is. If this needs to change, it makes more
+ * sense to tune the i_blkbits value for the file based on the OSTs it is
+ * striped over, rather than having a constant value for all files here. */
+
+/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_CACHE_SHIFT)).
+ * Temprarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled
+ * by default, this should be adjusted corresponding with max_read_ahead_mb
+ * and max_read_ahead_per_file_mb otherwise the readahead budget can be used
+ * up quickly which will affect read performance siginificantly. See LU-2816 */
+#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_CACHE_SHIFT)
+
+static inline int stride_io_mode(struct ll_readahead_state *ras)
+{
+ return ras->ras_consecutive_stride_requests > 1;
+}
+/* The function calculates how much pages will be read in
+ * [off, off + length], in such stride IO area,
+ * stride_offset = st_off, stride_lengh = st_len,
+ * stride_pages = st_pgs
+ *
+ * |------------------|*****|------------------|*****|------------|*****|....
+ * st_off
+ * |--- st_pgs ---|
+ * |----- st_len -----|
+ *
+ * How many pages it should read in such pattern
+ * |-------------------------------------------------------------|
+ * off
+ * |<------ length ------->|
+ *
+ * = |<----->| + |-------------------------------------| + |---|
+ * start_left st_pgs * i end_left
+ */
+static unsigned long
+stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
+ unsigned long off, unsigned long length)
+{
+ __u64 start = off > st_off ? off - st_off : 0;
+ __u64 end = off + length > st_off ? off + length - st_off : 0;
+ unsigned long start_left = 0;
+ unsigned long end_left = 0;
+ unsigned long pg_count;
+
+ if (st_len == 0 || length == 0 || end == 0)
+ return length;
+
+ start_left = do_div(start, st_len);
+ if (start_left < st_pgs)
+ start_left = st_pgs - start_left;
+ else
+ start_left = 0;
+
+ end_left = do_div(end, st_len);
+ if (end_left > st_pgs)
+ end_left = st_pgs;
+
+ CDEBUG(D_READA, "start "LPU64", end "LPU64" start_left %lu end_left %lu \n",
+ start, end, start_left, end_left);
+
+ if (start == end)
+ pg_count = end_left - (st_pgs - start_left);
+ else
+ pg_count = start_left + st_pgs * (end - start - 1) + end_left;
+
+ CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu"
+ "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);
+
+ return pg_count;
+}
+
+static int ria_page_count(struct ra_io_arg *ria)
+{
+ __u64 length = ria->ria_end >= ria->ria_start ?
+ ria->ria_end - ria->ria_start + 1 : 0;
+
+ return stride_pg_count(ria->ria_stoff, ria->ria_length,
+ ria->ria_pages, ria->ria_start,
+ length);
+}
+
+/*Check whether the index is in the defined ra-window */
+static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
+{
+ /* If ria_length == ria_pages, it means non-stride I/O mode,
+ * idx should always inside read-ahead window in this case
+ * For stride I/O mode, just check whether the idx is inside
+ * the ria_pages. */
+ return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
+ (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
+ ria->ria_length < ria->ria_pages);
+}
+
+static int ll_read_ahead_pages(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *queue,
+ struct ra_io_arg *ria,
+ unsigned long *reserved_pages,
+ struct address_space *mapping,
+ unsigned long *ra_end)
+{
+ int rc, count = 0, stride_ria;
+ unsigned long page_idx;
+
+ LASSERT(ria != NULL);
+ RIA_DEBUG(ria);
+
+ stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
+ for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
+ *reserved_pages > 0; page_idx++) {
+ if (ras_inside_ra_window(page_idx, ria)) {
+ /* If the page is inside the read-ahead window*/
+ rc = ll_read_ahead_page(env, io, queue,
+ page_idx, mapping);
+ if (rc == 1) {
+ (*reserved_pages)--;
+ count ++;
+ } else if (rc == -ENOLCK)
+ break;
+ } else if (stride_ria) {
+ /* If it is not in the read-ahead window, and it is
+ * read-ahead mode, then check whether it should skip
+ * the stride gap */
+ pgoff_t offset;
+ /* FIXME: This assertion only is valid when it is for
+ * forward read-ahead, it will be fixed when backward
+ * read-ahead is implemented */
+ LASSERTF(page_idx > ria->ria_stoff, "Invalid page_idx %lu"
+ "rs %lu re %lu ro %lu rl %lu rp %lu\n", page_idx,
+ ria->ria_start, ria->ria_end, ria->ria_stoff,
+ ria->ria_length, ria->ria_pages);
+ offset = page_idx - ria->ria_stoff;
+ offset = offset % (ria->ria_length);
+ if (offset > ria->ria_pages) {
+ page_idx += ria->ria_length - offset;
+ CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
+ ria->ria_length - offset);
+ continue;
+ }
+ }
+ }
+ *ra_end = page_idx;
+ return count;
+}
+
+int ll_readahead(const struct lu_env *env, struct cl_io *io,
+ struct ll_readahead_state *ras, struct address_space *mapping,
+ struct cl_page_list *queue, int flags)
+{
+ struct vvp_io *vio = vvp_env_io(env);
+ struct vvp_thread_info *vti = vvp_env_info(env);
+ struct cl_attr *attr = ccc_env_thread_attr(env);
+ unsigned long start = 0, end = 0, reserved;
+ unsigned long ra_end, len;
+ struct inode *inode;
+ struct ll_ra_read *bead;
+ struct ra_io_arg *ria = &vti->vti_ria;
+ struct ll_inode_info *lli;
+ struct cl_object *clob;
+ int ret = 0;
+ __u64 kms;
+ ENTRY;
+
+ inode = mapping->host;
+ lli = ll_i2info(inode);
+ clob = lli->lli_clob;
+
+ memset(ria, 0, sizeof *ria);
+
+ cl_object_attr_lock(clob);
+ ret = cl_object_attr_get(env, clob, attr);
+ cl_object_attr_unlock(clob);
+
+ if (ret != 0)
+ RETURN(ret);
+ kms = attr->cat_kms;
+ if (kms == 0) {
+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+ RETURN(0);
+ }
+
+ spin_lock(&ras->ras_lock);
+ if (vio->cui_ra_window_set)
+ bead = &vio->cui_bead;
+ else
+ bead = NULL;
+
+ /* Enlarge the RA window to encompass the full read */
+ if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
+ bead->lrr_start + bead->lrr_count) {
+ ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+ ras->ras_window_start;
+ }
+ /* Reserve a part of the read-ahead window that we'll be issuing */
+ if (ras->ras_window_len) {
+ start = ras->ras_next_readahead;
+ end = ras->ras_window_start + ras->ras_window_len - 1;
+ }
+ if (end != 0) {
+ unsigned long rpc_boundary;
+ /*
+ * Align RA window to an optimal boundary.
+ *
+ * XXX This would be better to align to cl_max_pages_per_rpc
+ * instead of PTLRPC_MAX_BRW_PAGES, because the RPC size may
+ * be aligned to the RAID stripe size in the future and that
+ * is more important than the RPC size.
+ */
+ /* Note: we only trim the RPC, instead of extending the RPC
+ * to the boundary, so to avoid reading too much pages during
+ * random reading. */
+ rpc_boundary = ((end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1)));
+ if (rpc_boundary > 0)
+ rpc_boundary--;
+
+ if (rpc_boundary > start)
+ end = rpc_boundary;
+
+ /* Truncate RA window to end of file */
+ end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT));
+
+ ras->ras_next_readahead = max(end, end + 1);
+ RAS_CDEBUG(ras);
+ }
+ ria->ria_start = start;
+ ria->ria_end = end;
+ /* If stride I/O mode is detected, get stride window*/
+ if (stride_io_mode(ras)) {
+ ria->ria_stoff = ras->ras_stride_offset;
+ ria->ria_length = ras->ras_stride_length;
+ ria->ria_pages = ras->ras_stride_pages;
+ }
+ spin_unlock(&ras->ras_lock);
+
+ if (end == 0) {
+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+ RETURN(0);
+ }
+ len = ria_page_count(ria);
+ if (len == 0)
+ RETURN(0);
+
+ reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+ if (reserved < len)
+ ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+
+ CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+ atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
+ ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
+
+ ret = ll_read_ahead_pages(env, io, queue,
+ ria, &reserved, mapping, &ra_end);
+
+ LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
+ if (reserved != 0)
+ ll_ra_count_put(ll_i2sbi(inode), reserved);
+
+ if (ra_end == end + 1 && ra_end == (kms >> PAGE_CACHE_SHIFT))
+ ll_ra_stats_inc(mapping, RA_STAT_EOF);
+
+ /* if we didn't get to the end of the region we reserved from
+ * the ras we need to go back and update the ras so that the
+ * next read-ahead tries from where we left off. we only do so
+ * if the region we failed to issue read-ahead on is still ahead
+ * of the app and behind the next index to start read-ahead from */
+ CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
+ ra_end, end, ria->ria_end);
+
+ if (ra_end != end + 1) {
+ spin_lock(&ras->ras_lock);
+ if (ra_end < ras->ras_next_readahead &&
+ index_in_window(ra_end, ras->ras_window_start, 0,
+ ras->ras_window_len)) {
+ ras->ras_next_readahead = ra_end;
+ RAS_CDEBUG(ras);
+ }
+ spin_unlock(&ras->ras_lock);
+ }
+
+ RETURN(ret);
+}
+
+static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
+ unsigned long index)
+{
+ ras->ras_window_start = index & (~(RAS_INCREASE_STEP(inode) - 1));
+}
+
+/* called with the ras_lock held or from places where it doesn't matter */
+static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
+ unsigned long index)
+{
+ ras->ras_last_readpage = index;
+ ras->ras_consecutive_requests = 0;
+ ras->ras_consecutive_pages = 0;
+ ras->ras_window_len = 0;
+ ras_set_start(inode, ras, index);
+ ras->ras_next_readahead = max(ras->ras_window_start, index);
+
+ RAS_CDEBUG(ras);
+}
+
+/* called with the ras_lock held or from places where it doesn't matter */
+static void ras_stride_reset(struct ll_readahead_state *ras)
+{
+ ras->ras_consecutive_stride_requests = 0;
+ ras->ras_stride_length = 0;
+ ras->ras_stride_pages = 0;
+ RAS_CDEBUG(ras);
+}
+
+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
+{
+ spin_lock_init(&ras->ras_lock);
+ ras_reset(inode, ras, 0);
+ ras->ras_requests = 0;
+ INIT_LIST_HEAD(&ras->ras_read_beads);
+}
+
+/*
+ * Check whether the read request is in the stride window.
+ * If it is in the stride window, return 1, otherwise return 0.
+ */
+static int index_in_stride_window(struct ll_readahead_state *ras,
+ unsigned long index)
+{
+ unsigned long stride_gap;
+
+ if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 ||
+ ras->ras_stride_pages == ras->ras_stride_length)
+ return 0;
+
+ stride_gap = index - ras->ras_last_readpage - 1;
+
+ /* If it is contiguous read */
+ if (stride_gap == 0)
+ return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
+
+ /* Otherwise check the stride by itself */
+ return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
+ ras->ras_consecutive_pages == ras->ras_stride_pages;
+}
+
+static void ras_update_stride_detector(struct ll_readahead_state *ras,
+ unsigned long index)
+{
+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
+
+ if (!stride_io_mode(ras) && (stride_gap != 0 ||
+ ras->ras_consecutive_stride_requests == 0)) {
+ ras->ras_stride_pages = ras->ras_consecutive_pages;
+ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
+ }
+ LASSERT(ras->ras_request_index == 0);
+ LASSERT(ras->ras_consecutive_stride_requests == 0);
+
+ if (index <= ras->ras_last_readpage) {
+ /*Reset stride window for forward read*/
+ ras_stride_reset(ras);
+ return;
+ }
+
+ ras->ras_stride_pages = ras->ras_consecutive_pages;
+ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
+
+ RAS_CDEBUG(ras);
+ return;
+}
+
+static unsigned long
+stride_page_count(struct ll_readahead_state *ras, unsigned long len)
+{
+ return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
+ ras->ras_stride_pages, ras->ras_stride_offset,
+ len);
+}
+
+/* Stride Read-ahead window will be increased inc_len according to
+ * stride I/O pattern */
+static void ras_stride_increase_window(struct ll_readahead_state *ras,
+ struct ll_ra_info *ra,
+ unsigned long inc_len)
+{
+ unsigned long left, step, window_len;
+ unsigned long stride_len;
+
+ LASSERT(ras->ras_stride_length > 0);
+ LASSERTF(ras->ras_window_start + ras->ras_window_len
+ >= ras->ras_stride_offset, "window_start %lu, window_len %lu"
+ " stride_offset %lu\n", ras->ras_window_start,
+ ras->ras_window_len, ras->ras_stride_offset);
+
+ stride_len = ras->ras_window_start + ras->ras_window_len -
+ ras->ras_stride_offset;
+
+ left = stride_len % ras->ras_stride_length;
+ window_len = ras->ras_window_len - left;
+
+ if (left < ras->ras_stride_pages)
+ left += inc_len;
+ else
+ left = ras->ras_stride_pages + inc_len;
+
+ LASSERT(ras->ras_stride_pages != 0);
+
+ step = left / ras->ras_stride_pages;
+ left %= ras->ras_stride_pages;
+
+ window_len += step * ras->ras_stride_length + left;
+
+ if (stride_page_count(ras, window_len) <= ra->ra_max_pages_per_file)
+ ras->ras_window_len = window_len;
+
+ RAS_CDEBUG(ras);
+}
+
+static void ras_increase_window(struct inode *inode,
+ struct ll_readahead_state *ras,
+ struct ll_ra_info *ra)
+{
+ /* The stretch of ra-window should be aligned with max rpc_size
+ * but current clio architecture does not support retrieve such
+ * information from lower layer. FIXME later
+ */
+ if (stride_io_mode(ras))
+ ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP(inode));
+ else
+ ras->ras_window_len = min(ras->ras_window_len +
+ RAS_INCREASE_STEP(inode),
+ ra->ra_max_pages_per_file);
+}
+
+void ras_update(struct ll_sb_info *sbi, struct inode *inode,
+ struct ll_readahead_state *ras, unsigned long index,
+ unsigned hit)
+{
+ struct ll_ra_info *ra = &sbi->ll_ra_info;
+ int zero = 0, stride_detect = 0, ra_miss = 0;
+ ENTRY;
+
+ spin_lock(&ras->ras_lock);
+
+ ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
+
+ /* reset the read-ahead window in two cases. First when the app seeks
+ * or reads to some other part of the file. Secondly if we get a
+ * read-ahead miss that we think we've previously issued. This can
+ * be a symptom of there being so many read-ahead pages that the VM is
+ * reclaiming it before we get to it. */
+ if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
+ zero = 1;
+ ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
+ } else if (!hit && ras->ras_window_len &&
+ index < ras->ras_next_readahead &&
+ index_in_window(index, ras->ras_window_start, 0,
+ ras->ras_window_len)) {
+ ra_miss = 1;
+ ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
+ }
+
+ /* On the second access to a file smaller than the tunable
+ * ra_max_read_ahead_whole_pages trigger RA on all pages in the
+ * file up to ra_max_pages_per_file. This is simply a best effort
+ * and only occurs once per open file. Normal RA behavior is reverted
+ * to for subsequent IO. The mmap case does not increment
+ * ras_requests and thus can never trigger this behavior. */
+ if (ras->ras_requests == 2 && !ras->ras_request_index) {
+ __u64 kms_pages;
+
+ kms_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+
+ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
+ ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages_per_file);
+
+ if (kms_pages &&
+ kms_pages <= ra->ra_max_read_ahead_whole_pages) {
+ ras->ras_window_start = 0;
+ ras->ras_last_readpage = 0;
+ ras->ras_next_readahead = 0;
+ ras->ras_window_len = min(ra->ra_max_pages_per_file,
+ ra->ra_max_read_ahead_whole_pages);
+ GOTO(out_unlock, 0);
+ }
+ }
+ if (zero) {
+ /* check whether it is in stride I/O mode*/
+ if (!index_in_stride_window(ras, index)) {
+ if (ras->ras_consecutive_stride_requests == 0 &&
+ ras->ras_request_index == 0) {
+ ras_update_stride_detector(ras, index);
+ ras->ras_consecutive_stride_requests++;
+ } else {
+ ras_stride_reset(ras);
+ }
+ ras_reset(inode, ras, index);
+ ras->ras_consecutive_pages++;
+ GOTO(out_unlock, 0);
+ } else {
+ ras->ras_consecutive_pages = 0;
+ ras->ras_consecutive_requests = 0;
+ if (++ras->ras_consecutive_stride_requests > 1)
+ stride_detect = 1;
+ RAS_CDEBUG(ras);
+ }
+ } else {
+ if (ra_miss) {
+ if (index_in_stride_window(ras, index) &&
+ stride_io_mode(ras)) {
+ /*If stride-RA hit cache miss, the stride dector
+ *will not be reset to avoid the overhead of
+ *redetecting read-ahead mode */
+ if (index != ras->ras_last_readpage + 1)
+ ras->ras_consecutive_pages = 0;
+ ras_reset(inode, ras, index);
+ RAS_CDEBUG(ras);
+ } else {
+ /* Reset both stride window and normal RA
+ * window */
+ ras_reset(inode, ras, index);
+ ras->ras_consecutive_pages++;
+ ras_stride_reset(ras);
+ GOTO(out_unlock, 0);
+ }
+ } else if (stride_io_mode(ras)) {
+ /* If this is contiguous read but in stride I/O mode
+ * currently, check whether stride step still is valid,
+ * if invalid, it will reset the stride ra window*/
+ if (!index_in_stride_window(ras, index)) {
+ /* Shrink stride read-ahead window to be zero */
+ ras_stride_reset(ras);
+ ras->ras_window_len = 0;
+ ras->ras_next_readahead = index;
+ }
+ }
+ }
+ ras->ras_consecutive_pages++;
+ ras->ras_last_readpage = index;
+ ras_set_start(inode, ras, index);
+
+ if (stride_io_mode(ras))
+ /* Since stride readahead is sentivite to the offset
+ * of read-ahead, so we use original offset here,
+ * instead of ras_window_start, which is RPC aligned */
+ ras->ras_next_readahead = max(index, ras->ras_next_readahead);
+ else
+ ras->ras_next_readahead = max(ras->ras_window_start,
+ ras->ras_next_readahead);
+ RAS_CDEBUG(ras);
+
+ /* Trigger RA in the mmap case where ras_consecutive_requests
+ * is not incremented and thus can't be used to trigger RA */
+ if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
+ ras->ras_window_len = RAS_INCREASE_STEP(inode);
+ GOTO(out_unlock, 0);
+ }
+
+ /* Initially reset the stride window offset to next_readahead*/
+ if (ras->ras_consecutive_stride_requests == 2 && stride_detect) {
+ /**
+ * Once stride IO mode is detected, next_readahead should be
+ * reset to make sure next_readahead > stride offset
+ */
+ ras->ras_next_readahead = max(index, ras->ras_next_readahead);
+ ras->ras_stride_offset = index;
+ ras->ras_window_len = RAS_INCREASE_STEP(inode);
+ }
+
+ /* The initial ras_window_len is set to the request size. To avoid
+ * uselessly reading and discarding pages for random IO the window is
+ * only increased once per consecutive request received. */
+ if ((ras->ras_consecutive_requests > 1 || stride_detect) &&
+ !ras->ras_request_index)
+ ras_increase_window(inode, ras, ra);
+ EXIT;
+out_unlock:
+ RAS_CDEBUG(ras);
+ ras->ras_request_index++;
+ spin_unlock(&ras->ras_lock);
+ return;
+}
+
+int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
+{
+ struct inode *inode = vmpage->mapping->host;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
+ struct cl_object *clob;
+ struct cl_env_nest nest;
+ bool redirtied = false;
+ bool unlocked = false;
+ int result;
+ ENTRY;
+
+ LASSERT(PageLocked(vmpage));
+ LASSERT(!PageWriteback(vmpage));
+
+ LASSERT(ll_i2dtexp(inode) != NULL);
+
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ GOTO(out, result = PTR_ERR(env));
+
+ clob = ll_i2info(inode)->lli_clob;
+ LASSERT(clob != NULL);
+
+ io = ccc_env_thread_io(env);
+ io->ci_obj = clob;
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, clob);
+ if (result == 0) {
+ page = cl_page_find(env, clob, vmpage->index,
+ vmpage, CPT_CACHEABLE);
+ if (!IS_ERR(page)) {
+ lu_ref_add(&page->cp_reference, "writepage",
+ current);
+ cl_page_assume(env, io, page);
+ result = cl_page_flush(env, io, page);
+ if (result != 0) {
+ /*
+ * Re-dirty page on error so it retries write,
+ * but not in case when IO has actually
+ * occurred and completed with an error.
+ */
+ if (!PageError(vmpage)) {
+ redirty_page_for_writepage(wbc, vmpage);
+ result = 0;
+ redirtied = true;
+ }
+ }
+ cl_page_disown(env, io, page);
+ unlocked = true;
+ lu_ref_del(&page->cp_reference,
+ "writepage", current);
+ cl_page_put(env, page);
+ } else {
+ result = PTR_ERR(page);
+ }
+ }
+ cl_io_fini(env, io);
+
+ if (redirtied && wbc->sync_mode == WB_SYNC_ALL) {
+ loff_t offset = cl_offset(clob, vmpage->index);
+
+ /* Flush page failed because the extent is being written out.
+ * Wait for the write of extent to be finished to avoid
+ * breaking kernel which assumes ->writepage should mark
+ * PageWriteback or clean the page. */
+ result = cl_sync_file_range(inode, offset,
+ offset + PAGE_CACHE_SIZE - 1,
+ CL_FSYNC_LOCAL, 1);
+ if (result > 0) {
+ /* actually we may have written more than one page.
+ * decreasing this page because the caller will count
+ * it. */
+ wbc->nr_to_write -= result - 1;
+ result = 0;
+ }
+ }
+
+ cl_env_nested_put(&nest, env);
+ GOTO(out, result);
+
+out:
+ if (result < 0) {
+ if (!lli->lli_async_rc)
+ lli->lli_async_rc = result;
+ SetPageError(vmpage);
+ if (!unlocked)
+ unlock_page(vmpage);
+ }
+ return result;
+}
+
+int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ loff_t start;
+ loff_t end;
+ enum cl_fsync_mode mode;
+ int range_whole = 0;
+ int result;
+ int ignore_layout = 0;
+ ENTRY;
+
+ if (wbc->range_cyclic) {
+ start = mapping->writeback_index << PAGE_CACHE_SHIFT;
+ end = OBD_OBJECT_EOF;
+ } else {
+ start = wbc->range_start;
+ end = wbc->range_end;
+ if (end == LLONG_MAX) {
+ end = OBD_OBJECT_EOF;
+ range_whole = start == 0;
+ }
+ }
+
+ mode = CL_FSYNC_NONE;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ mode = CL_FSYNC_LOCAL;
+
+ if (sbi->ll_umounting)
+ /* if the mountpoint is being umounted, all pages have to be
+ * evicted to avoid hitting LBUG when truncate_inode_pages()
+ * is called later on. */
+ ignore_layout = 1;
+ result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
+ if (result > 0) {
+ wbc->nr_to_write -= result;
+ result = 0;
+ }
+
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
+ if (end == OBD_OBJECT_EOF)
+ end = i_size_read(inode);
+ mapping->writeback_index = (end >> PAGE_CACHE_SHIFT) + 1;
+ }
+ RETURN(result);
+}
+
+int ll_readpage(struct file *file, struct page *vmpage)
+{
+ struct ll_cl_context *lcc;
+ int result;
+ ENTRY;
+
+ lcc = ll_cl_init(file, vmpage, 0);
+ if (!IS_ERR(lcc)) {
+ struct lu_env *env = lcc->lcc_env;
+ struct cl_io *io = lcc->lcc_io;
+ struct cl_page *page = lcc->lcc_page;
+
+ LASSERT(page->cp_type == CPT_CACHEABLE);
+ if (likely(!PageUptodate(vmpage))) {
+ cl_page_assume(env, io, page);
+ result = cl_io_read_page(env, io, page);
+ } else {
+ /* Page from a non-object file. */
+ unlock_page(vmpage);
+ result = 0;
+ }
+ ll_cl_fini(lcc);
+ } else {
+ unlock_page(vmpage);
+ result = PTR_ERR(lcc);
+ }
+ RETURN(result);
+}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
new file mode 100644
index 000000000000..27e4e64bc1e7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -0,0 +1,586 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/llite/rw26.c
+ *
+ * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <asm/uaccess.h>
+
+#include <linux/migrate.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+#include <linux/lustre_compat25.h>
+
+/**
+ * Implements Linux VM address_space::invalidatepage() method. This method is
+ * called when the page is truncate from a file, either as a result of
+ * explicit truncate, or when inode is removed from memory (as a result of
+ * final iput(), umount, or memory pressure induced icache shrinking).
+ *
+ * [0, offset] bytes of the page remain valid (this is for a case of not-page
+ * aligned truncate). Lustre leaves partially truncated page in the cache,
+ * relying on struct inode::i_size to limit further accesses.
+ */
+static void ll_invalidatepage(struct page *vmpage, unsigned long offset)
+{
+ struct inode *inode;
+ struct lu_env *env;
+ struct cl_page *page;
+ struct cl_object *obj;
+
+ int refcheck;
+
+ LASSERT(PageLocked(vmpage));
+ LASSERT(!PageWriteback(vmpage));
+
+ /*
+ * It is safe to not check anything in invalidatepage/releasepage
+ * below because they are run with page locked and all our io is
+ * happening with locked page too
+ */
+ if (offset == 0) {
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ inode = vmpage->mapping->host;
+ obj = ll_i2info(inode)->lli_clob;
+ if (obj != NULL) {
+ page = cl_vmpage_page(vmpage, obj);
+ if (page != NULL) {
+ lu_ref_add(&page->cp_reference,
+ "delete", vmpage);
+ cl_page_delete(env, page);
+ lu_ref_del(&page->cp_reference,
+ "delete", vmpage);
+ cl_page_put(env, page);
+ }
+ } else
+ LASSERT(vmpage->private == 0);
+ cl_env_put(env, &refcheck);
+ }
+ }
+}
+
+#ifdef HAVE_RELEASEPAGE_WITH_INT
+#define RELEASEPAGE_ARG_TYPE int
+#else
+#define RELEASEPAGE_ARG_TYPE gfp_t
+#endif
+static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct cl_object *obj;
+ struct cl_page *page;
+ struct address_space *mapping;
+ int result;
+
+ LASSERT(PageLocked(vmpage));
+ if (PageWriteback(vmpage) || PageDirty(vmpage))
+ return 0;
+
+ mapping = vmpage->mapping;
+ if (mapping == NULL)
+ return 1;
+
+ obj = ll_i2info(mapping->host)->lli_clob;
+ if (obj == NULL)
+ return 1;
+
+ /* 1 for page allocator, 1 for cl_page and 1 for page cache */
+ if (page_count(vmpage) > 3)
+ return 0;
+
+ /* TODO: determine what gfp should be used by @gfp_mask. */
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ /* If we can't allocate an env we won't call cl_page_put()
+ * later on which further means it's impossible to drop
+ * page refcount by cl_page, so ask kernel to not free
+ * this page. */
+ return 0;
+
+ page = cl_vmpage_page(vmpage, obj);
+ result = page == NULL;
+ if (page != NULL) {
+ if (!cl_page_in_use(page)) {
+ result = 1;
+ cl_page_delete(env, page);
+ }
+ cl_page_put(env, page);
+ }
+ cl_env_nested_put(&nest, env);
+ return result;
+}
+
+static int ll_set_page_dirty(struct page *vmpage)
+{
+#if 0
+ struct cl_page *page = vvp_vmpage_page_transient(vmpage);
+ struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host);
+ struct vvp_page *cpg;
+
+ /*
+ * XXX should page method be called here?
+ */
+ LASSERT(&obj->co_cl == page->cp_obj);
+ cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
+ /*
+ * XXX cannot do much here, because page is possibly not locked:
+ * sys_munmap()->...
+ * ->unmap_page_range()->zap_pte_range()->set_page_dirty().
+ */
+ vvp_write_pending(obj, cpg);
+#endif
+ RETURN(__set_page_dirty_nobuffers(vmpage));
+}
+
+#define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL
+
+static inline int ll_get_user_pages(int rw, unsigned long user_addr,
+ size_t size, struct page ***pages,
+ int *max_pages)
+{
+ int result = -ENOMEM;
+
+ /* set an arbitrary limit to prevent arithmetic overflow */
+ if (size > MAX_DIRECTIO_SIZE) {
+ *pages = NULL;
+ return -EFBIG;
+ }
+
+ *max_pages = (user_addr + size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ *max_pages -= user_addr >> PAGE_CACHE_SHIFT;
+
+ OBD_ALLOC_LARGE(*pages, *max_pages * sizeof(**pages));
+ if (*pages) {
+ down_read(&current->mm->mmap_sem);
+ result = get_user_pages(current, current->mm, user_addr,
+ *max_pages, (rw == READ), 0, *pages,
+ NULL);
+ up_read(&current->mm->mmap_sem);
+ if (unlikely(result <= 0))
+ OBD_FREE_LARGE(*pages, *max_pages * sizeof(**pages));
+ }
+
+ return result;
+}
+
+/* ll_free_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer */
+static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+ int i;
+
+ for (i = 0; i < npages; i++) {
+ if (pages[i] == NULL)
+ break;
+ if (do_dirty)
+ set_page_dirty_lock(pages[i]);
+ page_cache_release(pages[i]);
+ }
+
+ OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+}
+
+ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct ll_dio_pages *pv)
+{
+ struct cl_page *clp;
+ struct cl_2queue *queue;
+ struct cl_object *obj = io->ci_obj;
+ int i;
+ ssize_t rc = 0;
+ loff_t file_offset = pv->ldp_start_offset;
+ long size = pv->ldp_size;
+ int page_count = pv->ldp_nr;
+ struct page **pages = pv->ldp_pages;
+ long page_size = cl_page_size(obj);
+ bool do_io;
+ int io_pages = 0;
+ ENTRY;
+
+ queue = &io->ci_queue;
+ cl_2queue_init(queue);
+ for (i = 0; i < page_count; i++) {
+ if (pv->ldp_offsets)
+ file_offset = pv->ldp_offsets[i];
+
+ LASSERT(!(file_offset & (page_size - 1)));
+ clp = cl_page_find(env, obj, cl_index(obj, file_offset),
+ pv->ldp_pages[i], CPT_TRANSIENT);
+ if (IS_ERR(clp)) {
+ rc = PTR_ERR(clp);
+ break;
+ }
+
+ rc = cl_page_own(env, io, clp);
+ if (rc) {
+ LASSERT(clp->cp_state == CPS_FREEING);
+ cl_page_put(env, clp);
+ break;
+ }
+
+ do_io = true;
+
+ /* check the page type: if the page is a host page, then do
+ * write directly */
+ if (clp->cp_type == CPT_CACHEABLE) {
+ struct page *vmpage = cl_page_vmpage(env, clp);
+ struct page *src_page;
+ struct page *dst_page;
+ void *src;
+ void *dst;
+
+ src_page = (rw == WRITE) ? pages[i] : vmpage;
+ dst_page = (rw == WRITE) ? vmpage : pages[i];
+
+ src = ll_kmap_atomic(src_page, KM_USER0);
+ dst = ll_kmap_atomic(dst_page, KM_USER1);
+ memcpy(dst, src, min(page_size, size));
+ ll_kunmap_atomic(dst, KM_USER1);
+ ll_kunmap_atomic(src, KM_USER0);
+
+ /* make sure page will be added to the transfer by
+ * cl_io_submit()->...->vvp_page_prep_write(). */
+ if (rw == WRITE)
+ set_page_dirty(vmpage);
+
+ if (rw == READ) {
+ /* do not issue the page for read, since it
+ * may reread a ra page which has NOT uptodate
+ * bit set. */
+ cl_page_disown(env, io, clp);
+ do_io = false;
+ }
+ }
+
+ if (likely(do_io)) {
+ cl_2queue_add(queue, clp);
+
+ /*
+ * Set page clip to tell transfer formation engine
+ * that page has to be sent even if it is beyond KMS.
+ */
+ cl_page_clip(env, clp, 0, min(size, page_size));
+
+ ++io_pages;
+ }
+
+ /* drop the reference count for cl_page_find */
+ cl_page_put(env, clp);
+ size -= page_size;
+ file_offset += page_size;
+ }
+
+ if (rc == 0 && io_pages) {
+ rc = cl_io_submit_sync(env, io,
+ rw == READ ? CRT_READ : CRT_WRITE,
+ queue, 0);
+ }
+ if (rc == 0)
+ rc = pv->ldp_size;
+
+ cl_2queue_discard(env, io, queue);
+ cl_2queue_disown(env, io, queue);
+ cl_2queue_fini(env, queue);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ll_direct_rw_pages);
+
+static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct address_space *mapping,
+ size_t size, loff_t file_offset,
+ struct page **pages, int page_count)
+{
+ struct ll_dio_pages pvec = { .ldp_pages = pages,
+ .ldp_nr = page_count,
+ .ldp_size = size,
+ .ldp_offsets = NULL,
+ .ldp_start_offset = file_offset
+ };
+
+ return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+}
+
+#ifdef KMALLOC_MAX_SIZE
+#define MAX_MALLOC KMALLOC_MAX_SIZE
+#else
+#define MAX_MALLOC (128 * 1024)
+#endif
+
+/* This is the maximum size of a single O_DIRECT request, based on the
+ * kmalloc limit. We need to fit all of the brw_page structs, each one
+ * representing PAGE_SIZE worth of user data, into a single buffer, and
+ * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
+ * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */
+#define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
+ ~(DT_MAX_BRW_SIZE - 1))
+static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t file_offset,
+ unsigned long nr_segs)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct ccc_object *obj = cl_inode2ccc(inode);
+ long count = iov_length(iov, nr_segs);
+ long tot_bytes = 0, result = 0;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ unsigned long seg = 0;
+ long size = MAX_DIO_SIZE;
+ int refcheck;
+ ENTRY;
+
+ if (!lli->lli_has_smd)
+ RETURN(-EBADF);
+
+ /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
+ if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+ RETURN(-EINVAL);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), size=%lu (max %lu), "
+ "offset=%lld=%llx, pages %lu (max %lu)\n",
+ inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
+ file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
+ MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+
+ /* Check that all user buffers are aligned as well */
+ for (seg = 0; seg < nr_segs; seg++) {
+ if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
+ (iov[seg].iov_len & ~CFS_PAGE_MASK))
+ RETURN(-EINVAL);
+ }
+
+ env = cl_env_get(&refcheck);
+ LASSERT(!IS_ERR(env));
+ io = ccc_env_io(env)->cui_cl.cis_io;
+ LASSERT(io != NULL);
+
+ /* 0. Need locking between buffered and direct access. and race with
+ * size changing by concurrent truncates and writes.
+ * 1. Need inode mutex to operate transient pages.
+ */
+ if (rw == READ)
+ mutex_lock(&inode->i_mutex);
+
+ LASSERT(obj->cob_transient_pages == 0);
+ for (seg = 0; seg < nr_segs; seg++) {
+ long iov_left = iov[seg].iov_len;
+ unsigned long user_addr = (unsigned long)iov[seg].iov_base;
+
+ if (rw == READ) {
+ if (file_offset >= i_size_read(inode))
+ break;
+ if (file_offset + iov_left > i_size_read(inode))
+ iov_left = i_size_read(inode) - file_offset;
+ }
+
+ while (iov_left > 0) {
+ struct page **pages;
+ int page_count, max_pages = 0;
+ long bytes;
+
+ bytes = min(size, iov_left);
+ page_count = ll_get_user_pages(rw, user_addr, bytes,
+ &pages, &max_pages);
+ if (likely(page_count > 0)) {
+ if (unlikely(page_count < max_pages))
+ bytes = page_count << PAGE_CACHE_SHIFT;
+ result = ll_direct_IO_26_seg(env, io, rw, inode,
+ file->f_mapping,
+ bytes, file_offset,
+ pages, page_count);
+ ll_free_user_pages(pages, max_pages, rw==READ);
+ } else if (page_count == 0) {
+ GOTO(out, result = -EFAULT);
+ } else {
+ result = page_count;
+ }
+ if (unlikely(result <= 0)) {
+ /* If we can't allocate a large enough buffer
+ * for the request, shrink it to a smaller
+ * PAGE_SIZE multiple and try again.
+ * We should always be able to kmalloc for a
+ * page worth of page pointers = 4MB on i386. */
+ if (result == -ENOMEM &&
+ size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+ PAGE_CACHE_SIZE) {
+ size = ((((size / 2) - 1) |
+ ~CFS_PAGE_MASK) + 1) &
+ CFS_PAGE_MASK;
+ CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+ size);
+ continue;
+ }
+
+ GOTO(out, result);
+ }
+
+ tot_bytes += result;
+ file_offset += result;
+ iov_left -= result;
+ user_addr += result;
+ }
+ }
+out:
+ LASSERT(obj->cob_transient_pages == 0);
+ if (rw == READ)
+ mutex_unlock(&inode->i_mutex);
+
+ if (tot_bytes > 0) {
+ if (rw == WRITE) {
+ struct lov_stripe_md *lsm;
+
+ lsm = ccc_inode_lsm_get(inode);
+ LASSERT(lsm != NULL);
+ lov_stripe_lock(lsm);
+ obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0);
+ lov_stripe_unlock(lsm);
+ ccc_inode_lsm_put(inode, lsm);
+ }
+ }
+
+ cl_env_put(env, &refcheck);
+ RETURN(tot_bytes ? : result);
+}
+
+static int ll_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct page *page;
+ int rc;
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ ENTRY;
+
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ RETURN(-ENOMEM);
+
+ *pagep = page;
+
+ rc = ll_prepare_write(file, page, from, from + len);
+ if (rc) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ RETURN(rc);
+}
+
+static int ll_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ int rc;
+
+ rc = ll_commit_write(file, page, from, from + copied);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return rc ?: copied;
+}
+
+#ifdef CONFIG_MIGRATION
+int ll_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page
+ , enum migrate_mode mode
+ )
+{
+ /* Always fail page migration until we have a proper implementation */
+ return -EIO;
+}
+#endif
+
+#ifndef MS_HAS_NEW_AOPS
+struct address_space_operations ll_aops = {
+ .readpage = ll_readpage,
+// .readpages = ll_readpages,
+ .direct_IO = ll_direct_IO_26,
+ .writepage = ll_writepage,
+ .writepages = ll_writepages,
+ .set_page_dirty = ll_set_page_dirty,
+ .write_begin = ll_write_begin,
+ .write_end = ll_write_end,
+ .invalidatepage = ll_invalidatepage,
+ .releasepage = (void *)ll_releasepage,
+#ifdef CONFIG_MIGRATION
+ .migratepage = ll_migratepage,
+#endif
+ .bmap = NULL
+};
+#else
+struct address_space_operations_ext ll_aops = {
+ .orig_aops.readpage = ll_readpage,
+// .orig_aops.readpages = ll_readpages,
+ .orig_aops.direct_IO = ll_direct_IO_26,
+ .orig_aops.writepage = ll_writepage,
+ .orig_aops.writepages = ll_writepages,
+ .orig_aops.set_page_dirty = ll_set_page_dirty,
+ .orig_aops.prepare_write = ll_prepare_write,
+ .orig_aops.commit_write = ll_commit_write,
+ .orig_aops.invalidatepage = ll_invalidatepage,
+ .orig_aops.releasepage = ll_releasepage,
+#ifdef CONFIG_MIGRATION
+ .orig_aops.migratepage = ll_migratepage,
+#endif
+ .orig_aops.bmap = NULL,
+ .write_begin = ll_write_begin,
+ .write_end = ll_write_end
+};
+#endif
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
new file mode 100644
index 000000000000..7747f8f2079d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -0,0 +1,1722 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include "llite_internal.h"
+
+#define SA_OMITTED_ENTRY_MAX 8ULL
+
+typedef enum {
+ /** negative values are for error cases */
+ SA_ENTRY_INIT = 0, /** init entry */
+ SA_ENTRY_SUCC = 1, /** stat succeed */
+ SA_ENTRY_INVA = 2, /** invalid entry */
+ SA_ENTRY_DEST = 3, /** entry to be destroyed */
+} se_stat_t;
+
+struct ll_sa_entry {
+ /* link into sai->sai_entries */
+ struct list_head se_link;
+ /* link into sai->sai_entries_{received,stated} */
+ struct list_head se_list;
+ /* link into sai hash table locally */
+ struct list_head se_hash;
+ /* entry reference count */
+ atomic_t se_refcount;
+ /* entry index in the sai */
+ __u64 se_index;
+ /* low layer ldlm lock handle */
+ __u64 se_handle;
+ /* entry status */
+ se_stat_t se_stat;
+ /* entry size, contains name */
+ int se_size;
+ /* pointer to async getattr enqueue info */
+ struct md_enqueue_info *se_minfo;
+ /* pointer to the async getattr request */
+ struct ptlrpc_request *se_req;
+ /* pointer to the target inode */
+ struct inode *se_inode;
+ /* entry name */
+ struct qstr se_qstr;
+};
+
+static unsigned int sai_generation = 0;
+static DEFINE_SPINLOCK(sai_generation_lock);
+
+static inline int ll_sa_entry_unhashed(struct ll_sa_entry *entry)
+{
+ return list_empty(&entry->se_hash);
+}
+
+/*
+ * The entry only can be released by the caller, it is necessary to hold lock.
+ */
+static inline int ll_sa_entry_stated(struct ll_sa_entry *entry)
+{
+ smp_rmb();
+ return (entry->se_stat != SA_ENTRY_INIT);
+}
+
+static inline int ll_sa_entry_hash(int val)
+{
+ return val & LL_SA_CACHE_MASK;
+}
+
+/*
+ * Insert entry to hash SA table.
+ */
+static inline void
+ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+{
+ int i = ll_sa_entry_hash(entry->se_qstr.hash);
+
+ spin_lock(&sai->sai_cache_lock[i]);
+ list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
+ spin_unlock(&sai->sai_cache_lock[i]);
+}
+
+/*
+ * Remove entry from SA table.
+ */
+static inline void
+ll_sa_entry_unhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+{
+ int i = ll_sa_entry_hash(entry->se_qstr.hash);
+
+ spin_lock(&sai->sai_cache_lock[i]);
+ list_del_init(&entry->se_hash);
+ spin_unlock(&sai->sai_cache_lock[i]);
+}
+
+static inline int agl_should_run(struct ll_statahead_info *sai,
+ struct inode *inode)
+{
+ return (inode != NULL && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
+}
+
+static inline struct ll_sa_entry *
+sa_first_received_entry(struct ll_statahead_info *sai)
+{
+ return list_entry(sai->sai_entries_received.next,
+ struct ll_sa_entry, se_list);
+}
+
+static inline struct ll_inode_info *
+agl_first_entry(struct ll_statahead_info *sai)
+{
+ return list_entry(sai->sai_entries_agl.next,
+ struct ll_inode_info, lli_agl_list);
+}
+
+static inline int sa_sent_full(struct ll_statahead_info *sai)
+{
+ return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
+}
+
+static inline int sa_received_empty(struct ll_statahead_info *sai)
+{
+ return list_empty(&sai->sai_entries_received);
+}
+
+static inline int agl_list_empty(struct ll_statahead_info *sai)
+{
+ return list_empty(&sai->sai_entries_agl);
+}
+
+/**
+ * (1) hit ratio less than 80%
+ * or
+ * (2) consecutive miss more than 8
+ * then means low hit.
+ */
+static inline int sa_low_hit(struct ll_statahead_info *sai)
+{
+ return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) ||
+ (sai->sai_consecutive_miss > 8));
+}
+
+/*
+ * If the given index is behind of statahead window more than
+ * SA_OMITTED_ENTRY_MAX, then it is old.
+ */
+static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
+{
+ return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX <
+ sai->sai_index);
+}
+
+/*
+ * Insert it into sai_entries tail when init.
+ */
+static struct ll_sa_entry *
+ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
+ const char *name, int len)
+{
+ struct ll_inode_info *lli;
+ struct ll_sa_entry *entry;
+ int entry_size;
+ char *dname;
+ ENTRY;
+
+ entry_size = sizeof(struct ll_sa_entry) + (len & ~3) + 4;
+ OBD_ALLOC(entry, entry_size);
+ if (unlikely(entry == NULL))
+ RETURN(ERR_PTR(-ENOMEM));
+
+ CDEBUG(D_READA, "alloc sa entry %.*s(%p) index "LPU64"\n",
+ len, name, entry, index);
+
+ entry->se_index = index;
+
+ /*
+ * Statahead entry reference rules:
+ *
+ * 1) When statahead entry is initialized, its reference is set as 2.
+ * One reference is used by the directory scanner. When the scanner
+ * searches the statahead cache for the given name, it can perform
+ * lockless hash lookup (only the scanner can remove entry from hash
+ * list), and once found, it needn't to call "atomic_inc()" for the
+ * entry reference. So the performance is improved. After using the
+ * statahead entry, the scanner will call "atomic_dec()" to drop the
+ * reference held when initialization. If it is the last reference,
+ * the statahead entry will be freed.
+ *
+ * 2) All other threads, including statahead thread and ptlrpcd thread,
+ * when they process the statahead entry, the reference for target
+ * should be held to guarantee the entry will not be released by the
+ * directory scanner. After processing the entry, these threads will
+ * drop the entry reference. If it is the last reference, the entry
+ * will be freed.
+ *
+ * The second reference when initializes the statahead entry is used
+ * by the statahead thread, following the rule 2).
+ */
+ atomic_set(&entry->se_refcount, 2);
+ entry->se_stat = SA_ENTRY_INIT;
+ entry->se_size = entry_size;
+ dname = (char *)entry + sizeof(struct ll_sa_entry);
+ memcpy(dname, name, len);
+ dname[len] = 0;
+ entry->se_qstr.hash = full_name_hash(name, len);
+ entry->se_qstr.len = len;
+ entry->se_qstr.name = dname;
+
+ lli = ll_i2info(sai->sai_inode);
+ spin_lock(&lli->lli_sa_lock);
+ list_add_tail(&entry->se_link, &sai->sai_entries);
+ INIT_LIST_HEAD(&entry->se_list);
+ ll_sa_entry_enhash(sai, entry);
+ spin_unlock(&lli->lli_sa_lock);
+
+ atomic_inc(&sai->sai_cache_count);
+
+ RETURN(entry);
+}
+
+/*
+ * Used by the directory scanner to search entry with name.
+ *
+ * Only the caller can remove the entry from hash, so it is unnecessary to hold
+ * hash lock. It is caller's duty to release the init refcount on the entry, so
+ * it is also unnecessary to increase refcount on the entry.
+ */
+static struct ll_sa_entry *
+ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr)
+{
+ struct ll_sa_entry *entry;
+ int i = ll_sa_entry_hash(qstr->hash);
+
+ list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
+ if (entry->se_qstr.hash == qstr->hash &&
+ entry->se_qstr.len == qstr->len &&
+ memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0)
+ return entry;
+ }
+ return NULL;
+}
+
+/*
+ * Used by the async getattr request callback to find entry with index.
+ *
+ * Inside lli_sa_lock to prevent others to change the list during the search.
+ * It needs to increase entry refcount before returning to guarantee that the
+ * entry cannot be freed by others.
+ */
+static struct ll_sa_entry *
+ll_sa_entry_get_byindex(struct ll_statahead_info *sai, __u64 index)
+{
+ struct ll_sa_entry *entry;
+
+ list_for_each_entry(entry, &sai->sai_entries, se_link) {
+ if (entry->se_index == index) {
+ LASSERT(atomic_read(&entry->se_refcount) > 0);
+ atomic_inc(&entry->se_refcount);
+ return entry;
+ }
+ if (entry->se_index > index)
+ break;
+ }
+ return NULL;
+}
+
+static void ll_sa_entry_cleanup(struct ll_statahead_info *sai,
+ struct ll_sa_entry *entry)
+{
+ struct md_enqueue_info *minfo = entry->se_minfo;
+ struct ptlrpc_request *req = entry->se_req;
+
+ if (minfo) {
+ entry->se_minfo = NULL;
+ ll_intent_release(&minfo->mi_it);
+ iput(minfo->mi_dir);
+ OBD_FREE_PTR(minfo);
+ }
+
+ if (req) {
+ entry->se_req = NULL;
+ ptlrpc_req_finished(req);
+ }
+}
+
+static void ll_sa_entry_put(struct ll_statahead_info *sai,
+ struct ll_sa_entry *entry)
+{
+ if (atomic_dec_and_test(&entry->se_refcount)) {
+ CDEBUG(D_READA, "free sa entry %.*s(%p) index "LPU64"\n",
+ entry->se_qstr.len, entry->se_qstr.name, entry,
+ entry->se_index);
+
+ LASSERT(list_empty(&entry->se_link));
+ LASSERT(list_empty(&entry->se_list));
+ LASSERT(ll_sa_entry_unhashed(entry));
+
+ ll_sa_entry_cleanup(sai, entry);
+ if (entry->se_inode)
+ iput(entry->se_inode);
+
+ OBD_FREE(entry, entry->se_size);
+ atomic_dec(&sai->sai_cache_count);
+ }
+}
+
+static inline void
+do_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+{
+ struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
+
+ LASSERT(!ll_sa_entry_unhashed(entry));
+ LASSERT(!list_empty(&entry->se_link));
+
+ ll_sa_entry_unhash(sai, entry);
+
+ spin_lock(&lli->lli_sa_lock);
+ entry->se_stat = SA_ENTRY_DEST;
+ list_del_init(&entry->se_link);
+ if (likely(!list_empty(&entry->se_list)))
+ list_del_init(&entry->se_list);
+ spin_unlock(&lli->lli_sa_lock);
+
+ ll_sa_entry_put(sai, entry);
+}
+
+/*
+ * Delete it from sai_entries_stated list when fini.
+ */
+static void
+ll_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+{
+ struct ll_sa_entry *pos, *next;
+
+ if (entry)
+ do_sa_entry_fini(sai, entry);
+
+ /* drop old entry, only 'scanner' process does this, no need to lock */
+ list_for_each_entry_safe(pos, next, &sai->sai_entries, se_link) {
+ if (!is_omitted_entry(sai, pos->se_index))
+ break;
+ do_sa_entry_fini(sai, pos);
+ }
+}
+
+/*
+ * Inside lli_sa_lock.
+ */
+static void
+do_sa_entry_to_stated(struct ll_statahead_info *sai,
+ struct ll_sa_entry *entry, se_stat_t stat)
+{
+ struct ll_sa_entry *se;
+ struct list_head *pos = &sai->sai_entries_stated;
+
+ if (!list_empty(&entry->se_list))
+ list_del_init(&entry->se_list);
+
+ list_for_each_entry_reverse(se, &sai->sai_entries_stated, se_list) {
+ if (se->se_index < entry->se_index) {
+ pos = &se->se_list;
+ break;
+ }
+ }
+
+ list_add(&entry->se_list, pos);
+ entry->se_stat = stat;
+}
+
+/*
+ * Move entry to sai_entries_stated and sort with the index.
+ * \retval 1 -- entry to be destroyed.
+ * \retval 0 -- entry is inserted into stated list.
+ */
+static int
+ll_sa_entry_to_stated(struct ll_statahead_info *sai,
+ struct ll_sa_entry *entry, se_stat_t stat)
+{
+ struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
+ int ret = 1;
+
+ ll_sa_entry_cleanup(sai, entry);
+
+ spin_lock(&lli->lli_sa_lock);
+ if (likely(entry->se_stat != SA_ENTRY_DEST)) {
+ do_sa_entry_to_stated(sai, entry, stat);
+ ret = 0;
+ }
+ spin_unlock(&lli->lli_sa_lock);
+
+ return ret;
+}
+
+/*
+ * Insert inode into the list of sai_entries_agl.
+ */
+static void ll_agl_add(struct ll_statahead_info *sai,
+ struct inode *inode, int index)
+{
+ struct ll_inode_info *child = ll_i2info(inode);
+ struct ll_inode_info *parent = ll_i2info(sai->sai_inode);
+ int added = 0;
+
+ spin_lock(&child->lli_agl_lock);
+ if (child->lli_agl_index == 0) {
+ child->lli_agl_index = index;
+ spin_unlock(&child->lli_agl_lock);
+
+ LASSERT(list_empty(&child->lli_agl_list));
+
+ igrab(inode);
+ spin_lock(&parent->lli_agl_lock);
+ if (agl_list_empty(sai))
+ added = 1;
+ list_add_tail(&child->lli_agl_list, &sai->sai_entries_agl);
+ spin_unlock(&parent->lli_agl_lock);
+ } else {
+ spin_unlock(&child->lli_agl_lock);
+ }
+
+ if (added > 0)
+ wake_up(&sai->sai_agl_thread.t_ctl_waitq);
+}
+
+static struct ll_statahead_info *ll_sai_alloc(void)
+{
+ struct ll_statahead_info *sai;
+ int i;
+ ENTRY;
+
+ OBD_ALLOC_PTR(sai);
+ if (!sai)
+ RETURN(NULL);
+
+ atomic_set(&sai->sai_refcount, 1);
+
+ spin_lock(&sai_generation_lock);
+ sai->sai_generation = ++sai_generation;
+ if (unlikely(sai_generation == 0))
+ sai->sai_generation = ++sai_generation;
+ spin_unlock(&sai_generation_lock);
+
+ sai->sai_max = LL_SA_RPC_MIN;
+ sai->sai_index = 1;
+ init_waitqueue_head(&sai->sai_waitq);
+ init_waitqueue_head(&sai->sai_thread.t_ctl_waitq);
+ init_waitqueue_head(&sai->sai_agl_thread.t_ctl_waitq);
+
+ INIT_LIST_HEAD(&sai->sai_entries);
+ INIT_LIST_HEAD(&sai->sai_entries_received);
+ INIT_LIST_HEAD(&sai->sai_entries_stated);
+ INIT_LIST_HEAD(&sai->sai_entries_agl);
+
+ for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
+ INIT_LIST_HEAD(&sai->sai_cache[i]);
+ spin_lock_init(&sai->sai_cache_lock[i]);
+ }
+ atomic_set(&sai->sai_cache_count, 0);
+
+ RETURN(sai);
+}
+
+static inline struct ll_statahead_info *
+ll_sai_get(struct ll_statahead_info *sai)
+{
+ atomic_inc(&sai->sai_refcount);
+ return sai;
+}
+
+static void ll_sai_put(struct ll_statahead_info *sai)
+{
+ struct inode *inode = sai->sai_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ ENTRY;
+
+ if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
+ struct ll_sa_entry *entry, *next;
+
+ if (unlikely(atomic_read(&sai->sai_refcount) > 0)) {
+ /* It is race case, the interpret callback just hold
+ * a reference count */
+ spin_unlock(&lli->lli_sa_lock);
+ RETURN_EXIT;
+ }
+
+ LASSERT(lli->lli_opendir_key == NULL);
+ LASSERT(thread_is_stopped(&sai->sai_thread));
+ LASSERT(thread_is_stopped(&sai->sai_agl_thread));
+
+ lli->lli_sai = NULL;
+ lli->lli_opendir_pid = 0;
+ spin_unlock(&lli->lli_sa_lock);
+
+ if (sai->sai_sent > sai->sai_replied)
+ CDEBUG(D_READA,"statahead for dir "DFID" does not "
+ "finish: [sent:"LPU64"] [replied:"LPU64"]\n",
+ PFID(&lli->lli_fid),
+ sai->sai_sent, sai->sai_replied);
+
+ list_for_each_entry_safe(entry, next,
+ &sai->sai_entries, se_link)
+ do_sa_entry_fini(sai, entry);
+
+ LASSERT(list_empty(&sai->sai_entries));
+ LASSERT(sa_received_empty(sai));
+ LASSERT(list_empty(&sai->sai_entries_stated));
+
+ LASSERT(atomic_read(&sai->sai_cache_count) == 0);
+ LASSERT(agl_list_empty(sai));
+
+ iput(inode);
+ OBD_FREE_PTR(sai);
+ }
+
+ EXIT;
+}
+
+/* Do NOT forget to drop inode refcount when into sai_entries_agl. */
+static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ __u64 index = lli->lli_agl_index;
+ int rc;
+ ENTRY;
+
+ LASSERT(list_empty(&lli->lli_agl_list));
+
+ /* AGL maybe fall behind statahead with one entry */
+ if (is_omitted_entry(sai, index + 1)) {
+ lli->lli_agl_index = 0;
+ iput(inode);
+ RETURN_EXIT;
+ }
+
+ /* Someone is in glimpse (sync or async), do nothing. */
+ rc = down_write_trylock(&lli->lli_glimpse_sem);
+ if (rc == 0) {
+ lli->lli_agl_index = 0;
+ iput(inode);
+ RETURN_EXIT;
+ }
+
+ /*
+ * Someone triggered glimpse within 1 sec before.
+ * 1) The former glimpse succeeded with glimpse lock granted by OST, and
+ * if the lock is still cached on client, AGL needs to do nothing. If
+ * it is cancelled by other client, AGL maybe cannot obtaion new lock
+ * for no glimpse callback triggered by AGL.
+ * 2) The former glimpse succeeded, but OST did not grant glimpse lock.
+ * Under such case, it is quite possible that the OST will not grant
+ * glimpse lock for AGL also.
+ * 3) The former glimpse failed, compared with other two cases, it is
+ * relative rare. AGL can ignore such case, and it will not muchly
+ * affect the performance.
+ */
+ if (lli->lli_glimpse_time != 0 &&
+ cfs_time_before(cfs_time_shift(-1), lli->lli_glimpse_time)) {
+ up_write(&lli->lli_glimpse_sem);
+ lli->lli_agl_index = 0;
+ iput(inode);
+ RETURN_EXIT;
+ }
+
+ CDEBUG(D_READA, "Handling (init) async glimpse: inode = "
+ DFID", idx = "LPU64"\n", PFID(&lli->lli_fid), index);
+
+ cl_agl(inode);
+ lli->lli_agl_index = 0;
+ lli->lli_glimpse_time = cfs_time_current();
+ up_write(&lli->lli_glimpse_sem);
+
+ CDEBUG(D_READA, "Handled (init) async glimpse: inode= "
+ DFID", idx = "LPU64", rc = %d\n",
+ PFID(&lli->lli_fid), index, rc);
+
+ iput(inode);
+
+ EXIT;
+}
+
+static void ll_post_statahead(struct ll_statahead_info *sai)
+{
+ struct inode *dir = sai->sai_inode;
+ struct inode *child;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_sa_entry *entry;
+ struct md_enqueue_info *minfo;
+ struct lookup_intent *it;
+ struct ptlrpc_request *req;
+ struct mdt_body *body;
+ int rc = 0;
+ ENTRY;
+
+ spin_lock(&lli->lli_sa_lock);
+ if (unlikely(sa_received_empty(sai))) {
+ spin_unlock(&lli->lli_sa_lock);
+ RETURN_EXIT;
+ }
+ entry = sa_first_received_entry(sai);
+ atomic_inc(&entry->se_refcount);
+ list_del_init(&entry->se_list);
+ spin_unlock(&lli->lli_sa_lock);
+
+ LASSERT(entry->se_handle != 0);
+
+ minfo = entry->se_minfo;
+ it = &minfo->mi_it;
+ req = entry->se_req;
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EFAULT);
+
+ child = entry->se_inode;
+ if (child == NULL) {
+ /*
+ * lookup.
+ */
+ LASSERT(fid_is_zero(&minfo->mi_data.op_fid2));
+
+ /* XXX: No fid in reply, this is probaly cross-ref case.
+ * SA can't handle it yet. */
+ if (body->valid & OBD_MD_MDS)
+ GOTO(out, rc = -EAGAIN);
+ } else {
+ /*
+ * revalidate.
+ */
+ /* unlinked and re-created with the same name */
+ if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1))){
+ entry->se_inode = NULL;
+ iput(child);
+ child = NULL;
+ }
+ }
+
+ it->d.lustre.it_lock_handle = entry->se_handle;
+ rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
+ if (rc != 1)
+ GOTO(out, rc = -EAGAIN);
+
+ rc = ll_prep_inode(&child, req, dir->i_sb, it);
+ if (rc)
+ GOTO(out, rc);
+
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+ child, child->i_ino, child->i_generation);
+ ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
+
+ entry->se_inode = child;
+
+ if (agl_should_run(sai, child))
+ ll_agl_add(sai, child, entry->se_index);
+
+ EXIT;
+
+out:
+ /* The "ll_sa_entry_to_stated()" will drop related ldlm ibits lock
+ * reference count by calling "ll_intent_drop_lock()" in spite of the
+ * above operations failed or not. Do not worry about calling
+ * "ll_intent_drop_lock()" more than once. */
+ rc = ll_sa_entry_to_stated(sai, entry,
+ rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
+ if (rc == 0 && entry->se_index == sai->sai_index_wait)
+ wake_up(&sai->sai_waitq);
+ ll_sa_entry_put(sai, entry);
+}
+
+static int ll_statahead_interpret(struct ptlrpc_request *req,
+ struct md_enqueue_info *minfo, int rc)
+{
+ struct lookup_intent *it = &minfo->mi_it;
+ struct inode *dir = minfo->mi_dir;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = NULL;
+ struct ll_sa_entry *entry;
+ int wakeup;
+ ENTRY;
+
+ if (it_disposition(it, DISP_LOOKUP_NEG))
+ rc = -ENOENT;
+
+ spin_lock(&lli->lli_sa_lock);
+ /* stale entry */
+ if (unlikely(lli->lli_sai == NULL ||
+ lli->lli_sai->sai_generation != minfo->mi_generation)) {
+ spin_unlock(&lli->lli_sa_lock);
+ GOTO(out, rc = -ESTALE);
+ } else {
+ sai = ll_sai_get(lli->lli_sai);
+ if (unlikely(!thread_is_running(&sai->sai_thread))) {
+ sai->sai_replied++;
+ spin_unlock(&lli->lli_sa_lock);
+ GOTO(out, rc = -EBADFD);
+ }
+
+ entry = ll_sa_entry_get_byindex(sai, minfo->mi_cbdata);
+ if (entry == NULL) {
+ sai->sai_replied++;
+ spin_unlock(&lli->lli_sa_lock);
+ GOTO(out, rc = -EIDRM);
+ }
+
+ if (rc != 0) {
+ do_sa_entry_to_stated(sai, entry, SA_ENTRY_INVA);
+ wakeup = (entry->se_index == sai->sai_index_wait);
+ } else {
+ entry->se_minfo = minfo;
+ entry->se_req = ptlrpc_request_addref(req);
+ /* Release the async ibits lock ASAP to avoid deadlock
+ * when statahead thread tries to enqueue lock on parent
+ * for readpage and other tries to enqueue lock on child
+ * with parent's lock held, for example: unlink. */
+ entry->se_handle = it->d.lustre.it_lock_handle;
+ ll_intent_drop_lock(it);
+ wakeup = sa_received_empty(sai);
+ list_add_tail(&entry->se_list,
+ &sai->sai_entries_received);
+ }
+ sai->sai_replied++;
+ spin_unlock(&lli->lli_sa_lock);
+
+ ll_sa_entry_put(sai, entry);
+ if (wakeup)
+ wake_up(&sai->sai_thread.t_ctl_waitq);
+ }
+
+ EXIT;
+
+out:
+ if (rc != 0) {
+ ll_intent_release(it);
+ iput(dir);
+ OBD_FREE_PTR(minfo);
+ }
+ if (sai != NULL)
+ ll_sai_put(sai);
+ return rc;
+}
+
+static void sa_args_fini(struct md_enqueue_info *minfo,
+ struct ldlm_enqueue_info *einfo)
+{
+ LASSERT(minfo && einfo);
+ iput(minfo->mi_dir);
+ capa_put(minfo->mi_data.op_capa1);
+ capa_put(minfo->mi_data.op_capa2);
+ OBD_FREE_PTR(minfo);
+ OBD_FREE_PTR(einfo);
+}
+
+/**
+ * There is race condition between "capa_put" and "ll_statahead_interpret" for
+ * accessing "op_data.op_capa[1,2]" as following:
+ * "capa_put" releases "op_data.op_capa[1,2]"'s reference count after calling
+ * "md_intent_getattr_async". But "ll_statahead_interpret" maybe run first, and
+ * fill "op_data.op_capa[1,2]" as POISON, then cause "capa_put" access invalid
+ * "ocapa". So here reserve "op_data.op_capa[1,2]" in "pcapa" before calling
+ * "md_intent_getattr_async".
+ */
+static int sa_args_init(struct inode *dir, struct inode *child,
+ struct ll_sa_entry *entry, struct md_enqueue_info **pmi,
+ struct ldlm_enqueue_info **pei,
+ struct obd_capa **pcapa)
+{
+ struct qstr *qstr = &entry->se_qstr;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct md_enqueue_info *minfo;
+ struct ldlm_enqueue_info *einfo;
+ struct md_op_data *op_data;
+
+ OBD_ALLOC_PTR(einfo);
+ if (einfo == NULL)
+ return -ENOMEM;
+
+ OBD_ALLOC_PTR(minfo);
+ if (minfo == NULL) {
+ OBD_FREE_PTR(einfo);
+ return -ENOMEM;
+ }
+
+ op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, qstr->name,
+ qstr->len, 0, LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data)) {
+ OBD_FREE_PTR(einfo);
+ OBD_FREE_PTR(minfo);
+ return PTR_ERR(op_data);
+ }
+
+ minfo->mi_it.it_op = IT_GETATTR;
+ minfo->mi_dir = igrab(dir);
+ minfo->mi_cb = ll_statahead_interpret;
+ minfo->mi_generation = lli->lli_sai->sai_generation;
+ minfo->mi_cbdata = entry->se_index;
+
+ einfo->ei_type = LDLM_IBITS;
+ einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
+ einfo->ei_cb_bl = ll_md_blocking_ast;
+ einfo->ei_cb_cp = ldlm_completion_ast;
+ einfo->ei_cb_gl = NULL;
+ einfo->ei_cbdata = NULL;
+
+ *pmi = minfo;
+ *pei = einfo;
+ pcapa[0] = op_data->op_capa1;
+ pcapa[1] = op_data->op_capa2;
+
+ return 0;
+}
+
+static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry)
+{
+ struct md_enqueue_info *minfo;
+ struct ldlm_enqueue_info *einfo;
+ struct obd_capa *capas[2];
+ int rc;
+ ENTRY;
+
+ rc = sa_args_init(dir, NULL, entry, &minfo, &einfo, capas);
+ if (rc)
+ RETURN(rc);
+
+ rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
+ if (!rc) {
+ capa_put(capas[0]);
+ capa_put(capas[1]);
+ } else {
+ sa_args_fini(minfo, einfo);
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * similar to ll_revalidate_it().
+ * \retval 1 -- dentry valid
+ * \retval 0 -- will send stat-ahead request
+ * \retval others -- prepare stat-ahead request failed
+ */
+static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
+ struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct lookup_intent it = { .it_op = IT_GETATTR,
+ .d.lustre.it_lock_handle = 0 };
+ struct md_enqueue_info *minfo;
+ struct ldlm_enqueue_info *einfo;
+ struct obd_capa *capas[2];
+ int rc;
+ ENTRY;
+
+ if (unlikely(inode == NULL))
+ RETURN(1);
+
+ if (d_mountpoint(dentry))
+ RETURN(1);
+
+ if (unlikely(dentry == dentry->d_sb->s_root))
+ RETURN(1);
+
+ entry->se_inode = igrab(inode);
+ rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),NULL);
+ if (rc == 1) {
+ entry->se_handle = it.d.lustre.it_lock_handle;
+ ll_intent_release(&it);
+ RETURN(1);
+ }
+
+ rc = sa_args_init(dir, inode, entry, &minfo, &einfo, capas);
+ if (rc) {
+ entry->se_inode = NULL;
+ iput(inode);
+ RETURN(rc);
+ }
+
+ rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
+ if (!rc) {
+ capa_put(capas[0]);
+ capa_put(capas[1]);
+ } else {
+ entry->se_inode = NULL;
+ iput(inode);
+ sa_args_fini(minfo, einfo);
+ }
+
+ RETURN(rc);
+}
+
+static void ll_statahead_one(struct dentry *parent, const char* entry_name,
+ int entry_name_len)
+{
+ struct inode *dir = parent->d_inode;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = lli->lli_sai;
+ struct dentry *dentry = NULL;
+ struct ll_sa_entry *entry;
+ int rc;
+ int rc1;
+ ENTRY;
+
+ entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name,
+ entry_name_len);
+ if (IS_ERR(entry))
+ RETURN_EXIT;
+
+ dentry = d_lookup(parent, &entry->se_qstr);
+ if (!dentry) {
+ rc = do_sa_lookup(dir, entry);
+ } else {
+ rc = do_sa_revalidate(dir, entry, dentry);
+ if (rc == 1 && agl_should_run(sai, dentry->d_inode))
+ ll_agl_add(sai, dentry->d_inode, entry->se_index);
+ }
+
+ if (dentry != NULL)
+ dput(dentry);
+
+ if (rc) {
+ rc1 = ll_sa_entry_to_stated(sai, entry,
+ rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
+ if (rc1 == 0 && entry->se_index == sai->sai_index_wait)
+ wake_up(&sai->sai_waitq);
+ } else {
+ sai->sai_sent++;
+ }
+
+ sai->sai_index++;
+ /* drop one refcount on entry by ll_sa_entry_alloc */
+ ll_sa_entry_put(sai, entry);
+
+ EXIT;
+}
+
+static int ll_agl_thread(void *arg)
+{
+ struct dentry *parent = (struct dentry *)arg;
+ struct inode *dir = parent->d_inode;
+ struct ll_inode_info *plli = ll_i2info(dir);
+ struct ll_inode_info *clli;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai);
+ struct ptlrpc_thread *thread = &sai->sai_agl_thread;
+ struct l_wait_info lwi = { 0 };
+ ENTRY;
+
+ CDEBUG(D_READA, "agl thread started: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+
+ atomic_inc(&sbi->ll_agl_total);
+ spin_lock(&plli->lli_agl_lock);
+ sai->sai_agl_valid = 1;
+ thread_set_flags(thread, SVC_RUNNING);
+ spin_unlock(&plli->lli_agl_lock);
+ wake_up(&thread->t_ctl_waitq);
+
+ while (1) {
+ l_wait_event(thread->t_ctl_waitq,
+ !agl_list_empty(sai) ||
+ !thread_is_running(thread),
+ &lwi);
+
+ if (!thread_is_running(thread))
+ break;
+
+ spin_lock(&plli->lli_agl_lock);
+ /* The statahead thread maybe help to process AGL entries,
+ * so check whether list empty again. */
+ if (!agl_list_empty(sai)) {
+ clli = agl_first_entry(sai);
+ list_del_init(&clli->lli_agl_list);
+ spin_unlock(&plli->lli_agl_lock);
+ ll_agl_trigger(&clli->lli_vfs_inode, sai);
+ } else {
+ spin_unlock(&plli->lli_agl_lock);
+ }
+ }
+
+ spin_lock(&plli->lli_agl_lock);
+ sai->sai_agl_valid = 0;
+ while (!agl_list_empty(sai)) {
+ clli = agl_first_entry(sai);
+ list_del_init(&clli->lli_agl_list);
+ spin_unlock(&plli->lli_agl_lock);
+ clli->lli_agl_index = 0;
+ iput(&clli->lli_vfs_inode);
+ spin_lock(&plli->lli_agl_lock);
+ }
+ thread_set_flags(thread, SVC_STOPPED);
+ spin_unlock(&plli->lli_agl_lock);
+ wake_up(&thread->t_ctl_waitq);
+ ll_sai_put(sai);
+ CDEBUG(D_READA, "agl thread stopped: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+ RETURN(0);
+}
+
+static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
+{
+ struct ptlrpc_thread *thread = &sai->sai_agl_thread;
+ struct l_wait_info lwi = { 0 };
+ struct ll_inode_info *plli;
+ task_t *task;
+ ENTRY;
+
+ CDEBUG(D_READA, "start agl thread: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+
+ plli = ll_i2info(parent->d_inode);
+ task = kthread_run(ll_agl_thread, parent,
+ "ll_agl_%u", plli->lli_opendir_pid);
+ if (IS_ERR(task)) {
+ CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task));
+ thread_set_flags(thread, SVC_STOPPED);
+ RETURN_EXIT;
+ }
+
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_running(thread) || thread_is_stopped(thread),
+ &lwi);
+ EXIT;
+}
+
+static int ll_statahead_thread(void *arg)
+{
+ struct dentry *parent = (struct dentry *)arg;
+ struct inode *dir = parent->d_inode;
+ struct ll_inode_info *plli = ll_i2info(dir);
+ struct ll_inode_info *clli;
+ struct ll_sb_info *sbi = ll_i2sbi(dir);
+ struct ll_statahead_info *sai = ll_sai_get(plli->lli_sai);
+ struct ptlrpc_thread *thread = &sai->sai_thread;
+ struct ptlrpc_thread *agl_thread = &sai->sai_agl_thread;
+ struct page *page;
+ __u64 pos = 0;
+ int first = 0;
+ int rc = 0;
+ struct ll_dir_chain chain;
+ struct l_wait_info lwi = { 0 };
+ ENTRY;
+
+ CDEBUG(D_READA, "statahead thread started: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+
+ if (sbi->ll_flags & LL_SBI_AGL_ENABLED)
+ ll_start_agl(parent, sai);
+
+ atomic_inc(&sbi->ll_sa_total);
+ spin_lock(&plli->lli_sa_lock);
+ thread_set_flags(thread, SVC_RUNNING);
+ spin_unlock(&plli->lli_sa_lock);
+ wake_up(&thread->t_ctl_waitq);
+
+ ll_dir_chain_init(&chain);
+ page = ll_get_dir_page(dir, pos, &chain);
+
+ while (1) {
+ struct lu_dirpage *dp;
+ struct lu_dirent *ent;
+
+ if (IS_ERR(page)) {
+ rc = PTR_ERR(page);
+ CDEBUG(D_READA, "error reading dir "DFID" at "LPU64
+ "/"LPU64": [rc %d] [parent %u]\n",
+ PFID(ll_inode2fid(dir)), pos, sai->sai_index,
+ rc, plli->lli_opendir_pid);
+ GOTO(out, rc);
+ }
+
+ dp = page_address(page);
+ for (ent = lu_dirent_start(dp); ent != NULL;
+ ent = lu_dirent_next(ent)) {
+ __u64 hash;
+ int namelen;
+ char *name;
+
+ hash = le64_to_cpu(ent->lde_hash);
+ if (unlikely(hash < pos))
+ /*
+ * Skip until we find target hash value.
+ */
+ continue;
+
+ namelen = le16_to_cpu(ent->lde_namelen);
+ if (unlikely(namelen == 0))
+ /*
+ * Skip dummy record.
+ */
+ continue;
+
+ name = ent->lde_name;
+ if (name[0] == '.') {
+ if (namelen == 1) {
+ /*
+ * skip "."
+ */
+ continue;
+ } else if (name[1] == '.' && namelen == 2) {
+ /*
+ * skip ".."
+ */
+ continue;
+ } else if (!sai->sai_ls_all) {
+ /*
+ * skip hidden files.
+ */
+ sai->sai_skip_hidden++;
+ continue;
+ }
+ }
+
+ /*
+ * don't stat-ahead first entry.
+ */
+ if (unlikely(++first == 1))
+ continue;
+
+keep_it:
+ l_wait_event(thread->t_ctl_waitq,
+ !sa_sent_full(sai) ||
+ !sa_received_empty(sai) ||
+ !agl_list_empty(sai) ||
+ !thread_is_running(thread),
+ &lwi);
+
+interpret_it:
+ while (!sa_received_empty(sai))
+ ll_post_statahead(sai);
+
+ if (unlikely(!thread_is_running(thread))) {
+ ll_release_page(page, 0);
+ GOTO(out, rc = 0);
+ }
+
+ /* If no window for metadata statahead, but there are
+ * some AGL entries to be triggered, then try to help
+ * to process the AGL entries. */
+ if (sa_sent_full(sai)) {
+ spin_lock(&plli->lli_agl_lock);
+ while (!agl_list_empty(sai)) {
+ clli = agl_first_entry(sai);
+ list_del_init(&clli->lli_agl_list);
+ spin_unlock(&plli->lli_agl_lock);
+ ll_agl_trigger(&clli->lli_vfs_inode,
+ sai);
+
+ if (!sa_received_empty(sai))
+ goto interpret_it;
+
+ if (unlikely(
+ !thread_is_running(thread))) {
+ ll_release_page(page, 0);
+ GOTO(out, rc = 0);
+ }
+
+ if (!sa_sent_full(sai))
+ goto do_it;
+
+ spin_lock(&plli->lli_agl_lock);
+ }
+ spin_unlock(&plli->lli_agl_lock);
+
+ goto keep_it;
+ }
+
+do_it:
+ ll_statahead_one(parent, name, namelen);
+ }
+ pos = le64_to_cpu(dp->ldp_hash_end);
+ if (pos == MDS_DIR_END_OFF) {
+ /*
+ * End of directory reached.
+ */
+ ll_release_page(page, 0);
+ while (1) {
+ l_wait_event(thread->t_ctl_waitq,
+ !sa_received_empty(sai) ||
+ sai->sai_sent == sai->sai_replied||
+ !thread_is_running(thread),
+ &lwi);
+
+ while (!sa_received_empty(sai))
+ ll_post_statahead(sai);
+
+ if (unlikely(!thread_is_running(thread)))
+ GOTO(out, rc = 0);
+
+ if (sai->sai_sent == sai->sai_replied &&
+ sa_received_empty(sai))
+ break;
+ }
+
+ spin_lock(&plli->lli_agl_lock);
+ while (!agl_list_empty(sai) &&
+ thread_is_running(thread)) {
+ clli = agl_first_entry(sai);
+ list_del_init(&clli->lli_agl_list);
+ spin_unlock(&plli->lli_agl_lock);
+ ll_agl_trigger(&clli->lli_vfs_inode, sai);
+ spin_lock(&plli->lli_agl_lock);
+ }
+ spin_unlock(&plli->lli_agl_lock);
+
+ GOTO(out, rc = 0);
+ } else if (1) {
+ /*
+ * chain is exhausted.
+ * Normal case: continue to the next page.
+ */
+ ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ sai->sai_in_readpage = 1;
+ page = ll_get_dir_page(dir, pos, &chain);
+ sai->sai_in_readpage = 0;
+ } else {
+ LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+ ll_release_page(page, 1);
+ /*
+ * go into overflow page.
+ */
+ }
+ }
+ EXIT;
+
+out:
+ if (sai->sai_agl_valid) {
+ spin_lock(&plli->lli_agl_lock);
+ thread_set_flags(agl_thread, SVC_STOPPING);
+ spin_unlock(&plli->lli_agl_lock);
+ wake_up(&agl_thread->t_ctl_waitq);
+
+ CDEBUG(D_READA, "stop agl thread: [pid %d]\n",
+ current_pid());
+ l_wait_event(agl_thread->t_ctl_waitq,
+ thread_is_stopped(agl_thread),
+ &lwi);
+ } else {
+ /* Set agl_thread flags anyway. */
+ thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
+ }
+ ll_dir_chain_fini(&chain);
+ spin_lock(&plli->lli_sa_lock);
+ if (!sa_received_empty(sai)) {
+ thread_set_flags(thread, SVC_STOPPING);
+ spin_unlock(&plli->lli_sa_lock);
+
+ /* To release the resources held by received entries. */
+ while (!sa_received_empty(sai))
+ ll_post_statahead(sai);
+
+ spin_lock(&plli->lli_sa_lock);
+ }
+ thread_set_flags(thread, SVC_STOPPED);
+ spin_unlock(&plli->lli_sa_lock);
+ wake_up(&sai->sai_waitq);
+ wake_up(&thread->t_ctl_waitq);
+ ll_sai_put(sai);
+ dput(parent);
+ CDEBUG(D_READA, "statahead thread stopped: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+ return rc;
+}
+
+/**
+ * called in ll_file_release().
+ */
+void ll_stop_statahead(struct inode *dir, void *key)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+
+ if (unlikely(key == NULL))
+ return;
+
+ spin_lock(&lli->lli_sa_lock);
+ if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) {
+ spin_unlock(&lli->lli_sa_lock);
+ return;
+ }
+
+ lli->lli_opendir_key = NULL;
+
+ if (lli->lli_sai) {
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread;
+
+ if (!thread_is_stopped(thread)) {
+ thread_set_flags(thread, SVC_STOPPING);
+ spin_unlock(&lli->lli_sa_lock);
+ wake_up(&thread->t_ctl_waitq);
+
+ CDEBUG(D_READA, "stop statahead thread: [pid %d]\n",
+ current_pid());
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopped(thread),
+ &lwi);
+ } else {
+ spin_unlock(&lli->lli_sa_lock);
+ }
+
+ /*
+ * Put the ref which was held when first statahead_enter.
+ * It maybe not the last ref for some statahead requests
+ * maybe inflight.
+ */
+ ll_sai_put(lli->lli_sai);
+ } else {
+ lli->lli_opendir_pid = 0;
+ spin_unlock(&lli->lli_sa_lock);
+ }
+}
+
+enum {
+ /**
+ * not first dirent, or is "."
+ */
+ LS_NONE_FIRST_DE = 0,
+ /**
+ * the first non-hidden dirent
+ */
+ LS_FIRST_DE,
+ /**
+ * the first hidden dirent, that is "."
+ */
+ LS_FIRST_DOT_DE
+};
+
+static int is_first_dirent(struct inode *dir, struct dentry *dentry)
+{
+ struct ll_dir_chain chain;
+ struct qstr *target = &dentry->d_name;
+ struct page *page;
+ __u64 pos = 0;
+ int dot_de;
+ int rc = LS_NONE_FIRST_DE;
+ ENTRY;
+
+ ll_dir_chain_init(&chain);
+ page = ll_get_dir_page(dir, pos, &chain);
+
+ while (1) {
+ struct lu_dirpage *dp;
+ struct lu_dirent *ent;
+
+ if (IS_ERR(page)) {
+ struct ll_inode_info *lli = ll_i2info(dir);
+
+ rc = PTR_ERR(page);
+ CERROR("error reading dir "DFID" at "LPU64": "
+ "[rc %d] [parent %u]\n",
+ PFID(ll_inode2fid(dir)), pos,
+ rc, lli->lli_opendir_pid);
+ break;
+ }
+
+ dp = page_address(page);
+ for (ent = lu_dirent_start(dp); ent != NULL;
+ ent = lu_dirent_next(ent)) {
+ __u64 hash;
+ int namelen;
+ char *name;
+
+ hash = le64_to_cpu(ent->lde_hash);
+ /* The ll_get_dir_page() can return any page containing
+ * the given hash which may be not the start hash. */
+ if (unlikely(hash < pos))
+ continue;
+
+ namelen = le16_to_cpu(ent->lde_namelen);
+ if (unlikely(namelen == 0))
+ /*
+ * skip dummy record.
+ */
+ continue;
+
+ name = ent->lde_name;
+ if (name[0] == '.') {
+ if (namelen == 1)
+ /*
+ * skip "."
+ */
+ continue;
+ else if (name[1] == '.' && namelen == 2)
+ /*
+ * skip ".."
+ */
+ continue;
+ else
+ dot_de = 1;
+ } else {
+ dot_de = 0;
+ }
+
+ if (dot_de && target->name[0] != '.') {
+ CDEBUG(D_READA, "%.*s skip hidden file %.*s\n",
+ target->len, target->name,
+ namelen, name);
+ continue;
+ }
+
+ if (target->len != namelen ||
+ memcmp(target->name, name, namelen) != 0)
+ rc = LS_NONE_FIRST_DE;
+ else if (!dot_de)
+ rc = LS_FIRST_DE;
+ else
+ rc = LS_FIRST_DOT_DE;
+
+ ll_release_page(page, 0);
+ GOTO(out, rc);
+ }
+ pos = le64_to_cpu(dp->ldp_hash_end);
+ if (pos == MDS_DIR_END_OFF) {
+ /*
+ * End of directory reached.
+ */
+ ll_release_page(page, 0);
+ break;
+ } else if (1) {
+ /*
+ * chain is exhausted
+ * Normal case: continue to the next page.
+ */
+ ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
+ LDF_COLLIDE);
+ page = ll_get_dir_page(dir, pos, &chain);
+ } else {
+ /*
+ * go into overflow page.
+ */
+ LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+ ll_release_page(page, 1);
+ }
+ }
+ EXIT;
+
+out:
+ ll_dir_chain_fini(&chain);
+ return rc;
+}
+
+static void
+ll_sai_unplug(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+{
+ struct ptlrpc_thread *thread = &sai->sai_thread;
+ struct ll_sb_info *sbi = ll_i2sbi(sai->sai_inode);
+ int hit;
+ ENTRY;
+
+ if (entry != NULL && entry->se_stat == SA_ENTRY_SUCC)
+ hit = 1;
+ else
+ hit = 0;
+
+ ll_sa_entry_fini(sai, entry);
+ if (hit) {
+ sai->sai_hit++;
+ sai->sai_consecutive_miss = 0;
+ sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
+ } else {
+ struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
+
+ sai->sai_miss++;
+ sai->sai_consecutive_miss++;
+ if (sa_low_hit(sai) && thread_is_running(thread)) {
+ atomic_inc(&sbi->ll_sa_wrong);
+ CDEBUG(D_READA, "Statahead for dir "DFID" hit "
+ "ratio too low: hit/miss "LPU64"/"LPU64
+ ", sent/replied "LPU64"/"LPU64", stopping "
+ "statahead thread: pid %d\n",
+ PFID(&lli->lli_fid), sai->sai_hit,
+ sai->sai_miss, sai->sai_sent,
+ sai->sai_replied, current_pid());
+ spin_lock(&lli->lli_sa_lock);
+ if (!thread_is_stopped(thread))
+ thread_set_flags(thread, SVC_STOPPING);
+ spin_unlock(&lli->lli_sa_lock);
+ }
+ }
+
+ if (!thread_is_stopped(thread))
+ wake_up(&thread->t_ctl_waitq);
+
+ EXIT;
+}
+
+/**
+ * Start statahead thread if this is the first dir entry.
+ * Otherwise if a thread is started already, wait it until it is ahead of me.
+ * \retval 1 -- find entry with lock in cache, the caller needs to do
+ * nothing.
+ * \retval 0 -- find entry in cache, but without lock, the caller needs
+ * refresh from MDS.
+ * \retval others -- the caller need to process as non-statahead.
+ */
+int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
+ int only_unplug)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = lli->lli_sai;
+ struct dentry *parent;
+ struct ll_sa_entry *entry;
+ struct ptlrpc_thread *thread;
+ struct l_wait_info lwi = { 0 };
+ int rc = 0;
+ struct ll_inode_info *plli;
+ ENTRY;
+
+ LASSERT(lli->lli_opendir_pid == current_pid());
+
+ if (sai) {
+ thread = &sai->sai_thread;
+ if (unlikely(thread_is_stopped(thread) &&
+ list_empty(&sai->sai_entries_stated))) {
+ /* to release resource */
+ ll_stop_statahead(dir, lli->lli_opendir_key);
+ RETURN(-EAGAIN);
+ }
+
+ if ((*dentryp)->d_name.name[0] == '.') {
+ if (sai->sai_ls_all ||
+ sai->sai_miss_hidden >= sai->sai_skip_hidden) {
+ /*
+ * Hidden dentry is the first one, or statahead
+ * thread does not skip so many hidden dentries
+ * before "sai_ls_all" enabled as below.
+ */
+ } else {
+ if (!sai->sai_ls_all)
+ /*
+ * It maybe because hidden dentry is not
+ * the first one, "sai_ls_all" was not
+ * set, then "ls -al" missed. Enable
+ * "sai_ls_all" for such case.
+ */
+ sai->sai_ls_all = 1;
+
+ /*
+ * Such "getattr" has been skipped before
+ * "sai_ls_all" enabled as above.
+ */
+ sai->sai_miss_hidden++;
+ RETURN(-EAGAIN);
+ }
+ }
+
+ entry = ll_sa_entry_get_byname(sai, &(*dentryp)->d_name);
+ if (entry == NULL || only_unplug) {
+ ll_sai_unplug(sai, entry);
+ RETURN(entry ? 1 : -EAGAIN);
+ }
+
+ /* if statahead is busy in readdir, help it do post-work */
+ while (!ll_sa_entry_stated(entry) &&
+ sai->sai_in_readpage &&
+ !sa_received_empty(sai))
+ ll_post_statahead(sai);
+
+ if (!ll_sa_entry_stated(entry)) {
+ sai->sai_index_wait = entry->se_index;
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(sai->sai_waitq,
+ ll_sa_entry_stated(entry) ||
+ thread_is_stopped(thread),
+ &lwi);
+ if (rc < 0) {
+ ll_sai_unplug(sai, entry);
+ RETURN(-EAGAIN);
+ }
+ }
+
+ if (entry->se_stat == SA_ENTRY_SUCC &&
+ entry->se_inode != NULL) {
+ struct inode *inode = entry->se_inode;
+ struct lookup_intent it = { .it_op = IT_GETATTR,
+ .d.lustre.it_lock_handle =
+ entry->se_handle };
+ __u64 bits;
+
+ rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
+ ll_inode2fid(inode), &bits);
+ if (rc == 1) {
+ if ((*dentryp)->d_inode == NULL) {
+ *dentryp = ll_splice_alias(inode,
+ *dentryp);
+ } else if ((*dentryp)->d_inode != inode) {
+ /* revalidate, but inode is recreated */
+ CDEBUG(D_READA,
+ "stale dentry %.*s inode %lu/%u, "
+ "statahead inode %lu/%u\n",
+ (*dentryp)->d_name.len,
+ (*dentryp)->d_name.name,
+ (*dentryp)->d_inode->i_ino,
+ (*dentryp)->d_inode->i_generation,
+ inode->i_ino,
+ inode->i_generation);
+ ll_sai_unplug(sai, entry);
+ RETURN(-ESTALE);
+ } else {
+ iput(inode);
+ }
+ entry->se_inode = NULL;
+
+ if ((bits & MDS_INODELOCK_LOOKUP) &&
+ d_lustre_invalid(*dentryp))
+ d_lustre_revalidate(*dentryp);
+ ll_intent_release(&it);
+ }
+ }
+
+ ll_sai_unplug(sai, entry);
+ RETURN(rc);
+ }
+
+ /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */
+ rc = is_first_dirent(dir, *dentryp);
+ if (rc == LS_NONE_FIRST_DE)
+ /* It is not "ls -{a}l" operation, no need statahead for it. */
+ GOTO(out, rc = -EAGAIN);
+
+ sai = ll_sai_alloc();
+ if (sai == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
+ sai->sai_inode = igrab(dir);
+ if (unlikely(sai->sai_inode == NULL)) {
+ CWARN("Do not start stat ahead on dying inode "DFID"\n",
+ PFID(&lli->lli_fid));
+ GOTO(out, rc = -ESTALE);
+ }
+
+ /* get parent reference count here, and put it in ll_statahead_thread */
+ parent = dget((*dentryp)->d_parent);
+ if (unlikely(sai->sai_inode != parent->d_inode)) {
+ struct ll_inode_info *nlli = ll_i2info(parent->d_inode);
+
+ CWARN("Race condition, someone changed %.*s just now: "
+ "old parent "DFID", new parent "DFID"\n",
+ (*dentryp)->d_name.len, (*dentryp)->d_name.name,
+ PFID(&lli->lli_fid), PFID(&nlli->lli_fid));
+ dput(parent);
+ iput(sai->sai_inode);
+ GOTO(out, rc = -EAGAIN);
+ }
+
+ CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %.*s]\n",
+ current_pid(), parent->d_name.len, parent->d_name.name);
+
+ lli->lli_sai = sai;
+
+ plli = ll_i2info(parent->d_inode);
+ rc = PTR_ERR(kthread_run(ll_statahead_thread, parent,
+ "ll_sa_%u", plli->lli_opendir_pid));
+ thread = &sai->sai_thread;
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("can't start ll_sa thread, rc: %d\n", rc);
+ dput(parent);
+ lli->lli_opendir_key = NULL;
+ thread_set_flags(thread, SVC_STOPPED);
+ thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
+ ll_sai_put(sai);
+ LASSERT(lli->lli_sai == NULL);
+ RETURN(-EAGAIN);
+ }
+
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_running(thread) || thread_is_stopped(thread),
+ &lwi);
+
+ /*
+ * We don't stat-ahead for the first dirent since we are already in
+ * lookup.
+ */
+ RETURN(-EAGAIN);
+
+out:
+ if (sai != NULL)
+ OBD_FREE_PTR(sai);
+ spin_lock(&lli->lli_sa_lock);
+ lli->lli_opendir_key = NULL;
+ lli->lli_opendir_pid = 0;
+ spin_unlock(&lli->lli_sa_lock);
+ return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
new file mode 100644
index 000000000000..82c14a993cca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -0,0 +1,226 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <lprocfs_status.h>
+#include "llite_internal.h"
+
+static struct kmem_cache *ll_inode_cachep;
+
+static struct inode *ll_alloc_inode(struct super_block *sb)
+{
+ struct ll_inode_info *lli;
+ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1);
+ OBD_SLAB_ALLOC_PTR_GFP(lli, ll_inode_cachep, __GFP_IO);
+ if (lli == NULL)
+ return NULL;
+
+ inode_init_once(&lli->lli_vfs_inode);
+ return &lli->lli_vfs_inode;
+}
+
+static void ll_inode_destroy_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ll_inode_info *ptr = ll_i2info(inode);
+ OBD_SLAB_FREE_PTR(ptr, ll_inode_cachep);
+}
+
+static void ll_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ll_inode_destroy_callback);
+}
+
+int ll_init_inodecache(void)
+{
+ ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
+ sizeof(struct ll_inode_info),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (ll_inode_cachep == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+void ll_destroy_inodecache(void)
+{
+ kmem_cache_destroy(ll_inode_cachep);
+}
+
+/* exported operations */
+struct super_operations lustre_super_operations =
+{
+ .alloc_inode = ll_alloc_inode,
+ .destroy_inode = ll_destroy_inode,
+ .evict_inode = ll_delete_inode,
+ .put_super = ll_put_super,
+ .statfs = ll_statfs,
+ .umount_begin = ll_umount_begin,
+ .remount_fs = ll_remount_fs,
+ .show_options = ll_show_options,
+};
+MODULE_ALIAS_FS("lustre");
+
+void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
+
+int vvp_global_init(void);
+void vvp_global_fini(void);
+
+static int __init init_lustre_lite(void)
+{
+ int i, rc, seed[2];
+ struct timeval tv;
+ lnet_process_id_t lnet_id;
+
+ CLASSERT(sizeof(LUSTRE_VOLATILE_HDR) == LUSTRE_VOLATILE_HDR_LEN + 1);
+
+ /* print an address of _any_ initialized kernel symbol from this
+ * module, to allow debugging with gdb that doesn't support data
+ * symbols from modules.*/
+ CDEBUG(D_INFO, "Lustre client module (%p).\n",
+ &lustre_super_operations);
+
+ rc = ll_init_inodecache();
+ if (rc)
+ return -ENOMEM;
+ ll_file_data_slab = kmem_cache_create("ll_file_data",
+ sizeof(struct ll_file_data), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (ll_file_data_slab == NULL) {
+ ll_destroy_inodecache();
+ return -ENOMEM;
+ }
+
+ ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache",
+ sizeof(struct ll_remote_perm),
+ 0, 0, NULL);
+ if (ll_remote_perm_cachep == NULL) {
+ kmem_cache_destroy(ll_file_data_slab);
+ ll_file_data_slab = NULL;
+ ll_destroy_inodecache();
+ return -ENOMEM;
+ }
+
+ ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache",
+ REMOTE_PERM_HASHSIZE *
+ sizeof(struct list_head),
+ 0, 0, NULL);
+ if (ll_rmtperm_hash_cachep == NULL) {
+ kmem_cache_destroy(ll_remote_perm_cachep);
+ ll_remote_perm_cachep = NULL;
+ kmem_cache_destroy(ll_file_data_slab);
+ ll_file_data_slab = NULL;
+ ll_destroy_inodecache();
+ return -ENOMEM;
+ }
+
+ proc_lustre_fs_root = proc_lustre_root ?
+ lprocfs_register("llite", proc_lustre_root, NULL, NULL) : NULL;
+
+ lustre_register_client_fill_super(ll_fill_super);
+ lustre_register_kill_super_cb(ll_kill_super);
+
+ lustre_register_client_process_config(ll_process_config);
+
+ cfs_get_random_bytes(seed, sizeof(seed));
+
+ /* Nodes with small feet have little entropy
+ * the NID for this node gives the most entropy in the low bits */
+ for (i=0; ; i++) {
+ if (LNetGetId(i, &lnet_id) == -ENOENT) {
+ break;
+ }
+ if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) {
+ seed[0] ^= LNET_NIDADDR(lnet_id.nid);
+ }
+ }
+
+ do_gettimeofday(&tv);
+ cfs_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]);
+
+ init_timer(&ll_capa_timer);
+ ll_capa_timer.function = ll_capa_timer_callback;
+ rc = ll_capa_thread_start();
+ /*
+ * XXX normal cleanup is needed here.
+ */
+ if (rc == 0)
+ rc = vvp_global_init();
+
+ return rc;
+}
+
+static void __exit exit_lustre_lite(void)
+{
+ vvp_global_fini();
+ del_timer(&ll_capa_timer);
+ ll_capa_thread_stop();
+ LASSERTF(capa_count[CAPA_SITE_CLIENT] == 0,
+ "client remaining capa count %d\n",
+ capa_count[CAPA_SITE_CLIENT]);
+
+ lustre_register_client_fill_super(NULL);
+ lustre_register_kill_super_cb(NULL);
+
+ lustre_register_client_process_config(NULL);
+
+ ll_destroy_inodecache();
+
+ kmem_cache_destroy(ll_rmtperm_hash_cachep);
+ ll_rmtperm_hash_cachep = NULL;
+
+ kmem_cache_destroy(ll_remote_perm_cachep);
+ ll_remote_perm_cachep = NULL;
+
+ kmem_cache_destroy(ll_file_data_slab);
+ if (proc_lustre_fs_root)
+ lprocfs_remove(&proc_lustre_fs_root);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Lite Client File System");
+MODULE_LICENSE("GPL");
+
+module_init(init_lustre_lite);
+module_exit(exit_lustre_lite);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
new file mode 100644
index 000000000000..5260e989a4e5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -0,0 +1,192 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/version.h>
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+static int ll_readlink_internal(struct inode *inode,
+ struct ptlrpc_request **request, char **symname)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ int rc, symlen = i_size_read(inode) + 1;
+ struct mdt_body *body;
+ struct md_op_data *op_data;
+ ENTRY;
+
+ *request = NULL;
+
+ if (lli->lli_symlink_name) {
+ int print_limit = min_t(int, PAGE_SIZE - 128, symlen);
+
+ *symname = lli->lli_symlink_name;
+ /* If the total CDEBUG() size is larger than a page, it
+ * will print a warning to the console, avoid this by
+ * printing just the last part of the symlink. */
+ CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n",
+ print_limit < symlen ? "..." : "", print_limit,
+ (*symname) + symlen - print_limit, symlen);
+ RETURN(0);
+ }
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, symlen,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ op_data->op_valid = OBD_MD_LINKNAME;
+ rc = md_getattr(sbi->ll_md_exp, op_data, request);
+ ll_finish_md_op_data(op_data);
+ if (rc) {
+ if (rc != -ENOENT)
+ CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
+ GOTO (failed, rc);
+ }
+
+ body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+ if ((body->valid & OBD_MD_LINKNAME) == 0) {
+ CERROR("OBD_MD_LINKNAME not set on reply\n");
+ GOTO(failed, rc = -EPROTO);
+ }
+
+ LASSERT(symlen != 0);
+ if (body->eadatasize != symlen) {
+ CERROR("inode %lu: symlink length %d not expected %d\n",
+ inode->i_ino, body->eadatasize - 1, symlen - 1);
+ GOTO(failed, rc = -EPROTO);
+ }
+
+ *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
+ if (*symname == NULL ||
+ strnlen(*symname, symlen) != symlen - 1) {
+ /* not full/NULL terminated */
+ CERROR("inode %lu: symlink not NULL terminated string"
+ "of length %d\n", inode->i_ino, symlen - 1);
+ GOTO(failed, rc = -EPROTO);
+ }
+
+ OBD_ALLOC(lli->lli_symlink_name, symlen);
+ /* do not return an error if we cannot cache the symlink locally */
+ if (lli->lli_symlink_name) {
+ memcpy(lli->lli_symlink_name, *symname, symlen);
+ *symname = lli->lli_symlink_name;
+ }
+ RETURN(0);
+
+failed:
+ RETURN (rc);
+}
+
+static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ptlrpc_request *request;
+ char *symname;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
+
+ ll_inode_size_lock(inode);
+ rc = ll_readlink_internal(inode, &request, &symname);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = vfs_readlink(dentry, buffer, buflen, symname);
+ out:
+ ptlrpc_req_finished(request);
+ ll_inode_size_unlock(inode);
+ RETURN(rc);
+}
+
+static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ptlrpc_request *request = NULL;
+ int rc;
+ char *symname;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
+ /* Limit the recursive symlink depth to 5 instead of default
+ * 8 links when kernel has 4k stack to prevent stack overflow.
+ * For 8k stacks we need to limit it to 7 for local servers. */
+ if (THREAD_SIZE < 8192 && current->link_count >= 6) {
+ rc = -ELOOP;
+ } else if (THREAD_SIZE == 8192 && current->link_count >= 8) {
+ rc = -ELOOP;
+ } else {
+ ll_inode_size_lock(inode);
+ rc = ll_readlink_internal(inode, &request, &symname);
+ ll_inode_size_unlock(inode);
+ }
+ if (rc) {
+ ptlrpc_req_finished(request);
+ request = NULL;
+ symname = ERR_PTR(rc);
+ }
+
+ nd_set_link(nd, symname);
+ /* symname may contain a pointer to the request message buffer,
+ * we delay request releasing until ll_put_link then.
+ */
+ RETURN(request);
+}
+
+static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+{
+ ptlrpc_req_finished(cookie);
+}
+
+struct inode_operations ll_fast_symlink_inode_operations = {
+ .readlink = ll_readlink,
+ .setattr = ll_setattr,
+ .follow_link = ll_follow_link,
+ .put_link = ll_put_link,
+ .getattr = ll_getattr,
+ .permission = ll_inode_permission,
+ .setxattr = ll_setxattr,
+ .getxattr = ll_getxattr,
+ .listxattr = ll_listxattr,
+ .removexattr = ll_removexattr,
+};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
new file mode 100644
index 000000000000..9254b990d31c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -0,0 +1,546 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl_device and cl_device_type implementation for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+
+#include <obd.h>
+#include <lustre_lite.h>
+
+#include "vvp_internal.h"
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+/*
+ * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
+ * "llite_" (var. "ll_") prefix.
+ */
+
+struct kmem_cache *vvp_thread_kmem;
+static struct kmem_cache *vvp_session_kmem;
+static struct lu_kmem_descr vvp_caches[] = {
+ {
+ .ckd_cache = &vvp_thread_kmem,
+ .ckd_name = "vvp_thread_kmem",
+ .ckd_size = sizeof (struct vvp_thread_info),
+ },
+ {
+ .ckd_cache = &vvp_session_kmem,
+ .ckd_name = "vvp_session_kmem",
+ .ckd_size = sizeof (struct vvp_session)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+static void *vvp_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct vvp_thread_info *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, vvp_thread_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void vvp_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct vvp_thread_info *info = data;
+ OBD_SLAB_FREE_PTR(info, vvp_thread_kmem);
+}
+
+static void *vvp_session_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct vvp_session *session;
+
+ OBD_SLAB_ALLOC_PTR_GFP(session, vvp_session_kmem, __GFP_IO);
+ if (session == NULL)
+ session = ERR_PTR(-ENOMEM);
+ return session;
+}
+
+static void vvp_session_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct vvp_session *session = data;
+ OBD_SLAB_FREE_PTR(session, vvp_session_kmem);
+}
+
+
+struct lu_context_key vvp_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = vvp_key_init,
+ .lct_fini = vvp_key_fini
+};
+
+struct lu_context_key vvp_session_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = vvp_session_key_init,
+ .lct_fini = vvp_session_key_fini
+};
+
+/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
+LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key);
+
+static const struct lu_device_operations vvp_lu_ops = {
+ .ldo_object_alloc = vvp_object_alloc
+};
+
+static const struct cl_device_operations vvp_cl_ops = {
+ .cdo_req_init = ccc_req_init
+};
+
+static struct lu_device *vvp_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg)
+{
+ return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops);
+}
+
+static const struct lu_device_type_operations vvp_device_type_ops = {
+ .ldto_init = vvp_type_init,
+ .ldto_fini = vvp_type_fini,
+
+ .ldto_start = vvp_type_start,
+ .ldto_stop = vvp_type_stop,
+
+ .ldto_device_alloc = vvp_device_alloc,
+ .ldto_device_free = ccc_device_free,
+ .ldto_device_init = ccc_device_init,
+ .ldto_device_fini = ccc_device_fini
+};
+
+struct lu_device_type vvp_device_type = {
+ .ldt_tags = LU_DEVICE_CL,
+ .ldt_name = LUSTRE_VVP_NAME,
+ .ldt_ops = &vvp_device_type_ops,
+ .ldt_ctx_tags = LCT_CL_THREAD
+};
+
+/**
+ * A mutex serializing calls to vvp_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+int vvp_global_init(void)
+{
+ int result;
+
+ result = lu_kmem_init(vvp_caches);
+ if (result == 0) {
+ result = ccc_global_init(&vvp_device_type);
+ if (result != 0)
+ lu_kmem_fini(vvp_caches);
+ }
+ return result;
+}
+
+void vvp_global_fini(void)
+{
+ ccc_global_fini(&vvp_device_type);
+ lu_kmem_fini(vvp_caches);
+}
+
+
+/*****************************************************************************
+ *
+ * mirror obd-devices into cl devices.
+ *
+ */
+
+int cl_sb_init(struct super_block *sb)
+{
+ struct ll_sb_info *sbi;
+ struct cl_device *cl;
+ struct lu_env *env;
+ int rc = 0;
+ int refcheck;
+
+ sbi = ll_s2sbi(sb);
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ cl = cl_type_setup(env, NULL, &vvp_device_type,
+ sbi->ll_dt_exp->exp_obd->obd_lu_dev);
+ if (!IS_ERR(cl)) {
+ cl2ccc_dev(cl)->cdv_sb = sb;
+ sbi->ll_cl = cl;
+ sbi->ll_site = cl2lu_dev(cl)->ld_site;
+ }
+ cl_env_put(env, &refcheck);
+ } else
+ rc = PTR_ERR(env);
+ RETURN(rc);
+}
+
+int cl_sb_fini(struct super_block *sb)
+{
+ struct ll_sb_info *sbi;
+ struct lu_env *env;
+ struct cl_device *cld;
+ int refcheck;
+ int result;
+
+ ENTRY;
+ sbi = ll_s2sbi(sb);
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ cld = sbi->ll_cl;
+
+ if (cld != NULL) {
+ cl_stack_fini(env, cld);
+ sbi->ll_cl = NULL;
+ sbi->ll_site = NULL;
+ }
+ cl_env_put(env, &refcheck);
+ result = 0;
+ } else {
+ CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
+ result = PTR_ERR(env);
+ }
+ /*
+ * If mount failed (sbi->ll_cl == NULL), and this there are no other
+ * mounts, stop device types manually (this usually happens
+ * automatically when last device is destroyed).
+ */
+ lu_types_stop();
+ RETURN(result);
+}
+
+/****************************************************************************
+ *
+ * /proc/fs/lustre/llite/$MNT/dump_page_cache
+ *
+ ****************************************************************************/
+
+/*
+ * To represent contents of a page cache as a byte stream, following
+ * information if encoded in 64bit offset:
+ *
+ * - file hash bucket in lu_site::ls_hash[] 28bits
+ *
+ * - how far file is from bucket head 4bits
+ *
+ * - page index 32bits
+ *
+ * First two data identify a file in the cache uniquely.
+ */
+
+#define PGC_OBJ_SHIFT (32 + 4)
+#define PGC_DEPTH_SHIFT (32)
+
+struct vvp_pgcache_id {
+ unsigned vpi_bucket;
+ unsigned vpi_depth;
+ uint32_t vpi_index;
+
+ unsigned vpi_curdep;
+ struct lu_object_header *vpi_obj;
+};
+
+static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
+{
+ CLASSERT(sizeof(pos) == sizeof(__u64));
+
+ id->vpi_index = pos & 0xffffffff;
+ id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf;
+ id->vpi_bucket = ((unsigned long long)pos >> PGC_OBJ_SHIFT);
+}
+
+static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
+{
+ return
+ ((__u64)id->vpi_index) |
+ ((__u64)id->vpi_depth << PGC_DEPTH_SHIFT) |
+ ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
+}
+
+static int vvp_pgcache_obj_get(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+{
+ struct vvp_pgcache_id *id = data;
+ struct lu_object_header *hdr = cfs_hash_object(hs, hnode);
+
+ if (id->vpi_curdep-- > 0)
+ return 0; /* continue */
+
+ if (lu_object_is_dying(hdr))
+ return 1;
+
+ cfs_hash_get(hs, hnode);
+ id->vpi_obj = hdr;
+ return 1;
+}
+
+static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
+ struct lu_device *dev,
+ struct vvp_pgcache_id *id)
+{
+ LASSERT(lu_device_is_cl(dev));
+
+ id->vpi_depth &= 0xf;
+ id->vpi_obj = NULL;
+ id->vpi_curdep = id->vpi_depth;
+
+ cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
+ vvp_pgcache_obj_get, id);
+ if (id->vpi_obj != NULL) {
+ struct lu_object *lu_obj;
+
+ lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
+ if (lu_obj != NULL) {
+ lu_object_ref_add(lu_obj, "dump", current);
+ return lu2cl(lu_obj);
+ }
+ lu_object_put(env, lu_object_top(id->vpi_obj));
+
+ } else if (id->vpi_curdep > 0) {
+ id->vpi_depth = 0xf;
+ }
+ return NULL;
+}
+
+static loff_t vvp_pgcache_find(const struct lu_env *env,
+ struct lu_device *dev, loff_t pos)
+{
+ struct cl_object *clob;
+ struct lu_site *site;
+ struct vvp_pgcache_id id;
+
+ site = dev->ld_site;
+ vvp_pgcache_id_unpack(pos, &id);
+
+ while (1) {
+ if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
+ return ~0ULL;
+ clob = vvp_pgcache_obj(env, dev, &id);
+ if (clob != NULL) {
+ struct cl_object_header *hdr;
+ int nr;
+ struct cl_page *pg;
+
+ /* got an object. Find next page. */
+ hdr = cl_object_header(clob);
+
+ spin_lock(&hdr->coh_page_guard);
+ nr = radix_tree_gang_lookup(&hdr->coh_tree,
+ (void **)&pg,
+ id.vpi_index, 1);
+ if (nr > 0) {
+ id.vpi_index = pg->cp_index;
+ /* Cant support over 16T file */
+ nr = !(pg->cp_index > 0xffffffff);
+ }
+ spin_unlock(&hdr->coh_page_guard);
+
+ lu_object_ref_del(&clob->co_lu, "dump", current);
+ cl_object_put(env, clob);
+ if (nr > 0)
+ return vvp_pgcache_id_pack(&id);
+ }
+ /* to the next object. */
+ ++id.vpi_depth;
+ id.vpi_depth &= 0xf;
+ if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
+ return ~0ULL;
+ id.vpi_index = 0;
+ }
+}
+
+#define seq_page_flag(seq, page, flag, has_flags) do { \
+ if (test_bit(PG_##flag, &(page)->flags)) { \
+ seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \
+ has_flags = 1; \
+ } \
+} while(0)
+
+static void vvp_pgcache_page_show(const struct lu_env *env,
+ struct seq_file *seq, struct cl_page *page)
+{
+ struct ccc_page *cpg;
+ struct page *vmpage;
+ int has_flags;
+
+ cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type));
+ vmpage = cpg->cpg_page;
+ seq_printf(seq," %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
+ 0 /* gen */,
+ cpg, page,
+ "none",
+ cpg->cpg_write_queued ? "wq" : "- ",
+ cpg->cpg_defer_uptodate ? "du" : "- ",
+ PageWriteback(vmpage) ? "wb" : "-",
+ vmpage, vmpage->mapping->host->i_ino,
+ vmpage->mapping->host->i_generation,
+ vmpage->mapping->host, vmpage->index,
+ page_count(vmpage));
+ has_flags = 0;
+ seq_page_flag(seq, vmpage, locked, has_flags);
+ seq_page_flag(seq, vmpage, error, has_flags);
+ seq_page_flag(seq, vmpage, referenced, has_flags);
+ seq_page_flag(seq, vmpage, uptodate, has_flags);
+ seq_page_flag(seq, vmpage, dirty, has_flags);
+ seq_page_flag(seq, vmpage, writeback, has_flags);
+ seq_printf(seq, "%s]\n", has_flags ? "" : "-");
+}
+
+static int vvp_pgcache_show(struct seq_file *f, void *v)
+{
+ loff_t pos;
+ struct ll_sb_info *sbi;
+ struct cl_object *clob;
+ struct lu_env *env;
+ struct cl_page *page;
+ struct cl_object_header *hdr;
+ struct vvp_pgcache_id id;
+ int refcheck;
+ int result;
+
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ pos = *(loff_t *) v;
+ vvp_pgcache_id_unpack(pos, &id);
+ sbi = f->private;
+ clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
+ if (clob != NULL) {
+ hdr = cl_object_header(clob);
+
+ spin_lock(&hdr->coh_page_guard);
+ page = cl_page_lookup(hdr, id.vpi_index);
+ spin_unlock(&hdr->coh_page_guard);
+
+ seq_printf(f, "%8x@"DFID": ",
+ id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+ if (page != NULL) {
+ vvp_pgcache_page_show(env, f, page);
+ cl_page_put(env, page);
+ } else
+ seq_puts(f, "missing\n");
+ lu_object_ref_del(&clob->co_lu, "dump", current);
+ cl_object_put(env, clob);
+ } else
+ seq_printf(f, "%llx missing\n", pos);
+ cl_env_put(env, &refcheck);
+ result = 0;
+ } else
+ result = PTR_ERR(env);
+ return result;
+}
+
+static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
+{
+ struct ll_sb_info *sbi;
+ struct lu_env *env;
+ int refcheck;
+
+ sbi = f->private;
+
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ sbi = f->private;
+ if (sbi->ll_site->ls_obj_hash->hs_cur_bits > 64 - PGC_OBJ_SHIFT)
+ pos = ERR_PTR(-EFBIG);
+ else {
+ *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
+ *pos);
+ if (*pos == ~0ULL)
+ pos = NULL;
+ }
+ cl_env_put(env, &refcheck);
+ }
+ return pos;
+}
+
+static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
+{
+ struct ll_sb_info *sbi;
+ struct lu_env *env;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ sbi = f->private;
+ *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
+ if (*pos == ~0ULL)
+ pos = NULL;
+ cl_env_put(env, &refcheck);
+ }
+ return pos;
+}
+
+static void vvp_pgcache_stop(struct seq_file *f, void *v)
+{
+ /* Nothing to do */
+}
+
+static struct seq_operations vvp_pgcache_ops = {
+ .start = vvp_pgcache_start,
+ .next = vvp_pgcache_next,
+ .stop = vvp_pgcache_stop,
+ .show = vvp_pgcache_show
+};
+
+static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
+{
+ struct ll_sb_info *sbi = PDE_DATA(inode);
+ struct seq_file *seq;
+ int result;
+
+ result = seq_open(filp, &vvp_pgcache_ops);
+ if (result == 0) {
+ seq = filp->private_data;
+ seq->private = sbi;
+ }
+ return result;
+}
+
+struct file_operations vvp_dump_pgcache_file_ops = {
+ .owner = THIS_MODULE,
+ .open = vvp_dump_pgcache_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
new file mode 100644
index 000000000000..c82bf17f55a6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -0,0 +1,62 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Internal definitions for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#ifndef VVP_INTERNAL_H
+#define VVP_INTERNAL_H
+
+
+#include <cl_object.h>
+#include "llite_internal.h"
+
+int vvp_io_init (const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io);
+int vvp_lock_init (const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *io);
+int vvp_page_init (const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+struct lu_object *vvp_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev);
+
+struct ccc_object *cl_inode2ccc(struct inode *inode);
+
+extern struct kmem_cache *vvp_thread_kmem;
+
+#endif /* VVP_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
new file mode 100644
index 000000000000..eb964acad45c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -0,0 +1,1186 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_io for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+
+#include <obd.h>
+#include <lustre_lite.h>
+
+#include "vvp_internal.h"
+
+static struct vvp_io *cl2vvp_io(const struct lu_env *env,
+ const struct cl_io_slice *slice);
+
+/**
+ * True, if \a io is a normal io, False for sendfile() / splice_{read|write}
+ */
+int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
+{
+ struct vvp_io *vio = vvp_env_io(env);
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+ return vio->cui_io_subtype == IO_NORMAL;
+}
+
+/**
+ * For swapping layout. The file's layout may have changed.
+ * To avoid populating pages to a wrong stripe, we have to verify the
+ * correctness of layout. It works because swapping layout processes
+ * have to acquire group lock.
+ */
+static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ccc_io *cio = ccc_env_io(env);
+ bool rc = true;
+
+ switch (io->ci_type) {
+ case CIT_READ:
+ case CIT_WRITE:
+ /* don't need lock here to check lli_layout_gen as we have held
+ * extent lock and GROUP lock has to hold to swap layout */
+ if (lli->lli_layout_gen != cio->cui_layout_gen) {
+ io->ci_need_restart = 1;
+ /* this will return application a short read/write */
+ io->ci_continue = 0;
+ rc = false;
+ }
+ case CIT_FAULT:
+ /* fault is okay because we've already had a page. */
+ default:
+ break;
+ }
+
+ return rc;
+}
+
+/*****************************************************************************
+ *
+ * io operations.
+ *
+ */
+
+static int vvp_io_fault_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct inode *inode = ccc_object_inode(ios->cis_obj);
+
+ LASSERT(inode ==
+ cl2ccc_io(env, ios)->cui_fd->fd_file->f_dentry->d_inode);
+ vio->u.fault.ft_mtime = LTIME_S(inode->i_mtime);
+ return 0;
+}
+
+static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = io->ci_obj;
+ struct ccc_io *cio = cl2ccc_io(env, ios);
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+
+ CDEBUG(D_VFSTRACE, "ignore/verify layout %d/%d, layout version %d.\n",
+ io->ci_ignore_layout, io->ci_verify_layout, cio->cui_layout_gen);
+
+ if (!io->ci_ignore_layout && io->ci_verify_layout) {
+ __u32 gen = 0;
+
+ /* check layout version */
+ ll_layout_refresh(ccc_object_inode(obj), &gen);
+ io->ci_need_restart = cio->cui_layout_gen != gen;
+ if (io->ci_need_restart)
+ CDEBUG(D_VFSTRACE, "layout changed from %d to %d.\n",
+ cio->cui_layout_gen, gen);
+ }
+}
+
+static void vvp_io_fault_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct cl_page *page = io->u.ci_fault.ft_page;
+
+ CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+
+ if (page != NULL) {
+ lu_ref_del(&page->cp_reference, "fault", io);
+ cl_page_put(env, page);
+ io->u.ci_fault.ft_page = NULL;
+ }
+ vvp_io_fini(env, ios);
+}
+
+enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
+{
+ /*
+ * we only want to hold PW locks if the mmap() can generate
+ * writes back to the file and that only happens in shared
+ * writable vmas
+ */
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+ return CLM_WRITE;
+ return CLM_READ;
+}
+
+static int vvp_mmap_locks(const struct lu_env *env,
+ struct ccc_io *vio, struct cl_io *io)
+{
+ struct ccc_thread_info *cti = ccc_env_info(env);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct cl_lock_descr *descr = &cti->cti_descr;
+ ldlm_policy_data_t policy;
+ unsigned long addr;
+ unsigned long seg;
+ ssize_t count;
+ int result;
+ ENTRY;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+ if (!cl_is_normalio(env, io))
+ RETURN(0);
+
+ if (vio->cui_iov == NULL) /* nfs or loop back device write */
+ RETURN(0);
+
+ /* No MM (e.g. NFS)? No vmas too. */
+ if (mm == NULL)
+ RETURN(0);
+
+ for (seg = 0; seg < vio->cui_nrsegs; seg++) {
+ const struct iovec *iv = &vio->cui_iov[seg];
+
+ addr = (unsigned long)iv->iov_base;
+ count = iv->iov_len;
+ if (count == 0)
+ continue;
+
+ count += addr & (~CFS_PAGE_MASK);
+ addr &= CFS_PAGE_MASK;
+
+ down_read(&mm->mmap_sem);
+ while((vma = our_vma(mm, addr, count)) != NULL) {
+ struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ int flags = CEF_MUST;
+
+ if (ll_file_nolock(vma->vm_file)) {
+ /*
+ * For no lock case, a lockless lock will be
+ * generated.
+ */
+ flags = CEF_NEVER;
+ }
+
+ /*
+ * XXX: Required lock mode can be weakened: CIT_WRITE
+ * io only ever reads user level buffer, and CIT_READ
+ * only writes on it.
+ */
+ policy_from_vma(&policy, vma, addr, count);
+ descr->cld_mode = vvp_mode_from_vma(vma);
+ descr->cld_obj = ll_i2info(inode)->lli_clob;
+ descr->cld_start = cl_index(descr->cld_obj,
+ policy.l_extent.start);
+ descr->cld_end = cl_index(descr->cld_obj,
+ policy.l_extent.end);
+ descr->cld_enq_flags = flags;
+ result = cl_io_lock_alloc_add(env, io, descr);
+
+ CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
+ descr->cld_mode, descr->cld_start,
+ descr->cld_end);
+
+ if (result < 0)
+ RETURN(result);
+
+ if (vma->vm_end - addr >= count)
+ break;
+
+ count -= vma->vm_end - addr;
+ addr = vma->vm_end;
+ }
+ up_read(&mm->mmap_sem);
+ }
+ RETURN(0);
+}
+
+static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
+ enum cl_lock_mode mode, loff_t start, loff_t end)
+{
+ struct ccc_io *cio = ccc_env_io(env);
+ int result;
+ int ast_flags = 0;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+ ENTRY;
+
+ ccc_io_update_iov(env, cio, io);
+
+ if (io->u.ci_rw.crw_nonblock)
+ ast_flags |= CEF_NONBLOCK;
+ result = vvp_mmap_locks(env, cio, io);
+ if (result == 0)
+ result = ccc_io_one_lock(env, io, ast_flags, mode, start, end);
+ RETURN(result);
+}
+
+static int vvp_io_read_lock(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct ll_inode_info *lli = ll_i2info(ccc_object_inode(io->ci_obj));
+ int result;
+
+ ENTRY;
+ /* XXX: Layer violation, we shouldn't see lsm at llite level. */
+ if (lli->lli_has_smd) /* lsm-less file doesn't need to lock */
+ result = vvp_io_rw_lock(env, io, CLM_READ,
+ io->u.ci_rd.rd.crw_pos,
+ io->u.ci_rd.rd.crw_pos +
+ io->u.ci_rd.rd.crw_count - 1);
+ else
+ result = 0;
+ RETURN(result);
+}
+
+static int vvp_io_fault_lock(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ /*
+ * XXX LDLM_FL_CBPENDING
+ */
+ return ccc_io_one_lock_index
+ (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma),
+ io->u.ci_fault.ft_index, io->u.ci_fault.ft_index);
+}
+
+static int vvp_io_write_lock(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ loff_t start;
+ loff_t end;
+
+ if (io->u.ci_wr.wr_append) {
+ start = 0;
+ end = OBD_OBJECT_EOF;
+ } else {
+ start = io->u.ci_wr.wr.crw_pos;
+ end = start + io->u.ci_wr.wr.crw_count - 1;
+ }
+ return vvp_io_rw_lock(env, io, CLM_WRITE, start, end);
+}
+
+static int vvp_io_setattr_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ return 0;
+}
+
+/**
+ * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io.
+ *
+ * Handles "lockless io" mode when extent locking is done by server.
+ */
+static int vvp_io_setattr_lock(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct ccc_io *cio = ccc_env_io(env);
+ struct cl_io *io = ios->cis_io;
+ __u64 new_size;
+ __u32 enqflags = 0;
+
+ if (cl_io_is_trunc(io)) {
+ new_size = io->u.ci_setattr.sa_attr.lvb_size;
+ if (new_size == 0)
+ enqflags = CEF_DISCARD_DATA;
+ } else {
+ if ((io->u.ci_setattr.sa_attr.lvb_mtime >=
+ io->u.ci_setattr.sa_attr.lvb_ctime) ||
+ (io->u.ci_setattr.sa_attr.lvb_atime >=
+ io->u.ci_setattr.sa_attr.lvb_ctime))
+ return 0;
+ new_size = 0;
+ }
+ cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK;
+ return ccc_io_one_lock(env, io, enqflags, CLM_WRITE,
+ new_size, OBD_OBJECT_EOF);
+}
+
+static int vvp_do_vmtruncate(struct inode *inode, size_t size)
+{
+ int result;
+ /*
+ * Only ll_inode_size_lock is taken at this level.
+ */
+ ll_inode_size_lock(inode);
+ result = inode_newsize_ok(inode, size);
+ if (result < 0) {
+ ll_inode_size_unlock(inode);
+ return result;
+ }
+ truncate_setsize(inode, size);
+ ll_inode_size_unlock(inode);
+ return result;
+}
+
+static int vvp_io_setattr_trunc(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ struct inode *inode, loff_t size)
+{
+ inode_dio_wait(inode);
+ return 0;
+}
+
+static int vvp_io_setattr_time(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = io->ci_obj;
+ struct cl_attr *attr = ccc_env_thread_attr(env);
+ int result;
+ unsigned valid = CAT_CTIME;
+
+ cl_object_attr_lock(obj);
+ attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime;
+ if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) {
+ attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime;
+ valid |= CAT_ATIME;
+ }
+ if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) {
+ attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
+ valid |= CAT_MTIME;
+ }
+ result = cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
+
+ return result;
+}
+
+static int vvp_io_setattr_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct inode *inode = ccc_object_inode(io->ci_obj);
+
+ mutex_lock(&inode->i_mutex);
+ if (cl_io_is_trunc(io))
+ return vvp_io_setattr_trunc(env, ios, inode,
+ io->u.ci_setattr.sa_attr.lvb_size);
+ else
+ return vvp_io_setattr_time(env, ios);
+}
+
+static void vvp_io_setattr_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io = ios->cis_io;
+ struct inode *inode = ccc_object_inode(io->ci_obj);
+
+ if (cl_io_is_trunc(io)) {
+ /* Truncate in memory pages - they must be clean pages
+ * because osc has already notified to destroy osc_extents. */
+ vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
+ inode_dio_write_done(inode);
+ }
+ mutex_unlock(&inode->i_mutex);
+}
+
+static void vvp_io_setattr_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ vvp_io_fini(env, ios);
+}
+
+static ssize_t lustre_generic_file_read(struct file *file,
+ struct ccc_io *vio, loff_t *ppos)
+{
+ return generic_file_aio_read(vio->cui_iocb, vio->cui_iov,
+ vio->cui_nrsegs, *ppos);
+}
+
+static ssize_t lustre_generic_file_write(struct file *file,
+ struct ccc_io *vio, loff_t *ppos)
+{
+ return generic_file_aio_write(vio->cui_iocb, vio->cui_iov,
+ vio->cui_nrsegs, *ppos);
+}
+
+static int vvp_io_read_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = io->ci_obj;
+ struct inode *inode = ccc_object_inode(obj);
+ struct ll_ra_read *bead = &vio->cui_bead;
+ struct file *file = cio->cui_fd->fd_file;
+
+ int result;
+ loff_t pos = io->u.ci_rd.rd.crw_pos;
+ long cnt = io->u.ci_rd.rd.crw_count;
+ long tot = cio->cui_tot_count;
+ int exceed = 0;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+
+ CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
+
+ if (!can_populate_pages(env, io, inode))
+ return 0;
+
+ result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
+ if (result != 0)
+ return result;
+ else if (exceed != 0)
+ goto out;
+
+ LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
+ "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
+ inode->i_ino, cnt, pos, i_size_read(inode));
+
+ /* turn off the kernel's read-ahead */
+ cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+
+ /* initialize read-ahead window once per syscall */
+ if (!vio->cui_ra_window_set) {
+ vio->cui_ra_window_set = 1;
+ bead->lrr_start = cl_index(obj, pos);
+ /*
+ * XXX: explicit PAGE_CACHE_SIZE
+ */
+ bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1);
+ ll_ra_read_in(file, bead);
+ }
+
+ /* BUG: 5972 */
+ file_accessed(file);
+ switch (vio->cui_io_subtype) {
+ case IO_NORMAL:
+ result = lustre_generic_file_read(file, cio, &pos);
+ break;
+ case IO_SPLICE:
+ result = generic_file_splice_read(file, &pos,
+ vio->u.splice.cui_pipe, cnt,
+ vio->u.splice.cui_flags);
+ /* LU-1109: do splice read stripe by stripe otherwise if it
+ * may make nfsd stuck if this read occupied all internal pipe
+ * buffers. */
+ io->ci_continue = 0;
+ break;
+ default:
+ CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
+ LBUG();
+ }
+
+out:
+ if (result >= 0) {
+ if (result < cnt)
+ io->ci_continue = 0;
+ io->ci_nob += result;
+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
+ cio->cui_fd, pos, result, 0);
+ result = 0;
+ }
+ return result;
+}
+
+static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct ccc_io *cio = cl2ccc_io(env, ios);
+
+ if (vio->cui_ra_window_set)
+ ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead);
+
+ vvp_io_fini(env, ios);
+}
+
+static int vvp_io_write_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = io->ci_obj;
+ struct inode *inode = ccc_object_inode(obj);
+ struct file *file = cio->cui_fd->fd_file;
+ ssize_t result = 0;
+ loff_t pos = io->u.ci_wr.wr.crw_pos;
+ size_t cnt = io->u.ci_wr.wr.crw_count;
+
+ ENTRY;
+
+ if (!can_populate_pages(env, io, inode))
+ return 0;
+
+ if (cl_io_is_append(io)) {
+ /*
+ * PARALLEL IO This has to be changed for parallel IO doing
+ * out-of-order writes.
+ */
+ pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
+ cio->cui_iocb->ki_pos = pos;
+ }
+
+ CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
+
+ if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */
+ result = 0;
+ else
+ result = lustre_generic_file_write(file, cio, &pos);
+
+ if (result > 0) {
+ if (result < cnt)
+ io->ci_continue = 0;
+ io->ci_nob += result;
+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
+ cio->cui_fd, pos, result, 0);
+ result = 0;
+ }
+ RETURN(result);
+}
+
+static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
+{
+ struct vm_fault *vmf = cfio->fault.ft_vmf;
+
+ cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
+
+ if (vmf->page) {
+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
+ vmf->virtual_address);
+ if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+ lock_page(vmf->page);
+ cfio->fault.ft_flags &= VM_FAULT_LOCKED;
+ }
+
+ cfio->ft_vmpage = vmf->page;
+ return 0;
+ }
+
+ if (cfio->fault.ft_flags & VM_FAULT_SIGBUS) {
+ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
+ return -EFAULT;
+ }
+
+ if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+ CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
+ return -ENOMEM;
+ }
+
+ if (cfio->fault.ft_flags & VM_FAULT_RETRY)
+ return -EAGAIN;
+
+ CERROR("unknow error in page fault %d!\n", cfio->fault.ft_flags);
+ return -EINVAL;
+}
+
+
+static int vvp_io_fault_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = io->ci_obj;
+ struct inode *inode = ccc_object_inode(obj);
+ struct cl_fault_io *fio = &io->u.ci_fault;
+ struct vvp_fault_io *cfio = &vio->u.fault;
+ loff_t offset;
+ int result = 0;
+ struct page *vmpage = NULL;
+ struct cl_page *page;
+ loff_t size;
+ pgoff_t last; /* last page in a file data region */
+
+ if (fio->ft_executable &&
+ LTIME_S(inode->i_mtime) != vio->u.fault.ft_mtime)
+ CWARN("binary "DFID
+ " changed while waiting for the page fault lock\n",
+ PFID(lu_object_fid(&obj->co_lu)));
+
+ /* offset of the last byte on the page */
+ offset = cl_offset(obj, fio->ft_index + 1) - 1;
+ LASSERT(cl_index(obj, offset) == fio->ft_index);
+ result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
+ if (result != 0)
+ return result;
+
+ /* must return locked page */
+ if (fio->ft_mkwrite) {
+ LASSERT(cfio->ft_vmpage != NULL);
+ lock_page(cfio->ft_vmpage);
+ } else {
+ result = vvp_io_kernel_fault(cfio);
+ if (result != 0)
+ return result;
+ }
+
+ vmpage = cfio->ft_vmpage;
+ LASSERT(PageLocked(vmpage));
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
+ ll_invalidate_page(vmpage);
+
+ size = i_size_read(inode);
+ /* Though we have already held a cl_lock upon this page, but
+ * it still can be truncated locally. */
+ if (unlikely((vmpage->mapping != inode->i_mapping) ||
+ (page_offset(vmpage) > size))) {
+ CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
+
+ /* return +1 to stop cl_io_loop() and ll_fault() will catch
+ * and retry. */
+ GOTO(out, result = +1);
+ }
+
+
+ if (fio->ft_mkwrite ) {
+ pgoff_t last_index;
+ /*
+ * Capture the size while holding the lli_trunc_sem from above
+ * we want to make sure that we complete the mkwrite action
+ * while holding this lock. We need to make sure that we are
+ * not past the end of the file.
+ */
+ last_index = cl_index(obj, size - 1);
+ if (last_index < fio->ft_index) {
+ CDEBUG(D_PAGE,
+ "llite: mkwrite and truncate race happened: "
+ "%p: 0x%lx 0x%lx\n",
+ vmpage->mapping,fio->ft_index,last_index);
+ /*
+ * We need to return if we are
+ * passed the end of the file. This will propagate
+ * up the call stack to ll_page_mkwrite where
+ * we will return VM_FAULT_NOPAGE. Any non-negative
+ * value returned here will be silently
+ * converted to 0. If the vmpage->mapping is null
+ * the error code would be converted back to ENODATA
+ * in ll_page_mkwrite0. Thus we return -ENODATA
+ * to handle both cases
+ */
+ GOTO(out, result = -ENODATA);
+ }
+ }
+
+ page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
+ if (IS_ERR(page))
+ GOTO(out, result = PTR_ERR(page));
+
+ /* if page is going to be written, we should add this page into cache
+ * earlier. */
+ if (fio->ft_mkwrite) {
+ wait_on_page_writeback(vmpage);
+ if (set_page_dirty(vmpage)) {
+ struct ccc_page *cp;
+
+ /* vvp_page_assume() calls wait_on_page_writeback(). */
+ cl_page_assume(env, io, page);
+
+ cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
+ vvp_write_pending(cl2ccc(obj), cp);
+
+ /* Do not set Dirty bit here so that in case IO is
+ * started before the page is really made dirty, we
+ * still have chance to detect it. */
+ result = cl_page_cache_add(env, io, page, CRT_WRITE);
+ LASSERT(cl_page_is_owned(page, io));
+
+ vmpage = NULL;
+ if (result < 0) {
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+
+ cl_page_put(env, page);
+
+ /* we're in big trouble, what can we do now? */
+ if (result == -EDQUOT)
+ result = -ENOSPC;
+ GOTO(out, result);
+ } else
+ cl_page_disown(env, io, page);
+ }
+ }
+
+ last = cl_index(obj, size - 1);
+ /*
+ * The ft_index is only used in the case of
+ * a mkwrite action. We need to check
+ * our assertions are correct, since
+ * we should have caught this above
+ */
+ LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
+ if (fio->ft_index == last)
+ /*
+ * Last page is mapped partially.
+ */
+ fio->ft_nob = size - cl_offset(obj, fio->ft_index);
+ else
+ fio->ft_nob = cl_page_size(obj);
+
+ lu_ref_add(&page->cp_reference, "fault", io);
+ fio->ft_page = page;
+ EXIT;
+
+out:
+ /* return unlocked vmpage to avoid deadlocking */
+ if (vmpage != NULL)
+ unlock_page(vmpage);
+ cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+ return result;
+}
+
+static int vvp_io_fsync_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ /* we should mark TOWRITE bit to each dirty page in radix tree to
+ * verify pages have been written, but this is difficult because of
+ * race. */
+ return 0;
+}
+
+static int vvp_io_read_page(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice)
+{
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = slice->cpl_obj;
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct cl_page *page = slice->cpl_page;
+ struct inode *inode = ccc_object_inode(obj);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd;
+ struct ll_readahead_state *ras = &fd->fd_ras;
+ struct page *vmpage = cp->cpg_page;
+ struct cl_2queue *queue = &io->ci_queue;
+ int rc;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ LASSERT(slice->cpl_obj == obj);
+
+ ENTRY;
+
+ if (sbi->ll_ra_info.ra_max_pages_per_file &&
+ sbi->ll_ra_info.ra_max_pages)
+ ras_update(sbi, inode, ras, page->cp_index,
+ cp->cpg_defer_uptodate);
+
+ /* Sanity check whether the page is protected by a lock. */
+ rc = cl_page_is_under_lock(env, io, page);
+ if (rc != -EBUSY) {
+ CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n",
+ rc == -ENODATA ? "without a lock" :
+ "match failed", rc);
+ if (rc != -ENODATA)
+ RETURN(rc);
+ }
+
+ if (cp->cpg_defer_uptodate) {
+ cp->cpg_ra_used = 1;
+ cl_page_export(env, page, 1);
+ }
+ /*
+ * Add page into the queue even when it is marked uptodate above.
+ * this will unlock it automatically as part of cl_page_list_disown().
+ */
+ cl_2queue_add(queue, page);
+ if (sbi->ll_ra_info.ra_max_pages_per_file &&
+ sbi->ll_ra_info.ra_max_pages)
+ ll_readahead(env, io, ras,
+ vmpage->mapping, &queue->c2_qin, fd->fd_flags);
+
+ RETURN(0);
+}
+
+static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, struct ccc_page *cp,
+ enum cl_req_type crt)
+{
+ struct cl_2queue *queue;
+ int result;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+ queue = &io->ci_queue;
+ cl_2queue_init_page(queue, page);
+
+ result = cl_io_submit_sync(env, io, crt, queue, 0);
+ LASSERT(cl_page_is_owned(page, io));
+
+ if (crt == CRT_READ)
+ /*
+ * in CRT_WRITE case page is left locked even in case of
+ * error.
+ */
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_2queue_fini(env, queue);
+
+ return result;
+}
+
+/**
+ * Prepare partially written-to page for a write.
+ */
+static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io,
+ struct cl_object *obj, struct cl_page *pg,
+ struct ccc_page *cp,
+ unsigned from, unsigned to)
+{
+ struct cl_attr *attr = ccc_env_thread_attr(env);
+ loff_t offset = cl_offset(obj, pg->cp_index);
+ int result;
+
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+ if (result == 0) {
+ /*
+ * If are writing to a new page, no need to read old data.
+ * The extent locking will have updated the KMS, and for our
+ * purposes here we can treat it like i_size.
+ */
+ if (attr->cat_kms <= offset) {
+ char *kaddr = ll_kmap_atomic(cp->cpg_page, KM_USER0);
+
+ memset(kaddr, 0, cl_page_size(obj));
+ ll_kunmap_atomic(kaddr, KM_USER0);
+ } else if (cp->cpg_defer_uptodate)
+ cp->cpg_ra_used = 1;
+ else
+ result = vvp_page_sync_io(env, io, pg, cp, CRT_READ);
+ /*
+ * In older implementations, obdo_refresh_inode is called here
+ * to update the inode because the write might modify the
+ * object info at OST. However, this has been proven useless,
+ * since LVB functions will be called when user space program
+ * tries to retrieve inode attribute. Also, see bug 15909 for
+ * details. -jay
+ */
+ if (result == 0)
+ cl_page_export(env, pg, 1);
+ }
+ return result;
+}
+
+static int vvp_io_prepare_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct cl_object *obj = slice->cpl_obj;
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct cl_page *pg = slice->cpl_page;
+ struct page *vmpage = cp->cpg_page;
+
+ int result;
+
+ ENTRY;
+
+ LINVRNT(cl_page_is_vmlocked(env, pg));
+ LASSERT(vmpage->mapping->host == ccc_object_inode(obj));
+
+ result = 0;
+
+ CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to);
+ if (!PageUptodate(vmpage)) {
+ /*
+ * We're completely overwriting an existing page, so _don't_
+ * set it up to date until commit_write
+ */
+ if (from == 0 && to == PAGE_CACHE_SIZE) {
+ CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
+ POISON_PAGE(page, 0x11);
+ } else
+ result = vvp_io_prepare_partial(env, ios->cis_io, obj,
+ pg, cp, from, to);
+ } else
+ CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n");
+ RETURN(result);
+}
+
+static int vvp_io_commit_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct cl_object *obj = slice->cpl_obj;
+ struct cl_io *io = ios->cis_io;
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct cl_page *pg = slice->cpl_page;
+ struct inode *inode = ccc_object_inode(obj);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct page *vmpage = cp->cpg_page;
+
+ int result;
+ int tallyop;
+ loff_t size;
+
+ ENTRY;
+
+ LINVRNT(cl_page_is_vmlocked(env, pg));
+ LASSERT(vmpage->mapping->host == inode);
+
+ LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "commiting page write\n");
+ CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);
+
+ /*
+ * queue a write for some time in the future the first time we
+ * dirty the page.
+ *
+ * This is different from what other file systems do: they usually
+ * just mark page (and some of its buffers) dirty and rely on
+ * balance_dirty_pages() to start a write-back. Lustre wants write-back
+ * to be started earlier for the following reasons:
+ *
+ * (1) with a large number of clients we need to limit the amount
+ * of cached data on the clients a lot;
+ *
+ * (2) large compute jobs generally want compute-only then io-only
+ * and the IO should complete as quickly as possible;
+ *
+ * (3) IO is batched up to the RPC size and is async until the
+ * client max cache is hit
+ * (/proc/fs/lustre/osc/OSC.../max_dirty_mb)
+ *
+ */
+ if (!PageDirty(vmpage)) {
+ tallyop = LPROC_LL_DIRTY_MISSES;
+ result = cl_page_cache_add(env, io, pg, CRT_WRITE);
+ if (result == 0) {
+ /* page was added into cache successfully. */
+ set_page_dirty(vmpage);
+ vvp_write_pending(cl2ccc(obj), cp);
+ } else if (result == -EDQUOT) {
+ pgoff_t last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+ bool need_clip = true;
+
+ /*
+ * Client ran out of disk space grant. Possible
+ * strategies are:
+ *
+ * (a) do a sync write, renewing grant;
+ *
+ * (b) stop writing on this stripe, switch to the
+ * next one.
+ *
+ * (b) is a part of "parallel io" design that is the
+ * ultimate goal. (a) is what "old" client did, and
+ * what the new code continues to do for the time
+ * being.
+ */
+ if (last_index > pg->cp_index) {
+ to = PAGE_CACHE_SIZE;
+ need_clip = false;
+ } else if (last_index == pg->cp_index) {
+ int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
+ if (to < size_to)
+ to = size_to;
+ }
+ if (need_clip)
+ cl_page_clip(env, pg, 0, to);
+ result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
+ if (result)
+ CERROR("Write page %lu of inode %p failed %d\n",
+ pg->cp_index, inode, result);
+ }
+ } else {
+ tallyop = LPROC_LL_DIRTY_HITS;
+ result = 0;
+ }
+ ll_stats_ops_tally(sbi, tallyop, 1);
+
+ /* Inode should be marked DIRTY even if no new page was marked DIRTY
+ * because page could have been not flushed between 2 modifications.
+ * It is important the file is marked DIRTY as soon as the I/O is done
+ * Indeed, when cache is flushed, file could be already closed and it
+ * is too late to warn the MDT.
+ * It is acceptable that file is marked DIRTY even if I/O is dropped
+ * for some reasons before being flushed to OST.
+ */
+ if (result == 0) {
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ }
+
+ size = cl_offset(obj, pg->cp_index) + to;
+
+ ll_inode_size_lock(inode);
+ if (result == 0) {
+ if (size > i_size_read(inode)) {
+ cl_isize_write_nolock(inode, size);
+ CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ (unsigned long)size);
+ }
+ cl_page_export(env, pg, 1);
+ } else {
+ if (size > i_size_read(inode))
+ cl_page_discard(env, io, pg);
+ }
+ ll_inode_size_unlock(inode);
+ RETURN(result);
+}
+
+static const struct cl_io_operations vvp_io_ops = {
+ .op = {
+ [CIT_READ] = {
+ .cio_fini = vvp_io_read_fini,
+ .cio_lock = vvp_io_read_lock,
+ .cio_start = vvp_io_read_start,
+ .cio_advance = ccc_io_advance
+ },
+ [CIT_WRITE] = {
+ .cio_fini = vvp_io_fini,
+ .cio_lock = vvp_io_write_lock,
+ .cio_start = vvp_io_write_start,
+ .cio_advance = ccc_io_advance
+ },
+ [CIT_SETATTR] = {
+ .cio_fini = vvp_io_setattr_fini,
+ .cio_iter_init = vvp_io_setattr_iter_init,
+ .cio_lock = vvp_io_setattr_lock,
+ .cio_start = vvp_io_setattr_start,
+ .cio_end = vvp_io_setattr_end
+ },
+ [CIT_FAULT] = {
+ .cio_fini = vvp_io_fault_fini,
+ .cio_iter_init = vvp_io_fault_iter_init,
+ .cio_lock = vvp_io_fault_lock,
+ .cio_start = vvp_io_fault_start,
+ .cio_end = ccc_io_end
+ },
+ [CIT_FSYNC] = {
+ .cio_start = vvp_io_fsync_start,
+ .cio_fini = vvp_io_fini
+ },
+ [CIT_MISC] = {
+ .cio_fini = vvp_io_fini
+ }
+ },
+ .cio_read_page = vvp_io_read_page,
+ .cio_prepare_write = vvp_io_prepare_write,
+ .cio_commit_write = vvp_io_commit_write
+};
+
+int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io)
+{
+ struct vvp_io *vio = vvp_env_io(env);
+ struct ccc_io *cio = ccc_env_io(env);
+ struct inode *inode = ccc_object_inode(obj);
+ int result;
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ ENTRY;
+
+ CL_IO_SLICE_CLEAN(cio, cui_cl);
+ cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
+ vio->cui_ra_window_set = 0;
+ result = 0;
+ if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
+ size_t count;
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ count = io->u.ci_rw.crw_count;
+ /* "If nbyte is 0, read() will return 0 and have no other
+ * results." -- Single Unix Spec */
+ if (count == 0)
+ result = 1;
+ else {
+ cio->cui_tot_count = count;
+ cio->cui_tot_nrsegs = 0;
+ }
+ /* for read/write, we store the jobid in the inode, and
+ * it'll be fetched by osc when building RPC.
+ *
+ * it's not accurate if the file is shared by different
+ * jobs.
+ */
+ lustre_get_jobid(lli->lli_jobid);
+ } else if (io->ci_type == CIT_SETATTR) {
+ if (!cl_io_is_trunc(io))
+ io->ci_lockreq = CILR_MANDATORY;
+ }
+
+ /* ignore layout change for generic CIT_MISC but not for glimpse.
+ * io context for glimpse must set ci_verify_layout to true,
+ * see cl_glimpse_size0() for details. */
+ if (io->ci_type == CIT_MISC && !io->ci_verify_layout)
+ io->ci_ignore_layout = 1;
+
+ /* Enqueue layout lock and get layout version. We need to do this
+ * even for operations requiring to open file, such as read and write,
+ * because it might not grant layout lock in IT_OPEN. */
+ if (result == 0 && !io->ci_ignore_layout) {
+ result = ll_layout_refresh(inode, &cio->cui_layout_gen);
+ if (result == -ENOENT)
+ /* If the inode on MDS has been removed, but the objects
+ * on OSTs haven't been destroyed (async unlink), layout
+ * fetch will return -ENOENT, we'd ingore this error
+ * and continue with dirty flush. LU-3230. */
+ result = 0;
+ if (result < 0)
+ CERROR("%s: refresh file layout " DFID " error %d.\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(lu_object_fid(&obj->co_lu)), result);
+ }
+
+ RETURN(result);
+}
+
+static struct vvp_io *cl2vvp_io(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ /* Caling just for assertion */
+ cl2ccc_io(env, slice);
+ return vvp_env_io(env);
+}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
new file mode 100644
index 000000000000..9b8712bccd92
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -0,0 +1,85 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_lock for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+
+#include <obd.h>
+#include <lustre_lite.h>
+
+#include "vvp_internal.h"
+
+/*****************************************************************************
+ *
+ * Vvp lock functions.
+ *
+ */
+
+/**
+ * Estimates lock value for the purpose of managing the lock cache during
+ * memory shortages.
+ *
+ * Locks for memory mapped files are almost infinitely precious, others are
+ * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
+ * ordered within themselves by weights assigned from other layers.
+ */
+static unsigned long vvp_lock_weigh(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct ccc_object *cob = cl2ccc(slice->cls_obj);
+
+ ENTRY;
+ RETURN(atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0);
+}
+
+static const struct cl_lock_operations vvp_lock_ops = {
+ .clo_delete = ccc_lock_delete,
+ .clo_fini = ccc_lock_fini,
+ .clo_enqueue = ccc_lock_enqueue,
+ .clo_wait = ccc_lock_wait,
+ .clo_unuse = ccc_lock_unuse,
+ .clo_fits_into = ccc_lock_fits_into,
+ .clo_state = ccc_lock_state,
+ .clo_weigh = vvp_lock_weigh
+};
+
+int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io)
+{
+ return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops);
+}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
new file mode 100644
index 000000000000..01edc5b63e13
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -0,0 +1,186 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl_object implementation for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+#include <lustre_lite.h>
+
+#include "vvp_internal.h"
+
+/*****************************************************************************
+ *
+ * Object operations.
+ *
+ */
+
+static int vvp_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o)
+{
+ struct ccc_object *obj = lu2ccc(o);
+ struct inode *inode = obj->cob_inode;
+ struct ll_inode_info *lli;
+
+ (*p)(env, cookie, "(%s %d %d) inode: %p ",
+ list_empty(&obj->cob_pending_list) ? "-" : "+",
+ obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt),
+ inode);
+ if (inode) {
+ lli = ll_i2info(inode);
+ (*p)(env, cookie, "%lu/%u %o %u %d %p "DFID,
+ inode->i_ino, inode->i_generation, inode->i_mode,
+ inode->i_nlink, atomic_read(&inode->i_count),
+ lli->lli_clob, PFID(&lli->lli_fid));
+ }
+ return 0;
+}
+
+static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ struct inode *inode = ccc_object_inode(obj);
+
+ /*
+ * lov overwrites most of these fields in
+ * lov_attr_get()->...lov_merge_lvb_kms(), except when inode
+ * attributes are newer.
+ */
+
+ attr->cat_size = i_size_read(inode);
+ attr->cat_mtime = LTIME_S(inode->i_mtime);
+ attr->cat_atime = LTIME_S(inode->i_atime);
+ attr->cat_ctime = LTIME_S(inode->i_ctime);
+ attr->cat_blocks = inode->i_blocks;
+ attr->cat_uid = inode->i_uid;
+ attr->cat_gid = inode->i_gid;
+ /* KMS is not known by this layer */
+ return 0; /* layers below have to fill in the rest */
+}
+
+static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid)
+{
+ struct inode *inode = ccc_object_inode(obj);
+
+ if (valid & CAT_UID)
+ inode->i_uid = attr->cat_uid;
+ if (valid & CAT_GID)
+ inode->i_gid = attr->cat_gid;
+ if (valid & CAT_ATIME)
+ LTIME_S(inode->i_atime) = attr->cat_atime;
+ if (valid & CAT_MTIME)
+ LTIME_S(inode->i_mtime) = attr->cat_mtime;
+ if (valid & CAT_CTIME)
+ LTIME_S(inode->i_ctime) = attr->cat_ctime;
+ if (0 && valid & CAT_SIZE)
+ cl_isize_write_nolock(inode, attr->cat_size);
+ /* not currently necessary */
+ if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
+ mark_inode_dirty(inode);
+ return 0;
+}
+
+int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf)
+{
+ struct ll_inode_info *lli = ll_i2info(conf->coc_inode);
+
+ if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
+ lli->lli_layout_gen = LL_LAYOUT_GEN_NONE;
+ return 0;
+ }
+
+ if (conf->coc_opc != OBJECT_CONF_SET)
+ return 0;
+
+ if (conf->u.coc_md != NULL && conf->u.coc_md->lsm != NULL) {
+ CDEBUG(D_VFSTRACE, "layout lock change: %u -> %u\n",
+ lli->lli_layout_gen,
+ conf->u.coc_md->lsm->lsm_layout_gen);
+
+ lli->lli_has_smd = true;
+ lli->lli_layout_gen = conf->u.coc_md->lsm->lsm_layout_gen;
+ } else {
+ CDEBUG(D_VFSTRACE, "layout lock destroyed: %u.\n",
+ lli->lli_layout_gen);
+
+ lli->lli_has_smd = false;
+ lli->lli_layout_gen = LL_LAYOUT_GEN_EMPTY;
+ }
+ return 0;
+}
+
+static const struct cl_object_operations vvp_ops = {
+ .coo_page_init = vvp_page_init,
+ .coo_lock_init = vvp_lock_init,
+ .coo_io_init = vvp_io_init,
+ .coo_attr_get = vvp_attr_get,
+ .coo_attr_set = vvp_attr_set,
+ .coo_conf_set = vvp_conf_set,
+ .coo_glimpse = ccc_object_glimpse
+};
+
+static const struct lu_object_operations vvp_lu_obj_ops = {
+ .loo_object_init = ccc_object_init,
+ .loo_object_free = ccc_object_free,
+ .loo_object_print = vvp_object_print
+};
+
+struct ccc_object *cl_inode2ccc(struct inode *inode)
+{
+ struct cl_inode_info *lli = cl_i2info(inode);
+ struct cl_object *obj = lli->lli_clob;
+ struct lu_object *lu;
+
+ LASSERT(obj != NULL);
+ lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
+ LASSERT(lu != NULL);
+ return lu2ccc(lu);
+}
+
+struct lu_object *vvp_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev)
+{
+ return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops);
+}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
new file mode 100644
index 000000000000..4568e69bb9f0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -0,0 +1,558 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_page for VVP layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+
+#include <obd.h>
+#include <lustre_lite.h>
+
+#include "vvp_internal.h"
+
+/*****************************************************************************
+ *
+ * Page operations.
+ *
+ */
+
+static void vvp_page_fini_common(struct ccc_page *cp)
+{
+ struct page *vmpage = cp->cpg_page;
+
+ LASSERT(vmpage != NULL);
+ page_cache_release(vmpage);
+}
+
+static void vvp_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct page *vmpage = cp->cpg_page;
+
+ /*
+ * vmpage->private was already cleared when page was moved into
+ * VPG_FREEING state.
+ */
+ LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
+ vvp_page_fini_common(cp);
+}
+
+static int vvp_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io,
+ int nonblock)
+{
+ struct ccc_page *vpg = cl2ccc_page(slice);
+ struct page *vmpage = vpg->cpg_page;
+
+ LASSERT(vmpage != NULL);
+ if (nonblock) {
+ if (!trylock_page(vmpage))
+ return -EAGAIN;
+
+ if (unlikely(PageWriteback(vmpage))) {
+ unlock_page(vmpage);
+ return -EAGAIN;
+ }
+
+ return 0;
+ }
+
+ lock_page(vmpage);
+ wait_on_page_writeback(vmpage);
+ return 0;
+}
+
+static void vvp_page_assume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct page *vmpage = cl2vm_page(slice);
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+ wait_on_page_writeback(vmpage);
+}
+
+static void vvp_page_unassume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct page *vmpage = cl2vm_page(slice);
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+}
+
+static void vvp_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io)
+{
+ struct page *vmpage = cl2vm_page(slice);
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+
+ unlock_page(cl2vm_page(slice));
+}
+
+static void vvp_page_discard(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct page *vmpage = cl2vm_page(slice);
+ struct address_space *mapping;
+ struct ccc_page *cpg = cl2ccc_page(slice);
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+
+ mapping = vmpage->mapping;
+
+ if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
+ ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
+
+ /*
+ * truncate_complete_page() calls
+ * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+ */
+ truncate_complete_page(mapping, vmpage);
+}
+
+static int vvp_page_unmap(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct page *vmpage = cl2vm_page(slice);
+ __u64 offset;
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+
+ offset = vmpage->index << PAGE_CACHE_SHIFT;
+
+ /*
+ * XXX is it safe to call this with the page lock held?
+ */
+ ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_CACHE_SIZE);
+ return 0;
+}
+
+static void vvp_page_delete(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct page *vmpage = cl2vm_page(slice);
+ struct inode *inode = vmpage->mapping->host;
+ struct cl_object *obj = slice->cpl_obj;
+
+ LASSERT(PageLocked(vmpage));
+ LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
+ LASSERT(inode == ccc_object_inode(obj));
+
+ vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
+ ClearPagePrivate(vmpage);
+ vmpage->private = 0;
+ /*
+ * Reference from vmpage to cl_page is removed, but the reference back
+ * is still here. It is removed later in vvp_page_fini().
+ */
+}
+
+static void vvp_page_export(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int uptodate)
+{
+ struct page *vmpage = cl2vm_page(slice);
+
+ LASSERT(vmpage != NULL);
+ LASSERT(PageLocked(vmpage));
+ if (uptodate)
+ SetPageUptodate(vmpage);
+ else
+ ClearPageUptodate(vmpage);
+}
+
+static int vvp_page_is_vmlocked(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
+}
+
+static int vvp_page_prep_read(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ ENTRY;
+ /* Skip the page already marked as PG_uptodate. */
+ RETURN(PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0);
+}
+
+static int vvp_page_prep_write(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct page *vmpage = cl2vm_page(slice);
+
+ LASSERT(PageLocked(vmpage));
+ LASSERT(!PageDirty(vmpage));
+
+ set_page_writeback(vmpage);
+ vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+
+ return 0;
+}
+
+/**
+ * Handles page transfer errors at VM level.
+ *
+ * This takes inode as a separate argument, because inode on which error is to
+ * be set can be different from \a vmpage inode in case of direct-io.
+ */
+static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
+{
+ struct ccc_object *obj = cl_inode2ccc(inode);
+
+ if (ioret == 0) {
+ ClearPageError(vmpage);
+ obj->cob_discard_page_warned = 0;
+ } else {
+ SetPageError(vmpage);
+ if (ioret == -ENOSPC)
+ set_bit(AS_ENOSPC, &inode->i_mapping->flags);
+ else
+ set_bit(AS_EIO, &inode->i_mapping->flags);
+
+ if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
+ obj->cob_discard_page_warned == 0) {
+ obj->cob_discard_page_warned = 1;
+ ll_dirty_page_discard_warn(vmpage, ioret);
+ }
+ }
+}
+
+static void vvp_page_completion_read(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct page *vmpage = cp->cpg_page;
+ struct cl_page *page = cl_page_top(slice->cpl_page);
+ struct inode *inode = ccc_object_inode(page->cp_obj);
+ ENTRY;
+
+ LASSERT(PageLocked(vmpage));
+ CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
+
+ if (cp->cpg_defer_uptodate)
+ ll_ra_count_put(ll_i2sbi(inode), 1);
+
+ if (ioret == 0) {
+ if (!cp->cpg_defer_uptodate)
+ cl_page_export(env, page, 1);
+ } else
+ cp->cpg_defer_uptodate = 0;
+
+ if (page->cp_sync_io == NULL)
+ unlock_page(vmpage);
+
+ EXIT;
+}
+
+static void vvp_page_completion_write(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct cl_page *pg = slice->cpl_page;
+ struct page *vmpage = cp->cpg_page;
+ ENTRY;
+
+ LASSERT(ergo(pg->cp_sync_io != NULL, PageLocked(vmpage)));
+ LASSERT(PageWriteback(vmpage));
+
+ CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
+
+ /*
+ * TODO: Actually it makes sense to add the page into oap pending
+ * list again and so that we don't need to take the page out from
+ * SoM write pending list, if we just meet a recoverable error,
+ * -ENOMEM, etc.
+ * To implement this, we just need to return a non zero value in
+ * ->cpo_completion method. The underlying transfer should be notified
+ * and then re-add the page into pending transfer queue. -jay
+ */
+
+ cp->cpg_write_queued = 0;
+ vvp_write_complete(cl2ccc(slice->cpl_obj), cp);
+
+ /*
+ * Only mark the page error only when it's an async write because
+ * applications won't wait for IO to finish.
+ */
+ if (pg->cp_sync_io == NULL)
+ vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);
+
+ end_page_writeback(vmpage);
+ EXIT;
+}
+
+/**
+ * Implements cl_page_operations::cpo_make_ready() method.
+ *
+ * This is called to yank a page from the transfer cache and to send it out as
+ * a part of transfer. This function try-locks the page. If try-lock failed,
+ * page is owned by some concurrent IO, and should be skipped (this is bad,
+ * but hopefully rare situation, as it usually results in transfer being
+ * shorter than possible).
+ *
+ * \retval 0 success, page can be placed into transfer
+ *
+ * \retval -EAGAIN page is either used by concurrent IO has been
+ * truncated. Skip it.
+ */
+static int vvp_page_make_ready(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct page *vmpage = cl2vm_page(slice);
+ struct cl_page *pg = slice->cpl_page;
+ int result = 0;
+
+ lock_page(vmpage);
+ if (clear_page_dirty_for_io(vmpage)) {
+ LASSERT(pg->cp_state == CPS_CACHED);
+ /* This actually clears the dirty bit in the radix
+ * tree. */
+ set_page_writeback(vmpage);
+ vvp_write_pending(cl2ccc(slice->cpl_obj),
+ cl2ccc_page(slice));
+ CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
+ } else if (pg->cp_state == CPS_PAGEOUT) {
+ /* is it possible for osc_flush_async_page() to already
+ * make it ready? */
+ result = -EALREADY;
+ } else {
+ CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n",
+ pg->cp_state);
+ LBUG();
+ }
+ unlock_page(vmpage);
+ RETURN(result);
+}
+
+static int vvp_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
+{
+ struct ccc_page *vp = cl2ccc_page(slice);
+ struct page *vmpage = vp->cpg_page;
+
+ (*printer)(env, cookie, LUSTRE_VVP_NAME"-page@%p(%d:%d:%d) "
+ "vm@%p ",
+ vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
+ vp->cpg_write_queued, vmpage);
+ if (vmpage != NULL) {
+ (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
+ (long)vmpage->flags, page_count(vmpage),
+ page_mapcount(vmpage), vmpage->private,
+ page_index(vmpage),
+ list_empty(&vmpage->lru) ? "not-" : "");
+ }
+ (*printer)(env, cookie, "\n");
+ return 0;
+}
+
+static const struct cl_page_operations vvp_page_ops = {
+ .cpo_own = vvp_page_own,
+ .cpo_assume = vvp_page_assume,
+ .cpo_unassume = vvp_page_unassume,
+ .cpo_disown = vvp_page_disown,
+ .cpo_vmpage = ccc_page_vmpage,
+ .cpo_discard = vvp_page_discard,
+ .cpo_delete = vvp_page_delete,
+ .cpo_unmap = vvp_page_unmap,
+ .cpo_export = vvp_page_export,
+ .cpo_is_vmlocked = vvp_page_is_vmlocked,
+ .cpo_fini = vvp_page_fini,
+ .cpo_print = vvp_page_print,
+ .cpo_is_under_lock = ccc_page_is_under_lock,
+ .io = {
+ [CRT_READ] = {
+ .cpo_prep = vvp_page_prep_read,
+ .cpo_completion = vvp_page_completion_read,
+ .cpo_make_ready = ccc_fail,
+ },
+ [CRT_WRITE] = {
+ .cpo_prep = vvp_page_prep_write,
+ .cpo_completion = vvp_page_completion_write,
+ .cpo_make_ready = vvp_page_make_ready,
+ }
+ }
+};
+
+static void vvp_transient_page_verify(const struct cl_page *page)
+{
+ struct inode *inode = ccc_object_inode(page->cp_obj);
+
+ LASSERT(!mutex_trylock(&inode->i_mutex));
+}
+
+static int vvp_transient_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused, int nonblock)
+{
+ vvp_transient_page_verify(slice->cpl_page);
+ return 0;
+}
+
+static void vvp_transient_page_assume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ vvp_transient_page_verify(slice->cpl_page);
+}
+
+static void vvp_transient_page_unassume(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ vvp_transient_page_verify(slice->cpl_page);
+}
+
+static void vvp_transient_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ vvp_transient_page_verify(slice->cpl_page);
+}
+
+static void vvp_transient_page_discard(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct cl_page *page = slice->cpl_page;
+
+ vvp_transient_page_verify(slice->cpl_page);
+
+ /*
+ * For transient pages, remove it from the radix tree.
+ */
+ cl_page_delete(env, page);
+}
+
+static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct inode *inode = ccc_object_inode(slice->cpl_obj);
+ int locked;
+
+ locked = !mutex_trylock(&inode->i_mutex);
+ if (!locked)
+ mutex_unlock(&inode->i_mutex);
+ return locked ? -EBUSY : -ENODATA;
+}
+
+static void
+vvp_transient_page_completion(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ vvp_transient_page_verify(slice->cpl_page);
+}
+
+static void vvp_transient_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ struct ccc_page *cp = cl2ccc_page(slice);
+ struct cl_page *clp = slice->cpl_page;
+ struct ccc_object *clobj = cl2ccc(clp->cp_obj);
+
+ vvp_page_fini_common(cp);
+ LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ clobj->cob_transient_pages--;
+}
+
+static const struct cl_page_operations vvp_transient_page_ops = {
+ .cpo_own = vvp_transient_page_own,
+ .cpo_assume = vvp_transient_page_assume,
+ .cpo_unassume = vvp_transient_page_unassume,
+ .cpo_disown = vvp_transient_page_disown,
+ .cpo_discard = vvp_transient_page_discard,
+ .cpo_vmpage = ccc_page_vmpage,
+ .cpo_fini = vvp_transient_page_fini,
+ .cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
+ .cpo_print = vvp_page_print,
+ .cpo_is_under_lock = ccc_page_is_under_lock,
+ .io = {
+ [CRT_READ] = {
+ .cpo_prep = ccc_transient_page_prep,
+ .cpo_completion = vvp_transient_page_completion,
+ },
+ [CRT_WRITE] = {
+ .cpo_prep = ccc_transient_page_prep,
+ .cpo_completion = vvp_transient_page_completion,
+ }
+ }
+};
+
+int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ struct ccc_page *cpg = cl_object_page_slice(obj, page);
+
+ CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+
+ cpg->cpg_page = vmpage;
+ page_cache_get(vmpage);
+
+ INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+ if (page->cp_type == CPT_CACHEABLE) {
+ SetPagePrivate(vmpage);
+ vmpage->private = (unsigned long)page;
+ cl_page_slice_add(page, &cpg->cpg_cl, obj,
+ &vvp_page_ops);
+ } else {
+ struct ccc_object *clobj = cl2ccc(obj);
+
+ LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ cl_page_slice_add(page, &cpg->cpg_cl, obj,
+ &vvp_transient_page_ops);
+ clobj->cob_transient_pages++;
+ }
+ return 0;
+}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
new file mode 100644
index 000000000000..4176264984bb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -0,0 +1,578 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/selinux.h>
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include <lustre_eacl.h>
+
+#include "llite_internal.h"
+
+#define XATTR_USER_T (1)
+#define XATTR_TRUSTED_T (2)
+#define XATTR_SECURITY_T (3)
+#define XATTR_ACL_ACCESS_T (4)
+#define XATTR_ACL_DEFAULT_T (5)
+#define XATTR_LUSTRE_T (6)
+#define XATTR_OTHER_T (7)
+
+static
+int get_xattr_type(const char *name)
+{
+ if (!strcmp(name, POSIX_ACL_XATTR_ACCESS))
+ return XATTR_ACL_ACCESS_T;
+
+ if (!strcmp(name, POSIX_ACL_XATTR_DEFAULT))
+ return XATTR_ACL_DEFAULT_T;
+
+ if (!strncmp(name, XATTR_USER_PREFIX,
+ sizeof(XATTR_USER_PREFIX) - 1))
+ return XATTR_USER_T;
+
+ if (!strncmp(name, XATTR_TRUSTED_PREFIX,
+ sizeof(XATTR_TRUSTED_PREFIX) - 1))
+ return XATTR_TRUSTED_T;
+
+ if (!strncmp(name, XATTR_SECURITY_PREFIX,
+ sizeof(XATTR_SECURITY_PREFIX) - 1))
+ return XATTR_SECURITY_T;
+
+ if (!strncmp(name, XATTR_LUSTRE_PREFIX,
+ sizeof(XATTR_LUSTRE_PREFIX) - 1))
+ return XATTR_LUSTRE_T;
+
+ return XATTR_OTHER_T;
+}
+
+static
+int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type)
+{
+ if ((xattr_type == XATTR_ACL_ACCESS_T ||
+ xattr_type == XATTR_ACL_DEFAULT_T) &&
+ !(sbi->ll_flags & LL_SBI_ACL))
+ return -EOPNOTSUPP;
+
+ if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR))
+ return -EOPNOTSUPP;
+ if (xattr_type == XATTR_TRUSTED_T && !cfs_capable(CFS_CAP_SYS_ADMIN))
+ return -EPERM;
+ if (xattr_type == XATTR_OTHER_T)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static
+int ll_setxattr_common(struct inode *inode, const char *name,
+ const void *value, size_t size,
+ int flags, __u64 valid)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *req;
+ int xattr_type, rc;
+ struct obd_capa *oc;
+ posix_acl_xattr_header *new_value = NULL;
+ struct rmtacl_ctl_entry *rce = NULL;
+ ext_acl_xattr_header *acl = NULL;
+ const char *pv = value;
+ ENTRY;
+
+ xattr_type = get_xattr_type(name);
+ rc = xattr_type_filter(sbi, xattr_type);
+ if (rc)
+ RETURN(rc);
+
+ /* b10667: ignore lustre special xattr for now */
+ if ((xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) ||
+ (xattr_type == XATTR_LUSTRE_T && strcmp(name, "lustre.lov") == 0))
+ RETURN(0);
+
+ /* b15587: ignore security.capability xattr for now */
+ if ((xattr_type == XATTR_SECURITY_T &&
+ strcmp(name, "security.capability") == 0))
+ RETURN(0);
+
+ /* LU-549: Disable security.selinux when selinux is disabled */
+ if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
+ strcmp(name, "security.selinux") == 0)
+ RETURN(-EOPNOTSUPP);
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
+ (xattr_type == XATTR_ACL_ACCESS_T ||
+ xattr_type == XATTR_ACL_DEFAULT_T)) {
+ rce = rct_search(&sbi->ll_rct, current_pid());
+ if (rce == NULL ||
+ (rce->rce_ops != RMT_LSETFACL &&
+ rce->rce_ops != RMT_RSETFACL))
+ RETURN(-EOPNOTSUPP);
+
+ if (rce->rce_ops == RMT_LSETFACL) {
+ struct eacl_entry *ee;
+
+ ee = et_search_del(&sbi->ll_et, current_pid(),
+ ll_inode2fid(inode), xattr_type);
+ LASSERT(ee != NULL);
+ if (valid & OBD_MD_FLXATTR) {
+ acl = lustre_acl_xattr_merge2ext(
+ (posix_acl_xattr_header *)value,
+ size, ee->ee_acl);
+ if (IS_ERR(acl)) {
+ ee_free(ee);
+ RETURN(PTR_ERR(acl));
+ }
+ size = CFS_ACL_XATTR_SIZE(\
+ le32_to_cpu(acl->a_count), \
+ ext_acl_xattr);
+ pv = (const char *)acl;
+ }
+ ee_free(ee);
+ } else if (rce->rce_ops == RMT_RSETFACL) {
+ size = lustre_posix_acl_xattr_filter(
+ (posix_acl_xattr_header *)value,
+ size, &new_value);
+ if (unlikely(size < 0))
+ RETURN(size);
+
+ pv = (const char *)new_value;
+ } else
+ RETURN(-EOPNOTSUPP);
+
+ valid |= rce_ops2valid(rce->rce_ops);
+ }
+#endif
+ oc = ll_mdscapa_get(inode);
+ rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ valid, name, pv, size, 0, flags, ll_i2suppgid(inode),
+ &req);
+ capa_put(oc);
+#ifdef CONFIG_FS_POSIX_ACL
+ if (new_value != NULL)
+ lustre_posix_acl_xattr_free(new_value, size);
+ if (acl != NULL)
+ lustre_ext_acl_xattr_free(acl);
+#endif
+ if (rc) {
+ if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+ LCONSOLE_INFO("Disabling user_xattr feature because "
+ "it is not supported on the server\n");
+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
+ }
+ RETURN(rc);
+ }
+
+ ptlrpc_req_finished(req);
+ RETURN(0);
+}
+
+int ll_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+
+ LASSERT(inode);
+ LASSERT(name);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
+ inode->i_ino, inode->i_generation, inode, name);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
+
+ if ((strncmp(name, XATTR_TRUSTED_PREFIX,
+ sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
+ strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
+ (strncmp(name, XATTR_LUSTRE_PREFIX,
+ sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
+ strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+ struct lov_user_md *lump = (struct lov_user_md *)value;
+ int rc = 0;
+
+ /* Attributes that are saved via getxattr will always have
+ * the stripe_offset as 0. Instead, the MDS should be
+ * allowed to pick the starting OST index. b=17846 */
+ if (lump != NULL && lump->lmm_stripe_offset == 0)
+ lump->lmm_stripe_offset = -1;
+
+ if (lump != NULL && S_ISREG(inode->i_mode)) {
+ struct file f;
+ int flags = FMODE_WRITE;
+ int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
+ sizeof(*lump) : sizeof(struct lov_user_md_v3);
+
+ f.f_dentry = dentry;
+ rc = ll_lov_setstripe_ea_info(inode, &f, flags, lump,
+ lum_size);
+ /* b10667: rc always be 0 here for now */
+ rc = 0;
+ } else if (S_ISDIR(inode->i_mode)) {
+ rc = ll_dir_setstripe(inode, lump, 0);
+ }
+
+ return rc;
+
+ } else if (strcmp(name, XATTR_NAME_LMA) == 0 ||
+ strcmp(name, XATTR_NAME_LINK) == 0)
+ return 0;
+
+ return ll_setxattr_common(inode, name, value, size, flags,
+ OBD_MD_FLXATTR);
+}
+
+int ll_removexattr(struct dentry *dentry, const char *name)
+{
+ struct inode *inode = dentry->d_inode;
+
+ LASSERT(inode);
+ LASSERT(name);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
+ inode->i_ino, inode->i_generation, inode, name);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
+ return ll_setxattr_common(inode, name, NULL, 0, 0,
+ OBD_MD_FLXATTRRM);
+}
+
+static
+int ll_getxattr_common(struct inode *inode, const char *name,
+ void *buffer, size_t size, __u64 valid)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *req = NULL;
+ struct mdt_body *body;
+ int xattr_type, rc;
+ void *xdata;
+ struct obd_capa *oc;
+ struct rmtacl_ctl_entry *rce = NULL;
+ ENTRY;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
+ inode->i_ino, inode->i_generation, inode);
+
+ /* listxattr have slightly different behavior from of ext3:
+ * without 'user_xattr' ext3 will list all xattr names but
+ * filtered out "^user..*"; we list them all for simplicity.
+ */
+ if (!name) {
+ xattr_type = XATTR_OTHER_T;
+ goto do_getxattr;
+ }
+
+ xattr_type = get_xattr_type(name);
+ rc = xattr_type_filter(sbi, xattr_type);
+ if (rc)
+ RETURN(rc);
+
+ /* b15587: ignore security.capability xattr for now */
+ if ((xattr_type == XATTR_SECURITY_T &&
+ strcmp(name, "security.capability") == 0))
+ RETURN(-ENODATA);
+
+ /* LU-549: Disable security.selinux when selinux is disabled */
+ if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
+ strcmp(name, "security.selinux") == 0)
+ RETURN(-EOPNOTSUPP);
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
+ (xattr_type == XATTR_ACL_ACCESS_T ||
+ xattr_type == XATTR_ACL_DEFAULT_T)) {
+ rce = rct_search(&sbi->ll_rct, current_pid());
+ if (rce == NULL ||
+ (rce->rce_ops != RMT_LSETFACL &&
+ rce->rce_ops != RMT_LGETFACL &&
+ rce->rce_ops != RMT_RSETFACL &&
+ rce->rce_ops != RMT_RGETFACL))
+ RETURN(-EOPNOTSUPP);
+ }
+
+ /* posix acl is under protection of LOOKUP lock. when calling to this,
+ * we just have path resolution to the target inode, so we have great
+ * chance that cached ACL is uptodate.
+ */
+ if (xattr_type == XATTR_ACL_ACCESS_T &&
+ !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct posix_acl *acl;
+
+ spin_lock(&lli->lli_lock);
+ acl = posix_acl_dup(lli->lli_posix_acl);
+ spin_unlock(&lli->lli_lock);
+
+ if (!acl)
+ RETURN(-ENODATA);
+
+ rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+ posix_acl_release(acl);
+ RETURN(rc);
+ }
+ if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
+ RETURN(-ENODATA);
+#endif
+
+do_getxattr:
+ oc = ll_mdscapa_get(inode);
+ rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
+ name, NULL, 0, size, 0, &req);
+ capa_put(oc);
+ if (rc) {
+ if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+ LCONSOLE_INFO("Disabling user_xattr feature because "
+ "it is not supported on the server\n");
+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
+ }
+ RETURN(rc);
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body);
+
+ /* only detect the xattr size */
+ if (size == 0)
+ GOTO(out, rc = body->eadatasize);
+
+ if (size < body->eadatasize) {
+ CERROR("server bug: replied size %u > %u\n",
+ body->eadatasize, (int)size);
+ GOTO(out, rc = -ERANGE);
+ }
+
+ if (body->eadatasize == 0)
+ GOTO(out, rc = -ENODATA);
+
+ /* do not need swab xattr data */
+ xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
+ body->eadatasize);
+ if (!xdata)
+ GOTO(out, rc = -EFAULT);
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (body->eadatasize >= 0 && rce && rce->rce_ops == RMT_LSETFACL) {
+ ext_acl_xattr_header *acl;
+
+ acl = lustre_posix_acl_xattr_2ext((posix_acl_xattr_header *)xdata,
+ body->eadatasize);
+ if (IS_ERR(acl))
+ GOTO(out, rc = PTR_ERR(acl));
+
+ rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
+ xattr_type, acl);
+ if (unlikely(rc < 0)) {
+ lustre_ext_acl_xattr_free(acl);
+ GOTO(out, rc);
+ }
+ }
+#endif
+
+ if (body->eadatasize == 0) {
+ rc = -ENODATA;
+ } else {
+ LASSERT(buffer);
+ memcpy(buffer, xdata, body->eadatasize);
+ rc = body->eadatasize;
+ }
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+ssize_t ll_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+
+ LASSERT(inode);
+ LASSERT(name);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
+ inode->i_ino, inode->i_generation, inode, name);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
+
+ if ((strncmp(name, XATTR_TRUSTED_PREFIX,
+ sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
+ strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
+ (strncmp(name, XATTR_LUSTRE_PREFIX,
+ sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
+ strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+ struct lov_stripe_md *lsm;
+ struct lov_user_md *lump;
+ struct lov_mds_md *lmm = NULL;
+ struct ptlrpc_request *request = NULL;
+ int rc = 0, lmmsize = 0;
+
+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -ENODATA;
+
+ if (size == 0 && S_ISDIR(inode->i_mode)) {
+ /* XXX directory EA is fix for now, optimize to save
+ * RPC transfer */
+ GOTO(out, rc = sizeof(struct lov_user_md));
+ }
+
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm == NULL) {
+ if (S_ISDIR(inode->i_mode)) {
+ rc = ll_dir_getstripe(inode, &lmm,
+ &lmmsize, &request);
+ } else {
+ rc = -ENODATA;
+ }
+ } else {
+ /* LSM is present already after lookup/getattr call.
+ * we need to grab layout lock once it is implemented */
+ rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm);
+ lmmsize = rc;
+ }
+ ccc_inode_lsm_put(inode, lsm);
+
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (size == 0) {
+ /* used to call ll_get_max_mdsize() forward to get
+ * the maximum buffer size, while some apps (such as
+ * rsync 3.0.x) care much about the exact xattr value
+ * size */
+ rc = lmmsize;
+ GOTO(out, rc);
+ }
+
+ if (size < lmmsize) {
+ CERROR("server bug: replied size %d > %d for %s (%s)\n",
+ lmmsize, (int)size, dentry->d_name.name, name);
+ GOTO(out, rc = -ERANGE);
+ }
+
+ lump = (struct lov_user_md *)buffer;
+ memcpy(lump, lmm, lmmsize);
+ /* do not return layout gen for getxattr otherwise it would
+ * confuse tar --xattr by recognizing layout gen as stripe
+ * offset when the file is restored. See LU-2809. */
+ lump->lmm_layout_gen = 0;
+
+ rc = lmmsize;
+out:
+ if (request)
+ ptlrpc_req_finished(request);
+ else if (lmm)
+ obd_free_diskmd(ll_i2dtexp(inode), &lmm);
+ return(rc);
+ }
+
+ return ll_getxattr_common(inode, name, buffer, size, OBD_MD_FLXATTR);
+}
+
+ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ int rc = 0, rc2 = 0;
+ struct lov_mds_md *lmm = NULL;
+ struct ptlrpc_request *request = NULL;
+ int lmmsize;
+
+ LASSERT(inode);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
+ inode->i_ino, inode->i_generation, inode);
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
+
+ rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (buffer != NULL) {
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ char *xattr_name = buffer;
+ int xlen, rem = rc;
+
+ while (rem > 0) {
+ xlen = strnlen(xattr_name, rem - 1) + 1;
+ rem -= xlen;
+ if (xattr_type_filter(sbi,
+ get_xattr_type(xattr_name)) == 0) {
+ /* skip OK xattr type
+ * leave it in buffer
+ */
+ xattr_name += xlen;
+ continue;
+ }
+ /* move up remaining xattrs in buffer
+ * removing the xattr that is not OK
+ */
+ memmove(xattr_name, xattr_name + xlen, rem);
+ rc -= xlen;
+ }
+ }
+ if (S_ISREG(inode->i_mode)) {
+ if (!ll_i2info(inode)->lli_has_smd)
+ rc2 = -1;
+ } else if (S_ISDIR(inode->i_mode)) {
+ rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+ }
+
+ if (rc2 < 0) {
+ GOTO(out, rc2 = 0);
+ } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+ const int prefix_len = sizeof(XATTR_LUSTRE_PREFIX) - 1;
+ const size_t name_len = sizeof("lov") - 1;
+ const size_t total_len = prefix_len + name_len + 1;
+
+ if (buffer && (rc + total_len) <= size) {
+ buffer += rc;
+ memcpy(buffer, XATTR_LUSTRE_PREFIX, prefix_len);
+ memcpy(buffer + prefix_len, "lov", name_len);
+ buffer[prefix_len + name_len] = '\0';
+ }
+ rc2 = total_len;
+ }
+out:
+ ptlrpc_req_finished(request);
+ rc = rc + rc2;
+
+ return rc;
+}
diff --git a/drivers/staging/lustre/lustre/lmv/Makefile b/drivers/staging/lustre/lustre/lmv/Makefile
new file mode 100644
index 000000000000..8cc81ade126c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += lmv.o
+lmv-y := lmv_obd.o lmv_intent.o lmv_fld.o lproc_lmv.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
new file mode 100644
index 000000000000..a4805aefa684
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
@@ -0,0 +1,88 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LMV
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <asm/div64.h>
+#include <linux/seq_file.h>
+
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_fid.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include "lmv_internal.h"
+
+int lmv_fld_lookup(struct lmv_obd *lmv,
+ const struct lu_fid *fid,
+ mdsno_t *mds)
+{
+ int rc;
+ ENTRY;
+
+
+ /* FIXME: Currently ZFS still use local seq for ROOT unfortunately, and
+ * this fid_is_local check should be removed once LU-2240 is fixed */
+ LASSERTF((fid_seq_in_fldb(fid_seq(fid)) ||
+ fid_seq_is_local_file(fid_seq(fid))) &&
+ fid_is_sane(fid), DFID" is insane!\n", PFID(fid));
+
+ rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds,
+ LU_SEQ_RANGE_MDT, NULL);
+ if (rc) {
+ CERROR("Error while looking for mds number. Seq "LPX64
+ ", err = %d\n", fid_seq(fid), rc);
+ RETURN(rc);
+ }
+
+ CDEBUG(D_INODE, "FLD lookup got mds #%x for fid="DFID"\n",
+ *mds, PFID(fid));
+
+ if (*mds >= lmv->desc.ld_tgt_count) {
+ CERROR("FLD lookup got invalid mds #%x (max: %x) "
+ "for fid="DFID"\n", *mds, lmv->desc.ld_tgt_count,
+ PFID(fid));
+ rc = -EINVAL;
+ }
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
new file mode 100644
index 000000000000..7eefab5ef5d0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -0,0 +1,328 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LMV
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <asm/div64.h>
+#include <linux/seq_file.h>
+#include <linux/namei.h>
+#include <linux/lustre_intent.h>
+
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include "lmv_internal.h"
+
+static int lmv_intent_remote(struct obd_export *exp, void *lmm,
+ int lmmsize, struct lookup_intent *it,
+ const struct lu_fid *parent_fid, int flags,
+ struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct ptlrpc_request *req = NULL;
+ struct lustre_handle plock;
+ struct md_op_data *op_data;
+ struct lmv_tgt_desc *tgt;
+ struct mdt_body *body;
+ int pmode;
+ int rc = 0;
+ ENTRY;
+
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ RETURN(-EPROTO);
+
+ LASSERT((body->valid & OBD_MD_MDS));
+
+ /*
+ * Unfortunately, we have to lie to MDC/MDS to retrieve
+ * attributes llite needs and provideproper locking.
+ */
+ if (it->it_op & IT_LOOKUP)
+ it->it_op = IT_GETATTR;
+
+ /*
+ * We got LOOKUP lock, but we really need attrs.
+ */
+ pmode = it->d.lustre.it_lock_mode;
+ if (pmode) {
+ plock.cookie = it->d.lustre.it_lock_handle;
+ it->d.lustre.it_lock_mode = 0;
+ it->d.lustre.it_data = NULL;
+ }
+
+ LASSERT(fid_is_sane(&body->fid1));
+
+ tgt = lmv_find_target(lmv, &body->fid1);
+ if (IS_ERR(tgt))
+ GOTO(out, rc = PTR_ERR(tgt));
+
+ OBD_ALLOC_PTR(op_data);
+ if (op_data == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ op_data->op_fid1 = body->fid1;
+ /* Sent the parent FID to the remote MDT */
+ if (parent_fid != NULL) {
+ /* The parent fid is only for remote open to
+ * check whether the open is from OBF,
+ * see mdt_cross_open */
+ LASSERT(it->it_op & IT_OPEN);
+ op_data->op_fid2 = *parent_fid;
+ /* Add object FID to op_fid3, in case it needs to check stale
+ * (M_CHECK_STALE), see mdc_finish_intent_lock */
+ op_data->op_fid3 = body->fid1;
+ }
+
+ op_data->op_bias = MDS_CROSS_REF;
+ CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
+ PFID(&body->fid1), tgt->ltd_idx);
+
+ it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
+ rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
+ flags, &req, cb_blocking, extra_lock_flags);
+ if (rc)
+ GOTO(out_free_op_data, rc);
+
+ /*
+ * LLite needs LOOKUP lock to track dentry revocation in order to
+ * maintain dcache consistency. Thus drop UPDATE|PERM lock here
+ * and put LOOKUP in request.
+ */
+ if (it->d.lustre.it_lock_mode != 0) {
+ it->d.lustre.it_remote_lock_handle =
+ it->d.lustre.it_lock_handle;
+ it->d.lustre.it_remote_lock_mode = it->d.lustre.it_lock_mode;
+ }
+
+ it->d.lustre.it_lock_handle = plock.cookie;
+ it->d.lustre.it_lock_mode = pmode;
+
+ EXIT;
+out_free_op_data:
+ OBD_FREE_PTR(op_data);
+out:
+ if (rc && pmode)
+ ldlm_lock_decref(&plock, pmode);
+
+ ptlrpc_req_finished(*reqp);
+ *reqp = req;
+ return rc;
+}
+
+/*
+ * IT_OPEN is intended to open (and create, possible) an object. Parent (pid)
+ * may be split dir.
+ */
+int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ /* If it is ready to open the file by FID, do not need
+ * allocate FID at all, otherwise it will confuse MDT */
+ if ((it->it_op & IT_CREAT) &&
+ !(it->it_flags & MDS_OPEN_BY_FID)) {
+ /*
+ * For open with IT_CREATE and for IT_CREATE cases allocate new
+ * fid and setup FLD for it.
+ */
+ op_data->op_fid3 = op_data->op_fid2;
+ rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+ if (rc != 0)
+ RETURN(rc);
+ }
+
+ CDEBUG(D_INODE, "OPEN_INTENT with fid1="DFID", fid2="DFID","
+ " name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
+ PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);
+
+ rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, flags,
+ reqp, cb_blocking, extra_lock_flags);
+ if (rc != 0)
+ RETURN(rc);
+ /*
+ * Nothing is found, do not access body->fid1 as it is zero and thus
+ * pointless.
+ */
+ if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
+ !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
+ !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
+ RETURN(rc);
+
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ RETURN(-EPROTO);
+ /*
+ * Not cross-ref case, just get out of here.
+ */
+ if (likely(!(body->valid & OBD_MD_MDS)))
+ RETURN(0);
+
+ /*
+ * Okay, MDS has returned success. Probably name has been resolved in
+ * remote inode.
+ */
+ rc = lmv_intent_remote(exp, lmm, lmmsize, it, &op_data->op_fid1, flags,
+ reqp, cb_blocking, extra_lock_flags);
+ if (rc != 0) {
+ LASSERT(rc < 0);
+ /*
+ * This is possible, that some userspace application will try to
+ * open file as directory and we will have -ENOTDIR here. As
+ * this is normal situation, we should not print error here,
+ * only debug info.
+ */
+ CDEBUG(D_INODE, "Can't handle remote %s: dir "DFID"("DFID"):"
+ "%*s: %d\n", LL_IT2STR(it), PFID(&op_data->op_fid2),
+ PFID(&op_data->op_fid1), op_data->op_namelen,
+ op_data->op_name, rc);
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+/*
+ * Handler for: getattr, lookup and revalidate cases.
+ */
+int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt = NULL;
+ struct mdt_body *body;
+ int rc = 0;
+ ENTRY;
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ if (!fid_is_sane(&op_data->op_fid2))
+ fid_zero(&op_data->op_fid2);
+
+ CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
+ ", name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
+ PFID(&op_data->op_fid2),
+ op_data->op_name ? op_data->op_name : "<NULL>",
+ tgt->ltd_idx);
+
+ op_data->op_bias &= ~MDS_CROSS_REF;
+
+ rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
+ flags, reqp, cb_blocking, extra_lock_flags);
+
+ if (rc < 0 || *reqp == NULL)
+ RETURN(rc);
+
+ /*
+ * MDS has returned success. Probably name has been resolved in
+ * remote inode. Let's check this.
+ */
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ RETURN(-EPROTO);
+ /* Not cross-ref case, just get out of here. */
+ if (likely(!(body->valid & OBD_MD_MDS)))
+ RETURN(0);
+
+ rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags, reqp,
+ cb_blocking, extra_lock_flags);
+
+ RETURN(rc);
+}
+
+int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ int rc;
+ ENTRY;
+
+ LASSERT(it != NULL);
+ LASSERT(fid_is_sane(&op_data->op_fid1));
+
+ CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
+ LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
+ PFID(&op_data->op_fid1));
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT))
+ rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
+ flags, reqp, cb_blocking,
+ extra_lock_flags);
+ else if (it->it_op & IT_OPEN)
+ rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
+ flags, reqp, cb_blocking,
+ extra_lock_flags);
+ else
+ LBUG();
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
new file mode 100644
index 000000000000..f75b0a987681
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -0,0 +1,159 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LMV_INTERNAL_H_
+#define _LMV_INTERNAL_H_
+
+#include <lustre/lustre_idl.h>
+#include <obd.h>
+
+#define LMV_MAX_TGT_COUNT 128
+
+#define lmv_init_lock(lmv) mutex_lock(&lmv->init_mutex);
+#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex);
+
+#define LL_IT2STR(it) \
+ ((it) ? ldlm_it2str((it)->it_op) : "0")
+
+int lmv_check_connect(struct obd_device *obd);
+
+int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags);
+
+int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags);
+
+int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags);
+
+int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+ void *, int);
+int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid,
+ mdsno_t *mds);
+int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid,
+ mdsno_t mds);
+int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
+ struct md_op_data *op_data);
+
+static inline struct lmv_stripe_md *lmv_get_mea(struct ptlrpc_request *req)
+{
+ struct mdt_body *body;
+ struct lmv_stripe_md *mea;
+
+ LASSERT(req != NULL);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+
+ if (!body || !S_ISDIR(body->mode) || !body->eadatasize)
+ return NULL;
+
+ mea = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD,
+ body->eadatasize);
+ LASSERT(mea != NULL);
+
+ if (mea->mea_count == 0)
+ return NULL;
+ if( mea->mea_magic != MEA_MAGIC_LAST_CHAR &&
+ mea->mea_magic != MEA_MAGIC_ALL_CHARS &&
+ mea->mea_magic != MEA_MAGIC_HASH_SEGMENT)
+ return NULL;
+
+ return mea;
+}
+
+static inline int lmv_get_easize(struct lmv_obd *lmv)
+{
+ return sizeof(struct lmv_stripe_md) +
+ lmv->desc.ld_tgt_count *
+ sizeof(struct lu_fid);
+}
+
+static inline struct lmv_tgt_desc *
+lmv_get_target(struct lmv_obd *lmv, mdsno_t mds)
+{
+ int count = lmv->desc.ld_tgt_count;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (lmv->tgts[i] == NULL)
+ continue;
+
+ if (lmv->tgts[i]->ltd_idx == mds)
+ return lmv->tgts[i];
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+static inline struct lmv_tgt_desc *
+lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
+{
+ mdsno_t mds = 0;
+ int rc;
+
+ if (lmv->desc.ld_tgt_count > 1) {
+ rc = lmv_fld_lookup(lmv, fid, &mds);
+ if (rc)
+ return ERR_PTR(rc);
+ }
+
+ return lmv_get_target(lmv, mds);
+}
+
+struct lmv_tgt_desc
+*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
+ struct lu_fid *fid);
+/* lproc_lmv.c */
+#ifdef LPROCFS
+void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+extern struct file_operations lmv_proc_target_fops;
+
+#endif
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
new file mode 100644
index 000000000000..1eebfbf34871
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -0,0 +1,2727 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LMV
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/mm.h>
+#include <asm/div64.h>
+#include <linux/seq_file.h>
+#include <linux/namei.h>
+
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre_lite.h>
+#include <lustre_fid.h>
+#include "lmv_internal.h"
+
+static void lmv_activate_target(struct lmv_obd *lmv,
+ struct lmv_tgt_desc *tgt,
+ int activate)
+{
+ if (tgt->ltd_active == activate)
+ return;
+
+ tgt->ltd_active = activate;
+ lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
+}
+
+/**
+ * Error codes:
+ *
+ * -EINVAL : UUID can't be found in the LMV's target list
+ * -ENOTCONN: The UUID is found, but the target connection is bad (!)
+ * -EBADF : The UUID is found, but the OBD of the wrong type (!)
+ */
+static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
+ int activate)
+{
+ struct lmv_tgt_desc *uninitialized_var(tgt);
+ struct obd_device *obd;
+ int i;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
+ lmv, uuid->uuid, activate);
+
+ spin_lock(&lmv->lmv_lock);
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = lmv->tgts[i];
+ if (tgt == NULL || tgt->ltd_exp == NULL)
+ continue;
+
+ CDEBUG(D_INFO, "Target idx %d is %s conn "LPX64"\n", i,
+ tgt->ltd_uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
+
+ if (obd_uuid_equals(uuid, &tgt->ltd_uuid))
+ break;
+ }
+
+ if (i == lmv->desc.ld_tgt_count)
+ GOTO(out_lmv_lock, rc = -EINVAL);
+
+ obd = class_exp2obd(tgt->ltd_exp);
+ if (obd == NULL)
+ GOTO(out_lmv_lock, rc = -ENOTCONN);
+
+ CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n",
+ obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd,
+ obd->obd_type->typ_name, i);
+ LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0);
+
+ if (tgt->ltd_active == activate) {
+ CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
+ activate ? "" : "in");
+ GOTO(out_lmv_lock, rc);
+ }
+
+ CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd,
+ activate ? "" : "in");
+ lmv_activate_target(lmv, tgt, activate);
+ EXIT;
+
+ out_lmv_lock:
+ spin_unlock(&lmv->lmv_lock);
+ return rc;
+}
+
+struct obd_uuid *lmv_get_uuid(struct obd_export *exp)
+{
+ struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+
+ return obd_get_uuid(lmv->tgts[0]->ltd_exp);
+}
+
+static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
+ enum obd_notify_event ev, void *data)
+{
+ struct obd_connect_data *conn_data;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_uuid *uuid;
+ int rc = 0;
+ ENTRY;
+
+ if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) {
+ CERROR("unexpected notification of %s %s!\n",
+ watched->obd_type->typ_name,
+ watched->obd_name);
+ RETURN(-EINVAL);
+ }
+
+ uuid = &watched->u.cli.cl_target_uuid;
+ if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) {
+ /*
+ * Set MDC as active before notifying the observer, so the
+ * observer can use the MDC normally.
+ */
+ rc = lmv_set_mdc_active(lmv, uuid,
+ ev == OBD_NOTIFY_ACTIVE);
+ if (rc) {
+ CERROR("%sactivation of %s failed: %d\n",
+ ev == OBD_NOTIFY_ACTIVE ? "" : "de",
+ uuid->uuid, rc);
+ RETURN(rc);
+ }
+ } else if (ev == OBD_NOTIFY_OCD) {
+ conn_data = &watched->u.cli.cl_import->imp_connect_data;
+ /*
+ * XXX: Make sure that ocd_connect_flags from all targets are
+ * the same. Otherwise one of MDTs runs wrong version or
+ * something like this. --umka
+ */
+ obd->obd_self_export->exp_connect_data = *conn_data;
+ }
+#if 0
+ else if (ev == OBD_NOTIFY_DISCON) {
+ /*
+ * For disconnect event, flush fld cache for failout MDS case.
+ */
+ fld_client_flush(&lmv->lmv_fld);
+ }
+#endif
+ /*
+ * Pass the notification up the chain.
+ */
+ if (obd->obd_observer)
+ rc = obd_notify(obd->obd_observer, watched, ev, data);
+
+ RETURN(rc);
+}
+
+/**
+ * This is fake connect function. Its purpose is to initialize lmv and say
+ * caller that everything is okay. Real connection will be performed later.
+ */
+static int lmv_connect(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *obd,
+ struct obd_uuid *cluuid, struct obd_connect_data *data,
+ void *localdata)
+{
+ struct proc_dir_entry *lmv_proc_dir;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lustre_handle conn = { 0 };
+ int rc = 0;
+ ENTRY;
+
+ /*
+ * We don't want to actually do the underlying connections more than
+ * once, so keep track.
+ */
+ lmv->refcount++;
+ if (lmv->refcount > 1) {
+ *exp = NULL;
+ RETURN(0);
+ }
+
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc) {
+ CERROR("class_connection() returned %d\n", rc);
+ RETURN(rc);
+ }
+
+ *exp = class_conn2export(&conn);
+ class_export_get(*exp);
+
+ lmv->exp = *exp;
+ lmv->connected = 0;
+ lmv->cluuid = *cluuid;
+
+ if (data)
+ lmv->conn_data = *data;
+
+ if (obd->obd_proc_private != NULL) {
+ lmv_proc_dir = obd->obd_proc_private;
+ } else {
+ lmv_proc_dir = lprocfs_register("target_obds", obd->obd_proc_entry,
+ NULL, NULL);
+ if (IS_ERR(lmv_proc_dir)) {
+ CERROR("could not register /proc/fs/lustre/%s/%s/target_obds.",
+ obd->obd_type->typ_name, obd->obd_name);
+ lmv_proc_dir = NULL;
+ }
+ obd->obd_proc_private = lmv_proc_dir;
+ }
+
+ /*
+ * All real clients should perform actual connection right away, because
+ * it is possible, that LMV will not have opportunity to connect targets
+ * and MDC stuff will be called directly, for instance while reading
+ * ../mdc/../kbytesfree procfs file, etc.
+ */
+ if (data->ocd_connect_flags & OBD_CONNECT_REAL)
+ rc = lmv_check_connect(obd);
+
+ if (rc && lmv_proc_dir) {
+ lprocfs_remove(&lmv_proc_dir);
+ obd->obd_proc_private = NULL;
+ }
+
+ RETURN(rc);
+}
+
+static void lmv_set_timeouts(struct obd_device *obd)
+{
+ struct lmv_tgt_desc *tgt;
+ struct lmv_obd *lmv;
+ int i;
+
+ lmv = &obd->u.lmv;
+ if (lmv->server_timeout == 0)
+ return;
+
+ if (lmv->connected == 0)
+ return;
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = lmv->tgts[i];
+ if (tgt == NULL || tgt->ltd_exp == NULL || tgt->ltd_active == 0)
+ continue;
+
+ obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
+ KEY_INTERMDS, 0, NULL, NULL);
+ }
+}
+
+static int lmv_init_ea_size(struct obd_export *exp, int easize,
+ int def_easize, int cookiesize)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int i;
+ int rc = 0;
+ int change = 0;
+ ENTRY;
+
+ if (lmv->max_easize < easize) {
+ lmv->max_easize = easize;
+ change = 1;
+ }
+ if (lmv->max_def_easize < def_easize) {
+ lmv->max_def_easize = def_easize;
+ change = 1;
+ }
+ if (lmv->max_cookiesize < cookiesize) {
+ lmv->max_cookiesize = cookiesize;
+ change = 1;
+ }
+ if (change == 0)
+ RETURN(0);
+
+ if (lmv->connected == 0)
+ RETURN(0);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL ||
+ lmv->tgts[i]->ltd_exp == NULL ||
+ lmv->tgts[i]->ltd_active == 0) {
+ CWARN("%s: NULL export for %d\n", obd->obd_name, i);
+ continue;
+ }
+
+ rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize,
+ cookiesize);
+ if (rc) {
+ CERROR("%s: obd_init_ea_size() failed on MDT target %d:"
+ " rc = %d.\n", obd->obd_name, i, rc);
+ break;
+ }
+ }
+ RETURN(rc);
+}
+
+#define MAX_STRING_SIZE 128
+
+int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
+{
+ struct proc_dir_entry *lmv_proc_dir;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_uuid *cluuid = &lmv->cluuid;
+ struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
+ struct obd_device *mdc_obd;
+ struct obd_export *mdc_exp;
+ struct lu_fld_target target;
+ int rc;
+ ENTRY;
+
+ mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME,
+ &obd->obd_uuid);
+ if (!mdc_obd) {
+ CERROR("target %s not attached\n", tgt->ltd_uuid.uuid);
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n",
+ mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
+ tgt->ltd_uuid.uuid, obd->obd_uuid.uuid,
+ cluuid->uuid);
+
+ if (!mdc_obd->obd_set_up) {
+ CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid);
+ RETURN(-EINVAL);
+ }
+
+ rc = obd_connect(NULL, &mdc_exp, mdc_obd, &lmv_mdc_uuid,
+ &lmv->conn_data, NULL);
+ if (rc) {
+ CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc);
+ RETURN(rc);
+ }
+
+ /*
+ * Init fid sequence client for this mdc and add new fld target.
+ */
+ rc = obd_fid_init(mdc_obd, mdc_exp, LUSTRE_SEQ_METADATA);
+ if (rc)
+ RETURN(rc);
+
+ target.ft_srv = NULL;
+ target.ft_exp = mdc_exp;
+ target.ft_idx = tgt->ltd_idx;
+
+ fld_client_add_target(&lmv->lmv_fld, &target);
+
+ rc = obd_register_observer(mdc_obd, obd);
+ if (rc) {
+ obd_disconnect(mdc_exp);
+ CERROR("target %s register_observer error %d\n",
+ tgt->ltd_uuid.uuid, rc);
+ RETURN(rc);
+ }
+
+ if (obd->obd_observer) {
+ /*
+ * Tell the observer about the new target.
+ */
+ rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd,
+ OBD_NOTIFY_ACTIVE,
+ (void *)(tgt - lmv->tgts[0]));
+ if (rc) {
+ obd_disconnect(mdc_exp);
+ RETURN(rc);
+ }
+ }
+
+ tgt->ltd_active = 1;
+ tgt->ltd_exp = mdc_exp;
+ lmv->desc.ld_active_tgt_count++;
+
+ md_init_ea_size(tgt->ltd_exp, lmv->max_easize,
+ lmv->max_def_easize, lmv->max_cookiesize);
+
+ CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n",
+ mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
+ atomic_read(&obd->obd_refcount));
+
+ lmv_proc_dir = obd->obd_proc_private;
+ if (lmv_proc_dir) {
+ struct proc_dir_entry *mdc_symlink;
+
+ LASSERT(mdc_obd->obd_type != NULL);
+ LASSERT(mdc_obd->obd_type->typ_name != NULL);
+ mdc_symlink = lprocfs_add_symlink(mdc_obd->obd_name,
+ lmv_proc_dir,
+ "../../../%s/%s",
+ mdc_obd->obd_type->typ_name,
+ mdc_obd->obd_name);
+ if (mdc_symlink == NULL) {
+ CERROR("Could not register LMV target "
+ "/proc/fs/lustre/%s/%s/target_obds/%s.",
+ obd->obd_type->typ_name, obd->obd_name,
+ mdc_obd->obd_name);
+ lprocfs_remove(&lmv_proc_dir);
+ obd->obd_proc_private = NULL;
+ }
+ }
+ RETURN(0);
+}
+
+static void lmv_del_target(struct lmv_obd *lmv, int index)
+{
+ if (lmv->tgts[index] == NULL)
+ return;
+
+ OBD_FREE_PTR(lmv->tgts[index]);
+ lmv->tgts[index] = NULL;
+ return;
+}
+
+static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
+ __u32 index, int gen)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
+
+ lmv_init_lock(lmv);
+
+ if (lmv->desc.ld_tgt_count == 0) {
+ struct obd_device *mdc_obd;
+
+ mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
+ &obd->obd_uuid);
+ if (!mdc_obd) {
+ lmv_init_unlock(lmv);
+ CERROR("%s: Target %s not attached: rc = %d\n",
+ obd->obd_name, uuidp->uuid, -EINVAL);
+ RETURN(-EINVAL);
+ }
+ }
+
+ if ((index < lmv->tgts_size) && (lmv->tgts[index] != NULL)) {
+ tgt = lmv->tgts[index];
+ CERROR("%s: UUID %s already assigned at LOV target index %d:"
+ " rc = %d\n", obd->obd_name,
+ obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST);
+ lmv_init_unlock(lmv);
+ RETURN(-EEXIST);
+ }
+
+ if (index >= lmv->tgts_size) {
+ /* We need to reallocate the lmv target array. */
+ struct lmv_tgt_desc **newtgts, **old = NULL;
+ __u32 newsize = 1;
+ __u32 oldsize = 0;
+
+ while (newsize < index + 1)
+ newsize = newsize << 1;
+ OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize);
+ if (newtgts == NULL) {
+ lmv_init_unlock(lmv);
+ RETURN(-ENOMEM);
+ }
+
+ if (lmv->tgts_size) {
+ memcpy(newtgts, lmv->tgts,
+ sizeof(*newtgts) * lmv->tgts_size);
+ old = lmv->tgts;
+ oldsize = lmv->tgts_size;
+ }
+
+ lmv->tgts = newtgts;
+ lmv->tgts_size = newsize;
+ smp_rmb();
+ if (old)
+ OBD_FREE(old, sizeof(*old) * oldsize);
+
+ CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lmv->tgts,
+ lmv->tgts_size);
+ }
+
+ OBD_ALLOC_PTR(tgt);
+ if (!tgt) {
+ lmv_init_unlock(lmv);
+ RETURN(-ENOMEM);
+ }
+
+ mutex_init(&tgt->ltd_fid_mutex);
+ tgt->ltd_idx = index;
+ tgt->ltd_uuid = *uuidp;
+ tgt->ltd_active = 0;
+ lmv->tgts[index] = tgt;
+ if (index >= lmv->desc.ld_tgt_count)
+ lmv->desc.ld_tgt_count = index + 1;
+
+ if (lmv->connected) {
+ rc = lmv_connect_mdc(obd, tgt);
+ if (rc) {
+ spin_lock(&lmv->lmv_lock);
+ lmv->desc.ld_tgt_count--;
+ memset(tgt, 0, sizeof(*tgt));
+ spin_unlock(&lmv->lmv_lock);
+ } else {
+ int easize = sizeof(struct lmv_stripe_md) +
+ lmv->desc.ld_tgt_count *
+ sizeof(struct lu_fid);
+ lmv_init_ea_size(obd->obd_self_export, easize, 0, 0);
+ }
+ }
+
+ lmv_init_unlock(lmv);
+ RETURN(rc);
+}
+
+int lmv_check_connect(struct obd_device *obd)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int i;
+ int rc;
+ int easize;
+ ENTRY;
+
+ if (lmv->connected)
+ RETURN(0);
+
+ lmv_init_lock(lmv);
+ if (lmv->connected) {
+ lmv_init_unlock(lmv);
+ RETURN(0);
+ }
+
+ if (lmv->desc.ld_tgt_count == 0) {
+ lmv_init_unlock(lmv);
+ CERROR("%s: no targets configured.\n", obd->obd_name);
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_CONFIG, "Time to connect %s to %s\n",
+ lmv->cluuid.uuid, obd->obd_name);
+
+ LASSERT(lmv->tgts != NULL);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = lmv->tgts[i];
+ if (tgt == NULL)
+ continue;
+ rc = lmv_connect_mdc(obd, tgt);
+ if (rc)
+ GOTO(out_disc, rc);
+ }
+
+ lmv_set_timeouts(obd);
+ class_export_put(lmv->exp);
+ lmv->connected = 1;
+ easize = lmv_get_easize(lmv);
+ lmv_init_ea_size(obd->obd_self_export, easize, 0, 0);
+ lmv_init_unlock(lmv);
+ RETURN(0);
+
+ out_disc:
+ while (i-- > 0) {
+ int rc2;
+ tgt = lmv->tgts[i];
+ if (tgt == NULL)
+ continue;
+ tgt->ltd_active = 0;
+ if (tgt->ltd_exp) {
+ --lmv->desc.ld_active_tgt_count;
+ rc2 = obd_disconnect(tgt->ltd_exp);
+ if (rc2) {
+ CERROR("LMV target %s disconnect on "
+ "MDC idx %d: error %d\n",
+ tgt->ltd_uuid.uuid, i, rc2);
+ }
+ }
+ }
+ class_disconnect(lmv->exp);
+ lmv_init_unlock(lmv);
+ RETURN(rc);
+}
+
+static int lmv_disconnect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
+{
+ struct proc_dir_entry *lmv_proc_dir;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_device *mdc_obd;
+ int rc;
+ ENTRY;
+
+ LASSERT(tgt != NULL);
+ LASSERT(obd != NULL);
+
+ mdc_obd = class_exp2obd(tgt->ltd_exp);
+
+ if (mdc_obd) {
+ mdc_obd->obd_force = obd->obd_force;
+ mdc_obd->obd_fail = obd->obd_fail;
+ mdc_obd->obd_no_recov = obd->obd_no_recov;
+ }
+
+ lmv_proc_dir = obd->obd_proc_private;
+ if (lmv_proc_dir)
+ lprocfs_remove_proc_entry(mdc_obd->obd_name, lmv_proc_dir);
+
+ rc = obd_fid_fini(tgt->ltd_exp->exp_obd);
+ if (rc)
+ CERROR("Can't finanize fids factory\n");
+
+ CDEBUG(D_INFO, "Disconnected from %s(%s) successfully\n",
+ tgt->ltd_exp->exp_obd->obd_name,
+ tgt->ltd_exp->exp_obd->obd_uuid.uuid);
+
+ obd_register_observer(tgt->ltd_exp->exp_obd, NULL);
+ rc = obd_disconnect(tgt->ltd_exp);
+ if (rc) {
+ if (tgt->ltd_active) {
+ CERROR("Target %s disconnect error %d\n",
+ tgt->ltd_uuid.uuid, rc);
+ }
+ }
+
+ lmv_activate_target(lmv, tgt, 0);
+ tgt->ltd_exp = NULL;
+ RETURN(0);
+}
+
+static int lmv_disconnect(struct obd_export *exp)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int rc;
+ int i;
+ ENTRY;
+
+ if (!lmv->tgts)
+ goto out_local;
+
+ /*
+ * Only disconnect the underlying layers on the final disconnect.
+ */
+ lmv->refcount--;
+ if (lmv->refcount != 0)
+ goto out_local;
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+ continue;
+
+ lmv_disconnect_mdc(obd, lmv->tgts[i]);
+ }
+
+ if (obd->obd_proc_private)
+ lprocfs_remove((proc_dir_entry_t **)&obd->obd_proc_private);
+ else
+ CERROR("/proc/fs/lustre/%s/%s/target_obds missing\n",
+ obd->obd_type->typ_name, obd->obd_name);
+
+out_local:
+ /*
+ * This is the case when no real connection is established by
+ * lmv_check_connect().
+ */
+ if (!lmv->connected)
+ class_export_put(exp);
+ rc = class_disconnect(exp);
+ if (lmv->refcount == 0)
+ lmv->connected = 0;
+ RETURN(rc);
+}
+
+static int lmv_fid2path(struct obd_export *exp, int len, void *karg, void *uarg)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obddev->u.lmv;
+ struct getinfo_fid2path *gf;
+ struct lmv_tgt_desc *tgt;
+ struct getinfo_fid2path *remote_gf = NULL;
+ int remote_gf_size = 0;
+ int rc;
+
+ gf = (struct getinfo_fid2path *)karg;
+ tgt = lmv_find_target(lmv, &gf->gf_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+repeat_fid2path:
+ rc = obd_iocontrol(OBD_IOC_FID2PATH, tgt->ltd_exp, len, gf, uarg);
+ if (rc != 0 && rc != -EREMOTE)
+ GOTO(out_fid2path, rc);
+
+ /* If remote_gf != NULL, it means just building the
+ * path on the remote MDT, copy this path segement to gf */
+ if (remote_gf != NULL) {
+ struct getinfo_fid2path *ori_gf;
+ char *ptr;
+
+ ori_gf = (struct getinfo_fid2path *)karg;
+ if (strlen(ori_gf->gf_path) +
+ strlen(gf->gf_path) > ori_gf->gf_pathlen)
+ GOTO(out_fid2path, rc = -EOVERFLOW);
+
+ ptr = ori_gf->gf_path;
+
+ memmove(ptr + strlen(gf->gf_path) + 1, ptr,
+ strlen(ori_gf->gf_path));
+
+ strncpy(ptr, gf->gf_path, strlen(gf->gf_path));
+ ptr += strlen(gf->gf_path);
+ *ptr = '/';
+ }
+
+ CDEBUG(D_INFO, "%s: get path %s "DFID" rec: "LPU64" ln: %u\n",
+ tgt->ltd_exp->exp_obd->obd_name,
+ gf->gf_path, PFID(&gf->gf_fid), gf->gf_recno,
+ gf->gf_linkno);
+
+ if (rc == 0)
+ GOTO(out_fid2path, rc);
+
+ /* sigh, has to go to another MDT to do path building further */
+ if (remote_gf == NULL) {
+ remote_gf_size = sizeof(*remote_gf) + PATH_MAX;
+ OBD_ALLOC(remote_gf, remote_gf_size);
+ if (remote_gf == NULL)
+ GOTO(out_fid2path, rc = -ENOMEM);
+ remote_gf->gf_pathlen = PATH_MAX;
+ }
+
+ if (!fid_is_sane(&gf->gf_fid)) {
+ CERROR("%s: invalid FID "DFID": rc = %d\n",
+ tgt->ltd_exp->exp_obd->obd_name,
+ PFID(&gf->gf_fid), -EINVAL);
+ GOTO(out_fid2path, rc = -EINVAL);
+ }
+
+ tgt = lmv_find_target(lmv, &gf->gf_fid);
+ if (IS_ERR(tgt))
+ GOTO(out_fid2path, rc = -EINVAL);
+
+ remote_gf->gf_fid = gf->gf_fid;
+ remote_gf->gf_recno = -1;
+ remote_gf->gf_linkno = -1;
+ memset(remote_gf->gf_path, 0, remote_gf->gf_pathlen);
+ gf = remote_gf;
+ goto repeat_fid2path;
+
+out_fid2path:
+ if (remote_gf != NULL)
+ OBD_FREE(remote_gf, remote_gf_size);
+ RETURN(rc);
+}
+
+static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
+ int len, void *karg, void *uarg)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obddev->u.lmv;
+ int i = 0;
+ int rc = 0;
+ int set = 0;
+ int count = lmv->desc.ld_tgt_count;
+ ENTRY;
+
+ if (count == 0)
+ RETURN(-ENOTTY);
+
+ switch (cmd) {
+ case IOC_OBD_STATFS: {
+ struct obd_ioctl_data *data = karg;
+ struct obd_device *mdc_obd;
+ struct obd_statfs stat_buf = {0};
+ __u32 index;
+
+ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
+ if ((index >= count))
+ RETURN(-ENODEV);
+
+ if (lmv->tgts[index] == NULL ||
+ lmv->tgts[index]->ltd_active == 0)
+ RETURN(-ENODATA);
+
+ mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp);
+ if (!mdc_obd)
+ RETURN(-EINVAL);
+
+ /* copy UUID */
+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(mdc_obd),
+ min((int) data->ioc_plen2,
+ (int) sizeof(struct obd_uuid))))
+ RETURN(-EFAULT);
+
+ rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ 0);
+ if (rc)
+ RETURN(rc);
+ if (copy_to_user(data->ioc_pbuf1, &stat_buf,
+ min((int) data->ioc_plen1,
+ (int) sizeof(stat_buf))))
+ RETURN(-EFAULT);
+ break;
+ }
+ case OBD_IOC_QUOTACTL: {
+ struct if_quotactl *qctl = karg;
+ struct lmv_tgt_desc *tgt = NULL;
+ struct obd_quotactl *oqctl;
+
+ if (qctl->qc_valid == QC_MDTIDX) {
+ if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+ RETURN(-EINVAL);
+
+ tgt = lmv->tgts[qctl->qc_idx];
+ if (tgt == NULL || tgt->ltd_exp == NULL)
+ RETURN(-EINVAL);
+ } else if (qctl->qc_valid == QC_UUID) {
+ for (i = 0; i < count; i++) {
+ tgt = lmv->tgts[i];
+ if (tgt == NULL)
+ continue;
+ if (!obd_uuid_equals(&tgt->ltd_uuid,
+ &qctl->obd_uuid))
+ continue;
+
+ if (tgt->ltd_exp == NULL)
+ RETURN(-EINVAL);
+
+ break;
+ }
+ } else {
+ RETURN(-EINVAL);
+ }
+
+ if (i >= count)
+ RETURN(-EAGAIN);
+
+ LASSERT(tgt && tgt->ltd_exp);
+ OBD_ALLOC_PTR(oqctl);
+ if (!oqctl)
+ RETURN(-ENOMEM);
+
+ QCTL_COPY(oqctl, qctl);
+ rc = obd_quotactl(tgt->ltd_exp, oqctl);
+ if (rc == 0) {
+ QCTL_COPY(qctl, oqctl);
+ qctl->qc_valid = QC_MDTIDX;
+ qctl->obd_uuid = tgt->ltd_uuid;
+ }
+ OBD_FREE_PTR(oqctl);
+ break;
+ }
+ case OBD_IOC_CHANGELOG_SEND:
+ case OBD_IOC_CHANGELOG_CLEAR: {
+ struct ioc_changelog *icc = karg;
+
+ if (icc->icc_mdtindex >= count)
+ RETURN(-ENODEV);
+
+ if (lmv->tgts[icc->icc_mdtindex] == NULL ||
+ lmv->tgts[icc->icc_mdtindex]->ltd_exp == NULL ||
+ lmv->tgts[icc->icc_mdtindex]->ltd_active == 0)
+ RETURN(-ENODEV);
+ rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp,
+ sizeof(*icc), icc, NULL);
+ break;
+ }
+ case LL_IOC_GET_CONNECT_FLAGS: {
+ if (lmv->tgts[0] == NULL)
+ RETURN(-ENODATA);
+ rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg);
+ break;
+ }
+ case OBD_IOC_FID2PATH: {
+ rc = lmv_fid2path(exp, len, karg, uarg);
+ break;
+ }
+ case LL_IOC_HSM_STATE_GET:
+ case LL_IOC_HSM_STATE_SET:
+ case LL_IOC_HSM_ACTION:
+ case LL_IOC_LOV_SWAP_LAYOUTS: {
+ struct md_op_data *op_data = karg;
+ struct lmv_tgt_desc *tgt1, *tgt2;
+
+ tgt1 = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt1))
+ RETURN(PTR_ERR(tgt1));
+
+ tgt2 = lmv_find_target(lmv, &op_data->op_fid2);
+ if (IS_ERR(tgt2))
+ RETURN(PTR_ERR(tgt2));
+
+ if ((tgt1->ltd_exp == NULL) || (tgt2->ltd_exp == NULL))
+ RETURN(-EINVAL);
+
+ /* only files on same MDT can have their layouts swapped */
+ if (tgt1->ltd_idx != tgt2->ltd_idx)
+ RETURN(-EPERM);
+
+ rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg);
+ break;
+ }
+ default:
+ for (i = 0; i < count; i++) {
+ struct obd_device *mdc_obd;
+ int err;
+
+ if (lmv->tgts[i] == NULL ||
+ lmv->tgts[i]->ltd_exp == NULL)
+ continue;
+ /* ll_umount_begin() sets force flag but for lmv, not
+ * mdc. Let's pass it through */
+ mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp);
+ mdc_obd->obd_force = obddev->obd_force;
+ err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len,
+ karg, uarg);
+ if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
+ RETURN(err);
+ } else if (err) {
+ if (lmv->tgts[i]->ltd_active) {
+ CERROR("error: iocontrol MDC %s on MDT"
+ "idx %d cmd %x: err = %d\n",
+ lmv->tgts[i]->ltd_uuid.uuid,
+ i, cmd, err);
+ if (!rc)
+ rc = err;
+ }
+ } else
+ set = 1;
+ }
+ if (!set && !rc)
+ rc = -EIO;
+ }
+ RETURN(rc);
+}
+
+#if 0
+static int lmv_all_chars_policy(int count, const char *name,
+ int len)
+{
+ unsigned int c = 0;
+
+ while (len > 0)
+ c += name[--len];
+ c = c % count;
+ return c;
+}
+
+static int lmv_nid_policy(struct lmv_obd *lmv)
+{
+ struct obd_import *imp;
+ __u32 id;
+
+ /*
+ * XXX: To get nid we assume that underlying obd device is mdc.
+ */
+ imp = class_exp2cliimp(lmv->tgts[0].ltd_exp);
+ id = imp->imp_connection->c_self ^ (imp->imp_connection->c_self >> 32);
+ return id % lmv->desc.ld_tgt_count;
+}
+
+static int lmv_choose_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
+ placement_policy_t placement)
+{
+ switch (placement) {
+ case PLACEMENT_CHAR_POLICY:
+ return lmv_all_chars_policy(lmv->desc.ld_tgt_count,
+ op_data->op_name,
+ op_data->op_namelen);
+ case PLACEMENT_NID_POLICY:
+ return lmv_nid_policy(lmv);
+
+ default:
+ break;
+ }
+
+ CERROR("Unsupported placement policy %x\n", placement);
+ return -EINVAL;
+}
+#endif
+
+/**
+ * This is _inode_ placement policy function (not name).
+ */
+static int lmv_placement_policy(struct obd_device *obd,
+ struct md_op_data *op_data,
+ mdsno_t *mds)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ ENTRY;
+
+ LASSERT(mds != NULL);
+
+ if (lmv->desc.ld_tgt_count == 1) {
+ *mds = 0;
+ RETURN(0);
+ }
+
+ /**
+ * If stripe_offset is provided during setdirstripe
+ * (setdirstripe -i xx), xx MDS will be choosen.
+ */
+ if (op_data->op_cli_flags & CLI_SET_MEA) {
+ struct lmv_user_md *lum;
+
+ lum = (struct lmv_user_md *)op_data->op_data;
+ if (lum->lum_type == LMV_STRIPE_TYPE &&
+ lum->lum_stripe_offset != -1) {
+ if (lum->lum_stripe_offset >= lmv->desc.ld_tgt_count) {
+ CERROR("%s: Stripe_offset %d > MDT count %d:"
+ " rc = %d\n", obd->obd_name,
+ lum->lum_stripe_offset,
+ lmv->desc.ld_tgt_count, -ERANGE);
+ RETURN(-ERANGE);
+ }
+ *mds = lum->lum_stripe_offset;
+ RETURN(0);
+ }
+ }
+
+ /* Allocate new fid on target according to operation type and parent
+ * home mds. */
+ *mds = op_data->op_mds;
+ RETURN(0);
+}
+
+int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid,
+ mdsno_t mds)
+{
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ tgt = lmv_get_target(lmv, mds);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ /*
+ * New seq alloc and FLD setup should be atomic. Otherwise we may find
+ * on server that seq in new allocated fid is not yet known.
+ */
+ mutex_lock(&tgt->ltd_fid_mutex);
+
+ if (tgt->ltd_active == 0 || tgt->ltd_exp == NULL)
+ GOTO(out, rc = -ENODEV);
+
+ /*
+ * Asking underlaying tgt layer to allocate new fid.
+ */
+ rc = obd_fid_alloc(tgt->ltd_exp, fid, NULL);
+ if (rc > 0) {
+ LASSERT(fid_is_sane(fid));
+ rc = 0;
+ }
+
+ EXIT;
+out:
+ mutex_unlock(&tgt->ltd_fid_mutex);
+ return rc;
+}
+
+int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
+ struct md_op_data *op_data)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ mdsno_t mds = 0;
+ int rc;
+ ENTRY;
+
+ LASSERT(op_data != NULL);
+ LASSERT(fid != NULL);
+
+ rc = lmv_placement_policy(obd, op_data, &mds);
+ if (rc) {
+ CERROR("Can't get target for allocating fid, "
+ "rc %d\n", rc);
+ RETURN(rc);
+ }
+
+ rc = __lmv_fid_alloc(lmv, fid, mds);
+ if (rc) {
+ CERROR("Can't alloc new fid, rc %d\n", rc);
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lprocfs_static_vars lvars;
+ struct lmv_desc *desc;
+ int rc;
+ ENTRY;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
+ CERROR("LMV setup requires a descriptor\n");
+ RETURN(-EINVAL);
+ }
+
+ desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1);
+ if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
+ CERROR("Lmv descriptor size wrong: %d > %d\n",
+ (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC(lmv->tgts, sizeof(*lmv->tgts) * 32);
+ if (lmv->tgts == NULL)
+ RETURN(-ENOMEM);
+ lmv->tgts_size = 32;
+
+ obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
+ lmv->desc.ld_tgt_count = 0;
+ lmv->desc.ld_active_tgt_count = 0;
+ lmv->max_cookiesize = 0;
+ lmv->max_def_easize = 0;
+ lmv->max_easize = 0;
+ lmv->lmv_placement = PLACEMENT_CHAR_POLICY;
+
+ spin_lock_init(&lmv->lmv_lock);
+ mutex_init(&lmv->init_mutex);
+
+ lprocfs_lmv_init_vars(&lvars);
+
+ lprocfs_obd_setup(obd, lvars.obd_vars);
+#ifdef LPROCFS
+ {
+ rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd",
+ 0444, &lmv_proc_target_fops, obd);
+ if (rc)
+ CWARN("%s: error adding LMV target_obd file: rc = %d\n",
+ obd->obd_name, rc);
+ }
+#endif
+ rc = fld_client_init(&lmv->lmv_fld, obd->obd_name,
+ LUSTRE_CLI_FLD_HASH_DHT);
+ if (rc) {
+ CERROR("Can't init FLD, err %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ RETURN(0);
+
+out:
+ return rc;
+}
+
+static int lmv_cleanup(struct obd_device *obd)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ ENTRY;
+
+ fld_client_fini(&lmv->lmv_fld);
+ if (lmv->tgts != NULL) {
+ int i;
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL)
+ continue;
+ lmv_del_target(lmv, i);
+ }
+ OBD_FREE(lmv->tgts, sizeof(*lmv->tgts) * lmv->tgts_size);
+ lmv->tgts_size = 0;
+ }
+ RETURN(0);
+}
+
+static int lmv_process_config(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct lustre_cfg *lcfg = buf;
+ struct obd_uuid obd_uuid;
+ int gen;
+ __u32 index;
+ int rc;
+ ENTRY;
+
+ switch (lcfg->lcfg_command) {
+ case LCFG_ADD_MDC:
+ /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID
+ * 2:0 3:1 4:lustre-MDT0000-mdc_UUID */
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid))
+ GOTO(out, rc = -EINVAL);
+
+ obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1));
+
+ if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
+ GOTO(out, rc = -EINVAL);
+ if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1)
+ GOTO(out, rc = -EINVAL);
+ rc = lmv_add_target(obd, &obd_uuid, index, gen);
+ GOTO(out, rc);
+ default:
+ CERROR("Unknown command: %d\n", lcfg->lcfg_command);
+ GOTO(out, rc = -EINVAL);
+ }
+out:
+ RETURN(rc);
+}
+
+static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age, __u32 flags)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_statfs *temp;
+ int rc = 0;
+ int i;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ OBD_ALLOC(temp, sizeof(*temp));
+ if (temp == NULL)
+ RETURN(-ENOMEM);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+ continue;
+
+ rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp,
+ max_age, flags);
+ if (rc) {
+ CERROR("can't stat MDS #%d (%s), error %d\n", i,
+ lmv->tgts[i]->ltd_exp->exp_obd->obd_name,
+ rc);
+ GOTO(out_free_temp, rc);
+ }
+
+ if (i == 0) {
+ *osfs = *temp;
+ /* If the statfs is from mount, it will needs
+ * retrieve necessary information from MDT0.
+ * i.e. mount does not need the merged osfs
+ * from all of MDT.
+ * And also clients can be mounted as long as
+ * MDT0 is in service*/
+ if (flags & OBD_STATFS_FOR_MDT0)
+ GOTO(out_free_temp, rc);
+ } else {
+ osfs->os_bavail += temp->os_bavail;
+ osfs->os_blocks += temp->os_blocks;
+ osfs->os_ffree += temp->os_ffree;
+ osfs->os_files += temp->os_files;
+ }
+ }
+
+ EXIT;
+out_free_temp:
+ OBD_FREE(temp, sizeof(*temp));
+ return rc;
+}
+
+static int lmv_getstatus(struct obd_export *exp,
+ struct lu_fid *fid,
+ struct obd_capa **pc)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ rc = md_getstatus(lmv->tgts[0]->ltd_exp, fid, pc);
+ RETURN(rc);
+}
+
+static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, obd_valid valid, const char *name,
+ const char *input, int input_size, int output_size,
+ int flags, struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_getxattr(tgt->ltd_exp, fid, oc, valid, name, input,
+ input_size, output_size, flags, request);
+
+ RETURN(rc);
+}
+
+static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, obd_valid valid, const char *name,
+ const char *input, int input_size, int output_size,
+ int flags, __u32 suppgid,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_setxattr(tgt->ltd_exp, fid, oc, valid, name, input,
+ input_size, output_size, flags, suppgid,
+ request);
+
+ RETURN(rc);
+}
+
+static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ if (op_data->op_flags & MF_GET_MDT_IDX) {
+ op_data->op_mds = tgt->ltd_idx;
+ RETURN(0);
+ }
+
+ rc = md_getattr(tgt->ltd_exp, op_data, request);
+
+ RETURN(rc);
+}
+
+static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int i;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid));
+
+ /*
+ * With DNE every object can have two locks in different namespaces:
+ * lookup lock in space of MDT storing direntry and update/open lock in
+ * space of MDT storing inode.
+ */
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+ continue;
+ md_null_inode(lmv->tgts[i]->ltd_exp, fid);
+ }
+
+ RETURN(0);
+}
+
+static int lmv_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int i;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid));
+
+ /*
+ * With DNE every object can have two locks in different namespaces:
+ * lookup lock in space of MDT storing direntry and update/open lock in
+ * space of MDT storing inode.
+ */
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
+ continue;
+ rc = md_find_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data);
+ if (rc)
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+
+static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_open_data *mod, struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ CDEBUG(D_INODE, "CLOSE "DFID"\n", PFID(&op_data->op_fid1));
+ rc = md_close(tgt->ltd_exp, op_data, mod, request);
+ RETURN(rc);
+}
+
+struct lmv_tgt_desc
+*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
+ struct lu_fid *fid)
+{
+ struct lmv_tgt_desc *tgt;
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ return tgt;
+
+ op_data->op_mds = tgt->ltd_idx;
+
+ return tgt;
+}
+
+int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
+ const void *data, int datalen, int mode, __u32 uid,
+ __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ if (!lmv->desc.ld_active_tgt_count)
+ RETURN(-EIO);
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "CREATE '%*s' on "DFID" -> mds #%x\n",
+ op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
+ op_data->op_mds);
+
+ op_data->op_flags |= MF_MDC_CANCEL_FID1;
+ rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid,
+ cap_effective, rdev, request);
+
+ if (rc == 0) {
+ if (*request == NULL)
+ RETURN(rc);
+ CDEBUG(D_INODE, "Created - "DFID"\n", PFID(&op_data->op_fid2));
+ }
+ RETURN(rc);
+}
+
+static int lmv_done_writing(struct obd_export *exp,
+ struct md_op_data *op_data,
+ struct md_open_data *mod)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_done_writing(tgt->ltd_exp, op_data, mod);
+ RETURN(rc);
+}
+
+static int
+lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it, struct md_op_data *op_data,
+ struct lustre_handle *lockh, void *lmm, int lmmsize,
+ int extra_lock_flags)
+{
+ struct ptlrpc_request *req = it->d.lustre.it_data;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lustre_handle plock;
+ struct lmv_tgt_desc *tgt;
+ struct md_op_data *rdata;
+ struct lu_fid fid1;
+ struct mdt_body *body;
+ int rc = 0;
+ int pmode;
+ ENTRY;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+
+ if (!(body->valid & OBD_MD_MDS))
+ RETURN(0);
+
+ CDEBUG(D_INODE, "REMOTE_ENQUEUE '%s' on "DFID" -> "DFID"\n",
+ LL_IT2STR(it), PFID(&op_data->op_fid1), PFID(&body->fid1));
+
+ /*
+ * We got LOOKUP lock, but we really need attrs.
+ */
+ pmode = it->d.lustre.it_lock_mode;
+ LASSERT(pmode != 0);
+ memcpy(&plock, lockh, sizeof(plock));
+ it->d.lustre.it_lock_mode = 0;
+ it->d.lustre.it_data = NULL;
+ fid1 = body->fid1;
+
+ it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
+ ptlrpc_req_finished(req);
+
+ tgt = lmv_find_target(lmv, &fid1);
+ if (IS_ERR(tgt))
+ GOTO(out, rc = PTR_ERR(tgt));
+
+ OBD_ALLOC_PTR(rdata);
+ if (rdata == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rdata->op_fid1 = fid1;
+ rdata->op_bias = MDS_CROSS_REF;
+
+ rc = md_enqueue(tgt->ltd_exp, einfo, it, rdata, lockh,
+ lmm, lmmsize, NULL, extra_lock_flags);
+ OBD_FREE_PTR(rdata);
+ EXIT;
+out:
+ ldlm_lock_decref(&plock, pmode);
+ return rc;
+}
+
+static int
+lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it, struct md_op_data *op_data,
+ struct lustre_handle *lockh, void *lmm, int lmmsize,
+ struct ptlrpc_request **req, __u64 extra_lock_flags)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID"\n",
+ LL_IT2STR(it), PFID(&op_data->op_fid1));
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID" -> mds #%d\n",
+ LL_IT2STR(it), PFID(&op_data->op_fid1), tgt->ltd_idx);
+
+ rc = md_enqueue(tgt->ltd_exp, einfo, it, op_data, lockh,
+ lmm, lmmsize, req, extra_lock_flags);
+
+ if (rc == 0 && it && it->it_op == IT_OPEN) {
+ rc = lmv_enqueue_remote(exp, einfo, it, op_data, lockh,
+ lmm, lmmsize, extra_lock_flags);
+ }
+ RETURN(rc);
+}
+
+static int
+lmv_getattr_name(struct obd_export *exp,struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req = NULL;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ CDEBUG(D_INODE, "GETATTR_NAME for %*s on "DFID" -> mds #%d\n",
+ op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
+ tgt->ltd_idx);
+
+ rc = md_getattr_name(tgt->ltd_exp, op_data, request);
+ if (rc != 0)
+ RETURN(rc);
+
+ body = req_capsule_server_get(&(*request)->rq_pill,
+ &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+
+ if (body->valid & OBD_MD_MDS) {
+ struct lu_fid rid = body->fid1;
+ CDEBUG(D_INODE, "Request attrs for "DFID"\n",
+ PFID(&rid));
+
+ tgt = lmv_find_target(lmv, &rid);
+ if (IS_ERR(tgt)) {
+ ptlrpc_req_finished(*request);
+ RETURN(PTR_ERR(tgt));
+ }
+
+ op_data->op_fid1 = rid;
+ op_data->op_valid |= OBD_MD_FLCROSSREF;
+ op_data->op_namelen = 0;
+ op_data->op_name = NULL;
+ rc = md_getattr_name(tgt->ltd_exp, op_data, &req);
+ ptlrpc_req_finished(*request);
+ *request = req;
+ }
+
+ RETURN(rc);
+}
+
+#define md_op_data_fid(op_data, fl) \
+ (fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 : \
+ fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 : \
+ fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 : \
+ fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \
+ NULL)
+
+static int lmv_early_cancel(struct obd_export *exp, struct md_op_data *op_data,
+ int op_tgt, ldlm_mode_t mode, int bits, int flag)
+{
+ struct lu_fid *fid = md_op_data_fid(op_data, flag);
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ ldlm_policy_data_t policy = {{0}};
+ int rc = 0;
+ ENTRY;
+
+ if (!fid_is_sane(fid))
+ RETURN(0);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ if (tgt->ltd_idx != op_tgt) {
+ CDEBUG(D_INODE, "EARLY_CANCEL on "DFID"\n", PFID(fid));
+ policy.l_inodebits.bits = bits;
+ rc = md_cancel_unused(tgt->ltd_exp, fid, &policy,
+ mode, LCF_ASYNC, NULL);
+ } else {
+ CDEBUG(D_INODE,
+ "EARLY_CANCEL skip operation target %d on "DFID"\n",
+ op_tgt, PFID(fid));
+ op_data->op_flags |= flag;
+ rc = 0;
+ }
+
+ RETURN(rc);
+}
+
+/*
+ * llite passes fid of an target inode in op_data->op_fid1 and id of directory in
+ * op_data->op_fid2
+ */
+static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ LASSERT(op_data->op_namelen != 0);
+
+ CDEBUG(D_INODE, "LINK "DFID":%*s to "DFID"\n",
+ PFID(&op_data->op_fid2), op_data->op_namelen,
+ op_data->op_name, PFID(&op_data->op_fid1));
+
+ op_data->op_fsuid = current_fsuid();
+ op_data->op_fsgid = current_fsgid();
+ op_data->op_cap = cfs_curproc_cap_pack();
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ /*
+ * Cancel UPDATE lock on child (fid1).
+ */
+ op_data->op_flags |= MF_MDC_CANCEL_FID2;
+ rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
+ MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
+ if (rc != 0)
+ RETURN(rc);
+
+ rc = md_link(tgt->ltd_exp, op_data, request);
+
+ RETURN(rc);
+}
+
+static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new, int newlen,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *src_tgt;
+ struct lmv_tgt_desc *tgt_tgt;
+ int rc;
+ ENTRY;
+
+ LASSERT(oldlen != 0);
+
+ CDEBUG(D_INODE, "RENAME %*s in "DFID" to %*s in "DFID"\n",
+ oldlen, old, PFID(&op_data->op_fid1),
+ newlen, new, PFID(&op_data->op_fid2));
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ op_data->op_fsuid = current_fsuid();
+ op_data->op_fsgid = current_fsgid();
+ op_data->op_cap = cfs_curproc_cap_pack();
+ src_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(src_tgt))
+ RETURN(PTR_ERR(src_tgt));
+
+ tgt_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
+ if (IS_ERR(tgt_tgt))
+ RETURN(PTR_ERR(tgt_tgt));
+ /*
+ * LOOKUP lock on src child (fid3) should also be cancelled for
+ * src_tgt in mdc_rename.
+ */
+ op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3;
+
+ /*
+ * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
+ * own target.
+ */
+ rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ LCK_EX, MDS_INODELOCK_UPDATE,
+ MF_MDC_CANCEL_FID2);
+
+ /*
+ * Cancel LOOKUP locks on tgt child (fid4) for parent tgt_tgt.
+ */
+ if (rc == 0) {
+ rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ LCK_EX, MDS_INODELOCK_LOOKUP,
+ MF_MDC_CANCEL_FID4);
+ }
+
+ /*
+ * Cancel all the locks on tgt child (fid4).
+ */
+ if (rc == 0)
+ rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+ LCK_EX, MDS_INODELOCK_FULL,
+ MF_MDC_CANCEL_FID4);
+
+ if (rc == 0)
+ rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
+ new, newlen, request);
+ RETURN(rc);
+}
+
+static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request,
+ struct md_open_data **mod)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc = 0;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "SETATTR for "DFID", valid 0x%x\n",
+ PFID(&op_data->op_fid1), op_data->op_attr.ia_valid);
+
+ op_data->op_flags |= MF_MDC_CANCEL_FID1;
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_setattr(tgt->ltd_exp, op_data, ea, ealen, ea2,
+ ea2len, request, mod);
+
+ RETURN(rc);
+}
+
+static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_sync(tgt->ltd_exp, fid, oc, request);
+ RETURN(rc);
+}
+
+/*
+ * Adjust a set of pages, each page containing an array of lu_dirpages,
+ * so that each page can be used as a single logical lu_dirpage.
+ *
+ * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
+ * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
+ * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end
+ * value is used as a cookie to request the next lu_dirpage in a
+ * directory listing that spans multiple pages (two in this example):
+ * ________
+ * | |
+ * .|--------v------- -----.
+ * |s|e|f|p|ent|ent| ... |ent|
+ * '--|-------------- -----' Each CFS_PAGE contains a single
+ * '------. lu_dirpage.
+ * .---------v------- -----.
+ * |s|e|f|p|ent| 0 | ... | 0 |
+ * '----------------- -----'
+ *
+ * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is
+ * larger than LU_PAGE_SIZE, a single host page may contain multiple
+ * lu_dirpages. After reading the lu_dirpages from the MDS, the
+ * ldp_hash_end of the first lu_dirpage refers to the one immediately
+ * after it in the same CFS_PAGE (arrows simplified for brevity, but
+ * in general e0==s1, e1==s2, etc.):
+ *
+ * .-------------------- -----.
+ * |s0|e0|f0|p|ent|ent| ... |ent|
+ * |---v---------------- -----|
+ * |s1|e1|f1|p|ent|ent| ... |ent|
+ * |---v---------------- -----| Here, each CFS_PAGE contains
+ * ... multiple lu_dirpages.
+ * |---v---------------- -----|
+ * |s'|e'|f'|p|ent|ent| ... |ent|
+ * '---|---------------- -----'
+ * v
+ * .----------------------------.
+ * | next CFS_PAGE |
+ *
+ * This structure is transformed into a single logical lu_dirpage as follows:
+ *
+ * - Replace e0 with e' so the request for the next lu_dirpage gets the page
+ * labeled 'next CFS_PAGE'.
+ *
+ * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
+ * a hash collision with the next page exists.
+ *
+ * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
+ * to the first entry of the next lu_dirpage.
+ */
+#if PAGE_CACHE_SIZE > LU_PAGE_SIZE
+static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
+{
+ int i;
+
+ for (i = 0; i < ncfspgs; i++) {
+ struct lu_dirpage *dp = kmap(pages[i]);
+ struct lu_dirpage *first = dp;
+ struct lu_dirent *end_dirent = NULL;
+ struct lu_dirent *ent;
+ __u64 hash_end = dp->ldp_hash_end;
+ __u32 flags = dp->ldp_flags;
+
+ for (; nlupgs > 1; nlupgs--) {
+ ent = lu_dirent_start(dp);
+ for (end_dirent = ent; ent != NULL;
+ end_dirent = ent, ent = lu_dirent_next(ent));
+
+ /* Advance dp to next lu_dirpage. */
+ dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
+
+ /* Check if we've reached the end of the CFS_PAGE. */
+ if (!((unsigned long)dp & ~CFS_PAGE_MASK))
+ break;
+
+ /* Save the hash and flags of this lu_dirpage. */
+ hash_end = dp->ldp_hash_end;
+ flags = dp->ldp_flags;
+
+ /* Check if lu_dirpage contains no entries. */
+ if (!end_dirent)
+ break;
+
+ /* Enlarge the end entry lde_reclen from 0 to
+ * first entry of next lu_dirpage. */
+ LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
+ end_dirent->lde_reclen =
+ cpu_to_le16((char *)(dp->ldp_entries) -
+ (char *)end_dirent);
+ }
+
+ first->ldp_hash_end = hash_end;
+ first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
+ first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
+
+ kunmap(pages[i]);
+ }
+}
+#else
+#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
+#endif /* PAGE_CACHE_SIZE > LU_PAGE_SIZE */
+
+static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data,
+ struct page **pages, struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ __u64 offset = op_data->op_offset;
+ int rc;
+ int ncfspgs; /* pages read in PAGE_CACHE_SIZE */
+ int nlupgs; /* pages read in LU_PAGE_SIZE */
+ struct lmv_tgt_desc *tgt;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "READPAGE at "LPX64" from "DFID"\n",
+ offset, PFID(&op_data->op_fid1));
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_readpage(tgt->ltd_exp, op_data, pages, request);
+ if (rc != 0)
+ RETURN(rc);
+
+ ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_CACHE_SIZE - 1)
+ >> PAGE_CACHE_SHIFT;
+ nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT;
+ LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
+ LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages);
+
+ CDEBUG(D_INODE, "read %d(%d)/%d pages\n", ncfspgs, nlupgs,
+ op_data->op_npages);
+
+ lmv_adjust_dirpages(pages, ncfspgs, nlupgs);
+
+ RETURN(rc);
+}
+
+static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt = NULL;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+retry:
+ /* Send unlink requests to the MDT where the child is located */
+ if (likely(!fid_is_zero(&op_data->op_fid2)))
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
+ else
+ tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ op_data->op_fsuid = current_fsuid();
+ op_data->op_fsgid = current_fsgid();
+ op_data->op_cap = cfs_curproc_cap_pack();
+
+ /*
+ * If child's fid is given, cancel unused locks for it if it is from
+ * another export than parent.
+ *
+ * LOOKUP lock for child (fid3) should also be cancelled on parent
+ * tgt_tgt in mdc_unlink().
+ */
+ op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3;
+
+ /*
+ * Cancel FULL locks on child (fid3).
+ */
+ rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
+ MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
+
+ if (rc != 0)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%d\n",
+ PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
+
+ rc = md_unlink(tgt->ltd_exp, op_data, request);
+ if (rc != 0 && rc != -EREMOTE)
+ RETURN(rc);
+
+ body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ RETURN(-EPROTO);
+
+ /* Not cross-ref case, just get out of here. */
+ if (likely(!(body->valid & OBD_MD_MDS)))
+ RETURN(0);
+
+ CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n",
+ exp->exp_obd->obd_name, PFID(&body->fid1));
+
+ /* This is a remote object, try remote MDT, Note: it may
+ * try more than 1 time here, Considering following case
+ * /mnt/lustre is root on MDT0, remote1 is on MDT1
+ * 1. Initially A does not know where remote1 is, it send
+ * unlink RPC to MDT0, MDT0 return -EREMOTE, it will
+ * resend unlink RPC to MDT1 (retry 1st time).
+ *
+ * 2. During the unlink RPC in flight,
+ * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2
+ * and create new remote1, but on MDT0
+ *
+ * 3. MDT1 get unlink RPC(from A), then do remote lock on
+ * /mnt/lustre, then lookup get fid of remote1, and find
+ * it is remote dir again, and replay -EREMOTE again.
+ *
+ * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times).
+ *
+ * In theory, it might try unlimited time here, but it should
+ * be very rare case. */
+ op_data->op_fid2 = body->fid1;
+ ptlrpc_req_finished(*request);
+ *request = NULL;
+
+ goto retry;
+}
+
+static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int rc = 0;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ /* XXX: here should be calling obd_precleanup() down to
+ * stack. */
+ break;
+ case OBD_CLEANUP_EXPORTS:
+ fld_client_proc_fini(&lmv->lmv_fld);
+ lprocfs_obd_cleanup(obd);
+ break;
+ default:
+ break;
+ }
+ RETURN(rc);
+}
+
+static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm)
+{
+ struct obd_device *obd;
+ struct lmv_obd *lmv;
+ int rc = 0;
+ ENTRY;
+
+ obd = class_exp2obd(exp);
+ if (obd == NULL) {
+ CDEBUG(D_IOCTL, "Invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ RETURN(-EINVAL);
+ }
+
+ lmv = &obd->u.lmv;
+ if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
+ struct lmv_tgt_desc *tgt;
+ int i;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ LASSERT(*vallen == sizeof(__u32));
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = lmv->tgts[i];
+ /*
+ * All tgts should be connected when this gets called.
+ */
+ if (tgt == NULL || tgt->ltd_exp == NULL)
+ continue;
+
+ if (!obd_get_info(env, tgt->ltd_exp, keylen, key,
+ vallen, val, NULL))
+ RETURN(0);
+ }
+ RETURN(-EINVAL);
+ } else if (KEY_IS(KEY_MAX_EASIZE) || KEY_IS(KEY_CONN_DATA)) {
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ /*
+ * Forwarding this request to first MDS, it should know LOV
+ * desc.
+ */
+ rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key,
+ vallen, val, NULL);
+ if (!rc && KEY_IS(KEY_CONN_DATA))
+ exp->exp_connect_data = *(struct obd_connect_data *)val;
+ RETURN(rc);
+ } else if (KEY_IS(KEY_TGT_COUNT)) {
+ *((int *)val) = lmv->desc.ld_tgt_count;
+ RETURN(0);
+ }
+
+ CDEBUG(D_IOCTL, "Invalid key\n");
+ RETURN(-EINVAL);
+}
+
+int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, obd_count vallen,
+ void *val, struct ptlrpc_request_set *set)
+{
+ struct lmv_tgt_desc *tgt;
+ struct obd_device *obd;
+ struct lmv_obd *lmv;
+ int rc = 0;
+ ENTRY;
+
+ obd = class_exp2obd(exp);
+ if (obd == NULL) {
+ CDEBUG(D_IOCTL, "Invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ RETURN(-EINVAL);
+ }
+ lmv = &obd->u.lmv;
+
+ if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX)) {
+ int i, err = 0;
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = lmv->tgts[i];
+
+ if (tgt == NULL || tgt->ltd_exp == NULL)
+ continue;
+
+ err = obd_set_info_async(env, tgt->ltd_exp,
+ keylen, key, vallen, val, set);
+ if (err && rc == 0)
+ rc = err;
+ }
+
+ RETURN(rc);
+ }
+
+ RETURN(-EINVAL);
+}
+
+int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_stripe_md *meap;
+ struct lmv_stripe_md *lsmp;
+ int mea_size;
+ int i;
+ ENTRY;
+
+ mea_size = lmv_get_easize(lmv);
+ if (!lmmp)
+ RETURN(mea_size);
+
+ if (*lmmp && !lsm) {
+ OBD_FREE_LARGE(*lmmp, mea_size);
+ *lmmp = NULL;
+ RETURN(0);
+ }
+
+ if (*lmmp == NULL) {
+ OBD_ALLOC_LARGE(*lmmp, mea_size);
+ if (*lmmp == NULL)
+ RETURN(-ENOMEM);
+ }
+
+ if (!lsm)
+ RETURN(mea_size);
+
+ lsmp = (struct lmv_stripe_md *)lsm;
+ meap = (struct lmv_stripe_md *)*lmmp;
+
+ if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
+ lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
+ RETURN(-EINVAL);
+
+ meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
+ meap->mea_count = cpu_to_le32(lsmp->mea_count);
+ meap->mea_master = cpu_to_le32(lsmp->mea_master);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ meap->mea_ids[i] = lsmp->mea_ids[i];
+ fid_cpu_to_le(&meap->mea_ids[i], &lsmp->mea_ids[i]);
+ }
+
+ RETURN(mea_size);
+}
+
+int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_size)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_stripe_md **tmea = (struct lmv_stripe_md **)lsmp;
+ struct lmv_stripe_md *mea = (struct lmv_stripe_md *)lmm;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int mea_size;
+ int i;
+ __u32 magic;
+ ENTRY;
+
+ mea_size = lmv_get_easize(lmv);
+ if (lsmp == NULL)
+ return mea_size;
+
+ if (*lsmp != NULL && lmm == NULL) {
+ OBD_FREE_LARGE(*tmea, mea_size);
+ *lsmp = NULL;
+ RETURN(0);
+ }
+
+ LASSERT(mea_size == lmm_size);
+
+ OBD_ALLOC_LARGE(*tmea, mea_size);
+ if (*tmea == NULL)
+ RETURN(-ENOMEM);
+
+ if (!lmm)
+ RETURN(mea_size);
+
+ if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
+ mea->mea_magic == MEA_MAGIC_ALL_CHARS ||
+ mea->mea_magic == MEA_MAGIC_HASH_SEGMENT)
+ {
+ magic = le32_to_cpu(mea->mea_magic);
+ } else {
+ /*
+ * Old mea is not handled here.
+ */
+ CERROR("Old not supportable EA is found\n");
+ LBUG();
+ }
+
+ (*tmea)->mea_magic = magic;
+ (*tmea)->mea_count = le32_to_cpu(mea->mea_count);
+ (*tmea)->mea_master = le32_to_cpu(mea->mea_master);
+
+ for (i = 0; i < (*tmea)->mea_count; i++) {
+ (*tmea)->mea_ids[i] = mea->mea_ids[i];
+ fid_le_to_cpu(&(*tmea)->mea_ids[i], &(*tmea)->mea_ids[i]);
+ }
+ RETURN(mea_size);
+}
+
+static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags, void *opaque)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int rc = 0;
+ int err;
+ int i;
+ ENTRY;
+
+ LASSERT(fid != NULL);
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL ||
+ lmv->tgts[i]->ltd_active == 0)
+ continue;
+
+ err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid,
+ policy, mode, flags, opaque);
+ if (!rc)
+ rc = err;
+ }
+ RETURN(rc);
+}
+
+int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
+ __u64 *bits)
+{
+ struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+ int rc;
+ ENTRY;
+
+ rc = md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits);
+ RETURN(rc);
+}
+
+ldlm_mode_t lmv_lock_match(struct obd_export *exp, __u64 flags,
+ const struct lu_fid *fid, ldlm_type_t type,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ struct lustre_handle *lockh)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ ldlm_mode_t rc;
+ int i;
+ ENTRY;
+
+ CDEBUG(D_INODE, "Lock match for "DFID"\n", PFID(fid));
+
+ /*
+ * With CMD every object can have two locks in different namespaces:
+ * lookup lock in space of mds storing direntry and update/open lock in
+ * space of mds storing inode. Thus we check all targets, not only that
+ * one fid was created in.
+ */
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ if (lmv->tgts[i] == NULL ||
+ lmv->tgts[i]->ltd_exp == NULL ||
+ lmv->tgts[i]->ltd_active == 0)
+ continue;
+
+ rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid,
+ type, policy, mode, lockh);
+ if (rc)
+ RETURN(rc);
+ }
+
+ RETURN(0);
+}
+
+int lmv_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
+ struct obd_export *dt_exp, struct obd_export *md_exp,
+ struct lustre_md *md)
+{
+ struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+
+ return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md);
+}
+
+int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ ENTRY;
+
+ if (md->mea)
+ obd_free_memmd(exp, (void *)&md->mea);
+ RETURN(md_free_lustre_md(lmv->tgts[0]->ltd_exp, md));
+}
+
+int lmv_set_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och,
+ struct ptlrpc_request *open_req)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ ENTRY;
+
+ tgt = lmv_find_target(lmv, &och->och_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ RETURN(md_set_open_replay_data(tgt->ltd_exp, och, open_req));
+}
+
+int lmv_clear_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ ENTRY;
+
+ tgt = lmv_find_target(lmv, &och->och_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ RETURN(md_clear_open_replay_data(tgt->ltd_exp, och));
+}
+
+static int lmv_get_remote_perm(struct obd_export *exp,
+ const struct lu_fid *fid,
+ struct obd_capa *oc, __u32 suppgid,
+ struct ptlrpc_request **request)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_get_remote_perm(tgt->ltd_exp, fid, oc, suppgid, request);
+ RETURN(rc);
+}
+
+static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *oc,
+ renew_capa_cb_t cb)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, &oc->c_capa.lc_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_renew_capa(tgt->ltd_exp, oc, cb);
+ RETURN(rc);
+}
+
+int lmv_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req,
+ const struct req_msg_field *field, struct obd_capa **oc)
+{
+ struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+
+ return md_unpack_capa(lmv->tgts[0]->ltd_exp, req, field, oc);
+}
+
+int lmv_intent_getattr_async(struct obd_export *exp,
+ struct md_enqueue_info *minfo,
+ struct ldlm_enqueue_info *einfo)
+{
+ struct md_op_data *op_data = &minfo->mi_data;
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt = NULL;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_intent_getattr_async(tgt->ltd_exp, minfo, einfo);
+ RETURN(rc);
+}
+
+int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
+ struct lu_fid *fid, __u64 *bits)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt = lmv_find_target(lmv, fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ rc = md_revalidate_lock(tgt->ltd_exp, it, fid, bits);
+ RETURN(rc);
+}
+
+/**
+ * For lmv, only need to send request to master MDT, and the master MDT will
+ * process with other slave MDTs. The only exception is Q_GETOQUOTA for which
+ * we directly fetch data from the slave MDTs.
+ */
+int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt = lmv->tgts[0];
+ int rc = 0, i;
+ __u64 curspace, curinodes;
+ ENTRY;
+
+ if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) {
+ CERROR("master lmv inactive\n");
+ RETURN(-EIO);
+ }
+
+ if (oqctl->qc_cmd != Q_GETOQUOTA) {
+ rc = obd_quotactl(tgt->ltd_exp, oqctl);
+ RETURN(rc);
+ }
+
+ curspace = curinodes = 0;
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ int err;
+ tgt = lmv->tgts[i];
+
+ if (tgt == NULL || tgt->ltd_exp == NULL || tgt->ltd_active == 0)
+ continue;
+ if (!tgt->ltd_active) {
+ CDEBUG(D_HA, "mdt %d is inactive.\n", i);
+ continue;
+ }
+
+ err = obd_quotactl(tgt->ltd_exp, oqctl);
+ if (err) {
+ CERROR("getquota on mdt %d failed. %d\n", i, err);
+ if (!rc)
+ rc = err;
+ } else {
+ curspace += oqctl->qc_dqblk.dqb_curspace;
+ curinodes += oqctl->qc_dqblk.dqb_curinodes;
+ }
+ }
+ oqctl->qc_dqblk.dqb_curspace = curspace;
+ oqctl->qc_dqblk.dqb_curinodes = curinodes;
+
+ RETURN(rc);
+}
+
+int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int i, rc = 0;
+ ENTRY;
+
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ int err;
+ tgt = lmv->tgts[i];
+ if (tgt == NULL || tgt->ltd_exp == NULL || !tgt->ltd_active) {
+ CERROR("lmv idx %d inactive\n", i);
+ RETURN(-EIO);
+ }
+
+ err = obd_quotacheck(tgt->ltd_exp, oqctl);
+ if (err && !rc)
+ rc = err;
+ }
+
+ RETURN(rc);
+}
+
+struct obd_ops lmv_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = lmv_setup,
+ .o_cleanup = lmv_cleanup,
+ .o_precleanup = lmv_precleanup,
+ .o_process_config = lmv_process_config,
+ .o_connect = lmv_connect,
+ .o_disconnect = lmv_disconnect,
+ .o_statfs = lmv_statfs,
+ .o_get_info = lmv_get_info,
+ .o_set_info_async = lmv_set_info_async,
+ .o_packmd = lmv_packmd,
+ .o_unpackmd = lmv_unpackmd,
+ .o_notify = lmv_notify,
+ .o_get_uuid = lmv_get_uuid,
+ .o_iocontrol = lmv_iocontrol,
+ .o_quotacheck = lmv_quotacheck,
+ .o_quotactl = lmv_quotactl
+};
+
+struct md_ops lmv_md_ops = {
+ .m_getstatus = lmv_getstatus,
+ .m_null_inode = lmv_null_inode,
+ .m_find_cbdata = lmv_find_cbdata,
+ .m_close = lmv_close,
+ .m_create = lmv_create,
+ .m_done_writing = lmv_done_writing,
+ .m_enqueue = lmv_enqueue,
+ .m_getattr = lmv_getattr,
+ .m_getxattr = lmv_getxattr,
+ .m_getattr_name = lmv_getattr_name,
+ .m_intent_lock = lmv_intent_lock,
+ .m_link = lmv_link,
+ .m_rename = lmv_rename,
+ .m_setattr = lmv_setattr,
+ .m_setxattr = lmv_setxattr,
+ .m_sync = lmv_sync,
+ .m_readpage = lmv_readpage,
+ .m_unlink = lmv_unlink,
+ .m_init_ea_size = lmv_init_ea_size,
+ .m_cancel_unused = lmv_cancel_unused,
+ .m_set_lock_data = lmv_set_lock_data,
+ .m_lock_match = lmv_lock_match,
+ .m_get_lustre_md = lmv_get_lustre_md,
+ .m_free_lustre_md = lmv_free_lustre_md,
+ .m_set_open_replay_data = lmv_set_open_replay_data,
+ .m_clear_open_replay_data = lmv_clear_open_replay_data,
+ .m_renew_capa = lmv_renew_capa,
+ .m_unpack_capa = lmv_unpack_capa,
+ .m_get_remote_perm = lmv_get_remote_perm,
+ .m_intent_getattr_async = lmv_intent_getattr_async,
+ .m_revalidate_lock = lmv_revalidate_lock
+};
+
+int __init lmv_init(void)
+{
+ struct lprocfs_static_vars lvars;
+ int rc;
+
+ lprocfs_lmv_init_vars(&lvars);
+
+ rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
+ lvars.module_vars, LUSTRE_LMV_NAME, NULL);
+ return rc;
+}
+
+static void lmv_exit(void)
+{
+ class_unregister_type(LUSTRE_LMV_NAME);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Logical Metadata Volume OBD driver");
+MODULE_LICENSE("GPL");
+
+module_init(lmv_init);
+module_exit(lmv_exit);
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
new file mode 100644
index 000000000000..d1c45b583cbb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
@@ -0,0 +1,235 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#include <linux/seq_file.h>
+#include <asm/statfs.h>
+#include <lprocfs_status.h>
+#include <obd_class.h>
+
+#ifndef LPROCFS
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+#else
+static int lmv_numobd_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lmv_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lmv.desc;
+ return seq_printf(m, "%u\n", desc->ld_tgt_count);
+}
+LPROC_SEQ_FOPS_RO(lmv_numobd);
+
+static const char *placement_name[] = {
+ [PLACEMENT_CHAR_POLICY] = "CHAR",
+ [PLACEMENT_NID_POLICY] = "NID",
+ [PLACEMENT_INVAL_POLICY] = "INVAL"
+};
+
+static placement_policy_t placement_name2policy(char *name, int len)
+{
+ int i;
+
+ for (i = 0; i < PLACEMENT_MAX_POLICY; i++) {
+ if (!strncmp(placement_name[i], name, len))
+ return i;
+ }
+ return PLACEMENT_INVAL_POLICY;
+}
+
+static const char *placement_policy2name(placement_policy_t placement)
+{
+ LASSERT(placement < PLACEMENT_MAX_POLICY);
+ return placement_name[placement];
+}
+
+static int lmv_placement_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lmv_obd *lmv;
+
+ LASSERT(dev != NULL);
+ lmv = &dev->u.lmv;
+ return seq_printf(m, "%s\n", placement_policy2name(lmv->lmv_placement));
+}
+
+#define MAX_POLICY_STRING_SIZE 64
+
+static ssize_t lmv_placement_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ char dummy[MAX_POLICY_STRING_SIZE + 1];
+ int len = count;
+ placement_policy_t policy;
+ struct lmv_obd *lmv;
+
+ if (copy_from_user(dummy, buffer, MAX_POLICY_STRING_SIZE))
+ return -EFAULT;
+
+ LASSERT(dev != NULL);
+ lmv = &dev->u.lmv;
+
+ if (len > MAX_POLICY_STRING_SIZE)
+ len = MAX_POLICY_STRING_SIZE;
+
+ if (dummy[len - 1] == '\n')
+ len--;
+ dummy[len] = '\0';
+
+ policy = placement_name2policy(dummy, len);
+ if (policy != PLACEMENT_INVAL_POLICY) {
+ spin_lock(&lmv->lmv_lock);
+ lmv->lmv_placement = policy;
+ spin_unlock(&lmv->lmv_lock);
+ } else {
+ CERROR("Invalid placement policy \"%s\"!\n", dummy);
+ return -EINVAL;
+ }
+ return count;
+}
+LPROC_SEQ_FOPS(lmv_placement);
+
+static int lmv_activeobd_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lmv_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lmv.desc;
+ return seq_printf(m, "%u\n", desc->ld_active_tgt_count);
+}
+LPROC_SEQ_FOPS_RO(lmv_activeobd);
+
+static int lmv_desc_uuid_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lmv_obd *lmv;
+
+ LASSERT(dev != NULL);
+ lmv = &dev->u.lmv;
+ return seq_printf(m, "%s\n", lmv->desc.ld_uuid.uuid);
+}
+LPROC_SEQ_FOPS_RO(lmv_desc_uuid);
+
+static void *lmv_tgt_seq_start(struct seq_file *p, loff_t *pos)
+{
+ struct obd_device *dev = p->private;
+ struct lmv_obd *lmv = &dev->u.lmv;
+ return (*pos >= lmv->desc.ld_tgt_count) ? NULL : lmv->tgts[*pos];
+}
+
+static void lmv_tgt_seq_stop(struct seq_file *p, void *v)
+{
+ return;
+}
+
+static void *lmv_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct obd_device *dev = p->private;
+ struct lmv_obd *lmv = &dev->u.lmv;
+ ++*pos;
+ return (*pos >= lmv->desc.ld_tgt_count) ? NULL : lmv->tgts[*pos];
+}
+
+static int lmv_tgt_seq_show(struct seq_file *p, void *v)
+{
+ struct lmv_tgt_desc *tgt = v;
+
+ if (tgt == NULL)
+ return 0;
+ return seq_printf(p, "%d: %s %sACTIVE\n", tgt->ltd_idx,
+ tgt->ltd_uuid.uuid, tgt->ltd_active ? "" : "IN");
+}
+
+struct seq_operations lmv_tgt_sops = {
+ .start = lmv_tgt_seq_start,
+ .stop = lmv_tgt_seq_stop,
+ .next = lmv_tgt_seq_next,
+ .show = lmv_tgt_seq_show,
+};
+
+static int lmv_target_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc;
+
+ rc = seq_open(file, &lmv_tgt_sops);
+ if (rc)
+ return rc;
+
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+
+ return 0;
+}
+
+LPROC_SEQ_FOPS_RO_TYPE(lmv, uuid);
+
+struct lprocfs_vars lprocfs_lmv_obd_vars[] = {
+ { "numobd", &lmv_numobd_fops, 0, 0 },
+ { "placement", &lmv_placement_fops, 0, 0 },
+ { "activeobd", &lmv_activeobd_fops, 0, 0 },
+ { "uuid", &lmv_uuid_fops, 0, 0 },
+ { "desc_uuid", &lmv_desc_uuid_fops, 0, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(lmv, numrefs);
+
+static struct lprocfs_vars lprocfs_lmv_module_vars[] = {
+ { "num_refs", &lmv_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+struct file_operations lmv_proc_target_fops = {
+ .owner = THIS_MODULE,
+ .open = lmv_target_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#endif /* LPROCFS */
+void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_lmv_module_vars;
+ lvars->obd_vars = lprocfs_lmv_obd_vars;
+}
diff --git a/drivers/staging/lustre/lustre/lov/Makefile b/drivers/staging/lustre/lustre/lov/Makefile
new file mode 100644
index 000000000000..67eaec29bef1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_LUSTRE_FS) += lov.o
+lov-y := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o \
+ lov_request.o lov_ea.o lov_dev.o lov_object.o lov_page.o \
+ lov_lock.o lov_io.o lovsub_dev.o lovsub_object.o lovsub_page.o \
+ lovsub_lock.o lovsub_io.o lov_pool.o
+
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
new file mode 100644
index 000000000000..28801b8b5fdf
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -0,0 +1,820 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Internal interfaces of LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
+ */
+
+#ifndef LOV_CL_INTERNAL_H
+#define LOV_CL_INTERNAL_H
+
+# include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+#include <cl_object.h>
+#include "lov_internal.h"
+
+/** \defgroup lov lov
+ * Logical object volume layer. This layer implements data striping (raid0).
+ *
+ * At the lov layer top-entity (object, page, lock, io) is connected to one or
+ * more sub-entities: top-object, representing a file is connected to a set of
+ * sub-objects, each representing a stripe, file-level top-lock is connected
+ * to a set of per-stripe sub-locks, top-page is connected to a (single)
+ * sub-page, and a top-level IO is connected to a set of (potentially
+ * concurrent) sub-IO's.
+ *
+ * Sub-object, sub-page, and sub-io have well-defined top-object and top-page
+ * respectively, while a single sub-lock can be part of multiple top-locks.
+ *
+ * Reference counting models are different for different types of entities:
+ *
+ * - top-object keeps a reference to its sub-objects, and destroys them
+ * when it is destroyed.
+ *
+ * - top-page keeps a reference to its sub-page, and destroys it when it
+ * is destroyed.
+ *
+ * - sub-lock keep a reference to its top-locks. Top-lock keeps a
+ * reference (and a hold, see cl_lock_hold()) on its sub-locks when it
+ * actively using them (that is, in cl_lock_state::CLS_QUEUING,
+ * cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When
+ * moving into cl_lock_state::CLS_CACHED state, top-lock releases a
+ * hold. From this moment top-lock has only a 'weak' reference to its
+ * sub-locks. This reference is protected by top-lock
+ * cl_lock::cll_guard, and will be automatically cleared by the sub-lock
+ * when the latter is destroyed. When a sub-lock is canceled, a
+ * reference to it is removed from the top-lock array, and top-lock is
+ * moved into CLS_NEW state. It is guaranteed that all sub-locks exist
+ * while their top-lock is in CLS_HELD or CLS_CACHED states.
+ *
+ * - IO's are not reference counted.
+ *
+ * To implement a connection between top and sub entities, lov layer is split
+ * into two pieces: lov ("upper half"), and lovsub ("bottom half"), both
+ * implementing full set of cl-interfaces. For example, top-object has vvp and
+ * lov layers, and it's sub-object has lovsub and osc layers. lovsub layer is
+ * used to track child-parent relationship.
+ *
+ * @{
+ */
+
+struct lovsub_device;
+struct lovsub_object;
+struct lovsub_lock;
+
+enum lov_device_flags {
+ LOV_DEV_INITIALIZED = 1 << 0
+};
+
+/*
+ * Upper half.
+ */
+
+/**
+ * Resources that are used in memory-cleaning path, and whose allocation
+ * cannot fail even when memory is tight. They are preallocated in sufficient
+ * quantities in lov_device::ld_emerg[], and access to them is serialized
+ * lov_device::ld_mutex.
+ */
+struct lov_device_emerg {
+ /**
+ * Page list used to submit IO when memory is in pressure.
+ */
+ struct cl_page_list emrg_page_list;
+ /**
+ * sub-io's shared by all threads accessing this device when memory is
+ * too low to allocate sub-io's dynamically.
+ */
+ struct cl_io emrg_subio;
+ /**
+ * Environments used by sub-io's in
+ * lov_device_emerg::emrg_subio.
+ */
+ struct lu_env *emrg_env;
+ /**
+ * Refchecks for lov_device_emerg::emrg_env.
+ *
+ * \see cl_env_get()
+ */
+ int emrg_refcheck;
+};
+
+struct lov_device {
+ /*
+ * XXX Locking of lov-private data is missing.
+ */
+ struct cl_device ld_cl;
+ struct lov_obd *ld_lov;
+ /** size of lov_device::ld_target[] array */
+ __u32 ld_target_nr;
+ struct lovsub_device **ld_target;
+ __u32 ld_flags;
+
+ /** Emergency resources used in memory-cleansing paths. */
+ struct lov_device_emerg **ld_emrg;
+ /**
+ * Serializes access to lov_device::ld_emrg in low-memory
+ * conditions.
+ */
+ struct mutex ld_mutex;
+};
+
+/**
+ * Layout type.
+ */
+enum lov_layout_type {
+ /** empty file without body */
+ LLT_EMPTY,
+ /** striped file */
+ LLT_RAID0,
+ LLT_NR
+};
+
+/**
+ * lov-specific file state.
+ *
+ * lov object has particular layout type, determining how top-object is built
+ * on top of sub-objects. Layout type can change dynamically. When this
+ * happens, lov_object::lo_type_guard semaphore is taken in exclusive mode,
+ * all state pertaining to the old layout type is destroyed, and new state is
+ * constructed. All object methods take said semaphore in the shared mode,
+ * providing serialization against transition between layout types.
+ *
+ * To avoid multiple `if' or `switch' statements, selecting behavior for the
+ * current layout type, object methods perform double-dispatch, invoking
+ * function corresponding to the current layout type.
+ */
+struct lov_object {
+ struct cl_object lo_cl;
+ /**
+ * Serializes object operations with transitions between layout types.
+ *
+ * This semaphore is taken in shared mode by all object methods, and
+ * is taken in exclusive mode when object type is changed.
+ *
+ * \see lov_object::lo_type
+ */
+ struct rw_semaphore lo_type_guard;
+ /**
+ * Type of an object. Protected by lov_object::lo_type_guard.
+ */
+ enum lov_layout_type lo_type;
+ /**
+ * True if layout is invalid. This bit is cleared when layout lock
+ * is lost.
+ */
+ bool lo_layout_invalid;
+ /**
+ * How many IOs are on going on this object. Layout can be changed
+ * only if there is no active IO.
+ */
+ atomic_t lo_active_ios;
+ /**
+ * Waitq - wait for no one else is using lo_lsm
+ */
+ wait_queue_head_t lo_waitq;
+ /**
+ * Layout metadata. NULL if empty layout.
+ */
+ struct lov_stripe_md *lo_lsm;
+
+ union lov_layout_state {
+ struct lov_layout_raid0 {
+ unsigned lo_nr;
+ /**
+ * When this is true, lov_object::lo_attr contains
+ * valid up to date attributes for a top-level
+ * object. This field is reset to 0 when attributes of
+ * any sub-object change.
+ */
+ int lo_attr_valid;
+ /**
+ * Array of sub-objects. Allocated when top-object is
+ * created (lov_init_raid0()).
+ *
+ * Top-object is a strict master of its sub-objects:
+ * it is created before them, and outlives its
+ * children (this later is necessary so that basic
+ * functions like cl_object_top() always
+ * work). Top-object keeps a reference on every
+ * sub-object.
+ *
+ * When top-object is destroyed (lov_delete_raid0())
+ * it releases its reference to a sub-object and waits
+ * until the latter is finally destroyed.
+ */
+ struct lovsub_object **lo_sub;
+ /**
+ * protect lo_sub
+ */
+ spinlock_t lo_sub_lock;
+ /**
+ * Cached object attribute, built from sub-object
+ * attributes.
+ */
+ struct cl_attr lo_attr;
+ } raid0;
+ struct lov_layout_state_empty {
+ } empty;
+ } u;
+ /**
+ * Thread that acquired lov_object::lo_type_guard in an exclusive
+ * mode.
+ */
+ task_t *lo_owner;
+};
+
+/**
+ * Flags that top-lock can set on each of its sub-locks.
+ */
+enum lov_sub_flags {
+ /** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
+ LSF_HELD = 1 << 0
+};
+
+/**
+ * State lov_lock keeps for each sub-lock.
+ */
+struct lov_lock_sub {
+ /** sub-lock itself */
+ struct lovsub_lock *sub_lock;
+ /** An array of per-sub-lock flags, taken from enum lov_sub_flags */
+ unsigned sub_flags;
+ int sub_stripe;
+ struct cl_lock_descr sub_descr;
+ struct cl_lock_descr sub_got;
+};
+
+/**
+ * lov-specific lock state.
+ */
+struct lov_lock {
+ struct cl_lock_slice lls_cl;
+ /** Number of sub-locks in this lock */
+ int lls_nr;
+ /**
+ * Number of existing sub-locks.
+ */
+ unsigned lls_nr_filled;
+ /**
+ * Set when sub-lock was canceled, while top-lock was being
+ * used, or unused.
+ */
+ unsigned int lls_cancel_race:1;
+ /**
+ * An array of sub-locks
+ *
+ * There are two issues with managing sub-locks:
+ *
+ * - sub-locks are concurrently canceled, and
+ *
+ * - sub-locks are shared with other top-locks.
+ *
+ * To manage cancellation, top-lock acquires a hold on a sublock
+ * (lov_sublock_adopt()) when the latter is inserted into
+ * lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
+ * when top-lock is going into CLS_CACHED state or destroyed. Hold
+ * prevents sub-lock from cancellation.
+ *
+ * Sub-lock sharing means, among other things, that top-lock that is
+ * in the process of creation (i.e., not yet inserted into lock list)
+ * is already accessible to other threads once at least one of its
+ * sub-locks is created, see lov_lock_sub_init().
+ *
+ * Sub-lock can be in one of the following states:
+ *
+ * - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
+ * sub-lock was either never created (top-lock is in CLS_NEW
+ * state), or it was created, then canceled, then destroyed
+ * (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
+ *
+ * - sub-lock exists and is on
+ * hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
+ * normal state of a sub-lock in CLS_HELD and CLS_CACHED states
+ * of a top-lock.
+ *
+ * - sub-lock exists, but is not held by the top-lock. This
+ * happens after top-lock released a hold on sub-locks before
+ * going into cache (lov_lock_unuse()).
+ *
+ * \todo To support wide-striping, array has to be replaced with a set
+ * of queues to avoid scanning.
+ */
+ struct lov_lock_sub *lls_sub;
+ /**
+ * Original description with which lock was enqueued.
+ */
+ struct cl_lock_descr lls_orig;
+};
+
+struct lov_page {
+ struct cl_page_slice lps_cl;
+ int lps_invalid;
+};
+
+/*
+ * Bottom half.
+ */
+
+struct lovsub_device {
+ struct cl_device acid_cl;
+ struct lov_device *acid_super;
+ int acid_idx;
+ struct cl_device *acid_next;
+};
+
+struct lovsub_object {
+ struct cl_object_header lso_header;
+ struct cl_object lso_cl;
+ struct lov_object *lso_super;
+ int lso_index;
+};
+
+/**
+ * A link between a top-lock and a sub-lock. Separate data-structure is
+ * necessary, because top-locks and sub-locks are in M:N relationship.
+ *
+ * \todo This can be optimized for a (by far) most frequent case of a single
+ * top-lock per sub-lock.
+ */
+struct lov_lock_link {
+ struct lov_lock *lll_super;
+ /** An index within parent lock. */
+ int lll_idx;
+ /**
+ * A linkage into per sub-lock list of all corresponding top-locks,
+ * hanging off lovsub_lock::lss_parents.
+ */
+ struct list_head lll_list;
+};
+
+/**
+ * Lock state at lovsub layer.
+ */
+struct lovsub_lock {
+ struct cl_lock_slice lss_cl;
+ /**
+ * List of top-locks that have given sub-lock as their part. Protected
+ * by cl_lock::cll_guard mutex.
+ */
+ struct list_head lss_parents;
+ /**
+ * Top-lock that initiated current operation on this sub-lock. This is
+ * only set during top-to-bottom lock operations like enqueue, and is
+ * used to optimize state change notification. Protected by
+ * cl_lock::cll_guard mutex.
+ *
+ * \see lovsub_lock_state_one().
+ */
+ struct cl_lock *lss_active;
+};
+
+/**
+ * Describe the environment settings for sublocks.
+ */
+struct lov_sublock_env {
+ const struct lu_env *lse_env;
+ struct cl_io *lse_io;
+ struct lov_io_sub *lse_sub;
+};
+
+struct lovsub_page {
+ struct cl_page_slice lsb_cl;
+};
+
+
+struct lov_thread_info {
+ struct cl_object_conf lti_stripe_conf;
+ struct lu_fid lti_fid;
+ struct cl_lock_descr lti_ldescr;
+ struct ost_lvb lti_lvb;
+ struct cl_2queue lti_cl2q;
+ struct cl_lock_closure lti_closure;
+ wait_queue_t lti_waiter;
+};
+
+/**
+ * State that lov_io maintains for every sub-io.
+ */
+struct lov_io_sub {
+ int sub_stripe;
+ /**
+ * sub-io for a stripe. Ideally sub-io's can be stopped and resumed
+ * independently, with lov acting as a scheduler to maximize overall
+ * throughput.
+ */
+ struct cl_io *sub_io;
+ /**
+ * Linkage into a list (hanging off lov_io::lis_active) of all
+ * sub-io's active for the current IO iteration.
+ */
+ struct list_head sub_linkage;
+ /**
+ * true, iff cl_io_init() was successfully executed against
+ * lov_io_sub::sub_io.
+ */
+ int sub_io_initialized;
+ /**
+ * True, iff lov_io_sub::sub_io and lov_io_sub::sub_env weren't
+ * allocated, but borrowed from a per-device emergency pool.
+ */
+ int sub_borrowed;
+ /**
+ * environment, in which sub-io executes.
+ */
+ struct lu_env *sub_env;
+ /**
+ * environment's refcheck.
+ *
+ * \see cl_env_get()
+ */
+ int sub_refcheck;
+ int sub_refcheck2;
+ int sub_reenter;
+ void *sub_cookie;
+};
+
+/**
+ * IO state private for LOV.
+ */
+struct lov_io {
+ /** super-class */
+ struct cl_io_slice lis_cl;
+ /**
+ * Pointer to the object slice. This is a duplicate of
+ * lov_io::lis_cl::cis_object.
+ */
+ struct lov_object *lis_object;
+ /**
+ * Original end-of-io position for this IO, set by the upper layer as
+ * cl_io::u::ci_rw::pos + cl_io::u::ci_rw::count. lov remembers this,
+ * changes pos and count to fit IO into a single stripe and uses saved
+ * value to determine when IO iterations have to stop.
+ *
+ * This is used only for CIT_READ and CIT_WRITE io's.
+ */
+ loff_t lis_io_endpos;
+
+ /**
+ * starting position within a file, for the current io loop iteration
+ * (stripe), used by ci_io_loop().
+ */
+ obd_off lis_pos;
+ /**
+ * end position with in a file, for the current stripe io. This is
+ * exclusive (i.e., next offset after last byte affected by io).
+ */
+ obd_off lis_endpos;
+
+ int lis_mem_frozen;
+ int lis_stripe_count;
+ int lis_active_subios;
+
+ /**
+ * the index of ls_single_subio in ls_subios array
+ */
+ int lis_single_subio_index;
+ struct cl_io lis_single_subio;
+
+ /**
+ * size of ls_subios array, actually the highest stripe #
+ */
+ int lis_nr_subios;
+ struct lov_io_sub *lis_subs;
+ /**
+ * List of active sub-io's.
+ */
+ struct list_head lis_active;
+};
+
+struct lov_session {
+ struct lov_io ls_io;
+ struct lov_sublock_env ls_subenv;
+};
+
+/**
+ * State of transfer for lov.
+ */
+struct lov_req {
+ struct cl_req_slice lr_cl;
+};
+
+/**
+ * State of transfer for lovsub.
+ */
+struct lovsub_req {
+ struct cl_req_slice lsrq_cl;
+};
+
+extern struct lu_device_type lov_device_type;
+extern struct lu_device_type lovsub_device_type;
+
+extern struct lu_context_key lov_key;
+extern struct lu_context_key lov_session_key;
+
+extern struct kmem_cache *lov_lock_kmem;
+extern struct kmem_cache *lov_object_kmem;
+extern struct kmem_cache *lov_thread_kmem;
+extern struct kmem_cache *lov_session_kmem;
+extern struct kmem_cache *lov_req_kmem;
+
+extern struct kmem_cache *lovsub_lock_kmem;
+extern struct kmem_cache *lovsub_object_kmem;
+extern struct kmem_cache *lovsub_req_kmem;
+
+extern struct kmem_cache *lov_lock_link_kmem;
+
+int lov_object_init (const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf);
+int lovsub_object_init (const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf);
+int lov_lock_init (const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
+int lov_io_init (const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+int lovsub_lock_init (const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
+
+int lov_lock_init_raid0 (const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
+int lov_lock_init_empty (const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
+int lov_io_init_raid0 (const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+int lov_io_init_empty (const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+void lov_lock_unlink (const struct lu_env *env, struct lov_lock_link *link,
+ struct lovsub_lock *sub);
+
+struct lov_io_sub *lov_sub_get(const struct lu_env *env, struct lov_io *lio,
+ int stripe);
+void lov_sub_put (struct lov_io_sub *sub);
+int lov_sublock_modify (const struct lu_env *env, struct lov_lock *lov,
+ struct lovsub_lock *sublock,
+ const struct cl_lock_descr *d, int idx);
+
+
+int lov_page_init (const struct lu_env *env, struct cl_object *ob,
+ struct cl_page *page, struct page *vmpage);
+int lovsub_page_init (const struct lu_env *env, struct cl_object *ob,
+ struct cl_page *page, struct page *vmpage);
+
+int lov_page_init_empty (const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+int lov_page_init_raid0 (const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+struct lu_object *lov_object_alloc (const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev);
+struct lu_object *lovsub_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev);
+
+struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
+ struct lov_lock *lck,
+ struct lovsub_lock *sub);
+struct lov_io_sub *lov_page_subio (const struct lu_env *env,
+ struct lov_io *lio,
+ const struct cl_page_slice *slice);
+
+void lov_lsm_decref(struct lov_object *lov, struct lov_stripe_md *lsm);
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov);
+
+#define lov_foreach_target(lov, var) \
+ for (var = 0; var < lov_targets_nr(lov); ++var)
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ * Accessors.
+ *
+ */
+
+static inline struct lov_session *lov_env_session(const struct lu_env *env)
+{
+ struct lov_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &lov_session_key);
+ LASSERT(ses != NULL);
+ return ses;
+}
+
+static inline struct lov_io *lov_env_io(const struct lu_env *env)
+{
+ return &lov_env_session(env)->ls_io;
+}
+
+static inline int lov_is_object(const struct lu_object *obj)
+{
+ return obj->lo_dev->ld_type == &lov_device_type;
+}
+
+static inline int lovsub_is_object(const struct lu_object *obj)
+{
+ return obj->lo_dev->ld_type == &lovsub_device_type;
+}
+
+static inline struct lu_device *lov2lu_dev(struct lov_device *lov)
+{
+ return &lov->ld_cl.cd_lu_dev;
+}
+
+static inline struct lov_device *lu2lov_dev(const struct lu_device *d)
+{
+ LINVRNT(d->ld_type == &lov_device_type);
+ return container_of0(d, struct lov_device, ld_cl.cd_lu_dev);
+}
+
+static inline struct cl_device *lovsub2cl_dev(struct lovsub_device *lovsub)
+{
+ return &lovsub->acid_cl;
+}
+
+static inline struct lu_device *lovsub2lu_dev(struct lovsub_device *lovsub)
+{
+ return &lovsub2cl_dev(lovsub)->cd_lu_dev;
+}
+
+static inline struct lovsub_device *lu2lovsub_dev(const struct lu_device *d)
+{
+ LINVRNT(d->ld_type == &lovsub_device_type);
+ return container_of0(d, struct lovsub_device, acid_cl.cd_lu_dev);
+}
+
+static inline struct lovsub_device *cl2lovsub_dev(const struct cl_device *d)
+{
+ LINVRNT(d->cd_lu_dev.ld_type == &lovsub_device_type);
+ return container_of0(d, struct lovsub_device, acid_cl);
+}
+
+static inline struct lu_object *lov2lu(struct lov_object *lov)
+{
+ return &lov->lo_cl.co_lu;
+}
+
+static inline struct cl_object *lov2cl(struct lov_object *lov)
+{
+ return &lov->lo_cl;
+}
+
+static inline struct lov_object *lu2lov(const struct lu_object *obj)
+{
+ LINVRNT(lov_is_object(obj));
+ return container_of0(obj, struct lov_object, lo_cl.co_lu);
+}
+
+static inline struct lov_object *cl2lov(const struct cl_object *obj)
+{
+ LINVRNT(lov_is_object(&obj->co_lu));
+ return container_of0(obj, struct lov_object, lo_cl);
+}
+
+static inline struct lu_object *lovsub2lu(struct lovsub_object *los)
+{
+ return &los->lso_cl.co_lu;
+}
+
+static inline struct cl_object *lovsub2cl(struct lovsub_object *los)
+{
+ return &los->lso_cl;
+}
+
+static inline struct lovsub_object *cl2lovsub(const struct cl_object *obj)
+{
+ LINVRNT(lovsub_is_object(&obj->co_lu));
+ return container_of0(obj, struct lovsub_object, lso_cl);
+}
+
+static inline struct lovsub_object *lu2lovsub(const struct lu_object *obj)
+{
+ LINVRNT(lovsub_is_object(obj));
+ return container_of0(obj, struct lovsub_object, lso_cl.co_lu);
+}
+
+static inline struct lovsub_lock *
+cl2lovsub_lock(const struct cl_lock_slice *slice)
+{
+ LINVRNT(lovsub_is_object(&slice->cls_obj->co_lu));
+ return container_of(slice, struct lovsub_lock, lss_cl);
+}
+
+static inline struct lovsub_lock *cl2sub_lock(const struct cl_lock *lock)
+{
+ const struct cl_lock_slice *slice;
+
+ slice = cl_lock_at(lock, &lovsub_device_type);
+ LASSERT(slice != NULL);
+ return cl2lovsub_lock(slice);
+}
+
+static inline struct lov_lock *cl2lov_lock(const struct cl_lock_slice *slice)
+{
+ LINVRNT(lov_is_object(&slice->cls_obj->co_lu));
+ return container_of(slice, struct lov_lock, lls_cl);
+}
+
+static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice)
+{
+ LINVRNT(lov_is_object(&slice->cpl_obj->co_lu));
+ return container_of0(slice, struct lov_page, lps_cl);
+}
+
+static inline struct lov_req *cl2lov_req(const struct cl_req_slice *slice)
+{
+ return container_of0(slice, struct lov_req, lr_cl);
+}
+
+static inline struct lovsub_page *
+cl2lovsub_page(const struct cl_page_slice *slice)
+{
+ LINVRNT(lovsub_is_object(&slice->cpl_obj->co_lu));
+ return container_of0(slice, struct lovsub_page, lsb_cl);
+}
+
+static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice)
+{
+ return container_of0(slice, struct lovsub_req, lsrq_cl);
+}
+
+static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice)
+{
+ return slice->cpl_page->cp_child;
+}
+
+static inline struct lov_io *cl2lov_io(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_io *lio;
+
+ lio = container_of(ios, struct lov_io, lis_cl);
+ LASSERT(lio == lov_env_io(env));
+ return lio;
+}
+
+static inline int lov_targets_nr(const struct lov_device *lov)
+{
+ return lov->ld_lov->desc.ld_tgt_count;
+}
+
+static inline struct lov_thread_info *lov_env_info(const struct lu_env *env)
+{
+ struct lov_thread_info *info;
+
+ info = lu_context_key_get(&env->le_ctx, &lov_key);
+ LASSERT(info != NULL);
+ return info;
+}
+
+static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov)
+{
+ LASSERT(lov->lo_type == LLT_RAID0);
+ LASSERT(lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC ||
+ lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC_V3);
+ return &lov->u.raid0;
+}
+
+/** @} lov */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
new file mode 100644
index 000000000000..f94f8d9d33d7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -0,0 +1,533 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_device and cl_device_type for LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+/* class_name2obd() */
+#include <obd_class.h>
+
+#include "lov_cl_internal.h"
+
+struct kmem_cache *lov_lock_kmem;
+struct kmem_cache *lov_object_kmem;
+struct kmem_cache *lov_thread_kmem;
+struct kmem_cache *lov_session_kmem;
+struct kmem_cache *lov_req_kmem;
+
+struct kmem_cache *lovsub_lock_kmem;
+struct kmem_cache *lovsub_object_kmem;
+struct kmem_cache *lovsub_req_kmem;
+
+struct kmem_cache *lov_lock_link_kmem;
+
+/** Lock class of lov_device::ld_mutex. */
+struct lock_class_key cl_lov_device_mutex_class;
+
+struct lu_kmem_descr lov_caches[] = {
+ {
+ .ckd_cache = &lov_lock_kmem,
+ .ckd_name = "lov_lock_kmem",
+ .ckd_size = sizeof (struct lov_lock)
+ },
+ {
+ .ckd_cache = &lov_object_kmem,
+ .ckd_name = "lov_object_kmem",
+ .ckd_size = sizeof (struct lov_object)
+ },
+ {
+ .ckd_cache = &lov_thread_kmem,
+ .ckd_name = "lov_thread_kmem",
+ .ckd_size = sizeof (struct lov_thread_info)
+ },
+ {
+ .ckd_cache = &lov_session_kmem,
+ .ckd_name = "lov_session_kmem",
+ .ckd_size = sizeof (struct lov_session)
+ },
+ {
+ .ckd_cache = &lov_req_kmem,
+ .ckd_name = "lov_req_kmem",
+ .ckd_size = sizeof (struct lov_req)
+ },
+ {
+ .ckd_cache = &lovsub_lock_kmem,
+ .ckd_name = "lovsub_lock_kmem",
+ .ckd_size = sizeof (struct lovsub_lock)
+ },
+ {
+ .ckd_cache = &lovsub_object_kmem,
+ .ckd_name = "lovsub_object_kmem",
+ .ckd_size = sizeof (struct lovsub_object)
+ },
+ {
+ .ckd_cache = &lovsub_req_kmem,
+ .ckd_name = "lovsub_req_kmem",
+ .ckd_size = sizeof (struct lovsub_req)
+ },
+ {
+ .ckd_cache = &lov_lock_link_kmem,
+ .ckd_name = "lov_lock_link_kmem",
+ .ckd_size = sizeof (struct lov_lock_link)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+/*****************************************************************************
+ *
+ * Lov transfer operations.
+ *
+ */
+
+static void lov_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct lov_req *lr;
+
+ ENTRY;
+ lr = cl2lov_req(slice);
+ OBD_SLAB_FREE_PTR(lr, lov_req_kmem);
+ EXIT;
+}
+
+static const struct cl_req_operations lov_req_ops = {
+ .cro_completion = lov_req_completion
+};
+
+/*****************************************************************************
+ *
+ * Lov device and device type functions.
+ *
+ */
+
+static void *lov_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct lov_thread_info *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, lov_thread_kmem, __GFP_IO);
+ if (info != NULL)
+ INIT_LIST_HEAD(&info->lti_closure.clc_list);
+ else
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void lov_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct lov_thread_info *info = data;
+ LINVRNT(list_empty(&info->lti_closure.clc_list));
+ OBD_SLAB_FREE_PTR(info, lov_thread_kmem);
+}
+
+struct lu_context_key lov_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = lov_key_init,
+ .lct_fini = lov_key_fini
+};
+
+static void *lov_session_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct lov_session *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, lov_session_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void lov_session_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct lov_session *info = data;
+ OBD_SLAB_FREE_PTR(info, lov_session_kmem);
+}
+
+struct lu_context_key lov_session_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = lov_session_key_init,
+ .lct_fini = lov_session_key_fini
+};
+
+/* type constructor/destructor: lov_type_{init,fini,start,stop}() */
+LU_TYPE_INIT_FINI(lov, &lov_key, &lov_session_key);
+
+static struct lu_device *lov_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ int i;
+ struct lov_device *ld = lu2lov_dev(d);
+
+ LASSERT(ld->ld_lov != NULL);
+ if (ld->ld_target == NULL)
+ RETURN(NULL);
+
+ lov_foreach_target(ld, i) {
+ struct lovsub_device *lsd;
+
+ lsd = ld->ld_target[i];
+ if (lsd != NULL) {
+ cl_stack_fini(env, lovsub2cl_dev(lsd));
+ ld->ld_target[i] = NULL;
+ }
+ }
+ RETURN(NULL);
+}
+
+static int lov_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ struct lov_device *ld = lu2lov_dev(d);
+ int i;
+ int rc = 0;
+
+ LASSERT(d->ld_site != NULL);
+ if (ld->ld_target == NULL)
+ RETURN(rc);
+
+ lov_foreach_target(ld, i) {
+ struct lovsub_device *lsd;
+ struct cl_device *cl;
+ struct lov_tgt_desc *desc;
+
+ desc = ld->ld_lov->lov_tgts[i];
+ if (desc == NULL)
+ continue;
+
+ cl = cl_type_setup(env, d->ld_site, &lovsub_device_type,
+ desc->ltd_obd->obd_lu_dev);
+ if (IS_ERR(cl)) {
+ rc = PTR_ERR(cl);
+ break;
+ }
+ lsd = cl2lovsub_dev(cl);
+ lsd->acid_idx = i;
+ lsd->acid_super = ld;
+ ld->ld_target[i] = lsd;
+ }
+
+ if (rc)
+ lov_device_fini(env, d);
+ else
+ ld->ld_flags |= LOV_DEV_INITIALIZED;
+
+ RETURN(rc);
+}
+
+static int lov_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct lov_req *lr;
+ int result;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lr, lov_req_kmem, __GFP_IO);
+ if (lr != NULL) {
+ cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ RETURN(result);
+}
+
+static const struct cl_device_operations lov_cl_ops = {
+ .cdo_req_init = lov_req_init
+};
+
+static void lov_emerg_free(struct lov_device_emerg **emrg, int nr)
+{
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ struct lov_device_emerg *em;
+
+ em = emrg[i];
+ if (em != NULL) {
+ LASSERT(em->emrg_page_list.pl_nr == 0);
+ if (em->emrg_env != NULL)
+ cl_env_put(em->emrg_env, &em->emrg_refcheck);
+ OBD_FREE_PTR(em);
+ }
+ }
+ OBD_FREE(emrg, nr * sizeof emrg[0]);
+}
+
+static struct lu_device *lov_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct lov_device *ld = lu2lov_dev(d);
+ const int nr = ld->ld_target_nr;
+
+ cl_device_fini(lu2cl_dev(d));
+ if (ld->ld_target != NULL)
+ OBD_FREE(ld->ld_target, nr * sizeof ld->ld_target[0]);
+ if (ld->ld_emrg != NULL)
+ lov_emerg_free(ld->ld_emrg, nr);
+ OBD_FREE_PTR(ld);
+ return NULL;
+}
+
+static void lov_cl_del_target(const struct lu_env *env, struct lu_device *dev,
+ __u32 index)
+{
+ struct lov_device *ld = lu2lov_dev(dev);
+ ENTRY;
+
+ if (ld->ld_target[index] != NULL) {
+ cl_stack_fini(env, lovsub2cl_dev(ld->ld_target[index]));
+ ld->ld_target[index] = NULL;
+ }
+ EXIT;
+}
+
+static struct lov_device_emerg **lov_emerg_alloc(int nr)
+{
+ struct lov_device_emerg **emerg;
+ int i;
+ int result;
+
+ OBD_ALLOC(emerg, nr * sizeof emerg[0]);
+ if (emerg == NULL)
+ return ERR_PTR(-ENOMEM);
+ for (result = i = 0; i < nr && result == 0; i++) {
+ struct lov_device_emerg *em;
+
+ OBD_ALLOC_PTR(em);
+ if (em != NULL) {
+ emerg[i] = em;
+ cl_page_list_init(&em->emrg_page_list);
+ em->emrg_env = cl_env_alloc(&em->emrg_refcheck,
+ LCT_REMEMBER|LCT_NOREF);
+ if (!IS_ERR(em->emrg_env))
+ em->emrg_env->le_ctx.lc_cookie = 0x2;
+ else {
+ result = PTR_ERR(em->emrg_env);
+ em->emrg_env = NULL;
+ }
+ } else
+ result = -ENOMEM;
+ }
+ if (result != 0) {
+ lov_emerg_free(emerg, nr);
+ emerg = ERR_PTR(result);
+ }
+ return emerg;
+}
+
+static int lov_expand_targets(const struct lu_env *env, struct lov_device *dev)
+{
+ int result;
+ __u32 tgt_size;
+ __u32 sub_size;
+
+ ENTRY;
+ result = 0;
+ tgt_size = dev->ld_lov->lov_tgt_size;
+ sub_size = dev->ld_target_nr;
+ if (sub_size < tgt_size) {
+ struct lovsub_device **newd;
+ struct lov_device_emerg **emerg;
+ const size_t sz = sizeof newd[0];
+
+ emerg = lov_emerg_alloc(tgt_size);
+ if (IS_ERR(emerg))
+ RETURN(PTR_ERR(emerg));
+
+ OBD_ALLOC(newd, tgt_size * sz);
+ if (newd != NULL) {
+ mutex_lock(&dev->ld_mutex);
+ if (sub_size > 0) {
+ memcpy(newd, dev->ld_target, sub_size * sz);
+ OBD_FREE(dev->ld_target, sub_size * sz);
+ }
+ dev->ld_target = newd;
+ dev->ld_target_nr = tgt_size;
+
+ if (dev->ld_emrg != NULL)
+ lov_emerg_free(dev->ld_emrg, sub_size);
+ dev->ld_emrg = emerg;
+ mutex_unlock(&dev->ld_mutex);
+ } else {
+ lov_emerg_free(emerg, tgt_size);
+ result = -ENOMEM;
+ }
+ }
+ RETURN(result);
+}
+
+static int lov_cl_add_target(const struct lu_env *env, struct lu_device *dev,
+ __u32 index)
+{
+ struct obd_device *obd = dev->ld_obd;
+ struct lov_device *ld = lu2lov_dev(dev);
+ struct lov_tgt_desc *tgt;
+ struct lovsub_device *lsd;
+ struct cl_device *cl;
+ int rc;
+ ENTRY;
+
+ obd_getref(obd);
+
+ tgt = obd->u.lov.lov_tgts[index];
+ LASSERT(tgt != NULL);
+ LASSERT(tgt->ltd_obd != NULL);
+
+ if (!tgt->ltd_obd->obd_set_up) {
+ CERROR("Target %s not set up\n", obd_uuid2str(&tgt->ltd_uuid));
+ RETURN(-EINVAL);
+ }
+
+ rc = lov_expand_targets(env, ld);
+ if (rc == 0 && ld->ld_flags & LOV_DEV_INITIALIZED) {
+ LASSERT(dev->ld_site != NULL);
+
+ cl = cl_type_setup(env, dev->ld_site, &lovsub_device_type,
+ tgt->ltd_obd->obd_lu_dev);
+ if (!IS_ERR(cl)) {
+ lsd = cl2lovsub_dev(cl);
+ lsd->acid_idx = index;
+ lsd->acid_super = ld;
+ ld->ld_target[index] = lsd;
+ } else {
+ CERROR("add failed (%d), deleting %s\n", rc,
+ obd_uuid2str(&tgt->ltd_uuid));
+ lov_cl_del_target(env, dev, index);
+ rc = PTR_ERR(cl);
+ }
+ }
+ obd_putref(obd);
+ RETURN(rc);
+}
+
+static int lov_process_config(const struct lu_env *env,
+ struct lu_device *d, struct lustre_cfg *cfg)
+{
+ struct obd_device *obd = d->ld_obd;
+ int cmd;
+ int rc;
+ int gen;
+ __u32 index;
+
+ obd_getref(obd);
+
+ cmd = cfg->lcfg_command;
+ rc = lov_process_config_base(d->ld_obd, cfg, &index, &gen);
+ if (rc == 0) {
+ switch(cmd) {
+ case LCFG_LOV_ADD_OBD:
+ case LCFG_LOV_ADD_INA:
+ rc = lov_cl_add_target(env, d, index);
+ if (rc != 0)
+ lov_del_target(d->ld_obd, index, 0, 0);
+ break;
+ case LCFG_LOV_DEL_OBD:
+ lov_cl_del_target(env, d, index);
+ break;
+ }
+ }
+ obd_putref(obd);
+ RETURN(rc);
+}
+
+static const struct lu_device_operations lov_lu_ops = {
+ .ldo_object_alloc = lov_object_alloc,
+ .ldo_process_config = lov_process_config,
+};
+
+static struct lu_device *lov_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg)
+{
+ struct lu_device *d;
+ struct lov_device *ld;
+ struct obd_device *obd;
+ int rc;
+
+ OBD_ALLOC_PTR(ld);
+ if (ld == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ cl_device_init(&ld->ld_cl, t);
+ d = lov2lu_dev(ld);
+ d->ld_ops = &lov_lu_ops;
+ ld->ld_cl.cd_ops = &lov_cl_ops;
+
+ mutex_init(&ld->ld_mutex);
+ lockdep_set_class(&ld->ld_mutex, &cl_lov_device_mutex_class);
+
+ /* setup the LOV OBD */
+ obd = class_name2obd(lustre_cfg_string(cfg, 0));
+ LASSERT(obd != NULL);
+ rc = lov_setup(obd, cfg);
+ if (rc) {
+ lov_device_free(env, d);
+ RETURN(ERR_PTR(rc));
+ }
+
+ ld->ld_lov = &obd->u.lov;
+ RETURN(d);
+}
+
+static const struct lu_device_type_operations lov_device_type_ops = {
+ .ldto_init = lov_type_init,
+ .ldto_fini = lov_type_fini,
+
+ .ldto_start = lov_type_start,
+ .ldto_stop = lov_type_stop,
+
+ .ldto_device_alloc = lov_device_alloc,
+ .ldto_device_free = lov_device_free,
+
+ .ldto_device_init = lov_device_init,
+ .ldto_device_fini = lov_device_fini
+};
+
+struct lu_device_type lov_device_type = {
+ .ldt_tags = LU_DEVICE_CL,
+ .ldt_name = LUSTRE_LOV_NAME,
+ .ldt_ops = &lov_device_type_ops,
+ .ldt_ctx_tags = LCT_CL_THREAD
+};
+EXPORT_SYMBOL(lov_device_type);
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
new file mode 100644
index 000000000000..340dbcf829e8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -0,0 +1,333 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_ea.c
+ *
+ * Author: Wang Di <wangdi@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <asm/div64.h>
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
+
+#include "lov_internal.h"
+
+struct lovea_unpack_args {
+ struct lov_stripe_md *lsm;
+ int cursor;
+};
+
+static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
+ __u16 stripe_count)
+{
+ if (stripe_count == 0 || stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
+ CERROR("bad stripe count %d\n", stripe_count);
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ if (lmm_oi_id(&lmm->lmm_oi) == 0) {
+ CERROR("zero object id\n");
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ if (lmm->lmm_pattern != cpu_to_le32(LOV_PATTERN_RAID0)) {
+ CERROR("bad striping pattern\n");
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ if (lmm->lmm_stripe_size == 0 ||
+ (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
+ CERROR("bad stripe size %u\n",
+ le32_to_cpu(lmm->lmm_stripe_size));
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size)
+{
+ struct lov_stripe_md *lsm;
+ struct lov_oinfo *loi;
+ int i, oinfo_ptrs_size;
+
+ LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT);
+
+ oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count;
+ *size = sizeof(struct lov_stripe_md) + oinfo_ptrs_size;
+
+ OBD_ALLOC_LARGE(lsm, *size);
+ if (!lsm)
+ return NULL;;
+
+ for (i = 0; i < stripe_count; i++) {
+ OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, __GFP_IO);
+ if (loi == NULL)
+ goto err;
+ lsm->lsm_oinfo[i] = loi;
+ }
+ lsm->lsm_stripe_count = stripe_count;
+ return lsm;
+
+err:
+ while (--i >= 0)
+ OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab, sizeof(*loi));
+ OBD_FREE_LARGE(lsm, *size);
+ return NULL;
+}
+
+void lsm_free_plain(struct lov_stripe_md *lsm)
+{
+ __u16 stripe_count = lsm->lsm_stripe_count;
+ int i;
+
+ for (i = 0; i < stripe_count; i++)
+ OBD_SLAB_FREE(lsm->lsm_oinfo[i], lov_oinfo_slab,
+ sizeof(struct lov_oinfo));
+ OBD_FREE_LARGE(lsm, sizeof(struct lov_stripe_md) +
+ stripe_count * sizeof(struct lov_oinfo *));
+}
+
+static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
+ struct lov_mds_md *lmm)
+{
+ /*
+ * This supposes lov_mds_md_v1/v3 first fields are
+ * are the same
+ */
+ lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi);
+ lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
+ lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
+ lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
+ lsm->lsm_pool_name[0] = '\0';
+}
+
+static void
+lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
+ obd_off *lov_off, obd_off *swidth)
+{
+ if (swidth)
+ *swidth = (obd_off)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+}
+
+static void
+lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
+ obd_off *lov_off, obd_off *swidth)
+{
+ if (swidth)
+ *swidth = (obd_off)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+}
+
+static int lsm_destroy_plain(struct lov_stripe_md *lsm, struct obdo *oa,
+ struct obd_export *md_exp)
+{
+ return 0;
+}
+
+/* Find minimum stripe maxbytes value. For inactive or
+ * reconnecting targets use LUSTRE_STRIPE_MAXBYTES. */
+static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
+{
+ struct obd_import *imp = tgt->ltd_obd->u.cli.cl_import;
+
+ if (imp == NULL || !tgt->ltd_active) {
+ *stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ return;
+ }
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_FULL &&
+ (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
+ imp->imp_connect_data.ocd_maxbytes > 0) {
+ if (*stripe_maxbytes > imp->imp_connect_data.ocd_maxbytes)
+ *stripe_maxbytes = imp->imp_connect_data.ocd_maxbytes;
+ } else {
+ *stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ }
+ spin_unlock(&imp->imp_lock);
+}
+
+static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
+ __u16 *stripe_count)
+{
+ if (lmm_bytes < sizeof(*lmm)) {
+ CERROR("lov_mds_md_v1 too small: %d, need at least %d\n",
+ lmm_bytes, (int)sizeof(*lmm));
+ return -EINVAL;
+ }
+
+ *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+
+ if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) {
+ CERROR("LOV EA V1 too small: %d, need %d\n",
+ lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1));
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count);
+}
+
+int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
+ struct lov_mds_md_v1 *lmm)
+{
+ struct lov_oinfo *loi;
+ int i;
+ __u64 stripe_maxbytes = OBD_OBJECT_EOF;
+
+ lsm_unpackmd_common(lsm, lmm);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ /* XXX LOV STACKING call down to osc_unpackmd() */
+ loi = lsm->lsm_oinfo[i];
+ ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi);
+ loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
+ loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
+ if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
+ CERROR("OST index %d more than OST count %d\n",
+ loi->loi_ost_idx, lov->desc.ld_tgt_count);
+ lov_dump_lmm_v1(D_WARNING, lmm);
+ return -EINVAL;
+ }
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CERROR("OST index %d missing\n", loi->loi_ost_idx);
+ lov_dump_lmm_v1(D_WARNING, lmm);
+ return -EINVAL;
+ }
+ /* calculate the minimum stripe max bytes */
+ lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx],
+ &stripe_maxbytes);
+ }
+
+ lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count;
+
+ return 0;
+}
+
+const struct lsm_operations lsm_v1_ops = {
+ .lsm_free = lsm_free_plain,
+ .lsm_destroy = lsm_destroy_plain,
+ .lsm_stripe_by_index = lsm_stripe_by_index_plain,
+ .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
+ .lsm_lmm_verify = lsm_lmm_verify_v1,
+ .lsm_unpackmd = lsm_unpackmd_v1,
+};
+
+static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
+ __u16 *stripe_count)
+{
+ struct lov_mds_md_v3 *lmm;
+
+ lmm = (struct lov_mds_md_v3 *)lmmv1;
+
+ if (lmm_bytes < sizeof(*lmm)) {
+ CERROR("lov_mds_md_v3 too small: %d, need at least %d\n",
+ lmm_bytes, (int)sizeof(*lmm));
+ return -EINVAL;
+ }
+
+ *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+
+ if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) {
+ CERROR("LOV EA V3 too small: %d, need %d\n",
+ lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3));
+ lov_dump_lmm_common(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm, lmm_bytes,
+ *stripe_count);
+}
+
+int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
+ struct lov_mds_md *lmmv1)
+{
+ struct lov_mds_md_v3 *lmm;
+ struct lov_oinfo *loi;
+ int i;
+ __u64 stripe_maxbytes = OBD_OBJECT_EOF;
+ int cplen = 0;
+
+ lmm = (struct lov_mds_md_v3 *)lmmv1;
+
+ lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm);
+ cplen = strlcpy(lsm->lsm_pool_name, lmm->lmm_pool_name,
+ sizeof(lsm->lsm_pool_name));
+ if (cplen >= sizeof(lsm->lsm_pool_name))
+ return -E2BIG;
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ /* XXX LOV STACKING call down to osc_unpackmd() */
+ loi = lsm->lsm_oinfo[i];
+ ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi);
+ loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
+ loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
+ if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
+ CERROR("OST index %d more than OST count %d\n",
+ loi->loi_ost_idx, lov->desc.ld_tgt_count);
+ lov_dump_lmm_v3(D_WARNING, lmm);
+ return -EINVAL;
+ }
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CERROR("OST index %d missing\n", loi->loi_ost_idx);
+ lov_dump_lmm_v3(D_WARNING, lmm);
+ return -EINVAL;
+ }
+ /* calculate the minimum stripe max bytes */
+ lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx],
+ &stripe_maxbytes);
+ }
+
+ lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count;
+
+ return 0;
+}
+
+const struct lsm_operations lsm_v3_ops = {
+ .lsm_free = lsm_free_plain,
+ .lsm_destroy = lsm_destroy_plain,
+ .lsm_stripe_by_index = lsm_stripe_by_index_plain,
+ .lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
+ .lsm_lmm_verify = lsm_lmm_verify_v3,
+ .lsm_unpackmd = lsm_unpackmd_v3,
+};
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
new file mode 100644
index 000000000000..16770d14ee04
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -0,0 +1,323 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LOV_INTERNAL_H
+#define LOV_INTERNAL_H
+
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_user.h>
+
+struct lov_lock_handles {
+ struct portals_handle llh_handle;
+ atomic_t llh_refcount;
+ int llh_stripe_count;
+ struct lustre_handle llh_handles[0];
+};
+
+struct lov_request {
+ struct obd_info rq_oi;
+ struct lov_request_set *rq_rqset;
+
+ struct list_head rq_link;
+
+ int rq_idx; /* index in lov->tgts array */
+ int rq_stripe; /* stripe number */
+ int rq_complete;
+ int rq_rc;
+ int rq_buflen; /* length of sub_md */
+
+ obd_count rq_oabufs;
+ obd_count rq_pgaidx;
+};
+
+struct lov_request_set {
+ struct ldlm_enqueue_info *set_ei;
+ struct obd_info *set_oi;
+ atomic_t set_refcount;
+ struct obd_export *set_exp;
+ /* XXX: There is @set_exp already, however obd_statfs gets obd_device
+ only. */
+ struct obd_device *set_obd;
+ int set_count;
+ atomic_t set_completes;
+ atomic_t set_success;
+ atomic_t set_finish_checked;
+ struct llog_cookie *set_cookies;
+ int set_cookie_sent;
+ struct obd_trans_info *set_oti;
+ obd_count set_oabufs;
+ struct brw_page *set_pga;
+ struct lov_lock_handles *set_lockh;
+ struct list_head set_list;
+ wait_queue_head_t set_waitq;
+ spinlock_t set_lock;
+};
+
+extern struct kmem_cache *lov_oinfo_slab;
+
+void lov_finish_set(struct lov_request_set *set);
+
+static inline void lov_get_reqset(struct lov_request_set *set)
+{
+ LASSERT(set != NULL);
+ LASSERT(atomic_read(&set->set_refcount) > 0);
+ atomic_inc(&set->set_refcount);
+}
+
+static inline void lov_put_reqset(struct lov_request_set *set)
+{
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+}
+
+static inline struct lov_lock_handles *
+lov_handle2llh(struct lustre_handle *handle)
+{
+ LASSERT(handle != NULL);
+ return(class_handle2object(handle->cookie));
+}
+
+static inline void lov_llh_put(struct lov_lock_handles *llh)
+{
+ CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh,
+ atomic_read(&llh->llh_refcount) - 1);
+ LASSERT(atomic_read(&llh->llh_refcount) > 0 &&
+ atomic_read(&llh->llh_refcount) < 0x5a5a);
+ if (atomic_dec_and_test(&llh->llh_refcount)) {
+ class_handle_unhash(&llh->llh_handle);
+ /* The structure may be held by other threads because RCU.
+ * -jxiong */
+ if (atomic_read(&llh->llh_refcount))
+ return;
+
+ OBD_FREE_RCU(llh, sizeof *llh +
+ sizeof(*llh->llh_handles) * llh->llh_stripe_count,
+ &llh->llh_handle);
+ }
+}
+
+#define lov_uuid2str(lv, index) \
+ (char *)((lv)->lov_tgts[index]->ltd_uuid.uuid)
+
+/* lov_merge.c */
+void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid,
+ struct lov_stripe_md *lsm, int stripeno, int *set);
+int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm,
+ struct ost_lvb *lvb, int kms_only);
+int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink);
+int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
+ struct ost_lvb *lvb, __u64 *kms_place);
+
+/* lov_offset.c */
+obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
+ int stripeno);
+int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+ int stripeno, obd_off *obd_off);
+obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size,
+ int stripeno);
+int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
+ obd_off start, obd_off end,
+ obd_off *obd_start, obd_off *obd_end);
+int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off);
+
+/* lov_qos.c */
+#define LOV_USES_ASSIGNED_STRIPE 0
+#define LOV_USES_DEFAULT_STRIPE 1
+int qos_add_tgt(struct obd_device *obd, __u32 index);
+int qos_del_tgt(struct obd_device *obd, struct lov_tgt_desc *tgt);
+void qos_shrink_lsm(struct lov_request_set *set);
+int qos_prep_create(struct obd_export *exp, struct lov_request_set *set);
+void qos_update(struct lov_obd *lov);
+void qos_statfs_done(struct lov_obd *lov);
+void qos_statfs_update(struct obd_device *obd, __u64 max_age, int wait);
+int qos_remedy_create(struct lov_request_set *set, struct lov_request *req);
+
+/* lov_request.c */
+void lov_set_add_req(struct lov_request *req, struct lov_request_set *set);
+int lov_set_finished(struct lov_request_set *set, int idempotent);
+void lov_update_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_update_common_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx);
+int lov_prep_create_set(struct obd_export *exp, struct obd_info *oifo,
+ struct lov_stripe_md **ea, struct obdo *src_oa,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int cb_create_update(void *cookie, int rc);
+int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea);
+int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
+ obd_count oa_bufs, struct brw_page *pga,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_fini_brw_set(struct lov_request_set *set);
+int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_request_set **reqset);
+int lov_fini_getattr_set(struct lov_request_set *set);
+int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obdo *src_oa, struct lov_stripe_md *lsm,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_update_destroy_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_fini_destroy_set(struct lov_request_set *set);
+int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_update_setattr_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_fini_setattr_set(struct lov_request_set *set);
+int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_fini_punch_set(struct lov_request_set *set);
+int lov_prep_sync_set(struct obd_export *exp, struct obd_info *obd_info,
+ obd_off start, obd_off end,
+ struct lov_request_set **reqset);
+int lov_fini_sync_set(struct lov_request_set *set);
+int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct lov_request_set **reqset);
+int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
+ struct ptlrpc_request_set *rqset);
+int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_stripe_md *lsm,
+ ldlm_policy_data_t *policy, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset);
+int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags);
+int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_stripe_md *lsm,
+ __u32 mode, struct lustre_handle *lockh,
+ struct lov_request_set **reqset);
+int lov_fini_cancel_set(struct lov_request_set *set);
+int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
+ struct lov_request_set **reqset);
+void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
+ int success);
+int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ int success);
+int lov_fini_statfs_set(struct lov_request_set *set);
+int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc);
+
+/* lov_obd.c */
+void lov_fix_desc(struct lov_desc *desc);
+void lov_fix_desc_stripe_size(__u64 *val);
+void lov_fix_desc_stripe_count(__u32 *val);
+void lov_fix_desc_pattern(__u32 *val);
+void lov_fix_desc_qos_maxage(__u32 *val);
+__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count);
+int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
+ struct obd_connect_data *data);
+int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
+ __u32 *indexp, int *genp);
+int lov_del_target(struct obd_device *obd, __u32 index,
+ struct obd_uuid *uuidp, int gen);
+/* lov_log.c */
+int lov_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *tgt, int *idx);
+int lov_llog_finish(struct obd_device *obd, int count);
+
+/* lov_pack.c */
+int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm,
+ struct lov_stripe_md *lsm);
+int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_bytes);
+int lov_setstripe(struct obd_export *exp, int max_lmm_size,
+ struct lov_stripe_md **lsmp, struct lov_user_md *lump);
+int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_user_md *lump);
+int lov_getstripe(struct obd_export *exp,
+ struct lov_stripe_md *lsm, struct lov_user_md *lump);
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
+ int pattern, int magic);
+int lov_free_memmd(struct lov_stripe_md **lsmp);
+
+void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm);
+void lov_dump_lmm_common(int level, void *lmmp);
+void lov_dump_lmm(int level, void *lmm);
+
+/* lov_ea.c */
+struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size);
+void lsm_free_plain(struct lov_stripe_md *lsm);
+
+int lovea_destroy_object(struct lov_obd *lov, struct lov_stripe_md *lsm,
+ struct obdo *oa, void *data);
+/* lproc_lov.c */
+extern struct file_operations lov_proc_target_fops;
+#ifdef LPROCFS
+void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+/* lov_cl.c */
+extern struct lu_device_type lov_device_type;
+
+/* pools */
+extern cfs_hash_ops_t pool_hash_operations;
+/* ost_pool methods */
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count);
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count);
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx);
+int lov_ost_pool_free(struct ost_pool *op);
+
+/* high level pool methods */
+int lov_pool_new(struct obd_device *obd, char *poolname);
+int lov_pool_del(struct obd_device *obd, char *poolname);
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
+void lov_dump_pool(int level, struct pool_desc *pool);
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
+void lov_pool_putref(struct pool_desc *pool);
+
+static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm)
+{
+ LASSERT(atomic_read(&lsm->lsm_refc) > 0);
+ atomic_inc(&lsm->lsm_refc);
+ return lsm;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
new file mode 100644
index 000000000000..1a87abdf0953
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -0,0 +1,967 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_io for LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+static inline void lov_sub_enter(struct lov_io_sub *sub)
+{
+ sub->sub_reenter++;
+}
+static inline void lov_sub_exit(struct lov_io_sub *sub)
+{
+ sub->sub_reenter--;
+}
+
+static void lov_io_sub_fini(const struct lu_env *env, struct lov_io *lio,
+ struct lov_io_sub *sub)
+{
+ ENTRY;
+ if (sub->sub_io != NULL) {
+ if (sub->sub_io_initialized) {
+ lov_sub_enter(sub);
+ cl_io_fini(sub->sub_env, sub->sub_io);
+ lov_sub_exit(sub);
+ sub->sub_io_initialized = 0;
+ lio->lis_active_subios--;
+ }
+ if (sub->sub_stripe == lio->lis_single_subio_index)
+ lio->lis_single_subio_index = -1;
+ else if (!sub->sub_borrowed)
+ OBD_FREE_PTR(sub->sub_io);
+ sub->sub_io = NULL;
+ }
+ if (sub->sub_env != NULL && !IS_ERR(sub->sub_env)) {
+ if (!sub->sub_borrowed)
+ cl_env_put(sub->sub_env, &sub->sub_refcheck);
+ sub->sub_env = NULL;
+ }
+ EXIT;
+}
+
+static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
+ int stripe, loff_t start, loff_t end)
+{
+ struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ struct cl_io *parent = lio->lis_cl.cis_io;
+
+ switch(io->ci_type) {
+ case CIT_SETATTR: {
+ io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
+ io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid;
+ io->u.ci_setattr.sa_capa = parent->u.ci_setattr.sa_capa;
+ if (cl_io_is_trunc(io)) {
+ loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size;
+
+ new_size = lov_size_to_stripe(lsm, new_size, stripe);
+ io->u.ci_setattr.sa_attr.lvb_size = new_size;
+ }
+ break;
+ }
+ case CIT_FAULT: {
+ struct cl_object *obj = parent->ci_obj;
+ loff_t off = cl_offset(obj, parent->u.ci_fault.ft_index);
+
+ io->u.ci_fault = parent->u.ci_fault;
+ off = lov_size_to_stripe(lsm, off, stripe);
+ io->u.ci_fault.ft_index = cl_index(obj, off);
+ break;
+ }
+ case CIT_FSYNC: {
+ io->u.ci_fsync.fi_start = start;
+ io->u.ci_fsync.fi_end = end;
+ io->u.ci_fsync.fi_capa = parent->u.ci_fsync.fi_capa;
+ io->u.ci_fsync.fi_fid = parent->u.ci_fsync.fi_fid;
+ io->u.ci_fsync.fi_mode = parent->u.ci_fsync.fi_mode;
+ break;
+ }
+ case CIT_READ:
+ case CIT_WRITE: {
+ io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent);
+ if (cl_io_is_append(parent)) {
+ io->u.ci_wr.wr_append = 1;
+ } else {
+ io->u.ci_rw.crw_pos = start;
+ io->u.ci_rw.crw_count = end - start;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio,
+ struct lov_io_sub *sub)
+{
+ struct lov_object *lov = lio->lis_object;
+ struct lov_device *ld = lu2lov_dev(lov2cl(lov)->co_lu.lo_dev);
+ struct cl_io *sub_io;
+ struct cl_object *sub_obj;
+ struct cl_io *io = lio->lis_cl.cis_io;
+
+ int stripe = sub->sub_stripe;
+ int result;
+
+ LASSERT(sub->sub_io == NULL);
+ LASSERT(sub->sub_env == NULL);
+ LASSERT(sub->sub_stripe < lio->lis_stripe_count);
+ ENTRY;
+
+ result = 0;
+ sub->sub_io_initialized = 0;
+ sub->sub_borrowed = 0;
+
+ if (lio->lis_mem_frozen) {
+ LASSERT(mutex_is_locked(&ld->ld_mutex));
+ sub->sub_io = &ld->ld_emrg[stripe]->emrg_subio;
+ sub->sub_env = ld->ld_emrg[stripe]->emrg_env;
+ sub->sub_borrowed = 1;
+ } else {
+ void *cookie;
+
+ /* obtain new environment */
+ cookie = cl_env_reenter();
+ sub->sub_env = cl_env_get(&sub->sub_refcheck);
+ cl_env_reexit(cookie);
+ if (IS_ERR(sub->sub_env))
+ result = PTR_ERR(sub->sub_env);
+
+ if (result == 0) {
+ /*
+ * First sub-io. Use ->lis_single_subio to
+ * avoid dynamic allocation.
+ */
+ if (lio->lis_active_subios == 0) {
+ sub->sub_io = &lio->lis_single_subio;
+ lio->lis_single_subio_index = stripe;
+ } else {
+ OBD_ALLOC_PTR(sub->sub_io);
+ if (sub->sub_io == NULL)
+ result = -ENOMEM;
+ }
+ }
+ }
+
+ if (result == 0) {
+ sub_obj = lovsub2cl(lov_r0(lov)->lo_sub[stripe]);
+ sub_io = sub->sub_io;
+
+ sub_io->ci_obj = sub_obj;
+ sub_io->ci_result = 0;
+
+ sub_io->ci_parent = io;
+ sub_io->ci_lockreq = io->ci_lockreq;
+ sub_io->ci_type = io->ci_type;
+ sub_io->ci_no_srvlock = io->ci_no_srvlock;
+
+ lov_sub_enter(sub);
+ result = cl_io_sub_init(sub->sub_env, sub_io,
+ io->ci_type, sub_obj);
+ lov_sub_exit(sub);
+ if (result >= 0) {
+ lio->lis_active_subios++;
+ sub->sub_io_initialized = 1;
+ result = 0;
+ }
+ }
+ if (result != 0)
+ lov_io_sub_fini(env, lio, sub);
+ RETURN(result);
+}
+
+struct lov_io_sub *lov_sub_get(const struct lu_env *env,
+ struct lov_io *lio, int stripe)
+{
+ int rc;
+ struct lov_io_sub *sub = &lio->lis_subs[stripe];
+
+ LASSERT(stripe < lio->lis_stripe_count);
+ ENTRY;
+
+ if (!sub->sub_io_initialized) {
+ sub->sub_stripe = stripe;
+ rc = lov_io_sub_init(env, lio, sub);
+ } else
+ rc = 0;
+ if (rc == 0)
+ lov_sub_enter(sub);
+ else
+ sub = ERR_PTR(rc);
+ RETURN(sub);
+}
+
+void lov_sub_put(struct lov_io_sub *sub)
+{
+ lov_sub_exit(sub);
+}
+
+/*****************************************************************************
+ *
+ * Lov io operations.
+ *
+ */
+
+static int lov_page_stripe(const struct cl_page *page)
+{
+ struct lovsub_object *subobj;
+
+ ENTRY;
+ subobj = lu2lovsub(
+ lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header,
+ &lovsub_device_type));
+ LASSERT(subobj != NULL);
+ RETURN(subobj->lso_index);
+}
+
+struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
+ const struct cl_page_slice *slice)
+{
+ struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ struct cl_page *page = slice->cpl_page;
+ int stripe;
+
+ LASSERT(lio->lis_cl.cis_io != NULL);
+ LASSERT(cl2lov(slice->cpl_obj) == lio->lis_object);
+ LASSERT(lsm != NULL);
+ LASSERT(lio->lis_nr_subios > 0);
+ ENTRY;
+
+ stripe = lov_page_stripe(page);
+ RETURN(lov_sub_get(env, lio, stripe));
+}
+
+
+static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
+ struct cl_io *io)
+{
+ struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ int result;
+
+ LASSERT(lio->lis_object != NULL);
+ ENTRY;
+
+ /*
+ * Need to be optimized, we can't afford to allocate a piece of memory
+ * when writing a page. -jay
+ */
+ OBD_ALLOC_LARGE(lio->lis_subs,
+ lsm->lsm_stripe_count * sizeof lio->lis_subs[0]);
+ if (lio->lis_subs != NULL) {
+ lio->lis_nr_subios = lio->lis_stripe_count;
+ lio->lis_single_subio_index = -1;
+ lio->lis_active_subios = 0;
+ result = 0;
+ } else
+ result = -ENOMEM;
+ RETURN(result);
+}
+
+static void lov_io_slice_init(struct lov_io *lio,
+ struct lov_object *obj, struct cl_io *io)
+{
+ ENTRY;
+
+ io->ci_result = 0;
+ lio->lis_object = obj;
+
+ LASSERT(obj->lo_lsm != NULL);
+ lio->lis_stripe_count = obj->lo_lsm->lsm_stripe_count;
+
+ switch (io->ci_type) {
+ case CIT_READ:
+ case CIT_WRITE:
+ lio->lis_pos = io->u.ci_rw.crw_pos;
+ lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
+ lio->lis_io_endpos = lio->lis_endpos;
+ if (cl_io_is_append(io)) {
+ LASSERT(io->ci_type == CIT_WRITE);
+ lio->lis_pos = 0;
+ lio->lis_endpos = OBD_OBJECT_EOF;
+ }
+ break;
+
+ case CIT_SETATTR:
+ if (cl_io_is_trunc(io))
+ lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size;
+ else
+ lio->lis_pos = 0;
+ lio->lis_endpos = OBD_OBJECT_EOF;
+ break;
+
+ case CIT_FAULT: {
+ pgoff_t index = io->u.ci_fault.ft_index;
+ lio->lis_pos = cl_offset(io->ci_obj, index);
+ lio->lis_endpos = cl_offset(io->ci_obj, index + 1);
+ break;
+ }
+
+ case CIT_FSYNC: {
+ lio->lis_pos = io->u.ci_fsync.fi_start;
+ lio->lis_endpos = io->u.ci_fsync.fi_end;
+ break;
+ }
+
+ case CIT_MISC:
+ lio->lis_pos = 0;
+ lio->lis_endpos = OBD_OBJECT_EOF;
+ break;
+
+ default:
+ LBUG();
+ }
+
+ EXIT;
+}
+
+static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_object *lov = cl2lov(ios->cis_obj);
+ int i;
+
+ ENTRY;
+ if (lio->lis_subs != NULL) {
+ for (i = 0; i < lio->lis_nr_subios; i++)
+ lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
+ OBD_FREE_LARGE(lio->lis_subs,
+ lio->lis_nr_subios * sizeof lio->lis_subs[0]);
+ lio->lis_nr_subios = 0;
+ }
+
+ LASSERT(atomic_read(&lov->lo_active_ios) > 0);
+ if (atomic_dec_and_test(&lov->lo_active_ios))
+ wake_up_all(&lov->lo_waitq);
+ EXIT;
+}
+
+static obd_off lov_offset_mod(obd_off val, int delta)
+{
+ if (val != OBD_OBJECT_EOF)
+ val += delta;
+ return val;
+}
+
+static int lov_io_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ struct lov_io_sub *sub;
+ obd_off endpos;
+ obd_off start;
+ obd_off end;
+ int stripe;
+ int rc = 0;
+
+ ENTRY;
+ endpos = lov_offset_mod(lio->lis_endpos, -1);
+ for (stripe = 0; stripe < lio->lis_stripe_count; stripe++) {
+ if (!lov_stripe_intersects(lsm, stripe, lio->lis_pos,
+ endpos, &start, &end))
+ continue;
+
+ end = lov_offset_mod(end, +1);
+ sub = lov_sub_get(env, lio, stripe);
+ if (!IS_ERR(sub)) {
+ lov_io_sub_inherit(sub->sub_io, lio, stripe,
+ start, end);
+ rc = cl_io_iter_init(sub->sub_env, sub->sub_io);
+ lov_sub_put(sub);
+ CDEBUG(D_VFSTRACE, "shrink: %d ["LPU64", "LPU64")\n",
+ stripe, start, end);
+ } else
+ rc = PTR_ERR(sub);
+
+ if (!rc)
+ list_add_tail(&sub->sub_linkage, &lio->lis_active);
+ else
+ break;
+ }
+ RETURN(rc);
+}
+
+static int lov_io_rw_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ loff_t start = io->u.ci_rw.crw_pos;
+ loff_t next;
+ unsigned long ssize = lsm->lsm_stripe_size;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+ ENTRY;
+
+ /* fast path for common case. */
+ if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) {
+
+ lov_do_div64(start, ssize);
+ next = (start + 1) * ssize;
+ if (next <= start * ssize)
+ next = ~0ull;
+
+ io->ci_continue = next < lio->lis_io_endpos;
+ io->u.ci_rw.crw_count = min_t(loff_t, lio->lis_io_endpos,
+ next) - io->u.ci_rw.crw_pos;
+ lio->lis_pos = io->u.ci_rw.crw_pos;
+ lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
+ CDEBUG(D_VFSTRACE, "stripe: "LPU64" chunk: ["LPU64", "LPU64") "
+ LPU64"\n", (__u64)start, lio->lis_pos, lio->lis_endpos,
+ (__u64)lio->lis_io_endpos);
+ }
+ /*
+ * XXX The following call should be optimized: we know, that
+ * [lio->lis_pos, lio->lis_endpos) intersects with exactly one stripe.
+ */
+ RETURN(lov_io_iter_init(env, ios));
+}
+
+static int lov_io_call(const struct lu_env *env, struct lov_io *lio,
+ int (*iofunc)(const struct lu_env *, struct cl_io *))
+{
+ struct cl_io *parent = lio->lis_cl.cis_io;
+ struct lov_io_sub *sub;
+ int rc = 0;
+
+ ENTRY;
+ list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
+ lov_sub_enter(sub);
+ rc = iofunc(sub->sub_env, sub->sub_io);
+ lov_sub_exit(sub);
+ if (rc)
+ break;
+
+ if (parent->ci_result == 0)
+ parent->ci_result = sub->sub_io->ci_result;
+ }
+ RETURN(rc);
+}
+
+static int lov_io_lock(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ ENTRY;
+ RETURN(lov_io_call(env, cl2lov_io(env, ios), cl_io_lock));
+}
+
+static int lov_io_start(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ ENTRY;
+ RETURN(lov_io_call(env, cl2lov_io(env, ios), cl_io_start));
+}
+
+static int lov_io_end_wrapper(const struct lu_env *env, struct cl_io *io)
+{
+ ENTRY;
+ /*
+ * It's possible that lov_io_start() wasn't called against this
+ * sub-io, either because previous sub-io failed, or upper layer
+ * completed IO.
+ */
+ if (io->ci_state == CIS_IO_GOING)
+ cl_io_end(env, io);
+ else
+ io->ci_state = CIS_IO_FINISHED;
+ RETURN(0);
+}
+
+static int lov_io_iter_fini_wrapper(const struct lu_env *env, struct cl_io *io)
+{
+ cl_io_iter_fini(env, io);
+ RETURN(0);
+}
+
+static int lov_io_unlock_wrapper(const struct lu_env *env, struct cl_io *io)
+{
+ cl_io_unlock(env, io);
+ RETURN(0);
+}
+
+static void lov_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ int rc;
+
+ rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_end_wrapper);
+ LASSERT(rc == 0);
+}
+
+static void lov_io_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ int rc;
+
+ ENTRY;
+ rc = lov_io_call(env, lio, lov_io_iter_fini_wrapper);
+ LASSERT(rc == 0);
+ while (!list_empty(&lio->lis_active))
+ list_del_init(lio->lis_active.next);
+ EXIT;
+}
+
+static void lov_io_unlock(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ int rc;
+
+ ENTRY;
+ rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_unlock_wrapper);
+ LASSERT(rc == 0);
+ EXIT;
+}
+
+
+static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld,
+ struct cl_page_list *qin,
+ int idx, int alloc)
+{
+ return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list;
+}
+
+/**
+ * lov implementation of cl_operations::cio_submit() method. It takes a list
+ * of pages in \a queue, splits it into per-stripe sub-lists, invokes
+ * cl_io_submit() on underlying devices to submit sub-lists, and then splices
+ * everything back.
+ *
+ * Major complication of this function is a need to handle memory cleansing:
+ * cl_io_submit() is called to write out pages as a part of VM memory
+ * reclamation, and hence it may not fail due to memory shortages (system
+ * dead-locks otherwise). To deal with this, some resources (sub-lists,
+ * sub-environment, etc.) are allocated per-device on "startup" (i.e., in a
+ * not-memory cleansing context), and in case of memory shortage, these
+ * pre-allocated resources are used by lov_io_submit() under
+ * lov_device::ld_mutex mutex.
+ */
+static int lov_io_submit(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ enum cl_req_type crt, struct cl_2queue *queue)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_object *obj = lio->lis_object;
+ struct lov_device *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev);
+ struct cl_page_list *qin = &queue->c2_qin;
+ struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
+ struct cl_page_list *stripes_qin = NULL;
+ struct cl_page *page;
+ struct cl_page *tmp;
+ int stripe;
+
+#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc)
+
+ int rc = 0;
+ int alloc =
+ !(current->flags & PF_MEMALLOC);
+ ENTRY;
+ if (lio->lis_active_subios == 1) {
+ int idx = lio->lis_single_subio_index;
+ struct lov_io_sub *sub;
+
+ LASSERT(idx < lio->lis_nr_subios);
+ sub = lov_sub_get(env, lio, idx);
+ LASSERT(!IS_ERR(sub));
+ LASSERT(sub->sub_io == &lio->lis_single_subio);
+ rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
+ crt, queue);
+ lov_sub_put(sub);
+ RETURN(rc);
+ }
+
+ LASSERT(lio->lis_subs != NULL);
+ if (alloc) {
+ OBD_ALLOC_LARGE(stripes_qin,
+ sizeof(*stripes_qin) * lio->lis_nr_subios);
+ if (stripes_qin == NULL)
+ RETURN(-ENOMEM);
+
+ for (stripe = 0; stripe < lio->lis_nr_subios; stripe++)
+ cl_page_list_init(&stripes_qin[stripe]);
+ } else {
+ /*
+ * If we get here, it means pageout & swap doesn't help.
+ * In order to not make things worse, even don't try to
+ * allocate the memory with __GFP_NOWARN. -jay
+ */
+ mutex_lock(&ld->ld_mutex);
+ lio->lis_mem_frozen = 1;
+ }
+
+ cl_2queue_init(cl2q);
+ cl_page_list_for_each_safe(page, tmp, qin) {
+ stripe = lov_page_stripe(page);
+ cl_page_list_move(QIN(stripe), qin, page);
+ }
+
+ for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
+ struct lov_io_sub *sub;
+ struct cl_page_list *sub_qin = QIN(stripe);
+
+ if (list_empty(&sub_qin->pl_pages))
+ continue;
+
+ cl_page_list_splice(sub_qin, &cl2q->c2_qin);
+ sub = lov_sub_get(env, lio, stripe);
+ if (!IS_ERR(sub)) {
+ rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
+ crt, cl2q);
+ lov_sub_put(sub);
+ } else
+ rc = PTR_ERR(sub);
+ cl_page_list_splice(&cl2q->c2_qin, &queue->c2_qin);
+ cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout);
+ if (rc != 0)
+ break;
+ }
+
+ for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
+ struct cl_page_list *sub_qin = QIN(stripe);
+
+ if (list_empty(&sub_qin->pl_pages))
+ continue;
+
+ cl_page_list_splice(sub_qin, qin);
+ }
+
+ if (alloc) {
+ OBD_FREE_LARGE(stripes_qin,
+ sizeof(*stripes_qin) * lio->lis_nr_subios);
+ } else {
+ int i;
+
+ for (i = 0; i < lio->lis_nr_subios; i++) {
+ struct cl_io *cio = lio->lis_subs[i].sub_io;
+
+ if (cio && cio == &ld->ld_emrg[i]->emrg_subio)
+ lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
+ }
+ lio->lis_mem_frozen = 0;
+ mutex_unlock(&ld->ld_mutex);
+ }
+
+ RETURN(rc);
+#undef QIN
+}
+
+static int lov_io_prepare_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct cl_page *sub_page = lov_sub_page(slice);
+ struct lov_io_sub *sub;
+ int result;
+
+ ENTRY;
+ sub = lov_page_subio(env, lio, slice);
+ if (!IS_ERR(sub)) {
+ result = cl_io_prepare_write(sub->sub_env, sub->sub_io,
+ sub_page, from, to);
+ lov_sub_put(sub);
+ } else
+ result = PTR_ERR(sub);
+ RETURN(result);
+}
+
+static int lov_io_commit_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct cl_page *sub_page = lov_sub_page(slice);
+ struct lov_io_sub *sub;
+ int result;
+
+ ENTRY;
+ sub = lov_page_subio(env, lio, slice);
+ if (!IS_ERR(sub)) {
+ result = cl_io_commit_write(sub->sub_env, sub->sub_io,
+ sub_page, from, to);
+ lov_sub_put(sub);
+ } else
+ result = PTR_ERR(sub);
+ RETURN(result);
+}
+
+static int lov_io_fault_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_fault_io *fio;
+ struct lov_io *lio;
+ struct lov_io_sub *sub;
+
+ ENTRY;
+ fio = &ios->cis_io->u.ci_fault;
+ lio = cl2lov_io(env, ios);
+ sub = lov_sub_get(env, lio, lov_page_stripe(fio->ft_page));
+ sub->sub_io->u.ci_fault.ft_nob = fio->ft_nob;
+ lov_sub_put(sub);
+ RETURN(lov_io_start(env, ios));
+}
+
+static void lov_io_fsync_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_io_sub *sub;
+ unsigned int *written = &ios->cis_io->u.ci_fsync.fi_nr_written;
+ ENTRY;
+
+ *written = 0;
+ list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
+ struct cl_io *subio = sub->sub_io;
+
+ lov_sub_enter(sub);
+ lov_io_end_wrapper(sub->sub_env, subio);
+ lov_sub_exit(sub);
+
+ if (subio->ci_result == 0)
+ *written += subio->u.ci_fsync.fi_nr_written;
+ }
+ RETURN_EXIT;
+}
+
+static const struct cl_io_operations lov_io_ops = {
+ .op = {
+ [CIT_READ] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_rw_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_start,
+ .cio_end = lov_io_end
+ },
+ [CIT_WRITE] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_rw_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_start,
+ .cio_end = lov_io_end
+ },
+ [CIT_SETATTR] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_start,
+ .cio_end = lov_io_end
+ },
+ [CIT_FAULT] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_fault_start,
+ .cio_end = lov_io_end
+ },
+ [CIT_FSYNC] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_start,
+ .cio_end = lov_io_fsync_end
+ },
+ [CIT_MISC] = {
+ .cio_fini = lov_io_fini
+ }
+ },
+ .req_op = {
+ [CRT_READ] = {
+ .cio_submit = lov_io_submit
+ },
+ [CRT_WRITE] = {
+ .cio_submit = lov_io_submit
+ }
+ },
+ .cio_prepare_write = lov_io_prepare_write,
+ .cio_commit_write = lov_io_commit_write
+};
+
+/*****************************************************************************
+ *
+ * Empty lov io operations.
+ *
+ */
+
+static void lov_empty_io_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct lov_object *lov = cl2lov(ios->cis_obj);
+ ENTRY;
+
+ if (atomic_dec_and_test(&lov->lo_active_ios))
+ wake_up_all(&lov->lo_waitq);
+ EXIT;
+}
+
+static void lov_empty_impossible(const struct lu_env *env,
+ struct cl_io_slice *ios)
+{
+ LBUG();
+}
+
+#define LOV_EMPTY_IMPOSSIBLE ((void *)lov_empty_impossible)
+
+/**
+ * An io operation vector for files without stripes.
+ */
+static const struct cl_io_operations lov_empty_io_ops = {
+ .op = {
+ [CIT_READ] = {
+ .cio_fini = lov_empty_io_fini,
+#if 0
+ .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
+ .cio_lock = LOV_EMPTY_IMPOSSIBLE,
+ .cio_start = LOV_EMPTY_IMPOSSIBLE,
+ .cio_end = LOV_EMPTY_IMPOSSIBLE
+#endif
+ },
+ [CIT_WRITE] = {
+ .cio_fini = lov_empty_io_fini,
+ .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
+ .cio_lock = LOV_EMPTY_IMPOSSIBLE,
+ .cio_start = LOV_EMPTY_IMPOSSIBLE,
+ .cio_end = LOV_EMPTY_IMPOSSIBLE
+ },
+ [CIT_SETATTR] = {
+ .cio_fini = lov_empty_io_fini,
+ .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
+ .cio_lock = LOV_EMPTY_IMPOSSIBLE,
+ .cio_start = LOV_EMPTY_IMPOSSIBLE,
+ .cio_end = LOV_EMPTY_IMPOSSIBLE
+ },
+ [CIT_FAULT] = {
+ .cio_fini = lov_empty_io_fini,
+ .cio_iter_init = LOV_EMPTY_IMPOSSIBLE,
+ .cio_lock = LOV_EMPTY_IMPOSSIBLE,
+ .cio_start = LOV_EMPTY_IMPOSSIBLE,
+ .cio_end = LOV_EMPTY_IMPOSSIBLE
+ },
+ [CIT_FSYNC] = {
+ .cio_fini = lov_empty_io_fini
+ },
+ [CIT_MISC] = {
+ .cio_fini = lov_empty_io_fini
+ }
+ },
+ .req_op = {
+ [CRT_READ] = {
+ .cio_submit = LOV_EMPTY_IMPOSSIBLE
+ },
+ [CRT_WRITE] = {
+ .cio_submit = LOV_EMPTY_IMPOSSIBLE
+ }
+ },
+ .cio_commit_write = LOV_EMPTY_IMPOSSIBLE
+};
+
+int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io)
+{
+ struct lov_io *lio = lov_env_io(env);
+ struct lov_object *lov = cl2lov(obj);
+
+ ENTRY;
+ INIT_LIST_HEAD(&lio->lis_active);
+ lov_io_slice_init(lio, lov, io);
+ if (io->ci_result == 0) {
+ io->ci_result = lov_io_subio_init(env, lio, io);
+ if (io->ci_result == 0) {
+ cl_io_slice_add(io, &lio->lis_cl, obj, &lov_io_ops);
+ atomic_inc(&lov->lo_active_ios);
+ }
+ }
+ RETURN(io->ci_result);
+}
+
+int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io)
+{
+ struct lov_object *lov = cl2lov(obj);
+ struct lov_io *lio = lov_env_io(env);
+ int result;
+ ENTRY;
+
+ lio->lis_object = lov;
+ switch (io->ci_type) {
+ default:
+ LBUG();
+ case CIT_MISC:
+ case CIT_READ:
+ result = 0;
+ break;
+ case CIT_FSYNC:
+ case CIT_SETATTR:
+ result = +1;
+ break;
+ case CIT_WRITE:
+ result = -EBADF;
+ break;
+ case CIT_FAULT:
+ result = -EFAULT;
+ CERROR("Page fault on a file without stripes: "DFID"\n",
+ PFID(lu_object_fid(&obj->co_lu)));
+ break;
+ }
+ if (result == 0) {
+ cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops);
+ atomic_inc(&lov->lo_active_ios);
+ }
+
+ io->ci_result = result < 0 ? result : 0;
+ RETURN(result != 0);
+}
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c
new file mode 100644
index 000000000000..bdf3334e0c9f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_lock.c
@@ -0,0 +1,1253 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_lock for LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
+ struct cl_lock *parent);
+
+static int lov_lock_unuse(const struct lu_env *env,
+ const struct cl_lock_slice *slice);
+/*****************************************************************************
+ *
+ * Lov lock operations.
+ *
+ */
+
+static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
+ struct cl_lock *parent,
+ struct lov_lock_sub *lls)
+{
+ struct lov_sublock_env *subenv;
+ struct lov_io *lio = lov_env_io(env);
+ struct cl_io *io = lio->lis_cl.cis_io;
+ struct lov_io_sub *sub;
+
+ subenv = &lov_env_session(env)->ls_subenv;
+
+ /*
+ * FIXME: We tend to use the subio's env & io to call the sublock
+ * lock operations because osc lock sometimes stores some control
+ * variables in thread's IO infomation(Now only lockless information).
+ * However, if the lock's host(object) is different from the object
+ * for current IO, we have no way to get the subenv and subio because
+ * they are not initialized at all. As a temp fix, in this case,
+ * we still borrow the parent's env to call sublock operations.
+ */
+ if (!io || !cl_object_same(io->ci_obj, parent->cll_descr.cld_obj)) {
+ subenv->lse_env = env;
+ subenv->lse_io = io;
+ subenv->lse_sub = NULL;
+ } else {
+ sub = lov_sub_get(env, lio, lls->sub_stripe);
+ if (!IS_ERR(sub)) {
+ subenv->lse_env = sub->sub_env;
+ subenv->lse_io = sub->sub_io;
+ subenv->lse_sub = sub;
+ } else {
+ subenv = (void*)sub;
+ }
+ }
+ return subenv;
+}
+
+static void lov_sublock_env_put(struct lov_sublock_env *subenv)
+{
+ if (subenv && subenv->lse_sub)
+ lov_sub_put(subenv->lse_sub);
+}
+
+static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
+ struct cl_lock *sublock, int idx,
+ struct lov_lock_link *link)
+{
+ struct lovsub_lock *lsl;
+ struct cl_lock *parent = lck->lls_cl.cls_lock;
+ int rc;
+
+ LASSERT(cl_lock_is_mutexed(parent));
+ LASSERT(cl_lock_is_mutexed(sublock));
+ ENTRY;
+
+ lsl = cl2sub_lock(sublock);
+ /*
+ * check that sub-lock doesn't have lock link to this top-lock.
+ */
+ LASSERT(lov_lock_link_find(env, lck, lsl) == NULL);
+ LASSERT(idx < lck->lls_nr);
+
+ lck->lls_sub[idx].sub_lock = lsl;
+ lck->lls_nr_filled++;
+ LASSERT(lck->lls_nr_filled <= lck->lls_nr);
+ list_add_tail(&link->lll_list, &lsl->lss_parents);
+ link->lll_idx = idx;
+ link->lll_super = lck;
+ cl_lock_get(parent);
+ lu_ref_add(&parent->cll_reference, "lov-child", sublock);
+ lck->lls_sub[idx].sub_flags |= LSF_HELD;
+ cl_lock_user_add(env, sublock);
+
+ rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
+ LASSERT(rc == 0); /* there is no way this can fail, currently */
+ EXIT;
+}
+
+static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
+ const struct cl_io *io,
+ struct lov_lock *lck,
+ int idx, struct lov_lock_link **out)
+{
+ struct cl_lock *sublock;
+ struct cl_lock *parent;
+ struct lov_lock_link *link;
+
+ LASSERT(idx < lck->lls_nr);
+ ENTRY;
+
+ OBD_SLAB_ALLOC_PTR_GFP(link, lov_lock_link_kmem, __GFP_IO);
+ if (link != NULL) {
+ struct lov_sublock_env *subenv;
+ struct lov_lock_sub *lls;
+ struct cl_lock_descr *descr;
+
+ parent = lck->lls_cl.cls_lock;
+ lls = &lck->lls_sub[idx];
+ descr = &lls->sub_got;
+
+ subenv = lov_sublock_env_get(env, parent, lls);
+ if (!IS_ERR(subenv)) {
+ /* CAVEAT: Don't try to add a field in lov_lock_sub
+ * to remember the subio. This is because lock is able
+ * to be cached, but this is not true for IO. This
+ * further means a sublock might be referenced in
+ * different io context. -jay */
+
+ sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
+ descr, "lov-parent", parent);
+ lov_sublock_env_put(subenv);
+ } else {
+ /* error occurs. */
+ sublock = (void*)subenv;
+ }
+
+ if (!IS_ERR(sublock))
+ *out = link;
+ else
+ OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
+ } else
+ sublock = ERR_PTR(-ENOMEM);
+ RETURN(sublock);
+}
+
+static void lov_sublock_unlock(const struct lu_env *env,
+ struct lovsub_lock *lsl,
+ struct cl_lock_closure *closure,
+ struct lov_sublock_env *subenv)
+{
+ ENTRY;
+ lov_sublock_env_put(subenv);
+ lsl->lss_active = NULL;
+ cl_lock_disclosure(env, closure);
+ EXIT;
+}
+
+static int lov_sublock_lock(const struct lu_env *env,
+ struct lov_lock *lck,
+ struct lov_lock_sub *lls,
+ struct cl_lock_closure *closure,
+ struct lov_sublock_env **lsep)
+{
+ struct lovsub_lock *sublock;
+ struct cl_lock *child;
+ int result = 0;
+ ENTRY;
+
+ LASSERT(list_empty(&closure->clc_list));
+
+ sublock = lls->sub_lock;
+ child = sublock->lss_cl.cls_lock;
+ result = cl_lock_closure_build(env, child, closure);
+ if (result == 0) {
+ struct cl_lock *parent = closure->clc_origin;
+
+ LASSERT(cl_lock_is_mutexed(child));
+ sublock->lss_active = parent;
+
+ if (unlikely((child->cll_state == CLS_FREEING) ||
+ (child->cll_flags & CLF_CANCELLED))) {
+ struct lov_lock_link *link;
+ /*
+ * we could race with lock deletion which temporarily
+ * put the lock in freeing state, bug 19080.
+ */
+ LASSERT(!(lls->sub_flags & LSF_HELD));
+
+ link = lov_lock_link_find(env, lck, sublock);
+ LASSERT(link != NULL);
+ lov_lock_unlink(env, link, sublock);
+ lov_sublock_unlock(env, sublock, closure, NULL);
+ lck->lls_cancel_race = 1;
+ result = CLO_REPEAT;
+ } else if (lsep) {
+ struct lov_sublock_env *subenv;
+ subenv = lov_sublock_env_get(env, parent, lls);
+ if (IS_ERR(subenv)) {
+ lov_sublock_unlock(env, sublock,
+ closure, NULL);
+ result = PTR_ERR(subenv);
+ } else {
+ *lsep = subenv;
+ }
+ }
+ }
+ RETURN(result);
+}
+
+/**
+ * Updates the result of a top-lock operation from a result of sub-lock
+ * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
+ * over sub-locks and lov_subresult() is used to calculate return value of a
+ * top-operation. To this end, possible return values of sub-operations are
+ * ordered as
+ *
+ * - 0 success
+ * - CLO_WAIT wait for event
+ * - CLO_REPEAT repeat top-operation
+ * - -ne fundamental error
+ *
+ * Top-level return code can only go down through this list. CLO_REPEAT
+ * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
+ * has to be rechecked by the upper layer.
+ */
+static int lov_subresult(int result, int rc)
+{
+ int result_rank;
+ int rc_rank;
+
+ ENTRY;
+
+ LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
+ "result = %d", result);
+ LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
+ "rc = %d\n", rc);
+ CLASSERT(CLO_WAIT < CLO_REPEAT);
+
+ /* calculate ranks in the ordering above */
+ result_rank = result < 0 ? 1 + CLO_REPEAT : result;
+ rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
+
+ if (result_rank < rc_rank)
+ result = rc;
+ RETURN(result);
+}
+
+/**
+ * Creates sub-locks for a given lov_lock for the first time.
+ *
+ * Goes through all sub-objects of top-object, and creates sub-locks on every
+ * sub-object intersecting with top-lock extent. This is complicated by the
+ * fact that top-lock (that is being created) can be accessed concurrently
+ * through already created sub-locks (possibly shared with other top-locks).
+ */
+static int lov_lock_sub_init(const struct lu_env *env,
+ struct lov_lock *lck, const struct cl_io *io)
+{
+ int result = 0;
+ int i;
+ int nr;
+ obd_off start;
+ obd_off end;
+ obd_off file_start;
+ obd_off file_end;
+
+ struct lov_object *loo = cl2lov(lck->lls_cl.cls_obj);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ struct cl_lock *parent = lck->lls_cl.cls_lock;
+
+ ENTRY;
+
+ lck->lls_orig = parent->cll_descr;
+ file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
+ file_end = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
+
+ for (i = 0, nr = 0; i < r0->lo_nr; i++) {
+ /*
+ * XXX for wide striping smarter algorithm is desirable,
+ * breaking out of the loop, early.
+ */
+ if (lov_stripe_intersects(loo->lo_lsm, i,
+ file_start, file_end, &start, &end))
+ nr++;
+ }
+ LASSERT(nr > 0);
+ OBD_ALLOC_LARGE(lck->lls_sub, nr * sizeof lck->lls_sub[0]);
+ if (lck->lls_sub == NULL)
+ RETURN(-ENOMEM);
+
+ lck->lls_nr = nr;
+ /*
+ * First, fill in sub-lock descriptions in
+ * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
+ * (called below in this function, and by lov_lock_enqueue()) to
+ * create sub-locks. At this moment, no other thread can access
+ * top-lock.
+ */
+ for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
+ if (lov_stripe_intersects(loo->lo_lsm, i,
+ file_start, file_end, &start, &end)) {
+ struct cl_lock_descr *descr;
+
+ descr = &lck->lls_sub[nr].sub_descr;
+
+ LASSERT(descr->cld_obj == NULL);
+ descr->cld_obj = lovsub2cl(r0->lo_sub[i]);
+ descr->cld_start = cl_index(descr->cld_obj, start);
+ descr->cld_end = cl_index(descr->cld_obj, end);
+ descr->cld_mode = parent->cll_descr.cld_mode;
+ descr->cld_gid = parent->cll_descr.cld_gid;
+ descr->cld_enq_flags = parent->cll_descr.cld_enq_flags;
+ /* XXX has no effect */
+ lck->lls_sub[nr].sub_got = *descr;
+ lck->lls_sub[nr].sub_stripe = i;
+ nr++;
+ }
+ }
+ LASSERT(nr == lck->lls_nr);
+ /*
+ * Then, create sub-locks. Once at least one sub-lock was created,
+ * top-lock can be reached by other threads.
+ */
+ for (i = 0; i < lck->lls_nr; ++i) {
+ struct cl_lock *sublock;
+ struct lov_lock_link *link;
+
+ if (lck->lls_sub[i].sub_lock == NULL) {
+ sublock = lov_sublock_alloc(env, io, lck, i, &link);
+ if (IS_ERR(sublock)) {
+ result = PTR_ERR(sublock);
+ break;
+ }
+ cl_lock_get_trust(sublock);
+ cl_lock_mutex_get(env, sublock);
+ cl_lock_mutex_get(env, parent);
+ /*
+ * recheck under mutex that sub-lock wasn't created
+ * concurrently, and that top-lock is still alive.
+ */
+ if (lck->lls_sub[i].sub_lock == NULL &&
+ parent->cll_state < CLS_FREEING) {
+ lov_sublock_adopt(env, lck, sublock, i, link);
+ cl_lock_mutex_put(env, parent);
+ } else {
+ OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
+ cl_lock_mutex_put(env, parent);
+ cl_lock_unhold(env, sublock,
+ "lov-parent", parent);
+ }
+ cl_lock_mutex_put(env, sublock);
+ cl_lock_put(env, sublock);
+ }
+ }
+ /*
+ * Some sub-locks can be missing at this point. This is not a problem,
+ * because enqueue will create them anyway. Main duty of this function
+ * is to fill in sub-lock descriptions in a race free manner.
+ */
+ RETURN(result);
+}
+
+static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
+ int i, int deluser, int rc)
+{
+ struct cl_lock *parent = lck->lls_cl.cls_lock;
+
+ LASSERT(cl_lock_is_mutexed(parent));
+ ENTRY;
+
+ if (lck->lls_sub[i].sub_flags & LSF_HELD) {
+ struct cl_lock *sublock;
+ int dying;
+
+ LASSERT(lck->lls_sub[i].sub_lock != NULL);
+ sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
+ LASSERT(cl_lock_is_mutexed(sublock));
+
+ lck->lls_sub[i].sub_flags &= ~LSF_HELD;
+ if (deluser)
+ cl_lock_user_del(env, sublock);
+ /*
+ * If the last hold is released, and cancellation is pending
+ * for a sub-lock, release parent mutex, to avoid keeping it
+ * while sub-lock is being paged out.
+ */
+ dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
+ sublock->cll_descr.cld_mode == CLM_GROUP ||
+ (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
+ sublock->cll_holds == 1;
+ if (dying)
+ cl_lock_mutex_put(env, parent);
+ cl_lock_unhold(env, sublock, "lov-parent", parent);
+ if (dying) {
+ cl_lock_mutex_get(env, parent);
+ rc = lov_subresult(rc, CLO_REPEAT);
+ }
+ /*
+ * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
+ * not backed by a reference on a
+ * sub-lock. lovsub_lock_delete() will clear
+ * lck->lls_sub[i].sub_lock under semaphores, just before
+ * sub-lock is destroyed.
+ */
+ }
+ RETURN(rc);
+}
+
+static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
+ int i)
+{
+ struct cl_lock *parent = lck->lls_cl.cls_lock;
+
+ LASSERT(cl_lock_is_mutexed(parent));
+ ENTRY;
+
+ if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
+ struct cl_lock *sublock;
+
+ LASSERT(lck->lls_sub[i].sub_lock != NULL);
+ sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
+ LASSERT(cl_lock_is_mutexed(sublock));
+ LASSERT(sublock->cll_state != CLS_FREEING);
+
+ lck->lls_sub[i].sub_flags |= LSF_HELD;
+
+ cl_lock_get_trust(sublock);
+ cl_lock_hold_add(env, sublock, "lov-parent", parent);
+ cl_lock_user_add(env, sublock);
+ cl_lock_put(env, sublock);
+ }
+ EXIT;
+}
+
+static void lov_lock_fini(const struct lu_env *env,
+ struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck;
+ int i;
+
+ ENTRY;
+ lck = cl2lov_lock(slice);
+ LASSERT(lck->lls_nr_filled == 0);
+ if (lck->lls_sub != NULL) {
+ for (i = 0; i < lck->lls_nr; ++i)
+ /*
+ * No sub-locks exists at this point, as sub-lock has
+ * a reference on its parent.
+ */
+ LASSERT(lck->lls_sub[i].sub_lock == NULL);
+ OBD_FREE_LARGE(lck->lls_sub,
+ lck->lls_nr * sizeof lck->lls_sub[0]);
+ }
+ OBD_SLAB_FREE_PTR(lck, lov_lock_kmem);
+ EXIT;
+}
+
+static int lov_lock_enqueue_wait(const struct lu_env *env,
+ struct lov_lock *lck,
+ struct cl_lock *sublock)
+{
+ struct cl_lock *lock = lck->lls_cl.cls_lock;
+ int result;
+ ENTRY;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+
+ cl_lock_mutex_put(env, lock);
+ result = cl_lock_enqueue_wait(env, sublock, 0);
+ cl_lock_mutex_get(env, lock);
+ RETURN(result ?: CLO_REPEAT);
+}
+
+/**
+ * Tries to advance a state machine of a given sub-lock toward enqueuing of
+ * the top-lock.
+ *
+ * \retval 0 if state-transition can proceed
+ * \retval -ve otherwise.
+ */
+static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
+ struct cl_lock *sublock,
+ struct cl_io *io, __u32 enqflags, int last)
+{
+ int result;
+ ENTRY;
+
+ /* first, try to enqueue a sub-lock ... */
+ result = cl_enqueue_try(env, sublock, io, enqflags);
+ if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
+ /* if it is enqueued, try to `wait' on it---maybe it's already
+ * granted */
+ result = cl_wait_try(env, sublock);
+ if (result == CLO_REENQUEUED)
+ result = CLO_WAIT;
+ }
+ /*
+ * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
+ * parallel, otherwise---enqueue has to wait until sub-lock is granted
+ * before proceeding to the next one.
+ */
+ if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
+ (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
+ result = 0;
+ RETURN(result);
+}
+
+/**
+ * Helper function for lov_lock_enqueue() that creates missing sub-lock.
+ */
+static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
+ struct cl_io *io, struct lov_lock *lck, int idx)
+{
+ struct lov_lock_link *link;
+ struct cl_lock *sublock;
+ int result;
+
+ LASSERT(parent->cll_depth == 1);
+ cl_lock_mutex_put(env, parent);
+ sublock = lov_sublock_alloc(env, io, lck, idx, &link);
+ if (!IS_ERR(sublock))
+ cl_lock_mutex_get(env, sublock);
+ cl_lock_mutex_get(env, parent);
+
+ if (!IS_ERR(sublock)) {
+ cl_lock_get_trust(sublock);
+ if (parent->cll_state == CLS_QUEUING &&
+ lck->lls_sub[idx].sub_lock == NULL) {
+ lov_sublock_adopt(env, lck, sublock, idx, link);
+ } else {
+ OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
+ /* other thread allocated sub-lock, or enqueue is no
+ * longer going on */
+ cl_lock_mutex_put(env, parent);
+ cl_lock_unhold(env, sublock, "lov-parent", parent);
+ cl_lock_mutex_get(env, parent);
+ }
+ cl_lock_mutex_put(env, sublock);
+ cl_lock_put(env, sublock);
+ result = CLO_REPEAT;
+ } else
+ result = PTR_ERR(sublock);
+ return result;
+}
+
+/**
+ * Implementation of cl_lock_operations::clo_enqueue() for lov layer. This
+ * function is rather subtle, as it enqueues top-lock (i.e., advances top-lock
+ * state machine from CLS_QUEUING to CLS_ENQUEUED states) by juggling sub-lock
+ * state machines in the face of sub-locks sharing (by multiple top-locks),
+ * and concurrent sub-lock cancellations.
+ */
+static int lov_lock_enqueue(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *io, __u32 enqflags)
+{
+ struct cl_lock *lock = slice->cls_lock;
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, lock);
+ int i;
+ int result;
+ enum cl_lock_state minstate;
+
+ ENTRY;
+
+ for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
+ int rc;
+ struct lovsub_lock *sub;
+ struct lov_lock_sub *lls;
+ struct cl_lock *sublock;
+ struct lov_sublock_env *subenv;
+
+ if (lock->cll_state != CLS_QUEUING) {
+ /*
+ * Lock might have left QUEUING state if previous
+ * iteration released its mutex. Stop enqueing in this
+ * case and let the upper layer to decide what to do.
+ */
+ LASSERT(i > 0 && result != 0);
+ break;
+ }
+
+ lls = &lck->lls_sub[i];
+ sub = lls->sub_lock;
+ /*
+ * Sub-lock might have been canceled, while top-lock was
+ * cached.
+ */
+ if (sub == NULL) {
+ result = lov_sublock_fill(env, lock, io, lck, i);
+ /* lov_sublock_fill() released @lock mutex,
+ * restart. */
+ break;
+ }
+ sublock = sub->lss_cl.cls_lock;
+ rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
+ if (rc == 0) {
+ lov_sublock_hold(env, lck, i);
+ rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
+ subenv->lse_io, enqflags,
+ i == lck->lls_nr - 1);
+ minstate = min(minstate, sublock->cll_state);
+ if (rc == CLO_WAIT) {
+ switch (sublock->cll_state) {
+ case CLS_QUEUING:
+ /* take recursive mutex, the lock is
+ * released in lov_lock_enqueue_wait.
+ */
+ cl_lock_mutex_get(env, sublock);
+ lov_sublock_unlock(env, sub, closure,
+ subenv);
+ rc = lov_lock_enqueue_wait(env, lck,
+ sublock);
+ break;
+ case CLS_CACHED:
+ cl_lock_get(sublock);
+ /* take recursive mutex of sublock */
+ cl_lock_mutex_get(env, sublock);
+ /* need to release all locks in closure
+ * otherwise it may deadlock. LU-2683.*/
+ lov_sublock_unlock(env, sub, closure,
+ subenv);
+ /* sublock and parent are held. */
+ rc = lov_sublock_release(env, lck, i,
+ 1, rc);
+ cl_lock_mutex_put(env, sublock);
+ cl_lock_put(env, sublock);
+ break;
+ default:
+ lov_sublock_unlock(env, sub, closure,
+ subenv);
+ break;
+ }
+ } else {
+ LASSERT(sublock->cll_conflict == NULL);
+ lov_sublock_unlock(env, sub, closure, subenv);
+ }
+ }
+ result = lov_subresult(result, rc);
+ if (result != 0)
+ break;
+ }
+ cl_lock_closure_fini(closure);
+ RETURN(result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT);
+}
+
+static int lov_lock_unuse(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ int i;
+ int result;
+
+ ENTRY;
+
+ for (result = 0, i = 0; i < lck->lls_nr; ++i) {
+ int rc;
+ struct lovsub_lock *sub;
+ struct cl_lock *sublock;
+ struct lov_lock_sub *lls;
+ struct lov_sublock_env *subenv;
+
+ /* top-lock state cannot change concurrently, because single
+ * thread (one that released the last hold) carries unlocking
+ * to the completion. */
+ LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
+ lls = &lck->lls_sub[i];
+ sub = lls->sub_lock;
+ if (sub == NULL)
+ continue;
+
+ sublock = sub->lss_cl.cls_lock;
+ rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
+ if (rc == 0) {
+ if (lls->sub_flags & LSF_HELD) {
+ LASSERT(sublock->cll_state == CLS_HELD ||
+ sublock->cll_state == CLS_ENQUEUED);
+ rc = cl_unuse_try(subenv->lse_env, sublock);
+ rc = lov_sublock_release(env, lck, i, 0, rc);
+ }
+ lov_sublock_unlock(env, sub, closure, subenv);
+ }
+ result = lov_subresult(result, rc);
+ }
+
+ if (result == 0 && lck->lls_cancel_race) {
+ lck->lls_cancel_race = 0;
+ result = -ESTALE;
+ }
+ cl_lock_closure_fini(closure);
+ RETURN(result);
+}
+
+
+static void lov_lock_cancel(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ int i;
+ int result;
+
+ ENTRY;
+
+ for (result = 0, i = 0; i < lck->lls_nr; ++i) {
+ int rc;
+ struct lovsub_lock *sub;
+ struct cl_lock *sublock;
+ struct lov_lock_sub *lls;
+ struct lov_sublock_env *subenv;
+
+ /* top-lock state cannot change concurrently, because single
+ * thread (one that released the last hold) carries unlocking
+ * to the completion. */
+ lls = &lck->lls_sub[i];
+ sub = lls->sub_lock;
+ if (sub == NULL)
+ continue;
+
+ sublock = sub->lss_cl.cls_lock;
+ rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
+ if (rc == 0) {
+ if (!(lls->sub_flags & LSF_HELD)) {
+ lov_sublock_unlock(env, sub, closure, subenv);
+ continue;
+ }
+
+ switch(sublock->cll_state) {
+ case CLS_HELD:
+ rc = cl_unuse_try(subenv->lse_env, sublock);
+ lov_sublock_release(env, lck, i, 0, 0);
+ break;
+ default:
+ lov_sublock_release(env, lck, i, 1, 0);
+ break;
+ }
+ lov_sublock_unlock(env, sub, closure, subenv);
+ }
+
+ if (rc == CLO_REPEAT) {
+ --i;
+ continue;
+ }
+
+ result = lov_subresult(result, rc);
+ }
+
+ if (result)
+ CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
+ "lov_lock_cancel fails with %d.\n", result);
+
+ cl_lock_closure_fini(closure);
+}
+
+static int lov_lock_wait(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ enum cl_lock_state minstate;
+ int reenqueued;
+ int result;
+ int i;
+
+ ENTRY;
+
+again:
+ for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
+ i < lck->lls_nr; ++i) {
+ int rc;
+ struct lovsub_lock *sub;
+ struct cl_lock *sublock;
+ struct lov_lock_sub *lls;
+ struct lov_sublock_env *subenv;
+
+ lls = &lck->lls_sub[i];
+ sub = lls->sub_lock;
+ LASSERT(sub != NULL);
+ sublock = sub->lss_cl.cls_lock;
+ rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
+ if (rc == 0) {
+ LASSERT(sublock->cll_state >= CLS_ENQUEUED);
+ if (sublock->cll_state < CLS_HELD)
+ rc = cl_wait_try(env, sublock);
+
+ minstate = min(minstate, sublock->cll_state);
+ lov_sublock_unlock(env, sub, closure, subenv);
+ }
+ if (rc == CLO_REENQUEUED) {
+ reenqueued++;
+ rc = 0;
+ }
+ result = lov_subresult(result, rc);
+ if (result != 0)
+ break;
+ }
+ /* Each sublock only can be reenqueued once, so will not loop for
+ * ever. */
+ if (result == 0 && reenqueued != 0)
+ goto again;
+ cl_lock_closure_fini(closure);
+ RETURN(result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT);
+}
+
+static int lov_lock_use(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ int result;
+ int i;
+
+ LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
+ ENTRY;
+
+ for (result = 0, i = 0; i < lck->lls_nr; ++i) {
+ int rc;
+ struct lovsub_lock *sub;
+ struct cl_lock *sublock;
+ struct lov_lock_sub *lls;
+ struct lov_sublock_env *subenv;
+
+ LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
+
+ lls = &lck->lls_sub[i];
+ sub = lls->sub_lock;
+ if (sub == NULL) {
+ /*
+ * Sub-lock might have been canceled, while top-lock was
+ * cached.
+ */
+ result = -ESTALE;
+ break;
+ }
+
+ sublock = sub->lss_cl.cls_lock;
+ rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
+ if (rc == 0) {
+ LASSERT(sublock->cll_state != CLS_FREEING);
+ lov_sublock_hold(env, lck, i);
+ if (sublock->cll_state == CLS_CACHED) {
+ rc = cl_use_try(subenv->lse_env, sublock, 0);
+ if (rc != 0)
+ rc = lov_sublock_release(env, lck,
+ i, 1, rc);
+ } else if (sublock->cll_state == CLS_NEW) {
+ /* Sub-lock might have been canceled, while
+ * top-lock was cached. */
+ result = -ESTALE;
+ lov_sublock_release(env, lck, i, 1, result);
+ }
+ lov_sublock_unlock(env, sub, closure, subenv);
+ }
+ result = lov_subresult(result, rc);
+ if (result != 0)
+ break;
+ }
+
+ if (lck->lls_cancel_race) {
+ /*
+ * If there is unlocking happened at the same time, then
+ * sublock_lock state should be FREEING, and lov_sublock_lock
+ * should return CLO_REPEAT. In this case, it should return
+ * ESTALE, and up layer should reset the lock state to be NEW.
+ */
+ lck->lls_cancel_race = 0;
+ LASSERT(result != 0);
+ result = -ESTALE;
+ }
+ cl_lock_closure_fini(closure);
+ RETURN(result);
+}
+
+#if 0
+static int lock_lock_multi_match()
+{
+ struct cl_lock *lock = slice->cls_lock;
+ struct cl_lock_descr *subneed = &lov_env_info(env)->lti_ldescr;
+ struct lov_object *loo = cl2lov(lov->lls_cl.cls_obj);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ struct lov_lock_sub *sub;
+ struct cl_object *subobj;
+ obd_off fstart;
+ obd_off fend;
+ obd_off start;
+ obd_off end;
+ int i;
+
+ fstart = cl_offset(need->cld_obj, need->cld_start);
+ fend = cl_offset(need->cld_obj, need->cld_end + 1) - 1;
+ subneed->cld_mode = need->cld_mode;
+ cl_lock_mutex_get(env, lock);
+ for (i = 0; i < lov->lls_nr; ++i) {
+ sub = &lov->lls_sub[i];
+ if (sub->sub_lock == NULL)
+ continue;
+ subobj = sub->sub_descr.cld_obj;
+ if (!lov_stripe_intersects(loo->lo_lsm, sub->sub_stripe,
+ fstart, fend, &start, &end))
+ continue;
+ subneed->cld_start = cl_index(subobj, start);
+ subneed->cld_end = cl_index(subobj, end);
+ subneed->cld_obj = subobj;
+ if (!cl_lock_ext_match(&sub->sub_got, subneed)) {
+ result = 0;
+ break;
+ }
+ }
+ cl_lock_mutex_put(env, lock);
+}
+#endif
+
+/**
+ * Check if the extent region \a descr is covered by \a child against the
+ * specific \a stripe.
+ */
+static int lov_lock_stripe_is_matching(const struct lu_env *env,
+ struct lov_object *lov, int stripe,
+ const struct cl_lock_descr *child,
+ const struct cl_lock_descr *descr)
+{
+ struct lov_stripe_md *lsm = lov->lo_lsm;
+ obd_off start;
+ obd_off end;
+ int result;
+
+ if (lov_r0(lov)->lo_nr == 1)
+ return cl_lock_ext_match(child, descr);
+
+ /*
+ * For a multi-stripes object:
+ * - make sure the descr only covers child's stripe, and
+ * - check if extent is matching.
+ */
+ start = cl_offset(&lov->lo_cl, descr->cld_start);
+ end = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
+ result = end - start <= lsm->lsm_stripe_size &&
+ stripe == lov_stripe_number(lsm, start) &&
+ stripe == lov_stripe_number(lsm, end);
+ if (result) {
+ struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
+ obd_off sub_start;
+ obd_off sub_end;
+
+ subd->cld_obj = NULL; /* don't need sub object at all */
+ subd->cld_mode = descr->cld_mode;
+ subd->cld_gid = descr->cld_gid;
+ result = lov_stripe_intersects(lsm, stripe, start, end,
+ &sub_start, &sub_end);
+ LASSERT(result);
+ subd->cld_start = cl_index(child->cld_obj, sub_start);
+ subd->cld_end = cl_index(child->cld_obj, sub_end);
+ result = cl_lock_ext_match(child, subd);
+ }
+ return result;
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_fits_into() method.
+ *
+ * Checks whether a lock (given by \a slice) is suitable for \a
+ * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
+ * O_APPEND write.
+ *
+ * \see ccc_lock_fits_into().
+ */
+static int lov_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io)
+{
+ struct lov_lock *lov = cl2lov_lock(slice);
+ struct lov_object *obj = cl2lov(slice->cls_obj);
+ int result;
+
+ LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
+ LASSERT(lov->lls_nr > 0);
+
+ ENTRY;
+
+ /* for top lock, it's necessary to match enq flags otherwise it will
+ * run into problem if a sublock is missing and reenqueue. */
+ if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
+ return 0;
+
+ if (need->cld_mode == CLM_GROUP)
+ /*
+ * always allow to match group lock.
+ */
+ result = cl_lock_ext_match(&lov->lls_orig, need);
+ else if (lov->lls_nr == 1) {
+ struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
+ result = lov_lock_stripe_is_matching(env,
+ cl2lov(slice->cls_obj),
+ lov->lls_sub[0].sub_stripe,
+ got, need);
+ } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
+ !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
+ /*
+ * Multi-stripe locks are only suitable for `quick' IO and for
+ * glimpse.
+ */
+ result = 0;
+ else
+ /*
+ * Most general case: multi-stripe existing lock, and
+ * (potentially) multi-stripe @need lock. Check that @need is
+ * covered by @lov's sub-locks.
+ *
+ * For now, ignore lock expansions made by the server, and
+ * match against original lock extent.
+ */
+ result = cl_lock_ext_match(&lov->lls_orig, need);
+ CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
+ PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
+ lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
+ result);
+ RETURN(result);
+}
+
+void lov_lock_unlink(const struct lu_env *env,
+ struct lov_lock_link *link, struct lovsub_lock *sub)
+{
+ struct lov_lock *lck = link->lll_super;
+ struct cl_lock *parent = lck->lls_cl.cls_lock;
+
+ LASSERT(cl_lock_is_mutexed(parent));
+ LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
+ ENTRY;
+
+ list_del_init(&link->lll_list);
+ LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
+ /* yank this sub-lock from parent's array */
+ lck->lls_sub[link->lll_idx].sub_lock = NULL;
+ LASSERT(lck->lls_nr_filled > 0);
+ lck->lls_nr_filled--;
+ lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
+ cl_lock_put(env, parent);
+ OBD_SLAB_FREE_PTR(link, lov_lock_link_kmem);
+ EXIT;
+}
+
+struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
+ struct lov_lock *lck,
+ struct lovsub_lock *sub)
+{
+ struct lov_lock_link *scan;
+
+ LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
+ ENTRY;
+
+ list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ if (scan->lll_super == lck)
+ RETURN(scan);
+ }
+ RETURN(NULL);
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_delete() method. This is
+ * invoked for "top-to-bottom" delete, when lock destruction starts from the
+ * top-lock, e.g., as a result of inode destruction.
+ *
+ * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
+ * this is done separately elsewhere:
+ *
+ * - for inode destruction, lov_object_delete() calls cl_object_kill() for
+ * each sub-object, purging its locks;
+ *
+ * - in other cases (e.g., a fatal error with a top-lock) sub-locks are
+ * left in the cache.
+ */
+static void lov_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ struct lov_lock_link *link;
+ int rc;
+ int i;
+
+ LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
+ ENTRY;
+
+ for (i = 0; i < lck->lls_nr; ++i) {
+ struct lov_lock_sub *lls = &lck->lls_sub[i];
+ struct lovsub_lock *lsl = lls->sub_lock;
+
+ if (lsl == NULL) /* already removed */
+ continue;
+
+ rc = lov_sublock_lock(env, lck, lls, closure, NULL);
+ if (rc == CLO_REPEAT) {
+ --i;
+ continue;
+ }
+
+ LASSERT(rc == 0);
+ LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
+
+ if (lls->sub_flags & LSF_HELD)
+ lov_sublock_release(env, lck, i, 1, 0);
+
+ link = lov_lock_link_find(env, lck, lsl);
+ LASSERT(link != NULL);
+ lov_lock_unlink(env, link, lsl);
+ LASSERT(lck->lls_sub[i].sub_lock == NULL);
+
+ lov_sublock_unlock(env, lsl, closure, NULL);
+ }
+
+ cl_lock_closure_fini(closure);
+ EXIT;
+}
+
+static int lov_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ int i;
+
+ (*p)(env, cookie, "%d\n", lck->lls_nr);
+ for (i = 0; i < lck->lls_nr; ++i) {
+ struct lov_lock_sub *sub;
+
+ sub = &lck->lls_sub[i];
+ (*p)(env, cookie, " %d %x: ", i, sub->sub_flags);
+ if (sub->sub_lock != NULL)
+ cl_lock_print(env, cookie, p,
+ sub->sub_lock->lss_cl.cls_lock);
+ else
+ (*p)(env, cookie, "---\n");
+ }
+ return 0;
+}
+
+static const struct cl_lock_operations lov_lock_ops = {
+ .clo_fini = lov_lock_fini,
+ .clo_enqueue = lov_lock_enqueue,
+ .clo_wait = lov_lock_wait,
+ .clo_use = lov_lock_use,
+ .clo_unuse = lov_lock_unuse,
+ .clo_cancel = lov_lock_cancel,
+ .clo_fits_into = lov_lock_fits_into,
+ .clo_delete = lov_lock_delete,
+ .clo_print = lov_lock_print
+};
+
+int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io)
+{
+ struct lov_lock *lck;
+ int result;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lck, lov_lock_kmem, __GFP_IO);
+ if (lck != NULL) {
+ cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
+ result = lov_lock_sub_init(env, lck, io);
+ } else
+ result = -ENOMEM;
+ RETURN(result);
+}
+
+static void lov_empty_lock_fini(const struct lu_env *env,
+ struct cl_lock_slice *slice)
+{
+ struct lov_lock *lck = cl2lov_lock(slice);
+ OBD_SLAB_FREE_PTR(lck, lov_lock_kmem);
+}
+
+static int lov_empty_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_lock_slice *slice)
+{
+ (*p)(env, cookie, "empty\n");
+ return 0;
+}
+
+/* XXX: more methods will be added later. */
+static const struct cl_lock_operations lov_empty_lock_ops = {
+ .clo_fini = lov_empty_lock_fini,
+ .clo_print = lov_empty_lock_print
+};
+
+int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io)
+{
+ struct lov_lock *lck;
+ int result = -ENOMEM;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lck, lov_lock_kmem, __GFP_IO);
+ if (lck != NULL) {
+ cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
+ lck->lls_orig = lock->cll_descr;
+ result = 0;
+ }
+ RETURN(result);
+}
+
+static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
+ struct cl_lock *parent)
+{
+ struct cl_lock_closure *closure;
+
+ closure = &lov_env_info(env)->lti_closure;
+ LASSERT(list_empty(&closure->clc_list));
+ cl_lock_closure_init(env, closure, parent, 1);
+ return closure;
+}
+
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_log.c b/drivers/staging/lustre/lustre/lov/lov_log.c
new file mode 100644
index 000000000000..63b7f8d3182f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_log.c
@@ -0,0 +1,278 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_log.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <obd_ost.h>
+#include <lprocfs_status.h>
+#include <lustre_log.h>
+
+#include "lov_internal.h"
+
+/* Add log records for each OSC that this object is striped over, and return
+ * cookies for each one. We _would_ have nice abstraction here, except that
+ * we need to keep cookies in stripe order, even if some are NULL, so that
+ * the right cookies are passed back to the right OSTs at the client side.
+ * Unset cookies should be all-zero (which will never occur naturally). */
+static int lov_llog_origin_add(const struct lu_env *env,
+ struct llog_ctxt *ctxt,
+ struct llog_rec_hdr *rec,
+ struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies)
+{
+ struct obd_device *obd = ctxt->loc_obd;
+ struct lov_obd *lov = &obd->u.lov;
+ int i, rc = 0, cookies = 0;
+ ENTRY;
+
+ LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count,
+ "logcookies %p, numcookies %d lsm->lsm_stripe_count %d \n",
+ logcookies, numcookies, lsm->lsm_stripe_count);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ struct obd_device *child =
+ lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd;
+ struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx);
+
+ /* fill mds unlink/setattr log record */
+ switch (rec->lrh_type) {
+ case MDS_UNLINK_REC: {
+ struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec;
+ lur->lur_oid = ostid_id(&loi->loi_oi);
+ lur->lur_oseq = (__u32)ostid_seq(&loi->loi_oi);
+ break;
+ }
+ case MDS_SETATTR64_REC: {
+ struct llog_setattr64_rec *lsr = (struct llog_setattr64_rec *)rec;
+ lsr->lsr_oi = loi->loi_oi;
+ break;
+ }
+ default:
+ break;
+ }
+
+ /* inject error in llog_obd_add() below */
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) {
+ llog_ctxt_put(cctxt);
+ cctxt = NULL;
+ }
+ rc = llog_obd_add(env, cctxt, rec, NULL, logcookies + cookies,
+ numcookies - cookies);
+ llog_ctxt_put(cctxt);
+ if (rc < 0) {
+ CERROR("Can't add llog (rc = %d) for stripe %d\n",
+ rc, cookies);
+ memset(logcookies + cookies, 0,
+ sizeof(struct llog_cookie));
+ rc = 1; /* skip this cookie */
+ }
+ /* Note that rc is always 1 if llog_obd_add was successful */
+ cookies += rc;
+ }
+ RETURN(cookies);
+}
+
+static int lov_llog_origin_connect(struct llog_ctxt *ctxt,
+ struct llog_logid *logid,
+ struct llog_gen *gen,
+ struct obd_uuid *uuid)
+{
+ struct obd_device *obd = ctxt->loc_obd;
+ struct lov_obd *lov = &obd->u.lov;
+ int i, rc = 0, err = 0;
+ ENTRY;
+
+ obd_getref(obd);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ struct obd_device *child;
+ struct llog_ctxt *cctxt;
+
+ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
+ continue;
+ if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid))
+ continue;
+ CDEBUG(D_CONFIG, "connect %d/%d\n", i, lov->desc.ld_tgt_count);
+ child = lov->lov_tgts[i]->ltd_exp->exp_obd;
+ cctxt = llog_get_context(child, ctxt->loc_idx);
+ rc = llog_connect(cctxt, logid, gen, uuid);
+ llog_ctxt_put(cctxt);
+
+ if (rc) {
+ CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc);
+ if (!err)
+ err = rc;
+ }
+ }
+ obd_putref(obd);
+
+ RETURN(err);
+}
+
+/* the replicators commit callback */
+static int lov_llog_repl_cancel(const struct lu_env *env,
+ struct llog_ctxt *ctxt,
+ struct lov_stripe_md *lsm,
+ int count, struct llog_cookie *cookies,
+ int flags)
+{
+ struct lov_obd *lov;
+ struct obd_device *obd = ctxt->loc_obd;
+ int rc = 0, i;
+ ENTRY;
+
+ LASSERT(lsm != NULL);
+ LASSERT(count == lsm->lsm_stripe_count);
+
+ lov = &obd->u.lov;
+ obd_getref(obd);
+ for (i = 0; i < count; i++, cookies++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ struct obd_device *child =
+ lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd;
+ struct llog_ctxt *cctxt =
+ llog_get_context(child, ctxt->loc_idx);
+ int err;
+
+ err = llog_cancel(env, cctxt, NULL, 1, cookies, flags);
+ llog_ctxt_put(cctxt);
+ if (err && lov->lov_tgts[loi->loi_ost_idx]->ltd_active) {
+ CERROR("%s: objid "DOSTID" subobj "DOSTID
+ " on OST idx %d: rc = %d\n",
+ obd->obd_name, POSTID(&lsm->lsm_oi),
+ POSTID(&loi->loi_oi), loi->loi_ost_idx, err);
+ if (!rc)
+ rc = err;
+ }
+ }
+ obd_putref(obd);
+ RETURN(rc);
+}
+
+static struct llog_operations lov_mds_ost_orig_logops = {
+ .lop_obd_add = lov_llog_origin_add,
+ .lop_connect = lov_llog_origin_connect,
+};
+
+static struct llog_operations lov_size_repl_logops = {
+ .lop_cancel = lov_llog_repl_cancel,
+};
+
+int lov_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *disk_obd, int *index)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct obd_device *child;
+ int i, rc = 0;
+ ENTRY;
+
+ LASSERT(olg == &obd->obd_olg);
+ rc = llog_setup(NULL, obd, olg, LLOG_MDS_OST_ORIG_CTXT, disk_obd,
+ &lov_mds_ost_orig_logops);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_setup(NULL, obd, olg, LLOG_SIZE_REPL_CTXT, disk_obd,
+ &lov_size_repl_logops);
+ if (rc)
+ GOTO(err_cleanup, rc);
+
+ obd_getref(obd);
+ /* count may not match lov->desc.ld_tgt_count during dynamic ost add */
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (!lov->lov_tgts[i])
+ continue;
+
+ if (index && i != *index)
+ continue;
+
+ child = lov->lov_tgts[i]->ltd_obd;
+ rc = obd_llog_init(child, &child->obd_olg, disk_obd, &i);
+ if (rc)
+ CERROR("error osc_llog_init idx %d osc '%s' tgt '%s' "
+ "(rc=%d)\n", i, child->obd_name,
+ disk_obd->obd_name, rc);
+ rc = 0;
+ }
+ obd_putref(obd);
+ GOTO(err_cleanup, rc);
+err_cleanup:
+ if (rc) {
+ struct llog_ctxt *ctxt =
+ llog_get_context(obd, LLOG_SIZE_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+ ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+ }
+ return rc;
+}
+
+int lov_llog_finish(struct obd_device *obd, int count)
+{
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ /* cleanup our llogs only if the ctxts have been setup
+ * (client lov doesn't setup, mds lov does). */
+ ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+
+ ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+
+ /* lov->tgt llogs are cleaned during osc_cleanup. */
+ RETURN(0);
+}
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
new file mode 100644
index 000000000000..ddbac1220263
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -0,0 +1,218 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <obd_lov.h>
+
+#include "lov_internal.h"
+
+/** Merge the lock value block(&lvb) attributes and KMS from each of the
+ * stripes in a file into a single lvb. It is expected that the caller
+ * initializes the current atime, mtime, ctime to avoid regressing a more
+ * uptodate time on the local client.
+ */
+int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
+ struct ost_lvb *lvb, __u64 *kms_place)
+{
+ __u64 size = 0;
+ __u64 kms = 0;
+ __u64 blocks = 0;
+ obd_time current_mtime = lvb->lvb_mtime;
+ obd_time current_atime = lvb->lvb_atime;
+ obd_time current_ctime = lvb->lvb_ctime;
+ int i;
+ int rc = 0;
+
+ LASSERT(spin_is_locked(&lsm->lsm_lock));
+ LASSERT(lsm->lsm_lock_owner == current_pid());
+
+ CDEBUG(D_INODE, "MDT ID "DOSTID" initial value: s="LPU64" m="LPU64
+ " a="LPU64" c="LPU64" b="LPU64"\n", POSTID(&lsm->lsm_oi),
+ lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime, lvb->lvb_ctime,
+ lvb->lvb_blocks);
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ obd_size lov_size, tmpsize;
+
+ if (OST_LVB_IS_ERR(loi->loi_lvb.lvb_blocks)) {
+ rc = OST_LVB_GET_ERR(loi->loi_lvb.lvb_blocks);
+ continue;
+ }
+
+ tmpsize = loi->loi_kms;
+ lov_size = lov_stripe_size(lsm, tmpsize, i);
+ if (lov_size > kms)
+ kms = lov_size;
+
+ if (loi->loi_lvb.lvb_size > tmpsize)
+ tmpsize = loi->loi_lvb.lvb_size;
+
+ lov_size = lov_stripe_size(lsm, tmpsize, i);
+ if (lov_size > size)
+ size = lov_size;
+ /* merge blocks, mtime, atime */
+ blocks += loi->loi_lvb.lvb_blocks;
+ if (loi->loi_lvb.lvb_mtime > current_mtime)
+ current_mtime = loi->loi_lvb.lvb_mtime;
+ if (loi->loi_lvb.lvb_atime > current_atime)
+ current_atime = loi->loi_lvb.lvb_atime;
+ if (loi->loi_lvb.lvb_ctime > current_ctime)
+ current_ctime = loi->loi_lvb.lvb_ctime;
+
+ CDEBUG(D_INODE, "MDT ID "DOSTID" on OST[%u]: s="LPU64" m="LPU64
+ " a="LPU64" c="LPU64" b="LPU64"\n", POSTID(&lsm->lsm_oi),
+ loi->loi_ost_idx, loi->loi_lvb.lvb_size,
+ loi->loi_lvb.lvb_mtime, loi->loi_lvb.lvb_atime,
+ loi->loi_lvb.lvb_ctime, loi->loi_lvb.lvb_blocks);
+ }
+
+ *kms_place = kms;
+ lvb->lvb_size = size;
+ lvb->lvb_blocks = blocks;
+ lvb->lvb_mtime = current_mtime;
+ lvb->lvb_atime = current_atime;
+ lvb->lvb_ctime = current_ctime;
+ RETURN(rc);
+}
+
+/** Merge the lock value block(&lvb) attributes from each of the stripes in a
+ * file into a single lvb. It is expected that the caller initializes the
+ * current atime, mtime, ctime to avoid regressing a more uptodate time on
+ * the local client.
+ *
+ * If \a kms_only is set then we do not consider the recently seen size (rss)
+ * when updating the known minimum size (kms). Even when merging RSS, we will
+ * take the KMS value if it's larger. This prevents getattr from stomping on
+ * dirty cached pages which extend the file size. */
+int lov_merge_lvb(struct obd_export *exp,
+ struct lov_stripe_md *lsm, struct ost_lvb *lvb, int kms_only)
+{
+ int rc;
+ __u64 kms;
+
+ ENTRY;
+ lov_stripe_lock(lsm);
+ rc = lov_merge_lvb_kms(lsm, lvb, &kms);
+ lov_stripe_unlock(lsm);
+ if (kms_only)
+ lvb->lvb_size = kms;
+
+ CDEBUG(D_INODE, "merged for ID "DOSTID" s="LPU64" m="LPU64" a="LPU64
+ " c="LPU64" b="LPU64"\n", POSTID(&lsm->lsm_oi), lvb->lvb_size,
+ lvb->lvb_mtime, lvb->lvb_atime, lvb->lvb_ctime, lvb->lvb_blocks);
+ RETURN(rc);
+}
+
+/* Must be called under the lov_stripe_lock() */
+int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink)
+{
+ struct lov_oinfo *loi;
+ int stripe = 0;
+ __u64 kms;
+ ENTRY;
+
+ LASSERT(spin_is_locked(&lsm->lsm_lock));
+ LASSERT(lsm->lsm_lock_owner == current_pid());
+
+ if (shrink) {
+ for (; stripe < lsm->lsm_stripe_count; stripe++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[stripe];
+ kms = lov_size_to_stripe(lsm, size, stripe);
+ CDEBUG(D_INODE,
+ "stripe %d KMS %sing "LPU64"->"LPU64"\n",
+ stripe, kms > loi->loi_kms ? "increas":"shrink",
+ loi->loi_kms, kms);
+ loi_kms_set(loi, loi->loi_lvb.lvb_size = kms);
+ }
+ RETURN(0);
+ }
+
+ if (size > 0)
+ stripe = lov_stripe_number(lsm, size - 1);
+ kms = lov_size_to_stripe(lsm, size, stripe);
+ loi = lsm->lsm_oinfo[stripe];
+
+ CDEBUG(D_INODE, "stripe %d KMS %sincreasing "LPU64"->"LPU64"\n",
+ stripe, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms);
+ if (kms > loi->loi_kms)
+ loi_kms_set(loi, kms);
+
+ RETURN(0);
+}
+
+void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid,
+ struct lov_stripe_md *lsm, int stripeno, int *set)
+{
+ valid &= src->o_valid;
+
+ if (*set) {
+ if (valid & OBD_MD_FLSIZE) {
+ /* this handles sparse files properly */
+ obd_size lov_size;
+
+ lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
+ if (lov_size > tgt->o_size)
+ tgt->o_size = lov_size;
+ }
+ if (valid & OBD_MD_FLBLOCKS)
+ tgt->o_blocks += src->o_blocks;
+ if (valid & OBD_MD_FLBLKSZ)
+ tgt->o_blksize += src->o_blksize;
+ if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
+ tgt->o_ctime = src->o_ctime;
+ if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
+ tgt->o_mtime = src->o_mtime;
+ if (valid & OBD_MD_FLDATAVERSION)
+ tgt->o_data_version += src->o_data_version;
+ } else {
+ memcpy(tgt, src, sizeof(*tgt));
+ tgt->o_oi = lsm->lsm_oi;
+ if (valid & OBD_MD_FLSIZE)
+ tgt->o_size = lov_stripe_size(lsm, src->o_size,
+ stripeno);
+ }
+
+ /* data_version needs to be valid on all stripes to be correct! */
+ if (!(valid & OBD_MD_FLDATAVERSION))
+ tgt->o_valid &= ~OBD_MD_FLDATAVERSION;
+
+ *set += 1;
+}
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
new file mode 100644
index 000000000000..ef7ff091f048
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -0,0 +1,2916 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_obd.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <lustre_debug.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <obd_ost.h>
+#include <lprocfs_status.h>
+#include <lustre_param.h>
+#include <cl_object.h>
+#include <lclient.h> /* for cl_client_lru */
+#include <lustre/ll_fiemap.h>
+#include <lustre_log.h>
+#include <lustre_fid.h>
+
+#include "lov_internal.h"
+
+/* Keep a refcount of lov->tgt usage to prevent racing with addition/deletion.
+ Any function that expects lov_tgts to remain stationary must take a ref. */
+static void lov_getref(struct obd_device *obd)
+{
+ struct lov_obd *lov = &obd->u.lov;
+
+ /* nobody gets through here until lov_putref is done */
+ mutex_lock(&lov->lov_lock);
+ atomic_inc(&lov->lov_refcount);
+ mutex_unlock(&lov->lov_lock);
+ return;
+}
+
+static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt);
+
+static void lov_putref(struct obd_device *obd)
+{
+ struct lov_obd *lov = &obd->u.lov;
+
+ mutex_lock(&lov->lov_lock);
+ /* ok to dec to 0 more than once -- ltd_exp's will be null */
+ if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) {
+ LIST_HEAD(kill);
+ int i;
+ struct lov_tgt_desc *tgt, *n;
+ CDEBUG(D_CONFIG, "destroying %d lov targets\n",
+ lov->lov_death_row);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ tgt = lov->lov_tgts[i];
+
+ if (!tgt || !tgt->ltd_reap)
+ continue;
+ list_add(&tgt->ltd_kill, &kill);
+ /* XXX - right now there is a dependency on ld_tgt_count
+ * being the maximum tgt index for computing the
+ * mds_max_easize. So we can't shrink it. */
+ lov_ost_pool_remove(&lov->lov_packed, i);
+ lov->lov_tgts[i] = NULL;
+ lov->lov_death_row--;
+ }
+ mutex_unlock(&lov->lov_lock);
+
+ list_for_each_entry_safe(tgt, n, &kill, ltd_kill) {
+ list_del(&tgt->ltd_kill);
+ /* Disconnect */
+ __lov_del_obd(obd, tgt);
+ }
+ } else {
+ mutex_unlock(&lov->lov_lock);
+ }
+}
+
+static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
+ enum obd_notify_event ev);
+static int lov_notify(struct obd_device *obd, struct obd_device *watched,
+ enum obd_notify_event ev, void *data);
+
+
+#define MAX_STRING_SIZE 128
+int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
+ struct obd_connect_data *data)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct obd_uuid *tgt_uuid;
+ struct obd_device *tgt_obd;
+ static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
+ struct obd_import *imp;
+ proc_dir_entry_t *lov_proc_dir;
+ int rc;
+ ENTRY;
+
+ if (!lov->lov_tgts[index])
+ RETURN(-EINVAL);
+
+ tgt_uuid = &lov->lov_tgts[index]->ltd_uuid;
+ tgt_obd = lov->lov_tgts[index]->ltd_obd;
+
+ if (!tgt_obd->obd_set_up) {
+ CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid));
+ RETURN(-EINVAL);
+ }
+
+ /* override the sp_me from lov */
+ tgt_obd->u.cli.cl_sp_me = lov->lov_sp_me;
+
+ if (data && (data->ocd_connect_flags & OBD_CONNECT_INDEX))
+ data->ocd_index = index;
+
+ /*
+ * Divine LOV knows that OBDs under it are OSCs.
+ */
+ imp = tgt_obd->u.cli.cl_import;
+
+ if (activate) {
+ tgt_obd->obd_no_recov = 0;
+ /* FIXME this is probably supposed to be
+ ptlrpc_set_import_active. Horrible naming. */
+ ptlrpc_activate_import(imp);
+ }
+
+ rc = obd_register_observer(tgt_obd, obd);
+ if (rc) {
+ CERROR("Target %s register_observer error %d\n",
+ obd_uuid2str(tgt_uuid), rc);
+ RETURN(rc);
+ }
+
+
+ if (imp->imp_invalid) {
+ CDEBUG(D_CONFIG, "not connecting OSC %s; administratively "
+ "disabled\n", obd_uuid2str(tgt_uuid));
+ RETURN(0);
+ }
+
+ rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd,
+ &lov_osc_uuid, data, NULL);
+ if (rc || !lov->lov_tgts[index]->ltd_exp) {
+ CERROR("Target %s connect error %d\n",
+ obd_uuid2str(tgt_uuid), rc);
+ RETURN(-ENODEV);
+ }
+
+ lov->lov_tgts[index]->ltd_reap = 0;
+
+ CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index,
+ obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? "":"in");
+
+ lov_proc_dir = obd->obd_proc_private;
+ if (lov_proc_dir) {
+ struct obd_device *osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd;
+ proc_dir_entry_t *osc_symlink;
+
+ LASSERT(osc_obd != NULL);
+ LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC);
+ LASSERT(osc_obd->obd_type->typ_name != NULL);
+
+ osc_symlink = lprocfs_add_symlink(osc_obd->obd_name,
+ lov_proc_dir,
+ "../../../%s/%s",
+ osc_obd->obd_type->typ_name,
+ osc_obd->obd_name);
+ if (osc_symlink == NULL) {
+ CERROR("could not register LOV target "
+ "/proc/fs/lustre/%s/%s/target_obds/%s.",
+ obd->obd_type->typ_name, obd->obd_name,
+ osc_obd->obd_name);
+ lprocfs_remove(&lov_proc_dir);
+ obd->obd_proc_private = NULL;
+ }
+ }
+
+ RETURN(0);
+}
+
+static int lov_connect(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *obd,
+ struct obd_uuid *cluuid, struct obd_connect_data *data,
+ void *localdata)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_tgt_desc *tgt;
+ struct lustre_handle conn;
+ int i, rc;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects);
+
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc)
+ RETURN(rc);
+
+ *exp = class_conn2export(&conn);
+
+ /* Why should there ever be more than 1 connect? */
+ lov->lov_connects++;
+ LASSERT(lov->lov_connects == 1);
+
+ memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd));
+ if (data)
+ lov->lov_ocd = *data;
+
+ obd_getref(obd);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ tgt = lov->lov_tgts[i];
+ if (!tgt || obd_uuid_empty(&tgt->ltd_uuid))
+ continue;
+ /* Flags will be lowest common denominator */
+ rc = lov_connect_obd(obd, i, tgt->ltd_activate, &lov->lov_ocd);
+ if (rc) {
+ CERROR("%s: lov connect tgt %d failed: %d\n",
+ obd->obd_name, i, rc);
+ continue;
+ }
+ /* connect to administrative disabled ost */
+ if (!lov->lov_tgts[i]->ltd_exp)
+ continue;
+
+ rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd,
+ OBD_NOTIFY_CONNECT, (void *)&i);
+ if (rc) {
+ CERROR("%s error sending notify %d\n",
+ obd->obd_name, rc);
+ }
+ }
+ obd_putref(obd);
+
+ RETURN(0);
+}
+
+static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
+{
+ proc_dir_entry_t *lov_proc_dir;
+ struct lov_obd *lov = &obd->u.lov;
+ struct obd_device *osc_obd;
+ int rc;
+ ENTRY;
+
+ osc_obd = class_exp2obd(tgt->ltd_exp);
+ CDEBUG(D_CONFIG, "%s: disconnecting target %s\n",
+ obd->obd_name, osc_obd->obd_name);
+
+ if (tgt->ltd_active) {
+ tgt->ltd_active = 0;
+ lov->desc.ld_active_tgt_count--;
+ tgt->ltd_exp->exp_obd->obd_inactive = 1;
+ }
+
+ lov_proc_dir = obd->obd_proc_private;
+ if (lov_proc_dir)
+ lprocfs_remove_proc_entry(osc_obd->obd_name, lov_proc_dir);
+
+ if (osc_obd) {
+ /* Pass it on to our clients.
+ * XXX This should be an argument to disconnect,
+ * XXX not a back-door flag on the OBD. Ah well.
+ */
+ osc_obd->obd_force = obd->obd_force;
+ osc_obd->obd_fail = obd->obd_fail;
+ osc_obd->obd_no_recov = obd->obd_no_recov;
+ }
+
+ obd_register_observer(osc_obd, NULL);
+
+ rc = obd_disconnect(tgt->ltd_exp);
+ if (rc) {
+ CERROR("Target %s disconnect error %d\n",
+ tgt->ltd_uuid.uuid, rc);
+ rc = 0;
+ }
+
+ tgt->ltd_exp = NULL;
+ RETURN(0);
+}
+
+static int lov_disconnect(struct obd_export *exp)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_obd *lov = &obd->u.lov;
+ int i, rc;
+ ENTRY;
+
+ if (!lov->lov_tgts)
+ goto out;
+
+ /* Only disconnect the underlying layers on the final disconnect. */
+ lov->lov_connects--;
+ if (lov->lov_connects != 0) {
+ /* why should there be more than 1 connect? */
+ CERROR("disconnect #%d\n", lov->lov_connects);
+ goto out;
+ }
+
+ /* Let's hold another reference so lov_del_obd doesn't spin through
+ putref every time */
+ obd_getref(obd);
+
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (lov->lov_tgts[i] && lov->lov_tgts[i]->ltd_exp) {
+ /* Disconnection is the last we know about an obd */
+ lov_del_target(obd, i, 0, lov->lov_tgts[i]->ltd_gen);
+ }
+ }
+ obd_putref(obd);
+
+out:
+ rc = class_disconnect(exp); /* bz 9811 */
+ RETURN(rc);
+}
+
+/* Error codes:
+ *
+ * -EINVAL : UUID can't be found in the LOV's target list
+ * -ENOTCONN: The UUID is found, but the target connection is bad (!)
+ * -EBADF : The UUID is found, but the OBD is the wrong type (!)
+ * any >= 0 : is log target index
+ */
+static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
+ enum obd_notify_event ev)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_tgt_desc *tgt;
+ int index, activate, active;
+ ENTRY;
+
+ CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n",
+ lov, uuid->uuid, ev);
+
+ obd_getref(obd);
+ for (index = 0; index < lov->desc.ld_tgt_count; index++) {
+ tgt = lov->lov_tgts[index];
+ if (!tgt)
+ continue;
+ /*
+ * LU-642, initially inactive OSC could miss the obd_connect,
+ * we make up for it here.
+ */
+ if (ev == OBD_NOTIFY_ACTIVATE && tgt->ltd_exp == NULL &&
+ obd_uuid_equals(uuid, &tgt->ltd_uuid)) {
+ struct obd_uuid lov_osc_uuid = {"LOV_OSC_UUID"};
+
+ obd_connect(NULL, &tgt->ltd_exp, tgt->ltd_obd,
+ &lov_osc_uuid, &lov->lov_ocd, NULL);
+ }
+ if (!tgt->ltd_exp)
+ continue;
+
+ CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
+ index, obd_uuid2str(&tgt->ltd_uuid),
+ tgt->ltd_exp->exp_handle.h_cookie);
+ if (obd_uuid_equals(uuid, &tgt->ltd_uuid))
+ break;
+ }
+
+ if (index == lov->desc.ld_tgt_count)
+ GOTO(out, index = -EINVAL);
+
+ if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) {
+ activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0;
+
+ if (lov->lov_tgts[index]->ltd_activate == activate) {
+ CDEBUG(D_INFO, "OSC %s already %sactivate!\n",
+ uuid->uuid, activate ? "" : "de");
+ } else {
+ lov->lov_tgts[index]->ltd_activate = activate;
+ CDEBUG(D_CONFIG, "%sactivate OSC %s\n",
+ activate ? "" : "de", obd_uuid2str(uuid));
+ }
+
+ } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) {
+ active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0;
+
+ if (lov->lov_tgts[index]->ltd_active == active) {
+ CDEBUG(D_INFO, "OSC %s already %sactive!\n",
+ uuid->uuid, active ? "" : "in");
+ GOTO(out, index);
+ } else {
+ CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n",
+ obd_uuid2str(uuid), active ? "" : "in");
+ }
+
+ lov->lov_tgts[index]->ltd_active = active;
+ if (active) {
+ lov->desc.ld_active_tgt_count++;
+ lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0;
+ } else {
+ lov->desc.ld_active_tgt_count--;
+ lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1;
+ }
+ } else {
+ CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid);
+ }
+
+ out:
+ obd_putref(obd);
+ RETURN(index);
+}
+
+static int lov_notify(struct obd_device *obd, struct obd_device *watched,
+ enum obd_notify_event ev, void *data)
+{
+ int rc = 0;
+ struct lov_obd *lov = &obd->u.lov;
+ ENTRY;
+
+ down_read(&lov->lov_notify_lock);
+ if (!lov->lov_connects) {
+ up_read(&lov->lov_notify_lock);
+ RETURN(rc);
+ }
+
+ if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE ||
+ ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) {
+ struct obd_uuid *uuid;
+
+ LASSERT(watched);
+
+ if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ up_read(&lov->lov_notify_lock);
+ CERROR("unexpected notification of %s %s!\n",
+ watched->obd_type->typ_name,
+ watched->obd_name);
+ RETURN(-EINVAL);
+ }
+ uuid = &watched->u.cli.cl_target_uuid;
+
+ /* Set OSC as active before notifying the observer, so the
+ * observer can use the OSC normally.
+ */
+ rc = lov_set_osc_active(obd, uuid, ev);
+ if (rc < 0) {
+ up_read(&lov->lov_notify_lock);
+ CERROR("event(%d) of %s failed: %d\n", ev,
+ obd_uuid2str(uuid), rc);
+ RETURN(rc);
+ }
+ /* active event should be pass lov target index as data */
+ data = &rc;
+ }
+
+ /* Pass the notification up the chain. */
+ if (watched) {
+ rc = obd_notify_observer(obd, watched, ev, data);
+ } else {
+ /* NULL watched means all osc's in the lov (only for syncs) */
+ /* sync event should be send lov idx as data */
+ struct lov_obd *lov = &obd->u.lov;
+ int i, is_sync;
+
+ data = &i;
+ is_sync = (ev == OBD_NOTIFY_SYNC) ||
+ (ev == OBD_NOTIFY_SYNC_NONBLOCK);
+
+ obd_getref(obd);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (!lov->lov_tgts[i])
+ continue;
+
+ /* don't send sync event if target not
+ * connected/activated */
+ if (is_sync && !lov->lov_tgts[i]->ltd_active)
+ continue;
+
+ rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd,
+ ev, data);
+ if (rc) {
+ CERROR("%s: notify %s of %s failed %d\n",
+ obd->obd_name,
+ obd->obd_observer->obd_name,
+ lov->lov_tgts[i]->ltd_obd->obd_name,
+ rc);
+ }
+ }
+ obd_putref(obd);
+ }
+
+ up_read(&lov->lov_notify_lock);
+ RETURN(rc);
+}
+
+static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
+ __u32 index, int gen, int active)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_tgt_desc *tgt;
+ struct obd_device *tgt_obd;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n",
+ uuidp->uuid, index, gen, active);
+
+ if (gen <= 0) {
+ CERROR("request to add OBD %s with invalid generation: %d\n",
+ uuidp->uuid, gen);
+ RETURN(-EINVAL);
+ }
+
+ tgt_obd = class_find_client_obd(uuidp, LUSTRE_OSC_NAME,
+ &obd->obd_uuid);
+ if (tgt_obd == NULL)
+ RETURN(-EINVAL);
+
+ mutex_lock(&lov->lov_lock);
+
+ if ((index < lov->lov_tgt_size) && (lov->lov_tgts[index] != NULL)) {
+ tgt = lov->lov_tgts[index];
+ CERROR("UUID %s already assigned at LOV target index %d\n",
+ obd_uuid2str(&tgt->ltd_uuid), index);
+ mutex_unlock(&lov->lov_lock);
+ RETURN(-EEXIST);
+ }
+
+ if (index >= lov->lov_tgt_size) {
+ /* We need to reallocate the lov target array. */
+ struct lov_tgt_desc **newtgts, **old = NULL;
+ __u32 newsize, oldsize = 0;
+
+ newsize = max(lov->lov_tgt_size, (__u32)2);
+ while (newsize < index + 1)
+ newsize = newsize << 1;
+ OBD_ALLOC(newtgts, sizeof(*newtgts) * newsize);
+ if (newtgts == NULL) {
+ mutex_unlock(&lov->lov_lock);
+ RETURN(-ENOMEM);
+ }
+
+ if (lov->lov_tgt_size) {
+ memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) *
+ lov->lov_tgt_size);
+ old = lov->lov_tgts;
+ oldsize = lov->lov_tgt_size;
+ }
+
+ lov->lov_tgts = newtgts;
+ lov->lov_tgt_size = newsize;
+ smp_rmb();
+ if (old)
+ OBD_FREE(old, sizeof(*old) * oldsize);
+
+ CDEBUG(D_CONFIG, "tgts: %p size: %d\n",
+ lov->lov_tgts, lov->lov_tgt_size);
+ }
+
+ OBD_ALLOC_PTR(tgt);
+ if (!tgt) {
+ mutex_unlock(&lov->lov_lock);
+ RETURN(-ENOMEM);
+ }
+
+ rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size);
+ if (rc) {
+ mutex_unlock(&lov->lov_lock);
+ OBD_FREE_PTR(tgt);
+ RETURN(rc);
+ }
+
+ tgt->ltd_uuid = *uuidp;
+ tgt->ltd_obd = tgt_obd;
+ /* XXX - add a sanity check on the generation number. */
+ tgt->ltd_gen = gen;
+ tgt->ltd_index = index;
+ tgt->ltd_activate = active;
+ lov->lov_tgts[index] = tgt;
+ if (index >= lov->desc.ld_tgt_count)
+ lov->desc.ld_tgt_count = index + 1;
+
+ mutex_unlock(&lov->lov_lock);
+
+ CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n",
+ index, tgt->ltd_gen, lov->desc.ld_tgt_count);
+
+ rc = obd_notify(obd, tgt_obd, OBD_NOTIFY_CREATE, &index);
+
+ if (lov->lov_connects == 0) {
+ /* lov_connect hasn't been called yet. We'll do the
+ lov_connect_obd on this target when that fn first runs,
+ because we don't know the connect flags yet. */
+ RETURN(0);
+ }
+
+ obd_getref(obd);
+
+ rc = lov_connect_obd(obd, index, active, &lov->lov_ocd);
+ if (rc)
+ GOTO(out, rc);
+
+ /* connect to administrative disabled ost */
+ if (!tgt->ltd_exp)
+ GOTO(out, rc = 0);
+
+ if (lov->lov_cache != NULL) {
+ rc = obd_set_info_async(NULL, tgt->ltd_exp,
+ sizeof(KEY_CACHE_SET), KEY_CACHE_SET,
+ sizeof(struct cl_client_cache), lov->lov_cache,
+ NULL);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+ rc = lov_notify(obd, tgt->ltd_exp->exp_obd,
+ active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE,
+ (void *)&index);
+
+out:
+ if (rc) {
+ CERROR("add failed (%d), deleting %s\n", rc,
+ obd_uuid2str(&tgt->ltd_uuid));
+ lov_del_target(obd, index, 0, 0);
+ }
+ obd_putref(obd);
+ RETURN(rc);
+}
+
+/* Schedule a target for deletion */
+int lov_del_target(struct obd_device *obd, __u32 index,
+ struct obd_uuid *uuidp, int gen)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ int count = lov->desc.ld_tgt_count;
+ int rc = 0;
+ ENTRY;
+
+ if (index >= count) {
+ CERROR("LOV target index %d >= number of LOV OBDs %d.\n",
+ index, count);
+ RETURN(-EINVAL);
+ }
+
+ /* to make sure there's no ongoing lov_notify() now */
+ down_write(&lov->lov_notify_lock);
+ obd_getref(obd);
+
+ if (!lov->lov_tgts[index]) {
+ CERROR("LOV target at index %d is not setup.\n", index);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (uuidp && !obd_uuid_equals(uuidp, &lov->lov_tgts[index]->ltd_uuid)) {
+ CERROR("LOV target UUID %s at index %d doesn't match %s.\n",
+ lov_uuid2str(lov, index), index,
+ obd_uuid2str(uuidp));
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n",
+ lov_uuid2str(lov, index), index,
+ lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp,
+ lov->lov_tgts[index]->ltd_active);
+
+ lov->lov_tgts[index]->ltd_reap = 1;
+ lov->lov_death_row++;
+ /* we really delete it from obd_putref */
+out:
+ obd_putref(obd);
+ up_write(&lov->lov_notify_lock);
+
+ RETURN(rc);
+}
+
+static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
+{
+ struct obd_device *osc_obd;
+
+ LASSERT(tgt);
+ LASSERT(tgt->ltd_reap);
+
+ osc_obd = class_exp2obd(tgt->ltd_exp);
+
+ CDEBUG(D_CONFIG, "Removing tgt %s : %s\n",
+ tgt->ltd_uuid.uuid,
+ osc_obd ? osc_obd->obd_name : "<no obd>");
+
+ if (tgt->ltd_exp)
+ lov_disconnect_obd(obd, tgt);
+
+ OBD_FREE_PTR(tgt);
+
+ /* Manual cleanup - no cleanup logs to clean up the osc's. We must
+ do it ourselves. And we can't do it from lov_cleanup,
+ because we just lost our only reference to it. */
+ if (osc_obd)
+ class_manual_cleanup(osc_obd);
+}
+
+void lov_fix_desc_stripe_size(__u64 *val)
+{
+ if (*val < LOV_MIN_STRIPE_SIZE) {
+ if (*val != 0)
+ LCONSOLE_INFO("Increasing default stripe size to "
+ "minimum %u\n",
+ LOV_DEFAULT_STRIPE_SIZE);
+ *val = LOV_DEFAULT_STRIPE_SIZE;
+ } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) {
+ *val &= ~(LOV_MIN_STRIPE_SIZE - 1);
+ LCONSOLE_WARN("Changing default stripe size to "LPU64" (a "
+ "multiple of %u)\n",
+ *val, LOV_MIN_STRIPE_SIZE);
+ }
+}
+
+void lov_fix_desc_stripe_count(__u32 *val)
+{
+ if (*val == 0)
+ *val = 1;
+}
+
+void lov_fix_desc_pattern(__u32 *val)
+{
+ /* from lov_setstripe */
+ if ((*val != 0) && (*val != LOV_PATTERN_RAID0)) {
+ LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val);
+ *val = 0;
+ }
+}
+
+void lov_fix_desc_qos_maxage(__u32 *val)
+{
+ /* fix qos_maxage */
+ if (*val == 0)
+ *val = QOS_DEFAULT_MAXAGE;
+}
+
+void lov_fix_desc(struct lov_desc *desc)
+{
+ lov_fix_desc_stripe_size(&desc->ld_default_stripe_size);
+ lov_fix_desc_stripe_count(&desc->ld_default_stripe_count);
+ lov_fix_desc_pattern(&desc->ld_pattern);
+ lov_fix_desc_qos_maxage(&desc->ld_qos_maxage);
+}
+
+int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ struct lov_desc *desc;
+ struct lov_obd *lov = &obd->u.lov;
+ int rc;
+ ENTRY;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
+ CERROR("LOV setup requires a descriptor\n");
+ RETURN(-EINVAL);
+ }
+
+ desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1);
+
+ if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
+ CERROR("descriptor size wrong: %d > %d\n",
+ (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
+ RETURN(-EINVAL);
+ }
+
+ if (desc->ld_magic != LOV_DESC_MAGIC) {
+ if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) {
+ CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n",
+ obd->obd_name, desc);
+ lustre_swab_lov_desc(desc);
+ } else {
+ CERROR("%s: Bad lov desc magic: %#x\n",
+ obd->obd_name, desc->ld_magic);
+ RETURN(-EINVAL);
+ }
+ }
+
+ lov_fix_desc(desc);
+
+ desc->ld_active_tgt_count = 0;
+ lov->desc = *desc;
+ lov->lov_tgt_size = 0;
+
+ mutex_init(&lov->lov_lock);
+ atomic_set(&lov->lov_refcount, 0);
+ lov->lov_sp_me = LUSTRE_SP_CLI;
+
+ init_rwsem(&lov->lov_notify_lock);
+
+ lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
+ HASH_POOLS_MAX_BITS,
+ HASH_POOLS_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &pool_hash_operations,
+ CFS_HASH_DEFAULT);
+ INIT_LIST_HEAD(&lov->lov_pool_list);
+ lov->lov_pool_count = 0;
+ rc = lov_ost_pool_init(&lov->lov_packed, 0);
+ if (rc)
+ GOTO(out, rc);
+
+ lprocfs_lov_init_vars(&lvars);
+ lprocfs_obd_setup(obd, lvars.obd_vars);
+#ifdef LPROCFS
+ {
+ int rc;
+
+ rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd",
+ 0444, &lov_proc_target_fops, obd);
+ if (rc)
+ CWARN("Error adding the target_obd file\n");
+ }
+#endif
+ lov->lov_pool_proc_entry = lprocfs_register("pools",
+ obd->obd_proc_entry,
+ NULL, NULL);
+
+ RETURN(0);
+
+out:
+ return rc;
+}
+
+static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ struct lov_obd *lov = &obd->u.lov;
+
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY: {
+ int i;
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
+ continue;
+ obd_precleanup(class_exp2obd(lov->lov_tgts[i]->ltd_exp),
+ OBD_CLEANUP_EARLY);
+ }
+ break;
+ }
+ case OBD_CLEANUP_EXPORTS:
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ }
+ RETURN(rc);
+}
+
+static int lov_cleanup(struct obd_device *obd)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct list_head *pos, *tmp;
+ struct pool_desc *pool;
+ ENTRY;
+
+ list_for_each_safe(pos, tmp, &lov->lov_pool_list) {
+ pool = list_entry(pos, struct pool_desc, pool_list);
+ /* free pool structs */
+ CDEBUG(D_INFO, "delete pool %p\n", pool);
+ /* In the function below, .hs_keycmp resolves to
+ * pool_hashkey_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ lov_pool_del(obd, pool->pool_name);
+ }
+ cfs_hash_putref(lov->lov_pools_hash_body);
+ lov_ost_pool_free(&lov->lov_packed);
+
+ lprocfs_obd_cleanup(obd);
+ if (lov->lov_tgts) {
+ int i;
+ obd_getref(obd);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (!lov->lov_tgts[i])
+ continue;
+
+ /* Inactive targets may never have connected */
+ if (lov->lov_tgts[i]->ltd_active ||
+ atomic_read(&lov->lov_refcount))
+ /* We should never get here - these
+ should have been removed in the
+ disconnect. */
+ CERROR("lov tgt %d not cleaned!"
+ " deathrow=%d, lovrc=%d\n",
+ i, lov->lov_death_row,
+ atomic_read(&lov->lov_refcount));
+ lov_del_target(obd, i, 0, 0);
+ }
+ obd_putref(obd);
+ OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) *
+ lov->lov_tgt_size);
+ lov->lov_tgt_size = 0;
+ }
+ RETURN(0);
+}
+
+int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
+ __u32 *indexp, int *genp)
+{
+ struct obd_uuid obd_uuid;
+ int cmd;
+ int rc = 0;
+ ENTRY;
+
+ switch(cmd = lcfg->lcfg_command) {
+ case LCFG_LOV_ADD_OBD:
+ case LCFG_LOV_ADD_INA:
+ case LCFG_LOV_DEL_OBD: {
+ __u32 index;
+ int gen;
+ /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid))
+ GOTO(out, rc = -EINVAL);
+
+ obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1));
+
+ if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", indexp) != 1)
+ GOTO(out, rc = -EINVAL);
+ if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", genp) != 1)
+ GOTO(out, rc = -EINVAL);
+ index = *indexp;
+ gen = *genp;
+ if (cmd == LCFG_LOV_ADD_OBD)
+ rc = lov_add_target(obd, &obd_uuid, index, gen, 1);
+ else if (cmd == LCFG_LOV_ADD_INA)
+ rc = lov_add_target(obd, &obd_uuid, index, gen, 0);
+ else
+ rc = lov_del_target(obd, index, &obd_uuid, gen);
+ GOTO(out, rc);
+ }
+ case LCFG_PARAM: {
+ struct lprocfs_static_vars lvars = { 0 };
+ struct lov_desc *desc = &(obd->u.lov.desc);
+
+ if (!desc)
+ GOTO(out, rc = -EINVAL);
+
+ lprocfs_lov_init_vars(&lvars);
+
+ rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars,
+ lcfg, obd);
+ if (rc > 0)
+ rc = 0;
+ GOTO(out, rc);
+ }
+ case LCFG_POOL_NEW:
+ case LCFG_POOL_ADD:
+ case LCFG_POOL_DEL:
+ case LCFG_POOL_REM:
+ GOTO(out, rc);
+
+ default: {
+ CERROR("Unknown command: %d\n", lcfg->lcfg_command);
+ GOTO(out, rc = -EINVAL);
+
+ }
+ }
+out:
+ RETURN(rc);
+}
+
+static int lov_recreate(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
+{
+ struct lov_stripe_md *obj_mdp, *lsm;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ unsigned ost_idx;
+ int rc, i;
+ ENTRY;
+
+ LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS &&
+ src_oa->o_flags & OBD_FL_RECREATE_OBJS);
+
+ OBD_ALLOC(obj_mdp, sizeof(*obj_mdp));
+ if (obj_mdp == NULL)
+ RETURN(-ENOMEM);
+
+ ost_idx = src_oa->o_nlink;
+ lsm = *ea;
+ if (lsm == NULL)
+ GOTO(out, rc = -EINVAL);
+ if (ost_idx >= lov->desc.ld_tgt_count ||
+ !lov->lov_tgts[ost_idx])
+ GOTO(out, rc = -EINVAL);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (lsm->lsm_oinfo[i]->loi_ost_idx == ost_idx) {
+ if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) !=
+ ostid_id(&src_oa->o_oi))
+ GOTO(out, rc = -EINVAL);
+ break;
+ }
+ }
+ if (i == lsm->lsm_stripe_count)
+ GOTO(out, rc = -EINVAL);
+
+ rc = obd_create(NULL, lov->lov_tgts[ost_idx]->ltd_exp,
+ src_oa, &obj_mdp, oti);
+out:
+ OBD_FREE(obj_mdp, sizeof(*obj_mdp));
+ RETURN(rc);
+}
+
+/* the LOV expects oa->o_id to be set to the LOV object id */
+static int lov_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *src_oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
+{
+ struct lov_obd *lov;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ea != NULL);
+ if (exp == NULL)
+ RETURN(-EINVAL);
+
+ if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
+ src_oa->o_flags == OBD_FL_DELORPHAN) {
+ /* should be used with LOV anymore */
+ LBUG();
+ }
+
+ lov = &exp->exp_obd->u.lov;
+ if (!lov->desc.ld_active_tgt_count)
+ RETURN(-EIO);
+
+ obd_getref(exp->exp_obd);
+ /* Recreate a specific object id at the given OST index */
+ if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
+ (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) {
+ rc = lov_recreate(exp, src_oa, ea, oti);
+ }
+
+ obd_putref(exp->exp_obd);
+ RETURN(rc);
+}
+
+#define ASSERT_LSM_MAGIC(lsmp) \
+do { \
+ LASSERT((lsmp) != NULL); \
+ LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \
+ (lsmp)->lsm_magic == LOV_MAGIC_V3), \
+ "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \
+} while (0)
+
+static int lov_destroy(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md *lsm,
+ struct obd_trans_info *oti, struct obd_export *md_exp,
+ void *capa)
+{
+ struct lov_request_set *set;
+ struct obd_info oinfo;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ int rc = 0, err = 0;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ if (oa->o_valid & OBD_MD_FLCOOKIE) {
+ LASSERT(oti);
+ LASSERT(oti->oti_logcookies);
+ }
+
+ lov = &exp->exp_obd->u.lov;
+ obd_getref(exp->exp_obd);
+ rc = lov_prep_destroy_set(exp, &oinfo, oa, lsm, oti, &set);
+ if (rc)
+ GOTO(out, rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (oa->o_valid & OBD_MD_FLCOOKIE)
+ oti->oti_logcookies = set->set_cookies + req->rq_stripe;
+
+ err = obd_destroy(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
+ req->rq_oi.oi_oa, NULL, oti, NULL, capa);
+ err = lov_update_common_set(set, req, err);
+ if (err) {
+ CERROR("%s: destroying objid "DOSTID" subobj "
+ DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name, POSTID(&oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi),
+ req->rq_idx, err);
+ if (!rc)
+ rc = err;
+ }
+ }
+
+ if (rc == 0) {
+ LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
+ rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp);
+ }
+ err = lov_fini_destroy_set(set);
+out:
+ obd_putref(exp->exp_obd);
+ RETURN(rc ? rc : err);
+}
+
+static int lov_getattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo)
+{
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ int err = 0, rc = 0;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+
+ rc = lov_prep_getattr_set(exp, oinfo, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx"
+ " %u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe,
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx);
+
+ rc = obd_getattr(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi);
+ err = lov_update_common_set(set, req, rc);
+ if (err) {
+ CERROR("%s: getattr objid "DOSTID" subobj "
+ DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(&oinfo->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi),
+ req->rq_idx, err);
+ break;
+ }
+ }
+
+ rc = lov_fini_getattr_set(set);
+ if (err)
+ rc = err;
+ RETURN(rc);
+}
+
+static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
+ void *data, int rc)
+{
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
+ int err;
+ ENTRY;
+
+ /* don't do attribute merge if this aysnc op failed */
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+ err = lov_fini_getattr_set(lovset);
+ RETURN(rc ? rc : err);
+}
+
+static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
+ struct ptlrpc_request_set *rqset)
+{
+ struct lov_request_set *lovset;
+ struct lov_obd *lov;
+ struct list_head *pos;
+ struct lov_request *req;
+ int rc = 0, err;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+
+ rc = lov_prep_getattr_set(exp, oinfo, &lovset);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n",
+ POSTID(&oinfo->oi_md->lsm_oi), oinfo->oi_md->lsm_stripe_count,
+ oinfo->oi_md->lsm_stripe_size);
+
+ list_for_each(pos, &lovset->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx"
+ "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe,
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx);
+ rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, rqset);
+ if (rc) {
+ CERROR("%s: getattr objid "DOSTID" subobj"
+ DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(&oinfo->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi),
+ req->rq_idx, rc);
+ GOTO(out, rc);
+ }
+ }
+
+ if (!list_empty(&rqset->set_requests)) {
+ LASSERT(rc == 0);
+ LASSERT (rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_getattr_interpret;
+ rqset->set_arg = (void *)lovset;
+ RETURN(rc);
+ }
+out:
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+ err = lov_fini_getattr_set(lovset);
+ RETURN(rc ? rc : err);
+}
+
+static int lov_setattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov;
+ struct list_head *pos;
+ struct lov_request *req;
+ int err = 0, rc = 0;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ /* for now, we only expect the following updates here */
+ LASSERT(!(oinfo->oi_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLTYPE |
+ OBD_MD_FLMODE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLFLAGS | OBD_MD_FLSIZE |
+ OBD_MD_FLGROUP | OBD_MD_FLUID |
+ OBD_MD_FLGID | OBD_MD_FLFID |
+ OBD_MD_FLGENER)));
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_setattr_set(exp, oinfo, oti, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ rc = obd_setattr(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, NULL);
+ err = lov_update_setattr_set(set, req, rc);
+ if (err) {
+ CERROR("%s: setattr objid "DOSTID" subobj "
+ DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(&set->set_oi->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx,
+ err);
+ if (!rc)
+ rc = err;
+ }
+ }
+ err = lov_fini_setattr_set(set);
+ if (!rc)
+ rc = err;
+ RETURN(rc);
+}
+
+static int lov_setattr_interpret(struct ptlrpc_request_set *rqset,
+ void *data, int rc)
+{
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
+ int err;
+ ENTRY;
+
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+ err = lov_fini_setattr_set(lovset);
+ RETURN(rc ? rc : err);
+}
+
+/* If @oti is given, the request goes from MDS and responses from OSTs are not
+ needed. Otherwise, a client is waiting for responses. */
+static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset)
+{
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+ if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) {
+ LASSERT(oti);
+ LASSERT(oti->oti_logcookies);
+ }
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_setattr_set(exp, oinfo, oti, &set);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n",
+ POSTID(&oinfo->oi_md->lsm_oi),
+ oinfo->oi_md->lsm_stripe_count,
+ oinfo->oi_md->lsm_stripe_size);
+
+ list_for_each(pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
+ oti->oti_logcookies = set->set_cookies + req->rq_stripe;
+
+ CDEBUG(D_INFO, "objid "DOSTID"[%d] has subobj "DOSTID" at idx"
+ "%u\n", POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe,
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx);
+
+ rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, oti, rqset);
+ if (rc) {
+ CERROR("error: setattr objid "DOSTID" subobj"
+ DOSTID" on OST idx %d: rc = %d\n",
+ POSTID(&set->set_oi->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi),
+ req->rq_idx, rc);
+ break;
+ }
+ }
+
+ /* If we are not waiting for responses on async requests, return. */
+ if (rc || !rqset || list_empty(&rqset->set_requests)) {
+ int err;
+ if (rc)
+ atomic_set(&set->set_completes, 0);
+ err = lov_fini_setattr_set(set);
+ RETURN(rc ? rc : err);
+ }
+
+ LASSERT(rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_setattr_interpret;
+ rqset->set_arg = (void *)set;
+
+ RETURN(0);
+}
+
+static int lov_punch_interpret(struct ptlrpc_request_set *rqset,
+ void *data, int rc)
+{
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
+ int err;
+ ENTRY;
+
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+ err = lov_fini_punch_set(lovset);
+ RETURN(rc ? rc : err);
+}
+
+/* FIXME: maybe we'll just make one node the authoritative attribute node, then
+ * we can send this 'punch' to just the authoritative node and the nodes
+ * that the punch will affect. */
+static int lov_punch(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov;
+ struct list_head *pos;
+ struct lov_request *req;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_punch_set(exp, oinfo, oti, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ rc = obd_punch(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, NULL, rqset);
+ if (rc) {
+ CERROR("%s: punch objid "DOSTID" subobj "DOSTID
+ " on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(&set->set_oi->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx, rc);
+ break;
+ }
+ }
+
+ if (rc || list_empty(&rqset->set_requests)) {
+ int err;
+ err = lov_fini_punch_set(set);
+ RETURN(rc ? rc : err);
+ }
+
+ LASSERT(rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_punch_interpret;
+ rqset->set_arg = (void *)set;
+
+ RETURN(0);
+}
+
+static int lov_sync_interpret(struct ptlrpc_request_set *rqset,
+ void *data, int rc)
+{
+ struct lov_request_set *lovset = data;
+ int err;
+ ENTRY;
+
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+ err = lov_fini_sync_set(lovset);
+ RETURN(rc ?: err);
+}
+
+static int lov_sync(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, obd_off start, obd_off end,
+ struct ptlrpc_request_set *rqset)
+{
+ struct lov_request_set *set = NULL;
+ struct lov_obd *lov;
+ struct list_head *pos;
+ struct lov_request *req;
+ int rc = 0;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+ LASSERT(rqset != NULL);
+
+ if (!exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_sync_set(exp, oinfo, start, end, &set);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INFO, "fsync objid "DOSTID" ["LPX64", "LPX64"]\n",
+ POSTID(&set->set_oi->oi_oa->o_oi), start, end);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ rc = obd_sync(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, req->rq_oi.oi_policy.l_extent.start,
+ req->rq_oi.oi_policy.l_extent.end, rqset);
+ if (rc) {
+ CERROR("%s: fsync objid "DOSTID" subobj "DOSTID
+ " on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(&set->set_oi->oi_oa->o_oi),
+ POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx,
+ rc);
+ break;
+ }
+ }
+
+ /* If we are not waiting for responses on async requests, return. */
+ if (rc || list_empty(&rqset->set_requests)) {
+ int err = lov_fini_sync_set(set);
+
+ RETURN(rc ?: err);
+ }
+
+ LASSERT(rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_sync_interpret;
+ rqset->set_arg = (void *)set;
+
+ RETURN(0);
+}
+
+static int lov_brw_check(struct lov_obd *lov, struct obd_info *lov_oinfo,
+ obd_count oa_bufs, struct brw_page *pga)
+{
+ struct obd_info oinfo = { { { 0 } } };
+ int i, rc = 0;
+
+ oinfo.oi_oa = lov_oinfo->oi_oa;
+
+ /* The caller just wants to know if there's a chance that this
+ * I/O can succeed */
+ for (i = 0; i < oa_bufs; i++) {
+ int stripe = lov_stripe_number(lov_oinfo->oi_md, pga[i].off);
+ int ost = lov_oinfo->oi_md->lsm_oinfo[stripe]->loi_ost_idx;
+ obd_off start, end;
+
+ if (!lov_stripe_intersects(lov_oinfo->oi_md, i, pga[i].off,
+ pga[i].off + pga[i].count - 1,
+ &start, &end))
+ continue;
+
+ if (!lov->lov_tgts[ost] || !lov->lov_tgts[ost]->ltd_active) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", ost);
+ return -EIO;
+ }
+
+ rc = obd_brw(OBD_BRW_CHECK, lov->lov_tgts[ost]->ltd_exp, &oinfo,
+ 1, &pga[i], NULL);
+ if (rc)
+ break;
+ }
+ return rc;
+}
+
+static int lov_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
+ obd_count oa_bufs, struct brw_page *pga,
+ struct obd_trans_info *oti)
+{
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int err, rc = 0;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+
+ if (cmd == OBD_BRW_CHECK) {
+ rc = lov_brw_check(lov, oinfo, oa_bufs, pga);
+ RETURN(rc);
+ }
+
+ rc = lov_prep_brw_set(exp, oinfo, oa_bufs, pga, oti, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ struct obd_export *sub_exp;
+ struct brw_page *sub_pga;
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
+ sub_pga = set->set_pga + req->rq_pgaidx;
+ rc = obd_brw(cmd, sub_exp, &req->rq_oi, req->rq_oabufs,
+ sub_pga, oti);
+ if (rc)
+ break;
+ lov_update_common_set(set, req, rc);
+ }
+
+ err = lov_fini_brw_set(set);
+ if (!rc)
+ rc = err;
+ RETURN(rc);
+}
+
+static int lov_enqueue_interpret(struct ptlrpc_request_set *rqset,
+ void *data, int rc)
+{
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
+ ENTRY;
+ rc = lov_fini_enqueue_set(lovset, lovset->set_ei->ei_mode, rc, rqset);
+ RETURN(rc);
+}
+
+static int lov_enqueue(struct obd_export *exp, struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct ptlrpc_request_set *rqset)
+{
+ ldlm_mode_t mode = einfo->ei_mode;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ ldlm_error_t rc;
+ ENTRY;
+
+ LASSERT(oinfo);
+ ASSERT_LSM_MAGIC(oinfo->oi_md);
+ LASSERT(mode == (mode & -mode));
+
+ /* we should never be asked to replay a lock this way. */
+ LASSERT((oinfo->oi_flags & LDLM_FL_REPLAY) == 0);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_enqueue_set(exp, oinfo, einfo, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ rc = obd_enqueue(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, einfo, rqset);
+ if (rc != ELDLM_OK)
+ GOTO(out, rc);
+ }
+
+ if (rqset && !list_empty(&rqset->set_requests)) {
+ LASSERT(rc == 0);
+ LASSERT(rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_enqueue_interpret;
+ rqset->set_arg = (void *)set;
+ RETURN(rc);
+ }
+out:
+ rc = lov_fini_enqueue_set(set, mode, rc, rqset);
+ RETURN(rc);
+}
+
+static int lov_change_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm, ldlm_iterator_t it,
+ void *data)
+{
+ struct lov_obd *lov;
+ int rc = 0, i;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_stripe_md submd;
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx);
+ continue;
+ }
+
+ submd.lsm_oi = loi->loi_oi;
+ submd.lsm_stripe_count = 0;
+ rc = obd_change_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
+ &submd, it, data);
+ }
+ RETURN(rc);
+}
+
+/* find any ldlm lock of the inode in lov
+ * return 0 not find
+ * 1 find one
+ * < 0 error */
+static int lov_find_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm, ldlm_iterator_t it,
+ void *data)
+{
+ struct lov_obd *lov;
+ int rc = 0, i;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_stripe_md submd;
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx);
+ continue;
+ }
+ submd.lsm_oi = loi->loi_oi;
+ submd.lsm_stripe_count = 0;
+ rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
+ &submd, it, data);
+ if (rc != 0)
+ RETURN(rc);
+ }
+ RETURN(rc);
+}
+
+static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 mode, struct lustre_handle *lockh)
+{
+ struct lov_request_set *set;
+ struct obd_info oinfo;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ struct lustre_handle *lov_lockhp;
+ int err = 0, rc = 0;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ LASSERT(lockh);
+ lov = &exp->exp_obd->u.lov;
+ rc = lov_prep_cancel_set(exp, &oinfo, lsm, mode, lockh, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each(pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+
+ rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ req->rq_oi.oi_md, mode, lov_lockhp);
+ rc = lov_update_common_set(set, req, rc);
+ if (rc) {
+ CERROR("%s: cancel objid "DOSTID" subobj "
+ DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name, POSTID(&lsm->lsm_oi),
+ POSTID(&req->rq_oi.oi_md->lsm_oi),
+ req->rq_idx, rc);
+ err = rc;
+ }
+
+ }
+ lov_fini_cancel_set(set);
+ RETURN(err);
+}
+
+static int lov_cancel_unused(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ ldlm_cancel_flags_t flags, void *opaque)
+{
+ struct lov_obd *lov;
+ int rc = 0, i;
+ ENTRY;
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ lov = &exp->exp_obd->u.lov;
+ if (lsm == NULL) {
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ int err;
+ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp)
+ continue;
+
+ err = obd_cancel_unused(lov->lov_tgts[i]->ltd_exp, NULL,
+ flags, opaque);
+ if (!rc)
+ rc = err;
+ }
+ RETURN(rc);
+ }
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_stripe_md submd;
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ int idx = loi->loi_ost_idx;
+ int err;
+
+ if (!lov->lov_tgts[idx]) {
+ CDEBUG(D_HA, "lov idx %d NULL\n", idx);
+ continue;
+ }
+
+ if (!lov->lov_tgts[idx]->ltd_active)
+ CDEBUG(D_HA, "lov idx %d inactive\n", idx);
+
+ submd.lsm_oi = loi->loi_oi;
+ submd.lsm_stripe_count = 0;
+ err = obd_cancel_unused(lov->lov_tgts[idx]->ltd_exp,
+ &submd, flags, opaque);
+ if (err && lov->lov_tgts[idx]->ltd_active) {
+ CERROR("%s: cancel unused objid "DOSTID
+ " subobj "DOSTID" on OST idx %d: rc = %d\n",
+ exp->exp_obd->obd_name, POSTID(&lsm->lsm_oi),
+ POSTID(&loi->loi_oi), idx, err);
+ if (!rc)
+ rc = err;
+ }
+ }
+ RETURN(rc);
+}
+
+int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
+{
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
+ int err;
+ ENTRY;
+
+ if (rc)
+ atomic_set(&lovset->set_completes, 0);
+
+ err = lov_fini_statfs_set(lovset);
+ RETURN(rc ? rc : err);
+}
+
+static int lov_statfs_async(struct obd_export *exp, struct obd_info *oinfo,
+ __u64 max_age, struct ptlrpc_request_set *rqset)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(oinfo != NULL);
+ LASSERT(oinfo->oi_osfs != NULL);
+
+ lov = &obd->u.lov;
+ rc = lov_prep_statfs_set(obd, oinfo, &set);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+ rc = obd_statfs_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ &req->rq_oi, max_age, rqset);
+ if (rc)
+ break;
+ }
+
+ if (rc || list_empty(&rqset->set_requests)) {
+ int err;
+ if (rc)
+ atomic_set(&set->set_completes, 0);
+ err = lov_fini_statfs_set(set);
+ RETURN(rc ? rc : err);
+ }
+
+ LASSERT(rqset->set_interpret == NULL);
+ rqset->set_interpret = lov_statfs_interpret;
+ rqset->set_arg = (void *)set;
+ RETURN(0);
+}
+
+static int lov_statfs(const struct lu_env *env, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age, __u32 flags)
+{
+ struct ptlrpc_request_set *set = NULL;
+ struct obd_info oinfo = { { { 0 } } };
+ int rc = 0;
+ ENTRY;
+
+
+ /* for obdclass we forbid using obd_statfs_rqset, but prefer using async
+ * statfs requests */
+ set = ptlrpc_prep_set();
+ if (set == NULL)
+ RETURN(-ENOMEM);
+
+ oinfo.oi_osfs = osfs;
+ oinfo.oi_flags = flags;
+ rc = lov_statfs_async(exp, &oinfo, max_age, set);
+ if (rc == 0)
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+
+ RETURN(rc);
+}
+
+static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct lov_obd *lov = &obddev->u.lov;
+ int i = 0, rc = 0, count = lov->desc.ld_tgt_count;
+ struct obd_uuid *uuidp;
+ ENTRY;
+
+ switch (cmd) {
+ case IOC_OBD_STATFS: {
+ struct obd_ioctl_data *data = karg;
+ struct obd_device *osc_obd;
+ struct obd_statfs stat_buf = {0};
+ __u32 index;
+ __u32 flags;
+
+ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
+ if ((index >= count))
+ RETURN(-ENODEV);
+
+ if (!lov->lov_tgts[index])
+ /* Try again with the next index */
+ RETURN(-EAGAIN);
+ if (!lov->lov_tgts[index]->ltd_active)
+ RETURN(-ENODATA);
+
+ osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
+ if (!osc_obd)
+ RETURN(-EINVAL);
+
+ /* copy UUID */
+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd),
+ min((int) data->ioc_plen2,
+ (int) sizeof(struct obd_uuid))))
+ RETURN(-EFAULT);
+
+ flags = uarg ? *(__u32*)uarg : 0;
+ /* got statfs data */
+ rc = obd_statfs(NULL, lov->lov_tgts[index]->ltd_exp, &stat_buf,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ flags);
+ if (rc)
+ RETURN(rc);
+ if (copy_to_user(data->ioc_pbuf1, &stat_buf,
+ min((int) data->ioc_plen1,
+ (int) sizeof(stat_buf))))
+ RETURN(-EFAULT);
+ break;
+ }
+ case OBD_IOC_LOV_GET_CONFIG: {
+ struct obd_ioctl_data *data;
+ struct lov_desc *desc;
+ char *buf = NULL;
+ __u32 *genp;
+
+ len = 0;
+ if (obd_ioctl_getdata(&buf, &len, (void *)uarg))
+ RETURN(-EINVAL);
+
+ data = (struct obd_ioctl_data *)buf;
+
+ if (sizeof(*desc) > data->ioc_inllen1) {
+ obd_ioctl_freedata(buf, len);
+ RETURN(-EINVAL);
+ }
+
+ if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) {
+ obd_ioctl_freedata(buf, len);
+ RETURN(-EINVAL);
+ }
+
+ if (sizeof(__u32) * count > data->ioc_inllen3) {
+ obd_ioctl_freedata(buf, len);
+ RETURN(-EINVAL);
+ }
+
+ desc = (struct lov_desc *)data->ioc_inlbuf1;
+ memcpy(desc, &(lov->desc), sizeof(*desc));
+
+ uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
+ genp = (__u32 *)data->ioc_inlbuf3;
+ /* the uuid will be empty for deleted OSTs */
+ for (i = 0; i < count; i++, uuidp++, genp++) {
+ if (!lov->lov_tgts[i])
+ continue;
+ *uuidp = lov->lov_tgts[i]->ltd_uuid;
+ *genp = lov->lov_tgts[i]->ltd_gen;
+ }
+
+ if (copy_to_user((void *)uarg, buf, len))
+ rc = -EFAULT;
+ obd_ioctl_freedata(buf, len);
+ break;
+ }
+ case LL_IOC_LOV_SETSTRIPE:
+ rc = lov_setstripe(exp, len, karg, uarg);
+ break;
+ case LL_IOC_LOV_GETSTRIPE:
+ rc = lov_getstripe(exp, karg, uarg);
+ break;
+ case LL_IOC_LOV_SETEA:
+ rc = lov_setea(exp, karg, uarg);
+ break;
+ case OBD_IOC_QUOTACTL: {
+ struct if_quotactl *qctl = karg;
+ struct lov_tgt_desc *tgt = NULL;
+ struct obd_quotactl *oqctl;
+
+ if (qctl->qc_valid == QC_OSTIDX) {
+ if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+ RETURN(-EINVAL);
+
+ tgt = lov->lov_tgts[qctl->qc_idx];
+ if (!tgt || !tgt->ltd_exp)
+ RETURN(-EINVAL);
+ } else if (qctl->qc_valid == QC_UUID) {
+ for (i = 0; i < count; i++) {
+ tgt = lov->lov_tgts[i];
+ if (!tgt ||
+ !obd_uuid_equals(&tgt->ltd_uuid,
+ &qctl->obd_uuid))
+ continue;
+
+ if (tgt->ltd_exp == NULL)
+ RETURN(-EINVAL);
+
+ break;
+ }
+ } else {
+ RETURN(-EINVAL);
+ }
+
+ if (i >= count)
+ RETURN(-EAGAIN);
+
+ LASSERT(tgt && tgt->ltd_exp);
+ OBD_ALLOC_PTR(oqctl);
+ if (!oqctl)
+ RETURN(-ENOMEM);
+
+ QCTL_COPY(oqctl, qctl);
+ rc = obd_quotactl(tgt->ltd_exp, oqctl);
+ if (rc == 0) {
+ QCTL_COPY(qctl, oqctl);
+ qctl->qc_valid = QC_OSTIDX;
+ qctl->obd_uuid = tgt->ltd_uuid;
+ }
+ OBD_FREE_PTR(oqctl);
+ break;
+ }
+ default: {
+ int set = 0;
+
+ if (count == 0)
+ RETURN(-ENOTTY);
+
+ for (i = 0; i < count; i++) {
+ int err;
+ struct obd_device *osc_obd;
+
+ /* OST was disconnected */
+ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp)
+ continue;
+
+ /* ll_umount_begin() sets force flag but for lov, not
+ * osc. Let's pass it through */
+ osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp);
+ osc_obd->obd_force = obddev->obd_force;
+ err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp,
+ len, karg, uarg);
+ if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
+ RETURN(err);
+ } else if (err) {
+ if (lov->lov_tgts[i]->ltd_active) {
+ CDEBUG(err == -ENOTTY ?
+ D_IOCTL : D_WARNING,
+ "iocontrol OSC %s on OST "
+ "idx %d cmd %x: err = %d\n",
+ lov_uuid2str(lov, i),
+ i, cmd, err);
+ if (!rc)
+ rc = err;
+ }
+ } else {
+ set = 1;
+ }
+ }
+ if (!set && !rc)
+ rc = -EIO;
+ }
+ }
+
+ RETURN(rc);
+}
+
+#define FIEMAP_BUFFER_SIZE 4096
+
+/**
+ * Non-zero fe_logical indicates that this is a continuation FIEMAP
+ * call. The local end offset and the device are sent in the first
+ * fm_extent. This function calculates the stripe number from the index.
+ * This function returns a stripe_no on which mapping is to be restarted.
+ *
+ * This function returns fm_end_offset which is the in-OST offset at which
+ * mapping should be restarted. If fm_end_offset=0 is returned then caller
+ * will re-calculate proper offset in next stripe.
+ * Note that the first extent is passed to lov_get_info via the value field.
+ *
+ * \param fiemap fiemap request header
+ * \param lsm striping information for the file
+ * \param fm_start logical start of mapping
+ * \param fm_end logical end of mapping
+ * \param start_stripe starting stripe will be returned in this
+ */
+obd_size fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap,
+ struct lov_stripe_md *lsm, obd_size fm_start,
+ obd_size fm_end, int *start_stripe)
+{
+ obd_size local_end = fiemap->fm_extents[0].fe_logical;
+ obd_off lun_start, lun_end;
+ obd_size fm_end_offset;
+ int stripe_no = -1, i;
+
+ if (fiemap->fm_extent_count == 0 ||
+ fiemap->fm_extents[0].fe_logical == 0)
+ return 0;
+
+ /* Find out stripe_no from ost_index saved in the fe_device */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (lsm->lsm_oinfo[i]->loi_ost_idx ==
+ fiemap->fm_extents[0].fe_device) {
+ stripe_no = i;
+ break;
+ }
+ }
+ if (stripe_no == -1)
+ return -EINVAL;
+
+ /* If we have finished mapping on previous device, shift logical
+ * offset to start of next device */
+ if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
+ &lun_start, &lun_end)) != 0 &&
+ local_end < lun_end) {
+ fm_end_offset = local_end;
+ *start_stripe = stripe_no;
+ } else {
+ /* This is a special value to indicate that caller should
+ * calculate offset in next stripe. */
+ fm_end_offset = 0;
+ *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
+ }
+
+ return fm_end_offset;
+}
+
+/**
+ * We calculate on which OST the mapping will end. If the length of mapping
+ * is greater than (stripe_size * stripe_count) then the last_stripe will
+ * will be one just before start_stripe. Else we check if the mapping
+ * intersects each OST and find last_stripe.
+ * This function returns the last_stripe and also sets the stripe_count
+ * over which the mapping is spread
+ *
+ * \param lsm striping information for the file
+ * \param fm_start logical start of mapping
+ * \param fm_end logical end of mapping
+ * \param start_stripe starting stripe of the mapping
+ * \param stripe_count the number of stripes across which to map is returned
+ *
+ * \retval last_stripe return the last stripe of the mapping
+ */
+int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, obd_size fm_start,
+ obd_size fm_end, int start_stripe,
+ int *stripe_count)
+{
+ int last_stripe;
+ obd_off obd_start, obd_end;
+ int i, j;
+
+ if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
+ last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 :
+ start_stripe - 1);
+ *stripe_count = lsm->lsm_stripe_count;
+ } else {
+ for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count;
+ i = (i + 1) % lsm->lsm_stripe_count, j++) {
+ if ((lov_stripe_intersects(lsm, i, fm_start, fm_end,
+ &obd_start, &obd_end)) == 0)
+ break;
+ }
+ *stripe_count = j;
+ last_stripe = (start_stripe + j - 1) %lsm->lsm_stripe_count;
+ }
+
+ return last_stripe;
+}
+
+/**
+ * Set fe_device and copy extents from local buffer into main return buffer.
+ *
+ * \param fiemap fiemap request header
+ * \param lcl_fm_ext array of local fiemap extents to be copied
+ * \param ost_index OST index to be written into the fm_device field for each
+ extent
+ * \param ext_count number of extents to be copied
+ * \param current_extent where to start copying in main extent array
+ */
+void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap,
+ struct ll_fiemap_extent *lcl_fm_ext,
+ int ost_index, unsigned int ext_count,
+ int current_extent)
+{
+ char *to;
+ int ext;
+
+ for (ext = 0; ext < ext_count; ext++) {
+ lcl_fm_ext[ext].fe_device = ost_index;
+ lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET;
+ }
+
+ /* Copy fm_extent's from fm_local to return buffer */
+ to = (char *)fiemap + fiemap_count_to_size(current_extent);
+ memcpy(to, lcl_fm_ext, ext_count * sizeof(struct ll_fiemap_extent));
+}
+
+/**
+ * Break down the FIEMAP request and send appropriate calls to individual OSTs.
+ * This also handles the restarting of FIEMAP calls in case mapping overflows
+ * the available number of extents in single call.
+ */
+static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
+ __u32 *vallen, void *val, struct lov_stripe_md *lsm)
+{
+ struct ll_fiemap_info_key *fm_key = key;
+ struct ll_user_fiemap *fiemap = val;
+ struct ll_user_fiemap *fm_local = NULL;
+ struct ll_fiemap_extent *lcl_fm_ext;
+ int count_local;
+ unsigned int get_num_extents = 0;
+ int ost_index = 0, actual_start_stripe, start_stripe;
+ obd_size fm_start, fm_end, fm_length, fm_end_offset;
+ obd_size curr_loc;
+ int current_extent = 0, rc = 0, i;
+ int ost_eof = 0; /* EOF for object */
+ int ost_done = 0; /* done with required mapping for this OST? */
+ int last_stripe;
+ int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count;
+ unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
+
+ if (lsm == NULL)
+ GOTO(out, rc = 0);
+
+ if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size)
+ buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count);
+
+ OBD_ALLOC_LARGE(fm_local, buffer_size);
+ if (fm_local == NULL)
+ GOTO(out, rc = -ENOMEM);
+ lcl_fm_ext = &fm_local->fm_extents[0];
+
+ count_local = fiemap_size_to_count(buffer_size);
+
+ memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
+ fm_start = fiemap->fm_start;
+ fm_length = fiemap->fm_length;
+ /* Calculate start stripe, last stripe and length of mapping */
+ actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start);
+ fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
+ fm_start + fm_length - 1);
+ /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
+ if (fm_end > fm_key->oa.o_size)
+ fm_end = fm_key->oa.o_size;
+
+ last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
+ actual_start_stripe, &stripe_count);
+
+ fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start,
+ fm_end, &start_stripe);
+ if (fm_end_offset == -EINVAL)
+ GOTO(out, rc = -EINVAL);
+
+ if (fiemap->fm_extent_count == 0) {
+ get_num_extents = 1;
+ count_local = 0;
+ }
+
+ /* Check each stripe */
+ for (cur_stripe = start_stripe, i = 0; i < stripe_count;
+ i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
+ obd_size req_fm_len; /* Stores length of required mapping */
+ obd_size len_mapped_single_call;
+ obd_off lun_start, lun_end, obd_object_end;
+ unsigned int ext_count;
+
+ cur_stripe_wrap = cur_stripe;
+
+ /* Find out range of mapping on this stripe */
+ if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
+ &lun_start, &obd_object_end)) == 0)
+ continue;
+
+ /* If this is a continuation FIEMAP call and we are on
+ * starting stripe then lun_start needs to be set to
+ * fm_end_offset */
+ if (fm_end_offset != 0 && cur_stripe == start_stripe)
+ lun_start = fm_end_offset;
+
+ if (fm_length != ~0ULL) {
+ /* Handle fm_start + fm_length overflow */
+ if (fm_start + fm_length < fm_start)
+ fm_length = ~0ULL - fm_start;
+ lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
+ cur_stripe);
+ } else {
+ lun_end = ~0ULL;
+ }
+
+ if (lun_start == lun_end)
+ continue;
+
+ req_fm_len = obd_object_end - lun_start;
+ fm_local->fm_length = 0;
+ len_mapped_single_call = 0;
+
+ /* If the output buffer is very large and the objects have many
+ * extents we may need to loop on a single OST repeatedly */
+ ost_eof = 0;
+ ost_done = 0;
+ do {
+ if (get_num_extents == 0) {
+ /* Don't get too many extents. */
+ if (current_extent + count_local >
+ fiemap->fm_extent_count)
+ count_local = fiemap->fm_extent_count -
+ current_extent;
+ }
+
+ lun_start += len_mapped_single_call;
+ fm_local->fm_length = req_fm_len - len_mapped_single_call;
+ req_fm_len = fm_local->fm_length;
+ fm_local->fm_extent_count = count_local;
+ fm_local->fm_mapped_extents = 0;
+ fm_local->fm_flags = fiemap->fm_flags;
+
+ fm_key->oa.o_oi = lsm->lsm_oinfo[cur_stripe]->loi_oi;
+ ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
+
+ if (ost_index < 0 || ost_index >=lov->desc.ld_tgt_count)
+ GOTO(out, rc = -EINVAL);
+
+ /* If OST is inactive, return extent with UNKNOWN flag */
+ if (!lov->lov_tgts[ost_index]->ltd_active) {
+ fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
+ fm_local->fm_mapped_extents = 1;
+
+ lcl_fm_ext[0].fe_logical = lun_start;
+ lcl_fm_ext[0].fe_length = obd_object_end -
+ lun_start;
+ lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+
+ goto inactive_tgt;
+ }
+
+ fm_local->fm_start = lun_start;
+ fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local));
+ *vallen=fiemap_count_to_size(fm_local->fm_extent_count);
+ rc = obd_get_info(NULL,
+ lov->lov_tgts[ost_index]->ltd_exp,
+ keylen, key, vallen, fm_local, lsm);
+ if (rc != 0)
+ GOTO(out, rc);
+
+inactive_tgt:
+ ext_count = fm_local->fm_mapped_extents;
+ if (ext_count == 0) {
+ ost_done = 1;
+ /* If last stripe has hole at the end,
+ * then we need to return */
+ if (cur_stripe_wrap == last_stripe) {
+ fiemap->fm_mapped_extents = 0;
+ goto finish;
+ }
+ break;
+ }
+
+ /* If we just need num of extents then go to next device */
+ if (get_num_extents) {
+ current_extent += ext_count;
+ break;
+ }
+
+ len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical -
+ lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+
+ /* Have we finished mapping on this device? */
+ if (req_fm_len <= len_mapped_single_call)
+ ost_done = 1;
+
+ /* Clear the EXTENT_LAST flag which can be present on
+ * last extent */
+ if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST)
+ lcl_fm_ext[ext_count - 1].fe_flags &=
+ ~FIEMAP_EXTENT_LAST;
+
+ curr_loc = lov_stripe_size(lsm,
+ lcl_fm_ext[ext_count - 1].fe_logical+
+ lcl_fm_ext[ext_count - 1].fe_length,
+ cur_stripe);
+ if (curr_loc >= fm_key->oa.o_size)
+ ost_eof = 1;
+
+ fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
+ ost_index, ext_count,
+ current_extent);
+
+ current_extent += ext_count;
+
+ /* Ran out of available extents? */
+ if (current_extent >= fiemap->fm_extent_count)
+ goto finish;
+ } while (ost_done == 0 && ost_eof == 0);
+
+ if (cur_stripe_wrap == last_stripe)
+ goto finish;
+ }
+
+finish:
+ /* Indicate that we are returning device offsets unless file just has
+ * single stripe */
+ if (lsm->lsm_stripe_count > 1)
+ fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
+
+ if (get_num_extents)
+ goto skip_last_device_calc;
+
+ /* Check if we have reached the last stripe and whether mapping for that
+ * stripe is done. */
+ if (cur_stripe_wrap == last_stripe) {
+ if (ost_done || ost_eof)
+ fiemap->fm_extents[current_extent - 1].fe_flags |=
+ FIEMAP_EXTENT_LAST;
+ }
+
+skip_last_device_calc:
+ fiemap->fm_mapped_extents = current_extent;
+
+out:
+ OBD_FREE_LARGE(fm_local, buffer_size);
+ return rc;
+}
+
+static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct lov_obd *lov = &obddev->u.lov;
+ int i, rc;
+ ENTRY;
+
+ if (!vallen || !val)
+ RETURN(-EFAULT);
+
+ obd_getref(obddev);
+
+ if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
+ struct {
+ char name[16];
+ struct ldlm_lock *lock;
+ } *data = key;
+ struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name;
+ struct lov_oinfo *loi;
+ __u32 *stripe = val;
+
+ if (*vallen < sizeof(*stripe))
+ GOTO(out, rc = -EFAULT);
+ *vallen = sizeof(*stripe);
+
+ /* XXX This is another one of those bits that will need to
+ * change if we ever actually support nested LOVs. It uses
+ * the lock's export to find out which stripe it is. */
+ /* XXX - it's assumed all the locks for deleted OSTs have
+ * been cancelled. Also, the export for deleted OSTs will
+ * be NULL and won't match the lock's export. */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ loi = lsm->lsm_oinfo[i];
+ if (!lov->lov_tgts[loi->loi_ost_idx])
+ continue;
+ if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp ==
+ data->lock->l_conn_export &&
+ ostid_res_name_eq(&loi->loi_oi, res_id)) {
+ *stripe = i;
+ GOTO(out, rc = 0);
+ }
+ }
+ LDLM_ERROR(data->lock, "lock on inode without such object");
+ dump_lsm(D_ERROR, lsm);
+ GOTO(out, rc = -ENXIO);
+ } else if (KEY_IS(KEY_LAST_ID)) {
+ struct obd_id_info *info = val;
+ __u32 size = sizeof(obd_id);
+ struct lov_tgt_desc *tgt;
+
+ LASSERT(*vallen == sizeof(struct obd_id_info));
+ tgt = lov->lov_tgts[info->idx];
+
+ if (!tgt || !tgt->ltd_active)
+ GOTO(out, rc = -ESRCH);
+
+ rc = obd_get_info(env, tgt->ltd_exp, keylen, key,
+ &size, info->data, NULL);
+ GOTO(out, rc = 0);
+ } else if (KEY_IS(KEY_LOVDESC)) {
+ struct lov_desc *desc_ret = val;
+ *desc_ret = lov->desc;
+
+ GOTO(out, rc = 0);
+ } else if (KEY_IS(KEY_FIEMAP)) {
+ rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
+ GOTO(out, rc);
+ } else if (KEY_IS(KEY_CONNECT_FLAG)) {
+ struct lov_tgt_desc *tgt;
+ __u64 ost_idx = *((__u64*)val);
+
+ LASSERT(*vallen == sizeof(__u64));
+ LASSERT(ost_idx < lov->desc.ld_tgt_count);
+ tgt = lov->lov_tgts[ost_idx];
+
+ if (!tgt || !tgt->ltd_exp)
+ GOTO(out, rc = -ESRCH);
+
+ *((__u64 *)val) = exp_connect_flags(tgt->ltd_exp);
+ GOTO(out, rc = 0);
+ } else if (KEY_IS(KEY_TGT_COUNT)) {
+ *((int *)val) = lov->desc.ld_tgt_count;
+ GOTO(out, rc = 0);
+ }
+
+ rc = -EINVAL;
+
+out:
+ obd_putref(obddev);
+ RETURN(rc);
+}
+
+static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, obd_count vallen,
+ void *val, struct ptlrpc_request_set *set)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct lov_obd *lov = &obddev->u.lov;
+ obd_count count;
+ int i, rc = 0, err;
+ struct lov_tgt_desc *tgt;
+ unsigned incr, check_uuid,
+ do_inactive, no_set;
+ unsigned next_id = 0, mds_con = 0, capa = 0;
+ ENTRY;
+
+ incr = check_uuid = do_inactive = no_set = 0;
+ if (set == NULL) {
+ no_set = 1;
+ set = ptlrpc_prep_set();
+ if (!set)
+ RETURN(-ENOMEM);
+ }
+
+ obd_getref(obddev);
+ count = lov->desc.ld_tgt_count;
+
+ if (KEY_IS(KEY_NEXT_ID)) {
+ count = vallen / sizeof(struct obd_id_info);
+ vallen = sizeof(obd_id);
+ incr = sizeof(struct obd_id_info);
+ do_inactive = 1;
+ next_id = 1;
+ } else if (KEY_IS(KEY_CHECKSUM)) {
+ do_inactive = 1;
+ } else if (KEY_IS(KEY_EVICT_BY_NID)) {
+ /* use defaults: do_inactive = incr = 0; */
+ } else if (KEY_IS(KEY_MDS_CONN)) {
+ mds_con = 1;
+ } else if (KEY_IS(KEY_CAPA_KEY)) {
+ capa = 1;
+ } else if (KEY_IS(KEY_CACHE_SET)) {
+ LASSERT(lov->lov_cache == NULL);
+ lov->lov_cache = val;
+ do_inactive = 1;
+ }
+
+ for (i = 0; i < count; i++, val = (char *)val + incr) {
+ if (next_id) {
+ tgt = lov->lov_tgts[((struct obd_id_info*)val)->idx];
+ } else {
+ tgt = lov->lov_tgts[i];
+ }
+ /* OST was disconnected */
+ if (!tgt || !tgt->ltd_exp)
+ continue;
+
+ /* OST is inactive and we don't want inactive OSCs */
+ if (!tgt->ltd_active && !do_inactive)
+ continue;
+
+ if (mds_con) {
+ struct mds_group_info *mgi;
+
+ LASSERT(vallen == sizeof(*mgi));
+ mgi = (struct mds_group_info *)val;
+
+ /* Only want a specific OSC */
+ if (mgi->uuid && !obd_uuid_equals(mgi->uuid,
+ &tgt->ltd_uuid))
+ continue;
+
+ err = obd_set_info_async(env, tgt->ltd_exp,
+ keylen, key, sizeof(int),
+ &mgi->group, set);
+ } else if (next_id) {
+ err = obd_set_info_async(env, tgt->ltd_exp,
+ keylen, key, vallen,
+ ((struct obd_id_info*)val)->data, set);
+ } else if (capa) {
+ struct mds_capa_info *info = (struct mds_capa_info*)val;
+
+ LASSERT(vallen == sizeof(*info));
+
+ /* Only want a specific OSC */
+ if (info->uuid &&
+ !obd_uuid_equals(info->uuid, &tgt->ltd_uuid))
+ continue;
+
+ err = obd_set_info_async(env, tgt->ltd_exp, keylen,
+ key, sizeof(*info->capa),
+ info->capa, set);
+ } else {
+ /* Only want a specific OSC */
+ if (check_uuid &&
+ !obd_uuid_equals(val, &tgt->ltd_uuid))
+ continue;
+
+ err = obd_set_info_async(env, tgt->ltd_exp,
+ keylen, key, vallen, val, set);
+ }
+
+ if (!rc)
+ rc = err;
+ }
+
+ obd_putref(obddev);
+ if (no_set) {
+ err = ptlrpc_set_wait(set);
+ if (!rc)
+ rc = err;
+ ptlrpc_set_destroy(set);
+ }
+ RETURN(rc);
+}
+
+static int lov_extent_calc(struct obd_export *exp, struct lov_stripe_md *lsm,
+ int cmd, __u64 *offset)
+{
+ __u32 ssize = lsm->lsm_stripe_size;
+ __u64 start;
+
+ start = *offset;
+ lov_do_div64(start, ssize);
+ start = start * ssize;
+
+ CDEBUG(D_DLMTRACE, "offset "LPU64", stripe %u, start "LPU64
+ ", end "LPU64"\n", *offset, ssize, start,
+ start + ssize - 1);
+ if (cmd == OBD_CALC_STRIPE_END) {
+ *offset = start + ssize - 1;
+ } else if (cmd == OBD_CALC_STRIPE_START) {
+ *offset = start;
+ } else {
+ LBUG();
+ }
+
+ RETURN(0);
+}
+
+void lov_stripe_lock(struct lov_stripe_md *md)
+{
+ LASSERT(md->lsm_lock_owner != current_pid());
+ spin_lock(&md->lsm_lock);
+ LASSERT(md->lsm_lock_owner == 0);
+ md->lsm_lock_owner = current_pid();
+}
+EXPORT_SYMBOL(lov_stripe_lock);
+
+void lov_stripe_unlock(struct lov_stripe_md *md)
+{
+ LASSERT(md->lsm_lock_owner == current_pid());
+ md->lsm_lock_owner = 0;
+ spin_unlock(&md->lsm_lock);
+}
+EXPORT_SYMBOL(lov_stripe_unlock);
+
+static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_tgt_desc *tgt;
+ __u64 curspace = 0;
+ __u64 bhardlimit = 0;
+ int i, rc = 0;
+ ENTRY;
+
+ if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON &&
+ oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF &&
+ oqctl->qc_cmd != Q_GETOQUOTA &&
+ oqctl->qc_cmd != Q_INITQUOTA &&
+ oqctl->qc_cmd != LUSTRE_Q_SETQUOTA &&
+ oqctl->qc_cmd != Q_FINVALIDATE) {
+ CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd);
+ RETURN(-EFAULT);
+ }
+
+ /* for lov tgt */
+ obd_getref(obd);
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ int err;
+
+ tgt = lov->lov_tgts[i];
+
+ if (!tgt)
+ continue;
+
+ if (!tgt->ltd_active || tgt->ltd_reap) {
+ if (oqctl->qc_cmd == Q_GETOQUOTA &&
+ lov->lov_tgts[i]->ltd_activate) {
+ rc = -EREMOTEIO;
+ CERROR("ost %d is inactive\n", i);
+ } else {
+ CDEBUG(D_HA, "ost %d is inactive\n", i);
+ }
+ continue;
+ }
+
+ err = obd_quotactl(tgt->ltd_exp, oqctl);
+ if (err) {
+ if (tgt->ltd_active && !rc)
+ rc = err;
+ continue;
+ }
+
+ if (oqctl->qc_cmd == Q_GETOQUOTA) {
+ curspace += oqctl->qc_dqblk.dqb_curspace;
+ bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit;
+ }
+ }
+ obd_putref(obd);
+
+ if (oqctl->qc_cmd == Q_GETOQUOTA) {
+ oqctl->qc_dqblk.dqb_curspace = curspace;
+ oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit;
+ }
+ RETURN(rc);
+}
+
+static int lov_quotacheck(struct obd_device *obd, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ int i, rc = 0;
+ ENTRY;
+
+ obd_getref(obd);
+
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ if (!lov->lov_tgts[i])
+ continue;
+
+ /* Skip quota check on the administratively disabled OSTs. */
+ if (!lov->lov_tgts[i]->ltd_activate) {
+ CWARN("lov idx %d was administratively disabled, "
+ "skip quotacheck on it.\n", i);
+ continue;
+ }
+
+ if (!lov->lov_tgts[i]->ltd_active) {
+ CERROR("lov idx %d inactive\n", i);
+ rc = -EIO;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ int err;
+
+ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_activate)
+ continue;
+
+ err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl);
+ if (err && !rc)
+ rc = err;
+ }
+
+out:
+ obd_putref(obd);
+
+ RETURN(rc);
+}
+
+struct obd_ops lov_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = lov_setup,
+ .o_precleanup = lov_precleanup,
+ .o_cleanup = lov_cleanup,
+ //.o_process_config = lov_process_config,
+ .o_connect = lov_connect,
+ .o_disconnect = lov_disconnect,
+ .o_statfs = lov_statfs,
+ .o_statfs_async = lov_statfs_async,
+ .o_packmd = lov_packmd,
+ .o_unpackmd = lov_unpackmd,
+ .o_create = lov_create,
+ .o_destroy = lov_destroy,
+ .o_getattr = lov_getattr,
+ .o_getattr_async = lov_getattr_async,
+ .o_setattr = lov_setattr,
+ .o_setattr_async = lov_setattr_async,
+ .o_brw = lov_brw,
+ .o_merge_lvb = lov_merge_lvb,
+ .o_adjust_kms = lov_adjust_kms,
+ .o_punch = lov_punch,
+ .o_sync = lov_sync,
+ .o_enqueue = lov_enqueue,
+ .o_change_cbdata = lov_change_cbdata,
+ .o_find_cbdata = lov_find_cbdata,
+ .o_cancel = lov_cancel,
+ .o_cancel_unused = lov_cancel_unused,
+ .o_iocontrol = lov_iocontrol,
+ .o_get_info = lov_get_info,
+ .o_set_info_async = lov_set_info_async,
+ .o_extent_calc = lov_extent_calc,
+ .o_llog_init = lov_llog_init,
+ .o_llog_finish = lov_llog_finish,
+ .o_notify = lov_notify,
+ .o_pool_new = lov_pool_new,
+ .o_pool_rem = lov_pool_remove,
+ .o_pool_add = lov_pool_add,
+ .o_pool_del = lov_pool_del,
+ .o_getref = lov_getref,
+ .o_putref = lov_putref,
+ .o_quotactl = lov_quotactl,
+ .o_quotacheck = lov_quotacheck,
+};
+
+struct kmem_cache *lov_oinfo_slab;
+
+extern struct lu_kmem_descr lov_caches[];
+
+int __init lov_init(void)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc;
+ ENTRY;
+
+ /* print an address of _any_ initialized kernel symbol from this
+ * module, to allow debugging with gdb that doesn't support data
+ * symbols from modules.*/
+ CDEBUG(D_INFO, "Lustre LOV module (%p).\n", &lov_caches);
+
+ rc = lu_kmem_init(lov_caches);
+ if (rc)
+ return rc;
+
+ lov_oinfo_slab = kmem_cache_create("lov_oinfo",
+ sizeof(struct lov_oinfo),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (lov_oinfo_slab == NULL) {
+ lu_kmem_fini(lov_caches);
+ return -ENOMEM;
+ }
+ lprocfs_lov_init_vars(&lvars);
+
+ rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars,
+ LUSTRE_LOV_NAME, &lov_device_type);
+
+ if (rc) {
+ kmem_cache_destroy(lov_oinfo_slab);
+ lu_kmem_fini(lov_caches);
+ }
+
+ RETURN(rc);
+}
+
+static void /*__exit*/ lov_exit(void)
+{
+ class_unregister_type(LUSTRE_LOV_NAME);
+ kmem_cache_destroy(lov_oinfo_slab);
+
+ lu_kmem_fini(lov_caches);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver");
+MODULE_LICENSE("GPL");
+
+cfs_module(lov, LUSTRE_VERSION_STRING, lov_init, lov_exit);
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
new file mode 100644
index 000000000000..aa8ae80e8121
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -0,0 +1,942 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_object for LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+#include <lustre_debug.h>
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Layout operations.
+ *
+ */
+
+struct lov_layout_operations {
+ int (*llo_init)(const struct lu_env *env, struct lov_device *dev,
+ struct lov_object *lov,
+ const struct cl_object_conf *conf,
+ union lov_layout_state *state);
+ int (*llo_delete)(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state);
+ void (*llo_fini)(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state);
+ void (*llo_install)(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state);
+ int (*llo_print)(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o);
+ int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+ int (*llo_lock_init)(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *io);
+ int (*llo_io_init)(const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io);
+ int (*llo_getattr)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr);
+};
+
+static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
+
+/*****************************************************************************
+ *
+ * Lov object layout operations.
+ *
+ */
+
+static void lov_install_empty(const struct lu_env *env,
+ struct lov_object *lov,
+ union lov_layout_state *state)
+{
+ /*
+ * File without objects.
+ */
+}
+
+static int lov_init_empty(const struct lu_env *env,
+ struct lov_device *dev, struct lov_object *lov,
+ const struct cl_object_conf *conf,
+ union lov_layout_state *state)
+{
+ return 0;
+}
+
+static void lov_install_raid0(const struct lu_env *env,
+ struct lov_object *lov,
+ union lov_layout_state *state)
+{
+}
+
+static struct cl_object *lov_sub_find(const struct lu_env *env,
+ struct cl_device *dev,
+ const struct lu_fid *fid,
+ const struct cl_object_conf *conf)
+{
+ struct lu_object *o;
+
+ ENTRY;
+ o = lu_object_find_at(env, cl2lu_dev(dev), fid, &conf->coc_lu);
+ LASSERT(ergo(!IS_ERR(o), o->lo_dev->ld_type == &lovsub_device_type));
+ RETURN(lu2cl(o));
+}
+
+static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
+ struct cl_object *stripe,
+ struct lov_layout_raid0 *r0, int idx)
+{
+ struct cl_object_header *hdr;
+ struct cl_object_header *subhdr;
+ struct cl_object_header *parent;
+ struct lov_oinfo *oinfo;
+ int result;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
+ /* For sanity:test_206.
+ * Do not leave the object in cache to avoid accessing
+ * freed memory. This is because osc_object is referring to
+ * lov_oinfo of lsm_stripe_data which will be freed due to
+ * this failure. */
+ cl_object_kill(env, stripe);
+ cl_object_put(env, stripe);
+ return -EIO;
+ }
+
+ hdr = cl_object_header(lov2cl(lov));
+ subhdr = cl_object_header(stripe);
+ parent = subhdr->coh_parent;
+
+ oinfo = lov->lo_lsm->lsm_oinfo[idx];
+ CDEBUG(D_INODE, DFID"@%p[%d] -> "DFID"@%p: ostid: "DOSTID
+ " idx: %d gen: %d\n",
+ PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
+ PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi),
+ oinfo->loi_ost_idx, oinfo->loi_ost_gen);
+
+ if (parent == NULL) {
+ subhdr->coh_parent = hdr;
+ subhdr->coh_nesting = hdr->coh_nesting + 1;
+ lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
+ r0->lo_sub[idx] = cl2lovsub(stripe);
+ r0->lo_sub[idx]->lso_super = lov;
+ r0->lo_sub[idx]->lso_index = idx;
+ result = 0;
+ } else {
+ struct lu_object *old_obj;
+ struct lov_object *old_lov;
+ unsigned int mask = D_INODE;
+
+ old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type);
+ LASSERT(old_obj != NULL);
+ old_lov = cl2lov(lu2cl(old_obj));
+ if (old_lov->lo_layout_invalid) {
+ /* the object's layout has already changed but isn't
+ * refreshed */
+ lu_object_unhash(env, &stripe->co_lu);
+ result = -EAGAIN;
+ } else {
+ mask = D_ERROR;
+ result = -EIO;
+ }
+
+ LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
+ "stripe %d is already owned.\n", idx);
+ LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
+ LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
+ cl_object_put(env, stripe);
+ }
+ return result;
+}
+
+static int lov_init_raid0(const struct lu_env *env,
+ struct lov_device *dev, struct lov_object *lov,
+ const struct cl_object_conf *conf,
+ union lov_layout_state *state)
+{
+ int result;
+ int i;
+
+ struct cl_object *stripe;
+ struct lov_thread_info *lti = lov_env_info(env);
+ struct cl_object_conf *subconf = &lti->lti_stripe_conf;
+ struct lov_stripe_md *lsm = conf->u.coc_md->lsm;
+ struct lu_fid *ofid = &lti->lti_fid;
+ struct lov_layout_raid0 *r0 = &state->raid0;
+
+ ENTRY;
+
+ if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
+ dump_lsm(D_ERROR, lsm);
+ LASSERTF(0, "magic mismatch, expected %d/%d, actual %d.\n",
+ LOV_MAGIC_V1, LOV_MAGIC_V3, lsm->lsm_magic);
+ }
+
+ LASSERT(lov->lo_lsm == NULL);
+ lov->lo_lsm = lsm_addref(lsm);
+ r0->lo_nr = lsm->lsm_stripe_count;
+ LASSERT(r0->lo_nr <= lov_targets_nr(dev));
+
+ OBD_ALLOC_LARGE(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
+ if (r0->lo_sub != NULL) {
+ result = 0;
+ subconf->coc_inode = conf->coc_inode;
+ spin_lock_init(&r0->lo_sub_lock);
+ /*
+ * Create stripe cl_objects.
+ */
+ for (i = 0; i < r0->lo_nr && result == 0; ++i) {
+ struct cl_device *subdev;
+ struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
+ int ost_idx = oinfo->loi_ost_idx;
+
+ result = ostid_to_fid(ofid, &oinfo->loi_oi,
+ oinfo->loi_ost_idx);
+ if (result != 0)
+ GOTO(out, result);
+
+ subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
+ subconf->u.coc_oinfo = oinfo;
+ LASSERTF(subdev != NULL, "not init ost %d\n", ost_idx);
+ /* In the function below, .hs_keycmp resolves to
+ * lu_obj_hop_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ stripe = lov_sub_find(env, subdev, ofid, subconf);
+ if (!IS_ERR(stripe)) {
+ result = lov_init_sub(env, lov, stripe, r0, i);
+ if (result == -EAGAIN) { /* try again */
+ --i;
+ result = 0;
+ }
+ } else {
+ result = PTR_ERR(stripe);
+ }
+ }
+ } else
+ result = -ENOMEM;
+out:
+ RETURN(result);
+}
+
+static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state)
+{
+ LASSERT(lov->lo_type == LLT_EMPTY);
+
+ lov_layout_wait(env, lov);
+
+ cl_object_prune(env, &lov->lo_cl);
+ return 0;
+}
+
+static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
+ struct lovsub_object *los, int idx)
+{
+ struct cl_object *sub;
+ struct lov_layout_raid0 *r0;
+ struct lu_site *site;
+ struct lu_site_bkt_data *bkt;
+ wait_queue_t *waiter;
+
+ r0 = &lov->u.raid0;
+ LASSERT(r0->lo_sub[idx] == los);
+
+ sub = lovsub2cl(los);
+ site = sub->co_lu.lo_dev->ld_site;
+ bkt = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);
+
+ cl_object_kill(env, sub);
+ /* release a reference to the sub-object and ... */
+ lu_object_ref_del(&sub->co_lu, "lov-parent", lov);
+ cl_object_put(env, sub);
+
+ /* ... wait until it is actually destroyed---sub-object clears its
+ * ->lo_sub[] slot in lovsub_object_fini() */
+ if (r0->lo_sub[idx] == los) {
+ waiter = &lov_env_info(env)->lti_waiter;
+ init_waitqueue_entry_current(waiter);
+ add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (1) {
+ /* this wait-queue is signaled at the end of
+ * lu_object_free(). */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_lock(&r0->lo_sub_lock);
+ if (r0->lo_sub[idx] == los) {
+ spin_unlock(&r0->lo_sub_lock);
+ waitq_wait(waiter, TASK_UNINTERRUPTIBLE);
+ } else {
+ spin_unlock(&r0->lo_sub_lock);
+ set_current_state(TASK_RUNNING);
+ break;
+ }
+ }
+ remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
+ }
+ LASSERT(r0->lo_sub[idx] == NULL);
+}
+
+static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state)
+{
+ struct lov_layout_raid0 *r0 = &state->raid0;
+ struct lov_stripe_md *lsm = lov->lo_lsm;
+ int i;
+
+ ENTRY;
+
+ dump_lsm(D_INODE, lsm);
+
+ lov_layout_wait(env, lov);
+ if (r0->lo_sub != NULL) {
+ for (i = 0; i < r0->lo_nr; ++i) {
+ struct lovsub_object *los = r0->lo_sub[i];
+
+ if (los != NULL) {
+ cl_locks_prune(env, &los->lso_cl, 1);
+ /*
+ * If top-level object is to be evicted from
+ * the cache, so are its sub-objects.
+ */
+ lov_subobject_kill(env, lov, los, i);
+ }
+ }
+ }
+ cl_object_prune(env, &lov->lo_cl);
+ RETURN(0);
+}
+
+static void lov_fini_empty(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state)
+{
+ LASSERT(lov->lo_type == LLT_EMPTY);
+}
+
+static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov,
+ union lov_layout_state *state)
+{
+ struct lov_layout_raid0 *r0 = &state->raid0;
+ ENTRY;
+
+ if (r0->lo_sub != NULL) {
+ OBD_FREE_LARGE(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
+ r0->lo_sub = NULL;
+ }
+
+ dump_lsm(D_INODE, lov->lo_lsm);
+ lov_free_memmd(&lov->lo_lsm);
+
+ EXIT;
+}
+
+static int lov_print_empty(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o)
+{
+ (*p)(env, cookie, "empty %d\n", lu2lov(o)->lo_layout_invalid);
+ return 0;
+}
+
+static int lov_print_raid0(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o)
+{
+ struct lov_object *lov = lu2lov(o);
+ struct lov_layout_raid0 *r0 = lov_r0(lov);
+ struct lov_stripe_md *lsm = lov->lo_lsm;
+ int i;
+
+ (*p)(env, cookie, "stripes: %d, %svalid, lsm{%p 0x%08X %d %u %u}: \n",
+ r0->lo_nr, lov->lo_layout_invalid ? "in" : "", lsm,
+ lsm->lsm_magic, atomic_read(&lsm->lsm_refc),
+ lsm->lsm_stripe_count, lsm->lsm_layout_gen);
+ for (i = 0; i < r0->lo_nr; ++i) {
+ struct lu_object *sub;
+
+ if (r0->lo_sub[i] != NULL) {
+ sub = lovsub2lu(r0->lo_sub[i]);
+ lu_object_print(env, cookie, p, sub);
+ } else
+ (*p)(env, cookie, "sub %d absent\n", i);
+ }
+ return 0;
+}
+
+/**
+ * Implements cl_object_operations::coo_attr_get() method for an object
+ * without stripes (LLT_EMPTY layout type).
+ *
+ * The only attributes this layer is authoritative in this case is
+ * cl_attr::cat_blocks---it's 0.
+ */
+static int lov_attr_get_empty(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ attr->cat_blocks = 0;
+ return 0;
+}
+
+static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ struct lov_object *lov = cl2lov(obj);
+ struct lov_layout_raid0 *r0 = lov_r0(lov);
+ struct cl_attr *lov_attr = &r0->lo_attr;
+ int result = 0;
+
+ ENTRY;
+
+ /* this is called w/o holding type guard mutex, so it must be inside
+ * an on going IO otherwise lsm may be replaced.
+ * LU-2117: it turns out there exists one exception. For mmaped files,
+ * the lock of those files may be requested in the other file's IO
+ * context, and this function is called in ccc_lock_state(), it will
+ * hit this assertion.
+ * Anyway, it's still okay to call attr_get w/o type guard as layout
+ * can't go if locks exist. */
+ /* LASSERT(atomic_read(&lsm->lsm_refc) > 1); */
+
+ if (!r0->lo_attr_valid) {
+ struct lov_stripe_md *lsm = lov->lo_lsm;
+ struct ost_lvb *lvb = &lov_env_info(env)->lti_lvb;
+ __u64 kms = 0;
+
+ memset(lvb, 0, sizeof(*lvb));
+ /* XXX: timestamps can be negative by sanity:test_39m,
+ * how can it be? */
+ lvb->lvb_atime = LLONG_MIN;
+ lvb->lvb_ctime = LLONG_MIN;
+ lvb->lvb_mtime = LLONG_MIN;
+
+ /*
+ * XXX that should be replaced with a loop over sub-objects,
+ * doing cl_object_attr_get() on them. But for now, let's
+ * reuse old lov code.
+ */
+
+ /*
+ * XXX take lsm spin-lock to keep lov_merge_lvb_kms()
+ * happy. It's not needed, because new code uses
+ * ->coh_attr_guard spin-lock to protect consistency of
+ * sub-object attributes.
+ */
+ lov_stripe_lock(lsm);
+ result = lov_merge_lvb_kms(lsm, lvb, &kms);
+ lov_stripe_unlock(lsm);
+ if (result == 0) {
+ cl_lvb2attr(lov_attr, lvb);
+ lov_attr->cat_kms = kms;
+ r0->lo_attr_valid = 1;
+ }
+ }
+ if (result == 0) { /* merge results */
+ attr->cat_blocks = lov_attr->cat_blocks;
+ attr->cat_size = lov_attr->cat_size;
+ attr->cat_kms = lov_attr->cat_kms;
+ if (attr->cat_atime < lov_attr->cat_atime)
+ attr->cat_atime = lov_attr->cat_atime;
+ if (attr->cat_ctime < lov_attr->cat_ctime)
+ attr->cat_ctime = lov_attr->cat_ctime;
+ if (attr->cat_mtime < lov_attr->cat_mtime)
+ attr->cat_mtime = lov_attr->cat_mtime;
+ }
+ RETURN(result);
+}
+
+const static struct lov_layout_operations lov_dispatch[] = {
+ [LLT_EMPTY] = {
+ .llo_init = lov_init_empty,
+ .llo_delete = lov_delete_empty,
+ .llo_fini = lov_fini_empty,
+ .llo_install = lov_install_empty,
+ .llo_print = lov_print_empty,
+ .llo_page_init = lov_page_init_empty,
+ .llo_lock_init = lov_lock_init_empty,
+ .llo_io_init = lov_io_init_empty,
+ .llo_getattr = lov_attr_get_empty
+ },
+ [LLT_RAID0] = {
+ .llo_init = lov_init_raid0,
+ .llo_delete = lov_delete_raid0,
+ .llo_fini = lov_fini_raid0,
+ .llo_install = lov_install_raid0,
+ .llo_print = lov_print_raid0,
+ .llo_page_init = lov_page_init_raid0,
+ .llo_lock_init = lov_lock_init_raid0,
+ .llo_io_init = lov_io_init_raid0,
+ .llo_getattr = lov_attr_get_raid0
+ }
+};
+
+
+/**
+ * Performs a double-dispatch based on the layout type of an object.
+ */
+#define LOV_2DISPATCH_NOLOCK(obj, op, ...) \
+({ \
+ struct lov_object *__obj = (obj); \
+ enum lov_layout_type __llt; \
+ \
+ __llt = __obj->lo_type; \
+ LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch)); \
+ lov_dispatch[__llt].op(__VA_ARGS__); \
+})
+
+static inline void lov_conf_freeze(struct lov_object *lov)
+{
+ if (lov->lo_owner != current)
+ down_read(&lov->lo_type_guard);
+}
+
+static inline void lov_conf_thaw(struct lov_object *lov)
+{
+ if (lov->lo_owner != current)
+ up_read(&lov->lo_type_guard);
+}
+
+#define LOV_2DISPATCH_MAYLOCK(obj, op, lock, ...) \
+({ \
+ struct lov_object *__obj = (obj); \
+ int __lock = !!(lock); \
+ typeof(lov_dispatch[0].op(__VA_ARGS__)) __result; \
+ \
+ if (__lock) \
+ lov_conf_freeze(__obj); \
+ __result = LOV_2DISPATCH_NOLOCK(obj, op, __VA_ARGS__); \
+ if (__lock) \
+ lov_conf_thaw(__obj); \
+ __result; \
+})
+
+/**
+ * Performs a locked double-dispatch based on the layout type of an object.
+ */
+#define LOV_2DISPATCH(obj, op, ...) \
+ LOV_2DISPATCH_MAYLOCK(obj, op, 1, __VA_ARGS__)
+
+#define LOV_2DISPATCH_VOID(obj, op, ...) \
+do { \
+ struct lov_object *__obj = (obj); \
+ enum lov_layout_type __llt; \
+ \
+ lov_conf_freeze(__obj); \
+ __llt = __obj->lo_type; \
+ LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch)); \
+ lov_dispatch[__llt].op(__VA_ARGS__); \
+ lov_conf_thaw(__obj); \
+} while (0)
+
+static void lov_conf_lock(struct lov_object *lov)
+{
+ LASSERT(lov->lo_owner != current);
+ down_write(&lov->lo_type_guard);
+ LASSERT(lov->lo_owner == NULL);
+ lov->lo_owner = current;
+}
+
+static void lov_conf_unlock(struct lov_object *lov)
+{
+ lov->lo_owner = NULL;
+ up_write(&lov->lo_type_guard);
+}
+
+static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov)
+{
+ struct l_wait_info lwi = { 0 };
+ ENTRY;
+
+ while (atomic_read(&lov->lo_active_ios) > 0) {
+ CDEBUG(D_INODE, "file:"DFID" wait for active IO, now: %d.\n",
+ PFID(lu_object_fid(lov2lu(lov))),
+ atomic_read(&lov->lo_active_ios));
+
+ l_wait_event(lov->lo_waitq,
+ atomic_read(&lov->lo_active_ios) == 0, &lwi);
+ }
+ RETURN(0);
+}
+
+static int lov_layout_change(const struct lu_env *unused,
+ struct lov_object *lov,
+ const struct cl_object_conf *conf)
+{
+ int result;
+ enum lov_layout_type llt = LLT_EMPTY;
+ union lov_layout_state *state = &lov->u;
+ const struct lov_layout_operations *old_ops;
+ const struct lov_layout_operations *new_ops;
+
+ struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
+ void *cookie;
+ struct lu_env *env;
+ int refcheck;
+ ENTRY;
+
+ LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch));
+
+ if (conf->u.coc_md != NULL && conf->u.coc_md->lsm != NULL)
+ llt = LLT_RAID0; /* only raid0 is supported. */
+ LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));
+
+ cookie = cl_env_reenter();
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env)) {
+ cl_env_reexit(cookie);
+ RETURN(PTR_ERR(env));
+ }
+
+ old_ops = &lov_dispatch[lov->lo_type];
+ new_ops = &lov_dispatch[llt];
+
+ result = old_ops->llo_delete(env, lov, &lov->u);
+ if (result == 0) {
+ old_ops->llo_fini(env, lov, &lov->u);
+
+ LASSERT(atomic_read(&lov->lo_active_ios) == 0);
+ LASSERT(hdr->coh_tree.rnode == NULL);
+ LASSERT(hdr->coh_pages == 0);
+
+ lov->lo_type = LLT_EMPTY;
+ result = new_ops->llo_init(env,
+ lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
+ lov, conf, state);
+ if (result == 0) {
+ new_ops->llo_install(env, lov, state);
+ lov->lo_type = llt;
+ } else {
+ new_ops->llo_delete(env, lov, state);
+ new_ops->llo_fini(env, lov, state);
+ /* this file becomes an EMPTY file. */
+ }
+ }
+
+ cl_env_put(env, &refcheck);
+ cl_env_reexit(cookie);
+ RETURN(result);
+}
+
+/*****************************************************************************
+ *
+ * Lov object operations.
+ *
+ */
+
+int lov_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct lov_device *dev = lu2lov_dev(obj->lo_dev);
+ struct lov_object *lov = lu2lov(obj);
+ const struct cl_object_conf *cconf = lu2cl_conf(conf);
+ union lov_layout_state *set = &lov->u;
+ const struct lov_layout_operations *ops;
+ int result;
+
+ ENTRY;
+ init_rwsem(&lov->lo_type_guard);
+ atomic_set(&lov->lo_active_ios, 0);
+ init_waitqueue_head(&lov->lo_waitq);
+
+ cl_object_page_init(lu2cl(obj), sizeof(struct lov_page));
+
+ /* no locking is necessary, as object is being created */
+ lov->lo_type = cconf->u.coc_md->lsm != NULL ? LLT_RAID0 : LLT_EMPTY;
+ ops = &lov_dispatch[lov->lo_type];
+ result = ops->llo_init(env, dev, lov, cconf, set);
+ if (result == 0)
+ ops->llo_install(env, lov, set);
+ RETURN(result);
+}
+
+static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf)
+{
+ struct lov_stripe_md *lsm = NULL;
+ struct lov_object *lov = cl2lov(obj);
+ int result = 0;
+ ENTRY;
+
+ lov_conf_lock(lov);
+ if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
+ lov->lo_layout_invalid = true;
+ GOTO(out, result = 0);
+ }
+
+ if (conf->coc_opc == OBJECT_CONF_WAIT) {
+ if (lov->lo_layout_invalid &&
+ atomic_read(&lov->lo_active_ios) > 0) {
+ lov_conf_unlock(lov);
+ result = lov_layout_wait(env, lov);
+ lov_conf_lock(lov);
+ }
+ GOTO(out, result);
+ }
+
+ LASSERT(conf->coc_opc == OBJECT_CONF_SET);
+
+ if (conf->u.coc_md != NULL)
+ lsm = conf->u.coc_md->lsm;
+ if ((lsm == NULL && lov->lo_lsm == NULL) ||
+ (lsm != NULL && lov->lo_lsm != NULL &&
+ lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen)) {
+ /* same version of layout */
+ lov->lo_layout_invalid = false;
+ GOTO(out, result = 0);
+ }
+
+ /* will change layout - check if there still exists active IO. */
+ if (atomic_read(&lov->lo_active_ios) > 0) {
+ lov->lo_layout_invalid = true;
+ GOTO(out, result = -EBUSY);
+ }
+
+ lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
+ EXIT;
+
+out:
+ lov_conf_unlock(lov);
+ RETURN(result);
+}
+
+static void lov_object_delete(const struct lu_env *env, struct lu_object *obj)
+{
+ struct lov_object *lov = lu2lov(obj);
+
+ ENTRY;
+ LOV_2DISPATCH_VOID(lov, llo_delete, env, lov, &lov->u);
+ EXIT;
+}
+
+static void lov_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct lov_object *lov = lu2lov(obj);
+
+ ENTRY;
+ LOV_2DISPATCH_VOID(lov, llo_fini, env, lov, &lov->u);
+ lu_object_fini(obj);
+ OBD_SLAB_FREE_PTR(lov, lov_object_kmem);
+ EXIT;
+}
+
+static int lov_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o)
+{
+ return LOV_2DISPATCH_NOLOCK(lu2lov(o), llo_print, env, cookie, p, o);
+}
+
+int lov_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ return LOV_2DISPATCH_NOLOCK(cl2lov(obj),
+ llo_page_init, env, obj, page, vmpage);
+}
+
+/**
+ * Implements cl_object_operations::clo_io_init() method for lov
+ * layer. Dispatches to the appropriate layout io initialization method.
+ */
+int lov_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io)
+{
+ CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
+ return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
+ !io->ci_ignore_layout, env, obj, io);
+}
+
+/**
+ * An implementation of cl_object_operations::clo_attr_get() method for lov
+ * layer. For raid0 layout this collects and merges attributes of all
+ * sub-objects.
+ */
+static int lov_attr_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ /* do not take lock, as this function is called under a
+ * spin-lock. Layout is protected from changing by ongoing IO. */
+ return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr);
+}
+
+static int lov_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid)
+{
+ /*
+ * No dispatch is required here, as no layout implements this.
+ */
+ return 0;
+}
+
+int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io)
+{
+ /* No need to lock because we've taken one refcount of layout. */
+ return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_lock_init, env, obj, lock,
+ io);
+}
+
+static const struct cl_object_operations lov_ops = {
+ .coo_page_init = lov_page_init,
+ .coo_lock_init = lov_lock_init,
+ .coo_io_init = lov_io_init,
+ .coo_attr_get = lov_attr_get,
+ .coo_attr_set = lov_attr_set,
+ .coo_conf_set = lov_conf_set
+};
+
+static const struct lu_object_operations lov_lu_obj_ops = {
+ .loo_object_init = lov_object_init,
+ .loo_object_delete = lov_object_delete,
+ .loo_object_release = NULL,
+ .loo_object_free = lov_object_free,
+ .loo_object_print = lov_object_print,
+ .loo_object_invariant = NULL
+};
+
+struct lu_object *lov_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *unused,
+ struct lu_device *dev)
+{
+ struct lov_object *lov;
+ struct lu_object *obj;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lov, lov_object_kmem, __GFP_IO);
+ if (lov != NULL) {
+ obj = lov2lu(lov);
+ lu_object_init(obj, NULL, dev);
+ lov->lo_cl.co_ops = &lov_ops;
+ lov->lo_type = -1; /* invalid, to catch uninitialized type */
+ /*
+ * object io operation vector (cl_object::co_iop) is installed
+ * later in lov_object_init(), as different vectors are used
+ * for object with different layouts.
+ */
+ obj->lo_ops = &lov_lu_obj_ops;
+ } else
+ obj = NULL;
+ RETURN(obj);
+}
+
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
+{
+ struct lov_stripe_md *lsm = NULL;
+
+ lov_conf_freeze(lov);
+ if (lov->lo_lsm != NULL) {
+ lsm = lsm_addref(lov->lo_lsm);
+ CDEBUG(D_INODE, "lsm %p addref %d/%d by %p.\n",
+ lsm, atomic_read(&lsm->lsm_refc),
+ lov->lo_layout_invalid, current);
+ }
+ lov_conf_thaw(lov);
+ return lsm;
+}
+
+void lov_lsm_decref(struct lov_object *lov, struct lov_stripe_md *lsm)
+{
+ if (lsm == NULL)
+ return;
+
+ CDEBUG(D_INODE, "lsm %p decref %d by %p.\n",
+ lsm, atomic_read(&lsm->lsm_refc), current);
+
+ lov_free_memmd(&lsm);
+}
+
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj)
+{
+ struct lu_object *luobj;
+ struct lov_stripe_md *lsm = NULL;
+
+ if (clobj == NULL)
+ return NULL;
+
+ luobj = lu_object_locate(&cl_object_header(clobj)->coh_lu,
+ &lov_device_type);
+ if (luobj != NULL)
+ lsm = lov_lsm_addref(lu2lov(luobj));
+ return lsm;
+}
+EXPORT_SYMBOL(lov_lsm_get);
+
+void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
+{
+ if (lsm != NULL)
+ lov_free_memmd(&lsm);
+}
+EXPORT_SYMBOL(lov_lsm_put);
+
+int lov_read_and_clear_async_rc(struct cl_object *clob)
+{
+ struct lu_object *luobj;
+ int rc = 0;
+ ENTRY;
+
+ luobj = lu_object_locate(&cl_object_header(clob)->coh_lu,
+ &lov_device_type);
+ if (luobj != NULL) {
+ struct lov_object *lov = lu2lov(luobj);
+
+ lov_conf_freeze(lov);
+ switch (lov->lo_type) {
+ case LLT_RAID0: {
+ struct lov_stripe_md *lsm;
+ int i;
+
+ lsm = lov->lo_lsm;
+ LASSERT(lsm != NULL);
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ if (loi->loi_ar.ar_rc && !rc)
+ rc = loi->loi_ar.ar_rc;
+ loi->loi_ar.ar_rc = 0;
+ }
+ }
+ case LLT_EMPTY:
+ break;
+ default:
+ LBUG();
+ }
+ lov_conf_thaw(lov);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lov_read_and_clear_async_rc);
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c
new file mode 100644
index 000000000000..f62b7e53b665
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_offset.c
@@ -0,0 +1,267 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <obd_lov.h>
+
+#include "lov_internal.h"
+
+/* compute object size given "stripeno" and the ost size */
+obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
+ int stripeno)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ unsigned long stripe_size;
+ obd_off swidth;
+ obd_size lov_size;
+ int magic = lsm->lsm_magic;
+ ENTRY;
+
+ if (ost_size == 0)
+ RETURN(0);
+
+ LASSERT(lsm_op_find(magic) != NULL);
+ lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, NULL, &swidth);
+
+ /* lov_do_div64(a, b) returns a % b, and a = a / b */
+ stripe_size = lov_do_div64(ost_size, ssize);
+ if (stripe_size)
+ lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
+ else
+ lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
+
+ RETURN(lov_size);
+}
+
+/* we have an offset in file backed by an lov and want to find out where
+ * that offset lands in our given stripe of the file. for the easy
+ * case where the offset is within the stripe, we just have to scale the
+ * offset down to make it relative to the stripe instead of the lov.
+ *
+ * the harder case is what to do when the offset doesn't intersect the
+ * stripe. callers will want start offsets clamped ahead to the start
+ * of the nearest stripe in the file. end offsets similarly clamped to the
+ * nearest ending byte of a stripe in the file:
+ *
+ * all this function does is move offsets to the nearest region of the
+ * stripe, and it does its work "mod" the full length of all the stripes.
+ * consider a file with 3 stripes:
+ *
+ * S E
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ *
+ * to find stripe 1's offsets for S and E, it divides by the full stripe
+ * width and does its math in the context of a single set of stripes:
+ *
+ * S E
+ * -----------------------------------
+ * | 0 | 1 | 2 |
+ * -----------------------------------
+ *
+ * it'll notice that E is outside stripe 1 and clamp it to the end of the
+ * stripe, then multiply it back out by lov_off to give the real offsets in
+ * the stripe:
+ *
+ * S E
+ * ---------------------------------------------------------------------
+ * | 1 | 1 | 1 | 1 | 1 | 1 |
+ * ---------------------------------------------------------------------
+ *
+ * it would have done similarly and pulled S forward to the start of a 1
+ * stripe if, say, S had landed in a 0 stripe.
+ *
+ * this rounding isn't always correct. consider an E lov offset that lands
+ * on a 0 stripe, the "mod stripe width" math will pull it forward to the
+ * start of a 1 stripe, when in fact it wanted to be rounded back to the end
+ * of a previous 1 stripe. this logic is handled by callers and this is why:
+ *
+ * this function returns < 0 when the offset was "before" the stripe and
+ * was moved forward to the start of the stripe in question; 0 when it
+ * falls in the stripe and no shifting was done; > 0 when the offset
+ * was outside the stripe and was pulled back to its final byte. */
+int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+ int stripeno, obd_off *obdoff)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ obd_off stripe_off, this_stripe, swidth;
+ int magic = lsm->lsm_magic;
+ int ret = 0;
+
+ if (lov_off == OBD_OBJECT_EOF) {
+ *obdoff = OBD_OBJECT_EOF;
+ return 0;
+ }
+
+ LASSERT(lsm_op_find(magic) != NULL);
+
+ lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off,
+ &swidth);
+
+ /* lov_do_div64(a, b) returns a % b, and a = a / b */
+ stripe_off = lov_do_div64(lov_off, swidth);
+
+ this_stripe = (obd_off)stripeno * ssize;
+ if (stripe_off < this_stripe) {
+ stripe_off = 0;
+ ret = -1;
+ } else {
+ stripe_off -= this_stripe;
+
+ if (stripe_off >= ssize) {
+ stripe_off = ssize;
+ ret = 1;
+ }
+ }
+
+ *obdoff = lov_off * ssize + stripe_off;
+ return ret;
+}
+
+/* Given a whole-file size and a stripe number, give the file size which
+ * corresponds to the individual object of that stripe.
+ *
+ * This behaves basically in the same was as lov_stripe_offset, except that
+ * file sizes falling before the beginning of a stripe are clamped to the end
+ * of the previous stripe, not the beginning of the next:
+ *
+ * S
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ *
+ * if clamped to stripe 2 becomes:
+ *
+ * S
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ */
+obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size,
+ int stripeno)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ obd_off stripe_off, this_stripe, swidth;
+ int magic = lsm->lsm_magic;
+
+ if (file_size == OBD_OBJECT_EOF)
+ return OBD_OBJECT_EOF;
+
+ LASSERT(lsm_op_find(magic) != NULL);
+ lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &file_size,
+ &swidth);
+
+ /* lov_do_div64(a, b) returns a % b, and a = a / b */
+ stripe_off = lov_do_div64(file_size, swidth);
+
+ this_stripe = (obd_off)stripeno * ssize;
+ if (stripe_off < this_stripe) {
+ /* Move to end of previous stripe, or zero */
+ if (file_size > 0) {
+ file_size--;
+ stripe_off = ssize;
+ } else {
+ stripe_off = 0;
+ }
+ } else {
+ stripe_off -= this_stripe;
+
+ if (stripe_off >= ssize) {
+ /* Clamp to end of this stripe */
+ stripe_off = ssize;
+ }
+ }
+
+ return (file_size * ssize + stripe_off);
+}
+
+/* given an extent in an lov and a stripe, calculate the extent of the stripe
+ * that is contained within the lov extent. this returns true if the given
+ * stripe does intersect with the lov extent. */
+int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
+ obd_off start, obd_off end,
+ obd_off *obd_start, obd_off *obd_end)
+{
+ int start_side, end_side;
+
+ start_side = lov_stripe_offset(lsm, start, stripeno, obd_start);
+ end_side = lov_stripe_offset(lsm, end, stripeno, obd_end);
+
+ CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n",
+ start, end, start_side, *obd_start, *obd_end, end_side);
+
+ /* this stripe doesn't intersect the file extent when neither
+ * start or the end intersected the stripe and obd_start and
+ * obd_end got rounded up to the save value. */
+ if (start_side != 0 && end_side != 0 && *obd_start == *obd_end)
+ return 0;
+
+ /* as mentioned in the lov_stripe_offset commentary, end
+ * might have been shifted in the wrong direction. This
+ * happens when an end offset is before the stripe when viewed
+ * through the "mod stripe size" math. we detect it being shifted
+ * in the wrong direction and touch it up.
+ * interestingly, this can't underflow since end must be > start
+ * if we passed through the previous check.
+ * (should we assert for that somewhere?) */
+ if (end_side != 0)
+ (*obd_end)--;
+
+ return 1;
+}
+
+/* compute which stripe number "lov_off" will be written into */
+int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ obd_off stripe_off, swidth;
+ int magic = lsm->lsm_magic;
+
+ LASSERT(lsm_op_find(magic) != NULL);
+ lsm_op_find(magic)->lsm_stripe_by_offset(lsm, NULL, &lov_off, &swidth);
+
+ stripe_off = lov_do_div64(lov_off, swidth);
+
+ /* Puts stripe_off/ssize result into stripe_off */
+ lov_do_div64(stripe_off, ssize);
+
+ return stripe_off;
+}
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
new file mode 100644
index 000000000000..492948aad685
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -0,0 +1,678 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_pack.c
+ *
+ * (Un)packing of OST/MDS requests
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <lustre_net.h>
+#include <obd.h>
+#include <obd_lov.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_user.h>
+
+#include "lov_internal.h"
+
+void lov_dump_lmm_common(int level, void *lmmp)
+{
+ struct lov_mds_md *lmm = lmmp;
+ struct ost_id oi;
+
+ lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi);
+ CDEBUG(level, "objid "DOSTID", magic 0x%08x, pattern %#x\n",
+ POSTID(&oi), le32_to_cpu(lmm->lmm_magic),
+ le32_to_cpu(lmm->lmm_pattern));
+ CDEBUG(level, "stripe_size %u, stripe_count %u, layout_gen %u\n",
+ le32_to_cpu(lmm->lmm_stripe_size),
+ le16_to_cpu(lmm->lmm_stripe_count),
+ le16_to_cpu(lmm->lmm_layout_gen));
+}
+
+static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod,
+ int stripe_count)
+{
+ int i;
+
+ if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
+ CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n",
+ stripe_count, LOV_V1_INSANE_STRIPE_COUNT);
+ return;
+ }
+
+ for (i = 0; i < stripe_count; ++i, ++lod) {
+ struct ost_id oi;
+
+ ostid_le_to_cpu(&lod->l_ost_oi, &oi);
+ CDEBUG(level, "stripe %u idx %u subobj "DOSTID"\n", i,
+ le32_to_cpu(lod->l_ost_idx), POSTID(&oi));
+ }
+}
+
+void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm)
+{
+ lov_dump_lmm_common(level, lmm);
+ lov_dump_lmm_objects(level, lmm->lmm_objects,
+ le16_to_cpu(lmm->lmm_stripe_count));
+}
+
+void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
+{
+ lov_dump_lmm_common(level, lmm);
+ CDEBUG(level,"pool_name "LOV_POOLNAMEF"\n", lmm->lmm_pool_name);
+ lov_dump_lmm_objects(level, lmm->lmm_objects,
+ le16_to_cpu(lmm->lmm_stripe_count));
+}
+
+void lov_dump_lmm(int level, void *lmm)
+{
+ int magic;
+
+ magic = ((struct lov_mds_md_v1 *)(lmm))->lmm_magic;
+ switch (magic) {
+ case LOV_MAGIC_V1:
+ return lov_dump_lmm_v1(level, (struct lov_mds_md_v1 *)(lmm));
+ case LOV_MAGIC_V3:
+ return lov_dump_lmm_v3(level, (struct lov_mds_md_v3 *)(lmm));
+ default:
+ CERROR("Cannot recognize lmm_magic %x", magic);
+ }
+ return;
+}
+
+#define LMM_ASSERT(test) \
+do { \
+ if (!(test)) lov_dump_lmm(D_ERROR, lmm); \
+ LASSERT(test); /* so we know what assertion failed */ \
+} while(0)
+
+/* Pack LOV object metadata for disk storage. It is packed in LE byte
+ * order and is opaque to the networking layer.
+ *
+ * XXX In the future, this will be enhanced to get the EA size from the
+ * underlying OSC device(s) to get their EA sizes so we can stack
+ * LOVs properly. For now lov_mds_md_size() just assumes one obd_id
+ * per stripe.
+ */
+int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_obd *lov = &obd->u.lov;
+ struct lov_mds_md_v1 *lmmv1;
+ struct lov_mds_md_v3 *lmmv3;
+ __u16 stripe_count;
+ struct lov_ost_data_v1 *lmm_objects;
+ int lmm_size, lmm_magic;
+ int i;
+ int cplen = 0;
+ ENTRY;
+
+ if (lsm) {
+ lmm_magic = lsm->lsm_magic;
+ } else {
+ if (lmmp && *lmmp)
+ lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
+ else
+ /* lsm == NULL and lmmp == NULL */
+ lmm_magic = LOV_MAGIC;
+ }
+
+ if ((lmm_magic != LOV_MAGIC_V1) &&
+ (lmm_magic != LOV_MAGIC_V3)) {
+ CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
+ lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
+ RETURN(-EINVAL);
+
+ }
+
+ if (lsm) {
+ /* If we are just sizing the EA, limit the stripe count
+ * to the actual number of OSTs in this filesystem. */
+ if (!lmmp) {
+ stripe_count = lov_get_stripecnt(lov, lmm_magic,
+ lsm->lsm_stripe_count);
+ lsm->lsm_stripe_count = stripe_count;
+ } else {
+ stripe_count = lsm->lsm_stripe_count;
+ }
+ } else {
+ /* No need to allocate more than maximum supported stripes.
+ * Anyway, this is pretty inaccurate since ld_tgt_count now
+ * represents max index and we should rely on the actual number
+ * of OSTs instead */
+ stripe_count = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+ lmm_magic);
+ if (stripe_count > lov->desc.ld_tgt_count)
+ stripe_count = lov->desc.ld_tgt_count;
+ }
+
+ /* XXX LOV STACKING call into osc for sizes */
+ lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
+
+ if (!lmmp)
+ RETURN(lmm_size);
+
+ if (*lmmp && !lsm) {
+ stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
+ lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
+ OBD_FREE_LARGE(*lmmp, lmm_size);
+ *lmmp = NULL;
+ RETURN(0);
+ }
+
+ if (!*lmmp) {
+ OBD_ALLOC_LARGE(*lmmp, lmm_size);
+ if (!*lmmp)
+ RETURN(-ENOMEM);
+ }
+
+ CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d \n",
+ lmm_magic, lmm_size);
+
+ lmmv1 = *lmmp;
+ lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
+ if (lmm_magic == LOV_MAGIC_V3)
+ lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
+ else
+ lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
+
+ if (!lsm)
+ RETURN(lmm_size);
+
+ /* lmmv1 and lmmv3 point to the same struct and have the
+ * same first fields
+ */
+ lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
+ lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
+ lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
+ lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
+ lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
+ if (lsm->lsm_magic == LOV_MAGIC_V3) {
+ cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
+ sizeof(lmmv3->lmm_pool_name));
+ if (cplen >= sizeof(lmmv3->lmm_pool_name))
+ RETURN(-E2BIG);
+ lmm_objects = lmmv3->lmm_objects;
+ } else {
+ lmm_objects = lmmv1->lmm_objects;
+ }
+
+ for (i = 0; i < stripe_count; i++) {
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+ /* XXX LOV STACKING call down to osc_packmd() to do packing */
+ LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID
+ " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi),
+ i, stripe_count, loi->loi_ost_idx);
+ ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
+ lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
+ lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
+ }
+
+ RETURN(lmm_size);
+}
+
+/* Find the max stripecount we should use */
+__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
+{
+ __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
+
+ if (!stripe_count)
+ stripe_count = lov->desc.ld_default_stripe_count;
+ if (stripe_count > lov->desc.ld_active_tgt_count)
+ stripe_count = lov->desc.ld_active_tgt_count;
+ if (!stripe_count)
+ stripe_count = 1;
+
+ /* stripe count is based on whether ldiskfs can handle
+ * larger EA sizes */
+ if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE &&
+ lov->lov_ocd.ocd_max_easize)
+ max_stripes = lov_mds_md_stripecnt(lov->lov_ocd.ocd_max_easize,
+ magic);
+
+ if (stripe_count > max_stripes)
+ stripe_count = max_stripes;
+
+ return stripe_count;
+}
+
+
+static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
+{
+ int rc;
+
+ if (lsm_op_find(le32_to_cpu(*(__u32 *)lmm)) == NULL) {
+ char *buffer;
+ int sz;
+
+ CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n",
+ le32_to_cpu(*(__u32 *)lmm), lmm_bytes);
+ sz = lmm_bytes * 2 + 1;
+ OBD_ALLOC_LARGE(buffer, sz);
+ if (buffer != NULL) {
+ int i;
+
+ for (i = 0; i < lmm_bytes; i++)
+ sprintf(buffer+2*i, "%.2X", ((char *)lmm)[i]);
+ buffer[sz - 1] = '\0';
+ CERROR("%s\n", buffer);
+ OBD_FREE_LARGE(buffer, sz);
+ }
+ return -EINVAL;
+ }
+ rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm,
+ lmm_bytes, stripe_count);
+ return rc;
+}
+
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
+ int pattern, int magic)
+{
+ int i, lsm_size;
+ ENTRY;
+
+ CDEBUG(D_INFO, "alloc lsm, stripe_count %d\n", stripe_count);
+
+ *lsmp = lsm_alloc_plain(stripe_count, &lsm_size);
+ if (!*lsmp) {
+ CERROR("can't allocate lsmp stripe_count %d\n", stripe_count);
+ RETURN(-ENOMEM);
+ }
+
+ atomic_set(&(*lsmp)->lsm_refc, 1);
+ spin_lock_init(&(*lsmp)->lsm_lock);
+ (*lsmp)->lsm_magic = magic;
+ (*lsmp)->lsm_stripe_count = stripe_count;
+ (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+ (*lsmp)->lsm_pattern = pattern;
+ (*lsmp)->lsm_pool_name[0] = '\0';
+ (*lsmp)->lsm_layout_gen = 0;
+ (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
+
+ for (i = 0; i < stripe_count; i++)
+ loi_init((*lsmp)->lsm_oinfo[i]);
+
+ RETURN(lsm_size);
+}
+
+int lov_free_memmd(struct lov_stripe_md **lsmp)
+{
+ struct lov_stripe_md *lsm = *lsmp;
+ int refc;
+
+ *lsmp = NULL;
+ LASSERT(atomic_read(&lsm->lsm_refc) > 0);
+ if ((refc = atomic_dec_return(&lsm->lsm_refc)) == 0) {
+ LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
+ lsm_op_find(lsm->lsm_magic)->lsm_free(lsm);
+ }
+ return refc;
+}
+
+
+/* Unpack LOV object metadata from disk storage. It is packed in LE byte
+ * order and is opaque to the networking layer.
+ */
+int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_bytes)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_obd *lov = &obd->u.lov;
+ int rc = 0, lsm_size;
+ __u16 stripe_count;
+ __u32 magic;
+ ENTRY;
+
+ /* If passed an MDS struct use values from there, otherwise defaults */
+ if (lmm) {
+ rc = lov_verify_lmm(lmm, lmm_bytes, &stripe_count);
+ if (rc)
+ RETURN(rc);
+ magic = le32_to_cpu(lmm->lmm_magic);
+ } else {
+ magic = LOV_MAGIC;
+ stripe_count = lov_get_stripecnt(lov, magic, 0);
+ }
+
+ /* If we aren't passed an lsmp struct, we just want the size */
+ if (!lsmp) {
+ /* XXX LOV STACKING call into osc for sizes */
+ LBUG();
+ RETURN(lov_stripe_md_size(stripe_count));
+ }
+ /* If we are passed an allocated struct but nothing to unpack, free */
+ if (*lsmp && !lmm) {
+ lov_free_memmd(lsmp);
+ RETURN(0);
+ }
+
+ lsm_size = lov_alloc_memmd(lsmp, stripe_count, LOV_PATTERN_RAID0,
+ magic);
+ if (lsm_size < 0)
+ RETURN(lsm_size);
+
+ /* If we are passed a pointer but nothing to unpack, we only alloc */
+ if (!lmm)
+ RETURN(lsm_size);
+
+ LASSERT(lsm_op_find(magic) != NULL);
+ rc = lsm_op_find(magic)->lsm_unpackmd(lov, *lsmp, lmm);
+ if (rc) {
+ lov_free_memmd(lsmp);
+ RETURN(rc);
+ }
+
+ RETURN(lsm_size);
+}
+
+static int __lov_setstripe(struct obd_export *exp, int max_lmm_size,
+ struct lov_stripe_md **lsmp,
+ struct lov_user_md *lump)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct lov_obd *lov = &obd->u.lov;
+ char buffer[sizeof(struct lov_user_md_v3)];
+ struct lov_user_md_v3 *lumv3 = (struct lov_user_md_v3 *)&buffer[0];
+ struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&buffer[0];
+ int lmm_magic;
+ __u16 stripe_count;
+ int rc;
+ int cplen = 0;
+ ENTRY;
+
+ rc = lov_lum_swab_if_needed(lumv3, &lmm_magic, lump);
+ if (rc)
+ RETURN(rc);
+
+ /* in the rest of the tests, as *lumv1 and lumv3 have the same
+ * fields, we use lumv1 to avoid code duplication */
+
+ if (lumv1->lmm_pattern == 0) {
+ lumv1->lmm_pattern = lov->desc.ld_pattern ?
+ lov->desc.ld_pattern : LOV_PATTERN_RAID0;
+ }
+
+ if (lumv1->lmm_pattern != LOV_PATTERN_RAID0) {
+ CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
+ lumv1->lmm_pattern);
+ RETURN(-EINVAL);
+ }
+
+ /* 64kB is the largest common page size we see (ia64), and matches the
+ * check in lfs */
+ if (lumv1->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+ CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n",
+ lumv1->lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
+ lumv1->lmm_stripe_size = LOV_MIN_STRIPE_SIZE;
+ }
+
+ if ((lumv1->lmm_stripe_offset >= lov->desc.ld_tgt_count) &&
+ (lumv1->lmm_stripe_offset !=
+ (typeof(lumv1->lmm_stripe_offset))(-1))) {
+ CDEBUG(D_IOCTL, "stripe offset %u > number of OSTs %u\n",
+ lumv1->lmm_stripe_offset, lov->desc.ld_tgt_count);
+ RETURN(-EINVAL);
+ }
+ stripe_count = lov_get_stripecnt(lov, lmm_magic,
+ lumv1->lmm_stripe_count);
+
+ if (max_lmm_size) {
+ int max_stripes = (max_lmm_size -
+ lov_mds_md_size(0, lmm_magic)) /
+ sizeof(struct lov_ost_data_v1);
+ if (unlikely(max_stripes < stripe_count)) {
+ CDEBUG(D_IOCTL, "stripe count reset from %d to %d\n",
+ stripe_count, max_stripes);
+ stripe_count = max_stripes;
+ }
+ }
+
+ if (lmm_magic == LOV_USER_MAGIC_V3) {
+ struct pool_desc *pool;
+
+ /* In the function below, .hs_keycmp resolves to
+ * pool_hashkey_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ pool = lov_find_pool(lov, lumv3->lmm_pool_name);
+ if (pool != NULL) {
+ if (lumv3->lmm_stripe_offset !=
+ (typeof(lumv3->lmm_stripe_offset))(-1)) {
+ rc = lov_check_index_in_pool(
+ lumv3->lmm_stripe_offset, pool);
+ if (rc < 0) {
+ lov_pool_putref(pool);
+ RETURN(-EINVAL);
+ }
+ }
+
+ if (stripe_count > pool_tgt_count(pool))
+ stripe_count = pool_tgt_count(pool);
+
+ lov_pool_putref(pool);
+ }
+ }
+
+ rc = lov_alloc_memmd(lsmp, stripe_count, lumv1->lmm_pattern, lmm_magic);
+
+ if (rc >= 0) {
+ (*lsmp)->lsm_oinfo[0]->loi_ost_idx = lumv1->lmm_stripe_offset;
+ (*lsmp)->lsm_stripe_size = lumv1->lmm_stripe_size;
+ if (lmm_magic == LOV_USER_MAGIC_V3) {
+ cplen = strlcpy((*lsmp)->lsm_pool_name,
+ lumv3->lmm_pool_name,
+ sizeof((*lsmp)->lsm_pool_name));
+ if (cplen >= sizeof((*lsmp)->lsm_pool_name))
+ rc = -E2BIG;
+ }
+ rc = 0;
+ }
+
+ RETURN(rc);
+}
+
+/* Configure object striping information on a new file.
+ *
+ * @lmmu is a pointer to a user struct with one or more of the fields set to
+ * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
+ * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC.
+ * @lsmp is a pointer to an in-core stripe MD that needs to be filled in.
+ */
+int lov_setstripe(struct obd_export *exp, int max_lmm_size,
+ struct lov_stripe_md **lsmp, struct lov_user_md *lump)
+{
+ int rc;
+ mm_segment_t seg;
+
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+
+ rc = __lov_setstripe(exp, max_lmm_size, lsmp, lump);
+ set_fs(seg);
+ RETURN(rc);
+}
+
+int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_user_md *lump)
+{
+ int i;
+ int rc;
+ struct obd_export *oexp;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ obd_id last_id = 0;
+ struct lov_user_ost_data_v1 *lmm_objects;
+
+ ENTRY;
+
+ if (lump->lmm_magic == LOV_USER_MAGIC_V3)
+ lmm_objects = ((struct lov_user_md_v3 *)lump)->lmm_objects;
+ else
+ lmm_objects = lump->lmm_objects;
+
+ for (i = 0; i < lump->lmm_stripe_count; i++) {
+ __u32 len = sizeof(last_id);
+ oexp = lov->lov_tgts[lmm_objects[i].l_ost_idx]->ltd_exp;
+ rc = obd_get_info(NULL, oexp, sizeof(KEY_LAST_ID), KEY_LAST_ID,
+ &len, &last_id, NULL);
+ if (rc)
+ RETURN(rc);
+ if (ostid_id(&lmm_objects[i].l_ost_oi) > last_id) {
+ CERROR("Setting EA for object > than last id on"
+ " ost idx %d "DOSTID" > "LPD64" \n",
+ lmm_objects[i].l_ost_idx,
+ POSTID(&lmm_objects[i].l_ost_oi), last_id);
+ RETURN(-EINVAL);
+ }
+ }
+
+ rc = lov_setstripe(exp, 0, lsmp, lump);
+ if (rc)
+ RETURN(rc);
+
+ for (i = 0; i < lump->lmm_stripe_count; i++) {
+ (*lsmp)->lsm_oinfo[i]->loi_ost_idx =
+ lmm_objects[i].l_ost_idx;
+ (*lsmp)->lsm_oinfo[i]->loi_oi = lmm_objects[i].l_ost_oi;
+ }
+ RETURN(0);
+}
+
+
+/* Retrieve object striping information.
+ *
+ * @lump is a pointer to an in-core struct with lmm_ost_count indicating
+ * the maximum number of OST indices which will fit in the user buffer.
+ * lmm_magic must be LOV_USER_MAGIC.
+ */
+int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
+ struct lov_user_md *lump)
+{
+ /*
+ * XXX huge struct allocated on stack.
+ */
+ /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+ struct lov_user_md_v3 lum;
+ struct lov_mds_md *lmmk = NULL;
+ int rc, lmm_size;
+ int lum_size;
+ mm_segment_t seg;
+ ENTRY;
+
+ if (!lsm)
+ RETURN(-ENODATA);
+
+ /*
+ * "Switch to kernel segment" to allow copying from kernel space by
+ * copy_{to,from}_user().
+ */
+ seg = get_fs();
+ set_fs(KERNEL_DS);
+
+ /* we only need the header part from user space to get lmm_magic and
+ * lmm_stripe_count, (the header part is common to v1 and v3) */
+ lum_size = sizeof(struct lov_user_md_v1);
+ if (copy_from_user(&lum, lump, lum_size))
+ GOTO(out_set, rc = -EFAULT);
+ else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
+ (lum.lmm_magic != LOV_USER_MAGIC_V3))
+ GOTO(out_set, rc = -EINVAL);
+
+ if (lum.lmm_stripe_count &&
+ (lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
+ /* Return right size of stripe to user */
+ lum.lmm_stripe_count = lsm->lsm_stripe_count;
+ rc = copy_to_user(lump, &lum, lum_size);
+ GOTO(out_set, rc = -EOVERFLOW);
+ }
+ rc = lov_packmd(exp, &lmmk, lsm);
+ if (rc < 0)
+ GOTO(out_set, rc);
+ lmm_size = rc;
+ rc = 0;
+
+ /* FIXME: Bug 1185 - copy fields properly when structs change */
+ /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
+ CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
+ CLASSERT(sizeof lum.lmm_objects[0] == sizeof lmmk->lmm_objects[0]);
+
+ if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
+ ((lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) ||
+ (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)))) {
+ lustre_swab_lov_mds_md(lmmk);
+ lustre_swab_lov_user_md_objects(
+ (struct lov_user_ost_data*)lmmk->lmm_objects,
+ lmmk->lmm_stripe_count);
+ }
+ if (lum.lmm_magic == LOV_USER_MAGIC) {
+ /* User request for v1, we need skip lmm_pool_name */
+ if (lmmk->lmm_magic == LOV_MAGIC_V3) {
+ memmove((char*)(&lmmk->lmm_stripe_count) +
+ sizeof(lmmk->lmm_stripe_count),
+ ((struct lov_mds_md_v3*)lmmk)->lmm_objects,
+ lmmk->lmm_stripe_count *
+ sizeof(struct lov_ost_data_v1));
+ lmm_size -= LOV_MAXPOOLNAME;
+ }
+ } else {
+ /* if v3 we just have to update the lum_size */
+ lum_size = sizeof(struct lov_user_md_v3);
+ }
+
+ /* User wasn't expecting this many OST entries */
+ if (lum.lmm_stripe_count == 0)
+ lmm_size = lum_size;
+ else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count)
+ GOTO(out_set, rc = -EOVERFLOW);
+ /*
+ * Have a difference between lov_mds_md & lov_user_md.
+ * So we have to re-order the data before copy to user.
+ */
+ lum.lmm_stripe_count = lmmk->lmm_stripe_count;
+ lum.lmm_layout_gen = lmmk->lmm_layout_gen;
+ ((struct lov_user_md *)lmmk)->lmm_layout_gen = lum.lmm_layout_gen;
+ ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count;
+ if (copy_to_user(lump, lmmk, lmm_size))
+ rc = -EFAULT;
+
+ obd_free_diskmd(exp, &lmmk);
+out_set:
+ set_fs(seg);
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
new file mode 100644
index 000000000000..65790d684720
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -0,0 +1,235 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_page for LOV layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lov page operations.
+ *
+ */
+
+static int lov_page_invariant(const struct cl_page_slice *slice)
+{
+ const struct cl_page *page = slice->cpl_page;
+ const struct cl_page *sub = lov_sub_page(slice);
+
+ return ergo(sub != NULL,
+ page->cp_child == sub &&
+ sub->cp_parent == page &&
+ page->cp_state == sub->cp_state);
+}
+
+static void lov_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ struct cl_page *sub = lov_sub_page(slice);
+
+ LINVRNT(lov_page_invariant(slice));
+ ENTRY;
+
+ if (sub != NULL) {
+ LASSERT(sub->cp_state == CPS_FREEING);
+ lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent);
+ sub->cp_parent = NULL;
+ slice->cpl_page->cp_child = NULL;
+ cl_page_put(env, sub);
+ }
+ EXIT;
+}
+
+static int lov_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io,
+ int nonblock)
+{
+ struct lov_io *lio = lov_env_io(env);
+ struct lov_io_sub *sub;
+
+ LINVRNT(lov_page_invariant(slice));
+ LINVRNT(!cl2lov_page(slice)->lps_invalid);
+ ENTRY;
+
+ sub = lov_page_subio(env, lio, slice);
+ if (!IS_ERR(sub)) {
+ lov_sub_page(slice)->cp_owner = sub->sub_io;
+ lov_sub_put(sub);
+ } else
+ LBUG(); /* Arrgh */
+ RETURN(0);
+}
+
+static void lov_page_assume(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io)
+{
+ lov_page_own(env, slice, io, 0);
+}
+
+static int lov_page_cache_add(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct lov_io *lio = lov_env_io(env);
+ struct lov_io_sub *sub;
+ int rc = 0;
+
+ LINVRNT(lov_page_invariant(slice));
+ LINVRNT(!cl2lov_page(slice)->lps_invalid);
+ ENTRY;
+
+ sub = lov_page_subio(env, lio, slice);
+ if (!IS_ERR(sub)) {
+ rc = cl_page_cache_add(sub->sub_env, sub->sub_io,
+ slice->cpl_page->cp_child, CRT_WRITE);
+ lov_sub_put(sub);
+ } else {
+ rc = PTR_ERR(sub);
+ CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc);
+ }
+ RETURN(rc);
+}
+
+static int lov_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
+{
+ struct lov_page *lp = cl2lov_page(slice);
+
+ return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp);
+}
+
+static const struct cl_page_operations lov_page_ops = {
+ .cpo_fini = lov_page_fini,
+ .cpo_own = lov_page_own,
+ .cpo_assume = lov_page_assume,
+ .io = {
+ [CRT_WRITE] = {
+ .cpo_cache_add = lov_page_cache_add
+ }
+ },
+ .cpo_print = lov_page_print
+};
+
+static void lov_empty_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ LASSERT(slice->cpl_page->cp_child == NULL);
+}
+
+int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ struct lov_object *loo = cl2lov(obj);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ struct lov_io *lio = lov_env_io(env);
+ struct cl_page *subpage;
+ struct cl_object *subobj;
+ struct lov_io_sub *sub;
+ struct lov_page *lpg = cl_object_page_slice(obj, page);
+ loff_t offset;
+ obd_off suboff;
+ int stripe;
+ int rc;
+ ENTRY;
+
+ offset = cl_offset(obj, page->cp_index);
+ stripe = lov_stripe_number(loo->lo_lsm, offset);
+ LASSERT(stripe < r0->lo_nr);
+ rc = lov_stripe_offset(loo->lo_lsm, offset, stripe,
+ &suboff);
+ LASSERT(rc == 0);
+
+ lpg->lps_invalid = 1;
+ cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops);
+
+ sub = lov_sub_get(env, lio, stripe);
+ if (IS_ERR(sub))
+ GOTO(out, rc = PTR_ERR(sub));
+
+ subobj = lovsub2cl(r0->lo_sub[stripe]);
+ subpage = cl_page_find_sub(sub->sub_env, subobj,
+ cl_index(subobj, suboff), vmpage, page);
+ lov_sub_put(sub);
+ if (IS_ERR(subpage))
+ GOTO(out, rc = PTR_ERR(subpage));
+
+ if (likely(subpage->cp_parent == page)) {
+ lu_ref_add(&subpage->cp_reference, "lov", page);
+ lpg->lps_invalid = 0;
+ rc = 0;
+ } else {
+ CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
+ CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
+ LASSERT(0);
+ }
+
+ EXIT;
+out:
+ return rc;
+}
+
+
+static const struct cl_page_operations lov_empty_page_ops = {
+ .cpo_fini = lov_empty_page_fini,
+ .cpo_print = lov_page_print
+};
+
+int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ struct lov_page *lpg = cl_object_page_slice(obj, page);
+ void *addr;
+ ENTRY;
+
+ cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops);
+ addr = kmap(vmpage);
+ memset(addr, 0, cl_page_size(obj));
+ kunmap(vmpage);
+ cl_page_export(env, page, 1);
+ RETURN(0);
+}
+
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
new file mode 100644
index 000000000000..a96f90880c64
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -0,0 +1,681 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lov/lov_pool.c
+ *
+ * OST pool methods
+ *
+ * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
+ * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
+ * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+#include "lov_internal.h"
+
+#define pool_tgt(_p, _i) \
+ _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
+
+static void lov_pool_getref(struct pool_desc *pool)
+{
+ CDEBUG(D_INFO, "pool %p\n", pool);
+ atomic_inc(&pool->pool_refcount);
+}
+
+void lov_pool_putref(struct pool_desc *pool)
+{
+ CDEBUG(D_INFO, "pool %p\n", pool);
+ if (atomic_dec_and_test(&pool->pool_refcount)) {
+ LASSERT(hlist_unhashed(&pool->pool_hash));
+ LASSERT(list_empty(&pool->pool_list));
+ LASSERT(pool->pool_proc_entry == NULL);
+ lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
+ lov_ost_pool_free(&(pool->pool_obds));
+ OBD_FREE_PTR(pool);
+ EXIT;
+ }
+}
+
+void lov_pool_putref_locked(struct pool_desc *pool)
+{
+ CDEBUG(D_INFO, "pool %p\n", pool);
+ LASSERT(atomic_read(&pool->pool_refcount) > 1);
+
+ atomic_dec(&pool->pool_refcount);
+}
+
+/*
+ * hash function using a Rotating Hash algorithm
+ * Knuth, D. The Art of Computer Programming,
+ * Volume 3: Sorting and Searching,
+ * Chapter 6.4.
+ * Addison Wesley, 1973
+ */
+static __u32 pool_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
+{
+ int i;
+ __u32 result;
+ char *poolname;
+
+ result = 0;
+ poolname = (char *)key;
+ for (i = 0; i < LOV_MAXPOOLNAME; i++) {
+ if (poolname[i] == '\0')
+ break;
+ result = (result << 4)^(result >> 28) ^ poolname[i];
+ }
+ return (result % mask);
+}
+
+static void *pool_key(struct hlist_node *hnode)
+{
+ struct pool_desc *pool;
+
+ pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+ return (pool->pool_name);
+}
+
+static int pool_hashkey_keycmp(const void *key, struct hlist_node *compared_hnode)
+{
+ char *pool_name;
+ struct pool_desc *pool;
+
+ pool_name = (char *)key;
+ pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
+ return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
+}
+
+static void *pool_hashobject(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct pool_desc, pool_hash);
+}
+
+static void pool_hashrefcount_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct pool_desc *pool;
+
+ pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+ lov_pool_getref(pool);
+}
+
+static void pool_hashrefcount_put_locked(cfs_hash_t *hs,
+ struct hlist_node *hnode)
+{
+ struct pool_desc *pool;
+
+ pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+ lov_pool_putref_locked(pool);
+}
+
+cfs_hash_ops_t pool_hash_operations = {
+ .hs_hash = pool_hashfn,
+ .hs_key = pool_key,
+ .hs_keycmp = pool_hashkey_keycmp,
+ .hs_object = pool_hashobject,
+ .hs_get = pool_hashrefcount_get,
+ .hs_put_locked = pool_hashrefcount_put_locked,
+
+};
+
+#ifdef LPROCFS
+/* ifdef needed for liblustre support */
+/*
+ * pool /proc seq_file methods
+ */
+/*
+ * iterator is used to go through the target pool entries
+ * index is the current entry index in the lp_array[] array
+ * index >= pos returned to the seq_file interface
+ * pos is from 0 to (pool->pool_obds.op_count - 1)
+ */
+#define POOL_IT_MAGIC 0xB001CEA0
+struct pool_iterator {
+ int magic;
+ struct pool_desc *pool;
+ int idx; /* from 0 to pool_tgt_size - 1 */
+};
+
+static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)s->private;
+ int prev_idx;
+
+ LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+
+ /* test if end of file */
+ if (*pos >= pool_tgt_count(iter->pool))
+ return NULL;
+
+ /* iterate to find a non empty entry */
+ prev_idx = iter->idx;
+ down_read(&pool_tgt_rw_sem(iter->pool));
+ iter->idx++;
+ if (iter->idx == pool_tgt_count(iter->pool)) {
+ iter->idx = prev_idx; /* we stay on the last entry */
+ up_read(&pool_tgt_rw_sem(iter->pool));
+ return NULL;
+ }
+ up_read(&pool_tgt_rw_sem(iter->pool));
+ (*pos)++;
+ /* return != NULL to continue */
+ return iter;
+}
+
+static void *pool_proc_start(struct seq_file *s, loff_t *pos)
+{
+ struct pool_desc *pool = (struct pool_desc *)s->private;
+ struct pool_iterator *iter;
+
+ lov_pool_getref(pool);
+ if ((pool_tgt_count(pool) == 0) ||
+ (*pos >= pool_tgt_count(pool))) {
+ /* iter is not created, so stop() has no way to
+ * find pool to dec ref */
+ lov_pool_putref(pool);
+ return NULL;
+ }
+
+ OBD_ALLOC_PTR(iter);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+ iter->magic = POOL_IT_MAGIC;
+ iter->pool = pool;
+ iter->idx = 0;
+
+ /* we use seq_file private field to memorized iterator so
+ * we can free it at stop() */
+ /* /!\ do not forget to restore it to pool before freeing it */
+ s->private = iter;
+ if (*pos > 0) {
+ loff_t i;
+ void *ptr;
+
+ i = 0;
+ do {
+ ptr = pool_proc_next(s, &iter, &i);
+ } while ((i < *pos) && (ptr != NULL));
+ return ptr;
+ }
+ return iter;
+}
+
+static void pool_proc_stop(struct seq_file *s, void *v)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)s->private;
+
+ /* in some cases stop() method is called 2 times, without
+ * calling start() method (see seq_read() from fs/seq_file.c)
+ * we have to free only if s->private is an iterator */
+ if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
+ /* we restore s->private so next call to pool_proc_start()
+ * will work */
+ s->private = iter->pool;
+ lov_pool_putref(iter->pool);
+ OBD_FREE_PTR(iter);
+ }
+ return;
+}
+
+static int pool_proc_show(struct seq_file *s, void *v)
+{
+ struct pool_iterator *iter = (struct pool_iterator *)v;
+ struct lov_tgt_desc *tgt;
+
+ LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic);
+ LASSERT(iter->pool != NULL);
+ LASSERT(iter->idx <= pool_tgt_count(iter->pool));
+
+ down_read(&pool_tgt_rw_sem(iter->pool));
+ tgt = pool_tgt(iter->pool, iter->idx);
+ up_read(&pool_tgt_rw_sem(iter->pool));
+ if (tgt)
+ seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
+
+ return 0;
+}
+
+static struct seq_operations pool_proc_ops = {
+ .start = pool_proc_start,
+ .next = pool_proc_next,
+ .stop = pool_proc_stop,
+ .show = pool_proc_show,
+};
+
+static int pool_proc_open(struct inode *inode, struct file *file)
+{
+ int rc;
+
+ rc = seq_open(file, &pool_proc_ops);
+ if (!rc) {
+ struct seq_file *s = file->private_data;
+ s->private = PDE_DATA(inode);
+ }
+ return rc;
+}
+
+static struct file_operations pool_proc_operations = {
+ .open = pool_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif /* LPROCFS */
+
+void lov_dump_pool(int level, struct pool_desc *pool)
+{
+ int i;
+
+ lov_pool_getref(pool);
+
+ CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
+ pool->pool_name, pool->pool_obds.op_count);
+ down_read(&pool_tgt_rw_sem(pool));
+
+ for (i = 0; i < pool_tgt_count(pool) ; i++) {
+ if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
+ continue;
+ CDEBUG(level, "pool "LOV_POOLNAMEF"[%d] = %s\n",
+ pool->pool_name, i,
+ obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
+ }
+
+ up_read(&pool_tgt_rw_sem(pool));
+ lov_pool_putref(pool);
+}
+
+#define LOV_POOL_INIT_COUNT 2
+int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
+{
+ ENTRY;
+
+ if (count == 0)
+ count = LOV_POOL_INIT_COUNT;
+ op->op_array = NULL;
+ op->op_count = 0;
+ init_rwsem(&op->op_rw_sem);
+ op->op_size = count;
+ OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
+ if (op->op_array == NULL) {
+ op->op_size = 0;
+ RETURN(-ENOMEM);
+ }
+ EXIT;
+ return 0;
+}
+
+/* Caller must hold write op_rwlock */
+int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
+{
+ __u32 *new;
+ int new_size;
+
+ LASSERT(min_count != 0);
+
+ if (op->op_count < op->op_size)
+ return 0;
+
+ new_size = max(min_count, 2 * op->op_size);
+ OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
+ if (new == NULL)
+ return -ENOMEM;
+
+ /* copy old array to new one */
+ memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
+ OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+ op->op_array = new;
+ op->op_size = new_size;
+ return 0;
+}
+
+int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
+{
+ int rc = 0, i;
+ ENTRY;
+
+ down_write(&op->op_rw_sem);
+
+ rc = lov_ost_pool_extend(op, min_count);
+ if (rc)
+ GOTO(out, rc);
+
+ /* search ost in pool array */
+ for (i = 0; i < op->op_count; i++) {
+ if (op->op_array[i] == idx)
+ GOTO(out, rc = -EEXIST);
+ }
+ /* ost not found we add it */
+ op->op_array[op->op_count] = idx;
+ op->op_count++;
+ EXIT;
+out:
+ up_write(&op->op_rw_sem);
+ return rc;
+}
+
+int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
+{
+ int i;
+ ENTRY;
+
+ down_write(&op->op_rw_sem);
+
+ for (i = 0; i < op->op_count; i++) {
+ if (op->op_array[i] == idx) {
+ memmove(&op->op_array[i], &op->op_array[i + 1],
+ (op->op_count - i - 1) * sizeof(op->op_array[0]));
+ op->op_count--;
+ up_write(&op->op_rw_sem);
+ EXIT;
+ return 0;
+ }
+ }
+
+ up_write(&op->op_rw_sem);
+ RETURN(-EINVAL);
+}
+
+int lov_ost_pool_free(struct ost_pool *op)
+{
+ ENTRY;
+
+ if (op->op_size == 0)
+ RETURN(0);
+
+ down_write(&op->op_rw_sem);
+
+ OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+ op->op_array = NULL;
+ op->op_count = 0;
+ op->op_size = 0;
+
+ up_write(&op->op_rw_sem);
+ RETURN(0);
+}
+
+
+int lov_pool_new(struct obd_device *obd, char *poolname)
+{
+ struct lov_obd *lov;
+ struct pool_desc *new_pool;
+ int rc;
+ ENTRY;
+
+ lov = &(obd->u.lov);
+
+ if (strlen(poolname) > LOV_MAXPOOLNAME)
+ RETURN(-ENAMETOOLONG);
+
+ OBD_ALLOC_PTR(new_pool);
+ if (new_pool == NULL)
+ RETURN(-ENOMEM);
+
+ strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
+ new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
+ new_pool->pool_lobd = obd;
+ /* ref count init to 1 because when created a pool is always used
+ * up to deletion
+ */
+ atomic_set(&new_pool->pool_refcount, 1);
+ rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
+ if (rc)
+ GOTO(out_err, rc);
+
+ memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
+ rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+ if (rc)
+ GOTO(out_free_pool_obds, rc);
+
+ INIT_HLIST_NODE(&new_pool->pool_hash);
+
+#ifdef LPROCFS
+ /* we need this assert seq_file is not implementated for liblustre */
+ /* get ref for /proc file */
+ lov_pool_getref(new_pool);
+ new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
+ poolname, new_pool,
+ &pool_proc_operations);
+ if (IS_ERR(new_pool->pool_proc_entry)) {
+ CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
+ new_pool->pool_proc_entry = NULL;
+ lov_pool_putref(new_pool);
+ }
+ CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
+#endif
+
+ spin_lock(&obd->obd_dev_lock);
+ list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
+ lov->lov_pool_count++;
+ spin_unlock(&obd->obd_dev_lock);
+
+ /* add to find only when it fully ready */
+ rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
+ &new_pool->pool_hash);
+ if (rc)
+ GOTO(out_err, rc = -EEXIST);
+
+ CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
+ poolname, lov->lov_pool_count);
+
+ RETURN(0);
+
+out_err:
+ spin_lock(&obd->obd_dev_lock);
+ list_del_init(&new_pool->pool_list);
+ lov->lov_pool_count--;
+ spin_unlock(&obd->obd_dev_lock);
+
+ lprocfs_remove(&new_pool->pool_proc_entry);
+
+ lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+out_free_pool_obds:
+ lov_ost_pool_free(&new_pool->pool_obds);
+ OBD_FREE_PTR(new_pool);
+ return rc;
+}
+
+int lov_pool_del(struct obd_device *obd, char *poolname)
+{
+ struct lov_obd *lov;
+ struct pool_desc *pool;
+ ENTRY;
+
+ lov = &(obd->u.lov);
+
+ /* lookup and kill hash reference */
+ pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
+ if (pool == NULL)
+ RETURN(-ENOENT);
+
+ if (pool->pool_proc_entry != NULL) {
+ CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
+ lprocfs_remove(&pool->pool_proc_entry);
+ lov_pool_putref(pool);
+ }
+
+ spin_lock(&obd->obd_dev_lock);
+ list_del_init(&pool->pool_list);
+ lov->lov_pool_count--;
+ spin_unlock(&obd->obd_dev_lock);
+
+ /* release last reference */
+ lov_pool_putref(pool);
+
+ RETURN(0);
+}
+
+
+int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+ struct obd_uuid ost_uuid;
+ struct lov_obd *lov;
+ struct pool_desc *pool;
+ unsigned int lov_idx;
+ int rc;
+ ENTRY;
+
+ lov = &(obd->u.lov);
+
+ pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
+ if (pool == NULL)
+ RETURN(-ENOENT);
+
+ obd_str2uuid(&ost_uuid, ostname);
+
+
+ /* search ost in lov array */
+ obd_getref(obd);
+ for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+ if (!lov->lov_tgts[lov_idx])
+ continue;
+ if (obd_uuid_equals(&ost_uuid,
+ &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+ break;
+ }
+ /* test if ost found in lov */
+ if (lov_idx == lov->desc.ld_tgt_count)
+ GOTO(out, rc = -EINVAL);
+
+ rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
+ if (rc)
+ GOTO(out, rc);
+
+ pool->pool_rr.lqr_dirty = 1;
+
+ CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
+ ostname, poolname, pool_tgt_count(pool));
+
+ EXIT;
+out:
+ obd_putref(obd);
+ lov_pool_putref(pool);
+ return rc;
+}
+
+int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
+{
+ struct obd_uuid ost_uuid;
+ struct lov_obd *lov;
+ struct pool_desc *pool;
+ unsigned int lov_idx;
+ int rc = 0;
+ ENTRY;
+
+ lov = &(obd->u.lov);
+
+ pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
+ if (pool == NULL)
+ RETURN(-ENOENT);
+
+ obd_str2uuid(&ost_uuid, ostname);
+
+ obd_getref(obd);
+ /* search ost in lov array, to get index */
+ for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+ if (!lov->lov_tgts[lov_idx])
+ continue;
+
+ if (obd_uuid_equals(&ost_uuid,
+ &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+ break;
+ }
+
+ /* test if ost found in lov */
+ if (lov_idx == lov->desc.ld_tgt_count)
+ GOTO(out, rc = -EINVAL);
+
+ lov_ost_pool_remove(&pool->pool_obds, lov_idx);
+
+ pool->pool_rr.lqr_dirty = 1;
+
+ CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
+ poolname);
+
+ EXIT;
+out:
+ obd_putref(obd);
+ lov_pool_putref(pool);
+ return rc;
+}
+
+int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
+{
+ int i, rc;
+ ENTRY;
+
+ /* caller may no have a ref on pool if it got the pool
+ * without calling lov_find_pool() (e.g. go through the lov pool
+ * list)
+ */
+ lov_pool_getref(pool);
+
+ down_read(&pool_tgt_rw_sem(pool));
+
+ for (i = 0; i < pool_tgt_count(pool); i++) {
+ if (pool_tgt_array(pool)[i] == idx)
+ GOTO(out, rc = 0);
+ }
+ rc = -ENOENT;
+ EXIT;
+out:
+ up_read(&pool_tgt_rw_sem(pool));
+
+ lov_pool_putref(pool);
+ return rc;
+}
+
+struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
+{
+ struct pool_desc *pool;
+
+ pool = NULL;
+ if (poolname[0] != '\0') {
+ pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
+ if (pool == NULL)
+ CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
+ poolname);
+ if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
+ CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
+ poolname);
+ /* pool is ignored, so we remove ref on it */
+ lov_pool_putref(pool);
+ pool = NULL;
+ }
+ }
+ return pool;
+}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
new file mode 100644
index 000000000000..13f1637bc700
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -0,0 +1,1551 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
+
+#include "lov_internal.h"
+
+static void lov_init_set(struct lov_request_set *set)
+{
+ set->set_count = 0;
+ atomic_set(&set->set_completes, 0);
+ atomic_set(&set->set_success, 0);
+ atomic_set(&set->set_finish_checked, 0);
+ set->set_cookies = 0;
+ INIT_LIST_HEAD(&set->set_list);
+ atomic_set(&set->set_refcount, 1);
+ init_waitqueue_head(&set->set_waitq);
+ spin_lock_init(&set->set_lock);
+}
+
+void lov_finish_set(struct lov_request_set *set)
+{
+ struct list_head *pos, *n;
+ ENTRY;
+
+ LASSERT(set);
+ list_for_each_safe(pos, n, &set->set_list) {
+ struct lov_request *req = list_entry(pos,
+ struct lov_request,
+ rq_link);
+ list_del_init(&req->rq_link);
+
+ if (req->rq_oi.oi_oa)
+ OBDO_FREE(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_md)
+ OBD_FREE_LARGE(req->rq_oi.oi_md, req->rq_buflen);
+ if (req->rq_oi.oi_osfs)
+ OBD_FREE(req->rq_oi.oi_osfs,
+ sizeof(*req->rq_oi.oi_osfs));
+ OBD_FREE(req, sizeof(*req));
+ }
+
+ if (set->set_pga) {
+ int len = set->set_oabufs * sizeof(*set->set_pga);
+ OBD_FREE_LARGE(set->set_pga, len);
+ }
+ if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+
+ OBD_FREE(set, sizeof(*set));
+ EXIT;
+}
+
+int lov_set_finished(struct lov_request_set *set, int idempotent)
+{
+ int completes = atomic_read(&set->set_completes);
+
+ CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
+
+ if (completes == set->set_count) {
+ if (idempotent)
+ return 1;
+ if (atomic_inc_return(&set->set_finish_checked) == 1)
+ return 1;
+ }
+ return 0;
+}
+
+void lov_update_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ req->rq_complete = 1;
+ req->rq_rc = rc;
+
+ atomic_inc(&set->set_completes);
+ if (rc == 0)
+ atomic_inc(&set->set_success);
+
+ wake_up(&set->set_waitq);
+}
+
+int lov_update_common_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ ENTRY;
+
+ lov_update_set(set, req, rc);
+
+ /* grace error on inactive ost */
+ if (rc && !(lov->lov_tgts[req->rq_idx] &&
+ lov->lov_tgts[req->rq_idx]->ltd_active))
+ rc = 0;
+
+ /* FIXME in raid1 regime, should return 0 */
+ RETURN(rc);
+}
+
+void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
+{
+ list_add_tail(&req->rq_link, &set->set_list);
+ set->set_count++;
+ req->rq_rqset = set;
+}
+
+static int lov_check_set(struct lov_obd *lov, int idx)
+{
+ int rc = 0;
+ mutex_lock(&lov->lov_lock);
+
+ if (lov->lov_tgts[idx] == NULL ||
+ lov->lov_tgts[idx]->ltd_active ||
+ (lov->lov_tgts[idx]->ltd_exp != NULL &&
+ class_exp2cliimp(lov->lov_tgts[idx]->ltd_exp)->imp_connect_tried))
+ rc = 1;
+
+ mutex_unlock(&lov->lov_lock);
+ return rc;
+}
+
+/* Check if the OSC connection exists and is active.
+ * If the OSC has not yet had a chance to connect to the OST the first time,
+ * wait once for it to connect instead of returning an error.
+ */
+int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
+{
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ struct lov_tgt_desc *tgt;
+ int rc = 0;
+
+ mutex_lock(&lov->lov_lock);
+
+ tgt = lov->lov_tgts[ost_idx];
+
+ if (unlikely(tgt == NULL))
+ GOTO(out, rc = 0);
+
+ if (likely(tgt->ltd_active))
+ GOTO(out, rc = 1);
+
+ if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
+ GOTO(out, rc = 0);
+
+ mutex_unlock(&lov->lov_lock);
+
+ init_waitqueue_head(&waitq);
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
+ cfs_time_seconds(1), NULL, NULL);
+
+ rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
+ if (tgt != NULL && tgt->ltd_active)
+ return 1;
+
+ return 0;
+
+out:
+ mutex_unlock(&lov->lov_lock);
+ return rc;
+}
+
+extern void osc_update_enqueue(struct lustre_handle *lov_lockhp,
+ struct lov_oinfo *loi, int flags,
+ struct ost_lvb *lvb, __u32 mode, int rc);
+
+static int lov_update_enqueue_lov(struct obd_export *exp,
+ struct lustre_handle *lov_lockhp,
+ struct lov_oinfo *loi, int flags, int idx,
+ struct ost_id *oi, int rc)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+
+ if (rc != ELDLM_OK &&
+ !(rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT))) {
+ memset(lov_lockhp, 0, sizeof(*lov_lockhp));
+ if (lov->lov_tgts[idx] && lov->lov_tgts[idx]->ltd_active) {
+ /* -EUSERS used by OST to report file contention */
+ if (rc != -EINTR && rc != -EUSERS)
+ CERROR("%s: enqueue objid "DOSTID" subobj"
+ DOSTID" on OST idx %d: rc %d\n",
+ exp->exp_obd->obd_name,
+ POSTID(oi), POSTID(&loi->loi_oi),
+ loi->loi_ost_idx, rc);
+ } else
+ rc = ELDLM_OK;
+ }
+ return rc;
+}
+
+int lov_update_enqueue_set(struct lov_request *req, __u32 mode, int rc)
+{
+ struct lov_request_set *set = req->rq_rqset;
+ struct lustre_handle *lov_lockhp;
+ struct obd_info *oi = set->set_oi;
+ struct lov_oinfo *loi;
+ ENTRY;
+
+ LASSERT(oi != NULL);
+
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ loi = oi->oi_md->lsm_oinfo[req->rq_stripe];
+
+ /* XXX LOV STACKING: OSC gets a copy, created in lov_prep_enqueue_set
+ * and that copy can be arbitrarily out of date.
+ *
+ * The LOV API is due for a serious rewriting anyways, and this
+ * can be addressed then. */
+
+ lov_stripe_lock(oi->oi_md);
+ osc_update_enqueue(lov_lockhp, loi, oi->oi_flags,
+ &req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb, mode, rc);
+ if (rc == ELDLM_LOCK_ABORTED && (oi->oi_flags & LDLM_FL_HAS_INTENT))
+ memset(lov_lockhp, 0, sizeof *lov_lockhp);
+ rc = lov_update_enqueue_lov(set->set_exp, lov_lockhp, loi, oi->oi_flags,
+ req->rq_idx, &oi->oi_md->lsm_oi, rc);
+ lov_stripe_unlock(oi->oi_md);
+ lov_update_set(set, req, rc);
+ RETURN(rc);
+}
+
+/* The callback for osc_enqueue that updates lov info for every OSC request. */
+static int cb_update_enqueue(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct ldlm_enqueue_info *einfo;
+ struct lov_request *lovreq;
+
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ einfo = lovreq->rq_rqset->set_ei;
+ return lov_update_enqueue_set(lovreq, einfo->ei_mode, rc);
+}
+
+static int enqueue_done(struct lov_request_set *set, __u32 mode)
+{
+ struct lov_request *req;
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ int completes = atomic_read(&set->set_completes);
+ int rc = 0;
+ ENTRY;
+
+ /* enqueue/match success, just return */
+ if (completes && completes == atomic_read(&set->set_success))
+ RETURN(0);
+
+ /* cancel enqueued/matched locks */
+ list_for_each_entry(req, &set->set_list, rq_link) {
+ struct lustre_handle *lov_lockhp;
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ LASSERT(lov_lockhp);
+ if (!lustre_handle_is_used(lov_lockhp))
+ continue;
+
+ rc = obd_cancel(lov->lov_tgts[req->rq_idx]->ltd_exp,
+ req->rq_oi.oi_md, mode, lov_lockhp);
+ if (rc && lov->lov_tgts[req->rq_idx] &&
+ lov->lov_tgts[req->rq_idx]->ltd_active)
+ CERROR("%s: cancelling obdjid "DOSTID" on OST"
+ "idx %d error: rc = %d\n",
+ set->set_exp->exp_obd->obd_name,
+ POSTID(&req->rq_oi.oi_md->lsm_oi),
+ req->rq_idx, rc);
+ }
+ if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+ RETURN(rc);
+}
+
+int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode, int rc,
+ struct ptlrpc_request_set *rqset)
+{
+ int ret = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ /* Do enqueue_done only for sync requests and if any request
+ * succeeded. */
+ if (!rqset) {
+ if (rc)
+ atomic_set(&set->set_completes, 0);
+ ret = enqueue_done(set, mode);
+ } else if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+
+ lov_put_reqset(set);
+
+ RETURN(rc ? rc : ret);
+}
+
+static void lov_llh_addref(void *llhp)
+{
+ struct lov_lock_handles *llh = llhp;
+
+ atomic_inc(&llh->llh_refcount);
+ CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
+ atomic_read(&llh->llh_refcount));
+}
+
+static struct portals_handle_ops lov_handle_ops = {
+ .hop_addref = lov_llh_addref,
+ .hop_free = NULL,
+};
+
+static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
+{
+ struct lov_lock_handles *llh;
+
+ OBD_ALLOC(llh, sizeof *llh +
+ sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
+ if (llh == NULL)
+ return NULL;
+
+ atomic_set(&llh->llh_refcount, 2);
+ llh->llh_stripe_count = lsm->lsm_stripe_count;
+ INIT_LIST_HEAD(&llh->llh_handle.h_link);
+ class_handle_hash(&llh->llh_handle, &lov_handle_ops);
+
+ return llh;
+}
+
+int lov_prep_enqueue_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct lov_request_set **reqset)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct lov_request_set *set;
+ int i, rc = 0;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+ set->set_ei = einfo;
+ set->set_lockh = lov_llh_new(oinfo->oi_md);
+ if (set->set_lockh == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ oinfo->oi_lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi;
+ struct lov_request *req;
+ obd_off start, end;
+
+ loi = oinfo->oi_md->lsm_oinfo[i];
+ if (!lov_stripe_intersects(oinfo->oi_md, i,
+ oinfo->oi_policy.l_extent.start,
+ oinfo->oi_policy.l_extent.end,
+ &start, &end))
+ continue;
+
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_oi.oi_md) +
+ sizeof(struct lov_oinfo *) +
+ sizeof(struct lov_oinfo);
+ OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
+ if (req->rq_oi.oi_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ req->rq_oi.oi_md->lsm_oinfo[0] =
+ ((void *)req->rq_oi.oi_md) + sizeof(*req->rq_oi.oi_md) +
+ sizeof(struct lov_oinfo *);
+
+ /* Set lov request specific parameters. */
+ req->rq_oi.oi_lockh = set->set_lockh->llh_handles + i;
+ req->rq_oi.oi_cb_up = cb_update_enqueue;
+ req->rq_oi.oi_flags = oinfo->oi_flags;
+
+ LASSERT(req->rq_oi.oi_lockh);
+
+ req->rq_oi.oi_policy.l_extent.gid =
+ oinfo->oi_policy.l_extent.gid;
+ req->rq_oi.oi_policy.l_extent.start = start;
+ req->rq_oi.oi_policy.l_extent.end = end;
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
+ req->rq_oi.oi_md->lsm_stripe_count = 0;
+ req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms_valid =
+ loi->loi_kms_valid;
+ req->rq_oi.oi_md->lsm_oinfo[0]->loi_kms = loi->loi_kms;
+ req->rq_oi.oi_md->lsm_oinfo[0]->loi_lvb = loi->loi_lvb;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(0);
+out_set:
+ lov_fini_enqueue_set(set, einfo->ei_mode, rc, NULL);
+ RETURN(rc);
+}
+
+int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ rc = enqueue_done(set, mode);
+ if ((set->set_count == atomic_read(&set->set_success)) &&
+ (flags & LDLM_FL_TEST_LOCK))
+ lov_llh_put(set->set_lockh);
+
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+int lov_prep_match_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_stripe_md *lsm, ldlm_policy_data_t *policy,
+ __u32 mode, struct lustre_handle *lockh,
+ struct lov_request_set **reqset)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct lov_request_set *set;
+ int i, rc = 0;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+ set->set_oi->oi_md = lsm;
+ set->set_lockh = lov_llh_new(lsm);
+ if (set->set_lockh == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++){
+ struct lov_oinfo *loi;
+ struct lov_request *req;
+ obd_off start, end;
+
+ loi = lsm->lsm_oinfo[i];
+ if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
+ policy->l_extent.end, &start, &end))
+ continue;
+
+ /* FIXME raid1 should grace this error */
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ GOTO(out_set, rc = -EIO);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_oi.oi_md);
+ OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
+ if (req->rq_oi.oi_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+
+ req->rq_oi.oi_policy.l_extent.start = start;
+ req->rq_oi.oi_policy.l_extent.end = end;
+ req->rq_oi.oi_policy.l_extent.gid = policy->l_extent.gid;
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
+ req->rq_oi.oi_md->lsm_stripe_count = 0;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_match_set(set, mode, 0);
+ RETURN(rc);
+}
+
+int lov_fini_cancel_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+
+ LASSERT(set->set_exp);
+ if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+int lov_prep_cancel_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_stripe_md *lsm, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ int i, rc = 0;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+ set->set_oi->oi_md = lsm;
+ set->set_lockh = lov_handle2llh(lockh);
+ if (set->set_lockh == NULL) {
+ CERROR("LOV: invalid lov lock handle %p\n", lockh);
+ GOTO(out_set, rc = -EINVAL);
+ }
+ lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++){
+ struct lov_request *req;
+ struct lustre_handle *lov_lockhp;
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+
+ lov_lockhp = set->set_lockh->llh_handles + i;
+ if (!lustre_handle_is_used(lov_lockhp)) {
+ CDEBUG(D_INFO, "lov idx %d subobj "DOSTID" no lock\n",
+ loi->loi_ost_idx, POSTID(&loi->loi_oi));
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_oi.oi_md);
+ OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
+ if (req->rq_oi.oi_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
+ req->rq_oi.oi_md->lsm_stripe_count = 0;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_cancel_set(set);
+ RETURN(rc);
+}
+static int common_attr_done(struct lov_request_set *set)
+{
+ struct list_head *pos;
+ struct lov_request *req;
+ struct obdo *tmp_oa;
+ int rc = 0, attrset = 0;
+ ENTRY;
+
+ LASSERT(set->set_oi != NULL);
+
+ if (set->set_oi->oi_oa == NULL)
+ RETURN(0);
+
+ if (!atomic_read(&set->set_success))
+ RETURN(-EIO);
+
+ OBDO_ALLOC(tmp_oa);
+ if (tmp_oa == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+ if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
+ continue;
+ lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
+ req->rq_oi.oi_oa->o_valid,
+ set->set_oi->oi_md, req->rq_stripe, &attrset);
+ }
+ if (!attrset) {
+ CERROR("No stripes had valid attrs\n");
+ rc = -EIO;
+ }
+ if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
+ (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
+ /* When we take attributes of some epoch, we require all the
+ * ost to be active. */
+ CERROR("Not all the stripes had valid attrs\n");
+ GOTO(out, rc = -EIO);
+ }
+
+ tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
+ memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
+out:
+ if (tmp_oa)
+ OBDO_FREE(tmp_oa);
+ RETURN(rc);
+
+}
+
+static int brw_done(struct lov_request_set *set)
+{
+ struct lov_stripe_md *lsm = set->set_oi->oi_md;
+ struct lov_oinfo *loi = NULL;
+ struct list_head *pos;
+ struct lov_request *req;
+ ENTRY;
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+
+ loi = lsm->lsm_oinfo[req->rq_stripe];
+
+ if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS)
+ loi->loi_lvb.lvb_blocks = req->rq_oi.oi_oa->o_blocks;
+ }
+
+ RETURN(0);
+}
+
+int lov_fini_brw_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes)) {
+ rc = brw_done(set);
+ /* FIXME update qos data here */
+ }
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+int lov_prep_brw_set(struct obd_export *exp, struct obd_info *oinfo,
+ obd_count oa_bufs, struct brw_page *pga,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct {
+ obd_count index;
+ obd_count count;
+ obd_count off;
+ } *info = NULL;
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i, shift;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oti = oti;
+ set->set_oi = oinfo;
+ set->set_oabufs = oa_bufs;
+ OBD_ALLOC_LARGE(set->set_pga, oa_bufs * sizeof(*set->set_pga));
+ if (!set->set_pga)
+ GOTO(out, rc = -ENOMEM);
+
+ OBD_ALLOC_LARGE(info, sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
+ if (!info)
+ GOTO(out, rc = -ENOMEM);
+
+ /* calculate the page count for each stripe */
+ for (i = 0; i < oa_bufs; i++) {
+ int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
+ info[stripe].count++;
+ }
+
+ /* alloc and initialize lov request */
+ shift = 0;
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++){
+ struct lov_oinfo *loi = NULL;
+ struct lov_request *req;
+
+ if (info[i].count == 0)
+ continue;
+
+ loi = oinfo->oi_md->lsm_oinfo[i];
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ GOTO(out, rc = -EIO);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ if (oinfo->oi_oa) {
+ memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
+ sizeof(*req->rq_oi.oi_oa));
+ }
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ req->rq_oi.oi_oa->o_stripe_idx = i;
+
+ req->rq_buflen = sizeof(*req->rq_oi.oi_md);
+ OBD_ALLOC_LARGE(req->rq_oi.oi_md, req->rq_buflen);
+ if (req->rq_oi.oi_md == NULL) {
+ OBDO_FREE(req->rq_oi.oi_oa);
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING */
+ req->rq_oi.oi_md->lsm_oi = loi->loi_oi;
+ req->rq_oabufs = info[i].count;
+ req->rq_pgaidx = shift;
+ shift += req->rq_oabufs;
+
+ /* remember the index for sort brw_page array */
+ info[i].index = req->rq_pgaidx;
+
+ req->rq_oi.oi_capa = oinfo->oi_capa;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out, rc = -EIO);
+
+ /* rotate & sort the brw_page array */
+ for (i = 0; i < oa_bufs; i++) {
+ int stripe = lov_stripe_number(oinfo->oi_md, pga[i].off);
+
+ shift = info[stripe].index + info[stripe].off;
+ LASSERT(shift < oa_bufs);
+ set->set_pga[shift] = pga[i];
+ lov_stripe_offset(oinfo->oi_md, pga[i].off, stripe,
+ &set->set_pga[shift].off);
+ info[stripe].off++;
+ }
+out:
+ if (info)
+ OBD_FREE_LARGE(info,
+ sizeof(*info) * oinfo->oi_md->lsm_stripe_count);
+
+ if (rc == 0)
+ *reqset = set;
+ else
+ lov_fini_brw_set(set);
+
+ RETURN(rc);
+}
+
+int lov_fini_getattr_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes))
+ rc = common_attr_done(set);
+
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+/* The callback for osc_getattr_async that finilizes a request info when a
+ * response is received. */
+static int cb_getattr_update(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct lov_request *lovreq;
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
+}
+
+int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi;
+ struct lov_request *req;
+
+ loi = oinfo->oi_md->lsm_oinfo[i];
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH)
+ /* SOM requires all the OSTs to be active. */
+ GOTO(out_set, rc = -EIO);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
+ sizeof(*req->rq_oi.oi_oa));
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ req->rq_oi.oi_cb_up = cb_getattr_update;
+ req->rq_oi.oi_capa = oinfo->oi_capa;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_getattr_set(set);
+ RETURN(rc);
+}
+
+int lov_fini_destroy_set(struct lov_request_set *set)
+{
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes)) {
+ /* FIXME update qos data here */
+ }
+
+ lov_put_reqset(set);
+
+ RETURN(0);
+}
+
+int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obdo *src_oa, struct lov_stripe_md *lsm,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+ set->set_oi->oi_md = lsm;
+ set->set_oi->oi_oa = src_oa;
+ set->set_oti = oti;
+ if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
+ set->set_cookies = oti->oti_logcookies;
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi;
+ struct lov_request *req;
+
+ loi = lsm->lsm_oinfo[i];
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_destroy_set(set);
+ RETURN(rc);
+}
+
+int lov_fini_setattr_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes)) {
+ rc = common_attr_done(set);
+ /* FIXME update qos data here */
+ }
+
+ lov_put_reqset(set);
+ RETURN(rc);
+}
+
+int lov_update_setattr_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
+ struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
+ ENTRY;
+
+ lov_update_set(set, req, rc);
+
+ /* grace error on inactive ost */
+ if (rc && !(lov->lov_tgts[req->rq_idx] &&
+ lov->lov_tgts[req->rq_idx]->ltd_active))
+ rc = 0;
+
+ if (rc == 0) {
+ if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
+ lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
+ req->rq_oi.oi_oa->o_ctime;
+ if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
+ lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
+ req->rq_oi.oi_oa->o_mtime;
+ if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
+ lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
+ req->rq_oi.oi_oa->o_atime;
+ }
+
+ RETURN(rc);
+}
+
+/* The callback for osc_setattr_async that finilizes a request info when a
+ * response is received. */
+static int cb_setattr_update(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct lov_request *lovreq;
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
+}
+
+int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oti = oti;
+ set->set_oi = oinfo;
+ if (oti != NULL && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
+ set->set_cookies = oti->oti_logcookies;
+
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
+ struct lov_request *req;
+
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
+ sizeof(*req->rq_oi.oi_oa));
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ req->rq_oi.oi_oa->o_stripe_idx = i;
+ req->rq_oi.oi_cb_up = cb_setattr_update;
+ req->rq_oi.oi_capa = oinfo->oi_capa;
+
+ if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
+ int off = lov_stripe_offset(oinfo->oi_md,
+ oinfo->oi_oa->o_size, i,
+ &req->rq_oi.oi_oa->o_size);
+
+ if (off < 0 && req->rq_oi.oi_oa->o_size)
+ req->rq_oi.oi_oa->o_size--;
+
+ CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+ i, req->rq_oi.oi_oa->o_size,
+ oinfo->oi_oa->o_size);
+ }
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_setattr_set(set);
+ RETURN(rc);
+}
+
+int lov_fini_punch_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes)) {
+ rc = -EIO;
+ /* FIXME update qos data here */
+ if (atomic_read(&set->set_success))
+ rc = common_attr_done(set);
+ }
+
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+int lov_update_punch_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
+ struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
+ ENTRY;
+
+ lov_update_set(set, req, rc);
+
+ /* grace error on inactive ost */
+ if (rc && !lov->lov_tgts[req->rq_idx]->ltd_active)
+ rc = 0;
+
+ if (rc == 0) {
+ lov_stripe_lock(lsm);
+ if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLBLOCKS) {
+ lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_blocks =
+ req->rq_oi.oi_oa->o_blocks;
+ }
+
+ lov_stripe_unlock(lsm);
+ }
+
+ RETURN(rc);
+}
+
+/* The callback for osc_punch that finilizes a request info when a response
+ * is received. */
+static int cb_update_punch(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct lov_request *lovreq;
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ return lov_update_punch_set(lovreq->rq_rqset, lovreq, rc);
+}
+
+int lov_prep_punch_set(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_oi = oinfo;
+ set->set_exp = exp;
+
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
+ struct lov_request *req;
+ obd_off rs, re;
+
+ if (!lov_stripe_intersects(oinfo->oi_md, i,
+ oinfo->oi_policy.l_extent.start,
+ oinfo->oi_policy.l_extent.end,
+ &rs, &re))
+ continue;
+
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ GOTO(out_set, rc = -EIO);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
+ sizeof(*req->rq_oi.oi_oa));
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ req->rq_oi.oi_oa->o_valid |= OBD_MD_FLGROUP;
+
+ req->rq_oi.oi_oa->o_stripe_idx = i;
+ req->rq_oi.oi_cb_up = cb_update_punch;
+
+ req->rq_oi.oi_policy.l_extent.start = rs;
+ req->rq_oi.oi_policy.l_extent.end = re;
+ req->rq_oi.oi_policy.l_extent.gid = -1;
+
+ req->rq_oi.oi_capa = oinfo->oi_capa;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_punch_set(set);
+ RETURN(rc);
+}
+
+int lov_fini_sync_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (atomic_read(&set->set_completes)) {
+ if (!atomic_read(&set->set_success))
+ rc = -EIO;
+ /* FIXME update qos data here */
+ }
+
+ lov_put_reqset(set);
+
+ RETURN(rc);
+}
+
+/* The callback for osc_sync that finilizes a request info when a
+ * response is recieved. */
+static int cb_sync_update(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct lov_request *lovreq;
+
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
+}
+
+int lov_prep_sync_set(struct obd_export *exp, struct obd_info *oinfo,
+ obd_off start, obd_off end,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC_PTR(set);
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_oi = oinfo;
+
+ for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
+ struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
+ struct lov_request *req;
+ obd_off rs, re;
+
+ if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ if (!lov_stripe_intersects(oinfo->oi_md, i, start, end, &rs,
+ &re))
+ continue;
+
+ OBD_ALLOC_PTR(req);
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ OBDO_ALLOC(req->rq_oi.oi_oa);
+ if (req->rq_oi.oi_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+ *req->rq_oi.oi_oa = *oinfo->oi_oa;
+ req->rq_oi.oi_oa->o_oi = loi->loi_oi;
+ req->rq_oi.oi_oa->o_stripe_idx = i;
+
+ req->rq_oi.oi_policy.l_extent.start = rs;
+ req->rq_oi.oi_policy.l_extent.end = re;
+ req->rq_oi.oi_policy.l_extent.gid = -1;
+ req->rq_oi.oi_cb_up = cb_sync_update;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_sync_set(set);
+ RETURN(rc);
+}
+
+#define LOV_U64_MAX ((__u64)~0ULL)
+#define LOV_SUM_MAX(tot, add) \
+ do { \
+ if ((tot) + (add) < (tot)) \
+ (tot) = LOV_U64_MAX; \
+ else \
+ (tot) += (add); \
+ } while(0)
+
+int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
+{
+ ENTRY;
+
+ if (success) {
+ __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
+ LOV_MAGIC, 0);
+ if (osfs->os_files != LOV_U64_MAX)
+ lov_do_div64(osfs->os_files, expected_stripes);
+ if (osfs->os_ffree != LOV_U64_MAX)
+ lov_do_div64(osfs->os_ffree, expected_stripes);
+
+ spin_lock(&obd->obd_osfs_lock);
+ memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
+ obd->obd_osfs_age = cfs_time_current_64();
+ spin_unlock(&obd->obd_osfs_lock);
+ RETURN(0);
+ }
+
+ RETURN(-EIO);
+}
+
+int lov_fini_statfs_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+
+ if (atomic_read(&set->set_completes)) {
+ rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
+ atomic_read(&set->set_success));
+ }
+ lov_put_reqset(set);
+ RETURN(rc);
+}
+
+void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs,
+ int success)
+{
+ int shift = 0, quit = 0;
+ __u64 tmp;
+
+ if (success == 0) {
+ memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
+ } else {
+ if (osfs->os_bsize != lov_sfs->os_bsize) {
+ /* assume all block sizes are always powers of 2 */
+ /* get the bits difference */
+ tmp = osfs->os_bsize | lov_sfs->os_bsize;
+ for (shift = 0; shift <= 64; ++shift) {
+ if (tmp & 1) {
+ if (quit)
+ break;
+ else
+ quit = 1;
+ shift = 0;
+ }
+ tmp >>= 1;
+ }
+ }
+
+ if (osfs->os_bsize < lov_sfs->os_bsize) {
+ osfs->os_bsize = lov_sfs->os_bsize;
+
+ osfs->os_bfree >>= shift;
+ osfs->os_bavail >>= shift;
+ osfs->os_blocks >>= shift;
+ } else if (shift != 0) {
+ lov_sfs->os_bfree >>= shift;
+ lov_sfs->os_bavail >>= shift;
+ lov_sfs->os_blocks >>= shift;
+ }
+ osfs->os_bfree += lov_sfs->os_bfree;
+ osfs->os_bavail += lov_sfs->os_bavail;
+ osfs->os_blocks += lov_sfs->os_blocks;
+ /* XXX not sure about this one - depends on policy.
+ * - could be minimum if we always stripe on all OBDs
+ * (but that would be wrong for any other policy,
+ * if one of the OBDs has no more objects left)
+ * - could be sum if we stripe whole objects
+ * - could be average, just to give a nice number
+ *
+ * To give a "reasonable" (if not wholly accurate)
+ * number, we divide the total number of free objects
+ * by expected stripe count (watch out for overflow).
+ */
+ LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
+ LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
+ }
+}
+
+/* The callback for osc_statfs_async that finilizes a request info when a
+ * response is received. */
+static int cb_statfs_update(void *cookie, int rc)
+{
+ struct obd_info *oinfo = cookie;
+ struct lov_request *lovreq;
+ struct lov_request_set *set;
+ struct obd_statfs *osfs, *lov_sfs;
+ struct lov_obd *lov;
+ struct lov_tgt_desc *tgt;
+ struct obd_device *lovobd, *tgtobd;
+ int success;
+ ENTRY;
+
+ lovreq = container_of(oinfo, struct lov_request, rq_oi);
+ set = lovreq->rq_rqset;
+ lovobd = set->set_obd;
+ lov = &lovobd->u.lov;
+ osfs = set->set_oi->oi_osfs;
+ lov_sfs = oinfo->oi_osfs;
+ success = atomic_read(&set->set_success);
+ /* XXX: the same is done in lov_update_common_set, however
+ lovset->set_exp is not initialized. */
+ lov_update_set(set, lovreq, rc);
+ if (rc)
+ GOTO(out, rc);
+
+ obd_getref(lovobd);
+ tgt = lov->lov_tgts[lovreq->rq_idx];
+ if (!tgt || !tgt->ltd_active)
+ GOTO(out_update, rc);
+
+ tgtobd = class_exp2obd(tgt->ltd_exp);
+ spin_lock(&tgtobd->obd_osfs_lock);
+ memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
+ if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
+ tgtobd->obd_osfs_age = cfs_time_current_64();
+ spin_unlock(&tgtobd->obd_osfs_lock);
+
+out_update:
+ lov_update_statfs(osfs, lov_sfs, success);
+ obd_putref(lovobd);
+
+out:
+ if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
+ lov_set_finished(set, 0)) {
+ lov_statfs_interpret(NULL, set, set->set_count !=
+ atomic_read(&set->set_success));
+ }
+
+ RETURN(0);
+}
+
+int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_obd *lov = &obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_obd = obd;
+ set->set_oi = oinfo;
+
+ /* We only get block data from the OBD */
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ struct lov_request *req;
+
+ if (lov->lov_tgts[i] == NULL ||
+ (!lov_check_and_wait_active(lov, i) &&
+ (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", i);
+ continue;
+ }
+
+ /* skip targets that have been explicitely disabled by the
+ * administrator */
+ if (!lov->lov_tgts[i]->ltd_exp) {
+ CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
+ if (req->rq_oi.oi_osfs == NULL) {
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out_set, rc = -ENOMEM);
+ }
+
+ req->rq_idx = i;
+ req->rq_oi.oi_cb_up = cb_statfs_update;
+ req->rq_oi.oi_flags = oinfo->oi_flags;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_statfs_set(set);
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
new file mode 100644
index 000000000000..204ecd0b8639
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
@@ -0,0 +1,211 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_device and cl_device_type for LOVSUB layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lovsub transfer operations.
+ *
+ */
+
+static void lovsub_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct lovsub_req *lsr;
+
+ ENTRY;
+ lsr = cl2lovsub_req(slice);
+ OBD_SLAB_FREE_PTR(lsr, lovsub_req_kmem);
+ EXIT;
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for lovsub
+ * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx
+ * field, which is filled there.
+ */
+static void lovsub_req_attr_set(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, obd_valid flags)
+{
+ struct lovsub_object *subobj;
+
+ ENTRY;
+ subobj = cl2lovsub(obj);
+ /*
+ * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it
+ * unconditionally. It never changes anyway.
+ */
+ attr->cra_oa->o_stripe_idx = subobj->lso_index;
+ EXIT;
+}
+
+static const struct cl_req_operations lovsub_req_ops = {
+ .cro_attr_set = lovsub_req_attr_set,
+ .cro_completion = lovsub_req_completion
+};
+
+/*****************************************************************************
+ *
+ * Lov-sub device and device type functions.
+ *
+ */
+
+static int lovsub_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ struct lovsub_device *lsd = lu2lovsub_dev(d);
+ struct lu_device_type *ldt;
+ int rc;
+
+ ENTRY;
+ next->ld_site = d->ld_site;
+ ldt = next->ld_type;
+ LASSERT(ldt != NULL);
+ rc = ldt->ldt_ops->ldto_device_init(env, next, ldt->ldt_name, NULL);
+ if (rc) {
+ next->ld_site = NULL;
+ RETURN(rc);
+ }
+
+ lu_device_get(next);
+ lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+ lsd->acid_next = lu2cl_dev(next);
+ RETURN(rc);
+}
+
+static struct lu_device *lovsub_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct lu_device *next;
+ struct lovsub_device *lsd;
+
+ ENTRY;
+ lsd = lu2lovsub_dev(d);
+ next = cl2lu_dev(lsd->acid_next);
+ lsd->acid_super = NULL;
+ lsd->acid_next = NULL;
+ RETURN(next);
+}
+
+static struct lu_device *lovsub_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct lovsub_device *lsd = lu2lovsub_dev(d);
+ struct lu_device *next = cl2lu_dev(lsd->acid_next);
+
+ cl_device_fini(lu2cl_dev(d));
+ OBD_FREE_PTR(lsd);
+ return next;
+}
+
+static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct lovsub_req *lsr;
+ int result;
+
+ OBD_SLAB_ALLOC_PTR_GFP(lsr, lovsub_req_kmem, __GFP_IO);
+ if (lsr != NULL) {
+ cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+static const struct lu_device_operations lovsub_lu_ops = {
+ .ldo_object_alloc = lovsub_object_alloc,
+ .ldo_process_config = NULL,
+ .ldo_recovery_complete = NULL
+};
+
+static const struct cl_device_operations lovsub_cl_ops = {
+ .cdo_req_init = lovsub_req_init
+};
+
+static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg)
+{
+ struct lu_device *d;
+ struct lovsub_device *lsd;
+
+ OBD_ALLOC_PTR(lsd);
+ if (lsd != NULL) {
+ int result;
+
+ result = cl_device_init(&lsd->acid_cl, t);
+ if (result == 0) {
+ d = lovsub2lu_dev(lsd);
+ d->ld_ops = &lovsub_lu_ops;
+ lsd->acid_cl.cd_ops = &lovsub_cl_ops;
+ } else
+ d = ERR_PTR(result);
+ } else
+ d = ERR_PTR(-ENOMEM);
+ return d;
+}
+
+static const struct lu_device_type_operations lovsub_device_type_ops = {
+ .ldto_device_alloc = lovsub_device_alloc,
+ .ldto_device_free = lovsub_device_free,
+
+ .ldto_device_init = lovsub_device_init,
+ .ldto_device_fini = lovsub_device_fini
+};
+
+#define LUSTRE_LOVSUB_NAME "lovsub"
+
+struct lu_device_type lovsub_device_type = {
+ .ldt_tags = LU_DEVICE_CL,
+ .ldt_name = LUSTRE_LOVSUB_NAME,
+ .ldt_ops = &lovsub_device_type_ops,
+ .ldt_ctx_tags = LCT_CL_THREAD
+};
+
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_io.c b/drivers/staging/lustre/lustre/lov/lovsub_io.c
new file mode 100644
index 000000000000..783ec687a4e7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lovsub_io.c
@@ -0,0 +1,55 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_io for LOVSUB layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lovsub io operations.
+ *
+ */
+
+/* All trivial */
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
new file mode 100644
index 000000000000..03bab17ccc64
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
@@ -0,0 +1,485 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_lock for LOVSUB layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lovsub lock operations.
+ *
+ */
+
+static void lovsub_lock_fini(const struct lu_env *env,
+ struct cl_lock_slice *slice)
+{
+ struct lovsub_lock *lsl;
+
+ ENTRY;
+ lsl = cl2lovsub_lock(slice);
+ LASSERT(list_empty(&lsl->lss_parents));
+ OBD_SLAB_FREE_PTR(lsl, lovsub_lock_kmem);
+ EXIT;
+}
+
+static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
+{
+ struct cl_lock *parent;
+
+ ENTRY;
+ parent = lov->lls_cl.cls_lock;
+ cl_lock_get(parent);
+ lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
+ cl_lock_mutex_get(env, parent);
+ EXIT;
+}
+
+static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
+{
+ struct cl_lock *parent;
+
+ ENTRY;
+ parent = lov->lls_cl.cls_lock;
+ cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
+ lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
+ cl_lock_put(env, parent);
+ EXIT;
+}
+
+/**
+ * Implements cl_lock_operations::clo_state() method for lovsub layer, which
+ * method is called whenever sub-lock state changes. Propagates state change
+ * to the top-locks.
+ */
+static void lovsub_lock_state(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state state)
+{
+ struct lovsub_lock *sub = cl2lovsub_lock(slice);
+ struct lov_lock_link *scan;
+
+ LASSERT(cl_lock_is_mutexed(slice->cls_lock));
+ ENTRY;
+
+ list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ struct lov_lock *lov = scan->lll_super;
+ struct cl_lock *parent = lov->lls_cl.cls_lock;
+
+ if (sub->lss_active != parent) {
+ lovsub_parent_lock(env, lov);
+ cl_lock_signal(env, parent);
+ lovsub_parent_unlock(env, lov);
+ }
+ }
+ EXIT;
+}
+
+/**
+ * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
+ * asking parent lock.
+ */
+static unsigned long lovsub_lock_weigh(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct lovsub_lock *lock = cl2lovsub_lock(slice);
+ struct lov_lock *lov;
+ unsigned long dumbbell;
+
+ ENTRY;
+
+ LASSERT(cl_lock_is_mutexed(slice->cls_lock));
+
+ if (!list_empty(&lock->lss_parents)) {
+ /*
+ * It is not clear whether all parents have to be asked and
+ * their estimations summed, or it is enough to ask one. For
+ * the current usages, one is always enough.
+ */
+ lov = container_of(lock->lss_parents.next,
+ struct lov_lock_link, lll_list)->lll_super;
+
+ lovsub_parent_lock(env, lov);
+ dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
+ lovsub_parent_unlock(env, lov);
+ } else
+ dumbbell = 0;
+
+ RETURN(dumbbell);
+}
+
+/**
+ * Maps start/end offsets within a stripe, to offsets within a file.
+ */
+static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
+ struct lov_object *lov,
+ int stripe, struct cl_lock_descr *out)
+{
+ pgoff_t size; /* stripe size in pages */
+ pgoff_t skip; /* how many pages in every stripe are occupied by
+ * "other" stripes */
+ pgoff_t start;
+ pgoff_t end;
+
+ ENTRY;
+ start = in->cld_start;
+ end = in->cld_end;
+
+ if (lov->lo_lsm->lsm_stripe_count > 1) {
+ size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
+ skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
+
+ /* XXX overflow check here? */
+ start += start/size * skip + stripe * size;
+
+ if (end != CL_PAGE_EOF) {
+ end += end/size * skip + stripe * size;
+ /*
+ * And check for overflow...
+ */
+ if (end < in->cld_end)
+ end = CL_PAGE_EOF;
+ }
+ }
+ out->cld_start = start;
+ out->cld_end = end;
+ EXIT;
+}
+
+/**
+ * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
+ * called in two ways:
+ *
+ * - as part of receive call-back, when server returns granted extent to
+ * the client, and
+ *
+ * - when top-lock finds existing sub-lock in the cache.
+ *
+ * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
+ * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
+ */
+int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
+ struct lovsub_lock *sublock,
+ const struct cl_lock_descr *d, int idx)
+{
+ struct cl_lock *parent;
+ struct lovsub_object *subobj;
+ struct cl_lock_descr *pd;
+ struct cl_lock_descr *parent_descr;
+ int result;
+
+ parent = lov->lls_cl.cls_lock;
+ parent_descr = &parent->cll_descr;
+ LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
+
+ subobj = cl2lovsub(sublock->lss_cl.cls_obj);
+ pd = &lov_env_info(env)->lti_ldescr;
+
+ pd->cld_obj = parent_descr->cld_obj;
+ pd->cld_mode = parent_descr->cld_mode;
+ pd->cld_gid = parent_descr->cld_gid;
+ lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
+ lov->lls_sub[idx].sub_got = *d;
+ /*
+ * Notify top-lock about modification, if lock description changes
+ * materially.
+ */
+ if (!cl_lock_ext_match(parent_descr, pd))
+ result = cl_lock_modify(env, parent, pd);
+ else
+ result = 0;
+ return result;
+}
+
+static int lovsub_lock_modify(const struct lu_env *env,
+ const struct cl_lock_slice *s,
+ const struct cl_lock_descr *d)
+{
+ struct lovsub_lock *lock = cl2lovsub_lock(s);
+ struct lov_lock_link *scan;
+ struct lov_lock *lov;
+ int result = 0;
+
+ ENTRY;
+
+ LASSERT(cl_lock_mode_match(d->cld_mode,
+ s->cls_lock->cll_descr.cld_mode));
+ list_for_each_entry(scan, &lock->lss_parents, lll_list) {
+ int rc;
+
+ lov = scan->lll_super;
+ lovsub_parent_lock(env, lov);
+ rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
+ lovsub_parent_unlock(env, lov);
+ result = result ?: rc;
+ }
+ RETURN(result);
+}
+
+static int lovsub_lock_closure(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_lock_closure *closure)
+{
+ struct lovsub_lock *sub;
+ struct cl_lock *parent;
+ struct lov_lock_link *scan;
+ int result;
+
+ LASSERT(cl_lock_is_mutexed(slice->cls_lock));
+ ENTRY;
+
+ sub = cl2lovsub_lock(slice);
+ result = 0;
+
+ list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ parent = scan->lll_super->lls_cl.cls_lock;
+ result = cl_lock_closure_build(env, parent, closure);
+ if (result != 0)
+ break;
+ }
+ RETURN(result);
+}
+
+/**
+ * A helper function for lovsub_lock_delete() that deals with a given parent
+ * top-lock.
+ */
+static int lovsub_lock_delete_one(const struct lu_env *env,
+ struct cl_lock *child, struct lov_lock *lov)
+{
+ struct cl_lock *parent;
+ int result;
+ ENTRY;
+
+ parent = lov->lls_cl.cls_lock;
+ if (parent->cll_error)
+ RETURN(0);
+
+ result = 0;
+ switch (parent->cll_state) {
+ case CLS_ENQUEUED:
+ /* See LU-1355 for the case that a glimpse lock is
+ * interrupted by signal */
+ LASSERT(parent->cll_flags & CLF_CANCELLED);
+ break;
+ case CLS_QUEUING:
+ case CLS_FREEING:
+ cl_lock_signal(env, parent);
+ break;
+ case CLS_INTRANSIT:
+ /*
+ * Here lies a problem: a sub-lock is canceled while top-lock
+ * is being unlocked. Top-lock cannot be moved into CLS_NEW
+ * state, because unlocking has to succeed eventually by
+ * placing lock into CLS_CACHED (or failing it), see
+ * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
+ * state, because lov maintains an invariant that all
+ * sub-locks exist in CLS_CACHED (this allows cached top-lock
+ * to be reused immediately). Nor can we wait for top-lock
+ * state to change, because this can be synchronous to the
+ * current thread.
+ *
+ * We know for sure that lov_lock_unuse() will be called at
+ * least one more time to finish un-using, so leave a mark on
+ * the top-lock, that will be seen by the next call to
+ * lov_lock_unuse().
+ */
+ if (cl_lock_is_intransit(parent))
+ lov->lls_cancel_race = 1;
+ break;
+ case CLS_CACHED:
+ /*
+ * if a sub-lock is canceled move its top-lock into CLS_NEW
+ * state to preserve an invariant that a top-lock in
+ * CLS_CACHED is immediately ready for re-use (i.e., has all
+ * sub-locks), and so that next attempt to re-use the top-lock
+ * enqueues missing sub-lock.
+ */
+ cl_lock_state_set(env, parent, CLS_NEW);
+ /* fall through */
+ case CLS_NEW:
+ /*
+ * if last sub-lock is canceled, destroy the top-lock (which
+ * is now `empty') proactively.
+ */
+ if (lov->lls_nr_filled == 0) {
+ /* ... but unfortunately, this cannot be done easily,
+ * as cancellation of a top-lock might acquire mutices
+ * of its other sub-locks, violating lock ordering,
+ * see cl_lock_{cancel,delete}() preconditions.
+ *
+ * To work around this, the mutex of this sub-lock is
+ * released, top-lock is destroyed, and sub-lock mutex
+ * acquired again. The list of parents has to be
+ * re-scanned from the beginning after this.
+ *
+ * Only do this if no mutices other than on @child and
+ * @parent are held by the current thread.
+ *
+ * TODO: The lock modal here is too complex, because
+ * the lock may be canceled and deleted by voluntarily:
+ * cl_lock_request
+ * -> osc_lock_enqueue_wait
+ * -> osc_lock_cancel_wait
+ * -> cl_lock_delete
+ * -> lovsub_lock_delete
+ * -> cl_lock_cancel/delete
+ * -> ...
+ *
+ * The better choice is to spawn a kernel thread for
+ * this purpose. -jay
+ */
+ if (cl_lock_nr_mutexed(env) == 2) {
+ cl_lock_mutex_put(env, child);
+ cl_lock_cancel(env, parent);
+ cl_lock_delete(env, parent);
+ result = 1;
+ }
+ }
+ break;
+ case CLS_HELD:
+ CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
+ default:
+ CERROR("Impossible state: %d\n", parent->cll_state);
+ LBUG();
+ break;
+ }
+
+ RETURN(result);
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_delete() method. This is
+ * invoked in "bottom-to-top" delete, when lock destruction starts from the
+ * sub-lock (e.g, as a result of ldlm lock LRU policy).
+ */
+static void lovsub_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct cl_lock *child = slice->cls_lock;
+ struct lovsub_lock *sub = cl2lovsub_lock(slice);
+ int restart;
+
+ LASSERT(cl_lock_is_mutexed(child));
+
+ ENTRY;
+ /*
+ * Destruction of a sub-lock might take multiple iterations, because
+ * when the last sub-lock of a given top-lock is deleted, top-lock is
+ * canceled proactively, and this requires to release sub-lock
+ * mutex. Once sub-lock mutex has been released, list of its parents
+ * has to be re-scanned from the beginning.
+ */
+ do {
+ struct lov_lock *lov;
+ struct lov_lock_link *scan;
+ struct lov_lock_link *temp;
+ struct lov_lock_sub *subdata;
+
+ restart = 0;
+ list_for_each_entry_safe(scan, temp,
+ &sub->lss_parents, lll_list) {
+ lov = scan->lll_super;
+ subdata = &lov->lls_sub[scan->lll_idx];
+ lovsub_parent_lock(env, lov);
+ subdata->sub_got = subdata->sub_descr;
+ lov_lock_unlink(env, scan, sub);
+ restart = lovsub_lock_delete_one(env, child, lov);
+ lovsub_parent_unlock(env, lov);
+
+ if (restart) {
+ cl_lock_mutex_get(env, child);
+ break;
+ }
+ }
+ } while (restart);
+ EXIT;
+}
+
+static int lovsub_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_lock_slice *slice)
+{
+ struct lovsub_lock *sub = cl2lovsub_lock(slice);
+ struct lov_lock *lov;
+ struct lov_lock_link *scan;
+
+ list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ lov = scan->lll_super;
+ (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
+ if (lov != NULL)
+ cl_lock_descr_print(env, cookie, p,
+ &lov->lls_cl.cls_lock->cll_descr);
+ (*p)(env, cookie, "] ");
+ }
+ return 0;
+}
+
+static const struct cl_lock_operations lovsub_lock_ops = {
+ .clo_fini = lovsub_lock_fini,
+ .clo_state = lovsub_lock_state,
+ .clo_delete = lovsub_lock_delete,
+ .clo_modify = lovsub_lock_modify,
+ .clo_closure = lovsub_lock_closure,
+ .clo_weigh = lovsub_lock_weigh,
+ .clo_print = lovsub_lock_print
+};
+
+int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io)
+{
+ struct lovsub_lock *lsk;
+ int result;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lsk, lovsub_lock_kmem, __GFP_IO);
+ if (lsk != NULL) {
+ INIT_LIST_HEAD(&lsk->lss_parents);
+ cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ RETURN(result);
+}
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
new file mode 100644
index 000000000000..1b83d9081c40
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -0,0 +1,170 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_object for LOVSUB layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lovsub object operations.
+ *
+ */
+
+int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct lovsub_device *dev = lu2lovsub_dev(obj->lo_dev);
+ struct lu_object *below;
+ struct lu_device *under;
+
+ int result;
+
+ ENTRY;
+ under = &dev->acid_next->cd_lu_dev;
+ below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+ if (below != NULL) {
+ lu_object_add(obj, below);
+ cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page));
+ result = 0;
+ } else
+ result = -ENOMEM;
+ RETURN(result);
+
+}
+
+static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct lovsub_object *los = lu2lovsub(obj);
+ struct lov_object *lov = los->lso_super;
+ ENTRY;
+
+ /* We can't assume lov was assigned here, because of the shadow
+ * object handling in lu_object_find.
+ */
+ if (lov) {
+ LASSERT(lov->lo_type == LLT_RAID0);
+ LASSERT(lov->u.raid0.lo_sub[los->lso_index] == los);
+ spin_lock(&lov->u.raid0.lo_sub_lock);
+ lov->u.raid0.lo_sub[los->lso_index] = NULL;
+ spin_unlock(&lov->u.raid0.lo_sub_lock);
+ }
+
+ lu_object_fini(obj);
+ lu_object_header_fini(&los->lso_header.coh_lu);
+ OBD_SLAB_FREE_PTR(los, lovsub_object_kmem);
+ EXIT;
+}
+
+static int lovsub_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *obj)
+{
+ struct lovsub_object *los = lu2lovsub(obj);
+
+ return (*p)(env, cookie, "[%d]", los->lso_index);
+}
+
+static int lovsub_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid)
+{
+ struct lov_object *lov = cl2lovsub(obj)->lso_super;
+
+ ENTRY;
+ lov_r0(lov)->lo_attr_valid = 0;
+ RETURN(0);
+}
+
+static int lovsub_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj,
+ struct ost_lvb *lvb)
+{
+ struct lovsub_object *los = cl2lovsub(obj);
+
+ ENTRY;
+ RETURN(cl_object_glimpse(env, &los->lso_super->lo_cl, lvb));
+}
+
+
+
+static const struct cl_object_operations lovsub_ops = {
+ .coo_page_init = lovsub_page_init,
+ .coo_lock_init = lovsub_lock_init,
+ .coo_attr_set = lovsub_attr_set,
+ .coo_glimpse = lovsub_object_glimpse
+};
+
+static const struct lu_object_operations lovsub_lu_obj_ops = {
+ .loo_object_init = lovsub_object_init,
+ .loo_object_delete = NULL,
+ .loo_object_release = NULL,
+ .loo_object_free = lovsub_object_free,
+ .loo_object_print = lovsub_object_print,
+ .loo_object_invariant = NULL
+};
+
+struct lu_object *lovsub_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *unused,
+ struct lu_device *dev)
+{
+ struct lovsub_object *los;
+ struct lu_object *obj;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(los, lovsub_object_kmem, __GFP_IO);
+ if (los != NULL) {
+ struct cl_object_header *hdr;
+
+ obj = lovsub2lu(los);
+ hdr = &los->lso_header;
+ cl_object_header_init(hdr);
+ lu_object_init(obj, &hdr->coh_lu, dev);
+ lu_object_add_top(&hdr->coh_lu, obj);
+ los->lso_cl.co_ops = &lovsub_ops;
+ obj->lo_ops = &lovsub_lu_obj_ops;
+ } else
+ obj = NULL;
+ RETURN(obj);
+}
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c
new file mode 100644
index 000000000000..bc9e683968da
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_page for LOVSUB layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOV
+
+#include "lov_cl_internal.h"
+
+/** \addtogroup lov
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Lovsub page operations.
+ *
+ */
+
+static void lovsub_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+}
+
+static const struct cl_page_operations lovsub_page_ops = {
+ .cpo_fini = lovsub_page_fini
+};
+
+int lovsub_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *unused)
+{
+ struct lovsub_page *lsb = cl_object_page_slice(obj, page);
+ ENTRY;
+
+ cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops);
+ RETURN(0);
+}
+
+/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c
new file mode 100644
index 000000000000..5b2c0d88add1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lov/lproc_lov.c
@@ -0,0 +1,302 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#include <asm/statfs.h>
+#include <lprocfs_status.h>
+#include <obd_class.h>
+#include <linux/seq_file.h>
+#include "lov_internal.h"
+
+#ifdef LPROCFS
+static int lov_stripesize_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, LPU64"\n", desc->ld_default_stripe_size);
+}
+
+static ssize_t lov_stripesize_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct lov_desc *desc;
+ __u64 val;
+ int rc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ rc = lprocfs_write_u64_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ lov_fix_desc_stripe_size(&val);
+ desc->ld_default_stripe_size = val;
+ return count;
+}
+LPROC_SEQ_FOPS(lov_stripesize);
+
+static int lov_stripeoffset_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, LPU64"\n", desc->ld_default_stripe_offset);
+}
+
+static ssize_t lov_stripeoffset_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct lov_desc *desc;
+ __u64 val;
+ int rc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ rc = lprocfs_write_u64_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ desc->ld_default_stripe_offset = val;
+ return count;
+}
+LPROC_SEQ_FOPS(lov_stripeoffset);
+
+static int lov_stripetype_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, "%u\n", desc->ld_pattern);
+}
+
+static ssize_t lov_stripetype_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct lov_desc *desc;
+ int val, rc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ lov_fix_desc_pattern(&val);
+ desc->ld_pattern = val;
+ return count;
+}
+LPROC_SEQ_FOPS(lov_stripetype);
+
+static int lov_stripecount_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, "%d\n",
+ (__s16)(desc->ld_default_stripe_count + 1) - 1);
+}
+
+static ssize_t lov_stripecount_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct lov_desc *desc;
+ int val, rc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ lov_fix_desc_stripe_count(&val);
+ desc->ld_default_stripe_count = val;
+ return count;
+}
+LPROC_SEQ_FOPS(lov_stripecount);
+
+static int lov_numobd_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, "%u\n", desc->ld_tgt_count);
+}
+LPROC_SEQ_FOPS_RO(lov_numobd);
+
+static int lov_activeobd_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_desc *desc;
+
+ LASSERT(dev != NULL);
+ desc = &dev->u.lov.desc;
+ return seq_printf(m, "%u\n", desc->ld_active_tgt_count);
+}
+LPROC_SEQ_FOPS_RO(lov_activeobd);
+
+static int lov_desc_uuid_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = (struct obd_device *)m->private;
+ struct lov_obd *lov;
+
+ LASSERT(dev != NULL);
+ lov = &dev->u.lov;
+ return seq_printf(m, "%s\n", lov->desc.ld_uuid.uuid);
+}
+LPROC_SEQ_FOPS_RO(lov_desc_uuid);
+
+static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
+{
+ struct obd_device *dev = p->private;
+ struct lov_obd *lov = &dev->u.lov;
+
+ while (*pos < lov->desc.ld_tgt_count) {
+ if (lov->lov_tgts[*pos])
+ return lov->lov_tgts[*pos];
+ ++*pos;
+ }
+ return NULL;
+}
+
+static void lov_tgt_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct obd_device *dev = p->private;
+ struct lov_obd *lov = &dev->u.lov;
+
+ while (++*pos < lov->desc.ld_tgt_count) {
+ if (lov->lov_tgts[*pos])
+ return lov->lov_tgts[*pos];
+ }
+ return NULL;
+}
+
+static int lov_tgt_seq_show(struct seq_file *p, void *v)
+{
+ struct lov_tgt_desc *tgt = v;
+ return seq_printf(p, "%d: %s %sACTIVE\n", tgt->ltd_index,
+ obd_uuid2str(&tgt->ltd_uuid),
+ tgt->ltd_active ? "" : "IN");
+}
+
+struct seq_operations lov_tgt_sops = {
+ .start = lov_tgt_seq_start,
+ .stop = lov_tgt_seq_stop,
+ .next = lov_tgt_seq_next,
+ .show = lov_tgt_seq_show,
+};
+
+static int lov_target_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc;
+
+ rc = seq_open(file, &lov_tgt_sops);
+ if (rc)
+ return rc;
+
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+ return 0;
+}
+
+LPROC_SEQ_FOPS_RO_TYPE(lov, uuid);
+LPROC_SEQ_FOPS_RO_TYPE(lov, filestotal);
+LPROC_SEQ_FOPS_RO_TYPE(lov, filesfree);
+LPROC_SEQ_FOPS_RO_TYPE(lov, blksize);
+LPROC_SEQ_FOPS_RO_TYPE(lov, kbytestotal);
+LPROC_SEQ_FOPS_RO_TYPE(lov, kbytesfree);
+LPROC_SEQ_FOPS_RO_TYPE(lov, kbytesavail);
+
+struct lprocfs_vars lprocfs_lov_obd_vars[] = {
+ { "uuid", &lov_uuid_fops, 0, 0 },
+ { "stripesize", &lov_stripesize_fops, 0 },
+ { "stripeoffset", &lov_stripeoffset_fops, 0 },
+ { "stripecount", &lov_stripecount_fops, 0 },
+ { "stripetype", &lov_stripetype_fops, 0 },
+ { "numobd", &lov_numobd_fops, 0, 0 },
+ { "activeobd", &lov_activeobd_fops, 0, 0 },
+ { "filestotal", &lov_filestotal_fops, 0, 0 },
+ { "filesfree", &lov_filesfree_fops, 0, 0 },
+ /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/
+ { "blocksize", &lov_blksize_fops, 0, 0 },
+ { "kbytestotal", &lov_kbytestotal_fops, 0, 0 },
+ { "kbytesfree", &lov_kbytesfree_fops, 0, 0 },
+ { "kbytesavail", &lov_kbytesavail_fops, 0, 0 },
+ { "desc_uuid", &lov_desc_uuid_fops, 0, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(lov, numrefs);
+
+static struct lprocfs_vars lprocfs_lov_module_vars[] = {
+ { "num_refs", &lov_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_lov_module_vars;
+ lvars->obd_vars = lprocfs_lov_obd_vars;
+}
+
+struct file_operations lov_proc_target_fops = {
+ .owner = THIS_MODULE,
+ .open = lov_target_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+};
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/lvfs/Makefile b/drivers/staging/lustre/lustre/lvfs/Makefile
new file mode 100644
index 000000000000..f50b1c574385
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lvfs/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_LUSTRE_FS) += lvfs.o
+
+lvfs-y := lvfs_linux.o fsfilt.o lvfs_lib.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/lvfs/fsfilt.c b/drivers/staging/lustre/lustre/lvfs/fsfilt.c
new file mode 100644
index 000000000000..064445cbdb57
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lvfs/fsfilt.c
@@ -0,0 +1,138 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre_fsfilt.h>
+
+LIST_HEAD(fsfilt_types);
+
+static struct fsfilt_operations *fsfilt_search_type(const char *type)
+{
+ struct fsfilt_operations *found;
+ struct list_head *p;
+
+ list_for_each(p, &fsfilt_types) {
+ found = list_entry(p, struct fsfilt_operations, fs_list);
+ if (!strcmp(found->fs_type, type)) {
+ return found;
+ }
+ }
+ return NULL;
+}
+
+int fsfilt_register_ops(struct fsfilt_operations *fs_ops)
+{
+ struct fsfilt_operations *found;
+
+ /* lock fsfilt_types list */
+ if ((found = fsfilt_search_type(fs_ops->fs_type))) {
+ if (found != fs_ops) {
+ CERROR("different operations for type %s\n",
+ fs_ops->fs_type);
+ /* unlock fsfilt_types list */
+ RETURN(-EEXIST);
+ }
+ } else {
+ try_module_get(THIS_MODULE);
+ list_add(&fs_ops->fs_list, &fsfilt_types);
+ }
+
+ /* unlock fsfilt_types list */
+ return 0;
+}
+EXPORT_SYMBOL(fsfilt_register_ops);
+
+void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops)
+{
+ struct list_head *p;
+
+ /* lock fsfilt_types list */
+ list_for_each(p, &fsfilt_types) {
+ struct fsfilt_operations *found;
+
+ found = list_entry(p, typeof(*found), fs_list);
+ if (found == fs_ops) {
+ list_del(p);
+ module_put(THIS_MODULE);
+ break;
+ }
+ }
+ /* unlock fsfilt_types list */
+}
+EXPORT_SYMBOL(fsfilt_unregister_ops);
+
+struct fsfilt_operations *fsfilt_get_ops(const char *type)
+{
+ struct fsfilt_operations *fs_ops;
+
+ /* lock fsfilt_types list */
+ if (!(fs_ops = fsfilt_search_type(type))) {
+ char name[32];
+ int rc;
+
+ snprintf(name, sizeof(name) - 1, "fsfilt_%s", type);
+ name[sizeof(name) - 1] = '\0';
+
+ if (!(rc = request_module("%s", name))) {
+ fs_ops = fsfilt_search_type(type);
+ CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+ if (!fs_ops)
+ rc = -ENOENT;
+ }
+
+ if (rc) {
+ CERROR("Can't find %s interface\n", name);
+ RETURN(ERR_PTR(rc < 0 ? rc : -rc));
+ /* unlock fsfilt_types list */
+ }
+ }
+ try_module_get(fs_ops->fs_owner);
+ /* unlock fsfilt_types list */
+
+ return fs_ops;
+}
+EXPORT_SYMBOL(fsfilt_get_ops);
+
+void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
+{
+ module_put(fs_ops->fs_owner);
+}
+EXPORT_SYMBOL(fsfilt_put_ops);
diff --git a/drivers/staging/lustre/lustre/lvfs/fsfilt_ext3.c b/drivers/staging/lustre/lustre/lvfs/fsfilt_ext3.c
new file mode 100644
index 000000000000..c1e99b37572e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lvfs/fsfilt_ext3.c
@@ -0,0 +1,761 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/fsfilt_ext3.c
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <ldiskfs/ldiskfs_config.h>
+#include <ext4/ext4.h>
+#include <ext4/ext4_jbd2.h>
+#include <linux/version.h>
+#include <linux/bitops.h>
+#include <linux/quota.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <linux/lustre_compat25.h>
+#include <linux/lprocfs_status.h>
+
+#include <ext4/ext4_extents.h>
+
+#ifdef HAVE_EXT_PBLOCK /* Name changed to ext4_ext_pblock for kernel 2.6.35 */
+#define ext3_ext_pblock(ex) ext_pblock((ex))
+#endif
+
+/* for kernels 2.6.18 and later */
+#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)
+
+#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
+ ext3_ext_insert_extent(handle, inode, path, newext, flag)
+
+#define ext3_mb_discard_inode_preallocations(inode) \
+ ext3_discard_preallocations(inode)
+
+#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
+#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
+
+static struct kmem_cache *fcb_cache;
+
+struct fsfilt_cb_data {
+ struct ext4_journal_cb_entry cb_jcb; /* private data - MUST BE FIRST */
+ fsfilt_cb_t cb_func; /* MDS/OBD completion function */
+ struct obd_device *cb_obd; /* MDS/OBD completion device */
+ __u64 cb_last_rcvd; /* MDS/OST last committed operation */
+ void *cb_data; /* MDS/OST completion function data */
+};
+
+static char *fsfilt_ext3_get_label(struct super_block *sb)
+{
+ return EXT3_SB(sb)->s_es->s_volume_name;
+}
+
+/* kernel has ext4_blocks_for_truncate since linux-3.1.1 */
+# include <ext4/truncate.h>
+
+/*
+ * We don't currently need any additional blocks for rmdir and
+ * unlink transactions because we are storing the OST oa_id inside
+ * the inode (which we will be changing anyways as part of this
+ * transaction).
+ */
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
+ int logs)
+{
+ /* For updates to the last received file */
+ int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
+ journal_t *journal;
+ void *handle;
+
+ if (current->journal_info) {
+ CDEBUG(D_INODE, "increasing refcount on %p\n",
+ current->journal_info);
+ goto journal_start;
+ }
+
+ switch(op) {
+ case FSFILT_OP_UNLINK:
+ /* delete one file + create/update logs for each stripe */
+ nblocks += EXT3_DELETE_TRANS_BLOCKS(inode->i_sb);
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
+ break;
+ case FSFILT_OP_CANCEL_UNLINK:
+ LASSERT(logs == 1);
+
+ /* blocks for log header bitmap update OR
+ * blocks for catalog header bitmap update + unlink of logs +
+ * blocks for delete the inode (include blocks truncating). */
+ nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
+ EXT3_DELETE_TRANS_BLOCKS(inode->i_sb) +
+ ext4_blocks_for_truncate(inode) + 3;
+ break;
+ default: CERROR("unknown transaction start op %d\n", op);
+ LBUG();
+ }
+
+ LASSERT(current->journal_info == desc_private);
+ journal = EXT3_SB(inode->i_sb)->s_journal;
+ if (nblocks > journal->j_max_transaction_buffers) {
+ CWARN("too many credits %d for op %ux%u using %d instead\n",
+ nblocks, op, logs, journal->j_max_transaction_buffers);
+ nblocks = journal->j_max_transaction_buffers;
+ }
+
+ journal_start:
+ LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
+ handle = ext3_journal_start(inode, nblocks);
+
+ if (!IS_ERR(handle))
+ LASSERT(current->journal_info == handle);
+ else
+ CERROR("error starting handle for op %u (%u credits): rc %ld\n",
+ op, nblocks, PTR_ERR(handle));
+ return handle;
+}
+
+static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
+{
+ int rc;
+ handle_t *handle = h;
+
+ LASSERT(current->journal_info == handle);
+ if (force_sync)
+ handle->h_sync = 1; /* recovery likes this */
+
+ rc = ext3_journal_stop(handle);
+
+ return rc;
+}
+
+#ifndef EXT3_EXTENTS_FL
+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#endif
+
+#ifndef EXT_ASSERT
+#define EXT_ASSERT(cond) BUG_ON(!(cond))
+#endif
+
+#define EXT_GENERATION(inode) (EXT4_I(inode)->i_ext_generation)
+#define ext3_ext_base inode
+#define ext3_ext_base2inode(inode) (inode)
+#define EXT_DEPTH(inode) ext_depth(inode)
+#define fsfilt_ext3_ext_walk_space(inode, block, num, cb, cbdata) \
+ ext3_ext_walk_space(inode, block, num, cb, cbdata);
+
+struct bpointers {
+ unsigned long *blocks;
+ unsigned long start;
+ int num;
+ int init_num;
+ int create;
+};
+
+static long ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path,
+ unsigned long block, int *aflags)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+ unsigned long bg_start;
+ unsigned long colour;
+ int depth;
+
+ if (path) {
+ struct ext3_extent *ex;
+ depth = path->p_depth;
+
+ /* try to predict block placement */
+ if ((ex = path[depth].p_ext))
+ return ext4_ext_pblock(ex) + (block - le32_to_cpu(ex->ee_block));
+
+ /* it looks index is empty
+ * try to find starting from index itself */
+ if (path[depth].p_bh)
+ return path[depth].p_bh->b_blocknr;
+ }
+
+ /* OK. use inode's group */
+ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
+ colour = (current->pid % 16) *
+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ return bg_start + colour + block;
+}
+
+#define ll_unmap_underlying_metadata(sb, blocknr) \
+ unmap_underlying_metadata((sb)->s_bdev, blocknr)
+
+#ifndef EXT3_MB_HINT_GROUP_ALLOC
+static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
+ struct ext3_ext_path *path, unsigned long block,
+ unsigned long *count, int *err)
+{
+ unsigned long pblock, goal;
+ int aflags = 0;
+ struct inode *inode = ext3_ext_base2inode(base);
+
+ goal = ext3_ext_find_goal(inode, path, block, &aflags);
+ aflags |= 2; /* block have been already reserved */
+ pblock = ext3_mb_new_blocks(handle, inode, goal, count, aflags, err);
+ return pblock;
+
+}
+#else
+static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
+ struct ext3_ext_path *path, unsigned long block,
+ unsigned long *count, int *err)
+{
+ struct inode *inode = ext3_ext_base2inode(base);
+ struct ext3_allocation_request ar;
+ unsigned long pblock;
+ int aflags;
+
+ /* find neighbour allocated blocks */
+ ar.lleft = block;
+ *err = ext3_ext_search_left(base, path, &ar.lleft, &ar.pleft);
+ if (*err)
+ return 0;
+ ar.lright = block;
+ *err = ext3_ext_search_right(base, path, &ar.lright, &ar.pright);
+ if (*err)
+ return 0;
+
+ /* allocate new block */
+ ar.goal = ext3_ext_find_goal(inode, path, block, &aflags);
+ ar.inode = inode;
+ ar.logical = block;
+ ar.len = *count;
+ ar.flags = EXT3_MB_HINT_DATA;
+ pblock = ext3_mb_new_blocks(handle, &ar, err);
+ *count = ar.len;
+ return pblock;
+}
+#endif
+
+static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
+ struct ext3_ext_path *path,
+ struct ext3_ext_cache *cex,
+#ifdef HAVE_EXT_PREPARE_CB_EXTENT
+ struct ext3_extent *ex,
+#endif
+ void *cbdata)
+{
+ struct bpointers *bp = cbdata;
+ struct inode *inode = ext3_ext_base2inode(base);
+ struct ext3_extent nex;
+ unsigned long pblock;
+ unsigned long tgen;
+ int err, i;
+ unsigned long count;
+ handle_t *handle;
+
+#ifdef EXT3_EXT_CACHE_EXTENT
+ if (cex->ec_type == EXT3_EXT_CACHE_EXTENT)
+#else
+ if ((cex->ec_len != 0) && (cex->ec_start != 0))
+#endif
+ {
+ err = EXT_CONTINUE;
+ goto map;
+ }
+
+ if (bp->create == 0) {
+ i = 0;
+ if (cex->ec_block < bp->start)
+ i = bp->start - cex->ec_block;
+ if (i >= cex->ec_len)
+ CERROR("nothing to do?! i = %d, e_num = %u\n",
+ i, cex->ec_len);
+ for (; i < cex->ec_len && bp->num; i++) {
+ *(bp->blocks) = 0;
+ bp->blocks++;
+ bp->num--;
+ bp->start++;
+ }
+
+ return EXT_CONTINUE;
+ }
+
+ tgen = EXT_GENERATION(base);
+ count = ext3_ext_calc_credits_for_insert(base, path);
+
+ handle = ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1);
+ if (IS_ERR(handle)) {
+ return PTR_ERR(handle);
+ }
+
+ if (tgen != EXT_GENERATION(base)) {
+ /* the tree has changed. so path can be invalid at moment */
+ ext3_journal_stop(handle);
+ return EXT_REPEAT;
+ }
+
+ /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
+ * protected by i_data_sem as whole. so we patch it to store
+ * generation to path and now verify the tree hasn't changed */
+ down_write((&EXT4_I(inode)->i_data_sem));
+
+ /* validate extent, make sure the extent tree does not changed */
+ if (EXT_GENERATION(base) != path[0].p_generation) {
+ /* cex is invalid, try again */
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext3_journal_stop(handle);
+ return EXT_REPEAT;
+ }
+
+ count = cex->ec_len;
+ pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
+ if (!pblock)
+ goto out;
+ EXT_ASSERT(count <= cex->ec_len);
+
+ /* insert new extent */
+ nex.ee_block = cpu_to_le32(cex->ec_block);
+ ext3_ext_store_pblock(&nex, pblock);
+ nex.ee_len = cpu_to_le16(count);
+ err = fsfilt_ext3_ext_insert_extent(handle, base, path, &nex, 0);
+ if (err) {
+ /* free data blocks we just allocated */
+ /* not a good idea to call discard here directly,
+ * but otherwise we'd need to call it every free() */
+#ifdef EXT3_MB_HINT_GROUP_ALLOC
+ ext3_mb_discard_inode_preallocations(inode);
+#endif
+#ifdef HAVE_EXT_FREE_BLOCK_WITH_BUFFER_HEAD /* Introduced in 2.6.32-rc7 */
+ ext3_free_blocks(handle, inode, NULL, ext4_ext_pblock(&nex),
+ cpu_to_le16(nex.ee_len), 0);
+#else
+ ext3_free_blocks(handle, inode, ext4_ext_pblock(&nex),
+ cpu_to_le16(nex.ee_len), 0);
+#endif
+ goto out;
+ }
+
+ /*
+ * Putting len of the actual extent we just inserted,
+ * we are asking ext3_ext_walk_space() to continue
+ * scaning after that block
+ */
+ cex->ec_len = le16_to_cpu(nex.ee_len);
+ cex->ec_start = ext4_ext_pblock(&nex);
+ BUG_ON(le16_to_cpu(nex.ee_len) == 0);
+ BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
+
+out:
+ up_write((&EXT4_I(inode)->i_data_sem));
+ ext3_journal_stop(handle);
+map:
+ if (err >= 0) {
+ /* map blocks */
+ if (bp->num == 0) {
+ CERROR("hmm. why do we find this extent?\n");
+ CERROR("initial space: %lu:%u\n",
+ bp->start, bp->init_num);
+#ifdef EXT3_EXT_CACHE_EXTENT
+ CERROR("current extent: %u/%u/%llu %d\n",
+ cex->ec_block, cex->ec_len,
+ (unsigned long long)cex->ec_start,
+ cex->ec_type);
+#else
+ CERROR("current extent: %u/%u/%llu\n",
+ cex->ec_block, cex->ec_len,
+ (unsigned long long)cex->ec_start);
+#endif
+ }
+ i = 0;
+ if (cex->ec_block < bp->start)
+ i = bp->start - cex->ec_block;
+ if (i >= cex->ec_len)
+ CERROR("nothing to do?! i = %d, e_num = %u\n",
+ i, cex->ec_len);
+ for (; i < cex->ec_len && bp->num; i++) {
+ *(bp->blocks) = cex->ec_start + i;
+#ifdef EXT3_EXT_CACHE_EXTENT
+ if (cex->ec_type != EXT3_EXT_CACHE_EXTENT)
+#else
+ if ((cex->ec_len == 0) || (cex->ec_start == 0))
+#endif
+ {
+ /* unmap any possible underlying metadata from
+ * the block device mapping. bug 6998. */
+ ll_unmap_underlying_metadata(inode->i_sb,
+ *(bp->blocks));
+ }
+ bp->blocks++;
+ bp->num--;
+ bp->start++;
+ }
+ }
+ return err;
+}
+
+int fsfilt_map_nblocks(struct inode *inode, unsigned long block,
+ unsigned long num, unsigned long *blocks,
+ int create)
+{
+ struct ext3_ext_base *base = inode;
+ struct bpointers bp;
+ int err;
+
+ CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n",
+ block, block + num - 1, (unsigned) inode->i_ino);
+
+ bp.blocks = blocks;
+ bp.start = block;
+ bp.init_num = bp.num = num;
+ bp.create = create;
+
+ err = fsfilt_ext3_ext_walk_space(base, block, num,
+ ext3_ext_new_extent_cb, &bp);
+ ext3_ext_invalidate_cache(base);
+
+ return err;
+}
+
+int fsfilt_ext3_map_ext_inode_pages(struct inode *inode, struct page **page,
+ int pages, unsigned long *blocks,
+ int create)
+{
+ int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+ int rc = 0, i = 0;
+ struct page *fp = NULL;
+ int clen = 0;
+
+ CDEBUG(D_OTHER, "inode %lu: map %d pages from %lu\n",
+ inode->i_ino, pages, (*page)->index);
+
+ /* pages are sorted already. so, we just have to find
+ * contig. space and process them properly */
+ while (i < pages) {
+ if (fp == NULL) {
+ /* start new extent */
+ fp = *page++;
+ clen = 1;
+ i++;
+ continue;
+ } else if (fp->index + clen == (*page)->index) {
+ /* continue the extent */
+ page++;
+ clen++;
+ i++;
+ continue;
+ }
+
+ /* process found extent */
+ rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page,
+ clen * blocks_per_page, blocks,
+ create);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ /* look for next extent */
+ fp = NULL;
+ blocks += blocks_per_page * clen;
+ }
+
+ if (fp)
+ rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page,
+ clen * blocks_per_page, blocks,
+ create);
+cleanup:
+ return rc;
+}
+
+int fsfilt_ext3_map_bm_inode_pages(struct inode *inode, struct page **page,
+ int pages, unsigned long *blocks,
+ int create)
+{
+ int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+ unsigned long *b;
+ int rc = 0, i;
+
+ for (i = 0, b = blocks; i < pages; i++, page++) {
+ rc = ext3_map_inode_page(inode, *page, b, create);
+ if (rc) {
+ CERROR("ino %lu, blk %lu create %d: rc %d\n",
+ inode->i_ino, *b, create, rc);
+ break;
+ }
+
+ b += blocks_per_page;
+ }
+ return rc;
+}
+
+int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
+ int pages, unsigned long *blocks,
+ int create, struct mutex *optional_mutex)
+{
+ int rc;
+
+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) {
+ rc = fsfilt_ext3_map_ext_inode_pages(inode, page, pages,
+ blocks, create);
+ return rc;
+ }
+ if (optional_mutex != NULL)
+ mutex_lock(optional_mutex);
+ rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks, create);
+ if (optional_mutex != NULL)
+ mutex_unlock(optional_mutex);
+
+ return rc;
+}
+
+int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
+{
+ unsigned long block;
+ struct buffer_head *bh;
+ int err, blocksize, csize, boffs, osize = size;
+
+ /* prevent reading after eof */
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < *offs + size) {
+ size = i_size_read(inode) - *offs;
+ spin_unlock(&inode->i_lock);
+ if (size < 0) {
+ CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
+ i_size_read(inode), *offs);
+ return -EBADR;
+ } else if (size == 0) {
+ return 0;
+ }
+ } else {
+ spin_unlock(&inode->i_lock);
+ }
+
+ blocksize = 1 << inode->i_blkbits;
+
+ while (size > 0) {
+ block = *offs >> inode->i_blkbits;
+ boffs = *offs & (blocksize - 1);
+ csize = min(blocksize - boffs, size);
+ bh = ext3_bread(NULL, inode, block, 0, &err);
+ if (!bh) {
+ CERROR("can't read block: %d\n", err);
+ return err;
+ }
+
+ memcpy(buf, bh->b_data + boffs, csize);
+ brelse(bh);
+
+ *offs += csize;
+ buf += csize;
+ size -= csize;
+ }
+ return osize;
+}
+EXPORT_SYMBOL(fsfilt_ext3_read);
+
+static int fsfilt_ext3_read_record(struct file * file, void *buf,
+ int size, loff_t *offs)
+{
+ int rc;
+ rc = fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs);
+ if (rc > 0)
+ rc = 0;
+ return rc;
+}
+
+int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
+ loff_t *offs, handle_t *handle)
+{
+ struct buffer_head *bh = NULL;
+ loff_t old_size = i_size_read(inode), offset = *offs;
+ loff_t new_size = i_size_read(inode);
+ unsigned long block;
+ int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs;
+
+ while (bufsize > 0) {
+ if (bh != NULL)
+ brelse(bh);
+
+ block = offset >> inode->i_blkbits;
+ boffs = offset & (blocksize - 1);
+ size = min(blocksize - boffs, bufsize);
+ bh = ext3_bread(handle, inode, block, 1, &err);
+ if (!bh) {
+ CERROR("can't read/create block: %d\n", err);
+ break;
+ }
+
+ err = ext3_journal_get_write_access(handle, bh);
+ if (err) {
+ CERROR("journal_get_write_access() returned error %d\n",
+ err);
+ break;
+ }
+ LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
+ memcpy(bh->b_data + boffs, buf, size);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err) {
+ CERROR("journal_dirty_metadata() returned error %d\n",
+ err);
+ break;
+ }
+ if (offset + size > new_size)
+ new_size = offset + size;
+ offset += size;
+ bufsize -= size;
+ buf += size;
+ }
+ if (bh)
+ brelse(bh);
+
+ /* correct in-core and on-disk sizes */
+ if (new_size > i_size_read(inode)) {
+ spin_lock(&inode->i_lock);
+ if (new_size > i_size_read(inode))
+ i_size_write(inode, new_size);
+ if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
+ EXT3_I(inode)->i_disksize = i_size_read(inode);
+ if (i_size_read(inode) > old_size) {
+ spin_unlock(&inode->i_lock);
+ mark_inode_dirty(inode);
+ } else {
+ spin_unlock(&inode->i_lock);
+ }
+ }
+
+ if (err == 0)
+ *offs = offset;
+ return err;
+}
+EXPORT_SYMBOL(fsfilt_ext3_write_handle);
+
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
+ loff_t *offs, int force_sync)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ handle_t *handle;
+ int err, block_count = 0, blocksize;
+
+ /* Determine how many transaction credits are needed */
+ blocksize = 1 << inode->i_blkbits;
+ block_count = (*offs & (blocksize - 1)) + bufsize;
+ block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+
+ handle = ext3_journal_start(inode,
+ block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
+ if (IS_ERR(handle)) {
+ CERROR("can't start transaction for %d blocks (%d bytes)\n",
+ block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2,
+ bufsize);
+ return PTR_ERR(handle);
+ }
+
+ err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle);
+
+ if (!err && force_sync)
+ handle->h_sync = 1; /* recovery likes this */
+
+ ext3_journal_stop(handle);
+
+ return err;
+}
+
+static int fsfilt_ext3_setup(struct super_block *sb)
+{
+ if (!EXT3_HAS_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+ CERROR("ext3 mounted without journal\n");
+ return -EINVAL;
+ }
+
+#ifdef S_PDIROPS
+ CWARN("Enabling PDIROPS\n");
+ set_opt(EXT3_SB(sb)->s_mount_opt, PDIROPS);
+ sb->s_flags |= S_PDIROPS;
+#endif
+ if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+ CWARN("filesystem doesn't have dir_index feature enabled\n");
+ return 0;
+}
+static struct fsfilt_operations fsfilt_ext3_ops = {
+ .fs_type = "ext3",
+ .fs_owner = THIS_MODULE,
+ .fs_getlabel = fsfilt_ext3_get_label,
+ .fs_start = fsfilt_ext3_start,
+ .fs_commit = fsfilt_ext3_commit,
+ .fs_map_inode_pages = fsfilt_ext3_map_inode_pages,
+ .fs_write_record = fsfilt_ext3_write_record,
+ .fs_read_record = fsfilt_ext3_read_record,
+ .fs_setup = fsfilt_ext3_setup,
+};
+
+static int __init fsfilt_ext3_init(void)
+{
+ int rc;
+
+ fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
+ sizeof(struct fsfilt_cb_data), 0, 0);
+ if (!fcb_cache) {
+ CERROR("error allocating fsfilt journal callback cache\n");
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ rc = fsfilt_register_ops(&fsfilt_ext3_ops);
+
+ if (rc) {
+ int err = kmem_cache_destroy(fcb_cache);
+ LASSERTF(err == 0, "error destroying new cache: rc %d\n", err);
+ }
+out:
+ return rc;
+}
+
+static void __exit fsfilt_ext3_exit(void)
+{
+ int rc;
+
+ fsfilt_unregister_ops(&fsfilt_ext3_ops);
+ rc = kmem_cache_destroy(fcb_cache);
+ LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n");
+}
+
+module_init(fsfilt_ext3_init);
+module_exit(fsfilt_ext3_exit);
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_lib.c b/drivers/staging/lustre/lustre/lvfs/lvfs_lib.c
new file mode 100644
index 000000000000..97a8be2300dd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_lib.c
@@ -0,0 +1,173 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/lvfs_lib.c
+ *
+ * Lustre filesystem abstraction routines
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+#include <linux/module.h>
+#include <lustre_lib.h>
+#include <lprocfs_status.h>
+
+#ifdef LPROCFS
+void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount)
+{
+ struct lprocfs_counter *percpu_cntr;
+ struct lprocfs_counter_header *header;
+ int smp_id;
+ unsigned long flags = 0;
+
+ if (stats == NULL)
+ return;
+
+ /* With per-client stats, statistics are allocated only for
+ * single CPU area, so the smp_id should be 0 always. */
+ smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags);
+ if (smp_id < 0)
+ return;
+
+ header = &stats->ls_cnt_header[idx];
+ percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
+ percpu_cntr->lc_count++;
+
+ if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+ /*
+ * lprocfs_counter_add() can be called in interrupt context,
+ * as memory allocation could trigger memory shrinker call
+ * ldlm_pool_shrink(), which calls lprocfs_counter_add().
+ * LU-1727.
+ *
+ * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE
+ * flag, because it needs accurate counting lest memory leak
+ * check reports error.
+ */
+ if (in_interrupt() &&
+ (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ percpu_cntr->lc_sum_irq += amount;
+ else
+ percpu_cntr->lc_sum += amount;
+
+ if (header->lc_config & LPROCFS_CNTR_STDDEV)
+ percpu_cntr->lc_sumsquare += (__s64)amount * amount;
+ if (amount < percpu_cntr->lc_min)
+ percpu_cntr->lc_min = amount;
+ if (amount > percpu_cntr->lc_max)
+ percpu_cntr->lc_max = amount;
+ }
+ lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
+}
+EXPORT_SYMBOL(lprocfs_counter_add);
+
+void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount)
+{
+ struct lprocfs_counter *percpu_cntr;
+ struct lprocfs_counter_header *header;
+ int smp_id;
+ unsigned long flags = 0;
+
+ if (stats == NULL)
+ return;
+
+ /* With per-client stats, statistics are allocated only for
+ * single CPU area, so the smp_id should be 0 always. */
+ smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags);
+ if (smp_id < 0)
+ return;
+
+ header = &stats->ls_cnt_header[idx];
+ percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx);
+ if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+ /*
+ * Sometimes we use RCU callbacks to free memory which calls
+ * lprocfs_counter_sub(), and RCU callbacks may execute in
+ * softirq context - right now that's the only case we're in
+ * softirq context here, use separate counter for that.
+ * bz20650.
+ *
+ * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE
+ * flag, because it needs accurate counting lest memory leak
+ * check reports error.
+ */
+ if (in_interrupt() &&
+ (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ percpu_cntr->lc_sum_irq -= amount;
+ else
+ percpu_cntr->lc_sum -= amount;
+ }
+ lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags);
+}
+EXPORT_SYMBOL(lprocfs_counter_sub);
+
+int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
+{
+ struct lprocfs_counter *cntr;
+ unsigned int percpusize;
+ int rc = -ENOMEM;
+ unsigned long flags = 0;
+ int i;
+
+ LASSERT(stats->ls_percpu[cpuid] == NULL);
+ LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0);
+
+ percpusize = lprocfs_stats_counter_size(stats);
+ LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[cpuid], percpusize);
+ if (stats->ls_percpu[cpuid] != NULL) {
+ rc = 0;
+ if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) {
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ spin_lock_irqsave(&stats->ls_lock, flags);
+ else
+ spin_lock(&stats->ls_lock);
+ if (stats->ls_biggest_alloc_num <= cpuid)
+ stats->ls_biggest_alloc_num = cpuid + 1;
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
+ spin_unlock_irqrestore(&stats->ls_lock, flags);
+ } else {
+ spin_unlock(&stats->ls_lock);
+ }
+ }
+ /* initialize the ls_percpu[cpuid] non-zero counter */
+ for (i = 0; i < stats->ls_num; ++i) {
+ cntr = lprocfs_stats_counter_get(stats, cpuid, i);
+ cntr->lc_min = LC_MIN_INIT;
+ }
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_stats_alloc_one);
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
new file mode 100644
index 000000000000..1e6f32c3549b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -0,0 +1,295 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/lvfs_linux.c
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/version.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/lustre_compat25.h>
+#include <lvfs.h>
+
+#include <obd.h>
+#include <lustre_lib.h>
+
+struct lprocfs_stats *obd_memory = NULL;
+EXPORT_SYMBOL(obd_memory);
+/* refine later and change to seqlock or simlar from libcfs */
+
+/* Debugging check only needed during development */
+#ifdef OBD_CTXT_DEBUG
+# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
+# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
+ msg)
+# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
+#else
+# define ASSERT_CTXT_MAGIC(magic) do {} while(0)
+# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
+# define ASSERT_KERNEL_CTXT(msg) do {} while(0)
+#endif
+
+static void push_group_info(struct lvfs_run_ctxt *save,
+ struct group_info *ginfo)
+{
+ if (!ginfo) {
+ save->ngroups = current_ngroups;
+ current_ngroups = 0;
+ } else {
+ struct cred *cred;
+ task_lock(current);
+ save->group_info = current_cred()->group_info;
+ if ((cred = prepare_creds())) {
+ cred->group_info = ginfo;
+ commit_creds(cred);
+ }
+ task_unlock(current);
+ }
+}
+
+static void pop_group_info(struct lvfs_run_ctxt *save,
+ struct group_info *ginfo)
+{
+ if (!ginfo) {
+ current_ngroups = save->ngroups;
+ } else {
+ struct cred *cred;
+ task_lock(current);
+ if ((cred = prepare_creds())) {
+ cred->group_info = save->group_info;
+ commit_creds(cred);
+ }
+ task_unlock(current);
+ }
+}
+
+/* push / pop to root of obd store */
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+ struct lvfs_ucred *uc)
+{
+ /* if there is underlaying dt_device then push_ctxt is not needed */
+ if (new_ctx->dt != NULL)
+ return;
+
+ //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
+ ASSERT_CTXT_MAGIC(new_ctx->magic);
+ OBD_SET_CTXT_MAGIC(save);
+
+ save->fs = get_fs();
+ LASSERT(d_refcount(cfs_fs_pwd(current->fs)));
+ LASSERT(d_refcount(new_ctx->pwd));
+ save->pwd = dget(cfs_fs_pwd(current->fs));
+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
+ save->luc.luc_umask = current_umask();
+ save->ngroups = current_cred()->group_info->ngroups;
+
+ LASSERT(save->pwd);
+ LASSERT(save->pwdmnt);
+ LASSERT(new_ctx->pwd);
+ LASSERT(new_ctx->pwdmnt);
+
+ if (uc) {
+ struct cred *cred;
+ save->luc.luc_uid = current_uid();
+ save->luc.luc_gid = current_gid();
+ save->luc.luc_fsuid = current_fsuid();
+ save->luc.luc_fsgid = current_fsgid();
+ save->luc.luc_cap = current_cap();
+
+ if ((cred = prepare_creds())) {
+ cred->uid = uc->luc_uid;
+ cred->gid = uc->luc_gid;
+ cred->fsuid = uc->luc_fsuid;
+ cred->fsgid = uc->luc_fsgid;
+ cred->cap_effective = uc->luc_cap;
+ commit_creds(cred);
+ }
+
+ push_group_info(save,
+ uc->luc_ginfo ?:
+ uc->luc_identity ? uc->luc_identity->mi_ginfo :
+ NULL);
+ }
+ current->fs->umask = 0; /* umask already applied on client */
+ set_fs(new_ctx->fs);
+ ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
+}
+EXPORT_SYMBOL(push_ctxt);
+
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+ struct lvfs_ucred *uc)
+{
+ /* if there is underlaying dt_device then pop_ctxt is not needed */
+ if (new_ctx->dt != NULL)
+ return;
+
+ ASSERT_CTXT_MAGIC(saved->magic);
+ ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
+
+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
+ cfs_fs_pwd(current->fs), new_ctx->pwd);
+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
+
+ set_fs(saved->fs);
+ ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
+
+ dput(saved->pwd);
+ mntput(saved->pwdmnt);
+ current->fs->umask = saved->luc.luc_umask;
+ if (uc) {
+ struct cred *cred;
+ if ((cred = prepare_creds())) {
+ cred->uid = saved->luc.luc_uid;
+ cred->gid = saved->luc.luc_gid;
+ cred->fsuid = saved->luc.luc_fsuid;
+ cred->fsgid = saved->luc.luc_fsgid;
+ cred->cap_effective = saved->luc.luc_cap;
+ commit_creds(cred);
+ }
+
+ pop_group_info(saved,
+ uc->luc_ginfo ?:
+ uc->luc_identity ? uc->luc_identity->mi_ginfo :
+ NULL);
+ }
+}
+EXPORT_SYMBOL(pop_ctxt);
+
+/* utility to rename a file */
+int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
+ char *oldname, char *newname)
+{
+ struct dentry *dchild_old, *dchild_new;
+ int err = 0;
+ ENTRY;
+
+ ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
+ CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
+ (int)strlen(oldname), oldname, (int)strlen(newname), newname);
+
+ dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
+ if (IS_ERR(dchild_old))
+ RETURN(PTR_ERR(dchild_old));
+
+ if (!dchild_old->d_inode)
+ GOTO(put_old, err = -ENOENT);
+
+ dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
+ if (IS_ERR(dchild_new))
+ GOTO(put_old, err = PTR_ERR(dchild_new));
+
+ err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
+ dir->d_inode, dchild_new, mnt);
+
+ dput(dchild_new);
+put_old:
+ dput(dchild_old);
+ RETURN(err);
+}
+EXPORT_SYMBOL(lustre_rename);
+
+/* Note: dput(dchild) will *not* be called if there is an error */
+struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
+ int flags)
+{
+ struct path path = {
+ .dentry = de,
+ .mnt = ctxt->pwdmnt,
+ };
+ return ll_dentry_open(&path, flags, current_cred());
+}
+EXPORT_SYMBOL(l_dentry_open);
+
+#ifdef LPROCFS
+__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+ struct lprocfs_counter_header *header,
+ enum lprocfs_stats_flags flags,
+ enum lprocfs_fields_flags field)
+{
+ __s64 ret = 0;
+
+ if (lc == NULL || header == NULL)
+ RETURN(0);
+
+ switch (field) {
+ case LPROCFS_FIELDS_FLAGS_CONFIG:
+ ret = header->lc_config;
+ break;
+ case LPROCFS_FIELDS_FLAGS_SUM:
+ ret = lc->lc_sum;
+ if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ ret += lc->lc_sum_irq;
+ break;
+ case LPROCFS_FIELDS_FLAGS_MIN:
+ ret = lc->lc_min;
+ break;
+ case LPROCFS_FIELDS_FLAGS_MAX:
+ ret = lc->lc_max;
+ break;
+ case LPROCFS_FIELDS_FLAGS_AVG:
+ ret = (lc->lc_max - lc->lc_min) / 2;
+ break;
+ case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
+ ret = lc->lc_sumsquare;
+ break;
+ case LPROCFS_FIELDS_FLAGS_COUNT:
+ ret = lc->lc_count;
+ break;
+ default:
+ break;
+ };
+
+ RETURN(ret);
+}
+EXPORT_SYMBOL(lprocfs_read_helper);
+#endif /* LPROCFS */
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/lustre/lustre/mdc/Makefile b/drivers/staging/lustre/lustre/mdc/Makefile
new file mode 100644
index 000000000000..93bae242e761
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += mdc.o
+mdc-y := mdc_request.o mdc_reint.o lproc_mdc.o mdc_lib.o mdc_locks.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
new file mode 100644
index 000000000000..6592478e9b6d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -0,0 +1,219 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#include <linux/vfs.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+
+#ifdef LPROCFS
+
+static int mdc_max_rpcs_in_flight_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%u\n", cli->cl_max_rpcs_in_flight);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+
+static ssize_t mdc_max_rpcs_in_flight_seq_write(struct file *file,
+ const char *buffer,
+ size_t count,
+ loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &dev->u.cli;
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val < 1 || val > MDC_MAX_RIF_MAX)
+ return -ERANGE;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_max_rpcs_in_flight = val;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(mdc_max_rpcs_in_flight);
+
+static int mdc_kuc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, NULL, PDE_DATA(inode));
+}
+
+/* temporary for testing */
+static ssize_t mdc_kuc_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct kuc_hdr *lh;
+ struct hsm_action_list *hal;
+ struct hsm_action_item *hai;
+ int len;
+ int fd, rc;
+ ENTRY;
+
+ rc = lprocfs_write_helper(buffer, count, &fd);
+ if (rc)
+ RETURN(rc);
+
+ if (fd < 0)
+ RETURN(-ERANGE);
+ CWARN("message to fd %d\n", fd);
+
+ len = sizeof(*lh) + sizeof(*hal) + MTI_NAME_MAXLEN +
+ /* for mockup below */ 2 * cfs_size_round(sizeof(*hai));
+
+ OBD_ALLOC(lh, len);
+
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = KUC_TRANSPORT_HSM;
+ lh->kuc_msgtype = HMT_ACTION_LIST;
+ lh->kuc_msglen = len;
+
+ hal = (struct hsm_action_list *)(lh + 1);
+ hal->hal_version = HAL_VERSION;
+ hal->hal_archive_id = 1;
+ hal->hal_flags = 0;
+ obd_uuid2fsname(hal->hal_fsname, obd->obd_name, MTI_NAME_MAXLEN);
+
+ /* mock up an action list */
+ hal->hal_count = 2;
+ hai = hai_zero(hal);
+ hai->hai_action = HSMA_ARCHIVE;
+ hai->hai_fid.f_oid = 5;
+ hai->hai_len = sizeof(*hai);
+ hai = hai_next(hai);
+ hai->hai_action = HSMA_RESTORE;
+ hai->hai_fid.f_oid = 10;
+ hai->hai_len = sizeof(*hai);
+
+ /* This works for either broadcast or unicast to a single fd */
+ if (fd == 0) {
+ rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh);
+ } else {
+ struct file *fp = fget(fd);
+
+ rc = libcfs_kkuc_msg_put(fp, lh);
+ fput(fp);
+ }
+ OBD_FREE(lh, len);
+ if (rc < 0)
+ RETURN(rc);
+ RETURN(count);
+}
+
+struct file_operations mdc_kuc_fops = {
+ .open = mdc_kuc_open,
+ .write = mdc_kuc_write,
+ .release = single_release,
+};
+
+LPROC_SEQ_FOPS_WR_ONLY(mdc, ping);
+
+LPROC_SEQ_FOPS_RO_TYPE(mdc, uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, connect_flags);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, blksize);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, kbytestotal);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, kbytesfree);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, kbytesavail);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, filestotal);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, filesfree);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, server_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, conn_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, timeouts);
+LPROC_SEQ_FOPS_RO_TYPE(mdc, state);
+
+static int mdc_obd_max_pages_per_rpc_seq_show(struct seq_file *m, void *v)
+{
+ return lprocfs_obd_rd_max_pages_per_rpc(m, m->private);
+}
+LPROC_SEQ_FOPS_RO(mdc_obd_max_pages_per_rpc);
+
+LPROC_SEQ_FOPS_RW_TYPE(mdc, import);
+LPROC_SEQ_FOPS_RW_TYPE(mdc, pinger_recov);
+
+static struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
+ { "uuid", &mdc_uuid_fops, 0, 0 },
+ { "ping", &mdc_ping_fops, 0, 0222 },
+ { "connect_flags", &mdc_connect_flags_fops, 0, 0 },
+ { "blocksize", &mdc_blksize_fops, 0, 0 },
+ { "kbytestotal", &mdc_kbytestotal_fops, 0, 0 },
+ { "kbytesfree", &mdc_kbytesfree_fops, 0, 0 },
+ { "kbytesavail", &mdc_kbytesavail_fops, 0, 0 },
+ { "filestotal", &mdc_filestotal_fops, 0, 0 },
+ { "filesfree", &mdc_filesfree_fops, 0, 0 },
+ /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/
+ { "mds_server_uuid", &mdc_server_uuid_fops, 0, 0 },
+ { "mds_conn_uuid", &mdc_conn_uuid_fops, 0, 0 },
+ /*
+ * FIXME: below proc entry is provided, but not in used, instead
+ * sbi->sb_md_brw_size is used, the per obd variable should be used
+ * when CMD is enabled, and dir pages are managed in MDC layer.
+ * Remember to enable proc write function.
+ */
+ { "max_pages_per_rpc", &mdc_obd_max_pages_per_rpc_fops, 0, 0 },
+ { "max_rpcs_in_flight", &mdc_max_rpcs_in_flight_fops, 0, 0 },
+ { "timeouts", &mdc_timeouts_fops, 0, 0 },
+ { "import", &mdc_import_fops, 0 },
+ { "state", &mdc_state_fops, 0, 0 },
+ { "hsm_nl", &mdc_kuc_fops, 0, 0200 },
+ { "pinger_recov", &mdc_pinger_recov_fops, 0, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(mdc, numrefs);
+
+static struct lprocfs_vars lprocfs_mdc_module_vars[] = {
+ { "num_refs", &mdc_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_mdc_module_vars;
+ lvars->obd_vars = lprocfs_mdc_obd_vars;
+}
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
new file mode 100644
index 000000000000..2aeff0ecec34
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -0,0 +1,180 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _MDC_INTERNAL_H
+#define _MDC_INTERNAL_H
+
+#include <lustre_mdc.h>
+#include <lustre_mds.h>
+
+#ifdef LPROCFS
+void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
+ struct obd_capa *oc, __u64 valid, int ea_size,
+ __u32 suppgid, int flags);
+void mdc_pack_capa(struct ptlrpc_request *req,
+ const struct req_msg_field *field, struct obd_capa *oc);
+int mdc_pack_req(struct ptlrpc_request *req, int version, int opc);
+void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
+ const struct lu_fid *cfid, int flags);
+void mdc_swap_layouts_pack(struct ptlrpc_request *req,
+ struct md_op_data *op_data);
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, __u32 size,
+ const struct lu_fid *fid, struct obd_capa *oc);
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
+ struct md_op_data *data, int ea_size);
+void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len);
+void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ const void *data, int datalen, __u32 mode, __u32 uid,
+ __u32 gid, cfs_cap_t capability, __u64 rdev);
+void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ __u32 mode, __u64 rdev, __u32 flags, const void *data,
+ int datalen);
+void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
+void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
+void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new, int newlen);
+void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
+int mdc_enter_request(struct client_obd *cli);
+void mdc_exit_request(struct client_obd *cli);
+
+/* mdc/mdc_locks.c */
+int mdc_set_lock_data(struct obd_export *exp,
+ __u64 *lockh, void *data, __u64 *bits);
+
+int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid);
+
+int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data);
+
+int mdc_intent_lock(struct obd_export *exp,
+ struct md_op_data *,
+ void *lmm, int lmmsize,
+ struct lookup_intent *, int,
+ struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags);
+int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it, struct md_op_data *op_data,
+ struct lustre_handle *lockh, void *lmm, int lmmsize,
+ struct ptlrpc_request **req, __u64 extra_lock_flags);
+
+int mdc_resource_get_unused(struct obd_export *exp, struct lu_fid *fid,
+ struct list_head *cancels, ldlm_mode_t mode,
+ __u64 bits);
+/* mdc/mdc_request.c */
+int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
+ struct md_op_data *op_data);
+
+int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags,
+ struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
+ struct ptlrpc_request **);
+
+struct obd_client_handle;
+
+int mdc_get_lustre_md(struct obd_export *md_exp, struct ptlrpc_request *req,
+ struct obd_export *dt_exp, struct obd_export *lmv_exp,
+ struct lustre_md *md);
+
+int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md);
+
+int mdc_set_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och,
+ struct ptlrpc_request *open_req);
+
+int mdc_clear_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och);
+void mdc_commit_open(struct ptlrpc_request *req);
+void mdc_replay_open(struct ptlrpc_request *req);
+
+int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
+ const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+ cfs_cap_t capability, __u64 rdev,
+ struct ptlrpc_request **request);
+int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request);
+int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new, int newlen,
+ struct ptlrpc_request **request);
+int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request, struct md_open_data **mod);
+int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request);
+int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags, void *opaque);
+
+static inline void mdc_set_capa_size(struct ptlrpc_request *req,
+ const struct req_msg_field *field,
+ struct obd_capa *oc)
+{
+ if (oc == NULL)
+ req_capsule_set_size(&req->rq_pill, field, RCL_CLIENT, 0);
+ else
+ /* it is already calculated as sizeof struct obd_capa */
+ ;
+}
+
+int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
+ struct lu_fid *fid, __u64 *bits);
+
+int mdc_intent_getattr_async(struct obd_export *exp,
+ struct md_enqueue_info *minfo,
+ struct ldlm_enqueue_info *einfo);
+
+ldlm_mode_t mdc_lock_match(struct obd_export *exp, __u64 flags,
+ const struct lu_fid *fid, ldlm_type_t type,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ struct lustre_handle *lockh);
+
+static inline int mdc_prep_elc_req(struct obd_export *exp,
+ struct ptlrpc_request *req, int opc,
+ struct list_head *cancels, int count)
+{
+ return ldlm_prep_elc_req(exp, req, LUSTRE_MDS_VERSION, opc, 0, cancels,
+ count);
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
new file mode 100644
index 000000000000..e789aed98de0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -0,0 +1,564 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_MDC
+#include <lustre_net.h>
+#include <lustre/lustre_idl.h>
+#include "mdc_internal.h"
+
+
+static void __mdc_pack_body(struct mdt_body *b, __u32 suppgid)
+{
+ LASSERT (b != NULL);
+
+ b->suppgid = suppgid;
+ b->uid = current_uid();
+ b->gid = current_gid();
+ b->fsuid = current_fsuid();
+ b->fsgid = current_fsgid();
+ b->capability = cfs_curproc_cap_pack();
+}
+
+void mdc_pack_capa(struct ptlrpc_request *req, const struct req_msg_field *field,
+ struct obd_capa *oc)
+{
+ struct req_capsule *pill = &req->rq_pill;
+ struct lustre_capa *c;
+
+ if (oc == NULL) {
+ LASSERT(req_capsule_get_size(pill, field, RCL_CLIENT) == 0);
+ return;
+ }
+
+ c = req_capsule_client_get(pill, field);
+ LASSERT(c != NULL);
+ capa_cpy(c, oc);
+ DEBUG_CAPA(D_SEC, c, "pack");
+}
+
+void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
+ const struct lu_fid *cfid, int flags)
+{
+ struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+
+ if (pfid) {
+ b->fid1 = *pfid;
+ b->valid = OBD_MD_FLID;
+ }
+ if (cfid)
+ b->fid2 = *cfid;
+ b->flags = flags;
+}
+
+void mdc_swap_layouts_pack(struct ptlrpc_request *req,
+ struct md_op_data *op_data)
+{
+ struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+
+ __mdc_pack_body(b, op_data->op_suppgids[0]);
+ b->fid1 = op_data->op_fid1;
+ b->fid2 = op_data->op_fid2;
+ b->valid |= OBD_MD_FLID;
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_pack_capa(req, &RMF_CAPA2, op_data->op_capa2);
+}
+
+void mdc_pack_body(struct ptlrpc_request *req,
+ const struct lu_fid *fid, struct obd_capa *oc,
+ __u64 valid, int ea_size, __u32 suppgid, int flags)
+{
+ struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+ LASSERT(b != NULL);
+ b->valid = valid;
+ b->eadatasize = ea_size;
+ b->flags = flags;
+ __mdc_pack_body(b, suppgid);
+ if (fid) {
+ b->fid1 = *fid;
+ b->valid |= OBD_MD_FLID;
+ mdc_pack_capa(req, &RMF_CAPA1, oc);
+ }
+}
+
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff,
+ __u32 size, const struct lu_fid *fid, struct obd_capa *oc)
+{
+ struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+ b->fid1 = *fid;
+ b->valid |= OBD_MD_FLID;
+ b->size = pgoff; /* !! */
+ b->nlink = size; /* !! */
+ __mdc_pack_body(b, -1);
+ b->mode = LUDA_FID | LUDA_TYPE;
+
+ mdc_pack_capa(req, &RMF_CAPA1, oc);
+}
+
+/* packing of MDS records */
+void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ const void *data, int datalen, __u32 mode,
+ __u32 uid, __u32 gid, cfs_cap_t cap_effective, __u64 rdev)
+{
+ struct mdt_rec_create *rec;
+ char *tmp;
+ __u64 flags;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_create));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+
+
+ rec->cr_opcode = REINT_CREATE;
+ rec->cr_fsuid = uid;
+ rec->cr_fsgid = gid;
+ rec->cr_cap = cap_effective;
+ rec->cr_fid1 = op_data->op_fid1;
+ rec->cr_fid2 = op_data->op_fid2;
+ rec->cr_mode = mode;
+ rec->cr_rdev = rdev;
+ rec->cr_time = op_data->op_mod_time;
+ rec->cr_suppgid1 = op_data->op_suppgids[0];
+ rec->cr_suppgid2 = op_data->op_suppgids[1];
+ flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS;
+ if (op_data->op_bias & MDS_CREATE_VOLATILE)
+ flags |= MDS_OPEN_VOLATILE;
+ set_mrc_cr_flags(rec, flags);
+ rec->cr_bias = op_data->op_bias;
+ rec->cr_umask = current_umask();
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+
+ if (data) {
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
+ memcpy(tmp, data, datalen);
+ }
+}
+
+static __u64 mds_pack_open_flags(__u32 flags, __u32 mode)
+{
+ __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
+ MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
+ MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
+ MDS_OPEN_BY_FID));
+ if (flags & O_CREAT)
+ cr_flags |= MDS_OPEN_CREAT;
+ if (flags & O_EXCL)
+ cr_flags |= MDS_OPEN_EXCL;
+ if (flags & O_TRUNC)
+ cr_flags |= MDS_OPEN_TRUNC;
+ if (flags & O_APPEND)
+ cr_flags |= MDS_OPEN_APPEND;
+ if (flags & O_SYNC)
+ cr_flags |= MDS_OPEN_SYNC;
+ if (flags & O_DIRECTORY)
+ cr_flags |= MDS_OPEN_DIRECTORY;
+#ifdef FMODE_EXEC
+ if (flags & FMODE_EXEC)
+ cr_flags |= MDS_FMODE_EXEC;
+#endif
+ if (flags & O_LOV_DELAY_CREATE)
+ cr_flags |= MDS_OPEN_DELAY_CREATE;
+
+ if (flags & O_NONBLOCK)
+ cr_flags |= MDS_OPEN_NORESTORE;
+
+ return cr_flags;
+}
+
+/* packing of MDS records */
+void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ __u32 mode, __u64 rdev, __u32 flags, const void *lmm,
+ int lmmlen)
+{
+ struct mdt_rec_create *rec;
+ char *tmp;
+ __u64 cr_flags;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_create));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+
+ /* XXX do something about time, uid, gid */
+ rec->cr_opcode = REINT_OPEN;
+ rec->cr_fsuid = current_fsuid();
+ rec->cr_fsgid = current_fsgid();
+ rec->cr_cap = cfs_curproc_cap_pack();
+ if (op_data != NULL) {
+ rec->cr_fid1 = op_data->op_fid1;
+ rec->cr_fid2 = op_data->op_fid2;
+ }
+ rec->cr_mode = mode;
+ cr_flags = mds_pack_open_flags(flags, mode);
+ rec->cr_rdev = rdev;
+ rec->cr_time = op_data->op_mod_time;
+ rec->cr_suppgid1 = op_data->op_suppgids[0];
+ rec->cr_suppgid2 = op_data->op_suppgids[1];
+ rec->cr_bias = op_data->op_bias;
+ rec->cr_umask = current_umask();
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+ /* the next buffer is child capa, which is used for replay,
+ * will be packed from the data in reply message. */
+
+ if (op_data->op_name) {
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+ if (op_data->op_bias & MDS_CREATE_VOLATILE)
+ cr_flags |= MDS_OPEN_VOLATILE;
+ }
+
+ if (lmm) {
+ cr_flags |= MDS_OPEN_HAS_EA;
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
+ memcpy(tmp, lmm, lmmlen);
+ }
+ set_mrc_cr_flags(rec, cr_flags);
+}
+
+static inline __u64 attr_pack(unsigned int ia_valid) {
+ __u64 sa_valid = 0;
+
+ if (ia_valid & ATTR_MODE)
+ sa_valid |= MDS_ATTR_MODE;
+ if (ia_valid & ATTR_UID)
+ sa_valid |= MDS_ATTR_UID;
+ if (ia_valid & ATTR_GID)
+ sa_valid |= MDS_ATTR_GID;
+ if (ia_valid & ATTR_SIZE)
+ sa_valid |= MDS_ATTR_SIZE;
+ if (ia_valid & ATTR_ATIME)
+ sa_valid |= MDS_ATTR_ATIME;
+ if (ia_valid & ATTR_MTIME)
+ sa_valid |= MDS_ATTR_MTIME;
+ if (ia_valid & ATTR_CTIME)
+ sa_valid |= MDS_ATTR_CTIME;
+ if (ia_valid & ATTR_ATIME_SET)
+ sa_valid |= MDS_ATTR_ATIME_SET;
+ if (ia_valid & ATTR_MTIME_SET)
+ sa_valid |= MDS_ATTR_MTIME_SET;
+ if (ia_valid & ATTR_FORCE)
+ sa_valid |= MDS_ATTR_FORCE;
+ if (ia_valid & ATTR_ATTR_FLAG)
+ sa_valid |= MDS_ATTR_ATTR_FLAG;
+ if (ia_valid & ATTR_KILL_SUID)
+ sa_valid |= MDS_ATTR_KILL_SUID;
+ if (ia_valid & ATTR_KILL_SGID)
+ sa_valid |= MDS_ATTR_KILL_SGID;
+ if (ia_valid & ATTR_CTIME_SET)
+ sa_valid |= MDS_ATTR_CTIME_SET;
+ if (ia_valid & ATTR_FROM_OPEN)
+ sa_valid |= MDS_ATTR_FROM_OPEN;
+ if (ia_valid & ATTR_BLOCKS)
+ sa_valid |= MDS_ATTR_BLOCKS;
+ if (ia_valid & MDS_OPEN_OWNEROVERRIDE)
+ /* NFSD hack (see bug 5781) */
+ sa_valid |= MDS_OPEN_OWNEROVERRIDE;
+ return sa_valid;
+}
+
+static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
+ struct md_op_data *op_data)
+{
+ rec->sa_opcode = REINT_SETATTR;
+ rec->sa_fsuid = current_fsuid();
+ rec->sa_fsgid = current_fsgid();
+ rec->sa_cap = cfs_curproc_cap_pack();
+ rec->sa_suppgid = -1;
+
+ rec->sa_fid = op_data->op_fid1;
+ rec->sa_valid = attr_pack(op_data->op_attr.ia_valid);
+ rec->sa_mode = op_data->op_attr.ia_mode;
+ rec->sa_uid = op_data->op_attr.ia_uid;
+ rec->sa_gid = op_data->op_attr.ia_gid;
+ rec->sa_size = op_data->op_attr.ia_size;
+ rec->sa_blocks = op_data->op_attr_blocks;
+ rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime);
+ rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime);
+ rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime);
+ rec->sa_attr_flags = ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags;
+ if ((op_data->op_attr.ia_valid & ATTR_GID) &&
+ current_is_in_group(op_data->op_attr.ia_gid))
+ rec->sa_suppgid = op_data->op_attr.ia_gid;
+ else
+ rec->sa_suppgid = op_data->op_suppgids[0];
+
+ rec->sa_bias = op_data->op_bias;
+}
+
+static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch,
+ struct md_op_data *op_data)
+{
+ memcpy(&epoch->handle, &op_data->op_handle, sizeof(epoch->handle));
+ epoch->ioepoch = op_data->op_ioepoch;
+ epoch->flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS;
+}
+
+void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len)
+{
+ struct mdt_rec_setattr *rec;
+ struct mdt_ioepoch *epoch;
+ struct lov_user_md *lum = NULL;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) ==sizeof(struct mdt_rec_setattr));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+ mdc_setattr_pack_rec(rec, op_data);
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+
+ if (op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) {
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ mdc_ioepoch_pack(epoch, op_data);
+ }
+
+ if (ealen == 0)
+ return;
+
+ lum = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
+ if (ea == NULL) { /* Remove LOV EA */
+ lum->lmm_magic = LOV_USER_MAGIC_V1;
+ lum->lmm_stripe_size = 0;
+ lum->lmm_stripe_count = 0;
+ lum->lmm_stripe_offset = (typeof(lum->lmm_stripe_offset))(-1);
+ } else {
+ memcpy(lum, ea, ealen);
+ }
+
+ if (ea2len == 0)
+ return;
+
+ memcpy(req_capsule_client_get(&req->rq_pill, &RMF_LOGCOOKIES), ea2,
+ ea2len);
+}
+
+void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
+{
+ struct mdt_rec_unlink *rec;
+ char *tmp;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_unlink));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+ LASSERT(rec != NULL);
+
+ rec->ul_opcode = op_data->op_cli_flags & CLI_RM_ENTRY ?
+ REINT_RMENTRY : REINT_UNLINK;
+ rec->ul_fsuid = op_data->op_fsuid;
+ rec->ul_fsgid = op_data->op_fsgid;
+ rec->ul_cap = op_data->op_cap;
+ rec->ul_mode = op_data->op_mode;
+ rec->ul_suppgid1= op_data->op_suppgids[0];
+ rec->ul_suppgid2= -1;
+ rec->ul_fid1 = op_data->op_fid1;
+ rec->ul_fid2 = op_data->op_fid2;
+ rec->ul_time = op_data->op_mod_time;
+ rec->ul_bias = op_data->op_bias;
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LASSERT(tmp != NULL);
+ LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+}
+
+void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
+{
+ struct mdt_rec_link *rec;
+ char *tmp;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_link));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+ LASSERT (rec != NULL);
+
+ rec->lk_opcode = REINT_LINK;
+ rec->lk_fsuid = op_data->op_fsuid;//current->fsuid;
+ rec->lk_fsgid = op_data->op_fsgid;//current->fsgid;
+ rec->lk_cap = op_data->op_cap;//current->cap_effective;
+ rec->lk_suppgid1 = op_data->op_suppgids[0];
+ rec->lk_suppgid2 = op_data->op_suppgids[1];
+ rec->lk_fid1 = op_data->op_fid1;
+ rec->lk_fid2 = op_data->op_fid2;
+ rec->lk_time = op_data->op_mod_time;
+ rec->lk_bias = op_data->op_bias;
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_pack_capa(req, &RMF_CAPA2, op_data->op_capa2);
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+}
+
+void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new, int newlen)
+{
+ struct mdt_rec_rename *rec;
+ char *tmp;
+
+ CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_rename));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+
+ /* XXX do something about time, uid, gid */
+ rec->rn_opcode = REINT_RENAME;
+ rec->rn_fsuid = op_data->op_fsuid;
+ rec->rn_fsgid = op_data->op_fsgid;
+ rec->rn_cap = op_data->op_cap;
+ rec->rn_suppgid1 = op_data->op_suppgids[0];
+ rec->rn_suppgid2 = op_data->op_suppgids[1];
+ rec->rn_fid1 = op_data->op_fid1;
+ rec->rn_fid2 = op_data->op_fid2;
+ rec->rn_time = op_data->op_mod_time;
+ rec->rn_mode = op_data->op_mode;
+ rec->rn_bias = op_data->op_bias;
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_pack_capa(req, &RMF_CAPA2, op_data->op_capa2);
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LOGL0(old, oldlen, tmp);
+
+ if (new) {
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SYMTGT);
+ LOGL0(new, newlen, tmp);
+ }
+}
+
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
+ struct md_op_data *op_data, int ea_size)
+{
+ struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDT_BODY);
+
+ b->valid = valid;
+ if (op_data->op_bias & MDS_CHECK_SPLIT)
+ b->valid |= OBD_MD_FLCKSPLIT;
+ if (op_data->op_bias & MDS_CROSS_REF)
+ b->valid |= OBD_MD_FLCROSSREF;
+ b->eadatasize = ea_size;
+ b->flags = flags;
+ __mdc_pack_body(b, op_data->op_suppgids[0]);
+
+ b->fid1 = op_data->op_fid1;
+ b->fid2 = op_data->op_fid2;
+ b->valid |= OBD_MD_FLID;
+
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+
+ if (op_data->op_name) {
+ char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+
+ }
+}
+
+void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
+{
+ struct mdt_ioepoch *epoch;
+ struct mdt_rec_setattr *rec;
+
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+
+ mdc_setattr_pack_rec(rec, op_data);
+ mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_ioepoch_pack(epoch, op_data);
+}
+
+static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
+{
+ int rc;
+ ENTRY;
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = list_empty(&mcw->mcw_entry);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ RETURN(rc);
+};
+
+/* We record requests in flight in cli->cl_r_in_flight here.
+ * There is only one write rpc possible in mdc anyway. If this to change
+ * in the future - the code may need to be revisited. */
+int mdc_enter_request(struct client_obd *cli)
+{
+ int rc = 0;
+ struct mdc_cache_waiter mcw;
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+ list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
+ init_waitqueue_head(&mcw.mcw_waitq);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi);
+ if (rc) {
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (list_empty(&mcw.mcw_entry))
+ cli->cl_r_in_flight--;
+ list_del_init(&mcw.mcw_entry);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ }
+ } else {
+ cli->cl_r_in_flight++;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ }
+ return rc;
+}
+
+void mdc_exit_request(struct client_obd *cli)
+{
+ struct list_head *l, *tmp;
+ struct mdc_cache_waiter *mcw;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_r_in_flight--;
+ list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
+ if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+ /* No free request slots anymore */
+ break;
+ }
+
+ mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
+ list_del_init(&mcw->mcw_entry);
+ cli->cl_r_in_flight++;
+ wake_up(&mcw->mcw_waitq);
+ }
+ /* Empty waiting list? Decrease reqs in-flight number */
+
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
new file mode 100644
index 000000000000..1cc90b635fb5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -0,0 +1,1229 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_MDC
+
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+
+#include <lustre_acl.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+/* fid_res_name_eq() */
+#include <lustre_fid.h>
+#include <lprocfs_status.h>
+#include "mdc_internal.h"
+
+struct mdc_getattr_args {
+ struct obd_export *ga_exp;
+ struct md_enqueue_info *ga_minfo;
+ struct ldlm_enqueue_info *ga_einfo;
+};
+
+int it_disposition(struct lookup_intent *it, int flag)
+{
+ return it->d.lustre.it_disposition & flag;
+}
+EXPORT_SYMBOL(it_disposition);
+
+void it_set_disposition(struct lookup_intent *it, int flag)
+{
+ it->d.lustre.it_disposition |= flag;
+}
+EXPORT_SYMBOL(it_set_disposition);
+
+void it_clear_disposition(struct lookup_intent *it, int flag)
+{
+ it->d.lustre.it_disposition &= ~flag;
+}
+EXPORT_SYMBOL(it_clear_disposition);
+
+int it_open_error(int phase, struct lookup_intent *it)
+{
+ if (it_disposition(it, DISP_OPEN_OPEN)) {
+ if (phase >= DISP_OPEN_OPEN)
+ return it->d.lustre.it_status;
+ else
+ return 0;
+ }
+
+ if (it_disposition(it, DISP_OPEN_CREATE)) {
+ if (phase >= DISP_OPEN_CREATE)
+ return it->d.lustre.it_status;
+ else
+ return 0;
+ }
+
+ if (it_disposition(it, DISP_LOOKUP_EXECD)) {
+ if (phase >= DISP_LOOKUP_EXECD)
+ return it->d.lustre.it_status;
+ else
+ return 0;
+ }
+
+ if (it_disposition(it, DISP_IT_EXECD)) {
+ if (phase >= DISP_IT_EXECD)
+ return it->d.lustre.it_status;
+ else
+ return 0;
+ }
+ CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
+ it->d.lustre.it_status);
+ LBUG();
+ return 0;
+}
+EXPORT_SYMBOL(it_open_error);
+
+/* this must be called on a lockh that is known to have a referenced lock */
+int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
+ __u64 *bits)
+{
+ struct ldlm_lock *lock;
+ struct inode *new_inode = data;
+ ENTRY;
+
+ if(bits)
+ *bits = 0;
+
+ if (!*lockh)
+ RETURN(0);
+
+ lock = ldlm_handle2lock((struct lustre_handle *)lockh);
+
+ LASSERT(lock != NULL);
+ lock_res_and_lock(lock);
+ if (lock->l_resource->lr_lvb_inode &&
+ lock->l_resource->lr_lvb_inode != data) {
+ struct inode *old_inode = lock->l_resource->lr_lvb_inode;
+ LASSERTF(old_inode->i_state & I_FREEING,
+ "Found existing inode %p/%lu/%u state %lu in lock: "
+ "setting data to %p/%lu/%u\n", old_inode,
+ old_inode->i_ino, old_inode->i_generation,
+ old_inode->i_state,
+ new_inode, new_inode->i_ino, new_inode->i_generation);
+ }
+ lock->l_resource->lr_lvb_inode = new_inode;
+ if (bits)
+ *bits = lock->l_policy_data.l_inodebits.bits;
+
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+
+ RETURN(0);
+}
+
+ldlm_mode_t mdc_lock_match(struct obd_export *exp, __u64 flags,
+ const struct lu_fid *fid, ldlm_type_t type,
+ ldlm_policy_data_t *policy, ldlm_mode_t mode,
+ struct lustre_handle *lockh)
+{
+ struct ldlm_res_id res_id;
+ ldlm_mode_t rc;
+ ENTRY;
+
+ fid_build_reg_res_name(fid, &res_id);
+ rc = ldlm_lock_match(class_exp2obd(exp)->obd_namespace, flags,
+ &res_id, type, policy, mode, lockh, 0);
+ RETURN(rc);
+}
+
+int mdc_cancel_unused(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_policy_data_t *policy,
+ ldlm_mode_t mode,
+ ldlm_cancel_flags_t flags,
+ void *opaque)
+{
+ struct ldlm_res_id res_id;
+ struct obd_device *obd = class_exp2obd(exp);
+ int rc;
+
+ ENTRY;
+
+ fid_build_reg_res_name(fid, &res_id);
+ rc = ldlm_cli_cancel_unused_resource(obd->obd_namespace, &res_id,
+ policy, mode, flags, opaque);
+ RETURN(rc);
+}
+
+int mdc_null_inode(struct obd_export *exp,
+ const struct lu_fid *fid)
+{
+ struct ldlm_res_id res_id;
+ struct ldlm_resource *res;
+ struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace;
+ ENTRY;
+
+ LASSERTF(ns != NULL, "no namespace passed\n");
+
+ fid_build_reg_res_name(fid, &res_id);
+
+ res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
+ if(res == NULL)
+ RETURN(0);
+
+ lock_res(res);
+ res->lr_lvb_inode = NULL;
+ unlock_res(res);
+
+ ldlm_resource_putref(res);
+ RETURN(0);
+}
+
+/* find any ldlm lock of the inode in mdc
+ * return 0 not find
+ * 1 find one
+ * < 0 error */
+int mdc_find_cbdata(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ struct ldlm_res_id res_id;
+ int rc = 0;
+ ENTRY;
+
+ fid_build_reg_res_name((struct lu_fid*)fid, &res_id);
+ rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
+ it, data);
+ if (rc == LDLM_ITER_STOP)
+ RETURN(1);
+ else if (rc == LDLM_ITER_CONTINUE)
+ RETURN(0);
+ RETURN(rc);
+}
+
+static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
+{
+ /* Don't hold error requests for replay. */
+ if (req->rq_replay) {
+ spin_lock(&req->rq_lock);
+ req->rq_replay = 0;
+ spin_unlock(&req->rq_lock);
+ }
+ if (rc && req->rq_transno != 0) {
+ DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc);
+ LBUG();
+ }
+}
+
+/* Save a large LOV EA into the request buffer so that it is available
+ * for replay. We don't do this in the initial request because the
+ * original request doesn't need this buffer (at most it sends just the
+ * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty
+ * buffer and may also be difficult to allocate and save a very large
+ * request buffer for each open. (bug 5707)
+ *
+ * OOM here may cause recovery failure if lmm is needed (only for the
+ * original open if the MDS crashed just when this client also OOM'd)
+ * but this is incredibly unlikely, and questionable whether the client
+ * could do MDS recovery under OOM anyways... */
+static void mdc_realloc_openmsg(struct ptlrpc_request *req,
+ struct mdt_body *body)
+{
+ int rc;
+
+ /* FIXME: remove this explicit offset. */
+ rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4,
+ body->eadatasize);
+ if (rc) {
+ CERROR("Can't enlarge segment %d size to %d\n",
+ DLM_INTENT_REC_OFF + 4, body->eadatasize);
+ body->valid &= ~OBD_MD_FLEASIZE;
+ body->eadatasize = 0;
+ }
+}
+
+static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
+ struct lookup_intent *it,
+ struct md_op_data *op_data,
+ void *lmm, int lmmsize,
+ void *cb_data)
+{
+ struct ptlrpc_request *req;
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct ldlm_intent *lit;
+ LIST_HEAD(cancels);
+ int count = 0;
+ int mode;
+ int rc;
+ ENTRY;
+
+ it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
+
+ /* XXX: openlock is not cancelled for cross-refs. */
+ /* If inode is known, cancel conflicting OPEN locks. */
+ if (fid_is_sane(&op_data->op_fid2)) {
+ if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
+ mode = LCK_CW;
+#ifdef FMODE_EXEC
+ else if (it->it_flags & FMODE_EXEC)
+ mode = LCK_PR;
+#endif
+ else
+ mode = LCK_CR;
+ count = mdc_resource_get_unused(exp, &op_data->op_fid2,
+ &cancels, mode,
+ MDS_INODELOCK_OPEN);
+ }
+
+ /* If CREATE, cancel parent's UPDATE lock. */
+ if (it->it_op & IT_CREAT)
+ mode = LCK_EX;
+ else
+ mode = LCK_CR;
+ count += mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, mode,
+ MDS_INODELOCK_UPDATE);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_LDLM_INTENT_OPEN);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+
+ /* parent capability */
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ /* child capability, reserve the size according to parent capa, it will
+ * be filled after we get the reply */
+ mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa1);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
+ max(lmmsize, obddev->u.cli.cl_default_mds_easize));
+
+ rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ return NULL;
+ }
+
+ spin_lock(&req->rq_lock);
+ req->rq_replay = req->rq_import->imp_replayable;
+ spin_unlock(&req->rq_lock);
+
+ /* pack the intent */
+ lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
+ lmmsize);
+
+ /* for remote client, fetch remote perm for current user */
+ if (client_is_remote(exp))
+ req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
+ sizeof(struct mdt_remote_perm));
+ ptlrpc_request_set_replen(req);
+ return req;
+}
+
+static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp,
+ struct lookup_intent *it,
+ struct md_op_data *op_data)
+{
+ struct ptlrpc_request *req;
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct ldlm_intent *lit;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_LDLM_INTENT_UNLINK);
+ if (req == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(ERR_PTR(rc));
+ }
+
+ /* pack the intent */
+ lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_unlink_pack(req, op_data);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obddev->u.cli.cl_max_mds_easize);
+ req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
+ obddev->u.cli.cl_max_mds_cookiesize);
+ ptlrpc_request_set_replen(req);
+ RETURN(req);
+}
+
+static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
+ struct lookup_intent *it,
+ struct md_op_data *op_data)
+{
+ struct ptlrpc_request *req;
+ struct obd_device *obddev = class_exp2obd(exp);
+ obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
+ OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA |
+ OBD_MD_FLMDSCAPA | OBD_MD_MEA |
+ (client_is_remote(exp) ?
+ OBD_MD_FLRMTPERM : OBD_MD_FLACL);
+ struct ldlm_intent *lit;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_LDLM_INTENT_GETATTR);
+ if (req == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(ERR_PTR(rc));
+ }
+
+ /* pack the intent */
+ lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_getattr_pack(req, valid, it->it_flags, op_data,
+ obddev->u.cli.cl_max_mds_easize);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obddev->u.cli.cl_max_mds_easize);
+ if (client_is_remote(exp))
+ req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
+ sizeof(struct mdt_remote_perm));
+ ptlrpc_request_set_replen(req);
+ RETURN(req);
+}
+
+static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp,
+ struct lookup_intent *it,
+ struct md_op_data *unused)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req;
+ struct ldlm_intent *lit;
+ struct layout_intent *layout;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_LDLM_INTENT_LAYOUT);
+ if (req == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, 0);
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(ERR_PTR(rc));
+ }
+
+ /* pack the intent */
+ lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the layout intent request */
+ layout = req_capsule_client_get(&req->rq_pill, &RMF_LAYOUT_INTENT);
+ /* LAYOUT_INTENT_ACCESS is generic, specific operation will be
+ * set for replication */
+ layout->li_opc = LAYOUT_INTENT_ACCESS;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
+ obd->u.cli.cl_max_mds_easize);
+ ptlrpc_request_set_replen(req);
+ RETURN(req);
+}
+
+static struct ptlrpc_request *
+mdc_enqueue_pack(struct obd_export *exp, int lvb_len)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
+ if (req == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(ERR_PTR(rc));
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
+ ptlrpc_request_set_replen(req);
+ RETURN(req);
+}
+
+static int mdc_finish_enqueue(struct obd_export *exp,
+ struct ptlrpc_request *req,
+ struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it,
+ struct lustre_handle *lockh,
+ int rc)
+{
+ struct req_capsule *pill = &req->rq_pill;
+ struct ldlm_request *lockreq;
+ struct ldlm_reply *lockrep;
+ struct lustre_intent_data *intent = &it->d.lustre;
+ struct ldlm_lock *lock;
+ void *lvb_data = NULL;
+ int lvb_len = 0;
+ ENTRY;
+
+ LASSERT(rc >= 0);
+ /* Similarly, if we're going to replay this request, we don't want to
+ * actually get a lock, just perform the intent. */
+ if (req->rq_transno || req->rq_replay) {
+ lockreq = req_capsule_client_get(pill, &RMF_DLM_REQ);
+ lockreq->lock_flags |= ldlm_flags_to_wire(LDLM_FL_INTENT_ONLY);
+ }
+
+ if (rc == ELDLM_LOCK_ABORTED) {
+ einfo->ei_mode = 0;
+ memset(lockh, 0, sizeof(*lockh));
+ rc = 0;
+ } else { /* rc = 0 */
+ lock = ldlm_handle2lock(lockh);
+ LASSERT(lock != NULL);
+
+ /* If the server gave us back a different lock mode, we should
+ * fix up our variables. */
+ if (lock->l_req_mode != einfo->ei_mode) {
+ ldlm_lock_addref(lockh, lock->l_req_mode);
+ ldlm_lock_decref(lockh, einfo->ei_mode);
+ einfo->ei_mode = lock->l_req_mode;
+ }
+ LDLM_LOCK_PUT(lock);
+ }
+
+ lockrep = req_capsule_server_get(pill, &RMF_DLM_REP);
+ LASSERT(lockrep != NULL); /* checked by ldlm_cli_enqueue() */
+
+ intent->it_disposition = (int)lockrep->lock_policy_res1;
+ intent->it_status = (int)lockrep->lock_policy_res2;
+ intent->it_lock_mode = einfo->ei_mode;
+ intent->it_lock_handle = lockh->cookie;
+ intent->it_data = req;
+
+ /* Technically speaking rq_transno must already be zero if
+ * it_status is in error, so the check is a bit redundant */
+ if ((!req->rq_transno || intent->it_status < 0) && req->rq_replay)
+ mdc_clear_replay_flag(req, intent->it_status);
+
+ /* If we're doing an IT_OPEN which did not result in an actual
+ * successful open, then we need to remove the bit which saves
+ * this request for unconditional replay.
+ *
+ * It's important that we do this first! Otherwise we might exit the
+ * function without doing so, and try to replay a failed create
+ * (bug 3440) */
+ if (it->it_op & IT_OPEN && req->rq_replay &&
+ (!it_disposition(it, DISP_OPEN_OPEN) ||intent->it_status != 0))
+ mdc_clear_replay_flag(req, intent->it_status);
+
+ DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d",
+ it->it_op, intent->it_disposition, intent->it_status);
+
+ /* We know what to expect, so we do any byte flipping required here */
+ if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) {
+ struct mdt_body *body;
+
+ body = req_capsule_server_get(pill, &RMF_MDT_BODY);
+ if (body == NULL) {
+ CERROR ("Can't swab mdt_body\n");
+ RETURN (-EPROTO);
+ }
+
+ if (it_disposition(it, DISP_OPEN_OPEN) &&
+ !it_open_error(DISP_OPEN_OPEN, it)) {
+ /*
+ * If this is a successful OPEN request, we need to set
+ * replay handler and data early, so that if replay
+ * happens immediately after swabbing below, new reply
+ * is swabbed by that handler correctly.
+ */
+ mdc_set_open_replay_data(NULL, NULL, req);
+ }
+
+ if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
+ void *eadata;
+
+ mdc_update_max_ea_from_body(exp, body);
+
+ /*
+ * The eadata is opaque; just check that it is there.
+ * Eventually, obd_unpackmd() will check the contents.
+ */
+ eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
+ body->eadatasize);
+ if (eadata == NULL)
+ RETURN(-EPROTO);
+
+ /* save lvb data and length in case this is for layout
+ * lock */
+ lvb_data = eadata;
+ lvb_len = body->eadatasize;
+
+ /*
+ * We save the reply LOV EA in case we have to replay a
+ * create for recovery. If we didn't allocate a large
+ * enough request buffer above we need to reallocate it
+ * here to hold the actual LOV EA.
+ *
+ * To not save LOV EA if request is not going to replay
+ * (for example error one).
+ */
+ if ((it->it_op & IT_OPEN) && req->rq_replay) {
+ void *lmm;
+ if (req_capsule_get_size(pill, &RMF_EADATA,
+ RCL_CLIENT) <
+ body->eadatasize)
+ mdc_realloc_openmsg(req, body);
+ else
+ req_capsule_shrink(pill, &RMF_EADATA,
+ body->eadatasize,
+ RCL_CLIENT);
+
+ req_capsule_set_size(pill, &RMF_EADATA,
+ RCL_CLIENT,
+ body->eadatasize);
+
+ lmm = req_capsule_client_get(pill, &RMF_EADATA);
+ if (lmm)
+ memcpy(lmm, eadata, body->eadatasize);
+ }
+ }
+
+ if (body->valid & OBD_MD_FLRMTPERM) {
+ struct mdt_remote_perm *perm;
+
+ LASSERT(client_is_remote(exp));
+ perm = req_capsule_server_swab_get(pill, &RMF_ACL,
+ lustre_swab_mdt_remote_perm);
+ if (perm == NULL)
+ RETURN(-EPROTO);
+ }
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ struct lustre_capa *capa, *p;
+
+ capa = req_capsule_server_get(pill, &RMF_CAPA1);
+ if (capa == NULL)
+ RETURN(-EPROTO);
+
+ if (it->it_op & IT_OPEN) {
+ /* client fid capa will be checked in replay */
+ p = req_capsule_client_get(pill, &RMF_CAPA2);
+ LASSERT(p);
+ *p = *capa;
+ }
+ }
+ if (body->valid & OBD_MD_FLOSSCAPA) {
+ struct lustre_capa *capa;
+
+ capa = req_capsule_server_get(pill, &RMF_CAPA2);
+ if (capa == NULL)
+ RETURN(-EPROTO);
+ }
+ } else if (it->it_op & IT_LAYOUT) {
+ /* maybe the lock was granted right away and layout
+ * is packed into RMF_DLM_LVB of req */
+ lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER);
+ if (lvb_len > 0) {
+ lvb_data = req_capsule_server_sized_get(pill,
+ &RMF_DLM_LVB, lvb_len);
+ if (lvb_data == NULL)
+ RETURN(-EPROTO);
+ }
+ }
+
+ /* fill in stripe data for layout lock */
+ lock = ldlm_handle2lock(lockh);
+ if (lock != NULL && ldlm_has_layout(lock) && lvb_data != NULL) {
+ void *lmm;
+
+ LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d\n",
+ ldlm_it2str(it->it_op), lvb_len);
+
+ OBD_ALLOC_LARGE(lmm, lvb_len);
+ if (lmm == NULL) {
+ LDLM_LOCK_PUT(lock);
+ RETURN(-ENOMEM);
+ }
+ memcpy(lmm, lvb_data, lvb_len);
+
+ /* install lvb_data */
+ lock_res_and_lock(lock);
+ if (lock->l_lvb_data == NULL) {
+ lock->l_lvb_data = lmm;
+ lock->l_lvb_len = lvb_len;
+ lmm = NULL;
+ }
+ unlock_res_and_lock(lock);
+ if (lmm != NULL)
+ OBD_FREE_LARGE(lmm, lvb_len);
+ }
+ if (lock != NULL)
+ LDLM_LOCK_PUT(lock);
+
+ RETURN(rc);
+}
+
+/* We always reserve enough space in the reply packet for a stripe MD, because
+ * we don't know in advance the file type. */
+int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+ struct lookup_intent *it, struct md_op_data *op_data,
+ struct lustre_handle *lockh, void *lmm, int lmmsize,
+ struct ptlrpc_request **reqp, __u64 extra_lock_flags)
+{
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct ptlrpc_request *req = NULL;
+ __u64 flags, saved_flags = extra_lock_flags;
+ int rc;
+ struct ldlm_res_id res_id;
+ static const ldlm_policy_data_t lookup_policy =
+ { .l_inodebits = { MDS_INODELOCK_LOOKUP } };
+ static const ldlm_policy_data_t update_policy =
+ { .l_inodebits = { MDS_INODELOCK_UPDATE } };
+ static const ldlm_policy_data_t layout_policy =
+ { .l_inodebits = { MDS_INODELOCK_LAYOUT } };
+ ldlm_policy_data_t const *policy = &lookup_policy;
+ int generation, resends = 0;
+ struct ldlm_reply *lockrep;
+ enum lvb_type lvb_type = 0;
+ ENTRY;
+
+ LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n",
+ einfo->ei_type);
+
+ fid_build_reg_res_name(&op_data->op_fid1, &res_id);
+
+ if (it) {
+ saved_flags |= LDLM_FL_HAS_INTENT;
+ if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR))
+ policy = &update_policy;
+ else if (it->it_op & IT_LAYOUT)
+ policy = &layout_policy;
+ }
+
+ LASSERT(reqp == NULL);
+
+ generation = obddev->u.cli.cl_import->imp_generation;
+resend:
+ flags = saved_flags;
+ if (!it) {
+ /* The only way right now is FLOCK, in this case we hide flock
+ policy as lmm, but lmmsize is 0 */
+ LASSERT(lmm && lmmsize == 0);
+ LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n",
+ einfo->ei_type);
+ policy = (ldlm_policy_data_t *)lmm;
+ res_id.name[3] = LDLM_FLOCK;
+ } else if (it->it_op & IT_OPEN) {
+ req = mdc_intent_open_pack(exp, it, op_data, lmm, lmmsize,
+ einfo->ei_cbdata);
+ policy = &update_policy;
+ einfo->ei_cbdata = NULL;
+ lmm = NULL;
+ } else if (it->it_op & IT_UNLINK) {
+ req = mdc_intent_unlink_pack(exp, it, op_data);
+ } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
+ req = mdc_intent_getattr_pack(exp, it, op_data);
+ } else if (it->it_op & IT_READDIR) {
+ req = mdc_enqueue_pack(exp, 0);
+ } else if (it->it_op & IT_LAYOUT) {
+ if (!imp_connect_lvb_type(class_exp2cliimp(exp)))
+ RETURN(-EOPNOTSUPP);
+
+ req = mdc_intent_layout_pack(exp, it, op_data);
+ lvb_type = LVB_T_LAYOUT;
+ } else {
+ LBUG();
+ RETURN(-EINVAL);
+ }
+
+ if (IS_ERR(req))
+ RETURN(PTR_ERR(req));
+
+ if (req != NULL && it && it->it_op & IT_CREAT)
+ /* ask ptlrpc not to resend on EINPROGRESS since we have our own
+ * retry logic */
+ req->rq_no_retry_einprogress = 1;
+
+ if (resends) {
+ req->rq_generation_set = 1;
+ req->rq_import_generation = generation;
+ req->rq_sent = cfs_time_current_sec() + resends;
+ }
+
+ /* It is important to obtain rpc_lock first (if applicable), so that
+ * threads that are serialised with rpc_lock are not polluting our
+ * rpcs in flight counter. We do not do flock request limiting, though*/
+ if (it) {
+ mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ rc = mdc_enter_request(&obddev->u.cli);
+ if (rc != 0) {
+ mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ mdc_clear_replay_flag(req, 0);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+ }
+
+ rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL,
+ 0, lvb_type, lockh, 0);
+ if (!it) {
+ /* For flock requests we immediatelly return without further
+ delay and let caller deal with the rest, since rest of
+ this function metadata processing makes no sense for flock
+ requests anyway */
+ RETURN(rc);
+ }
+
+ mdc_exit_request(&obddev->u.cli);
+ mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+
+ if (rc < 0) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ mdc_clear_replay_flag(req, rc);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+
+ lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
+ LASSERT(lockrep != NULL);
+
+ /* Retry the create infinitely when we get -EINPROGRESS from
+ * server. This is required by the new quota design. */
+ if (it && it->it_op & IT_CREAT &&
+ (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
+ mdc_clear_replay_flag(req, rc);
+ ptlrpc_req_finished(req);
+ resends++;
+
+ CDEBUG(D_HA, "%s: resend:%d op:%d "DFID"/"DFID"\n",
+ obddev->obd_name, resends, it->it_op,
+ PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));
+
+ if (generation == obddev->u.cli.cl_import->imp_generation) {
+ goto resend;
+ } else {
+ CDEBUG(D_HA, "resend cross eviction\n");
+ RETURN(-EIO);
+ }
+ }
+
+ rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
+ if (rc < 0) {
+ if (lustre_handle_is_used(lockh)) {
+ ldlm_lock_decref(lockh, einfo->ei_mode);
+ memset(lockh, 0, sizeof(*lockh));
+ }
+ ptlrpc_req_finished(req);
+ }
+ RETURN(rc);
+}
+
+static int mdc_finish_intent_lock(struct obd_export *exp,
+ struct ptlrpc_request *request,
+ struct md_op_data *op_data,
+ struct lookup_intent *it,
+ struct lustre_handle *lockh)
+{
+ struct lustre_handle old_lock;
+ struct mdt_body *mdt_body;
+ struct ldlm_lock *lock;
+ int rc;
+
+
+ LASSERT(request != NULL);
+ LASSERT(request != LP_POISON);
+ LASSERT(request->rq_repmsg != LP_POISON);
+
+ if (!it_disposition(it, DISP_IT_EXECD)) {
+ /* The server failed before it even started executing the
+ * intent, i.e. because it couldn't unpack the request. */
+ LASSERT(it->d.lustre.it_status != 0);
+ RETURN(it->d.lustre.it_status);
+ }
+ rc = it_open_error(DISP_IT_EXECD, it);
+ if (rc)
+ RETURN(rc);
+
+ mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ LASSERT(mdt_body != NULL); /* mdc_enqueue checked */
+
+ /* If we were revalidating a fid/name pair, mark the intent in
+ * case we fail and get called again from lookup */
+ if (fid_is_sane(&op_data->op_fid2) &&
+ it->it_create_mode & M_CHECK_STALE &&
+ it->it_op != IT_GETATTR) {
+ it_set_disposition(it, DISP_ENQ_COMPLETE);
+
+ /* Also: did we find the same inode? */
+ /* sever can return one of two fids:
+ * op_fid2 - new allocated fid - if file is created.
+ * op_fid3 - existent fid - if file only open.
+ * op_fid3 is saved in lmv_intent_open */
+ if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->fid1)) &&
+ (!lu_fid_eq(&op_data->op_fid3, &mdt_body->fid1))) {
+ CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
+ "\n", PFID(&op_data->op_fid2),
+ PFID(&op_data->op_fid2), PFID(&mdt_body->fid1));
+ RETURN(-ESTALE);
+ }
+ }
+
+ rc = it_open_error(DISP_LOOKUP_EXECD, it);
+ if (rc)
+ RETURN(rc);
+
+ /* keep requests around for the multiple phases of the call
+ * this shows the DISP_XX must guarantee we make it into the call
+ */
+ if (!it_disposition(it, DISP_ENQ_CREATE_REF) &&
+ it_disposition(it, DISP_OPEN_CREATE) &&
+ !it_open_error(DISP_OPEN_CREATE, it)) {
+ it_set_disposition(it, DISP_ENQ_CREATE_REF);
+ ptlrpc_request_addref(request); /* balanced in ll_create_node */
+ }
+ if (!it_disposition(it, DISP_ENQ_OPEN_REF) &&
+ it_disposition(it, DISP_OPEN_OPEN) &&
+ !it_open_error(DISP_OPEN_OPEN, it)) {
+ it_set_disposition(it, DISP_ENQ_OPEN_REF);
+ ptlrpc_request_addref(request); /* balanced in ll_file_open */
+ /* BUG 11546 - eviction in the middle of open rpc processing */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_ENQUEUE_PAUSE, obd_timeout);
+ }
+
+ if (it->it_op & IT_CREAT) {
+ /* XXX this belongs in ll_create_it */
+ } else if (it->it_op == IT_OPEN) {
+ LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+ } else {
+ LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP | IT_LAYOUT));
+ }
+
+ /* If we already have a matching lock, then cancel the new
+ * one. We have to set the data here instead of in
+ * mdc_enqueue, because we need to use the child's inode as
+ * the l_ast_data to match, and that's not available until
+ * intent_finish has performed the iget().) */
+ lock = ldlm_handle2lock(lockh);
+ if (lock) {
+ ldlm_policy_data_t policy = lock->l_policy_data;
+ LDLM_DEBUG(lock, "matching against this");
+
+ LASSERTF(fid_res_name_eq(&mdt_body->fid1,
+ &lock->l_resource->lr_name),
+ "Lock res_id: %lu/%lu/%lu, fid: %lu/%lu/%lu.\n",
+ (unsigned long)lock->l_resource->lr_name.name[0],
+ (unsigned long)lock->l_resource->lr_name.name[1],
+ (unsigned long)lock->l_resource->lr_name.name[2],
+ (unsigned long)fid_seq(&mdt_body->fid1),
+ (unsigned long)fid_oid(&mdt_body->fid1),
+ (unsigned long)fid_ver(&mdt_body->fid1));
+ LDLM_LOCK_PUT(lock);
+
+ memcpy(&old_lock, lockh, sizeof(*lockh));
+ if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
+ LDLM_IBITS, &policy, LCK_NL, &old_lock, 0)) {
+ ldlm_lock_decref_and_cancel(lockh,
+ it->d.lustre.it_lock_mode);
+ memcpy(lockh, &old_lock, sizeof(old_lock));
+ it->d.lustre.it_lock_handle = lockh->cookie;
+ }
+ }
+ CDEBUG(D_DENTRY,"D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
+ op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
+ it->d.lustre.it_status, it->d.lustre.it_disposition, rc);
+ RETURN(rc);
+}
+
+int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
+ struct lu_fid *fid, __u64 *bits)
+{
+ /* We could just return 1 immediately, but since we should only
+ * be called in revalidate_it if we already have a lock, let's
+ * verify that. */
+ struct ldlm_res_id res_id;
+ struct lustre_handle lockh;
+ ldlm_policy_data_t policy;
+ ldlm_mode_t mode;
+ ENTRY;
+
+ if (it->d.lustre.it_lock_handle) {
+ lockh.cookie = it->d.lustre.it_lock_handle;
+ mode = ldlm_revalidate_lock_handle(&lockh, bits);
+ } else {
+ fid_build_reg_res_name(fid, &res_id);
+ switch (it->it_op) {
+ case IT_GETATTR:
+ policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+ break;
+ case IT_LAYOUT:
+ policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT;
+ break;
+ default:
+ policy.l_inodebits.bits = MDS_INODELOCK_LOOKUP;
+ break;
+ }
+ mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED, &res_id,
+ LDLM_IBITS, &policy,
+ LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh, 0);
+ }
+
+ if (mode) {
+ it->d.lustre.it_lock_handle = lockh.cookie;
+ it->d.lustre.it_lock_mode = mode;
+ } else {
+ it->d.lustre.it_lock_handle = 0;
+ it->d.lustre.it_lock_mode = 0;
+ }
+
+ RETURN(!!mode);
+}
+
+/*
+ * This long block is all about fixing up the lock and request state
+ * so that it is correct as of the moment _before_ the operation was
+ * applied; that way, the VFS will think that everything is normal and
+ * call Lustre's regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want to lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what d.lustre.it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it status is the
+ * creation failure mode. In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
+ * child lookup.
+ */
+int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
+ void *lmm, int lmmsize, struct lookup_intent *it,
+ int lookup_flags, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking,
+ __u64 extra_lock_flags)
+{
+ struct lustre_handle lockh;
+ int rc = 0;
+ ENTRY;
+ LASSERT(it);
+
+ CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
+ ", intent: %s flags %#o\n", op_data->op_namelen,
+ op_data->op_name, PFID(&op_data->op_fid2),
+ PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
+ it->it_flags);
+
+ lockh.cookie = 0;
+ if (fid_is_sane(&op_data->op_fid2) &&
+ (it->it_op & (IT_LOOKUP | IT_GETATTR))) {
+ /* We could just return 1 immediately, but since we should only
+ * be called in revalidate_it if we already have a lock, let's
+ * verify that. */
+ it->d.lustre.it_lock_handle = 0;
+ rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL);
+ /* Only return failure if it was not GETATTR by cfid
+ (from inode_revalidate) */
+ if (rc || op_data->op_namelen != 0)
+ RETURN(rc);
+ }
+
+ /* lookup_it may be called only after revalidate_it has run, because
+ * revalidate_it cannot return errors, only zero. Returning zero causes
+ * this call to lookup, which *can* return an error.
+ *
+ * We only want to execute the request associated with the intent one
+ * time, however, so don't send the request again. Instead, skip past
+ * this and use the request from revalidate. In this case, revalidate
+ * never dropped its reference, so the refcounts are all OK */
+ if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
+ struct ldlm_enqueue_info einfo =
+ { LDLM_IBITS, it_to_lock_mode(it), cb_blocking,
+ ldlm_completion_ast, NULL, NULL, NULL };
+
+ /* For case if upper layer did not alloc fid, do it now. */
+ if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) {
+ rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+ if (rc < 0) {
+ CERROR("Can't alloc new fid, rc %d\n", rc);
+ RETURN(rc);
+ }
+ }
+ rc = mdc_enqueue(exp, &einfo, it, op_data, &lockh,
+ lmm, lmmsize, NULL, extra_lock_flags);
+ if (rc < 0)
+ RETURN(rc);
+ } else if (!fid_is_sane(&op_data->op_fid2) ||
+ !(it->it_create_mode & M_CHECK_STALE)) {
+ /* DISP_ENQ_COMPLETE set means there is extra reference on
+ * request referenced from this intent, saved for subsequent
+ * lookup. This path is executed when we proceed to this
+ * lookup, so we clear DISP_ENQ_COMPLETE */
+ it_clear_disposition(it, DISP_ENQ_COMPLETE);
+ }
+ *reqp = it->d.lustre.it_data;
+ rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh);
+ RETURN(rc);
+}
+
+static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *args, int rc)
+{
+ struct mdc_getattr_args *ga = args;
+ struct obd_export *exp = ga->ga_exp;
+ struct md_enqueue_info *minfo = ga->ga_minfo;
+ struct ldlm_enqueue_info *einfo = ga->ga_einfo;
+ struct lookup_intent *it;
+ struct lustre_handle *lockh;
+ struct obd_device *obddev;
+ __u64 flags = LDLM_FL_HAS_INTENT;
+ ENTRY;
+
+ it = &minfo->mi_it;
+ lockh = &minfo->mi_lockh;
+
+ obddev = class_exp2obd(exp);
+
+ mdc_exit_request(&obddev->u.cli);
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
+ rc = -ETIMEDOUT;
+
+ rc = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, 1, einfo->ei_mode,
+ &flags, NULL, 0, lockh, rc);
+ if (rc < 0) {
+ CERROR("ldlm_cli_enqueue_fini: %d\n", rc);
+ mdc_clear_replay_flag(req, rc);
+ GOTO(out, rc);
+ }
+
+ rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = mdc_finish_intent_lock(exp, req, &minfo->mi_data, it, lockh);
+ EXIT;
+
+out:
+ OBD_FREE_PTR(einfo);
+ minfo->mi_cb(req, minfo, rc);
+ return 0;
+}
+
+int mdc_intent_getattr_async(struct obd_export *exp,
+ struct md_enqueue_info *minfo,
+ struct ldlm_enqueue_info *einfo)
+{
+ struct md_op_data *op_data = &minfo->mi_data;
+ struct lookup_intent *it = &minfo->mi_it;
+ struct ptlrpc_request *req;
+ struct mdc_getattr_args *ga;
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct ldlm_res_id res_id;
+ /*XXX: Both MDS_INODELOCK_LOOKUP and MDS_INODELOCK_UPDATE are needed
+ * for statahead currently. Consider CMD in future, such two bits
+ * maybe managed by different MDS, should be adjusted then. */
+ ldlm_policy_data_t policy = {
+ .l_inodebits = { MDS_INODELOCK_LOOKUP |
+ MDS_INODELOCK_UPDATE }
+ };
+ int rc = 0;
+ __u64 flags = LDLM_FL_HAS_INTENT;
+ ENTRY;
+
+ CDEBUG(D_DLMTRACE,"name: %.*s in inode "DFID", intent: %s flags %#o\n",
+ op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
+ ldlm_it2str(it->it_op), it->it_flags);
+
+ fid_build_reg_res_name(&op_data->op_fid1, &res_id);
+ req = mdc_intent_getattr_pack(exp, it, op_data);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ rc = mdc_enter_request(&obddev->u.cli);
+ if (rc != 0) {
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+
+ rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, &policy, &flags, NULL,
+ 0, LVB_T_NONE, &minfo->mi_lockh, 1);
+ if (rc < 0) {
+ mdc_exit_request(&obddev->u.cli);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+
+ CLASSERT(sizeof(*ga) <= sizeof(req->rq_async_args));
+ ga = ptlrpc_req_async_args(req);
+ ga->ga_exp = exp;
+ ga->ga_minfo = minfo;
+ ga->ga_einfo = einfo;
+
+ req->rq_interpret_reply = mdc_intent_getattr_async_interpret;
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+
+ RETURN(0);
+}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
new file mode 100644
index 000000000000..5e25a07c52bd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -0,0 +1,489 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_MDC
+
+# include <linux/module.h>
+# include <linux/kernel.h>
+
+#include <obd_class.h>
+#include "mdc_internal.h"
+#include <lustre_fid.h>
+
+/* mdc_setattr does its own semaphore handling */
+static int mdc_reint(struct ptlrpc_request *request,
+ struct mdc_rpc_lock *rpc_lock,
+ int level)
+{
+ int rc;
+
+ request->rq_send_state = level;
+
+ mdc_get_rpc_lock(rpc_lock, NULL);
+ rc = ptlrpc_queue_wait(request);
+ mdc_put_rpc_lock(rpc_lock, NULL);
+ if (rc)
+ CDEBUG(D_INFO, "error in handling %d\n", rc);
+ else if (!req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY)) {
+ rc = -EPROTO;
+ }
+ return rc;
+}
+
+/* Find and cancel locally locks matched by inode @bits & @mode in the resource
+ * found by @fid. Found locks are added into @cancel list. Returns the amount of
+ * locks added to @cancels list. */
+int mdc_resource_get_unused(struct obd_export *exp, struct lu_fid *fid,
+ struct list_head *cancels, ldlm_mode_t mode,
+ __u64 bits)
+{
+ struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+ ldlm_policy_data_t policy = {{0}};
+ struct ldlm_res_id res_id;
+ struct ldlm_resource *res;
+ int count;
+ ENTRY;
+
+ /* Return, i.e. cancel nothing, only if ELC is supported (flag in
+ * export) but disabled through procfs (flag in NS).
+ *
+ * This distinguishes from a case when ELC is not supported originally,
+ * when we still want to cancel locks in advance and just cancel them
+ * locally, without sending any RPC. */
+ if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns))
+ RETURN(0);
+
+ fid_build_reg_res_name(fid, &res_id);
+ res = ldlm_resource_get(exp->exp_obd->obd_namespace,
+ NULL, &res_id, 0, 0);
+ if (res == NULL)
+ RETURN(0);
+ LDLM_RESOURCE_ADDREF(res);
+ /* Initialize ibits lock policy. */
+ policy.l_inodebits.bits = bits;
+ count = ldlm_cancel_resource_local(res, cancels, &policy,
+ mode, 0, 0, NULL);
+ LDLM_RESOURCE_DELREF(res);
+ ldlm_resource_putref(res);
+ RETURN(count);
+}
+
+int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request, struct md_open_data **mod)
+{
+ LIST_HEAD(cancels);
+ struct ptlrpc_request *req;
+ struct mdc_rpc_lock *rpc_lock;
+ struct obd_device *obd = exp->exp_obd;
+ int count = 0, rc;
+ __u64 bits;
+ ENTRY;
+
+ LASSERT(op_data != NULL);
+
+ bits = MDS_INODELOCK_UPDATE;
+ if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
+ bits |= MDS_INODELOCK_LOOKUP;
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+ (fid_is_sane(&op_data->op_fid1)) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ count = mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, LCK_EX, bits);
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_REINT_SETATTR);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ if ((op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) == 0)
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT,
+ 0);
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT,
+ ea2len);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ rpc_lock = obd->u.cli.cl_rpc_lock;
+
+ if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ CDEBUG(D_INODE, "setting mtime "CFS_TIME_T
+ ", ctime "CFS_TIME_T"\n",
+ LTIME_S(op_data->op_attr.ia_mtime),
+ LTIME_S(op_data->op_attr.ia_ctime));
+ mdc_setattr_pack(req, op_data, ea, ealen, ea2, ea2len);
+
+ ptlrpc_request_set_replen(req);
+ if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
+ req->rq_import->imp_replayable)
+ {
+ LASSERT(*mod == NULL);
+
+ *mod = obd_mod_alloc();
+ if (*mod == NULL) {
+ DEBUG_REQ(D_ERROR, req, "Can't allocate "
+ "md_open_data");
+ } else {
+ req->rq_replay = 1;
+ req->rq_cb_data = *mod;
+ (*mod)->mod_open_req = req;
+ req->rq_commit_cb = mdc_commit_open;
+ /**
+ * Take an extra reference on \var mod, it protects \var
+ * mod from being freed on eviction (commit callback is
+ * called despite rq_replay flag).
+ * Will be put on mdc_done_writing().
+ */
+ obd_mod_get(*mod);
+ }
+ }
+
+ rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
+
+ /* Save the obtained info in the original RPC for the replay case. */
+ if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) {
+ struct mdt_ioepoch *epoch;
+ struct mdt_body *body;
+
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(epoch != NULL);
+ LASSERT(body != NULL);
+ epoch->handle = body->handle;
+ epoch->ioepoch = body->ioepoch;
+ req->rq_replay_cb = mdc_replay_open;
+ /** bug 3633, open may be committed and estale answer is not error */
+ } else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
+ rc = 0;
+ } else if (rc == -ERESTARTSYS) {
+ rc = 0;
+ }
+ *request = req;
+ if (rc && req->rq_commit_cb) {
+ /* Put an extra reference on \var mod on error case. */
+ obd_mod_put(*mod);
+ req->rq_commit_cb(req);
+ }
+ RETURN(rc);
+}
+
+int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
+ const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+ cfs_cap_t cap_effective, __u64 rdev,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int level, rc;
+ int count, resends = 0;
+ struct obd_import *import = exp->exp_obd->u.cli.cl_import;
+ int generation = import->imp_generation;
+ LIST_HEAD(cancels);
+ ENTRY;
+
+ /* For case if upper layer did not alloc fid, do it now. */
+ if (!fid_is_sane(&op_data->op_fid2)) {
+ /*
+ * mdc_fid_alloc() may return errno 1 in case of switch to new
+ * sequence, handle this.
+ */
+ rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+ if (rc < 0) {
+ CERROR("Can't alloc new fid, rc %d\n", rc);
+ RETURN(rc);
+ }
+ }
+
+rebuild:
+ count = 0;
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+ (fid_is_sane(&op_data->op_fid1)))
+ count = mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_REINT_CREATE_RMT_ACL);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
+ data && datalen ? datalen : 0);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ /*
+ * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
+ * tgt, for symlinks or lov MD data.
+ */
+ mdc_create_pack(req, op_data, data, datalen, mode, uid,
+ gid, cap_effective, rdev);
+
+ ptlrpc_request_set_replen(req);
+
+ /* ask ptlrpc not to resend on EINPROGRESS since we have our own retry
+ * logic here */
+ req->rq_no_retry_einprogress = 1;
+
+ if (resends) {
+ req->rq_generation_set = 1;
+ req->rq_import_generation = generation;
+ req->rq_sent = cfs_time_current_sec() + resends;
+ }
+ level = LUSTRE_IMP_FULL;
+ resend:
+ rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);
+
+ /* Resend if we were told to. */
+ if (rc == -ERESTARTSYS) {
+ level = LUSTRE_IMP_RECOVER;
+ goto resend;
+ } else if (rc == -EINPROGRESS) {
+ /* Retry create infinitely until succeed or get other
+ * error code. */
+ ptlrpc_req_finished(req);
+ resends++;
+
+ CDEBUG(D_HA, "%s: resend:%d create on "DFID"/"DFID"\n",
+ exp->exp_obd->obd_name, resends,
+ PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));
+
+ if (generation == import->imp_generation) {
+ goto rebuild;
+ } else {
+ CDEBUG(D_HA, "resend cross eviction\n");
+ RETURN(-EIO);
+ }
+ } else if (rc == 0) {
+ struct mdt_body *body;
+ struct lustre_capa *capa;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body);
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ capa = req_capsule_server_get(&req->rq_pill,
+ &RMF_CAPA1);
+ if (capa == NULL)
+ rc = -EPROTO;
+ }
+ }
+
+ *request = req;
+ RETURN(rc);
+}
+
+int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ LIST_HEAD(cancels);
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req = *request;
+ int count = 0, rc;
+ ENTRY;
+
+ LASSERT(req == NULL);
+
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+ (fid_is_sane(&op_data->op_fid1)) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ count = mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
+ (fid_is_sane(&op_data->op_fid3)) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+ count += mdc_resource_get_unused(exp, &op_data->op_fid3,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_FULL);
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_REINT_UNLINK);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_unlink_pack(req, op_data);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obd->u.cli.cl_max_mds_easize);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
+ obd->u.cli.cl_max_mds_cookiesize);
+ ptlrpc_request_set_replen(req);
+
+ *request = req;
+
+ rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ if (rc == -ERESTARTSYS)
+ rc = 0;
+ RETURN(rc);
+}
+
+int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ LIST_HEAD(cancels);
+ struct obd_device *obd = exp->exp_obd;
+ struct ptlrpc_request *req;
+ int count = 0, rc;
+ ENTRY;
+
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
+ (fid_is_sane(&op_data->op_fid2)))
+ count = mdc_resource_get_unused(exp, &op_data->op_fid2,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+ (fid_is_sane(&op_data->op_fid1)))
+ count += mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_REINT_LINK);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_link_pack(req, op_data);
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ *request = req;
+ if (rc == -ERESTARTSYS)
+ rc = 0;
+
+ RETURN(rc);
+}
+
+int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new, int newlen,
+ struct ptlrpc_request **request)
+{
+ LIST_HEAD(cancels);
+ struct obd_device *obd = exp->exp_obd;
+ struct ptlrpc_request *req;
+ int count = 0, rc;
+ ENTRY;
+
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+ (fid_is_sane(&op_data->op_fid1)))
+ count = mdc_resource_get_unused(exp, &op_data->op_fid1,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
+ (fid_is_sane(&op_data->op_fid2)))
+ count += mdc_resource_get_unused(exp, &op_data->op_fid2,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_UPDATE);
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
+ (fid_is_sane(&op_data->op_fid3)))
+ count += mdc_resource_get_unused(exp, &op_data->op_fid3,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_LOOKUP);
+ if ((op_data->op_flags & MF_MDC_CANCEL_FID4) &&
+ (fid_is_sane(&op_data->op_fid4)))
+ count += mdc_resource_get_unused(exp, &op_data->op_fid4,
+ &cancels, LCK_EX,
+ MDS_INODELOCK_FULL);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_REINT_RENAME);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
+ req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, newlen+1);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ if (exp_connect_cancelset(exp) && req)
+ ldlm_cli_cancel_list(&cancels, count, req, 0);
+
+ mdc_rename_pack(req, op_data, old, oldlen, new, newlen);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obd->u.cli.cl_max_mds_easize);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
+ obd->u.cli.cl_max_mds_cookiesize);
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ *request = req;
+ if (rc == -ERESTARTSYS)
+ rc = 0;
+
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
new file mode 100644
index 000000000000..3cf9d8d3f2ec
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -0,0 +1,2753 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_MDC
+
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+# include <linux/utsname.h>
+
+#include <lustre_acl.h>
+#include <obd_class.h>
+#include <lustre_fid.h>
+#include <lprocfs_status.h>
+#include <lustre_param.h>
+#include <lustre_log.h>
+
+#include "mdc_internal.h"
+
+#define REQUEST_MINOR 244
+
+struct mdc_renew_capa_args {
+ struct obd_capa *ra_oc;
+ renew_capa_cb_t ra_cb;
+};
+
+static int mdc_cleanup(struct obd_device *obd);
+
+int mdc_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req,
+ const struct req_msg_field *field, struct obd_capa **oc)
+{
+ struct lustre_capa *capa;
+ struct obd_capa *c;
+ ENTRY;
+
+ /* swabbed already in mdc_enqueue */
+ capa = req_capsule_server_get(&req->rq_pill, field);
+ if (capa == NULL)
+ RETURN(-EPROTO);
+
+ c = alloc_capa(CAPA_SITE_CLIENT);
+ if (IS_ERR(c)) {
+ CDEBUG(D_INFO, "alloc capa failed!\n");
+ RETURN(PTR_ERR(c));
+ } else {
+ c->c_capa = *capa;
+ *oc = c;
+ RETURN(0);
+ }
+}
+
+static inline int mdc_queue_wait(struct ptlrpc_request *req)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ int rc;
+
+ /* mdc_enter_request() ensures that this client has no more
+ * than cl_max_rpcs_in_flight RPCs simultaneously inf light
+ * against an MDT. */
+ rc = mdc_enter_request(cli);
+ if (rc != 0)
+ return rc;
+
+ rc = ptlrpc_queue_wait(req);
+ mdc_exit_request(cli);
+
+ return rc;
+}
+
+/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
+static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
+ struct obd_capa **pc, int level, int msg_flags)
+{
+ struct ptlrpc_request *req;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_GETSTATUS,
+ LUSTRE_MDS_VERSION, MDS_GETSTATUS);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_pack_body(req, NULL, NULL, 0, 0, -1, 0);
+ lustre_msg_add_flags(req->rq_reqmsg, msg_flags);
+ req->rq_send_state = level;
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, pc);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ *rootfid = body->fid1;
+ CDEBUG(D_NET,
+ "root fid="DFID", last_committed="LPU64"\n",
+ PFID(rootfid),
+ lustre_msg_get_last_committed(req->rq_repmsg));
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+/* This should be mdc_get_info("rootfid") */
+int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid,
+ struct obd_capa **pc)
+{
+ return send_getstatus(class_exp2cliimp(exp), rootfid, pc,
+ LUSTRE_IMP_FULL, 0);
+}
+
+/*
+ * This function now is known to always saying that it will receive 4 buffers
+ * from server. Even for cases when acl_size and md_size is zero, RPC header
+ * will contain 4 fields and RPC itself will contain zero size fields. This is
+ * because mdt_getattr*() _always_ returns 4 fields, but if acl is not needed
+ * and thus zero, it shrinks it, making zero size. The same story about
+ * md_size. And this is course of problem when client waits for smaller number
+ * of fields. This issue will be fixed later when client gets aware of RPC
+ * layouts. --umka
+ */
+static int mdc_getattr_common(struct obd_export *exp,
+ struct ptlrpc_request *req)
+{
+ struct req_capsule *pill = &req->rq_pill;
+ struct mdt_body *body;
+ void *eadata;
+ int rc;
+ ENTRY;
+
+ /* Request message already built. */
+ rc = ptlrpc_queue_wait(req);
+ if (rc != 0)
+ RETURN(rc);
+
+ /* sanity check for the reply */
+ body = req_capsule_server_get(pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ RETURN(-EPROTO);
+
+ CDEBUG(D_NET, "mode: %o\n", body->mode);
+
+ if (body->eadatasize != 0) {
+ mdc_update_max_ea_from_body(exp, body);
+
+ eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
+ body->eadatasize);
+ if (eadata == NULL)
+ RETURN(-EPROTO);
+ }
+
+ if (body->valid & OBD_MD_FLRMTPERM) {
+ struct mdt_remote_perm *perm;
+
+ LASSERT(client_is_remote(exp));
+ perm = req_capsule_server_swab_get(pill, &RMF_ACL,
+ lustre_swab_mdt_remote_perm);
+ if (perm == NULL)
+ RETURN(-EPROTO);
+ }
+
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ struct lustre_capa *capa;
+ capa = req_capsule_server_get(pill, &RMF_CAPA1);
+ if (capa == NULL)
+ RETURN(-EPROTO);
+ }
+
+ RETURN(0);
+}
+
+int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ /* Single MDS without an LMV case */
+ if (op_data->op_flags & MF_GET_MDT_IDX) {
+ op_data->op_mds = 0;
+ RETURN(0);
+ }
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
+ op_data->op_valid, op_data->op_mode, -1, 0);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ op_data->op_mode);
+ if (op_data->op_valid & OBD_MD_FLRMTPERM) {
+ LASSERT(client_is_remote(exp));
+ req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
+ sizeof(struct mdt_remote_perm));
+ }
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_getattr_common(exp, req);
+ if (rc)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_GETATTR_NAME);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ op_data->op_namelen + 1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR_NAME);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
+ op_data->op_valid, op_data->op_mode,
+ op_data->op_suppgids[0], 0);
+
+ if (op_data->op_name) {
+ char *name = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ LASSERT(strnlen(op_data->op_name, op_data->op_namelen) ==
+ op_data->op_namelen);
+ memcpy(name, op_data->op_name, op_data->op_namelen);
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ op_data->op_mode);
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_getattr_common(exp, req);
+ if (rc)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+static int mdc_is_subdir(struct obd_export *exp,
+ const struct lu_fid *pfid,
+ const struct lu_fid *cfid,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int rc;
+
+ ENTRY;
+
+ *request = NULL;
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_MDS_IS_SUBDIR, LUSTRE_MDS_VERSION,
+ MDS_IS_SUBDIR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_is_subdir_pack(req, pfid, cfid, 0);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc && rc != -EREMOTE)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+static int mdc_xattr_common(struct obd_export *exp,const struct req_format *fmt,
+ const struct lu_fid *fid,
+ struct obd_capa *oc, int opcode, obd_valid valid,
+ const char *xattr_name, const char *input,
+ int input_size, int output_size, int flags,
+ __u32 suppgid, struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int xattr_namelen = 0;
+ char *tmp;
+ int rc;
+ ENTRY;
+
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), fmt);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, oc);
+ if (xattr_name) {
+ xattr_namelen = strlen(xattr_name) + 1;
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ xattr_namelen);
+ }
+ if (input_size) {
+ LASSERT(input);
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
+ input_size);
+ }
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, opcode);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ if (opcode == MDS_REINT) {
+ struct mdt_rec_setxattr *rec;
+
+ CLASSERT(sizeof(struct mdt_rec_setxattr) ==
+ sizeof(struct mdt_rec_reint));
+ rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+ rec->sx_opcode = REINT_SETXATTR;
+ /* TODO:
+ * cfs_curproc_fs{u,g}id() should replace
+ * current->fs{u,g}id for portability.
+ */
+ rec->sx_fsuid = current_fsuid();
+ rec->sx_fsgid = current_fsgid();
+ rec->sx_cap = cfs_curproc_cap_pack();
+ rec->sx_suppgid1 = suppgid;
+ rec->sx_suppgid2 = -1;
+ rec->sx_fid = *fid;
+ rec->sx_valid = valid | OBD_MD_FLCTIME;
+ rec->sx_time = cfs_time_current_sec();
+ rec->sx_size = output_size;
+ rec->sx_flags = flags;
+
+ mdc_pack_capa(req, &RMF_CAPA1, oc);
+ } else {
+ mdc_pack_body(req, fid, oc, valid, output_size, suppgid, flags);
+ }
+
+ if (xattr_name) {
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ memcpy(tmp, xattr_name, xattr_namelen);
+ }
+ if (input_size) {
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
+ memcpy(tmp, input, input_size);
+ }
+
+ if (req_capsule_has_field(&req->rq_pill, &RMF_EADATA, RCL_SERVER))
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA,
+ RCL_SERVER, output_size);
+ ptlrpc_request_set_replen(req);
+
+ /* make rpc */
+ if (opcode == MDS_REINT)
+ mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+
+ rc = ptlrpc_queue_wait(req);
+
+ if (opcode == MDS_REINT)
+ mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+
+ if (rc)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+ const char *input, int input_size, int output_size,
+ int flags, __u32 suppgid, struct ptlrpc_request **request)
+{
+ return mdc_xattr_common(exp, &RQF_MDS_REINT_SETXATTR,
+ fid, oc, MDS_REINT, valid, xattr_name,
+ input, input_size, output_size, flags,
+ suppgid, request);
+}
+
+int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+ const char *input, int input_size, int output_size,
+ int flags, struct ptlrpc_request **request)
+{
+ return mdc_xattr_common(exp, &RQF_MDS_GETXATTR,
+ fid, oc, MDS_GETXATTR, valid, xattr_name,
+ input, input_size, output_size, flags,
+ -1, request);
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
+{
+ struct req_capsule *pill = &req->rq_pill;
+ struct mdt_body *body = md->body;
+ struct posix_acl *acl;
+ void *buf;
+ int rc;
+ ENTRY;
+
+ if (!body->aclsize)
+ RETURN(0);
+
+ buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->aclsize);
+
+ if (!buf)
+ RETURN(-EPROTO);
+
+ acl = posix_acl_from_xattr(&init_user_ns, buf, body->aclsize);
+ if (IS_ERR(acl)) {
+ rc = PTR_ERR(acl);
+ CERROR("convert xattr to acl: %d\n", rc);
+ RETURN(rc);
+ }
+
+ rc = posix_acl_valid(acl);
+ if (rc) {
+ CERROR("validate acl: %d\n", rc);
+ posix_acl_release(acl);
+ RETURN(rc);
+ }
+
+ md->posix_acl = acl;
+ RETURN(0);
+}
+#else
+#define mdc_unpack_acl(req, md) 0
+#endif
+
+int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
+ struct obd_export *dt_exp, struct obd_export *md_exp,
+ struct lustre_md *md)
+{
+ struct req_capsule *pill = &req->rq_pill;
+ int rc;
+ ENTRY;
+
+ LASSERT(md);
+ memset(md, 0, sizeof(*md));
+
+ md->body = req_capsule_server_get(pill, &RMF_MDT_BODY);
+ LASSERT(md->body != NULL);
+
+ if (md->body->valid & OBD_MD_FLEASIZE) {
+ int lmmsize;
+ struct lov_mds_md *lmm;
+
+ if (!S_ISREG(md->body->mode)) {
+ CDEBUG(D_INFO, "OBD_MD_FLEASIZE set, should be a "
+ "regular file, but is not\n");
+ GOTO(out, rc = -EPROTO);
+ }
+
+ if (md->body->eadatasize == 0) {
+ CDEBUG(D_INFO, "OBD_MD_FLEASIZE set, "
+ "but eadatasize 0\n");
+ GOTO(out, rc = -EPROTO);
+ }
+ lmmsize = md->body->eadatasize;
+ lmm = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmmsize);
+ if (!lmm)
+ GOTO(out, rc = -EPROTO);
+
+ rc = obd_unpackmd(dt_exp, &md->lsm, lmm, lmmsize);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (rc < sizeof(*md->lsm)) {
+ CDEBUG(D_INFO, "lsm size too small: "
+ "rc < sizeof (*md->lsm) (%d < %d)\n",
+ rc, (int)sizeof(*md->lsm));
+ GOTO(out, rc = -EPROTO);
+ }
+
+ } else if (md->body->valid & OBD_MD_FLDIREA) {
+ int lmvsize;
+ struct lov_mds_md *lmv;
+
+ if(!S_ISDIR(md->body->mode)) {
+ CDEBUG(D_INFO, "OBD_MD_FLDIREA set, should be a "
+ "directory, but is not\n");
+ GOTO(out, rc = -EPROTO);
+ }
+
+ if (md->body->eadatasize == 0) {
+ CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, "
+ "but eadatasize 0\n");
+ RETURN(-EPROTO);
+ }
+ if (md->body->valid & OBD_MD_MEA) {
+ lmvsize = md->body->eadatasize;
+ lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
+ lmvsize);
+ if (!lmv)
+ GOTO(out, rc = -EPROTO);
+
+ rc = obd_unpackmd(md_exp, (void *)&md->mea, lmv,
+ lmvsize);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (rc < sizeof(*md->mea)) {
+ CDEBUG(D_INFO, "size too small: "
+ "rc < sizeof(*md->mea) (%d < %d)\n",
+ rc, (int)sizeof(*md->mea));
+ GOTO(out, rc = -EPROTO);
+ }
+ }
+ }
+ rc = 0;
+
+ if (md->body->valid & OBD_MD_FLRMTPERM) {
+ /* remote permission */
+ LASSERT(client_is_remote(exp));
+ md->remote_perm = req_capsule_server_swab_get(pill, &RMF_ACL,
+ lustre_swab_mdt_remote_perm);
+ if (!md->remote_perm)
+ GOTO(out, rc = -EPROTO);
+ }
+ else if (md->body->valid & OBD_MD_FLACL) {
+ /* for ACL, it's possible that FLACL is set but aclsize is zero.
+ * only when aclsize != 0 there's an actual segment for ACL
+ * in reply buffer.
+ */
+ if (md->body->aclsize) {
+ rc = mdc_unpack_acl(req, md);
+ if (rc)
+ GOTO(out, rc);
+#ifdef CONFIG_FS_POSIX_ACL
+ } else {
+ md->posix_acl = NULL;
+#endif
+ }
+ }
+ if (md->body->valid & OBD_MD_FLMDSCAPA) {
+ struct obd_capa *oc = NULL;
+
+ rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, &oc);
+ if (rc)
+ GOTO(out, rc);
+ md->mds_capa = oc;
+ }
+
+ if (md->body->valid & OBD_MD_FLOSSCAPA) {
+ struct obd_capa *oc = NULL;
+
+ rc = mdc_unpack_capa(NULL, req, &RMF_CAPA2, &oc);
+ if (rc)
+ GOTO(out, rc);
+ md->oss_capa = oc;
+ }
+
+ EXIT;
+out:
+ if (rc) {
+ if (md->oss_capa) {
+ capa_put(md->oss_capa);
+ md->oss_capa = NULL;
+ }
+ if (md->mds_capa) {
+ capa_put(md->mds_capa);
+ md->mds_capa = NULL;
+ }
+#ifdef CONFIG_FS_POSIX_ACL
+ posix_acl_release(md->posix_acl);
+#endif
+ if (md->lsm)
+ obd_free_memmd(dt_exp, &md->lsm);
+ }
+ return rc;
+}
+
+int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
+{
+ ENTRY;
+ RETURN(0);
+}
+
+/**
+ * Handles both OPEN and SETATTR RPCs for OPEN-CLOSE and SETATTR-DONE_WRITING
+ * RPC chains.
+ */
+void mdc_replay_open(struct ptlrpc_request *req)
+{
+ struct md_open_data *mod = req->rq_cb_data;
+ struct ptlrpc_request *close_req;
+ struct obd_client_handle *och;
+ struct lustre_handle old;
+ struct mdt_body *body;
+ ENTRY;
+
+ if (mod == NULL) {
+ DEBUG_REQ(D_ERROR, req,
+ "Can't properly replay without open data.");
+ EXIT;
+ return;
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body != NULL);
+
+ och = mod->mod_och;
+ if (och != NULL) {
+ struct lustre_handle *file_fh;
+
+ LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
+
+ file_fh = &och->och_fh;
+ CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n",
+ file_fh->cookie, body->handle.cookie);
+ old = *file_fh;
+ *file_fh = body->handle;
+ }
+ close_req = mod->mod_close_req;
+ if (close_req != NULL) {
+ __u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg);
+ struct mdt_ioepoch *epoch;
+
+ LASSERT(opc == MDS_CLOSE || opc == MDS_DONE_WRITING);
+ epoch = req_capsule_client_get(&close_req->rq_pill,
+ &RMF_MDT_EPOCH);
+ LASSERT(epoch);
+
+ if (och != NULL)
+ LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
+ DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
+ epoch->handle = body->handle;
+ }
+ EXIT;
+}
+
+void mdc_commit_open(struct ptlrpc_request *req)
+{
+ struct md_open_data *mod = req->rq_cb_data;
+ if (mod == NULL)
+ return;
+
+ /**
+ * No need to touch md_open_data::mod_och, it holds a reference on
+ * \var mod and will zero references to each other, \var mod will be
+ * freed after that when md_open_data::mod_och will put the reference.
+ */
+
+ /**
+ * Do not let open request to disappear as it still may be needed
+ * for close rpc to happen (it may happen on evict only, otherwise
+ * ptlrpc_request::rq_replay does not let mdc_commit_open() to be
+ * called), just mark this rpc as committed to distinguish these 2
+ * cases, see mdc_close() for details. The open request reference will
+ * be put along with freeing \var mod.
+ */
+ ptlrpc_request_addref(req);
+ spin_lock(&req->rq_lock);
+ req->rq_committed = 1;
+ spin_unlock(&req->rq_lock);
+ req->rq_cb_data = NULL;
+ obd_mod_put(mod);
+}
+
+int mdc_set_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och,
+ struct ptlrpc_request *open_req)
+{
+ struct md_open_data *mod;
+ struct mdt_rec_create *rec;
+ struct mdt_body *body;
+ struct obd_import *imp = open_req->rq_import;
+ ENTRY;
+
+ if (!open_req->rq_replay)
+ RETURN(0);
+
+ rec = req_capsule_client_get(&open_req->rq_pill, &RMF_REC_REINT);
+ body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(rec != NULL);
+ /* Incoming message in my byte order (it's been swabbed). */
+ /* Outgoing messages always in my byte order. */
+ LASSERT(body != NULL);
+
+ /* Only if the import is replayable, we set replay_open data */
+ if (och && imp->imp_replayable) {
+ mod = obd_mod_alloc();
+ if (mod == NULL) {
+ DEBUG_REQ(D_ERROR, open_req,
+ "Can't allocate md_open_data");
+ RETURN(0);
+ }
+
+ /**
+ * Take a reference on \var mod, to be freed on mdc_close().
+ * It protects \var mod from being freed on eviction (commit
+ * callback is called despite rq_replay flag).
+ * Another reference for \var och.
+ */
+ obd_mod_get(mod);
+ obd_mod_get(mod);
+
+ spin_lock(&open_req->rq_lock);
+ och->och_mod = mod;
+ mod->mod_och = och;
+ mod->mod_open_req = open_req;
+ open_req->rq_cb_data = mod;
+ open_req->rq_commit_cb = mdc_commit_open;
+ spin_unlock(&open_req->rq_lock);
+ }
+
+ rec->cr_fid2 = body->fid1;
+ rec->cr_ioepoch = body->ioepoch;
+ rec->cr_old_handle.cookie = body->handle.cookie;
+ open_req->rq_replay_cb = mdc_replay_open;
+ if (!fid_is_sane(&body->fid1)) {
+ DEBUG_REQ(D_ERROR, open_req, "Saving replay request with "
+ "insane fid");
+ LBUG();
+ }
+
+ DEBUG_REQ(D_RPCTRACE, open_req, "Set up open replay data");
+ RETURN(0);
+}
+
+int mdc_clear_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och)
+{
+ struct md_open_data *mod = och->och_mod;
+ ENTRY;
+
+ /**
+ * It is possible to not have \var mod in a case of eviction between
+ * lookup and ll_file_open().
+ **/
+ if (mod == NULL)
+ RETURN(0);
+
+ LASSERT(mod != LP_POISON);
+
+ mod->mod_och = NULL;
+ och->och_mod = NULL;
+ obd_mod_put(mod);
+
+ RETURN(0);
+}
+
+/* Prepares the request for the replay by the given reply */
+static void mdc_close_handle_reply(struct ptlrpc_request *req,
+ struct md_op_data *op_data, int rc) {
+ struct mdt_body *repbody;
+ struct mdt_ioepoch *epoch;
+
+ if (req && rc == -EAGAIN) {
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+
+ epoch->flags |= MF_SOM_AU;
+ if (repbody->valid & OBD_MD_FLGETATTRLOCK)
+ op_data->op_flags |= MF_GETATTR_LOCK;
+ }
+}
+
+int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_open_data *mod, struct ptlrpc_request **request)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_CLOSE);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
+ * portal whose threads are not taking any DLM locks and are therefore
+ * always progressing */
+ req->rq_request_portal = MDS_READPAGE_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ /* Ensure that this close's handle is fixed up during replay. */
+ if (likely(mod != NULL)) {
+ LASSERTF(mod->mod_open_req != NULL &&
+ mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED open %p!\n", mod->mod_open_req);
+
+ mod->mod_close_req = req;
+
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+ /* We no longer want to preserve this open for replay even
+ * though the open was committed. b=3632, b=3633 */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ } else {
+ CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
+ }
+
+ mdc_close_pack(req, op_data);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+ obd->u.cli.cl_max_mds_easize);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
+ obd->u.cli.cl_max_mds_cookiesize);
+
+ ptlrpc_request_set_replen(req);
+
+ mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+
+ if (req->rq_repmsg == NULL) {
+ CDEBUG(D_RPCTRACE, "request failed to send: %p, %d\n", req,
+ req->rq_status);
+ if (rc == 0)
+ rc = req->rq_status ?: -EIO;
+ } else if (rc == 0 || rc == -EAGAIN) {
+ struct mdt_body *body;
+
+ rc = lustre_msg_get_status(req->rq_repmsg);
+ if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
+ DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err "
+ "= %d", rc);
+ if (rc > 0)
+ rc = -rc;
+ }
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ rc = -EPROTO;
+ } else if (rc == -ESTALE) {
+ /**
+ * it can be allowed error after 3633 if open was committed and
+ * server failed before close was sent. Let's check if mod
+ * exists and return no error in that case
+ */
+ if (mod) {
+ DEBUG_REQ(D_HA, req, "Reset ESTALE = %d", rc);
+ LASSERT(mod->mod_open_req != NULL);
+ if (mod->mod_open_req->rq_committed)
+ rc = 0;
+ }
+ }
+
+ if (mod) {
+ if (rc != 0)
+ mod->mod_close_req = NULL;
+ /* Since now, mod is accessed through open_req only,
+ * thus close req does not keep a reference on mod anymore. */
+ obd_mod_put(mod);
+ }
+ *request = req;
+ mdc_close_handle_reply(req, op_data, rc);
+ RETURN(rc);
+}
+
+int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
+ struct md_open_data *mod)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_DONE_WRITING);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_DONE_WRITING);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ if (mod != NULL) {
+ LASSERTF(mod->mod_open_req != NULL &&
+ mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED setattr %p!\n", mod->mod_open_req);
+
+ mod->mod_close_req = req;
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched setattr");
+ /* We no longer want to preserve this setattr for replay even
+ * though the open was committed. b=3632, b=3633 */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ }
+
+ mdc_close_pack(req, op_data);
+ ptlrpc_request_set_replen(req);
+
+ mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+
+ if (rc == -ESTALE) {
+ /**
+ * it can be allowed error after 3633 if open or setattr were
+ * committed and server failed before close was sent.
+ * Let's check if mod exists and return no error in that case
+ */
+ if (mod) {
+ LASSERT(mod->mod_open_req != NULL);
+ if (mod->mod_open_req->rq_committed)
+ rc = 0;
+ }
+ }
+
+ if (mod) {
+ if (rc != 0)
+ mod->mod_close_req = NULL;
+ /* Since now, mod is accessed through setattr req only,
+ * thus DW req does not keep a reference on mod anymore. */
+ obd_mod_put(mod);
+ }
+
+ mdc_close_handle_reply(req, op_data, rc);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+
+int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data,
+ struct page **pages, struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ struct ptlrpc_bulk_desc *desc;
+ int i;
+ wait_queue_head_t waitq;
+ int resends = 0;
+ struct l_wait_info lwi;
+ int rc;
+ ENTRY;
+
+ *request = NULL;
+ init_waitqueue_head(&waitq);
+
+restart_bulk:
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ req->rq_request_portal = MDS_READPAGE_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK,
+ MDS_BULK_PORTAL);
+ if (desc == NULL) {
+ ptlrpc_request_free(req);
+ RETURN(-ENOMEM);
+ }
+
+ /* NB req now owns desc and will free it when it gets freed */
+ for (i = 0; i < op_data->op_npages; i++)
+ ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE);
+
+ mdc_readdir_pack(req, op_data->op_offset,
+ PAGE_CACHE_SIZE * op_data->op_npages,
+ &op_data->op_fid1, op_data->op_capa1);
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc) {
+ ptlrpc_req_finished(req);
+ if (rc != -ETIMEDOUT)
+ RETURN(rc);
+
+ resends++;
+ if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
+ CERROR("too many resend retries, returning error\n");
+ RETURN(-EIO);
+ }
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+
+ goto restart_bulk;
+ }
+
+ rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
+ req->rq_bulk->bd_nob_transferred);
+ if (rc < 0) {
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+
+ if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
+ CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
+ req->rq_bulk->bd_nob_transferred,
+ PAGE_CACHE_SIZE * op_data->op_npages);
+ ptlrpc_req_finished(req);
+ RETURN(-EPROTO);
+ }
+
+ *request = req;
+ RETURN(0);
+}
+
+static int mdc_statfs(const struct lu_env *env,
+ struct obd_export *exp, struct obd_statfs *osfs,
+ __u64 max_age, __u32 flags)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req;
+ struct obd_statfs *msfs;
+ struct obd_import *imp = NULL;
+ int rc;
+ ENTRY;
+
+ /*
+ * Since the request might also come from lprocfs, so we need
+ * sync this with client_disconnect_export Bug15684
+ */
+ down_read(&obd->u.cli.cl_sem);
+ if (obd->u.cli.cl_import)
+ imp = class_import_get(obd->u.cli.cl_import);
+ up_read(&obd->u.cli.cl_sem);
+ if (!imp)
+ RETURN(-ENODEV);
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
+ LUSTRE_MDS_VERSION, MDS_STATFS);
+ if (req == NULL)
+ GOTO(output, rc = -ENOMEM);
+
+ ptlrpc_request_set_replen(req);
+
+ if (flags & OBD_STATFS_NODELAY) {
+ /* procfs requests not want stay in wait for avoid deadlock */
+ req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
+ }
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc) {
+ /* check connection error first */
+ if (imp->imp_connect_error)
+ rc = imp->imp_connect_error;
+ GOTO(out, rc);
+ }
+
+ msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
+ if (msfs == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *osfs = *msfs;
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+output:
+ class_import_put(imp);
+ return rc;
+}
+
+static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf)
+{
+ __u32 keylen, vallen;
+ void *key;
+ int rc;
+
+ if (gf->gf_pathlen > PATH_MAX)
+ RETURN(-ENAMETOOLONG);
+ if (gf->gf_pathlen < 2)
+ RETURN(-EOVERFLOW);
+
+ /* Key is KEY_FID2PATH + getinfo_fid2path description */
+ keylen = cfs_size_round(sizeof(KEY_FID2PATH)) + sizeof(*gf);
+ OBD_ALLOC(key, keylen);
+ if (key == NULL)
+ RETURN(-ENOMEM);
+ memcpy(key, KEY_FID2PATH, sizeof(KEY_FID2PATH));
+ memcpy(key + cfs_size_round(sizeof(KEY_FID2PATH)), gf, sizeof(*gf));
+
+ CDEBUG(D_IOCTL, "path get "DFID" from "LPU64" #%d\n",
+ PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno);
+
+ if (!fid_is_sane(&gf->gf_fid))
+ GOTO(out, rc = -EINVAL);
+
+ /* Val is struct getinfo_fid2path result plus path */
+ vallen = sizeof(*gf) + gf->gf_pathlen;
+
+ rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf, NULL);
+ if (rc != 0 && rc != -EREMOTE)
+ GOTO(out, rc);
+
+ if (vallen <= sizeof(*gf))
+ GOTO(out, rc = -EPROTO);
+ else if (vallen > sizeof(*gf) + gf->gf_pathlen)
+ GOTO(out, rc = -EOVERFLOW);
+
+ CDEBUG(D_IOCTL, "path get "DFID" from "LPU64" #%d\n%s\n",
+ PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno, gf->gf_path);
+
+out:
+ OBD_FREE(key, keylen);
+ return rc;
+}
+
+static int mdc_ioc_hsm_progress(struct obd_export *exp,
+ struct hsm_progress_kernel *hpk)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct hsm_progress_kernel *req_hpk;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_PROGRESS,
+ LUSTRE_MDS_VERSION, MDS_HSM_PROGRESS);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+
+ /* Copy hsm_progress struct */
+ req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS);
+ if (req_hpk == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *req_hpk = *hpk;
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ GOTO(out, rc);
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives)
+{
+ __u32 *archive_mask;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_REGISTER,
+ LUSTRE_MDS_VERSION,
+ MDS_HSM_CT_REGISTER);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+
+ /* Copy hsm_progress struct */
+ archive_mask = req_capsule_client_get(&req->rq_pill,
+ &RMF_MDS_HSM_ARCHIVE);
+ if (archive_mask == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *archive_mask = archives;
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ GOTO(out, rc);
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_current_action(struct obd_export *exp,
+ struct md_op_data *op_data)
+{
+ struct hsm_current_action *hca = op_data->op_data;
+ struct hsm_current_action *req_hca;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_HSM_ACTION);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_ACTION);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
+ OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ req_hca = req_capsule_server_get(&req->rq_pill,
+ &RMF_MDS_HSM_CURRENT_ACTION);
+ if (req_hca == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *hca = *req_hca;
+
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_UNREGISTER,
+ LUSTRE_MDS_VERSION,
+ MDS_HSM_CT_UNREGISTER);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ GOTO(out, rc);
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_state_get(struct obd_export *exp,
+ struct md_op_data *op_data)
+{
+ struct hsm_user_state *hus = op_data->op_data;
+ struct hsm_user_state *req_hus;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_HSM_STATE_GET);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_GET);
+ if (rc != 0) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
+ OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ req_hus = req_capsule_server_get(&req->rq_pill, &RMF_HSM_USER_STATE);
+ if (req_hus == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *hus = *req_hus;
+
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_state_set(struct obd_export *exp,
+ struct md_op_data *op_data)
+{
+ struct hsm_state_set *hss = op_data->op_data;
+ struct hsm_state_set *req_hss;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_HSM_STATE_SET);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_SET);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1,
+ OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0);
+
+ /* Copy states */
+ req_hss = req_capsule_client_get(&req->rq_pill, &RMF_HSM_STATE_SET);
+ if (req_hss == NULL)
+ GOTO(out, rc = -EPROTO);
+ *req_hss = *hss;
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ GOTO(out, rc);
+
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_ioc_hsm_request(struct obd_export *exp,
+ struct hsm_user_request *hur)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct ptlrpc_request *req;
+ struct hsm_request *req_hr;
+ struct hsm_user_item *req_hui;
+ char *req_opaque;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(imp, &RQF_MDS_HSM_REQUEST);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM, RCL_CLIENT,
+ hur->hur_request.hr_itemcount
+ * sizeof(struct hsm_user_item));
+ req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, RCL_CLIENT,
+ hur->hur_request.hr_data_len);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_REQUEST);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+
+ /* Copy hsm_request struct */
+ req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST);
+ if (req_hr == NULL)
+ GOTO(out, rc = -EPROTO);
+ *req_hr = hur->hur_request;
+
+ /* Copy hsm_user_item structs */
+ req_hui = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM);
+ if (req_hui == NULL)
+ GOTO(out, rc = -EPROTO);
+ memcpy(req_hui, hur->hur_user_item,
+ hur->hur_request.hr_itemcount * sizeof(struct hsm_user_item));
+
+ /* Copy opaque field */
+ req_opaque = req_capsule_client_get(&req->rq_pill, &RMF_GENERIC_DATA);
+ if (req_opaque == NULL)
+ GOTO(out, rc = -EPROTO);
+ memcpy(req_opaque, hur_data(hur), hur->hur_request.hr_data_len);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = mdc_queue_wait(req);
+ GOTO(out, rc);
+
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
+{
+ struct kuc_hdr *lh = (struct kuc_hdr *)buf;
+
+ LASSERT(len <= CR_MAXSIZE);
+
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = KUC_TRANSPORT_CHANGELOG;
+ lh->kuc_flags = flags;
+ lh->kuc_msgtype = CL_RECORD;
+ lh->kuc_msglen = len;
+ return lh;
+}
+
+#define D_CHANGELOG 0
+
+struct changelog_show {
+ __u64 cs_startrec;
+ __u32 cs_flags;
+ struct file *cs_fp;
+ char *cs_buf;
+ struct obd_device *cs_obd;
+};
+
+static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *hdr, void *data)
+{
+ struct changelog_show *cs = data;
+ struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
+ struct kuc_hdr *lh;
+ int len, rc;
+ ENTRY;
+
+ if (rec->cr_hdr.lrh_type != CHANGELOG_REC) {
+ rc = -EINVAL;
+ CERROR("%s: not a changelog rec %x/%d: rc = %d\n",
+ cs->cs_obd->obd_name, rec->cr_hdr.lrh_type,
+ rec->cr.cr_type, rc);
+ RETURN(rc);
+ }
+
+ if (rec->cr.cr_index < cs->cs_startrec) {
+ /* Skip entries earlier than what we are interested in */
+ CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64"\n",
+ rec->cr.cr_index, cs->cs_startrec);
+ RETURN(0);
+ }
+
+ CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID
+ " %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
+ changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
+ rec->cr.cr_flags & CLF_FLAGMASK,
+ PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid),
+ rec->cr.cr_namelen, changelog_rec_name(&rec->cr));
+
+ len = sizeof(*lh) + changelog_rec_size(&rec->cr) + rec->cr.cr_namelen;
+
+ /* Set up the message */
+ lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags);
+ memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
+
+ rc = libcfs_kkuc_msg_put(cs->cs_fp, lh);
+ CDEBUG(D_CHANGELOG, "kucmsg fp %p len %d rc %d\n", cs->cs_fp, len,rc);
+
+ RETURN(rc);
+}
+
+static int mdc_changelog_send_thread(void *csdata)
+{
+ struct changelog_show *cs = csdata;
+ struct llog_ctxt *ctxt = NULL;
+ struct llog_handle *llh = NULL;
+ struct kuc_hdr *kuch;
+ int rc;
+
+ CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n",
+ cs->cs_fp, cs->cs_startrec);
+
+ OBD_ALLOC(cs->cs_buf, CR_MAXSIZE);
+ if (cs->cs_buf == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ /* Set up the remote catalog handle */
+ ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT);
+ if (ctxt == NULL)
+ GOTO(out, rc = -ENOENT);
+ rc = llog_open(NULL, ctxt, &llh, NULL, CHANGELOG_CATALOG,
+ LLOG_OPEN_EXISTS);
+ if (rc) {
+ CERROR("%s: fail to open changelog catalog: rc = %d\n",
+ cs->cs_obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+ rc = llog_init_handle(NULL, llh, LLOG_F_IS_CAT, NULL);
+ if (rc) {
+ CERROR("llog_init_handle failed %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ rc = llog_cat_process(NULL, llh, changelog_kkuc_cb, cs, 0, 0);
+
+ /* Send EOF no matter what our result */
+ if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch),
+ cs->cs_flags))) {
+ kuch->kuc_msgtype = CL_EOF;
+ libcfs_kkuc_msg_put(cs->cs_fp, kuch);
+ }
+
+out:
+ fput(cs->cs_fp);
+ if (llh)
+ llog_cat_close(NULL, llh);
+ if (ctxt)
+ llog_ctxt_put(ctxt);
+ if (cs->cs_buf)
+ OBD_FREE(cs->cs_buf, CR_MAXSIZE);
+ OBD_FREE_PTR(cs);
+ return rc;
+}
+
+static int mdc_ioc_changelog_send(struct obd_device *obd,
+ struct ioc_changelog *icc)
+{
+ struct changelog_show *cs;
+ int rc;
+
+ /* Freed in mdc_changelog_send_thread */
+ OBD_ALLOC_PTR(cs);
+ if (!cs)
+ return -ENOMEM;
+
+ cs->cs_obd = obd;
+ cs->cs_startrec = icc->icc_recno;
+ /* matching fput in mdc_changelog_send_thread */
+ cs->cs_fp = fget(icc->icc_id);
+ cs->cs_flags = icc->icc_flags;
+
+ /*
+ * New thread because we should return to user app before
+ * writing into our pipe
+ */
+ rc = PTR_ERR(kthread_run(mdc_changelog_send_thread, cs,
+ "mdc_clg_send_thread"));
+ if (!IS_ERR_VALUE(rc)) {
+ CDEBUG(D_CHANGELOG, "start changelog thread\n");
+ return 0;
+ }
+
+ CERROR("Failed to start changelog thread: %d\n", rc);
+ OBD_FREE_PTR(cs);
+ return rc;
+}
+
+static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
+ struct lustre_kernelcomm *lk);
+
+static int mdc_quotacheck(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ struct ptlrpc_request *req;
+ struct obd_quotactl *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_MDS_QUOTACHECK, LUSTRE_MDS_VERSION,
+ MDS_QUOTACHECK);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ *body = *oqctl;
+
+ ptlrpc_request_set_replen(req);
+
+ /* the next poll will find -ENODATA, that means quotacheck is
+ * going on */
+ cli->cl_qchk_stat = -ENODATA;
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ cli->cl_qchk_stat = rc;
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+static int mdc_quota_poll_check(struct obd_export *exp,
+ struct if_quotacheck *qchk)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ int rc;
+ ENTRY;
+
+ qchk->obd_uuid = cli->cl_target_uuid;
+ memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME));
+
+ rc = cli->cl_qchk_stat;
+ /* the client is not the previous one */
+ if (rc == CL_NOT_QUOTACHECKED)
+ rc = -EINTR;
+ RETURN(rc);
+}
+
+static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct ptlrpc_request *req;
+ struct obd_quotactl *oqc;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION,
+ MDS_QUOTACTL);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ *oqc = *oqctl;
+
+ ptlrpc_request_set_replen(req);
+ ptlrpc_at_set_req_timeout(req);
+ req->rq_no_resend = 1;
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc);
+
+ if (req->rq_repmsg &&
+ (oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL))) {
+ *oqctl = *oqc;
+ } else if (!rc) {
+ CERROR ("Can't unpack obd_quotactl\n");
+ rc = -EPROTO;
+ }
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+static int mdc_ioc_swap_layouts(struct obd_export *exp,
+ struct md_op_data *op_data)
+{
+ LIST_HEAD(cancels);
+ struct ptlrpc_request *req;
+ int rc, count;
+ struct mdc_swap_layouts *msl, *payload;
+ ENTRY;
+
+ msl = op_data->op_data;
+
+ /* When the MDT will get the MDS_SWAP_LAYOUTS RPC the
+ * first thing it will do is to cancel the 2 layout
+ * locks hold by this client.
+ * So the client must cancel its layout locks on the 2 fids
+ * with the request RPC to avoid extra RPC round trips
+ */
+ count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels,
+ LCK_CR, MDS_INODELOCK_LAYOUT);
+ count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels,
+ LCK_CR, MDS_INODELOCK_LAYOUT);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_MDS_SWAP_LAYOUTS);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+
+ mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+ mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2);
+
+ rc = mdc_prep_elc_req(exp, req, MDS_SWAP_LAYOUTS, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_swap_layouts_pack(req, op_data);
+
+ payload = req_capsule_client_get(&req->rq_pill, &RMF_SWAP_LAYOUTS);
+ LASSERT(payload);
+
+ *payload = *msl;
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+ EXIT;
+
+out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_ioctl_data *data = karg;
+ struct obd_import *imp = obd->u.cli.cl_import;
+ struct llog_ctxt *ctxt;
+ int rc;
+ ENTRY;
+
+ if (!try_module_get(THIS_MODULE)) {
+ CERROR("Can't get module. Is it alive?");
+ return -EINVAL;
+ }
+ switch (cmd) {
+ case OBD_IOC_CHANGELOG_SEND:
+ rc = mdc_ioc_changelog_send(obd, karg);
+ GOTO(out, rc);
+ case OBD_IOC_CHANGELOG_CLEAR: {
+ struct ioc_changelog *icc = karg;
+ struct changelog_setinfo cs =
+ {.cs_recno = icc->icc_recno, .cs_id = icc->icc_id};
+ rc = obd_set_info_async(NULL, exp, strlen(KEY_CHANGELOG_CLEAR),
+ KEY_CHANGELOG_CLEAR, sizeof(cs), &cs,
+ NULL);
+ GOTO(out, rc);
+ }
+ case OBD_IOC_FID2PATH:
+ rc = mdc_ioc_fid2path(exp, karg);
+ GOTO(out, rc);
+ case LL_IOC_HSM_CT_START:
+ rc = mdc_ioc_hsm_ct_start(exp, karg);
+ GOTO(out, rc);
+ case LL_IOC_HSM_PROGRESS:
+ rc = mdc_ioc_hsm_progress(exp, karg);
+ GOTO(out, rc);
+ case LL_IOC_HSM_STATE_GET:
+ rc = mdc_ioc_hsm_state_get(exp, karg);
+ GOTO(out, rc);
+ case LL_IOC_HSM_STATE_SET:
+ rc = mdc_ioc_hsm_state_set(exp, karg);
+ case LL_IOC_HSM_ACTION:
+ rc = mdc_ioc_hsm_current_action(exp, karg);
+ GOTO(out, rc);
+ case LL_IOC_HSM_REQUEST:
+ rc = mdc_ioc_hsm_request(exp, karg);
+ GOTO(out, rc);
+ case OBD_IOC_CLIENT_RECOVER:
+ rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1, 0);
+ if (rc < 0)
+ GOTO(out, rc);
+ GOTO(out, rc = 0);
+ case IOC_OSC_SET_ACTIVE:
+ rc = ptlrpc_set_import_active(imp, data->ioc_offset);
+ GOTO(out, rc);
+ case OBD_IOC_PARSE: {
+ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
+ rc = class_config_parse_llog(NULL, ctxt, data->ioc_inlbuf1,
+ NULL);
+ llog_ctxt_put(ctxt);
+ GOTO(out, rc);
+ }
+ case OBD_IOC_LLOG_INFO:
+ case OBD_IOC_LLOG_PRINT: {
+ ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
+ rc = llog_ioctl(NULL, ctxt, cmd, data);
+ llog_ctxt_put(ctxt);
+ GOTO(out, rc);
+ }
+ case OBD_IOC_POLL_QUOTACHECK:
+ rc = mdc_quota_poll_check(exp, (struct if_quotacheck *)karg);
+ GOTO(out, rc);
+ case OBD_IOC_PING_TARGET:
+ rc = ptlrpc_obd_ping(obd);
+ GOTO(out, rc);
+ /*
+ * Normally IOC_OBD_STATFS, OBD_IOC_QUOTACTL iocontrol are handled by
+ * LMV instead of MDC. But when the cluster is upgraded from 1.8,
+ * there'd be no LMV layer thus we might be called here. Eventually
+ * this code should be removed.
+ * bz20731, LU-592.
+ */
+ case IOC_OBD_STATFS: {
+ struct obd_statfs stat_buf = {0};
+
+ if (*((__u32 *) data->ioc_inlbuf2) != 0)
+ GOTO(out, rc = -ENODEV);
+
+ /* copy UUID */
+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(obd),
+ min((int) data->ioc_plen2,
+ (int) sizeof(struct obd_uuid))))
+ GOTO(out, rc = -EFAULT);
+
+ rc = mdc_statfs(NULL, obd->obd_self_export, &stat_buf,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ 0);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ if (copy_to_user(data->ioc_pbuf1, &stat_buf,
+ min((int) data->ioc_plen1,
+ (int) sizeof(stat_buf))))
+ GOTO(out, rc = -EFAULT);
+
+ GOTO(out, rc = 0);
+ }
+ case OBD_IOC_QUOTACTL: {
+ struct if_quotactl *qctl = karg;
+ struct obd_quotactl *oqctl;
+
+ OBD_ALLOC_PTR(oqctl);
+ if (!oqctl)
+ RETURN(-ENOMEM);
+
+ QCTL_COPY(oqctl, qctl);
+ rc = obd_quotactl(exp, oqctl);
+ if (rc == 0) {
+ QCTL_COPY(qctl, oqctl);
+ qctl->qc_valid = QC_MDTIDX;
+ qctl->obd_uuid = obd->u.cli.cl_target_uuid;
+ }
+ OBD_FREE_PTR(oqctl);
+ break;
+ }
+ case LL_IOC_GET_CONNECT_FLAGS: {
+ if (copy_to_user(uarg,
+ exp_connect_flags_ptr(exp),
+ sizeof(__u64)))
+ GOTO(out, rc = -EFAULT);
+ else
+ GOTO(out, rc = 0);
+ }
+ case LL_IOC_LOV_SWAP_LAYOUTS: {
+ rc = mdc_ioc_swap_layouts(exp, karg);
+ break;
+ }
+ default:
+ CERROR("mdc_ioctl(): unrecognised ioctl %#x\n", cmd);
+ GOTO(out, rc = -ENOTTY);
+ }
+out:
+ module_put(THIS_MODULE);
+
+ return rc;
+}
+
+int mdc_get_info_rpc(struct obd_export *exp,
+ obd_count keylen, void *key,
+ int vallen, void *val)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct ptlrpc_request *req;
+ char *tmp;
+ int rc = -EINVAL;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(imp, &RQF_MDS_GET_INFO);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_KEY,
+ RCL_CLIENT, keylen);
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VALLEN,
+ RCL_CLIENT, sizeof(__u32));
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_VALLEN);
+ memcpy(tmp, &vallen, sizeof(__u32));
+
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VAL,
+ RCL_SERVER, vallen);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ /* -EREMOTE means the get_info result is partial, and it needs to
+ * continue on another MDT, see fid2path part in lmv_iocontrol */
+ if (rc == 0 || rc == -EREMOTE) {
+ tmp = req_capsule_server_get(&req->rq_pill, &RMF_GETINFO_VAL);
+ memcpy(val, tmp, vallen);
+ if (ptlrpc_rep_need_swab(req)) {
+ if (KEY_IS(KEY_FID2PATH))
+ lustre_swab_fid2path(val);
+ }
+ }
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+static void lustre_swab_hai(struct hsm_action_item *h)
+{
+ __swab32s(&h->hai_len);
+ __swab32s(&h->hai_action);
+ lustre_swab_lu_fid(&h->hai_fid);
+ lustre_swab_lu_fid(&h->hai_dfid);
+ __swab64s(&h->hai_cookie);
+ __swab64s(&h->hai_extent.offset);
+ __swab64s(&h->hai_extent.length);
+ __swab64s(&h->hai_gid);
+}
+
+static void lustre_swab_hal(struct hsm_action_list *h)
+{
+ struct hsm_action_item *hai;
+ int i;
+
+ __swab32s(&h->hal_version);
+ __swab32s(&h->hal_count);
+ __swab32s(&h->hal_archive_id);
+ __swab64s(&h->hal_flags);
+ hai = hai_zero(h);
+ for (i = 0; i < h->hal_count; i++) {
+ lustre_swab_hai(hai);
+ hai = hai_next(hai);
+ }
+}
+
+static void lustre_swab_kuch(struct kuc_hdr *l)
+{
+ __swab16s(&l->kuc_magic);
+ /* __u8 l->kuc_transport */
+ __swab16s(&l->kuc_msgtype);
+ __swab16s(&l->kuc_msglen);
+}
+
+static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
+ struct lustre_kernelcomm *lk)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ __u32 archive = lk->lk_data;
+ int rc = 0;
+
+ if (lk->lk_group != KUC_GRP_HSM) {
+ CERROR("Bad copytool group %d\n", lk->lk_group);
+ return -EINVAL;
+ }
+
+ CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd,
+ lk->lk_uid, lk->lk_group, lk->lk_flags);
+
+ if (lk->lk_flags & LK_FLG_STOP) {
+ rc = libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group);
+ /* Unregister with the coordinator */
+ if (rc == 0)
+ rc = mdc_ioc_hsm_ct_unregister(imp);
+ } else {
+ struct file *fp = fget(lk->lk_wfd);
+
+ rc = libcfs_kkuc_group_add(fp, lk->lk_uid, lk->lk_group,
+ lk->lk_data);
+ if (rc && fp)
+ fput(fp);
+ if (rc == 0)
+ rc = mdc_ioc_hsm_ct_register(imp, archive);
+ }
+
+ return rc;
+}
+
+/**
+ * Send a message to any listening copytools
+ * @param val KUC message (kuc_hdr + hsm_action_list)
+ * @param len total length of message
+ */
+static int mdc_hsm_copytool_send(int len, void *val)
+{
+ struct kuc_hdr *lh = (struct kuc_hdr *)val;
+ struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1);
+ int rc;
+ ENTRY;
+
+ if (len < sizeof(*lh) + sizeof(*hal)) {
+ CERROR("Short HSM message %d < %d\n", len,
+ (int) (sizeof(*lh) + sizeof(*hal)));
+ RETURN(-EPROTO);
+ }
+ if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
+ lustre_swab_kuch(lh);
+ lustre_swab_hal(hal);
+ } else if (lh->kuc_magic != KUC_MAGIC) {
+ CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC);
+ RETURN(-EPROTO);
+ }
+
+ CDEBUG(D_HSM, " Received message mg=%x t=%d m=%d l=%d actions=%d "
+ "on %s\n",
+ lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype,
+ lh->kuc_msglen, hal->hal_count, hal->hal_fsname);
+
+ /* Broadcast to HSM listeners */
+ rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh);
+
+ RETURN(rc);
+}
+
+/**
+ * callback function passed to kuc for re-registering each HSM copytool
+ * running on MDC, after MDT shutdown/recovery.
+ * @param data archive id served by the copytool
+ * @param cb_arg callback argument (obd_import)
+ */
+static int mdc_hsm_ct_reregister(__u32 data, void *cb_arg)
+{
+ struct obd_import *imp = (struct obd_import *)cb_arg;
+ __u32 archive = data;
+ int rc;
+
+ CDEBUG(D_HA, "recover copytool registration to MDT (archive=%#x)\n",
+ archive);
+ rc = mdc_ioc_hsm_ct_register(imp, archive);
+
+ /* ignore error if the copytool is already registered */
+ return ((rc != 0) && (rc != -EEXIST)) ? rc : 0;
+}
+
+/**
+ * Re-establish all kuc contexts with MDT
+ * after MDT shutdown/recovery.
+ */
+static int mdc_kuc_reregister(struct obd_import *imp)
+{
+ /* re-register HSM agents */
+ return libcfs_kkuc_group_foreach(KUC_GRP_HSM, mdc_hsm_ct_reregister,
+ (void *)imp);
+}
+
+int mdc_set_info_async(const struct lu_env *env,
+ struct obd_export *exp,
+ obd_count keylen, void *key,
+ obd_count vallen, void *val,
+ struct ptlrpc_request_set *set)
+{
+ struct obd_import *imp = class_exp2cliimp(exp);
+ int rc;
+ ENTRY;
+
+ if (KEY_IS(KEY_READ_ONLY)) {
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+
+ spin_lock(&imp->imp_lock);
+ if (*((int *)val)) {
+ imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY;
+ imp->imp_connect_data.ocd_connect_flags |=
+ OBD_CONNECT_RDONLY;
+ } else {
+ imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY;
+ imp->imp_connect_data.ocd_connect_flags &=
+ ~OBD_CONNECT_RDONLY;
+ }
+ spin_unlock(&imp->imp_lock);
+
+ rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
+ keylen, key, vallen, val, set);
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_SPTLRPC_CONF)) {
+ sptlrpc_conf_client_adapt(exp->exp_obd);
+ RETURN(0);
+ }
+ if (KEY_IS(KEY_FLUSH_CTX)) {
+ sptlrpc_import_flush_my_ctx(imp);
+ RETURN(0);
+ }
+ if (KEY_IS(KEY_MDS_CONN)) {
+ /* mds-mds import */
+ spin_lock(&imp->imp_lock);
+ imp->imp_server_timeout = 1;
+ spin_unlock(&imp->imp_lock);
+ imp->imp_client->cli_request_portal = MDS_MDS_PORTAL;
+ CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name);
+ RETURN(0);
+ }
+ if (KEY_IS(KEY_CHANGELOG_CLEAR)) {
+ rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
+ keylen, key, vallen, val, set);
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_HSM_COPYTOOL_SEND)) {
+ rc = mdc_hsm_copytool_send(vallen, val);
+ RETURN(rc);
+ }
+
+ CERROR("Unknown key %s\n", (char *)key);
+ RETURN(-EINVAL);
+}
+
+int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm)
+{
+ int rc = -EINVAL;
+
+ if (KEY_IS(KEY_MAX_EASIZE)) {
+ int mdsize, *max_easize;
+
+ if (*vallen != sizeof(int))
+ RETURN(-EINVAL);
+ mdsize = *(int*)val;
+ if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
+ exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
+ max_easize = val;
+ *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
+ RETURN(0);
+ } else if (KEY_IS(KEY_CONN_DATA)) {
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct obd_connect_data *data = val;
+
+ if (*vallen != sizeof(*data))
+ RETURN(-EINVAL);
+
+ *data = imp->imp_connect_data;
+ RETURN(0);
+ } else if (KEY_IS(KEY_TGT_COUNT)) {
+ *((int *)val) = 1;
+ RETURN(0);
+ }
+
+ rc = mdc_get_info_rpc(exp, keylen, key, *vallen, val);
+
+ RETURN(rc);
+}
+
+static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct obd_client_handle *handle,
+ int flags)
+{
+ struct ptlrpc_request *req;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_PIN);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, oc);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_PIN);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, fid, oc, 0, 0, -1, flags);
+
+ ptlrpc_request_set_replen(req);
+
+ mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ if (rc) {
+ CERROR("Pin failed: %d\n", rc);
+ GOTO(err_out, rc);
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(err_out, rc = -EPROTO);
+
+ handle->och_fh = body->handle;
+ handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+
+ handle->och_mod = obd_mod_alloc();
+ if (handle->och_mod == NULL) {
+ DEBUG_REQ(D_ERROR, req, "can't allocate md_open_data");
+ GOTO(err_out, rc = -ENOMEM);
+ }
+ handle->och_mod->mod_open_req = req; /* will be dropped by unpin */
+
+ RETURN(0);
+
+err_out:
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+static int mdc_unpin(struct obd_export *exp, struct obd_client_handle *handle,
+ int flag)
+{
+ struct ptlrpc_request *req;
+ struct mdt_body *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_UNPIN,
+ LUSTRE_MDS_VERSION, MDS_UNPIN);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY);
+ body->handle = handle->och_fh;
+ body->flags = flag;
+
+ ptlrpc_request_set_replen(req);
+
+ mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+
+ if (rc != 0)
+ CERROR("Unpin failed: %d\n", rc);
+
+ ptlrpc_req_finished(req);
+ ptlrpc_req_finished(handle->och_mod->mod_open_req);
+
+ obd_mod_put(handle->och_mod);
+ RETURN(rc);
+}
+
+int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_SYNC);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, oc);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_SYNC);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, fid, oc, 0, 0, -1, 0);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
+ enum obd_import_event event)
+{
+ int rc = 0;
+
+ LASSERT(imp->imp_obd == obd);
+
+ switch (event) {
+ case IMP_EVENT_DISCON: {
+#if 0
+ /* XXX Pass event up to OBDs stack. used only for FLD now */
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DISCON, NULL);
+#endif
+ break;
+ }
+ case IMP_EVENT_INACTIVE: {
+ struct client_obd *cli = &obd->u.cli;
+ /*
+ * Flush current sequence to make client obtain new one
+ * from server in case of disconnect/reconnect.
+ */
+ if (cli->cl_seq != NULL)
+ seq_client_flush(cli->cl_seq);
+
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
+ break;
+ }
+ case IMP_EVENT_INVALIDATE: {
+ struct ldlm_namespace *ns = obd->obd_namespace;
+
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+
+ break;
+ }
+ case IMP_EVENT_ACTIVE:
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
+ /* restore re-establish kuc registration after reconnecting */
+ if (rc == 0)
+ rc = mdc_kuc_reregister(imp);
+ break;
+ case IMP_EVENT_OCD:
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
+ break;
+ case IMP_EVENT_DEACTIVATE:
+ case IMP_EVENT_ACTIVATE:
+ break;
+ default:
+ CERROR("Unknown import event %x\n", event);
+ LBUG();
+ }
+ RETURN(rc);
+}
+
+int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
+ struct md_op_data *op_data)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ struct lu_client_seq *seq = cli->cl_seq;
+ ENTRY;
+ RETURN(seq_client_alloc_fid(NULL, seq, fid));
+}
+
+struct obd_uuid *mdc_get_uuid(struct obd_export *exp) {
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ return &cli->cl_target_uuid;
+}
+
+/**
+ * Determine whether the lock can be canceled before replaying it during
+ * recovery, non zero value will be return if the lock can be canceled,
+ * or zero returned for not
+ */
+static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+{
+ if (lock->l_resource->lr_type != LDLM_IBITS)
+ RETURN(0);
+
+ /* FIXME: if we ever get into a situation where there are too many
+ * opened files with open locks on a single node, then we really
+ * should replay these open locks to reget it */
+ if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
+ RETURN(0);
+
+ RETURN(1);
+}
+
+static int mdc_resource_inode_free(struct ldlm_resource *res)
+{
+ if (res->lr_lvb_inode)
+ res->lr_lvb_inode = NULL;
+
+ return 0;
+}
+
+struct ldlm_valblock_ops inode_lvbo = {
+ lvbo_free: mdc_resource_inode_free
+};
+
+static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
+{
+ struct client_obd *cli = &obd->u.cli;
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
+ if (!cli->cl_rpc_lock)
+ RETURN(-ENOMEM);
+ mdc_init_rpc_lock(cli->cl_rpc_lock);
+
+ ptlrpcd_addref();
+
+ OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+ if (!cli->cl_close_lock)
+ GOTO(err_rpc_lock, rc = -ENOMEM);
+ mdc_init_rpc_lock(cli->cl_close_lock);
+
+ rc = client_obd_setup(obd, cfg);
+ if (rc)
+ GOTO(err_close_lock, rc);
+ lprocfs_mdc_init_vars(&lvars);
+ lprocfs_obd_setup(obd, lvars.obd_vars);
+ sptlrpc_lprocfs_cliobd_attach(obd);
+ ptlrpc_lprocfs_register_obd(obd);
+
+ ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+
+ obd->obd_namespace->ns_lvbo = &inode_lvbo;
+
+ rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
+ if (rc) {
+ mdc_cleanup(obd);
+ CERROR("failed to setup llogging subsystems\n");
+ }
+
+ RETURN(rc);
+
+err_close_lock:
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+err_rpc_lock:
+ OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+/* Initialize the default and maximum LOV EA and cookie sizes. This allows
+ * us to make MDS RPCs with large enough reply buffers to hold the
+ * maximum-sized (= maximum striped) EA and cookie without having to
+ * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
+static int mdc_init_ea_size(struct obd_export *exp, int easize,
+ int def_easize, int cookiesize)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct client_obd *cli = &obd->u.cli;
+ ENTRY;
+
+ if (cli->cl_max_mds_easize < easize)
+ cli->cl_max_mds_easize = easize;
+
+ if (cli->cl_default_mds_easize < def_easize)
+ cli->cl_default_mds_easize = def_easize;
+
+ if (cli->cl_max_mds_cookiesize < cookiesize)
+ cli->cl_max_mds_cookiesize = cookiesize;
+
+ RETURN(0);
+}
+
+static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ break;
+ case OBD_CLEANUP_EXPORTS:
+ /* Failsafe, ok if racy */
+ if (obd->obd_type->typ_refcnt <= 1)
+ libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
+
+ obd_cleanup_client_import(obd);
+ ptlrpc_lprocfs_unregister_obd(obd);
+ lprocfs_obd_cleanup(obd);
+
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ }
+ RETURN(rc);
+}
+
+static int mdc_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+
+ OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+
+ ptlrpcd_decref();
+
+ return client_obd_cleanup(obd);
+}
+
+
+static int mdc_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *tgt, int *index)
+{
+ struct llog_ctxt *ctxt;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(olg == &obd->obd_olg);
+
+ rc = llog_setup(NULL, obd, olg, LLOG_CHANGELOG_REPL_CTXT, tgt,
+ &llog_client_ops);
+ if (rc)
+ RETURN(rc);
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT);
+ llog_initiator_connect(ctxt);
+ llog_ctxt_put(ctxt);
+
+ RETURN(0);
+}
+
+static int mdc_llog_finish(struct obd_device *obd, int count)
+{
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+
+ RETURN(0);
+}
+
+static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct lustre_cfg *lcfg = buf;
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc = 0;
+
+ lprocfs_mdc_init_vars(&lvars);
+ switch (lcfg->lcfg_command) {
+ default:
+ rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars,
+ lcfg, obd);
+ if (rc > 0)
+ rc = 0;
+ break;
+ }
+ return(rc);
+}
+
+
+/* get remote permission for current user on fid */
+int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, __u32 suppgid,
+ struct ptlrpc_request **request)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ LASSERT(client_is_remote(exp));
+
+ *request = NULL;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ mdc_set_capa_size(req, &RMF_CAPA1, oc);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ mdc_pack_body(req, fid, oc, OBD_MD_FLRMTPERM, 0, suppgid, 0);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
+ sizeof(struct mdt_remote_perm));
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ ptlrpc_req_finished(req);
+ else
+ *request = req;
+ RETURN(rc);
+}
+
+static int mdc_interpret_renew_capa(const struct lu_env *env,
+ struct ptlrpc_request *req, void *args,
+ int status)
+{
+ struct mdc_renew_capa_args *ra = args;
+ struct mdt_body *body = NULL;
+ struct lustre_capa *capa;
+ ENTRY;
+
+ if (status)
+ GOTO(out, capa = ERR_PTR(status));
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(out, capa = ERR_PTR(-EFAULT));
+
+ if ((body->valid & OBD_MD_FLOSSCAPA) == 0)
+ GOTO(out, capa = ERR_PTR(-ENOENT));
+
+ capa = req_capsule_server_get(&req->rq_pill, &RMF_CAPA2);
+ if (!capa)
+ GOTO(out, capa = ERR_PTR(-EFAULT));
+ EXIT;
+out:
+ ra->ra_cb(ra->ra_oc, capa);
+ return 0;
+}
+
+static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc,
+ renew_capa_cb_t cb)
+{
+ struct ptlrpc_request *req;
+ struct mdc_renew_capa_args *ra;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_GETATTR,
+ LUSTRE_MDS_VERSION, MDS_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ /* NB, OBD_MD_FLOSSCAPA is set here, but it doesn't necessarily mean the
+ * capa to renew is oss capa.
+ */
+ mdc_pack_body(req, &oc->c_capa.lc_fid, oc, OBD_MD_FLOSSCAPA, 0, -1, 0);
+ ptlrpc_request_set_replen(req);
+
+ CLASSERT(sizeof(*ra) <= sizeof(req->rq_async_args));
+ ra = ptlrpc_req_async_args(req);
+ ra->ra_oc = oc;
+ ra->ra_cb = cb;
+ req->rq_interpret_reply = mdc_interpret_renew_capa;
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+ RETURN(0);
+}
+
+static int mdc_connect(const struct lu_env *env,
+ struct obd_export **exp,
+ struct obd_device *obd, struct obd_uuid *cluuid,
+ struct obd_connect_data *data,
+ void *localdata)
+{
+ struct obd_import *imp = obd->u.cli.cl_import;
+
+ /* mds-mds import features */
+ if (data && (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS)) {
+ spin_lock(&imp->imp_lock);
+ imp->imp_server_timeout = 1;
+ spin_unlock(&imp->imp_lock);
+ imp->imp_client->cli_request_portal = MDS_MDS_PORTAL;
+ CDEBUG(D_OTHER, "%s: Set 'mds' portal and timeout\n",
+ obd->obd_name);
+ }
+
+ return client_connect_import(env, exp, obd, cluuid, data, NULL);
+}
+
+struct obd_ops mdc_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = mdc_setup,
+ .o_precleanup = mdc_precleanup,
+ .o_cleanup = mdc_cleanup,
+ .o_add_conn = client_import_add_conn,
+ .o_del_conn = client_import_del_conn,
+ .o_connect = mdc_connect,
+ .o_disconnect = client_disconnect_export,
+ .o_iocontrol = mdc_iocontrol,
+ .o_set_info_async = mdc_set_info_async,
+ .o_statfs = mdc_statfs,
+ .o_pin = mdc_pin,
+ .o_unpin = mdc_unpin,
+ .o_fid_init = client_fid_init,
+ .o_fid_fini = client_fid_fini,
+ .o_fid_alloc = mdc_fid_alloc,
+ .o_import_event = mdc_import_event,
+ .o_llog_init = mdc_llog_init,
+ .o_llog_finish = mdc_llog_finish,
+ .o_get_info = mdc_get_info,
+ .o_process_config = mdc_process_config,
+ .o_get_uuid = mdc_get_uuid,
+ .o_quotactl = mdc_quotactl,
+ .o_quotacheck = mdc_quotacheck
+};
+
+struct md_ops mdc_md_ops = {
+ .m_getstatus = mdc_getstatus,
+ .m_null_inode = mdc_null_inode,
+ .m_find_cbdata = mdc_find_cbdata,
+ .m_close = mdc_close,
+ .m_create = mdc_create,
+ .m_done_writing = mdc_done_writing,
+ .m_enqueue = mdc_enqueue,
+ .m_getattr = mdc_getattr,
+ .m_getattr_name = mdc_getattr_name,
+ .m_intent_lock = mdc_intent_lock,
+ .m_link = mdc_link,
+ .m_is_subdir = mdc_is_subdir,
+ .m_rename = mdc_rename,
+ .m_setattr = mdc_setattr,
+ .m_setxattr = mdc_setxattr,
+ .m_getxattr = mdc_getxattr,
+ .m_sync = mdc_sync,
+ .m_readpage = mdc_readpage,
+ .m_unlink = mdc_unlink,
+ .m_cancel_unused = mdc_cancel_unused,
+ .m_init_ea_size = mdc_init_ea_size,
+ .m_set_lock_data = mdc_set_lock_data,
+ .m_lock_match = mdc_lock_match,
+ .m_get_lustre_md = mdc_get_lustre_md,
+ .m_free_lustre_md = mdc_free_lustre_md,
+ .m_set_open_replay_data = mdc_set_open_replay_data,
+ .m_clear_open_replay_data = mdc_clear_open_replay_data,
+ .m_renew_capa = mdc_renew_capa,
+ .m_unpack_capa = mdc_unpack_capa,
+ .m_get_remote_perm = mdc_get_remote_perm,
+ .m_intent_getattr_async = mdc_intent_getattr_async,
+ .m_revalidate_lock = mdc_revalidate_lock
+};
+
+int __init mdc_init(void)
+{
+ int rc;
+ struct lprocfs_static_vars lvars = { 0 };
+ lprocfs_mdc_init_vars(&lvars);
+
+ rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, lvars.module_vars,
+ LUSTRE_MDC_NAME, NULL);
+ RETURN(rc);
+}
+
+static void /*__exit*/ mdc_exit(void)
+{
+ class_unregister_type(LUSTRE_MDC_NAME);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Metadata Client");
+MODULE_LICENSE("GPL");
+
+module_init(mdc_init);
+module_exit(mdc_exit);
diff --git a/drivers/staging/lustre/lustre/mgc/Makefile b/drivers/staging/lustre/lustre/mgc/Makefile
new file mode 100644
index 000000000000..267246344e1c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mgc/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += mgc.o
+mgc-y := mgc_request.o lproc_mgc.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/mgc/libmgc.c b/drivers/staging/lustre/lustre/mgc/libmgc.c
new file mode 100644
index 000000000000..442146cc7e60
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mgc/libmgc.c
@@ -0,0 +1,166 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mgc/libmgc.c
+ *
+ * Lustre Management Client
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+/* Minimal MGC for liblustre: only used to read the config log from the MGS
+ at setup time, no updates. */
+
+#define DEBUG_SUBSYSTEM S_MGC
+
+#include <liblustre.h>
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+
+
+static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ int rc;
+ ENTRY;
+
+ ptlrpcd_addref();
+
+ rc = client_obd_setup(obd, lcfg);
+ if (rc)
+ GOTO(err_decref, rc);
+
+ /* liblustre only support null flavor to MGS */
+ obd->u.cli.cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_NULL;
+
+ rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
+ if (rc) {
+ CERROR("failed to setup llogging subsystems\n");
+ GOTO(err_cleanup, rc);
+ }
+
+ RETURN(rc);
+
+err_cleanup:
+ client_obd_cleanup(obd);
+err_decref:
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ case OBD_CLEANUP_EXPORTS:
+ obd_cleanup_client_import(obd);
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ }
+ RETURN(rc);
+}
+
+static int mgc_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc;
+ ENTRY;
+
+ LASSERT(cli->cl_mgc_vfsmnt == NULL);
+
+ ptlrpcd_decref();
+
+ rc = client_obd_cleanup(obd);
+ RETURN(rc);
+}
+
+static int mgc_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *tgt, int *index)
+{
+ struct llog_ctxt *ctxt;
+ int rc;
+ ENTRY;
+
+ LASSERT(olg == &obd->obd_olg);
+ rc = llog_setup(NULL, obd, olg, LLOG_CONFIG_REPL_CTXT, tgt,
+ &llog_client_ops);
+ if (rc < 0)
+ RETURN(rc);
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_CONFIG_REPL_CTXT);
+ llog_initiator_connect(ctxt);
+ llog_ctxt_put(ctxt);
+
+ RETURN(rc);
+}
+
+static int mgc_llog_finish(struct obd_device *obd, int count)
+{
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+
+ RETURN(0);
+}
+
+struct obd_ops mgc_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = mgc_setup,
+ .o_precleanup = mgc_precleanup,
+ .o_cleanup = mgc_cleanup,
+ .o_add_conn = client_import_add_conn,
+ .o_del_conn = client_import_del_conn,
+ .o_connect = client_connect_import,
+ .o_disconnect = client_disconnect_export,
+ .o_llog_init = mgc_llog_init,
+ .o_llog_finish = mgc_llog_finish,
+};
+
+int __init mgc_init(void)
+{
+ return class_register_type(&mgc_obd_ops, NULL,
+ NULL, LUSTRE_MGC_NAME, NULL);
+}
diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
new file mode 100644
index 000000000000..1105eaa24313
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
@@ -0,0 +1,84 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#include <linux/vfs.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include "mgc_internal.h"
+
+#ifdef LPROCFS
+
+LPROC_SEQ_FOPS_RO_TYPE(mgc, uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mgc, connect_flags);
+LPROC_SEQ_FOPS_RO_TYPE(mgc, server_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mgc, conn_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(mgc, import);
+LPROC_SEQ_FOPS_RO_TYPE(mgc, state);
+
+LPROC_SEQ_FOPS_WR_ONLY(mgc, ping);
+
+static int mgc_ir_state_seq_show(struct seq_file *m, void *v)
+{
+ return lprocfs_mgc_rd_ir_state(m, m->private);
+}
+LPROC_SEQ_FOPS_RO(mgc_ir_state);
+
+static struct lprocfs_vars lprocfs_mgc_obd_vars[] = {
+ { "uuid", &mgc_uuid_fops, 0, 0 },
+ { "ping", &mgc_ping_fops, 0, 0222 },
+ { "connect_flags", &mgc_connect_flags_fops, 0, 0 },
+ { "mgs_server_uuid", &mgc_server_uuid_fops, 0, 0 },
+ { "mgs_conn_uuid", &mgc_conn_uuid_fops, 0, 0 },
+ { "import", &mgc_import_fops, 0, 0 },
+ { "state", &mgc_state_fops, 0, 0 },
+ { "ir_state", &mgc_ir_state_fops, 0, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(mgc, numrefs);
+static struct lprocfs_vars lprocfs_mgc_module_vars[] = {
+ { "num_refs", &mgc_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_mgc_module_vars;
+ lvars->obd_vars = lprocfs_mgc_obd_vars;
+}
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
new file mode 100644
index 000000000000..dbd698272a84
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
@@ -0,0 +1,73 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _MGC_INTERNAL_H
+#define _MGC_INTERNAL_H
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_export.h>
+
+#ifdef LPROCFS
+void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars);
+int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data);
+#else
+static void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+static inline int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data)
+{
+ return 0;
+}
+#endif /* LPROCFS */
+
+int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld);
+
+static inline int cld_is_sptlrpc(struct config_llog_data *cld)
+{
+ return cld->cld_type == CONFIG_T_SPTLRPC;
+}
+
+static inline int cld_is_recover(struct config_llog_data *cld)
+{
+ return cld->cld_type == CONFIG_T_RECOVER;
+}
+
+#endif /* _MGC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
new file mode 100644
index 000000000000..c6c84d97ce4e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -0,0 +1,1860 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mgc/mgc_request.c
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_MGC
+#define D_MGC D_CONFIG /*|D_WARNING*/
+
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_log.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include "mgc_internal.h"
+
+static int mgc_name2resid(char *name, int len, struct ldlm_res_id *res_id,
+ int type)
+{
+ __u64 resname = 0;
+
+ if (len > 8) {
+ CERROR("name too long: %s\n", name);
+ return -EINVAL;
+ }
+ if (len <= 0) {
+ CERROR("missing name: %s\n", name);
+ return -EINVAL;
+ }
+ memcpy(&resname, name, len);
+
+ /* Always use the same endianness for the resid */
+ memset(res_id, 0, sizeof(*res_id));
+ res_id->name[0] = cpu_to_le64(resname);
+ /* XXX: unfortunately, sptlprc and config llog share one lock */
+ switch(type) {
+ case CONFIG_T_CONFIG:
+ case CONFIG_T_SPTLRPC:
+ resname = 0;
+ break;
+ case CONFIG_T_RECOVER:
+ resname = type;
+ break;
+ default:
+ LBUG();
+ }
+ res_id->name[1] = cpu_to_le64(resname);
+ CDEBUG(D_MGC, "log %s to resid "LPX64"/"LPX64" (%.8s)\n", name,
+ res_id->name[0], res_id->name[1], (char *)&res_id->name[0]);
+ return 0;
+}
+
+int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type)
+{
+ /* fsname is at most 8 chars long, maybe contain "-".
+ * e.g. "lustre", "SUN-000" */
+ return mgc_name2resid(fsname, strlen(fsname), res_id, type);
+}
+EXPORT_SYMBOL(mgc_fsname2resid);
+
+int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type)
+{
+ char *name_end;
+ int len;
+
+ /* logname consists of "fsname-nodetype".
+ * e.g. "lustre-MDT0001", "SUN-000-client" */
+ name_end = strrchr(logname, '-');
+ LASSERT(name_end);
+ len = name_end - logname;
+ return mgc_name2resid(logname, len, res_id, type);
+}
+
+/********************** config llog list **********************/
+static LIST_HEAD(config_llog_list);
+static DEFINE_SPINLOCK(config_list_lock);
+
+/* Take a reference to a config log */
+static int config_log_get(struct config_llog_data *cld)
+{
+ ENTRY;
+ atomic_inc(&cld->cld_refcount);
+ CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
+ atomic_read(&cld->cld_refcount));
+ RETURN(0);
+}
+
+/* Drop a reference to a config log. When no longer referenced,
+ we can free the config log data */
+static void config_log_put(struct config_llog_data *cld)
+{
+ ENTRY;
+
+ CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
+ atomic_read(&cld->cld_refcount));
+ LASSERT(atomic_read(&cld->cld_refcount) > 0);
+
+ /* spinlock to make sure no item with 0 refcount in the list */
+ if (atomic_dec_and_lock(&cld->cld_refcount, &config_list_lock)) {
+ list_del(&cld->cld_list_chain);
+ spin_unlock(&config_list_lock);
+
+ CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
+
+ if (cld->cld_recover)
+ config_log_put(cld->cld_recover);
+ if (cld->cld_sptlrpc)
+ config_log_put(cld->cld_sptlrpc);
+ if (cld_is_sptlrpc(cld))
+ sptlrpc_conf_log_stop(cld->cld_logname);
+
+ class_export_put(cld->cld_mgcexp);
+ OBD_FREE(cld, sizeof(*cld) + strlen(cld->cld_logname) + 1);
+ }
+
+ EXIT;
+}
+
+/* Find a config log by name */
+static
+struct config_llog_data *config_log_find(char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct config_llog_data *cld;
+ struct config_llog_data *found = NULL;
+ void * instance;
+ ENTRY;
+
+ LASSERT(logname != NULL);
+
+ instance = cfg ? cfg->cfg_instance : NULL;
+ spin_lock(&config_list_lock);
+ list_for_each_entry(cld, &config_llog_list, cld_list_chain) {
+ /* check if instance equals */
+ if (instance != cld->cld_cfg.cfg_instance)
+ continue;
+
+ /* instance may be NULL, should check name */
+ if (strcmp(logname, cld->cld_logname) == 0) {
+ found = cld;
+ break;
+ }
+ }
+ if (found) {
+ atomic_inc(&found->cld_refcount);
+ LASSERT(found->cld_stopping == 0 || cld_is_sptlrpc(found) == 0);
+ }
+ spin_unlock(&config_list_lock);
+ RETURN(found);
+}
+
+static
+struct config_llog_data *do_config_log_add(struct obd_device *obd,
+ char *logname,
+ int type,
+ struct config_llog_instance *cfg,
+ struct super_block *sb)
+{
+ struct config_llog_data *cld;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MGC, "do adding config log %s:%p\n", logname,
+ cfg ? cfg->cfg_instance : 0);
+
+ OBD_ALLOC(cld, sizeof(*cld) + strlen(logname) + 1);
+ if (!cld)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ strcpy(cld->cld_logname, logname);
+ if (cfg)
+ cld->cld_cfg = *cfg;
+ else
+ cld->cld_cfg.cfg_callback = class_config_llog_handler;
+ mutex_init(&cld->cld_lock);
+ cld->cld_cfg.cfg_last_idx = 0;
+ cld->cld_cfg.cfg_flags = 0;
+ cld->cld_cfg.cfg_sb = sb;
+ cld->cld_type = type;
+ atomic_set(&cld->cld_refcount, 1);
+
+ /* Keep the mgc around until we are done */
+ cld->cld_mgcexp = class_export_get(obd->obd_self_export);
+
+ if (cld_is_sptlrpc(cld)) {
+ sptlrpc_conf_log_start(logname);
+ cld->cld_cfg.cfg_obdname = obd->obd_name;
+ }
+
+ rc = mgc_logname2resid(logname, &cld->cld_resid, type);
+
+ spin_lock(&config_list_lock);
+ list_add(&cld->cld_list_chain, &config_llog_list);
+ spin_unlock(&config_list_lock);
+
+ if (rc) {
+ config_log_put(cld);
+ RETURN(ERR_PTR(rc));
+ }
+
+ if (cld_is_sptlrpc(cld)) {
+ rc = mgc_process_log(obd, cld);
+ if (rc && rc != -ENOENT)
+ CERROR("failed processing sptlrpc log: %d\n", rc);
+ }
+
+ RETURN(cld);
+}
+
+static struct config_llog_data *config_recover_log_add(struct obd_device *obd,
+ char *fsname,
+ struct config_llog_instance *cfg,
+ struct super_block *sb)
+{
+ struct config_llog_instance lcfg = *cfg;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct config_llog_data *cld;
+ char logname[32];
+
+ if (IS_OST(lsi))
+ return NULL;
+
+ /* for osp-on-ost, see lustre_start_osp() */
+ if (IS_MDT(lsi) && lcfg.cfg_instance)
+ return NULL;
+
+ /* we have to use different llog for clients and mdts for cmd
+ * where only clients are notified if one of cmd server restarts */
+ LASSERT(strlen(fsname) < sizeof(logname) / 2);
+ strcpy(logname, fsname);
+ if (IS_SERVER(lsi)) { /* mdt */
+ LASSERT(lcfg.cfg_instance == NULL);
+ lcfg.cfg_instance = sb;
+ strcat(logname, "-mdtir");
+ } else {
+ LASSERT(lcfg.cfg_instance != NULL);
+ strcat(logname, "-cliir");
+ }
+
+ cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb);
+ return cld;
+}
+
+
+/** Add this log to the list of active logs watched by an MGC.
+ * Active means we're watching for updates.
+ * We have one active log per "mount" - client instance or servername.
+ * Each instance may be at a different point in the log.
+ */
+static int config_log_add(struct obd_device *obd, char *logname,
+ struct config_llog_instance *cfg,
+ struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct config_llog_data *cld;
+ struct config_llog_data *sptlrpc_cld;
+ char seclogname[32];
+ char *ptr;
+ ENTRY;
+
+ CDEBUG(D_MGC, "adding config log %s:%p\n", logname, cfg->cfg_instance);
+
+ /*
+ * for each regular log, the depended sptlrpc log name is
+ * <fsname>-sptlrpc. multiple regular logs may share one sptlrpc log.
+ */
+ ptr = strrchr(logname, '-');
+ if (ptr == NULL || ptr - logname > 8) {
+ CERROR("logname %s is too long\n", logname);
+ RETURN(-EINVAL);
+ }
+
+ memcpy(seclogname, logname, ptr - logname);
+ strcpy(seclogname + (ptr - logname), "-sptlrpc");
+
+ sptlrpc_cld = config_log_find(seclogname, NULL);
+ if (sptlrpc_cld == NULL) {
+ sptlrpc_cld = do_config_log_add(obd, seclogname,
+ CONFIG_T_SPTLRPC, NULL, NULL);
+ if (IS_ERR(sptlrpc_cld)) {
+ CERROR("can't create sptlrpc log: %s\n", seclogname);
+ RETURN(PTR_ERR(sptlrpc_cld));
+ }
+ }
+
+ cld = do_config_log_add(obd, logname, CONFIG_T_CONFIG, cfg, sb);
+ if (IS_ERR(cld)) {
+ CERROR("can't create log: %s\n", logname);
+ config_log_put(sptlrpc_cld);
+ RETURN(PTR_ERR(cld));
+ }
+
+ cld->cld_sptlrpc = sptlrpc_cld;
+
+ LASSERT(lsi->lsi_lmd);
+ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)) {
+ struct config_llog_data *recover_cld;
+ *strrchr(seclogname, '-') = 0;
+ recover_cld = config_recover_log_add(obd, seclogname, cfg, sb);
+ if (IS_ERR(recover_cld)) {
+ config_log_put(cld);
+ RETURN(PTR_ERR(recover_cld));
+ }
+ cld->cld_recover = recover_cld;
+ }
+
+ RETURN(0);
+}
+
+DEFINE_MUTEX(llog_process_lock);
+
+/** Stop watching for updates on this log.
+ */
+static int config_log_end(char *logname, struct config_llog_instance *cfg)
+{
+ struct config_llog_data *cld;
+ struct config_llog_data *cld_sptlrpc = NULL;
+ struct config_llog_data *cld_recover = NULL;
+ int rc = 0;
+ ENTRY;
+
+ cld = config_log_find(logname, cfg);
+ if (cld == NULL)
+ RETURN(-ENOENT);
+
+ mutex_lock(&cld->cld_lock);
+ /*
+ * if cld_stopping is set, it means we didn't start the log thus
+ * not owning the start ref. this can happen after previous umount:
+ * the cld still hanging there waiting for lock cancel, and we
+ * remount again but failed in the middle and call log_end without
+ * calling start_log.
+ */
+ if (unlikely(cld->cld_stopping)) {
+ mutex_unlock(&cld->cld_lock);
+ /* drop the ref from the find */
+ config_log_put(cld);
+ RETURN(rc);
+ }
+
+ cld->cld_stopping = 1;
+
+ cld_recover = cld->cld_recover;
+ cld->cld_recover = NULL;
+ mutex_unlock(&cld->cld_lock);
+
+ if (cld_recover) {
+ mutex_lock(&cld_recover->cld_lock);
+ cld_recover->cld_stopping = 1;
+ mutex_unlock(&cld_recover->cld_lock);
+ config_log_put(cld_recover);
+ }
+
+ spin_lock(&config_list_lock);
+ cld_sptlrpc = cld->cld_sptlrpc;
+ cld->cld_sptlrpc = NULL;
+ spin_unlock(&config_list_lock);
+
+ if (cld_sptlrpc)
+ config_log_put(cld_sptlrpc);
+
+ /* drop the ref from the find */
+ config_log_put(cld);
+ /* drop the start ref */
+ config_log_put(cld);
+
+ CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? logname : "client",
+ rc);
+ RETURN(rc);
+}
+
+int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_import *imp = obd->u.cli.cl_import;
+ struct obd_connect_data *ocd = &imp->imp_connect_data;
+ struct config_llog_data *cld;
+ ENTRY;
+
+ seq_printf(m, "imperative_recovery: %s\n",
+ OCD_HAS_FLAG(ocd, IMP_RECOV) ? "ENABLED" : "DISABLED");
+ seq_printf(m, "client_state:\n");
+
+ spin_lock(&config_list_lock);
+ list_for_each_entry(cld, &config_llog_list, cld_list_chain) {
+ if (cld->cld_recover == NULL)
+ continue;
+ seq_printf(m, " - { client: %s, nidtbl_version: %u }\n",
+ cld->cld_logname,
+ cld->cld_recover->cld_cfg.cfg_last_idx);
+ }
+ spin_unlock(&config_list_lock);
+
+ RETURN(0);
+}
+
+/* reenqueue any lost locks */
+#define RQ_RUNNING 0x1
+#define RQ_NOW 0x2
+#define RQ_LATER 0x4
+#define RQ_STOP 0x8
+static int rq_state = 0;
+static wait_queue_head_t rq_waitq;
+static DECLARE_COMPLETION(rq_exit);
+
+static void do_requeue(struct config_llog_data *cld)
+{
+ ENTRY;
+ LASSERT(atomic_read(&cld->cld_refcount) > 0);
+
+ /* Do not run mgc_process_log on a disconnected export or an
+ export which is being disconnected. Take the client
+ semaphore to make the check non-racy. */
+ down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
+ if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) {
+ CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname);
+ mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+ } else {
+ CDEBUG(D_MGC, "disconnecting, won't update log %s\n",
+ cld->cld_logname);
+ }
+ up_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
+
+ EXIT;
+}
+
+/* this timeout represents how many seconds MGC should wait before
+ * requeue config and recover lock to the MGS. We need to randomize this
+ * in order to not flood the MGS.
+ */
+#define MGC_TIMEOUT_MIN_SECONDS 5
+#define MGC_TIMEOUT_RAND_CENTISEC 0x1ff /* ~500 */
+
+static int mgc_requeue_thread(void *data)
+{
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_MGC, "Starting requeue thread\n");
+
+ /* Keep trying failed locks periodically */
+ spin_lock(&config_list_lock);
+ rq_state |= RQ_RUNNING;
+ while (1) {
+ struct l_wait_info lwi;
+ struct config_llog_data *cld, *cld_prev;
+ int rand = cfs_rand() & MGC_TIMEOUT_RAND_CENTISEC;
+ int stopped = !!(rq_state & RQ_STOP);
+ int to;
+
+ /* Any new or requeued lostlocks will change the state */
+ rq_state &= ~(RQ_NOW | RQ_LATER);
+ spin_unlock(&config_list_lock);
+
+ /* Always wait a few seconds to allow the server who
+ caused the lock revocation to finish its setup, plus some
+ random so everyone doesn't try to reconnect at once. */
+ to = MGC_TIMEOUT_MIN_SECONDS * HZ;
+ to += rand * HZ / 100; /* rand is centi-seconds */
+ lwi = LWI_TIMEOUT(to, NULL, NULL);
+ l_wait_event(rq_waitq, rq_state & RQ_STOP, &lwi);
+
+ /*
+ * iterate & processing through the list. for each cld, process
+ * its depending sptlrpc cld firstly (if any) and then itself.
+ *
+ * it's guaranteed any item in the list must have
+ * reference > 0; and if cld_lostlock is set, at
+ * least one reference is taken by the previous enqueue.
+ */
+ cld_prev = NULL;
+
+ spin_lock(&config_list_lock);
+ list_for_each_entry(cld, &config_llog_list,
+ cld_list_chain) {
+ if (!cld->cld_lostlock)
+ continue;
+
+ spin_unlock(&config_list_lock);
+
+ LASSERT(atomic_read(&cld->cld_refcount) > 0);
+
+ /* Whether we enqueued again or not in mgc_process_log,
+ * we're done with the ref from the old enqueue */
+ if (cld_prev)
+ config_log_put(cld_prev);
+ cld_prev = cld;
+
+ cld->cld_lostlock = 0;
+ if (likely(!stopped))
+ do_requeue(cld);
+
+ spin_lock(&config_list_lock);
+ }
+ spin_unlock(&config_list_lock);
+ if (cld_prev)
+ config_log_put(cld_prev);
+
+ /* break after scanning the list so that we can drop
+ * refcount to losing lock clds */
+ if (unlikely(stopped)) {
+ spin_lock(&config_list_lock);
+ break;
+ }
+
+ /* Wait a bit to see if anyone else needs a requeue */
+ lwi = (struct l_wait_info) { 0 };
+ l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP),
+ &lwi);
+ spin_lock(&config_list_lock);
+ }
+ /* spinlock and while guarantee RQ_NOW and RQ_LATER are not set */
+ rq_state &= ~RQ_RUNNING;
+ spin_unlock(&config_list_lock);
+
+ complete(&rq_exit);
+
+ CDEBUG(D_MGC, "Ending requeue thread\n");
+ RETURN(rc);
+}
+
+/* Add a cld to the list to requeue. Start the requeue thread if needed.
+ We are responsible for dropping the config log reference from here on out. */
+static void mgc_requeue_add(struct config_llog_data *cld)
+{
+ ENTRY;
+
+ CDEBUG(D_INFO, "log %s: requeue (r=%d sp=%d st=%x)\n",
+ cld->cld_logname, atomic_read(&cld->cld_refcount),
+ cld->cld_stopping, rq_state);
+ LASSERT(atomic_read(&cld->cld_refcount) > 0);
+
+ mutex_lock(&cld->cld_lock);
+ if (cld->cld_stopping || cld->cld_lostlock) {
+ mutex_unlock(&cld->cld_lock);
+ RETURN_EXIT;
+ }
+ /* this refcount will be released in mgc_requeue_thread. */
+ config_log_get(cld);
+ cld->cld_lostlock = 1;
+ mutex_unlock(&cld->cld_lock);
+
+ /* Hold lock for rq_state */
+ spin_lock(&config_list_lock);
+ if (rq_state & RQ_STOP) {
+ spin_unlock(&config_list_lock);
+ cld->cld_lostlock = 0;
+ config_log_put(cld);
+ } else {
+ rq_state |= RQ_NOW;
+ spin_unlock(&config_list_lock);
+ wake_up(&rq_waitq);
+ }
+ EXIT;
+}
+
+/********************** class fns **********************/
+
+static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
+ struct vfsmount *mnt)
+{
+ struct lvfs_run_ctxt saved;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct client_obd *cli = &obd->u.cli;
+ struct dentry *dentry;
+ char *label;
+ int err = 0;
+ ENTRY;
+
+ LASSERT(lsi);
+ LASSERT(lsi->lsi_srv_mnt == mnt);
+
+ /* The mgc fs exclusion sem. Only one fs can be setup at a time. */
+ down(&cli->cl_mgc_sem);
+
+ cfs_cleanup_group_info();
+
+ obd->obd_fsops = fsfilt_get_ops(lsi->lsi_fstype);
+ if (IS_ERR(obd->obd_fsops)) {
+ up(&cli->cl_mgc_sem);
+ CERROR("%s: No fstype %s: rc = %ld\n", lsi->lsi_fstype,
+ obd->obd_name, PTR_ERR(obd->obd_fsops));
+ RETURN(PTR_ERR(obd->obd_fsops));
+ }
+
+ cli->cl_mgc_vfsmnt = mnt;
+ err = fsfilt_setup(obd, mnt->mnt_sb);
+ if (err)
+ GOTO(err_ops, err);
+
+ OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+ obd->obd_lvfs_ctxt.pwdmnt = mnt;
+ obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
+ obd->obd_lvfs_ctxt.fs = get_ds();
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ dentry = ll_lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
+ strlen(MOUNT_CONFIGS_DIR));
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ CERROR("cannot lookup %s directory: rc = %d\n",
+ MOUNT_CONFIGS_DIR, err);
+ GOTO(err_ops, err);
+ }
+ cli->cl_mgc_configs_dir = dentry;
+
+ /* We take an obd ref to insure that we can't get to mgc_cleanup
+ without calling mgc_fs_cleanup first. */
+ class_incref(obd, "mgc_fs", obd);
+
+ label = fsfilt_get_label(obd, mnt->mnt_sb);
+ if (label)
+ CDEBUG(D_MGC, "MGC using disk labelled=%s\n", label);
+
+ /* We keep the cl_mgc_sem until mgc_fs_cleanup */
+ RETURN(0);
+
+err_ops:
+ fsfilt_put_ops(obd->obd_fsops);
+ obd->obd_fsops = NULL;
+ cli->cl_mgc_vfsmnt = NULL;
+ up(&cli->cl_mgc_sem);
+ RETURN(err);
+}
+
+static int mgc_fs_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(cli->cl_mgc_vfsmnt != NULL);
+
+ if (cli->cl_mgc_configs_dir != NULL) {
+ struct lvfs_run_ctxt saved;
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ l_dput(cli->cl_mgc_configs_dir);
+ cli->cl_mgc_configs_dir = NULL;
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ class_decref(obd, "mgc_fs", obd);
+ }
+
+ cli->cl_mgc_vfsmnt = NULL;
+ if (obd->obd_fsops)
+ fsfilt_put_ops(obd->obd_fsops);
+
+ up(&cli->cl_mgc_sem);
+
+ RETURN(rc);
+}
+
+static atomic_t mgc_count = ATOMIC_INIT(0);
+static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY:
+ break;
+ case OBD_CLEANUP_EXPORTS:
+ if (atomic_dec_and_test(&mgc_count)) {
+ int running;
+ /* stop requeue thread */
+ spin_lock(&config_list_lock);
+ running = rq_state & RQ_RUNNING;
+ if (running)
+ rq_state |= RQ_STOP;
+ spin_unlock(&config_list_lock);
+ if (running) {
+ wake_up(&rq_waitq);
+ wait_for_completion(&rq_exit);
+ }
+ }
+ obd_cleanup_client_import(obd);
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ }
+ RETURN(rc);
+}
+
+static int mgc_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc;
+ ENTRY;
+
+ LASSERT(cli->cl_mgc_vfsmnt == NULL);
+
+ /* COMPAT_146 - old config logs may have added profiles we don't
+ know about */
+ if (obd->obd_type->typ_refcnt <= 1)
+ /* Only for the last mgc */
+ class_del_profiles();
+
+ lprocfs_obd_cleanup(obd);
+ ptlrpcd_decref();
+
+ rc = client_obd_cleanup(obd);
+ RETURN(rc);
+}
+
+static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lprocfs_static_vars lvars;
+ int rc;
+ ENTRY;
+
+ ptlrpcd_addref();
+
+ rc = client_obd_setup(obd, lcfg);
+ if (rc)
+ GOTO(err_decref, rc);
+
+ rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
+ if (rc) {
+ CERROR("failed to setup llogging subsystems\n");
+ GOTO(err_cleanup, rc);
+ }
+
+ lprocfs_mgc_init_vars(&lvars);
+ lprocfs_obd_setup(obd, lvars.obd_vars);
+ sptlrpc_lprocfs_cliobd_attach(obd);
+
+ if (atomic_inc_return(&mgc_count) == 1) {
+ rq_state = 0;
+ init_waitqueue_head(&rq_waitq);
+
+ /* start requeue thread */
+ rc = PTR_ERR(kthread_run(mgc_requeue_thread, NULL,
+ "ll_cfg_requeue"));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("%s: Cannot start requeue thread (%d),"
+ "no more log updates!\n",
+ obd->obd_name, rc);
+ GOTO(err_cleanup, rc);
+ }
+ /* rc is the task_struct pointer of mgc_requeue_thread. */
+ rc = 0;
+ }
+
+ RETURN(rc);
+
+err_cleanup:
+ client_obd_cleanup(obd);
+err_decref:
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+/* based on ll_mdc_blocking_ast */
+static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ struct lustre_handle lockh;
+ struct config_llog_data *cld = (struct config_llog_data *)data;
+ int rc = 0;
+ ENTRY;
+
+ switch (flag) {
+ case LDLM_CB_BLOCKING:
+ /* mgs wants the lock, give it up... */
+ LDLM_DEBUG(lock, "MGC blocking CB");
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ break;
+ case LDLM_CB_CANCELING:
+ /* We've given up the lock, prepare ourselves to update. */
+ LDLM_DEBUG(lock, "MGC cancel CB");
+
+ CDEBUG(D_MGC, "Lock res "LPX64" (%.8s)\n",
+ lock->l_resource->lr_name.name[0],
+ (char *)&lock->l_resource->lr_name.name[0]);
+
+ if (!cld) {
+ CDEBUG(D_INFO, "missing data, won't requeue\n");
+ break;
+ }
+
+ /* held at mgc_process_log(). */
+ LASSERT(atomic_read(&cld->cld_refcount) > 0);
+ /* Are we done with this log? */
+ if (cld->cld_stopping) {
+ CDEBUG(D_MGC, "log %s: stopping, won't requeue\n",
+ cld->cld_logname);
+ config_log_put(cld);
+ break;
+ }
+ /* Make sure not to re-enqueue when the mgc is stopping
+ (we get called from client_disconnect_export) */
+ if (!lock->l_conn_export ||
+ !lock->l_conn_export->exp_obd->u.cli.cl_conn_count) {
+ CDEBUG(D_MGC, "log %.8s: disconnecting, won't requeue\n",
+ cld->cld_logname);
+ config_log_put(cld);
+ break;
+ }
+
+ /* Re-enqueue now */
+ mgc_requeue_add(cld);
+ config_log_put(cld);
+ break;
+ default:
+ LBUG();
+ }
+
+ RETURN(rc);
+}
+
+/* Not sure where this should go... */
+#define MGC_ENQUEUE_LIMIT 50
+#define MGC_TARGET_REG_LIMIT 10
+#define MGC_SEND_PARAM_LIMIT 10
+
+/* Send parameter to MGS*/
+static int mgc_set_mgs_param(struct obd_export *exp,
+ struct mgs_send_param *msp)
+{
+ struct ptlrpc_request *req;
+ struct mgs_send_param *req_msp, *rep_msp;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_MGS_SET_INFO, LUSTRE_MGS_VERSION,
+ MGS_SET_INFO);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ req_msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
+ if (!req_msp) {
+ ptlrpc_req_finished(req);
+ RETURN(-ENOMEM);
+ }
+
+ memcpy(req_msp, msp, sizeof(*req_msp));
+ ptlrpc_request_set_replen(req);
+
+ /* Limit how long we will wait for the enqueue to complete */
+ req->rq_delay_limit = MGC_SEND_PARAM_LIMIT;
+ rc = ptlrpc_queue_wait(req);
+ if (!rc) {
+ rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
+ memcpy(msp, rep_msp, sizeof(*rep_msp));
+ }
+
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+/* Take a config lock so we can get cancel notifications */
+static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ __u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb,
+ void *data, __u32 lvb_len, void *lvb_swabber,
+ struct lustre_handle *lockh)
+{
+ struct config_llog_data *cld = (struct config_llog_data *)data;
+ struct ldlm_enqueue_info einfo = { type, mode, mgc_blocking_ast,
+ ldlm_completion_ast, NULL, NULL, NULL };
+ struct ptlrpc_request *req;
+ int short_limit = cld_is_sptlrpc(cld);
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MGC, "Enqueue for %s (res "LPX64")\n", cld->cld_logname,
+ cld->cld_resid.name[0]);
+
+ /* We need a callback for every lockholder, so don't try to
+ ldlm_lock_match (see rev 1.1.2.11.2.47) */
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_LDLM_ENQUEUE, LUSTRE_DLM_VERSION,
+ LDLM_ENQUEUE);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, 0);
+ ptlrpc_request_set_replen(req);
+
+ /* check if this is server or client */
+ if (cld->cld_cfg.cfg_sb) {
+ struct lustre_sb_info *lsi = s2lsi(cld->cld_cfg.cfg_sb);
+ if (lsi && IS_SERVER(lsi))
+ short_limit = 1;
+ }
+ /* Limit how long we will wait for the enqueue to complete */
+ req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT;
+ rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags,
+ NULL, 0, LVB_T_NONE, lockh, 0);
+ /* A failed enqueue should still call the mgc_blocking_ast,
+ where it will be requeued if needed ("grant failed"). */
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+static int mgc_cancel(struct obd_export *exp, struct lov_stripe_md *md,
+ __u32 mode, struct lustre_handle *lockh)
+{
+ ENTRY;
+
+ ldlm_lock_decref(lockh, mode);
+
+ RETURN(0);
+}
+
+static void mgc_notify_active(struct obd_device *unused)
+{
+ /* wakeup mgc_requeue_thread to requeue mgc lock */
+ spin_lock(&config_list_lock);
+ rq_state |= RQ_NOW;
+ spin_unlock(&config_list_lock);
+ wake_up(&rq_waitq);
+
+ /* TODO: Help the MGS rebuild nidtbl. -jay */
+}
+
+/* Send target_reg message to MGS */
+static int mgc_target_register(struct obd_export *exp,
+ struct mgs_target_info *mti)
+{
+ struct ptlrpc_request *req;
+ struct mgs_target_info *req_mti, *rep_mti;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_MGS_TARGET_REG, LUSTRE_MGS_VERSION,
+ MGS_TARGET_REG);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
+ if (!req_mti) {
+ ptlrpc_req_finished(req);
+ RETURN(-ENOMEM);
+ }
+
+ memcpy(req_mti, mti, sizeof(*req_mti));
+ ptlrpc_request_set_replen(req);
+ CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
+ /* Limit how long we will wait for the enqueue to complete */
+ req->rq_delay_limit = MGC_TARGET_REG_LIMIT;
+
+ rc = ptlrpc_queue_wait(req);
+ if (!rc) {
+ rep_mti = req_capsule_server_get(&req->rq_pill,
+ &RMF_MGS_TARGET_INFO);
+ memcpy(mti, rep_mti, sizeof(*rep_mti));
+ CDEBUG(D_MGC, "register %s got index = %d\n",
+ mti->mti_svname, mti->mti_stripe_index);
+ }
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, obd_count vallen,
+ void *val, struct ptlrpc_request_set *set)
+{
+ int rc = -EINVAL;
+ ENTRY;
+
+ /* Turn off initial_recov after we try all backup servers once */
+ if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
+ struct obd_import *imp = class_exp2cliimp(exp);
+ int value;
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ value = *(int *)val;
+ CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n",
+ imp->imp_obd->obd_name, value,
+ imp->imp_deactive, imp->imp_invalid,
+ imp->imp_replayable, imp->imp_obd->obd_replayable,
+ ptlrpc_import_state_name(imp->imp_state));
+ /* Resurrect if we previously died */
+ if ((imp->imp_state != LUSTRE_IMP_FULL &&
+ imp->imp_state != LUSTRE_IMP_NEW) || value > 1)
+ ptlrpc_reconnect_import(imp);
+ RETURN(0);
+ }
+ /* FIXME move this to mgc_process_config */
+ if (KEY_IS(KEY_REGISTER_TARGET)) {
+ struct mgs_target_info *mti;
+ if (vallen != sizeof(struct mgs_target_info))
+ RETURN(-EINVAL);
+ mti = (struct mgs_target_info *)val;
+ CDEBUG(D_MGC, "register_target %s %#x\n",
+ mti->mti_svname, mti->mti_flags);
+ rc = mgc_target_register(exp, mti);
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_SET_FS)) {
+ struct super_block *sb = (struct super_block *)val;
+ struct lustre_sb_info *lsi;
+ if (vallen != sizeof(struct super_block))
+ RETURN(-EINVAL);
+ lsi = s2lsi(sb);
+ rc = mgc_fs_setup(exp->exp_obd, sb, lsi->lsi_srv_mnt);
+ if (rc) {
+ CERROR("set_fs got %d\n", rc);
+ }
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_CLEAR_FS)) {
+ if (vallen != 0)
+ RETURN(-EINVAL);
+ rc = mgc_fs_cleanup(exp->exp_obd);
+ if (rc) {
+ CERROR("clear_fs got %d\n", rc);
+ }
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_SET_INFO)) {
+ struct mgs_send_param *msp;
+
+ msp = (struct mgs_send_param *)val;
+ rc = mgc_set_mgs_param(exp, msp);
+ RETURN(rc);
+ }
+ if (KEY_IS(KEY_MGSSEC)) {
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ struct sptlrpc_flavor flvr;
+
+ /*
+ * empty string means using current flavor, if which haven't
+ * been set yet, set it as null.
+ *
+ * if flavor has been set previously, check the asking flavor
+ * must match the existing one.
+ */
+ if (vallen == 0) {
+ if (cli->cl_flvr_mgc.sf_rpc != SPTLRPC_FLVR_INVALID)
+ RETURN(0);
+ val = "null";
+ vallen = 4;
+ }
+
+ rc = sptlrpc_parse_flavor(val, &flvr);
+ if (rc) {
+ CERROR("invalid sptlrpc flavor %s to MGS\n",
+ (char *) val);
+ RETURN(rc);
+ }
+
+ /*
+ * caller already hold a mutex
+ */
+ if (cli->cl_flvr_mgc.sf_rpc == SPTLRPC_FLVR_INVALID) {
+ cli->cl_flvr_mgc = flvr;
+ } else if (memcmp(&cli->cl_flvr_mgc, &flvr,
+ sizeof(flvr)) != 0) {
+ char str[20];
+
+ sptlrpc_flavor2name(&cli->cl_flvr_mgc,
+ str, sizeof(str));
+ LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but "
+ "currently %s is in use\n",
+ (char *) val, str);
+ rc = -EPERM;
+ }
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+static int mgc_get_info(const struct lu_env *env, struct obd_export *exp,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *unused)
+{
+ int rc = -EINVAL;
+
+ if (KEY_IS(KEY_CONN_DATA)) {
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct obd_connect_data *data = val;
+
+ if (*vallen == sizeof(*data)) {
+ *data = imp->imp_connect_data;
+ rc = 0;
+ }
+ }
+
+ return rc;
+}
+
+static int mgc_import_event(struct obd_device *obd,
+ struct obd_import *imp,
+ enum obd_import_event event)
+{
+ int rc = 0;
+
+ LASSERT(imp->imp_obd == obd);
+ CDEBUG(D_MGC, "import event %#x\n", event);
+
+ switch (event) {
+ case IMP_EVENT_DISCON:
+ /* MGC imports should not wait for recovery */
+ if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV))
+ ptlrpc_pinger_ir_down();
+ break;
+ case IMP_EVENT_INACTIVE:
+ break;
+ case IMP_EVENT_INVALIDATE: {
+ struct ldlm_namespace *ns = obd->obd_namespace;
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+ break;
+ }
+ case IMP_EVENT_ACTIVE:
+ CDEBUG(D_INFO, "%s: Reactivating import\n", obd->obd_name);
+ /* Clearing obd_no_recov allows us to continue pinging */
+ obd->obd_no_recov = 0;
+ mgc_notify_active(obd);
+ if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV))
+ ptlrpc_pinger_ir_up();
+ break;
+ case IMP_EVENT_OCD:
+ break;
+ case IMP_EVENT_DEACTIVATE:
+ case IMP_EVENT_ACTIVATE:
+ break;
+ default:
+ CERROR("Unknown import event %#x\n", event);
+ LBUG();
+ }
+ RETURN(rc);
+}
+
+static int mgc_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *tgt, int *index)
+{
+ struct llog_ctxt *ctxt;
+ int rc;
+ ENTRY;
+
+ LASSERT(olg == &obd->obd_olg);
+
+
+ rc = llog_setup(NULL, obd, olg, LLOG_CONFIG_REPL_CTXT, tgt,
+ &llog_client_ops);
+ if (rc)
+ GOTO(out, rc);
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_CONFIG_REPL_CTXT);
+ if (!ctxt)
+ GOTO(out, rc = -ENODEV);
+
+ llog_initiator_connect(ctxt);
+ llog_ctxt_put(ctxt);
+
+ RETURN(0);
+out:
+ ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+ RETURN(rc);
+}
+
+static int mgc_llog_finish(struct obd_device *obd, int count)
+{
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+
+ ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+ RETURN(0);
+}
+
+enum {
+ CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_CACHE_SHIFT),
+ CONFIG_READ_NRPAGES = 4
+};
+
+static int mgc_apply_recover_logs(struct obd_device *mgc,
+ struct config_llog_data *cld,
+ __u64 max_version,
+ void *data, int datalen, bool mne_swab)
+{
+ struct config_llog_instance *cfg = &cld->cld_cfg;
+ struct lustre_sb_info *lsi = s2lsi(cfg->cfg_sb);
+ struct mgs_nidtbl_entry *entry;
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs bufs;
+ u64 prev_version = 0;
+ char *inst;
+ char *buf;
+ int bufsz;
+ int pos;
+ int rc = 0;
+ int off = 0;
+ ENTRY;
+
+ LASSERT(cfg->cfg_instance != NULL);
+ LASSERT(cfg->cfg_sb == cfg->cfg_instance);
+
+ OBD_ALLOC(inst, PAGE_CACHE_SIZE);
+ if (inst == NULL)
+ RETURN(-ENOMEM);
+
+ if (!IS_SERVER(lsi)) {
+ pos = snprintf(inst, PAGE_CACHE_SIZE, "%p", cfg->cfg_instance);
+ if (pos >= PAGE_CACHE_SIZE) {
+ OBD_FREE(inst, PAGE_CACHE_SIZE);
+ return -E2BIG;
+ }
+ } else {
+ LASSERT(IS_MDT(lsi));
+ rc = server_name2svname(lsi->lsi_svname, inst, NULL,
+ PAGE_CACHE_SIZE);
+ if (rc) {
+ OBD_FREE(inst, PAGE_CACHE_SIZE);
+ RETURN(-EINVAL);
+ }
+ pos = strlen(inst);
+ }
+
+ ++pos;
+ buf = inst + pos;
+ bufsz = PAGE_CACHE_SIZE - pos;
+
+ while (datalen > 0) {
+ int entry_len = sizeof(*entry);
+ int is_ost;
+ struct obd_device *obd;
+ char *obdname;
+ char *cname;
+ char *params;
+ char *uuid;
+
+ rc = -EINVAL;
+ if (datalen < sizeof(*entry))
+ break;
+
+ entry = (typeof(entry))(data + off);
+
+ /* sanity check */
+ if (entry->mne_nid_type != 0) /* only support type 0 for ipv4 */
+ break;
+ if (entry->mne_nid_count == 0) /* at least one nid entry */
+ break;
+ if (entry->mne_nid_size != sizeof(lnet_nid_t))
+ break;
+
+ entry_len += entry->mne_nid_count * entry->mne_nid_size;
+ if (datalen < entry_len) /* must have entry_len at least */
+ break;
+
+ /* Keep this swab for normal mixed endian handling. LU-1644 */
+ if (mne_swab)
+ lustre_swab_mgs_nidtbl_entry(entry);
+ if (entry->mne_length > PAGE_CACHE_SIZE) {
+ CERROR("MNE too large (%u)\n", entry->mne_length);
+ break;
+ }
+
+ if (entry->mne_length < entry_len)
+ break;
+
+ off += entry->mne_length;
+ datalen -= entry->mne_length;
+ if (datalen < 0)
+ break;
+
+ if (entry->mne_version > max_version) {
+ CERROR("entry index(%lld) is over max_index(%lld)\n",
+ entry->mne_version, max_version);
+ break;
+ }
+
+ if (prev_version >= entry->mne_version) {
+ CERROR("index unsorted, prev %lld, now %lld\n",
+ prev_version, entry->mne_version);
+ break;
+ }
+ prev_version = entry->mne_version;
+
+ /*
+ * Write a string with format "nid::instance" to
+ * lustre/<osc|mdc>/<target>-<osc|mdc>-<instance>/import.
+ */
+
+ is_ost = entry->mne_type == LDD_F_SV_TYPE_OST;
+ memset(buf, 0, bufsz);
+ obdname = buf;
+ pos = 0;
+
+ /* lustre-OST0001-osc-<instance #> */
+ strcpy(obdname, cld->cld_logname);
+ cname = strrchr(obdname, '-');
+ if (cname == NULL) {
+ CERROR("mgc %s: invalid logname %s\n",
+ mgc->obd_name, obdname);
+ break;
+ }
+
+ pos = cname - obdname;
+ obdname[pos] = 0;
+ pos += sprintf(obdname + pos, "-%s%04x",
+ is_ost ? "OST" : "MDT", entry->mne_index);
+
+ cname = is_ost ? "osc" : "mdc",
+ pos += sprintf(obdname + pos, "-%s-%s", cname, inst);
+ lustre_cfg_bufs_reset(&bufs, obdname);
+
+ /* find the obd by obdname */
+ obd = class_name2obd(obdname);
+ if (obd == NULL) {
+ CDEBUG(D_INFO, "mgc %s: cannot find obdname %s\n",
+ mgc->obd_name, obdname);
+ rc = 0;
+ /* this is a safe race, when the ost is starting up...*/
+ continue;
+ }
+
+ /* osc.import = "connection=<Conn UUID>::<target instance>" */
+ ++pos;
+ params = buf + pos;
+ pos += sprintf(params, "%s.import=%s", cname, "connection=");
+ uuid = buf + pos;
+
+ down_read(&obd->u.cli.cl_sem);
+ if (obd->u.cli.cl_import == NULL) {
+ /* client does not connect to the OST yet */
+ up_read(&obd->u.cli.cl_sem);
+ rc = 0;
+ continue;
+ }
+
+ /* TODO: iterate all nids to find one */
+ /* find uuid by nid */
+ rc = client_import_find_conn(obd->u.cli.cl_import,
+ entry->u.nids[0],
+ (struct obd_uuid *)uuid);
+ up_read(&obd->u.cli.cl_sem);
+ if (rc < 0) {
+ CERROR("mgc: cannot find uuid by nid %s\n",
+ libcfs_nid2str(entry->u.nids[0]));
+ break;
+ }
+
+ CDEBUG(D_INFO, "Find uuid %s by nid %s\n",
+ uuid, libcfs_nid2str(entry->u.nids[0]));
+
+ pos += strlen(uuid);
+ pos += sprintf(buf + pos, "::%u", entry->mne_instance);
+ LASSERT(pos < bufsz);
+
+ lustre_cfg_bufs_set_string(&bufs, 1, params);
+
+ rc = -ENOMEM;
+ lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+ if (lcfg == NULL) {
+ CERROR("mgc: cannot allocate memory\n");
+ break;
+ }
+
+ CDEBUG(D_INFO, "ir apply logs "LPD64"/"LPD64" for %s -> %s\n",
+ prev_version, max_version, obdname, params);
+
+ rc = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
+ if (rc)
+ CDEBUG(D_INFO, "process config for %s error %d\n",
+ obdname, rc);
+
+ /* continue, even one with error */
+ }
+
+ OBD_FREE(inst, PAGE_CACHE_SIZE);
+ RETURN(rc);
+}
+
+/**
+ * This function is called if this client was notified for target restarting
+ * by the MGS. A CONFIG_READ RPC is going to send to fetch recovery logs.
+ */
+static int mgc_process_recover_log(struct obd_device *obd,
+ struct config_llog_data *cld)
+{
+ struct ptlrpc_request *req = NULL;
+ struct config_llog_instance *cfg = &cld->cld_cfg;
+ struct mgs_config_body *body;
+ struct mgs_config_res *res;
+ struct ptlrpc_bulk_desc *desc;
+ struct page **pages;
+ int nrpages;
+ bool eof = true;
+ bool mne_swab = false;
+ int i;
+ int ealen;
+ int rc;
+ ENTRY;
+
+ /* allocate buffer for bulk transfer.
+ * if this is the first time for this mgs to read logs,
+ * CONFIG_READ_NRPAGES_INIT will be used since it will read all logs
+ * once; otherwise, it only reads increment of logs, this should be
+ * small and CONFIG_READ_NRPAGES will be used.
+ */
+ nrpages = CONFIG_READ_NRPAGES;
+ if (cfg->cfg_last_idx == 0) /* the first time */
+ nrpages = CONFIG_READ_NRPAGES_INIT;
+
+ OBD_ALLOC(pages, sizeof(*pages) * nrpages);
+ if (pages == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ for (i = 0; i < nrpages; i++) {
+ pages[i] = alloc_page(GFP_IOFS);
+ if (pages[i] == NULL)
+ GOTO(out, rc = -ENOMEM);
+ }
+
+again:
+ LASSERT(cld_is_recover(cld));
+ LASSERT(mutex_is_locked(&cld->cld_lock));
+ req = ptlrpc_request_alloc(class_exp2cliimp(cld->cld_mgcexp),
+ &RQF_MGS_CONFIG_READ);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_CONFIG_READ);
+ if (rc)
+ GOTO(out, rc);
+
+ /* pack request */
+ body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+ LASSERT(body != NULL);
+ LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname));
+ if (strlcpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name))
+ >= sizeof(body->mcb_name))
+ GOTO(out, rc = -E2BIG);
+ body->mcb_offset = cfg->cfg_last_idx + 1;
+ body->mcb_type = cld->cld_type;
+ body->mcb_bits = PAGE_CACHE_SHIFT;
+ body->mcb_units = nrpages;
+
+ /* allocate bulk transfer descriptor */
+ desc = ptlrpc_prep_bulk_imp(req, nrpages, 1, BULK_PUT_SINK,
+ MGS_BULK_PORTAL);
+ if (desc == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ for (i = 0; i < nrpages; i++)
+ ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE);
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
+ if (res->mcr_size < res->mcr_offset)
+ GOTO(out, rc = -EINVAL);
+
+ /* always update the index even though it might have errors with
+ * handling the recover logs */
+ cfg->cfg_last_idx = res->mcr_offset;
+ eof = res->mcr_offset == res->mcr_size;
+
+ CDEBUG(D_INFO, "Latest version "LPD64", more %d.\n",
+ res->mcr_offset, eof == false);
+
+ ealen = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, 0);
+ if (ealen < 0)
+ GOTO(out, rc = ealen);
+
+ if (ealen > nrpages << PAGE_CACHE_SHIFT)
+ GOTO(out, rc = -EINVAL);
+
+ if (ealen == 0) { /* no logs transferred */
+ if (!eof)
+ rc = -EINVAL;
+ GOTO(out, rc);
+ }
+
+ mne_swab = !!ptlrpc_rep_need_swab(req);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+ /* This import flag means the server did an extra swab of IR MNE
+ * records (fixed in LU-1252), reverse it here if needed. LU-1644 */
+ if (unlikely(req->rq_import->imp_need_mne_swab))
+ mne_swab = !mne_swab;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
+#endif
+
+ for (i = 0; i < nrpages && ealen > 0; i++) {
+ int rc2;
+ void *ptr;
+
+ ptr = kmap(pages[i]);
+ rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
+ min_t(int, ealen, PAGE_CACHE_SIZE),
+ mne_swab);
+ kunmap(pages[i]);
+ if (rc2 < 0) {
+ CWARN("Process recover log %s error %d\n",
+ cld->cld_logname, rc2);
+ break;
+ }
+
+ ealen -= PAGE_CACHE_SIZE;
+ }
+
+out:
+ if (req)
+ ptlrpc_req_finished(req);
+
+ if (rc == 0 && !eof)
+ goto again;
+
+ if (pages) {
+ for (i = 0; i < nrpages; i++) {
+ if (pages[i] == NULL)
+ break;
+ __free_page(pages[i]);
+ }
+ OBD_FREE(pages, sizeof(*pages) * nrpages);
+ }
+ return rc;
+}
+
+
+/* local_only means it cannot get remote llogs */
+static int mgc_process_cfg_log(struct obd_device *mgc,
+ struct config_llog_data *cld,
+ int local_only)
+{
+ struct llog_ctxt *ctxt, *lctxt = NULL;
+ struct lvfs_run_ctxt *saved_ctxt;
+ struct lustre_sb_info *lsi = NULL;
+ int rc = 0, must_pop = 0;
+ bool sptlrpc_started = false;
+
+ ENTRY;
+
+ LASSERT(cld);
+ LASSERT(mutex_is_locked(&cld->cld_lock));
+
+ /*
+ * local copy of sptlrpc log is controlled elsewhere, don't try to
+ * read it up here.
+ */
+ if (cld_is_sptlrpc(cld) && local_only)
+ RETURN(0);
+
+ if (cld->cld_cfg.cfg_sb)
+ lsi = s2lsi(cld->cld_cfg.cfg_sb);
+
+ ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
+ if (!ctxt) {
+ CERROR("missing llog context\n");
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC_PTR(saved_ctxt);
+ if (saved_ctxt == NULL)
+ RETURN(-ENOMEM);
+
+ lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT);
+
+ if (local_only) { /* no local log at client side */
+ GOTO(out_pop, rc = -EIO);
+ }
+
+ if (cld_is_sptlrpc(cld)) {
+ sptlrpc_conf_log_update_begin(cld->cld_logname);
+ sptlrpc_started = true;
+ }
+
+ /* logname and instance info should be the same, so use our
+ copy of the instance for the update. The cfg_last_idx will
+ be updated here. */
+ rc = class_config_parse_llog(NULL, ctxt, cld->cld_logname,
+ &cld->cld_cfg);
+ EXIT;
+
+out_pop:
+ llog_ctxt_put(ctxt);
+ if (lctxt)
+ llog_ctxt_put(lctxt);
+ if (must_pop)
+ pop_ctxt(saved_ctxt, &mgc->obd_lvfs_ctxt, NULL);
+
+ OBD_FREE_PTR(saved_ctxt);
+ /*
+ * update settings on existing OBDs. doing it inside
+ * of llog_process_lock so no device is attaching/detaching
+ * in parallel.
+ * the logname must be <fsname>-sptlrpc
+ */
+ if (sptlrpc_started) {
+ LASSERT(cld_is_sptlrpc(cld));
+ sptlrpc_conf_log_update_end(cld->cld_logname);
+ class_notify_sptlrpc_conf(cld->cld_logname,
+ strlen(cld->cld_logname) -
+ strlen("-sptlrpc"));
+ }
+
+ RETURN(rc);
+}
+
+/** Get a config log from the MGS and process it.
+ * This func is called for both clients and servers.
+ * Copy the log locally before parsing it if appropriate (non-MGS server)
+ */
+int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
+{
+ struct lustre_handle lockh = { 0 };
+ __u64 flags = LDLM_FL_NO_LRU;
+ int rc = 0, rcl;
+ ENTRY;
+
+ LASSERT(cld);
+
+ /* I don't want multiple processes running process_log at once --
+ sounds like badness. It actually might be fine, as long as
+ we're not trying to update from the same log
+ simultaneously (in which case we should use a per-log sem.) */
+ mutex_lock(&cld->cld_lock);
+ if (cld->cld_stopping) {
+ mutex_unlock(&cld->cld_lock);
+ RETURN(0);
+ }
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20);
+
+ CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname,
+ cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
+
+ /* Get the cfg lock on the llog */
+ rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL,
+ LCK_CR, &flags, NULL, NULL, NULL,
+ cld, 0, NULL, &lockh);
+ if (rcl == 0) {
+ /* Get the cld, it will be released in mgc_blocking_ast. */
+ config_log_get(cld);
+ rc = ldlm_lock_set_data(&lockh, (void *)cld);
+ LASSERT(rc == 0);
+ } else {
+ CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl);
+
+ /* mark cld_lostlock so that it will requeue
+ * after MGC becomes available. */
+ cld->cld_lostlock = 1;
+ /* Get extra reference, it will be put in requeue thread */
+ config_log_get(cld);
+ }
+
+
+ if (cld_is_recover(cld)) {
+ rc = 0; /* this is not a fatal error for recover log */
+ if (rcl == 0)
+ rc = mgc_process_recover_log(mgc, cld);
+ } else {
+ rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
+ }
+
+ CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n",
+ mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc);
+
+ mutex_unlock(&cld->cld_lock);
+
+ /* Now drop the lock so MGS can revoke it */
+ if (!rcl) {
+ rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL,
+ LCK_CR, &lockh);
+ if (rcl)
+ CERROR("Can't drop cfg lock: %d\n", rcl);
+ }
+
+ RETURN(rc);
+}
+
+
+/** Called from lustre_process_log.
+ * LCFG_LOG_START gets the config log from the MGS, processes it to start
+ * any services, and adds it to the list logs to watch (follow).
+ */
+static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct lustre_cfg *lcfg = buf;
+ struct config_llog_instance *cfg = NULL;
+ char *logname;
+ int rc = 0;
+ ENTRY;
+
+ switch(lcfg->lcfg_command) {
+ case LCFG_LOV_ADD_OBD: {
+ /* Overloading this cfg command: register a new target */
+ struct mgs_target_info *mti;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) !=
+ sizeof(struct mgs_target_info))
+ GOTO(out, rc = -EINVAL);
+
+ mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_MGC, "add_target %s %#x\n",
+ mti->mti_svname, mti->mti_flags);
+ rc = mgc_target_register(obd->u.cli.cl_mgc_mgsexp, mti);
+ break;
+ }
+ case LCFG_LOV_DEL_OBD:
+ /* Unregister has no meaning at the moment. */
+ CERROR("lov_del_obd unimplemented\n");
+ rc = -ENOSYS;
+ break;
+ case LCFG_SPTLRPC_CONF: {
+ rc = sptlrpc_process_config(lcfg);
+ break;
+ }
+ case LCFG_LOG_START: {
+ struct config_llog_data *cld;
+ struct super_block *sb;
+
+ logname = lustre_cfg_string(lcfg, 1);
+ cfg = (struct config_llog_instance *)lustre_cfg_buf(lcfg, 2);
+ sb = *(struct super_block **)lustre_cfg_buf(lcfg, 3);
+
+ CDEBUG(D_MGC, "parse_log %s from %d\n", logname,
+ cfg->cfg_last_idx);
+
+ /* We're only called through here on the initial mount */
+ rc = config_log_add(obd, logname, cfg, sb);
+ if (rc)
+ break;
+ cld = config_log_find(logname, cfg);
+ if (cld == NULL) {
+ rc = -ENOENT;
+ break;
+ }
+
+ /* COMPAT_146 */
+ /* FIXME only set this for old logs! Right now this forces
+ us to always skip the "inside markers" check */
+ cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146;
+
+ rc = mgc_process_log(obd, cld);
+ if (rc == 0 && cld->cld_recover != NULL) {
+ if (OCD_HAS_FLAG(&obd->u.cli.cl_import->
+ imp_connect_data, IMP_RECOV)) {
+ rc = mgc_process_log(obd, cld->cld_recover);
+ } else {
+ struct config_llog_data *cir = cld->cld_recover;
+ cld->cld_recover = NULL;
+ config_log_put(cir);
+ }
+ if (rc)
+ CERROR("Cannot process recover llog %d\n", rc);
+ }
+ config_log_put(cld);
+
+ break;
+ }
+ case LCFG_LOG_END: {
+ logname = lustre_cfg_string(lcfg, 1);
+
+ if (lcfg->lcfg_bufcount >= 2)
+ cfg = (struct config_llog_instance *)lustre_cfg_buf(
+ lcfg, 2);
+ rc = config_log_end(logname, cfg);
+ break;
+ }
+ default: {
+ CERROR("Unknown command: %d\n", lcfg->lcfg_command);
+ GOTO(out, rc = -EINVAL);
+
+ }
+ }
+out:
+ RETURN(rc);
+}
+
+struct obd_ops mgc_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = mgc_setup,
+ .o_precleanup = mgc_precleanup,
+ .o_cleanup = mgc_cleanup,
+ .o_add_conn = client_import_add_conn,
+ .o_del_conn = client_import_del_conn,
+ .o_connect = client_connect_import,
+ .o_disconnect = client_disconnect_export,
+ //.o_enqueue = mgc_enqueue,
+ .o_cancel = mgc_cancel,
+ //.o_iocontrol = mgc_iocontrol,
+ .o_set_info_async = mgc_set_info_async,
+ .o_get_info = mgc_get_info,
+ .o_import_event = mgc_import_event,
+ .o_llog_init = mgc_llog_init,
+ .o_llog_finish = mgc_llog_finish,
+ .o_process_config = mgc_process_config,
+};
+
+int __init mgc_init(void)
+{
+ return class_register_type(&mgc_obd_ops, NULL, NULL,
+ LUSTRE_MGC_NAME, NULL);
+}
+
+static void /*__exit*/ mgc_exit(void)
+{
+ class_unregister_type(LUSTRE_MGC_NAME);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Management Client");
+MODULE_LICENSE("GPL");
+
+module_init(mgc_init);
+module_exit(mgc_exit);
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
new file mode 100644
index 000000000000..b80c13c6f5dd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -0,0 +1,13 @@
+obj-$(CONFIG_LUSTRE_FS) += obdclass.o llog_test.o
+
+obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
+ llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \
+ genops.o uuid.o llog_ioctl.o lprocfs_status.o \
+ lprocfs_jobstats.o lustre_handles.o lustre_peer.o llog_osd.o \
+ local_storage.o statfs_pack.o obdo.o obd_config.o obd_mount.o\
+ mea.o lu_object.o dt_object.o capa.o cl_object.o \
+ cl_page.o cl_lock.o cl_io.o lu_ref.o acl.o idmap.o \
+ lu_ucred.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/obdclass/acl.c b/drivers/staging/lustre/lustre/obdclass/acl.c
new file mode 100644
index 000000000000..c2a6702c9f2c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/acl.c
@@ -0,0 +1,546 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/acl.c
+ *
+ * Lustre Access Control List.
+ *
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <lu_object.h>
+#include <lustre_acl.h>
+#include <lustre_eacl.h>
+#include <obd_support.h>
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+#define CFS_ACL_XATTR_VERSION POSIX_ACL_XATTR_VERSION
+
+enum {
+ ES_UNK = 0, /* unknown stat */
+ ES_UNC = 1, /* ACL entry is not changed */
+ ES_MOD = 2, /* ACL entry is modified */
+ ES_ADD = 3, /* ACL entry is added */
+ ES_DEL = 4 /* ACL entry is deleted */
+};
+
+static inline void lustre_ext_acl_le_to_cpu(ext_acl_xattr_entry *d,
+ ext_acl_xattr_entry *s)
+{
+ d->e_tag = le16_to_cpu(s->e_tag);
+ d->e_perm = le16_to_cpu(s->e_perm);
+ d->e_id = le32_to_cpu(s->e_id);
+ d->e_stat = le32_to_cpu(s->e_stat);
+}
+
+static inline void lustre_ext_acl_cpu_to_le(ext_acl_xattr_entry *d,
+ ext_acl_xattr_entry *s)
+{
+ d->e_tag = cpu_to_le16(s->e_tag);
+ d->e_perm = cpu_to_le16(s->e_perm);
+ d->e_id = cpu_to_le32(s->e_id);
+ d->e_stat = cpu_to_le32(s->e_stat);
+}
+
+static inline void lustre_posix_acl_le_to_cpu(posix_acl_xattr_entry *d,
+ posix_acl_xattr_entry *s)
+{
+ d->e_tag = le16_to_cpu(s->e_tag);
+ d->e_perm = le16_to_cpu(s->e_perm);
+ d->e_id = le32_to_cpu(s->e_id);
+}
+
+static inline void lustre_posix_acl_cpu_to_le(posix_acl_xattr_entry *d,
+ posix_acl_xattr_entry *s)
+{
+ d->e_tag = cpu_to_le16(s->e_tag);
+ d->e_perm = cpu_to_le16(s->e_perm);
+ d->e_id = cpu_to_le32(s->e_id);
+}
+
+
+/* if "new_count == 0", then "new = {a_version, NULL}", NOT NULL. */
+static int lustre_posix_acl_xattr_reduce_space(posix_acl_xattr_header **header,
+ int old_count, int new_count)
+{
+ int old_size = CFS_ACL_XATTR_SIZE(old_count, posix_acl_xattr);
+ int new_size = CFS_ACL_XATTR_SIZE(new_count, posix_acl_xattr);
+ posix_acl_xattr_header *new;
+
+ if (unlikely(old_count <= new_count))
+ return old_size;
+
+ OBD_ALLOC(new, new_size);
+ if (unlikely(new == NULL))
+ return -ENOMEM;
+
+ memcpy(new, *header, new_size);
+ OBD_FREE(*header, old_size);
+ *header = new;
+ return new_size;
+}
+
+/* if "new_count == 0", then "new = {0, NULL}", NOT NULL. */
+static int lustre_ext_acl_xattr_reduce_space(ext_acl_xattr_header **header,
+ int old_count)
+{
+ int ext_count = le32_to_cpu((*header)->a_count);
+ int ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
+ int old_size = CFS_ACL_XATTR_SIZE(old_count, ext_acl_xattr);
+ ext_acl_xattr_header *new;
+
+ if (unlikely(old_count <= ext_count))
+ return 0;
+
+ OBD_ALLOC(new, ext_size);
+ if (unlikely(new == NULL))
+ return -ENOMEM;
+
+ memcpy(new, *header, ext_size);
+ OBD_FREE(*header, old_size);
+ *header = new;
+ return 0;
+}
+
+/*
+ * Generate new extended ACL based on the posix ACL.
+ */
+ext_acl_xattr_header *
+lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size)
+{
+ int count, i, esize;
+ ext_acl_xattr_header *new;
+ ENTRY;
+
+ if (unlikely(size < 0))
+ RETURN(ERR_PTR(-EINVAL));
+ else if (!size)
+ count = 0;
+ else
+ count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
+ esize = CFS_ACL_XATTR_SIZE(count, ext_acl_xattr);
+ OBD_ALLOC(new, esize);
+ if (unlikely(new == NULL))
+ RETURN(ERR_PTR(-ENOMEM));
+
+ new->a_count = cpu_to_le32(count);
+ for (i = 0; i < count; i++) {
+ new->a_entries[i].e_tag = header->a_entries[i].e_tag;
+ new->a_entries[i].e_perm = header->a_entries[i].e_perm;
+ new->a_entries[i].e_id = header->a_entries[i].e_id;
+ new->a_entries[i].e_stat = cpu_to_le32(ES_UNK);
+ }
+
+ RETURN(new);
+}
+EXPORT_SYMBOL(lustre_posix_acl_xattr_2ext);
+
+/*
+ * Filter out the "nobody" entries in the posix ACL.
+ */
+int lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, int size,
+ posix_acl_xattr_header **out)
+{
+ int count, i, j, rc = 0;
+ __u32 id;
+ posix_acl_xattr_header *new;
+ ENTRY;
+
+ if (unlikely(size < 0))
+ RETURN(-EINVAL);
+ else if (!size)
+ RETURN(0);
+
+ OBD_ALLOC(new, size);
+ if (unlikely(new == NULL))
+ RETURN(-ENOMEM);
+
+ new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION);
+ count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
+ for (i = 0, j = 0; i < count; i++) {
+ id = le32_to_cpu(header->a_entries[i].e_id);
+ switch (le16_to_cpu(header->a_entries[i].e_tag)) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ if (id != ACL_UNDEFINED_ID)
+ GOTO(_out, rc = -EIO);
+
+ memcpy(&new->a_entries[j++], &header->a_entries[i],
+ sizeof(posix_acl_xattr_entry));
+ break;
+ case ACL_USER:
+ if (id != NOBODY_UID)
+ memcpy(&new->a_entries[j++],
+ &header->a_entries[i],
+ sizeof(posix_acl_xattr_entry));
+ break;
+ case ACL_GROUP:
+ if (id != NOBODY_GID)
+ memcpy(&new->a_entries[j++],
+ &header->a_entries[i],
+ sizeof(posix_acl_xattr_entry));
+ break;
+ default:
+ GOTO(_out, rc = -EIO);
+ }
+ }
+
+ /* free unused space. */
+ rc = lustre_posix_acl_xattr_reduce_space(&new, count, j);
+ if (rc >= 0) {
+ size = rc;
+ *out = new;
+ rc = 0;
+ }
+ EXIT;
+
+_out:
+ if (rc) {
+ OBD_FREE(new, size);
+ size = rc;
+ }
+ return size;
+}
+EXPORT_SYMBOL(lustre_posix_acl_xattr_filter);
+
+/*
+ * Release the posix ACL space.
+ */
+void lustre_posix_acl_xattr_free(posix_acl_xattr_header *header, int size)
+{
+ OBD_FREE(header, size);
+}
+EXPORT_SYMBOL(lustre_posix_acl_xattr_free);
+
+/*
+ * Release the extended ACL space.
+ */
+void lustre_ext_acl_xattr_free(ext_acl_xattr_header *header)
+{
+ OBD_FREE(header, CFS_ACL_XATTR_SIZE(le32_to_cpu(header->a_count), \
+ ext_acl_xattr));
+}
+EXPORT_SYMBOL(lustre_ext_acl_xattr_free);
+
+static ext_acl_xattr_entry *
+lustre_ext_acl_xattr_search(ext_acl_xattr_header *header,
+ posix_acl_xattr_entry *entry, int *pos)
+{
+ int once, start, end, i, j, count = le32_to_cpu(header->a_count);
+
+ once = 0;
+ start = *pos;
+ end = count;
+
+again:
+ for (i = start; i < end; i++) {
+ if (header->a_entries[i].e_tag == entry->e_tag &&
+ header->a_entries[i].e_id == entry->e_id) {
+ j = i;
+ if (++i >= count)
+ i = 0;
+ *pos = i;
+ return &header->a_entries[j];
+ }
+ }
+
+ if (!once) {
+ once = 1;
+ start = 0;
+ end = *pos;
+ goto again;
+ }
+
+ return NULL;
+}
+
+/*
+ * Merge the posix ACL and the extended ACL into new posix ACL.
+ */
+int lustre_acl_xattr_merge2posix(posix_acl_xattr_header *posix_header, int size,
+ ext_acl_xattr_header *ext_header,
+ posix_acl_xattr_header **out)
+{
+ int posix_count, posix_size, i, j;
+ int ext_count = le32_to_cpu(ext_header->a_count), pos = 0, rc = 0;
+ posix_acl_xattr_entry pe = {ACL_MASK, 0, ACL_UNDEFINED_ID};
+ posix_acl_xattr_header *new;
+ ext_acl_xattr_entry *ee, ae;
+ ENTRY;
+
+ lustre_posix_acl_cpu_to_le(&pe, &pe);
+ ee = lustre_ext_acl_xattr_search(ext_header, &pe, &pos);
+ if (ee == NULL || le32_to_cpu(ee->e_stat) == ES_DEL) {
+ /* there are only base ACL entries at most. */
+ posix_count = 3;
+ posix_size = CFS_ACL_XATTR_SIZE(posix_count, posix_acl_xattr);
+ OBD_ALLOC(new, posix_size);
+ if (unlikely(new == NULL))
+ RETURN(-ENOMEM);
+
+ new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION);
+ for (i = 0, j = 0; i < ext_count; i++) {
+ lustre_ext_acl_le_to_cpu(&ae,
+ &ext_header->a_entries[i]);
+ switch (ae.e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_OTHER:
+ if (ae.e_id != ACL_UNDEFINED_ID)
+ GOTO(_out, rc = -EIO);
+
+ if (ae.e_stat != ES_DEL) {
+ new->a_entries[j].e_tag =
+ ext_header->a_entries[i].e_tag;
+ new->a_entries[j].e_perm =
+ ext_header->a_entries[i].e_perm;
+ new->a_entries[j++].e_id =
+ ext_header->a_entries[i].e_id;
+ }
+ break;
+ case ACL_MASK:
+ case ACL_USER:
+ case ACL_GROUP:
+ if (ae.e_stat == ES_DEL)
+ break;
+ default:
+ GOTO(_out, rc = -EIO);
+ }
+ }
+ } else {
+ /* maybe there are valid ACL_USER or ACL_GROUP entries in the
+ * original server-side ACL, they are regarded as ES_UNC stat.*/
+ int ori_posix_count;
+
+ if (unlikely(size < 0))
+ RETURN(-EINVAL);
+ else if (!size)
+ ori_posix_count = 0;
+ else
+ ori_posix_count =
+ CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
+ posix_count = ori_posix_count + ext_count;
+ posix_size =
+ CFS_ACL_XATTR_SIZE(posix_count, posix_acl_xattr);
+ OBD_ALLOC(new, posix_size);
+ if (unlikely(new == NULL))
+ RETURN(-ENOMEM);
+
+ new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION);
+ /* 1. process the unchanged ACL entries
+ * in the original server-side ACL. */
+ pos = 0;
+ for (i = 0, j = 0; i < ori_posix_count; i++) {
+ ee = lustre_ext_acl_xattr_search(ext_header,
+ &posix_header->a_entries[i], &pos);
+ if (ee == NULL)
+ memcpy(&new->a_entries[j++],
+ &posix_header->a_entries[i],
+ sizeof(posix_acl_xattr_entry));
+ }
+
+ /* 2. process the non-deleted entries
+ * from client-side extended ACL. */
+ for (i = 0; i < ext_count; i++) {
+ if (le16_to_cpu(ext_header->a_entries[i].e_stat) !=
+ ES_DEL) {
+ new->a_entries[j].e_tag =
+ ext_header->a_entries[i].e_tag;
+ new->a_entries[j].e_perm =
+ ext_header->a_entries[i].e_perm;
+ new->a_entries[j++].e_id =
+ ext_header->a_entries[i].e_id;
+ }
+ }
+ }
+
+ /* free unused space. */
+ rc = lustre_posix_acl_xattr_reduce_space(&new, posix_count, j);
+ if (rc >= 0) {
+ posix_size = rc;
+ *out = new;
+ rc = 0;
+ }
+ EXIT;
+
+_out:
+ if (rc) {
+ OBD_FREE(new, posix_size);
+ posix_size = rc;
+ }
+ return posix_size;
+}
+EXPORT_SYMBOL(lustre_acl_xattr_merge2posix);
+
+/*
+ * Merge the posix ACL and the extended ACL into new extended ACL.
+ */
+ext_acl_xattr_header *
+lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
+ ext_acl_xattr_header *ext_header)
+{
+ int ori_ext_count, posix_count, ext_count, ext_size;
+ int i, j, pos = 0, rc = 0;
+ posix_acl_xattr_entry pae;
+ ext_acl_xattr_header *new;
+ ext_acl_xattr_entry *ee, eae;
+ ENTRY;
+
+ if (unlikely(size < 0))
+ RETURN(ERR_PTR(-EINVAL));
+ else if (!size)
+ posix_count = 0;
+ else
+ posix_count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
+ ori_ext_count = le32_to_cpu(ext_header->a_count);
+ ext_count = posix_count + ori_ext_count;
+ ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
+
+ OBD_ALLOC(new, ext_size);
+ if (unlikely(new == NULL))
+ RETURN(ERR_PTR(-ENOMEM));
+
+ for (i = 0, j = 0; i < posix_count; i++) {
+ lustre_posix_acl_le_to_cpu(&pae, &posix_header->a_entries[i]);
+ switch (pae.e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ if (pae.e_id != ACL_UNDEFINED_ID)
+ GOTO(out, rc = -EIO);
+ case ACL_USER:
+ /* ignore "nobody" entry. */
+ if (pae.e_id == NOBODY_UID)
+ break;
+
+ new->a_entries[j].e_tag =
+ posix_header->a_entries[i].e_tag;
+ new->a_entries[j].e_perm =
+ posix_header->a_entries[i].e_perm;
+ new->a_entries[j].e_id =
+ posix_header->a_entries[i].e_id;
+ ee = lustre_ext_acl_xattr_search(ext_header,
+ &posix_header->a_entries[i], &pos);
+ if (ee) {
+ if (posix_header->a_entries[i].e_perm !=
+ ee->e_perm)
+ /* entry modified. */
+ ee->e_stat =
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_MOD);
+ else
+ /* entry unchanged. */
+ ee->e_stat =
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_UNC);
+ } else {
+ /* new entry. */
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_ADD);
+ }
+ break;
+ case ACL_GROUP:
+ /* ignore "nobody" entry. */
+ if (pae.e_id == NOBODY_GID)
+ break;
+ new->a_entries[j].e_tag =
+ posix_header->a_entries[i].e_tag;
+ new->a_entries[j].e_perm =
+ posix_header->a_entries[i].e_perm;
+ new->a_entries[j].e_id =
+ posix_header->a_entries[i].e_id;
+ ee = lustre_ext_acl_xattr_search(ext_header,
+ &posix_header->a_entries[i], &pos);
+ if (ee) {
+ if (posix_header->a_entries[i].e_perm !=
+ ee->e_perm)
+ /* entry modified. */
+ ee->e_stat =
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_MOD);
+ else
+ /* entry unchanged. */
+ ee->e_stat =
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_UNC);
+ } else {
+ /* new entry. */
+ new->a_entries[j++].e_stat =
+ cpu_to_le32(ES_ADD);
+ }
+ break;
+ default:
+ GOTO(out, rc = -EIO);
+ }
+ }
+
+ /* process deleted entries. */
+ for (i = 0; i < ori_ext_count; i++) {
+ lustre_ext_acl_le_to_cpu(&eae, &ext_header->a_entries[i]);
+ if (eae.e_stat == ES_UNK) {
+ /* ignore "nobody" entry. */
+ if ((eae.e_tag == ACL_USER && eae.e_id == NOBODY_UID) ||
+ (eae.e_tag == ACL_GROUP && eae.e_id == NOBODY_GID))
+ continue;
+
+ new->a_entries[j].e_tag =
+ ext_header->a_entries[i].e_tag;
+ new->a_entries[j].e_perm =
+ ext_header->a_entries[i].e_perm;
+ new->a_entries[j].e_id = ext_header->a_entries[i].e_id;
+ new->a_entries[j++].e_stat = cpu_to_le32(ES_DEL);
+ }
+ }
+
+ new->a_count = cpu_to_le32(j);
+ /* free unused space. */
+ rc = lustre_ext_acl_xattr_reduce_space(&new, ext_count);
+ EXIT;
+
+out:
+ if (rc) {
+ OBD_FREE(new, ext_size);
+ new = ERR_PTR(rc);
+ }
+ return new;
+}
+EXPORT_SYMBOL(lustre_acl_xattr_merge2ext);
+
+#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/capa.c b/drivers/staging/lustre/lustre/obdclass/capa.c
new file mode 100644
index 000000000000..3e532f5106e4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/capa.c
@@ -0,0 +1,401 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/capa.c
+ *
+ * Lustre Capability Hash Management
+ *
+ * Author: Lai Siyao<lsy@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lustre/lustre_idl.h>
+
+#include <linux/list.h>
+#include <lustre_capa.h>
+
+#define NR_CAPAHASH 32
+#define CAPA_HASH_SIZE 3000 /* for MDS & OSS */
+
+struct kmem_cache *capa_cachep = NULL;
+
+/* lock for capa hash/capa_list/fo_capa_keys */
+DEFINE_SPINLOCK(capa_lock);
+
+struct list_head capa_list[CAPA_SITE_MAX];
+
+static struct capa_hmac_alg capa_hmac_algs[] = {
+ DEF_CAPA_HMAC_ALG("sha1", SHA1, 20, 20),
+};
+/* capa count */
+int capa_count[CAPA_SITE_MAX] = { 0, };
+
+EXPORT_SYMBOL(capa_cachep);
+EXPORT_SYMBOL(capa_list);
+EXPORT_SYMBOL(capa_lock);
+EXPORT_SYMBOL(capa_count);
+
+struct hlist_head *init_capa_hash(void)
+{
+ struct hlist_head *hash;
+ int nr_hash, i;
+
+ OBD_ALLOC(hash, PAGE_CACHE_SIZE);
+ if (!hash)
+ return NULL;
+
+ nr_hash = PAGE_CACHE_SIZE / sizeof(struct hlist_head);
+ LASSERT(nr_hash > NR_CAPAHASH);
+
+ for (i = 0; i < NR_CAPAHASH; i++)
+ INIT_HLIST_HEAD(hash + i);
+ return hash;
+}
+EXPORT_SYMBOL(init_capa_hash);
+
+static inline int capa_on_server(struct obd_capa *ocapa)
+{
+ return ocapa->c_site == CAPA_SITE_SERVER;
+}
+
+static inline void capa_delete(struct obd_capa *ocapa)
+{
+ LASSERT(capa_on_server(ocapa));
+ hlist_del_init(&ocapa->u.tgt.c_hash);
+ list_del_init(&ocapa->c_list);
+ capa_count[ocapa->c_site]--;
+ /* release the ref when alloc */
+ capa_put(ocapa);
+}
+
+void cleanup_capa_hash(struct hlist_head *hash)
+{
+ int i;
+ struct hlist_node *next;
+ struct obd_capa *oc;
+
+ spin_lock(&capa_lock);
+ for (i = 0; i < NR_CAPAHASH; i++) {
+ hlist_for_each_entry_safe(oc, next, hash + i,
+ u.tgt.c_hash)
+ capa_delete(oc);
+ }
+ spin_unlock(&capa_lock);
+
+ OBD_FREE(hash, PAGE_CACHE_SIZE);
+}
+EXPORT_SYMBOL(cleanup_capa_hash);
+
+static inline int capa_hashfn(struct lu_fid *fid)
+{
+ return (fid_oid(fid) ^ fid_ver(fid)) *
+ (unsigned long)(fid_seq(fid) + 1) % NR_CAPAHASH;
+}
+
+/* capa renewal time check is earlier than that on client, which is to prevent
+ * client renew right after obtaining it. */
+static inline int capa_is_to_expire(struct obd_capa *oc)
+{
+ return cfs_time_before(cfs_time_sub(oc->c_expiry,
+ cfs_time_seconds(oc->c_capa.lc_timeout)*2/3),
+ cfs_time_current());
+}
+
+static struct obd_capa *find_capa(struct lustre_capa *capa,
+ struct hlist_head *head, int alive)
+{
+ struct obd_capa *ocapa;
+ int len = alive ? offsetof(struct lustre_capa, lc_keyid):sizeof(*capa);
+
+ hlist_for_each_entry(ocapa, head, u.tgt.c_hash) {
+ if (memcmp(&ocapa->c_capa, capa, len))
+ continue;
+ /* don't return one that will expire soon in this case */
+ if (alive && capa_is_to_expire(ocapa))
+ continue;
+
+ LASSERT(capa_on_server(ocapa));
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found");
+ return ocapa;
+ }
+
+ return NULL;
+}
+
+#define LRU_CAPA_DELETE_COUNT 12
+static inline void capa_delete_lru(struct list_head *head)
+{
+ struct obd_capa *ocapa;
+ struct list_head *node = head->next;
+ int count = 0;
+
+ /* free LRU_CAPA_DELETE_COUNT unused capa from head */
+ while (count++ < LRU_CAPA_DELETE_COUNT) {
+ ocapa = list_entry(node, struct obd_capa, c_list);
+ node = node->next;
+ if (atomic_read(&ocapa->c_refc))
+ continue;
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free lru");
+ capa_delete(ocapa);
+ }
+}
+
+/* add or update */
+struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa)
+{
+ struct hlist_head *head = hash + capa_hashfn(&capa->lc_fid);
+ struct obd_capa *ocapa, *old = NULL;
+ struct list_head *list = &capa_list[CAPA_SITE_SERVER];
+
+ ocapa = alloc_capa(CAPA_SITE_SERVER);
+ if (IS_ERR(ocapa))
+ return NULL;
+
+ spin_lock(&capa_lock);
+ old = find_capa(capa, head, 0);
+ if (!old) {
+ ocapa->c_capa = *capa;
+ set_capa_expiry(ocapa);
+ hlist_add_head(&ocapa->u.tgt.c_hash, head);
+ list_add_tail(&ocapa->c_list, list);
+ capa_get(ocapa);
+ capa_count[CAPA_SITE_SERVER]++;
+ if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE)
+ capa_delete_lru(list);
+ spin_unlock(&capa_lock);
+ return ocapa;
+ } else {
+ capa_get(old);
+ spin_unlock(&capa_lock);
+ capa_put(ocapa);
+ return old;
+ }
+}
+EXPORT_SYMBOL(capa_add);
+
+struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa,
+ int alive)
+{
+ struct obd_capa *ocapa;
+
+ spin_lock(&capa_lock);
+ ocapa = find_capa(capa, hash + capa_hashfn(&capa->lc_fid), alive);
+ if (ocapa) {
+ list_move_tail(&ocapa->c_list,
+ &capa_list[CAPA_SITE_SERVER]);
+ capa_get(ocapa);
+ }
+ spin_unlock(&capa_lock);
+
+ return ocapa;
+}
+EXPORT_SYMBOL(capa_lookup);
+
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key)
+{
+ struct ll_crypto_hash *tfm;
+ struct capa_hmac_alg *alg;
+ int keylen;
+ struct scatterlist sl;
+
+ if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1) {
+ CERROR("unknown capability hmac algorithm!\n");
+ return -EFAULT;
+ }
+
+ alg = &capa_hmac_algs[capa_alg(capa)];
+
+ tfm = ll_crypto_alloc_hash(alg->ha_name, 0, 0);
+ if (!tfm) {
+ CERROR("crypto_alloc_tfm failed, check whether your kernel"
+ "has crypto support!\n");
+ return -ENOMEM;
+ }
+ keylen = alg->ha_keylen;
+
+ sg_set_page(&sl, virt_to_page(capa),
+ offsetof(struct lustre_capa, lc_hmac),
+ (unsigned long)(capa) % PAGE_CACHE_SIZE);
+
+ ll_crypto_hmac(tfm, key, &keylen, &sl, sl.length, hmac);
+ ll_crypto_free_hash(tfm);
+
+ return 0;
+}
+EXPORT_SYMBOL(capa_hmac);
+
+int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
+{
+ struct ll_crypto_cipher *tfm;
+ struct scatterlist sd;
+ struct scatterlist ss;
+ struct blkcipher_desc desc;
+ unsigned int min;
+ int rc;
+ char alg[CRYPTO_MAX_ALG_NAME+1] = "aes";
+ ENTRY;
+
+ /* passing "aes" in a variable instead of a constant string keeps gcc
+ * 4.3.2 happy */
+ tfm = ll_crypto_alloc_blkcipher(alg, 0, 0 );
+ if (IS_ERR(tfm)) {
+ CERROR("failed to load transform for aes\n");
+ RETURN(PTR_ERR(tfm));
+ }
+
+ min = ll_crypto_tfm_alg_min_keysize(tfm);
+ if (keylen < min) {
+ CERROR("keylen at least %d bits for aes\n", min * 8);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ rc = ll_crypto_blkcipher_setkey(tfm, key, min);
+ if (rc) {
+ CERROR("failed to setting key for aes\n");
+ GOTO(out, rc);
+ }
+
+ sg_set_page(&sd, virt_to_page(d), 16,
+ (unsigned long)(d) % PAGE_CACHE_SIZE);
+
+ sg_set_page(&ss, virt_to_page(s), 16,
+ (unsigned long)(s) % PAGE_CACHE_SIZE);
+ desc.tfm = tfm;
+ desc.info = NULL;
+ desc.flags = 0;
+ rc = ll_crypto_blkcipher_encrypt(&desc, &sd, &ss, 16);
+ if (rc) {
+ CERROR("failed to encrypt for aes\n");
+ GOTO(out, rc);
+ }
+
+ EXIT;
+
+out:
+ ll_crypto_free_blkcipher(tfm);
+ return rc;
+}
+EXPORT_SYMBOL(capa_encrypt_id);
+
+int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
+{
+ struct ll_crypto_cipher *tfm;
+ struct scatterlist sd;
+ struct scatterlist ss;
+ struct blkcipher_desc desc;
+ unsigned int min;
+ int rc;
+ char alg[CRYPTO_MAX_ALG_NAME+1] = "aes";
+ ENTRY;
+
+ /* passing "aes" in a variable instead of a constant string keeps gcc
+ * 4.3.2 happy */
+ tfm = ll_crypto_alloc_blkcipher(alg, 0, 0 );
+ if (IS_ERR(tfm)) {
+ CERROR("failed to load transform for aes\n");
+ RETURN(PTR_ERR(tfm));
+ }
+
+ min = ll_crypto_tfm_alg_min_keysize(tfm);
+ if (keylen < min) {
+ CERROR("keylen at least %d bits for aes\n", min * 8);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ rc = ll_crypto_blkcipher_setkey(tfm, key, min);
+ if (rc) {
+ CERROR("failed to setting key for aes\n");
+ GOTO(out, rc);
+ }
+
+ sg_set_page(&sd, virt_to_page(d), 16,
+ (unsigned long)(d) % PAGE_CACHE_SIZE);
+
+ sg_set_page(&ss, virt_to_page(s), 16,
+ (unsigned long)(s) % PAGE_CACHE_SIZE);
+
+ desc.tfm = tfm;
+ desc.info = NULL;
+ desc.flags = 0;
+ rc = ll_crypto_blkcipher_decrypt(&desc, &sd, &ss, 16);
+ if (rc) {
+ CERROR("failed to decrypt for aes\n");
+ GOTO(out, rc);
+ }
+
+ EXIT;
+
+out:
+ ll_crypto_free_blkcipher(tfm);
+ return rc;
+}
+EXPORT_SYMBOL(capa_decrypt_id);
+
+void capa_cpy(void *capa, struct obd_capa *ocapa)
+{
+ spin_lock(&ocapa->c_lock);
+ *(struct lustre_capa *)capa = ocapa->c_capa;
+ spin_unlock(&ocapa->c_lock);
+}
+EXPORT_SYMBOL(capa_cpy);
+
+void _debug_capa(struct lustre_capa *c,
+ struct libcfs_debug_msg_data *msgdata,
+ const char *fmt, ... )
+{
+ va_list args;
+ va_start(args, fmt);
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " capability@%p fid "DFID" opc "LPX64" uid "LPU64
+ " gid "LPU64" flags %u alg %d keyid %u timeout %u "
+ "expiry %u\n", c, PFID(capa_fid(c)), capa_opc(c),
+ capa_uid(c), capa_gid(c), capa_flags(c),
+ capa_alg(c), capa_keyid(c), capa_timeout(c),
+ capa_expiry(c));
+ va_end(args);
+}
+EXPORT_SYMBOL(_debug_capa);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
new file mode 100644
index 000000000000..7eb0ad7b3644
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Internal cl interfaces.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+#ifndef _CL_INTERNAL_H
+#define _CL_INTERNAL_H
+
+#define CLT_PVEC_SIZE (14)
+
+/**
+ * Possible levels of the nesting. Currently this is 2: there are "top"
+ * entities (files, extent locks), and "sub" entities (stripes and stripe
+ * locks). This is used only for debugging counters right now.
+ */
+enum clt_nesting_level {
+ CNL_TOP,
+ CNL_SUB,
+ CNL_NR
+};
+
+/**
+ * Counters used to check correctness of cl_lock interface usage.
+ */
+struct cl_thread_counters {
+ /**
+ * Number of outstanding calls to cl_lock_mutex_get() made by the
+ * current thread. For debugging.
+ */
+ int ctc_nr_locks_locked;
+ /** List of locked locks. */
+ struct lu_ref ctc_locks_locked;
+ /** Number of outstanding holds on locks. */
+ int ctc_nr_held;
+ /** Number of outstanding uses on locks. */
+ int ctc_nr_used;
+ /** Number of held extent locks. */
+ int ctc_nr_locks_acquired;
+};
+
+/**
+ * Thread local state internal for generic cl-code.
+ */
+struct cl_thread_info {
+ /*
+ * Common fields.
+ */
+ struct cl_io clt_io;
+ struct cl_2queue clt_queue;
+
+ /*
+ * Fields used by cl_lock.c
+ */
+ struct cl_lock_descr clt_descr;
+ struct cl_page_list clt_list;
+ /**
+ * Counters for every level of lock nesting.
+ */
+ struct cl_thread_counters clt_counters[CNL_NR];
+ /** @} debugging */
+
+ /*
+ * Fields used by cl_page.c
+ */
+ struct cl_page *clt_pvec[CLT_PVEC_SIZE];
+
+ /*
+ * Fields used by cl_io.c
+ */
+ /**
+ * Pointer to the topmost ongoing IO in this thread.
+ */
+ struct cl_io *clt_current_io;
+ /**
+ * Used for submitting a sync io.
+ */
+ struct cl_sync_io clt_anchor;
+ /**
+ * Fields used by cl_lock_discard_pages().
+ */
+ pgoff_t clt_next_index;
+ pgoff_t clt_fn_index; /* first non-overlapped index */
+};
+
+struct cl_thread_info *cl_env_info(const struct lu_env *env);
+
+#endif /* _CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
new file mode 100644
index 000000000000..75c9be8875e0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -0,0 +1,1753 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Client IO.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_fid.h>
+#include <linux/list.h>
+#include <cl_object.h>
+#include "cl_internal.h"
+
+/*****************************************************************************
+ *
+ * cl_io interface.
+ *
+ */
+
+#define cl_io_for_each(slice, io) \
+ list_for_each_entry((slice), &io->ci_layers, cis_linkage)
+#define cl_io_for_each_reverse(slice, io) \
+ list_for_each_entry_reverse((slice), &io->ci_layers, cis_linkage)
+
+static inline int cl_io_type_is_valid(enum cl_io_type type)
+{
+ return CIT_READ <= type && type < CIT_OP_NR;
+}
+
+static inline int cl_io_is_loopable(const struct cl_io *io)
+{
+ return cl_io_type_is_valid(io->ci_type) && io->ci_type != CIT_MISC;
+}
+
+/**
+ * Returns true iff there is an IO ongoing in the given environment.
+ */
+int cl_io_is_going(const struct lu_env *env)
+{
+ return cl_env_info(env)->clt_current_io != NULL;
+}
+EXPORT_SYMBOL(cl_io_is_going);
+
+/**
+ * cl_io invariant that holds at all times when exported cl_io_*() functions
+ * are entered and left.
+ */
+static int cl_io_invariant(const struct cl_io *io)
+{
+ struct cl_io *up;
+
+ up = io->ci_parent;
+ return
+ /*
+ * io can own pages only when it is ongoing. Sub-io might
+ * still be in CIS_LOCKED state when top-io is in
+ * CIS_IO_GOING.
+ */
+ ergo(io->ci_owned_nr > 0, io->ci_state == CIS_IO_GOING ||
+ (io->ci_state == CIS_LOCKED && up != NULL));
+}
+
+/**
+ * Finalize \a io, by calling cl_io_operations::cio_fini() bottom-to-top.
+ */
+void cl_io_fini(const struct lu_env *env, struct cl_io *io)
+{
+ struct cl_io_slice *slice;
+ struct cl_thread_info *info;
+
+ LINVRNT(cl_io_type_is_valid(io->ci_type));
+ LINVRNT(cl_io_invariant(io));
+ ENTRY;
+
+ while (!list_empty(&io->ci_layers)) {
+ slice = container_of(io->ci_layers.prev, struct cl_io_slice,
+ cis_linkage);
+ list_del_init(&slice->cis_linkage);
+ if (slice->cis_iop->op[io->ci_type].cio_fini != NULL)
+ slice->cis_iop->op[io->ci_type].cio_fini(env, slice);
+ /*
+ * Invalidate slice to catch use after free. This assumes that
+ * slices are allocated within session and can be touched
+ * after ->cio_fini() returns.
+ */
+ slice->cis_io = NULL;
+ }
+ io->ci_state = CIS_FINI;
+ info = cl_env_info(env);
+ if (info->clt_current_io == io)
+ info->clt_current_io = NULL;
+
+ /* sanity check for layout change */
+ switch(io->ci_type) {
+ case CIT_READ:
+ case CIT_WRITE:
+ break;
+ case CIT_FAULT:
+ case CIT_FSYNC:
+ LASSERT(!io->ci_need_restart);
+ break;
+ case CIT_SETATTR:
+ case CIT_MISC:
+ /* Check ignore layout change conf */
+ LASSERT(ergo(io->ci_ignore_layout || !io->ci_verify_layout,
+ !io->ci_need_restart));
+ break;
+ default:
+ LBUG();
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_fini);
+
+static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, struct cl_object *obj)
+{
+ struct cl_object *scan;
+ int result;
+
+ LINVRNT(io->ci_state == CIS_ZERO || io->ci_state == CIS_FINI);
+ LINVRNT(cl_io_type_is_valid(iot));
+ LINVRNT(cl_io_invariant(io));
+ ENTRY;
+
+ io->ci_type = iot;
+ INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
+ INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
+ INIT_LIST_HEAD(&io->ci_lockset.cls_done);
+ INIT_LIST_HEAD(&io->ci_layers);
+
+ result = 0;
+ cl_object_for_each(scan, obj) {
+ if (scan->co_ops->coo_io_init != NULL) {
+ result = scan->co_ops->coo_io_init(env, scan, io);
+ if (result != 0)
+ break;
+ }
+ }
+ if (result == 0)
+ io->ci_state = CIS_INIT;
+ RETURN(result);
+}
+
+/**
+ * Initialize sub-io, by calling cl_io_operations::cio_init() top-to-bottom.
+ *
+ * \pre obj != cl_object_top(obj)
+ */
+int cl_io_sub_init(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, struct cl_object *obj)
+{
+ struct cl_thread_info *info = cl_env_info(env);
+
+ LASSERT(obj != cl_object_top(obj));
+ if (info->clt_current_io == NULL)
+ info->clt_current_io = io;
+ return cl_io_init0(env, io, iot, obj);
+}
+EXPORT_SYMBOL(cl_io_sub_init);
+
+/**
+ * Initialize \a io, by calling cl_io_operations::cio_init() top-to-bottom.
+ *
+ * Caller has to call cl_io_fini() after a call to cl_io_init(), no matter
+ * what the latter returned.
+ *
+ * \pre obj == cl_object_top(obj)
+ * \pre cl_io_type_is_valid(iot)
+ * \post cl_io_type_is_valid(io->ci_type) && io->ci_type == iot
+ */
+int cl_io_init(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, struct cl_object *obj)
+{
+ struct cl_thread_info *info = cl_env_info(env);
+
+ LASSERT(obj == cl_object_top(obj));
+ LASSERT(info->clt_current_io == NULL);
+
+ info->clt_current_io = io;
+ return cl_io_init0(env, io, iot, obj);
+}
+EXPORT_SYMBOL(cl_io_init);
+
+/**
+ * Initialize read or write io.
+ *
+ * \pre iot == CIT_READ || iot == CIT_WRITE
+ */
+int cl_io_rw_init(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_type iot, loff_t pos, size_t count)
+{
+ LINVRNT(iot == CIT_READ || iot == CIT_WRITE);
+ LINVRNT(io->ci_obj != NULL);
+ ENTRY;
+
+ LU_OBJECT_HEADER(D_VFSTRACE, env, &io->ci_obj->co_lu,
+ "io range: %u ["LPU64", "LPU64") %u %u\n",
+ iot, (__u64)pos, (__u64)pos + count,
+ io->u.ci_rw.crw_nonblock, io->u.ci_wr.wr_append);
+ io->u.ci_rw.crw_pos = pos;
+ io->u.ci_rw.crw_count = count;
+ RETURN(cl_io_init(env, io, iot, io->ci_obj));
+}
+EXPORT_SYMBOL(cl_io_rw_init);
+
+static inline const struct lu_fid *
+cl_lock_descr_fid(const struct cl_lock_descr *descr)
+{
+ return lu_object_fid(&descr->cld_obj->co_lu);
+}
+
+static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
+ const struct cl_lock_descr *d1)
+{
+ return lu_fid_cmp(cl_lock_descr_fid(d0), cl_lock_descr_fid(d1)) ?:
+ __diff_normalize(d0->cld_start, d1->cld_start);
+}
+
+static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
+ const struct cl_lock_descr *d1)
+{
+ int ret;
+
+ ret = lu_fid_cmp(cl_lock_descr_fid(d0), cl_lock_descr_fid(d1));
+ if (ret)
+ return ret;
+ if (d0->cld_end < d1->cld_start)
+ return -1;
+ if (d0->cld_start > d0->cld_end)
+ return 1;
+ return 0;
+}
+
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+ const struct cl_lock_descr *d1)
+{
+ d0->cld_start = min(d0->cld_start, d1->cld_start);
+ d0->cld_end = max(d0->cld_end, d1->cld_end);
+
+ if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+ d0->cld_mode = CLM_WRITE;
+
+ if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+ d0->cld_mode = CLM_GROUP;
+}
+
+/*
+ * Sort locks in lexicographical order of their (fid, start-offset) pairs.
+ */
+static void cl_io_locks_sort(struct cl_io *io)
+{
+ int done = 0;
+
+ ENTRY;
+ /* hidden treasure: bubble sort for now. */
+ do {
+ struct cl_io_lock_link *curr;
+ struct cl_io_lock_link *prev;
+ struct cl_io_lock_link *temp;
+
+ done = 1;
+ prev = NULL;
+
+ list_for_each_entry_safe(curr, temp,
+ &io->ci_lockset.cls_todo,
+ cill_linkage) {
+ if (prev != NULL) {
+ switch (cl_lock_descr_sort(&prev->cill_descr,
+ &curr->cill_descr)) {
+ case 0:
+ /*
+ * IMPOSSIBLE: Identical locks are
+ * already removed at
+ * this point.
+ */
+ default:
+ LBUG();
+ case +1:
+ list_move_tail(&curr->cill_linkage,
+ &prev->cill_linkage);
+ done = 0;
+ continue; /* don't change prev: it's
+ * still "previous" */
+ case -1: /* already in order */
+ break;
+ }
+ }
+ prev = curr;
+ }
+ } while (!done);
+ EXIT;
+}
+
+/**
+ * Check whether \a queue contains locks matching \a need.
+ *
+ * \retval +ve there is a matching lock in the \a queue
+ * \retval 0 there are no matching locks in the \a queue
+ */
+int cl_queue_match(const struct list_head *queue,
+ const struct cl_lock_descr *need)
+{
+ struct cl_io_lock_link *scan;
+
+ ENTRY;
+ list_for_each_entry(scan, queue, cill_linkage) {
+ if (cl_lock_descr_match(&scan->cill_descr, need))
+ RETURN(+1);
+ }
+ RETURN(0);
+}
+EXPORT_SYMBOL(cl_queue_match);
+
+static int cl_queue_merge(const struct list_head *queue,
+ const struct cl_lock_descr *need)
+{
+ struct cl_io_lock_link *scan;
+
+ ENTRY;
+ list_for_each_entry(scan, queue, cill_linkage) {
+ if (cl_lock_descr_cmp(&scan->cill_descr, need))
+ continue;
+ cl_lock_descr_merge(&scan->cill_descr, need);
+ CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
+ scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
+ scan->cill_descr.cld_end);
+ RETURN(+1);
+ }
+ RETURN(0);
+
+}
+
+static int cl_lockset_match(const struct cl_lockset *set,
+ const struct cl_lock_descr *need)
+{
+ return cl_queue_match(&set->cls_curr, need) ||
+ cl_queue_match(&set->cls_done, need);
+}
+
+static int cl_lockset_merge(const struct cl_lockset *set,
+ const struct cl_lock_descr *need)
+{
+ return cl_queue_merge(&set->cls_todo, need) ||
+ cl_lockset_match(set, need);
+}
+
+static int cl_lockset_lock_one(const struct lu_env *env,
+ struct cl_io *io, struct cl_lockset *set,
+ struct cl_io_lock_link *link)
+{
+ struct cl_lock *lock;
+ int result;
+
+ ENTRY;
+
+ lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
+
+ if (!IS_ERR(lock)) {
+ link->cill_lock = lock;
+ list_move(&link->cill_linkage, &set->cls_curr);
+ if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
+ result = cl_wait(env, lock);
+ if (result == 0)
+ list_move(&link->cill_linkage,
+ &set->cls_done);
+ } else
+ result = 0;
+ } else
+ result = PTR_ERR(lock);
+ RETURN(result);
+}
+
+static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
+ struct cl_io_lock_link *link)
+{
+ struct cl_lock *lock = link->cill_lock;
+
+ ENTRY;
+ list_del_init(&link->cill_linkage);
+ if (lock != NULL) {
+ cl_lock_release(env, lock, "io", io);
+ link->cill_lock = NULL;
+ }
+ if (link->cill_fini != NULL)
+ link->cill_fini(env, link);
+ EXIT;
+}
+
+static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
+ struct cl_lockset *set)
+{
+ struct cl_io_lock_link *link;
+ struct cl_io_lock_link *temp;
+ struct cl_lock *lock;
+ int result;
+
+ ENTRY;
+ result = 0;
+ list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+ if (!cl_lockset_match(set, &link->cill_descr)) {
+ /* XXX some locking to guarantee that locks aren't
+ * expanded in between. */
+ result = cl_lockset_lock_one(env, io, set, link);
+ if (result != 0)
+ break;
+ } else
+ cl_lock_link_fini(env, io, link);
+ }
+ if (result == 0) {
+ list_for_each_entry_safe(link, temp,
+ &set->cls_curr, cill_linkage) {
+ lock = link->cill_lock;
+ result = cl_wait(env, lock);
+ if (result == 0)
+ list_move(&link->cill_linkage,
+ &set->cls_done);
+ else
+ break;
+ }
+ }
+ RETURN(result);
+}
+
+/**
+ * Takes locks necessary for the current iteration of io.
+ *
+ * Calls cl_io_operations::cio_lock() top-to-bottom to collect locks required
+ * by layers for the current iteration. Then sort locks (to avoid dead-locks),
+ * and acquire them.
+ */
+int cl_io_lock(const struct lu_env *env, struct cl_io *io)
+{
+ const struct cl_io_slice *scan;
+ int result = 0;
+
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(io->ci_state == CIS_IT_STARTED);
+ LINVRNT(cl_io_invariant(io));
+
+ ENTRY;
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_lock == NULL)
+ continue;
+ result = scan->cis_iop->op[io->ci_type].cio_lock(env, scan);
+ if (result != 0)
+ break;
+ }
+ if (result == 0) {
+ cl_io_locks_sort(io);
+ result = cl_lockset_lock(env, io, &io->ci_lockset);
+ }
+ if (result != 0)
+ cl_io_unlock(env, io);
+ else
+ io->ci_state = CIS_LOCKED;
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_lock);
+
+/**
+ * Release locks takes by io.
+ */
+void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
+{
+ struct cl_lockset *set;
+ struct cl_io_lock_link *link;
+ struct cl_io_lock_link *temp;
+ const struct cl_io_slice *scan;
+
+ LASSERT(cl_io_is_loopable(io));
+ LASSERT(CIS_IT_STARTED <= io->ci_state && io->ci_state < CIS_UNLOCKED);
+ LINVRNT(cl_io_invariant(io));
+
+ ENTRY;
+ set = &io->ci_lockset;
+
+ list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
+ cl_lock_link_fini(env, io, link);
+
+ list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
+ cl_lock_link_fini(env, io, link);
+
+ list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
+ cl_unuse(env, link->cill_lock);
+ cl_lock_link_fini(env, io, link);
+ }
+ cl_io_for_each_reverse(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_unlock != NULL)
+ scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
+ }
+ io->ci_state = CIS_UNLOCKED;
+ LASSERT(!cl_env_info(env)->clt_counters[CNL_TOP].ctc_nr_locks_acquired);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_unlock);
+
+/**
+ * Prepares next iteration of io.
+ *
+ * Calls cl_io_operations::cio_iter_init() top-to-bottom. This exists to give
+ * layers a chance to modify io parameters, e.g., so that lov can restrict io
+ * to a single stripe.
+ */
+int cl_io_iter_init(const struct lu_env *env, struct cl_io *io)
+{
+ const struct cl_io_slice *scan;
+ int result;
+
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(io->ci_state == CIS_INIT || io->ci_state == CIS_IT_ENDED);
+ LINVRNT(cl_io_invariant(io));
+
+ ENTRY;
+ result = 0;
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_iter_init == NULL)
+ continue;
+ result = scan->cis_iop->op[io->ci_type].cio_iter_init(env,
+ scan);
+ if (result != 0)
+ break;
+ }
+ if (result == 0)
+ io->ci_state = CIS_IT_STARTED;
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_iter_init);
+
+/**
+ * Finalizes io iteration.
+ *
+ * Calls cl_io_operations::cio_iter_fini() bottom-to-top.
+ */
+void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io)
+{
+ const struct cl_io_slice *scan;
+
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(io->ci_state == CIS_UNLOCKED);
+ LINVRNT(cl_io_invariant(io));
+
+ ENTRY;
+ cl_io_for_each_reverse(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_iter_fini != NULL)
+ scan->cis_iop->op[io->ci_type].cio_iter_fini(env, scan);
+ }
+ io->ci_state = CIS_IT_ENDED;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_iter_fini);
+
+/**
+ * Records that read or write io progressed \a nob bytes forward.
+ */
+void cl_io_rw_advance(const struct lu_env *env, struct cl_io *io, size_t nob)
+{
+ const struct cl_io_slice *scan;
+
+ LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+ nob == 0);
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(cl_io_invariant(io));
+
+ ENTRY;
+
+ io->u.ci_rw.crw_pos += nob;
+ io->u.ci_rw.crw_count -= nob;
+
+ /* layers have to be notified. */
+ cl_io_for_each_reverse(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_advance != NULL)
+ scan->cis_iop->op[io->ci_type].cio_advance(env, scan,
+ nob);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_rw_advance);
+
+/**
+ * Adds a lock to a lockset.
+ */
+int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
+ struct cl_io_lock_link *link)
+{
+ int result;
+
+ ENTRY;
+ if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr))
+ result = +1;
+ else {
+ list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
+ result = 0;
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_lock_add);
+
+static void cl_free_io_lock_link(const struct lu_env *env,
+ struct cl_io_lock_link *link)
+{
+ OBD_FREE_PTR(link);
+}
+
+/**
+ * Allocates new lock link, and uses it to add a lock to a lockset.
+ */
+int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock_descr *descr)
+{
+ struct cl_io_lock_link *link;
+ int result;
+
+ ENTRY;
+ OBD_ALLOC_PTR(link);
+ if (link != NULL) {
+ link->cill_descr = *descr;
+ link->cill_fini = cl_free_io_lock_link;
+ result = cl_io_lock_add(env, io, link);
+ if (result) /* lock match */
+ link->cill_fini(env, link);
+ } else
+ result = -ENOMEM;
+
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_lock_alloc_add);
+
+/**
+ * Starts io by calling cl_io_operations::cio_start() top-to-bottom.
+ */
+int cl_io_start(const struct lu_env *env, struct cl_io *io)
+{
+ const struct cl_io_slice *scan;
+ int result = 0;
+
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(io->ci_state == CIS_LOCKED);
+ LINVRNT(cl_io_invariant(io));
+ ENTRY;
+
+ io->ci_state = CIS_IO_GOING;
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_start == NULL)
+ continue;
+ result = scan->cis_iop->op[io->ci_type].cio_start(env, scan);
+ if (result != 0)
+ break;
+ }
+ if (result >= 0)
+ result = 0;
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_start);
+
+/**
+ * Wait until current io iteration is finished by calling
+ * cl_io_operations::cio_end() bottom-to-top.
+ */
+void cl_io_end(const struct lu_env *env, struct cl_io *io)
+{
+ const struct cl_io_slice *scan;
+
+ LINVRNT(cl_io_is_loopable(io));
+ LINVRNT(io->ci_state == CIS_IO_GOING);
+ LINVRNT(cl_io_invariant(io));
+ ENTRY;
+
+ cl_io_for_each_reverse(scan, io) {
+ if (scan->cis_iop->op[io->ci_type].cio_end != NULL)
+ scan->cis_iop->op[io->ci_type].cio_end(env, scan);
+ /* TODO: error handling. */
+ }
+ io->ci_state = CIS_IO_FINISHED;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_end);
+
+static const struct cl_page_slice *
+cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
+{
+ const struct cl_page_slice *slice;
+
+ slice = cl_page_at(page, ios->cis_obj->co_lu.lo_dev->ld_type);
+ LINVRNT(slice != NULL);
+ return slice;
+}
+
+/**
+ * True iff \a page is within \a io range.
+ */
+static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io)
+{
+ int result = 1;
+ loff_t start;
+ loff_t end;
+ pgoff_t idx;
+
+ idx = page->cp_index;
+ switch (io->ci_type) {
+ case CIT_READ:
+ case CIT_WRITE:
+ /*
+ * check that [start, end) and [pos, pos + count) extents
+ * overlap.
+ */
+ if (!cl_io_is_append(io)) {
+ const struct cl_io_rw_common *crw = &(io->u.ci_rw);
+ start = cl_offset(page->cp_obj, idx);
+ end = cl_offset(page->cp_obj, idx + 1);
+ result = crw->crw_pos < end &&
+ start < crw->crw_pos + crw->crw_count;
+ }
+ break;
+ case CIT_FAULT:
+ result = io->u.ci_fault.ft_index == idx;
+ break;
+ default:
+ LBUG();
+ }
+ return result;
+}
+
+/**
+ * Called by read io, when page has to be read from the server.
+ *
+ * \see cl_io_operations::cio_read_page()
+ */
+int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ const struct cl_io_slice *scan;
+ struct cl_2queue *queue;
+ int result = 0;
+
+ LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
+ LINVRNT(cl_page_is_owned(page, io));
+ LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
+ LINVRNT(cl_page_in_io(page, io));
+ LINVRNT(cl_io_invariant(io));
+ ENTRY;
+
+ queue = &io->ci_queue;
+
+ cl_2queue_init(queue);
+ /*
+ * ->cio_read_page() methods called in the loop below are supposed to
+ * never block waiting for network (the only subtle point is the
+ * creation of new pages for read-ahead that might result in cache
+ * shrinking, but currently only clean pages are shrunk and this
+ * requires no network io).
+ *
+ * Should this ever starts blocking, retry loop would be needed for
+ * "parallel io" (see CLO_REPEAT loops in cl_lock.c).
+ */
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->cio_read_page != NULL) {
+ const struct cl_page_slice *slice;
+
+ slice = cl_io_slice_page(scan, page);
+ LINVRNT(slice != NULL);
+ result = scan->cis_iop->cio_read_page(env, scan, slice);
+ if (result != 0)
+ break;
+ }
+ }
+ if (result == 0)
+ result = cl_io_submit_rw(env, io, CRT_READ, queue);
+ /*
+ * Unlock unsent pages in case of error.
+ */
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_2queue_fini(env, queue);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_read_page);
+
+/**
+ * Called by write io to prepare page to receive data from user buffer.
+ *
+ * \see cl_io_operations::cio_prepare_write()
+ */
+int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, unsigned from, unsigned to)
+{
+ const struct cl_io_slice *scan;
+ int result = 0;
+
+ LINVRNT(io->ci_type == CIT_WRITE);
+ LINVRNT(cl_page_is_owned(page, io));
+ LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
+ LINVRNT(cl_io_invariant(io));
+ LASSERT(cl_page_in_io(page, io));
+ ENTRY;
+
+ cl_io_for_each_reverse(scan, io) {
+ if (scan->cis_iop->cio_prepare_write != NULL) {
+ const struct cl_page_slice *slice;
+
+ slice = cl_io_slice_page(scan, page);
+ result = scan->cis_iop->cio_prepare_write(env, scan,
+ slice,
+ from, to);
+ if (result != 0)
+ break;
+ }
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_prepare_write);
+
+/**
+ * Called by write io after user data were copied into a page.
+ *
+ * \see cl_io_operations::cio_commit_write()
+ */
+int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, unsigned from, unsigned to)
+{
+ const struct cl_io_slice *scan;
+ int result = 0;
+
+ LINVRNT(io->ci_type == CIT_WRITE);
+ LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
+ LINVRNT(cl_io_invariant(io));
+ /*
+ * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov)
+ * already called cl_page_cache_add(), moving page into CPS_CACHED
+ * state. Better (and more general) way of dealing with such situation
+ * is needed.
+ */
+ LASSERT(cl_page_is_owned(page, io) || page->cp_parent != NULL);
+ LASSERT(cl_page_in_io(page, io));
+ ENTRY;
+
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->cio_commit_write != NULL) {
+ const struct cl_page_slice *slice;
+
+ slice = cl_io_slice_page(scan, page);
+ result = scan->cis_iop->cio_commit_write(env, scan,
+ slice,
+ from, to);
+ if (result != 0)
+ break;
+ }
+ }
+ LINVRNT(result <= 0);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_commit_write);
+
+/**
+ * Submits a list of pages for immediate io.
+ *
+ * After the function gets returned, The submitted pages are moved to
+ * queue->c2_qout queue, and queue->c2_qin contain both the pages don't need
+ * to be submitted, and the pages are errant to submit.
+ *
+ * \returns 0 if at least one page was submitted, error code otherwise.
+ * \see cl_io_operations::cio_submit()
+ */
+int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
+ enum cl_req_type crt, struct cl_2queue *queue)
+{
+ const struct cl_io_slice *scan;
+ int result = 0;
+
+ LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op));
+ ENTRY;
+
+ cl_io_for_each(scan, io) {
+ if (scan->cis_iop->req_op[crt].cio_submit == NULL)
+ continue;
+ result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt,
+ queue);
+ if (result != 0)
+ break;
+ }
+ /*
+ * If ->cio_submit() failed, no pages were sent.
+ */
+ LASSERT(ergo(result != 0, list_empty(&queue->c2_qout.pl_pages)));
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_io_submit_rw);
+
+/**
+ * Submit a sync_io and wait for the IO to be finished, or error happens.
+ * If \a timeout is zero, it means to wait for the IO unconditionally.
+ */
+int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
+ enum cl_req_type iot, struct cl_2queue *queue,
+ long timeout)
+{
+ struct cl_sync_io *anchor = &cl_env_info(env)->clt_anchor;
+ struct cl_page *pg;
+ int rc;
+
+ cl_page_list_for_each(pg, &queue->c2_qin) {
+ LASSERT(pg->cp_sync_io == NULL);
+ pg->cp_sync_io = anchor;
+ }
+
+ cl_sync_io_init(anchor, queue->c2_qin.pl_nr);
+ rc = cl_io_submit_rw(env, io, iot, queue);
+ if (rc == 0) {
+ /*
+ * If some pages weren't sent for any reason (e.g.,
+ * read found up-to-date pages in the cache, or write found
+ * clean pages), count them as completed to avoid infinite
+ * wait.
+ */
+ cl_page_list_for_each(pg, &queue->c2_qin) {
+ pg->cp_sync_io = NULL;
+ cl_sync_io_note(anchor, +1);
+ }
+
+ /* wait for the IO to be finished. */
+ rc = cl_sync_io_wait(env, io, &queue->c2_qout,
+ anchor, timeout);
+ } else {
+ LASSERT(list_empty(&queue->c2_qout.pl_pages));
+ cl_page_list_for_each(pg, &queue->c2_qin)
+ pg->cp_sync_io = NULL;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(cl_io_submit_sync);
+
+/**
+ * Cancel an IO which has been submitted by cl_io_submit_rw.
+ */
+int cl_io_cancel(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue)
+{
+ struct cl_page *page;
+ int result = 0;
+
+ CERROR("Canceling ongoing page trasmission\n");
+ cl_page_list_for_each(page, queue) {
+ int rc;
+
+ LINVRNT(cl_page_in_io(page, io));
+ rc = cl_page_cancel(env, page);
+ result = result ?: rc;
+ }
+ return result;
+}
+EXPORT_SYMBOL(cl_io_cancel);
+
+/**
+ * Main io loop.
+ *
+ * Pumps io through iterations calling
+ *
+ * - cl_io_iter_init()
+ *
+ * - cl_io_lock()
+ *
+ * - cl_io_start()
+ *
+ * - cl_io_end()
+ *
+ * - cl_io_unlock()
+ *
+ * - cl_io_iter_fini()
+ *
+ * repeatedly until there is no more io to do.
+ */
+int cl_io_loop(const struct lu_env *env, struct cl_io *io)
+{
+ int result = 0;
+
+ LINVRNT(cl_io_is_loopable(io));
+ ENTRY;
+
+ do {
+ size_t nob;
+
+ io->ci_continue = 0;
+ result = cl_io_iter_init(env, io);
+ if (result == 0) {
+ nob = io->ci_nob;
+ result = cl_io_lock(env, io);
+ if (result == 0) {
+ /*
+ * Notify layers that locks has been taken,
+ * and do actual i/o.
+ *
+ * - llite: kms, short read;
+ * - llite: generic_file_read();
+ */
+ result = cl_io_start(env, io);
+ /*
+ * Send any remaining pending
+ * io, etc.
+ *
+ * - llite: ll_rw_stats_tally.
+ */
+ cl_io_end(env, io);
+ cl_io_unlock(env, io);
+ cl_io_rw_advance(env, io, io->ci_nob - nob);
+ }
+ }
+ cl_io_iter_fini(env, io);
+ } while (result == 0 && io->ci_continue);
+ if (result == 0)
+ result = io->ci_result;
+ RETURN(result < 0 ? result : 0);
+}
+EXPORT_SYMBOL(cl_io_loop);
+
+/**
+ * Adds io slice to the cl_io.
+ *
+ * This is called by cl_object_operations::coo_io_init() methods to add a
+ * per-layer state to the io. New state is added at the end of
+ * cl_io::ci_layers list, that is, it is at the bottom of the stack.
+ *
+ * \see cl_lock_slice_add(), cl_req_slice_add(), cl_page_slice_add()
+ */
+void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
+ struct cl_object *obj,
+ const struct cl_io_operations *ops)
+{
+ struct list_head *linkage = &slice->cis_linkage;
+
+ LASSERT((linkage->prev == NULL && linkage->next == NULL) ||
+ list_empty(linkage));
+ ENTRY;
+
+ list_add_tail(linkage, &io->ci_layers);
+ slice->cis_io = io;
+ slice->cis_obj = obj;
+ slice->cis_iop = ops;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_io_slice_add);
+
+
+/**
+ * Initializes page list.
+ */
+void cl_page_list_init(struct cl_page_list *plist)
+{
+ ENTRY;
+ plist->pl_nr = 0;
+ INIT_LIST_HEAD(&plist->pl_pages);
+ plist->pl_owner = current;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_init);
+
+/**
+ * Adds a page to a page list.
+ */
+void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page)
+{
+ ENTRY;
+ /* it would be better to check that page is owned by "current" io, but
+ * it is not passed here. */
+ LASSERT(page->cp_owner != NULL);
+ LINVRNT(plist->pl_owner == current);
+
+ lockdep_off();
+ mutex_lock(&page->cp_mutex);
+ lockdep_on();
+ LASSERT(list_empty(&page->cp_batch));
+ list_add_tail(&page->cp_batch, &plist->pl_pages);
+ ++plist->pl_nr;
+ page->cp_queue_ref = lu_ref_add(&page->cp_reference, "queue", plist);
+ cl_page_get(page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_add);
+
+/**
+ * Removes a page from a page list.
+ */
+void cl_page_list_del(const struct lu_env *env,
+ struct cl_page_list *plist, struct cl_page *page)
+{
+ LASSERT(plist->pl_nr > 0);
+ LINVRNT(plist->pl_owner == current);
+
+ ENTRY;
+ list_del_init(&page->cp_batch);
+ lockdep_off();
+ mutex_unlock(&page->cp_mutex);
+ lockdep_on();
+ --plist->pl_nr;
+ lu_ref_del_at(&page->cp_reference, page->cp_queue_ref, "queue", plist);
+ cl_page_put(env, page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_del);
+
+/**
+ * Moves a page from one page list to another.
+ */
+void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page)
+{
+ LASSERT(src->pl_nr > 0);
+ LINVRNT(dst->pl_owner == current);
+ LINVRNT(src->pl_owner == current);
+
+ ENTRY;
+ list_move_tail(&page->cp_batch, &dst->pl_pages);
+ --src->pl_nr;
+ ++dst->pl_nr;
+ lu_ref_set_at(&page->cp_reference,
+ page->cp_queue_ref, "queue", src, dst);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_move);
+
+/**
+ * splice the cl_page_list, just as list head does
+ */
+void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
+{
+ struct cl_page *page;
+ struct cl_page *tmp;
+
+ LINVRNT(list->pl_owner == current);
+ LINVRNT(head->pl_owner == current);
+
+ ENTRY;
+ cl_page_list_for_each_safe(page, tmp, list)
+ cl_page_list_move(head, list, page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_splice);
+
+void cl_page_disown0(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg);
+
+/**
+ * Disowns pages in a queue.
+ */
+void cl_page_list_disown(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ struct cl_page *temp;
+
+ LINVRNT(plist->pl_owner == current);
+
+ ENTRY;
+ cl_page_list_for_each_safe(page, temp, plist) {
+ LASSERT(plist->pl_nr > 0);
+
+ list_del_init(&page->cp_batch);
+ lockdep_off();
+ mutex_unlock(&page->cp_mutex);
+ lockdep_on();
+ --plist->pl_nr;
+ /*
+ * cl_page_disown0 rather than usual cl_page_disown() is used,
+ * because pages are possibly in CPS_FREEING state already due
+ * to the call to cl_page_list_discard().
+ */
+ /*
+ * XXX cl_page_disown0() will fail if page is not locked.
+ */
+ cl_page_disown0(env, io, page);
+ lu_ref_del(&page->cp_reference, "queue", plist);
+ cl_page_put(env, page);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_disown);
+
+/**
+ * Releases pages from queue.
+ */
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ struct cl_page *temp;
+
+ LINVRNT(plist->pl_owner == current);
+
+ ENTRY;
+ cl_page_list_for_each_safe(page, temp, plist)
+ cl_page_list_del(env, plist, page);
+ LASSERT(plist->pl_nr == 0);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_fini);
+
+/**
+ * Owns all pages in a queue.
+ */
+int cl_page_list_own(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ struct cl_page *temp;
+ pgoff_t index = 0;
+ int result;
+
+ LINVRNT(plist->pl_owner == current);
+
+ ENTRY;
+ result = 0;
+ cl_page_list_for_each_safe(page, temp, plist) {
+ LASSERT(index <= page->cp_index);
+ index = page->cp_index;
+ if (cl_page_own(env, io, page) == 0)
+ result = result ?: page->cp_error;
+ else
+ cl_page_list_del(env, plist, page);
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_list_own);
+
+/**
+ * Assumes all pages in a queue.
+ */
+void cl_page_list_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist)
+{
+ struct cl_page *page;
+
+ LINVRNT(plist->pl_owner == current);
+
+ cl_page_list_for_each(page, plist)
+ cl_page_assume(env, io, page);
+}
+EXPORT_SYMBOL(cl_page_list_assume);
+
+/**
+ * Discards all pages in a queue.
+ */
+void cl_page_list_discard(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *plist)
+{
+ struct cl_page *page;
+
+ LINVRNT(plist->pl_owner == current);
+ ENTRY;
+ cl_page_list_for_each(page, plist)
+ cl_page_discard(env, io, page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_list_discard);
+
+/**
+ * Unmaps all pages in a queue from user virtual memory.
+ */
+int cl_page_list_unmap(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ int result;
+
+ LINVRNT(plist->pl_owner == current);
+ ENTRY;
+ result = 0;
+ cl_page_list_for_each(page, plist) {
+ result = cl_page_unmap(env, io, page);
+ if (result != 0)
+ break;
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_list_unmap);
+
+/**
+ * Initialize dual page queue.
+ */
+void cl_2queue_init(struct cl_2queue *queue)
+{
+ ENTRY;
+ cl_page_list_init(&queue->c2_qin);
+ cl_page_list_init(&queue->c2_qout);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_init);
+
+/**
+ * Add a page to the incoming page list of 2-queue.
+ */
+void cl_2queue_add(struct cl_2queue *queue, struct cl_page *page)
+{
+ ENTRY;
+ cl_page_list_add(&queue->c2_qin, page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_add);
+
+/**
+ * Disown pages in both lists of a 2-queue.
+ */
+void cl_2queue_disown(const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue)
+{
+ ENTRY;
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_page_list_disown(env, io, &queue->c2_qout);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_disown);
+
+/**
+ * Discard (truncate) pages in both lists of a 2-queue.
+ */
+void cl_2queue_discard(const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue)
+{
+ ENTRY;
+ cl_page_list_discard(env, io, &queue->c2_qin);
+ cl_page_list_discard(env, io, &queue->c2_qout);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_discard);
+
+/**
+ * Assume to own the pages in cl_2queue
+ */
+void cl_2queue_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_2queue *queue)
+{
+ cl_page_list_assume(env, io, &queue->c2_qin);
+ cl_page_list_assume(env, io, &queue->c2_qout);
+}
+EXPORT_SYMBOL(cl_2queue_assume);
+
+/**
+ * Finalize both page lists of a 2-queue.
+ */
+void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue)
+{
+ ENTRY;
+ cl_page_list_fini(env, &queue->c2_qout);
+ cl_page_list_fini(env, &queue->c2_qin);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_fini);
+
+/**
+ * Initialize a 2-queue to contain \a page in its incoming page list.
+ */
+void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page)
+{
+ ENTRY;
+ cl_2queue_init(queue);
+ cl_2queue_add(queue, page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_2queue_init_page);
+
+/**
+ * Returns top-level io.
+ *
+ * \see cl_object_top(), cl_page_top().
+ */
+struct cl_io *cl_io_top(struct cl_io *io)
+{
+ ENTRY;
+ while (io->ci_parent != NULL)
+ io = io->ci_parent;
+ RETURN(io);
+}
+EXPORT_SYMBOL(cl_io_top);
+
+/**
+ * Prints human readable representation of \a io to the \a f.
+ */
+void cl_io_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_io *io)
+{
+}
+
+/**
+ * Adds request slice to the compound request.
+ *
+ * This is called by cl_device_operations::cdo_req_init() methods to add a
+ * per-layer state to the request. New state is added at the end of
+ * cl_req::crq_layers list, that is, it is at the bottom of the stack.
+ *
+ * \see cl_lock_slice_add(), cl_page_slice_add(), cl_io_slice_add()
+ */
+void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
+ struct cl_device *dev,
+ const struct cl_req_operations *ops)
+{
+ ENTRY;
+ list_add_tail(&slice->crs_linkage, &req->crq_layers);
+ slice->crs_dev = dev;
+ slice->crs_ops = ops;
+ slice->crs_req = req;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_req_slice_add);
+
+static void cl_req_free(const struct lu_env *env, struct cl_req *req)
+{
+ unsigned i;
+
+ LASSERT(list_empty(&req->crq_pages));
+ LASSERT(req->crq_nrpages == 0);
+ LINVRNT(list_empty(&req->crq_layers));
+ LINVRNT(equi(req->crq_nrobjs > 0, req->crq_o != NULL));
+ ENTRY;
+
+ if (req->crq_o != NULL) {
+ for (i = 0; i < req->crq_nrobjs; ++i) {
+ struct cl_object *obj = req->crq_o[i].ro_obj;
+ if (obj != NULL) {
+ lu_object_ref_del_at(&obj->co_lu,
+ req->crq_o[i].ro_obj_ref,
+ "cl_req", req);
+ cl_object_put(env, obj);
+ }
+ }
+ OBD_FREE(req->crq_o, req->crq_nrobjs * sizeof req->crq_o[0]);
+ }
+ OBD_FREE_PTR(req);
+ EXIT;
+}
+
+static int cl_req_init(const struct lu_env *env, struct cl_req *req,
+ struct cl_page *page)
+{
+ struct cl_device *dev;
+ struct cl_page_slice *slice;
+ int result;
+
+ ENTRY;
+ result = 0;
+ page = cl_page_top(page);
+ do {
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
+ if (dev->cd_ops->cdo_req_init != NULL) {
+ result = dev->cd_ops->cdo_req_init(env,
+ dev, req);
+ if (result != 0)
+ break;
+ }
+ }
+ page = page->cp_child;
+ } while (page != NULL && result == 0);
+ RETURN(result);
+}
+
+/**
+ * Invokes per-request transfer completion call-backs
+ * (cl_req_operations::cro_completion()) bottom-to-top.
+ */
+void cl_req_completion(const struct lu_env *env, struct cl_req *req, int rc)
+{
+ struct cl_req_slice *slice;
+
+ ENTRY;
+ /*
+ * for the lack of list_for_each_entry_reverse_safe()...
+ */
+ while (!list_empty(&req->crq_layers)) {
+ slice = list_entry(req->crq_layers.prev,
+ struct cl_req_slice, crs_linkage);
+ list_del_init(&slice->crs_linkage);
+ if (slice->crs_ops->cro_completion != NULL)
+ slice->crs_ops->cro_completion(env, slice, rc);
+ }
+ cl_req_free(env, req);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_req_completion);
+
+/**
+ * Allocates new transfer request.
+ */
+struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
+ enum cl_req_type crt, int nr_objects)
+{
+ struct cl_req *req;
+
+ LINVRNT(nr_objects > 0);
+ ENTRY;
+
+ OBD_ALLOC_PTR(req);
+ if (req != NULL) {
+ int result;
+
+ OBD_ALLOC(req->crq_o, nr_objects * sizeof req->crq_o[0]);
+ if (req->crq_o != NULL) {
+ req->crq_nrobjs = nr_objects;
+ req->crq_type = crt;
+ INIT_LIST_HEAD(&req->crq_pages);
+ INIT_LIST_HEAD(&req->crq_layers);
+ result = cl_req_init(env, req, page);
+ } else
+ result = -ENOMEM;
+ if (result != 0) {
+ cl_req_completion(env, req, result);
+ req = ERR_PTR(result);
+ }
+ } else
+ req = ERR_PTR(-ENOMEM);
+ RETURN(req);
+}
+EXPORT_SYMBOL(cl_req_alloc);
+
+/**
+ * Adds a page to a request.
+ */
+void cl_req_page_add(const struct lu_env *env,
+ struct cl_req *req, struct cl_page *page)
+{
+ struct cl_object *obj;
+ struct cl_req_obj *rqo;
+ int i;
+
+ ENTRY;
+ page = cl_page_top(page);
+
+ LASSERT(list_empty(&page->cp_flight));
+ LASSERT(page->cp_req == NULL);
+
+ CL_PAGE_DEBUG(D_PAGE, env, page, "req %p, %d, %u\n",
+ req, req->crq_type, req->crq_nrpages);
+
+ list_add_tail(&page->cp_flight, &req->crq_pages);
+ ++req->crq_nrpages;
+ page->cp_req = req;
+ obj = cl_object_top(page->cp_obj);
+ for (i = 0, rqo = req->crq_o; obj != rqo->ro_obj; ++i, ++rqo) {
+ if (rqo->ro_obj == NULL) {
+ rqo->ro_obj = obj;
+ cl_object_get(obj);
+ rqo->ro_obj_ref = lu_object_ref_add(&obj->co_lu,
+ "cl_req", req);
+ break;
+ }
+ }
+ LASSERT(i < req->crq_nrobjs);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_req_page_add);
+
+/**
+ * Removes a page from a request.
+ */
+void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
+{
+ struct cl_req *req = page->cp_req;
+
+ ENTRY;
+ page = cl_page_top(page);
+
+ LASSERT(!list_empty(&page->cp_flight));
+ LASSERT(req->crq_nrpages > 0);
+
+ list_del_init(&page->cp_flight);
+ --req->crq_nrpages;
+ page->cp_req = NULL;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_req_page_done);
+
+/**
+ * Notifies layers that request is about to depart by calling
+ * cl_req_operations::cro_prep() top-to-bottom.
+ */
+int cl_req_prep(const struct lu_env *env, struct cl_req *req)
+{
+ int i;
+ int result;
+ const struct cl_req_slice *slice;
+
+ ENTRY;
+ /*
+ * Check that the caller of cl_req_alloc() didn't lie about the number
+ * of objects.
+ */
+ for (i = 0; i < req->crq_nrobjs; ++i)
+ LASSERT(req->crq_o[i].ro_obj != NULL);
+
+ result = 0;
+ list_for_each_entry(slice, &req->crq_layers, crs_linkage) {
+ if (slice->crs_ops->cro_prep != NULL) {
+ result = slice->crs_ops->cro_prep(env, slice);
+ if (result != 0)
+ break;
+ }
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_req_prep);
+
+/**
+ * Fills in attributes that are passed to server together with transfer. Only
+ * attributes from \a flags may be touched. This can be called multiple times
+ * for the same request.
+ */
+void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
+ struct cl_req_attr *attr, obd_valid flags)
+{
+ const struct cl_req_slice *slice;
+ struct cl_page *page;
+ int i;
+
+ LASSERT(!list_empty(&req->crq_pages));
+ ENTRY;
+
+ /* Take any page to use as a model. */
+ page = list_entry(req->crq_pages.next, struct cl_page, cp_flight);
+
+ for (i = 0; i < req->crq_nrobjs; ++i) {
+ list_for_each_entry(slice, &req->crq_layers, crs_linkage) {
+ const struct cl_page_slice *scan;
+ const struct cl_object *obj;
+
+ scan = cl_page_at(page,
+ slice->crs_dev->cd_lu_dev.ld_type);
+ LASSERT(scan != NULL);
+ obj = scan->cpl_obj;
+ if (slice->crs_ops->cro_attr_set != NULL)
+ slice->crs_ops->cro_attr_set(env, slice, obj,
+ attr + i, flags);
+ }
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_req_attr_set);
+
+/* XXX complete(), init_completion(), and wait_for_completion(), until they are
+ * implemented in libcfs. */
+# include <linux/sched.h>
+
+/**
+ * Initialize synchronous io wait anchor, for transfer of \a nrpages pages.
+ */
+void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages)
+{
+ ENTRY;
+ init_waitqueue_head(&anchor->csi_waitq);
+ atomic_set(&anchor->csi_sync_nr, nrpages);
+ atomic_set(&anchor->csi_barrier, nrpages > 0);
+ anchor->csi_sync_rc = 0;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_sync_io_init);
+
+/**
+ * Wait until all transfer completes. Transfer completion routine has to call
+ * cl_sync_io_note() for every page.
+ */
+int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, struct cl_sync_io *anchor,
+ long timeout)
+{
+ struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
+ NULL, NULL, NULL);
+ int rc;
+ ENTRY;
+
+ LASSERT(timeout >= 0);
+
+ rc = l_wait_event(anchor->csi_waitq,
+ atomic_read(&anchor->csi_sync_nr) == 0,
+ &lwi);
+ if (rc < 0) {
+ CERROR("SYNC IO failed with error: %d, try to cancel "
+ "%d remaining pages\n",
+ rc, atomic_read(&anchor->csi_sync_nr));
+
+ (void)cl_io_cancel(env, io, queue);
+
+ lwi = (struct l_wait_info) { 0 };
+ (void)l_wait_event(anchor->csi_waitq,
+ atomic_read(&anchor->csi_sync_nr) == 0,
+ &lwi);
+ } else {
+ rc = anchor->csi_sync_rc;
+ }
+ LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
+ cl_page_list_assume(env, io, queue);
+
+ /* wait until cl_sync_io_note() has done wakeup */
+ while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) {
+ cpu_relax();
+ }
+
+ POISON(anchor, 0x5a, sizeof *anchor);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cl_sync_io_wait);
+
+/**
+ * Indicate that transfer of a single page completed.
+ */
+void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
+{
+ ENTRY;
+ if (anchor->csi_sync_rc == 0 && ioret < 0)
+ anchor->csi_sync_rc = ioret;
+ /*
+ * Synchronous IO done without releasing page lock (e.g., as a part of
+ * ->{prepare,commit}_write(). Completion is used to signal the end of
+ * IO.
+ */
+ LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
+ if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
+ wake_up_all(&anchor->csi_waitq);
+ /* it's safe to nuke or reuse anchor now */
+ atomic_set(&anchor->csi_barrier, 0);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_sync_io_note);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
new file mode 100644
index 000000000000..d34e044fc854
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
@@ -0,0 +1,2304 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Client Extent Lock.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_fid.h>
+#include <linux/list.h>
+#include <cl_object.h>
+#include "cl_internal.h"
+
+/** Lock class of cl_lock::cll_guard */
+static struct lock_class_key cl_lock_guard_class;
+static struct kmem_cache *cl_lock_kmem;
+
+static struct lu_kmem_descr cl_lock_caches[] = {
+ {
+ .ckd_cache = &cl_lock_kmem,
+ .ckd_name = "cl_lock_kmem",
+ .ckd_size = sizeof (struct cl_lock)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+#define CS_LOCK_INC(o, item)
+#define CS_LOCK_DEC(o, item)
+#define CS_LOCKSTATE_INC(o, state)
+#define CS_LOCKSTATE_DEC(o, state)
+
+/**
+ * Basic lock invariant that is maintained at all times. Caller either has a
+ * reference to \a lock, or somehow assures that \a lock cannot be freed.
+ *
+ * \see cl_lock_invariant()
+ */
+static int cl_lock_invariant_trusted(const struct lu_env *env,
+ const struct cl_lock *lock)
+{
+ return ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
+ atomic_read(&lock->cll_ref) >= lock->cll_holds &&
+ lock->cll_holds >= lock->cll_users &&
+ lock->cll_holds >= 0 &&
+ lock->cll_users >= 0 &&
+ lock->cll_depth >= 0;
+}
+
+/**
+ * Stronger lock invariant, checking that caller has a reference on a lock.
+ *
+ * \see cl_lock_invariant_trusted()
+ */
+static int cl_lock_invariant(const struct lu_env *env,
+ const struct cl_lock *lock)
+{
+ int result;
+
+ result = atomic_read(&lock->cll_ref) > 0 &&
+ cl_lock_invariant_trusted(env, lock);
+ if (!result && env != NULL)
+ CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken");
+ return result;
+}
+
+/**
+ * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
+ */
+static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
+{
+ return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
+}
+
+/**
+ * Returns a set of counters for this lock, depending on a lock nesting.
+ */
+static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
+ const struct cl_lock *lock)
+{
+ struct cl_thread_info *info;
+ enum clt_nesting_level nesting;
+
+ info = cl_env_info(env);
+ nesting = cl_lock_nesting(lock);
+ LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
+ return &info->clt_counters[nesting];
+}
+
+static void cl_lock_trace0(int level, const struct lu_env *env,
+ const char *prefix, const struct cl_lock *lock,
+ const char *func, const int line)
+{
+ struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
+ CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)"
+ "(%p/%d/%d) at %s():%d\n",
+ prefix, lock, atomic_read(&lock->cll_ref),
+ lock->cll_guarder, lock->cll_depth,
+ lock->cll_state, lock->cll_error, lock->cll_holds,
+ lock->cll_users, lock->cll_flags,
+ env, h->coh_nesting, cl_lock_nr_mutexed(env),
+ func, line);
+}
+#define cl_lock_trace(level, env, prefix, lock) \
+ cl_lock_trace0(level, env, prefix, lock, __FUNCTION__, __LINE__)
+
+#define RETIP ((unsigned long)__builtin_return_address(0))
+
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key cl_lock_key;
+
+static void cl_lock_lockdep_init(struct cl_lock *lock)
+{
+ lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
+}
+
+static void cl_lock_lockdep_acquire(const struct lu_env *env,
+ struct cl_lock *lock, __u32 enqflags)
+{
+ cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
+ lock_map_acquire(&lock->dep_map);
+}
+
+static void cl_lock_lockdep_release(const struct lu_env *env,
+ struct cl_lock *lock)
+{
+ cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
+ lock_release(&lock->dep_map, 0, RETIP);
+}
+
+#else /* !CONFIG_LOCKDEP */
+
+static void cl_lock_lockdep_init(struct cl_lock *lock)
+{}
+static void cl_lock_lockdep_acquire(const struct lu_env *env,
+ struct cl_lock *lock, __u32 enqflags)
+{}
+static void cl_lock_lockdep_release(const struct lu_env *env,
+ struct cl_lock *lock)
+{}
+
+#endif /* !CONFIG_LOCKDEP */
+
+/**
+ * Adds lock slice to the compound lock.
+ *
+ * This is called by cl_object_operations::coo_lock_init() methods to add a
+ * per-layer state to the lock. New state is added at the end of
+ * cl_lock::cll_layers list, that is, it is at the bottom of the stack.
+ *
+ * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add()
+ */
+void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
+ struct cl_object *obj,
+ const struct cl_lock_operations *ops)
+{
+ ENTRY;
+ slice->cls_lock = lock;
+ list_add_tail(&slice->cls_linkage, &lock->cll_layers);
+ slice->cls_obj = obj;
+ slice->cls_ops = ops;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_slice_add);
+
+/**
+ * Returns true iff a lock with the mode \a has provides at least the same
+ * guarantees as a lock with the mode \a need.
+ */
+int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
+{
+ LINVRNT(need == CLM_READ || need == CLM_WRITE ||
+ need == CLM_PHANTOM || need == CLM_GROUP);
+ LINVRNT(has == CLM_READ || has == CLM_WRITE ||
+ has == CLM_PHANTOM || has == CLM_GROUP);
+ CLASSERT(CLM_PHANTOM < CLM_READ);
+ CLASSERT(CLM_READ < CLM_WRITE);
+ CLASSERT(CLM_WRITE < CLM_GROUP);
+
+ if (has != CLM_GROUP)
+ return need <= has;
+ else
+ return need == has;
+}
+EXPORT_SYMBOL(cl_lock_mode_match);
+
+/**
+ * Returns true iff extent portions of lock descriptions match.
+ */
+int cl_lock_ext_match(const struct cl_lock_descr *has,
+ const struct cl_lock_descr *need)
+{
+ return
+ has->cld_start <= need->cld_start &&
+ has->cld_end >= need->cld_end &&
+ cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
+ (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
+}
+EXPORT_SYMBOL(cl_lock_ext_match);
+
+/**
+ * Returns true iff a lock with the description \a has provides at least the
+ * same guarantees as a lock with the description \a need.
+ */
+int cl_lock_descr_match(const struct cl_lock_descr *has,
+ const struct cl_lock_descr *need)
+{
+ return
+ cl_object_same(has->cld_obj, need->cld_obj) &&
+ cl_lock_ext_match(has, need);
+}
+EXPORT_SYMBOL(cl_lock_descr_match);
+
+static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_object *obj = lock->cll_descr.cld_obj;
+
+ LINVRNT(!cl_lock_is_mutexed(lock));
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
+ might_sleep();
+ while (!list_empty(&lock->cll_layers)) {
+ struct cl_lock_slice *slice;
+
+ slice = list_entry(lock->cll_layers.next,
+ struct cl_lock_slice, cls_linkage);
+ list_del_init(lock->cll_layers.next);
+ slice->cls_ops->clo_fini(env, slice);
+ }
+ CS_LOCK_DEC(obj, total);
+ CS_LOCKSTATE_DEC(obj, lock->cll_state);
+ lu_object_ref_del_at(&obj->co_lu, lock->cll_obj_ref, "cl_lock", lock);
+ cl_object_put(env, obj);
+ lu_ref_fini(&lock->cll_reference);
+ lu_ref_fini(&lock->cll_holders);
+ mutex_destroy(&lock->cll_guard);
+ OBD_SLAB_FREE_PTR(lock, cl_lock_kmem);
+ EXIT;
+}
+
+/**
+ * Releases a reference on a lock.
+ *
+ * When last reference is released, lock is returned to the cache, unless it
+ * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
+ * immediately.
+ *
+ * \see cl_object_put(), cl_page_put()
+ */
+void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_object *obj;
+
+ LINVRNT(cl_lock_invariant(env, lock));
+ ENTRY;
+ obj = lock->cll_descr.cld_obj;
+ LINVRNT(obj != NULL);
+
+ CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
+ atomic_read(&lock->cll_ref), lock, RETIP);
+
+ if (atomic_dec_and_test(&lock->cll_ref)) {
+ if (lock->cll_state == CLS_FREEING) {
+ LASSERT(list_empty(&lock->cll_linkage));
+ cl_lock_free(env, lock);
+ }
+ CS_LOCK_DEC(obj, busy);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_put);
+
+/**
+ * Acquires an additional reference to a lock.
+ *
+ * This can be called only by caller already possessing a reference to \a
+ * lock.
+ *
+ * \see cl_object_get(), cl_page_get()
+ */
+void cl_lock_get(struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_invariant(NULL, lock));
+ CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
+ atomic_read(&lock->cll_ref), lock, RETIP);
+ atomic_inc(&lock->cll_ref);
+}
+EXPORT_SYMBOL(cl_lock_get);
+
+/**
+ * Acquires a reference to a lock.
+ *
+ * This is much like cl_lock_get(), except that this function can be used to
+ * acquire initial reference to the cached lock. Caller has to deal with all
+ * possible races. Use with care!
+ *
+ * \see cl_page_get_trust()
+ */
+void cl_lock_get_trust(struct cl_lock *lock)
+{
+ CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
+ atomic_read(&lock->cll_ref), lock, RETIP);
+ if (atomic_inc_return(&lock->cll_ref) == 1)
+ CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
+}
+EXPORT_SYMBOL(cl_lock_get_trust);
+
+/**
+ * Helper function destroying the lock that wasn't completely initialized.
+ *
+ * Other threads can acquire references to the top-lock through its
+ * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
+ */
+static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
+{
+ cl_lock_mutex_get(env, lock);
+ cl_lock_cancel(env, lock);
+ cl_lock_delete(env, lock);
+ cl_lock_mutex_put(env, lock);
+ cl_lock_put(env, lock);
+}
+
+static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
+ struct cl_object *obj,
+ const struct cl_io *io,
+ const struct cl_lock_descr *descr)
+{
+ struct cl_lock *lock;
+ struct lu_object_header *head;
+
+ ENTRY;
+ OBD_SLAB_ALLOC_PTR_GFP(lock, cl_lock_kmem, __GFP_IO);
+ if (lock != NULL) {
+ atomic_set(&lock->cll_ref, 1);
+ lock->cll_descr = *descr;
+ lock->cll_state = CLS_NEW;
+ cl_object_get(obj);
+ lock->cll_obj_ref = lu_object_ref_add(&obj->co_lu,
+ "cl_lock", lock);
+ INIT_LIST_HEAD(&lock->cll_layers);
+ INIT_LIST_HEAD(&lock->cll_linkage);
+ INIT_LIST_HEAD(&lock->cll_inclosure);
+ lu_ref_init(&lock->cll_reference);
+ lu_ref_init(&lock->cll_holders);
+ mutex_init(&lock->cll_guard);
+ lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
+ init_waitqueue_head(&lock->cll_wq);
+ head = obj->co_lu.lo_header;
+ CS_LOCKSTATE_INC(obj, CLS_NEW);
+ CS_LOCK_INC(obj, total);
+ CS_LOCK_INC(obj, create);
+ cl_lock_lockdep_init(lock);
+ list_for_each_entry(obj, &head->loh_layers,
+ co_lu.lo_linkage) {
+ int err;
+
+ err = obj->co_ops->coo_lock_init(env, obj, lock, io);
+ if (err != 0) {
+ cl_lock_finish(env, lock);
+ lock = ERR_PTR(err);
+ break;
+ }
+ }
+ } else
+ lock = ERR_PTR(-ENOMEM);
+ RETURN(lock);
+}
+
+/**
+ * Transfer the lock into INTRANSIT state and return the original state.
+ *
+ * \pre state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
+ * \post state: CLS_INTRANSIT
+ * \see CLS_INTRANSIT
+ */
+enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
+ struct cl_lock *lock)
+{
+ enum cl_lock_state state = lock->cll_state;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+ LASSERT(state != CLS_INTRANSIT);
+ LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
+ "Malformed lock state %d.\n", state);
+
+ cl_lock_state_set(env, lock, CLS_INTRANSIT);
+ lock->cll_intransit_owner = current;
+ cl_lock_hold_add(env, lock, "intransit", current);
+ return state;
+}
+EXPORT_SYMBOL(cl_lock_intransit);
+
+/**
+ * Exit the intransit state and restore the lock state to the original state
+ */
+void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
+ enum cl_lock_state state)
+{
+ LASSERT(cl_lock_is_mutexed(lock));
+ LASSERT(lock->cll_state == CLS_INTRANSIT);
+ LASSERT(state != CLS_INTRANSIT);
+ LASSERT(lock->cll_intransit_owner == current);
+
+ lock->cll_intransit_owner = NULL;
+ cl_lock_state_set(env, lock, state);
+ cl_lock_unhold(env, lock, "intransit", current);
+}
+EXPORT_SYMBOL(cl_lock_extransit);
+
+/**
+ * Checking whether the lock is intransit state
+ */
+int cl_lock_is_intransit(struct cl_lock *lock)
+{
+ LASSERT(cl_lock_is_mutexed(lock));
+ return lock->cll_state == CLS_INTRANSIT &&
+ lock->cll_intransit_owner != current;
+}
+EXPORT_SYMBOL(cl_lock_is_intransit);
+/**
+ * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
+ * truncate and O_APPEND cannot be reused for read/non-append-write, as they
+ * cover multiple stripes and can trigger cascading timeouts.
+ */
+static int cl_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock *lock,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io)
+{
+ const struct cl_lock_slice *slice;
+
+ LINVRNT(cl_lock_invariant_trusted(env, lock));
+ ENTRY;
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_fits_into != NULL &&
+ !slice->cls_ops->clo_fits_into(env, slice, need, io))
+ RETURN(0);
+ }
+ RETURN(1);
+}
+
+static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
+ struct cl_object *obj,
+ const struct cl_io *io,
+ const struct cl_lock_descr *need)
+{
+ struct cl_lock *lock;
+ struct cl_object_header *head;
+
+ ENTRY;
+
+ head = cl_object_header(obj);
+ LINVRNT(spin_is_locked(&head->coh_lock_guard));
+ CS_LOCK_INC(obj, lookup);
+ list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
+ int matched;
+
+ matched = cl_lock_ext_match(&lock->cll_descr, need) &&
+ lock->cll_state < CLS_FREEING &&
+ lock->cll_error == 0 &&
+ !(lock->cll_flags & CLF_CANCELLED) &&
+ cl_lock_fits_into(env, lock, need, io);
+ CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
+ PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
+ matched);
+ if (matched) {
+ cl_lock_get_trust(lock);
+ CS_LOCK_INC(obj, hit);
+ RETURN(lock);
+ }
+ }
+ RETURN(NULL);
+}
+
+/**
+ * Returns a lock matching description \a need.
+ *
+ * This is the main entry point into the cl_lock caching interface. First, a
+ * cache (implemented as a per-object linked list) is consulted. If lock is
+ * found there, it is returned immediately. Otherwise new lock is allocated
+ * and returned. In any case, additional reference to lock is acquired.
+ *
+ * \see cl_object_find(), cl_page_find()
+ */
+static struct cl_lock *cl_lock_find(const struct lu_env *env,
+ const struct cl_io *io,
+ const struct cl_lock_descr *need)
+{
+ struct cl_object_header *head;
+ struct cl_object *obj;
+ struct cl_lock *lock;
+
+ ENTRY;
+
+ obj = need->cld_obj;
+ head = cl_object_header(obj);
+
+ spin_lock(&head->coh_lock_guard);
+ lock = cl_lock_lookup(env, obj, io, need);
+ spin_unlock(&head->coh_lock_guard);
+
+ if (lock == NULL) {
+ lock = cl_lock_alloc(env, obj, io, need);
+ if (!IS_ERR(lock)) {
+ struct cl_lock *ghost;
+
+ spin_lock(&head->coh_lock_guard);
+ ghost = cl_lock_lookup(env, obj, io, need);
+ if (ghost == NULL) {
+ list_add_tail(&lock->cll_linkage,
+ &head->coh_locks);
+ spin_unlock(&head->coh_lock_guard);
+ CS_LOCK_INC(obj, busy);
+ } else {
+ spin_unlock(&head->coh_lock_guard);
+ /*
+ * Other threads can acquire references to the
+ * top-lock through its sub-locks. Hence, it
+ * cannot be cl_lock_free()-ed immediately.
+ */
+ cl_lock_finish(env, lock);
+ lock = ghost;
+ }
+ }
+ }
+ RETURN(lock);
+}
+
+/**
+ * Returns existing lock matching given description. This is similar to
+ * cl_lock_find() except that no new lock is created, and returned lock is
+ * guaranteed to be in enum cl_lock_state::CLS_HELD state.
+ */
+struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source)
+{
+ struct cl_object_header *head;
+ struct cl_object *obj;
+ struct cl_lock *lock;
+
+ obj = need->cld_obj;
+ head = cl_object_header(obj);
+
+ do {
+ spin_lock(&head->coh_lock_guard);
+ lock = cl_lock_lookup(env, obj, io, need);
+ spin_unlock(&head->coh_lock_guard);
+ if (lock == NULL)
+ return NULL;
+
+ cl_lock_mutex_get(env, lock);
+ if (lock->cll_state == CLS_INTRANSIT)
+ /* Don't care return value. */
+ cl_lock_state_wait(env, lock);
+ if (lock->cll_state == CLS_FREEING) {
+ cl_lock_mutex_put(env, lock);
+ cl_lock_put(env, lock);
+ lock = NULL;
+ }
+ } while (lock == NULL);
+
+ cl_lock_hold_add(env, lock, scope, source);
+ cl_lock_user_add(env, lock);
+ if (lock->cll_state == CLS_CACHED)
+ cl_use_try(env, lock, 1);
+ if (lock->cll_state == CLS_HELD) {
+ cl_lock_mutex_put(env, lock);
+ cl_lock_lockdep_acquire(env, lock, 0);
+ cl_lock_put(env, lock);
+ } else {
+ cl_unuse_try(env, lock);
+ cl_lock_unhold(env, lock, scope, source);
+ cl_lock_mutex_put(env, lock);
+ cl_lock_put(env, lock);
+ lock = NULL;
+ }
+
+ return lock;
+}
+EXPORT_SYMBOL(cl_lock_peek);
+
+/**
+ * Returns a slice within a lock, corresponding to the given layer in the
+ * device stack.
+ *
+ * \see cl_page_at()
+ */
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+ const struct lu_device_type *dtype)
+{
+ const struct cl_lock_slice *slice;
+
+ LINVRNT(cl_lock_invariant_trusted(NULL, lock));
+ ENTRY;
+
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
+ RETURN(slice);
+ }
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(cl_lock_at);
+
+static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_thread_counters *counters;
+
+ counters = cl_lock_counters(env, lock);
+ lock->cll_depth++;
+ counters->ctc_nr_locks_locked++;
+ lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
+ cl_lock_trace(D_TRACE, env, "got mutex", lock);
+}
+
+/**
+ * Locks cl_lock object.
+ *
+ * This is used to manipulate cl_lock fields, and to serialize state
+ * transitions in the lock state machine.
+ *
+ * \post cl_lock_is_mutexed(lock)
+ *
+ * \see cl_lock_mutex_put()
+ */
+void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ if (lock->cll_guarder == current) {
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(lock->cll_depth > 0);
+ } else {
+ struct cl_object_header *hdr;
+ struct cl_thread_info *info;
+ int i;
+
+ LINVRNT(lock->cll_guarder != current);
+ hdr = cl_object_header(lock->cll_descr.cld_obj);
+ /*
+ * Check that mutices are taken in the bottom-to-top order.
+ */
+ info = cl_env_info(env);
+ for (i = 0; i < hdr->coh_nesting; ++i)
+ LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
+ mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
+ lock->cll_guarder = current;
+ LINVRNT(lock->cll_depth == 0);
+ }
+ cl_lock_mutex_tail(env, lock);
+}
+EXPORT_SYMBOL(cl_lock_mutex_get);
+
+/**
+ * Try-locks cl_lock object.
+ *
+ * \retval 0 \a lock was successfully locked
+ *
+ * \retval -EBUSY \a lock cannot be locked right now
+ *
+ * \post ergo(result == 0, cl_lock_is_mutexed(lock))
+ *
+ * \see cl_lock_mutex_get()
+ */
+int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
+{
+ int result;
+
+ LINVRNT(cl_lock_invariant_trusted(env, lock));
+ ENTRY;
+
+ result = 0;
+ if (lock->cll_guarder == current) {
+ LINVRNT(lock->cll_depth > 0);
+ cl_lock_mutex_tail(env, lock);
+ } else if (mutex_trylock(&lock->cll_guard)) {
+ LINVRNT(lock->cll_depth == 0);
+ lock->cll_guarder = current;
+ cl_lock_mutex_tail(env, lock);
+ } else
+ result = -EBUSY;
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_lock_mutex_try);
+
+/**
+ {* Unlocks cl_lock object.
+ *
+ * \pre cl_lock_is_mutexed(lock)
+ *
+ * \see cl_lock_mutex_get()
+ */
+void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_thread_counters *counters;
+
+ LINVRNT(cl_lock_invariant(env, lock));
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(lock->cll_guarder == current);
+ LINVRNT(lock->cll_depth > 0);
+
+ counters = cl_lock_counters(env, lock);
+ LINVRNT(counters->ctc_nr_locks_locked > 0);
+
+ cl_lock_trace(D_TRACE, env, "put mutex", lock);
+ lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
+ counters->ctc_nr_locks_locked--;
+ if (--lock->cll_depth == 0) {
+ lock->cll_guarder = NULL;
+ mutex_unlock(&lock->cll_guard);
+ }
+}
+EXPORT_SYMBOL(cl_lock_mutex_put);
+
+/**
+ * Returns true iff lock's mutex is owned by the current thread.
+ */
+int cl_lock_is_mutexed(struct cl_lock *lock)
+{
+ return lock->cll_guarder == current;
+}
+EXPORT_SYMBOL(cl_lock_is_mutexed);
+
+/**
+ * Returns number of cl_lock mutices held by the current thread (environment).
+ */
+int cl_lock_nr_mutexed(const struct lu_env *env)
+{
+ struct cl_thread_info *info;
+ int i;
+ int locked;
+
+ /*
+ * NOTE: if summation across all nesting levels (currently 2) proves
+ * too expensive, a summary counter can be added to
+ * struct cl_thread_info.
+ */
+ info = cl_env_info(env);
+ for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
+ locked += info->clt_counters[i].ctc_nr_locks_locked;
+ return locked;
+}
+EXPORT_SYMBOL(cl_lock_nr_mutexed);
+
+static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ ENTRY;
+ if (!(lock->cll_flags & CLF_CANCELLED)) {
+ const struct cl_lock_slice *slice;
+
+ lock->cll_flags |= CLF_CANCELLED;
+ list_for_each_entry_reverse(slice, &lock->cll_layers,
+ cls_linkage) {
+ if (slice->cls_ops->clo_cancel != NULL)
+ slice->cls_ops->clo_cancel(env, slice);
+ }
+ }
+ EXIT;
+}
+
+static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_object_header *head;
+ const struct cl_lock_slice *slice;
+
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ ENTRY;
+ if (lock->cll_state < CLS_FREEING) {
+ LASSERT(lock->cll_state != CLS_INTRANSIT);
+ cl_lock_state_set(env, lock, CLS_FREEING);
+
+ head = cl_object_header(lock->cll_descr.cld_obj);
+
+ spin_lock(&head->coh_lock_guard);
+ list_del_init(&lock->cll_linkage);
+ spin_unlock(&head->coh_lock_guard);
+
+ /*
+ * From now on, no new references to this lock can be acquired
+ * by cl_lock_lookup().
+ */
+ list_for_each_entry_reverse(slice, &lock->cll_layers,
+ cls_linkage) {
+ if (slice->cls_ops->clo_delete != NULL)
+ slice->cls_ops->clo_delete(env, slice);
+ }
+ /*
+ * From now on, no new references to this lock can be acquired
+ * by layer-specific means (like a pointer from struct
+ * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
+ * lov).
+ *
+ * Lock will be finally freed in cl_lock_put() when last of
+ * existing references goes away.
+ */
+ }
+ EXIT;
+}
+
+/**
+ * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
+ * top-lock (nesting == 0) accounts for this modification in the per-thread
+ * debugging counters. Sub-lock holds can be released by a thread different
+ * from one that acquired it.
+ */
+static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
+ int delta)
+{
+ struct cl_thread_counters *counters;
+ enum clt_nesting_level nesting;
+
+ lock->cll_holds += delta;
+ nesting = cl_lock_nesting(lock);
+ if (nesting == CNL_TOP) {
+ counters = &cl_env_info(env)->clt_counters[CNL_TOP];
+ counters->ctc_nr_held += delta;
+ LASSERT(counters->ctc_nr_held >= 0);
+ }
+}
+
+/**
+ * Mod(ifie)s cl_lock::cll_users counter for a given lock. See
+ * cl_lock_hold_mod() for the explanation of the debugging code.
+ */
+static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
+ int delta)
+{
+ struct cl_thread_counters *counters;
+ enum clt_nesting_level nesting;
+
+ lock->cll_users += delta;
+ nesting = cl_lock_nesting(lock);
+ if (nesting == CNL_TOP) {
+ counters = &cl_env_info(env)->clt_counters[CNL_TOP];
+ counters->ctc_nr_used += delta;
+ LASSERT(counters->ctc_nr_used >= 0);
+ }
+}
+
+void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_holds > 0);
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
+ lu_ref_del(&lock->cll_holders, scope, source);
+ cl_lock_hold_mod(env, lock, -1);
+ if (lock->cll_holds == 0) {
+ CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
+ if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
+ lock->cll_descr.cld_mode == CLM_GROUP ||
+ lock->cll_state != CLS_CACHED)
+ /*
+ * If lock is still phantom or grouplock when user is
+ * done with it---destroy the lock.
+ */
+ lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
+ if (lock->cll_flags & CLF_CANCELPEND) {
+ lock->cll_flags &= ~CLF_CANCELPEND;
+ cl_lock_cancel0(env, lock);
+ }
+ if (lock->cll_flags & CLF_DOOMED) {
+ /* no longer doomed: it's dead... Jim. */
+ lock->cll_flags &= ~CLF_DOOMED;
+ cl_lock_delete0(env, lock);
+ }
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_hold_release);
+
+/**
+ * Waits until lock state is changed.
+ *
+ * This function is called with cl_lock mutex locked, atomically releases
+ * mutex and goes to sleep, waiting for a lock state change (signaled by
+ * cl_lock_signal()), and re-acquires the mutex before return.
+ *
+ * This function is used to wait until lock state machine makes some progress
+ * and to emulate synchronous operations on top of asynchronous lock
+ * interface.
+ *
+ * \retval -EINTR wait was interrupted
+ *
+ * \retval 0 wait wasn't interrupted
+ *
+ * \pre cl_lock_is_mutexed(lock)
+ *
+ * \see cl_lock_signal()
+ */
+int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
+{
+ wait_queue_t waiter;
+ sigset_t blocked;
+ int result;
+
+ ENTRY;
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_depth == 1);
+ LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
+
+ cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
+ result = lock->cll_error;
+ if (result == 0) {
+ /* To avoid being interrupted by the 'non-fatal' signals
+ * (SIGCHLD, for instance), we'd block them temporarily.
+ * LU-305 */
+ blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
+
+ init_waitqueue_entry_current(&waiter);
+ add_wait_queue(&lock->cll_wq, &waiter);
+ set_current_state(TASK_INTERRUPTIBLE);
+ cl_lock_mutex_put(env, lock);
+
+ LASSERT(cl_lock_nr_mutexed(env) == 0);
+
+ /* Returning ERESTARTSYS instead of EINTR so syscalls
+ * can be restarted if signals are pending here */
+ result = -ERESTARTSYS;
+ if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
+ waitq_wait(&waiter, TASK_INTERRUPTIBLE);
+ if (!cfs_signal_pending())
+ result = 0;
+ }
+
+ cl_lock_mutex_get(env, lock);
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&lock->cll_wq, &waiter);
+
+ /* Restore old blocked signals */
+ cfs_restore_sigs(blocked);
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_lock_state_wait);
+
+static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
+ enum cl_lock_state state)
+{
+ const struct cl_lock_slice *slice;
+
+ ENTRY;
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
+ if (slice->cls_ops->clo_state != NULL)
+ slice->cls_ops->clo_state(env, slice, state);
+ wake_up_all(&lock->cll_wq);
+ EXIT;
+}
+
+/**
+ * Notifies waiters that lock state changed.
+ *
+ * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
+ * layers about state change by calling cl_lock_operations::clo_state()
+ * top-to-bottom.
+ */
+void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
+{
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
+ cl_lock_state_signal(env, lock, lock->cll_state);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_signal);
+
+/**
+ * Changes lock state.
+ *
+ * This function is invoked to notify layers that lock state changed, possible
+ * as a result of an asynchronous event such as call-back reception.
+ *
+ * \post lock->cll_state == state
+ *
+ * \see cl_lock_operations::clo_state()
+ */
+void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
+ enum cl_lock_state state)
+{
+ ENTRY;
+ LASSERT(lock->cll_state <= state ||
+ (lock->cll_state == CLS_CACHED &&
+ (state == CLS_HELD || /* lock found in cache */
+ state == CLS_NEW || /* sub-lock canceled */
+ state == CLS_INTRANSIT)) ||
+ /* lock is in transit state */
+ lock->cll_state == CLS_INTRANSIT);
+
+ if (lock->cll_state != state) {
+ CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
+ CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
+
+ cl_lock_state_signal(env, lock, state);
+ lock->cll_state = state;
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_state_set);
+
+static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
+{
+ const struct cl_lock_slice *slice;
+ int result;
+
+ do {
+ result = 0;
+
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_state == CLS_INTRANSIT);
+
+ result = -ENOSYS;
+ list_for_each_entry_reverse(slice, &lock->cll_layers,
+ cls_linkage) {
+ if (slice->cls_ops->clo_unuse != NULL) {
+ result = slice->cls_ops->clo_unuse(env, slice);
+ if (result != 0)
+ break;
+ }
+ }
+ LASSERT(result != -ENOSYS);
+ } while (result == CLO_REPEAT);
+
+ return result;
+}
+
+/**
+ * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
+ * cl_lock_operations::clo_use() top-to-bottom to notify layers.
+ * @atomic = 1, it must unuse the lock to recovery the lock to keep the
+ * use process atomic
+ */
+int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
+{
+ const struct cl_lock_slice *slice;
+ int result;
+ enum cl_lock_state state;
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
+
+ LASSERT(lock->cll_state == CLS_CACHED);
+ if (lock->cll_error)
+ RETURN(lock->cll_error);
+
+ result = -ENOSYS;
+ state = cl_lock_intransit(env, lock);
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_use != NULL) {
+ result = slice->cls_ops->clo_use(env, slice);
+ if (result != 0)
+ break;
+ }
+ }
+ LASSERT(result != -ENOSYS);
+
+ LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
+ lock->cll_state);
+
+ if (result == 0) {
+ state = CLS_HELD;
+ } else {
+ if (result == -ESTALE) {
+ /*
+ * ESTALE means sublock being cancelled
+ * at this time, and set lock state to
+ * be NEW here and ask the caller to repeat.
+ */
+ state = CLS_NEW;
+ result = CLO_REPEAT;
+ }
+
+ /* @atomic means back-off-on-failure. */
+ if (atomic) {
+ int rc;
+ rc = cl_unuse_try_internal(env, lock);
+ /* Vet the results. */
+ if (rc < 0 && result > 0)
+ result = rc;
+ }
+
+ }
+ cl_lock_extransit(env, lock, state);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_use_try);
+
+/**
+ * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
+ * top-to-bottom.
+ */
+static int cl_enqueue_kick(const struct lu_env *env,
+ struct cl_lock *lock,
+ struct cl_io *io, __u32 flags)
+{
+ int result;
+ const struct cl_lock_slice *slice;
+
+ ENTRY;
+ result = -ENOSYS;
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_enqueue != NULL) {
+ result = slice->cls_ops->clo_enqueue(env,
+ slice, io, flags);
+ if (result != 0)
+ break;
+ }
+ }
+ LASSERT(result != -ENOSYS);
+ RETURN(result);
+}
+
+/**
+ * Tries to enqueue a lock.
+ *
+ * This function is called repeatedly by cl_enqueue() until either lock is
+ * enqueued, or error occurs. This function does not block waiting for
+ * networking communication to complete.
+ *
+ * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
+ * lock->cll_state == CLS_HELD)
+ *
+ * \see cl_enqueue() cl_lock_operations::clo_enqueue()
+ * \see cl_lock_state::CLS_ENQUEUED
+ */
+int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_io *io, __u32 flags)
+{
+ int result;
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
+ do {
+ LINVRNT(cl_lock_is_mutexed(lock));
+
+ result = lock->cll_error;
+ if (result != 0)
+ break;
+
+ switch (lock->cll_state) {
+ case CLS_NEW:
+ cl_lock_state_set(env, lock, CLS_QUEUING);
+ /* fall-through */
+ case CLS_QUEUING:
+ /* kick layers. */
+ result = cl_enqueue_kick(env, lock, io, flags);
+ /* For AGL case, the cl_lock::cll_state may
+ * become CLS_HELD already. */
+ if (result == 0 && lock->cll_state == CLS_QUEUING)
+ cl_lock_state_set(env, lock, CLS_ENQUEUED);
+ break;
+ case CLS_INTRANSIT:
+ LASSERT(cl_lock_is_intransit(lock));
+ result = CLO_WAIT;
+ break;
+ case CLS_CACHED:
+ /* yank lock from the cache. */
+ result = cl_use_try(env, lock, 0);
+ break;
+ case CLS_ENQUEUED:
+ case CLS_HELD:
+ result = 0;
+ break;
+ default:
+ case CLS_FREEING:
+ /*
+ * impossible, only held locks with increased
+ * ->cll_holds can be enqueued, and they cannot be
+ * freed.
+ */
+ LBUG();
+ }
+ } while (result == CLO_REPEAT);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_enqueue_try);
+
+/**
+ * Cancel the conflicting lock found during previous enqueue.
+ *
+ * \retval 0 conflicting lock has been canceled.
+ * \retval -ve error code.
+ */
+int cl_lock_enqueue_wait(const struct lu_env *env,
+ struct cl_lock *lock,
+ int keep_mutex)
+{
+ struct cl_lock *conflict;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+ LASSERT(lock->cll_state == CLS_QUEUING);
+ LASSERT(lock->cll_conflict != NULL);
+
+ conflict = lock->cll_conflict;
+ lock->cll_conflict = NULL;
+
+ cl_lock_mutex_put(env, lock);
+ LASSERT(cl_lock_nr_mutexed(env) == 0);
+
+ cl_lock_mutex_get(env, conflict);
+ cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
+ cl_lock_cancel(env, conflict);
+ cl_lock_delete(env, conflict);
+
+ while (conflict->cll_state != CLS_FREEING) {
+ rc = cl_lock_state_wait(env, conflict);
+ if (rc != 0)
+ break;
+ }
+ cl_lock_mutex_put(env, conflict);
+ lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
+ cl_lock_put(env, conflict);
+
+ if (keep_mutex)
+ cl_lock_mutex_get(env, lock);
+
+ LASSERT(rc <= 0);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cl_lock_enqueue_wait);
+
+static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_io *io, __u32 enqflags)
+{
+ int result;
+
+ ENTRY;
+
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_holds > 0);
+
+ cl_lock_user_add(env, lock);
+ do {
+ result = cl_enqueue_try(env, lock, io, enqflags);
+ if (result == CLO_WAIT) {
+ if (lock->cll_conflict != NULL)
+ result = cl_lock_enqueue_wait(env, lock, 1);
+ else
+ result = cl_lock_state_wait(env, lock);
+ if (result == 0)
+ continue;
+ }
+ break;
+ } while (1);
+ if (result != 0)
+ cl_unuse_try(env, lock);
+ LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
+ lock->cll_state == CLS_ENQUEUED ||
+ lock->cll_state == CLS_HELD));
+ RETURN(result);
+}
+
+/**
+ * Enqueues a lock.
+ *
+ * \pre current thread or io owns a hold on lock.
+ *
+ * \post ergo(result == 0, lock->users increased)
+ * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
+ * lock->cll_state == CLS_HELD)
+ */
+int cl_enqueue(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_io *io, __u32 enqflags)
+{
+ int result;
+
+ ENTRY;
+
+ cl_lock_lockdep_acquire(env, lock, enqflags);
+ cl_lock_mutex_get(env, lock);
+ result = cl_enqueue_locked(env, lock, io, enqflags);
+ cl_lock_mutex_put(env, lock);
+ if (result != 0)
+ cl_lock_lockdep_release(env, lock);
+ LASSERT(ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
+ lock->cll_state == CLS_HELD));
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_enqueue);
+
+/**
+ * Tries to unlock a lock.
+ *
+ * This function is called to release underlying resource:
+ * 1. for top lock, the resource is sublocks it held;
+ * 2. for sublock, the resource is the reference to dlmlock.
+ *
+ * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
+ *
+ * \see cl_unuse() cl_lock_operations::clo_unuse()
+ * \see cl_lock_state::CLS_CACHED
+ */
+int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
+{
+ int result;
+ enum cl_lock_state state = CLS_NEW;
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
+
+ if (lock->cll_users > 1) {
+ cl_lock_user_del(env, lock);
+ RETURN(0);
+ }
+
+ /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
+ * underlying resources. */
+ if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
+ cl_lock_user_del(env, lock);
+ RETURN(0);
+ }
+
+ /*
+ * New lock users (->cll_users) are not protecting unlocking
+ * from proceeding. From this point, lock eventually reaches
+ * CLS_CACHED, is reinitialized to CLS_NEW or fails into
+ * CLS_FREEING.
+ */
+ state = cl_lock_intransit(env, lock);
+
+ result = cl_unuse_try_internal(env, lock);
+ LASSERT(lock->cll_state == CLS_INTRANSIT);
+ LASSERT(result != CLO_WAIT);
+ cl_lock_user_del(env, lock);
+ if (result == 0 || result == -ESTALE) {
+ /*
+ * Return lock back to the cache. This is the only
+ * place where lock is moved into CLS_CACHED state.
+ *
+ * If one of ->clo_unuse() methods returned -ESTALE, lock
+ * cannot be placed into cache and has to be
+ * re-initialized. This happens e.g., when a sub-lock was
+ * canceled while unlocking was in progress.
+ */
+ if (state == CLS_HELD && result == 0)
+ state = CLS_CACHED;
+ else
+ state = CLS_NEW;
+ cl_lock_extransit(env, lock, state);
+
+ /*
+ * Hide -ESTALE error.
+ * If the lock is a glimpse lock, and it has multiple
+ * stripes. Assuming that one of its sublock returned -ENAVAIL,
+ * and other sublocks are matched write locks. In this case,
+ * we can't set this lock to error because otherwise some of
+ * its sublocks may not be canceled. This causes some dirty
+ * pages won't be written to OSTs. -jay
+ */
+ result = 0;
+ } else {
+ CERROR("result = %d, this is unlikely!\n", result);
+ state = CLS_NEW;
+ cl_lock_extransit(env, lock, state);
+ }
+ RETURN(result ?: lock->cll_error);
+}
+EXPORT_SYMBOL(cl_unuse_try);
+
+static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
+{
+ int result;
+ ENTRY;
+
+ result = cl_unuse_try(env, lock);
+ if (result)
+ CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
+
+ EXIT;
+}
+
+/**
+ * Unlocks a lock.
+ */
+void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
+{
+ ENTRY;
+ cl_lock_mutex_get(env, lock);
+ cl_unuse_locked(env, lock);
+ cl_lock_mutex_put(env, lock);
+ cl_lock_lockdep_release(env, lock);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_unuse);
+
+/**
+ * Tries to wait for a lock.
+ *
+ * This function is called repeatedly by cl_wait() until either lock is
+ * granted, or error occurs. This function does not block waiting for network
+ * communication to complete.
+ *
+ * \see cl_wait() cl_lock_operations::clo_wait()
+ * \see cl_lock_state::CLS_HELD
+ */
+int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
+{
+ const struct cl_lock_slice *slice;
+ int result;
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
+ do {
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERTF(lock->cll_state == CLS_QUEUING ||
+ lock->cll_state == CLS_ENQUEUED ||
+ lock->cll_state == CLS_HELD ||
+ lock->cll_state == CLS_INTRANSIT,
+ "lock state: %d\n", lock->cll_state);
+ LASSERT(lock->cll_users > 0);
+ LASSERT(lock->cll_holds > 0);
+
+ result = lock->cll_error;
+ if (result != 0)
+ break;
+
+ if (cl_lock_is_intransit(lock)) {
+ result = CLO_WAIT;
+ break;
+ }
+
+ if (lock->cll_state == CLS_HELD)
+ /* nothing to do */
+ break;
+
+ result = -ENOSYS;
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_wait != NULL) {
+ result = slice->cls_ops->clo_wait(env, slice);
+ if (result != 0)
+ break;
+ }
+ }
+ LASSERT(result != -ENOSYS);
+ if (result == 0) {
+ LASSERT(lock->cll_state != CLS_INTRANSIT);
+ cl_lock_state_set(env, lock, CLS_HELD);
+ }
+ } while (result == CLO_REPEAT);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_wait_try);
+
+/**
+ * Waits until enqueued lock is granted.
+ *
+ * \pre current thread or io owns a hold on the lock
+ * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
+ * lock->cll_state == CLS_HELD)
+ *
+ * \post ergo(result == 0, lock->cll_state == CLS_HELD)
+ */
+int cl_wait(const struct lu_env *env, struct cl_lock *lock)
+{
+ int result;
+
+ ENTRY;
+ cl_lock_mutex_get(env, lock);
+
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
+ "Wrong state %d \n", lock->cll_state);
+ LASSERT(lock->cll_holds > 0);
+
+ do {
+ result = cl_wait_try(env, lock);
+ if (result == CLO_WAIT) {
+ result = cl_lock_state_wait(env, lock);
+ if (result == 0)
+ continue;
+ }
+ break;
+ } while (1);
+ if (result < 0) {
+ cl_unuse_try(env, lock);
+ cl_lock_lockdep_release(env, lock);
+ }
+ cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
+ cl_lock_mutex_put(env, lock);
+ LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_wait);
+
+/**
+ * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
+ * value.
+ */
+unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
+{
+ const struct cl_lock_slice *slice;
+ unsigned long pound;
+ unsigned long ounce;
+
+ ENTRY;
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ pound = 0;
+ list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_weigh != NULL) {
+ ounce = slice->cls_ops->clo_weigh(env, slice);
+ pound += ounce;
+ if (pound < ounce) /* over-weight^Wflow */
+ pound = ~0UL;
+ }
+ }
+ RETURN(pound);
+}
+EXPORT_SYMBOL(cl_lock_weigh);
+
+/**
+ * Notifies layers that lock description changed.
+ *
+ * The server can grant client a lock different from one that was requested
+ * (e.g., larger in extent). This method is called when actually granted lock
+ * description becomes known to let layers to accommodate for changed lock
+ * description.
+ *
+ * \see cl_lock_operations::clo_modify()
+ */
+int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_lock_descr *desc)
+{
+ const struct cl_lock_slice *slice;
+ struct cl_object *obj = lock->cll_descr.cld_obj;
+ struct cl_object_header *hdr = cl_object_header(obj);
+ int result;
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
+ /* don't allow object to change */
+ LASSERT(obj == desc->cld_obj);
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_modify != NULL) {
+ result = slice->cls_ops->clo_modify(env, slice, desc);
+ if (result != 0)
+ RETURN(result);
+ }
+ }
+ CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
+ PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
+ /*
+ * Just replace description in place. Nothing more is needed for
+ * now. If locks were indexed according to their extent and/or mode,
+ * that index would have to be updated here.
+ */
+ spin_lock(&hdr->coh_lock_guard);
+ lock->cll_descr = *desc;
+ spin_unlock(&hdr->coh_lock_guard);
+ RETURN(0);
+}
+EXPORT_SYMBOL(cl_lock_modify);
+
+/**
+ * Initializes lock closure with a given origin.
+ *
+ * \see cl_lock_closure
+ */
+void cl_lock_closure_init(const struct lu_env *env,
+ struct cl_lock_closure *closure,
+ struct cl_lock *origin, int wait)
+{
+ LINVRNT(cl_lock_is_mutexed(origin));
+ LINVRNT(cl_lock_invariant(env, origin));
+
+ INIT_LIST_HEAD(&closure->clc_list);
+ closure->clc_origin = origin;
+ closure->clc_wait = wait;
+ closure->clc_nr = 0;
+}
+EXPORT_SYMBOL(cl_lock_closure_init);
+
+/**
+ * Builds a closure of \a lock.
+ *
+ * Building of a closure consists of adding initial lock (\a lock) into it,
+ * and calling cl_lock_operations::clo_closure() methods of \a lock. These
+ * methods might call cl_lock_closure_build() recursively again, adding more
+ * locks to the closure, etc.
+ *
+ * \see cl_lock_closure
+ */
+int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_lock_closure *closure)
+{
+ const struct cl_lock_slice *slice;
+ int result;
+
+ ENTRY;
+ LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
+ LINVRNT(cl_lock_invariant(env, closure->clc_origin));
+
+ result = cl_lock_enclosure(env, lock, closure);
+ if (result == 0) {
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_closure != NULL) {
+ result = slice->cls_ops->clo_closure(env, slice,
+ closure);
+ if (result != 0)
+ break;
+ }
+ }
+ }
+ if (result != 0)
+ cl_lock_disclosure(env, closure);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_lock_closure_build);
+
+/**
+ * Adds new lock to a closure.
+ *
+ * Try-locks \a lock and if succeeded, adds it to the closure (never more than
+ * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
+ * until next try-lock is likely to succeed.
+ */
+int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
+ struct cl_lock_closure *closure)
+{
+ int result = 0;
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
+ if (!cl_lock_mutex_try(env, lock)) {
+ /*
+ * If lock->cll_inclosure is not empty, lock is already in
+ * this closure.
+ */
+ if (list_empty(&lock->cll_inclosure)) {
+ cl_lock_get_trust(lock);
+ lu_ref_add(&lock->cll_reference, "closure", closure);
+ list_add(&lock->cll_inclosure, &closure->clc_list);
+ closure->clc_nr++;
+ } else
+ cl_lock_mutex_put(env, lock);
+ result = 0;
+ } else {
+ cl_lock_disclosure(env, closure);
+ if (closure->clc_wait) {
+ cl_lock_get_trust(lock);
+ lu_ref_add(&lock->cll_reference, "closure-w", closure);
+ cl_lock_mutex_put(env, closure->clc_origin);
+
+ LASSERT(cl_lock_nr_mutexed(env) == 0);
+ cl_lock_mutex_get(env, lock);
+ cl_lock_mutex_put(env, lock);
+
+ cl_lock_mutex_get(env, closure->clc_origin);
+ lu_ref_del(&lock->cll_reference, "closure-w", closure);
+ cl_lock_put(env, lock);
+ }
+ result = CLO_REPEAT;
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_lock_enclosure);
+
+/** Releases mutices of enclosed locks. */
+void cl_lock_disclosure(const struct lu_env *env,
+ struct cl_lock_closure *closure)
+{
+ struct cl_lock *scan;
+ struct cl_lock *temp;
+
+ cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
+ list_for_each_entry_safe(scan, temp, &closure->clc_list,
+ cll_inclosure){
+ list_del_init(&scan->cll_inclosure);
+ cl_lock_mutex_put(env, scan);
+ lu_ref_del(&scan->cll_reference, "closure", closure);
+ cl_lock_put(env, scan);
+ closure->clc_nr--;
+ }
+ LASSERT(closure->clc_nr == 0);
+}
+EXPORT_SYMBOL(cl_lock_disclosure);
+
+/** Finalizes a closure. */
+void cl_lock_closure_fini(struct cl_lock_closure *closure)
+{
+ LASSERT(closure->clc_nr == 0);
+ LASSERT(list_empty(&closure->clc_list));
+}
+EXPORT_SYMBOL(cl_lock_closure_fini);
+
+/**
+ * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
+ * destroyed, then destroy the lock. If there are holds on the lock, postpone
+ * destruction until all holds are released. This is called when a decision is
+ * made to destroy the lock in the future. E.g., when a blocking AST is
+ * received on it, or fatal communication error happens.
+ *
+ * Caller must have a reference on this lock to prevent a situation, when
+ * deleted lock lingers in memory for indefinite time, because nobody calls
+ * cl_lock_put() to finish it.
+ *
+ * \pre atomic_read(&lock->cll_ref) > 0
+ * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
+ * cl_lock_nr_mutexed(env) == 1)
+ * [i.e., if a top-lock is deleted, mutices of no other locks can be
+ * held, as deletion of sub-locks might require releasing a top-lock
+ * mutex]
+ *
+ * \see cl_lock_operations::clo_delete()
+ * \see cl_lock::cll_holds
+ */
+void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
+ cl_lock_nr_mutexed(env) == 1));
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
+ if (lock->cll_holds == 0)
+ cl_lock_delete0(env, lock);
+ else
+ lock->cll_flags |= CLF_DOOMED;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_delete);
+
+/**
+ * Mark lock as irrecoverably failed, and mark it for destruction. This
+ * happens when, e.g., server fails to grant a lock to us, or networking
+ * time-out happens.
+ *
+ * \pre atomic_read(&lock->cll_ref) > 0
+ *
+ * \see clo_lock_delete()
+ * \see cl_lock::cll_holds
+ */
+void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ ENTRY;
+ if (lock->cll_error == 0 && error != 0) {
+ cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
+ lock->cll_error = error;
+ cl_lock_signal(env, lock);
+ cl_lock_cancel(env, lock);
+ cl_lock_delete(env, lock);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_error);
+
+/**
+ * Cancels this lock. Notifies layers
+ * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
+ * there are holds on the lock, postpone cancellation until
+ * all holds are released.
+ *
+ * Cancellation notification is delivered to layers at most once.
+ *
+ * \see cl_lock_operations::clo_cancel()
+ * \see cl_lock::cll_holds
+ */
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
+ if (lock->cll_holds == 0)
+ cl_lock_cancel0(env, lock);
+ else
+ lock->cll_flags |= CLF_CANCELPEND;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_cancel);
+
+/**
+ * Finds an existing lock covering given index and optionally different from a
+ * given \a except lock.
+ */
+struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
+ struct cl_object *obj, pgoff_t index,
+ struct cl_lock *except,
+ int pending, int canceld)
+{
+ struct cl_object_header *head;
+ struct cl_lock *scan;
+ struct cl_lock *lock;
+ struct cl_lock_descr *need;
+
+ ENTRY;
+
+ head = cl_object_header(obj);
+ need = &cl_env_info(env)->clt_descr;
+ lock = NULL;
+
+ need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
+ * not PHANTOM */
+ need->cld_start = need->cld_end = index;
+ need->cld_enq_flags = 0;
+
+ spin_lock(&head->coh_lock_guard);
+ /* It is fine to match any group lock since there could be only one
+ * with a uniq gid and it conflicts with all other lock modes too */
+ list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
+ if (scan != except &&
+ (scan->cll_descr.cld_mode == CLM_GROUP ||
+ cl_lock_ext_match(&scan->cll_descr, need)) &&
+ scan->cll_state >= CLS_HELD &&
+ scan->cll_state < CLS_FREEING &&
+ /*
+ * This check is racy as the lock can be canceled right
+ * after it is done, but this is fine, because page exists
+ * already.
+ */
+ (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
+ (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
+ /* Don't increase cs_hit here since this
+ * is just a helper function. */
+ cl_lock_get_trust(scan);
+ lock = scan;
+ break;
+ }
+ }
+ spin_unlock(&head->coh_lock_guard);
+ RETURN(lock);
+}
+EXPORT_SYMBOL(cl_lock_at_pgoff);
+
+/**
+ * Calculate the page offset at the layer of @lock.
+ * At the time of this writing, @page is top page and @lock is sub lock.
+ */
+static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock)
+{
+ struct lu_device_type *dtype;
+ const struct cl_page_slice *slice;
+
+ dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type;
+ slice = cl_page_at(page, dtype);
+ LASSERT(slice != NULL);
+ return slice->cpl_page->cp_index;
+}
+
+/**
+ * Check if page @page is covered by an extra lock or discard it.
+ */
+static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, void *cbdata)
+{
+ struct cl_thread_info *info = cl_env_info(env);
+ struct cl_lock *lock = cbdata;
+ pgoff_t index = pgoff_at_lock(page, lock);
+
+ if (index >= info->clt_fn_index) {
+ struct cl_lock *tmp;
+
+ /* refresh non-overlapped index */
+ tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
+ lock, 1, 0);
+ if (tmp != NULL) {
+ /* Cache the first-non-overlapped index so as to skip
+ * all pages within [index, clt_fn_index). This
+ * is safe because if tmp lock is canceled, it will
+ * discard these pages. */
+ info->clt_fn_index = tmp->cll_descr.cld_end + 1;
+ if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
+ info->clt_fn_index = CL_PAGE_EOF;
+ cl_lock_put(env, tmp);
+ } else if (cl_page_own(env, io, page) == 0) {
+ /* discard the page */
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ }
+ }
+
+ info->clt_next_index = index + 1;
+ return CLP_GANG_OKAY;
+}
+
+static int discard_cb(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, void *cbdata)
+{
+ struct cl_thread_info *info = cl_env_info(env);
+ struct cl_lock *lock = cbdata;
+
+ LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+ KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
+ !PageWriteback(cl_page_vmpage(env, page))));
+ KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
+ !PageDirty(cl_page_vmpage(env, page))));
+
+ info->clt_next_index = pgoff_at_lock(page, lock) + 1;
+ if (cl_page_own(env, io, page) == 0) {
+ /* discard the page */
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ }
+
+ return CLP_GANG_OKAY;
+}
+
+/**
+ * Discard pages protected by the given lock. This function traverses radix
+ * tree to find all covering pages and discard them. If a page is being covered
+ * by other locks, it should remain in cache.
+ *
+ * If error happens on any step, the process continues anyway (the reasoning
+ * behind this being that lock cancellation cannot be delayed indefinitely).
+ */
+int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock)
+{
+ struct cl_thread_info *info = cl_env_info(env);
+ struct cl_io *io = &info->clt_io;
+ struct cl_lock_descr *descr = &lock->cll_descr;
+ cl_page_gang_cb_t cb;
+ int res;
+ int result;
+
+ LINVRNT(cl_lock_invariant(env, lock));
+ ENTRY;
+
+ io->ci_obj = cl_object_top(descr->cld_obj);
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (result != 0)
+ GOTO(out, result);
+
+ cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
+ info->clt_fn_index = info->clt_next_index = descr->cld_start;
+ do {
+ res = cl_page_gang_lookup(env, descr->cld_obj, io,
+ info->clt_next_index, descr->cld_end,
+ cb, (void *)lock);
+ if (info->clt_next_index > descr->cld_end)
+ break;
+
+ if (res == CLP_GANG_RESCHED)
+ cond_resched();
+ } while (res != CLP_GANG_OKAY);
+out:
+ cl_io_fini(env, io);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_lock_discard_pages);
+
+/**
+ * Eliminate all locks for a given object.
+ *
+ * Caller has to guarantee that no lock is in active use.
+ *
+ * \param cancel when this is set, cl_locks_prune() cancels locks before
+ * destroying.
+ */
+void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
+{
+ struct cl_object_header *head;
+ struct cl_lock *lock;
+
+ ENTRY;
+ head = cl_object_header(obj);
+ /*
+ * If locks are destroyed without cancellation, all pages must be
+ * already destroyed (as otherwise they will be left unprotected).
+ */
+ LASSERT(ergo(!cancel,
+ head->coh_tree.rnode == NULL && head->coh_pages == 0));
+
+ spin_lock(&head->coh_lock_guard);
+ while (!list_empty(&head->coh_locks)) {
+ lock = container_of(head->coh_locks.next,
+ struct cl_lock, cll_linkage);
+ cl_lock_get_trust(lock);
+ spin_unlock(&head->coh_lock_guard);
+ lu_ref_add(&lock->cll_reference, "prune", current);
+
+again:
+ cl_lock_mutex_get(env, lock);
+ if (lock->cll_state < CLS_FREEING) {
+ LASSERT(lock->cll_users <= 1);
+ if (unlikely(lock->cll_users == 1)) {
+ struct l_wait_info lwi = { 0 };
+
+ cl_lock_mutex_put(env, lock);
+ l_wait_event(lock->cll_wq,
+ lock->cll_users == 0,
+ &lwi);
+ goto again;
+ }
+
+ if (cancel)
+ cl_lock_cancel(env, lock);
+ cl_lock_delete(env, lock);
+ }
+ cl_lock_mutex_put(env, lock);
+ lu_ref_del(&lock->cll_reference, "prune", current);
+ cl_lock_put(env, lock);
+ spin_lock(&head->coh_lock_guard);
+ }
+ spin_unlock(&head->coh_lock_guard);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_locks_prune);
+
+static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
+ const struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source)
+{
+ struct cl_lock *lock;
+
+ ENTRY;
+
+ while (1) {
+ lock = cl_lock_find(env, io, need);
+ if (IS_ERR(lock))
+ break;
+ cl_lock_mutex_get(env, lock);
+ if (lock->cll_state < CLS_FREEING &&
+ !(lock->cll_flags & CLF_CANCELLED)) {
+ cl_lock_hold_mod(env, lock, +1);
+ lu_ref_add(&lock->cll_holders, scope, source);
+ lu_ref_add(&lock->cll_reference, scope, source);
+ break;
+ }
+ cl_lock_mutex_put(env, lock);
+ cl_lock_put(env, lock);
+ }
+ RETURN(lock);
+}
+
+/**
+ * Returns a lock matching \a need description with a reference and a hold on
+ * it.
+ *
+ * This is much like cl_lock_find(), except that cl_lock_hold() additionally
+ * guarantees that lock is not in the CLS_FREEING state on return.
+ */
+struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source)
+{
+ struct cl_lock *lock;
+
+ ENTRY;
+
+ lock = cl_lock_hold_mutex(env, io, need, scope, source);
+ if (!IS_ERR(lock))
+ cl_lock_mutex_put(env, lock);
+ RETURN(lock);
+}
+EXPORT_SYMBOL(cl_lock_hold);
+
+/**
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
+ */
+struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ const struct cl_lock_descr *need,
+ const char *scope, const void *source)
+{
+ struct cl_lock *lock;
+ int rc;
+ __u32 enqflags = need->cld_enq_flags;
+
+ ENTRY;
+ do {
+ lock = cl_lock_hold_mutex(env, io, need, scope, source);
+ if (IS_ERR(lock))
+ break;
+
+ rc = cl_enqueue_locked(env, lock, io, enqflags);
+ if (rc == 0) {
+ if (cl_lock_fits_into(env, lock, need, io)) {
+ if (!(enqflags & CEF_AGL)) {
+ cl_lock_mutex_put(env, lock);
+ cl_lock_lockdep_acquire(env, lock,
+ enqflags);
+ break;
+ }
+ rc = 1;
+ }
+ cl_unuse_locked(env, lock);
+ }
+ cl_lock_trace(D_DLMTRACE, env,
+ rc <= 0 ? "enqueue failed" : "agl succeed", lock);
+ cl_lock_hold_release(env, lock, scope, source);
+ cl_lock_mutex_put(env, lock);
+ lu_ref_del(&lock->cll_reference, scope, source);
+ cl_lock_put(env, lock);
+ if (rc > 0) {
+ LASSERT(enqflags & CEF_AGL);
+ lock = NULL;
+ } else if (rc != 0) {
+ lock = ERR_PTR(rc);
+ }
+ } while (rc == 0);
+ RETURN(lock);
+}
+EXPORT_SYMBOL(cl_lock_request);
+
+/**
+ * Adds a hold to a known lock.
+ */
+void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_state != CLS_FREEING);
+
+ ENTRY;
+ cl_lock_hold_mod(env, lock, +1);
+ cl_lock_get(lock);
+ lu_ref_add(&lock->cll_holders, scope, source);
+ lu_ref_add(&lock->cll_reference, scope, source);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_hold_add);
+
+/**
+ * Releases a hold and a reference on a lock, on which caller acquired a
+ * mutex.
+ */
+void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source)
+{
+ LINVRNT(cl_lock_invariant(env, lock));
+ ENTRY;
+ cl_lock_hold_release(env, lock, scope, source);
+ lu_ref_del(&lock->cll_reference, scope, source);
+ cl_lock_put(env, lock);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_unhold);
+
+/**
+ * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
+ */
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
+ const char *scope, const void *source)
+{
+ LINVRNT(cl_lock_invariant(env, lock));
+ ENTRY;
+ cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
+ cl_lock_mutex_get(env, lock);
+ cl_lock_hold_release(env, lock, scope, source);
+ cl_lock_mutex_put(env, lock);
+ lu_ref_del(&lock->cll_reference, scope, source);
+ cl_lock_put(env, lock);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_release);
+
+void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+
+ ENTRY;
+ cl_lock_used_mod(env, lock, +1);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_user_add);
+
+void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
+{
+ LINVRNT(cl_lock_is_mutexed(lock));
+ LINVRNT(cl_lock_invariant(env, lock));
+ LASSERT(lock->cll_users > 0);
+
+ ENTRY;
+ cl_lock_used_mod(env, lock, -1);
+ if (lock->cll_users == 0)
+ wake_up_all(&lock->cll_wq);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lock_user_del);
+
+const char *cl_lock_mode_name(const enum cl_lock_mode mode)
+{
+ static const char *names[] = {
+ [CLM_PHANTOM] = "P",
+ [CLM_READ] = "R",
+ [CLM_WRITE] = "W",
+ [CLM_GROUP] = "G"
+ };
+ if (0 <= mode && mode < ARRAY_SIZE(names))
+ return names[mode];
+ else
+ return "U";
+}
+EXPORT_SYMBOL(cl_lock_mode_name);
+
+/**
+ * Prints human readable representation of a lock description.
+ */
+void cl_lock_descr_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct cl_lock_descr *descr)
+{
+ const struct lu_fid *fid;
+
+ fid = lu_object_fid(&descr->cld_obj->co_lu);
+ (*printer)(env, cookie, DDESCR"@"DFID, PDESCR(descr), PFID(fid));
+}
+EXPORT_SYMBOL(cl_lock_descr_print);
+
+/**
+ * Prints human readable representation of \a lock to the \a f.
+ */
+void cl_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_lock *lock)
+{
+ const struct cl_lock_slice *slice;
+ (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
+ lock, atomic_read(&lock->cll_ref),
+ lock->cll_state, lock->cll_error, lock->cll_holds,
+ lock->cll_users, lock->cll_flags);
+ cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
+ (*printer)(env, cookie, " {\n");
+
+ list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+ (*printer)(env, cookie, " %s@%p: ",
+ slice->cls_obj->co_lu.lo_dev->ld_type->ldt_name,
+ slice);
+ if (slice->cls_ops->clo_print != NULL)
+ slice->cls_ops->clo_print(env, cookie, printer, slice);
+ (*printer)(env, cookie, "\n");
+ }
+ (*printer)(env, cookie, "} lock@%p\n", lock);
+}
+EXPORT_SYMBOL(cl_lock_print);
+
+int cl_lock_init(void)
+{
+ return lu_kmem_init(cl_lock_caches);
+}
+
+void cl_lock_fini(void)
+{
+ lu_kmem_fini(cl_lock_caches);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
new file mode 100644
index 000000000000..cdb5fba04591
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -0,0 +1,1148 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Client Lustre Object.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+/*
+ * Locking.
+ *
+ * i_mutex
+ * PG_locked
+ * ->coh_page_guard
+ * ->coh_lock_guard
+ * ->coh_attr_guard
+ * ->ls_guard
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/libcfs/libcfs.h>
+/* class_put_type() */
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_fid.h>
+#include <linux/list.h>
+#include <linux/libcfs/libcfs_hash.h> /* for cfs_hash stuff */
+#include <cl_object.h>
+#include "cl_internal.h"
+
+static struct kmem_cache *cl_env_kmem;
+
+/** Lock class of cl_object_header::coh_page_guard */
+static struct lock_class_key cl_page_guard_class;
+/** Lock class of cl_object_header::coh_lock_guard */
+static struct lock_class_key cl_lock_guard_class;
+/** Lock class of cl_object_header::coh_attr_guard */
+static struct lock_class_key cl_attr_guard_class;
+
+extern __u32 lu_context_tags_default;
+extern __u32 lu_session_tags_default;
+/**
+ * Initialize cl_object_header.
+ */
+int cl_object_header_init(struct cl_object_header *h)
+{
+ int result;
+
+ ENTRY;
+ result = lu_object_header_init(&h->coh_lu);
+ if (result == 0) {
+ spin_lock_init(&h->coh_page_guard);
+ spin_lock_init(&h->coh_lock_guard);
+ spin_lock_init(&h->coh_attr_guard);
+ lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class);
+ lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
+ lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
+ h->coh_pages = 0;
+ /* XXX hard coded GFP_* mask. */
+ INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC);
+ INIT_LIST_HEAD(&h->coh_locks);
+ h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8);
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_header_init);
+
+/**
+ * Finalize cl_object_header.
+ */
+void cl_object_header_fini(struct cl_object_header *h)
+{
+ LASSERT(list_empty(&h->coh_locks));
+ lu_object_header_fini(&h->coh_lu);
+}
+EXPORT_SYMBOL(cl_object_header_fini);
+
+/**
+ * Returns a cl_object with a given \a fid.
+ *
+ * Returns either cached or newly created object. Additional reference on the
+ * returned object is acquired.
+ *
+ * \see lu_object_find(), cl_page_find(), cl_lock_find()
+ */
+struct cl_object *cl_object_find(const struct lu_env *env,
+ struct cl_device *cd, const struct lu_fid *fid,
+ const struct cl_object_conf *c)
+{
+ might_sleep();
+ return lu2cl(lu_object_find_slice(env, cl2lu_dev(cd), fid, &c->coc_lu));
+}
+EXPORT_SYMBOL(cl_object_find);
+
+/**
+ * Releases a reference on \a o.
+ *
+ * When last reference is released object is returned to the cache, unless
+ * lu_object_header_flags::LU_OBJECT_HEARD_BANSHEE bit is set in its header.
+ *
+ * \see cl_page_put(), cl_lock_put().
+ */
+void cl_object_put(const struct lu_env *env, struct cl_object *o)
+{
+ lu_object_put(env, &o->co_lu);
+}
+EXPORT_SYMBOL(cl_object_put);
+
+/**
+ * Acquire an additional reference to the object \a o.
+ *
+ * This can only be used to acquire _additional_ reference, i.e., caller
+ * already has to possess at least one reference to \a o before calling this.
+ *
+ * \see cl_page_get(), cl_lock_get().
+ */
+void cl_object_get(struct cl_object *o)
+{
+ lu_object_get(&o->co_lu);
+}
+EXPORT_SYMBOL(cl_object_get);
+
+/**
+ * Returns the top-object for a given \a o.
+ *
+ * \see cl_page_top(), cl_io_top()
+ */
+struct cl_object *cl_object_top(struct cl_object *o)
+{
+ struct cl_object_header *hdr = cl_object_header(o);
+ struct cl_object *top;
+
+ while (hdr->coh_parent != NULL)
+ hdr = hdr->coh_parent;
+
+ top = lu2cl(lu_object_top(&hdr->coh_lu));
+ CDEBUG(D_TRACE, "%p -> %p\n", o, top);
+ return top;
+}
+EXPORT_SYMBOL(cl_object_top);
+
+/**
+ * Returns pointer to the lock protecting data-attributes for the given object
+ * \a o.
+ *
+ * Data-attributes are protected by the cl_object_header::coh_attr_guard
+ * spin-lock in the top-object.
+ *
+ * \see cl_attr, cl_object_attr_lock(), cl_object_operations::coo_attr_get().
+ */
+static spinlock_t *cl_object_attr_guard(struct cl_object *o)
+{
+ return &cl_object_header(cl_object_top(o))->coh_attr_guard;
+}
+
+/**
+ * Locks data-attributes.
+ *
+ * Prevents data-attributes from changing, until lock is released by
+ * cl_object_attr_unlock(). This has to be called before calls to
+ * cl_object_attr_get(), cl_object_attr_set().
+ */
+void cl_object_attr_lock(struct cl_object *o)
+{
+ spin_lock(cl_object_attr_guard(o));
+}
+EXPORT_SYMBOL(cl_object_attr_lock);
+
+/**
+ * Releases data-attributes lock, acquired by cl_object_attr_lock().
+ */
+void cl_object_attr_unlock(struct cl_object *o)
+{
+ spin_unlock(cl_object_attr_guard(o));
+}
+EXPORT_SYMBOL(cl_object_attr_unlock);
+
+/**
+ * Returns data-attributes of an object \a obj.
+ *
+ * Every layer is asked (by calling cl_object_operations::coo_attr_get())
+ * top-to-bottom to fill in parts of \a attr that this layer is responsible
+ * for.
+ */
+int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ struct lu_object_header *top;
+ int result;
+
+ LASSERT(spin_is_locked(cl_object_attr_guard(obj)));
+ ENTRY;
+
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_attr_get != NULL) {
+ result = obj->co_ops->coo_attr_get(env, obj, attr);
+ if (result != 0) {
+ if (result > 0)
+ result = 0;
+ break;
+ }
+ }
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_attr_get);
+
+/**
+ * Updates data-attributes of an object \a obj.
+ *
+ * Only attributes, mentioned in a validness bit-mask \a v are
+ * updated. Calls cl_object_operations::coo_attr_set() on every layer, bottom
+ * to top.
+ */
+int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned v)
+{
+ struct lu_object_header *top;
+ int result;
+
+ LASSERT(spin_is_locked(cl_object_attr_guard(obj)));
+ ENTRY;
+
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry_reverse(obj, &top->loh_layers,
+ co_lu.lo_linkage) {
+ if (obj->co_ops->coo_attr_set != NULL) {
+ result = obj->co_ops->coo_attr_set(env, obj, attr, v);
+ if (result != 0) {
+ if (result > 0)
+ result = 0;
+ break;
+ }
+ }
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_attr_set);
+
+/**
+ * Notifies layers (bottom-to-top) that glimpse AST was received.
+ *
+ * Layers have to fill \a lvb fields with information that will be shipped
+ * back to glimpse issuer.
+ *
+ * \see cl_lock_operations::clo_glimpse()
+ */
+int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
+ struct ost_lvb *lvb)
+{
+ struct lu_object_header *top;
+ int result;
+
+ ENTRY;
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry_reverse(obj, &top->loh_layers,
+ co_lu.lo_linkage) {
+ if (obj->co_ops->coo_glimpse != NULL) {
+ result = obj->co_ops->coo_glimpse(env, obj, lvb);
+ if (result != 0)
+ break;
+ }
+ }
+ LU_OBJECT_HEADER(D_DLMTRACE, env, lu_object_top(top),
+ "size: "LPU64" mtime: "LPU64" atime: "LPU64" "
+ "ctime: "LPU64" blocks: "LPU64"\n",
+ lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
+ lvb->lvb_ctime, lvb->lvb_blocks);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_object_glimpse);
+
+/**
+ * Updates a configuration of an object \a obj.
+ */
+int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf)
+{
+ struct lu_object_header *top;
+ int result;
+
+ ENTRY;
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_conf_set != NULL) {
+ result = obj->co_ops->coo_conf_set(env, obj, conf);
+ if (result != 0)
+ break;
+ }
+ }
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_conf_set);
+
+/**
+ * Helper function removing all object locks, and marking object for
+ * deletion. All object pages must have been deleted at this point.
+ *
+ * This is called by cl_inode_fini() and lov_object_delete() to destroy top-
+ * and sub- objects respectively.
+ */
+void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
+{
+ struct cl_object_header *hdr;
+
+ hdr = cl_object_header(obj);
+ LASSERT(hdr->coh_tree.rnode == NULL);
+ LASSERT(hdr->coh_pages == 0);
+
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
+ /*
+ * Destroy all locks. Object destruction (including cl_inode_fini())
+ * cannot cancel the locks, because in the case of a local client,
+ * where client and server share the same thread running
+ * prune_icache(), this can dead-lock with ldlm_cancel_handler()
+ * waiting on __wait_on_freeing_inode().
+ */
+ cl_locks_prune(env, obj, 0);
+}
+EXPORT_SYMBOL(cl_object_kill);
+
+/**
+ * Prunes caches of pages and locks for this object.
+ */
+void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ ENTRY;
+ cl_pages_prune(env, obj);
+ cl_locks_prune(env, obj, 1);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_object_prune);
+
+/**
+ * Check if the object has locks.
+ */
+int cl_object_has_locks(struct cl_object *obj)
+{
+ struct cl_object_header *head = cl_object_header(obj);
+ int has;
+
+ spin_lock(&head->coh_lock_guard);
+ has = list_empty(&head->coh_locks);
+ spin_unlock(&head->coh_lock_guard);
+
+ return (has == 0);
+}
+EXPORT_SYMBOL(cl_object_has_locks);
+
+void cache_stats_init(struct cache_stats *cs, const char *name)
+{
+ int i;
+
+ cs->cs_name = name;
+ for (i = 0; i < CS_NR; i++)
+ atomic_set(&cs->cs_stats[i], 0);
+}
+
+int cache_stats_print(const struct cache_stats *cs, struct seq_file *m, int h)
+{
+ int i;
+ /*
+ * lookup hit total cached create
+ * env: ...... ...... ...... ...... ......
+ */
+ if (h) {
+ const char *names[CS_NR] = CS_NAMES;
+
+ seq_printf(m, "%6s", " ");
+ for (i = 0; i < CS_NR; i++)
+ seq_printf(m, "%8s", names[i]);
+ seq_printf(m, "\n");
+ }
+
+ seq_printf(m, "%5.5s:", cs->cs_name);
+ for (i = 0; i < CS_NR; i++)
+ seq_printf(m, "%8u", atomic_read(&cs->cs_stats[i]));
+ return 0;
+}
+
+/**
+ * Initialize client site.
+ *
+ * Perform common initialization (lu_site_init()), and initialize statistical
+ * counters. Also perform global initializations on the first call.
+ */
+int cl_site_init(struct cl_site *s, struct cl_device *d)
+{
+ int i;
+ int result;
+
+ result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
+ if (result == 0) {
+ cache_stats_init(&s->cs_pages, "pages");
+ cache_stats_init(&s->cs_locks, "locks");
+ for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
+ atomic_set(&s->cs_pages_state[0], 0);
+ for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
+ atomic_set(&s->cs_locks_state[i], 0);
+ }
+ return result;
+}
+EXPORT_SYMBOL(cl_site_init);
+
+/**
+ * Finalize client site. Dual to cl_site_init().
+ */
+void cl_site_fini(struct cl_site *s)
+{
+ lu_site_fini(&s->cs_lu);
+}
+EXPORT_SYMBOL(cl_site_fini);
+
+static struct cache_stats cl_env_stats = {
+ .cs_name = "envs",
+ .cs_stats = { ATOMIC_INIT(0), }
+};
+
+/**
+ * Outputs client site statistical counters into a buffer. Suitable for
+ * ll_rd_*()-style functions.
+ */
+int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
+{
+ int i;
+ static const char *pstate[] = {
+ [CPS_CACHED] = "c",
+ [CPS_OWNED] = "o",
+ [CPS_PAGEOUT] = "w",
+ [CPS_PAGEIN] = "r",
+ [CPS_FREEING] = "f"
+ };
+ static const char *lstate[] = {
+ [CLS_NEW] = "n",
+ [CLS_QUEUING] = "q",
+ [CLS_ENQUEUED] = "e",
+ [CLS_HELD] = "h",
+ [CLS_INTRANSIT] = "t",
+ [CLS_CACHED] = "c",
+ [CLS_FREEING] = "f"
+ };
+/*
+ lookup hit total busy create
+pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
+locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
+ env: ...... ...... ...... ...... ......
+ */
+ lu_site_stats_print(&site->cs_lu, m);
+ cache_stats_print(&site->cs_pages, m, 1);
+ seq_printf(m, " [");
+ for (i = 0; i < ARRAY_SIZE(site->cs_pages_state); ++i)
+ seq_printf(m, "%s: %u ", pstate[i],
+ atomic_read(&site->cs_pages_state[i]));
+ seq_printf(m, "]\n");
+ cache_stats_print(&site->cs_locks, m, 0);
+ seq_printf(m, " [");
+ for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
+ seq_printf(m, "%s: %u ", lstate[i],
+ atomic_read(&site->cs_locks_state[i]));
+ seq_printf(m, "]\n");
+ cache_stats_print(&cl_env_stats, m, 0);
+ seq_printf(m, "\n");
+ return 0;
+}
+EXPORT_SYMBOL(cl_site_stats_print);
+
+/*****************************************************************************
+ *
+ * lu_env handling on client.
+ *
+ */
+
+/**
+ * The most efficient way is to store cl_env pointer in task specific
+ * structures. On Linux, it wont' be easy to use task_struct->journal_info
+ * because Lustre code may call into other fs which has certain assumptions
+ * about journal_info. Currently following fields in task_struct are identified
+ * can be used for this purpose:
+ * - cl_env: for liblustre.
+ * - tux_info: ony on RedHat kernel.
+ * - ...
+ * \note As long as we use task_struct to store cl_env, we assume that once
+ * called into Lustre, we'll never call into the other part of the kernel
+ * which will use those fields in task_struct without explicitly exiting
+ * Lustre.
+ *
+ * If there's no space in task_struct is available, hash will be used.
+ * bz20044, bz22683.
+ */
+
+struct cl_env {
+ void *ce_magic;
+ struct lu_env ce_lu;
+ struct lu_context ce_ses;
+
+ /**
+ * This allows cl_env to be entered into cl_env_hash which implements
+ * the current thread -> client environment lookup.
+ */
+ struct hlist_node ce_node;
+ /**
+ * Owner for the current cl_env.
+ *
+ * If LL_TASK_CL_ENV is defined, this point to the owning current,
+ * only for debugging purpose ;
+ * Otherwise hash is used, and this is the key for cfs_hash.
+ * Now current thread pid is stored. Note using thread pointer would
+ * lead to unbalanced hash because of its specific allocation locality
+ * and could be varied for different platforms and OSes, even different
+ * OS versions.
+ */
+ void *ce_owner;
+
+ /*
+ * Linkage into global list of all client environments. Used for
+ * garbage collection.
+ */
+ struct list_head ce_linkage;
+ /*
+ *
+ */
+ int ce_ref;
+ /*
+ * Debugging field: address of the caller who made original
+ * allocation.
+ */
+ void *ce_debug;
+};
+
+#define CL_ENV_INC(counter)
+#define CL_ENV_DEC(counter)
+
+static void cl_env_init0(struct cl_env *cle, void *debug)
+{
+ LASSERT(cle->ce_ref == 0);
+ LASSERT(cle->ce_magic == &cl_env_init0);
+ LASSERT(cle->ce_debug == NULL && cle->ce_owner == NULL);
+
+ cle->ce_ref = 1;
+ cle->ce_debug = debug;
+ CL_ENV_INC(busy);
+}
+
+
+/*
+ * The implementation of using hash table to connect cl_env and thread
+ */
+
+static cfs_hash_t *cl_env_hash;
+
+static unsigned cl_env_hops_hash(cfs_hash_t *lh,
+ const void *key, unsigned mask)
+{
+#if BITS_PER_LONG == 64
+ return cfs_hash_u64_hash((__u64)key, mask);
+#else
+ return cfs_hash_u32_hash((__u32)key, mask);
+#endif
+}
+
+static void *cl_env_hops_obj(struct hlist_node *hn)
+{
+ struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
+ LASSERT(cle->ce_magic == &cl_env_init0);
+ return (void *)cle;
+}
+
+static int cl_env_hops_keycmp(const void *key, struct hlist_node *hn)
+{
+ struct cl_env *cle = cl_env_hops_obj(hn);
+
+ LASSERT(cle->ce_owner != NULL);
+ return (key == cle->ce_owner);
+}
+
+static void cl_env_hops_noop(cfs_hash_t *hs, struct hlist_node *hn)
+{
+ struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
+ LASSERT(cle->ce_magic == &cl_env_init0);
+}
+
+static cfs_hash_ops_t cl_env_hops = {
+ .hs_hash = cl_env_hops_hash,
+ .hs_key = cl_env_hops_obj,
+ .hs_keycmp = cl_env_hops_keycmp,
+ .hs_object = cl_env_hops_obj,
+ .hs_get = cl_env_hops_noop,
+ .hs_put_locked = cl_env_hops_noop,
+};
+
+static inline struct cl_env *cl_env_fetch(void)
+{
+ struct cl_env *cle;
+
+ cle = cfs_hash_lookup(cl_env_hash, (void *) (long) current->pid);
+ LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0));
+ return cle;
+}
+
+static inline void cl_env_attach(struct cl_env *cle)
+{
+ if (cle) {
+ int rc;
+
+ LASSERT(cle->ce_owner == NULL);
+ cle->ce_owner = (void *) (long) current->pid;
+ rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner,
+ &cle->ce_node);
+ LASSERT(rc == 0);
+ }
+}
+
+static inline void cl_env_do_detach(struct cl_env *cle)
+{
+ void *cookie;
+
+ LASSERT(cle->ce_owner == (void *) (long) current->pid);
+ cookie = cfs_hash_del(cl_env_hash, cle->ce_owner,
+ &cle->ce_node);
+ LASSERT(cookie == cle);
+ cle->ce_owner = NULL;
+}
+
+static int cl_env_store_init(void) {
+ cl_env_hash = cfs_hash_create("cl_env",
+ HASH_CL_ENV_BITS, HASH_CL_ENV_BITS,
+ HASH_CL_ENV_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &cl_env_hops,
+ CFS_HASH_RW_BKTLOCK);
+ return cl_env_hash != NULL ? 0 :-ENOMEM;
+}
+
+static void cl_env_store_fini(void) {
+ cfs_hash_putref(cl_env_hash);
+}
+
+
+static inline struct cl_env *cl_env_detach(struct cl_env *cle)
+{
+ if (cle == NULL)
+ cle = cl_env_fetch();
+
+ if (cle && cle->ce_owner)
+ cl_env_do_detach(cle);
+
+ return cle;
+}
+
+static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
+{
+ struct lu_env *env;
+ struct cl_env *cle;
+
+ OBD_SLAB_ALLOC_PTR_GFP(cle, cl_env_kmem, __GFP_IO);
+ if (cle != NULL) {
+ int rc;
+
+ INIT_LIST_HEAD(&cle->ce_linkage);
+ cle->ce_magic = &cl_env_init0;
+ env = &cle->ce_lu;
+ rc = lu_env_init(env, LCT_CL_THREAD|ctx_tags);
+ if (rc == 0) {
+ rc = lu_context_init(&cle->ce_ses,
+ LCT_SESSION | ses_tags);
+ if (rc == 0) {
+ lu_context_enter(&cle->ce_ses);
+ env->le_ses = &cle->ce_ses;
+ cl_env_init0(cle, debug);
+ } else
+ lu_env_fini(env);
+ }
+ if (rc != 0) {
+ OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
+ env = ERR_PTR(rc);
+ } else {
+ CL_ENV_INC(create);
+ CL_ENV_INC(total);
+ }
+ } else
+ env = ERR_PTR(-ENOMEM);
+ return env;
+}
+
+static void cl_env_fini(struct cl_env *cle)
+{
+ CL_ENV_DEC(total);
+ lu_context_fini(&cle->ce_lu.le_ctx);
+ lu_context_fini(&cle->ce_ses);
+ OBD_SLAB_FREE_PTR(cle, cl_env_kmem);
+}
+
+static inline struct cl_env *cl_env_container(struct lu_env *env)
+{
+ return container_of(env, struct cl_env, ce_lu);
+}
+
+struct lu_env *cl_env_peek(int *refcheck)
+{
+ struct lu_env *env;
+ struct cl_env *cle;
+
+ CL_ENV_INC(lookup);
+
+ /* check that we don't go far from untrusted pointer */
+ CLASSERT(offsetof(struct cl_env, ce_magic) == 0);
+
+ env = NULL;
+ cle = cl_env_fetch();
+ if (cle != NULL) {
+ CL_ENV_INC(hit);
+ env = &cle->ce_lu;
+ *refcheck = ++cle->ce_ref;
+ }
+ CDEBUG(D_OTHER, "%d@%p\n", cle ? cle->ce_ref : 0, cle);
+ return env;
+}
+EXPORT_SYMBOL(cl_env_peek);
+
+/**
+ * Returns lu_env: if there already is an environment associated with the
+ * current thread, it is returned, otherwise, new environment is allocated.
+ *
+ * \param refcheck pointer to a counter used to detect environment leaks. In
+ * the usual case cl_env_get() and cl_env_put() are called in the same lexical
+ * scope and pointer to the same integer is passed as \a refcheck. This is
+ * used to detect missed cl_env_put().
+ *
+ * \see cl_env_put()
+ */
+struct lu_env *cl_env_get(int *refcheck)
+{
+ struct lu_env *env;
+
+ env = cl_env_peek(refcheck);
+ if (env == NULL) {
+ env = cl_env_new(lu_context_tags_default,
+ lu_session_tags_default,
+ __builtin_return_address(0));
+
+ if (!IS_ERR(env)) {
+ struct cl_env *cle;
+
+ cle = cl_env_container(env);
+ cl_env_attach(cle);
+ *refcheck = cle->ce_ref;
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
+ }
+ }
+ return env;
+}
+EXPORT_SYMBOL(cl_env_get);
+
+/**
+ * Forces an allocation of a fresh environment with given tags.
+ *
+ * \see cl_env_get()
+ */
+struct lu_env *cl_env_alloc(int *refcheck, __u32 tags)
+{
+ struct lu_env *env;
+
+ LASSERT(cl_env_peek(refcheck) == NULL);
+ env = cl_env_new(tags, tags, __builtin_return_address(0));
+ if (!IS_ERR(env)) {
+ struct cl_env *cle;
+
+ cle = cl_env_container(env);
+ *refcheck = cle->ce_ref;
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
+ }
+ return env;
+}
+EXPORT_SYMBOL(cl_env_alloc);
+
+static void cl_env_exit(struct cl_env *cle)
+{
+ LASSERT(cle->ce_owner == NULL);
+ lu_context_exit(&cle->ce_lu.le_ctx);
+ lu_context_exit(&cle->ce_ses);
+}
+
+/**
+ * Release an environment.
+ *
+ * Decrement \a env reference counter. When counter drops to 0, nothing in
+ * this thread is using environment and it is returned to the allocation
+ * cache, or freed straight away, if cache is large enough.
+ */
+void cl_env_put(struct lu_env *env, int *refcheck)
+{
+ struct cl_env *cle;
+
+ cle = cl_env_container(env);
+
+ LASSERT(cle->ce_ref > 0);
+ LASSERT(ergo(refcheck != NULL, cle->ce_ref == *refcheck));
+
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
+ if (--cle->ce_ref == 0) {
+ CL_ENV_DEC(busy);
+ cl_env_detach(cle);
+ cle->ce_debug = NULL;
+ cl_env_exit(cle);
+ cl_env_fini(cle);
+ }
+}
+EXPORT_SYMBOL(cl_env_put);
+
+/**
+ * Declares a point of re-entrancy.
+ *
+ * \see cl_env_reexit()
+ */
+void *cl_env_reenter(void)
+{
+ return cl_env_detach(NULL);
+}
+EXPORT_SYMBOL(cl_env_reenter);
+
+/**
+ * Exits re-entrancy.
+ */
+void cl_env_reexit(void *cookie)
+{
+ cl_env_detach(NULL);
+ cl_env_attach(cookie);
+}
+EXPORT_SYMBOL(cl_env_reexit);
+
+/**
+ * Setup user-supplied \a env as a current environment. This is to be used to
+ * guaranteed that environment exists even when cl_env_get() fails. It is up
+ * to user to ensure proper concurrency control.
+ *
+ * \see cl_env_unplant()
+ */
+void cl_env_implant(struct lu_env *env, int *refcheck)
+{
+ struct cl_env *cle = cl_env_container(env);
+
+ LASSERT(cle->ce_ref > 0);
+
+ cl_env_attach(cle);
+ cl_env_get(refcheck);
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
+}
+EXPORT_SYMBOL(cl_env_implant);
+
+/**
+ * Detach environment installed earlier by cl_env_implant().
+ */
+void cl_env_unplant(struct lu_env *env, int *refcheck)
+{
+ struct cl_env *cle = cl_env_container(env);
+
+ LASSERT(cle->ce_ref > 1);
+
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
+
+ cl_env_detach(cle);
+ cl_env_put(env, refcheck);
+}
+EXPORT_SYMBOL(cl_env_unplant);
+
+struct lu_env *cl_env_nested_get(struct cl_env_nest *nest)
+{
+ struct lu_env *env;
+
+ nest->cen_cookie = NULL;
+ env = cl_env_peek(&nest->cen_refcheck);
+ if (env != NULL) {
+ if (!cl_io_is_going(env))
+ return env;
+ else {
+ cl_env_put(env, &nest->cen_refcheck);
+ nest->cen_cookie = cl_env_reenter();
+ }
+ }
+ env = cl_env_get(&nest->cen_refcheck);
+ if (IS_ERR(env)) {
+ cl_env_reexit(nest->cen_cookie);
+ return env;
+ }
+
+ LASSERT(!cl_io_is_going(env));
+ return env;
+}
+EXPORT_SYMBOL(cl_env_nested_get);
+
+void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env)
+{
+ cl_env_put(env, &nest->cen_refcheck);
+ cl_env_reexit(nest->cen_cookie);
+}
+EXPORT_SYMBOL(cl_env_nested_put);
+
+/**
+ * Converts struct cl_attr to struct ost_lvb.
+ *
+ * \see cl_lvb2attr
+ */
+void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr)
+{
+ ENTRY;
+ lvb->lvb_size = attr->cat_size;
+ lvb->lvb_mtime = attr->cat_mtime;
+ lvb->lvb_atime = attr->cat_atime;
+ lvb->lvb_ctime = attr->cat_ctime;
+ lvb->lvb_blocks = attr->cat_blocks;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_attr2lvb);
+
+/**
+ * Converts struct ost_lvb to struct cl_attr.
+ *
+ * \see cl_attr2lvb
+ */
+void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
+{
+ ENTRY;
+ attr->cat_size = lvb->lvb_size;
+ attr->cat_mtime = lvb->lvb_mtime;
+ attr->cat_atime = lvb->lvb_atime;
+ attr->cat_ctime = lvb->lvb_ctime;
+ attr->cat_blocks = lvb->lvb_blocks;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_lvb2attr);
+
+/*****************************************************************************
+ *
+ * Temporary prototype thing: mirror obd-devices into cl devices.
+ *
+ */
+
+struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
+ struct lu_device_type *ldt,
+ struct lu_device *next)
+{
+ const char *typename;
+ struct lu_device *d;
+
+ LASSERT(ldt != NULL);
+
+ typename = ldt->ldt_name;
+ d = ldt->ldt_ops->ldto_device_alloc(env, ldt, NULL);
+ if (!IS_ERR(d)) {
+ int rc;
+
+ if (site != NULL)
+ d->ld_site = site;
+ rc = ldt->ldt_ops->ldto_device_init(env, d, typename, next);
+ if (rc == 0) {
+ lu_device_get(d);
+ lu_ref_add(&d->ld_reference,
+ "lu-stack", &lu_site_init);
+ } else {
+ ldt->ldt_ops->ldto_device_free(env, d);
+ CERROR("can't init device '%s', %d\n", typename, rc);
+ d = ERR_PTR(rc);
+ }
+ } else
+ CERROR("Cannot allocate device: '%s'\n", typename);
+ return lu2cl_dev(d);
+}
+EXPORT_SYMBOL(cl_type_setup);
+
+/**
+ * Finalize device stack by calling lu_stack_fini().
+ */
+void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
+{
+ lu_stack_fini(env, cl2lu_dev(cl));
+}
+EXPORT_SYMBOL(cl_stack_fini);
+
+int cl_lock_init(void);
+void cl_lock_fini(void);
+
+int cl_page_init(void);
+void cl_page_fini(void);
+
+static struct lu_context_key cl_key;
+
+struct cl_thread_info *cl_env_info(const struct lu_env *env)
+{
+ return lu_context_key_get(&env->le_ctx, &cl_key);
+}
+
+/* defines cl0_key_{init,fini}() */
+LU_KEY_INIT_FINI(cl0, struct cl_thread_info);
+
+static void *cl_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct cl_thread_info *info;
+
+ info = cl0_key_init(ctx, key);
+ if (!IS_ERR(info)) {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
+ lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
+ }
+ return info;
+}
+
+static void cl_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct cl_thread_info *info;
+ int i;
+
+ info = data;
+ for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
+ lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
+ cl0_key_fini(ctx, key, data);
+}
+
+static void cl_key_exit(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct cl_thread_info *info = data;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) {
+ LASSERT(info->clt_counters[i].ctc_nr_held == 0);
+ LASSERT(info->clt_counters[i].ctc_nr_used == 0);
+ LASSERT(info->clt_counters[i].ctc_nr_locks_acquired == 0);
+ LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
+ lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
+ lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
+ }
+}
+
+static struct lu_context_key cl_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = cl_key_init,
+ .lct_fini = cl_key_fini,
+ .lct_exit = cl_key_exit
+};
+
+static struct lu_kmem_descr cl_object_caches[] = {
+ {
+ .ckd_cache = &cl_env_kmem,
+ .ckd_name = "cl_env_kmem",
+ .ckd_size = sizeof (struct cl_env)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+/**
+ * Global initialization of cl-data. Create kmem caches, register
+ * lu_context_key's, etc.
+ *
+ * \see cl_global_fini()
+ */
+int cl_global_init(void)
+{
+ int result;
+
+ result = cl_env_store_init();
+ if (result)
+ return result;
+
+ result = lu_kmem_init(cl_object_caches);
+ if (result)
+ goto out_store;
+
+ LU_CONTEXT_KEY_INIT(&cl_key);
+ result = lu_context_key_register(&cl_key);
+ if (result)
+ goto out_kmem;
+
+ result = cl_lock_init();
+ if (result)
+ goto out_context;
+
+ result = cl_page_init();
+ if (result)
+ goto out_lock;
+
+ return 0;
+out_lock:
+ cl_lock_fini();
+out_context:
+ lu_context_key_degister(&cl_key);
+out_kmem:
+ lu_kmem_fini(cl_object_caches);
+out_store:
+ cl_env_store_fini();
+ return result;
+}
+
+/**
+ * Finalization of global cl-data. Dual to cl_global_init().
+ */
+void cl_global_fini(void)
+{
+ cl_lock_fini();
+ cl_page_fini();
+ lu_context_key_degister(&cl_key);
+ lu_kmem_fini(cl_object_caches);
+ cl_env_store_fini();
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
new file mode 100644
index 000000000000..bb9335911c34
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -0,0 +1,1605 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Client Lustre Page.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/libcfs/libcfs.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <linux/list.h>
+
+#include <cl_object.h>
+#include "cl_internal.h"
+
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
+ int radix);
+
+# define PASSERT(env, page, expr) \
+ do { \
+ if (unlikely(!(expr))) { \
+ CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
+ LASSERT(0); \
+ } \
+ } while (0)
+
+# define PINVRNT(env, page, exp) \
+ ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
+
+/* Disable page statistic by default due to huge performance penalty. */
+#define CS_PAGE_INC(o, item)
+#define CS_PAGE_DEC(o, item)
+#define CS_PAGESTATE_INC(o, state)
+#define CS_PAGESTATE_DEC(o, state)
+
+/**
+ * Internal version of cl_page_top, it should be called if the page is
+ * known to be not freed, says with page referenced, or radix tree lock held,
+ * or page owned.
+ */
+static struct cl_page *cl_page_top_trusted(struct cl_page *page)
+{
+ while (page->cp_parent != NULL)
+ page = page->cp_parent;
+ return page;
+}
+
+/**
+ * Internal version of cl_page_get().
+ *
+ * This function can be used to obtain initial reference to previously
+ * unreferenced cached object. It can be called only if concurrent page
+ * reclamation is somehow prevented, e.g., by locking page radix-tree
+ * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page,
+ * associated with \a page.
+ *
+ * Use with care! Not exported.
+ */
+static void cl_page_get_trust(struct cl_page *page)
+{
+ LASSERT(atomic_read(&page->cp_ref) > 0);
+ atomic_inc(&page->cp_ref);
+}
+
+/**
+ * Returns a slice within a page, corresponding to the given layer in the
+ * device stack.
+ *
+ * \see cl_lock_at()
+ */
+static const struct cl_page_slice *
+cl_page_at_trusted(const struct cl_page *page,
+ const struct lu_device_type *dtype)
+{
+ const struct cl_page_slice *slice;
+ ENTRY;
+
+ page = cl_page_top_trusted((struct cl_page *)page);
+ do {
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
+ RETURN(slice);
+ }
+ page = page->cp_child;
+ } while (page != NULL);
+ RETURN(NULL);
+}
+
+/**
+ * Returns a page with given index in the given object, or NULL if no page is
+ * found. Acquires a reference on \a page.
+ *
+ * Locking: called under cl_object_header::coh_page_guard spin-lock.
+ */
+struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
+{
+ struct cl_page *page;
+
+ LASSERT(spin_is_locked(&hdr->coh_page_guard));
+
+ page = radix_tree_lookup(&hdr->coh_tree, index);
+ if (page != NULL)
+ cl_page_get_trust(page);
+ return page;
+}
+EXPORT_SYMBOL(cl_page_lookup);
+
+/**
+ * Returns a list of pages by a given [start, end] of \a obj.
+ *
+ * \param resched If not NULL, then we give up before hogging CPU for too
+ * long and set *resched = 1, in that case caller should implement a retry
+ * logic.
+ *
+ * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
+ * crucial in the face of [offset, EOF] locks.
+ *
+ * Return at least one page in @queue unless there is no covered page.
+ */
+int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, pgoff_t start, pgoff_t end,
+ cl_page_gang_cb_t cb, void *cbdata)
+{
+ struct cl_object_header *hdr;
+ struct cl_page *page;
+ struct cl_page **pvec;
+ const struct cl_page_slice *slice;
+ const struct lu_device_type *dtype;
+ pgoff_t idx;
+ unsigned int nr;
+ unsigned int i;
+ unsigned int j;
+ int res = CLP_GANG_OKAY;
+ int tree_lock = 1;
+ ENTRY;
+
+ idx = start;
+ hdr = cl_object_header(obj);
+ pvec = cl_env_info(env)->clt_pvec;
+ dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
+ spin_lock(&hdr->coh_page_guard);
+ while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
+ idx, CLT_PVEC_SIZE)) > 0) {
+ int end_of_region = 0;
+ idx = pvec[nr - 1]->cp_index + 1;
+ for (i = 0, j = 0; i < nr; ++i) {
+ page = pvec[i];
+ pvec[i] = NULL;
+
+ LASSERT(page->cp_type == CPT_CACHEABLE);
+ if (page->cp_index > end) {
+ end_of_region = 1;
+ break;
+ }
+ if (page->cp_state == CPS_FREEING)
+ continue;
+
+ slice = cl_page_at_trusted(page, dtype);
+ /*
+ * Pages for lsm-less file has no underneath sub-page
+ * for osc, in case of ...
+ */
+ PASSERT(env, page, slice != NULL);
+
+ page = slice->cpl_page;
+ /*
+ * Can safely call cl_page_get_trust() under
+ * radix-tree spin-lock.
+ *
+ * XXX not true, because @page is from object another
+ * than @hdr and protected by different tree lock.
+ */
+ cl_page_get_trust(page);
+ lu_ref_add_atomic(&page->cp_reference,
+ "gang_lookup", current);
+ pvec[j++] = page;
+ }
+
+ /*
+ * Here a delicate locking dance is performed. Current thread
+ * holds a reference to a page, but has to own it before it
+ * can be placed into queue. Owning implies waiting, so
+ * radix-tree lock is to be released. After a wait one has to
+ * check that pages weren't truncated (cl_page_own() returns
+ * error in the latter case).
+ */
+ spin_unlock(&hdr->coh_page_guard);
+ tree_lock = 0;
+
+ for (i = 0; i < j; ++i) {
+ page = pvec[i];
+ if (res == CLP_GANG_OKAY)
+ res = (*cb)(env, io, page, cbdata);
+ lu_ref_del(&page->cp_reference,
+ "gang_lookup", current);
+ cl_page_put(env, page);
+ }
+ if (nr < CLT_PVEC_SIZE || end_of_region)
+ break;
+
+ if (res == CLP_GANG_OKAY && need_resched())
+ res = CLP_GANG_RESCHED;
+ if (res != CLP_GANG_OKAY)
+ break;
+
+ spin_lock(&hdr->coh_page_guard);
+ tree_lock = 1;
+ }
+ if (tree_lock)
+ spin_unlock(&hdr->coh_page_guard);
+ RETURN(res);
+}
+EXPORT_SYMBOL(cl_page_gang_lookup);
+
+static void cl_page_free(const struct lu_env *env, struct cl_page *page)
+{
+ struct cl_object *obj = page->cp_obj;
+ int pagesize = cl_object_header(obj)->coh_page_bufsize;
+
+ PASSERT(env, page, list_empty(&page->cp_batch));
+ PASSERT(env, page, page->cp_owner == NULL);
+ PASSERT(env, page, page->cp_req == NULL);
+ PASSERT(env, page, page->cp_parent == NULL);
+ PASSERT(env, page, page->cp_state == CPS_FREEING);
+
+ ENTRY;
+ might_sleep();
+ while (!list_empty(&page->cp_layers)) {
+ struct cl_page_slice *slice;
+
+ slice = list_entry(page->cp_layers.next,
+ struct cl_page_slice, cpl_linkage);
+ list_del_init(page->cp_layers.next);
+ slice->cpl_ops->cpo_fini(env, slice);
+ }
+ CS_PAGE_DEC(obj, total);
+ CS_PAGESTATE_DEC(obj, page->cp_state);
+ lu_object_ref_del_at(&obj->co_lu, page->cp_obj_ref, "cl_page", page);
+ cl_object_put(env, obj);
+ lu_ref_fini(&page->cp_reference);
+ OBD_FREE(page, pagesize);
+ EXIT;
+}
+
+/**
+ * Helper function updating page state. This is the only place in the code
+ * where cl_page::cp_state field is mutated.
+ */
+static inline void cl_page_state_set_trust(struct cl_page *page,
+ enum cl_page_state state)
+{
+ /* bypass const. */
+ *(enum cl_page_state *)&page->cp_state = state;
+}
+
+static struct cl_page *cl_page_alloc(const struct lu_env *env,
+ struct cl_object *o, pgoff_t ind, struct page *vmpage,
+ enum cl_page_type type)
+{
+ struct cl_page *page;
+ struct lu_object_header *head;
+
+ ENTRY;
+ OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
+ __GFP_IO);
+ if (page != NULL) {
+ int result = 0;
+ atomic_set(&page->cp_ref, 1);
+ if (type == CPT_CACHEABLE) /* for radix tree */
+ atomic_inc(&page->cp_ref);
+ page->cp_obj = o;
+ cl_object_get(o);
+ page->cp_obj_ref = lu_object_ref_add(&o->co_lu, "cl_page",page);
+ page->cp_index = ind;
+ cl_page_state_set_trust(page, CPS_CACHED);
+ page->cp_type = type;
+ INIT_LIST_HEAD(&page->cp_layers);
+ INIT_LIST_HEAD(&page->cp_batch);
+ INIT_LIST_HEAD(&page->cp_flight);
+ mutex_init(&page->cp_mutex);
+ lu_ref_init(&page->cp_reference);
+ head = o->co_lu.lo_header;
+ list_for_each_entry(o, &head->loh_layers,
+ co_lu.lo_linkage) {
+ if (o->co_ops->coo_page_init != NULL) {
+ result = o->co_ops->coo_page_init(env, o,
+ page, vmpage);
+ if (result != 0) {
+ cl_page_delete0(env, page, 0);
+ cl_page_free(env, page);
+ page = ERR_PTR(result);
+ break;
+ }
+ }
+ }
+ if (result == 0) {
+ CS_PAGE_INC(o, total);
+ CS_PAGE_INC(o, create);
+ CS_PAGESTATE_DEC(o, CPS_CACHED);
+ }
+ } else {
+ page = ERR_PTR(-ENOMEM);
+ }
+ RETURN(page);
+}
+
+/**
+ * Returns a cl_page with index \a idx at the object \a o, and associated with
+ * the VM page \a vmpage.
+ *
+ * This is the main entry point into the cl_page caching interface. First, a
+ * cache (implemented as a per-object radix tree) is consulted. If page is
+ * found there, it is returned immediately. Otherwise new page is allocated
+ * and returned. In any case, additional reference to page is acquired.
+ *
+ * \see cl_object_find(), cl_lock_find()
+ */
+static struct cl_page *cl_page_find0(const struct lu_env *env,
+ struct cl_object *o,
+ pgoff_t idx, struct page *vmpage,
+ enum cl_page_type type,
+ struct cl_page *parent)
+{
+ struct cl_page *page = NULL;
+ struct cl_page *ghost = NULL;
+ struct cl_object_header *hdr;
+ int err;
+
+ LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
+ might_sleep();
+
+ ENTRY;
+
+ hdr = cl_object_header(o);
+ CS_PAGE_INC(o, lookup);
+
+ CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
+ idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
+ /* fast path. */
+ if (type == CPT_CACHEABLE) {
+ /* vmpage lock is used to protect the child/parent
+ * relationship */
+ KLASSERT(PageLocked(vmpage));
+ /*
+ * cl_vmpage_page() can be called here without any locks as
+ *
+ * - "vmpage" is locked (which prevents ->private from
+ * concurrent updates), and
+ *
+ * - "o" cannot be destroyed while current thread holds a
+ * reference on it.
+ */
+ page = cl_vmpage_page(vmpage, o);
+ PINVRNT(env, page,
+ ergo(page != NULL,
+ cl_page_vmpage(env, page) == vmpage &&
+ (void *)radix_tree_lookup(&hdr->coh_tree,
+ idx) == page));
+ }
+
+ if (page != NULL) {
+ CS_PAGE_INC(o, hit);
+ RETURN(page);
+ }
+
+ /* allocate and initialize cl_page */
+ page = cl_page_alloc(env, o, idx, vmpage, type);
+ if (IS_ERR(page))
+ RETURN(page);
+
+ if (type == CPT_TRANSIENT) {
+ if (parent) {
+ LASSERT(page->cp_parent == NULL);
+ page->cp_parent = parent;
+ parent->cp_child = page;
+ }
+ RETURN(page);
+ }
+
+ /*
+ * XXX optimization: use radix_tree_preload() here, and change tree
+ * gfp mask to GFP_KERNEL in cl_object_header_init().
+ */
+ spin_lock(&hdr->coh_page_guard);
+ err = radix_tree_insert(&hdr->coh_tree, idx, page);
+ if (err != 0) {
+ ghost = page;
+ /*
+ * Noted by Jay: a lock on \a vmpage protects cl_page_find()
+ * from this race, but
+ *
+ * 0. it's better to have cl_page interface "locally
+ * consistent" so that its correctness can be reasoned
+ * about without appealing to the (obscure world of) VM
+ * locking.
+ *
+ * 1. handling this race allows ->coh_tree to remain
+ * consistent even when VM locking is somehow busted,
+ * which is very useful during diagnosing and debugging.
+ */
+ page = ERR_PTR(err);
+ CL_PAGE_DEBUG(D_ERROR, env, ghost,
+ "fail to insert into radix tree: %d\n", err);
+ } else {
+ if (parent) {
+ LASSERT(page->cp_parent == NULL);
+ page->cp_parent = parent;
+ parent->cp_child = page;
+ }
+ hdr->coh_pages++;
+ }
+ spin_unlock(&hdr->coh_page_guard);
+
+ if (unlikely(ghost != NULL)) {
+ cl_page_delete0(env, ghost, 0);
+ cl_page_free(env, ghost);
+ }
+ RETURN(page);
+}
+
+struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
+ pgoff_t idx, struct page *vmpage,
+ enum cl_page_type type)
+{
+ return cl_page_find0(env, o, idx, vmpage, type, NULL);
+}
+EXPORT_SYMBOL(cl_page_find);
+
+
+struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
+ pgoff_t idx, struct page *vmpage,
+ struct cl_page *parent)
+{
+ return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
+}
+EXPORT_SYMBOL(cl_page_find_sub);
+
+static inline int cl_page_invariant(const struct cl_page *pg)
+{
+ struct cl_object_header *header;
+ struct cl_page *parent;
+ struct cl_page *child;
+ struct cl_io *owner;
+
+ /*
+ * Page invariant is protected by a VM lock.
+ */
+ LINVRNT(cl_page_is_vmlocked(NULL, pg));
+
+ header = cl_object_header(pg->cp_obj);
+ parent = pg->cp_parent;
+ child = pg->cp_child;
+ owner = pg->cp_owner;
+
+ return cl_page_in_use(pg) &&
+ ergo(parent != NULL, parent->cp_child == pg) &&
+ ergo(child != NULL, child->cp_parent == pg) &&
+ ergo(child != NULL, pg->cp_obj != child->cp_obj) &&
+ ergo(parent != NULL, pg->cp_obj != parent->cp_obj) &&
+ ergo(owner != NULL && parent != NULL,
+ parent->cp_owner == pg->cp_owner->ci_parent) &&
+ ergo(owner != NULL && child != NULL,
+ child->cp_owner->ci_parent == owner) &&
+ /*
+ * Either page is early in initialization (has neither child
+ * nor parent yet), or it is in the object radix tree.
+ */
+ ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
+ (void *)radix_tree_lookup(&header->coh_tree,
+ pg->cp_index) == pg ||
+ (child == NULL && parent == NULL));
+}
+
+static void cl_page_state_set0(const struct lu_env *env,
+ struct cl_page *page, enum cl_page_state state)
+{
+ enum cl_page_state old;
+
+ /*
+ * Matrix of allowed state transitions [old][new], for sanity
+ * checking.
+ */
+ static const int allowed_transitions[CPS_NR][CPS_NR] = {
+ [CPS_CACHED] = {
+ [CPS_CACHED] = 0,
+ [CPS_OWNED] = 1, /* io finds existing cached page */
+ [CPS_PAGEIN] = 0,
+ [CPS_PAGEOUT] = 1, /* write-out from the cache */
+ [CPS_FREEING] = 1, /* eviction on the memory pressure */
+ },
+ [CPS_OWNED] = {
+ [CPS_CACHED] = 1, /* release to the cache */
+ [CPS_OWNED] = 0,
+ [CPS_PAGEIN] = 1, /* start read immediately */
+ [CPS_PAGEOUT] = 1, /* start write immediately */
+ [CPS_FREEING] = 1, /* lock invalidation or truncate */
+ },
+ [CPS_PAGEIN] = {
+ [CPS_CACHED] = 1, /* io completion */
+ [CPS_OWNED] = 0,
+ [CPS_PAGEIN] = 0,
+ [CPS_PAGEOUT] = 0,
+ [CPS_FREEING] = 0,
+ },
+ [CPS_PAGEOUT] = {
+ [CPS_CACHED] = 1, /* io completion */
+ [CPS_OWNED] = 0,
+ [CPS_PAGEIN] = 0,
+ [CPS_PAGEOUT] = 0,
+ [CPS_FREEING] = 0,
+ },
+ [CPS_FREEING] = {
+ [CPS_CACHED] = 0,
+ [CPS_OWNED] = 0,
+ [CPS_PAGEIN] = 0,
+ [CPS_PAGEOUT] = 0,
+ [CPS_FREEING] = 0,
+ }
+ };
+
+ ENTRY;
+ old = page->cp_state;
+ PASSERT(env, page, allowed_transitions[old][state]);
+ CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
+ for (; page != NULL; page = page->cp_child) {
+ PASSERT(env, page, page->cp_state == old);
+ PASSERT(env, page,
+ equi(state == CPS_OWNED, page->cp_owner != NULL));
+
+ CS_PAGESTATE_DEC(page->cp_obj, page->cp_state);
+ CS_PAGESTATE_INC(page->cp_obj, state);
+ cl_page_state_set_trust(page, state);
+ }
+ EXIT;
+}
+
+static void cl_page_state_set(const struct lu_env *env,
+ struct cl_page *page, enum cl_page_state state)
+{
+ cl_page_state_set0(env, page, state);
+}
+
+/**
+ * Acquires an additional reference to a page.
+ *
+ * This can be called only by caller already possessing a reference to \a
+ * page.
+ *
+ * \see cl_object_get(), cl_lock_get().
+ */
+void cl_page_get(struct cl_page *page)
+{
+ ENTRY;
+ cl_page_get_trust(page);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_get);
+
+/**
+ * Releases a reference to a page.
+ *
+ * When last reference is released, page is returned to the cache, unless it
+ * is in cl_page_state::CPS_FREEING state, in which case it is immediately
+ * destroyed.
+ *
+ * \see cl_object_put(), cl_lock_put().
+ */
+void cl_page_put(const struct lu_env *env, struct cl_page *page)
+{
+ PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
+
+ ENTRY;
+ CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
+ atomic_read(&page->cp_ref));
+
+ if (atomic_dec_and_test(&page->cp_ref)) {
+ LASSERT(page->cp_state == CPS_FREEING);
+
+ LASSERT(atomic_read(&page->cp_ref) == 0);
+ PASSERT(env, page, page->cp_owner == NULL);
+ PASSERT(env, page, list_empty(&page->cp_batch));
+ /*
+ * Page is no longer reachable by other threads. Tear
+ * it down.
+ */
+ cl_page_free(env, page);
+ }
+
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_put);
+
+/**
+ * Returns a VM page associated with a given cl_page.
+ */
+struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
+{
+ const struct cl_page_slice *slice;
+
+ /*
+ * Find uppermost layer with ->cpo_vmpage() method, and return its
+ * result.
+ */
+ page = cl_page_top(page);
+ do {
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ if (slice->cpl_ops->cpo_vmpage != NULL)
+ RETURN(slice->cpl_ops->cpo_vmpage(env, slice));
+ }
+ page = page->cp_child;
+ } while (page != NULL);
+ LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
+}
+EXPORT_SYMBOL(cl_page_vmpage);
+
+/**
+ * Returns a cl_page associated with a VM page, and given cl_object.
+ */
+struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
+{
+ struct cl_page *top;
+ struct cl_page *page;
+
+ ENTRY;
+ KLASSERT(PageLocked(vmpage));
+
+ /*
+ * NOTE: absence of races and liveness of data are guaranteed by page
+ * lock on a "vmpage". That works because object destruction has
+ * bottom-to-top pass.
+ */
+
+ /*
+ * This loop assumes that ->private points to the top-most page. This
+ * can be rectified easily.
+ */
+ top = (struct cl_page *)vmpage->private;
+ if (top == NULL)
+ RETURN(NULL);
+
+ for (page = top; page != NULL; page = page->cp_child) {
+ if (cl_object_same(page->cp_obj, obj)) {
+ cl_page_get_trust(page);
+ break;
+ }
+ }
+ LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
+ RETURN(page);
+}
+EXPORT_SYMBOL(cl_vmpage_page);
+
+/**
+ * Returns the top-page for a given page.
+ *
+ * \see cl_object_top(), cl_io_top()
+ */
+struct cl_page *cl_page_top(struct cl_page *page)
+{
+ return cl_page_top_trusted(page);
+}
+EXPORT_SYMBOL(cl_page_top);
+
+const struct cl_page_slice *cl_page_at(const struct cl_page *page,
+ const struct lu_device_type *dtype)
+{
+ return cl_page_at_trusted(page, dtype);
+}
+EXPORT_SYMBOL(cl_page_at);
+
+#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
+
+#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...) \
+({ \
+ const struct lu_env *__env = (_env); \
+ struct cl_page *__page = (_page); \
+ const struct cl_page_slice *__scan; \
+ int __result; \
+ ptrdiff_t __op = (_op); \
+ int (*__method)_proto; \
+ \
+ __result = 0; \
+ __page = cl_page_top(__page); \
+ do { \
+ list_for_each_entry(__scan, &__page->cp_layers, \
+ cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + \
+ __op); \
+ if (__method != NULL) { \
+ __result = (*__method)(__env, __scan, \
+ ## __VA_ARGS__); \
+ if (__result != 0) \
+ break; \
+ } \
+ } \
+ __page = __page->cp_child; \
+ } while (__page != NULL && __result == 0); \
+ if (__result > 0) \
+ __result = 0; \
+ __result; \
+})
+
+#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
+do { \
+ const struct lu_env *__env = (_env); \
+ struct cl_page *__page = (_page); \
+ const struct cl_page_slice *__scan; \
+ ptrdiff_t __op = (_op); \
+ void (*__method)_proto; \
+ \
+ __page = cl_page_top(__page); \
+ do { \
+ list_for_each_entry(__scan, &__page->cp_layers, \
+ cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + \
+ __op); \
+ if (__method != NULL) \
+ (*__method)(__env, __scan, \
+ ## __VA_ARGS__); \
+ } \
+ __page = __page->cp_child; \
+ } while (__page != NULL); \
+} while (0)
+
+#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
+do { \
+ const struct lu_env *__env = (_env); \
+ struct cl_page *__page = (_page); \
+ const struct cl_page_slice *__scan; \
+ ptrdiff_t __op = (_op); \
+ void (*__method)_proto; \
+ \
+ /* get to the bottom page. */ \
+ while (__page->cp_child != NULL) \
+ __page = __page->cp_child; \
+ do { \
+ list_for_each_entry_reverse(__scan, &__page->cp_layers, \
+ cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + \
+ __op); \
+ if (__method != NULL) \
+ (*__method)(__env, __scan, \
+ ## __VA_ARGS__); \
+ } \
+ __page = __page->cp_parent; \
+ } while (__page != NULL); \
+} while (0)
+
+static int cl_page_invoke(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page, ptrdiff_t op)
+
+{
+ PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
+ ENTRY;
+ RETURN(CL_PAGE_INVOKE(env, page, op,
+ (const struct lu_env *,
+ const struct cl_page_slice *, struct cl_io *),
+ io));
+}
+
+static void cl_page_invoid(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *page, ptrdiff_t op)
+
+{
+ PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
+ ENTRY;
+ CL_PAGE_INVOID(env, page, op,
+ (const struct lu_env *,
+ const struct cl_page_slice *, struct cl_io *), io);
+ EXIT;
+}
+
+static void cl_page_owner_clear(struct cl_page *page)
+{
+ ENTRY;
+ for (page = cl_page_top(page); page != NULL; page = page->cp_child) {
+ if (page->cp_owner != NULL) {
+ LASSERT(page->cp_owner->ci_owned_nr > 0);
+ page->cp_owner->ci_owned_nr--;
+ page->cp_owner = NULL;
+ page->cp_task = NULL;
+ }
+ }
+ EXIT;
+}
+
+static void cl_page_owner_set(struct cl_page *page)
+{
+ ENTRY;
+ for (page = cl_page_top(page); page != NULL; page = page->cp_child) {
+ LASSERT(page->cp_owner != NULL);
+ page->cp_owner->ci_owned_nr++;
+ }
+ EXIT;
+}
+
+void cl_page_disown0(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ enum cl_page_state state;
+
+ ENTRY;
+ state = pg->cp_state;
+ PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
+ PINVRNT(env, pg, cl_page_invariant(pg));
+ cl_page_owner_clear(pg);
+
+ if (state == CPS_OWNED)
+ cl_page_state_set(env, pg, CPS_CACHED);
+ /*
+ * Completion call-backs are executed in the bottom-up order, so that
+ * uppermost layer (llite), responsible for VFS/VM interaction runs
+ * last and can release locks safely.
+ */
+ CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
+ (const struct lu_env *,
+ const struct cl_page_slice *, struct cl_io *),
+ io);
+ EXIT;
+}
+
+/**
+ * returns true, iff page is owned by the given io.
+ */
+int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
+{
+ LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
+ ENTRY;
+ RETURN(pg->cp_state == CPS_OWNED && pg->cp_owner == io);
+}
+EXPORT_SYMBOL(cl_page_is_owned);
+
+/**
+ * Try to own a page by IO.
+ *
+ * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it
+ * into cl_page_state::CPS_OWNED state.
+ *
+ * \pre !cl_page_is_owned(pg, io)
+ * \post result == 0 iff cl_page_is_owned(pg, io)
+ *
+ * \retval 0 success
+ *
+ * \retval -ve failure, e.g., page was destroyed (and landed in
+ * cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED).
+ * or, page was owned by another thread, or in IO.
+ *
+ * \see cl_page_disown()
+ * \see cl_page_operations::cpo_own()
+ * \see cl_page_own_try()
+ * \see cl_page_own
+ */
+static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg, int nonblock)
+{
+ int result;
+
+ PINVRNT(env, pg, !cl_page_is_owned(pg, io));
+
+ ENTRY;
+ pg = cl_page_top(pg);
+ io = cl_io_top(io);
+
+ if (pg->cp_state == CPS_FREEING) {
+ result = -ENOENT;
+ } else {
+ result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
+ (const struct lu_env *,
+ const struct cl_page_slice *,
+ struct cl_io *, int),
+ io, nonblock);
+ if (result == 0) {
+ PASSERT(env, pg, pg->cp_owner == NULL);
+ PASSERT(env, pg, pg->cp_req == NULL);
+ pg->cp_owner = io;
+ pg->cp_task = current;
+ cl_page_owner_set(pg);
+ if (pg->cp_state != CPS_FREEING) {
+ cl_page_state_set(env, pg, CPS_OWNED);
+ } else {
+ cl_page_disown0(env, io, pg);
+ result = -ENOENT;
+ }
+ }
+ }
+ PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
+ RETURN(result);
+}
+
+/**
+ * Own a page, might be blocked.
+ *
+ * \see cl_page_own0()
+ */
+int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
+{
+ return cl_page_own0(env, io, pg, 0);
+}
+EXPORT_SYMBOL(cl_page_own);
+
+/**
+ * Nonblock version of cl_page_own().
+ *
+ * \see cl_page_own0()
+ */
+int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg)
+{
+ return cl_page_own0(env, io, pg, 1);
+}
+EXPORT_SYMBOL(cl_page_own_try);
+
+
+/**
+ * Assume page ownership.
+ *
+ * Called when page is already locked by the hosting VM.
+ *
+ * \pre !cl_page_is_owned(pg, io)
+ * \post cl_page_is_owned(pg, io)
+ *
+ * \see cl_page_operations::cpo_assume()
+ */
+void cl_page_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
+
+ ENTRY;
+ pg = cl_page_top(pg);
+ io = cl_io_top(io);
+
+ cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
+ PASSERT(env, pg, pg->cp_owner == NULL);
+ pg->cp_owner = io;
+ pg->cp_task = current;
+ cl_page_owner_set(pg);
+ cl_page_state_set(env, pg, CPS_OWNED);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_assume);
+
+/**
+ * Releases page ownership without unlocking the page.
+ *
+ * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
+ * underlying VM page (as VM is supposed to do this itself).
+ *
+ * \pre cl_page_is_owned(pg, io)
+ * \post !cl_page_is_owned(pg, io)
+ *
+ * \see cl_page_assume()
+ */
+void cl_page_unassume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ ENTRY;
+ pg = cl_page_top(pg);
+ io = cl_io_top(io);
+ cl_page_owner_clear(pg);
+ cl_page_state_set(env, pg, CPS_CACHED);
+ CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
+ (const struct lu_env *,
+ const struct cl_page_slice *, struct cl_io *),
+ io);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_unassume);
+
+/**
+ * Releases page ownership.
+ *
+ * Moves page into cl_page_state::CPS_CACHED.
+ *
+ * \pre cl_page_is_owned(pg, io)
+ * \post !cl_page_is_owned(pg, io)
+ *
+ * \see cl_page_own()
+ * \see cl_page_operations::cpo_disown()
+ */
+void cl_page_disown(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+
+ ENTRY;
+ pg = cl_page_top(pg);
+ io = cl_io_top(io);
+ cl_page_disown0(env, io, pg);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_disown);
+
+/**
+ * Called when page is to be removed from the object, e.g., as a result of
+ * truncate.
+ *
+ * Calls cl_page_operations::cpo_discard() top-to-bottom.
+ *
+ * \pre cl_page_is_owned(pg, io)
+ *
+ * \see cl_page_operations::cpo_discard()
+ */
+void cl_page_discard(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
+}
+EXPORT_SYMBOL(cl_page_discard);
+
+/**
+ * Version of cl_page_delete() that can be called for not fully constructed
+ * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0()
+ * path. Doesn't check page invariant.
+ */
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
+ int radix)
+{
+ struct cl_page *tmp = pg;
+ ENTRY;
+
+ PASSERT(env, pg, pg == cl_page_top(pg));
+ PASSERT(env, pg, pg->cp_state != CPS_FREEING);
+
+ /*
+ * Severe all ways to obtain new pointers to @pg.
+ */
+ cl_page_owner_clear(pg);
+
+ /*
+ * unexport the page firstly before freeing it so that
+ * the page content is considered to be invalid.
+ * We have to do this because a CPS_FREEING cl_page may
+ * be NOT under the protection of a cl_lock.
+ * Afterwards, if this page is found by other threads, then this
+ * page will be forced to reread.
+ */
+ cl_page_export(env, pg, 0);
+ cl_page_state_set0(env, pg, CPS_FREEING);
+
+ CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
+ (const struct lu_env *, const struct cl_page_slice *));
+
+ if (tmp->cp_type == CPT_CACHEABLE) {
+ if (!radix)
+ /* !radix means that @pg is not yet in the radix tree,
+ * skip removing it.
+ */
+ tmp = pg->cp_child;
+ for (; tmp != NULL; tmp = tmp->cp_child) {
+ void *value;
+ struct cl_object_header *hdr;
+
+ hdr = cl_object_header(tmp->cp_obj);
+ spin_lock(&hdr->coh_page_guard);
+ value = radix_tree_delete(&hdr->coh_tree,
+ tmp->cp_index);
+ PASSERT(env, tmp, value == tmp);
+ PASSERT(env, tmp, hdr->coh_pages > 0);
+ hdr->coh_pages--;
+ spin_unlock(&hdr->coh_page_guard);
+ cl_page_put(env, tmp);
+ }
+ }
+
+ EXIT;
+}
+
+/**
+ * Called when a decision is made to throw page out of memory.
+ *
+ * Notifies all layers about page destruction by calling
+ * cl_page_operations::cpo_delete() method top-to-bottom.
+ *
+ * Moves page into cl_page_state::CPS_FREEING state (this is the only place
+ * where transition to this state happens).
+ *
+ * Eliminates all venues through which new references to the page can be
+ * obtained:
+ *
+ * - removes page from the radix trees,
+ *
+ * - breaks linkage from VM page to cl_page.
+ *
+ * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
+ * drain after some time, at which point page will be recycled.
+ *
+ * \pre pg == cl_page_top(pg)
+ * \pre VM page is locked
+ * \post pg->cp_state == CPS_FREEING
+ *
+ * \see cl_page_operations::cpo_delete()
+ */
+void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_page_invariant(pg));
+ ENTRY;
+ cl_page_delete0(env, pg, 1);
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_delete);
+
+/**
+ * Unmaps page from user virtual memory.
+ *
+ * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
+ * layer responsible for VM interaction has to unmap page from user space
+ * virtual memory.
+ *
+ * \see cl_page_operations::cpo_unmap()
+ */
+int cl_page_unmap(const struct lu_env *env,
+ struct cl_io *io, struct cl_page *pg)
+{
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
+}
+EXPORT_SYMBOL(cl_page_unmap);
+
+/**
+ * Marks page up-to-date.
+ *
+ * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
+ * layer responsible for VM interaction has to mark/clear page as up-to-date
+ * by the \a uptodate argument.
+ *
+ * \see cl_page_operations::cpo_export()
+ */
+void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
+{
+ PINVRNT(env, pg, cl_page_invariant(pg));
+ CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
+ (const struct lu_env *,
+ const struct cl_page_slice *, int), uptodate);
+}
+EXPORT_SYMBOL(cl_page_export);
+
+/**
+ * Returns true, iff \a pg is VM locked in a suitable sense by the calling
+ * thread.
+ */
+int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
+{
+ int result;
+ const struct cl_page_slice *slice;
+
+ ENTRY;
+ pg = cl_page_top_trusted((struct cl_page *)pg);
+ slice = container_of(pg->cp_layers.next,
+ const struct cl_page_slice, cpl_linkage);
+ PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked != NULL);
+ /*
+ * Call ->cpo_is_vmlocked() directly instead of going through
+ * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
+ * cl_page_invariant().
+ */
+ result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
+ PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
+ RETURN(result == -EBUSY);
+}
+EXPORT_SYMBOL(cl_page_is_vmlocked);
+
+static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
+{
+ ENTRY;
+ RETURN(crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN);
+}
+
+static void cl_page_io_start(const struct lu_env *env,
+ struct cl_page *pg, enum cl_req_type crt)
+{
+ /*
+ * Page is queued for IO, change its state.
+ */
+ ENTRY;
+ cl_page_owner_clear(pg);
+ cl_page_state_set(env, pg, cl_req_type_state(crt));
+ EXIT;
+}
+
+/**
+ * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
+ * called top-to-bottom. Every layer either agrees to submit this page (by
+ * returning 0), or requests to omit this page (by returning -EALREADY). Layer
+ * handling interactions with the VM also has to inform VM that page is under
+ * transfer now.
+ */
+int cl_page_prep(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg, enum cl_req_type crt)
+{
+ int result;
+
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+ PINVRNT(env, pg, crt < CRT_NR);
+
+ /*
+ * XXX this has to be called bottom-to-top, so that llite can set up
+ * PG_writeback without risking other layers deciding to skip this
+ * page.
+ */
+ if (crt >= CRT_NR)
+ return -EINVAL;
+ result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
+ if (result == 0)
+ cl_page_io_start(env, pg, crt);
+
+ KLASSERT(ergo(crt == CRT_WRITE && pg->cp_type == CPT_CACHEABLE,
+ equi(result == 0,
+ PageWriteback(cl_page_vmpage(env, pg)))));
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
+ return result;
+}
+EXPORT_SYMBOL(cl_page_prep);
+
+/**
+ * Notify layers about transfer completion.
+ *
+ * Invoked by transfer sub-system (which is a part of osc) to notify layers
+ * that a transfer, of which this page is a part of has completed.
+ *
+ * Completion call-backs are executed in the bottom-up order, so that
+ * uppermost layer (llite), responsible for the VFS/VM interaction runs last
+ * and can release locks safely.
+ *
+ * \pre pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
+ * \post pg->cp_state == CPS_CACHED
+ *
+ * \see cl_page_operations::cpo_completion()
+ */
+void cl_page_completion(const struct lu_env *env,
+ struct cl_page *pg, enum cl_req_type crt, int ioret)
+{
+ struct cl_sync_io *anchor = pg->cp_sync_io;
+
+ PASSERT(env, pg, crt < CRT_NR);
+ /* cl_page::cp_req already cleared by the caller (osc_completion()) */
+ PASSERT(env, pg, pg->cp_req == NULL);
+ PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
+
+ ENTRY;
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
+ if (crt == CRT_READ && ioret == 0) {
+ PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
+ pg->cp_flags |= CPF_READ_COMPLETED;
+ }
+
+ cl_page_state_set(env, pg, CPS_CACHED);
+ if (crt >= CRT_NR)
+ return;
+ CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
+ (const struct lu_env *,
+ const struct cl_page_slice *, int), ioret);
+ if (anchor) {
+ LASSERT(cl_page_is_vmlocked(env, pg));
+ LASSERT(pg->cp_sync_io == anchor);
+ pg->cp_sync_io = NULL;
+ }
+ /*
+ * As page->cp_obj is pinned by a reference from page->cp_req, it is
+ * safe to call cl_page_put() without risking object destruction in a
+ * non-blocking context.
+ */
+ cl_page_put(env, pg);
+
+ if (anchor)
+ cl_sync_io_note(anchor, ioret);
+
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_completion);
+
+/**
+ * Notify layers that transfer formation engine decided to yank this page from
+ * the cache and to make it a part of a transfer.
+ *
+ * \pre pg->cp_state == CPS_CACHED
+ * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
+ *
+ * \see cl_page_operations::cpo_make_ready()
+ */
+int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
+ enum cl_req_type crt)
+{
+ int result;
+
+ PINVRNT(env, pg, crt < CRT_NR);
+
+ ENTRY;
+ if (crt >= CRT_NR)
+ RETURN(-EINVAL);
+ result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
+ (const struct lu_env *,
+ const struct cl_page_slice *));
+ if (result == 0) {
+ PASSERT(env, pg, pg->cp_state == CPS_CACHED);
+ cl_page_io_start(env, pg, crt);
+ }
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_make_ready);
+
+/**
+ * Notify layers that high level io decided to place this page into a cache
+ * for future transfer.
+ *
+ * The layer implementing transfer engine (osc) has to register this page in
+ * its queues.
+ *
+ * \pre cl_page_is_owned(pg, io)
+ * \post cl_page_is_owned(pg, io)
+ *
+ * \see cl_page_operations::cpo_cache_add()
+ */
+int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg, enum cl_req_type crt)
+{
+ const struct cl_page_slice *scan;
+ int result = 0;
+
+ PINVRNT(env, pg, crt < CRT_NR);
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ ENTRY;
+
+ if (crt >= CRT_NR)
+ RETURN(-EINVAL);
+
+ list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
+ if (scan->cpl_ops->io[crt].cpo_cache_add == NULL)
+ continue;
+
+ result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
+ if (result != 0)
+ break;
+ }
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_cache_add);
+
+/**
+ * Called if a pge is being written back by kernel's intention.
+ *
+ * \pre cl_page_is_owned(pg, io)
+ * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
+ *
+ * \see cl_page_operations::cpo_flush()
+ */
+int cl_page_flush(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg)
+{
+ int result;
+
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ ENTRY;
+
+ result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
+
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_flush);
+
+/**
+ * Checks whether page is protected by any extent lock is at least required
+ * mode.
+ *
+ * \return the same as in cl_page_operations::cpo_is_under_lock() method.
+ * \see cl_page_operations::cpo_is_under_lock()
+ */
+int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ int rc;
+
+ PINVRNT(env, page, cl_page_invariant(page));
+
+ ENTRY;
+ rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
+ (const struct lu_env *,
+ const struct cl_page_slice *, struct cl_io *),
+ io);
+ PASSERT(env, page, rc != 0);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cl_page_is_under_lock);
+
+static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, void *cbdata)
+{
+ cl_page_own(env, io, page);
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ return CLP_GANG_OKAY;
+}
+
+/**
+ * Purges all cached pages belonging to the object \a obj.
+ */
+int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
+{
+ struct cl_thread_info *info;
+ struct cl_object *obj = cl_object_top(clobj);
+ struct cl_io *io;
+ int result;
+
+ ENTRY;
+ info = cl_env_info(env);
+ io = &info->clt_io;
+
+ /*
+ * initialize the io. This is ugly since we never do IO in this
+ * function, we just make cl_page_list functions happy. -jay
+ */
+ io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, obj);
+ if (result != 0) {
+ cl_io_fini(env, io);
+ RETURN(io->ci_result);
+ }
+
+ do {
+ result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
+ page_prune_cb, NULL);
+ if (result == CLP_GANG_RESCHED)
+ cond_resched();
+ } while (result != CLP_GANG_OKAY);
+
+ cl_io_fini(env, io);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_pages_prune);
+
+/**
+ * Tells transfer engine that only part of a page is to be transmitted.
+ *
+ * \see cl_page_operations::cpo_clip()
+ */
+void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
+ int from, int to)
+{
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
+ CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
+ (const struct lu_env *,
+ const struct cl_page_slice *,int, int),
+ from, to);
+}
+EXPORT_SYMBOL(cl_page_clip);
+
+/**
+ * Prints human readable representation of \a pg to the \a f.
+ */
+void cl_page_header_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_page *pg)
+{
+ (*printer)(env, cookie,
+ "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
+ pg, atomic_read(&pg->cp_ref), pg->cp_obj,
+ pg->cp_index, pg->cp_parent, pg->cp_child,
+ pg->cp_state, pg->cp_error, pg->cp_type,
+ pg->cp_owner, pg->cp_req, pg->cp_flags);
+}
+EXPORT_SYMBOL(cl_page_header_print);
+
+/**
+ * Prints human readable representation of \a pg to the \a f.
+ */
+void cl_page_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct cl_page *pg)
+{
+ struct cl_page *scan;
+
+ for (scan = cl_page_top((struct cl_page *)pg);
+ scan != NULL; scan = scan->cp_child)
+ cl_page_header_print(env, cookie, printer, scan);
+ CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
+ (const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t p), cookie, printer);
+ (*printer)(env, cookie, "end page@%p\n", pg);
+}
+EXPORT_SYMBOL(cl_page_print);
+
+/**
+ * Cancel a page which is still in a transfer.
+ */
+int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
+{
+ return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
+ (const struct lu_env *,
+ const struct cl_page_slice *));
+}
+EXPORT_SYMBOL(cl_page_cancel);
+
+/**
+ * Converts a byte offset within object \a obj into a page index.
+ */
+loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
+{
+ /*
+ * XXX for now.
+ */
+ return (loff_t)idx << PAGE_CACHE_SHIFT;
+}
+EXPORT_SYMBOL(cl_offset);
+
+/**
+ * Converts a page index into a byte offset within object \a obj.
+ */
+pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
+{
+ /*
+ * XXX for now.
+ */
+ return offset >> PAGE_CACHE_SHIFT;
+}
+EXPORT_SYMBOL(cl_index);
+
+int cl_page_size(const struct cl_object *obj)
+{
+ return 1 << PAGE_CACHE_SHIFT;
+}
+EXPORT_SYMBOL(cl_page_size);
+
+/**
+ * Adds page slice to the compound page.
+ *
+ * This is called by cl_object_operations::coo_page_init() methods to add a
+ * per-layer state to the page. New state is added at the end of
+ * cl_page::cp_layers list, that is, it is at the bottom of the stack.
+ *
+ * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
+ */
+void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
+ struct cl_object *obj,
+ const struct cl_page_operations *ops)
+{
+ ENTRY;
+ list_add_tail(&slice->cpl_linkage, &page->cp_layers);
+ slice->cpl_obj = obj;
+ slice->cpl_ops = ops;
+ slice->cpl_page = page;
+ EXIT;
+}
+EXPORT_SYMBOL(cl_page_slice_add);
+
+int cl_page_init(void)
+{
+ return 0;
+}
+
+void cl_page_fini(void)
+{
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
new file mode 100644
index 000000000000..af1c2d09c47b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -0,0 +1,689 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+# include <asm/atomic.h>
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <linux/lnet/lnetctl.h>
+#include <lustre_debug.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_build_version.h>
+#include <linux/list.h>
+#include <cl_object.h>
+#include "llog_internal.h"
+
+
+struct obd_device *obd_devs[MAX_OBD_DEVICES];
+EXPORT_SYMBOL(obd_devs);
+struct list_head obd_types;
+DEFINE_RWLOCK(obd_dev_lock);
+
+__u64 obd_max_pages = 0;
+__u64 obd_max_alloc = 0;
+DEFINE_SPINLOCK(obd_updatemax_lock);
+
+/* The following are visible and mutable through /proc/sys/lustre/. */
+unsigned int obd_alloc_fail_rate = 0;
+EXPORT_SYMBOL(obd_alloc_fail_rate);
+unsigned int obd_debug_peer_on_timeout;
+EXPORT_SYMBOL(obd_debug_peer_on_timeout);
+unsigned int obd_dump_on_timeout;
+EXPORT_SYMBOL(obd_dump_on_timeout);
+unsigned int obd_dump_on_eviction;
+EXPORT_SYMBOL(obd_dump_on_eviction);
+unsigned int obd_max_dirty_pages = 256;
+EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_t obd_dirty_pages;
+EXPORT_SYMBOL(obd_dirty_pages);
+unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
+EXPORT_SYMBOL(obd_timeout);
+unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
+EXPORT_SYMBOL(ldlm_timeout);
+unsigned int obd_timeout_set;
+EXPORT_SYMBOL(obd_timeout_set);
+unsigned int ldlm_timeout_set;
+EXPORT_SYMBOL(ldlm_timeout_set);
+/* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
+unsigned int at_min = 0;
+EXPORT_SYMBOL(at_min);
+unsigned int at_max = 600;
+EXPORT_SYMBOL(at_max);
+unsigned int at_history = 600;
+EXPORT_SYMBOL(at_history);
+int at_early_margin = 5;
+EXPORT_SYMBOL(at_early_margin);
+int at_extra = 30;
+EXPORT_SYMBOL(at_extra);
+
+atomic_t obd_dirty_transit_pages;
+EXPORT_SYMBOL(obd_dirty_transit_pages);
+
+char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
+EXPORT_SYMBOL(obd_jobid_var);
+
+/* Get jobid of current process by reading the environment variable
+ * stored in between the "env_start" & "env_end" of task struct.
+ *
+ * TODO:
+ * It's better to cache the jobid for later use if there is any
+ * efficient way, the cl_env code probably could be reused for this
+ * purpose.
+ *
+ * If some job scheduler doesn't store jobid in the "env_start/end",
+ * then an upcall could be issued here to get the jobid by utilizing
+ * the userspace tools/api. Then, the jobid must be cached.
+ */
+int lustre_get_jobid(char *jobid)
+{
+ int jobid_len = JOBSTATS_JOBID_SIZE;
+ int rc = 0;
+ ENTRY;
+
+ memset(jobid, 0, JOBSTATS_JOBID_SIZE);
+ /* Jobstats isn't enabled */
+ if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
+ RETURN(0);
+
+ /* Use process name + fsuid as jobid */
+ if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
+ snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u",
+ current_comm(), current_fsuid());
+ RETURN(0);
+ }
+
+ rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len);
+ if (rc) {
+ if (rc == -EOVERFLOW) {
+ /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
+ * variable length strings instead of just numbers), it
+ * might make sense to keep the unique parts for JobID,
+ * instead of just returning an error. That means a
+ * larger temp buffer for cfs_get_environ(), then
+ * truncating the string at some separator to fit into
+ * the specified jobid_len. Fix later if needed. */
+ static bool printed;
+ if (unlikely(!printed)) {
+ LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
+ "for JobID buffer (%d)\n",
+ obd_jobid_var, jobid_len);
+ printed = true;
+ }
+ } else {
+ CDEBUG((rc == -ENOENT || rc == -EINVAL ||
+ rc == -EDEADLK) ? D_INFO : D_ERROR,
+ "Get jobid for (%s) failed: rc = %d\n",
+ obd_jobid_var, rc);
+ }
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lustre_get_jobid);
+
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+ size_t size, const char *file, int line)
+{
+ if (ptr == NULL ||
+ (cfs_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) {
+ CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n",
+ ptr ? "force " :"", type, name, (__u64)size, file,
+ line);
+ CERROR(LPU64" total bytes and "LPU64" total pages "
+ "("LPU64" bytes) allocated by Lustre, "
+ "%d total bytes by LNET\n",
+ obd_memory_sum(),
+ obd_pages_sum() << PAGE_CACHE_SHIFT,
+ obd_pages_sum(),
+ atomic_read(&libcfs_kmemory));
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(obd_alloc_fail);
+
+static inline void obd_data2conn(struct lustre_handle *conn,
+ struct obd_ioctl_data *data)
+{
+ memset(conn, 0, sizeof *conn);
+ conn->cookie = data->ioc_cookie;
+}
+
+static inline void obd_conn2data(struct obd_ioctl_data *data,
+ struct lustre_handle *conn)
+{
+ data->ioc_cookie = conn->cookie;
+}
+
+int class_resolve_dev_name(__u32 len, const char *name)
+{
+ int rc;
+ int dev;
+
+ ENTRY;
+ if (!len || !name) {
+ CERROR("No name passed,!\n");
+ GOTO(out, rc = -EINVAL);
+ }
+ if (name[len - 1] != 0) {
+ CERROR("Name not nul terminated!\n");
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CDEBUG(D_IOCTL, "device name %s\n", name);
+ dev = class_name2dev(name);
+ if (dev == -1) {
+ CDEBUG(D_IOCTL, "No device for name %s!\n", name);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev);
+ rc = dev;
+
+out:
+ RETURN(rc);
+}
+
+int class_handle_ioctl(unsigned int cmd, unsigned long arg)
+{
+ char *buf = NULL;
+ struct obd_ioctl_data *data;
+ struct libcfs_debug_ioctl_data *debug_data;
+ struct obd_device *obd = NULL;
+ int err = 0, len = 0;
+ ENTRY;
+
+ /* only for debugging */
+ if (cmd == LIBCFS_IOC_DEBUG_MASK) {
+ debug_data = (struct libcfs_debug_ioctl_data*)arg;
+ libcfs_subsystem_debug = debug_data->subs;
+ libcfs_debug = debug_data->debug;
+ return 0;
+ }
+
+ CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
+ if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
+ CERROR("OBD ioctl: data error\n");
+ RETURN(-EINVAL);
+ }
+ data = (struct obd_ioctl_data *)buf;
+
+ switch (cmd) {
+ case OBD_IOC_PROCESS_CFG: {
+ struct lustre_cfg *lcfg;
+
+ if (!data->ioc_plen1 || !data->ioc_pbuf1) {
+ CERROR("No config buffer passed!\n");
+ GOTO(out, err = -EINVAL);
+ }
+ OBD_ALLOC(lcfg, data->ioc_plen1);
+ if (lcfg == NULL)
+ GOTO(out, err = -ENOMEM);
+ err = copy_from_user(lcfg, data->ioc_pbuf1,
+ data->ioc_plen1);
+ if (!err)
+ err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
+ if (!err)
+ err = class_process_config(lcfg);
+
+ OBD_FREE(lcfg, data->ioc_plen1);
+ GOTO(out, err);
+ }
+
+ case OBD_GET_VERSION:
+ if (!data->ioc_inlbuf1) {
+ CERROR("No buffer passed in ioctl\n");
+ GOTO(out, err = -EINVAL);
+ }
+
+ if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
+ CERROR("ioctl buffer too small to hold version\n");
+ GOTO(out, err = -EINVAL);
+ }
+
+ memcpy(data->ioc_bulk, BUILD_VERSION,
+ strlen(BUILD_VERSION) + 1);
+
+ err = obd_ioctl_popdata((void *)arg, data, len);
+ if (err)
+ err = -EFAULT;
+ GOTO(out, err);
+
+ case OBD_IOC_NAME2DEV: {
+ /* Resolve a device name. This does not change the
+ * currently selected device.
+ */
+ int dev;
+
+ dev = class_resolve_dev_name(data->ioc_inllen1,
+ data->ioc_inlbuf1);
+ data->ioc_dev = dev;
+ if (dev < 0)
+ GOTO(out, err = -EINVAL);
+
+ err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+ if (err)
+ err = -EFAULT;
+ GOTO(out, err);
+ }
+
+ case OBD_IOC_UUID2DEV: {
+ /* Resolve a device uuid. This does not change the
+ * currently selected device.
+ */
+ int dev;
+ struct obd_uuid uuid;
+
+ if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
+ CERROR("No UUID passed!\n");
+ GOTO(out, err = -EINVAL);
+ }
+ if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
+ CERROR("UUID not NUL terminated!\n");
+ GOTO(out, err = -EINVAL);
+ }
+
+ CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1);
+ obd_str2uuid(&uuid, data->ioc_inlbuf1);
+ dev = class_uuid2dev(&uuid);
+ data->ioc_dev = dev;
+ if (dev == -1) {
+ CDEBUG(D_IOCTL, "No device for UUID %s!\n",
+ data->ioc_inlbuf1);
+ GOTO(out, err = -EINVAL);
+ }
+
+ CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
+ dev);
+ err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+ if (err)
+ err = -EFAULT;
+ GOTO(out, err);
+ }
+
+ case OBD_IOC_CLOSE_UUID: {
+ CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n",
+ data->ioc_inlbuf1);
+ GOTO(out, err = 0);
+ }
+
+ case OBD_IOC_GETDEVICE: {
+ int index = data->ioc_count;
+ char *status, *str;
+
+ if (!data->ioc_inlbuf1) {
+ CERROR("No buffer passed in ioctl\n");
+ GOTO(out, err = -EINVAL);
+ }
+ if (data->ioc_inllen1 < 128) {
+ CERROR("ioctl buffer too small to hold version\n");
+ GOTO(out, err = -EINVAL);
+ }
+
+ obd = class_num2obd(index);
+ if (!obd)
+ GOTO(out, err = -ENOENT);
+
+ if (obd->obd_stopping)
+ status = "ST";
+ else if (obd->obd_set_up)
+ status = "UP";
+ else if (obd->obd_attached)
+ status = "AT";
+ else
+ status = "--";
+ str = (char *)data->ioc_bulk;
+ snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
+ (int)index, status, obd->obd_type->typ_name,
+ obd->obd_name, obd->obd_uuid.uuid,
+ atomic_read(&obd->obd_refcount));
+ err = obd_ioctl_popdata((void *)arg, data, len);
+
+ GOTO(out, err = 0);
+ }
+
+ }
+
+ if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
+ if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL)
+ GOTO(out, err = -EINVAL);
+ if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
+ GOTO(out, err = -EINVAL);
+ obd = class_name2obd(data->ioc_inlbuf4);
+ } else if (data->ioc_dev < class_devno_max()) {
+ obd = class_num2obd(data->ioc_dev);
+ } else {
+ CERROR("OBD ioctl: No device\n");
+ GOTO(out, err = -EINVAL);
+ }
+
+ if (obd == NULL) {
+ CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
+ GOTO(out, err = -EINVAL);
+ }
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+
+ if (!obd->obd_set_up || obd->obd_stopping) {
+ CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev);
+ GOTO(out, err = -EINVAL);
+ }
+
+ switch(cmd) {
+ case OBD_IOC_NO_TRANSNO: {
+ if (!obd->obd_attached) {
+ CERROR("Device %d not attached\n", obd->obd_minor);
+ GOTO(out, err = -ENODEV);
+ }
+ CDEBUG(D_HA, "%s: disabling committed-transno notification\n",
+ obd->obd_name);
+ obd->obd_no_transno = 1;
+ GOTO(out, err = 0);
+ }
+
+ default: {
+ err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL);
+ if (err)
+ GOTO(out, err);
+
+ err = obd_ioctl_popdata((void *)arg, data, len);
+ if (err)
+ err = -EFAULT;
+ GOTO(out, err);
+ }
+ }
+
+ out:
+ if (buf)
+ obd_ioctl_freedata(buf, len);
+ RETURN(err);
+} /* class_handle_ioctl */
+
+extern psdev_t obd_psdev;
+
+#define OBD_INIT_CHECK
+int obd_init_checks(void)
+{
+ __u64 u64val, div64val;
+ char buf[64];
+ int len, ret = 0;
+
+ CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", LPU64, LPD64, LPX64);
+
+ CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF);
+
+ u64val = OBD_OBJECT_EOF;
+ CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+ if (u64val != OBD_OBJECT_EOF) {
+ CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+ u64val, (int)sizeof(u64val));
+ ret = -EINVAL;
+ }
+ len = snprintf(buf, sizeof(buf), LPX64, u64val);
+ if (len != 18) {
+ CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+ ret = -EINVAL;
+ }
+
+ div64val = OBD_OBJECT_EOF;
+ CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+ if (u64val != OBD_OBJECT_EOF) {
+ CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+ u64val, (int)sizeof(u64val));
+ ret = -EOVERFLOW;
+ }
+ if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
+ CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+ u64val, (int)sizeof(u64val));
+ return -EOVERFLOW;
+ }
+ if (do_div(div64val, 256) != (u64val & 255)) {
+ CERROR("do_div("LPX64",256) != "LPU64"\n", u64val, u64val &255);
+ return -EOVERFLOW;
+ }
+ if (u64val >> 8 != div64val) {
+ CERROR("do_div("LPX64",256) "LPU64" != "LPU64"\n",
+ u64val, div64val, u64val >> 8);
+ return -EOVERFLOW;
+ }
+ len = snprintf(buf, sizeof(buf), LPX64, u64val);
+ if (len != 18) {
+ CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+ ret = -EINVAL;
+ }
+ len = snprintf(buf, sizeof(buf), LPU64, u64val);
+ if (len != 20) {
+ CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len);
+ ret = -EINVAL;
+ }
+ len = snprintf(buf, sizeof(buf), LPD64, u64val);
+ if (len != 2) {
+ CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
+ ret = -EINVAL;
+ }
+ if ((u64val & ~CFS_PAGE_MASK) >= PAGE_CACHE_SIZE) {
+ CWARN("mask failed: u64val "LPU64" >= "LPU64"\n", u64val,
+ (__u64)PAGE_CACHE_SIZE);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+extern spinlock_t obd_types_lock;
+extern int class_procfs_init(void);
+extern int class_procfs_clean(void);
+
+static int __init init_obdclass(void)
+{
+ int i, err;
+ int lustre_register_fs(void);
+
+ for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
+ INIT_LIST_HEAD(&capa_list[i]);
+
+ LCONSOLE_INFO("Lustre: Build Version: "BUILD_VERSION"\n");
+
+ spin_lock_init(&obd_types_lock);
+ obd_zombie_impexp_init();
+#ifdef LPROCFS
+ obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
+ LPROCFS_STATS_FLAG_NONE |
+ LPROCFS_STATS_FLAG_IRQ_SAFE);
+ if (obd_memory == NULL) {
+ CERROR("kmalloc of 'obd_memory' failed\n");
+ RETURN(-ENOMEM);
+ }
+
+ lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
+ LPROCFS_CNTR_AVGMINMAX,
+ "memused", "bytes");
+ lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT,
+ LPROCFS_CNTR_AVGMINMAX,
+ "pagesused", "pages");
+#endif
+ err = obd_init_checks();
+ if (err == -EOVERFLOW)
+ return err;
+
+ class_init_uuidlist();
+ err = class_handle_init();
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&obd_types);
+
+ err = misc_register(&obd_psdev);
+ if (err) {
+ CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err);
+ return err;
+ }
+
+ /* This struct is already zeroed for us (static global) */
+ for (i = 0; i < class_devno_max(); i++)
+ obd_devs[i] = NULL;
+
+ /* Default the dirty page cache cap to 1/2 of system memory.
+ * For clients with less memory, a larger fraction is needed
+ * for other purposes (mostly for BGL). */
+ if (num_physpages <= 512 << (20 - PAGE_CACHE_SHIFT))
+ obd_max_dirty_pages = num_physpages / 4;
+ else
+ obd_max_dirty_pages = num_physpages / 2;
+
+ err = obd_init_caches();
+ if (err)
+ return err;
+ err = class_procfs_init();
+ if (err)
+ return err;
+
+ err = lu_global_init();
+ if (err)
+ return err;
+
+ err = cl_global_init();
+ if (err != 0)
+ return err;
+
+
+ err = llog_info_init();
+ if (err)
+ return err;
+
+ err = lustre_register_fs();
+
+ return err;
+}
+
+void obd_update_maxusage(void)
+{
+ __u64 max1, max2;
+
+ max1 = obd_pages_sum();
+ max2 = obd_memory_sum();
+
+ spin_lock(&obd_updatemax_lock);
+ if (max1 > obd_max_pages)
+ obd_max_pages = max1;
+ if (max2 > obd_max_alloc)
+ obd_max_alloc = max2;
+ spin_unlock(&obd_updatemax_lock);
+}
+EXPORT_SYMBOL(obd_update_maxusage);
+
+#ifdef LPROCFS
+__u64 obd_memory_max(void)
+{
+ __u64 ret;
+
+ spin_lock(&obd_updatemax_lock);
+ ret = obd_max_alloc;
+ spin_unlock(&obd_updatemax_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(obd_memory_max);
+
+__u64 obd_pages_max(void)
+{
+ __u64 ret;
+
+ spin_lock(&obd_updatemax_lock);
+ ret = obd_max_pages;
+ spin_unlock(&obd_updatemax_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(obd_pages_max);
+#endif
+
+/* liblustre doesn't call cleanup_obdclass, apparently. we carry on in this
+ * ifdef to the end of the file to cover module and versioning goo.*/
+static void cleanup_obdclass(void)
+{
+ int i;
+ int lustre_unregister_fs(void);
+ __u64 memory_leaked, pages_leaked;
+ __u64 memory_max, pages_max;
+ ENTRY;
+
+ lustre_unregister_fs();
+
+ misc_deregister(&obd_psdev);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+ if (obd && obd->obd_set_up &&
+ OBT(obd) && OBP(obd, detach)) {
+ /* XXX should this call generic detach otherwise? */
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ OBP(obd, detach)(obd);
+ }
+ }
+ llog_info_fini();
+ cl_global_fini();
+ lu_global_fini();
+
+ obd_cleanup_caches();
+ obd_sysctl_clean();
+
+ class_procfs_clean();
+
+ class_handle_cleanup();
+ class_exit_uuidlist();
+ obd_zombie_impexp_stop();
+
+ memory_leaked = obd_memory_sum();
+ pages_leaked = obd_pages_sum();
+
+ memory_max = obd_memory_max();
+ pages_max = obd_pages_max();
+
+ lprocfs_free_stats(&obd_memory);
+ CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
+ "obd_memory max: "LPU64", leaked: "LPU64"\n",
+ memory_max, memory_leaked);
+ CDEBUG((pages_leaked) ? D_ERROR : D_INFO,
+ "obd_memory_pages max: "LPU64", leaked: "LPU64"\n",
+ pages_max, pages_leaked);
+
+ EXIT;
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
+MODULE_LICENSE("GPL");
+
+cfs_module(obdclass, LUSTRE_VERSION_STRING, init_obdclass, cleanup_obdclass);
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
new file mode 100644
index 000000000000..15f71bbb7276
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -0,0 +1,124 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/debug.c
+ *
+ * Helper routines for dumping data structs for debugging.
+ */
+
+#define DEBUG_SUBSYSTEM D_OTHER
+
+
+#include <obd_ost.h>
+#include <obd_support.h>
+#include <lustre_debug.h>
+#include <lustre_net.h>
+
+void dump_lniobuf(struct niobuf_local *nb)
+{
+ CDEBUG(D_RPCTRACE,
+ "niobuf_local: file_offset="LPD64", len=%d, page=%p, rc=%d\n",
+ nb->lnb_file_offset, nb->len, nb->page, nb->rc);
+ CDEBUG(D_RPCTRACE, "nb->page: index = %ld\n",
+ nb->page ? page_index(nb->page) : -1);
+}
+EXPORT_SYMBOL(dump_lniobuf);
+
+void dump_lsm(int level, struct lov_stripe_md *lsm)
+{
+ CDEBUG(level, "lsm %p, objid "DOSTID", maxbytes "LPX64", magic 0x%08X,"
+ " stripe_size %u, stripe_count %u, refc: %d,"
+ " layout_gen %u, pool ["LOV_POOLNAMEF"]\n", lsm,
+ POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic,
+ lsm->lsm_stripe_size, lsm->lsm_stripe_count,
+ atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen,
+ lsm->lsm_pool_name);
+}
+EXPORT_SYMBOL(dump_lsm);
+
+#define LPDS sizeof(__u64)
+int block_debug_setup(void *addr, int len, __u64 off, __u64 id)
+{
+ LASSERT(addr);
+
+ off = cpu_to_le64 (off);
+ id = cpu_to_le64 (id);
+ memcpy(addr, (char *)&off, LPDS);
+ memcpy(addr + LPDS, (char *)&id, LPDS);
+
+ addr += len - LPDS - LPDS;
+ memcpy(addr, (char *)&off, LPDS);
+ memcpy(addr + LPDS, (char *)&id, LPDS);
+
+ return 0;
+}
+EXPORT_SYMBOL(block_debug_setup);
+
+int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
+{
+ __u64 ne_off;
+ int err = 0;
+
+ LASSERT(addr);
+
+ ne_off = le64_to_cpu (off);
+ id = le64_to_cpu (id);
+ if (memcmp(addr, (char *)&ne_off, LPDS)) {
+ CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" off: "LPX64" != "
+ LPX64"\n", who, id, off, *(__u64 *)addr, ne_off);
+ err = -EINVAL;
+ }
+ if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
+ CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" id: "LPX64" != "LPX64"\n",
+ who, id, off, *(__u64 *)(addr + LPDS), id);
+ err = -EINVAL;
+ }
+
+ addr += end - LPDS - LPDS;
+ if (memcmp(addr, (char *)&ne_off, LPDS)) {
+ CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" end off: "LPX64" != "
+ LPX64"\n", who, id, off, *(__u64 *)addr, ne_off);
+ err = -EINVAL;
+ }
+ if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
+ CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" end id: "LPX64" != "
+ LPX64"\n", who, id, off, *(__u64 *)(addr + LPDS), id);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(block_debug_check);
+#undef LPDS
diff --git a/drivers/staging/lustre/lustre/obdclass/dt_object.c b/drivers/staging/lustre/lustre/obdclass/dt_object.c
new file mode 100644
index 000000000000..1c962dd3bd2f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/dt_object.c
@@ -0,0 +1,1055 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/dt_object.c
+ *
+ * Dt Object.
+ * Generic functions from dt_object.h
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd.h>
+#include <dt_object.h>
+#include <linux/list.h>
+/* fid_be_to_cpu() */
+#include <lustre_fid.h>
+
+#include <lustre_quota.h>
+
+/* context key constructor/destructor: dt_global_key_init, dt_global_key_fini */
+LU_KEY_INIT(dt_global, struct dt_thread_info);
+LU_KEY_FINI(dt_global, struct dt_thread_info);
+
+struct lu_context_key dt_key = {
+ .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD | LCT_LOCAL,
+ .lct_init = dt_global_key_init,
+ .lct_fini = dt_global_key_fini
+};
+EXPORT_SYMBOL(dt_key);
+
+/* no lock is necessary to protect the list, because call-backs
+ * are added during system startup. Please refer to "struct dt_device".
+ */
+void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb)
+{
+ list_add(&cb->dtc_linkage, &dev->dd_txn_callbacks);
+}
+EXPORT_SYMBOL(dt_txn_callback_add);
+
+void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb)
+{
+ list_del_init(&cb->dtc_linkage);
+}
+EXPORT_SYMBOL(dt_txn_callback_del);
+
+int dt_txn_hook_start(const struct lu_env *env,
+ struct dt_device *dev, struct thandle *th)
+{
+ int rc = 0;
+ struct dt_txn_callback *cb;
+
+ if (th->th_local)
+ return 0;
+
+ list_for_each_entry(cb, &dev->dd_txn_callbacks, dtc_linkage) {
+ if (cb->dtc_txn_start == NULL ||
+ !(cb->dtc_tag & env->le_ctx.lc_tags))
+ continue;
+ rc = cb->dtc_txn_start(env, th, cb->dtc_cookie);
+ if (rc < 0)
+ break;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(dt_txn_hook_start);
+
+int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn)
+{
+ struct dt_device *dev = txn->th_dev;
+ struct dt_txn_callback *cb;
+ int rc = 0;
+
+ if (txn->th_local)
+ return 0;
+
+ list_for_each_entry(cb, &dev->dd_txn_callbacks, dtc_linkage) {
+ if (cb->dtc_txn_stop == NULL ||
+ !(cb->dtc_tag & env->le_ctx.lc_tags))
+ continue;
+ rc = cb->dtc_txn_stop(env, txn, cb->dtc_cookie);
+ if (rc < 0)
+ break;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(dt_txn_hook_stop);
+
+void dt_txn_hook_commit(struct thandle *txn)
+{
+ struct dt_txn_callback *cb;
+
+ if (txn->th_local)
+ return;
+
+ list_for_each_entry(cb, &txn->th_dev->dd_txn_callbacks,
+ dtc_linkage) {
+ if (cb->dtc_txn_commit)
+ cb->dtc_txn_commit(txn, cb->dtc_cookie);
+ }
+}
+EXPORT_SYMBOL(dt_txn_hook_commit);
+
+int dt_device_init(struct dt_device *dev, struct lu_device_type *t)
+{
+
+ INIT_LIST_HEAD(&dev->dd_txn_callbacks);
+ return lu_device_init(&dev->dd_lu_dev, t);
+}
+EXPORT_SYMBOL(dt_device_init);
+
+void dt_device_fini(struct dt_device *dev)
+{
+ lu_device_fini(&dev->dd_lu_dev);
+}
+EXPORT_SYMBOL(dt_device_fini);
+
+int dt_object_init(struct dt_object *obj,
+ struct lu_object_header *h, struct lu_device *d)
+
+{
+ return lu_object_init(&obj->do_lu, h, d);
+}
+EXPORT_SYMBOL(dt_object_init);
+
+void dt_object_fini(struct dt_object *obj)
+{
+ lu_object_fini(&obj->do_lu);
+}
+EXPORT_SYMBOL(dt_object_fini);
+
+int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj)
+{
+ if (obj->do_index_ops == NULL)
+ obj->do_ops->do_index_try(env, obj, &dt_directory_features);
+ return obj->do_index_ops != NULL;
+}
+EXPORT_SYMBOL(dt_try_as_dir);
+
+enum dt_format_type dt_mode_to_dft(__u32 mode)
+{
+ enum dt_format_type result;
+
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ result = DFT_DIR;
+ break;
+ case S_IFREG:
+ result = DFT_REGULAR;
+ break;
+ case S_IFLNK:
+ result = DFT_SYM;
+ break;
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ result = DFT_NODE;
+ break;
+ default:
+ LBUG();
+ break;
+ }
+ return result;
+}
+EXPORT_SYMBOL(dt_mode_to_dft);
+
+/**
+ * lookup fid for object named \a name in directory \a dir.
+ */
+
+int dt_lookup_dir(const struct lu_env *env, struct dt_object *dir,
+ const char *name, struct lu_fid *fid)
+{
+ if (dt_try_as_dir(env, dir))
+ return dt_lookup(env, dir, (struct dt_rec *)fid,
+ (const struct dt_key *)name, BYPASS_CAPA);
+ return -ENOTDIR;
+}
+EXPORT_SYMBOL(dt_lookup_dir);
+
+/* this differs from dt_locate by top_dev as parameter
+ * but not one from lu_site */
+struct dt_object *dt_locate_at(const struct lu_env *env,
+ struct dt_device *dev, const struct lu_fid *fid,
+ struct lu_device *top_dev)
+{
+ struct lu_object *lo, *n;
+ ENTRY;
+
+ lo = lu_object_find_at(env, top_dev, fid, NULL);
+ if (IS_ERR(lo))
+ return (void *)lo;
+
+ LASSERT(lo != NULL);
+
+ list_for_each_entry(n, &lo->lo_header->loh_layers, lo_linkage) {
+ if (n->lo_dev == &dev->dd_lu_dev)
+ return container_of0(n, struct dt_object, do_lu);
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(dt_locate_at);
+
+/**
+ * find a object named \a entry in given \a dfh->dfh_o directory.
+ */
+static int dt_find_entry(const struct lu_env *env, const char *entry, void *data)
+{
+ struct dt_find_hint *dfh = data;
+ struct dt_device *dt = dfh->dfh_dt;
+ struct lu_fid *fid = dfh->dfh_fid;
+ struct dt_object *obj = dfh->dfh_o;
+ int result;
+
+ result = dt_lookup_dir(env, obj, entry, fid);
+ lu_object_put(env, &obj->do_lu);
+ if (result == 0) {
+ obj = dt_locate(env, dt, fid);
+ if (IS_ERR(obj))
+ result = PTR_ERR(obj);
+ }
+ dfh->dfh_o = obj;
+ return result;
+}
+
+/**
+ * Abstract function which parses path name. This function feeds
+ * path component to \a entry_func.
+ */
+int dt_path_parser(const struct lu_env *env,
+ char *path, dt_entry_func_t entry_func,
+ void *data)
+{
+ char *e;
+ int rc = 0;
+
+ while (1) {
+ e = strsep(&path, "/");
+ if (e == NULL)
+ break;
+
+ if (e[0] == 0) {
+ if (!path || path[0] == '\0')
+ break;
+ continue;
+ }
+ rc = entry_func(env, e, data);
+ if (rc)
+ break;
+ }
+
+ return rc;
+}
+
+struct dt_object *
+dt_store_resolve(const struct lu_env *env, struct dt_device *dt,
+ const char *path, struct lu_fid *fid)
+{
+ struct dt_thread_info *info = dt_info(env);
+ struct dt_find_hint *dfh = &info->dti_dfh;
+ struct dt_object *obj;
+ char *local = info->dti_buf;
+ int result;
+
+
+ dfh->dfh_dt = dt;
+ dfh->dfh_fid = fid;
+
+ strncpy(local, path, DT_MAX_PATH);
+ local[DT_MAX_PATH - 1] = '\0';
+
+ result = dt->dd_ops->dt_root_get(env, dt, fid);
+ if (result == 0) {
+ obj = dt_locate(env, dt, fid);
+ if (!IS_ERR(obj)) {
+ dfh->dfh_o = obj;
+ result = dt_path_parser(env, local, dt_find_entry, dfh);
+ if (result != 0)
+ obj = ERR_PTR(result);
+ else
+ obj = dfh->dfh_o;
+ }
+ } else {
+ obj = ERR_PTR(result);
+ }
+ return obj;
+}
+EXPORT_SYMBOL(dt_store_resolve);
+
+static struct dt_object *dt_reg_open(const struct lu_env *env,
+ struct dt_device *dt,
+ struct dt_object *p,
+ const char *name,
+ struct lu_fid *fid)
+{
+ struct dt_object *o;
+ int result;
+
+ result = dt_lookup_dir(env, p, name, fid);
+ if (result == 0){
+ o = dt_locate(env, dt, fid);
+ }
+ else
+ o = ERR_PTR(result);
+
+ return o;
+}
+
+/**
+ * Open dt object named \a filename from \a dirname directory.
+ * \param dt dt device
+ * \param fid on success, object fid is stored in *fid
+ */
+struct dt_object *dt_store_open(const struct lu_env *env,
+ struct dt_device *dt,
+ const char *dirname,
+ const char *filename,
+ struct lu_fid *fid)
+{
+ struct dt_object *file;
+ struct dt_object *dir;
+
+ dir = dt_store_resolve(env, dt, dirname, fid);
+ if (!IS_ERR(dir)) {
+ file = dt_reg_open(env, dt, dir,
+ filename, fid);
+ lu_object_put(env, &dir->do_lu);
+ } else {
+ file = dir;
+ }
+ return file;
+}
+EXPORT_SYMBOL(dt_store_open);
+
+struct dt_object *dt_find_or_create(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object_format *dof,
+ struct lu_attr *at)
+{
+ struct dt_object *dto;
+ struct thandle *th;
+ int rc;
+
+ ENTRY;
+
+ dto = dt_locate(env, dt, fid);
+ if (IS_ERR(dto))
+ RETURN(dto);
+
+ LASSERT(dto != NULL);
+ if (dt_object_exists(dto))
+ RETURN(dto);
+
+ th = dt_trans_create(env, dt);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ rc = dt_declare_create(env, dto, at, NULL, dof, th);
+ if (rc)
+ GOTO(trans_stop, rc);
+
+ rc = dt_trans_start_local(env, dt, th);
+ if (rc)
+ GOTO(trans_stop, rc);
+
+ dt_write_lock(env, dto, 0);
+ if (dt_object_exists(dto))
+ GOTO(unlock, rc = 0);
+
+ CDEBUG(D_OTHER, "create new object "DFID"\n", PFID(fid));
+
+ rc = dt_create(env, dto, at, NULL, dof, th);
+ if (rc)
+ GOTO(unlock, rc);
+ LASSERT(dt_object_exists(dto));
+unlock:
+ dt_write_unlock(env, dto);
+trans_stop:
+ dt_trans_stop(env, dt, th);
+out:
+ if (rc) {
+ lu_object_put(env, &dto->do_lu);
+ RETURN(ERR_PTR(rc));
+ }
+ RETURN(dto);
+}
+EXPORT_SYMBOL(dt_find_or_create);
+
+/* dt class init function. */
+int dt_global_init(void)
+{
+ int result;
+
+ LU_CONTEXT_KEY_INIT(&dt_key);
+ result = lu_context_key_register(&dt_key);
+ return result;
+}
+
+void dt_global_fini(void)
+{
+ lu_context_key_degister(&dt_key);
+}
+
+/**
+ * Generic read helper. May return an error for partial reads.
+ *
+ * \param env lustre environment
+ * \param dt object to be read
+ * \param buf lu_buf to be filled, with buffer pointer and length
+ * \param pos position to start reading, updated as data is read
+ *
+ * \retval real size of data read
+ * \retval -ve errno on failure
+ */
+int dt_read(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, loff_t *pos)
+{
+ LASSERTF(dt != NULL, "dt is NULL when we want to read record\n");
+ return dt->do_body_ops->dbo_read(env, dt, buf, pos, BYPASS_CAPA);
+}
+EXPORT_SYMBOL(dt_read);
+
+/**
+ * Read structures of fixed size from storage. Unlike dt_read(), using
+ * dt_record_read() will return an error for partial reads.
+ *
+ * \param env lustre environment
+ * \param dt object to be read
+ * \param buf lu_buf to be filled, with buffer pointer and length
+ * \param pos position to start reading, updated as data is read
+ *
+ * \retval 0 on successfully reading full buffer
+ * \retval -EFAULT on short read
+ * \retval -ve errno on failure
+ */
+int dt_record_read(const struct lu_env *env, struct dt_object *dt,
+ struct lu_buf *buf, loff_t *pos)
+{
+ int rc;
+
+ LASSERTF(dt != NULL, "dt is NULL when we want to read record\n");
+
+ rc = dt->do_body_ops->dbo_read(env, dt, buf, pos, BYPASS_CAPA);
+
+ if (rc == buf->lb_len)
+ rc = 0;
+ else if (rc >= 0)
+ rc = -EFAULT;
+ return rc;
+}
+EXPORT_SYMBOL(dt_record_read);
+
+int dt_record_write(const struct lu_env *env, struct dt_object *dt,
+ const struct lu_buf *buf, loff_t *pos, struct thandle *th)
+{
+ int rc;
+
+ LASSERTF(dt != NULL, "dt is NULL when we want to write record\n");
+ LASSERT(th != NULL);
+ LASSERT(dt->do_body_ops);
+ LASSERT(dt->do_body_ops->dbo_write);
+ rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA, 1);
+ if (rc == buf->lb_len)
+ rc = 0;
+ else if (rc >= 0)
+ rc = -EFAULT;
+ return rc;
+}
+EXPORT_SYMBOL(dt_record_write);
+
+int dt_declare_version_set(const struct lu_env *env, struct dt_object *o,
+ struct thandle *th)
+{
+ struct lu_buf vbuf;
+ char *xname = XATTR_NAME_VERSION;
+
+ LASSERT(o);
+ vbuf.lb_buf = NULL;
+ vbuf.lb_len = sizeof(dt_obj_version_t);
+ return dt_declare_xattr_set(env, o, &vbuf, xname, 0, th);
+
+}
+EXPORT_SYMBOL(dt_declare_version_set);
+
+void dt_version_set(const struct lu_env *env, struct dt_object *o,
+ dt_obj_version_t version, struct thandle *th)
+{
+ struct lu_buf vbuf;
+ char *xname = XATTR_NAME_VERSION;
+ int rc;
+
+ LASSERT(o);
+ vbuf.lb_buf = &version;
+ vbuf.lb_len = sizeof(version);
+
+ rc = dt_xattr_set(env, o, &vbuf, xname, 0, th, BYPASS_CAPA);
+ if (rc < 0)
+ CDEBUG(D_INODE, "Can't set version, rc %d\n", rc);
+ return;
+}
+EXPORT_SYMBOL(dt_version_set);
+
+dt_obj_version_t dt_version_get(const struct lu_env *env, struct dt_object *o)
+{
+ struct lu_buf vbuf;
+ char *xname = XATTR_NAME_VERSION;
+ dt_obj_version_t version;
+ int rc;
+
+ LASSERT(o);
+ vbuf.lb_buf = &version;
+ vbuf.lb_len = sizeof(version);
+ rc = dt_xattr_get(env, o, &vbuf, xname, BYPASS_CAPA);
+ if (rc != sizeof(version)) {
+ CDEBUG(D_INODE, "Can't get version, rc %d\n", rc);
+ version = 0;
+ }
+ return version;
+}
+EXPORT_SYMBOL(dt_version_get);
+
+/* list of all supported index types */
+
+/* directories */
+const struct dt_index_features dt_directory_features;
+EXPORT_SYMBOL(dt_directory_features);
+
+/* scrub iterator */
+const struct dt_index_features dt_otable_features;
+EXPORT_SYMBOL(dt_otable_features);
+
+/* lfsck */
+const struct dt_index_features dt_lfsck_features = {
+ .dif_flags = DT_IND_UPDATE,
+ .dif_keysize_min = sizeof(struct lu_fid),
+ .dif_keysize_max = sizeof(struct lu_fid),
+ .dif_recsize_min = sizeof(__u8),
+ .dif_recsize_max = sizeof(__u8),
+ .dif_ptrsize = 4
+};
+EXPORT_SYMBOL(dt_lfsck_features);
+
+/* accounting indexes */
+const struct dt_index_features dt_acct_features = {
+ .dif_flags = DT_IND_UPDATE,
+ .dif_keysize_min = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_keysize_max = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_recsize_min = sizeof(struct lquota_acct_rec), /* 16 bytes */
+ .dif_recsize_max = sizeof(struct lquota_acct_rec), /* 16 bytes */
+ .dif_ptrsize = 4
+};
+EXPORT_SYMBOL(dt_acct_features);
+
+/* global quota files */
+const struct dt_index_features dt_quota_glb_features = {
+ .dif_flags = DT_IND_UPDATE,
+ /* a different key would have to be used for per-directory quota */
+ .dif_keysize_min = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_keysize_max = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_recsize_min = sizeof(struct lquota_glb_rec), /* 32 bytes */
+ .dif_recsize_max = sizeof(struct lquota_glb_rec), /* 32 bytes */
+ .dif_ptrsize = 4
+};
+EXPORT_SYMBOL(dt_quota_glb_features);
+
+/* slave quota files */
+const struct dt_index_features dt_quota_slv_features = {
+ .dif_flags = DT_IND_UPDATE,
+ /* a different key would have to be used for per-directory quota */
+ .dif_keysize_min = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_keysize_max = sizeof(__u64), /* 64-bit uid/gid */
+ .dif_recsize_min = sizeof(struct lquota_slv_rec), /* 8 bytes */
+ .dif_recsize_max = sizeof(struct lquota_slv_rec), /* 8 bytes */
+ .dif_ptrsize = 4
+};
+EXPORT_SYMBOL(dt_quota_slv_features);
+
+/* helper function returning what dt_index_features structure should be used
+ * based on the FID sequence. This is used by OBD_IDX_READ RPC */
+static inline const struct dt_index_features *dt_index_feat_select(__u64 seq,
+ __u32 mode)
+{
+ if (seq == FID_SEQ_QUOTA_GLB) {
+ /* global quota index */
+ if (!S_ISREG(mode))
+ /* global quota index should be a regular file */
+ return ERR_PTR(-ENOENT);
+ return &dt_quota_glb_features;
+ } else if (seq == FID_SEQ_QUOTA) {
+ /* quota slave index */
+ if (!S_ISREG(mode))
+ /* slave index should be a regular file */
+ return ERR_PTR(-ENOENT);
+ return &dt_quota_slv_features;
+ } else if (seq >= FID_SEQ_NORMAL) {
+ /* object is part of the namespace, verify that it is a
+ * directory */
+ if (!S_ISDIR(mode))
+ /* sorry, we can only deal with directory */
+ return ERR_PTR(-ENOTDIR);
+ return &dt_directory_features;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+/*
+ * Fill a lu_idxpage with key/record pairs read for transfer via OBD_IDX_READ
+ * RPC
+ *
+ * \param env - is the environment passed by the caller
+ * \param lp - is a pointer to the lu_page to fill
+ * \param nob - is the maximum number of bytes that should be copied
+ * \param iops - is the index operation vector associated with the index object
+ * \param it - is a pointer to the current iterator
+ * \param attr - is the index attribute to pass to iops->rec()
+ * \param arg - is a pointer to the idx_info structure
+ */
+static int dt_index_page_build(const struct lu_env *env, union lu_page *lp,
+ int nob, const struct dt_it_ops *iops,
+ struct dt_it *it, __u32 attr, void *arg)
+{
+ struct idx_info *ii = (struct idx_info *)arg;
+ struct lu_idxpage *lip = &lp->lp_idx;
+ char *entry;
+ int rc, size;
+ ENTRY;
+
+ /* no support for variable key & record size for now */
+ LASSERT((ii->ii_flags & II_FL_VARKEY) == 0);
+ LASSERT((ii->ii_flags & II_FL_VARREC) == 0);
+
+ /* initialize the header of the new container */
+ memset(lip, 0, LIP_HDR_SIZE);
+ lip->lip_magic = LIP_MAGIC;
+ nob -= LIP_HDR_SIZE;
+
+ /* compute size needed to store a key/record pair */
+ size = ii->ii_recsize + ii->ii_keysize;
+ if ((ii->ii_flags & II_FL_NOHASH) == 0)
+ /* add hash if the client wants it */
+ size += sizeof(__u64);
+
+ entry = lip->lip_entries;
+ do {
+ char *tmp_entry = entry;
+ struct dt_key *key;
+ __u64 hash;
+
+ /* fetch 64-bit hash value */
+ hash = iops->store(env, it);
+ ii->ii_hash_end = hash;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
+ if (lip->lip_nr != 0)
+ GOTO(out, rc = 0);
+ }
+
+ if (nob < size) {
+ if (lip->lip_nr == 0)
+ GOTO(out, rc = -EINVAL);
+ GOTO(out, rc = 0);
+ }
+
+ if ((ii->ii_flags & II_FL_NOHASH) == 0) {
+ /* client wants to the 64-bit hash value associated with
+ * each record */
+ memcpy(tmp_entry, &hash, sizeof(hash));
+ tmp_entry += sizeof(hash);
+ }
+
+ /* then the key value */
+ LASSERT(iops->key_size(env, it) == ii->ii_keysize);
+ key = iops->key(env, it);
+ memcpy(tmp_entry, key, ii->ii_keysize);
+ tmp_entry += ii->ii_keysize;
+
+ /* and finally the record */
+ rc = iops->rec(env, it, (struct dt_rec *)tmp_entry, attr);
+ if (rc != -ESTALE) {
+ if (rc != 0)
+ GOTO(out, rc);
+
+ /* hash/key/record successfully copied! */
+ lip->lip_nr++;
+ if (unlikely(lip->lip_nr == 1 && ii->ii_count == 0))
+ ii->ii_hash_start = hash;
+ entry = tmp_entry + ii->ii_recsize;
+ nob -= size;
+ }
+
+ /* move on to the next record */
+ do {
+ rc = iops->next(env, it);
+ } while (rc == -ESTALE);
+
+ } while (rc == 0);
+
+ GOTO(out, rc);
+out:
+ if (rc >= 0 && lip->lip_nr > 0)
+ /* one more container */
+ ii->ii_count++;
+ if (rc > 0)
+ /* no more entries */
+ ii->ii_hash_end = II_END_OFF;
+ return rc;
+}
+
+/*
+ * Walk index and fill lu_page containers with key/record pairs
+ *
+ * \param env - is the environment passed by the caller
+ * \param obj - is the index object to parse
+ * \param rdpg - is the lu_rdpg descriptor associated with the transfer
+ * \param filler - is the callback function responsible for filling a lu_page
+ * with key/record pairs in the format wanted by the caller
+ * \param arg - is an opaq argument passed to the filler function
+ *
+ * \retval sum (in bytes) of all filled lu_pages
+ * \retval -ve errno on failure
+ */
+int dt_index_walk(const struct lu_env *env, struct dt_object *obj,
+ const struct lu_rdpg *rdpg, dt_index_page_build_t filler,
+ void *arg)
+{
+ struct dt_it *it;
+ const struct dt_it_ops *iops;
+ unsigned int pageidx, nob, nlupgs = 0;
+ int rc;
+ ENTRY;
+
+ LASSERT(rdpg->rp_pages != NULL);
+ LASSERT(obj->do_index_ops != NULL);
+
+ nob = rdpg->rp_count;
+ if (nob <= 0)
+ RETURN(-EFAULT);
+
+ /* Iterate through index and fill containers from @rdpg */
+ iops = &obj->do_index_ops->dio_it;
+ LASSERT(iops != NULL);
+ it = iops->init(env, obj, rdpg->rp_attrs, BYPASS_CAPA);
+ if (IS_ERR(it))
+ RETURN(PTR_ERR(it));
+
+ rc = iops->load(env, it, rdpg->rp_hash);
+ if (rc == 0) {
+ /*
+ * Iterator didn't find record with exactly the key requested.
+ *
+ * It is currently either
+ *
+ * - positioned above record with key less than
+ * requested---skip it.
+ * - or not positioned at all (is in IAM_IT_SKEWED
+ * state)---position it on the next item.
+ */
+ rc = iops->next(env, it);
+ } else if (rc > 0) {
+ rc = 0;
+ }
+
+ /* Fill containers one after the other. There might be multiple
+ * containers per physical page.
+ *
+ * At this point and across for-loop:
+ * rc == 0 -> ok, proceed.
+ * rc > 0 -> end of index.
+ * rc < 0 -> error. */
+ for (pageidx = 0; rc == 0 && nob > 0; pageidx++) {
+ union lu_page *lp;
+ int i;
+
+ LASSERT(pageidx < rdpg->rp_npages);
+ lp = kmap(rdpg->rp_pages[pageidx]);
+
+ /* fill lu pages */
+ for (i = 0; i < LU_PAGE_COUNT; i++, lp++, nob -= LU_PAGE_SIZE) {
+ rc = filler(env, lp, min_t(int, nob, LU_PAGE_SIZE),
+ iops, it, rdpg->rp_attrs, arg);
+ if (rc < 0)
+ break;
+ /* one more lu_page */
+ nlupgs++;
+ if (rc > 0)
+ /* end of index */
+ break;
+ }
+ kunmap(rdpg->rp_pages[i]);
+ }
+
+ iops->put(env, it);
+ iops->fini(env, it);
+
+ if (rc >= 0)
+ rc = min_t(unsigned int, nlupgs * LU_PAGE_SIZE, rdpg->rp_count);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(dt_index_walk);
+
+/**
+ * Walk key/record pairs of an index and copy them into 4KB containers to be
+ * transferred over the network. This is the common handler for OBD_IDX_READ
+ * RPC processing.
+ *
+ * \param env - is the environment passed by the caller
+ * \param dev - is the dt_device storing the index
+ * \param ii - is the idx_info structure packed by the client in the
+ * OBD_IDX_READ request
+ * \param rdpg - is the lu_rdpg descriptor
+ *
+ * \retval on success, return sum (in bytes) of all filled containers
+ * \retval appropriate error otherwise.
+ */
+int dt_index_read(const struct lu_env *env, struct dt_device *dev,
+ struct idx_info *ii, const struct lu_rdpg *rdpg)
+{
+ const struct dt_index_features *feat;
+ struct dt_object *obj;
+ int rc;
+ ENTRY;
+
+ /* rp_count shouldn't be null and should be a multiple of the container
+ * size */
+ if (rdpg->rp_count <= 0 && (rdpg->rp_count & (LU_PAGE_SIZE - 1)) != 0)
+ RETURN(-EFAULT);
+
+ if (fid_seq(&ii->ii_fid) >= FID_SEQ_NORMAL)
+ /* we don't support directory transfer via OBD_IDX_READ for the
+ * time being */
+ RETURN(-EOPNOTSUPP);
+
+ if (!fid_is_quota(&ii->ii_fid))
+ /* block access to all local files except quota files */
+ RETURN(-EPERM);
+
+ /* lookup index object subject to the transfer */
+ obj = dt_locate(env, dev, &ii->ii_fid);
+ if (IS_ERR(obj))
+ RETURN(PTR_ERR(obj));
+ if (dt_object_exists(obj) == 0)
+ GOTO(out, rc = -ENOENT);
+
+ /* fetch index features associated with index object */
+ feat = dt_index_feat_select(fid_seq(&ii->ii_fid),
+ lu_object_attr(&obj->do_lu));
+ if (IS_ERR(feat))
+ GOTO(out, rc = PTR_ERR(feat));
+
+ /* load index feature if not done already */
+ if (obj->do_index_ops == NULL) {
+ rc = obj->do_ops->do_index_try(env, obj, feat);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ /* fill ii_flags with supported index features */
+ ii->ii_flags &= II_FL_NOHASH;
+
+ ii->ii_keysize = feat->dif_keysize_max;
+ if ((feat->dif_flags & DT_IND_VARKEY) != 0) {
+ /* key size is variable */
+ ii->ii_flags |= II_FL_VARKEY;
+ /* we don't support variable key size for the time being */
+ GOTO(out, rc = -EOPNOTSUPP);
+ }
+
+ ii->ii_recsize = feat->dif_recsize_max;
+ if ((feat->dif_flags & DT_IND_VARREC) != 0) {
+ /* record size is variable */
+ ii->ii_flags |= II_FL_VARREC;
+ /* we don't support variable record size for the time being */
+ GOTO(out, rc = -EOPNOTSUPP);
+ }
+
+ if ((feat->dif_flags & DT_IND_NONUNQ) != 0)
+ /* key isn't necessarily unique */
+ ii->ii_flags |= II_FL_NONUNQ;
+
+ dt_read_lock(env, obj, 0);
+ /* fetch object version before walking the index */
+ ii->ii_version = dt_version_get(env, obj);
+
+ /* walk the index and fill lu_idxpages with key/record pairs */
+ rc = dt_index_walk(env, obj, rdpg, dt_index_page_build ,ii);
+ dt_read_unlock(env, obj);
+
+ if (rc == 0) {
+ /* index is empty */
+ LASSERT(ii->ii_count == 0);
+ ii->ii_hash_end = II_END_OFF;
+ }
+
+ GOTO(out, rc);
+out:
+ lu_object_put(env, &obj->do_lu);
+ return rc;
+}
+EXPORT_SYMBOL(dt_index_read);
+
+#ifdef LPROCFS
+
+int lprocfs_dt_rd_blksize(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ *eof = 1;
+ rc = snprintf(page, count, "%u\n",
+ (unsigned) osfs.os_bsize);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_blksize);
+
+int lprocfs_dt_rd_kbytestotal(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_blocks;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_kbytestotal);
+
+int lprocfs_dt_rd_kbytesfree(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bfree;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_kbytesfree);
+
+int lprocfs_dt_rd_kbytesavail(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bavail;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_kbytesavail);
+
+int lprocfs_dt_rd_filestotal(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_filestotal);
+
+int lprocfs_dt_rd_filesfree(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct dt_device *dt = data;
+ struct obd_statfs osfs;
+
+ int rc = dt_statfs(NULL, dt, &osfs);
+ if (rc == 0) {
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_dt_rd_filesfree);
+
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
new file mode 100644
index 000000000000..d96876e0bc68
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -0,0 +1,1853 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/genops.c
+ *
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#include <obd_ost.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+
+extern struct list_head obd_types;
+spinlock_t obd_types_lock;
+
+struct kmem_cache *obd_device_cachep;
+struct kmem_cache *obdo_cachep;
+EXPORT_SYMBOL(obdo_cachep);
+struct kmem_cache *import_cachep;
+
+struct list_head obd_zombie_imports;
+struct list_head obd_zombie_exports;
+spinlock_t obd_zombie_impexp_lock;
+static void obd_zombie_impexp_notify(void);
+static void obd_zombie_export_add(struct obd_export *exp);
+static void obd_zombie_import_add(struct obd_import *imp);
+static void print_export_data(struct obd_export *exp,
+ const char *status, int locks);
+
+int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
+
+/*
+ * support functions: we could use inter-module communication, but this
+ * is more portable to other OS's
+ */
+static struct obd_device *obd_device_alloc(void)
+{
+ struct obd_device *obd;
+
+ OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, __GFP_IO);
+ if (obd != NULL) {
+ obd->obd_magic = OBD_DEVICE_MAGIC;
+ }
+ return obd;
+}
+
+static void obd_device_free(struct obd_device *obd)
+{
+ LASSERT(obd != NULL);
+ LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n",
+ obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+ if (obd->obd_namespace != NULL) {
+ CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n",
+ obd, obd->obd_namespace, obd->obd_force);
+ LBUG();
+ }
+ lu_ref_fini(&obd->obd_reference);
+ OBD_SLAB_FREE_PTR(obd, obd_device_cachep);
+}
+
+struct obd_type *class_search_type(const char *name)
+{
+ struct list_head *tmp;
+ struct obd_type *type;
+
+ spin_lock(&obd_types_lock);
+ list_for_each(tmp, &obd_types) {
+ type = list_entry(tmp, struct obd_type, typ_chain);
+ if (strcmp(type->typ_name, name) == 0) {
+ spin_unlock(&obd_types_lock);
+ return type;
+ }
+ }
+ spin_unlock(&obd_types_lock);
+ return NULL;
+}
+EXPORT_SYMBOL(class_search_type);
+
+struct obd_type *class_get_type(const char *name)
+{
+ struct obd_type *type = class_search_type(name);
+
+ if (!type) {
+ const char *modname = name;
+
+ if (strcmp(modname, "obdfilter") == 0)
+ modname = "ofd";
+
+ if (strcmp(modname, LUSTRE_LWP_NAME) == 0)
+ modname = LUSTRE_OSP_NAME;
+
+ if (!strncmp(modname, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME)))
+ modname = LUSTRE_MDT_NAME;
+
+ if (!request_module("%s", modname)) {
+ CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
+ type = class_search_type(name);
+ } else {
+ LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n",
+ modname);
+ }
+ }
+ if (type) {
+ spin_lock(&type->obd_type_lock);
+ type->typ_refcnt++;
+ try_module_get(type->typ_dt_ops->o_owner);
+ spin_unlock(&type->obd_type_lock);
+ }
+ return type;
+}
+EXPORT_SYMBOL(class_get_type);
+
+void class_put_type(struct obd_type *type)
+{
+ LASSERT(type);
+ spin_lock(&type->obd_type_lock);
+ type->typ_refcnt--;
+ module_put(type->typ_dt_ops->o_owner);
+ spin_unlock(&type->obd_type_lock);
+}
+EXPORT_SYMBOL(class_put_type);
+
+#define CLASS_MAX_NAME 1024
+
+int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
+ struct lprocfs_vars *vars, const char *name,
+ struct lu_device_type *ldt)
+{
+ struct obd_type *type;
+ int rc = 0;
+ ENTRY;
+
+ /* sanity check */
+ LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME);
+
+ if (class_search_type(name)) {
+ CDEBUG(D_IOCTL, "Type %s already registered\n", name);
+ RETURN(-EEXIST);
+ }
+
+ rc = -ENOMEM;
+ OBD_ALLOC(type, sizeof(*type));
+ if (type == NULL)
+ RETURN(rc);
+
+ OBD_ALLOC_PTR(type->typ_dt_ops);
+ OBD_ALLOC_PTR(type->typ_md_ops);
+ OBD_ALLOC(type->typ_name, strlen(name) + 1);
+
+ if (type->typ_dt_ops == NULL ||
+ type->typ_md_ops == NULL ||
+ type->typ_name == NULL)
+ GOTO (failed, rc);
+
+ *(type->typ_dt_ops) = *dt_ops;
+ /* md_ops is optional */
+ if (md_ops)
+ *(type->typ_md_ops) = *md_ops;
+ strcpy(type->typ_name, name);
+ spin_lock_init(&type->obd_type_lock);
+
+#ifdef LPROCFS
+ type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
+ vars, type);
+ if (IS_ERR(type->typ_procroot)) {
+ rc = PTR_ERR(type->typ_procroot);
+ type->typ_procroot = NULL;
+ GOTO (failed, rc);
+ }
+#endif
+ if (ldt != NULL) {
+ type->typ_lu = ldt;
+ rc = lu_device_type_init(ldt);
+ if (rc != 0)
+ GOTO (failed, rc);
+ }
+
+ spin_lock(&obd_types_lock);
+ list_add(&type->typ_chain, &obd_types);
+ spin_unlock(&obd_types_lock);
+
+ RETURN (0);
+
+ failed:
+ if (type->typ_name != NULL)
+ OBD_FREE(type->typ_name, strlen(name) + 1);
+ if (type->typ_md_ops != NULL)
+ OBD_FREE_PTR(type->typ_md_ops);
+ if (type->typ_dt_ops != NULL)
+ OBD_FREE_PTR(type->typ_dt_ops);
+ OBD_FREE(type, sizeof(*type));
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_register_type);
+
+int class_unregister_type(const char *name)
+{
+ struct obd_type *type = class_search_type(name);
+ ENTRY;
+
+ if (!type) {
+ CERROR("unknown obd type\n");
+ RETURN(-EINVAL);
+ }
+
+ if (type->typ_refcnt) {
+ CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
+ /* This is a bad situation, let's make the best of it */
+ /* Remove ops, but leave the name for debugging */
+ OBD_FREE_PTR(type->typ_dt_ops);
+ OBD_FREE_PTR(type->typ_md_ops);
+ RETURN(-EBUSY);
+ }
+
+ if (type->typ_procroot) {
+ lprocfs_remove(&type->typ_procroot);
+ }
+
+ if (type->typ_lu)
+ lu_device_type_fini(type->typ_lu);
+
+ spin_lock(&obd_types_lock);
+ list_del(&type->typ_chain);
+ spin_unlock(&obd_types_lock);
+ OBD_FREE(type->typ_name, strlen(name) + 1);
+ if (type->typ_dt_ops != NULL)
+ OBD_FREE_PTR(type->typ_dt_ops);
+ if (type->typ_md_ops != NULL)
+ OBD_FREE_PTR(type->typ_md_ops);
+ OBD_FREE(type, sizeof(*type));
+ RETURN(0);
+} /* class_unregister_type */
+EXPORT_SYMBOL(class_unregister_type);
+
+/**
+ * Create a new obd device.
+ *
+ * Find an empty slot in ::obd_devs[], create a new obd device in it.
+ *
+ * \param[in] type_name obd device type string.
+ * \param[in] name obd device name.
+ *
+ * \retval NULL if create fails, otherwise return the obd device
+ * pointer created.
+ */
+struct obd_device *class_newdev(const char *type_name, const char *name)
+{
+ struct obd_device *result = NULL;
+ struct obd_device *newdev;
+ struct obd_type *type = NULL;
+ int i;
+ int new_obd_minor = 0;
+ ENTRY;
+
+ if (strlen(name) >= MAX_OBD_NAME) {
+ CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
+ RETURN(ERR_PTR(-EINVAL));
+ }
+
+ type = class_get_type(type_name);
+ if (type == NULL){
+ CERROR("OBD: unknown type: %s\n", type_name);
+ RETURN(ERR_PTR(-ENODEV));
+ }
+
+ newdev = obd_device_alloc();
+ if (newdev == NULL)
+ GOTO(out_type, result = ERR_PTR(-ENOMEM));
+
+ LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
+
+ write_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd && (strcmp(name, obd->obd_name) == 0)) {
+ CERROR("Device %s already exists at %d, won't add\n",
+ name, i);
+ if (result) {
+ LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
+ "%p obd_magic %08x != %08x\n", result,
+ result->obd_magic, OBD_DEVICE_MAGIC);
+ LASSERTF(result->obd_minor == new_obd_minor,
+ "%p obd_minor %d != %d\n", result,
+ result->obd_minor, new_obd_minor);
+
+ obd_devs[result->obd_minor] = NULL;
+ result->obd_name[0]='\0';
+ }
+ result = ERR_PTR(-EEXIST);
+ break;
+ }
+ if (!result && !obd) {
+ result = newdev;
+ result->obd_minor = i;
+ new_obd_minor = i;
+ result->obd_type = type;
+ strncpy(result->obd_name, name,
+ sizeof(result->obd_name) - 1);
+ obd_devs[i] = result;
+ }
+ }
+ write_unlock(&obd_dev_lock);
+
+ if (result == NULL && i >= class_devno_max()) {
+ CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
+ class_devno_max());
+ GOTO(out, result = ERR_PTR(-EOVERFLOW));
+ }
+
+ if (IS_ERR(result))
+ GOTO(out, result);
+
+ CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
+ result->obd_name, result);
+
+ RETURN(result);
+out:
+ obd_device_free(newdev);
+out_type:
+ class_put_type(type);
+ return result;
+}
+
+void class_release_dev(struct obd_device *obd)
+{
+ struct obd_type *obd_type = obd->obd_type;
+
+ LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
+ obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+ LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
+ obd, obd->obd_minor, obd_devs[obd->obd_minor]);
+ LASSERT(obd_type != NULL);
+
+ CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n",
+ obd->obd_name, obd->obd_minor, obd->obd_type->typ_name);
+
+ write_lock(&obd_dev_lock);
+ obd_devs[obd->obd_minor] = NULL;
+ write_unlock(&obd_dev_lock);
+ obd_device_free(obd);
+
+ class_put_type(obd_type);
+}
+
+int class_name2dev(const char *name)
+{
+ int i;
+
+ if (!name)
+ return -1;
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd && strcmp(name, obd->obd_name) == 0) {
+ /* Make sure we finished attaching before we give
+ out any references */
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ if (obd->obd_attached) {
+ read_unlock(&obd_dev_lock);
+ return i;
+ }
+ break;
+ }
+ }
+ read_unlock(&obd_dev_lock);
+
+ return -1;
+}
+EXPORT_SYMBOL(class_name2dev);
+
+struct obd_device *class_name2obd(const char *name)
+{
+ int dev = class_name2dev(name);
+
+ if (dev < 0 || dev > class_devno_max())
+ return NULL;
+ return class_num2obd(dev);
+}
+EXPORT_SYMBOL(class_name2obd);
+
+int class_uuid2dev(struct obd_uuid *uuid)
+{
+ int i;
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ read_unlock(&obd_dev_lock);
+ return i;
+ }
+ }
+ read_unlock(&obd_dev_lock);
+
+ return -1;
+}
+EXPORT_SYMBOL(class_uuid2dev);
+
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
+{
+ int dev = class_uuid2dev(uuid);
+ if (dev < 0)
+ return NULL;
+ return class_num2obd(dev);
+}
+EXPORT_SYMBOL(class_uuid2obd);
+
+/**
+ * Get obd device from ::obd_devs[]
+ *
+ * \param num [in] array index
+ *
+ * \retval NULL if ::obd_devs[\a num] does not contains an obd device
+ * otherwise return the obd device there.
+ */
+struct obd_device *class_num2obd(int num)
+{
+ struct obd_device *obd = NULL;
+
+ if (num < class_devno_max()) {
+ obd = obd_devs[num];
+ if (obd == NULL)
+ return NULL;
+
+ LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+ "%p obd_magic %08x != %08x\n",
+ obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+ LASSERTF(obd->obd_minor == num,
+ "%p obd_minor %0d != %0d\n",
+ obd, obd->obd_minor, num);
+ }
+
+ return obd;
+}
+EXPORT_SYMBOL(class_num2obd);
+
+/**
+ * Get obd devices count. Device in any
+ * state are counted
+ * \retval obd device count
+ */
+int get_devices_count(void)
+{
+ int index, max_index = class_devno_max(), dev_count = 0;
+
+ read_lock(&obd_dev_lock);
+ for (index = 0; index <= max_index; index++) {
+ struct obd_device *obd = class_num2obd(index);
+ if (obd != NULL)
+ dev_count++;
+ }
+ read_unlock(&obd_dev_lock);
+
+ return dev_count;
+}
+EXPORT_SYMBOL(get_devices_count);
+
+void class_obd_list(void)
+{
+ char *status;
+ int i;
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd == NULL)
+ continue;
+ if (obd->obd_stopping)
+ status = "ST";
+ else if (obd->obd_set_up)
+ status = "UP";
+ else if (obd->obd_attached)
+ status = "AT";
+ else
+ status = "--";
+ LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
+ i, status, obd->obd_type->typ_name,
+ obd->obd_name, obd->obd_uuid.uuid,
+ atomic_read(&obd->obd_refcount));
+ }
+ read_unlock(&obd_dev_lock);
+ return;
+}
+
+/* Search for a client OBD connected to tgt_uuid. If grp_uuid is
+ specified, then only the client with that uuid is returned,
+ otherwise any client connected to the tgt is returned. */
+struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
+ const char * typ_name,
+ struct obd_uuid *grp_uuid)
+{
+ int i;
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd == NULL)
+ continue;
+ if ((strncmp(obd->obd_type->typ_name, typ_name,
+ strlen(typ_name)) == 0)) {
+ if (obd_uuid_equals(tgt_uuid,
+ &obd->u.cli.cl_target_uuid) &&
+ ((grp_uuid)? obd_uuid_equals(grp_uuid,
+ &obd->obd_uuid) : 1)) {
+ read_unlock(&obd_dev_lock);
+ return obd;
+ }
+ }
+ }
+ read_unlock(&obd_dev_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL(class_find_client_obd);
+
+/* Iterate the obd_device list looking devices have grp_uuid. Start
+ searching at *next, and if a device is found, the next index to look
+ at is saved in *next. If next is NULL, then the first matching device
+ will always be returned. */
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
+{
+ int i;
+
+ if (next == NULL)
+ i = 0;
+ else if (*next >= 0 && *next < class_devno_max())
+ i = *next;
+ else
+ return NULL;
+
+ read_lock(&obd_dev_lock);
+ for (; i < class_devno_max(); i++) {
+ struct obd_device *obd = class_num2obd(i);
+
+ if (obd == NULL)
+ continue;
+ if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
+ if (next != NULL)
+ *next = i+1;
+ read_unlock(&obd_dev_lock);
+ return obd;
+ }
+ }
+ read_unlock(&obd_dev_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL(class_devices_in_group);
+
+/**
+ * to notify sptlrpc log for \a fsname has changed, let every relevant OBD
+ * adjust sptlrpc settings accordingly.
+ */
+int class_notify_sptlrpc_conf(const char *fsname, int namelen)
+{
+ struct obd_device *obd;
+ const char *type;
+ int i, rc = 0, rc2;
+
+ LASSERT(namelen > 0);
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ obd = class_num2obd(i);
+
+ if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
+ continue;
+
+ /* only notify mdc, osc, mdt, ost */
+ type = obd->obd_type->typ_name;
+ if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
+ strcmp(type, LUSTRE_OSC_NAME) != 0 &&
+ strcmp(type, LUSTRE_MDT_NAME) != 0 &&
+ strcmp(type, LUSTRE_OST_NAME) != 0)
+ continue;
+
+ if (strncmp(obd->obd_name, fsname, namelen))
+ continue;
+
+ class_incref(obd, __FUNCTION__, obd);
+ read_unlock(&obd_dev_lock);
+ rc2 = obd_set_info_async(NULL, obd->obd_self_export,
+ sizeof(KEY_SPTLRPC_CONF),
+ KEY_SPTLRPC_CONF, 0, NULL, NULL);
+ rc = rc ? rc : rc2;
+ class_decref(obd, __FUNCTION__, obd);
+ read_lock(&obd_dev_lock);
+ }
+ read_unlock(&obd_dev_lock);
+ return rc;
+}
+EXPORT_SYMBOL(class_notify_sptlrpc_conf);
+
+void obd_cleanup_caches(void)
+{
+ ENTRY;
+ if (obd_device_cachep) {
+ kmem_cache_destroy(obd_device_cachep);
+ obd_device_cachep = NULL;
+ }
+ if (obdo_cachep) {
+ kmem_cache_destroy(obdo_cachep);
+ obdo_cachep = NULL;
+ }
+ if (import_cachep) {
+ kmem_cache_destroy(import_cachep);
+ import_cachep = NULL;
+ }
+ if (capa_cachep) {
+ kmem_cache_destroy(capa_cachep);
+ capa_cachep = NULL;
+ }
+ EXIT;
+}
+
+int obd_init_caches(void)
+{
+ ENTRY;
+
+ LASSERT(obd_device_cachep == NULL);
+ obd_device_cachep = kmem_cache_create("ll_obd_dev_cache",
+ sizeof(struct obd_device),
+ 0, 0, NULL);
+ if (!obd_device_cachep)
+ GOTO(out, -ENOMEM);
+
+ LASSERT(obdo_cachep == NULL);
+ obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
+ 0, 0, NULL);
+ if (!obdo_cachep)
+ GOTO(out, -ENOMEM);
+
+ LASSERT(import_cachep == NULL);
+ import_cachep = kmem_cache_create("ll_import_cache",
+ sizeof(struct obd_import),
+ 0, 0, NULL);
+ if (!import_cachep)
+ GOTO(out, -ENOMEM);
+
+ LASSERT(capa_cachep == NULL);
+ capa_cachep = kmem_cache_create("capa_cache",
+ sizeof(struct obd_capa), 0, 0, NULL);
+ if (!capa_cachep)
+ GOTO(out, -ENOMEM);
+
+ RETURN(0);
+ out:
+ obd_cleanup_caches();
+ RETURN(-ENOMEM);
+
+}
+
+/* map connection to client */
+struct obd_export *class_conn2export(struct lustre_handle *conn)
+{
+ struct obd_export *export;
+ ENTRY;
+
+ if (!conn) {
+ CDEBUG(D_CACHE, "looking for null handle\n");
+ RETURN(NULL);
+ }
+
+ if (conn->cookie == -1) { /* this means assign a new connection */
+ CDEBUG(D_CACHE, "want a new connection\n");
+ RETURN(NULL);
+ }
+
+ CDEBUG(D_INFO, "looking for export cookie "LPX64"\n", conn->cookie);
+ export = class_handle2object(conn->cookie);
+ RETURN(export);
+}
+EXPORT_SYMBOL(class_conn2export);
+
+struct obd_device *class_exp2obd(struct obd_export *exp)
+{
+ if (exp)
+ return exp->exp_obd;
+ return NULL;
+}
+EXPORT_SYMBOL(class_exp2obd);
+
+struct obd_device *class_conn2obd(struct lustre_handle *conn)
+{
+ struct obd_export *export;
+ export = class_conn2export(conn);
+ if (export) {
+ struct obd_device *obd = export->exp_obd;
+ class_export_put(export);
+ return obd;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(class_conn2obd);
+
+struct obd_import *class_exp2cliimp(struct obd_export *exp)
+{
+ struct obd_device *obd = exp->exp_obd;
+ if (obd == NULL)
+ return NULL;
+ return obd->u.cli.cl_import;
+}
+EXPORT_SYMBOL(class_exp2cliimp);
+
+struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
+{
+ struct obd_device *obd = class_conn2obd(conn);
+ if (obd == NULL)
+ return NULL;
+ return obd->u.cli.cl_import;
+}
+EXPORT_SYMBOL(class_conn2cliimp);
+
+/* Export management functions */
+static void class_export_destroy(struct obd_export *exp)
+{
+ struct obd_device *obd = exp->exp_obd;
+ ENTRY;
+
+ LASSERT_ATOMIC_ZERO(&exp->exp_refcount);
+ LASSERT(obd != NULL);
+
+ CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
+ exp->exp_client_uuid.uuid, obd->obd_name);
+
+ /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
+ if (exp->exp_connection)
+ ptlrpc_put_connection_superhack(exp->exp_connection);
+
+ LASSERT(list_empty(&exp->exp_outstanding_replies));
+ LASSERT(list_empty(&exp->exp_uncommitted_replies));
+ LASSERT(list_empty(&exp->exp_req_replay_queue));
+ LASSERT(list_empty(&exp->exp_hp_rpcs));
+ obd_destroy_export(exp);
+ class_decref(obd, "export", exp);
+
+ OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle);
+ EXIT;
+}
+
+static void export_handle_addref(void *export)
+{
+ class_export_get(export);
+}
+
+static struct portals_handle_ops export_handle_ops = {
+ .hop_addref = export_handle_addref,
+ .hop_free = NULL,
+};
+
+struct obd_export *class_export_get(struct obd_export *exp)
+{
+ atomic_inc(&exp->exp_refcount);
+ CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp,
+ atomic_read(&exp->exp_refcount));
+ return exp;
+}
+EXPORT_SYMBOL(class_export_get);
+
+void class_export_put(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON);
+ CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp,
+ atomic_read(&exp->exp_refcount) - 1);
+
+ if (atomic_dec_and_test(&exp->exp_refcount)) {
+ LASSERT(!list_empty(&exp->exp_obd_chain));
+ CDEBUG(D_IOCTL, "final put %p/%s\n",
+ exp, exp->exp_client_uuid.uuid);
+
+ /* release nid stat refererence */
+ lprocfs_exp_cleanup(exp);
+
+ obd_zombie_export_add(exp);
+ }
+}
+EXPORT_SYMBOL(class_export_put);
+
+/* Creates a new export, adds it to the hash table, and returns a
+ * pointer to it. The refcount is 2: one for the hash reference, and
+ * one for the pointer returned by this function. */
+struct obd_export *class_new_export(struct obd_device *obd,
+ struct obd_uuid *cluuid)
+{
+ struct obd_export *export;
+ cfs_hash_t *hash = NULL;
+ int rc = 0;
+ ENTRY;
+
+ OBD_ALLOC_PTR(export);
+ if (!export)
+ return ERR_PTR(-ENOMEM);
+
+ export->exp_conn_cnt = 0;
+ export->exp_lock_hash = NULL;
+ export->exp_flock_hash = NULL;
+ atomic_set(&export->exp_refcount, 2);
+ atomic_set(&export->exp_rpc_count, 0);
+ atomic_set(&export->exp_cb_count, 0);
+ atomic_set(&export->exp_locks_count, 0);
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ INIT_LIST_HEAD(&export->exp_locks_list);
+ spin_lock_init(&export->exp_locks_list_guard);
+#endif
+ atomic_set(&export->exp_replay_count, 0);
+ export->exp_obd = obd;
+ INIT_LIST_HEAD(&export->exp_outstanding_replies);
+ spin_lock_init(&export->exp_uncommitted_replies_lock);
+ INIT_LIST_HEAD(&export->exp_uncommitted_replies);
+ INIT_LIST_HEAD(&export->exp_req_replay_queue);
+ INIT_LIST_HEAD(&export->exp_handle.h_link);
+ INIT_LIST_HEAD(&export->exp_hp_rpcs);
+ class_handle_hash(&export->exp_handle, &export_handle_ops);
+ export->exp_last_request_time = cfs_time_current_sec();
+ spin_lock_init(&export->exp_lock);
+ spin_lock_init(&export->exp_rpc_lock);
+ INIT_HLIST_NODE(&export->exp_uuid_hash);
+ INIT_HLIST_NODE(&export->exp_nid_hash);
+ spin_lock_init(&export->exp_bl_list_lock);
+ INIT_LIST_HEAD(&export->exp_bl_list);
+
+ export->exp_sp_peer = LUSTRE_SP_ANY;
+ export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
+ export->exp_client_uuid = *cluuid;
+ obd_init_export(export);
+
+ spin_lock(&obd->obd_dev_lock);
+ /* shouldn't happen, but might race */
+ if (obd->obd_stopping)
+ GOTO(exit_unlock, rc = -ENODEV);
+
+ hash = cfs_hash_getref(obd->obd_uuid_hash);
+ if (hash == NULL)
+ GOTO(exit_unlock, rc = -ENODEV);
+ spin_unlock(&obd->obd_dev_lock);
+
+ if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
+ rc = cfs_hash_add_unique(hash, cluuid, &export->exp_uuid_hash);
+ if (rc != 0) {
+ LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n",
+ obd->obd_name, cluuid->uuid, rc);
+ GOTO(exit_err, rc = -EALREADY);
+ }
+ }
+
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_stopping) {
+ cfs_hash_del(hash, cluuid, &export->exp_uuid_hash);
+ GOTO(exit_unlock, rc = -ENODEV);
+ }
+
+ class_incref(obd, "export", export);
+ list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
+ list_add_tail(&export->exp_obd_chain_timed,
+ &export->exp_obd->obd_exports_timed);
+ export->exp_obd->obd_num_exports++;
+ spin_unlock(&obd->obd_dev_lock);
+ cfs_hash_putref(hash);
+ RETURN(export);
+
+exit_unlock:
+ spin_unlock(&obd->obd_dev_lock);
+exit_err:
+ if (hash)
+ cfs_hash_putref(hash);
+ class_handle_unhash(&export->exp_handle);
+ LASSERT(hlist_unhashed(&export->exp_uuid_hash));
+ obd_destroy_export(export);
+ OBD_FREE_PTR(export);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(class_new_export);
+
+void class_unlink_export(struct obd_export *exp)
+{
+ class_handle_unhash(&exp->exp_handle);
+
+ spin_lock(&exp->exp_obd->obd_dev_lock);
+ /* delete an uuid-export hashitem from hashtables */
+ if (!hlist_unhashed(&exp->exp_uuid_hash))
+ cfs_hash_del(exp->exp_obd->obd_uuid_hash,
+ &exp->exp_client_uuid,
+ &exp->exp_uuid_hash);
+
+ list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports);
+ list_del_init(&exp->exp_obd_chain_timed);
+ exp->exp_obd->obd_num_exports--;
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+ class_export_put(exp);
+}
+EXPORT_SYMBOL(class_unlink_export);
+
+/* Import management functions */
+void class_import_destroy(struct obd_import *imp)
+{
+ ENTRY;
+
+ CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp,
+ imp->imp_obd->obd_name);
+
+ LASSERT_ATOMIC_ZERO(&imp->imp_refcount);
+
+ ptlrpc_put_connection_superhack(imp->imp_connection);
+
+ while (!list_empty(&imp->imp_conn_list)) {
+ struct obd_import_conn *imp_conn;
+
+ imp_conn = list_entry(imp->imp_conn_list.next,
+ struct obd_import_conn, oic_item);
+ list_del_init(&imp_conn->oic_item);
+ ptlrpc_put_connection_superhack(imp_conn->oic_conn);
+ OBD_FREE(imp_conn, sizeof(*imp_conn));
+ }
+
+ LASSERT(imp->imp_sec == NULL);
+ class_decref(imp->imp_obd, "import", imp);
+ OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle);
+ EXIT;
+}
+
+static void import_handle_addref(void *import)
+{
+ class_import_get(import);
+}
+
+static struct portals_handle_ops import_handle_ops = {
+ .hop_addref = import_handle_addref,
+ .hop_free = NULL,
+};
+
+struct obd_import *class_import_get(struct obd_import *import)
+{
+ atomic_inc(&import->imp_refcount);
+ CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import,
+ atomic_read(&import->imp_refcount),
+ import->imp_obd->obd_name);
+ return import;
+}
+EXPORT_SYMBOL(class_import_get);
+
+void class_import_put(struct obd_import *imp)
+{
+ ENTRY;
+
+ LASSERT(list_empty(&imp->imp_zombie_chain));
+ LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON);
+
+ CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp,
+ atomic_read(&imp->imp_refcount) - 1,
+ imp->imp_obd->obd_name);
+
+ if (atomic_dec_and_test(&imp->imp_refcount)) {
+ CDEBUG(D_INFO, "final put import %p\n", imp);
+ obd_zombie_import_add(imp);
+ }
+
+ /* catch possible import put race */
+ LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, LI_POISON);
+ EXIT;
+}
+EXPORT_SYMBOL(class_import_put);
+
+static void init_imp_at(struct imp_at *at) {
+ int i;
+ at_init(&at->iat_net_latency, 0, 0);
+ for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+ /* max service estimates are tracked on the server side, so
+ don't use the AT history here, just use the last reported
+ val. (But keep hist for proc histogram, worst_ever) */
+ at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT,
+ AT_FLG_NOHIST);
+ }
+}
+
+struct obd_import *class_new_import(struct obd_device *obd)
+{
+ struct obd_import *imp;
+
+ OBD_ALLOC(imp, sizeof(*imp));
+ if (imp == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&imp->imp_pinger_chain);
+ INIT_LIST_HEAD(&imp->imp_zombie_chain);
+ INIT_LIST_HEAD(&imp->imp_replay_list);
+ INIT_LIST_HEAD(&imp->imp_sending_list);
+ INIT_LIST_HEAD(&imp->imp_delayed_list);
+ spin_lock_init(&imp->imp_lock);
+ imp->imp_last_success_conn = 0;
+ imp->imp_state = LUSTRE_IMP_NEW;
+ imp->imp_obd = class_incref(obd, "import", imp);
+ mutex_init(&imp->imp_sec_mutex);
+ init_waitqueue_head(&imp->imp_recovery_waitq);
+
+ atomic_set(&imp->imp_refcount, 2);
+ atomic_set(&imp->imp_unregistering, 0);
+ atomic_set(&imp->imp_inflight, 0);
+ atomic_set(&imp->imp_replay_inflight, 0);
+ atomic_set(&imp->imp_inval_count, 0);
+ INIT_LIST_HEAD(&imp->imp_conn_list);
+ INIT_LIST_HEAD(&imp->imp_handle.h_link);
+ class_handle_hash(&imp->imp_handle, &import_handle_ops);
+ init_imp_at(&imp->imp_at);
+
+ /* the default magic is V2, will be used in connect RPC, and
+ * then adjusted according to the flags in request/reply. */
+ imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2;
+
+ return imp;
+}
+EXPORT_SYMBOL(class_new_import);
+
+void class_destroy_import(struct obd_import *import)
+{
+ LASSERT(import != NULL);
+ LASSERT(import != LP_POISON);
+
+ class_handle_unhash(&import->imp_handle);
+
+ spin_lock(&import->imp_lock);
+ import->imp_generation++;
+ spin_unlock(&import->imp_lock);
+ class_import_put(import);
+}
+EXPORT_SYMBOL(class_destroy_import);
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+
+void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
+{
+ spin_lock(&exp->exp_locks_list_guard);
+
+ LASSERT(lock->l_exp_refs_nr >= 0);
+
+ if (lock->l_exp_refs_target != NULL &&
+ lock->l_exp_refs_target != exp) {
+ LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n",
+ exp, lock, lock->l_exp_refs_target);
+ }
+ if ((lock->l_exp_refs_nr ++) == 0) {
+ list_add(&lock->l_exp_refs_link, &exp->exp_locks_list);
+ lock->l_exp_refs_target = exp;
+ }
+ CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
+ lock, exp, lock->l_exp_refs_nr);
+ spin_unlock(&exp->exp_locks_list_guard);
+}
+EXPORT_SYMBOL(__class_export_add_lock_ref);
+
+void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
+{
+ spin_lock(&exp->exp_locks_list_guard);
+ LASSERT(lock->l_exp_refs_nr > 0);
+ if (lock->l_exp_refs_target != exp) {
+ LCONSOLE_WARN("lock %p, "
+ "mismatching export pointers: %p, %p\n",
+ lock, lock->l_exp_refs_target, exp);
+ }
+ if (-- lock->l_exp_refs_nr == 0) {
+ list_del_init(&lock->l_exp_refs_link);
+ lock->l_exp_refs_target = NULL;
+ }
+ CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
+ lock, exp, lock->l_exp_refs_nr);
+ spin_unlock(&exp->exp_locks_list_guard);
+}
+EXPORT_SYMBOL(__class_export_del_lock_ref);
+#endif
+
+/* A connection defines an export context in which preallocation can
+ be managed. This releases the export pointer reference, and returns
+ the export handle, so the export refcount is 1 when this function
+ returns. */
+int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_uuid *cluuid)
+{
+ struct obd_export *export;
+ LASSERT(conn != NULL);
+ LASSERT(obd != NULL);
+ LASSERT(cluuid != NULL);
+ ENTRY;
+
+ export = class_new_export(obd, cluuid);
+ if (IS_ERR(export))
+ RETURN(PTR_ERR(export));
+
+ conn->cookie = export->exp_handle.h_cookie;
+ class_export_put(export);
+
+ CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
+ cluuid->uuid, conn->cookie);
+ RETURN(0);
+}
+EXPORT_SYMBOL(class_connect);
+
+/* if export is involved in recovery then clean up related things */
+void class_export_recovery_cleanup(struct obd_export *exp)
+{
+ struct obd_device *obd = exp->exp_obd;
+
+ spin_lock(&obd->obd_recovery_task_lock);
+ if (exp->exp_delayed)
+ obd->obd_delayed_clients--;
+ if (obd->obd_recovering) {
+ if (exp->exp_in_recovery) {
+ spin_lock(&exp->exp_lock);
+ exp->exp_in_recovery = 0;
+ spin_unlock(&exp->exp_lock);
+ LASSERT_ATOMIC_POS(&obd->obd_connected_clients);
+ atomic_dec(&obd->obd_connected_clients);
+ }
+
+ /* if called during recovery then should update
+ * obd_stale_clients counter,
+ * lightweight exports are not counted */
+ if (exp->exp_failed &&
+ (exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) == 0)
+ exp->exp_obd->obd_stale_clients++;
+ }
+ spin_unlock(&obd->obd_recovery_task_lock);
+ /** Cleanup req replay fields */
+ if (exp->exp_req_replay_needed) {
+ spin_lock(&exp->exp_lock);
+ exp->exp_req_replay_needed = 0;
+ spin_unlock(&exp->exp_lock);
+ LASSERT(atomic_read(&obd->obd_req_replay_clients));
+ atomic_dec(&obd->obd_req_replay_clients);
+ }
+ /** Cleanup lock replay data */
+ if (exp->exp_lock_replay_needed) {
+ spin_lock(&exp->exp_lock);
+ exp->exp_lock_replay_needed = 0;
+ spin_unlock(&exp->exp_lock);
+ LASSERT(atomic_read(&obd->obd_lock_replay_clients));
+ atomic_dec(&obd->obd_lock_replay_clients);
+ }
+}
+
+/* This function removes 1-3 references from the export:
+ * 1 - for export pointer passed
+ * and if disconnect really need
+ * 2 - removing from hash
+ * 3 - in client_unlink_export
+ * The export pointer passed to this function can destroyed */
+int class_disconnect(struct obd_export *export)
+{
+ int already_disconnected;
+ ENTRY;
+
+ if (export == NULL) {
+ CWARN("attempting to free NULL export %p\n", export);
+ RETURN(-EINVAL);
+ }
+
+ spin_lock(&export->exp_lock);
+ already_disconnected = export->exp_disconnected;
+ export->exp_disconnected = 1;
+ spin_unlock(&export->exp_lock);
+
+ /* class_cleanup(), abort_recovery(), and class_fail_export()
+ * all end up in here, and if any of them race we shouldn't
+ * call extra class_export_puts(). */
+ if (already_disconnected) {
+ LASSERT(hlist_unhashed(&export->exp_nid_hash));
+ GOTO(no_disconn, already_disconnected);
+ }
+
+ CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n",
+ export->exp_handle.h_cookie);
+
+ if (!hlist_unhashed(&export->exp_nid_hash))
+ cfs_hash_del(export->exp_obd->obd_nid_hash,
+ &export->exp_connection->c_peer.nid,
+ &export->exp_nid_hash);
+
+ class_export_recovery_cleanup(export);
+ class_unlink_export(export);
+no_disconn:
+ class_export_put(export);
+ RETURN(0);
+}
+EXPORT_SYMBOL(class_disconnect);
+
+/* Return non-zero for a fully connected export */
+int class_connected_export(struct obd_export *exp)
+{
+ if (exp) {
+ int connected;
+ spin_lock(&exp->exp_lock);
+ connected = (exp->exp_conn_cnt > 0);
+ spin_unlock(&exp->exp_lock);
+ return connected;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(class_connected_export);
+
+static void class_disconnect_export_list(struct list_head *list,
+ enum obd_option flags)
+{
+ int rc;
+ struct obd_export *exp;
+ ENTRY;
+
+ /* It's possible that an export may disconnect itself, but
+ * nothing else will be added to this list. */
+ while (!list_empty(list)) {
+ exp = list_entry(list->next, struct obd_export,
+ exp_obd_chain);
+ /* need for safe call CDEBUG after obd_disconnect */
+ class_export_get(exp);
+
+ spin_lock(&exp->exp_lock);
+ exp->exp_flags = flags;
+ spin_unlock(&exp->exp_lock);
+
+ if (obd_uuid_equals(&exp->exp_client_uuid,
+ &exp->exp_obd->obd_uuid)) {
+ CDEBUG(D_HA,
+ "exp %p export uuid == obd uuid, don't discon\n",
+ exp);
+ /* Need to delete this now so we don't end up pointing
+ * to work_list later when this export is cleaned up. */
+ list_del_init(&exp->exp_obd_chain);
+ class_export_put(exp);
+ continue;
+ }
+
+ class_export_get(exp);
+ CDEBUG(D_HA, "%s: disconnecting export at %s (%p), "
+ "last request at "CFS_TIME_T"\n",
+ exp->exp_obd->obd_name, obd_export_nid2str(exp),
+ exp, exp->exp_last_request_time);
+ /* release one export reference anyway */
+ rc = obd_disconnect(exp);
+
+ CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n",
+ obd_export_nid2str(exp), exp, rc);
+ class_export_put(exp);
+ }
+ EXIT;
+}
+
+void class_disconnect_exports(struct obd_device *obd)
+{
+ struct list_head work_list;
+ ENTRY;
+
+ /* Move all of the exports from obd_exports to a work list, en masse. */
+ INIT_LIST_HEAD(&work_list);
+ spin_lock(&obd->obd_dev_lock);
+ list_splice_init(&obd->obd_exports, &work_list);
+ list_splice_init(&obd->obd_delayed_exports, &work_list);
+ spin_unlock(&obd->obd_dev_lock);
+
+ if (!list_empty(&work_list)) {
+ CDEBUG(D_HA, "OBD device %d (%p) has exports, "
+ "disconnecting them\n", obd->obd_minor, obd);
+ class_disconnect_export_list(&work_list,
+ exp_flags_from_obd(obd));
+ } else
+ CDEBUG(D_HA, "OBD device %d (%p) has no exports\n",
+ obd->obd_minor, obd);
+ EXIT;
+}
+EXPORT_SYMBOL(class_disconnect_exports);
+
+/* Remove exports that have not completed recovery.
+ */
+void class_disconnect_stale_exports(struct obd_device *obd,
+ int (*test_export)(struct obd_export *))
+{
+ struct list_head work_list;
+ struct obd_export *exp, *n;
+ int evicted = 0;
+ ENTRY;
+
+ INIT_LIST_HEAD(&work_list);
+ spin_lock(&obd->obd_dev_lock);
+ list_for_each_entry_safe(exp, n, &obd->obd_exports,
+ exp_obd_chain) {
+ /* don't count self-export as client */
+ if (obd_uuid_equals(&exp->exp_client_uuid,
+ &exp->exp_obd->obd_uuid))
+ continue;
+
+ /* don't evict clients which have no slot in last_rcvd
+ * (e.g. lightweight connection) */
+ if (exp->exp_target_data.ted_lr_idx == -1)
+ continue;
+
+ spin_lock(&exp->exp_lock);
+ if (exp->exp_failed || test_export(exp)) {
+ spin_unlock(&exp->exp_lock);
+ continue;
+ }
+ exp->exp_failed = 1;
+ spin_unlock(&exp->exp_lock);
+
+ list_move(&exp->exp_obd_chain, &work_list);
+ evicted++;
+ CDEBUG(D_HA, "%s: disconnect stale client %s@%s\n",
+ obd->obd_name, exp->exp_client_uuid.uuid,
+ exp->exp_connection == NULL ? "<unknown>" :
+ libcfs_nid2str(exp->exp_connection->c_peer.nid));
+ print_export_data(exp, "EVICTING", 0);
+ }
+ spin_unlock(&obd->obd_dev_lock);
+
+ if (evicted)
+ LCONSOLE_WARN("%s: disconnecting %d stale clients\n",
+ obd->obd_name, evicted);
+
+ class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
+ EXIT;
+}
+EXPORT_SYMBOL(class_disconnect_stale_exports);
+
+void class_fail_export(struct obd_export *exp)
+{
+ int rc, already_failed;
+
+ spin_lock(&exp->exp_lock);
+ already_failed = exp->exp_failed;
+ exp->exp_failed = 1;
+ spin_unlock(&exp->exp_lock);
+
+ if (already_failed) {
+ CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n",
+ exp, exp->exp_client_uuid.uuid);
+ return;
+ }
+
+ CDEBUG(D_HA, "disconnecting export %p/%s\n",
+ exp, exp->exp_client_uuid.uuid);
+
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
+ /* need for safe call CDEBUG after obd_disconnect */
+ class_export_get(exp);
+
+ /* Most callers into obd_disconnect are removing their own reference
+ * (request, for example) in addition to the one from the hash table.
+ * We don't have such a reference here, so make one. */
+ class_export_get(exp);
+ rc = obd_disconnect(exp);
+ if (rc)
+ CERROR("disconnecting export %p failed: %d\n", exp, rc);
+ else
+ CDEBUG(D_HA, "disconnected export %p/%s\n",
+ exp, exp->exp_client_uuid.uuid);
+ class_export_put(exp);
+}
+EXPORT_SYMBOL(class_fail_export);
+
+char *obd_export_nid2str(struct obd_export *exp)
+{
+ if (exp->exp_connection != NULL)
+ return libcfs_nid2str(exp->exp_connection->c_peer.nid);
+
+ return "(no nid)";
+}
+EXPORT_SYMBOL(obd_export_nid2str);
+
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
+{
+ cfs_hash_t *nid_hash;
+ struct obd_export *doomed_exp = NULL;
+ int exports_evicted = 0;
+
+ lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
+
+ spin_lock(&obd->obd_dev_lock);
+ /* umount has run already, so evict thread should leave
+ * its task to umount thread now */
+ if (obd->obd_stopping) {
+ spin_unlock(&obd->obd_dev_lock);
+ return exports_evicted;
+ }
+ nid_hash = obd->obd_nid_hash;
+ cfs_hash_getref(nid_hash);
+ spin_unlock(&obd->obd_dev_lock);
+
+ do {
+ doomed_exp = cfs_hash_lookup(nid_hash, &nid_key);
+ if (doomed_exp == NULL)
+ break;
+
+ LASSERTF(doomed_exp->exp_connection->c_peer.nid == nid_key,
+ "nid %s found, wanted nid %s, requested nid %s\n",
+ obd_export_nid2str(doomed_exp),
+ libcfs_nid2str(nid_key), nid);
+ LASSERTF(doomed_exp != obd->obd_self_export,
+ "self-export is hashed by NID?\n");
+ exports_evicted++;
+ LCONSOLE_WARN("%s: evicting %s (at %s) by administrative "
+ "request\n", obd->obd_name,
+ obd_uuid2str(&doomed_exp->exp_client_uuid),
+ obd_export_nid2str(doomed_exp));
+ class_fail_export(doomed_exp);
+ class_export_put(doomed_exp);
+ } while (1);
+
+ cfs_hash_putref(nid_hash);
+
+ if (!exports_evicted)
+ CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n",
+ obd->obd_name, nid);
+ return exports_evicted;
+}
+EXPORT_SYMBOL(obd_export_evict_by_nid);
+
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
+{
+ cfs_hash_t *uuid_hash;
+ struct obd_export *doomed_exp = NULL;
+ struct obd_uuid doomed_uuid;
+ int exports_evicted = 0;
+
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_stopping) {
+ spin_unlock(&obd->obd_dev_lock);
+ return exports_evicted;
+ }
+ uuid_hash = obd->obd_uuid_hash;
+ cfs_hash_getref(uuid_hash);
+ spin_unlock(&obd->obd_dev_lock);
+
+ obd_str2uuid(&doomed_uuid, uuid);
+ if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) {
+ CERROR("%s: can't evict myself\n", obd->obd_name);
+ cfs_hash_putref(uuid_hash);
+ return exports_evicted;
+ }
+
+ doomed_exp = cfs_hash_lookup(uuid_hash, &doomed_uuid);
+
+ if (doomed_exp == NULL) {
+ CERROR("%s: can't disconnect %s: no exports found\n",
+ obd->obd_name, uuid);
+ } else {
+ CWARN("%s: evicting %s at adminstrative request\n",
+ obd->obd_name, doomed_exp->exp_client_uuid.uuid);
+ class_fail_export(doomed_exp);
+ class_export_put(doomed_exp);
+ exports_evicted++;
+ }
+ cfs_hash_putref(uuid_hash);
+
+ return exports_evicted;
+}
+EXPORT_SYMBOL(obd_export_evict_by_uuid);
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void (*class_export_dump_hook)(struct obd_export*) = NULL;
+EXPORT_SYMBOL(class_export_dump_hook);
+#endif
+
+static void print_export_data(struct obd_export *exp, const char *status,
+ int locks)
+{
+ struct ptlrpc_reply_state *rs;
+ struct ptlrpc_reply_state *first_reply = NULL;
+ int nreplies = 0;
+
+ spin_lock(&exp->exp_lock);
+ list_for_each_entry(rs, &exp->exp_outstanding_replies,
+ rs_exp_list) {
+ if (nreplies == 0)
+ first_reply = rs;
+ nreplies++;
+ }
+ spin_unlock(&exp->exp_lock);
+
+ CDEBUG(D_HA, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: %p %s "LPU64"\n",
+ exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid,
+ obd_export_nid2str(exp), atomic_read(&exp->exp_refcount),
+ atomic_read(&exp->exp_rpc_count),
+ atomic_read(&exp->exp_cb_count),
+ atomic_read(&exp->exp_locks_count),
+ exp->exp_disconnected, exp->exp_delayed, exp->exp_failed,
+ nreplies, first_reply, nreplies > 3 ? "..." : "",
+ exp->exp_last_committed);
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+ if (locks && class_export_dump_hook != NULL)
+ class_export_dump_hook(exp);
+#endif
+}
+
+void dump_exports(struct obd_device *obd, int locks)
+{
+ struct obd_export *exp;
+
+ spin_lock(&obd->obd_dev_lock);
+ list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain)
+ print_export_data(exp, "ACTIVE", locks);
+ list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain)
+ print_export_data(exp, "UNLINKED", locks);
+ list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain)
+ print_export_data(exp, "DELAYED", locks);
+ spin_unlock(&obd->obd_dev_lock);
+ spin_lock(&obd_zombie_impexp_lock);
+ list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain)
+ print_export_data(exp, "ZOMBIE", locks);
+ spin_unlock(&obd_zombie_impexp_lock);
+}
+EXPORT_SYMBOL(dump_exports);
+
+void obd_exports_barrier(struct obd_device *obd)
+{
+ int waited = 2;
+ LASSERT(list_empty(&obd->obd_exports));
+ spin_lock(&obd->obd_dev_lock);
+ while (!list_empty(&obd->obd_unlinked_exports)) {
+ spin_unlock(&obd->obd_dev_lock);
+ schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
+ cfs_time_seconds(waited));
+ if (waited > 5 && IS_PO2(waited)) {
+ LCONSOLE_WARN("%s is waiting for obd_unlinked_exports "
+ "more than %d seconds. "
+ "The obd refcount = %d. Is it stuck?\n",
+ obd->obd_name, waited,
+ atomic_read(&obd->obd_refcount));
+ dump_exports(obd, 1);
+ }
+ waited *= 2;
+ spin_lock(&obd->obd_dev_lock);
+ }
+ spin_unlock(&obd->obd_dev_lock);
+}
+EXPORT_SYMBOL(obd_exports_barrier);
+
+/* Total amount of zombies to be destroyed */
+static int zombies_count = 0;
+
+/**
+ * kill zombie imports and exports
+ */
+void obd_zombie_impexp_cull(void)
+{
+ struct obd_import *import;
+ struct obd_export *export;
+ ENTRY;
+
+ do {
+ spin_lock(&obd_zombie_impexp_lock);
+
+ import = NULL;
+ if (!list_empty(&obd_zombie_imports)) {
+ import = list_entry(obd_zombie_imports.next,
+ struct obd_import,
+ imp_zombie_chain);
+ list_del_init(&import->imp_zombie_chain);
+ }
+
+ export = NULL;
+ if (!list_empty(&obd_zombie_exports)) {
+ export = list_entry(obd_zombie_exports.next,
+ struct obd_export,
+ exp_obd_chain);
+ list_del_init(&export->exp_obd_chain);
+ }
+
+ spin_unlock(&obd_zombie_impexp_lock);
+
+ if (import != NULL) {
+ class_import_destroy(import);
+ spin_lock(&obd_zombie_impexp_lock);
+ zombies_count--;
+ spin_unlock(&obd_zombie_impexp_lock);
+ }
+
+ if (export != NULL) {
+ class_export_destroy(export);
+ spin_lock(&obd_zombie_impexp_lock);
+ zombies_count--;
+ spin_unlock(&obd_zombie_impexp_lock);
+ }
+
+ cond_resched();
+ } while (import != NULL || export != NULL);
+ EXIT;
+}
+
+static struct completion obd_zombie_start;
+static struct completion obd_zombie_stop;
+static unsigned long obd_zombie_flags;
+static wait_queue_head_t obd_zombie_waitq;
+static pid_t obd_zombie_pid;
+
+enum {
+ OBD_ZOMBIE_STOP = 0x0001,
+};
+
+/**
+ * check for work for kill zombie import/export thread.
+ */
+static int obd_zombie_impexp_check(void *arg)
+{
+ int rc;
+
+ spin_lock(&obd_zombie_impexp_lock);
+ rc = (zombies_count == 0) &&
+ !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+ spin_unlock(&obd_zombie_impexp_lock);
+
+ RETURN(rc);
+}
+
+/**
+ * Add export to the obd_zombe thread and notify it.
+ */
+static void obd_zombie_export_add(struct obd_export *exp) {
+ spin_lock(&exp->exp_obd->obd_dev_lock);
+ LASSERT(!list_empty(&exp->exp_obd_chain));
+ list_del_init(&exp->exp_obd_chain);
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+ spin_lock(&obd_zombie_impexp_lock);
+ zombies_count++;
+ list_add(&exp->exp_obd_chain, &obd_zombie_exports);
+ spin_unlock(&obd_zombie_impexp_lock);
+
+ obd_zombie_impexp_notify();
+}
+
+/**
+ * Add import to the obd_zombe thread and notify it.
+ */
+static void obd_zombie_import_add(struct obd_import *imp) {
+ LASSERT(imp->imp_sec == NULL);
+ LASSERT(imp->imp_rq_pool == NULL);
+ spin_lock(&obd_zombie_impexp_lock);
+ LASSERT(list_empty(&imp->imp_zombie_chain));
+ zombies_count++;
+ list_add(&imp->imp_zombie_chain, &obd_zombie_imports);
+ spin_unlock(&obd_zombie_impexp_lock);
+
+ obd_zombie_impexp_notify();
+}
+
+/**
+ * notify import/export destroy thread about new zombie.
+ */
+static void obd_zombie_impexp_notify(void)
+{
+ /*
+ * Make sure obd_zomebie_impexp_thread get this notification.
+ * It is possible this signal only get by obd_zombie_barrier, and
+ * barrier gulps this notification and sleeps away and hangs ensues
+ */
+ wake_up_all(&obd_zombie_waitq);
+}
+
+/**
+ * check whether obd_zombie is idle
+ */
+static int obd_zombie_is_idle(void)
+{
+ int rc;
+
+ LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags));
+ spin_lock(&obd_zombie_impexp_lock);
+ rc = (zombies_count == 0);
+ spin_unlock(&obd_zombie_impexp_lock);
+ return rc;
+}
+
+/**
+ * wait when obd_zombie import/export queues become empty
+ */
+void obd_zombie_barrier(void)
+{
+ struct l_wait_info lwi = { 0 };
+
+ if (obd_zombie_pid == current_pid())
+ /* don't wait for myself */
+ return;
+ l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi);
+}
+EXPORT_SYMBOL(obd_zombie_barrier);
+
+
+/**
+ * destroy zombie export/import thread.
+ */
+static int obd_zombie_impexp_thread(void *unused)
+{
+ unshare_fs_struct();
+ complete(&obd_zombie_start);
+
+ obd_zombie_pid = current_pid();
+
+ while (!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) {
+ struct l_wait_info lwi = { 0 };
+
+ l_wait_event(obd_zombie_waitq,
+ !obd_zombie_impexp_check(NULL), &lwi);
+ obd_zombie_impexp_cull();
+
+ /*
+ * Notify obd_zombie_barrier callers that queues
+ * may be empty.
+ */
+ wake_up(&obd_zombie_waitq);
+ }
+
+ complete(&obd_zombie_stop);
+
+ RETURN(0);
+}
+
+
+/**
+ * start destroy zombie import/export thread
+ */
+int obd_zombie_impexp_init(void)
+{
+ task_t *task;
+
+ INIT_LIST_HEAD(&obd_zombie_imports);
+ INIT_LIST_HEAD(&obd_zombie_exports);
+ spin_lock_init(&obd_zombie_impexp_lock);
+ init_completion(&obd_zombie_start);
+ init_completion(&obd_zombie_stop);
+ init_waitqueue_head(&obd_zombie_waitq);
+ obd_zombie_pid = 0;
+
+ task = kthread_run(obd_zombie_impexp_thread, NULL, "obd_zombid");
+ if (IS_ERR(task))
+ RETURN(PTR_ERR(task));
+
+ wait_for_completion(&obd_zombie_start);
+ RETURN(0);
+}
+/**
+ * stop destroy zombie import/export thread
+ */
+void obd_zombie_impexp_stop(void)
+{
+ set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+ obd_zombie_impexp_notify();
+ wait_for_completion(&obd_zombie_stop);
+}
+
+/***** Kernel-userspace comm helpers *******/
+
+/* Get length of entire message, including header */
+int kuc_len(int payload_len)
+{
+ return sizeof(struct kuc_hdr) + payload_len;
+}
+EXPORT_SYMBOL(kuc_len);
+
+/* Get a pointer to kuc header, given a ptr to the payload
+ * @param p Pointer to payload area
+ * @returns Pointer to kuc header
+ */
+struct kuc_hdr * kuc_ptr(void *p)
+{
+ struct kuc_hdr *lh = ((struct kuc_hdr *)p) - 1;
+ LASSERT(lh->kuc_magic == KUC_MAGIC);
+ return lh;
+}
+EXPORT_SYMBOL(kuc_ptr);
+
+/* Test if payload is part of kuc message
+ * @param p Pointer to payload area
+ * @returns boolean
+ */
+int kuc_ispayload(void *p)
+{
+ struct kuc_hdr *kh = ((struct kuc_hdr *)p) - 1;
+
+ if (kh->kuc_magic == KUC_MAGIC)
+ return 1;
+ else
+ return 0;
+}
+EXPORT_SYMBOL(kuc_ispayload);
+
+/* Alloc space for a message, and fill in header
+ * @return Pointer to payload area
+ */
+void *kuc_alloc(int payload_len, int transport, int type)
+{
+ struct kuc_hdr *lh;
+ int len = kuc_len(payload_len);
+
+ OBD_ALLOC(lh, len);
+ if (lh == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = transport;
+ lh->kuc_msgtype = type;
+ lh->kuc_msglen = len;
+
+ return (void *)(lh + 1);
+}
+EXPORT_SYMBOL(kuc_alloc);
+
+/* Takes pointer to payload area */
+inline void kuc_free(void *p, int payload_len)
+{
+ struct kuc_hdr *lh = kuc_ptr(p);
+ OBD_FREE(lh, kuc_len(payload_len));
+}
+EXPORT_SYMBOL(kuc_free);
diff --git a/drivers/staging/lustre/lustre/obdclass/idmap.c b/drivers/staging/lustre/lustre/obdclass/idmap.c
new file mode 100644
index 000000000000..622f8d165275
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/idmap.c
@@ -0,0 +1,474 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/idmap.c
+ *
+ * Lustre user identity mapping.
+ *
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <lustre_idmap.h>
+#include <md_object.h>
+#include <obd_support.h>
+
+#define lustre_get_group_info(group_info) do { \
+ atomic_inc(&(group_info)->usage); \
+} while (0)
+
+#define lustre_put_group_info(group_info) do { \
+ if (atomic_dec_and_test(&(group_info)->usage)) \
+ groups_free(group_info); \
+} while (0)
+
+/*
+ * groups_search() is copied from linux kernel!
+ * A simple bsearch.
+ */
+static int lustre_groups_search(group_info_t *group_info,
+ gid_t grp)
+{
+ int left, right;
+
+ if (!group_info)
+ return 0;
+
+ left = 0;
+ right = group_info->ngroups;
+ while (left < right) {
+ int mid = (left + right) / 2;
+ int cmp = grp - CFS_GROUP_AT(group_info, mid);
+
+ if (cmp > 0)
+ left = mid + 1;
+ else if (cmp < 0)
+ right = mid;
+ else
+ return 1;
+ }
+ return 0;
+}
+
+void lustre_groups_from_list(group_info_t *ginfo, gid_t *glist)
+{
+ int i;
+ int count = ginfo->ngroups;
+
+ /* fill group_info from gid array */
+ for (i = 0; i < ginfo->nblocks && count > 0; i++) {
+ int cp_count = min(CFS_NGROUPS_PER_BLOCK, count);
+ int off = i * CFS_NGROUPS_PER_BLOCK;
+ int len = cp_count * sizeof(*glist);
+
+ memcpy(ginfo->blocks[i], glist + off, len);
+ count -= cp_count;
+ }
+}
+EXPORT_SYMBOL(lustre_groups_from_list);
+
+/* groups_sort() is copied from linux kernel! */
+/* a simple shell-metzner sort */
+void lustre_groups_sort(group_info_t *group_info)
+{
+ int base, max, stride;
+ int gidsetsize = group_info->ngroups;
+
+ for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
+ ; /* nothing */
+ stride /= 3;
+
+ while (stride) {
+ max = gidsetsize - stride;
+ for (base = 0; base < max; base++) {
+ int left = base;
+ int right = left + stride;
+ gid_t tmp = CFS_GROUP_AT(group_info, right);
+
+ while (left >= 0 &&
+ CFS_GROUP_AT(group_info, left) > tmp) {
+ CFS_GROUP_AT(group_info, right) =
+ CFS_GROUP_AT(group_info, left);
+ right = left;
+ left -= stride;
+ }
+ CFS_GROUP_AT(group_info, right) = tmp;
+ }
+ stride /= 3;
+ }
+}
+EXPORT_SYMBOL(lustre_groups_sort);
+
+int lustre_in_group_p(struct lu_ucred *mu, gid_t grp)
+{
+ int rc = 1;
+
+ if (grp != mu->uc_fsgid) {
+ group_info_t *group_info = NULL;
+
+ if (mu->uc_ginfo || !mu->uc_identity ||
+ mu->uc_valid == UCRED_OLD)
+ if (grp == mu->uc_suppgids[0] ||
+ grp == mu->uc_suppgids[1])
+ return 1;
+
+ if (mu->uc_ginfo)
+ group_info = mu->uc_ginfo;
+ else if (mu->uc_identity)
+ group_info = mu->uc_identity->mi_ginfo;
+
+ if (!group_info)
+ return 0;
+
+ lustre_get_group_info(group_info);
+ rc = lustre_groups_search(group_info, grp);
+ lustre_put_group_info(group_info);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lustre_in_group_p);
+
+struct lustre_idmap_entry {
+ struct list_head lie_rmt_uid_hash; /* hashed as lie_rmt_uid; */
+ struct list_head lie_lcl_uid_hash; /* hashed as lie_lcl_uid; */
+ struct list_head lie_rmt_gid_hash; /* hashed as lie_rmt_gid; */
+ struct list_head lie_lcl_gid_hash; /* hashed as lie_lcl_gid; */
+ uid_t lie_rmt_uid; /* remote uid */
+ uid_t lie_lcl_uid; /* local uid */
+ gid_t lie_rmt_gid; /* remote gid */
+ gid_t lie_lcl_gid; /* local gid */
+};
+
+static inline __u32 lustre_idmap_hashfunc(__u32 id)
+{
+ return id & (CFS_IDMAP_HASHSIZE - 1);
+}
+
+static
+struct lustre_idmap_entry *idmap_entry_alloc(uid_t rmt_uid, uid_t lcl_uid,
+ gid_t rmt_gid, gid_t lcl_gid)
+{
+ struct lustre_idmap_entry *e;
+
+ OBD_ALLOC_PTR(e);
+ if (e == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&e->lie_rmt_uid_hash);
+ INIT_LIST_HEAD(&e->lie_lcl_uid_hash);
+ INIT_LIST_HEAD(&e->lie_rmt_gid_hash);
+ INIT_LIST_HEAD(&e->lie_lcl_gid_hash);
+ e->lie_rmt_uid = rmt_uid;
+ e->lie_lcl_uid = lcl_uid;
+ e->lie_rmt_gid = rmt_gid;
+ e->lie_lcl_gid = lcl_gid;
+
+ return e;
+}
+
+static void idmap_entry_free(struct lustre_idmap_entry *e)
+{
+ if (!list_empty(&e->lie_rmt_uid_hash))
+ list_del(&e->lie_rmt_uid_hash);
+ if (!list_empty(&e->lie_lcl_uid_hash))
+ list_del(&e->lie_lcl_uid_hash);
+ if (!list_empty(&e->lie_rmt_gid_hash))
+ list_del(&e->lie_rmt_gid_hash);
+ if (!list_empty(&e->lie_lcl_gid_hash))
+ list_del(&e->lie_lcl_gid_hash);
+ OBD_FREE_PTR(e);
+}
+
+/*
+ * return value
+ * NULL: not found entry
+ * ERR_PTR(-EACCES): found 1(remote):N(local) mapped entry
+ * others: found normal entry
+ */
+static
+struct lustre_idmap_entry *idmap_search_entry(struct lustre_idmap_table *t,
+ uid_t rmt_uid, uid_t lcl_uid,
+ gid_t rmt_gid, gid_t lcl_gid)
+{
+ struct list_head *head;
+ struct lustre_idmap_entry *e;
+
+ head = &t->lit_idmaps[RMT_UIDMAP_IDX][lustre_idmap_hashfunc(rmt_uid)];
+ list_for_each_entry(e, head, lie_rmt_uid_hash)
+ if (e->lie_rmt_uid == rmt_uid) {
+ if (e->lie_lcl_uid == lcl_uid) {
+ if (e->lie_rmt_gid == rmt_gid &&
+ e->lie_lcl_gid == lcl_gid)
+ /* must be quaternion match */
+ return e;
+ } else {
+ /* 1:N uid mapping */
+ CERROR("rmt uid %u already be mapped to %u"
+ " (new %u)\n", e->lie_rmt_uid,
+ e->lie_lcl_uid, lcl_uid);
+ return ERR_PTR(-EACCES);
+ }
+ }
+
+ head = &t->lit_idmaps[RMT_GIDMAP_IDX][lustre_idmap_hashfunc(rmt_gid)];
+ list_for_each_entry(e, head, lie_rmt_gid_hash)
+ if (e->lie_rmt_gid == rmt_gid) {
+ if (e->lie_lcl_gid == lcl_gid) {
+ if (unlikely(e->lie_rmt_uid == rmt_uid &&
+ e->lie_lcl_uid == lcl_uid))
+ /* after uid mapping search above,
+ * we should never come here */
+ LBUG();
+ } else {
+ /* 1:N gid mapping */
+ CERROR("rmt gid %u already be mapped to %u"
+ " (new %u)\n", e->lie_rmt_gid,
+ e->lie_lcl_gid, lcl_gid);
+ return ERR_PTR(-EACCES);
+ }
+ }
+
+ return NULL;
+}
+
+static __u32 idmap_lookup_uid(struct list_head *hash, int reverse,
+ __u32 uid)
+{
+ struct list_head *head = &hash[lustre_idmap_hashfunc(uid)];
+ struct lustre_idmap_entry *e;
+
+ if (!reverse) {
+ list_for_each_entry(e, head, lie_rmt_uid_hash)
+ if (e->lie_rmt_uid == uid)
+ return e->lie_lcl_uid;
+ } else {
+ list_for_each_entry(e, head, lie_lcl_uid_hash)
+ if (e->lie_lcl_uid == uid)
+ return e->lie_rmt_uid;
+ }
+
+ return CFS_IDMAP_NOTFOUND;
+}
+
+static __u32 idmap_lookup_gid(struct list_head *hash, int reverse, __u32 gid)
+{
+ struct list_head *head = &hash[lustre_idmap_hashfunc(gid)];
+ struct lustre_idmap_entry *e;
+
+ if (!reverse) {
+ list_for_each_entry(e, head, lie_rmt_gid_hash)
+ if (e->lie_rmt_gid == gid)
+ return e->lie_lcl_gid;
+ } else {
+ list_for_each_entry(e, head, lie_lcl_gid_hash)
+ if (e->lie_lcl_gid == gid)
+ return e->lie_rmt_gid;
+ }
+
+ return CFS_IDMAP_NOTFOUND;
+}
+
+int lustre_idmap_add(struct lustre_idmap_table *t,
+ uid_t ruid, uid_t luid,
+ gid_t rgid, gid_t lgid)
+{
+ struct lustre_idmap_entry *e0, *e1;
+
+ LASSERT(t);
+
+ spin_lock(&t->lit_lock);
+ e0 = idmap_search_entry(t, ruid, luid, rgid, lgid);
+ spin_unlock(&t->lit_lock);
+ if (!e0) {
+ e0 = idmap_entry_alloc(ruid, luid, rgid, lgid);
+ if (!e0)
+ return -ENOMEM;
+
+ spin_lock(&t->lit_lock);
+ e1 = idmap_search_entry(t, ruid, luid, rgid, lgid);
+ if (e1 == NULL) {
+ list_add_tail(&e0->lie_rmt_uid_hash,
+ &t->lit_idmaps[RMT_UIDMAP_IDX]
+ [lustre_idmap_hashfunc(ruid)]);
+ list_add_tail(&e0->lie_lcl_uid_hash,
+ &t->lit_idmaps[LCL_UIDMAP_IDX]
+ [lustre_idmap_hashfunc(luid)]);
+ list_add_tail(&e0->lie_rmt_gid_hash,
+ &t->lit_idmaps[RMT_GIDMAP_IDX]
+ [lustre_idmap_hashfunc(rgid)]);
+ list_add_tail(&e0->lie_lcl_gid_hash,
+ &t->lit_idmaps[LCL_GIDMAP_IDX]
+ [lustre_idmap_hashfunc(lgid)]);
+ }
+ spin_unlock(&t->lit_lock);
+ if (e1 != NULL) {
+ idmap_entry_free(e0);
+ if (IS_ERR(e1))
+ return PTR_ERR(e1);
+ }
+ } else if (IS_ERR(e0)) {
+ return PTR_ERR(e0);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(lustre_idmap_add);
+
+int lustre_idmap_del(struct lustre_idmap_table *t,
+ uid_t ruid, uid_t luid,
+ gid_t rgid, gid_t lgid)
+{
+ struct lustre_idmap_entry *e;
+ int rc = 0;
+
+ LASSERT(t);
+
+ spin_lock(&t->lit_lock);
+ e = idmap_search_entry(t, ruid, luid, rgid, lgid);
+ if (IS_ERR(e))
+ rc = PTR_ERR(e);
+ else if (e)
+ idmap_entry_free(e);
+ spin_unlock(&t->lit_lock);
+
+ return rc;
+}
+EXPORT_SYMBOL(lustre_idmap_del);
+
+int lustre_idmap_lookup_uid(struct lu_ucred *mu,
+ struct lustre_idmap_table *t,
+ int reverse, uid_t uid)
+{
+ struct list_head *hash;
+
+ if (mu && (mu->uc_valid == UCRED_OLD || mu->uc_valid == UCRED_NEW)) {
+ if (!reverse) {
+ if (uid == mu->uc_o_uid)
+ return mu->uc_uid;
+ else if (uid == mu->uc_o_fsuid)
+ return mu->uc_fsuid;
+ } else {
+ if (uid == mu->uc_uid)
+ return mu->uc_o_uid;
+ else if (uid == mu->uc_fsuid)
+ return mu->uc_o_fsuid;
+ }
+ }
+
+ if (t == NULL)
+ return CFS_IDMAP_NOTFOUND;
+
+ hash = t->lit_idmaps[reverse ? LCL_UIDMAP_IDX : RMT_UIDMAP_IDX];
+
+ spin_lock(&t->lit_lock);
+ uid = idmap_lookup_uid(hash, reverse, uid);
+ spin_unlock(&t->lit_lock);
+
+ return uid;
+}
+EXPORT_SYMBOL(lustre_idmap_lookup_uid);
+
+int lustre_idmap_lookup_gid(struct lu_ucred *mu, struct lustre_idmap_table *t,
+ int reverse, gid_t gid)
+{
+ struct list_head *hash;
+
+ if (mu && (mu->uc_valid == UCRED_OLD || mu->uc_valid == UCRED_NEW)) {
+ if (!reverse) {
+ if (gid == mu->uc_o_gid)
+ return mu->uc_gid;
+ else if (gid == mu->uc_o_fsgid)
+ return mu->uc_fsgid;
+ } else {
+ if (gid == mu->uc_gid)
+ return mu->uc_o_gid;
+ else if (gid == mu->uc_fsgid)
+ return mu->uc_o_fsgid;
+ }
+ }
+
+ if (t == NULL)
+ return CFS_IDMAP_NOTFOUND;
+
+ hash = t->lit_idmaps[reverse ? LCL_GIDMAP_IDX : RMT_GIDMAP_IDX];
+
+ spin_lock(&t->lit_lock);
+ gid = idmap_lookup_gid(hash, reverse, gid);
+ spin_unlock(&t->lit_lock);
+
+ return gid;
+}
+EXPORT_SYMBOL(lustre_idmap_lookup_gid);
+
+struct lustre_idmap_table *lustre_idmap_init(void)
+{
+ struct lustre_idmap_table *t;
+ int i, j;
+
+ OBD_ALLOC_PTR(t);
+ if(unlikely(t == NULL))
+ return (ERR_PTR(-ENOMEM));
+
+ spin_lock_init(&t->lit_lock);
+ for (i = 0; i < ARRAY_SIZE(t->lit_idmaps); i++)
+ for (j = 0; j < ARRAY_SIZE(t->lit_idmaps[i]); j++)
+ INIT_LIST_HEAD(&t->lit_idmaps[i][j]);
+
+ return t;
+}
+EXPORT_SYMBOL(lustre_idmap_init);
+
+void lustre_idmap_fini(struct lustre_idmap_table *t)
+{
+ struct list_head *list;
+ struct lustre_idmap_entry *e;
+ int i;
+ LASSERT(t);
+
+ list = t->lit_idmaps[RMT_UIDMAP_IDX];
+ spin_lock(&t->lit_lock);
+ for (i = 0; i < CFS_IDMAP_HASHSIZE; i++)
+ while (!list_empty(&list[i])) {
+ e = list_entry(list[i].next,
+ struct lustre_idmap_entry,
+ lie_rmt_uid_hash);
+ idmap_entry_free(e);
+ }
+ spin_unlock(&t->lit_lock);
+
+ OBD_FREE_PTR(t);
+}
+EXPORT_SYMBOL(lustre_idmap_fini);
diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c
new file mode 100644
index 000000000000..b5c19ac1470f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linkea.c
@@ -0,0 +1,194 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#include <lustre/lustre_idl.h>
+#include <obd.h>
+#include <lustre_linkea.h>
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf)
+{
+ ldata->ld_buf = lu_buf_check_and_alloc(buf, PAGE_CACHE_SIZE);
+ if (ldata->ld_buf->lb_buf == NULL)
+ return -ENOMEM;
+ ldata->ld_leh = ldata->ld_buf->lb_buf;
+ ldata->ld_leh->leh_magic = LINK_EA_MAGIC;
+ ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
+ ldata->ld_leh->leh_reccount = 0;
+ return 0;
+}
+EXPORT_SYMBOL(linkea_data_new);
+
+int linkea_init(struct linkea_data *ldata)
+{
+ struct link_ea_header *leh;
+
+ LASSERT(ldata->ld_buf != NULL);
+ leh = ldata->ld_buf->lb_buf;
+ if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
+ leh->leh_magic = LINK_EA_MAGIC;
+ leh->leh_reccount = __swab32(leh->leh_reccount);
+ leh->leh_len = __swab64(leh->leh_len);
+ /* entries are swabbed by linkea_entry_unpack */
+ }
+ if (leh->leh_magic != LINK_EA_MAGIC)
+ return -EINVAL;
+ if (leh->leh_reccount == 0)
+ return -ENODATA;
+
+ ldata->ld_leh = leh;
+ return 0;
+}
+EXPORT_SYMBOL(linkea_init);
+
+/**
+ * Pack a link_ea_entry.
+ * All elements are stored as chars to avoid alignment issues.
+ * Numbers are always big-endian
+ * \retval record length
+ */
+static int linkea_entry_pack(struct link_ea_entry *lee,
+ const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ struct lu_fid tmpfid;
+ int reclen;
+
+ fid_cpu_to_be(&tmpfid, pfid);
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
+ tmpfid.f_ver = ~0;
+ memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
+ memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
+ reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;
+
+ lee->lee_reclen[0] = (reclen >> 8) & 0xff;
+ lee->lee_reclen[1] = reclen & 0xff;
+ return reclen;
+}
+
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+ struct lu_name *lname, struct lu_fid *pfid)
+{
+ *reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
+ memcpy(pfid, &lee->lee_parent_fid, sizeof(*pfid));
+ fid_be_to_cpu(pfid, pfid);
+ lname->ln_name = lee->lee_name;
+ lname->ln_namelen = *reclen - sizeof(struct link_ea_entry);
+}
+EXPORT_SYMBOL(linkea_entry_unpack);
+
+/**
+ * Add a record to the end of link ea buf
+ **/
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ LASSERT(ldata->ld_leh != NULL);
+
+ if (lname == NULL || pfid == NULL)
+ return -EINVAL;
+
+ ldata->ld_reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
+ if (ldata->ld_leh->leh_len + ldata->ld_reclen >
+ ldata->ld_buf->lb_len) {
+ if (lu_buf_check_and_grow(ldata->ld_buf,
+ ldata->ld_leh->leh_len +
+ ldata->ld_reclen) < 0)
+ return -ENOMEM;
+ }
+
+ ldata->ld_leh = ldata->ld_buf->lb_buf;
+ ldata->ld_lee = ldata->ld_buf->lb_buf + ldata->ld_leh->leh_len;
+ ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
+ ldata->ld_leh->leh_len += ldata->ld_reclen;
+ ldata->ld_leh->leh_reccount++;
+ CDEBUG(D_INODE, "New link_ea name '%.*s' is added\n",
+ lname->ln_namelen, lname->ln_name);
+ return 0;
+}
+EXPORT_SYMBOL(linkea_add_buf);
+
+/** Del the current record from the link ea buf */
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname)
+{
+ LASSERT(ldata->ld_leh != NULL && ldata->ld_lee != NULL);
+
+ ldata->ld_leh->leh_reccount--;
+ ldata->ld_leh->leh_len -= ldata->ld_reclen;
+ memmove(ldata->ld_lee, (char *)ldata->ld_lee + ldata->ld_reclen,
+ (char *)ldata->ld_leh + ldata->ld_leh->leh_len -
+ (char *)ldata->ld_lee);
+ CDEBUG(D_INODE, "Old link_ea name '%.*s' is removed\n",
+ lname->ln_namelen, lname->ln_name);
+}
+EXPORT_SYMBOL(linkea_del_buf);
+
+/**
+ * Check if such a link exists in linkEA.
+ *
+ * \param ldata link data the search to be done on
+ * \param lname name in the parent's directory entry pointing to this object
+ * \param pfid parent fid the link to be found for
+ *
+ * \retval 0 success
+ * \retval -ENOENT link does not exist
+ * \retval -ve on error
+ */
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+ const struct lu_fid *pfid)
+{
+ struct lu_name tmpname;
+ struct lu_fid tmpfid;
+ int count;
+
+ LASSERT(ldata->ld_leh != NULL);
+
+ /* link #0 */
+ ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
+
+ for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
+ linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
+ &tmpname, &tmpfid);
+ if (tmpname.ln_namelen == lname->ln_namelen &&
+ lu_fid_eq(&tmpfid, pfid) &&
+ (strncmp(tmpname.ln_name, lname->ln_name,
+ tmpname.ln_namelen) == 0))
+ break;
+ ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
+ ldata->ld_reclen);
+ }
+
+ if (count == ldata->ld_leh->leh_reccount) {
+ CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n",
+ lname->ln_namelen, lname->ln_name);
+ ldata->ld_lee = NULL;
+ return -ENOENT;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(linkea_links_find);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
new file mode 100644
index 000000000000..d2c3072541d1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -0,0 +1,408 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/linux/linux-module.c
+ *
+ * Object Devices Class Driver
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/lp.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/ioctls.h>
+#include <asm/poll.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/seq_file.h>
+
+#include <linux/libcfs/libcfs.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <linux/lnet/lnetctl.h>
+#include <lprocfs_status.h>
+#include <lustre_ver.h>
+#include <lustre/lustre_build_version.h>
+
+int proc_version;
+
+/* buffer MUST be at least the size of obd_ioctl_hdr */
+int obd_ioctl_getdata(char **buf, int *len, void *arg)
+{
+ struct obd_ioctl_hdr hdr;
+ struct obd_ioctl_data *data;
+ int err;
+ int offset = 0;
+ ENTRY;
+
+ err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
+ if ( err )
+ RETURN(err);
+
+ if (hdr.ioc_version != OBD_IOCTL_VERSION) {
+ CERROR("Version mismatch kernel (%x) vs application (%x)\n",
+ OBD_IOCTL_VERSION, hdr.ioc_version);
+ RETURN(-EINVAL);
+ }
+
+ if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
+ CERROR("User buffer len %d exceeds %d max buffer\n",
+ hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
+ RETURN(-EINVAL);
+ }
+
+ if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
+ CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len);
+ RETURN(-EINVAL);
+ }
+
+ /* When there are lots of processes calling vmalloc on multi-core
+ * system, the high lock contention will hurt performance badly,
+ * obdfilter-survey is an example, which relies on ioctl. So we'd
+ * better avoid vmalloc on ioctl path. LU-66 */
+ OBD_ALLOC_LARGE(*buf, hdr.ioc_len);
+ if (*buf == NULL) {
+ CERROR("Cannot allocate control buffer of len %d\n",
+ hdr.ioc_len);
+ RETURN(-EINVAL);
+ }
+ *len = hdr.ioc_len;
+ data = (struct obd_ioctl_data *)*buf;
+
+ err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
+ if ( err ) {
+ OBD_FREE_LARGE(*buf, hdr.ioc_len);
+ RETURN(err);
+ }
+
+ if (obd_ioctl_is_invalid(data)) {
+ CERROR("ioctl not correctly formatted\n");
+ OBD_FREE_LARGE(*buf, hdr.ioc_len);
+ RETURN(-EINVAL);
+ }
+
+ if (data->ioc_inllen1) {
+ data->ioc_inlbuf1 = &data->ioc_bulk[0];
+ offset += cfs_size_round(data->ioc_inllen1);
+ }
+
+ if (data->ioc_inllen2) {
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
+ offset += cfs_size_round(data->ioc_inllen2);
+ }
+
+ if (data->ioc_inllen3) {
+ data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
+ offset += cfs_size_round(data->ioc_inllen3);
+ }
+
+ if (data->ioc_inllen4) {
+ data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
+ }
+
+ EXIT;
+ return 0;
+}
+EXPORT_SYMBOL(obd_ioctl_getdata);
+
+int obd_ioctl_popdata(void *arg, void *data, int len)
+{
+ int err;
+
+ err = copy_to_user(arg, data, len);
+ if (err)
+ err = -EFAULT;
+ return err;
+}
+EXPORT_SYMBOL(obd_ioctl_popdata);
+
+/* opening /dev/obd */
+static int obd_class_open(struct inode * inode, struct file * file)
+{
+ ENTRY;
+
+ try_module_get(THIS_MODULE);
+ RETURN(0);
+}
+
+/* closing /dev/obd */
+static int obd_class_release(struct inode * inode, struct file * file)
+{
+ ENTRY;
+
+ module_put(THIS_MODULE);
+ RETURN(0);
+}
+
+/* to control /dev/obd */
+static long obd_class_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ int err = 0;
+ ENTRY;
+
+ /* Allow non-root access for OBD_IOC_PING_TARGET - used by lfs check */
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET))
+ RETURN(err = -EACCES);
+ if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */
+ RETURN(err = -ENOTTY);
+
+ err = class_handle_ioctl(cmd, (unsigned long)arg);
+
+ RETURN(err);
+}
+
+/* declare character device */
+static struct file_operations obd_psdev_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */
+ .open = obd_class_open, /* open */
+ .release = obd_class_release, /* release */
+};
+
+/* modules setup */
+psdev_t obd_psdev = {
+ .minor = OBD_DEV_MINOR,
+ .name = OBD_DEV_NAME,
+ .fops = &obd_psdev_fops,
+};
+
+
+#ifdef LPROCFS
+int obd_proc_version_seq_show(struct seq_file *m, void *v)
+{
+ return seq_printf(m, "lustre: %s\nkernel: %s\nbuild: %s\n",
+ LUSTRE_VERSION_STRING, "patchless_client",
+ BUILD_VERSION);
+}
+LPROC_SEQ_FOPS_RO(obd_proc_version);
+
+int obd_proc_pinger_seq_show(struct seq_file *m, void *v)
+{
+ return seq_printf(m, "%s\n", "on");
+}
+LPROC_SEQ_FOPS_RO(obd_proc_pinger);
+
+static int obd_proc_health_seq_show(struct seq_file *m, void *v)
+{
+ int rc = 0, i;
+
+ if (libcfs_catastrophe)
+ seq_printf(m, "LBUG\n");
+
+ read_lock(&obd_dev_lock);
+ for (i = 0; i < class_devno_max(); i++) {
+ struct obd_device *obd;
+
+ obd = class_num2obd(i);
+ if (obd == NULL || !obd->obd_attached || !obd->obd_set_up)
+ continue;
+
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ if (obd->obd_stopping)
+ continue;
+
+ class_incref(obd, __FUNCTION__, current);
+ read_unlock(&obd_dev_lock);
+
+ if (obd_health_check(NULL, obd)) {
+ seq_printf(m, "device %s reported unhealthy\n",
+ obd->obd_name);
+ rc++;
+ }
+ class_decref(obd, __FUNCTION__, current);
+ read_lock(&obd_dev_lock);
+ }
+ read_unlock(&obd_dev_lock);
+
+ if (rc == 0)
+ return seq_printf(m, "healthy\n");
+
+ seq_printf(m, "NOT HEALTHY\n");
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(obd_proc_health);
+
+static int obd_proc_jobid_var_seq_show(struct seq_file *m, void *v)
+{
+ return seq_printf(m, "%s\n", obd_jobid_var);
+}
+
+static ssize_t obd_proc_jobid_var_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN)
+ return -EINVAL;
+
+ memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1);
+ /* Trim the trailing '\n' if any */
+ memcpy(obd_jobid_var, buffer, count - (buffer[count - 1] == '\n'));
+ return count;
+}
+LPROC_SEQ_FOPS(obd_proc_jobid_var);
+
+/* Root for /proc/fs/lustre */
+struct proc_dir_entry *proc_lustre_root = NULL;
+EXPORT_SYMBOL(proc_lustre_root);
+
+struct lprocfs_vars lprocfs_base[] = {
+ { "version", &obd_proc_version_fops },
+ { "pinger", &obd_proc_pinger_fops },
+ { "health_check", &obd_proc_health_fops },
+ { "jobid_var", &obd_proc_jobid_var_fops },
+ { 0 }
+};
+#else
+#define lprocfs_base NULL
+#endif /* LPROCFS */
+
+static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos)
+{
+ if (*pos >= class_devno_max())
+ return NULL;
+
+ return pos;
+}
+
+static void obd_device_list_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ ++*pos;
+ if (*pos >= class_devno_max())
+ return NULL;
+
+ return pos;
+}
+
+static int obd_device_list_seq_show(struct seq_file *p, void *v)
+{
+ loff_t index = *(loff_t *)v;
+ struct obd_device *obd = class_num2obd((int)index);
+ char *status;
+
+ if (obd == NULL)
+ return 0;
+
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ if (obd->obd_stopping)
+ status = "ST";
+ else if (obd->obd_inactive)
+ status = "IN";
+ else if (obd->obd_set_up)
+ status = "UP";
+ else if (obd->obd_attached)
+ status = "AT";
+ else
+ status = "--";
+
+ return seq_printf(p, "%3d %s %s %s %s %d\n",
+ (int)index, status, obd->obd_type->typ_name,
+ obd->obd_name, obd->obd_uuid.uuid,
+ atomic_read(&obd->obd_refcount));
+}
+
+struct seq_operations obd_device_list_sops = {
+ .start = obd_device_list_seq_start,
+ .stop = obd_device_list_seq_stop,
+ .next = obd_device_list_seq_next,
+ .show = obd_device_list_seq_show,
+};
+
+static int obd_device_list_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc = seq_open(file, &obd_device_list_sops);
+
+ if (rc)
+ return rc;
+
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+
+ return 0;
+}
+
+struct file_operations obd_device_list_fops = {
+ .owner = THIS_MODULE,
+ .open = obd_device_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+int class_procfs_init(void)
+{
+ int rc;
+ ENTRY;
+
+ obd_sysctl_init();
+ proc_lustre_root = lprocfs_register("fs/lustre", NULL,
+ lprocfs_base, NULL);
+ rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
+ &obd_device_list_fops, NULL);
+ if (rc)
+ CERROR("error adding /proc/fs/lustre/devices file\n");
+ RETURN(0);
+}
+
+int class_procfs_clean(void)
+{
+ ENTRY;
+ if (proc_lustre_root) {
+ lprocfs_remove(&proc_lustre_root);
+ }
+ RETURN(0);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
new file mode 100644
index 000000000000..6ee347153a16
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
@@ -0,0 +1,222 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/linux/linux-obdo.c
+ *
+ * Object Devices Class Driver
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/module.h>
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
+
+#include <linux/fs.h>
+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
+
+/*FIXME: Just copy from obdo_from_inode*/
+void obdo_from_la(struct obdo *dst, struct lu_attr *la, __u64 valid)
+{
+ obd_flag newvalid = 0;
+
+ if (valid & LA_ATIME) {
+ dst->o_atime = la->la_atime;
+ newvalid |= OBD_MD_FLATIME;
+ }
+ if (valid & LA_MTIME) {
+ dst->o_mtime = la->la_mtime;
+ newvalid |= OBD_MD_FLMTIME;
+ }
+ if (valid & LA_CTIME) {
+ dst->o_ctime = la->la_ctime;
+ newvalid |= OBD_MD_FLCTIME;
+ }
+ if (valid & LA_SIZE) {
+ dst->o_size = la->la_size;
+ newvalid |= OBD_MD_FLSIZE;
+ }
+ if (valid & LA_BLOCKS) { /* allocation of space (x512 bytes) */
+ dst->o_blocks = la->la_blocks;
+ newvalid |= OBD_MD_FLBLOCKS;
+ }
+ if (valid & LA_TYPE) {
+ dst->o_mode = (dst->o_mode & S_IALLUGO) |
+ (la->la_mode & S_IFMT);
+ newvalid |= OBD_MD_FLTYPE;
+ }
+ if (valid & LA_MODE) {
+ dst->o_mode = (dst->o_mode & S_IFMT) |
+ (la->la_mode & S_IALLUGO);
+ newvalid |= OBD_MD_FLMODE;
+ }
+ if (valid & LA_UID) {
+ dst->o_uid = la->la_uid;
+ newvalid |= OBD_MD_FLUID;
+ }
+ if (valid & LA_GID) {
+ dst->o_gid = la->la_gid;
+ newvalid |= OBD_MD_FLGID;
+ }
+ dst->o_valid |= newvalid;
+}
+EXPORT_SYMBOL(obdo_from_la);
+
+/*FIXME: Just copy from obdo_from_inode*/
+void la_from_obdo(struct lu_attr *dst, struct obdo *obdo, obd_flag valid)
+{
+ __u64 newvalid = 0;
+
+ valid &= obdo->o_valid;
+
+ if (valid & OBD_MD_FLATIME) {
+ dst->la_atime = obdo->o_atime;
+ newvalid |= LA_ATIME;
+ }
+ if (valid & OBD_MD_FLMTIME) {
+ dst->la_mtime = obdo->o_mtime;
+ newvalid |= LA_MTIME;
+ }
+ if (valid & OBD_MD_FLCTIME) {
+ dst->la_ctime = obdo->o_ctime;
+ newvalid |= LA_CTIME;
+ }
+ if (valid & OBD_MD_FLSIZE) {
+ dst->la_size = obdo->o_size;
+ newvalid |= LA_SIZE;
+ }
+ if (valid & OBD_MD_FLBLOCKS) {
+ dst->la_blocks = obdo->o_blocks;
+ newvalid |= LA_BLOCKS;
+ }
+ if (valid & OBD_MD_FLTYPE) {
+ dst->la_mode = (dst->la_mode & S_IALLUGO) |
+ (obdo->o_mode & S_IFMT);
+ newvalid |= LA_TYPE;
+ }
+ if (valid & OBD_MD_FLMODE) {
+ dst->la_mode = (dst->la_mode & S_IFMT) |
+ (obdo->o_mode & S_IALLUGO);
+ newvalid |= LA_MODE;
+ }
+ if (valid & OBD_MD_FLUID) {
+ dst->la_uid = obdo->o_uid;
+ newvalid |= LA_UID;
+ }
+ if (valid & OBD_MD_FLGID) {
+ dst->la_gid = obdo->o_gid;
+ newvalid |= LA_GID;
+ }
+ dst->la_valid = newvalid;
+}
+EXPORT_SYMBOL(la_from_obdo);
+
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+ valid &= src->o_valid;
+
+ if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
+ CDEBUG(D_INODE,
+ "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
+ src->o_valid, LTIME_S(dst->i_mtime),
+ LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
+
+ if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
+ LTIME_S(dst->i_atime) = src->o_atime;
+ if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
+ LTIME_S(dst->i_mtime) = src->o_mtime;
+ if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
+ LTIME_S(dst->i_ctime) = src->o_ctime;
+ if (valid & OBD_MD_FLSIZE)
+ i_size_write(dst, src->o_size);
+ /* optimum IO size */
+ if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits))
+ dst->i_blkbits = ffs(src->o_blksize) - 1;
+
+ if (dst->i_blkbits < PAGE_CACHE_SHIFT)
+ dst->i_blkbits = PAGE_CACHE_SHIFT;
+
+ /* allocation of space */
+ if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
+ /*
+ * XXX shouldn't overflow be checked here like in
+ * obdo_to_inode().
+ */
+ dst->i_blocks = src->o_blocks;
+}
+EXPORT_SYMBOL(obdo_refresh_inode);
+
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+ valid &= src->o_valid;
+
+ LASSERTF(!(valid & (OBD_MD_FLTYPE | OBD_MD_FLGENER | OBD_MD_FLFID |
+ OBD_MD_FLID | OBD_MD_FLGROUP)),
+ "object "DOSTID", valid %x\n", POSTID(&src->o_oi), valid);
+
+ if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
+ CDEBUG(D_INODE,
+ "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
+ src->o_valid, LTIME_S(dst->i_mtime),
+ LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
+
+ if (valid & OBD_MD_FLATIME)
+ LTIME_S(dst->i_atime) = src->o_atime;
+ if (valid & OBD_MD_FLMTIME)
+ LTIME_S(dst->i_mtime) = src->o_mtime;
+ if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
+ LTIME_S(dst->i_ctime) = src->o_ctime;
+ if (valid & OBD_MD_FLSIZE)
+ i_size_write(dst, src->o_size);
+ if (valid & OBD_MD_FLBLOCKS) { /* allocation of space */
+ dst->i_blocks = src->o_blocks;
+ if (dst->i_blocks < src->o_blocks) /* overflow */
+ dst->i_blocks = -1;
+
+ }
+ if (valid & OBD_MD_FLBLKSZ)
+ dst->i_blkbits = ffs(src->o_blksize)-1;
+ if (valid & OBD_MD_FLMODE)
+ dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
+ if (valid & OBD_MD_FLUID)
+ dst->i_uid = src->o_uid;
+ if (valid & OBD_MD_FLGID)
+ dst->i_gid = src->o_gid;
+ if (valid & OBD_MD_FLFLAGS)
+ dst->i_flags = src->o_flags;
+}
+EXPORT_SYMBOL(obdo_to_inode);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
new file mode 100644
index 000000000000..46aad6813cab
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -0,0 +1,445 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/sysctl.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/version.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <linux/utsname.h>
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#ifdef CONFIG_SYSCTL
+ctl_table_header_t *obd_table_header = NULL;
+#endif
+
+
+#define OBD_SYSCTL 300
+
+enum {
+ OBD_TIMEOUT = 3, /* RPC timeout before recovery/intr */
+ OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */
+ OBD_MEMUSED, /* bytes currently OBD_ALLOCated */
+ OBD_PAGESUSED, /* pages currently OBD_PAGE_ALLOCated */
+ OBD_MAXMEMUSED, /* maximum bytes OBD_ALLOCated concurrently */
+ OBD_MAXPAGESUSED, /* maximum pages OBD_PAGE_ALLOCated concurrently */
+ OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */
+ OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */
+ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */
+ OBD_DEBUG_PEER_ON_TIMEOUT, /* dump peer debug when RPC times out */
+ OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
+ OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
+ OBD_AT_MIN, /* Adaptive timeouts params */
+ OBD_AT_MAX,
+ OBD_AT_EXTRA,
+ OBD_AT_EARLY_MARGIN,
+ OBD_AT_HISTORY,
+};
+
+
+int LL_PROC_PROTO(proc_set_timeout)
+{
+ int rc;
+
+ rc = ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ if (ldlm_timeout >= obd_timeout)
+ ldlm_timeout = max(obd_timeout / 3, 1U);
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_memory_alloc)
+{
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write)
+ return -EINVAL;
+
+ len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_sum());
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ *ppos += *lenp;
+ return 0;
+}
+
+int LL_PROC_PROTO(proc_pages_alloc)
+{
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write)
+ return -EINVAL;
+
+ len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_sum());
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ *ppos += *lenp;
+ return 0;
+}
+
+int LL_PROC_PROTO(proc_mem_max)
+{
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write)
+ return -EINVAL;
+
+ len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_max());
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ *ppos += *lenp;
+ return 0;
+}
+
+int LL_PROC_PROTO(proc_pages_max)
+{
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write)
+ return -EINVAL;
+
+ len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ *ppos += *lenp;
+ return 0;
+}
+
+int LL_PROC_PROTO(proc_max_dirty_pages_in_mb)
+{
+ int rc = 0;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!table->data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write) {
+ rc = lprocfs_write_frac_helper(buffer, *lenp,
+ (unsigned int*)table->data,
+ 1 << (20 - PAGE_CACHE_SHIFT));
+ /* Don't allow them to let dirty pages exceed 90% of system
+ * memory and set a hard minimum of 4MB. */
+ if (obd_max_dirty_pages > ((num_physpages / 10) * 9)) {
+ CERROR("Refusing to set max dirty pages to %u, which "
+ "is more than 90%% of available RAM; setting "
+ "to %lu\n", obd_max_dirty_pages,
+ ((num_physpages / 10) * 9));
+ obd_max_dirty_pages = ((num_physpages / 10) * 9);
+ } else if (obd_max_dirty_pages < 4 << (20 - PAGE_CACHE_SHIFT)) {
+ obd_max_dirty_pages = 4 << (20 - PAGE_CACHE_SHIFT);
+ }
+ } else {
+ char buf[21];
+ int len;
+
+ len = lprocfs_read_frac_helper(buf, sizeof(buf),
+ *(unsigned int*)table->data,
+ 1 << (20 - PAGE_CACHE_SHIFT));
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ }
+ *ppos += *lenp;
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_alloc_fail_rate)
+{
+ int rc = 0;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!table->data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+ if (write) {
+ rc = lprocfs_write_frac_helper(buffer, *lenp,
+ (unsigned int*)table->data,
+ OBD_ALLOC_FAIL_MULT);
+ } else {
+ char buf[21];
+ int len;
+
+ len = lprocfs_read_frac_helper(buf, 21,
+ *(unsigned int*)table->data,
+ OBD_ALLOC_FAIL_MULT);
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ }
+ *ppos += *lenp;
+ return rc;
+}
+
+int LL_PROC_PROTO(proc_at_min)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_max)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_extra)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_early_margin)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+int LL_PROC_PROTO(proc_at_history)
+{
+ return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table_t obd_table[] = {
+ {
+ INIT_CTL_NAME(OBD_TIMEOUT)
+ .procname = "timeout",
+ .data = &obd_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_set_timeout
+ },
+ {
+ INIT_CTL_NAME(OBD_DEBUG_PEER_ON_TIMEOUT)
+ .procname = "debug_peer_on_timeout",
+ .data = &obd_debug_peer_on_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME(OBD_DUMP_ON_TIMEOUT)
+ .procname = "dump_on_timeout",
+ .data = &obd_dump_on_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME(OBD_DUMP_ON_EVICTION)
+ .procname = "dump_on_eviction",
+ .data = &obd_dump_on_eviction,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ INIT_CTL_NAME(OBD_MEMUSED)
+ .procname = "memused",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+ .proc_handler = &proc_memory_alloc
+ },
+ {
+ INIT_CTL_NAME(OBD_PAGESUSED)
+ .procname = "pagesused",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+ .proc_handler = &proc_pages_alloc
+ },
+ {
+ INIT_CTL_NAME(OBD_MAXMEMUSED)
+ .procname = "memused_max",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+ .proc_handler = &proc_mem_max
+ },
+ {
+ INIT_CTL_NAME(OBD_MAXPAGESUSED)
+ .procname = "pagesused_max",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+ .proc_handler = &proc_pages_max
+ },
+ {
+ INIT_CTL_NAME(OBD_LDLM_TIMEOUT)
+ .procname = "ldlm_timeout",
+ .data = &ldlm_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_set_timeout
+ },
+ {
+ INIT_CTL_NAME(OBD_ALLOC_FAIL_RATE)
+ .procname = "alloc_fail_rate",
+ .data = &obd_alloc_fail_rate,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_alloc_fail_rate
+ },
+ {
+ INIT_CTL_NAME(OBD_MAX_DIRTY_PAGES)
+ .procname = "max_dirty_mb",
+ .data = &obd_max_dirty_pages,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_max_dirty_pages_in_mb
+ },
+ {
+ INIT_CTL_NAME(OBD_AT_MIN)
+ .procname = "at_min",
+ .data = &at_min,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_min
+ },
+ {
+ INIT_CTL_NAME(OBD_AT_MAX)
+ .procname = "at_max",
+ .data = &at_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_max
+ },
+ {
+ INIT_CTL_NAME(OBD_AT_EXTRA)
+ .procname = "at_extra",
+ .data = &at_extra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_extra
+ },
+ {
+ INIT_CTL_NAME(OBD_AT_EARLY_MARGIN)
+ .procname = "at_early_margin",
+ .data = &at_early_margin,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_early_margin
+ },
+ {
+ INIT_CTL_NAME(OBD_AT_HISTORY)
+ .procname = "at_history",
+ .data = &at_history,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_at_history
+ },
+ { INIT_CTL_NAME(0) }
+};
+
+static ctl_table_t parent_table[] = {
+ {
+ INIT_CTL_NAME(OBD_SYSCTL)
+ .procname = "lustre",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0555,
+ .child = obd_table
+ },
+ { INIT_CTL_NAME(0) }
+};
+#endif
+
+void obd_sysctl_init (void)
+{
+#ifdef CONFIG_SYSCTL
+ if ( !obd_table_header )
+ obd_table_header = cfs_register_sysctl_table(parent_table, 0);
+#endif
+}
+
+void obd_sysctl_clean (void)
+{
+#ifdef CONFIG_SYSCTL
+ if ( obd_table_header )
+ unregister_sysctl_table(obd_table_header);
+ obd_table_header = NULL;
+#endif
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
new file mode 100644
index 000000000000..b1d215e56991
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -0,0 +1,966 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ * if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Alex Zhuravlev <bzzz@whamcloud.com>
+ * Author: Mikhail Pershin <tappro@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include "llog_internal.h"
+
+/*
+ * Allocate a new log or catalog handle
+ * Used inside llog_open().
+ */
+struct llog_handle *llog_alloc_handle(void)
+{
+ struct llog_handle *loghandle;
+
+ OBD_ALLOC_PTR(loghandle);
+ if (loghandle == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ init_rwsem(&loghandle->lgh_lock);
+ spin_lock_init(&loghandle->lgh_hdr_lock);
+ INIT_LIST_HEAD(&loghandle->u.phd.phd_entry);
+ atomic_set(&loghandle->lgh_refcount, 1);
+
+ return loghandle;
+}
+
+/*
+ * Free llog handle and header data if exists. Used in llog_close() only
+ */
+void llog_free_handle(struct llog_handle *loghandle)
+{
+ LASSERT(loghandle != NULL);
+
+ /* failed llog_init_handle */
+ if (!loghandle->lgh_hdr)
+ goto out;
+
+ if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
+ LASSERT(list_empty(&loghandle->u.phd.phd_entry));
+ else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
+ LASSERT(list_empty(&loghandle->u.chd.chd_head));
+ LASSERT(sizeof(*(loghandle->lgh_hdr)) == LLOG_CHUNK_SIZE);
+ OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
+out:
+ OBD_FREE_PTR(loghandle);
+}
+
+void llog_handle_get(struct llog_handle *loghandle)
+{
+ atomic_inc(&loghandle->lgh_refcount);
+}
+
+void llog_handle_put(struct llog_handle *loghandle)
+{
+ LASSERT(atomic_read(&loghandle->lgh_refcount) > 0);
+ if (atomic_dec_and_test(&loghandle->lgh_refcount))
+ llog_free_handle(loghandle);
+}
+
+/* returns negative on error; 0 if success; 1 if success & log destroyed */
+int llog_cancel_rec(const struct lu_env *env, struct llog_handle *loghandle,
+ int index)
+{
+ struct llog_log_hdr *llh = loghandle->lgh_hdr;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_RPCTRACE, "Canceling %d in log "DOSTID"\n",
+ index, POSTID(&loghandle->lgh_id.lgl_oi));
+
+ if (index == 0) {
+ CERROR("Can't cancel index 0 which is header\n");
+ RETURN(-EINVAL);
+ }
+
+ spin_lock(&loghandle->lgh_hdr_lock);
+ if (!ext2_clear_bit(index, llh->llh_bitmap)) {
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ CDEBUG(D_RPCTRACE, "Catalog index %u already clear?\n", index);
+ RETURN(-ENOENT);
+ }
+
+ llh->llh_count--;
+
+ if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
+ (llh->llh_count == 1) &&
+ (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ rc = llog_destroy(env, loghandle);
+ if (rc < 0) {
+ CERROR("%s: can't destroy empty llog #"DOSTID
+ "#%08x: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, rc);
+ GOTO(out_err, rc);
+ }
+ RETURN(1);
+ }
+ spin_unlock(&loghandle->lgh_hdr_lock);
+
+ rc = llog_write(env, loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
+ if (rc < 0) {
+ CERROR("%s: fail to write header for llog #"DOSTID
+ "#%08x: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, rc);
+ GOTO(out_err, rc);
+ }
+ RETURN(0);
+out_err:
+ spin_lock(&loghandle->lgh_hdr_lock);
+ ext2_set_bit(index, llh->llh_bitmap);
+ llh->llh_count++;
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ return rc;
+}
+EXPORT_SYMBOL(llog_cancel_rec);
+
+static int llog_read_header(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct obd_uuid *uuid)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ rc = llog_handle2ops(handle, &lop);
+ if (rc)
+ RETURN(rc);
+
+ if (lop->lop_read_header == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_read_header(env, handle);
+ if (rc == LLOG_EEMPTY) {
+ struct llog_log_hdr *llh = handle->lgh_hdr;
+
+ handle->lgh_last_idx = 0; /* header is record with index 0 */
+ llh->llh_count = 1; /* for the header record */
+ llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
+ llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
+ llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
+ llh->llh_timestamp = cfs_time_current_sec();
+ if (uuid)
+ memcpy(&llh->llh_tgtuuid, uuid,
+ sizeof(llh->llh_tgtuuid));
+ llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
+ ext2_set_bit(0, llh->llh_bitmap);
+ rc = 0;
+ }
+ return rc;
+}
+
+int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
+ int flags, struct obd_uuid *uuid)
+{
+ struct llog_log_hdr *llh;
+ int rc;
+
+ ENTRY;
+ LASSERT(handle->lgh_hdr == NULL);
+
+ OBD_ALLOC_PTR(llh);
+ if (llh == NULL)
+ RETURN(-ENOMEM);
+ handle->lgh_hdr = llh;
+ /* first assign flags to use llog_client_ops */
+ llh->llh_flags = flags;
+ rc = llog_read_header(env, handle, uuid);
+ if (rc == 0) {
+ if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN &&
+ flags & LLOG_F_IS_CAT) ||
+ (llh->llh_flags & LLOG_F_IS_CAT &&
+ flags & LLOG_F_IS_PLAIN))) {
+ CERROR("%s: llog type is %s but initializing %s\n",
+ handle->lgh_ctxt->loc_obd->obd_name,
+ llh->llh_flags & LLOG_F_IS_CAT ?
+ "catalog" : "plain",
+ flags & LLOG_F_IS_CAT ? "catalog" : "plain");
+ GOTO(out, rc = -EINVAL);
+ } else if (llh->llh_flags &
+ (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) {
+ /*
+ * it is possible to open llog without specifying llog
+ * type so it is taken from llh_flags
+ */
+ flags = llh->llh_flags;
+ } else {
+ /* for some reason the llh_flags has no type set */
+ CERROR("llog type is not specified!\n");
+ GOTO(out, rc = -EINVAL);
+ }
+ if (unlikely(uuid &&
+ !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
+ CERROR("%s: llog uuid mismatch: %s/%s\n",
+ handle->lgh_ctxt->loc_obd->obd_name,
+ (char *)uuid->uuid,
+ (char *)llh->llh_tgtuuid.uuid);
+ GOTO(out, rc = -EEXIST);
+ }
+ }
+ if (flags & LLOG_F_IS_CAT) {
+ LASSERT(list_empty(&handle->u.chd.chd_head));
+ INIT_LIST_HEAD(&handle->u.chd.chd_head);
+ llh->llh_size = sizeof(struct llog_logid_rec);
+ } else if (!(flags & LLOG_F_IS_PLAIN)) {
+ CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
+ handle->lgh_ctxt->loc_obd->obd_name,
+ flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
+ rc = -EINVAL;
+ }
+out:
+ if (rc) {
+ OBD_FREE_PTR(llh);
+ handle->lgh_hdr = NULL;
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_init_handle);
+
+int llog_copy_handler(const struct lu_env *env,
+ struct llog_handle *llh,
+ struct llog_rec_hdr *rec,
+ void *data)
+{
+ struct llog_rec_hdr local_rec = *rec;
+ struct llog_handle *local_llh = (struct llog_handle *)data;
+ char *cfg_buf = (char*) (rec + 1);
+ struct lustre_cfg *lcfg;
+ int rc = 0;
+ ENTRY;
+
+ /* Append all records */
+ local_rec.lrh_len -= sizeof(*rec) + sizeof(struct llog_rec_tail);
+ rc = llog_write(env, local_llh, &local_rec, NULL, 0,
+ (void *)cfg_buf, -1);
+
+ lcfg = (struct lustre_cfg *)cfg_buf;
+ CDEBUG(D_INFO, "idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+ rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+ lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_copy_handler);
+
+static int llog_process_thread(void *arg)
+{
+ struct llog_process_info *lpi = arg;
+ struct llog_handle *loghandle = lpi->lpi_loghandle;
+ struct llog_log_hdr *llh = loghandle->lgh_hdr;
+ struct llog_process_cat_data *cd = lpi->lpi_catdata;
+ char *buf;
+ __u64 cur_offset = LLOG_CHUNK_SIZE;
+ __u64 last_offset;
+ int rc = 0, index = 1, last_index;
+ int saved_index = 0;
+ int last_called_index = 0;
+
+ ENTRY;
+
+ LASSERT(llh);
+
+ OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
+ if (!buf) {
+ lpi->lpi_rc = -ENOMEM;
+ RETURN(0);
+ }
+
+ if (cd != NULL) {
+ last_called_index = cd->lpcd_first_idx;
+ index = cd->lpcd_first_idx + 1;
+ }
+ if (cd != NULL && cd->lpcd_last_idx)
+ last_index = cd->lpcd_last_idx;
+ else
+ last_index = LLOG_BITMAP_BYTES * 8 - 1;
+
+ while (rc == 0) {
+ struct llog_rec_hdr *rec;
+
+ /* skip records not set in bitmap */
+ while (index <= last_index &&
+ !ext2_test_bit(index, llh->llh_bitmap))
+ ++index;
+
+ LASSERT(index <= last_index + 1);
+ if (index == last_index + 1)
+ break;
+repeat:
+ CDEBUG(D_OTHER, "index: %d last_index %d\n",
+ index, last_index);
+
+ /* get the buf with our target record; avoid old garbage */
+ memset(buf, 0, LLOG_CHUNK_SIZE);
+ last_offset = cur_offset;
+ rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
+ index, &cur_offset, buf, LLOG_CHUNK_SIZE);
+ if (rc)
+ GOTO(out, rc);
+
+ /* NB: when rec->lrh_len is accessed it is already swabbed
+ * since it is used at the "end" of the loop and the rec
+ * swabbing is done at the beginning of the loop. */
+ for (rec = (struct llog_rec_hdr *)buf;
+ (char *)rec < buf + LLOG_CHUNK_SIZE;
+ rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
+
+ CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
+ rec, rec->lrh_type);
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+
+ CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
+ rec->lrh_type, rec->lrh_index);
+
+ if (rec->lrh_index == 0) {
+ /* probably another rec just got added? */
+ if (index <= loghandle->lgh_last_idx)
+ GOTO(repeat, rc = 0);
+ GOTO(out, rc = 0); /* no more records */
+ }
+ if (rec->lrh_len == 0 ||
+ rec->lrh_len > LLOG_CHUNK_SIZE) {
+ CWARN("invalid length %d in llog record for "
+ "index %d/%d\n", rec->lrh_len,
+ rec->lrh_index, index);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (rec->lrh_index < index) {
+ CDEBUG(D_OTHER, "skipping lrh_index %d\n",
+ rec->lrh_index);
+ continue;
+ }
+
+ CDEBUG(D_OTHER,
+ "lrh_index: %d lrh_len: %d (%d remains)\n",
+ rec->lrh_index, rec->lrh_len,
+ (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
+
+ loghandle->lgh_cur_idx = rec->lrh_index;
+ loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
+ last_offset;
+
+ /* if set, process the callback on this record */
+ if (ext2_test_bit(index, llh->llh_bitmap)) {
+ rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
+ lpi->lpi_cbdata);
+ last_called_index = index;
+ if (rc == LLOG_PROC_BREAK) {
+ GOTO(out, rc);
+ } else if (rc == LLOG_DEL_RECORD) {
+ llog_cancel_rec(lpi->lpi_env,
+ loghandle,
+ rec->lrh_index);
+ rc = 0;
+ }
+ if (rc)
+ GOTO(out, rc);
+ } else {
+ CDEBUG(D_OTHER, "Skipped index %d\n", index);
+ }
+
+ /* next record, still in buffer? */
+ ++index;
+ if (index > last_index)
+ GOTO(out, rc = 0);
+ }
+ }
+
+out:
+ if (cd != NULL)
+ cd->lpcd_last_idx = last_called_index;
+
+ OBD_FREE(buf, LLOG_CHUNK_SIZE);
+ lpi->lpi_rc = rc;
+ return 0;
+}
+
+static int llog_process_thread_daemonize(void *arg)
+{
+ struct llog_process_info *lpi = arg;
+ struct lu_env env;
+ int rc;
+
+ unshare_fs_struct();
+
+ /* client env has no keys, tags is just 0 */
+ rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD);
+ if (rc)
+ goto out;
+ lpi->lpi_env = &env;
+
+ rc = llog_process_thread(arg);
+
+ lu_env_fini(&env);
+out:
+ complete(&lpi->lpi_completion);
+ return rc;
+}
+
+int llog_process_or_fork(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ llog_cb_t cb, void *data, void *catdata, bool fork)
+{
+ struct llog_process_info *lpi;
+ int rc;
+
+ ENTRY;
+
+ OBD_ALLOC_PTR(lpi);
+ if (lpi == NULL) {
+ CERROR("cannot alloc pointer\n");
+ RETURN(-ENOMEM);
+ }
+ lpi->lpi_loghandle = loghandle;
+ lpi->lpi_cb = cb;
+ lpi->lpi_cbdata = data;
+ lpi->lpi_catdata = catdata;
+
+ if (fork) {
+ /* The new thread can't use parent env,
+ * init the new one in llog_process_thread_daemonize. */
+ lpi->lpi_env = NULL;
+ init_completion(&lpi->lpi_completion);
+ rc = PTR_ERR(kthread_run(llog_process_thread_daemonize, lpi,
+ "llog_process_thread"));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("%s: cannot start thread: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name, rc);
+ OBD_FREE_PTR(lpi);
+ RETURN(rc);
+ }
+ wait_for_completion(&lpi->lpi_completion);
+ } else {
+ lpi->lpi_env = env;
+ llog_process_thread(lpi);
+ }
+ rc = lpi->lpi_rc;
+ OBD_FREE_PTR(lpi);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_process_or_fork);
+
+int llog_process(const struct lu_env *env, struct llog_handle *loghandle,
+ llog_cb_t cb, void *data, void *catdata)
+{
+ return llog_process_or_fork(env, loghandle, cb, data, catdata, true);
+}
+EXPORT_SYMBOL(llog_process);
+
+inline int llog_get_size(struct llog_handle *loghandle)
+{
+ if (loghandle && loghandle->lgh_hdr)
+ return loghandle->lgh_hdr->llh_count;
+ return 0;
+}
+EXPORT_SYMBOL(llog_get_size);
+
+int llog_reverse_process(const struct lu_env *env,
+ struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata)
+{
+ struct llog_log_hdr *llh = loghandle->lgh_hdr;
+ struct llog_process_cat_data *cd = catdata;
+ void *buf;
+ int rc = 0, first_index = 1, index, idx;
+ ENTRY;
+
+ OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
+ if (!buf)
+ RETURN(-ENOMEM);
+
+ if (cd != NULL)
+ first_index = cd->lpcd_first_idx + 1;
+ if (cd != NULL && cd->lpcd_last_idx)
+ index = cd->lpcd_last_idx;
+ else
+ index = LLOG_BITMAP_BYTES * 8 - 1;
+
+ while (rc == 0) {
+ struct llog_rec_hdr *rec;
+ struct llog_rec_tail *tail;
+
+ /* skip records not set in bitmap */
+ while (index >= first_index &&
+ !ext2_test_bit(index, llh->llh_bitmap))
+ --index;
+
+ LASSERT(index >= first_index - 1);
+ if (index == first_index - 1)
+ break;
+
+ /* get the buf with our target record; avoid old garbage */
+ memset(buf, 0, LLOG_CHUNK_SIZE);
+ rc = llog_prev_block(env, loghandle, index, buf,
+ LLOG_CHUNK_SIZE);
+ if (rc)
+ GOTO(out, rc);
+
+ rec = buf;
+ idx = rec->lrh_index;
+ CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
+ while (idx < index) {
+ rec = (void *)rec + rec->lrh_len;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+ idx ++;
+ }
+ LASSERT(idx == index);
+ tail = (void *)rec + rec->lrh_len - sizeof(*tail);
+
+ /* process records in buffer, starting where we found one */
+ while ((void *)tail > buf) {
+ if (tail->lrt_index == 0)
+ GOTO(out, rc = 0); /* no more records */
+
+ /* if set, process the callback on this record */
+ if (ext2_test_bit(index, llh->llh_bitmap)) {
+ rec = (void *)tail - tail->lrt_len +
+ sizeof(*tail);
+
+ rc = cb(env, loghandle, rec, data);
+ if (rc == LLOG_PROC_BREAK) {
+ GOTO(out, rc);
+ } else if (rc == LLOG_DEL_RECORD) {
+ llog_cancel_rec(env, loghandle,
+ tail->lrt_index);
+ rc = 0;
+ }
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ /* previous record, still in buffer? */
+ --index;
+ if (index < first_index)
+ GOTO(out, rc = 0);
+ tail = (void *)tail - tail->lrt_len;
+ }
+ }
+
+out:
+ if (buf)
+ OBD_FREE(buf, LLOG_CHUNK_SIZE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_reverse_process);
+
+/**
+ * new llog API
+ *
+ * API functions:
+ * llog_open - open llog, may not exist
+ * llog_exist - check if llog exists
+ * llog_close - close opened llog, pair for open, frees llog_handle
+ * llog_declare_create - declare llog creation
+ * llog_create - create new llog on disk, need transaction handle
+ * llog_declare_write_rec - declaration of llog write
+ * llog_write_rec - write llog record on disk, need transaction handle
+ * llog_declare_add - declare llog catalog record addition
+ * llog_add - add llog record in catalog, need transaction handle
+ */
+int llog_exist(struct llog_handle *loghandle)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(loghandle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_exist == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ rc = lop->lop_exist(loghandle);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_exist);
+
+int llog_declare_create(const struct lu_env *env,
+ struct llog_handle *loghandle, struct thandle *th)
+{
+ struct llog_operations *lop;
+ int raised, rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(loghandle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_declare_create == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lop->lop_declare_create(env, loghandle, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_declare_create);
+
+int llog_create(const struct lu_env *env, struct llog_handle *handle,
+ struct thandle *th)
+{
+ struct llog_operations *lop;
+ int raised, rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(handle, &lop);
+ if (rc)
+ RETURN(rc);
+ if (lop->lop_create == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lop->lop_create(env, handle, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_create);
+
+int llog_declare_write_rec(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, int idx,
+ struct thandle *th)
+{
+ struct llog_operations *lop;
+ int raised, rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(handle, &lop);
+ if (rc)
+ RETURN(rc);
+ LASSERT(lop);
+ if (lop->lop_declare_write_rec == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lop->lop_declare_write_rec(env, handle, rec, idx, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_declare_write_rec);
+
+int llog_write_rec(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+ int numcookies, void *buf, int idx, struct thandle *th)
+{
+ struct llog_operations *lop;
+ int raised, rc, buflen;
+
+ ENTRY;
+
+ rc = llog_handle2ops(handle, &lop);
+ if (rc)
+ RETURN(rc);
+
+ LASSERT(lop);
+ if (lop->lop_write_rec == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ if (buf)
+ buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) +
+ sizeof(struct llog_rec_tail);
+ else
+ buflen = rec->lrh_len;
+ LASSERT(cfs_size_round(buflen) == buflen);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lop->lop_write_rec(env, handle, rec, logcookies, numcookies,
+ buf, idx, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_write_rec);
+
+int llog_add(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+ void *buf, struct thandle *th)
+{
+ int raised, rc;
+
+ ENTRY;
+
+ if (lgh->lgh_logops->lop_add == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lgh->lgh_logops->lop_add(env, lgh, rec, logcookies, buf, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_add);
+
+int llog_declare_add(const struct lu_env *env, struct llog_handle *lgh,
+ struct llog_rec_hdr *rec, struct thandle *th)
+{
+ int raised, rc;
+
+ ENTRY;
+
+ if (lgh->lgh_logops->lop_declare_add == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = lgh->lgh_logops->lop_declare_add(env, lgh, rec, th);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_declare_add);
+
+/**
+ * Helper function to open llog or create it if doesn't exist.
+ * It hides all transaction handling from caller.
+ */
+int llog_open_create(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_handle **res, struct llog_logid *logid,
+ char *name)
+{
+ struct thandle *th;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_open(env, ctxt, res, logid, name, LLOG_OPEN_NEW);
+ if (rc)
+ RETURN(rc);
+
+ if (llog_exist(*res))
+ RETURN(0);
+
+ if ((*res)->lgh_obj != NULL) {
+ struct dt_device *d;
+
+ d = lu2dt_dev((*res)->lgh_obj->do_lu.lo_dev);
+
+ th = dt_trans_create(env, d);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ rc = llog_declare_create(env, *res, th);
+ if (rc == 0) {
+ rc = dt_trans_start_local(env, d, th);
+ if (rc == 0)
+ rc = llog_create(env, *res, th);
+ }
+ dt_trans_stop(env, d, th);
+ } else {
+ /* lvfs compat code */
+ LASSERT((*res)->lgh_file == NULL);
+ rc = llog_create(env, *res, NULL);
+ }
+out:
+ if (rc)
+ llog_close(env, *res);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_open_create);
+
+/**
+ * Helper function to delete existent llog.
+ */
+int llog_erase(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_logid *logid, char *name)
+{
+ struct llog_handle *handle;
+ int rc = 0, rc2;
+
+ ENTRY;
+
+ /* nothing to erase */
+ if (name == NULL && logid == NULL)
+ RETURN(0);
+
+ rc = llog_open(env, ctxt, &handle, logid, name, LLOG_OPEN_EXISTS);
+ if (rc < 0)
+ RETURN(rc);
+
+ rc = llog_init_handle(env, handle, LLOG_F_IS_PLAIN, NULL);
+ if (rc == 0)
+ rc = llog_destroy(env, handle);
+
+ rc2 = llog_close(env, handle);
+ if (rc == 0)
+ rc = rc2;
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_erase);
+
+/*
+ * Helper function for write record in llog.
+ * It hides all transaction handling from caller.
+ * Valid only with local llog.
+ */
+int llog_write(const struct lu_env *env, struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ int cookiecount, void *buf, int idx)
+{
+ int rc;
+
+ ENTRY;
+
+ LASSERT(loghandle);
+ LASSERT(loghandle->lgh_ctxt);
+
+ if (loghandle->lgh_obj != NULL) {
+ struct dt_device *dt;
+ struct thandle *th;
+
+ dt = lu2dt_dev(loghandle->lgh_obj->do_lu.lo_dev);
+
+ th = dt_trans_create(env, dt);
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
+
+ rc = llog_declare_write_rec(env, loghandle, rec, idx, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_trans_start_local(env, dt, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ down_write(&loghandle->lgh_lock);
+ rc = llog_write_rec(env, loghandle, rec, reccookie,
+ cookiecount, buf, idx, th);
+ up_write(&loghandle->lgh_lock);
+out_trans:
+ dt_trans_stop(env, dt, th);
+ } else { /* lvfs compatibility */
+ down_write(&loghandle->lgh_lock);
+ rc = llog_write_rec(env, loghandle, rec, reccookie,
+ cookiecount, buf, idx, NULL);
+ up_write(&loghandle->lgh_lock);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_write);
+
+int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_handle **lgh, struct llog_logid *logid,
+ char *name, enum llog_open_param open_param)
+{
+ int raised;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(ctxt);
+ LASSERT(ctxt->loc_logops);
+
+ if (ctxt->loc_logops->lop_open == NULL) {
+ *lgh = NULL;
+ RETURN(-EOPNOTSUPP);
+ }
+
+ *lgh = llog_alloc_handle();
+ if (*lgh == NULL)
+ RETURN(-ENOMEM);
+ (*lgh)->lgh_ctxt = ctxt;
+ (*lgh)->lgh_logops = ctxt->loc_logops;
+
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = ctxt->loc_logops->lop_open(env, *lgh, logid, name, open_param);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ if (rc) {
+ llog_free_handle(*lgh);
+ *lgh = NULL;
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_open);
+
+int llog_close(const struct lu_env *env, struct llog_handle *loghandle)
+{
+ struct llog_operations *lop;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_handle2ops(loghandle, &lop);
+ if (rc)
+ GOTO(out, rc);
+ if (lop->lop_close == NULL)
+ GOTO(out, rc = -EOPNOTSUPP);
+ rc = lop->lop_close(env, loghandle);
+out:
+ llog_handle_put(loghandle);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_close);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
new file mode 100644
index 000000000000..cf00b2f550ac
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
@@ -0,0 +1,833 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog_cat.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ * if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <obd_class.h>
+
+#include "llog_internal.h"
+
+/* Create a new log handle and add it to the open list.
+ * This log handle will be closed when all of the records in it are removed.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ */
+static int llog_cat_new_log(const struct lu_env *env,
+ struct llog_handle *cathandle,
+ struct llog_handle *loghandle,
+ struct thandle *th)
+{
+
+ struct llog_log_hdr *llh;
+ struct llog_logid_rec rec = { { 0 }, };
+ int rc, index, bitmap_size;
+ ENTRY;
+
+ llh = cathandle->lgh_hdr;
+ bitmap_size = LLOG_BITMAP_SIZE(llh);
+
+ index = (cathandle->lgh_last_idx + 1) % bitmap_size;
+
+ /* maximum number of available slots in catlog is bitmap_size - 2 */
+ if (llh->llh_cat_idx == index) {
+ CERROR("no free catalog slots for log...\n");
+ RETURN(-ENOSPC);
+ }
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_LLOG_CREATE_FAILED))
+ RETURN(-ENOSPC);
+
+ rc = llog_create(env, loghandle, th);
+ /* if llog is already created, no need to initialize it */
+ if (rc == -EEXIST) {
+ RETURN(0);
+ } else if (rc != 0) {
+ CERROR("%s: can't create new plain llog in catalog: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name, rc);
+ RETURN(rc);
+ }
+
+ rc = llog_init_handle(env, loghandle,
+ LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY,
+ &cathandle->lgh_hdr->llh_tgtuuid);
+ if (rc)
+ GOTO(out_destroy, rc);
+
+ if (index == 0)
+ index = 1;
+
+ spin_lock(&loghandle->lgh_hdr_lock);
+ llh->llh_count++;
+ if (ext2_set_bit(index, llh->llh_bitmap)) {
+ CERROR("argh, index %u already set in log bitmap?\n",
+ index);
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ LBUG(); /* should never happen */
+ }
+ spin_unlock(&loghandle->lgh_hdr_lock);
+
+ cathandle->lgh_last_idx = index;
+ llh->llh_tail.lrt_index = index;
+
+ CDEBUG(D_RPCTRACE,"new recovery log "DOSTID":%x for index %u of catalog"
+ DOSTID"\n", POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, index,
+ POSTID(&cathandle->lgh_id.lgl_oi));
+ /* build the record for this log in the catalog */
+ rec.lid_hdr.lrh_len = sizeof(rec);
+ rec.lid_hdr.lrh_index = index;
+ rec.lid_hdr.lrh_type = LLOG_LOGID_MAGIC;
+ rec.lid_id = loghandle->lgh_id;
+ rec.lid_tail.lrt_len = sizeof(rec);
+ rec.lid_tail.lrt_index = index;
+
+ /* update the catalog: header and record */
+ rc = llog_write_rec(env, cathandle, &rec.lid_hdr,
+ &loghandle->u.phd.phd_cookie, 1, NULL, index, th);
+ if (rc < 0)
+ GOTO(out_destroy, rc);
+
+ loghandle->lgh_hdr->llh_cat_idx = index;
+ RETURN(0);
+out_destroy:
+ llog_destroy(env, loghandle);
+ RETURN(rc);
+}
+
+/* Open an existent log handle and add it to the open list.
+ * This log handle will be closed when all of the records in it are removed.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ * We return a lock on the handle to ensure nobody yanks it from us.
+ *
+ * This takes extra reference on llog_handle via llog_handle_get() and require
+ * this reference to be put by caller using llog_handle_put()
+ */
+int llog_cat_id2handle(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_handle **res, struct llog_logid *logid)
+{
+ struct llog_handle *loghandle;
+ int rc = 0;
+
+ ENTRY;
+
+ if (cathandle == NULL)
+ RETURN(-EBADF);
+
+ down_write(&cathandle->lgh_lock);
+ list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
+ u.phd.phd_entry) {
+ struct llog_logid *cgl = &loghandle->lgh_id;
+
+ if (ostid_id(&cgl->lgl_oi) == ostid_id(&logid->lgl_oi) &&
+ ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) {
+ if (cgl->lgl_ogen != logid->lgl_ogen) {
+ CERROR("%s: log "DOSTID" generation %x != %x\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&logid->lgl_oi), cgl->lgl_ogen,
+ logid->lgl_ogen);
+ continue;
+ }
+ loghandle->u.phd.phd_cat_handle = cathandle;
+ up_write(&cathandle->lgh_lock);
+ GOTO(out, rc = 0);
+ }
+ }
+ up_write(&cathandle->lgh_lock);
+
+ rc = llog_open(env, cathandle->lgh_ctxt, &loghandle, logid, NULL,
+ LLOG_OPEN_EXISTS);
+ if (rc < 0) {
+ CERROR("%s: error opening log id "DOSTID":%x: rc = %d\n",
+ cathandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
+ RETURN(rc);
+ }
+
+ rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
+ if (rc < 0) {
+ llog_close(env, loghandle);
+ loghandle = NULL;
+ RETURN(rc);
+ }
+
+ down_write(&cathandle->lgh_lock);
+ list_add(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
+ up_write(&cathandle->lgh_lock);
+
+ loghandle->u.phd.phd_cat_handle = cathandle;
+ loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id;
+ loghandle->u.phd.phd_cookie.lgc_index =
+ loghandle->lgh_hdr->llh_cat_idx;
+ EXIT;
+out:
+ llog_handle_get(loghandle);
+ *res = loghandle;
+ return 0;
+}
+
+int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
+{
+ struct llog_handle *loghandle, *n;
+ int rc;
+
+ ENTRY;
+
+ list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
+ u.phd.phd_entry) {
+ struct llog_log_hdr *llh = loghandle->lgh_hdr;
+ int index;
+
+ /* unlink open-not-created llogs */
+ list_del_init(&loghandle->u.phd.phd_entry);
+ llh = loghandle->lgh_hdr;
+ if (loghandle->lgh_obj != NULL && llh != NULL &&
+ (llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
+ (llh->llh_count == 1)) {
+ rc = llog_destroy(env, loghandle);
+ if (rc)
+ CERROR("%s: failure destroying log during "
+ "cleanup: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name,
+ rc);
+
+ index = loghandle->u.phd.phd_cookie.lgc_index;
+ llog_cat_cleanup(env, cathandle, NULL, index);
+ }
+ llog_close(env, loghandle);
+ }
+ /* if handle was stored in ctxt, remove it too */
+ if (cathandle->lgh_ctxt->loc_handle == cathandle)
+ cathandle->lgh_ctxt->loc_handle = NULL;
+ rc = llog_close(env, cathandle);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_close);
+
+/**
+ * lockdep markers for nested struct llog_handle::lgh_lock locking.
+ */
+enum {
+ LLOGH_CAT,
+ LLOGH_LOG
+};
+
+/** Return the currently active log handle. If the current log handle doesn't
+ * have enough space left for the current record, start a new one.
+ *
+ * If reclen is 0, we only want to know what the currently active log is,
+ * otherwise we get a lock on this log so nobody can steal our space.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ *
+ * NOTE: loghandle is write-locked upon successful return
+ */
+static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
+ struct thandle *th)
+{
+ struct llog_handle *loghandle = NULL;
+ ENTRY;
+
+ down_read_nested(&cathandle->lgh_lock, LLOGH_CAT);
+ loghandle = cathandle->u.chd.chd_current_log;
+ if (loghandle) {
+ struct llog_log_hdr *llh;
+
+ down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
+ llh = loghandle->lgh_hdr;
+ if (llh == NULL ||
+ loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) {
+ up_read(&cathandle->lgh_lock);
+ RETURN(loghandle);
+ } else {
+ up_write(&loghandle->lgh_lock);
+ }
+ }
+ up_read(&cathandle->lgh_lock);
+
+ /* time to use next log */
+
+ /* first, we have to make sure the state hasn't changed */
+ down_write_nested(&cathandle->lgh_lock, LLOGH_CAT);
+ loghandle = cathandle->u.chd.chd_current_log;
+ if (loghandle) {
+ struct llog_log_hdr *llh;
+
+ down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
+ llh = loghandle->lgh_hdr;
+ LASSERT(llh);
+ if (loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) {
+ up_write(&cathandle->lgh_lock);
+ RETURN(loghandle);
+ } else {
+ up_write(&loghandle->lgh_lock);
+ }
+ }
+
+ CDEBUG(D_INODE, "use next log\n");
+
+ loghandle = cathandle->u.chd.chd_next_log;
+ cathandle->u.chd.chd_current_log = loghandle;
+ cathandle->u.chd.chd_next_log = NULL;
+ down_write_nested(&loghandle->lgh_lock, LLOGH_LOG);
+ up_write(&cathandle->lgh_lock);
+ LASSERT(loghandle);
+ RETURN(loghandle);
+}
+
+/* Add a single record to the recovery log(s) using a catalog
+ * Returns as llog_write_record
+ *
+ * Assumes caller has already pushed us into the kernel context.
+ */
+int llog_cat_add_rec(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ void *buf, struct thandle *th)
+{
+ struct llog_handle *loghandle;
+ int rc;
+ ENTRY;
+
+ LASSERT(rec->lrh_len <= LLOG_CHUNK_SIZE);
+ loghandle = llog_cat_current_log(cathandle, th);
+ LASSERT(!IS_ERR(loghandle));
+
+ /* loghandle is already locked by llog_cat_current_log() for us */
+ if (!llog_exist(loghandle)) {
+ rc = llog_cat_new_log(env, cathandle, loghandle, th);
+ if (rc < 0) {
+ up_write(&loghandle->lgh_lock);
+ RETURN(rc);
+ }
+ }
+ /* now let's try to add the record */
+ rc = llog_write_rec(env, loghandle, rec, reccookie, 1, buf, -1, th);
+ if (rc < 0)
+ CDEBUG_LIMIT(rc == -ENOSPC ? D_HA : D_ERROR,
+ "llog_write_rec %d: lh=%p\n", rc, loghandle);
+ up_write(&loghandle->lgh_lock);
+ if (rc == -ENOSPC) {
+ /* try to use next log */
+ loghandle = llog_cat_current_log(cathandle, th);
+ LASSERT(!IS_ERR(loghandle));
+ /* new llog can be created concurrently */
+ if (!llog_exist(loghandle)) {
+ rc = llog_cat_new_log(env, cathandle, loghandle, th);
+ if (rc < 0) {
+ up_write(&loghandle->lgh_lock);
+ RETURN(rc);
+ }
+ }
+ /* now let's try to add the record */
+ rc = llog_write_rec(env, loghandle, rec, reccookie, 1, buf,
+ -1, th);
+ if (rc < 0)
+ CERROR("llog_write_rec %d: lh=%p\n", rc, loghandle);
+ up_write(&loghandle->lgh_lock);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_add_rec);
+
+int llog_cat_declare_add_rec(const struct lu_env *env,
+ struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct thandle *th)
+{
+ struct llog_handle *loghandle, *next;
+ int rc = 0;
+
+ ENTRY;
+
+ if (cathandle->u.chd.chd_current_log == NULL) {
+ /* declare new plain llog */
+ down_write(&cathandle->lgh_lock);
+ if (cathandle->u.chd.chd_current_log == NULL) {
+ rc = llog_open(env, cathandle->lgh_ctxt, &loghandle,
+ NULL, NULL, LLOG_OPEN_NEW);
+ if (rc == 0) {
+ cathandle->u.chd.chd_current_log = loghandle;
+ list_add_tail(&loghandle->u.phd.phd_entry,
+ &cathandle->u.chd.chd_head);
+ }
+ }
+ up_write(&cathandle->lgh_lock);
+ } else if (cathandle->u.chd.chd_next_log == NULL) {
+ /* declare next plain llog */
+ down_write(&cathandle->lgh_lock);
+ if (cathandle->u.chd.chd_next_log == NULL) {
+ rc = llog_open(env, cathandle->lgh_ctxt, &loghandle,
+ NULL, NULL, LLOG_OPEN_NEW);
+ if (rc == 0) {
+ cathandle->u.chd.chd_next_log = loghandle;
+ list_add_tail(&loghandle->u.phd.phd_entry,
+ &cathandle->u.chd.chd_head);
+ }
+ }
+ up_write(&cathandle->lgh_lock);
+ }
+ if (rc)
+ GOTO(out, rc);
+
+ if (!llog_exist(cathandle->u.chd.chd_current_log)) {
+ rc = llog_declare_create(env, cathandle->u.chd.chd_current_log,
+ th);
+ if (rc)
+ GOTO(out, rc);
+ llog_declare_write_rec(env, cathandle, NULL, -1, th);
+ }
+ /* declare records in the llogs */
+ rc = llog_declare_write_rec(env, cathandle->u.chd.chd_current_log,
+ rec, -1, th);
+ if (rc)
+ GOTO(out, rc);
+
+ next = cathandle->u.chd.chd_next_log;
+ if (next) {
+ if (!llog_exist(next)) {
+ rc = llog_declare_create(env, next, th);
+ llog_declare_write_rec(env, cathandle, NULL, -1, th);
+ }
+ llog_declare_write_rec(env, next, rec, -1, th);
+ }
+out:
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_declare_add_rec);
+
+int llog_cat_add(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+ void *buf)
+{
+ struct llog_ctxt *ctxt;
+ struct dt_device *dt;
+ struct thandle *th = NULL;
+ int rc;
+
+ ctxt = cathandle->lgh_ctxt;
+ LASSERT(ctxt);
+ LASSERT(ctxt->loc_exp);
+
+ if (cathandle->lgh_obj != NULL) {
+ dt = ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt;
+ LASSERT(dt);
+
+ th = dt_trans_create(env, dt);
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
+
+ rc = llog_cat_declare_add_rec(env, cathandle, rec, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_trans_start_local(env, dt, th);
+ if (rc)
+ GOTO(out_trans, rc);
+ rc = llog_cat_add_rec(env, cathandle, rec, reccookie, buf, th);
+out_trans:
+ dt_trans_stop(env, dt, th);
+ } else { /* lvfs compat code */
+ LASSERT(cathandle->lgh_file != NULL);
+ rc = llog_cat_declare_add_rec(env, cathandle, rec, th);
+ if (rc == 0)
+ rc = llog_cat_add_rec(env, cathandle, rec, reccookie,
+ buf, th);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_add);
+
+/* For each cookie in the cookie array, we clear the log in-use bit and either:
+ * - the log is empty, so mark it free in the catalog header and delete it
+ * - the log is not empty, just write out the log header
+ *
+ * The cookies may be in different log files, so we need to get new logs
+ * each time.
+ *
+ * Assumes caller has already pushed us into the kernel context.
+ */
+int llog_cat_cancel_records(const struct lu_env *env,
+ struct llog_handle *cathandle, int count,
+ struct llog_cookie *cookies)
+{
+ int i, index, rc = 0, failed = 0;
+
+ ENTRY;
+
+ for (i = 0; i < count; i++, cookies++) {
+ struct llog_handle *loghandle;
+ struct llog_logid *lgl = &cookies->lgc_lgl;
+ int lrc;
+
+ rc = llog_cat_id2handle(env, cathandle, &loghandle, lgl);
+ if (rc) {
+ CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
+ cathandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&lgl->lgl_oi), rc);
+ failed++;
+ continue;
+ }
+
+ lrc = llog_cancel_rec(env, loghandle, cookies->lgc_index);
+ if (lrc == 1) { /* log has been destroyed */
+ index = loghandle->u.phd.phd_cookie.lgc_index;
+ rc = llog_cat_cleanup(env, cathandle, loghandle,
+ index);
+ } else if (lrc == -ENOENT) {
+ if (rc == 0) /* ENOENT shouldn't rewrite any error */
+ rc = lrc;
+ } else if (lrc < 0) {
+ failed++;
+ rc = lrc;
+ }
+ llog_handle_put(loghandle);
+ }
+ if (rc)
+ CERROR("%s: fail to cancel %d of %d llog-records: rc = %d\n",
+ cathandle->lgh_ctxt->loc_obd->obd_name, failed, count,
+ rc);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_cancel_records);
+
+int llog_cat_process_cb(const struct lu_env *env, struct llog_handle *cat_llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_process_data *d = data;
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ struct llog_handle *llh;
+ int rc;
+
+ ENTRY;
+ if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+ CERROR("invalid record in catalog\n");
+ RETURN(-EINVAL);
+ }
+ CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
+ DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
+ rec->lrh_index, POSTID(&cat_llh->lgh_id.lgl_oi));
+
+ rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
+ if (rc) {
+ CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
+ cat_llh->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&lir->lid_id.lgl_oi), rc);
+ RETURN(rc);
+ }
+
+ if (rec->lrh_index < d->lpd_startcat)
+ /* Skip processing of the logs until startcat */
+ RETURN(0);
+
+ if (d->lpd_startidx > 0) {
+ struct llog_process_cat_data cd;
+
+ cd.lpcd_first_idx = d->lpd_startidx;
+ cd.lpcd_last_idx = 0;
+ rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
+ &cd, false);
+ /* Continue processing the next log from idx 0 */
+ d->lpd_startidx = 0;
+ } else {
+ rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data,
+ NULL, false);
+ }
+ llog_handle_put(llh);
+
+ RETURN(rc);
+}
+
+int llog_cat_process_or_fork(const struct lu_env *env,
+ struct llog_handle *cat_llh,
+ llog_cb_t cb, void *data, int startcat,
+ int startidx, bool fork)
+{
+ struct llog_process_data d;
+ struct llog_log_hdr *llh = cat_llh->lgh_hdr;
+ int rc;
+ ENTRY;
+
+ LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
+ d.lpd_data = data;
+ d.lpd_cb = cb;
+ d.lpd_startcat = startcat;
+ d.lpd_startidx = startidx;
+
+ if (llh->llh_cat_idx > cat_llh->lgh_last_idx) {
+ struct llog_process_cat_data cd;
+
+ CWARN("catlog "DOSTID" crosses index zero\n",
+ POSTID(&cat_llh->lgh_id.lgl_oi));
+
+ cd.lpcd_first_idx = llh->llh_cat_idx;
+ cd.lpcd_last_idx = 0;
+ rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
+ &d, &cd, fork);
+ if (rc != 0)
+ RETURN(rc);
+
+ cd.lpcd_first_idx = 0;
+ cd.lpcd_last_idx = cat_llh->lgh_last_idx;
+ rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
+ &d, &cd, fork);
+ } else {
+ rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb,
+ &d, NULL, fork);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_process_or_fork);
+
+int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
+ llog_cb_t cb, void *data, int startcat, int startidx)
+{
+ return llog_cat_process_or_fork(env, cat_llh, cb, data, startcat,
+ startidx, false);
+}
+EXPORT_SYMBOL(llog_cat_process);
+
+static int llog_cat_reverse_process_cb(const struct lu_env *env,
+ struct llog_handle *cat_llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_process_data *d = data;
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ struct llog_handle *llh;
+ int rc;
+
+ if (le32_to_cpu(rec->lrh_type) != LLOG_LOGID_MAGIC) {
+ CERROR("invalid record in catalog\n");
+ RETURN(-EINVAL);
+ }
+ CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
+ DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
+ le32_to_cpu(rec->lrh_index), POSTID(&cat_llh->lgh_id.lgl_oi));
+
+ rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id);
+ if (rc) {
+ CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
+ cat_llh->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&lir->lid_id.lgl_oi), rc);
+ RETURN(rc);
+ }
+
+ rc = llog_reverse_process(env, llh, d->lpd_cb, d->lpd_data, NULL);
+ llog_handle_put(llh);
+ RETURN(rc);
+}
+
+int llog_cat_reverse_process(const struct lu_env *env,
+ struct llog_handle *cat_llh,
+ llog_cb_t cb, void *data)
+{
+ struct llog_process_data d;
+ struct llog_process_cat_data cd;
+ struct llog_log_hdr *llh = cat_llh->lgh_hdr;
+ int rc;
+ ENTRY;
+
+ LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
+ d.lpd_data = data;
+ d.lpd_cb = cb;
+
+ if (llh->llh_cat_idx > cat_llh->lgh_last_idx) {
+ CWARN("catalog "DOSTID" crosses index zero\n",
+ POSTID(&cat_llh->lgh_id.lgl_oi));
+
+ cd.lpcd_first_idx = 0;
+ cd.lpcd_last_idx = cat_llh->lgh_last_idx;
+ rc = llog_reverse_process(env, cat_llh,
+ llog_cat_reverse_process_cb,
+ &d, &cd);
+ if (rc != 0)
+ RETURN(rc);
+
+ cd.lpcd_first_idx = le32_to_cpu(llh->llh_cat_idx);
+ cd.lpcd_last_idx = 0;
+ rc = llog_reverse_process(env, cat_llh,
+ llog_cat_reverse_process_cb,
+ &d, &cd);
+ } else {
+ rc = llog_reverse_process(env, cat_llh,
+ llog_cat_reverse_process_cb,
+ &d, NULL);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_reverse_process);
+
+int llog_cat_set_first_idx(struct llog_handle *cathandle, int index)
+{
+ struct llog_log_hdr *llh = cathandle->lgh_hdr;
+ int i, bitmap_size, idx;
+ ENTRY;
+
+ bitmap_size = LLOG_BITMAP_SIZE(llh);
+ if (llh->llh_cat_idx == (index - 1)) {
+ idx = llh->llh_cat_idx + 1;
+ llh->llh_cat_idx = idx;
+ if (idx == cathandle->lgh_last_idx)
+ goto out;
+ for (i = (index + 1) % bitmap_size;
+ i != cathandle->lgh_last_idx;
+ i = (i + 1) % bitmap_size) {
+ if (!ext2_test_bit(i, llh->llh_bitmap)) {
+ idx = llh->llh_cat_idx + 1;
+ llh->llh_cat_idx = idx;
+ } else if (i == 0) {
+ llh->llh_cat_idx = 0;
+ } else {
+ break;
+ }
+ }
+out:
+ CDEBUG(D_RPCTRACE, "set catlog "DOSTID" first idx %u\n",
+ POSTID(&cathandle->lgh_id.lgl_oi), llh->llh_cat_idx);
+ }
+
+ RETURN(0);
+}
+
+/* Cleanup deleted plain llog traces from catalog */
+int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_handle *loghandle, int index)
+{
+ int rc;
+
+ LASSERT(index);
+ if (loghandle != NULL) {
+ /* remove destroyed llog from catalog list and
+ * chd_current_log variable */
+ down_write(&cathandle->lgh_lock);
+ if (cathandle->u.chd.chd_current_log == loghandle)
+ cathandle->u.chd.chd_current_log = NULL;
+ list_del_init(&loghandle->u.phd.phd_entry);
+ up_write(&cathandle->lgh_lock);
+ LASSERT(index == loghandle->u.phd.phd_cookie.lgc_index);
+ /* llog was opened and keep in a list, close it now */
+ llog_close(env, loghandle);
+ }
+ /* remove plain llog entry from catalog by index */
+ llog_cat_set_first_idx(cathandle, index);
+ rc = llog_cancel_rec(env, cathandle, index);
+ if (rc == 0)
+ CDEBUG(D_HA, "cancel plain log at index"
+ " %u of catalog "DOSTID"\n",
+ index, POSTID(&cathandle->lgh_id.lgl_oi));
+ return rc;
+}
+
+int cat_cancel_cb(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ struct llog_handle *loghandle;
+ struct llog_log_hdr *llh;
+ int rc;
+
+ ENTRY;
+
+ if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+ CERROR("invalid record in catalog\n");
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_HA, "processing log "DOSTID":%x at index %u of catalog "
+ DOSTID"\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen,
+ rec->lrh_index, POSTID(&cathandle->lgh_id.lgl_oi));
+
+ rc = llog_cat_id2handle(env, cathandle, &loghandle, &lir->lid_id);
+ if (rc) {
+ CERROR("%s: cannot find handle for llog "DOSTID": %d\n",
+ cathandle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&lir->lid_id.lgl_oi), rc);
+ if (rc == -ENOENT || rc == -ESTALE) {
+ /* remove index from catalog */
+ llog_cat_cleanup(env, cathandle, NULL, rec->lrh_index);
+ }
+ RETURN(rc);
+ }
+
+ llh = loghandle->lgh_hdr;
+ if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
+ (llh->llh_count == 1)) {
+ rc = llog_destroy(env, loghandle);
+ if (rc)
+ CERROR("%s: fail to destroy empty log: rc = %d\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name, rc);
+
+ llog_cat_cleanup(env, cathandle, loghandle,
+ loghandle->u.phd.phd_cookie.lgc_index);
+ }
+ llog_handle_put(loghandle);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cat_cancel_cb);
+
+/* helper to initialize catalog llog and process it to cancel */
+int llog_cat_init_and_process(const struct lu_env *env,
+ struct llog_handle *llh)
+{
+ int rc;
+
+ rc = llog_init_handle(env, llh, LLOG_F_IS_CAT, NULL);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_process_or_fork(env, llh, cat_cancel_cb, NULL, NULL, false);
+ if (rc)
+ CERROR("%s: llog_process() with cat_cancel_cb failed: rc = "
+ "%d\n", llh->lgh_ctxt->loc_obd->obd_name, rc);
+ RETURN(0);
+}
+EXPORT_SYMBOL(llog_cat_init_and_process);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
new file mode 100644
index 000000000000..539e1d4f9d4c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
@@ -0,0 +1,98 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LLOG_INTERNAL_H__
+#define __LLOG_INTERNAL_H__
+
+#include <lustre_log.h>
+
+struct llog_process_info {
+ struct llog_handle *lpi_loghandle;
+ llog_cb_t lpi_cb;
+ void *lpi_cbdata;
+ void *lpi_catdata;
+ int lpi_rc;
+ struct completion lpi_completion;
+ const struct lu_env *lpi_env;
+
+};
+
+struct llog_thread_info {
+ struct lu_attr lgi_attr;
+ struct lu_fid lgi_fid;
+ struct dt_object_format lgi_dof;
+ struct lu_buf lgi_buf;
+ loff_t lgi_off;
+ struct llog_rec_hdr lgi_lrh;
+ struct llog_rec_tail lgi_tail;
+};
+
+extern struct lu_context_key llog_thread_key;
+
+static inline struct llog_thread_info *llog_info(const struct lu_env *env)
+{
+ struct llog_thread_info *lgi;
+
+ lgi = lu_context_key_get(&env->le_ctx, &llog_thread_key);
+ LASSERT(lgi);
+ return lgi;
+}
+
+static inline void
+lustre_build_llog_lvfs_oid(struct llog_logid *logid, __u64 ino, __u32 gen)
+{
+ ostid_set_seq_llog(&logid->lgl_oi);
+ ostid_set_id(&logid->lgl_oi, ino);
+ logid->lgl_ogen = gen;
+}
+
+int llog_info_init(void);
+void llog_info_fini(void);
+
+void llog_handle_get(struct llog_handle *loghandle);
+void llog_handle_put(struct llog_handle *loghandle);
+int llog_cat_id2handle(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_handle **res, struct llog_logid *logid);
+int class_config_dump_handler(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data);
+int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf, int size);
+int llog_process_or_fork(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ llog_cb_t cb, void *data, void *catdata, bool fork);
+int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
+ struct llog_handle *loghandle, int index);
+#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_ioctl.c b/drivers/staging/lustre/lustre/obdclass/llog_ioctl.c
new file mode 100644
index 000000000000..0732874e26c5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_ioctl.c
@@ -0,0 +1,427 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include "llog_internal.h"
+
+static int str2logid(struct llog_logid *logid, char *str, int len)
+{
+ char *start, *end, *endp;
+ __u64 id, seq;
+
+ ENTRY;
+ start = str;
+ if (*start != '#')
+ RETURN(-EINVAL);
+
+ start++;
+ if (start - str >= len - 1)
+ RETURN(-EINVAL);
+ end = strchr(start, '#');
+ if (end == NULL || end == start)
+ RETURN(-EINVAL);
+
+ *end = '\0';
+ id = simple_strtoull(start, &endp, 0);
+ if (endp != end)
+ RETURN(-EINVAL);
+
+ start = ++end;
+ if (start - str >= len - 1)
+ RETURN(-EINVAL);
+ end = strchr(start, '#');
+ if (end == NULL || end == start)
+ RETURN(-EINVAL);
+
+ *end = '\0';
+ seq = simple_strtoull(start, &endp, 0);
+ if (endp != end)
+ RETURN(-EINVAL);
+
+ ostid_set_seq(&logid->lgl_oi, seq);
+ ostid_set_id(&logid->lgl_oi, id);
+
+ start = ++end;
+ if (start - str >= len - 1)
+ RETURN(-EINVAL);
+ logid->lgl_ogen = simple_strtoul(start, &endp, 16);
+ if (*endp != '\0')
+ RETURN(-EINVAL);
+
+ RETURN(0);
+}
+
+static int llog_check_cb(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct obd_ioctl_data *ioc_data = (struct obd_ioctl_data *)data;
+ static int l, remains, from, to;
+ static char *out;
+ char *endp;
+ int cur_index, rc = 0;
+
+ ENTRY;
+
+ if (ioc_data && ioc_data->ioc_inllen1 > 0) {
+ l = 0;
+ remains = ioc_data->ioc_inllen4 +
+ cfs_size_round(ioc_data->ioc_inllen1) +
+ cfs_size_round(ioc_data->ioc_inllen2) +
+ cfs_size_round(ioc_data->ioc_inllen3);
+ from = simple_strtol(ioc_data->ioc_inlbuf2, &endp, 0);
+ if (*endp != '\0')
+ RETURN(-EINVAL);
+ to = simple_strtol(ioc_data->ioc_inlbuf3, &endp, 0);
+ if (*endp != '\0')
+ RETURN(-EINVAL);
+ ioc_data->ioc_inllen1 = 0;
+ out = ioc_data->ioc_bulk;
+ }
+
+ cur_index = rec->lrh_index;
+ if (cur_index < from)
+ RETURN(0);
+ if (to > 0 && cur_index > to)
+ RETURN(-LLOG_EEMPTY);
+
+ if (handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) {
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ struct llog_handle *loghandle;
+
+ if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+ l = snprintf(out, remains, "[index]: %05d [type]: "
+ "%02x [len]: %04d failed\n",
+ cur_index, rec->lrh_type,
+ rec->lrh_len);
+ }
+ if (handle->lgh_ctxt == NULL)
+ RETURN(-EOPNOTSUPP);
+ rc = llog_cat_id2handle(env, handle, &loghandle, &lir->lid_id);
+ if (rc) {
+ CDEBUG(D_IOCTL, "cannot find log #"DOSTID"#%08x\n",
+ POSTID(&lir->lid_id.lgl_oi),
+ lir->lid_id.lgl_ogen);
+ RETURN(rc);
+ }
+ rc = llog_process(env, loghandle, llog_check_cb, NULL, NULL);
+ llog_handle_put(loghandle);
+ } else {
+ bool ok;
+
+ switch (rec->lrh_type) {
+ case OST_SZ_REC:
+ case MDS_UNLINK_REC:
+ case MDS_UNLINK64_REC:
+ case MDS_SETATTR64_REC:
+ case OBD_CFG_REC:
+ case LLOG_GEN_REC:
+ case LLOG_HDR_MAGIC:
+ ok = true;
+ break;
+ default:
+ ok = false;
+ }
+
+ l = snprintf(out, remains, "[index]: %05d [type]: "
+ "%02x [len]: %04d %s\n",
+ cur_index, rec->lrh_type, rec->lrh_len,
+ ok ? "ok" : "failed");
+ out += l;
+ remains -= l;
+ if (remains <= 0) {
+ CERROR("%s: no space to print log records\n",
+ handle->lgh_ctxt->loc_obd->obd_name);
+ RETURN(-LLOG_EEMPTY);
+ }
+ }
+ RETURN(rc);
+}
+
+static int llog_print_cb(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct obd_ioctl_data *ioc_data = (struct obd_ioctl_data *)data;
+ static int l, remains, from, to;
+ static char *out;
+ char *endp;
+ int cur_index;
+
+ ENTRY;
+ if (ioc_data != NULL && ioc_data->ioc_inllen1 > 0) {
+ l = 0;
+ remains = ioc_data->ioc_inllen4 +
+ cfs_size_round(ioc_data->ioc_inllen1) +
+ cfs_size_round(ioc_data->ioc_inllen2) +
+ cfs_size_round(ioc_data->ioc_inllen3);
+ from = simple_strtol(ioc_data->ioc_inlbuf2, &endp, 0);
+ if (*endp != '\0')
+ RETURN(-EINVAL);
+ to = simple_strtol(ioc_data->ioc_inlbuf3, &endp, 0);
+ if (*endp != '\0')
+ RETURN(-EINVAL);
+ out = ioc_data->ioc_bulk;
+ ioc_data->ioc_inllen1 = 0;
+ }
+
+ cur_index = rec->lrh_index;
+ if (cur_index < from)
+ RETURN(0);
+ if (to > 0 && cur_index > to)
+ RETURN(-LLOG_EEMPTY);
+
+ if (handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) {
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+
+ if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+ CERROR("invalid record in catalog\n");
+ RETURN(-EINVAL);
+ }
+
+ l = snprintf(out, remains,
+ "[index]: %05d [logid]: #"DOSTID"#%08x\n",
+ cur_index, POSTID(&lir->lid_id.lgl_oi),
+ lir->lid_id.lgl_ogen);
+ } else if (rec->lrh_type == OBD_CFG_REC) {
+ int rc;
+
+ rc = class_config_parse_rec(rec, out, remains);
+ if (rc < 0)
+ RETURN(rc);
+ l = rc;
+ } else {
+ l = snprintf(out, remains,
+ "[index]: %05d [type]: %02x [len]: %04d\n",
+ cur_index, rec->lrh_type, rec->lrh_len);
+ }
+ out += l;
+ remains -= l;
+ if (remains <= 0) {
+ CERROR("not enough space for print log records\n");
+ RETURN(-LLOG_EEMPTY);
+ }
+
+ RETURN(0);
+}
+static int llog_remove_log(const struct lu_env *env, struct llog_handle *cat,
+ struct llog_logid *logid)
+{
+ struct llog_handle *log;
+ int rc;
+
+ ENTRY;
+
+ rc = llog_cat_id2handle(env, cat, &log, logid);
+ if (rc) {
+ CDEBUG(D_IOCTL, "cannot find log #"DOSTID"#%08x\n",
+ POSTID(&logid->lgl_oi), logid->lgl_ogen);
+ RETURN(-ENOENT);
+ }
+
+ rc = llog_destroy(env, log);
+ if (rc) {
+ CDEBUG(D_IOCTL, "cannot destroy log\n");
+ GOTO(out, rc);
+ }
+ llog_cat_cleanup(env, cat, log, log->u.phd.phd_cookie.lgc_index);
+out:
+ llog_handle_put(log);
+ RETURN(rc);
+
+}
+
+static int llog_delete_cb(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ int rc;
+
+ ENTRY;
+ if (rec->lrh_type != LLOG_LOGID_MAGIC)
+ RETURN(-EINVAL);
+ rc = llog_remove_log(env, handle, &lir->lid_id);
+
+ RETURN(rc);
+}
+
+
+int llog_ioctl(const struct lu_env *env, struct llog_ctxt *ctxt, int cmd,
+ struct obd_ioctl_data *data)
+{
+ struct llog_logid logid;
+ int rc = 0;
+ struct llog_handle *handle = NULL;
+
+ ENTRY;
+
+ if (*data->ioc_inlbuf1 == '#') {
+ rc = str2logid(&logid, data->ioc_inlbuf1, data->ioc_inllen1);
+ if (rc)
+ RETURN(rc);
+ rc = llog_open(env, ctxt, &handle, &logid, NULL,
+ LLOG_OPEN_EXISTS);
+ if (rc)
+ RETURN(rc);
+ } else if (*data->ioc_inlbuf1 == '$') {
+ char *name = data->ioc_inlbuf1 + 1;
+
+ rc = llog_open(env, ctxt, &handle, NULL, name,
+ LLOG_OPEN_EXISTS);
+ if (rc)
+ RETURN(rc);
+ } else {
+ RETURN(-EINVAL);
+ }
+
+ rc = llog_init_handle(env, handle, 0, NULL);
+ if (rc)
+ GOTO(out_close, rc = -ENOENT);
+
+ switch (cmd) {
+ case OBD_IOC_LLOG_INFO: {
+ int l;
+ int remains = data->ioc_inllen2 +
+ cfs_size_round(data->ioc_inllen1);
+ char *out = data->ioc_bulk;
+
+ l = snprintf(out, remains,
+ "logid: #"DOSTID"#%08x\n"
+ "flags: %x (%s)\n"
+ "records count: %d\n"
+ "last index: %d\n",
+ POSTID(&handle->lgh_id.lgl_oi),
+ handle->lgh_id.lgl_ogen,
+ handle->lgh_hdr->llh_flags,
+ handle->lgh_hdr->llh_flags &
+ LLOG_F_IS_CAT ? "cat" : "plain",
+ handle->lgh_hdr->llh_count,
+ handle->lgh_last_idx);
+ out += l;
+ remains -= l;
+ if (remains <= 0) {
+ CERROR("%s: not enough space for log header info\n",
+ ctxt->loc_obd->obd_name);
+ rc = -ENOSPC;
+ }
+ break;
+ }
+ case OBD_IOC_LLOG_CHECK:
+ LASSERT(data->ioc_inllen1 > 0);
+ rc = llog_process(env, handle, llog_check_cb, data, NULL);
+ if (rc == -LLOG_EEMPTY)
+ rc = 0;
+ else if (rc)
+ GOTO(out_close, rc);
+ break;
+ case OBD_IOC_LLOG_PRINT:
+ LASSERT(data->ioc_inllen1 > 0);
+ rc = llog_process(env, handle, llog_print_cb, data, NULL);
+ if (rc == -LLOG_EEMPTY)
+ rc = 0;
+ else if (rc)
+ GOTO(out_close, rc);
+ break;
+ case OBD_IOC_LLOG_CANCEL: {
+ struct llog_cookie cookie;
+ struct llog_logid plain;
+ char *endp;
+
+ cookie.lgc_index = simple_strtoul(data->ioc_inlbuf3, &endp, 0);
+ if (*endp != '\0')
+ GOTO(out_close, rc = -EINVAL);
+
+ if (handle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) {
+ rc = llog_cancel_rec(NULL, handle, cookie.lgc_index);
+ GOTO(out_close, rc);
+ } else if (!(handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)) {
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ if (data->ioc_inlbuf2 == NULL) /* catalog but no logid */
+ GOTO(out_close, rc = -ENOTTY);
+
+ rc = str2logid(&plain, data->ioc_inlbuf2, data->ioc_inllen2);
+ if (rc)
+ GOTO(out_close, rc);
+ cookie.lgc_lgl = plain;
+ rc = llog_cat_cancel_records(env, handle, 1, &cookie);
+ if (rc)
+ GOTO(out_close, rc);
+ break;
+ }
+ case OBD_IOC_LLOG_REMOVE: {
+ struct llog_logid plain;
+
+ if (handle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) {
+ rc = llog_destroy(env, handle);
+ GOTO(out_close, rc);
+ } else if (!(handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)) {
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ if (data->ioc_inlbuf2 > 0) {
+ /* remove indicate log from the catalog */
+ rc = str2logid(&plain, data->ioc_inlbuf2,
+ data->ioc_inllen2);
+ if (rc)
+ GOTO(out_close, rc);
+ rc = llog_remove_log(env, handle, &plain);
+ } else {
+ /* remove all the log of the catalog */
+ rc = llog_process(env, handle, llog_delete_cb, NULL,
+ NULL);
+ if (rc)
+ GOTO(out_close, rc);
+ }
+ break;
+ }
+ default:
+ CERROR("%s: Unknown ioctl cmd %#x\n",
+ ctxt->loc_obd->obd_name, cmd);
+ GOTO(out_close, rc = -ENOTTY);
+ }
+
+out_close:
+ if (handle->lgh_hdr &&
+ handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
+ llog_cat_close(env, handle);
+ else
+ llog_close(env, handle);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_ioctl);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_lvfs.c b/drivers/staging/lustre/lustre/obdclass/llog_lvfs.c
new file mode 100644
index 000000000000..7e12dc62141f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_lvfs.c
@@ -0,0 +1,862 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog_lvfs.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ * if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
+#include <linux/list.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include "llog_internal.h"
+
+#if defined(LLOG_LVFS)
+
+static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
+ int len, int index)
+{
+ struct llog_rec_hdr rec = { 0 };
+ struct llog_rec_tail tail;
+ int rc;
+ ENTRY;
+
+ LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
+
+ tail.lrt_len = rec.lrh_len = len;
+ tail.lrt_index = rec.lrh_index = index;
+ rec.lrh_type = LLOG_PAD_MAGIC;
+
+ rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0);
+ if (rc) {
+ CERROR("error writing padding record: rc %d\n", rc);
+ goto out;
+ }
+
+ file->f_pos += len - sizeof(rec) - sizeof(tail);
+ rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0);
+ if (rc) {
+ CERROR("error writing padding record: rc %d\n", rc);
+ goto out;
+ }
+
+ out:
+ RETURN(rc);
+}
+
+static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
+ struct llog_rec_hdr *rec, void *buf, loff_t off)
+{
+ int rc;
+ struct llog_rec_tail end;
+ loff_t saved_off = file->f_pos;
+ int buflen = rec->lrh_len;
+
+ ENTRY;
+
+ file->f_pos = off;
+
+ if (buflen == 0)
+ CWARN("0-length record\n");
+
+ if (!buf) {
+ rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0);
+ if (rc) {
+ CERROR("error writing log record: rc %d\n", rc);
+ goto out;
+ }
+ GOTO(out, rc = 0);
+ }
+
+ /* the buf case */
+ rec->lrh_len = sizeof(*rec) + buflen + sizeof(end);
+ rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
+ if (rc) {
+ CERROR("error writing log hdr: rc %d\n", rc);
+ goto out;
+ }
+
+ rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0);
+ if (rc) {
+ CERROR("error writing log buffer: rc %d\n", rc);
+ goto out;
+ }
+
+ end.lrt_len = rec->lrh_len;
+ end.lrt_index = rec->lrh_index;
+ rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0);
+ if (rc) {
+ CERROR("error writing log tail: rc %d\n", rc);
+ goto out;
+ }
+
+ rc = 0;
+ out:
+ if (saved_off > file->f_pos)
+ file->f_pos = saved_off;
+ LASSERT(rc <= 0);
+ RETURN(rc);
+}
+
+static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
+ void *buf, int size, loff_t off)
+{
+ loff_t offset = off;
+ int rc;
+ ENTRY;
+
+ rc = fsfilt_read_record(obd, file, buf, size, &offset);
+ if (rc) {
+ CERROR("error reading log record: rc %d\n", rc);
+ RETURN(rc);
+ }
+ RETURN(0);
+}
+
+static int llog_lvfs_read_header(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct obd_device *obd;
+ int rc;
+ ENTRY;
+
+ LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
+
+ obd = handle->lgh_ctxt->loc_exp->exp_obd;
+
+ if (i_size_read(handle->lgh_file->f_dentry->d_inode) == 0) {
+ CDEBUG(D_HA, "not reading header from 0-byte log\n");
+ RETURN(LLOG_EEMPTY);
+ }
+
+ rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
+ LLOG_CHUNK_SIZE, 0);
+ if (rc) {
+ CERROR("error reading log header from %.*s\n",
+ handle->lgh_file->f_dentry->d_name.len,
+ handle->lgh_file->f_dentry->d_name.name);
+ } else {
+ struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
+ lustre_swab_llog_hdr(handle->lgh_hdr);
+
+ if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+ CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
+ handle->lgh_file->f_dentry->d_name.len,
+ handle->lgh_file->f_dentry->d_name.name,
+ llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+ rc = -EIO;
+ } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+ CERROR("incorrectly sized log %.*s header: %#x "
+ "(expected %#x)\n",
+ handle->lgh_file->f_dentry->d_name.len,
+ handle->lgh_file->f_dentry->d_name.name,
+ llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+ CERROR("you may need to re-run lconf --write_conf.\n");
+ rc = -EIO;
+ }
+ }
+
+ handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+ handle->lgh_file->f_pos = i_size_read(handle->lgh_file->f_dentry->d_inode);
+
+ RETURN(rc);
+}
+
+/* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
+/* appends if idx == -1, otherwise overwrites record idx. */
+static int llog_lvfs_write_rec(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec,
+ struct llog_cookie *reccookie, int cookiecount,
+ void *buf, int idx, struct thandle *th)
+{
+ struct llog_log_hdr *llh;
+ int reclen = rec->lrh_len, index, rc;
+ struct llog_rec_tail *lrt;
+ struct obd_device *obd;
+ struct file *file;
+ size_t left;
+ ENTRY;
+
+ llh = loghandle->lgh_hdr;
+ file = loghandle->lgh_file;
+ obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
+
+ /* record length should not bigger than LLOG_CHUNK_SIZE */
+ if (buf)
+ rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
+ sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
+ else
+ rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
+ if (rc)
+ RETURN(rc);
+
+ if (buf)
+ /* write_blob adds header and tail to lrh_len. */
+ reclen = sizeof(*rec) + rec->lrh_len +
+ sizeof(struct llog_rec_tail);
+
+ if (idx != -1) {
+ loff_t saved_offset;
+
+ /* no header: only allowed to insert record 1 */
+ if (idx != 1 && !i_size_read(file->f_dentry->d_inode)) {
+ CERROR("idx != -1 in empty log\n");
+ LBUG();
+ }
+
+ if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
+ RETURN(-EINVAL);
+
+ if (!ext2_test_bit(idx, llh->llh_bitmap))
+ CERROR("Modify unset record %u\n", idx);
+ if (idx != rec->lrh_index)
+ CERROR("Index mismatch %d %u\n", idx, rec->lrh_index);
+
+ rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
+ /* we are done if we only write the header or on error */
+ if (rc || idx == 0)
+ RETURN(rc);
+
+ if (buf) {
+ /* We assume that caller has set lgh_cur_* */
+ saved_offset = loghandle->lgh_cur_offset;
+ CDEBUG(D_OTHER,
+ "modify record "DOSTID": idx:%d/%u/%d, len:%u "
+ "offset %llu\n",
+ POSTID(&loghandle->lgh_id.lgl_oi), idx, rec->lrh_index,
+ loghandle->lgh_cur_idx, rec->lrh_len,
+ (long long)(saved_offset - sizeof(*llh)));
+ if (rec->lrh_index != loghandle->lgh_cur_idx) {
+ CERROR("modify idx mismatch %u/%d\n",
+ idx, loghandle->lgh_cur_idx);
+ RETURN(-EFAULT);
+ }
+ } else {
+ /* Assumes constant lrh_len */
+ saved_offset = sizeof(*llh) + (idx - 1) * reclen;
+ }
+
+ rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
+ if (rc == 0 && reccookie) {
+ reccookie->lgc_lgl = loghandle->lgh_id;
+ reccookie->lgc_index = idx;
+ rc = 1;
+ }
+ RETURN(rc);
+ }
+
+ /* Make sure that records don't cross a chunk boundary, so we can
+ * process them page-at-a-time if needed. If it will cross a chunk
+ * boundary, write in a fake (but referenced) entry to pad the chunk.
+ *
+ * We know that llog_current_log() will return a loghandle that is
+ * big enough to hold reclen, so all we care about is padding here.
+ */
+ left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
+
+ /* NOTE: padding is a record, but no bit is set */
+ if (left != 0 && left != reclen &&
+ left < (reclen + LLOG_MIN_REC_SIZE)) {
+ index = loghandle->lgh_last_idx + 1;
+ rc = llog_lvfs_pad(obd, file, left, index);
+ if (rc)
+ RETURN(rc);
+ loghandle->lgh_last_idx++; /*for pad rec*/
+ }
+ /* if it's the last idx in log file, then return -ENOSPC */
+ if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
+ RETURN(-ENOSPC);
+ loghandle->lgh_last_idx++;
+ index = loghandle->lgh_last_idx;
+ LASSERT(index < LLOG_BITMAP_SIZE(llh));
+ rec->lrh_index = index;
+ if (buf == NULL) {
+ lrt = (struct llog_rec_tail *)
+ ((char *)rec + rec->lrh_len - sizeof(*lrt));
+ lrt->lrt_len = rec->lrh_len;
+ lrt->lrt_index = rec->lrh_index;
+ }
+ /*The caller should make sure only 1 process access the lgh_last_idx,
+ *Otherwise it might hit the assert.*/
+ LASSERT(index < LLOG_BITMAP_SIZE(llh));
+ spin_lock(&loghandle->lgh_hdr_lock);
+ if (ext2_set_bit(index, llh->llh_bitmap)) {
+ CERROR("argh, index %u already set in log bitmap?\n", index);
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ LBUG(); /* should never happen */
+ }
+ llh->llh_count++;
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ llh->llh_tail.lrt_index = index;
+
+ rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_RPCTRACE, "added record "DOSTID": idx: %u, %u \n",
+ POSTID(&loghandle->lgh_id.lgl_oi), index, rec->lrh_len);
+ if (rc == 0 && reccookie) {
+ reccookie->lgc_lgl = loghandle->lgh_id;
+ reccookie->lgc_index = index;
+ if ((rec->lrh_type == MDS_UNLINK_REC) ||
+ (rec->lrh_type == MDS_SETATTR64_REC))
+ reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
+ else if (rec->lrh_type == OST_SZ_REC)
+ reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
+ else
+ reccookie->lgc_subsys = -1;
+ rc = 1;
+ }
+ if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
+ rc = 1;
+
+ RETURN(rc);
+}
+
+/* We can skip reading at least as many log blocks as the number of
+* minimum sized log records we are skipping. If it turns out
+* that we are not far enough along the log (because the
+* actual records are larger than minimum size) we just skip
+* some more records. */
+
+static void llog_skip_over(__u64 *off, int curr, int goal)
+{
+ if (goal <= curr)
+ return;
+ *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) &
+ ~(LLOG_CHUNK_SIZE - 1);
+}
+
+
+/* sets:
+ * - cur_offset to the furthest point read in the log file
+ * - cur_idx to the log index preceeding cur_offset
+ * returns -EIO/-EINVAL on error
+ */
+static int llog_lvfs_next_block(const struct lu_env *env,
+ struct llog_handle *loghandle, int *cur_idx,
+ int next_idx, __u64 *cur_offset, void *buf,
+ int len)
+{
+ int rc;
+ ENTRY;
+
+ if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
+ RETURN(-EINVAL);
+
+ CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
+ next_idx, *cur_idx, *cur_offset);
+
+ while (*cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
+ struct llog_rec_hdr *rec, *last_rec;
+ struct llog_rec_tail *tail;
+ loff_t ppos;
+ int llen;
+
+ llog_skip_over(cur_offset, *cur_idx, next_idx);
+
+ /* read up to next LLOG_CHUNK_SIZE block */
+ ppos = *cur_offset;
+ llen = LLOG_CHUNK_SIZE - (*cur_offset & (LLOG_CHUNK_SIZE - 1));
+ rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
+ loghandle->lgh_file, buf, llen,
+ cur_offset);
+ if (rc < 0) {
+ CERROR("Cant read llog block at log id "DOSTID
+ "/%u offset "LPU64"\n",
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen,
+ *cur_offset);
+ RETURN(rc);
+ }
+
+ /* put number of bytes read into rc to make code simpler */
+ rc = *cur_offset - ppos;
+ if (rc < len) {
+ /* signal the end of the valid buffer to llog_process */
+ memset(buf + rc, 0, len - rc);
+ }
+
+ if (rc == 0) /* end of file, nothing to do */
+ RETURN(0);
+
+ if (rc < sizeof(*tail)) {
+ CERROR("Invalid llog block at log id "DOSTID"/%u offset"
+ LPU64"\n", POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, *cur_offset);
+ RETURN(-EINVAL);
+ }
+
+ rec = buf;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+
+ tail = (struct llog_rec_tail *)(buf + rc -
+ sizeof(struct llog_rec_tail));
+
+ /* get the last record in block */
+ last_rec = (struct llog_rec_hdr *)(buf + rc -
+ le32_to_cpu(tail->lrt_len));
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
+ lustre_swab_llog_rec(last_rec);
+ LASSERT(last_rec->lrh_index == tail->lrt_index);
+
+ *cur_idx = tail->lrt_index;
+
+ /* this shouldn't happen */
+ if (tail->lrt_index == 0) {
+ CERROR("Invalid llog tail at log id "DOSTID"/%u offset "
+ LPU64"\n", POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, *cur_offset);
+ RETURN(-EINVAL);
+ }
+ if (tail->lrt_index < next_idx)
+ continue;
+
+ /* sanity check that the start of the new buffer is no farther
+ * than the record that we wanted. This shouldn't happen. */
+ if (rec->lrh_index > next_idx) {
+ CERROR("missed desired record? %u > %u\n",
+ rec->lrh_index, next_idx);
+ RETURN(-ENOENT);
+ }
+ RETURN(0);
+ }
+ RETURN(-EIO);
+}
+
+static int llog_lvfs_prev_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int prev_idx, void *buf, int len)
+{
+ __u64 cur_offset;
+ int rc;
+ ENTRY;
+
+ if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
+ RETURN(-EINVAL);
+
+ CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx);
+
+ cur_offset = LLOG_CHUNK_SIZE;
+ llog_skip_over(&cur_offset, 0, prev_idx);
+
+ while (cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) {
+ struct llog_rec_hdr *rec, *last_rec;
+ struct llog_rec_tail *tail;
+ loff_t ppos = cur_offset;
+
+ rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
+ loghandle->lgh_file, buf, len,
+ &cur_offset);
+ if (rc < 0) {
+ CERROR("Cant read llog block at log id "DOSTID
+ "/%u offset "LPU64"\n",
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen,
+ cur_offset);
+ RETURN(rc);
+ }
+
+ /* put number of bytes read into rc to make code simpler */
+ rc = cur_offset - ppos;
+
+ if (rc == 0) /* end of file, nothing to do */
+ RETURN(0);
+
+ if (rc < sizeof(*tail)) {
+ CERROR("Invalid llog block at log id "DOSTID"/%u offset"
+ LPU64"\n", POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, cur_offset);
+ RETURN(-EINVAL);
+ }
+
+ rec = buf;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+
+ tail = (struct llog_rec_tail *)(buf + rc -
+ sizeof(struct llog_rec_tail));
+
+ /* get the last record in block */
+ last_rec = (struct llog_rec_hdr *)(buf + rc -
+ le32_to_cpu(tail->lrt_len));
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
+ lustre_swab_llog_rec(last_rec);
+ LASSERT(last_rec->lrh_index == tail->lrt_index);
+
+ /* this shouldn't happen */
+ if (tail->lrt_index == 0) {
+ CERROR("Invalid llog tail at log id "DOSTID"/%u offset"
+ LPU64"\n", POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, cur_offset);
+ RETURN(-EINVAL);
+ }
+ if (tail->lrt_index < prev_idx)
+ continue;
+
+ /* sanity check that the start of the new buffer is no farther
+ * than the record that we wanted. This shouldn't happen. */
+ if (rec->lrh_index > prev_idx) {
+ CERROR("missed desired record? %u > %u\n",
+ rec->lrh_index, prev_idx);
+ RETURN(-ENOENT);
+ }
+ RETURN(0);
+ }
+ RETURN(-EIO);
+}
+
+static struct file *llog_filp_open(char *dir, char *name, int flags, int mode)
+{
+ char *logname;
+ struct file *filp;
+ int len;
+
+ OBD_ALLOC(logname, PATH_MAX);
+ if (logname == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ len = snprintf(logname, PATH_MAX, "%s/%s", dir, name);
+ if (len >= PATH_MAX - 1) {
+ filp = ERR_PTR(-ENAMETOOLONG);
+ } else {
+ filp = l_filp_open(logname, flags, mode);
+ if (IS_ERR(filp) && PTR_ERR(filp) != -ENOENT)
+ CERROR("logfile creation %s: %ld\n", logname,
+ PTR_ERR(filp));
+ }
+ OBD_FREE(logname, PATH_MAX);
+ return filp;
+}
+
+static int llog_lvfs_open(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_logid *logid, char *name,
+ enum llog_open_param open_param)
+{
+ struct llog_ctxt *ctxt = handle->lgh_ctxt;
+ struct l_dentry *dchild = NULL;
+ struct obd_device *obd;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(ctxt);
+ LASSERT(ctxt->loc_exp);
+ LASSERT(ctxt->loc_exp->exp_obd);
+ obd = ctxt->loc_exp->exp_obd;
+
+ LASSERT(handle);
+ if (logid != NULL) {
+ dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, &logid->lgl_oi,
+ logid->lgl_ogen);
+ if (IS_ERR(dchild)) {
+ rc = PTR_ERR(dchild);
+ CERROR("%s: error looking up logfile #"DOSTID "#%08x:"
+ " rc = %d\n", ctxt->loc_obd->obd_name,
+ POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
+ GOTO(out, rc);
+ }
+ if (dchild->d_inode == NULL) {
+ l_dput(dchild);
+ rc = -ENOENT;
+ CERROR("%s: nonexistent llog #"DOSTID"#%08x:"
+ "rc = %d\n", ctxt->loc_obd->obd_name,
+ POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
+ GOTO(out, rc);
+ }
+ handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild,
+ O_RDWR | O_LARGEFILE);
+ l_dput(dchild);
+ if (IS_ERR(handle->lgh_file)) {
+ rc = PTR_ERR(handle->lgh_file);
+ handle->lgh_file = NULL;
+ CERROR("%s: error opening llog #"DOSTID"#%08x:"
+ "rc = %d\n", ctxt->loc_obd->obd_name,
+ POSTID(&logid->lgl_oi), logid->lgl_ogen, rc);
+ GOTO(out, rc);
+ }
+ handle->lgh_id = *logid;
+ } else if (name) {
+ handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, name,
+ O_RDWR | O_LARGEFILE, 0644);
+ if (IS_ERR(handle->lgh_file)) {
+ rc = PTR_ERR(handle->lgh_file);
+ handle->lgh_file = NULL;
+ if (rc == -ENOENT && open_param == LLOG_OPEN_NEW) {
+ OBD_ALLOC(handle->lgh_name, strlen(name) + 1);
+ if (handle->lgh_name)
+ strcpy(handle->lgh_name, name);
+ else
+ GOTO(out, rc = -ENOMEM);
+ rc = 0;
+ } else {
+ GOTO(out, rc);
+ }
+ } else {
+ lustre_build_llog_lvfs_oid(&handle->lgh_id,
+ handle->lgh_file->f_dentry->d_inode->i_ino,
+ handle->lgh_file->f_dentry->d_inode->i_generation);
+ }
+ } else {
+ LASSERTF(open_param == LLOG_OPEN_NEW, "%#x\n", open_param);
+ handle->lgh_file = NULL;
+ }
+
+ /* No new llog is expected but doesn't exist */
+ if (open_param != LLOG_OPEN_NEW && handle->lgh_file == NULL)
+ GOTO(out_name, rc = -ENOENT);
+
+ RETURN(0);
+out_name:
+ if (handle->lgh_name != NULL)
+ OBD_FREE(handle->lgh_name, strlen(name) + 1);
+out:
+ RETURN(rc);
+}
+
+static int llog_lvfs_exist(struct llog_handle *handle)
+{
+ return (handle->lgh_file != NULL);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context. */
+static int llog_lvfs_create(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct thandle *th)
+{
+ struct llog_ctxt *ctxt = handle->lgh_ctxt;
+ struct obd_device *obd;
+ struct l_dentry *dchild = NULL;
+ struct file *file;
+ struct obdo *oa = NULL;
+ int rc = 0;
+ int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+
+ ENTRY;
+
+ LASSERT(ctxt);
+ LASSERT(ctxt->loc_exp);
+ obd = ctxt->loc_exp->exp_obd;
+ LASSERT(handle->lgh_file == NULL);
+
+ if (handle->lgh_name) {
+ file = llog_filp_open(MOUNT_CONFIGS_DIR, handle->lgh_name,
+ open_flags, 0644);
+ if (IS_ERR(file))
+ RETURN(PTR_ERR(file));
+
+ lustre_build_llog_lvfs_oid(&handle->lgh_id,
+ file->f_dentry->d_inode->i_ino,
+ file->f_dentry->d_inode->i_generation);
+ handle->lgh_file = file;
+ } else {
+ OBDO_ALLOC(oa);
+ if (oa == NULL)
+ RETURN(-ENOMEM);
+
+ ostid_set_seq_llog(&oa->o_oi);
+ oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
+
+ rc = obd_create(NULL, ctxt->loc_exp, oa, NULL, NULL);
+ if (rc)
+ GOTO(out, rc);
+
+ /* FIXME: rationalize the misuse of o_generation in
+ * this API along with mds_obd_{create,destroy}.
+ * Hopefully it is only an internal API issue. */
+#define o_generation o_parent_oid
+ dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, &oa->o_oi,
+ oa->o_generation);
+ if (IS_ERR(dchild))
+ GOTO(out, rc = PTR_ERR(dchild));
+
+ file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild, open_flags);
+ l_dput(dchild);
+ if (IS_ERR(file))
+ GOTO(out, rc = PTR_ERR(file));
+ handle->lgh_id.lgl_oi = oa->o_oi;
+ handle->lgh_id.lgl_ogen = oa->o_generation;
+ handle->lgh_file = file;
+out:
+ OBDO_FREE(oa);
+ }
+ RETURN(rc);
+}
+
+static int llog_lvfs_close(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ int rc;
+
+ ENTRY;
+
+ if (handle->lgh_file == NULL)
+ RETURN(0);
+ rc = filp_close(handle->lgh_file, 0);
+ if (rc)
+ CERROR("%s: error closing llog #"DOSTID"#%08x: "
+ "rc = %d\n", handle->lgh_ctxt->loc_obd->obd_name,
+ POSTID(&handle->lgh_id.lgl_oi),
+ handle->lgh_id.lgl_ogen, rc);
+ handle->lgh_file = NULL;
+ if (handle->lgh_name) {
+ OBD_FREE(handle->lgh_name, strlen(handle->lgh_name) + 1);
+ handle->lgh_name = NULL;
+ }
+ RETURN(rc);
+}
+
+static int llog_lvfs_destroy(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct dentry *fdentry;
+ struct obdo *oa;
+ struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
+ char *dir;
+ void *th;
+ struct inode *inode;
+ int rc, rc1;
+ ENTRY;
+
+ dir = MOUNT_CONFIGS_DIR;
+
+ LASSERT(handle->lgh_file);
+ fdentry = handle->lgh_file->f_dentry;
+ inode = fdentry->d_parent->d_inode;
+ if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) {
+ struct lvfs_run_ctxt saved;
+ struct vfsmount *mnt = mntget(handle->lgh_file->f_vfsmnt);
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ dget(fdentry);
+ rc = llog_lvfs_close(env, handle);
+ if (rc == 0) {
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ rc = ll_vfs_unlink(inode, fdentry, mnt);
+ mutex_unlock(&inode->i_mutex);
+ }
+ mntput(mnt);
+
+ dput(fdentry);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ RETURN(rc);
+ }
+
+ OBDO_ALLOC(oa);
+ if (oa == NULL)
+ RETURN(-ENOMEM);
+
+ oa->o_oi = handle->lgh_id.lgl_oi;
+ oa->o_generation = handle->lgh_id.lgl_ogen;
+#undef o_generation
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
+
+ rc = llog_lvfs_close(env, handle);
+ if (rc)
+ GOTO(out, rc);
+
+ th = fsfilt_start_log(obd, inode, FSFILT_OP_UNLINK, NULL, 1);
+ if (IS_ERR(th)) {
+ CERROR("fsfilt_start failed: %ld\n", PTR_ERR(th));
+ GOTO(out, rc = PTR_ERR(th));
+ }
+
+ rc = obd_destroy(NULL, handle->lgh_ctxt->loc_exp, oa,
+ NULL, NULL, NULL, NULL);
+
+ rc1 = fsfilt_commit(obd, inode, th, 0);
+ if (rc == 0 && rc1 != 0)
+ rc = rc1;
+ out:
+ OBDO_FREE(oa);
+ RETURN(rc);
+}
+
+static int llog_lvfs_declare_create(const struct lu_env *env,
+ struct llog_handle *res,
+ struct thandle *th)
+{
+ return 0;
+}
+
+static int llog_lvfs_declare_write_rec(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec,
+ int idx, struct thandle *th)
+{
+ return 0;
+}
+
+struct llog_operations llog_lvfs_ops = {
+ .lop_write_rec = llog_lvfs_write_rec,
+ .lop_next_block = llog_lvfs_next_block,
+ .lop_prev_block = llog_lvfs_prev_block,
+ .lop_read_header = llog_lvfs_read_header,
+ .lop_create = llog_lvfs_create,
+ .lop_destroy = llog_lvfs_destroy,
+ .lop_close = llog_lvfs_close,
+ .lop_open = llog_lvfs_open,
+ .lop_exist = llog_lvfs_exist,
+ .lop_declare_create = llog_lvfs_declare_create,
+ .lop_declare_write_rec = llog_lvfs_declare_write_rec,
+};
+EXPORT_SYMBOL(llog_lvfs_ops);
+#else /* !__KERNEL__ */
+struct llog_operations llog_lvfs_ops = {};
+#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
new file mode 100644
index 000000000000..7e2290796315
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -0,0 +1,319 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include "llog_internal.h"
+
+/* helper functions for calling the llog obd methods */
+static struct llog_ctxt* llog_new_ctxt(struct obd_device *obd)
+{
+ struct llog_ctxt *ctxt;
+
+ OBD_ALLOC_PTR(ctxt);
+ if (!ctxt)
+ return NULL;
+
+ ctxt->loc_obd = obd;
+ atomic_set(&ctxt->loc_refcount, 1);
+
+ return ctxt;
+}
+
+static void llog_ctxt_destroy(struct llog_ctxt *ctxt)
+{
+ if (ctxt->loc_exp) {
+ class_export_put(ctxt->loc_exp);
+ ctxt->loc_exp = NULL;
+ }
+ if (ctxt->loc_imp) {
+ class_import_put(ctxt->loc_imp);
+ ctxt->loc_imp = NULL;
+ }
+ OBD_FREE_PTR(ctxt);
+}
+
+int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt)
+{
+ struct obd_llog_group *olg = ctxt->loc_olg;
+ struct obd_device *obd;
+ int rc = 0;
+
+ spin_lock(&olg->olg_lock);
+ if (!atomic_dec_and_test(&ctxt->loc_refcount)) {
+ spin_unlock(&olg->olg_lock);
+ return rc;
+ }
+ olg->olg_ctxts[ctxt->loc_idx] = NULL;
+ spin_unlock(&olg->olg_lock);
+
+ obd = ctxt->loc_obd;
+ spin_lock(&obd->obd_dev_lock);
+ /* sync with llog ctxt user thread */
+ spin_unlock(&obd->obd_dev_lock);
+
+ /* obd->obd_starting is needed for the case of cleanup
+ * in error case while obd is starting up. */
+ LASSERTF(obd->obd_starting == 1 ||
+ obd->obd_stopping == 1 || obd->obd_set_up == 0,
+ "wrong obd state: %d/%d/%d\n", !!obd->obd_starting,
+ !!obd->obd_stopping, !!obd->obd_set_up);
+
+ /* cleanup the llog ctxt here */
+ if (CTXTP(ctxt, cleanup))
+ rc = CTXTP(ctxt, cleanup)(env, ctxt);
+
+ llog_ctxt_destroy(ctxt);
+ wake_up(&olg->olg_waitq);
+ return rc;
+}
+EXPORT_SYMBOL(__llog_ctxt_put);
+
+int llog_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt)
+{
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ struct obd_llog_group *olg;
+ int rc, idx;
+ ENTRY;
+
+ LASSERT(ctxt != NULL);
+ LASSERT(ctxt != LP_POISON);
+
+ olg = ctxt->loc_olg;
+ LASSERT(olg != NULL);
+ LASSERT(olg != LP_POISON);
+
+ idx = ctxt->loc_idx;
+
+ /*
+ * Banlance the ctxt get when calling llog_cleanup()
+ */
+ LASSERT(atomic_read(&ctxt->loc_refcount) < LI_POISON);
+ LASSERT(atomic_read(&ctxt->loc_refcount) > 1);
+ llog_ctxt_put(ctxt);
+
+ /*
+ * Try to free the ctxt.
+ */
+ rc = __llog_ctxt_put(env, ctxt);
+ if (rc)
+ CERROR("Error %d while cleaning up ctxt %p\n",
+ rc, ctxt);
+
+ l_wait_event(olg->olg_waitq,
+ llog_group_ctxt_null(olg, idx), &lwi);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cleanup);
+
+int llog_setup(const struct lu_env *env, struct obd_device *obd,
+ struct obd_llog_group *olg, int index,
+ struct obd_device *disk_obd, struct llog_operations *op)
+{
+ struct llog_ctxt *ctxt;
+ int rc = 0;
+ ENTRY;
+
+ if (index < 0 || index >= LLOG_MAX_CTXTS)
+ RETURN(-EINVAL);
+
+ LASSERT(olg != NULL);
+
+ ctxt = llog_new_ctxt(obd);
+ if (!ctxt)
+ RETURN(-ENOMEM);
+
+ ctxt->loc_obd = obd;
+ ctxt->loc_olg = olg;
+ ctxt->loc_idx = index;
+ ctxt->loc_logops = op;
+ mutex_init(&ctxt->loc_mutex);
+ ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
+ ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED;
+
+ rc = llog_group_set_ctxt(olg, ctxt, index);
+ if (rc) {
+ llog_ctxt_destroy(ctxt);
+ if (rc == -EEXIST) {
+ ctxt = llog_group_get_ctxt(olg, index);
+ if (ctxt) {
+ /*
+ * mds_lov_update_desc() might call here multiple
+ * times. So if the llog is already set up then
+ * don't to do it again.
+ */
+ CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n",
+ obd->obd_name, index);
+ LASSERT(ctxt->loc_olg == olg);
+ LASSERT(ctxt->loc_obd == obd);
+ LASSERT(ctxt->loc_exp == disk_obd->obd_self_export);
+ LASSERT(ctxt->loc_logops == op);
+ llog_ctxt_put(ctxt);
+ }
+ rc = 0;
+ }
+ RETURN(rc);
+ }
+
+ if (op->lop_setup) {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LLOG_SETUP))
+ rc = -EOPNOTSUPP;
+ else
+ rc = op->lop_setup(env, obd, olg, index, disk_obd);
+ }
+
+ if (rc) {
+ CERROR("%s: ctxt %d lop_setup=%p failed: rc = %d\n",
+ obd->obd_name, index, op->lop_setup, rc);
+ llog_group_clear_ctxt(olg, index);
+ llog_ctxt_destroy(ctxt);
+ } else {
+ CDEBUG(D_CONFIG, "obd %s ctxt %d is initialized\n",
+ obd->obd_name, index);
+ ctxt->loc_flags &= ~LLOG_CTXT_FLAG_UNINITIALIZED;
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_setup);
+
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (!ctxt)
+ RETURN(0);
+
+ if (CTXTP(ctxt, sync))
+ rc = CTXTP(ctxt, sync)(ctxt, exp, flags);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_sync);
+
+int llog_obd_add(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies)
+{
+ int raised, rc;
+ ENTRY;
+
+ if (!ctxt) {
+ CERROR("No ctxt\n");
+ RETURN(-ENODEV);
+ }
+
+ if (ctxt->loc_flags & LLOG_CTXT_FLAG_UNINITIALIZED)
+ RETURN(-ENXIO);
+
+ CTXT_CHECK_OP(ctxt, obd_add, -EOPNOTSUPP);
+ raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE);
+ if (!raised)
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
+ rc = CTXTP(ctxt, obd_add)(env, ctxt, rec, lsm, logcookies,
+ numcookies);
+ if (!raised)
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_obd_add);
+
+int llog_cancel(const struct lu_env *env, struct llog_ctxt *ctxt,
+ struct lov_stripe_md *lsm, int count,
+ struct llog_cookie *cookies, int flags)
+{
+ int rc;
+ ENTRY;
+
+ if (!ctxt) {
+ CERROR("No ctxt\n");
+ RETURN(-ENODEV);
+ }
+
+ CTXT_CHECK_OP(ctxt, cancel, -EOPNOTSUPP);
+ rc = CTXTP(ctxt, cancel)(env, ctxt, lsm, count, cookies, flags);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cancel);
+
+int obd_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *disk_obd, int *index)
+{
+ int rc;
+ ENTRY;
+ OBD_CHECK_DT_OP(obd, llog_init, 0);
+ OBD_COUNTER_INCREMENT(obd, llog_init);
+
+ rc = OBP(obd, llog_init)(obd, olg, disk_obd, index);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(obd_llog_init);
+
+int obd_llog_finish(struct obd_device *obd, int count)
+{
+ int rc;
+ ENTRY;
+ OBD_CHECK_DT_OP(obd, llog_finish, 0);
+ OBD_COUNTER_INCREMENT(obd, llog_finish);
+
+ rc = OBP(obd, llog_finish)(obd, count);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(obd_llog_finish);
+
+/* context key constructor/destructor: llog_key_init, llog_key_fini */
+LU_KEY_INIT_FINI(llog, struct llog_thread_info);
+/* context key: llog_thread_key */
+LU_CONTEXT_KEY_DEFINE(llog, LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL);
+LU_KEY_INIT_GENERIC(llog);
+EXPORT_SYMBOL(llog_thread_key);
+
+int llog_info_init(void)
+{
+ llog_key_init_generic(&llog_thread_key, NULL);
+ lu_context_key_register(&llog_thread_key);
+ return 0;
+}
+
+void llog_info_fini(void)
+{
+ lu_context_key_degister(&llog_thread_key);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_osd.c b/drivers/staging/lustre/lustre/obdclass/llog_osd.c
new file mode 100644
index 000000000000..6dbd21a863c2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_osd.c
@@ -0,0 +1,1323 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog_osd.c - low level llog routines on top of OSD API
+ *
+ * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_fid.h>
+#include <dt_object.h>
+
+#include "llog_internal.h"
+#include "local_storage.h"
+
+/*
+ * - multi-chunks or big-declaration approach
+ * - use unique sequence instead of llog sb tracking unique ids
+ * - re-use existing environment
+ * - named llog support (can be used for testing only at the present)
+ * - llog_origin_connect() work with OSD API
+ */
+
+static int llog_osd_declare_new_object(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o,
+ struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+
+ lgi->lgi_attr.la_valid = LA_MODE;
+ lgi->lgi_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
+ lgi->lgi_dof.dof_type = dt_mode_to_dft(S_IFREG);
+
+ return local_object_declare_create(env, los, o, &lgi->lgi_attr,
+ &lgi->lgi_dof, th);
+}
+
+static int llog_osd_create_new_object(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o,
+ struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+
+ lgi->lgi_attr.la_valid = LA_MODE;
+ lgi->lgi_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
+ lgi->lgi_dof.dof_type = dt_mode_to_dft(S_IFREG);
+
+ return local_object_create(env, los, o, &lgi->lgi_attr,
+ &lgi->lgi_dof, th);
+}
+
+static int llog_osd_pad(const struct lu_env *env, struct dt_object *o,
+ loff_t *off, int len, int index, struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ int rc;
+
+ ENTRY;
+
+ LASSERT(th);
+ LASSERT(off);
+ LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
+
+ lgi->lgi_tail.lrt_len = lgi->lgi_lrh.lrh_len = len;
+ lgi->lgi_tail.lrt_index = lgi->lgi_lrh.lrh_index = index;
+ lgi->lgi_lrh.lrh_type = LLOG_PAD_MAGIC;
+
+ lgi->lgi_buf.lb_buf = &lgi->lgi_lrh;
+ lgi->lgi_buf.lb_len = sizeof(lgi->lgi_lrh);
+ dt_write_lock(env, o, 0);
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc) {
+ CERROR("%s: error writing padding record: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+
+ lgi->lgi_buf.lb_buf = &lgi->lgi_tail;
+ lgi->lgi_buf.lb_len = sizeof(lgi->lgi_tail);
+ *off += len - sizeof(lgi->lgi_lrh) - sizeof(lgi->lgi_tail);
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc)
+ CERROR("%s: error writing padding record: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+out:
+ dt_write_unlock(env, o);
+ RETURN(rc);
+}
+
+static int llog_osd_write_blob(const struct lu_env *env, struct dt_object *o,
+ struct llog_rec_hdr *rec, void *buf,
+ loff_t *off, struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ int buflen = rec->lrh_len;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(env);
+ LASSERT(o);
+
+ if (buflen == 0)
+ CWARN("0-length record\n");
+
+ CDEBUG(D_OTHER, "write blob with type %x, buf %p/%u at off %llu\n",
+ rec->lrh_type, buf, buflen, *off);
+
+ lgi->lgi_attr.la_valid = LA_SIZE;
+ lgi->lgi_attr.la_size = *off;
+
+ if (!buf) {
+ lgi->lgi_buf.lb_len = buflen;
+ lgi->lgi_buf.lb_buf = rec;
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc)
+ CERROR("%s: error writing log record: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+
+ /* the buf case */
+ /* protect the following 3 writes from concurrent read */
+ dt_write_lock(env, o, 0);
+ rec->lrh_len = sizeof(*rec) + buflen + sizeof(lgi->lgi_tail);
+ lgi->lgi_buf.lb_len = sizeof(*rec);
+ lgi->lgi_buf.lb_buf = rec;
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc) {
+ CERROR("%s: error writing log hdr: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+ GOTO(out_unlock, rc);
+ }
+
+ lgi->lgi_buf.lb_len = buflen;
+ lgi->lgi_buf.lb_buf = buf;
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc) {
+ CERROR("%s: error writing log buffer: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+ GOTO(out_unlock, rc);
+ }
+
+ lgi->lgi_tail.lrt_len = rec->lrh_len;
+ lgi->lgi_tail.lrt_index = rec->lrh_index;
+ lgi->lgi_buf.lb_len = sizeof(lgi->lgi_tail);
+ lgi->lgi_buf.lb_buf = &lgi->lgi_tail;
+ rc = dt_record_write(env, o, &lgi->lgi_buf, off, th);
+ if (rc)
+ CERROR("%s: error writing log tail: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+
+out_unlock:
+ dt_write_unlock(env, o);
+
+out:
+ /* cleanup the content written above */
+ if (rc) {
+ dt_punch(env, o, lgi->lgi_attr.la_size, OBD_OBJECT_EOF, th,
+ BYPASS_CAPA);
+ dt_attr_set(env, o, &lgi->lgi_attr, th, BYPASS_CAPA);
+ }
+
+ RETURN(rc);
+}
+
+static int llog_osd_read_header(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct llog_rec_hdr *llh_hdr;
+ struct dt_object *o;
+ struct llog_thread_info *lgi;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
+
+ o = handle->lgh_obj;
+ LASSERT(o);
+
+ lgi = llog_info(env);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
+ if (rc)
+ RETURN(rc);
+
+ LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
+
+ if (lgi->lgi_attr.la_size == 0) {
+ CDEBUG(D_HA, "not reading header from 0-byte log\n");
+ RETURN(LLOG_EEMPTY);
+ }
+
+ lgi->lgi_off = 0;
+ lgi->lgi_buf.lb_buf = handle->lgh_hdr;
+ lgi->lgi_buf.lb_len = LLOG_CHUNK_SIZE;
+
+ rc = dt_record_read(env, o, &lgi->lgi_buf, &lgi->lgi_off);
+ if (rc) {
+ CERROR("%s: error reading log header from "DFID": rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ PFID(lu_object_fid(&o->do_lu)), rc);
+ RETURN(rc);
+ }
+
+ llh_hdr = &handle->lgh_hdr->llh_hdr;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr))
+ lustre_swab_llog_hdr(handle->lgh_hdr);
+
+ if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+ CERROR("%s: bad log %s "DFID" header magic: %#x "
+ "(expected %#x)\n", o->do_lu.lo_dev->ld_obd->obd_name,
+ handle->lgh_name ? handle->lgh_name : "",
+ PFID(lu_object_fid(&o->do_lu)),
+ llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+ RETURN(-EIO);
+ } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+ CERROR("%s: incorrectly sized log %s "DFID" header: "
+ "%#x (expected %#x)\n"
+ "you may need to re-run lconf --write_conf.\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ handle->lgh_name ? handle->lgh_name : "",
+ PFID(lu_object_fid(&o->do_lu)),
+ llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+ RETURN(-EIO);
+ }
+
+ handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+
+ RETURN(0);
+}
+
+static int llog_osd_declare_write_rec(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec,
+ int idx, struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct dt_object *o;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(env);
+ LASSERT(th);
+ LASSERT(loghandle);
+
+ o = loghandle->lgh_obj;
+ LASSERT(o);
+
+ /* each time we update header */
+ rc = dt_declare_record_write(env, o, sizeof(struct llog_log_hdr), 0,
+ th);
+ if (rc || idx == 0) /* if error or just header */
+ RETURN(rc);
+
+ if (dt_object_exists(o)) {
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
+ lgi->lgi_off = lgi->lgi_attr.la_size;
+ LASSERT(ergo(rc == 0, lgi->lgi_attr.la_valid & LA_SIZE));
+ if (rc)
+ RETURN(rc);
+
+ rc = dt_declare_punch(env, o, lgi->lgi_off, OBD_OBJECT_EOF, th);
+ if (rc)
+ RETURN(rc);
+ } else {
+ lgi->lgi_off = 0;
+ }
+
+ /* XXX: implement declared window or multi-chunks approach */
+ rc = dt_declare_record_write(env, o, 32 * 1024, lgi->lgi_off, th);
+
+ RETURN(rc);
+}
+
+/* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */
+/* appends if idx == -1, otherwise overwrites record idx. */
+static int llog_osd_write_rec(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct llog_rec_hdr *rec,
+ struct llog_cookie *reccookie, int cookiecount,
+ void *buf, int idx, struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct llog_log_hdr *llh;
+ int reclen = rec->lrh_len;
+ int index, rc, old_tail_idx;
+ struct llog_rec_tail *lrt;
+ struct dt_object *o;
+ size_t left;
+
+ ENTRY;
+
+ LASSERT(env);
+ llh = loghandle->lgh_hdr;
+ LASSERT(llh);
+ o = loghandle->lgh_obj;
+ LASSERT(o);
+ LASSERT(th);
+
+ CDEBUG(D_OTHER, "new record %x to "DFID"\n",
+ rec->lrh_type, PFID(lu_object_fid(&o->do_lu)));
+
+ /* record length should not bigger than LLOG_CHUNK_SIZE */
+ if (buf)
+ rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
+ sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
+ else
+ rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
+ if (rc)
+ RETURN(rc);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
+ if (rc)
+ RETURN(rc);
+
+ if (buf)
+ /* write_blob adds header and tail to lrh_len. */
+ reclen = sizeof(*rec) + rec->lrh_len +
+ sizeof(struct llog_rec_tail);
+
+ if (idx != -1) {
+ /* no header: only allowed to insert record 1 */
+ if (idx != 1 && lgi->lgi_attr.la_size == 0)
+ LBUG();
+
+ if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
+ RETURN(-EINVAL);
+
+ if (!ext2_test_bit(idx, llh->llh_bitmap))
+ CERROR("%s: modify unset record %u\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, idx);
+ if (idx != rec->lrh_index)
+ CERROR("%s: index mismatch %d %u\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, idx,
+ rec->lrh_index);
+
+ lgi->lgi_off = 0;
+ rc = llog_osd_write_blob(env, o, &llh->llh_hdr, NULL,
+ &lgi->lgi_off, th);
+ /* we are done if we only write the header or on error */
+ if (rc || idx == 0)
+ RETURN(rc);
+
+ if (buf) {
+ /* We assume that caller has set lgh_cur_* */
+ lgi->lgi_off = loghandle->lgh_cur_offset;
+ CDEBUG(D_OTHER,
+ "modify record "DOSTID": idx:%d/%u/%d, len:%u "
+ "offset %llu\n",
+ POSTID(&loghandle->lgh_id.lgl_oi), idx,
+ rec->lrh_index,
+ loghandle->lgh_cur_idx, rec->lrh_len,
+ (long long)(lgi->lgi_off - sizeof(*llh)));
+ if (rec->lrh_index != loghandle->lgh_cur_idx) {
+ CERROR("%s: modify idx mismatch %u/%d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, idx,
+ loghandle->lgh_cur_idx);
+ RETURN(-EFAULT);
+ }
+ } else {
+ /* Assumes constant lrh_len */
+ lgi->lgi_off = sizeof(*llh) + (idx - 1) * reclen;
+ }
+
+ rc = llog_osd_write_blob(env, o, rec, buf, &lgi->lgi_off, th);
+ if (rc == 0 && reccookie) {
+ reccookie->lgc_lgl = loghandle->lgh_id;
+ reccookie->lgc_index = idx;
+ rc = 1;
+ }
+ RETURN(rc);
+ }
+
+ /* Make sure that records don't cross a chunk boundary, so we can
+ * process them page-at-a-time if needed. If it will cross a chunk
+ * boundary, write in a fake (but referenced) entry to pad the chunk.
+ *
+ * We know that llog_current_log() will return a loghandle that is
+ * big enough to hold reclen, so all we care about is padding here.
+ */
+ LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
+ lgi->lgi_off = lgi->lgi_attr.la_size;
+ left = LLOG_CHUNK_SIZE - (lgi->lgi_off & (LLOG_CHUNK_SIZE - 1));
+ /* NOTE: padding is a record, but no bit is set */
+ if (left != 0 && left != reclen &&
+ left < (reclen + LLOG_MIN_REC_SIZE)) {
+ index = loghandle->lgh_last_idx + 1;
+ rc = llog_osd_pad(env, o, &lgi->lgi_off, left, index, th);
+ if (rc)
+ RETURN(rc);
+ loghandle->lgh_last_idx++; /*for pad rec*/
+ }
+ /* if it's the last idx in log file, then return -ENOSPC */
+ if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
+ RETURN(-ENOSPC);
+
+ loghandle->lgh_last_idx++;
+ index = loghandle->lgh_last_idx;
+ LASSERT(index < LLOG_BITMAP_SIZE(llh));
+ rec->lrh_index = index;
+ if (buf == NULL) {
+ lrt = (struct llog_rec_tail *)((char *)rec + rec->lrh_len -
+ sizeof(*lrt));
+ lrt->lrt_len = rec->lrh_len;
+ lrt->lrt_index = rec->lrh_index;
+ }
+ /* The caller should make sure only 1 process access the lgh_last_idx,
+ * Otherwise it might hit the assert.*/
+ LASSERT(index < LLOG_BITMAP_SIZE(llh));
+ spin_lock(&loghandle->lgh_hdr_lock);
+ if (ext2_set_bit(index, llh->llh_bitmap)) {
+ CERROR("%s: index %u already set in log bitmap\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, index);
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ LBUG(); /* should never happen */
+ }
+ llh->llh_count++;
+ spin_unlock(&loghandle->lgh_hdr_lock);
+ old_tail_idx = llh->llh_tail.lrt_index;
+ llh->llh_tail.lrt_index = index;
+
+ lgi->lgi_off = 0;
+ rc = llog_osd_write_blob(env, o, &llh->llh_hdr, NULL, &lgi->lgi_off,
+ th);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
+ if (rc)
+ GOTO(out, rc);
+
+ LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
+ lgi->lgi_off = lgi->lgi_attr.la_size;
+
+ rc = llog_osd_write_blob(env, o, rec, buf, &lgi->lgi_off, th);
+
+out:
+ /* cleanup llog for error case */
+ if (rc) {
+ spin_lock(&loghandle->lgh_hdr_lock);
+ ext2_clear_bit(index, llh->llh_bitmap);
+ llh->llh_count--;
+ spin_unlock(&loghandle->lgh_hdr_lock);
+
+ /* restore the header */
+ loghandle->lgh_last_idx--;
+ llh->llh_tail.lrt_index = old_tail_idx;
+ lgi->lgi_off = 0;
+ llog_osd_write_blob(env, o, &llh->llh_hdr, NULL,
+ &lgi->lgi_off, th);
+ }
+
+ CDEBUG(D_RPCTRACE, "added record "DOSTID": idx: %u, %u\n",
+ POSTID(&loghandle->lgh_id.lgl_oi), index, rec->lrh_len);
+ if (rc == 0 && reccookie) {
+ reccookie->lgc_lgl = loghandle->lgh_id;
+ reccookie->lgc_index = index;
+ if ((rec->lrh_type == MDS_UNLINK_REC) ||
+ (rec->lrh_type == MDS_SETATTR64_REC))
+ reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
+ else if (rec->lrh_type == OST_SZ_REC)
+ reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
+ else
+ reccookie->lgc_subsys = -1;
+ rc = 1;
+ }
+ RETURN(rc);
+}
+
+/* We can skip reading at least as many log blocks as the number of
+ * minimum sized log records we are skipping. If it turns out
+ * that we are not far enough along the log (because the
+ * actual records are larger than minimum size) we just skip
+ * some more records.
+ */
+static void llog_skip_over(__u64 *off, int curr, int goal)
+{
+ if (goal <= curr)
+ return;
+ *off = (*off + (goal - curr - 1) * LLOG_MIN_REC_SIZE) &
+ ~(LLOG_CHUNK_SIZE - 1);
+}
+
+/* sets:
+ * - cur_offset to the furthest point read in the log file
+ * - cur_idx to the log index preceeding cur_offset
+ * returns -EIO/-EINVAL on error
+ */
+static int llog_osd_next_block(const struct lu_env *env,
+ struct llog_handle *loghandle, int *cur_idx,
+ int next_idx, __u64 *cur_offset, void *buf,
+ int len)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct dt_object *o;
+ struct dt_device *dt;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(env);
+ LASSERT(lgi);
+
+ if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
+ RETURN(-EINVAL);
+
+ CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
+ next_idx, *cur_idx, *cur_offset);
+
+ LASSERT(loghandle);
+ LASSERT(loghandle->lgh_ctxt);
+
+ o = loghandle->lgh_obj;
+ LASSERT(o);
+ LASSERT(dt_object_exists(o));
+ dt = lu2dt_dev(o->do_lu.lo_dev);
+ LASSERT(dt);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
+ if (rc)
+ GOTO(out, rc);
+
+ while (*cur_offset < lgi->lgi_attr.la_size) {
+ struct llog_rec_hdr *rec, *last_rec;
+ struct llog_rec_tail *tail;
+
+ llog_skip_over(cur_offset, *cur_idx, next_idx);
+
+ /* read up to next LLOG_CHUNK_SIZE block */
+ lgi->lgi_buf.lb_len = LLOG_CHUNK_SIZE -
+ (*cur_offset & (LLOG_CHUNK_SIZE - 1));
+ lgi->lgi_buf.lb_buf = buf;
+
+ /* Note: read lock is not needed around la_size get above at
+ * the time of dt_attr_get(). There are only two cases that
+ * matter. Either la_size == cur_offset, in which case the
+ * entire read is skipped, or la_size > cur_offset and the loop
+ * is entered and this thread is blocked at dt_read_lock()
+ * until the write is completed. When the write completes, then
+ * the dt_read() will be done with the full length, and will
+ * get the full data.
+ */
+ dt_read_lock(env, o, 0);
+ rc = dt_read(env, o, &lgi->lgi_buf, cur_offset);
+ dt_read_unlock(env, o);
+ if (rc < 0) {
+ CERROR("%s: can't read llog block from log "DFID
+ " offset "LPU64": rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ PFID(lu_object_fid(&o->do_lu)), *cur_offset,
+ rc);
+ GOTO(out, rc);
+ }
+
+ if (rc < len) {
+ /* signal the end of the valid buffer to
+ * llog_process */
+ memset(buf + rc, 0, len - rc);
+ }
+
+ if (rc == 0) /* end of file, nothing to do */
+ GOTO(out, rc);
+
+ if (rc < sizeof(*tail)) {
+ CERROR("%s: invalid llog block at log id "DOSTID"/%u "
+ "offset "LPU64"\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, *cur_offset);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ rec = buf;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+
+ tail = (struct llog_rec_tail *)((char *)buf + rc -
+ sizeof(struct llog_rec_tail));
+ /* get the last record in block */
+ last_rec = (struct llog_rec_hdr *)((char *)buf + rc -
+ le32_to_cpu(tail->lrt_len));
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
+ lustre_swab_llog_rec(last_rec);
+ LASSERT(last_rec->lrh_index == tail->lrt_index);
+
+ *cur_idx = tail->lrt_index;
+
+ /* this shouldn't happen */
+ if (tail->lrt_index == 0) {
+ CERROR("%s: invalid llog tail at log id "DOSTID"/%u "
+ "offset "LPU64"\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, *cur_offset);
+ GOTO(out, rc = -EINVAL);
+ }
+ if (tail->lrt_index < next_idx)
+ continue;
+
+ /* sanity check that the start of the new buffer is no farther
+ * than the record that we wanted. This shouldn't happen. */
+ if (rec->lrh_index > next_idx) {
+ CERROR("%s: missed desired record? %u > %u\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ rec->lrh_index, next_idx);
+ GOTO(out, rc = -ENOENT);
+ }
+ GOTO(out, rc = 0);
+ }
+ GOTO(out, rc = -EIO);
+out:
+ return rc;
+}
+
+static int llog_osd_prev_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int prev_idx, void *buf, int len)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct dt_object *o;
+ struct dt_device *dt;
+ loff_t cur_offset;
+ int rc;
+
+ ENTRY;
+
+ if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
+ RETURN(-EINVAL);
+
+ CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx);
+
+ LASSERT(loghandle);
+ LASSERT(loghandle->lgh_ctxt);
+
+ o = loghandle->lgh_obj;
+ LASSERT(o);
+ LASSERT(dt_object_exists(o));
+ dt = lu2dt_dev(o->do_lu.lo_dev);
+ LASSERT(dt);
+
+ cur_offset = LLOG_CHUNK_SIZE;
+ llog_skip_over(&cur_offset, 0, prev_idx);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
+ if (rc)
+ GOTO(out, rc);
+
+ while (cur_offset < lgi->lgi_attr.la_size) {
+ struct llog_rec_hdr *rec, *last_rec;
+ struct llog_rec_tail *tail;
+
+ lgi->lgi_buf.lb_len = len;
+ lgi->lgi_buf.lb_buf = buf;
+ /* It is OK to have locking around dt_read() only, see
+ * comment in llog_osd_next_block for details
+ */
+ dt_read_lock(env, o, 0);
+ rc = dt_read(env, o, &lgi->lgi_buf, &cur_offset);
+ dt_read_unlock(env, o);
+ if (rc < 0) {
+ CERROR("%s: can't read llog block from log "DFID
+ " offset "LPU64": rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ PFID(lu_object_fid(&o->do_lu)), cur_offset, rc);
+ GOTO(out, rc);
+ }
+
+ if (rc == 0) /* end of file, nothing to do */
+ GOTO(out, rc);
+
+ if (rc < sizeof(*tail)) {
+ CERROR("%s: invalid llog block at log id "DOSTID"/%u "
+ "offset "LPU64"\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, cur_offset);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ rec = buf;
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec);
+
+ tail = (struct llog_rec_tail *)((char *)buf + rc -
+ sizeof(struct llog_rec_tail));
+ /* get the last record in block */
+ last_rec = (struct llog_rec_hdr *)((char *)buf + rc -
+ le32_to_cpu(tail->lrt_len));
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
+ lustre_swab_llog_rec(last_rec);
+ LASSERT(last_rec->lrh_index == tail->lrt_index);
+
+ /* this shouldn't happen */
+ if (tail->lrt_index == 0) {
+ CERROR("%s: invalid llog tail at log id "DOSTID"/%u "
+ "offset "LPU64"\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ POSTID(&loghandle->lgh_id.lgl_oi),
+ loghandle->lgh_id.lgl_ogen, cur_offset);
+ GOTO(out, rc = -EINVAL);
+ }
+ if (tail->lrt_index < prev_idx)
+ continue;
+
+ /* sanity check that the start of the new buffer is no farther
+ * than the record that we wanted. This shouldn't happen. */
+ if (rec->lrh_index > prev_idx) {
+ CERROR("%s: missed desired record? %u > %u\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ rec->lrh_index, prev_idx);
+ GOTO(out, rc = -ENOENT);
+ }
+ GOTO(out, rc = 0);
+ }
+ GOTO(out, rc = -EIO);
+out:
+ return rc;
+}
+
+struct dt_object *llog_osd_dir_get(const struct lu_env *env,
+ struct llog_ctxt *ctxt)
+{
+ struct dt_device *dt;
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dir;
+ int rc;
+
+ dt = ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt;
+ if (ctxt->loc_dir == NULL) {
+ rc = dt_root_get(env, dt, &dti->dti_fid);
+ if (rc)
+ return ERR_PTR(rc);
+ dir = dt_locate(env, dt, &dti->dti_fid);
+ } else {
+ lu_object_get(&ctxt->loc_dir->do_lu);
+ dir = ctxt->loc_dir;
+ }
+
+ return dir;
+}
+
+static int llog_osd_open(const struct lu_env *env, struct llog_handle *handle,
+ struct llog_logid *logid, char *name,
+ enum llog_open_param open_param)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct llog_ctxt *ctxt = handle->lgh_ctxt;
+ struct dt_object *o;
+ struct dt_device *dt;
+ struct ls_device *ls;
+ struct local_oid_storage *los;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(env);
+ LASSERT(ctxt);
+ LASSERT(ctxt->loc_exp);
+ LASSERT(ctxt->loc_exp->exp_obd);
+ dt = ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt;
+ LASSERT(dt);
+
+ ls = ls_device_get(dt);
+ if (IS_ERR(ls))
+ RETURN(PTR_ERR(ls));
+
+ mutex_lock(&ls->ls_los_mutex);
+ los = dt_los_find(ls, name != NULL ? FID_SEQ_LLOG_NAME : FID_SEQ_LLOG);
+ mutex_unlock(&ls->ls_los_mutex);
+ LASSERT(los);
+ ls_device_put(env, ls);
+
+ LASSERT(handle);
+
+ if (logid != NULL) {
+ logid_to_fid(logid, &lgi->lgi_fid);
+ } else if (name) {
+ struct dt_object *llog_dir;
+
+ llog_dir = llog_osd_dir_get(env, ctxt);
+ if (IS_ERR(llog_dir))
+ GOTO(out, rc = PTR_ERR(llog_dir));
+ dt_read_lock(env, llog_dir, 0);
+ rc = dt_lookup_dir(env, llog_dir, name, &lgi->lgi_fid);
+ dt_read_unlock(env, llog_dir);
+ lu_object_put(env, &llog_dir->do_lu);
+ if (rc == -ENOENT && open_param == LLOG_OPEN_NEW) {
+ /* generate fid for new llog */
+ rc = local_object_fid_generate(env, los,
+ &lgi->lgi_fid);
+ }
+ if (rc < 0)
+ GOTO(out, rc);
+ OBD_ALLOC(handle->lgh_name, strlen(name) + 1);
+ if (handle->lgh_name)
+ strcpy(handle->lgh_name, name);
+ else
+ GOTO(out, rc = -ENOMEM);
+ } else {
+ LASSERTF(open_param & LLOG_OPEN_NEW, "%#x\n", open_param);
+ /* generate fid for new llog */
+ rc = local_object_fid_generate(env, los, &lgi->lgi_fid);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+ o = ls_locate(env, ls, &lgi->lgi_fid);
+ if (IS_ERR(o))
+ GOTO(out_name, rc = PTR_ERR(o));
+
+ /* No new llog is expected but doesn't exist */
+ if (open_param != LLOG_OPEN_NEW && !dt_object_exists(o))
+ GOTO(out_put, rc = -ENOENT);
+
+ fid_to_logid(&lgi->lgi_fid, &handle->lgh_id);
+ handle->lgh_obj = o;
+ handle->private_data = los;
+ LASSERT(handle->lgh_ctxt);
+
+ RETURN(rc);
+
+out_put:
+ lu_object_put(env, &o->do_lu);
+out_name:
+ if (handle->lgh_name != NULL)
+ OBD_FREE(handle->lgh_name, strlen(name) + 1);
+out:
+ dt_los_put(los);
+ RETURN(rc);
+}
+
+static int llog_osd_exist(struct llog_handle *handle)
+{
+ LASSERT(handle->lgh_obj);
+ return (dt_object_exists(handle->lgh_obj) &&
+ !lu_object_is_dying(handle->lgh_obj->do_lu.lo_header));
+}
+
+static int llog_osd_declare_create(const struct lu_env *env,
+ struct llog_handle *res, struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct local_oid_storage *los;
+ struct dt_object *o;
+ int rc;
+
+ ENTRY;
+
+ LASSERT(res->lgh_obj);
+ LASSERT(th);
+
+ /* object can be created by another thread */
+ o = res->lgh_obj;
+ if (dt_object_exists(o))
+ RETURN(0);
+
+ los = res->private_data;
+ LASSERT(los);
+
+ rc = llog_osd_declare_new_object(env, los, o, th);
+ if (rc)
+ RETURN(rc);
+
+ rc = dt_declare_record_write(env, o, LLOG_CHUNK_SIZE, 0, th);
+ if (rc)
+ RETURN(rc);
+
+ if (res->lgh_name) {
+ struct dt_object *llog_dir;
+
+ llog_dir = llog_osd_dir_get(env, res->lgh_ctxt);
+ if (IS_ERR(llog_dir))
+ RETURN(PTR_ERR(llog_dir));
+ logid_to_fid(&res->lgh_id, &lgi->lgi_fid);
+ rc = dt_declare_insert(env, llog_dir,
+ (struct dt_rec *)&lgi->lgi_fid,
+ (struct dt_key *)res->lgh_name, th);
+ lu_object_put(env, &llog_dir->do_lu);
+ if (rc)
+ CERROR("%s: can't declare named llog %s: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ res->lgh_name, rc);
+ }
+ RETURN(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context. */
+static int llog_osd_create(const struct lu_env *env, struct llog_handle *res,
+ struct thandle *th)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct local_oid_storage *los;
+ struct dt_object *o;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(env);
+ o = res->lgh_obj;
+ LASSERT(o);
+
+ /* llog can be already created */
+ if (dt_object_exists(o))
+ RETURN(-EEXIST);
+
+ los = res->private_data;
+ LASSERT(los);
+
+ dt_write_lock(env, o, 0);
+ if (!dt_object_exists(o))
+ rc = llog_osd_create_new_object(env, los, o, th);
+ else
+ rc = -EEXIST;
+
+ dt_write_unlock(env, o);
+ if (rc)
+ RETURN(rc);
+
+ if (res->lgh_name) {
+ struct dt_object *llog_dir;
+
+ llog_dir = llog_osd_dir_get(env, res->lgh_ctxt);
+ if (IS_ERR(llog_dir))
+ RETURN(PTR_ERR(llog_dir));
+
+ logid_to_fid(&res->lgh_id, &lgi->lgi_fid);
+ dt_read_lock(env, llog_dir, 0);
+ rc = dt_insert(env, llog_dir,
+ (struct dt_rec *)&lgi->lgi_fid,
+ (struct dt_key *)res->lgh_name,
+ th, BYPASS_CAPA, 1);
+ dt_read_unlock(env, llog_dir);
+ lu_object_put(env, &llog_dir->do_lu);
+ if (rc)
+ CERROR("%s: can't create named llog %s: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ res->lgh_name, rc);
+ }
+ RETURN(rc);
+}
+
+static int llog_osd_close(const struct lu_env *env, struct llog_handle *handle)
+{
+ struct local_oid_storage *los;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(handle->lgh_obj);
+
+ lu_object_put(env, &handle->lgh_obj->do_lu);
+
+ los = handle->private_data;
+ LASSERT(los);
+ dt_los_put(los);
+
+ if (handle->lgh_name)
+ OBD_FREE(handle->lgh_name, strlen(handle->lgh_name) + 1);
+
+ RETURN(rc);
+}
+
+static int llog_osd_destroy(const struct lu_env *env,
+ struct llog_handle *loghandle)
+{
+ struct llog_ctxt *ctxt;
+ struct dt_object *o, *llog_dir = NULL;
+ struct dt_device *d;
+ struct thandle *th;
+ char *name = NULL;
+ int rc;
+
+ ENTRY;
+
+ ctxt = loghandle->lgh_ctxt;
+ LASSERT(ctxt);
+
+ o = loghandle->lgh_obj;
+ LASSERT(o);
+
+ d = lu2dt_dev(o->do_lu.lo_dev);
+ LASSERT(d);
+ LASSERT(d == ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt);
+
+ th = dt_trans_create(env, d);
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
+
+ if (loghandle->lgh_name) {
+ llog_dir = llog_osd_dir_get(env, ctxt);
+ if (IS_ERR(llog_dir))
+ GOTO(out_trans, rc = PTR_ERR(llog_dir));
+
+ name = loghandle->lgh_name;
+ rc = dt_declare_delete(env, llog_dir,
+ (struct dt_key *)name, th);
+ if (rc)
+ GOTO(out_trans, rc);
+ }
+
+ dt_declare_ref_del(env, o, th);
+
+ rc = dt_declare_destroy(env, o, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_trans_start_local(env, d, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ dt_write_lock(env, o, 0);
+ if (dt_object_exists(o)) {
+ if (name) {
+ dt_read_lock(env, llog_dir, 0);
+ rc = dt_delete(env, llog_dir,
+ (struct dt_key *) name,
+ th, BYPASS_CAPA);
+ dt_read_unlock(env, llog_dir);
+ if (rc) {
+ CERROR("%s: can't remove llog %s: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ name, rc);
+ GOTO(out_unlock, rc);
+ }
+ }
+ dt_ref_del(env, o, th);
+ rc = dt_destroy(env, o, th);
+ if (rc)
+ GOTO(out_unlock, rc);
+ }
+out_unlock:
+ dt_write_unlock(env, o);
+out_trans:
+ dt_trans_stop(env, d, th);
+ if (llog_dir != NULL)
+ lu_object_put(env, &llog_dir->do_lu);
+ RETURN(rc);
+}
+
+static int llog_osd_setup(const struct lu_env *env, struct obd_device *obd,
+ struct obd_llog_group *olg, int ctxt_idx,
+ struct obd_device *disk_obd)
+{
+ struct local_oid_storage *los;
+ struct llog_thread_info *lgi = llog_info(env);
+ struct llog_ctxt *ctxt;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(obd);
+ LASSERT(olg->olg_ctxts[ctxt_idx]);
+
+ ctxt = llog_ctxt_get(olg->olg_ctxts[ctxt_idx]);
+ LASSERT(ctxt);
+
+ /* initialize data allowing to generate new fids,
+ * literally we need a sequece */
+ lgi->lgi_fid.f_seq = FID_SEQ_LLOG;
+ lgi->lgi_fid.f_oid = 1;
+ lgi->lgi_fid.f_ver = 0;
+ rc = local_oid_storage_init(env, disk_obd->obd_lvfs_ctxt.dt,
+ &lgi->lgi_fid, &los);
+ if (rc < 0)
+ return rc;
+
+ lgi->lgi_fid.f_seq = FID_SEQ_LLOG_NAME;
+ lgi->lgi_fid.f_oid = 1;
+ lgi->lgi_fid.f_ver = 0;
+ rc = local_oid_storage_init(env, disk_obd->obd_lvfs_ctxt.dt,
+ &lgi->lgi_fid, &los);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+
+static int llog_osd_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt)
+{
+ struct dt_device *dt;
+ struct ls_device *ls;
+ struct local_oid_storage *los, *nlos;
+
+ LASSERT(ctxt->loc_exp->exp_obd);
+ dt = ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt;
+ ls = ls_device_get(dt);
+ if (IS_ERR(ls))
+ RETURN(PTR_ERR(ls));
+
+ mutex_lock(&ls->ls_los_mutex);
+ los = dt_los_find(ls, FID_SEQ_LLOG);
+ nlos = dt_los_find(ls, FID_SEQ_LLOG_NAME);
+ mutex_unlock(&ls->ls_los_mutex);
+ if (los != NULL) {
+ dt_los_put(los);
+ local_oid_storage_fini(env, los);
+ }
+ if (nlos != NULL) {
+ dt_los_put(nlos);
+ local_oid_storage_fini(env, nlos);
+ }
+ ls_device_put(env, ls);
+ return 0;
+}
+
+struct llog_operations llog_osd_ops = {
+ .lop_next_block = llog_osd_next_block,
+ .lop_prev_block = llog_osd_prev_block,
+ .lop_read_header = llog_osd_read_header,
+ .lop_destroy = llog_osd_destroy,
+ .lop_setup = llog_osd_setup,
+ .lop_cleanup = llog_osd_cleanup,
+ .lop_open = llog_osd_open,
+ .lop_exist = llog_osd_exist,
+ .lop_declare_create = llog_osd_declare_create,
+ .lop_create = llog_osd_create,
+ .lop_declare_write_rec = llog_osd_declare_write_rec,
+ .lop_write_rec = llog_osd_write_rec,
+ .lop_close = llog_osd_close,
+};
+EXPORT_SYMBOL(llog_osd_ops);
+
+/* reads the catalog list */
+int llog_osd_get_cat_list(const struct lu_env *env, struct dt_device *d,
+ int idx, int count, struct llog_catid *idarray)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct dt_object *o = NULL;
+ struct thandle *th;
+ int rc, size;
+
+ ENTRY;
+
+ LASSERT(d);
+
+ size = sizeof(*idarray) * count;
+ lgi->lgi_off = idx * sizeof(*idarray);
+
+ lu_local_obj_fid(&lgi->lgi_fid, LLOG_CATALOGS_OID);
+
+ o = dt_locate(env, d, &lgi->lgi_fid);
+ if (IS_ERR(o))
+ RETURN(PTR_ERR(o));
+
+ if (!dt_object_exists(o)) {
+ th = dt_trans_create(env, d);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ lgi->lgi_attr.la_valid = LA_MODE;
+ lgi->lgi_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
+ lgi->lgi_dof.dof_type = dt_mode_to_dft(S_IFREG);
+
+ rc = dt_declare_create(env, o, &lgi->lgi_attr, NULL,
+ &lgi->lgi_dof, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_trans_start_local(env, d, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ dt_write_lock(env, o, 0);
+ if (!dt_object_exists(o))
+ rc = dt_create(env, o, &lgi->lgi_attr, NULL,
+ &lgi->lgi_dof, th);
+ dt_write_unlock(env, o);
+out_trans:
+ dt_trans_stop(env, d, th);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
+ if (rc)
+ GOTO(out, rc);
+
+ if (!S_ISREG(lgi->lgi_attr.la_mode)) {
+ CERROR("%s: CATALOGS is not a regular file!: mode = %o\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ lgi->lgi_attr.la_mode);
+ GOTO(out, rc = -ENOENT);
+ }
+
+ CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n",
+ (int)lgi->lgi_attr.la_size, size);
+
+ /* return just number of llogs */
+ if (idarray == NULL) {
+ rc = lgi->lgi_attr.la_size / sizeof(*idarray);
+ GOTO(out, rc);
+ }
+
+ /* read for new ost index or for empty file */
+ memset(idarray, 0, size);
+ if (lgi->lgi_attr.la_size < lgi->lgi_off + size)
+ GOTO(out, rc = 0);
+ if (lgi->lgi_attr.la_size < lgi->lgi_off + size)
+ size = lgi->lgi_attr.la_size - lgi->lgi_off;
+
+ lgi->lgi_buf.lb_buf = idarray;
+ lgi->lgi_buf.lb_len = size;
+ rc = dt_record_read(env, o, &lgi->lgi_buf, &lgi->lgi_off);
+ if (rc) {
+ CERROR("%s: error reading CATALOGS: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+
+ EXIT;
+out:
+ lu_object_put(env, &o->do_lu);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_osd_get_cat_list);
+
+/* writes the cat list */
+int llog_osd_put_cat_list(const struct lu_env *env, struct dt_device *d,
+ int idx, int count, struct llog_catid *idarray)
+{
+ struct llog_thread_info *lgi = llog_info(env);
+ struct dt_object *o = NULL;
+ struct thandle *th;
+ int rc, size;
+
+ if (!count)
+ RETURN(0);
+
+ LASSERT(d);
+
+ size = sizeof(*idarray) * count;
+ lgi->lgi_off = idx * sizeof(*idarray);
+
+ lu_local_obj_fid(&lgi->lgi_fid, LLOG_CATALOGS_OID);
+
+ o = dt_locate(env, d, &lgi->lgi_fid);
+ if (IS_ERR(o))
+ RETURN(PTR_ERR(o));
+
+ if (!dt_object_exists(o))
+ GOTO(out, rc = -ENOENT);
+
+ rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
+ if (rc)
+ GOTO(out, rc);
+
+ if (!S_ISREG(lgi->lgi_attr.la_mode)) {
+ CERROR("%s: CATALOGS is not a regular file!: mode = %o\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ lgi->lgi_attr.la_mode);
+ GOTO(out, rc = -ENOENT);
+ }
+
+ th = dt_trans_create(env, d);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ rc = dt_declare_record_write(env, o, size, lgi->lgi_off, th);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = dt_trans_start_local(env, d, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ lgi->lgi_buf.lb_buf = idarray;
+ lgi->lgi_buf.lb_len = size;
+ rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
+ if (rc)
+ CDEBUG(D_INODE, "error writeing CATALOGS: rc = %d\n", rc);
+out_trans:
+ dt_trans_stop(env, d, th);
+out:
+ lu_object_put(env, &o->do_lu);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_osd_put_cat_list);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
new file mode 100644
index 000000000000..dedfecff95bc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -0,0 +1,407 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog_swab.c
+ *
+ * Swabbing of llog datatypes (from disk or over the wire).
+ *
+ * Author: jacob berkman <jacob@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <lustre_log.h>
+
+static void print_llogd_body(struct llogd_body *d)
+{
+ CDEBUG(D_OTHER, "llogd body: %p\n", d);
+ CDEBUG(D_OTHER, "\tlgd_logid.lgl_oi: "DOSTID"\n",
+ POSTID(&d->lgd_logid.lgl_oi));
+ CDEBUG(D_OTHER, "\tlgd_logid.lgl_ogen: %#x\n", d->lgd_logid.lgl_ogen);
+ CDEBUG(D_OTHER, "\tlgd_ctxt_idx: %#x\n", d->lgd_ctxt_idx);
+ CDEBUG(D_OTHER, "\tlgd_llh_flags: %#x\n", d->lgd_llh_flags);
+ CDEBUG(D_OTHER, "\tlgd_index: %#x\n", d->lgd_index);
+ CDEBUG(D_OTHER, "\tlgd_saved_index: %#x\n", d->lgd_saved_index);
+ CDEBUG(D_OTHER, "\tlgd_len: %#x\n", d->lgd_len);
+ CDEBUG(D_OTHER, "\tlgd_cur_offset: "LPX64"\n", d->lgd_cur_offset);
+}
+
+void lustre_swab_lu_fid(struct lu_fid *fid)
+{
+ __swab64s (&fid->f_seq);
+ __swab32s (&fid->f_oid);
+ __swab32s (&fid->f_ver);
+}
+EXPORT_SYMBOL(lustre_swab_lu_fid);
+
+void lustre_swab_ost_id(struct ost_id *oid)
+{
+ if (fid_seq_is_mdt0(oid->oi.oi_seq)) {
+ __swab64s(&oid->oi.oi_id);
+ __swab64s(&oid->oi.oi_seq);
+ } else {
+ lustre_swab_lu_fid(&oid->oi_fid);
+ }
+}
+EXPORT_SYMBOL(lustre_swab_ost_id);
+
+void lustre_swab_llog_id(struct llog_logid *log_id)
+{
+ __swab64s(&log_id->lgl_oi.oi.oi_id);
+ __swab64s(&log_id->lgl_oi.oi.oi_seq);
+ __swab32s(&log_id->lgl_ogen);
+}
+EXPORT_SYMBOL(lustre_swab_llog_id);
+
+void lustre_swab_llogd_body (struct llogd_body *d)
+{
+ ENTRY;
+ print_llogd_body(d);
+ lustre_swab_llog_id(&d->lgd_logid);
+ __swab32s (&d->lgd_ctxt_idx);
+ __swab32s (&d->lgd_llh_flags);
+ __swab32s (&d->lgd_index);
+ __swab32s (&d->lgd_saved_index);
+ __swab32s (&d->lgd_len);
+ __swab64s (&d->lgd_cur_offset);
+ print_llogd_body(d);
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_llogd_body);
+
+void lustre_swab_llogd_conn_body (struct llogd_conn_body *d)
+{
+ __swab64s (&d->lgdc_gen.mnt_cnt);
+ __swab64s (&d->lgdc_gen.conn_cnt);
+ lustre_swab_llog_id(&d->lgdc_logid);
+ __swab32s (&d->lgdc_ctxt_idx);
+}
+EXPORT_SYMBOL(lustre_swab_llogd_conn_body);
+
+void lustre_swab_ll_fid(struct ll_fid *fid)
+{
+ __swab64s (&fid->id);
+ __swab32s (&fid->generation);
+ __swab32s (&fid->f_type);
+}
+EXPORT_SYMBOL(lustre_swab_ll_fid);
+
+void lustre_swab_lu_seq_range(struct lu_seq_range *range)
+{
+ __swab64s (&range->lsr_start);
+ __swab64s (&range->lsr_end);
+ __swab32s (&range->lsr_index);
+ __swab32s (&range->lsr_flags);
+}
+EXPORT_SYMBOL(lustre_swab_lu_seq_range);
+
+void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
+{
+ struct llog_rec_tail *tail = NULL;
+
+ __swab32s(&rec->lrh_len);
+ __swab32s(&rec->lrh_index);
+ __swab32s(&rec->lrh_type);
+ __swab32s(&rec->lrh_id);
+
+ switch (rec->lrh_type) {
+ case OST_SZ_REC:
+ {
+ struct llog_size_change_rec *lsc =
+ (struct llog_size_change_rec *)rec;
+
+ lustre_swab_ll_fid(&lsc->lsc_fid);
+ __swab32s(&lsc->lsc_ioepoch);
+ tail = &lsc->lsc_tail;
+ break;
+ }
+ case MDS_UNLINK_REC:
+ {
+ struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec;
+
+ __swab64s(&lur->lur_oid);
+ __swab32s(&lur->lur_oseq);
+ __swab32s(&lur->lur_count);
+ tail = &lur->lur_tail;
+ break;
+ }
+ case MDS_UNLINK64_REC:
+ {
+ struct llog_unlink64_rec *lur =
+ (struct llog_unlink64_rec *)rec;
+
+ lustre_swab_lu_fid(&lur->lur_fid);
+ __swab32s(&lur->lur_count);
+ tail = &lur->lur_tail;
+ break;
+ }
+ case CHANGELOG_REC:
+ {
+ struct llog_changelog_rec *cr = (struct llog_changelog_rec*)rec;
+
+ __swab16s(&cr->cr.cr_namelen);
+ __swab16s(&cr->cr.cr_flags);
+ __swab32s(&cr->cr.cr_type);
+ __swab64s(&cr->cr.cr_index);
+ __swab64s(&cr->cr.cr_prev);
+ __swab64s(&cr->cr.cr_time);
+ lustre_swab_lu_fid(&cr->cr.cr_tfid);
+ lustre_swab_lu_fid(&cr->cr.cr_pfid);
+ if (CHANGELOG_REC_EXTENDED(&cr->cr)) {
+ struct llog_changelog_ext_rec *ext =
+ (struct llog_changelog_ext_rec *)rec;
+
+ lustre_swab_lu_fid(&ext->cr.cr_sfid);
+ lustre_swab_lu_fid(&ext->cr.cr_spfid);
+ tail = &ext->cr_tail;
+ } else {
+ tail = &cr->cr_tail;
+ }
+ break;
+ }
+ case CHANGELOG_USER_REC:
+ {
+ struct llog_changelog_user_rec *cur =
+ (struct llog_changelog_user_rec*)rec;
+
+ __swab32s(&cur->cur_id);
+ __swab64s(&cur->cur_endrec);
+ tail = &cur->cur_tail;
+ break;
+ }
+
+ case MDS_SETATTR64_REC:
+ {
+ struct llog_setattr64_rec *lsr =
+ (struct llog_setattr64_rec *)rec;
+
+ lustre_swab_ost_id(&lsr->lsr_oi);
+ __swab32s(&lsr->lsr_uid);
+ __swab32s(&lsr->lsr_uid_h);
+ __swab32s(&lsr->lsr_gid);
+ __swab32s(&lsr->lsr_gid_h);
+ tail = &lsr->lsr_tail;
+ break;
+ }
+ case OBD_CFG_REC:
+ /* these are swabbed as they are consumed */
+ break;
+ case LLOG_HDR_MAGIC:
+ {
+ struct llog_log_hdr *llh = (struct llog_log_hdr *)rec;
+
+ __swab64s(&llh->llh_timestamp);
+ __swab32s(&llh->llh_count);
+ __swab32s(&llh->llh_bitmap_offset);
+ __swab32s(&llh->llh_flags);
+ __swab32s(&llh->llh_size);
+ __swab32s(&llh->llh_cat_idx);
+ tail = &llh->llh_tail;
+ break;
+ }
+ case LLOG_LOGID_MAGIC:
+ {
+ struct llog_logid_rec *lid = (struct llog_logid_rec *)rec;
+
+ lustre_swab_llog_id(&lid->lid_id);
+ tail = &lid->lid_tail;
+ break;
+ }
+ case LLOG_GEN_REC:
+ {
+ struct llog_gen_rec *lgr = (struct llog_gen_rec *)rec;
+
+ __swab64s(&lgr->lgr_gen.mnt_cnt);
+ __swab64s(&lgr->lgr_gen.conn_cnt);
+ tail = &lgr->lgr_tail;
+ break;
+ }
+ case LLOG_PAD_MAGIC:
+ break;
+ default:
+ CERROR("Unknown llog rec type %#x swabbing rec %p\n",
+ rec->lrh_type, rec);
+ }
+
+ if (tail) {
+ __swab32s(&tail->lrt_len);
+ __swab32s(&tail->lrt_index);
+ }
+}
+EXPORT_SYMBOL(lustre_swab_llog_rec);
+
+static void print_llog_hdr(struct llog_log_hdr *h)
+{
+ CDEBUG(D_OTHER, "llog header: %p\n", h);
+ CDEBUG(D_OTHER, "\tllh_hdr.lrh_index: %#x\n", h->llh_hdr.lrh_index);
+ CDEBUG(D_OTHER, "\tllh_hdr.lrh_len: %#x\n", h->llh_hdr.lrh_len);
+ CDEBUG(D_OTHER, "\tllh_hdr.lrh_type: %#x\n", h->llh_hdr.lrh_type);
+ CDEBUG(D_OTHER, "\tllh_timestamp: "LPX64"\n", h->llh_timestamp);
+ CDEBUG(D_OTHER, "\tllh_count: %#x\n", h->llh_count);
+ CDEBUG(D_OTHER, "\tllh_bitmap_offset: %#x\n", h->llh_bitmap_offset);
+ CDEBUG(D_OTHER, "\tllh_flags: %#x\n", h->llh_flags);
+ CDEBUG(D_OTHER, "\tllh_size: %#x\n", h->llh_size);
+ CDEBUG(D_OTHER, "\tllh_cat_idx: %#x\n", h->llh_cat_idx);
+ CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n", h->llh_tail.lrt_index);
+ CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n", h->llh_tail.lrt_len);
+}
+
+void lustre_swab_llog_hdr (struct llog_log_hdr *h)
+{
+ ENTRY;
+ print_llog_hdr(h);
+
+ lustre_swab_llog_rec(&h->llh_hdr);
+
+ print_llog_hdr(h);
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_llog_hdr);
+
+static void print_lustre_cfg(struct lustre_cfg *lcfg)
+{
+ int i;
+ ENTRY;
+
+ if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */
+ return;
+ CDEBUG(D_OTHER, "lustre_cfg: %p\n", lcfg);
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
+
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
+
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
+ if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
+ for (i = 0; i < lcfg->lcfg_bufcount; i++)
+ CDEBUG(D_OTHER, "\tlcfg->lcfg_buflens[%d]: %d\n",
+ i, lcfg->lcfg_buflens[i]);
+ EXIT;
+}
+
+void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg)
+{
+ int i;
+ ENTRY;
+
+ __swab32s(&lcfg->lcfg_version);
+
+ if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) {
+ CERROR("not swabbing lustre_cfg version %#x (expecting %#x)\n",
+ lcfg->lcfg_version, LUSTRE_CFG_VERSION);
+ EXIT;
+ return;
+ }
+
+ __swab32s(&lcfg->lcfg_command);
+ __swab32s(&lcfg->lcfg_num);
+ __swab32s(&lcfg->lcfg_flags);
+ __swab64s(&lcfg->lcfg_nid);
+ __swab32s(&lcfg->lcfg_bufcount);
+ for (i = 0; i < lcfg->lcfg_bufcount && i < LUSTRE_CFG_MAX_BUFCOUNT; i++)
+ __swab32s(&lcfg->lcfg_buflens[i]);
+
+ print_lustre_cfg(lcfg);
+ EXIT;
+ return;
+}
+EXPORT_SYMBOL(lustre_swab_lustre_cfg);
+
+/* used only for compatibility with old on-disk cfg_marker data */
+struct cfg_marker32 {
+ __u32 cm_step;
+ __u32 cm_flags;
+ __u32 cm_vers;
+ __u32 padding;
+ __u32 cm_createtime;
+ __u32 cm_canceltime;
+ char cm_tgtname[MTI_NAME_MAXLEN];
+ char cm_comment[MTI_NAME_MAXLEN];
+};
+
+#define MTI_NAMELEN32 (MTI_NAME_MAXLEN - \
+ (sizeof(struct cfg_marker) - sizeof(struct cfg_marker32)))
+
+void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size)
+{
+ struct cfg_marker32 *cm32 = (struct cfg_marker32*)marker;
+ ENTRY;
+
+ if (swab) {
+ __swab32s(&marker->cm_step);
+ __swab32s(&marker->cm_flags);
+ __swab32s(&marker->cm_vers);
+ }
+ if (size == sizeof(*cm32)) {
+ __u32 createtime, canceltime;
+ /* There was a problem with the original declaration of
+ * cfg_marker on 32-bit systems because it used time_t as
+ * a wire protocol structure, and didn't verify this in
+ * wirecheck. We now have to convert the offsets of the
+ * later fields in order to work on 32- and 64-bit systems.
+ *
+ * Fortunately, the cm_comment field has no functional use
+ * so can be sacrificed when converting the timestamp size.
+ *
+ * Overwrite fields from the end first, so they are not
+ * clobbered, and use memmove() instead of memcpy() because
+ * the source and target buffers overlap. bug 16771 */
+ createtime = cm32->cm_createtime;
+ canceltime = cm32->cm_canceltime;
+ memmove(marker->cm_comment, cm32->cm_comment, MTI_NAMELEN32);
+ marker->cm_comment[MTI_NAMELEN32 - 1] = '\0';
+ memmove(marker->cm_tgtname, cm32->cm_tgtname,
+ sizeof(marker->cm_tgtname));
+ if (swab) {
+ __swab32s(&createtime);
+ __swab32s(&canceltime);
+ }
+ marker->cm_createtime = createtime;
+ marker->cm_canceltime = canceltime;
+ CDEBUG(D_CONFIG, "Find old cfg_marker(Srv32b,Clt64b) "
+ "for target %s, converting\n",
+ marker->cm_tgtname);
+ } else if (swab) {
+ __swab64s(&marker->cm_createtime);
+ __swab64s(&marker->cm_canceltime);
+ }
+
+ EXIT;
+ return;
+}
+EXPORT_SYMBOL(lustre_swab_cfg_marker);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_test.c b/drivers/staging/lustre/lustre/obdclass/llog_test.c
new file mode 100644
index 000000000000..d397f781ec43
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/llog_test.c
@@ -0,0 +1,1087 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/llog_test.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <obd_class.h>
+#include <lustre_fid.h>
+#include <lustre_log.h>
+
+/* This is slightly more than the number of records that can fit into a
+ * single llog file, because the llog_log_header takes up some of the
+ * space in the first block that cannot be used for the bitmap. */
+#define LLOG_TEST_RECNUM (LLOG_CHUNK_SIZE * 8)
+
+static int llog_test_rand;
+static struct obd_uuid uuid = { .uuid = "test_uuid" };
+static struct llog_logid cat_logid;
+
+struct llog_mini_rec {
+ struct llog_rec_hdr lmr_hdr;
+ struct llog_rec_tail lmr_tail;
+} __attribute__((packed));
+
+static int verify_handle(char *test, struct llog_handle *llh, int num_recs)
+{
+ int i;
+ int last_idx = 0;
+ int active_recs = 0;
+
+ for (i = 0; i < LLOG_BITMAP_BYTES * 8; i++) {
+ if (ext2_test_bit(i, llh->lgh_hdr->llh_bitmap)) {
+ last_idx = i;
+ active_recs++;
+ }
+ }
+
+ if (active_recs != num_recs) {
+ CERROR("%s: expected %d active recs after write, found %d\n",
+ test, num_recs, active_recs);
+ RETURN(-ERANGE);
+ }
+
+ if (llh->lgh_hdr->llh_count != num_recs) {
+ CERROR("%s: handle->count is %d, expected %d after write\n",
+ test, llh->lgh_hdr->llh_count, num_recs);
+ RETURN(-ERANGE);
+ }
+
+ if (llh->lgh_last_idx < last_idx) {
+ CERROR("%s: handle->last_idx is %d, expected %d after write\n",
+ test, llh->lgh_last_idx, last_idx);
+ RETURN(-ERANGE);
+ }
+
+ RETURN(0);
+}
+
+/* Test named-log create/open, close */
+static int llog_test_1(const struct lu_env *env,
+ struct obd_device *obd, char *name)
+{
+ struct llog_handle *llh;
+ struct llog_ctxt *ctxt;
+ int rc;
+ int rc2;
+
+ ENTRY;
+
+ CWARN("1a: create a log with name: %s\n", name);
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+
+ rc = llog_open_create(env, ctxt, &llh, NULL, name);
+ if (rc) {
+ CERROR("1a: llog_create with name %s failed: %d\n", name, rc);
+ GOTO(out, rc);
+ }
+ rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, &uuid);
+ if (rc) {
+ CERROR("1a: can't init llog handle: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+
+ rc = verify_handle("1", llh, 1);
+
+ CWARN("1b: close newly-created log\n");
+out_close:
+ rc2 = llog_close(env, llh);
+ if (rc2) {
+ CERROR("1b: close log %s failed: %d\n", name, rc2);
+ if (rc == 0)
+ rc = rc2;
+ }
+out:
+ llog_ctxt_put(ctxt);
+ RETURN(rc);
+}
+
+/* Test named-log reopen; returns opened log on success */
+static int llog_test_2(const struct lu_env *env, struct obd_device *obd,
+ char *name, struct llog_handle **llh)
+{
+ struct llog_ctxt *ctxt;
+ struct llog_handle *loghandle;
+ struct llog_logid logid;
+ int rc;
+
+ ENTRY;
+
+ CWARN("2a: re-open a log with name: %s\n", name);
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+
+ rc = llog_open(env, ctxt, llh, NULL, name, LLOG_OPEN_EXISTS);
+ if (rc) {
+ CERROR("2a: re-open log with name %s failed: %d\n", name, rc);
+ GOTO(out_put, rc);
+ }
+
+ rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &uuid);
+ if (rc) {
+ CERROR("2a: can't init llog handle: %d\n", rc);
+ GOTO(out_close_llh, rc);
+ }
+
+ rc = verify_handle("2", *llh, 1);
+ if (rc)
+ GOTO(out_close_llh, rc);
+
+ /* XXX: there is known issue with tests 2b, MGS is not able to create
+ * anonymous llog, exit now to allow following tests run.
+ * It is fixed in upcoming llog over OSD code */
+ GOTO(out_put, rc);
+
+ CWARN("2b: create a log without specified NAME & LOGID\n");
+ rc = llog_open_create(env, ctxt, &loghandle, NULL, NULL);
+ if (rc) {
+ CERROR("2b: create log failed\n");
+ GOTO(out_close_llh, rc);
+ }
+ rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, &uuid);
+ if (rc) {
+ CERROR("2b: can't init llog handle: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+
+ logid = loghandle->lgh_id;
+ llog_close(env, loghandle);
+
+ CWARN("2c: re-open the log by LOGID\n");
+ rc = llog_open(env, ctxt, &loghandle, &logid, NULL, LLOG_OPEN_EXISTS);
+ if (rc) {
+ CERROR("2c: re-open log by LOGID failed\n");
+ GOTO(out_close_llh, rc);
+ }
+
+ rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, &uuid);
+ if (rc) {
+ CERROR("2c: can't init llog handle: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+
+ CWARN("2b: destroy this log\n");
+ rc = llog_destroy(env, loghandle);
+ if (rc)
+ CERROR("2d: destroy log failed\n");
+out_close:
+ llog_close(env, loghandle);
+out_close_llh:
+ if (rc)
+ llog_close(env, *llh);
+out_put:
+ llog_ctxt_put(ctxt);
+
+ RETURN(rc);
+}
+
+/* Test record writing, single and in bulk */
+static int llog_test_3(const struct lu_env *env, struct obd_device *obd,
+ struct llog_handle *llh)
+{
+ struct llog_gen_rec lgr;
+ int rc, i;
+ int num_recs = 1; /* 1 for the header */
+
+ ENTRY;
+
+ lgr.lgr_hdr.lrh_len = lgr.lgr_tail.lrt_len = sizeof(lgr);
+ lgr.lgr_hdr.lrh_type = LLOG_GEN_REC;
+
+ CWARN("3a: write one create_rec\n");
+ rc = llog_write(env, llh, &lgr.lgr_hdr, NULL, 0, NULL, -1);
+ num_recs++;
+ if (rc < 0) {
+ CERROR("3a: write one log record failed: %d\n", rc);
+ RETURN(rc);
+ }
+
+ rc = verify_handle("3a", llh, num_recs);
+ if (rc)
+ RETURN(rc);
+
+ CWARN("3b: write 10 cfg log records with 8 bytes bufs\n");
+ for (i = 0; i < 10; i++) {
+ struct llog_rec_hdr hdr;
+ char buf[8];
+
+ hdr.lrh_len = 8;
+ hdr.lrh_type = OBD_CFG_REC;
+ memset(buf, 0, sizeof buf);
+ rc = llog_write(env, llh, &hdr, NULL, 0, buf, -1);
+ if (rc < 0) {
+ CERROR("3b: write 10 records failed at #%d: %d\n",
+ i + 1, rc);
+ RETURN(rc);
+ }
+ num_recs++;
+ }
+
+ rc = verify_handle("3b", llh, num_recs);
+ if (rc)
+ RETURN(rc);
+
+ CWARN("3c: write 1000 more log records\n");
+ for (i = 0; i < 1000; i++) {
+ rc = llog_write(env, llh, &lgr.lgr_hdr, NULL, 0, NULL, -1);
+ if (rc < 0) {
+ CERROR("3c: write 1000 records failed at #%d: %d\n",
+ i + 1, rc);
+ RETURN(rc);
+ }
+ num_recs++;
+ }
+
+ rc = verify_handle("3c", llh, num_recs);
+ if (rc)
+ RETURN(rc);
+
+ CWARN("3d: write log more than BITMAP_SIZE, return -ENOSPC\n");
+ for (i = 0; i < LLOG_BITMAP_SIZE(llh->lgh_hdr) + 1; i++) {
+ struct llog_rec_hdr hdr;
+ char buf_even[24];
+ char buf_odd[32];
+
+ memset(buf_odd, 0, sizeof buf_odd);
+ memset(buf_even, 0, sizeof buf_even);
+ if ((i % 2) == 0) {
+ hdr.lrh_len = 24;
+ hdr.lrh_type = OBD_CFG_REC;
+ rc = llog_write(env, llh, &hdr, NULL, 0, buf_even, -1);
+ } else {
+ hdr.lrh_len = 32;
+ hdr.lrh_type = OBD_CFG_REC;
+ rc = llog_write(env, llh, &hdr, NULL, 0, buf_odd, -1);
+ }
+ if (rc == -ENOSPC) {
+ break;
+ } else if (rc < 0) {
+ CERROR("3d: write recs failed at #%d: %d\n",
+ i + 1, rc);
+ RETURN(rc);
+ }
+ num_recs++;
+ }
+ if (rc != -ENOSPC) {
+ CWARN("3d: write record more than BITMAP size!\n");
+ RETURN(-EINVAL);
+ }
+ CWARN("3d: wrote %d more records before end of llog is reached\n",
+ num_recs);
+
+ rc = verify_handle("3d", llh, num_recs);
+
+ RETURN(rc);
+}
+
+/* Test catalogue additions */
+static int llog_test_4(const struct lu_env *env, struct obd_device *obd)
+{
+ struct llog_handle *cath;
+ char name[10];
+ int rc, rc2, i, buflen;
+ struct llog_mini_rec lmr;
+ struct llog_cookie cookie;
+ struct llog_ctxt *ctxt;
+ int num_recs = 0;
+ char *buf;
+ struct llog_rec_hdr rec;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+
+ lmr.lmr_hdr.lrh_len = lmr.lmr_tail.lrt_len = LLOG_MIN_REC_SIZE;
+ lmr.lmr_hdr.lrh_type = 0xf00f00;
+
+ sprintf(name, "%x", llog_test_rand + 1);
+ CWARN("4a: create a catalog log with name: %s\n", name);
+ rc = llog_open_create(env, ctxt, &cath, NULL, name);
+ if (rc) {
+ CERROR("4a: llog_create with name %s failed: %d\n", name, rc);
+ GOTO(ctxt_release, rc);
+ }
+ rc = llog_init_handle(env, cath, LLOG_F_IS_CAT, &uuid);
+ if (rc) {
+ CERROR("4a: can't init llog handle: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ num_recs++;
+ cat_logid = cath->lgh_id;
+
+ CWARN("4b: write 1 record into the catalog\n");
+ rc = llog_cat_add(env, cath, &lmr.lmr_hdr, &cookie, NULL);
+ if (rc != 1) {
+ CERROR("4b: write 1 catalog record failed at: %d\n", rc);
+ GOTO(out, rc);
+ }
+ num_recs++;
+ rc = verify_handle("4b", cath, 2);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = verify_handle("4b", cath->u.chd.chd_current_log, num_recs);
+ if (rc)
+ GOTO(out, rc);
+
+ CWARN("4c: cancel 1 log record\n");
+ rc = llog_cat_cancel_records(env, cath, 1, &cookie);
+ if (rc) {
+ CERROR("4c: cancel 1 catalog based record failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+ num_recs--;
+
+ rc = verify_handle("4c", cath->u.chd.chd_current_log, num_recs);
+ if (rc)
+ GOTO(out, rc);
+
+ CWARN("4d: write %d more log records\n", LLOG_TEST_RECNUM);
+ for (i = 0; i < LLOG_TEST_RECNUM; i++) {
+ rc = llog_cat_add(env, cath, &lmr.lmr_hdr, NULL, NULL);
+ if (rc) {
+ CERROR("4d: write %d records failed at #%d: %d\n",
+ LLOG_TEST_RECNUM, i + 1, rc);
+ GOTO(out, rc);
+ }
+ num_recs++;
+ }
+
+ /* make sure new plain llog appears */
+ rc = verify_handle("4d", cath, 3);
+ if (rc)
+ GOTO(out, rc);
+
+ CWARN("4e: add 5 large records, one record per block\n");
+ buflen = LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
+ sizeof(struct llog_rec_tail);
+ OBD_ALLOC(buf, buflen);
+ if (buf == NULL)
+ GOTO(out, rc = -ENOMEM);
+ for (i = 0; i < 5; i++) {
+ rec.lrh_len = buflen;
+ rec.lrh_type = OBD_CFG_REC;
+ rc = llog_cat_add(env, cath, &rec, NULL, buf);
+ if (rc) {
+ CERROR("4e: write 5 records failed at #%d: %d\n",
+ i + 1, rc);
+ GOTO(out_free, rc);
+ }
+ num_recs++;
+ }
+out_free:
+ OBD_FREE(buf, buflen);
+out:
+ CWARN("4f: put newly-created catalog\n");
+ rc2 = llog_cat_close(env, cath);
+ if (rc2) {
+ CERROR("4: close log %s failed: %d\n", name, rc2);
+ if (rc == 0)
+ rc = rc2;
+ }
+ctxt_release:
+ llog_ctxt_put(ctxt);
+ RETURN(rc);
+}
+
+static int cat_counter;
+
+static int cat_print_cb(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+ struct lu_fid fid = {0};
+
+ if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+ CERROR("invalid record in catalog\n");
+ RETURN(-EINVAL);
+ }
+
+ logid_to_fid(&lir->lid_id, &fid);
+
+ CWARN("seeing record at index %d - "DFID" in log "DFID"\n",
+ rec->lrh_index, PFID(&fid),
+ PFID(lu_object_fid(&llh->lgh_obj->do_lu)));
+
+ cat_counter++;
+
+ RETURN(0);
+}
+
+static int plain_counter;
+
+static int plain_print_cb(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct lu_fid fid = {0};
+
+ if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
+ CERROR("log is not plain\n");
+ RETURN(-EINVAL);
+ }
+
+ logid_to_fid(&llh->lgh_id, &fid);
+
+ CDEBUG(D_INFO, "seeing record at index %d in log "DFID"\n",
+ rec->lrh_index, PFID(&fid));
+
+ plain_counter++;
+
+ RETURN(0);
+}
+
+static int cancel_count;
+
+static int llog_cancel_rec_cb(const struct lu_env *env,
+ struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct llog_cookie cookie;
+
+ if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
+ CERROR("log is not plain\n");
+ RETURN(-EINVAL);
+ }
+
+ cookie.lgc_lgl = llh->lgh_id;
+ cookie.lgc_index = rec->lrh_index;
+
+ llog_cat_cancel_records(env, llh->u.phd.phd_cat_handle, 1, &cookie);
+ cancel_count++;
+ if (cancel_count == LLOG_TEST_RECNUM)
+ RETURN(-LLOG_EEMPTY);
+ RETURN(0);
+}
+
+/* Test log and catalogue processing */
+static int llog_test_5(const struct lu_env *env, struct obd_device *obd)
+{
+ struct llog_handle *llh = NULL;
+ char name[10];
+ int rc, rc2;
+ struct llog_mini_rec lmr;
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+
+ lmr.lmr_hdr.lrh_len = lmr.lmr_tail.lrt_len = LLOG_MIN_REC_SIZE;
+ lmr.lmr_hdr.lrh_type = 0xf00f00;
+
+ CWARN("5a: re-open catalog by id\n");
+ rc = llog_open(env, ctxt, &llh, &cat_logid, NULL, LLOG_OPEN_EXISTS);
+ if (rc) {
+ CERROR("5a: llog_create with logid failed: %d\n", rc);
+ GOTO(out_put, rc);
+ }
+
+ rc = llog_init_handle(env, llh, LLOG_F_IS_CAT, &uuid);
+ if (rc) {
+ CERROR("5a: can't init llog handle: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ CWARN("5b: print the catalog entries.. we expect 2\n");
+ cat_counter = 0;
+ rc = llog_process(env, llh, cat_print_cb, "test 5", NULL);
+ if (rc) {
+ CERROR("5b: process with cat_print_cb failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+ if (cat_counter != 2) {
+ CERROR("5b: %d entries in catalog\n", cat_counter);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CWARN("5c: Cancel %d records, see one log zapped\n", LLOG_TEST_RECNUM);
+ cancel_count = 0;
+ rc = llog_cat_process(env, llh, llog_cancel_rec_cb, "foobar", 0, 0);
+ if (rc != -LLOG_EEMPTY) {
+ CERROR("5c: process with cat_cancel_cb failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ CWARN("5c: print the catalog entries.. we expect 1\n");
+ cat_counter = 0;
+ rc = llog_process(env, llh, cat_print_cb, "test 5", NULL);
+ if (rc) {
+ CERROR("5c: process with cat_print_cb failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+ if (cat_counter != 1) {
+ CERROR("5c: %d entries in catalog\n", cat_counter);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CWARN("5d: add 1 record to the log with many canceled empty pages\n");
+ rc = llog_cat_add(env, llh, &lmr.lmr_hdr, NULL, NULL);
+ if (rc) {
+ CERROR("5d: add record to the log with many canceled empty "
+ "pages failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("5e: print plain log entries.. expect 6\n");
+ plain_counter = 0;
+ rc = llog_cat_process(env, llh, plain_print_cb, "foobar", 0, 0);
+ if (rc) {
+ CERROR("5e: process with plain_print_cb failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+ if (plain_counter != 6) {
+ CERROR("5e: found %d records\n", plain_counter);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CWARN("5f: print plain log entries reversely.. expect 6\n");
+ plain_counter = 0;
+ rc = llog_cat_reverse_process(env, llh, plain_print_cb, "foobar");
+ if (rc) {
+ CERROR("5f: reversely process with plain_print_cb failed:"
+ "%d\n", rc);
+ GOTO(out, rc);
+ }
+ if (plain_counter != 6) {
+ CERROR("5f: found %d records\n", plain_counter);
+ GOTO(out, rc = -EINVAL);
+ }
+
+out:
+ CWARN("5g: close re-opened catalog\n");
+ rc2 = llog_cat_close(env, llh);
+ if (rc2) {
+ CERROR("5g: close log %s failed: %d\n", name, rc2);
+ if (rc == 0)
+ rc = rc2;
+ }
+out_put:
+ llog_ctxt_put(ctxt);
+
+ RETURN(rc);
+}
+
+/* Test client api; open log by name and process */
+static int llog_test_6(const struct lu_env *env, struct obd_device *obd,
+ char *name)
+{
+ struct obd_device *mgc_obd;
+ struct llog_ctxt *ctxt;
+ struct obd_uuid *mgs_uuid;
+ struct obd_export *exp;
+ struct obd_uuid uuid = { "LLOG_TEST6_UUID" };
+ struct llog_handle *llh = NULL;
+ struct llog_ctxt *nctxt;
+ int rc, rc2;
+
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+ mgs_uuid = &ctxt->loc_exp->exp_obd->obd_uuid;
+
+ CWARN("6a: re-open log %s using client API\n", name);
+ mgc_obd = class_find_client_obd(mgs_uuid, LUSTRE_MGC_NAME, NULL);
+ if (mgc_obd == NULL) {
+ CERROR("6a: no MGC devices connected to %s found.\n",
+ mgs_uuid->uuid);
+ GOTO(ctxt_release, rc = -ENOENT);
+ }
+
+ rc = obd_connect(NULL, &exp, mgc_obd, &uuid,
+ NULL /* obd_connect_data */, NULL);
+ if (rc != -EALREADY) {
+ CERROR("6a: connect on connected MGC (%s) failed to return"
+ " -EALREADY", mgc_obd->obd_name);
+ if (rc == 0)
+ obd_disconnect(exp);
+ GOTO(ctxt_release, rc = -EINVAL);
+ }
+
+ nctxt = llog_get_context(mgc_obd, LLOG_CONFIG_REPL_CTXT);
+ rc = llog_open(env, nctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
+ if (rc) {
+ CERROR("6a: llog_open failed %d\n", rc);
+ GOTO(nctxt_put, rc);
+ }
+
+ rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc) {
+ CERROR("6a: llog_init_handle failed %d\n", rc);
+ GOTO(parse_out, rc);
+ }
+
+ plain_counter = 1; /* llog header is first record */
+ CWARN("6b: process log %s using client API\n", name);
+ rc = llog_process(env, llh, plain_print_cb, NULL, NULL);
+ if (rc)
+ CERROR("6b: llog_process failed %d\n", rc);
+ CWARN("6b: processed %d records\n", plain_counter);
+
+ rc = verify_handle("6b", llh, plain_counter);
+ if (rc)
+ GOTO(parse_out, rc);
+
+ plain_counter = 1; /* llog header is first record */
+ CWARN("6c: process log %s reversely using client API\n", name);
+ rc = llog_reverse_process(env, llh, plain_print_cb, NULL, NULL);
+ if (rc)
+ CERROR("6c: llog_reverse_process failed %d\n", rc);
+ CWARN("6c: processed %d records\n", plain_counter);
+
+ rc = verify_handle("6c", llh, plain_counter);
+ if (rc)
+ GOTO(parse_out, rc);
+
+parse_out:
+ rc2 = llog_close(env, llh);
+ if (rc2) {
+ CERROR("6: llog_close failed: rc = %d\n", rc2);
+ if (rc == 0)
+ rc = rc2;
+ }
+nctxt_put:
+ llog_ctxt_put(nctxt);
+ctxt_release:
+ llog_ctxt_put(ctxt);
+ RETURN(rc);
+}
+
+static union {
+ struct llog_rec_hdr lrh; /* common header */
+ struct llog_logid_rec llr; /* LLOG_LOGID_MAGIC */
+ struct llog_unlink64_rec lur; /* MDS_UNLINK64_REC */
+ struct llog_setattr64_rec lsr64; /* MDS_SETATTR64_REC */
+ struct llog_size_change_rec lscr; /* OST_SZ_REC */
+ struct llog_changelog_rec lcr; /* CHANGELOG_REC */
+ struct llog_changelog_user_rec lcur; /* CHANGELOG_USER_REC */
+ struct llog_gen_rec lgr; /* LLOG_GEN_REC */
+} llog_records;
+
+static int test_7_print_cb(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct lu_fid fid = {0};
+
+ logid_to_fid(&llh->lgh_id, &fid);
+
+ CDEBUG(D_OTHER, "record type %#x at index %d in log "DFID"\n",
+ rec->lrh_type, rec->lrh_index, PFID(&fid));
+
+ plain_counter++;
+ return 0;
+}
+
+static int test_7_cancel_cb(const struct lu_env *env, struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ plain_counter++;
+ /* test LLOG_DEL_RECORD is working */
+ return LLOG_DEL_RECORD;
+}
+
+static int llog_test_7_sub(const struct lu_env *env, struct llog_ctxt *ctxt)
+{
+ struct llog_handle *llh;
+ int rc = 0, i, process_count;
+ int num_recs = 0;
+
+ ENTRY;
+
+ rc = llog_open_create(env, ctxt, &llh, NULL, NULL);
+ if (rc) {
+ CERROR("7_sub: create log failed\n");
+ RETURN(rc);
+ }
+
+ rc = llog_init_handle(env, llh,
+ LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY,
+ &uuid);
+ if (rc) {
+ CERROR("7_sub: can't init llog handle: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+ for (i = 0; i < LLOG_BITMAP_SIZE(llh->lgh_hdr); i++) {
+ rc = llog_write(env, llh, &llog_records.lrh, NULL, 0,
+ NULL, -1);
+ if (rc == -ENOSPC) {
+ break;
+ } else if (rc < 0) {
+ CERROR("7_sub: write recs failed at #%d: %d\n",
+ i + 1, rc);
+ GOTO(out_close, rc);
+ }
+ num_recs++;
+ }
+ if (rc != -ENOSPC) {
+ CWARN("7_sub: write record more than BITMAP size!\n");
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ rc = verify_handle("7_sub", llh, num_recs + 1);
+ if (rc) {
+ CERROR("7_sub: verify handle failed: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+ if (num_recs < LLOG_BITMAP_SIZE(llh->lgh_hdr) - 1)
+ CWARN("7_sub: records are not aligned, written %d from %u\n",
+ num_recs, LLOG_BITMAP_SIZE(llh->lgh_hdr) - 1);
+
+ plain_counter = 0;
+ rc = llog_process(env, llh, test_7_print_cb, "test 7", NULL);
+ if (rc) {
+ CERROR("7_sub: llog process failed: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+ process_count = plain_counter;
+ if (process_count != num_recs) {
+ CERROR("7_sub: processed %d records from %d total\n",
+ process_count, num_recs);
+ GOTO(out_close, rc = -EINVAL);
+ }
+
+ plain_counter = 0;
+ rc = llog_reverse_process(env, llh, test_7_cancel_cb, "test 7", NULL);
+ if (rc) {
+ CERROR("7_sub: reverse llog process failed: %d\n", rc);
+ GOTO(out_close, rc);
+ }
+ if (process_count != plain_counter) {
+ CERROR("7_sub: Reverse/direct processing found different"
+ "number of records: %d/%d\n",
+ plain_counter, process_count);
+ GOTO(out_close, rc = -EINVAL);
+ }
+ if (llog_exist(llh)) {
+ CERROR("7_sub: llog exists but should be zapped\n");
+ GOTO(out_close, rc = -EEXIST);
+ }
+
+ rc = verify_handle("7_sub", llh, 1);
+out_close:
+ if (rc)
+ llog_destroy(env, llh);
+ llog_close(env, llh);
+ RETURN(rc);
+}
+
+/* Test all llog records writing and processing */
+static int llog_test_7(const struct lu_env *env, struct obd_device *obd)
+{
+ struct llog_ctxt *ctxt;
+ int rc;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+
+ CWARN("7a: test llog_logid_rec\n");
+ llog_records.llr.lid_hdr.lrh_len = sizeof(llog_records.llr);
+ llog_records.llr.lid_tail.lrt_len = sizeof(llog_records.llr);
+ llog_records.llr.lid_hdr.lrh_type = LLOG_LOGID_MAGIC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7a: llog_logid_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7b: test llog_unlink64_rec\n");
+ llog_records.lur.lur_hdr.lrh_len = sizeof(llog_records.lur);
+ llog_records.lur.lur_tail.lrt_len = sizeof(llog_records.lur);
+ llog_records.lur.lur_hdr.lrh_type = MDS_UNLINK64_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7b: llog_unlink_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7c: test llog_setattr64_rec\n");
+ llog_records.lsr64.lsr_hdr.lrh_len = sizeof(llog_records.lsr64);
+ llog_records.lsr64.lsr_tail.lrt_len = sizeof(llog_records.lsr64);
+ llog_records.lsr64.lsr_hdr.lrh_type = MDS_SETATTR64_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7c: llog_setattr64_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7d: test llog_size_change_rec\n");
+ llog_records.lscr.lsc_hdr.lrh_len = sizeof(llog_records.lscr);
+ llog_records.lscr.lsc_tail.lrt_len = sizeof(llog_records.lscr);
+ llog_records.lscr.lsc_hdr.lrh_type = OST_SZ_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7d: llog_size_change_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7e: test llog_changelog_rec\n");
+ llog_records.lcr.cr_hdr.lrh_len = sizeof(llog_records.lcr);
+ llog_records.lcr.cr_tail.lrt_len = sizeof(llog_records.lcr);
+ llog_records.lcr.cr_hdr.lrh_type = CHANGELOG_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7e: llog_changelog_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7f: test llog_changelog_user_rec\n");
+ llog_records.lcur.cur_hdr.lrh_len = sizeof(llog_records.lcur);
+ llog_records.lcur.cur_tail.lrt_len = sizeof(llog_records.lcur);
+ llog_records.lcur.cur_hdr.lrh_type = CHANGELOG_USER_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7f: llog_changelog_user_rec test failed\n");
+ GOTO(out, rc);
+ }
+
+ CWARN("7g: test llog_gen_rec\n");
+ llog_records.lgr.lgr_hdr.lrh_len = sizeof(llog_records.lgr);
+ llog_records.lgr.lgr_tail.lrt_len = sizeof(llog_records.lgr);
+ llog_records.lgr.lgr_hdr.lrh_type = LLOG_GEN_REC;
+
+ rc = llog_test_7_sub(env, ctxt);
+ if (rc) {
+ CERROR("7g: llog_size_change_rec test failed\n");
+ GOTO(out, rc);
+ }
+out:
+ llog_ctxt_put(ctxt);
+ RETURN(rc);
+}
+
+/* -------------------------------------------------------------------------
+ * Tests above, boring obd functions below
+ * ------------------------------------------------------------------------- */
+static int llog_run_tests(const struct lu_env *env, struct obd_device *obd)
+{
+ struct llog_handle *llh = NULL;
+ struct llog_ctxt *ctxt;
+ int rc, err;
+ char name[10];
+
+ ENTRY;
+ ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+
+ sprintf(name, "%x", llog_test_rand);
+
+ rc = llog_test_1(env, obd, name);
+ if (rc)
+ GOTO(cleanup_ctxt, rc);
+
+ rc = llog_test_2(env, obd, name, &llh);
+ if (rc)
+ GOTO(cleanup_ctxt, rc);
+
+ rc = llog_test_3(env, obd, llh);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ rc = llog_test_4(env, obd);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ rc = llog_test_5(env, obd);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ rc = llog_test_6(env, obd, name);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ rc = llog_test_7(env, obd);
+ if (rc)
+ GOTO(cleanup, rc);
+
+cleanup:
+ err = llog_destroy(env, llh);
+ if (err)
+ CERROR("cleanup: llog_destroy failed: %d\n", err);
+ llog_close(env, llh);
+ if (rc == 0)
+ rc = err;
+cleanup_ctxt:
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+
+#ifdef LPROCFS
+static struct lprocfs_vars lprocfs_llog_test_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_llog_test_module_vars[] = { {0} };
+static void lprocfs_llog_test_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_llog_test_module_vars;
+ lvars->obd_vars = lprocfs_llog_test_obd_vars;
+}
+#endif
+
+static int llog_test_cleanup(struct obd_device *obd)
+{
+ struct obd_device *tgt;
+ struct lu_env env;
+ int rc;
+
+ ENTRY;
+
+ rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD);
+ if (rc)
+ RETURN(rc);
+
+ tgt = obd->obd_lvfs_ctxt.dt->dd_lu_dev.ld_obd;
+ rc = llog_cleanup(&env, llog_get_context(tgt, LLOG_TEST_ORIG_CTXT));
+ if (rc)
+ CERROR("failed to llog_test_llog_finish: %d\n", rc);
+ lu_env_fini(&env);
+ RETURN(rc);
+}
+
+static int llog_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct obd_device *tgt;
+ struct llog_ctxt *ctxt;
+ struct dt_object *o;
+ struct lu_env env;
+ struct lu_context test_session;
+ int rc;
+
+ ENTRY;
+
+ if (lcfg->lcfg_bufcount < 2) {
+ CERROR("requires a TARGET OBD name\n");
+ RETURN(-EINVAL);
+ }
+
+ if (lcfg->lcfg_buflens[1] < 1) {
+ CERROR("requires a TARGET OBD name\n");
+ RETURN(-EINVAL);
+ }
+
+ /* disk obd */
+ tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
+ if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
+ CERROR("target device not attached or not set up (%s)\n",
+ lustre_cfg_string(lcfg, 1));
+ RETURN(-EINVAL);
+ }
+
+ rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD);
+ if (rc)
+ RETURN(rc);
+
+ rc = lu_context_init(&test_session, LCT_SESSION);
+ if (rc)
+ GOTO(cleanup_env, rc);
+ test_session.lc_thread = (struct ptlrpc_thread *)current;
+ lu_context_enter(&test_session);
+ env.le_ses = &test_session;
+
+ CWARN("Setup llog-test device over %s device\n",
+ lustre_cfg_string(lcfg, 1));
+
+ OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+ obd->obd_lvfs_ctxt.dt = lu2dt_dev(tgt->obd_lu_dev);
+
+ rc = llog_setup(&env, tgt, &tgt->obd_olg, LLOG_TEST_ORIG_CTXT, tgt,
+ &llog_osd_ops);
+ if (rc)
+ GOTO(cleanup_session, rc);
+
+ /* use MGS llog dir for tests */
+ ctxt = llog_get_context(tgt, LLOG_CONFIG_ORIG_CTXT);
+ LASSERT(ctxt);
+ o = ctxt->loc_dir;
+ llog_ctxt_put(ctxt);
+
+ ctxt = llog_get_context(tgt, LLOG_TEST_ORIG_CTXT);
+ LASSERT(ctxt);
+ ctxt->loc_dir = o;
+ llog_ctxt_put(ctxt);
+
+ llog_test_rand = cfs_rand();
+
+ rc = llog_run_tests(&env, tgt);
+ if (rc)
+ llog_test_cleanup(obd);
+cleanup_session:
+ lu_context_exit(&test_session);
+ lu_context_fini(&test_session);
+cleanup_env:
+ lu_env_fini(&env);
+ RETURN(rc);
+}
+
+static struct obd_ops llog_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = llog_test_setup,
+ .o_cleanup = llog_test_cleanup,
+};
+
+static int __init llog_test_init(void)
+{
+ struct lprocfs_static_vars lvars;
+
+ lprocfs_llog_test_init_vars(&lvars);
+ return class_register_type(&llog_obd_ops, NULL,
+ lvars.module_vars, "llog_test", NULL);
+}
+
+static void __exit llog_test_exit(void)
+{
+ class_unregister_type("llog_test");
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("llog test module");
+MODULE_LICENSE("GPL");
+
+module_init(llog_test_init);
+module_exit(llog_test_exit);
diff --git a/drivers/staging/lustre/lustre/obdclass/local_storage.c b/drivers/staging/lustre/lustre/obdclass/local_storage.c
new file mode 100644
index 000000000000..3be35a83a495
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/local_storage.c
@@ -0,0 +1,903 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * lustre/obdclass/local_storage.c
+ *
+ * Local storage for file/objects with fid generation. Works on top of OSD.
+ *
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include "local_storage.h"
+
+/* all initialized local storages on this node are linked on this */
+static LIST_HEAD(ls_list_head);
+static DEFINE_MUTEX(ls_list_mutex);
+
+static int ls_object_init(const struct lu_env *env, struct lu_object *o,
+ const struct lu_object_conf *unused)
+{
+ struct ls_device *ls;
+ struct lu_object *below;
+ struct lu_device *under;
+
+ ENTRY;
+
+ ls = container_of0(o->lo_dev, struct ls_device, ls_top_dev.dd_lu_dev);
+ under = &ls->ls_osd->dd_lu_dev;
+ below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
+ if (below == NULL)
+ RETURN(-ENOMEM);
+
+ lu_object_add(o, below);
+
+ RETURN(0);
+}
+
+static void ls_object_free(const struct lu_env *env, struct lu_object *o)
+{
+ struct ls_object *obj = lu2ls_obj(o);
+ struct lu_object_header *h = o->lo_header;
+
+ dt_object_fini(&obj->ls_obj);
+ lu_object_header_fini(h);
+ OBD_FREE_PTR(obj);
+}
+
+struct lu_object_operations ls_lu_obj_ops = {
+ .loo_object_init = ls_object_init,
+ .loo_object_free = ls_object_free,
+};
+
+struct lu_object *ls_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *_h,
+ struct lu_device *d)
+{
+ struct lu_object_header *h;
+ struct ls_object *o;
+ struct lu_object *l;
+
+ LASSERT(_h == NULL);
+
+ OBD_ALLOC_PTR(o);
+ if (o != NULL) {
+ l = &o->ls_obj.do_lu;
+ h = &o->ls_header;
+
+ lu_object_header_init(h);
+ dt_object_init(&o->ls_obj, h, d);
+ lu_object_add_top(h, l);
+
+ l->lo_ops = &ls_lu_obj_ops;
+
+ return l;
+ } else {
+ return NULL;
+ }
+}
+
+static struct lu_device_operations ls_lu_dev_ops = {
+ .ldo_object_alloc = ls_object_alloc
+};
+
+static struct ls_device *__ls_find_dev(struct dt_device *dev)
+{
+ struct ls_device *ls, *ret = NULL;
+
+ list_for_each_entry(ls, &ls_list_head, ls_linkage) {
+ if (ls->ls_osd == dev) {
+ atomic_inc(&ls->ls_refcount);
+ ret = ls;
+ break;
+ }
+ }
+ return ret;
+}
+
+struct ls_device *ls_find_dev(struct dt_device *dev)
+{
+ struct ls_device *ls;
+
+ mutex_lock(&ls_list_mutex);
+ ls = __ls_find_dev(dev);
+ mutex_unlock(&ls_list_mutex);
+
+ return ls;
+}
+
+static struct lu_device_type_operations ls_device_type_ops = {
+ .ldto_start = NULL,
+ .ldto_stop = NULL,
+};
+
+static struct lu_device_type ls_lu_type = {
+ .ldt_name = "local_storage",
+ .ldt_ops = &ls_device_type_ops,
+};
+
+struct ls_device *ls_device_get(struct dt_device *dev)
+{
+ struct ls_device *ls;
+
+ ENTRY;
+
+ mutex_lock(&ls_list_mutex);
+ ls = __ls_find_dev(dev);
+ if (ls)
+ GOTO(out_ls, ls);
+
+ /* not found, then create */
+ OBD_ALLOC_PTR(ls);
+ if (ls == NULL)
+ GOTO(out_ls, ls = ERR_PTR(-ENOMEM));
+
+ atomic_set(&ls->ls_refcount, 1);
+ INIT_LIST_HEAD(&ls->ls_los_list);
+ mutex_init(&ls->ls_los_mutex);
+
+ ls->ls_osd = dev;
+
+ LASSERT(dev->dd_lu_dev.ld_site);
+ lu_device_init(&ls->ls_top_dev.dd_lu_dev, &ls_lu_type);
+ ls->ls_top_dev.dd_lu_dev.ld_ops = &ls_lu_dev_ops;
+ ls->ls_top_dev.dd_lu_dev.ld_site = dev->dd_lu_dev.ld_site;
+
+ /* finally add ls to the list */
+ list_add(&ls->ls_linkage, &ls_list_head);
+out_ls:
+ mutex_unlock(&ls_list_mutex);
+ RETURN(ls);
+}
+
+void ls_device_put(const struct lu_env *env, struct ls_device *ls)
+{
+ LASSERT(env);
+ if (!atomic_dec_and_test(&ls->ls_refcount))
+ return;
+
+ mutex_lock(&ls_list_mutex);
+ if (atomic_read(&ls->ls_refcount) == 0) {
+ LASSERT(list_empty(&ls->ls_los_list));
+ list_del(&ls->ls_linkage);
+ lu_site_purge(env, ls->ls_top_dev.dd_lu_dev.ld_site, ~0);
+ lu_device_fini(&ls->ls_top_dev.dd_lu_dev);
+ OBD_FREE_PTR(ls);
+ }
+ mutex_unlock(&ls_list_mutex);
+}
+
+/**
+ * local file fid generation
+ */
+int local_object_fid_generate(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct lu_fid *fid)
+{
+ LASSERT(los->los_dev);
+ LASSERT(los->los_obj);
+
+ /* take next OID */
+
+ /* to make it unique after reboot we store
+ * the latest generated fid atomically with
+ * object creation see local_object_create() */
+
+ mutex_lock(&los->los_id_lock);
+ fid->f_seq = los->los_seq;
+ fid->f_oid = ++los->los_last_oid;
+ fid->f_ver = 0;
+ mutex_unlock(&los->los_id_lock);
+
+ return 0;
+}
+
+int local_object_declare_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o, struct lu_attr *attr,
+ struct dt_object_format *dof,
+ struct thandle *th)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ int rc;
+
+ ENTRY;
+
+ /* update fid generation file */
+ if (los != NULL) {
+ LASSERT(dt_object_exists(los->los_obj));
+ rc = dt_declare_record_write(env, los->los_obj,
+ sizeof(struct los_ondisk), 0, th);
+ if (rc)
+ RETURN(rc);
+ }
+
+ rc = dt_declare_create(env, o, attr, NULL, dof, th);
+ if (rc)
+ RETURN(rc);
+
+ dti->dti_lb.lb_buf = NULL;
+ dti->dti_lb.lb_len = sizeof(dti->dti_lma);
+ rc = dt_declare_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA, 0, th);
+
+ RETURN(rc);
+}
+
+int local_object_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *o, struct lu_attr *attr,
+ struct dt_object_format *dof, struct thandle *th)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ obd_id lastid;
+ int rc;
+
+ ENTRY;
+
+ rc = dt_create(env, o, attr, NULL, dof, th);
+ if (rc)
+ RETURN(rc);
+
+ if (los == NULL)
+ RETURN(rc);
+
+ LASSERT(los->los_obj);
+ LASSERT(dt_object_exists(los->los_obj));
+
+ /* many threads can be updated this, serialize
+ * them here to avoid the race where one thread
+ * takes the value first, but writes it last */
+ mutex_lock(&los->los_id_lock);
+
+ /* update local oid number on disk so that
+ * we know the last one used after reboot */
+ lastid = cpu_to_le64(los->los_last_oid);
+
+ dti->dti_off = 0;
+ dti->dti_lb.lb_buf = &lastid;
+ dti->dti_lb.lb_len = sizeof(lastid);
+ rc = dt_record_write(env, los->los_obj, &dti->dti_lb, &dti->dti_off,
+ th);
+ mutex_unlock(&los->los_id_lock);
+
+ RETURN(rc);
+}
+
+/*
+ * Create local named object (file, directory or index) in parent directory.
+ */
+struct dt_object *__local_file_create(const struct lu_env *env,
+ const struct lu_fid *fid,
+ struct local_oid_storage *los,
+ struct ls_device *ls,
+ struct dt_object *parent,
+ const char *name, struct lu_attr *attr,
+ struct dt_object_format *dof)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ struct thandle *th;
+ int rc;
+
+ dto = ls_locate(env, ls, fid);
+ if (unlikely(IS_ERR(dto)))
+ RETURN(dto);
+
+ LASSERT(dto != NULL);
+ if (dt_object_exists(dto))
+ GOTO(out, rc = -EEXIST);
+
+ th = dt_trans_create(env, ls->ls_osd);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ rc = local_object_declare_create(env, los, dto, attr, dof, th);
+ if (rc)
+ GOTO(trans_stop, rc);
+
+ if (dti->dti_dof.dof_type == DFT_DIR) {
+ dt_declare_ref_add(env, dto, th);
+ dt_declare_ref_add(env, parent, th);
+ }
+
+ rc = dt_declare_insert(env, parent, (void *)fid, (void *)name, th);
+ if (rc)
+ GOTO(trans_stop, rc);
+
+ rc = dt_trans_start_local(env, ls->ls_osd, th);
+ if (rc)
+ GOTO(trans_stop, rc);
+
+ dt_write_lock(env, dto, 0);
+ if (dt_object_exists(dto))
+ GOTO(unlock, rc = 0);
+
+ CDEBUG(D_OTHER, "create new object "DFID"\n",
+ PFID(lu_object_fid(&dto->do_lu)));
+ rc = local_object_create(env, los, dto, attr, dof, th);
+ if (rc)
+ GOTO(unlock, rc);
+ LASSERT(dt_object_exists(dto));
+
+ if (dti->dti_dof.dof_type == DFT_DIR) {
+ if (!dt_try_as_dir(env, dto))
+ GOTO(destroy, rc = -ENOTDIR);
+ /* Add "." and ".." for newly created dir */
+ rc = dt_insert(env, dto, (void *)fid, (void *)".", th,
+ BYPASS_CAPA, 1);
+ if (rc)
+ GOTO(destroy, rc);
+ dt_ref_add(env, dto, th);
+ rc = dt_insert(env, dto, (void *)lu_object_fid(&parent->do_lu),
+ (void *)"..", th, BYPASS_CAPA, 1);
+ if (rc)
+ GOTO(destroy, rc);
+ }
+
+ dt_write_lock(env, parent, 0);
+ rc = dt_insert(env, parent, (const struct dt_rec *)fid,
+ (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+ if (dti->dti_dof.dof_type == DFT_DIR)
+ dt_ref_add(env, parent, th);
+ dt_write_unlock(env, parent);
+ if (rc)
+ GOTO(destroy, rc);
+destroy:
+ if (rc)
+ dt_destroy(env, dto, th);
+unlock:
+ dt_write_unlock(env, dto);
+trans_stop:
+ dt_trans_stop(env, ls->ls_osd, th);
+out:
+ if (rc) {
+ lu_object_put_nocache(env, &dto->do_lu);
+ dto = ERR_PTR(rc);
+ }
+ RETURN(dto);
+}
+
+/*
+ * Look up and create (if it does not exist) a local named file or directory in
+ * parent directory.
+ */
+struct dt_object *local_file_find_or_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *parent,
+ const char *name, __u32 mode)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ int rc;
+
+ LASSERT(parent);
+
+ rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
+ if (rc == 0)
+ /* name is found, get the object */
+ dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
+ else if (rc != -ENOENT)
+ dto = ERR_PTR(rc);
+ else {
+ rc = local_object_fid_generate(env, los, &dti->dti_fid);
+ if (rc < 0) {
+ dto = ERR_PTR(rc);
+ } else {
+ /* create the object */
+ dti->dti_attr.la_valid = LA_MODE;
+ dti->dti_attr.la_mode = mode;
+ dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
+ dto = __local_file_create(env, &dti->dti_fid, los,
+ dt2ls_dev(los->los_dev),
+ parent, name, &dti->dti_attr,
+ &dti->dti_dof);
+ }
+ }
+ return dto;
+}
+EXPORT_SYMBOL(local_file_find_or_create);
+
+struct dt_object *local_file_find_or_create_with_fid(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object *parent,
+ const char *name,
+ __u32 mode)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ int rc;
+
+ LASSERT(parent);
+
+ rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
+ if (rc == 0) {
+ dto = dt_locate(env, dt, &dti->dti_fid);
+ } else if (rc != -ENOENT) {
+ dto = ERR_PTR(rc);
+ } else {
+ struct ls_device *ls;
+
+ ls = ls_device_get(dt);
+ if (IS_ERR(ls)) {
+ dto = ERR_PTR(PTR_ERR(ls));
+ } else {
+ /* create the object */
+ dti->dti_attr.la_valid = LA_MODE;
+ dti->dti_attr.la_mode = mode;
+ dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
+ dto = __local_file_create(env, fid, NULL, ls, parent,
+ name, &dti->dti_attr,
+ &dti->dti_dof);
+ /* ls_device_put() will finalize the ls device, we
+ * have to open the object in other device stack */
+ if (!IS_ERR(dto)) {
+ dti->dti_fid = dto->do_lu.lo_header->loh_fid;
+ lu_object_put_nocache(env, &dto->do_lu);
+ dto = dt_locate(env, dt, &dti->dti_fid);
+ }
+ ls_device_put(env, ls);
+ }
+ }
+ return dto;
+}
+EXPORT_SYMBOL(local_file_find_or_create_with_fid);
+
+/*
+ * Look up and create (if it does not exist) a local named index file in parent
+ * directory.
+ */
+struct dt_object *local_index_find_or_create(const struct lu_env *env,
+ struct local_oid_storage *los,
+ struct dt_object *parent,
+ const char *name, __u32 mode,
+ const struct dt_index_features *ft)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ int rc;
+
+ LASSERT(parent);
+
+ rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
+ if (rc == 0) {
+ /* name is found, get the object */
+ dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
+ } else if (rc != -ENOENT) {
+ dto = ERR_PTR(rc);
+ } else {
+ rc = local_object_fid_generate(env, los, &dti->dti_fid);
+ if (rc < 0) {
+ dto = ERR_PTR(rc);
+ } else {
+ /* create the object */
+ dti->dti_attr.la_valid = LA_MODE;
+ dti->dti_attr.la_mode = mode;
+ dti->dti_dof.dof_type = DFT_INDEX;
+ dti->dti_dof.u.dof_idx.di_feat = ft;
+ dto = __local_file_create(env, &dti->dti_fid, los,
+ dt2ls_dev(los->los_dev),
+ parent, name, &dti->dti_attr,
+ &dti->dti_dof);
+ }
+ }
+ return dto;
+
+}
+EXPORT_SYMBOL(local_index_find_or_create);
+
+struct dt_object *
+local_index_find_or_create_with_fid(const struct lu_env *env,
+ struct dt_device *dt,
+ const struct lu_fid *fid,
+ struct dt_object *parent,
+ const char *name, __u32 mode,
+ const struct dt_index_features *ft)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ int rc;
+
+ LASSERT(parent);
+
+ rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
+ if (rc == 0) {
+ /* name is found, get the object */
+ if (!lu_fid_eq(fid, &dti->dti_fid))
+ dto = ERR_PTR(-EINVAL);
+ else
+ dto = dt_locate(env, dt, fid);
+ } else if (rc != -ENOENT) {
+ dto = ERR_PTR(rc);
+ } else {
+ struct ls_device *ls;
+
+ ls = ls_device_get(dt);
+ if (IS_ERR(ls)) {
+ dto = ERR_PTR(PTR_ERR(ls));
+ } else {
+ /* create the object */
+ dti->dti_attr.la_valid = LA_MODE;
+ dti->dti_attr.la_mode = mode;
+ dti->dti_dof.dof_type = DFT_INDEX;
+ dti->dti_dof.u.dof_idx.di_feat = ft;
+ dto = __local_file_create(env, fid, NULL, ls, parent,
+ name, &dti->dti_attr,
+ &dti->dti_dof);
+ /* ls_device_put() will finalize the ls device, we
+ * have to open the object in other device stack */
+ if (!IS_ERR(dto)) {
+ dti->dti_fid = dto->do_lu.lo_header->loh_fid;
+ lu_object_put_nocache(env, &dto->do_lu);
+ dto = dt_locate(env, dt, &dti->dti_fid);
+ }
+ ls_device_put(env, ls);
+ }
+ }
+ return dto;
+}
+EXPORT_SYMBOL(local_index_find_or_create_with_fid);
+
+static int local_object_declare_unlink(const struct lu_env *env,
+ struct dt_device *dt,
+ struct dt_object *p,
+ struct dt_object *c, const char *name,
+ struct thandle *th)
+{
+ int rc;
+
+ rc = dt_declare_delete(env, p, (const struct dt_key *)name, th);
+ if (rc < 0)
+ return rc;
+
+ rc = dt_declare_ref_del(env, c, th);
+ if (rc < 0)
+ return rc;
+
+ return dt_declare_destroy(env, c, th);
+}
+
+int local_object_unlink(const struct lu_env *env, struct dt_device *dt,
+ struct dt_object *parent, const char *name)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *dto;
+ struct thandle *th;
+ int rc;
+
+ ENTRY;
+
+ rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
+ if (rc == -ENOENT)
+ RETURN(0);
+ else if (rc < 0)
+ RETURN(rc);
+
+ dto = dt_locate(env, dt, &dti->dti_fid);
+ if (unlikely(IS_ERR(dto)))
+ RETURN(PTR_ERR(dto));
+
+ th = dt_trans_create(env, dt);
+ if (IS_ERR(th))
+ GOTO(out, rc = PTR_ERR(th));
+
+ rc = local_object_declare_unlink(env, dt, parent, dto, name, th);
+ if (rc < 0)
+ GOTO(stop, rc);
+
+ rc = dt_trans_start_local(env, dt, th);
+ if (rc < 0)
+ GOTO(stop, rc);
+
+ dt_write_lock(env, dto, 0);
+ rc = dt_delete(env, parent, (struct dt_key *)name, th, BYPASS_CAPA);
+ if (rc < 0)
+ GOTO(unlock, rc);
+
+ rc = dt_ref_del(env, dto, th);
+ if (rc < 0) {
+ rc = dt_insert(env, parent,
+ (const struct dt_rec *)&dti->dti_fid,
+ (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+ GOTO(unlock, rc);
+ }
+
+ rc = dt_destroy(env, dto, th);
+unlock:
+ dt_write_unlock(env, dto);
+stop:
+ dt_trans_stop(env, dt, th);
+out:
+ lu_object_put_nocache(env, &dto->do_lu);
+ return rc;
+}
+EXPORT_SYMBOL(local_object_unlink);
+
+struct local_oid_storage *dt_los_find(struct ls_device *ls, __u64 seq)
+{
+ struct local_oid_storage *los, *ret = NULL;
+
+ list_for_each_entry(los, &ls->ls_los_list, los_list) {
+ if (los->los_seq == seq) {
+ atomic_inc(&los->los_refcount);
+ ret = los;
+ break;
+ }
+ }
+ return ret;
+}
+
+void dt_los_put(struct local_oid_storage *los)
+{
+ if (atomic_dec_and_test(&los->los_refcount))
+ /* should never happen, only local_oid_storage_fini should
+ * drop refcount to zero */
+ LBUG();
+ return;
+}
+
+/* after Lustre 2.3 release there may be old file to store last generated FID
+ * If such file exists then we have to read its content
+ */
+int lastid_compat_check(const struct lu_env *env, struct dt_device *dev,
+ __u64 lastid_seq, __u32 *first_oid, struct ls_device *ls)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct dt_object *root = NULL;
+ struct los_ondisk losd;
+ struct dt_object *o = NULL;
+ int rc = 0;
+
+ rc = dt_root_get(env, dev, &dti->dti_fid);
+ if (rc)
+ return rc;
+
+ root = ls_locate(env, ls, &dti->dti_fid);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+
+ /* find old last_id file */
+ snprintf(dti->dti_buf, sizeof(dti->dti_buf), "seq-"LPX64"-lastid",
+ lastid_seq);
+ rc = dt_lookup_dir(env, root, dti->dti_buf, &dti->dti_fid);
+ lu_object_put_nocache(env, &root->do_lu);
+ if (rc == -ENOENT) {
+ /* old llog lastid accessed by FID only */
+ if (lastid_seq != FID_SEQ_LLOG)
+ return 0;
+ dti->dti_fid.f_seq = FID_SEQ_LLOG;
+ dti->dti_fid.f_oid = 1;
+ dti->dti_fid.f_ver = 0;
+ o = ls_locate(env, ls, &dti->dti_fid);
+ if (IS_ERR(o))
+ return PTR_ERR(o);
+
+ if (!dt_object_exists(o)) {
+ lu_object_put_nocache(env, &o->do_lu);
+ return 0;
+ }
+ CDEBUG(D_INFO, "Found old llog lastid file\n");
+ } else if (rc < 0) {
+ return rc;
+ } else {
+ CDEBUG(D_INFO, "Found old lastid file for sequence "LPX64"\n",
+ lastid_seq);
+ o = ls_locate(env, ls, &dti->dti_fid);
+ if (IS_ERR(o))
+ return PTR_ERR(o);
+ }
+ /* let's read seq-NNNNNN-lastid file value */
+ LASSERT(dt_object_exists(o));
+ dti->dti_off = 0;
+ dti->dti_lb.lb_buf = &losd;
+ dti->dti_lb.lb_len = sizeof(losd);
+ dt_read_lock(env, o, 0);
+ rc = dt_record_read(env, o, &dti->dti_lb, &dti->dti_off);
+ dt_read_unlock(env, o);
+ lu_object_put_nocache(env, &o->do_lu);
+ if (rc == 0 && le32_to_cpu(losd.lso_magic) != LOS_MAGIC) {
+ CERROR("%s: wrong content of seq-"LPX64"-lastid file, magic %x\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, lastid_seq,
+ le32_to_cpu(losd.lso_magic));
+ return -EINVAL;
+ } else if (rc < 0) {
+ CERROR("%s: failed to read seq-"LPX64"-lastid: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name, lastid_seq, rc);
+ return rc;
+ }
+ *first_oid = le32_to_cpu(losd.lso_next_oid);
+ return rc;
+}
+
+/**
+ * Initialize local OID storage for required sequence.
+ * That may be needed for services that uses local files and requires
+ * dynamic OID allocation for them.
+ *
+ * Per each sequence we have an object with 'first_fid' identificator
+ * containing the counter for OIDs of locally created files with that
+ * sequence.
+ *
+ * It is used now by llog subsystem and MGS for NID tables
+ *
+ * Function gets first_fid to create counter object.
+ * All dynamic fids will be generated with the same sequence and incremented
+ * OIDs
+ *
+ * Returned local_oid_storage is in-memory representaion of OID storage
+ */
+int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev,
+ const struct lu_fid *first_fid,
+ struct local_oid_storage **los)
+{
+ struct dt_thread_info *dti = dt_info(env);
+ struct ls_device *ls;
+ obd_id lastid;
+ struct dt_object *o = NULL;
+ struct thandle *th;
+ __u32 first_oid = fid_oid(first_fid);
+ int rc = 0;
+
+ ENTRY;
+
+ ls = ls_device_get(dev);
+ if (IS_ERR(ls))
+ RETURN(PTR_ERR(ls));
+
+ mutex_lock(&ls->ls_los_mutex);
+ *los = dt_los_find(ls, fid_seq(first_fid));
+ if (*los != NULL)
+ GOTO(out, rc = 0);
+
+ /* not found, then create */
+ OBD_ALLOC_PTR(*los);
+ if (*los == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ atomic_set(&(*los)->los_refcount, 1);
+ mutex_init(&(*los)->los_id_lock);
+ (*los)->los_dev = &ls->ls_top_dev;
+ atomic_inc(&ls->ls_refcount);
+ list_add(&(*los)->los_list, &ls->ls_los_list);
+
+ /* Use {seq, 0, 0} to create the LAST_ID file for every
+ * sequence. OIDs start at LUSTRE_FID_INIT_OID.
+ */
+ dti->dti_fid.f_seq = fid_seq(first_fid);
+ dti->dti_fid.f_oid = LUSTRE_FID_LASTID_OID;
+ dti->dti_fid.f_ver = 0;
+ o = ls_locate(env, ls, &dti->dti_fid);
+ if (IS_ERR(o))
+ GOTO(out_los, rc = PTR_ERR(o));
+
+ if (!dt_object_exists(o)) {
+ rc = lastid_compat_check(env, dev, fid_seq(first_fid),
+ &first_oid, ls);
+ if (rc < 0)
+ GOTO(out_los, rc);
+
+ th = dt_trans_create(env, dev);
+ if (IS_ERR(th))
+ GOTO(out_los, rc = PTR_ERR(th));
+
+ dti->dti_attr.la_valid = LA_MODE | LA_TYPE;
+ dti->dti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
+ dti->dti_dof.dof_type = dt_mode_to_dft(S_IFREG);
+
+ rc = dt_declare_create(env, o, &dti->dti_attr, NULL,
+ &dti->dti_dof, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_declare_record_write(env, o, sizeof(lastid), 0, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ rc = dt_trans_start_local(env, dev, th);
+ if (rc)
+ GOTO(out_trans, rc);
+
+ dt_write_lock(env, o, 0);
+ if (dt_object_exists(o))
+ GOTO(out_lock, rc = 0);
+
+ rc = dt_create(env, o, &dti->dti_attr, NULL, &dti->dti_dof,
+ th);
+ if (rc)
+ GOTO(out_lock, rc);
+
+ lastid = cpu_to_le64(first_oid);
+
+ dti->dti_off = 0;
+ dti->dti_lb.lb_buf = &lastid;
+ dti->dti_lb.lb_len = sizeof(lastid);
+ rc = dt_record_write(env, o, &dti->dti_lb, &dti->dti_off, th);
+ if (rc)
+ GOTO(out_lock, rc);
+out_lock:
+ dt_write_unlock(env, o);
+out_trans:
+ dt_trans_stop(env, dev, th);
+ } else {
+ dti->dti_off = 0;
+ dti->dti_lb.lb_buf = &lastid;
+ dti->dti_lb.lb_len = sizeof(lastid);
+ dt_read_lock(env, o, 0);
+ rc = dt_record_read(env, o, &dti->dti_lb, &dti->dti_off);
+ dt_read_unlock(env, o);
+ if (rc == 0 && le64_to_cpu(lastid) > OBIF_MAX_OID) {
+ CERROR("%s: bad oid "LPU64" is read from LAST_ID\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ le64_to_cpu(lastid));
+ rc = -EINVAL;
+ }
+ }
+out_los:
+ if (rc != 0) {
+ list_del(&(*los)->los_list);
+ atomic_dec(&ls->ls_refcount);
+ OBD_FREE_PTR(*los);
+ *los = NULL;
+ if (o != NULL && !IS_ERR(o))
+ lu_object_put_nocache(env, &o->do_lu);
+ } else {
+ (*los)->los_seq = fid_seq(first_fid);
+ (*los)->los_last_oid = le64_to_cpu(lastid);
+ (*los)->los_obj = o;
+ /* read value should not be less than initial one */
+ LASSERTF((*los)->los_last_oid >= first_oid, "%u < %u\n",
+ (*los)->los_last_oid, first_oid);
+ }
+out:
+ mutex_unlock(&ls->ls_los_mutex);
+ ls_device_put(env, ls);
+ return rc;
+}
+EXPORT_SYMBOL(local_oid_storage_init);
+
+void local_oid_storage_fini(const struct lu_env *env,
+ struct local_oid_storage *los)
+{
+ struct ls_device *ls;
+
+ if (!atomic_dec_and_test(&los->los_refcount))
+ return;
+
+ LASSERT(env);
+ LASSERT(los->los_dev);
+ ls = dt2ls_dev(los->los_dev);
+
+ mutex_lock(&ls->ls_los_mutex);
+ if (atomic_read(&los->los_refcount) == 0) {
+ if (los->los_obj)
+ lu_object_put_nocache(env, &los->los_obj->do_lu);
+ list_del(&los->los_list);
+ OBD_FREE_PTR(los);
+ }
+ mutex_unlock(&ls->ls_los_mutex);
+ ls_device_put(env, ls);
+}
+EXPORT_SYMBOL(local_oid_storage_fini);
diff --git a/drivers/staging/lustre/lustre/obdclass/local_storage.h b/drivers/staging/lustre/lustre/obdclass/local_storage.h
new file mode 100644
index 000000000000..d553c3752703
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/local_storage.h
@@ -0,0 +1,88 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * lustre/obdclass/local_storage.c
+ *
+ * Local storage for file/objects with fid generation. Works on top of OSD.
+ *
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
+ */
+
+#include <dt_object.h>
+#include <obd.h>
+#include <lustre_fid.h>
+#include <lustre_disk.h>
+
+struct ls_device {
+ struct dt_device ls_top_dev;
+ /* all initialized ls_devices on this node linked by this */
+ struct list_head ls_linkage;
+ /* how many handle's reference this local storage */
+ atomic_t ls_refcount;
+ /* underlaying OSD device */
+ struct dt_device *ls_osd;
+ /* list of all local OID storages */
+ struct list_head ls_los_list;
+ struct mutex ls_los_mutex;
+};
+
+static inline struct ls_device *dt2ls_dev(struct dt_device *d)
+{
+ return container_of0(d, struct ls_device, ls_top_dev);
+}
+
+struct ls_object {
+ struct lu_object_header ls_header;
+ struct dt_object ls_obj;
+};
+
+static inline struct ls_object *lu2ls_obj(struct lu_object *o)
+{
+ return container_of0(o, struct ls_object, ls_obj.do_lu);
+}
+
+static inline struct dt_object *ls_locate(const struct lu_env *env,
+ struct ls_device *ls,
+ const struct lu_fid *fid)
+{
+ return dt_locate_at(env, ls->ls_osd, fid, &ls->ls_top_dev.dd_lu_dev);
+}
+
+struct ls_device *ls_device_get(struct dt_device *dev);
+void ls_device_put(const struct lu_env *env, struct ls_device *ls);
+struct local_oid_storage *dt_los_find(struct ls_device *ls, __u64 seq);
+void dt_los_put(struct local_oid_storage *los);
+
+/* Lustre 2.3 on-disk structure describing local object OIDs storage
+ * the structure to be used with any sequence managed by
+ * local object library.
+ * Obsoleted since 2.4 but is kept for compatibility reasons,
+ * see lastid_compat_check() in obdclass/local_storage.c */
+struct los_ondisk {
+ __u32 lso_magic;
+ __u32 lso_next_oid;
+};
+
+#define LOS_MAGIC 0xdecafbee
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_jobstats.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_jobstats.c
new file mode 100644
index 000000000000..e2d57fef0da3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_jobstats.c
@@ -0,0 +1,562 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Niu Yawei <niu@whamcloud.com>
+ */
+/*
+ * lustre/obdclass/lprocfs_jobstats.c
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+
+#if defined(LPROCFS)
+
+/*
+ * JobID formats & JobID environment variable names for supported
+ * job schedulers:
+ *
+ * SLURM:
+ * JobID format: 32 bit integer.
+ * JobID env var: SLURM_JOB_ID.
+ * SGE:
+ * JobID format: Decimal integer range to 99999.
+ * JobID env var: JOB_ID.
+ * LSF:
+ * JobID format: 6 digit integer by default (up to 999999), can be
+ * increased to 10 digit (up to 2147483646).
+ * JobID env var: LSB_JOBID.
+ * Loadleveler:
+ * JobID format: String of machine_name.cluster_id.process_id, for
+ * example: fr2n02.32.0
+ * JobID env var: LOADL_STEP_ID.
+ * PBS:
+ * JobID format: String of sequence_number[.server_name][@server].
+ * JobID env var: PBS_JOBID.
+ * Maui/MOAB:
+ * JobID format: Same as PBS.
+ * JobID env var: Same as PBS.
+ */
+
+struct job_stat {
+ struct hlist_node js_hash;
+ struct list_head js_list;
+ atomic_t js_refcount;
+ char js_jobid[JOBSTATS_JOBID_SIZE];
+ time_t js_timestamp; /* seconds */
+ struct lprocfs_stats *js_stats;
+ struct obd_job_stats *js_jobstats;
+};
+
+static unsigned job_stat_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_djb2_hash(key, strlen(key), mask);
+}
+
+static void *job_stat_key(struct hlist_node *hnode)
+{
+ struct job_stat *job;
+ job = hlist_entry(hnode, struct job_stat, js_hash);
+ return job->js_jobid;
+}
+
+static int job_stat_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct job_stat *job;
+ job = hlist_entry(hnode, struct job_stat, js_hash);
+ return (strlen(job->js_jobid) == strlen(key)) &&
+ !strncmp(job->js_jobid, key, strlen(key));
+}
+
+static void *job_stat_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct job_stat, js_hash);
+}
+
+static void job_stat_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct job_stat *job;
+ job = hlist_entry(hnode, struct job_stat, js_hash);
+ atomic_inc(&job->js_refcount);
+}
+
+static void job_free(struct job_stat *job)
+{
+ LASSERT(atomic_read(&job->js_refcount) == 0);
+ LASSERT(job->js_jobstats);
+
+ write_lock(&job->js_jobstats->ojs_lock);
+ list_del_init(&job->js_list);
+ write_unlock(&job->js_jobstats->ojs_lock);
+
+ lprocfs_free_stats(&job->js_stats);
+ OBD_FREE_PTR(job);
+}
+
+static void job_putref(struct job_stat *job)
+{
+ LASSERT(atomic_read(&job->js_refcount) > 0);
+ if (atomic_dec_and_test(&job->js_refcount))
+ job_free(job);
+}
+
+static void job_stat_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct job_stat *job;
+ job = hlist_entry(hnode, struct job_stat, js_hash);
+ job_putref(job);
+}
+
+static void job_stat_exit(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ CERROR("Should not have any items!");
+}
+
+static cfs_hash_ops_t job_stats_hash_ops = {
+ .hs_hash = job_stat_hash,
+ .hs_key = job_stat_key,
+ .hs_keycmp = job_stat_keycmp,
+ .hs_object = job_stat_object,
+ .hs_get = job_stat_get,
+ .hs_put_locked = job_stat_put_locked,
+ .hs_exit = job_stat_exit,
+};
+
+static int job_iter_callback(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+{
+ time_t oldest = *((time_t *)data);
+ struct job_stat *job;
+
+ job = hlist_entry(hnode, struct job_stat, js_hash);
+ if (!oldest || job->js_timestamp < oldest)
+ cfs_hash_bd_del_locked(hs, bd, hnode);
+
+ return 0;
+}
+
+static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool force)
+{
+ time_t oldest, now;
+
+ if (stats->ojs_cleanup_interval == 0)
+ return;
+
+ now = cfs_time_current_sec();
+ if (!force && now < stats->ojs_last_cleanup +
+ stats->ojs_cleanup_interval)
+ return;
+
+ oldest = now - stats->ojs_cleanup_interval;
+ cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
+ &oldest);
+ stats->ojs_last_cleanup = cfs_time_current_sec();
+}
+
+static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
+{
+ struct job_stat *job;
+
+ LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn);
+
+ OBD_ALLOC_PTR(job);
+ if (job == NULL)
+ return NULL;
+
+ job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0);
+ if (job->js_stats == NULL) {
+ OBD_FREE_PTR(job);
+ return NULL;
+ }
+
+ jobs->ojs_cntr_init_fn(job->js_stats);
+
+ memcpy(job->js_jobid, jobid, JOBSTATS_JOBID_SIZE);
+ job->js_timestamp = cfs_time_current_sec();
+ job->js_jobstats = jobs;
+ INIT_HLIST_NODE(&job->js_hash);
+ INIT_LIST_HEAD(&job->js_list);
+ atomic_set(&job->js_refcount, 1);
+
+ return job;
+}
+
+int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
+ int event, long amount)
+{
+ struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
+ struct job_stat *job, *job2;
+ ENTRY;
+
+ LASSERT(stats && stats->ojs_hash);
+
+ lprocfs_job_cleanup(stats, false);
+
+ if (!jobid || !strlen(jobid))
+ RETURN(-EINVAL);
+
+ if (strlen(jobid) >= JOBSTATS_JOBID_SIZE) {
+ CERROR("Invalid jobid size (%lu), expect(%d)\n",
+ (unsigned long)strlen(jobid) + 1, JOBSTATS_JOBID_SIZE);
+ RETURN(-EINVAL);
+ }
+
+ job = cfs_hash_lookup(stats->ojs_hash, jobid);
+ if (job)
+ goto found;
+
+ job = job_alloc(jobid, stats);
+ if (job == NULL)
+ RETURN(-ENOMEM);
+
+ job2 = cfs_hash_findadd_unique(stats->ojs_hash, job->js_jobid,
+ &job->js_hash);
+ if (job2 != job) {
+ job_putref(job);
+ job = job2;
+ /* We cannot LASSERT(!list_empty(&job->js_list)) here,
+ * since we just lost the race for inserting "job" into the
+ * ojs_list, and some other thread is doing it _right_now_.
+ * Instead, be content the other thread is doing this, since
+ * "job2" was initialized in job_alloc() already. LU-2163 */
+ } else {
+ LASSERT(list_empty(&job->js_list));
+ write_lock(&stats->ojs_lock);
+ list_add_tail(&job->js_list, &stats->ojs_list);
+ write_unlock(&stats->ojs_lock);
+ }
+
+found:
+ LASSERT(stats == job->js_jobstats);
+ LASSERT(stats->ojs_cntr_num > event);
+ job->js_timestamp = cfs_time_current_sec();
+ lprocfs_counter_add(job->js_stats, event, amount);
+
+ job_putref(job);
+ RETURN(0);
+}
+EXPORT_SYMBOL(lprocfs_job_stats_log);
+
+void lprocfs_job_stats_fini(struct obd_device *obd)
+{
+ struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
+ time_t oldest = 0;
+
+ if (stats->ojs_hash == NULL)
+ return;
+ cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest);
+ cfs_hash_putref(stats->ojs_hash);
+ stats->ojs_hash = NULL;
+ LASSERT(list_empty(&stats->ojs_list));
+}
+EXPORT_SYMBOL(lprocfs_job_stats_fini);
+
+static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
+{
+ struct obd_job_stats *stats = p->private;
+ loff_t off = *pos;
+ struct job_stat *job;
+
+ read_lock(&stats->ojs_lock);
+ if (off == 0)
+ return SEQ_START_TOKEN;
+ off--;
+ list_for_each_entry(job, &stats->ojs_list, js_list) {
+ if (!off--)
+ return job;
+ }
+ return NULL;
+}
+
+static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
+{
+ struct obd_job_stats *stats = p->private;
+
+ read_unlock(&stats->ojs_lock);
+}
+
+static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct obd_job_stats *stats = p->private;
+ struct job_stat *job;
+ struct list_head *next;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN) {
+ next = stats->ojs_list.next;
+ } else {
+ job = (struct job_stat *)v;
+ next = job->js_list.next;
+ }
+
+ return next == &stats->ojs_list ? NULL :
+ list_entry(next, struct job_stat, js_list);
+}
+
+/*
+ * Example of output on MDT:
+ *
+ * job_stats:
+ * - job_id: test_id.222.25844
+ * snapshot_time: 1322494486
+ * open: { samples: 3, unit: reqs }
+ * close: { samples: 3, unit: reqs }
+ * mknod: { samples: 0, unit: reqs }
+ * link: { samples: 0, unit: reqs }
+ * unlink: { samples: 0, unit: reqs }
+ * mkdir: { samples: 0, unit: reqs }
+ * rmdir: { samples: 0, unit: reqs }
+ * rename: { samples: 1, unit: reqs }
+ * getattr: { samples: 7, unit: reqs }
+ * setattr: { samples: 0, unit: reqs }
+ * getxattr: { samples: 0, unit: reqs }
+ * setxattr: { samples: 0, unit: reqs }
+ * statfs: { samples: 0, unit: reqs }
+ * sync: { samples: 0, unit: reqs }
+ *
+ * Example of output on OST:
+ *
+ * job_stats:
+ * - job_id 4854
+ * snapshot_time: 1322494602
+ * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 }
+ * write: { samples: 1, unit: bytes, min: 10, max: 10, sum: 10 }
+ * setattr: { samples: 0, unit: reqs }
+ * punch: { samples: 0, unit: reqs }
+ * sync: { samples: 0, unit: reqs }
+ */
+
+static const char spaces[] = " ";
+
+static int inline width(const char *str, int len)
+{
+ return len - min((int)strlen(str), 15);
+}
+
+static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
+{
+ struct job_stat *job = v;
+ struct lprocfs_stats *s;
+ struct lprocfs_counter ret;
+ struct lprocfs_counter *cntr;
+ struct lprocfs_counter_header *cntr_header;
+ int i;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(p, "job_stats:\n");
+ return 0;
+ }
+
+ seq_printf(p, "- %-16s %s\n", "job_id:", job->js_jobid);
+ seq_printf(p, " %-16s %ld\n", "snapshot_time:", job->js_timestamp);
+
+ s = job->js_stats;
+ for (i = 0; i < s->ls_num; i++) {
+ cntr = lprocfs_stats_counter_get(s, 0, i);
+ cntr_header = &s->ls_cnt_header[i];
+ lprocfs_stats_collect(s, i, &ret);
+
+ seq_printf(p, " %s:%.*s { samples: %11"LPF64"u",
+ cntr_header->lc_name,
+ width(cntr_header->lc_name, 15), spaces,
+ ret.lc_count);
+ if (cntr_header->lc_units[0] != '\0')
+ seq_printf(p, ", unit: %5s", cntr_header->lc_units);
+
+ if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+ seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u,"
+ " sum:%16"LPF64"u",
+ ret.lc_count ? ret.lc_min : 0,
+ ret.lc_count ? ret.lc_max : 0,
+ ret.lc_count ? ret.lc_sum : 0);
+ }
+ if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
+ seq_printf(p, ", sumsq: %18"LPF64"u",
+ ret.lc_count ? ret.lc_sumsquare : 0);
+ }
+
+ seq_printf(p, " }\n");
+
+ }
+ return 0;
+}
+
+struct seq_operations lprocfs_jobstats_seq_sops = {
+ start: lprocfs_jobstats_seq_start,
+ stop: lprocfs_jobstats_seq_stop,
+ next: lprocfs_jobstats_seq_next,
+ show: lprocfs_jobstats_seq_show,
+};
+
+static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc;
+
+ rc = seq_open(file, &lprocfs_jobstats_seq_sops);
+ if (rc)
+ return rc;
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+ return 0;
+}
+
+static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct obd_job_stats *stats = seq->private;
+ char jobid[JOBSTATS_JOBID_SIZE];
+ int all = 0;
+ struct job_stat *job;
+
+ if (!memcmp(buf, "clear", strlen("clear"))) {
+ all = 1;
+ } else if (len < JOBSTATS_JOBID_SIZE) {
+ memset(jobid, 0, JOBSTATS_JOBID_SIZE);
+ /* Trim '\n' if any */
+ if (buf[len - 1] == '\n')
+ memcpy(jobid, buf, len - 1);
+ else
+ memcpy(jobid, buf, len);
+ } else {
+ return -EINVAL;
+ }
+
+ LASSERT(stats->ojs_hash);
+ if (all) {
+ time_t oldest = 0;
+ cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
+ &oldest);
+ return len;
+ }
+
+ if (!strlen(jobid))
+ return -EINVAL;
+
+ job = cfs_hash_lookup(stats->ojs_hash, jobid);
+ if (!job)
+ return -EINVAL;
+
+ cfs_hash_del_key(stats->ojs_hash, jobid);
+
+ job_putref(job);
+ return len;
+}
+
+struct file_operations lprocfs_jobstats_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = lprocfs_jobstats_seq_open,
+ .read = seq_read,
+ .write = lprocfs_jobstats_seq_write,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+};
+
+int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
+ cntr_init_callback init_fn)
+{
+ struct proc_dir_entry *entry;
+ struct obd_job_stats *stats;
+ ENTRY;
+
+ LASSERT(obd->obd_proc_entry != NULL);
+ LASSERT(obd->obd_type->typ_name);
+
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) {
+ CERROR("Invalid obd device type.\n");
+ RETURN(-EINVAL);
+ }
+ stats = &obd->u.obt.obt_jobstats;
+
+ LASSERT(stats->ojs_hash == NULL);
+ stats->ojs_hash = cfs_hash_create("JOB_STATS",
+ HASH_JOB_STATS_CUR_BITS,
+ HASH_JOB_STATS_MAX_BITS,
+ HASH_JOB_STATS_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &job_stats_hash_ops,
+ CFS_HASH_DEFAULT);
+ if (stats->ojs_hash == NULL)
+ RETURN(-ENOMEM);
+
+ INIT_LIST_HEAD(&stats->ojs_list);
+ rwlock_init(&stats->ojs_lock);
+ stats->ojs_cntr_num = cntr_num;
+ stats->ojs_cntr_init_fn = init_fn;
+ stats->ojs_cleanup_interval = 600; /* 10 mins by default */
+ stats->ojs_last_cleanup = cfs_time_current_sec();
+
+ entry = proc_create_data("job_stats", 0644, obd->obd_proc_entry,
+ &lprocfs_jobstats_seq_fops, stats);
+ if (entry)
+ RETURN(0);
+ else
+ RETURN(-ENOMEM);
+}
+EXPORT_SYMBOL(lprocfs_job_stats_init);
+
+int lprocfs_rd_job_interval(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = (struct obd_device *)data;
+ struct obd_job_stats *stats;
+
+ LASSERT(obd != NULL);
+ stats = &obd->u.obt.obt_jobstats;
+ return seq_printf(m, "%d\n", stats->ojs_cleanup_interval);
+}
+EXPORT_SYMBOL(lprocfs_rd_job_interval);
+
+int lprocfs_wr_job_interval(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct obd_device *obd = (struct obd_device *)data;
+ struct obd_job_stats *stats;
+ int val, rc;
+
+ LASSERT(obd != NULL);
+ stats = &obd->u.obt.obt_jobstats;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ stats->ojs_cleanup_interval = val;
+ lprocfs_job_cleanup(stats, true);
+
+ return count;
+
+}
+EXPORT_SYMBOL(lprocfs_wr_job_interval);
+
+#endif /* LPROCFS*/
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
new file mode 100644
index 000000000000..3b157f89c300
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -0,0 +1,1985 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/lprocfs_status.c
+ *
+ * Author: Hariharan Thantry <thantry@users.sourceforge.net>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <linux/seq_file.h>
+
+#if defined(LPROCFS)
+
+static int lprocfs_no_percpu_stats = 0;
+CFS_MODULE_PARM(lprocfs_no_percpu_stats, "i", int, 0644,
+ "Do not alloc percpu data for lprocfs stats");
+
+#define MAX_STRING_SIZE 128
+
+int lprocfs_single_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+EXPORT_SYMBOL(lprocfs_single_release);
+
+int lprocfs_seq_release(struct inode *inode, struct file *file)
+{
+ return seq_release(inode, file);
+}
+EXPORT_SYMBOL(lprocfs_seq_release);
+
+/* lprocfs API calls */
+
+proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+ char *name, void *data,
+ struct file_operations *fops)
+{
+ proc_dir_entry_t *proc;
+ mode_t mode = 0;
+
+ if (root == NULL || name == NULL || fops == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (fops->read)
+ mode = 0444;
+ if (fops->write)
+ mode |= 0200;
+ proc = proc_create_data(name, mode, root, fops, data);
+ if (!proc) {
+ CERROR("LprocFS: No memory to create /proc entry %s", name);
+ return ERR_PTR(-ENOMEM);
+ }
+ return proc;
+}
+EXPORT_SYMBOL(lprocfs_add_simple);
+
+struct proc_dir_entry *lprocfs_add_symlink(const char *name,
+ struct proc_dir_entry *parent, const char *format, ...)
+{
+ struct proc_dir_entry *entry;
+ char *dest;
+ va_list ap;
+
+ if (parent == NULL || format == NULL)
+ return NULL;
+
+ OBD_ALLOC_WAIT(dest, MAX_STRING_SIZE + 1);
+ if (dest == NULL)
+ return NULL;
+
+ va_start(ap, format);
+ vsnprintf(dest, MAX_STRING_SIZE, format, ap);
+ va_end(ap);
+
+ entry = proc_symlink(name, parent, dest);
+ if (entry == NULL)
+ CERROR("LprocFS: Could not create symbolic link from %s to %s",
+ name, dest);
+
+ OBD_FREE(dest, MAX_STRING_SIZE + 1);
+ return entry;
+}
+EXPORT_SYMBOL(lprocfs_add_symlink);
+
+static struct file_operations lprocfs_generic_fops = { };
+
+/**
+ * Add /proc entries.
+ *
+ * \param root [in] The parent proc entry on which new entry will be added.
+ * \param list [in] Array of proc entries to be added.
+ * \param data [in] The argument to be passed when entries read/write routines
+ * are called through /proc file.
+ *
+ * \retval 0 on success
+ * < 0 on error
+ */
+int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
+ void *data)
+{
+ if (root == NULL || list == NULL)
+ return -EINVAL;
+
+ while (list->name != NULL) {
+ struct proc_dir_entry *proc;
+ mode_t mode = 0;
+
+ if (list->proc_mode != 0000) {
+ mode = list->proc_mode;
+ } else if (list->fops) {
+ if (list->fops->read)
+ mode = 0444;
+ if (list->fops->write)
+ mode |= 0200;
+ }
+ proc = proc_create_data(list->name, mode, root,
+ list->fops ?: &lprocfs_generic_fops,
+ list->data ?: data);
+ if (proc == NULL)
+ return -ENOMEM;
+ list++;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_add_vars);
+
+void lprocfs_remove(struct proc_dir_entry **rooth)
+{
+ proc_remove(*rooth);
+ *rooth = NULL;
+}
+EXPORT_SYMBOL(lprocfs_remove);
+
+void lprocfs_remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+ LASSERT(parent != NULL);
+ remove_proc_entry(name, parent);
+}
+EXPORT_SYMBOL(lprocfs_remove_proc_entry);
+
+struct proc_dir_entry *lprocfs_register(const char *name,
+ struct proc_dir_entry *parent,
+ struct lprocfs_vars *list, void *data)
+{
+ struct proc_dir_entry *newchild;
+
+ newchild = proc_mkdir(name, parent);
+ if (newchild != NULL && list != NULL) {
+ int rc = lprocfs_add_vars(newchild, list, data);
+ if (rc) {
+ lprocfs_remove(&newchild);
+ return ERR_PTR(rc);
+ }
+ }
+ return newchild;
+}
+EXPORT_SYMBOL(lprocfs_register);
+
+/* Generic callbacks */
+int lprocfs_rd_uint(struct seq_file *m, void *data)
+{
+ return seq_printf(m, "%u\n", *(unsigned int *)data);
+}
+EXPORT_SYMBOL(lprocfs_rd_uint);
+
+int lprocfs_wr_uint(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ unsigned *p = data;
+ char dummy[MAX_STRING_SIZE + 1], *end;
+ unsigned long tmp;
+
+ dummy[MAX_STRING_SIZE] = '\0';
+ if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
+ return -EFAULT;
+
+ tmp = simple_strtoul(dummy, &end, 0);
+ if (dummy == end)
+ return -EINVAL;
+
+ *p = (unsigned int)tmp;
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_uint);
+
+int lprocfs_rd_u64(struct seq_file *m, void *data)
+{
+ return seq_printf(m, LPU64"\n", *(__u64 *)data);
+}
+EXPORT_SYMBOL(lprocfs_rd_u64);
+
+int lprocfs_rd_atomic(struct seq_file *m, void *data)
+{
+ atomic_t *atom = data;
+ LASSERT(atom != NULL);
+ return seq_printf(m, "%d\n", atomic_read(atom));
+}
+EXPORT_SYMBOL(lprocfs_rd_atomic);
+
+int lprocfs_wr_atomic(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ atomic_t *atm = data;
+ int val = 0;
+ int rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val <= 0)
+ return -ERANGE;
+
+ atomic_set(atm, val);
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_atomic);
+
+int lprocfs_rd_uuid(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+
+ LASSERT(obd != NULL);
+ return seq_printf(m, "%s\n", obd->obd_uuid.uuid);
+}
+EXPORT_SYMBOL(lprocfs_rd_uuid);
+
+int lprocfs_rd_name(struct seq_file *m, void *data)
+{
+ struct obd_device *dev = data;
+
+ LASSERT(dev != NULL);
+ return seq_printf(m, "%s\n", dev->obd_name);
+}
+EXPORT_SYMBOL(lprocfs_rd_name);
+
+int lprocfs_rd_blksize(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, "%u\n", osfs.os_bsize);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_blksize);
+
+int lprocfs_rd_kbytestotal(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_blocks;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
+
+int lprocfs_rd_kbytesfree(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bfree;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
+
+int lprocfs_rd_kbytesavail(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bavail;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ rc = seq_printf(m, LPU64"\n", result);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_kbytesavail);
+
+int lprocfs_rd_filestotal(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, LPU64"\n", osfs.os_files);
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_filestotal);
+
+int lprocfs_rd_filesfree(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ OBD_STATFS_NODELAY);
+ if (!rc)
+ rc = seq_printf(m, LPU64"\n", osfs.os_ffree);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_filesfree);
+
+int lprocfs_rd_server_uuid(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct obd_import *imp;
+ char *imp_state_name = NULL;
+ int rc = 0;
+
+ LASSERT(obd != NULL);
+ LPROCFS_CLIMP_CHECK(obd);
+ imp = obd->u.cli.cl_import;
+ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
+ rc = seq_printf(m, "%s\t%s%s\n", obd2cli_tgt(obd), imp_state_name,
+ imp->imp_deactive ? "\tDEACTIVATED" : "");
+
+ LPROCFS_CLIMP_EXIT(obd);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_server_uuid);
+
+int lprocfs_rd_conn_uuid(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ struct ptlrpc_connection *conn;
+ int rc = 0;
+
+ LASSERT(obd != NULL);
+
+ LPROCFS_CLIMP_CHECK(obd);
+ conn = obd->u.cli.cl_import->imp_connection;
+ if (conn && obd->u.cli.cl_import)
+ rc = seq_printf(m, "%s\n", conn->c_remote_uuid.uuid);
+ else
+ rc = seq_printf(m, "%s\n", "<none>");
+
+ LPROCFS_CLIMP_EXIT(obd);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
+
+/** add up per-cpu counters */
+void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
+ struct lprocfs_counter *cnt)
+{
+ unsigned int num_entry;
+ struct lprocfs_counter *percpu_cntr;
+ struct lprocfs_counter_header *cntr_header;
+ int i;
+ unsigned long flags = 0;
+
+ memset(cnt, 0, sizeof(*cnt));
+
+ if (stats == NULL) {
+ /* set count to 1 to avoid divide-by-zero errs in callers */
+ cnt->lc_count = 1;
+ return;
+ }
+
+ cnt->lc_min = LC_MIN_INIT;
+
+ num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+
+ for (i = 0; i < num_entry; i++) {
+ if (stats->ls_percpu[i] == NULL)
+ continue;
+ cntr_header = &stats->ls_cnt_header[idx];
+ percpu_cntr = lprocfs_stats_counter_get(stats, i, idx);
+
+ cnt->lc_count += percpu_cntr->lc_count;
+ cnt->lc_sum += percpu_cntr->lc_sum;
+ if (percpu_cntr->lc_min < cnt->lc_min)
+ cnt->lc_min = percpu_cntr->lc_min;
+ if (percpu_cntr->lc_max > cnt->lc_max)
+ cnt->lc_max = percpu_cntr->lc_max;
+ cnt->lc_sumsquare += percpu_cntr->lc_sumsquare;
+ }
+
+ lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
+}
+EXPORT_SYMBOL(lprocfs_stats_collect);
+
+/**
+ * Append a space separated list of current set flags to str.
+ */
+#define flag2str(flag, first) \
+ do { \
+ if (imp->imp_##flag) \
+ seq_printf(m, "%s" #flag, first ? "" : ", "); \
+ } while (0)
+static int obd_import_flags2str(struct obd_import *imp, struct seq_file *m)
+{
+ bool first = true;
+
+ if (imp->imp_obd->obd_no_recov) {
+ seq_printf(m, "no_recov");
+ first = false;
+ }
+
+ flag2str(invalid, first);
+ first = false;
+ flag2str(deactive, first);
+ flag2str(replayable, first);
+ flag2str(pingable, first);
+ return 0;
+}
+#undef flags2str
+
+static const char *obd_connect_names[] = {
+ "read_only",
+ "lov_index",
+ "unused",
+ "write_grant",
+ "server_lock",
+ "version",
+ "request_portal",
+ "acl",
+ "xattr",
+ "create_on_write",
+ "truncate_lock",
+ "initial_transno",
+ "inode_bit_locks",
+ "join_file(obsolete)",
+ "getattr_by_fid",
+ "no_oh_for_devices",
+ "remote_client",
+ "remote_client_by_force",
+ "max_byte_per_rpc",
+ "64bit_qdata",
+ "mds_capability",
+ "oss_capability",
+ "early_lock_cancel",
+ "som",
+ "adaptive_timeouts",
+ "lru_resize",
+ "mds_mds_connection",
+ "real_conn",
+ "change_qunit_size",
+ "alt_checksum_algorithm",
+ "fid_is_enabled",
+ "version_recovery",
+ "pools",
+ "grant_shrink",
+ "skip_orphan",
+ "large_ea",
+ "full20",
+ "layout_lock",
+ "64bithash",
+ "object_max_bytes",
+ "imp_recov",
+ "jobstats",
+ "umask",
+ "einprogress",
+ "grant_param",
+ "flock_owner",
+ "lvb_type",
+ "nanoseconds_times",
+ "lightweight_conn",
+ "short_io",
+ "pingless",
+ "unknown",
+ NULL
+};
+
+static void obd_connect_seq_flags2str(struct seq_file *m, __u64 flags, char *sep)
+{
+ __u64 mask = 1;
+ int i;
+ bool first = true;
+
+ for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) {
+ if (flags & mask) {
+ seq_printf(m, "%s%s",
+ first ? sep : "", obd_connect_names[i]);
+ first = false;
+ }
+ }
+ if (flags & ~(mask - 1))
+ seq_printf(m, "%sunknown flags "LPX64,
+ first ? sep : "", flags & ~(mask - 1));
+}
+
+int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
+{
+ __u64 mask = 1;
+ int i, ret = 0;
+
+ for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) {
+ if (flags & mask)
+ ret += snprintf(page + ret, count - ret, "%s%s",
+ ret ? sep : "", obd_connect_names[i]);
+ }
+ if (flags & ~(mask - 1))
+ ret += snprintf(page + ret, count - ret,
+ "%sunknown flags "LPX64,
+ ret ? sep : "", flags & ~(mask - 1));
+ return ret;
+}
+EXPORT_SYMBOL(obd_connect_flags2str);
+
+int lprocfs_rd_import(struct seq_file *m, void *data)
+{
+ struct lprocfs_counter ret;
+ struct lprocfs_counter_header *header;
+ struct obd_device *obd = (struct obd_device *)data;
+ struct obd_import *imp;
+ struct obd_import_conn *conn;
+ int j;
+ int k;
+ int rw = 0;
+
+ LASSERT(obd != NULL);
+ LPROCFS_CLIMP_CHECK(obd);
+ imp = obd->u.cli.cl_import;
+
+ seq_printf(m,
+ "import:\n"
+ " name: %s\n"
+ " target: %s\n"
+ " state: %s\n"
+ " instance: %u\n"
+ " connect_flags: [",
+ obd->obd_name,
+ obd2cli_tgt(obd),
+ ptlrpc_import_state_name(imp->imp_state),
+ imp->imp_connect_data.ocd_instance);
+ obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags, ", ");
+ seq_printf(m,
+ "]\n"
+ " import_flags: [");
+ obd_import_flags2str(imp, m);
+
+ seq_printf(m,
+ "]\n"
+ " connection:\n"
+ " failover_nids: [");
+ spin_lock(&imp->imp_lock);
+ j = 0;
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ seq_printf(m, "%s%s", j ? ", " : "",
+ libcfs_nid2str(conn->oic_conn->c_peer.nid));
+ j++;
+ }
+ seq_printf(m,
+ "]\n"
+ " current_connection: %s\n"
+ " connection_attempts: %u\n"
+ " generation: %u\n"
+ " in-progress_invalidations: %u\n",
+ imp->imp_connection == NULL ? "<none>" :
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
+ imp->imp_conn_cnt,
+ imp->imp_generation,
+ atomic_read(&imp->imp_inval_count));
+ spin_unlock(&imp->imp_lock);
+
+ if (obd->obd_svc_stats == NULL)
+ goto out_climp;
+
+ header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR];
+ lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret);
+ if (ret.lc_count != 0) {
+ /* first argument to do_div MUST be __u64 */
+ __u64 sum = ret.lc_sum;
+ do_div(sum, ret.lc_count);
+ ret.lc_sum = sum;
+ } else
+ ret.lc_sum = 0;
+ seq_printf(m,
+ " rpcs:\n"
+ " inflight: %u\n"
+ " unregistering: %u\n"
+ " timeouts: %u\n"
+ " avg_waittime: "LPU64" %s\n",
+ atomic_read(&imp->imp_inflight),
+ atomic_read(&imp->imp_unregistering),
+ atomic_read(&imp->imp_timeouts),
+ ret.lc_sum, header->lc_units);
+
+ k = 0;
+ for(j = 0; j < IMP_AT_MAX_PORTALS; j++) {
+ if (imp->imp_at.iat_portal[j] == 0)
+ break;
+ k = max_t(unsigned int, k,
+ at_get(&imp->imp_at.iat_service_estimate[j]));
+ }
+ seq_printf(m,
+ " service_estimates:\n"
+ " services: %u sec\n"
+ " network: %u sec\n",
+ k,
+ at_get(&imp->imp_at.iat_net_latency));
+
+ seq_printf(m,
+ " transactions:\n"
+ " last_replay: "LPU64"\n"
+ " peer_committed: "LPU64"\n"
+ " last_checked: "LPU64"\n",
+ imp->imp_last_replay_transno,
+ imp->imp_peer_committed_transno,
+ imp->imp_last_transno_checked);
+
+ /* avg data rates */
+ for (rw = 0; rw <= 1; rw++) {
+ lprocfs_stats_collect(obd->obd_svc_stats,
+ PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw,
+ &ret);
+ if (ret.lc_sum > 0 && ret.lc_count > 0) {
+ /* first argument to do_div MUST be __u64 */
+ __u64 sum = ret.lc_sum;
+ do_div(sum, ret.lc_count);
+ ret.lc_sum = sum;
+ seq_printf(m,
+ " %s_data_averages:\n"
+ " bytes_per_rpc: "LPU64"\n",
+ rw ? "write" : "read",
+ ret.lc_sum);
+ }
+ k = (int)ret.lc_sum;
+ j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES;
+ header = &obd->obd_svc_stats->ls_cnt_header[j];
+ lprocfs_stats_collect(obd->obd_svc_stats, j, &ret);
+ if (ret.lc_sum > 0 && ret.lc_count != 0) {
+ /* first argument to do_div MUST be __u64 */
+ __u64 sum = ret.lc_sum;
+ do_div(sum, ret.lc_count);
+ ret.lc_sum = sum;
+ seq_printf(m,
+ " %s_per_rpc: "LPU64"\n",
+ header->lc_units, ret.lc_sum);
+ j = (int)ret.lc_sum;
+ if (j > 0)
+ seq_printf(m,
+ " MB_per_sec: %u.%.02u\n",
+ k / j, (100 * k / j) % 100);
+ }
+ }
+
+out_climp:
+ LPROCFS_CLIMP_EXIT(obd);
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_import);
+
+int lprocfs_rd_state(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = (struct obd_device *)data;
+ struct obd_import *imp;
+ int j, k;
+
+ LASSERT(obd != NULL);
+ LPROCFS_CLIMP_CHECK(obd);
+ imp = obd->u.cli.cl_import;
+
+ seq_printf(m, "current_state: %s\n",
+ ptlrpc_import_state_name(imp->imp_state));
+ seq_printf(m, "state_history:\n");
+ k = imp->imp_state_hist_idx;
+ for (j = 0; j < IMP_STATE_HIST_LEN; j++) {
+ struct import_state_hist *ish =
+ &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN];
+ if (ish->ish_state == 0)
+ continue;
+ seq_printf(m, " - ["CFS_TIME_T", %s]\n",
+ ish->ish_time,
+ ptlrpc_import_state_name(ish->ish_state));
+ }
+
+ LPROCFS_CLIMP_EXIT(obd);
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_state);
+
+int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at)
+{
+ int i;
+ for (i = 0; i < AT_BINS; i++)
+ seq_printf(m, "%3u ", at->at_hist[i]);
+ seq_printf(m, "\n");
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_at_hist_helper);
+
+/* See also ptlrpc_lprocfs_rd_timeouts */
+int lprocfs_rd_timeouts(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = (struct obd_device *)data;
+ struct obd_import *imp;
+ unsigned int cur, worst;
+ time_t now, worstt;
+ struct dhms ts;
+ int i;
+
+ LASSERT(obd != NULL);
+ LPROCFS_CLIMP_CHECK(obd);
+ imp = obd->u.cli.cl_import;
+
+ now = cfs_time_current_sec();
+
+ /* Some network health info for kicks */
+ s2dhms(&ts, now - imp->imp_last_reply_time);
+ seq_printf(m, "%-10s : %ld, "DHMS_FMT" ago\n",
+ "last reply", imp->imp_last_reply_time, DHMS_VARS(&ts));
+
+ cur = at_get(&imp->imp_at.iat_net_latency);
+ worst = imp->imp_at.iat_net_latency.at_worst_ever;
+ worstt = imp->imp_at.iat_net_latency.at_worst_time;
+ s2dhms(&ts, now - worstt);
+ seq_printf(m, "%-10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ",
+ "network", cur, worst, worstt, DHMS_VARS(&ts));
+ lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency);
+
+ for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+ if (imp->imp_at.iat_portal[i] == 0)
+ break;
+ cur = at_get(&imp->imp_at.iat_service_estimate[i]);
+ worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
+ worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
+ s2dhms(&ts, now - worstt);
+ seq_printf(m, "portal %-2d : cur %3u worst %3u (at %ld, "
+ DHMS_FMT" ago) ", imp->imp_at.iat_portal[i],
+ cur, worst, worstt, DHMS_VARS(&ts));
+ lprocfs_at_hist_helper(m, &imp->imp_at.iat_service_estimate[i]);
+ }
+
+ LPROCFS_CLIMP_EXIT(obd);
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_timeouts);
+
+int lprocfs_rd_connect_flags(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+ __u64 flags;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
+ seq_printf(m, "flags="LPX64"\n", flags);
+ obd_connect_seq_flags2str(m, flags, "\n");
+ seq_printf(m, "\n");
+ LPROCFS_CLIMP_EXIT(obd);
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_connect_flags);
+
+int lprocfs_rd_num_exports(struct seq_file *m, void *data)
+{
+ struct obd_device *obd = data;
+
+ LASSERT(obd != NULL);
+ return seq_printf(m, "%u\n", obd->obd_num_exports);
+}
+EXPORT_SYMBOL(lprocfs_rd_num_exports);
+
+int lprocfs_rd_numrefs(struct seq_file *m, void *data)
+{
+ struct obd_type *class = (struct obd_type*) data;
+
+ LASSERT(class != NULL);
+ return seq_printf(m, "%d\n", class->typ_refcnt);
+}
+EXPORT_SYMBOL(lprocfs_rd_numrefs);
+
+int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list)
+{
+ int rc = 0;
+
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+ LASSERT(obd->obd_type->typ_procroot != NULL);
+
+ obd->obd_proc_entry = lprocfs_register(obd->obd_name,
+ obd->obd_type->typ_procroot,
+ list, obd);
+ if (IS_ERR(obd->obd_proc_entry)) {
+ rc = PTR_ERR(obd->obd_proc_entry);
+ CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name);
+ obd->obd_proc_entry = NULL;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_obd_setup);
+
+int lprocfs_obd_cleanup(struct obd_device *obd)
+{
+ if (!obd)
+ return -EINVAL;
+ if (obd->obd_proc_exports_entry) {
+ /* Should be no exports left */
+ lprocfs_remove(&obd->obd_proc_exports_entry);
+ obd->obd_proc_exports_entry = NULL;
+ }
+ if (obd->obd_proc_entry) {
+ lprocfs_remove(&obd->obd_proc_entry);
+ obd->obd_proc_entry = NULL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_obd_cleanup);
+
+static void lprocfs_free_client_stats(struct nid_stat *client_stat)
+{
+ CDEBUG(D_CONFIG, "stat %p - data %p/%p\n", client_stat,
+ client_stat->nid_proc, client_stat->nid_stats);
+
+ LASSERTF(atomic_read(&client_stat->nid_exp_ref_count) == 0,
+ "nid %s:count %d\n", libcfs_nid2str(client_stat->nid),
+ atomic_read(&client_stat->nid_exp_ref_count));
+
+ if (client_stat->nid_proc)
+ lprocfs_remove(&client_stat->nid_proc);
+
+ if (client_stat->nid_stats)
+ lprocfs_free_stats(&client_stat->nid_stats);
+
+ if (client_stat->nid_ldlm_stats)
+ lprocfs_free_stats(&client_stat->nid_ldlm_stats);
+
+ OBD_FREE_PTR(client_stat);
+ return;
+
+}
+
+void lprocfs_free_per_client_stats(struct obd_device *obd)
+{
+ cfs_hash_t *hash = obd->obd_nid_stats_hash;
+ struct nid_stat *stat;
+ ENTRY;
+
+ /* we need extra list - because hash_exit called to early */
+ /* not need locking because all clients is died */
+ while (!list_empty(&obd->obd_nid_stats)) {
+ stat = list_entry(obd->obd_nid_stats.next,
+ struct nid_stat, nid_list);
+ list_del_init(&stat->nid_list);
+ cfs_hash_del(hash, &stat->nid, &stat->nid_hash);
+ lprocfs_free_client_stats(stat);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(lprocfs_free_per_client_stats);
+
+struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
+ enum lprocfs_stats_flags flags)
+{
+ struct lprocfs_stats *stats;
+ unsigned int num_entry;
+ unsigned int percpusize = 0;
+ int i;
+
+ if (num == 0)
+ return NULL;
+
+ if (lprocfs_no_percpu_stats != 0)
+ flags |= LPROCFS_STATS_FLAG_NOPERCPU;
+
+ if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
+ num_entry = 1;
+ else
+ num_entry = num_possible_cpus();
+
+ /* alloc percpu pointers for all possible cpu slots */
+ LIBCFS_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
+ if (stats == NULL)
+ return NULL;
+
+ stats->ls_num = num;
+ stats->ls_flags = flags;
+ spin_lock_init(&stats->ls_lock);
+
+ /* alloc num of counter headers */
+ LIBCFS_ALLOC(stats->ls_cnt_header,
+ stats->ls_num * sizeof(struct lprocfs_counter_header));
+ if (stats->ls_cnt_header == NULL)
+ goto fail;
+
+ if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) {
+ /* contains only one set counters */
+ percpusize = lprocfs_stats_counter_size(stats);
+ LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[0], percpusize);
+ if (stats->ls_percpu[0] == NULL)
+ goto fail;
+ stats->ls_biggest_alloc_num = 1;
+ } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) {
+ /* alloc all percpu data, currently only obd_memory use this */
+ for (i = 0; i < num_entry; ++i)
+ if (lprocfs_stats_alloc_one(stats, i) < 0)
+ goto fail;
+ }
+
+ return stats;
+
+fail:
+ lprocfs_free_stats(&stats);
+ return NULL;
+}
+EXPORT_SYMBOL(lprocfs_alloc_stats);
+
+void lprocfs_free_stats(struct lprocfs_stats **statsh)
+{
+ struct lprocfs_stats *stats = *statsh;
+ unsigned int num_entry;
+ unsigned int percpusize;
+ unsigned int i;
+
+ if (stats == NULL || stats->ls_num == 0)
+ return;
+ *statsh = NULL;
+
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
+ num_entry = 1;
+ else
+ num_entry = num_possible_cpus();
+
+ percpusize = lprocfs_stats_counter_size(stats);
+ for (i = 0; i < num_entry; i++)
+ if (stats->ls_percpu[i] != NULL)
+ LIBCFS_FREE(stats->ls_percpu[i], percpusize);
+ if (stats->ls_cnt_header != NULL)
+ LIBCFS_FREE(stats->ls_cnt_header, stats->ls_num *
+ sizeof(struct lprocfs_counter_header));
+ LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
+}
+EXPORT_SYMBOL(lprocfs_free_stats);
+
+void lprocfs_clear_stats(struct lprocfs_stats *stats)
+{
+ struct lprocfs_counter *percpu_cntr;
+ struct lprocfs_counter_header *header;
+ int i;
+ int j;
+ unsigned int num_entry;
+ unsigned long flags = 0;
+
+ num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+
+ for (i = 0; i < num_entry; i++) {
+ if (stats->ls_percpu[i] == NULL)
+ continue;
+ for (j = 0; j < stats->ls_num; j++) {
+ header = &stats->ls_cnt_header[j];
+ percpu_cntr = lprocfs_stats_counter_get(stats, i, j);
+ percpu_cntr->lc_count = 0;
+ percpu_cntr->lc_min = LC_MIN_INIT;
+ percpu_cntr->lc_max = 0;
+ percpu_cntr->lc_sumsquare = 0;
+ percpu_cntr->lc_sum = 0;
+ if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+ percpu_cntr->lc_sum_irq = 0;
+ }
+ }
+
+ lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
+}
+EXPORT_SYMBOL(lprocfs_clear_stats);
+
+static ssize_t lprocfs_stats_seq_write(struct file *file,
+ const char __user *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct lprocfs_stats *stats = seq->private;
+
+ lprocfs_clear_stats(stats);
+
+ return len;
+}
+
+static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
+{
+ struct lprocfs_stats *stats = p->private;
+
+ return (*pos < stats->ls_num) ? pos : NULL;
+}
+
+static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return lprocfs_stats_seq_start(p, pos);
+}
+
+/* seq file export of one lprocfs counter */
+static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
+{
+ struct lprocfs_stats *stats = p->private;
+ struct lprocfs_counter_header *hdr;
+ struct lprocfs_counter ctr;
+ int idx = *(loff_t *)v;
+ int rc = 0;
+
+ if (idx == 0) {
+ struct timeval now;
+ do_gettimeofday(&now);
+ rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
+ "snapshot_time", now.tv_sec, now.tv_usec);
+ if (rc < 0)
+ return rc;
+ }
+ hdr = &stats->ls_cnt_header[idx];
+ lprocfs_stats_collect(stats, idx, &ctr);
+
+ if (ctr.lc_count == 0)
+ goto out;
+
+ rc = seq_printf(p, "%-25s "LPD64" samples [%s]", hdr->lc_name,
+ ctr.lc_count, hdr->lc_units);
+
+ if (rc < 0)
+ goto out;
+
+ if ((hdr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ctr.lc_count > 0)) {
+ rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
+ ctr.lc_min, ctr.lc_max, ctr.lc_sum);
+ if (rc < 0)
+ goto out;
+ if (hdr->lc_config & LPROCFS_CNTR_STDDEV)
+ rc = seq_printf(p, " "LPD64, ctr.lc_sumsquare);
+ if (rc < 0)
+ goto out;
+ }
+ rc = seq_printf(p, "\n");
+out:
+ return (rc < 0) ? rc : 0;
+}
+
+struct seq_operations lprocfs_stats_seq_sops = {
+ .start = lprocfs_stats_seq_start,
+ .stop = lprocfs_stats_seq_stop,
+ .next = lprocfs_stats_seq_next,
+ .show = lprocfs_stats_seq_show,
+};
+
+static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc;
+
+ rc = seq_open(file, &lprocfs_stats_seq_sops);
+ if (rc)
+ return rc;
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+ return 0;
+}
+
+struct file_operations lprocfs_stats_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = lprocfs_stats_seq_open,
+ .read = seq_read,
+ .write = lprocfs_stats_seq_write,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+};
+
+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
+ struct lprocfs_stats *stats)
+{
+ struct proc_dir_entry *entry;
+ LASSERT(root != NULL);
+
+ entry = proc_create_data(name, 0644, root,
+ &lprocfs_stats_seq_fops, stats);
+ if (entry == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_register_stats);
+
+void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+ unsigned conf, const char *name, const char *units)
+{
+ struct lprocfs_counter_header *header;
+ struct lprocfs_counter *percpu_cntr;
+ unsigned long flags = 0;
+ unsigned int i;
+ unsigned int num_cpu;
+
+ LASSERT(stats != NULL);
+
+ header = &stats->ls_cnt_header[index];
+ LASSERTF(header != NULL, "Failed to allocate stats header:[%d]%s/%s\n",
+ index, name, units);
+
+ header->lc_config = conf;
+ header->lc_name = name;
+ header->lc_units = units;
+
+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+ for (i = 0; i < num_cpu; ++i) {
+ if (stats->ls_percpu[i] == NULL)
+ continue;
+ percpu_cntr = lprocfs_stats_counter_get(stats, i, index);
+ percpu_cntr->lc_count = 0;
+ percpu_cntr->lc_min = LC_MIN_INIT;
+ percpu_cntr->lc_max = 0;
+ percpu_cntr->lc_sumsquare = 0;
+ percpu_cntr->lc_sum = 0;
+ if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+ percpu_cntr->lc_sum_irq = 0;
+ }
+ lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
+}
+EXPORT_SYMBOL(lprocfs_counter_init);
+
+#define LPROCFS_OBD_OP_INIT(base, stats, op) \
+do { \
+ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < stats->ls_num); \
+ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
+} while (0)
+
+void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
+{
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precleanup);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, process_config);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, postrecov);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, add_conn);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, del_conn);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reconnect);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_init);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_fini);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, fid_alloc);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs_async);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precreate);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, create_async);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, merge_lvb);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, find_cbdata);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_connect);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_uuid);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_new);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_rem);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_add);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_del);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getref);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, putref);
+}
+EXPORT_SYMBOL(lprocfs_init_ops_stats);
+
+int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
+{
+ struct lprocfs_stats *stats;
+ unsigned int num_stats;
+ int rc, i;
+
+ LASSERT(obd->obd_stats == NULL);
+ LASSERT(obd->obd_proc_entry != NULL);
+ LASSERT(obd->obd_cntr_base == 0);
+
+ num_stats = ((int)sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) +
+ num_private_stats - 1 /* o_owner */;
+ stats = lprocfs_alloc_stats(num_stats, 0);
+ if (stats == NULL)
+ return -ENOMEM;
+
+ lprocfs_init_ops_stats(num_private_stats, stats);
+
+ for (i = num_private_stats; i < num_stats; i++) {
+ /* If this LBUGs, it is likely that an obd
+ * operation was added to struct obd_ops in
+ * <obd.h>, and that the corresponding line item
+ * LPROCFS_OBD_OP_INIT(.., .., opname)
+ * is missing from the list above. */
+ LASSERTF(stats->ls_cnt_header[i].lc_name != NULL,
+ "Missing obd_stat initializer obd_op "
+ "operation at offset %d.\n", i - num_private_stats);
+ }
+ rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
+ if (rc < 0) {
+ lprocfs_free_stats(&stats);
+ } else {
+ obd->obd_stats = stats;
+ obd->obd_cntr_base = num_private_stats;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
+
+void lprocfs_free_obd_stats(struct obd_device *obd)
+{
+ if (obd->obd_stats)
+ lprocfs_free_stats(&obd->obd_stats);
+}
+EXPORT_SYMBOL(lprocfs_free_obd_stats);
+
+#define LPROCFS_MD_OP_INIT(base, stats, op) \
+do { \
+ unsigned int coffset = base + MD_COUNTER_OFFSET(op); \
+ LASSERT(coffset < stats->ls_num); \
+ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
+} while (0)
+
+void lprocfs_init_mps_stats(int num_private_stats, struct lprocfs_stats *stats)
+{
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, getstatus);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, null_inode);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, find_cbdata);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, close);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, create);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, done_writing);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, enqueue);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr_name);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_lock);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, link);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, rename);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, is_subdir);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, setattr);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, sync);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, readpage);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, unlink);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, setxattr);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, getxattr);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, init_ea_size);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, get_lustre_md);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, free_lustre_md);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, set_open_replay_data);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, clear_open_replay_data);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, set_lock_data);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, lock_match);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, cancel_unused);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, renew_capa);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, unpack_capa);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_getattr_async);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, revalidate_lock);
+}
+EXPORT_SYMBOL(lprocfs_init_mps_stats);
+
+int lprocfs_alloc_md_stats(struct obd_device *obd,
+ unsigned num_private_stats)
+{
+ struct lprocfs_stats *stats;
+ unsigned int num_stats;
+ int rc, i;
+
+ LASSERT(obd->md_stats == NULL);
+ LASSERT(obd->obd_proc_entry != NULL);
+ LASSERT(obd->md_cntr_base == 0);
+
+ num_stats = 1 + MD_COUNTER_OFFSET(revalidate_lock) +
+ num_private_stats;
+ stats = lprocfs_alloc_stats(num_stats, 0);
+ if (stats == NULL)
+ return -ENOMEM;
+
+ lprocfs_init_mps_stats(num_private_stats, stats);
+
+ for (i = num_private_stats; i < num_stats; i++) {
+ if (stats->ls_cnt_header[i].lc_name == NULL) {
+ CERROR("Missing md_stat initializer md_op "
+ "operation at offset %d. Aborting.\n",
+ i - num_private_stats);
+ LBUG();
+ }
+ }
+ rc = lprocfs_register_stats(obd->obd_proc_entry, "md_stats", stats);
+ if (rc < 0) {
+ lprocfs_free_stats(&stats);
+ } else {
+ obd->md_stats = stats;
+ obd->md_cntr_base = num_private_stats;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_alloc_md_stats);
+
+void lprocfs_free_md_stats(struct obd_device *obd)
+{
+ struct lprocfs_stats *stats = obd->md_stats;
+
+ if (stats != NULL) {
+ obd->md_stats = NULL;
+ obd->md_cntr_base = 0;
+ lprocfs_free_stats(&stats);
+ }
+}
+EXPORT_SYMBOL(lprocfs_free_md_stats);
+
+void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
+{
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_ENQUEUE - LDLM_FIRST_OPC,
+ 0, "ldlm_enqueue", "reqs");
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_CONVERT - LDLM_FIRST_OPC,
+ 0, "ldlm_convert", "reqs");
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_CANCEL - LDLM_FIRST_OPC,
+ 0, "ldlm_cancel", "reqs");
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
+ 0, "ldlm_bl_callback", "reqs");
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
+ 0, "ldlm_cp_callback", "reqs");
+ lprocfs_counter_init(ldlm_stats,
+ LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
+ 0, "ldlm_gl_callback", "reqs");
+}
+EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
+
+int lprocfs_exp_print_uuid(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+
+{
+ struct obd_export *exp = cfs_hash_object(hs, hnode);
+ struct seq_file *m = (struct seq_file *)data;
+
+ if (exp->exp_nid_stats)
+ seq_printf(m, "%s\n", obd_uuid2str(&exp->exp_client_uuid));
+
+ return 0;
+}
+
+static int
+lproc_exp_uuid_seq_show(struct seq_file *m, void *unused)
+{
+ struct nid_stat *stats = (struct nid_stat *)m->private;
+ struct obd_device *obd = stats->nid_obd;
+
+ cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
+ lprocfs_exp_print_uuid, m);
+ return 0;
+}
+
+LPROC_SEQ_FOPS_RO(lproc_exp_uuid);
+
+struct exp_hash_cb_data {
+ struct seq_file *m;
+ bool first;
+};
+
+int lprocfs_exp_print_hash(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *cb_data)
+
+{
+ struct exp_hash_cb_data *data = (struct exp_hash_cb_data *)cb_data;
+ struct obd_export *exp = cfs_hash_object(hs, hnode);
+
+ if (exp->exp_lock_hash != NULL) {
+ if (data->first) {
+ cfs_hash_debug_header(data->m);
+ data->first = false;
+ }
+ cfs_hash_debug_str(hs, data->m);
+ }
+
+ return 0;
+}
+
+static int
+lproc_exp_hash_seq_show(struct seq_file *m, void *unused)
+{
+ struct nid_stat *stats = (struct nid_stat *)m->private;
+ struct obd_device *obd = stats->nid_obd;
+ struct exp_hash_cb_data cb_data = {m, true};
+
+ cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
+ lprocfs_exp_print_hash, &cb_data);
+ return 0;
+}
+
+LPROC_SEQ_FOPS_RO(lproc_exp_hash);
+
+int lprocfs_nid_stats_clear_read(struct seq_file *m, void *data)
+{
+ return seq_printf(m, "%s\n",
+ "Write into this file to clear all nid stats and "
+ "stale nid entries");
+}
+EXPORT_SYMBOL(lprocfs_nid_stats_clear_read);
+
+static int lprocfs_nid_stats_clear_write_cb(void *obj, void *data)
+{
+ struct nid_stat *stat = obj;
+ ENTRY;
+
+ CDEBUG(D_INFO,"refcnt %d\n", atomic_read(&stat->nid_exp_ref_count));
+ if (atomic_read(&stat->nid_exp_ref_count) == 1) {
+ /* object has only hash references. */
+ spin_lock(&stat->nid_obd->obd_nid_lock);
+ list_move(&stat->nid_list, data);
+ spin_unlock(&stat->nid_obd->obd_nid_lock);
+ RETURN(1);
+ }
+ /* we has reference to object - only clear data*/
+ if (stat->nid_stats)
+ lprocfs_clear_stats(stat->nid_stats);
+
+ RETURN(0);
+}
+
+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct obd_device *obd = (struct obd_device *)data;
+ struct nid_stat *client_stat;
+ LIST_HEAD(free_list);
+
+ cfs_hash_cond_del(obd->obd_nid_stats_hash,
+ lprocfs_nid_stats_clear_write_cb, &free_list);
+
+ while (!list_empty(&free_list)) {
+ client_stat = list_entry(free_list.next, struct nid_stat,
+ nid_list);
+ list_del_init(&client_stat->nid_list);
+ lprocfs_free_client_stats(client_stat);
+ }
+
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_nid_stats_clear_write);
+
+int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
+{
+ struct nid_stat *new_stat, *old_stat;
+ struct obd_device *obd = NULL;
+ proc_dir_entry_t *entry;
+ char *buffer = NULL;
+ int rc = 0;
+ ENTRY;
+
+ *newnid = 0;
+
+ if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry ||
+ !exp->exp_obd->obd_nid_stats_hash)
+ RETURN(-EINVAL);
+
+ /* not test against zero because eric say:
+ * You may only test nid against another nid, or LNET_NID_ANY.
+ * Anything else is nonsense.*/
+ if (!nid || *nid == LNET_NID_ANY)
+ RETURN(0);
+
+ obd = exp->exp_obd;
+
+ CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash);
+
+ OBD_ALLOC_PTR(new_stat);
+ if (new_stat == NULL)
+ RETURN(-ENOMEM);
+
+ new_stat->nid = *nid;
+ new_stat->nid_obd = exp->exp_obd;
+ /* we need set default refcount to 1 to balance obd_disconnect */
+ atomic_set(&new_stat->nid_exp_ref_count, 1);
+
+ old_stat = cfs_hash_findadd_unique(obd->obd_nid_stats_hash,
+ nid, &new_stat->nid_hash);
+ CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n",
+ old_stat, libcfs_nid2str(*nid),
+ atomic_read(&new_stat->nid_exp_ref_count));
+
+ /* We need to release old stats because lprocfs_exp_cleanup() hasn't
+ * been and will never be called. */
+ if (exp->exp_nid_stats) {
+ nidstat_putref(exp->exp_nid_stats);
+ exp->exp_nid_stats = NULL;
+ }
+
+ /* Return -EALREADY here so that we know that the /proc
+ * entry already has been created */
+ if (old_stat != new_stat) {
+ exp->exp_nid_stats = old_stat;
+ GOTO(destroy_new, rc = -EALREADY);
+ }
+ /* not found - create */
+ OBD_ALLOC(buffer, LNET_NIDSTR_SIZE);
+ if (buffer == NULL)
+ GOTO(destroy_new, rc = -ENOMEM);
+
+ memcpy(buffer, libcfs_nid2str(*nid), LNET_NIDSTR_SIZE);
+ new_stat->nid_proc = lprocfs_register(buffer,
+ obd->obd_proc_exports_entry,
+ NULL, NULL);
+ OBD_FREE(buffer, LNET_NIDSTR_SIZE);
+
+ if (new_stat->nid_proc == NULL) {
+ CERROR("Error making export directory for nid %s\n",
+ libcfs_nid2str(*nid));
+ GOTO(destroy_new_ns, rc = -ENOMEM);
+ }
+
+ entry = lprocfs_add_simple(new_stat->nid_proc, "uuid",
+ new_stat, &lproc_exp_uuid_fops);
+ if (IS_ERR(entry)) {
+ CWARN("Error adding the NID stats file\n");
+ rc = PTR_ERR(entry);
+ GOTO(destroy_new_ns, rc);
+ }
+
+ entry = lprocfs_add_simple(new_stat->nid_proc, "hash",
+ new_stat, &lproc_exp_hash_fops);
+ if (IS_ERR(entry)) {
+ CWARN("Error adding the hash file\n");
+ rc = PTR_ERR(entry);
+ GOTO(destroy_new_ns, rc);
+ }
+
+ exp->exp_nid_stats = new_stat;
+ *newnid = 1;
+ /* protect competitive add to list, not need locking on destroy */
+ spin_lock(&obd->obd_nid_lock);
+ list_add(&new_stat->nid_list, &obd->obd_nid_stats);
+ spin_unlock(&obd->obd_nid_lock);
+
+ RETURN(rc);
+
+destroy_new_ns:
+ if (new_stat->nid_proc != NULL)
+ lprocfs_remove(&new_stat->nid_proc);
+ cfs_hash_del(obd->obd_nid_stats_hash, nid, &new_stat->nid_hash);
+
+destroy_new:
+ nidstat_putref(new_stat);
+ OBD_FREE_PTR(new_stat);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lprocfs_exp_setup);
+
+int lprocfs_exp_cleanup(struct obd_export *exp)
+{
+ struct nid_stat *stat = exp->exp_nid_stats;
+
+ if(!stat || !exp->exp_obd)
+ RETURN(0);
+
+ nidstat_putref(exp->exp_nid_stats);
+ exp->exp_nid_stats = NULL;
+
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_exp_cleanup);
+
+int lprocfs_write_helper(const char *buffer, unsigned long count,
+ int *val)
+{
+ return lprocfs_write_frac_helper(buffer, count, val, 1);
+}
+EXPORT_SYMBOL(lprocfs_write_helper);
+
+int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
+ int *val, int mult)
+{
+ char kernbuf[20], *end, *pbuf;
+
+ if (count > (sizeof(kernbuf) - 1))
+ return -EINVAL;
+
+ if (copy_from_user(kernbuf, buffer, count))
+ return -EFAULT;
+
+ kernbuf[count] = '\0';
+ pbuf = kernbuf;
+ if (*pbuf == '-') {
+ mult = -mult;
+ pbuf++;
+ }
+
+ *val = (int)simple_strtoul(pbuf, &end, 10) * mult;
+ if (pbuf == end)
+ return -EINVAL;
+
+ if (end != NULL && *end == '.') {
+ int temp_val, pow = 1;
+ int i;
+
+ pbuf = end + 1;
+ if (strlen(pbuf) > 5)
+ pbuf[5] = '\0'; /*only allow 5bits fractional*/
+
+ temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult;
+
+ if (pbuf < end) {
+ for (i = 0; i < (end - pbuf); i++)
+ pow *= 10;
+
+ *val += temp_val / pow;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_write_frac_helper);
+
+int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
+ int mult)
+{
+ long decimal_val, frac_val;
+ int prtn;
+
+ if (count < 10)
+ return -EINVAL;
+
+ decimal_val = val / mult;
+ prtn = snprintf(buffer, count, "%ld", decimal_val);
+ frac_val = val % mult;
+
+ if (prtn < (count - 4) && frac_val > 0) {
+ long temp_frac;
+ int i, temp_mult = 1, frac_bits = 0;
+
+ temp_frac = frac_val * 10;
+ buffer[prtn++] = '.';
+ while (frac_bits < 2 && (temp_frac / mult) < 1 ) {
+ /* only reserved 2 bits fraction */
+ buffer[prtn++] ='0';
+ temp_frac *= 10;
+ frac_bits++;
+ }
+ /*
+ * Need to think these cases :
+ * 1. #echo x.00 > /proc/xxx output result : x
+ * 2. #echo x.0x > /proc/xxx output result : x.0x
+ * 3. #echo x.x0 > /proc/xxx output result : x.x
+ * 4. #echo x.xx > /proc/xxx output result : x.xx
+ * Only reserved 2 bits fraction.
+ */
+ for (i = 0; i < (5 - prtn); i++)
+ temp_mult *= 10;
+
+ frac_bits = min((int)count - prtn, 3 - frac_bits);
+ prtn += snprintf(buffer + prtn, frac_bits, "%ld",
+ frac_val * temp_mult / mult);
+
+ prtn--;
+ while(buffer[prtn] < '1' || buffer[prtn] > '9') {
+ prtn--;
+ if (buffer[prtn] == '.') {
+ prtn--;
+ break;
+ }
+ }
+ prtn++;
+ }
+ buffer[prtn++] ='\n';
+ return prtn;
+}
+EXPORT_SYMBOL(lprocfs_read_frac_helper);
+
+int lprocfs_seq_read_frac_helper(struct seq_file *m, long val, int mult)
+{
+ long decimal_val, frac_val;
+
+ decimal_val = val / mult;
+ seq_printf(m, "%ld", decimal_val);
+ frac_val = val % mult;
+
+ if (frac_val > 0) {
+ frac_val *= 100;
+ frac_val /= mult;
+ }
+ if (frac_val > 0) {
+ /* Three cases: x0, xx, 0x */
+ if ((frac_val % 10) != 0)
+ seq_printf(m, ".%ld", frac_val);
+ else
+ seq_printf(m, ".%ld", frac_val / 10);
+ }
+
+ seq_printf(m, "\n");
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_seq_read_frac_helper);
+
+int lprocfs_write_u64_helper(const char *buffer, unsigned long count,__u64 *val)
+{
+ return lprocfs_write_frac_u64_helper(buffer, count, val, 1);
+}
+EXPORT_SYMBOL(lprocfs_write_u64_helper);
+
+int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
+ __u64 *val, int mult)
+{
+ char kernbuf[22], *end, *pbuf;
+ __u64 whole, frac = 0, units;
+ unsigned frac_d = 1;
+
+ if (count > (sizeof(kernbuf) - 1))
+ return -EINVAL;
+
+ if (copy_from_user(kernbuf, buffer, count))
+ return -EFAULT;
+
+ kernbuf[count] = '\0';
+ pbuf = kernbuf;
+ if (*pbuf == '-') {
+ mult = -mult;
+ pbuf++;
+ }
+
+ whole = simple_strtoull(pbuf, &end, 10);
+ if (pbuf == end)
+ return -EINVAL;
+
+ if (end != NULL && *end == '.') {
+ int i;
+ pbuf = end + 1;
+
+ /* need to limit frac_d to a __u32 */
+ if (strlen(pbuf) > 10)
+ pbuf[10] = '\0';
+
+ frac = simple_strtoull(pbuf, &end, 10);
+ /* count decimal places */
+ for (i = 0; i < (end - pbuf); i++)
+ frac_d *= 10;
+ }
+
+ units = 1;
+ switch(*end) {
+ case 'p': case 'P':
+ units <<= 10;
+ case 't': case 'T':
+ units <<= 10;
+ case 'g': case 'G':
+ units <<= 10;
+ case 'm': case 'M':
+ units <<= 10;
+ case 'k': case 'K':
+ units <<= 10;
+ }
+ /* Specified units override the multiplier */
+ if (units)
+ mult = mult < 0 ? -units : units;
+
+ frac *= mult;
+ do_div(frac, frac_d);
+ *val = whole * mult + frac;
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_write_frac_u64_helper);
+
+static char *lprocfs_strnstr(const char *s1, const char *s2, size_t len)
+{
+ size_t l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ while (len >= l2) {
+ len--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/**
+ * Find the string \a name in the input \a buffer, and return a pointer to the
+ * value immediately following \a name, reducing \a count appropriately.
+ * If \a name is not found the original \a buffer is returned.
+ */
+char *lprocfs_find_named_value(const char *buffer, const char *name,
+ unsigned long *count)
+{
+ char *val;
+ size_t buflen = *count;
+
+ /* there is no strnstr() in rhel5 and ubuntu kernels */
+ val = lprocfs_strnstr(buffer, name, buflen);
+ if (val == NULL)
+ return (char *)buffer;
+
+ val += strlen(name); /* skip prefix */
+ while (val < buffer + buflen && isspace(*val)) /* skip separator */
+ val++;
+
+ *count = 0;
+ while (val < buffer + buflen && isalnum(*val)) {
+ ++*count;
+ ++val;
+ }
+
+ return val - *count;
+}
+EXPORT_SYMBOL(lprocfs_find_named_value);
+
+int lprocfs_seq_create(proc_dir_entry_t *parent,
+ const char *name,
+ mode_t mode,
+ const struct file_operations *seq_fops,
+ void *data)
+{
+ struct proc_dir_entry *entry;
+ ENTRY;
+
+ /* Disallow secretly (un)writable entries. */
+ LASSERT((seq_fops->write == NULL) == ((mode & 0222) == 0));
+ entry = proc_create_data(name, mode, parent, seq_fops, data);
+
+ if (entry == NULL)
+ RETURN(-ENOMEM);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lprocfs_seq_create);
+
+int lprocfs_obd_seq_create(struct obd_device *dev,
+ const char *name,
+ mode_t mode,
+ const struct file_operations *seq_fops,
+ void *data)
+{
+ return (lprocfs_seq_create(dev->obd_proc_entry, name,
+ mode, seq_fops, data));
+}
+EXPORT_SYMBOL(lprocfs_obd_seq_create);
+
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
+{
+ if (value >= OBD_HIST_MAX)
+ value = OBD_HIST_MAX - 1;
+
+ spin_lock(&oh->oh_lock);
+ oh->oh_buckets[value]++;
+ spin_unlock(&oh->oh_lock);
+}
+EXPORT_SYMBOL(lprocfs_oh_tally);
+
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
+{
+ unsigned int val;
+
+ for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
+ ;
+
+ lprocfs_oh_tally(oh, val);
+}
+EXPORT_SYMBOL(lprocfs_oh_tally_log2);
+
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
+{
+ unsigned long ret = 0;
+ int i;
+
+ for (i = 0; i < OBD_HIST_MAX; i++)
+ ret += oh->oh_buckets[i];
+ return ret;
+}
+EXPORT_SYMBOL(lprocfs_oh_sum);
+
+void lprocfs_oh_clear(struct obd_histogram *oh)
+{
+ spin_lock(&oh->oh_lock);
+ memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
+ spin_unlock(&oh->oh_lock);
+}
+EXPORT_SYMBOL(lprocfs_oh_clear);
+
+int lprocfs_obd_rd_max_pages_per_rpc(struct seq_file *m, void *data)
+{
+ struct obd_device *dev = data;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%d\n", cli->cl_max_pages_per_rpc);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_max_pages_per_rpc);
+
+#endif /* LPROCFS*/
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
new file mode 100644
index 000000000000..fdf0ed367693
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -0,0 +1,2185 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/lu_object.c
+ *
+ * Lustre Object.
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/libcfs/libcfs.h>
+
+# include <linux/module.h>
+
+/* hash_long() */
+#include <linux/libcfs/libcfs_hash.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_disk.h>
+#include <lustre_fid.h>
+#include <lu_object.h>
+#include <lu_ref.h>
+#include <linux/list.h>
+
+static void lu_object_free(const struct lu_env *env, struct lu_object *o);
+
+/**
+ * Decrease reference counter on object. If last reference is freed, return
+ * object to the cache, unless lu_object_is_dying(o) holds. In the latter
+ * case, free object immediately.
+ */
+void lu_object_put(const struct lu_env *env, struct lu_object *o)
+{
+ struct lu_site_bkt_data *bkt;
+ struct lu_object_header *top;
+ struct lu_site *site;
+ struct lu_object *orig;
+ cfs_hash_bd_t bd;
+ const struct lu_fid *fid;
+
+ top = o->lo_header;
+ site = o->lo_dev->ld_site;
+ orig = o;
+
+ /*
+ * till we have full fids-on-OST implemented anonymous objects
+ * are possible in OSP. such an object isn't listed in the site
+ * so we should not remove it from the site.
+ */
+ fid = lu_object_fid(o);
+ if (fid_is_zero(fid)) {
+ LASSERT(top->loh_hash.next == NULL
+ && top->loh_hash.pprev == NULL);
+ LASSERT(list_empty(&top->loh_lru));
+ if (!atomic_dec_and_test(&top->loh_ref))
+ return;
+ list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
+ if (o->lo_ops->loo_object_release != NULL)
+ o->lo_ops->loo_object_release(env, o);
+ }
+ lu_object_free(env, orig);
+ return;
+ }
+
+ cfs_hash_bd_get(site->ls_obj_hash, &top->loh_fid, &bd);
+ bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
+
+ if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
+ if (lu_object_is_dying(top)) {
+
+ /*
+ * somebody may be waiting for this, currently only
+ * used for cl_object, see cl_object_put_last().
+ */
+ wake_up_all(&bkt->lsb_marche_funebre);
+ }
+ return;
+ }
+
+ LASSERT(bkt->lsb_busy > 0);
+ bkt->lsb_busy--;
+ /*
+ * When last reference is released, iterate over object
+ * layers, and notify them that object is no longer busy.
+ */
+ list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
+ if (o->lo_ops->loo_object_release != NULL)
+ o->lo_ops->loo_object_release(env, o);
+ }
+
+ if (!lu_object_is_dying(top)) {
+ LASSERT(list_empty(&top->loh_lru));
+ list_add_tail(&top->loh_lru, &bkt->lsb_lru);
+ cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
+ return;
+ }
+
+ /*
+ * If object is dying (will not be cached), removed it
+ * from hash table and LRU.
+ *
+ * This is done with hash table and LRU lists locked. As the only
+ * way to acquire first reference to previously unreferenced
+ * object is through hash-table lookup (lu_object_find()),
+ * or LRU scanning (lu_site_purge()), that are done under hash-table
+ * and LRU lock, no race with concurrent object lookup is possible
+ * and we can safely destroy object below.
+ */
+ if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags))
+ cfs_hash_bd_del_locked(site->ls_obj_hash, &bd, &top->loh_hash);
+ cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
+ /*
+ * Object was already removed from hash and lru above, can
+ * kill it.
+ */
+ lu_object_free(env, orig);
+}
+EXPORT_SYMBOL(lu_object_put);
+
+/**
+ * Put object and don't keep in cache. This is temporary solution for
+ * multi-site objects when its layering is not constant.
+ */
+void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o)
+{
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
+ return lu_object_put(env, o);
+}
+EXPORT_SYMBOL(lu_object_put_nocache);
+
+/**
+ * Kill the object and take it out of LRU cache.
+ * Currently used by client code for layout change.
+ */
+void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
+{
+ struct lu_object_header *top;
+
+ top = o->lo_header;
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &top->loh_flags);
+ if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) {
+ cfs_hash_t *obj_hash = o->lo_dev->ld_site->ls_obj_hash;
+ cfs_hash_bd_t bd;
+
+ cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1);
+ list_del_init(&top->loh_lru);
+ cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
+ cfs_hash_bd_unlock(obj_hash, &bd, 1);
+ }
+}
+EXPORT_SYMBOL(lu_object_unhash);
+
+/**
+ * Allocate new object.
+ *
+ * This follows object creation protocol, described in the comment within
+ * struct lu_device_operations definition.
+ */
+static struct lu_object *lu_object_alloc(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_object *scan;
+ struct lu_object *top;
+ struct list_head *layers;
+ int clean;
+ int result;
+ ENTRY;
+
+ /*
+ * Create top-level object slice. This will also create
+ * lu_object_header.
+ */
+ top = dev->ld_ops->ldo_object_alloc(env, NULL, dev);
+ if (top == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+ if (IS_ERR(top))
+ RETURN(top);
+ /*
+ * This is the only place where object fid is assigned. It's constant
+ * after this point.
+ */
+ top->lo_header->loh_fid = *f;
+ layers = &top->lo_header->loh_layers;
+ do {
+ /*
+ * Call ->loo_object_init() repeatedly, until no more new
+ * object slices are created.
+ */
+ clean = 1;
+ list_for_each_entry(scan, layers, lo_linkage) {
+ if (scan->lo_flags & LU_OBJECT_ALLOCATED)
+ continue;
+ clean = 0;
+ scan->lo_header = top->lo_header;
+ result = scan->lo_ops->loo_object_init(env, scan, conf);
+ if (result != 0) {
+ lu_object_free(env, top);
+ RETURN(ERR_PTR(result));
+ }
+ scan->lo_flags |= LU_OBJECT_ALLOCATED;
+ }
+ } while (!clean);
+
+ list_for_each_entry_reverse(scan, layers, lo_linkage) {
+ if (scan->lo_ops->loo_object_start != NULL) {
+ result = scan->lo_ops->loo_object_start(env, scan);
+ if (result != 0) {
+ lu_object_free(env, top);
+ RETURN(ERR_PTR(result));
+ }
+ }
+ }
+
+ lprocfs_counter_incr(dev->ld_site->ls_stats, LU_SS_CREATED);
+ RETURN(top);
+}
+
+/**
+ * Free an object.
+ */
+static void lu_object_free(const struct lu_env *env, struct lu_object *o)
+{
+ struct lu_site_bkt_data *bkt;
+ struct lu_site *site;
+ struct lu_object *scan;
+ struct list_head *layers;
+ struct list_head splice;
+
+ site = o->lo_dev->ld_site;
+ layers = &o->lo_header->loh_layers;
+ bkt = lu_site_bkt_from_fid(site, &o->lo_header->loh_fid);
+ /*
+ * First call ->loo_object_delete() method to release all resources.
+ */
+ list_for_each_entry_reverse(scan, layers, lo_linkage) {
+ if (scan->lo_ops->loo_object_delete != NULL)
+ scan->lo_ops->loo_object_delete(env, scan);
+ }
+
+ /*
+ * Then, splice object layers into stand-alone list, and call
+ * ->loo_object_free() on all layers to free memory. Splice is
+ * necessary, because lu_object_header is freed together with the
+ * top-level slice.
+ */
+ INIT_LIST_HEAD(&splice);
+ list_splice_init(layers, &splice);
+ while (!list_empty(&splice)) {
+ /*
+ * Free layers in bottom-to-top order, so that object header
+ * lives as long as possible and ->loo_object_free() methods
+ * can look at its contents.
+ */
+ o = container_of0(splice.prev, struct lu_object, lo_linkage);
+ list_del_init(&o->lo_linkage);
+ LASSERT(o->lo_ops->loo_object_free != NULL);
+ o->lo_ops->loo_object_free(env, o);
+ }
+
+ if (waitqueue_active(&bkt->lsb_marche_funebre))
+ wake_up_all(&bkt->lsb_marche_funebre);
+}
+
+/**
+ * Free \a nr objects from the cold end of the site LRU list.
+ */
+int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
+{
+ struct lu_object_header *h;
+ struct lu_object_header *temp;
+ struct lu_site_bkt_data *bkt;
+ cfs_hash_bd_t bd;
+ cfs_hash_bd_t bd2;
+ struct list_head dispose;
+ int did_sth;
+ int start;
+ int count;
+ int bnr;
+ int i;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
+ RETURN(0);
+
+ INIT_LIST_HEAD(&dispose);
+ /*
+ * Under LRU list lock, scan LRU list and move unreferenced objects to
+ * the dispose list, removing them from LRU and hash table.
+ */
+ start = s->ls_purge_start;
+ bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1;
+ again:
+ did_sth = 0;
+ cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
+ if (i < start)
+ continue;
+ count = bnr;
+ cfs_hash_bd_lock(s->ls_obj_hash, &bd, 1);
+ bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
+
+ list_for_each_entry_safe(h, temp, &bkt->lsb_lru, loh_lru) {
+ LASSERT(atomic_read(&h->loh_ref) == 0);
+
+ cfs_hash_bd_get(s->ls_obj_hash, &h->loh_fid, &bd2);
+ LASSERT(bd.bd_bucket == bd2.bd_bucket);
+
+ cfs_hash_bd_del_locked(s->ls_obj_hash,
+ &bd2, &h->loh_hash);
+ list_move(&h->loh_lru, &dispose);
+ if (did_sth == 0)
+ did_sth = 1;
+
+ if (nr != ~0 && --nr == 0)
+ break;
+
+ if (count > 0 && --count == 0)
+ break;
+
+ }
+ cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
+ cond_resched();
+ /*
+ * Free everything on the dispose list. This is safe against
+ * races due to the reasons described in lu_object_put().
+ */
+ while (!list_empty(&dispose)) {
+ h = container_of0(dispose.next,
+ struct lu_object_header, loh_lru);
+ list_del_init(&h->loh_lru);
+ lu_object_free(env, lu_object_top(h));
+ lprocfs_counter_incr(s->ls_stats, LU_SS_LRU_PURGED);
+ }
+
+ if (nr == 0)
+ break;
+ }
+
+ if (nr != 0 && did_sth && start != 0) {
+ start = 0; /* restart from the first bucket */
+ goto again;
+ }
+ /* race on s->ls_purge_start, but nobody cares */
+ s->ls_purge_start = i % CFS_HASH_NBKT(s->ls_obj_hash);
+
+ return nr;
+}
+EXPORT_SYMBOL(lu_site_purge);
+
+/*
+ * Object printing.
+ *
+ * Code below has to jump through certain loops to output object description
+ * into libcfs_debug_msg-based log. The problem is that lu_object_print()
+ * composes object description from strings that are parts of _lines_ of
+ * output (i.e., strings that are not terminated by newline). This doesn't fit
+ * very well into libcfs_debug_msg() interface that assumes that each message
+ * supplied to it is a self-contained output line.
+ *
+ * To work around this, strings are collected in a temporary buffer
+ * (implemented as a value of lu_cdebug_key key), until terminating newline
+ * character is detected.
+ *
+ */
+
+enum {
+ /**
+ * Maximal line size.
+ *
+ * XXX overflow is not handled correctly.
+ */
+ LU_CDEBUG_LINE = 512
+};
+
+struct lu_cdebug_data {
+ /**
+ * Temporary buffer.
+ */
+ char lck_area[LU_CDEBUG_LINE];
+};
+
+/* context key constructor/destructor: lu_global_key_init, lu_global_key_fini */
+LU_KEY_INIT_FINI(lu_global, struct lu_cdebug_data);
+
+/**
+ * Key, holding temporary buffer. This key is registered very early by
+ * lu_global_init().
+ */
+struct lu_context_key lu_global_key = {
+ .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD |
+ LCT_MG_THREAD | LCT_CL_THREAD,
+ .lct_init = lu_global_key_init,
+ .lct_fini = lu_global_key_fini
+};
+
+/**
+ * Printer function emitting messages through libcfs_debug_msg().
+ */
+int lu_cdebug_printer(const struct lu_env *env,
+ void *cookie, const char *format, ...)
+{
+ struct libcfs_debug_msg_data *msgdata = cookie;
+ struct lu_cdebug_data *key;
+ int used;
+ int complete;
+ va_list args;
+
+ va_start(args, format);
+
+ key = lu_context_key_get(&env->le_ctx, &lu_global_key);
+ LASSERT(key != NULL);
+
+ used = strlen(key->lck_area);
+ complete = format[strlen(format) - 1] == '\n';
+ /*
+ * Append new chunk to the buffer.
+ */
+ vsnprintf(key->lck_area + used,
+ ARRAY_SIZE(key->lck_area) - used, format, args);
+ if (complete) {
+ if (cfs_cdebug_show(msgdata->msg_mask, msgdata->msg_subsys))
+ libcfs_debug_msg(msgdata, "%s", key->lck_area);
+ key->lck_area[0] = 0;
+ }
+ va_end(args);
+ return 0;
+}
+EXPORT_SYMBOL(lu_cdebug_printer);
+
+/**
+ * Print object header.
+ */
+void lu_object_header_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer,
+ const struct lu_object_header *hdr)
+{
+ (*printer)(env, cookie, "header@%p[%#lx, %d, "DFID"%s%s%s]",
+ hdr, hdr->loh_flags, atomic_read(&hdr->loh_ref),
+ PFID(&hdr->loh_fid),
+ hlist_unhashed(&hdr->loh_hash) ? "" : " hash",
+ list_empty((struct list_head *)&hdr->loh_lru) ? \
+ "" : " lru",
+ hdr->loh_attr & LOHA_EXISTS ? " exist":"");
+}
+EXPORT_SYMBOL(lu_object_header_print);
+
+/**
+ * Print human readable representation of the \a o to the \a printer.
+ */
+void lu_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t printer, const struct lu_object *o)
+{
+ static const char ruler[] = "........................................";
+ struct lu_object_header *top;
+ int depth;
+
+ top = o->lo_header;
+ lu_object_header_print(env, cookie, printer, top);
+ (*printer)(env, cookie, "{ \n");
+ list_for_each_entry(o, &top->loh_layers, lo_linkage) {
+ depth = o->lo_depth + 4;
+
+ /*
+ * print `.' \a depth times followed by type name and address
+ */
+ (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler,
+ o->lo_dev->ld_type->ldt_name, o);
+ if (o->lo_ops->loo_object_print != NULL)
+ o->lo_ops->loo_object_print(env, cookie, printer, o);
+ (*printer)(env, cookie, "\n");
+ }
+ (*printer)(env, cookie, "} header@%p\n", top);
+}
+EXPORT_SYMBOL(lu_object_print);
+
+/**
+ * Check object consistency.
+ */
+int lu_object_invariant(const struct lu_object *o)
+{
+ struct lu_object_header *top;
+
+ top = o->lo_header;
+ list_for_each_entry(o, &top->loh_layers, lo_linkage) {
+ if (o->lo_ops->loo_object_invariant != NULL &&
+ !o->lo_ops->loo_object_invariant(o))
+ return 0;
+ }
+ return 1;
+}
+EXPORT_SYMBOL(lu_object_invariant);
+
+static struct lu_object *htable_lookup(struct lu_site *s,
+ cfs_hash_bd_t *bd,
+ const struct lu_fid *f,
+ wait_queue_t *waiter,
+ __u64 *version)
+{
+ struct lu_site_bkt_data *bkt;
+ struct lu_object_header *h;
+ struct hlist_node *hnode;
+ __u64 ver = cfs_hash_bd_version_get(bd);
+
+ if (*version == ver)
+ return NULL;
+
+ *version = ver;
+ bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
+ /* cfs_hash_bd_peek_locked is a somehow "internal" function
+ * of cfs_hash, it doesn't add refcount on object. */
+ hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
+ if (hnode == NULL) {
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
+ return NULL;
+ }
+
+ h = container_of0(hnode, struct lu_object_header, loh_hash);
+ if (likely(!lu_object_is_dying(h))) {
+ cfs_hash_get(s->ls_obj_hash, hnode);
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
+ list_del_init(&h->loh_lru);
+ return lu_object_top(h);
+ }
+
+ /*
+ * Lookup found an object being destroyed this object cannot be
+ * returned (to assure that references to dying objects are eventually
+ * drained), and moreover, lookup has to wait until object is freed.
+ */
+
+ init_waitqueue_entry_current(waiter);
+ add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
+ return ERR_PTR(-EAGAIN);
+}
+
+/**
+ * Search cache for an object with the fid \a f. If such object is found,
+ * return it. Otherwise, create new object, insert it into cache and return
+ * it. In any case, additional reference is acquired on the returned object.
+ */
+struct lu_object *lu_object_find(const struct lu_env *env,
+ struct lu_device *dev, const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
+}
+EXPORT_SYMBOL(lu_object_find);
+
+static struct lu_object *lu_object_new(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_object *o;
+ cfs_hash_t *hs;
+ cfs_hash_bd_t bd;
+ struct lu_site_bkt_data *bkt;
+
+ o = lu_object_alloc(env, dev, f, conf);
+ if (unlikely(IS_ERR(o)))
+ return o;
+
+ hs = dev->ld_site->ls_obj_hash;
+ cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
+ bkt = cfs_hash_bd_extra_get(hs, &bd);
+ cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
+ bkt->lsb_busy++;
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ return o;
+}
+
+/**
+ * Core logic of lu_object_find*() functions.
+ */
+static struct lu_object *lu_object_find_try(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf,
+ wait_queue_t *waiter)
+{
+ struct lu_object *o;
+ struct lu_object *shadow;
+ struct lu_site *s;
+ cfs_hash_t *hs;
+ cfs_hash_bd_t bd;
+ __u64 version = 0;
+
+ /*
+ * This uses standard index maintenance protocol:
+ *
+ * - search index under lock, and return object if found;
+ * - otherwise, unlock index, allocate new object;
+ * - lock index and search again;
+ * - if nothing is found (usual case), insert newly created
+ * object into index;
+ * - otherwise (race: other thread inserted object), free
+ * object just allocated.
+ * - unlock index;
+ * - return object.
+ *
+ * For "LOC_F_NEW" case, we are sure the object is new established.
+ * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
+ * just alloc and insert directly.
+ *
+ * If dying object is found during index search, add @waiter to the
+ * site wait-queue and return ERR_PTR(-EAGAIN).
+ */
+ if (conf != NULL && conf->loc_flags & LOC_F_NEW)
+ return lu_object_new(env, dev, f, conf);
+
+ s = dev->ld_site;
+ hs = s->ls_obj_hash;
+ cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
+ o = htable_lookup(s, &bd, f, waiter, &version);
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ if (o != NULL)
+ return o;
+
+ /*
+ * Allocate new object. This may result in rather complicated
+ * operations, including fld queries, inode loading, etc.
+ */
+ o = lu_object_alloc(env, dev, f, conf);
+ if (unlikely(IS_ERR(o)))
+ return o;
+
+ LASSERT(lu_fid_eq(lu_object_fid(o), f));
+
+ cfs_hash_bd_lock(hs, &bd, 1);
+
+ shadow = htable_lookup(s, &bd, f, waiter, &version);
+ if (likely(shadow == NULL)) {
+ struct lu_site_bkt_data *bkt;
+
+ bkt = cfs_hash_bd_extra_get(hs, &bd);
+ cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
+ bkt->lsb_busy++;
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ return o;
+ }
+
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_RACE);
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ lu_object_free(env, o);
+ return shadow;
+}
+
+/**
+ * Much like lu_object_find(), but top level device of object is specifically
+ * \a dev rather than top level device of the site. This interface allows
+ * objects of different "stacking" to be created within the same site.
+ */
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_site_bkt_data *bkt;
+ struct lu_object *obj;
+ wait_queue_t wait;
+
+ while (1) {
+ obj = lu_object_find_try(env, dev, f, conf, &wait);
+ if (obj != ERR_PTR(-EAGAIN))
+ return obj;
+ /*
+ * lu_object_find_try() already added waiter into the
+ * wait queue.
+ */
+ waitq_wait(&wait, TASK_UNINTERRUPTIBLE);
+ bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
+ remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
+ }
+}
+EXPORT_SYMBOL(lu_object_find_at);
+
+/**
+ * Find object with given fid, and return its slice belonging to given device.
+ */
+struct lu_object *lu_object_find_slice(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_object *top;
+ struct lu_object *obj;
+
+ top = lu_object_find(env, dev, f, conf);
+ if (!IS_ERR(top)) {
+ obj = lu_object_locate(top->lo_header, dev->ld_type);
+ if (obj == NULL)
+ lu_object_put(env, top);
+ } else
+ obj = top;
+ return obj;
+}
+EXPORT_SYMBOL(lu_object_find_slice);
+
+/**
+ * Global list of all device types.
+ */
+static LIST_HEAD(lu_device_types);
+
+int lu_device_type_init(struct lu_device_type *ldt)
+{
+ int result = 0;
+
+ INIT_LIST_HEAD(&ldt->ldt_linkage);
+ if (ldt->ldt_ops->ldto_init)
+ result = ldt->ldt_ops->ldto_init(ldt);
+ if (result == 0)
+ list_add(&ldt->ldt_linkage, &lu_device_types);
+ return result;
+}
+EXPORT_SYMBOL(lu_device_type_init);
+
+void lu_device_type_fini(struct lu_device_type *ldt)
+{
+ list_del_init(&ldt->ldt_linkage);
+ if (ldt->ldt_ops->ldto_fini)
+ ldt->ldt_ops->ldto_fini(ldt);
+}
+EXPORT_SYMBOL(lu_device_type_fini);
+
+void lu_types_stop(void)
+{
+ struct lu_device_type *ldt;
+
+ list_for_each_entry(ldt, &lu_device_types, ldt_linkage) {
+ if (ldt->ldt_device_nr == 0 && ldt->ldt_ops->ldto_stop)
+ ldt->ldt_ops->ldto_stop(ldt);
+ }
+}
+EXPORT_SYMBOL(lu_types_stop);
+
+/**
+ * Global list of all sites on this node
+ */
+static LIST_HEAD(lu_sites);
+static DEFINE_MUTEX(lu_sites_guard);
+
+/**
+ * Global environment used by site shrinker.
+ */
+static struct lu_env lu_shrink_env;
+
+struct lu_site_print_arg {
+ struct lu_env *lsp_env;
+ void *lsp_cookie;
+ lu_printer_t lsp_printer;
+};
+
+static int
+lu_site_obj_print(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ struct hlist_node *hnode, void *data)
+{
+ struct lu_site_print_arg *arg = (struct lu_site_print_arg *)data;
+ struct lu_object_header *h;
+
+ h = hlist_entry(hnode, struct lu_object_header, loh_hash);
+ if (!list_empty(&h->loh_layers)) {
+ const struct lu_object *o;
+
+ o = lu_object_top(h);
+ lu_object_print(arg->lsp_env, arg->lsp_cookie,
+ arg->lsp_printer, o);
+ } else {
+ lu_object_header_print(arg->lsp_env, arg->lsp_cookie,
+ arg->lsp_printer, h);
+ }
+ return 0;
+}
+
+/**
+ * Print all objects in \a s.
+ */
+void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
+ lu_printer_t printer)
+{
+ struct lu_site_print_arg arg = {
+ .lsp_env = (struct lu_env *)env,
+ .lsp_cookie = cookie,
+ .lsp_printer = printer,
+ };
+
+ cfs_hash_for_each(s->ls_obj_hash, lu_site_obj_print, &arg);
+}
+EXPORT_SYMBOL(lu_site_print);
+
+enum {
+ LU_CACHE_PERCENT_MAX = 50,
+ LU_CACHE_PERCENT_DEFAULT = 20
+};
+
+static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+CFS_MODULE_PARM(lu_cache_percent, "i", int, 0644,
+ "Percentage of memory to be used as lu_object cache");
+
+/**
+ * Return desired hash table order.
+ */
+static int lu_htable_order(void)
+{
+ unsigned long cache_size;
+ int bits;
+
+ /*
+ * Calculate hash table size, assuming that we want reasonable
+ * performance when 20% of total memory is occupied by cache of
+ * lu_objects.
+ *
+ * Size of lu_object is (arbitrary) taken as 1K (together with inode).
+ */
+ cache_size = num_physpages;
+
+#if BITS_PER_LONG == 32
+ /* limit hashtable size for lowmem systems to low RAM */
+ if (cache_size > 1 << (30 - PAGE_CACHE_SHIFT))
+ cache_size = 1 << (30 - PAGE_CACHE_SHIFT) * 3 / 4;
+#endif
+
+ /* clear off unreasonable cache setting. */
+ if (lu_cache_percent == 0 || lu_cache_percent > LU_CACHE_PERCENT_MAX) {
+ CWARN("obdclass: invalid lu_cache_percent: %u, it must be in"
+ " the range of (0, %u]. Will use default value: %u.\n",
+ lu_cache_percent, LU_CACHE_PERCENT_MAX,
+ LU_CACHE_PERCENT_DEFAULT);
+
+ lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+ }
+ cache_size = cache_size / 100 * lu_cache_percent *
+ (PAGE_CACHE_SIZE / 1024);
+
+ for (bits = 1; (1 << bits) < cache_size; ++bits) {
+ ;
+ }
+ return bits;
+}
+
+static unsigned lu_obj_hop_hash(cfs_hash_t *hs,
+ const void *key, unsigned mask)
+{
+ struct lu_fid *fid = (struct lu_fid *)key;
+ __u32 hash;
+
+ hash = fid_flatten32(fid);
+ hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */
+ hash = cfs_hash_long(hash, hs->hs_bkt_bits);
+
+ /* give me another random factor */
+ hash -= cfs_hash_long((unsigned long)hs, fid_oid(fid) % 11 + 3);
+
+ hash <<= hs->hs_cur_bits - hs->hs_bkt_bits;
+ hash |= (fid_seq(fid) + fid_oid(fid)) & (CFS_HASH_NBKT(hs) - 1);
+
+ return hash & mask;
+}
+
+static void *lu_obj_hop_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct lu_object_header, loh_hash);
+}
+
+static void *lu_obj_hop_key(struct hlist_node *hnode)
+{
+ struct lu_object_header *h;
+
+ h = hlist_entry(hnode, struct lu_object_header, loh_hash);
+ return &h->loh_fid;
+}
+
+static int lu_obj_hop_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct lu_object_header *h;
+
+ h = hlist_entry(hnode, struct lu_object_header, loh_hash);
+ return lu_fid_eq(&h->loh_fid, (struct lu_fid *)key);
+}
+
+static void lu_obj_hop_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct lu_object_header *h;
+
+ h = hlist_entry(hnode, struct lu_object_header, loh_hash);
+ if (atomic_add_return(1, &h->loh_ref) == 1) {
+ struct lu_site_bkt_data *bkt;
+ cfs_hash_bd_t bd;
+
+ cfs_hash_bd_get(hs, &h->loh_fid, &bd);
+ bkt = cfs_hash_bd_extra_get(hs, &bd);
+ bkt->lsb_busy++;
+ }
+}
+
+static void lu_obj_hop_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ LBUG(); /* we should never called it */
+}
+
+cfs_hash_ops_t lu_site_hash_ops = {
+ .hs_hash = lu_obj_hop_hash,
+ .hs_key = lu_obj_hop_key,
+ .hs_keycmp = lu_obj_hop_keycmp,
+ .hs_object = lu_obj_hop_object,
+ .hs_get = lu_obj_hop_get,
+ .hs_put_locked = lu_obj_hop_put_locked,
+};
+
+void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
+{
+ spin_lock(&s->ls_ld_lock);
+ if (list_empty(&d->ld_linkage))
+ list_add(&d->ld_linkage, &s->ls_ld_linkage);
+ spin_unlock(&s->ls_ld_lock);
+}
+EXPORT_SYMBOL(lu_dev_add_linkage);
+
+void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d)
+{
+ spin_lock(&s->ls_ld_lock);
+ list_del_init(&d->ld_linkage);
+ spin_unlock(&s->ls_ld_lock);
+}
+EXPORT_SYMBOL(lu_dev_del_linkage);
+
+/**
+ * Initialize site \a s, with \a d as the top level device.
+ */
+#define LU_SITE_BITS_MIN 12
+#define LU_SITE_BITS_MAX 24
+/**
+ * total 256 buckets, we don't want too many buckets because:
+ * - consume too much memory
+ * - avoid unbalanced LRU list
+ */
+#define LU_SITE_BKT_BITS 8
+
+int lu_site_init(struct lu_site *s, struct lu_device *top)
+{
+ struct lu_site_bkt_data *bkt;
+ cfs_hash_bd_t bd;
+ char name[16];
+ int bits;
+ int i;
+ ENTRY;
+
+ memset(s, 0, sizeof *s);
+ bits = lu_htable_order();
+ snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
+ for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
+ bits >= LU_SITE_BITS_MIN; bits--) {
+ s->ls_obj_hash = cfs_hash_create(name, bits, bits,
+ bits - LU_SITE_BKT_BITS,
+ sizeof(*bkt), 0, 0,
+ &lu_site_hash_ops,
+ CFS_HASH_SPIN_BKTLOCK |
+ CFS_HASH_NO_ITEMREF |
+ CFS_HASH_DEPTH |
+ CFS_HASH_ASSERT_EMPTY);
+ if (s->ls_obj_hash != NULL)
+ break;
+ }
+
+ if (s->ls_obj_hash == NULL) {
+ CERROR("failed to create lu_site hash with bits: %d\n", bits);
+ return -ENOMEM;
+ }
+
+ cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
+ bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
+ INIT_LIST_HEAD(&bkt->lsb_lru);
+ init_waitqueue_head(&bkt->lsb_marche_funebre);
+ }
+
+ s->ls_stats = lprocfs_alloc_stats(LU_SS_LAST_STAT, 0);
+ if (s->ls_stats == NULL) {
+ cfs_hash_putref(s->ls_obj_hash);
+ s->ls_obj_hash = NULL;
+ return -ENOMEM;
+ }
+
+ lprocfs_counter_init(s->ls_stats, LU_SS_CREATED,
+ 0, "created", "created");
+ lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_HIT,
+ 0, "cache_hit", "cache_hit");
+ lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_MISS,
+ 0, "cache_miss", "cache_miss");
+ lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_RACE,
+ 0, "cache_race", "cache_race");
+ lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_DEATH_RACE,
+ 0, "cache_death_race", "cache_death_race");
+ lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED,
+ 0, "lru_purged", "lru_purged");
+
+ INIT_LIST_HEAD(&s->ls_linkage);
+ s->ls_top_dev = top;
+ top->ld_site = s;
+ lu_device_get(top);
+ lu_ref_add(&top->ld_reference, "site-top", s);
+
+ INIT_LIST_HEAD(&s->ls_ld_linkage);
+ spin_lock_init(&s->ls_ld_lock);
+
+ lu_dev_add_linkage(s, top);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lu_site_init);
+
+/**
+ * Finalize \a s and release its resources.
+ */
+void lu_site_fini(struct lu_site *s)
+{
+ mutex_lock(&lu_sites_guard);
+ list_del_init(&s->ls_linkage);
+ mutex_unlock(&lu_sites_guard);
+
+ if (s->ls_obj_hash != NULL) {
+ cfs_hash_putref(s->ls_obj_hash);
+ s->ls_obj_hash = NULL;
+ }
+
+ if (s->ls_top_dev != NULL) {
+ s->ls_top_dev->ld_site = NULL;
+ lu_ref_del(&s->ls_top_dev->ld_reference, "site-top", s);
+ lu_device_put(s->ls_top_dev);
+ s->ls_top_dev = NULL;
+ }
+
+ if (s->ls_stats != NULL)
+ lprocfs_free_stats(&s->ls_stats);
+}
+EXPORT_SYMBOL(lu_site_fini);
+
+/**
+ * Called when initialization of stack for this site is completed.
+ */
+int lu_site_init_finish(struct lu_site *s)
+{
+ int result;
+ mutex_lock(&lu_sites_guard);
+ result = lu_context_refill(&lu_shrink_env.le_ctx);
+ if (result == 0)
+ list_add(&s->ls_linkage, &lu_sites);
+ mutex_unlock(&lu_sites_guard);
+ return result;
+}
+EXPORT_SYMBOL(lu_site_init_finish);
+
+/**
+ * Acquire additional reference on device \a d
+ */
+void lu_device_get(struct lu_device *d)
+{
+ atomic_inc(&d->ld_ref);
+}
+EXPORT_SYMBOL(lu_device_get);
+
+/**
+ * Release reference on device \a d.
+ */
+void lu_device_put(struct lu_device *d)
+{
+ LASSERT(atomic_read(&d->ld_ref) > 0);
+ atomic_dec(&d->ld_ref);
+}
+EXPORT_SYMBOL(lu_device_put);
+
+/**
+ * Initialize device \a d of type \a t.
+ */
+int lu_device_init(struct lu_device *d, struct lu_device_type *t)
+{
+ if (t->ldt_device_nr++ == 0 && t->ldt_ops->ldto_start != NULL)
+ t->ldt_ops->ldto_start(t);
+ memset(d, 0, sizeof *d);
+ atomic_set(&d->ld_ref, 0);
+ d->ld_type = t;
+ lu_ref_init(&d->ld_reference);
+ INIT_LIST_HEAD(&d->ld_linkage);
+ return 0;
+}
+EXPORT_SYMBOL(lu_device_init);
+
+/**
+ * Finalize device \a d.
+ */
+void lu_device_fini(struct lu_device *d)
+{
+ struct lu_device_type *t;
+
+ t = d->ld_type;
+ if (d->ld_obd != NULL) {
+ d->ld_obd->obd_lu_dev = NULL;
+ d->ld_obd = NULL;
+ }
+
+ lu_ref_fini(&d->ld_reference);
+ LASSERTF(atomic_read(&d->ld_ref) == 0,
+ "Refcount is %u\n", atomic_read(&d->ld_ref));
+ LASSERT(t->ldt_device_nr > 0);
+ if (--t->ldt_device_nr == 0 && t->ldt_ops->ldto_stop != NULL)
+ t->ldt_ops->ldto_stop(t);
+}
+EXPORT_SYMBOL(lu_device_fini);
+
+/**
+ * Initialize object \a o that is part of compound object \a h and was created
+ * by device \a d.
+ */
+int lu_object_init(struct lu_object *o,
+ struct lu_object_header *h, struct lu_device *d)
+{
+ memset(o, 0, sizeof *o);
+ o->lo_header = h;
+ o->lo_dev = d;
+ lu_device_get(d);
+ o->lo_dev_ref = lu_ref_add(&d->ld_reference, "lu_object", o);
+ INIT_LIST_HEAD(&o->lo_linkage);
+ return 0;
+}
+EXPORT_SYMBOL(lu_object_init);
+
+/**
+ * Finalize object and release its resources.
+ */
+void lu_object_fini(struct lu_object *o)
+{
+ struct lu_device *dev = o->lo_dev;
+
+ LASSERT(list_empty(&o->lo_linkage));
+
+ if (dev != NULL) {
+ lu_ref_del_at(&dev->ld_reference,
+ o->lo_dev_ref , "lu_object", o);
+ lu_device_put(dev);
+ o->lo_dev = NULL;
+ }
+}
+EXPORT_SYMBOL(lu_object_fini);
+
+/**
+ * Add object \a o as first layer of compound object \a h
+ *
+ * This is typically called by the ->ldo_object_alloc() method of top-level
+ * device.
+ */
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o)
+{
+ list_move(&o->lo_linkage, &h->loh_layers);
+}
+EXPORT_SYMBOL(lu_object_add_top);
+
+/**
+ * Add object \a o as a layer of compound object, going after \a before.
+ *
+ * This is typically called by the ->ldo_object_alloc() method of \a
+ * before->lo_dev.
+ */
+void lu_object_add(struct lu_object *before, struct lu_object *o)
+{
+ list_move(&o->lo_linkage, &before->lo_linkage);
+}
+EXPORT_SYMBOL(lu_object_add);
+
+/**
+ * Initialize compound object.
+ */
+int lu_object_header_init(struct lu_object_header *h)
+{
+ memset(h, 0, sizeof *h);
+ atomic_set(&h->loh_ref, 1);
+ INIT_HLIST_NODE(&h->loh_hash);
+ INIT_LIST_HEAD(&h->loh_lru);
+ INIT_LIST_HEAD(&h->loh_layers);
+ lu_ref_init(&h->loh_reference);
+ return 0;
+}
+EXPORT_SYMBOL(lu_object_header_init);
+
+/**
+ * Finalize compound object.
+ */
+void lu_object_header_fini(struct lu_object_header *h)
+{
+ LASSERT(list_empty(&h->loh_layers));
+ LASSERT(list_empty(&h->loh_lru));
+ LASSERT(hlist_unhashed(&h->loh_hash));
+ lu_ref_fini(&h->loh_reference);
+}
+EXPORT_SYMBOL(lu_object_header_fini);
+
+/**
+ * Given a compound object, find its slice, corresponding to the device type
+ * \a dtype.
+ */
+struct lu_object *lu_object_locate(struct lu_object_header *h,
+ const struct lu_device_type *dtype)
+{
+ struct lu_object *o;
+
+ list_for_each_entry(o, &h->loh_layers, lo_linkage) {
+ if (o->lo_dev->ld_type == dtype)
+ return o;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(lu_object_locate);
+
+
+
+/**
+ * Finalize and free devices in the device stack.
+ *
+ * Finalize device stack by purging object cache, and calling
+ * lu_device_type_operations::ldto_device_fini() and
+ * lu_device_type_operations::ldto_device_free() on all devices in the stack.
+ */
+void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
+{
+ struct lu_site *site = top->ld_site;
+ struct lu_device *scan;
+ struct lu_device *next;
+
+ lu_site_purge(env, site, ~0);
+ for (scan = top; scan != NULL; scan = next) {
+ next = scan->ld_type->ldt_ops->ldto_device_fini(env, scan);
+ lu_ref_del(&scan->ld_reference, "lu-stack", &lu_site_init);
+ lu_device_put(scan);
+ }
+
+ /* purge again. */
+ lu_site_purge(env, site, ~0);
+
+ for (scan = top; scan != NULL; scan = next) {
+ const struct lu_device_type *ldt = scan->ld_type;
+ struct obd_type *type;
+
+ next = ldt->ldt_ops->ldto_device_free(env, scan);
+ type = ldt->ldt_obd_type;
+ if (type != NULL) {
+ type->typ_refcnt--;
+ class_put_type(type);
+ }
+ }
+}
+EXPORT_SYMBOL(lu_stack_fini);
+
+enum {
+ /**
+ * Maximal number of tld slots.
+ */
+ LU_CONTEXT_KEY_NR = 40
+};
+
+static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
+
+static DEFINE_SPINLOCK(lu_keys_guard);
+
+/**
+ * Global counter incremented whenever key is registered, unregistered,
+ * revived or quiesced. This is used to void unnecessary calls to
+ * lu_context_refill(). No locking is provided, as initialization and shutdown
+ * are supposed to be externally serialized.
+ */
+static unsigned key_set_version = 0;
+
+/**
+ * Register new key.
+ */
+int lu_context_key_register(struct lu_context_key *key)
+{
+ int result;
+ int i;
+
+ LASSERT(key->lct_init != NULL);
+ LASSERT(key->lct_fini != NULL);
+ LASSERT(key->lct_tags != 0);
+ LASSERT(key->lct_owner != NULL);
+
+ result = -ENFILE;
+ spin_lock(&lu_keys_guard);
+ for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
+ if (lu_keys[i] == NULL) {
+ key->lct_index = i;
+ atomic_set(&key->lct_used, 1);
+ lu_keys[i] = key;
+ lu_ref_init(&key->lct_reference);
+ result = 0;
+ ++key_set_version;
+ break;
+ }
+ }
+ spin_unlock(&lu_keys_guard);
+ return result;
+}
+EXPORT_SYMBOL(lu_context_key_register);
+
+static void key_fini(struct lu_context *ctx, int index)
+{
+ if (ctx->lc_value != NULL && ctx->lc_value[index] != NULL) {
+ struct lu_context_key *key;
+
+ key = lu_keys[index];
+ LASSERT(key != NULL);
+ LASSERT(key->lct_fini != NULL);
+ LASSERT(atomic_read(&key->lct_used) > 1);
+
+ key->lct_fini(ctx, key, ctx->lc_value[index]);
+ lu_ref_del(&key->lct_reference, "ctx", ctx);
+ atomic_dec(&key->lct_used);
+
+ LASSERT(key->lct_owner != NULL);
+ if ((ctx->lc_tags & LCT_NOREF) == 0) {
+#ifdef CONFIG_MODULE_UNLOAD
+ LINVRNT(module_refcount(key->lct_owner) > 0);
+#endif
+ module_put(key->lct_owner);
+ }
+ ctx->lc_value[index] = NULL;
+ }
+}
+
+/**
+ * Deregister key.
+ */
+void lu_context_key_degister(struct lu_context_key *key)
+{
+ LASSERT(atomic_read(&key->lct_used) >= 1);
+ LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
+
+ lu_context_key_quiesce(key);
+
+ ++key_set_version;
+ spin_lock(&lu_keys_guard);
+ key_fini(&lu_shrink_env.le_ctx, key->lct_index);
+ if (lu_keys[key->lct_index]) {
+ lu_keys[key->lct_index] = NULL;
+ lu_ref_fini(&key->lct_reference);
+ }
+ spin_unlock(&lu_keys_guard);
+
+ LASSERTF(atomic_read(&key->lct_used) == 1,
+ "key has instances: %d\n",
+ atomic_read(&key->lct_used));
+}
+EXPORT_SYMBOL(lu_context_key_degister);
+
+/**
+ * Register a number of keys. This has to be called after all keys have been
+ * initialized by a call to LU_CONTEXT_KEY_INIT().
+ */
+int lu_context_key_register_many(struct lu_context_key *k, ...)
+{
+ struct lu_context_key *key = k;
+ va_list args;
+ int result;
+
+ va_start(args, k);
+ do {
+ result = lu_context_key_register(key);
+ if (result)
+ break;
+ key = va_arg(args, struct lu_context_key *);
+ } while (key != NULL);
+ va_end(args);
+
+ if (result != 0) {
+ va_start(args, k);
+ while (k != key) {
+ lu_context_key_degister(k);
+ k = va_arg(args, struct lu_context_key *);
+ }
+ va_end(args);
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(lu_context_key_register_many);
+
+/**
+ * De-register a number of keys. This is a dual to
+ * lu_context_key_register_many().
+ */
+void lu_context_key_degister_many(struct lu_context_key *k, ...)
+{
+ va_list args;
+
+ va_start(args, k);
+ do {
+ lu_context_key_degister(k);
+ k = va_arg(args, struct lu_context_key*);
+ } while (k != NULL);
+ va_end(args);
+}
+EXPORT_SYMBOL(lu_context_key_degister_many);
+
+/**
+ * Revive a number of keys.
+ */
+void lu_context_key_revive_many(struct lu_context_key *k, ...)
+{
+ va_list args;
+
+ va_start(args, k);
+ do {
+ lu_context_key_revive(k);
+ k = va_arg(args, struct lu_context_key*);
+ } while (k != NULL);
+ va_end(args);
+}
+EXPORT_SYMBOL(lu_context_key_revive_many);
+
+/**
+ * Quiescent a number of keys.
+ */
+void lu_context_key_quiesce_many(struct lu_context_key *k, ...)
+{
+ va_list args;
+
+ va_start(args, k);
+ do {
+ lu_context_key_quiesce(k);
+ k = va_arg(args, struct lu_context_key*);
+ } while (k != NULL);
+ va_end(args);
+}
+EXPORT_SYMBOL(lu_context_key_quiesce_many);
+
+/**
+ * Return value associated with key \a key in context \a ctx.
+ */
+void *lu_context_key_get(const struct lu_context *ctx,
+ const struct lu_context_key *key)
+{
+ LINVRNT(ctx->lc_state == LCS_ENTERED);
+ LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
+ LASSERT(lu_keys[key->lct_index] == key);
+ return ctx->lc_value[key->lct_index];
+}
+EXPORT_SYMBOL(lu_context_key_get);
+
+/**
+ * List of remembered contexts. XXX document me.
+ */
+static LIST_HEAD(lu_context_remembered);
+
+/**
+ * Destroy \a key in all remembered contexts. This is used to destroy key
+ * values in "shared" contexts (like service threads), when a module owning
+ * the key is about to be unloaded.
+ */
+void lu_context_key_quiesce(struct lu_context_key *key)
+{
+ struct lu_context *ctx;
+
+ if (!(key->lct_tags & LCT_QUIESCENT)) {
+ /*
+ * XXX layering violation.
+ */
+ key->lct_tags |= LCT_QUIESCENT;
+ /*
+ * XXX memory barrier has to go here.
+ */
+ spin_lock(&lu_keys_guard);
+ list_for_each_entry(ctx, &lu_context_remembered,
+ lc_remember)
+ key_fini(ctx, key->lct_index);
+ spin_unlock(&lu_keys_guard);
+ ++key_set_version;
+ }
+}
+EXPORT_SYMBOL(lu_context_key_quiesce);
+
+void lu_context_key_revive(struct lu_context_key *key)
+{
+ key->lct_tags &= ~LCT_QUIESCENT;
+ ++key_set_version;
+}
+EXPORT_SYMBOL(lu_context_key_revive);
+
+static void keys_fini(struct lu_context *ctx)
+{
+ int i;
+
+ if (ctx->lc_value == NULL)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(lu_keys); ++i)
+ key_fini(ctx, i);
+
+ OBD_FREE(ctx->lc_value, ARRAY_SIZE(lu_keys) * sizeof ctx->lc_value[0]);
+ ctx->lc_value = NULL;
+}
+
+static int keys_fill(struct lu_context *ctx)
+{
+ int i;
+
+ LINVRNT(ctx->lc_value != NULL);
+ for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
+ struct lu_context_key *key;
+
+ key = lu_keys[i];
+ if (ctx->lc_value[i] == NULL && key != NULL &&
+ (key->lct_tags & ctx->lc_tags) &&
+ /*
+ * Don't create values for a LCT_QUIESCENT key, as this
+ * will pin module owning a key.
+ */
+ !(key->lct_tags & LCT_QUIESCENT)) {
+ void *value;
+
+ LINVRNT(key->lct_init != NULL);
+ LINVRNT(key->lct_index == i);
+
+ value = key->lct_init(ctx, key);
+ if (unlikely(IS_ERR(value)))
+ return PTR_ERR(value);
+
+ LASSERT(key->lct_owner != NULL);
+ if (!(ctx->lc_tags & LCT_NOREF))
+ try_module_get(key->lct_owner);
+ lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
+ atomic_inc(&key->lct_used);
+ /*
+ * This is the only place in the code, where an
+ * element of ctx->lc_value[] array is set to non-NULL
+ * value.
+ */
+ ctx->lc_value[i] = value;
+ if (key->lct_exit != NULL)
+ ctx->lc_tags |= LCT_HAS_EXIT;
+ }
+ ctx->lc_version = key_set_version;
+ }
+ return 0;
+}
+
+static int keys_init(struct lu_context *ctx)
+{
+ OBD_ALLOC(ctx->lc_value, ARRAY_SIZE(lu_keys) * sizeof ctx->lc_value[0]);
+ if (likely(ctx->lc_value != NULL))
+ return keys_fill(ctx);
+
+ return -ENOMEM;
+}
+
+/**
+ * Initialize context data-structure. Create values for all keys.
+ */
+int lu_context_init(struct lu_context *ctx, __u32 tags)
+{
+ int rc;
+
+ memset(ctx, 0, sizeof *ctx);
+ ctx->lc_state = LCS_INITIALIZED;
+ ctx->lc_tags = tags;
+ if (tags & LCT_REMEMBER) {
+ spin_lock(&lu_keys_guard);
+ list_add(&ctx->lc_remember, &lu_context_remembered);
+ spin_unlock(&lu_keys_guard);
+ } else {
+ INIT_LIST_HEAD(&ctx->lc_remember);
+ }
+
+ rc = keys_init(ctx);
+ if (rc != 0)
+ lu_context_fini(ctx);
+
+ return rc;
+}
+EXPORT_SYMBOL(lu_context_init);
+
+/**
+ * Finalize context data-structure. Destroy key values.
+ */
+void lu_context_fini(struct lu_context *ctx)
+{
+ LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
+ ctx->lc_state = LCS_FINALIZED;
+
+ if ((ctx->lc_tags & LCT_REMEMBER) == 0) {
+ LASSERT(list_empty(&ctx->lc_remember));
+ keys_fini(ctx);
+
+ } else { /* could race with key degister */
+ spin_lock(&lu_keys_guard);
+ keys_fini(ctx);
+ list_del_init(&ctx->lc_remember);
+ spin_unlock(&lu_keys_guard);
+ }
+}
+EXPORT_SYMBOL(lu_context_fini);
+
+/**
+ * Called before entering context.
+ */
+void lu_context_enter(struct lu_context *ctx)
+{
+ LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
+ ctx->lc_state = LCS_ENTERED;
+}
+EXPORT_SYMBOL(lu_context_enter);
+
+/**
+ * Called after exiting from \a ctx
+ */
+void lu_context_exit(struct lu_context *ctx)
+{
+ int i;
+
+ LINVRNT(ctx->lc_state == LCS_ENTERED);
+ ctx->lc_state = LCS_LEFT;
+ if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value != NULL) {
+ for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
+ if (ctx->lc_value[i] != NULL) {
+ struct lu_context_key *key;
+
+ key = lu_keys[i];
+ LASSERT(key != NULL);
+ if (key->lct_exit != NULL)
+ key->lct_exit(ctx,
+ key, ctx->lc_value[i]);
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(lu_context_exit);
+
+/**
+ * Allocate for context all missing keys that were registered after context
+ * creation. key_set_version is only changed in rare cases when modules
+ * are loaded and removed.
+ */
+int lu_context_refill(struct lu_context *ctx)
+{
+ return likely(ctx->lc_version == key_set_version) ? 0 : keys_fill(ctx);
+}
+EXPORT_SYMBOL(lu_context_refill);
+
+/**
+ * lu_ctx_tags/lu_ses_tags will be updated if there are new types of
+ * obd being added. Currently, this is only used on client side, specifically
+ * for echo device client, for other stack (like ptlrpc threads), context are
+ * predefined when the lu_device type are registered, during the module probe
+ * phase.
+ */
+__u32 lu_context_tags_default = 0;
+__u32 lu_session_tags_default = 0;
+
+void lu_context_tags_update(__u32 tags)
+{
+ spin_lock(&lu_keys_guard);
+ lu_context_tags_default |= tags;
+ key_set_version++;
+ spin_unlock(&lu_keys_guard);
+}
+EXPORT_SYMBOL(lu_context_tags_update);
+
+void lu_context_tags_clear(__u32 tags)
+{
+ spin_lock(&lu_keys_guard);
+ lu_context_tags_default &= ~tags;
+ key_set_version++;
+ spin_unlock(&lu_keys_guard);
+}
+EXPORT_SYMBOL(lu_context_tags_clear);
+
+void lu_session_tags_update(__u32 tags)
+{
+ spin_lock(&lu_keys_guard);
+ lu_session_tags_default |= tags;
+ key_set_version++;
+ spin_unlock(&lu_keys_guard);
+}
+EXPORT_SYMBOL(lu_session_tags_update);
+
+void lu_session_tags_clear(__u32 tags)
+{
+ spin_lock(&lu_keys_guard);
+ lu_session_tags_default &= ~tags;
+ key_set_version++;
+ spin_unlock(&lu_keys_guard);
+}
+EXPORT_SYMBOL(lu_session_tags_clear);
+
+int lu_env_init(struct lu_env *env, __u32 tags)
+{
+ int result;
+
+ env->le_ses = NULL;
+ result = lu_context_init(&env->le_ctx, tags);
+ if (likely(result == 0))
+ lu_context_enter(&env->le_ctx);
+ return result;
+}
+EXPORT_SYMBOL(lu_env_init);
+
+void lu_env_fini(struct lu_env *env)
+{
+ lu_context_exit(&env->le_ctx);
+ lu_context_fini(&env->le_ctx);
+ env->le_ses = NULL;
+}
+EXPORT_SYMBOL(lu_env_fini);
+
+int lu_env_refill(struct lu_env *env)
+{
+ int result;
+
+ result = lu_context_refill(&env->le_ctx);
+ if (result == 0 && env->le_ses != NULL)
+ result = lu_context_refill(env->le_ses);
+ return result;
+}
+EXPORT_SYMBOL(lu_env_refill);
+
+/**
+ * Currently, this API will only be used by echo client.
+ * Because echo client and normal lustre client will share
+ * same cl_env cache. So echo client needs to refresh
+ * the env context after it get one from the cache, especially
+ * when normal client and echo client co-exist in the same client.
+ */
+int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
+ __u32 stags)
+{
+ int result;
+
+ if ((env->le_ctx.lc_tags & ctags) != ctags) {
+ env->le_ctx.lc_version = 0;
+ env->le_ctx.lc_tags |= ctags;
+ }
+
+ if (env->le_ses && (env->le_ses->lc_tags & stags) != stags) {
+ env->le_ses->lc_version = 0;
+ env->le_ses->lc_tags |= stags;
+ }
+
+ result = lu_env_refill(env);
+
+ return result;
+}
+EXPORT_SYMBOL(lu_env_refill_by_tags);
+
+static struct shrinker *lu_site_shrinker = NULL;
+
+typedef struct lu_site_stats{
+ unsigned lss_populated;
+ unsigned lss_max_search;
+ unsigned lss_total;
+ unsigned lss_busy;
+} lu_site_stats_t;
+
+static void lu_site_stats_get(cfs_hash_t *hs,
+ lu_site_stats_t *stats, int populated)
+{
+ cfs_hash_bd_t bd;
+ int i;
+
+ cfs_hash_for_each_bucket(hs, &bd, i) {
+ struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
+ struct hlist_head *hhead;
+
+ cfs_hash_bd_lock(hs, &bd, 1);
+ stats->lss_busy += bkt->lsb_busy;
+ stats->lss_total += cfs_hash_bd_count_get(&bd);
+ stats->lss_max_search = max((int)stats->lss_max_search,
+ cfs_hash_bd_depmax_get(&bd));
+ if (!populated) {
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ continue;
+ }
+
+ cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
+ if (!hlist_empty(hhead))
+ stats->lss_populated++;
+ }
+ cfs_hash_bd_unlock(hs, &bd, 1);
+ }
+}
+
+
+/*
+ * There exists a potential lock inversion deadlock scenario when using
+ * Lustre on top of ZFS. This occurs between one of ZFS's
+ * buf_hash_table.ht_lock's, and Lustre's lu_sites_guard lock. Essentially,
+ * thread A will take the lu_sites_guard lock and sleep on the ht_lock,
+ * while thread B will take the ht_lock and sleep on the lu_sites_guard
+ * lock. Obviously neither thread will wake and drop their respective hold
+ * on their lock.
+ *
+ * To prevent this from happening we must ensure the lu_sites_guard lock is
+ * not taken while down this code path. ZFS reliably does not set the
+ * __GFP_FS bit in its code paths, so this can be used to determine if it
+ * is safe to take the lu_sites_guard lock.
+ *
+ * Ideally we should accurately return the remaining number of cached
+ * objects without taking the lu_sites_guard lock, but this is not
+ * possible in the current implementation.
+ */
+static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ lu_site_stats_t stats;
+ struct lu_site *s;
+ struct lu_site *tmp;
+ int cached = 0;
+ int remain = shrink_param(sc, nr_to_scan);
+ LIST_HEAD(splice);
+
+ if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) {
+ if (remain != 0)
+ return -1;
+ else
+ /* We must not take the lu_sites_guard lock when
+ * __GFP_FS is *not* set because of the deadlock
+ * possibility detailed above. Additionally,
+ * since we cannot determine the number of
+ * objects in the cache without taking this
+ * lock, we're in a particularly tough spot. As
+ * a result, we'll just lie and say our cache is
+ * empty. This _should_ be ok, as we can't
+ * reclaim objects when __GFP_FS is *not* set
+ * anyways.
+ */
+ return 0;
+ }
+
+ CDEBUG(D_INODE, "Shrink %d objects\n", remain);
+
+ mutex_lock(&lu_sites_guard);
+ list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
+ if (shrink_param(sc, nr_to_scan) != 0) {
+ remain = lu_site_purge(&lu_shrink_env, s, remain);
+ /*
+ * Move just shrunk site to the tail of site list to
+ * assure shrinking fairness.
+ */
+ list_move_tail(&s->ls_linkage, &splice);
+ }
+
+ memset(&stats, 0, sizeof(stats));
+ lu_site_stats_get(s->ls_obj_hash, &stats, 0);
+ cached += stats.lss_total - stats.lss_busy;
+ if (shrink_param(sc, nr_to_scan) && remain <= 0)
+ break;
+ }
+ list_splice(&splice, lu_sites.prev);
+ mutex_unlock(&lu_sites_guard);
+
+ cached = (cached / 100) * sysctl_vfs_cache_pressure;
+ if (shrink_param(sc, nr_to_scan) == 0)
+ CDEBUG(D_INODE, "%d objects cached\n", cached);
+ return cached;
+}
+
+/*
+ * Debugging stuff.
+ */
+
+/**
+ * Environment to be used in debugger, contains all tags.
+ */
+struct lu_env lu_debugging_env;
+
+/**
+ * Debugging printer function using printk().
+ */
+int lu_printk_printer(const struct lu_env *env,
+ void *unused, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ vprintk(format, args);
+ va_end(args);
+ return 0;
+}
+
+/**
+ * Initialization of global lu_* data.
+ */
+int lu_global_init(void)
+{
+ int result;
+
+ CDEBUG(D_INFO, "Lustre LU module (%p).\n", &lu_keys);
+
+ result = lu_ref_global_init();
+ if (result != 0)
+ return result;
+
+ LU_CONTEXT_KEY_INIT(&lu_global_key);
+ result = lu_context_key_register(&lu_global_key);
+ if (result != 0)
+ return result;
+
+ /*
+ * At this level, we don't know what tags are needed, so allocate them
+ * conservatively. This should not be too bad, because this
+ * environment is global.
+ */
+ mutex_lock(&lu_sites_guard);
+ result = lu_env_init(&lu_shrink_env, LCT_SHRINKER);
+ mutex_unlock(&lu_sites_guard);
+ if (result != 0)
+ return result;
+
+ /*
+ * seeks estimation: 3 seeks to read a record from oi, one to read
+ * inode, one for ea. Unfortunately setting this high value results in
+ * lu_object/inode cache consuming all the memory.
+ */
+ lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink);
+ if (lu_site_shrinker == NULL)
+ return -ENOMEM;
+
+ return result;
+}
+
+/**
+ * Dual to lu_global_init().
+ */
+void lu_global_fini(void)
+{
+ if (lu_site_shrinker != NULL) {
+ remove_shrinker(lu_site_shrinker);
+ lu_site_shrinker = NULL;
+ }
+
+ lu_context_key_degister(&lu_global_key);
+
+ /*
+ * Tear shrinker environment down _after_ de-registering
+ * lu_global_key, because the latter has a value in the former.
+ */
+ mutex_lock(&lu_sites_guard);
+ lu_env_fini(&lu_shrink_env);
+ mutex_unlock(&lu_sites_guard);
+
+ lu_ref_global_fini();
+}
+
+static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx)
+{
+#ifdef LPROCFS
+ struct lprocfs_counter ret;
+
+ lprocfs_stats_collect(stats, idx, &ret);
+ return (__u32)ret.lc_count;
+#else
+ return 0;
+#endif
+}
+
+/**
+ * Output site statistical counters into a buffer. Suitable for
+ * lprocfs_rd_*()-style functions.
+ */
+int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
+{
+ lu_site_stats_t stats;
+
+ memset(&stats, 0, sizeof(stats));
+ lu_site_stats_get(s->ls_obj_hash, &stats, 1);
+
+ return seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d\n",
+ stats.lss_busy,
+ stats.lss_total,
+ stats.lss_populated,
+ CFS_HASH_NHLIST(s->ls_obj_hash),
+ stats.lss_max_search,
+ ls_stats_read(s->ls_stats, LU_SS_CREATED),
+ ls_stats_read(s->ls_stats, LU_SS_CACHE_HIT),
+ ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS),
+ ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE),
+ ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE),
+ ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED));
+}
+EXPORT_SYMBOL(lu_site_stats_print);
+
+/**
+ * Helper function to initialize a number of kmem slab caches at once.
+ */
+int lu_kmem_init(struct lu_kmem_descr *caches)
+{
+ int result;
+ struct lu_kmem_descr *iter = caches;
+
+ for (result = 0; iter->ckd_cache != NULL; ++iter) {
+ *iter->ckd_cache = kmem_cache_create(iter->ckd_name,
+ iter->ckd_size,
+ 0, 0, NULL);
+ if (*iter->ckd_cache == NULL) {
+ result = -ENOMEM;
+ /* free all previously allocated caches */
+ lu_kmem_fini(caches);
+ break;
+ }
+ }
+ return result;
+}
+EXPORT_SYMBOL(lu_kmem_init);
+
+/**
+ * Helper function to finalize a number of kmem slab cached at once. Dual to
+ * lu_kmem_init().
+ */
+void lu_kmem_fini(struct lu_kmem_descr *caches)
+{
+ for (; caches->ckd_cache != NULL; ++caches) {
+ if (*caches->ckd_cache != NULL) {
+ kmem_cache_destroy(*caches->ckd_cache);
+ *caches->ckd_cache = NULL;
+ }
+ }
+}
+EXPORT_SYMBOL(lu_kmem_fini);
+
+/**
+ * Temporary solution to be able to assign fid in ->do_create()
+ * till we have fully-functional OST fids
+ */
+void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
+ const struct lu_fid *fid)
+{
+ struct lu_site *s = o->lo_dev->ld_site;
+ struct lu_fid *old = &o->lo_header->loh_fid;
+ struct lu_site_bkt_data *bkt;
+ struct lu_object *shadow;
+ wait_queue_t waiter;
+ cfs_hash_t *hs;
+ cfs_hash_bd_t bd;
+ __u64 version = 0;
+
+ LASSERT(fid_is_zero(old));
+
+ hs = s->ls_obj_hash;
+ cfs_hash_bd_get_and_lock(hs, (void *)fid, &bd, 1);
+ shadow = htable_lookup(s, &bd, fid, &waiter, &version);
+ /* supposed to be unique */
+ LASSERT(shadow == NULL);
+ *old = *fid;
+ bkt = cfs_hash_bd_extra_get(hs, &bd);
+ cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
+ bkt->lsb_busy++;
+ cfs_hash_bd_unlock(hs, &bd, 1);
+}
+EXPORT_SYMBOL(lu_object_assign_fid);
+
+/**
+ * allocates object with 0 (non-assiged) fid
+ * XXX: temporary solution to be able to assign fid in ->do_create()
+ * till we have fully-functional OST fids
+ */
+struct lu_object *lu_object_anon(const struct lu_env *env,
+ struct lu_device *dev,
+ const struct lu_object_conf *conf)
+{
+ struct lu_fid fid;
+ struct lu_object *o;
+
+ fid_zero(&fid);
+ o = lu_object_alloc(env, dev, &fid, conf);
+
+ return o;
+}
+EXPORT_SYMBOL(lu_object_anon);
+
+struct lu_buf LU_BUF_NULL = {
+ .lb_buf = NULL,
+ .lb_len = 0
+};
+EXPORT_SYMBOL(LU_BUF_NULL);
+
+void lu_buf_free(struct lu_buf *buf)
+{
+ LASSERT(buf);
+ if (buf->lb_buf) {
+ LASSERT(buf->lb_len > 0);
+ OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
+ buf->lb_buf = NULL;
+ buf->lb_len = 0;
+ }
+}
+EXPORT_SYMBOL(lu_buf_free);
+
+void lu_buf_alloc(struct lu_buf *buf, int size)
+{
+ LASSERT(buf);
+ LASSERT(buf->lb_buf == NULL);
+ LASSERT(buf->lb_len == 0);
+ OBD_ALLOC_LARGE(buf->lb_buf, size);
+ if (likely(buf->lb_buf))
+ buf->lb_len = size;
+}
+EXPORT_SYMBOL(lu_buf_alloc);
+
+void lu_buf_realloc(struct lu_buf *buf, int size)
+{
+ lu_buf_free(buf);
+ lu_buf_alloc(buf, size);
+}
+EXPORT_SYMBOL(lu_buf_realloc);
+
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, int len)
+{
+ if (buf->lb_buf == NULL && buf->lb_len == 0)
+ lu_buf_alloc(buf, len);
+
+ if ((len > buf->lb_len) && (buf->lb_buf != NULL))
+ lu_buf_realloc(buf, len);
+
+ return buf;
+}
+EXPORT_SYMBOL(lu_buf_check_and_alloc);
+
+/**
+ * Increase the size of the \a buf.
+ * preserves old data in buffer
+ * old buffer remains unchanged on error
+ * \retval 0 or -ENOMEM
+ */
+int lu_buf_check_and_grow(struct lu_buf *buf, int len)
+{
+ char *ptr;
+
+ if (len <= buf->lb_len)
+ return 0;
+
+ OBD_ALLOC_LARGE(ptr, len);
+ if (ptr == NULL)
+ return -ENOMEM;
+
+ /* Free the old buf */
+ if (buf->lb_buf != NULL) {
+ memcpy(ptr, buf->lb_buf, buf->lb_len);
+ OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
+ }
+
+ buf->lb_buf = ptr;
+ buf->lb_len = len;
+ return 0;
+}
+EXPORT_SYMBOL(lu_buf_check_and_grow);
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
new file mode 100644
index 000000000000..23a76f158356
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
@@ -0,0 +1,50 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/lu_ref.c
+ *
+ * Lustre reference.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+# include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lu_ref.h>
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ucred.c b/drivers/staging/lustre/lustre/obdclass/lu_ucred.c
new file mode 100644
index 000000000000..229db6c39b78
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lu_ucred.c
@@ -0,0 +1,107 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/lu_object.c
+ *
+ * Lustre Object.
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/libcfs/libcfs.h>
+#include <obd_support.h>
+#include <lu_object.h>
+#include <md_object.h>
+
+/* context key constructor/destructor: lu_ucred_key_init, lu_ucred_key_fini */
+LU_KEY_INIT_FINI(lu_ucred, struct lu_ucred);
+
+static struct lu_context_key lu_ucred_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = lu_ucred_key_init,
+ .lct_fini = lu_ucred_key_fini
+};
+
+/**
+ * Get ucred key if session exists and ucred key is allocated on it.
+ * Return NULL otherwise.
+ */
+struct lu_ucred *lu_ucred(const struct lu_env *env)
+{
+ if (!env->le_ses)
+ return NULL;
+ return lu_context_key_get(env->le_ses, &lu_ucred_key);
+}
+EXPORT_SYMBOL(lu_ucred);
+
+/**
+ * Get ucred key and check if it is properly initialized.
+ * Return NULL otherwise.
+ */
+struct lu_ucred *lu_ucred_check(const struct lu_env *env)
+{
+ struct lu_ucred *uc = lu_ucred(env);
+ if (uc && uc->uc_valid != UCRED_OLD && uc->uc_valid != UCRED_NEW)
+ return NULL;
+ return uc;
+}
+EXPORT_SYMBOL(lu_ucred_check);
+
+/**
+ * Get ucred key, which must exist and must be properly initialized.
+ * Assert otherwise.
+ */
+struct lu_ucred *lu_ucred_assert(const struct lu_env *env)
+{
+ struct lu_ucred *uc = lu_ucred_check(env);
+ LASSERT(uc != NULL);
+ return uc;
+}
+EXPORT_SYMBOL(lu_ucred_assert);
+
+int lu_ucred_global_init(void)
+{
+ LU_CONTEXT_KEY_INIT(&lu_ucred_key);
+ return lu_context_key_register(&lu_ucred_key);
+}
+
+void lu_ucred_global_fini(void)
+{
+ lu_context_key_degister(&lu_ucred_key);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
new file mode 100644
index 000000000000..69d6499ef731
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
@@ -0,0 +1,263 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/lustre_handles.c
+ *
+ * Author: Phil Schwan <phil@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd_support.h>
+#include <lustre_handles.h>
+#include <lustre_lib.h>
+
+
+static __u64 handle_base;
+#define HANDLE_INCR 7
+static spinlock_t handle_base_lock;
+
+static struct handle_bucket {
+ spinlock_t lock;
+ struct list_head head;
+} *handle_hash;
+
+#define HANDLE_HASH_SIZE (1 << 16)
+#define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1)
+
+/*
+ * Generate a unique 64bit cookie (hash) for a handle and insert it into
+ * global (per-node) hash-table.
+ */
+void class_handle_hash(struct portals_handle *h,
+ struct portals_handle_ops *ops)
+{
+ struct handle_bucket *bucket;
+ ENTRY;
+
+ LASSERT(h != NULL);
+ LASSERT(list_empty(&h->h_link));
+
+ /*
+ * This is fast, but simplistic cookie generation algorithm, it will
+ * need a re-do at some point in the future for security.
+ */
+ spin_lock(&handle_base_lock);
+ handle_base += HANDLE_INCR;
+
+ if (unlikely(handle_base == 0)) {
+ /*
+ * Cookie of zero is "dangerous", because in many places it's
+ * assumed that 0 means "unassigned" handle, not bound to any
+ * object.
+ */
+ CWARN("The universe has been exhausted: cookie wrap-around.\n");
+ handle_base += HANDLE_INCR;
+ }
+ h->h_cookie = handle_base;
+ spin_unlock(&handle_base_lock);
+
+ h->h_ops = ops;
+ spin_lock_init(&h->h_lock);
+
+ bucket = &handle_hash[h->h_cookie & HANDLE_HASH_MASK];
+ spin_lock(&bucket->lock);
+ list_add_rcu(&h->h_link, &bucket->head);
+ h->h_in = 1;
+ spin_unlock(&bucket->lock);
+
+ CDEBUG(D_INFO, "added object %p with handle "LPX64" to hash\n",
+ h, h->h_cookie);
+ EXIT;
+}
+EXPORT_SYMBOL(class_handle_hash);
+
+static void class_handle_unhash_nolock(struct portals_handle *h)
+{
+ if (list_empty(&h->h_link)) {
+ CERROR("removing an already-removed handle ("LPX64")\n",
+ h->h_cookie);
+ return;
+ }
+
+ CDEBUG(D_INFO, "removing object %p with handle "LPX64" from hash\n",
+ h, h->h_cookie);
+
+ spin_lock(&h->h_lock);
+ if (h->h_in == 0) {
+ spin_unlock(&h->h_lock);
+ return;
+ }
+ h->h_in = 0;
+ spin_unlock(&h->h_lock);
+ list_del_rcu(&h->h_link);
+}
+
+void class_handle_unhash(struct portals_handle *h)
+{
+ struct handle_bucket *bucket;
+ bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+
+ spin_lock(&bucket->lock);
+ class_handle_unhash_nolock(h);
+ spin_unlock(&bucket->lock);
+}
+EXPORT_SYMBOL(class_handle_unhash);
+
+void class_handle_hash_back(struct portals_handle *h)
+{
+ struct handle_bucket *bucket;
+ ENTRY;
+
+ bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+
+ spin_lock(&bucket->lock);
+ list_add_rcu(&h->h_link, &bucket->head);
+ h->h_in = 1;
+ spin_unlock(&bucket->lock);
+
+ EXIT;
+}
+EXPORT_SYMBOL(class_handle_hash_back);
+
+void *class_handle2object(__u64 cookie)
+{
+ struct handle_bucket *bucket;
+ struct portals_handle *h;
+ void *retval = NULL;
+ ENTRY;
+
+ LASSERT(handle_hash != NULL);
+
+ /* Be careful when you want to change this code. See the
+ * rcu_read_lock() definition on top this file. - jxiong */
+ bucket = handle_hash + (cookie & HANDLE_HASH_MASK);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(h, &bucket->head, h_link) {
+ if (h->h_cookie != cookie)
+ continue;
+
+ spin_lock(&h->h_lock);
+ if (likely(h->h_in != 0)) {
+ h->h_ops->hop_addref(h);
+ retval = h;
+ }
+ spin_unlock(&h->h_lock);
+ break;
+ }
+ rcu_read_unlock();
+
+ RETURN(retval);
+}
+EXPORT_SYMBOL(class_handle2object);
+
+void class_handle_free_cb(cfs_rcu_head_t *rcu)
+{
+ struct portals_handle *h = RCU2HANDLE(rcu);
+ void *ptr = (void *)(unsigned long)h->h_cookie;
+
+ if (h->h_ops->hop_free != NULL)
+ h->h_ops->hop_free(ptr, h->h_size);
+ else
+ OBD_FREE(ptr, h->h_size);
+}
+EXPORT_SYMBOL(class_handle_free_cb);
+
+int class_handle_init(void)
+{
+ struct handle_bucket *bucket;
+ struct timeval tv;
+ int seed[2];
+
+ LASSERT(handle_hash == NULL);
+
+ OBD_ALLOC_LARGE(handle_hash, sizeof(*bucket) * HANDLE_HASH_SIZE);
+ if (handle_hash == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&handle_base_lock);
+ for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash;
+ bucket--) {
+ INIT_LIST_HEAD(&bucket->head);
+ spin_lock_init(&bucket->lock);
+ }
+
+ /** bug 21430: add randomness to the initial base */
+ cfs_get_random_bytes(seed, sizeof(seed));
+ do_gettimeofday(&tv);
+ cfs_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]);
+
+ cfs_get_random_bytes(&handle_base, sizeof(handle_base));
+ LASSERT(handle_base != 0ULL);
+
+ return 0;
+}
+
+static int cleanup_all_handles(void)
+{
+ int rc;
+ int i;
+
+ for (rc = i = 0; i < HANDLE_HASH_SIZE; i++) {
+ struct portals_handle *h;
+
+ spin_lock(&handle_hash[i].lock);
+ list_for_each_entry_rcu(h, &(handle_hash[i].head), h_link) {
+ CERROR("force clean handle "LPX64" addr %p ops %p\n",
+ h->h_cookie, h, h->h_ops);
+
+ class_handle_unhash_nolock(h);
+ rc++;
+ }
+ spin_unlock(&handle_hash[i].lock);
+ }
+
+ return rc;
+}
+
+void class_handle_cleanup(void)
+{
+ int count;
+ LASSERT(handle_hash != NULL);
+
+ count = cleanup_all_handles();
+
+ OBD_FREE_LARGE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+ handle_hash = NULL;
+
+ if (count != 0)
+ CERROR("handle_count at cleanup: %d\n", count);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
new file mode 100644
index 000000000000..2fa2589dc8eb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -0,0 +1,218 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <obd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lprocfs_status.h>
+
+#define NIDS_MAX 32
+
+struct uuid_nid_data {
+ struct list_head un_list;
+ struct obd_uuid un_uuid;
+ int un_nid_count;
+ lnet_nid_t un_nids[NIDS_MAX];
+};
+
+/* FIXME: This should probably become more elegant than a global linked list */
+static struct list_head g_uuid_list;
+static spinlock_t g_uuid_lock;
+
+void class_init_uuidlist(void)
+{
+ INIT_LIST_HEAD(&g_uuid_list);
+ spin_lock_init(&g_uuid_lock);
+}
+
+void class_exit_uuidlist(void)
+{
+ /* delete all */
+ class_del_uuid(NULL);
+}
+
+int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index)
+{
+ struct uuid_nid_data *data;
+ struct obd_uuid tmp;
+ int rc = -ENOENT;
+
+ obd_str2uuid(&tmp, uuid);
+ spin_lock(&g_uuid_lock);
+ list_for_each_entry(data, &g_uuid_list, un_list) {
+ if (obd_uuid_equals(&data->un_uuid, &tmp)) {
+ if (index >= data->un_nid_count)
+ break;
+
+ rc = 0;
+ *peer_nid = data->un_nids[index];
+ break;
+ }
+ }
+ spin_unlock(&g_uuid_lock);
+ return rc;
+}
+EXPORT_SYMBOL(lustre_uuid_to_peer);
+
+/* Add a nid to a niduuid. Multiple nids can be added to a single uuid;
+ LNET will choose the best one. */
+int class_add_uuid(const char *uuid, __u64 nid)
+{
+ struct uuid_nid_data *data, *entry;
+ int found = 0;
+
+ LASSERT(nid != 0); /* valid newconfig NID is never zero */
+
+ if (strlen(uuid) > UUID_MAX - 1)
+ return -EOVERFLOW;
+
+ OBD_ALLOC_PTR(data);
+ if (data == NULL)
+ return -ENOMEM;
+
+ obd_str2uuid(&data->un_uuid, uuid);
+ data->un_nids[0] = nid;
+ data->un_nid_count = 1;
+
+ spin_lock(&g_uuid_lock);
+ list_for_each_entry(entry, &g_uuid_list, un_list) {
+ if (obd_uuid_equals(&entry->un_uuid, &data->un_uuid)) {
+ int i;
+
+ found = 1;
+ for (i = 0; i < entry->un_nid_count; i++)
+ if (nid == entry->un_nids[i])
+ break;
+
+ if (i == entry->un_nid_count) {
+ LASSERT(entry->un_nid_count < NIDS_MAX);
+ entry->un_nids[entry->un_nid_count++] = nid;
+ }
+ break;
+ }
+ }
+ if (!found)
+ list_add(&data->un_list, &g_uuid_list);
+ spin_unlock(&g_uuid_lock);
+
+ if (found) {
+ CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid,
+ libcfs_nid2str(nid), entry->un_nid_count);
+ OBD_FREE(data, sizeof(*data));
+ } else {
+ CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
+ }
+ return 0;
+}
+EXPORT_SYMBOL(class_add_uuid);
+
+/* Delete the nids for one uuid if specified, otherwise delete all */
+int class_del_uuid(const char *uuid)
+{
+ LIST_HEAD(deathrow);
+ struct uuid_nid_data *data;
+
+ spin_lock(&g_uuid_lock);
+ if (uuid != NULL) {
+ struct obd_uuid tmp;
+
+ obd_str2uuid(&tmp, uuid);
+ list_for_each_entry(data, &g_uuid_list, un_list) {
+ if (obd_uuid_equals(&data->un_uuid, &tmp)) {
+ list_move(&data->un_list, &deathrow);
+ break;
+ }
+ }
+ } else
+ list_splice_init(&g_uuid_list, &deathrow);
+ spin_unlock(&g_uuid_lock);
+
+ if (uuid != NULL && list_empty(&deathrow)) {
+ CDEBUG(D_INFO, "Try to delete a non-existent uuid %s\n", uuid);
+ return -EINVAL;
+ }
+
+ while (!list_empty(&deathrow)) {
+ data = list_entry(deathrow.next, struct uuid_nid_data,
+ un_list);
+ list_del(&data->un_list);
+
+ CDEBUG(D_INFO, "del uuid %s %s/%d\n",
+ obd_uuid2str(&data->un_uuid),
+ libcfs_nid2str(data->un_nids[0]),
+ data->un_nid_count);
+
+ OBD_FREE(data, sizeof(*data));
+ }
+
+ return 0;
+}
+
+/* check if @nid exists in nid list of @uuid */
+int class_check_uuid(struct obd_uuid *uuid, __u64 nid)
+{
+ struct uuid_nid_data *entry;
+ int found = 0;
+ ENTRY;
+
+ CDEBUG(D_INFO, "check if uuid %s has %s.\n",
+ obd_uuid2str(uuid), libcfs_nid2str(nid));
+
+ spin_lock(&g_uuid_lock);
+ list_for_each_entry(entry, &g_uuid_list, un_list) {
+ int i;
+
+ if (!obd_uuid_equals(&entry->un_uuid, uuid))
+ continue;
+
+ /* found the uuid, check if it has @nid */
+ for (i = 0; i < entry->un_nid_count; i++) {
+ if (entry->un_nids[i] == nid) {
+ found = 1;
+ break;
+ }
+ }
+ break;
+ }
+ spin_unlock(&g_uuid_lock);
+ RETURN(found);
+}
+EXPORT_SYMBOL(class_check_uuid);
diff --git a/drivers/staging/lustre/lustre/obdclass/md_attrs.c b/drivers/staging/lustre/lustre/obdclass/md_attrs.c
new file mode 100644
index 000000000000..b71344a04c7e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/md_attrs.c
@@ -0,0 +1,202 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Johann Lombardi <johann.lombardi@intel.com>
+ */
+
+#include <lustre/lustre_idl.h>
+#include <obd.h>
+#include <md_object.h>
+
+/**
+ * Initialize new \a lma. Only fid is stored.
+ *
+ * \param lma - is the new LMA structure to be initialized
+ * \param fid - is the FID of the object this LMA belongs to
+ * \param incompat - features that MDS must understand to access object
+ */
+void lustre_lma_init(struct lustre_mdt_attrs *lma, const struct lu_fid *fid,
+ __u32 incompat)
+{
+ lma->lma_compat = 0;
+ lma->lma_incompat = incompat;
+ lma->lma_self_fid = *fid;
+
+ /* If a field is added in struct lustre_mdt_attrs, zero it explicitly
+ * and change the test below. */
+ LASSERT(sizeof(*lma) ==
+ (offsetof(struct lustre_mdt_attrs, lma_self_fid) +
+ sizeof(lma->lma_self_fid)));
+};
+EXPORT_SYMBOL(lustre_lma_init);
+
+/**
+ * Swab, if needed, LMA structure which is stored on-disk in little-endian order.
+ *
+ * \param lma - is a pointer to the LMA structure to be swabbed.
+ */
+void lustre_lma_swab(struct lustre_mdt_attrs *lma)
+{
+ /* Use LUSTRE_MSG_MAGIC to detect local endianess. */
+ if (LUSTRE_MSG_MAGIC != cpu_to_le32(LUSTRE_MSG_MAGIC)) {
+ __swab32s(&lma->lma_compat);
+ __swab32s(&lma->lma_incompat);
+ lustre_swab_lu_fid(&lma->lma_self_fid);
+ }
+};
+EXPORT_SYMBOL(lustre_lma_swab);
+
+/**
+ * Swab, if needed, SOM structure which is stored on-disk in little-endian
+ * order.
+ *
+ * \param attrs - is a pointer to the SOM structure to be swabbed.
+ */
+void lustre_som_swab(struct som_attrs *attrs)
+{
+ /* Use LUSTRE_MSG_MAGIC to detect local endianess. */
+ if (LUSTRE_MSG_MAGIC != cpu_to_le32(LUSTRE_MSG_MAGIC)) {
+ __swab32s(&attrs->som_compat);
+ __swab32s(&attrs->som_incompat);
+ __swab64s(&attrs->som_ioepoch);
+ __swab64s(&attrs->som_size);
+ __swab64s(&attrs->som_blocks);
+ __swab64s(&attrs->som_mountid);
+ }
+};
+EXPORT_SYMBOL(lustre_som_swab);
+
+/*
+ * Swab and extract SOM attributes from on-disk xattr.
+ *
+ * \param buf - is a buffer containing the on-disk SOM extended attribute.
+ * \param rc - is the SOM xattr stored in \a buf
+ * \param msd - is the md_som_data structure where to extract SOM attributes.
+ */
+int lustre_buf2som(void *buf, int rc, struct md_som_data *msd)
+{
+ struct som_attrs *attrs = (struct som_attrs *)buf;
+ ENTRY;
+
+ if (rc == 0 || rc == -ENODATA)
+ /* no SOM attributes */
+ RETURN(-ENODATA);
+
+ if (rc < 0)
+ /* error hit while fetching xattr */
+ RETURN(rc);
+
+ /* check SOM compatibility */
+ if (attrs->som_incompat & ~cpu_to_le32(SOM_INCOMPAT_SUPP))
+ RETURN(-ENODATA);
+
+ /* unpack SOM attributes */
+ lustre_som_swab(attrs);
+
+ /* fill in-memory msd structure */
+ msd->msd_compat = attrs->som_compat;
+ msd->msd_incompat = attrs->som_incompat;
+ msd->msd_ioepoch = attrs->som_ioepoch;
+ msd->msd_size = attrs->som_size;
+ msd->msd_blocks = attrs->som_blocks;
+ msd->msd_mountid = attrs->som_mountid;
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lustre_buf2som);
+
+/**
+ * Swab, if needed, HSM structure which is stored on-disk in little-endian
+ * order.
+ *
+ * \param attrs - is a pointer to the HSM structure to be swabbed.
+ */
+void lustre_hsm_swab(struct hsm_attrs *attrs)
+{
+ /* Use LUSTRE_MSG_MAGIC to detect local endianess. */
+ if (LUSTRE_MSG_MAGIC != cpu_to_le32(LUSTRE_MSG_MAGIC)) {
+ __swab32s(&attrs->hsm_compat);
+ __swab32s(&attrs->hsm_flags);
+ __swab64s(&attrs->hsm_arch_id);
+ __swab64s(&attrs->hsm_arch_ver);
+ }
+};
+EXPORT_SYMBOL(lustre_hsm_swab);
+
+/*
+ * Swab and extract HSM attributes from on-disk xattr.
+ *
+ * \param buf - is a buffer containing the on-disk HSM extended attribute.
+ * \param rc - is the HSM xattr stored in \a buf
+ * \param mh - is the md_hsm structure where to extract HSM attributes.
+ */
+int lustre_buf2hsm(void *buf, int rc, struct md_hsm *mh)
+{
+ struct hsm_attrs *attrs = (struct hsm_attrs *)buf;
+ ENTRY;
+
+ if (rc == 0 || rc == -ENODATA)
+ /* no HSM attributes */
+ RETURN(-ENODATA);
+
+ if (rc < 0)
+ /* error hit while fetching xattr */
+ RETURN(rc);
+
+ /* unpack HSM attributes */
+ lustre_hsm_swab(attrs);
+
+ /* fill md_hsm structure */
+ mh->mh_compat = attrs->hsm_compat;
+ mh->mh_flags = attrs->hsm_flags;
+ mh->mh_arch_id = attrs->hsm_arch_id;
+ mh->mh_arch_ver = attrs->hsm_arch_ver;
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lustre_buf2hsm);
+
+/*
+ * Pack HSM attributes.
+ *
+ * \param buf - is the output buffer where to pack the on-disk HSM xattr.
+ * \param mh - is the md_hsm structure to pack.
+ */
+void lustre_hsm2buf(void *buf, struct md_hsm *mh)
+{
+ struct hsm_attrs *attrs = (struct hsm_attrs *)buf;
+ ENTRY;
+
+ /* copy HSM attributes */
+ attrs->hsm_compat = mh->mh_compat;
+ attrs->hsm_flags = mh->mh_flags;
+ attrs->hsm_arch_id = mh->mh_arch_id;
+ attrs->hsm_arch_ver = mh->mh_arch_ver;
+
+ /* pack xattr */
+ lustre_hsm_swab(attrs);
+}
+EXPORT_SYMBOL(lustre_hsm2buf);
diff --git a/drivers/staging/lustre/lustre/obdclass/mea.c b/drivers/staging/lustre/lustre/obdclass/mea.c
new file mode 100644
index 000000000000..c4f0dbc23611
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/mea.c
@@ -0,0 +1,112 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#include <obd_class.h>
+#include <linux/kmod.h> /* for request_module() */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+
+static int mea_last_char_hash(int count, char *name, int namelen)
+{
+ unsigned int c;
+
+ c = name[namelen - 1];
+ if (c == 0)
+ CWARN("looks like wrong len is passed\n");
+ c = c % count;
+ return c;
+}
+
+static int mea_all_chars_hash(int count, char *name, int namelen)
+{
+ unsigned int c = 0;
+
+ while (--namelen >= 0)
+ c += name[namelen];
+ c = c % count;
+ return c;
+}
+
+int raw_name2idx(int hashtype, int count, const char *name, int namelen)
+{
+ unsigned int c = 0;
+ int idx;
+
+ LASSERT(namelen > 0);
+
+ if (filename_is_volatile(name, namelen, &idx)) {
+ if ((idx >= 0) && (idx < count))
+ return idx;
+ goto hashchoice;
+ }
+
+ if (count <= 1)
+ return 0;
+
+hashchoice:
+ switch (hashtype) {
+ case MEA_MAGIC_LAST_CHAR:
+ c = mea_last_char_hash(count, (char *)name, namelen);
+ break;
+ case MEA_MAGIC_ALL_CHARS:
+ c = mea_all_chars_hash(count, (char *)name, namelen);
+ break;
+ case MEA_MAGIC_HASH_SEGMENT:
+ CERROR("Unsupported hash type MEA_MAGIC_HASH_SEGMENT\n");
+ break;
+ default:
+ CERROR("Unknown hash type 0x%x\n", hashtype);
+ }
+
+ LASSERT(c < count);
+ return c;
+}
+EXPORT_SYMBOL(raw_name2idx);
+
+int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen)
+{
+ unsigned int c;
+
+ LASSERT(mea && mea->mea_count);
+
+ c = raw_name2idx(mea->mea_magic, mea->mea_count, name, namelen);
+
+ LASSERT(c < mea->mea_count);
+ return c;
+}
+EXPORT_SYMBOL(mea_name2idx);
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
new file mode 100644
index 000000000000..bbf06d009fd0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -0,0 +1,1904 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obd_config.c
+ *
+ * Config API
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#include <obd_class.h>
+#include <linux/string.h>
+#include <lustre_log.h>
+#include <lprocfs_status.h>
+#include <lustre_param.h>
+
+#include "llog_internal.h"
+
+static cfs_hash_ops_t uuid_hash_ops;
+static cfs_hash_ops_t nid_hash_ops;
+static cfs_hash_ops_t nid_stat_hash_ops;
+
+/*********** string parsing utils *********/
+
+/* returns 0 if we find this key in the buffer, else 1 */
+int class_find_param(char *buf, char *key, char **valp)
+{
+ char *ptr;
+
+ if (!buf)
+ return 1;
+
+ if ((ptr = strstr(buf, key)) == NULL)
+ return 1;
+
+ if (valp)
+ *valp = ptr + strlen(key);
+
+ return 0;
+}
+EXPORT_SYMBOL(class_find_param);
+
+/**
+ * Check whether the proc parameter \a param is an old parameter or not from
+ * the array \a ptr which contains the mapping from old parameters to new ones.
+ * If it's an old one, then return the pointer to the cfg_interop_param struc-
+ * ture which contains both the old and new parameters.
+ *
+ * \param param proc parameter
+ * \param ptr an array which contains the mapping from
+ * old parameters to new ones
+ *
+ * \retval valid-pointer pointer to the cfg_interop_param structure
+ * which contains the old and new parameters
+ * \retval NULL \a param or \a ptr is NULL,
+ * or \a param is not an old parameter
+ */
+struct cfg_interop_param *class_find_old_param(const char *param,
+ struct cfg_interop_param *ptr)
+{
+ char *value = NULL;
+ int name_len = 0;
+
+ if (param == NULL || ptr == NULL)
+ RETURN(NULL);
+
+ value = strchr(param, '=');
+ if (value == NULL)
+ name_len = strlen(param);
+ else
+ name_len = value - param;
+
+ while (ptr->old_param != NULL) {
+ if (strncmp(param, ptr->old_param, name_len) == 0 &&
+ name_len == strlen(ptr->old_param))
+ RETURN(ptr);
+ ptr++;
+ }
+
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(class_find_old_param);
+
+/**
+ * Finds a parameter in \a params and copies it to \a copy.
+ *
+ * Leading spaces are skipped. Next space or end of string is the
+ * parameter terminator with the exception that spaces inside single or double
+ * quotes get included into a parameter. The parameter is copied into \a copy
+ * which has to be allocated big enough by a caller, quotes are stripped in
+ * the copy and the copy is terminated by 0.
+ *
+ * On return \a params is set to next parameter or to NULL if last
+ * parameter is returned.
+ *
+ * \retval 0 if parameter is returned in \a copy
+ * \retval 1 otherwise
+ * \retval -EINVAL if unbalanced quota is found
+ */
+int class_get_next_param(char **params, char *copy)
+{
+ char *q1, *q2, *str;
+ int len;
+
+ str = *params;
+ while (*str == ' ')
+ str++;
+
+ if (*str == '\0') {
+ *params = NULL;
+ return 1;
+ }
+
+ while (1) {
+ q1 = strpbrk(str, " '\"");
+ if (q1 == NULL) {
+ len = strlen(str);
+ memcpy(copy, str, len);
+ copy[len] = '\0';
+ *params = NULL;
+ return 0;
+ }
+ len = q1 - str;
+ if (*q1 == ' ') {
+ memcpy(copy, str, len);
+ copy[len] = '\0';
+ *params = str + len;
+ return 0;
+ }
+
+ memcpy(copy, str, len);
+ copy += len;
+
+ /* search for the matching closing quote */
+ str = q1 + 1;
+ q2 = strchr(str, *q1);
+ if (q2 == NULL) {
+ CERROR("Unbalanced quota in parameters: \"%s\"\n",
+ *params);
+ return -EINVAL;
+ }
+ len = q2 - str;
+ memcpy(copy, str, len);
+ copy += len;
+ str = q2 + 1;
+ }
+ return 1;
+}
+EXPORT_SYMBOL(class_get_next_param);
+
+/* returns 0 if this is the first key in the buffer, else 1.
+ valp points to first char after key. */
+int class_match_param(char *buf, char *key, char **valp)
+{
+ if (!buf)
+ return 1;
+
+ if (memcmp(buf, key, strlen(key)) != 0)
+ return 1;
+
+ if (valp)
+ *valp = buf + strlen(key);
+
+ return 0;
+}
+EXPORT_SYMBOL(class_match_param);
+
+static int parse_nid(char *buf, void *value, int quiet)
+{
+ lnet_nid_t *nid = (lnet_nid_t *)value;
+
+ *nid = libcfs_str2nid(buf);
+ if (*nid != LNET_NID_ANY)
+ return 0;
+
+ if (!quiet)
+ LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", buf);
+ return -EINVAL;
+}
+
+static int parse_net(char *buf, void *value)
+{
+ __u32 *net = (__u32 *)value;
+
+ *net = libcfs_str2net(buf);
+ CDEBUG(D_INFO, "Net %s\n", libcfs_net2str(*net));
+ return 0;
+}
+
+enum {
+ CLASS_PARSE_NID = 1,
+ CLASS_PARSE_NET,
+};
+
+/* 0 is good nid,
+ 1 not found
+ < 0 error
+ endh is set to next separator */
+static int class_parse_value(char *buf, int opc, void *value, char **endh,
+ int quiet)
+{
+ char *endp;
+ char tmp;
+ int rc = 0;
+
+ if (!buf)
+ return 1;
+ while (*buf == ',' || *buf == ':')
+ buf++;
+ if (*buf == ' ' || *buf == '/' || *buf == '\0')
+ return 1;
+
+ /* nid separators or end of nids */
+ endp = strpbrk(buf, ",: /");
+ if (endp == NULL)
+ endp = buf + strlen(buf);
+
+ tmp = *endp;
+ *endp = '\0';
+ switch (opc) {
+ default:
+ LBUG();
+ case CLASS_PARSE_NID:
+ rc = parse_nid(buf, value, quiet);
+ break;
+ case CLASS_PARSE_NET:
+ rc = parse_net(buf, value);
+ break;
+ }
+ *endp = tmp;
+ if (rc != 0)
+ return rc;
+ if (endh)
+ *endh = endp;
+ return 0;
+}
+
+int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh)
+{
+ return class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 0);
+}
+EXPORT_SYMBOL(class_parse_nid);
+
+int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh)
+{
+ return class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 1);
+}
+EXPORT_SYMBOL(class_parse_nid_quiet);
+
+int class_parse_net(char *buf, __u32 *net, char **endh)
+{
+ return class_parse_value(buf, CLASS_PARSE_NET, (void *)net, endh, 0);
+}
+EXPORT_SYMBOL(class_parse_net);
+
+/* 1 param contains key and match
+ * 0 param contains key and not match
+ * -1 param does not contain key
+ */
+int class_match_nid(char *buf, char *key, lnet_nid_t nid)
+{
+ lnet_nid_t tmp;
+ int rc = -1;
+
+ while (class_find_param(buf, key, &buf) == 0) {
+ /* please restrict to the nids pertaining to
+ * the specified nids */
+ while (class_parse_nid(buf, &tmp, &buf) == 0) {
+ if (tmp == nid)
+ return 1;
+ }
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(class_match_nid);
+
+int class_match_net(char *buf, char *key, __u32 net)
+{
+ __u32 tmp;
+ int rc = -1;
+
+ while (class_find_param(buf, key, &buf) == 0) {
+ /* please restrict to the nids pertaining to
+ * the specified networks */
+ while (class_parse_net(buf, &tmp, &buf) == 0) {
+ if (tmp == net)
+ return 1;
+ }
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(class_match_net);
+
+/********************** class fns **********************/
+
+/**
+ * Create a new obd device and set the type, name and uuid. If successful,
+ * the new device can be accessed by either name or uuid.
+ */
+int class_attach(struct lustre_cfg *lcfg)
+{
+ struct obd_device *obd = NULL;
+ char *typename, *name, *uuid;
+ int rc, len;
+ ENTRY;
+
+ if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) {
+ CERROR("No type passed!\n");
+ RETURN(-EINVAL);
+ }
+ typename = lustre_cfg_string(lcfg, 1);
+
+ if (!LUSTRE_CFG_BUFLEN(lcfg, 0)) {
+ CERROR("No name passed!\n");
+ RETURN(-EINVAL);
+ }
+ name = lustre_cfg_string(lcfg, 0);
+
+ if (!LUSTRE_CFG_BUFLEN(lcfg, 2)) {
+ CERROR("No UUID passed!\n");
+ RETURN(-EINVAL);
+ }
+ uuid = lustre_cfg_string(lcfg, 2);
+
+ CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
+ MKSTR(typename), MKSTR(name), MKSTR(uuid));
+
+ obd = class_newdev(typename, name);
+ if (IS_ERR(obd)) {
+ /* Already exists or out of obds */
+ rc = PTR_ERR(obd);
+ obd = NULL;
+ CERROR("Cannot create device %s of type %s : %d\n",
+ name, typename, rc);
+ GOTO(out, rc);
+ }
+ LASSERTF(obd != NULL, "Cannot get obd device %s of type %s\n",
+ name, typename);
+ LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+ "obd %p obd_magic %08X != %08X\n",
+ obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+ LASSERTF(strncmp(obd->obd_name, name, strlen(name)) == 0,
+ "%p obd_name %s != %s\n", obd, obd->obd_name, name);
+
+ rwlock_init(&obd->obd_pool_lock);
+ obd->obd_pool_limit = 0;
+ obd->obd_pool_slv = 0;
+
+ INIT_LIST_HEAD(&obd->obd_exports);
+ INIT_LIST_HEAD(&obd->obd_unlinked_exports);
+ INIT_LIST_HEAD(&obd->obd_delayed_exports);
+ INIT_LIST_HEAD(&obd->obd_exports_timed);
+ INIT_LIST_HEAD(&obd->obd_nid_stats);
+ spin_lock_init(&obd->obd_nid_lock);
+ spin_lock_init(&obd->obd_dev_lock);
+ mutex_init(&obd->obd_dev_mutex);
+ spin_lock_init(&obd->obd_osfs_lock);
+ /* obd->obd_osfs_age must be set to a value in the distant
+ * past to guarantee a fresh statfs is fetched on mount. */
+ obd->obd_osfs_age = cfs_time_shift_64(-1000);
+
+ /* XXX belongs in setup not attach */
+ init_rwsem(&obd->obd_observer_link_sem);
+ /* recovery data */
+ cfs_init_timer(&obd->obd_recovery_timer);
+ spin_lock_init(&obd->obd_recovery_task_lock);
+ init_waitqueue_head(&obd->obd_next_transno_waitq);
+ init_waitqueue_head(&obd->obd_evict_inprogress_waitq);
+ INIT_LIST_HEAD(&obd->obd_req_replay_queue);
+ INIT_LIST_HEAD(&obd->obd_lock_replay_queue);
+ INIT_LIST_HEAD(&obd->obd_final_req_queue);
+ INIT_LIST_HEAD(&obd->obd_evict_list);
+
+ llog_group_init(&obd->obd_olg, FID_SEQ_LLOG);
+
+ obd->obd_conn_inprogress = 0;
+
+ len = strlen(uuid);
+ if (len >= sizeof(obd->obd_uuid)) {
+ CERROR("uuid must be < %d bytes long\n",
+ (int)sizeof(obd->obd_uuid));
+ GOTO(out, rc = -EINVAL);
+ }
+ memcpy(obd->obd_uuid.uuid, uuid, len);
+
+ /* do the attach */
+ if (OBP(obd, attach)) {
+ rc = OBP(obd,attach)(obd, sizeof *lcfg, lcfg);
+ if (rc)
+ GOTO(out, rc = -EINVAL);
+ }
+
+ /* Detach drops this */
+ spin_lock(&obd->obd_dev_lock);
+ atomic_set(&obd->obd_refcount, 1);
+ spin_unlock(&obd->obd_dev_lock);
+ lu_ref_init(&obd->obd_reference);
+ lu_ref_add(&obd->obd_reference, "attach", obd);
+
+ obd->obd_attached = 1;
+ CDEBUG(D_IOCTL, "OBD: dev %d attached type %s with refcount %d\n",
+ obd->obd_minor, typename, atomic_read(&obd->obd_refcount));
+ RETURN(0);
+ out:
+ if (obd != NULL) {
+ class_release_dev(obd);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(class_attach);
+
+/** Create hashes, self-export, and call type-specific setup.
+ * Setup is effectively the "start this obd" call.
+ */
+int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ int err = 0;
+ struct obd_export *exp;
+ ENTRY;
+
+ LASSERT(obd != NULL);
+ LASSERTF(obd == class_num2obd(obd->obd_minor),
+ "obd %p != obd_devs[%d] %p\n",
+ obd, obd->obd_minor, class_num2obd(obd->obd_minor));
+ LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+ "obd %p obd_magic %08x != %08x\n",
+ obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+
+ /* have we attached a type to this device? */
+ if (!obd->obd_attached) {
+ CERROR("Device %d not attached\n", obd->obd_minor);
+ RETURN(-ENODEV);
+ }
+
+ if (obd->obd_set_up) {
+ CERROR("Device %d already setup (type %s)\n",
+ obd->obd_minor, obd->obd_type->typ_name);
+ RETURN(-EEXIST);
+ }
+
+ /* is someone else setting us up right now? (attach inits spinlock) */
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_starting) {
+ spin_unlock(&obd->obd_dev_lock);
+ CERROR("Device %d setup in progress (type %s)\n",
+ obd->obd_minor, obd->obd_type->typ_name);
+ RETURN(-EEXIST);
+ }
+ /* just leave this on forever. I can't use obd_set_up here because
+ other fns check that status, and we're not actually set up yet. */
+ obd->obd_starting = 1;
+ obd->obd_uuid_hash = NULL;
+ obd->obd_nid_hash = NULL;
+ obd->obd_nid_stats_hash = NULL;
+ spin_unlock(&obd->obd_dev_lock);
+
+ /* create an uuid-export lustre hash */
+ obd->obd_uuid_hash = cfs_hash_create("UUID_HASH",
+ HASH_UUID_CUR_BITS,
+ HASH_UUID_MAX_BITS,
+ HASH_UUID_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &uuid_hash_ops, CFS_HASH_DEFAULT);
+ if (!obd->obd_uuid_hash)
+ GOTO(err_hash, err = -ENOMEM);
+
+ /* create a nid-export lustre hash */
+ obd->obd_nid_hash = cfs_hash_create("NID_HASH",
+ HASH_NID_CUR_BITS,
+ HASH_NID_MAX_BITS,
+ HASH_NID_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &nid_hash_ops, CFS_HASH_DEFAULT);
+ if (!obd->obd_nid_hash)
+ GOTO(err_hash, err = -ENOMEM);
+
+ /* create a nid-stats lustre hash */
+ obd->obd_nid_stats_hash = cfs_hash_create("NID_STATS",
+ HASH_NID_STATS_CUR_BITS,
+ HASH_NID_STATS_MAX_BITS,
+ HASH_NID_STATS_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &nid_stat_hash_ops, CFS_HASH_DEFAULT);
+ if (!obd->obd_nid_stats_hash)
+ GOTO(err_hash, err = -ENOMEM);
+
+ exp = class_new_export(obd, &obd->obd_uuid);
+ if (IS_ERR(exp))
+ GOTO(err_hash, err = PTR_ERR(exp));
+
+ obd->obd_self_export = exp;
+ list_del_init(&exp->exp_obd_chain_timed);
+ class_export_put(exp);
+
+ err = obd_setup(obd, lcfg);
+ if (err)
+ GOTO(err_exp, err);
+
+ obd->obd_set_up = 1;
+
+ spin_lock(&obd->obd_dev_lock);
+ /* cleanup drops this */
+ class_incref(obd, "setup", obd);
+ spin_unlock(&obd->obd_dev_lock);
+
+ CDEBUG(D_IOCTL, "finished setup of obd %s (uuid %s)\n",
+ obd->obd_name, obd->obd_uuid.uuid);
+
+ RETURN(0);
+err_exp:
+ if (obd->obd_self_export) {
+ class_unlink_export(obd->obd_self_export);
+ obd->obd_self_export = NULL;
+ }
+err_hash:
+ if (obd->obd_uuid_hash) {
+ cfs_hash_putref(obd->obd_uuid_hash);
+ obd->obd_uuid_hash = NULL;
+ }
+ if (obd->obd_nid_hash) {
+ cfs_hash_putref(obd->obd_nid_hash);
+ obd->obd_nid_hash = NULL;
+ }
+ if (obd->obd_nid_stats_hash) {
+ cfs_hash_putref(obd->obd_nid_stats_hash);
+ obd->obd_nid_stats_hash = NULL;
+ }
+ obd->obd_starting = 0;
+ CERROR("setup %s failed (%d)\n", obd->obd_name, err);
+ return err;
+}
+EXPORT_SYMBOL(class_setup);
+
+/** We have finished using this obd and are ready to destroy it.
+ * There can be no more references to this obd.
+ */
+int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ ENTRY;
+
+ if (obd->obd_set_up) {
+ CERROR("OBD device %d still set up\n", obd->obd_minor);
+ RETURN(-EBUSY);
+ }
+
+ spin_lock(&obd->obd_dev_lock);
+ if (!obd->obd_attached) {
+ spin_unlock(&obd->obd_dev_lock);
+ CERROR("OBD device %d not attached\n", obd->obd_minor);
+ RETURN(-ENODEV);
+ }
+ obd->obd_attached = 0;
+ spin_unlock(&obd->obd_dev_lock);
+
+ CDEBUG(D_IOCTL, "detach on obd %s (uuid %s)\n",
+ obd->obd_name, obd->obd_uuid.uuid);
+
+ class_decref(obd, "attach", obd);
+ RETURN(0);
+}
+EXPORT_SYMBOL(class_detach);
+
+/** Start shutting down the obd. There may be in-progess ops when
+ * this is called. We tell them to start shutting down with a call
+ * to class_disconnect_exports().
+ */
+int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ int err = 0;
+ char *flag;
+ ENTRY;
+
+ OBD_RACE(OBD_FAIL_LDLM_RECOV_CLIENTS);
+
+ if (!obd->obd_set_up) {
+ CERROR("Device %d not setup\n", obd->obd_minor);
+ RETURN(-ENODEV);
+ }
+
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_stopping) {
+ spin_unlock(&obd->obd_dev_lock);
+ CERROR("OBD %d already stopping\n", obd->obd_minor);
+ RETURN(-ENODEV);
+ }
+ /* Leave this on forever */
+ obd->obd_stopping = 1;
+
+ /* wait for already-arrived-connections to finish. */
+ while (obd->obd_conn_inprogress > 0) {
+ spin_unlock(&obd->obd_dev_lock);
+
+ cond_resched();
+
+ spin_lock(&obd->obd_dev_lock);
+ }
+ spin_unlock(&obd->obd_dev_lock);
+
+ if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) {
+ for (flag = lustre_cfg_string(lcfg, 1); *flag != 0; flag++)
+ switch (*flag) {
+ case 'F':
+ obd->obd_force = 1;
+ break;
+ case 'A':
+ LCONSOLE_WARN("Failing over %s\n",
+ obd->obd_name);
+ obd->obd_fail = 1;
+ obd->obd_no_transno = 1;
+ obd->obd_no_recov = 1;
+ if (OBP(obd, iocontrol)) {
+ obd_iocontrol(OBD_IOC_SYNC,
+ obd->obd_self_export,
+ 0, NULL, NULL);
+ }
+ break;
+ default:
+ CERROR("Unrecognised flag '%c'\n", *flag);
+ }
+ }
+
+ LASSERT(obd->obd_self_export);
+
+ /* The three references that should be remaining are the
+ * obd_self_export and the attach and setup references. */
+ if (atomic_read(&obd->obd_refcount) > 3) {
+ /* refcounf - 3 might be the number of real exports
+ (excluding self export). But class_incref is called
+ by other things as well, so don't count on it. */
+ CDEBUG(D_IOCTL, "%s: forcing exports to disconnect: %d\n",
+ obd->obd_name, atomic_read(&obd->obd_refcount) - 3);
+ dump_exports(obd, 0);
+ class_disconnect_exports(obd);
+ }
+
+ /* Precleanup, we must make sure all exports get destroyed. */
+ err = obd_precleanup(obd, OBD_CLEANUP_EXPORTS);
+ if (err)
+ CERROR("Precleanup %s returned %d\n",
+ obd->obd_name, err);
+
+ /* destroy an uuid-export hash body */
+ if (obd->obd_uuid_hash) {
+ cfs_hash_putref(obd->obd_uuid_hash);
+ obd->obd_uuid_hash = NULL;
+ }
+
+ /* destroy a nid-export hash body */
+ if (obd->obd_nid_hash) {
+ cfs_hash_putref(obd->obd_nid_hash);
+ obd->obd_nid_hash = NULL;
+ }
+
+ /* destroy a nid-stats hash body */
+ if (obd->obd_nid_stats_hash) {
+ cfs_hash_putref(obd->obd_nid_stats_hash);
+ obd->obd_nid_stats_hash = NULL;
+ }
+
+ class_decref(obd, "setup", obd);
+ obd->obd_set_up = 0;
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(class_cleanup);
+
+struct obd_device *class_incref(struct obd_device *obd,
+ const char *scope, const void *source)
+{
+ lu_ref_add_atomic(&obd->obd_reference, scope, source);
+ atomic_inc(&obd->obd_refcount);
+ CDEBUG(D_INFO, "incref %s (%p) now %d\n", obd->obd_name, obd,
+ atomic_read(&obd->obd_refcount));
+
+ return obd;
+}
+EXPORT_SYMBOL(class_incref);
+
+void class_decref(struct obd_device *obd, const char *scope, const void *source)
+{
+ int err;
+ int refs;
+
+ spin_lock(&obd->obd_dev_lock);
+ atomic_dec(&obd->obd_refcount);
+ refs = atomic_read(&obd->obd_refcount);
+ spin_unlock(&obd->obd_dev_lock);
+ lu_ref_del(&obd->obd_reference, scope, source);
+
+ CDEBUG(D_INFO, "Decref %s (%p) now %d\n", obd->obd_name, obd, refs);
+
+ if ((refs == 1) && obd->obd_stopping) {
+ /* All exports have been destroyed; there should
+ be no more in-progress ops by this point.*/
+
+ spin_lock(&obd->obd_self_export->exp_lock);
+ obd->obd_self_export->exp_flags |= exp_flags_from_obd(obd);
+ spin_unlock(&obd->obd_self_export->exp_lock);
+
+ /* note that we'll recurse into class_decref again */
+ class_unlink_export(obd->obd_self_export);
+ return;
+ }
+
+ if (refs == 0) {
+ CDEBUG(D_CONFIG, "finishing cleanup of obd %s (%s)\n",
+ obd->obd_name, obd->obd_uuid.uuid);
+ LASSERT(!obd->obd_attached);
+ if (obd->obd_stopping) {
+ /* If we're not stopping, we were never set up */
+ err = obd_cleanup(obd);
+ if (err)
+ CERROR("Cleanup %s returned %d\n",
+ obd->obd_name, err);
+ }
+ if (OBP(obd, detach)) {
+ err = OBP(obd, detach)(obd);
+ if (err)
+ CERROR("Detach returned %d\n", err);
+ }
+ class_release_dev(obd);
+ }
+}
+EXPORT_SYMBOL(class_decref);
+
+/** Add a failover nid location.
+ * Client obd types contact server obd types using this nid list.
+ */
+int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct obd_import *imp;
+ struct obd_uuid uuid;
+ int rc;
+ ENTRY;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 ||
+ LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) {
+ CERROR("invalid conn_uuid\n");
+ RETURN(-EINVAL);
+ }
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_LWP_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) {
+ CERROR("can't add connection on non-client dev\n");
+ RETURN(-EINVAL);
+ }
+
+ imp = obd->u.cli.cl_import;
+ if (!imp) {
+ CERROR("try to add conn on immature client dev\n");
+ RETURN(-EINVAL);
+ }
+
+ obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1));
+ rc = obd_add_conn(imp, &uuid, lcfg->lcfg_num);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_add_conn);
+
+/** Remove a failover nid location.
+ */
+int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct obd_import *imp;
+ struct obd_uuid uuid;
+ int rc;
+ ENTRY;
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 ||
+ LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) {
+ CERROR("invalid conn_uuid\n");
+ RETURN(-EINVAL);
+ }
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ CERROR("can't del connection on non-client dev\n");
+ RETURN(-EINVAL);
+ }
+
+ imp = obd->u.cli.cl_import;
+ if (!imp) {
+ CERROR("try to del conn on immature client dev\n");
+ RETURN(-EINVAL);
+ }
+
+ obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1));
+ rc = obd_del_conn(imp, &uuid);
+
+ RETURN(rc);
+}
+
+LIST_HEAD(lustre_profile_list);
+
+struct lustre_profile *class_get_profile(const char * prof)
+{
+ struct lustre_profile *lprof;
+
+ ENTRY;
+ list_for_each_entry(lprof, &lustre_profile_list, lp_list) {
+ if (!strcmp(lprof->lp_profile, prof)) {
+ RETURN(lprof);
+ }
+ }
+ RETURN(NULL);
+}
+EXPORT_SYMBOL(class_get_profile);
+
+/** Create a named "profile".
+ * This defines the mdc and osc names to use for a client.
+ * This also is used to define the lov to be used by a mdt.
+ */
+int class_add_profile(int proflen, char *prof, int osclen, char *osc,
+ int mdclen, char *mdc)
+{
+ struct lustre_profile *lprof;
+ int err = 0;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "Add profile %s\n", prof);
+
+ OBD_ALLOC(lprof, sizeof(*lprof));
+ if (lprof == NULL)
+ RETURN(-ENOMEM);
+ INIT_LIST_HEAD(&lprof->lp_list);
+
+ LASSERT(proflen == (strlen(prof) + 1));
+ OBD_ALLOC(lprof->lp_profile, proflen);
+ if (lprof->lp_profile == NULL)
+ GOTO(out, err = -ENOMEM);
+ memcpy(lprof->lp_profile, prof, proflen);
+
+ LASSERT(osclen == (strlen(osc) + 1));
+ OBD_ALLOC(lprof->lp_dt, osclen);
+ if (lprof->lp_dt == NULL)
+ GOTO(out, err = -ENOMEM);
+ memcpy(lprof->lp_dt, osc, osclen);
+
+ if (mdclen > 0) {
+ LASSERT(mdclen == (strlen(mdc) + 1));
+ OBD_ALLOC(lprof->lp_md, mdclen);
+ if (lprof->lp_md == NULL)
+ GOTO(out, err = -ENOMEM);
+ memcpy(lprof->lp_md, mdc, mdclen);
+ }
+
+ list_add(&lprof->lp_list, &lustre_profile_list);
+ RETURN(err);
+
+out:
+ if (lprof->lp_md)
+ OBD_FREE(lprof->lp_md, mdclen);
+ if (lprof->lp_dt)
+ OBD_FREE(lprof->lp_dt, osclen);
+ if (lprof->lp_profile)
+ OBD_FREE(lprof->lp_profile, proflen);
+ OBD_FREE(lprof, sizeof(*lprof));
+ RETURN(err);
+}
+
+void class_del_profile(const char *prof)
+{
+ struct lustre_profile *lprof;
+ ENTRY;
+
+ CDEBUG(D_CONFIG, "Del profile %s\n", prof);
+
+ lprof = class_get_profile(prof);
+ if (lprof) {
+ list_del(&lprof->lp_list);
+ OBD_FREE(lprof->lp_profile, strlen(lprof->lp_profile) + 1);
+ OBD_FREE(lprof->lp_dt, strlen(lprof->lp_dt) + 1);
+ if (lprof->lp_md)
+ OBD_FREE(lprof->lp_md, strlen(lprof->lp_md) + 1);
+ OBD_FREE(lprof, sizeof *lprof);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(class_del_profile);
+
+/* COMPAT_146 */
+void class_del_profiles(void)
+{
+ struct lustre_profile *lprof, *n;
+ ENTRY;
+
+ list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) {
+ list_del(&lprof->lp_list);
+ OBD_FREE(lprof->lp_profile, strlen(lprof->lp_profile) + 1);
+ OBD_FREE(lprof->lp_dt, strlen(lprof->lp_dt) + 1);
+ if (lprof->lp_md)
+ OBD_FREE(lprof->lp_md, strlen(lprof->lp_md) + 1);
+ OBD_FREE(lprof, sizeof *lprof);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(class_del_profiles);
+
+static int class_set_global(char *ptr, int val, struct lustre_cfg *lcfg)
+{
+ ENTRY;
+ if (class_match_param(ptr, PARAM_AT_MIN, NULL) == 0)
+ at_min = val;
+ else if (class_match_param(ptr, PARAM_AT_MAX, NULL) == 0)
+ at_max = val;
+ else if (class_match_param(ptr, PARAM_AT_EXTRA, NULL) == 0)
+ at_extra = val;
+ else if (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, NULL) == 0)
+ at_early_margin = val;
+ else if (class_match_param(ptr, PARAM_AT_HISTORY, NULL) == 0)
+ at_history = val;
+ else if (class_match_param(ptr, PARAM_JOBID_VAR, NULL) == 0)
+ strlcpy(obd_jobid_var, lustre_cfg_string(lcfg, 2),
+ JOBSTATS_JOBID_VAR_MAX_LEN + 1);
+ else
+ RETURN(-EINVAL);
+
+ CDEBUG(D_IOCTL, "global %s = %d\n", ptr, val);
+ RETURN(0);
+}
+
+
+/* We can't call ll_process_config or lquota_process_config directly because
+ * it lives in a module that must be loaded after this one. */
+static int (*client_process_config)(struct lustre_cfg *lcfg) = NULL;
+static int (*quota_process_config)(struct lustre_cfg *lcfg) = NULL;
+
+void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg))
+{
+ client_process_config = cpc;
+}
+EXPORT_SYMBOL(lustre_register_client_process_config);
+
+/**
+ * Rename the proc parameter in \a cfg with a new name \a new_name.
+ *
+ * \param cfg config structure which contains the proc parameter
+ * \param new_name new name of the proc parameter
+ *
+ * \retval valid-pointer pointer to the newly-allocated config structure
+ * which contains the renamed proc parameter
+ * \retval ERR_PTR(-EINVAL) if \a cfg or \a new_name is NULL, or \a cfg does
+ * not contain a proc parameter
+ * \retval ERR_PTR(-ENOMEM) if memory allocation failure occurs
+ */
+struct lustre_cfg *lustre_cfg_rename(struct lustre_cfg *cfg,
+ const char *new_name)
+{
+ struct lustre_cfg_bufs *bufs = NULL;
+ struct lustre_cfg *new_cfg = NULL;
+ char *param = NULL;
+ char *new_param = NULL;
+ char *value = NULL;
+ int name_len = 0;
+ int new_len = 0;
+ ENTRY;
+
+ if (cfg == NULL || new_name == NULL)
+ RETURN(ERR_PTR(-EINVAL));
+
+ param = lustre_cfg_string(cfg, 1);
+ if (param == NULL)
+ RETURN(ERR_PTR(-EINVAL));
+
+ value = strchr(param, '=');
+ if (value == NULL)
+ name_len = strlen(param);
+ else
+ name_len = value - param;
+
+ new_len = LUSTRE_CFG_BUFLEN(cfg, 1) + strlen(new_name) - name_len;
+
+ OBD_ALLOC(new_param, new_len);
+ if (new_param == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ strcpy(new_param, new_name);
+ if (value != NULL)
+ strcat(new_param, value);
+
+ OBD_ALLOC_PTR(bufs);
+ if (bufs == NULL) {
+ OBD_FREE(new_param, new_len);
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+
+ lustre_cfg_bufs_reset(bufs, NULL);
+ lustre_cfg_bufs_init(bufs, cfg);
+ lustre_cfg_bufs_set_string(bufs, 1, new_param);
+
+ new_cfg = lustre_cfg_new(cfg->lcfg_command, bufs);
+
+ OBD_FREE(new_param, new_len);
+ OBD_FREE_PTR(bufs);
+ if (new_cfg == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ new_cfg->lcfg_num = cfg->lcfg_num;
+ new_cfg->lcfg_flags = cfg->lcfg_flags;
+ new_cfg->lcfg_nid = cfg->lcfg_nid;
+ new_cfg->lcfg_nal = cfg->lcfg_nal;
+
+ RETURN(new_cfg);
+}
+EXPORT_SYMBOL(lustre_cfg_rename);
+
+void lustre_register_quota_process_config(int (*qpc)(struct lustre_cfg *lcfg))
+{
+ quota_process_config = qpc;
+}
+EXPORT_SYMBOL(lustre_register_quota_process_config);
+
+/** Process configuration commands given in lustre_cfg form.
+ * These may come from direct calls (e.g. class_manual_cleanup)
+ * or processing the config llog, or ioctl from lctl.
+ */
+int class_process_config(struct lustre_cfg *lcfg)
+{
+ struct obd_device *obd;
+ int err;
+
+ LASSERT(lcfg && !IS_ERR(lcfg));
+ CDEBUG(D_IOCTL, "processing cmd: %x\n", lcfg->lcfg_command);
+
+ /* Commands that don't need a device */
+ switch(lcfg->lcfg_command) {
+ case LCFG_ATTACH: {
+ err = class_attach(lcfg);
+ GOTO(out, err);
+ }
+ case LCFG_ADD_UUID: {
+ CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64
+ " (%s)\n", lustre_cfg_string(lcfg, 1),
+ lcfg->lcfg_nid, libcfs_nid2str(lcfg->lcfg_nid));
+
+ err = class_add_uuid(lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid);
+ GOTO(out, err);
+ }
+ case LCFG_DEL_UUID: {
+ CDEBUG(D_IOCTL, "removing mappings for uuid %s\n",
+ (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) == 0)
+ ? "<all uuids>" : lustre_cfg_string(lcfg, 1));
+
+ err = class_del_uuid(lustre_cfg_string(lcfg, 1));
+ GOTO(out, err);
+ }
+ case LCFG_MOUNTOPT: {
+ CDEBUG(D_IOCTL, "mountopt: profile %s osc %s mdc %s\n",
+ lustre_cfg_string(lcfg, 1),
+ lustre_cfg_string(lcfg, 2),
+ lustre_cfg_string(lcfg, 3));
+ /* set these mount options somewhere, so ll_fill_super
+ * can find them. */
+ err = class_add_profile(LUSTRE_CFG_BUFLEN(lcfg, 1),
+ lustre_cfg_string(lcfg, 1),
+ LUSTRE_CFG_BUFLEN(lcfg, 2),
+ lustre_cfg_string(lcfg, 2),
+ LUSTRE_CFG_BUFLEN(lcfg, 3),
+ lustre_cfg_string(lcfg, 3));
+ GOTO(out, err);
+ }
+ case LCFG_DEL_MOUNTOPT: {
+ CDEBUG(D_IOCTL, "mountopt: profile %s\n",
+ lustre_cfg_string(lcfg, 1));
+ class_del_profile(lustre_cfg_string(lcfg, 1));
+ GOTO(out, err = 0);
+ }
+ case LCFG_SET_TIMEOUT: {
+ CDEBUG(D_IOCTL, "changing lustre timeout from %d to %d\n",
+ obd_timeout, lcfg->lcfg_num);
+ obd_timeout = max(lcfg->lcfg_num, 1U);
+ obd_timeout_set = 1;
+ GOTO(out, err = 0);
+ }
+ case LCFG_SET_LDLM_TIMEOUT: {
+ CDEBUG(D_IOCTL, "changing lustre ldlm_timeout from %d to %d\n",
+ ldlm_timeout, lcfg->lcfg_num);
+ ldlm_timeout = max(lcfg->lcfg_num, 1U);
+ if (ldlm_timeout >= obd_timeout)
+ ldlm_timeout = max(obd_timeout / 3, 1U);
+ ldlm_timeout_set = 1;
+ GOTO(out, err = 0);
+ }
+ case LCFG_SET_UPCALL: {
+ LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n");
+ /* COMPAT_146 Don't fail on old configs */
+ GOTO(out, err = 0);
+ }
+ case LCFG_MARKER: {
+ struct cfg_marker *marker;
+ marker = lustre_cfg_buf(lcfg, 1);
+ CDEBUG(D_IOCTL, "marker %d (%#x) %.16s %s\n", marker->cm_step,
+ marker->cm_flags, marker->cm_tgtname, marker->cm_comment);
+ GOTO(out, err = 0);
+ }
+ case LCFG_PARAM: {
+ char *tmp;
+ /* llite has no obd */
+ if ((class_match_param(lustre_cfg_string(lcfg, 1),
+ PARAM_LLITE, 0) == 0) &&
+ client_process_config) {
+ err = (*client_process_config)(lcfg);
+ GOTO(out, err);
+ } else if ((class_match_param(lustre_cfg_string(lcfg, 1),
+ PARAM_SYS, &tmp) == 0)) {
+ /* Global param settings */
+ err = class_set_global(tmp, lcfg->lcfg_num, lcfg);
+ /*
+ * Client or server should not fail to mount if
+ * it hits an unknown configuration parameter.
+ */
+ if (err != 0)
+ CWARN("Ignoring unknown param %s\n", tmp);
+
+ GOTO(out, err = 0);
+ } else if ((class_match_param(lustre_cfg_string(lcfg, 1),
+ PARAM_QUOTA, &tmp) == 0) &&
+ quota_process_config) {
+ err = (*quota_process_config)(lcfg);
+ GOTO(out, err);
+ }
+ /* Fall through */
+ break;
+ }
+ }
+
+ /* Commands that require a device */
+ obd = class_name2obd(lustre_cfg_string(lcfg, 0));
+ if (obd == NULL) {
+ if (!LUSTRE_CFG_BUFLEN(lcfg, 0))
+ CERROR("this lcfg command requires a device name\n");
+ else
+ CERROR("no device for: %s\n",
+ lustre_cfg_string(lcfg, 0));
+
+ GOTO(out, err = -EINVAL);
+ }
+
+ switch(lcfg->lcfg_command) {
+ case LCFG_SETUP: {
+ err = class_setup(obd, lcfg);
+ GOTO(out, err);
+ }
+ case LCFG_DETACH: {
+ err = class_detach(obd, lcfg);
+ GOTO(out, err = 0);
+ }
+ case LCFG_CLEANUP: {
+ err = class_cleanup(obd, lcfg);
+ GOTO(out, err = 0);
+ }
+ case LCFG_ADD_CONN: {
+ err = class_add_conn(obd, lcfg);
+ GOTO(out, err = 0);
+ }
+ case LCFG_DEL_CONN: {
+ err = class_del_conn(obd, lcfg);
+ GOTO(out, err = 0);
+ }
+ case LCFG_POOL_NEW: {
+ err = obd_pool_new(obd, lustre_cfg_string(lcfg, 2));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_ADD: {
+ err = obd_pool_add(obd, lustre_cfg_string(lcfg, 2),
+ lustre_cfg_string(lcfg, 3));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_REM: {
+ err = obd_pool_rem(obd, lustre_cfg_string(lcfg, 2),
+ lustre_cfg_string(lcfg, 3));
+ GOTO(out, err = 0);
+ break;
+ }
+ case LCFG_POOL_DEL: {
+ err = obd_pool_del(obd, lustre_cfg_string(lcfg, 2));
+ GOTO(out, err = 0);
+ break;
+ }
+ default: {
+ err = obd_process_config(obd, sizeof(*lcfg), lcfg);
+ GOTO(out, err);
+
+ }
+ }
+out:
+ if ((err < 0) && !(lcfg->lcfg_command & LCFG_REQUIRED)) {
+ CWARN("Ignoring error %d on optional command %#x\n", err,
+ lcfg->lcfg_command);
+ err = 0;
+ }
+ return err;
+}
+EXPORT_SYMBOL(class_process_config);
+
+int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
+ struct lustre_cfg *lcfg, void *data)
+{
+ struct lprocfs_vars *var;
+ struct file fakefile;
+ struct seq_file fake_seqfile;
+ char *key, *sval;
+ int i, keylen, vallen;
+ int matched = 0, j = 0;
+ int rc = 0;
+ int skip = 0;
+ ENTRY;
+
+ if (lcfg->lcfg_command != LCFG_PARAM) {
+ CERROR("Unknown command: %d\n", lcfg->lcfg_command);
+ RETURN(-EINVAL);
+ }
+
+ /* fake a seq file so that var->fops->write can work... */
+ fakefile.private_data = &fake_seqfile;
+ fake_seqfile.private = data;
+ /* e.g. tunefs.lustre --param mdt.group_upcall=foo /r/tmp/lustre-mdt
+ or lctl conf_param lustre-MDT0000.mdt.group_upcall=bar
+ or lctl conf_param lustre-OST0000.osc.max_dirty_mb=36 */
+ for (i = 1; i < lcfg->lcfg_bufcount; i++) {
+ key = lustre_cfg_buf(lcfg, i);
+ /* Strip off prefix */
+ class_match_param(key, prefix, &key);
+ sval = strchr(key, '=');
+ if (!sval || (*(sval + 1) == 0)) {
+ CERROR("Can't parse param %s (missing '=')\n", key);
+ /* rc = -EINVAL; continue parsing other params */
+ continue;
+ }
+ keylen = sval - key;
+ sval++;
+ vallen = strlen(sval);
+ matched = 0;
+ j = 0;
+ /* Search proc entries */
+ while (lvars[j].name) {
+ var = &lvars[j];
+ if (class_match_param(key, (char *)var->name, 0) == 0 &&
+ keylen == strlen(var->name)) {
+ matched++;
+ rc = -EROFS;
+ if (var->fops && var->fops->write) {
+ mm_segment_t oldfs;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ rc = (var->fops->write)(&fakefile, sval,
+ vallen, NULL);
+ set_fs(oldfs);
+ }
+ break;
+ }
+ j++;
+ }
+ if (!matched) {
+ /* If the prefix doesn't match, return error so we
+ can pass it down the stack */
+ if (strnchr(key, keylen, '.'))
+ RETURN(-ENOSYS);
+ CERROR("%s: unknown param %s\n",
+ (char *)lustre_cfg_string(lcfg, 0), key);
+ /* rc = -EINVAL; continue parsing other params */
+ skip++;
+ } else if (rc < 0) {
+ CERROR("writing proc entry %s err %d\n",
+ var->name, rc);
+ rc = 0;
+ } else {
+ CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
+ lustre_cfg_string(lcfg, 0),
+ (int)strlen(prefix) - 1, prefix,
+ (int)(sval - key - 1), key, sval);
+ }
+ }
+
+ if (rc > 0)
+ rc = 0;
+ if (!rc && skip)
+ rc = skip;
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_process_proc_param);
+
+extern int lustre_check_exclusion(struct super_block *sb, char *svname);
+
+/** Parse a configuration llog, doing various manipulations on them
+ * for various reasons, (modifications for compatibility, skip obsolete
+ * records, change uuids, etc), then class_process_config() resulting
+ * net records.
+ */
+int class_config_llog_handler(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct config_llog_instance *clli = data;
+ int cfg_len = rec->lrh_len;
+ char *cfg_buf = (char*) (rec + 1);
+ int rc = 0;
+ ENTRY;
+
+ //class_config_dump_handler(handle, rec, data);
+
+ switch (rec->lrh_type) {
+ case OBD_CFG_REC: {
+ struct lustre_cfg *lcfg, *lcfg_new;
+ struct lustre_cfg_bufs bufs;
+ char *inst_name = NULL;
+ int inst_len = 0;
+ int inst = 0, swab = 0;
+
+ lcfg = (struct lustre_cfg *)cfg_buf;
+ if (lcfg->lcfg_version == __swab32(LUSTRE_CFG_VERSION)) {
+ lustre_swab_lustre_cfg(lcfg);
+ swab = 1;
+ }
+
+ rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
+ if (rc)
+ GOTO(out, rc);
+
+ /* Figure out config state info */
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+ lustre_swab_cfg_marker(marker, swab,
+ LUSTRE_CFG_BUFLEN(lcfg, 1));
+ CDEBUG(D_CONFIG, "Marker, inst_flg=%#x mark_flg=%#x\n",
+ clli->cfg_flags, marker->cm_flags);
+ if (marker->cm_flags & CM_START) {
+ /* all previous flags off */
+ clli->cfg_flags = CFG_F_MARKER;
+ if (marker->cm_flags & CM_SKIP) {
+ clli->cfg_flags |= CFG_F_SKIP;
+ CDEBUG(D_CONFIG, "SKIP #%d\n",
+ marker->cm_step);
+ } else if ((marker->cm_flags & CM_EXCLUDE) ||
+ (clli->cfg_sb &&
+ lustre_check_exclusion(clli->cfg_sb,
+ marker->cm_tgtname))) {
+ clli->cfg_flags |= CFG_F_EXCLUDE;
+ CDEBUG(D_CONFIG, "EXCLUDE %d\n",
+ marker->cm_step);
+ }
+ } else if (marker->cm_flags & CM_END) {
+ clli->cfg_flags = 0;
+ }
+ }
+ /* A config command without a start marker before it is
+ illegal (post 146) */
+ if (!(clli->cfg_flags & CFG_F_COMPAT146) &&
+ !(clli->cfg_flags & CFG_F_MARKER) &&
+ (lcfg->lcfg_command != LCFG_MARKER)) {
+ CWARN("Config not inside markers, ignoring! "
+ "(inst: %p, uuid: %s, flags: %#x)\n",
+ clli->cfg_instance,
+ clli->cfg_uuid.uuid, clli->cfg_flags);
+ clli->cfg_flags |= CFG_F_SKIP;
+ }
+ if (clli->cfg_flags & CFG_F_SKIP) {
+ CDEBUG(D_CONFIG, "skipping %#x\n",
+ clli->cfg_flags);
+ rc = 0;
+ /* No processing! */
+ break;
+ }
+
+ /*
+ * For interoperability between 1.8 and 2.0,
+ * rename "mds" obd device type to "mdt".
+ */
+ {
+ char *typename = lustre_cfg_string(lcfg, 1);
+ char *index = lustre_cfg_string(lcfg, 2);
+
+ if ((lcfg->lcfg_command == LCFG_ATTACH && typename &&
+ strcmp(typename, "mds") == 0)) {
+ CWARN("For 1.8 interoperability, rename obd "
+ "type from mds to mdt\n");
+ typename[2] = 't';
+ }
+ if ((lcfg->lcfg_command == LCFG_SETUP && index &&
+ strcmp(index, "type") == 0)) {
+ CDEBUG(D_INFO, "For 1.8 interoperability, "
+ "set this index to '0'\n");
+ index[0] = '0';
+ index[1] = 0;
+ }
+ }
+
+
+ if ((clli->cfg_flags & CFG_F_EXCLUDE) &&
+ (lcfg->lcfg_command == LCFG_LOV_ADD_OBD))
+ /* Add inactive instead */
+ lcfg->lcfg_command = LCFG_LOV_ADD_INA;
+
+ lustre_cfg_bufs_init(&bufs, lcfg);
+
+ if (clli && clli->cfg_instance &&
+ LUSTRE_CFG_BUFLEN(lcfg, 0) > 0){
+ inst = 1;
+ inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) +
+ sizeof(clli->cfg_instance) * 2 + 4;
+ OBD_ALLOC(inst_name, inst_len);
+ if (inst_name == NULL)
+ GOTO(out, rc = -ENOMEM);
+ sprintf(inst_name, "%s-%p",
+ lustre_cfg_string(lcfg, 0),
+ clli->cfg_instance);
+ lustre_cfg_bufs_set_string(&bufs, 0, inst_name);
+ CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n",
+ lcfg->lcfg_command, inst_name);
+ }
+
+ /* we override the llog's uuid for clients, to insure they
+ are unique */
+ if (clli && clli->cfg_instance != NULL &&
+ lcfg->lcfg_command == LCFG_ATTACH) {
+ lustre_cfg_bufs_set_string(&bufs, 2,
+ clli->cfg_uuid.uuid);
+ }
+ /*
+ * sptlrpc config record, we expect 2 data segments:
+ * [0]: fs_name/target_name,
+ * [1]: rule string
+ * moving them to index [1] and [2], and insert MGC's
+ * obdname at index [0].
+ */
+ if (clli && clli->cfg_instance == NULL &&
+ lcfg->lcfg_command == LCFG_SPTLRPC_CONF) {
+ lustre_cfg_bufs_set(&bufs, 2, bufs.lcfg_buf[1],
+ bufs.lcfg_buflen[1]);
+ lustre_cfg_bufs_set(&bufs, 1, bufs.lcfg_buf[0],
+ bufs.lcfg_buflen[0]);
+ lustre_cfg_bufs_set_string(&bufs, 0,
+ clli->cfg_obdname);
+ }
+
+ lcfg_new = lustre_cfg_new(lcfg->lcfg_command, &bufs);
+
+ lcfg_new->lcfg_num = lcfg->lcfg_num;
+ lcfg_new->lcfg_flags = lcfg->lcfg_flags;
+
+ /* XXX Hack to try to remain binary compatible with
+ * pre-newconfig logs */
+ if (lcfg->lcfg_nal != 0 && /* pre-newconfig log? */
+ (lcfg->lcfg_nid >> 32) == 0) {
+ __u32 addr = (__u32)(lcfg->lcfg_nid & 0xffffffff);
+
+ lcfg_new->lcfg_nid =
+ LNET_MKNID(LNET_MKNET(lcfg->lcfg_nal, 0), addr);
+ CWARN("Converted pre-newconfig NAL %d NID %x to %s\n",
+ lcfg->lcfg_nal, addr,
+ libcfs_nid2str(lcfg_new->lcfg_nid));
+ } else {
+ lcfg_new->lcfg_nid = lcfg->lcfg_nid;
+ }
+
+ lcfg_new->lcfg_nal = 0; /* illegal value for obsolete field */
+
+ rc = class_process_config(lcfg_new);
+ lustre_cfg_free(lcfg_new);
+
+ if (inst)
+ OBD_FREE(inst_name, inst_len);
+ break;
+ }
+ default:
+ CERROR("Unknown llog record type %#x encountered\n",
+ rec->lrh_type);
+ break;
+ }
+out:
+ if (rc) {
+ CERROR("%s: cfg command failed: rc = %d\n",
+ handle->lgh_ctxt->loc_obd->obd_name, rc);
+ class_config_dump_handler(NULL, handle, rec, data);
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_config_llog_handler);
+
+int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+ char *name, struct config_llog_instance *cfg)
+{
+ struct llog_process_cat_data cd = {0, 0};
+ struct llog_handle *llh;
+ llog_cb_t callback;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_INFO, "looking up llog %s\n", name);
+ rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(parse_out, rc);
+
+ /* continue processing from where we last stopped to end-of-log */
+ if (cfg) {
+ cd.lpcd_first_idx = cfg->cfg_last_idx;
+ callback = cfg->cfg_callback;
+ LASSERT(callback != NULL);
+ } else {
+ callback = class_config_llog_handler;
+ }
+
+ cd.lpcd_last_idx = 0;
+
+ rc = llog_process(env, llh, callback, cfg, &cd);
+
+ CDEBUG(D_CONFIG, "Processed log %s gen %d-%d (rc=%d)\n", name,
+ cd.lpcd_first_idx + 1, cd.lpcd_last_idx, rc);
+ if (cfg)
+ cfg->cfg_last_idx = cd.lpcd_last_idx;
+
+parse_out:
+ llog_close(env, llh);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_config_parse_llog);
+
+/**
+ * parse config record and output dump in supplied buffer.
+ * This is separated from class_config_dump_handler() to use
+ * for ioctl needs as well
+ */
+int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf, int size)
+{
+ struct lustre_cfg *lcfg = (struct lustre_cfg *)(rec + 1);
+ char *ptr = buf;
+ char *end = buf + size;
+ int rc = 0;
+
+ ENTRY;
+
+ LASSERT(rec->lrh_type == OBD_CFG_REC);
+ rc = lustre_cfg_sanity_check(lcfg, rec->lrh_len);
+ if (rc < 0)
+ RETURN(rc);
+
+ ptr += snprintf(ptr, end-ptr, "cmd=%05x ", lcfg->lcfg_command);
+ if (lcfg->lcfg_flags)
+ ptr += snprintf(ptr, end-ptr, "flags=%#08x ",
+ lcfg->lcfg_flags);
+
+ if (lcfg->lcfg_num)
+ ptr += snprintf(ptr, end-ptr, "num=%#08x ", lcfg->lcfg_num);
+
+ if (lcfg->lcfg_nid)
+ ptr += snprintf(ptr, end-ptr, "nid=%s("LPX64")\n ",
+ libcfs_nid2str(lcfg->lcfg_nid),
+ lcfg->lcfg_nid);
+
+ if (lcfg->lcfg_command == LCFG_MARKER) {
+ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
+
+ ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'",
+ marker->cm_step, marker->cm_flags,
+ marker->cm_tgtname, marker->cm_comment);
+ } else {
+ int i;
+
+ for (i = 0; i < lcfg->lcfg_bufcount; i++) {
+ ptr += snprintf(ptr, end-ptr, "%d:%s ", i,
+ lustre_cfg_string(lcfg, i));
+ }
+ }
+ /* return consumed bytes */
+ rc = ptr - buf;
+ RETURN(rc);
+}
+
+int class_config_dump_handler(const struct lu_env *env,
+ struct llog_handle *handle,
+ struct llog_rec_hdr *rec, void *data)
+{
+ char *outstr;
+ int rc = 0;
+
+ ENTRY;
+
+ OBD_ALLOC(outstr, 256);
+ if (outstr == NULL)
+ RETURN(-ENOMEM);
+
+ if (rec->lrh_type == OBD_CFG_REC) {
+ class_config_parse_rec(rec, outstr, 256);
+ LCONSOLE(D_WARNING, " %s\n", outstr);
+ } else {
+ LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
+ rc = -EINVAL;
+ }
+
+ OBD_FREE(outstr, 256);
+ RETURN(rc);
+}
+
+int class_config_dump_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+ char *name, struct config_llog_instance *cfg)
+{
+ struct llog_handle *llh;
+ int rc;
+
+ ENTRY;
+
+ LCONSOLE_INFO("Dumping config log %s\n", name);
+
+ rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
+ if (rc)
+ RETURN(rc);
+
+ rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(parse_out, rc);
+
+ rc = llog_process(env, llh, class_config_dump_handler, cfg, NULL);
+parse_out:
+ llog_close(env, llh);
+
+ LCONSOLE_INFO("End config log %s\n", name);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_config_dump_llog);
+
+/** Call class_cleanup and class_detach.
+ * "Manual" only in the sense that we're faking lcfg commands.
+ */
+int class_manual_cleanup(struct obd_device *obd)
+{
+ char flags[3] = "";
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs bufs;
+ int rc;
+ ENTRY;
+
+ if (!obd) {
+ CERROR("empty cleanup\n");
+ RETURN(-EALREADY);
+ }
+
+ if (obd->obd_force)
+ strcat(flags, "F");
+ if (obd->obd_fail)
+ strcat(flags, "A");
+
+ CDEBUG(D_CONFIG, "Manual cleanup of %s (flags='%s')\n",
+ obd->obd_name, flags);
+
+ lustre_cfg_bufs_reset(&bufs, obd->obd_name);
+ lustre_cfg_bufs_set_string(&bufs, 1, flags);
+ lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
+ if (!lcfg)
+ RETURN(-ENOMEM);
+
+ rc = class_process_config(lcfg);
+ if (rc) {
+ CERROR("cleanup failed %d: %s\n", rc, obd->obd_name);
+ GOTO(out, rc);
+ }
+
+ /* the lcfg is almost the same for both ops */
+ lcfg->lcfg_command = LCFG_DETACH;
+ rc = class_process_config(lcfg);
+ if (rc)
+ CERROR("detach failed %d: %s\n", rc, obd->obd_name);
+out:
+ lustre_cfg_free(lcfg);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(class_manual_cleanup);
+
+/*
+ * uuid<->export lustre hash operations
+ */
+
+static unsigned
+uuid_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_djb2_hash(((struct obd_uuid *)key)->uuid,
+ sizeof(((struct obd_uuid *)key)->uuid), mask);
+}
+
+static void *
+uuid_key(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+
+ return &exp->exp_client_uuid;
+}
+
+/*
+ * NOTE: It is impossible to find an export that is in failed
+ * state with this function
+ */
+static int
+uuid_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ LASSERT(key);
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+
+ return obd_uuid_equals(key, &exp->exp_client_uuid) &&
+ !exp->exp_failed;
+}
+
+static void *
+uuid_export_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+}
+
+static void
+uuid_export_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+ class_export_get(exp);
+}
+
+static void
+uuid_export_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+ class_export_put(exp);
+}
+
+static cfs_hash_ops_t uuid_hash_ops = {
+ .hs_hash = uuid_hash,
+ .hs_key = uuid_key,
+ .hs_keycmp = uuid_keycmp,
+ .hs_object = uuid_export_object,
+ .hs_get = uuid_export_get,
+ .hs_put_locked = uuid_export_put_locked,
+};
+
+
+/*
+ * nid<->export hash operations
+ */
+
+static unsigned
+nid_hash(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_djb2_hash(key, sizeof(lnet_nid_t), mask);
+}
+
+static void *
+nid_key(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+
+ RETURN(&exp->exp_connection->c_peer.nid);
+}
+
+/*
+ * NOTE: It is impossible to find an export that is in failed
+ * state with this function
+ */
+static int
+nid_kepcmp(const void *key, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ LASSERT(key);
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+
+ RETURN(exp->exp_connection->c_peer.nid == *(lnet_nid_t *)key &&
+ !exp->exp_failed);
+}
+
+static void *
+nid_export_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct obd_export, exp_nid_hash);
+}
+
+static void
+nid_export_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+ class_export_get(exp);
+}
+
+static void
+nid_export_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+ class_export_put(exp);
+}
+
+static cfs_hash_ops_t nid_hash_ops = {
+ .hs_hash = nid_hash,
+ .hs_key = nid_key,
+ .hs_keycmp = nid_kepcmp,
+ .hs_object = nid_export_object,
+ .hs_get = nid_export_get,
+ .hs_put_locked = nid_export_put_locked,
+};
+
+
+/*
+ * nid<->nidstats hash operations
+ */
+
+static void *
+nidstats_key(struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
+
+ return &ns->nid;
+}
+
+static int
+nidstats_keycmp(const void *key, struct hlist_node *hnode)
+{
+ return *(lnet_nid_t *)nidstats_key(hnode) == *(lnet_nid_t *)key;
+}
+
+static void *
+nidstats_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct nid_stat, nid_hash);
+}
+
+static void
+nidstats_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
+ nidstat_getref(ns);
+}
+
+static void
+nidstats_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
+ nidstat_putref(ns);
+}
+
+static cfs_hash_ops_t nid_stat_hash_ops = {
+ .hs_hash = nid_hash,
+ .hs_key = nidstats_key,
+ .hs_keycmp = nidstats_keycmp,
+ .hs_object = nidstats_object,
+ .hs_get = nidstats_get,
+ .hs_put_locked = nidstats_put_locked,
+};
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
new file mode 100644
index 000000000000..99adad9793c5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -0,0 +1,1321 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obd_mount.c
+ *
+ * Client mount routines
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
+#define PRINT_CMD CDEBUG
+
+#include <obd.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre/lustre_user.h>
+#include <linux/version.h>
+#include <lustre_log.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+
+static int (*client_fill_super)(struct super_block *sb,
+ struct vfsmount *mnt);
+
+static void (*kill_super_cb)(struct super_block *sb);
+
+/**************** config llog ********************/
+
+/** Get a config log from the MGS and process it.
+ * This func is called for both clients and servers.
+ * Continue to process new statements appended to the logs
+ * (whenever the config lock is revoked) until lustre_end_log
+ * is called.
+ * @param sb The superblock is used by the MGC to write to the local copy of
+ * the config log
+ * @param logname The name of the llog to replicate from the MGS
+ * @param cfg Since the same mgc may be used to follow multiple config logs
+ * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
+ * this log, and is added to the mgc's list of logs to follow.
+ */
+int lustre_process_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs *bufs;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ int rc;
+ ENTRY;
+
+ LASSERT(mgc);
+ LASSERT(cfg);
+
+ OBD_ALLOC_PTR(bufs);
+ if (bufs == NULL)
+ RETURN(-ENOMEM);
+
+ /* mgc_process_config */
+ lustre_cfg_bufs_reset(bufs, mgc->obd_name);
+ lustre_cfg_bufs_set_string(bufs, 1, logname);
+ lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
+ lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
+ lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
+ rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
+ lustre_cfg_free(lcfg);
+
+ OBD_FREE_PTR(bufs);
+
+ if (rc == -EINVAL)
+ LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
+ "failed from the MGS (%d). Make sure this "
+ "client and the MGS are running compatible "
+ "versions of Lustre.\n",
+ mgc->obd_name, logname, rc);
+
+ if (rc)
+ LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
+ "failed (%d). This may be the result of "
+ "communication errors between this node and "
+ "the MGS, a bad configuration, or other "
+ "errors. See the syslog for more "
+ "information.\n", mgc->obd_name, logname,
+ rc);
+
+ /* class_obd_list(); */
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lustre_process_log);
+
+/* Stop watching this config log for updates */
+int lustre_end_log(struct super_block *sb, char *logname,
+ struct config_llog_instance *cfg)
+{
+ struct lustre_cfg *lcfg;
+ struct lustre_cfg_bufs bufs;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *mgc = lsi->lsi_mgc;
+ int rc;
+ ENTRY;
+
+ if (!mgc)
+ RETURN(-ENOENT);
+
+ /* mgc_process_config */
+ lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
+ lustre_cfg_bufs_set_string(&bufs, 1, logname);
+ if (cfg)
+ lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
+ lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
+ rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
+ lustre_cfg_free(lcfg);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lustre_end_log);
+
+/**************** obd start *******************/
+
+/** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
+ * lctl (and do for echo cli/srv.
+ */
+int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
+ char *s1, char *s2, char *s3, char *s4)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg * lcfg = NULL;
+ int rc;
+
+ CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
+ cmd, s1, s2, s3, s4);
+
+ lustre_cfg_bufs_reset(&bufs, cfgname);
+ if (s1)
+ lustre_cfg_bufs_set_string(&bufs, 1, s1);
+ if (s2)
+ lustre_cfg_bufs_set_string(&bufs, 2, s2);
+ if (s3)
+ lustre_cfg_bufs_set_string(&bufs, 3, s3);
+ if (s4)
+ lustre_cfg_bufs_set_string(&bufs, 4, s4);
+
+ lcfg = lustre_cfg_new(cmd, &bufs);
+ lcfg->lcfg_nid = nid;
+ rc = class_process_config(lcfg);
+ lustre_cfg_free(lcfg);
+ return(rc);
+}
+EXPORT_SYMBOL(do_lcfg);
+
+/** Call class_attach and class_setup. These methods in turn call
+ * obd type-specific methods.
+ */
+int lustre_start_simple(char *obdname, char *type, char *uuid,
+ char *s1, char *s2, char *s3, char *s4)
+{
+ int rc;
+ CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
+
+ rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
+ if (rc) {
+ CERROR("%s attach error %d\n", obdname, rc);
+ return rc;
+ }
+ rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
+ if (rc) {
+ CERROR("%s setup error %d\n", obdname, rc);
+ do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
+ }
+ return rc;
+}
+
+DEFINE_MUTEX(mgc_start_lock);
+
+/** Set up a mgc obd to process startup logs
+ *
+ * \param sb [in] super block of the mgc obd
+ *
+ * \retval 0 success, otherwise error code
+ */
+int lustre_start_mgc(struct super_block *sb)
+{
+ struct obd_connect_data *data = NULL;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *obd;
+ struct obd_export *exp;
+ struct obd_uuid *uuid;
+ class_uuid_t uuidc;
+ lnet_nid_t nid;
+ char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
+ char *ptr;
+ int recov_bk;
+ int rc = 0, i = 0, j, len;
+ ENTRY;
+
+ LASSERT(lsi->lsi_lmd);
+
+ /* Find the first non-lo MGS nid for our MGC name */
+ if (IS_SERVER(lsi)) {
+ /* mount -o mgsnode=nid */
+ ptr = lsi->lsi_lmd->lmd_mgs;
+ if (lsi->lsi_lmd->lmd_mgs &&
+ (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
+ i++;
+ } else if (IS_MGS(lsi)) {
+ lnet_process_id_t id;
+ while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+ if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+ continue;
+ nid = id.nid;
+ i++;
+ break;
+ }
+ }
+ } else { /* client */
+ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
+ ptr = lsi->lsi_lmd->lmd_dev;
+ if (class_parse_nid(ptr, &nid, &ptr) == 0)
+ i++;
+ }
+ if (i == 0) {
+ CERROR("No valid MGS nids found.\n");
+ RETURN(-EINVAL);
+ }
+
+ mutex_lock(&mgc_start_lock);
+
+ len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
+ OBD_ALLOC(mgcname, len);
+ OBD_ALLOC(niduuid, len + 2);
+ if (!mgcname || !niduuid)
+ GOTO(out_free, rc = -ENOMEM);
+ sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
+
+ mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
+
+ OBD_ALLOC_PTR(data);
+ if (data == NULL)
+ GOTO(out_free, rc = -ENOMEM);
+
+ obd = class_name2obd(mgcname);
+ if (obd && !obd->obd_stopping) {
+ rc = obd_set_info_async(NULL, obd->obd_self_export,
+ strlen(KEY_MGSSEC), KEY_MGSSEC,
+ strlen(mgssec), mgssec, NULL);
+ if (rc)
+ GOTO(out_free, rc);
+
+ /* Re-using an existing MGC */
+ atomic_inc(&obd->u.cli.cl_mgc_refcount);
+
+ /* IR compatibility check, only for clients */
+ if (lmd_is_client(lsi->lsi_lmd)) {
+ int has_ir;
+ int vallen = sizeof(*data);
+ __u32 *flags = &lsi->lsi_lmd->lmd_flags;
+
+ rc = obd_get_info(NULL, obd->obd_self_export,
+ strlen(KEY_CONN_DATA), KEY_CONN_DATA,
+ &vallen, data, NULL);
+ LASSERT(rc == 0);
+ has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
+ if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
+ /* LMD_FLG_NOIR is for test purpose only */
+ LCONSOLE_WARN(
+ "Trying to mount a client with IR setting "
+ "not compatible with current mgc. "
+ "Force to use current mgc setting that is "
+ "IR %s.\n",
+ has_ir ? "enabled" : "disabled");
+ if (has_ir)
+ *flags &= ~LMD_FLG_NOIR;
+ else
+ *flags |= LMD_FLG_NOIR;
+ }
+ }
+
+ recov_bk = 0;
+ /* If we are restarting the MGS, don't try to keep the MGC's
+ old connection, or registration will fail. */
+ if (IS_MGS(lsi)) {
+ CDEBUG(D_MOUNT, "New MGS with live MGC\n");
+ recov_bk = 1;
+ }
+
+ /* Try all connections, but only once (again).
+ We don't want to block another target from starting
+ (using its local copy of the log), but we do want to connect
+ if at all possible. */
+ recov_bk++;
+ CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
+ rc = obd_set_info_async(NULL, obd->obd_self_export,
+ sizeof(KEY_INIT_RECOV_BACKUP),
+ KEY_INIT_RECOV_BACKUP,
+ sizeof(recov_bk), &recov_bk, NULL);
+ GOTO(out, rc = 0);
+ }
+
+ CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
+
+ /* Add the primary nids for the MGS */
+ i = 0;
+ sprintf(niduuid, "%s_%x", mgcname, i);
+ if (IS_SERVER(lsi)) {
+ ptr = lsi->lsi_lmd->lmd_mgs;
+ if (IS_MGS(lsi)) {
+ /* Use local nids (including LO) */
+ lnet_process_id_t id;
+ while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+ rc = do_lcfg(mgcname, id.nid,
+ LCFG_ADD_UUID, niduuid, 0,0,0);
+ }
+ } else {
+ /* Use mgsnode= nids */
+ /* mount -o mgsnode=nid */
+ if (lsi->lsi_lmd->lmd_mgs) {
+ ptr = lsi->lsi_lmd->lmd_mgs;
+ } else if (class_find_param(ptr, PARAM_MGSNODE,
+ &ptr) != 0) {
+ CERROR("No MGS nids given.\n");
+ GOTO(out_free, rc = -EINVAL);
+ }
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ rc = do_lcfg(mgcname, nid,
+ LCFG_ADD_UUID, niduuid, 0,0,0);
+ i++;
+ }
+ }
+ } else { /* client */
+ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
+ ptr = lsi->lsi_lmd->lmd_dev;
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ rc = do_lcfg(mgcname, nid,
+ LCFG_ADD_UUID, niduuid, 0,0,0);
+ i++;
+ /* Stop at the first failover nid */
+ if (*ptr == ':')
+ break;
+ }
+ }
+ if (i == 0) {
+ CERROR("No valid MGS nids found.\n");
+ GOTO(out_free, rc = -EINVAL);
+ }
+ lsi->lsi_lmd->lmd_mgs_failnodes = 1;
+
+ /* Random uuid for MGC allows easier reconnects */
+ OBD_ALLOC_PTR(uuid);
+ ll_generate_random_uuid(uuidc);
+ class_uuid_unparse(uuidc, uuid);
+
+ /* Start the MGC */
+ rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
+ (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
+ niduuid, 0, 0);
+ OBD_FREE_PTR(uuid);
+ if (rc)
+ GOTO(out_free, rc);
+
+ /* Add any failover MGS nids */
+ i = 1;
+ while (ptr && ((*ptr == ':' ||
+ class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
+ /* New failover node */
+ sprintf(niduuid, "%s_%x", mgcname, i);
+ j = 0;
+ while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
+ j++;
+ rc = do_lcfg(mgcname, nid,
+ LCFG_ADD_UUID, niduuid, 0,0,0);
+ if (*ptr == ':')
+ break;
+ }
+ if (j > 0) {
+ rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
+ niduuid, 0, 0, 0);
+ i++;
+ } else {
+ /* at ":/fsname" */
+ break;
+ }
+ }
+ lsi->lsi_lmd->lmd_mgs_failnodes = i;
+
+ obd = class_name2obd(mgcname);
+ if (!obd) {
+ CERROR("Can't find mgcobd %s\n", mgcname);
+ GOTO(out_free, rc = -ENOTCONN);
+ }
+
+ rc = obd_set_info_async(NULL, obd->obd_self_export,
+ strlen(KEY_MGSSEC), KEY_MGSSEC,
+ strlen(mgssec), mgssec, NULL);
+ if (rc)
+ GOTO(out_free, rc);
+
+ /* Keep a refcount of servers/clients who started with "mount",
+ so we know when we can get rid of the mgc. */
+ atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
+
+ /* Try all connections, but only once. */
+ recov_bk = 1;
+ rc = obd_set_info_async(NULL, obd->obd_self_export,
+ sizeof(KEY_INIT_RECOV_BACKUP),
+ KEY_INIT_RECOV_BACKUP,
+ sizeof(recov_bk), &recov_bk, NULL);
+ if (rc)
+ /* nonfatal */
+ CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
+
+ /* We connect to the MGS at setup, and don't disconnect until cleanup */
+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
+ OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
+ OBD_CONNECT_LVB_TYPE;
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+ data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
+#endif
+
+ if (lmd_is_client(lsi->lsi_lmd) &&
+ lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
+ data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
+ data->ocd_version = LUSTRE_VERSION_CODE;
+ rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
+ if (rc) {
+ CERROR("connect failed %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ obd->u.cli.cl_mgc_mgsexp = exp;
+
+out:
+ /* Keep the mgc info in the sb. Note that many lsi's can point
+ to the same mgc.*/
+ lsi->lsi_mgc = obd;
+out_free:
+ mutex_unlock(&mgc_start_lock);
+
+ if (data)
+ OBD_FREE_PTR(data);
+ if (mgcname)
+ OBD_FREE(mgcname, len);
+ if (niduuid)
+ OBD_FREE(niduuid, len + 2);
+ RETURN(rc);
+}
+
+static int lustre_stop_mgc(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct obd_device *obd;
+ char *niduuid = 0, *ptr = 0;
+ int i, rc = 0, len = 0;
+ ENTRY;
+
+ if (!lsi)
+ RETURN(-ENOENT);
+ obd = lsi->lsi_mgc;
+ if (!obd)
+ RETURN(-ENOENT);
+ lsi->lsi_mgc = NULL;
+
+ mutex_lock(&mgc_start_lock);
+ LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
+ if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
+ /* This is not fatal, every client that stops
+ will call in here. */
+ CDEBUG(D_MOUNT, "mgc still has %d references.\n",
+ atomic_read(&obd->u.cli.cl_mgc_refcount));
+ GOTO(out, rc = -EBUSY);
+ }
+
+ /* The MGC has no recoverable data in any case.
+ * force shotdown set in umount_begin */
+ obd->obd_no_recov = 1;
+
+ if (obd->u.cli.cl_mgc_mgsexp) {
+ /* An error is not fatal, if we are unable to send the
+ disconnect mgs ping evictor cleans up the export */
+ rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
+ if (rc)
+ CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
+ }
+
+ /* Save the obdname for cleaning the nid uuids, which are
+ obdname_XX */
+ len = strlen(obd->obd_name) + 6;
+ OBD_ALLOC(niduuid, len);
+ if (niduuid) {
+ strcpy(niduuid, obd->obd_name);
+ ptr = niduuid + strlen(niduuid);
+ }
+
+ rc = class_manual_cleanup(obd);
+ if (rc)
+ GOTO(out, rc);
+
+ /* Clean the nid uuids */
+ if (!niduuid)
+ GOTO(out, rc = -ENOMEM);
+
+ for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
+ sprintf(ptr, "_%x", i);
+ rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
+ niduuid, 0, 0, 0);
+ if (rc)
+ CERROR("del MDC UUID %s failed: rc = %d\n",
+ niduuid, rc);
+ }
+out:
+ if (niduuid)
+ OBD_FREE(niduuid, len);
+
+ /* class_import_put will get rid of the additional connections */
+ mutex_unlock(&mgc_start_lock);
+ RETURN(rc);
+}
+
+/***************** lustre superblock **************/
+
+struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi;
+ ENTRY;
+
+ OBD_ALLOC_PTR(lsi);
+ if (!lsi)
+ RETURN(NULL);
+ OBD_ALLOC_PTR(lsi->lsi_lmd);
+ if (!lsi->lsi_lmd) {
+ OBD_FREE_PTR(lsi);
+ RETURN(NULL);
+ }
+
+ lsi->lsi_lmd->lmd_exclude_count = 0;
+ lsi->lsi_lmd->lmd_recovery_time_soft = 0;
+ lsi->lsi_lmd->lmd_recovery_time_hard = 0;
+ s2lsi_nocast(sb) = lsi;
+ /* we take 1 extra ref for our setup */
+ atomic_set(&lsi->lsi_mounts, 1);
+
+ /* Default umount style */
+ lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
+
+ RETURN(lsi);
+}
+
+static int lustre_free_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ ENTRY;
+
+ LASSERT(lsi != NULL);
+ CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
+
+ /* someone didn't call server_put_mount. */
+ LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
+
+ if (lsi->lsi_lmd != NULL) {
+ if (lsi->lsi_lmd->lmd_dev != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_dev,
+ strlen(lsi->lsi_lmd->lmd_dev) + 1);
+ if (lsi->lsi_lmd->lmd_profile != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_profile,
+ strlen(lsi->lsi_lmd->lmd_profile) + 1);
+ if (lsi->lsi_lmd->lmd_mgssec != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
+ strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
+ if (lsi->lsi_lmd->lmd_opts != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_opts,
+ strlen(lsi->lsi_lmd->lmd_opts) + 1);
+ if (lsi->lsi_lmd->lmd_exclude_count)
+ OBD_FREE(lsi->lsi_lmd->lmd_exclude,
+ sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
+ lsi->lsi_lmd->lmd_exclude_count);
+ if (lsi->lsi_lmd->lmd_mgs != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_mgs,
+ strlen(lsi->lsi_lmd->lmd_mgs) + 1);
+ if (lsi->lsi_lmd->lmd_osd_type != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
+ strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
+ if (lsi->lsi_lmd->lmd_params != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
+
+ OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
+ }
+
+ LASSERT(lsi->lsi_llsbi == NULL);
+ OBD_FREE(lsi, sizeof(*lsi));
+ s2lsi_nocast(sb) = NULL;
+
+ RETURN(0);
+}
+
+/* The lsi has one reference for every server that is using the disk -
+ e.g. MDT, MGS, and potentially MGC */
+int lustre_put_lsi(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ ENTRY;
+
+ LASSERT(lsi != NULL);
+
+ CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
+ if (atomic_dec_and_test(&lsi->lsi_mounts)) {
+ if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
+ obd_disconnect(lsi->lsi_osd_exp);
+ /* wait till OSD is gone */
+ obd_zombie_barrier();
+ }
+ lustre_free_lsi(sb);
+ RETURN(1);
+ }
+ RETURN(0);
+}
+
+/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
+ * @param [in] svname server name including type and index
+ * @param [out] fsname Buffer to copy filesystem name prefix into.
+ * Must have at least 'strlen(fsname) + 1' chars.
+ * @param [out] endptr if endptr isn't NULL it is set to end of fsname
+ * rc < 0 on error
+ */
+int server_name2fsname(const char *svname, char *fsname, const char **endptr)
+{
+ const char *dash = strrchr(svname, '-');
+ if (!dash) {
+ dash = strrchr(svname, ':');
+ if (!dash)
+ return -EINVAL;
+ }
+
+ /* interpret <fsname>-MDTXXXXX-mdc as mdt, the better way is to pass
+ * in the fsname, then determine the server index */
+ if (!strcmp(LUSTRE_MDC_NAME, dash + 1)) {
+ dash--;
+ for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
+ ;
+ if (dash == svname)
+ return -EINVAL;
+ }
+
+ if (fsname != NULL) {
+ strncpy(fsname, svname, dash - svname);
+ fsname[dash - svname] = '\0';
+ }
+
+ if (endptr != NULL)
+ *endptr = dash;
+
+ return 0;
+}
+EXPORT_SYMBOL(server_name2fsname);
+
+/**
+ * Get service name (svname) from string
+ * rc < 0 on error
+ * if endptr isn't NULL it is set to end of fsname *
+ */
+int server_name2svname(const char *label, char *svname, const char **endptr,
+ size_t svsize)
+{
+ int rc;
+ const const char *dash;
+
+ /* We use server_name2fsname() just for parsing */
+ rc = server_name2fsname(label, NULL, &dash);
+ if (rc != 0)
+ return rc;
+
+ if (*dash != '-')
+ return -1;
+
+ if (strlcpy(svname, dash + 1, svsize) >= svsize)
+ return -E2BIG;
+
+ return 0;
+}
+EXPORT_SYMBOL(server_name2svname);
+
+
+/* Get the index from the obd name.
+ rc = server type, or
+ rc < 0 on error
+ if endptr isn't NULL it is set to end of name */
+int server_name2index(const char *svname, __u32 *idx, const char **endptr)
+{
+ unsigned long index;
+ int rc;
+ const char *dash;
+
+ /* We use server_name2fsname() just for parsing */
+ rc = server_name2fsname(svname, NULL, &dash);
+ if (rc != 0)
+ return rc;
+
+ if (*dash != '-')
+ return -EINVAL;
+
+ dash++;
+
+ if (strncmp(dash, "MDT", 3) == 0)
+ rc = LDD_F_SV_TYPE_MDT;
+ else if (strncmp(dash, "OST", 3) == 0)
+ rc = LDD_F_SV_TYPE_OST;
+ else
+ return -EINVAL;
+
+ dash += 3;
+
+ if (strcmp(dash, "all") == 0)
+ return rc | LDD_F_SV_ALL;
+
+ index = simple_strtoul(dash, (char **)endptr, 16);
+ *idx = index;
+
+ return rc;
+}
+EXPORT_SYMBOL(server_name2index);
+
+/*************** mount common betweeen server and client ***************/
+
+/* Common umount */
+int lustre_common_put_super(struct super_block *sb)
+{
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
+
+ /* Drop a ref to the MGC */
+ rc = lustre_stop_mgc(sb);
+ if (rc && (rc != -ENOENT)) {
+ if (rc != -EBUSY) {
+ CERROR("Can't stop MGC: %d\n", rc);
+ RETURN(rc);
+ }
+ /* BUSY just means that there's some other obd that
+ needs the mgc. Let him clean it up. */
+ CDEBUG(D_MOUNT, "MGC still in use\n");
+ }
+ /* Drop a ref to the mounted disk */
+ lustre_put_lsi(sb);
+ lu_types_stop();
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lustre_common_put_super);
+
+static void lmd_print(struct lustre_mount_data *lmd)
+{
+ int i;
+
+ PRINT_CMD(D_MOUNT, " mount data:\n");
+ if (lmd_is_client(lmd))
+ PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
+ PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
+ PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
+
+ if (lmd->lmd_opts)
+ PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
+
+ if (lmd->lmd_recovery_time_soft)
+ PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
+ lmd->lmd_recovery_time_soft);
+
+ if (lmd->lmd_recovery_time_hard)
+ PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
+ lmd->lmd_recovery_time_hard);
+
+ for (i = 0; i < lmd->lmd_exclude_count; i++) {
+ PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
+ lmd->lmd_exclude[i]);
+ }
+}
+
+/* Is this server on the exclusion list */
+int lustre_check_exclusion(struct super_block *sb, char *svname)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct lustre_mount_data *lmd = lsi->lsi_lmd;
+ __u32 index;
+ int i, rc;
+ ENTRY;
+
+ rc = server_name2index(svname, &index, NULL);
+ if (rc != LDD_F_SV_TYPE_OST)
+ /* Only exclude OSTs */
+ RETURN(0);
+
+ CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
+ index, lmd->lmd_exclude_count, lmd->lmd_dev);
+
+ for(i = 0; i < lmd->lmd_exclude_count; i++) {
+ if (index == lmd->lmd_exclude[i]) {
+ CWARN("Excluding %s (on exclusion list)\n", svname);
+ RETURN(1);
+ }
+ }
+ RETURN(0);
+}
+
+/* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
+static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
+{
+ const char *s1 = ptr, *s2;
+ __u32 index, *exclude_list;
+ int rc = 0, devmax;
+ ENTRY;
+
+ /* The shortest an ost name can be is 8 chars: -OST0000.
+ We don't actually know the fsname at this time, so in fact
+ a user could specify any fsname. */
+ devmax = strlen(ptr) / 8 + 1;
+
+ /* temp storage until we figure out how many we have */
+ OBD_ALLOC(exclude_list, sizeof(index) * devmax);
+ if (!exclude_list)
+ RETURN(-ENOMEM);
+
+ /* we enter this fn pointing at the '=' */
+ while (*s1 && *s1 != ' ' && *s1 != ',') {
+ s1++;
+ rc = server_name2index(s1, &index, &s2);
+ if (rc < 0) {
+ CERROR("Can't parse server name '%s'\n", s1);
+ break;
+ }
+ if (rc == LDD_F_SV_TYPE_OST)
+ exclude_list[lmd->lmd_exclude_count++] = index;
+ else
+ CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1);
+ s1 = s2;
+ /* now we are pointing at ':' (next exclude)
+ or ',' (end of excludes) */
+ if (lmd->lmd_exclude_count >= devmax)
+ break;
+ }
+ if (rc >= 0) /* non-err */
+ rc = 0;
+
+ if (lmd->lmd_exclude_count) {
+ /* permanent, freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
+ lmd->lmd_exclude_count);
+ if (lmd->lmd_exclude) {
+ memcpy(lmd->lmd_exclude, exclude_list,
+ sizeof(index) * lmd->lmd_exclude_count);
+ } else {
+ rc = -ENOMEM;
+ lmd->lmd_exclude_count = 0;
+ }
+ }
+ OBD_FREE(exclude_list, sizeof(index) * devmax);
+ RETURN(rc);
+}
+
+static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
+{
+ char *tail;
+ int length;
+
+ if (lmd->lmd_mgssec != NULL) {
+ OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
+ lmd->lmd_mgssec = NULL;
+ }
+
+ tail = strchr(ptr, ',');
+ if (tail == NULL)
+ length = strlen(ptr);
+ else
+ length = tail - ptr;
+
+ OBD_ALLOC(lmd->lmd_mgssec, length + 1);
+ if (lmd->lmd_mgssec == NULL)
+ return -ENOMEM;
+
+ memcpy(lmd->lmd_mgssec, ptr, length);
+ lmd->lmd_mgssec[length] = '\0';
+ return 0;
+}
+
+static int lmd_parse_string(char **handle, char *ptr)
+{
+ char *tail;
+ int length;
+
+ if ((handle == NULL) || (ptr == NULL))
+ return -EINVAL;
+
+ if (*handle != NULL) {
+ OBD_FREE(*handle, strlen(*handle) + 1);
+ *handle = NULL;
+ }
+
+ tail = strchr(ptr, ',');
+ if (tail == NULL)
+ length = strlen(ptr);
+ else
+ length = tail - ptr;
+
+ OBD_ALLOC(*handle, length + 1);
+ if (*handle == NULL)
+ return -ENOMEM;
+
+ memcpy(*handle, ptr, length);
+ (*handle)[length] = '\0';
+
+ return 0;
+}
+
+/* Collect multiple values for mgsnid specifiers */
+static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
+{
+ lnet_nid_t nid;
+ char *tail = *ptr;
+ char *mgsnid;
+ int length;
+ int oldlen = 0;
+
+ /* Find end of nidlist */
+ while (class_parse_nid_quiet(tail, &nid, &tail) == 0) {}
+ length = tail - *ptr;
+ if (length == 0) {
+ LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
+ return -EINVAL;
+ }
+
+ if (lmd->lmd_mgs != NULL)
+ oldlen = strlen(lmd->lmd_mgs) + 1;
+
+ OBD_ALLOC(mgsnid, oldlen + length + 1);
+ if (mgsnid == NULL)
+ return -ENOMEM;
+
+ if (lmd->lmd_mgs != NULL) {
+ /* Multiple mgsnid= are taken to mean failover locations */
+ memcpy(mgsnid, lmd->lmd_mgs, oldlen);
+ mgsnid[oldlen - 1] = ':';
+ OBD_FREE(lmd->lmd_mgs, oldlen);
+ }
+ memcpy(mgsnid + oldlen, *ptr, length);
+ mgsnid[oldlen + length] = '\0';
+ lmd->lmd_mgs = mgsnid;
+ *ptr = tail;
+
+ return 0;
+}
+
+/** Parse mount line options
+ * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
+ * dev is passed as device=uml1:/lustre by mount.lustre
+ */
+static int lmd_parse(char *options, struct lustre_mount_data *lmd)
+{
+ char *s1, *s2, *devname = NULL;
+ struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(lmd);
+ if (!options) {
+ LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
+ "/sbin/mount.lustre is installed.\n");
+ RETURN(-EINVAL);
+ }
+
+ /* Options should be a string - try to detect old lmd data */
+ if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
+ LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
+ "/sbin/mount.lustre. Please install "
+ "version %s\n", LUSTRE_VERSION_STRING);
+ RETURN(-EINVAL);
+ }
+ lmd->lmd_magic = LMD_MAGIC;
+
+ OBD_ALLOC(lmd->lmd_params, 4096);
+ if (lmd->lmd_params == NULL)
+ RETURN(-ENOMEM);
+ lmd->lmd_params[0] = '\0';
+
+ /* Set default flags here */
+
+ s1 = options;
+ while (*s1) {
+ int clear = 0;
+ int time_min = OBD_RECOVERY_TIME_MIN;
+
+ /* Skip whitespace and extra commas */
+ while (*s1 == ' ' || *s1 == ',')
+ s1++;
+
+ /* Client options are parsed in ll_options: eg. flock,
+ user_xattr, acl */
+
+ /* Parse non-ldiskfs options here. Rather than modifying
+ ldiskfs, we just zero these out here */
+ if (strncmp(s1, "abort_recov", 11) == 0) {
+ lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
+ clear++;
+ } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
+ lmd->lmd_recovery_time_soft = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
+ } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
+ lmd->lmd_recovery_time_hard = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
+ } else if (strncmp(s1, "noir", 4) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
+ clear++;
+ } else if (strncmp(s1, "nosvc", 5) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOSVC;
+ clear++;
+ } else if (strncmp(s1, "nomgs", 5) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOMGS;
+ clear++;
+ } else if (strncmp(s1, "noscrub", 7) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NOSCRUB;
+ clear++;
+ } else if (strncmp(s1, PARAM_MGSNODE,
+ sizeof(PARAM_MGSNODE) - 1) == 0) {
+ s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
+ /* Assume the next mount opt is the first
+ invalid nid we get to. */
+ rc = lmd_parse_mgs(lmd, &s2);
+ if (rc)
+ goto invalid;
+ clear++;
+ } else if (strncmp(s1, "writeconf", 9) == 0) {
+ lmd->lmd_flags |= LMD_FLG_WRITECONF;
+ clear++;
+ } else if (strncmp(s1, "update", 6) == 0) {
+ lmd->lmd_flags |= LMD_FLG_UPDATE;
+ clear++;
+ } else if (strncmp(s1, "virgin", 6) == 0) {
+ lmd->lmd_flags |= LMD_FLG_VIRGIN;
+ clear++;
+ } else if (strncmp(s1, "noprimnode", 10) == 0) {
+ lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
+ clear++;
+ } else if (strncmp(s1, "mgssec=", 7) == 0) {
+ rc = lmd_parse_mgssec(lmd, s1 + 7);
+ if (rc)
+ goto invalid;
+ clear++;
+ /* ost exclusion list */
+ } else if (strncmp(s1, "exclude=", 8) == 0) {
+ rc = lmd_make_exclusion(lmd, s1 + 7);
+ if (rc)
+ goto invalid;
+ clear++;
+ } else if (strncmp(s1, "mgs", 3) == 0) {
+ /* We are an MGS */
+ lmd->lmd_flags |= LMD_FLG_MGS;
+ clear++;
+ } else if (strncmp(s1, "svname=", 7) == 0) {
+ rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
+ if (rc)
+ goto invalid;
+ clear++;
+ } else if (strncmp(s1, "param=", 6) == 0) {
+ int length;
+ char *tail = strchr(s1 + 6, ',');
+ if (tail == NULL)
+ length = strlen(s1);
+ else
+ length = tail - s1;
+ length -= 6;
+ strncat(lmd->lmd_params, s1 + 6, length);
+ strcat(lmd->lmd_params, " ");
+ clear++;
+ } else if (strncmp(s1, "osd=", 4) == 0) {
+ rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
+ if (rc)
+ goto invalid;
+ clear++;
+ }
+ /* Linux 2.4 doesn't pass the device, so we stuck it at the
+ end of the options. */
+ else if (strncmp(s1, "device=", 7) == 0) {
+ devname = s1 + 7;
+ /* terminate options right before device. device
+ must be the last one. */
+ *s1 = '\0';
+ break;
+ }
+
+ /* Find next opt */
+ s2 = strchr(s1, ',');
+ if (s2 == NULL) {
+ if (clear)
+ *s1 = '\0';
+ break;
+ }
+ s2++;
+ if (clear)
+ memmove(s1, s2, strlen(s2) + 1);
+ else
+ s1 = s2;
+ }
+
+ if (!devname) {
+ LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
+ "(need mount option 'device=...')\n");
+ goto invalid;
+ }
+
+ s1 = strstr(devname, ":/");
+ if (s1) {
+ ++s1;
+ lmd->lmd_flags |= LMD_FLG_CLIENT;
+ /* Remove leading /s from fsname */
+ while (*++s1 == '/') ;
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
+ if (!lmd->lmd_profile)
+ RETURN(-ENOMEM);
+ sprintf(lmd->lmd_profile, "%s-client", s1);
+ }
+
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
+ if (!lmd->lmd_dev)
+ RETURN(-ENOMEM);
+ strcpy(lmd->lmd_dev, devname);
+
+ /* Save mount options */
+ s1 = options + strlen(options) - 1;
+ while (s1 >= options && (*s1 == ',' || *s1 == ' '))
+ *s1-- = 0;
+ if (*options != 0) {
+ /* Freed in lustre_free_lsi */
+ OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
+ if (!lmd->lmd_opts)
+ RETURN(-ENOMEM);
+ strcpy(lmd->lmd_opts, options);
+ }
+
+ lmd_print(lmd);
+ lmd->lmd_magic = LMD_MAGIC;
+
+ RETURN(rc);
+
+invalid:
+ CERROR("Bad mount options %s\n", options);
+ RETURN(-EINVAL);
+}
+
+struct lustre_mount_data2 {
+ void *lmd2_data;
+ struct vfsmount *lmd2_mnt;
+};
+
+/** This is the entry point for the mount call into Lustre.
+ * This is called when a server or client is mounted,
+ * and this is where we start setting things up.
+ * @param data Mount options (e.g. -o flock,abort_recov)
+ */
+int lustre_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct lustre_mount_data *lmd;
+ struct lustre_mount_data2 *lmd2 = data;
+ struct lustre_sb_info *lsi;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
+
+ lsi = lustre_init_lsi(sb);
+ if (!lsi)
+ RETURN(-ENOMEM);
+ lmd = lsi->lsi_lmd;
+
+ /*
+ * Disable lockdep during mount, because mount locking patterns are
+ * `special'.
+ */
+ lockdep_off();
+
+ /*
+ * LU-639: the obd cleanup of last mount may not finish yet, wait here.
+ */
+ obd_zombie_barrier();
+
+ /* Figure out the lmd from the mount options */
+ if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
+ lustre_put_lsi(sb);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (lmd_is_client(lmd)) {
+ CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
+ if (!client_fill_super) {
+ LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
+ "client mount! Is the 'lustre' "
+ "module loaded?\n");
+ lustre_put_lsi(sb);
+ rc = -ENODEV;
+ } else {
+ rc = lustre_start_mgc(sb);
+ if (rc) {
+ lustre_put_lsi(sb);
+ GOTO(out, rc);
+ }
+ /* Connect and start */
+ /* (should always be ll_fill_super) */
+ rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
+ /* c_f_s will call lustre_common_put_super on failure */
+ }
+ } else {
+ CERROR("This is client-side-only module, "
+ "cannot handle server mount.\n");
+ rc = -EINVAL;
+ }
+
+ /* If error happens in fill_super() call, @lsi will be killed there.
+ * This is why we do not put it here. */
+ GOTO(out, rc);
+out:
+ if (rc) {
+ CERROR("Unable to mount %s (%d)\n",
+ s2lsi(sb) ? lmd->lmd_dev : "", rc);
+ } else {
+ CDEBUG(D_SUPER, "Mount %s complete\n",
+ lmd->lmd_dev);
+ }
+ lockdep_on();
+ return rc;
+}
+
+
+/* We can't call ll_fill_super by name because it lives in a module that
+ must be loaded after this one. */
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
+ struct vfsmount *mnt))
+{
+ client_fill_super = cfs;
+}
+EXPORT_SYMBOL(lustre_register_client_fill_super);
+
+void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
+{
+ kill_super_cb = cfs;
+}
+EXPORT_SYMBOL(lustre_register_kill_super_cb);
+
+/***************** FS registration ******************/
+struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
+ const char *devname, void *data)
+{
+ struct lustre_mount_data2 lmd2 = { data, NULL };
+
+ return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
+}
+
+void lustre_kill_super(struct super_block *sb)
+{
+ struct lustre_sb_info *lsi = s2lsi(sb);
+
+ if (kill_super_cb && lsi && !IS_SERVER(lsi))
+ (*kill_super_cb)(sb);
+
+ kill_anon_super(sb);
+}
+
+/** Register the "lustre" fs type
+ */
+struct file_system_type lustre_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "lustre",
+ .mount = lustre_mount,
+ .kill_sb = lustre_kill_super,
+ .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
+ FS_HAS_FIEMAP | FS_RENAME_DOES_D_MOVE,
+};
+
+int lustre_register_fs(void)
+{
+ return register_filesystem(&lustre_fs_type);
+}
+
+int lustre_unregister_fs(void)
+{
+ return unregister_filesystem(&lustre_fs_type);
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
new file mode 100644
index 000000000000..01a0e1f83a68
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -0,0 +1,362 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obdo.c
+ *
+ * Object Devices Class Driver
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
+
+void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent)
+{
+ dst->o_parent_oid = fid_oid(parent);
+ dst->o_parent_seq = fid_seq(parent);
+ dst->o_parent_ver = fid_ver(parent);
+ dst->o_valid |= OBD_MD_FLGENER | OBD_MD_FLFID;
+}
+EXPORT_SYMBOL(obdo_set_parent_fid);
+
+/* WARNING: the file systems must take care not to tinker with
+ attributes they don't manage (such as blocks). */
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
+{
+ obd_flag newvalid = 0;
+
+ if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
+ CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
+ valid, LTIME_S(src->i_mtime),
+ LTIME_S(src->i_ctime));
+
+ if (valid & OBD_MD_FLATIME) {
+ dst->o_atime = LTIME_S(src->i_atime);
+ newvalid |= OBD_MD_FLATIME;
+ }
+ if (valid & OBD_MD_FLMTIME) {
+ dst->o_mtime = LTIME_S(src->i_mtime);
+ newvalid |= OBD_MD_FLMTIME;
+ }
+ if (valid & OBD_MD_FLCTIME) {
+ dst->o_ctime = LTIME_S(src->i_ctime);
+ newvalid |= OBD_MD_FLCTIME;
+ }
+ if (valid & OBD_MD_FLSIZE) {
+ dst->o_size = i_size_read(src);
+ newvalid |= OBD_MD_FLSIZE;
+ }
+ if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */
+ dst->o_blocks = src->i_blocks;
+ newvalid |= OBD_MD_FLBLOCKS;
+ }
+ if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */
+ dst->o_blksize = ll_inode_blksize(src);
+ newvalid |= OBD_MD_FLBLKSZ;
+ }
+ if (valid & OBD_MD_FLTYPE) {
+ dst->o_mode = (dst->o_mode & S_IALLUGO) |
+ (src->i_mode & S_IFMT);
+ newvalid |= OBD_MD_FLTYPE;
+ }
+ if (valid & OBD_MD_FLMODE) {
+ dst->o_mode = (dst->o_mode & S_IFMT) |
+ (src->i_mode & S_IALLUGO);
+ newvalid |= OBD_MD_FLMODE;
+ }
+ if (valid & OBD_MD_FLUID) {
+ dst->o_uid = src->i_uid;
+ newvalid |= OBD_MD_FLUID;
+ }
+ if (valid & OBD_MD_FLGID) {
+ dst->o_gid = src->i_gid;
+ newvalid |= OBD_MD_FLGID;
+ }
+ if (valid & OBD_MD_FLFLAGS) {
+ dst->o_flags = ll_inode_flags(src);
+ newvalid |= OBD_MD_FLFLAGS;
+ }
+ dst->o_valid |= newvalid;
+}
+EXPORT_SYMBOL(obdo_from_inode);
+
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid)
+{
+ CDEBUG(D_INODE, "src obdo "DOSTID" valid "LPX64", dst obdo "DOSTID"\n",
+ POSTID(&src->o_oi), src->o_valid, POSTID(&dst->o_oi));
+ if (valid & OBD_MD_FLATIME)
+ dst->o_atime = src->o_atime;
+ if (valid & OBD_MD_FLMTIME)
+ dst->o_mtime = src->o_mtime;
+ if (valid & OBD_MD_FLCTIME)
+ dst->o_ctime = src->o_ctime;
+ if (valid & OBD_MD_FLSIZE)
+ dst->o_size = src->o_size;
+ if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
+ dst->o_blocks = src->o_blocks;
+ if (valid & OBD_MD_FLBLKSZ)
+ dst->o_blksize = src->o_blksize;
+ if (valid & OBD_MD_FLTYPE)
+ dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
+ if (valid & OBD_MD_FLMODE)
+ dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
+ if (valid & OBD_MD_FLUID)
+ dst->o_uid = src->o_uid;
+ if (valid & OBD_MD_FLGID)
+ dst->o_gid = src->o_gid;
+ if (valid & OBD_MD_FLFLAGS)
+ dst->o_flags = src->o_flags;
+ if (valid & OBD_MD_FLFID) {
+ dst->o_parent_seq = src->o_parent_seq;
+ dst->o_parent_ver = src->o_parent_ver;
+ }
+ if (valid & OBD_MD_FLGENER)
+ dst->o_parent_oid = src->o_parent_oid;
+ if (valid & OBD_MD_FLHANDLE)
+ dst->o_handle = src->o_handle;
+ if (valid & OBD_MD_FLCOOKIE)
+ dst->o_lcookie = src->o_lcookie;
+
+ dst->o_valid |= valid;
+}
+EXPORT_SYMBOL(obdo_cpy_md);
+
+/* returns FALSE if comparison (by flags) is same, TRUE if changed */
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare)
+{
+ int res = 0;
+
+ if ( compare & OBD_MD_FLATIME )
+ res = (res || (dst->o_atime != src->o_atime));
+ if ( compare & OBD_MD_FLMTIME )
+ res = (res || (dst->o_mtime != src->o_mtime));
+ if ( compare & OBD_MD_FLCTIME )
+ res = (res || (dst->o_ctime != src->o_ctime));
+ if ( compare & OBD_MD_FLSIZE )
+ res = (res || (dst->o_size != src->o_size));
+ if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */
+ res = (res || (dst->o_blocks != src->o_blocks));
+ if ( compare & OBD_MD_FLBLKSZ )
+ res = (res || (dst->o_blksize != src->o_blksize));
+ if ( compare & OBD_MD_FLTYPE )
+ res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0));
+ if ( compare & OBD_MD_FLMODE )
+ res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0));
+ if ( compare & OBD_MD_FLUID )
+ res = (res || (dst->o_uid != src->o_uid));
+ if ( compare & OBD_MD_FLGID )
+ res = (res || (dst->o_gid != src->o_gid));
+ if ( compare & OBD_MD_FLFLAGS )
+ res = (res || (dst->o_flags != src->o_flags));
+ if ( compare & OBD_MD_FLNLINK )
+ res = (res || (dst->o_nlink != src->o_nlink));
+ if ( compare & OBD_MD_FLFID ) {
+ res = (res || (dst->o_parent_seq != src->o_parent_seq));
+ res = (res || (dst->o_parent_ver != src->o_parent_ver));
+ }
+ if ( compare & OBD_MD_FLGENER )
+ res = (res || (dst->o_parent_oid != src->o_parent_oid));
+ /* XXX Don't know if thses should be included here - wasn't previously
+ if ( compare & OBD_MD_FLINLINE )
+ res = (res || memcmp(dst->o_inline, src->o_inline));
+ */
+ return res;
+}
+EXPORT_SYMBOL(obdo_cmp_md);
+
+void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj)
+{
+ ioobj->ioo_oid = oa->o_oi;
+ if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP)))
+ ostid_set_seq_mdt0(&ioobj->ioo_oid);
+
+ /* Since 2.4 this does not contain o_mode in the low 16 bits.
+ * Instead, it holds (bd_md_max_brw - 1) for multi-bulk BRW RPCs */
+ ioobj->ioo_max_brw = 0;
+}
+EXPORT_SYMBOL(obdo_to_ioobj);
+
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid)
+{
+ if (ia_valid & ATTR_ATIME) {
+ oa->o_atime = LTIME_S(attr->ia_atime);
+ oa->o_valid |= OBD_MD_FLATIME;
+ }
+ if (ia_valid & ATTR_MTIME) {
+ oa->o_mtime = LTIME_S(attr->ia_mtime);
+ oa->o_valid |= OBD_MD_FLMTIME;
+ }
+ if (ia_valid & ATTR_CTIME) {
+ oa->o_ctime = LTIME_S(attr->ia_ctime);
+ oa->o_valid |= OBD_MD_FLCTIME;
+ }
+ if (ia_valid & ATTR_SIZE) {
+ oa->o_size = attr->ia_size;
+ oa->o_valid |= OBD_MD_FLSIZE;
+ }
+ if (ia_valid & ATTR_MODE) {
+ oa->o_mode = attr->ia_mode;
+ oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
+ if (!current_is_in_group(oa->o_gid) &&
+ !cfs_capable(CFS_CAP_FSETID))
+ oa->o_mode &= ~S_ISGID;
+ }
+ if (ia_valid & ATTR_UID) {
+ oa->o_uid = attr->ia_uid;
+ oa->o_valid |= OBD_MD_FLUID;
+ }
+ if (ia_valid & ATTR_GID) {
+ oa->o_gid = attr->ia_gid;
+ oa->o_valid |= OBD_MD_FLGID;
+ }
+}
+EXPORT_SYMBOL(obdo_from_iattr);
+
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid)
+{
+ valid &= oa->o_valid;
+
+ if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
+ CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n",
+ oa->o_valid, oa->o_mtime, oa->o_ctime);
+
+ attr->ia_valid = 0;
+ if (valid & OBD_MD_FLATIME) {
+ LTIME_S(attr->ia_atime) = oa->o_atime;
+ attr->ia_valid |= ATTR_ATIME;
+ }
+ if (valid & OBD_MD_FLMTIME) {
+ LTIME_S(attr->ia_mtime) = oa->o_mtime;
+ attr->ia_valid |= ATTR_MTIME;
+ }
+ if (valid & OBD_MD_FLCTIME) {
+ LTIME_S(attr->ia_ctime) = oa->o_ctime;
+ attr->ia_valid |= ATTR_CTIME;
+ }
+ if (valid & OBD_MD_FLSIZE) {
+ attr->ia_size = oa->o_size;
+ attr->ia_valid |= ATTR_SIZE;
+ }
+#if 0 /* you shouldn't be able to change a file's type with setattr */
+ if (valid & OBD_MD_FLTYPE) {
+ attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
+ attr->ia_valid |= ATTR_MODE;
+ }
+#endif
+ if (valid & OBD_MD_FLMODE) {
+ attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
+ attr->ia_valid |= ATTR_MODE;
+ if (!current_is_in_group(oa->o_gid) &&
+ !cfs_capable(CFS_CAP_FSETID))
+ attr->ia_mode &= ~S_ISGID;
+ }
+ if (valid & OBD_MD_FLUID) {
+ attr->ia_uid = oa->o_uid;
+ attr->ia_valid |= ATTR_UID;
+ }
+ if (valid & OBD_MD_FLGID) {
+ attr->ia_gid = oa->o_gid;
+ attr->ia_valid |= ATTR_GID;
+ }
+}
+EXPORT_SYMBOL(iattr_from_obdo);
+
+void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid)
+{
+ iattr_from_obdo(&op_data->op_attr, oa, valid);
+ if (valid & OBD_MD_FLBLOCKS) {
+ op_data->op_attr_blocks = oa->o_blocks;
+ op_data->op_attr.ia_valid |= ATTR_BLOCKS;
+ }
+ if (valid & OBD_MD_FLFLAGS) {
+ ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
+ oa->o_flags;
+ op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
+ }
+}
+EXPORT_SYMBOL(md_from_obdo);
+
+void obdo_from_md(struct obdo *oa, struct md_op_data *op_data,
+ unsigned int valid)
+{
+ obdo_from_iattr(oa, &op_data->op_attr, valid);
+ if (valid & ATTR_BLOCKS) {
+ oa->o_blocks = op_data->op_attr_blocks;
+ oa->o_valid |= OBD_MD_FLBLOCKS;
+ }
+ if (valid & ATTR_ATTR_FLAG) {
+ oa->o_flags =
+ ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags;
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ }
+}
+EXPORT_SYMBOL(obdo_from_md);
+
+void obdo_cpu_to_le(struct obdo *dobdo, struct obdo *sobdo)
+{
+ dobdo->o_size = cpu_to_le64(sobdo->o_size);
+ dobdo->o_mtime = cpu_to_le64(sobdo->o_mtime);
+ dobdo->o_atime = cpu_to_le64(sobdo->o_atime);
+ dobdo->o_ctime = cpu_to_le64(sobdo->o_ctime);
+ dobdo->o_blocks = cpu_to_le64(sobdo->o_blocks);
+ dobdo->o_mode = cpu_to_le32(sobdo->o_mode);
+ dobdo->o_uid = cpu_to_le32(sobdo->o_uid);
+ dobdo->o_gid = cpu_to_le32(sobdo->o_gid);
+ dobdo->o_flags = cpu_to_le32(sobdo->o_flags);
+ dobdo->o_nlink = cpu_to_le32(sobdo->o_nlink);
+ dobdo->o_blksize = cpu_to_le32(sobdo->o_blksize);
+ dobdo->o_valid = cpu_to_le64(sobdo->o_valid);
+}
+EXPORT_SYMBOL(obdo_cpu_to_le);
+
+void obdo_le_to_cpu(struct obdo *dobdo, struct obdo *sobdo)
+{
+ dobdo->o_size = le64_to_cpu(sobdo->o_size);
+ dobdo->o_mtime = le64_to_cpu(sobdo->o_mtime);
+ dobdo->o_atime = le64_to_cpu(sobdo->o_atime);
+ dobdo->o_ctime = le64_to_cpu(sobdo->o_ctime);
+ dobdo->o_blocks = le64_to_cpu(sobdo->o_blocks);
+ dobdo->o_mode = le32_to_cpu(sobdo->o_mode);
+ dobdo->o_uid = le32_to_cpu(sobdo->o_uid);
+ dobdo->o_gid = le32_to_cpu(sobdo->o_gid);
+ dobdo->o_flags = le32_to_cpu(sobdo->o_flags);
+ dobdo->o_nlink = le32_to_cpu(sobdo->o_nlink);
+ dobdo->o_blksize = le32_to_cpu(sobdo->o_blksize);
+ dobdo->o_valid = le64_to_cpu(sobdo->o_valid);
+}
+EXPORT_SYMBOL(obdo_le_to_cpu);
diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
new file mode 100644
index 000000000000..c3b7a78dba50
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/statfs_pack.c
+ *
+ * (Un)packing of OST/MDS requests
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include <lustre_export.h>
+#include <lustre_net.h>
+#include <obd_support.h>
+#include <obd_class.h>
+
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
+{
+ memset(osfs, 0, sizeof(*osfs));
+ osfs->os_type = sfs->f_type;
+ osfs->os_blocks = sfs->f_blocks;
+ osfs->os_bfree = sfs->f_bfree;
+ osfs->os_bavail = sfs->f_bavail;
+ osfs->os_files = sfs->f_files;
+ osfs->os_ffree = sfs->f_ffree;
+ osfs->os_bsize = sfs->f_bsize;
+ osfs->os_namelen = sfs->f_namelen;
+}
+EXPORT_SYMBOL(statfs_pack);
+
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
+{
+ memset(sfs, 0, sizeof(*sfs));
+ sfs->f_type = osfs->os_type;
+ sfs->f_blocks = osfs->os_blocks;
+ sfs->f_bfree = osfs->os_bfree;
+ sfs->f_bavail = osfs->os_bavail;
+ sfs->f_files = osfs->os_files;
+ sfs->f_ffree = osfs->os_ffree;
+ sfs->f_bsize = osfs->os_bsize;
+ sfs->f_namelen = osfs->os_namelen;
+}
+EXPORT_SYMBOL(statfs_unpack);
diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c
new file mode 100644
index 000000000000..af5f27f82bc5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/uuid.c
@@ -0,0 +1,82 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/uuid.c
+ *
+ * Public include file for the UUID library
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+
+# include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <obd_class.h>
+
+
+static inline __u32 consume(int nob, __u8 **ptr)
+{
+ __u32 value;
+
+ LASSERT(nob <= sizeof value);
+
+ for (value = 0; nob > 0; --nob)
+ value = (value << 8) | *((*ptr)++);
+ return value;
+}
+
+#define CONSUME(val, ptr) (val) = consume(sizeof(val), (ptr))
+
+static void uuid_unpack(class_uuid_t in, __u16 *uu, int nr)
+{
+ __u8 *ptr = in;
+
+ LASSERT(nr * sizeof *uu == sizeof(class_uuid_t));
+
+ while (nr-- > 0)
+ CONSUME(uu[nr], &ptr);
+}
+
+void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out)
+{
+ /* uu as an array of __u16's */
+ __u16 uuid[sizeof(class_uuid_t) / sizeof(__u16)];
+
+ CLASSERT(ARRAY_SIZE(uuid) == 8);
+
+ uuid_unpack(uu, uuid, ARRAY_SIZE(uuid));
+ sprintf(out->uuid, "%04x%04x-%04x-%04x-%04x-%04x%04x%04x",
+ uuid[0], uuid[1], uuid[2], uuid[3],
+ uuid[4], uuid[5], uuid[6], uuid[7]);
+}
+EXPORT_SYMBOL(class_uuid_unparse);
diff --git a/drivers/staging/lustre/lustre/obdecho/Makefile b/drivers/staging/lustre/lustre/obdecho/Makefile
new file mode 100644
index 000000000000..4c48e2432f9b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdecho/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_LUSTRE_FS) += obdecho.o
+obdecho-y := echo_client.o lproc_echo.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/obdecho/echo.c b/drivers/staging/lustre/lustre/obdecho/echo.c
new file mode 100644
index 000000000000..9e64939af9dc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdecho/echo.c
@@ -0,0 +1,679 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdecho/echo.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_ECHO
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+
+#include "echo_internal.h"
+
+/* The echo objid needs to be below 2^32, because regular FID numbers are
+ * limited to 2^32 objects in f_oid for the FID_SEQ_ECHO range. b=23335 */
+#define ECHO_INIT_OID 0x10000000ULL
+#define ECHO_HANDLE_MAGIC 0xabcd0123fedc9876ULL
+
+#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE >> PAGE_CACHE_SHIFT)
+static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES];
+
+enum {
+ LPROC_ECHO_READ_BYTES = 1,
+ LPROC_ECHO_WRITE_BYTES = 2,
+ LPROC_ECHO_LAST = LPROC_ECHO_WRITE_BYTES +1
+};
+
+static int echo_connect(const struct lu_env *env,
+ struct obd_export **exp, struct obd_device *obd,
+ struct obd_uuid *cluuid, struct obd_connect_data *data,
+ void *localdata)
+{
+ struct lustre_handle conn = { 0 };
+ int rc;
+
+ data->ocd_connect_flags &= ECHO_CONNECT_SUPPORTED;
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc) {
+ CERROR("can't connect %d\n", rc);
+ return rc;
+ }
+ *exp = class_conn2export(&conn);
+
+ return 0;
+}
+
+static int echo_disconnect(struct obd_export *exp)
+{
+ LASSERT (exp != NULL);
+
+ return server_disconnect_export(exp);
+}
+
+static int echo_init_export(struct obd_export *exp)
+{
+ return ldlm_init_export(exp);
+}
+
+static int echo_destroy_export(struct obd_export *exp)
+{
+ ENTRY;
+
+ target_destroy_export(exp);
+ ldlm_destroy_export(exp);
+
+ RETURN(0);
+}
+
+ static __u64 echo_next_id(struct obd_device *obddev)
+{
+ obd_id id;
+
+ spin_lock(&obddev->u.echo.eo_lock);
+ id = ++obddev->u.echo.eo_lastino;
+ spin_unlock(&obddev->u.echo.eo_lock);
+
+ return id;
+}
+
+static int echo_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+
+ if (!obd) {
+ CERROR("invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ return -EINVAL;
+ }
+
+ if (!(oa->o_mode && S_IFMT)) {
+ CERROR("echo obd: no type!\n");
+ return -ENOENT;
+ }
+
+ if (!(oa->o_valid & OBD_MD_FLTYPE)) {
+ CERROR("invalid o_valid "LPX64"\n", oa->o_valid);
+ return -EINVAL;
+ }
+
+ ostid_set_seq_echo(&oa->o_oi);
+ ostid_set_id(&oa->o_oi, echo_next_id(obd));
+ oa->o_valid = OBD_MD_FLID;
+
+ return 0;
+}
+
+static int echo_destroy(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md *ea,
+ struct obd_trans_info *oti, struct obd_export *md_exp,
+ void *capa)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+
+ ENTRY;
+ if (!obd) {
+ CERROR("invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ RETURN(-EINVAL);
+ }
+
+ if (!(oa->o_valid & OBD_MD_FLID)) {
+ CERROR("obdo missing FLID valid flag: "LPX64"\n", oa->o_valid);
+ RETURN(-EINVAL);
+ }
+
+ if (ostid_id(&oa->o_oi) > obd->u.echo.eo_lastino ||
+ ostid_id(&oa->o_oi) < ECHO_INIT_OID) {
+ CERROR("bad destroy objid: "DOSTID"\n", POSTID(&oa->o_oi));
+ RETURN(-EINVAL);
+ }
+
+ RETURN(0);
+}
+
+static int echo_getattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ obd_id id = ostid_id(&oinfo->oi_oa->o_oi);
+
+ ENTRY;
+ if (!obd) {
+ CERROR("invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ RETURN(-EINVAL);
+ }
+
+ if (!(oinfo->oi_oa->o_valid & OBD_MD_FLID)) {
+ CERROR("obdo missing FLID valid flag: "LPX64"\n",
+ oinfo->oi_oa->o_valid);
+ RETURN(-EINVAL);
+ }
+
+ obdo_cpy_md(oinfo->oi_oa, &obd->u.echo.eo_oa, oinfo->oi_oa->o_valid);
+ ostid_set_seq_echo(&oinfo->oi_oa->o_oi);
+ ostid_set_id(&oinfo->oi_oa->o_oi, id);
+
+ RETURN(0);
+}
+
+static int echo_setattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+
+ ENTRY;
+ if (!obd) {
+ CERROR("invalid client cookie "LPX64"\n",
+ exp->exp_handle.h_cookie);
+ RETURN(-EINVAL);
+ }
+
+ if (!(oinfo->oi_oa->o_valid & OBD_MD_FLID)) {
+ CERROR("obdo missing FLID valid flag: "LPX64"\n",
+ oinfo->oi_oa->o_valid);
+ RETURN(-EINVAL);
+ }
+
+ memcpy(&obd->u.echo.eo_oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+
+ if (ostid_id(&oinfo->oi_oa->o_oi) & 4) {
+ /* Save lock to force ACKed reply */
+ ldlm_lock_addref (&obd->u.echo.eo_nl_lock, LCK_NL);
+ oti->oti_ack_locks[0].mode = LCK_NL;
+ oti->oti_ack_locks[0].lock = obd->u.echo.eo_nl_lock;
+ }
+
+ RETURN(0);
+}
+
+static void
+echo_page_debug_setup(struct page *page, int rw, obd_id id,
+ __u64 offset, int len)
+{
+ int page_offset = offset & ~CFS_PAGE_MASK;
+ char *addr = ((char *)kmap(page)) + page_offset;
+
+ if (len % OBD_ECHO_BLOCK_SIZE != 0)
+ CERROR("Unexpected block size %d\n", len);
+
+ while (len > 0) {
+ if (rw & OBD_BRW_READ)
+ block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
+ offset, id);
+ else
+ block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE,
+ 0xecc0ecc0ecc0ecc0ULL,
+ 0xecc0ecc0ecc0ecc0ULL);
+
+ addr += OBD_ECHO_BLOCK_SIZE;
+ offset += OBD_ECHO_BLOCK_SIZE;
+ len -= OBD_ECHO_BLOCK_SIZE;
+ }
+
+ kunmap(page);
+}
+
+static int
+echo_page_debug_check(struct page *page, obd_id id,
+ __u64 offset, int len)
+{
+ int page_offset = offset & ~CFS_PAGE_MASK;
+ char *addr = ((char *)kmap(page)) + page_offset;
+ int rc = 0;
+ int rc2;
+
+ if (len % OBD_ECHO_BLOCK_SIZE != 0)
+ CERROR("Unexpected block size %d\n", len);
+
+ while (len > 0) {
+ rc2 = block_debug_check("echo", addr, OBD_ECHO_BLOCK_SIZE,
+ offset, id);
+
+ if (rc2 != 0 && rc == 0)
+ rc = rc2;
+
+ addr += OBD_ECHO_BLOCK_SIZE;
+ offset += OBD_ECHO_BLOCK_SIZE;
+ len -= OBD_ECHO_BLOCK_SIZE;
+ }
+
+ kunmap(page);
+
+ return (rc);
+}
+
+/* This allows us to verify that desc_private is passed unmolested */
+#define DESC_PRIV 0x10293847
+
+static int echo_map_nb_to_lb(struct obdo *oa, struct obd_ioobj *obj,
+ struct niobuf_remote *nb, int *pages,
+ struct niobuf_local *lb, int cmd, int *left)
+{
+ int gfp_mask = (ostid_id(&obj->ioo_oid) & 1) ?
+ GFP_HIGHUSER : GFP_IOFS;
+ int ispersistent = ostid_id(&obj->ioo_oid) == ECHO_PERSISTENT_OBJID;
+ int debug_setup = (!ispersistent &&
+ (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
+ (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
+ struct niobuf_local *res = lb;
+ obd_off offset = nb->offset;
+ int len = nb->len;
+
+ while (len > 0) {
+ int plen = PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1));
+ if (len < plen)
+ plen = len;
+
+ /* check for local buf overflow */
+ if (*left == 0)
+ return -EINVAL;
+
+ res->lnb_file_offset = offset;
+ res->len = plen;
+ LASSERT((res->lnb_file_offset & ~CFS_PAGE_MASK) + res->len <=
+ PAGE_CACHE_SIZE);
+
+ if (ispersistent &&
+ ((res->lnb_file_offset >> PAGE_CACHE_SHIFT) <
+ ECHO_PERSISTENT_PAGES)) {
+ res->page =
+ echo_persistent_pages[res->lnb_file_offset >>
+ PAGE_CACHE_SHIFT];
+ /* Take extra ref so __free_pages() can be called OK */
+ get_page (res->page);
+ } else {
+ OBD_PAGE_ALLOC(res->page, gfp_mask);
+ if (res->page == NULL) {
+ CERROR("can't get page for id " DOSTID"\n",
+ POSTID(&obj->ioo_oid));
+ return -ENOMEM;
+ }
+ }
+
+ CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
+ res->page, res->lnb_file_offset, res->len);
+
+ if (cmd & OBD_BRW_READ)
+ res->rc = res->len;
+
+ if (debug_setup)
+ echo_page_debug_setup(res->page, cmd,
+ ostid_id(&obj->ioo_oid),
+ res->lnb_file_offset, res->len);
+
+ offset += plen;
+ len -= plen;
+ res++;
+
+ (*left)--;
+ (*pages)++;
+ }
+
+ return 0;
+}
+
+static int echo_finalize_lb(struct obdo *oa, struct obd_ioobj *obj,
+ struct niobuf_remote *rb, int *pgs,
+ struct niobuf_local *lb, int verify)
+{
+ struct niobuf_local *res = lb;
+ obd_off start = rb->offset >> PAGE_CACHE_SHIFT;
+ obd_off end = (rb->offset + rb->len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ int count = (int)(end - start);
+ int rc = 0;
+ int i;
+
+ for (i = 0; i < count; i++, (*pgs) ++, res++) {
+ struct page *page = res->page;
+ void *addr;
+
+ if (page == NULL) {
+ CERROR("null page objid "LPU64":%p, buf %d/%d\n",
+ ostid_id(&obj->ioo_oid), page, i,
+ obj->ioo_bufcnt);
+ return -EFAULT;
+ }
+
+ addr = kmap(page);
+
+ CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
+ res->page, addr, res->lnb_file_offset);
+
+ if (verify) {
+ int vrc = echo_page_debug_check(page,
+ ostid_id(&obj->ioo_oid),
+ res->lnb_file_offset,
+ res->len);
+ /* check all the pages always */
+ if (vrc != 0 && rc == 0)
+ rc = vrc;
+ }
+
+ kunmap(page);
+ /* NB see comment above regarding persistent pages */
+ OBD_PAGE_FREE(page);
+ }
+
+ return rc;
+}
+
+static int echo_preprw(const struct lu_env *env, int cmd,
+ struct obd_export *export, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *nb, int *pages,
+ struct niobuf_local *res, struct obd_trans_info *oti,
+ struct lustre_capa *unused)
+{
+ struct obd_device *obd;
+ int tot_bytes = 0;
+ int rc = 0;
+ int i, left;
+ ENTRY;
+
+ obd = export->exp_obd;
+ if (obd == NULL)
+ RETURN(-EINVAL);
+
+ /* Temp fix to stop falling foul of osc_announce_cached() */
+ oa->o_valid &= ~(OBD_MD_FLBLOCKS | OBD_MD_FLGRANT);
+
+ memset(res, 0, sizeof(*res) * *pages);
+
+ CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
+ cmd == OBD_BRW_READ ? "reading" : "writing", objcount, *pages);
+
+ if (oti)
+ oti->oti_handle = (void *)DESC_PRIV;
+
+ left = *pages;
+ *pages = 0;
+
+ for (i = 0; i < objcount; i++, obj++) {
+ int j;
+
+ for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++) {
+
+ rc = echo_map_nb_to_lb(oa, obj, nb, pages,
+ res + *pages, cmd, &left);
+ if (rc)
+ GOTO(preprw_cleanup, rc);
+
+ tot_bytes += nb->len;
+ }
+ }
+
+ atomic_add(*pages, &obd->u.echo.eo_prep);
+
+ if (cmd & OBD_BRW_READ)
+ lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES,
+ tot_bytes);
+ else
+ lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
+ tot_bytes);
+
+ CDEBUG(D_PAGE, "%d pages allocated after prep\n",
+ atomic_read(&obd->u.echo.eo_prep));
+
+ RETURN(0);
+
+preprw_cleanup:
+ /* It is possible that we would rather handle errors by allow
+ * any already-set-up pages to complete, rather than tearing them
+ * all down again. I believe that this is what the in-kernel
+ * prep/commit operations do.
+ */
+ CERROR("cleaning up %u pages (%d obdos)\n", *pages, objcount);
+ for (i = 0; i < *pages; i++) {
+ kunmap(res[i].page);
+ /* NB if this is a persistent page, __free_pages will just
+ * lose the extra ref gained above */
+ OBD_PAGE_FREE(res[i].page);
+ res[i].page = NULL;
+ atomic_dec(&obd->u.echo.eo_prep);
+ }
+
+ return rc;
+}
+
+static int echo_commitrw(const struct lu_env *env, int cmd,
+ struct obd_export *export, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *rb, int niocount,
+ struct niobuf_local *res, struct obd_trans_info *oti,
+ int rc)
+{
+ struct obd_device *obd;
+ int pgs = 0;
+ int i;
+ ENTRY;
+
+ obd = export->exp_obd;
+ if (obd == NULL)
+ RETURN(-EINVAL);
+
+ if (rc)
+ GOTO(commitrw_cleanup, rc);
+
+ if ((cmd & OBD_BRW_RWMASK) == OBD_BRW_READ) {
+ CDEBUG(D_PAGE, "reading %d obdos with %d IOs\n",
+ objcount, niocount);
+ } else {
+ CDEBUG(D_PAGE, "writing %d obdos with %d IOs\n",
+ objcount, niocount);
+ }
+
+ if (niocount && res == NULL) {
+ CERROR("NULL res niobuf with niocount %d\n", niocount);
+ RETURN(-EINVAL);
+ }
+
+ LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
+
+ for (i = 0; i < objcount; i++, obj++) {
+ int verify = (rc == 0 &&
+ ostid_id(&obj->ioo_oid) != ECHO_PERSISTENT_OBJID &&
+ (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
+ (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
+ int j;
+
+ for (j = 0 ; j < obj->ioo_bufcnt ; j++, rb++) {
+ int vrc = echo_finalize_lb(oa, obj, rb, &pgs, &res[pgs],
+ verify);
+ if (vrc == 0)
+ continue;
+
+ if (vrc == -EFAULT)
+ GOTO(commitrw_cleanup, rc = vrc);
+
+ if (rc == 0)
+ rc = vrc;
+ }
+
+ }
+
+ atomic_sub(pgs, &obd->u.echo.eo_prep);
+
+ CDEBUG(D_PAGE, "%d pages remain after commit\n",
+ atomic_read(&obd->u.echo.eo_prep));
+ RETURN(rc);
+
+commitrw_cleanup:
+ atomic_sub(pgs, &obd->u.echo.eo_prep);
+
+ CERROR("cleaning up %d pages (%d obdos)\n",
+ niocount - pgs - 1, objcount);
+
+ while (pgs < niocount) {
+ struct page *page = res[pgs++].page;
+
+ if (page == NULL)
+ continue;
+
+ /* NB see comment above regarding persistent pages */
+ OBD_PAGE_FREE(page);
+ atomic_dec(&obd->u.echo.eo_prep);
+ }
+ return rc;
+}
+
+static int echo_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lprocfs_static_vars lvars;
+ int rc;
+ __u64 lock_flags = 0;
+ struct ldlm_res_id res_id = {.name = {1}};
+ char ns_name[48];
+ ENTRY;
+
+ obd->u.echo.eo_obt.obt_magic = OBT_MAGIC;
+ spin_lock_init(&obd->u.echo.eo_lock);
+ obd->u.echo.eo_lastino = ECHO_INIT_OID;
+
+ sprintf(ns_name, "echotgt-%s", obd->obd_uuid.uuid);
+ obd->obd_namespace = ldlm_namespace_new(obd, ns_name,
+ LDLM_NAMESPACE_SERVER,
+ LDLM_NAMESPACE_MODEST,
+ LDLM_NS_TYPE_OST);
+ if (obd->obd_namespace == NULL) {
+ LBUG();
+ RETURN(-ENOMEM);
+ }
+
+ rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN,
+ NULL, LCK_NL, &lock_flags, NULL,
+ ldlm_completion_ast, NULL, NULL, 0,
+ LVB_T_NONE, NULL, &obd->u.echo.eo_nl_lock);
+ LASSERT (rc == ELDLM_OK);
+
+ lprocfs_echo_init_vars(&lvars);
+ if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 &&
+ lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST) == 0) {
+ lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES,
+ LPROCFS_CNTR_AVGMINMAX,
+ "read_bytes", "bytes");
+ lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES,
+ LPROCFS_CNTR_AVGMINMAX,
+ "write_bytes", "bytes");
+ }
+
+ ptlrpc_init_client (LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
+ "echo_ldlm_cb_client", &obd->obd_ldlm_client);
+ RETURN(0);
+}
+
+static int echo_cleanup(struct obd_device *obd)
+{
+ int leaked;
+ ENTRY;
+
+ lprocfs_obd_cleanup(obd);
+ lprocfs_free_obd_stats(obd);
+
+ ldlm_lock_decref(&obd->u.echo.eo_nl_lock, LCK_NL);
+
+ /* XXX Bug 3413; wait for a bit to ensure the BL callback has
+ * happened before calling ldlm_namespace_free() */
+ schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE, cfs_time_seconds(1));
+
+ ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
+ obd->obd_namespace = NULL;
+
+ leaked = atomic_read(&obd->u.echo.eo_prep);
+ if (leaked != 0)
+ CERROR("%d prep/commitrw pages leaked\n", leaked);
+
+ RETURN(0);
+}
+
+struct obd_ops echo_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_connect = echo_connect,
+ .o_disconnect = echo_disconnect,
+ .o_init_export = echo_init_export,
+ .o_destroy_export = echo_destroy_export,
+ .o_create = echo_create,
+ .o_destroy = echo_destroy,
+ .o_getattr = echo_getattr,
+ .o_setattr = echo_setattr,
+ .o_preprw = echo_preprw,
+ .o_commitrw = echo_commitrw,
+ .o_setup = echo_setup,
+ .o_cleanup = echo_cleanup
+};
+
+void echo_persistent_pages_fini(void)
+{
+ int i;
+
+ for (i = 0; i < ECHO_PERSISTENT_PAGES; i++)
+ if (echo_persistent_pages[i] != NULL) {
+ OBD_PAGE_FREE(echo_persistent_pages[i]);
+ echo_persistent_pages[i] = NULL;
+ }
+}
+
+int echo_persistent_pages_init(void)
+{
+ struct page *pg;
+ int i;
+
+ for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) {
+ int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ?
+ GFP_IOFS : GFP_HIGHUSER;
+
+ OBD_PAGE_ALLOC(pg, gfp_mask);
+ if (pg == NULL) {
+ echo_persistent_pages_fini ();
+ return (-ENOMEM);
+ }
+
+ memset (kmap (pg), 0, PAGE_CACHE_SIZE);
+ kunmap (pg);
+
+ echo_persistent_pages[i] = pg;
+ }
+
+ return (0);
+}
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
new file mode 100644
index 000000000000..184195fde621
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -0,0 +1,3223 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_ECHO
+#include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lprocfs_status.h>
+#include <cl_object.h>
+#include <lustre_fid.h>
+#include <lustre_acl.h>
+#include <lustre_net.h>
+#include <obd_lov.h>
+
+#include "echo_internal.h"
+
+/** \defgroup echo_client Echo Client
+ * @{
+ */
+
+struct echo_device {
+ struct cl_device ed_cl;
+ struct echo_client_obd *ed_ec;
+
+ struct cl_site ed_site_myself;
+ struct cl_site *ed_site;
+ struct lu_device *ed_next;
+ int ed_next_islov;
+ int ed_next_ismd;
+ struct lu_client_seq *ed_cl_seq;
+};
+
+struct echo_object {
+ struct cl_object eo_cl;
+ struct cl_object_header eo_hdr;
+
+ struct echo_device *eo_dev;
+ struct list_head eo_obj_chain;
+ struct lov_stripe_md *eo_lsm;
+ atomic_t eo_npages;
+ int eo_deleted;
+};
+
+struct echo_object_conf {
+ struct cl_object_conf eoc_cl;
+ struct lov_stripe_md **eoc_md;
+};
+
+struct echo_page {
+ struct cl_page_slice ep_cl;
+ struct mutex ep_lock;
+ struct page *ep_vmpage;
+};
+
+struct echo_lock {
+ struct cl_lock_slice el_cl;
+ struct list_head el_chain;
+ struct echo_object *el_object;
+ __u64 el_cookie;
+ atomic_t el_refcount;
+};
+
+struct echo_io {
+ struct cl_io_slice ei_cl;
+};
+
+#if 0
+struct echo_req {
+ struct cl_req_slice er_cl;
+};
+#endif
+
+static int echo_client_setup(const struct lu_env *env,
+ struct obd_device *obddev,
+ struct lustre_cfg *lcfg);
+static int echo_client_cleanup(struct obd_device *obddev);
+
+
+/** \defgroup echo_helpers Helper functions
+ * @{
+ */
+static inline struct echo_device *cl2echo_dev(const struct cl_device *dev)
+{
+ return container_of0(dev, struct echo_device, ed_cl);
+}
+
+static inline struct cl_device *echo_dev2cl(struct echo_device *d)
+{
+ return &d->ed_cl;
+}
+
+static inline struct echo_device *obd2echo_dev(const struct obd_device *obd)
+{
+ return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev));
+}
+
+static inline struct cl_object *echo_obj2cl(struct echo_object *eco)
+{
+ return &eco->eo_cl;
+}
+
+static inline struct echo_object *cl2echo_obj(const struct cl_object *o)
+{
+ return container_of(o, struct echo_object, eo_cl);
+}
+
+static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s)
+{
+ return container_of(s, struct echo_page, ep_cl);
+}
+
+static inline struct echo_lock *cl2echo_lock(const struct cl_lock_slice *s)
+{
+ return container_of(s, struct echo_lock, el_cl);
+}
+
+static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl)
+{
+ return ecl->el_cl.cls_lock;
+}
+
+static struct lu_context_key echo_thread_key;
+static inline struct echo_thread_info *echo_env_info(const struct lu_env *env)
+{
+ struct echo_thread_info *info;
+ info = lu_context_key_get(&env->le_ctx, &echo_thread_key);
+ LASSERT(info != NULL);
+ return info;
+}
+
+static inline
+struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
+{
+ return container_of(c, struct echo_object_conf, eoc_cl);
+}
+
+/** @} echo_helpers */
+
+static struct echo_object *cl_echo_object_find(struct echo_device *d,
+ struct lov_stripe_md **lsm);
+static int cl_echo_object_put(struct echo_object *eco);
+static int cl_echo_enqueue (struct echo_object *eco, obd_off start,
+ obd_off end, int mode, __u64 *cookie);
+static int cl_echo_cancel (struct echo_device *d, __u64 cookie);
+static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
+ struct page **pages, int npages, int async);
+
+static struct echo_thread_info *echo_env_info(const struct lu_env *env);
+
+struct echo_thread_info {
+ struct echo_object_conf eti_conf;
+ struct lustre_md eti_md;
+
+ struct cl_2queue eti_queue;
+ struct cl_io eti_io;
+ struct cl_lock_descr eti_descr;
+ struct lu_fid eti_fid;
+ struct lu_fid eti_fid2;
+ struct md_op_spec eti_spec;
+ struct lov_mds_md_v3 eti_lmm;
+ struct lov_user_md_v3 eti_lum;
+ struct md_attr eti_ma;
+ struct lu_name eti_lname;
+ /* per-thread values, can be re-used */
+ void *eti_big_lmm;
+ int eti_big_lmmsize;
+ char eti_name[20];
+ struct lu_buf eti_buf;
+ char eti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE];
+};
+
+/* No session used right now */
+struct echo_session_info {
+ unsigned long dummy;
+};
+
+static struct kmem_cache *echo_lock_kmem;
+static struct kmem_cache *echo_object_kmem;
+static struct kmem_cache *echo_thread_kmem;
+static struct kmem_cache *echo_session_kmem;
+//static struct kmem_cache *echo_req_kmem;
+
+static struct lu_kmem_descr echo_caches[] = {
+ {
+ .ckd_cache = &echo_lock_kmem,
+ .ckd_name = "echo_lock_kmem",
+ .ckd_size = sizeof (struct echo_lock)
+ },
+ {
+ .ckd_cache = &echo_object_kmem,
+ .ckd_name = "echo_object_kmem",
+ .ckd_size = sizeof (struct echo_object)
+ },
+ {
+ .ckd_cache = &echo_thread_kmem,
+ .ckd_name = "echo_thread_kmem",
+ .ckd_size = sizeof (struct echo_thread_info)
+ },
+ {
+ .ckd_cache = &echo_session_kmem,
+ .ckd_name = "echo_session_kmem",
+ .ckd_size = sizeof (struct echo_session_info)
+ },
+#if 0
+ {
+ .ckd_cache = &echo_req_kmem,
+ .ckd_name = "echo_req_kmem",
+ .ckd_size = sizeof (struct echo_req)
+ },
+#endif
+ {
+ .ckd_cache = NULL
+ }
+};
+
+/** \defgroup echo_page Page operations
+ *
+ * Echo page operations.
+ *
+ * @{
+ */
+static struct page *echo_page_vmpage(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ return cl2echo_page(slice)->ep_vmpage;
+}
+
+static int echo_page_own(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io, int nonblock)
+{
+ struct echo_page *ep = cl2echo_page(slice);
+
+ if (!nonblock)
+ mutex_lock(&ep->ep_lock);
+ else if (!mutex_trylock(&ep->ep_lock))
+ return -EAGAIN;
+ return 0;
+}
+
+static void echo_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct echo_page *ep = cl2echo_page(slice);
+
+ LASSERT(mutex_is_locked(&ep->ep_lock));
+ mutex_unlock(&ep->ep_lock);
+}
+
+static void echo_page_discard(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ cl_page_delete(env, slice->cpl_page);
+}
+
+static int echo_page_is_vmlocked(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ if (mutex_is_locked(&cl2echo_page(slice)->ep_lock))
+ return -EBUSY;
+ return -ENODATA;
+}
+
+static void echo_page_completion(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ LASSERT(slice->cpl_page->cp_sync_io != NULL);
+}
+
+static void echo_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ struct echo_page *ep = cl2echo_page(slice);
+ struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
+ struct page *vmpage = ep->ep_vmpage;
+ ENTRY;
+
+ atomic_dec(&eco->eo_npages);
+ page_cache_release(vmpage);
+ EXIT;
+}
+
+static int echo_page_prep(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ return 0;
+}
+
+static int echo_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
+{
+ struct echo_page *ep = cl2echo_page(slice);
+
+ (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
+ ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
+ return 0;
+}
+
+static const struct cl_page_operations echo_page_ops = {
+ .cpo_own = echo_page_own,
+ .cpo_disown = echo_page_disown,
+ .cpo_discard = echo_page_discard,
+ .cpo_vmpage = echo_page_vmpage,
+ .cpo_fini = echo_page_fini,
+ .cpo_print = echo_page_print,
+ .cpo_is_vmlocked = echo_page_is_vmlocked,
+ .io = {
+ [CRT_READ] = {
+ .cpo_prep = echo_page_prep,
+ .cpo_completion = echo_page_completion,
+ },
+ [CRT_WRITE] = {
+ .cpo_prep = echo_page_prep,
+ .cpo_completion = echo_page_completion,
+ }
+ }
+};
+/** @} echo_page */
+
+/** \defgroup echo_lock Locking
+ *
+ * echo lock operations
+ *
+ * @{
+ */
+static void echo_lock_fini(const struct lu_env *env,
+ struct cl_lock_slice *slice)
+{
+ struct echo_lock *ecl = cl2echo_lock(slice);
+
+ LASSERT(list_empty(&ecl->el_chain));
+ OBD_SLAB_FREE_PTR(ecl, echo_lock_kmem);
+}
+
+static void echo_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct echo_lock *ecl = cl2echo_lock(slice);
+
+ LASSERT(list_empty(&ecl->el_chain));
+}
+
+static int echo_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *unused)
+{
+ return 1;
+}
+
+static struct cl_lock_operations echo_lock_ops = {
+ .clo_fini = echo_lock_fini,
+ .clo_delete = echo_lock_delete,
+ .clo_fits_into = echo_lock_fits_into
+};
+
+/** @} echo_lock */
+
+/** \defgroup echo_cl_ops cl_object operations
+ *
+ * operations for cl_object
+ *
+ * @{
+ */
+static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ struct echo_page *ep = cl_object_page_slice(obj, page);
+ struct echo_object *eco = cl2echo_obj(obj);
+ ENTRY;
+
+ ep->ep_vmpage = vmpage;
+ page_cache_get(vmpage);
+ mutex_init(&ep->ep_lock);
+ cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
+ atomic_inc(&eco->eo_npages);
+ RETURN(0);
+}
+
+static int echo_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io)
+{
+ return 0;
+}
+
+static int echo_lock_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *unused)
+{
+ struct echo_lock *el;
+ ENTRY;
+
+ OBD_SLAB_ALLOC_PTR_GFP(el, echo_lock_kmem, __GFP_IO);
+ if (el != NULL) {
+ cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops);
+ el->el_object = cl2echo_obj(obj);
+ INIT_LIST_HEAD(&el->el_chain);
+ atomic_set(&el->el_refcount, 0);
+ }
+ RETURN(el == NULL ? -ENOMEM : 0);
+}
+
+static int echo_conf_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_object_conf *conf)
+{
+ return 0;
+}
+
+static const struct cl_object_operations echo_cl_obj_ops = {
+ .coo_page_init = echo_page_init,
+ .coo_lock_init = echo_lock_init,
+ .coo_io_init = echo_io_init,
+ .coo_conf_set = echo_conf_set
+};
+/** @} echo_cl_ops */
+
+/** \defgroup echo_lu_ops lu_object operations
+ *
+ * operations for echo lu object.
+ *
+ * @{
+ */
+static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct echo_device *ed = cl2echo_dev(lu2cl_dev(obj->lo_dev));
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct echo_object *eco = cl2echo_obj(lu2cl(obj));
+ ENTRY;
+
+ if (ed->ed_next) {
+ struct lu_object *below;
+ struct lu_device *under;
+
+ under = ed->ed_next;
+ below = under->ld_ops->ldo_object_alloc(env, obj->lo_header,
+ under);
+ if (below == NULL)
+ RETURN(-ENOMEM);
+ lu_object_add(obj, below);
+ }
+
+ if (!ed->ed_next_ismd) {
+ const struct cl_object_conf *cconf = lu2cl_conf(conf);
+ struct echo_object_conf *econf = cl2echo_conf(cconf);
+
+ LASSERT(econf->eoc_md);
+ eco->eo_lsm = *econf->eoc_md;
+ /* clear the lsm pointer so that it won't get freed. */
+ *econf->eoc_md = NULL;
+ } else {
+ eco->eo_lsm = NULL;
+ }
+
+ eco->eo_dev = ed;
+ atomic_set(&eco->eo_npages, 0);
+ cl_object_page_init(lu2cl(obj), sizeof(struct echo_page));
+
+ spin_lock(&ec->ec_lock);
+ list_add_tail(&eco->eo_obj_chain, &ec->ec_objects);
+ spin_unlock(&ec->ec_lock);
+
+ RETURN(0);
+}
+
+/* taken from osc_unpackmd() */
+static int echo_alloc_memmd(struct echo_device *ed,
+ struct lov_stripe_md **lsmp)
+{
+ int lsm_size;
+
+ ENTRY;
+
+ /* If export is lov/osc then use their obd method */
+ if (ed->ed_next != NULL)
+ return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
+ /* OFD has no unpackmd method, do everything here */
+ lsm_size = lov_stripe_md_size(1);
+
+ LASSERT(*lsmp == NULL);
+ OBD_ALLOC(*lsmp, lsm_size);
+ if (*lsmp == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
+ if ((*lsmp)->lsm_oinfo[0] == NULL) {
+ OBD_FREE(*lsmp, lsm_size);
+ RETURN(-ENOMEM);
+ }
+
+ loi_init((*lsmp)->lsm_oinfo[0]);
+ (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ ostid_set_seq_echo(&(*lsmp)->lsm_oi);
+
+ RETURN(lsm_size);
+}
+
+static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
+{
+ int lsm_size;
+
+ ENTRY;
+
+ /* If export is lov/osc then use their obd method */
+ if (ed->ed_next != NULL)
+ return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
+ /* OFD has no unpackmd method, do everything here */
+ lsm_size = lov_stripe_md_size(1);
+
+ LASSERT(*lsmp != NULL);
+ OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
+ OBD_FREE(*lsmp, lsm_size);
+ *lsmp = NULL;
+ RETURN(0);
+}
+
+static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct echo_object *eco = cl2echo_obj(lu2cl(obj));
+ struct echo_client_obd *ec = eco->eo_dev->ed_ec;
+ ENTRY;
+
+ LASSERT(atomic_read(&eco->eo_npages) == 0);
+
+ spin_lock(&ec->ec_lock);
+ list_del_init(&eco->eo_obj_chain);
+ spin_unlock(&ec->ec_lock);
+
+ lu_object_fini(obj);
+ lu_object_header_fini(obj->lo_header);
+
+ if (eco->eo_lsm)
+ echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
+ OBD_SLAB_FREE_PTR(eco, echo_object_kmem);
+ EXIT;
+}
+
+static int echo_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *o)
+{
+ struct echo_object *obj = cl2echo_obj(lu2cl(o));
+
+ return (*p)(env, cookie, "echoclient-object@%p", obj);
+}
+
+static const struct lu_object_operations echo_lu_obj_ops = {
+ .loo_object_init = echo_object_init,
+ .loo_object_delete = NULL,
+ .loo_object_release = NULL,
+ .loo_object_free = echo_object_free,
+ .loo_object_print = echo_object_print,
+ .loo_object_invariant = NULL
+};
+/** @} echo_lu_ops */
+
+/** \defgroup echo_lu_dev_ops lu_device operations
+ *
+ * Operations for echo lu device.
+ *
+ * @{
+ */
+static struct lu_object *echo_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev)
+{
+ struct echo_object *eco;
+ struct lu_object *obj = NULL;
+ ENTRY;
+
+ /* we're the top dev. */
+ LASSERT(hdr == NULL);
+ OBD_SLAB_ALLOC_PTR_GFP(eco, echo_object_kmem, __GFP_IO);
+ if (eco != NULL) {
+ struct cl_object_header *hdr = &eco->eo_hdr;
+
+ obj = &echo_obj2cl(eco)->co_lu;
+ cl_object_header_init(hdr);
+ lu_object_init(obj, &hdr->coh_lu, dev);
+ lu_object_add_top(&hdr->coh_lu, obj);
+
+ eco->eo_cl.co_ops = &echo_cl_obj_ops;
+ obj->lo_ops = &echo_lu_obj_ops;
+ }
+ RETURN(obj);
+}
+
+static struct lu_device_operations echo_device_lu_ops = {
+ .ldo_object_alloc = echo_object_alloc,
+};
+
+/** @} echo_lu_dev_ops */
+
+static struct cl_device_operations echo_device_cl_ops = {
+};
+
+/** \defgroup echo_init Setup and teardown
+ *
+ * Init and fini functions for echo client.
+ *
+ * @{
+ */
+static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
+{
+ struct cl_site *site = &ed->ed_site_myself;
+ int rc;
+
+ /* initialize site */
+ rc = cl_site_init(site, &ed->ed_cl);
+ if (rc) {
+ CERROR("Cannot initilize site for echo client(%d)\n", rc);
+ return rc;
+ }
+
+ rc = lu_site_init_finish(&site->cs_lu);
+ if (rc)
+ return rc;
+
+ ed->ed_site = site;
+ return 0;
+}
+
+static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
+{
+ if (ed->ed_site) {
+ if (!ed->ed_next_ismd)
+ cl_site_fini(ed->ed_site);
+ ed->ed_site = NULL;
+ }
+}
+
+static void *echo_thread_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct echo_thread_info *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, echo_thread_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void echo_thread_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct echo_thread_info *info = data;
+ OBD_SLAB_FREE_PTR(info, echo_thread_kmem);
+}
+
+static void echo_thread_key_exit(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+}
+
+static struct lu_context_key echo_thread_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = echo_thread_key_init,
+ .lct_fini = echo_thread_key_fini,
+ .lct_exit = echo_thread_key_exit
+};
+
+static void *echo_session_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct echo_session_info *session;
+
+ OBD_SLAB_ALLOC_PTR_GFP(session, echo_session_kmem, __GFP_IO);
+ if (session == NULL)
+ session = ERR_PTR(-ENOMEM);
+ return session;
+}
+
+static void echo_session_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct echo_session_info *session = data;
+ OBD_SLAB_FREE_PTR(session, echo_session_kmem);
+}
+
+static void echo_session_key_exit(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+}
+
+static struct lu_context_key echo_session_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = echo_session_key_init,
+ .lct_fini = echo_session_key_fini,
+ .lct_exit = echo_session_key_exit
+};
+
+LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
+
+#define ECHO_SEQ_WIDTH 0xffffffff
+static int echo_fid_init(struct echo_device *ed, char *obd_name,
+ struct seq_server_site *ss)
+{
+ char *prefix;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC_PTR(ed->ed_cl_seq);
+ if (ed->ed_cl_seq == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
+ if (prefix == NULL)
+ GOTO(out_free_seq, rc = -ENOMEM);
+
+ snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", obd_name);
+
+ /* Init client side sequence-manager */
+ rc = seq_client_init(ed->ed_cl_seq, NULL,
+ LUSTRE_SEQ_METADATA,
+ prefix, ss->ss_server_seq);
+ ed->ed_cl_seq->lcs_width = ECHO_SEQ_WIDTH;
+ OBD_FREE(prefix, MAX_OBD_NAME + 5);
+ if (rc)
+ GOTO(out_free_seq, rc);
+
+ RETURN(0);
+
+out_free_seq:
+ OBD_FREE_PTR(ed->ed_cl_seq);
+ ed->ed_cl_seq = NULL;
+ RETURN(rc);
+}
+
+static int echo_fid_fini(struct obd_device *obddev)
+{
+ struct echo_device *ed = obd2echo_dev(obddev);
+ ENTRY;
+
+ if (ed->ed_cl_seq != NULL) {
+ seq_client_fini(ed->ed_cl_seq);
+ OBD_FREE_PTR(ed->ed_cl_seq);
+ ed->ed_cl_seq = NULL;
+ }
+
+ RETURN(0);
+}
+
+static struct lu_device *echo_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg)
+{
+ struct lu_device *next;
+ struct echo_device *ed;
+ struct cl_device *cd;
+ struct obd_device *obd = NULL; /* to keep compiler happy */
+ struct obd_device *tgt;
+ const char *tgt_type_name;
+ int rc;
+ int cleanup = 0;
+ ENTRY;
+
+ OBD_ALLOC_PTR(ed);
+ if (ed == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ cleanup = 1;
+ cd = &ed->ed_cl;
+ rc = cl_device_init(cd, t);
+ if (rc)
+ GOTO(out, rc);
+
+ cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
+ cd->cd_ops = &echo_device_cl_ops;
+
+ cleanup = 2;
+ obd = class_name2obd(lustre_cfg_string(cfg, 0));
+ LASSERT(obd != NULL);
+ LASSERT(env != NULL);
+
+ tgt = class_name2obd(lustre_cfg_string(cfg, 1));
+ if (tgt == NULL) {
+ CERROR("Can not find tgt device %s\n",
+ lustre_cfg_string(cfg, 1));
+ GOTO(out, rc = -ENODEV);
+ }
+
+ next = tgt->obd_lu_dev;
+ if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
+ ed->ed_next_ismd = 1;
+ } else {
+ ed->ed_next_ismd = 0;
+ rc = echo_site_init(env, ed);
+ if (rc)
+ GOTO(out, rc);
+ }
+ cleanup = 3;
+
+ rc = echo_client_setup(env, obd, cfg);
+ if (rc)
+ GOTO(out, rc);
+
+ ed->ed_ec = &obd->u.echo_client;
+ cleanup = 4;
+
+ if (ed->ed_next_ismd) {
+ /* Suppose to connect to some Metadata layer */
+ struct lu_site *ls;
+ struct lu_device *ld;
+ int found = 0;
+
+ if (next == NULL) {
+ CERROR("%s is not lu device type!\n",
+ lustre_cfg_string(cfg, 1));
+ GOTO(out, rc = -EINVAL);
+ }
+
+ tgt_type_name = lustre_cfg_string(cfg, 2);
+ if (!tgt_type_name) {
+ CERROR("%s no type name for echo %s setup\n",
+ lustre_cfg_string(cfg, 1),
+ tgt->obd_type->typ_name);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ ls = next->ld_site;
+
+ spin_lock(&ls->ls_ld_lock);
+ list_for_each_entry(ld, &ls->ls_ld_linkage, ld_linkage) {
+ if (strcmp(ld->ld_type->ldt_name, tgt_type_name) == 0) {
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock(&ls->ls_ld_lock);
+
+ if (found == 0) {
+ CERROR("%s is not lu device type!\n",
+ lustre_cfg_string(cfg, 1));
+ GOTO(out, rc = -EINVAL);
+ }
+
+ next = ld;
+ /* For MD echo client, it will use the site in MDS stack */
+ ed->ed_site_myself.cs_lu = *ls;
+ ed->ed_site = &ed->ed_site_myself;
+ ed->ed_cl.cd_lu_dev.ld_site = &ed->ed_site_myself.cs_lu;
+ rc = echo_fid_init(ed, obd->obd_name, lu_site2seq(ls));
+ if (rc) {
+ CERROR("echo fid init error %d\n", rc);
+ GOTO(out, rc);
+ }
+ } else {
+ /* if echo client is to be stacked upon ost device, the next is
+ * NULL since ost is not a clio device so far */
+ if (next != NULL && !lu_device_is_cl(next))
+ next = NULL;
+
+ tgt_type_name = tgt->obd_type->typ_name;
+ if (next != NULL) {
+ LASSERT(next != NULL);
+ if (next->ld_site != NULL)
+ GOTO(out, rc = -EBUSY);
+
+ next->ld_site = &ed->ed_site->cs_lu;
+ rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
+ next->ld_type->ldt_name,
+ NULL);
+ if (rc)
+ GOTO(out, rc);
+
+ /* Tricky case, I have to determine the obd type since
+ * CLIO uses the different parameters to initialize
+ * objects for lov & osc. */
+ if (strcmp(tgt_type_name, LUSTRE_LOV_NAME) == 0)
+ ed->ed_next_islov = 1;
+ else
+ LASSERT(strcmp(tgt_type_name,
+ LUSTRE_OSC_NAME) == 0);
+ } else
+ LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
+ }
+
+ ed->ed_next = next;
+ RETURN(&cd->cd_lu_dev);
+out:
+ switch(cleanup) {
+ case 4: {
+ int rc2;
+ rc2 = echo_client_cleanup(obd);
+ if (rc2)
+ CERROR("Cleanup obd device %s error(%d)\n",
+ obd->obd_name, rc2);
+ }
+
+ case 3:
+ echo_site_fini(env, ed);
+ case 2:
+ cl_device_fini(&ed->ed_cl);
+ case 1:
+ OBD_FREE_PTR(ed);
+ case 0:
+ default:
+ break;
+ }
+ return(ERR_PTR(rc));
+}
+
+static int echo_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ LBUG();
+ return 0;
+}
+
+static struct lu_device *echo_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
+ struct lu_device *next = ed->ed_next;
+
+ while (next && !ed->ed_next_ismd)
+ next = next->ld_type->ldt_ops->ldto_device_fini(env, next);
+ return NULL;
+}
+
+static void echo_lock_release(const struct lu_env *env,
+ struct echo_lock *ecl,
+ int still_used)
+{
+ struct cl_lock *clk = echo_lock2cl(ecl);
+
+ cl_lock_get(clk);
+ cl_unuse(env, clk);
+ cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
+ if (!still_used) {
+ cl_lock_mutex_get(env, clk);
+ cl_lock_cancel(env, clk);
+ cl_lock_delete(env, clk);
+ cl_lock_mutex_put(env, clk);
+ }
+ cl_lock_put(env, clk);
+}
+
+static struct lu_device *echo_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct echo_object *eco;
+ struct lu_device *next = ed->ed_next;
+
+ CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
+ ed, next);
+
+ lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+
+ /* check if there are objects still alive.
+ * It shouldn't have any object because lu_site_purge would cleanup
+ * all of cached objects. Anyway, probably the echo device is being
+ * parallelly accessed.
+ */
+ spin_lock(&ec->ec_lock);
+ list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain)
+ eco->eo_deleted = 1;
+ spin_unlock(&ec->ec_lock);
+
+ /* purge again */
+ lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+
+ CDEBUG(D_INFO,
+ "Waiting for the reference of echo object to be dropped\n");
+
+ /* Wait for the last reference to be dropped. */
+ spin_lock(&ec->ec_lock);
+ while (!list_empty(&ec->ec_objects)) {
+ spin_unlock(&ec->ec_lock);
+ CERROR("echo_client still has objects at cleanup time, "
+ "wait for 1 second\n");
+ schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
+ cfs_time_seconds(1));
+ lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+ spin_lock(&ec->ec_lock);
+ }
+ spin_unlock(&ec->ec_lock);
+
+ LASSERT(list_empty(&ec->ec_locks));
+
+ CDEBUG(D_INFO, "No object exists, exiting...\n");
+
+ echo_client_cleanup(d->ld_obd);
+ echo_fid_fini(d->ld_obd);
+ while (next && !ed->ed_next_ismd)
+ next = next->ld_type->ldt_ops->ldto_device_free(env, next);
+
+ LASSERT(ed->ed_site == lu2cl_site(d->ld_site));
+ echo_site_fini(env, ed);
+ cl_device_fini(&ed->ed_cl);
+ OBD_FREE_PTR(ed);
+
+ return NULL;
+}
+
+static const struct lu_device_type_operations echo_device_type_ops = {
+ .ldto_init = echo_type_init,
+ .ldto_fini = echo_type_fini,
+
+ .ldto_start = echo_type_start,
+ .ldto_stop = echo_type_stop,
+
+ .ldto_device_alloc = echo_device_alloc,
+ .ldto_device_free = echo_device_free,
+ .ldto_device_init = echo_device_init,
+ .ldto_device_fini = echo_device_fini
+};
+
+static struct lu_device_type echo_device_type = {
+ .ldt_tags = LU_DEVICE_CL,
+ .ldt_name = LUSTRE_ECHO_CLIENT_NAME,
+ .ldt_ops = &echo_device_type_ops,
+ .ldt_ctx_tags = LCT_CL_THREAD | LCT_MD_THREAD | LCT_DT_THREAD,
+};
+/** @} echo_init */
+
+/** \defgroup echo_exports Exported operations
+ *
+ * exporting functions to echo client
+ *
+ * @{
+ */
+
+/* Interfaces to echo client obd device */
+static struct echo_object *cl_echo_object_find(struct echo_device *d,
+ struct lov_stripe_md **lsmp)
+{
+ struct lu_env *env;
+ struct echo_thread_info *info;
+ struct echo_object_conf *conf;
+ struct lov_stripe_md *lsm;
+ struct echo_object *eco;
+ struct cl_object *obj;
+ struct lu_fid *fid;
+ int refcheck;
+ int rc;
+ ENTRY;
+
+ LASSERT(lsmp);
+ lsm = *lsmp;
+ LASSERT(lsm);
+ LASSERTF(ostid_id(&lsm->lsm_oi) != 0, DOSTID"\n", POSTID(&lsm->lsm_oi));
+ LASSERTF(ostid_seq(&lsm->lsm_oi) == FID_SEQ_ECHO, DOSTID"\n",
+ POSTID(&lsm->lsm_oi));
+
+ /* Never return an object if the obd is to be freed. */
+ if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
+ RETURN(ERR_PTR(-ENODEV));
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN((void *)env);
+
+ info = echo_env_info(env);
+ conf = &info->eti_conf;
+ if (d->ed_next) {
+ if (!d->ed_next_islov) {
+ struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
+ LASSERT(oinfo != NULL);
+ oinfo->loi_oi = lsm->lsm_oi;
+ conf->eoc_cl.u.coc_oinfo = oinfo;
+ } else {
+ struct lustre_md *md;
+ md = &info->eti_md;
+ memset(md, 0, sizeof *md);
+ md->lsm = lsm;
+ conf->eoc_cl.u.coc_md = md;
+ }
+ }
+ conf->eoc_md = lsmp;
+
+ fid = &info->eti_fid;
+ rc = ostid_to_fid(fid, &lsm->lsm_oi, 0);
+ if (rc != 0)
+ GOTO(out, eco = ERR_PTR(rc));
+
+ /* In the function below, .hs_keycmp resolves to
+ * lu_obj_hop_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl);
+ if (IS_ERR(obj))
+ GOTO(out, eco = (void*)obj);
+
+ eco = cl2echo_obj(obj);
+ if (eco->eo_deleted) {
+ cl_object_put(env, obj);
+ eco = ERR_PTR(-EAGAIN);
+ }
+
+out:
+ cl_env_put(env, &refcheck);
+ RETURN(eco);
+}
+
+static int cl_echo_object_put(struct echo_object *eco)
+{
+ struct lu_env *env;
+ struct cl_object *obj = echo_obj2cl(eco);
+ int refcheck;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ /* an external function to kill an object? */
+ if (eco->eo_deleted) {
+ struct lu_object_header *loh = obj->co_lu.lo_header;
+ LASSERT(&eco->eo_hdr == luh2coh(loh));
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
+ }
+
+ cl_object_put(env, obj);
+ cl_env_put(env, &refcheck);
+ RETURN(0);
+}
+
+static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
+ obd_off start, obd_off end, int mode,
+ __u64 *cookie , __u32 enqflags)
+{
+ struct cl_io *io;
+ struct cl_lock *lck;
+ struct cl_object *obj;
+ struct cl_lock_descr *descr;
+ struct echo_thread_info *info;
+ int rc = -ENOMEM;
+ ENTRY;
+
+ info = echo_env_info(env);
+ io = &info->eti_io;
+ descr = &info->eti_descr;
+ obj = echo_obj2cl(eco);
+
+ descr->cld_obj = obj;
+ descr->cld_start = cl_index(obj, start);
+ descr->cld_end = cl_index(obj, end);
+ descr->cld_mode = mode == LCK_PW ? CLM_WRITE : CLM_READ;
+ descr->cld_enq_flags = enqflags;
+ io->ci_obj = obj;
+
+ lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
+ if (lck) {
+ struct echo_client_obd *ec = eco->eo_dev->ed_ec;
+ struct echo_lock *el;
+
+ rc = cl_wait(env, lck);
+ if (rc == 0) {
+ el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
+ spin_lock(&ec->ec_lock);
+ if (list_empty(&el->el_chain)) {
+ list_add(&el->el_chain, &ec->ec_locks);
+ el->el_cookie = ++ec->ec_unique;
+ }
+ atomic_inc(&el->el_refcount);
+ *cookie = el->el_cookie;
+ spin_unlock(&ec->ec_lock);
+ } else {
+ cl_lock_release(env, lck, "ec enqueue", current);
+ }
+ }
+ RETURN(rc);
+}
+
+static int cl_echo_enqueue(struct echo_object *eco, obd_off start, obd_off end,
+ int mode, __u64 *cookie)
+{
+ struct echo_thread_info *info;
+ struct lu_env *env;
+ struct cl_io *io;
+ int refcheck;
+ int result;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ info = echo_env_info(env);
+ io = &info->eti_io;
+
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, echo_obj2cl(eco));
+ if (result < 0)
+ GOTO(out, result);
+ LASSERT(result == 0);
+
+ result = cl_echo_enqueue0(env, eco, start, end, mode, cookie, 0);
+ cl_io_fini(env, io);
+
+ EXIT;
+out:
+ cl_env_put(env, &refcheck);
+ return result;
+}
+
+static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
+ __u64 cookie)
+{
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct echo_lock *ecl = NULL;
+ struct list_head *el;
+ int found = 0, still_used = 0;
+ ENTRY;
+
+ LASSERT(ec != NULL);
+ spin_lock(&ec->ec_lock);
+ list_for_each (el, &ec->ec_locks) {
+ ecl = list_entry (el, struct echo_lock, el_chain);
+ CDEBUG(D_INFO, "ecl: %p, cookie: "LPX64"\n", ecl, ecl->el_cookie);
+ found = (ecl->el_cookie == cookie);
+ if (found) {
+ if (atomic_dec_and_test(&ecl->el_refcount))
+ list_del_init(&ecl->el_chain);
+ else
+ still_used = 1;
+ break;
+ }
+ }
+ spin_unlock(&ec->ec_lock);
+
+ if (!found)
+ RETURN(-ENOENT);
+
+ echo_lock_release(env, ecl, still_used);
+ RETURN(0);
+}
+
+static int cl_echo_cancel(struct echo_device *ed, __u64 cookie)
+{
+ struct lu_env *env;
+ int refcheck;
+ int rc;
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ rc = cl_echo_cancel0(env, ed, cookie);
+
+ cl_env_put(env, &refcheck);
+ RETURN(rc);
+}
+
+static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
+ enum cl_req_type unused, struct cl_2queue *queue)
+{
+ struct cl_page *clp;
+ struct cl_page *temp;
+ int result = 0;
+ ENTRY;
+
+ cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
+ int rc;
+ rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
+ if (rc == 0)
+ continue;
+ result = result ?: rc;
+ }
+ RETURN(result);
+}
+
+static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
+ struct page **pages, int npages, int async)
+{
+ struct lu_env *env;
+ struct echo_thread_info *info;
+ struct cl_object *obj = echo_obj2cl(eco);
+ struct echo_device *ed = eco->eo_dev;
+ struct cl_2queue *queue;
+ struct cl_io *io;
+ struct cl_page *clp;
+ struct lustre_handle lh = { 0 };
+ int page_size = cl_page_size(obj);
+ int refcheck;
+ int rc;
+ int i;
+ ENTRY;
+
+ LASSERT((offset & ~CFS_PAGE_MASK) == 0);
+ LASSERT(ed->ed_next != NULL);
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ info = echo_env_info(env);
+ io = &info->eti_io;
+ queue = &info->eti_queue;
+
+ cl_2queue_init(queue);
+
+ io->ci_ignore_layout = 1;
+ rc = cl_io_init(env, io, CIT_MISC, obj);
+ if (rc < 0)
+ GOTO(out, rc);
+ LASSERT(rc == 0);
+
+
+ rc = cl_echo_enqueue0(env, eco, offset,
+ offset + npages * PAGE_CACHE_SIZE - 1,
+ rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
+ CEF_NEVER);
+ if (rc < 0)
+ GOTO(error_lock, rc);
+
+ for (i = 0; i < npages; i++) {
+ LASSERT(pages[i]);
+ clp = cl_page_find(env, obj, cl_index(obj, offset),
+ pages[i], CPT_TRANSIENT);
+ if (IS_ERR(clp)) {
+ rc = PTR_ERR(clp);
+ break;
+ }
+ LASSERT(clp->cp_type == CPT_TRANSIENT);
+
+ rc = cl_page_own(env, io, clp);
+ if (rc) {
+ LASSERT(clp->cp_state == CPS_FREEING);
+ cl_page_put(env, clp);
+ break;
+ }
+
+ cl_2queue_add(queue, clp);
+
+ /* drop the reference count for cl_page_find, so that the page
+ * will be freed in cl_2queue_fini. */
+ cl_page_put(env, clp);
+ cl_page_clip(env, clp, 0, page_size);
+
+ offset += page_size;
+ }
+
+ if (rc == 0) {
+ enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE;
+
+ async = async && (typ == CRT_WRITE);
+ if (async)
+ rc = cl_echo_async_brw(env, io, typ, queue);
+ else
+ rc = cl_io_submit_sync(env, io, typ, queue, 0);
+ CDEBUG(D_INFO, "echo_client %s write returns %d\n",
+ async ? "async" : "sync", rc);
+ }
+
+ cl_echo_cancel0(env, ed, lh.cookie);
+ EXIT;
+error_lock:
+ cl_2queue_discard(env, io, queue);
+ cl_2queue_disown(env, io, queue);
+ cl_2queue_fini(env, queue);
+ cl_io_fini(env, io);
+out:
+ cl_env_put(env, &refcheck);
+ return rc;
+}
+/** @} echo_exports */
+
+
+static obd_id last_object_id;
+
+static int
+echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
+{
+ struct lov_stripe_md *ulsm = _ulsm;
+ int nob, i;
+
+ nob = offsetof (struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
+ if (nob > ulsm_nob)
+ return (-EINVAL);
+
+ if (copy_to_user (ulsm, lsm, sizeof(ulsm)))
+ return (-EFAULT);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (copy_to_user (ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i],
+ sizeof(lsm->lsm_oinfo[0])))
+ return (-EFAULT);
+ }
+ return 0;
+}
+
+static int
+echo_copyin_lsm (struct echo_device *ed, struct lov_stripe_md *lsm,
+ void *ulsm, int ulsm_nob)
+{
+ struct echo_client_obd *ec = ed->ed_ec;
+ int i;
+
+ if (ulsm_nob < sizeof (*lsm))
+ return (-EINVAL);
+
+ if (copy_from_user (lsm, ulsm, sizeof (*lsm)))
+ return (-EFAULT);
+
+ if (lsm->lsm_stripe_count > ec->ec_nstripes ||
+ lsm->lsm_magic != LOV_MAGIC ||
+ (lsm->lsm_stripe_size & (~CFS_PAGE_MASK)) != 0 ||
+ ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
+ return (-EINVAL);
+
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (copy_from_user(lsm->lsm_oinfo[i],
+ ((struct lov_stripe_md *)ulsm)-> \
+ lsm_oinfo[i],
+ sizeof(lsm->lsm_oinfo[0])))
+ return (-EFAULT);
+ }
+ return (0);
+}
+
+static inline void echo_md_build_name(struct lu_name *lname, char *name,
+ __u64 id)
+{
+ sprintf(name, LPU64, id);
+ lname->ln_name = name;
+ lname->ln_namelen = strlen(name);
+}
+
+/* similar to mdt_attr_get_complex */
+static int echo_big_lmm_get(const struct lu_env *env, struct md_object *o,
+ struct md_attr *ma)
+{
+ struct echo_thread_info *info = echo_env_info(env);
+ int rc;
+
+ ENTRY;
+
+ LASSERT(ma->ma_lmm_size > 0);
+
+ rc = mo_xattr_get(env, o, &LU_BUF_NULL, XATTR_NAME_LOV);
+ if (rc < 0)
+ RETURN(rc);
+
+ /* big_lmm may need to be grown */
+ if (info->eti_big_lmmsize < rc) {
+ int size = size_roundup_power2(rc);
+
+ if (info->eti_big_lmmsize > 0) {
+ /* free old buffer */
+ LASSERT(info->eti_big_lmm);
+ OBD_FREE_LARGE(info->eti_big_lmm,
+ info->eti_big_lmmsize);
+ info->eti_big_lmm = NULL;
+ info->eti_big_lmmsize = 0;
+ }
+
+ OBD_ALLOC_LARGE(info->eti_big_lmm, size);
+ if (info->eti_big_lmm == NULL)
+ RETURN(-ENOMEM);
+ info->eti_big_lmmsize = size;
+ }
+ LASSERT(info->eti_big_lmmsize >= rc);
+
+ info->eti_buf.lb_buf = info->eti_big_lmm;
+ info->eti_buf.lb_len = info->eti_big_lmmsize;
+ rc = mo_xattr_get(env, o, &info->eti_buf, XATTR_NAME_LOV);
+ if (rc < 0)
+ RETURN(rc);
+
+ ma->ma_valid |= MA_LOV;
+ ma->ma_lmm = info->eti_big_lmm;
+ ma->ma_lmm_size = rc;
+
+ RETURN(0);
+}
+
+int echo_attr_get_complex(const struct lu_env *env, struct md_object *next,
+ struct md_attr *ma)
+{
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_buf *buf = &info->eti_buf;
+ umode_t mode = lu_object_attr(&next->mo_lu);
+ int need = ma->ma_need;
+ int rc = 0, rc2;
+
+ ENTRY;
+
+ ma->ma_valid = 0;
+
+ if (need & MA_INODE) {
+ ma->ma_need = MA_INODE;
+ rc = mo_attr_get(env, next, ma);
+ if (rc)
+ GOTO(out, rc);
+ ma->ma_valid |= MA_INODE;
+ }
+
+ if (need & MA_LOV) {
+ if (S_ISREG(mode) || S_ISDIR(mode)) {
+ LASSERT(ma->ma_lmm_size > 0);
+ buf->lb_buf = ma->ma_lmm;
+ buf->lb_len = ma->ma_lmm_size;
+ rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_LOV);
+ if (rc2 > 0) {
+ ma->ma_lmm_size = rc2;
+ ma->ma_valid |= MA_LOV;
+ } else if (rc2 == -ENODATA) {
+ /* no LOV EA */
+ ma->ma_lmm_size = 0;
+ } else if (rc2 == -ERANGE) {
+ rc2 = echo_big_lmm_get(env, next, ma);
+ if (rc2 < 0)
+ GOTO(out, rc = rc2);
+ } else {
+ GOTO(out, rc = rc2);
+ }
+ }
+ }
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (need & MA_ACL_DEF && S_ISDIR(mode)) {
+ buf->lb_buf = ma->ma_acl;
+ buf->lb_len = ma->ma_acl_size;
+ rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_ACL_DEFAULT);
+ if (rc2 > 0) {
+ ma->ma_acl_size = rc2;
+ ma->ma_valid |= MA_ACL_DEF;
+ } else if (rc2 == -ENODATA) {
+ /* no ACLs */
+ ma->ma_acl_size = 0;
+ } else {
+ GOTO(out, rc = rc2);
+ }
+ }
+#endif
+out:
+ ma->ma_need = need;
+ CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64" ma_lmm=%p\n",
+ rc, ma->ma_valid, ma->ma_lmm);
+ RETURN(rc);
+}
+
+static int
+echo_md_create_internal(const struct lu_env *env, struct echo_device *ed,
+ struct md_object *parent, struct lu_fid *fid,
+ struct lu_name *lname, struct md_op_spec *spec,
+ struct md_attr *ma)
+{
+ struct lu_object *ec_child, *child;
+ struct lu_device *ld = ed->ed_next;
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_fid *fid2 = &info->eti_fid2;
+ struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+ int rc;
+
+ ENTRY;
+
+ rc = mdo_lookup(env, parent, lname, fid2, spec);
+ if (rc == 0)
+ return -EEXIST;
+ else if (rc != -ENOENT)
+ return rc;
+
+ ec_child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev,
+ fid, &conf);
+ if (IS_ERR(ec_child)) {
+ CERROR("Can not find the child "DFID": rc = %ld\n", PFID(fid),
+ PTR_ERR(ec_child));
+ RETURN(PTR_ERR(ec_child));
+ }
+
+ child = lu_object_locate(ec_child->lo_header, ld->ld_type);
+ if (child == NULL) {
+ CERROR("Can not locate the child "DFID"\n", PFID(fid));
+ GOTO(out_put, rc = -EINVAL);
+ }
+
+ CDEBUG(D_RPCTRACE, "Start creating object "DFID" %s %p\n",
+ PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
+
+ /*
+ * Do not perform lookup sanity check. We know that name does not exist.
+ */
+ spec->sp_cr_lookup = 0;
+ rc = mdo_create(env, parent, lname, lu2md(child), spec, ma);
+ if (rc) {
+ CERROR("Can not create child "DFID": rc = %d\n", PFID(fid), rc);
+ GOTO(out_put, rc);
+ }
+ CDEBUG(D_RPCTRACE, "End creating object "DFID" %s %p rc = %d\n",
+ PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent, rc);
+ EXIT;
+out_put:
+ lu_object_put(env, ec_child);
+ return rc;
+}
+
+static int echo_set_lmm_size(const struct lu_env *env, struct lu_device *ld,
+ struct md_attr *ma)
+{
+ struct echo_thread_info *info = echo_env_info(env);
+
+ if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
+ ma->ma_lmm = (void *)&info->eti_lmm;
+ ma->ma_lmm_size = sizeof(info->eti_lmm);
+ } else {
+ LASSERT(info->eti_big_lmmsize);
+ ma->ma_lmm = info->eti_big_lmm;
+ ma->ma_lmm_size = info->eti_big_lmmsize;
+ }
+
+ return 0;
+}
+
+static int echo_create_md_object(const struct lu_env *env,
+ struct echo_device *ed,
+ struct lu_object *ec_parent,
+ struct lu_fid *fid,
+ char *name, int namelen,
+ __u64 id, __u32 mode, int count,
+ int stripe_count, int stripe_offset)
+{
+ struct lu_object *parent;
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_name *lname = &info->eti_lname;
+ struct md_op_spec *spec = &info->eti_spec;
+ struct md_attr *ma = &info->eti_ma;
+ struct lu_device *ld = ed->ed_next;
+ int rc = 0;
+ int i;
+
+ ENTRY;
+
+ if (ec_parent == NULL)
+ return -1;
+ parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
+ if (parent == NULL)
+ RETURN(-ENXIO);
+
+ memset(ma, 0, sizeof(*ma));
+ memset(spec, 0, sizeof(*spec));
+ if (stripe_count != 0) {
+ spec->sp_cr_flags |= FMODE_WRITE;
+ echo_set_lmm_size(env, ld, ma);
+ if (stripe_count != -1) {
+ struct lov_user_md_v3 *lum = &info->eti_lum;
+
+ lum->lmm_magic = LOV_USER_MAGIC_V3;
+ lum->lmm_stripe_count = stripe_count;
+ lum->lmm_stripe_offset = stripe_offset;
+ lum->lmm_pattern = 0;
+ spec->u.sp_ea.eadata = lum;
+ spec->u.sp_ea.eadatalen = sizeof(*lum);
+ spec->sp_cr_flags |= MDS_OPEN_HAS_EA;
+ }
+ }
+
+ ma->ma_attr.la_mode = mode;
+ ma->ma_attr.la_valid = LA_CTIME | LA_MODE;
+ ma->ma_attr.la_ctime = cfs_time_current_64();
+
+ if (name != NULL) {
+ lname->ln_name = name;
+ lname->ln_namelen = namelen;
+ /* If name is specified, only create one object by name */
+ rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
+ spec, ma);
+ RETURN(rc);
+ }
+
+ /* Create multiple object sequenced by id */
+ for (i = 0; i < count; i++) {
+ char *tmp_name = info->eti_name;
+
+ echo_md_build_name(lname, tmp_name, id);
+
+ rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
+ spec, ma);
+ if (rc) {
+ CERROR("Can not create child %s: rc = %d\n", tmp_name,
+ rc);
+ break;
+ }
+ id++;
+ fid->f_oid++;
+ }
+
+ RETURN(rc);
+}
+
+static struct lu_object *echo_md_lookup(const struct lu_env *env,
+ struct echo_device *ed,
+ struct md_object *parent,
+ struct lu_name *lname)
+{
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_fid *fid = &info->eti_fid;
+ struct lu_object *child;
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_INFO, "lookup %s in parent "DFID" %p\n", lname->ln_name,
+ PFID(fid), parent);
+ rc = mdo_lookup(env, parent, lname, fid, NULL);
+ if (rc) {
+ CERROR("lookup %s: rc = %d\n", lname->ln_name, rc);
+ RETURN(ERR_PTR(rc));
+ }
+
+ /* In the function below, .hs_keycmp resolves to
+ * lu_obj_hop_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
+
+ RETURN(child);
+}
+
+static int echo_setattr_object(const struct lu_env *env,
+ struct echo_device *ed,
+ struct lu_object *ec_parent,
+ __u64 id, int count)
+{
+ struct lu_object *parent;
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_name *lname = &info->eti_lname;
+ char *name = info->eti_name;
+ struct lu_device *ld = ed->ed_next;
+ struct lu_buf *buf = &info->eti_buf;
+ int rc = 0;
+ int i;
+
+ ENTRY;
+
+ if (ec_parent == NULL)
+ return -1;
+ parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
+ if (parent == NULL)
+ RETURN(-ENXIO);
+
+ for (i = 0; i < count; i++) {
+ struct lu_object *ec_child, *child;
+
+ echo_md_build_name(lname, name, id);
+
+ ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
+ if (IS_ERR(ec_child)) {
+ CERROR("Can't find child %s: rc = %ld\n",
+ lname->ln_name, PTR_ERR(ec_child));
+ RETURN(PTR_ERR(ec_child));
+ }
+
+ child = lu_object_locate(ec_child->lo_header, ld->ld_type);
+ if (child == NULL) {
+ CERROR("Can not locate the child %s\n", lname->ln_name);
+ lu_object_put(env, ec_child);
+ rc = -EINVAL;
+ break;
+ }
+
+ CDEBUG(D_RPCTRACE, "Start setattr object "DFID"\n",
+ PFID(lu_object_fid(child)));
+
+ buf->lb_buf = info->eti_xattr_buf;
+ buf->lb_len = sizeof(info->eti_xattr_buf);
+
+ sprintf(name, "%s.test1", XATTR_USER_PREFIX);
+ rc = mo_xattr_set(env, lu2md(child), buf, name,
+ LU_XATTR_CREATE);
+ if (rc < 0) {
+ CERROR("Can not setattr child "DFID": rc = %d\n",
+ PFID(lu_object_fid(child)), rc);
+ lu_object_put(env, ec_child);
+ break;
+ }
+ CDEBUG(D_RPCTRACE, "End setattr object "DFID"\n",
+ PFID(lu_object_fid(child)));
+ id++;
+ lu_object_put(env, ec_child);
+ }
+ RETURN(rc);
+}
+
+static int echo_getattr_object(const struct lu_env *env,
+ struct echo_device *ed,
+ struct lu_object *ec_parent,
+ __u64 id, int count)
+{
+ struct lu_object *parent;
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_name *lname = &info->eti_lname;
+ char *name = info->eti_name;
+ struct md_attr *ma = &info->eti_ma;
+ struct lu_device *ld = ed->ed_next;
+ int rc = 0;
+ int i;
+
+ ENTRY;
+
+ if (ec_parent == NULL)
+ return -1;
+ parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
+ if (parent == NULL)
+ RETURN(-ENXIO);
+
+ memset(ma, 0, sizeof(*ma));
+ ma->ma_need |= MA_INODE | MA_LOV | MA_PFID | MA_HSM | MA_ACL_DEF;
+ ma->ma_acl = info->eti_xattr_buf;
+ ma->ma_acl_size = sizeof(info->eti_xattr_buf);
+
+ for (i = 0; i < count; i++) {
+ struct lu_object *ec_child, *child;
+
+ ma->ma_valid = 0;
+ echo_md_build_name(lname, name, id);
+ echo_set_lmm_size(env, ld, ma);
+
+ ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
+ if (IS_ERR(ec_child)) {
+ CERROR("Can't find child %s: rc = %ld\n",
+ lname->ln_name, PTR_ERR(ec_child));
+ RETURN(PTR_ERR(ec_child));
+ }
+
+ child = lu_object_locate(ec_child->lo_header, ld->ld_type);
+ if (child == NULL) {
+ CERROR("Can not locate the child %s\n", lname->ln_name);
+ lu_object_put(env, ec_child);
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_RPCTRACE, "Start getattr object "DFID"\n",
+ PFID(lu_object_fid(child)));
+ rc = echo_attr_get_complex(env, lu2md(child), ma);
+ if (rc) {
+ CERROR("Can not getattr child "DFID": rc = %d\n",
+ PFID(lu_object_fid(child)), rc);
+ lu_object_put(env, ec_child);
+ break;
+ }
+ CDEBUG(D_RPCTRACE, "End getattr object "DFID"\n",
+ PFID(lu_object_fid(child)));
+ id++;
+ lu_object_put(env, ec_child);
+ }
+
+ RETURN(rc);
+}
+
+static int echo_lookup_object(const struct lu_env *env,
+ struct echo_device *ed,
+ struct lu_object *ec_parent,
+ __u64 id, int count)
+{
+ struct lu_object *parent;
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_name *lname = &info->eti_lname;
+ char *name = info->eti_name;
+ struct lu_fid *fid = &info->eti_fid;
+ struct lu_device *ld = ed->ed_next;
+ int rc = 0;
+ int i;
+
+ if (ec_parent == NULL)
+ return -1;
+ parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
+ if (parent == NULL)
+ return -ENXIO;
+
+ /*prepare the requests*/
+ for (i = 0; i < count; i++) {
+ echo_md_build_name(lname, name, id);
+
+ CDEBUG(D_RPCTRACE, "Start lookup object "DFID" %s %p\n",
+ PFID(lu_object_fid(parent)), lname->ln_name, parent);
+
+ rc = mdo_lookup(env, lu2md(parent), lname, fid, NULL);
+ if (rc) {
+ CERROR("Can not lookup child %s: rc = %d\n", name, rc);
+ break;
+ }
+ CDEBUG(D_RPCTRACE, "End lookup object "DFID" %s %p\n",
+ PFID(lu_object_fid(parent)), lname->ln_name, parent);
+
+ id++;
+ }
+ return rc;
+}
+
+static int echo_md_destroy_internal(const struct lu_env *env,
+ struct echo_device *ed,
+ struct md_object *parent,
+ struct lu_name *lname,
+ struct md_attr *ma)
+{
+ struct lu_device *ld = ed->ed_next;
+ struct lu_object *ec_child;
+ struct lu_object *child;
+ int rc;
+
+ ENTRY;
+
+ ec_child = echo_md_lookup(env, ed, parent, lname);
+ if (IS_ERR(ec_child)) {
+ CERROR("Can't find child %s: rc = %ld\n", lname->ln_name,
+ PTR_ERR(ec_child));
+ RETURN(PTR_ERR(ec_child));
+ }
+
+ child = lu_object_locate(ec_child->lo_header, ld->ld_type);
+ if (child == NULL) {
+ CERROR("Can not locate the child %s\n", lname->ln_name);
+ GOTO(out_put, rc = -EINVAL);
+ }
+
+ CDEBUG(D_RPCTRACE, "Start destroy object "DFID" %s %p\n",
+ PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
+
+ rc = mdo_unlink(env, parent, lu2md(child), lname, ma, 0);
+ if (rc) {
+ CERROR("Can not unlink child %s: rc = %d\n",
+ lname->ln_name, rc);
+ GOTO(out_put, rc);
+ }
+ CDEBUG(D_RPCTRACE, "End destroy object "DFID" %s %p\n",
+ PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
+out_put:
+ lu_object_put(env, ec_child);
+ return rc;
+}
+
+static int echo_destroy_object(const struct lu_env *env,
+ struct echo_device *ed,
+ struct lu_object *ec_parent,
+ char *name, int namelen,
+ __u64 id, __u32 mode,
+ int count)
+{
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_name *lname = &info->eti_lname;
+ struct md_attr *ma = &info->eti_ma;
+ struct lu_device *ld = ed->ed_next;
+ struct lu_object *parent;
+ int rc = 0;
+ int i;
+ ENTRY;
+
+ parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
+ if (parent == NULL)
+ RETURN(-EINVAL);
+
+ memset(ma, 0, sizeof(*ma));
+ ma->ma_attr.la_mode = mode;
+ ma->ma_attr.la_valid = LA_CTIME;
+ ma->ma_attr.la_ctime = cfs_time_current_64();
+ ma->ma_need = MA_INODE;
+ ma->ma_valid = 0;
+
+ if (name != NULL) {
+ lname->ln_name = name;
+ lname->ln_namelen = namelen;
+ rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
+ ma);
+ RETURN(rc);
+ }
+
+ /*prepare the requests*/
+ for (i = 0; i < count; i++) {
+ char *tmp_name = info->eti_name;
+
+ ma->ma_valid = 0;
+ echo_md_build_name(lname, tmp_name, id);
+
+ rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
+ ma);
+ if (rc) {
+ CERROR("Can not unlink child %s: rc = %d\n", name, rc);
+ break;
+ }
+ id++;
+ }
+
+ RETURN(rc);
+}
+
+static struct lu_object *echo_resolve_path(const struct lu_env *env,
+ struct echo_device *ed, char *path,
+ int path_len)
+{
+ struct lu_device *ld = ed->ed_next;
+ struct md_device *md = lu2md_dev(ld);
+ struct echo_thread_info *info = echo_env_info(env);
+ struct lu_fid *fid = &info->eti_fid;
+ struct lu_name *lname = &info->eti_lname;
+ struct lu_object *parent = NULL;
+ struct lu_object *child = NULL;
+ int rc = 0;
+ ENTRY;
+
+ /*Only support MDD layer right now*/
+ rc = md->md_ops->mdo_root_get(env, md, fid);
+ if (rc) {
+ CERROR("get root error: rc = %d\n", rc);
+ RETURN(ERR_PTR(rc));
+ }
+
+ /* In the function below, .hs_keycmp resolves to
+ * lu_obj_hop_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ parent = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
+ if (IS_ERR(parent)) {
+ CERROR("Can not find the parent "DFID": rc = %ld\n",
+ PFID(fid), PTR_ERR(parent));
+ RETURN(parent);
+ }
+
+ while (1) {
+ struct lu_object *ld_parent;
+ char *e;
+
+ e = strsep(&path, "/");
+ if (e == NULL)
+ break;
+
+ if (e[0] == 0) {
+ if (!path || path[0] == '\0')
+ break;
+ continue;
+ }
+
+ lname->ln_name = e;
+ lname->ln_namelen = strlen(e);
+
+ ld_parent = lu_object_locate(parent->lo_header, ld->ld_type);
+ if (ld_parent == NULL) {
+ lu_object_put(env, parent);
+ rc = -EINVAL;
+ break;
+ }
+
+ child = echo_md_lookup(env, ed, lu2md(ld_parent), lname);
+ lu_object_put(env, parent);
+ if (IS_ERR(child)) {
+ rc = (int)PTR_ERR(child);
+ CERROR("lookup %s under parent "DFID": rc = %d\n",
+ lname->ln_name, PFID(lu_object_fid(ld_parent)),
+ rc);
+ break;
+ }
+ parent = child;
+ }
+ if (rc)
+ RETURN(ERR_PTR(rc));
+
+ RETURN(parent);
+}
+
+static void echo_ucred_init(struct lu_env *env)
+{
+ struct lu_ucred *ucred = lu_ucred(env);
+
+ ucred->uc_valid = UCRED_INVALID;
+
+ ucred->uc_suppgids[0] = -1;
+ ucred->uc_suppgids[1] = -1;
+
+ ucred->uc_uid = ucred->uc_o_uid = current_uid();
+ ucred->uc_gid = ucred->uc_o_gid = current_gid();
+ ucred->uc_fsuid = ucred->uc_o_fsuid = current_fsuid();
+ ucred->uc_fsgid = ucred->uc_o_fsgid = current_fsgid();
+ ucred->uc_cap = cfs_curproc_cap_pack();
+
+ /* remove fs privilege for non-root user. */
+ if (ucred->uc_fsuid)
+ ucred->uc_cap &= ~CFS_CAP_FS_MASK;
+ ucred->uc_valid = UCRED_NEW;
+}
+
+static void echo_ucred_fini(struct lu_env *env)
+{
+ struct lu_ucred *ucred = lu_ucred(env);
+ ucred->uc_valid = UCRED_INIT;
+}
+
+#define ECHO_MD_CTX_TAG (LCT_REMEMBER | LCT_MD_THREAD)
+#define ECHO_MD_SES_TAG (LCT_REMEMBER | LCT_SESSION)
+static int echo_md_handler(struct echo_device *ed, int command,
+ char *path, int path_len, __u64 id, int count,
+ struct obd_ioctl_data *data)
+{
+ struct echo_thread_info *info;
+ struct lu_device *ld = ed->ed_next;
+ struct lu_env *env;
+ int refcheck;
+ struct lu_object *parent;
+ char *name = NULL;
+ int namelen = data->ioc_plen2;
+ int rc = 0;
+ ENTRY;
+
+ if (ld == NULL) {
+ CERROR("MD echo client is not being initialized properly\n");
+ RETURN(-EINVAL);
+ }
+
+ if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
+ CERROR("Only support MDD layer right now!\n");
+ RETURN(-EINVAL);
+ }
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ rc = lu_env_refill_by_tags(env, ECHO_MD_CTX_TAG, ECHO_MD_SES_TAG);
+ if (rc != 0)
+ GOTO(out_env, rc);
+
+ /* init big_lmm buffer */
+ info = echo_env_info(env);
+ LASSERT(info->eti_big_lmm == NULL);
+ OBD_ALLOC_LARGE(info->eti_big_lmm, MIN_MD_SIZE);
+ if (info->eti_big_lmm == NULL)
+ GOTO(out_env, rc = -ENOMEM);
+ info->eti_big_lmmsize = MIN_MD_SIZE;
+
+ parent = echo_resolve_path(env, ed, path, path_len);
+ if (IS_ERR(parent)) {
+ CERROR("Can not resolve the path %s: rc = %ld\n", path,
+ PTR_ERR(parent));
+ GOTO(out_free, rc = PTR_ERR(parent));
+ }
+
+ if (namelen > 0) {
+ OBD_ALLOC(name, namelen + 1);
+ if (name == NULL)
+ GOTO(out_put, rc = -ENOMEM);
+ if (copy_from_user(name, data->ioc_pbuf2, namelen))
+ GOTO(out_name, rc = -EFAULT);
+ }
+
+ echo_ucred_init(env);
+
+ switch (command) {
+ case ECHO_MD_CREATE:
+ case ECHO_MD_MKDIR: {
+ struct echo_thread_info *info = echo_env_info(env);
+ __u32 mode = data->ioc_obdo2.o_mode;
+ struct lu_fid *fid = &info->eti_fid;
+ int stripe_count = (int)data->ioc_obdo2.o_misc;
+ int stripe_index = (int)data->ioc_obdo2.o_stripe_idx;
+
+ rc = ostid_to_fid(fid, &data->ioc_obdo1.o_oi, 0);
+ if (rc != 0)
+ break;
+
+ /* In the function below, .hs_keycmp resolves to
+ * lu_obj_hop_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ rc = echo_create_md_object(env, ed, parent, fid, name, namelen,
+ id, mode, count, stripe_count,
+ stripe_index);
+ break;
+ }
+ case ECHO_MD_DESTROY:
+ case ECHO_MD_RMDIR: {
+ __u32 mode = data->ioc_obdo2.o_mode;
+
+ rc = echo_destroy_object(env, ed, parent, name, namelen,
+ id, mode, count);
+ break;
+ }
+ case ECHO_MD_LOOKUP:
+ rc = echo_lookup_object(env, ed, parent, id, count);
+ break;
+ case ECHO_MD_GETATTR:
+ rc = echo_getattr_object(env, ed, parent, id, count);
+ break;
+ case ECHO_MD_SETATTR:
+ rc = echo_setattr_object(env, ed, parent, id, count);
+ break;
+ default:
+ CERROR("unknown command %d\n", command);
+ rc = -EINVAL;
+ break;
+ }
+ echo_ucred_fini(env);
+
+out_name:
+ if (name != NULL)
+ OBD_FREE(name, namelen + 1);
+out_put:
+ lu_object_put(env, parent);
+out_free:
+ LASSERT(info->eti_big_lmm);
+ OBD_FREE_LARGE(info->eti_big_lmm, info->eti_big_lmmsize);
+ info->eti_big_lmm = NULL;
+ info->eti_big_lmmsize = 0;
+out_env:
+ cl_env_put(env, &refcheck);
+ return rc;
+}
+
+static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
+ int on_target, struct obdo *oa, void *ulsm,
+ int ulsm_nob, struct obd_trans_info *oti)
+{
+ struct echo_object *eco;
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct lov_stripe_md *lsm = NULL;
+ int rc;
+ int created = 0;
+ ENTRY;
+
+ if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
+ (on_target || /* set_stripe */
+ ec->ec_nstripes != 0)) { /* LOV */
+ CERROR ("No valid oid\n");
+ RETURN(-EINVAL);
+ }
+
+ rc = echo_alloc_memmd(ed, &lsm);
+ if (rc < 0) {
+ CERROR("Cannot allocate md: rc = %d\n", rc);
+ GOTO(failed, rc);
+ }
+
+ if (ulsm != NULL) {
+ int i, idx;
+
+ rc = echo_copyin_lsm (ed, lsm, ulsm, ulsm_nob);
+ if (rc != 0)
+ GOTO(failed, rc);
+
+ if (lsm->lsm_stripe_count == 0)
+ lsm->lsm_stripe_count = ec->ec_nstripes;
+
+ if (lsm->lsm_stripe_size == 0)
+ lsm->lsm_stripe_size = PAGE_CACHE_SIZE;
+
+ idx = cfs_rand();
+
+ /* setup stripes: indices + default ids if required */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) == 0)
+ lsm->lsm_oinfo[i]->loi_oi = lsm->lsm_oi;
+
+ lsm->lsm_oinfo[i]->loi_ost_idx =
+ (idx + i) % ec->ec_nstripes;
+ }
+ }
+
+ /* setup object ID here for !on_target and LOV hint */
+ if (oa->o_valid & OBD_MD_FLID) {
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+ lsm->lsm_oi = oa->o_oi;
+ }
+
+ if (ostid_id(&lsm->lsm_oi) == 0)
+ ostid_set_id(&lsm->lsm_oi, ++last_object_id);
+
+ rc = 0;
+ if (on_target) {
+ /* Only echo objects are allowed to be created */
+ LASSERT((oa->o_valid & OBD_MD_FLGROUP) &&
+ (ostid_seq(&oa->o_oi) == FID_SEQ_ECHO));
+ rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
+ if (rc != 0) {
+ CERROR("Cannot create objects: rc = %d\n", rc);
+ GOTO(failed, rc);
+ }
+ created = 1;
+ }
+
+ /* See what object ID we were given */
+ oa->o_oi = lsm->lsm_oi;
+ oa->o_valid |= OBD_MD_FLID;
+
+ eco = cl_echo_object_find(ed, &lsm);
+ if (IS_ERR(eco))
+ GOTO(failed, rc = PTR_ERR(eco));
+ cl_echo_object_put(eco);
+
+ CDEBUG(D_INFO, "oa oid "DOSTID"\n", POSTID(&oa->o_oi));
+ EXIT;
+
+ failed:
+ if (created && rc)
+ obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL, NULL);
+ if (lsm)
+ echo_free_memmd(ed, &lsm);
+ if (rc)
+ CERROR("create object failed with: rc = %d\n", rc);
+ return (rc);
+}
+
+static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
+ struct obdo *oa)
+{
+ struct lov_stripe_md *lsm = NULL;
+ struct echo_object *eco;
+ int rc;
+ ENTRY;
+
+ if ((oa->o_valid & OBD_MD_FLID) == 0 || ostid_id(&oa->o_oi) == 0) {
+ /* disallow use of object id 0 */
+ CERROR ("No valid oid\n");
+ RETURN(-EINVAL);
+ }
+
+ rc = echo_alloc_memmd(ed, &lsm);
+ if (rc < 0)
+ RETURN(rc);
+
+ lsm->lsm_oi = oa->o_oi;
+ if (!(oa->o_valid & OBD_MD_FLGROUP))
+ ostid_set_seq_echo(&lsm->lsm_oi);
+
+ rc = 0;
+ eco = cl_echo_object_find(ed, &lsm);
+ if (!IS_ERR(eco))
+ *ecop = eco;
+ else
+ rc = PTR_ERR(eco);
+ if (lsm)
+ echo_free_memmd(ed, &lsm);
+ RETURN(rc);
+}
+
+static void echo_put_object(struct echo_object *eco)
+{
+ if (cl_echo_object_put(eco))
+ CERROR("echo client: drop an object failed");
+}
+
+static void
+echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
+{
+ unsigned long stripe_count;
+ unsigned long stripe_size;
+ unsigned long width;
+ unsigned long woffset;
+ int stripe_index;
+ obd_off offset;
+
+ if (lsm->lsm_stripe_count <= 1)
+ return;
+
+ offset = *offp;
+ stripe_size = lsm->lsm_stripe_size;
+ stripe_count = lsm->lsm_stripe_count;
+
+ /* width = # bytes in all stripes */
+ width = stripe_size * stripe_count;
+
+ /* woffset = offset within a width; offset = whole number of widths */
+ woffset = do_div (offset, width);
+
+ stripe_index = woffset / stripe_size;
+
+ *idp = ostid_id(&lsm->lsm_oinfo[stripe_index]->loi_oi);
+ *offp = offset * stripe_size + woffset % stripe_size;
+}
+
+static void
+echo_client_page_debug_setup(struct lov_stripe_md *lsm,
+ struct page *page, int rw, obd_id id,
+ obd_off offset, obd_off count)
+{
+ char *addr;
+ obd_off stripe_off;
+ obd_id stripe_id;
+ int delta;
+
+ /* no partial pages on the client */
+ LASSERT(count == PAGE_CACHE_SIZE);
+
+ addr = kmap(page);
+
+ for (delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+ if (rw == OBD_BRW_WRITE) {
+ stripe_off = offset + delta;
+ stripe_id = id;
+ echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id);
+ } else {
+ stripe_off = 0xdeadbeef00c0ffeeULL;
+ stripe_id = 0xdeadbeef00c0ffeeULL;
+ }
+ block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE,
+ stripe_off, stripe_id);
+ }
+
+ kunmap(page);
+}
+
+static int echo_client_page_debug_check(struct lov_stripe_md *lsm,
+ struct page *page, obd_id id,
+ obd_off offset, obd_off count)
+{
+ obd_off stripe_off;
+ obd_id stripe_id;
+ char *addr;
+ int delta;
+ int rc;
+ int rc2;
+
+ /* no partial pages on the client */
+ LASSERT(count == PAGE_CACHE_SIZE);
+
+ addr = kmap(page);
+
+ for (rc = delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+ stripe_off = offset + delta;
+ stripe_id = id;
+ echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
+
+ rc2 = block_debug_check("test_brw",
+ addr + delta, OBD_ECHO_BLOCK_SIZE,
+ stripe_off, stripe_id);
+ if (rc2 != 0) {
+ CERROR ("Error in echo object "LPX64"\n", id);
+ rc = rc2;
+ }
+ }
+
+ kunmap(page);
+ return rc;
+}
+
+static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
+ struct echo_object *eco, obd_off offset,
+ obd_size count, int async,
+ struct obd_trans_info *oti)
+{
+ struct lov_stripe_md *lsm = eco->eo_lsm;
+ obd_count npages;
+ struct brw_page *pga;
+ struct brw_page *pgp;
+ struct page **pages;
+ obd_off off;
+ int i;
+ int rc;
+ int verify;
+ int gfp_mask;
+ int brw_flags = 0;
+ ENTRY;
+
+ verify = (ostid_id(&oa->o_oi) != ECHO_PERSISTENT_OBJID &&
+ (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
+ (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
+
+ gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER;
+
+ LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
+ LASSERT(lsm != NULL);
+ LASSERT(ostid_id(&lsm->lsm_oi) == ostid_id(&oa->o_oi));
+
+ if (count <= 0 ||
+ (count & (~CFS_PAGE_MASK)) != 0)
+ RETURN(-EINVAL);
+
+ /* XXX think again with misaligned I/O */
+ npages = count >> PAGE_CACHE_SHIFT;
+
+ if (rw == OBD_BRW_WRITE)
+ brw_flags = OBD_BRW_ASYNC;
+
+ OBD_ALLOC(pga, npages * sizeof(*pga));
+ if (pga == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC(pages, npages * sizeof(*pages));
+ if (pages == NULL) {
+ OBD_FREE(pga, npages * sizeof(*pga));
+ RETURN(-ENOMEM);
+ }
+
+ for (i = 0, pgp = pga, off = offset;
+ i < npages;
+ i++, pgp++, off += PAGE_CACHE_SIZE) {
+
+ LASSERT (pgp->pg == NULL); /* for cleanup */
+
+ rc = -ENOMEM;
+ OBD_PAGE_ALLOC(pgp->pg, gfp_mask);
+ if (pgp->pg == NULL)
+ goto out;
+
+ pages[i] = pgp->pg;
+ pgp->count = PAGE_CACHE_SIZE;
+ pgp->off = off;
+ pgp->flag = brw_flags;
+
+ if (verify)
+ echo_client_page_debug_setup(lsm, pgp->pg, rw,
+ ostid_id(&oa->o_oi), off,
+ pgp->count);
+ }
+
+ /* brw mode can only be used at client */
+ LASSERT(ed->ed_next != NULL);
+ rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async);
+
+ out:
+ if (rc != 0 || rw != OBD_BRW_READ)
+ verify = 0;
+
+ for (i = 0, pgp = pga; i < npages; i++, pgp++) {
+ if (pgp->pg == NULL)
+ continue;
+
+ if (verify) {
+ int vrc;
+ vrc = echo_client_page_debug_check(lsm, pgp->pg,
+ ostid_id(&oa->o_oi),
+ pgp->off, pgp->count);
+ if (vrc != 0 && rc == 0)
+ rc = vrc;
+ }
+ OBD_PAGE_FREE(pgp->pg);
+ }
+ OBD_FREE(pga, npages * sizeof(*pga));
+ OBD_FREE(pages, npages * sizeof(*pages));
+ RETURN(rc);
+}
+
+static int echo_client_prep_commit(const struct lu_env *env,
+ struct obd_export *exp, int rw,
+ struct obdo *oa, struct echo_object *eco,
+ obd_off offset, obd_size count,
+ obd_size batch, struct obd_trans_info *oti,
+ int async)
+{
+ struct lov_stripe_md *lsm = eco->eo_lsm;
+ struct obd_ioobj ioo;
+ struct niobuf_local *lnb;
+ struct niobuf_remote *rnb;
+ obd_off off;
+ obd_size npages, tot_pages;
+ int i, ret = 0, brw_flags = 0;
+
+ ENTRY;
+
+ if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0 ||
+ (lsm != NULL && ostid_id(&lsm->lsm_oi) != ostid_id(&oa->o_oi)))
+ RETURN(-EINVAL);
+
+ npages = batch >> PAGE_CACHE_SHIFT;
+ tot_pages = count >> PAGE_CACHE_SHIFT;
+
+ OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local));
+ OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote));
+
+ if (lnb == NULL || rnb == NULL)
+ GOTO(out, ret = -ENOMEM);
+
+ if (rw == OBD_BRW_WRITE && async)
+ brw_flags |= OBD_BRW_ASYNC;
+
+ obdo_to_ioobj(oa, &ioo);
+
+ off = offset;
+
+ for(; tot_pages; tot_pages -= npages) {
+ int lpages;
+
+ if (tot_pages < npages)
+ npages = tot_pages;
+
+ for (i = 0; i < npages; i++, off += PAGE_CACHE_SIZE) {
+ rnb[i].offset = off;
+ rnb[i].len = PAGE_CACHE_SIZE;
+ rnb[i].flags = brw_flags;
+ }
+
+ ioo.ioo_bufcnt = npages;
+ oti->oti_transno = 0;
+
+ lpages = npages;
+ ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
+ lnb, oti, NULL);
+ if (ret != 0)
+ GOTO(out, ret);
+ LASSERT(lpages == npages);
+
+ for (i = 0; i < lpages; i++) {
+ struct page *page = lnb[i].page;
+
+ /* read past eof? */
+ if (page == NULL && lnb[i].rc == 0)
+ continue;
+
+ if (async)
+ lnb[i].flags |= OBD_BRW_ASYNC;
+
+ if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
+ (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
+ (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0)
+ continue;
+
+ if (rw == OBD_BRW_WRITE)
+ echo_client_page_debug_setup(lsm, page, rw,
+ ostid_id(&oa->o_oi),
+ rnb[i].offset,
+ rnb[i].len);
+ else
+ echo_client_page_debug_check(lsm, page,
+ ostid_id(&oa->o_oi),
+ rnb[i].offset,
+ rnb[i].len);
+ }
+
+ ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
+ rnb, npages, lnb, oti, ret);
+ if (ret != 0)
+ GOTO(out, ret);
+
+ /* Reset oti otherwise it would confuse ldiskfs. */
+ memset(oti, 0, sizeof(*oti));
+
+ /* Reuse env context. */
+ lu_context_exit((struct lu_context *)&env->le_ctx);
+ lu_context_enter((struct lu_context *)&env->le_ctx);
+ }
+
+out:
+ if (lnb)
+ OBD_FREE(lnb, npages * sizeof(struct niobuf_local));
+ if (rnb)
+ OBD_FREE(rnb, npages * sizeof(struct niobuf_remote));
+ RETURN(ret);
+}
+
+static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
+ struct obd_export *exp,
+ struct obd_ioctl_data *data,
+ struct obd_trans_info *dummy_oti)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct echo_device *ed = obd2echo_dev(obd);
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct obdo *oa = &data->ioc_obdo1;
+ struct echo_object *eco;
+ int rc;
+ int async = 1;
+ long test_mode;
+ ENTRY;
+
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc)
+ RETURN(rc);
+
+ oa->o_valid &= ~OBD_MD_FLHANDLE;
+
+ /* OFD/obdfilter works only via prep/commit */
+ test_mode = (long)data->ioc_pbuf1;
+ if (test_mode == 1)
+ async = 0;
+
+ if (ed->ed_next == NULL && test_mode != 3) {
+ test_mode = 3;
+ data->ioc_plen1 = data->ioc_count;
+ }
+
+ /* Truncate batch size to maximum */
+ if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE)
+ data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE;
+
+ switch (test_mode) {
+ case 1:
+ /* fall through */
+ case 2:
+ rc = echo_client_kbrw(ed, rw, oa,
+ eco, data->ioc_offset,
+ data->ioc_count, async, dummy_oti);
+ break;
+ case 3:
+ rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa,
+ eco, data->ioc_offset,
+ data->ioc_count, data->ioc_plen1,
+ dummy_oti, async);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ echo_put_object(eco);
+ RETURN(rc);
+}
+
+static int
+echo_client_enqueue(struct obd_export *exp, struct obdo *oa,
+ int mode, obd_off offset, obd_size nob)
+{
+ struct echo_device *ed = obd2echo_dev(exp->exp_obd);
+ struct lustre_handle *ulh = &oa->o_handle;
+ struct echo_object *eco;
+ obd_off end;
+ int rc;
+ ENTRY;
+
+ if (ed->ed_next == NULL)
+ RETURN(-EOPNOTSUPP);
+
+ if (!(mode == LCK_PR || mode == LCK_PW))
+ RETURN(-EINVAL);
+
+ if ((offset & (~CFS_PAGE_MASK)) != 0 ||
+ (nob & (~CFS_PAGE_MASK)) != 0)
+ RETURN(-EINVAL);
+
+ rc = echo_get_object (&eco, ed, oa);
+ if (rc != 0)
+ RETURN(rc);
+
+ end = (nob == 0) ? ((obd_off) -1) : (offset + nob - 1);
+ rc = cl_echo_enqueue(eco, offset, end, mode, &ulh->cookie);
+ if (rc == 0) {
+ oa->o_valid |= OBD_MD_FLHANDLE;
+ CDEBUG(D_INFO, "Cookie is "LPX64"\n", ulh->cookie);
+ }
+ echo_put_object(eco);
+ RETURN(rc);
+}
+
+static int
+echo_client_cancel(struct obd_export *exp, struct obdo *oa)
+{
+ struct echo_device *ed = obd2echo_dev(exp->exp_obd);
+ __u64 cookie = oa->o_handle.cookie;
+
+ if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
+ return -EINVAL;
+
+ CDEBUG(D_INFO, "Cookie is "LPX64"\n", cookie);
+ return cl_echo_cancel(ed, cookie);
+}
+
+static int
+echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct echo_device *ed = obd2echo_dev(obd);
+ struct echo_client_obd *ec = ed->ed_ec;
+ struct echo_object *eco;
+ struct obd_ioctl_data *data = karg;
+ struct obd_trans_info dummy_oti;
+ struct lu_env *env;
+ struct oti_req_ack_lock *ack_lock;
+ struct obdo *oa;
+ struct lu_fid fid;
+ int rw = OBD_BRW_READ;
+ int rc = 0;
+ int i;
+ ENTRY;
+
+ memset(&dummy_oti, 0, sizeof(dummy_oti));
+
+ oa = &data->ioc_obdo1;
+ if (!(oa->o_valid & OBD_MD_FLGROUP)) {
+ oa->o_valid |= OBD_MD_FLGROUP;
+ ostid_set_seq_echo(&oa->o_oi);
+ }
+
+ /* This FID is unpacked just for validation at this point */
+ rc = ostid_to_fid(&fid, &oa->o_oi, 0);
+ if (rc < 0)
+ RETURN(rc);
+
+ OBD_ALLOC_PTR(env);
+ if (env == NULL)
+ RETURN(-ENOMEM);
+
+ rc = lu_env_init(env, LCT_DT_THREAD);
+ if (rc)
+ GOTO(out, rc = -ENOMEM);
+
+ switch (cmd) {
+ case OBD_IOC_CREATE: /* may create echo object */
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_create_object(env, ed, 1, oa, data->ioc_pbuf1,
+ data->ioc_plen1, &dummy_oti);
+ GOTO(out, rc);
+
+ case OBD_IOC_ECHO_MD: {
+ int count;
+ int cmd;
+ char *dir = NULL;
+ int dirlen;
+ __u64 id;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO(out, rc = -EPERM);
+
+ count = data->ioc_count;
+ cmd = data->ioc_command;
+
+ id = ostid_id(&data->ioc_obdo2.o_oi);
+
+ dirlen = data->ioc_plen1;
+ OBD_ALLOC(dir, dirlen + 1);
+ if (dir == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ if (copy_from_user(dir, data->ioc_pbuf1, dirlen)) {
+ OBD_FREE(dir, data->ioc_plen1 + 1);
+ GOTO(out, rc = -EFAULT);
+ }
+
+ rc = echo_md_handler(ed, cmd, dir, dirlen, id, count, data);
+ OBD_FREE(dir, dirlen + 1);
+ GOTO(out, rc);
+ }
+ case OBD_IOC_ECHO_ALLOC_SEQ: {
+ struct lu_env *cl_env;
+ int refcheck;
+ __u64 seq;
+ int max_count;
+
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO(out, rc = -EPERM);
+
+ cl_env = cl_env_get(&refcheck);
+ if (IS_ERR(cl_env))
+ GOTO(out, rc = PTR_ERR(cl_env));
+
+ rc = lu_env_refill_by_tags(cl_env, ECHO_MD_CTX_TAG,
+ ECHO_MD_SES_TAG);
+ if (rc != 0) {
+ cl_env_put(cl_env, &refcheck);
+ GOTO(out, rc);
+ }
+
+ rc = seq_client_get_seq(cl_env, ed->ed_cl_seq, &seq);
+ cl_env_put(cl_env, &refcheck);
+ if (rc < 0) {
+ CERROR("%s: Can not alloc seq: rc = %d\n",
+ obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+
+ if (copy_to_user(data->ioc_pbuf1, &seq, data->ioc_plen1))
+ return -EFAULT;
+
+ max_count = LUSTRE_METADATA_SEQ_MAX_WIDTH;
+ if (copy_to_user(data->ioc_pbuf2, &max_count,
+ data->ioc_plen2))
+ return -EFAULT;
+ GOTO(out, rc);
+ }
+ case OBD_IOC_DESTROY:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc == 0) {
+ rc = obd_destroy(env, ec->ec_exp, oa, eco->eo_lsm,
+ &dummy_oti, NULL, NULL);
+ if (rc == 0)
+ eco->eo_deleted = 1;
+ echo_put_object(eco);
+ }
+ GOTO(out, rc);
+
+ case OBD_IOC_GETATTR:
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc == 0) {
+ struct obd_info oinfo = { { { 0 } } };
+ oinfo.oi_md = eco->eo_lsm;
+ oinfo.oi_oa = oa;
+ rc = obd_getattr(env, ec->ec_exp, &oinfo);
+ echo_put_object(eco);
+ }
+ GOTO(out, rc);
+
+ case OBD_IOC_SETATTR:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc == 0) {
+ struct obd_info oinfo = { { { 0 } } };
+ oinfo.oi_oa = oa;
+ oinfo.oi_md = eco->eo_lsm;
+
+ rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL);
+ echo_put_object(eco);
+ }
+ GOTO(out, rc);
+
+ case OBD_IOC_BRW_WRITE:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rw = OBD_BRW_WRITE;
+ /* fall through */
+ case OBD_IOC_BRW_READ:
+ rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti);
+ GOTO(out, rc);
+
+ case ECHO_IOC_GET_STRIPE:
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc == 0) {
+ rc = echo_copyout_lsm(eco->eo_lsm, data->ioc_pbuf1,
+ data->ioc_plen1);
+ echo_put_object(eco);
+ }
+ GOTO(out, rc);
+
+ case ECHO_IOC_SET_STRIPE:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ if (data->ioc_pbuf1 == NULL) { /* unset */
+ rc = echo_get_object(&eco, ed, oa);
+ if (rc == 0) {
+ eco->eo_deleted = 1;
+ echo_put_object(eco);
+ }
+ } else {
+ rc = echo_create_object(env, ed, 0, oa,
+ data->ioc_pbuf1,
+ data->ioc_plen1, &dummy_oti);
+ }
+ GOTO (out, rc);
+
+ case ECHO_IOC_ENQUEUE:
+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+ GOTO (out, rc = -EPERM);
+
+ rc = echo_client_enqueue(exp, oa,
+ data->ioc_conn1, /* lock mode */
+ data->ioc_offset,
+ data->ioc_count);/*extent*/
+ GOTO (out, rc);
+
+ case ECHO_IOC_CANCEL:
+ rc = echo_client_cancel(exp, oa);
+ GOTO (out, rc);
+
+ default:
+ CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
+ GOTO (out, rc = -ENOTTY);
+ }
+
+ EXIT;
+out:
+ lu_env_fini(env);
+ OBD_FREE_PTR(env);
+
+ /* XXX this should be in a helper also called by target_send_reply */
+ for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4;
+ i++, ack_lock++) {
+ if (!ack_lock->mode)
+ break;
+ ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
+ }
+
+ return rc;
+}
+
+static int echo_client_setup(const struct lu_env *env,
+ struct obd_device *obddev, struct lustre_cfg *lcfg)
+{
+ struct echo_client_obd *ec = &obddev->u.echo_client;
+ struct obd_device *tgt;
+ struct obd_uuid echo_uuid = { "ECHO_UUID" };
+ struct obd_connect_data *ocd = NULL;
+ int rc;
+ ENTRY;
+
+ if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
+ CERROR("requires a TARGET OBD name\n");
+ RETURN(-EINVAL);
+ }
+
+ tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
+ if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
+ CERROR("device not attached or not set up (%s)\n",
+ lustre_cfg_string(lcfg, 1));
+ RETURN(-EINVAL);
+ }
+
+ spin_lock_init(&ec->ec_lock);
+ INIT_LIST_HEAD (&ec->ec_objects);
+ INIT_LIST_HEAD (&ec->ec_locks);
+ ec->ec_unique = 0;
+ ec->ec_nstripes = 0;
+
+ if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
+ lu_context_tags_update(ECHO_MD_CTX_TAG);
+ lu_session_tags_update(ECHO_MD_SES_TAG);
+ RETURN(0);
+ }
+
+ OBD_ALLOC(ocd, sizeof(*ocd));
+ if (ocd == NULL) {
+ CERROR("Can't alloc ocd connecting to %s\n",
+ lustre_cfg_string(lcfg, 1));
+ return -ENOMEM;
+ }
+
+ ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL |
+ OBD_CONNECT_BRW_SIZE |
+ OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 |
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_LVB_TYPE |
+ OBD_CONNECT_FID;
+ ocd->ocd_brw_size = DT_MAX_BRW_SIZE;
+ ocd->ocd_version = LUSTRE_VERSION_CODE;
+ ocd->ocd_group = FID_SEQ_ECHO;
+
+ rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
+ if (rc == 0) {
+ /* Turn off pinger because it connects to tgt obd directly. */
+ spin_lock(&tgt->obd_dev_lock);
+ list_del_init(&ec->ec_exp->exp_obd_chain_timed);
+ spin_unlock(&tgt->obd_dev_lock);
+ }
+
+ OBD_FREE(ocd, sizeof(*ocd));
+
+ if (rc != 0) {
+ CERROR("fail to connect to device %s\n",
+ lustre_cfg_string(lcfg, 1));
+ return (rc);
+ }
+
+ RETURN(rc);
+}
+
+static int echo_client_cleanup(struct obd_device *obddev)
+{
+ struct echo_device *ed = obd2echo_dev(obddev);
+ struct echo_client_obd *ec = &obddev->u.echo_client;
+ int rc;
+ ENTRY;
+
+ /*Do nothing for Metadata echo client*/
+ if (ed == NULL )
+ RETURN(0);
+
+ if (ed->ed_next_ismd) {
+ lu_context_tags_clear(ECHO_MD_CTX_TAG);
+ lu_session_tags_clear(ECHO_MD_SES_TAG);
+ RETURN(0);
+ }
+
+ if (!list_empty(&obddev->obd_exports)) {
+ CERROR("still has clients!\n");
+ RETURN(-EBUSY);
+ }
+
+ LASSERT(atomic_read(&ec->ec_exp->exp_refcount) > 0);
+ rc = obd_disconnect(ec->ec_exp);
+ if (rc != 0)
+ CERROR("fail to disconnect device: %d\n", rc);
+
+ RETURN(rc);
+}
+
+static int echo_client_connect(const struct lu_env *env,
+ struct obd_export **exp,
+ struct obd_device *src, struct obd_uuid *cluuid,
+ struct obd_connect_data *data, void *localdata)
+{
+ int rc;
+ struct lustre_handle conn = { 0 };
+
+ ENTRY;
+ rc = class_connect(&conn, src, cluuid);
+ if (rc == 0) {
+ *exp = class_conn2export(&conn);
+ }
+
+ RETURN (rc);
+}
+
+static int echo_client_disconnect(struct obd_export *exp)
+{
+#if 0
+ struct obd_device *obd;
+ struct echo_client_obd *ec;
+ struct ec_lock *ecl;
+#endif
+ int rc;
+ ENTRY;
+
+ if (exp == NULL)
+ GOTO(out, rc = -EINVAL);
+
+#if 0
+ obd = exp->exp_obd;
+ ec = &obd->u.echo_client;
+
+ /* no more contention on export's lock list */
+ while (!list_empty (&exp->exp_ec_data.eced_locks)) {
+ ecl = list_entry (exp->exp_ec_data.eced_locks.next,
+ struct ec_lock, ecl_exp_chain);
+ list_del (&ecl->ecl_exp_chain);
+
+ rc = obd_cancel(ec->ec_exp, ecl->ecl_object->eco_lsm,
+ ecl->ecl_mode, &ecl->ecl_lock_handle);
+
+ CDEBUG (D_INFO, "Cancel lock on object "LPX64" on disconnect "
+ "(%d)\n", ecl->ecl_object->eco_id, rc);
+
+ echo_put_object (ecl->ecl_object);
+ OBD_FREE (ecl, sizeof (*ecl));
+ }
+#endif
+
+ rc = class_disconnect(exp);
+ GOTO(out, rc);
+ out:
+ return rc;
+}
+
+static struct obd_ops echo_client_obd_ops = {
+ .o_owner = THIS_MODULE,
+
+#if 0
+ .o_setup = echo_client_setup,
+ .o_cleanup = echo_client_cleanup,
+#endif
+
+ .o_iocontrol = echo_client_iocontrol,
+ .o_connect = echo_client_connect,
+ .o_disconnect = echo_client_disconnect
+};
+
+int echo_client_init(void)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc;
+
+ lprocfs_echo_init_vars(&lvars);
+
+ rc = lu_kmem_init(echo_caches);
+ if (rc == 0) {
+ rc = class_register_type(&echo_client_obd_ops, NULL,
+ lvars.module_vars,
+ LUSTRE_ECHO_CLIENT_NAME,
+ &echo_device_type);
+ if (rc)
+ lu_kmem_fini(echo_caches);
+ }
+ return rc;
+}
+
+void echo_client_exit(void)
+{
+ class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
+ lu_kmem_fini(echo_caches);
+}
+
+static int __init obdecho_init(void)
+{
+ struct lprocfs_static_vars lvars;
+ int rc;
+
+ ENTRY;
+ LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
+
+ LASSERT(PAGE_CACHE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
+
+ lprocfs_echo_init_vars(&lvars);
+
+
+ rc = echo_client_init();
+
+ RETURN(rc);
+}
+
+static void /*__exit*/ obdecho_exit(void)
+{
+ echo_client_exit();
+
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
+MODULE_LICENSE("GPL");
+
+cfs_module(obdecho, LUSTRE_VERSION_STRING, obdecho_init, obdecho_exit);
+
+/** @} echo_client */
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_internal.h b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
new file mode 100644
index 000000000000..8e9dbc2351e7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdecho/echo_internal.h
+ */
+
+#ifndef _ECHO_INTERNAL_H
+#define _ECHO_INTERNAL_H
+
+/* The persistent object (i.e. actually stores stuff!) */
+#define ECHO_PERSISTENT_OBJID 1ULL
+#define ECHO_PERSISTENT_SIZE ((__u64)(1<<20))
+
+/* block size to use for data verification */
+#define OBD_ECHO_BLOCK_SIZE (4<<10)
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/obdecho/lproc_echo.c b/drivers/staging/lustre/lustre/obdecho/lproc_echo.c
new file mode 100644
index 000000000000..b9abac1c4dca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdecho/lproc_echo.c
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_ECHO
+
+#include <lprocfs_status.h>
+#include <obd_class.h>
+
+#ifdef LPROCFS
+LPROC_SEQ_FOPS_RO_TYPE(echo, uuid);
+static struct lprocfs_vars lprocfs_echo_obd_vars[] = {
+ { "uuid", &echo_uuid_fops, 0, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(echo, numrefs);
+static struct lprocfs_vars lprocfs_echo_module_vars[] = {
+ { "num_refs", &echo_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_echo_module_vars;
+ lvars->obd_vars = lprocfs_echo_obd_vars;
+}
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/osc/Makefile b/drivers/staging/lustre/lustre/osc/Makefile
new file mode 100644
index 000000000000..bbd2f7707e9f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_LUSTRE_FS) += osc.o
+osc-y := osc_request.o lproc_osc.o osc_dev.o osc_object.o \
+ osc_page.o osc_lock.o osc_io.o osc_quota.o osc_cache.o
+
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
new file mode 100644
index 000000000000..198cf3ba1374
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -0,0 +1,728 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/version.h>
+#include <asm/statfs.h>
+#include <obd_cksum.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <linux/seq_file.h>
+#include "osc_internal.h"
+
+#ifdef LPROCFS
+static int osc_active_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ int rc;
+
+ LPROCFS_CLIMP_CHECK(dev);
+ rc = seq_printf(m, "%d\n", !dev->u.cli.cl_import->imp_deactive);
+ LPROCFS_CLIMP_EXIT(dev);
+ return rc;
+}
+
+static ssize_t osc_active_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+ if (val < 0 || val > 1)
+ return -ERANGE;
+
+ /* opposite senses */
+ if (dev->u.cli.cl_import->imp_deactive == val)
+ rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val);
+ else
+ CDEBUG(D_CONFIG, "activate %d: ignoring repeat request\n", val);
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_active);
+
+static int osc_max_rpcs_in_flight_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%u\n", cli->cl_max_rpcs_in_flight);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+
+static ssize_t osc_max_rpcs_in_flight_seq_write(struct file *file,
+ const char *buffer, size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct ptlrpc_request_pool *pool = cli->cl_import->imp_rq_pool;
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val < 1 || val > OSC_MAX_RIF_MAX)
+ return -ERANGE;
+
+ LPROCFS_CLIMP_CHECK(dev);
+ if (pool && val > cli->cl_max_rpcs_in_flight)
+ pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_max_rpcs_in_flight = val;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ LPROCFS_CLIMP_EXIT(dev);
+ return count;
+}
+LPROC_SEQ_FOPS(osc_max_rpcs_in_flight);
+
+static int osc_max_dirty_mb_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ long val;
+ int mult;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ val = cli->cl_dirty_max;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ mult = 1 << 20;
+ return lprocfs_seq_read_frac_helper(m, val, mult);
+}
+
+static ssize_t osc_max_dirty_mb_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &dev->u.cli;
+ int pages_number, mult, rc;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ if (pages_number <= 0 ||
+ pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_CACHE_SHIFT) ||
+ pages_number > num_physpages / 4) /* 1/4 of RAM */
+ return -ERANGE;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_dirty_max = (obd_count)(pages_number << PAGE_CACHE_SHIFT);
+ osc_wake_cache_waiters(cli);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_max_dirty_mb);
+
+static int osc_cached_mb_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int shift = 20 - PAGE_CACHE_SHIFT;
+ int rc;
+
+ rc = seq_printf(m,
+ "used_mb: %d\n"
+ "busy_cnt: %d\n",
+ (atomic_read(&cli->cl_lru_in_list) +
+ atomic_read(&cli->cl_lru_busy)) >> shift,
+ atomic_read(&cli->cl_lru_busy));
+
+ return rc;
+}
+
+/* shrink the number of caching pages to a specific number */
+static ssize_t osc_cached_mb_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &dev->u.cli;
+ int pages_number, mult, rc;
+
+ mult = 1 << (20 - PAGE_CACHE_SHIFT);
+ buffer = lprocfs_find_named_value(buffer, "used_mb:", &count);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ if (pages_number < 0)
+ return -ERANGE;
+
+ rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
+ if (rc > 0)
+ (void)osc_lru_shrink(cli, rc);
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_cached_mb);
+
+static int osc_cur_dirty_bytes_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%lu\n", cli->cl_dirty);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(osc_cur_dirty_bytes);
+
+static int osc_cur_grant_bytes_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%lu\n", cli->cl_avail_grant);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+
+static ssize_t osc_cur_grant_bytes_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &obd->u.cli;
+ int rc;
+ __u64 val;
+
+ if (obd == NULL)
+ return 0;
+
+ rc = lprocfs_write_u64_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ /* this is only for shrinking grant */
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (val >= cli->cl_avail_grant) {
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return 0;
+ }
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ LPROCFS_CLIMP_CHECK(obd);
+ if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
+ rc = osc_shrink_grant_to_target(cli, val);
+ LPROCFS_CLIMP_EXIT(obd);
+ if (rc)
+ return rc;
+ return count;
+}
+LPROC_SEQ_FOPS(osc_cur_grant_bytes);
+
+static int osc_cur_lost_grant_bytes_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *dev = m->private;
+ struct client_obd *cli = &dev->u.cli;
+ int rc;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = seq_printf(m, "%lu\n", cli->cl_lost_grant);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+LPROC_SEQ_FOPS_RO(osc_cur_lost_grant_bytes);
+
+static int osc_grant_shrink_interval_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+
+ if (obd == NULL)
+ return 0;
+ return seq_printf(m, "%d\n",
+ obd->u.cli.cl_grant_shrink_interval);
+}
+
+static ssize_t osc_grant_shrink_interval_seq_write(struct file *file,
+ const char *buffer, size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ int val, rc;
+
+ if (obd == NULL)
+ return 0;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val <= 0)
+ return -ERANGE;
+
+ obd->u.cli.cl_grant_shrink_interval = val;
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_grant_shrink_interval);
+
+static int osc_checksum_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+
+ if (obd == NULL)
+ return 0;
+
+ return seq_printf(m, "%d\n",
+ obd->u.cli.cl_checksum ? 1 : 0);
+}
+
+static ssize_t osc_checksum_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ int val, rc;
+
+ if (obd == NULL)
+ return 0;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ obd->u.cli.cl_checksum = (val ? 1 : 0);
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_checksum);
+
+static int osc_checksum_type_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+ int i;
+ DECLARE_CKSUM_NAME;
+
+ if (obd == NULL)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
+ if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
+ continue;
+ if (obd->u.cli.cl_cksum_type == (1 << i))
+ seq_printf(m, "[%s] ", cksum_name[i]);
+ else
+ seq_printf(m, "%s ", cksum_name[i]);
+ }
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static ssize_t osc_checksum_type_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ int i;
+ DECLARE_CKSUM_NAME;
+ char kernbuf[10];
+
+ if (obd == NULL)
+ return 0;
+
+ if (count > sizeof(kernbuf) - 1)
+ return -EINVAL;
+ if (copy_from_user(kernbuf, buffer, count))
+ return -EFAULT;
+ if (count > 0 && kernbuf[count - 1] == '\n')
+ kernbuf[count - 1] = '\0';
+ else
+ kernbuf[count] = '\0';
+
+ for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
+ if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
+ continue;
+ if (!strcmp(kernbuf, cksum_name[i])) {
+ obd->u.cli.cl_cksum_type = 1 << i;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+LPROC_SEQ_FOPS(osc_checksum_type);
+
+static int osc_resend_count_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+
+ return seq_printf(m, "%u\n", atomic_read(&obd->u.cli.cl_resends));
+}
+
+static ssize_t osc_resend_count_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val < 0)
+ return -EINVAL;
+
+ atomic_set(&obd->u.cli.cl_resends, val);
+
+ return count;
+}
+LPROC_SEQ_FOPS(osc_resend_count);
+
+static int osc_contention_seconds_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+ struct osc_device *od = obd2osc_dev(obd);
+
+ return seq_printf(m, "%u\n", od->od_contention_time);
+}
+
+static ssize_t osc_contention_seconds_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct osc_device *od = obd2osc_dev(obd);
+
+ return lprocfs_write_helper(buffer, count, &od->od_contention_time) ?:
+ count;
+}
+LPROC_SEQ_FOPS(osc_contention_seconds);
+
+static int osc_lockless_truncate_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+ struct osc_device *od = obd2osc_dev(obd);
+
+ return seq_printf(m, "%u\n", od->od_lockless_truncate);
+}
+
+static ssize_t osc_lockless_truncate_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct osc_device *od = obd2osc_dev(obd);
+
+ return lprocfs_write_helper(buffer, count, &od->od_lockless_truncate) ?:
+ count;
+}
+LPROC_SEQ_FOPS(osc_lockless_truncate);
+
+static int osc_destroys_in_flight_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+ return seq_printf(m, "%u\n",
+ atomic_read(&obd->u.cli.cl_destroy_in_flight));
+}
+LPROC_SEQ_FOPS_RO(osc_destroys_in_flight);
+
+static int osc_obd_max_pages_per_rpc_seq_show(struct seq_file *m, void *v)
+{
+ return lprocfs_obd_rd_max_pages_per_rpc(m, m->private);
+}
+
+static ssize_t osc_obd_max_pages_per_rpc_seq_write(struct file *file,
+ const char *buffer, size_t count, loff_t *off)
+{
+ struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct obd_connect_data *ocd = &cli->cl_import->imp_connect_data;
+ int chunk_mask, rc;
+ __u64 val;
+
+ rc = lprocfs_write_u64_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ /* if the max_pages is specified in bytes, convert to pages */
+ if (val >= ONE_MB_BRW_SIZE)
+ val >>= PAGE_CACHE_SHIFT;
+
+ LPROCFS_CLIMP_CHECK(dev);
+
+ chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_CACHE_SHIFT)) - 1);
+ /* max_pages_per_rpc must be chunk aligned */
+ val = (val + ~chunk_mask) & chunk_mask;
+ if (val == 0 || val > ocd->ocd_brw_size >> PAGE_CACHE_SHIFT) {
+ LPROCFS_CLIMP_EXIT(dev);
+ return -ERANGE;
+ }
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_max_pages_per_rpc = val;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ LPROCFS_CLIMP_EXIT(dev);
+ return count;
+}
+LPROC_SEQ_FOPS(osc_obd_max_pages_per_rpc);
+
+LPROC_SEQ_FOPS_RO_TYPE(osc, uuid);
+LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
+LPROC_SEQ_FOPS_RO_TYPE(osc, blksize);
+LPROC_SEQ_FOPS_RO_TYPE(osc, kbytestotal);
+LPROC_SEQ_FOPS_RO_TYPE(osc, kbytesfree);
+LPROC_SEQ_FOPS_RO_TYPE(osc, kbytesavail);
+LPROC_SEQ_FOPS_RO_TYPE(osc, filestotal);
+LPROC_SEQ_FOPS_RO_TYPE(osc, filesfree);
+LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid);
+LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts);
+LPROC_SEQ_FOPS_RO_TYPE(osc, state);
+
+LPROC_SEQ_FOPS_WR_ONLY(osc, ping);
+
+LPROC_SEQ_FOPS_RW_TYPE(osc, import);
+LPROC_SEQ_FOPS_RW_TYPE(osc, pinger_recov);
+
+static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
+ { "uuid", &osc_uuid_fops, 0, 0 },
+ { "ping", &osc_ping_fops, 0, 0222 },
+ { "connect_flags", &osc_connect_flags_fops, 0, 0 },
+ { "blocksize", &osc_blksize_fops, 0, 0 },
+ { "kbytestotal", &osc_kbytestotal_fops, 0, 0 },
+ { "kbytesfree", &osc_kbytesfree_fops, 0, 0 },
+ { "kbytesavail", &osc_kbytesavail_fops, 0, 0 },
+ { "filestotal", &osc_filestotal_fops, 0, 0 },
+ { "filesfree", &osc_filesfree_fops, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+ { "ost_server_uuid", &osc_server_uuid_fops, 0, 0 },
+ { "ost_conn_uuid", &osc_conn_uuid_fops, 0, 0 },
+ { "active", &osc_active_fops, 0 },
+ { "max_pages_per_rpc", &osc_obd_max_pages_per_rpc_fops, 0 },
+ { "max_rpcs_in_flight", &osc_max_rpcs_in_flight_fops, 0 },
+ { "destroys_in_flight", &osc_destroys_in_flight_fops, 0, 0 },
+ { "max_dirty_mb", &osc_max_dirty_mb_fops, 0 },
+ { "osc_cached_mb", &osc_cached_mb_fops, 0 },
+ { "cur_dirty_bytes", &osc_cur_dirty_bytes_fops, 0, 0 },
+ { "cur_grant_bytes", &osc_cur_grant_bytes_fops, 0 },
+ { "cur_lost_grant_bytes", &osc_cur_lost_grant_bytes_fops, 0, 0},
+ { "grant_shrink_interval", &osc_grant_shrink_interval_fops, 0 },
+ { "checksums", &osc_checksum_fops, 0 },
+ { "checksum_type", &osc_checksum_type_fops, 0 },
+ { "resend_count", &osc_resend_count_fops, 0},
+ { "timeouts", &osc_timeouts_fops, 0, 0 },
+ { "contention_seconds", &osc_contention_seconds_fops, 0 },
+ { "lockless_truncate", &osc_lockless_truncate_fops, 0 },
+ { "import", &osc_import_fops, 0 },
+ { "state", &osc_state_fops, 0, 0 },
+ { "pinger_recov", &osc_pinger_recov_fops, 0 },
+ { 0 }
+};
+
+LPROC_SEQ_FOPS_RO_TYPE(osc, numrefs);
+static struct lprocfs_vars lprocfs_osc_module_vars[] = {
+ { "num_refs", &osc_numrefs_fops, 0, 0 },
+ { 0 }
+};
+
+#define pct(a,b) (b ? a * 100 / b : 0)
+
+static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct timeval now;
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+ unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
+ int i;
+
+ do_gettimeofday(&now);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+ seq_printf(seq, "read RPCs in flight: %d\n",
+ cli->cl_r_in_flight);
+ seq_printf(seq, "write RPCs in flight: %d\n",
+ cli->cl_w_in_flight);
+ seq_printf(seq, "pending write pages: %d\n",
+ atomic_read(&cli->cl_pending_w_pages));
+ seq_printf(seq, "pending read pages: %d\n",
+ atomic_read(&cli->cl_pending_r_pages));
+
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "pages per rpc rpcs %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&cli->cl_read_page_hist);
+ write_tot = lprocfs_oh_sum(&cli->cl_write_page_hist);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = cli->cl_read_page_hist.oh_buckets[i];
+ unsigned long w = cli->cl_write_page_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ 1 << i, r, pct(r, read_tot),
+ pct(read_cum, read_tot), w,
+ pct(w, write_tot),
+ pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "rpcs in flight rpcs %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&cli->cl_read_rpc_hist);
+ write_tot = lprocfs_oh_sum(&cli->cl_write_rpc_hist);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = cli->cl_read_rpc_hist.oh_buckets[i];
+ unsigned long w = cli->cl_write_rpc_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ i, r, pct(r, read_tot),
+ pct(read_cum, read_tot), w,
+ pct(w, write_tot),
+ pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
+ seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
+ seq_printf(seq, "offset rpcs %% cum %% |");
+ seq_printf(seq, " rpcs %% cum %%\n");
+
+ read_tot = lprocfs_oh_sum(&cli->cl_read_offset_hist);
+ write_tot = lprocfs_oh_sum(&cli->cl_write_offset_hist);
+
+ read_cum = 0;
+ write_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long r = cli->cl_read_offset_hist.oh_buckets[i];
+ unsigned long w = cli->cl_write_offset_hist.oh_buckets[i];
+ read_cum += r;
+ write_cum += w;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n",
+ (i == 0) ? 0 : 1 << (i - 1),
+ r, pct(r, read_tot), pct(read_cum, read_tot),
+ w, pct(w, write_tot), pct(write_cum, write_tot));
+ if (read_cum == read_tot && write_cum == write_tot)
+ break;
+ }
+
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ return 0;
+}
+#undef pct
+
+static ssize_t osc_rpc_stats_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+
+ lprocfs_oh_clear(&cli->cl_read_rpc_hist);
+ lprocfs_oh_clear(&cli->cl_write_rpc_hist);
+ lprocfs_oh_clear(&cli->cl_read_page_hist);
+ lprocfs_oh_clear(&cli->cl_write_page_hist);
+ lprocfs_oh_clear(&cli->cl_read_offset_hist);
+ lprocfs_oh_clear(&cli->cl_write_offset_hist);
+
+ return len;
+}
+
+LPROC_SEQ_FOPS(osc_rpc_stats);
+
+static int osc_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct timeval now;
+ struct obd_device *dev = seq->private;
+ struct osc_stats *stats = &obd2osc_dev(dev)->od_stats;
+
+ do_gettimeofday(&now);
+
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+ seq_printf(seq, "lockless_write_bytes\t\t"LPU64"\n",
+ stats->os_lockless_writes);
+ seq_printf(seq, "lockless_read_bytes\t\t"LPU64"\n",
+ stats->os_lockless_reads);
+ seq_printf(seq, "lockless_truncate\t\t"LPU64"\n",
+ stats->os_lockless_truncates);
+ return 0;
+}
+
+static ssize_t osc_stats_seq_write(struct file *file, const char *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct obd_device *dev = seq->private;
+ struct osc_stats *stats = &obd2osc_dev(dev)->od_stats;
+
+ memset(stats, 0, sizeof(*stats));
+ return len;
+}
+
+LPROC_SEQ_FOPS(osc_stats);
+
+int lproc_osc_attach_seqstat(struct obd_device *dev)
+{
+ int rc;
+
+ rc = lprocfs_seq_create(dev->obd_proc_entry, "osc_stats", 0644,
+ &osc_stats_fops, dev);
+ if (rc == 0)
+ rc = lprocfs_obd_seq_create(dev, "rpc_stats", 0644,
+ &osc_rpc_stats_fops, dev);
+
+ return rc;
+}
+
+void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ lvars->module_vars = lprocfs_osc_module_vars;
+ lvars->obd_vars = lprocfs_osc_obd_vars;
+}
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
new file mode 100644
index 000000000000..0a0ec6f7d2dd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -0,0 +1,2916 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * osc cache management.
+ *
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+#include "osc_cl_internal.h"
+#include "osc_internal.h"
+
+static int extent_debug; /* set it to be true for more debug */
+
+static void osc_update_pending(struct osc_object *obj, int cmd, int delta);
+static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
+ int state);
+static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
+ struct osc_async_page *oap, int sent, int rc);
+static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
+ int cmd);
+static int osc_refresh_count(const struct lu_env *env,
+ struct osc_async_page *oap, int cmd);
+static int osc_io_unplug_async(const struct lu_env *env,
+ struct client_obd *cli, struct osc_object *osc);
+static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
+ unsigned int lost_grant);
+
+static void osc_extent_tree_dump0(int level, struct osc_object *obj,
+ const char *func, int line);
+#define osc_extent_tree_dump(lvl, obj) \
+ osc_extent_tree_dump0(lvl, obj, __func__, __LINE__)
+
+/** \addtogroup osc
+ * @{
+ */
+
+/* ------------------ osc extent ------------------ */
+static inline char *ext_flags(struct osc_extent *ext, char *flags)
+{
+ char *buf = flags;
+ *buf++ = ext->oe_rw ? 'r' : 'w';
+ if (ext->oe_intree)
+ *buf++ = 'i';
+ if (ext->oe_srvlock)
+ *buf++ = 's';
+ if (ext->oe_hp)
+ *buf++ = 'h';
+ if (ext->oe_urgent)
+ *buf++ = 'u';
+ if (ext->oe_memalloc)
+ *buf++ = 'm';
+ if (ext->oe_trunc_pending)
+ *buf++ = 't';
+ if (ext->oe_fsync_wait)
+ *buf++ = 'Y';
+ *buf = 0;
+ return flags;
+}
+
+static inline char list_empty_marker(struct list_head *list)
+{
+ return list_empty(list) ? '-' : '+';
+}
+
+#define EXTSTR "[%lu -> %lu/%lu]"
+#define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end
+static const char *oes_strings[] = {
+ "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL };
+
+#define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do { \
+ struct osc_extent *__ext = (extent); \
+ char __buf[16]; \
+ \
+ CDEBUG(lvl, \
+ "extent %p@{" EXTSTR ", " \
+ "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt, \
+ /* ----- extent part 0 ----- */ \
+ __ext, EXTPARA(__ext), \
+ /* ----- part 1 ----- */ \
+ atomic_read(&__ext->oe_refc), \
+ atomic_read(&__ext->oe_users), \
+ list_empty_marker(&__ext->oe_link), \
+ oes_strings[__ext->oe_state], ext_flags(__ext, __buf), \
+ __ext->oe_obj, \
+ /* ----- part 2 ----- */ \
+ __ext->oe_grants, __ext->oe_nr_pages, \
+ list_empty_marker(&__ext->oe_pages), \
+ waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \
+ __ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \
+ /* ----- part 4 ----- */ \
+ ## __VA_ARGS__); \
+} while (0)
+
+#undef EASSERTF
+#define EASSERTF(expr, ext, fmt, args...) do { \
+ if (!(expr)) { \
+ OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args); \
+ osc_extent_tree_dump(D_ERROR, (ext)->oe_obj); \
+ LASSERT(expr); \
+ } \
+} while (0)
+
+#undef EASSERT
+#define EASSERT(expr, ext) EASSERTF(expr, ext, "\n")
+
+static inline struct osc_extent *rb_extent(struct rb_node *n)
+{
+ if (n == NULL)
+ return NULL;
+
+ return container_of(n, struct osc_extent, oe_node);
+}
+
+static inline struct osc_extent *next_extent(struct osc_extent *ext)
+{
+ if (ext == NULL)
+ return NULL;
+
+ LASSERT(ext->oe_intree);
+ return rb_extent(rb_next(&ext->oe_node));
+}
+
+static inline struct osc_extent *prev_extent(struct osc_extent *ext)
+{
+ if (ext == NULL)
+ return NULL;
+
+ LASSERT(ext->oe_intree);
+ return rb_extent(rb_prev(&ext->oe_node));
+}
+
+static inline struct osc_extent *first_extent(struct osc_object *obj)
+{
+ return rb_extent(rb_first(&obj->oo_root));
+}
+
+/* object must be locked by caller. */
+static int osc_extent_sanity_check0(struct osc_extent *ext,
+ const char *func, const int line)
+{
+ struct osc_object *obj = ext->oe_obj;
+ struct osc_async_page *oap;
+ int page_count;
+ int rc = 0;
+
+ if (!osc_object_is_locked(obj))
+ GOTO(out, rc = 9);
+
+ if (ext->oe_state >= OES_STATE_MAX)
+ GOTO(out, rc = 10);
+
+ if (atomic_read(&ext->oe_refc) <= 0)
+ GOTO(out, rc = 20);
+
+ if (atomic_read(&ext->oe_refc) < atomic_read(&ext->oe_users))
+ GOTO(out, rc = 30);
+
+ switch (ext->oe_state) {
+ case OES_INV:
+ if (ext->oe_nr_pages > 0 || !list_empty(&ext->oe_pages))
+ GOTO(out, rc = 35);
+ GOTO(out, rc = 0);
+ break;
+ case OES_ACTIVE:
+ if (atomic_read(&ext->oe_users) == 0)
+ GOTO(out, rc = 40);
+ if (ext->oe_hp)
+ GOTO(out, rc = 50);
+ if (ext->oe_fsync_wait && !ext->oe_urgent)
+ GOTO(out, rc = 55);
+ break;
+ case OES_CACHE:
+ if (ext->oe_grants == 0)
+ GOTO(out, rc = 60);
+ if (ext->oe_fsync_wait && !ext->oe_urgent && !ext->oe_hp)
+ GOTO(out, rc = 65);
+ default:
+ if (atomic_read(&ext->oe_users) > 0)
+ GOTO(out, rc = 70);
+ }
+
+ if (ext->oe_max_end < ext->oe_end || ext->oe_end < ext->oe_start)
+ GOTO(out, rc = 80);
+
+ if (ext->oe_osclock == NULL && ext->oe_grants > 0)
+ GOTO(out, rc = 90);
+
+ if (ext->oe_osclock) {
+ struct cl_lock_descr *descr;
+ descr = &ext->oe_osclock->cll_descr;
+ if (!(descr->cld_start <= ext->oe_start &&
+ descr->cld_end >= ext->oe_max_end))
+ GOTO(out, rc = 100);
+ }
+
+ if (ext->oe_nr_pages > ext->oe_mppr)
+ GOTO(out, rc = 105);
+
+ /* Do not verify page list if extent is in RPC. This is because an
+ * in-RPC extent is supposed to be exclusively accessible w/o lock. */
+ if (ext->oe_state > OES_CACHE)
+ GOTO(out, rc = 0);
+
+ if (!extent_debug)
+ GOTO(out, rc = 0);
+
+ page_count = 0;
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ pgoff_t index = oap2cl_page(oap)->cp_index;
+ ++page_count;
+ if (index > ext->oe_end || index < ext->oe_start)
+ GOTO(out, rc = 110);
+ }
+ if (page_count != ext->oe_nr_pages)
+ GOTO(out, rc = 120);
+
+out:
+ if (rc != 0)
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "%s:%d sanity check %p failed with rc = %d\n",
+ func, line, ext, rc);
+ return rc;
+}
+
+#define sanity_check_nolock(ext) \
+ osc_extent_sanity_check0(ext, __func__, __LINE__)
+
+#define sanity_check(ext) ({ \
+ int __res; \
+ osc_object_lock((ext)->oe_obj); \
+ __res = sanity_check_nolock(ext); \
+ osc_object_unlock((ext)->oe_obj); \
+ __res; \
+})
+
+
+/**
+ * sanity check - to make sure there is no overlapped extent in the tree.
+ */
+static int osc_extent_is_overlapped(struct osc_object *obj,
+ struct osc_extent *ext)
+{
+ struct osc_extent *tmp;
+
+ LASSERT(osc_object_is_locked(obj));
+
+ if (!extent_debug)
+ return 0;
+
+ for (tmp = first_extent(obj); tmp != NULL; tmp = next_extent(tmp)) {
+ if (tmp == ext)
+ continue;
+ if (tmp->oe_end >= ext->oe_start &&
+ tmp->oe_start <= ext->oe_end)
+ return 1;
+ }
+ return 0;
+}
+
+static void osc_extent_state_set(struct osc_extent *ext, int state)
+{
+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ LASSERT(state >= OES_INV && state < OES_STATE_MAX);
+
+ /* Never try to sanity check a state changing extent :-) */
+ /* LASSERT(sanity_check_nolock(ext) == 0); */
+
+ /* TODO: validate the state machine */
+ ext->oe_state = state;
+ wake_up_all(&ext->oe_waitq);
+}
+
+static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
+{
+ struct osc_extent *ext;
+
+ OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_IOFS);
+ if (ext == NULL)
+ return NULL;
+
+ RB_CLEAR_NODE(&ext->oe_node);
+ ext->oe_obj = obj;
+ atomic_set(&ext->oe_refc, 1);
+ atomic_set(&ext->oe_users, 0);
+ INIT_LIST_HEAD(&ext->oe_link);
+ ext->oe_state = OES_INV;
+ INIT_LIST_HEAD(&ext->oe_pages);
+ init_waitqueue_head(&ext->oe_waitq);
+ ext->oe_osclock = NULL;
+
+ return ext;
+}
+
+static void osc_extent_free(struct osc_extent *ext)
+{
+ OBD_SLAB_FREE_PTR(ext, osc_extent_kmem);
+}
+
+static struct osc_extent *osc_extent_get(struct osc_extent *ext)
+{
+ LASSERT(atomic_read(&ext->oe_refc) >= 0);
+ atomic_inc(&ext->oe_refc);
+ return ext;
+}
+
+static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
+{
+ LASSERT(atomic_read(&ext->oe_refc) > 0);
+ if (atomic_dec_and_test(&ext->oe_refc)) {
+ LASSERT(list_empty(&ext->oe_link));
+ LASSERT(atomic_read(&ext->oe_users) == 0);
+ LASSERT(ext->oe_state == OES_INV);
+ LASSERT(!ext->oe_intree);
+
+ if (ext->oe_osclock) {
+ cl_lock_put(env, ext->oe_osclock);
+ ext->oe_osclock = NULL;
+ }
+ osc_extent_free(ext);
+ }
+}
+
+/**
+ * osc_extent_put_trust() is a special version of osc_extent_put() when
+ * it's known that the caller is not the last user. This is to address the
+ * problem of lacking of lu_env ;-).
+ */
+static void osc_extent_put_trust(struct osc_extent *ext)
+{
+ LASSERT(atomic_read(&ext->oe_refc) > 1);
+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ atomic_dec(&ext->oe_refc);
+}
+
+/**
+ * Return the extent which includes pgoff @index, or return the greatest
+ * previous extent in the tree.
+ */
+static struct osc_extent *osc_extent_search(struct osc_object *obj,
+ pgoff_t index)
+{
+ struct rb_node *n = obj->oo_root.rb_node;
+ struct osc_extent *tmp, *p = NULL;
+
+ LASSERT(osc_object_is_locked(obj));
+ while (n != NULL) {
+ tmp = rb_extent(n);
+ if (index < tmp->oe_start) {
+ n = n->rb_left;
+ } else if (index > tmp->oe_end) {
+ p = rb_extent(n);
+ n = n->rb_right;
+ } else {
+ return tmp;
+ }
+ }
+ return p;
+}
+
+/*
+ * Return the extent covering @index, otherwise return NULL.
+ * caller must have held object lock.
+ */
+static struct osc_extent *osc_extent_lookup(struct osc_object *obj,
+ pgoff_t index)
+{
+ struct osc_extent *ext;
+
+ ext = osc_extent_search(obj, index);
+ if (ext != NULL && ext->oe_start <= index && index <= ext->oe_end)
+ return osc_extent_get(ext);
+ return NULL;
+}
+
+/* caller must have held object lock. */
+static void osc_extent_insert(struct osc_object *obj, struct osc_extent *ext)
+{
+ struct rb_node **n = &obj->oo_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct osc_extent *tmp;
+
+ LASSERT(ext->oe_intree == 0);
+ LASSERT(ext->oe_obj == obj);
+ LASSERT(osc_object_is_locked(obj));
+ while (*n != NULL) {
+ tmp = rb_extent(*n);
+ parent = *n;
+
+ if (ext->oe_end < tmp->oe_start)
+ n = &(*n)->rb_left;
+ else if (ext->oe_start > tmp->oe_end)
+ n = &(*n)->rb_right;
+ else
+ EASSERTF(0, tmp, EXTSTR, EXTPARA(ext));
+ }
+ rb_link_node(&ext->oe_node, parent, n);
+ rb_insert_color(&ext->oe_node, &obj->oo_root);
+ osc_extent_get(ext);
+ ext->oe_intree = 1;
+}
+
+/* caller must have held object lock. */
+static void osc_extent_erase(struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+ LASSERT(osc_object_is_locked(obj));
+ if (ext->oe_intree) {
+ rb_erase(&ext->oe_node, &obj->oo_root);
+ ext->oe_intree = 0;
+ /* rbtree held a refcount */
+ osc_extent_put_trust(ext);
+ }
+}
+
+static struct osc_extent *osc_extent_hold(struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+
+ LASSERT(osc_object_is_locked(obj));
+ LASSERT(ext->oe_state == OES_ACTIVE || ext->oe_state == OES_CACHE);
+ if (ext->oe_state == OES_CACHE) {
+ osc_extent_state_set(ext, OES_ACTIVE);
+ osc_update_pending(obj, OBD_BRW_WRITE, -ext->oe_nr_pages);
+ }
+ atomic_inc(&ext->oe_users);
+ list_del_init(&ext->oe_link);
+ return osc_extent_get(ext);
+}
+
+static void __osc_extent_remove(struct osc_extent *ext)
+{
+ LASSERT(osc_object_is_locked(ext->oe_obj));
+ LASSERT(list_empty(&ext->oe_pages));
+ osc_extent_erase(ext);
+ list_del_init(&ext->oe_link);
+ osc_extent_state_set(ext, OES_INV);
+ OSC_EXTENT_DUMP(D_CACHE, ext, "destroyed.\n");
+}
+
+static void osc_extent_remove(struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+
+ osc_object_lock(obj);
+ __osc_extent_remove(ext);
+ osc_object_unlock(obj);
+}
+
+/**
+ * This function is used to merge extents to get better performance. It checks
+ * if @cur and @victim are contiguous at chunk level.
+ */
+static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
+ struct osc_extent *victim)
+{
+ struct osc_object *obj = cur->oe_obj;
+ pgoff_t chunk_start;
+ pgoff_t chunk_end;
+ int ppc_bits;
+
+ LASSERT(cur->oe_state == OES_CACHE);
+ LASSERT(osc_object_is_locked(obj));
+ if (victim == NULL)
+ return -EINVAL;
+
+ if (victim->oe_state != OES_CACHE || victim->oe_fsync_wait)
+ return -EBUSY;
+
+ if (cur->oe_max_end != victim->oe_max_end)
+ return -ERANGE;
+
+ LASSERT(cur->oe_osclock == victim->oe_osclock);
+ ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
+ chunk_start = cur->oe_start >> ppc_bits;
+ chunk_end = cur->oe_end >> ppc_bits;
+ if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
+ chunk_end + 1 != victim->oe_start >> ppc_bits)
+ return -ERANGE;
+
+ OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur);
+
+ cur->oe_start = min(cur->oe_start, victim->oe_start);
+ cur->oe_end = max(cur->oe_end, victim->oe_end);
+ cur->oe_grants += victim->oe_grants;
+ cur->oe_nr_pages += victim->oe_nr_pages;
+ /* only the following bits are needed to merge */
+ cur->oe_urgent |= victim->oe_urgent;
+ cur->oe_memalloc |= victim->oe_memalloc;
+ list_splice_init(&victim->oe_pages, &cur->oe_pages);
+ list_del_init(&victim->oe_link);
+ victim->oe_nr_pages = 0;
+
+ osc_extent_get(victim);
+ __osc_extent_remove(victim);
+ osc_extent_put(env, victim);
+
+ OSC_EXTENT_DUMP(D_CACHE, cur, "after merging %p.\n", victim);
+ return 0;
+}
+
+/**
+ * Drop user count of osc_extent, and unplug IO asynchronously.
+ */
+int osc_extent_release(const struct lu_env *env, struct osc_extent *ext)
+{
+ struct osc_object *obj = ext->oe_obj;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(atomic_read(&ext->oe_users) > 0);
+ LASSERT(sanity_check(ext) == 0);
+ LASSERT(ext->oe_grants > 0);
+
+ if (atomic_dec_and_lock(&ext->oe_users, &obj->oo_lock)) {
+ LASSERT(ext->oe_state == OES_ACTIVE);
+ if (ext->oe_trunc_pending) {
+ /* a truncate process is waiting for this extent.
+ * This may happen due to a race, check
+ * osc_cache_truncate_start(). */
+ osc_extent_state_set(ext, OES_TRUNC);
+ ext->oe_trunc_pending = 0;
+ } else {
+ osc_extent_state_set(ext, OES_CACHE);
+ osc_update_pending(obj, OBD_BRW_WRITE,
+ ext->oe_nr_pages);
+
+ /* try to merge the previous and next extent. */
+ osc_extent_merge(env, ext, prev_extent(ext));
+ osc_extent_merge(env, ext, next_extent(ext));
+
+ if (ext->oe_urgent)
+ list_move_tail(&ext->oe_link,
+ &obj->oo_urgent_exts);
+ }
+ osc_object_unlock(obj);
+
+ osc_io_unplug_async(env, osc_cli(obj), obj);
+ }
+ osc_extent_put(env, ext);
+ RETURN(rc);
+}
+
+static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
+{
+ return !(ex1->oe_end < ex2->oe_start || ex2->oe_end < ex1->oe_start);
+}
+
+/**
+ * Find or create an extent which includes @index, core function to manage
+ * extent tree.
+ */
+struct osc_extent *osc_extent_find(const struct lu_env *env,
+ struct osc_object *obj, pgoff_t index,
+ int *grants)
+
+{
+ struct client_obd *cli = osc_cli(obj);
+ struct cl_lock *lock;
+ struct osc_extent *cur;
+ struct osc_extent *ext;
+ struct osc_extent *conflict = NULL;
+ struct osc_extent *found = NULL;
+ pgoff_t chunk;
+ pgoff_t max_end;
+ int max_pages; /* max_pages_per_rpc */
+ int chunksize;
+ int ppc_bits; /* pages per chunk bits */
+ int chunk_mask;
+ int rc;
+ ENTRY;
+
+ cur = osc_extent_alloc(obj);
+ if (cur == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
+ LASSERT(lock != NULL);
+ LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+
+ LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
+ ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+ chunk_mask = ~((1 << ppc_bits) - 1);
+ chunksize = 1 << cli->cl_chunkbits;
+ chunk = index >> ppc_bits;
+
+ /* align end to rpc edge, rpc size may not be a power 2 integer. */
+ max_pages = cli->cl_max_pages_per_rpc;
+ LASSERT((max_pages & ~chunk_mask) == 0);
+ max_end = index - (index % max_pages) + max_pages - 1;
+ max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+
+ /* initialize new extent by parameters so far */
+ cur->oe_max_end = max_end;
+ cur->oe_start = index & chunk_mask;
+ cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
+ if (cur->oe_start < lock->cll_descr.cld_start)
+ cur->oe_start = lock->cll_descr.cld_start;
+ if (cur->oe_end > max_end)
+ cur->oe_end = max_end;
+ cur->oe_osclock = lock;
+ cur->oe_grants = 0;
+ cur->oe_mppr = max_pages;
+
+ /* grants has been allocated by caller */
+ LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
+ "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax);
+ LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR, EXTPARA(cur));
+
+restart:
+ osc_object_lock(obj);
+ ext = osc_extent_search(obj, cur->oe_start);
+ if (ext == NULL)
+ ext = first_extent(obj);
+ while (ext != NULL) {
+ loff_t ext_chk_start = ext->oe_start >> ppc_bits;
+ loff_t ext_chk_end = ext->oe_end >> ppc_bits;
+
+ LASSERT(sanity_check_nolock(ext) == 0);
+ if (chunk > ext_chk_end + 1)
+ break;
+
+ /* if covering by different locks, no chance to match */
+ if (lock != ext->oe_osclock) {
+ EASSERTF(!overlapped(ext, cur), ext,
+ EXTSTR, EXTPARA(cur));
+
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* discontiguous chunks? */
+ if (chunk + 1 < ext_chk_start) {
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* ok, from now on, ext and cur have these attrs:
+ * 1. covered by the same lock
+ * 2. contiguous at chunk level or overlapping. */
+
+ if (overlapped(ext, cur)) {
+ /* cur is the minimum unit, so overlapping means
+ * full contain. */
+ EASSERTF((ext->oe_start <= cur->oe_start &&
+ ext->oe_end >= cur->oe_end),
+ ext, EXTSTR, EXTPARA(cur));
+
+ if (ext->oe_state > OES_CACHE || ext->oe_fsync_wait) {
+ /* for simplicity, we wait for this extent to
+ * finish before going forward. */
+ conflict = osc_extent_get(ext);
+ break;
+ }
+
+ found = osc_extent_hold(ext);
+ break;
+ }
+
+ /* non-overlapped extent */
+ if (ext->oe_state != OES_CACHE || ext->oe_fsync_wait) {
+ /* we can't do anything for a non OES_CACHE extent, or
+ * if there is someone waiting for this extent to be
+ * flushed, try next one. */
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* check if they belong to the same rpc slot before trying to
+ * merge. the extents are not overlapped and contiguous at
+ * chunk level to get here. */
+ if (ext->oe_max_end != max_end) {
+ /* if they don't belong to the same RPC slot or
+ * max_pages_per_rpc has ever changed, do not merge. */
+ ext = next_extent(ext);
+ continue;
+ }
+
+ /* it's required that an extent must be contiguous at chunk
+ * level so that we know the whole extent is covered by grant
+ * (the pages in the extent are NOT required to be contiguous).
+ * Otherwise, it will be too much difficult to know which
+ * chunks have grants allocated. */
+
+ /* try to do front merge - extend ext's start */
+ if (chunk + 1 == ext_chk_start) {
+ /* ext must be chunk size aligned */
+ EASSERT((ext->oe_start & ~chunk_mask) == 0, ext);
+
+ /* pull ext's start back to cover cur */
+ ext->oe_start = cur->oe_start;
+ ext->oe_grants += chunksize;
+ *grants -= chunksize;
+
+ found = osc_extent_hold(ext);
+ } else if (chunk == ext_chk_end + 1) {
+ /* rear merge */
+ ext->oe_end = cur->oe_end;
+ ext->oe_grants += chunksize;
+ *grants -= chunksize;
+
+ /* try to merge with the next one because we just fill
+ * in a gap */
+ if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
+ /* we can save extent tax from next extent */
+ *grants += cli->cl_extent_tax;
+
+ found = osc_extent_hold(ext);
+ }
+ if (found != NULL)
+ break;
+
+ ext = next_extent(ext);
+ }
+
+ osc_extent_tree_dump(D_CACHE, obj);
+ if (found != NULL) {
+ LASSERT(conflict == NULL);
+ if (!IS_ERR(found)) {
+ LASSERT(found->oe_osclock == cur->oe_osclock);
+ OSC_EXTENT_DUMP(D_CACHE, found,
+ "found caching ext for %lu.\n", index);
+ }
+ } else if (conflict == NULL) {
+ /* create a new extent */
+ EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur);
+ cur->oe_grants = chunksize + cli->cl_extent_tax;
+ *grants -= cur->oe_grants;
+ LASSERT(*grants >= 0);
+
+ cur->oe_state = OES_CACHE;
+ found = osc_extent_hold(cur);
+ osc_extent_insert(obj, cur);
+ OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
+ index, lock->cll_descr.cld_end);
+ }
+ osc_object_unlock(obj);
+
+ if (conflict != NULL) {
+ LASSERT(found == NULL);
+
+ /* waiting for IO to finish. Please notice that it's impossible
+ * to be an OES_TRUNC extent. */
+ rc = osc_extent_wait(env, conflict, OES_INV);
+ osc_extent_put(env, conflict);
+ conflict = NULL;
+ if (rc < 0)
+ GOTO(out, found = ERR_PTR(rc));
+
+ goto restart;
+ }
+ EXIT;
+
+out:
+ osc_extent_put(env, cur);
+ LASSERT(*grants >= 0);
+ return found;
+}
+
+/**
+ * Called when IO is finished to an extent.
+ */
+int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
+ int sent, int rc)
+{
+ struct client_obd *cli = osc_cli(ext->oe_obj);
+ struct osc_async_page *oap;
+ struct osc_async_page *tmp;
+ int nr_pages = ext->oe_nr_pages;
+ int lost_grant = 0;
+ int blocksize = cli->cl_import->imp_obd->obd_osfs.os_bsize ? : 4096;
+ __u64 last_off = 0;
+ int last_count = -1;
+ ENTRY;
+
+ OSC_EXTENT_DUMP(D_CACHE, ext, "extent finished.\n");
+
+ ext->oe_rc = rc ?: ext->oe_nr_pages;
+ EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
+ list_for_each_entry_safe(oap, tmp, &ext->oe_pages,
+ oap_pending_item) {
+ list_del_init(&oap->oap_rpc_item);
+ list_del_init(&oap->oap_pending_item);
+ if (last_off <= oap->oap_obj_off) {
+ last_off = oap->oap_obj_off;
+ last_count = oap->oap_count;
+ }
+
+ --ext->oe_nr_pages;
+ osc_ap_completion(env, cli, oap, sent, rc);
+ }
+ EASSERT(ext->oe_nr_pages == 0, ext);
+
+ if (!sent) {
+ lost_grant = ext->oe_grants;
+ } else if (blocksize < PAGE_CACHE_SIZE &&
+ last_count != PAGE_CACHE_SIZE) {
+ /* For short writes we shouldn't count parts of pages that
+ * span a whole chunk on the OST side, or our accounting goes
+ * wrong. Should match the code in filter_grant_check. */
+ int offset = oap->oap_page_off & ~CFS_PAGE_MASK;
+ int count = oap->oap_count + (offset & (blocksize - 1));
+ int end = (offset + oap->oap_count) & (blocksize - 1);
+ if (end)
+ count += blocksize - end;
+
+ lost_grant = PAGE_CACHE_SIZE - count;
+ }
+ if (ext->oe_grants > 0)
+ osc_free_grant(cli, nr_pages, lost_grant);
+
+ osc_extent_remove(ext);
+ /* put the refcount for RPC */
+ osc_extent_put(env, ext);
+ RETURN(0);
+}
+
+static int extent_wait_cb(struct osc_extent *ext, int state)
+{
+ int ret;
+
+ osc_object_lock(ext->oe_obj);
+ ret = ext->oe_state == state;
+ osc_object_unlock(ext->oe_obj);
+
+ return ret;
+}
+
+/**
+ * Wait for the extent's state to become @state.
+ */
+static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
+ int state)
+{
+ struct osc_object *obj = ext->oe_obj;
+ struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
+ int rc = 0;
+ ENTRY;
+
+ osc_object_lock(obj);
+ LASSERT(sanity_check_nolock(ext) == 0);
+ /* `Kick' this extent only if the caller is waiting for it to be
+ * written out. */
+ if (state == OES_INV && !ext->oe_urgent && !ext->oe_hp) {
+ if (ext->oe_state == OES_ACTIVE) {
+ ext->oe_urgent = 1;
+ } else if (ext->oe_state == OES_CACHE) {
+ ext->oe_urgent = 1;
+ osc_extent_hold(ext);
+ rc = 1;
+ }
+ }
+ osc_object_unlock(obj);
+ if (rc == 1)
+ osc_extent_release(env, ext);
+
+ /* wait for the extent until its state becomes @state */
+ rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi);
+ if (rc == -ETIMEDOUT) {
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "%s: wait ext to %d timedout, recovery in progress?\n",
+ osc_export(obj)->exp_obd->obd_name, state);
+
+ lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
+ &lwi);
+ }
+ if (rc == 0 && ext->oe_rc < 0)
+ rc = ext->oe_rc;
+ RETURN(rc);
+}
+
+/**
+ * Discard pages with index greater than @size. If @ext is overlapped with
+ * @size, then partial truncate happens.
+ */
+static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
+ bool partial)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct osc_object *obj = ext->oe_obj;
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_async_page *oap;
+ struct osc_async_page *tmp;
+ int pages_in_chunk = 0;
+ int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+ __u64 trunc_chunk = trunc_index >> ppc_bits;
+ int grants = 0;
+ int nr_pages = 0;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(sanity_check(ext) == 0);
+ LASSERT(ext->oe_state == OES_TRUNC);
+ LASSERT(!ext->oe_urgent);
+
+ /* Request new lu_env.
+ * We can't use that env from osc_cache_truncate_start() because
+ * it's from lov_io_sub and not fully initialized. */
+ env = cl_env_nested_get(&nest);
+ io = &osc_env_info(env)->oti_io;
+ io->ci_obj = cl_object_top(osc2cl(obj));
+ rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /* discard all pages with index greater then trunc_index */
+ list_for_each_entry_safe(oap, tmp, &ext->oe_pages,
+ oap_pending_item) {
+ struct cl_page *sub = oap2cl_page(oap);
+ struct cl_page *page = cl_page_top(sub);
+
+ LASSERT(list_empty(&oap->oap_rpc_item));
+
+ /* only discard the pages with their index greater than
+ * trunc_index, and ... */
+ if (sub->cp_index < trunc_index ||
+ (sub->cp_index == trunc_index && partial)) {
+ /* accounting how many pages remaining in the chunk
+ * so that we can calculate grants correctly. */
+ if (sub->cp_index >> ppc_bits == trunc_chunk)
+ ++pages_in_chunk;
+ continue;
+ }
+
+ list_del_init(&oap->oap_pending_item);
+
+ cl_page_get(page);
+ lu_ref_add(&page->cp_reference, "truncate", current);
+
+ if (cl_page_own(env, io, page) == 0) {
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ LASSERT(0);
+ }
+
+ lu_ref_del(&page->cp_reference, "truncate", current);
+ cl_page_put(env, page);
+
+ --ext->oe_nr_pages;
+ ++nr_pages;
+ }
+ EASSERTF(ergo(ext->oe_start >= trunc_index + !!partial,
+ ext->oe_nr_pages == 0),
+ ext, "trunc_index %lu, partial %d\n", trunc_index, partial);
+
+ osc_object_lock(obj);
+ if (ext->oe_nr_pages == 0) {
+ LASSERT(pages_in_chunk == 0);
+ grants = ext->oe_grants;
+ ext->oe_grants = 0;
+ } else { /* calculate how many grants we can free */
+ int chunks = (ext->oe_end >> ppc_bits) - trunc_chunk;
+ pgoff_t last_index;
+
+
+ /* if there is no pages in this chunk, we can also free grants
+ * for the last chunk */
+ if (pages_in_chunk == 0) {
+ /* if this is the 1st chunk and no pages in this chunk,
+ * ext->oe_nr_pages must be zero, so we should be in
+ * the other if-clause. */
+ LASSERT(trunc_chunk > 0);
+ --trunc_chunk;
+ ++chunks;
+ }
+
+ /* this is what we can free from this extent */
+ grants = chunks << cli->cl_chunkbits;
+ ext->oe_grants -= grants;
+ last_index = ((trunc_chunk + 1) << ppc_bits) - 1;
+ ext->oe_end = min(last_index, ext->oe_max_end);
+ LASSERT(ext->oe_end >= ext->oe_start);
+ LASSERT(ext->oe_grants > 0);
+ }
+ osc_object_unlock(obj);
+
+ if (grants > 0 || nr_pages > 0)
+ osc_free_grant(cli, nr_pages, grants);
+
+out:
+ cl_io_fini(env, io);
+ cl_env_nested_put(&nest, env);
+ RETURN(rc);
+}
+
+/**
+ * This function is used to make the extent prepared for transfer.
+ * A race with flusing page - ll_writepage() has to be handled cautiously.
+ */
+static int osc_extent_make_ready(const struct lu_env *env,
+ struct osc_extent *ext)
+{
+ struct osc_async_page *oap;
+ struct osc_async_page *last = NULL;
+ struct osc_object *obj = ext->oe_obj;
+ int page_count = 0;
+ int rc;
+ ENTRY;
+
+ /* we're going to grab page lock, so object lock must not be taken. */
+ LASSERT(sanity_check(ext) == 0);
+ /* in locking state, any process should not touch this extent. */
+ EASSERT(ext->oe_state == OES_LOCKING, ext);
+ EASSERT(ext->oe_owner != NULL, ext);
+
+ OSC_EXTENT_DUMP(D_CACHE, ext, "make ready\n");
+
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ ++page_count;
+ if (last == NULL || last->oap_obj_off < oap->oap_obj_off)
+ last = oap;
+
+ /* checking ASYNC_READY is race safe */
+ if ((oap->oap_async_flags & ASYNC_READY) != 0)
+ continue;
+
+ rc = osc_make_ready(env, oap, OBD_BRW_WRITE);
+ switch (rc) {
+ case 0:
+ spin_lock(&oap->oap_lock);
+ oap->oap_async_flags |= ASYNC_READY;
+ spin_unlock(&oap->oap_lock);
+ break;
+ case -EALREADY:
+ LASSERT((oap->oap_async_flags & ASYNC_READY) != 0);
+ break;
+ default:
+ LASSERTF(0, "unknown return code: %d\n", rc);
+ }
+ }
+
+ LASSERT(page_count == ext->oe_nr_pages);
+ LASSERT(last != NULL);
+ /* the last page is the only one we need to refresh its count by
+ * the size of file. */
+ if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
+ last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
+ LASSERT(last->oap_count > 0);
+ LASSERT(last->oap_page_off + last->oap_count <= PAGE_CACHE_SIZE);
+ last->oap_async_flags |= ASYNC_COUNT_STABLE;
+ }
+
+ /* for the rest of pages, we don't need to call osf_refresh_count()
+ * because it's known they are not the last page */
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
+ oap->oap_count = PAGE_CACHE_SIZE - oap->oap_page_off;
+ oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+ }
+ }
+
+ osc_object_lock(obj);
+ osc_extent_state_set(ext, OES_RPC);
+ osc_object_unlock(obj);
+ /* get a refcount for RPC. */
+ osc_extent_get(ext);
+
+ RETURN(0);
+}
+
+/**
+ * Quick and simple version of osc_extent_find(). This function is frequently
+ * called to expand the extent for the same IO. To expand the extent, the
+ * page index must be in the same or next chunk of ext->oe_end.
+ */
+static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants)
+{
+ struct osc_object *obj = ext->oe_obj;
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_extent *next;
+ int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+ pgoff_t chunk = index >> ppc_bits;
+ pgoff_t end_chunk;
+ pgoff_t end_index;
+ int chunksize = 1 << cli->cl_chunkbits;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ext->oe_max_end >= index && ext->oe_start <= index);
+ osc_object_lock(obj);
+ LASSERT(sanity_check_nolock(ext) == 0);
+ end_chunk = ext->oe_end >> ppc_bits;
+ if (chunk > end_chunk + 1)
+ GOTO(out, rc = -ERANGE);
+
+ if (end_chunk >= chunk)
+ GOTO(out, rc = 0);
+
+ LASSERT(end_chunk + 1 == chunk);
+ /* try to expand this extent to cover @index */
+ end_index = min(ext->oe_max_end, ((chunk + 1) << ppc_bits) - 1);
+
+ next = next_extent(ext);
+ if (next != NULL && next->oe_start <= end_index)
+ /* complex mode - overlapped with the next extent,
+ * this case will be handled by osc_extent_find() */
+ GOTO(out, rc = -EAGAIN);
+
+ ext->oe_end = end_index;
+ ext->oe_grants += chunksize;
+ *grants -= chunksize;
+ LASSERT(*grants >= 0);
+ EASSERTF(osc_extent_is_overlapped(obj, ext) == 0, ext,
+ "overlapped after expanding for %lu.\n", index);
+ EXIT;
+
+out:
+ osc_object_unlock(obj);
+ RETURN(rc);
+}
+
+static void osc_extent_tree_dump0(int level, struct osc_object *obj,
+ const char *func, int line)
+{
+ struct osc_extent *ext;
+ int cnt;
+
+ CDEBUG(level, "Dump object %p extents at %s:%d, mppr: %u.\n",
+ obj, func, line, osc_cli(obj)->cl_max_pages_per_rpc);
+
+ /* osc_object_lock(obj); */
+ cnt = 1;
+ for (ext = first_extent(obj); ext != NULL; ext = next_extent(ext))
+ OSC_EXTENT_DUMP(level, ext, "in tree %d.\n", cnt++);
+
+ cnt = 1;
+ list_for_each_entry(ext, &obj->oo_hp_exts, oe_link)
+ OSC_EXTENT_DUMP(level, ext, "hp %d.\n", cnt++);
+
+ cnt = 1;
+ list_for_each_entry(ext, &obj->oo_urgent_exts, oe_link)
+ OSC_EXTENT_DUMP(level, ext, "urgent %d.\n", cnt++);
+
+ cnt = 1;
+ list_for_each_entry(ext, &obj->oo_reading_exts, oe_link)
+ OSC_EXTENT_DUMP(level, ext, "reading %d.\n", cnt++);
+ /* osc_object_unlock(obj); */
+}
+
+/* ------------------ osc extent end ------------------ */
+
+static inline int osc_is_ready(struct osc_object *osc)
+{
+ return !list_empty(&osc->oo_ready_item) ||
+ !list_empty(&osc->oo_hp_ready_item);
+}
+
+#define OSC_IO_DEBUG(OSC, STR, args...) \
+ CDEBUG(D_CACHE, "obj %p ready %d|%c|%c wr %d|%c|%c rd %d|%c " STR, \
+ (OSC), osc_is_ready(OSC), \
+ list_empty_marker(&(OSC)->oo_hp_ready_item), \
+ list_empty_marker(&(OSC)->oo_ready_item), \
+ atomic_read(&(OSC)->oo_nr_writes), \
+ list_empty_marker(&(OSC)->oo_hp_exts), \
+ list_empty_marker(&(OSC)->oo_urgent_exts), \
+ atomic_read(&(OSC)->oo_nr_reads), \
+ list_empty_marker(&(OSC)->oo_reading_exts), \
+ ##args)
+
+static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
+ int cmd)
+{
+ struct osc_page *opg = oap2osc_page(oap);
+ struct cl_page *page = cl_page_top(oap2cl_page(oap));
+ int result;
+
+ LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */
+
+ ENTRY;
+ result = cl_page_make_ready(env, page, CRT_WRITE);
+ if (result == 0)
+ opg->ops_submit_time = cfs_time_current();
+ RETURN(result);
+}
+
+static int osc_refresh_count(const struct lu_env *env,
+ struct osc_async_page *oap, int cmd)
+{
+ struct osc_page *opg = oap2osc_page(oap);
+ struct cl_page *page = oap2cl_page(oap);
+ struct cl_object *obj;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+
+ int result;
+ loff_t kms;
+
+ /* readpage queues with _COUNT_STABLE, shouldn't get here. */
+ LASSERT(!(cmd & OBD_BRW_READ));
+ LASSERT(opg != NULL);
+ obj = opg->ops_cl.cpl_obj;
+
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+ if (result < 0)
+ return result;
+ kms = attr->cat_kms;
+ if (cl_offset(obj, page->cp_index) >= kms)
+ /* catch race with truncate */
+ return 0;
+ else if (cl_offset(obj, page->cp_index + 1) > kms)
+ /* catch sub-page write at end of file */
+ return kms % PAGE_CACHE_SIZE;
+ else
+ return PAGE_CACHE_SIZE;
+}
+
+static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
+ int cmd, int rc)
+{
+ struct osc_page *opg = oap2osc_page(oap);
+ struct cl_page *page = cl_page_top(oap2cl_page(oap));
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+ enum cl_req_type crt;
+ int srvlock;
+
+ ENTRY;
+
+ cmd &= ~OBD_BRW_NOQUOTA;
+ LASSERT(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ));
+ LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE));
+ LASSERT(opg->ops_transfer_pinned);
+
+ /*
+ * page->cp_req can be NULL if io submission failed before
+ * cl_req was allocated.
+ */
+ if (page->cp_req != NULL)
+ cl_req_page_done(env, page);
+ LASSERT(page->cp_req == NULL);
+
+ crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE;
+ /* Clear opg->ops_transfer_pinned before VM lock is released. */
+ opg->ops_transfer_pinned = 0;
+
+ spin_lock(&obj->oo_seatbelt);
+ LASSERT(opg->ops_submitter != NULL);
+ LASSERT(!list_empty(&opg->ops_inflight));
+ list_del_init(&opg->ops_inflight);
+ opg->ops_submitter = NULL;
+ spin_unlock(&obj->oo_seatbelt);
+
+ opg->ops_submit_time = 0;
+ srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK;
+
+ /* statistic */
+ if (rc == 0 && srvlock) {
+ struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev;
+ struct osc_stats *stats = &lu2osc_dev(ld)->od_stats;
+ int bytes = oap->oap_count;
+
+ if (crt == CRT_READ)
+ stats->os_lockless_reads += bytes;
+ else
+ stats->os_lockless_writes += bytes;
+ }
+
+ /*
+ * This has to be the last operation with the page, as locks are
+ * released in cl_page_completion() and nothing except for the
+ * reference counter protects page from concurrent reclaim.
+ */
+ lu_ref_del(&page->cp_reference, "transfer", page);
+
+ cl_page_completion(env, page, crt, rc);
+
+ RETURN(0);
+}
+
+#define OSC_DUMP_GRANT(cli, fmt, args...) do { \
+ struct client_obd *__tmp = (cli); \
+ CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \
+ "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \
+ __tmp->cl_import->imp_obd->obd_name, \
+ __tmp->cl_dirty, __tmp->cl_dirty_max, \
+ atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
+ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \
+ __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \
+} while (0)
+
+/* caller must hold loi_list_lock */
+static void osc_consume_write_grant(struct client_obd *cli,
+ struct brw_page *pga)
+{
+ LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock));
+ LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
+ atomic_inc(&obd_dirty_pages);
+ cli->cl_dirty += PAGE_CACHE_SIZE;
+ pga->flag |= OBD_BRW_FROM_GRANT;
+ CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
+ PAGE_CACHE_SIZE, pga, pga->pg);
+ osc_update_next_shrink(cli);
+}
+
+/* the companion to osc_consume_write_grant, called when a brw has completed.
+ * must be called with the loi lock held. */
+static void osc_release_write_grant(struct client_obd *cli,
+ struct brw_page *pga)
+{
+ ENTRY;
+
+ LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock));
+ if (!(pga->flag & OBD_BRW_FROM_GRANT)) {
+ EXIT;
+ return;
+ }
+
+ pga->flag &= ~OBD_BRW_FROM_GRANT;
+ atomic_dec(&obd_dirty_pages);
+ cli->cl_dirty -= PAGE_CACHE_SIZE;
+ if (pga->flag & OBD_BRW_NOCACHE) {
+ pga->flag &= ~OBD_BRW_NOCACHE;
+ atomic_dec(&obd_dirty_transit_pages);
+ cli->cl_dirty_transit -= PAGE_CACHE_SIZE;
+ }
+ EXIT;
+}
+
+/**
+ * To avoid sleeping with object lock held, it's good for us allocate enough
+ * grants before entering into critical section.
+ *
+ * client_obd_list_lock held by caller
+ */
+static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes)
+{
+ int rc = -EDQUOT;
+
+ if (cli->cl_avail_grant >= bytes) {
+ cli->cl_avail_grant -= bytes;
+ cli->cl_reserved_grant += bytes;
+ rc = 0;
+ }
+ return rc;
+}
+
+static void __osc_unreserve_grant(struct client_obd *cli,
+ unsigned int reserved, unsigned int unused)
+{
+ /* it's quite normal for us to get more grant than reserved.
+ * Thinking about a case that two extents merged by adding a new
+ * chunk, we can save one extent tax. If extent tax is greater than
+ * one chunk, we can save more grant by adding a new chunk */
+ cli->cl_reserved_grant -= reserved;
+ if (unused > reserved) {
+ cli->cl_avail_grant += reserved;
+ cli->cl_lost_grant += unused - reserved;
+ } else {
+ cli->cl_avail_grant += unused;
+ }
+}
+
+void osc_unreserve_grant(struct client_obd *cli,
+ unsigned int reserved, unsigned int unused)
+{
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ __osc_unreserve_grant(cli, reserved, unused);
+ if (unused > 0)
+ osc_wake_cache_waiters(cli);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+}
+
+/**
+ * Free grant after IO is finished or canceled.
+ *
+ * @lost_grant is used to remember how many grants we have allocated but not
+ * used, we should return these grants to OST. There're two cases where grants
+ * can be lost:
+ * 1. truncate;
+ * 2. blocksize at OST is less than PAGE_CACHE_SIZE and a partial page was
+ * written. In this case OST may use less chunks to serve this partial
+ * write. OSTs don't actually know the page size on the client side. so
+ * clients have to calculate lost grant by the blocksize on the OST.
+ * See filter_grant_check() for details.
+ */
+static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
+ unsigned int lost_grant)
+{
+ int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ atomic_sub(nr_pages, &obd_dirty_pages);
+ cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT;
+ cli->cl_lost_grant += lost_grant;
+ if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
+ /* borrow some grant from truncate to avoid the case that
+ * truncate uses up all avail grant */
+ cli->cl_lost_grant -= grant;
+ cli->cl_avail_grant += grant;
+ }
+ osc_wake_cache_waiters(cli);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
+ lost_grant, cli->cl_lost_grant,
+ cli->cl_avail_grant, cli->cl_dirty);
+}
+
+/**
+ * The companion to osc_enter_cache(), called when @oap is no longer part of
+ * the dirty accounting due to error.
+ */
+static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap)
+{
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ osc_release_write_grant(cli, &oap->oap_brw_page);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+}
+
+/**
+ * Non-blocking version of osc_enter_cache() that consumes grant only when it
+ * is available.
+ */
+static int osc_enter_cache_try(struct client_obd *cli,
+ struct osc_async_page *oap,
+ int bytes, int transient)
+{
+ int rc;
+
+ OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+
+ rc = osc_reserve_grant(cli, bytes);
+ if (rc < 0)
+ return 0;
+
+ if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
+ atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
+ osc_consume_write_grant(cli, &oap->oap_brw_page);
+ if (transient) {
+ cli->cl_dirty_transit += PAGE_CACHE_SIZE;
+ atomic_inc(&obd_dirty_transit_pages);
+ oap->oap_brw_flags |= OBD_BRW_NOCACHE;
+ }
+ rc = 1;
+ } else {
+ __osc_unreserve_grant(cli, bytes, bytes);
+ rc = 0;
+ }
+ return rc;
+}
+
+static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
+{
+ int rc;
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = list_empty(&ocw->ocw_entry);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ return rc;
+}
+
+/**
+ * The main entry to reserve dirty page accounting. Usually the grant reserved
+ * in this function will be freed in bulk in osc_free_grant() unless it fails
+ * to add osc cache, in that case, it will be freed in osc_exit_cache().
+ *
+ * The process will be put into sleep if it's already run out of grant.
+ */
+static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
+ struct osc_async_page *oap, int bytes)
+{
+ struct osc_object *osc = oap->oap_obj;
+ struct lov_oinfo *loi = osc->oo_oinfo;
+ struct osc_cache_waiter ocw;
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ int rc = -EDQUOT;
+ ENTRY;
+
+ OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+
+ /* force the caller to try sync io. this can jump the list
+ * of queued writes and create a discontiguous rpc stream */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
+ cli->cl_dirty_max < PAGE_CACHE_SIZE ||
+ cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync)
+ GOTO(out, rc = -EDQUOT);
+
+ /* Hopefully normal case - cache space and write credits available */
+ if (osc_enter_cache_try(cli, oap, bytes, 0))
+ GOTO(out, rc = 0);
+
+ /* We can get here for two reasons: too many dirty pages in cache, or
+ * run out of grants. In both cases we should write dirty pages out.
+ * Adding a cache waiter will trigger urgent write-out no matter what
+ * RPC size will be.
+ * The exiting condition is no avail grants and no dirty pages caching,
+ * that really means there is no space on the OST. */
+ init_waitqueue_head(&ocw.ocw_waitq);
+ ocw.ocw_oap = oap;
+ ocw.ocw_grant = bytes;
+ while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
+ list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
+ ocw.ocw_rc = 0;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ osc_io_unplug_async(env, cli, NULL);
+
+ CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n",
+ cli->cl_import->imp_obd->obd_name, &ocw, oap);
+
+ rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+
+ /* l_wait_event is interrupted by signal */
+ if (rc < 0) {
+ list_del_init(&ocw.ocw_entry);
+ GOTO(out, rc);
+ }
+
+ LASSERT(list_empty(&ocw.ocw_entry));
+ rc = ocw.ocw_rc;
+
+ if (rc != -EDQUOT)
+ GOTO(out, rc);
+ if (osc_enter_cache_try(cli, oap, bytes, 0))
+ GOTO(out, rc = 0);
+ }
+ EXIT;
+out:
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ OSC_DUMP_GRANT(cli, "returned %d.\n", rc);
+ RETURN(rc);
+}
+
+/* caller must hold loi_list_lock */
+void osc_wake_cache_waiters(struct client_obd *cli)
+{
+ struct list_head *l, *tmp;
+ struct osc_cache_waiter *ocw;
+
+ ENTRY;
+ list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
+ ocw = list_entry(l, struct osc_cache_waiter, ocw_entry);
+ list_del_init(&ocw->ocw_entry);
+
+ ocw->ocw_rc = -EDQUOT;
+ /* we can't dirty more */
+ if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
+ (atomic_read(&obd_dirty_pages) + 1 >
+ obd_max_dirty_pages)) {
+ CDEBUG(D_CACHE, "no dirty room: dirty: %ld "
+ "osc max %ld, sys max %d\n", cli->cl_dirty,
+ cli->cl_dirty_max, obd_max_dirty_pages);
+ goto wakeup;
+ }
+
+ ocw->ocw_rc = 0;
+ if (!osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
+ ocw->ocw_rc = -EDQUOT;
+
+wakeup:
+ CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n",
+ ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc);
+
+ wake_up(&ocw->ocw_waitq);
+ }
+
+ EXIT;
+}
+
+static int osc_max_rpc_in_flight(struct client_obd *cli, struct osc_object *osc)
+{
+ int hprpc = !!list_empty(&osc->oo_hp_exts);
+ return rpcs_in_flight(cli) >= cli->cl_max_rpcs_in_flight + hprpc;
+}
+
+/* This maintains the lists of pending pages to read/write for a given object
+ * (lop). This is used by osc_check_rpcs->osc_next_obj() and osc_list_maint()
+ * to quickly find objects that are ready to send an RPC. */
+static int osc_makes_rpc(struct client_obd *cli, struct osc_object *osc,
+ int cmd)
+{
+ int invalid_import = 0;
+ ENTRY;
+
+ /* if we have an invalid import we want to drain the queued pages
+ * by forcing them through rpcs that immediately fail and complete
+ * the pages. recovery relies on this to empty the queued pages
+ * before canceling the locks and evicting down the llite pages */
+ if ((cli->cl_import == NULL || cli->cl_import->imp_invalid))
+ invalid_import = 1;
+
+ if (cmd & OBD_BRW_WRITE) {
+ if (atomic_read(&osc->oo_nr_writes) == 0)
+ RETURN(0);
+ if (invalid_import) {
+ CDEBUG(D_CACHE, "invalid import forcing RPC\n");
+ RETURN(1);
+ }
+ if (!list_empty(&osc->oo_hp_exts)) {
+ CDEBUG(D_CACHE, "high prio request forcing RPC\n");
+ RETURN(1);
+ }
+ if (!list_empty(&osc->oo_urgent_exts)) {
+ CDEBUG(D_CACHE, "urgent request forcing RPC\n");
+ RETURN(1);
+ }
+ /* trigger a write rpc stream as long as there are dirtiers
+ * waiting for space. as they're waiting, they're not going to
+ * create more pages to coalesce with what's waiting.. */
+ if (!list_empty(&cli->cl_cache_waiters)) {
+ CDEBUG(D_CACHE, "cache waiters forcing RPC\n");
+ RETURN(1);
+ }
+ if (atomic_read(&osc->oo_nr_writes) >=
+ cli->cl_max_pages_per_rpc)
+ RETURN(1);
+ } else {
+ if (atomic_read(&osc->oo_nr_reads) == 0)
+ RETURN(0);
+ if (invalid_import) {
+ CDEBUG(D_CACHE, "invalid import forcing RPC\n");
+ RETURN(1);
+ }
+ /* all read are urgent. */
+ if (!list_empty(&osc->oo_reading_exts))
+ RETURN(1);
+ }
+
+ RETURN(0);
+}
+
+static void osc_update_pending(struct osc_object *obj, int cmd, int delta)
+{
+ struct client_obd *cli = osc_cli(obj);
+ if (cmd & OBD_BRW_WRITE) {
+ atomic_add(delta, &obj->oo_nr_writes);
+ atomic_add(delta, &cli->cl_pending_w_pages);
+ LASSERT(atomic_read(&obj->oo_nr_writes) >= 0);
+ } else {
+ atomic_add(delta, &obj->oo_nr_reads);
+ atomic_add(delta, &cli->cl_pending_r_pages);
+ LASSERT(atomic_read(&obj->oo_nr_reads) >= 0);
+ }
+ OSC_IO_DEBUG(obj, "update pending cmd %d delta %d.\n", cmd, delta);
+}
+
+static int osc_makes_hprpc(struct osc_object *obj)
+{
+ return !list_empty(&obj->oo_hp_exts);
+}
+
+static void on_list(struct list_head *item, struct list_head *list, int should_be_on)
+{
+ if (list_empty(item) && should_be_on)
+ list_add_tail(item, list);
+ else if (!list_empty(item) && !should_be_on)
+ list_del_init(item);
+}
+
+/* maintain the osc's cli list membership invariants so that osc_send_oap_rpc
+ * can find pages to build into rpcs quickly */
+static int __osc_list_maint(struct client_obd *cli, struct osc_object *osc)
+{
+ if (osc_makes_hprpc(osc)) {
+ /* HP rpc */
+ on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list, 0);
+ on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 1);
+ } else {
+ on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 0);
+ on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list,
+ osc_makes_rpc(cli, osc, OBD_BRW_WRITE) ||
+ osc_makes_rpc(cli, osc, OBD_BRW_READ));
+ }
+
+ on_list(&osc->oo_write_item, &cli->cl_loi_write_list,
+ atomic_read(&osc->oo_nr_writes) > 0);
+
+ on_list(&osc->oo_read_item, &cli->cl_loi_read_list,
+ atomic_read(&osc->oo_nr_reads) > 0);
+
+ return osc_is_ready(osc);
+}
+
+static int osc_list_maint(struct client_obd *cli, struct osc_object *osc)
+{
+ int is_ready;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ is_ready = __osc_list_maint(cli, osc);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ return is_ready;
+}
+
+/* this is trying to propogate async writeback errors back up to the
+ * application. As an async write fails we record the error code for later if
+ * the app does an fsync. As long as errors persist we force future rpcs to be
+ * sync so that the app can get a sync error and break the cycle of queueing
+ * pages for which writeback will fail. */
+static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
+ int rc)
+{
+ if (rc) {
+ if (!ar->ar_rc)
+ ar->ar_rc = rc;
+
+ ar->ar_force_sync = 1;
+ ar->ar_min_xid = ptlrpc_sample_next_xid();
+ return;
+
+ }
+
+ if (ar->ar_force_sync && (xid >= ar->ar_min_xid))
+ ar->ar_force_sync = 0;
+}
+
+
+/* this must be called holding the loi list lock to give coverage to exit_cache,
+ * async_flag maintenance, and oap_request */
+static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
+ struct osc_async_page *oap, int sent, int rc)
+{
+ struct osc_object *osc = oap->oap_obj;
+ struct lov_oinfo *loi = osc->oo_oinfo;
+ __u64 xid = 0;
+
+ ENTRY;
+ if (oap->oap_request != NULL) {
+ xid = ptlrpc_req_xid(oap->oap_request);
+ ptlrpc_req_finished(oap->oap_request);
+ oap->oap_request = NULL;
+ }
+
+ /* As the transfer for this page is being done, clear the flags */
+ spin_lock(&oap->oap_lock);
+ oap->oap_async_flags = 0;
+ spin_unlock(&oap->oap_lock);
+ oap->oap_interrupted = 0;
+
+ if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) {
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ osc_process_ar(&cli->cl_ar, xid, rc);
+ osc_process_ar(&loi->loi_ar, xid, rc);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ }
+
+ rc = osc_completion(env, oap, oap->oap_cmd, rc);
+ if (rc)
+ CERROR("completion on oap %p obj %p returns %d.\n",
+ oap, osc, rc);
+
+ EXIT;
+}
+
+/**
+ * Try to add extent to one RPC. We need to think about the following things:
+ * - # of pages must not be over max_pages_per_rpc
+ * - extent must be compatible with previous ones
+ */
+static int try_to_add_extent_for_io(struct client_obd *cli,
+ struct osc_extent *ext, struct list_head *rpclist,
+ int *pc, unsigned int *max_pages)
+{
+ struct osc_extent *tmp;
+ ENTRY;
+
+ EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE),
+ ext);
+
+ *max_pages = max(ext->oe_mppr, *max_pages);
+ if (*pc + ext->oe_nr_pages > *max_pages)
+ RETURN(0);
+
+ list_for_each_entry(tmp, rpclist, oe_link) {
+ EASSERT(tmp->oe_owner == current, tmp);
+#if 0
+ if (overlapped(tmp, ext)) {
+ OSC_EXTENT_DUMP(D_ERROR, tmp, "overlapped %p.\n", ext);
+ EASSERT(0, ext);
+ }
+#endif
+
+ if (tmp->oe_srvlock != ext->oe_srvlock ||
+ !tmp->oe_grants != !ext->oe_grants)
+ RETURN(0);
+
+ /* remove break for strict check */
+ break;
+ }
+
+ *pc += ext->oe_nr_pages;
+ list_move_tail(&ext->oe_link, rpclist);
+ ext->oe_owner = current;
+ RETURN(1);
+}
+
+/**
+ * In order to prevent multiple ptlrpcd from breaking contiguous extents,
+ * get_write_extent() takes all appropriate extents in atomic.
+ *
+ * The following policy is used to collect extents for IO:
+ * 1. Add as many HP extents as possible;
+ * 2. Add the first urgent extent in urgent extent list and take it out of
+ * urgent list;
+ * 3. Add subsequent extents of this urgent extent;
+ * 4. If urgent list is not empty, goto 2;
+ * 5. Traverse the extent tree from the 1st extent;
+ * 6. Above steps exit if there is no space in this RPC.
+ */
+static int get_write_extents(struct osc_object *obj, struct list_head *rpclist)
+{
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_extent *ext;
+ int page_count = 0;
+ unsigned int max_pages = cli->cl_max_pages_per_rpc;
+
+ LASSERT(osc_object_is_locked(obj));
+ while (!list_empty(&obj->oo_hp_exts)) {
+ ext = list_entry(obj->oo_hp_exts.next, struct osc_extent,
+ oe_link);
+ LASSERT(ext->oe_state == OES_CACHE);
+ if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
+ &max_pages))
+ return page_count;
+ EASSERT(ext->oe_nr_pages <= max_pages, ext);
+ }
+ if (page_count == max_pages)
+ return page_count;
+
+ while (!list_empty(&obj->oo_urgent_exts)) {
+ ext = list_entry(obj->oo_urgent_exts.next,
+ struct osc_extent, oe_link);
+ if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
+ &max_pages))
+ return page_count;
+
+ if (!ext->oe_intree)
+ continue;
+
+ while ((ext = next_extent(ext)) != NULL) {
+ if ((ext->oe_state != OES_CACHE) ||
+ (!list_empty(&ext->oe_link) &&
+ ext->oe_owner != NULL))
+ continue;
+
+ if (!try_to_add_extent_for_io(cli, ext, rpclist,
+ &page_count, &max_pages))
+ return page_count;
+ }
+ }
+ if (page_count == max_pages)
+ return page_count;
+
+ ext = first_extent(obj);
+ while (ext != NULL) {
+ if ((ext->oe_state != OES_CACHE) ||
+ /* this extent may be already in current rpclist */
+ (!list_empty(&ext->oe_link) && ext->oe_owner != NULL)) {
+ ext = next_extent(ext);
+ continue;
+ }
+
+ if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
+ &max_pages))
+ return page_count;
+
+ ext = next_extent(ext);
+ }
+ return page_count;
+}
+
+static int
+osc_send_write_rpc(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, pdl_policy_t pol)
+{
+ LIST_HEAD(rpclist);
+ struct osc_extent *ext;
+ struct osc_extent *tmp;
+ struct osc_extent *first = NULL;
+ obd_count page_count = 0;
+ int srvlock = 0;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(osc_object_is_locked(osc));
+
+ page_count = get_write_extents(osc, &rpclist);
+ LASSERT(equi(page_count == 0, list_empty(&rpclist)));
+
+ if (list_empty(&rpclist))
+ RETURN(0);
+
+ osc_update_pending(osc, OBD_BRW_WRITE, -page_count);
+
+ list_for_each_entry(ext, &rpclist, oe_link) {
+ LASSERT(ext->oe_state == OES_CACHE ||
+ ext->oe_state == OES_LOCK_DONE);
+ if (ext->oe_state == OES_CACHE)
+ osc_extent_state_set(ext, OES_LOCKING);
+ else
+ osc_extent_state_set(ext, OES_RPC);
+ }
+
+ /* we're going to grab page lock, so release object lock because
+ * lock order is page lock -> object lock. */
+ osc_object_unlock(osc);
+
+ list_for_each_entry_safe(ext, tmp, &rpclist, oe_link) {
+ if (ext->oe_state == OES_LOCKING) {
+ rc = osc_extent_make_ready(env, ext);
+ if (unlikely(rc < 0)) {
+ list_del_init(&ext->oe_link);
+ osc_extent_finish(env, ext, 0, rc);
+ continue;
+ }
+ }
+ if (first == NULL) {
+ first = ext;
+ srvlock = ext->oe_srvlock;
+ } else {
+ LASSERT(srvlock == ext->oe_srvlock);
+ }
+ }
+
+ if (!list_empty(&rpclist)) {
+ LASSERT(page_count > 0);
+ rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_WRITE, pol);
+ LASSERT(list_empty(&rpclist));
+ }
+
+ osc_object_lock(osc);
+ RETURN(rc);
+}
+
+/**
+ * prepare pages for ASYNC io and put pages in send queue.
+ *
+ * \param cmd OBD_BRW_* macroses
+ * \param lop pending pages
+ *
+ * \return zero if no page added to send queue.
+ * \return 1 if pages successfully added to send queue.
+ * \return negative on errors.
+ */
+static int
+osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, pdl_policy_t pol)
+{
+ struct osc_extent *ext;
+ struct osc_extent *next;
+ LIST_HEAD(rpclist);
+ int page_count = 0;
+ unsigned int max_pages = cli->cl_max_pages_per_rpc;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(osc_object_is_locked(osc));
+ list_for_each_entry_safe(ext, next,
+ &osc->oo_reading_exts, oe_link) {
+ EASSERT(ext->oe_state == OES_LOCK_DONE, ext);
+ if (!try_to_add_extent_for_io(cli, ext, &rpclist, &page_count,
+ &max_pages))
+ break;
+ osc_extent_state_set(ext, OES_RPC);
+ EASSERT(ext->oe_nr_pages <= max_pages, ext);
+ }
+ LASSERT(page_count <= max_pages);
+
+ osc_update_pending(osc, OBD_BRW_READ, -page_count);
+
+ if (!list_empty(&rpclist)) {
+ osc_object_unlock(osc);
+
+ LASSERT(page_count > 0);
+ rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_READ, pol);
+ LASSERT(list_empty(&rpclist));
+
+ osc_object_lock(osc);
+ }
+ RETURN(rc);
+}
+
+#define list_to_obj(list, item) ({ \
+ struct list_head *__tmp = (list)->next; \
+ list_del_init(__tmp); \
+ list_entry(__tmp, struct osc_object, oo_##item); \
+})
+
+/* This is called by osc_check_rpcs() to find which objects have pages that
+ * we could be sending. These lists are maintained by osc_makes_rpc(). */
+static struct osc_object *osc_next_obj(struct client_obd *cli)
+{
+ ENTRY;
+
+ /* First return objects that have blocked locks so that they
+ * will be flushed quickly and other clients can get the lock,
+ * then objects which have pages ready to be stuffed into RPCs */
+ if (!list_empty(&cli->cl_loi_hp_ready_list))
+ RETURN(list_to_obj(&cli->cl_loi_hp_ready_list, hp_ready_item));
+ if (!list_empty(&cli->cl_loi_ready_list))
+ RETURN(list_to_obj(&cli->cl_loi_ready_list, ready_item));
+
+ /* then if we have cache waiters, return all objects with queued
+ * writes. This is especially important when many small files
+ * have filled up the cache and not been fired into rpcs because
+ * they don't pass the nr_pending/object threshhold */
+ if (!list_empty(&cli->cl_cache_waiters) &&
+ !list_empty(&cli->cl_loi_write_list))
+ RETURN(list_to_obj(&cli->cl_loi_write_list, write_item));
+
+ /* then return all queued objects when we have an invalid import
+ * so that they get flushed */
+ if (cli->cl_import == NULL || cli->cl_import->imp_invalid) {
+ if (!list_empty(&cli->cl_loi_write_list))
+ RETURN(list_to_obj(&cli->cl_loi_write_list,
+ write_item));
+ if (!list_empty(&cli->cl_loi_read_list))
+ RETURN(list_to_obj(&cli->cl_loi_read_list,
+ read_item));
+ }
+ RETURN(NULL);
+}
+
+/* called with the loi list lock held */
+static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli,
+ pdl_policy_t pol)
+{
+ struct osc_object *osc;
+ int rc = 0;
+ ENTRY;
+
+ while ((osc = osc_next_obj(cli)) != NULL) {
+ struct cl_object *obj = osc2cl(osc);
+ struct lu_ref_link *link;
+
+ OSC_IO_DEBUG(osc, "%lu in flight\n", rpcs_in_flight(cli));
+
+ if (osc_max_rpc_in_flight(cli, osc)) {
+ __osc_list_maint(cli, osc);
+ break;
+ }
+
+ cl_object_get(obj);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ link = lu_object_ref_add(&obj->co_lu, "check", current);
+
+ /* attempt some read/write balancing by alternating between
+ * reads and writes in an object. The makes_rpc checks here
+ * would be redundant if we were getting read/write work items
+ * instead of objects. we don't want send_oap_rpc to drain a
+ * partial read pending queue when we're given this object to
+ * do io on writes while there are cache waiters */
+ osc_object_lock(osc);
+ if (osc_makes_rpc(cli, osc, OBD_BRW_WRITE)) {
+ rc = osc_send_write_rpc(env, cli, osc, pol);
+ if (rc < 0) {
+ CERROR("Write request failed with %d\n", rc);
+
+ /* osc_send_write_rpc failed, mostly because of
+ * memory pressure.
+ *
+ * It can't break here, because if:
+ * - a page was submitted by osc_io_submit, so
+ * page locked;
+ * - no request in flight
+ * - no subsequent request
+ * The system will be in live-lock state,
+ * because there is no chance to call
+ * osc_io_unplug() and osc_check_rpcs() any
+ * more. pdflush can't help in this case,
+ * because it might be blocked at grabbing
+ * the page lock as we mentioned.
+ *
+ * Anyway, continue to drain pages. */
+ /* break; */
+ }
+ }
+ if (osc_makes_rpc(cli, osc, OBD_BRW_READ)) {
+ rc = osc_send_read_rpc(env, cli, osc, pol);
+ if (rc < 0)
+ CERROR("Read request failed with %d\n", rc);
+ }
+ osc_object_unlock(osc);
+
+ osc_list_maint(cli, osc);
+ lu_object_ref_del_at(&obj->co_lu, link, "check", current);
+ cl_object_put(env, obj);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ }
+}
+
+static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, pdl_policy_t pol, int async)
+{
+ int rc = 0;
+
+ if (osc != NULL && osc_list_maint(cli, osc) == 0)
+ return 0;
+
+ if (!async) {
+ /* disable osc_lru_shrink() temporarily to avoid
+ * potential stack overrun problem. LU-2859 */
+ atomic_inc(&cli->cl_lru_shrinkers);
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ osc_check_rpcs(env, cli, pol);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ atomic_dec(&cli->cl_lru_shrinkers);
+ } else {
+ CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
+ LASSERT(cli->cl_writeback_work != NULL);
+ rc = ptlrpcd_queue_work(cli->cl_writeback_work);
+ }
+ return rc;
+}
+
+static int osc_io_unplug_async(const struct lu_env *env,
+ struct client_obd *cli, struct osc_object *osc)
+{
+ /* XXX: policy is no use actually. */
+ return osc_io_unplug0(env, cli, osc, PDL_POLICY_ROUND, 1);
+}
+
+void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, pdl_policy_t pol)
+{
+ (void)osc_io_unplug0(env, cli, osc, pol, 0);
+}
+
+int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
+ struct page *page, loff_t offset)
+{
+ struct obd_export *exp = osc_export(osc);
+ struct osc_async_page *oap = &ops->ops_oap;
+ ENTRY;
+
+ if (!page)
+ return cfs_size_round(sizeof(*oap));
+
+ oap->oap_magic = OAP_MAGIC;
+ oap->oap_cli = &exp->exp_obd->u.cli;
+ oap->oap_obj = osc;
+
+ oap->oap_page = page;
+ oap->oap_obj_off = offset;
+ LASSERT(!(offset & ~CFS_PAGE_MASK));
+
+ if (!client_is_remote(exp) && cfs_capable(CFS_CAP_SYS_RESOURCE))
+ oap->oap_brw_flags = OBD_BRW_NOQUOTA;
+
+ INIT_LIST_HEAD(&oap->oap_pending_item);
+ INIT_LIST_HEAD(&oap->oap_rpc_item);
+
+ spin_lock_init(&oap->oap_lock);
+ CDEBUG(D_INFO, "oap %p page %p obj off "LPU64"\n",
+ oap, page, oap->oap_obj_off);
+ RETURN(0);
+}
+
+int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops)
+{
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_extent *ext = NULL;
+ struct osc_async_page *oap = &ops->ops_oap;
+ struct client_obd *cli = oap->oap_cli;
+ struct osc_object *osc = oap->oap_obj;
+ pgoff_t index;
+ int grants = 0;
+ int brw_flags = OBD_BRW_ASYNC;
+ int cmd = OBD_BRW_WRITE;
+ int need_release = 0;
+ int rc = 0;
+ ENTRY;
+
+ if (oap->oap_magic != OAP_MAGIC)
+ RETURN(-EINVAL);
+
+ if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
+ RETURN(-EIO);
+
+ if (!list_empty(&oap->oap_pending_item) ||
+ !list_empty(&oap->oap_rpc_item))
+ RETURN(-EBUSY);
+
+ /* Set the OBD_BRW_SRVLOCK before the page is queued. */
+ brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0;
+ if (!client_is_remote(osc_export(osc)) &&
+ cfs_capable(CFS_CAP_SYS_RESOURCE)) {
+ brw_flags |= OBD_BRW_NOQUOTA;
+ cmd |= OBD_BRW_NOQUOTA;
+ }
+
+ /* check if the file's owner/group is over quota */
+ if (!(cmd & OBD_BRW_NOQUOTA)) {
+ struct cl_object *obj;
+ struct cl_attr *attr;
+ unsigned int qid[MAXQUOTAS];
+
+ obj = cl_object_top(&osc->oo_cl);
+ attr = &osc_env_info(env)->oti_attr;
+
+ cl_object_attr_lock(obj);
+ rc = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+
+ qid[USRQUOTA] = attr->cat_uid;
+ qid[GRPQUOTA] = attr->cat_gid;
+ if (rc == 0 && osc_quota_chkdq(cli, qid) == NO_QUOTA)
+ rc = -EDQUOT;
+ if (rc)
+ RETURN(rc);
+ }
+
+ oap->oap_cmd = cmd;
+ oap->oap_page_off = ops->ops_from;
+ oap->oap_count = ops->ops_to - ops->ops_from;
+ oap->oap_async_flags = 0;
+ oap->oap_brw_flags = brw_flags;
+
+ OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n",
+ oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK);
+
+ index = oap2cl_page(oap)->cp_index;
+
+ /* Add this page into extent by the following steps:
+ * 1. if there exists an active extent for this IO, mostly this page
+ * can be added to the active extent and sometimes we need to
+ * expand extent to accomodate this page;
+ * 2. otherwise, a new extent will be allocated. */
+
+ ext = oio->oi_active;
+ if (ext != NULL && ext->oe_start <= index && ext->oe_max_end >= index) {
+ /* one chunk plus extent overhead must be enough to write this
+ * page */
+ grants = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ if (ext->oe_end >= index)
+ grants = 0;
+
+ /* it doesn't need any grant to dirty this page */
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ rc = osc_enter_cache_try(cli, oap, grants, 0);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ if (rc == 0) { /* try failed */
+ grants = 0;
+ need_release = 1;
+ } else if (ext->oe_end < index) {
+ int tmp = grants;
+ /* try to expand this extent */
+ rc = osc_extent_expand(ext, index, &tmp);
+ if (rc < 0) {
+ need_release = 1;
+ /* don't free reserved grant */
+ } else {
+ OSC_EXTENT_DUMP(D_CACHE, ext,
+ "expanded for %lu.\n", index);
+ osc_unreserve_grant(cli, grants, tmp);
+ grants = 0;
+ }
+ }
+ rc = 0;
+ } else if (ext != NULL) {
+ /* index is located outside of active extent */
+ need_release = 1;
+ }
+ if (need_release) {
+ osc_extent_release(env, ext);
+ oio->oi_active = NULL;
+ ext = NULL;
+ }
+
+ if (ext == NULL) {
+ int tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+
+ /* try to find new extent to cover this page */
+ LASSERT(oio->oi_active == NULL);
+ /* we may have allocated grant for this page if we failed
+ * to expand the previous active extent. */
+ LASSERT(ergo(grants > 0, grants >= tmp));
+
+ rc = 0;
+ if (grants == 0) {
+ /* we haven't allocated grant for this page. */
+ rc = osc_enter_cache(env, cli, oap, tmp);
+ if (rc == 0)
+ grants = tmp;
+ }
+
+ tmp = grants;
+ if (rc == 0) {
+ ext = osc_extent_find(env, osc, index, &tmp);
+ if (IS_ERR(ext)) {
+ LASSERT(tmp == grants);
+ osc_exit_cache(cli, oap);
+ rc = PTR_ERR(ext);
+ ext = NULL;
+ } else {
+ oio->oi_active = ext;
+ }
+ }
+ if (grants > 0)
+ osc_unreserve_grant(cli, grants, tmp);
+ }
+
+ LASSERT(ergo(rc == 0, ext != NULL));
+ if (ext != NULL) {
+ EASSERTF(ext->oe_end >= index && ext->oe_start <= index,
+ ext, "index = %lu.\n", index);
+ LASSERT((oap->oap_brw_flags & OBD_BRW_FROM_GRANT) != 0);
+
+ osc_object_lock(osc);
+ if (ext->oe_nr_pages == 0)
+ ext->oe_srvlock = ops->ops_srvlock;
+ else
+ LASSERT(ext->oe_srvlock == ops->ops_srvlock);
+ ++ext->oe_nr_pages;
+ list_add_tail(&oap->oap_pending_item, &ext->oe_pages);
+ osc_object_unlock(osc);
+ }
+ RETURN(rc);
+}
+
+int osc_teardown_async_page(const struct lu_env *env,
+ struct osc_object *obj, struct osc_page *ops)
+{
+ struct osc_async_page *oap = &ops->ops_oap;
+ struct osc_extent *ext = NULL;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(oap->oap_magic == OAP_MAGIC);
+
+ CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
+ oap, ops, oap2cl_page(oap)->cp_index);
+
+ osc_object_lock(obj);
+ if (!list_empty(&oap->oap_rpc_item)) {
+ CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
+ rc = -EBUSY;
+ } else if (!list_empty(&oap->oap_pending_item)) {
+ ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index);
+ /* only truncated pages are allowed to be taken out.
+ * See osc_extent_truncate() and osc_cache_truncate_start()
+ * for details. */
+ if (ext != NULL && ext->oe_state != OES_TRUNC) {
+ OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n",
+ oap2cl_page(oap)->cp_index);
+ rc = -EBUSY;
+ }
+ }
+ osc_object_unlock(obj);
+ if (ext != NULL)
+ osc_extent_put(env, ext);
+ RETURN(rc);
+}
+
+/**
+ * This is called when a page is picked up by kernel to write out.
+ *
+ * We should find out the corresponding extent and add the whole extent
+ * into urgent list. The extent may be being truncated or used, handle it
+ * carefully.
+ */
+int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops)
+{
+ struct osc_extent *ext = NULL;
+ struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj);
+ struct cl_page *cp = ops->ops_cl.cpl_page;
+ pgoff_t index = cp->cp_index;
+ struct osc_async_page *oap = &ops->ops_oap;
+ bool unplug = false;
+ int rc = 0;
+ ENTRY;
+
+ osc_object_lock(obj);
+ ext = osc_extent_lookup(obj, index);
+ if (ext == NULL) {
+ osc_extent_tree_dump(D_ERROR, obj);
+ LASSERTF(0, "page index %lu is NOT covered.\n", index);
+ }
+
+ switch (ext->oe_state) {
+ case OES_RPC:
+ case OES_LOCK_DONE:
+ CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp),
+ "flush an in-rpc page?\n");
+ LASSERT(0);
+ break;
+ case OES_LOCKING:
+ /* If we know this extent is being written out, we should abort
+ * so that the writer can make this page ready. Otherwise, there
+ * exists a deadlock problem because other process can wait for
+ * page writeback bit holding page lock; and meanwhile in
+ * vvp_page_make_ready(), we need to grab page lock before
+ * really sending the RPC. */
+ case OES_TRUNC:
+ /* race with truncate, page will be redirtied */
+ GOTO(out, rc = -EAGAIN);
+ default:
+ break;
+ }
+
+ rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE);
+ if (rc)
+ GOTO(out, rc);
+
+ spin_lock(&oap->oap_lock);
+ oap->oap_async_flags |= ASYNC_READY|ASYNC_URGENT;
+ spin_unlock(&oap->oap_lock);
+
+ if (memory_pressure_get())
+ ext->oe_memalloc = 1;
+
+ ext->oe_urgent = 1;
+ if (ext->oe_state == OES_CACHE) {
+ OSC_EXTENT_DUMP(D_CACHE, ext,
+ "flush page %p make it urgent.\n", oap);
+ if (list_empty(&ext->oe_link))
+ list_add_tail(&ext->oe_link, &obj->oo_urgent_exts);
+ unplug = true;
+ }
+ rc = 0;
+ EXIT;
+
+out:
+ osc_object_unlock(obj);
+ osc_extent_put(env, ext);
+ if (unplug)
+ osc_io_unplug_async(env, osc_cli(obj), obj);
+ return rc;
+}
+
+/**
+ * this is called when a sync waiter receives an interruption. Its job is to
+ * get the caller woken as soon as possible. If its page hasn't been put in an
+ * rpc yet it can dequeue immediately. Otherwise it has to mark the rpc as
+ * desiring interruption which will forcefully complete the rpc once the rpc
+ * has timed out.
+ */
+int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops)
+{
+ struct osc_async_page *oap = &ops->ops_oap;
+ struct osc_object *obj = oap->oap_obj;
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_extent *ext;
+ struct osc_extent *found = NULL;
+ struct list_head *plist;
+ pgoff_t index = oap2cl_page(oap)->cp_index;
+ int rc = -EBUSY;
+ int cmd;
+ ENTRY;
+
+ LASSERT(!oap->oap_interrupted);
+ oap->oap_interrupted = 1;
+
+ /* Find out the caching extent */
+ osc_object_lock(obj);
+ if (oap->oap_cmd & OBD_BRW_WRITE) {
+ plist = &obj->oo_urgent_exts;
+ cmd = OBD_BRW_WRITE;
+ } else {
+ plist = &obj->oo_reading_exts;
+ cmd = OBD_BRW_READ;
+ }
+ list_for_each_entry(ext, plist, oe_link) {
+ if (ext->oe_start <= index && ext->oe_end >= index) {
+ LASSERT(ext->oe_state == OES_LOCK_DONE);
+ /* For OES_LOCK_DONE state extent, it has already held
+ * a refcount for RPC. */
+ found = osc_extent_get(ext);
+ break;
+ }
+ }
+ if (found != NULL) {
+ list_del_init(&found->oe_link);
+ osc_update_pending(obj, cmd, -found->oe_nr_pages);
+ osc_object_unlock(obj);
+
+ osc_extent_finish(env, found, 0, -EINTR);
+ osc_extent_put(env, found);
+ rc = 0;
+ } else {
+ osc_object_unlock(obj);
+ /* ok, it's been put in an rpc. only one oap gets a request
+ * reference */
+ if (oap->oap_request != NULL) {
+ ptlrpc_mark_interrupted(oap->oap_request);
+ ptlrpcd_wake(oap->oap_request);
+ ptlrpc_req_finished(oap->oap_request);
+ oap->oap_request = NULL;
+ }
+ }
+
+ osc_list_maint(cli, obj);
+ RETURN(rc);
+}
+
+int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
+ struct list_head *list, int cmd, int brw_flags)
+{
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_extent *ext;
+ struct osc_async_page *oap, *tmp;
+ int page_count = 0;
+ int mppr = cli->cl_max_pages_per_rpc;
+ pgoff_t start = CL_PAGE_EOF;
+ pgoff_t end = 0;
+ ENTRY;
+
+ list_for_each_entry(oap, list, oap_pending_item) {
+ struct cl_page *cp = oap2cl_page(oap);
+ if (cp->cp_index > end)
+ end = cp->cp_index;
+ if (cp->cp_index < start)
+ start = cp->cp_index;
+ ++page_count;
+ mppr <<= (page_count > mppr);
+ }
+
+ ext = osc_extent_alloc(obj);
+ if (ext == NULL) {
+ list_for_each_entry_safe(oap, tmp, list, oap_pending_item) {
+ list_del_init(&oap->oap_pending_item);
+ osc_ap_completion(env, cli, oap, 0, -ENOMEM);
+ }
+ RETURN(-ENOMEM);
+ }
+
+ ext->oe_rw = !!(cmd & OBD_BRW_READ);
+ ext->oe_urgent = 1;
+ ext->oe_start = start;
+ ext->oe_end = ext->oe_max_end = end;
+ ext->oe_obj = obj;
+ ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK);
+ ext->oe_nr_pages = page_count;
+ ext->oe_mppr = mppr;
+ list_splice_init(list, &ext->oe_pages);
+
+ osc_object_lock(obj);
+ /* Reuse the initial refcount for RPC, don't drop it */
+ osc_extent_state_set(ext, OES_LOCK_DONE);
+ if (cmd & OBD_BRW_WRITE) {
+ list_add_tail(&ext->oe_link, &obj->oo_urgent_exts);
+ osc_update_pending(obj, OBD_BRW_WRITE, page_count);
+ } else {
+ list_add_tail(&ext->oe_link, &obj->oo_reading_exts);
+ osc_update_pending(obj, OBD_BRW_READ, page_count);
+ }
+ osc_object_unlock(obj);
+
+ osc_io_unplug(env, cli, obj, PDL_POLICY_ROUND);
+ RETURN(0);
+}
+
+/**
+ * Called by osc_io_setattr_start() to freeze and destroy covering extents.
+ */
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
+ struct osc_object *obj, __u64 size)
+{
+ struct client_obd *cli = osc_cli(obj);
+ struct osc_extent *ext;
+ struct osc_extent *waiting = NULL;
+ pgoff_t index;
+ LIST_HEAD(list);
+ int result = 0;
+ bool partial;
+ ENTRY;
+
+ /* pages with index greater or equal to index will be truncated. */
+ index = cl_index(osc2cl(obj), size);
+ partial = size > cl_offset(osc2cl(obj), index);
+
+again:
+ osc_object_lock(obj);
+ ext = osc_extent_search(obj, index);
+ if (ext == NULL)
+ ext = first_extent(obj);
+ else if (ext->oe_end < index)
+ ext = next_extent(ext);
+ while (ext != NULL) {
+ EASSERT(ext->oe_state != OES_TRUNC, ext);
+
+ if (ext->oe_state > OES_CACHE || ext->oe_urgent) {
+ /* if ext is in urgent state, it means there must exist
+ * a page already having been flushed by write_page().
+ * We have to wait for this extent because we can't
+ * truncate that page. */
+ LASSERT(!ext->oe_hp);
+ OSC_EXTENT_DUMP(D_CACHE, ext,
+ "waiting for busy extent\n");
+ waiting = osc_extent_get(ext);
+ break;
+ }
+
+ OSC_EXTENT_DUMP(D_CACHE, ext, "try to trunc:"LPU64".\n", size);
+
+ osc_extent_get(ext);
+ if (ext->oe_state == OES_ACTIVE) {
+ /* though we grab inode mutex for write path, but we
+ * release it before releasing extent(in osc_io_end()),
+ * so there is a race window that an extent is still
+ * in OES_ACTIVE when truncate starts. */
+ LASSERT(!ext->oe_trunc_pending);
+ ext->oe_trunc_pending = 1;
+ } else {
+ EASSERT(ext->oe_state == OES_CACHE, ext);
+ osc_extent_state_set(ext, OES_TRUNC);
+ osc_update_pending(obj, OBD_BRW_WRITE,
+ -ext->oe_nr_pages);
+ }
+ EASSERT(list_empty(&ext->oe_link), ext);
+ list_add_tail(&ext->oe_link, &list);
+
+ ext = next_extent(ext);
+ }
+ osc_object_unlock(obj);
+
+ osc_list_maint(cli, obj);
+
+ while (!list_empty(&list)) {
+ int rc;
+
+ ext = list_entry(list.next, struct osc_extent, oe_link);
+ list_del_init(&ext->oe_link);
+
+ /* extent may be in OES_ACTIVE state because inode mutex
+ * is released before osc_io_end() in file write case */
+ if (ext->oe_state != OES_TRUNC)
+ osc_extent_wait(env, ext, OES_TRUNC);
+
+ rc = osc_extent_truncate(ext, index, partial);
+ if (rc < 0) {
+ if (result == 0)
+ result = rc;
+
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "truncate error %d\n", rc);
+ } else if (ext->oe_nr_pages == 0) {
+ osc_extent_remove(ext);
+ } else {
+ /* this must be an overlapped extent which means only
+ * part of pages in this extent have been truncated.
+ */
+ EASSERTF(ext->oe_start <= index, ext,
+ "trunc index = %lu/%d.\n", index, partial);
+ /* fix index to skip this partially truncated extent */
+ index = ext->oe_end + 1;
+ partial = false;
+
+ /* we need to hold this extent in OES_TRUNC state so
+ * that no writeback will happen. This is to avoid
+ * BUG 17397. */
+ LASSERT(oio->oi_trunc == NULL);
+ oio->oi_trunc = osc_extent_get(ext);
+ OSC_EXTENT_DUMP(D_CACHE, ext,
+ "trunc at "LPU64"\n", size);
+ }
+ osc_extent_put(env, ext);
+ }
+ if (waiting != NULL) {
+ int rc;
+
+ /* ignore the result of osc_extent_wait the write initiator
+ * should take care of it. */
+ rc = osc_extent_wait(env, waiting, OES_INV);
+ if (rc < 0)
+ OSC_EXTENT_DUMP(D_CACHE, waiting, "error: %d.\n", rc);
+
+ osc_extent_put(env, waiting);
+ waiting = NULL;
+ goto again;
+ }
+ RETURN(result);
+}
+
+/**
+ * Called after osc_io_setattr_end to add oio->oi_trunc back to cache.
+ */
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
+ struct osc_object *obj)
+{
+ struct osc_extent *ext = oio->oi_trunc;
+
+ oio->oi_trunc = NULL;
+ if (ext != NULL) {
+ bool unplug = false;
+
+ EASSERT(ext->oe_nr_pages > 0, ext);
+ EASSERT(ext->oe_state == OES_TRUNC, ext);
+ EASSERT(!ext->oe_urgent, ext);
+
+ OSC_EXTENT_DUMP(D_CACHE, ext, "trunc -> cache.\n");
+ osc_object_lock(obj);
+ osc_extent_state_set(ext, OES_CACHE);
+ if (ext->oe_fsync_wait && !ext->oe_urgent) {
+ ext->oe_urgent = 1;
+ list_move_tail(&ext->oe_link, &obj->oo_urgent_exts);
+ unplug = true;
+ }
+ osc_update_pending(obj, OBD_BRW_WRITE, ext->oe_nr_pages);
+ osc_object_unlock(obj);
+ osc_extent_put(env, ext);
+
+ if (unplug)
+ osc_io_unplug_async(env, osc_cli(obj), obj);
+ }
+}
+
+/**
+ * Wait for extents in a specific range to be written out.
+ * The caller must have called osc_cache_writeback_range() to issue IO
+ * otherwise it will take a long time for this function to finish.
+ *
+ * Caller must hold inode_mutex , or cancel exclusive dlm lock so that
+ * nobody else can dirty this range of file while we're waiting for
+ * extents to be written.
+ */
+int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
+ pgoff_t start, pgoff_t end)
+{
+ struct osc_extent *ext;
+ pgoff_t index = start;
+ int result = 0;
+ ENTRY;
+
+again:
+ osc_object_lock(obj);
+ ext = osc_extent_search(obj, index);
+ if (ext == NULL)
+ ext = first_extent(obj);
+ else if (ext->oe_end < index)
+ ext = next_extent(ext);
+ while (ext != NULL) {
+ int rc;
+
+ if (ext->oe_start > end)
+ break;
+
+ if (!ext->oe_fsync_wait) {
+ ext = next_extent(ext);
+ continue;
+ }
+
+ EASSERT(ergo(ext->oe_state == OES_CACHE,
+ ext->oe_hp || ext->oe_urgent), ext);
+ EASSERT(ergo(ext->oe_state == OES_ACTIVE,
+ !ext->oe_hp && ext->oe_urgent), ext);
+
+ index = ext->oe_end + 1;
+ osc_extent_get(ext);
+ osc_object_unlock(obj);
+
+ rc = osc_extent_wait(env, ext, OES_INV);
+ if (result == 0)
+ result = rc;
+ osc_extent_put(env, ext);
+ goto again;
+ }
+ osc_object_unlock(obj);
+
+ OSC_IO_DEBUG(obj, "sync file range.\n");
+ RETURN(result);
+}
+
+/**
+ * Called to write out a range of osc object.
+ *
+ * @hp : should be set this is caused by lock cancel;
+ * @discard: is set if dirty pages should be dropped - file will be deleted or
+ * truncated, this implies there is no partially discarding extents.
+ *
+ * Return how many pages will be issued, or error code if error occurred.
+ */
+int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
+ pgoff_t start, pgoff_t end, int hp, int discard)
+{
+ struct osc_extent *ext;
+ LIST_HEAD(discard_list);
+ bool unplug = false;
+ int result = 0;
+ ENTRY;
+
+ osc_object_lock(obj);
+ ext = osc_extent_search(obj, start);
+ if (ext == NULL)
+ ext = first_extent(obj);
+ else if (ext->oe_end < start)
+ ext = next_extent(ext);
+ while (ext != NULL) {
+ if (ext->oe_start > end)
+ break;
+
+ ext->oe_fsync_wait = 1;
+ switch (ext->oe_state) {
+ case OES_CACHE:
+ result += ext->oe_nr_pages;
+ if (!discard) {
+ struct list_head *list = NULL;
+ if (hp) {
+ EASSERT(!ext->oe_hp, ext);
+ ext->oe_hp = 1;
+ list = &obj->oo_hp_exts;
+ } else if (!ext->oe_urgent) {
+ ext->oe_urgent = 1;
+ list = &obj->oo_urgent_exts;
+ }
+ if (list != NULL)
+ list_move_tail(&ext->oe_link, list);
+ unplug = true;
+ } else {
+ /* the only discarder is lock cancelling, so
+ * [start, end] must contain this extent */
+ EASSERT(ext->oe_start >= start &&
+ ext->oe_max_end <= end, ext);
+ osc_extent_state_set(ext, OES_LOCKING);
+ ext->oe_owner = current;
+ list_move_tail(&ext->oe_link,
+ &discard_list);
+ osc_update_pending(obj, OBD_BRW_WRITE,
+ -ext->oe_nr_pages);
+ }
+ break;
+ case OES_ACTIVE:
+ /* It's pretty bad to wait for ACTIVE extents, because
+ * we don't know how long we will wait for it to be
+ * flushed since it may be blocked at awaiting more
+ * grants. We do this for the correctness of fsync. */
+ LASSERT(hp == 0 && discard == 0);
+ ext->oe_urgent = 1;
+ break;
+ case OES_TRUNC:
+ /* this extent is being truncated, can't do anything
+ * for it now. it will be set to urgent after truncate
+ * is finished in osc_cache_truncate_end(). */
+ default:
+ break;
+ }
+ ext = next_extent(ext);
+ }
+ osc_object_unlock(obj);
+
+ LASSERT(ergo(!discard, list_empty(&discard_list)));
+ if (!list_empty(&discard_list)) {
+ struct osc_extent *tmp;
+ int rc;
+
+ osc_list_maint(osc_cli(obj), obj);
+ list_for_each_entry_safe(ext, tmp, &discard_list, oe_link) {
+ list_del_init(&ext->oe_link);
+ EASSERT(ext->oe_state == OES_LOCKING, ext);
+
+ /* Discard caching pages. We don't actually write this
+ * extent out but we complete it as if we did. */
+ rc = osc_extent_make_ready(env, ext);
+ if (unlikely(rc < 0)) {
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "make_ready returned %d\n", rc);
+ if (result >= 0)
+ result = rc;
+ }
+
+ /* finish the extent as if the pages were sent */
+ osc_extent_finish(env, ext, 0, 0);
+ }
+ }
+
+ if (unplug)
+ osc_io_unplug(env, osc_cli(obj), obj, PDL_POLICY_ROUND);
+
+ if (hp || discard) {
+ int rc;
+ rc = osc_cache_wait_range(env, obj, start, end);
+ if (result >= 0 && rc < 0)
+ result = rc;
+ }
+
+ OSC_IO_DEBUG(obj, "cache page out.\n");
+ RETURN(result);
+}
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
new file mode 100644
index 000000000000..158e8fff838f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -0,0 +1,677 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Internal interfaces of OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#ifndef OSC_CL_INTERNAL_H
+#define OSC_CL_INTERNAL_H
+
+# include <linux/libcfs/libcfs.h>
+
+#include <obd.h>
+/* osc_build_res_name() */
+#include <obd_ost.h>
+#include <cl_object.h>
+#include <lclient.h>
+#include "osc_internal.h"
+
+/** \defgroup osc osc
+ * @{
+ */
+
+struct osc_extent;
+
+/**
+ * State maintained by osc layer for each IO context.
+ */
+struct osc_io {
+ /** super class */
+ struct cl_io_slice oi_cl;
+ /** true if this io is lockless. */
+ int oi_lockless;
+ /** active extents, we know how many bytes is going to be written,
+ * so having an active extent will prevent it from being fragmented */
+ struct osc_extent *oi_active;
+ /** partially truncated extent, we need to hold this extent to prevent
+ * page writeback from happening. */
+ struct osc_extent *oi_trunc;
+
+ struct obd_info oi_info;
+ struct obdo oi_oa;
+ struct osc_async_cbargs {
+ bool opc_rpc_sent;
+ int opc_rc;
+ struct completion opc_sync;
+ } oi_cbarg;
+};
+
+/**
+ * State of transfer for osc.
+ */
+struct osc_req {
+ struct cl_req_slice or_cl;
+};
+
+/**
+ * State maintained by osc layer for the duration of a system call.
+ */
+struct osc_session {
+ struct osc_io os_io;
+};
+
+#define OTI_PVEC_SIZE 64
+struct osc_thread_info {
+ struct ldlm_res_id oti_resname;
+ ldlm_policy_data_t oti_policy;
+ struct cl_lock_descr oti_descr;
+ struct cl_attr oti_attr;
+ struct lustre_handle oti_handle;
+ struct cl_page_list oti_plist;
+ struct cl_io oti_io;
+ struct cl_page *oti_pvec[OTI_PVEC_SIZE];
+};
+
+struct osc_object {
+ struct cl_object oo_cl;
+ struct lov_oinfo *oo_oinfo;
+ /**
+ * True if locking against this stripe got -EUSERS.
+ */
+ int oo_contended;
+ cfs_time_t oo_contention_time;
+ /**
+ * List of pages in transfer.
+ */
+ struct list_head oo_inflight[CRT_NR];
+ /**
+ * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
+ * locked during take-off and landing.
+ */
+ spinlock_t oo_seatbelt;
+
+ /**
+ * used by the osc to keep track of what objects to build into rpcs.
+ * Protected by client_obd->cli_loi_list_lock.
+ */
+ struct list_head oo_ready_item;
+ struct list_head oo_hp_ready_item;
+ struct list_head oo_write_item;
+ struct list_head oo_read_item;
+
+ /**
+ * extent is a red black tree to manage (async) dirty pages.
+ */
+ struct rb_root oo_root;
+ /**
+ * Manage write(dirty) extents.
+ */
+ struct list_head oo_hp_exts; /* list of hp extents */
+ struct list_head oo_urgent_exts; /* list of writeback extents */
+ struct list_head oo_rpc_exts;
+
+ struct list_head oo_reading_exts;
+
+ atomic_t oo_nr_reads;
+ atomic_t oo_nr_writes;
+
+ /** Protect extent tree. Will be used to protect
+ * oo_{read|write}_pages soon. */
+ spinlock_t oo_lock;
+};
+
+static inline void osc_object_lock(struct osc_object *obj)
+{
+ spin_lock(&obj->oo_lock);
+}
+
+static inline int osc_object_trylock(struct osc_object *obj)
+{
+ return spin_trylock(&obj->oo_lock);
+}
+
+static inline void osc_object_unlock(struct osc_object *obj)
+{
+ spin_unlock(&obj->oo_lock);
+}
+
+static inline int osc_object_is_locked(struct osc_object *obj)
+{
+ return spin_is_locked(&obj->oo_lock);
+}
+
+/*
+ * Lock "micro-states" for osc layer.
+ */
+enum osc_lock_state {
+ OLS_NEW,
+ OLS_ENQUEUED,
+ OLS_UPCALL_RECEIVED,
+ OLS_GRANTED,
+ OLS_RELEASED,
+ OLS_BLOCKED,
+ OLS_CANCELLED
+};
+
+/**
+ * osc-private state of cl_lock.
+ *
+ * Interaction with DLM.
+ *
+ * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
+ *
+ * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
+ * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
+ *
+ * This pointer is protected through a reference, acquired by
+ * osc_lock_upcall0(). Also, an additional reference is acquired by
+ * ldlm_lock_addref() call protecting the lock from cancellation, until
+ * osc_lock_unuse() releases it.
+ *
+ * Below is a description of how lock references are acquired and released
+ * inside of DLM.
+ *
+ * - When new lock is created and enqueued to the server (ldlm_cli_enqueue())
+ * - ldlm_lock_create()
+ * - ldlm_lock_new(): initializes a lock with 2 references. One for
+ * the caller (released when reply from the server is received, or on
+ * error), and another for the hash table.
+ * - ldlm_lock_addref_internal(): protects the lock from cancellation.
+ *
+ * - When reply is received from the server (osc_enqueue_interpret())
+ * - ldlm_cli_enqueue_fini()
+ * - LDLM_LOCK_PUT(): releases caller reference acquired by
+ * ldlm_lock_new().
+ * - if (rc != 0)
+ * ldlm_lock_decref(): error case: matches ldlm_cli_enqueue().
+ * - ldlm_lock_decref(): for async locks, matches ldlm_cli_enqueue().
+ *
+ * - When lock is being cancelled (ldlm_lock_cancel())
+ * - ldlm_lock_destroy()
+ * - LDLM_LOCK_PUT(): releases hash-table reference acquired by
+ * ldlm_lock_new().
+ *
+ * osc_lock is detached from ldlm_lock by osc_lock_detach() that is called
+ * either when lock is cancelled (osc_lock_blocking()), or when locks is
+ * deleted without cancellation (e.g., from cl_locks_prune()). In the latter
+ * case ldlm lock remains in memory, and can be re-attached to osc_lock in the
+ * future.
+ */
+struct osc_lock {
+ struct cl_lock_slice ols_cl;
+ /** underlying DLM lock */
+ struct ldlm_lock *ols_lock;
+ /** lock value block */
+ struct ost_lvb ols_lvb;
+ /** DLM flags with which osc_lock::ols_lock was enqueued */
+ __u64 ols_flags;
+ /** osc_lock::ols_lock handle */
+ struct lustre_handle ols_handle;
+ struct ldlm_enqueue_info ols_einfo;
+ enum osc_lock_state ols_state;
+
+ /**
+ * How many pages are using this lock for io, currently only used by
+ * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
+ * during recovery to avoid deadlock. see bz16774.
+ *
+ * \see osc_page::ops_lock
+ * \see osc_page_addref_lock(), osc_page_putref_lock()
+ */
+ atomic_t ols_pageref;
+
+ /**
+ * true, if ldlm_lock_addref() was called against
+ * osc_lock::ols_lock. This is used for sanity checking.
+ *
+ * \see osc_lock::ols_has_ref
+ */
+ unsigned ols_hold :1,
+ /**
+ * this is much like osc_lock::ols_hold, except that this bit is
+ * cleared _after_ reference in released in osc_lock_unuse(). This
+ * fine distinction is needed because:
+ *
+ * - if ldlm lock still has a reference, osc_ast_data_get() needs
+ * to return associated cl_lock (so that a flag is needed that is
+ * cleared after ldlm_lock_decref() returned), and
+ *
+ * - ldlm_lock_decref() can invoke blocking ast (for a
+ * LDLM_FL_CBPENDING lock), and osc_lock functions like
+ * osc_lock_cancel() called from there need to know whether to
+ * release lock reference (so that a flag is needed that is
+ * cleared before ldlm_lock_decref() is called).
+ */
+ ols_has_ref:1,
+ /**
+ * inherit the lockless attribute from top level cl_io.
+ * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
+ */
+ ols_locklessable:1,
+ /**
+ * set by osc_lock_use() to wait until blocking AST enters into
+ * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
+ * further synchronization.
+ */
+ ols_ast_wait:1,
+ /**
+ * If the data of this lock has been flushed to server side.
+ */
+ ols_flush:1,
+ /**
+ * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
+ * the EVAVAIL error as torerable, this will make upper logic happy
+ * to wait all glimpse locks to each OSTs to be completed.
+ * Glimpse lock converts to normal lock if the server lock is
+ * granted.
+ * Glimpse lock should be destroyed immediately after use.
+ */
+ ols_glimpse:1,
+ /**
+ * For async glimpse lock.
+ */
+ ols_agl:1;
+ /**
+ * IO that owns this lock. This field is used for a dead-lock
+ * avoidance by osc_lock_enqueue_wait().
+ *
+ * XXX: unfortunately, the owner of a osc_lock is not unique,
+ * the lock may have multiple users, if the lock is granted and
+ * then matched.
+ */
+ struct osc_io *ols_owner;
+};
+
+
+/**
+ * Page state private for osc layer.
+ */
+struct osc_page {
+ struct cl_page_slice ops_cl;
+ /**
+ * Page queues used by osc to detect when RPC can be formed.
+ */
+ struct osc_async_page ops_oap;
+ /**
+ * An offset within page from which next transfer starts. This is used
+ * by cl_page_clip() to submit partial page transfers.
+ */
+ int ops_from;
+ /**
+ * An offset within page at which next transfer ends.
+ *
+ * \see osc_page::ops_from.
+ */
+ int ops_to;
+ /**
+ * Boolean, true iff page is under transfer. Used for sanity checking.
+ */
+ unsigned ops_transfer_pinned:1,
+ /**
+ * True for a `temporary page' created by read-ahead code, probably
+ * outside of any DLM lock.
+ */
+ ops_temp:1,
+ /**
+ * in LRU?
+ */
+ ops_in_lru:1,
+ /**
+ * Set if the page must be transferred with OBD_BRW_SRVLOCK.
+ */
+ ops_srvlock:1;
+ union {
+ /**
+ * lru page list. ops_inflight and ops_lru are exclusive so
+ * that they can share the same data.
+ */
+ struct list_head ops_lru;
+ /**
+ * Linkage into a per-osc_object list of pages in flight. For
+ * debugging.
+ */
+ struct list_head ops_inflight;
+ };
+ /**
+ * Thread that submitted this page for transfer. For debugging.
+ */
+ task_t *ops_submitter;
+ /**
+ * Submit time - the time when the page is starting RPC. For debugging.
+ */
+ cfs_time_t ops_submit_time;
+
+ /**
+ * A lock of which we hold a reference covers this page. Only used by
+ * read-ahead: for a readahead page, we hold it's covering lock to
+ * prevent it from being canceled during recovery.
+ *
+ * \see osc_lock::ols_pageref
+ * \see osc_page_addref_lock(), osc_page_putref_lock().
+ */
+ struct cl_lock *ops_lock;
+};
+
+extern struct kmem_cache *osc_lock_kmem;
+extern struct kmem_cache *osc_object_kmem;
+extern struct kmem_cache *osc_thread_kmem;
+extern struct kmem_cache *osc_session_kmem;
+extern struct kmem_cache *osc_req_kmem;
+extern struct kmem_cache *osc_extent_kmem;
+
+extern struct lu_device_type osc_device_type;
+extern struct lu_context_key osc_key;
+extern struct lu_context_key osc_session_key;
+
+#define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY)
+
+int osc_lock_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *io);
+int osc_io_init (const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io);
+int osc_req_init (const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
+struct lu_object *osc_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *dev);
+int osc_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage);
+
+void osc_index2policy (ldlm_policy_data_t *policy, const struct cl_object *obj,
+ pgoff_t start, pgoff_t end);
+int osc_lvb_print (const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct ost_lvb *lvb);
+
+void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
+ enum cl_req_type crt, int brw_flags);
+int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
+int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
+ obd_flag async_flags);
+int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
+ struct page *page, loff_t offset);
+int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops);
+int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
+ struct osc_page *ops);
+int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops);
+int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
+ struct list_head *list, int cmd, int brw_flags);
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
+ struct osc_object *obj, __u64 size);
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
+ struct osc_object *obj);
+int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
+ pgoff_t start, pgoff_t end, int hp, int discard);
+int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
+ pgoff_t start, pgoff_t end);
+void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
+ struct osc_object *osc, pdl_policy_t pol);
+
+void osc_object_set_contended (struct osc_object *obj);
+void osc_object_clear_contended(struct osc_object *obj);
+int osc_object_is_contended (struct osc_object *obj);
+
+int osc_lock_is_lockless (const struct osc_lock *olck);
+
+/*****************************************************************************
+ *
+ * Accessors.
+ *
+ */
+
+static inline struct osc_thread_info *osc_env_info(const struct lu_env *env)
+{
+ struct osc_thread_info *info;
+
+ info = lu_context_key_get(&env->le_ctx, &osc_key);
+ LASSERT(info != NULL);
+ return info;
+}
+
+static inline struct osc_session *osc_env_session(const struct lu_env *env)
+{
+ struct osc_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &osc_session_key);
+ LASSERT(ses != NULL);
+ return ses;
+}
+
+static inline struct osc_io *osc_env_io(const struct lu_env *env)
+{
+ return &osc_env_session(env)->os_io;
+}
+
+static inline int osc_is_object(const struct lu_object *obj)
+{
+ return obj->lo_dev->ld_type == &osc_device_type;
+}
+
+static inline struct osc_device *lu2osc_dev(const struct lu_device *d)
+{
+ LINVRNT(d->ld_type == &osc_device_type);
+ return container_of0(d, struct osc_device, od_cl.cd_lu_dev);
+}
+
+static inline struct obd_export *osc_export(const struct osc_object *obj)
+{
+ return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp;
+}
+
+static inline struct client_obd *osc_cli(const struct osc_object *obj)
+{
+ return &osc_export(obj)->exp_obd->u.cli;
+}
+
+static inline struct osc_object *cl2osc(const struct cl_object *obj)
+{
+ LINVRNT(osc_is_object(&obj->co_lu));
+ return container_of0(obj, struct osc_object, oo_cl);
+}
+
+static inline struct cl_object *osc2cl(const struct osc_object *obj)
+{
+ return (struct cl_object *)&obj->oo_cl;
+}
+
+static inline ldlm_mode_t osc_cl_lock2ldlm(enum cl_lock_mode mode)
+{
+ LASSERT(mode == CLM_READ || mode == CLM_WRITE || mode == CLM_GROUP);
+ if (mode == CLM_READ)
+ return LCK_PR;
+ else if (mode == CLM_WRITE)
+ return LCK_PW;
+ else
+ return LCK_GROUP;
+}
+
+static inline enum cl_lock_mode osc_ldlm2cl_lock(ldlm_mode_t mode)
+{
+ LASSERT(mode == LCK_PR || mode == LCK_PW || mode == LCK_GROUP);
+ if (mode == LCK_PR)
+ return CLM_READ;
+ else if (mode == LCK_PW)
+ return CLM_WRITE;
+ else
+ return CLM_GROUP;
+}
+
+static inline struct osc_page *cl2osc_page(const struct cl_page_slice *slice)
+{
+ LINVRNT(osc_is_object(&slice->cpl_obj->co_lu));
+ return container_of0(slice, struct osc_page, ops_cl);
+}
+
+static inline struct osc_page *oap2osc(struct osc_async_page *oap)
+{
+ return container_of0(oap, struct osc_page, ops_oap);
+}
+
+static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
+{
+ return oap2osc(oap)->ops_cl.cpl_page;
+}
+
+static inline struct osc_page *oap2osc_page(struct osc_async_page *oap)
+{
+ return (struct osc_page *)container_of(oap, struct osc_page, ops_oap);
+}
+
+static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice)
+{
+ LINVRNT(osc_is_object(&slice->cls_obj->co_lu));
+ return container_of0(slice, struct osc_lock, ols_cl);
+}
+
+static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock)
+{
+ return cl2osc_lock(cl_lock_at(lock, &osc_device_type));
+}
+
+static inline int osc_io_srvlock(struct osc_io *oio)
+{
+ return (oio->oi_lockless && !oio->oi_cl.cis_io->ci_no_srvlock);
+}
+
+enum osc_extent_state {
+ OES_INV = 0, /** extent is just initialized or destroyed */
+ OES_ACTIVE = 1, /** process is using this extent */
+ OES_CACHE = 2, /** extent is ready for IO */
+ OES_LOCKING = 3, /** locking page to prepare IO */
+ OES_LOCK_DONE = 4, /** locking finished, ready to send */
+ OES_RPC = 5, /** in RPC */
+ OES_TRUNC = 6, /** being truncated */
+ OES_STATE_MAX
+};
+
+/**
+ * osc_extent data to manage dirty pages.
+ * osc_extent has the following attributes:
+ * 1. all pages in the same must be in one RPC in write back;
+ * 2. # of pages must be less than max_pages_per_rpc - implied by 1;
+ * 3. must be covered by only 1 osc_lock;
+ * 4. exclusive. It's impossible to have overlapped osc_extent.
+ *
+ * The lifetime of an extent is from when the 1st page is dirtied to when
+ * all pages inside it are written out.
+ *
+ * LOCKING ORDER
+ * =============
+ * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock)
+ */
+struct osc_extent {
+ /** red-black tree node */
+ struct rb_node oe_node;
+ /** osc_object of this extent */
+ struct osc_object *oe_obj;
+ /** refcount, removed from red-black tree if reaches zero. */
+ atomic_t oe_refc;
+ /** busy if non-zero */
+ atomic_t oe_users;
+ /** link list of osc_object's oo_{hp|urgent|locking}_exts. */
+ struct list_head oe_link;
+ /** state of this extent */
+ unsigned int oe_state;
+ /** flags for this extent. */
+ unsigned int oe_intree:1,
+ /** 0 is write, 1 is read */
+ oe_rw:1,
+ oe_srvlock:1,
+ oe_memalloc:1,
+ /** an ACTIVE extent is going to be truncated, so when this extent
+ * is released, it will turn into TRUNC state instead of CACHE. */
+ oe_trunc_pending:1,
+ /** this extent should be written asap and someone may wait for the
+ * write to finish. This bit is usually set along with urgent if
+ * the extent was CACHE state.
+ * fsync_wait extent can't be merged because new extent region may
+ * exceed fsync range. */
+ oe_fsync_wait:1,
+ /** covering lock is being canceled */
+ oe_hp:1,
+ /** this extent should be written back asap. set if one of pages is
+ * called by page WB daemon, or sync write or reading requests. */
+ oe_urgent:1;
+ /** how many grants allocated for this extent.
+ * Grant allocated for this extent. There is no grant allocated
+ * for reading extents and sync write extents. */
+ unsigned int oe_grants;
+ /** # of dirty pages in this extent */
+ unsigned int oe_nr_pages;
+ /** list of pending oap pages. Pages in this list are NOT sorted. */
+ struct list_head oe_pages;
+ /** Since an extent has to be written out in atomic, this is used to
+ * remember the next page need to be locked to write this extent out.
+ * Not used right now.
+ */
+ struct osc_page *oe_next_page;
+ /** start and end index of this extent, include start and end
+ * themselves. Page offset here is the page index of osc_pages.
+ * oe_start is used as keyword for red-black tree. */
+ pgoff_t oe_start;
+ pgoff_t oe_end;
+ /** maximum ending index of this extent, this is limited by
+ * max_pages_per_rpc, lock extent and chunk size. */
+ pgoff_t oe_max_end;
+ /** waitqueue - for those who want to be notified if this extent's
+ * state has changed. */
+ wait_queue_head_t oe_waitq;
+ /** lock covering this extent */
+ struct cl_lock *oe_osclock;
+ /** terminator of this extent. Must be true if this extent is in IO. */
+ task_t *oe_owner;
+ /** return value of writeback. If somebody is waiting for this extent,
+ * this value can be known by outside world. */
+ int oe_rc;
+ /** max pages per rpc when this extent was created */
+ unsigned int oe_mppr;
+};
+
+int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
+ int sent, int rc);
+int osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
+
+/** @} osc */
+
+#endif /* OSC_CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c
new file mode 100644
index 000000000000..4208ddfd73b3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_dev.c
@@ -0,0 +1,261 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_device, cl_req for OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+/* class_name2obd() */
+#include <obd_class.h>
+
+#include "osc_cl_internal.h"
+
+/** \addtogroup osc
+ * @{
+ */
+
+struct kmem_cache *osc_lock_kmem;
+struct kmem_cache *osc_object_kmem;
+struct kmem_cache *osc_thread_kmem;
+struct kmem_cache *osc_session_kmem;
+struct kmem_cache *osc_req_kmem;
+struct kmem_cache *osc_extent_kmem;
+struct kmem_cache *osc_quota_kmem;
+
+struct lu_kmem_descr osc_caches[] = {
+ {
+ .ckd_cache = &osc_lock_kmem,
+ .ckd_name = "osc_lock_kmem",
+ .ckd_size = sizeof (struct osc_lock)
+ },
+ {
+ .ckd_cache = &osc_object_kmem,
+ .ckd_name = "osc_object_kmem",
+ .ckd_size = sizeof (struct osc_object)
+ },
+ {
+ .ckd_cache = &osc_thread_kmem,
+ .ckd_name = "osc_thread_kmem",
+ .ckd_size = sizeof (struct osc_thread_info)
+ },
+ {
+ .ckd_cache = &osc_session_kmem,
+ .ckd_name = "osc_session_kmem",
+ .ckd_size = sizeof (struct osc_session)
+ },
+ {
+ .ckd_cache = &osc_req_kmem,
+ .ckd_name = "osc_req_kmem",
+ .ckd_size = sizeof (struct osc_req)
+ },
+ {
+ .ckd_cache = &osc_extent_kmem,
+ .ckd_name = "osc_extent_kmem",
+ .ckd_size = sizeof (struct osc_extent)
+ },
+ {
+ .ckd_cache = &osc_quota_kmem,
+ .ckd_name = "osc_quota_kmem",
+ .ckd_size = sizeof(struct osc_quota_info)
+ },
+ {
+ .ckd_cache = NULL
+ }
+};
+
+struct lock_class_key osc_ast_guard_class;
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ */
+
+static struct lu_device *osc2lu_dev(struct osc_device *osc)
+{
+ return &osc->od_cl.cd_lu_dev;
+}
+
+/*****************************************************************************
+ *
+ * Osc device and device type functions.
+ *
+ */
+
+static void *osc_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct osc_thread_info *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, osc_thread_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void osc_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct osc_thread_info *info = data;
+ OBD_SLAB_FREE_PTR(info, osc_thread_kmem);
+}
+
+struct lu_context_key osc_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = osc_key_init,
+ .lct_fini = osc_key_fini
+};
+
+static void *osc_session_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct osc_session *info;
+
+ OBD_SLAB_ALLOC_PTR_GFP(info, osc_session_kmem, __GFP_IO);
+ if (info == NULL)
+ info = ERR_PTR(-ENOMEM);
+ return info;
+}
+
+static void osc_session_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct osc_session *info = data;
+ OBD_SLAB_FREE_PTR(info, osc_session_kmem);
+}
+
+struct lu_context_key osc_session_key = {
+ .lct_tags = LCT_SESSION,
+ .lct_init = osc_session_init,
+ .lct_fini = osc_session_fini
+};
+
+/* type constructor/destructor: osc_type_{init,fini,start,stop}(). */
+LU_TYPE_INIT_FINI(osc, &osc_key, &osc_session_key);
+
+static int osc_cl_process_config(const struct lu_env *env,
+ struct lu_device *d, struct lustre_cfg *cfg)
+{
+ ENTRY;
+ RETURN(osc_process_config_base(d->ld_obd, cfg));
+}
+
+static const struct lu_device_operations osc_lu_ops = {
+ .ldo_object_alloc = osc_object_alloc,
+ .ldo_process_config = osc_cl_process_config,
+ .ldo_recovery_complete = NULL
+};
+
+static const struct cl_device_operations osc_cl_ops = {
+ .cdo_req_init = osc_req_init
+};
+
+static int osc_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ RETURN(0);
+}
+
+static struct lu_device *osc_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ return 0;
+}
+
+static struct lu_device *osc_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct osc_device *od = lu2osc_dev(d);
+
+ cl_device_fini(lu2cl_dev(d));
+ OBD_FREE_PTR(od);
+ return NULL;
+}
+
+static struct lu_device *osc_device_alloc(const struct lu_env *env,
+ struct lu_device_type *t,
+ struct lustre_cfg *cfg)
+{
+ struct lu_device *d;
+ struct osc_device *od;
+ struct obd_device *obd;
+ int rc;
+
+ OBD_ALLOC_PTR(od);
+ if (od == NULL)
+ RETURN(ERR_PTR(-ENOMEM));
+
+ cl_device_init(&od->od_cl, t);
+ d = osc2lu_dev(od);
+ d->ld_ops = &osc_lu_ops;
+ od->od_cl.cd_ops = &osc_cl_ops;
+
+ /* Setup OSC OBD */
+ obd = class_name2obd(lustre_cfg_string(cfg, 0));
+ LASSERT(obd != NULL);
+ rc = osc_setup(obd, cfg);
+ if (rc) {
+ osc_device_free(env, d);
+ RETURN(ERR_PTR(rc));
+ }
+ od->od_exp = obd->obd_self_export;
+ RETURN(d);
+}
+
+static const struct lu_device_type_operations osc_device_type_ops = {
+ .ldto_init = osc_type_init,
+ .ldto_fini = osc_type_fini,
+
+ .ldto_start = osc_type_start,
+ .ldto_stop = osc_type_stop,
+
+ .ldto_device_alloc = osc_device_alloc,
+ .ldto_device_free = osc_device_free,
+
+ .ldto_device_init = osc_device_init,
+ .ldto_device_fini = osc_device_fini
+};
+
+struct lu_device_type osc_device_type = {
+ .ldt_tags = LU_DEVICE_CL,
+ .ldt_name = LUSTRE_OSC_NAME,
+ .ldt_ops = &osc_device_type_ops,
+ .ldt_ctx_tags = LCT_CL_THREAD
+};
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
new file mode 100644
index 000000000000..efc5db47c260
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -0,0 +1,208 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef OSC_INTERNAL_H
+#define OSC_INTERNAL_H
+
+#define OAP_MAGIC 8675309
+
+struct lu_env;
+
+enum async_flags {
+ ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
+ page is added to an rpc */
+ ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
+ ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
+ to give the caller a chance to update
+ or cancel the size of the io */
+ ASYNC_HP = 0x10,
+};
+
+struct osc_async_page {
+ int oap_magic;
+ unsigned short oap_cmd;
+ unsigned short oap_interrupted:1;
+
+ struct list_head oap_pending_item;
+ struct list_head oap_rpc_item;
+
+ obd_off oap_obj_off;
+ unsigned oap_page_off;
+ enum async_flags oap_async_flags;
+
+ struct brw_page oap_brw_page;
+
+ struct ptlrpc_request *oap_request;
+ struct client_obd *oap_cli;
+ struct osc_object *oap_obj;
+
+ struct ldlm_lock *oap_ldlm_lock;
+ spinlock_t oap_lock;
+};
+
+#define oap_page oap_brw_page.pg
+#define oap_count oap_brw_page.count
+#define oap_brw_flags oap_brw_page.flag
+
+struct osc_cache_waiter {
+ struct list_head ocw_entry;
+ wait_queue_head_t ocw_waitq;
+ struct osc_async_page *ocw_oap;
+ int ocw_grant;
+ int ocw_rc;
+};
+
+int osc_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti);
+int osc_real_create(struct obd_export *exp, struct obdo *oa,
+ struct lov_stripe_md **ea, struct obd_trans_info *oti);
+void osc_wake_cache_waiters(struct client_obd *cli);
+int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes);
+void osc_update_next_shrink(struct client_obd *cli);
+
+/*
+ * cl integration.
+ */
+#include <cl_object.h>
+
+extern struct ptlrpc_request_set *PTLRPCD_SET;
+
+int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
+ __u64 *flags, ldlm_policy_data_t *policy,
+ struct ost_lvb *lvb, int kms_valid,
+ obd_enqueue_update_f upcall,
+ void *cookie, struct ldlm_enqueue_info *einfo,
+ struct lustre_handle *lockh,
+ struct ptlrpc_request_set *rqset, int async, int agl);
+int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
+
+int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
+ __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ int *flags, void *data, struct lustre_handle *lockh,
+ int unref);
+
+int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
+int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
+int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
+
+int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
+int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
+ struct list_head *ext_list, int cmd, pdl_policy_t p);
+int osc_lru_shrink(struct client_obd *cli, int target);
+
+extern spinlock_t osc_ast_guard;
+
+int osc_cleanup(struct obd_device *obd);
+int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
+
+#ifdef LPROCFS
+int lproc_osc_attach_seqstat(struct obd_device *dev);
+void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline int lproc_osc_attach_seqstat(struct obd_device *dev) {return 0;}
+static inline void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars)
+{
+ memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+extern struct lu_device_type osc_device_type;
+
+static inline int osc_recoverable_error(int rc)
+{
+ return (rc == -EIO || rc == -EROFS || rc == -ENOMEM ||
+ rc == -EAGAIN || rc == -EINPROGRESS);
+}
+
+static inline unsigned long rpcs_in_flight(struct client_obd *cli)
+{
+ return cli->cl_r_in_flight + cli->cl_w_in_flight;
+}
+
+#ifndef min_t
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#endif
+
+struct osc_device {
+ struct cl_device od_cl;
+ struct obd_export *od_exp;
+
+ /* Write stats is actually protected by client_obd's lock. */
+ struct osc_stats {
+ uint64_t os_lockless_writes; /* by bytes */
+ uint64_t os_lockless_reads; /* by bytes */
+ uint64_t os_lockless_truncates; /* by times */
+ } od_stats;
+
+ /* configuration item(s) */
+ int od_contention_time;
+ int od_lockless_truncate;
+};
+
+static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
+{
+ return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
+}
+
+int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
+
+extern struct kmem_cache *osc_quota_kmem;
+struct osc_quota_info {
+ /** linkage for quota hash table */
+ struct hlist_node oqi_hash;
+ obd_uid oqi_id;
+};
+int osc_quota_setup(struct obd_device *obd);
+int osc_quota_cleanup(struct obd_device *obd);
+int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
+ obd_flag valid, obd_flag flags);
+int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]);
+int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl);
+int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl);
+int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+
+#endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
new file mode 100644
index 000000000000..1b277045b3e4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -0,0 +1,836 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_io for OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+#include "osc_cl_internal.h"
+
+/** \addtogroup osc
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ */
+
+static struct osc_req *cl2osc_req(const struct cl_req_slice *slice)
+{
+ LINVRNT(slice->crs_dev->cd_lu_dev.ld_type == &osc_device_type);
+ return container_of0(slice, struct osc_req, or_cl);
+}
+
+static struct osc_io *cl2osc_io(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct osc_io *oio = container_of0(slice, struct osc_io, oi_cl);
+ LINVRNT(oio == osc_env_io(env));
+ return oio;
+}
+
+static struct osc_page *osc_cl_page_osc(struct cl_page *page)
+{
+ const struct cl_page_slice *slice;
+
+ slice = cl_page_at(page, &osc_device_type);
+ LASSERT(slice != NULL);
+
+ return cl2osc_page(slice);
+}
+
+
+/*****************************************************************************
+ *
+ * io operations.
+ *
+ */
+
+static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io)
+{
+}
+
+/**
+ * An implementation of cl_io_operations::cio_io_submit() method for osc
+ * layer. Iterates over pages in the in-queue, prepares each for io by calling
+ * cl_page_prep() and then either submits them through osc_io_submit_page()
+ * or, if page is already submitted, changes osc flags through
+ * osc_set_async_flags().
+ */
+static int osc_io_submit(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ enum cl_req_type crt, struct cl_2queue *queue)
+{
+ struct cl_page *page;
+ struct cl_page *tmp;
+ struct client_obd *cli = NULL;
+ struct osc_object *osc = NULL; /* to keep gcc happy */
+ struct osc_page *opg;
+ struct cl_io *io;
+ LIST_HEAD (list);
+
+ struct cl_page_list *qin = &queue->c2_qin;
+ struct cl_page_list *qout = &queue->c2_qout;
+ int queued = 0;
+ int result = 0;
+ int cmd;
+ int brw_flags;
+ int max_pages;
+
+ LASSERT(qin->pl_nr > 0);
+
+ CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt);
+
+ osc = cl2osc(ios->cis_obj);
+ cli = osc_cli(osc);
+ max_pages = cli->cl_max_pages_per_rpc;
+
+ cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
+ brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;
+
+ /*
+ * NOTE: here @page is a top-level page. This is done to avoid
+ * creation of sub-page-list.
+ */
+ cl_page_list_for_each_safe(page, tmp, qin) {
+ struct osc_async_page *oap;
+
+ /* Top level IO. */
+ io = page->cp_owner;
+ LASSERT(io != NULL);
+
+ opg = osc_cl_page_osc(page);
+ oap = &opg->ops_oap;
+ LASSERT(osc == oap->oap_obj);
+
+ if (!list_empty(&oap->oap_pending_item) ||
+ !list_empty(&oap->oap_rpc_item)) {
+ CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
+ oap, opg);
+ result = -EBUSY;
+ break;
+ }
+
+ result = cl_page_prep(env, io, page, crt);
+ if (result != 0) {
+ LASSERT(result < 0);
+ if (result != -EALREADY)
+ break;
+ /*
+ * Handle -EALREADY error: for read case, the page is
+ * already in UPTODATE state; for write, the page
+ * is not dirty.
+ */
+ result = 0;
+ continue;
+ }
+
+ cl_page_list_move(qout, qin, page);
+ oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
+ oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+
+ osc_page_submit(env, opg, crt, brw_flags);
+ list_add_tail(&oap->oap_pending_item, &list);
+ if (++queued == max_pages) {
+ queued = 0;
+ result = osc_queue_sync_pages(env, osc, &list, cmd,
+ brw_flags);
+ if (result < 0)
+ break;
+ }
+ }
+
+ if (queued > 0)
+ result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);
+
+ CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
+ return qout->pl_nr > 0 ? 0 : result;
+}
+
+static void osc_page_touch_at(const struct lu_env *env,
+ struct cl_object *obj, pgoff_t idx, unsigned to)
+{
+ struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ int valid;
+ __u64 kms;
+
+ /* offset within stripe */
+ kms = cl_offset(obj, idx) + to;
+
+ cl_object_attr_lock(obj);
+ /*
+ * XXX old code used
+ *
+ * ll_inode_size_lock(inode, 0); lov_stripe_lock(lsm);
+ *
+ * here
+ */
+ CDEBUG(D_INODE, "stripe KMS %sincreasing "LPU64"->"LPU64" "LPU64"\n",
+ kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
+ loi->loi_lvb.lvb_size);
+
+ valid = 0;
+ if (kms > loi->loi_kms) {
+ attr->cat_kms = kms;
+ valid |= CAT_KMS;
+ }
+ if (kms > loi->loi_lvb.lvb_size) {
+ attr->cat_size = kms;
+ valid |= CAT_SIZE;
+ }
+ cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
+}
+
+/**
+ * This is called when a page is accessed within file in a way that creates
+ * new page, if one were missing (i.e., if there were a hole at that place in
+ * the file, or accessed page is beyond the current file size). Examples:
+ * ->commit_write() and ->nopage() methods.
+ *
+ * Expand stripe KMS if necessary.
+ */
+static void osc_page_touch(const struct lu_env *env,
+ struct osc_page *opage, unsigned to)
+{
+ struct cl_page *page = opage->ops_cl.cpl_page;
+ struct cl_object *obj = opage->ops_cl.cpl_obj;
+
+ osc_page_touch_at(env, obj, page->cp_index, to);
+}
+
+/**
+ * Implements cl_io_operations::cio_prepare_write() method for osc layer.
+ *
+ * \retval -EIO transfer initiated against this osc will most likely fail
+ * \retval 0 transfer initiated against this osc will most likely succeed.
+ *
+ * The reason for this check is to immediately return an error to the caller
+ * in the case of a deactivated import. Note, that import can be deactivated
+ * later, while pages, dirtied by this IO, are still in the cache, but this is
+ * irrelevant, because that would still return an error to the application (if
+ * it does fsync), but many applications don't do fsync because of performance
+ * issues, and we wanted to return an -EIO at write time to notify the
+ * application.
+ */
+static int osc_io_prepare_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
+ struct obd_import *imp = class_exp2cliimp(dev->od_exp);
+ struct osc_io *oio = cl2osc_io(env, ios);
+ int result = 0;
+ ENTRY;
+
+ /*
+ * This implements OBD_BRW_CHECK logic from old client.
+ */
+
+ if (imp == NULL || imp->imp_invalid)
+ result = -EIO;
+ if (result == 0 && oio->oi_lockless)
+ /* this page contains `invalid' data, but who cares?
+ * nobody can access the invalid data.
+ * in osc_io_commit_write(), we're going to write exact
+ * [from, to) bytes of this page to OST. -jay */
+ cl_page_export(env, slice->cpl_page, 1);
+
+ RETURN(result);
+}
+
+static int osc_io_commit_write(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ const struct cl_page_slice *slice,
+ unsigned from, unsigned to)
+{
+ struct osc_io *oio = cl2osc_io(env, ios);
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+ struct osc_async_page *oap = &opg->ops_oap;
+ ENTRY;
+
+ LASSERT(to > 0);
+ /*
+ * XXX instead of calling osc_page_touch() here and in
+ * osc_io_fault_start() it might be more logical to introduce
+ * cl_page_touch() method, that generic cl_io_commit_write() and page
+ * fault code calls.
+ */
+ osc_page_touch(env, cl2osc_page(slice), to);
+ if (!client_is_remote(osc_export(obj)) &&
+ cfs_capable(CFS_CAP_SYS_RESOURCE))
+ oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+
+ if (oio->oi_lockless)
+ /* see osc_io_prepare_write() for lockless io handling. */
+ cl_page_clip(env, slice->cpl_page, from, to);
+
+ RETURN(0);
+}
+
+static int osc_io_fault_start(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct cl_io *io;
+ struct cl_fault_io *fio;
+
+ ENTRY;
+
+ io = ios->cis_io;
+ fio = &io->u.ci_fault;
+ CDEBUG(D_INFO, "%lu %d %d\n",
+ fio->ft_index, fio->ft_writable, fio->ft_nob);
+ /*
+ * If mapping is writeable, adjust kms to cover this page,
+ * but do not extend kms beyond actual file size.
+ * See bug 10919.
+ */
+ if (fio->ft_writable)
+ osc_page_touch_at(env, ios->cis_obj,
+ fio->ft_index, fio->ft_nob);
+ RETURN(0);
+}
+
+static int osc_async_upcall(void *a, int rc)
+{
+ struct osc_async_cbargs *args = a;
+
+ args->opc_rc = rc;
+ complete(&args->opc_sync);
+ return 0;
+}
+
+/**
+ * Checks that there are no pages being written in the extent being truncated.
+ */
+static int trunc_check_cb(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, void *cbdata)
+{
+ const struct cl_page_slice *slice;
+ struct osc_page *ops;
+ struct osc_async_page *oap;
+ __u64 start = *(__u64 *)cbdata;
+
+ slice = cl_page_at(page, &osc_device_type);
+ LASSERT(slice != NULL);
+ ops = cl2osc_page(slice);
+ oap = &ops->ops_oap;
+
+ if (oap->oap_cmd & OBD_BRW_WRITE &&
+ !list_empty(&oap->oap_pending_item))
+ CL_PAGE_DEBUG(D_ERROR, env, page, "exists " LPU64 "/%s.\n",
+ start, current->comm);
+
+ {
+ struct page *vmpage = cl_page_vmpage(env, page);
+ if (PageLocked(vmpage))
+ CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
+ ops, page->cp_index,
+ (oap->oap_cmd & OBD_BRW_RWMASK));
+ }
+
+ return CLP_GANG_OKAY;
+}
+
+static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
+ struct osc_io *oio, __u64 size)
+{
+ struct cl_object *clob;
+ int partial;
+ pgoff_t start;
+
+ clob = oio->oi_cl.cis_obj;
+ start = cl_index(clob, size);
+ partial = cl_offset(clob, start) < size;
+
+ /*
+ * Complain if there are pages in the truncated region.
+ */
+ cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF,
+ trunc_check_cb, (void *)&size);
+}
+
+static int osc_io_setattr_start(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_io *io = slice->cis_io;
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct cl_object *obj = slice->cis_obj;
+ struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ struct obdo *oa = &oio->oi_oa;
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+ __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
+ unsigned int ia_valid = io->u.ci_setattr.sa_valid;
+ int result = 0;
+ struct obd_info oinfo = { { { 0 } } };
+
+ /* truncate cache dirty pages first */
+ if (cl_io_is_trunc(io))
+ result = osc_cache_truncate_start(env, oio, cl2osc(obj), size);
+
+ if (result == 0 && oio->oi_lockless == 0) {
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr;
+ unsigned int cl_valid = 0;
+
+ if (ia_valid & ATTR_SIZE) {
+ attr->cat_size = attr->cat_kms = size;
+ cl_valid = (CAT_SIZE | CAT_KMS);
+ }
+ if (ia_valid & ATTR_MTIME_SET) {
+ attr->cat_mtime = lvb->lvb_mtime;
+ cl_valid |= CAT_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME_SET) {
+ attr->cat_atime = lvb->lvb_atime;
+ cl_valid |= CAT_ATIME;
+ }
+ if (ia_valid & ATTR_CTIME_SET) {
+ attr->cat_ctime = lvb->lvb_ctime;
+ cl_valid |= CAT_CTIME;
+ }
+ result = cl_object_attr_set(env, obj, attr, cl_valid);
+ }
+ cl_object_attr_unlock(obj);
+ }
+ memset(oa, 0, sizeof(*oa));
+ if (result == 0) {
+ oa->o_oi = loi->loi_oi;
+ oa->o_mtime = attr->cat_mtime;
+ oa->o_atime = attr->cat_atime;
+ oa->o_ctime = attr->cat_ctime;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
+ OBD_MD_FLCTIME | OBD_MD_FLMTIME;
+ if (ia_valid & ATTR_SIZE) {
+ oa->o_size = size;
+ oa->o_blocks = OBD_OBJECT_EOF;
+ oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+
+ if (oio->oi_lockless) {
+ oa->o_flags = OBD_FL_SRVLOCK;
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ }
+ } else {
+ LASSERT(oio->oi_lockless == 0);
+ }
+
+ oinfo.oi_oa = oa;
+ oinfo.oi_capa = io->u.ci_setattr.sa_capa;
+ init_completion(&cbargs->opc_sync);
+
+ if (ia_valid & ATTR_SIZE)
+ result = osc_punch_base(osc_export(cl2osc(obj)),
+ &oinfo, osc_async_upcall,
+ cbargs, PTLRPCD_SET);
+ else
+ result = osc_setattr_async_base(osc_export(cl2osc(obj)),
+ &oinfo, NULL,
+ osc_async_upcall,
+ cbargs, PTLRPCD_SET);
+ cbargs->opc_rpc_sent = result == 0;
+ }
+ return result;
+}
+
+static void osc_io_setattr_end(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_io *io = slice->cis_io;
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct cl_object *obj = slice->cis_obj;
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+ int result = 0;
+
+ if (cbargs->opc_rpc_sent) {
+ wait_for_completion(&cbargs->opc_sync);
+ result = io->ci_result = cbargs->opc_rc;
+ }
+ if (result == 0) {
+ if (oio->oi_lockless) {
+ /* lockless truncate */
+ struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
+
+ LASSERT(cl_io_is_trunc(io));
+ /* XXX: Need a lock. */
+ osd->od_stats.os_lockless_truncates++;
+ }
+ }
+
+ if (cl_io_is_trunc(io)) {
+ __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
+ osc_trunc_check(env, io, oio, size);
+ if (oio->oi_trunc != NULL) {
+ osc_cache_truncate_end(env, oio, cl2osc(obj));
+ oio->oi_trunc = NULL;
+ }
+ }
+}
+
+static int osc_io_read_start(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct cl_object *obj = slice->cis_obj;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ int result = 0;
+ ENTRY;
+
+ if (oio->oi_lockless == 0) {
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ attr->cat_atime = LTIME_S(CFS_CURRENT_TIME);
+ result = cl_object_attr_set(env, obj, attr,
+ CAT_ATIME);
+ }
+ cl_object_attr_unlock(obj);
+ }
+ RETURN(result);
+}
+
+static int osc_io_write_start(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct cl_object *obj = slice->cis_obj;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ int result = 0;
+ ENTRY;
+
+ if (oio->oi_lockless == 0) {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1);
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ attr->cat_mtime = attr->cat_ctime =
+ LTIME_S(CFS_CURRENT_TIME);
+ result = cl_object_attr_set(env, obj, attr,
+ CAT_MTIME | CAT_CTIME);
+ }
+ cl_object_attr_unlock(obj);
+ }
+ RETURN(result);
+}
+
+static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj,
+ struct cl_fsync_io *fio)
+{
+ struct osc_io *oio = osc_env_io(env);
+ struct obdo *oa = &oio->oi_oa;
+ struct obd_info *oinfo = &oio->oi_info;
+ struct lov_oinfo *loi = obj->oo_oinfo;
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+ int rc = 0;
+ ENTRY;
+
+ memset(oa, 0, sizeof(*oa));
+ oa->o_oi = loi->loi_oi;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+
+ /* reload size abd blocks for start and end of sync range */
+ oa->o_size = fio->fi_start;
+ oa->o_blocks = fio->fi_end;
+ oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+
+ obdo_set_parent_fid(oa, fio->fi_fid);
+
+ memset(oinfo, 0, sizeof(*oinfo));
+ oinfo->oi_oa = oa;
+ oinfo->oi_capa = fio->fi_capa;
+ init_completion(&cbargs->opc_sync);
+
+ rc = osc_sync_base(osc_export(obj), oinfo, osc_async_upcall, cbargs,
+ PTLRPCD_SET);
+ RETURN(rc);
+}
+
+static int osc_io_fsync_start(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_io *io = slice->cis_io;
+ struct cl_fsync_io *fio = &io->u.ci_fsync;
+ struct cl_object *obj = slice->cis_obj;
+ struct osc_object *osc = cl2osc(obj);
+ pgoff_t start = cl_index(obj, fio->fi_start);
+ pgoff_t end = cl_index(obj, fio->fi_end);
+ int result = 0;
+ ENTRY;
+
+ if (fio->fi_end == OBD_OBJECT_EOF)
+ end = CL_PAGE_EOF;
+
+ result = osc_cache_writeback_range(env, osc, start, end, 0,
+ fio->fi_mode == CL_FSYNC_DISCARD);
+ if (result > 0) {
+ fio->fi_nr_written += result;
+ result = 0;
+ }
+ if (fio->fi_mode == CL_FSYNC_ALL) {
+ int rc;
+
+ /* we have to wait for writeback to finish before we can
+ * send OST_SYNC RPC. This is bad because it causes extents
+ * to be written osc by osc. However, we usually start
+ * writeback before CL_FSYNC_ALL so this won't have any real
+ * problem. */
+ rc = osc_cache_wait_range(env, osc, start, end);
+ if (result == 0)
+ result = rc;
+ rc = osc_fsync_ost(env, osc, fio);
+ if (result == 0)
+ result = rc;
+ }
+
+ RETURN(result);
+}
+
+static void osc_io_fsync_end(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync;
+ struct cl_object *obj = slice->cis_obj;
+ pgoff_t start = cl_index(obj, fio->fi_start);
+ pgoff_t end = cl_index(obj, fio->fi_end);
+ int result = 0;
+
+ if (fio->fi_mode == CL_FSYNC_LOCAL) {
+ result = osc_cache_wait_range(env, cl2osc(obj), start, end);
+ } else if (fio->fi_mode == CL_FSYNC_ALL) {
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+
+ wait_for_completion(&cbargs->opc_sync);
+ if (result == 0)
+ result = cbargs->opc_rc;
+ }
+ slice->cis_io->ci_result = result;
+}
+
+static void osc_io_end(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct osc_io *oio = cl2osc_io(env, slice);
+
+ if (oio->oi_active) {
+ osc_extent_release(env, oio->oi_active);
+ oio->oi_active = NULL;
+ }
+}
+
+static const struct cl_io_operations osc_io_ops = {
+ .op = {
+ [CIT_READ] = {
+ .cio_start = osc_io_read_start,
+ .cio_fini = osc_io_fini
+ },
+ [CIT_WRITE] = {
+ .cio_start = osc_io_write_start,
+ .cio_end = osc_io_end,
+ .cio_fini = osc_io_fini
+ },
+ [CIT_SETATTR] = {
+ .cio_start = osc_io_setattr_start,
+ .cio_end = osc_io_setattr_end
+ },
+ [CIT_FAULT] = {
+ .cio_start = osc_io_fault_start,
+ .cio_end = osc_io_end,
+ .cio_fini = osc_io_fini
+ },
+ [CIT_FSYNC] = {
+ .cio_start = osc_io_fsync_start,
+ .cio_end = osc_io_fsync_end,
+ .cio_fini = osc_io_fini
+ },
+ [CIT_MISC] = {
+ .cio_fini = osc_io_fini
+ }
+ },
+ .req_op = {
+ [CRT_READ] = {
+ .cio_submit = osc_io_submit
+ },
+ [CRT_WRITE] = {
+ .cio_submit = osc_io_submit
+ }
+ },
+ .cio_prepare_write = osc_io_prepare_write,
+ .cio_commit_write = osc_io_commit_write
+};
+
+/*****************************************************************************
+ *
+ * Transfer operations.
+ *
+ */
+
+static int osc_req_prep(const struct lu_env *env,
+ const struct cl_req_slice *slice)
+{
+ return 0;
+}
+
+static void osc_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct osc_req *or;
+
+ or = cl2osc_req(slice);
+ OBD_SLAB_FREE_PTR(or, osc_req_kmem);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for osc
+ * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq
+ * fields.
+ */
+static void osc_req_attr_set(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, obd_valid flags)
+{
+ struct lov_oinfo *oinfo;
+ struct cl_req *clerq;
+ struct cl_page *apage; /* _some_ page in @clerq */
+ struct cl_lock *lock; /* _some_ lock protecting @apage */
+ struct osc_lock *olck;
+ struct osc_page *opg;
+ struct obdo *oa;
+ struct ost_lvb *lvb;
+
+ oinfo = cl2osc(obj)->oo_oinfo;
+ lvb = &oinfo->loi_lvb;
+ oa = attr->cra_oa;
+
+ if ((flags & OBD_MD_FLMTIME) != 0) {
+ oa->o_mtime = lvb->lvb_mtime;
+ oa->o_valid |= OBD_MD_FLMTIME;
+ }
+ if ((flags & OBD_MD_FLATIME) != 0) {
+ oa->o_atime = lvb->lvb_atime;
+ oa->o_valid |= OBD_MD_FLATIME;
+ }
+ if ((flags & OBD_MD_FLCTIME) != 0) {
+ oa->o_ctime = lvb->lvb_ctime;
+ oa->o_valid |= OBD_MD_FLCTIME;
+ }
+ if (flags & OBD_MD_FLGROUP) {
+ ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi));
+ oa->o_valid |= OBD_MD_FLGROUP;
+ }
+ if (flags & OBD_MD_FLID) {
+ ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi));
+ oa->o_valid |= OBD_MD_FLID;
+ }
+ if (flags & OBD_MD_FLHANDLE) {
+ clerq = slice->crs_req;
+ LASSERT(!list_empty(&clerq->crq_pages));
+ apage = container_of(clerq->crq_pages.next,
+ struct cl_page, cp_flight);
+ opg = osc_cl_page_osc(apage);
+ apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
+ lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
+ if (lock == NULL) {
+ struct cl_object_header *head;
+ struct cl_lock *scan;
+
+ head = cl_object_header(apage->cp_obj);
+ list_for_each_entry(scan, &head->coh_locks,
+ cll_linkage)
+ CL_LOCK_DEBUG(D_ERROR, env, scan,
+ "no cover page!\n");
+ CL_PAGE_DEBUG(D_ERROR, env, apage,
+ "dump uncover page!\n");
+ libcfs_debug_dumpstack(NULL);
+ LBUG();
+ }
+
+ olck = osc_lock_at(lock);
+ LASSERT(olck != NULL);
+ LASSERT(ergo(opg->ops_srvlock, olck->ols_lock == NULL));
+ /* check for lockless io. */
+ if (olck->ols_lock != NULL) {
+ oa->o_handle = olck->ols_lock->l_remote_handle;
+ oa->o_valid |= OBD_MD_FLHANDLE;
+ }
+ cl_lock_put(env, lock);
+ }
+}
+
+static const struct cl_req_operations osc_req_ops = {
+ .cro_prep = osc_req_prep,
+ .cro_attr_set = osc_req_attr_set,
+ .cro_completion = osc_req_completion
+};
+
+
+int osc_io_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io)
+{
+ struct osc_io *oio = osc_env_io(env);
+
+ CL_IO_SLICE_CLEAN(oio, oi_cl);
+ cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops);
+ return 0;
+}
+
+int osc_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct osc_req *or;
+ int result;
+
+ OBD_SLAB_ALLOC_PTR_GFP(or, osc_req_kmem, __GFP_IO);
+ if (or != NULL) {
+ cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
new file mode 100644
index 000000000000..640bc3d34709
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -0,0 +1,1663 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_lock for OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+# include <linux/libcfs/libcfs.h>
+/* fid_build_reg_res_name() */
+#include <lustre_fid.h>
+
+#include "osc_cl_internal.h"
+
+/** \addtogroup osc
+ * @{
+ */
+
+#define _PAGEREF_MAGIC (-10000000)
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ */
+
+static const struct cl_lock_operations osc_lock_ops;
+static const struct cl_lock_operations osc_lock_lockless_ops;
+static void osc_lock_to_lockless(const struct lu_env *env,
+ struct osc_lock *ols, int force);
+static int osc_lock_has_pages(struct osc_lock *olck);
+
+int osc_lock_is_lockless(const struct osc_lock *olck)
+{
+ return (olck->ols_cl.cls_ops == &osc_lock_lockless_ops);
+}
+
+/**
+ * Returns a weak pointer to the ldlm lock identified by a handle. Returned
+ * pointer cannot be dereferenced, as lock is not protected from concurrent
+ * reclaim. This function is a helper for osc_lock_invariant().
+ */
+static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
+{
+ struct ldlm_lock *lock;
+
+ lock = ldlm_handle2lock(handle);
+ if (lock != NULL)
+ LDLM_LOCK_PUT(lock);
+ return lock;
+}
+
+/**
+ * Invariant that has to be true all of the time.
+ */
+static int osc_lock_invariant(struct osc_lock *ols)
+{
+ struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
+ struct ldlm_lock *olock = ols->ols_lock;
+ int handle_used = lustre_handle_is_used(&ols->ols_handle);
+
+ return
+ ergo(osc_lock_is_lockless(ols),
+ ols->ols_locklessable && ols->ols_lock == NULL) ||
+ (ergo(olock != NULL, handle_used) &&
+ ergo(olock != NULL,
+ olock->l_handle.h_cookie == ols->ols_handle.cookie) &&
+ /*
+ * Check that ->ols_handle and ->ols_lock are consistent, but
+ * take into account that they are set at the different time.
+ */
+ ergo(handle_used,
+ ergo(lock != NULL && olock != NULL, lock == olock) &&
+ ergo(lock == NULL, olock == NULL)) &&
+ ergo(ols->ols_state == OLS_CANCELLED,
+ olock == NULL && !handle_used) &&
+ /*
+ * DLM lock is destroyed only after we have seen cancellation
+ * ast.
+ */
+ ergo(olock != NULL && ols->ols_state < OLS_CANCELLED,
+ !olock->l_destroyed) &&
+ ergo(ols->ols_state == OLS_GRANTED,
+ olock != NULL &&
+ olock->l_req_mode == olock->l_granted_mode &&
+ ols->ols_hold));
+}
+
+/*****************************************************************************
+ *
+ * Lock operations.
+ *
+ */
+
+/**
+ * Breaks a link between osc_lock and dlm_lock.
+ */
+static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
+{
+ struct ldlm_lock *dlmlock;
+
+ spin_lock(&osc_ast_guard);
+ dlmlock = olck->ols_lock;
+ if (dlmlock == NULL) {
+ spin_unlock(&osc_ast_guard);
+ return;
+ }
+
+ olck->ols_lock = NULL;
+ /* wb(); --- for all who checks (ols->ols_lock != NULL) before
+ * call to osc_lock_detach() */
+ dlmlock->l_ast_data = NULL;
+ olck->ols_handle.cookie = 0ULL;
+ spin_unlock(&osc_ast_guard);
+
+ lock_res_and_lock(dlmlock);
+ if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
+ struct cl_object *obj = olck->ols_cl.cls_obj;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ __u64 old_kms;
+
+ cl_object_attr_lock(obj);
+ /* Must get the value under the lock to avoid possible races. */
+ old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
+ /* Update the kms. Need to loop all granted locks.
+ * Not a problem for the client */
+ attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
+
+ cl_object_attr_set(env, obj, attr, CAT_KMS);
+ cl_object_attr_unlock(obj);
+ }
+ unlock_res_and_lock(dlmlock);
+
+ /* release a reference taken in osc_lock_upcall0(). */
+ LASSERT(olck->ols_has_ref);
+ lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
+ LDLM_LOCK_RELEASE(dlmlock);
+ olck->ols_has_ref = 0;
+}
+
+static int osc_lock_unhold(struct osc_lock *ols)
+{
+ int result = 0;
+
+ if (ols->ols_hold) {
+ ols->ols_hold = 0;
+ result = osc_cancel_base(&ols->ols_handle,
+ ols->ols_einfo.ei_mode);
+ }
+ return result;
+}
+
+static int osc_lock_unuse(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+
+ LINVRNT(osc_lock_invariant(ols));
+
+ switch (ols->ols_state) {
+ case OLS_NEW:
+ LASSERT(!ols->ols_hold);
+ LASSERT(ols->ols_agl);
+ return 0;
+ case OLS_UPCALL_RECEIVED:
+ osc_lock_unhold(ols);
+ case OLS_ENQUEUED:
+ LASSERT(!ols->ols_hold);
+ osc_lock_detach(env, ols);
+ ols->ols_state = OLS_NEW;
+ return 0;
+ case OLS_GRANTED:
+ LASSERT(!ols->ols_glimpse);
+ LASSERT(ols->ols_hold);
+ /*
+ * Move lock into OLS_RELEASED state before calling
+ * osc_cancel_base() so that possible synchronous cancellation
+ * (that always happens e.g., for liblustre) sees that lock is
+ * released.
+ */
+ ols->ols_state = OLS_RELEASED;
+ return osc_lock_unhold(ols);
+ default:
+ CERROR("Impossible state: %d\n", ols->ols_state);
+ LBUG();
+ }
+}
+
+static void osc_lock_fini(const struct lu_env *env,
+ struct cl_lock_slice *slice)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+
+ LINVRNT(osc_lock_invariant(ols));
+ /*
+ * ->ols_hold can still be true at this point if, for example, a
+ * thread that requested a lock was killed (and released a reference
+ * to the lock), before reply from a server was received. In this case
+ * lock is destroyed immediately after upcall.
+ */
+ osc_lock_unhold(ols);
+ LASSERT(ols->ols_lock == NULL);
+ LASSERT(atomic_read(&ols->ols_pageref) == 0 ||
+ atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC);
+
+ OBD_SLAB_FREE_PTR(ols, osc_lock_kmem);
+}
+
+static void osc_lock_build_policy(const struct lu_env *env,
+ const struct cl_lock *lock,
+ ldlm_policy_data_t *policy)
+{
+ const struct cl_lock_descr *d = &lock->cll_descr;
+
+ osc_index2policy(policy, d->cld_obj, d->cld_start, d->cld_end);
+ policy->l_extent.gid = d->cld_gid;
+}
+
+static __u64 osc_enq2ldlm_flags(__u32 enqflags)
+{
+ __u64 result = 0;
+
+ LASSERT((enqflags & ~CEF_MASK) == 0);
+
+ if (enqflags & CEF_NONBLOCK)
+ result |= LDLM_FL_BLOCK_NOWAIT;
+ if (enqflags & CEF_ASYNC)
+ result |= LDLM_FL_HAS_INTENT;
+ if (enqflags & CEF_DISCARD_DATA)
+ result |= LDLM_AST_DISCARD_DATA;
+ return result;
+}
+
+/**
+ * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
+ * pointers. Initialized in osc_init().
+ */
+spinlock_t osc_ast_guard;
+
+static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
+{
+ struct osc_lock *olck;
+
+ lock_res_and_lock(dlm_lock);
+ spin_lock(&osc_ast_guard);
+ olck = dlm_lock->l_ast_data;
+ if (olck != NULL) {
+ struct cl_lock *lock = olck->ols_cl.cls_lock;
+ /*
+ * If osc_lock holds a reference on ldlm lock, return it even
+ * when cl_lock is in CLS_FREEING state. This way
+ *
+ * osc_ast_data_get(dlmlock) == NULL
+ *
+ * guarantees that all osc references on dlmlock were
+ * released. osc_dlm_blocking_ast0() relies on that.
+ */
+ if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
+ cl_lock_get_trust(lock);
+ lu_ref_add_atomic(&lock->cll_reference,
+ "ast", current);
+ } else
+ olck = NULL;
+ }
+ spin_unlock(&osc_ast_guard);
+ unlock_res_and_lock(dlm_lock);
+ return olck;
+}
+
+static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
+{
+ struct cl_lock *lock;
+
+ lock = olck->ols_cl.cls_lock;
+ lu_ref_del(&lock->cll_reference, "ast", current);
+ cl_lock_put(env, lock);
+}
+
+/**
+ * Updates object attributes from a lock value block (lvb) received together
+ * with the DLM lock reply from the server. Copy of osc_update_enqueue()
+ * logic.
+ *
+ * This can be optimized to not update attributes when lock is a result of a
+ * local match.
+ *
+ * Called under lock and resource spin-locks.
+ */
+static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
+ int rc)
+{
+ struct ost_lvb *lvb;
+ struct cl_object *obj;
+ struct lov_oinfo *oinfo;
+ struct cl_attr *attr;
+ unsigned valid;
+
+ ENTRY;
+
+ if (!(olck->ols_flags & LDLM_FL_LVB_READY))
+ RETURN_EXIT;
+
+ lvb = &olck->ols_lvb;
+ obj = olck->ols_cl.cls_obj;
+ oinfo = cl2osc(obj)->oo_oinfo;
+ attr = &osc_env_info(env)->oti_attr;
+ valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
+ cl_lvb2attr(attr, lvb);
+
+ cl_object_attr_lock(obj);
+ if (rc == 0) {
+ struct ldlm_lock *dlmlock;
+ __u64 size;
+
+ dlmlock = olck->ols_lock;
+ LASSERT(dlmlock != NULL);
+
+ /* re-grab LVB from a dlm lock under DLM spin-locks. */
+ *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+ size = lvb->lvb_size;
+ /* Extend KMS up to the end of this lock and no further
+ * A lock on [x,y] means a KMS of up to y + 1 bytes! */
+ if (size > dlmlock->l_policy_data.l_extent.end)
+ size = dlmlock->l_policy_data.l_extent.end + 1;
+ if (size >= oinfo->loi_kms) {
+ LDLM_DEBUG(dlmlock, "lock acquired, setting rss="LPU64
+ ", kms="LPU64, lvb->lvb_size, size);
+ valid |= CAT_KMS;
+ attr->cat_kms = size;
+ } else {
+ LDLM_DEBUG(dlmlock, "lock acquired, setting rss="
+ LPU64"; leaving kms="LPU64", end="LPU64,
+ lvb->lvb_size, oinfo->loi_kms,
+ dlmlock->l_policy_data.l_extent.end);
+ }
+ ldlm_lock_allow_match_locked(dlmlock);
+ } else if (rc == -ENAVAIL && olck->ols_glimpse) {
+ CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
+ " kms="LPU64"\n", lvb->lvb_size, oinfo->loi_kms);
+ } else
+ valid = 0;
+
+ if (valid != 0)
+ cl_object_attr_set(env, obj, attr, valid);
+
+ cl_object_attr_unlock(obj);
+
+ EXIT;
+}
+
+/**
+ * Called when a lock is granted, from an upcall (when server returned a
+ * granted lock), or from completion AST, when server returned a blocked lock.
+ *
+ * Called under lock and resource spin-locks, that are released temporarily
+ * here.
+ */
+static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
+ struct ldlm_lock *dlmlock, int rc)
+{
+ struct ldlm_extent *ext;
+ struct cl_lock *lock;
+ struct cl_lock_descr *descr;
+
+ LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+
+ ENTRY;
+ if (olck->ols_state < OLS_GRANTED) {
+ lock = olck->ols_cl.cls_lock;
+ ext = &dlmlock->l_policy_data.l_extent;
+ descr = &osc_env_info(env)->oti_descr;
+ descr->cld_obj = lock->cll_descr.cld_obj;
+
+ /* XXX check that ->l_granted_mode is valid. */
+ descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
+ descr->cld_start = cl_index(descr->cld_obj, ext->start);
+ descr->cld_end = cl_index(descr->cld_obj, ext->end);
+ descr->cld_gid = ext->gid;
+ /*
+ * tell upper layers the extent of the lock that was actually
+ * granted
+ */
+ olck->ols_state = OLS_GRANTED;
+ osc_lock_lvb_update(env, olck, rc);
+
+ /* release DLM spin-locks to allow cl_lock_{modify,signal}()
+ * to take a semaphore on a parent lock. This is safe, because
+ * spin-locks are needed to protect consistency of
+ * dlmlock->l_*_mode and LVB, and we have finished processing
+ * them. */
+ unlock_res_and_lock(dlmlock);
+ cl_lock_modify(env, lock, descr);
+ cl_lock_signal(env, lock);
+ LINVRNT(osc_lock_invariant(olck));
+ lock_res_and_lock(dlmlock);
+ }
+ EXIT;
+}
+
+static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
+
+{
+ struct ldlm_lock *dlmlock;
+
+ ENTRY;
+
+ dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
+ LASSERT(dlmlock != NULL);
+
+ lock_res_and_lock(dlmlock);
+ spin_lock(&osc_ast_guard);
+ LASSERT(dlmlock->l_ast_data == olck);
+ LASSERT(olck->ols_lock == NULL);
+ olck->ols_lock = dlmlock;
+ spin_unlock(&osc_ast_guard);
+
+ /*
+ * Lock might be not yet granted. In this case, completion ast
+ * (osc_ldlm_completion_ast()) comes later and finishes lock
+ * granting.
+ */
+ if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
+ osc_lock_granted(env, olck, dlmlock, 0);
+ unlock_res_and_lock(dlmlock);
+
+ /*
+ * osc_enqueue_interpret() decrefs asynchronous locks, counter
+ * this.
+ */
+ ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
+ olck->ols_hold = 1;
+
+ /* lock reference taken by ldlm_handle2lock_long() is owned by
+ * osc_lock and released in osc_lock_detach() */
+ lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
+ olck->ols_has_ref = 1;
+}
+
+/**
+ * Lock upcall function that is executed either when a reply to ENQUEUE rpc is
+ * received from a server, or after osc_enqueue_base() matched a local DLM
+ * lock.
+ */
+static int osc_lock_upcall(void *cookie, int errcode)
+{
+ struct osc_lock *olck = cookie;
+ struct cl_lock_slice *slice = &olck->ols_cl;
+ struct cl_lock *lock = slice->cls_lock;
+ struct lu_env *env;
+ struct cl_env_nest nest;
+
+ ENTRY;
+ env = cl_env_nested_get(&nest);
+ if (!IS_ERR(env)) {
+ int rc;
+
+ cl_lock_mutex_get(env, lock);
+
+ LASSERT(lock->cll_state >= CLS_QUEUING);
+ if (olck->ols_state == OLS_ENQUEUED) {
+ olck->ols_state = OLS_UPCALL_RECEIVED;
+ rc = ldlm_error2errno(errcode);
+ } else if (olck->ols_state == OLS_CANCELLED) {
+ rc = -EIO;
+ } else {
+ CERROR("Impossible state: %d\n", olck->ols_state);
+ LBUG();
+ }
+ if (rc) {
+ struct ldlm_lock *dlmlock;
+
+ dlmlock = ldlm_handle2lock(&olck->ols_handle);
+ if (dlmlock != NULL) {
+ lock_res_and_lock(dlmlock);
+ spin_lock(&osc_ast_guard);
+ LASSERT(olck->ols_lock == NULL);
+ dlmlock->l_ast_data = NULL;
+ olck->ols_handle.cookie = 0ULL;
+ spin_unlock(&osc_ast_guard);
+ ldlm_lock_fail_match_locked(dlmlock);
+ unlock_res_and_lock(dlmlock);
+ LDLM_LOCK_PUT(dlmlock);
+ }
+ } else {
+ if (olck->ols_glimpse)
+ olck->ols_glimpse = 0;
+ osc_lock_upcall0(env, olck);
+ }
+
+ /* Error handling, some errors are tolerable. */
+ if (olck->ols_locklessable && rc == -EUSERS) {
+ /* This is a tolerable error, turn this lock into
+ * lockless lock.
+ */
+ osc_object_set_contended(cl2osc(slice->cls_obj));
+ LASSERT(slice->cls_ops == &osc_lock_ops);
+
+ /* Change this lock to ldlmlock-less lock. */
+ osc_lock_to_lockless(env, olck, 1);
+ olck->ols_state = OLS_GRANTED;
+ rc = 0;
+ } else if (olck->ols_glimpse && rc == -ENAVAIL) {
+ osc_lock_lvb_update(env, olck, rc);
+ cl_lock_delete(env, lock);
+ /* Hide the error. */
+ rc = 0;
+ }
+
+ if (rc == 0) {
+ /* For AGL case, the RPC sponsor may exits the cl_lock
+ * processing without wait() called before related OSC
+ * lock upcall(). So update the lock status according
+ * to the enqueue result inside AGL upcall(). */
+ if (olck->ols_agl) {
+ lock->cll_flags |= CLF_FROM_UPCALL;
+ cl_wait_try(env, lock);
+ lock->cll_flags &= ~CLF_FROM_UPCALL;
+ if (!olck->ols_glimpse)
+ olck->ols_agl = 0;
+ }
+ cl_lock_signal(env, lock);
+ /* del user for lock upcall cookie */
+ cl_unuse_try(env, lock);
+ } else {
+ /* del user for lock upcall cookie */
+ cl_lock_user_del(env, lock);
+ cl_lock_error(env, lock, rc);
+ }
+
+ /* release cookie reference, acquired by osc_lock_enqueue() */
+ cl_lock_hold_release(env, lock, "upcall", lock);
+ cl_lock_mutex_put(env, lock);
+
+ lu_ref_del(&lock->cll_reference, "upcall", lock);
+ /* This maybe the last reference, so must be called after
+ * cl_lock_mutex_put(). */
+ cl_lock_put(env, lock);
+
+ cl_env_nested_put(&nest, env);
+ } else {
+ /* should never happen, similar to osc_ldlm_blocking_ast(). */
+ LBUG();
+ }
+ RETURN(errcode);
+}
+
+/**
+ * Core of osc_dlm_blocking_ast() logic.
+ */
+static void osc_lock_blocking(const struct lu_env *env,
+ struct ldlm_lock *dlmlock,
+ struct osc_lock *olck, int blocking)
+{
+ struct cl_lock *lock = olck->ols_cl.cls_lock;
+
+ LASSERT(olck->ols_lock == dlmlock);
+ CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
+ LASSERT(!osc_lock_is_lockless(olck));
+
+ /*
+ * Lock might be still addref-ed here, if e.g., blocking ast
+ * is sent for a failed lock.
+ */
+ osc_lock_unhold(olck);
+
+ if (blocking && olck->ols_state < OLS_BLOCKED)
+ /*
+ * Move osc_lock into OLS_BLOCKED before canceling the lock,
+ * because it recursively re-enters osc_lock_blocking(), with
+ * the state set to OLS_CANCELLED.
+ */
+ olck->ols_state = OLS_BLOCKED;
+ /*
+ * cancel and destroy lock at least once no matter how blocking ast is
+ * entered (see comment above osc_ldlm_blocking_ast() for use
+ * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
+ */
+ cl_lock_cancel(env, lock);
+ cl_lock_delete(env, lock);
+}
+
+/**
+ * Helper for osc_dlm_blocking_ast() handling discrepancies between cl_lock
+ * and ldlm_lock caches.
+ */
+static int osc_dlm_blocking_ast0(const struct lu_env *env,
+ struct ldlm_lock *dlmlock,
+ void *data, int flag)
+{
+ struct osc_lock *olck;
+ struct cl_lock *lock;
+ int result;
+ int cancel;
+
+ LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);
+
+ cancel = 0;
+ olck = osc_ast_data_get(dlmlock);
+ if (olck != NULL) {
+ lock = olck->ols_cl.cls_lock;
+ cl_lock_mutex_get(env, lock);
+ LINVRNT(osc_lock_invariant(olck));
+ if (olck->ols_ast_wait) {
+ /* wake up osc_lock_use() */
+ cl_lock_signal(env, lock);
+ olck->ols_ast_wait = 0;
+ }
+ /*
+ * Lock might have been canceled while this thread was
+ * sleeping for lock mutex, but olck is pinned in memory.
+ */
+ if (olck == dlmlock->l_ast_data) {
+ /*
+ * NOTE: DLM sends blocking AST's for failed locks
+ * (that are still in pre-OLS_GRANTED state)
+ * too, and they have to be canceled otherwise
+ * DLM lock is never destroyed and stuck in
+ * the memory.
+ *
+ * Alternatively, ldlm_cli_cancel() can be
+ * called here directly for osc_locks with
+ * ols_state < OLS_GRANTED to maintain an
+ * invariant that ->clo_cancel() is only called
+ * for locks that were granted.
+ */
+ LASSERT(data == olck);
+ osc_lock_blocking(env, dlmlock,
+ olck, flag == LDLM_CB_BLOCKING);
+ } else
+ cancel = 1;
+ cl_lock_mutex_put(env, lock);
+ osc_ast_data_put(env, olck);
+ } else
+ /*
+ * DLM lock exists, but there is no cl_lock attached to it.
+ * This is a `normal' race. cl_object and its cl_lock's can be
+ * removed by memory pressure, together with all pages.
+ */
+ cancel = (flag == LDLM_CB_BLOCKING);
+
+ if (cancel) {
+ struct lustre_handle *lockh;
+
+ lockh = &osc_env_info(env)->oti_handle;
+ ldlm_lock2handle(dlmlock, lockh);
+ result = ldlm_cli_cancel(lockh, LCF_ASYNC);
+ } else
+ result = 0;
+ return result;
+}
+
+/**
+ * Blocking ast invoked by ldlm when dlm lock is either blocking progress of
+ * some other lock, or is canceled. This function is installed as a
+ * ldlm_lock::l_blocking_ast() for client extent locks.
+ *
+ * Control flow is tricky, because ldlm uses the same call-back
+ * (ldlm_lock::l_blocking_ast()) for both blocking and cancellation ast's.
+ *
+ * \param dlmlock lock for which ast occurred.
+ *
+ * \param new description of a conflicting lock in case of blocking ast.
+ *
+ * \param data value of dlmlock->l_ast_data
+ *
+ * \param flag LDLM_CB_BLOCKING or LDLM_CB_CANCELING. Used to distinguish
+ * cancellation and blocking ast's.
+ *
+ * Possible use cases:
+ *
+ * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) to cancel
+ * lock due to lock lru pressure, or explicit user request to purge
+ * locks.
+ *
+ * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_BLOCKING) to notify
+ * us that dlmlock conflicts with another lock that some client is
+ * enqueing. Lock is canceled.
+ *
+ * - cl_lock_cancel() is called. osc_lock_cancel() calls
+ * ldlm_cli_cancel() that calls
+ *
+ * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
+ *
+ * recursively entering osc_ldlm_blocking_ast().
+ *
+ * - client cancels lock voluntary (e.g., as a part of early cancellation):
+ *
+ * cl_lock_cancel()->
+ * osc_lock_cancel()->
+ * ldlm_cli_cancel()->
+ * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
+ *
+ */
+static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
+ struct ldlm_lock_desc *new, void *data,
+ int flag)
+{
+ struct lu_env *env;
+ struct cl_env_nest nest;
+ int result;
+
+ /*
+ * This can be called in the context of outer IO, e.g.,
+ *
+ * cl_enqueue()->...
+ * ->osc_enqueue_base()->...
+ * ->ldlm_prep_elc_req()->...
+ * ->ldlm_cancel_callback()->...
+ * ->osc_ldlm_blocking_ast()
+ *
+ * new environment has to be created to not corrupt outer context.
+ */
+ env = cl_env_nested_get(&nest);
+ if (!IS_ERR(env)) {
+ result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
+ cl_env_nested_put(&nest, env);
+ } else {
+ result = PTR_ERR(env);
+ /*
+ * XXX This should never happen, as cl_lock is
+ * stuck. Pre-allocated environment a la vvp_inode_fini_env
+ * should be used.
+ */
+ LBUG();
+ }
+ if (result != 0) {
+ if (result == -ENODATA)
+ result = 0;
+ else
+ CERROR("BAST failed: %d\n", result);
+ }
+ return result;
+}
+
+static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
+ __u64 flags, void *data)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct osc_lock *olck;
+ struct cl_lock *lock;
+ int result;
+ int dlmrc;
+
+ /* first, do dlm part of the work */
+ dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
+ /* then, notify cl_lock */
+ env = cl_env_nested_get(&nest);
+ if (!IS_ERR(env)) {
+ olck = osc_ast_data_get(dlmlock);
+ if (olck != NULL) {
+ lock = olck->ols_cl.cls_lock;
+ cl_lock_mutex_get(env, lock);
+ /*
+ * ldlm_handle_cp_callback() copied LVB from request
+ * to lock->l_lvb_data, store it in osc_lock.
+ */
+ LASSERT(dlmlock->l_lvb_data != NULL);
+ lock_res_and_lock(dlmlock);
+ olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+ if (olck->ols_lock == NULL) {
+ /*
+ * upcall (osc_lock_upcall()) hasn't yet been
+ * called. Do nothing now, upcall will bind
+ * olck to dlmlock and signal the waiters.
+ *
+ * This maintains an invariant that osc_lock
+ * and ldlm_lock are always bound when
+ * osc_lock is in OLS_GRANTED state.
+ */
+ } else if (dlmlock->l_granted_mode ==
+ dlmlock->l_req_mode) {
+ osc_lock_granted(env, olck, dlmlock, dlmrc);
+ }
+ unlock_res_and_lock(dlmlock);
+
+ if (dlmrc != 0) {
+ CL_LOCK_DEBUG(D_ERROR, env, lock,
+ "dlmlock returned %d\n", dlmrc);
+ cl_lock_error(env, lock, dlmrc);
+ }
+ cl_lock_mutex_put(env, lock);
+ osc_ast_data_put(env, olck);
+ result = 0;
+ } else
+ result = -ELDLM_NO_LOCK_DATA;
+ cl_env_nested_put(&nest, env);
+ } else
+ result = PTR_ERR(env);
+ return dlmrc ?: result;
+}
+
+static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
+{
+ struct ptlrpc_request *req = data;
+ struct osc_lock *olck;
+ struct cl_lock *lock;
+ struct cl_object *obj;
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct ost_lvb *lvb;
+ struct req_capsule *cap;
+ int result;
+
+ LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
+
+ env = cl_env_nested_get(&nest);
+ if (!IS_ERR(env)) {
+ /* osc_ast_data_get() has to go after environment is
+ * allocated, because osc_ast_data() acquires a
+ * reference to a lock, and it can only be released in
+ * environment.
+ */
+ olck = osc_ast_data_get(dlmlock);
+ if (olck != NULL) {
+ lock = olck->ols_cl.cls_lock;
+ /* Do not grab the mutex of cl_lock for glimpse.
+ * See LU-1274 for details.
+ * BTW, it's okay for cl_lock to be cancelled during
+ * this period because server can handle this race.
+ * See ldlm_server_glimpse_ast() for details.
+ * cl_lock_mutex_get(env, lock); */
+ cap = &req->rq_pill;
+ req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
+ req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
+ sizeof *lvb);
+ result = req_capsule_server_pack(cap);
+ if (result == 0) {
+ lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
+ obj = lock->cll_descr.cld_obj;
+ result = cl_object_glimpse(env, obj, lvb);
+ }
+ if (!exp_connect_lvb_type(req->rq_export))
+ req_capsule_shrink(&req->rq_pill,
+ &RMF_DLM_LVB,
+ sizeof(struct ost_lvb_v1),
+ RCL_SERVER);
+ osc_ast_data_put(env, olck);
+ } else {
+ /*
+ * These errors are normal races, so we don't want to
+ * fill the console with messages by calling
+ * ptlrpc_error()
+ */
+ lustre_pack_reply(req, 1, NULL, NULL);
+ result = -ELDLM_NO_LOCK_DATA;
+ }
+ cl_env_nested_put(&nest, env);
+ } else
+ result = PTR_ERR(env);
+ req->rq_status = result;
+ return result;
+}
+
+static unsigned long osc_lock_weigh(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ /*
+ * don't need to grab coh_page_guard since we don't care the exact #
+ * of pages..
+ */
+ return cl_object_header(slice->cls_obj)->coh_pages;
+}
+
+/**
+ * Get the weight of dlm lock for early cancellation.
+ *
+ * XXX: it should return the pages covered by this \a dlmlock.
+ */
+static unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct osc_lock *lock;
+ struct cl_lock *cll;
+ unsigned long weight;
+ ENTRY;
+
+ might_sleep();
+ /*
+ * osc_ldlm_weigh_ast has a complex context since it might be called
+ * because of lock canceling, or from user's input. We have to make
+ * a new environment for it. Probably it is implementation safe to use
+ * the upper context because cl_lock_put don't modify environment
+ * variables. But in case of ..
+ */
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ /* Mostly because lack of memory, tend to eliminate this lock*/
+ RETURN(0);
+
+ LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
+ lock = osc_ast_data_get(dlmlock);
+ if (lock == NULL) {
+ /* cl_lock was destroyed because of memory pressure.
+ * It is much reasonable to assign this type of lock
+ * a lower cost.
+ */
+ GOTO(out, weight = 0);
+ }
+
+ cll = lock->ols_cl.cls_lock;
+ cl_lock_mutex_get(env, cll);
+ weight = cl_lock_weigh(env, cll);
+ cl_lock_mutex_put(env, cll);
+ osc_ast_data_put(env, lock);
+ EXIT;
+
+out:
+ cl_env_nested_put(&nest, env);
+ return weight;
+}
+
+static void osc_lock_build_einfo(const struct lu_env *env,
+ const struct cl_lock *clock,
+ struct osc_lock *lock,
+ struct ldlm_enqueue_info *einfo)
+{
+ enum cl_lock_mode mode;
+
+ mode = clock->cll_descr.cld_mode;
+ if (mode == CLM_PHANTOM)
+ /*
+ * For now, enqueue all glimpse locks in read mode. In the
+ * future, client might choose to enqueue LCK_PW lock for
+ * glimpse on a file opened for write.
+ */
+ mode = CLM_READ;
+
+ einfo->ei_type = LDLM_EXTENT;
+ einfo->ei_mode = osc_cl_lock2ldlm(mode);
+ einfo->ei_cb_bl = osc_ldlm_blocking_ast;
+ einfo->ei_cb_cp = osc_ldlm_completion_ast;
+ einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
+ einfo->ei_cb_wg = osc_ldlm_weigh_ast;
+ einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
+}
+
+/**
+ * Determine if the lock should be converted into a lockless lock.
+ *
+ * Steps to check:
+ * - if the lock has an explicite requirment for a non-lockless lock;
+ * - if the io lock request type ci_lockreq;
+ * - send the enqueue rpc to ost to make the further decision;
+ * - special treat to truncate lockless lock
+ *
+ * Additional policy can be implemented here, e.g., never do lockless-io
+ * for large extents.
+ */
+static void osc_lock_to_lockless(const struct lu_env *env,
+ struct osc_lock *ols, int force)
+{
+ struct cl_lock_slice *slice = &ols->ols_cl;
+
+ LASSERT(ols->ols_state == OLS_NEW ||
+ ols->ols_state == OLS_UPCALL_RECEIVED);
+
+ if (force) {
+ ols->ols_locklessable = 1;
+ slice->cls_ops = &osc_lock_lockless_ops;
+ } else {
+ struct osc_io *oio = osc_env_io(env);
+ struct cl_io *io = oio->oi_cl.cis_io;
+ struct cl_object *obj = slice->cls_obj;
+ struct osc_object *oob = cl2osc(obj);
+ const struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
+ struct obd_connect_data *ocd;
+
+ LASSERT(io->ci_lockreq == CILR_MANDATORY ||
+ io->ci_lockreq == CILR_MAYBE ||
+ io->ci_lockreq == CILR_NEVER);
+
+ ocd = &class_exp2cliimp(osc_export(oob))->imp_connect_data;
+ ols->ols_locklessable = (io->ci_type != CIT_SETATTR) &&
+ (io->ci_lockreq == CILR_MAYBE) &&
+ (ocd->ocd_connect_flags & OBD_CONNECT_SRVLOCK);
+ if (io->ci_lockreq == CILR_NEVER ||
+ /* lockless IO */
+ (ols->ols_locklessable && osc_object_is_contended(oob)) ||
+ /* lockless truncate */
+ (cl_io_is_trunc(io) &&
+ (ocd->ocd_connect_flags & OBD_CONNECT_TRUNCLOCK) &&
+ osd->od_lockless_truncate)) {
+ ols->ols_locklessable = 1;
+ slice->cls_ops = &osc_lock_lockless_ops;
+ }
+ }
+ LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
+}
+
+static int osc_lock_compatible(const struct osc_lock *qing,
+ const struct osc_lock *qed)
+{
+ enum cl_lock_mode qing_mode;
+ enum cl_lock_mode qed_mode;
+
+ qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
+ if (qed->ols_glimpse &&
+ (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
+ return 1;
+
+ qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
+ return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
+}
+
+/**
+ * Cancel all conflicting locks and wait for them to be destroyed.
+ *
+ * This function is used for two purposes:
+ *
+ * - early cancel all conflicting locks before starting IO, and
+ *
+ * - guarantee that pages added to the page cache by lockless IO are never
+ * covered by locks other than lockless IO lock, and, hence, are not
+ * visible to other threads.
+ */
+static int osc_lock_enqueue_wait(const struct lu_env *env,
+ const struct osc_lock *olck)
+{
+ struct cl_lock *lock = olck->ols_cl.cls_lock;
+ struct cl_lock_descr *descr = &lock->cll_descr;
+ struct cl_object_header *hdr = cl_object_header(descr->cld_obj);
+ struct cl_lock *scan;
+ struct cl_lock *conflict= NULL;
+ int lockless = osc_lock_is_lockless(olck);
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+
+ /* make it enqueue anyway for glimpse lock, because we actually
+ * don't need to cancel any conflicting locks. */
+ if (olck->ols_glimpse)
+ return 0;
+
+ spin_lock(&hdr->coh_lock_guard);
+ list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
+ struct cl_lock_descr *cld = &scan->cll_descr;
+ const struct osc_lock *scan_ols;
+
+ if (scan == lock)
+ break;
+
+ if (scan->cll_state < CLS_QUEUING ||
+ scan->cll_state == CLS_FREEING ||
+ cld->cld_start > descr->cld_end ||
+ cld->cld_end < descr->cld_start)
+ continue;
+
+ /* overlapped and living locks. */
+
+ /* We're not supposed to give up group lock. */
+ if (scan->cll_descr.cld_mode == CLM_GROUP) {
+ LASSERT(descr->cld_mode != CLM_GROUP ||
+ descr->cld_gid != scan->cll_descr.cld_gid);
+ continue;
+ }
+
+ scan_ols = osc_lock_at(scan);
+
+ /* We need to cancel the compatible locks if we're enqueuing
+ * a lockless lock, for example:
+ * imagine that client has PR lock on [0, 1000], and thread T0
+ * is doing lockless IO in [500, 1500] region. Concurrent
+ * thread T1 can see lockless data in [500, 1000], which is
+ * wrong, because these data are possibly stale. */
+ if (!lockless && osc_lock_compatible(olck, scan_ols))
+ continue;
+
+ cl_lock_get_trust(scan);
+ conflict = scan;
+ break;
+ }
+ spin_unlock(&hdr->coh_lock_guard);
+
+ if (conflict) {
+ if (lock->cll_descr.cld_mode == CLM_GROUP) {
+ /* we want a group lock but a previous lock request
+ * conflicts, we do not wait but return 0 so the
+ * request is send to the server
+ */
+ CDEBUG(D_DLMTRACE, "group lock %p is conflicted "
+ "with %p, no wait, send to server\n",
+ lock, conflict);
+ cl_lock_put(env, conflict);
+ rc = 0;
+ } else {
+ CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, "
+ "will wait\n",
+ lock, conflict);
+ LASSERT(lock->cll_conflict == NULL);
+ lu_ref_add(&conflict->cll_reference, "cancel-wait",
+ lock);
+ lock->cll_conflict = conflict;
+ rc = CLO_WAIT;
+ }
+ }
+ RETURN(rc);
+}
+
+/**
+ * Implementation of cl_lock_operations::clo_enqueue() method for osc
+ * layer. This initiates ldlm enqueue:
+ *
+ * - cancels conflicting locks early (osc_lock_enqueue_wait());
+ *
+ * - calls osc_enqueue_base() to do actual enqueue.
+ *
+ * osc_enqueue_base() is supplied with an upcall function that is executed
+ * when lock is received either after a local cached ldlm lock is matched, or
+ * when a reply from the server is received.
+ *
+ * This function does not wait for the network communication to complete.
+ */
+static int osc_lock_enqueue(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *unused, __u32 enqflags)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+ struct cl_lock *lock = ols->ols_cl.cls_lock;
+ int result;
+ ENTRY;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+ LASSERTF(ols->ols_state == OLS_NEW,
+ "Impossible state: %d\n", ols->ols_state);
+
+ LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
+ "lock = %p, ols = %p\n", lock, ols);
+
+ result = osc_lock_enqueue_wait(env, ols);
+ if (result == 0) {
+ if (!osc_lock_is_lockless(ols)) {
+ struct osc_object *obj = cl2osc(slice->cls_obj);
+ struct osc_thread_info *info = osc_env_info(env);
+ struct ldlm_res_id *resname = &info->oti_resname;
+ ldlm_policy_data_t *policy = &info->oti_policy;
+ struct ldlm_enqueue_info *einfo = &ols->ols_einfo;
+
+ /* lock will be passed as upcall cookie,
+ * hold ref to prevent to be released. */
+ cl_lock_hold_add(env, lock, "upcall", lock);
+ /* a user for lock also */
+ cl_lock_user_add(env, lock);
+ ols->ols_state = OLS_ENQUEUED;
+
+ /*
+ * XXX: this is possible blocking point as
+ * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
+ * LDLM_CP_CALLBACK.
+ */
+ ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
+ osc_lock_build_policy(env, lock, policy);
+ result = osc_enqueue_base(osc_export(obj), resname,
+ &ols->ols_flags, policy,
+ &ols->ols_lvb,
+ obj->oo_oinfo->loi_kms_valid,
+ osc_lock_upcall,
+ ols, einfo, &ols->ols_handle,
+ PTLRPCD_SET, 1, ols->ols_agl);
+ if (result != 0) {
+ cl_lock_user_del(env, lock);
+ cl_lock_unhold(env, lock, "upcall", lock);
+ if (unlikely(result == -ECANCELED)) {
+ ols->ols_state = OLS_NEW;
+ result = 0;
+ }
+ }
+ } else {
+ ols->ols_state = OLS_GRANTED;
+ ols->ols_owner = osc_env_io(env);
+ }
+ }
+ LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
+ RETURN(result);
+}
+
+static int osc_lock_wait(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *olck = cl2osc_lock(slice);
+ struct cl_lock *lock = olck->ols_cl.cls_lock;
+
+ LINVRNT(osc_lock_invariant(olck));
+
+ if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
+ if (olck->ols_flags & LDLM_FL_LVB_READY) {
+ return 0;
+ } else if (olck->ols_agl) {
+ if (lock->cll_flags & CLF_FROM_UPCALL)
+ /* It is from enqueue RPC reply upcall for
+ * updating state. Do not re-enqueue. */
+ return -ENAVAIL;
+ else
+ olck->ols_state = OLS_NEW;
+ } else {
+ LASSERT(lock->cll_error);
+ return lock->cll_error;
+ }
+ }
+
+ if (olck->ols_state == OLS_NEW) {
+ int rc;
+
+ LASSERT(olck->ols_agl);
+ olck->ols_agl = 0;
+ rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
+ if (rc != 0)
+ return rc;
+ else
+ return CLO_REENQUEUED;
+ }
+
+ LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
+ lock->cll_error == 0, olck->ols_lock != NULL));
+
+ return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_use() method that pins cached
+ * lock.
+ */
+static int osc_lock_use(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *olck = cl2osc_lock(slice);
+ int rc;
+
+ LASSERT(!olck->ols_hold);
+
+ /*
+ * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
+ * flag is not set. This protects us from a concurrent blocking ast.
+ */
+ rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
+ if (rc == 0) {
+ olck->ols_hold = 1;
+ olck->ols_state = OLS_GRANTED;
+ } else {
+ struct cl_lock *lock;
+
+ /*
+ * Lock is being cancelled somewhere within
+ * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
+ * set, but osc_ldlm_blocking_ast() hasn't yet acquired
+ * cl_lock mutex.
+ */
+ lock = slice->cls_lock;
+ LASSERT(lock->cll_state == CLS_INTRANSIT);
+ LASSERT(lock->cll_users > 0);
+ /* set a flag for osc_dlm_blocking_ast0() to signal the
+ * lock.*/
+ olck->ols_ast_wait = 1;
+ rc = CLO_WAIT;
+ }
+ return rc;
+}
+
+static int osc_lock_flush(struct osc_lock *ols, int discard)
+{
+ struct cl_lock *lock = ols->ols_cl.cls_lock;
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ int result = 0;
+ ENTRY;
+
+ env = cl_env_nested_get(&nest);
+ if (!IS_ERR(env)) {
+ struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj);
+ struct cl_lock_descr *descr = &lock->cll_descr;
+ int rc = 0;
+
+ if (descr->cld_mode >= CLM_WRITE) {
+ result = osc_cache_writeback_range(env, obj,
+ descr->cld_start, descr->cld_end,
+ 1, discard);
+ LDLM_DEBUG(ols->ols_lock,
+ "lock %p: %d pages were %s.\n", lock, result,
+ discard ? "discarded" : "written");
+ if (result > 0)
+ result = 0;
+ }
+
+ rc = cl_lock_discard_pages(env, lock);
+ if (result == 0 && rc < 0)
+ result = rc;
+
+ cl_env_nested_put(&nest, env);
+ } else
+ result = PTR_ERR(env);
+ if (result == 0) {
+ ols->ols_flush = 1;
+ LINVRNT(!osc_lock_has_pages(ols));
+ }
+ RETURN(result);
+}
+
+/**
+ * Implements cl_lock_operations::clo_cancel() method for osc layer. This is
+ * called (as part of cl_lock_cancel()) when lock is canceled either voluntary
+ * (LRU pressure, early cancellation, umount, etc.) or due to the conflict
+ * with some other lock some where in the cluster. This function does the
+ * following:
+ *
+ * - invalidates all pages protected by this lock (after sending dirty
+ * ones to the server, as necessary);
+ *
+ * - decref's underlying ldlm lock;
+ *
+ * - cancels ldlm lock (ldlm_cli_cancel()).
+ */
+static void osc_lock_cancel(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct cl_lock *lock = slice->cls_lock;
+ struct osc_lock *olck = cl2osc_lock(slice);
+ struct ldlm_lock *dlmlock = olck->ols_lock;
+ int result = 0;
+ int discard;
+
+ LASSERT(cl_lock_is_mutexed(lock));
+ LINVRNT(osc_lock_invariant(olck));
+
+ if (dlmlock != NULL) {
+ int do_cancel;
+
+ discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA);
+ if (olck->ols_state >= OLS_GRANTED)
+ result = osc_lock_flush(olck, discard);
+ osc_lock_unhold(olck);
+
+ lock_res_and_lock(dlmlock);
+ /* Now that we're the only user of dlm read/write reference,
+ * mostly the ->l_readers + ->l_writers should be zero.
+ * However, there is a corner case.
+ * See bug 18829 for details.*/
+ do_cancel = (dlmlock->l_readers == 0 &&
+ dlmlock->l_writers == 0);
+ dlmlock->l_flags |= LDLM_FL_CBPENDING;
+ unlock_res_and_lock(dlmlock);
+ if (do_cancel)
+ result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
+ if (result < 0)
+ CL_LOCK_DEBUG(D_ERROR, env, lock,
+ "lock %p cancel failure with error(%d)\n",
+ lock, result);
+ }
+ olck->ols_state = OLS_CANCELLED;
+ olck->ols_flags &= ~LDLM_FL_LVB_READY;
+ osc_lock_detach(env, olck);
+}
+
+static int osc_lock_has_pages(struct osc_lock *olck)
+{
+ return 0;
+}
+
+static void osc_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *olck;
+
+ olck = cl2osc_lock(slice);
+ if (olck->ols_glimpse) {
+ LASSERT(!olck->ols_hold);
+ LASSERT(!olck->ols_lock);
+ return;
+ }
+
+ LINVRNT(osc_lock_invariant(olck));
+ LINVRNT(!osc_lock_has_pages(olck));
+
+ osc_lock_unhold(olck);
+ osc_lock_detach(env, olck);
+}
+
+/**
+ * Implements cl_lock_operations::clo_state() method for osc layer.
+ *
+ * Maintains osc_lock::ols_owner field.
+ *
+ * This assumes that lock always enters CLS_HELD (from some other state) in
+ * the same IO context as one that requested the lock. This should not be a
+ * problem, because context is by definition shared by all activity pertaining
+ * to the same high-level IO.
+ */
+static void osc_lock_state(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state state)
+{
+ struct osc_lock *lock = cl2osc_lock(slice);
+
+ /*
+ * XXX multiple io contexts can use the lock at the same time.
+ */
+ LINVRNT(osc_lock_invariant(lock));
+ if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
+ struct osc_io *oio = osc_env_io(env);
+
+ LASSERT(lock->ols_owner == NULL);
+ lock->ols_owner = oio;
+ } else if (state != CLS_HELD)
+ lock->ols_owner = NULL;
+}
+
+static int osc_lock_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct cl_lock_slice *slice)
+{
+ struct osc_lock *lock = cl2osc_lock(slice);
+
+ /*
+ * XXX print ldlm lock and einfo properly.
+ */
+ (*p)(env, cookie, "%p %#16llx "LPX64" %d %p ",
+ lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
+ lock->ols_state, lock->ols_owner);
+ osc_lvb_print(env, cookie, p, &lock->ols_lvb);
+ return 0;
+}
+
+static int osc_lock_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+
+ if (need->cld_enq_flags & CEF_NEVER)
+ return 0;
+
+ if (ols->ols_state >= OLS_CANCELLED)
+ return 0;
+
+ if (need->cld_mode == CLM_PHANTOM) {
+ if (ols->ols_agl)
+ return !(ols->ols_state > OLS_RELEASED);
+
+ /*
+ * Note: the QUEUED lock can't be matched here, otherwise
+ * it might cause the deadlocks.
+ * In read_process,
+ * P1: enqueued read lock, create sublock1
+ * P2: enqueued write lock, create sublock2(conflicted
+ * with sublock1).
+ * P1: Grant read lock.
+ * P1: enqueued glimpse lock(with holding sublock1_read),
+ * matched with sublock2, waiting sublock2 to be granted.
+ * But sublock2 can not be granted, because P1
+ * will not release sublock1. Bang!
+ */
+ if (ols->ols_state < OLS_GRANTED ||
+ ols->ols_state > OLS_RELEASED)
+ return 0;
+ } else if (need->cld_enq_flags & CEF_MUST) {
+ /*
+ * If the lock hasn't ever enqueued, it can't be matched
+ * because enqueue process brings in many information
+ * which can be used to determine things such as lockless,
+ * CEF_MUST, etc.
+ */
+ if (ols->ols_state < OLS_UPCALL_RECEIVED &&
+ ols->ols_locklessable)
+ return 0;
+ }
+ return 1;
+}
+
+static const struct cl_lock_operations osc_lock_ops = {
+ .clo_fini = osc_lock_fini,
+ .clo_enqueue = osc_lock_enqueue,
+ .clo_wait = osc_lock_wait,
+ .clo_unuse = osc_lock_unuse,
+ .clo_use = osc_lock_use,
+ .clo_delete = osc_lock_delete,
+ .clo_state = osc_lock_state,
+ .clo_cancel = osc_lock_cancel,
+ .clo_weigh = osc_lock_weigh,
+ .clo_print = osc_lock_print,
+ .clo_fits_into = osc_lock_fits_into,
+};
+
+static int osc_lock_lockless_unuse(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+ struct cl_lock *lock = slice->cls_lock;
+
+ LASSERT(ols->ols_state == OLS_GRANTED);
+ LINVRNT(osc_lock_invariant(ols));
+
+ cl_lock_cancel(env, lock);
+ cl_lock_delete(env, lock);
+ return 0;
+}
+
+static void osc_lock_lockless_cancel(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *ols = cl2osc_lock(slice);
+ int result;
+
+ result = osc_lock_flush(ols, 0);
+ if (result)
+ CERROR("Pages for lockless lock %p were not purged(%d)\n",
+ ols, result);
+ ols->ols_state = OLS_CANCELLED;
+}
+
+static int osc_lock_lockless_wait(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct osc_lock *olck = cl2osc_lock(slice);
+ struct cl_lock *lock = olck->ols_cl.cls_lock;
+
+ LINVRNT(osc_lock_invariant(olck));
+ LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);
+
+ return lock->cll_error;
+}
+
+static void osc_lock_lockless_state(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ enum cl_lock_state state)
+{
+ struct osc_lock *lock = cl2osc_lock(slice);
+
+ LINVRNT(osc_lock_invariant(lock));
+ if (state == CLS_HELD) {
+ struct osc_io *oio = osc_env_io(env);
+
+ LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
+ lock->ols_owner = oio;
+
+ /* set the io to be lockless if this lock is for io's
+ * host object */
+ if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
+ oio->oi_lockless = 1;
+ }
+}
+
+static int osc_lock_lockless_fits_into(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ const struct cl_lock_descr *need,
+ const struct cl_io *io)
+{
+ struct osc_lock *lock = cl2osc_lock(slice);
+
+ if (!(need->cld_enq_flags & CEF_NEVER))
+ return 0;
+
+ /* lockless lock should only be used by its owning io. b22147 */
+ return (lock->ols_owner == osc_env_io(env));
+}
+
+static const struct cl_lock_operations osc_lock_lockless_ops = {
+ .clo_fini = osc_lock_fini,
+ .clo_enqueue = osc_lock_enqueue,
+ .clo_wait = osc_lock_lockless_wait,
+ .clo_unuse = osc_lock_lockless_unuse,
+ .clo_state = osc_lock_lockless_state,
+ .clo_fits_into = osc_lock_lockless_fits_into,
+ .clo_cancel = osc_lock_lockless_cancel,
+ .clo_print = osc_lock_print
+};
+
+int osc_lock_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_lock *lock,
+ const struct cl_io *unused)
+{
+ struct osc_lock *clk;
+ int result;
+
+ OBD_SLAB_ALLOC_PTR_GFP(clk, osc_lock_kmem, __GFP_IO);
+ if (clk != NULL) {
+ __u32 enqflags = lock->cll_descr.cld_enq_flags;
+
+ osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
+ atomic_set(&clk->ols_pageref, 0);
+ clk->ols_state = OLS_NEW;
+
+ clk->ols_flags = osc_enq2ldlm_flags(enqflags);
+ clk->ols_agl = !!(enqflags & CEF_AGL);
+ if (clk->ols_agl)
+ clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+ if (clk->ols_flags & LDLM_FL_HAS_INTENT)
+ clk->ols_glimpse = 1;
+
+ cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);
+
+ if (!(enqflags & CEF_MUST))
+ /* try to convert this lock to a lockless lock */
+ osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
+ if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
+ clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
+
+ LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx\n",
+ lock, clk, clk->ols_flags);
+
+ result = 0;
+ } else
+ result = -ENOMEM;
+ return result;
+}
+
+int osc_dlm_lock_pageref(struct ldlm_lock *dlm)
+{
+ struct osc_lock *olock;
+ int rc = 0;
+
+ spin_lock(&osc_ast_guard);
+ olock = dlm->l_ast_data;
+ /*
+ * there's a very rare race with osc_page_addref_lock(), but that
+ * doesn't matter because in the worst case we don't cancel a lock
+ * which we actually can, that's no harm.
+ */
+ if (olock != NULL &&
+ atomic_add_return(_PAGEREF_MAGIC,
+ &olock->ols_pageref) != _PAGEREF_MAGIC) {
+ atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref);
+ rc = 1;
+ }
+ spin_unlock(&osc_ast_guard);
+ return rc;
+}
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
new file mode 100644
index 000000000000..ca94e6331381
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -0,0 +1,275 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_object for OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+#include "osc_cl_internal.h"
+
+/** \addtogroup osc
+ * @{
+ */
+
+/*****************************************************************************
+ *
+ * Type conversions.
+ *
+ */
+
+static struct lu_object *osc2lu(struct osc_object *osc)
+{
+ return &osc->oo_cl.co_lu;
+}
+
+static struct osc_object *lu2osc(const struct lu_object *obj)
+{
+ LINVRNT(osc_is_object(obj));
+ return container_of0(obj, struct osc_object, oo_cl.co_lu);
+}
+
+/*****************************************************************************
+ *
+ * Object operations.
+ *
+ */
+
+static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct osc_object *osc = lu2osc(obj);
+ const struct cl_object_conf *cconf = lu2cl_conf(conf);
+ int i;
+
+ osc->oo_oinfo = cconf->u.coc_oinfo;
+ spin_lock_init(&osc->oo_seatbelt);
+ for (i = 0; i < CRT_NR; ++i)
+ INIT_LIST_HEAD(&osc->oo_inflight[i]);
+
+ INIT_LIST_HEAD(&osc->oo_ready_item);
+ INIT_LIST_HEAD(&osc->oo_hp_ready_item);
+ INIT_LIST_HEAD(&osc->oo_write_item);
+ INIT_LIST_HEAD(&osc->oo_read_item);
+
+ osc->oo_root.rb_node = NULL;
+ INIT_LIST_HEAD(&osc->oo_hp_exts);
+ INIT_LIST_HEAD(&osc->oo_urgent_exts);
+ INIT_LIST_HEAD(&osc->oo_rpc_exts);
+ INIT_LIST_HEAD(&osc->oo_reading_exts);
+ atomic_set(&osc->oo_nr_reads, 0);
+ atomic_set(&osc->oo_nr_writes, 0);
+ spin_lock_init(&osc->oo_lock);
+
+ cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
+
+ return 0;
+}
+
+static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct osc_object *osc = lu2osc(obj);
+ int i;
+
+ for (i = 0; i < CRT_NR; ++i)
+ LASSERT(list_empty(&osc->oo_inflight[i]));
+
+ LASSERT(list_empty(&osc->oo_ready_item));
+ LASSERT(list_empty(&osc->oo_hp_ready_item));
+ LASSERT(list_empty(&osc->oo_write_item));
+ LASSERT(list_empty(&osc->oo_read_item));
+
+ LASSERT(osc->oo_root.rb_node == NULL);
+ LASSERT(list_empty(&osc->oo_hp_exts));
+ LASSERT(list_empty(&osc->oo_urgent_exts));
+ LASSERT(list_empty(&osc->oo_rpc_exts));
+ LASSERT(list_empty(&osc->oo_reading_exts));
+ LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
+ LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
+
+ lu_object_fini(obj);
+ OBD_SLAB_FREE_PTR(osc, osc_object_kmem);
+}
+
+int osc_lvb_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct ost_lvb *lvb)
+{
+ return (*p)(env, cookie, "size: "LPU64" mtime: "LPU64" atime: "LPU64" "
+ "ctime: "LPU64" blocks: "LPU64,
+ lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime,
+ lvb->lvb_ctime, lvb->lvb_blocks);
+}
+
+static int osc_object_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct lu_object *obj)
+{
+ struct osc_object *osc = lu2osc(obj);
+ struct lov_oinfo *oinfo = osc->oo_oinfo;
+ struct osc_async_rc *ar = &oinfo->loi_ar;
+
+ (*p)(env, cookie, "id: "DOSTID" "
+ "idx: %d gen: %d kms_valid: %u kms "LPU64" "
+ "rc: %d force_sync: %d min_xid: "LPU64" ",
+ POSTID(&oinfo->loi_oi), oinfo->loi_ost_idx,
+ oinfo->loi_ost_gen, oinfo->loi_kms_valid, oinfo->loi_kms,
+ ar->ar_rc, ar->ar_force_sync, ar->ar_min_xid);
+ osc_lvb_print(env, cookie, p, &oinfo->loi_lvb);
+ return 0;
+}
+
+
+static int osc_attr_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_attr *attr)
+{
+ struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
+
+ cl_lvb2attr(attr, &oinfo->loi_lvb);
+ attr->cat_kms = oinfo->loi_kms_valid ? oinfo->loi_kms : 0;
+ return 0;
+}
+
+int osc_attr_set(const struct lu_env *env, struct cl_object *obj,
+ const struct cl_attr *attr, unsigned valid)
+{
+ struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
+ struct ost_lvb *lvb = &oinfo->loi_lvb;
+
+ if (valid & CAT_SIZE)
+ lvb->lvb_size = attr->cat_size;
+ if (valid & CAT_MTIME)
+ lvb->lvb_mtime = attr->cat_mtime;
+ if (valid & CAT_ATIME)
+ lvb->lvb_atime = attr->cat_atime;
+ if (valid & CAT_CTIME)
+ lvb->lvb_ctime = attr->cat_ctime;
+ if (valid & CAT_BLOCKS)
+ lvb->lvb_blocks = attr->cat_blocks;
+ if (valid & CAT_KMS) {
+ CDEBUG(D_CACHE, "set kms from "LPU64"to "LPU64"\n",
+ oinfo->loi_kms, (__u64)attr->cat_kms);
+ loi_kms_set(oinfo, attr->cat_kms);
+ }
+ return 0;
+}
+
+static int osc_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb)
+{
+ struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
+
+ ENTRY;
+ lvb->lvb_size = oinfo->loi_kms;
+ lvb->lvb_blocks = oinfo->loi_lvb.lvb_blocks;
+ RETURN(0);
+}
+
+
+void osc_object_set_contended(struct osc_object *obj)
+{
+ obj->oo_contention_time = cfs_time_current();
+ /* mb(); */
+ obj->oo_contended = 1;
+}
+
+void osc_object_clear_contended(struct osc_object *obj)
+{
+ obj->oo_contended = 0;
+}
+
+int osc_object_is_contended(struct osc_object *obj)
+{
+ struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev);
+ int osc_contention_time = dev->od_contention_time;
+ cfs_time_t cur_time = cfs_time_current();
+ cfs_time_t retry_time;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION))
+ return 1;
+
+ if (!obj->oo_contended)
+ return 0;
+
+ /*
+ * I like copy-paste. the code is copied from
+ * ll_file_is_contended.
+ */
+ retry_time = cfs_time_add(obj->oo_contention_time,
+ cfs_time_seconds(osc_contention_time));
+ if (cfs_time_after(cur_time, retry_time)) {
+ osc_object_clear_contended(obj);
+ return 0;
+ }
+ return 1;
+}
+
+static const struct cl_object_operations osc_ops = {
+ .coo_page_init = osc_page_init,
+ .coo_lock_init = osc_lock_init,
+ .coo_io_init = osc_io_init,
+ .coo_attr_get = osc_attr_get,
+ .coo_attr_set = osc_attr_set,
+ .coo_glimpse = osc_object_glimpse
+};
+
+static const struct lu_object_operations osc_lu_obj_ops = {
+ .loo_object_init = osc_object_init,
+ .loo_object_delete = NULL,
+ .loo_object_release = NULL,
+ .loo_object_free = osc_object_free,
+ .loo_object_print = osc_object_print,
+ .loo_object_invariant = NULL
+};
+
+struct lu_object *osc_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *unused,
+ struct lu_device *dev)
+{
+ struct osc_object *osc;
+ struct lu_object *obj;
+
+ OBD_SLAB_ALLOC_PTR_GFP(osc, osc_object_kmem, __GFP_IO);
+ if (osc != NULL) {
+ obj = osc2lu(osc);
+ lu_object_init(obj, NULL, dev);
+ osc->oo_cl.co_ops = &osc_ops;
+ obj->lo_ops = &osc_lu_obj_ops;
+ } else
+ obj = NULL;
+ return obj;
+}
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
new file mode 100644
index 000000000000..baba959a7450
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -0,0 +1,927 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Implementation of cl_page for OSC layer.
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+#include "osc_cl_internal.h"
+
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del);
+static void osc_lru_add(struct client_obd *cli, struct osc_page *opg);
+static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
+ struct osc_page *opg);
+
+/** \addtogroup osc
+ * @{
+ */
+
+/*
+ * Comment out osc_page_protected because it may sleep inside the
+ * the client_obd_list_lock.
+ * client_obd_list_lock -> osc_ap_completion -> osc_completion ->
+ * -> osc_page_protected -> osc_page_is_dlocked -> osc_match_base
+ * -> ldlm_lock_match -> sptlrpc_import_check_ctx -> sleep.
+ */
+#if 0
+static int osc_page_is_dlocked(const struct lu_env *env,
+ const struct osc_page *opg,
+ enum cl_lock_mode mode, int pending, int unref)
+{
+ struct cl_page *page;
+ struct osc_object *obj;
+ struct osc_thread_info *info;
+ struct ldlm_res_id *resname;
+ struct lustre_handle *lockh;
+ ldlm_policy_data_t *policy;
+ ldlm_mode_t dlmmode;
+ int flags;
+
+ might_sleep();
+
+ info = osc_env_info(env);
+ resname = &info->oti_resname;
+ policy = &info->oti_policy;
+ lockh = &info->oti_handle;
+ page = opg->ops_cl.cpl_page;
+ obj = cl2osc(opg->ops_cl.cpl_obj);
+
+ flags = LDLM_FL_TEST_LOCK | LDLM_FL_BLOCK_GRANTED;
+ if (pending)
+ flags |= LDLM_FL_CBPENDING;
+
+ dlmmode = osc_cl_lock2ldlm(mode) | LCK_PW;
+ osc_lock_build_res(env, obj, resname);
+ osc_index2policy(policy, page->cp_obj, page->cp_index, page->cp_index);
+ return osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy,
+ dlmmode, &flags, NULL, lockh, unref);
+}
+
+/**
+ * Checks an invariant that a page in the cache is covered by a lock, as
+ * needed.
+ */
+static int osc_page_protected(const struct lu_env *env,
+ const struct osc_page *opg,
+ enum cl_lock_mode mode, int unref)
+{
+ struct cl_object_header *hdr;
+ struct cl_lock *scan;
+ struct cl_page *page;
+ struct cl_lock_descr *descr;
+ int result;
+
+ LINVRNT(!opg->ops_temp);
+
+ page = opg->ops_cl.cpl_page;
+ if (page->cp_owner != NULL &&
+ cl_io_top(page->cp_owner)->ci_lockreq == CILR_NEVER)
+ /*
+ * If IO is done without locks (liblustre, or lloop), lock is
+ * not required.
+ */
+ result = 1;
+ else
+ /* otherwise check for a DLM lock */
+ result = osc_page_is_dlocked(env, opg, mode, 1, unref);
+ if (result == 0) {
+ /* maybe this page is a part of a lockless io? */
+ hdr = cl_object_header(opg->ops_cl.cpl_obj);
+ descr = &osc_env_info(env)->oti_descr;
+ descr->cld_mode = mode;
+ descr->cld_start = page->cp_index;
+ descr->cld_end = page->cp_index;
+ spin_lock(&hdr->coh_lock_guard);
+ list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
+ /*
+ * Lock-less sub-lock has to be either in HELD state
+ * (when io is actively going on), or in CACHED state,
+ * when top-lock is being unlocked:
+ * cl_io_unlock()->cl_unuse()->...->lov_lock_unuse().
+ */
+ if ((scan->cll_state == CLS_HELD ||
+ scan->cll_state == CLS_CACHED) &&
+ cl_lock_ext_match(&scan->cll_descr, descr)) {
+ struct osc_lock *olck;
+
+ olck = osc_lock_at(scan);
+ result = osc_lock_is_lockless(olck);
+ break;
+ }
+ }
+ spin_unlock(&hdr->coh_lock_guard);
+ }
+ return result;
+}
+#else
+static int osc_page_protected(const struct lu_env *env,
+ const struct osc_page *opg,
+ enum cl_lock_mode mode, int unref)
+{
+ return 1;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Page operations.
+ *
+ */
+static void osc_page_fini(const struct lu_env *env,
+ struct cl_page_slice *slice)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ CDEBUG(D_TRACE, "%p\n", opg);
+ LASSERT(opg->ops_lock == NULL);
+}
+
+static void osc_page_transfer_get(struct osc_page *opg, const char *label)
+{
+ struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+
+ LASSERT(!opg->ops_transfer_pinned);
+ cl_page_get(page);
+ lu_ref_add_atomic(&page->cp_reference, label, page);
+ opg->ops_transfer_pinned = 1;
+}
+
+static void osc_page_transfer_put(const struct lu_env *env,
+ struct osc_page *opg)
+{
+ struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+
+ if (opg->ops_transfer_pinned) {
+ lu_ref_del(&page->cp_reference, "transfer", page);
+ opg->ops_transfer_pinned = 0;
+ cl_page_put(env, page);
+ }
+}
+
+/**
+ * This is called once for every page when it is submitted for a transfer
+ * either opportunistic (osc_page_cache_add()), or immediate
+ * (osc_page_submit()).
+ */
+static void osc_page_transfer_add(const struct lu_env *env,
+ struct osc_page *opg, enum cl_req_type crt)
+{
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+
+ /* ops_lru and ops_inflight share the same field, so take it from LRU
+ * first and then use it as inflight. */
+ osc_lru_del(osc_cli(obj), opg, false);
+
+ spin_lock(&obj->oo_seatbelt);
+ list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
+ opg->ops_submitter = current;
+ spin_unlock(&obj->oo_seatbelt);
+}
+
+static int osc_page_cache_add(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_page *opg = cl2osc_page(slice);
+ int result;
+ ENTRY;
+
+ LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0));
+
+ osc_page_transfer_get(opg, "transfer\0cache");
+ result = osc_queue_async_io(env, io, opg);
+ if (result != 0)
+ osc_page_transfer_put(env, opg);
+ else
+ osc_page_transfer_add(env, opg, CRT_WRITE);
+
+ /* for sync write, kernel will wait for this page to be flushed before
+ * osc_io_end() is called, so release it earlier.
+ * for mkwrite(), it's known there is no further pages. */
+ if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) {
+ if (oio->oi_active != NULL) {
+ osc_extent_release(env, oio->oi_active);
+ oio->oi_active = NULL;
+ }
+ }
+
+ RETURN(result);
+}
+
+void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+ pgoff_t start, pgoff_t end)
+{
+ memset(policy, 0, sizeof *policy);
+ policy->l_extent.start = cl_offset(obj, start);
+ policy->l_extent.end = cl_offset(obj, end + 1) - 1;
+}
+
+static int osc_page_addref_lock(const struct lu_env *env,
+ struct osc_page *opg,
+ struct cl_lock *lock)
+{
+ struct osc_lock *olock;
+ int rc;
+
+ LASSERT(opg->ops_lock == NULL);
+
+ olock = osc_lock_at(lock);
+ if (atomic_inc_return(&olock->ols_pageref) <= 0) {
+ atomic_dec(&olock->ols_pageref);
+ rc = -ENODATA;
+ } else {
+ cl_lock_get(lock);
+ opg->ops_lock = lock;
+ rc = 0;
+ }
+ return rc;
+}
+
+static void osc_page_putref_lock(const struct lu_env *env,
+ struct osc_page *opg)
+{
+ struct cl_lock *lock = opg->ops_lock;
+ struct osc_lock *olock;
+
+ LASSERT(lock != NULL);
+ olock = osc_lock_at(lock);
+
+ atomic_dec(&olock->ols_pageref);
+ opg->ops_lock = NULL;
+
+ cl_lock_put(env, lock);
+}
+
+static int osc_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ struct cl_lock *lock;
+ int result = -ENODATA;
+
+ ENTRY;
+ lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page,
+ NULL, 1, 0);
+ if (lock != NULL) {
+ if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0)
+ result = -EBUSY;
+ cl_lock_put(env, lock);
+ }
+ RETURN(result);
+}
+
+static void osc_page_disown(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+
+ if (unlikely(opg->ops_lock))
+ osc_page_putref_lock(env, opg);
+}
+
+static void osc_page_completion_read(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+
+ if (likely(opg->ops_lock))
+ osc_page_putref_lock(env, opg);
+ osc_lru_add(osc_cli(obj), opg);
+}
+
+static void osc_page_completion_write(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ int ioret)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_object *obj = cl2osc(slice->cpl_obj);
+
+ osc_lru_add(osc_cli(obj), opg);
+}
+
+static int osc_page_fail(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ /*
+ * Cached read?
+ */
+ LBUG();
+ return 0;
+}
+
+
+static const char *osc_list(struct list_head *head)
+{
+ return list_empty(head) ? "-" : "+";
+}
+
+static inline cfs_time_t osc_submit_duration(struct osc_page *opg)
+{
+ if (opg->ops_submit_time == 0)
+ return 0;
+
+ return (cfs_time_current() - opg->ops_submit_time);
+}
+
+static int osc_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_async_page *oap = &opg->ops_oap;
+ struct osc_object *obj = cl2osc(slice->cpl_obj);
+ struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
+
+ return (*printer)(env, cookie, LUSTRE_OSC_NAME"-page@%p: "
+ "1< %#x %d %u %s %s > "
+ "2< "LPU64" %u %u %#x %#x | %p %p %p > "
+ "3< %s %p %d %lu %d > "
+ "4< %d %d %d %lu %s | %s %s %s %s > "
+ "5< %s %s %s %s | %d %s | %d %s %s>\n",
+ opg,
+ /* 1 */
+ oap->oap_magic, oap->oap_cmd,
+ oap->oap_interrupted,
+ osc_list(&oap->oap_pending_item),
+ osc_list(&oap->oap_rpc_item),
+ /* 2 */
+ oap->oap_obj_off, oap->oap_page_off, oap->oap_count,
+ oap->oap_async_flags, oap->oap_brw_flags,
+ oap->oap_request, oap->oap_cli, obj,
+ /* 3 */
+ osc_list(&opg->ops_inflight),
+ opg->ops_submitter, opg->ops_transfer_pinned,
+ osc_submit_duration(opg), opg->ops_srvlock,
+ /* 4 */
+ cli->cl_r_in_flight, cli->cl_w_in_flight,
+ cli->cl_max_rpcs_in_flight,
+ cli->cl_avail_grant,
+ osc_list(&cli->cl_cache_waiters),
+ osc_list(&cli->cl_loi_ready_list),
+ osc_list(&cli->cl_loi_hp_ready_list),
+ osc_list(&cli->cl_loi_write_list),
+ osc_list(&cli->cl_loi_read_list),
+ /* 5 */
+ osc_list(&obj->oo_ready_item),
+ osc_list(&obj->oo_hp_ready_item),
+ osc_list(&obj->oo_write_item),
+ osc_list(&obj->oo_read_item),
+ atomic_read(&obj->oo_nr_reads),
+ osc_list(&obj->oo_reading_exts),
+ atomic_read(&obj->oo_nr_writes),
+ osc_list(&obj->oo_hp_exts),
+ osc_list(&obj->oo_urgent_exts));
+}
+
+static void osc_page_delete(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+ int rc;
+
+ LINVRNT(opg->ops_temp || osc_page_protected(env, opg, CLM_READ, 1));
+
+ ENTRY;
+ CDEBUG(D_TRACE, "%p\n", opg);
+ osc_page_transfer_put(env, opg);
+ rc = osc_teardown_async_page(env, obj, opg);
+ if (rc) {
+ CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page),
+ "Trying to teardown failed: %d\n", rc);
+ LASSERT(0);
+ }
+
+ spin_lock(&obj->oo_seatbelt);
+ if (opg->ops_submitter != NULL) {
+ LASSERT(!list_empty(&opg->ops_inflight));
+ list_del_init(&opg->ops_inflight);
+ opg->ops_submitter = NULL;
+ }
+ spin_unlock(&obj->oo_seatbelt);
+
+ osc_lru_del(osc_cli(obj), opg, true);
+ EXIT;
+}
+
+void osc_page_clip(const struct lu_env *env, const struct cl_page_slice *slice,
+ int from, int to)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_async_page *oap = &opg->ops_oap;
+
+ LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
+
+ opg->ops_from = from;
+ opg->ops_to = to;
+ spin_lock(&oap->oap_lock);
+ oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&oap->oap_lock);
+}
+
+static int osc_page_cancel(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ int rc = 0;
+
+ LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
+
+ /* Check if the transferring against this page
+ * is completed, or not even queued. */
+ if (opg->ops_transfer_pinned)
+ /* FIXME: may not be interrupted.. */
+ rc = osc_cancel_async_page(env, opg);
+ LASSERT(ergo(rc == 0, opg->ops_transfer_pinned == 0));
+ return rc;
+}
+
+static int osc_page_flush(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io)
+{
+ struct osc_page *opg = cl2osc_page(slice);
+ int rc = 0;
+ ENTRY;
+ rc = osc_flush_async_page(env, io, opg);
+ RETURN(rc);
+}
+
+static const struct cl_page_operations osc_page_ops = {
+ .cpo_fini = osc_page_fini,
+ .cpo_print = osc_page_print,
+ .cpo_delete = osc_page_delete,
+ .cpo_is_under_lock = osc_page_is_under_lock,
+ .cpo_disown = osc_page_disown,
+ .io = {
+ [CRT_READ] = {
+ .cpo_cache_add = osc_page_fail,
+ .cpo_completion = osc_page_completion_read
+ },
+ [CRT_WRITE] = {
+ .cpo_cache_add = osc_page_cache_add,
+ .cpo_completion = osc_page_completion_write
+ }
+ },
+ .cpo_clip = osc_page_clip,
+ .cpo_cancel = osc_page_cancel,
+ .cpo_flush = osc_page_flush
+};
+
+int osc_page_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_page *page, struct page *vmpage)
+{
+ struct osc_object *osc = cl2osc(obj);
+ struct osc_page *opg = cl_object_page_slice(obj, page);
+ int result;
+
+ opg->ops_from = 0;
+ opg->ops_to = PAGE_CACHE_SIZE;
+
+ result = osc_prep_async_page(osc, opg, vmpage,
+ cl_offset(obj, page->cp_index));
+ if (result == 0) {
+ struct osc_io *oio = osc_env_io(env);
+ opg->ops_srvlock = osc_io_srvlock(oio);
+ cl_page_slice_add(page, &opg->ops_cl, obj,
+ &osc_page_ops);
+ }
+ /*
+ * Cannot assert osc_page_protected() here as read-ahead
+ * creates temporary pages outside of a lock.
+ */
+ /* ops_inflight and ops_lru are the same field, but it doesn't
+ * hurt to initialize it twice :-) */
+ INIT_LIST_HEAD(&opg->ops_inflight);
+ INIT_LIST_HEAD(&opg->ops_lru);
+
+ /* reserve an LRU space for this page */
+ if (page->cp_type == CPT_CACHEABLE && result == 0)
+ result = osc_lru_reserve(env, osc, opg);
+
+ return result;
+}
+
+/**
+ * Helper function called by osc_io_submit() for every page in an immediate
+ * transfer (i.e., transferred synchronously).
+ */
+void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
+ enum cl_req_type crt, int brw_flags)
+{
+ struct osc_async_page *oap = &opg->ops_oap;
+ struct osc_object *obj = oap->oap_obj;
+
+ LINVRNT(osc_page_protected(env, opg,
+ crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1));
+
+ LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, "
+ "magic 0x%x\n", oap, oap->oap_magic);
+ LASSERT(oap->oap_async_flags & ASYNC_READY);
+ LASSERT(oap->oap_async_flags & ASYNC_COUNT_STABLE);
+
+ oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
+ oap->oap_page_off = opg->ops_from;
+ oap->oap_count = opg->ops_to - opg->ops_from;
+ oap->oap_brw_flags = OBD_BRW_SYNC | brw_flags;
+
+ if (!client_is_remote(osc_export(obj)) &&
+ cfs_capable(CFS_CAP_SYS_RESOURCE)) {
+ oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+ oap->oap_cmd |= OBD_BRW_NOQUOTA;
+ }
+
+ opg->ops_submit_time = cfs_time_current();
+ osc_page_transfer_get(opg, "transfer\0imm");
+ osc_page_transfer_add(env, opg, crt);
+}
+
+/* --------------- LRU page management ------------------ */
+
+/* OSC is a natural place to manage LRU pages as applications are specialized
+ * to write OSC by OSC. Ideally, if one OSC is used more frequently it should
+ * occupy more LRU slots. On the other hand, we should avoid using up all LRU
+ * slots (client_obd::cl_lru_left) otherwise process has to be put into sleep
+ * for free LRU slots - this will be very bad so the algorithm requires each
+ * OSC to free slots voluntarily to maintain a reasonable number of free slots
+ * at any time.
+ */
+
+static CFS_DECL_WAITQ(osc_lru_waitq);
+static atomic_t osc_lru_waiters = ATOMIC_INIT(0);
+/* LRU pages are freed in batch mode. OSC should at least free this
+ * number of pages to avoid running out of LRU budget, and.. */
+static const int lru_shrink_min = 2 << (20 - PAGE_CACHE_SHIFT); /* 2M */
+/* free this number at most otherwise it will take too long time to finsih. */
+static const int lru_shrink_max = 32 << (20 - PAGE_CACHE_SHIFT); /* 32M */
+
+/* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
+ * we should free slots aggressively. In this way, slots are freed in a steady
+ * step to maintain fairness among OSCs.
+ *
+ * Return how many LRU pages should be freed. */
+static int osc_cache_too_much(struct client_obd *cli)
+{
+ struct cl_client_cache *cache = cli->cl_cache;
+ int pages = atomic_read(&cli->cl_lru_in_list) >> 1;
+
+ if (atomic_read(&osc_lru_waiters) > 0 &&
+ atomic_read(cli->cl_lru_left) < lru_shrink_max)
+ /* drop lru pages aggressively */
+ return min(pages, lru_shrink_max);
+
+ /* if it's going to run out LRU slots, we should free some, but not
+ * too much to maintain faireness among OSCs. */
+ if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
+ unsigned long tmp;
+
+ tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users);
+ if (pages > tmp)
+ return min(pages, lru_shrink_max);
+
+ return pages > lru_shrink_min ? lru_shrink_min : 0;
+ }
+
+ return 0;
+}
+
+/* Return how many pages are not discarded in @pvec. */
+static int discard_pagevec(const struct lu_env *env, struct cl_io *io,
+ struct cl_page **pvec, int max_index)
+{
+ int count;
+ int i;
+
+ for (count = 0, i = 0; i < max_index; i++) {
+ struct cl_page *page = pvec[i];
+ if (cl_page_own_try(env, io, page) == 0) {
+ /* free LRU page only if nobody is using it.
+ * This check is necessary to avoid freeing the pages
+ * having already been removed from LRU and pinned
+ * for IO. */
+ if (!cl_page_in_use(page)) {
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ ++count;
+ }
+ cl_page_disown(env, io, page);
+ }
+ cl_page_put(env, page);
+ pvec[i] = NULL;
+ }
+ return max_index - count;
+}
+
+/**
+ * Drop @target of pages from LRU at most.
+ */
+int osc_lru_shrink(struct client_obd *cli, int target)
+{
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_object *clobj = NULL;
+ struct cl_page **pvec;
+ struct osc_page *opg;
+ int maxscan = 0;
+ int count = 0;
+ int index = 0;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(atomic_read(&cli->cl_lru_in_list) >= 0);
+ if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
+ RETURN(0);
+
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
+ pvec = osc_env_info(env)->oti_pvec;
+ io = &osc_env_info(env)->oti_io;
+
+ client_obd_list_lock(&cli->cl_lru_list_lock);
+ atomic_inc(&cli->cl_lru_shrinkers);
+ maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
+ while (!list_empty(&cli->cl_lru_list)) {
+ struct cl_page *page;
+
+ if (--maxscan < 0)
+ break;
+
+ opg = list_entry(cli->cl_lru_list.next, struct osc_page,
+ ops_lru);
+ page = cl_page_top(opg->ops_cl.cpl_page);
+ if (cl_page_in_use_noref(page)) {
+ list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+ continue;
+ }
+
+ LASSERT(page->cp_obj != NULL);
+ if (clobj != page->cp_obj) {
+ struct cl_object *tmp = page->cp_obj;
+
+ cl_object_get(tmp);
+ client_obd_list_unlock(&cli->cl_lru_list_lock);
+
+ if (clobj != NULL) {
+ count -= discard_pagevec(env, io, pvec, index);
+ index = 0;
+
+ cl_io_fini(env, io);
+ cl_object_put(env, clobj);
+ clobj = NULL;
+ }
+
+ clobj = tmp;
+ io->ci_obj = clobj;
+ io->ci_ignore_layout = 1;
+ rc = cl_io_init(env, io, CIT_MISC, clobj);
+
+ client_obd_list_lock(&cli->cl_lru_list_lock);
+
+ if (rc != 0)
+ break;
+
+ ++maxscan;
+ continue;
+ }
+
+ /* move this page to the end of list as it will be discarded
+ * soon. The page will be finally removed from LRU list in
+ * osc_page_delete(). */
+ list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+
+ /* it's okay to grab a refcount here w/o holding lock because
+ * it has to grab cl_lru_list_lock to delete the page. */
+ cl_page_get(page);
+ pvec[index++] = page;
+ if (++count >= target)
+ break;
+
+ if (unlikely(index == OTI_PVEC_SIZE)) {
+ client_obd_list_unlock(&cli->cl_lru_list_lock);
+ count -= discard_pagevec(env, io, pvec, index);
+ index = 0;
+
+ client_obd_list_lock(&cli->cl_lru_list_lock);
+ }
+ }
+ client_obd_list_unlock(&cli->cl_lru_list_lock);
+
+ if (clobj != NULL) {
+ count -= discard_pagevec(env, io, pvec, index);
+
+ cl_io_fini(env, io);
+ cl_object_put(env, clobj);
+ }
+ cl_env_nested_put(&nest, env);
+
+ atomic_dec(&cli->cl_lru_shrinkers);
+ RETURN(count > 0 ? count : rc);
+}
+
+static void osc_lru_add(struct client_obd *cli, struct osc_page *opg)
+{
+ bool wakeup = false;
+
+ if (!opg->ops_in_lru)
+ return;
+
+ atomic_dec(&cli->cl_lru_busy);
+ client_obd_list_lock(&cli->cl_lru_list_lock);
+ if (list_empty(&opg->ops_lru)) {
+ list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+ atomic_inc_return(&cli->cl_lru_in_list);
+ wakeup = atomic_read(&osc_lru_waiters) > 0;
+ }
+ client_obd_list_unlock(&cli->cl_lru_list_lock);
+
+ if (wakeup) {
+ osc_lru_shrink(cli, osc_cache_too_much(cli));
+ wake_up_all(&osc_lru_waitq);
+ }
+}
+
+/* delete page from LRUlist. The page can be deleted from LRUlist for two
+ * reasons: redirtied or deleted from page cache. */
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del)
+{
+ if (opg->ops_in_lru) {
+ client_obd_list_lock(&cli->cl_lru_list_lock);
+ if (!list_empty(&opg->ops_lru)) {
+ LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+ list_del_init(&opg->ops_lru);
+ atomic_dec(&cli->cl_lru_in_list);
+ if (!del)
+ atomic_inc(&cli->cl_lru_busy);
+ } else if (del) {
+ LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
+ atomic_dec(&cli->cl_lru_busy);
+ }
+ client_obd_list_unlock(&cli->cl_lru_list_lock);
+ if (del) {
+ atomic_inc(cli->cl_lru_left);
+ /* this is a great place to release more LRU pages if
+ * this osc occupies too many LRU pages and kernel is
+ * stealing one of them.
+ * cl_lru_shrinkers is to avoid recursive call in case
+ * we're already in the context of osc_lru_shrink(). */
+ if (atomic_read(&cli->cl_lru_shrinkers) == 0 &&
+ !memory_pressure_get())
+ osc_lru_shrink(cli, osc_cache_too_much(cli));
+ wake_up(&osc_lru_waitq);
+ }
+ } else {
+ LASSERT(list_empty(&opg->ops_lru));
+ }
+}
+
+static inline int max_to_shrink(struct client_obd *cli)
+{
+ return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
+}
+
+static int osc_lru_reclaim(struct client_obd *cli)
+{
+ struct cl_client_cache *cache = cli->cl_cache;
+ int max_scans;
+ int rc;
+
+ LASSERT(cache != NULL);
+ LASSERT(!list_empty(&cache->ccc_lru));
+
+ rc = osc_lru_shrink(cli, lru_shrink_min);
+ if (rc != 0) {
+ CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
+ cli->cl_import->imp_obd->obd_name, rc, cli);
+ return rc;
+ }
+
+ CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
+ cli->cl_import->imp_obd->obd_name, cli,
+ atomic_read(&cli->cl_lru_in_list),
+ atomic_read(&cli->cl_lru_busy));
+
+ /* Reclaim LRU slots from other client_obd as it can't free enough
+ * from its own. This should rarely happen. */
+ spin_lock(&cache->ccc_lru_lock);
+ cache->ccc_lru_shrinkers++;
+ list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
+
+ max_scans = atomic_read(&cache->ccc_users);
+ while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
+ cli = list_entry(cache->ccc_lru.next, struct client_obd,
+ cl_lru_osc);
+
+ CDEBUG(D_CACHE, "%s: cli %p LRU pages: %d, busy: %d.\n",
+ cli->cl_import->imp_obd->obd_name, cli,
+ atomic_read(&cli->cl_lru_in_list),
+ atomic_read(&cli->cl_lru_busy));
+
+ list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
+ if (atomic_read(&cli->cl_lru_in_list) > 0) {
+ spin_unlock(&cache->ccc_lru_lock);
+
+ rc = osc_lru_shrink(cli, max_to_shrink(cli));
+ spin_lock(&cache->ccc_lru_lock);
+ if (rc != 0)
+ break;
+ }
+ }
+ spin_unlock(&cache->ccc_lru_lock);
+
+ CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
+ cli->cl_import->imp_obd->obd_name, cli, rc);
+ return rc;
+}
+
+static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
+ struct osc_page *opg)
+{
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ struct client_obd *cli = osc_cli(obj);
+ int rc = 0;
+ ENTRY;
+
+ if (cli->cl_cache == NULL) /* shall not be in LRU */
+ RETURN(0);
+
+ LASSERT(atomic_read(cli->cl_lru_left) >= 0);
+ while (!cfs_atomic_add_unless(cli->cl_lru_left, -1, 0)) {
+ int gen;
+
+ /* run out of LRU spaces, try to drop some by itself */
+ rc = osc_lru_reclaim(cli);
+ if (rc < 0)
+ break;
+ if (rc > 0)
+ continue;
+
+ cond_resched();
+
+ /* slowest case, all of caching pages are busy, notifying
+ * other OSCs that we're lack of LRU slots. */
+ atomic_inc(&osc_lru_waiters);
+
+ gen = atomic_read(&cli->cl_lru_in_list);
+ rc = l_wait_event(osc_lru_waitq,
+ atomic_read(cli->cl_lru_left) > 0 ||
+ (atomic_read(&cli->cl_lru_in_list) > 0 &&
+ gen != atomic_read(&cli->cl_lru_in_list)),
+ &lwi);
+
+ atomic_dec(&osc_lru_waiters);
+ if (rc < 0)
+ break;
+ }
+
+ if (rc >= 0) {
+ atomic_inc(&cli->cl_lru_busy);
+ opg->ops_in_lru = 1;
+ rc = 0;
+ }
+
+ RETURN(rc);
+}
+
+/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_quota.c b/drivers/staging/lustre/lustre/osc/osc_quota.c
new file mode 100644
index 000000000000..69caab76ced3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_quota.c
@@ -0,0 +1,332 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Code originally extracted from quota directory
+ */
+
+#include <obd_ost.h>
+#include "osc_internal.h"
+
+static inline struct osc_quota_info *osc_oqi_alloc(obd_uid id)
+{
+ struct osc_quota_info *oqi;
+
+ OBD_SLAB_ALLOC_PTR(oqi, osc_quota_kmem);
+ if (oqi != NULL)
+ oqi->oqi_id = id;
+
+ return oqi;
+}
+
+int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
+{
+ int type;
+ ENTRY;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ struct osc_quota_info *oqi;
+
+ oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
+ if (oqi) {
+ obd_uid id = oqi->oqi_id;
+
+ LASSERTF(id == qid[type],
+ "The ids don't match %u != %u\n",
+ id, qid[type]);
+
+ /* the slot is busy, the user is about to run out of
+ * quota space on this OST */
+ CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
+ type == USRQUOTA ? "user" : "grout", qid[type]);
+ RETURN(NO_QUOTA);
+ }
+ }
+
+ RETURN(QUOTA_OK);
+}
+
+#define MD_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_MD_FLUSRQUOTA \
+ : OBD_MD_FLGRPQUOTA)
+#define FL_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_FL_NO_USRQUOTA \
+ : OBD_FL_NO_GRPQUOTA)
+
+int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
+ obd_flag valid, obd_flag flags)
+{
+ int type;
+ int rc = 0;
+ ENTRY;
+
+ if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0)
+ RETURN(0);
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ struct osc_quota_info *oqi;
+
+ if ((valid & MD_QUOTA_FLAG(type)) == 0)
+ continue;
+
+ /* lookup the ID in the per-type hash table */
+ oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
+ if ((flags & FL_QUOTA_FLAG(type)) != 0) {
+ /* This ID is getting close to its quota limit, let's
+ * switch to sync I/O */
+ if (oqi != NULL)
+ continue;
+
+ oqi = osc_oqi_alloc(qid[type]);
+ if (oqi == NULL) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ rc = cfs_hash_add_unique(cli->cl_quota_hash[type],
+ &qid[type], &oqi->oqi_hash);
+ /* race with others? */
+ if (rc == -EALREADY) {
+ rc = 0;
+ OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
+ }
+
+ CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n",
+ cli->cl_import->imp_obd->obd_name,
+ type == USRQUOTA ? "user" : "group",
+ qid[type], rc);
+ } else {
+ /* This ID is now off the hook, let's remove it from
+ * the hash table */
+ if (oqi == NULL)
+ continue;
+
+ oqi = cfs_hash_del_key(cli->cl_quota_hash[type],
+ &qid[type]);
+ if (oqi)
+ OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
+
+ CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n",
+ cli->cl_import->imp_obd->obd_name,
+ type == USRQUOTA ? "user" : "group",
+ qid[type], oqi);
+ }
+ }
+
+ RETURN(rc);
+}
+
+/*
+ * Hash operations for uid/gid <-> osc_quota_info
+ */
+static unsigned
+oqi_hashfn(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_u32_hash(*((__u32*)key), mask);
+}
+
+static int
+oqi_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct osc_quota_info *oqi;
+ obd_uid uid;
+
+ LASSERT(key != NULL);
+ uid = *((obd_uid*)key);
+ oqi = hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+
+ return uid == oqi->oqi_id;
+}
+
+static void *
+oqi_key(struct hlist_node *hnode)
+{
+ struct osc_quota_info *oqi;
+ oqi = hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+ return &oqi->oqi_id;
+}
+
+static void *
+oqi_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+}
+
+static void
+oqi_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+}
+
+static void
+oqi_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+}
+
+static void
+oqi_exit(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct osc_quota_info *oqi;
+
+ oqi = hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+
+ OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
+}
+
+#define HASH_QUOTA_BKT_BITS 5
+#define HASH_QUOTA_CUR_BITS 5
+#define HASH_QUOTA_MAX_BITS 15
+
+static cfs_hash_ops_t quota_hash_ops = {
+ .hs_hash = oqi_hashfn,
+ .hs_keycmp = oqi_keycmp,
+ .hs_key = oqi_key,
+ .hs_object = oqi_object,
+ .hs_get = oqi_get,
+ .hs_put_locked = oqi_put_locked,
+ .hs_exit = oqi_exit,
+};
+
+int osc_quota_setup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int i, type;
+ ENTRY;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ cli->cl_quota_hash[type] = cfs_hash_create("QUOTA_HASH",
+ HASH_QUOTA_CUR_BITS,
+ HASH_QUOTA_MAX_BITS,
+ HASH_QUOTA_BKT_BITS,
+ 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &quota_hash_ops,
+ CFS_HASH_DEFAULT);
+ if (cli->cl_quota_hash[type] == NULL)
+ break;
+ }
+
+ if (type == MAXQUOTAS)
+ RETURN(0);
+
+ for (i = 0; i < type; i++)
+ cfs_hash_putref(cli->cl_quota_hash[i]);
+
+ RETURN(-ENOMEM);
+}
+
+int osc_quota_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int type;
+ ENTRY;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ cfs_hash_putref(cli->cl_quota_hash[type]);
+
+ RETURN(0);
+}
+
+int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct ptlrpc_request *req;
+ struct obd_quotactl *oqc;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_OST_QUOTACTL, LUSTRE_OST_VERSION,
+ OST_QUOTACTL);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ *oqc = *oqctl;
+
+ ptlrpc_request_set_replen(req);
+ ptlrpc_at_set_req_timeout(req);
+ req->rq_no_resend = 1;
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc);
+
+ if (req->rq_repmsg &&
+ (oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL))) {
+ *oqctl = *oqc;
+ } else if (!rc) {
+ CERROR ("Can't unpack obd_quotactl\n");
+ rc = -EPROTO;
+ }
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
+ struct obd_quotactl *oqctl)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ struct ptlrpc_request *req;
+ struct obd_quotactl *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+ &RQF_OST_QUOTACHECK, LUSTRE_OST_VERSION,
+ OST_QUOTACHECK);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ *body = *oqctl;
+
+ ptlrpc_request_set_replen(req);
+
+ /* the next poll will find -ENODATA, that means quotacheck is
+ * going on */
+ cli->cl_qchk_stat = -ENODATA;
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ cli->cl_qchk_stat = rc;
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ int rc;
+ ENTRY;
+
+ qchk->obd_uuid = cli->cl_target_uuid;
+ memcpy(qchk->obd_type, LUSTRE_OST_NAME, strlen(LUSTRE_OST_NAME));
+
+ rc = cli->cl_qchk_stat;
+ /* the client is not the previous one */
+ if (rc == CL_NOT_QUOTACHECKED)
+ rc = -EINTR;
+ RETURN(rc);
+}
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
new file mode 100644
index 000000000000..53d6a35c80b9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -0,0 +1,3708 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_OSC
+
+#include <linux/libcfs/libcfs.h>
+
+
+#include <lustre_dlm.h>
+#include <lustre_net.h>
+#include <lustre/lustre_user.h>
+#include <obd_cksum.h>
+#include <obd_ost.h>
+#include <obd_lov.h>
+
+#ifdef __CYGWIN__
+# include <ctype.h>
+#endif
+
+#include <lustre_ha.h>
+#include <lprocfs_status.h>
+#include <lustre_log.h>
+#include <lustre_debug.h>
+#include <lustre_param.h>
+#include <lustre_fid.h>
+#include "osc_internal.h"
+#include "osc_cl_internal.h"
+
+static void osc_release_ppga(struct brw_page **ppga, obd_count count);
+static int brw_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req, void *data, int rc);
+int osc_cleanup(struct obd_device *obd);
+
+/* Pack OSC object metadata for disk storage (LE byte order). */
+static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ int lmm_size;
+ ENTRY;
+
+ lmm_size = sizeof(**lmmp);
+ if (lmmp == NULL)
+ RETURN(lmm_size);
+
+ if (*lmmp != NULL && lsm == NULL) {
+ OBD_FREE(*lmmp, lmm_size);
+ *lmmp = NULL;
+ RETURN(0);
+ } else if (unlikely(lsm != NULL && ostid_id(&lsm->lsm_oi) == 0)) {
+ RETURN(-EBADF);
+ }
+
+ if (*lmmp == NULL) {
+ OBD_ALLOC(*lmmp, lmm_size);
+ if (*lmmp == NULL)
+ RETURN(-ENOMEM);
+ }
+
+ if (lsm)
+ ostid_cpu_to_le(&lsm->lsm_oi, &(*lmmp)->lmm_oi);
+
+ RETURN(lmm_size);
+}
+
+/* Unpack OSC object metadata from disk storage (LE byte order). */
+static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_bytes)
+{
+ int lsm_size;
+ struct obd_import *imp = class_exp2cliimp(exp);
+ ENTRY;
+
+ if (lmm != NULL) {
+ if (lmm_bytes < sizeof(*lmm)) {
+ CERROR("%s: lov_mds_md too small: %d, need %d\n",
+ exp->exp_obd->obd_name, lmm_bytes,
+ (int)sizeof(*lmm));
+ RETURN(-EINVAL);
+ }
+ /* XXX LOV_MAGIC etc check? */
+
+ if (unlikely(ostid_id(&lmm->lmm_oi) == 0)) {
+ CERROR("%s: zero lmm_object_id: rc = %d\n",
+ exp->exp_obd->obd_name, -EINVAL);
+ RETURN(-EINVAL);
+ }
+ }
+
+ lsm_size = lov_stripe_md_size(1);
+ if (lsmp == NULL)
+ RETURN(lsm_size);
+
+ if (*lsmp != NULL && lmm == NULL) {
+ OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
+ OBD_FREE(*lsmp, lsm_size);
+ *lsmp = NULL;
+ RETURN(0);
+ }
+
+ if (*lsmp == NULL) {
+ OBD_ALLOC(*lsmp, lsm_size);
+ if (unlikely(*lsmp == NULL))
+ RETURN(-ENOMEM);
+ OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
+ if (unlikely((*lsmp)->lsm_oinfo[0] == NULL)) {
+ OBD_FREE(*lsmp, lsm_size);
+ RETURN(-ENOMEM);
+ }
+ loi_init((*lsmp)->lsm_oinfo[0]);
+ } else if (unlikely(ostid_id(&(*lsmp)->lsm_oi) == 0)) {
+ RETURN(-EBADF);
+ }
+
+ if (lmm != NULL)
+ /* XXX zero *lsmp? */
+ ostid_le_to_cpu(&lmm->lmm_oi, &(*lsmp)->lsm_oi);
+
+ if (imp != NULL &&
+ (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES))
+ (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes;
+ else
+ (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+
+ RETURN(lsm_size);
+}
+
+static inline void osc_pack_capa(struct ptlrpc_request *req,
+ struct ost_body *body, void *capa)
+{
+ struct obd_capa *oc = (struct obd_capa *)capa;
+ struct lustre_capa *c;
+
+ if (!capa)
+ return;
+
+ c = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
+ LASSERT(c);
+ capa_cpy(c, oc);
+ body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+ DEBUG_CAPA(D_SEC, c, "pack");
+}
+
+static inline void osc_pack_req_body(struct ptlrpc_request *req,
+ struct obd_info *oinfo)
+{
+ struct ost_body *body;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
+ oinfo->oi_oa);
+ osc_pack_capa(req, body, oinfo->oi_capa);
+}
+
+static inline void osc_set_capa_size(struct ptlrpc_request *req,
+ const struct req_msg_field *field,
+ struct obd_capa *oc)
+{
+ if (oc == NULL)
+ req_capsule_set_size(&req->rq_pill, field, RCL_CLIENT, 0);
+ else
+ /* it is already calculated as sizeof struct obd_capa */
+ ;
+}
+
+static int osc_getattr_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ struct osc_async_args *aa, int rc)
+{
+ struct ost_body *body;
+ ENTRY;
+
+ if (rc != 0)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body) {
+ CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
+ aa->aa_oi->oi_oa, &body->oa);
+
+ /* This should really be sent by the OST */
+ aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE;
+ aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
+ } else {
+ CDEBUG(D_INFO, "can't unpack ost_body\n");
+ rc = -EPROTO;
+ aa->aa_oi->oi_oa->o_valid = 0;
+ }
+out:
+ rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
+ RETURN(rc);
+}
+
+static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
+ struct ptlrpc_request_set *set)
+{
+ struct ptlrpc_request *req;
+ struct osc_async_args *aa;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ osc_pack_req_body(req, oinfo);
+
+ ptlrpc_request_set_replen(req);
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_getattr_interpret;
+
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ aa->aa_oi = oinfo;
+
+ ptlrpc_set_add_req(set, req);
+ RETURN(0);
+}
+
+static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo)
+{
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ osc_pack_req_body(req, oinfo);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
+ &body->oa);
+
+ oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd);
+ oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
+
+ EXIT;
+ out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti)
+{
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ int rc;
+ ENTRY;
+
+ LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ osc_pack_req_body(req, oinfo);
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
+ &body->oa);
+
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+static int osc_setattr_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ struct osc_setattr_args *sa, int rc)
+{
+ struct ost_body *body;
+ ENTRY;
+
+ if (rc != 0)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, sa->sa_oa,
+ &body->oa);
+out:
+ rc = sa->sa_upcall(sa->sa_cookie, rc);
+ RETURN(rc);
+}
+
+int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset)
+{
+ struct ptlrpc_request *req;
+ struct osc_setattr_args *sa;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
+ oinfo->oi_oa->o_lcookie = *oti->oti_logcookies;
+
+ osc_pack_req_body(req, oinfo);
+
+ ptlrpc_request_set_replen(req);
+
+ /* do mds to ost setattr asynchronously */
+ if (!rqset) {
+ /* Do not wait for response. */
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ } else {
+ req->rq_interpret_reply =
+ (ptlrpc_interpterer_t)osc_setattr_interpret;
+
+ CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+ sa = ptlrpc_req_async_args(req);
+ sa->sa_oa = oinfo->oi_oa;
+ sa->sa_upcall = upcall;
+ sa->sa_cookie = cookie;
+
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ else
+ ptlrpc_set_add_req(rqset, req);
+ }
+
+ RETURN(0);
+}
+
+static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
+ struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset)
+{
+ return osc_setattr_async_base(exp, oinfo, oti,
+ oinfo->oi_cb_up, oinfo, rqset);
+}
+
+int osc_real_create(struct obd_export *exp, struct obdo *oa,
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
+{
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ struct lov_stripe_md *lsm;
+ int rc;
+ ENTRY;
+
+ LASSERT(oa);
+ LASSERT(ea);
+
+ lsm = *ea;
+ if (!lsm) {
+ rc = obd_alloc_memmd(exp, &lsm);
+ if (rc < 0)
+ RETURN(rc);
+ }
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(out, rc);
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
+
+ ptlrpc_request_set_replen(req);
+
+ if ((oa->o_valid & OBD_MD_FLFLAGS) &&
+ oa->o_flags == OBD_FL_DELORPHAN) {
+ DEBUG_REQ(D_HA, req,
+ "delorphan from OST integration");
+ /* Don't resend the delorphan req */
+ req->rq_no_resend = req->rq_no_delay = 1;
+ }
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out_req, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out_req, rc = -EPROTO);
+
+ CDEBUG(D_INFO, "oa flags %x\n", oa->o_flags);
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa);
+
+ oa->o_blksize = cli_brw_size(exp->exp_obd);
+ oa->o_valid |= OBD_MD_FLBLKSZ;
+
+ /* XXX LOV STACKING: the lsm that is passed to us from LOV does not
+ * have valid lsm_oinfo data structs, so don't go touching that.
+ * This needs to be fixed in a big way.
+ */
+ lsm->lsm_oi = oa->o_oi;
+ *ea = lsm;
+
+ if (oti != NULL) {
+ oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
+
+ if (oa->o_valid & OBD_MD_FLCOOKIE) {
+ if (!oti->oti_logcookies)
+ oti_alloc_cookies(oti, 1);
+ *oti->oti_logcookies = oa->o_lcookie;
+ }
+ }
+
+ CDEBUG(D_HA, "transno: "LPD64"\n",
+ lustre_msg_get_transno(req->rq_repmsg));
+out_req:
+ ptlrpc_req_finished(req);
+out:
+ if (rc && !*ea)
+ obd_free_memmd(exp, &lsm);
+ RETURN(rc);
+}
+
+int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset)
+{
+ struct ptlrpc_request *req;
+ struct osc_setattr_args *sa;
+ struct ost_body *body;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_PUNCH);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+ req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
+ ptlrpc_at_set_req_timeout(req);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
+ oinfo->oi_oa);
+ osc_pack_capa(req, body, oinfo->oi_capa);
+
+ ptlrpc_request_set_replen(req);
+
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
+ CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+ sa = ptlrpc_req_async_args(req);
+ sa->sa_oa = oinfo->oi_oa;
+ sa->sa_upcall = upcall;
+ sa->sa_cookie = cookie;
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ else
+ ptlrpc_set_add_req(rqset, req);
+
+ RETURN(0);
+}
+
+static int osc_punch(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti,
+ struct ptlrpc_request_set *rqset)
+{
+ oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
+ oinfo->oi_oa->o_blocks = oinfo->oi_policy.l_extent.end;
+ oinfo->oi_oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ return osc_punch_base(exp, oinfo,
+ oinfo->oi_cb_up, oinfo, rqset);
+}
+
+static int osc_sync_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *arg, int rc)
+{
+ struct osc_fsync_args *fa = arg;
+ struct ost_body *body;
+ ENTRY;
+
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL) {
+ CERROR ("can't unpack ost_body\n");
+ GOTO(out, rc = -EPROTO);
+ }
+
+ *fa->fa_oi->oi_oa = body->oa;
+out:
+ rc = fa->fa_upcall(fa->fa_cookie, rc);
+ RETURN(rc);
+}
+
+int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset)
+{
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ struct osc_fsync_args *fa;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SYNC);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ /* overload the size and blocks fields in the oa with start/end */
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
+ oinfo->oi_oa);
+ osc_pack_capa(req, body, oinfo->oi_capa);
+
+ ptlrpc_request_set_replen(req);
+ req->rq_interpret_reply = osc_sync_interpret;
+
+ CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args));
+ fa = ptlrpc_req_async_args(req);
+ fa->fa_oi = oinfo;
+ fa->fa_upcall = upcall;
+ fa->fa_cookie = cookie;
+
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ else
+ ptlrpc_set_add_req(rqset, req);
+
+ RETURN (0);
+}
+
+static int osc_sync(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, obd_size start, obd_size end,
+ struct ptlrpc_request_set *set)
+{
+ ENTRY;
+
+ if (!oinfo->oi_oa) {
+ CDEBUG(D_INFO, "oa NULL\n");
+ RETURN(-EINVAL);
+ }
+
+ oinfo->oi_oa->o_size = start;
+ oinfo->oi_oa->o_blocks = end;
+ oinfo->oi_oa->o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+
+ RETURN(osc_sync_base(exp, oinfo, oinfo->oi_cb_up, oinfo, set));
+}
+
+/* Find and cancel locally locks matched by @mode in the resource found by
+ * @objid. Found locks are added into @cancel list. Returns the amount of
+ * locks added to @cancels list. */
+static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
+ struct list_head *cancels,
+ ldlm_mode_t mode, int lock_flags)
+{
+ struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+ struct ldlm_res_id res_id;
+ struct ldlm_resource *res;
+ int count;
+ ENTRY;
+
+ /* Return, i.e. cancel nothing, only if ELC is supported (flag in
+ * export) but disabled through procfs (flag in NS).
+ *
+ * This distinguishes from a case when ELC is not supported originally,
+ * when we still want to cancel locks in advance and just cancel them
+ * locally, without sending any RPC. */
+ if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns))
+ RETURN(0);
+
+ ostid_build_res_name(&oa->o_oi, &res_id);
+ res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
+ if (res == NULL)
+ RETURN(0);
+
+ LDLM_RESOURCE_ADDREF(res);
+ count = ldlm_cancel_resource_local(res, cancels, NULL, mode,
+ lock_flags, 0, NULL);
+ LDLM_RESOURCE_DELREF(res);
+ ldlm_resource_putref(res);
+ RETURN(count);
+}
+
+static int osc_destroy_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req, void *data,
+ int rc)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+
+ atomic_dec(&cli->cl_destroy_in_flight);
+ wake_up(&cli->cl_destroy_waitq);
+ return 0;
+}
+
+static int osc_can_send_destroy(struct client_obd *cli)
+{
+ if (atomic_inc_return(&cli->cl_destroy_in_flight) <=
+ cli->cl_max_rpcs_in_flight) {
+ /* The destroy request can be sent */
+ return 1;
+ }
+ if (atomic_dec_return(&cli->cl_destroy_in_flight) <
+ cli->cl_max_rpcs_in_flight) {
+ /*
+ * The counter has been modified between the two atomic
+ * operations.
+ */
+ wake_up(&cli->cl_destroy_waitq);
+ }
+ return 0;
+}
+
+int osc_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
+{
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(oa);
+ LASSERT(ea);
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+
+ if ((oa->o_valid & OBD_MD_FLFLAGS) &&
+ oa->o_flags == OBD_FL_RECREATE_OBJS) {
+ RETURN(osc_real_create(exp, oa, ea, oti));
+ }
+
+ if (!fid_seq_is_mdt(ostid_seq(&oa->o_oi)))
+ RETURN(osc_real_create(exp, oa, ea, oti));
+
+ /* we should not get here anymore */
+ LBUG();
+
+ RETURN(rc);
+}
+
+/* Destroy requests can be async always on the client, and we don't even really
+ * care about the return code since the client cannot do anything at all about
+ * a destroy failure.
+ * When the MDS is unlinking a filename, it saves the file objects into a
+ * recovery llog, and these object records are cancelled when the OST reports
+ * they were destroyed and sync'd to disk (i.e. transaction committed).
+ * If the client dies, or the OST is down when the object should be destroyed,
+ * the records are not cancelled, and when the OST reconnects to the MDS next,
+ * it will retrieve the llog unlink logs and then sends the log cancellation
+ * cookies to the MDS after committing destroy transactions. */
+static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md *ea,
+ struct obd_trans_info *oti, struct obd_export *md_export,
+ void *capa)
+{
+ struct client_obd *cli = &exp->exp_obd->u.cli;
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ LIST_HEAD(cancels);
+ int rc, count;
+ ENTRY;
+
+ if (!oa) {
+ CDEBUG(D_INFO, "oa NULL\n");
+ RETURN(-EINVAL);
+ }
+
+ count = osc_resource_get_unused(exp, oa, &cancels, LCK_PW,
+ LDLM_FL_DISCARD_DATA);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_DESTROY);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
+
+ osc_set_capa_size(req, &RMF_CAPA1, (struct obd_capa *)capa);
+ rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
+ 0, &cancels, count);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
+ ptlrpc_at_set_req_timeout(req);
+
+ if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
+ oa->o_lcookie = *oti->oti_logcookies;
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
+
+ osc_pack_capa(req, body, (struct obd_capa *)capa);
+ ptlrpc_request_set_replen(req);
+
+ /* If osc_destory is for destroying the unlink orphan,
+ * sent from MDT to OST, which should not be blocked here,
+ * because the process might be triggered by ptlrpcd, and
+ * it is not good to block ptlrpcd thread (b=16006)*/
+ if (!(oa->o_flags & OBD_FL_DELORPHAN)) {
+ req->rq_interpret_reply = osc_destroy_interpret;
+ if (!osc_can_send_destroy(cli)) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP,
+ NULL);
+
+ /*
+ * Wait until the number of on-going destroy RPCs drops
+ * under max_rpc_in_flight
+ */
+ l_wait_event_exclusive(cli->cl_destroy_waitq,
+ osc_can_send_destroy(cli), &lwi);
+ }
+ }
+
+ /* Do not wait for response */
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ RETURN(0);
+}
+
+static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
+ long writing_bytes)
+{
+ obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLGRANT;
+
+ LASSERT(!(oa->o_valid & bits));
+
+ oa->o_valid |= bits;
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ oa->o_dirty = cli->cl_dirty;
+ if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
+ cli->cl_dirty_max)) {
+ CERROR("dirty %lu - %lu > dirty_max %lu\n",
+ cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
+ oa->o_undirty = 0;
+ } else if (unlikely(atomic_read(&obd_dirty_pages) -
+ atomic_read(&obd_dirty_transit_pages) >
+ (long)(obd_max_dirty_pages + 1))) {
+ /* The atomic_read() allowing the atomic_inc() are
+ * not covered by a lock thus they may safely race and trip
+ * this CERROR() unless we add in a small fudge factor (+1). */
+ CERROR("dirty %d - %d > system dirty_max %d\n",
+ atomic_read(&obd_dirty_pages),
+ atomic_read(&obd_dirty_transit_pages),
+ obd_max_dirty_pages);
+ oa->o_undirty = 0;
+ } else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) {
+ CERROR("dirty %lu - dirty_max %lu too big???\n",
+ cli->cl_dirty, cli->cl_dirty_max);
+ oa->o_undirty = 0;
+ } else {
+ long max_in_flight = (cli->cl_max_pages_per_rpc <<
+ PAGE_CACHE_SHIFT)*
+ (cli->cl_max_rpcs_in_flight + 1);
+ oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
+ }
+ oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
+ oa->o_dropped = cli->cl_lost_grant;
+ cli->cl_lost_grant = 0;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ CDEBUG(D_CACHE,"dirty: "LPU64" undirty: %u dropped %u grant: "LPU64"\n",
+ oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
+
+}
+
+void osc_update_next_shrink(struct client_obd *cli)
+{
+ cli->cl_next_shrink_grant =
+ cfs_time_shift(cli->cl_grant_shrink_interval);
+ CDEBUG(D_CACHE, "next time %ld to shrink grant \n",
+ cli->cl_next_shrink_grant);
+}
+
+static void __osc_update_grant(struct client_obd *cli, obd_size grant)
+{
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_avail_grant += grant;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+}
+
+static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
+{
+ if (body->oa.o_valid & OBD_MD_FLGRANT) {
+ CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant);
+ __osc_update_grant(cli, body->oa.o_grant);
+ }
+}
+
+static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, obd_count vallen,
+ void *val, struct ptlrpc_request_set *set);
+
+static int osc_shrink_grant_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *aa, int rc)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct obdo *oa = ((struct osc_grant_args *)aa)->aa_oa;
+ struct ost_body *body;
+
+ if (rc != 0) {
+ __osc_update_grant(cli, oa->o_grant);
+ GOTO(out, rc);
+ }
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ LASSERT(body);
+ osc_update_grant(cli, body);
+out:
+ OBDO_FREE(oa);
+ return rc;
+}
+
+static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
+{
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ oa->o_grant = cli->cl_avail_grant / 4;
+ cli->cl_avail_grant -= oa->o_grant;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ oa->o_flags = 0;
+ }
+ oa->o_flags |= OBD_FL_SHRINK_GRANT;
+ osc_update_next_shrink(cli);
+}
+
+/* Shrink the current grant, either from some large amount to enough for a
+ * full set of in-flight RPCs, or if we have already shrunk to that limit
+ * then to enough for a single RPC. This avoids keeping more grant than
+ * needed, and avoids shrinking the grant piecemeal. */
+static int osc_shrink_grant(struct client_obd *cli)
+{
+ __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
+ (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_avail_grant <= target_bytes)
+ target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ return osc_shrink_grant_to_target(cli, target_bytes);
+}
+
+int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
+{
+ int rc = 0;
+ struct ost_body *body;
+ ENTRY;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ /* Don't shrink if we are already above or below the desired limit
+ * We don't want to shrink below a single RPC, as that will negatively
+ * impact block allocation and long-term performance. */
+ if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT)
+ target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+
+ if (target_bytes >= cli->cl_avail_grant) {
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ RETURN(0);
+ }
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ OBD_ALLOC_PTR(body);
+ if (!body)
+ RETURN(-ENOMEM);
+
+ osc_announce_cached(cli, &body->oa, 0);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ body->oa.o_grant = cli->cl_avail_grant - target_bytes;
+ cli->cl_avail_grant = target_bytes;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
+ body->oa.o_valid |= OBD_MD_FLFLAGS;
+ body->oa.o_flags = 0;
+ }
+ body->oa.o_flags |= OBD_FL_SHRINK_GRANT;
+ osc_update_next_shrink(cli);
+
+ rc = osc_set_info_async(NULL, cli->cl_import->imp_obd->obd_self_export,
+ sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK,
+ sizeof(*body), body, NULL);
+ if (rc != 0)
+ __osc_update_grant(cli, body->oa.o_grant);
+ OBD_FREE_PTR(body);
+ RETURN(rc);
+}
+
+static int osc_should_shrink_grant(struct client_obd *client)
+{
+ cfs_time_t time = cfs_time_current();
+ cfs_time_t next_shrink = client->cl_next_shrink_grant;
+
+ if ((client->cl_import->imp_connect_data.ocd_connect_flags &
+ OBD_CONNECT_GRANT_SHRINK) == 0)
+ return 0;
+
+ if (cfs_time_aftereq(time, next_shrink - 5 * CFS_TICK)) {
+ /* Get the current RPC size directly, instead of going via:
+ * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export)
+ * Keep comment here so that it can be found by searching. */
+ int brw_size = client->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+
+ if (client->cl_import->imp_state == LUSTRE_IMP_FULL &&
+ client->cl_avail_grant > brw_size)
+ return 1;
+ else
+ osc_update_next_shrink(client);
+ }
+ return 0;
+}
+
+static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data)
+{
+ struct client_obd *client;
+
+ list_for_each_entry(client, &item->ti_obd_list,
+ cl_grant_shrink_list) {
+ if (osc_should_shrink_grant(client))
+ osc_shrink_grant(client);
+ }
+ return 0;
+}
+
+static int osc_add_shrink_grant(struct client_obd *client)
+{
+ int rc;
+
+ rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
+ TIMEOUT_GRANT,
+ osc_grant_shrink_grant_cb, NULL,
+ &client->cl_grant_shrink_list);
+ if (rc) {
+ CERROR("add grant client %s error %d\n",
+ client->cl_import->imp_obd->obd_name, rc);
+ return rc;
+ }
+ CDEBUG(D_CACHE, "add grant client %s \n",
+ client->cl_import->imp_obd->obd_name);
+ osc_update_next_shrink(client);
+ return 0;
+}
+
+static int osc_del_shrink_grant(struct client_obd *client)
+{
+ return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list,
+ TIMEOUT_GRANT);
+}
+
+static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
+{
+ /*
+ * ocd_grant is the total grant amount we're expect to hold: if we've
+ * been evicted, it's the new avail_grant amount, cl_dirty will drop
+ * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty.
+ *
+ * race is tolerable here: if we're evicted, but imp_state already
+ * left EVICTED state, then cl_dirty must be 0 already.
+ */
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
+ cli->cl_avail_grant = ocd->ocd_grant;
+ else
+ cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty;
+
+ if (cli->cl_avail_grant < 0) {
+ CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
+ cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
+ ocd->ocd_grant, cli->cl_dirty);
+ /* workaround for servers which do not have the patch from
+ * LU-2679 */
+ cli->cl_avail_grant = ocd->ocd_grant;
+ }
+
+ /* determine the appropriate chunk size used by osc_extent. */
+ cli->cl_chunkbits = max_t(int, PAGE_CACHE_SHIFT, ocd->ocd_blocksize);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld."
+ "chunk bits: %d.\n", cli->cl_import->imp_obd->obd_name,
+ cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits);
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
+ list_empty(&cli->cl_grant_shrink_list))
+ osc_add_shrink_grant(cli);
+}
+
+/* We assume that the reason this OSC got a short read is because it read
+ * beyond the end of a stripe file; i.e. lustre is reading a sparse file
+ * via the LOV, and it _knows_ it's reading inside the file, it's just that
+ * this stripe never got written at or beyond this stripe offset yet. */
+static void handle_short_read(int nob_read, obd_count page_count,
+ struct brw_page **pga)
+{
+ char *ptr;
+ int i = 0;
+
+ /* skip bytes read OK */
+ while (nob_read > 0) {
+ LASSERT (page_count > 0);
+
+ if (pga[i]->count > nob_read) {
+ /* EOF inside this page */
+ ptr = kmap(pga[i]->pg) +
+ (pga[i]->off & ~CFS_PAGE_MASK);
+ memset(ptr + nob_read, 0, pga[i]->count - nob_read);
+ kunmap(pga[i]->pg);
+ page_count--;
+ i++;
+ break;
+ }
+
+ nob_read -= pga[i]->count;
+ page_count--;
+ i++;
+ }
+
+ /* zero remaining pages */
+ while (page_count-- > 0) {
+ ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+ memset(ptr, 0, pga[i]->count);
+ kunmap(pga[i]->pg);
+ i++;
+ }
+}
+
+static int check_write_rcs(struct ptlrpc_request *req,
+ int requested_nob, int niocount,
+ obd_count page_count, struct brw_page **pga)
+{
+ int i;
+ __u32 *remote_rcs;
+
+ remote_rcs = req_capsule_server_sized_get(&req->rq_pill, &RMF_RCS,
+ sizeof(*remote_rcs) *
+ niocount);
+ if (remote_rcs == NULL) {
+ CDEBUG(D_INFO, "Missing/short RC vector on BRW_WRITE reply\n");
+ return(-EPROTO);
+ }
+
+ /* return error if any niobuf was in error */
+ for (i = 0; i < niocount; i++) {
+ if ((int)remote_rcs[i] < 0)
+ return(remote_rcs[i]);
+
+ if (remote_rcs[i] != 0) {
+ CDEBUG(D_INFO, "rc[%d] invalid (%d) req %p\n",
+ i, remote_rcs[i], req);
+ return(-EPROTO);
+ }
+ }
+
+ if (req->rq_bulk->bd_nob_transferred != requested_nob) {
+ CERROR("Unexpected # bytes transferred: %d (requested %d)\n",
+ req->rq_bulk->bd_nob_transferred, requested_nob);
+ return(-EPROTO);
+ }
+
+ return (0);
+}
+
+static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
+{
+ if (p1->flag != p2->flag) {
+ unsigned mask = ~(OBD_BRW_FROM_GRANT| OBD_BRW_NOCACHE|
+ OBD_BRW_SYNC|OBD_BRW_ASYNC|OBD_BRW_NOQUOTA);
+
+ /* warn if we try to combine flags that we don't know to be
+ * safe to combine */
+ if (unlikely((p1->flag & mask) != (p2->flag & mask))) {
+ CWARN("Saw flags 0x%x and 0x%x in the same brw, please "
+ "report this at http://bugs.whamcloud.com/\n",
+ p1->flag, p2->flag);
+ }
+ return 0;
+ }
+
+ return (p1->off + p1->count == p2->off);
+}
+
+static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
+ struct brw_page **pga, int opc,
+ cksum_type_t cksum_type)
+{
+ __u32 cksum;
+ int i = 0;
+ struct cfs_crypto_hash_desc *hdesc;
+ unsigned int bufsize;
+ int err;
+ unsigned char cfs_alg = cksum_obd2cfs(cksum_type);
+
+ LASSERT(pg_count > 0);
+
+ hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0);
+ if (IS_ERR(hdesc)) {
+ CERROR("Unable to initialize checksum hash %s\n",
+ cfs_crypto_hash_name(cfs_alg));
+ return PTR_ERR(hdesc);
+ }
+
+ while (nob > 0 && pg_count > 0) {
+ int count = pga[i]->count > nob ? nob : pga[i]->count;
+
+ /* corrupt the data before we compute the checksum, to
+ * simulate an OST->client data error */
+ if (i == 0 && opc == OST_READ &&
+ OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
+ unsigned char *ptr = kmap(pga[i]->pg);
+ int off = pga[i]->off & ~CFS_PAGE_MASK;
+ memcpy(ptr + off, "bad1", min(4, nob));
+ kunmap(pga[i]->pg);
+ }
+ cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
+ pga[i]->off & ~CFS_PAGE_MASK,
+ count);
+ LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d\n",
+ (int)(pga[i]->off & ~CFS_PAGE_MASK));
+
+ nob -= pga[i]->count;
+ pg_count--;
+ i++;
+ }
+
+ bufsize = 4;
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
+
+ if (err)
+ cfs_crypto_hash_final(hdesc, NULL, NULL);
+
+ /* For sending we only compute the wrong checksum instead
+ * of corrupting the data so it is still correct on a redo */
+ if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND))
+ cksum++;
+
+ return cksum;
+}
+
+static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
+ struct lov_stripe_md *lsm, obd_count page_count,
+ struct brw_page **pga,
+ struct ptlrpc_request **reqp,
+ struct obd_capa *ocapa, int reserve,
+ int resend)
+{
+ struct ptlrpc_request *req;
+ struct ptlrpc_bulk_desc *desc;
+ struct ost_body *body;
+ struct obd_ioobj *ioobj;
+ struct niobuf_remote *niobuf;
+ int niocount, i, requested_nob, opc, rc;
+ struct osc_brw_async_args *aa;
+ struct req_capsule *pill;
+ struct brw_page *pg_prev;
+
+ ENTRY;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
+ RETURN(-ENOMEM); /* Recoverable */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ2))
+ RETURN(-EINVAL); /* Fatal */
+
+ if ((cmd & OBD_BRW_WRITE) != 0) {
+ opc = OST_WRITE;
+ req = ptlrpc_request_alloc_pool(cli->cl_import,
+ cli->cl_import->imp_rq_pool,
+ &RQF_OST_BRW_WRITE);
+ } else {
+ opc = OST_READ;
+ req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ);
+ }
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ for (niocount = i = 1; i < page_count; i++) {
+ if (!can_merge_pages(pga[i - 1], pga[i]))
+ niocount++;
+ }
+
+ pill = &req->rq_pill;
+ req_capsule_set_size(pill, &RMF_OBD_IOOBJ, RCL_CLIENT,
+ sizeof(*ioobj));
+ req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
+ niocount * sizeof(*niobuf));
+ osc_set_capa_size(req, &RMF_CAPA1, ocapa);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+ req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
+ ptlrpc_at_set_req_timeout(req);
+ /* ask ptlrpc not to resend on EINPROGRESS since BRWs have their own
+ * retry logic */
+ req->rq_no_retry_einprogress = 1;
+
+ desc = ptlrpc_prep_bulk_imp(req, page_count,
+ cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS,
+ opc == OST_WRITE ? BULK_GET_SOURCE : BULK_PUT_SINK,
+ OST_BULK_PORTAL);
+
+ if (desc == NULL)
+ GOTO(out, rc = -ENOMEM);
+ /* NB request now owns desc and will free it when it gets freed */
+
+ body = req_capsule_client_get(pill, &RMF_OST_BODY);
+ ioobj = req_capsule_client_get(pill, &RMF_OBD_IOOBJ);
+ niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
+ LASSERT(body != NULL && ioobj != NULL && niobuf != NULL);
+
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
+
+ obdo_to_ioobj(oa, ioobj);
+ ioobj->ioo_bufcnt = niocount;
+ /* The high bits of ioo_max_brw tells server _maximum_ number of bulks
+ * that might be send for this request. The actual number is decided
+ * when the RPC is finally sent in ptlrpc_register_bulk(). It sends
+ * "max - 1" for old client compatibility sending "0", and also so the
+ * the actual maximum is a power-of-two number, not one less. LU-1431 */
+ ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
+ osc_pack_capa(req, body, ocapa);
+ LASSERT(page_count > 0);
+ pg_prev = pga[0];
+ for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
+ struct brw_page *pg = pga[i];
+ int poff = pg->off & ~CFS_PAGE_MASK;
+
+ LASSERT(pg->count > 0);
+ /* make sure there is no gap in the middle of page array */
+ LASSERTF(page_count == 1 ||
+ (ergo(i == 0, poff + pg->count == PAGE_CACHE_SIZE) &&
+ ergo(i > 0 && i < page_count - 1,
+ poff == 0 && pg->count == PAGE_CACHE_SIZE) &&
+ ergo(i == page_count - 1, poff == 0)),
+ "i: %d/%d pg: %p off: "LPU64", count: %u\n",
+ i, page_count, pg, pg->off, pg->count);
+ LASSERTF(i == 0 || pg->off > pg_prev->off,
+ "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64
+ " prev_pg %p [pri %lu ind %lu] off "LPU64"\n",
+ i, page_count,
+ pg->pg, page_private(pg->pg), pg->pg->index, pg->off,
+ pg_prev->pg, page_private(pg_prev->pg),
+ pg_prev->pg->index, pg_prev->off);
+ LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
+ (pg->flag & OBD_BRW_SRVLOCK));
+
+ ptlrpc_prep_bulk_page_pin(desc, pg->pg, poff, pg->count);
+ requested_nob += pg->count;
+
+ if (i > 0 && can_merge_pages(pg_prev, pg)) {
+ niobuf--;
+ niobuf->len += pg->count;
+ } else {
+ niobuf->offset = pg->off;
+ niobuf->len = pg->count;
+ niobuf->flags = pg->flag;
+ }
+ pg_prev = pg;
+ }
+
+ LASSERTF((void *)(niobuf - niocount) ==
+ req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE),
+ "want %p - real %p\n", req_capsule_client_get(&req->rq_pill,
+ &RMF_NIOBUF_REMOTE), (void *)(niobuf - niocount));
+
+ osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0);
+ if (resend) {
+ if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
+ body->oa.o_valid |= OBD_MD_FLFLAGS;
+ body->oa.o_flags = 0;
+ }
+ body->oa.o_flags |= OBD_FL_RECOV_RESEND;
+ }
+
+ if (osc_should_shrink_grant(cli))
+ osc_shrink_grant_local(cli, &body->oa);
+
+ /* size[REQ_REC_OFF] still sizeof (*body) */
+ if (opc == OST_WRITE) {
+ if (cli->cl_checksum &&
+ !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+ /* store cl_cksum_type in a local variable since
+ * it can be changed via lprocfs */
+ cksum_type_t cksum_type = cli->cl_cksum_type;
+
+ if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
+ oa->o_flags &= OBD_FL_LOCAL_MASK;
+ body->oa.o_flags = 0;
+ }
+ body->oa.o_flags |= cksum_type_pack(cksum_type);
+ body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ body->oa.o_cksum = osc_checksum_bulk(requested_nob,
+ page_count, pga,
+ OST_WRITE,
+ cksum_type);
+ CDEBUG(D_PAGE, "checksum at write origin: %x\n",
+ body->oa.o_cksum);
+ /* save this in 'oa', too, for later checking */
+ oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ oa->o_flags |= cksum_type_pack(cksum_type);
+ } else {
+ /* clear out the checksum flag, in case this is a
+ * resend but cl_checksum is no longer set. b=11238 */
+ oa->o_valid &= ~OBD_MD_FLCKSUM;
+ }
+ oa->o_cksum = body->oa.o_cksum;
+ /* 1 RC per niobuf */
+ req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
+ sizeof(__u32) * niocount);
+ } else {
+ if (cli->cl_checksum &&
+ !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+ if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
+ body->oa.o_flags = 0;
+ body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
+ body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ }
+ }
+ ptlrpc_request_set_replen(req);
+
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ aa->aa_oa = oa;
+ aa->aa_requested_nob = requested_nob;
+ aa->aa_nio_count = niocount;
+ aa->aa_page_count = page_count;
+ aa->aa_resends = 0;
+ aa->aa_ppga = pga;
+ aa->aa_cli = cli;
+ INIT_LIST_HEAD(&aa->aa_oaps);
+ if (ocapa && reserve)
+ aa->aa_ocapa = capa_get(ocapa);
+
+ *reqp = req;
+ RETURN(0);
+
+ out:
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
+ __u32 client_cksum, __u32 server_cksum, int nob,
+ obd_count page_count, struct brw_page **pga,
+ cksum_type_t client_cksum_type)
+{
+ __u32 new_cksum;
+ char *msg;
+ cksum_type_t cksum_type;
+
+ if (server_cksum == client_cksum) {
+ CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
+ return 0;
+ }
+
+ cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+ oa->o_flags : 0);
+ new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
+ cksum_type);
+
+ if (cksum_type != client_cksum_type)
+ msg = "the server did not use the checksum type specified in "
+ "the original request - likely a protocol problem";
+ else if (new_cksum == server_cksum)
+ msg = "changed on the client after we checksummed it - "
+ "likely false positive due to mmap IO (bug 11742)";
+ else if (new_cksum == client_cksum)
+ msg = "changed in transit before arrival at OST";
+ else
+ msg = "changed in transit AND doesn't match the original - "
+ "likely false positive due to mmap IO (bug 11742)";
+
+ LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode "DFID
+ " object "DOSTID" extent ["LPU64"-"LPU64"]\n",
+ msg, libcfs_nid2str(peer->nid),
+ oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
+ oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
+ oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
+ POSTID(&oa->o_oi), pga[0]->off,
+ pga[page_count-1]->off + pga[page_count-1]->count - 1);
+ CERROR("original client csum %x (type %x), server csum %x (type %x), "
+ "client csum now %x\n", client_cksum, client_cksum_type,
+ server_cksum, cksum_type, new_cksum);
+ return 1;
+}
+
+/* Note rc enters this function as number of bytes transferred */
+static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
+{
+ struct osc_brw_async_args *aa = (void *)&req->rq_async_args;
+ const lnet_process_id_t *peer =
+ &req->rq_import->imp_connection->c_peer;
+ struct client_obd *cli = aa->aa_cli;
+ struct ost_body *body;
+ __u32 client_cksum = 0;
+ ENTRY;
+
+ if (rc < 0 && rc != -EDQUOT) {
+ DEBUG_REQ(D_INFO, req, "Failed request with rc = %d\n", rc);
+ RETURN(rc);
+ }
+
+ LASSERTF(req->rq_repmsg != NULL, "rc = %d\n", rc);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL) {
+ DEBUG_REQ(D_INFO, req, "Can't unpack body\n");
+ RETURN(-EPROTO);
+ }
+
+ /* set/clear over quota flag for a uid/gid */
+ if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE &&
+ body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) {
+ unsigned int qid[MAXQUOTAS] = { body->oa.o_uid, body->oa.o_gid };
+
+ CDEBUG(D_QUOTA, "setdq for [%u %u] with valid "LPX64", flags %x\n",
+ body->oa.o_uid, body->oa.o_gid, body->oa.o_valid,
+ body->oa.o_flags);
+ osc_quota_setdq(cli, qid, body->oa.o_valid, body->oa.o_flags);
+ }
+
+ osc_update_grant(cli, body);
+
+ if (rc < 0)
+ RETURN(rc);
+
+ if (aa->aa_oa->o_valid & OBD_MD_FLCKSUM)
+ client_cksum = aa->aa_oa->o_cksum; /* save for later */
+
+ if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
+ if (rc > 0) {
+ CERROR("Unexpected +ve rc %d\n", rc);
+ RETURN(-EPROTO);
+ }
+ LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob);
+
+ if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk))
+ RETURN(-EAGAIN);
+
+ if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
+ check_write_checksum(&body->oa, peer, client_cksum,
+ body->oa.o_cksum, aa->aa_requested_nob,
+ aa->aa_page_count, aa->aa_ppga,
+ cksum_type_unpack(aa->aa_oa->o_flags)))
+ RETURN(-EAGAIN);
+
+ rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count,
+ aa->aa_page_count, aa->aa_ppga);
+ GOTO(out, rc);
+ }
+
+ /* The rest of this function executes only for OST_READs */
+
+ /* if unwrap_bulk failed, return -EAGAIN to retry */
+ rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, rc);
+ if (rc < 0)
+ GOTO(out, rc = -EAGAIN);
+
+ if (rc > aa->aa_requested_nob) {
+ CERROR("Unexpected rc %d (%d requested)\n", rc,
+ aa->aa_requested_nob);
+ RETURN(-EPROTO);
+ }
+
+ if (rc != req->rq_bulk->bd_nob_transferred) {
+ CERROR ("Unexpected rc %d (%d transferred)\n",
+ rc, req->rq_bulk->bd_nob_transferred);
+ return (-EPROTO);
+ }
+
+ if (rc < aa->aa_requested_nob)
+ handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
+
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ static int cksum_counter;
+ __u32 server_cksum = body->oa.o_cksum;
+ char *via;
+ char *router;
+ cksum_type_t cksum_type;
+
+ cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS?
+ body->oa.o_flags : 0);
+ client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
+ aa->aa_ppga, OST_READ,
+ cksum_type);
+
+ if (peer->nid == req->rq_bulk->bd_sender) {
+ via = router = "";
+ } else {
+ via = " via ";
+ router = libcfs_nid2str(req->rq_bulk->bd_sender);
+ }
+
+ if (server_cksum == ~0 && rc > 0) {
+ CERROR("Protocol error: server %s set the 'checksum' "
+ "bit, but didn't send a checksum. Not fatal, "
+ "but please notify on http://bugs.whamcloud.com/\n",
+ libcfs_nid2str(peer->nid));
+ } else if (server_cksum != client_cksum) {
+ LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
+ "%s%s%s inode "DFID" object "DOSTID
+ " extent ["LPU64"-"LPU64"]\n",
+ req->rq_import->imp_obd->obd_name,
+ libcfs_nid2str(peer->nid),
+ via, router,
+ body->oa.o_valid & OBD_MD_FLFID ?
+ body->oa.o_parent_seq : (__u64)0,
+ body->oa.o_valid & OBD_MD_FLFID ?
+ body->oa.o_parent_oid : 0,
+ body->oa.o_valid & OBD_MD_FLFID ?
+ body->oa.o_parent_ver : 0,
+ POSTID(&body->oa.o_oi),
+ aa->aa_ppga[0]->off,
+ aa->aa_ppga[aa->aa_page_count-1]->off +
+ aa->aa_ppga[aa->aa_page_count-1]->count -
+ 1);
+ CERROR("client %x, server %x, cksum_type %x\n",
+ client_cksum, server_cksum, cksum_type);
+ cksum_counter = 0;
+ aa->aa_oa->o_cksum = client_cksum;
+ rc = -EAGAIN;
+ } else {
+ cksum_counter++;
+ CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
+ rc = 0;
+ }
+ } else if (unlikely(client_cksum)) {
+ static int cksum_missed;
+
+ cksum_missed++;
+ if ((cksum_missed & (-cksum_missed)) == cksum_missed)
+ CERROR("Checksum %u requested from %s but not sent\n",
+ cksum_missed, libcfs_nid2str(peer->nid));
+ } else {
+ rc = 0;
+ }
+out:
+ if (rc >= 0)
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
+ aa->aa_oa, &body->oa);
+
+ RETURN(rc);
+}
+
+static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa,
+ struct lov_stripe_md *lsm,
+ obd_count page_count, struct brw_page **pga,
+ struct obd_capa *ocapa)
+{
+ struct ptlrpc_request *req;
+ int rc;
+ wait_queue_head_t waitq;
+ int generation, resends = 0;
+ struct l_wait_info lwi;
+
+ ENTRY;
+
+ init_waitqueue_head(&waitq);
+ generation = exp->exp_obd->u.cli.cl_import->imp_generation;
+
+restart_bulk:
+ rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
+ page_count, pga, &req, ocapa, 0, resends);
+ if (rc != 0)
+ return (rc);
+
+ if (resends) {
+ req->rq_generation_set = 1;
+ req->rq_import_generation = generation;
+ req->rq_sent = cfs_time_current_sec() + resends;
+ }
+
+ rc = ptlrpc_queue_wait(req);
+
+ if (rc == -ETIMEDOUT && req->rq_resend) {
+ DEBUG_REQ(D_HA, req, "BULK TIMEOUT");
+ ptlrpc_req_finished(req);
+ goto restart_bulk;
+ }
+
+ rc = osc_brw_fini_request(req, rc);
+
+ ptlrpc_req_finished(req);
+ /* When server return -EINPROGRESS, client should always retry
+ * regardless of the number of times the bulk was resent already.*/
+ if (osc_recoverable_error(rc)) {
+ resends++;
+ if (rc != -EINPROGRESS &&
+ !client_should_resend(resends, &exp->exp_obd->u.cli)) {
+ CERROR("%s: too many resend retries for object: "
+ ""DOSTID", rc = %d.\n", exp->exp_obd->obd_name,
+ POSTID(&oa->o_oi), rc);
+ goto out;
+ }
+ if (generation !=
+ exp->exp_obd->u.cli.cl_import->imp_generation) {
+ CDEBUG(D_HA, "%s: resend cross eviction for object: "
+ ""DOSTID", rc = %d.\n", exp->exp_obd->obd_name,
+ POSTID(&oa->o_oi), rc);
+ goto out;
+ }
+
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+ NULL);
+ l_wait_event(waitq, 0, &lwi);
+
+ goto restart_bulk;
+ }
+out:
+ if (rc == -EAGAIN || rc == -EINPROGRESS)
+ rc = -EIO;
+ RETURN (rc);
+}
+
+static int osc_brw_redo_request(struct ptlrpc_request *request,
+ struct osc_brw_async_args *aa, int rc)
+{
+ struct ptlrpc_request *new_req;
+ struct osc_brw_async_args *new_aa;
+ struct osc_async_page *oap;
+ ENTRY;
+
+ DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request,
+ "redo for recoverable error %d", rc);
+
+ rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
+ OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
+ aa->aa_cli, aa->aa_oa,
+ NULL /* lsm unused by osc currently */,
+ aa->aa_page_count, aa->aa_ppga,
+ &new_req, aa->aa_ocapa, 0, 1);
+ if (rc)
+ RETURN(rc);
+
+ list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
+ if (oap->oap_request != NULL) {
+ LASSERTF(request == oap->oap_request,
+ "request %p != oap_request %p\n",
+ request, oap->oap_request);
+ if (oap->oap_interrupted) {
+ ptlrpc_req_finished(new_req);
+ RETURN(-EINTR);
+ }
+ }
+ }
+ /* New request takes over pga and oaps from old request.
+ * Note that copying a list_head doesn't work, need to move it... */
+ aa->aa_resends++;
+ new_req->rq_interpret_reply = request->rq_interpret_reply;
+ new_req->rq_async_args = request->rq_async_args;
+ /* cap resend delay to the current request timeout, this is similar to
+ * what ptlrpc does (see after_reply()) */
+ if (aa->aa_resends > new_req->rq_timeout)
+ new_req->rq_sent = cfs_time_current_sec() + new_req->rq_timeout;
+ else
+ new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends;
+ new_req->rq_generation_set = 1;
+ new_req->rq_import_generation = request->rq_import_generation;
+
+ new_aa = ptlrpc_req_async_args(new_req);
+
+ INIT_LIST_HEAD(&new_aa->aa_oaps);
+ list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps);
+ INIT_LIST_HEAD(&new_aa->aa_exts);
+ list_splice_init(&aa->aa_exts, &new_aa->aa_exts);
+ new_aa->aa_resends = aa->aa_resends;
+
+ list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) {
+ if (oap->oap_request) {
+ ptlrpc_req_finished(oap->oap_request);
+ oap->oap_request = ptlrpc_request_addref(new_req);
+ }
+ }
+
+ new_aa->aa_ocapa = aa->aa_ocapa;
+ aa->aa_ocapa = NULL;
+
+ /* XXX: This code will run into problem if we're going to support
+ * to add a series of BRW RPCs into a self-defined ptlrpc_request_set
+ * and wait for all of them to be finished. We should inherit request
+ * set from old request. */
+ ptlrpcd_add_req(new_req, PDL_POLICY_SAME, -1);
+
+ DEBUG_REQ(D_INFO, new_req, "new request");
+ RETURN(0);
+}
+
+/*
+ * ugh, we want disk allocation on the target to happen in offset order. we'll
+ * follow sedgewicks advice and stick to the dead simple shellsort -- it'll do
+ * fine for our small page arrays and doesn't require allocation. its an
+ * insertion sort that swaps elements that are strides apart, shrinking the
+ * stride down until its '1' and the array is sorted.
+ */
+static void sort_brw_pages(struct brw_page **array, int num)
+{
+ int stride, i, j;
+ struct brw_page *tmp;
+
+ if (num == 1)
+ return;
+ for (stride = 1; stride < num ; stride = (stride * 3) + 1)
+ ;
+
+ do {
+ stride /= 3;
+ for (i = stride ; i < num ; i++) {
+ tmp = array[i];
+ j = i;
+ while (j >= stride && array[j - stride]->off > tmp->off) {
+ array[j] = array[j - stride];
+ j -= stride;
+ }
+ array[j] = tmp;
+ }
+ } while (stride > 1);
+}
+
+static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages)
+{
+ int count = 1;
+ int offset;
+ int i = 0;
+
+ LASSERT (pages > 0);
+ offset = pg[i]->off & ~CFS_PAGE_MASK;
+
+ for (;;) {
+ pages--;
+ if (pages == 0) /* that's all */
+ return count;
+
+ if (offset + pg[i]->count < PAGE_CACHE_SIZE)
+ return count; /* doesn't end on page boundary */
+
+ i++;
+ offset = pg[i]->off & ~CFS_PAGE_MASK;
+ if (offset != 0) /* doesn't start on page boundary */
+ return count;
+
+ count++;
+ }
+}
+
+static struct brw_page **osc_build_ppga(struct brw_page *pga, obd_count count)
+{
+ struct brw_page **ppga;
+ int i;
+
+ OBD_ALLOC(ppga, sizeof(*ppga) * count);
+ if (ppga == NULL)
+ return NULL;
+
+ for (i = 0; i < count; i++)
+ ppga[i] = pga + i;
+ return ppga;
+}
+
+static void osc_release_ppga(struct brw_page **ppga, obd_count count)
+{
+ LASSERT(ppga != NULL);
+ OBD_FREE(ppga, sizeof(*ppga) * count);
+}
+
+static int osc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
+ obd_count page_count, struct brw_page *pga,
+ struct obd_trans_info *oti)
+{
+ struct obdo *saved_oa = NULL;
+ struct brw_page **ppga, **orig;
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct client_obd *cli;
+ int rc, page_count_orig;
+ ENTRY;
+
+ LASSERT((imp != NULL) && (imp->imp_obd != NULL));
+ cli = &imp->imp_obd->u.cli;
+
+ if (cmd & OBD_BRW_CHECK) {
+ /* The caller just wants to know if there's a chance that this
+ * I/O can succeed */
+
+ if (imp->imp_invalid)
+ RETURN(-EIO);
+ RETURN(0);
+ }
+
+ /* test_brw with a failed create can trip this, maybe others. */
+ LASSERT(cli->cl_max_pages_per_rpc);
+
+ rc = 0;
+
+ orig = ppga = osc_build_ppga(pga, page_count);
+ if (ppga == NULL)
+ RETURN(-ENOMEM);
+ page_count_orig = page_count;
+
+ sort_brw_pages(ppga, page_count);
+ while (page_count) {
+ obd_count pages_per_brw;
+
+ if (page_count > cli->cl_max_pages_per_rpc)
+ pages_per_brw = cli->cl_max_pages_per_rpc;
+ else
+ pages_per_brw = page_count;
+
+ pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw);
+
+ if (saved_oa != NULL) {
+ /* restore previously saved oa */
+ *oinfo->oi_oa = *saved_oa;
+ } else if (page_count > pages_per_brw) {
+ /* save a copy of oa (brw will clobber it) */
+ OBDO_ALLOC(saved_oa);
+ if (saved_oa == NULL)
+ GOTO(out, rc = -ENOMEM);
+ *saved_oa = *oinfo->oi_oa;
+ }
+
+ rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md,
+ pages_per_brw, ppga, oinfo->oi_capa);
+
+ if (rc != 0)
+ break;
+
+ page_count -= pages_per_brw;
+ ppga += pages_per_brw;
+ }
+
+out:
+ osc_release_ppga(orig, page_count_orig);
+
+ if (saved_oa != NULL)
+ OBDO_FREE(saved_oa);
+
+ RETURN(rc);
+}
+
+static int brw_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req, void *data, int rc)
+{
+ struct osc_brw_async_args *aa = data;
+ struct osc_extent *ext;
+ struct osc_extent *tmp;
+ struct cl_object *obj = NULL;
+ struct client_obd *cli = aa->aa_cli;
+ ENTRY;
+
+ rc = osc_brw_fini_request(req, rc);
+ CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
+ /* When server return -EINPROGRESS, client should always retry
+ * regardless of the number of times the bulk was resent already. */
+ if (osc_recoverable_error(rc)) {
+ if (req->rq_import_generation !=
+ req->rq_import->imp_generation) {
+ CDEBUG(D_HA, "%s: resend cross eviction for object: "
+ ""DOSTID", rc = %d.\n",
+ req->rq_import->imp_obd->obd_name,
+ POSTID(&aa->aa_oa->o_oi), rc);
+ } else if (rc == -EINPROGRESS ||
+ client_should_resend(aa->aa_resends, aa->aa_cli)) {
+ rc = osc_brw_redo_request(req, aa, rc);
+ } else {
+ CERROR("%s: too many resent retries for object: "
+ ""LPU64":"LPU64", rc = %d.\n",
+ req->rq_import->imp_obd->obd_name,
+ POSTID(&aa->aa_oa->o_oi), rc);
+ }
+
+ if (rc == 0)
+ RETURN(0);
+ else if (rc == -EAGAIN || rc == -EINPROGRESS)
+ rc = -EIO;
+ }
+
+ if (aa->aa_ocapa) {
+ capa_put(aa->aa_ocapa);
+ aa->aa_ocapa = NULL;
+ }
+
+ list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
+ if (obj == NULL && rc == 0) {
+ obj = osc2cl(ext->oe_obj);
+ cl_object_get(obj);
+ }
+
+ list_del_init(&ext->oe_link);
+ osc_extent_finish(env, ext, 1, rc);
+ }
+ LASSERT(list_empty(&aa->aa_exts));
+ LASSERT(list_empty(&aa->aa_oaps));
+
+ if (obj != NULL) {
+ struct obdo *oa = aa->aa_oa;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ unsigned long valid = 0;
+
+ LASSERT(rc == 0);
+ if (oa->o_valid & OBD_MD_FLBLOCKS) {
+ attr->cat_blocks = oa->o_blocks;
+ valid |= CAT_BLOCKS;
+ }
+ if (oa->o_valid & OBD_MD_FLMTIME) {
+ attr->cat_mtime = oa->o_mtime;
+ valid |= CAT_MTIME;
+ }
+ if (oa->o_valid & OBD_MD_FLATIME) {
+ attr->cat_atime = oa->o_atime;
+ valid |= CAT_ATIME;
+ }
+ if (oa->o_valid & OBD_MD_FLCTIME) {
+ attr->cat_ctime = oa->o_ctime;
+ valid |= CAT_CTIME;
+ }
+ if (valid != 0) {
+ cl_object_attr_lock(obj);
+ cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
+ }
+ cl_object_put(env, obj);
+ }
+ OBDO_FREE(aa->aa_oa);
+
+ cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
+ req->rq_bulk->bd_nob_transferred);
+ osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
+ ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
+ * is called so we know whether to go to sync BRWs or wait for more
+ * RPCs to complete */
+ if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE)
+ cli->cl_w_in_flight--;
+ else
+ cli->cl_r_in_flight--;
+ osc_wake_cache_waiters(cli);
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ RETURN(rc);
+}
+
+/**
+ * Build an RPC by the list of extent @ext_list. The caller must ensure
+ * that the total pages in this list are NOT over max pages per RPC.
+ * Extents in the list must be in OES_RPC state.
+ */
+int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
+ struct list_head *ext_list, int cmd, pdl_policy_t pol)
+{
+ struct ptlrpc_request *req = NULL;
+ struct osc_extent *ext;
+ struct brw_page **pga = NULL;
+ struct osc_brw_async_args *aa = NULL;
+ struct obdo *oa = NULL;
+ struct osc_async_page *oap;
+ struct osc_async_page *tmp;
+ struct cl_req *clerq = NULL;
+ enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
+ CRT_READ;
+ struct ldlm_lock *lock = NULL;
+ struct cl_req_attr *crattr = NULL;
+ obd_off starting_offset = OBD_OBJECT_EOF;
+ obd_off ending_offset = 0;
+ int mpflag = 0;
+ int mem_tight = 0;
+ int page_count = 0;
+ int i;
+ int rc;
+ LIST_HEAD(rpc_list);
+
+ ENTRY;
+ LASSERT(!list_empty(ext_list));
+
+ /* add pages into rpc_list to build BRW rpc */
+ list_for_each_entry(ext, ext_list, oe_link) {
+ LASSERT(ext->oe_state == OES_RPC);
+ mem_tight |= ext->oe_memalloc;
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ ++page_count;
+ list_add_tail(&oap->oap_rpc_item, &rpc_list);
+ if (starting_offset > oap->oap_obj_off)
+ starting_offset = oap->oap_obj_off;
+ else
+ LASSERT(oap->oap_page_off == 0);
+ if (ending_offset < oap->oap_obj_off + oap->oap_count)
+ ending_offset = oap->oap_obj_off +
+ oap->oap_count;
+ else
+ LASSERT(oap->oap_page_off + oap->oap_count ==
+ PAGE_CACHE_SIZE);
+ }
+ }
+
+ if (mem_tight)
+ mpflag = cfs_memory_pressure_get_and_set();
+
+ OBD_ALLOC(crattr, sizeof(*crattr));
+ if (crattr == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ OBD_ALLOC(pga, sizeof(*pga) * page_count);
+ if (pga == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ OBDO_ALLOC(oa);
+ if (oa == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ i = 0;
+ list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
+ struct cl_page *page = oap2cl_page(oap);
+ if (clerq == NULL) {
+ clerq = cl_req_alloc(env, page, crt,
+ 1 /* only 1-object rpcs for now */);
+ if (IS_ERR(clerq))
+ GOTO(out, rc = PTR_ERR(clerq));
+ lock = oap->oap_ldlm_lock;
+ }
+ if (mem_tight)
+ oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+ pga[i] = &oap->oap_brw_page;
+ pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
+ CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
+ pga[i]->pg, page_index(oap->oap_page), oap,
+ pga[i]->flag);
+ i++;
+ cl_req_page_add(env, clerq, page);
+ }
+
+ /* always get the data for the obdo for the rpc */
+ LASSERT(clerq != NULL);
+ crattr->cra_oa = oa;
+ cl_req_attr_set(env, clerq, crattr, ~0ULL);
+ if (lock) {
+ oa->o_handle = lock->l_remote_handle;
+ oa->o_valid |= OBD_MD_FLHANDLE;
+ }
+
+ rc = cl_req_prep(env, clerq);
+ if (rc != 0) {
+ CERROR("cl_req_prep failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ sort_brw_pages(pga, page_count);
+ rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
+ pga, &req, crattr->cra_capa, 1, 0);
+ if (rc != 0) {
+ CERROR("prep_req failed: %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ req->rq_interpret_reply = brw_interpret;
+
+ if (mem_tight != 0)
+ req->rq_memalloc = 1;
+
+ /* Need to update the timestamps after the request is built in case
+ * we race with setattr (locally or in queue at OST). If OST gets
+ * later setattr before earlier BRW (as determined by the request xid),
+ * the OST will not use BRW timestamps. Sadly, there is no obvious
+ * way to do this in a single call. bug 10150 */
+ cl_req_attr_set(env, clerq, crattr,
+ OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
+
+ lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
+
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ INIT_LIST_HEAD(&aa->aa_oaps);
+ list_splice_init(&rpc_list, &aa->aa_oaps);
+ INIT_LIST_HEAD(&aa->aa_exts);
+ list_splice_init(ext_list, &aa->aa_exts);
+ aa->aa_clerq = clerq;
+
+ /* queued sync pages can be torn down while the pages
+ * were between the pending list and the rpc */
+ tmp = NULL;
+ list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
+ /* only one oap gets a request reference */
+ if (tmp == NULL)
+ tmp = oap;
+ if (oap->oap_interrupted && !req->rq_intr) {
+ CDEBUG(D_INODE, "oap %p in req %p interrupted\n",
+ oap, req);
+ ptlrpc_mark_interrupted(req);
+ }
+ }
+ if (tmp != NULL)
+ tmp->oap_request = ptlrpc_request_addref(req);
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ starting_offset >>= PAGE_CACHE_SHIFT;
+ if (cmd == OBD_BRW_READ) {
+ cli->cl_r_in_flight++;
+ lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
+ lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight);
+ lprocfs_oh_tally_log2(&cli->cl_read_offset_hist,
+ starting_offset + 1);
+ } else {
+ cli->cl_w_in_flight++;
+ lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count);
+ lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_w_in_flight);
+ lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
+ starting_offset + 1);
+ }
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
+ page_count, aa, cli->cl_r_in_flight,
+ cli->cl_w_in_flight);
+
+ /* XXX: Maybe the caller can check the RPC bulk descriptor to
+ * see which CPU/NUMA node the majority of pages were allocated
+ * on, and try to assign the async RPC to the CPU core
+ * (PDL_POLICY_PREFERRED) to reduce cross-CPU memory traffic.
+ *
+ * But on the other hand, we expect that multiple ptlrpcd
+ * threads and the initial write sponsor can run in parallel,
+ * especially when data checksum is enabled, which is CPU-bound
+ * operation and single ptlrpcd thread cannot process in time.
+ * So more ptlrpcd threads sharing BRW load
+ * (with PDL_POLICY_ROUND) seems better.
+ */
+ ptlrpcd_add_req(req, pol, -1);
+ rc = 0;
+ EXIT;
+
+out:
+ if (mem_tight != 0)
+ cfs_memory_pressure_restore(mpflag);
+
+ if (crattr != NULL) {
+ capa_put(crattr->cra_capa);
+ OBD_FREE(crattr, sizeof(*crattr));
+ }
+
+ if (rc != 0) {
+ LASSERT(req == NULL);
+
+ if (oa)
+ OBDO_FREE(oa);
+ if (pga)
+ OBD_FREE(pga, sizeof(*pga) * page_count);
+ /* this should happen rarely and is pretty bad, it makes the
+ * pending list not follow the dirty order */
+ while (!list_empty(ext_list)) {
+ ext = list_entry(ext_list->next, struct osc_extent,
+ oe_link);
+ list_del_init(&ext->oe_link);
+ osc_extent_finish(env, ext, 0, rc);
+ }
+ if (clerq && !IS_ERR(clerq))
+ cl_req_completion(env, clerq, rc);
+ }
+ RETURN(rc);
+}
+
+static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
+ struct ldlm_enqueue_info *einfo)
+{
+ void *data = einfo->ei_cbdata;
+ int set = 0;
+
+ LASSERT(lock != NULL);
+ LASSERT(lock->l_blocking_ast == einfo->ei_cb_bl);
+ LASSERT(lock->l_resource->lr_type == einfo->ei_type);
+ LASSERT(lock->l_completion_ast == einfo->ei_cb_cp);
+ LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
+
+ lock_res_and_lock(lock);
+ spin_lock(&osc_ast_guard);
+
+ if (lock->l_ast_data == NULL)
+ lock->l_ast_data = data;
+ if (lock->l_ast_data == data)
+ set = 1;
+
+ spin_unlock(&osc_ast_guard);
+ unlock_res_and_lock(lock);
+
+ return set;
+}
+
+static int osc_set_data_with_check(struct lustre_handle *lockh,
+ struct ldlm_enqueue_info *einfo)
+{
+ struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+ int set = 0;
+
+ if (lock != NULL) {
+ set = osc_set_lock_data_with_check(lock, einfo);
+ LDLM_LOCK_PUT(lock);
+ } else
+ CERROR("lockh %p, data %p - client evicted?\n",
+ lockh, einfo->ei_cbdata);
+ return set;
+}
+
+static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_iterator_t replace, void *data)
+{
+ struct ldlm_res_id res_id;
+ struct obd_device *obd = class_exp2obd(exp);
+
+ ostid_build_res_name(&lsm->lsm_oi, &res_id);
+ ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
+ return 0;
+}
+
+/* find any ldlm lock of the inode in osc
+ * return 0 not find
+ * 1 find one
+ * < 0 error */
+static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_iterator_t replace, void *data)
+{
+ struct ldlm_res_id res_id;
+ struct obd_device *obd = class_exp2obd(exp);
+ int rc = 0;
+
+ ostid_build_res_name(&lsm->lsm_oi, &res_id);
+ rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
+ if (rc == LDLM_ITER_STOP)
+ return(1);
+ if (rc == LDLM_ITER_CONTINUE)
+ return(0);
+ return(rc);
+}
+
+static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
+ obd_enqueue_update_f upcall, void *cookie,
+ __u64 *flags, int agl, int rc)
+{
+ int intent = *flags & LDLM_FL_HAS_INTENT;
+ ENTRY;
+
+ if (intent) {
+ /* The request was created before ldlm_cli_enqueue call. */
+ if (rc == ELDLM_LOCK_ABORTED) {
+ struct ldlm_reply *rep;
+ rep = req_capsule_server_get(&req->rq_pill,
+ &RMF_DLM_REP);
+
+ LASSERT(rep != NULL);
+ if (rep->lock_policy_res1)
+ rc = rep->lock_policy_res1;
+ }
+ }
+
+ if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
+ (rc == 0)) {
+ *flags |= LDLM_FL_LVB_READY;
+ CDEBUG(D_INODE,"got kms "LPU64" blocks "LPU64" mtime "LPU64"\n",
+ lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
+ }
+
+ /* Call the update callback. */
+ rc = (*upcall)(cookie, rc);
+ RETURN(rc);
+}
+
+static int osc_enqueue_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ struct osc_enqueue_args *aa, int rc)
+{
+ struct ldlm_lock *lock;
+ struct lustre_handle handle;
+ __u32 mode;
+ struct ost_lvb *lvb;
+ __u32 lvb_len;
+ __u64 *flags = aa->oa_flags;
+
+ /* Make a local copy of a lock handle and a mode, because aa->oa_*
+ * might be freed anytime after lock upcall has been called. */
+ lustre_handle_copy(&handle, aa->oa_lockh);
+ mode = aa->oa_ei->ei_mode;
+
+ /* ldlm_cli_enqueue is holding a reference on the lock, so it must
+ * be valid. */
+ lock = ldlm_handle2lock(&handle);
+
+ /* Take an additional reference so that a blocking AST that
+ * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
+ * to arrive after an upcall has been executed by
+ * osc_enqueue_fini(). */
+ ldlm_lock_addref(&handle, mode);
+
+ /* Let CP AST to grant the lock first. */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
+
+ if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
+ lvb = NULL;
+ lvb_len = 0;
+ } else {
+ lvb = aa->oa_lvb;
+ lvb_len = sizeof(*aa->oa_lvb);
+ }
+
+ /* Complete obtaining the lock procedure. */
+ rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
+ mode, flags, lvb, lvb_len, &handle, rc);
+ /* Complete osc stuff. */
+ rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
+ flags, aa->oa_agl, rc);
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
+
+ /* Release the lock for async request. */
+ if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
+ /*
+ * Releases a reference taken by ldlm_cli_enqueue(), if it is
+ * not already released by
+ * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
+ */
+ ldlm_lock_decref(&handle, mode);
+
+ LASSERTF(lock != NULL, "lockh %p, req %p, aa %p - client evicted?\n",
+ aa->oa_lockh, req, aa);
+ ldlm_lock_decref(&handle, mode);
+ LDLM_LOCK_PUT(lock);
+ return rc;
+}
+
+void osc_update_enqueue(struct lustre_handle *lov_lockhp,
+ struct lov_oinfo *loi, int flags,
+ struct ost_lvb *lvb, __u32 mode, int rc)
+{
+ struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
+
+ if (rc == ELDLM_OK) {
+ __u64 tmp;
+
+ LASSERT(lock != NULL);
+ loi->loi_lvb = *lvb;
+ tmp = loi->loi_lvb.lvb_size;
+ /* Extend KMS up to the end of this lock and no further
+ * A lock on [x,y] means a KMS of up to y + 1 bytes! */
+ if (tmp > lock->l_policy_data.l_extent.end)
+ tmp = lock->l_policy_data.l_extent.end + 1;
+ if (tmp >= loi->loi_kms) {
+ LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64
+ ", kms="LPU64, loi->loi_lvb.lvb_size, tmp);
+ loi_kms_set(loi, tmp);
+ } else {
+ LDLM_DEBUG(lock, "lock acquired, setting rss="
+ LPU64"; leaving kms="LPU64", end="LPU64,
+ loi->loi_lvb.lvb_size, loi->loi_kms,
+ lock->l_policy_data.l_extent.end);
+ }
+ ldlm_lock_allow_match(lock);
+ } else if (rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT)) {
+ LASSERT(lock != NULL);
+ loi->loi_lvb = *lvb;
+ ldlm_lock_allow_match(lock);
+ CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
+ " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms);
+ rc = ELDLM_OK;
+ }
+
+ if (lock != NULL) {
+ if (rc != ELDLM_OK)
+ ldlm_lock_fail_match(lock);
+
+ LDLM_LOCK_PUT(lock);
+ }
+}
+EXPORT_SYMBOL(osc_update_enqueue);
+
+struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
+
+/* When enqueuing asynchronously, locks are not ordered, we can obtain a lock
+ * from the 2nd OSC before a lock from the 1st one. This does not deadlock with
+ * other synchronous requests, however keeping some locks and trying to obtain
+ * others may take a considerable amount of time in a case of ost failure; and
+ * when other sync requests do not get released lock from a client, the client
+ * is excluded from the cluster -- such scenarious make the life difficult, so
+ * release locks just after they are obtained. */
+int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
+ __u64 *flags, ldlm_policy_data_t *policy,
+ struct ost_lvb *lvb, int kms_valid,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ldlm_enqueue_info *einfo,
+ struct lustre_handle *lockh,
+ struct ptlrpc_request_set *rqset, int async, int agl)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct ptlrpc_request *req = NULL;
+ int intent = *flags & LDLM_FL_HAS_INTENT;
+ int match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+ ldlm_mode_t mode;
+ int rc;
+ ENTRY;
+
+ /* Filesystem lock extents are extended to page boundaries so that
+ * dealing with the page cache is a little smoother. */
+ policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
+ policy->l_extent.end |= ~CFS_PAGE_MASK;
+
+ /*
+ * kms is not valid when either object is completely fresh (so that no
+ * locks are cached), or object was evicted. In the latter case cached
+ * lock cannot be used, because it would prime inode state with
+ * potentially stale LVB.
+ */
+ if (!kms_valid)
+ goto no_match;
+
+ /* Next, search for already existing extent locks that will cover us */
+ /* If we're trying to read, we also search for an existing PW lock. The
+ * VFS and page cache already protect us locally, so lots of readers/
+ * writers can share a single PW lock.
+ *
+ * There are problems with conversion deadlocks, so instead of
+ * converting a read lock to a write lock, we'll just enqueue a new
+ * one.
+ *
+ * At some point we should cancel the read lock instead of making them
+ * send us a blocking callback, but there are problems with canceling
+ * locks out from other users right now, too. */
+ mode = einfo->ei_mode;
+ if (einfo->ei_mode == LCK_PR)
+ mode |= LCK_PW;
+ mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
+ einfo->ei_type, policy, mode, lockh, 0);
+ if (mode) {
+ struct ldlm_lock *matched = ldlm_handle2lock(lockh);
+
+ if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
+ /* For AGL, if enqueue RPC is sent but the lock is not
+ * granted, then skip to process this strpe.
+ * Return -ECANCELED to tell the caller. */
+ ldlm_lock_decref(lockh, mode);
+ LDLM_LOCK_PUT(matched);
+ RETURN(-ECANCELED);
+ } else if (osc_set_lock_data_with_check(matched, einfo)) {
+ *flags |= LDLM_FL_LVB_READY;
+ /* addref the lock only if not async requests and PW
+ * lock is matched whereas we asked for PR. */
+ if (!rqset && einfo->ei_mode != mode)
+ ldlm_lock_addref(lockh, LCK_PR);
+ if (intent) {
+ /* I would like to be able to ASSERT here that
+ * rss <= kms, but I can't, for reasons which
+ * are explained in lov_enqueue() */
+ }
+
+ /* We already have a lock, and it's referenced.
+ *
+ * At this point, the cl_lock::cll_state is CLS_QUEUING,
+ * AGL upcall may change it to CLS_HELD directly. */
+ (*upcall)(cookie, ELDLM_OK);
+
+ if (einfo->ei_mode != mode)
+ ldlm_lock_decref(lockh, LCK_PW);
+ else if (rqset)
+ /* For async requests, decref the lock. */
+ ldlm_lock_decref(lockh, einfo->ei_mode);
+ LDLM_LOCK_PUT(matched);
+ RETURN(ELDLM_OK);
+ } else {
+ ldlm_lock_decref(lockh, mode);
+ LDLM_LOCK_PUT(matched);
+ }
+ }
+
+ no_match:
+ if (intent) {
+ LIST_HEAD(cancels);
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_LDLM_ENQUEUE_LVB);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
+ sizeof *lvb);
+ ptlrpc_request_set_replen(req);
+ }
+
+ /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */
+ *flags &= ~LDLM_FL_BLOCK_GRANTED;
+
+ rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
+ sizeof(*lvb), LVB_T_OST, lockh, async);
+ if (rqset) {
+ if (!rc) {
+ struct osc_enqueue_args *aa;
+ CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ aa->oa_ei = einfo;
+ aa->oa_exp = exp;
+ aa->oa_flags = flags;
+ aa->oa_upcall = upcall;
+ aa->oa_cookie = cookie;
+ aa->oa_lvb = lvb;
+ aa->oa_lockh = lockh;
+ aa->oa_agl = !!agl;
+
+ req->rq_interpret_reply =
+ (ptlrpc_interpterer_t)osc_enqueue_interpret;
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ else
+ ptlrpc_set_add_req(rqset, req);
+ } else if (intent) {
+ ptlrpc_req_finished(req);
+ }
+ RETURN(rc);
+ }
+
+ rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
+ if (intent)
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+
+static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo,
+ struct ldlm_enqueue_info *einfo,
+ struct ptlrpc_request_set *rqset)
+{
+ struct ldlm_res_id res_id;
+ int rc;
+ ENTRY;
+
+ ostid_build_res_name(&oinfo->oi_md->lsm_oi, &res_id);
+ rc = osc_enqueue_base(exp, &res_id, &oinfo->oi_flags, &oinfo->oi_policy,
+ &oinfo->oi_md->lsm_oinfo[0]->loi_lvb,
+ oinfo->oi_md->lsm_oinfo[0]->loi_kms_valid,
+ oinfo->oi_cb_up, oinfo, einfo, oinfo->oi_lockh,
+ rqset, rqset != NULL, 0);
+ RETURN(rc);
+}
+
+int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
+ __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ int *flags, void *data, struct lustre_handle *lockh,
+ int unref)
+{
+ struct obd_device *obd = exp->exp_obd;
+ int lflags = *flags;
+ ldlm_mode_t rc;
+ ENTRY;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
+ RETURN(-EIO);
+
+ /* Filesystem lock extents are extended to page boundaries so that
+ * dealing with the page cache is a little smoother */
+ policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
+ policy->l_extent.end |= ~CFS_PAGE_MASK;
+
+ /* Next, search for already existing extent locks that will cover us */
+ /* If we're trying to read, we also search for an existing PW lock. The
+ * VFS and page cache already protect us locally, so lots of readers/
+ * writers can share a single PW lock. */
+ rc = mode;
+ if (mode == LCK_PR)
+ rc |= LCK_PW;
+ rc = ldlm_lock_match(obd->obd_namespace, lflags,
+ res_id, type, policy, rc, lockh, unref);
+ if (rc) {
+ if (data != NULL) {
+ if (!osc_set_data_with_check(lockh, data)) {
+ if (!(lflags & LDLM_FL_TEST_LOCK))
+ ldlm_lock_decref(lockh, rc);
+ RETURN(0);
+ }
+ }
+ if (!(lflags & LDLM_FL_TEST_LOCK) && mode != rc) {
+ ldlm_lock_addref(lockh, LCK_PR);
+ ldlm_lock_decref(lockh, LCK_PW);
+ }
+ RETURN(rc);
+ }
+ RETURN(rc);
+}
+
+int osc_cancel_base(struct lustre_handle *lockh, __u32 mode)
+{
+ ENTRY;
+
+ if (unlikely(mode == LCK_GROUP))
+ ldlm_lock_decref_and_cancel(lockh, mode);
+ else
+ ldlm_lock_decref(lockh, mode);
+
+ RETURN(0);
+}
+
+static int osc_cancel(struct obd_export *exp, struct lov_stripe_md *md,
+ __u32 mode, struct lustre_handle *lockh)
+{
+ ENTRY;
+ RETURN(osc_cancel_base(lockh, mode));
+}
+
+static int osc_cancel_unused(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ ldlm_cancel_flags_t flags,
+ void *opaque)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ldlm_res_id res_id, *resp = NULL;
+
+ if (lsm != NULL) {
+ ostid_build_res_name(&lsm->lsm_oi, &res_id);
+ resp = &res_id;
+ }
+
+ return ldlm_cli_cancel_unused(obd->obd_namespace, resp, flags, opaque);
+}
+
+static int osc_statfs_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ struct osc_async_args *aa, int rc)
+{
+ struct obd_statfs *msfs;
+ ENTRY;
+
+ if (rc == -EBADR)
+ /* The request has in fact never been sent
+ * due to issues at a higher level (LOV).
+ * Exit immediately since the caller is
+ * aware of the problem and takes care
+ * of the clean up */
+ RETURN(rc);
+
+ if ((rc == -ENOTCONN || rc == -EAGAIN) &&
+ (aa->aa_oi->oi_flags & OBD_STATFS_NODELAY))
+ GOTO(out, rc = 0);
+
+ if (rc != 0)
+ GOTO(out, rc);
+
+ msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
+ if (msfs == NULL) {
+ GOTO(out, rc = -EPROTO);
+ }
+
+ *aa->aa_oi->oi_osfs = *msfs;
+out:
+ rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
+ RETURN(rc);
+}
+
+static int osc_statfs_async(struct obd_export *exp,
+ struct obd_info *oinfo, __u64 max_age,
+ struct ptlrpc_request_set *rqset)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct ptlrpc_request *req;
+ struct osc_async_args *aa;
+ int rc;
+ ENTRY;
+
+ /* We could possibly pass max_age in the request (as an absolute
+ * timestamp or a "seconds.usec ago") so the target can avoid doing
+ * extra calls into the filesystem if that isn't necessary (e.g.
+ * during mount that would help a bit). Having relative timestamps
+ * is not so great if request processing is slow, while absolute
+ * timestamps are not ideal because they need time synchronization. */
+ req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+ ptlrpc_request_set_replen(req);
+ req->rq_request_portal = OST_CREATE_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ if (oinfo->oi_flags & OBD_STATFS_NODELAY) {
+ /* procfs requests not want stat in wait for avoid deadlock */
+ req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
+ }
+
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
+ CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ aa->aa_oi = oinfo;
+
+ ptlrpc_set_add_req(rqset, req);
+ RETURN(0);
+}
+
+static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age, __u32 flags)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct obd_statfs *msfs;
+ struct ptlrpc_request *req;
+ struct obd_import *imp = NULL;
+ int rc;
+ ENTRY;
+
+ /*Since the request might also come from lprocfs, so we need
+ *sync this with client_disconnect_export Bug15684*/
+ down_read(&obd->u.cli.cl_sem);
+ if (obd->u.cli.cl_import)
+ imp = class_import_get(obd->u.cli.cl_import);
+ up_read(&obd->u.cli.cl_sem);
+ if (!imp)
+ RETURN(-ENODEV);
+
+ /* We could possibly pass max_age in the request (as an absolute
+ * timestamp or a "seconds.usec ago") so the target can avoid doing
+ * extra calls into the filesystem if that isn't necessary (e.g.
+ * during mount that would help a bit). Having relative timestamps
+ * is not so great if request processing is slow, while absolute
+ * timestamps are not ideal because they need time synchronization. */
+ req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+
+ class_import_put(imp);
+
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+ ptlrpc_request_set_replen(req);
+ req->rq_request_portal = OST_CREATE_PORTAL;
+ ptlrpc_at_set_req_timeout(req);
+
+ if (flags & OBD_STATFS_NODELAY) {
+ /* procfs requests not want stat in wait for avoid deadlock */
+ req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
+ }
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
+ if (msfs == NULL) {
+ GOTO(out, rc = -EPROTO);
+ }
+
+ *osfs = *msfs;
+
+ EXIT;
+ out:
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+/* Retrieve object striping information.
+ *
+ * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating
+ * the maximum number of OST indices which will fit in the user buffer.
+ * lmm_magic must be LOV_MAGIC (we only use 1 slot here).
+ */
+static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
+{
+ /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+ struct lov_user_md_v3 lum, *lumk;
+ struct lov_user_ost_data_v1 *lmm_objects;
+ int rc = 0, lum_size;
+ ENTRY;
+
+ if (!lsm)
+ RETURN(-ENODATA);
+
+ /* we only need the header part from user space to get lmm_magic and
+ * lmm_stripe_count, (the header part is common to v1 and v3) */
+ lum_size = sizeof(struct lov_user_md_v1);
+ if (copy_from_user(&lum, lump, lum_size))
+ RETURN(-EFAULT);
+
+ if ((lum.lmm_magic != LOV_USER_MAGIC_V1) &&
+ (lum.lmm_magic != LOV_USER_MAGIC_V3))
+ RETURN(-EINVAL);
+
+ /* lov_user_md_vX and lov_mds_md_vX must have the same size */
+ LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1));
+ LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3));
+ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0]));
+
+ /* we can use lov_mds_md_size() to compute lum_size
+ * because lov_user_md_vX and lov_mds_md_vX have the same size */
+ if (lum.lmm_stripe_count > 0) {
+ lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
+ OBD_ALLOC(lumk, lum_size);
+ if (!lumk)
+ RETURN(-ENOMEM);
+
+ if (lum.lmm_magic == LOV_USER_MAGIC_V1)
+ lmm_objects =
+ &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]);
+ else
+ lmm_objects = &(lumk->lmm_objects[0]);
+ lmm_objects->l_ost_oi = lsm->lsm_oi;
+ } else {
+ lum_size = lov_mds_md_size(0, lum.lmm_magic);
+ lumk = &lum;
+ }
+
+ lumk->lmm_oi = lsm->lsm_oi;
+ lumk->lmm_stripe_count = 1;
+
+ if (copy_to_user(lump, lumk, lum_size))
+ rc = -EFAULT;
+
+ if (lumk != &lum)
+ OBD_FREE(lumk, lum_size);
+
+ RETURN(rc);
+}
+
+
+static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void *uarg)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_ioctl_data *data = karg;
+ int err = 0;
+ ENTRY;
+
+ if (!try_module_get(THIS_MODULE)) {
+ CERROR("Can't get module. Is it alive?");
+ return -EINVAL;
+ }
+ switch (cmd) {
+ case OBD_IOC_LOV_GET_CONFIG: {
+ char *buf;
+ struct lov_desc *desc;
+ struct obd_uuid uuid;
+
+ buf = NULL;
+ len = 0;
+ if (obd_ioctl_getdata(&buf, &len, (void *)uarg))
+ GOTO(out, err = -EINVAL);
+
+ data = (struct obd_ioctl_data *)buf;
+
+ if (sizeof(*desc) > data->ioc_inllen1) {
+ obd_ioctl_freedata(buf, len);
+ GOTO(out, err = -EINVAL);
+ }
+
+ if (data->ioc_inllen2 < sizeof(uuid)) {
+ obd_ioctl_freedata(buf, len);
+ GOTO(out, err = -EINVAL);
+ }
+
+ desc = (struct lov_desc *)data->ioc_inlbuf1;
+ desc->ld_tgt_count = 1;
+ desc->ld_active_tgt_count = 1;
+ desc->ld_default_stripe_count = 1;
+ desc->ld_default_stripe_size = 0;
+ desc->ld_default_stripe_offset = 0;
+ desc->ld_pattern = 0;
+ memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
+
+ memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
+
+ err = copy_to_user((void *)uarg, buf, len);
+ if (err)
+ err = -EFAULT;
+ obd_ioctl_freedata(buf, len);
+ GOTO(out, err);
+ }
+ case LL_IOC_LOV_SETSTRIPE:
+ err = obd_alloc_memmd(exp, karg);
+ if (err > 0)
+ err = 0;
+ GOTO(out, err);
+ case LL_IOC_LOV_GETSTRIPE:
+ err = osc_getstripe(karg, uarg);
+ GOTO(out, err);
+ case OBD_IOC_CLIENT_RECOVER:
+ err = ptlrpc_recover_import(obd->u.cli.cl_import,
+ data->ioc_inlbuf1, 0);
+ if (err > 0)
+ err = 0;
+ GOTO(out, err);
+ case IOC_OSC_SET_ACTIVE:
+ err = ptlrpc_set_import_active(obd->u.cli.cl_import,
+ data->ioc_offset);
+ GOTO(out, err);
+ case OBD_IOC_POLL_QUOTACHECK:
+ err = osc_quota_poll_check(exp, (struct if_quotacheck *)karg);
+ GOTO(out, err);
+ case OBD_IOC_PING_TARGET:
+ err = ptlrpc_obd_ping(obd);
+ GOTO(out, err);
+ default:
+ CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
+ cmd, current_comm());
+ GOTO(out, err = -ENOTTY);
+ }
+out:
+ module_put(THIS_MODULE);
+ return err;
+}
+
+static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, __u32 *vallen, void *val,
+ struct lov_stripe_md *lsm)
+{
+ ENTRY;
+ if (!vallen || !val)
+ RETURN(-EFAULT);
+
+ if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
+ __u32 *stripe = val;
+ *vallen = sizeof(*stripe);
+ *stripe = 0;
+ RETURN(0);
+ } else if (KEY_IS(KEY_LAST_ID)) {
+ struct ptlrpc_request *req;
+ obd_id *reply;
+ char *tmp;
+ int rc;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_OST_GET_INFO_LAST_ID);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
+ RCL_CLIENT, keylen);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+ memcpy(tmp, key, keylen);
+
+ req->rq_no_delay = req->rq_no_resend = 1;
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_OBD_ID);
+ if (reply == NULL)
+ GOTO(out, rc = -EPROTO);
+
+ *((obd_id *)val) = *reply;
+ out:
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ } else if (KEY_IS(KEY_FIEMAP)) {
+ struct ll_fiemap_info_key *fm_key =
+ (struct ll_fiemap_info_key *)key;
+ struct ldlm_res_id res_id;
+ ldlm_policy_data_t policy;
+ struct lustre_handle lockh;
+ ldlm_mode_t mode = 0;
+ struct ptlrpc_request *req;
+ struct ll_user_fiemap *reply;
+ char *tmp;
+ int rc;
+
+ if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
+ goto skip_locking;
+
+ policy.l_extent.start = fm_key->fiemap.fm_start &
+ CFS_PAGE_MASK;
+
+ if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
+ fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1)
+ policy.l_extent.end = OBD_OBJECT_EOF;
+ else
+ policy.l_extent.end = (fm_key->fiemap.fm_start +
+ fm_key->fiemap.fm_length +
+ PAGE_CACHE_SIZE - 1) & CFS_PAGE_MASK;
+
+ ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
+ mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_LVB_READY,
+ &res_id, LDLM_EXTENT, &policy,
+ LCK_PR | LCK_PW, &lockh, 0);
+ if (mode) { /* lock is cached on client */
+ if (mode != LCK_PR) {
+ ldlm_lock_addref(&lockh, LCK_PR);
+ ldlm_lock_decref(&lockh, LCK_PW);
+ }
+ } else { /* no cached lock, needs acquire lock on server side */
+ fm_key->oa.o_valid |= OBD_MD_FLFLAGS;
+ fm_key->oa.o_flags |= OBD_FL_SRVLOCK;
+ }
+
+skip_locking:
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_OST_GET_INFO_FIEMAP);
+ if (req == NULL)
+ GOTO(drop_lock, rc = -ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY,
+ RCL_CLIENT, keylen);
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
+ RCL_CLIENT, *vallen);
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
+ RCL_SERVER, *vallen);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(drop_lock, rc);
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ memcpy(tmp, val, *vallen);
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(fini_req, rc);
+
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ if (reply == NULL)
+ GOTO(fini_req, rc = -EPROTO);
+
+ memcpy(val, reply, *vallen);
+fini_req:
+ ptlrpc_req_finished(req);
+drop_lock:
+ if (mode)
+ ldlm_lock_decref(&lockh, LCK_PR);
+ RETURN(rc);
+ }
+
+ RETURN(-EINVAL);
+}
+
+static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
+ obd_count keylen, void *key, obd_count vallen,
+ void *val, struct ptlrpc_request_set *set)
+{
+ struct ptlrpc_request *req;
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_import *imp = class_exp2cliimp(exp);
+ char *tmp;
+ int rc;
+ ENTRY;
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);
+
+ if (KEY_IS(KEY_CHECKSUM)) {
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ exp->exp_obd->u.cli.cl_checksum = (*(int *)val) ? 1 : 0;
+ RETURN(0);
+ }
+
+ if (KEY_IS(KEY_SPTLRPC_CONF)) {
+ sptlrpc_conf_client_adapt(obd);
+ RETURN(0);
+ }
+
+ if (KEY_IS(KEY_FLUSH_CTX)) {
+ sptlrpc_import_flush_my_ctx(imp);
+ RETURN(0);
+ }
+
+ if (KEY_IS(KEY_CACHE_SET)) {
+ struct client_obd *cli = &obd->u.cli;
+
+ LASSERT(cli->cl_cache == NULL); /* only once */
+ cli->cl_cache = (struct cl_client_cache *)val;
+ atomic_inc(&cli->cl_cache->ccc_users);
+ cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
+
+ /* add this osc into entity list */
+ LASSERT(list_empty(&cli->cl_lru_osc));
+ spin_lock(&cli->cl_cache->ccc_lru_lock);
+ list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru);
+ spin_unlock(&cli->cl_cache->ccc_lru_lock);
+
+ RETURN(0);
+ }
+
+ if (KEY_IS(KEY_CACHE_LRU_SHRINK)) {
+ struct client_obd *cli = &obd->u.cli;
+ int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
+ int target = *(int *)val;
+
+ nr = osc_lru_shrink(cli, min(nr, target));
+ *(int *)val -= nr;
+ RETURN(0);
+ }
+
+ if (!set && !KEY_IS(KEY_GRANT_SHRINK))
+ RETURN(-EINVAL);
+
+ /* We pass all other commands directly to OST. Since nobody calls osc
+ methods directly and everybody is supposed to go through LOV, we
+ assume lov checked invalid values for us.
+ The only recognised values so far are evict_by_nid and mds_conn.
+ Even if something bad goes through, we'd get a -EINVAL from OST
+ anyway. */
+
+ req = ptlrpc_request_alloc(imp, KEY_IS(KEY_GRANT_SHRINK) ?
+ &RQF_OST_SET_GRANT_INFO :
+ &RQF_OBD_SET_INFO);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
+ RCL_CLIENT, keylen);
+ if (!KEY_IS(KEY_GRANT_SHRINK))
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
+ RCL_CLIENT, vallen);
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, KEY_IS(KEY_GRANT_SHRINK) ?
+ &RMF_OST_BODY :
+ &RMF_SETINFO_VAL);
+ memcpy(tmp, val, vallen);
+
+ if (KEY_IS(KEY_GRANT_SHRINK)) {
+ struct osc_grant_args *aa;
+ struct obdo *oa;
+
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ OBDO_ALLOC(oa);
+ if (!oa) {
+ ptlrpc_req_finished(req);
+ RETURN(-ENOMEM);
+ }
+ *oa = ((struct ost_body *)val)->oa;
+ aa->aa_oa = oa;
+ req->rq_interpret_reply = osc_shrink_grant_interpret;
+ }
+
+ ptlrpc_request_set_replen(req);
+ if (!KEY_IS(KEY_GRANT_SHRINK)) {
+ LASSERT(set != NULL);
+ ptlrpc_set_add_req(set, req);
+ ptlrpc_check_set(NULL, set);
+ } else
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+
+ RETURN(0);
+}
+
+
+static int osc_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+ struct obd_device *disk_obd, int *index)
+{
+ /* this code is not supposed to be used with LOD/OSP
+ * to be removed soon */
+ LBUG();
+ return 0;
+}
+
+static int osc_llog_finish(struct obd_device *obd, int count)
+{
+ struct llog_ctxt *ctxt;
+
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
+ if (ctxt) {
+ llog_cat_close(NULL, ctxt->loc_handle);
+ llog_cleanup(NULL, ctxt);
+ }
+
+ ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT);
+ if (ctxt)
+ llog_cleanup(NULL, ctxt);
+ RETURN(0);
+}
+
+static int osc_reconnect(const struct lu_env *env,
+ struct obd_export *exp, struct obd_device *obd,
+ struct obd_uuid *cluuid,
+ struct obd_connect_data *data,
+ void *localdata)
+{
+ struct client_obd *cli = &obd->u.cli;
+
+ if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
+ long lost_grant;
+
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
+ 2 * cli_brw_size(obd);
+ lost_grant = cli->cl_lost_grant;
+ cli->cl_lost_grant = 0;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+
+ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d"
+ " ocd_grant: %d, lost: %ld.\n", data->ocd_connect_flags,
+ data->ocd_version, data->ocd_grant, lost_grant);
+ }
+
+ RETURN(0);
+}
+
+static int osc_disconnect(struct obd_export *exp)
+{
+ struct obd_device *obd = class_exp2obd(exp);
+ struct llog_ctxt *ctxt;
+ int rc;
+
+ ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT);
+ if (ctxt) {
+ if (obd->u.cli.cl_conn_count == 1) {
+ /* Flush any remaining cancel messages out to the
+ * target */
+ llog_sync(ctxt, exp, 0);
+ }
+ llog_ctxt_put(ctxt);
+ } else {
+ CDEBUG(D_HA, "No LLOG_SIZE_REPL_CTXT found in obd %p\n",
+ obd);
+ }
+
+ rc = client_disconnect_export(exp);
+ /**
+ * Initially we put del_shrink_grant before disconnect_export, but it
+ * causes the following problem if setup (connect) and cleanup
+ * (disconnect) are tangled together.
+ * connect p1 disconnect p2
+ * ptlrpc_connect_import
+ * ............... class_manual_cleanup
+ * osc_disconnect
+ * del_shrink_grant
+ * ptlrpc_connect_interrupt
+ * init_grant_shrink
+ * add this client to shrink list
+ * cleanup_osc
+ * Bang! pinger trigger the shrink.
+ * So the osc should be disconnected from the shrink list, after we
+ * are sure the import has been destroyed. BUG18662
+ */
+ if (obd->u.cli.cl_import == NULL)
+ osc_del_shrink_grant(&obd->u.cli);
+ return rc;
+}
+
+static int osc_import_event(struct obd_device *obd,
+ struct obd_import *imp,
+ enum obd_import_event event)
+{
+ struct client_obd *cli;
+ int rc = 0;
+
+ ENTRY;
+ LASSERT(imp->imp_obd == obd);
+
+ switch (event) {
+ case IMP_EVENT_DISCON: {
+ cli = &obd->u.cli;
+ client_obd_list_lock(&cli->cl_loi_list_lock);
+ cli->cl_avail_grant = 0;
+ cli->cl_lost_grant = 0;
+ client_obd_list_unlock(&cli->cl_loi_list_lock);
+ break;
+ }
+ case IMP_EVENT_INACTIVE: {
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
+ break;
+ }
+ case IMP_EVENT_INVALIDATE: {
+ struct ldlm_namespace *ns = obd->obd_namespace;
+ struct lu_env *env;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ /* Reset grants */
+ cli = &obd->u.cli;
+ /* all pages go to failing rpcs due to the invalid
+ * import */
+ osc_io_unplug(env, cli, NULL, PDL_POLICY_ROUND);
+
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+ cl_env_put(env, &refcheck);
+ } else
+ rc = PTR_ERR(env);
+ break;
+ }
+ case IMP_EVENT_ACTIVE: {
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
+ break;
+ }
+ case IMP_EVENT_OCD: {
+ struct obd_connect_data *ocd = &imp->imp_connect_data;
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT)
+ osc_init_grant(&obd->u.cli, ocd);
+
+ /* See bug 7198 */
+ if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
+ imp->imp_client->cli_request_portal =OST_REQUEST_PORTAL;
+
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
+ break;
+ }
+ case IMP_EVENT_DEACTIVATE: {
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DEACTIVATE, NULL);
+ break;
+ }
+ case IMP_EVENT_ACTIVATE: {
+ rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVATE, NULL);
+ break;
+ }
+ default:
+ CERROR("Unknown import event %d\n", event);
+ LBUG();
+ }
+ RETURN(rc);
+}
+
+/**
+ * Determine whether the lock can be canceled before replaying the lock
+ * during recovery, see bug16774 for detailed information.
+ *
+ * \retval zero the lock can't be canceled
+ * \retval other ok to cancel
+ */
+static int osc_cancel_for_recovery(struct ldlm_lock *lock)
+{
+ check_res_locked(lock->l_resource);
+
+ /*
+ * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR.
+ *
+ * XXX as a future improvement, we can also cancel unused write lock
+ * if it doesn't have dirty data and active mmaps.
+ */
+ if (lock->l_resource->lr_type == LDLM_EXTENT &&
+ (lock->l_granted_mode == LCK_PR ||
+ lock->l_granted_mode == LCK_CR) &&
+ (osc_dlm_lock_pageref(lock) == 0))
+ RETURN(1);
+
+ RETURN(0);
+}
+
+static int brw_queue_work(const struct lu_env *env, void *data)
+{
+ struct client_obd *cli = data;
+
+ CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
+
+ osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ RETURN(0);
+}
+
+int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ struct client_obd *cli = &obd->u.cli;
+ void *handler;
+ int rc;
+ ENTRY;
+
+ rc = ptlrpcd_addref();
+ if (rc)
+ RETURN(rc);
+
+ rc = client_obd_setup(obd, lcfg);
+ if (rc)
+ GOTO(out_ptlrpcd, rc);
+
+ handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli);
+ if (IS_ERR(handler))
+ GOTO(out_client_setup, rc = PTR_ERR(handler));
+ cli->cl_writeback_work = handler;
+
+ rc = osc_quota_setup(obd);
+ if (rc)
+ GOTO(out_ptlrpcd_work, rc);
+
+ cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
+ lprocfs_osc_init_vars(&lvars);
+ if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
+ lproc_osc_attach_seqstat(obd);
+ sptlrpc_lprocfs_cliobd_attach(obd);
+ ptlrpc_lprocfs_register_obd(obd);
+ }
+
+ /* We need to allocate a few requests more, because
+ * brw_interpret tries to create new requests before freeing
+ * previous ones, Ideally we want to have 2x max_rpcs_in_flight
+ * reserved, but I'm afraid that might be too much wasted RAM
+ * in fact, so 2 is just my guess and still should work. */
+ cli->cl_import->imp_rq_pool =
+ ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
+ OST_MAXREQSIZE,
+ ptlrpc_add_rqs_to_pool);
+
+ INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
+ ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
+ RETURN(rc);
+
+out_ptlrpcd_work:
+ ptlrpcd_destroy_work(handler);
+out_client_setup:
+ client_obd_cleanup(obd);
+out_ptlrpcd:
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+{
+ int rc = 0;
+ ENTRY;
+
+ switch (stage) {
+ case OBD_CLEANUP_EARLY: {
+ struct obd_import *imp;
+ imp = obd->u.cli.cl_import;
+ CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
+ /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
+ ptlrpc_deactivate_import(imp);
+ spin_lock(&imp->imp_lock);
+ imp->imp_pingable = 0;
+ spin_unlock(&imp->imp_lock);
+ break;
+ }
+ case OBD_CLEANUP_EXPORTS: {
+ struct client_obd *cli = &obd->u.cli;
+ /* LU-464
+ * for echo client, export may be on zombie list, wait for
+ * zombie thread to cull it, because cli.cl_import will be
+ * cleared in client_disconnect_export():
+ * class_export_destroy() -> obd_cleanup() ->
+ * echo_device_free() -> echo_client_cleanup() ->
+ * obd_disconnect() -> osc_disconnect() ->
+ * client_disconnect_export()
+ */
+ obd_zombie_barrier();
+ if (cli->cl_writeback_work) {
+ ptlrpcd_destroy_work(cli->cl_writeback_work);
+ cli->cl_writeback_work = NULL;
+ }
+ obd_cleanup_client_import(obd);
+ ptlrpc_lprocfs_unregister_obd(obd);
+ lprocfs_obd_cleanup(obd);
+ rc = obd_llog_finish(obd, 0);
+ if (rc != 0)
+ CERROR("failed to cleanup llogging subsystems\n");
+ break;
+ }
+ }
+ RETURN(rc);
+}
+
+int osc_cleanup(struct obd_device *obd)
+{
+ struct client_obd *cli = &obd->u.cli;
+ int rc;
+
+ ENTRY;
+
+ /* lru cleanup */
+ if (cli->cl_cache != NULL) {
+ LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
+ spin_lock(&cli->cl_cache->ccc_lru_lock);
+ list_del_init(&cli->cl_lru_osc);
+ spin_unlock(&cli->cl_cache->ccc_lru_lock);
+ cli->cl_lru_left = NULL;
+ atomic_dec(&cli->cl_cache->ccc_users);
+ cli->cl_cache = NULL;
+ }
+
+ /* free memory of osc quota cache */
+ osc_quota_cleanup(obd);
+
+ rc = client_obd_cleanup(obd);
+
+ ptlrpcd_decref();
+ RETURN(rc);
+}
+
+int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc = 0;
+
+ lprocfs_osc_init_vars(&lvars);
+
+ switch (lcfg->lcfg_command) {
+ default:
+ rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars,
+ lcfg, obd);
+ if (rc > 0)
+ rc = 0;
+ break;
+ }
+
+ return(rc);
+}
+
+static int osc_process_config(struct obd_device *obd, obd_count len, void *buf)
+{
+ return osc_process_config_base(obd, buf);
+}
+
+struct obd_ops osc_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = osc_setup,
+ .o_precleanup = osc_precleanup,
+ .o_cleanup = osc_cleanup,
+ .o_add_conn = client_import_add_conn,
+ .o_del_conn = client_import_del_conn,
+ .o_connect = client_connect_import,
+ .o_reconnect = osc_reconnect,
+ .o_disconnect = osc_disconnect,
+ .o_statfs = osc_statfs,
+ .o_statfs_async = osc_statfs_async,
+ .o_packmd = osc_packmd,
+ .o_unpackmd = osc_unpackmd,
+ .o_create = osc_create,
+ .o_destroy = osc_destroy,
+ .o_getattr = osc_getattr,
+ .o_getattr_async = osc_getattr_async,
+ .o_setattr = osc_setattr,
+ .o_setattr_async = osc_setattr_async,
+ .o_brw = osc_brw,
+ .o_punch = osc_punch,
+ .o_sync = osc_sync,
+ .o_enqueue = osc_enqueue,
+ .o_change_cbdata = osc_change_cbdata,
+ .o_find_cbdata = osc_find_cbdata,
+ .o_cancel = osc_cancel,
+ .o_cancel_unused = osc_cancel_unused,
+ .o_iocontrol = osc_iocontrol,
+ .o_get_info = osc_get_info,
+ .o_set_info_async = osc_set_info_async,
+ .o_import_event = osc_import_event,
+ .o_llog_init = osc_llog_init,
+ .o_llog_finish = osc_llog_finish,
+ .o_process_config = osc_process_config,
+ .o_quotactl = osc_quotactl,
+ .o_quotacheck = osc_quotacheck,
+};
+
+extern struct lu_kmem_descr osc_caches[];
+extern spinlock_t osc_ast_guard;
+extern struct lock_class_key osc_ast_guard_class;
+
+int __init osc_init(void)
+{
+ struct lprocfs_static_vars lvars = { 0 };
+ int rc;
+ ENTRY;
+
+ /* print an address of _any_ initialized kernel symbol from this
+ * module, to allow debugging with gdb that doesn't support data
+ * symbols from modules.*/
+ CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
+
+ rc = lu_kmem_init(osc_caches);
+
+ lprocfs_osc_init_vars(&lvars);
+
+ rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars,
+ LUSTRE_OSC_NAME, &osc_device_type);
+ if (rc) {
+ lu_kmem_fini(osc_caches);
+ RETURN(rc);
+ }
+
+ spin_lock_init(&osc_ast_guard);
+ lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
+
+ RETURN(rc);
+}
+
+static void /*__exit*/ osc_exit(void)
+{
+ class_unregister_type(LUSTRE_OSC_NAME);
+ lu_kmem_fini(osc_caches);
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
+MODULE_LICENSE("GPL");
+
+cfs_module(osc, LUSTRE_VERSION_STRING, osc_init, osc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile
new file mode 100644
index 000000000000..983eb66a554d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/Makefile
@@ -0,0 +1,23 @@
+obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o
+LDLM := ../../lustre/ldlm/
+
+ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o
+ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o
+ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o
+ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o
+ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o
+ldlm_objs += $(LDLM)ldlm_pool.o
+ldlm_objs += $(LDLM)interval_tree.o
+ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
+ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o
+ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o
+ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
+ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o sec_lproc.o
+ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o
+
+ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs)
+
+obj-$(CONFIG_PTLRPC_GSS) += gss/
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
new file mode 100644
index 000000000000..22f7e654c9d8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -0,0 +1,3059 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/** Implementation of client-side PortalRPC interfaces */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_import.h>
+#include <lustre_req_layout.h>
+
+#include "ptlrpc_internal.h"
+
+static int ptlrpc_send_new_req(struct ptlrpc_request *req);
+
+/**
+ * Initialize passed in client structure \a cl.
+ */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+ struct ptlrpc_client *cl)
+{
+ cl->cli_request_portal = req_portal;
+ cl->cli_reply_portal = rep_portal;
+ cl->cli_name = name;
+}
+EXPORT_SYMBOL(ptlrpc_init_client);
+
+/**
+ * Return PortalRPC connection for remore uud \a uuid
+ */
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
+{
+ struct ptlrpc_connection *c;
+ lnet_nid_t self;
+ lnet_process_id_t peer;
+ int err;
+
+ /* ptlrpc_uuid_to_peer() initializes its 2nd parameter
+ * before accessing its values. */
+ /* coverity[uninit_use_in_call] */
+ err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
+ if (err != 0) {
+ CNETERR("cannot find peer %s!\n", uuid->uuid);
+ return NULL;
+ }
+
+ c = ptlrpc_connection_get(peer, self, uuid);
+ if (c) {
+ memcpy(c->c_remote_uuid.uuid,
+ uuid->uuid, sizeof(c->c_remote_uuid.uuid));
+ }
+
+ CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
+
+ return c;
+}
+EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
+
+/**
+ * Allocate and initialize new bulk descriptor on the sender.
+ * Returns pointer to the descriptor or NULL on error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal)
+{
+ struct ptlrpc_bulk_desc *desc;
+ int i;
+
+ OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[npages]));
+ if (!desc)
+ return NULL;
+
+ spin_lock_init(&desc->bd_lock);
+ init_waitqueue_head(&desc->bd_waitq);
+ desc->bd_max_iov = npages;
+ desc->bd_iov_count = 0;
+ desc->bd_portal = portal;
+ desc->bd_type = type;
+ desc->bd_md_count = 0;
+ LASSERT(max_brw > 0);
+ desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT);
+ /* PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this
+ * node. Negotiated ocd_brw_size will always be <= this number. */
+ for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++)
+ LNetInvalidateHandle(&desc->bd_mds[i]);
+
+ return desc;
+}
+
+/**
+ * Prepare bulk descriptor for specified outgoing request \a req that
+ * can fit \a npages * pages. \a type is bulk type. \a portal is where
+ * the bulk to be sent. Used on client-side.
+ * Returns pointer to newly allocatrd initialized bulk descriptor or NULL on
+ * error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
+ unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal)
+{
+ struct obd_import *imp = req->rq_import;
+ struct ptlrpc_bulk_desc *desc;
+
+ ENTRY;
+ LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+ desc = ptlrpc_new_bulk(npages, max_brw, type, portal);
+ if (desc == NULL)
+ RETURN(NULL);
+
+ desc->bd_import_generation = req->rq_import_generation;
+ desc->bd_import = class_import_get(imp);
+ desc->bd_req = req;
+
+ desc->bd_cbid.cbid_fn = client_bulk_callback;
+ desc->bd_cbid.cbid_arg = desc;
+
+ /* This makes req own desc, and free it when she frees herself */
+ req->rq_bulk = desc;
+
+ return desc;
+}
+EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
+
+/**
+ * Add a page \a page to the bulk descriptor \a desc.
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len, int pin)
+{
+ LASSERT(desc->bd_iov_count < desc->bd_max_iov);
+ LASSERT(page != NULL);
+ LASSERT(pageoffset >= 0);
+ LASSERT(len > 0);
+ LASSERT(pageoffset + len <= PAGE_CACHE_SIZE);
+
+ desc->bd_nob += len;
+
+ if (pin)
+ page_cache_get(page);
+
+ ptlrpc_add_bulk_page(desc, page, pageoffset, len);
+}
+EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
+
+/**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
+{
+ int i;
+ ENTRY;
+
+ LASSERT(desc != NULL);
+ LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
+ LASSERT(desc->bd_md_count == 0); /* network hands off */
+ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+
+ sptlrpc_enc_pool_put_pages(desc);
+
+ if (desc->bd_export)
+ class_export_put(desc->bd_export);
+ else
+ class_import_put(desc->bd_import);
+
+ if (unpin) {
+ for (i = 0; i < desc->bd_iov_count ; i++)
+ page_cache_release(desc->bd_iov[i].kiov_page);
+ }
+
+ OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
+ bd_iov[desc->bd_max_iov]));
+ EXIT;
+}
+EXPORT_SYMBOL(__ptlrpc_free_bulk);
+
+/**
+ * Set server timelimit for this req, i.e. how long are we willing to wait
+ * for reply before timing out this request.
+ */
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
+{
+ __u32 serv_est;
+ int idx;
+ struct imp_at *at;
+
+ LASSERT(req->rq_import);
+
+ if (AT_OFF) {
+ /* non-AT settings */
+ /**
+ * \a imp_server_timeout means this is reverse import and
+ * we send (currently only) ASTs to the client and cannot afford
+ * to wait too long for the reply, otherwise the other client
+ * (because of which we are sending this request) would
+ * timeout waiting for us
+ */
+ req->rq_timeout = req->rq_import->imp_server_timeout ?
+ obd_timeout / 2 : obd_timeout;
+ } else {
+ at = &req->rq_import->imp_at;
+ idx = import_at_get_index(req->rq_import,
+ req->rq_request_portal);
+ serv_est = at_get(&at->iat_service_estimate[idx]);
+ req->rq_timeout = at_est2timeout(serv_est);
+ }
+ /* We could get even fancier here, using history to predict increased
+ loading... */
+
+ /* Let the server know what this RPC timeout is by putting it in the
+ reqmsg*/
+ lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+}
+EXPORT_SYMBOL(ptlrpc_at_set_req_timeout);
+
+/* Adjust max service estimate based on server value */
+static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
+ unsigned int serv_est)
+{
+ int idx;
+ unsigned int oldse;
+ struct imp_at *at;
+
+ LASSERT(req->rq_import);
+ at = &req->rq_import->imp_at;
+
+ idx = import_at_get_index(req->rq_import, req->rq_request_portal);
+ /* max service estimates are tracked on the server side,
+ so just keep minimal history here */
+ oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
+ if (oldse != 0)
+ CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d "
+ "has changed from %d to %d\n",
+ req->rq_import->imp_obd->obd_name,req->rq_request_portal,
+ oldse, at_get(&at->iat_service_estimate[idx]));
+}
+
+/* Expected network latency per remote node (secs) */
+int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
+{
+ return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
+}
+
+/* Adjust expected network latency */
+static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+ unsigned int service_time)
+{
+ unsigned int nl, oldnl;
+ struct imp_at *at;
+ time_t now = cfs_time_current_sec();
+
+ LASSERT(req->rq_import);
+ at = &req->rq_import->imp_at;
+
+ /* Network latency is total time less server processing time */
+ nl = max_t(int, now - req->rq_sent - service_time, 0) +1/*st rounding*/;
+ if (service_time > now - req->rq_sent + 3 /* bz16408 */)
+ CWARN("Reported service time %u > total measured time "
+ CFS_DURATION_T"\n", service_time,
+ cfs_time_sub(now, req->rq_sent));
+
+ oldnl = at_measured(&at->iat_net_latency, nl);
+ if (oldnl != 0)
+ CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) "
+ "has changed from %d to %d\n",
+ req->rq_import->imp_obd->obd_name,
+ obd_uuid2str(
+ &req->rq_import->imp_connection->c_remote_uuid),
+ oldnl, at_get(&at->iat_net_latency));
+}
+
+static int unpack_reply(struct ptlrpc_request *req)
+{
+ int rc;
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+ rc = ptlrpc_unpack_rep_msg(req, req->rq_replen);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc);
+ return(-EPROTO);
+ }
+ }
+
+ rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc);
+ return(-EPROTO);
+ }
+ return 0;
+}
+
+/**
+ * Handle an early reply message, called with the rq_lock held.
+ * If anything goes wrong just ignore it - same as if it never happened
+ */
+static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request *early_req;
+ time_t olddl;
+ int rc;
+ ENTRY;
+
+ req->rq_early = 0;
+ spin_unlock(&req->rq_lock);
+
+ rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
+ if (rc) {
+ spin_lock(&req->rq_lock);
+ RETURN(rc);
+ }
+
+ rc = unpack_reply(early_req);
+ if (rc == 0) {
+ /* Expecting to increase the service time estimate here */
+ ptlrpc_at_adj_service(req,
+ lustre_msg_get_timeout(early_req->rq_repmsg));
+ ptlrpc_at_adj_net_latency(req,
+ lustre_msg_get_service_time(early_req->rq_repmsg));
+ }
+
+ sptlrpc_cli_finish_early_reply(early_req);
+
+ if (rc != 0) {
+ spin_lock(&req->rq_lock);
+ RETURN(rc);
+ }
+
+ /* Adjust the local timeout for this req */
+ ptlrpc_at_set_req_timeout(req);
+
+ spin_lock(&req->rq_lock);
+ olddl = req->rq_deadline;
+ /* server assumes it now has rq_timeout from when it sent the
+ * early reply, so client should give it at least that long. */
+ req->rq_deadline = cfs_time_current_sec() + req->rq_timeout +
+ ptlrpc_at_get_net_latency(req);
+
+ DEBUG_REQ(D_ADAPTTO, req,
+ "Early reply #%d, new deadline in "CFS_DURATION_T"s "
+ "("CFS_DURATION_T"s)", req->rq_early_count,
+ cfs_time_sub(req->rq_deadline, cfs_time_current_sec()),
+ cfs_time_sub(req->rq_deadline, olddl));
+
+ RETURN(rc);
+}
+
+/**
+ * Wind down request pool \a pool.
+ * Frees all requests from the pool too
+ */
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
+{
+ struct list_head *l, *tmp;
+ struct ptlrpc_request *req;
+
+ LASSERT(pool != NULL);
+
+ spin_lock(&pool->prp_lock);
+ list_for_each_safe(l, tmp, &pool->prp_req_list) {
+ req = list_entry(l, struct ptlrpc_request, rq_list);
+ list_del(&req->rq_list);
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
+ OBD_FREE_LARGE(req->rq_reqbuf, pool->prp_rq_size);
+ OBD_FREE(req, sizeof(*req));
+ }
+ spin_unlock(&pool->prp_lock);
+ OBD_FREE(pool, sizeof(*pool));
+}
+EXPORT_SYMBOL(ptlrpc_free_rq_pool);
+
+/**
+ * Allocates, initializes and adds \a num_rq requests to the pool \a pool
+ */
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
+{
+ int i;
+ int size = 1;
+
+ while (size < pool->prp_rq_size)
+ size <<= 1;
+
+ LASSERTF(list_empty(&pool->prp_req_list) ||
+ size == pool->prp_rq_size,
+ "Trying to change pool size with nonempty pool "
+ "from %d to %d bytes\n", pool->prp_rq_size, size);
+
+ spin_lock(&pool->prp_lock);
+ pool->prp_rq_size = size;
+ for (i = 0; i < num_rq; i++) {
+ struct ptlrpc_request *req;
+ struct lustre_msg *msg;
+
+ spin_unlock(&pool->prp_lock);
+ OBD_ALLOC(req, sizeof(struct ptlrpc_request));
+ if (!req)
+ return;
+ OBD_ALLOC_LARGE(msg, size);
+ if (!msg) {
+ OBD_FREE(req, sizeof(struct ptlrpc_request));
+ return;
+ }
+ req->rq_reqbuf = msg;
+ req->rq_reqbuf_len = size;
+ req->rq_pool = pool;
+ spin_lock(&pool->prp_lock);
+ list_add_tail(&req->rq_list, &pool->prp_req_list);
+ }
+ spin_unlock(&pool->prp_lock);
+ return;
+}
+EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
+
+/**
+ * Create and initialize new request pool with given attributes:
+ * \a num_rq - initial number of requests to create for the pool
+ * \a msgsize - maximum message size possible for requests in thid pool
+ * \a populate_pool - function to be called when more requests need to be added
+ * to the pool
+ * Returns pointer to newly created pool or NULL on error.
+ */
+struct ptlrpc_request_pool *
+ptlrpc_init_rq_pool(int num_rq, int msgsize,
+ void (*populate_pool)(struct ptlrpc_request_pool *, int))
+{
+ struct ptlrpc_request_pool *pool;
+
+ OBD_ALLOC(pool, sizeof (struct ptlrpc_request_pool));
+ if (!pool)
+ return NULL;
+
+ /* Request next power of two for the allocation, because internally
+ kernel would do exactly this */
+
+ spin_lock_init(&pool->prp_lock);
+ INIT_LIST_HEAD(&pool->prp_req_list);
+ pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD;
+ pool->prp_populate = populate_pool;
+
+ populate_pool(pool, num_rq);
+
+ if (list_empty(&pool->prp_req_list)) {
+ /* have not allocated a single request for the pool */
+ OBD_FREE(pool, sizeof (struct ptlrpc_request_pool));
+ pool = NULL;
+ }
+ return pool;
+}
+EXPORT_SYMBOL(ptlrpc_init_rq_pool);
+
+/**
+ * Fetches one request from pool \a pool
+ */
+static struct ptlrpc_request *
+ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request;
+ struct lustre_msg *reqbuf;
+
+ if (!pool)
+ return NULL;
+
+ spin_lock(&pool->prp_lock);
+
+ /* See if we have anything in a pool, and bail out if nothing,
+ * in writeout path, where this matters, this is safe to do, because
+ * nothing is lost in this case, and when some in-flight requests
+ * complete, this code will be called again. */
+ if (unlikely(list_empty(&pool->prp_req_list))) {
+ spin_unlock(&pool->prp_lock);
+ return NULL;
+ }
+
+ request = list_entry(pool->prp_req_list.next, struct ptlrpc_request,
+ rq_list);
+ list_del_init(&request->rq_list);
+ spin_unlock(&pool->prp_lock);
+
+ LASSERT(request->rq_reqbuf);
+ LASSERT(request->rq_pool);
+
+ reqbuf = request->rq_reqbuf;
+ memset(request, 0, sizeof(*request));
+ request->rq_reqbuf = reqbuf;
+ request->rq_reqbuf_len = pool->prp_rq_size;
+ request->rq_pool = pool;
+
+ return request;
+}
+
+/**
+ * Returns freed \a request to pool.
+ */
+static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
+{
+ struct ptlrpc_request_pool *pool = request->rq_pool;
+
+ spin_lock(&pool->prp_lock);
+ LASSERT(list_empty(&request->rq_list));
+ LASSERT(!request->rq_receiving_reply);
+ list_add_tail(&request->rq_list, &pool->prp_req_list);
+ spin_unlock(&pool->prp_lock);
+}
+
+static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode,
+ int count, __u32 *lengths, char **bufs,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct obd_import *imp = request->rq_import;
+ int rc;
+ ENTRY;
+
+ if (unlikely(ctx))
+ request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
+ else {
+ rc = sptlrpc_req_get_ctx(request);
+ if (rc)
+ GOTO(out_free, rc);
+ }
+
+ sptlrpc_req_set_flavor(request, opcode);
+
+ rc = lustre_pack_request(request, imp->imp_msg_magic, count,
+ lengths, bufs);
+ if (rc) {
+ LASSERT(!request->rq_pool);
+ GOTO(out_ctx, rc);
+ }
+
+ lustre_msg_add_version(request->rq_reqmsg, version);
+ request->rq_send_state = LUSTRE_IMP_FULL;
+ request->rq_type = PTL_RPC_MSG_REQUEST;
+ request->rq_export = NULL;
+
+ request->rq_req_cbid.cbid_fn = request_out_callback;
+ request->rq_req_cbid.cbid_arg = request;
+
+ request->rq_reply_cbid.cbid_fn = reply_in_callback;
+ request->rq_reply_cbid.cbid_arg = request;
+
+ request->rq_reply_deadline = 0;
+ request->rq_phase = RQ_PHASE_NEW;
+ request->rq_next_phase = RQ_PHASE_UNDEFINED;
+
+ request->rq_request_portal = imp->imp_client->cli_request_portal;
+ request->rq_reply_portal = imp->imp_client->cli_reply_portal;
+
+ ptlrpc_at_set_req_timeout(request);
+
+ spin_lock_init(&request->rq_lock);
+ INIT_LIST_HEAD(&request->rq_list);
+ INIT_LIST_HEAD(&request->rq_timed_list);
+ INIT_LIST_HEAD(&request->rq_replay_list);
+ INIT_LIST_HEAD(&request->rq_ctx_chain);
+ INIT_LIST_HEAD(&request->rq_set_chain);
+ INIT_LIST_HEAD(&request->rq_history_list);
+ INIT_LIST_HEAD(&request->rq_exp_list);
+ init_waitqueue_head(&request->rq_reply_waitq);
+ init_waitqueue_head(&request->rq_set_waitq);
+ request->rq_xid = ptlrpc_next_xid();
+ atomic_set(&request->rq_refcount, 1);
+
+ lustre_msg_set_opc(request->rq_reqmsg, opcode);
+
+ RETURN(0);
+out_ctx:
+ sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
+out_free:
+ class_import_put(imp);
+ return rc;
+}
+
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode, char **bufs,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ int count;
+
+ count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
+ return __ptlrpc_request_bufs_pack(request, version, opcode, count,
+ request->rq_pill.rc_area[RCL_CLIENT],
+ bufs, ctx);
+}
+EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
+
+/**
+ * Pack request buffers for network transfer, performing necessary encryption
+ * steps if necessary.
+ */
+int ptlrpc_request_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode)
+{
+ int rc;
+ rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL);
+ if (rc)
+ return rc;
+
+ /* For some old 1.8 clients (< 1.8.7), they will LASSERT the size of
+ * ptlrpc_body sent from server equal to local ptlrpc_body size, so we
+ * have to send old ptlrpc_body to keep interoprability with these
+ * clients.
+ *
+ * Only three kinds of server->client RPCs so far:
+ * - LDLM_BL_CALLBACK
+ * - LDLM_CP_CALLBACK
+ * - LDLM_GL_CALLBACK
+ *
+ * XXX This should be removed whenever we drop the interoprability with
+ * the these old clients.
+ */
+ if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK ||
+ opcode == LDLM_GL_CALLBACK)
+ req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY,
+ sizeof(struct ptlrpc_body_v2), RCL_CLIENT);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_request_pack);
+
+/**
+ * Helper function to allocate new request on import \a imp
+ * and possibly using existing request from pool \a pool if provided.
+ * Returns allocated request structure with import field filled or
+ * NULL on error.
+ */
+static inline
+struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
+ struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request = NULL;
+
+ if (pool)
+ request = ptlrpc_prep_req_from_pool(pool);
+
+ if (!request)
+ OBD_ALLOC_PTR(request);
+
+ if (request) {
+ LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
+ LASSERT(imp != LP_POISON);
+ LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p",
+ imp->imp_client);
+ LASSERT(imp->imp_client != LP_POISON);
+
+ request->rq_import = class_import_get(imp);
+ } else {
+ CERROR("request allocation out of memory\n");
+ }
+
+ return request;
+}
+
+/**
+ * Helper function for creating a request.
+ * Calls __ptlrpc_request_alloc to allocate new request sturcture and inits
+ * buffer structures according to capsule template \a format.
+ * Returns allocated request structure pointer or NULL on error.
+ */
+static struct ptlrpc_request *
+ptlrpc_request_alloc_internal(struct obd_import *imp,
+ struct ptlrpc_request_pool * pool,
+ const struct req_format *format)
+{
+ struct ptlrpc_request *request;
+
+ request = __ptlrpc_request_alloc(imp, pool);
+ if (request == NULL)
+ return NULL;
+
+ req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
+ req_capsule_set(&request->rq_pill, format);
+ return request;
+}
+
+/**
+ * Allocate new request structure for import \a imp and initialize its
+ * buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
+ const struct req_format *format)
+{
+ return ptlrpc_request_alloc_internal(imp, NULL, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc);
+
+/**
+ * Allocate new request structure for import \a imp from pool \a pool and
+ * initialize its buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
+ struct ptlrpc_request_pool * pool,
+ const struct req_format *format)
+{
+ return ptlrpc_request_alloc_internal(imp, pool, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pool);
+
+/**
+ * For requests not from pool, free memory of the request structure.
+ * For requests obtained from a pool earlier, return request back to pool.
+ */
+void ptlrpc_request_free(struct ptlrpc_request *request)
+{
+ if (request->rq_pool)
+ __ptlrpc_free_req_to_pool(request);
+ else
+ OBD_FREE_PTR(request);
+}
+EXPORT_SYMBOL(ptlrpc_request_free);
+
+/**
+ * Allocate new request for operatione \a opcode and immediatelly pack it for
+ * network transfer.
+ * Only used for simple requests like OBD_PING where the only important
+ * part of the request is operation itself.
+ * Returns allocated request or NULL on error.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
+ const struct req_format *format,
+ __u32 version, int opcode)
+{
+ struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format);
+ int rc;
+
+ if (req) {
+ rc = ptlrpc_request_pack(req, version, opcode);
+ if (rc) {
+ ptlrpc_request_free(req);
+ req = NULL;
+ }
+ }
+ return req;
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pack);
+
+/**
+ * Prepare request (fetched from pool \a poolif not NULL) on import \a imp
+ * for operation \a opcode. Request would contain \a count buffers.
+ * Sizes of buffers are described in array \a lengths and buffers themselves
+ * are provided by a pointer \a bufs.
+ * Returns prepared request structure pointer or NULL on error.
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req_pool(struct obd_import *imp,
+ __u32 version, int opcode,
+ int count, __u32 *lengths, char **bufs,
+ struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request;
+ int rc;
+
+ request = __ptlrpc_request_alloc(imp, pool);
+ if (!request)
+ return NULL;
+
+ rc = __ptlrpc_request_bufs_pack(request, version, opcode, count,
+ lengths, bufs, NULL);
+ if (rc) {
+ ptlrpc_request_free(request);
+ request = NULL;
+ }
+ return request;
+}
+EXPORT_SYMBOL(ptlrpc_prep_req_pool);
+
+/**
+ * Same as ptlrpc_prep_req_pool, but without pool
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count,
+ __u32 *lengths, char **bufs)
+{
+ return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs,
+ NULL);
+}
+EXPORT_SYMBOL(ptlrpc_prep_req);
+
+/**
+ * Allocate and initialize new request set structure.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_set(void)
+{
+ struct ptlrpc_request_set *set;
+
+ ENTRY;
+ OBD_ALLOC(set, sizeof *set);
+ if (!set)
+ RETURN(NULL);
+ atomic_set(&set->set_refcount, 1);
+ INIT_LIST_HEAD(&set->set_requests);
+ init_waitqueue_head(&set->set_waitq);
+ atomic_set(&set->set_new_count, 0);
+ atomic_set(&set->set_remaining, 0);
+ spin_lock_init(&set->set_new_req_lock);
+ INIT_LIST_HEAD(&set->set_new_requests);
+ INIT_LIST_HEAD(&set->set_cblist);
+ set->set_max_inflight = UINT_MAX;
+ set->set_producer = NULL;
+ set->set_producer_arg = NULL;
+ set->set_rc = 0;
+
+ RETURN(set);
+}
+EXPORT_SYMBOL(ptlrpc_prep_set);
+
+/**
+ * Allocate and initialize new request set structure with flow control
+ * extension. This extension allows to control the number of requests in-flight
+ * for the whole set. A callback function to generate requests must be provided
+ * and the request set will keep the number of requests sent over the wire to
+ * @max_inflight.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
+ void *arg)
+
+{
+ struct ptlrpc_request_set *set;
+
+ set = ptlrpc_prep_set();
+ if (!set)
+ RETURN(NULL);
+
+ set->set_max_inflight = max;
+ set->set_producer = func;
+ set->set_producer_arg = arg;
+
+ RETURN(set);
+}
+EXPORT_SYMBOL(ptlrpc_prep_fcset);
+
+/**
+ * Wind down and free request set structure previously allocated with
+ * ptlrpc_prep_set.
+ * Ensures that all requests on the set have completed and removes
+ * all requests from the request list in a set.
+ * If any unsent request happen to be on the list, pretends that they got
+ * an error in flight and calls their completion handler.
+ */
+void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ struct list_head *next;
+ int expected_phase;
+ int n = 0;
+ ENTRY;
+
+ /* Requests on the set should either all be completed, or all be new */
+ expected_phase = (atomic_read(&set->set_remaining) == 0) ?
+ RQ_PHASE_COMPLETE : RQ_PHASE_NEW;
+ list_for_each (tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ LASSERT(req->rq_phase == expected_phase);
+ n++;
+ }
+
+ LASSERTF(atomic_read(&set->set_remaining) == 0 ||
+ atomic_read(&set->set_remaining) == n, "%d / %d\n",
+ atomic_read(&set->set_remaining), n);
+
+ list_for_each_safe(tmp, next, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ list_del_init(&req->rq_set_chain);
+
+ LASSERT(req->rq_phase == expected_phase);
+
+ if (req->rq_phase == RQ_PHASE_NEW) {
+ ptlrpc_req_interpret(NULL, req, -EBADR);
+ atomic_dec(&set->set_remaining);
+ }
+
+ spin_lock(&req->rq_lock);
+ req->rq_set = NULL;
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+
+ ptlrpc_req_finished (req);
+ }
+
+ LASSERT(atomic_read(&set->set_remaining) == 0);
+
+ ptlrpc_reqset_put(set);
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_set_destroy);
+
+/**
+ * Add a callback function \a fn to the set.
+ * This function would be called when all requests on this set are completed.
+ * The function will be passed \a data argument.
+ */
+int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
+ set_interpreter_func fn, void *data)
+{
+ struct ptlrpc_set_cbdata *cbdata;
+
+ OBD_ALLOC_PTR(cbdata);
+ if (cbdata == NULL)
+ RETURN(-ENOMEM);
+
+ cbdata->psc_interpret = fn;
+ cbdata->psc_data = data;
+ list_add_tail(&cbdata->psc_item, &set->set_cblist);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_set_add_cb);
+
+/**
+ * Add a new request to the general purpose request set.
+ * Assumes request reference from the caller.
+ */
+void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
+ struct ptlrpc_request *req)
+{
+ LASSERT(list_empty(&req->rq_set_chain));
+
+ /* The set takes over the caller's request reference */
+ list_add_tail(&req->rq_set_chain, &set->set_requests);
+ req->rq_set = set;
+ atomic_inc(&set->set_remaining);
+ req->rq_queued_time = cfs_time_current();
+
+ if (req->rq_reqmsg != NULL)
+ lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+ if (set->set_producer != NULL)
+ /* If the request set has a producer callback, the RPC must be
+ * sent straight away */
+ ptlrpc_send_new_req(req);
+}
+EXPORT_SYMBOL(ptlrpc_set_add_req);
+
+/**
+ * Add a request to a request with dedicated server thread
+ * and wake the thread to make any necessary processing.
+ * Currently only used for ptlrpcd.
+ */
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+ struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *set = pc->pc_set;
+ int count, i;
+
+ LASSERT(req->rq_set == NULL);
+ LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0);
+
+ spin_lock(&set->set_new_req_lock);
+ /*
+ * The set takes over the caller's request reference.
+ */
+ req->rq_set = set;
+ req->rq_queued_time = cfs_time_current();
+ list_add_tail(&req->rq_set_chain, &set->set_new_requests);
+ count = atomic_inc_return(&set->set_new_count);
+ spin_unlock(&set->set_new_req_lock);
+
+ /* Only need to call wakeup once for the first entry. */
+ if (count == 1) {
+ wake_up(&set->set_waitq);
+
+ /* XXX: It maybe unnecessary to wakeup all the partners. But to
+ * guarantee the async RPC can be processed ASAP, we have
+ * no other better choice. It maybe fixed in future. */
+ for (i = 0; i < pc->pc_npartners; i++)
+ wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_set_add_new_req);
+
+/**
+ * Based on the current state of the import, determine if the request
+ * can be sent, is an error, or should be delayed.
+ *
+ * Returns true if this request should be delayed. If false, and
+ * *status is set, then the request can not be sent and *status is the
+ * error code. If false and status is 0, then request can be sent.
+ *
+ * The imp->imp_lock must be held.
+ */
+static int ptlrpc_import_delay_req(struct obd_import *imp,
+ struct ptlrpc_request *req, int *status)
+{
+ int delay = 0;
+ ENTRY;
+
+ LASSERT (status != NULL);
+ *status = 0;
+
+ if (req->rq_ctx_init || req->rq_ctx_fini) {
+ /* always allow ctx init/fini rpc go through */
+ } else if (imp->imp_state == LUSTRE_IMP_NEW) {
+ DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
+ *status = -EIO;
+ } else if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ /* pings may safely race with umount */
+ DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ?
+ D_HA : D_ERROR, req, "IMP_CLOSED ");
+ *status = -EIO;
+ } else if (ptlrpc_send_limit_expired(req)) {
+ /* probably doesn't need to be a D_ERROR after initial testing */
+ DEBUG_REQ(D_ERROR, req, "send limit expired ");
+ *status = -EIO;
+ } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
+ imp->imp_state == LUSTRE_IMP_CONNECTING) {
+ /* allow CONNECT even if import is invalid */ ;
+ if (atomic_read(&imp->imp_inval_count) != 0) {
+ DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+ *status = -EIO;
+ }
+ } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) {
+ if (!imp->imp_deactive)
+ DEBUG_REQ(D_NET, req, "IMP_INVALID");
+ *status = -ESHUTDOWN; /* bz 12940 */
+ } else if (req->rq_import_generation != imp->imp_generation) {
+ DEBUG_REQ(D_ERROR, req, "req wrong generation:");
+ *status = -EIO;
+ } else if (req->rq_send_state != imp->imp_state) {
+ /* invalidate in progress - any requests should be drop */
+ if (atomic_read(&imp->imp_inval_count) != 0) {
+ DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+ *status = -EIO;
+ } else if (imp->imp_dlm_fake || req->rq_no_delay) {
+ *status = -EWOULDBLOCK;
+ } else if (req->rq_allow_replay &&
+ (imp->imp_state == LUSTRE_IMP_REPLAY ||
+ imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS ||
+ imp->imp_state == LUSTRE_IMP_REPLAY_WAIT ||
+ imp->imp_state == LUSTRE_IMP_RECOVER)) {
+ DEBUG_REQ(D_HA, req, "allow during recovery.\n");
+ } else {
+ delay = 1;
+ }
+ }
+
+ RETURN(delay);
+}
+
+/**
+ * Decide if the eror message regarding provided request \a req
+ * should be printed to the console or not.
+ * Makes it's decision on request status and other properties.
+ * Returns 1 to print error on the system console or 0 if not.
+ */
+static int ptlrpc_console_allow(struct ptlrpc_request *req)
+{
+ __u32 opc;
+ int err;
+
+ LASSERT(req->rq_reqmsg != NULL);
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ /* Suppress particular reconnect errors which are to be expected. No
+ * errors are suppressed for the initial connection on an import */
+ if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
+ (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
+
+ /* Suppress timed out reconnect requests */
+ if (req->rq_timedout)
+ return 0;
+
+ /* Suppress unavailable/again reconnect requests */
+ err = lustre_msg_get_status(req->rq_repmsg);
+ if (err == -ENODEV || err == -EAGAIN)
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Check request processing status.
+ * Returns the status.
+ */
+static int ptlrpc_check_status(struct ptlrpc_request *req)
+{
+ int err;
+ ENTRY;
+
+ err = lustre_msg_get_status(req->rq_repmsg);
+ if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
+ struct obd_import *imp = req->rq_import;
+ __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+ if (ptlrpc_console_allow(req))
+ LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s,"
+ " operation %s failed with %d.\n",
+ imp->imp_obd->obd_name,
+ libcfs_nid2str(
+ imp->imp_connection->c_peer.nid),
+ ll_opcode2str(opc), err);
+ RETURN(err < 0 ? err : -EINVAL);
+ }
+
+ if (err < 0) {
+ DEBUG_REQ(D_INFO, req, "status is %d", err);
+ } else if (err > 0) {
+ /* XXX: translate this error from net to host */
+ DEBUG_REQ(D_INFO, req, "status is %d", err);
+ }
+
+ RETURN(err);
+}
+
+/**
+ * save pre-versions of objects into request for replay.
+ * Versions are obtained from server reply.
+ * used for VBR.
+ */
+static void ptlrpc_save_versions(struct ptlrpc_request *req)
+{
+ struct lustre_msg *repmsg = req->rq_repmsg;
+ struct lustre_msg *reqmsg = req->rq_reqmsg;
+ __u64 *versions = lustre_msg_get_versions(repmsg);
+ ENTRY;
+
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
+ return;
+
+ LASSERT(versions);
+ lustre_msg_set_versions(reqmsg, versions);
+ CDEBUG(D_INFO, "Client save versions ["LPX64"/"LPX64"]\n",
+ versions[0], versions[1]);
+
+ EXIT;
+}
+
+/**
+ * Callback function called when client receives RPC reply for \a req.
+ * Returns 0 on success or error code.
+ * The return alue would be assigned to req->rq_status by the caller
+ * as request processing status.
+ * This function also decides if the request needs to be saved for later replay.
+ */
+static int after_reply(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ struct obd_device *obd = req->rq_import->imp_obd;
+ int rc;
+ struct timeval work_start;
+ long timediff;
+ ENTRY;
+
+ LASSERT(obd != NULL);
+ /* repbuf must be unlinked */
+ LASSERT(!req->rq_receiving_reply && !req->rq_must_unlink);
+
+ if (req->rq_reply_truncate) {
+ if (ptlrpc_no_resend(req)) {
+ DEBUG_REQ(D_ERROR, req, "reply buffer overflow,"
+ " expected: %d, actual size: %d",
+ req->rq_nob_received, req->rq_repbuf_len);
+ RETURN(-EOVERFLOW);
+ }
+
+ sptlrpc_cli_free_repbuf(req);
+ /* Pass the required reply buffer size (include
+ * space for early reply).
+ * NB: no need to roundup because alloc_repbuf
+ * will roundup it */
+ req->rq_replen = req->rq_nob_received;
+ req->rq_nob_received = 0;
+ req->rq_resend = 1;
+ RETURN(0);
+ }
+
+ /*
+ * NB Until this point, the whole of the incoming message,
+ * including buflens, status etc is in the sender's byte order.
+ */
+ rc = sptlrpc_cli_unwrap_reply(req);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
+ RETURN(rc);
+ }
+
+ /*
+ * Security layer unwrap might ask resend this request.
+ */
+ if (req->rq_resend)
+ RETURN(0);
+
+ rc = unpack_reply(req);
+ if (rc)
+ RETURN(rc);
+
+ /* retry indefinitely on EINPROGRESS */
+ if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS &&
+ ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) {
+ time_t now = cfs_time_current_sec();
+
+ DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS");
+ req->rq_resend = 1;
+ req->rq_nr_resend++;
+
+ /* allocate new xid to avoid reply reconstruction */
+ if (!req->rq_bulk) {
+ /* new xid is already allocated for bulk in
+ * ptlrpc_check_set() */
+ req->rq_xid = ptlrpc_next_xid();
+ DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for "
+ "resend on EINPROGRESS");
+ }
+
+ /* Readjust the timeout for current conditions */
+ ptlrpc_at_set_req_timeout(req);
+ /* delay resend to give a chance to the server to get ready.
+ * The delay is increased by 1s on every resend and is capped to
+ * the current request timeout (i.e. obd_timeout if AT is off,
+ * or AT service time x 125% + 5s, see at_est2timeout) */
+ if (req->rq_nr_resend > req->rq_timeout)
+ req->rq_sent = now + req->rq_timeout;
+ else
+ req->rq_sent = now + req->rq_nr_resend;
+
+ RETURN(0);
+ }
+
+ do_gettimeofday(&work_start);
+ timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL);
+ if (obd->obd_svc_stats != NULL) {
+ lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
+ timediff);
+ ptlrpc_lprocfs_rpc_sent(req, timediff);
+ }
+
+ if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY &&
+ lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) {
+ DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)",
+ lustre_msg_get_type(req->rq_repmsg));
+ RETURN(-EPROTO);
+ }
+
+ if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING)
+ CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val);
+ ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg));
+ ptlrpc_at_adj_net_latency(req,
+ lustre_msg_get_service_time(req->rq_repmsg));
+
+ rc = ptlrpc_check_status(req);
+ imp->imp_connect_error = rc;
+
+ if (rc) {
+ /*
+ * Either we've been evicted, or the server has failed for
+ * some reason. Try to reconnect, and if that fails, punt to
+ * the upcall.
+ */
+ if (ll_rpc_recoverable_error(rc)) {
+ if (req->rq_send_state != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
+ RETURN(rc);
+ }
+ ptlrpc_request_handle_notconn(req);
+ RETURN(rc);
+ }
+ } else {
+ /*
+ * Let's look if server sent slv. Do it only for RPC with
+ * rc == 0.
+ */
+ ldlm_cli_update_pool(req);
+ }
+
+ /*
+ * Store transno in reqmsg for replay.
+ */
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
+ req->rq_transno = lustre_msg_get_transno(req->rq_repmsg);
+ lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno);
+ }
+
+ if (imp->imp_replayable) {
+ spin_lock(&imp->imp_lock);
+ /*
+ * No point in adding already-committed requests to the replay
+ * list, we will just remove them immediately. b=9829
+ */
+ if (req->rq_transno != 0 &&
+ (req->rq_transno >
+ lustre_msg_get_last_committed(req->rq_repmsg) ||
+ req->rq_replay)) {
+ /** version recovery */
+ ptlrpc_save_versions(req);
+ ptlrpc_retain_replayable_request(req, imp);
+ } else if (req->rq_commit_cb != NULL) {
+ spin_unlock(&imp->imp_lock);
+ req->rq_commit_cb(req);
+ spin_lock(&imp->imp_lock);
+ }
+
+ /*
+ * Replay-enabled imports return commit-status information.
+ */
+ if (lustre_msg_get_last_committed(req->rq_repmsg)) {
+ imp->imp_peer_committed_transno =
+ lustre_msg_get_last_committed(req->rq_repmsg);
+ }
+
+ ptlrpc_free_committed(imp);
+
+ if (!list_empty(&imp->imp_replay_list)) {
+ struct ptlrpc_request *last;
+
+ last = list_entry(imp->imp_replay_list.prev,
+ struct ptlrpc_request,
+ rq_replay_list);
+ /*
+ * Requests with rq_replay stay on the list even if no
+ * commit is expected.
+ */
+ if (last->rq_transno > imp->imp_peer_committed_transno)
+ ptlrpc_pinger_commit_expected(imp);
+ }
+
+ spin_unlock(&imp->imp_lock);
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * Helper function to send request \a req over the network for the first time
+ * Also adjusts request phase.
+ * Returns 0 on success or error code.
+ */
+static int ptlrpc_send_new_req(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) &&
+ (!req->rq_generation_set ||
+ req->rq_import_generation == imp->imp_generation))
+ RETURN (0);
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
+
+ spin_lock(&imp->imp_lock);
+
+ if (!req->rq_generation_set)
+ req->rq_import_generation = imp->imp_generation;
+
+ if (ptlrpc_import_delay_req(imp, req, &rc)) {
+ spin_lock(&req->rq_lock);
+ req->rq_waiting = 1;
+ spin_unlock(&req->rq_lock);
+
+ DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: "
+ "(%s != %s)", lustre_msg_get_status(req->rq_reqmsg),
+ ptlrpc_import_state_name(req->rq_send_state),
+ ptlrpc_import_state_name(imp->imp_state));
+ LASSERT(list_empty(&req->rq_list));
+ list_add_tail(&req->rq_list, &imp->imp_delayed_list);
+ atomic_inc(&req->rq_import->imp_inflight);
+ spin_unlock(&imp->imp_lock);
+ RETURN(0);
+ }
+
+ if (rc != 0) {
+ spin_unlock(&imp->imp_lock);
+ req->rq_status = rc;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ RETURN(rc);
+ }
+
+ LASSERT(list_empty(&req->rq_list));
+ list_add_tail(&req->rq_list, &imp->imp_sending_list);
+ atomic_inc(&req->rq_import->imp_inflight);
+ spin_unlock(&imp->imp_lock);
+
+ lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+ rc = sptlrpc_req_refresh_ctx(req, -1);
+ if (rc) {
+ if (req->rq_err) {
+ req->rq_status = rc;
+ RETURN(1);
+ } else {
+ req->rq_wait_ctx = 1;
+ RETURN(0);
+ }
+ }
+
+ CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc"
+ " %s:%s:%d:"LPU64":%s:%d\n", current_comm(),
+ imp->imp_obd->obd_uuid.uuid,
+ lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
+ lustre_msg_get_opc(req->rq_reqmsg));
+
+ rc = ptl_send_rpc(req, 0);
+ if (rc) {
+ DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
+ req->rq_net_err = 1;
+ RETURN(rc);
+ }
+ RETURN(0);
+}
+
+static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set)
+{
+ int remaining, rc;
+ ENTRY;
+
+ LASSERT(set->set_producer != NULL);
+
+ remaining = atomic_read(&set->set_remaining);
+
+ /* populate the ->set_requests list with requests until we
+ * reach the maximum number of RPCs in flight for this set */
+ while (atomic_read(&set->set_remaining) < set->set_max_inflight) {
+ rc = set->set_producer(set, set->set_producer_arg);
+ if (rc == -ENOENT) {
+ /* no more RPC to produce */
+ set->set_producer = NULL;
+ set->set_producer_arg = NULL;
+ RETURN(0);
+ }
+ }
+
+ RETURN((atomic_read(&set->set_remaining) - remaining));
+}
+
+/**
+ * this sends any unsent RPCs in \a set and returns 1 if all are sent
+ * and no more replies are expected.
+ * (it is possible to get less replies than requests sent e.g. due to timed out
+ * requests or requests that we had trouble to send out)
+ */
+int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *next;
+ int force_timer_recalc = 0;
+ ENTRY;
+
+ if (atomic_read(&set->set_remaining) == 0)
+ RETURN(1);
+
+ list_for_each_safe(tmp, next, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ struct obd_import *imp = req->rq_import;
+ int unregistered = 0;
+ int rc = 0;
+
+ if (req->rq_phase == RQ_PHASE_NEW &&
+ ptlrpc_send_new_req(req)) {
+ force_timer_recalc = 1;
+ }
+
+ /* delayed send - skip */
+ if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
+ continue;
+
+ /* delayed resend - skip */
+ if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend &&
+ req->rq_sent > cfs_time_current_sec())
+ continue;
+
+ if (!(req->rq_phase == RQ_PHASE_RPC ||
+ req->rq_phase == RQ_PHASE_BULK ||
+ req->rq_phase == RQ_PHASE_INTERPRET ||
+ req->rq_phase == RQ_PHASE_UNREGISTERING ||
+ req->rq_phase == RQ_PHASE_COMPLETE)) {
+ DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
+ LBUG();
+ }
+
+ if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ LASSERT(req->rq_next_phase != req->rq_phase);
+ LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
+
+ /*
+ * Skip processing until reply is unlinked. We
+ * can't return to pool before that and we can't
+ * call interpret before that. We need to make
+ * sure that all rdma transfers finished and will
+ * not corrupt any data.
+ */
+ if (ptlrpc_client_recv_or_unlink(req) ||
+ ptlrpc_client_bulk_active(req))
+ continue;
+
+ /*
+ * Turn fail_loc off to prevent it from looping
+ * forever.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK,
+ OBD_FAIL_ONCE);
+ }
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK,
+ OBD_FAIL_ONCE);
+ }
+
+ /*
+ * Move to next phase if reply was successfully
+ * unlinked.
+ */
+ ptlrpc_rqphase_move(req, req->rq_next_phase);
+ }
+
+ if (req->rq_phase == RQ_PHASE_COMPLETE)
+ continue;
+
+ if (req->rq_phase == RQ_PHASE_INTERPRET)
+ GOTO(interpret, req->rq_status);
+
+ /*
+ * Note that this also will start async reply unlink.
+ */
+ if (req->rq_net_err && !req->rq_timedout) {
+ ptlrpc_expire_one_request(req, 1);
+
+ /*
+ * Check if we still need to wait for unlink.
+ */
+ if (ptlrpc_client_recv_or_unlink(req) ||
+ ptlrpc_client_bulk_active(req))
+ continue;
+ /* If there is no need to resend, fail it now. */
+ if (req->rq_no_resend) {
+ if (req->rq_status == 0)
+ req->rq_status = -EIO;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ GOTO(interpret, req->rq_status);
+ } else {
+ continue;
+ }
+ }
+
+ if (req->rq_err) {
+ spin_lock(&req->rq_lock);
+ req->rq_replied = 0;
+ spin_unlock(&req->rq_lock);
+ if (req->rq_status == 0)
+ req->rq_status = -EIO;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ GOTO(interpret, req->rq_status);
+ }
+
+ /* ptlrpc_set_wait->l_wait_event sets lwi_allow_intr
+ * so it sets rq_intr regardless of individual rpc
+ * timeouts. The synchronous IO waiting path sets
+ * rq_intr irrespective of whether ptlrpcd
+ * has seen a timeout. Our policy is to only interpret
+ * interrupted rpcs after they have timed out, so we
+ * need to enforce that here.
+ */
+
+ if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
+ req->rq_wait_ctx)) {
+ req->rq_status = -EINTR;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ GOTO(interpret, req->rq_status);
+ }
+
+ if (req->rq_phase == RQ_PHASE_RPC) {
+ if (req->rq_timedout || req->rq_resend ||
+ req->rq_waiting || req->rq_wait_ctx) {
+ int status;
+
+ if (!ptlrpc_unregister_reply(req, 1))
+ continue;
+
+ spin_lock(&imp->imp_lock);
+ if (ptlrpc_import_delay_req(imp, req, &status)){
+ /* put on delay list - only if we wait
+ * recovery finished - before send */
+ list_del_init(&req->rq_list);
+ list_add_tail(&req->rq_list,
+ &imp->
+ imp_delayed_list);
+ spin_unlock(&imp->imp_lock);
+ continue;
+ }
+
+ if (status != 0) {
+ req->rq_status = status;
+ ptlrpc_rqphase_move(req,
+ RQ_PHASE_INTERPRET);
+ spin_unlock(&imp->imp_lock);
+ GOTO(interpret, req->rq_status);
+ }
+ if (ptlrpc_no_resend(req) &&
+ !req->rq_wait_ctx) {
+ req->rq_status = -ENOTCONN;
+ ptlrpc_rqphase_move(req,
+ RQ_PHASE_INTERPRET);
+ spin_unlock(&imp->imp_lock);
+ GOTO(interpret, req->rq_status);
+ }
+
+ list_del_init(&req->rq_list);
+ list_add_tail(&req->rq_list,
+ &imp->imp_sending_list);
+
+ spin_unlock(&imp->imp_lock);
+
+ spin_lock(&req->rq_lock);
+ req->rq_waiting = 0;
+ spin_unlock(&req->rq_lock);
+
+ if (req->rq_timedout || req->rq_resend) {
+ /* This is re-sending anyways,
+ * let's mark req as resend. */
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 1;
+ spin_unlock(&req->rq_lock);
+ if (req->rq_bulk) {
+ __u64 old_xid;
+
+ if (!ptlrpc_unregister_bulk(req, 1))
+ continue;
+
+ /* ensure previous bulk fails */
+ old_xid = req->rq_xid;
+ req->rq_xid = ptlrpc_next_xid();
+ CDEBUG(D_HA, "resend bulk "
+ "old x"LPU64
+ " new x"LPU64"\n",
+ old_xid, req->rq_xid);
+ }
+ }
+ /*
+ * rq_wait_ctx is only touched by ptlrpcd,
+ * so no lock is needed here.
+ */
+ status = sptlrpc_req_refresh_ctx(req, -1);
+ if (status) {
+ if (req->rq_err) {
+ req->rq_status = status;
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 0;
+ spin_unlock(&req->rq_lock);
+ force_timer_recalc = 1;
+ } else {
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ continue;
+ } else {
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 0;
+ spin_unlock(&req->rq_lock);
+ }
+
+ rc = ptl_send_rpc(req, 0);
+ if (rc) {
+ DEBUG_REQ(D_HA, req,
+ "send failed: rc = %d", rc);
+ force_timer_recalc = 1;
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+ }
+ /* need to reset the timeout */
+ force_timer_recalc = 1;
+ }
+
+ spin_lock(&req->rq_lock);
+
+ if (ptlrpc_client_early(req)) {
+ ptlrpc_at_recv_early_reply(req);
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ /* Still waiting for a reply? */
+ if (ptlrpc_client_recv(req)) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ /* Did we actually receive a reply? */
+ if (!ptlrpc_client_replied(req)) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ spin_unlock(&req->rq_lock);
+
+ /* unlink from net because we are going to
+ * swab in-place of reply buffer */
+ unregistered = ptlrpc_unregister_reply(req, 1);
+ if (!unregistered)
+ continue;
+
+ req->rq_status = after_reply(req);
+ if (req->rq_resend)
+ continue;
+
+ /* If there is no bulk associated with this request,
+ * then we're done and should let the interpreter
+ * process the reply. Similarly if the RPC returned
+ * an error, and therefore the bulk will never arrive.
+ */
+ if (req->rq_bulk == NULL || req->rq_status < 0) {
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ GOTO(interpret, req->rq_status);
+ }
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_BULK);
+ }
+
+ LASSERT(req->rq_phase == RQ_PHASE_BULK);
+ if (ptlrpc_client_bulk_active(req))
+ continue;
+
+ if (req->rq_bulk->bd_failure) {
+ /* The RPC reply arrived OK, but the bulk screwed
+ * up! Dead weird since the server told us the RPC
+ * was good after getting the REPLY for her GET or
+ * the ACK for her PUT. */
+ DEBUG_REQ(D_ERROR, req, "bulk transfer failed");
+ req->rq_status = -EIO;
+ }
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+ interpret:
+ LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+
+ /* This moves to "unregistering" phase we need to wait for
+ * reply unlink. */
+ if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
+ /* start async bulk unlink too */
+ ptlrpc_unregister_bulk(req, 1);
+ continue;
+ }
+
+ if (!ptlrpc_unregister_bulk(req, 1))
+ continue;
+
+ /* When calling interpret receiving already should be
+ * finished. */
+ LASSERT(!req->rq_receiving_reply);
+
+ ptlrpc_req_interpret(env, req, req->rq_status);
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
+
+ CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
+ "Completed RPC pname:cluuid:pid:xid:nid:"
+ "opc %s:%s:%d:"LPU64":%s:%d\n",
+ current_comm(), imp->imp_obd->obd_uuid.uuid,
+ lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
+ lustre_msg_get_opc(req->rq_reqmsg));
+
+ spin_lock(&imp->imp_lock);
+ /* Request already may be not on sending or delaying list. This
+ * may happen in the case of marking it erroneous for the case
+ * ptlrpc_import_delay_req(req, status) find it impossible to
+ * allow sending this rpc and returns *status != 0. */
+ if (!list_empty(&req->rq_list)) {
+ list_del_init(&req->rq_list);
+ atomic_dec(&imp->imp_inflight);
+ }
+ spin_unlock(&imp->imp_lock);
+
+ atomic_dec(&set->set_remaining);
+ wake_up_all(&imp->imp_recovery_waitq);
+
+ if (set->set_producer) {
+ /* produce a new request if possible */
+ if (ptlrpc_set_producer(set) > 0)
+ force_timer_recalc = 1;
+
+ /* free the request that has just been completed
+ * in order not to pollute set->set_requests */
+ list_del_init(&req->rq_set_chain);
+ spin_lock(&req->rq_lock);
+ req->rq_set = NULL;
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+
+ /* record rq_status to compute the final status later */
+ if (req->rq_status != 0)
+ set->set_rc = req->rq_status;
+ ptlrpc_req_finished(req);
+ }
+ }
+
+ /* If we hit an error, we want to recover promptly. */
+ RETURN(atomic_read(&set->set_remaining) == 0 || force_timer_recalc);
+}
+EXPORT_SYMBOL(ptlrpc_check_set);
+
+/**
+ * Time out request \a req. is \a async_unlink is set, that means do not wait
+ * until LNet actually confirms network buffer unlinking.
+ * Return 1 if we should give up further retrying attempts or 0 otherwise.
+ */
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
+{
+ struct obd_import *imp = req->rq_import;
+ int rc = 0;
+ ENTRY;
+
+ spin_lock(&req->rq_lock);
+ req->rq_timedout = 1;
+ spin_unlock(&req->rq_lock);
+
+ DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent "CFS_DURATION_T
+ "/real "CFS_DURATION_T"]",
+ req->rq_net_err ? "failed due to network error" :
+ ((req->rq_real_sent == 0 ||
+ cfs_time_before(req->rq_real_sent, req->rq_sent) ||
+ cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+ "timed out for sent delay" : "timed out for slow reply"),
+ req->rq_sent, req->rq_real_sent);
+
+ if (imp != NULL && obd_debug_peer_on_timeout)
+ LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
+
+ ptlrpc_unregister_reply(req, async_unlink);
+ ptlrpc_unregister_bulk(req, async_unlink);
+
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
+ if (imp == NULL) {
+ DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?");
+ RETURN(1);
+ }
+
+ atomic_inc(&imp->imp_timeouts);
+
+ /* The DLM server doesn't want recovery run on its imports. */
+ if (imp->imp_dlm_fake)
+ RETURN(1);
+
+ /* If this request is for recovery or other primordial tasks,
+ * then error it out here. */
+ if (req->rq_ctx_init || req->rq_ctx_fini ||
+ req->rq_send_state != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov) {
+ DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)",
+ ptlrpc_import_state_name(req->rq_send_state),
+ ptlrpc_import_state_name(imp->imp_state));
+ spin_lock(&req->rq_lock);
+ req->rq_status = -ETIMEDOUT;
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ RETURN(1);
+ }
+
+ /* if a request can't be resent we can't wait for an answer after
+ the timeout */
+ if (ptlrpc_no_resend(req)) {
+ DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:");
+ rc = 1;
+ }
+
+ ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
+
+ RETURN(rc);
+}
+
+/**
+ * Time out all uncompleted requests in request set pointed by \a data
+ * Callback used when waiting on sets with l_wait_event.
+ * Always returns 1.
+ */
+int ptlrpc_expired_set(void *data)
+{
+ struct ptlrpc_request_set *set = data;
+ struct list_head *tmp;
+ time_t now = cfs_time_current_sec();
+ ENTRY;
+
+ LASSERT(set != NULL);
+
+ /*
+ * A timeout expired. See which reqs it applies to...
+ */
+ list_for_each (tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ /* don't expire request waiting for context */
+ if (req->rq_wait_ctx)
+ continue;
+
+ /* Request in-flight? */
+ if (!((req->rq_phase == RQ_PHASE_RPC &&
+ !req->rq_waiting && !req->rq_resend) ||
+ (req->rq_phase == RQ_PHASE_BULK)))
+ continue;
+
+ if (req->rq_timedout || /* already dealt with */
+ req->rq_deadline > now) /* not expired */
+ continue;
+
+ /* Deal with this guy. Do it asynchronously to not block
+ * ptlrpcd thread. */
+ ptlrpc_expire_one_request(req, 1);
+ }
+
+ /*
+ * When waiting for a whole set, we always break out of the
+ * sleep so we can recalculate the timeout, or enable interrupts
+ * if everyone's timed out.
+ */
+ RETURN(1);
+}
+EXPORT_SYMBOL(ptlrpc_expired_set);
+
+/**
+ * Sets rq_intr flag in \a req under spinlock.
+ */
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ req->rq_intr = 1;
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_mark_interrupted);
+
+/**
+ * Interrupts (sets interrupted flag) all uncompleted requests in
+ * a set \a data. Callback for l_wait_event for interruptible waits.
+ */
+void ptlrpc_interrupted_set(void *data)
+{
+ struct ptlrpc_request_set *set = data;
+ struct list_head *tmp;
+
+ LASSERT(set != NULL);
+ CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set);
+
+ list_for_each(tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ if (req->rq_phase != RQ_PHASE_RPC &&
+ req->rq_phase != RQ_PHASE_UNREGISTERING)
+ continue;
+
+ ptlrpc_mark_interrupted(req);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_interrupted_set);
+
+/**
+ * Get the smallest timeout in the set; this does NOT set a timeout.
+ */
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ time_t now = cfs_time_current_sec();
+ int timeout = 0;
+ struct ptlrpc_request *req;
+ int deadline;
+ ENTRY;
+
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
+
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+ /*
+ * Request in-flight?
+ */
+ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+ (req->rq_phase == RQ_PHASE_BULK) ||
+ (req->rq_phase == RQ_PHASE_NEW)))
+ continue;
+
+ /*
+ * Already timed out.
+ */
+ if (req->rq_timedout)
+ continue;
+
+ /*
+ * Waiting for ctx.
+ */
+ if (req->rq_wait_ctx)
+ continue;
+
+ if (req->rq_phase == RQ_PHASE_NEW)
+ deadline = req->rq_sent;
+ else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend)
+ deadline = req->rq_sent;
+ else
+ deadline = req->rq_sent + req->rq_timeout;
+
+ if (deadline <= now) /* actually expired already */
+ timeout = 1; /* ASAP */
+ else if (timeout == 0 || timeout > deadline - now)
+ timeout = deadline - now;
+ }
+ RETURN(timeout);
+}
+EXPORT_SYMBOL(ptlrpc_set_next_timeout);
+
+/**
+ * Send all unset request from the set and then wait untill all
+ * requests in the set complete (either get a reply, timeout, get an
+ * error or otherwise be interrupted).
+ * Returns 0 on success or error code otherwise.
+ */
+int ptlrpc_set_wait(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ struct ptlrpc_request *req;
+ struct l_wait_info lwi;
+ int rc, timeout;
+ ENTRY;
+
+ if (set->set_producer)
+ (void)ptlrpc_set_producer(set);
+ else
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ if (req->rq_phase == RQ_PHASE_NEW)
+ (void)ptlrpc_send_new_req(req);
+ }
+
+ if (list_empty(&set->set_requests))
+ RETURN(0);
+
+ do {
+ timeout = ptlrpc_set_next_timeout(set);
+
+ /* wait until all complete, interrupted, or an in-flight
+ * req times out */
+ CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
+ set, timeout);
+
+ if (timeout == 0 && !cfs_signal_pending())
+ /*
+ * No requests are in-flight (ether timed out
+ * or delayed), so we can allow interrupts.
+ * We still want to block for a limited time,
+ * so we allow interrupts during the timeout.
+ */
+ lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
+ ptlrpc_expired_set,
+ ptlrpc_interrupted_set, set);
+ else
+ /*
+ * At least one request is in flight, so no
+ * interrupts are allowed. Wait until all
+ * complete, or an in-flight req times out.
+ */
+ lwi = LWI_TIMEOUT(cfs_time_seconds(timeout? timeout : 1),
+ ptlrpc_expired_set, set);
+
+ rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi);
+
+ /* LU-769 - if we ignored the signal because it was already
+ * pending when we started, we need to handle it now or we risk
+ * it being ignored forever */
+ if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
+ cfs_signal_pending()) {
+ sigset_t blocked_sigs =
+ cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
+
+ /* In fact we only interrupt for the "fatal" signals
+ * like SIGINT or SIGKILL. We still ignore less
+ * important signals since ptlrpc set is not easily
+ * reentrant from userspace again */
+ if (cfs_signal_pending())
+ ptlrpc_interrupted_set(set);
+ cfs_restore_sigs(blocked_sigs);
+ }
+
+ LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+
+ /* -EINTR => all requests have been flagged rq_intr so next
+ * check completes.
+ * -ETIMEDOUT => someone timed out. When all reqs have
+ * timed out, signals are enabled allowing completion with
+ * EINTR.
+ * I don't really care if we go once more round the loop in
+ * the error cases -eeb. */
+ if (rc == 0 && atomic_read(&set->set_remaining) == 0) {
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ spin_lock(&req->rq_lock);
+ req->rq_invalid_rqset = 1;
+ spin_unlock(&req->rq_lock);
+ }
+ }
+ } while (rc != 0 || atomic_read(&set->set_remaining) != 0);
+
+ LASSERT(atomic_read(&set->set_remaining) == 0);
+
+ rc = set->set_rc; /* rq_status of already freed requests if any */
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+ LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
+ if (req->rq_status != 0)
+ rc = req->rq_status;
+ }
+
+ if (set->set_interpret != NULL) {
+ int (*interpreter)(struct ptlrpc_request_set *set,void *,int) =
+ set->set_interpret;
+ rc = interpreter (set, set->set_arg, rc);
+ } else {
+ struct ptlrpc_set_cbdata *cbdata, *n;
+ int err;
+
+ list_for_each_entry_safe(cbdata, n,
+ &set->set_cblist, psc_item) {
+ list_del_init(&cbdata->psc_item);
+ err = cbdata->psc_interpret(set, cbdata->psc_data, rc);
+ if (err && !rc)
+ rc = err;
+ OBD_FREE_PTR(cbdata);
+ }
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_set_wait);
+
+/**
+ * Helper fuction for request freeing.
+ * Called when request count reached zero and request needs to be freed.
+ * Removes request from all sorts of sending/replay lists it might be on,
+ * frees network buffers if any are present.
+ * If \a locked is set, that means caller is already holding import imp_lock
+ * and so we no longer need to reobtain it (for certain lists manipulations)
+ */
+static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
+{
+ ENTRY;
+ if (request == NULL) {
+ EXIT;
+ return;
+ }
+
+ LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
+ LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */
+ LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
+ LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+ LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
+ LASSERTF(!request->rq_replay, "req %p\n", request);
+
+ req_capsule_fini(&request->rq_pill);
+
+ /* We must take it off the imp_replay_list first. Otherwise, we'll set
+ * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
+ if (request->rq_import != NULL) {
+ if (!locked)
+ spin_lock(&request->rq_import->imp_lock);
+ list_del_init(&request->rq_replay_list);
+ if (!locked)
+ spin_unlock(&request->rq_import->imp_lock);
+ }
+ LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request);
+
+ if (atomic_read(&request->rq_refcount) != 0) {
+ DEBUG_REQ(D_ERROR, request,
+ "freeing request with nonzero refcount");
+ LBUG();
+ }
+
+ if (request->rq_repbuf != NULL)
+ sptlrpc_cli_free_repbuf(request);
+ if (request->rq_export != NULL) {
+ class_export_put(request->rq_export);
+ request->rq_export = NULL;
+ }
+ if (request->rq_import != NULL) {
+ class_import_put(request->rq_import);
+ request->rq_import = NULL;
+ }
+ if (request->rq_bulk != NULL)
+ ptlrpc_free_bulk_pin(request->rq_bulk);
+
+ if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL)
+ sptlrpc_cli_free_reqbuf(request);
+
+ if (request->rq_cli_ctx)
+ sptlrpc_req_put_ctx(request, !locked);
+
+ if (request->rq_pool)
+ __ptlrpc_free_req_to_pool(request);
+ else
+ OBD_FREE(request, sizeof(*request));
+ EXIT;
+}
+
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked);
+/**
+ * Drop one request reference. Must be called with import imp_lock held.
+ * When reference count drops to zero, reuqest is freed.
+ */
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
+{
+ LASSERT(spin_is_locked(&request->rq_import->imp_lock));
+ (void)__ptlrpc_req_finished(request, 1);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished_with_imp_lock);
+
+/**
+ * Helper function
+ * Drops one reference count for request \a request.
+ * \a locked set indicates that caller holds import imp_lock.
+ * Frees the request whe reference count reaches zero.
+ */
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
+{
+ ENTRY;
+ if (request == NULL)
+ RETURN(1);
+
+ if (request == LP_POISON ||
+ request->rq_reqmsg == LP_POISON) {
+ CERROR("dereferencing freed request (bug 575)\n");
+ LBUG();
+ RETURN(1);
+ }
+
+ DEBUG_REQ(D_INFO, request, "refcount now %u",
+ atomic_read(&request->rq_refcount) - 1);
+
+ if (atomic_dec_and_test(&request->rq_refcount)) {
+ __ptlrpc_free_req(request, locked);
+ RETURN(1);
+ }
+
+ RETURN(0);
+}
+
+/**
+ * Drops one reference count for a request.
+ */
+void ptlrpc_req_finished(struct ptlrpc_request *request)
+{
+ __ptlrpc_req_finished(request, 0);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished);
+
+/**
+ * Returns xid of a \a request
+ */
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
+{
+ return request->rq_xid;
+}
+EXPORT_SYMBOL(ptlrpc_req_xid);
+
+/**
+ * Disengage the client's reply buffer from the network
+ * NB does _NOT_ unregister any client-side bulk.
+ * IDEMPOTENT, but _not_ safe against concurrent callers.
+ * The request owner (i.e. the thread doing the I/O) must call...
+ * Returns 0 on success or 1 if unregistering cannot be made.
+ */
+int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
+{
+ int rc;
+ wait_queue_head_t *wq;
+ struct l_wait_info lwi;
+
+ /*
+ * Might sleep.
+ */
+ LASSERT(!in_interrupt());
+
+ /*
+ * Let's setup deadline for reply unlink.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ async && request->rq_reply_deadline == 0)
+ request->rq_reply_deadline = cfs_time_current_sec()+LONG_UNLINK;
+
+ /*
+ * Nothing left to do.
+ */
+ if (!ptlrpc_client_recv_or_unlink(request))
+ RETURN(1);
+
+ LNetMDUnlink(request->rq_reply_md_h);
+
+ /*
+ * Let's check it once again.
+ */
+ if (!ptlrpc_client_recv_or_unlink(request))
+ RETURN(1);
+
+ /*
+ * Move to "Unregistering" phase as reply was not unlinked yet.
+ */
+ ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+
+ /*
+ * Do not wait for unlink to finish.
+ */
+ if (async)
+ RETURN(0);
+
+ /*
+ * We have to l_wait_event() whatever the result, to give liblustre
+ * a chance to run reply_in_callback(), and to make sure we've
+ * unlinked before returning a req to the pool.
+ */
+ if (request->rq_set != NULL)
+ wq = &request->rq_set->set_waitq;
+ else
+ wq = &request->rq_reply_waitq;
+
+ for (;;) {
+ /* Network access will complete in finite time but the HUGE
+ * timeout lets us CWARN for visibility of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(*wq, !ptlrpc_client_recv_or_unlink(request),
+ &lwi);
+ if (rc == 0) {
+ ptlrpc_rqphase_move(request, request->rq_next_phase);
+ RETURN(1);
+ }
+
+ LASSERT(rc == -ETIMEDOUT);
+ DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout "
+ "rvcng=%d unlnk=%d", request->rq_receiving_reply,
+ request->rq_must_unlink);
+ }
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_unregister_reply);
+
+/**
+ * Iterates through replay_list on import and prunes
+ * all requests have transno smaller than last_committed for the
+ * import and don't have rq_replay set.
+ * Since requests are sorted in transno order, stops when meetign first
+ * transno bigger than last_committed.
+ * caller must hold imp->imp_lock
+ */
+void ptlrpc_free_committed(struct obd_import *imp)
+{
+ struct list_head *tmp, *saved;
+ struct ptlrpc_request *req;
+ struct ptlrpc_request *last_req = NULL; /* temporary fire escape */
+ ENTRY;
+
+ LASSERT(imp != NULL);
+
+ LASSERT(spin_is_locked(&imp->imp_lock));
+
+
+ if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
+ imp->imp_generation == imp->imp_last_generation_checked) {
+ CDEBUG(D_INFO, "%s: skip recheck: last_committed "LPU64"\n",
+ imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+ EXIT;
+ return;
+ }
+ CDEBUG(D_RPCTRACE, "%s: committing for last_committed "LPU64" gen %d\n",
+ imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
+ imp->imp_generation);
+ imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
+ imp->imp_last_generation_checked = imp->imp_generation;
+
+ list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* XXX ok to remove when 1357 resolved - rread 05/29/03 */
+ LASSERT(req != last_req);
+ last_req = req;
+
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_EMERG, req, "zero transno during replay");
+ LBUG();
+ }
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_RPCTRACE, req, "free request with old gen");
+ GOTO(free_req, 0);
+ }
+
+ if (req->rq_replay) {
+ DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
+ continue;
+ }
+
+ /* not yet committed */
+ if (req->rq_transno > imp->imp_peer_committed_transno) {
+ DEBUG_REQ(D_RPCTRACE, req, "stopping search");
+ break;
+ }
+
+ DEBUG_REQ(D_INFO, req, "commit (last_committed "LPU64")",
+ imp->imp_peer_committed_transno);
+free_req:
+ spin_lock(&req->rq_lock);
+ req->rq_replay = 0;
+ spin_unlock(&req->rq_lock);
+ if (req->rq_commit_cb != NULL)
+ req->rq_commit_cb(req);
+ list_del_init(&req->rq_replay_list);
+ __ptlrpc_req_finished(req, 1);
+ }
+
+ EXIT;
+ return;
+}
+
+void ptlrpc_cleanup_client(struct obd_import *imp)
+{
+ ENTRY;
+ EXIT;
+ return;
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_client);
+
+/**
+ * Schedule previously sent request for resend.
+ * For bulk requests we assign new xid (to avoid problems with
+ * lost replies and therefore several transfers landing into same buffer
+ * from different sending attempts).
+ */
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+ DEBUG_REQ(D_HA, req, "going to resend");
+ lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
+ req->rq_status = -EAGAIN;
+
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 1;
+ req->rq_net_err = 0;
+ req->rq_timedout = 0;
+ if (req->rq_bulk) {
+ __u64 old_xid = req->rq_xid;
+
+ /* ensure previous bulk fails */
+ req->rq_xid = ptlrpc_next_xid();
+ CDEBUG(D_HA, "resend bulk old x"LPU64" new x"LPU64"\n",
+ old_xid, req->rq_xid);
+ }
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_resend_req);
+
+/* XXX: this function and rq_status are currently unused */
+void ptlrpc_restart_req(struct ptlrpc_request *req)
+{
+ DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
+ req->rq_status = -ERESTARTSYS;
+
+ spin_lock(&req->rq_lock);
+ req->rq_restart = 1;
+ req->rq_timedout = 0;
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_restart_req);
+
+/**
+ * Grab additional reference on a request \a req
+ */
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
+{
+ ENTRY;
+ atomic_inc(&req->rq_refcount);
+ RETURN(req);
+}
+EXPORT_SYMBOL(ptlrpc_request_addref);
+
+/**
+ * Add a request to import replay_list.
+ * Must be called under imp_lock
+ */
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+ struct obd_import *imp)
+{
+ struct list_head *tmp;
+
+ LASSERT(spin_is_locked(&imp->imp_lock));
+
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_EMERG, req, "saving request with zero transno");
+ LBUG();
+ }
+
+ /* clear this for new requests that were resent as well
+ as resent replayed requests. */
+ lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
+
+ /* don't re-add requests that have been replayed */
+ if (!list_empty(&req->rq_replay_list))
+ return;
+
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
+
+ LASSERT(imp->imp_replayable);
+ /* Balanced in ptlrpc_free_committed, usually. */
+ ptlrpc_request_addref(req);
+ list_for_each_prev(tmp, &imp->imp_replay_list) {
+ struct ptlrpc_request *iter =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* We may have duplicate transnos if we create and then
+ * open a file, or for closes retained if to match creating
+ * opens, so use req->rq_xid as a secondary key.
+ * (See bugs 684, 685, and 428.)
+ * XXX no longer needed, but all opens need transnos!
+ */
+ if (iter->rq_transno > req->rq_transno)
+ continue;
+
+ if (iter->rq_transno == req->rq_transno) {
+ LASSERT(iter->rq_xid != req->rq_xid);
+ if (iter->rq_xid > req->rq_xid)
+ continue;
+ }
+
+ list_add(&req->rq_replay_list, &iter->rq_replay_list);
+ return;
+ }
+
+ list_add(&req->rq_replay_list, &imp->imp_replay_list);
+}
+EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
+
+/**
+ * Send request and wait until it completes.
+ * Returns request processing status.
+ */
+int ptlrpc_queue_wait(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *set;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_set == NULL);
+ LASSERT(!req->rq_receiving_reply);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL) {
+ CERROR("Unable to allocate ptlrpc set.");
+ RETURN(-ENOMEM);
+ }
+
+ /* for distributed debugging */
+ lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+ /* add a ref for the set (see comment in ptlrpc_set_add_req) */
+ ptlrpc_request_addref(req);
+ ptlrpc_set_add_req(set, req);
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_queue_wait);
+
+struct ptlrpc_replay_async_args {
+ int praa_old_state;
+ int praa_old_status;
+};
+
+/**
+ * Callback used for replayed requests reply processing.
+ * In case of succesful reply calls registeresd request replay callback.
+ * In case of error restart replay process.
+ */
+static int ptlrpc_replay_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void * data, int rc)
+{
+ struct ptlrpc_replay_async_args *aa = data;
+ struct obd_import *imp = req->rq_import;
+
+ ENTRY;
+ atomic_dec(&imp->imp_replay_inflight);
+
+ if (!ptlrpc_client_replied(req)) {
+ CERROR("request replay timed out, restarting recovery\n");
+ GOTO(out, rc = -ETIMEDOUT);
+ }
+
+ if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR &&
+ (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN ||
+ lustre_msg_get_status(req->rq_repmsg) == -ENODEV))
+ GOTO(out, rc = lustre_msg_get_status(req->rq_repmsg));
+
+ /** VBR: check version failure */
+ if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) {
+ /** replay was failed due to version mismatch */
+ DEBUG_REQ(D_WARNING, req, "Version mismatch during replay\n");
+ spin_lock(&imp->imp_lock);
+ imp->imp_vbr_failed = 1;
+ imp->imp_no_lock_replay = 1;
+ spin_unlock(&imp->imp_lock);
+ lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+ } else {
+ /** The transno had better not change over replay. */
+ LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) ==
+ lustre_msg_get_transno(req->rq_repmsg) ||
+ lustre_msg_get_transno(req->rq_repmsg) == 0,
+ LPX64"/"LPX64"\n",
+ lustre_msg_get_transno(req->rq_reqmsg),
+ lustre_msg_get_transno(req->rq_repmsg));
+ }
+
+ spin_lock(&imp->imp_lock);
+ /** if replays by version then gap occur on server, no trust to locks */
+ if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY)
+ imp->imp_no_lock_replay = 1;
+ imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg);
+ spin_unlock(&imp->imp_lock);
+ LASSERT(imp->imp_last_replay_transno);
+
+ /* transaction number shouldn't be bigger than the latest replayed */
+ if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) {
+ DEBUG_REQ(D_ERROR, req,
+ "Reported transno "LPU64" is bigger than the "
+ "replayed one: "LPU64, req->rq_transno,
+ lustre_msg_get_transno(req->rq_reqmsg));
+ GOTO(out, rc = -EINVAL);
+ }
+
+ DEBUG_REQ(D_HA, req, "got rep");
+
+ /* let the callback do fixups, possibly including in the request */
+ if (req->rq_replay_cb)
+ req->rq_replay_cb(req);
+
+ if (ptlrpc_client_replied(req) &&
+ lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) {
+ DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
+ lustre_msg_get_status(req->rq_repmsg),
+ aa->praa_old_status);
+ } else {
+ /* Put it back for re-replay. */
+ lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+ }
+
+ /*
+ * Errors while replay can set transno to 0, but
+ * imp_last_replay_transno shouldn't be set to 0 anyway
+ */
+ if (req->rq_transno == 0)
+ CERROR("Transno is 0 during replay!\n");
+
+ /* continue with recovery */
+ rc = ptlrpc_import_recovery_state_machine(imp);
+ out:
+ req->rq_send_state = aa->praa_old_state;
+
+ if (rc != 0)
+ /* this replay failed, so restart recovery */
+ ptlrpc_connect_import(imp);
+
+ RETURN(rc);
+}
+
+/**
+ * Prepares and queues request for replay.
+ * Adds it to ptlrpcd queue for actual sending.
+ * Returns 0 on success.
+ */
+int ptlrpc_replay_req(struct ptlrpc_request *req)
+{
+ struct ptlrpc_replay_async_args *aa;
+ ENTRY;
+
+ LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
+
+ LASSERT (sizeof (*aa) <= sizeof (req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ memset(aa, 0, sizeof *aa);
+
+ /* Prepare request to be resent with ptlrpcd */
+ aa->praa_old_state = req->rq_send_state;
+ req->rq_send_state = LUSTRE_IMP_REPLAY;
+ req->rq_phase = RQ_PHASE_NEW;
+ req->rq_next_phase = RQ_PHASE_UNDEFINED;
+ if (req->rq_repmsg)
+ aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg);
+ req->rq_status = 0;
+ req->rq_interpret_reply = ptlrpc_replay_interpret;
+ /* Readjust the timeout for current conditions */
+ ptlrpc_at_set_req_timeout(req);
+
+ /* Tell server the net_latency, so the server can calculate how long
+ * it should wait for next replay */
+ lustre_msg_set_service_time(req->rq_reqmsg,
+ ptlrpc_at_get_net_latency(req));
+ DEBUG_REQ(D_HA, req, "REPLAY");
+
+ atomic_inc(&req->rq_import->imp_replay_inflight);
+ ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
+
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_replay_req);
+
+/**
+ * Aborts all in-flight request on import \a imp sending and delayed lists
+ */
+void ptlrpc_abort_inflight(struct obd_import *imp)
+{
+ struct list_head *tmp, *n;
+ ENTRY;
+
+ /* Make sure that no new requests get processed for this import.
+ * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing
+ * this flag and then putting requests on sending_list or delayed_list.
+ */
+ spin_lock(&imp->imp_lock);
+
+ /* XXX locking? Maybe we should remove each request with the list
+ * locked? Also, how do we know if the requests on the list are
+ * being freed at this time?
+ */
+ list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_RPCTRACE, req, "inflight");
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_import_generation < imp->imp_generation) {
+ req->rq_err = 1;
+ req->rq_status = -EIO;
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&req->rq_lock);
+ }
+
+ list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req");
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_import_generation < imp->imp_generation) {
+ req->rq_err = 1;
+ req->rq_status = -EIO;
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&req->rq_lock);
+ }
+
+ /* Last chance to free reqs left on the replay list, but we
+ * will still leak reqs that haven't committed. */
+ if (imp->imp_replayable)
+ ptlrpc_free_committed(imp);
+
+ spin_unlock(&imp->imp_lock);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_abort_inflight);
+
+/**
+ * Abort all uncompleted requests in request set \a set
+ */
+void ptlrpc_abort_set(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *pos;
+
+ LASSERT(set != NULL);
+
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_phase != RQ_PHASE_RPC) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ req->rq_err = 1;
+ req->rq_status = -EINTR;
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+ }
+}
+
+static __u64 ptlrpc_last_xid;
+static spinlock_t ptlrpc_last_xid_lock;
+
+/**
+ * Initialize the XID for the node. This is common among all requests on
+ * this node, and only requires the property that it is monotonically
+ * increasing. It does not need to be sequential. Since this is also used
+ * as the RDMA match bits, it is important that a single client NOT have
+ * the same match bits for two different in-flight requests, hence we do
+ * NOT want to have an XID per target or similar.
+ *
+ * To avoid an unlikely collision between match bits after a client reboot
+ * (which would deliver old data into the wrong RDMA buffer) initialize
+ * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s.
+ * If the time is clearly incorrect, we instead use a 62-bit random number.
+ * In the worst case the random number will overflow 1M RPCs per second in
+ * 9133 years, or permutations thereof.
+ */
+#define YEAR_2004 (1ULL << 30)
+void ptlrpc_init_xid(void)
+{
+ time_t now = cfs_time_current_sec();
+
+ spin_lock_init(&ptlrpc_last_xid_lock);
+ if (now < YEAR_2004) {
+ cfs_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
+ ptlrpc_last_xid >>= 2;
+ ptlrpc_last_xid |= (1ULL << 61);
+ } else {
+ ptlrpc_last_xid = (__u64)now << 20;
+ }
+
+ /* Need to always be aligned to a power-of-two for mutli-bulk BRW */
+ CLASSERT((PTLRPC_BULK_OPS_COUNT & (PTLRPC_BULK_OPS_COUNT - 1)) == 0);
+ ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK;
+}
+
+/**
+ * Increase xid and returns resulting new value to the caller.
+ *
+ * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting
+ * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC
+ * itself uses the last bulk xid needed, so the server can determine the
+ * the number of bulk transfers from the RPC XID and a bitmask. The starting
+ * xid must align to a power-of-two value.
+ *
+ * This is assumed to be true due to the initial ptlrpc_last_xid
+ * value also being initialized to a power-of-two value. LU-1431
+ */
+__u64 ptlrpc_next_xid(void)
+{
+ __u64 next;
+
+ spin_lock(&ptlrpc_last_xid_lock);
+ next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+ ptlrpc_last_xid = next;
+ spin_unlock(&ptlrpc_last_xid_lock);
+
+ return next;
+}
+EXPORT_SYMBOL(ptlrpc_next_xid);
+
+/**
+ * Get a glimpse at what next xid value might have been.
+ * Returns possible next xid.
+ */
+__u64 ptlrpc_sample_next_xid(void)
+{
+#if BITS_PER_LONG == 32
+ /* need to avoid possible word tearing on 32-bit systems */
+ __u64 next;
+
+ spin_lock(&ptlrpc_last_xid_lock);
+ next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+ spin_unlock(&ptlrpc_last_xid_lock);
+
+ return next;
+#else
+ /* No need to lock, since returned value is racy anyways */
+ return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+#endif
+}
+EXPORT_SYMBOL(ptlrpc_sample_next_xid);
+
+/**
+ * Functions for operating ptlrpc workers.
+ *
+ * A ptlrpc work is a function which will be running inside ptlrpc context.
+ * The callback shouldn't sleep otherwise it will block that ptlrpcd thread.
+ *
+ * 1. after a work is created, it can be used many times, that is:
+ * handler = ptlrpcd_alloc_work();
+ * ptlrpcd_queue_work();
+ *
+ * queue it again when necessary:
+ * ptlrpcd_queue_work();
+ * ptlrpcd_destroy_work();
+ * 2. ptlrpcd_queue_work() can be called by multiple processes meanwhile, but
+ * it will only be queued once in any time. Also as its name implies, it may
+ * have delay before it really runs by ptlrpcd thread.
+ */
+struct ptlrpc_work_async_args {
+ __u64 magic;
+ int (*cb)(const struct lu_env *, void *);
+ void *cbdata;
+};
+
+#define PTLRPC_WORK_MAGIC 0x6655436b676f4f44ULL /* magic code */
+
+static int work_interpreter(const struct lu_env *env,
+ struct ptlrpc_request *req, void *data, int rc)
+{
+ struct ptlrpc_work_async_args *arg = data;
+
+ LASSERT(arg->magic == PTLRPC_WORK_MAGIC);
+ LASSERT(arg->cb != NULL);
+
+ return arg->cb(env, arg->cbdata);
+}
+
+/**
+ * Create a work for ptlrpc.
+ */
+void *ptlrpcd_alloc_work(struct obd_import *imp,
+ int (*cb)(const struct lu_env *, void *), void *cbdata)
+{
+ struct ptlrpc_request *req = NULL;
+ struct ptlrpc_work_async_args *args;
+ ENTRY;
+
+ might_sleep();
+
+ if (cb == NULL)
+ RETURN(ERR_PTR(-EINVAL));
+
+ /* copy some code from deprecated fakereq. */
+ OBD_ALLOC_PTR(req);
+ if (req == NULL) {
+ CERROR("ptlrpc: run out of memory!\n");
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+ req->rq_type = PTL_RPC_MSG_REQUEST;
+ req->rq_import = class_import_get(imp);
+ req->rq_export = NULL;
+ req->rq_interpret_reply = work_interpreter;
+ /* don't want reply */
+ req->rq_receiving_reply = 0;
+ req->rq_must_unlink = 0;
+ req->rq_no_delay = req->rq_no_resend = 1;
+
+ spin_lock_init(&req->rq_lock);
+ INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_replay_list);
+ INIT_LIST_HEAD(&req->rq_set_chain);
+ INIT_LIST_HEAD(&req->rq_history_list);
+ INIT_LIST_HEAD(&req->rq_exp_list);
+ init_waitqueue_head(&req->rq_reply_waitq);
+ init_waitqueue_head(&req->rq_set_waitq);
+ atomic_set(&req->rq_refcount, 1);
+
+ CLASSERT (sizeof(*args) <= sizeof(req->rq_async_args));
+ args = ptlrpc_req_async_args(req);
+ args->magic = PTLRPC_WORK_MAGIC;
+ args->cb = cb;
+ args->cbdata = cbdata;
+
+ RETURN(req);
+}
+EXPORT_SYMBOL(ptlrpcd_alloc_work);
+
+void ptlrpcd_destroy_work(void *handler)
+{
+ struct ptlrpc_request *req = handler;
+
+ if (req)
+ ptlrpc_req_finished(req);
+}
+EXPORT_SYMBOL(ptlrpcd_destroy_work);
+
+int ptlrpcd_queue_work(void *handler)
+{
+ struct ptlrpc_request *req = handler;
+
+ /*
+ * Check if the req is already being queued.
+ *
+ * Here comes a trick: it lacks a way of checking if a req is being
+ * processed reliably in ptlrpc. Here I have to use refcount of req
+ * for this purpose. This is okay because the caller should use this
+ * req as opaque data. - Jinshan
+ */
+ LASSERT(atomic_read(&req->rq_refcount) > 0);
+ if (atomic_read(&req->rq_refcount) > 1)
+ return -EBUSY;
+
+ if (atomic_inc_return(&req->rq_refcount) > 2) { /* race */
+ atomic_dec(&req->rq_refcount);
+ return -EBUSY;
+ }
+
+ /* re-initialize the req */
+ req->rq_timeout = obd_timeout;
+ req->rq_sent = cfs_time_current_sec();
+ req->rq_deadline = req->rq_sent + req->rq_timeout;
+ req->rq_reply_deadline = req->rq_deadline;
+ req->rq_phase = RQ_PHASE_INTERPRET;
+ req->rq_next_phase = RQ_PHASE_COMPLETE;
+ req->rq_xid = ptlrpc_next_xid();
+ req->rq_import_generation = req->rq_import->imp_generation;
+
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpcd_queue_work);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
new file mode 100644
index 000000000000..a0757f372be5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -0,0 +1,248 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+
+#include "ptlrpc_internal.h"
+
+static cfs_hash_t *conn_hash = NULL;
+static cfs_hash_ops_t conn_hash_ops;
+
+struct ptlrpc_connection *
+ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self,
+ struct obd_uuid *uuid)
+{
+ struct ptlrpc_connection *conn, *conn2;
+ ENTRY;
+
+ conn = cfs_hash_lookup(conn_hash, &peer);
+ if (conn)
+ GOTO(out, conn);
+
+ OBD_ALLOC_PTR(conn);
+ if (!conn)
+ RETURN(NULL);
+
+ conn->c_peer = peer;
+ conn->c_self = self;
+ INIT_HLIST_NODE(&conn->c_hash);
+ atomic_set(&conn->c_refcount, 1);
+ if (uuid)
+ obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+
+ /*
+ * Add the newly created conn to the hash, on key collision we
+ * lost a racing addition and must destroy our newly allocated
+ * connection. The object which exists in the has will be
+ * returned and may be compared against out object.
+ */
+ /* In the function below, .hs_keycmp resolves to
+ * conn_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ conn2 = cfs_hash_findadd_unique(conn_hash, &peer, &conn->c_hash);
+ if (conn != conn2) {
+ OBD_FREE_PTR(conn);
+ conn = conn2;
+ }
+ EXIT;
+out:
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+ return conn;
+}
+EXPORT_SYMBOL(ptlrpc_connection_get);
+
+int ptlrpc_connection_put(struct ptlrpc_connection *conn)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (!conn)
+ RETURN(rc);
+
+ LASSERT(atomic_read(&conn->c_refcount) > 1);
+
+ /*
+ * We do not remove connection from hashtable and
+ * do not free it even if last caller released ref,
+ * as we want to have it cached for the case it is
+ * needed again.
+ *
+ * Deallocating it and later creating new connection
+ * again would be wastful. This way we also avoid
+ * expensive locking to protect things from get/put
+ * race when found cached connection is freed by
+ * ptlrpc_connection_put().
+ *
+ * It will be freed later in module unload time,
+ * when ptlrpc_connection_fini()->lh_exit->conn_exit()
+ * path is called.
+ */
+ if (atomic_dec_return(&conn->c_refcount) == 1)
+ rc = 1;
+
+ CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_connection_put);
+
+struct ptlrpc_connection *
+ptlrpc_connection_addref(struct ptlrpc_connection *conn)
+{
+ ENTRY;
+
+ atomic_inc(&conn->c_refcount);
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ RETURN(conn);
+}
+EXPORT_SYMBOL(ptlrpc_connection_addref);
+
+int ptlrpc_connection_init(void)
+{
+ ENTRY;
+
+ conn_hash = cfs_hash_create("CONN_HASH",
+ HASH_CONN_CUR_BITS,
+ HASH_CONN_MAX_BITS,
+ HASH_CONN_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &conn_hash_ops, CFS_HASH_DEFAULT);
+ if (!conn_hash)
+ RETURN(-ENOMEM);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_connection_init);
+
+void ptlrpc_connection_fini(void) {
+ ENTRY;
+ cfs_hash_putref(conn_hash);
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_connection_fini);
+
+/*
+ * Hash operations for net_peer<->connection
+ */
+static unsigned
+conn_hashfn(cfs_hash_t *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask);
+}
+
+static int
+conn_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+ const lnet_process_id_t *conn_key;
+
+ LASSERT(key != NULL);
+ conn_key = (lnet_process_id_t*)key;
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+
+ return conn_key->nid == conn->c_peer.nid &&
+ conn_key->pid == conn->c_peer.pid;
+}
+
+static void *
+conn_key(struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ return &conn->c_peer;
+}
+
+static void *
+conn_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+}
+
+static void
+conn_get(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_inc(&conn->c_refcount);
+}
+
+static void
+conn_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_dec(&conn->c_refcount);
+}
+
+static void
+conn_exit(cfs_hash_t *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ /*
+ * Nothing should be left. Connection user put it and
+ * connection also was deleted from table by this time
+ * so we should have 0 refs.
+ */
+ LASSERTF(atomic_read(&conn->c_refcount) == 0,
+ "Busy connection with %d refs\n",
+ atomic_read(&conn->c_refcount));
+ OBD_FREE_PTR(conn);
+}
+
+static cfs_hash_ops_t conn_hash_ops = {
+ .hs_hash = conn_hashfn,
+ .hs_keycmp = conn_keycmp,
+ .hs_key = conn_key,
+ .hs_object = conn_object,
+ .hs_get = conn_get,
+ .hs_put_locked = conn_put_locked,
+ .hs_exit = conn_exit,
+};
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
new file mode 100644
index 000000000000..0264c102cb3e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -0,0 +1,595 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+# include <linux/libcfs/libcfs.h>
+# ifdef __mips64__
+# include <linux/kernel.h>
+# endif
+
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+#include "ptlrpc_internal.h"
+
+lnet_handle_eq_t ptlrpc_eq_h;
+
+/*
+ * Client's outgoing request callback
+ */
+void request_out_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request *req = cbid->cbid_arg;
+ ENTRY;
+
+ LASSERT (ev->type == LNET_EVENT_SEND ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT (ev->unlinked);
+
+ DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+ sptlrpc_request_out_callback(req);
+ req->rq_real_sent = cfs_time_current_sec();
+
+ if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
+
+ /* Failed send: make it seem like the reply timed out, just
+ * like failing sends in client.c does currently... */
+
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+
+ ptlrpc_client_wake_req(req);
+ }
+
+ ptlrpc_req_finished(req);
+
+ EXIT;
+}
+
+/*
+ * Client's incoming reply callback
+ */
+void reply_in_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request *req = cbid->cbid_arg;
+ ENTRY;
+
+ DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+ LASSERT (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
+ LASSERT (ev->md.start == req->rq_repbuf);
+ LASSERT (ev->offset + ev->mlength <= req->rq_repbuf_len);
+ /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
+ for adaptive timeouts' early reply. */
+ LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);
+
+ spin_lock(&req->rq_lock);
+
+ req->rq_receiving_reply = 0;
+ req->rq_early = 0;
+ if (ev->unlinked)
+ req->rq_must_unlink = 0;
+
+ if (ev->status)
+ goto out_wake;
+
+ if (ev->type == LNET_EVENT_UNLINK) {
+ LASSERT(ev->unlinked);
+ DEBUG_REQ(D_NET, req, "unlink");
+ goto out_wake;
+ }
+
+ if (ev->mlength < ev->rlength ) {
+ CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
+ req->rq_replen, ev->rlength, ev->offset);
+ req->rq_reply_truncate = 1;
+ req->rq_replied = 1;
+ req->rq_status = -EOVERFLOW;
+ req->rq_nob_received = ev->rlength + ev->offset;
+ goto out_wake;
+ }
+
+ if ((ev->offset == 0) &&
+ ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
+ /* Early reply */
+ DEBUG_REQ(D_ADAPTTO, req,
+ "Early reply received: mlen=%u offset=%d replen=%d "
+ "replied=%d unlinked=%d", ev->mlength, ev->offset,
+ req->rq_replen, req->rq_replied, ev->unlinked);
+
+ req->rq_early_count++; /* number received, client side */
+
+ if (req->rq_replied) /* already got the real reply */
+ goto out_wake;
+
+ req->rq_early = 1;
+ req->rq_reply_off = ev->offset;
+ req->rq_nob_received = ev->mlength;
+ /* And we're still receiving */
+ req->rq_receiving_reply = 1;
+ } else {
+ /* Real reply */
+ req->rq_rep_swab_mask = 0;
+ req->rq_replied = 1;
+ req->rq_reply_off = ev->offset;
+ req->rq_nob_received = ev->mlength;
+ /* LNetMDUnlink can't be called under the LNET_LOCK,
+ so we must unlink in ptlrpc_unregister_reply */
+ DEBUG_REQ(D_INFO, req,
+ "reply in flags=%x mlen=%u offset=%d replen=%d",
+ lustre_msg_get_flags(req->rq_reqmsg),
+ ev->mlength, ev->offset, req->rq_replen);
+ }
+
+ req->rq_import->imp_last_reply_time = cfs_time_current_sec();
+
+out_wake:
+ /* NB don't unlock till after wakeup; req can disappear under us
+ * since we don't have our own ref */
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+ EXIT;
+}
+
+/*
+ * Client's bulk has been written/read
+ */
+void client_bulk_callback (lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_bulk_desc *desc = cbid->cbid_arg;
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ LASSERT ((desc->bd_type == BULK_PUT_SINK &&
+ ev->type == LNET_EVENT_PUT) ||
+ (desc->bd_type == BULK_GET_SOURCE &&
+ ev->type == LNET_EVENT_GET) ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT (ev->unlinked);
+
+ if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE))
+ ev->status = -EIO;
+
+ if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2,CFS_FAIL_ONCE))
+ ev->status = -EIO;
+
+ CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+ "event type %d, status %d, desc %p\n",
+ ev->type, ev->status, desc);
+
+ spin_lock(&desc->bd_lock);
+ req = desc->bd_req;
+ LASSERT(desc->bd_md_count > 0);
+ desc->bd_md_count--;
+
+ if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
+ desc->bd_nob_transferred += ev->mlength;
+ desc->bd_sender = ev->sender;
+ } else {
+ /* start reconnect and resend if network error hit */
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ if (ev->status != 0)
+ desc->bd_failure = 1;
+
+ /* NB don't unlock till after wakeup; desc can disappear under us
+ * otherwise */
+ if (desc->bd_md_count == 0)
+ ptlrpc_client_wake_req(desc->bd_req);
+
+ spin_unlock(&desc->bd_lock);
+ EXIT;
+}
+
+/*
+ * We will have percpt request history list for ptlrpc service in upcoming
+ * patches because we don't want to be serialized by current per-service
+ * history operations. So we require history ID can (somehow) show arriving
+ * order w/o grabbing global lock, and user can sort them in userspace.
+ *
+ * This is how we generate history ID for ptlrpc_request:
+ * ----------------------------------------------------
+ * | 32 bits | 16 bits | (16 - X)bits | X bits |
+ * ----------------------------------------------------
+ * | seconds | usec / 16 | sequence | CPT id |
+ * ----------------------------------------------------
+ *
+ * it might not be precise but should be good enough.
+ */
+
+#define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits)
+
+#define REQS_SEC_SHIFT 32
+#define REQS_USEC_SHIFT 16
+#define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt)
+
+static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ __u64 sec = req->rq_arrival_time.tv_sec;
+ __u32 usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */
+ __u64 new_seq;
+
+ /* set sequence ID for request and add it to history list,
+ * it must be called with hold svcpt::scp_lock */
+
+ new_seq = (sec << REQS_SEC_SHIFT) |
+ (usec << REQS_USEC_SHIFT) |
+ (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt);
+
+ if (new_seq > svcpt->scp_hist_seq) {
+ /* This handles the initial case of scp_hist_seq == 0 or
+ * we just jumped into a new time window */
+ svcpt->scp_hist_seq = new_seq;
+ } else {
+ LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT);
+ /* NB: increase sequence number in current usec bucket,
+ * however, it's possible that we used up all bits for
+ * sequence and jumped into the next usec bucket (future time),
+ * then we hope there will be less RPCs per bucket at some
+ * point, and sequence will catch up again */
+ svcpt->scp_hist_seq += (1U << REQS_SEQ_SHIFT(svcpt));
+ new_seq = svcpt->scp_hist_seq;
+ }
+
+ req->rq_history_seq = new_seq;
+
+ list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs);
+}
+
+/*
+ * Server's incoming request callback
+ */
+void request_in_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+ struct ptlrpc_service *service = svcpt->scp_service;
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ LASSERT (ev->type == LNET_EVENT_PUT ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT ((char *)ev->md.start >= rqbd->rqbd_buffer);
+ LASSERT ((char *)ev->md.start + ev->offset + ev->mlength <=
+ rqbd->rqbd_buffer + service->srv_buf_size);
+
+ CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+ "event type %d, status %d, service %s\n",
+ ev->type, ev->status, service->srv_name);
+
+ if (ev->unlinked) {
+ /* If this is the last request message to fit in the
+ * request buffer we can use the request object embedded in
+ * rqbd. Note that if we failed to allocate a request,
+ * we'd have to re-post the rqbd, which we can't do in this
+ * context. */
+ req = &rqbd->rqbd_req;
+ memset(req, 0, sizeof (*req));
+ } else {
+ LASSERT (ev->type == LNET_EVENT_PUT);
+ if (ev->status != 0) {
+ /* We moaned above already... */
+ return;
+ }
+ OBD_ALLOC_GFP(req, sizeof(*req), ALLOC_ATOMIC_TRY);
+ if (req == NULL) {
+ CERROR("Can't allocate incoming request descriptor: "
+ "Dropping %s RPC from %s\n",
+ service->srv_name,
+ libcfs_id2str(ev->initiator));
+ return;
+ }
+ }
+
+ /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
+ * flags are reset and scalars are zero. We only set the message
+ * size to non-zero if this was a successful receive. */
+ req->rq_xid = ev->match_bits;
+ req->rq_reqbuf = ev->md.start + ev->offset;
+ if (ev->type == LNET_EVENT_PUT && ev->status == 0)
+ req->rq_reqdata_len = ev->mlength;
+ do_gettimeofday(&req->rq_arrival_time);
+ req->rq_peer = ev->initiator;
+ req->rq_self = ev->target.nid;
+ req->rq_rqbd = rqbd;
+ req->rq_phase = RQ_PHASE_NEW;
+ spin_lock_init(&req->rq_lock);
+ INIT_LIST_HEAD(&req->rq_timed_list);
+ INIT_LIST_HEAD(&req->rq_exp_list);
+ atomic_set(&req->rq_refcount, 1);
+ if (ev->type == LNET_EVENT_PUT)
+ CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n",
+ req, req->rq_xid, ev->mlength);
+
+ CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
+
+ spin_lock(&svcpt->scp_lock);
+
+ ptlrpc_req_add_history(svcpt, req);
+
+ if (ev->unlinked) {
+ svcpt->scp_nrqbds_posted--;
+ CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
+ svcpt->scp_nrqbds_posted);
+
+ /* Normally, don't complain about 0 buffers posted; LNET won't
+ * drop incoming reqs since we set the portal lazy */
+ if (test_req_buffer_pressure &&
+ ev->type != LNET_EVENT_UNLINK &&
+ svcpt->scp_nrqbds_posted == 0)
+ CWARN("All %s request buffers busy\n",
+ service->srv_name);
+
+ /* req takes over the network's ref on rqbd */
+ } else {
+ /* req takes a ref on rqbd */
+ rqbd->rqbd_refcount++;
+ }
+
+ list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
+ svcpt->scp_nreqs_incoming++;
+
+ /* NB everything can disappear under us once the request
+ * has been queued and we unlock, so do the wake now... */
+ wake_up(&svcpt->scp_waitq);
+
+ spin_unlock(&svcpt->scp_lock);
+ EXIT;
+}
+
+/*
+ * Server's outgoing reply callback
+ */
+void reply_out_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_reply_state *rs = cbid->cbid_arg;
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+ ENTRY;
+
+ LASSERT (ev->type == LNET_EVENT_SEND ||
+ ev->type == LNET_EVENT_ACK ||
+ ev->type == LNET_EVENT_UNLINK);
+
+ if (!rs->rs_difficult) {
+ /* 'Easy' replies have no further processing so I drop the
+ * net's ref on 'rs' */
+ LASSERT (ev->unlinked);
+ ptlrpc_rs_decref(rs);
+ EXIT;
+ return;
+ }
+
+ LASSERT (rs->rs_on_net);
+
+ if (ev->unlinked) {
+ /* Last network callback. The net's ref on 'rs' stays put
+ * until ptlrpc_handle_rs() is done with it */
+ spin_lock(&svcpt->scp_rep_lock);
+ spin_lock(&rs->rs_lock);
+
+ rs->rs_on_net = 0;
+ if (!rs->rs_no_ack ||
+ rs->rs_transno <=
+ rs->rs_export->exp_obd->obd_last_committed)
+ ptlrpc_schedule_difficult_reply(rs);
+
+ spin_unlock(&rs->rs_lock);
+ spin_unlock(&svcpt->scp_rep_lock);
+ }
+ EXIT;
+}
+
+
+static void ptlrpc_master_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ void (*callback)(lnet_event_t *ev) = cbid->cbid_fn;
+
+ /* Honestly, it's best to find out early. */
+ LASSERT (cbid->cbid_arg != LP_POISON);
+ LASSERT (callback == request_out_callback ||
+ callback == reply_in_callback ||
+ callback == client_bulk_callback ||
+ callback == request_in_callback ||
+ callback == reply_out_callback
+ );
+
+ callback (ev);
+}
+
+int ptlrpc_uuid_to_peer (struct obd_uuid *uuid,
+ lnet_process_id_t *peer, lnet_nid_t *self)
+{
+ int best_dist = 0;
+ __u32 best_order = 0;
+ int count = 0;
+ int rc = -ENOENT;
+ int portals_compatibility;
+ int dist;
+ __u32 order;
+ lnet_nid_t dst_nid;
+ lnet_nid_t src_nid;
+
+ portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL);
+
+ peer->pid = LUSTRE_SRV_LNET_PID;
+
+ /* Choose the matching UUID that's closest */
+ while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) {
+ dist = LNetDist(dst_nid, &src_nid, &order);
+ if (dist < 0)
+ continue;
+
+ if (dist == 0) { /* local! use loopback LND */
+ peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0);
+ rc = 0;
+ break;
+ }
+
+ if (rc < 0 ||
+ dist < best_dist ||
+ (dist == best_dist && order < best_order)) {
+ best_dist = dist;
+ best_order = order;
+
+ if (portals_compatibility > 1) {
+ /* Strong portals compatibility: Zero the nid's
+ * NET, so if I'm reading new config logs, or
+ * getting configured by (new) lconf I can
+ * still talk to old servers. */
+ dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid));
+ src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid));
+ }
+ peer->nid = dst_nid;
+ *self = src_nid;
+ rc = 0;
+ }
+ }
+
+ CDEBUG(D_NET,"%s->%s\n", uuid->uuid, libcfs_id2str(*peer));
+ return rc;
+}
+
+void ptlrpc_ni_fini(void)
+{
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ int rc;
+ int retries;
+
+ /* Wait for the event queue to become idle since there may still be
+ * messages in flight with pending events (i.e. the fire-and-forget
+ * messages == client requests and "non-difficult" server
+ * replies */
+
+ for (retries = 0;; retries++) {
+ rc = LNetEQFree(ptlrpc_eq_h);
+ switch (rc) {
+ default:
+ LBUG();
+
+ case 0:
+ LNetNIFini();
+ return;
+
+ case -EBUSY:
+ if (retries != 0)
+ CWARN("Event queue still busy\n");
+
+ /* Wait for a bit */
+ init_waitqueue_head(&waitq);
+ lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+ break;
+ }
+ }
+ /* notreached */
+}
+
+lnet_pid_t ptl_get_pid(void)
+{
+ lnet_pid_t pid;
+
+ pid = LUSTRE_SRV_LNET_PID;
+ return pid;
+}
+
+int ptlrpc_ni_init(void)
+{
+ int rc;
+ lnet_pid_t pid;
+
+ pid = ptl_get_pid();
+ CDEBUG(D_NET, "My pid is: %x\n", pid);
+
+ /* We're not passing any limits yet... */
+ rc = LNetNIInit(pid);
+ if (rc < 0) {
+ CDEBUG (D_NET, "Can't init network interface: %d\n", rc);
+ return (-ENOENT);
+ }
+
+ /* CAVEAT EMPTOR: how we process portals events is _radically_
+ * different depending on... */
+ /* kernel LNet calls our master callback when there are new event,
+ * because we are guaranteed to get every event via callback,
+ * so we just set EQ size to 0 to avoid overhread of serializing
+ * enqueue/dequeue operations in LNet. */
+ rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h);
+ if (rc == 0)
+ return 0;
+
+ CERROR ("Failed to allocate event queue: %d\n", rc);
+ LNetNIFini();
+
+ return (-ENOMEM);
+}
+
+
+int ptlrpc_init_portals(void)
+{
+ int rc = ptlrpc_ni_init();
+
+ if (rc != 0) {
+ CERROR("network initialisation failed\n");
+ return -EIO;
+ }
+ rc = ptlrpcd_addref();
+ if (rc == 0)
+ return 0;
+
+ CERROR("rpcd initialisation failed\n");
+ ptlrpc_ni_fini();
+ return rc;
+}
+
+void ptlrpc_exit_portals(void)
+{
+ ptlrpcd_decref();
+ ptlrpc_ni_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/Makefile b/drivers/staging/lustre/lustre/ptlrpc/gss/Makefile
new file mode 100644
index 000000000000..8cdfbeed64e6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_LUSTRE_FS) := ptlrpc_gss.o
+
+ptlrpc_gss-y := sec_gss.o gss_bulk.o gss_cli_upcall.o gss_svc_upcall.o \
+ gss_rawobj.o lproc_gss.o gss_generic_token.o \
+ gss_mech_switch.o gss_krb5_mech.o
+
+
+ccflags-y := -I$(src)/../include
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_api.h b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_api.h
new file mode 100644
index 000000000000..feac60482c97
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_api.h
@@ -0,0 +1,179 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * Somewhat simplified version of the gss api.
+ *
+ * Dug Song <dugsong@monkey.org>
+ * Andy Adamson <andros@umich.edu>
+ * Bruce Fields <bfields@umich.edu>
+ * Copyright (c) 2000 The Regents of the University of Michigan
+ *
+ */
+
+#ifndef __PTLRPC_GSS_GSS_API_H_
+#define __PTLRPC_GSS_GSS_API_H_
+
+struct gss_api_mech;
+
+/* The mechanism-independent gss-api context: */
+struct gss_ctx {
+ struct gss_api_mech *mech_type;
+ void *internal_ctx_id;
+};
+
+#define GSS_C_NO_BUFFER ((rawobj_t) 0)
+#define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0)
+#define GSS_C_NULL_OID ((rawobj_t) 0)
+
+/*
+ * gss-api prototypes; note that these are somewhat simplified versions of
+ * the prototypes specified in RFC 2744.
+ */
+__u32 lgss_import_sec_context(
+ rawobj_t *input_token,
+ struct gss_api_mech *mech,
+ struct gss_ctx **ctx);
+__u32 lgss_copy_reverse_context(
+ struct gss_ctx *ctx,
+ struct gss_ctx **ctx_new);
+__u32 lgss_inquire_context(
+ struct gss_ctx *ctx,
+ unsigned long *endtime);
+__u32 lgss_get_mic(
+ struct gss_ctx *ctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token);
+__u32 lgss_verify_mic(
+ struct gss_ctx *ctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token);
+__u32 lgss_wrap(
+ struct gss_ctx *ctx,
+ rawobj_t *gsshdr,
+ rawobj_t *msg,
+ int msg_buflen,
+ rawobj_t *out_token);
+__u32 lgss_unwrap(
+ struct gss_ctx *ctx,
+ rawobj_t *gsshdr,
+ rawobj_t *token,
+ rawobj_t *out_msg);
+__u32 lgss_prep_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc);
+__u32 lgss_wrap_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+__u32 lgss_unwrap_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+__u32 lgss_delete_sec_context(
+ struct gss_ctx **ctx);
+int lgss_display(
+ struct gss_ctx *ctx,
+ char *buf,
+ int bufsize);
+
+struct subflavor_desc {
+ __u32 sf_subflavor;
+ __u32 sf_qop;
+ __u32 sf_service;
+ char *sf_name;
+};
+
+/* Each mechanism is described by the following struct: */
+struct gss_api_mech {
+ struct list_head gm_list;
+ module_t *gm_owner;
+ char *gm_name;
+ rawobj_t gm_oid;
+ atomic_t gm_count;
+ struct gss_api_ops *gm_ops;
+ int gm_sf_num;
+ struct subflavor_desc *gm_sfs;
+};
+
+/* and must provide the following operations: */
+struct gss_api_ops {
+ __u32 (*gss_import_sec_context)(
+ rawobj_t *input_token,
+ struct gss_ctx *ctx);
+ __u32 (*gss_copy_reverse_context)(
+ struct gss_ctx *ctx,
+ struct gss_ctx *ctx_new);
+ __u32 (*gss_inquire_context)(
+ struct gss_ctx *ctx,
+ unsigned long *endtime);
+ __u32 (*gss_get_mic)(
+ struct gss_ctx *ctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token);
+ __u32 (*gss_verify_mic)(
+ struct gss_ctx *ctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token);
+ __u32 (*gss_wrap)(
+ struct gss_ctx *ctx,
+ rawobj_t *gsshdr,
+ rawobj_t *msg,
+ int msg_buflen,
+ rawobj_t *out_token);
+ __u32 (*gss_unwrap)(
+ struct gss_ctx *ctx,
+ rawobj_t *gsshdr,
+ rawobj_t *token,
+ rawobj_t *out_msg);
+ __u32 (*gss_prep_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc);
+ __u32 (*gss_wrap_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+ __u32 (*gss_unwrap_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+ void (*gss_delete_sec_context)(
+ void *ctx);
+ int (*gss_display)(
+ struct gss_ctx *ctx,
+ char *buf,
+ int bufsize);
+};
+
+int lgss_mech_register(struct gss_api_mech *mech);
+void lgss_mech_unregister(struct gss_api_mech *mech);
+
+struct gss_api_mech * lgss_OID_to_mech(rawobj_t *oid);
+struct gss_api_mech * lgss_name_to_mech(char *name);
+struct gss_api_mech * lgss_subflavor_to_mech(__u32 subflavor);
+
+struct gss_api_mech * lgss_mech_get(struct gss_api_mech *mech);
+void lgss_mech_put(struct gss_api_mech *mech);
+
+#endif /* __PTLRPC_GSS_GSS_API_H_ */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_asn1.h b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_asn1.h
new file mode 100644
index 000000000000..c70eb00796f9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_asn1.h
@@ -0,0 +1,84 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * minimal asn1 for generic encoding/decoding of gss tokens
+ *
+ * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
+ * lib/gssapi/krb5/gssapiP_krb5.h, and others
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1995 by the Massachusetts Institute of Technology.
+ * All Rights Reserved.
+ *
+ * Export of this software from the United States of America may
+ * require a specific license from the United States Government.
+ * It is the responsibility of any person or organization contemplating
+ * export to obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of M.I.T. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission. Furthermore if you modify this software you must label
+ * your software as modified software and not distribute it in such a
+ * fashion that it might be confused with the original M.I.T. software.
+ * M.I.T. makes no representations about the suitability of
+ * this software for any purpose. It is provided "as is" without express
+ * or implied warranty.
+ *
+ */
+
+#define SIZEOF_INT 4
+
+/* from gssapi_err_generic.h */
+#define G_BAD_SERVICE_NAME (-2045022976L)
+#define G_BAD_STRING_UID (-2045022975L)
+#define G_NOUSER (-2045022974L)
+#define G_VALIDATE_FAILED (-2045022973L)
+#define G_BUFFER_ALLOC (-2045022972L)
+#define G_BAD_MSG_CTX (-2045022971L)
+#define G_WRONG_SIZE (-2045022970L)
+#define G_BAD_USAGE (-2045022969L)
+#define G_UNKNOWN_QOP (-2045022968L)
+#define G_NO_HOSTNAME (-2045022967L)
+#define G_BAD_HOSTNAME (-2045022966L)
+#define G_WRONG_MECH (-2045022965L)
+#define G_BAD_TOK_HEADER (-2045022964L)
+#define G_BAD_DIRECTION (-2045022963L)
+#define G_TOK_TRUNC (-2045022962L)
+#define G_REFLECT (-2045022961L)
+#define G_WRONG_TOKID (-2045022960L)
+
+#define g_OID_equal(o1,o2) \
+ (((o1)->len == (o2)->len) && \
+ (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0))
+
+__u32 g_verify_token_header(rawobj_t *mech,
+ int *body_size,
+ unsigned char **buf_in,
+ int toksize);
+
+__u32 g_get_mech_oid(rawobj_t *mech,
+ rawobj_t *in_buf);
+
+int g_token_size(rawobj_t *mech,
+ unsigned int body_size);
+
+void g_make_token_header(rawobj_t *mech,
+ int body_size,
+ unsigned char **buf);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_bulk.c
new file mode 100644
index 000000000000..ed95bbba95ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_bulk.c
@@ -0,0 +1,512 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/gss/gss_bulk.c
+ *
+ * Author: Eric Mei <eric.mei@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/crypto.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_cli_ctx *gctx;
+ struct lustre_msg *msg;
+ struct ptlrpc_bulk_sec_desc *bsd;
+ rawobj_t token;
+ __u32 maj;
+ int offset;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+ LASSERT(gctx->gc_mechctx);
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 3);
+ msg = req->rq_reqbuf;
+ offset = msg->lm_bufcount - 1;
+ break;
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 4);
+ msg = req->rq_reqbuf;
+ offset = msg->lm_bufcount - 2;
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(req->rq_clrbuf->lm_bufcount >= 2);
+ msg = req->rq_clrbuf;
+ offset = msg->lm_bufcount - 1;
+ break;
+ default:
+ LBUG();
+ }
+
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ bsd->bsd_version = 0;
+ bsd->bsd_flags = 0;
+ bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ RETURN(0);
+
+ LASSERT(bsd->bsd_svc == SPTLRPC_BULK_SVC_INTG ||
+ bsd->bsd_svc == SPTLRPC_BULK_SVC_PRIV);
+
+ if (req->rq_bulk_read) {
+ /*
+ * bulk read: prepare receiving pages only for privacy mode.
+ */
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_PRIV)
+ return gss_cli_prep_bulk(req, desc);
+ } else {
+ /*
+ * bulk write: sign or encrypt bulk pages.
+ */
+ bsd->bsd_nob = desc->bd_nob;
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_INTG) {
+ /* integrity mode */
+ token.data = bsd->bsd_data;
+ token.len = lustre_msg_buflen(msg, offset) -
+ sizeof(*bsd);
+
+ maj = lgss_get_mic(gctx->gc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov,
+ &token);
+ if (maj != GSS_S_COMPLETE) {
+ CWARN("failed to sign bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ } else {
+ /* privacy mode */
+ if (desc->bd_iov_count == 0)
+ RETURN(0);
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc) {
+ CERROR("bulk write: failed to allocate "
+ "encryption pages: %d\n", rc);
+ RETURN(rc);
+ }
+
+ token.data = bsd->bsd_data;
+ token.len = lustre_msg_buflen(msg, offset) -
+ sizeof(*bsd);
+
+ maj = lgss_wrap_bulk(gctx->gc_mechctx, desc, &token, 0);
+ if (maj != GSS_S_COMPLETE) {
+ CWARN("fail to encrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ }
+ }
+
+ RETURN(0);
+}
+
+int gss_cli_ctx_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_cli_ctx *gctx;
+ struct lustre_msg *rmsg, *vmsg;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ rawobj_t token;
+ __u32 maj;
+ int roff, voff;
+ ENTRY;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ vmsg = req->rq_repdata;
+ voff = vmsg->lm_bufcount - 1;
+ LASSERT(vmsg && vmsg->lm_bufcount >= 3);
+
+ rmsg = req->rq_reqbuf;
+ roff = rmsg->lm_bufcount - 1; /* last segment */
+ LASSERT(rmsg && rmsg->lm_bufcount >= 3);
+ break;
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ vmsg = req->rq_repdata;
+ voff = vmsg->lm_bufcount - 2;
+ LASSERT(vmsg && vmsg->lm_bufcount >= 4);
+
+ rmsg = req->rq_reqbuf;
+ roff = rmsg->lm_bufcount - 2; /* second last segment */
+ LASSERT(rmsg && rmsg->lm_bufcount >= 4);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ vmsg = req->rq_repdata;
+ voff = vmsg->lm_bufcount - 1;
+ LASSERT(vmsg && vmsg->lm_bufcount >= 2);
+
+ rmsg = req->rq_clrbuf;
+ roff = rmsg->lm_bufcount - 1; /* last segment */
+ LASSERT(rmsg && rmsg->lm_bufcount >= 2);
+ break;
+ default:
+ LBUG();
+ }
+
+ bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
+ bsdv = lustre_msg_buf(vmsg, voff, sizeof(*bsdv));
+ LASSERT(bsdr && bsdv);
+
+ if (bsdr->bsd_version != bsdv->bsd_version ||
+ bsdr->bsd_type != bsdv->bsd_type ||
+ bsdr->bsd_svc != bsdv->bsd_svc) {
+ CERROR("bulk security descriptor mismatch: "
+ "(%u,%u,%u) != (%u,%u,%u)\n",
+ bsdr->bsd_version, bsdr->bsd_type, bsdr->bsd_svc,
+ bsdv->bsd_version, bsdv->bsd_type, bsdv->bsd_svc);
+ RETURN(-EPROTO);
+ }
+
+ LASSERT(bsdv->bsd_svc == SPTLRPC_BULK_SVC_NULL ||
+ bsdv->bsd_svc == SPTLRPC_BULK_SVC_INTG ||
+ bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV);
+
+ /*
+ * in privacy mode if return success, make sure bd_nob_transferred
+ * is the actual size of the clear text, otherwise upper layer
+ * may be surprised.
+ */
+ if (req->rq_bulk_write) {
+ if (bsdv->bsd_flags & BSD_FL_ERR) {
+ CERROR("server reported bulk i/o failure\n");
+ RETURN(-EIO);
+ }
+
+ if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV)
+ desc->bd_nob_transferred = desc->bd_nob;
+ } else {
+ /*
+ * bulk read, upon return success, bd_nob_transferred is
+ * the size of plain text actually received.
+ */
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+ LASSERT(gctx->gc_mechctx);
+
+ if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_INTG) {
+ int i, nob;
+
+ /* fix the actual data size */
+ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len + nob >
+ desc->bd_nob_transferred) {
+ desc->bd_iov[i].kiov_len =
+ desc->bd_nob_transferred - nob;
+ }
+ nob += desc->bd_iov[i].kiov_len;
+ }
+
+ token.data = bsdv->bsd_data;
+ token.len = lustre_msg_buflen(vmsg, voff) -
+ sizeof(*bsdv);
+
+ maj = lgss_verify_mic(gctx->gc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov,
+ &token);
+ if (maj != GSS_S_COMPLETE) {
+ CERROR("failed to verify bulk read: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ } else if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV) {
+ desc->bd_nob = bsdv->bsd_nob;
+ if (desc->bd_nob == 0)
+ RETURN(0);
+
+ token.data = bsdv->bsd_data;
+ token.len = lustre_msg_buflen(vmsg, voff) -
+ sizeof(*bsdr);
+
+ maj = lgss_unwrap_bulk(gctx->gc_mechctx, desc,
+ &token, 1);
+ if (maj != GSS_S_COMPLETE) {
+ CERROR("failed to decrypt bulk read: %x\n",
+ maj);
+ RETURN(-EACCES);
+ }
+
+ desc->bd_nob_transferred = desc->bd_nob;
+ }
+ }
+
+ RETURN(0);
+}
+
+static int gss_prep_bulk(struct ptlrpc_bulk_desc *desc,
+ struct gss_ctx *mechctx)
+{
+ int rc;
+
+ if (desc->bd_iov_count == 0)
+ return 0;
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc)
+ return rc;
+
+ if (lgss_prep_bulk(mechctx, desc) != GSS_S_COMPLETE)
+ return -EACCES;
+
+ return 0;
+}
+
+int gss_cli_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_read);
+
+ if (SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_BULK_SVC_PRIV)
+ RETURN(0);
+
+ rc = gss_prep_bulk(desc, ctx2gctx(req->rq_cli_ctx)->gc_mechctx);
+ if (rc)
+ CERROR("bulk read: failed to prepare encryption "
+ "pages: %d\n", rc);
+
+ RETURN(rc);
+}
+
+int gss_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_svc_reqctx *grctx;
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_write);
+
+ grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ LASSERT(grctx->src_reqbsd);
+ LASSERT(grctx->src_repbsd);
+ LASSERT(grctx->src_ctx);
+ LASSERT(grctx->src_ctx->gsc_mechctx);
+
+ bsd = grctx->src_reqbsd;
+ if (bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)
+ RETURN(0);
+
+ rc = gss_prep_bulk(desc, grctx->src_ctx->gsc_mechctx);
+ if (rc)
+ CERROR("bulk write: failed to prepare encryption "
+ "pages: %d\n", rc);
+
+ RETURN(rc);
+}
+
+int gss_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_svc_reqctx *grctx;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ rawobj_t token;
+ __u32 maj;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_write);
+
+ grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+
+ LASSERT(grctx->src_reqbsd);
+ LASSERT(grctx->src_repbsd);
+ LASSERT(grctx->src_ctx);
+ LASSERT(grctx->src_ctx->gsc_mechctx);
+
+ bsdr = grctx->src_reqbsd;
+ bsdv = grctx->src_repbsd;
+
+ /* bsdr has been sanity checked during unpacking */
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ switch (bsdv->bsd_svc) {
+ case SPTLRPC_BULK_SVC_INTG:
+ token.data = bsdr->bsd_data;
+ token.len = grctx->src_reqbsd_size - sizeof(*bsdr);
+
+ maj = lgss_verify_mic(grctx->src_ctx->gsc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov, &token);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to verify bulk signature: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ if (bsdr->bsd_nob != desc->bd_nob) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("prepared nob %d doesn't match the actual "
+ "nob %d\n", desc->bd_nob, bsdr->bsd_nob);
+ RETURN(-EPROTO);
+ }
+
+ if (desc->bd_iov_count == 0) {
+ LASSERT(desc->bd_nob == 0);
+ break;
+ }
+
+ token.data = bsdr->bsd_data;
+ token.len = grctx->src_reqbsd_size - sizeof(*bsdr);
+
+ maj = lgss_unwrap_bulk(grctx->src_ctx->gsc_mechctx,
+ desc, &token, 0);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed decrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ }
+
+ RETURN(0);
+}
+
+int gss_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_svc_reqctx *grctx;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ rawobj_t token;
+ __u32 maj;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_read);
+
+ grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+
+ LASSERT(grctx->src_reqbsd);
+ LASSERT(grctx->src_repbsd);
+ LASSERT(grctx->src_ctx);
+ LASSERT(grctx->src_ctx->gsc_mechctx);
+
+ bsdr = grctx->src_reqbsd;
+ bsdv = grctx->src_repbsd;
+
+ /* bsdr has been sanity checked during unpacking */
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ switch (bsdv->bsd_svc) {
+ case SPTLRPC_BULK_SVC_INTG:
+ token.data = bsdv->bsd_data;
+ token.len = grctx->src_repbsd_size - sizeof(*bsdv);
+
+ maj = lgss_get_mic(grctx->src_ctx->gsc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov, &token);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to sign bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ bsdv->bsd_nob = desc->bd_nob;
+
+ if (desc->bd_iov_count == 0) {
+ LASSERT(desc->bd_nob == 0);
+ break;
+ }
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("bulk read: failed to allocate encryption "
+ "pages: %d\n", rc);
+ RETURN(rc);
+ }
+
+ token.data = bsdv->bsd_data;
+ token.len = grctx->src_repbsd_size - sizeof(*bsdv);
+
+ maj = lgss_wrap_bulk(grctx->src_ctx->gsc_mechctx,
+ desc, &token, 1);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to encrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ }
+
+ RETURN(0);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_cli_upcall.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_cli_upcall.c
new file mode 100644
index 000000000000..142c789b1bc6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_cli_upcall.c
@@ -0,0 +1,447 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/gss/gss_cli_upcall.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+/**********************************************
+ * gss context init/fini helper *
+ **********************************************/
+
+static
+int ctx_init_pack_request(struct obd_import *imp,
+ struct ptlrpc_request *req,
+ int lustre_srv,
+ uid_t uid, gid_t gid,
+ long token_size,
+ char __user *token)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct gss_sec *gsec;
+ struct gss_header *ghdr;
+ struct ptlrpc_user_desc *pud;
+ __u32 *p, size, offset = 2;
+ rawobj_t obj;
+
+ LASSERT(msg->lm_bufcount <= 4);
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_cli_ctx->cc_sec);
+
+ /* gss hdr */
+ ghdr = lustre_msg_buf(msg, 0, sizeof(*ghdr));
+ ghdr->gh_version = PTLRPC_GSS_VERSION;
+ ghdr->gh_sp = (__u8) imp->imp_sec->ps_part;
+ ghdr->gh_flags = 0;
+ ghdr->gh_proc = PTLRPC_GSS_PROC_INIT;
+ ghdr->gh_seq = 0;
+ ghdr->gh_svc = SPTLRPC_SVC_NULL;
+ ghdr->gh_handle.len = 0;
+
+ /* fix the user desc */
+ if (req->rq_pack_udesc) {
+ ghdr->gh_flags |= LUSTRE_GSS_PACK_USER;
+
+ pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+ LASSERT(pud);
+ pud->pud_uid = pud->pud_fsuid = uid;
+ pud->pud_gid = pud->pud_fsgid = gid;
+ pud->pud_cap = 0;
+ pud->pud_ngroups = 0;
+ offset++;
+ }
+
+ /* security payload */
+ p = lustre_msg_buf(msg, offset, 0);
+ size = msg->lm_buflens[offset];
+ LASSERT(p);
+
+ /* 1. lustre svc type */
+ LASSERT(size > 4);
+ *p++ = cpu_to_le32(lustre_srv);
+ size -= 4;
+
+ /* 2. target uuid */
+ obj.len = strlen(imp->imp_obd->u.cli.cl_target_uuid.uuid) + 1;
+ obj.data = imp->imp_obd->u.cli.cl_target_uuid.uuid;
+ if (rawobj_serialize(&obj, &p, &size))
+ LBUG();
+
+ /* 3. reverse context handle. actually only needed by root user,
+ * but we send it anyway. */
+ gsec = sec2gsec(req->rq_cli_ctx->cc_sec);
+ obj.len = sizeof(gsec->gs_rvs_hdl);
+ obj.data = (__u8 *) &gsec->gs_rvs_hdl;
+ if (rawobj_serialize(&obj, &p, &size))
+ LBUG();
+
+ /* 4. now the token */
+ LASSERT(size >= (sizeof(__u32) + token_size));
+ *p++ = cpu_to_le32(((__u32) token_size));
+ if (copy_from_user(p, token, token_size)) {
+ CERROR("can't copy token\n");
+ return -EFAULT;
+ }
+ size -= sizeof(__u32) + cfs_size_round4(token_size);
+
+ req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, offset,
+ msg->lm_buflens[offset] - size, 0);
+ return 0;
+}
+
+static
+int ctx_init_parse_reply(struct lustre_msg *msg, int swabbed,
+ char __user *outbuf, long outlen)
+{
+ struct gss_rep_header *ghdr;
+ __u32 obj_len, round_len;
+ __u32 status, effective = 0;
+
+ if (msg->lm_bufcount != 3) {
+ CERROR("unexpected bufcount %u\n", msg->lm_bufcount);
+ return -EPROTO;
+ }
+
+ ghdr = (struct gss_rep_header *) gss_swab_header(msg, 0, swabbed);
+ if (ghdr == NULL) {
+ CERROR("unable to extract gss reply header\n");
+ return -EPROTO;
+ }
+
+ if (ghdr->gh_version != PTLRPC_GSS_VERSION) {
+ CERROR("invalid gss version %u\n", ghdr->gh_version);
+ return -EPROTO;
+ }
+
+ if (outlen < (4 + 2) * 4 + cfs_size_round4(ghdr->gh_handle.len) +
+ cfs_size_round4(msg->lm_buflens[2])) {
+ CERROR("output buffer size %ld too small\n", outlen);
+ return -EFAULT;
+ }
+
+ status = 0;
+ effective = 0;
+
+ if (copy_to_user(outbuf, &status, 4))
+ return -EFAULT;
+ outbuf += 4;
+ if (copy_to_user(outbuf, &ghdr->gh_major, 4))
+ return -EFAULT;
+ outbuf += 4;
+ if (copy_to_user(outbuf, &ghdr->gh_minor, 4))
+ return -EFAULT;
+ outbuf += 4;
+ if (copy_to_user(outbuf, &ghdr->gh_seqwin, 4))
+ return -EFAULT;
+ outbuf += 4;
+ effective += 4 * 4;
+
+ /* handle */
+ obj_len = ghdr->gh_handle.len;
+ round_len = (obj_len + 3) & ~ 3;
+ if (copy_to_user(outbuf, &obj_len, 4))
+ return -EFAULT;
+ outbuf += 4;
+ if (copy_to_user(outbuf, (char *) ghdr->gh_handle.data, round_len))
+ return -EFAULT;
+ outbuf += round_len;
+ effective += 4 + round_len;
+
+ /* out token */
+ obj_len = msg->lm_buflens[2];
+ round_len = (obj_len + 3) & ~ 3;
+ if (copy_to_user(outbuf, &obj_len, 4))
+ return -EFAULT;
+ outbuf += 4;
+ if (copy_to_user(outbuf, lustre_msg_buf(msg, 2, 0), round_len))
+ return -EFAULT;
+ outbuf += round_len;
+ effective += 4 + round_len;
+
+ return effective;
+}
+
+/* XXX move to where lgssd could see */
+struct lgssd_ioctl_param {
+ int version; /* in */
+ int secid; /* in */
+ char *uuid; /* in */
+ int lustre_svc; /* in */
+ uid_t uid; /* in */
+ gid_t gid; /* in */
+ long send_token_size;/* in */
+ char *send_token; /* in */
+ long reply_buf_size; /* in */
+ char *reply_buf; /* in */
+ long status; /* out */
+ long reply_length; /* out */
+};
+
+int gss_do_ctx_init_rpc(__user char *buffer, unsigned long count)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req;
+ struct lgssd_ioctl_param param;
+ struct obd_device *obd;
+ char obdname[64];
+ long lsize;
+ int rc;
+
+ if (count != sizeof(param)) {
+ CERROR("ioctl size %lu, expect %lu, please check lgss_keyring "
+ "version\n", count, (unsigned long) sizeof(param));
+ RETURN(-EINVAL);
+ }
+ if (copy_from_user(&param, buffer, sizeof(param))) {
+ CERROR("failed copy data from lgssd\n");
+ RETURN(-EFAULT);
+ }
+
+ if (param.version != GSSD_INTERFACE_VERSION) {
+ CERROR("gssd interface version %d (expect %d)\n",
+ param.version, GSSD_INTERFACE_VERSION);
+ RETURN(-EINVAL);
+ }
+
+ /* take name */
+ if (strncpy_from_user(obdname, param.uuid, sizeof(obdname)) <= 0) {
+ CERROR("Invalid obdname pointer\n");
+ RETURN(-EFAULT);
+ }
+
+ obd = class_name2obd(obdname);
+ if (!obd) {
+ CERROR("no such obd %s\n", obdname);
+ RETURN(-EINVAL);
+ }
+
+ if (unlikely(!obd->obd_set_up)) {
+ CERROR("obd %s not setup\n", obdname);
+ RETURN(-EINVAL);
+ }
+
+ spin_lock(&obd->obd_dev_lock);
+ if (obd->obd_stopping) {
+ CERROR("obd %s has stopped\n", obdname);
+ spin_unlock(&obd->obd_dev_lock);
+ RETURN(-EINVAL);
+ }
+
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) {
+ CERROR("obd %s is not a client device\n", obdname);
+ spin_unlock(&obd->obd_dev_lock);
+ RETURN(-EINVAL);
+ }
+ spin_unlock(&obd->obd_dev_lock);
+
+ down_read(&obd->u.cli.cl_sem);
+ if (obd->u.cli.cl_import == NULL) {
+ CERROR("obd %s: import has gone\n", obd->obd_name);
+ up_read(&obd->u.cli.cl_sem);
+ RETURN(-EINVAL);
+ }
+ imp = class_import_get(obd->u.cli.cl_import);
+ up_read(&obd->u.cli.cl_sem);
+
+ if (imp->imp_deactive) {
+ CERROR("import has been deactivated\n");
+ class_import_put(imp);
+ RETURN(-EINVAL);
+ }
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_SEC_CTX, LUSTRE_OBD_VERSION,
+ SEC_CTX_INIT);
+ if (req == NULL) {
+ param.status = -ENOMEM;
+ goto out_copy;
+ }
+
+ if (req->rq_cli_ctx->cc_sec->ps_id != param.secid) {
+ CWARN("original secid %d, now has changed to %d, "
+ "cancel this negotiation\n", param.secid,
+ req->rq_cli_ctx->cc_sec->ps_id);
+ param.status = -EINVAL;
+ goto out_copy;
+ }
+
+ /* get token */
+ rc = ctx_init_pack_request(imp, req,
+ param.lustre_svc,
+ param.uid, param.gid,
+ param.send_token_size,
+ param.send_token);
+ if (rc) {
+ param.status = rc;
+ goto out_copy;
+ }
+
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc) {
+ /* If any _real_ denial be made, we expect server return
+ * -EACCES reply or return success but indicate gss error
+ * inside reply messsage. All other errors are treated as
+ * timeout, caller might try the negotiation repeatedly,
+ * leave recovery decisions to general ptlrpc layer.
+ *
+ * FIXME maybe some other error code shouldn't be treated
+ * as timeout. */
+ param.status = rc;
+ if (rc != -EACCES)
+ param.status = -ETIMEDOUT;
+ goto out_copy;
+ }
+
+ LASSERT(req->rq_repdata);
+ lsize = ctx_init_parse_reply(req->rq_repdata,
+ ptlrpc_rep_need_swab(req),
+ param.reply_buf, param.reply_buf_size);
+ if (lsize < 0) {
+ param.status = (int) lsize;
+ goto out_copy;
+ }
+
+ param.status = 0;
+ param.reply_length = lsize;
+
+out_copy:
+ if (copy_to_user(buffer, &param, sizeof(param)))
+ rc = -EFAULT;
+ else
+ rc = 0;
+
+ class_import_put(imp);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+}
+
+int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
+{
+ struct ptlrpc_cli_ctx *ctx = &gctx->gc_base;
+ struct obd_import *imp = ctx->cc_sec->ps_import;
+ struct ptlrpc_request *req;
+ struct ptlrpc_user_desc *pud;
+ int rc;
+ ENTRY;
+
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (cli_ctx_is_error(ctx) || !cli_ctx_is_uptodate(ctx)) {
+ CDEBUG(D_SEC, "ctx %p(%u->%s) not uptodate, "
+ "don't send destroy rpc\n", ctx,
+ ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ RETURN(0);
+ }
+
+ might_sleep();
+
+ CWARN("%s ctx %p idx "LPX64" (%u->%s)\n",
+ sec_is_reverse(ctx->cc_sec) ?
+ "server finishing reverse" : "client finishing forward",
+ ctx, gss_handle_to_u64(&gctx->gc_handle),
+ ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+
+ gctx->gc_proc = PTLRPC_GSS_PROC_DESTROY;
+
+ req = ptlrpc_request_alloc(imp, &RQF_SEC_CTX);
+ if (req == NULL) {
+ CWARN("ctx %p(%u): fail to prepare rpc, destroy locally\n",
+ ctx, ctx->cc_vcred.vc_uid);
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ rc = ptlrpc_request_bufs_pack(req, LUSTRE_OBD_VERSION, SEC_CTX_FINI,
+ NULL, ctx);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(out_ref, rc);
+ }
+
+ /* fix the user desc */
+ if (req->rq_pack_udesc) {
+ /* we rely the fact that this request is in AUTH mode,
+ * and user_desc at offset 2. */
+ pud = lustre_msg_buf(req->rq_reqbuf, 2, sizeof(*pud));
+ LASSERT(pud);
+ pud->pud_uid = pud->pud_fsuid = ctx->cc_vcred.vc_uid;
+ pud->pud_gid = pud->pud_fsgid = ctx->cc_vcred.vc_gid;
+ pud->pud_cap = 0;
+ pud->pud_ngroups = 0;
+ }
+
+ req->rq_phase = RQ_PHASE_RPC;
+ rc = ptl_send_rpc(req, 1);
+ if (rc)
+ CWARN("ctx %p(%u->%s): rpc error %d, destroy locally\n", ctx,
+ ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec), rc);
+
+out_ref:
+ ptlrpc_req_finished(req);
+out:
+ RETURN(rc);
+}
+
+int __init gss_init_cli_upcall(void)
+{
+ return 0;
+}
+
+void __exit gss_exit_cli_upcall(void)
+{
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_err.h b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_err.h
new file mode 100644
index 000000000000..13425796fa33
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_err.h
@@ -0,0 +1,193 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * Adapted from MIT Kerberos 5-1.2.1 include/gssapi/gssapi.h
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied warranty.
+ *
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __PTLRPC_GSS_GSS_ERR_H_
+#define __PTLRPC_GSS_GSS_ERR_H_
+
+typedef unsigned int OM_uint32;
+
+/*
+ * Flag bits for context-level services.
+ */
+#define GSS_C_DELEG_FLAG (1)
+#define GSS_C_MUTUAL_FLAG (2)
+#define GSS_C_REPLAY_FLAG (4)
+#define GSS_C_SEQUENCE_FLAG (8)
+#define GSS_C_CONF_FLAG (16)
+#define GSS_C_INTEG_FLAG (32)
+#define GSS_C_ANON_FLAG (64)
+#define GSS_C_PROT_READY_FLAG (128)
+#define GSS_C_TRANS_FLAG (256)
+
+/*
+ * Credential usage options
+ */
+#define GSS_C_BOTH (0)
+#define GSS_C_INITIATE (1)
+#define GSS_C_ACCEPT (2)
+
+/*
+ * Status code types for gss_display_status
+ */
+#define GSS_C_GSS_CODE (1)
+#define GSS_C_MECH_CODE (2)
+
+
+/*
+ * Define the default Quality of Protection for per-message services. Note
+ * that an implementation that offers multiple levels of QOP may either reserve
+ * a value (for example zero, as assumed here) to mean "default protection", or
+ * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit
+ * QOP value. However a value of 0 should always be interpreted by a GSSAPI
+ * implementation as a request for the default protection level.
+ */
+#define GSS_C_QOP_DEFAULT (0)
+
+/*
+ * Expiration time of 2^32-1 seconds means infinite lifetime for a
+ * credential or security context
+ */
+#define GSS_C_INDEFINITE ((OM_uint32) 0xfffffffful)
+
+
+/* Major status codes */
+
+#define GSS_S_COMPLETE (0)
+
+/*
+ * Some "helper" definitions to make the status code macros obvious.
+ */
+#define GSS_C_CALLING_ERROR_OFFSET (24)
+#define GSS_C_ROUTINE_ERROR_OFFSET (16)
+#define GSS_C_SUPPLEMENTARY_OFFSET (0)
+#define GSS_C_CALLING_ERROR_MASK ((OM_uint32) 0377ul)
+#define GSS_C_ROUTINE_ERROR_MASK ((OM_uint32) 0377ul)
+#define GSS_C_SUPPLEMENTARY_MASK ((OM_uint32) 0177777ul)
+
+/*
+ * The macros that test status codes for error conditions. Note that the
+ * GSS_ERROR() macro has changed slightly from the V1 GSSAPI so that it now
+ * evaluates its argument only once.
+ */
+#define GSS_CALLING_ERROR(x) \
+ ((x) & (GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET))
+#define GSS_ROUTINE_ERROR(x) \
+ ((x) & (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET))
+#define GSS_SUPPLEMENTARY_INFO(x) \
+ ((x) & (GSS_C_SUPPLEMENTARY_MASK << GSS_C_SUPPLEMENTARY_OFFSET))
+#define GSS_ERROR(x) \
+ ((x) & ((GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET) | \
+ (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET)))
+
+/*
+ * Now the actual status code definitions
+ */
+
+/*
+ * Calling errors:
+ */
+#define GSS_S_CALL_INACCESSIBLE_READ \
+ (((OM_uint32) 1ul) << GSS_C_CALLING_ERROR_OFFSET)
+#define GSS_S_CALL_INACCESSIBLE_WRITE \
+ (((OM_uint32) 2ul) << GSS_C_CALLING_ERROR_OFFSET)
+#define GSS_S_CALL_BAD_STRUCTURE \
+ (((OM_uint32) 3ul) << GSS_C_CALLING_ERROR_OFFSET)
+
+/*
+ * Routine errors:
+ */
+#define GSS_S_BAD_MECH \
+ (((OM_uint32) 1ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_NAME \
+ (((OM_uint32) 2ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_NAMETYPE \
+ (((OM_uint32) 3ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_BINDINGS \
+ (((OM_uint32) 4ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_STATUS \
+ (((OM_uint32) 5ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_SIG \
+ (((OM_uint32) 6ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_NO_CRED \
+ (((OM_uint32) 7ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_NO_CONTEXT \
+ (((OM_uint32) 8ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_DEFECTIVE_TOKEN \
+ (((OM_uint32) 9ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_DEFECTIVE_CREDENTIAL \
+ (((OM_uint32) 10ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_CREDENTIALS_EXPIRED \
+ (((OM_uint32) 11ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_CONTEXT_EXPIRED \
+ (((OM_uint32) 12ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_FAILURE \
+ (((OM_uint32) 13ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_BAD_QOP \
+ (((OM_uint32) 14ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_UNAUTHORIZED \
+ (((OM_uint32) 15ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_UNAVAILABLE \
+ (((OM_uint32) 16ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_DUPLICATE_ELEMENT \
+ (((OM_uint32) 17ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+#define GSS_S_NAME_NOT_MN \
+ (((OM_uint32) 18ul) << GSS_C_ROUTINE_ERROR_OFFSET)
+
+/*
+ * Supplementary info bits:
+ */
+#define GSS_S_CONTINUE_NEEDED (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 0))
+#define GSS_S_DUPLICATE_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 1))
+#define GSS_S_OLD_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 2))
+#define GSS_S_UNSEQ_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 3))
+#define GSS_S_GAP_TOKEN (1 << (GSS_C_SUPPLEMENTARY_OFFSET + 4))
+
+/* XXXX these are not part of the GSSAPI C bindings! (but should be) */
+
+#define GSS_CALLING_ERROR_FIELD(x) \
+ (((x) >> GSS_C_CALLING_ERROR_OFFSET) & GSS_C_CALLING_ERROR_MASK)
+#define GSS_ROUTINE_ERROR_FIELD(x) \
+ (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK)
+#define GSS_SUPPLEMENTARY_INFO_FIELD(x) \
+ (((x) >> GSS_C_SUPPLEMENTARY_OFFSET) & GSS_C_SUPPLEMENTARY_MASK)
+
+/* XXXX This is a necessary evil until the spec is fixed */
+#define GSS_S_CRED_UNAVAIL GSS_S_FAILURE
+
+#endif /* __PTLRPC_GSS_GSS_ERR_H_ */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_generic_token.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_generic_token.c
new file mode 100644
index 000000000000..20b1638e7255
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_generic_token.c
@@ -0,0 +1,285 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/gss_generic_token.c
+ *
+ * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@umich.edu>
+ */
+
+/*
+ * Copyright 1993 by OpenVision Technologies, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied warranty.
+ *
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+#include "gss_krb5.h"
+#include "gss_asn1.h"
+
+
+/* TWRITE_STR from gssapiP_generic.h */
+#define TWRITE_STR(ptr, str, len) \
+ memcpy((ptr), (char *) (str), (len)); \
+ (ptr) += (len);
+
+/* XXXX this code currently makes the assumption that a mech oid will
+ never be longer than 127 bytes. This assumption is not inherent in
+ the interfaces, so the code can be fixed if the OSI namespace
+ balloons unexpectedly. */
+
+/* Each token looks like this:
+
+0x60 tag for APPLICATION 0, SEQUENCE
+ (constructed, definite-length)
+ <length> possible multiple bytes, need to parse/generate
+ 0x06 tag for OBJECT IDENTIFIER
+ <moid_length> compile-time constant string (assume 1 byte)
+ <moid_bytes> compile-time constant string
+ <inner_bytes> the ANY containing the application token
+ bytes 0,1 are the token type
+ bytes 2,n are the token data
+
+For the purposes of this abstraction, the token "header" consists of
+the sequence tag and length octets, the mech OID DER encoding, and the
+first two inner bytes, which indicate the token type. The token
+"body" consists of everything else.
+
+*/
+
+static
+int der_length_size(int length)
+{
+ if (length < (1 << 7))
+ return 1;
+ else if (length < (1 << 8))
+ return 2;
+#if (SIZEOF_INT == 2)
+ else
+ return 3;
+#else
+ else if (length < (1 << 16))
+ return 3;
+ else if (length < (1 << 24))
+ return 4;
+ else
+ return 5;
+#endif
+}
+
+static
+void der_write_length(unsigned char **buf, int length)
+{
+ if (length < (1 << 7)) {
+ *(*buf)++ = (unsigned char) length;
+ } else {
+ *(*buf)++ = (unsigned char) (der_length_size(length) + 127);
+#if (SIZEOF_INT > 2)
+ if (length >= (1 << 24))
+ *(*buf)++ = (unsigned char) (length >> 24);
+ if (length >= (1 << 16))
+ *(*buf)++ = (unsigned char) ((length >> 16) & 0xff);
+#endif
+ if (length >= (1 << 8))
+ *(*buf)++ = (unsigned char) ((length >> 8) & 0xff);
+ *(*buf)++ = (unsigned char) (length & 0xff);
+ }
+}
+
+/*
+ * returns decoded length, or < 0 on failure. Advances buf and
+ * decrements bufsize
+ */
+static
+int der_read_length(unsigned char **buf, int *bufsize)
+{
+ unsigned char sf;
+ int ret;
+
+ if (*bufsize < 1)
+ return -1;
+ sf = *(*buf)++;
+ (*bufsize)--;
+ if (sf & 0x80) {
+ if ((sf &= 0x7f) > ((*bufsize) - 1))
+ return -1;
+ if (sf > SIZEOF_INT)
+ return -1;
+ ret = 0;
+ for (; sf; sf--) {
+ ret = (ret << 8) + (*(*buf)++);
+ (*bufsize)--;
+ }
+ } else {
+ ret = sf;
+ }
+
+ return ret;
+}
+
+/*
+ * returns the length of a token, given the mech oid and the body size
+ */
+int g_token_size(rawobj_t *mech, unsigned int body_size)
+{
+ /* set body_size to sequence contents size */
+ body_size += 4 + (int) mech->len; /* NEED overflow check */
+ return (1 + der_length_size(body_size) + body_size);
+}
+
+/*
+ * fills in a buffer with the token header. The buffer is assumed to
+ * be the right size. buf is advanced past the token header
+ */
+void g_make_token_header(rawobj_t *mech, int body_size, unsigned char **buf)
+{
+ *(*buf)++ = 0x60;
+ der_write_length(buf, 4 + mech->len + body_size);
+ *(*buf)++ = 0x06;
+ *(*buf)++ = (unsigned char) mech->len;
+ TWRITE_STR(*buf, mech->data, ((int) mech->len));
+}
+
+/*
+ * Given a buffer containing a token, reads and verifies the token,
+ * leaving buf advanced past the token header, and setting body_size
+ * to the number of remaining bytes. Returns 0 on success,
+ * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
+ * mechanism in the token does not match the mech argument. buf and
+ * *body_size are left unmodified on error.
+ */
+__u32 g_verify_token_header(rawobj_t *mech, int *body_size,
+ unsigned char **buf_in, int toksize)
+{
+ unsigned char *buf = *buf_in;
+ int seqsize;
+ rawobj_t toid;
+ int ret = 0;
+
+ if ((toksize -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ if (*buf++ != 0x60)
+ return (G_BAD_TOK_HEADER);
+
+ if ((seqsize = der_read_length(&buf, &toksize)) < 0)
+ return(G_BAD_TOK_HEADER);
+
+ if (seqsize != toksize)
+ return (G_BAD_TOK_HEADER);
+
+ if ((toksize -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ if (*buf++ != 0x06)
+ return (G_BAD_TOK_HEADER);
+
+ if ((toksize -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ toid.len = *buf++;
+
+ if ((toksize -= toid.len) < 0)
+ return (G_BAD_TOK_HEADER);
+ toid.data = buf;
+ buf += toid.len;
+
+ if (!g_OID_equal(&toid, mech))
+ ret = G_WRONG_MECH;
+
+ /* G_WRONG_MECH is not returned immediately because it's more
+ * important to return G_BAD_TOK_HEADER if the token header is
+ * in fact bad
+ */
+ if ((toksize -= 2) < 0)
+ return (G_BAD_TOK_HEADER);
+
+ if (ret)
+ return (ret);
+
+ if (!ret) {
+ *buf_in = buf;
+ *body_size = toksize;
+ }
+
+ return (ret);
+}
+
+/*
+ * Given a buffer containing a token, returns a copy of the mech oid in
+ * the parameter mech.
+ */
+__u32 g_get_mech_oid(rawobj_t *mech, rawobj_t *in_buf)
+{
+ unsigned char *buf = in_buf->data;
+ int len = in_buf->len;
+ int ret = 0;
+ int seqsize;
+
+ if ((len -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ if (*buf++ != 0x60)
+ return (G_BAD_TOK_HEADER);
+
+ if ((seqsize = der_read_length(&buf, &len)) < 0)
+ return (G_BAD_TOK_HEADER);
+
+ if ((len -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ if (*buf++ != 0x06)
+ return (G_BAD_TOK_HEADER);
+
+ if ((len -= 1) < 0)
+ return (G_BAD_TOK_HEADER);
+ mech->len = *buf++;
+
+ if ((len -= mech->len) < 0)
+ return (G_BAD_TOK_HEADER);
+ OBD_ALLOC_LARGE(mech->data, mech->len);
+ if (!mech->data)
+ return (G_BUFFER_ALLOC);
+ memcpy(mech->data, buf, mech->len);
+
+ return ret;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_internal.h b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_internal.h
new file mode 100644
index 000000000000..cbfc47cb3f7b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_internal.h
@@ -0,0 +1,526 @@
+/*
+ * Modified from NFSv4 project for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#ifndef __PTLRPC_GSS_GSS_INTERNAL_H_
+#define __PTLRPC_GSS_GSS_INTERNAL_H_
+
+#include <lustre_sec.h>
+
+/*
+ * rawobj stuff
+ */
+typedef struct netobj_s {
+ __u32 len;
+ __u8 data[0];
+} netobj_t;
+
+#define NETOBJ_EMPTY ((netobj_t) { 0 })
+
+typedef struct rawobj_s {
+ __u32 len;
+ __u8 *data;
+} rawobj_t;
+
+#define RAWOBJ_EMPTY ((rawobj_t) { 0, NULL })
+
+typedef struct rawobj_buf_s {
+ __u32 dataoff;
+ __u32 datalen;
+ __u32 buflen;
+ __u8 *buf;
+} rawobj_buf_t;
+
+int rawobj_empty(rawobj_t *obj);
+int rawobj_alloc(rawobj_t *obj, char *buf, int len);
+void rawobj_free(rawobj_t *obj);
+int rawobj_equal(rawobj_t *a, rawobj_t *b);
+int rawobj_dup(rawobj_t *dest, rawobj_t *src);
+int rawobj_serialize(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_extract_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_extract_local_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj);
+int rawobj_from_netobj_alloc(rawobj_t *obj, netobj_t *netobj);
+
+int buffer_extract_bytes(const void **buf, __u32 *buflen,
+ void *res, __u32 reslen);
+
+/*
+ * several timeout values. client refresh upcall timeout we using
+ * default in pipefs implemnetation.
+ */
+#define __TIMEOUT_DELTA (10)
+
+#define GSS_SECINIT_RPC_TIMEOUT \
+ (obd_timeout < __TIMEOUT_DELTA ? \
+ __TIMEOUT_DELTA : obd_timeout - __TIMEOUT_DELTA)
+
+#define GSS_SECFINI_RPC_TIMEOUT (__TIMEOUT_DELTA)
+#define GSS_SECSVC_UPCALL_TIMEOUT (GSS_SECINIT_RPC_TIMEOUT)
+
+/*
+ * default gc interval
+ */
+#define GSS_GC_INTERVAL (60 * 60) /* 60 minutes */
+
+static inline
+unsigned long gss_round_ctx_expiry(unsigned long expiry,
+ unsigned long sec_flags)
+{
+ if (sec_flags & PTLRPC_SEC_FL_REVERSE)
+ return expiry;
+
+ if (get_seconds() + __TIMEOUT_DELTA <= expiry)
+ return expiry - __TIMEOUT_DELTA;
+
+ return expiry;
+}
+
+/*
+ * Max encryption element in block cipher algorithms.
+ */
+#define GSS_MAX_CIPHER_BLOCK (16)
+
+/*
+ * XXX make it visible of kernel and lgssd/lsvcgssd
+ */
+#define GSSD_INTERFACE_VERSION (1)
+
+#define PTLRPC_GSS_VERSION (1)
+
+
+enum ptlrpc_gss_proc {
+ PTLRPC_GSS_PROC_DATA = 0,
+ PTLRPC_GSS_PROC_INIT = 1,
+ PTLRPC_GSS_PROC_CONTINUE_INIT = 2,
+ PTLRPC_GSS_PROC_DESTROY = 3,
+ PTLRPC_GSS_PROC_ERR = 4,
+};
+
+enum ptlrpc_gss_tgt {
+ LUSTRE_GSS_TGT_MGS = 0,
+ LUSTRE_GSS_TGT_MDS = 1,
+ LUSTRE_GSS_TGT_OSS = 2,
+};
+
+enum ptlrpc_gss_header_flags {
+ LUSTRE_GSS_PACK_BULK = 1,
+ LUSTRE_GSS_PACK_USER = 2,
+};
+
+static inline
+__u32 import_to_gss_svc(struct obd_import *imp)
+{
+ const char *name = imp->imp_obd->obd_type->typ_name;
+
+ if (!strcmp(name, LUSTRE_MGC_NAME))
+ return LUSTRE_GSS_TGT_MGS;
+ if (!strcmp(name, LUSTRE_MDC_NAME))
+ return LUSTRE_GSS_TGT_MDS;
+ if (!strcmp(name, LUSTRE_OSC_NAME))
+ return LUSTRE_GSS_TGT_OSS;
+ LBUG();
+ return 0;
+}
+
+/*
+ * following 3 header must have the same size and offset
+ */
+struct gss_header {
+ __u8 gh_version; /* gss version */
+ __u8 gh_sp; /* sec part */
+ __u16 gh_pad0;
+ __u32 gh_flags; /* wrap flags */
+ __u32 gh_proc; /* proc */
+ __u32 gh_seq; /* sequence */
+ __u32 gh_svc; /* service */
+ __u32 gh_pad1;
+ __u32 gh_pad2;
+ __u32 gh_pad3;
+ netobj_t gh_handle; /* context handle */
+};
+
+struct gss_rep_header {
+ __u8 gh_version;
+ __u8 gh_sp;
+ __u16 gh_pad0;
+ __u32 gh_flags;
+ __u32 gh_proc;
+ __u32 gh_major;
+ __u32 gh_minor;
+ __u32 gh_seqwin;
+ __u32 gh_pad2;
+ __u32 gh_pad3;
+ netobj_t gh_handle;
+};
+
+struct gss_err_header {
+ __u8 gh_version;
+ __u8 gh_sp;
+ __u16 gh_pad0;
+ __u32 gh_flags;
+ __u32 gh_proc;
+ __u32 gh_major;
+ __u32 gh_minor;
+ __u32 gh_pad1;
+ __u32 gh_pad2;
+ __u32 gh_pad3;
+ netobj_t gh_handle;
+};
+
+/*
+ * part of wire context information send from client which be saved and
+ * used later by server.
+ */
+struct gss_wire_ctx {
+ __u32 gw_flags;
+ __u32 gw_proc;
+ __u32 gw_seq;
+ __u32 gw_svc;
+ rawobj_t gw_handle;
+};
+
+#define PTLRPC_GSS_MAX_HANDLE_SIZE (8)
+#define PTLRPC_GSS_HEADER_SIZE (sizeof(struct gss_header) + \
+ PTLRPC_GSS_MAX_HANDLE_SIZE)
+
+
+static inline __u64 gss_handle_to_u64(rawobj_t *handle)
+{
+ if (handle->len != PTLRPC_GSS_MAX_HANDLE_SIZE)
+ return -1;
+ return *((__u64 *) handle->data);
+}
+
+#define GSS_SEQ_WIN (2048)
+#define GSS_SEQ_WIN_MAIN GSS_SEQ_WIN
+#define GSS_SEQ_WIN_BACK (128)
+#define GSS_SEQ_REPACK_THRESHOLD (GSS_SEQ_WIN_MAIN / 2 + \
+ GSS_SEQ_WIN_MAIN / 4)
+
+struct gss_svc_seq_data {
+ spinlock_t ssd_lock;
+ /*
+ * highest sequence number seen so far, for main and back window
+ */
+ __u32 ssd_max_main;
+ __u32 ssd_max_back;
+ /*
+ * main and back window
+ * for i such that ssd_max - GSS_SEQ_WIN < i <= ssd_max, the i-th bit
+ * of ssd_win is nonzero iff sequence number i has been seen already.
+ */
+ unsigned long ssd_win_main[GSS_SEQ_WIN_MAIN/BITS_PER_LONG];
+ unsigned long ssd_win_back[GSS_SEQ_WIN_BACK/BITS_PER_LONG];
+};
+
+struct gss_svc_ctx {
+ struct gss_ctx *gsc_mechctx;
+ struct gss_svc_seq_data gsc_seqdata;
+ rawobj_t gsc_rvs_hdl;
+ __u32 gsc_rvs_seq;
+ uid_t gsc_uid;
+ gid_t gsc_gid;
+ uid_t gsc_mapped_uid;
+ unsigned int gsc_usr_root:1,
+ gsc_usr_mds:1,
+ gsc_usr_oss:1,
+ gsc_remote:1,
+ gsc_reverse:1;
+};
+
+struct gss_svc_reqctx {
+ struct ptlrpc_svc_ctx src_base;
+ /*
+ * context
+ */
+ struct gss_wire_ctx src_wirectx;
+ struct gss_svc_ctx *src_ctx;
+ /*
+ * record place of bulk_sec_desc in request/reply buffer
+ */
+ struct ptlrpc_bulk_sec_desc *src_reqbsd;
+ int src_reqbsd_size;
+ struct ptlrpc_bulk_sec_desc *src_repbsd;
+ int src_repbsd_size;
+ /*
+ * flags
+ */
+ unsigned int src_init:1,
+ src_init_continue:1,
+ src_err_notify:1;
+ int src_reserve_len;
+};
+
+struct gss_cli_ctx {
+ struct ptlrpc_cli_ctx gc_base;
+ __u32 gc_flavor;
+ __u32 gc_proc;
+ __u32 gc_win;
+ atomic_t gc_seq;
+ rawobj_t gc_handle;
+ struct gss_ctx *gc_mechctx;
+ /* handle for the buddy svc ctx */
+ rawobj_t gc_svc_handle;
+};
+
+struct gss_cli_ctx_keyring {
+ struct gss_cli_ctx gck_base;
+ struct key *gck_key;
+ struct timer_list *gck_timer;
+};
+
+struct gss_sec {
+ struct ptlrpc_sec gs_base;
+ struct gss_api_mech *gs_mech;
+ spinlock_t gs_lock;
+ __u64 gs_rvs_hdl;
+};
+
+struct gss_sec_pipefs {
+ struct gss_sec gsp_base;
+ int gsp_chash_size; /* must be 2^n */
+ struct hlist_head gsp_chash[0];
+};
+
+/*
+ * FIXME cleanup the keyring upcall mutexes
+ */
+#define HAVE_KEYRING_UPCALL_SERIALIZED 1
+
+struct gss_sec_keyring {
+ struct gss_sec gsk_base;
+ /*
+ * all contexts listed here. access is protected by sec spinlock.
+ */
+ struct hlist_head gsk_clist;
+ /*
+ * specially point to root ctx (only one at a time). access is
+ * protected by sec spinlock.
+ */
+ struct ptlrpc_cli_ctx *gsk_root_ctx;
+ /*
+ * specially serialize upcalls for root context.
+ */
+ struct mutex gsk_root_uc_lock;
+
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+ struct mutex gsk_uc_lock; /* serialize upcalls */
+#endif
+};
+
+static inline struct gss_cli_ctx *ctx2gctx(struct ptlrpc_cli_ctx *ctx)
+{
+ return container_of(ctx, struct gss_cli_ctx, gc_base);
+}
+
+static inline
+struct gss_cli_ctx_keyring *ctx2gctx_keyring(struct ptlrpc_cli_ctx *ctx)
+{
+ return container_of(ctx2gctx(ctx),
+ struct gss_cli_ctx_keyring, gck_base);
+}
+
+static inline struct gss_sec *sec2gsec(struct ptlrpc_sec *sec)
+{
+ return container_of(sec, struct gss_sec, gs_base);
+}
+
+static inline struct gss_sec_pipefs *sec2gsec_pipefs(struct ptlrpc_sec *sec)
+{
+ return container_of(sec2gsec(sec), struct gss_sec_pipefs, gsp_base);
+}
+
+static inline struct gss_sec_keyring *sec2gsec_keyring(struct ptlrpc_sec *sec)
+{
+ return container_of(sec2gsec(sec), struct gss_sec_keyring, gsk_base);
+}
+
+
+#define GSS_CTX_INIT_MAX_LEN (1024)
+
+/*
+ * This only guaranteed be enough for current krb5 des-cbc-crc . We might
+ * adjust this when new enc type or mech added in.
+ */
+#define GSS_PRIVBUF_PREFIX_LEN (32)
+#define GSS_PRIVBUF_SUFFIX_LEN (32)
+
+static inline
+struct gss_svc_reqctx *gss_svc_ctx2reqctx(struct ptlrpc_svc_ctx *ctx)
+{
+ LASSERT(ctx);
+ return container_of(ctx, struct gss_svc_reqctx, src_base);
+}
+
+static inline
+struct gss_svc_ctx *gss_svc_ctx2gssctx(struct ptlrpc_svc_ctx *ctx)
+{
+ LASSERT(ctx);
+ return gss_svc_ctx2reqctx(ctx)->src_ctx;
+}
+
+/* sec_gss.c */
+int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred);
+int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+
+int gss_sec_install_rctx(struct obd_import *imp, struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+int gss_alloc_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+ int msgsize);
+void gss_free_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
+int gss_alloc_repbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+ int msgsize);
+void gss_free_repbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
+int gss_enlarge_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+ int segment, int newsize);
+
+int gss_svc_accept(struct ptlrpc_sec_policy *policy,
+ struct ptlrpc_request *req);
+void gss_svc_invalidate_ctx(struct ptlrpc_svc_ctx *svc_ctx);
+int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int gss_svc_authorize(struct ptlrpc_request *req);
+void gss_svc_free_rs(struct ptlrpc_reply_state *rs);
+void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx);
+
+int cli_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx);
+
+int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
+ struct ptlrpc_svc_ctx *svc_ctx);
+
+struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment,
+ int swabbed);
+netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment);
+
+void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx);
+int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor);
+int gss_check_seq_num(struct gss_svc_seq_data *sd, __u32 seq_num, int set);
+
+int gss_sec_create_common(struct gss_sec *gsec,
+ struct ptlrpc_sec_policy *policy,
+ struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ struct sptlrpc_flavor *sf);
+void gss_sec_destroy_common(struct gss_sec *gsec);
+void gss_sec_kill(struct ptlrpc_sec *sec);
+
+int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_ctx_ops *ctxops,
+ struct vfs_cred *vcred);
+int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+
+void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize);
+
+/* gss_keyring.c */
+int __init gss_init_keyring(void);
+void __exit gss_exit_keyring(void);
+
+/* gss_pipefs.c */
+int __init gss_init_pipefs(void);
+void __exit gss_exit_pipefs(void);
+
+/* gss_bulk.c */
+int gss_cli_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int gss_cli_ctx_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int gss_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int gss_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int gss_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+/* gss_mech_switch.c */
+int init_kerberos_module(void);
+void cleanup_kerberos_module(void);
+
+/* gss_generic_token.c */
+int g_token_size(rawobj_t *mech, unsigned int body_size);
+void g_make_token_header(rawobj_t *mech, int body_size, unsigned char **buf);
+__u32 g_verify_token_header(rawobj_t *mech, int *body_size,
+ unsigned char **buf_in, int toksize);
+
+
+/* gss_cli_upcall.c */
+int gss_do_ctx_init_rpc(char *buffer, unsigned long count);
+int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx);
+
+int __init gss_init_cli_upcall(void);
+void __exit gss_exit_cli_upcall(void);
+
+/* gss_svc_upcall.c */
+__u64 gss_get_next_ctx_index(void);
+int gss_svc_upcall_install_rvs_ctx(struct obd_import *imp,
+ struct gss_sec *gsec,
+ struct gss_cli_ctx *gctx);
+int gss_svc_upcall_expire_rvs_ctx(rawobj_t *handle);
+int gss_svc_upcall_dup_handle(rawobj_t *handle, struct gss_svc_ctx *ctx);
+int gss_svc_upcall_update_sequence(rawobj_t *handle, __u32 seq);
+int gss_svc_upcall_handle_init(struct ptlrpc_request *req,
+ struct gss_svc_reqctx *grctx,
+ struct gss_wire_ctx *gw,
+ struct obd_device *target,
+ __u32 lustre_svc,
+ rawobj_t *rvs_hdl,
+ rawobj_t *in_token);
+struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
+ struct gss_wire_ctx *gw);
+void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx);
+void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx);
+
+int __init gss_init_svc_upcall(void);
+void __exit gss_exit_svc_upcall(void);
+
+/* lproc_gss.c */
+void gss_stat_oos_record_cli(int behind);
+void gss_stat_oos_record_svc(int phase, int replay);
+
+int __init gss_init_lproc(void);
+void __exit gss_exit_lproc(void);
+
+/* gss_krb5_mech.c */
+int __init init_kerberos_module(void);
+void __exit cleanup_kerberos_module(void);
+
+
+/* debug */
+static inline
+void __dbg_memdump(char *name, void *ptr, int size)
+{
+ char *buf, *p = (char *) ptr;
+ int bufsize = size * 2 + 1, i;
+
+ OBD_ALLOC(buf, bufsize);
+ if (!buf) {
+ CDEBUG(D_ERROR, "DUMP ERROR: can't alloc %d bytes\n", bufsize);
+ return;
+ }
+
+ for (i = 0; i < size; i++)
+ sprintf(&buf[i+i], "%02x", (__u8) p[i]);
+ buf[size + size] = '\0';
+ LCONSOLE_INFO("DUMP %s@%p(%d): %s\n", name, ptr, size, buf);
+ OBD_FREE(buf, bufsize);
+}
+
+#endif /* __PTLRPC_GSS_GSS_INTERNAL_H_ */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_keyring.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_keyring.c
new file mode 100644
index 000000000000..bb571ae51054
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_keyring.c
@@ -0,0 +1,1424 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/gss/gss_keyring.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <linux/mutex.h>
+#include <asm/atomic.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_sec.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static struct ptlrpc_sec_policy gss_policy_keyring;
+static struct ptlrpc_ctx_ops gss_keyring_ctxops;
+static struct key_type gss_key_type;
+
+static int sec_install_rctx_kr(struct ptlrpc_sec *sec,
+ struct ptlrpc_svc_ctx *svc_ctx);
+
+/*
+ * the timeout is only for the case that upcall child process die abnormally.
+ * in any other cases it should finally update kernel key.
+ *
+ * FIXME we'd better to incorporate the client & server side upcall timeouts
+ * into the framework of Adaptive Timeouts, but we need to figure out how to
+ * make sure that kernel knows the upcall processes is in-progress or died
+ * unexpectedly.
+ */
+#define KEYRING_UPCALL_TIMEOUT (obd_timeout + obd_timeout)
+
+/****************************************
+ * internal helpers *
+ ****************************************/
+
+#define DUMP_PROCESS_KEYRINGS(tsk) \
+{ \
+ CWARN("DUMP PK: %s[%u,%u/%u](<-%s[%u,%u/%u]): " \
+ "a %d, t %d, p %d, s %d, u %d, us %d, df %d\n", \
+ tsk->comm, tsk->pid, tsk->uid, tsk->fsuid, \
+ tsk->parent->comm, tsk->parent->pid, \
+ tsk->parent->uid, tsk->parent->fsuid, \
+ tsk->request_key_auth ? \
+ tsk->request_key_auth->serial : 0, \
+ key_cred(tsk)->thread_keyring ? \
+ key_cred(tsk)->thread_keyring->serial : 0, \
+ key_tgcred(tsk)->process_keyring ? \
+ key_tgcred(tsk)->process_keyring->serial : 0, \
+ key_tgcred(tsk)->session_keyring ? \
+ key_tgcred(tsk)->session_keyring->serial : 0, \
+ key_cred(tsk)->user->uid_keyring ? \
+ key_cred(tsk)->user->uid_keyring->serial : 0, \
+ key_cred(tsk)->user->session_keyring ? \
+ key_cred(tsk)->user->session_keyring->serial : 0, \
+ key_cred(tsk)->jit_keyring \
+ ); \
+}
+
+#define DUMP_KEY(key) \
+{ \
+ CWARN("DUMP KEY: %p(%d) ref %d u%u/g%u desc %s\n", \
+ key, key->serial, atomic_read(&key->usage), \
+ key->uid, key->gid, \
+ key->description ? key->description : "n/a" \
+ ); \
+}
+
+#define key_cred(tsk) ((tsk)->cred)
+#define key_tgcred(tsk) ((tsk)->cred->tgcred)
+
+static inline void keyring_upcall_lock(struct gss_sec_keyring *gsec_kr)
+{
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+ mutex_lock(&gsec_kr->gsk_uc_lock);
+#endif
+}
+
+static inline void keyring_upcall_unlock(struct gss_sec_keyring *gsec_kr)
+{
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+ mutex_unlock(&gsec_kr->gsk_uc_lock);
+#endif
+}
+
+static inline void key_revoke_locked(struct key *key)
+{
+ set_bit(KEY_FLAG_REVOKED, &key->flags);
+}
+
+static void ctx_upcall_timeout_kr(unsigned long data)
+{
+ struct ptlrpc_cli_ctx *ctx = (struct ptlrpc_cli_ctx *) data;
+ struct key *key = ctx2gctx_keyring(ctx)->gck_key;
+
+ CWARN("ctx %p, key %p\n", ctx, key);
+
+ LASSERT(key);
+
+ cli_ctx_expire(ctx);
+ key_revoke_locked(key);
+}
+
+static
+void ctx_start_timer_kr(struct ptlrpc_cli_ctx *ctx, long timeout)
+{
+ struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
+ struct timer_list *timer = gctx_kr->gck_timer;
+
+ LASSERT(timer);
+
+ CDEBUG(D_SEC, "ctx %p: start timer %lds\n", ctx, timeout);
+ timeout = timeout * HZ + cfs_time_current();
+
+ init_timer(timer);
+ timer->expires = timeout;
+ timer->data = (unsigned long ) ctx;
+ timer->function = ctx_upcall_timeout_kr;
+
+ add_timer(timer);
+}
+
+/*
+ * caller should make sure no race with other threads
+ */
+static
+void ctx_clear_timer_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
+ struct timer_list *timer = gctx_kr->gck_timer;
+
+ if (timer == NULL)
+ return;
+
+ CDEBUG(D_SEC, "ctx %p, key %p\n", ctx, gctx_kr->gck_key);
+
+ gctx_kr->gck_timer = NULL;
+
+ del_singleshot_timer_sync(timer);
+
+ OBD_FREE_PTR(timer);
+}
+
+static
+struct ptlrpc_cli_ctx *ctx_create_kr(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ struct gss_cli_ctx_keyring *gctx_kr;
+
+ OBD_ALLOC_PTR(gctx_kr);
+ if (gctx_kr == NULL)
+ return NULL;
+
+ OBD_ALLOC_PTR(gctx_kr->gck_timer);
+ if (gctx_kr->gck_timer == NULL) {
+ OBD_FREE_PTR(gctx_kr);
+ return NULL;
+ }
+ init_timer(gctx_kr->gck_timer);
+
+ ctx = &gctx_kr->gck_base.gc_base;
+
+ if (gss_cli_ctx_init_common(sec, ctx, &gss_keyring_ctxops, vcred)) {
+ OBD_FREE_PTR(gctx_kr->gck_timer);
+ OBD_FREE_PTR(gctx_kr);
+ return NULL;
+ }
+
+ ctx->cc_expire = cfs_time_current_sec() + KEYRING_UPCALL_TIMEOUT;
+ clear_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags);
+ atomic_inc(&ctx->cc_refcount); /* for the caller */
+
+ return ctx;
+}
+
+static void ctx_destroy_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+ struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
+
+ CDEBUG(D_SEC, "destroying ctx %p\n", ctx);
+
+ /* at this time the association with key has been broken. */
+ LASSERT(sec);
+ LASSERT(atomic_read(&sec->ps_refcount) > 0);
+ LASSERT(atomic_read(&sec->ps_nctx) > 0);
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+ LASSERT(gctx_kr->gck_key == NULL);
+
+ ctx_clear_timer_kr(ctx);
+ LASSERT(gctx_kr->gck_timer == NULL);
+
+ if (gss_cli_ctx_fini_common(sec, ctx))
+ return;
+
+ OBD_FREE_PTR(gctx_kr);
+
+ atomic_dec(&sec->ps_nctx);
+ sptlrpc_sec_put(sec);
+}
+
+static void ctx_release_kr(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ if (sync) {
+ ctx_destroy_kr(ctx);
+ } else {
+ atomic_inc(&ctx->cc_refcount);
+ sptlrpc_gc_add_ctx(ctx);
+ }
+}
+
+static void ctx_put_kr(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (atomic_dec_and_test(&ctx->cc_refcount))
+ ctx_release_kr(ctx, sync);
+}
+
+/*
+ * key <-> ctx association and rules:
+ * - ctx might not bind with any key
+ * - key/ctx binding is protected by key semaphore (if the key present)
+ * - key and ctx each take a reference of the other
+ * - ctx enlist/unlist is protected by ctx spinlock
+ * - never enlist a ctx after it's been unlisted
+ * - whoever do enlist should also do bind, lock key before enlist:
+ * - lock key -> lock ctx -> enlist -> unlock ctx -> bind -> unlock key
+ * - whoever do unlist should also do unbind:
+ * - lock key -> lock ctx -> unlist -> unlock ctx -> unbind -> unlock key
+ * - lock ctx -> unlist -> unlock ctx -> lock key -> unbind -> unlock key
+ */
+
+static inline void spin_lock_if(spinlock_t *lock, int condition)
+{
+ if (condition)
+ spin_lock(lock);
+}
+
+static inline void spin_unlock_if(spinlock_t *lock, int condition)
+{
+ if (condition)
+ spin_unlock(lock);
+}
+
+static void ctx_enlist_kr(struct ptlrpc_cli_ctx *ctx, int is_root, int locked)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+
+ LASSERT(!test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ spin_lock_if(&sec->ps_lock, !locked);
+
+ atomic_inc(&ctx->cc_refcount);
+ set_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+ hlist_add_head(&ctx->cc_cache, &gsec_kr->gsk_clist);
+ if (is_root)
+ gsec_kr->gsk_root_ctx = ctx;
+
+ spin_unlock_if(&sec->ps_lock, !locked);
+}
+
+/*
+ * Note after this get called, caller should not access ctx again because
+ * it might have been freed, unless caller hold at least one refcount of
+ * the ctx.
+ *
+ * return non-zero if we indeed unlist this ctx.
+ */
+static int ctx_unlist_kr(struct ptlrpc_cli_ctx *ctx, int locked)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+
+ /* if hashed bit has gone, leave the job to somebody who is doing it */
+ if (test_and_clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0)
+ return 0;
+
+ /* drop ref inside spin lock to prevent race with other operations */
+ spin_lock_if(&sec->ps_lock, !locked);
+
+ if (gsec_kr->gsk_root_ctx == ctx)
+ gsec_kr->gsk_root_ctx = NULL;
+ hlist_del_init(&ctx->cc_cache);
+ atomic_dec(&ctx->cc_refcount);
+
+ spin_unlock_if(&sec->ps_lock, !locked);
+
+ return 1;
+}
+
+/*
+ * bind a key with a ctx together.
+ * caller must hold write lock of the key, as well as ref on key & ctx.
+ */
+static void bind_key_ctx(struct key *key, struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(atomic_read(&key->usage) > 0);
+ LASSERT(ctx2gctx_keyring(ctx)->gck_key == NULL);
+ LASSERT(key->payload.data == NULL);
+
+ /* at this time context may or may not in list. */
+ key_get(key);
+ atomic_inc(&ctx->cc_refcount);
+ ctx2gctx_keyring(ctx)->gck_key = key;
+ key->payload.data = ctx;
+}
+
+/*
+ * unbind a key and a ctx.
+ * caller must hold write lock, as well as a ref of the key.
+ */
+static void unbind_key_ctx(struct key *key, struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(key->payload.data == ctx);
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+
+ /* must revoke the key, or others may treat it as newly created */
+ key_revoke_locked(key);
+
+ key->payload.data = NULL;
+ ctx2gctx_keyring(ctx)->gck_key = NULL;
+
+ /* once ctx get split from key, the timer is meaningless */
+ ctx_clear_timer_kr(ctx);
+
+ ctx_put_kr(ctx, 1);
+ key_put(key);
+}
+
+/*
+ * given a ctx, unbind with its coupled key, if any.
+ * unbind could only be called once, so we don't worry the key be released
+ * by someone else.
+ */
+static void unbind_ctx_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ struct key *key = ctx2gctx_keyring(ctx)->gck_key;
+
+ if (key) {
+ LASSERT(key->payload.data == ctx);
+
+ key_get(key);
+ down_write(&key->sem);
+ unbind_key_ctx(key, ctx);
+ up_write(&key->sem);
+ key_put(key);
+ }
+}
+
+/*
+ * given a key, unbind with its coupled ctx, if any.
+ * caller must hold write lock, as well as a ref of the key.
+ */
+static void unbind_key_locked(struct key *key)
+{
+ struct ptlrpc_cli_ctx *ctx = key->payload.data;
+
+ if (ctx)
+ unbind_key_ctx(key, ctx);
+}
+
+/*
+ * unlist a ctx, and unbind from coupled key
+ */
+static void kill_ctx_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ if (ctx_unlist_kr(ctx, 0))
+ unbind_ctx_kr(ctx);
+}
+
+/*
+ * given a key, unlist and unbind with the coupled ctx (if any).
+ * caller must hold write lock, as well as a ref of the key.
+ */
+static void kill_key_locked(struct key *key)
+{
+ struct ptlrpc_cli_ctx *ctx = key->payload.data;
+
+ if (ctx && ctx_unlist_kr(ctx, 0))
+ unbind_key_locked(key);
+}
+
+/*
+ * caller should hold one ref on contexts in freelist.
+ */
+static void dispose_ctx_list_kr(struct hlist_head *freelist)
+{
+ struct hlist_node *next;
+ struct ptlrpc_cli_ctx *ctx;
+ struct gss_cli_ctx *gctx;
+
+ hlist_for_each_entry_safe(ctx, next, freelist, cc_cache) {
+ hlist_del_init(&ctx->cc_cache);
+
+ /* reverse ctx: update current seq to buddy svcctx if exist.
+ * ideally this should be done at gss_cli_ctx_finalize(), but
+ * the ctx destroy could be delayed by:
+ * 1) ctx still has reference;
+ * 2) ctx destroy is asynchronous;
+ * and reverse import call inval_all_ctx() require this be done
+ *_immediately_ otherwise newly created reverse ctx might copy
+ * the very old sequence number from svcctx. */
+ gctx = ctx2gctx(ctx);
+ if (!rawobj_empty(&gctx->gc_svc_handle) &&
+ sec_is_reverse(gctx->gc_base.cc_sec)) {
+ gss_svc_upcall_update_sequence(&gctx->gc_svc_handle,
+ (__u32) atomic_read(&gctx->gc_seq));
+ }
+
+ /* we need to wakeup waiting reqs here. the context might
+ * be forced released before upcall finished, then the
+ * late-arrived downcall can't find the ctx even. */
+ sptlrpc_cli_ctx_wakeup(ctx);
+
+ unbind_ctx_kr(ctx);
+ ctx_put_kr(ctx, 0);
+ }
+}
+
+/*
+ * lookup a root context directly in a sec, return root ctx with a
+ * reference taken or NULL.
+ */
+static
+struct ptlrpc_cli_ctx * sec_lookup_root_ctx_kr(struct ptlrpc_sec *sec)
+{
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+ struct ptlrpc_cli_ctx *ctx = NULL;
+
+ spin_lock(&sec->ps_lock);
+
+ ctx = gsec_kr->gsk_root_ctx;
+
+ if (ctx == NULL && unlikely(sec_is_reverse(sec))) {
+ struct ptlrpc_cli_ctx *tmp;
+
+ /* reverse ctx, search root ctx in list, choose the one
+ * with shortest expire time, which is most possibly have
+ * an established peer ctx at client side. */
+ hlist_for_each_entry(tmp, &gsec_kr->gsk_clist, cc_cache) {
+ if (ctx == NULL || ctx->cc_expire == 0 ||
+ ctx->cc_expire > tmp->cc_expire) {
+ ctx = tmp;
+ /* promote to be root_ctx */
+ gsec_kr->gsk_root_ctx = ctx;
+ }
+ }
+ }
+
+ if (ctx) {
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(!hlist_empty(&gsec_kr->gsk_clist));
+ atomic_inc(&ctx->cc_refcount);
+ }
+
+ spin_unlock(&sec->ps_lock);
+
+ return ctx;
+}
+
+#define RVS_CTX_EXPIRE_NICE (10)
+
+static
+void rvs_sec_install_root_ctx_kr(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *new_ctx,
+ struct key *key)
+{
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+ struct ptlrpc_cli_ctx *ctx;
+ cfs_time_t now;
+ ENTRY;
+
+ LASSERT(sec_is_reverse(sec));
+
+ spin_lock(&sec->ps_lock);
+
+ now = cfs_time_current_sec();
+
+ /* set all existing ctxs short expiry */
+ hlist_for_each_entry(ctx, &gsec_kr->gsk_clist, cc_cache) {
+ if (ctx->cc_expire > now + RVS_CTX_EXPIRE_NICE) {
+ ctx->cc_early_expire = 1;
+ ctx->cc_expire = now + RVS_CTX_EXPIRE_NICE;
+ }
+ }
+
+ /* if there's root_ctx there, instead obsolete the current
+ * immediately, we leave it continue operating for a little while.
+ * hopefully when the first backward rpc with newest ctx send out,
+ * the client side already have the peer ctx well established. */
+ ctx_enlist_kr(new_ctx, gsec_kr->gsk_root_ctx ? 0 : 1, 1);
+
+ if (key)
+ bind_key_ctx(key, new_ctx);
+
+ spin_unlock(&sec->ps_lock);
+}
+
+static void construct_key_desc(void *buf, int bufsize,
+ struct ptlrpc_sec *sec, uid_t uid)
+{
+ snprintf(buf, bufsize, "%d@%x", uid, sec->ps_id);
+ ((char *)buf)[bufsize - 1] = '\0';
+}
+
+/****************************************
+ * sec apis *
+ ****************************************/
+
+static
+struct ptlrpc_sec * gss_sec_create_kr(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svcctx,
+ struct sptlrpc_flavor *sf)
+{
+ struct gss_sec_keyring *gsec_kr;
+ ENTRY;
+
+ OBD_ALLOC(gsec_kr, sizeof(*gsec_kr));
+ if (gsec_kr == NULL)
+ RETURN(NULL);
+
+ INIT_HLIST_HEAD(&gsec_kr->gsk_clist);
+ gsec_kr->gsk_root_ctx = NULL;
+ mutex_init(&gsec_kr->gsk_root_uc_lock);
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+ mutex_init(&gsec_kr->gsk_uc_lock);
+#endif
+
+ if (gss_sec_create_common(&gsec_kr->gsk_base, &gss_policy_keyring,
+ imp, svcctx, sf))
+ goto err_free;
+
+ if (svcctx != NULL &&
+ sec_install_rctx_kr(&gsec_kr->gsk_base.gs_base, svcctx)) {
+ gss_sec_destroy_common(&gsec_kr->gsk_base);
+ goto err_free;
+ }
+
+ RETURN(&gsec_kr->gsk_base.gs_base);
+
+err_free:
+ OBD_FREE(gsec_kr, sizeof(*gsec_kr));
+ RETURN(NULL);
+}
+
+static
+void gss_sec_destroy_kr(struct ptlrpc_sec *sec)
+{
+ struct gss_sec *gsec = sec2gsec(sec);
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+
+ CDEBUG(D_SEC, "destroy %s@%p\n", sec->ps_policy->sp_name, sec);
+
+ LASSERT(hlist_empty(&gsec_kr->gsk_clist));
+ LASSERT(gsec_kr->gsk_root_ctx == NULL);
+
+ gss_sec_destroy_common(gsec);
+
+ OBD_FREE(gsec_kr, sizeof(*gsec_kr));
+}
+
+static inline int user_is_root(struct ptlrpc_sec *sec, struct vfs_cred *vcred)
+{
+ /* except the ROOTONLY flag, treat it as root user only if real uid
+ * is 0, euid/fsuid being 0 are handled as setuid scenarios */
+ if (sec_is_rootonly(sec) || (vcred->vc_uid == 0))
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * unlink request key from it's ring, which is linked during request_key().
+ * sadly, we have to 'guess' which keyring it's linked to.
+ *
+ * FIXME this code is fragile, depend on how request_key_link() is implemented.
+ */
+static void request_key_unlink(struct key *key)
+{
+ struct task_struct *tsk = current;
+ struct key *ring;
+
+ switch (key_cred(tsk)->jit_keyring) {
+ case KEY_REQKEY_DEFL_DEFAULT:
+ case KEY_REQKEY_DEFL_THREAD_KEYRING:
+ ring = key_get(key_cred(tsk)->thread_keyring);
+ if (ring)
+ break;
+ case KEY_REQKEY_DEFL_PROCESS_KEYRING:
+ ring = key_get(key_tgcred(tsk)->process_keyring);
+ if (ring)
+ break;
+ case KEY_REQKEY_DEFL_SESSION_KEYRING:
+ rcu_read_lock();
+ ring = key_get(rcu_dereference(key_tgcred(tsk)
+ ->session_keyring));
+ rcu_read_unlock();
+ if (ring)
+ break;
+ case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
+ ring = key_get(key_cred(tsk)->user->session_keyring);
+ break;
+ case KEY_REQKEY_DEFL_USER_KEYRING:
+ ring = key_get(key_cred(tsk)->user->uid_keyring);
+ break;
+ case KEY_REQKEY_DEFL_GROUP_KEYRING:
+ default:
+ LBUG();
+ }
+
+ LASSERT(ring);
+ key_unlink(ring, key);
+ key_put(ring);
+}
+
+static
+struct ptlrpc_cli_ctx * gss_sec_lookup_ctx_kr(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ struct obd_import *imp = sec->ps_import;
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+ struct ptlrpc_cli_ctx *ctx = NULL;
+ unsigned int is_root = 0, create_new = 0;
+ struct key *key;
+ char desc[24];
+ char *coinfo;
+ int coinfo_size;
+ char *co_flags = "";
+ ENTRY;
+
+ LASSERT(imp != NULL);
+
+ is_root = user_is_root(sec, vcred);
+
+ /* a little bit optimization for root context */
+ if (is_root) {
+ ctx = sec_lookup_root_ctx_kr(sec);
+ /*
+ * Only lookup directly for REVERSE sec, which should
+ * always succeed.
+ */
+ if (ctx || sec_is_reverse(sec))
+ RETURN(ctx);
+ }
+
+ LASSERT(create != 0);
+
+ /* for root context, obtain lock and check again, this time hold
+ * the root upcall lock, make sure nobody else populated new root
+ * context after last check. */
+ if (is_root) {
+ mutex_lock(&gsec_kr->gsk_root_uc_lock);
+
+ ctx = sec_lookup_root_ctx_kr(sec);
+ if (ctx)
+ goto out;
+
+ /* update reverse handle for root user */
+ sec2gsec(sec)->gs_rvs_hdl = gss_get_next_ctx_index();
+
+ switch (sec->ps_part) {
+ case LUSTRE_SP_MDT:
+ co_flags = "m";
+ break;
+ case LUSTRE_SP_OST:
+ co_flags = "o";
+ break;
+ case LUSTRE_SP_MGC:
+ co_flags = "rmo";
+ break;
+ case LUSTRE_SP_CLI:
+ co_flags = "r";
+ break;
+ case LUSTRE_SP_MGS:
+ default:
+ LBUG();
+ }
+ }
+
+ /* in case of setuid, key will be constructed as owner of fsuid/fsgid,
+ * but we do authentication based on real uid/gid. the key permission
+ * bits will be exactly as POS_ALL, so only processes who subscribed
+ * this key could have the access, although the quota might be counted
+ * on others (fsuid/fsgid).
+ *
+ * keyring will use fsuid/fsgid as upcall parameters, so we have to
+ * encode real uid/gid into callout info.
+ */
+
+ construct_key_desc(desc, sizeof(desc), sec, vcred->vc_uid);
+
+ /* callout info format:
+ * secid:mech:uid:gid:flags:svc_type:peer_nid:target_uuid
+ */
+ coinfo_size = sizeof(struct obd_uuid) + MAX_OBD_NAME + 64;
+ OBD_ALLOC(coinfo, coinfo_size);
+ if (coinfo == NULL)
+ goto out;
+
+ snprintf(coinfo, coinfo_size, "%d:%s:%u:%u:%s:%d:"LPX64":%s",
+ sec->ps_id, sec2gsec(sec)->gs_mech->gm_name,
+ vcred->vc_uid, vcred->vc_gid,
+ co_flags, import_to_gss_svc(imp),
+ imp->imp_connection->c_peer.nid, imp->imp_obd->obd_name);
+
+ CDEBUG(D_SEC, "requesting key for %s\n", desc);
+
+ keyring_upcall_lock(gsec_kr);
+ key = request_key(&gss_key_type, desc, coinfo);
+ keyring_upcall_unlock(gsec_kr);
+
+ OBD_FREE(coinfo, coinfo_size);
+
+ if (IS_ERR(key)) {
+ CERROR("failed request key: %ld\n", PTR_ERR(key));
+ goto out;
+ }
+ CDEBUG(D_SEC, "obtained key %08x for %s\n", key->serial, desc);
+
+ /* once payload.data was pointed to a ctx, it never changes until
+ * we de-associate them; but parallel request_key() may return
+ * a key with payload.data == NULL at the same time. so we still
+ * need wirtelock of key->sem to serialize them. */
+ down_write(&key->sem);
+
+ if (likely(key->payload.data != NULL)) {
+ ctx = key->payload.data;
+
+ LASSERT(atomic_read(&ctx->cc_refcount) >= 1);
+ LASSERT(ctx2gctx_keyring(ctx)->gck_key == key);
+ LASSERT(atomic_read(&key->usage) >= 2);
+
+ /* simply take a ref and return. it's upper layer's
+ * responsibility to detect & replace dead ctx. */
+ atomic_inc(&ctx->cc_refcount);
+ } else {
+ /* pre initialization with a cli_ctx. this can't be done in
+ * key_instantiate() because we'v no enough information
+ * there. */
+ ctx = ctx_create_kr(sec, vcred);
+ if (ctx != NULL) {
+ ctx_enlist_kr(ctx, is_root, 0);
+ bind_key_ctx(key, ctx);
+
+ ctx_start_timer_kr(ctx, KEYRING_UPCALL_TIMEOUT);
+
+ CDEBUG(D_SEC, "installed key %p <-> ctx %p (sec %p)\n",
+ key, ctx, sec);
+ } else {
+ /* we'd prefer to call key_revoke(), but we more like
+ * to revoke it within this key->sem locked period. */
+ key_revoke_locked(key);
+ }
+
+ create_new = 1;
+ }
+
+ up_write(&key->sem);
+
+ if (is_root && create_new)
+ request_key_unlink(key);
+
+ key_put(key);
+out:
+ if (is_root)
+ mutex_unlock(&gsec_kr->gsk_root_uc_lock);
+ RETURN(ctx);
+}
+
+static
+void gss_sec_release_ctx_kr(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx,
+ int sync)
+{
+ LASSERT(atomic_read(&sec->ps_refcount) > 0);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ ctx_release_kr(ctx, sync);
+}
+
+/*
+ * flush context of normal user, we must resort to keyring itself to find out
+ * contexts which belong to me.
+ *
+ * Note here we suppose only to flush _my_ context, the "uid" will
+ * be ignored in the search.
+ */
+static
+void flush_user_ctx_cache_kr(struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace, int force)
+{
+ struct key *key;
+ char desc[24];
+
+ /* nothing to do for reverse or rootonly sec */
+ if (sec_is_reverse(sec) || sec_is_rootonly(sec))
+ return;
+
+ construct_key_desc(desc, sizeof(desc), sec, uid);
+
+ /* there should be only one valid key, but we put it in the
+ * loop in case of any weird cases */
+ for (;;) {
+ key = request_key(&gss_key_type, desc, NULL);
+ if (IS_ERR(key)) {
+ CDEBUG(D_SEC, "No more key found for current user\n");
+ break;
+ }
+
+ down_write(&key->sem);
+
+ kill_key_locked(key);
+
+ /* kill_key_locked() should usually revoke the key, but we
+ * revoke it again to make sure, e.g. some case the key may
+ * not well coupled with a context. */
+ key_revoke_locked(key);
+
+ up_write(&key->sem);
+
+ key_put(key);
+ }
+}
+
+/*
+ * flush context of root or all, we iterate through the list.
+ */
+static
+void flush_spec_ctx_cache_kr(struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace, int force)
+{
+ struct gss_sec_keyring *gsec_kr;
+ struct hlist_head freelist = HLIST_HEAD_INIT;
+ struct hlist_node *next;
+ struct ptlrpc_cli_ctx *ctx;
+ ENTRY;
+
+ gsec_kr = sec2gsec_keyring(sec);
+
+ spin_lock(&sec->ps_lock);
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_kr->gsk_clist, cc_cache) {
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+ continue;
+
+ /* at this moment there's at least 2 base reference:
+ * key association and in-list. */
+ if (atomic_read(&ctx->cc_refcount) > 2) {
+ if (!force)
+ continue;
+ CWARN("flush busy ctx %p(%u->%s, extra ref %d)\n",
+ ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec),
+ atomic_read(&ctx->cc_refcount) - 2);
+ }
+
+ set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+ if (!grace)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ atomic_inc(&ctx->cc_refcount);
+
+ if (ctx_unlist_kr(ctx, 1)) {
+ hlist_add_head(&ctx->cc_cache, &freelist);
+ } else {
+ LASSERT(atomic_read(&ctx->cc_refcount) >= 2);
+ atomic_dec(&ctx->cc_refcount);
+ }
+ }
+ spin_unlock(&sec->ps_lock);
+
+ dispose_ctx_list_kr(&freelist);
+ EXIT;
+}
+
+static
+int gss_sec_flush_ctx_cache_kr(struct ptlrpc_sec *sec,
+ uid_t uid, int grace, int force)
+{
+ ENTRY;
+
+ CDEBUG(D_SEC, "sec %p(%d, nctx %d), uid %d, grace %d, force %d\n",
+ sec, atomic_read(&sec->ps_refcount),
+ atomic_read(&sec->ps_nctx),
+ uid, grace, force);
+
+ if (uid != -1 && uid != 0)
+ flush_user_ctx_cache_kr(sec, uid, grace, force);
+ else
+ flush_spec_ctx_cache_kr(sec, uid, grace, force);
+
+ RETURN(0);
+}
+
+static
+void gss_sec_gc_ctx_kr(struct ptlrpc_sec *sec)
+{
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+ struct hlist_head freelist = HLIST_HEAD_INIT;
+ struct hlist_node *next;
+ struct ptlrpc_cli_ctx *ctx;
+ ENTRY;
+
+ CWARN("running gc\n");
+
+ spin_lock(&sec->ps_lock);
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_kr->gsk_clist, cc_cache) {
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ atomic_inc(&ctx->cc_refcount);
+
+ if (cli_ctx_check_death(ctx) && ctx_unlist_kr(ctx, 1)) {
+ hlist_add_head(&ctx->cc_cache, &freelist);
+ CWARN("unhashed ctx %p\n", ctx);
+ } else {
+ LASSERT(atomic_read(&ctx->cc_refcount) >= 2);
+ atomic_dec(&ctx->cc_refcount);
+ }
+ }
+ spin_unlock(&sec->ps_lock);
+
+ dispose_ctx_list_kr(&freelist);
+ EXIT;
+ return;
+}
+
+static
+int gss_sec_display_kr(struct ptlrpc_sec *sec, struct seq_file *seq)
+{
+ struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+ struct hlist_node *next;
+ struct ptlrpc_cli_ctx *ctx;
+ struct gss_cli_ctx *gctx;
+ time_t now = cfs_time_current_sec();
+ ENTRY;
+
+ spin_lock(&sec->ps_lock);
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_kr->gsk_clist, cc_cache) {
+ struct key *key;
+ char flags_str[40];
+ char mech[40];
+
+ gctx = ctx2gctx(ctx);
+ key = ctx2gctx_keyring(ctx)->gck_key;
+
+ gss_cli_ctx_flags2str(ctx->cc_flags,
+ flags_str, sizeof(flags_str));
+
+ if (gctx->gc_mechctx)
+ lgss_display(gctx->gc_mechctx, mech, sizeof(mech));
+ else
+ snprintf(mech, sizeof(mech), "N/A");
+ mech[sizeof(mech) - 1] = '\0';
+
+ seq_printf(seq, "%p: uid %u, ref %d, expire %ld(%+ld), fl %s, "
+ "seq %d, win %u, key %08x(ref %d), "
+ "hdl "LPX64":"LPX64", mech: %s\n",
+ ctx, ctx->cc_vcred.vc_uid,
+ atomic_read(&ctx->cc_refcount),
+ ctx->cc_expire,
+ ctx->cc_expire ? ctx->cc_expire - now : 0,
+ flags_str,
+ atomic_read(&gctx->gc_seq),
+ gctx->gc_win,
+ key ? key->serial : 0,
+ key ? atomic_read(&key->usage) : 0,
+ gss_handle_to_u64(&gctx->gc_handle),
+ gss_handle_to_u64(&gctx->gc_svc_handle),
+ mech);
+ }
+ spin_unlock(&sec->ps_lock);
+
+ RETURN(0);
+}
+
+/****************************************
+ * cli_ctx apis *
+ ****************************************/
+
+static
+int gss_cli_ctx_refresh_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ /* upcall is already on the way */
+ return 0;
+}
+
+static
+int gss_cli_ctx_validate_kr(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(ctx->cc_sec);
+
+ if (cli_ctx_check_death(ctx)) {
+ kill_ctx_kr(ctx);
+ return 1;
+ }
+
+ if (cli_ctx_is_ready(ctx))
+ return 0;
+ return 1;
+}
+
+static
+void gss_cli_ctx_die_kr(struct ptlrpc_cli_ctx *ctx, int grace)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(ctx->cc_sec);
+
+ cli_ctx_expire(ctx);
+ kill_ctx_kr(ctx);
+}
+
+/****************************************
+ * (reverse) service *
+ ****************************************/
+
+/*
+ * reverse context could have nothing to do with keyrings. here we still keep
+ * the version which bind to a key, for future reference.
+ */
+#define HAVE_REVERSE_CTX_NOKEY
+
+
+static
+int sec_install_rctx_kr(struct ptlrpc_sec *sec,
+ struct ptlrpc_svc_ctx *svc_ctx)
+{
+ struct ptlrpc_cli_ctx *cli_ctx;
+ struct vfs_cred vcred = { 0, 0 };
+ int rc;
+
+ LASSERT(sec);
+ LASSERT(svc_ctx);
+
+ cli_ctx = ctx_create_kr(sec, &vcred);
+ if (cli_ctx == NULL)
+ return -ENOMEM;
+
+ rc = gss_copy_rvc_cli_ctx(cli_ctx, svc_ctx);
+ if (rc) {
+ CERROR("failed copy reverse cli ctx: %d\n", rc);
+
+ ctx_put_kr(cli_ctx, 1);
+ return rc;
+ }
+
+ rvs_sec_install_root_ctx_kr(sec, cli_ctx, NULL);
+
+ ctx_put_kr(cli_ctx, 1);
+
+ return 0;
+}
+
+
+/****************************************
+ * service apis *
+ ****************************************/
+
+static
+int gss_svc_accept_kr(struct ptlrpc_request *req)
+{
+ return gss_svc_accept(&gss_policy_keyring, req);
+}
+
+static
+int gss_svc_install_rctx_kr(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx)
+{
+ struct ptlrpc_sec *sec;
+ int rc;
+
+ sec = sptlrpc_import_sec_ref(imp);
+ LASSERT(sec);
+
+ rc = sec_install_rctx_kr(sec, svc_ctx);
+ sptlrpc_sec_put(sec);
+
+ return rc;
+}
+
+/****************************************
+ * key apis *
+ ****************************************/
+
+static
+int gss_kt_instantiate(struct key *key, const void *data, size_t datalen)
+{
+ int rc;
+ ENTRY;
+
+ if (data != NULL || datalen != 0) {
+ CERROR("invalid: data %p, len %lu\n", data, (long)datalen);
+ RETURN(-EINVAL);
+ }
+
+ if (key->payload.data != 0) {
+ CERROR("key already have payload\n");
+ RETURN(-EINVAL);
+ }
+
+ /* link the key to session keyring, so following context negotiation
+ * rpc fired from user space could find this key. This will be unlinked
+ * automatically when upcall processes die.
+ *
+ * we can't do this through keyctl from userspace, because the upcall
+ * might be neither possessor nor owner of the key (setuid).
+ *
+ * the session keyring is created upon upcall, and don't change all
+ * the way until upcall finished, so rcu lock is not needed here.
+ */
+ LASSERT(key_tgcred(current)->session_keyring);
+
+ lockdep_off();
+ rc = key_link(key_tgcred(current)->session_keyring, key);
+ lockdep_on();
+ if (unlikely(rc)) {
+ CERROR("failed to link key %08x to keyring %08x: %d\n",
+ key->serial,
+ key_tgcred(current)->session_keyring->serial, rc);
+ RETURN(rc);
+ }
+
+ CDEBUG(D_SEC, "key %p instantiated, ctx %p\n", key, key->payload.data);
+ RETURN(0);
+}
+
+/*
+ * called with key semaphore write locked. it means we can operate
+ * on the context without fear of loosing refcount.
+ */
+static
+int gss_kt_update(struct key *key, const void *data, size_t datalen)
+{
+ struct ptlrpc_cli_ctx *ctx = key->payload.data;
+ struct gss_cli_ctx *gctx;
+ rawobj_t tmpobj = RAWOBJ_EMPTY;
+ __u32 datalen32 = (__u32) datalen;
+ int rc;
+ ENTRY;
+
+ if (data == NULL || datalen == 0) {
+ CWARN("invalid: data %p, len %lu\n", data, (long)datalen);
+ RETURN(-EINVAL);
+ }
+
+ /* if upcall finished negotiation too fast (mostly likely because
+ * of local error happened) and call kt_update(), the ctx
+ * might be still NULL. but the key will finally be associate
+ * with a context, or be revoked. if key status is fine, return
+ * -EAGAIN to allow userspace sleep a while and call again. */
+ if (ctx == NULL) {
+ CDEBUG(D_SEC, "update too soon: key %p(%x) flags %lx\n",
+ key, key->serial, key->flags);
+
+ rc = key_validate(key);
+ if (rc == 0)
+ RETURN(-EAGAIN);
+ else
+ RETURN(rc);
+ }
+
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(ctx->cc_sec);
+
+ ctx_clear_timer_kr(ctx);
+
+ /* don't proceed if already refreshed */
+ if (cli_ctx_is_refreshed(ctx)) {
+ CWARN("ctx already done refresh\n");
+ RETURN(0);
+ }
+
+ sptlrpc_cli_ctx_get(ctx);
+ gctx = ctx2gctx(ctx);
+
+ rc = buffer_extract_bytes(&data, &datalen32, &gctx->gc_win,
+ sizeof(gctx->gc_win));
+ if (rc) {
+ CERROR("failed extract seq_win\n");
+ goto out;
+ }
+
+ if (gctx->gc_win == 0) {
+ __u32 nego_rpc_err, nego_gss_err;
+
+ rc = buffer_extract_bytes(&data, &datalen32, &nego_rpc_err,
+ sizeof(nego_rpc_err));
+ if (rc) {
+ CERROR("failed to extrace rpc rc\n");
+ goto out;
+ }
+
+ rc = buffer_extract_bytes(&data, &datalen32, &nego_gss_err,
+ sizeof(nego_gss_err));
+ if (rc) {
+ CERROR("failed to extrace gss rc\n");
+ goto out;
+ }
+
+ CERROR("negotiation: rpc err %d, gss err %x\n",
+ nego_rpc_err, nego_gss_err);
+
+ rc = nego_rpc_err ? nego_rpc_err : -EACCES;
+ } else {
+ rc = rawobj_extract_local_alloc(&gctx->gc_handle,
+ (__u32 **) &data, &datalen32);
+ if (rc) {
+ CERROR("failed extract handle\n");
+ goto out;
+ }
+
+ rc = rawobj_extract_local(&tmpobj, (__u32 **) &data,&datalen32);
+ if (rc) {
+ CERROR("failed extract mech\n");
+ goto out;
+ }
+
+ rc = lgss_import_sec_context(&tmpobj,
+ sec2gsec(ctx->cc_sec)->gs_mech,
+ &gctx->gc_mechctx);
+ if (rc != GSS_S_COMPLETE)
+ CERROR("failed import context\n");
+ else
+ rc = 0;
+ }
+out:
+ /* we don't care what current status of this ctx, even someone else
+ * is operating on the ctx at the same time. we just add up our own
+ * opinions here. */
+ if (rc == 0) {
+ gss_cli_ctx_uptodate(gctx);
+ } else {
+ /* this will also revoke the key. has to be done before
+ * wakeup waiters otherwise they can find the stale key */
+ kill_key_locked(key);
+
+ cli_ctx_expire(ctx);
+
+ if (rc != -ERESTART)
+ set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+ }
+
+ /* let user space think it's a success */
+ sptlrpc_cli_ctx_put(ctx, 1);
+ RETURN(0);
+}
+
+static
+int gss_kt_match(const struct key *key, const void *desc)
+{
+ return (strcmp(key->description, (const char *) desc) == 0);
+}
+
+static
+void gss_kt_destroy(struct key *key)
+{
+ ENTRY;
+ LASSERT(key->payload.data == NULL);
+ CDEBUG(D_SEC, "destroy key %p\n", key);
+ EXIT;
+}
+
+static
+void gss_kt_describe(const struct key *key, struct seq_file *s)
+{
+ if (key->description == NULL)
+ seq_puts(s, "[null]");
+ else
+ seq_puts(s, key->description);
+}
+
+static struct key_type gss_key_type =
+{
+ .name = "lgssc",
+ .def_datalen = 0,
+ .instantiate = gss_kt_instantiate,
+ .update = gss_kt_update,
+ .match = gss_kt_match,
+ .destroy = gss_kt_destroy,
+ .describe = gss_kt_describe,
+};
+
+/****************************************
+ * lustre gss keyring policy *
+ ****************************************/
+
+static struct ptlrpc_ctx_ops gss_keyring_ctxops = {
+ .match = gss_cli_ctx_match,
+ .refresh = gss_cli_ctx_refresh_kr,
+ .validate = gss_cli_ctx_validate_kr,
+ .die = gss_cli_ctx_die_kr,
+ .sign = gss_cli_ctx_sign,
+ .verify = gss_cli_ctx_verify,
+ .seal = gss_cli_ctx_seal,
+ .unseal = gss_cli_ctx_unseal,
+ .wrap_bulk = gss_cli_ctx_wrap_bulk,
+ .unwrap_bulk = gss_cli_ctx_unwrap_bulk,
+};
+
+static struct ptlrpc_sec_cops gss_sec_keyring_cops = {
+ .create_sec = gss_sec_create_kr,
+ .destroy_sec = gss_sec_destroy_kr,
+ .kill_sec = gss_sec_kill,
+ .lookup_ctx = gss_sec_lookup_ctx_kr,
+ .release_ctx = gss_sec_release_ctx_kr,
+ .flush_ctx_cache = gss_sec_flush_ctx_cache_kr,
+ .gc_ctx = gss_sec_gc_ctx_kr,
+ .install_rctx = gss_sec_install_rctx,
+ .alloc_reqbuf = gss_alloc_reqbuf,
+ .free_reqbuf = gss_free_reqbuf,
+ .alloc_repbuf = gss_alloc_repbuf,
+ .free_repbuf = gss_free_repbuf,
+ .enlarge_reqbuf = gss_enlarge_reqbuf,
+ .display = gss_sec_display_kr,
+};
+
+static struct ptlrpc_sec_sops gss_sec_keyring_sops = {
+ .accept = gss_svc_accept_kr,
+ .invalidate_ctx = gss_svc_invalidate_ctx,
+ .alloc_rs = gss_svc_alloc_rs,
+ .authorize = gss_svc_authorize,
+ .free_rs = gss_svc_free_rs,
+ .free_ctx = gss_svc_free_ctx,
+ .prep_bulk = gss_svc_prep_bulk,
+ .unwrap_bulk = gss_svc_unwrap_bulk,
+ .wrap_bulk = gss_svc_wrap_bulk,
+ .install_rctx = gss_svc_install_rctx_kr,
+};
+
+static struct ptlrpc_sec_policy gss_policy_keyring = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "gss.keyring",
+ .sp_policy = SPTLRPC_POLICY_GSS,
+ .sp_cops = &gss_sec_keyring_cops,
+ .sp_sops = &gss_sec_keyring_sops,
+};
+
+
+int __init gss_init_keyring(void)
+{
+ int rc;
+
+ rc = register_key_type(&gss_key_type);
+ if (rc) {
+ CERROR("failed to register keyring type: %d\n", rc);
+ return rc;
+ }
+
+ rc = sptlrpc_register_policy(&gss_policy_keyring);
+ if (rc) {
+ unregister_key_type(&gss_key_type);
+ return rc;
+ }
+
+ return 0;
+}
+
+void __exit gss_exit_keyring(void)
+{
+ unregister_key_type(&gss_key_type);
+ sptlrpc_unregister_policy(&gss_policy_keyring);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5.h b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5.h
new file mode 100644
index 000000000000..676d4b96311a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5.h
@@ -0,0 +1,163 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/include/linux/sunrpc/gss_krb5_types.h
+ *
+ * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
+ * lib/gssapi/krb5/gssapiP_krb5.h, and others
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@umich.edu>
+ * Bruce Fields <bfields@umich.edu>
+ */
+
+/*
+ * Copyright 1995 by the Massachusetts Institute of Technology.
+ * All Rights Reserved.
+ *
+ * Export of this software from the United States of America may
+ * require a specific license from the United States Government.
+ * It is the responsibility of any person or organization contemplating
+ * export to obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of M.I.T. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission. Furthermore if you modify this software you must label
+ * your software as modified software and not distribute it in such a
+ * fashion that it might be confused with the original M.I.T. software.
+ * M.I.T. makes no representations about the suitability of
+ * this software for any purpose. It is provided "as is" without express
+ * or implied warranty.
+ *
+ */
+
+#ifndef PTLRPC_GSS_KRB5_H
+#define PTLRPC_GSS_KRB5_H
+
+/*
+ * RFC 4142
+ */
+
+#define KG_USAGE_ACCEPTOR_SEAL 22
+#define KG_USAGE_ACCEPTOR_SIGN 23
+#define KG_USAGE_INITIATOR_SEAL 24
+#define KG_USAGE_INITIATOR_SIGN 25
+
+#define KG_TOK_MIC_MSG 0x0404
+#define KG_TOK_WRAP_MSG 0x0504
+
+#define FLAG_SENDER_IS_ACCEPTOR 0x01
+#define FLAG_WRAP_CONFIDENTIAL 0x02
+#define FLAG_ACCEPTOR_SUBKEY 0x04
+
+struct krb5_header {
+ __u16 kh_tok_id; /* token id */
+ __u8 kh_flags; /* acceptor flags */
+ __u8 kh_filler; /* 0xff */
+ __u16 kh_ec; /* extra count */
+ __u16 kh_rrc; /* right rotation count */
+ __u64 kh_seq; /* sequence number */
+ __u8 kh_cksum[0]; /* checksum */
+};
+
+struct krb5_keyblock {
+ rawobj_t kb_key;
+ struct ll_crypto_cipher *kb_tfm;
+};
+
+struct krb5_ctx {
+ unsigned int kc_initiate:1,
+ kc_cfx:1,
+ kc_seed_init:1,
+ kc_have_acceptor_subkey:1;
+ __s32 kc_endtime;
+ __u8 kc_seed[16];
+ __u64 kc_seq_send;
+ __u64 kc_seq_recv;
+ __u32 kc_enctype;
+ struct krb5_keyblock kc_keye; /* encryption */
+ struct krb5_keyblock kc_keyi; /* integrity */
+ struct krb5_keyblock kc_keyc; /* checksum */
+ rawobj_t kc_mech_used;
+};
+
+enum sgn_alg {
+ SGN_ALG_DES_MAC_MD5 = 0x0000,
+ SGN_ALG_MD2_5 = 0x0001,
+ SGN_ALG_DES_MAC = 0x0002,
+ SGN_ALG_3 = 0x0003, /* not published */
+ SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */
+ SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004
+};
+
+enum seal_alg {
+ SEAL_ALG_NONE = 0xffff,
+ SEAL_ALG_DES = 0x0000,
+ SEAL_ALG_1 = 0x0001, /* not published */
+ SEAL_ALG_MICROSOFT_RC4 = 0x0010, /* microsoft w2k; no support */
+ SEAL_ALG_DES3KD = 0x0002
+};
+
+#define CKSUMTYPE_CRC32 0x0001
+#define CKSUMTYPE_RSA_MD4 0x0002
+#define CKSUMTYPE_RSA_MD4_DES 0x0003
+#define CKSUMTYPE_DESCBC 0x0004
+/* des-mac-k */
+/* rsa-md4-des-k */
+#define CKSUMTYPE_RSA_MD5 0x0007
+#define CKSUMTYPE_RSA_MD5_DES 0x0008
+#define CKSUMTYPE_NIST_SHA 0x0009
+#define CKSUMTYPE_HMAC_SHA1_DES3 0x000c
+#define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f
+#define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010
+#define CKSUMTYPE_HMAC_MD5_ARCFOUR -138
+
+/* from gssapi_err_krb5.h */
+#define KG_CCACHE_NOMATCH (39756032L)
+#define KG_KEYTAB_NOMATCH (39756033L)
+#define KG_TGT_MISSING (39756034L)
+#define KG_NO_SUBKEY (39756035L)
+#define KG_CONTEXT_ESTABLISHED (39756036L)
+#define KG_BAD_SIGN_TYPE (39756037L)
+#define KG_BAD_LENGTH (39756038L)
+#define KG_CTX_INCOMPLETE (39756039L)
+#define KG_CONTEXT (39756040L)
+#define KG_CRED (39756041L)
+#define KG_ENC_DESC (39756042L)
+#define KG_BAD_SEQ (39756043L)
+#define KG_EMPTY_CCACHE (39756044L)
+#define KG_NO_CTYPES (39756045L)
+
+/* per Kerberos v5 protocol spec crypto types from the wire.
+ * these get mapped to linux kernel crypto routines.
+ */
+#define ENCTYPE_NULL 0x0000
+#define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */
+#define ENCTYPE_DES_CBC_MD4 0x0002 /* DES cbc mode with RSA-MD4 */
+#define ENCTYPE_DES_CBC_MD5 0x0003 /* DES cbc mode with RSA-MD5 */
+#define ENCTYPE_DES_CBC_RAW 0x0004 /* DES cbc mode raw */
+/* XXX deprecated? */
+#define ENCTYPE_DES3_CBC_SHA 0x0005 /* DES-3 cbc mode with NIST-SHA */
+#define ENCTYPE_DES3_CBC_RAW 0x0006 /* DES-3 cbc mode raw */
+#define ENCTYPE_DES_HMAC_SHA1 0x0008
+#define ENCTYPE_DES3_CBC_SHA1 0x0010
+#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011
+#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012
+#define ENCTYPE_ARCFOUR_HMAC 0x0017
+#define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018
+#define ENCTYPE_UNKNOWN 0x01ff
+
+#endif /* PTLRPC_GSS_KRB5_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5_mech.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5_mech.c
new file mode 100644
index 000000000000..4b28931bbc96
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_krb5_mech.c
@@ -0,0 +1,1786 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/gss_krb5_mech.c
+ * linux/net/sunrpc/gss_krb5_crypto.c
+ * linux/net/sunrpc/gss_krb5_seal.c
+ * linux/net/sunrpc/gss_krb5_seqnum.c
+ * linux/net/sunrpc/gss_krb5_unseal.c
+ *
+ * Copyright (c) 2001 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@umich.edu>
+ * J. Bruce Fields <bfields@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/crypto.h>
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+#include "gss_asn1.h"
+#include "gss_krb5.h"
+
+static spinlock_t krb5_seq_lock;
+
+struct krb5_enctype {
+ char *ke_dispname;
+ char *ke_enc_name; /* linux tfm name */
+ char *ke_hash_name; /* linux tfm name */
+ int ke_enc_mode; /* linux tfm mode */
+ int ke_hash_size; /* checksum size */
+ int ke_conf_size; /* confounder size */
+ unsigned int ke_hash_hmac:1; /* is hmac? */
+};
+
+/*
+ * NOTE: for aes128-cts and aes256-cts, MIT implementation use CTS encryption.
+ * but currently we simply CBC with padding, because linux doesn't support CTS
+ * yet. this need to be fixed in the future.
+ */
+static struct krb5_enctype enctypes[] = {
+ [ENCTYPE_DES_CBC_RAW] = { /* des-cbc-md5 */
+ "des-cbc-md5",
+ "cbc(des)",
+ "md5",
+ 0,
+ 16,
+ 8,
+ 0,
+ },
+ [ENCTYPE_DES3_CBC_RAW] = { /* des3-hmac-sha1 */
+ "des3-hmac-sha1",
+ "cbc(des3_ede)",
+ "hmac(sha1)",
+ 0,
+ 20,
+ 8,
+ 1,
+ },
+ [ENCTYPE_AES128_CTS_HMAC_SHA1_96] = { /* aes128-cts */
+ "aes128-cts-hmac-sha1-96",
+ "cbc(aes)",
+ "hmac(sha1)",
+ 0,
+ 12,
+ 16,
+ 1,
+ },
+ [ENCTYPE_AES256_CTS_HMAC_SHA1_96] = { /* aes256-cts */
+ "aes256-cts-hmac-sha1-96",
+ "cbc(aes)",
+ "hmac(sha1)",
+ 0,
+ 12,
+ 16,
+ 1,
+ },
+ [ENCTYPE_ARCFOUR_HMAC] = { /* arcfour-hmac-md5 */
+ "arcfour-hmac-md5",
+ "ecb(arc4)",
+ "hmac(md5)",
+ 0,
+ 16,
+ 8,
+ 1,
+ },
+};
+
+#define MAX_ENCTYPES sizeof(enctypes)/sizeof(struct krb5_enctype)
+
+static const char * enctype2str(__u32 enctype)
+{
+ if (enctype < MAX_ENCTYPES && enctypes[enctype].ke_dispname)
+ return enctypes[enctype].ke_dispname;
+
+ return "unknown";
+}
+
+static
+int keyblock_init(struct krb5_keyblock *kb, char *alg_name, int alg_mode)
+{
+ kb->kb_tfm = ll_crypto_alloc_blkcipher(alg_name, alg_mode, 0);
+ if (IS_ERR(kb->kb_tfm)) {
+ CERROR("failed to alloc tfm: %s, mode %d\n",
+ alg_name, alg_mode);
+ return -1;
+ }
+
+ if (ll_crypto_blkcipher_setkey(kb->kb_tfm, kb->kb_key.data, kb->kb_key.len)) {
+ CERROR("failed to set %s key, len %d\n",
+ alg_name, kb->kb_key.len);
+ return -1;
+ }
+
+ return 0;
+}
+
+static
+int krb5_init_keys(struct krb5_ctx *kctx)
+{
+ struct krb5_enctype *ke;
+
+ if (kctx->kc_enctype >= MAX_ENCTYPES ||
+ enctypes[kctx->kc_enctype].ke_hash_size == 0) {
+ CERROR("unsupported enctype %x\n", kctx->kc_enctype);
+ return -1;
+ }
+
+ ke = &enctypes[kctx->kc_enctype];
+
+ /* tfm arc4 is stateful, user should alloc-use-free by his own */
+ if (kctx->kc_enctype != ENCTYPE_ARCFOUR_HMAC &&
+ keyblock_init(&kctx->kc_keye, ke->ke_enc_name, ke->ke_enc_mode))
+ return -1;
+
+ /* tfm hmac is stateful, user should alloc-use-free by his own */
+ if (ke->ke_hash_hmac == 0 &&
+ keyblock_init(&kctx->kc_keyi, ke->ke_enc_name, ke->ke_enc_mode))
+ return -1;
+ if (ke->ke_hash_hmac == 0 &&
+ keyblock_init(&kctx->kc_keyc, ke->ke_enc_name, ke->ke_enc_mode))
+ return -1;
+
+ return 0;
+}
+
+static
+void keyblock_free(struct krb5_keyblock *kb)
+{
+ rawobj_free(&kb->kb_key);
+ if (kb->kb_tfm)
+ ll_crypto_free_blkcipher(kb->kb_tfm);
+}
+
+static
+int keyblock_dup(struct krb5_keyblock *new, struct krb5_keyblock *kb)
+{
+ return rawobj_dup(&new->kb_key, &kb->kb_key);
+}
+
+static
+int get_bytes(char **ptr, const char *end, void *res, int len)
+{
+ char *p, *q;
+ p = *ptr;
+ q = p + len;
+ if (q > end || q < p)
+ return -1;
+ memcpy(res, p, len);
+ *ptr = q;
+ return 0;
+}
+
+static
+int get_rawobj(char **ptr, const char *end, rawobj_t *res)
+{
+ char *p, *q;
+ __u32 len;
+
+ p = *ptr;
+ if (get_bytes(&p, end, &len, sizeof(len)))
+ return -1;
+
+ q = p + len;
+ if (q > end || q < p)
+ return -1;
+
+ OBD_ALLOC_LARGE(res->data, len);
+ if (!res->data)
+ return -1;
+
+ res->len = len;
+ memcpy(res->data, p, len);
+ *ptr = q;
+ return 0;
+}
+
+static
+int get_keyblock(char **ptr, const char *end,
+ struct krb5_keyblock *kb, __u32 keysize)
+{
+ char *buf;
+
+ OBD_ALLOC_LARGE(buf, keysize);
+ if (buf == NULL)
+ return -1;
+
+ if (get_bytes(ptr, end, buf, keysize)) {
+ OBD_FREE_LARGE(buf, keysize);
+ return -1;
+ }
+
+ kb->kb_key.len = keysize;
+ kb->kb_key.data = buf;
+ return 0;
+}
+
+static
+void delete_context_kerberos(struct krb5_ctx *kctx)
+{
+ rawobj_free(&kctx->kc_mech_used);
+
+ keyblock_free(&kctx->kc_keye);
+ keyblock_free(&kctx->kc_keyi);
+ keyblock_free(&kctx->kc_keyc);
+}
+
+static
+__u32 import_context_rfc1964(struct krb5_ctx *kctx, char *p, char *end)
+{
+ unsigned int tmp_uint, keysize;
+
+ /* seed_init flag */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)))
+ goto out_err;
+ kctx->kc_seed_init = (tmp_uint != 0);
+
+ /* seed */
+ if (get_bytes(&p, end, kctx->kc_seed, sizeof(kctx->kc_seed)))
+ goto out_err;
+
+ /* sign/seal algorithm, not really used now */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) ||
+ get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)))
+ goto out_err;
+
+ /* end time */
+ if (get_bytes(&p, end, &kctx->kc_endtime, sizeof(kctx->kc_endtime)))
+ goto out_err;
+
+ /* seq send */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)))
+ goto out_err;
+ kctx->kc_seq_send = tmp_uint;
+
+ /* mech oid */
+ if (get_rawobj(&p, end, &kctx->kc_mech_used))
+ goto out_err;
+
+ /* old style enc/seq keys in format:
+ * - enctype (u32)
+ * - keysize (u32)
+ * - keydata
+ * we decompose them to fit into the new context
+ */
+
+ /* enc key */
+ if (get_bytes(&p, end, &kctx->kc_enctype, sizeof(kctx->kc_enctype)))
+ goto out_err;
+
+ if (get_bytes(&p, end, &keysize, sizeof(keysize)))
+ goto out_err;
+
+ if (get_keyblock(&p, end, &kctx->kc_keye, keysize))
+ goto out_err;
+
+ /* seq key */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) ||
+ tmp_uint != kctx->kc_enctype)
+ goto out_err;
+
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)) ||
+ tmp_uint != keysize)
+ goto out_err;
+
+ if (get_keyblock(&p, end, &kctx->kc_keyc, keysize))
+ goto out_err;
+
+ /* old style fallback */
+ if (keyblock_dup(&kctx->kc_keyi, &kctx->kc_keyc))
+ goto out_err;
+
+ if (p != end)
+ goto out_err;
+
+ CDEBUG(D_SEC, "succesfully imported rfc1964 context\n");
+ return 0;
+out_err:
+ return GSS_S_FAILURE;
+}
+
+/* Flags for version 2 context flags */
+#define KRB5_CTX_FLAG_INITIATOR 0x00000001
+#define KRB5_CTX_FLAG_CFX 0x00000002
+#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004
+
+static
+__u32 import_context_rfc4121(struct krb5_ctx *kctx, char *p, char *end)
+{
+ unsigned int tmp_uint, keysize;
+
+ /* end time */
+ if (get_bytes(&p, end, &kctx->kc_endtime, sizeof(kctx->kc_endtime)))
+ goto out_err;
+
+ /* flags */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)))
+ goto out_err;
+
+ if (tmp_uint & KRB5_CTX_FLAG_INITIATOR)
+ kctx->kc_initiate = 1;
+ if (tmp_uint & KRB5_CTX_FLAG_CFX)
+ kctx->kc_cfx = 1;
+ if (tmp_uint & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
+ kctx->kc_have_acceptor_subkey = 1;
+
+ /* seq send */
+ if (get_bytes(&p, end, &kctx->kc_seq_send, sizeof(kctx->kc_seq_send)))
+ goto out_err;
+
+ /* enctype */
+ if (get_bytes(&p, end, &kctx->kc_enctype, sizeof(kctx->kc_enctype)))
+ goto out_err;
+
+ /* size of each key */
+ if (get_bytes(&p, end, &keysize, sizeof(keysize)))
+ goto out_err;
+
+ /* number of keys - should always be 3 */
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint)))
+ goto out_err;
+
+ if (tmp_uint != 3) {
+ CERROR("Invalid number of keys: %u\n", tmp_uint);
+ goto out_err;
+ }
+
+ /* ke */
+ if (get_keyblock(&p, end, &kctx->kc_keye, keysize))
+ goto out_err;
+ /* ki */
+ if (get_keyblock(&p, end, &kctx->kc_keyi, keysize))
+ goto out_err;
+ /* ki */
+ if (get_keyblock(&p, end, &kctx->kc_keyc, keysize))
+ goto out_err;
+
+ CDEBUG(D_SEC, "succesfully imported v2 context\n");
+ return 0;
+out_err:
+ return GSS_S_FAILURE;
+}
+
+/*
+ * The whole purpose here is trying to keep user level gss context parsing
+ * from nfs-utils unchanged as possible as we can, they are not quite mature
+ * yet, and many stuff still not clear, like heimdal etc.
+ */
+static
+__u32 gss_import_sec_context_kerberos(rawobj_t *inbuf,
+ struct gss_ctx *gctx)
+{
+ struct krb5_ctx *kctx;
+ char *p = (char *) inbuf->data;
+ char *end = (char *) (inbuf->data + inbuf->len);
+ unsigned int tmp_uint, rc;
+
+ if (get_bytes(&p, end, &tmp_uint, sizeof(tmp_uint))) {
+ CERROR("Fail to read version\n");
+ return GSS_S_FAILURE;
+ }
+
+ /* only support 0, 1 for the moment */
+ if (tmp_uint > 2) {
+ CERROR("Invalid version %u\n", tmp_uint);
+ return GSS_S_FAILURE;
+ }
+
+ OBD_ALLOC_PTR(kctx);
+ if (!kctx)
+ return GSS_S_FAILURE;
+
+ if (tmp_uint == 0 || tmp_uint == 1) {
+ kctx->kc_initiate = tmp_uint;
+ rc = import_context_rfc1964(kctx, p, end);
+ } else {
+ rc = import_context_rfc4121(kctx, p, end);
+ }
+
+ if (rc == 0)
+ rc = krb5_init_keys(kctx);
+
+ if (rc) {
+ delete_context_kerberos(kctx);
+ OBD_FREE_PTR(kctx);
+
+ return GSS_S_FAILURE;
+ }
+
+ gctx->internal_ctx_id = kctx;
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_copy_reverse_context_kerberos(struct gss_ctx *gctx,
+ struct gss_ctx *gctx_new)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_ctx *knew;
+
+ OBD_ALLOC_PTR(knew);
+ if (!knew)
+ return GSS_S_FAILURE;
+
+ knew->kc_initiate = kctx->kc_initiate ? 0 : 1;
+ knew->kc_cfx = kctx->kc_cfx;
+ knew->kc_seed_init = kctx->kc_seed_init;
+ knew->kc_have_acceptor_subkey = kctx->kc_have_acceptor_subkey;
+ knew->kc_endtime = kctx->kc_endtime;
+
+ memcpy(knew->kc_seed, kctx->kc_seed, sizeof(kctx->kc_seed));
+ knew->kc_seq_send = kctx->kc_seq_recv;
+ knew->kc_seq_recv = kctx->kc_seq_send;
+ knew->kc_enctype = kctx->kc_enctype;
+
+ if (rawobj_dup(&knew->kc_mech_used, &kctx->kc_mech_used))
+ goto out_err;
+
+ if (keyblock_dup(&knew->kc_keye, &kctx->kc_keye))
+ goto out_err;
+ if (keyblock_dup(&knew->kc_keyi, &kctx->kc_keyi))
+ goto out_err;
+ if (keyblock_dup(&knew->kc_keyc, &kctx->kc_keyc))
+ goto out_err;
+ if (krb5_init_keys(knew))
+ goto out_err;
+
+ gctx_new->internal_ctx_id = knew;
+ CDEBUG(D_SEC, "succesfully copied reverse context\n");
+ return GSS_S_COMPLETE;
+
+out_err:
+ delete_context_kerberos(knew);
+ OBD_FREE_PTR(knew);
+ return GSS_S_FAILURE;
+}
+
+static
+__u32 gss_inquire_context_kerberos(struct gss_ctx *gctx,
+ unsigned long *endtime)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ *endtime = (unsigned long) ((__u32) kctx->kc_endtime);
+ return GSS_S_COMPLETE;
+}
+
+static
+void gss_delete_sec_context_kerberos(void *internal_ctx)
+{
+ struct krb5_ctx *kctx = internal_ctx;
+
+ delete_context_kerberos(kctx);
+ OBD_FREE_PTR(kctx);
+}
+
+static
+void buf_to_sg(struct scatterlist *sg, void *ptr, int len)
+{
+ sg_set_buf(sg, ptr, len);
+}
+
+static
+__u32 krb5_encrypt(struct ll_crypto_cipher *tfm,
+ int decrypt,
+ void * iv,
+ void * in,
+ void * out,
+ int length)
+{
+ struct blkcipher_desc desc;
+ struct scatterlist sg;
+ __u8 local_iv[16] = {0};
+ __u32 ret = -EINVAL;
+
+ LASSERT(tfm);
+ desc.tfm = tfm;
+ desc.info = local_iv;
+ desc.flags= 0;
+
+ if (length % ll_crypto_blkcipher_blocksize(tfm) != 0) {
+ CERROR("output length %d mismatch blocksize %d\n",
+ length, ll_crypto_blkcipher_blocksize(tfm));
+ goto out;
+ }
+
+ if (ll_crypto_blkcipher_ivsize(tfm) > 16) {
+ CERROR("iv size too large %d\n", ll_crypto_blkcipher_ivsize(tfm));
+ goto out;
+ }
+
+ if (iv)
+ memcpy(local_iv, iv, ll_crypto_blkcipher_ivsize(tfm));
+
+ memcpy(out, in, length);
+ buf_to_sg(&sg, out, length);
+
+ if (decrypt)
+ ret = ll_crypto_blkcipher_decrypt_iv(&desc, &sg, &sg, length);
+ else
+ ret = ll_crypto_blkcipher_encrypt_iv(&desc, &sg, &sg, length);
+
+out:
+ return(ret);
+}
+
+
+static inline
+int krb5_digest_hmac(struct ll_crypto_hash *tfm,
+ rawobj_t *key,
+ struct krb5_header *khdr,
+ int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
+ rawobj_t *cksum)
+{
+ struct hash_desc desc;
+ struct scatterlist sg[1];
+ int i;
+
+ ll_crypto_hash_setkey(tfm, key->data, key->len);
+ desc.tfm = tfm;
+ desc.flags= 0;
+
+ ll_crypto_hash_init(&desc);
+
+ for (i = 0; i < msgcnt; i++) {
+ if (msgs[i].len == 0)
+ continue;
+ buf_to_sg(sg, (char *) msgs[i].data, msgs[i].len);
+ ll_crypto_hash_update(&desc, sg, msgs[i].len);
+ }
+
+ for (i = 0; i < iovcnt; i++) {
+ if (iovs[i].kiov_len == 0)
+ continue;
+
+ sg_set_page(&sg[0], iovs[i].kiov_page, iovs[i].kiov_len,
+ iovs[i].kiov_offset);
+ ll_crypto_hash_update(&desc, sg, iovs[i].kiov_len);
+ }
+
+ if (khdr) {
+ buf_to_sg(sg, (char *) khdr, sizeof(*khdr));
+ ll_crypto_hash_update(&desc, sg, sizeof(*khdr));
+ }
+
+ return ll_crypto_hash_final(&desc, cksum->data);
+}
+
+
+static inline
+int krb5_digest_norm(struct ll_crypto_hash *tfm,
+ struct krb5_keyblock *kb,
+ struct krb5_header *khdr,
+ int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
+ rawobj_t *cksum)
+{
+ struct hash_desc desc;
+ struct scatterlist sg[1];
+ int i;
+
+ LASSERT(kb->kb_tfm);
+ desc.tfm = tfm;
+ desc.flags= 0;
+
+ ll_crypto_hash_init(&desc);
+
+ for (i = 0; i < msgcnt; i++) {
+ if (msgs[i].len == 0)
+ continue;
+ buf_to_sg(sg, (char *) msgs[i].data, msgs[i].len);
+ ll_crypto_hash_update(&desc, sg, msgs[i].len);
+ }
+
+ for (i = 0; i < iovcnt; i++) {
+ if (iovs[i].kiov_len == 0)
+ continue;
+
+ sg_set_page(&sg[0], iovs[i].kiov_page, iovs[i].kiov_len,
+ iovs[i].kiov_offset);
+ ll_crypto_hash_update(&desc, sg, iovs[i].kiov_len);
+ }
+
+ if (khdr) {
+ buf_to_sg(sg, (char *) khdr, sizeof(*khdr));
+ ll_crypto_hash_update(&desc, sg, sizeof(*khdr));
+ }
+
+ ll_crypto_hash_final(&desc, cksum->data);
+
+ return krb5_encrypt(kb->kb_tfm, 0, NULL, cksum->data,
+ cksum->data, cksum->len);
+}
+
+/*
+ * compute (keyed/keyless) checksum against the plain text which appended
+ * with krb5 wire token header.
+ */
+static
+__s32 krb5_make_checksum(__u32 enctype,
+ struct krb5_keyblock *kb,
+ struct krb5_header *khdr,
+ int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
+ rawobj_t *cksum)
+{
+ struct krb5_enctype *ke = &enctypes[enctype];
+ struct ll_crypto_hash *tfm;
+ __u32 code = GSS_S_FAILURE;
+ int rc;
+
+ if (!(tfm = ll_crypto_alloc_hash(ke->ke_hash_name, 0, 0))) {
+ CERROR("failed to alloc TFM: %s\n", ke->ke_hash_name);
+ return GSS_S_FAILURE;
+ }
+
+ cksum->len = ll_crypto_hash_digestsize(tfm);
+ OBD_ALLOC_LARGE(cksum->data, cksum->len);
+ if (!cksum->data) {
+ cksum->len = 0;
+ goto out_tfm;
+ }
+
+ if (ke->ke_hash_hmac)
+ rc = krb5_digest_hmac(tfm, &kb->kb_key,
+ khdr, msgcnt, msgs, iovcnt, iovs, cksum);
+ else
+ rc = krb5_digest_norm(tfm, kb,
+ khdr, msgcnt, msgs, iovcnt, iovs, cksum);
+
+ if (rc == 0)
+ code = GSS_S_COMPLETE;
+out_tfm:
+ ll_crypto_free_hash(tfm);
+ return code;
+}
+
+static void fill_krb5_header(struct krb5_ctx *kctx,
+ struct krb5_header *khdr,
+ int privacy)
+{
+ unsigned char acceptor_flag;
+
+ acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR;
+
+ if (privacy) {
+ khdr->kh_tok_id = cpu_to_be16(KG_TOK_WRAP_MSG);
+ khdr->kh_flags = acceptor_flag | FLAG_WRAP_CONFIDENTIAL;
+ khdr->kh_ec = cpu_to_be16(0);
+ khdr->kh_rrc = cpu_to_be16(0);
+ } else {
+ khdr->kh_tok_id = cpu_to_be16(KG_TOK_MIC_MSG);
+ khdr->kh_flags = acceptor_flag;
+ khdr->kh_ec = cpu_to_be16(0xffff);
+ khdr->kh_rrc = cpu_to_be16(0xffff);
+ }
+
+ khdr->kh_filler = 0xff;
+ spin_lock(&krb5_seq_lock);
+ khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++);
+ spin_unlock(&krb5_seq_lock);
+}
+
+static __u32 verify_krb5_header(struct krb5_ctx *kctx,
+ struct krb5_header *khdr,
+ int privacy)
+{
+ unsigned char acceptor_flag;
+ __u16 tok_id, ec_rrc;
+
+ acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0;
+
+ if (privacy) {
+ tok_id = KG_TOK_WRAP_MSG;
+ ec_rrc = 0x0;
+ } else {
+ tok_id = KG_TOK_MIC_MSG;
+ ec_rrc = 0xffff;
+ }
+
+ /* sanity checks */
+ if (be16_to_cpu(khdr->kh_tok_id) != tok_id) {
+ CERROR("bad token id\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) {
+ CERROR("bad direction flag\n");
+ return GSS_S_BAD_SIG;
+ }
+ if (privacy && (khdr->kh_flags & FLAG_WRAP_CONFIDENTIAL) == 0) {
+ CERROR("missing confidential flag\n");
+ return GSS_S_BAD_SIG;
+ }
+ if (khdr->kh_filler != 0xff) {
+ CERROR("bad filler\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ if (be16_to_cpu(khdr->kh_ec) != ec_rrc ||
+ be16_to_cpu(khdr->kh_rrc) != ec_rrc) {
+ CERROR("bad EC or RRC\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_get_mic_kerberos(struct gss_ctx *gctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+
+ /* fill krb5 header */
+ LASSERT(token->len >= sizeof(*khdr));
+ khdr = (struct krb5_header *) token->data;
+ fill_krb5_header(kctx, khdr, 0);
+
+ /* checksum */
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc,
+ khdr, msgcnt, msgs, iovcnt, iovs, &cksum))
+ return GSS_S_FAILURE;
+
+ LASSERT(cksum.len >= ke->ke_hash_size);
+ LASSERT(token->len >= sizeof(*khdr) + ke->ke_hash_size);
+ memcpy(khdr + 1, cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size);
+
+ token->len = sizeof(*khdr) + ke->ke_hash_size;
+ rawobj_free(&cksum);
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_verify_mic_kerberos(struct gss_ctx *gctx,
+ int msgcnt,
+ rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ __u32 major;
+
+ if (token->len < sizeof(*khdr)) {
+ CERROR("short signature: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ khdr = (struct krb5_header *) token->data;
+
+ major = verify_krb5_header(kctx, khdr, 0);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
+ }
+
+ if (token->len < sizeof(*khdr) + ke->ke_hash_size) {
+ CERROR("short signature: %u, require %d\n",
+ token->len, (int) sizeof(*khdr) + ke->ke_hash_size);
+ return GSS_S_FAILURE;
+ }
+
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc,
+ khdr, msgcnt, msgs, iovcnt, iovs, &cksum)) {
+ CERROR("failed to make checksum\n");
+ return GSS_S_FAILURE;
+ }
+
+ LASSERT(cksum.len >= ke->ke_hash_size);
+ if (memcmp(khdr + 1, cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size)) {
+ CERROR("checksum mismatch\n");
+ rawobj_free(&cksum);
+ return GSS_S_BAD_SIG;
+ }
+
+ rawobj_free(&cksum);
+ return GSS_S_COMPLETE;
+}
+
+static
+int add_padding(rawobj_t *msg, int msg_buflen, int blocksize)
+{
+ int padding;
+
+ padding = (blocksize - (msg->len & (blocksize - 1))) &
+ (blocksize - 1);
+ if (!padding)
+ return 0;
+
+ if (msg->len + padding > msg_buflen) {
+ CERROR("bufsize %u too small: datalen %u, padding %u\n",
+ msg_buflen, msg->len, padding);
+ return -EINVAL;
+ }
+
+ memset(msg->data + msg->len, padding, padding);
+ msg->len += padding;
+ return 0;
+}
+
+static
+int krb5_encrypt_rawobjs(struct ll_crypto_cipher *tfm,
+ int mode_ecb,
+ int inobj_cnt,
+ rawobj_t *inobjs,
+ rawobj_t *outobj,
+ int enc)
+{
+ struct blkcipher_desc desc;
+ struct scatterlist src, dst;
+ __u8 local_iv[16] = {0}, *buf;
+ __u32 datalen = 0;
+ int i, rc;
+ ENTRY;
+
+ buf = outobj->data;
+ desc.tfm = tfm;
+ desc.info = local_iv;
+ desc.flags = 0;
+
+ for (i = 0; i < inobj_cnt; i++) {
+ LASSERT(buf + inobjs[i].len <= outobj->data + outobj->len);
+
+ buf_to_sg(&src, inobjs[i].data, inobjs[i].len);
+ buf_to_sg(&dst, buf, outobj->len - datalen);
+
+ if (mode_ecb) {
+ if (enc)
+ rc = ll_crypto_blkcipher_encrypt(
+ &desc, &dst, &src, src.length);
+ else
+ rc = ll_crypto_blkcipher_decrypt(
+ &desc, &dst, &src, src.length);
+ } else {
+ if (enc)
+ rc = ll_crypto_blkcipher_encrypt_iv(
+ &desc, &dst, &src, src.length);
+ else
+ rc = ll_crypto_blkcipher_decrypt_iv(
+ &desc, &dst, &src, src.length);
+ }
+
+ if (rc) {
+ CERROR("encrypt error %d\n", rc);
+ RETURN(rc);
+ }
+
+ datalen += inobjs[i].len;
+ buf += inobjs[i].len;
+ }
+
+ outobj->len = datalen;
+ RETURN(0);
+}
+
+/*
+ * if adj_nob != 0, we adjust desc->bd_nob to the actual cipher text size.
+ */
+static
+int krb5_encrypt_bulk(struct ll_crypto_cipher *tfm,
+ struct krb5_header *khdr,
+ char *confounder,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *cipher,
+ int adj_nob)
+{
+ struct blkcipher_desc ciph_desc;
+ __u8 local_iv[16] = {0};
+ struct scatterlist src, dst;
+ int blocksize, i, rc, nob = 0;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+
+ blocksize = ll_crypto_blkcipher_blocksize(tfm);
+ LASSERT(blocksize > 1);
+ LASSERT(cipher->len == blocksize + sizeof(*khdr));
+
+ ciph_desc.tfm = tfm;
+ ciph_desc.info = local_iv;
+ ciph_desc.flags = 0;
+
+ /* encrypt confounder */
+ buf_to_sg(&src, confounder, blocksize);
+ buf_to_sg(&dst, cipher->data, blocksize);
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc, &dst, &src, blocksize);
+ if (rc) {
+ CERROR("error to encrypt confounder: %d\n", rc);
+ return rc;
+ }
+
+ /* encrypt clear pages */
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ sg_set_page(&src, desc->bd_iov[i].kiov_page,
+ (desc->bd_iov[i].kiov_len + blocksize - 1) &
+ (~(blocksize - 1)),
+ desc->bd_iov[i].kiov_offset);
+ if (adj_nob)
+ nob += src.length;
+ sg_set_page(&dst, desc->bd_enc_iov[i].kiov_page, src.length,
+ src.offset);
+
+ desc->bd_enc_iov[i].kiov_offset = dst.offset;
+ desc->bd_enc_iov[i].kiov_len = dst.length;
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc, &dst, &src,
+ src.length);
+ if (rc) {
+ CERROR("error to encrypt page: %d\n", rc);
+ return rc;
+ }
+ }
+
+ /* encrypt krb5 header */
+ buf_to_sg(&src, khdr, sizeof(*khdr));
+ buf_to_sg(&dst, cipher->data + blocksize, sizeof(*khdr));
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc,
+ &dst, &src, sizeof(*khdr));
+ if (rc) {
+ CERROR("error to encrypt krb5 header: %d\n", rc);
+ return rc;
+ }
+
+ if (adj_nob)
+ desc->bd_nob = nob;
+
+ return 0;
+}
+
+/*
+ * desc->bd_nob_transferred is the size of cipher text received.
+ * desc->bd_nob is the target size of plain text supposed to be.
+ *
+ * if adj_nob != 0, we adjust each page's kiov_len to the actual
+ * plain text size.
+ * - for client read: we don't know data size for each page, so
+ * bd_iov[]->kiov_len is set to PAGE_SIZE, but actual data received might
+ * be smaller, so we need to adjust it according to bd_enc_iov[]->kiov_len.
+ * this means we DO NOT support the situation that server send an odd size
+ * data in a page which is not the last one.
+ * - for server write: we knows exactly data size for each page being expected,
+ * thus kiov_len is accurate already, so we should not adjust it at all.
+ * and bd_enc_iov[]->kiov_len should be round_up(bd_iov[]->kiov_len) which
+ * should have been done by prep_bulk().
+ */
+static
+int krb5_decrypt_bulk(struct ll_crypto_cipher *tfm,
+ struct krb5_header *khdr,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *cipher,
+ rawobj_t *plain,
+ int adj_nob)
+{
+ struct blkcipher_desc ciph_desc;
+ __u8 local_iv[16] = {0};
+ struct scatterlist src, dst;
+ int ct_nob = 0, pt_nob = 0;
+ int blocksize, i, rc;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+ LASSERT(desc->bd_nob_transferred);
+
+ blocksize = ll_crypto_blkcipher_blocksize(tfm);
+ LASSERT(blocksize > 1);
+ LASSERT(cipher->len == blocksize + sizeof(*khdr));
+
+ ciph_desc.tfm = tfm;
+ ciph_desc.info = local_iv;
+ ciph_desc.flags = 0;
+
+ if (desc->bd_nob_transferred % blocksize) {
+ CERROR("odd transferred nob: %d\n", desc->bd_nob_transferred);
+ return -EPROTO;
+ }
+
+ /* decrypt head (confounder) */
+ buf_to_sg(&src, cipher->data, blocksize);
+ buf_to_sg(&dst, plain->data, blocksize);
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc, &dst, &src, blocksize);
+ if (rc) {
+ CERROR("error to decrypt confounder: %d\n", rc);
+ return rc;
+ }
+
+ for (i = 0; i < desc->bd_iov_count && ct_nob < desc->bd_nob_transferred;
+ i++) {
+ if (desc->bd_enc_iov[i].kiov_offset % blocksize != 0 ||
+ desc->bd_enc_iov[i].kiov_len % blocksize != 0) {
+ CERROR("page %d: odd offset %u len %u, blocksize %d\n",
+ i, desc->bd_enc_iov[i].kiov_offset,
+ desc->bd_enc_iov[i].kiov_len, blocksize);
+ return -EFAULT;
+ }
+
+ if (adj_nob) {
+ if (ct_nob + desc->bd_enc_iov[i].kiov_len >
+ desc->bd_nob_transferred)
+ desc->bd_enc_iov[i].kiov_len =
+ desc->bd_nob_transferred - ct_nob;
+
+ desc->bd_iov[i].kiov_len = desc->bd_enc_iov[i].kiov_len;
+ if (pt_nob + desc->bd_enc_iov[i].kiov_len >desc->bd_nob)
+ desc->bd_iov[i].kiov_len = desc->bd_nob -pt_nob;
+ } else {
+ /* this should be guaranteed by LNET */
+ LASSERT(ct_nob + desc->bd_enc_iov[i].kiov_len <=
+ desc->bd_nob_transferred);
+ LASSERT(desc->bd_iov[i].kiov_len <=
+ desc->bd_enc_iov[i].kiov_len);
+ }
+
+ if (desc->bd_enc_iov[i].kiov_len == 0)
+ continue;
+
+ sg_set_page(&src, desc->bd_enc_iov[i].kiov_page,
+ desc->bd_enc_iov[i].kiov_len,
+ desc->bd_enc_iov[i].kiov_offset);
+ dst = src;
+ if (desc->bd_iov[i].kiov_len % blocksize == 0)
+ sg_assign_page(&dst, desc->bd_iov[i].kiov_page);
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc, &dst, &src,
+ src.length);
+ if (rc) {
+ CERROR("error to decrypt page: %d\n", rc);
+ return rc;
+ }
+
+ if (desc->bd_iov[i].kiov_len % blocksize != 0) {
+ memcpy(page_address(desc->bd_iov[i].kiov_page) +
+ desc->bd_iov[i].kiov_offset,
+ page_address(desc->bd_enc_iov[i].kiov_page) +
+ desc->bd_iov[i].kiov_offset,
+ desc->bd_iov[i].kiov_len);
+ }
+
+ ct_nob += desc->bd_enc_iov[i].kiov_len;
+ pt_nob += desc->bd_iov[i].kiov_len;
+ }
+
+ if (unlikely(ct_nob != desc->bd_nob_transferred)) {
+ CERROR("%d cipher text transferred but only %d decrypted\n",
+ desc->bd_nob_transferred, ct_nob);
+ return -EFAULT;
+ }
+
+ if (unlikely(!adj_nob && pt_nob != desc->bd_nob)) {
+ CERROR("%d plain text expected but only %d received\n",
+ desc->bd_nob, pt_nob);
+ return -EFAULT;
+ }
+
+ /* if needed, clear up the rest unused iovs */
+ if (adj_nob)
+ while (i < desc->bd_iov_count)
+ desc->bd_iov[i++].kiov_len = 0;
+
+ /* decrypt tail (krb5 header) */
+ buf_to_sg(&src, cipher->data + blocksize, sizeof(*khdr));
+ buf_to_sg(&dst, cipher->data + blocksize, sizeof(*khdr));
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc,
+ &dst, &src, sizeof(*khdr));
+ if (rc) {
+ CERROR("error to decrypt tail: %d\n", rc);
+ return rc;
+ }
+
+ if (memcmp(cipher->data + blocksize, khdr, sizeof(*khdr))) {
+ CERROR("krb5 header doesn't match\n");
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static
+__u32 gss_wrap_kerberos(struct gss_ctx *gctx,
+ rawobj_t *gsshdr,
+ rawobj_t *msg,
+ int msg_buflen,
+ rawobj_t *token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ int blocksize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t data_desc[3], cipher;
+ __u8 conf[GSS_MAX_CIPHER_BLOCK];
+ int rc = 0;
+
+ LASSERT(ke);
+ LASSERT(ke->ke_conf_size <= GSS_MAX_CIPHER_BLOCK);
+ LASSERT(kctx->kc_keye.kb_tfm == NULL ||
+ ke->ke_conf_size >=
+ ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm));
+
+ /*
+ * final token format:
+ * ---------------------------------------------------
+ * | krb5 header | cipher text | checksum (16 bytes) |
+ * ---------------------------------------------------
+ */
+
+ /* fill krb5 header */
+ LASSERT(token->len >= sizeof(*khdr));
+ khdr = (struct krb5_header *) token->data;
+ fill_krb5_header(kctx, khdr, 1);
+
+ /* generate confounder */
+ cfs_get_random_bytes(conf, ke->ke_conf_size);
+
+ /* get encryption blocksize. note kc_keye might not associated with
+ * a tfm, currently only for arcfour-hmac */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+ LASSERT(blocksize <= ke->ke_conf_size);
+
+ /* padding the message */
+ if (add_padding(msg, msg_buflen, blocksize))
+ return GSS_S_FAILURE;
+
+ /*
+ * clear text layout for checksum:
+ * ------------------------------------------------------
+ * | confounder | gss header | clear msgs | krb5 header |
+ * ------------------------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+ data_desc[1].data = gsshdr->data;
+ data_desc[1].len = gsshdr->len;
+ data_desc[2].data = msg->data;
+ data_desc[2].len = msg->len;
+
+ /* compute checksum */
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 3, data_desc, 0, NULL, &cksum))
+ return GSS_S_FAILURE;
+ LASSERT(cksum.len >= ke->ke_hash_size);
+
+ /*
+ * clear text layout for encryption:
+ * -----------------------------------------
+ * | confounder | clear msgs | krb5 header |
+ * -----------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+ data_desc[1].data = msg->data;
+ data_desc[1].len = msg->len;
+ data_desc[2].data = (__u8 *) khdr;
+ data_desc[2].len = sizeof(*khdr);
+
+ /* cipher text will be directly inplace */
+ cipher.data = (__u8 *) (khdr + 1);
+ cipher.len = token->len - sizeof(*khdr);
+ LASSERT(cipher.len >= ke->ke_conf_size + msg->len + sizeof(*khdr));
+
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ rawobj_t arc4_keye;
+ struct ll_crypto_cipher *arc4_tfm;
+
+ if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi,
+ NULL, 1, &cksum, 0, NULL, &arc4_keye)) {
+ CERROR("failed to obtain arc4 enc key\n");
+ GOTO(arc4_out, rc = -EACCES);
+ }
+
+ arc4_tfm = ll_crypto_alloc_blkcipher("ecb(arc4)", 0, 0);
+ if (IS_ERR(arc4_tfm)) {
+ CERROR("failed to alloc tfm arc4 in ECB mode\n");
+ GOTO(arc4_out_key, rc = -EACCES);
+ }
+
+ if (ll_crypto_blkcipher_setkey(arc4_tfm, arc4_keye.data,
+ arc4_keye.len)) {
+ CERROR("failed to set arc4 key, len %d\n",
+ arc4_keye.len);
+ GOTO(arc4_out_tfm, rc = -EACCES);
+ }
+
+ rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
+ 3, data_desc, &cipher, 1);
+arc4_out_tfm:
+ ll_crypto_free_blkcipher(arc4_tfm);
+arc4_out_key:
+ rawobj_free(&arc4_keye);
+arc4_out:
+ do {} while(0); /* just to avoid compile warning */
+ } else {
+ rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
+ 3, data_desc, &cipher, 1);
+ }
+
+ if (rc != 0) {
+ rawobj_free(&cksum);
+ return GSS_S_FAILURE;
+ }
+
+ /* fill in checksum */
+ LASSERT(token->len >= sizeof(*khdr) + cipher.len + ke->ke_hash_size);
+ memcpy((char *)(khdr + 1) + cipher.len,
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size);
+ rawobj_free(&cksum);
+
+ /* final token length */
+ token->len = sizeof(*khdr) + cipher.len + ke->ke_hash_size;
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_prep_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ int blocksize, i;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+ LASSERT(kctx->kc_keye.kb_tfm);
+
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page);
+ /*
+ * offset should always start at page boundary of either
+ * client or server side.
+ */
+ if (desc->bd_iov[i].kiov_offset & blocksize) {
+ CERROR("odd offset %d in page %d\n",
+ desc->bd_iov[i].kiov_offset, i);
+ return GSS_S_FAILURE;
+ }
+
+ desc->bd_enc_iov[i].kiov_offset = desc->bd_iov[i].kiov_offset;
+ desc->bd_enc_iov[i].kiov_len = (desc->bd_iov[i].kiov_len +
+ blocksize - 1) & (~(blocksize - 1));
+ }
+
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_wrap_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token, int adj_nob)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ int blocksize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t data_desc[1], cipher;
+ __u8 conf[GSS_MAX_CIPHER_BLOCK];
+ int rc = 0;
+
+ LASSERT(ke);
+ LASSERT(ke->ke_conf_size <= GSS_MAX_CIPHER_BLOCK);
+
+ /*
+ * final token format:
+ * --------------------------------------------------
+ * | krb5 header | head/tail cipher text | checksum |
+ * --------------------------------------------------
+ */
+
+ /* fill krb5 header */
+ LASSERT(token->len >= sizeof(*khdr));
+ khdr = (struct krb5_header *) token->data;
+ fill_krb5_header(kctx, khdr, 1);
+
+ /* generate confounder */
+ cfs_get_random_bytes(conf, ke->ke_conf_size);
+
+ /* get encryption blocksize. note kc_keye might not associated with
+ * a tfm, currently only for arcfour-hmac */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+
+ /*
+ * we assume the size of krb5_header (16 bytes) must be n * blocksize.
+ * the bulk token size would be exactly (sizeof(krb5_header) +
+ * blocksize + sizeof(krb5_header) + hashsize)
+ */
+ LASSERT(blocksize <= ke->ke_conf_size);
+ LASSERT(sizeof(*khdr) >= blocksize && sizeof(*khdr) % blocksize == 0);
+ LASSERT(token->len >= sizeof(*khdr) + blocksize + sizeof(*khdr) + 16);
+
+ /*
+ * clear text layout for checksum:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+
+ /* compute checksum */
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 1, data_desc,
+ desc->bd_iov_count, desc->bd_iov,
+ &cksum))
+ return GSS_S_FAILURE;
+ LASSERT(cksum.len >= ke->ke_hash_size);
+
+ /*
+ * clear text layout for encryption:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ * | | |
+ * ---------- (cipher pages) |
+ * result token: | |
+ * -------------------------------------------
+ * | krb5 header | cipher text | cipher text |
+ * -------------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+
+ cipher.data = (__u8 *) (khdr + 1);
+ cipher.len = blocksize + sizeof(*khdr);
+
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LBUG();
+ rc = 0;
+ } else {
+ rc = krb5_encrypt_bulk(kctx->kc_keye.kb_tfm, khdr,
+ conf, desc, &cipher, adj_nob);
+ }
+
+ if (rc != 0) {
+ rawobj_free(&cksum);
+ return GSS_S_FAILURE;
+ }
+
+ /* fill in checksum */
+ LASSERT(token->len >= sizeof(*khdr) + cipher.len + ke->ke_hash_size);
+ memcpy((char *)(khdr + 1) + cipher.len,
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size);
+ rawobj_free(&cksum);
+
+ /* final token length */
+ token->len = sizeof(*khdr) + cipher.len + ke->ke_hash_size;
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_unwrap_kerberos(struct gss_ctx *gctx,
+ rawobj_t *gsshdr,
+ rawobj_t *token,
+ rawobj_t *msg)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ unsigned char *tmpbuf;
+ int blocksize, bodysize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t cipher_in, plain_out;
+ rawobj_t hash_objs[3];
+ int rc = 0;
+ __u32 major;
+
+ LASSERT(ke);
+
+ if (token->len < sizeof(*khdr)) {
+ CERROR("short signature: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ khdr = (struct krb5_header *) token->data;
+
+ major = verify_krb5_header(kctx, khdr, 1);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
+ }
+
+ /* block size */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+
+ /* expected token layout:
+ * ----------------------------------------
+ * | krb5 header | cipher text | checksum |
+ * ----------------------------------------
+ */
+ bodysize = token->len - sizeof(*khdr) - ke->ke_hash_size;
+
+ if (bodysize % blocksize) {
+ CERROR("odd bodysize %d\n", bodysize);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ if (bodysize <= ke->ke_conf_size + sizeof(*khdr)) {
+ CERROR("incomplete token: bodysize %d\n", bodysize);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ if (msg->len < bodysize - ke->ke_conf_size - sizeof(*khdr)) {
+ CERROR("buffer too small: %u, require %d\n",
+ msg->len, bodysize - ke->ke_conf_size);
+ return GSS_S_FAILURE;
+ }
+
+ /* decrypting */
+ OBD_ALLOC_LARGE(tmpbuf, bodysize);
+ if (!tmpbuf)
+ return GSS_S_FAILURE;
+
+ major = GSS_S_FAILURE;
+
+ cipher_in.data = (__u8 *) (khdr + 1);
+ cipher_in.len = bodysize;
+ plain_out.data = tmpbuf;
+ plain_out.len = bodysize;
+
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ rawobj_t arc4_keye;
+ struct ll_crypto_cipher *arc4_tfm;
+
+ cksum.data = token->data + token->len - ke->ke_hash_size;
+ cksum.len = ke->ke_hash_size;
+
+ if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi,
+ NULL, 1, &cksum, 0, NULL, &arc4_keye)) {
+ CERROR("failed to obtain arc4 enc key\n");
+ GOTO(arc4_out, rc = -EACCES);
+ }
+
+ arc4_tfm = ll_crypto_alloc_blkcipher("ecb(arc4)", 0, 0);
+ if (IS_ERR(arc4_tfm)) {
+ CERROR("failed to alloc tfm arc4 in ECB mode\n");
+ GOTO(arc4_out_key, rc = -EACCES);
+ }
+
+ if (ll_crypto_blkcipher_setkey(arc4_tfm,
+ arc4_keye.data, arc4_keye.len)) {
+ CERROR("failed to set arc4 key, len %d\n",
+ arc4_keye.len);
+ GOTO(arc4_out_tfm, rc = -EACCES);
+ }
+
+ rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
+ 1, &cipher_in, &plain_out, 0);
+arc4_out_tfm:
+ ll_crypto_free_blkcipher(arc4_tfm);
+arc4_out_key:
+ rawobj_free(&arc4_keye);
+arc4_out:
+ cksum = RAWOBJ_EMPTY;
+ } else {
+ rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
+ 1, &cipher_in, &plain_out, 0);
+ }
+
+ if (rc != 0) {
+ CERROR("error decrypt\n");
+ goto out_free;
+ }
+ LASSERT(plain_out.len == bodysize);
+
+ /* expected clear text layout:
+ * -----------------------------------------
+ * | confounder | clear msgs | krb5 header |
+ * -----------------------------------------
+ */
+
+ /* verify krb5 header in token is not modified */
+ if (memcmp(khdr, plain_out.data + plain_out.len - sizeof(*khdr),
+ sizeof(*khdr))) {
+ CERROR("decrypted krb5 header mismatch\n");
+ goto out_free;
+ }
+
+ /* verify checksum, compose clear text as layout:
+ * ------------------------------------------------------
+ * | confounder | gss header | clear msgs | krb5 header |
+ * ------------------------------------------------------
+ */
+ hash_objs[0].len = ke->ke_conf_size;
+ hash_objs[0].data = plain_out.data;
+ hash_objs[1].len = gsshdr->len;
+ hash_objs[1].data = gsshdr->data;
+ hash_objs[2].len = plain_out.len - ke->ke_conf_size - sizeof(*khdr);
+ hash_objs[2].data = plain_out.data + ke->ke_conf_size;
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 3, hash_objs, 0, NULL, &cksum))
+ goto out_free;
+
+ LASSERT(cksum.len >= ke->ke_hash_size);
+ if (memcmp((char *)(khdr + 1) + bodysize,
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size)) {
+ CERROR("checksum mismatch\n");
+ goto out_free;
+ }
+
+ msg->len = bodysize - ke->ke_conf_size - sizeof(*khdr);
+ memcpy(msg->data, tmpbuf + ke->ke_conf_size, msg->len);
+
+ major = GSS_S_COMPLETE;
+out_free:
+ OBD_FREE_LARGE(tmpbuf, bodysize);
+ rawobj_free(&cksum);
+ return major;
+}
+
+static
+__u32 gss_unwrap_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token, int adj_nob)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ int blocksize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t cipher, plain;
+ rawobj_t data_desc[1];
+ int rc;
+ __u32 major;
+
+ LASSERT(ke);
+
+ if (token->len < sizeof(*khdr)) {
+ CERROR("short signature: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ khdr = (struct krb5_header *) token->data;
+
+ major = verify_krb5_header(kctx, khdr, 1);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
+ }
+
+ /* block size */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ LBUG();
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+ LASSERT(sizeof(*khdr) >= blocksize && sizeof(*khdr) % blocksize == 0);
+
+ /*
+ * token format is expected as:
+ * -----------------------------------------------
+ * | krb5 header | head/tail cipher text | cksum |
+ * -----------------------------------------------
+ */
+ if (token->len < sizeof(*khdr) + blocksize + sizeof(*khdr) +
+ ke->ke_hash_size) {
+ CERROR("short token size: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ cipher.data = (__u8 *) (khdr + 1);
+ cipher.len = blocksize + sizeof(*khdr);
+ plain.data = cipher.data;
+ plain.len = cipher.len;
+
+ rc = krb5_decrypt_bulk(kctx->kc_keye.kb_tfm, khdr,
+ desc, &cipher, &plain, adj_nob);
+ if (rc)
+ return GSS_S_DEFECTIVE_TOKEN;
+
+ /*
+ * verify checksum, compose clear text as layout:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ */
+ data_desc[0].data = plain.data;
+ data_desc[0].len = blocksize;
+
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 1, data_desc,
+ desc->bd_iov_count, desc->bd_iov,
+ &cksum))
+ return GSS_S_FAILURE;
+ LASSERT(cksum.len >= ke->ke_hash_size);
+
+ if (memcmp(plain.data + blocksize + sizeof(*khdr),
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size)) {
+ CERROR("checksum mismatch\n");
+ rawobj_free(&cksum);
+ return GSS_S_BAD_SIG;
+ }
+
+ rawobj_free(&cksum);
+ return GSS_S_COMPLETE;
+}
+
+int gss_display_kerberos(struct gss_ctx *ctx,
+ char *buf,
+ int bufsize)
+{
+ struct krb5_ctx *kctx = ctx->internal_ctx_id;
+ int written;
+
+ written = snprintf(buf, bufsize, "krb5 (%s)",
+ enctype2str(kctx->kc_enctype));
+ return written;
+}
+
+static struct gss_api_ops gss_kerberos_ops = {
+ .gss_import_sec_context = gss_import_sec_context_kerberos,
+ .gss_copy_reverse_context = gss_copy_reverse_context_kerberos,
+ .gss_inquire_context = gss_inquire_context_kerberos,
+ .gss_get_mic = gss_get_mic_kerberos,
+ .gss_verify_mic = gss_verify_mic_kerberos,
+ .gss_wrap = gss_wrap_kerberos,
+ .gss_unwrap = gss_unwrap_kerberos,
+ .gss_prep_bulk = gss_prep_bulk_kerberos,
+ .gss_wrap_bulk = gss_wrap_bulk_kerberos,
+ .gss_unwrap_bulk = gss_unwrap_bulk_kerberos,
+ .gss_delete_sec_context = gss_delete_sec_context_kerberos,
+ .gss_display = gss_display_kerberos,
+};
+
+static struct subflavor_desc gss_kerberos_sfs[] = {
+ {
+ .sf_subflavor = SPTLRPC_SUBFLVR_KRB5N,
+ .sf_qop = 0,
+ .sf_service = SPTLRPC_SVC_NULL,
+ .sf_name = "krb5n"
+ },
+ {
+ .sf_subflavor = SPTLRPC_SUBFLVR_KRB5A,
+ .sf_qop = 0,
+ .sf_service = SPTLRPC_SVC_AUTH,
+ .sf_name = "krb5a"
+ },
+ {
+ .sf_subflavor = SPTLRPC_SUBFLVR_KRB5I,
+ .sf_qop = 0,
+ .sf_service = SPTLRPC_SVC_INTG,
+ .sf_name = "krb5i"
+ },
+ {
+ .sf_subflavor = SPTLRPC_SUBFLVR_KRB5P,
+ .sf_qop = 0,
+ .sf_service = SPTLRPC_SVC_PRIV,
+ .sf_name = "krb5p"
+ },
+};
+
+/*
+ * currently we leave module owner NULL
+ */
+static struct gss_api_mech gss_kerberos_mech = {
+ .gm_owner = NULL, /*THIS_MODULE, */
+ .gm_name = "krb5",
+ .gm_oid = (rawobj_t)
+ {9, "\052\206\110\206\367\022\001\002\002"},
+ .gm_ops = &gss_kerberos_ops,
+ .gm_sf_num = 4,
+ .gm_sfs = gss_kerberos_sfs,
+};
+
+int __init init_kerberos_module(void)
+{
+ int status;
+
+ spin_lock_init(&krb5_seq_lock);
+
+ status = lgss_mech_register(&gss_kerberos_mech);
+ if (status)
+ CERROR("Failed to register kerberos gss mechanism!\n");
+ return status;
+}
+
+void __exit cleanup_kerberos_module(void)
+{
+ lgss_mech_unregister(&gss_kerberos_mech);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_mech_switch.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_mech_switch.c
new file mode 100644
index 000000000000..8cdad800382d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_mech_switch.c
@@ -0,0 +1,359 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/gss_mech_switch.c
+ *
+ * Copyright (c) 2001 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * J. Bruce Fields <bfields@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static LIST_HEAD(registered_mechs);
+static DEFINE_SPINLOCK(registered_mechs_lock);
+
+int lgss_mech_register(struct gss_api_mech *gm)
+{
+ spin_lock(&registered_mechs_lock);
+ list_add(&gm->gm_list, &registered_mechs);
+ spin_unlock(&registered_mechs_lock);
+ CWARN("Register %s mechanism\n", gm->gm_name);
+ return 0;
+}
+
+void lgss_mech_unregister(struct gss_api_mech *gm)
+{
+ spin_lock(&registered_mechs_lock);
+ list_del(&gm->gm_list);
+ spin_unlock(&registered_mechs_lock);
+ CWARN("Unregister %s mechanism\n", gm->gm_name);
+}
+
+
+struct gss_api_mech *lgss_mech_get(struct gss_api_mech *gm)
+{
+ __module_get(gm->gm_owner);
+ return gm;
+}
+
+struct gss_api_mech *lgss_name_to_mech(char *name)
+{
+ struct gss_api_mech *pos, *gm = NULL;
+
+ spin_lock(&registered_mechs_lock);
+ list_for_each_entry(pos, &registered_mechs, gm_list) {
+ if (0 == strcmp(name, pos->gm_name)) {
+ if (!try_module_get(pos->gm_owner))
+ continue;
+ gm = pos;
+ break;
+ }
+ }
+ spin_unlock(&registered_mechs_lock);
+ return gm;
+
+}
+
+static inline
+int mech_supports_subflavor(struct gss_api_mech *gm, __u32 subflavor)
+{
+ int i;
+
+ for (i = 0; i < gm->gm_sf_num; i++) {
+ if (gm->gm_sfs[i].sf_subflavor == subflavor)
+ return 1;
+ }
+ return 0;
+}
+
+struct gss_api_mech *lgss_subflavor_to_mech(__u32 subflavor)
+{
+ struct gss_api_mech *pos, *gm = NULL;
+
+ spin_lock(&registered_mechs_lock);
+ list_for_each_entry(pos, &registered_mechs, gm_list) {
+ if (!try_module_get(pos->gm_owner))
+ continue;
+ if (!mech_supports_subflavor(pos, subflavor)) {
+ module_put(pos->gm_owner);
+ continue;
+ }
+ gm = pos;
+ break;
+ }
+ spin_unlock(&registered_mechs_lock);
+ return gm;
+}
+
+void lgss_mech_put(struct gss_api_mech *gm)
+{
+ module_put(gm->gm_owner);
+}
+
+/* The mech could probably be determined from the token instead, but it's just
+ * as easy for now to pass it in. */
+__u32 lgss_import_sec_context(rawobj_t *input_token,
+ struct gss_api_mech *mech,
+ struct gss_ctx **ctx_id)
+{
+ OBD_ALLOC_PTR(*ctx_id);
+ if (*ctx_id == NULL)
+ return GSS_S_FAILURE;
+
+ (*ctx_id)->mech_type = lgss_mech_get(mech);
+
+ LASSERT(mech);
+ LASSERT(mech->gm_ops);
+ LASSERT(mech->gm_ops->gss_import_sec_context);
+ return mech->gm_ops->gss_import_sec_context(input_token, *ctx_id);
+}
+
+__u32 lgss_copy_reverse_context(struct gss_ctx *ctx_id,
+ struct gss_ctx **ctx_id_new)
+{
+ struct gss_api_mech *mech = ctx_id->mech_type;
+ __u32 major;
+
+ LASSERT(mech);
+
+ OBD_ALLOC_PTR(*ctx_id_new);
+ if (*ctx_id_new == NULL)
+ return GSS_S_FAILURE;
+
+ (*ctx_id_new)->mech_type = lgss_mech_get(mech);
+
+ LASSERT(mech);
+ LASSERT(mech->gm_ops);
+ LASSERT(mech->gm_ops->gss_copy_reverse_context);
+
+ major = mech->gm_ops->gss_copy_reverse_context(ctx_id, *ctx_id_new);
+ if (major != GSS_S_COMPLETE) {
+ lgss_mech_put(mech);
+ OBD_FREE_PTR(*ctx_id_new);
+ *ctx_id_new = NULL;
+ }
+ return major;
+}
+
+/*
+ * this interface is much simplified, currently we only need endtime.
+ */
+__u32 lgss_inquire_context(struct gss_ctx *context_handle,
+ unsigned long *endtime)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_inquire_context);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_inquire_context(context_handle,
+ endtime);
+}
+
+/* gss_get_mic: compute a mic over message and return mic_token. */
+__u32 lgss_get_mic(struct gss_ctx *context_handle,
+ int msgcnt,
+ rawobj_t *msg,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_get_mic);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_get_mic(context_handle,
+ msgcnt,
+ msg,
+ iovcnt,
+ iovs,
+ mic_token);
+}
+
+/* gss_verify_mic: check whether the provided mic_token verifies message. */
+__u32 lgss_verify_mic(struct gss_ctx *context_handle,
+ int msgcnt,
+ rawobj_t *msg,
+ int iovcnt,
+ lnet_kiov_t *iovs,
+ rawobj_t *mic_token)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_verify_mic);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_verify_mic(context_handle,
+ msgcnt,
+ msg,
+ iovcnt,
+ iovs,
+ mic_token);
+}
+
+__u32 lgss_wrap(struct gss_ctx *context_handle,
+ rawobj_t *gsshdr,
+ rawobj_t *msg,
+ int msg_buflen,
+ rawobj_t *out_token)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_wrap);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_wrap(context_handle, gsshdr, msg, msg_buflen, out_token);
+}
+
+__u32 lgss_unwrap(struct gss_ctx *context_handle,
+ rawobj_t *gsshdr,
+ rawobj_t *token,
+ rawobj_t *out_msg)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_unwrap);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_unwrap(context_handle, gsshdr, token, out_msg);
+}
+
+
+__u32 lgss_prep_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_prep_bulk);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_prep_bulk(context_handle, desc);
+}
+
+__u32 lgss_wrap_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_wrap_bulk);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_wrap_bulk(context_handle, desc, token, adj_nob);
+}
+
+__u32 lgss_unwrap_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_unwrap_bulk);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_unwrap_bulk(context_handle, desc, token, adj_nob);
+}
+
+/* gss_delete_sec_context: free all resources associated with context_handle.
+ * Note this differs from the RFC 2744-specified prototype in that we don't
+ * bother returning an output token, since it would never be used anyway. */
+
+__u32 lgss_delete_sec_context(struct gss_ctx **context_handle)
+{
+ struct gss_api_mech *mech;
+
+ CDEBUG(D_SEC, "deleting %p\n", *context_handle);
+
+ if (!*context_handle)
+ return(GSS_S_NO_CONTEXT);
+
+ mech = (*context_handle)->mech_type;
+ if ((*context_handle)->internal_ctx_id != 0) {
+ LASSERT(mech);
+ LASSERT(mech->gm_ops);
+ LASSERT(mech->gm_ops->gss_delete_sec_context);
+ mech->gm_ops->gss_delete_sec_context(
+ (*context_handle)->internal_ctx_id);
+ }
+ if (mech)
+ lgss_mech_put(mech);
+
+ OBD_FREE_PTR(*context_handle);
+ *context_handle=NULL;
+ return GSS_S_COMPLETE;
+}
+
+int lgss_display(struct gss_ctx *ctx,
+ char *buf,
+ int bufsize)
+{
+ LASSERT(ctx);
+ LASSERT(ctx->mech_type);
+ LASSERT(ctx->mech_type->gm_ops);
+ LASSERT(ctx->mech_type->gm_ops->gss_display);
+
+ return ctx->mech_type->gm_ops->gss_display(ctx, buf, bufsize);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_pipefs.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_pipefs.c
new file mode 100644
index 000000000000..3df7257b7fa0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_pipefs.c
@@ -0,0 +1,1252 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/auth_gss.c
+ *
+ * RPCSEC_GSS client authentication.
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Dug Song <dugsong@monkey.org>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/crypto.h>
+#include <asm/atomic.h>
+struct rpc_clnt; /* for rpc_pipefs */
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_sec.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static struct ptlrpc_sec_policy gss_policy_pipefs;
+static struct ptlrpc_ctx_ops gss_pipefs_ctxops;
+
+static int gss_cli_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx);
+
+static int gss_sec_pipe_upcall_init(struct gss_sec *gsec)
+{
+ return 0;
+}
+
+static void gss_sec_pipe_upcall_fini(struct gss_sec *gsec)
+{
+}
+
+/****************************************
+ * internel context helpers *
+ ****************************************/
+
+static
+struct ptlrpc_cli_ctx *ctx_create_pf(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred)
+{
+ struct gss_cli_ctx *gctx;
+ int rc;
+
+ OBD_ALLOC_PTR(gctx);
+ if (gctx == NULL)
+ return NULL;
+
+ rc = gss_cli_ctx_init_common(sec, &gctx->gc_base,
+ &gss_pipefs_ctxops, vcred);
+ if (rc) {
+ OBD_FREE_PTR(gctx);
+ return NULL;
+ }
+
+ return &gctx->gc_base;
+}
+
+static
+void ctx_destroy_pf(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx)
+{
+ struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+
+ if (gss_cli_ctx_fini_common(sec, ctx))
+ return;
+
+ OBD_FREE_PTR(gctx);
+
+ atomic_dec(&sec->ps_nctx);
+ sptlrpc_sec_put(sec);
+}
+
+static
+void ctx_enhash_pf(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
+{
+ set_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+ atomic_inc(&ctx->cc_refcount);
+ hlist_add_head(&ctx->cc_cache, hash);
+}
+
+/*
+ * caller must hold spinlock
+ */
+static
+void ctx_unhash_pf(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+ LASSERT(spin_is_locked(&ctx->cc_sec->ps_lock));
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+ LASSERT(!hlist_unhashed(&ctx->cc_cache));
+
+ clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+
+ if (atomic_dec_and_test(&ctx->cc_refcount)) {
+ __hlist_del(&ctx->cc_cache);
+ hlist_add_head(&ctx->cc_cache, freelist);
+ } else {
+ hlist_del_init(&ctx->cc_cache);
+ }
+}
+
+/*
+ * return 1 if the context is dead.
+ */
+static
+int ctx_check_death_pf(struct ptlrpc_cli_ctx *ctx,
+ struct hlist_head *freelist)
+{
+ if (cli_ctx_check_death(ctx)) {
+ if (freelist)
+ ctx_unhash_pf(ctx, freelist);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline
+int ctx_check_death_locked_pf(struct ptlrpc_cli_ctx *ctx,
+ struct hlist_head *freelist)
+{
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+
+ return ctx_check_death_pf(ctx, freelist);
+}
+
+static inline
+int ctx_match_pf(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
+{
+ /* a little bit optimization for null policy */
+ if (!ctx->cc_ops->match)
+ return 1;
+
+ return ctx->cc_ops->match(ctx, vcred);
+}
+
+static
+void ctx_list_destroy_pf(struct hlist_head *head)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ while (!hlist_empty(head)) {
+ ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx,
+ cc_cache);
+
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT,
+ &ctx->cc_flags) == 0);
+
+ hlist_del_init(&ctx->cc_cache);
+ ctx_destroy_pf(ctx->cc_sec, ctx);
+ }
+}
+
+/****************************************
+ * context apis *
+ ****************************************/
+
+static
+int gss_cli_ctx_validate_pf(struct ptlrpc_cli_ctx *ctx)
+{
+ if (ctx_check_death_pf(ctx, NULL))
+ return 1;
+ if (cli_ctx_is_ready(ctx))
+ return 0;
+ return 1;
+}
+
+static
+void gss_cli_ctx_die_pf(struct ptlrpc_cli_ctx *ctx, int grace)
+{
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ cli_ctx_expire(ctx);
+
+ spin_lock(&ctx->cc_sec->ps_lock);
+
+ if (test_and_clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags)) {
+ LASSERT(!hlist_unhashed(&ctx->cc_cache));
+ LASSERT(atomic_read(&ctx->cc_refcount) > 1);
+
+ hlist_del_init(&ctx->cc_cache);
+ if (atomic_dec_and_test(&ctx->cc_refcount))
+ LBUG();
+ }
+
+ spin_unlock(&ctx->cc_sec->ps_lock);
+}
+
+/****************************************
+ * reverse context installation *
+ ****************************************/
+
+static inline
+unsigned int ctx_hash_index(int hashsize, __u64 key)
+{
+ return (unsigned int) (key & ((__u64) hashsize - 1));
+}
+
+static
+void gss_sec_ctx_replace_pf(struct gss_sec *gsec,
+ struct ptlrpc_cli_ctx *new)
+{
+ struct gss_sec_pipefs *gsec_pf;
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *next;
+ HLIST_HEAD(freelist);
+ unsigned int hash;
+ ENTRY;
+
+ gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+ hash = ctx_hash_index(gsec_pf->gsp_chash_size,
+ (__u64) new->cc_vcred.vc_uid);
+ LASSERT(hash < gsec_pf->gsp_chash_size);
+
+ spin_lock(&gsec->gs_base.ps_lock);
+
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_pf->gsp_chash[hash], cc_cache) {
+ if (!ctx_match_pf(ctx, &new->cc_vcred))
+ continue;
+
+ cli_ctx_expire(ctx);
+ ctx_unhash_pf(ctx, &freelist);
+ break;
+ }
+
+ ctx_enhash_pf(new, &gsec_pf->gsp_chash[hash]);
+
+ spin_unlock(&gsec->gs_base.ps_lock);
+
+ ctx_list_destroy_pf(&freelist);
+ EXIT;
+}
+
+static
+int gss_install_rvs_cli_ctx_pf(struct gss_sec *gsec,
+ struct ptlrpc_svc_ctx *svc_ctx)
+{
+ struct vfs_cred vcred;
+ struct ptlrpc_cli_ctx *cli_ctx;
+ int rc;
+ ENTRY;
+
+ vcred.vc_uid = 0;
+ vcred.vc_gid = 0;
+
+ cli_ctx = ctx_create_pf(&gsec->gs_base, &vcred);
+ if (!cli_ctx)
+ RETURN(-ENOMEM);
+
+ rc = gss_copy_rvc_cli_ctx(cli_ctx, svc_ctx);
+ if (rc) {
+ ctx_destroy_pf(cli_ctx->cc_sec, cli_ctx);
+ RETURN(rc);
+ }
+
+ gss_sec_ctx_replace_pf(gsec, cli_ctx);
+ RETURN(0);
+}
+
+static
+void gss_ctx_cache_gc_pf(struct gss_sec_pipefs *gsec_pf,
+ struct hlist_head *freelist)
+{
+ struct ptlrpc_sec *sec;
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *next;
+ int i;
+ ENTRY;
+
+ sec = &gsec_pf->gsp_base.gs_base;
+
+ CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
+
+ for (i = 0; i < gsec_pf->gsp_chash_size; i++) {
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_pf->gsp_chash[i], cc_cache)
+ ctx_check_death_locked_pf(ctx, freelist);
+ }
+
+ sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+ EXIT;
+}
+
+static
+struct ptlrpc_sec* gss_sec_create_pf(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ struct sptlrpc_flavor *sf)
+{
+ struct gss_sec_pipefs *gsec_pf;
+ int alloc_size, hash_size, i;
+ ENTRY;
+
+#define GSS_SEC_PIPEFS_CTX_HASH_SIZE (32)
+
+ if (ctx ||
+ sf->sf_flags & (PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_REVERSE))
+ hash_size = 1;
+ else
+ hash_size = GSS_SEC_PIPEFS_CTX_HASH_SIZE;
+
+ alloc_size = sizeof(*gsec_pf) +
+ sizeof(struct hlist_head) * hash_size;
+
+ OBD_ALLOC(gsec_pf, alloc_size);
+ if (!gsec_pf)
+ RETURN(NULL);
+
+ gsec_pf->gsp_chash_size = hash_size;
+ for (i = 0; i < hash_size; i++)
+ INIT_HLIST_HEAD(&gsec_pf->gsp_chash[i]);
+
+ if (gss_sec_create_common(&gsec_pf->gsp_base, &gss_policy_pipefs,
+ imp, ctx, sf))
+ goto err_free;
+
+ if (ctx == NULL) {
+ if (gss_sec_pipe_upcall_init(&gsec_pf->gsp_base))
+ goto err_destroy;
+ } else {
+ if (gss_install_rvs_cli_ctx_pf(&gsec_pf->gsp_base, ctx))
+ goto err_destroy;
+ }
+
+ RETURN(&gsec_pf->gsp_base.gs_base);
+
+err_destroy:
+ gss_sec_destroy_common(&gsec_pf->gsp_base);
+err_free:
+ OBD_FREE(gsec_pf, alloc_size);
+ RETURN(NULL);
+}
+
+static
+void gss_sec_destroy_pf(struct ptlrpc_sec *sec)
+{
+ struct gss_sec_pipefs *gsec_pf;
+ struct gss_sec *gsec;
+
+ CWARN("destroy %s@%p\n", sec->ps_policy->sp_name, sec);
+
+ gsec = container_of(sec, struct gss_sec, gs_base);
+ gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+ LASSERT(gsec_pf->gsp_chash);
+ LASSERT(gsec_pf->gsp_chash_size);
+
+ gss_sec_pipe_upcall_fini(gsec);
+
+ gss_sec_destroy_common(gsec);
+
+ OBD_FREE(gsec, sizeof(*gsec_pf) +
+ sizeof(struct hlist_head) * gsec_pf->gsp_chash_size);
+}
+
+static
+struct ptlrpc_cli_ctx * gss_sec_lookup_ctx_pf(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ struct gss_sec *gsec;
+ struct gss_sec_pipefs *gsec_pf;
+ struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL;
+ struct hlist_head *hash_head;
+ struct hlist_node *next;
+ HLIST_HEAD(freelist);
+ unsigned int hash, gc = 0, found = 0;
+ ENTRY;
+
+ might_sleep();
+
+ gsec = container_of(sec, struct gss_sec, gs_base);
+ gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+ hash = ctx_hash_index(gsec_pf->gsp_chash_size,
+ (__u64) vcred->vc_uid);
+ hash_head = &gsec_pf->gsp_chash[hash];
+ LASSERT(hash < gsec_pf->gsp_chash_size);
+
+retry:
+ spin_lock(&sec->ps_lock);
+
+ /* gc_next == 0 means never do gc */
+ if (remove_dead && sec->ps_gc_next &&
+ cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
+ gss_ctx_cache_gc_pf(gsec_pf, &freelist);
+ gc = 1;
+ }
+
+ hlist_for_each_entry_safe(ctx, next, hash_head, cc_cache) {
+ if (gc == 0 &&
+ ctx_check_death_locked_pf(ctx,
+ remove_dead ? &freelist : NULL))
+ continue;
+
+ if (ctx_match_pf(ctx, vcred)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ if (new && new != ctx) {
+ /* lost the race, just free it */
+ hlist_add_head(&new->cc_cache, &freelist);
+ new = NULL;
+ }
+
+ /* hot node, move to head */
+ if (hash_head->first != &ctx->cc_cache) {
+ __hlist_del(&ctx->cc_cache);
+ hlist_add_head(&ctx->cc_cache, hash_head);
+ }
+ } else {
+ /* don't allocate for reverse sec */
+ if (sec_is_reverse(sec)) {
+ spin_unlock(&sec->ps_lock);
+ RETURN(NULL);
+ }
+
+ if (new) {
+ ctx_enhash_pf(new, hash_head);
+ ctx = new;
+ } else if (create) {
+ spin_unlock(&sec->ps_lock);
+ new = ctx_create_pf(sec, vcred);
+ if (new) {
+ clear_bit(PTLRPC_CTX_NEW_BIT, &new->cc_flags);
+ goto retry;
+ }
+ } else {
+ ctx = NULL;
+ }
+ }
+
+ /* hold a ref */
+ if (ctx)
+ atomic_inc(&ctx->cc_refcount);
+
+ spin_unlock(&sec->ps_lock);
+
+ /* the allocator of the context must give the first push to refresh */
+ if (new) {
+ LASSERT(new == ctx);
+ gss_cli_ctx_refresh_pf(new);
+ }
+
+ ctx_list_destroy_pf(&freelist);
+ RETURN(ctx);
+}
+
+static
+void gss_sec_release_ctx_pf(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx,
+ int sync)
+{
+ LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+ LASSERT(hlist_unhashed(&ctx->cc_cache));
+
+ /* if required async, we must clear the UPTODATE bit to prevent extra
+ * rpcs during destroy procedure. */
+ if (!sync)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ /* destroy this context */
+ ctx_destroy_pf(sec, ctx);
+}
+
+/*
+ * @uid: which user. "-1" means flush all.
+ * @grace: mark context DEAD, allow graceful destroy like notify
+ * server side, etc.
+ * @force: also flush busy entries.
+ *
+ * return the number of busy context encountered.
+ *
+ * In any cases, never touch "eternal" contexts.
+ */
+static
+int gss_sec_flush_ctx_cache_pf(struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace, int force)
+{
+ struct gss_sec *gsec;
+ struct gss_sec_pipefs *gsec_pf;
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *next;
+ HLIST_HEAD(freelist);
+ int i, busy = 0;
+ ENTRY;
+
+ might_sleep_if(grace);
+
+ gsec = container_of(sec, struct gss_sec, gs_base);
+ gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+ spin_lock(&sec->ps_lock);
+ for (i = 0; i < gsec_pf->gsp_chash_size; i++) {
+ hlist_for_each_entry_safe(ctx, next,
+ &gsec_pf->gsp_chash[i],
+ cc_cache) {
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+ continue;
+
+ if (atomic_read(&ctx->cc_refcount) > 1) {
+ busy++;
+ if (!force)
+ continue;
+
+ CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
+ "grace %d\n",
+ atomic_read(&ctx->cc_refcount),
+ ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec), grace);
+ }
+ ctx_unhash_pf(ctx, &freelist);
+
+ set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+ if (!grace)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT,
+ &ctx->cc_flags);
+ }
+ }
+ spin_unlock(&sec->ps_lock);
+
+ ctx_list_destroy_pf(&freelist);
+ RETURN(busy);
+}
+
+/****************************************
+ * service apis *
+ ****************************************/
+
+static
+int gss_svc_accept_pf(struct ptlrpc_request *req)
+{
+ return gss_svc_accept(&gss_policy_pipefs, req);
+}
+
+static
+int gss_svc_install_rctx_pf(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx)
+{
+ struct ptlrpc_sec *sec;
+ int rc;
+
+ sec = sptlrpc_import_sec_ref(imp);
+ LASSERT(sec);
+ rc = gss_install_rvs_cli_ctx_pf(sec2gsec(sec), ctx);
+
+ sptlrpc_sec_put(sec);
+ return rc;
+}
+
+/****************************************
+ * rpc_pipefs definitions *
+ ****************************************/
+
+#define LUSTRE_PIPE_ROOT "/lustre"
+#define LUSTRE_PIPE_KRB5 LUSTRE_PIPE_ROOT"/krb5"
+
+struct gss_upcall_msg_data {
+ __u32 gum_seq;
+ __u32 gum_uid;
+ __u32 gum_gid;
+ __u32 gum_svc; /* MDS/OSS... */
+ __u64 gum_nid; /* peer NID */
+ __u8 gum_obd[64]; /* client obd name */
+};
+
+struct gss_upcall_msg {
+ struct rpc_pipe_msg gum_base;
+ atomic_t gum_refcount;
+ struct list_head gum_list;
+ __u32 gum_mechidx;
+ struct gss_sec *gum_gsec;
+ struct gss_cli_ctx *gum_gctx;
+ struct gss_upcall_msg_data gum_data;
+};
+
+static atomic_t upcall_seq = ATOMIC_INIT(0);
+
+static inline
+__u32 upcall_get_sequence(void)
+{
+ return (__u32) atomic_inc_return(&upcall_seq);
+}
+
+enum mech_idx_t {
+ MECH_KRB5 = 0,
+ MECH_MAX
+};
+
+static inline
+__u32 mech_name2idx(const char *name)
+{
+ LASSERT(!strcmp(name, "krb5"));
+ return MECH_KRB5;
+}
+
+/* pipefs dentries for each mechanisms */
+static struct dentry *de_pipes[MECH_MAX] = { NULL, };
+/* all upcall messgaes linked here */
+static struct list_head upcall_lists[MECH_MAX];
+/* and protected by this */
+static spinlock_t upcall_locks[MECH_MAX];
+
+static inline
+void upcall_list_lock(int idx)
+{
+ spin_lock(&upcall_locks[idx]);
+}
+
+static inline
+void upcall_list_unlock(int idx)
+{
+ spin_unlock(&upcall_locks[idx]);
+}
+
+static
+void upcall_msg_enlist(struct gss_upcall_msg *msg)
+{
+ __u32 idx = msg->gum_mechidx;
+
+ upcall_list_lock(idx);
+ list_add(&msg->gum_list, &upcall_lists[idx]);
+ upcall_list_unlock(idx);
+}
+
+static
+void upcall_msg_delist(struct gss_upcall_msg *msg)
+{
+ __u32 idx = msg->gum_mechidx;
+
+ upcall_list_lock(idx);
+ list_del_init(&msg->gum_list);
+ upcall_list_unlock(idx);
+}
+
+/****************************************
+ * rpc_pipefs upcall helpers *
+ ****************************************/
+
+static
+void gss_release_msg(struct gss_upcall_msg *gmsg)
+{
+ ENTRY;
+ LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
+
+ if (!atomic_dec_and_test(&gmsg->gum_refcount)) {
+ EXIT;
+ return;
+ }
+
+ if (gmsg->gum_gctx) {
+ sptlrpc_cli_ctx_wakeup(&gmsg->gum_gctx->gc_base);
+ sptlrpc_cli_ctx_put(&gmsg->gum_gctx->gc_base, 1);
+ gmsg->gum_gctx = NULL;
+ }
+
+ LASSERT(list_empty(&gmsg->gum_list));
+ LASSERT(list_empty(&gmsg->gum_base.list));
+ OBD_FREE_PTR(gmsg);
+ EXIT;
+}
+
+static
+void gss_unhash_msg_nolock(struct gss_upcall_msg *gmsg)
+{
+ __u32 idx = gmsg->gum_mechidx;
+
+ LASSERT(idx < MECH_MAX);
+ LASSERT(spin_is_locked(&upcall_locks[idx]));
+
+ if (list_empty(&gmsg->gum_list))
+ return;
+
+ list_del_init(&gmsg->gum_list);
+ LASSERT(atomic_read(&gmsg->gum_refcount) > 1);
+ atomic_dec(&gmsg->gum_refcount);
+}
+
+static
+void gss_unhash_msg(struct gss_upcall_msg *gmsg)
+{
+ __u32 idx = gmsg->gum_mechidx;
+
+ LASSERT(idx < MECH_MAX);
+ upcall_list_lock(idx);
+ gss_unhash_msg_nolock(gmsg);
+ upcall_list_unlock(idx);
+}
+
+static
+void gss_msg_fail_ctx(struct gss_upcall_msg *gmsg)
+{
+ if (gmsg->gum_gctx) {
+ struct ptlrpc_cli_ctx *ctx = &gmsg->gum_gctx->gc_base;
+
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ sptlrpc_cli_ctx_expire(ctx);
+ set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+ }
+}
+
+static
+struct gss_upcall_msg * gss_find_upcall(__u32 mechidx, __u32 seq)
+{
+ struct gss_upcall_msg *gmsg;
+
+ upcall_list_lock(mechidx);
+ list_for_each_entry(gmsg, &upcall_lists[mechidx], gum_list) {
+ if (gmsg->gum_data.gum_seq != seq)
+ continue;
+
+ LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
+ LASSERT(gmsg->gum_mechidx == mechidx);
+
+ atomic_inc(&gmsg->gum_refcount);
+ upcall_list_unlock(mechidx);
+ return gmsg;
+ }
+ upcall_list_unlock(mechidx);
+ return NULL;
+}
+
+static
+int simple_get_bytes(char **buf, __u32 *buflen, void *res, __u32 reslen)
+{
+ if (*buflen < reslen) {
+ CERROR("buflen %u < %u\n", *buflen, reslen);
+ return -EINVAL;
+ }
+
+ memcpy(res, *buf, reslen);
+ *buf += reslen;
+ *buflen -= reslen;
+ return 0;
+}
+
+/****************************************
+ * rpc_pipefs apis *
+ ****************************************/
+
+static
+ssize_t gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+ char *dst, size_t buflen)
+{
+ char *data = (char *)msg->data + msg->copied;
+ ssize_t mlen = msg->len;
+ ssize_t left;
+ ENTRY;
+
+ if (mlen > buflen)
+ mlen = buflen;
+ left = copy_to_user(dst, data, mlen);
+ if (left < 0) {
+ msg->errno = left;
+ RETURN(left);
+ }
+ mlen -= left;
+ msg->copied += mlen;
+ msg->errno = 0;
+ RETURN(mlen);
+}
+
+static
+ssize_t gss_pipe_downcall(struct file *filp, const char *src, size_t mlen)
+{
+ struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+ struct gss_upcall_msg *gss_msg;
+ struct ptlrpc_cli_ctx *ctx;
+ struct gss_cli_ctx *gctx = NULL;
+ char *buf, *data;
+ int datalen;
+ int timeout, rc;
+ __u32 mechidx, seq, gss_err;
+ ENTRY;
+
+ mechidx = (__u32) (long) rpci->private;
+ LASSERT(mechidx < MECH_MAX);
+
+ OBD_ALLOC(buf, mlen);
+ if (!buf)
+ RETURN(-ENOMEM);
+
+ if (copy_from_user(buf, src, mlen)) {
+ CERROR("failed copy user space data\n");
+ GOTO(out_free, rc = -EFAULT);
+ }
+ data = buf;
+ datalen = mlen;
+
+ /* data passed down format:
+ * - seq
+ * - timeout
+ * - gc_win / error
+ * - wire_ctx (rawobj)
+ * - mech_ctx (rawobj)
+ */
+ if (simple_get_bytes(&data, &datalen, &seq, sizeof(seq))) {
+ CERROR("fail to get seq\n");
+ GOTO(out_free, rc = -EFAULT);
+ }
+
+ gss_msg = gss_find_upcall(mechidx, seq);
+ if (!gss_msg) {
+ CERROR("upcall %u has aborted earlier\n", seq);
+ GOTO(out_free, rc = -EINVAL);
+ }
+
+ gss_unhash_msg(gss_msg);
+ gctx = gss_msg->gum_gctx;
+ LASSERT(gctx);
+ LASSERT(atomic_read(&gctx->gc_base.cc_refcount) > 0);
+
+ /* timeout is not in use for now */
+ if (simple_get_bytes(&data, &datalen, &timeout, sizeof(timeout)))
+ GOTO(out_msg, rc = -EFAULT);
+
+ /* lgssd signal an error by gc_win == 0 */
+ if (simple_get_bytes(&data, &datalen, &gctx->gc_win,
+ sizeof(gctx->gc_win)))
+ GOTO(out_msg, rc = -EFAULT);
+
+ if (gctx->gc_win == 0) {
+ /* followed by:
+ * - rpc error
+ * - gss error
+ */
+ if (simple_get_bytes(&data, &datalen, &rc, sizeof(rc)))
+ GOTO(out_msg, rc = -EFAULT);
+ if (simple_get_bytes(&data, &datalen, &gss_err,sizeof(gss_err)))
+ GOTO(out_msg, rc = -EFAULT);
+
+ if (rc == 0 && gss_err == GSS_S_COMPLETE) {
+ CWARN("both rpc & gss error code not set\n");
+ rc = -EPERM;
+ }
+ } else {
+ rawobj_t tmpobj;
+
+ /* handle */
+ if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
+ GOTO(out_msg, rc = -EFAULT);
+ if (rawobj_dup(&gctx->gc_handle, &tmpobj))
+ GOTO(out_msg, rc = -ENOMEM);
+
+ /* mechctx */
+ if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
+ GOTO(out_msg, rc = -EFAULT);
+ gss_err = lgss_import_sec_context(&tmpobj,
+ gss_msg->gum_gsec->gs_mech,
+ &gctx->gc_mechctx);
+ rc = 0;
+ }
+
+ if (likely(rc == 0 && gss_err == GSS_S_COMPLETE)) {
+ gss_cli_ctx_uptodate(gctx);
+ } else {
+ ctx = &gctx->gc_base;
+ sptlrpc_cli_ctx_expire(ctx);
+ if (rc != -ERESTART || gss_err != GSS_S_COMPLETE)
+ set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+
+ CERROR("refresh ctx %p(uid %d) failed: %d/0x%08x: %s\n",
+ ctx, ctx->cc_vcred.vc_uid, rc, gss_err,
+ test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) ?
+ "fatal error" : "non-fatal");
+ }
+
+ rc = mlen;
+
+out_msg:
+ gss_release_msg(gss_msg);
+
+out_free:
+ OBD_FREE(buf, mlen);
+ /* FIXME
+ * hack pipefs: always return asked length unless all following
+ * downcalls might be messed up. */
+ rc = mlen;
+ RETURN(rc);
+}
+
+static
+void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+ struct gss_upcall_msg *gmsg;
+ struct gss_upcall_msg_data *gumd;
+ static cfs_time_t ratelimit = 0;
+ ENTRY;
+
+ LASSERT(list_empty(&msg->list));
+
+ /* normally errno is >= 0 */
+ if (msg->errno >= 0) {
+ EXIT;
+ return;
+ }
+
+ gmsg = container_of(msg, struct gss_upcall_msg, gum_base);
+ gumd = &gmsg->gum_data;
+ LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
+
+ CERROR("failed msg %p (seq %u, uid %u, svc %u, nid "LPX64", obd %.*s): "
+ "errno %d\n", msg, gumd->gum_seq, gumd->gum_uid, gumd->gum_svc,
+ gumd->gum_nid, (int) sizeof(gumd->gum_obd),
+ gumd->gum_obd, msg->errno);
+
+ atomic_inc(&gmsg->gum_refcount);
+ gss_unhash_msg(gmsg);
+ if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+ cfs_time_t now = cfs_time_current_sec();
+
+ if (cfs_time_after(now, ratelimit)) {
+ CWARN("upcall timed out, is lgssd running?\n");
+ ratelimit = now + 15;
+ }
+ }
+ gss_msg_fail_ctx(gmsg);
+ gss_release_msg(gmsg);
+ EXIT;
+}
+
+static
+void gss_pipe_release(struct inode *inode)
+{
+ struct rpc_inode *rpci = RPC_I(inode);
+ __u32 idx;
+ ENTRY;
+
+ idx = (__u32) (long) rpci->private;
+ LASSERT(idx < MECH_MAX);
+
+ upcall_list_lock(idx);
+ while (!list_empty(&upcall_lists[idx])) {
+ struct gss_upcall_msg *gmsg;
+ struct gss_upcall_msg_data *gumd;
+
+ gmsg = list_entry(upcall_lists[idx].next,
+ struct gss_upcall_msg, gum_list);
+ gumd = &gmsg->gum_data;
+ LASSERT(list_empty(&gmsg->gum_base.list));
+
+ CERROR("failing remaining msg %p:seq %u, uid %u, svc %u, "
+ "nid "LPX64", obd %.*s\n", gmsg,
+ gumd->gum_seq, gumd->gum_uid, gumd->gum_svc,
+ gumd->gum_nid, (int) sizeof(gumd->gum_obd),
+ gumd->gum_obd);
+
+ gmsg->gum_base.errno = -EPIPE;
+ atomic_inc(&gmsg->gum_refcount);
+ gss_unhash_msg_nolock(gmsg);
+
+ gss_msg_fail_ctx(gmsg);
+
+ upcall_list_unlock(idx);
+ gss_release_msg(gmsg);
+ upcall_list_lock(idx);
+ }
+ upcall_list_unlock(idx);
+ EXIT;
+}
+
+static struct rpc_pipe_ops gss_upcall_ops = {
+ .upcall = gss_pipe_upcall,
+ .downcall = gss_pipe_downcall,
+ .destroy_msg = gss_pipe_destroy_msg,
+ .release_pipe = gss_pipe_release,
+};
+
+/****************************************
+ * upcall helper functions *
+ ****************************************/
+
+static
+int gss_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx)
+{
+ struct obd_import *imp;
+ struct gss_sec *gsec;
+ struct gss_upcall_msg *gmsg;
+ int rc = 0;
+ ENTRY;
+
+ might_sleep();
+
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_import);
+ LASSERT(ctx->cc_sec->ps_import->imp_obd);
+
+ imp = ctx->cc_sec->ps_import;
+ if (!imp->imp_connection) {
+ CERROR("import has no connection set\n");
+ RETURN(-EINVAL);
+ }
+
+ gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base);
+
+ OBD_ALLOC_PTR(gmsg);
+ if (!gmsg)
+ RETURN(-ENOMEM);
+
+ /* initialize pipefs base msg */
+ INIT_LIST_HEAD(&gmsg->gum_base.list);
+ gmsg->gum_base.data = &gmsg->gum_data;
+ gmsg->gum_base.len = sizeof(gmsg->gum_data);
+ gmsg->gum_base.copied = 0;
+ gmsg->gum_base.errno = 0;
+
+ /* init upcall msg */
+ atomic_set(&gmsg->gum_refcount, 1);
+ gmsg->gum_mechidx = mech_name2idx(gsec->gs_mech->gm_name);
+ gmsg->gum_gsec = gsec;
+ gmsg->gum_gctx = container_of(sptlrpc_cli_ctx_get(ctx),
+ struct gss_cli_ctx, gc_base);
+ gmsg->gum_data.gum_seq = upcall_get_sequence();
+ gmsg->gum_data.gum_uid = ctx->cc_vcred.vc_uid;
+ gmsg->gum_data.gum_gid = 0; /* not used for now */
+ gmsg->gum_data.gum_svc = import_to_gss_svc(imp);
+ gmsg->gum_data.gum_nid = imp->imp_connection->c_peer.nid;
+ strncpy(gmsg->gum_data.gum_obd, imp->imp_obd->obd_name,
+ sizeof(gmsg->gum_data.gum_obd));
+
+ /* This only could happen when sysadmin set it dead/expired
+ * using lctl by force. */
+ if (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK) {
+ CWARN("ctx %p(%u->%s) was set flags %lx unexpectedly\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+ ctx->cc_flags);
+
+ LASSERT(!(ctx->cc_flags & PTLRPC_CTX_UPTODATE));
+ ctx->cc_flags |= PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR;
+
+ rc = -EIO;
+ goto err_free;
+ }
+
+ upcall_msg_enlist(gmsg);
+
+ rc = rpc_queue_upcall(de_pipes[gmsg->gum_mechidx]->d_inode,
+ &gmsg->gum_base);
+ if (rc) {
+ CERROR("rpc_queue_upcall failed: %d\n", rc);
+
+ upcall_msg_delist(gmsg);
+ goto err_free;
+ }
+
+ RETURN(0);
+err_free:
+ OBD_FREE_PTR(gmsg);
+ RETURN(rc);
+}
+
+static
+int gss_cli_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx)
+{
+ /* if we are refreshing for root, also update the reverse
+ * handle index, do not confuse reverse contexts. */
+ if (ctx->cc_vcred.vc_uid == 0) {
+ struct gss_sec *gsec;
+
+ gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base);
+ gsec->gs_rvs_hdl = gss_get_next_ctx_index();
+ }
+
+ return gss_ctx_refresh_pf(ctx);
+}
+
+/****************************************
+ * lustre gss pipefs policy *
+ ****************************************/
+
+static struct ptlrpc_ctx_ops gss_pipefs_ctxops = {
+ .match = gss_cli_ctx_match,
+ .refresh = gss_cli_ctx_refresh_pf,
+ .validate = gss_cli_ctx_validate_pf,
+ .die = gss_cli_ctx_die_pf,
+ .sign = gss_cli_ctx_sign,
+ .verify = gss_cli_ctx_verify,
+ .seal = gss_cli_ctx_seal,
+ .unseal = gss_cli_ctx_unseal,
+ .wrap_bulk = gss_cli_ctx_wrap_bulk,
+ .unwrap_bulk = gss_cli_ctx_unwrap_bulk,
+};
+
+static struct ptlrpc_sec_cops gss_sec_pipefs_cops = {
+ .create_sec = gss_sec_create_pf,
+ .destroy_sec = gss_sec_destroy_pf,
+ .kill_sec = gss_sec_kill,
+ .lookup_ctx = gss_sec_lookup_ctx_pf,
+ .release_ctx = gss_sec_release_ctx_pf,
+ .flush_ctx_cache = gss_sec_flush_ctx_cache_pf,
+ .install_rctx = gss_sec_install_rctx,
+ .alloc_reqbuf = gss_alloc_reqbuf,
+ .free_reqbuf = gss_free_reqbuf,
+ .alloc_repbuf = gss_alloc_repbuf,
+ .free_repbuf = gss_free_repbuf,
+ .enlarge_reqbuf = gss_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops gss_sec_pipefs_sops = {
+ .accept = gss_svc_accept_pf,
+ .invalidate_ctx = gss_svc_invalidate_ctx,
+ .alloc_rs = gss_svc_alloc_rs,
+ .authorize = gss_svc_authorize,
+ .free_rs = gss_svc_free_rs,
+ .free_ctx = gss_svc_free_ctx,
+ .unwrap_bulk = gss_svc_unwrap_bulk,
+ .wrap_bulk = gss_svc_wrap_bulk,
+ .install_rctx = gss_svc_install_rctx_pf,
+};
+
+static struct ptlrpc_sec_policy gss_policy_pipefs = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "gss.pipefs",
+ .sp_policy = SPTLRPC_POLICY_GSS_PIPEFS,
+ .sp_cops = &gss_sec_pipefs_cops,
+ .sp_sops = &gss_sec_pipefs_sops,
+};
+
+static
+int __init gss_init_pipefs_upcall(void)
+{
+ struct dentry *de;
+
+ /* pipe dir */
+ de = rpc_mkdir(LUSTRE_PIPE_ROOT, NULL);
+ if (IS_ERR(de) && PTR_ERR(de) != -EEXIST) {
+ CERROR("Failed to create gss pipe dir: %ld\n", PTR_ERR(de));
+ return PTR_ERR(de);
+ }
+
+ /* FIXME hack pipefs: dput will sometimes cause oops during module
+ * unload and lgssd close the pipe fds. */
+
+ /* krb5 mechanism */
+ de = rpc_mkpipe(LUSTRE_PIPE_KRB5, (void *) MECH_KRB5, &gss_upcall_ops,
+ RPC_PIPE_WAIT_FOR_OPEN);
+ if (!de || IS_ERR(de)) {
+ CERROR("failed to make rpc_pipe %s: %ld\n",
+ LUSTRE_PIPE_KRB5, PTR_ERR(de));
+ rpc_rmdir(LUSTRE_PIPE_ROOT);
+ return PTR_ERR(de);
+ }
+
+ de_pipes[MECH_KRB5] = de;
+ INIT_LIST_HEAD(&upcall_lists[MECH_KRB5]);
+ spin_lock_init(&upcall_locks[MECH_KRB5]);
+
+ return 0;
+}
+
+static
+void __exit gss_exit_pipefs_upcall(void)
+{
+ __u32 i;
+
+ for (i = 0; i < MECH_MAX; i++) {
+ LASSERT(list_empty(&upcall_lists[i]));
+
+ /* dput pipe dentry here might cause lgssd oops. */
+ de_pipes[i] = NULL;
+ }
+
+ rpc_unlink(LUSTRE_PIPE_KRB5);
+ rpc_rmdir(LUSTRE_PIPE_ROOT);
+}
+
+int __init gss_init_pipefs(void)
+{
+ int rc;
+
+ rc = gss_init_pipefs_upcall();
+ if (rc)
+ return rc;
+
+ rc = sptlrpc_register_policy(&gss_policy_pipefs);
+ if (rc) {
+ gss_exit_pipefs_upcall();
+ return rc;
+ }
+
+ return 0;
+}
+
+void __exit gss_exit_pipefs(void)
+{
+ gss_exit_pipefs_upcall();
+ sptlrpc_unregister_policy(&gss_policy_pipefs);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_rawobj.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_rawobj.c
new file mode 100644
index 000000000000..474ecf805307
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_rawobj.c
@@ -0,0 +1,242 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/gss/gss_rawobj.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_sec.h>
+
+#include "gss_internal.h"
+
+int rawobj_empty(rawobj_t *obj)
+{
+ LASSERT(equi(obj->len, obj->data));
+ return (obj->len == 0);
+}
+
+int rawobj_alloc(rawobj_t *obj, char *buf, int len)
+{
+ LASSERT(obj);
+ LASSERT(len >= 0);
+
+ obj->len = len;
+ if (len) {
+ OBD_ALLOC_LARGE(obj->data, len);
+ if (!obj->data) {
+ obj->len = 0;
+ RETURN(-ENOMEM);
+ }
+ memcpy(obj->data, buf, len);
+ } else
+ obj->data = NULL;
+ return 0;
+}
+
+void rawobj_free(rawobj_t *obj)
+{
+ LASSERT(obj);
+
+ if (obj->len) {
+ LASSERT(obj->data);
+ OBD_FREE_LARGE(obj->data, obj->len);
+ obj->len = 0;
+ obj->data = NULL;
+ } else
+ LASSERT(!obj->data);
+}
+
+int rawobj_equal(rawobj_t *a, rawobj_t *b)
+{
+ LASSERT(a && b);
+
+ return (a->len == b->len &&
+ (!a->len || !memcmp(a->data, b->data, a->len)));
+}
+
+int rawobj_dup(rawobj_t *dest, rawobj_t *src)
+{
+ LASSERT(src && dest);
+
+ dest->len = src->len;
+ if (dest->len) {
+ OBD_ALLOC_LARGE(dest->data, dest->len);
+ if (!dest->data) {
+ dest->len = 0;
+ return -ENOMEM;
+ }
+ memcpy(dest->data, src->data, dest->len);
+ } else
+ dest->data = NULL;
+ return 0;
+}
+
+int rawobj_serialize(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+ __u32 len;
+
+ LASSERT(obj);
+ LASSERT(buf);
+ LASSERT(buflen);
+
+ len = cfs_size_round4(obj->len);
+
+ if (*buflen < 4 + len) {
+ CERROR("buflen %u < %u\n", *buflen, 4 + len);
+ return -EINVAL;
+ }
+
+ *(*buf)++ = cpu_to_le32(obj->len);
+ memcpy(*buf, obj->data, obj->len);
+ *buf += (len >> 2);
+ *buflen -= (4 + len);
+
+ return 0;
+}
+
+static int __rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen,
+ int alloc, int local)
+{
+ __u32 len;
+
+ if (*buflen < sizeof(__u32)) {
+ CERROR("buflen %u\n", *buflen);
+ return -EINVAL;
+ }
+
+ obj->len = *(*buf)++;
+ if (!local)
+ obj->len = le32_to_cpu(obj->len);
+ *buflen -= sizeof(__u32);
+
+ if (!obj->len) {
+ obj->data = NULL;
+ return 0;
+ }
+
+ len = local ? obj->len : cfs_size_round4(obj->len);
+ if (*buflen < len) {
+ CERROR("buflen %u < %u\n", *buflen, len);
+ obj->len = 0;
+ return -EINVAL;
+ }
+
+ if (!alloc)
+ obj->data = (__u8 *) *buf;
+ else {
+ OBD_ALLOC_LARGE(obj->data, obj->len);
+ if (!obj->data) {
+ CERROR("fail to alloc %u bytes\n", obj->len);
+ obj->len = 0;
+ return -ENOMEM;
+ }
+ memcpy(obj->data, *buf, obj->len);
+ }
+
+ *((char **)buf) += len;
+ *buflen -= len;
+
+ return 0;
+}
+
+int rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+ return __rawobj_extract(obj, buf, buflen, 0, 0);
+}
+
+int rawobj_extract_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+ return __rawobj_extract(obj, buf, buflen, 1, 0);
+}
+
+int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+ return __rawobj_extract(obj, buf, buflen, 0, 1);
+}
+
+int rawobj_extract_local_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+ return __rawobj_extract(obj, buf, buflen, 1, 1);
+}
+
+int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj)
+{
+ rawobj->len = netobj->len;
+ rawobj->data = netobj->data;
+ return 0;
+}
+
+int rawobj_from_netobj_alloc(rawobj_t *rawobj, netobj_t *netobj)
+{
+ rawobj->len = 0;
+ rawobj->data = NULL;
+
+ if (netobj->len == 0)
+ return 0;
+
+ OBD_ALLOC_LARGE(rawobj->data, netobj->len);
+ if (rawobj->data == NULL)
+ return -ENOMEM;
+
+ rawobj->len = netobj->len;
+ memcpy(rawobj->data, netobj->data, netobj->len);
+ return 0;
+}
+
+/****************************************
+ * misc more *
+ ****************************************/
+
+int buffer_extract_bytes(const void **buf, __u32 *buflen,
+ void *res, __u32 reslen)
+{
+ if (*buflen < reslen) {
+ CERROR("buflen %u < %u\n", *buflen, reslen);
+ return -EINVAL;
+ }
+
+ memcpy(res, *buf, reslen);
+ *buf += reslen;
+ *buflen -= reslen;
+ return 0;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/gss_svc_upcall.c b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_svc_upcall.c
new file mode 100644
index 000000000000..31b50ea19c25
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/gss_svc_upcall.c
@@ -0,0 +1,1099 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * Neil Brown <neilb@cse.unsw.edu.au>
+ * J. Bruce Fields <bfields@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ * Dug Song <dugsong@monkey.org>
+ *
+ * RPCSEC_GSS server authentication.
+ * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
+ * (gssapi)
+ *
+ * The RPCSEC_GSS involves three stages:
+ * 1/ context creation
+ * 2/ data exchange
+ * 3/ context destruction
+ *
+ * Context creation is handled largely by upcalls to user-space.
+ * In particular, GSS_Accept_sec_context is handled by an upcall
+ * Data exchange is handled entirely within the kernel
+ * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
+ * Context destruction is handled in-kernel
+ * GSS_Delete_sec_context is in-kernel
+ *
+ * Context creation is initiated by a RPCSEC_GSS_INIT request arriving.
+ * The context handle and gss_token are used as a key into the rpcsec_init cache.
+ * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
+ * being major_status, minor_status, context_handle, reply_token.
+ * These are sent back to the client.
+ * Sequence window management is handled by the kernel. The window size if currently
+ * a compile time constant.
+ *
+ * When user-space is happy that a context is established, it places an entry
+ * in the rpcsec_context cache. The key for this cache is the context_handle.
+ * The content includes:
+ * uid/gidlist - for determining access rights
+ * mechanism type
+ * mechanism specific information, such as a key
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/mutex.h>
+#include <linux/sunrpc/cache.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+#define GSS_SVC_UPCALL_TIMEOUT (20)
+
+static spinlock_t __ctx_index_lock;
+static __u64 __ctx_index;
+
+__u64 gss_get_next_ctx_index(void)
+{
+ __u64 idx;
+
+ spin_lock(&__ctx_index_lock);
+ idx = __ctx_index++;
+ spin_unlock(&__ctx_index_lock);
+
+ return idx;
+}
+
+static inline unsigned long hash_mem(char *buf, int length, int bits)
+{
+ unsigned long hash = 0;
+ unsigned long l = 0;
+ int len = 0;
+ unsigned char c;
+
+ do {
+ if (len == length) {
+ c = (char) len;
+ len = -1;
+ } else
+ c = *buf++;
+
+ l = (l << 8) | c;
+ len++;
+
+ if ((len & (BITS_PER_LONG/8-1)) == 0)
+ hash = cfs_hash_long(hash^l, BITS_PER_LONG);
+ } while (len);
+
+ return hash >> (BITS_PER_LONG - bits);
+}
+
+/****************************************
+ * rsi cache *
+ ****************************************/
+
+#define RSI_HASHBITS (6)
+#define RSI_HASHMAX (1 << RSI_HASHBITS)
+#define RSI_HASHMASK (RSI_HASHMAX - 1)
+
+struct rsi {
+ struct cache_head h;
+ __u32 lustre_svc;
+ __u64 nid;
+ wait_queue_head_t waitq;
+ rawobj_t in_handle, in_token;
+ rawobj_t out_handle, out_token;
+ int major_status, minor_status;
+};
+
+static struct cache_head *rsi_table[RSI_HASHMAX];
+static struct cache_detail rsi_cache;
+static struct rsi *rsi_update(struct rsi *new, struct rsi *old);
+static struct rsi *rsi_lookup(struct rsi *item);
+
+static inline int rsi_hash(struct rsi *item)
+{
+ return hash_mem((char *)item->in_handle.data, item->in_handle.len,
+ RSI_HASHBITS) ^
+ hash_mem((char *)item->in_token.data, item->in_token.len,
+ RSI_HASHBITS);
+}
+
+static inline int __rsi_match(struct rsi *item, struct rsi *tmp)
+{
+ return (rawobj_equal(&item->in_handle, &tmp->in_handle) &&
+ rawobj_equal(&item->in_token, &tmp->in_token));
+}
+
+static void rsi_free(struct rsi *rsi)
+{
+ rawobj_free(&rsi->in_handle);
+ rawobj_free(&rsi->in_token);
+ rawobj_free(&rsi->out_handle);
+ rawobj_free(&rsi->out_token);
+}
+
+static void rsi_request(struct cache_detail *cd,
+ struct cache_head *h,
+ char **bpp, int *blen)
+{
+ struct rsi *rsi = container_of(h, struct rsi, h);
+ __u64 index = 0;
+
+ /* if in_handle is null, provide kernel suggestion */
+ if (rsi->in_handle.len == 0)
+ index = gss_get_next_ctx_index();
+
+ qword_addhex(bpp, blen, (char *) &rsi->lustre_svc,
+ sizeof(rsi->lustre_svc));
+ qword_addhex(bpp, blen, (char *) &rsi->nid, sizeof(rsi->nid));
+ qword_addhex(bpp, blen, (char *) &index, sizeof(index));
+ qword_addhex(bpp, blen, rsi->in_handle.data, rsi->in_handle.len);
+ qword_addhex(bpp, blen, rsi->in_token.data, rsi->in_token.len);
+ (*bpp)[-1] = '\n';
+}
+
+static int rsi_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h, rsi_request);
+}
+
+static inline void __rsi_init(struct rsi *new, struct rsi *item)
+{
+ new->out_handle = RAWOBJ_EMPTY;
+ new->out_token = RAWOBJ_EMPTY;
+
+ new->in_handle = item->in_handle;
+ item->in_handle = RAWOBJ_EMPTY;
+ new->in_token = item->in_token;
+ item->in_token = RAWOBJ_EMPTY;
+
+ new->lustre_svc = item->lustre_svc;
+ new->nid = item->nid;
+ init_waitqueue_head(&new->waitq);
+}
+
+static inline void __rsi_update(struct rsi *new, struct rsi *item)
+{
+ LASSERT(new->out_handle.len == 0);
+ LASSERT(new->out_token.len == 0);
+
+ new->out_handle = item->out_handle;
+ item->out_handle = RAWOBJ_EMPTY;
+ new->out_token = item->out_token;
+ item->out_token = RAWOBJ_EMPTY;
+
+ new->major_status = item->major_status;
+ new->minor_status = item->minor_status;
+}
+
+static void rsi_put(struct kref *ref)
+{
+ struct rsi *rsi = container_of(ref, struct rsi, h.ref);
+
+ LASSERT(rsi->h.next == NULL);
+ rsi_free(rsi);
+ OBD_FREE_PTR(rsi);
+}
+
+static int rsi_match(struct cache_head *a, struct cache_head *b)
+{
+ struct rsi *item = container_of(a, struct rsi, h);
+ struct rsi *tmp = container_of(b, struct rsi, h);
+
+ return __rsi_match(item, tmp);
+}
+
+static void rsi_init(struct cache_head *cnew, struct cache_head *citem)
+{
+ struct rsi *new = container_of(cnew, struct rsi, h);
+ struct rsi *item = container_of(citem, struct rsi, h);
+
+ __rsi_init(new, item);
+}
+
+static void update_rsi(struct cache_head *cnew, struct cache_head *citem)
+{
+ struct rsi *new = container_of(cnew, struct rsi, h);
+ struct rsi *item = container_of(citem, struct rsi, h);
+
+ __rsi_update(new, item);
+}
+
+static struct cache_head *rsi_alloc(void)
+{
+ struct rsi *rsi;
+
+ OBD_ALLOC_PTR(rsi);
+ if (rsi)
+ return &rsi->h;
+ else
+ return NULL;
+}
+
+static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+ char *buf = mesg;
+ char *ep;
+ int len;
+ struct rsi rsii, *rsip = NULL;
+ time_t expiry;
+ int status = -EINVAL;
+ ENTRY;
+
+
+ memset(&rsii, 0, sizeof(rsii));
+
+ /* handle */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+ if (rawobj_alloc(&rsii.in_handle, buf, len)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ /* token */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+ if (rawobj_alloc(&rsii.in_token, buf, len)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ rsip = rsi_lookup(&rsii);
+ if (!rsip)
+ goto out;
+
+ rsii.h.flags = 0;
+ /* expiry */
+ expiry = get_expiry(&mesg);
+ if (expiry == 0)
+ goto out;
+
+ len = qword_get(&mesg, buf, mlen);
+ if (len <= 0)
+ goto out;
+
+ /* major */
+ rsii.major_status = simple_strtol(buf, &ep, 10);
+ if (*ep)
+ goto out;
+
+ /* minor */
+ len = qword_get(&mesg, buf, mlen);
+ if (len <= 0)
+ goto out;
+ rsii.minor_status = simple_strtol(buf, &ep, 10);
+ if (*ep)
+ goto out;
+
+ /* out_handle */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+ if (rawobj_alloc(&rsii.out_handle, buf, len)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ /* out_token */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+ if (rawobj_alloc(&rsii.out_token, buf, len)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ rsii.h.expiry_time = expiry;
+ rsip = rsi_update(&rsii, rsip);
+ status = 0;
+out:
+ rsi_free(&rsii);
+ if (rsip) {
+ wake_up_all(&rsip->waitq);
+ cache_put(&rsip->h, &rsi_cache);
+ } else {
+ status = -ENOMEM;
+ }
+
+ if (status)
+ CERROR("rsi parse error %d\n", status);
+ RETURN(status);
+}
+
+static struct cache_detail rsi_cache = {
+ .hash_size = RSI_HASHMAX,
+ .hash_table = rsi_table,
+ .name = "auth.sptlrpc.init",
+ .cache_put = rsi_put,
+ .cache_upcall = rsi_upcall,
+ .cache_parse = rsi_parse,
+ .match = rsi_match,
+ .init = rsi_init,
+ .update = update_rsi,
+ .alloc = rsi_alloc,
+};
+
+static struct rsi *rsi_lookup(struct rsi *item)
+{
+ struct cache_head *ch;
+ int hash = rsi_hash(item);
+
+ ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash);
+ if (ch)
+ return container_of(ch, struct rsi, h);
+ else
+ return NULL;
+}
+
+static struct rsi *rsi_update(struct rsi *new, struct rsi *old)
+{
+ struct cache_head *ch;
+ int hash = rsi_hash(new);
+
+ ch = sunrpc_cache_update(&rsi_cache, &new->h, &old->h, hash);
+ if (ch)
+ return container_of(ch, struct rsi, h);
+ else
+ return NULL;
+}
+
+/****************************************
+ * rsc cache *
+ ****************************************/
+
+#define RSC_HASHBITS (10)
+#define RSC_HASHMAX (1 << RSC_HASHBITS)
+#define RSC_HASHMASK (RSC_HASHMAX - 1)
+
+struct rsc {
+ struct cache_head h;
+ struct obd_device *target;
+ rawobj_t handle;
+ struct gss_svc_ctx ctx;
+};
+
+static struct cache_head *rsc_table[RSC_HASHMAX];
+static struct cache_detail rsc_cache;
+static struct rsc *rsc_update(struct rsc *new, struct rsc *old);
+static struct rsc *rsc_lookup(struct rsc *item);
+
+static void rsc_free(struct rsc *rsci)
+{
+ rawobj_free(&rsci->handle);
+ rawobj_free(&rsci->ctx.gsc_rvs_hdl);
+ lgss_delete_sec_context(&rsci->ctx.gsc_mechctx);
+}
+
+static inline int rsc_hash(struct rsc *rsci)
+{
+ return hash_mem((char *)rsci->handle.data,
+ rsci->handle.len, RSC_HASHBITS);
+}
+
+static inline int __rsc_match(struct rsc *new, struct rsc *tmp)
+{
+ return rawobj_equal(&new->handle, &tmp->handle);
+}
+
+static inline void __rsc_init(struct rsc *new, struct rsc *tmp)
+{
+ new->handle = tmp->handle;
+ tmp->handle = RAWOBJ_EMPTY;
+
+ new->target = NULL;
+ memset(&new->ctx, 0, sizeof(new->ctx));
+ new->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY;
+}
+
+static inline void __rsc_update(struct rsc *new, struct rsc *tmp)
+{
+ new->ctx = tmp->ctx;
+ tmp->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY;
+ tmp->ctx.gsc_mechctx = NULL;
+
+ memset(&new->ctx.gsc_seqdata, 0, sizeof(new->ctx.gsc_seqdata));
+ spin_lock_init(&new->ctx.gsc_seqdata.ssd_lock);
+}
+
+static void rsc_put(struct kref *ref)
+{
+ struct rsc *rsci = container_of(ref, struct rsc, h.ref);
+
+ LASSERT(rsci->h.next == NULL);
+ rsc_free(rsci);
+ OBD_FREE_PTR(rsci);
+}
+
+static int rsc_match(struct cache_head *a, struct cache_head *b)
+{
+ struct rsc *new = container_of(a, struct rsc, h);
+ struct rsc *tmp = container_of(b, struct rsc, h);
+
+ return __rsc_match(new, tmp);
+}
+
+static void rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
+{
+ struct rsc *new = container_of(cnew, struct rsc, h);
+ struct rsc *tmp = container_of(ctmp, struct rsc, h);
+
+ __rsc_init(new, tmp);
+}
+
+static void update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
+{
+ struct rsc *new = container_of(cnew, struct rsc, h);
+ struct rsc *tmp = container_of(ctmp, struct rsc, h);
+
+ __rsc_update(new, tmp);
+}
+
+static struct cache_head * rsc_alloc(void)
+{
+ struct rsc *rsc;
+
+ OBD_ALLOC_PTR(rsc);
+ if (rsc)
+ return &rsc->h;
+ else
+ return NULL;
+}
+
+static int rsc_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+ char *buf = mesg;
+ int len, rv, tmp_int;
+ struct rsc rsci, *rscp = NULL;
+ time_t expiry;
+ int status = -EINVAL;
+ struct gss_api_mech *gm = NULL;
+
+ memset(&rsci, 0, sizeof(rsci));
+
+ /* context handle */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0) goto out;
+ status = -ENOMEM;
+ if (rawobj_alloc(&rsci.handle, buf, len))
+ goto out;
+
+ rsci.h.flags = 0;
+ /* expiry */
+ expiry = get_expiry(&mesg);
+ status = -EINVAL;
+ if (expiry == 0)
+ goto out;
+
+ /* remote flag */
+ rv = get_int(&mesg, &tmp_int);
+ if (rv) {
+ CERROR("fail to get remote flag\n");
+ goto out;
+ }
+ rsci.ctx.gsc_remote = (tmp_int != 0);
+
+ /* root user flag */
+ rv = get_int(&mesg, &tmp_int);
+ if (rv) {
+ CERROR("fail to get oss user flag\n");
+ goto out;
+ }
+ rsci.ctx.gsc_usr_root = (tmp_int != 0);
+
+ /* mds user flag */
+ rv = get_int(&mesg, &tmp_int);
+ if (rv) {
+ CERROR("fail to get mds user flag\n");
+ goto out;
+ }
+ rsci.ctx.gsc_usr_mds = (tmp_int != 0);
+
+ /* oss user flag */
+ rv = get_int(&mesg, &tmp_int);
+ if (rv) {
+ CERROR("fail to get oss user flag\n");
+ goto out;
+ }
+ rsci.ctx.gsc_usr_oss = (tmp_int != 0);
+
+ /* mapped uid */
+ rv = get_int(&mesg, (int *) &rsci.ctx.gsc_mapped_uid);
+ if (rv) {
+ CERROR("fail to get mapped uid\n");
+ goto out;
+ }
+
+ rscp = rsc_lookup(&rsci);
+ if (!rscp)
+ goto out;
+
+ /* uid, or NEGATIVE */
+ rv = get_int(&mesg, (int *) &rsci.ctx.gsc_uid);
+ if (rv == -EINVAL)
+ goto out;
+ if (rv == -ENOENT) {
+ CERROR("NOENT? set rsc entry negative\n");
+ set_bit(CACHE_NEGATIVE, &rsci.h.flags);
+ } else {
+ rawobj_t tmp_buf;
+ unsigned long ctx_expiry;
+
+ /* gid */
+ if (get_int(&mesg, (int *) &rsci.ctx.gsc_gid))
+ goto out;
+
+ /* mech name */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+ gm = lgss_name_to_mech(buf);
+ status = -EOPNOTSUPP;
+ if (!gm)
+ goto out;
+
+ status = -EINVAL;
+ /* mech-specific data: */
+ len = qword_get(&mesg, buf, mlen);
+ if (len < 0)
+ goto out;
+
+ tmp_buf.len = len;
+ tmp_buf.data = (unsigned char *)buf;
+ if (lgss_import_sec_context(&tmp_buf, gm,
+ &rsci.ctx.gsc_mechctx))
+ goto out;
+
+ /* currently the expiry time passed down from user-space
+ * is invalid, here we retrive it from mech. */
+ if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) {
+ CERROR("unable to get expire time, drop it\n");
+ goto out;
+ }
+ expiry = (time_t) ctx_expiry;
+ }
+
+ rsci.h.expiry_time = expiry;
+ rscp = rsc_update(&rsci, rscp);
+ status = 0;
+out:
+ if (gm)
+ lgss_mech_put(gm);
+ rsc_free(&rsci);
+ if (rscp)
+ cache_put(&rscp->h, &rsc_cache);
+ else
+ status = -ENOMEM;
+
+ if (status)
+ CERROR("parse rsc error %d\n", status);
+ return status;
+}
+
+static struct cache_detail rsc_cache = {
+ .hash_size = RSC_HASHMAX,
+ .hash_table = rsc_table,
+ .name = "auth.sptlrpc.context",
+ .cache_put = rsc_put,
+ .cache_parse = rsc_parse,
+ .match = rsc_match,
+ .init = rsc_init,
+ .update = update_rsc,
+ .alloc = rsc_alloc,
+};
+
+static struct rsc *rsc_lookup(struct rsc *item)
+{
+ struct cache_head *ch;
+ int hash = rsc_hash(item);
+
+ ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash);
+ if (ch)
+ return container_of(ch, struct rsc, h);
+ else
+ return NULL;
+}
+
+static struct rsc *rsc_update(struct rsc *new, struct rsc *old)
+{
+ struct cache_head *ch;
+ int hash = rsc_hash(new);
+
+ ch = sunrpc_cache_update(&rsc_cache, &new->h, &old->h, hash);
+ if (ch)
+ return container_of(ch, struct rsc, h);
+ else
+ return NULL;
+}
+
+#define COMPAT_RSC_PUT(item, cd) cache_put((item), (cd))
+
+/****************************************
+ * rsc cache flush *
+ ****************************************/
+
+typedef int rsc_entry_match(struct rsc *rscp, long data);
+
+static void rsc_flush(rsc_entry_match *match, long data)
+{
+ struct cache_head **ch;
+ struct rsc *rscp;
+ int n;
+ ENTRY;
+
+ write_lock(&rsc_cache.hash_lock);
+ for (n = 0; n < RSC_HASHMAX; n++) {
+ for (ch = &rsc_cache.hash_table[n]; *ch;) {
+ rscp = container_of(*ch, struct rsc, h);
+
+ if (!match(rscp, data)) {
+ ch = &((*ch)->next);
+ continue;
+ }
+
+ /* it seems simply set NEGATIVE doesn't work */
+ *ch = (*ch)->next;
+ rscp->h.next = NULL;
+ cache_get(&rscp->h);
+ set_bit(CACHE_NEGATIVE, &rscp->h.flags);
+ COMPAT_RSC_PUT(&rscp->h, &rsc_cache);
+ rsc_cache.entries--;
+ }
+ }
+ write_unlock(&rsc_cache.hash_lock);
+ EXIT;
+}
+
+static int match_uid(struct rsc *rscp, long uid)
+{
+ if ((int) uid == -1)
+ return 1;
+ return ((int) rscp->ctx.gsc_uid == (int) uid);
+}
+
+static int match_target(struct rsc *rscp, long target)
+{
+ return (rscp->target == (struct obd_device *) target);
+}
+
+static inline void rsc_flush_uid(int uid)
+{
+ if (uid == -1)
+ CWARN("flush all gss contexts...\n");
+
+ rsc_flush(match_uid, (long) uid);
+}
+
+static inline void rsc_flush_target(struct obd_device *target)
+{
+ rsc_flush(match_target, (long) target);
+}
+
+void gss_secsvc_flush(struct obd_device *target)
+{
+ rsc_flush_target(target);
+}
+EXPORT_SYMBOL(gss_secsvc_flush);
+
+static struct rsc *gss_svc_searchbyctx(rawobj_t *handle)
+{
+ struct rsc rsci;
+ struct rsc *found;
+
+ memset(&rsci, 0, sizeof(rsci));
+ if (rawobj_dup(&rsci.handle, handle))
+ return NULL;
+
+ found = rsc_lookup(&rsci);
+ rsc_free(&rsci);
+ if (!found)
+ return NULL;
+ if (cache_check(&rsc_cache, &found->h, NULL))
+ return NULL;
+ return found;
+}
+
+int gss_svc_upcall_install_rvs_ctx(struct obd_import *imp,
+ struct gss_sec *gsec,
+ struct gss_cli_ctx *gctx)
+{
+ struct rsc rsci, *rscp = NULL;
+ unsigned long ctx_expiry;
+ __u32 major;
+ int rc;
+ ENTRY;
+
+ memset(&rsci, 0, sizeof(rsci));
+
+ if (rawobj_alloc(&rsci.handle, (char *) &gsec->gs_rvs_hdl,
+ sizeof(gsec->gs_rvs_hdl)))
+ GOTO(out, rc = -ENOMEM);
+
+ rscp = rsc_lookup(&rsci);
+ if (rscp == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ major = lgss_copy_reverse_context(gctx->gc_mechctx,
+ &rsci.ctx.gsc_mechctx);
+ if (major != GSS_S_COMPLETE)
+ GOTO(out, rc = -ENOMEM);
+
+ if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) {
+ CERROR("unable to get expire time, drop it\n");
+ GOTO(out, rc = -EINVAL);
+ }
+ rsci.h.expiry_time = (time_t) ctx_expiry;
+
+ if (strcmp(imp->imp_obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0)
+ rsci.ctx.gsc_usr_mds = 1;
+ else if (strcmp(imp->imp_obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0)
+ rsci.ctx.gsc_usr_oss = 1;
+ else
+ rsci.ctx.gsc_usr_root = 1;
+
+ rscp = rsc_update(&rsci, rscp);
+ if (rscp == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rscp->target = imp->imp_obd;
+ rawobj_dup(&gctx->gc_svc_handle, &rscp->handle);
+
+ CWARN("create reverse svc ctx %p to %s: idx "LPX64"\n",
+ &rscp->ctx, obd2cli_tgt(imp->imp_obd), gsec->gs_rvs_hdl);
+ rc = 0;
+out:
+ if (rscp)
+ cache_put(&rscp->h, &rsc_cache);
+ rsc_free(&rsci);
+
+ if (rc)
+ CERROR("create reverse svc ctx: idx "LPX64", rc %d\n",
+ gsec->gs_rvs_hdl, rc);
+ RETURN(rc);
+}
+
+int gss_svc_upcall_expire_rvs_ctx(rawobj_t *handle)
+{
+ const cfs_time_t expire = 20;
+ struct rsc *rscp;
+
+ rscp = gss_svc_searchbyctx(handle);
+ if (rscp) {
+ CDEBUG(D_SEC, "reverse svcctx %p (rsc %p) expire soon\n",
+ &rscp->ctx, rscp);
+
+ rscp->h.expiry_time = cfs_time_current_sec() + expire;
+ COMPAT_RSC_PUT(&rscp->h, &rsc_cache);
+ }
+ return 0;
+}
+
+int gss_svc_upcall_dup_handle(rawobj_t *handle, struct gss_svc_ctx *ctx)
+{
+ struct rsc *rscp = container_of(ctx, struct rsc, ctx);
+
+ return rawobj_dup(handle, &rscp->handle);
+}
+
+int gss_svc_upcall_update_sequence(rawobj_t *handle, __u32 seq)
+{
+ struct rsc *rscp;
+
+ rscp = gss_svc_searchbyctx(handle);
+ if (rscp) {
+ CDEBUG(D_SEC, "reverse svcctx %p (rsc %p) update seq to %u\n",
+ &rscp->ctx, rscp, seq + 1);
+
+ rscp->ctx.gsc_rvs_seq = seq + 1;
+ COMPAT_RSC_PUT(&rscp->h, &rsc_cache);
+ }
+ return 0;
+}
+
+static struct cache_deferred_req* cache_upcall_defer(struct cache_req *req)
+{
+ return NULL;
+}
+static struct cache_req cache_upcall_chandle = { cache_upcall_defer };
+
+int gss_svc_upcall_handle_init(struct ptlrpc_request *req,
+ struct gss_svc_reqctx *grctx,
+ struct gss_wire_ctx *gw,
+ struct obd_device *target,
+ __u32 lustre_svc,
+ rawobj_t *rvs_hdl,
+ rawobj_t *in_token)
+{
+ struct ptlrpc_reply_state *rs;
+ struct rsc *rsci = NULL;
+ struct rsi *rsip = NULL, rsikey;
+ wait_queue_t wait;
+ int replen = sizeof(struct ptlrpc_body);
+ struct gss_rep_header *rephdr;
+ int first_check = 1;
+ int rc = SECSVC_DROP;
+ ENTRY;
+
+ memset(&rsikey, 0, sizeof(rsikey));
+ rsikey.lustre_svc = lustre_svc;
+ rsikey.nid = (__u64) req->rq_peer.nid;
+
+ /* duplicate context handle. for INIT it always 0 */
+ if (rawobj_dup(&rsikey.in_handle, &gw->gw_handle)) {
+ CERROR("fail to dup context handle\n");
+ GOTO(out, rc);
+ }
+
+ if (rawobj_dup(&rsikey.in_token, in_token)) {
+ CERROR("can't duplicate token\n");
+ rawobj_free(&rsikey.in_handle);
+ GOTO(out, rc);
+ }
+
+ rsip = rsi_lookup(&rsikey);
+ rsi_free(&rsikey);
+ if (!rsip) {
+ CERROR("error in rsi_lookup.\n");
+
+ if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0))
+ rc = SECSVC_COMPLETE;
+
+ GOTO(out, rc);
+ }
+
+ cache_get(&rsip->h); /* take an extra ref */
+ init_waitqueue_head(&rsip->waitq);
+ init_waitqueue_entry_current(&wait);
+ add_wait_queue(&rsip->waitq, &wait);
+
+cache_check:
+ /* Note each time cache_check() will drop a reference if return
+ * non-zero. We hold an extra reference on initial rsip, but must
+ * take care of following calls. */
+ rc = cache_check(&rsi_cache, &rsip->h, &cache_upcall_chandle);
+ switch (rc) {
+ case -EAGAIN: {
+ int valid;
+
+ if (first_check) {
+ first_check = 0;
+
+ read_lock(&rsi_cache.hash_lock);
+ valid = test_bit(CACHE_VALID, &rsip->h.flags);
+ if (valid == 0)
+ set_current_state(TASK_INTERRUPTIBLE);
+ read_unlock(&rsi_cache.hash_lock);
+
+ if (valid == 0)
+ schedule_timeout(GSS_SVC_UPCALL_TIMEOUT *
+ HZ);
+
+ cache_get(&rsip->h);
+ goto cache_check;
+ }
+ CWARN("waited %ds timeout, drop\n", GSS_SVC_UPCALL_TIMEOUT);
+ break;
+ }
+ case -ENOENT:
+ CWARN("cache_check return ENOENT, drop\n");
+ break;
+ case 0:
+ /* if not the first check, we have to release the extra
+ * reference we just added on it. */
+ if (!first_check)
+ cache_put(&rsip->h, &rsi_cache);
+ CDEBUG(D_SEC, "cache_check is good\n");
+ break;
+ }
+
+ remove_wait_queue(&rsip->waitq, &wait);
+ cache_put(&rsip->h, &rsi_cache);
+
+ if (rc)
+ GOTO(out, rc = SECSVC_DROP);
+
+ rc = SECSVC_DROP;
+ rsci = gss_svc_searchbyctx(&rsip->out_handle);
+ if (!rsci) {
+ CERROR("authentication failed\n");
+
+ if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0))
+ rc = SECSVC_COMPLETE;
+
+ GOTO(out, rc);
+ } else {
+ cache_get(&rsci->h);
+ grctx->src_ctx = &rsci->ctx;
+ }
+
+ if (rawobj_dup(&rsci->ctx.gsc_rvs_hdl, rvs_hdl)) {
+ CERROR("failed duplicate reverse handle\n");
+ GOTO(out, rc);
+ }
+
+ rsci->target = target;
+
+ CDEBUG(D_SEC, "server create rsc %p(%u->%s)\n",
+ rsci, rsci->ctx.gsc_uid, libcfs_nid2str(req->rq_peer.nid));
+
+ if (rsip->out_handle.len > PTLRPC_GSS_MAX_HANDLE_SIZE) {
+ CERROR("handle size %u too large\n", rsip->out_handle.len);
+ GOTO(out, rc = SECSVC_DROP);
+ }
+
+ grctx->src_init = 1;
+ grctx->src_reserve_len = cfs_size_round4(rsip->out_token.len);
+
+ rc = lustre_pack_reply_v2(req, 1, &replen, NULL, 0);
+ if (rc) {
+ CERROR("failed to pack reply: %d\n", rc);
+ GOTO(out, rc = SECSVC_DROP);
+ }
+
+ rs = req->rq_reply_state;
+ LASSERT(rs->rs_repbuf->lm_bufcount == 3);
+ LASSERT(rs->rs_repbuf->lm_buflens[0] >=
+ sizeof(*rephdr) + rsip->out_handle.len);
+ LASSERT(rs->rs_repbuf->lm_buflens[2] >= rsip->out_token.len);
+
+ rephdr = lustre_msg_buf(rs->rs_repbuf, 0, 0);
+ rephdr->gh_version = PTLRPC_GSS_VERSION;
+ rephdr->gh_flags = 0;
+ rephdr->gh_proc = PTLRPC_GSS_PROC_ERR;
+ rephdr->gh_major = rsip->major_status;
+ rephdr->gh_minor = rsip->minor_status;
+ rephdr->gh_seqwin = GSS_SEQ_WIN;
+ rephdr->gh_handle.len = rsip->out_handle.len;
+ memcpy(rephdr->gh_handle.data, rsip->out_handle.data,
+ rsip->out_handle.len);
+
+ memcpy(lustre_msg_buf(rs->rs_repbuf, 2, 0), rsip->out_token.data,
+ rsip->out_token.len);
+
+ rs->rs_repdata_len = lustre_shrink_msg(rs->rs_repbuf, 2,
+ rsip->out_token.len, 0);
+
+ rc = SECSVC_OK;
+
+out:
+ /* it looks like here we should put rsip also, but this mess up
+ * with NFS cache mgmt code... FIXME */
+#if 0
+ if (rsip)
+ rsi_put(&rsip->h, &rsi_cache);
+#endif
+
+ if (rsci) {
+ /* if anything went wrong, we don't keep the context too */
+ if (rc != SECSVC_OK)
+ set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+ else
+ CDEBUG(D_SEC, "create rsc with idx "LPX64"\n",
+ gss_handle_to_u64(&rsci->handle));
+
+ COMPAT_RSC_PUT(&rsci->h, &rsc_cache);
+ }
+ RETURN(rc);
+}
+
+struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
+ struct gss_wire_ctx *gw)
+{
+ struct rsc *rsc;
+
+ rsc = gss_svc_searchbyctx(&gw->gw_handle);
+ if (!rsc) {
+ CWARN("Invalid gss ctx idx "LPX64" from %s\n",
+ gss_handle_to_u64(&gw->gw_handle),
+ libcfs_nid2str(req->rq_peer.nid));
+ return NULL;
+ }
+
+ return &rsc->ctx;
+}
+
+void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx)
+{
+ struct rsc *rsc = container_of(ctx, struct rsc, ctx);
+
+ COMPAT_RSC_PUT(&rsc->h, &rsc_cache);
+}
+
+void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx)
+{
+ struct rsc *rsc = container_of(ctx, struct rsc, ctx);
+
+ /* can't be found */
+ set_bit(CACHE_NEGATIVE, &rsc->h.flags);
+ /* to be removed at next scan */
+ rsc->h.expiry_time = 1;
+}
+
+int __init gss_init_svc_upcall(void)
+{
+ int i;
+
+ spin_lock_init(&__ctx_index_lock);
+ /*
+ * this helps reducing context index confliction. after server reboot,
+ * conflicting request from clients might be filtered out by initial
+ * sequence number checking, thus no chance to sent error notification
+ * back to clients.
+ */
+ cfs_get_random_bytes(&__ctx_index, sizeof(__ctx_index));
+
+
+ cache_register(&rsi_cache);
+ cache_register(&rsc_cache);
+
+ /* FIXME this looks stupid. we intend to give lsvcgssd a chance to open
+ * the init upcall channel, otherwise there's big chance that the first
+ * upcall issued before the channel be opened thus nfsv4 cache code will
+ * drop the request direclty, thus lead to unnecessary recovery time.
+ * here we wait at miximum 1.5 seconds. */
+ for (i = 0; i < 6; i++) {
+ if (atomic_read(&rsi_cache.readers) > 0)
+ break;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ LASSERT(HZ >= 4);
+ schedule_timeout(HZ / 4);
+ }
+
+ if (atomic_read(&rsi_cache.readers) == 0)
+ CWARN("Init channel is not opened by lsvcgssd, following "
+ "request might be dropped until lsvcgssd is active\n");
+
+ return 0;
+}
+
+void __exit gss_exit_svc_upcall(void)
+{
+ cache_purge(&rsi_cache);
+ cache_unregister(&rsi_cache);
+
+ cache_purge(&rsc_cache);
+ cache_unregister(&rsc_cache);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/lproc_gss.c b/drivers/staging/lustre/lustre/ptlrpc/gss/lproc_gss.c
new file mode 100644
index 000000000000..340400089a5a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/lproc_gss.c
@@ -0,0 +1,219 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lprocfs_status.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static struct proc_dir_entry *gss_proc_root = NULL;
+static struct proc_dir_entry *gss_proc_lk = NULL;
+
+/*
+ * statistic of "out-of-sequence-window"
+ */
+static struct {
+ spinlock_t oos_lock;
+ atomic_t oos_cli_count; /* client occurrence */
+ int oos_cli_behind; /* client max seqs behind */
+ atomic_t oos_svc_replay[3]; /* server replay detected */
+ atomic_t oos_svc_pass[3]; /* server verified ok */
+} gss_stat_oos = {
+ .oos_cli_count = ATOMIC_INIT(0),
+ .oos_cli_behind = 0,
+ .oos_svc_replay = { ATOMIC_INIT(0), },
+ .oos_svc_pass = { ATOMIC_INIT(0), },
+};
+
+void gss_stat_oos_record_cli(int behind)
+{
+ atomic_inc(&gss_stat_oos.oos_cli_count);
+
+ spin_lock(&gss_stat_oos.oos_lock);
+ if (behind > gss_stat_oos.oos_cli_behind)
+ gss_stat_oos.oos_cli_behind = behind;
+ spin_unlock(&gss_stat_oos.oos_lock);
+}
+
+void gss_stat_oos_record_svc(int phase, int replay)
+{
+ LASSERT(phase >= 0 && phase <= 2);
+
+ if (replay)
+ atomic_inc(&gss_stat_oos.oos_svc_replay[phase]);
+ else
+ atomic_inc(&gss_stat_oos.oos_svc_pass[phase]);
+}
+
+static int gss_proc_oos_seq_show(struct seq_file *m, void *v)
+{
+ return seq_printf(m,
+ "seqwin: %u\n"
+ "backwin: %u\n"
+ "client fall behind seqwin\n"
+ " occurrence: %d\n"
+ " max seq behind: %d\n"
+ "server replay detected:\n"
+ " phase 0: %d\n"
+ " phase 1: %d\n"
+ " phase 2: %d\n"
+ "server verify ok:\n"
+ " phase 2: %d\n",
+ GSS_SEQ_WIN_MAIN,
+ GSS_SEQ_WIN_BACK,
+ atomic_read(&gss_stat_oos.oos_cli_count),
+ gss_stat_oos.oos_cli_behind,
+ atomic_read(&gss_stat_oos.oos_svc_replay[0]),
+ atomic_read(&gss_stat_oos.oos_svc_replay[1]),
+ atomic_read(&gss_stat_oos.oos_svc_replay[2]),
+ atomic_read(&gss_stat_oos.oos_svc_pass[2]));
+}
+LPROC_SEQ_FOPS_RO(gss_proc_oos);
+
+static int gss_proc_write_secinit(struct file *file, const char *buffer,
+ size_t count, off_t *off)
+{
+ int rc;
+
+ rc = gss_do_ctx_init_rpc((char *) buffer, count);
+ if (rc) {
+ LASSERT(rc < 0);
+ return rc;
+ }
+
+ return count;
+}
+
+static const struct file_operations gss_proc_secinit = {
+ .write = gss_proc_write_secinit,
+};
+
+static struct lprocfs_vars gss_lprocfs_vars[] = {
+ { "replays", &gss_proc_oos_fops },
+ { "init_channel", &gss_proc_secinit, NULL, 0222 },
+ { NULL }
+};
+
+/*
+ * for userspace helper lgss_keyring.
+ *
+ * debug_level: [0, 4], defined in utils/gss/lgss_utils.h
+ */
+static int gss_lk_debug_level = 1;
+
+static int gss_lk_proc_dl_seq_show(struct seq_file *m, void *v)
+{
+ return seq_printf(m, "%u\n", gss_lk_debug_level);
+}
+
+static int gss_lk_proc_dl_seq_write(struct file *file, const char *buffer,
+ size_t count, off_t *off)
+{
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val < 0 || val > 4)
+ return -ERANGE;
+
+ gss_lk_debug_level = val;
+ return count;
+}
+LPROC_SEQ_FOPS(gss_lk_proc_dl);
+
+static struct lprocfs_vars gss_lk_lprocfs_vars[] = {
+ { "debug_level", &gss_lk_proc_dl_fops },
+ { NULL }
+};
+
+void gss_exit_lproc(void)
+{
+ if (gss_proc_lk) {
+ lprocfs_remove(&gss_proc_lk);
+ gss_proc_lk = NULL;
+ }
+
+ if (gss_proc_root) {
+ lprocfs_remove(&gss_proc_root);
+ gss_proc_root = NULL;
+ }
+}
+
+int gss_init_lproc(void)
+{
+ int rc;
+
+ spin_lock_init(&gss_stat_oos.oos_lock);
+
+ gss_proc_root = lprocfs_register("gss", sptlrpc_proc_root,
+ gss_lprocfs_vars, NULL);
+ if (IS_ERR(gss_proc_root)) {
+ gss_proc_root = NULL;
+ GOTO(err_out, rc = PTR_ERR(gss_proc_root));
+ }
+
+ gss_proc_lk = lprocfs_register("lgss_keyring", gss_proc_root,
+ gss_lk_lprocfs_vars, NULL);
+ if (IS_ERR(gss_proc_lk)) {
+ gss_proc_lk = NULL;
+ GOTO(err_out, rc = PTR_ERR(gss_proc_root));
+ }
+
+ return 0;
+
+err_out:
+ CERROR("failed to initialize gss lproc entries: %d\n", rc);
+ gss_exit_lproc();
+ return rc;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c b/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c
new file mode 100644
index 000000000000..ebca858ca183
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c
@@ -0,0 +1,2916 @@
+/*
+ * Modifications for Lustre
+ *
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/auth_gss.c
+ *
+ * RPCSEC_GSS client authentication.
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Dug Song <dugsong@monkey.org>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <asm/atomic.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <obd_cksum.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_sec.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+#include <linux/crypto.h>
+#include <linux/crc32.h>
+
+/*
+ * early reply have fixed size, respectively in privacy and integrity mode.
+ * so we calculate them only once.
+ */
+static int gss_at_reply_off_integ;
+static int gss_at_reply_off_priv;
+
+
+static inline int msg_last_segidx(struct lustre_msg *msg)
+{
+ LASSERT(msg->lm_bufcount > 0);
+ return msg->lm_bufcount - 1;
+}
+static inline int msg_last_seglen(struct lustre_msg *msg)
+{
+ return msg->lm_buflens[msg_last_segidx(msg)];
+}
+
+/********************************************
+ * wire data swabber *
+ ********************************************/
+
+static
+void gss_header_swabber(struct gss_header *ghdr)
+{
+ __swab32s(&ghdr->gh_flags);
+ __swab32s(&ghdr->gh_proc);
+ __swab32s(&ghdr->gh_seq);
+ __swab32s(&ghdr->gh_svc);
+ __swab32s(&ghdr->gh_pad1);
+ __swab32s(&ghdr->gh_handle.len);
+}
+
+struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment,
+ int swabbed)
+{
+ struct gss_header *ghdr;
+
+ ghdr = lustre_msg_buf(msg, segment, sizeof(*ghdr));
+ if (ghdr == NULL)
+ return NULL;
+
+ if (swabbed)
+ gss_header_swabber(ghdr);
+
+ if (sizeof(*ghdr) + ghdr->gh_handle.len > msg->lm_buflens[segment]) {
+ CERROR("gss header has length %d, now %u received\n",
+ (int) sizeof(*ghdr) + ghdr->gh_handle.len,
+ msg->lm_buflens[segment]);
+ return NULL;
+ }
+
+ return ghdr;
+}
+
+#if 0
+static
+void gss_netobj_swabber(netobj_t *obj)
+{
+ __swab32s(&obj->len);
+}
+
+netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment)
+{
+ netobj_t *obj;
+
+ obj = lustre_swab_buf(msg, segment, sizeof(*obj), gss_netobj_swabber);
+ if (obj && sizeof(*obj) + obj->len > msg->lm_buflens[segment]) {
+ CERROR("netobj require length %u but only %u received\n",
+ (unsigned int) sizeof(*obj) + obj->len,
+ msg->lm_buflens[segment]);
+ return NULL;
+ }
+
+ return obj;
+}
+#endif
+
+/*
+ * payload should be obtained from mechanism. but currently since we
+ * only support kerberos, we could simply use fixed value.
+ * krb5 "meta" data:
+ * - krb5 header: 16
+ * - krb5 checksum: 20
+ *
+ * for privacy mode, payload also include the cipher text which has the same
+ * size as plain text, plus possible confounder, padding both at maximum cipher
+ * block size.
+ */
+#define GSS_KRB5_INTEG_MAX_PAYLOAD (40)
+
+static inline
+int gss_mech_payload(struct gss_ctx *mechctx, int msgsize, int privacy)
+{
+ if (privacy)
+ return GSS_KRB5_INTEG_MAX_PAYLOAD + 16 + 16 + 16 + msgsize;
+ else
+ return GSS_KRB5_INTEG_MAX_PAYLOAD;
+}
+
+/*
+ * return signature size, otherwise < 0 to indicate error
+ */
+static int gss_sign_msg(struct lustre_msg *msg,
+ struct gss_ctx *mechctx,
+ enum lustre_sec_part sp,
+ __u32 flags, __u32 proc, __u32 seq, __u32 svc,
+ rawobj_t *handle)
+{
+ struct gss_header *ghdr;
+ rawobj_t text[4], mic;
+ int textcnt, max_textcnt, mic_idx;
+ __u32 major;
+
+ LASSERT(msg->lm_bufcount >= 2);
+
+ /* gss hdr */
+ LASSERT(msg->lm_buflens[0] >=
+ sizeof(*ghdr) + (handle ? handle->len : 0));
+ ghdr = lustre_msg_buf(msg, 0, 0);
+
+ ghdr->gh_version = PTLRPC_GSS_VERSION;
+ ghdr->gh_sp = (__u8) sp;
+ ghdr->gh_flags = flags;
+ ghdr->gh_proc = proc;
+ ghdr->gh_seq = seq;
+ ghdr->gh_svc = svc;
+ if (!handle) {
+ /* fill in a fake one */
+ ghdr->gh_handle.len = 0;
+ } else {
+ ghdr->gh_handle.len = handle->len;
+ memcpy(ghdr->gh_handle.data, handle->data, handle->len);
+ }
+
+ /* no actual signature for null mode */
+ if (svc == SPTLRPC_SVC_NULL)
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+ /* MIC */
+ mic_idx = msg_last_segidx(msg);
+ max_textcnt = (svc == SPTLRPC_SVC_AUTH) ? 1 : mic_idx;
+
+ for (textcnt = 0; textcnt < max_textcnt; textcnt++) {
+ text[textcnt].len = msg->lm_buflens[textcnt];
+ text[textcnt].data = lustre_msg_buf(msg, textcnt, 0);
+ }
+
+ mic.len = msg->lm_buflens[mic_idx];
+ mic.data = lustre_msg_buf(msg, mic_idx, 0);
+
+ major = lgss_get_mic(mechctx, textcnt, text, 0, NULL, &mic);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("fail to generate MIC: %08x\n", major);
+ return -EPERM;
+ }
+ LASSERT(mic.len <= msg->lm_buflens[mic_idx]);
+
+ return lustre_shrink_msg(msg, mic_idx, mic.len, 0);
+}
+
+/*
+ * return gss error
+ */
+static
+__u32 gss_verify_msg(struct lustre_msg *msg,
+ struct gss_ctx *mechctx,
+ __u32 svc)
+{
+ rawobj_t text[4], mic;
+ int textcnt, max_textcnt;
+ int mic_idx;
+ __u32 major;
+
+ LASSERT(msg->lm_bufcount >= 2);
+
+ if (svc == SPTLRPC_SVC_NULL)
+ return GSS_S_COMPLETE;
+
+ mic_idx = msg_last_segidx(msg);
+ max_textcnt = (svc == SPTLRPC_SVC_AUTH) ? 1 : mic_idx;
+
+ for (textcnt = 0; textcnt < max_textcnt; textcnt++) {
+ text[textcnt].len = msg->lm_buflens[textcnt];
+ text[textcnt].data = lustre_msg_buf(msg, textcnt, 0);
+ }
+
+ mic.len = msg->lm_buflens[mic_idx];
+ mic.data = lustre_msg_buf(msg, mic_idx, 0);
+
+ major = lgss_verify_mic(mechctx, textcnt, text, 0, NULL, &mic);
+ if (major != GSS_S_COMPLETE)
+ CERROR("mic verify error: %08x\n", major);
+
+ return major;
+}
+
+/*
+ * return gss error code
+ */
+static
+__u32 gss_unseal_msg(struct gss_ctx *mechctx,
+ struct lustre_msg *msgbuf,
+ int *msg_len, int msgbuf_len)
+{
+ rawobj_t clear_obj, hdrobj, token;
+ __u8 *clear_buf;
+ int clear_buflen;
+ __u32 major;
+ ENTRY;
+
+ if (msgbuf->lm_bufcount != 2) {
+ CERROR("invalid bufcount %d\n", msgbuf->lm_bufcount);
+ RETURN(GSS_S_FAILURE);
+ }
+
+ /* allocate a temporary clear text buffer, same sized as token,
+ * we assume the final clear text size <= token size */
+ clear_buflen = lustre_msg_buflen(msgbuf, 1);
+ OBD_ALLOC_LARGE(clear_buf, clear_buflen);
+ if (!clear_buf)
+ RETURN(GSS_S_FAILURE);
+
+ /* buffer objects */
+ hdrobj.len = lustre_msg_buflen(msgbuf, 0);
+ hdrobj.data = lustre_msg_buf(msgbuf, 0, 0);
+ token.len = lustre_msg_buflen(msgbuf, 1);
+ token.data = lustre_msg_buf(msgbuf, 1, 0);
+ clear_obj.len = clear_buflen;
+ clear_obj.data = clear_buf;
+
+ major = lgss_unwrap(mechctx, &hdrobj, &token, &clear_obj);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("unwrap message error: %08x\n", major);
+ GOTO(out_free, major = GSS_S_FAILURE);
+ }
+ LASSERT(clear_obj.len <= clear_buflen);
+ LASSERT(clear_obj.len <= msgbuf_len);
+
+ /* now the decrypted message */
+ memcpy(msgbuf, clear_obj.data, clear_obj.len);
+ *msg_len = clear_obj.len;
+
+ major = GSS_S_COMPLETE;
+out_free:
+ OBD_FREE_LARGE(clear_buf, clear_buflen);
+ RETURN(major);
+}
+
+/********************************************
+ * gss client context manipulation helpers *
+ ********************************************/
+
+int cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount));
+
+ if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
+ if (!ctx->cc_early_expire)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ CWARN("ctx %p(%u->%s) get expired: %lu(%+lds)\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+ ctx->cc_expire,
+ ctx->cc_expire == 0 ? 0 :
+ cfs_time_sub(ctx->cc_expire, cfs_time_current_sec()));
+
+ sptlrpc_cli_ctx_wakeup(ctx);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * return 1 if the context is dead.
+ */
+int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx)
+{
+ if (unlikely(cli_ctx_is_dead(ctx)))
+ return 1;
+
+ /* expire is 0 means never expire. a newly created gss context
+ * which during upcall may has 0 expiration */
+ if (ctx->cc_expire == 0)
+ return 0;
+
+ /* check real expiration */
+ if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
+ return 0;
+
+ cli_ctx_expire(ctx);
+ return 1;
+}
+
+void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx)
+{
+ struct ptlrpc_cli_ctx *ctx = &gctx->gc_base;
+ unsigned long ctx_expiry;
+
+ if (lgss_inquire_context(gctx->gc_mechctx, &ctx_expiry)) {
+ CERROR("ctx %p(%u): unable to inquire, expire it now\n",
+ gctx, ctx->cc_vcred.vc_uid);
+ ctx_expiry = 1; /* make it expired now */
+ }
+
+ ctx->cc_expire = gss_round_ctx_expiry(ctx_expiry,
+ ctx->cc_sec->ps_flvr.sf_flags);
+
+ /* At this point this ctx might have been marked as dead by
+ * someone else, in which case nobody will make further use
+ * of it. we don't care, and mark it UPTODATE will help
+ * destroying server side context when it be destroied. */
+ set_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ if (sec_is_reverse(ctx->cc_sec)) {
+ CWARN("server installed reverse ctx %p idx "LPX64", "
+ "expiry %lu(%+lds)\n", ctx,
+ gss_handle_to_u64(&gctx->gc_handle),
+ ctx->cc_expire, ctx->cc_expire - cfs_time_current_sec());
+ } else {
+ CWARN("client refreshed ctx %p idx "LPX64" (%u->%s), "
+ "expiry %lu(%+lds)\n", ctx,
+ gss_handle_to_u64(&gctx->gc_handle),
+ ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+ ctx->cc_expire, ctx->cc_expire - cfs_time_current_sec());
+
+ /* install reverse svc ctx for root context */
+ if (ctx->cc_vcred.vc_uid == 0)
+ gss_sec_install_rctx(ctx->cc_sec->ps_import,
+ ctx->cc_sec, ctx);
+ }
+
+ sptlrpc_cli_ctx_wakeup(ctx);
+}
+
+static void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx)
+{
+ LASSERT(gctx->gc_base.cc_sec);
+
+ if (gctx->gc_mechctx) {
+ lgss_delete_sec_context(&gctx->gc_mechctx);
+ gctx->gc_mechctx = NULL;
+ }
+
+ if (!rawobj_empty(&gctx->gc_svc_handle)) {
+ /* forward ctx: mark buddy reverse svcctx soon-expire. */
+ if (!sec_is_reverse(gctx->gc_base.cc_sec) &&
+ !rawobj_empty(&gctx->gc_svc_handle))
+ gss_svc_upcall_expire_rvs_ctx(&gctx->gc_svc_handle);
+
+ rawobj_free(&gctx->gc_svc_handle);
+ }
+
+ rawobj_free(&gctx->gc_handle);
+}
+
+/*
+ * Based on sequence number algorithm as specified in RFC 2203.
+ *
+ * modified for our own problem: arriving request has valid sequence number,
+ * but unwrapping request might cost a long time, after that its sequence
+ * are not valid anymore (fall behind the window). It rarely happen, mostly
+ * under extreme load.
+ *
+ * note we should not check sequence before verify the integrity of incoming
+ * request, because just one attacking request with high sequence number might
+ * cause all following request be dropped.
+ *
+ * so here we use a multi-phase approach: prepare 2 sequence windows,
+ * "main window" for normal sequence and "back window" for fall behind sequence.
+ * and 3-phase checking mechanism:
+ * 0 - before integrity verification, perform a initial sequence checking in
+ * main window, which only try and don't actually set any bits. if the
+ * sequence is high above the window or fit in the window and the bit
+ * is 0, then accept and proceed to integrity verification. otherwise
+ * reject this sequence.
+ * 1 - after integrity verification, check in main window again. if this
+ * sequence is high above the window or fit in the window and the bit
+ * is 0, then set the bit and accept; if it fit in the window but bit
+ * already set, then reject; if it fall behind the window, then proceed
+ * to phase 2.
+ * 2 - check in back window. if it is high above the window or fit in the
+ * window and the bit is 0, then set the bit and accept. otherwise reject.
+ *
+ * return value:
+ * 1: looks like a replay
+ * 0: is ok
+ * -1: is a replay
+ *
+ * note phase 0 is necessary, because otherwise replay attacking request of
+ * sequence which between the 2 windows can't be detected.
+ *
+ * this mechanism can't totally solve the problem, but could help much less
+ * number of valid requests be dropped.
+ */
+static
+int gss_do_check_seq(unsigned long *window, __u32 win_size, __u32 *max_seq,
+ __u32 seq_num, int phase)
+{
+ LASSERT(phase >= 0 && phase <= 2);
+
+ if (seq_num > *max_seq) {
+ /*
+ * 1. high above the window
+ */
+ if (phase == 0)
+ return 0;
+
+ if (seq_num >= *max_seq + win_size) {
+ memset(window, 0, win_size / 8);
+ *max_seq = seq_num;
+ } else {
+ while(*max_seq < seq_num) {
+ (*max_seq)++;
+ __clear_bit((*max_seq) % win_size, window);
+ }
+ }
+ __set_bit(seq_num % win_size, window);
+ } else if (seq_num + win_size <= *max_seq) {
+ /*
+ * 2. low behind the window
+ */
+ if (phase == 0 || phase == 2)
+ goto replay;
+
+ CWARN("seq %u is %u behind (size %d), check backup window\n",
+ seq_num, *max_seq - win_size - seq_num, win_size);
+ return 1;
+ } else {
+ /*
+ * 3. fit into the window
+ */
+ switch (phase) {
+ case 0:
+ if (test_bit(seq_num % win_size, window))
+ goto replay;
+ break;
+ case 1:
+ case 2:
+ if (__test_and_set_bit(seq_num % win_size, window))
+ goto replay;
+ break;
+ }
+ }
+
+ return 0;
+
+replay:
+ CERROR("seq %u (%s %s window) is a replay: max %u, winsize %d\n",
+ seq_num,
+ seq_num + win_size > *max_seq ? "in" : "behind",
+ phase == 2 ? "backup " : "main",
+ *max_seq, win_size);
+ return -1;
+}
+
+/*
+ * Based on sequence number algorithm as specified in RFC 2203.
+ *
+ * if @set == 0: initial check, don't set any bit in window
+ * if @sec == 1: final check, set bit in window
+ */
+int gss_check_seq_num(struct gss_svc_seq_data *ssd, __u32 seq_num, int set)
+{
+ int rc = 0;
+
+ spin_lock(&ssd->ssd_lock);
+
+ if (set == 0) {
+ /*
+ * phase 0 testing
+ */
+ rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN,
+ &ssd->ssd_max_main, seq_num, 0);
+ if (unlikely(rc))
+ gss_stat_oos_record_svc(0, 1);
+ } else {
+ /*
+ * phase 1 checking main window
+ */
+ rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN,
+ &ssd->ssd_max_main, seq_num, 1);
+ switch (rc) {
+ case -1:
+ gss_stat_oos_record_svc(1, 1);
+ /* fall through */
+ case 0:
+ goto exit;
+ }
+ /*
+ * phase 2 checking back window
+ */
+ rc = gss_do_check_seq(ssd->ssd_win_back, GSS_SEQ_WIN_BACK,
+ &ssd->ssd_max_back, seq_num, 2);
+ if (rc)
+ gss_stat_oos_record_svc(2, 1);
+ else
+ gss_stat_oos_record_svc(2, 0);
+ }
+exit:
+ spin_unlock(&ssd->ssd_lock);
+ return rc;
+}
+
+/***************************************
+ * cred APIs *
+ ***************************************/
+
+static inline int gss_cli_payload(struct ptlrpc_cli_ctx *ctx,
+ int msgsize, int privacy)
+{
+ return gss_mech_payload(NULL, msgsize, privacy);
+}
+
+static int gss_cli_bulk_payload(struct ptlrpc_cli_ctx *ctx,
+ struct sptlrpc_flavor *flvr,
+ int reply, int read)
+{
+ int payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ LASSERT(SPTLRPC_FLVR_BULK_TYPE(flvr->sf_rpc) == SPTLRPC_BULK_DEFAULT);
+
+ if ((!reply && !read) || (reply && read)) {
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_NULL:
+ break;
+ case SPTLRPC_BULK_SVC_INTG:
+ payload += gss_cli_payload(ctx, 0, 0);
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ payload += gss_cli_payload(ctx, 0, 1);
+ break;
+ case SPTLRPC_BULK_SVC_AUTH:
+ default:
+ LBUG();
+ }
+ }
+
+ return payload;
+}
+
+int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
+{
+ return (ctx->cc_vcred.vc_uid == vcred->vc_uid);
+}
+
+void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_CTX_NEW)
+ strncat(buf, "new,", bufsize);
+ if (flags & PTLRPC_CTX_UPTODATE)
+ strncat(buf, "uptodate,", bufsize);
+ if (flags & PTLRPC_CTX_DEAD)
+ strncat(buf, "dead,", bufsize);
+ if (flags & PTLRPC_CTX_ERROR)
+ strncat(buf, "error,", bufsize);
+ if (flags & PTLRPC_CTX_CACHED)
+ strncat(buf, "cached,", bufsize);
+ if (flags & PTLRPC_CTX_ETERNAL)
+ strncat(buf, "eternal,", bufsize);
+ if (buf[0] == '\0')
+ strncat(buf, "-,", bufsize);
+
+ buf[strlen(buf) - 1] = '\0';
+}
+
+int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req)
+{
+ struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+ __u32 flags = 0, seq, svc;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+ LASSERT(req->rq_cli_ctx == ctx);
+
+ /* nothing to do for context negotiation RPCs */
+ if (req->rq_ctx_init)
+ RETURN(0);
+
+ svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ if (req->rq_pack_bulk)
+ flags |= LUSTRE_GSS_PACK_BULK;
+ if (req->rq_pack_udesc)
+ flags |= LUSTRE_GSS_PACK_USER;
+
+redo:
+ seq = atomic_inc_return(&gctx->gc_seq);
+
+ rc = gss_sign_msg(req->rq_reqbuf, gctx->gc_mechctx,
+ ctx->cc_sec->ps_part,
+ flags, gctx->gc_proc, seq, svc,
+ &gctx->gc_handle);
+ if (rc < 0)
+ RETURN(rc);
+
+ /* gss_sign_msg() msg might take long time to finish, in which period
+ * more rpcs could be wrapped up and sent out. if we found too many
+ * of them we should repack this rpc, because sent it too late might
+ * lead to the sequence number fall behind the window on server and
+ * be dropped. also applies to gss_cli_ctx_seal().
+ *
+ * Note: null mode dosen't check sequence number. */
+ if (svc != SPTLRPC_SVC_NULL &&
+ atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) {
+ int behind = atomic_read(&gctx->gc_seq) - seq;
+
+ gss_stat_oos_record_cli(behind);
+ CWARN("req %p: %u behind, retry signing\n", req, behind);
+ goto redo;
+ }
+
+ req->rq_reqdata_len = rc;
+ RETURN(0);
+}
+
+static
+int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct gss_header *ghdr)
+{
+ struct gss_err_header *errhdr;
+ int rc;
+
+ LASSERT(ghdr->gh_proc == PTLRPC_GSS_PROC_ERR);
+
+ errhdr = (struct gss_err_header *) ghdr;
+
+ CWARN("req x"LPU64"/t"LPU64", ctx %p idx "LPX64"(%u->%s): "
+ "%sserver respond (%08x/%08x)\n",
+ req->rq_xid, req->rq_transno, ctx,
+ gss_handle_to_u64(&ctx2gctx(ctx)->gc_handle),
+ ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+ sec_is_reverse(ctx->cc_sec) ? "reverse" : "",
+ errhdr->gh_major, errhdr->gh_minor);
+
+ /* context fini rpc, let it failed */
+ if (req->rq_ctx_fini) {
+ CWARN("context fini rpc failed\n");
+ return -EINVAL;
+ }
+
+ /* reverse sec, just return error, don't expire this ctx because it's
+ * crucial to callback rpcs. note if the callback rpc failed because
+ * of bit flip during network transfer, the client will be evicted
+ * directly. so more gracefully we probably want let it retry for
+ * number of times. */
+ if (sec_is_reverse(ctx->cc_sec))
+ return -EINVAL;
+
+ if (errhdr->gh_major != GSS_S_NO_CONTEXT &&
+ errhdr->gh_major != GSS_S_BAD_SIG)
+ return -EACCES;
+
+ /* server return NO_CONTEXT might be caused by context expire
+ * or server reboot/failover. we try to refresh a new ctx which
+ * be transparent to upper layer.
+ *
+ * In some cases, our gss handle is possible to be incidentally
+ * identical to another handle since the handle itself is not
+ * fully random. In krb5 case, the GSS_S_BAD_SIG will be
+ * returned, maybe other gss error for other mechanism.
+ *
+ * if we add new mechanism, make sure the correct error are
+ * returned in this case. */
+ CWARN("%s: server might lost the context, retrying\n",
+ errhdr->gh_major == GSS_S_NO_CONTEXT ? "NO_CONTEXT" : "BAD_SIG");
+
+ sptlrpc_cli_ctx_expire(ctx);
+
+ /* we need replace the ctx right here, otherwise during
+ * resent we'll hit the logic in sptlrpc_req_refresh_ctx()
+ * which keep the ctx with RESEND flag, thus we'll never
+ * get rid of this ctx. */
+ rc = sptlrpc_req_replace_dead_ctx(req);
+ if (rc == 0)
+ req->rq_resend = 1;
+
+ return rc;
+}
+
+int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req)
+{
+ struct gss_cli_ctx *gctx;
+ struct gss_header *ghdr, *reqhdr;
+ struct lustre_msg *msg = req->rq_repdata;
+ __u32 major;
+ int pack_bulk, swabbed, rc = 0;
+ ENTRY;
+
+ LASSERT(req->rq_cli_ctx == ctx);
+ LASSERT(msg);
+
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+
+ /* special case for context negotiation, rq_repmsg/rq_replen actually
+ * are not used currently. but early reply always be treated normally */
+ if (req->rq_ctx_init && !req->rq_early) {
+ req->rq_repmsg = lustre_msg_buf(msg, 1, 0);
+ req->rq_replen = msg->lm_buflens[1];
+ RETURN(0);
+ }
+
+ if (msg->lm_bufcount < 2 || msg->lm_bufcount > 4) {
+ CERROR("unexpected bufcount %u\n", msg->lm_bufcount);
+ RETURN(-EPROTO);
+ }
+
+ swabbed = ptlrpc_rep_need_swab(req);
+
+ ghdr = gss_swab_header(msg, 0, swabbed);
+ if (ghdr == NULL) {
+ CERROR("can't decode gss header\n");
+ RETURN(-EPROTO);
+ }
+
+ /* sanity checks */
+ reqhdr = lustre_msg_buf(msg, 0, sizeof(*reqhdr));
+ LASSERT(reqhdr);
+
+ if (ghdr->gh_version != reqhdr->gh_version) {
+ CERROR("gss version %u mismatch, expect %u\n",
+ ghdr->gh_version, reqhdr->gh_version);
+ RETURN(-EPROTO);
+ }
+
+ switch (ghdr->gh_proc) {
+ case PTLRPC_GSS_PROC_DATA:
+ pack_bulk = ghdr->gh_flags & LUSTRE_GSS_PACK_BULK;
+
+ if (!req->rq_early && !equi(req->rq_pack_bulk == 1, pack_bulk)){
+ CERROR("%s bulk flag in reply\n",
+ req->rq_pack_bulk ? "missing" : "unexpected");
+ RETURN(-EPROTO);
+ }
+
+ if (ghdr->gh_seq != reqhdr->gh_seq) {
+ CERROR("seqnum %u mismatch, expect %u\n",
+ ghdr->gh_seq, reqhdr->gh_seq);
+ RETURN(-EPROTO);
+ }
+
+ if (ghdr->gh_svc != reqhdr->gh_svc) {
+ CERROR("svc %u mismatch, expect %u\n",
+ ghdr->gh_svc, reqhdr->gh_svc);
+ RETURN(-EPROTO);
+ }
+
+ if (swabbed)
+ gss_header_swabber(ghdr);
+
+ major = gss_verify_msg(msg, gctx->gc_mechctx, reqhdr->gh_svc);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("failed to verify reply: %x\n", major);
+ RETURN(-EPERM);
+ }
+
+ if (req->rq_early && reqhdr->gh_svc == SPTLRPC_SVC_NULL) {
+ __u32 cksum;
+
+ cksum = crc32_le(!(__u32) 0,
+ lustre_msg_buf(msg, 1, 0),
+ lustre_msg_buflen(msg, 1));
+ if (cksum != msg->lm_cksum) {
+ CWARN("early reply checksum mismatch: "
+ "%08x != %08x\n", cksum, msg->lm_cksum);
+ RETURN(-EPROTO);
+ }
+ }
+
+ if (pack_bulk) {
+ /* bulk checksum is right after the lustre msg */
+ if (msg->lm_bufcount < 3) {
+ CERROR("Invalid reply bufcount %u\n",
+ msg->lm_bufcount);
+ RETURN(-EPROTO);
+ }
+
+ rc = bulk_sec_desc_unpack(msg, 2, swabbed);
+ if (rc) {
+ CERROR("unpack bulk desc: %d\n", rc);
+ RETURN(rc);
+ }
+ }
+
+ req->rq_repmsg = lustre_msg_buf(msg, 1, 0);
+ req->rq_replen = msg->lm_buflens[1];
+ break;
+ case PTLRPC_GSS_PROC_ERR:
+ if (req->rq_early) {
+ CERROR("server return error with early reply\n");
+ rc = -EPROTO;
+ } else {
+ rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr);
+ }
+ break;
+ default:
+ CERROR("unknown gss proc %d\n", ghdr->gh_proc);
+ rc = -EPROTO;
+ }
+
+ RETURN(rc);
+}
+
+int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req)
+{
+ struct gss_cli_ctx *gctx;
+ rawobj_t hdrobj, msgobj, token;
+ struct gss_header *ghdr;
+ __u32 buflens[2], major;
+ int wiresize, rc;
+ ENTRY;
+
+ LASSERT(req->rq_clrbuf);
+ LASSERT(req->rq_cli_ctx == ctx);
+ LASSERT(req->rq_reqlen);
+
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+
+ /* final clear data length */
+ req->rq_clrdata_len = lustre_msg_size_v2(req->rq_clrbuf->lm_bufcount,
+ req->rq_clrbuf->lm_buflens);
+
+ /* calculate wire data length */
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = gss_cli_payload(&gctx->gc_base, req->rq_clrdata_len, 1);
+ wiresize = lustre_msg_size_v2(2, buflens);
+
+ /* allocate wire buffer */
+ if (req->rq_pool) {
+ /* pre-allocated */
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf != req->rq_clrbuf);
+ LASSERT(req->rq_reqbuf_len >= wiresize);
+ } else {
+ OBD_ALLOC_LARGE(req->rq_reqbuf, wiresize);
+ if (!req->rq_reqbuf)
+ RETURN(-ENOMEM);
+ req->rq_reqbuf_len = wiresize;
+ }
+
+ lustre_init_msg_v2(req->rq_reqbuf, 2, buflens, NULL);
+ req->rq_reqbuf->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ /* gss header */
+ ghdr = lustre_msg_buf(req->rq_reqbuf, 0, 0);
+ ghdr->gh_version = PTLRPC_GSS_VERSION;
+ ghdr->gh_sp = (__u8) ctx->cc_sec->ps_part;
+ ghdr->gh_flags = 0;
+ ghdr->gh_proc = gctx->gc_proc;
+ ghdr->gh_svc = SPTLRPC_SVC_PRIV;
+ ghdr->gh_handle.len = gctx->gc_handle.len;
+ memcpy(ghdr->gh_handle.data, gctx->gc_handle.data, gctx->gc_handle.len);
+ if (req->rq_pack_bulk)
+ ghdr->gh_flags |= LUSTRE_GSS_PACK_BULK;
+ if (req->rq_pack_udesc)
+ ghdr->gh_flags |= LUSTRE_GSS_PACK_USER;
+
+redo:
+ ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
+
+ /* buffer objects */
+ hdrobj.len = PTLRPC_GSS_HEADER_SIZE;
+ hdrobj.data = (__u8 *) ghdr;
+ msgobj.len = req->rq_clrdata_len;
+ msgobj.data = (__u8 *) req->rq_clrbuf;
+ token.len = lustre_msg_buflen(req->rq_reqbuf, 1);
+ token.data = lustre_msg_buf(req->rq_reqbuf, 1, 0);
+
+ major = lgss_wrap(gctx->gc_mechctx, &hdrobj, &msgobj,
+ req->rq_clrbuf_len, &token);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("priv: wrap message error: %08x\n", major);
+ GOTO(err_free, rc = -EPERM);
+ }
+ LASSERT(token.len <= buflens[1]);
+
+ /* see explain in gss_cli_ctx_sign() */
+ if (unlikely(atomic_read(&gctx->gc_seq) - ghdr->gh_seq >
+ GSS_SEQ_REPACK_THRESHOLD)) {
+ int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq;
+
+ gss_stat_oos_record_cli(behind);
+ CWARN("req %p: %u behind, retry sealing\n", req, behind);
+
+ ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
+ goto redo;
+ }
+
+ /* now set the final wire data length */
+ req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, 1, token.len,0);
+ RETURN(0);
+
+err_free:
+ if (!req->rq_pool) {
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+ RETURN(rc);
+}
+
+int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req)
+{
+ struct gss_cli_ctx *gctx;
+ struct gss_header *ghdr;
+ struct lustre_msg *msg = req->rq_repdata;
+ int msglen, pack_bulk, swabbed, rc;
+ __u32 major;
+ ENTRY;
+
+ LASSERT(req->rq_cli_ctx == ctx);
+ LASSERT(req->rq_ctx_init == 0);
+ LASSERT(msg);
+
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+ swabbed = ptlrpc_rep_need_swab(req);
+
+ ghdr = gss_swab_header(msg, 0, swabbed);
+ if (ghdr == NULL) {
+ CERROR("can't decode gss header\n");
+ RETURN(-EPROTO);
+ }
+
+ /* sanity checks */
+ if (ghdr->gh_version != PTLRPC_GSS_VERSION) {
+ CERROR("gss version %u mismatch, expect %u\n",
+ ghdr->gh_version, PTLRPC_GSS_VERSION);
+ RETURN(-EPROTO);
+ }
+
+ switch (ghdr->gh_proc) {
+ case PTLRPC_GSS_PROC_DATA:
+ pack_bulk = ghdr->gh_flags & LUSTRE_GSS_PACK_BULK;
+
+ if (!req->rq_early && !equi(req->rq_pack_bulk == 1, pack_bulk)){
+ CERROR("%s bulk flag in reply\n",
+ req->rq_pack_bulk ? "missing" : "unexpected");
+ RETURN(-EPROTO);
+ }
+
+ if (swabbed)
+ gss_header_swabber(ghdr);
+
+ /* use rq_repdata_len as buffer size, which assume unseal
+ * doesn't need extra memory space. for precise control, we'd
+ * better calculate out actual buffer size as
+ * (repbuf_len - offset - repdata_len) */
+ major = gss_unseal_msg(gctx->gc_mechctx, msg,
+ &msglen, req->rq_repdata_len);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("failed to unwrap reply: %x\n", major);
+ rc = -EPERM;
+ break;
+ }
+
+ swabbed = __lustre_unpack_msg(msg, msglen);
+ if (swabbed < 0) {
+ CERROR("Failed to unpack after decryption\n");
+ RETURN(-EPROTO);
+ }
+
+ if (msg->lm_bufcount < 1) {
+ CERROR("Invalid reply buffer: empty\n");
+ RETURN(-EPROTO);
+ }
+
+ if (pack_bulk) {
+ if (msg->lm_bufcount < 2) {
+ CERROR("bufcount %u: missing bulk sec desc\n",
+ msg->lm_bufcount);
+ RETURN(-EPROTO);
+ }
+
+ /* bulk checksum is the last segment */
+ if (bulk_sec_desc_unpack(msg, msg->lm_bufcount - 1,
+ swabbed))
+ RETURN(-EPROTO);
+ }
+
+ req->rq_repmsg = lustre_msg_buf(msg, 0, 0);
+ req->rq_replen = msg->lm_buflens[0];
+
+ rc = 0;
+ break;
+ case PTLRPC_GSS_PROC_ERR:
+ if (req->rq_early) {
+ CERROR("server return error with early reply\n");
+ rc = -EPROTO;
+ } else {
+ rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr);
+ }
+ break;
+ default:
+ CERROR("unexpected proc %d\n", ghdr->gh_proc);
+ rc = -EPERM;
+ }
+
+ RETURN(rc);
+}
+
+/*********************************************
+ * reverse context installation *
+ *********************************************/
+
+static inline
+int gss_install_rvs_svc_ctx(struct obd_import *imp,
+ struct gss_sec *gsec,
+ struct gss_cli_ctx *gctx)
+{
+ return gss_svc_upcall_install_rvs_ctx(imp, gsec, gctx);
+}
+
+/*********************************************
+ * GSS security APIs *
+ *********************************************/
+int gss_sec_create_common(struct gss_sec *gsec,
+ struct ptlrpc_sec_policy *policy,
+ struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svcctx,
+ struct sptlrpc_flavor *sf)
+{
+ struct ptlrpc_sec *sec;
+
+ LASSERT(imp);
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_GSS);
+
+ gsec->gs_mech = lgss_subflavor_to_mech(
+ SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
+ if (!gsec->gs_mech) {
+ CERROR("gss backend 0x%x not found\n",
+ SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_init(&gsec->gs_lock);
+ gsec->gs_rvs_hdl = 0ULL;
+
+ /* initialize upper ptlrpc_sec */
+ sec = &gsec->gs_base;
+ sec->ps_policy = policy;
+ atomic_set(&sec->ps_refcount, 0);
+ atomic_set(&sec->ps_nctx, 0);
+ sec->ps_id = sptlrpc_get_next_secid();
+ sec->ps_flvr = *sf;
+ sec->ps_import = class_import_get(imp);
+ spin_lock_init(&sec->ps_lock);
+ INIT_LIST_HEAD(&sec->ps_gc_list);
+
+ if (!svcctx) {
+ sec->ps_gc_interval = GSS_GC_INTERVAL;
+ } else {
+ LASSERT(sec_is_reverse(sec));
+
+ /* never do gc on reverse sec */
+ sec->ps_gc_interval = 0;
+ }
+
+ if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
+ sptlrpc_enc_pool_add_user();
+
+ CDEBUG(D_SEC, "create %s%s@%p\n", (svcctx ? "reverse " : ""),
+ policy->sp_name, gsec);
+ return 0;
+}
+
+void gss_sec_destroy_common(struct gss_sec *gsec)
+{
+ struct ptlrpc_sec *sec = &gsec->gs_base;
+ ENTRY;
+
+ LASSERT(sec->ps_import);
+ LASSERT(atomic_read(&sec->ps_refcount) == 0);
+ LASSERT(atomic_read(&sec->ps_nctx) == 0);
+
+ if (gsec->gs_mech) {
+ lgss_mech_put(gsec->gs_mech);
+ gsec->gs_mech = NULL;
+ }
+
+ class_import_put(sec->ps_import);
+
+ if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
+ sptlrpc_enc_pool_del_user();
+
+ EXIT;
+}
+
+void gss_sec_kill(struct ptlrpc_sec *sec)
+{
+ sec->ps_dying = 1;
+}
+
+int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_ctx_ops *ctxops,
+ struct vfs_cred *vcred)
+{
+ struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+
+ gctx->gc_win = 0;
+ atomic_set(&gctx->gc_seq, 0);
+
+ INIT_HLIST_NODE(&ctx->cc_cache);
+ atomic_set(&ctx->cc_refcount, 0);
+ ctx->cc_sec = sec;
+ ctx->cc_ops = ctxops;
+ ctx->cc_expire = 0;
+ ctx->cc_flags = PTLRPC_CTX_NEW;
+ ctx->cc_vcred = *vcred;
+ spin_lock_init(&ctx->cc_lock);
+ INIT_LIST_HEAD(&ctx->cc_req_list);
+ INIT_LIST_HEAD(&ctx->cc_gc_chain);
+
+ /* take a ref on belonging sec, balanced in ctx destroying */
+ atomic_inc(&sec->ps_refcount);
+ /* statistic only */
+ atomic_inc(&sec->ps_nctx);
+
+ CDEBUG(D_SEC, "%s@%p: create ctx %p(%u->%s)\n",
+ sec->ps_policy->sp_name, ctx->cc_sec,
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ return 0;
+}
+
+/*
+ * return value:
+ * 1: the context has been taken care of by someone else
+ * 0: proceed to really destroy the context locally
+ */
+int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+
+ LASSERT(atomic_read(&sec->ps_nctx) > 0);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(ctx->cc_sec == sec);
+
+ /*
+ * remove UPTODATE flag of reverse ctx thus we won't send fini rpc,
+ * this is to avoid potential problems of client side reverse svc ctx
+ * be mis-destroyed in various recovery senarios. anyway client can
+ * manage its reverse ctx well by associating it with its buddy ctx.
+ */
+ if (sec_is_reverse(sec))
+ ctx->cc_flags &= ~PTLRPC_CTX_UPTODATE;
+
+ if (gctx->gc_mechctx) {
+ /* the final context fini rpc will use this ctx too, and it's
+ * asynchronous which finished by request_out_callback(). so
+ * we add refcount, whoever drop finally drop the refcount to
+ * 0 should responsible for the rest of destroy. */
+ atomic_inc(&ctx->cc_refcount);
+
+ gss_do_ctx_fini_rpc(gctx);
+ gss_cli_ctx_finalize(gctx);
+
+ if (!atomic_dec_and_test(&ctx->cc_refcount))
+ return 1;
+ }
+
+ if (sec_is_reverse(sec))
+ CWARN("reverse sec %p: destroy ctx %p\n",
+ ctx->cc_sec, ctx);
+ else
+ CWARN("%s@%p: destroy ctx %p(%u->%s)\n",
+ sec->ps_policy->sp_name, ctx->cc_sec,
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+
+ return 0;
+}
+
+static
+int gss_alloc_reqbuf_intg(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int svc, int msgsize)
+{
+ int bufsize, txtsize;
+ int bufcnt = 2;
+ __u32 buflens[5];
+ ENTRY;
+
+ /*
+ * on-wire data layout:
+ * - gss header
+ * - lustre message
+ * - user descriptor (optional)
+ * - bulk sec descriptor (optional)
+ * - signature (optional)
+ * - svc == NULL: NULL
+ * - svc == AUTH: signature of gss header
+ * - svc == INTG: signature of all above
+ *
+ * if this is context negotiation, reserver fixed space
+ * at the last (signature) segment regardless of svc mode.
+ */
+
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ txtsize = buflens[0];
+
+ buflens[1] = msgsize;
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[1];
+
+ if (req->rq_pack_udesc) {
+ buflens[bufcnt] = sptlrpc_current_user_desc_size();
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[bufcnt];
+ bufcnt++;
+ }
+
+ if (req->rq_pack_bulk) {
+ buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 0, req->rq_bulk_read);
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[bufcnt];
+ bufcnt++;
+ }
+
+ if (req->rq_ctx_init)
+ buflens[bufcnt++] = GSS_CTX_INIT_MAX_LEN;
+ else if (svc != SPTLRPC_SVC_NULL)
+ buflens[bufcnt++] = gss_cli_payload(req->rq_cli_ctx, txtsize,0);
+
+ bufsize = lustre_msg_size_v2(bufcnt, buflens);
+
+ if (!req->rq_reqbuf) {
+ bufsize = size_roundup_power2(bufsize);
+
+ OBD_ALLOC_LARGE(req->rq_reqbuf, bufsize);
+ if (!req->rq_reqbuf)
+ RETURN(-ENOMEM);
+
+ req->rq_reqbuf_len = bufsize;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= bufsize);
+ memset(req->rq_reqbuf, 0, bufsize);
+ }
+
+ lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL);
+ req->rq_reqbuf->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, msgsize);
+ LASSERT(req->rq_reqmsg);
+
+ /* pack user desc here, later we might leave current user's process */
+ if (req->rq_pack_udesc)
+ sptlrpc_pack_user_desc(req->rq_reqbuf, 2);
+
+ RETURN(0);
+}
+
+static
+int gss_alloc_reqbuf_priv(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ __u32 ibuflens[3], wbuflens[2];
+ int ibufcnt;
+ int clearsize, wiresize;
+ ENTRY;
+
+ LASSERT(req->rq_clrbuf == NULL);
+ LASSERT(req->rq_clrbuf_len == 0);
+
+ /* Inner (clear) buffers
+ * - lustre message
+ * - user descriptor (optional)
+ * - bulk checksum (optional)
+ */
+ ibufcnt = 1;
+ ibuflens[0] = msgsize;
+
+ if (req->rq_pack_udesc)
+ ibuflens[ibufcnt++] = sptlrpc_current_user_desc_size();
+ if (req->rq_pack_bulk)
+ ibuflens[ibufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr, 0,
+ req->rq_bulk_read);
+
+ clearsize = lustre_msg_size_v2(ibufcnt, ibuflens);
+ /* to allow append padding during encryption */
+ clearsize += GSS_MAX_CIPHER_BLOCK;
+
+ /* Wrapper (wire) buffers
+ * - gss header
+ * - cipher text
+ */
+ wbuflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ wbuflens[1] = gss_cli_payload(req->rq_cli_ctx, clearsize, 1);
+ wiresize = lustre_msg_size_v2(2, wbuflens);
+
+ if (req->rq_pool) {
+ /* rq_reqbuf is preallocated */
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len >= wiresize);
+
+ memset(req->rq_reqbuf, 0, req->rq_reqbuf_len);
+
+ /* if the pre-allocated buffer is big enough, we just pack
+ * both clear buf & request buf in it, to avoid more alloc. */
+ if (clearsize + wiresize <= req->rq_reqbuf_len) {
+ req->rq_clrbuf =
+ (void *) (((char *) req->rq_reqbuf) + wiresize);
+ } else {
+ CWARN("pre-allocated buf size %d is not enough for "
+ "both clear (%d) and cipher (%d) text, proceed "
+ "with extra allocation\n", req->rq_reqbuf_len,
+ clearsize, wiresize);
+ }
+ }
+
+ if (!req->rq_clrbuf) {
+ clearsize = size_roundup_power2(clearsize);
+
+ OBD_ALLOC_LARGE(req->rq_clrbuf, clearsize);
+ if (!req->rq_clrbuf)
+ RETURN(-ENOMEM);
+ }
+ req->rq_clrbuf_len = clearsize;
+
+ lustre_init_msg_v2(req->rq_clrbuf, ibufcnt, ibuflens, NULL);
+ req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, msgsize);
+
+ if (req->rq_pack_udesc)
+ sptlrpc_pack_user_desc(req->rq_clrbuf, 1);
+
+ RETURN(0);
+}
+
+/*
+ * NOTE: any change of request buffer allocation should also consider
+ * changing enlarge_reqbuf() series functions.
+ */
+int gss_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+
+ LASSERT(!req->rq_pack_bulk ||
+ (req->rq_bulk_read || req->rq_bulk_write));
+
+ switch (svc) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ return gss_alloc_reqbuf_intg(sec, req, svc, msgsize);
+ case SPTLRPC_SVC_PRIV:
+ return gss_alloc_reqbuf_priv(sec, req, msgsize);
+ default:
+ LASSERTF(0, "bad rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ return 0;
+ }
+}
+
+void gss_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ int privacy;
+ ENTRY;
+
+ LASSERT(!req->rq_pool || req->rq_reqbuf);
+ privacy = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) == SPTLRPC_SVC_PRIV;
+
+ if (!req->rq_clrbuf)
+ goto release_reqbuf;
+
+ /* release clear buffer */
+ LASSERT(privacy);
+ LASSERT(req->rq_clrbuf_len);
+
+ if (req->rq_pool == NULL ||
+ req->rq_clrbuf < req->rq_reqbuf ||
+ (char *) req->rq_clrbuf >=
+ (char *) req->rq_reqbuf + req->rq_reqbuf_len)
+ OBD_FREE_LARGE(req->rq_clrbuf, req->rq_clrbuf_len);
+
+ req->rq_clrbuf = NULL;
+ req->rq_clrbuf_len = 0;
+
+release_reqbuf:
+ if (!req->rq_pool && req->rq_reqbuf) {
+ LASSERT(req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+
+ EXIT;
+}
+
+static int do_alloc_repbuf(struct ptlrpc_request *req, int bufsize)
+{
+ bufsize = size_roundup_power2(bufsize);
+
+ OBD_ALLOC_LARGE(req->rq_repbuf, bufsize);
+ if (!req->rq_repbuf)
+ return -ENOMEM;
+
+ req->rq_repbuf_len = bufsize;
+ return 0;
+}
+
+static
+int gss_alloc_repbuf_intg(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int svc, int msgsize)
+{
+ int txtsize;
+ __u32 buflens[4];
+ int bufcnt = 2;
+ int alloc_size;
+
+ /*
+ * on-wire data layout:
+ * - gss header
+ * - lustre message
+ * - bulk sec descriptor (optional)
+ * - signature (optional)
+ * - svc == NULL: NULL
+ * - svc == AUTH: signature of gss header
+ * - svc == INTG: signature of all above
+ *
+ * if this is context negotiation, reserver fixed space
+ * at the last (signature) segment regardless of svc mode.
+ */
+
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ txtsize = buflens[0];
+
+ buflens[1] = msgsize;
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[1];
+
+ if (req->rq_pack_bulk) {
+ buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 1, req->rq_bulk_read);
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[bufcnt];
+ bufcnt++;
+ }
+
+ if (req->rq_ctx_init)
+ buflens[bufcnt++] = GSS_CTX_INIT_MAX_LEN;
+ else if (svc != SPTLRPC_SVC_NULL)
+ buflens[bufcnt++] = gss_cli_payload(req->rq_cli_ctx, txtsize,0);
+
+ alloc_size = lustre_msg_size_v2(bufcnt, buflens);
+
+ /* add space for early reply */
+ alloc_size += gss_at_reply_off_integ;
+
+ return do_alloc_repbuf(req, alloc_size);
+}
+
+static
+int gss_alloc_repbuf_priv(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ int txtsize;
+ __u32 buflens[2];
+ int bufcnt;
+ int alloc_size;
+
+ /* inner buffers */
+ bufcnt = 1;
+ buflens[0] = msgsize;
+
+ if (req->rq_pack_bulk)
+ buflens[bufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 1, req->rq_bulk_read);
+ txtsize = lustre_msg_size_v2(bufcnt, buflens);
+ txtsize += GSS_MAX_CIPHER_BLOCK;
+
+ /* wrapper buffers */
+ bufcnt = 2;
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = gss_cli_payload(req->rq_cli_ctx, txtsize, 1);
+
+ alloc_size = lustre_msg_size_v2(bufcnt, buflens);
+ /* add space for early reply */
+ alloc_size += gss_at_reply_off_priv;
+
+ return do_alloc_repbuf(req, alloc_size);
+}
+
+int gss_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ ENTRY;
+
+ LASSERT(!req->rq_pack_bulk ||
+ (req->rq_bulk_read || req->rq_bulk_write));
+
+ switch (svc) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ return gss_alloc_repbuf_intg(sec, req, svc, msgsize);
+ case SPTLRPC_SVC_PRIV:
+ return gss_alloc_repbuf_priv(sec, req, msgsize);
+ default:
+ LASSERTF(0, "bad rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ return 0;
+ }
+}
+
+void gss_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+ req->rq_repdata = NULL;
+ req->rq_repdata_len = 0;
+}
+
+static int get_enlarged_msgsize(struct lustre_msg *msg,
+ int segment, int newsize)
+{
+ int save, newmsg_size;
+
+ LASSERT(newsize >= msg->lm_buflens[segment]);
+
+ save = msg->lm_buflens[segment];
+ msg->lm_buflens[segment] = newsize;
+ newmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ msg->lm_buflens[segment] = save;
+
+ return newmsg_size;
+}
+
+static int get_enlarged_msgsize2(struct lustre_msg *msg,
+ int segment1, int newsize1,
+ int segment2, int newsize2)
+{
+ int save1, save2, newmsg_size;
+
+ LASSERT(newsize1 >= msg->lm_buflens[segment1]);
+ LASSERT(newsize2 >= msg->lm_buflens[segment2]);
+
+ save1 = msg->lm_buflens[segment1];
+ save2 = msg->lm_buflens[segment2];
+ msg->lm_buflens[segment1] = newsize1;
+ msg->lm_buflens[segment2] = newsize2;
+ newmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ msg->lm_buflens[segment1] = save1;
+ msg->lm_buflens[segment2] = save2;
+
+ return newmsg_size;
+}
+
+static
+int gss_enlarge_reqbuf_intg(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int svc,
+ int segment, int newsize)
+{
+ struct lustre_msg *newbuf;
+ int txtsize, sigsize = 0, i;
+ int newmsg_size, newbuf_size;
+
+ /*
+ * gss header is at seg 0;
+ * embedded msg is at seg 1;
+ * signature (if any) is at the last seg
+ */
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len > req->rq_reqlen);
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+ LASSERT(lustre_msg_buf(req->rq_reqbuf, 1, 0) == req->rq_reqmsg);
+
+ /* 1. compute new embedded msg size */
+ newmsg_size = get_enlarged_msgsize(req->rq_reqmsg, segment, newsize);
+ LASSERT(newmsg_size >= req->rq_reqbuf->lm_buflens[1]);
+
+ /* 2. compute new wrapper msg size */
+ if (svc == SPTLRPC_SVC_NULL) {
+ /* no signature, get size directly */
+ newbuf_size = get_enlarged_msgsize(req->rq_reqbuf,
+ 1, newmsg_size);
+ } else {
+ txtsize = req->rq_reqbuf->lm_buflens[0];
+
+ if (svc == SPTLRPC_SVC_INTG) {
+ for (i = 1; i < req->rq_reqbuf->lm_bufcount; i++)
+ txtsize += req->rq_reqbuf->lm_buflens[i];
+ txtsize += newmsg_size - req->rq_reqbuf->lm_buflens[1];
+ }
+
+ sigsize = gss_cli_payload(req->rq_cli_ctx, txtsize, 0);
+ LASSERT(sigsize >= msg_last_seglen(req->rq_reqbuf));
+
+ newbuf_size = get_enlarged_msgsize2(
+ req->rq_reqbuf,
+ 1, newmsg_size,
+ msg_last_segidx(req->rq_reqbuf),
+ sigsize);
+ }
+
+ /* request from pool should always have enough buffer */
+ LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size);
+
+ if (req->rq_reqbuf_len < newbuf_size) {
+ newbuf_size = size_roundup_power2(newbuf_size);
+
+ OBD_ALLOC_LARGE(newbuf, newbuf_size);
+ if (newbuf == NULL)
+ RETURN(-ENOMEM);
+
+ memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = newbuf;
+ req->rq_reqbuf_len = newbuf_size;
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, 0);
+ }
+
+ /* do enlargement, from wrapper to embedded, from end to begin */
+ if (svc != SPTLRPC_SVC_NULL)
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf,
+ msg_last_segidx(req->rq_reqbuf),
+ sigsize);
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, 1, newmsg_size);
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+
+ req->rq_reqlen = newmsg_size;
+ RETURN(0);
+}
+
+static
+int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct lustre_msg *newclrbuf;
+ int newmsg_size, newclrbuf_size, newcipbuf_size;
+ __u32 buflens[3];
+
+ /*
+ * embedded msg is at seg 0 of clear buffer;
+ * cipher text is at seg 2 of cipher buffer;
+ */
+ LASSERT(req->rq_pool ||
+ (req->rq_reqbuf == NULL && req->rq_reqbuf_len == 0));
+ LASSERT(req->rq_reqbuf == NULL ||
+ (req->rq_pool && req->rq_reqbuf->lm_bufcount == 3));
+ LASSERT(req->rq_clrbuf);
+ LASSERT(req->rq_clrbuf_len > req->rq_reqlen);
+ LASSERT(lustre_msg_buf(req->rq_clrbuf, 0, 0) == req->rq_reqmsg);
+
+ /* compute new embedded msg size */
+ newmsg_size = get_enlarged_msgsize(req->rq_reqmsg, segment, newsize);
+
+ /* compute new clear buffer size */
+ newclrbuf_size = get_enlarged_msgsize(req->rq_clrbuf, 0, newmsg_size);
+ newclrbuf_size += GSS_MAX_CIPHER_BLOCK;
+
+ /* compute new cipher buffer size */
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = gss_cli_payload(req->rq_cli_ctx, buflens[0], 0);
+ buflens[2] = gss_cli_payload(req->rq_cli_ctx, newclrbuf_size, 1);
+ newcipbuf_size = lustre_msg_size_v2(3, buflens);
+
+ /* handle the case that we put both clear buf and cipher buf into
+ * pre-allocated single buffer. */
+ if (unlikely(req->rq_pool) &&
+ req->rq_clrbuf >= req->rq_reqbuf &&
+ (char *) req->rq_clrbuf <
+ (char *) req->rq_reqbuf + req->rq_reqbuf_len) {
+ /* it couldn't be better we still fit into the
+ * pre-allocated buffer. */
+ if (newclrbuf_size + newcipbuf_size <= req->rq_reqbuf_len) {
+ void *src, *dst;
+
+ /* move clear text backward. */
+ src = req->rq_clrbuf;
+ dst = (char *) req->rq_reqbuf + newcipbuf_size;
+
+ memmove(dst, src, req->rq_clrbuf_len);
+
+ req->rq_clrbuf = (struct lustre_msg *) dst;
+ req->rq_clrbuf_len = newclrbuf_size;
+ req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0);
+ } else {
+ /* sadly we have to split out the clear buffer */
+ LASSERT(req->rq_reqbuf_len >= newcipbuf_size);
+ LASSERT(req->rq_clrbuf_len < newclrbuf_size);
+ }
+ }
+
+ if (req->rq_clrbuf_len < newclrbuf_size) {
+ newclrbuf_size = size_roundup_power2(newclrbuf_size);
+
+ OBD_ALLOC_LARGE(newclrbuf, newclrbuf_size);
+ if (newclrbuf == NULL)
+ RETURN(-ENOMEM);
+
+ memcpy(newclrbuf, req->rq_clrbuf, req->rq_clrbuf_len);
+
+ if (req->rq_reqbuf == NULL ||
+ req->rq_clrbuf < req->rq_reqbuf ||
+ (char *) req->rq_clrbuf >=
+ (char *) req->rq_reqbuf + req->rq_reqbuf_len) {
+ OBD_FREE_LARGE(req->rq_clrbuf, req->rq_clrbuf_len);
+ }
+
+ req->rq_clrbuf = newclrbuf;
+ req->rq_clrbuf_len = newclrbuf_size;
+ req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0);
+ }
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_clrbuf, 0, newmsg_size);
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+ req->rq_reqlen = newmsg_size;
+
+ RETURN(0);
+}
+
+int gss_enlarge_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+
+ LASSERT(!req->rq_ctx_init && !req->rq_ctx_fini);
+
+ switch (svc) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ return gss_enlarge_reqbuf_intg(sec, req, svc, segment, newsize);
+ case SPTLRPC_SVC_PRIV:
+ return gss_enlarge_reqbuf_priv(sec, req, segment, newsize);
+ default:
+ LASSERTF(0, "bad rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ return 0;
+ }
+}
+
+int gss_sec_install_rctx(struct obd_import *imp,
+ struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct gss_sec *gsec;
+ struct gss_cli_ctx *gctx;
+ int rc;
+
+ gsec = container_of(sec, struct gss_sec, gs_base);
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+
+ rc = gss_install_rvs_svc_ctx(imp, gsec, gctx);
+ return rc;
+}
+
+/********************************************
+ * server side API *
+ ********************************************/
+
+static inline
+int gss_svc_reqctx_is_special(struct gss_svc_reqctx *grctx)
+{
+ LASSERT(grctx);
+ return (grctx->src_init || grctx->src_init_continue ||
+ grctx->src_err_notify);
+}
+
+static
+void gss_svc_reqctx_free(struct gss_svc_reqctx *grctx)
+{
+ if (grctx->src_ctx)
+ gss_svc_upcall_put_ctx(grctx->src_ctx);
+
+ sptlrpc_policy_put(grctx->src_base.sc_policy);
+ OBD_FREE_PTR(grctx);
+}
+
+static inline
+void gss_svc_reqctx_addref(struct gss_svc_reqctx *grctx)
+{
+ LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
+ atomic_inc(&grctx->src_base.sc_refcount);
+}
+
+static inline
+void gss_svc_reqctx_decref(struct gss_svc_reqctx *grctx)
+{
+ LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
+
+ if (atomic_dec_and_test(&grctx->src_base.sc_refcount))
+ gss_svc_reqctx_free(grctx);
+}
+
+static
+int gss_svc_sign(struct ptlrpc_request *req,
+ struct ptlrpc_reply_state *rs,
+ struct gss_svc_reqctx *grctx,
+ __u32 svc)
+{
+ __u32 flags = 0;
+ int rc;
+ ENTRY;
+
+ LASSERT(rs->rs_msg == lustre_msg_buf(rs->rs_repbuf, 1, 0));
+
+ /* embedded lustre_msg might have been shrinked */
+ if (req->rq_replen != rs->rs_repbuf->lm_buflens[1])
+ lustre_shrink_msg(rs->rs_repbuf, 1, req->rq_replen, 1);
+
+ if (req->rq_pack_bulk)
+ flags |= LUSTRE_GSS_PACK_BULK;
+
+ rc = gss_sign_msg(rs->rs_repbuf, grctx->src_ctx->gsc_mechctx,
+ LUSTRE_SP_ANY, flags, PTLRPC_GSS_PROC_DATA,
+ grctx->src_wirectx.gw_seq, svc, NULL);
+ if (rc < 0)
+ RETURN(rc);
+
+ rs->rs_repdata_len = rc;
+
+ if (likely(req->rq_packed_final)) {
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+ req->rq_reply_off = gss_at_reply_off_integ;
+ else
+ req->rq_reply_off = 0;
+ } else {
+ if (svc == SPTLRPC_SVC_NULL)
+ rs->rs_repbuf->lm_cksum = crc32_le(!(__u32) 0,
+ lustre_msg_buf(rs->rs_repbuf, 1, 0),
+ lustre_msg_buflen(rs->rs_repbuf, 1));
+ req->rq_reply_off = 0;
+ }
+
+ RETURN(0);
+}
+
+int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor)
+{
+ struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ struct ptlrpc_reply_state *rs;
+ struct gss_err_header *ghdr;
+ int replen = sizeof(struct ptlrpc_body);
+ int rc;
+ ENTRY;
+
+ //if (OBD_FAIL_CHECK_ORSET(OBD_FAIL_SVCGSS_ERR_NOTIFY, OBD_FAIL_ONCE))
+ // RETURN(-EINVAL);
+
+ grctx->src_err_notify = 1;
+ grctx->src_reserve_len = 0;
+
+ rc = lustre_pack_reply_v2(req, 1, &replen, NULL, 0);
+ if (rc) {
+ CERROR("could not pack reply, err %d\n", rc);
+ RETURN(rc);
+ }
+
+ /* gss hdr */
+ rs = req->rq_reply_state;
+ LASSERT(rs->rs_repbuf->lm_buflens[1] >= sizeof(*ghdr));
+ ghdr = lustre_msg_buf(rs->rs_repbuf, 0, 0);
+ ghdr->gh_version = PTLRPC_GSS_VERSION;
+ ghdr->gh_flags = 0;
+ ghdr->gh_proc = PTLRPC_GSS_PROC_ERR;
+ ghdr->gh_major = major;
+ ghdr->gh_minor = minor;
+ ghdr->gh_handle.len = 0; /* fake context handle */
+
+ rs->rs_repdata_len = lustre_msg_size_v2(rs->rs_repbuf->lm_bufcount,
+ rs->rs_repbuf->lm_buflens);
+
+ CDEBUG(D_SEC, "prepare gss error notify(0x%x/0x%x) to %s\n",
+ major, minor, libcfs_nid2str(req->rq_peer.nid));
+ RETURN(0);
+}
+
+static
+int gss_svc_handle_init(struct ptlrpc_request *req,
+ struct gss_wire_ctx *gw)
+{
+ struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ struct lustre_msg *reqbuf = req->rq_reqbuf;
+ struct obd_uuid *uuid;
+ struct obd_device *target;
+ rawobj_t uuid_obj, rvs_hdl, in_token;
+ __u32 lustre_svc;
+ __u32 *secdata, seclen;
+ int swabbed, rc;
+ ENTRY;
+
+ CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc,
+ libcfs_nid2str(req->rq_peer.nid));
+
+ req->rq_ctx_init = 1;
+
+ if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
+ CERROR("unexpected bulk flag\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) {
+ CERROR("proc %u: invalid handle length %u\n",
+ gw->gw_proc, gw->gw_handle.len);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){
+ CERROR("Invalid bufcount %d\n", reqbuf->lm_bufcount);
+ RETURN(SECSVC_DROP);
+ }
+
+ swabbed = ptlrpc_req_need_swab(req);
+
+ /* ctx initiate payload is in last segment */
+ secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0);
+ seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1];
+
+ if (seclen < 4 + 4) {
+ CERROR("sec size %d too small\n", seclen);
+ RETURN(SECSVC_DROP);
+ }
+
+ /* lustre svc type */
+ lustre_svc = le32_to_cpu(*secdata++);
+ seclen -= 4;
+
+ /* extract target uuid, note this code is somewhat fragile
+ * because touched internal structure of obd_uuid */
+ if (rawobj_extract(&uuid_obj, &secdata, &seclen)) {
+ CERROR("failed to extract target uuid\n");
+ RETURN(SECSVC_DROP);
+ }
+ uuid_obj.data[uuid_obj.len - 1] = '\0';
+
+ uuid = (struct obd_uuid *) uuid_obj.data;
+ target = class_uuid2obd(uuid);
+ if (!target || target->obd_stopping || !target->obd_set_up) {
+ CERROR("target '%s' is not available for context init (%s)\n",
+ uuid->uuid, target == NULL ? "no target" :
+ (target->obd_stopping ? "stopping" : "not set up"));
+ RETURN(SECSVC_DROP);
+ }
+
+ /* extract reverse handle */
+ if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) {
+ CERROR("failed extract reverse handle\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ /* extract token */
+ if (rawobj_extract(&in_token, &secdata, &seclen)) {
+ CERROR("can't extract token\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc,
+ &rvs_hdl, &in_token);
+ if (rc != SECSVC_OK)
+ RETURN(rc);
+
+ if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_oss ||
+ grctx->src_ctx->gsc_usr_root)
+ CWARN("create svc ctx %p: user from %s authenticated as %s\n",
+ grctx->src_ctx, libcfs_nid2str(req->rq_peer.nid),
+ grctx->src_ctx->gsc_usr_mds ? "mds" :
+ (grctx->src_ctx->gsc_usr_oss ? "oss" : "root"));
+ else
+ CWARN("create svc ctx %p: accept user %u from %s\n",
+ grctx->src_ctx, grctx->src_ctx->gsc_uid,
+ libcfs_nid2str(req->rq_peer.nid));
+
+ if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
+ if (reqbuf->lm_bufcount < 4) {
+ CERROR("missing user descriptor\n");
+ RETURN(SECSVC_DROP);
+ }
+ if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) {
+ CERROR("Mal-formed user descriptor\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0);
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0);
+ req->rq_reqlen = lustre_msg_buflen(reqbuf, 1);
+
+ RETURN(rc);
+}
+
+/*
+ * last segment must be the gss signature.
+ */
+static
+int gss_svc_verify_request(struct ptlrpc_request *req,
+ struct gss_svc_reqctx *grctx,
+ struct gss_wire_ctx *gw,
+ __u32 *major)
+{
+ struct gss_svc_ctx *gctx = grctx->src_ctx;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int offset = 2;
+ int swabbed;
+ ENTRY;
+
+ *major = GSS_S_COMPLETE;
+
+ if (msg->lm_bufcount < 2) {
+ CERROR("Too few segments (%u) in request\n", msg->lm_bufcount);
+ RETURN(-EINVAL);
+ }
+
+ if (gw->gw_svc == SPTLRPC_SVC_NULL)
+ goto verified;
+
+ if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) {
+ CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq);
+ *major = GSS_S_DUPLICATE_TOKEN;
+ RETURN(-EACCES);
+ }
+
+ *major = gss_verify_msg(msg, gctx->gsc_mechctx, gw->gw_svc);
+ if (*major != GSS_S_COMPLETE) {
+ CERROR("failed to verify request: %x\n", *major);
+ RETURN(-EACCES);
+ }
+
+ if (gctx->gsc_reverse == 0 &&
+ gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
+ CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
+ *major = GSS_S_DUPLICATE_TOKEN;
+ RETURN(-EACCES);
+ }
+
+verified:
+ swabbed = ptlrpc_req_need_swab(req);
+
+ /* user descriptor */
+ if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
+ if (msg->lm_bufcount < (offset + 1)) {
+ CERROR("no user desc included\n");
+ RETURN(-EINVAL);
+ }
+
+ if (sptlrpc_unpack_user_desc(msg, offset, swabbed)) {
+ CERROR("Mal-formed user descriptor\n");
+ RETURN(-EINVAL);
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(msg, offset, 0);
+ offset++;
+ }
+
+ /* check bulk_sec_desc data */
+ if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
+ if (msg->lm_bufcount < (offset + 1)) {
+ CERROR("missing bulk sec descriptor\n");
+ RETURN(-EINVAL);
+ }
+
+ if (bulk_sec_desc_unpack(msg, offset, swabbed))
+ RETURN(-EINVAL);
+
+ req->rq_pack_bulk = 1;
+ grctx->src_reqbsd = lustre_msg_buf(msg, offset, 0);
+ grctx->src_reqbsd_size = lustre_msg_buflen(msg, offset);
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(msg, 1, 0);
+ req->rq_reqlen = msg->lm_buflens[1];
+ RETURN(0);
+}
+
+static
+int gss_svc_unseal_request(struct ptlrpc_request *req,
+ struct gss_svc_reqctx *grctx,
+ struct gss_wire_ctx *gw,
+ __u32 *major)
+{
+ struct gss_svc_ctx *gctx = grctx->src_ctx;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int swabbed, msglen, offset = 1;
+ ENTRY;
+
+ if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) {
+ CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq);
+ *major = GSS_S_DUPLICATE_TOKEN;
+ RETURN(-EACCES);
+ }
+
+ *major = gss_unseal_msg(gctx->gsc_mechctx, msg,
+ &msglen, req->rq_reqdata_len);
+ if (*major != GSS_S_COMPLETE) {
+ CERROR("failed to unwrap request: %x\n", *major);
+ RETURN(-EACCES);
+ }
+
+ if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
+ CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
+ *major = GSS_S_DUPLICATE_TOKEN;
+ RETURN(-EACCES);
+ }
+
+ swabbed = __lustre_unpack_msg(msg, msglen);
+ if (swabbed < 0) {
+ CERROR("Failed to unpack after decryption\n");
+ RETURN(-EINVAL);
+ }
+ req->rq_reqdata_len = msglen;
+
+ if (msg->lm_bufcount < 1) {
+ CERROR("Invalid buffer: is empty\n");
+ RETURN(-EINVAL);
+ }
+
+ if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
+ if (msg->lm_bufcount < offset + 1) {
+ CERROR("no user descriptor included\n");
+ RETURN(-EINVAL);
+ }
+
+ if (sptlrpc_unpack_user_desc(msg, offset, swabbed)) {
+ CERROR("Mal-formed user descriptor\n");
+ RETURN(-EINVAL);
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(msg, offset, 0);
+ offset++;
+ }
+
+ if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
+ if (msg->lm_bufcount < offset + 1) {
+ CERROR("no bulk checksum included\n");
+ RETURN(-EINVAL);
+ }
+
+ if (bulk_sec_desc_unpack(msg, offset, swabbed))
+ RETURN(-EINVAL);
+
+ req->rq_pack_bulk = 1;
+ grctx->src_reqbsd = lustre_msg_buf(msg, offset, 0);
+ grctx->src_reqbsd_size = lustre_msg_buflen(msg, offset);
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 0, 0);
+ req->rq_reqlen = req->rq_reqbuf->lm_buflens[0];
+ RETURN(0);
+}
+
+static
+int gss_svc_handle_data(struct ptlrpc_request *req,
+ struct gss_wire_ctx *gw)
+{
+ struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ __u32 major = 0;
+ int rc = 0;
+ ENTRY;
+
+ grctx->src_ctx = gss_svc_upcall_get_ctx(req, gw);
+ if (!grctx->src_ctx) {
+ major = GSS_S_NO_CONTEXT;
+ goto error;
+ }
+
+ switch (gw->gw_svc) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ rc = gss_svc_verify_request(req, grctx, gw, &major);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ rc = gss_svc_unseal_request(req, grctx, gw, &major);
+ break;
+ default:
+ CERROR("unsupported gss service %d\n", gw->gw_svc);
+ rc = -EINVAL;
+ }
+
+ if (rc == 0)
+ RETURN(SECSVC_OK);
+
+ CERROR("svc %u failed: major 0x%08x: req xid "LPU64" ctx %p idx "
+ LPX64"(%u->%s)\n", gw->gw_svc, major, req->rq_xid,
+ grctx->src_ctx, gss_handle_to_u64(&gw->gw_handle),
+ grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid));
+error:
+ /* we only notify client in case of NO_CONTEXT/BAD_SIG, which
+ * might happen after server reboot, to allow recovery. */
+ if ((major == GSS_S_NO_CONTEXT || major == GSS_S_BAD_SIG) &&
+ gss_pack_err_notify(req, major, 0) == 0)
+ RETURN(SECSVC_COMPLETE);
+
+ RETURN(SECSVC_DROP);
+}
+
+static
+int gss_svc_handle_destroy(struct ptlrpc_request *req,
+ struct gss_wire_ctx *gw)
+{
+ struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ __u32 major;
+ ENTRY;
+
+ req->rq_ctx_fini = 1;
+ req->rq_no_reply = 1;
+
+ grctx->src_ctx = gss_svc_upcall_get_ctx(req, gw);
+ if (!grctx->src_ctx) {
+ CDEBUG(D_SEC, "invalid gss context handle for destroy.\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ if (gw->gw_svc != SPTLRPC_SVC_INTG) {
+ CERROR("svc %u is not supported in destroy.\n", gw->gw_svc);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (gss_svc_verify_request(req, grctx, gw, &major))
+ RETURN(SECSVC_DROP);
+
+ CWARN("destroy svc ctx %p idx "LPX64" (%u->%s)\n",
+ grctx->src_ctx, gss_handle_to_u64(&gw->gw_handle),
+ grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid));
+
+ gss_svc_upcall_destroy_ctx(grctx->src_ctx);
+
+ if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
+ if (req->rq_reqbuf->lm_bufcount < 4) {
+ CERROR("missing user descriptor, ignore it\n");
+ RETURN(SECSVC_OK);
+ }
+ if (sptlrpc_unpack_user_desc(req->rq_reqbuf, 2,
+ ptlrpc_req_need_swab(req))) {
+ CERROR("Mal-formed user descriptor, ignore it\n");
+ RETURN(SECSVC_OK);
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(req->rq_reqbuf, 2, 0);
+ }
+
+ RETURN(SECSVC_OK);
+}
+
+int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
+{
+ struct gss_header *ghdr;
+ struct gss_svc_reqctx *grctx;
+ struct gss_wire_ctx *gw;
+ int swabbed, rc;
+ ENTRY;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_svc_ctx == NULL);
+
+ if (req->rq_reqbuf->lm_bufcount < 2) {
+ CERROR("buf count only %d\n", req->rq_reqbuf->lm_bufcount);
+ RETURN(SECSVC_DROP);
+ }
+
+ swabbed = ptlrpc_req_need_swab(req);
+
+ ghdr = gss_swab_header(req->rq_reqbuf, 0, swabbed);
+ if (ghdr == NULL) {
+ CERROR("can't decode gss header\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ /* sanity checks */
+ if (ghdr->gh_version != PTLRPC_GSS_VERSION) {
+ CERROR("gss version %u, expect %u\n", ghdr->gh_version,
+ PTLRPC_GSS_VERSION);
+ RETURN(SECSVC_DROP);
+ }
+
+ req->rq_sp_from = ghdr->gh_sp;
+
+ /* alloc grctx data */
+ OBD_ALLOC_PTR(grctx);
+ if (!grctx)
+ RETURN(SECSVC_DROP);
+
+ grctx->src_base.sc_policy = sptlrpc_policy_get(policy);
+ atomic_set(&grctx->src_base.sc_refcount, 1);
+ req->rq_svc_ctx = &grctx->src_base;
+ gw = &grctx->src_wirectx;
+
+ /* save wire context */
+ gw->gw_flags = ghdr->gh_flags;
+ gw->gw_proc = ghdr->gh_proc;
+ gw->gw_seq = ghdr->gh_seq;
+ gw->gw_svc = ghdr->gh_svc;
+ rawobj_from_netobj(&gw->gw_handle, &ghdr->gh_handle);
+
+ /* keep original wire header which subject to checksum verification */
+ if (swabbed)
+ gss_header_swabber(ghdr);
+
+ switch(ghdr->gh_proc) {
+ case PTLRPC_GSS_PROC_INIT:
+ case PTLRPC_GSS_PROC_CONTINUE_INIT:
+ rc = gss_svc_handle_init(req, gw);
+ break;
+ case PTLRPC_GSS_PROC_DATA:
+ rc = gss_svc_handle_data(req, gw);
+ break;
+ case PTLRPC_GSS_PROC_DESTROY:
+ rc = gss_svc_handle_destroy(req, gw);
+ break;
+ default:
+ CERROR("unknown proc %u\n", gw->gw_proc);
+ rc = SECSVC_DROP;
+ break;
+ }
+
+ switch (rc) {
+ case SECSVC_OK:
+ LASSERT (grctx->src_ctx);
+
+ req->rq_auth_gss = 1;
+ req->rq_auth_remote = grctx->src_ctx->gsc_remote;
+ req->rq_auth_usr_mdt = grctx->src_ctx->gsc_usr_mds;
+ req->rq_auth_usr_ost = grctx->src_ctx->gsc_usr_oss;
+ req->rq_auth_usr_root = grctx->src_ctx->gsc_usr_root;
+ req->rq_auth_uid = grctx->src_ctx->gsc_uid;
+ req->rq_auth_mapped_uid = grctx->src_ctx->gsc_mapped_uid;
+ break;
+ case SECSVC_COMPLETE:
+ break;
+ case SECSVC_DROP:
+ gss_svc_reqctx_free(grctx);
+ req->rq_svc_ctx = NULL;
+ break;
+ }
+
+ RETURN(rc);
+}
+
+void gss_svc_invalidate_ctx(struct ptlrpc_svc_ctx *svc_ctx)
+{
+ struct gss_svc_reqctx *grctx;
+ ENTRY;
+
+ if (svc_ctx == NULL) {
+ EXIT;
+ return;
+ }
+
+ grctx = gss_svc_ctx2reqctx(svc_ctx);
+
+ CWARN("gss svc invalidate ctx %p(%u)\n",
+ grctx->src_ctx, grctx->src_ctx->gsc_uid);
+ gss_svc_upcall_destroy_ctx(grctx->src_ctx);
+
+ EXIT;
+}
+
+static inline
+int gss_svc_payload(struct gss_svc_reqctx *grctx, int early,
+ int msgsize, int privacy)
+{
+ /* we should treat early reply normally, but which is actually sharing
+ * the same ctx with original request, so in this case we should
+ * ignore the special ctx's special flags */
+ if (early == 0 && gss_svc_reqctx_is_special(grctx))
+ return grctx->src_reserve_len;
+
+ return gss_mech_payload(NULL, msgsize, privacy);
+}
+
+static int gss_svc_bulk_payload(struct gss_svc_ctx *gctx,
+ struct sptlrpc_flavor *flvr,
+ int read)
+{
+ int payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ if (read) {
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_NULL:
+ break;
+ case SPTLRPC_BULK_SVC_INTG:
+ payload += gss_mech_payload(NULL, 0, 0);
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ payload += gss_mech_payload(NULL, 0, 1);
+ break;
+ case SPTLRPC_BULK_SVC_AUTH:
+ default:
+ LBUG();
+ }
+ }
+
+ return payload;
+}
+
+int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
+{
+ struct gss_svc_reqctx *grctx;
+ struct ptlrpc_reply_state *rs;
+ int early, privacy, svc, bsd_off = 0;
+ __u32 ibuflens[2], buflens[4];
+ int ibufcnt = 0, bufcnt;
+ int txtsize, wmsg_size, rs_size;
+ ENTRY;
+
+ LASSERT(msglen % 8 == 0);
+
+ if (req->rq_pack_bulk && !req->rq_bulk_read && !req->rq_bulk_write) {
+ CERROR("client request bulk sec on non-bulk rpc\n");
+ RETURN(-EPROTO);
+ }
+
+ svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ early = (req->rq_packed_final == 0);
+
+ grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ if (!early && gss_svc_reqctx_is_special(grctx))
+ privacy = 0;
+ else
+ privacy = (svc == SPTLRPC_SVC_PRIV);
+
+ if (privacy) {
+ /* inner clear buffers */
+ ibufcnt = 1;
+ ibuflens[0] = msglen;
+
+ if (req->rq_pack_bulk) {
+ LASSERT(grctx->src_reqbsd);
+
+ bsd_off = ibufcnt;
+ ibuflens[ibufcnt++] = gss_svc_bulk_payload(
+ grctx->src_ctx,
+ &req->rq_flvr,
+ req->rq_bulk_read);
+ }
+
+ txtsize = lustre_msg_size_v2(ibufcnt, ibuflens);
+ txtsize += GSS_MAX_CIPHER_BLOCK;
+
+ /* wrapper buffer */
+ bufcnt = 2;
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = gss_svc_payload(grctx, early, txtsize, 1);
+ } else {
+ bufcnt = 2;
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = msglen;
+
+ txtsize = buflens[0];
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[1];
+
+ if (req->rq_pack_bulk) {
+ LASSERT(grctx->src_reqbsd);
+
+ bsd_off = bufcnt;
+ buflens[bufcnt] = gss_svc_bulk_payload(
+ grctx->src_ctx,
+ &req->rq_flvr,
+ req->rq_bulk_read);
+ if (svc == SPTLRPC_SVC_INTG)
+ txtsize += buflens[bufcnt];
+ bufcnt++;
+ }
+
+ if ((!early && gss_svc_reqctx_is_special(grctx)) ||
+ svc != SPTLRPC_SVC_NULL)
+ buflens[bufcnt++] = gss_svc_payload(grctx, early,
+ txtsize, 0);
+ }
+
+ wmsg_size = lustre_msg_size_v2(bufcnt, buflens);
+
+ rs_size = sizeof(*rs) + wmsg_size;
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC_LARGE(rs, rs_size);
+ if (rs == NULL)
+ RETURN(-ENOMEM);
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = wmsg_size;
+
+ /* initialize the buffer */
+ if (privacy) {
+ lustre_init_msg_v2(rs->rs_repbuf, ibufcnt, ibuflens, NULL);
+ rs->rs_msg = lustre_msg_buf(rs->rs_repbuf, 0, msglen);
+ } else {
+ lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL);
+ rs->rs_repbuf->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ rs->rs_msg = lustre_msg_buf(rs->rs_repbuf, 1, 0);
+ }
+
+ if (bsd_off) {
+ grctx->src_repbsd = lustre_msg_buf(rs->rs_repbuf, bsd_off, 0);
+ grctx->src_repbsd_size = lustre_msg_buflen(rs->rs_repbuf,
+ bsd_off);
+ }
+
+ gss_svc_reqctx_addref(grctx);
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+
+ LASSERT(rs->rs_msg);
+ req->rq_reply_state = rs;
+ RETURN(0);
+}
+
+static int gss_svc_seal(struct ptlrpc_request *req,
+ struct ptlrpc_reply_state *rs,
+ struct gss_svc_reqctx *grctx)
+{
+ struct gss_svc_ctx *gctx = grctx->src_ctx;
+ rawobj_t hdrobj, msgobj, token;
+ struct gss_header *ghdr;
+ __u8 *token_buf;
+ int token_buflen;
+ __u32 buflens[2], major;
+ int msglen, rc;
+ ENTRY;
+
+ /* get clear data length. note embedded lustre_msg might
+ * have been shrinked */
+ if (req->rq_replen != lustre_msg_buflen(rs->rs_repbuf, 0))
+ msglen = lustre_shrink_msg(rs->rs_repbuf, 0, req->rq_replen, 1);
+ else
+ msglen = lustre_msg_size_v2(rs->rs_repbuf->lm_bufcount,
+ rs->rs_repbuf->lm_buflens);
+
+ /* temporarily use tail of buffer to hold gss header data */
+ LASSERT(msglen + PTLRPC_GSS_HEADER_SIZE <= rs->rs_repbuf_len);
+ ghdr = (struct gss_header *) ((char *) rs->rs_repbuf +
+ rs->rs_repbuf_len - PTLRPC_GSS_HEADER_SIZE);
+ ghdr->gh_version = PTLRPC_GSS_VERSION;
+ ghdr->gh_sp = LUSTRE_SP_ANY;
+ ghdr->gh_flags = 0;
+ ghdr->gh_proc = PTLRPC_GSS_PROC_DATA;
+ ghdr->gh_seq = grctx->src_wirectx.gw_seq;
+ ghdr->gh_svc = SPTLRPC_SVC_PRIV;
+ ghdr->gh_handle.len = 0;
+ if (req->rq_pack_bulk)
+ ghdr->gh_flags |= LUSTRE_GSS_PACK_BULK;
+
+ /* allocate temporary cipher buffer */
+ token_buflen = gss_mech_payload(gctx->gsc_mechctx, msglen, 1);
+ OBD_ALLOC_LARGE(token_buf, token_buflen);
+ if (token_buf == NULL)
+ RETURN(-ENOMEM);
+
+ hdrobj.len = PTLRPC_GSS_HEADER_SIZE;
+ hdrobj.data = (__u8 *) ghdr;
+ msgobj.len = msglen;
+ msgobj.data = (__u8 *) rs->rs_repbuf;
+ token.len = token_buflen;
+ token.data = token_buf;
+
+ major = lgss_wrap(gctx->gsc_mechctx, &hdrobj, &msgobj,
+ rs->rs_repbuf_len - PTLRPC_GSS_HEADER_SIZE, &token);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("wrap message error: %08x\n", major);
+ GOTO(out_free, rc = -EPERM);
+ }
+ LASSERT(token.len <= token_buflen);
+
+ /* we are about to override data at rs->rs_repbuf, nullify pointers
+ * to which to catch further illegal usage. */
+ if (req->rq_pack_bulk) {
+ grctx->src_repbsd = NULL;
+ grctx->src_repbsd_size = 0;
+ }
+
+ /* now fill the actual wire data
+ * - gss header
+ * - gss token
+ */
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = token.len;
+
+ rs->rs_repdata_len = lustre_msg_size_v2(2, buflens);
+ LASSERT(rs->rs_repdata_len <= rs->rs_repbuf_len);
+
+ lustre_init_msg_v2(rs->rs_repbuf, 2, buflens, NULL);
+ rs->rs_repbuf->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ memcpy(lustre_msg_buf(rs->rs_repbuf, 0, 0), ghdr,
+ PTLRPC_GSS_HEADER_SIZE);
+ memcpy(lustre_msg_buf(rs->rs_repbuf, 1, 0), token.data, token.len);
+
+ /* reply offset */
+ if (req->rq_packed_final &&
+ (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))
+ req->rq_reply_off = gss_at_reply_off_priv;
+ else
+ req->rq_reply_off = 0;
+
+ /* to catch upper layer's further access */
+ rs->rs_msg = NULL;
+ req->rq_repmsg = NULL;
+ req->rq_replen = 0;
+
+ rc = 0;
+out_free:
+ OBD_FREE_LARGE(token_buf, token_buflen);
+ RETURN(rc);
+}
+
+int gss_svc_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ struct gss_wire_ctx *gw = &grctx->src_wirectx;
+ int early, rc;
+ ENTRY;
+
+ early = (req->rq_packed_final == 0);
+
+ if (!early && gss_svc_reqctx_is_special(grctx)) {
+ LASSERT(rs->rs_repdata_len != 0);
+
+ req->rq_reply_off = gss_at_reply_off_integ;
+ RETURN(0);
+ }
+
+ /* early reply could happen in many cases */
+ if (!early &&
+ gw->gw_proc != PTLRPC_GSS_PROC_DATA &&
+ gw->gw_proc != PTLRPC_GSS_PROC_DESTROY) {
+ CERROR("proc %d not support\n", gw->gw_proc);
+ RETURN(-EINVAL);
+ }
+
+ LASSERT(grctx->src_ctx);
+
+ switch (gw->gw_svc) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ rc = gss_svc_sign(req, rs, grctx, gw->gw_svc);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ rc = gss_svc_seal(req, rs, grctx);
+ break;
+ default:
+ CERROR("Unknown service %d\n", gw->gw_svc);
+ GOTO(out, rc = -EINVAL);
+ }
+ rc = 0;
+
+out:
+ RETURN(rc);
+}
+
+void gss_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+ struct gss_svc_reqctx *grctx;
+
+ LASSERT(rs->rs_svc_ctx);
+ grctx = container_of(rs->rs_svc_ctx, struct gss_svc_reqctx, src_base);
+
+ gss_svc_reqctx_decref(grctx);
+ rs->rs_svc_ctx = NULL;
+
+ if (!rs->rs_prealloc)
+ OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->sc_refcount) == 0);
+ gss_svc_reqctx_free(gss_svc_ctx2reqctx(ctx));
+}
+
+int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
+ struct ptlrpc_svc_ctx *svc_ctx)
+{
+ struct gss_cli_ctx *cli_gctx = ctx2gctx(cli_ctx);
+ struct gss_svc_ctx *svc_gctx = gss_svc_ctx2gssctx(svc_ctx);
+ struct gss_ctx *mechctx = NULL;
+
+ LASSERT(cli_gctx);
+ LASSERT(svc_gctx && svc_gctx->gsc_mechctx);
+
+ cli_gctx->gc_proc = PTLRPC_GSS_PROC_DATA;
+ cli_gctx->gc_win = GSS_SEQ_WIN;
+
+ /* The problem is the reverse ctx might get lost in some recovery
+ * situations, and the same svc_ctx will be used to re-create it.
+ * if there's callback be sentout before that, new reverse ctx start
+ * with sequence 0 will lead to future callback rpc be treated as
+ * replay.
+ *
+ * each reverse root ctx will record its latest sequence number on its
+ * buddy svcctx before be destroied, so here we continue use it.
+ */
+ atomic_set(&cli_gctx->gc_seq, svc_gctx->gsc_rvs_seq);
+
+ if (gss_svc_upcall_dup_handle(&cli_gctx->gc_svc_handle, svc_gctx)) {
+ CERROR("failed to dup svc handle\n");
+ goto err_out;
+ }
+
+ if (lgss_copy_reverse_context(svc_gctx->gsc_mechctx, &mechctx) !=
+ GSS_S_COMPLETE) {
+ CERROR("failed to copy mech context\n");
+ goto err_svc_handle;
+ }
+
+ if (rawobj_dup(&cli_gctx->gc_handle, &svc_gctx->gsc_rvs_hdl)) {
+ CERROR("failed to dup reverse handle\n");
+ goto err_ctx;
+ }
+
+ cli_gctx->gc_mechctx = mechctx;
+ gss_cli_ctx_uptodate(cli_gctx);
+
+ return 0;
+
+err_ctx:
+ lgss_delete_sec_context(&mechctx);
+err_svc_handle:
+ rawobj_free(&cli_gctx->gc_svc_handle);
+err_out:
+ return -ENOMEM;
+}
+
+static void gss_init_at_reply_offset(void)
+{
+ __u32 buflens[3];
+ int clearsize;
+
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = lustre_msg_early_size();
+ buflens[2] = gss_cli_payload(NULL, buflens[1], 0);
+ gss_at_reply_off_integ = lustre_msg_size_v2(3, buflens);
+
+ buflens[0] = lustre_msg_early_size();
+ clearsize = lustre_msg_size_v2(1, buflens);
+ buflens[0] = PTLRPC_GSS_HEADER_SIZE;
+ buflens[1] = gss_cli_payload(NULL, clearsize, 0);
+ buflens[2] = gss_cli_payload(NULL, clearsize, 1);
+ gss_at_reply_off_priv = lustre_msg_size_v2(3, buflens);
+}
+
+int __init sptlrpc_gss_init(void)
+{
+ int rc;
+
+ rc = gss_init_lproc();
+ if (rc)
+ return rc;
+
+ rc = gss_init_cli_upcall();
+ if (rc)
+ goto out_lproc;
+
+ rc = gss_init_svc_upcall();
+ if (rc)
+ goto out_cli_upcall;
+
+ rc = init_kerberos_module();
+ if (rc)
+ goto out_svc_upcall;
+
+ /* register policy after all other stuff be intialized, because it
+ * might be in used immediately after the registration. */
+
+ rc = gss_init_keyring();
+ if (rc)
+ goto out_kerberos;
+
+#ifdef HAVE_GSS_PIPEFS
+ rc = gss_init_pipefs();
+ if (rc)
+ goto out_keyring;
+#endif
+
+ gss_init_at_reply_offset();
+
+ return 0;
+
+#ifdef HAVE_GSS_PIPEFS
+out_keyring:
+ gss_exit_keyring();
+#endif
+
+out_kerberos:
+ cleanup_kerberos_module();
+out_svc_upcall:
+ gss_exit_svc_upcall();
+out_cli_upcall:
+ gss_exit_cli_upcall();
+out_lproc:
+ gss_exit_lproc();
+ return rc;
+}
+
+static void __exit sptlrpc_gss_exit(void)
+{
+ gss_exit_keyring();
+#ifdef HAVE_GSS_PIPEFS
+ gss_exit_pipefs();
+#endif
+ cleanup_kerberos_module();
+ gss_exit_svc_upcall();
+ gss_exit_cli_upcall();
+ gss_exit_lproc();
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("GSS security policy for Lustre");
+MODULE_LICENSE("GPL");
+
+module_init(sptlrpc_gss_init);
+module_exit(sptlrpc_gss_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
new file mode 100644
index 000000000000..47a3c0512739
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -0,0 +1,1613 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/import.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <obd_support.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_export.h>
+#include <obd.h>
+#include <obd_cksum.h>
+#include <obd_class.h>
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpc_connect_async_args {
+ __u64 pcaa_peer_committed;
+ int pcaa_initial_connect;
+};
+
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
+static void __import_set_state(struct obd_import *imp,
+ enum lustre_imp_state state)
+{
+ imp->imp_state = state;
+ imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state;
+ imp->imp_state_hist[imp->imp_state_hist_idx].ish_time =
+ cfs_time_current_sec();
+ imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) %
+ IMP_STATE_HIST_LEN;
+}
+
+/* A CLOSED import should remain so. */
+#define IMPORT_SET_STATE_NOLOCK(imp, state) \
+do { \
+ if (imp->imp_state != LUSTRE_IMP_CLOSED) { \
+ CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \
+ imp, obd2cli_tgt(imp->imp_obd), \
+ ptlrpc_import_state_name(imp->imp_state), \
+ ptlrpc_import_state_name(state)); \
+ __import_set_state(imp, state); \
+ } \
+} while(0)
+
+#define IMPORT_SET_STATE(imp, state) \
+do { \
+ spin_lock(&imp->imp_lock); \
+ IMPORT_SET_STATE_NOLOCK(imp, state); \
+ spin_unlock(&imp->imp_lock); \
+} while(0)
+
+
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+ struct ptlrpc_request *request,
+ void * data, int rc);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* Only this function is allowed to change the import state when it is
+ * CLOSED. I would rather refcount the import and free it after
+ * disconnection like we do with exports. To do that, the client_obd
+ * will need to save the peer info somewhere other than in the import,
+ * though. */
+int ptlrpc_init_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+
+ imp->imp_generation++;
+ imp->imp_state = LUSTRE_IMP_NEW;
+
+ spin_unlock(&imp->imp_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_init_import);
+
+#define UUID_STR "_UUID"
+void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
+{
+ *uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix))
+ ? uuid : uuid + strlen(prefix);
+
+ *uuid_len = strlen(*uuid_start);
+
+ if (*uuid_len < strlen(UUID_STR))
+ return;
+
+ if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
+ UUID_STR, strlen(UUID_STR)))
+ *uuid_len -= strlen(UUID_STR);
+}
+EXPORT_SYMBOL(deuuidify);
+
+/**
+ * Returns true if import was FULL, false if import was already not
+ * connected.
+ * @imp - import to be disconnected
+ * @conn_cnt - connection count (epoch) of the request that timed out
+ * and caused the disconnection. In some cases, multiple
+ * inflight requests can fail to a single target (e.g. OST
+ * bulk requests) and if one has already caused a reconnection
+ * (increasing the import->conn_cnt) the older failure should
+ * not also cause a reconnection. If zero it forces a reconnect.
+ */
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
+{
+ int rc = 0;
+
+ spin_lock(&imp->imp_lock);
+
+ if (imp->imp_state == LUSTRE_IMP_FULL &&
+ (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
+ char *target_start;
+ int target_len;
+
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+
+ if (imp->imp_replayable) {
+ LCONSOLE_WARN("%s: Connection to %.*s (at %s) was "
+ "lost; in progress operations using this "
+ "service will wait for recovery to complete\n",
+ imp->imp_obd->obd_name, target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ } else {
+ LCONSOLE_ERROR_MSG(0x166, "%s: Connection to "
+ "%.*s (at %s) was lost; in progress "
+ "operations using this service will fail\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ }
+ ptlrpc_deactivate_timeouts(imp);
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+ spin_unlock(&imp->imp_lock);
+
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+ rc = 1;
+ } else {
+ spin_unlock(&imp->imp_lock);
+ CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
+ imp->imp_client->cli_name, imp,
+ (imp->imp_state == LUSTRE_IMP_FULL &&
+ imp->imp_conn_cnt > conn_cnt) ?
+ "reconnected" : "not connected", imp->imp_conn_cnt,
+ conn_cnt, ptlrpc_import_state_name(imp->imp_state));
+ }
+
+ return rc;
+}
+
+/* Must be called with imp_lock held! */
+static void ptlrpc_deactivate_and_unlock_import(struct obd_import *imp)
+{
+ ENTRY;
+ LASSERT(spin_is_locked(&imp->imp_lock));
+
+ CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd));
+ imp->imp_invalid = 1;
+ imp->imp_generation++;
+ spin_unlock(&imp->imp_lock);
+
+ ptlrpc_abort_inflight(imp);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+
+ EXIT;
+}
+
+/*
+ * This acts as a barrier; all existing requests are rejected, and
+ * no new requests will be accepted until the import is valid again.
+ */
+void ptlrpc_deactivate_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+ ptlrpc_deactivate_and_unlock_import(imp);
+}
+EXPORT_SYMBOL(ptlrpc_deactivate_import);
+
+static unsigned int
+ptlrpc_inflight_deadline(struct ptlrpc_request *req, time_t now)
+{
+ long dl;
+
+ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+ (req->rq_phase == RQ_PHASE_BULK) ||
+ (req->rq_phase == RQ_PHASE_NEW)))
+ return 0;
+
+ if (req->rq_timedout)
+ return 0;
+
+ if (req->rq_phase == RQ_PHASE_NEW)
+ dl = req->rq_sent;
+ else
+ dl = req->rq_deadline;
+
+ if (dl <= now)
+ return 0;
+
+ return dl - now;
+}
+
+static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp)
+{
+ time_t now = cfs_time_current_sec();
+ struct list_head *tmp, *n;
+ struct ptlrpc_request *req;
+ unsigned int timeout = 0;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
+ }
+ spin_unlock(&imp->imp_lock);
+ return timeout;
+}
+
+/**
+ * This function will invalidate the import, if necessary, then block
+ * for all the RPC completions, and finally notify the obd to
+ * invalidate its state (ie cancel locks, clear pending requests,
+ * etc).
+ */
+void ptlrpc_invalidate_import(struct obd_import *imp)
+{
+ struct list_head *tmp, *n;
+ struct ptlrpc_request *req;
+ struct l_wait_info lwi;
+ unsigned int timeout;
+ int rc;
+
+ atomic_inc(&imp->imp_inval_count);
+
+ if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
+ ptlrpc_deactivate_import(imp);
+
+ LASSERT(imp->imp_invalid);
+
+ /* Wait forever until inflight == 0. We really can't do it another
+ * way because in some cases we need to wait for very long reply
+ * unlink. We can't do anything before that because there is really
+ * no guarantee that some rdma transfer is not in progress right now. */
+ do {
+ /* Calculate max timeout for waiting on rpcs to error
+ * out. Use obd_timeout if calculated value is smaller
+ * than it. */
+ if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+ timeout = ptlrpc_inflight_timeout(imp);
+ timeout += timeout / 3;
+
+ if (timeout == 0)
+ timeout = obd_timeout;
+ } else {
+ /* decrease the interval to increase race condition */
+ timeout = 1;
+ }
+
+ CDEBUG(D_RPCTRACE,"Sleeping %d sec for inflight to error out\n",
+ timeout);
+
+ /* Wait for all requests to error out and call completion
+ * callbacks. Cap it at obd_timeout -- these should all
+ * have been locally cancelled by ptlrpc_abort_inflight. */
+ lwi = LWI_TIMEOUT_INTERVAL(
+ cfs_timeout_cap(cfs_time_seconds(timeout)),
+ (timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+ NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inflight) == 0),
+ &lwi);
+ if (rc) {
+ const char *cli_tgt = obd2cli_tgt(imp->imp_obd);
+
+ CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
+ cli_tgt, rc,
+ atomic_read(&imp->imp_inflight));
+
+ spin_lock(&imp->imp_lock);
+ if (atomic_read(&imp->imp_inflight) == 0) {
+ int count = atomic_read(&imp->imp_unregistering);
+
+ /* We know that "unregistering" rpcs only can
+ * survive in sending or delaying lists (they
+ * maybe waiting for long reply unlink in
+ * sluggish nets). Let's check this. If there
+ * is no inflight and unregistering != 0, this
+ * is bug. */
+ LASSERTF(count == 0, "Some RPCs are still "
+ "unregistering: %d\n", count);
+
+ /* Let's save one loop as soon as inflight have
+ * dropped to zero. No new inflights possible at
+ * this point. */
+ rc = 0;
+ } else {
+ list_for_each_safe(tmp, n,
+ &imp->imp_sending_list) {
+ req = list_entry(tmp,
+ struct ptlrpc_request,
+ rq_list);
+ DEBUG_REQ(D_ERROR, req,
+ "still on sending list");
+ }
+ list_for_each_safe(tmp, n,
+ &imp->imp_delayed_list) {
+ req = list_entry(tmp,
+ struct ptlrpc_request,
+ rq_list);
+ DEBUG_REQ(D_ERROR, req,
+ "still on delayed list");
+ }
+
+ CERROR("%s: RPCs in \"%s\" phase found (%d). "
+ "Network is sluggish? Waiting them "
+ "to error out.\n", cli_tgt,
+ ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
+ atomic_read(&imp->
+ imp_unregistering));
+ }
+ spin_unlock(&imp->imp_lock);
+ }
+ } while (rc != 0);
+
+ /*
+ * Let's additionally check that no new rpcs added to import in
+ * "invalidate" state.
+ */
+ LASSERT(atomic_read(&imp->imp_inflight) == 0);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
+ sptlrpc_import_flush_all_ctx(imp);
+
+ atomic_dec(&imp->imp_inval_count);
+ wake_up_all(&imp->imp_recovery_waitq);
+}
+EXPORT_SYMBOL(ptlrpc_invalidate_import);
+
+/* unset imp_invalid */
+void ptlrpc_activate_import(struct obd_import *imp)
+{
+ struct obd_device *obd = imp->imp_obd;
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_invalid = 0;
+ ptlrpc_activate_timeouts(imp);
+ spin_unlock(&imp->imp_lock);
+ obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+}
+EXPORT_SYMBOL(ptlrpc_activate_import);
+
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
+{
+ ENTRY;
+
+ LASSERT(!imp->imp_dlm_fake);
+
+ if (ptlrpc_set_import_discon(imp, conn_cnt)) {
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA, "import %s@%s for %s not replayable, "
+ "auto-deactivating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_obd->obd_name);
+ ptlrpc_deactivate_import(imp);
+ }
+
+ CDEBUG(D_HA, "%s: waking up pinger\n",
+ obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_force_verify = 1;
+ spin_unlock(&imp->imp_lock);
+
+ ptlrpc_pinger_wake_up();
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_fail_import);
+
+int ptlrpc_reconnect_import(struct obd_import *imp)
+{
+ ptlrpc_set_import_discon(imp, 0);
+ /* Force a new connect attempt */
+ ptlrpc_invalidate_import(imp);
+ /* Do a fresh connect next time by zeroing the handle */
+ ptlrpc_disconnect_import(imp, 1);
+ /* Wait for all invalidate calls to finish */
+ if (atomic_read(&imp->imp_inval_count) > 0) {
+ int rc;
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inval_count) == 0),
+ &lwi);
+ if (rc)
+ CERROR("Interrupted, inval=%d\n",
+ atomic_read(&imp->imp_inval_count));
+ }
+
+ /* Allow reconnect attempts */
+ imp->imp_obd->obd_no_recov = 0;
+ /* Remove 'invalid' flag */
+ ptlrpc_activate_import(imp);
+ /* Attempt a new connect */
+ ptlrpc_recover_import(imp, NULL, 0);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically chose connection that we have not tried to connect to
+ * the longest
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+ struct obd_import_conn *imp_conn = NULL, *conn;
+ struct obd_export *dlmexp;
+ char *target_start;
+ int target_len, tried_all = 1;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+
+ if (list_empty(&imp->imp_conn_list)) {
+ CERROR("%s: no connections available\n",
+ imp->imp_obd->obd_name);
+ spin_unlock(&imp->imp_lock);
+ RETURN(-EINVAL);
+ }
+
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ CDEBUG(D_HA, "%s: connect to NID %s last attempt "LPU64"\n",
+ imp->imp_obd->obd_name,
+ libcfs_nid2str(conn->oic_conn->c_peer.nid),
+ conn->oic_last_attempt);
+
+ /* If we have not tried this connection since
+ the last successful attempt, go with this one */
+ if ((conn->oic_last_attempt == 0) ||
+ cfs_time_beforeq_64(conn->oic_last_attempt,
+ imp->imp_last_success_conn)) {
+ imp_conn = conn;
+ tried_all = 0;
+ break;
+ }
+
+ /* If all of the connections have already been tried
+ since the last successful connection; just choose the
+ least recently used */
+ if (!imp_conn)
+ imp_conn = conn;
+ else if (cfs_time_before_64(conn->oic_last_attempt,
+ imp_conn->oic_last_attempt))
+ imp_conn = conn;
+ }
+
+ /* if not found, simply choose the current one */
+ if (!imp_conn || imp->imp_force_reconnect) {
+ LASSERT(imp->imp_conn_current);
+ imp_conn = imp->imp_conn_current;
+ tried_all = 0;
+ }
+ LASSERT(imp_conn->oic_conn);
+
+ /* If we've tried everything, and we're back to the beginning of the
+ list, increase our timeout and try again. It will be reset when
+ we do finally connect. (FIXME: really we should wait for all network
+ state associated with the last connection attempt to drain before
+ trying to reconnect on it.) */
+ if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
+ struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+ if (at_get(at) < CONNECTION_SWITCH_MAX) {
+ at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
+ if (at_get(at) > CONNECTION_SWITCH_MAX)
+ at_reset(at, CONNECTION_SWITCH_MAX);
+ }
+ LASSERT(imp_conn->oic_last_attempt);
+ CDEBUG(D_HA, "%s: tried all connections, increasing latency "
+ "to %ds\n", imp->imp_obd->obd_name, at_get(at));
+ }
+
+ imp_conn->oic_last_attempt = cfs_time_current_64();
+
+ /* switch connection, don't mind if it's same as the current one */
+ if (imp->imp_connection)
+ ptlrpc_connection_put(imp->imp_connection);
+ imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+
+ dlmexp = class_conn2export(&imp->imp_dlm_handle);
+ LASSERT(dlmexp != NULL);
+ if (dlmexp->exp_connection)
+ ptlrpc_connection_put(dlmexp->exp_connection);
+ dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+ class_export_put(dlmexp);
+
+ if (imp->imp_conn_current != imp_conn) {
+ if (imp->imp_conn_current) {
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+
+ CDEBUG(D_HA, "%s: Connection changing to"
+ " %.*s (at %s)\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+ }
+
+ imp->imp_conn_current = imp_conn;
+ }
+
+ CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+ imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+
+ spin_unlock(&imp->imp_lock);
+
+ RETURN(0);
+}
+
+/*
+ * must be called under imp_lock
+ */
+static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
+{
+ struct ptlrpc_request *req;
+ struct list_head *tmp;
+
+ if (list_empty(&imp->imp_replay_list))
+ return 0;
+ tmp = imp->imp_replay_list.next;
+ req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ *transno = req->rq_transno;
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_ERROR, req, "zero transno in replay");
+ LBUG();
+ }
+
+ return 1;
+}
+
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_connect_import(struct obd_import *imp)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int initial_connect = 0;
+ int set_transno = 0;
+ __u64 committed_before_reconnect = 0;
+ struct ptlrpc_request *request;
+ char *bufs[] = { NULL,
+ obd2cli_tgt(imp->imp_obd),
+ obd->obd_uuid.uuid,
+ (char *)&imp->imp_dlm_handle,
+ (char *)&imp->imp_connect_data };
+ struct ptlrpc_connect_async_args *aa;
+ int rc;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("can't connect to a closed import\n");
+ RETURN(-EINVAL);
+ } else if (imp->imp_state == LUSTRE_IMP_FULL) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("already connected\n");
+ RETURN(0);
+ } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("already connecting\n");
+ RETURN(-EALREADY);
+ }
+
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
+
+ imp->imp_conn_cnt++;
+ imp->imp_resend_replay = 0;
+
+ if (!lustre_handle_is_used(&imp->imp_remote_handle))
+ initial_connect = 1;
+ else
+ committed_before_reconnect = imp->imp_peer_committed_transno;
+
+ set_transno = ptlrpc_first_transno(imp,
+ &imp->imp_connect_data.ocd_transno);
+ spin_unlock(&imp->imp_lock);
+
+ rc = import_select_connection(imp);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = sptlrpc_import_sec_adapt(imp, NULL, 0);
+ if (rc)
+ GOTO(out, rc);
+
+ /* Reset connect flags to the originally requested flags, in case
+ * the server is updated on-the-fly we will get the new features. */
+ imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
+ /* Reset ocd_version each time so the server knows the exact versions */
+ imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE;
+ imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+ rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
+ &obd->obd_uuid, &imp->imp_connect_data, NULL);
+ if (rc)
+ GOTO(out, rc);
+
+ request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
+ if (request == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
+ imp->imp_connect_op, bufs, NULL);
+ if (rc) {
+ ptlrpc_request_free(request);
+ GOTO(out, rc);
+ }
+
+ /* Report the rpc service time to the server so that it knows how long
+ * to wait for clients to join recovery */
+ lustre_msg_set_service_time(request->rq_reqmsg,
+ at_timeout2est(request->rq_timeout));
+
+ /* The amount of time we give the server to process the connect req.
+ * import_select_connection will increase the net latency on
+ * repeated reconnect attempts to cover slow networks.
+ * We override/ignore the server rpc completion estimate here,
+ * which may be large if this is a reconnect attempt */
+ request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+ lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
+ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
+
+ request->rq_no_resend = request->rq_no_delay = 1;
+ request->rq_send_state = LUSTRE_IMP_CONNECTING;
+ /* Allow a slightly larger reply for future growth compatibility */
+ req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
+ sizeof(struct obd_connect_data)+16*sizeof(__u64));
+ ptlrpc_request_set_replen(request);
+ request->rq_interpret_reply = ptlrpc_connect_interpret;
+
+ CLASSERT(sizeof (*aa) <= sizeof (request->rq_async_args));
+ aa = ptlrpc_req_async_args(request);
+ memset(aa, 0, sizeof *aa);
+
+ aa->pcaa_peer_committed = committed_before_reconnect;
+ aa->pcaa_initial_connect = initial_connect;
+
+ if (aa->pcaa_initial_connect) {
+ spin_lock(&imp->imp_lock);
+ imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
+ lustre_msg_add_op_flags(request->rq_reqmsg,
+ MSG_CONNECT_INITIAL);
+ }
+
+ if (set_transno)
+ lustre_msg_add_op_flags(request->rq_reqmsg,
+ MSG_CONNECT_TRANSNO);
+
+ DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
+ request->rq_timeout);
+ ptlrpcd_add_req(request, PDL_POLICY_ROUND, -1);
+ rc = 0;
+out:
+ if (rc != 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_connect_import);
+
+static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
+{
+ int force_verify;
+
+ spin_lock(&imp->imp_lock);
+ force_verify = imp->imp_force_verify != 0;
+ spin_unlock(&imp->imp_lock);
+
+ if (force_verify)
+ ptlrpc_pinger_wake_up();
+}
+
+static int ptlrpc_busy_reconnect(int rc)
+{
+ return (rc == -EBUSY) || (rc == -EAGAIN);
+}
+
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e enter recovery, promote it to
+ * full state for normal operations of disconnect it due to an error.
+ */
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+ struct ptlrpc_request *request,
+ void *data, int rc)
+{
+ struct ptlrpc_connect_async_args *aa = data;
+ struct obd_import *imp = request->rq_import;
+ struct client_obd *cli = &imp->imp_obd->u.cli;
+ struct lustre_handle old_hdl;
+ __u64 old_connect_flags;
+ int msg_flags;
+ struct obd_connect_data *ocd;
+ struct obd_export *exp;
+ int ret;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ imp->imp_connect_tried = 1;
+ spin_unlock(&imp->imp_lock);
+ RETURN(0);
+ }
+
+ if (rc) {
+ /* if this reconnect to busy export - not need select new target
+ * for connecting*/
+ imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_maybe_ping_import_soon(imp);
+ GOTO(out, rc);
+ }
+ spin_unlock(&imp->imp_lock);
+
+ LASSERT(imp->imp_conn_current);
+
+ msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+
+ ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA,
+ RCL_SERVER);
+ /* server replied obd_connect_data is always bigger */
+ ocd = req_capsule_server_sized_get(&request->rq_pill,
+ &RMF_CONNECT_DATA, ret);
+
+ if (ocd == NULL) {
+ CERROR("%s: no connect data from server\n",
+ imp->imp_obd->obd_name);
+ rc = -EPROTO;
+ GOTO(out, rc);
+ }
+
+ spin_lock(&imp->imp_lock);
+
+ /* All imports are pingable */
+ imp->imp_pingable = 1;
+ imp->imp_force_reconnect = 0;
+ imp->imp_force_verify = 0;
+
+ imp->imp_connect_data = *ocd;
+
+ CDEBUG(D_HA, "%s: connect to target with instance %u\n",
+ imp->imp_obd->obd_name, ocd->ocd_instance);
+ exp = class_conn2export(&imp->imp_dlm_handle);
+
+ spin_unlock(&imp->imp_lock);
+
+ /* check that server granted subset of flags we asked for. */
+ if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
+ ocd->ocd_connect_flags) {
+ CERROR("%s: Server didn't granted asked subset of flags: "
+ "asked="LPX64" grranted="LPX64"\n",
+ imp->imp_obd->obd_name,imp->imp_connect_flags_orig,
+ ocd->ocd_connect_flags);
+ GOTO(out, rc = -EPROTO);
+ }
+
+ if (!exp) {
+ /* This could happen if export is cleaned during the
+ connect attempt */
+ CERROR("%s: missing export after connect\n",
+ imp->imp_obd->obd_name);
+ GOTO(out, rc = -ENODEV);
+ }
+ old_connect_flags = exp_connect_flags(exp);
+ exp->exp_connect_data = *ocd;
+ imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
+ class_export_put(exp);
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
+
+ if (aa->pcaa_initial_connect) {
+ spin_lock(&imp->imp_lock);
+ if (msg_flags & MSG_CONNECT_REPLAYABLE) {
+ imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
+ CDEBUG(D_HA, "connected to replayable target: %s\n",
+ obd2cli_tgt(imp->imp_obd));
+ } else {
+ imp->imp_replayable = 0;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ /* if applies, adjust the imp->imp_msg_magic here
+ * according to reply flags */
+
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+
+ /* Initial connects are allowed for clients with non-random
+ * uuids when servers are in recovery. Simply signal the
+ * servers replay is complete and wait in REPLAY_WAIT. */
+ if (msg_flags & MSG_CONNECT_RECOVERING) {
+ CDEBUG(D_HA, "connect to %s during recovery\n",
+ obd2cli_tgt(imp->imp_obd));
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+ } else {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+ ptlrpc_activate_import(imp);
+ }
+
+ GOTO(finish, rc = 0);
+ }
+
+ /* Determine what recovery state to move the import to. */
+ if (MSG_CONNECT_RECONNECT & msg_flags) {
+ memset(&old_hdl, 0, sizeof(old_hdl));
+ if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg),
+ sizeof (old_hdl))) {
+ LCONSOLE_WARN("Reconnect to %s (at @%s) failed due "
+ "bad handle "LPX64"\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_dlm_handle.cookie);
+ GOTO(out, rc = -ENOTCONN);
+ }
+
+ if (memcmp(&imp->imp_remote_handle,
+ lustre_msg_get_handle(request->rq_repmsg),
+ sizeof(imp->imp_remote_handle))) {
+ int level = msg_flags & MSG_CONNECT_RECOVERING ?
+ D_HA : D_WARNING;
+
+ /* Bug 16611/14775: if server handle have changed,
+ * that means some sort of disconnection happened.
+ * If the server is not in recovery, that also means it
+ * already erased all of our state because of previous
+ * eviction. If it is in recovery - we are safe to
+ * participate since we can reestablish all of our state
+ * with server again */
+ if ((MSG_CONNECT_RECOVERING & msg_flags)) {
+ CDEBUG(level,"%s@%s changed server handle from "
+ LPX64" to "LPX64
+ " but is still in recovery\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_remote_handle.cookie,
+ lustre_msg_get_handle(
+ request->rq_repmsg)->cookie);
+ } else {
+ LCONSOLE_WARN("Evicted from %s (at %s) "
+ "after server handle changed from "
+ LPX64" to "LPX64"\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection-> \
+ c_remote_uuid.uuid,
+ imp->imp_remote_handle.cookie,
+ lustre_msg_get_handle(
+ request->rq_repmsg)->cookie);
+ }
+
+
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+
+ if (!(MSG_CONNECT_RECOVERING & msg_flags)) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ GOTO(finish, rc = 0);
+ }
+
+ } else {
+ CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ }
+
+ if (imp->imp_invalid) {
+ CDEBUG(D_HA, "%s: reconnected but import is invalid; "
+ "marking evicted\n", imp->imp_obd->obd_name);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+ imp->imp_obd->obd_name,
+ obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_resend_replay = 1;
+ spin_unlock(&imp->imp_lock);
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+ } else {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
+ } else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
+ LASSERT(imp->imp_replayable);
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+ imp->imp_last_replay_transno = 0;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+ } else {
+ DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags"
+ " not set: %x)", imp->imp_obd->obd_name, msg_flags);
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ }
+
+ /* Sanity checks for a reconnected import. */
+ if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE)) {
+ CERROR("imp_replayable flag does not match server "
+ "after reconnect. We should LBUG right here.\n");
+ }
+
+ if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
+ lustre_msg_get_last_committed(request->rq_repmsg) <
+ aa->pcaa_peer_committed) {
+ CERROR("%s went back in time (transno "LPD64
+ " was previously committed, server now claims "LPD64
+ ")! See https://bugzilla.lustre.org/show_bug.cgi?"
+ "id=9646\n",
+ obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
+ lustre_msg_get_last_committed(request->rq_repmsg));
+ }
+
+finish:
+ rc = ptlrpc_import_recovery_state_machine(imp);
+ if (rc != 0) {
+ if (rc == -ENOTCONN) {
+ CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
+ "invalidating and reconnecting\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ ptlrpc_connect_import(imp);
+ imp->imp_connect_tried = 1;
+ RETURN(0);
+ }
+ } else {
+
+ spin_lock(&imp->imp_lock);
+ list_del(&imp->imp_conn_current->oic_item);
+ list_add(&imp->imp_conn_current->oic_item,
+ &imp->imp_conn_list);
+ imp->imp_last_success_conn =
+ imp->imp_conn_current->oic_last_attempt;
+
+ spin_unlock(&imp->imp_lock);
+
+ if (!ocd->ocd_ibits_known &&
+ ocd->ocd_connect_flags & OBD_CONNECT_IBITS)
+ CERROR("Inodebits aware server returned zero compatible"
+ " bits?\n");
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ (ocd->ocd_version > LUSTRE_VERSION_CODE +
+ LUSTRE_VERSION_OFFSET_WARN ||
+ ocd->ocd_version < LUSTRE_VERSION_CODE -
+ LUSTRE_VERSION_OFFSET_WARN)) {
+ /* Sigh, some compilers do not like #ifdef in the middle
+ of macro arguments */
+ const char *older = "older. Consider upgrading server "
+ "or downgrading client";
+ const char *newer = "newer than client version. "
+ "Consider upgrading client";
+
+ LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) "
+ "is much %s (%s)\n",
+ obd2cli_tgt(imp->imp_obd),
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version),
+ ocd->ocd_version > LUSTRE_VERSION_CODE ?
+ newer : older, LUSTRE_VERSION_STRING);
+ }
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+ /* Check if server has LU-1252 fix applied to not always swab
+ * the IR MNE entries. Do this only once per connection. This
+ * fixup is version-limited, because we don't want to carry the
+ * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+ * need interop with unpatched 2.2 servers. For newer servers,
+ * the client will do MNE swabbing only as needed. LU-1644 */
+ if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+ strcmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MGC_NAME) == 0))
+ imp->imp_need_mne_swab = 1;
+ else /* clear if server was upgraded since last connect */
+ imp->imp_need_mne_swab = 0;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
+#endif
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+ /* We sent to the server ocd_cksum_types with bits set
+ * for algorithms we understand. The server masked off
+ * the checksum types it doesn't support */
+ if ((ocd->ocd_cksum_types &
+ cksum_types_supported_client()) == 0) {
+ LCONSOLE_WARN("The negotiation of the checksum "
+ "alogrithm to use with server %s "
+ "failed (%x/%x), disabling "
+ "checksums\n",
+ obd2cli_tgt(imp->imp_obd),
+ ocd->ocd_cksum_types,
+ cksum_types_supported_client());
+ cli->cl_checksum = 0;
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ } else {
+ cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
+ }
+ } else {
+ /* The server does not support OBD_CONNECT_CKSUM.
+ * Enforce ADLER for backward compatibility*/
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ }
+ cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types);
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+ cli->cl_max_pages_per_rpc =
+ min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT,
+ cli->cl_max_pages_per_rpc);
+ else if (imp->imp_connect_op == MDS_CONNECT ||
+ imp->imp_connect_op == MGS_CONNECT)
+ cli->cl_max_pages_per_rpc = 1;
+
+ /* Reset ns_connect_flags only for initial connect. It might be
+ * changed in while using FS and if we reset it in reconnect
+ * this leads to losing user settings done before such as
+ * disable lru_resize, etc. */
+ if (old_connect_flags != exp_connect_flags(exp) ||
+ aa->pcaa_initial_connect) {
+ CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server "
+ "flags: "LPX64"\n", imp->imp_obd->obd_name,
+ ocd->ocd_connect_flags);
+ imp->imp_obd->obd_namespace->ns_connect_flags =
+ ocd->ocd_connect_flags;
+ imp->imp_obd->obd_namespace->ns_orig_connect_flags =
+ ocd->ocd_connect_flags;
+ }
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ /* We need a per-message support flag, because
+ a. we don't know if the incoming connect reply
+ supports AT or not (in reply_in_callback)
+ until we unpack it.
+ b. failovered server means export and flags are gone
+ (in ptlrpc_send_reply).
+ Can only be set when we know AT is supported at
+ both ends */
+ imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+ LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+ (cli->cl_max_pages_per_rpc > 0));
+ }
+
+out:
+ imp->imp_connect_tried = 1;
+
+ if (rc != 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+ if (rc == -EACCES) {
+ /*
+ * Give up trying to reconnect
+ * EACCES means client has no permission for connection
+ */
+ imp->imp_obd->obd_no_recov = 1;
+ ptlrpc_deactivate_import(imp);
+ }
+
+ if (rc == -EPROTO) {
+ struct obd_connect_data *ocd;
+
+ /* reply message might not be ready */
+ if (request->rq_repmsg == NULL)
+ RETURN(-EPROTO);
+
+ ocd = req_capsule_server_get(&request->rq_pill,
+ &RMF_CONNECT_DATA);
+ if (ocd &&
+ (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ (ocd->ocd_version != LUSTRE_VERSION_CODE)) {
+ /* Actually servers are only supposed to refuse
+ connection from liblustre clients, so we should
+ never see this from VFS context */
+ LCONSOLE_ERROR_MSG(0x16a, "Server %s version "
+ "(%d.%d.%d.%d)"
+ " refused connection from this client "
+ "with an incompatible version (%s). "
+ "Client must be recompiled\n",
+ obd2cli_tgt(imp->imp_obd),
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version),
+ LUSTRE_VERSION_STRING);
+ ptlrpc_deactivate_import(imp);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
+ }
+ RETURN(-EPROTO);
+ }
+
+ ptlrpc_maybe_ping_import_soon(imp);
+
+ CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
+ obd2cli_tgt(imp->imp_obd),
+ (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
+ }
+
+ wake_up_all(&imp->imp_recovery_waitq);
+ RETURN(rc);
+}
+
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
+static int completed_replay_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void * data, int rc)
+{
+ ENTRY;
+ atomic_dec(&req->rq_import->imp_replay_inflight);
+ if (req->rq_status == 0 &&
+ !req->rq_import->imp_vbr_failed) {
+ ptlrpc_import_recovery_state_machine(req->rq_import);
+ } else {
+ if (req->rq_import->imp_vbr_failed) {
+ CDEBUG(D_WARNING,
+ "%s: version recovery fails, reconnecting\n",
+ req->rq_import->imp_obd->obd_name);
+ } else {
+ CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
+ "reconnecting\n",
+ req->rq_import->imp_obd->obd_name,
+ req->rq_status);
+ }
+ ptlrpc_connect_import(req->rq_import);
+ }
+
+ RETURN(0);
+}
+
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
+static int signal_completed_replay(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
+ RETURN(0);
+
+ LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+ atomic_inc(&imp->imp_replay_inflight);
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
+ OBD_PING);
+ if (req == NULL) {
+ atomic_dec(&imp->imp_replay_inflight);
+ RETURN(-ENOMEM);
+ }
+
+ ptlrpc_request_set_replen(req);
+ req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
+ lustre_msg_add_flags(req->rq_reqmsg,
+ MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
+ if (AT_OFF)
+ req->rq_timeout *= 3;
+ req->rq_interpret_reply = completed_replay_interpret;
+
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ RETURN(0);
+}
+
+/**
+ * In kernel code all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem could still be killed or otherwise continue
+ */
+static int ptlrpc_invalidate_import_thread(void *data)
+{
+ struct obd_import *imp = data;
+
+ ENTRY;
+
+ unshare_fs_struct();
+
+ CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
+ imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ ptlrpc_invalidate_import(imp);
+
+ if (obd_dump_on_eviction) {
+ CERROR("dump the log upon eviction\n");
+ libcfs_debug_dumplog();
+ }
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ ptlrpc_import_recovery_state_machine(imp);
+
+ class_import_put(imp);
+ RETURN(0);
+}
+
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typicaly we have two possibly paths. If we came to server and it is not
+ * in recovery, we just enter IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we came to server that is in recovery, we enter IMP_REPLAY import state.
+ * We go through our list of requests to replay and send them to server one by
+ * one.
+ * After sending all request from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server
+ * and also all the locks we don't yet have and wait for server to grant us.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER
+ * state and resend all requests from sending list.
+ * After that we promote import to FULL state and send all delayed requests
+ * and import is fully operational after that.
+ *
+ */
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
+{
+ int rc = 0;
+ int inflight;
+ char *target_start;
+ int target_len;
+
+ ENTRY;
+ if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+ /* Don't care about MGC eviction */
+ if (strcmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MGC_NAME) != 0) {
+ LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted "
+ "by %.*s; in progress operations "
+ "using this service will fail.\n",
+ imp->imp_obd->obd_name, target_len,
+ target_start);
+ }
+ CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ /* reset vbr_failed flag upon eviction */
+ spin_lock(&imp->imp_lock);
+ imp->imp_vbr_failed = 0;
+ spin_unlock(&imp->imp_lock);
+
+ {
+ task_t *task;
+ /* bug 17802: XXX client_disconnect_export vs connect request
+ * race. if client will evicted at this time, we start
+ * invalidate thread without reference to import and import can
+ * be freed at same time. */
+ class_import_get(imp);
+ task = kthread_run(ptlrpc_invalidate_import_thread, imp,
+ "ll_imp_inval");
+ if (IS_ERR(task)) {
+ class_import_put(imp);
+ CERROR("error starting invalidate thread: %d\n", rc);
+ rc = PTR_ERR(task);
+ } else {
+ rc = 0;
+ }
+ RETURN(rc);
+ }
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY) {
+ CDEBUG(D_HA, "replay requested by %s\n",
+ obd2cli_tgt(imp->imp_obd));
+ rc = ptlrpc_replay_next(imp, &inflight);
+ if (inflight == 0 &&
+ atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+ rc = ldlm_replay_locks(imp);
+ if (rc)
+ GOTO(out, rc);
+ }
+ rc = 0;
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) {
+ if (atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT);
+ rc = signal_completed_replay(imp);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
+ if (atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_RECOVER) {
+ CDEBUG(D_HA, "reconnected to %s@%s\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ rc = ptlrpc_resend(imp);
+ if (rc)
+ GOTO(out, rc);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+ ptlrpc_activate_import(imp);
+
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+ LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_FULL) {
+ wake_up_all(&imp->imp_recovery_waitq);
+ ptlrpc_wake_delayed(imp);
+ }
+
+out:
+ RETURN(rc);
+}
+
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+{
+ struct ptlrpc_request *req;
+ int rq_opc, rc = 0;
+ int nowait = imp->imp_obd->obd_force;
+ ENTRY;
+
+ if (nowait)
+ GOTO(set_state, rc);
+
+ switch (imp->imp_connect_op) {
+ case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+ case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+ case MGS_CONNECT: rq_opc = MGS_DISCONNECT; break;
+ default:
+ CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+ obd2cli_tgt(imp->imp_obd), imp->imp_connect_op);
+ RETURN(-EINVAL);
+ }
+
+ if (ptlrpc_import_in_recovery(imp)) {
+ struct l_wait_info lwi;
+ cfs_duration_t timeout;
+
+
+ if (AT_OFF) {
+ if (imp->imp_server_timeout)
+ timeout = cfs_time_seconds(obd_timeout / 2);
+ else
+ timeout = cfs_time_seconds(obd_timeout);
+ } else {
+ int idx = import_at_get_index(imp,
+ imp->imp_client->cli_request_portal);
+ timeout = cfs_time_seconds(
+ at_get(&imp->imp_at.iat_service_estimate[idx]));
+ }
+
+ lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout),
+ back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_recovery(imp), &lwi);
+
+ }
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_FULL)
+ GOTO(out, 0);
+
+ spin_unlock(&imp->imp_lock);
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
+ LUSTRE_OBD_VERSION, rq_opc);
+ if (req) {
+ /* We are disconnecting, do not retry a failed DISCONNECT rpc if
+ * it fails. We can get through the above with a down server
+ * if the client doesn't know the server is gone yet. */
+ req->rq_no_resend = 1;
+
+ /* We want client umounts to happen quickly, no matter the
+ server state... */
+ req->rq_timeout = min_t(int, req->rq_timeout,
+ INITIAL_CONNECT_TIMEOUT);
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
+ req->rq_send_state = LUSTRE_IMP_CONNECTING;
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ ptlrpc_req_finished(req);
+ }
+
+set_state:
+ spin_lock(&imp->imp_lock);
+out:
+ if (noclose)
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+ else
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+ memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+ spin_unlock(&imp->imp_lock);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_disconnect_import);
+
+void ptlrpc_cleanup_imp(struct obd_import *imp)
+{
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+ imp->imp_generation++;
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_abort_inflight(imp);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_imp);
+
+/* Adaptive Timeout utils */
+extern unsigned int at_min, at_max, at_history;
+
+/* Bin into timeslices using AT_BINS bins.
+ This gives us a max of the last binlimit*AT_BINS secs without the storage,
+ but still smoothing out a return to normalcy from a slow response.
+ (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
+int at_measured(struct adaptive_timeout *at, unsigned int val)
+{
+ unsigned int old = at->at_current;
+ time_t now = cfs_time_current_sec();
+ time_t binlimit = max_t(time_t, at_history / AT_BINS, 1);
+
+ LASSERT(at);
+ CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
+ val, at, now - at->at_binstart, at->at_current,
+ at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
+
+ if (val == 0)
+ /* 0's don't count, because we never want our timeout to
+ drop to 0, and because 0 could mean an error */
+ return 0;
+
+ spin_lock(&at->at_lock);
+
+ if (unlikely(at->at_binstart == 0)) {
+ /* Special case to remove default from history */
+ at->at_current = val;
+ at->at_worst_ever = val;
+ at->at_worst_time = now;
+ at->at_hist[0] = val;
+ at->at_binstart = now;
+ } else if (now - at->at_binstart < binlimit ) {
+ /* in bin 0 */
+ at->at_hist[0] = max(val, at->at_hist[0]);
+ at->at_current = max(val, at->at_current);
+ } else {
+ int i, shift;
+ unsigned int maxv = val;
+ /* move bins over */
+ shift = (now - at->at_binstart) / binlimit;
+ LASSERT(shift > 0);
+ for(i = AT_BINS - 1; i >= 0; i--) {
+ if (i >= shift) {
+ at->at_hist[i] = at->at_hist[i - shift];
+ maxv = max(maxv, at->at_hist[i]);
+ } else {
+ at->at_hist[i] = 0;
+ }
+ }
+ at->at_hist[0] = val;
+ at->at_current = maxv;
+ at->at_binstart += shift * binlimit;
+ }
+
+ if (at->at_current > at->at_worst_ever) {
+ at->at_worst_ever = at->at_current;
+ at->at_worst_time = now;
+ }
+
+ if (at->at_flags & AT_FLG_NOHIST)
+ /* Only keep last reported val; keeping the rest of the history
+ for proc only */
+ at->at_current = val;
+
+ if (at_max > 0)
+ at->at_current = min(at->at_current, at_max);
+ at->at_current = max(at->at_current, at_min);
+
+ if (at->at_current != old)
+ CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d "
+ "(val=%u) hist %u %u %u %u\n", at,
+ old, at->at_current, at->at_current - old, val,
+ at->at_hist[0], at->at_hist[1], at->at_hist[2],
+ at->at_hist[3]);
+
+ /* if we changed, report the old value */
+ old = (at->at_current != old) ? old : 0;
+
+ spin_unlock(&at->at_lock);
+ return old;
+}
+
+/* Find the imp_at index for a given portal; assign if space available */
+int import_at_get_index(struct obd_import *imp, int portal)
+{
+ struct imp_at *at = &imp->imp_at;
+ int i;
+
+ for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+ if (at->iat_portal[i] == portal)
+ return i;
+ if (at->iat_portal[i] == 0)
+ /* unused */
+ break;
+ }
+
+ /* Not found in list, add it under a lock */
+ spin_lock(&imp->imp_lock);
+
+ /* Check unused under lock */
+ for (; i < IMP_AT_MAX_PORTALS; i++) {
+ if (at->iat_portal[i] == portal)
+ goto out;
+ if (at->iat_portal[i] == 0)
+ /* unused */
+ break;
+ }
+
+ /* Not enough portals? */
+ LASSERT(i < IMP_AT_MAX_PORTALS);
+
+ at->iat_portal[i] = portal;
+out:
+ spin_unlock(&imp->imp_lock);
+ return i;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
new file mode 100644
index 000000000000..2f55ce26ccba
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -0,0 +1,2396 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/layout.c
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+/*
+ * This file contains the "capsule/pill" abstraction layered above PTLRPC.
+ *
+ * Every struct ptlrpc_request contains a "pill", which points to a description
+ * of the format that the request conforms to.
+ */
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/module.h>
+
+/* LUSTRE_VERSION_CODE */
+#include <lustre_ver.h>
+
+#include <obd_support.h>
+/* lustre_swab_mdt_body */
+#include <lustre/lustre_idl.h>
+/* obd2cli_tgt() (required by DEBUG_REQ()) */
+#include <obd.h>
+
+/* __REQ_LAYOUT_USER__ */
+#endif
+/* struct ptlrpc_request, lustre_msg* */
+#include <lustre_req_layout.h>
+#include <lustre_update.h>
+#include <lustre_acl.h>
+#include <lustre_debug.h>
+
+/*
+ * RQFs (see below) refer to two struct req_msg_field arrays describing the
+ * client request and server reply, respectively.
+ */
+/* empty set of fields... for suitable definition of emptiness. */
+static const struct req_msg_field *empty[] = {
+ &RMF_PTLRPC_BODY
+};
+
+static const struct req_msg_field *mgs_target_info_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_TARGET_INFO
+};
+
+static const struct req_msg_field *mgs_set_info[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_SEND_PARAM
+};
+
+static const struct req_msg_field *mgs_config_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_BODY
+};
+
+static const struct req_msg_field *mgs_config_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_RES
+};
+
+static const struct req_msg_field *log_cancel_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LOGCOOKIES
+};
+
+static const struct req_msg_field *mdt_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY
+};
+
+static const struct req_msg_field *mdt_body_capa[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *quotactl_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_QUOTACTL
+};
+
+static const struct req_msg_field *quota_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_DLM_LVB,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *mdt_close_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_EPOCH,
+ &RMF_REC_REINT,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *obd_statfs_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_STATFS
+};
+
+static const struct req_msg_field *seq_query_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SEQ_OPC,
+ &RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *seq_query_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *fld_query_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FLD_OPC,
+ &RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_query_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *mds_getattr_name_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT
+};
+
+static const struct req_msg_field *mds_reint_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_create_slave_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_sym_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_open_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_reint_open_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_unlink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_link_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_rename_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_last_unlink_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_LOGCOOKIES,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_setattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_MDT_EPOCH,
+ &RMF_EADATA,
+ &RMF_LOGCOOKIES,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_setxattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mdt_swap_layouts[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_SWAP_LAYOUTS,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *obd_connect_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_TGTUUID,
+ &RMF_CLUUID,
+ &RMF_CONN,
+ &RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_connect_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_set_info_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY,
+ &RMF_SETINFO_VAL
+};
+
+static const struct req_msg_field *ost_grant_shrink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *mds_getinfo_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_KEY,
+ &RMF_GETINFO_VALLEN
+};
+
+static const struct req_msg_field *mds_getinfo_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_VAL,
+};
+
+static const struct req_msg_field *ldlm_enqueue_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *ldlm_enqueue_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP
+};
+
+static const struct req_msg_field *ldlm_enqueue_lvb_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_cp_callback_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_gl_callback_desc_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_DLM_GL_DESC
+};
+
+static const struct req_msg_field *ldlm_gl_callback_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_intent_basic_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+};
+
+static const struct req_msg_field *ldlm_intent_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT
+};
+
+static const struct req_msg_field *ldlm_intent_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL
+};
+
+static const struct req_msg_field *ldlm_intent_layout_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_LAYOUT_INTENT,
+ &RMF_EADATA /* for new layout to be set up */
+};
+static const struct req_msg_field *ldlm_intent_open_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ldlm_intent_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_create_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_open_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_open_client[] */
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_unlink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_unlink_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *mds_getxattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getxattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_setattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_update_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_UPDATE,
+};
+
+static const struct req_msg_field *mds_update_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_UPDATE_REPLY,
+};
+
+static const struct req_msg_field *llog_origin_handle_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *llogd_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY
+};
+
+static const struct req_msg_field *llog_log_hdr_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOG_LOG_HDR
+};
+
+static const struct req_msg_field *llogd_conn_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_CONN_BODY
+};
+
+static const struct req_msg_field *llog_origin_handle_next_block_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *obd_idx_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_IDX_INFO
+};
+
+static const struct req_msg_field *obd_idx_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_IDX_INFO
+};
+
+static const struct req_msg_field *ost_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_body_capa[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_destroy_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_DLM_REQ,
+ &RMF_CAPA1
+};
+
+
+static const struct req_msg_field *ost_brw_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_OBD_IOOBJ,
+ &RMF_NIOBUF_REMOTE,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_brw_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_brw_write_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_RCS
+};
+
+static const struct req_msg_field *ost_get_info_generic_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GENERIC_DATA,
+};
+
+static const struct req_msg_field *ost_get_info_generic_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY
+};
+
+static const struct req_msg_field *ost_get_last_id_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_ID
+};
+
+static const struct req_msg_field *ost_get_last_fid_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FID,
+};
+
+static const struct req_msg_field *ost_get_fiemap_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FIEMAP_KEY,
+ &RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *ost_get_fiemap_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *mdt_hsm_progress[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_PROGRESS,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_register[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_ARCHIVE,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_unregister[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+};
+
+static const struct req_msg_field *mdt_hsm_action_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_CURRENT_ACTION,
+};
+
+static const struct req_msg_field *mdt_hsm_state_get_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_HSM_USER_STATE,
+};
+
+static const struct req_msg_field *mdt_hsm_state_set[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_HSM_STATE_SET,
+};
+
+static const struct req_msg_field *mdt_hsm_request[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_REQUEST,
+ &RMF_MDS_HSM_USER_ITEM,
+ &RMF_GENERIC_DATA,
+};
+
+static struct req_format *req_formats[] = {
+ &RQF_OBD_PING,
+ &RQF_OBD_SET_INFO,
+ &RQF_OBD_IDX_READ,
+ &RQF_SEC_CTX,
+ &RQF_MGS_TARGET_REG,
+ &RQF_MGS_SET_INFO,
+ &RQF_MGS_CONFIG_READ,
+ &RQF_SEQ_QUERY,
+ &RQF_FLD_QUERY,
+ &RQF_MDS_CONNECT,
+ &RQF_MDS_DISCONNECT,
+ &RQF_MDS_GET_INFO,
+ &RQF_MDS_GETSTATUS,
+ &RQF_MDS_STATFS,
+ &RQF_MDS_GETATTR,
+ &RQF_MDS_GETATTR_NAME,
+ &RQF_MDS_GETXATTR,
+ &RQF_MDS_SYNC,
+ &RQF_MDS_CLOSE,
+ &RQF_MDS_PIN,
+ &RQF_MDS_UNPIN,
+ &RQF_MDS_READPAGE,
+ &RQF_MDS_WRITEPAGE,
+ &RQF_MDS_IS_SUBDIR,
+ &RQF_MDS_DONE_WRITING,
+ &RQF_MDS_REINT,
+ &RQF_MDS_REINT_CREATE,
+ &RQF_MDS_REINT_CREATE_RMT_ACL,
+ &RQF_MDS_REINT_CREATE_SLAVE,
+ &RQF_MDS_REINT_CREATE_SYM,
+ &RQF_MDS_REINT_OPEN,
+ &RQF_MDS_REINT_UNLINK,
+ &RQF_MDS_REINT_LINK,
+ &RQF_MDS_REINT_RENAME,
+ &RQF_MDS_REINT_SETATTR,
+ &RQF_MDS_REINT_SETXATTR,
+ &RQF_MDS_QUOTACHECK,
+ &RQF_MDS_QUOTACTL,
+ &RQF_MDS_HSM_PROGRESS,
+ &RQF_MDS_HSM_CT_REGISTER,
+ &RQF_MDS_HSM_CT_UNREGISTER,
+ &RQF_MDS_HSM_STATE_GET,
+ &RQF_MDS_HSM_STATE_SET,
+ &RQF_MDS_HSM_ACTION,
+ &RQF_MDS_HSM_REQUEST,
+ &RQF_MDS_SWAP_LAYOUTS,
+ &RQF_UPDATE_OBJ,
+ &RQF_QC_CALLBACK,
+ &RQF_OST_CONNECT,
+ &RQF_OST_DISCONNECT,
+ &RQF_OST_QUOTACHECK,
+ &RQF_OST_QUOTACTL,
+ &RQF_OST_GETATTR,
+ &RQF_OST_SETATTR,
+ &RQF_OST_CREATE,
+ &RQF_OST_PUNCH,
+ &RQF_OST_SYNC,
+ &RQF_OST_DESTROY,
+ &RQF_OST_BRW_READ,
+ &RQF_OST_BRW_WRITE,
+ &RQF_OST_STATFS,
+ &RQF_OST_SET_GRANT_INFO,
+ &RQF_OST_GET_INFO_GENERIC,
+ &RQF_OST_GET_INFO_LAST_ID,
+ &RQF_OST_GET_INFO_LAST_FID,
+ &RQF_OST_SET_INFO_LAST_FID,
+ &RQF_OST_GET_INFO_FIEMAP,
+ &RQF_LDLM_ENQUEUE,
+ &RQF_LDLM_ENQUEUE_LVB,
+ &RQF_LDLM_CONVERT,
+ &RQF_LDLM_CANCEL,
+ &RQF_LDLM_CALLBACK,
+ &RQF_LDLM_CP_CALLBACK,
+ &RQF_LDLM_BL_CALLBACK,
+ &RQF_LDLM_GL_CALLBACK,
+ &RQF_LDLM_GL_DESC_CALLBACK,
+ &RQF_LDLM_INTENT,
+ &RQF_LDLM_INTENT_BASIC,
+ &RQF_LDLM_INTENT_LAYOUT,
+ &RQF_LDLM_INTENT_GETATTR,
+ &RQF_LDLM_INTENT_OPEN,
+ &RQF_LDLM_INTENT_CREATE,
+ &RQF_LDLM_INTENT_UNLINK,
+ &RQF_LDLM_INTENT_QUOTA,
+ &RQF_QUOTA_DQACQ,
+ &RQF_LOG_CANCEL,
+ &RQF_LLOG_ORIGIN_HANDLE_CREATE,
+ &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+ &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+ &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+ &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+ &RQF_LLOG_ORIGIN_CONNECT
+};
+
+struct req_msg_field {
+ const __u32 rmf_flags;
+ const char *rmf_name;
+ /**
+ * Field length. (-1) means "variable length". If the
+ * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length,
+ * but the actual size must be a whole multiple of \a rmf_size.
+ */
+ const int rmf_size;
+ void (*rmf_swabber)(void *);
+ void (*rmf_dumper)(void *);
+ int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR];
+};
+
+enum rmf_flags {
+ /**
+ * The field is a string, must be NUL-terminated.
+ */
+ RMF_F_STRING = 1 << 0,
+ /**
+ * The field's buffer size need not match the declared \a rmf_size.
+ */
+ RMF_F_NO_SIZE_CHECK = 1 << 1,
+ /**
+ * The field's buffer size must be a whole multiple of the declared \a
+ * rmf_size and the \a rmf_swabber function must work on the declared \a
+ * rmf_size worth of bytes.
+ */
+ RMF_F_STRUCT_ARRAY = 1 << 2
+};
+
+struct req_capsule;
+
+/*
+ * Request fields.
+ */
+#define DEFINE_MSGF(name, flags, size, swabber, dumper) { \
+ .rmf_name = (name), \
+ .rmf_flags = (flags), \
+ .rmf_size = (size), \
+ .rmf_swabber = (void (*)(void*))(swabber), \
+ .rmf_dumper = (void (*)(void*))(dumper) \
+}
+
+struct req_msg_field RMF_GENERIC_DATA =
+ DEFINE_MSGF("generic_data", 0,
+ -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GENERIC_DATA);
+
+struct req_msg_field RMF_MGS_TARGET_INFO =
+ DEFINE_MSGF("mgs_target_info", 0,
+ sizeof(struct mgs_target_info),
+ lustre_swab_mgs_target_info, NULL);
+EXPORT_SYMBOL(RMF_MGS_TARGET_INFO);
+
+struct req_msg_field RMF_MGS_SEND_PARAM =
+ DEFINE_MSGF("mgs_send_param", 0,
+ sizeof(struct mgs_send_param),
+ NULL, NULL);
+EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
+
+struct req_msg_field RMF_MGS_CONFIG_BODY =
+ DEFINE_MSGF("mgs_config_read request", 0,
+ sizeof(struct mgs_config_body),
+ lustre_swab_mgs_config_body, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
+
+struct req_msg_field RMF_MGS_CONFIG_RES =
+ DEFINE_MSGF("mgs_config_read reply ", 0,
+ sizeof(struct mgs_config_res),
+ lustre_swab_mgs_config_res, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
+
+struct req_msg_field RMF_U32 =
+ DEFINE_MSGF("generic u32", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_U32);
+
+struct req_msg_field RMF_SETINFO_VAL =
+ DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_VAL);
+
+struct req_msg_field RMF_GETINFO_KEY =
+ DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_KEY);
+
+struct req_msg_field RMF_GETINFO_VALLEN =
+ DEFINE_MSGF("getinfo_vallen", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VALLEN);
+
+struct req_msg_field RMF_GETINFO_VAL =
+ DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VAL);
+
+struct req_msg_field RMF_SEQ_OPC =
+ DEFINE_MSGF("seq_query_opc", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_SEQ_OPC);
+
+struct req_msg_field RMF_SEQ_RANGE =
+ DEFINE_MSGF("seq_query_range", 0,
+ sizeof(struct lu_seq_range),
+ lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_SEQ_RANGE);
+
+struct req_msg_field RMF_FLD_OPC =
+ DEFINE_MSGF("fld_query_opc", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_FLD_OPC);
+
+struct req_msg_field RMF_FLD_MDFLD =
+ DEFINE_MSGF("fld_query_mdfld", 0,
+ sizeof(struct lu_seq_range),
+ lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_FLD_MDFLD);
+
+struct req_msg_field RMF_MDT_BODY =
+ DEFINE_MSGF("mdt_body", 0,
+ sizeof(struct mdt_body), lustre_swab_mdt_body, NULL);
+EXPORT_SYMBOL(RMF_MDT_BODY);
+
+struct req_msg_field RMF_OBD_QUOTACTL =
+ DEFINE_MSGF("obd_quotactl", 0,
+ sizeof(struct obd_quotactl),
+ lustre_swab_obd_quotactl, NULL);
+EXPORT_SYMBOL(RMF_OBD_QUOTACTL);
+
+struct req_msg_field RMF_QUOTA_BODY =
+ DEFINE_MSGF("quota_body", 0,
+ sizeof(struct quota_body), lustre_swab_quota_body, NULL);
+EXPORT_SYMBOL(RMF_QUOTA_BODY);
+
+struct req_msg_field RMF_MDT_EPOCH =
+ DEFINE_MSGF("mdt_ioepoch", 0,
+ sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL);
+EXPORT_SYMBOL(RMF_MDT_EPOCH);
+
+struct req_msg_field RMF_PTLRPC_BODY =
+ DEFINE_MSGF("ptlrpc_body", 0,
+ sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL);
+EXPORT_SYMBOL(RMF_PTLRPC_BODY);
+
+struct req_msg_field RMF_OBD_STATFS =
+ DEFINE_MSGF("obd_statfs", 0,
+ sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL);
+EXPORT_SYMBOL(RMF_OBD_STATFS);
+
+struct req_msg_field RMF_SETINFO_KEY =
+ DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_KEY);
+
+struct req_msg_field RMF_NAME =
+ DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_NAME);
+
+struct req_msg_field RMF_SYMTGT =
+ DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SYMTGT);
+
+struct req_msg_field RMF_TGTUUID =
+ DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+ NULL);
+EXPORT_SYMBOL(RMF_TGTUUID);
+
+struct req_msg_field RMF_CLUUID =
+ DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+ NULL);
+EXPORT_SYMBOL(RMF_CLUUID);
+
+struct req_msg_field RMF_STRING =
+ DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_STRING);
+
+struct req_msg_field RMF_LLOGD_BODY =
+ DEFINE_MSGF("llogd_body", 0,
+ sizeof(struct llogd_body), lustre_swab_llogd_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_BODY);
+
+struct req_msg_field RMF_LLOG_LOG_HDR =
+ DEFINE_MSGF("llog_log_hdr", 0,
+ sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL);
+EXPORT_SYMBOL(RMF_LLOG_LOG_HDR);
+
+struct req_msg_field RMF_LLOGD_CONN_BODY =
+ DEFINE_MSGF("llogd_conn_body", 0,
+ sizeof(struct llogd_conn_body),
+ lustre_swab_llogd_conn_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY);
+
+/*
+ * connection handle received in MDS_CONNECT request.
+ *
+ * No swabbing needed because struct lustre_handle contains only a 64-bit cookie
+ * that the client does not interpret at all.
+ */
+struct req_msg_field RMF_CONN =
+ DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL);
+EXPORT_SYMBOL(RMF_CONN);
+
+struct req_msg_field RMF_CONNECT_DATA =
+ DEFINE_MSGF("cdata",
+ RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */,
+#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 7, 50, 0)
+ sizeof(struct obd_connect_data),
+#else
+/* For interoperability with 1.8 and 2.0 clients/servers.
+ * The RPC verification code allows larger RPC buffers, but not
+ * smaller buffers. Until we no longer need to keep compatibility
+ * with older servers/clients we can only check that the buffer
+ * size is at least as large as obd_connect_data_v1. That is not
+ * not in itself harmful, since the chance of just corrupting this
+ * field is low. See JIRA LU-16 for details. */
+ sizeof(struct obd_connect_data_v1),
+#endif
+ lustre_swab_connect, NULL);
+EXPORT_SYMBOL(RMF_CONNECT_DATA);
+
+struct req_msg_field RMF_DLM_REQ =
+ DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */,
+ sizeof(struct ldlm_request),
+ lustre_swab_ldlm_request, NULL);
+EXPORT_SYMBOL(RMF_DLM_REQ);
+
+struct req_msg_field RMF_DLM_REP =
+ DEFINE_MSGF("dlm_rep", 0,
+ sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL);
+EXPORT_SYMBOL(RMF_DLM_REP);
+
+struct req_msg_field RMF_LDLM_INTENT =
+ DEFINE_MSGF("ldlm_intent", 0,
+ sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL);
+EXPORT_SYMBOL(RMF_LDLM_INTENT);
+
+struct req_msg_field RMF_DLM_LVB =
+ DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_DLM_LVB);
+
+struct req_msg_field RMF_DLM_GL_DESC =
+ DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc),
+ lustre_swab_gl_desc, NULL);
+EXPORT_SYMBOL(RMF_DLM_GL_DESC);
+
+struct req_msg_field RMF_MDT_MD =
+ DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_MDT_MD);
+
+struct req_msg_field RMF_REC_REINT =
+ DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint),
+ lustre_swab_mdt_rec_reint, NULL);
+EXPORT_SYMBOL(RMF_REC_REINT);
+
+/* FIXME: this length should be defined as a macro */
+struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1,
+ NULL, NULL);
+EXPORT_SYMBOL(RMF_EADATA);
+
+struct req_msg_field RMF_ACL =
+ DEFINE_MSGF("acl", RMF_F_NO_SIZE_CHECK,
+ LUSTRE_POSIX_ACL_MAX_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_ACL);
+
+/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */
+struct req_msg_field RMF_LOGCOOKIES =
+ DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */,
+ sizeof(struct llog_cookie), NULL, NULL);
+EXPORT_SYMBOL(RMF_LOGCOOKIES);
+
+struct req_msg_field RMF_CAPA1 =
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+ lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA1);
+
+struct req_msg_field RMF_CAPA2 =
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+ lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA2);
+
+struct req_msg_field RMF_LAYOUT_INTENT =
+ DEFINE_MSGF("layout_intent", 0,
+ sizeof(struct layout_intent), lustre_swab_layout_intent,
+ NULL);
+EXPORT_SYMBOL(RMF_LAYOUT_INTENT);
+
+/*
+ * OST request field.
+ */
+struct req_msg_field RMF_OST_BODY =
+ DEFINE_MSGF("ost_body", 0,
+ sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body);
+EXPORT_SYMBOL(RMF_OST_BODY);
+
+struct req_msg_field RMF_OBD_IOOBJ =
+ DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY,
+ sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo);
+EXPORT_SYMBOL(RMF_OBD_IOOBJ);
+
+struct req_msg_field RMF_NIOBUF_REMOTE =
+ DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY,
+ sizeof(struct niobuf_remote), lustre_swab_niobuf_remote,
+ dump_rniobuf);
+EXPORT_SYMBOL(RMF_NIOBUF_REMOTE);
+
+struct req_msg_field RMF_RCS =
+ DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32),
+ lustre_swab_generic_32s, dump_rcs);
+EXPORT_SYMBOL(RMF_RCS);
+
+struct req_msg_field RMF_OBD_ID =
+ DEFINE_MSGF("obd_id", 0,
+ sizeof(obd_id), lustre_swab_ost_last_id, NULL);
+EXPORT_SYMBOL(RMF_OBD_ID);
+
+struct req_msg_field RMF_FID =
+ DEFINE_MSGF("fid", 0,
+ sizeof(struct lu_fid), lustre_swab_lu_fid, NULL);
+EXPORT_SYMBOL(RMF_FID);
+
+struct req_msg_field RMF_OST_ID =
+ DEFINE_MSGF("ost_id", 0,
+ sizeof(struct ost_id), lustre_swab_ost_id, NULL);
+EXPORT_SYMBOL(RMF_OST_ID);
+
+struct req_msg_field RMF_FIEMAP_KEY =
+ DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key),
+ lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_KEY);
+
+struct req_msg_field RMF_FIEMAP_VAL =
+ DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_VAL);
+
+struct req_msg_field RMF_IDX_INFO =
+ DEFINE_MSGF("idx_info", 0, sizeof(struct idx_info),
+ lustre_swab_idx_info, NULL);
+EXPORT_SYMBOL(RMF_IDX_INFO);
+struct req_msg_field RMF_HSM_USER_STATE =
+ DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state),
+ lustre_swab_hsm_user_state, NULL);
+EXPORT_SYMBOL(RMF_HSM_USER_STATE);
+
+struct req_msg_field RMF_HSM_STATE_SET =
+ DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set),
+ lustre_swab_hsm_state_set, NULL);
+EXPORT_SYMBOL(RMF_HSM_STATE_SET);
+
+struct req_msg_field RMF_MDS_HSM_PROGRESS =
+ DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel),
+ lustre_swab_hsm_progress_kernel, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS);
+
+struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION =
+ DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action),
+ lustre_swab_hsm_current_action, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION);
+
+struct req_msg_field RMF_MDS_HSM_USER_ITEM =
+ DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY,
+ sizeof(struct hsm_user_item), lustre_swab_hsm_user_item,
+ NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM);
+
+struct req_msg_field RMF_MDS_HSM_ARCHIVE =
+ DEFINE_MSGF("hsm_archive", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE);
+
+struct req_msg_field RMF_MDS_HSM_REQUEST =
+ DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request),
+ lustre_swab_hsm_request, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST);
+
+struct req_msg_field RMF_UPDATE = DEFINE_MSGF("update", 0, -1,
+ lustre_swab_update_buf, NULL);
+EXPORT_SYMBOL(RMF_UPDATE);
+
+struct req_msg_field RMF_UPDATE_REPLY = DEFINE_MSGF("update_reply", 0, -1,
+ lustre_swab_update_reply_buf,
+ NULL);
+EXPORT_SYMBOL(RMF_UPDATE_REPLY);
+
+struct req_msg_field RMF_SWAP_LAYOUTS =
+ DEFINE_MSGF("swap_layouts", 0, sizeof(struct mdc_swap_layouts),
+ lustre_swab_swap_layouts, NULL);
+EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
+/*
+ * Request formats.
+ */
+
+struct req_format {
+ const char *rf_name;
+ int rf_idx;
+ struct {
+ int nr;
+ const struct req_msg_field **d;
+ } rf_fields[RCL_NR];
+};
+
+#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \
+ .rf_name = name, \
+ .rf_fields = { \
+ [RCL_CLIENT] = { \
+ .nr = client_nr, \
+ .d = client \
+ }, \
+ [RCL_SERVER] = { \
+ .nr = server_nr, \
+ .d = server \
+ } \
+ } \
+}
+
+#define DEFINE_REQ_FMT0(name, client, server) \
+DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server))
+
+struct req_format RQF_OBD_PING =
+ DEFINE_REQ_FMT0("OBD_PING", empty, empty);
+EXPORT_SYMBOL(RQF_OBD_PING);
+
+struct req_format RQF_OBD_SET_INFO =
+ DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty);
+EXPORT_SYMBOL(RQF_OBD_SET_INFO);
+
+/* Read index file through the network */
+struct req_format RQF_OBD_IDX_READ =
+ DEFINE_REQ_FMT0("OBD_IDX_READ",
+ obd_idx_read_client, obd_idx_read_server);
+EXPORT_SYMBOL(RQF_OBD_IDX_READ);
+
+struct req_format RQF_SEC_CTX =
+ DEFINE_REQ_FMT0("SEC_CTX", empty, empty);
+EXPORT_SYMBOL(RQF_SEC_CTX);
+
+struct req_format RQF_MGS_TARGET_REG =
+ DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only,
+ mgs_target_info_only);
+EXPORT_SYMBOL(RQF_MGS_TARGET_REG);
+
+struct req_format RQF_MGS_SET_INFO =
+ DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info,
+ mgs_set_info);
+EXPORT_SYMBOL(RQF_MGS_SET_INFO);
+
+struct req_format RQF_MGS_CONFIG_READ =
+ DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
+ mgs_config_read_server);
+EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
+
+struct req_format RQF_SEQ_QUERY =
+ DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
+EXPORT_SYMBOL(RQF_SEQ_QUERY);
+
+struct req_format RQF_FLD_QUERY =
+ DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
+EXPORT_SYMBOL(RQF_FLD_QUERY);
+
+struct req_format RQF_LOG_CANCEL =
+ DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
+EXPORT_SYMBOL(RQF_LOG_CANCEL);
+
+struct req_format RQF_MDS_QUOTACHECK =
+ DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_MDS_QUOTACHECK);
+
+struct req_format RQF_OST_QUOTACHECK =
+ DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_OST_QUOTACHECK);
+
+struct req_format RQF_MDS_QUOTACTL =
+ DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_MDS_QUOTACTL);
+
+struct req_format RQF_OST_QUOTACTL =
+ DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_OST_QUOTACTL);
+
+struct req_format RQF_QC_CALLBACK =
+ DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_QC_CALLBACK);
+
+struct req_format RQF_QUOTA_DQACQ =
+ DEFINE_REQ_FMT0("QUOTA_DQACQ", quota_body_only, quota_body_only);
+EXPORT_SYMBOL(RQF_QUOTA_DQACQ);
+
+struct req_format RQF_LDLM_INTENT_QUOTA =
+ DEFINE_REQ_FMT0("LDLM_INTENT_QUOTA",
+ ldlm_intent_quota_client,
+ ldlm_intent_quota_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_QUOTA);
+
+struct req_format RQF_MDS_GETSTATUS =
+ DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
+
+struct req_format RQF_MDS_STATFS =
+ DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_MDS_STATFS);
+
+struct req_format RQF_MDS_SYNC =
+ DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_SYNC);
+
+struct req_format RQF_MDS_GETATTR =
+ DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR);
+
+struct req_format RQF_MDS_GETXATTR =
+ DEFINE_REQ_FMT0("MDS_GETXATTR",
+ mds_getxattr_client, mds_getxattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETXATTR);
+
+struct req_format RQF_MDS_GETATTR_NAME =
+ DEFINE_REQ_FMT0("MDS_GETATTR_NAME",
+ mds_getattr_name_client, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME);
+
+struct req_format RQF_MDS_REINT =
+ DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT);
+
+struct req_format RQF_MDS_REINT_CREATE =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE",
+ mds_reint_create_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
+
+struct req_format RQF_MDS_REINT_CREATE_RMT_ACL =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL",
+ mds_reint_create_rmt_acl_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL);
+
+struct req_format RQF_MDS_REINT_CREATE_SLAVE =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
+ mds_reint_create_slave_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE);
+
+struct req_format RQF_MDS_REINT_CREATE_SYM =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM",
+ mds_reint_create_sym_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM);
+
+struct req_format RQF_MDS_REINT_OPEN =
+ DEFINE_REQ_FMT0("MDS_REINT_OPEN",
+ mds_reint_open_client, mds_reint_open_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_OPEN);
+
+struct req_format RQF_MDS_REINT_UNLINK =
+ DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client,
+ mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK);
+
+struct req_format RQF_MDS_REINT_LINK =
+ DEFINE_REQ_FMT0("MDS_REINT_LINK",
+ mds_reint_link_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_LINK);
+
+struct req_format RQF_MDS_REINT_RENAME =
+ DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client,
+ mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
+
+struct req_format RQF_MDS_REINT_SETATTR =
+ DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
+ mds_reint_setattr_client, mds_setattr_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
+
+struct req_format RQF_MDS_REINT_SETXATTR =
+ DEFINE_REQ_FMT0("MDS_REINT_SETXATTR",
+ mds_reint_setxattr_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR);
+
+struct req_format RQF_MDS_CONNECT =
+ DEFINE_REQ_FMT0("MDS_CONNECT",
+ obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_MDS_CONNECT);
+
+struct req_format RQF_MDS_DISCONNECT =
+ DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_MDS_DISCONNECT);
+
+struct req_format RQF_MDS_GET_INFO =
+ DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client,
+ mds_getinfo_server);
+EXPORT_SYMBOL(RQF_MDS_GET_INFO);
+
+struct req_format RQF_UPDATE_OBJ =
+ DEFINE_REQ_FMT0("OBJECT_UPDATE_OBJ", mds_update_client,
+ mds_update_server);
+EXPORT_SYMBOL(RQF_UPDATE_OBJ);
+
+struct req_format RQF_LDLM_ENQUEUE =
+ DEFINE_REQ_FMT0("LDLM_ENQUEUE",
+ ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE);
+
+struct req_format RQF_LDLM_ENQUEUE_LVB =
+ DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB",
+ ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB);
+
+struct req_format RQF_LDLM_CONVERT =
+ DEFINE_REQ_FMT0("LDLM_CONVERT",
+ ldlm_enqueue_client, ldlm_enqueue_server);
+EXPORT_SYMBOL(RQF_LDLM_CONVERT);
+
+struct req_format RQF_LDLM_CANCEL =
+ DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CANCEL);
+
+struct req_format RQF_LDLM_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CALLBACK);
+
+struct req_format RQF_LDLM_CP_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK);
+
+struct req_format RQF_LDLM_BL_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client,
+ ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_DESC_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client,
+ ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK);
+
+struct req_format RQF_LDLM_INTENT_BASIC =
+ DEFINE_REQ_FMT0("LDLM_INTENT_BASIC",
+ ldlm_intent_basic_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC);
+
+struct req_format RQF_LDLM_INTENT =
+ DEFINE_REQ_FMT0("LDLM_INTENT",
+ ldlm_intent_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT);
+
+struct req_format RQF_LDLM_INTENT_LAYOUT =
+ DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ",
+ ldlm_intent_layout_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT);
+
+struct req_format RQF_LDLM_INTENT_GETATTR =
+ DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR",
+ ldlm_intent_getattr_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR);
+
+struct req_format RQF_LDLM_INTENT_OPEN =
+ DEFINE_REQ_FMT0("LDLM_INTENT_OPEN",
+ ldlm_intent_open_client, ldlm_intent_open_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN);
+
+struct req_format RQF_LDLM_INTENT_CREATE =
+ DEFINE_REQ_FMT0("LDLM_INTENT_CREATE",
+ ldlm_intent_create_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE);
+
+struct req_format RQF_LDLM_INTENT_UNLINK =
+ DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK",
+ ldlm_intent_unlink_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK);
+
+struct req_format RQF_MDS_CLOSE =
+ DEFINE_REQ_FMT0("MDS_CLOSE",
+ mdt_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_CLOSE);
+
+struct req_format RQF_MDS_PIN =
+ DEFINE_REQ_FMT0("MDS_PIN",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_PIN);
+
+struct req_format RQF_MDS_UNPIN =
+ DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty);
+EXPORT_SYMBOL(RQF_MDS_UNPIN);
+
+struct req_format RQF_MDS_DONE_WRITING =
+ DEFINE_REQ_FMT0("MDS_DONE_WRITING",
+ mdt_close_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
+
+struct req_format RQF_MDS_READPAGE =
+ DEFINE_REQ_FMT0("MDS_READPAGE",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_READPAGE);
+
+struct req_format RQF_MDS_HSM_ACTION =
+ DEFINE_REQ_FMT0("MDS_HSM_ACTION", mdt_body_capa, mdt_hsm_action_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_ACTION);
+
+struct req_format RQF_MDS_HSM_PROGRESS =
+ DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS);
+
+struct req_format RQF_MDS_HSM_CT_REGISTER =
+ DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER);
+
+struct req_format RQF_MDS_HSM_CT_UNREGISTER =
+ DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER);
+
+struct req_format RQF_MDS_HSM_STATE_GET =
+ DEFINE_REQ_FMT0("MDS_HSM_STATE_GET",
+ mdt_body_capa, mdt_hsm_state_get_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET);
+
+struct req_format RQF_MDS_HSM_STATE_SET =
+ DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET);
+
+struct req_format RQF_MDS_HSM_REQUEST =
+ DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST);
+
+struct req_format RQF_MDS_SWAP_LAYOUTS =
+ DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS",
+ mdt_swap_layouts, empty);
+EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS);
+
+/* This is for split */
+struct req_format RQF_MDS_WRITEPAGE =
+ DEFINE_REQ_FMT0("MDS_WRITEPAGE",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
+
+struct req_format RQF_MDS_IS_SUBDIR =
+ DEFINE_REQ_FMT0("MDS_IS_SUBDIR",
+ mdt_body_only, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
+ llog_origin_handle_create_client, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY",
+ llogd_body_only, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK",
+ llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK",
+ llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER",
+ llogd_body_only, llog_log_hdr_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
+
+struct req_format RQF_LLOG_ORIGIN_CONNECT =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT);
+
+struct req_format RQF_OST_CONNECT =
+ DEFINE_REQ_FMT0("OST_CONNECT",
+ obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_OST_CONNECT);
+
+struct req_format RQF_OST_DISCONNECT =
+ DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_OST_DISCONNECT);
+
+struct req_format RQF_OST_GETATTR =
+ DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_GETATTR);
+
+struct req_format RQF_OST_SETATTR =
+ DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SETATTR);
+
+struct req_format RQF_OST_CREATE =
+ DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_CREATE);
+
+struct req_format RQF_OST_PUNCH =
+ DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_PUNCH);
+
+struct req_format RQF_OST_SYNC =
+ DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SYNC);
+
+struct req_format RQF_OST_DESTROY =
+ DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_DESTROY);
+
+struct req_format RQF_OST_BRW_READ =
+ DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server);
+EXPORT_SYMBOL(RQF_OST_BRW_READ);
+
+struct req_format RQF_OST_BRW_WRITE =
+ DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server);
+EXPORT_SYMBOL(RQF_OST_BRW_WRITE);
+
+struct req_format RQF_OST_STATFS =
+ DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_OST_STATFS);
+
+struct req_format RQF_OST_SET_GRANT_INFO =
+ DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client,
+ ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
+
+struct req_format RQF_OST_GET_INFO_GENERIC =
+ DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
+ ost_get_info_generic_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+
+struct req_format RQF_OST_GET_INFO_LAST_ID =
+ DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
+ ost_get_last_id_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
+
+struct req_format RQF_OST_GET_INFO_LAST_FID =
+ DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+ ost_get_last_fid_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
+
+struct req_format RQF_OST_SET_INFO_LAST_FID =
+ DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client,
+ empty);
+EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID);
+
+struct req_format RQF_OST_GET_INFO_FIEMAP =
+ DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client,
+ ost_get_fiemap_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+/* Convenience macro */
+#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)]
+
+/**
+ * Initializes the capsule abstraction by computing and setting the \a rf_idx
+ * field of RQFs and the \a rmf_offset field of RMFs.
+ */
+int req_layout_init(void)
+{
+ int i;
+ int j;
+ int k;
+ struct req_format *rf = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
+ rf = req_formats[i];
+ rf->rf_idx = i;
+ for (j = 0; j < RCL_NR; ++j) {
+ LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR);
+ for (k = 0; k < rf->rf_fields[j].nr; ++k) {
+ struct req_msg_field *field;
+
+ field = (typeof(field))rf->rf_fields[j].d[k];
+ LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY)
+ || field->rmf_size > 0);
+ LASSERT(field->rmf_offset[i][j] == 0);
+ /*
+ * k + 1 to detect unused format/field
+ * combinations.
+ */
+ field->rmf_offset[i][j] = k + 1;
+ }
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(req_layout_init);
+
+void req_layout_fini(void)
+{
+}
+EXPORT_SYMBOL(req_layout_fini);
+
+/**
+ * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1.
+ *
+ * Actual/expected field sizes are set elsewhere in functions in this file:
+ * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and
+ * req_capsule_msg_size(). The \a rc_area information is used by.
+ * ptlrpc_request_set_replen().
+ */
+void req_capsule_init_area(struct req_capsule *pill)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
+ pill->rc_area[RCL_CLIENT][i] = -1;
+ pill->rc_area[RCL_SERVER][i] = -1;
+ }
+}
+EXPORT_SYMBOL(req_capsule_init_area);
+
+/**
+ * Initialize a pill.
+ *
+ * The \a location indicates whether the caller is executing on the client side
+ * (RCL_CLIENT) or server side (RCL_SERVER)..
+ */
+void req_capsule_init(struct req_capsule *pill,
+ struct ptlrpc_request *req,
+ enum req_location location)
+{
+ LASSERT(location == RCL_SERVER || location == RCL_CLIENT);
+
+ /*
+ * Today all capsules are embedded in ptlrpc_request structs,
+ * but just in case that ever isn't the case, we don't reach
+ * into req unless req != NULL and pill is the one embedded in
+ * the req.
+ *
+ * The req->rq_pill_init flag makes it safe to initialize a pill
+ * twice, which might happen in the OST paths as a result of the
+ * high-priority RPC queue getting peeked at before ost_handle()
+ * handles an OST RPC.
+ */
+ if (req != NULL && pill == &req->rq_pill && req->rq_pill_init)
+ return;
+
+ memset(pill, 0, sizeof *pill);
+ pill->rc_req = req;
+ pill->rc_loc = location;
+ req_capsule_init_area(pill);
+
+ if (req != NULL && pill == &req->rq_pill)
+ req->rq_pill_init = 1;
+}
+EXPORT_SYMBOL(req_capsule_init);
+
+void req_capsule_fini(struct req_capsule *pill)
+{
+}
+EXPORT_SYMBOL(req_capsule_fini);
+
+static int __req_format_is_sane(const struct req_format *fmt)
+{
+ return
+ 0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) &&
+ req_formats[fmt->rf_idx] == fmt;
+}
+
+static struct lustre_msg *__req_msg(const struct req_capsule *pill,
+ enum req_location loc)
+{
+ struct ptlrpc_request *req;
+
+ req = pill->rc_req;
+ return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg;
+}
+
+/**
+ * Set the format (\a fmt) of a \a pill; format changes are not allowed here
+ * (see req_capsule_extend()).
+ */
+void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt)
+{
+ LASSERT(pill->rc_fmt == NULL || pill->rc_fmt == fmt);
+ LASSERT(__req_format_is_sane(fmt));
+
+ pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_set);
+
+/**
+ * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in
+ * yet.
+
+ * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of
+ * variable-sized fields. The field sizes come from the declared \a rmf_size
+ * field of a \a pill's \a rc_fmt's RMF's.
+ */
+int req_capsule_filled_sizes(struct req_capsule *pill,
+ enum req_location loc)
+{
+ const struct req_format *fmt = pill->rc_fmt;
+ int i;
+
+ LASSERT(fmt != NULL);
+
+ for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+ if (pill->rc_area[loc][i] == -1) {
+ pill->rc_area[loc][i] =
+ fmt->rf_fields[loc].d[i]->rmf_size;
+ if (pill->rc_area[loc][i] == -1) {
+ /*
+ * Skip the following fields.
+ *
+ * If this LASSERT() trips then you're missing a
+ * call to req_capsule_set_size().
+ */
+ LASSERT(loc != RCL_SERVER);
+ break;
+ }
+ }
+ }
+ return i;
+}
+EXPORT_SYMBOL(req_capsule_filled_sizes);
+
+/**
+ * Capsule equivalent of lustre_pack_request() and lustre_pack_reply().
+ *
+ * This function uses the \a pill's \a rc_area as filled in by
+ * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by
+ * this function).
+ */
+int req_capsule_server_pack(struct req_capsule *pill)
+{
+ const struct req_format *fmt;
+ int count;
+ int rc;
+
+ LASSERT(pill->rc_loc == RCL_SERVER);
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+
+ count = req_capsule_filled_sizes(pill, RCL_SERVER);
+ rc = lustre_pack_reply(pill->rc_req, count,
+ pill->rc_area[RCL_SERVER], NULL);
+ if (rc != 0) {
+ DEBUG_REQ(D_ERROR, pill->rc_req,
+ "Cannot pack %d fields in format `%s': ",
+ count, fmt->rf_name);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(req_capsule_server_pack);
+
+/**
+ * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
+ * corresponding to the given RMF (\a field).
+ */
+static int __req_capsule_offset(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ int offset;
+
+ offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+ LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n",
+ pill->rc_fmt->rf_name,
+ field->rmf_name, offset, loc);
+ offset --;
+
+ LASSERT(0 <= offset && offset < REQ_MAX_FIELD_NR);
+ return offset;
+}
+
+/**
+ * Helper for __req_capsule_get(); swabs value / array of values and/or dumps
+ * them if desired.
+ */
+static
+void
+swabber_dumper_helper(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc,
+ int offset,
+ void *value, int len, int dump, void (*swabber)( void *))
+{
+ void *p;
+ int i;
+ int n;
+ int do_swab;
+ int inout = loc == RCL_CLIENT;
+
+ swabber = swabber ?: field->rmf_swabber;
+
+ if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) &&
+ swabber != NULL && value != NULL)
+ do_swab = 1;
+ else
+ do_swab = 0;
+
+ if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) {
+ if (dump && field->rmf_dumper) {
+ CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n",
+ do_swab ? "unswabbed " : "", field->rmf_name);
+ field->rmf_dumper(value);
+ }
+ if (!do_swab)
+ return;
+ swabber(value);
+ ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+ if (dump) {
+ CDEBUG(D_RPCTRACE, "Dump of swabbed field %s "
+ "follows\n", field->rmf_name);
+ field->rmf_dumper(value);
+ }
+
+ return;
+ }
+
+ /*
+ * We're swabbing an array; swabber() swabs a single array element, so
+ * swab every element.
+ */
+ LASSERT((len % field->rmf_size) == 0);
+ for (p = value, i = 0, n = len / field->rmf_size;
+ i < n;
+ i++, p += field->rmf_size) {
+ if (dump && field->rmf_dumper) {
+ CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, "
+ "element %d follows\n",
+ do_swab ? "unswabbed " : "", field->rmf_name, i);
+ field->rmf_dumper(p);
+ }
+ if (!do_swab)
+ continue;
+ swabber(p);
+ if (dump && field->rmf_dumper) {
+ CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, "
+ "element %d follows\n", field->rmf_name, i);
+ field->rmf_dumper(value);
+ }
+ }
+ if (do_swab)
+ ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+}
+
+/**
+ * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill
+ * corresponding to the given RMF (\a field).
+ *
+ * The buffer will be swabbed using the given \a swabber. If \a swabber == NULL
+ * then the \a rmf_swabber from the RMF will be used. Soon there will be no
+ * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then
+ * be removed. Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each
+ * element of the array swabbed.
+ */
+static void *__req_capsule_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc,
+ void (*swabber)( void *),
+ int dump)
+{
+ const struct req_format *fmt;
+ struct lustre_msg *msg;
+ void *value;
+ int len;
+ int offset;
+
+ void *(*getter)(struct lustre_msg *m, int n, int minlen);
+
+ static const char *rcl_names[RCL_NR] = {
+ [RCL_CLIENT] = "client",
+ [RCL_SERVER] = "server"
+ };
+
+ LASSERT(pill != NULL);
+ LASSERT(pill != LP_POISON);
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+ LASSERT(fmt != LP_POISON);
+ LASSERT(__req_format_is_sane(fmt));
+
+ offset = __req_capsule_offset(pill, field, loc);
+
+ msg = __req_msg(pill, loc);
+ LASSERT(msg != NULL);
+
+ getter = (field->rmf_flags & RMF_F_STRING) ?
+ (typeof(getter))lustre_msg_string : lustre_msg_buf;
+
+ if (field->rmf_flags & RMF_F_STRUCT_ARRAY) {
+ /*
+ * We've already asserted that field->rmf_size > 0 in
+ * req_layout_init().
+ */
+ len = lustre_msg_buflen(msg, offset);
+ if ((len % field->rmf_size) != 0) {
+ CERROR("%s: array field size mismatch "
+ "%d modulo %d != 0 (%d)\n",
+ field->rmf_name, len, field->rmf_size, loc);
+ return NULL;
+ }
+ } else if (pill->rc_area[loc][offset] != -1) {
+ len = pill->rc_area[loc][offset];
+ } else {
+ len = max(field->rmf_size, 0);
+ }
+ value = getter(msg, offset, len);
+
+ if (value == NULL) {
+ DEBUG_REQ(D_ERROR, pill->rc_req,
+ "Wrong buffer for field `%s' (%d of %d) "
+ "in format `%s': %d vs. %d (%s)\n",
+ field->rmf_name, offset, lustre_msg_bufcount(msg),
+ fmt->rf_name, lustre_msg_buflen(msg, offset), len,
+ rcl_names[loc]);
+ } else {
+ swabber_dumper_helper(pill, field, loc, offset, value, len,
+ dump, swabber);
+ }
+
+ return value;
+}
+
+/**
+ * Dump a request and/or reply
+ */
+void __req_capsule_dump(struct req_capsule *pill, enum req_location loc)
+{
+ const struct req_format *fmt;
+ const struct req_msg_field *field;
+ int len;
+ int i;
+
+ fmt = pill->rc_fmt;
+
+ DEBUG_REQ(D_RPCTRACE, pill->rc_req, "BEGIN REQ CAPSULE DUMP\n");
+ for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+ field = FMT_FIELD(fmt, loc, i);
+ if (field->rmf_dumper == NULL) {
+ /*
+ * FIXME Add a default hex dumper for fields that don't
+ * have a specific dumper
+ */
+ len = req_capsule_get_size(pill, field, loc);
+ CDEBUG(D_RPCTRACE, "Field %s has no dumper function;"
+ "field size is %d\n", field->rmf_name, len);
+ } else {
+ /* It's the dumping side-effect that we're interested in */
+ (void) __req_capsule_get(pill, field, loc, NULL, 1);
+ }
+ }
+ CDEBUG(D_RPCTRACE, "END REQ CAPSULE DUMP\n");
+}
+
+/**
+ * Dump a request.
+ */
+void req_capsule_client_dump(struct req_capsule *pill)
+{
+ __req_capsule_dump(pill, RCL_CLIENT);
+}
+EXPORT_SYMBOL(req_capsule_client_dump);
+
+/**
+ * Dump a reply
+ */
+void req_capsule_server_dump(struct req_capsule *pill)
+{
+ __req_capsule_dump(pill, RCL_SERVER);
+}
+EXPORT_SYMBOL(req_capsule_server_dump);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_client_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_get);
+
+/**
+ * Same as req_capsule_client_get(), but with a \a swabber argument.
+ *
+ * Currently unused; will be removed when req_capsule_server_swab_get() is
+ * unused too.
+ */
+void *req_capsule_client_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber)
+{
+ return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_client_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len. Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_client_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len)
+{
+ req_capsule_set_size(pill, field, RCL_CLIENT, len);
+ return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_sized_get);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_server_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_get);
+
+/**
+ * Same as req_capsule_server_get(), but with a \a swabber argument.
+ *
+ * Ideally all swabbing should be done pursuant to RMF definitions, with no
+ * swabbing done outside this capsule abstraction.
+ */
+void *req_capsule_server_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber)
+{
+ return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_server_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len. Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_server_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len)
+{
+ req_capsule_set_size(pill, field, RCL_SERVER, len);
+ return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_get);
+
+void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len, void *swabber)
+{
+ req_capsule_set_size(pill, field, RCL_SERVER, len);
+ return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
+
+/**
+ * Returns the buffer of a \a pill corresponding to the given \a field from the
+ * request (if the caller is executing on the server-side) or reply (if the
+ * caller is executing on the client-side).
+ *
+ * This function convienient for use is code that could be executed on the
+ * client and server alike.
+ */
+const void *req_capsule_other_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, pill->rc_loc ^ 1, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_other_get);
+
+/**
+ * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a
+ * field of the given \a pill.
+ *
+ * This function must be used when constructing variable sized fields of a
+ * request or reply.
+ */
+void req_capsule_set_size(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc, int size)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ if ((size != field->rmf_size) &&
+ (field->rmf_size != -1) &&
+ !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
+ (size > 0)) {
+ if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+ (size % field->rmf_size != 0)) {
+ CERROR("%s: array field size mismatch "
+ "%d %% %d != 0 (%d)\n",
+ field->rmf_name, size, field->rmf_size, loc);
+ LBUG();
+ } else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+ size < field->rmf_size) {
+ CERROR("%s: field size mismatch %d != %d (%d)\n",
+ field->rmf_name, size, field->rmf_size, loc);
+ LBUG();
+ }
+ }
+
+ pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size;
+}
+EXPORT_SYMBOL(req_capsule_set_size);
+
+/**
+ * Return the actual PTLRPC buffer length of a request or reply (\a loc)
+ * for the given \a pill's given \a field.
+ *
+ * NB: this function doesn't correspond with req_capsule_set_size(), which
+ * actually sets the size in pill.rc_area[loc][offset], but this function
+ * returns the message buflen[offset], maybe we should use another name.
+ */
+int req_capsule_get_size(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ return lustre_msg_buflen(__req_msg(pill, loc),
+ __req_capsule_offset(pill, field, loc));
+}
+EXPORT_SYMBOL(req_capsule_get_size);
+
+/**
+ * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the
+ * given \a pill's request or reply (\a loc) given the field size recorded in
+ * the \a pill's rc_area.
+ *
+ * See also req_capsule_set_size().
+ */
+int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
+{
+ return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
+ pill->rc_fmt->rf_fields[loc].nr,
+ pill->rc_area[loc]);
+}
+
+/**
+ * While req_capsule_msg_size() computes the size of a PTLRPC request or reply
+ * (\a loc) given a \a pill's \a rc_area, this function computes the size of a
+ * PTLRPC request or reply given only an RQF (\a fmt).
+ *
+ * This function should not be used for formats which contain variable size
+ * fields.
+ */
+int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+ enum req_location loc)
+{
+ int size, i = 0;
+
+ /*
+ * This function should probably LASSERT() that fmt has no fields with
+ * RMF_F_STRUCT_ARRAY in rmf_flags, since we can't know here how many
+ * elements in the array there will ultimately be, but then, we could
+ * assume that there will be at least one element, and that's just what
+ * we do.
+ */
+ size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
+ if (size < 0)
+ return size;
+
+ for (; i < fmt->rf_fields[loc].nr; ++i)
+ if (fmt->rf_fields[loc].d[i]->rmf_size != -1)
+ size += cfs_size_round(fmt->rf_fields[loc].d[i]->
+ rmf_size);
+ return size;
+}
+
+/**
+ * Changes the format of an RPC.
+ *
+ * The pill must already have been initialized, which means that it already has
+ * a request format. The new format \a fmt must be an extension of the pill's
+ * old format. Specifically: the new format must have as many request and reply
+ * fields as the old one, and all fields shared by the old and new format must
+ * be at least as large in the new format.
+ *
+ * The new format's fields may be of different "type" than the old format, but
+ * only for fields that are "opaque" blobs: fields which have a) have no
+ * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a
+ * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK. For example,
+ * OBD_SET_INFO has a key field and an opaque value field that gets interpreted
+ * according to the key field. When the value, according to the key, contains a
+ * structure (or array thereof) to be swabbed, the format should be changed to
+ * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set
+ * accordingly.
+ */
+void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
+{
+ int i;
+ int j;
+
+ const struct req_format *old;
+
+ LASSERT(pill->rc_fmt != NULL);
+ LASSERT(__req_format_is_sane(fmt));
+
+ old = pill->rc_fmt;
+ /*
+ * Sanity checking...
+ */
+ for (i = 0; i < RCL_NR; ++i) {
+ LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr);
+ for (j = 0; j < old->rf_fields[i].nr - 1; ++j) {
+ const struct req_msg_field *ofield = FMT_FIELD(old, i, j);
+
+ /* "opaque" fields can be transmogrified */
+ if (ofield->rmf_swabber == NULL &&
+ (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 &&
+ (ofield->rmf_size == -1 ||
+ ofield->rmf_flags == RMF_F_NO_SIZE_CHECK))
+ continue;
+ LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j));
+ }
+ /*
+ * Last field in old format can be shorter than in new.
+ */
+ LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >=
+ FMT_FIELD(old, i, j)->rmf_size);
+ }
+
+ pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_extend);
+
+/**
+ * This function returns a non-zero value if the given \a field is present in
+ * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it
+ * returns 0.
+ */
+int req_capsule_has_field(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ return field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+}
+EXPORT_SYMBOL(req_capsule_has_field);
+
+/**
+ * Returns a non-zero value if the given \a field is present in the given \a
+ * pill's PTLRPC request or reply (\a loc), else it returns 0.
+ */
+int req_capsule_field_present(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ int offset;
+
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+ LASSERT(req_capsule_has_field(pill, field, loc));
+
+ offset = __req_capsule_offset(pill, field, loc);
+ return lustre_msg_bufcount(__req_msg(pill, loc)) > offset;
+}
+EXPORT_SYMBOL(req_capsule_field_present);
+
+/**
+ * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC
+ * request or reply (\a loc).
+ *
+ * This is not the opposite of req_capsule_extend().
+ */
+void req_capsule_shrink(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen,
+ enum req_location loc)
+{
+ const struct req_format *fmt;
+ struct lustre_msg *msg;
+ int len;
+ int offset;
+
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+ LASSERT(__req_format_is_sane(fmt));
+ LASSERT(req_capsule_has_field(pill, field, loc));
+ LASSERT(req_capsule_field_present(pill, field, loc));
+
+ offset = __req_capsule_offset(pill, field, loc);
+
+ msg = __req_msg(pill, loc);
+ len = lustre_msg_buflen(msg, offset);
+ LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n",
+ fmt->rf_name, field->rmf_name, len, newlen);
+
+ if (loc == RCL_CLIENT)
+ pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
+ 1);
+ else
+ pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
+ 1);
+}
+EXPORT_SYMBOL(req_capsule_shrink);
+
+int req_capsule_server_grow(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen)
+{
+ struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs;
+ char *from, *to;
+ int offset, len, rc;
+
+ LASSERT(pill->rc_fmt != NULL);
+ LASSERT(__req_format_is_sane(pill->rc_fmt));
+ LASSERT(req_capsule_has_field(pill, field, RCL_SERVER));
+ LASSERT(req_capsule_field_present(pill, field, RCL_SERVER));
+
+ len = req_capsule_get_size(pill, field, RCL_SERVER);
+ offset = __req_capsule_offset(pill, field, RCL_SERVER);
+ if (pill->rc_req->rq_repbuf_len >=
+ lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen)
+ CERROR("Inplace repack might be done\n");
+
+ pill->rc_req->rq_reply_state = NULL;
+ req_capsule_set_size(pill, field, RCL_SERVER, newlen);
+ rc = req_capsule_server_pack(pill);
+ if (rc) {
+ /* put old rs back, the caller will decide what to do */
+ pill->rc_req->rq_reply_state = rs;
+ return rc;
+ }
+ nrs = pill->rc_req->rq_reply_state;
+ /* Now we need only buffers, copy first chunk */
+ to = lustre_msg_buf(nrs->rs_msg, 0, 0);
+ from = lustre_msg_buf(rs->rs_msg, 0, 0);
+ len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from;
+ memcpy(to, from, len);
+ /* check if we have tail and copy it too */
+ if (rs->rs_msg->lm_bufcount > offset + 1) {
+ to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0);
+ from = lustre_msg_buf(rs->rs_msg, offset + 1, 0);
+ offset = rs->rs_msg->lm_bufcount - 1;
+ len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) +
+ cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from;
+ memcpy(to, from, len);
+ }
+ /* drop old reply if everything is fine */
+ if (rs->rs_difficult) {
+ /* copy rs data */
+ int i;
+
+ nrs->rs_difficult = 1;
+ nrs->rs_no_ack = rs->rs_no_ack;
+ for (i = 0; i < rs->rs_nlocks; i++) {
+ nrs->rs_locks[i] = rs->rs_locks[i];
+ nrs->rs_modes[i] = rs->rs_modes[i];
+ nrs->rs_nlocks++;
+ }
+ rs->rs_nlocks = 0;
+ rs->rs_difficult = 0;
+ rs->rs_no_ack = 0;
+ }
+ ptlrpc_rs_decref(rs);
+ return 0;
+}
+EXPORT_SYMBOL(req_capsule_server_grow);
+/* __REQ_LAYOUT_USER__ */
+#endif
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
new file mode 100644
index 000000000000..367ca8ef7d60
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -0,0 +1,354 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_client.c
+ *
+ * remote api for llog - client side
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <lustre_net.h>
+#include <linux/list.h>
+
+#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \
+ mutex_lock(&ctxt->loc_mutex); \
+ if (ctxt->loc_imp) { \
+ imp = class_import_get(ctxt->loc_imp); \
+ } else { \
+ CERROR("ctxt->loc_imp == NULL for context idx %d." \
+ "Unable to complete MDS/OSS recovery," \
+ "but I'll try again next time. Not fatal.\n", \
+ ctxt->loc_idx); \
+ imp = NULL; \
+ mutex_unlock(&ctxt->loc_mutex); \
+ return (-EINVAL); \
+ } \
+ mutex_unlock(&ctxt->loc_mutex); \
+} while(0)
+
+#define LLOG_CLIENT_EXIT(ctxt, imp) do { \
+ mutex_lock(&ctxt->loc_mutex); \
+ if (ctxt->loc_imp != imp) \
+ CWARN("loc_imp has changed from %p to %p\n", \
+ ctxt->loc_imp, imp); \
+ class_import_put(imp); \
+ mutex_unlock(&ctxt->loc_mutex); \
+} while(0)
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context. */
+static int llog_client_open(const struct lu_env *env,
+ struct llog_handle *lgh, struct llog_logid *logid,
+ char *name, enum llog_open_param open_param)
+{
+ struct obd_import *imp;
+ struct llogd_body *body;
+ struct llog_ctxt *ctxt = lgh->lgh_ctxt;
+ struct ptlrpc_request *req = NULL;
+ int rc;
+ ENTRY;
+
+ LLOG_CLIENT_ENTRY(ctxt, imp);
+
+ /* client cannot create llog */
+ LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param);
+ LASSERT(lgh);
+
+ req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ if (name)
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ strlen(name) + 1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_CREATE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ req = NULL;
+ GOTO(out, rc);
+ }
+ ptlrpc_request_set_replen(req);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (logid)
+ body->lgd_logid = *logid;
+ body->lgd_ctxt_idx = ctxt->loc_idx - 1;
+
+ if (name) {
+ char *tmp;
+ tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME,
+ strlen(name) + 1);
+ LASSERT(tmp);
+ strcpy(tmp, name);
+ }
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EFAULT);
+
+ lgh->lgh_id = body->lgd_logid;
+ lgh->lgh_ctxt = ctxt;
+ EXIT;
+out:
+ LLOG_CLIENT_EXIT(ctxt, imp);
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int llog_client_destroy(const struct lu_env *env,
+ struct llog_handle *loghandle)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ int rc;
+ ENTRY;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_DESTROY);
+ if (req == NULL)
+ GOTO(err_exit, rc =-ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+
+ if (!(body->lgd_llh_flags & LLOG_F_IS_PLAIN))
+ CERROR("%s: wrong llog flags %x\n", imp->imp_obd->obd_name,
+ body->lgd_llh_flags);
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ RETURN(rc);
+}
+
+
+static int llog_client_next_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int *cur_idx, int next_idx,
+ __u64 *cur_offset, void *buf, int len)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ void *ptr;
+ int rc;
+ ENTRY;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+ if (req == NULL)
+ GOTO(err_exit, rc =-ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+ body->lgd_index = next_idx;
+ body->lgd_saved_index = *cur_idx;
+ body->lgd_len = len;
+ body->lgd_cur_offset = *cur_offset;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ GOTO(out, rc =-EFAULT);
+
+ /* The log records are swabbed as they are processed */
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ if (ptr == NULL)
+ GOTO(out, rc =-EFAULT);
+
+ *cur_idx = body->lgd_saved_index;
+ *cur_offset = body->lgd_cur_offset;
+
+ memcpy(buf, ptr, len);
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_prev_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int prev_idx, void *buf, int len)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ void *ptr;
+ int rc;
+ ENTRY;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+ if (req == NULL)
+ GOTO(err_exit, rc = -ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+ body->lgd_index = prev_idx;
+ body->lgd_len = len;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ GOTO(out, rc =-EFAULT);
+
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ if (ptr == NULL)
+ GOTO(out, rc =-EFAULT);
+
+ memcpy(buf, ptr, len);
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_read_header(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ struct llog_log_hdr *hdr;
+ struct llog_rec_hdr *llh_hdr;
+ int rc;
+ ENTRY;
+
+ LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp,&RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_READ_HEADER);
+ if (req == NULL)
+ GOTO(err_exit, rc = -ENOMEM);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = handle->lgh_id;
+ body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = handle->lgh_hdr->llh_flags;
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
+
+ hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR);
+ if (hdr == NULL)
+ GOTO(out, rc =-EFAULT);
+
+ memcpy(handle->lgh_hdr, hdr, sizeof (*hdr));
+ handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+
+ /* sanity checks */
+ llh_hdr = &handle->lgh_hdr->llh_hdr;
+ if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+ CERROR("bad log header magic: %#x (expecting %#x)\n",
+ llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+ rc = -EIO;
+ } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+ CERROR("incorrectly sized log header: %#x "
+ "(expecting %#x)\n",
+ llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+ CERROR("you may need to re-run lconf --write_conf.\n");
+ rc = -EIO;
+ }
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_close(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
+ the servers all close the file at the end of every
+ other LLOG_ RPC. */
+ return(0);
+}
+
+struct llog_operations llog_client_ops = {
+ .lop_next_block = llog_client_next_block,
+ .lop_prev_block = llog_client_prev_block,
+ .lop_read_header = llog_client_read_header,
+ .lop_open = llog_client_open,
+ .lop_destroy = llog_client_destroy,
+ .lop_close = llog_client_close,
+};
+EXPORT_SYMBOL(llog_client_ops);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
new file mode 100644
index 000000000000..a81f557d7794
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_net.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ * if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <linux/list.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+
+int llog_initiator_connect(struct llog_ctxt *ctxt)
+{
+ struct obd_import *new_imp;
+ ENTRY;
+
+ LASSERT(ctxt);
+ new_imp = ctxt->loc_obd->u.cli.cl_import;
+ LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp,
+ "%p - %p\n", ctxt->loc_imp, new_imp);
+ mutex_lock(&ctxt->loc_mutex);
+ if (ctxt->loc_imp != new_imp) {
+ if (ctxt->loc_imp)
+ class_import_put(ctxt->loc_imp);
+ ctxt->loc_imp = class_import_get(new_imp);
+ }
+ mutex_unlock(&ctxt->loc_mutex);
+ RETURN(0);
+}
+EXPORT_SYMBOL(llog_initiator_connect);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_server.c b/drivers/staging/lustre/lustre/ptlrpc/llog_server.c
new file mode 100644
index 000000000000..bc1fcd8c7e73
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_server.c
@@ -0,0 +1,466 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_server.c
+ *
+ * remote api for llog - server side
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <lustre_net.h>
+#include <lustre_fsfilt.h>
+
+#if defined(LUSTRE_LOG_SERVER)
+static int llog_origin_close(const struct lu_env *env, struct llog_handle *lgh)
+{
+ if (lgh->lgh_hdr != NULL && lgh->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
+ return llog_cat_close(env, lgh);
+ else
+ return llog_close(env, lgh);
+}
+
+/* Only open is supported, no new llog can be created remotely */
+int llog_origin_handle_open(struct ptlrpc_request *req)
+{
+ struct obd_export *exp = req->rq_export;
+ struct obd_device *obd = exp->exp_obd;
+ struct obd_device *disk_obd;
+ struct lvfs_run_ctxt saved;
+ struct llog_handle *loghandle;
+ struct llogd_body *body;
+ struct llog_logid *logid = NULL;
+ struct llog_ctxt *ctxt;
+ char *name = NULL;
+ int rc;
+
+ ENTRY;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ RETURN(-EFAULT);
+
+ if (ostid_id(&body->lgd_logid.lgl_oi) > 0)
+ logid = &body->lgd_logid;
+
+ if (req_capsule_field_present(&req->rq_pill, &RMF_NAME, RCL_CLIENT)) {
+ name = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+ if (name == NULL)
+ RETURN(-EFAULT);
+ CDEBUG(D_INFO, "%s: opening log %s\n", obd->obd_name, name);
+ }
+
+ ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
+ if (ctxt == NULL) {
+ CDEBUG(D_WARNING, "%s: no ctxt. group=%p idx=%d name=%s\n",
+ obd->obd_name, &obd->obd_olg, body->lgd_ctxt_idx, name);
+ RETURN(-ENODEV);
+ }
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle, logid,
+ name, LLOG_OPEN_EXISTS);
+ if (rc)
+ GOTO(out_pop, rc);
+
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ GOTO(out_close, rc = -ENOMEM);
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+
+ EXIT;
+out_close:
+ llog_origin_close(req->rq_svc_thread->t_env, loghandle);
+out_pop:
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+EXPORT_SYMBOL(llog_origin_handle_open);
+
+int llog_origin_handle_destroy(struct ptlrpc_request *req)
+{
+ struct obd_device *disk_obd;
+ struct lvfs_run_ctxt saved;
+ struct llogd_body *body;
+ struct llog_logid *logid = NULL;
+ struct llog_ctxt *ctxt;
+ int rc;
+
+ ENTRY;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ RETURN(-EFAULT);
+
+ if (ostid_id(&body->lgd_logid.lgl_oi) > 0)
+ logid = &body->lgd_logid;
+
+ if (!(body->lgd_llh_flags & LLOG_F_IS_PLAIN))
+ CERROR("%s: wrong llog flags %x\n",
+ req->rq_export->exp_obd->obd_name, body->lgd_llh_flags);
+
+ ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx);
+ if (ctxt == NULL)
+ RETURN(-ENODEV);
+
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+
+ rc = req_capsule_server_pack(&req->rq_pill);
+ /* erase only if no error and logid is valid */
+ if (rc == 0)
+ rc = llog_erase(req->rq_svc_thread->t_env, ctxt, logid, NULL);
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_origin_handle_destroy);
+
+int llog_origin_handle_next_block(struct ptlrpc_request *req)
+{
+ struct obd_device *disk_obd;
+ struct llog_handle *loghandle;
+ struct llogd_body *body;
+ struct llogd_body *repbody;
+ struct lvfs_run_ctxt saved;
+ struct llog_ctxt *ctxt;
+ __u32 flags;
+ void *ptr;
+ int rc;
+
+ ENTRY;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ RETURN(-EFAULT);
+
+ ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx);
+ if (ctxt == NULL)
+ RETURN(-ENODEV);
+
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle,
+ &body->lgd_logid, NULL, LLOG_OPEN_EXISTS);
+ if (rc)
+ GOTO(out_pop, rc);
+
+ flags = body->lgd_llh_flags;
+ rc = llog_init_handle(req->rq_svc_thread->t_env, loghandle, flags,
+ NULL);
+ if (rc)
+ GOTO(out_close, rc);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
+ LLOG_CHUNK_SIZE);
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ GOTO(out_close, rc = -ENOMEM);
+
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ *repbody = *body;
+
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ rc = llog_next_block(req->rq_svc_thread->t_env, loghandle,
+ &repbody->lgd_saved_index, repbody->lgd_index,
+ &repbody->lgd_cur_offset, ptr, LLOG_CHUNK_SIZE);
+ if (rc)
+ GOTO(out_close, rc);
+ EXIT;
+out_close:
+ llog_origin_close(req->rq_svc_thread->t_env, loghandle);
+out_pop:
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+EXPORT_SYMBOL(llog_origin_handle_next_block);
+
+int llog_origin_handle_prev_block(struct ptlrpc_request *req)
+{
+ struct llog_handle *loghandle;
+ struct llogd_body *body;
+ struct llogd_body *repbody;
+ struct obd_device *disk_obd;
+ struct lvfs_run_ctxt saved;
+ struct llog_ctxt *ctxt;
+ __u32 flags;
+ void *ptr;
+ int rc;
+
+ ENTRY;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ RETURN(-EFAULT);
+
+ ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx);
+ if (ctxt == NULL)
+ RETURN(-ENODEV);
+
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle,
+ &body->lgd_logid, NULL, LLOG_OPEN_EXISTS);
+ if (rc)
+ GOTO(out_pop, rc);
+
+ flags = body->lgd_llh_flags;
+ rc = llog_init_handle(req->rq_svc_thread->t_env, loghandle, flags,
+ NULL);
+ if (rc)
+ GOTO(out_close, rc);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER,
+ LLOG_CHUNK_SIZE);
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ GOTO(out_close, rc = -ENOMEM);
+
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ *repbody = *body;
+
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ rc = llog_prev_block(req->rq_svc_thread->t_env, loghandle,
+ body->lgd_index, ptr, LLOG_CHUNK_SIZE);
+ if (rc)
+ GOTO(out_close, rc);
+
+ EXIT;
+out_close:
+ llog_origin_close(req->rq_svc_thread->t_env, loghandle);
+out_pop:
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+EXPORT_SYMBOL(llog_origin_handle_prev_block);
+
+int llog_origin_handle_read_header(struct ptlrpc_request *req)
+{
+ struct obd_device *disk_obd;
+ struct llog_handle *loghandle;
+ struct llogd_body *body;
+ struct llog_log_hdr *hdr;
+ struct lvfs_run_ctxt saved;
+ struct llog_ctxt *ctxt;
+ __u32 flags;
+ int rc;
+
+ ENTRY;
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL)
+ RETURN(-EFAULT);
+
+ ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx);
+ if (ctxt == NULL)
+ RETURN(-ENODEV);
+
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle,
+ &body->lgd_logid, NULL, LLOG_OPEN_EXISTS);
+ if (rc)
+ GOTO(out_pop, rc);
+
+ /*
+ * llog_init_handle() reads the llog header
+ */
+ flags = body->lgd_llh_flags;
+ rc = llog_init_handle(req->rq_svc_thread->t_env, loghandle, flags,
+ NULL);
+ if (rc)
+ GOTO(out_close, rc);
+ flags = loghandle->lgh_hdr->llh_flags;
+
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ GOTO(out_close, rc = -ENOMEM);
+
+ hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR);
+ *hdr = *loghandle->lgh_hdr;
+ EXIT;
+out_close:
+ llog_origin_close(req->rq_svc_thread->t_env, loghandle);
+out_pop:
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+EXPORT_SYMBOL(llog_origin_handle_read_header);
+
+int llog_origin_handle_close(struct ptlrpc_request *req)
+{
+ ENTRY;
+ /* Nothing to do */
+ RETURN(0);
+}
+EXPORT_SYMBOL(llog_origin_handle_close);
+
+int llog_origin_handle_cancel(struct ptlrpc_request *req)
+{
+ int num_cookies, rc = 0, err, i, failed = 0;
+ struct obd_device *disk_obd;
+ struct llog_cookie *logcookies;
+ struct llog_ctxt *ctxt = NULL;
+ struct lvfs_run_ctxt saved;
+ struct llog_handle *cathandle;
+ struct inode *inode;
+ void *handle;
+ ENTRY;
+
+ logcookies = req_capsule_client_get(&req->rq_pill, &RMF_LOGCOOKIES);
+ num_cookies = req_capsule_get_size(&req->rq_pill, &RMF_LOGCOOKIES,
+ RCL_CLIENT) / sizeof(*logcookies);
+ if (logcookies == NULL || num_cookies == 0) {
+ DEBUG_REQ(D_HA, req, "No llog cookies sent");
+ RETURN(-EFAULT);
+ }
+
+ ctxt = llog_get_context(req->rq_export->exp_obd,
+ logcookies->lgc_subsys);
+ if (ctxt == NULL)
+ RETURN(-ENODEV);
+
+ disk_obd = ctxt->loc_exp->exp_obd;
+ push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ for (i = 0; i < num_cookies; i++, logcookies++) {
+ cathandle = ctxt->loc_handle;
+ LASSERT(cathandle != NULL);
+ inode = cathandle->lgh_file->f_dentry->d_inode;
+
+ handle = fsfilt_start_log(disk_obd, inode,
+ FSFILT_OP_CANCEL_UNLINK, NULL, 1);
+ if (IS_ERR(handle)) {
+ CERROR("fsfilt_start_log() failed: %ld\n",
+ PTR_ERR(handle));
+ GOTO(pop_ctxt, rc = PTR_ERR(handle));
+ }
+
+ rc = llog_cat_cancel_records(req->rq_svc_thread->t_env,
+ cathandle, 1, logcookies);
+
+ /*
+ * Do not raise -ENOENT errors for resent rpcs. This rec already
+ * might be killed.
+ */
+ if (rc == -ENOENT &&
+ (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)) {
+ /*
+ * Do not change this message, reply-single.sh test_59b
+ * expects to find this in log.
+ */
+ CDEBUG(D_RPCTRACE, "RESENT cancel req %p - ignored\n",
+ req);
+ rc = 0;
+ } else if (rc == 0) {
+ CDEBUG(D_RPCTRACE, "Canceled %d llog-records\n",
+ num_cookies);
+ }
+
+ err = fsfilt_commit(disk_obd, inode, handle, 0);
+ if (err) {
+ CERROR("Error committing transaction: %d\n", err);
+ if (!rc)
+ rc = err;
+ failed++;
+ GOTO(pop_ctxt, rc);
+ } else if (rc)
+ failed++;
+ }
+ GOTO(pop_ctxt, rc);
+pop_ctxt:
+ pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+ if (rc)
+ CERROR("Cancel %d of %d llog-records failed: %d\n",
+ failed, num_cookies, rc);
+
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+EXPORT_SYMBOL(llog_origin_handle_cancel);
+
+#else /* !__KERNEL__ */
+int llog_origin_handle_open(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+
+int llog_origin_handle_destroy(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+
+int llog_origin_handle_next_block(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+int llog_origin_handle_prev_block(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+int llog_origin_handle_read_header(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+int llog_origin_handle_close(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+int llog_origin_handle_cancel(struct ptlrpc_request *req)
+{
+ LBUG();
+ return 0;
+}
+#endif
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
new file mode 100644
index 000000000000..3e7325499d01
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -0,0 +1,1345 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include <obd_support.h>
+#include <obd.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <obd_class.h>
+#include "ptlrpc_internal.h"
+
+
+struct ll_rpc_opcode {
+ __u32 opcode;
+ const char *opname;
+} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = {
+ { OST_REPLY, "ost_reply" },
+ { OST_GETATTR, "ost_getattr" },
+ { OST_SETATTR, "ost_setattr" },
+ { OST_READ, "ost_read" },
+ { OST_WRITE, "ost_write" },
+ { OST_CREATE , "ost_create" },
+ { OST_DESTROY, "ost_destroy" },
+ { OST_GET_INFO, "ost_get_info" },
+ { OST_CONNECT, "ost_connect" },
+ { OST_DISCONNECT, "ost_disconnect" },
+ { OST_PUNCH, "ost_punch" },
+ { OST_OPEN, "ost_open" },
+ { OST_CLOSE, "ost_close" },
+ { OST_STATFS, "ost_statfs" },
+ { 14, NULL }, /* formerly OST_SAN_READ */
+ { 15, NULL }, /* formerly OST_SAN_WRITE */
+ { OST_SYNC, "ost_sync" },
+ { OST_SET_INFO, "ost_set_info" },
+ { OST_QUOTACHECK, "ost_quotacheck" },
+ { OST_QUOTACTL, "ost_quotactl" },
+ { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
+ { MDS_GETATTR, "mds_getattr" },
+ { MDS_GETATTR_NAME, "mds_getattr_lock" },
+ { MDS_CLOSE, "mds_close" },
+ { MDS_REINT, "mds_reint" },
+ { MDS_READPAGE, "mds_readpage" },
+ { MDS_CONNECT, "mds_connect" },
+ { MDS_DISCONNECT, "mds_disconnect" },
+ { MDS_GETSTATUS, "mds_getstatus" },
+ { MDS_STATFS, "mds_statfs" },
+ { MDS_PIN, "mds_pin" },
+ { MDS_UNPIN, "mds_unpin" },
+ { MDS_SYNC, "mds_sync" },
+ { MDS_DONE_WRITING, "mds_done_writing" },
+ { MDS_SET_INFO, "mds_set_info" },
+ { MDS_QUOTACHECK, "mds_quotacheck" },
+ { MDS_QUOTACTL, "mds_quotactl" },
+ { MDS_GETXATTR, "mds_getxattr" },
+ { MDS_SETXATTR, "mds_setxattr" },
+ { MDS_WRITEPAGE, "mds_writepage" },
+ { MDS_IS_SUBDIR, "mds_is_subdir" },
+ { MDS_GET_INFO, "mds_get_info" },
+ { MDS_HSM_STATE_GET, "mds_hsm_state_get" },
+ { MDS_HSM_STATE_SET, "mds_hsm_state_set" },
+ { MDS_HSM_ACTION, "mds_hsm_action" },
+ { MDS_HSM_PROGRESS, "mds_hsm_progress" },
+ { MDS_HSM_REQUEST, "mds_hsm_request" },
+ { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" },
+ { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" },
+ { MDS_SWAP_LAYOUTS, "mds_swap_layouts" },
+ { LDLM_ENQUEUE, "ldlm_enqueue" },
+ { LDLM_CONVERT, "ldlm_convert" },
+ { LDLM_CANCEL, "ldlm_cancel" },
+ { LDLM_BL_CALLBACK, "ldlm_bl_callback" },
+ { LDLM_CP_CALLBACK, "ldlm_cp_callback" },
+ { LDLM_GL_CALLBACK, "ldlm_gl_callback" },
+ { LDLM_SET_INFO, "ldlm_set_info" },
+ { MGS_CONNECT, "mgs_connect" },
+ { MGS_DISCONNECT, "mgs_disconnect" },
+ { MGS_EXCEPTION, "mgs_exception" },
+ { MGS_TARGET_REG, "mgs_target_reg" },
+ { MGS_TARGET_DEL, "mgs_target_del" },
+ { MGS_SET_INFO, "mgs_set_info" },
+ { MGS_CONFIG_READ, "mgs_config_read" },
+ { OBD_PING, "obd_ping" },
+ { OBD_LOG_CANCEL, "llog_origin_handle_cancel" },
+ { OBD_QC_CALLBACK, "obd_quota_callback" },
+ { OBD_IDX_READ, "dt_index_read" },
+ { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" },
+ { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" },
+ { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" },
+ { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" },
+ { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" },
+ { LLOG_ORIGIN_CONNECT, "llog_origin_connect" },
+ { LLOG_CATINFO, "llog_catinfo" },
+ { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" },
+ { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" },
+ { QUOTA_DQACQ, "quota_acquire" },
+ { QUOTA_DQREL, "quota_release" },
+ { SEQ_QUERY, "seq_query" },
+ { SEC_CTX_INIT, "sec_ctx_init" },
+ { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" },
+ { SEC_CTX_FINI, "sec_ctx_fini" },
+ { FLD_QUERY, "fld_query" },
+ { UPDATE_OBJ, "update_obj" },
+};
+
+struct ll_eopcode {
+ __u32 opcode;
+ const char *opname;
+} ll_eopcode_table[EXTRA_LAST_OPC] = {
+ { LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" },
+ { LDLM_PLAIN_ENQUEUE, "ldlm_plain_enqueue" },
+ { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" },
+ { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" },
+ { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" },
+ { MDS_REINT_SETATTR, "mds_reint_setattr" },
+ { MDS_REINT_CREATE, "mds_reint_create" },
+ { MDS_REINT_LINK, "mds_reint_link" },
+ { MDS_REINT_UNLINK, "mds_reint_unlink" },
+ { MDS_REINT_RENAME, "mds_reint_rename" },
+ { MDS_REINT_OPEN, "mds_reint_open" },
+ { MDS_REINT_SETXATTR, "mds_reint_setxattr" },
+ { BRW_READ_BYTES, "read_bytes" },
+ { BRW_WRITE_BYTES, "write_bytes" },
+};
+
+const char *ll_opcode2str(__u32 opcode)
+{
+ /* When one of the assertions below fail, chances are that:
+ * 1) A new opcode was added in include/lustre/lustre_idl.h,
+ * but is missing from the table above.
+ * or 2) The opcode space was renumbered or rearranged,
+ * and the opcode_offset() function in
+ * ptlrpc_internal.h needs to be modified.
+ */
+ __u32 offset = opcode_offset(opcode);
+ LASSERTF(offset < LUSTRE_MAX_OPCODES,
+ "offset %u >= LUSTRE_MAX_OPCODES %u\n",
+ offset, LUSTRE_MAX_OPCODES);
+ LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode,
+ "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
+ offset, ll_rpc_opcode_table[offset].opcode, opcode);
+ return ll_rpc_opcode_table[offset].opname;
+}
+
+const char* ll_eopcode2str(__u32 opcode)
+{
+ LASSERT(ll_eopcode_table[opcode].opcode == opcode);
+ return ll_eopcode_table[opcode].opname;
+}
+#ifdef LPROCFS
+void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir,
+ char *name, struct proc_dir_entry **procroot_ret,
+ struct lprocfs_stats **stats_ret)
+{
+ struct proc_dir_entry *svc_procroot;
+ struct lprocfs_stats *svc_stats;
+ int i, rc;
+ unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX |
+ LPROCFS_CNTR_STDDEV;
+
+ LASSERT(*procroot_ret == NULL);
+ LASSERT(*stats_ret == NULL);
+
+ svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES,0);
+ if (svc_stats == NULL)
+ return;
+
+ if (dir) {
+ svc_procroot = lprocfs_register(dir, root, NULL, NULL);
+ if (IS_ERR(svc_procroot)) {
+ lprocfs_free_stats(&svc_stats);
+ return;
+ }
+ } else {
+ svc_procroot = root;
+ }
+
+ lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
+ svc_counter_config, "req_waittime", "usec");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR,
+ svc_counter_config, "req_qdepth", "reqs");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR,
+ svc_counter_config, "req_active", "reqs");
+ lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT,
+ svc_counter_config, "req_timeout", "sec");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR,
+ svc_counter_config, "reqbuf_avail", "bufs");
+ for (i = 0; i < EXTRA_LAST_OPC; i++) {
+ char *units;
+
+ switch(i) {
+ case BRW_WRITE_BYTES:
+ case BRW_READ_BYTES:
+ units = "bytes";
+ break;
+ default:
+ units = "reqs";
+ break;
+ }
+ lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
+ svc_counter_config,
+ ll_eopcode2str(i), units);
+ }
+ for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
+ __u32 opcode = ll_rpc_opcode_table[i].opcode;
+ lprocfs_counter_init(svc_stats,
+ EXTRA_MAX_OPCODES + i, svc_counter_config,
+ ll_opcode2str(opcode), "usec");
+ }
+
+ rc = lprocfs_register_stats(svc_procroot, name, svc_stats);
+ if (rc < 0) {
+ if (dir)
+ lprocfs_remove(&svc_procroot);
+ lprocfs_free_stats(&svc_stats);
+ } else {
+ if (dir)
+ *procroot_ret = svc_procroot;
+ *stats_ret = svc_stats;
+ }
+}
+
+static int
+ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svcpt->scp_hist_nrqbds;
+
+ return seq_printf(m, "%d\n", total);
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len);
+
+static int
+ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svc->srv_hist_nrqbds_cpt_max;
+
+ return seq_printf(m, "%d\n", total);
+}
+
+static ssize_t
+ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int bufpages;
+ int val;
+ int rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val < 0)
+ return -ERANGE;
+
+ /* This sanity check is more of an insanity check; we can still
+ * hose a kernel by allowing the request history to grow too
+ * far. */
+ bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (val > num_physpages/(2 * bufpages))
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+
+ if (val == 0)
+ svc->srv_hist_nrqbds_cpt_max = 0;
+ else
+ svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts));
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
+
+static int
+ptlrpc_lprocfs_threads_min_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+
+ return seq_printf(m, "%d\n",
+ svc->srv_nthrs_cpt_init * svc->srv_ncpts);
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_min_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int val;
+ int rc = lprocfs_write_helper(buffer, count, &val);
+
+ if (rc < 0)
+ return rc;
+
+ if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) {
+ spin_unlock(&svc->srv_lock);
+ return -ERANGE;
+ }
+
+ svc->srv_nthrs_cpt_init = val / svc->srv_ncpts;
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_min);
+
+static int
+ptlrpc_lprocfs_threads_started_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svcpt->scp_nthrs_running;
+
+ return seq_printf(m, "%d\n", total);
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_threads_started);
+
+static int
+ptlrpc_lprocfs_threads_max_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+
+ return seq_printf(m, "%d\n",
+ svc->srv_nthrs_cpt_limit * svc->srv_ncpts);
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_max_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int val;
+ int rc = lprocfs_write_helper(buffer, count, &val);
+
+ if (rc < 0)
+ return rc;
+
+ if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) {
+ spin_unlock(&svc->srv_lock);
+ return -ERANGE;
+ }
+
+ svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts;
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_max);
+
+/**
+ * \addtogoup nrs
+ * @{
+ */
+extern struct nrs_core nrs_core;
+
+/**
+ * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
+ *
+ * \param[in] state The policy state
+ */
+static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state)
+{
+ switch (state) {
+ default:
+ LBUG();
+ case NRS_POL_STATE_INVALID:
+ return "invalid";
+ case NRS_POL_STATE_STOPPED:
+ return "stopped";
+ case NRS_POL_STATE_STOPPING:
+ return "stopping";
+ case NRS_POL_STATE_STARTING:
+ return "starting";
+ case NRS_POL_STATE_STARTED:
+ return "started";
+ }
+}
+
+/**
+ * Obtains status information for \a policy.
+ *
+ * Information is copied in \a info.
+ *
+ * \param[in] policy The policy
+ * \param[out] info Holds returned status information
+ */
+void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_pol_info *info)
+{
+ LASSERT(policy != NULL);
+ LASSERT(info != NULL);
+ LASSERT(spin_is_locked(&policy->pol_nrs->nrs_lock));
+
+ memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX);
+
+ info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK);
+ info->pi_state = policy->pol_state;
+ /**
+ * XXX: These are accessed without holding
+ * ptlrpc_service_part::scp_req_lock.
+ */
+ info->pi_req_queued = policy->pol_req_queued;
+ info->pi_req_started = policy->pol_req_started;
+}
+
+/**
+ * Reads and prints policy status information for all policies of a PTLRPC
+ * service.
+ */
+static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_pol_info *infos;
+ struct ptlrpc_nrs_pol_info tmp;
+ unsigned num_pols;
+ unsigned pol_idx = 0;
+ bool hp = false;
+ int i;
+ int rc = 0;
+ ENTRY;
+
+ /**
+ * Serialize NRS core lprocfs operations with policy registration/
+ * unregistration.
+ */
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Use the first service partition's regular NRS head in order to obtain
+ * the number of policies registered with NRS heads of this service. All
+ * service partitions will have the same number of policies.
+ */
+ nrs = nrs_svcpt2nrs(svc->srv_parts[0], false);
+
+ spin_lock(&nrs->nrs_lock);
+ num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols;
+ spin_unlock(&nrs->nrs_lock);
+
+ OBD_ALLOC(infos, num_pols * sizeof(*infos));
+ if (infos == NULL)
+ GOTO(out, rc = -ENOMEM);
+again:
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ spin_lock(&nrs->nrs_lock);
+
+ pol_idx = 0;
+
+ list_for_each_entry(policy, &nrs->nrs_policy_list,
+ pol_list) {
+ LASSERT(pol_idx < num_pols);
+
+ nrs_policy_get_info_locked(policy, &tmp);
+ /**
+ * Copy values when handling the first service
+ * partition.
+ */
+ if (i == 0) {
+ memcpy(infos[pol_idx].pi_name, tmp.pi_name,
+ NRS_POL_NAME_MAX);
+ memcpy(&infos[pol_idx].pi_state, &tmp.pi_state,
+ sizeof(tmp.pi_state));
+ infos[pol_idx].pi_fallback = tmp.pi_fallback;
+ /**
+ * For the rest of the service partitions
+ * sanity-check the values we get.
+ */
+ } else {
+ LASSERT(strncmp(infos[pol_idx].pi_name,
+ tmp.pi_name,
+ NRS_POL_NAME_MAX) == 0);
+ /**
+ * Not asserting ptlrpc_nrs_pol_info::pi_state,
+ * because it may be different between
+ * instances of the same policy in different
+ * service partitions.
+ */
+ LASSERT(infos[pol_idx].pi_fallback ==
+ tmp.pi_fallback);
+ }
+
+ infos[pol_idx].pi_req_queued += tmp.pi_req_queued;
+ infos[pol_idx].pi_req_started += tmp.pi_req_started;
+
+ pol_idx++;
+ }
+ spin_unlock(&nrs->nrs_lock);
+ }
+
+ /**
+ * Policy status information output is in YAML format.
+ * For example:
+ *
+ * regular_requests:
+ * - name: fifo
+ * state: started
+ * fallback: yes
+ * queued: 0
+ * active: 0
+ *
+ * - name: crrn
+ * state: started
+ * fallback: no
+ * queued: 2015
+ * active: 384
+ *
+ * high_priority_requests:
+ * - name: fifo
+ * state: started
+ * fallback: yes
+ * queued: 0
+ * active: 2
+ *
+ * - name: crrn
+ * state: stopped
+ * fallback: no
+ * queued: 0
+ * active: 0
+ */
+ seq_printf(m, "%s\n",
+ !hp ? "\nregular_requests:" : "high_priority_requests:");
+
+ for (pol_idx = 0; pol_idx < num_pols; pol_idx++) {
+ seq_printf(m, " - name: %s\n"
+ " state: %s\n"
+ " fallback: %s\n"
+ " queued: %-20d\n"
+ " active: %-20d\n\n",
+ infos[pol_idx].pi_name,
+ nrs_state2str(infos[pol_idx].pi_state),
+ infos[pol_idx].pi_fallback ? "yes" : "no",
+ (int)infos[pol_idx].pi_req_queued,
+ (int)infos[pol_idx].pi_req_started);
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ memset(infos, 0, num_pols * sizeof(*infos));
+
+ /**
+ * Redo the processing for the service's HP NRS heads' policies.
+ */
+ hp = true;
+ goto again;
+ }
+
+out:
+ if (infos)
+ OBD_FREE(infos, num_pols * sizeof(*infos));
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ RETURN(rc);
+}
+
+/**
+ * The longest valid command string is the maxium policy name size, plus the
+ * length of the " reg" substring
+ */
+#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1)
+
+/**
+ * Starts and stops a given policy on a PTLRPC service.
+ *
+ * Commands consist of the policy name, followed by an optional [reg|hp] token;
+ * if the optional token is omitted, the operation is performed on both the
+ * regular and high-priority (if the service has one) NRS head.
+ */
+static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH;
+ char *cmd;
+ char *cmd_copy = NULL;
+ char *token;
+ int rc = 0;
+ ENTRY;
+
+ if (count >= LPROCFS_NRS_WR_MAX_CMD)
+ GOTO(out, rc = -EINVAL);
+
+ OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD);
+ if (cmd == NULL)
+ GOTO(out, rc = -ENOMEM);
+ /**
+ * strsep() modifies its argument, so keep a copy
+ */
+ cmd_copy = cmd;
+
+ if (copy_from_user(cmd, buffer, count))
+ GOTO(out, rc = -EFAULT);
+
+ cmd[count] = '\0';
+
+ token = strsep(&cmd, " ");
+
+ if (strlen(token) > NRS_POL_NAME_MAX - 1)
+ GOTO(out, rc = -EINVAL);
+
+ /**
+ * No [reg|hp] token has been specified
+ */
+ if (cmd == NULL)
+ goto default_queue;
+
+ /**
+ * The second token is either NULL, or an optional [reg|hp] string
+ */
+ if (strcmp(cmd, "reg") == 0)
+ queue = PTLRPC_NRS_QUEUE_REG;
+ else if (strcmp(cmd, "hp") == 0)
+ queue = PTLRPC_NRS_QUEUE_HP;
+ else
+ GOTO(out, rc = -EINVAL);
+
+default_queue:
+
+ if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc))
+ GOTO(out, rc = -ENODEV);
+ else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+ queue = PTLRPC_NRS_QUEUE_REG;
+
+ /**
+ * Serialize NRS core lprocfs operations with policy registration/
+ * unregistration.
+ */
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START,
+ false, NULL);
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+out:
+ if (cmd_copy)
+ OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD);
+
+ RETURN(rc < 0 ? rc : count);
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs);
+
+/** @} nrs */
+
+struct ptlrpc_srh_iterator {
+ int srhi_idx;
+ __u64 srhi_seq;
+ struct ptlrpc_request *srhi_req;
+};
+
+int
+ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_srh_iterator *srhi,
+ __u64 seq)
+{
+ struct list_head *e;
+ struct ptlrpc_request *req;
+
+ if (srhi->srhi_req != NULL &&
+ srhi->srhi_seq > svcpt->scp_hist_seq_culled &&
+ srhi->srhi_seq <= seq) {
+ /* If srhi_req was set previously, hasn't been culled and
+ * we're searching for a seq on or after it (i.e. more
+ * recent), search from it onwards.
+ * Since the service history is LRU (i.e. culled reqs will
+ * be near the head), we shouldn't have to do long
+ * re-scans */
+ LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq,
+ "%s:%d: seek seq "LPU64", request seq "LPU64"\n",
+ svcpt->scp_service->srv_name, svcpt->scp_cpt,
+ srhi->srhi_seq, srhi->srhi_req->rq_history_seq);
+ LASSERTF(!list_empty(&svcpt->scp_hist_reqs),
+ "%s:%d: seek offset "LPU64", request seq "LPU64", "
+ "last culled "LPU64"\n",
+ svcpt->scp_service->srv_name, svcpt->scp_cpt,
+ seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled);
+ e = &srhi->srhi_req->rq_history_list;
+ } else {
+ /* search from start */
+ e = svcpt->scp_hist_reqs.next;
+ }
+
+ while (e != &svcpt->scp_hist_reqs) {
+ req = list_entry(e, struct ptlrpc_request, rq_history_list);
+
+ if (req->rq_history_seq >= seq) {
+ srhi->srhi_seq = req->rq_history_seq;
+ srhi->srhi_req = req;
+ return 0;
+ }
+ e = e->next;
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * ptlrpc history sequence is used as "position" of seq_file, in some case,
+ * seq_read() will increase "position" to indicate reading the next
+ * element, however, low bits of history sequence are reserved for CPT id
+ * (check the details from comments before ptlrpc_req_add_history), which
+ * means seq_read() might change CPT id of history sequence and never
+ * finish reading of requests on a CPT. To make it work, we have to shift
+ * CPT id to high bits and timestamp to low bits, so seq_read() will only
+ * increase timestamp which can correctly indicate the next position.
+ */
+
+/* convert seq_file pos to cpt */
+#define PTLRPC_REQ_POS2CPT(svc, pos) \
+ ((svc)->srv_cpt_bits == 0 ? 0 : \
+ (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))
+
+/* make up seq_file pos from cpt */
+#define PTLRPC_REQ_CPT2POS(svc, cpt) \
+ ((svc)->srv_cpt_bits == 0 ? 0 : \
+ (cpt) << (64 - (svc)->srv_cpt_bits))
+
+/* convert sequence to position */
+#define PTLRPC_REQ_SEQ2POS(svc, seq) \
+ ((svc)->srv_cpt_bits == 0 ? (seq) : \
+ ((seq) >> (svc)->srv_cpt_bits) | \
+ ((seq) << (64 - (svc)->srv_cpt_bits)))
+
+/* convert position to sequence */
+#define PTLRPC_REQ_POS2SEQ(svc, pos) \
+ ((svc)->srv_cpt_bits == 0 ? (pos) : \
+ ((__u64)(pos) << (svc)->srv_cpt_bits) | \
+ ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
+
+static void *
+ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_srh_iterator *srhi;
+ unsigned int cpt;
+ int rc;
+ int i;
+
+ if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */
+ CWARN("Failed to read request history because size of loff_t "
+ "%d can't match size of u64\n", (int)sizeof(loff_t));
+ return NULL;
+ }
+
+ OBD_ALLOC(srhi, sizeof(*srhi));
+ if (srhi == NULL)
+ return NULL;
+
+ srhi->srhi_seq = 0;
+ srhi->srhi_req = NULL;
+
+ cpt = PTLRPC_REQ_POS2CPT(svc, *pos);
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (i < cpt) /* skip */
+ continue;
+ if (i > cpt) /* make up the lowest position for this CPT */
+ *pos = PTLRPC_REQ_CPT2POS(svc, i);
+
+ spin_lock(&svcpt->scp_lock);
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi,
+ PTLRPC_REQ_POS2SEQ(svc, *pos));
+ spin_unlock(&svcpt->scp_lock);
+ if (rc == 0) {
+ *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+ srhi->srhi_idx = i;
+ return srhi;
+ }
+ }
+
+ OBD_FREE(srhi, sizeof(*srhi));
+ return NULL;
+}
+
+static void
+ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter)
+{
+ struct ptlrpc_srh_iterator *srhi = iter;
+
+ if (srhi != NULL)
+ OBD_FREE(srhi, sizeof(*srhi));
+}
+
+static void *
+ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
+ void *iter, loff_t *pos)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_srh_iterator *srhi = iter;
+ struct ptlrpc_service_part *svcpt;
+ __u64 seq;
+ int rc;
+ int i;
+
+ for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) {
+ svcpt = svc->srv_parts[i];
+
+ if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
+ srhi->srhi_req = NULL;
+ seq = srhi->srhi_seq = 0;
+ } else { /* the next sequence */
+ seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq);
+ spin_unlock(&svcpt->scp_lock);
+ if (rc == 0) {
+ *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+ srhi->srhi_idx = i;
+ return srhi;
+ }
+ }
+
+ OBD_FREE(srhi, sizeof(*srhi));
+ return NULL;
+}
+
+/* common ost/mdt so_req_printer */
+void target_print_req(void *seq_file, struct ptlrpc_request *req)
+{
+ /* Called holding srv_lock with irqs disabled.
+ * Print specific req contents and a newline.
+ * CAVEAT EMPTOR: check request message length before printing!!!
+ * You might have received any old crap so you must be just as
+ * careful here as the service's request parser!!! */
+ struct seq_file *sf = seq_file;
+
+ switch (req->rq_phase) {
+ case RQ_PHASE_NEW:
+ /* still awaiting a service thread's attention, or rejected
+ * because the generic request message didn't unpack */
+ seq_printf(sf, "<not swabbed>\n");
+ break;
+ case RQ_PHASE_INTERPRET:
+ /* being handled, so basic msg swabbed, and opc is valid
+ * but racing with mds_handle() */
+ case RQ_PHASE_COMPLETE:
+ /* been handled by mds_handle() reply state possibly still
+ * volatile */
+ seq_printf(sf, "opc %d\n", lustre_msg_get_opc(req->rq_reqmsg));
+ break;
+ default:
+ DEBUG_REQ(D_ERROR, req, "bad phase %d", req->rq_phase);
+ }
+}
+EXPORT_SYMBOL(target_print_req);
+
+static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_srh_iterator *srhi = iter;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request *req;
+ int rc;
+
+ LASSERT(srhi->srhi_idx < svc->srv_ncpts);
+
+ svcpt = svc->srv_parts[srhi->srhi_idx];
+
+ spin_lock(&svcpt->scp_lock);
+
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq);
+
+ if (rc == 0) {
+ req = srhi->srhi_req;
+
+ /* Print common req fields.
+ * CAVEAT EMPTOR: we're racing with the service handler
+ * here. The request could contain any old crap, so you
+ * must be just as careful as the service's request
+ * parser. Currently I only print stuff here I know is OK
+ * to look at coz it was set up in request_in_callback()!!! */
+ seq_printf(s, LPD64":%s:%s:x"LPU64":%d:%s:%ld:%lds(%+lds) ",
+ req->rq_history_seq, libcfs_nid2str(req->rq_self),
+ libcfs_id2str(req->rq_peer), req->rq_xid,
+ req->rq_reqlen, ptlrpc_rqphase2str(req),
+ req->rq_arrival_time.tv_sec,
+ req->rq_sent - req->rq_arrival_time.tv_sec,
+ req->rq_sent - req->rq_deadline);
+ if (svc->srv_ops.so_req_printer == NULL)
+ seq_printf(s, "\n");
+ else
+ svc->srv_ops.so_req_printer(s, srhi->srhi_req);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+ return rc;
+}
+
+static int
+ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
+{
+ static struct seq_operations sops = {
+ .start = ptlrpc_lprocfs_svc_req_history_start,
+ .stop = ptlrpc_lprocfs_svc_req_history_stop,
+ .next = ptlrpc_lprocfs_svc_req_history_next,
+ .show = ptlrpc_lprocfs_svc_req_history_show,
+ };
+ struct seq_file *seqf;
+ int rc;
+
+ rc = seq_open(file, &sops);
+ if (rc)
+ return rc;
+
+ seqf = file->private_data;
+ seqf->private = PDE_DATA(inode);
+ return 0;
+}
+
+/* See also lprocfs_rd_timeouts */
+static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ struct dhms ts;
+ time_t worstt;
+ unsigned int cur;
+ unsigned int worst;
+ int i;
+
+ if (AT_OFF) {
+ seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
+ obd_timeout);
+ return 0;
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ cur = at_get(&svcpt->scp_at_estimate);
+ worst = svcpt->scp_at_estimate.at_worst_ever;
+ worstt = svcpt->scp_at_estimate.at_worst_time;
+ s2dhms(&ts, cfs_time_current_sec() - worstt);
+
+ seq_printf(m, "%10s : cur %3u worst %3u (at %ld, "
+ DHMS_FMT" ago) ", "service",
+ cur, worst, worstt, DHMS_VARS(&ts));
+
+ lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate);
+ }
+
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts);
+
+static int ptlrpc_lprocfs_hp_ratio_seq_show(struct seq_file *m, void *v)
+{
+ struct ptlrpc_service *svc = m->private;
+ return seq_printf(m, "%d", svc->srv_hpreq_ratio);
+}
+
+static ssize_t ptlrpc_lprocfs_hp_ratio_seq_write(struct file *file,
+ const char *buffer,
+ size_t count,
+ loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int rc;
+ int val;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val < 0)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ svc->srv_hpreq_ratio = val;
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_hp_ratio);
+
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry,
+ struct ptlrpc_service *svc)
+{
+ struct lprocfs_vars lproc_vars[] = {
+ {.name = "high_priority_ratio",
+ .fops = &ptlrpc_lprocfs_hp_ratio_fops,
+ .data = svc},
+ {.name = "req_buffer_history_len",
+ .fops = &ptlrpc_lprocfs_req_history_len_fops,
+ .data = svc},
+ {.name = "req_buffer_history_max",
+ .fops = &ptlrpc_lprocfs_req_history_max_fops,
+ .data = svc},
+ {.name = "threads_min",
+ .fops = &ptlrpc_lprocfs_threads_min_fops,
+ .data = svc},
+ {.name = "threads_max",
+ .fops = &ptlrpc_lprocfs_threads_max_fops,
+ .data = svc},
+ {.name = "threads_started",
+ .fops = &ptlrpc_lprocfs_threads_started_fops,
+ .data = svc},
+ {.name = "timeouts",
+ .fops = &ptlrpc_lprocfs_timeouts_fops,
+ .data = svc},
+ {.name = "nrs_policies",
+ .fops = &ptlrpc_lprocfs_nrs_fops,
+ .data = svc},
+ {NULL}
+ };
+ static struct file_operations req_history_fops = {
+ .owner = THIS_MODULE,
+ .open = ptlrpc_lprocfs_svc_req_history_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+ };
+
+ int rc;
+
+ ptlrpc_lprocfs_register(entry, svc->srv_name,
+ "stats", &svc->srv_procroot,
+ &svc->srv_stats);
+
+ if (svc->srv_procroot == NULL)
+ return;
+
+ lprocfs_add_vars(svc->srv_procroot, lproc_vars, NULL);
+
+ rc = lprocfs_seq_create(svc->srv_procroot, "req_history",
+ 0400, &req_history_fops, svc);
+ if (rc)
+ CWARN("Error adding the req_history file\n");
+}
+
+void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
+{
+ ptlrpc_lprocfs_register(obddev->obd_proc_entry, NULL, "stats",
+ &obddev->obd_svc_procroot,
+ &obddev->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd);
+
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount)
+{
+ struct lprocfs_stats *svc_stats;
+ __u32 op = lustre_msg_get_opc(req->rq_reqmsg);
+ int opc = opcode_offset(op);
+
+ svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+ if (svc_stats == NULL || opc <= 0)
+ return;
+ LASSERT(opc < LUSTRE_MAX_OPCODES);
+ if (!(op == LDLM_ENQUEUE || op == MDS_REINT))
+ lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount);
+}
+
+void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
+{
+ struct lprocfs_stats *svc_stats;
+ int idx;
+
+ if (!req->rq_import)
+ return;
+ svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+ if (!svc_stats)
+ return;
+ idx = lustre_msg_get_opc(req->rq_reqmsg);
+ switch (idx) {
+ case OST_READ:
+ idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR;
+ break;
+ case OST_WRITE:
+ idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR;
+ break;
+ default:
+ LASSERTF(0, "unsupported opcode %u\n", idx);
+ break;
+ }
+
+ lprocfs_counter_add(svc_stats, idx, bytes);
+}
+
+EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
+
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
+{
+ if (svc->srv_procroot != NULL)
+ lprocfs_remove(&svc->srv_procroot);
+
+ if (svc->srv_stats)
+ lprocfs_free_stats(&svc->srv_stats);
+}
+
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
+{
+ if (obd->obd_svc_procroot)
+ lprocfs_remove(&obd->obd_svc_procroot);
+
+ if (obd->obd_svc_stats)
+ lprocfs_free_stats(&obd->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd);
+
+
+#define BUFLEN (UUID_MAX + 5)
+
+int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ char *kbuf;
+ char *tmpbuf;
+
+ OBD_ALLOC(kbuf, BUFLEN);
+ if (kbuf == NULL)
+ return -ENOMEM;
+
+ /*
+ * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1
+ * bytes into kbuf, to ensure that the string is NUL-terminated.
+ * UUID_MAX should include a trailing NUL already.
+ */
+ if (copy_from_user(kbuf, buffer,
+ min_t(unsigned long, BUFLEN - 1, count))) {
+ count = -EFAULT;
+ goto out;
+ }
+ tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+ /* Kludge code(deadlock situation): the lprocfs lock has been held
+ * since the client is evicted by writting client's
+ * uuid/nid to procfs "evict_client" entry. However,
+ * obd_export_evict_by_uuid() will call lprocfs_remove() to destroy
+ * the proc entries under the being destroyed export{}, so I have
+ * to drop the lock at first here.
+ * - jay, jxiong@clusterfs.com */
+ class_incref(obd, __FUNCTION__, current);
+
+ if (strncmp(tmpbuf, "nid:", 4) == 0)
+ obd_export_evict_by_nid(obd, tmpbuf + 4);
+ else if (strncmp(tmpbuf, "uuid:", 5) == 0)
+ obd_export_evict_by_uuid(obd, tmpbuf + 5);
+ else
+ obd_export_evict_by_uuid(obd, tmpbuf);
+
+ class_decref(obd, __FUNCTION__, current);
+
+out:
+ OBD_FREE(kbuf, BUFLEN);
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_evict_client);
+
+#undef BUFLEN
+
+int lprocfs_wr_ping(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct ptlrpc_request *req;
+ int rc;
+ ENTRY;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+ LPROCFS_CLIMP_EXIT(obd);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+ if (rc >= 0)
+ RETURN(count);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lprocfs_wr_ping);
+
+/* Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ */
+int lprocfs_wr_import(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct obd_import *imp = obd->u.cli.cl_import;
+ char *kbuf = NULL;
+ char *uuid;
+ char *ptr;
+ int do_reconn = 1;
+ const char prefix[] = "connection=";
+ const int prefix_len = sizeof(prefix) - 1;
+
+ if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len)
+ return -EINVAL;
+
+ OBD_ALLOC(kbuf, count + 1);
+ if (kbuf == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(kbuf, buffer, count))
+ GOTO(out, count = -EFAULT);
+
+ kbuf[count] = 0;
+
+ /* only support connection=uuid::instance now */
+ if (strncmp(prefix, kbuf, prefix_len) != 0)
+ GOTO(out, count = -EINVAL);
+
+ uuid = kbuf + prefix_len;
+ ptr = strstr(uuid, "::");
+ if (ptr) {
+ __u32 inst;
+ char *endptr;
+
+ *ptr = 0;
+ do_reconn = 0;
+ ptr += strlen("::");
+ inst = simple_strtol(ptr, &endptr, 10);
+ if (*endptr) {
+ CERROR("config: wrong instance # %s\n", ptr);
+ } else if (inst != imp->imp_connect_data.ocd_instance) {
+ CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted "
+ "target(%u/%u), reconnecting...\n",
+ imp->imp_obd->obd_name,
+ imp->imp_connect_data.ocd_instance, inst);
+ do_reconn = 1;
+ } else {
+ CDEBUG(D_INFO, "IR: %s has already been connecting to "
+ "new target(%u)\n",
+ imp->imp_obd->obd_name, inst);
+ }
+ }
+
+ if (do_reconn)
+ ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+ OBD_FREE(kbuf, count + 1);
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_import);
+
+int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
+{
+ struct obd_device *obd = m->private;
+ struct obd_import *imp = obd->u.cli.cl_import;
+ int rc;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ rc = seq_printf(m, "%d\n", !imp->imp_no_pinger_recover);
+ LPROCFS_CLIMP_EXIT(obd);
+
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_rd_pinger_recov);
+
+int lprocfs_wr_pinger_recov(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &obd->u.cli;
+ struct obd_import *imp = cli->cl_import;
+ int rc, val;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val != 0 && val != 1)
+ return -ERANGE;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ spin_lock(&imp->imp_lock);
+ imp->imp_no_pinger_recover = !val;
+ spin_unlock(&imp->imp_lock);
+ LPROCFS_CLIMP_EXIT(obd);
+
+ return count;
+
+}
+EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
+
+#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
new file mode 100644
index 000000000000..de3f0db0ba47
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -0,0 +1,728 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_lib.h>
+#include <obd.h>
+#include <obd_class.h>
+#include "ptlrpc_internal.h"
+
+/**
+ * Helper function. Sends \a len bytes from \a base at offset \a offset
+ * over \a conn connection to portal \a portal.
+ * Returns 0 on success or error code.
+ */
+static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
+ lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
+ struct ptlrpc_connection *conn, int portal, __u64 xid,
+ unsigned int offset)
+{
+ int rc;
+ lnet_md_t md;
+ ENTRY;
+
+ LASSERT (portal != 0);
+ LASSERT (conn != NULL);
+ CDEBUG (D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
+ md.start = base;
+ md.length = len;
+ md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
+ md.options = PTLRPC_MD_OPTIONS;
+ md.user_ptr = cbid;
+ md.eq_handle = ptlrpc_eq_h;
+
+ if (unlikely(ack == LNET_ACK_REQ &&
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, OBD_FAIL_ONCE))){
+ /* don't ask for the ack to simulate failing client */
+ ack = LNET_NOACK_REQ;
+ }
+
+ rc = LNetMDBind (md, LNET_UNLINK, mdh);
+ if (unlikely(rc != 0)) {
+ CERROR ("LNetMDBind failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ RETURN (-ENOMEM);
+ }
+
+ CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64", offset %u\n",
+ len, portal, xid, offset);
+
+ rc = LNetPut (conn->c_self, *mdh, ack,
+ conn->c_peer, portal, xid, offset, 0);
+ if (unlikely(rc != 0)) {
+ int rc2;
+ /* We're going to get an UNLINK event when I unlink below,
+ * which will complete just like any other failed send, so
+ * I fall through and return success here! */
+ CERROR("LNetPut(%s, %d, "LPD64") failed: %d\n",
+ libcfs_id2str(conn->c_peer), portal, xid, rc);
+ rc2 = LNetMDUnlink(*mdh);
+ LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
+ }
+
+ RETURN (0);
+}
+
+static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ LNetMDUnlink(bd_mds[i]);
+}
+
+
+/**
+ * Register bulk at the sender for later transfer.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_register_bulk(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ lnet_process_id_t peer;
+ int rc = 0;
+ int rc2;
+ int posted_md;
+ int total_md;
+ __u64 xid;
+ lnet_handle_me_t me_h;
+ lnet_md_t md;
+ ENTRY;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
+ RETURN(0);
+
+ /* NB no locking required until desc is on the network */
+ LASSERT(desc->bd_nob > 0);
+ LASSERT(desc->bd_md_count == 0);
+ LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
+ LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+ LASSERT(desc->bd_req != NULL);
+ LASSERT(desc->bd_type == BULK_PUT_SINK ||
+ desc->bd_type == BULK_GET_SOURCE);
+
+ /* cleanup the state of the bulk for it will be reused */
+ if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
+ desc->bd_nob_transferred = 0;
+ else
+ LASSERT(desc->bd_nob_transferred == 0);
+
+ desc->bd_failure = 0;
+
+ peer = desc->bd_import->imp_connection->c_peer;
+
+ LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
+ LASSERT(desc->bd_cbid.cbid_arg == desc);
+
+ /* An XID is only used for a single request from the client.
+ * For retried bulk transfers, a new XID will be allocated in
+ * in ptlrpc_check_set() if it needs to be resent, so it is not
+ * using the same RDMA match bits after an error.
+ *
+ * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
+ * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
+ xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
+ LASSERTF(!(desc->bd_registered &&
+ req->rq_send_state != LUSTRE_IMP_REPLAY) ||
+ xid != desc->bd_last_xid,
+ "registered: %d rq_xid: "LPU64" bd_last_xid: "LPU64"\n",
+ desc->bd_registered, xid, desc->bd_last_xid);
+
+ total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
+ desc->bd_registered = 1;
+ desc->bd_last_xid = xid;
+ desc->bd_md_count = total_md;
+ md.user_ptr = &desc->bd_cbid;
+ md.eq_handle = ptlrpc_eq_h;
+ md.threshold = 1; /* PUT or GET */
+
+ for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
+ md.options = PTLRPC_MD_OPTIONS |
+ ((desc->bd_type == BULK_GET_SOURCE) ?
+ LNET_MD_OP_GET : LNET_MD_OP_PUT);
+ ptlrpc_fill_bulk_md(&md, desc, posted_md);
+
+ rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
+ LNET_UNLINK, LNET_INS_AFTER, &me_h);
+ if (rc != 0) {
+ CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n",
+ desc->bd_export->exp_obd->obd_name, xid,
+ posted_md, rc);
+ break;
+ }
+
+ /* About to let the network at it... */
+ rc = LNetMDAttach(me_h, md, LNET_UNLINK,
+ &desc->bd_mds[posted_md]);
+ if (rc != 0) {
+ CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n",
+ desc->bd_export->exp_obd->obd_name, xid,
+ posted_md, rc);
+ rc2 = LNetMEUnlink(me_h);
+ LASSERT(rc2 == 0);
+ break;
+ }
+ }
+
+ if (rc != 0) {
+ LASSERT(rc == -ENOMEM);
+ spin_lock(&desc->bd_lock);
+ desc->bd_md_count -= total_md - posted_md;
+ spin_unlock(&desc->bd_lock);
+ LASSERT(desc->bd_md_count >= 0);
+ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+ req->rq_status = -ENOMEM;
+ RETURN(-ENOMEM);
+ }
+
+ /* Set rq_xid to matchbits of the final bulk so that server can
+ * infer the number of bulks that were prepared */
+ req->rq_xid = --xid;
+ LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
+ "bd_last_xid = x"LPU64", rq_xid = x"LPU64"\n",
+ desc->bd_last_xid, req->rq_xid);
+
+ spin_lock(&desc->bd_lock);
+ /* Holler if peer manages to touch buffers before he knows the xid */
+ if (desc->bd_md_count != total_md)
+ CWARN("%s: Peer %s touched %d buffers while I registered\n",
+ desc->bd_export->exp_obd->obd_name, libcfs_id2str(peer),
+ total_md - desc->bd_md_count);
+ spin_unlock(&desc->bd_lock);
+
+ CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
+ "xid x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count,
+ desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
+ desc->bd_iov_count, desc->bd_nob,
+ desc->bd_last_xid, req->rq_xid, desc->bd_portal);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_register_bulk);
+
+/**
+ * Disconnect a bulk desc from the network. Idempotent. Not
+ * thread-safe (i.e. only interlocks with completion callback).
+ * Returns 1 on success or 0 if network unregistration failed for whatever
+ * reason.
+ */
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ wait_queue_head_t *wq;
+ struct l_wait_info lwi;
+ int rc;
+ ENTRY;
+
+ LASSERT(!in_interrupt()); /* might sleep */
+
+ /* Let's setup deadline for reply unlink. */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+ async && req->rq_bulk_deadline == 0)
+ req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;
+
+ if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
+ RETURN(1); /* never registered */
+
+ LASSERT(desc->bd_req == req); /* bd_req NULL until registered */
+
+ /* the unlink ensures the callback happens ASAP and is the last
+ * one. If it fails, it must be because completion just happened,
+ * but we must still l_wait_event() in this case to give liblustre
+ * a chance to run client_bulk_callback() */
+ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+
+ if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
+ RETURN(1); /* never registered */
+
+ /* Move to "Unregistering" phase as bulk was not unlinked yet. */
+ ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+
+ /* Do not wait for unlink to finish. */
+ if (async)
+ RETURN(0);
+
+ if (req->rq_set != NULL)
+ wq = &req->rq_set->set_waitq;
+ else
+ wq = &req->rq_reply_waitq;
+
+ for (;;) {
+ /* Network access will complete in finite time but the HUGE
+ * timeout lets us CWARN for visibility of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
+ if (rc == 0) {
+ ptlrpc_rqphase_move(req, req->rq_next_phase);
+ RETURN(1);
+ }
+
+ LASSERT(rc == -ETIMEDOUT);
+ DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
+ desc);
+ }
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_unregister_bulk);
+
+static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int service_time = max_t(int, cfs_time_current_sec() -
+ req->rq_arrival_time.tv_sec, 1);
+
+ if (!(flags & PTLRPC_REPLY_EARLY) &&
+ (req->rq_type != PTL_RPC_MSG_ERR) &&
+ (req->rq_reqmsg != NULL) &&
+ !(lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_RESENT | MSG_REPLAY |
+ MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
+ /* early replies, errors and recovery requests don't count
+ * toward our service time estimate */
+ int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
+
+ if (oldse != 0) {
+ DEBUG_REQ(D_ADAPTTO, req,
+ "svc %s changed estimate from %d to %d",
+ svc->srv_name, oldse,
+ at_get(&svcpt->scp_at_estimate));
+ }
+ }
+ /* Report actual service time for client latency calc */
+ lustre_msg_set_service_time(req->rq_repmsg, service_time);
+ /* Report service time estimate for future client reqs, but report 0
+ * (to be ignored by client) if it's a error reply during recovery.
+ * (bz15815) */
+ if (req->rq_type == PTL_RPC_MSG_ERR &&
+ (req->rq_export == NULL || req->rq_export->exp_obd->obd_recovering))
+ lustre_msg_set_timeout(req->rq_repmsg, 0);
+ else
+ lustre_msg_set_timeout(req->rq_repmsg,
+ at_get(&svcpt->scp_at_estimate));
+
+ if (req->rq_reqmsg &&
+ !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+ CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x "
+ "req_flags=%#x magic=%d:%x/%x len=%d\n",
+ flags, lustre_msg_get_flags(req->rq_reqmsg),
+ lustre_msg_is_v1(req->rq_reqmsg),
+ lustre_msg_get_magic(req->rq_reqmsg),
+ lustre_msg_get_magic(req->rq_repmsg), req->rq_replen);
+ }
+}
+
+/**
+ * Send request reply from request \a req reply buffer.
+ * \a flags defines reply types
+ * Returns 0 on sucess or error code
+ */
+int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_connection *conn;
+ int rc;
+
+ /* We must already have a reply buffer (only ptlrpc_error() may be
+ * called without one). The reply generated by sptlrpc layer (e.g.
+ * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must
+ * have a request buffer which is either the actual (swabbed) incoming
+ * request, or a saved copy if this is a req saved in
+ * target_queue_final_reply().
+ */
+ LASSERT (req->rq_no_reply == 0);
+ LASSERT (req->rq_reqbuf != NULL);
+ LASSERT (rs != NULL);
+ LASSERT ((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
+ LASSERT (req->rq_repmsg != NULL);
+ LASSERT (req->rq_repmsg == rs->rs_msg);
+ LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback);
+ LASSERT (rs->rs_cb_id.cbid_arg == rs);
+
+ /* There may be no rq_export during failover */
+
+ if (unlikely(req->rq_export && req->rq_export->exp_obd &&
+ req->rq_export->exp_obd->obd_fail)) {
+ /* Failed obd's only send ENODEV */
+ req->rq_type = PTL_RPC_MSG_ERR;
+ req->rq_status = -ENODEV;
+ CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
+ req->rq_export->exp_obd->obd_minor);
+ }
+
+ /* In order to keep interoprability with the client (< 2.3) which
+ * doesn't have pb_jobid in ptlrpc_body, We have to shrink the
+ * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise, the
+ * reply buffer on client will be overflow.
+ *
+ * XXX Remove this whenver we drop the interoprability with such client.
+ */
+ req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
+ sizeof(struct ptlrpc_body_v2), 1);
+
+ if (req->rq_type != PTL_RPC_MSG_ERR)
+ req->rq_type = PTL_RPC_MSG_REPLY;
+
+ lustre_msg_set_type(req->rq_repmsg, req->rq_type);
+ lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+ lustre_msg_set_opc(req->rq_repmsg,
+ req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
+
+ target_pack_pool_reply(req);
+
+ ptlrpc_at_set_reply(req, flags);
+
+ if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
+ conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
+ else
+ conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
+
+ if (unlikely(conn == NULL)) {
+ CERROR("not replying on NULL connection\n"); /* bug 9635 */
+ return -ENOTCONN;
+ }
+ ptlrpc_rs_addref(rs); /* +1 ref for the network */
+
+ rc = sptlrpc_svc_wrap_reply(req);
+ if (unlikely(rc))
+ goto out;
+
+ req->rq_sent = cfs_time_current_sec();
+
+ rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
+ (rs->rs_difficult && !rs->rs_no_ack) ?
+ LNET_ACK_REQ : LNET_NOACK_REQ,
+ &rs->rs_cb_id, conn,
+ ptlrpc_req2svc(req)->srv_rep_portal,
+ req->rq_xid, req->rq_reply_off);
+out:
+ if (unlikely(rc != 0))
+ ptlrpc_req_drop_rs(req);
+ ptlrpc_connection_put(conn);
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_send_reply);
+
+int ptlrpc_reply (struct ptlrpc_request *req)
+{
+ if (req->rq_no_reply)
+ return 0;
+ else
+ return (ptlrpc_send_reply(req, 0));
+}
+EXPORT_SYMBOL(ptlrpc_reply);
+
+/**
+ * For request \a req send an error reply back. Create empty
+ * reply buffers if necessary.
+ */
+int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
+{
+ int rc;
+ ENTRY;
+
+ if (req->rq_no_reply)
+ RETURN(0);
+
+ if (!req->rq_repmsg) {
+ rc = lustre_pack_reply(req, 1, NULL, NULL);
+ if (rc)
+ RETURN(rc);
+ }
+
+ if (req->rq_status != -ENOSPC && req->rq_status != -EACCES &&
+ req->rq_status != -EPERM && req->rq_status != -ENOENT &&
+ req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT)
+ req->rq_type = PTL_RPC_MSG_ERR;
+
+ rc = ptlrpc_send_reply(req, may_be_difficult);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_send_error);
+
+int ptlrpc_error(struct ptlrpc_request *req)
+{
+ return ptlrpc_send_error(req, 0);
+}
+EXPORT_SYMBOL(ptlrpc_error);
+
+/**
+ * Send request \a request.
+ * if \a noreply is set, don't expect any reply back and don't set up
+ * reply buffers.
+ * Returns 0 on success or error code.
+ */
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
+{
+ int rc;
+ int rc2;
+ int mpflag = 0;
+ struct ptlrpc_connection *connection;
+ lnet_handle_me_t reply_me_h;
+ lnet_md_t reply_md;
+ struct obd_device *obd = request->rq_import->imp_obd;
+ ENTRY;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
+ RETURN(0);
+
+ LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
+ LASSERT(request->rq_wait_ctx == 0);
+
+ /* If this is a re-transmit, we're required to have disengaged
+ * cleanly from the previous attempt */
+ LASSERT(!request->rq_receiving_reply);
+
+ if (request->rq_import->imp_obd &&
+ request->rq_import->imp_obd->obd_fail) {
+ CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
+ request->rq_import->imp_obd->obd_name);
+ /* this prevents us from waiting in ptlrpc_queue_wait */
+ request->rq_err = 1;
+ request->rq_status = -ENODEV;
+ RETURN(-ENODEV);
+ }
+
+ connection = request->rq_import->imp_connection;
+
+ lustre_msg_set_handle(request->rq_reqmsg,
+ &request->rq_import->imp_remote_handle);
+ lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
+ lustre_msg_set_conn_cnt(request->rq_reqmsg,
+ request->rq_import->imp_conn_cnt);
+ lustre_msghdr_set_flags(request->rq_reqmsg,
+ request->rq_import->imp_msghdr_flags);
+
+ if (request->rq_resend)
+ lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+
+ if (request->rq_memalloc)
+ mpflag = cfs_memory_pressure_get_and_set();
+
+ rc = sptlrpc_cli_wrap_request(request);
+ if (rc)
+ GOTO(out, rc);
+
+ /* bulk register should be done after wrap_request() */
+ if (request->rq_bulk != NULL) {
+ rc = ptlrpc_register_bulk (request);
+ if (rc != 0)
+ GOTO(out, rc);
+ }
+
+ if (!noreply) {
+ LASSERT (request->rq_replen != 0);
+ if (request->rq_repbuf == NULL) {
+ LASSERT(request->rq_repdata == NULL);
+ LASSERT(request->rq_repmsg == NULL);
+ rc = sptlrpc_cli_alloc_repbuf(request,
+ request->rq_replen);
+ if (rc) {
+ /* this prevents us from looping in
+ * ptlrpc_queue_wait */
+ request->rq_err = 1;
+ request->rq_status = rc;
+ GOTO(cleanup_bulk, rc);
+ }
+ } else {
+ request->rq_repdata = NULL;
+ request->rq_repmsg = NULL;
+ }
+
+ rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
+ connection->c_peer, request->rq_xid, 0,
+ LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
+ if (rc != 0) {
+ CERROR("LNetMEAttach failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ GOTO(cleanup_bulk, rc = -ENOMEM);
+ }
+ }
+
+ spin_lock(&request->rq_lock);
+ /* If the MD attach succeeds, there _will_ be a reply_in callback */
+ request->rq_receiving_reply = !noreply;
+ /* We are responsible for unlinking the reply buffer */
+ request->rq_must_unlink = !noreply;
+ /* Clear any flags that may be present from previous sends. */
+ request->rq_replied = 0;
+ request->rq_err = 0;
+ request->rq_timedout = 0;
+ request->rq_net_err = 0;
+ request->rq_resend = 0;
+ request->rq_restart = 0;
+ request->rq_reply_truncate = 0;
+ spin_unlock(&request->rq_lock);
+
+ if (!noreply) {
+ reply_md.start = request->rq_repbuf;
+ reply_md.length = request->rq_repbuf_len;
+ /* Allow multiple early replies */
+ reply_md.threshold = LNET_MD_THRESH_INF;
+ /* Manage remote for early replies */
+ reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
+ LNET_MD_MANAGE_REMOTE |
+ LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
+ reply_md.user_ptr = &request->rq_reply_cbid;
+ reply_md.eq_handle = ptlrpc_eq_h;
+
+ /* We must see the unlink callback to unset rq_must_unlink,
+ so we can't auto-unlink */
+ rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
+ &request->rq_reply_md_h);
+ if (rc != 0) {
+ CERROR("LNetMDAttach failed: %d\n", rc);
+ LASSERT (rc == -ENOMEM);
+ spin_lock(&request->rq_lock);
+ /* ...but the MD attach didn't succeed... */
+ request->rq_receiving_reply = 0;
+ spin_unlock(&request->rq_lock);
+ GOTO(cleanup_me, rc = -ENOMEM);
+ }
+
+ CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
+ ", portal %u\n",
+ request->rq_repbuf_len, request->rq_xid,
+ request->rq_reply_portal);
+ }
+
+ /* add references on request for request_out_callback */
+ ptlrpc_request_addref(request);
+ if (obd->obd_svc_stats != NULL)
+ lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
+ atomic_read(&request->rq_import->imp_inflight));
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
+
+ do_gettimeofday(&request->rq_arrival_time);
+ request->rq_sent = cfs_time_current_sec();
+ /* We give the server rq_timeout secs to process the req, and
+ add the network latency for our local timeout. */
+ request->rq_deadline = request->rq_sent + request->rq_timeout +
+ ptlrpc_at_get_net_latency(request);
+
+ ptlrpc_pinger_sending_on_import(request->rq_import);
+
+ DEBUG_REQ(D_INFO, request, "send flg=%x",
+ lustre_msg_get_flags(request->rq_reqmsg));
+ rc = ptl_send_buf(&request->rq_req_md_h,
+ request->rq_reqbuf, request->rq_reqdata_len,
+ LNET_NOACK_REQ, &request->rq_req_cbid,
+ connection,
+ request->rq_request_portal,
+ request->rq_xid, 0);
+ if (rc == 0)
+ GOTO(out, rc);
+
+ ptlrpc_req_finished(request);
+ if (noreply)
+ GOTO(out, rc);
+
+ cleanup_me:
+ /* MEUnlink is safe; the PUT didn't even get off the ground, and
+ * nobody apart from the PUT's target has the right nid+XID to
+ * access the reply buffer. */
+ rc2 = LNetMEUnlink(reply_me_h);
+ LASSERT (rc2 == 0);
+ /* UNLINKED callback called synchronously */
+ LASSERT(!request->rq_receiving_reply);
+
+ cleanup_bulk:
+ /* We do sync unlink here as there was no real transfer here so
+ * the chance to have long unlink to sluggish net is smaller here. */
+ ptlrpc_unregister_bulk(request, 0);
+ out:
+ if (request->rq_memalloc)
+ cfs_memory_pressure_restore(mpflag);
+ return rc;
+}
+EXPORT_SYMBOL(ptl_send_rpc);
+
+/**
+ * Register request buffer descriptor for request receiving.
+ */
+int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+ struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
+ static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY};
+ int rc;
+ lnet_md_t md;
+ lnet_handle_me_t me_h;
+
+ CDEBUG(D_NET, "LNetMEAttach: portal %d\n",
+ service->srv_req_portal);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD))
+ return (-ENOMEM);
+
+ /* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL,
+ * which means buffer can only be attached on local CPT, and LND
+ * threads can find it by grabbing a local lock */
+ rc = LNetMEAttach(service->srv_req_portal,
+ match_id, 0, ~0, LNET_UNLINK,
+ rqbd->rqbd_svcpt->scp_cpt >= 0 ?
+ LNET_INS_LOCAL : LNET_INS_AFTER, &me_h);
+ if (rc != 0) {
+ CERROR("LNetMEAttach failed: %d\n", rc);
+ return (-ENOMEM);
+ }
+
+ LASSERT(rqbd->rqbd_refcount == 0);
+ rqbd->rqbd_refcount = 1;
+
+ md.start = rqbd->rqbd_buffer;
+ md.length = service->srv_buf_size;
+ md.max_size = service->srv_max_req_size;
+ md.threshold = LNET_MD_THRESH_INF;
+ md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
+ md.user_ptr = &rqbd->rqbd_cbid;
+ md.eq_handle = ptlrpc_eq_h;
+
+ rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
+ if (rc == 0)
+ return (0);
+
+ CERROR("LNetMDAttach failed: %d; \n", rc);
+ LASSERT (rc == -ENOMEM);
+ rc = LNetMEUnlink (me_h);
+ LASSERT (rc == 0);
+ rqbd->rqbd_refcount = 0;
+
+ return (-ENOMEM);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
new file mode 100644
index 000000000000..1996431e35ff
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -0,0 +1,1790 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs.c
+ *
+ * Network Request Scheduler (NRS)
+ *
+ * Allows to reorder the handling of RPCs at servers.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lprocfs_status.h>
+#include <linux/libcfs/libcfs.h>
+#include "ptlrpc_internal.h"
+
+/* XXX: This is just for liblustre. Remove the #if defined directive when the
+ * "cfs_" prefix is dropped from cfs_list_head. */
+extern struct list_head ptlrpc_all_services;
+
+/**
+ * NRS core object.
+ */
+struct nrs_core nrs_core;
+
+static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_desc->pd_ops->op_policy_init != NULL ?
+ policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
+}
+
+static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_ref == 0);
+ LASSERT(policy->pol_req_queued == 0);
+
+ if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
+ policy->pol_desc->pd_ops->op_policy_fini(policy);
+}
+
+static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg)
+{
+ /**
+ * The policy may be stopped, but the lprocfs files and
+ * ptlrpc_nrs_policy instances remain present until unregistration time.
+ * Do not perform the ctl operation if the policy is stopped, as
+ * policy->pol_private will be NULL in such a case.
+ */
+ if (policy->pol_state == NRS_POL_STATE_STOPPED)
+ RETURN(-ENODEV);
+
+ RETURN(policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
+ policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
+ -ENOSYS);
+}
+
+static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+ ENTRY;
+
+ if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
+ spin_unlock(&nrs->nrs_lock);
+
+ policy->pol_desc->pd_ops->op_policy_stop(policy);
+
+ spin_lock(&nrs->nrs_lock);
+ }
+
+ LASSERT(list_empty(&policy->pol_list_queued));
+ LASSERT(policy->pol_req_queued == 0 &&
+ policy->pol_req_started == 0);
+
+ policy->pol_private = NULL;
+
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+
+ if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+ module_put(policy->pol_desc->pd_owner);
+
+ EXIT;
+}
+
+static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+ ENTRY;
+
+ if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
+ RETURN(-EPERM);
+
+ if (policy->pol_state == NRS_POL_STATE_STARTING)
+ RETURN(-EAGAIN);
+
+ /* In progress or already stopped */
+ if (policy->pol_state != NRS_POL_STATE_STARTED)
+ RETURN(0);
+
+ policy->pol_state = NRS_POL_STATE_STOPPING;
+
+ /* Immediately make it invisible */
+ if (nrs->nrs_policy_primary == policy) {
+ nrs->nrs_policy_primary = NULL;
+
+ } else {
+ LASSERT(nrs->nrs_policy_fallback == policy);
+ nrs->nrs_policy_fallback = NULL;
+ }
+
+ /* I have the only refcount */
+ if (policy->pol_ref == 1)
+ nrs_policy_stop0(policy);
+
+ RETURN(0);
+}
+
+/**
+ * Transitions the \a nrs NRS head's primary policy to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
+ * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
+ *
+ * \param[in] nrs the NRS head to carry out this operation on
+ */
+static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
+{
+ struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
+ ENTRY;
+
+ if (tmp == NULL) {
+ /**
+ * XXX: This should really be RETURN_EXIT, but the latter does
+ * not currently print anything out, and possibly should be
+ * fixed to do so.
+ */
+ EXIT;
+ return;
+ }
+
+ nrs->nrs_policy_primary = NULL;
+
+ LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
+ tmp->pol_state = NRS_POL_STATE_STOPPING;
+
+ if (tmp->pol_ref == 0)
+ nrs_policy_stop0(tmp);
+ EXIT;
+}
+
+/**
+ * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
+ * response to an lprocfs command to start a policy.
+ *
+ * If a primary policy different to the current one is specified, this function
+ * will transition the new policy to the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
+ * the old primary policy (if there is one) to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
+ *
+ * If the fallback policy is specified, this is taken to indicate an instruction
+ * to stop the current primary policy, without substituting it with another
+ * primary policy, so the primary policy (if any) is transitioned to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
+ * this case, the fallback policy is only left active in the NRS head.
+ */
+static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+ int rc = 0;
+ ENTRY;
+
+ /**
+ * Don't allow multiple starting which is too complex, and has no real
+ * benefit.
+ */
+ if (nrs->nrs_policy_starting)
+ RETURN(-EAGAIN);
+
+ LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
+
+ if (policy->pol_state == NRS_POL_STATE_STOPPING)
+ RETURN(-EAGAIN);
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+ /**
+ * This is for cases in which the user sets the policy to the
+ * fallback policy (currently fifo for all services); i.e. the
+ * user is resetting the policy to the default; so we stop the
+ * primary policy, if any.
+ */
+ if (policy == nrs->nrs_policy_fallback) {
+ nrs_policy_stop_primary(nrs);
+ RETURN(0);
+ }
+
+ /**
+ * If we reach here, we must be setting up the fallback policy
+ * at service startup time, and only a single policy with the
+ * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
+ * register with NRS core.
+ */
+ LASSERT(nrs->nrs_policy_fallback == NULL);
+ } else {
+ /**
+ * Shouldn't start primary policy if w/o fallback policy.
+ */
+ if (nrs->nrs_policy_fallback == NULL)
+ RETURN(-EPERM);
+
+ if (policy->pol_state == NRS_POL_STATE_STARTED)
+ RETURN(0);
+ }
+
+ /**
+ * Increase the module usage count for policies registering from other
+ * modules.
+ */
+ if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
+ !try_module_get(policy->pol_desc->pd_owner)) {
+ atomic_dec(&policy->pol_desc->pd_refs);
+ CERROR("NRS: cannot get module for policy %s; is it alive?\n",
+ policy->pol_desc->pd_name);
+ RETURN(-ENODEV);
+ }
+
+ /**
+ * Serialize policy starting across the NRS head
+ */
+ nrs->nrs_policy_starting = 1;
+
+ policy->pol_state = NRS_POL_STATE_STARTING;
+
+ if (policy->pol_desc->pd_ops->op_policy_start) {
+ spin_unlock(&nrs->nrs_lock);
+
+ rc = policy->pol_desc->pd_ops->op_policy_start(policy);
+
+ spin_lock(&nrs->nrs_lock);
+ if (rc != 0) {
+ if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+ module_put(policy->pol_desc->pd_owner);
+
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+ GOTO(out, rc);
+ }
+ }
+
+ policy->pol_state = NRS_POL_STATE_STARTED;
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+ /**
+ * This path is only used at PTLRPC service setup time.
+ */
+ nrs->nrs_policy_fallback = policy;
+ } else {
+ /*
+ * Try to stop the current primary policy if there is one.
+ */
+ nrs_policy_stop_primary(nrs);
+
+ /**
+ * And set the newly-started policy as the primary one.
+ */
+ nrs->nrs_policy_primary = policy;
+ }
+
+out:
+ nrs->nrs_policy_starting = 0;
+
+ RETURN(rc);
+}
+
+/**
+ * Increases the policy's usage reference count.
+ */
+static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
+{
+ policy->pol_ref++;
+}
+
+/**
+ * Decreases the policy's usage reference count, and stops the policy in case it
+ * was already stopping and have no more outstanding usage references (which
+ * indicates it has no more queued or started requests, and can be safely
+ * stopped).
+ */
+static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_ref > 0);
+
+ policy->pol_ref--;
+ if (unlikely(policy->pol_ref == 0 &&
+ policy->pol_state == NRS_POL_STATE_STOPPING))
+ nrs_policy_stop0(policy);
+}
+
+static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
+{
+ spin_lock(&policy->pol_nrs->nrs_lock);
+ nrs_policy_put_locked(policy);
+ spin_unlock(&policy->pol_nrs->nrs_lock);
+}
+
+/**
+ * Find and return a policy by name.
+ */
+static struct ptlrpc_nrs_policy * nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
+ char *name)
+{
+ struct ptlrpc_nrs_policy *tmp;
+
+ list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
+ if (strncmp(tmp->pol_desc->pd_name, name,
+ NRS_POL_NAME_MAX) == 0) {
+ nrs_policy_get_locked(tmp);
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Release references for the resource hierarchy moving upwards towards the
+ * policy instance resource.
+ */
+static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
+{
+ struct ptlrpc_nrs_policy *policy = res->res_policy;
+
+ if (policy->pol_desc->pd_ops->op_res_put != NULL) {
+ struct ptlrpc_nrs_resource *parent;
+
+ for (; res != NULL; res = parent) {
+ parent = res->res_parent;
+ policy->pol_desc->pd_ops->op_res_put(policy, res);
+ }
+ }
+}
+
+/**
+ * Obtains references for each resource in the resource hierarchy for request
+ * \a nrq if it is to be handled by \a policy.
+ *
+ * \param[in] policy the policy
+ * \param[in] nrq the request
+ * \param[in] moving_req denotes whether this is a call to the function by
+ * ldlm_lock_reorder_req(), in order to move \a nrq to
+ * the high-priority NRS head; we should not sleep when
+ * set.
+ *
+ * \retval NULL resource hierarchy references not obtained
+ * \retval valid-pointer the bottom level of the resource hierarchy
+ *
+ * \see ptlrpc_nrs_pol_ops::op_res_get()
+ */
+static
+struct ptlrpc_nrs_resource * nrs_resource_get(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ bool moving_req)
+{
+ /**
+ * Set to NULL to traverse the resource hierarchy from the top.
+ */
+ struct ptlrpc_nrs_resource *res = NULL;
+ struct ptlrpc_nrs_resource *tmp = NULL;
+ int rc;
+
+ while (1) {
+ rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
+ &tmp, moving_req);
+ if (rc < 0) {
+ if (res != NULL)
+ nrs_resource_put(res);
+ return NULL;
+ }
+
+ LASSERT(tmp != NULL);
+ tmp->res_parent = res;
+ tmp->res_policy = policy;
+ res = tmp;
+ tmp = NULL;
+ /**
+ * Return once we have obtained a reference to the bottom level
+ * of the resource hierarchy.
+ */
+ if (rc > 0)
+ return res;
+ }
+}
+
+/**
+ * Obtains resources for the resource hierarchies and policy references for
+ * the fallback and current primary policy (if any), that will later be used
+ * to handle request \a nrq.
+ *
+ * \param[in] nrs the NRS head instance that will be handling request \a nrq.
+ * \param[in] nrq the request that is being handled.
+ * \param[out] resp the array where references to the resource hierarchy are
+ * stored.
+ * \param[in] moving_req is set when obtaining resources while moving a
+ * request from a policy on the regular NRS head to a
+ * policy on the HP NRS head (via
+ * ldlm_lock_reorder_req()). It signifies that
+ * allocations to get resources should be atomic; for
+ * a full explanation, see comment in
+ * ptlrpc_nrs_pol_ops::op_res_get().
+ */
+static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_nrs_request *nrq,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req)
+{
+ struct ptlrpc_nrs_policy *primary = NULL;
+ struct ptlrpc_nrs_policy *fallback = NULL;
+
+ memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
+
+ /**
+ * Obtain policy references.
+ */
+ spin_lock(&nrs->nrs_lock);
+
+ fallback = nrs->nrs_policy_fallback;
+ nrs_policy_get_locked(fallback);
+
+ primary = nrs->nrs_policy_primary;
+ if (primary != NULL)
+ nrs_policy_get_locked(primary);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ /**
+ * Obtain resource hierarchy references.
+ */
+ resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
+ LASSERT(resp[NRS_RES_FALLBACK] != NULL);
+
+ if (primary != NULL) {
+ resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
+ moving_req);
+ /**
+ * A primary policy may exist which may not wish to serve a
+ * particular request for different reasons; release the
+ * reference on the policy as it will not be used for this
+ * request.
+ */
+ if (resp[NRS_RES_PRIMARY] == NULL)
+ nrs_policy_put(primary);
+ }
+}
+
+/**
+ * Releases references to resource hierarchies and policies, because they are no
+ * longer required; used when request handling has been completed, or the
+ * request is moving to the high priority NRS head.
+ *
+ * \param resp the resource hierarchy that is being released
+ *
+ * \see ptlrpcnrs_req_hp_move()
+ * \see ptlrpc_nrs_req_finalize()
+ */
+static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
+{
+ struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
+ struct ptlrpc_nrs *nrs = NULL;
+ int i;
+
+ for (i = 0; i < NRS_RES_MAX; i++) {
+ if (resp[i] != NULL) {
+ pols[i] = resp[i]->res_policy;
+ nrs_resource_put(resp[i]);
+ resp[i] = NULL;
+ } else {
+ pols[i] = NULL;
+ }
+ }
+
+ for (i = 0; i < NRS_RES_MAX; i++) {
+ if (pols[i] == NULL)
+ continue;
+
+ if (nrs == NULL) {
+ nrs = pols[i]->pol_nrs;
+ spin_lock(&nrs->nrs_lock);
+ }
+ nrs_policy_put_locked(pols[i]);
+ }
+
+ if (nrs != NULL)
+ spin_unlock(&nrs->nrs_lock);
+}
+
+/**
+ * Obtains an NRS request from \a policy for handling or examination; the
+ * request should be removed in the 'handling' case.
+ *
+ * Calling into this function implies we already know the policy has a request
+ * waiting to be handled.
+ *
+ * \param[in] policy the policy from which a request
+ * \param[in] peek when set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force when set, it will force a policy to return a request if it
+ * has one pending
+ *
+ * \retval the NRS request to be handled
+ */
+static inline
+struct ptlrpc_nrs_request * nrs_request_get(struct ptlrpc_nrs_policy *policy,
+ bool peek, bool force)
+{
+ struct ptlrpc_nrs_request *nrq;
+
+ LASSERT(policy->pol_req_queued > 0);
+
+ nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
+
+ LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
+
+ return nrq;
+}
+
+/**
+ * Enqueues request \a nrq for later handling, via one one the policies for
+ * which resources where earlier obtained via nrs_resource_get_safe(). The
+ * function attempts to enqueue the request first on the primary policy
+ * (if any), since this is the preferred choice.
+ *
+ * \param nrq the request being enqueued
+ *
+ * \see nrs_resource_get_safe()
+ */
+static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_nrs_policy *policy;
+ int rc;
+ int i;
+
+ /**
+ * Try in descending order, because the primary policy (if any) is
+ * the preferred choice.
+ */
+ for (i = NRS_RES_MAX - 1; i >= 0; i--) {
+ if (nrq->nr_res_ptrs[i] == NULL)
+ continue;
+
+ nrq->nr_res_idx = i;
+ policy = nrq->nr_res_ptrs[i]->res_policy;
+
+ rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
+ if (rc == 0) {
+ policy->pol_nrs->nrs_req_queued++;
+ policy->pol_req_queued++;
+ return;
+ }
+ }
+ /**
+ * Should never get here, as at least the primary policy's
+ * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
+ * succeed.
+ */
+ LBUG();
+}
+
+/**
+ * Called when a request has been handled
+ *
+ * \param[in] nrs the request that has been handled; can be used for
+ * job/resource control.
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
+
+ if (policy->pol_desc->pd_ops->op_req_stop)
+ policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
+
+ LASSERT(policy->pol_nrs->nrs_req_started > 0);
+ LASSERT(policy->pol_req_started > 0);
+
+ policy->pol_nrs->nrs_req_started--;
+ policy->pol_req_started--;
+}
+
+/**
+ * Handler for operations that can be carried out on policies.
+ *
+ * Handles opcodes that are common to all policy types within NRS core, and
+ * passes any unknown opcodes to the policy-specific control function.
+ *
+ * \param[in] nrs the NRS head this policy belongs to.
+ * \param[in] name the human-readable policy name; should be the same as
+ * ptlrpc_nrs_pol_desc::pd_name.
+ * \param[in] opc the opcode of the operation being carried out.
+ * \param[in,out] arg can be used to pass information in and out between when
+ * carrying an operation; usually data that is private to
+ * the policy at some level, or generic policy status
+ * information.
+ *
+ * \retval -ve error condition
+ * \retval 0 operation was carried out successfully
+ */
+static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
+ enum ptlrpc_nrs_ctl opc, void *arg)
+{
+ struct ptlrpc_nrs_policy *policy;
+ int rc = 0;
+ ENTRY;
+
+ spin_lock(&nrs->nrs_lock);
+
+ policy = nrs_policy_find_locked(nrs, name);
+ if (policy == NULL)
+ GOTO(out, rc = -ENOENT);
+
+ switch (opc) {
+ /**
+ * Unknown opcode, pass it down to the policy-specific control
+ * function for handling.
+ */
+ default:
+ rc = nrs_policy_ctl_locked(policy, opc, arg);
+ break;
+
+ /**
+ * Start \e policy
+ */
+ case PTLRPC_NRS_CTL_START:
+ rc = nrs_policy_start_locked(policy);
+ break;
+ }
+out:
+ if (policy != NULL)
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ RETURN(rc);
+}
+
+/**
+ * Unregisters a policy by name.
+ *
+ * \param[in] nrs the NRS head this policy belongs to.
+ * \param[in] name the human-readable policy name; should be the same as
+ * ptlrpc_nrs_pol_desc::pd_name
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
+{
+ struct ptlrpc_nrs_policy *policy = NULL;
+ ENTRY;
+
+ spin_lock(&nrs->nrs_lock);
+
+ policy = nrs_policy_find_locked(nrs, name);
+ if (policy == NULL) {
+ spin_unlock(&nrs->nrs_lock);
+
+ CERROR("Can't find NRS policy %s\n", name);
+ RETURN(-ENOENT);
+ }
+
+ if (policy->pol_ref > 1) {
+ CERROR("Policy %s is busy with %d references\n", name,
+ (int)policy->pol_ref);
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+ RETURN(-EBUSY);
+ }
+
+ LASSERT(policy->pol_req_queued == 0);
+ LASSERT(policy->pol_req_started == 0);
+
+ if (policy->pol_state != NRS_POL_STATE_STOPPED) {
+ nrs_policy_stop_locked(policy);
+ LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
+ }
+
+ list_del(&policy->pol_list);
+ nrs->nrs_num_pols--;
+
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ nrs_policy_fini(policy);
+
+ LASSERT(policy->pol_private == NULL);
+ OBD_FREE_PTR(policy);
+
+ RETURN(0);
+}
+
+/**
+ * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
+ *
+ * \param[in] nrs the NRS head on which the policy will be registered.
+ * \param[in] desc the policy descriptor from which the information will be
+ * obtained to register the policy.
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+static int nrs_policy_register(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_nrs_pol_desc *desc)
+{
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_policy *tmp;
+ struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
+ int rc;
+ ENTRY;
+
+ LASSERT(svcpt != NULL);
+ LASSERT(desc->pd_ops != NULL);
+ LASSERT(desc->pd_ops->op_res_get != NULL);
+ LASSERT(desc->pd_ops->op_req_get != NULL);
+ LASSERT(desc->pd_ops->op_req_enqueue != NULL);
+ LASSERT(desc->pd_ops->op_req_dequeue != NULL);
+ LASSERT(desc->pd_compat != NULL);
+
+ OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable,
+ svcpt->scp_cpt, sizeof(*policy), __GFP_IO);
+ if (policy == NULL)
+ RETURN(-ENOMEM);
+
+ policy->pol_nrs = nrs;
+ policy->pol_desc = desc;
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+ policy->pol_flags = desc->pd_flags;
+
+ INIT_LIST_HEAD(&policy->pol_list);
+ INIT_LIST_HEAD(&policy->pol_list_queued);
+
+ rc = nrs_policy_init(policy);
+ if (rc != 0) {
+ OBD_FREE_PTR(policy);
+ RETURN(rc);
+ }
+
+ spin_lock(&nrs->nrs_lock);
+
+ tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
+ if (tmp != NULL) {
+ CERROR("NRS policy %s has been registered, can't register it "
+ "for %s\n", policy->pol_desc->pd_name,
+ svcpt->scp_service->srv_name);
+ nrs_policy_put_locked(tmp);
+
+ spin_unlock(&nrs->nrs_lock);
+ nrs_policy_fini(policy);
+ OBD_FREE_PTR(policy);
+
+ RETURN(-EEXIST);
+ }
+
+ list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
+ nrs->nrs_num_pols++;
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
+ rc = nrs_policy_start_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ if (rc != 0)
+ (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+
+ RETURN(rc);
+}
+
+/**
+ * Enqueue request \a req using one of the policies its resources are referring
+ * to.
+ *
+ * \param[in] req the request to enqueue.
+ */
+static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
+{
+ struct ptlrpc_nrs_policy *policy;
+
+ LASSERT(req->rq_nrq.nr_initialized);
+ LASSERT(!req->rq_nrq.nr_enqueued);
+
+ nrs_request_enqueue(&req->rq_nrq);
+ req->rq_nrq.nr_enqueued = 1;
+
+ policy = nrs_request_policy(&req->rq_nrq);
+ /**
+ * Add the policy to the NRS head's list of policies with enqueued
+ * requests, if it has not been added there.
+ */
+ if (unlikely(list_empty(&policy->pol_list_queued)))
+ list_add_tail(&policy->pol_list_queued,
+ &policy->pol_nrs->nrs_policy_queued);
+}
+
+/**
+ * Enqueue a request on the high priority NRS head.
+ *
+ * \param req the request to enqueue.
+ */
+static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
+{
+ int opc = lustre_msg_get_opc(req->rq_reqmsg);
+ ENTRY;
+
+ spin_lock(&req->rq_lock);
+ req->rq_hp = 1;
+ ptlrpc_nrs_req_add_nolock(req);
+ if (opc != OBD_PING)
+ DEBUG_REQ(D_NET, req, "high priority req");
+ spin_unlock(&req->rq_lock);
+ EXIT;
+}
+
+/**
+ * Returns a boolean predicate indicating whether the policy described by
+ * \a desc is adequate for use with service \a svc.
+ *
+ * \param[in] svc the service
+ * \param[in] desc the policy descriptor
+ *
+ * \retval false the policy is not compatible with the service
+ * \retval true the policy is compatible with the service
+ */
+static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc)
+{
+ return desc->pd_compat(svc, desc);
+}
+
+/**
+ * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
+ * \a nrs.
+ *
+ * \param[in] nrs the NRS head
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ *
+ * \see ptlrpc_service_nrs_setup()
+ */
+static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ /* for convenience */
+ struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int rc = -EINVAL;
+ ENTRY;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (nrs_policy_compatible(svc, desc)) {
+ rc = nrs_policy_register(nrs, desc);
+ if (rc != 0) {
+ CERROR("Failed to register NRS policy %s for "
+ "partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svc->srv_name, rc);
+ /**
+ * Fail registration if any of the policies'
+ * registration fails.
+ */
+ break;
+ }
+ }
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
+ * compatible policies in NRS core, with the NRS head.
+ *
+ * \param[in] nrs the NRS head
+ * \param[in] svcpt the PTLRPC service partition to setup
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_service_part *svcpt)
+{
+ int rc;
+ enum ptlrpc_nrs_queue_type queue;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ if (nrs == &svcpt->scp_nrs_reg)
+ queue = PTLRPC_NRS_QUEUE_REG;
+ else if (nrs == svcpt->scp_nrs_hp)
+ queue = PTLRPC_NRS_QUEUE_HP;
+ else
+ LBUG();
+
+ nrs->nrs_svcpt = svcpt;
+ nrs->nrs_queue_type = queue;
+ spin_lock_init(&nrs->nrs_lock);
+ INIT_LIST_HEAD(&nrs->nrs_policy_list);
+ INIT_LIST_HEAD(&nrs->nrs_policy_queued);
+
+ rc = nrs_register_policies_locked(nrs);
+
+ RETURN(rc);
+}
+
+/**
+ * Allocates a regular and optionally a high-priority NRS head (if the service
+ * handles high-priority RPCs), and then registers all available compatible
+ * policies on those NRS heads.
+ *
+ * \param[in,out] svcpt the PTLRPC service partition to setup
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_nrs *nrs;
+ int rc;
+ ENTRY;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ /**
+ * Initialize the regular NRS head.
+ */
+ nrs = nrs_svcpt2nrs(svcpt, false);
+ rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /**
+ * Optionally allocate a high-priority NRS head.
+ */
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
+ GOTO(out, rc);
+
+ OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp,
+ svcpt->scp_service->srv_cptable,
+ svcpt->scp_cpt);
+ if (svcpt->scp_nrs_hp == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ nrs = nrs_svcpt2nrs(svcpt, true);
+ rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+
+out:
+ RETURN(rc);
+}
+
+/**
+ * Unregisters all policies on all available NRS heads in a service partition;
+ * called at PTLRPC service unregistration time.
+ *
+ * \param[in] svcpt the PTLRPC service partition
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_policy *tmp;
+ int rc;
+ bool hp = false;
+ ENTRY;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ nrs->nrs_stopping = 1;
+
+ list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
+ pol_list) {
+ rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+ LASSERT(rc == 0);
+ }
+
+ /**
+ * If the service partition has an HP NRS head, clean that up as well.
+ */
+ if (!hp && nrs_svcpt_has_hp(svcpt)) {
+ hp = true;
+ goto again;
+ }
+
+ if (hp)
+ OBD_FREE_PTR(nrs);
+
+ EXIT;
+}
+
+/**
+ * Returns the descriptor for a policy as identified by by \a name.
+ *
+ * \param[in] name the policy name
+ *
+ * \retval the policy descriptor
+ * \retval NULL
+ */
+static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
+{
+ struct ptlrpc_nrs_pol_desc *tmp;
+ ENTRY;
+
+ list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
+ if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
+ RETURN(tmp);
+ }
+ RETURN(NULL);
+}
+
+/**
+ * Removes the policy from all supported NRS heads of all partitions of all
+ * PTLRPC services.
+ *
+ * \param[in] desc the policy descriptor to unregister
+ *
+ * \retval -ve error
+ * \retval 0 successfully unregistered policy on all supported NRS heads
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
+ */
+static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
+{
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_service *svc;
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+ LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
+
+ list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+
+ if (!nrs_policy_compatible(svc, desc) ||
+ unlikely(svc->srv_is_stopping))
+ continue;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ bool hp = false;
+
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ rc = nrs_policy_unregister(nrs, desc->pd_name);
+ /**
+ * Ignore -ENOENT as the policy may not have registered
+ * successfully on all service partitions.
+ */
+ if (rc == -ENOENT) {
+ rc = 0;
+ } else if (rc != 0) {
+ CERROR("Failed to unregister NRS policy %s for "
+ "partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svcpt->scp_service->srv_name, rc);
+ RETURN(rc);
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ hp = true;
+ goto again;
+ }
+ }
+
+ if (desc->pd_ops->op_lprocfs_fini != NULL)
+ desc->pd_ops->op_lprocfs_fini(svc);
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * Registers a new policy with NRS core.
+ *
+ * The function will only succeed if policy registration with all compatible
+ * service partitions (if any) is successful.
+ *
+ * N.B. This function should be called either at ptlrpc module initialization
+ * time when registering a policy that ships with NRS core, or in a
+ * module's init() function for policies registering from other modules.
+ *
+ * \param[in] conf configuration information for the new policy to register
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
+{
+ struct ptlrpc_service *svc;
+ struct ptlrpc_nrs_pol_desc *desc;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(conf != NULL);
+ LASSERT(conf->nc_ops != NULL);
+ LASSERT(conf->nc_compat != NULL);
+ LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
+ conf->nc_compat_svc_name != NULL));
+ LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
+ conf->nc_owner != NULL));
+
+ conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+ /**
+ * External policies are not allowed to start immediately upon
+ * registration, as there is a relatively higher chance that their
+ * registration might fail. In such a case, some policy instances may
+ * already have requests queued wen unregistration needs to happen as
+ * part o cleanup; since there is currently no way to drain requests
+ * from a policy unless the service is unregistering, we just disallow
+ * this.
+ */
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
+ (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
+ PTLRPC_NRS_FL_REG_START))) {
+ CERROR("NRS: failing to register policy %s. Please check "
+ "policy flags; external policies cannot act as fallback "
+ "policies, or be started immediately upon registration "
+ "without interaction with lprocfs\n", conf->nc_name);
+ RETURN(-EINVAL);
+ }
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
+ CERROR("NRS: failing to register policy %s which has already "
+ "been registered with NRS core!\n",
+ conf->nc_name);
+ GOTO(fail, rc = -EEXIST);
+ }
+
+ OBD_ALLOC_PTR(desc);
+ if (desc == NULL)
+ GOTO(fail, rc = -ENOMEM);
+
+ strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
+ desc->pd_ops = conf->nc_ops;
+ desc->pd_compat = conf->nc_compat;
+ desc->pd_compat_svc_name = conf->nc_compat_svc_name;
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
+ desc->pd_owner = conf->nc_owner;
+ desc->pd_flags = conf->nc_flags;
+ atomic_set(&desc->pd_refs, 0);
+
+ /**
+ * For policies that are held in the same module as NRS (currently
+ * ptlrpc), do not register the policy with all compatible services,
+ * as the services will not have started at this point, since we are
+ * calling from ptlrpc module initialization code. In such cases each
+ * service will register all compatible policies later, via
+ * ptlrpc_service_nrs_setup().
+ */
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
+ goto internal;
+
+ /**
+ * Register the new policy on all compatible services
+ */
+ mutex_lock(&ptlrpc_all_services_mutex);
+
+ list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc2;
+
+ if (!nrs_policy_compatible(svc, desc) ||
+ unlikely(svc->srv_is_stopping))
+ continue;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ struct ptlrpc_nrs *nrs;
+ bool hp = false;
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ rc = nrs_policy_register(nrs, desc);
+ if (rc != 0) {
+ CERROR("Failed to register NRS policy %s for "
+ "partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svcpt->scp_service->srv_name, rc);
+
+ rc2 = nrs_policy_unregister_locked(desc);
+ /**
+ * Should not fail at this point
+ */
+ LASSERT(rc2 == 0);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+ OBD_FREE_PTR(desc);
+ GOTO(fail, rc);
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ hp = true;
+ goto again;
+ }
+ }
+
+ /**
+ * No need to take a reference to other modules here, as we
+ * will be calling from the module's init() function.
+ */
+ if (desc->pd_ops->op_lprocfs_init != NULL) {
+ rc = desc->pd_ops->op_lprocfs_init(svc);
+ if (rc != 0) {
+ rc2 = nrs_policy_unregister_locked(desc);
+ /**
+ * Should not fail at this point
+ */
+ LASSERT(rc2 == 0);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+ OBD_FREE_PTR(desc);
+ GOTO(fail, rc);
+ }
+ }
+ }
+
+ mutex_unlock(&ptlrpc_all_services_mutex);
+internal:
+ list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
+fail:
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_register);
+
+/**
+ * Unregisters a previously registered policy with NRS core. All instances of
+ * the policy on all NRS heads of all supported services are removed.
+ *
+ * N.B. This function should only be called from a module's exit() function.
+ * Although it can be used for policies that ship alongside NRS core, the
+ * function is primarily intended for policies that register externally,
+ * from other modules.
+ *
+ * \param[in] conf configuration information for the policy to unregister
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ int rc;
+ ENTRY;
+
+ LASSERT(conf != NULL);
+
+ if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) {
+ CERROR("Unable to unregister a fallback policy, unless the "
+ "PTLRPC service is stopping.\n");
+ RETURN(-EPERM);
+ }
+
+ conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ desc = nrs_policy_find_desc_locked(conf->nc_name);
+ if (desc == NULL) {
+ CERROR("Failing to unregister NRS policy %s which has "
+ "not been registered with NRS core!\n",
+ conf->nc_name);
+ GOTO(not_exist, rc = -ENOENT);
+ }
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+
+ rc = nrs_policy_unregister_locked(desc);
+ if (rc < 0) {
+ if (rc == -EBUSY)
+ CERROR("Please first stop policy %s on all service "
+ "partitions and then retry to unregister the "
+ "policy.\n", conf->nc_name);
+ GOTO(fail, rc);
+ }
+
+ CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n",
+ conf->nc_name);
+
+ list_del(&desc->pd_list);
+ OBD_FREE_PTR(desc);
+
+fail:
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+not_exist:
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister);
+
+/**
+ * Setup NRS heads on all service partitions of service \a svc, and register
+ * all compatible policies on those NRS heads.
+ *
+ * To be called from withing ptl
+ * \param[in] svc the service to setup
+ *
+ * \retval -ve error, the calling logic should eventually call
+ * ptlrpc_service_nrs_cleanup() to undo any work performed
+ * by this function.
+ *
+ * \see ptlrpc_register_service()
+ * \see ptlrpc_service_nrs_cleanup()
+ */
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ const struct ptlrpc_nrs_pol_desc *desc;
+ int i;
+ int rc = 0;
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Initialize NRS heads on all service CPTs.
+ */
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ rc = nrs_svcpt_setup_locked(svcpt);
+ if (rc != 0)
+ GOTO(failed, rc);
+ }
+
+ /**
+ * Set up lprocfs interfaces for all supported policies for the
+ * service.
+ */
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (!nrs_policy_compatible(svc, desc))
+ continue;
+
+ if (desc->pd_ops->op_lprocfs_init != NULL) {
+ rc = desc->pd_ops->op_lprocfs_init(svc);
+ if (rc != 0)
+ GOTO(failed, rc);
+ }
+ }
+
+failed:
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ RETURN(rc);
+}
+
+/**
+ * Unregisters all policies on all service partitions of service \a svc.
+ *
+ * \param[in] svc the PTLRPC service to unregister
+ */
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ const struct ptlrpc_nrs_pol_desc *desc;
+ int i;
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Clean up NRS heads on all service partitions
+ */
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ nrs_svcpt_cleanup_locked(svcpt);
+
+ /**
+ * Clean up lprocfs interfaces for all supported policies for the
+ * service.
+ */
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (!nrs_policy_compatible(svc, desc))
+ continue;
+
+ if (desc->pd_ops->op_lprocfs_fini != NULL)
+ desc->pd_ops->op_lprocfs_fini(svc);
+ }
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+}
+
+/**
+ * Obtains NRS head resources for request \a req.
+ *
+ * These could be either on the regular or HP NRS head of \a svcpt; resources
+ * taken on the regular head can later be swapped for HP head resources by
+ * ldlm_lock_reorder_req().
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req the request
+ * \param[in] hp which NRS head of \a svcpt to use
+ */
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+
+ memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
+ nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
+ false);
+
+ /**
+ * It is fine to access \e nr_initialized without locking as there is
+ * no contention at this early stage.
+ */
+ req->rq_nrq.nr_initialized = 1;
+}
+
+/**
+ * Releases resources for a request; is called after the request has been
+ * handled.
+ *
+ * \param[in] req the request
+ *
+ * \see ptlrpc_server_finish_request()
+ */
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
+{
+ if (req->rq_nrq.nr_initialized) {
+ nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
+ /* no protection on bit nr_initialized because no
+ * contention at this late stage */
+ req->rq_nrq.nr_finalized = 1;
+ }
+}
+
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
+{
+ if (req->rq_nrq.nr_started)
+ nrs_request_stop(&req->rq_nrq);
+}
+
+/**
+ * Enqueues request \a req on either the regular or high-priority NRS head
+ * of service partition \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req the request to be enqueued
+ * \param[in] hp whether to enqueue the request on the regular or
+ * high-priority NRS head.
+ */
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp)
+{
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (hp)
+ ptlrpc_nrs_hpreq_add_nolock(req);
+ else
+ ptlrpc_nrs_req_add_nolock(req);
+
+ spin_unlock(&svcpt->scp_req_lock);
+}
+
+static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_nrs->nrs_req_queued > 0);
+ LASSERT(policy->pol_req_queued > 0);
+
+ policy->pol_nrs->nrs_req_queued--;
+ policy->pol_req_queued--;
+
+ /**
+ * If the policy has no more requests queued, remove it from
+ * ptlrpc_nrs::nrs_policy_queued.
+ */
+ if (unlikely(policy->pol_req_queued == 0)) {
+ list_del_init(&policy->pol_list_queued);
+
+ /**
+ * If there are other policies with queued requests, move the
+ * current policy to the end so that we can round robin over
+ * all policies and drain the requests.
+ */
+ } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
+ LASSERT(policy->pol_req_queued <
+ policy->pol_nrs->nrs_req_queued);
+
+ list_move_tail(&policy->pol_list_queued,
+ &policy->pol_nrs->nrs_policy_queued);
+ }
+}
+
+/**
+ * Obtains a request for handling from an NRS head of service partition
+ * \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] hp whether to obtain a request from the regular or
+ * high-priority NRS head.
+ * \param[in] peek when set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force when set, it will force a policy to return a request if it
+ * has one pending
+ *
+ * \retval the request to be handled
+ * \retval NULL the head has no requests to serve
+ */
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+ bool peek, bool force)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_request *nrq;
+
+ /**
+ * Always try to drain requests from all NRS polices even if they are
+ * inactive, because the user can change policy status at runtime.
+ */
+ list_for_each_entry(policy, &nrs->nrs_policy_queued,
+ pol_list_queued) {
+ nrq = nrs_request_get(policy, peek, force);
+ if (nrq != NULL) {
+ if (likely(!peek)) {
+ nrq->nr_started = 1;
+
+ policy->pol_req_started++;
+ policy->pol_nrs->nrs_req_started++;
+
+ nrs_request_removed(policy);
+ }
+
+ return container_of(nrq, struct ptlrpc_request, rq_nrq);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Dequeues request \a req from the policy it has been enqueued on.
+ *
+ * \param[in] req the request
+ */
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req)
+{
+ struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq);
+
+ policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq);
+
+ req->rq_nrq.nr_enqueued = 0;
+
+ nrs_request_removed(policy);
+}
+
+/**
+ * Returns whether there are any requests currently enqueued on any of the
+ * policies of service partition's \a svcpt NRS head specified by \a hp. Should
+ * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
+ * result.
+ *
+ * \param[in] svcpt the service partition to enquire.
+ * \param[in] hp whether the regular or high-priority NRS head is to be
+ * enquired.
+ *
+ * \retval false the indicated NRS head has no enqueued requests.
+ * \retval true the indicated NRS head has some enqueued requests.
+ */
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+
+ return nrs->nrs_req_queued > 0;
+};
+
+/**
+ * Moves request \a req from the regular to the high-priority NRS head.
+ *
+ * \param[in] req the request to move
+ */
+void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
+ struct ptlrpc_nrs_resource *res1[NRS_RES_MAX];
+ struct ptlrpc_nrs_resource *res2[NRS_RES_MAX];
+ ENTRY;
+
+ /**
+ * Obtain the high-priority NRS head resources.
+ */
+ nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true);
+
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (!ptlrpc_nrs_req_can_move(req))
+ goto out;
+
+ ptlrpc_nrs_req_del_nolock(req);
+
+ memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0]));
+ memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0]));
+
+ ptlrpc_nrs_hpreq_add_nolock(req);
+
+ memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0]));
+out:
+ spin_unlock(&svcpt->scp_req_lock);
+
+ /**
+ * Release either the regular NRS head resources if we moved the
+ * request, or the high-priority NRS head resources if we took a
+ * reference earlier in this function and ptlrpc_nrs_req_can_move()
+ * returned false.
+ */
+ nrs_resource_put_safe(res1);
+ EXIT;
+}
+
+/**
+ * Carries out a control operation \a opc on the policy identified by the
+ * human-readable \a name, on either all partitions, or only on the first
+ * partition of service \a svc.
+ *
+ * \param[in] svc the service the policy belongs to.
+ * \param[in] queue whether to carry out the command on the policy which
+ * belongs to the regular, high-priority, or both NRS
+ * heads of service partitions of \a svc.
+ * \param[in] name the policy to act upon, by human-readable name
+ * \param[in] opc the opcode of the operation to carry out
+ * \param[in] single when set, the operation will only be carried out on the
+ * NRS heads of the first service partition of \a svc.
+ * This is useful for some policies which e.g. share
+ * identical values on the same parameters of different
+ * service partitions; when reading these parameters via
+ * lprocfs, these policies may just want to obtain and
+ * print out the values from the first service partition.
+ * Storing these values centrally elsewhere then could be
+ * another solution for this.
+ * \param[in,out] arg can be used as a generic in/out buffer between control
+ * operations and the user environment.
+ *
+ *\retval -ve error condition
+ *\retval 0 operation was carried out successfully
+ */
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+ enum ptlrpc_nrs_queue_type queue, char *name,
+ enum ptlrpc_nrs_ctl opc, bool single, void *arg)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
+
+ if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
+ return -EINVAL;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
+ rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
+ opc, arg);
+ if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
+ single))
+ GOTO(out, rc);
+ }
+
+ if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
+ /**
+ * XXX: We could optionally check for
+ * nrs_svc_has_hp(svc) here, and return an error if it
+ * is false. Right now we rely on the policies' lprocfs
+ * handlers that call the present function to make this
+ * check; if they fail to do so, they might hit the
+ * assertion inside nrs_svcpt2nrs() below.
+ */
+ rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
+ opc, arg);
+ if (rc != 0 || single)
+ GOTO(out, rc);
+ }
+ }
+out:
+ RETURN(rc);
+}
+
+
+/* ptlrpc/nrs_fifo.c */
+extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
+
+/**
+ * Adds all policies that ship with the ptlrpc module, to NRS core's list of
+ * policies \e nrs_core.nrs_policies.
+ *
+ * \retval 0 all policies have been registered successfully
+ * \retval -ve error
+ */
+int ptlrpc_nrs_init(void)
+{
+ int rc;
+ ENTRY;
+
+ mutex_init(&nrs_core.nrs_mutex);
+ INIT_LIST_HEAD(&nrs_core.nrs_policies);
+
+ rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
+ if (rc != 0)
+ GOTO(fail, rc);
+
+
+ RETURN(rc);
+fail:
+ /**
+ * Since no PTLRPC services have been started at this point, all we need
+ * to do for cleanup is to free the descriptors.
+ */
+ ptlrpc_nrs_fini();
+
+ RETURN(rc);
+}
+
+/**
+ * Removes all policy desciptors from nrs_core::nrs_policies, and frees the
+ * policy descriptors.
+ *
+ * Since all PTLRPC services are stopped at this point, there are no more
+ * instances of any policies, because each service will have stopped its policy
+ * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
+ * descriptors here.
+ */
+void ptlrpc_nrs_fini(void)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ struct ptlrpc_nrs_pol_desc *tmp;
+
+ list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
+ pd_list) {
+ list_del_init(&desc->pd_list);
+ OBD_FREE_PTR(desc);
+ }
+}
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_crr.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_crr.c
new file mode 100644
index 000000000000..ddfb5102d822
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs_crr.c
@@ -0,0 +1,40 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs_crr.c
+ *
+ * Network Request Scheduler (NRS) CRR-N policy
+ *
+ * Request ordering in a batched Round-Robin manner over client NIDs
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
new file mode 100644
index 000000000000..7d3ee9706c9b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
@@ -0,0 +1,270 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs_fifo.c
+ *
+ * Network Request Scheduler (NRS) FIFO policy
+ *
+ * Handles RPCs in a FIFO manner, as received from the network. This policy is
+ * a logical wrapper around previous, non-NRS functionality. It is used as the
+ * default and fallback policy for all types of RPCs on all PTLRPC service
+ * partitions, for both regular and high-priority NRS heads. Default here means
+ * the policy is the one enabled at PTLRPC service partition startup time, and
+ * fallback means the policy is used to handle RPCs that are not handled
+ * successfully or are not handled at all by any primary policy that may be
+ * enabled on a given NRS head.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include <obd_support.h>
+#include <obd_class.h>
+#include <linux/libcfs/libcfs.h>
+#include "ptlrpc_internal.h"
+
+/**
+ * \name fifo
+ *
+ * The FIFO policy is a logical wrapper around previous, non-NRS functionality.
+ * It schedules RPCs in the same order as they are queued from LNet.
+ *
+ * @{
+ */
+
+#define NRS_POL_NAME_FIFO "fifo"
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a
+ * policy-specific private data structure.
+ *
+ * \param[in] policy The policy to start
+ *
+ * \retval -ENOMEM OOM error
+ * \retval 0 success
+ *
+ * \see nrs_policy_register()
+ * \see nrs_policy_ctl()
+ */
+static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy)
+{
+ struct nrs_fifo_head *head;
+
+ OBD_CPT_ALLOC_PTR(head, nrs_pol2cptab(policy), nrs_pol2cptid(policy));
+ if (head == NULL)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&head->fh_list);
+ policy->pol_private = head;
+ return 0;
+}
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific
+ * private data structure.
+ *
+ * \param[in] policy The policy to stop
+ *
+ * \see nrs_policy_stop0()
+ */
+static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy)
+{
+ struct nrs_fifo_head *head = policy->pol_private;
+
+ LASSERT(head != NULL);
+ LASSERT(list_empty(&head->fh_list));
+
+ OBD_FREE_PTR(head);
+}
+
+/**
+ * Is called for obtaining a FIFO policy resource.
+ *
+ * \param[in] policy The policy on which the request is being asked for
+ * \param[in] nrq The request for which resources are being taken
+ * \param[in] parent Parent resource, unused in this policy
+ * \param[out] resp Resources references are placed in this array
+ * \param[in] moving_req Signifies limited caller context; unused in this
+ * policy
+ *
+ * \retval 1 The FIFO policy only has a one-level resource hierarchy, as since
+ * it implements a simple scheduling algorithm in which request
+ * priority is determined on the request arrival order, it does not
+ * need to maintain a set of resources that would otherwise be used
+ * to calculate a request's priority.
+ *
+ * \see nrs_resource_get_safe()
+ */
+static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp, bool moving_req)
+{
+ /**
+ * Just return the resource embedded inside nrs_fifo_head, and end this
+ * resource hierarchy reference request.
+ */
+ *resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res;
+ return 1;
+}
+
+/**
+ * Called when getting a request from the FIFO policy for handling, or just
+ * peeking; removes the request from the policy when it is to be handled.
+ *
+ * \param[in] policy The policy
+ * \param[in] peek When set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force Force the policy to return a request; unused in this
+ * policy
+ *
+ * \retval The request to be handled; this is the next request in the FIFO
+ * queue
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ * \see nrs_request_get()
+ */
+static
+struct ptlrpc_nrs_request * nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy,
+ bool peek, bool force)
+{
+ struct nrs_fifo_head *head = policy->pol_private;
+ struct ptlrpc_nrs_request *nrq;
+
+ nrq = unlikely(list_empty(&head->fh_list)) ? NULL :
+ list_entry(head->fh_list.next, struct ptlrpc_nrs_request,
+ nr_u.fifo.fr_list);
+
+ if (likely(!peek && nrq != NULL)) {
+ struct ptlrpc_request *req = container_of(nrq,
+ struct ptlrpc_request,
+ rq_nrq);
+
+ list_del_init(&nrq->nr_u.fifo.fr_list);
+
+ CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: "LPU64
+ "\n", policy->pol_desc->pd_name,
+ libcfs_id2str(req->rq_peer), nrq->nr_u.fifo.fr_sequence);
+ }
+
+ return nrq;
+}
+
+/**
+ * Adds request \a nrq to \a policy's list of queued requests
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq The request to add
+ *
+ * \retval 0 success; nrs_request_enqueue() assumes this function will always
+ * succeed
+ */
+static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ struct nrs_fifo_head *head;
+
+ head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head,
+ fh_res);
+ /**
+ * Only used for debugging
+ */
+ nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
+ list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
+
+ return 0;
+}
+
+/**
+ * Removes request \a nrq from \a policy's list of queued requests.
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq The request to remove
+ */
+static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list));
+ list_del_init(&nrq->nr_u.fifo.fr_list);
+}
+
+/**
+ * Prints a debug statement right before the request \a nrq stops being
+ * handled.
+ *
+ * \param[in] policy The policy handling the request
+ * \param[in] nrq The request being handled
+ *
+ * \see ptlrpc_server_finish_request()
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request,
+ rq_nrq);
+
+ CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: "LPU64"\n",
+ policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
+ nrq->nr_u.fifo.fr_sequence);
+}
+
+/**
+ * FIFO policy operations
+ */
+static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = {
+ .op_policy_start = nrs_fifo_start,
+ .op_policy_stop = nrs_fifo_stop,
+ .op_res_get = nrs_fifo_res_get,
+ .op_req_get = nrs_fifo_req_get,
+ .op_req_enqueue = nrs_fifo_req_add,
+ .op_req_dequeue = nrs_fifo_req_del,
+ .op_req_stop = nrs_fifo_req_stop,
+};
+
+/**
+ * FIFO policy configuration
+ */
+struct ptlrpc_nrs_pol_conf nrs_conf_fifo = {
+ .nc_name = NRS_POL_NAME_FIFO,
+ .nc_ops = &nrs_fifo_ops,
+ .nc_compat = nrs_policy_compat_all,
+ .nc_flags = PTLRPC_NRS_FL_FALLBACK |
+ PTLRPC_NRS_FL_REG_START
+};
+
+/** @} fifo */
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
new file mode 100644
index 000000000000..1437636dfe28
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -0,0 +1,2575 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pack_generic.c
+ *
+ * (Un)packing of OST requests
+ *
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eeb@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <obd_cksum.h>
+#include <lustre/ll_fiemap.h>
+
+static inline int lustre_msg_hdr_size_v2(int count)
+{
+ return cfs_size_round(offsetof(struct lustre_msg_v2,
+ lm_buflens[count]));
+}
+
+int lustre_msg_hdr_size(__u32 magic, int count)
+{
+ switch (magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_hdr_size_v2(count);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_hdr_size);
+
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+ int index)
+{
+ if (inout)
+ lustre_set_req_swabbed(req, index);
+ else
+ lustre_set_rep_swabbed(req, index);
+}
+EXPORT_SYMBOL(ptlrpc_buf_set_swabbed);
+
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+ int index)
+{
+ if (inout)
+ return (ptlrpc_req_need_swab(req) &&
+ !lustre_req_swabbed(req, index));
+ else
+ return (ptlrpc_rep_need_swab(req) &&
+ !lustre_rep_swabbed(req, index));
+}
+EXPORT_SYMBOL(ptlrpc_buf_need_swab);
+
+static inline int lustre_msg_check_version_v2(struct lustre_msg_v2 *msg,
+ __u32 version)
+{
+ __u32 ver = lustre_msg_get_version(msg);
+ return (ver & LUSTRE_VERSION_MASK) != version;
+}
+
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ CERROR("msg v1 not supported - please upgrade you system\n");
+ return -EINVAL;
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_check_version_v2(msg, version);
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_check_version);
+
+/* early reply size */
+int lustre_msg_early_size()
+{
+ static int size = 0;
+ if (!size) {
+ /* Always reply old ptlrpc_body_v2 to keep interoprability
+ * with the old client (< 2.3) which doesn't have pb_jobid
+ * in the ptlrpc_body.
+ *
+ * XXX Remove this whenever we dorp interoprability with such
+ * client.
+ */
+ __u32 pblen = sizeof(struct ptlrpc_body_v2);
+ size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen);
+ }
+ return size;
+}
+EXPORT_SYMBOL(lustre_msg_early_size);
+
+int lustre_msg_size_v2(int count, __u32 *lengths)
+{
+ int size;
+ int i;
+
+ size = lustre_msg_hdr_size_v2(count);
+ for (i = 0; i < count; i++)
+ size += cfs_size_round(lengths[i]);
+
+ return size;
+}
+EXPORT_SYMBOL(lustre_msg_size_v2);
+
+/* This returns the size of the buffer that is required to hold a lustre_msg
+ * with the given sub-buffer lengths.
+ * NOTE: this should only be used for NEW requests, and should always be
+ * in the form of a v2 request. If this is a connection to a v1
+ * target then the first buffer will be stripped because the ptlrpc
+ * data is part of the lustre_msg_v1 header. b=14043 */
+int lustre_msg_size(__u32 magic, int count, __u32 *lens)
+{
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2));
+
+ switch (magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_size_v2(count, lens);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_size);
+
+/* This is used to determine the size of a buffer that was already packed
+ * and will correctly handle the different message formats. */
+int lustre_packed_msg_size(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_packed_msg_size);
+
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
+ char **bufs)
+{
+ char *ptr;
+ int i;
+
+ msg->lm_bufcount = count;
+ /* XXX: lm_secflvr uninitialized here */
+ msg->lm_magic = LUSTRE_MSG_MAGIC_V2;
+
+ for (i = 0; i < count; i++)
+ msg->lm_buflens[i] = lens[i];
+
+ if (bufs == NULL)
+ return;
+
+ ptr = (char *)msg + lustre_msg_hdr_size_v2(count);
+ for (i = 0; i < count; i++) {
+ char *tmp = bufs[i];
+ LOGL(tmp, lens[i], ptr);
+ }
+}
+EXPORT_SYMBOL(lustre_init_msg_v2);
+
+static int lustre_pack_request_v2(struct ptlrpc_request *req,
+ int count, __u32 *lens, char **bufs)
+{
+ int reqlen, rc;
+
+ reqlen = lustre_msg_size_v2(count, lens);
+
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+ if (rc)
+ return rc;
+
+ req->rq_reqlen = reqlen;
+
+ lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs);
+ lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION);
+ return 0;
+}
+
+int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
+ __u32 *lens, char **bufs)
+{
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+ /* only use new format, we don't need to be compatible with 1.4 */
+ magic = LUSTRE_MSG_MAGIC_V2;
+
+ switch (magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_pack_request_v2(req, count, lens, bufs);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_pack_request);
+
+#if RS_DEBUG
+LIST_HEAD(ptlrpc_rs_debug_lru);
+spinlock_t ptlrpc_rs_debug_lock;
+
+#define PTLRPC_RS_DEBUG_LRU_ADD(rs) \
+do { \
+ spin_lock(&ptlrpc_rs_debug_lock); \
+ list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru); \
+ spin_unlock(&ptlrpc_rs_debug_lock); \
+} while (0)
+
+#define PTLRPC_RS_DEBUG_LRU_DEL(rs) \
+do { \
+ spin_lock(&ptlrpc_rs_debug_lock); \
+ list_del(&(rs)->rs_debug_list); \
+ spin_unlock(&ptlrpc_rs_debug_lock); \
+} while (0)
+#else
+# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while(0)
+# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0)
+#endif
+
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_reply_state *rs = NULL;
+
+ spin_lock(&svcpt->scp_rep_lock);
+
+ /* See if we have anything in a pool, and wait if nothing */
+ while (list_empty(&svcpt->scp_rep_idle)) {
+ struct l_wait_info lwi;
+ int rc;
+
+ spin_unlock(&svcpt->scp_rep_lock);
+ /* If we cannot get anything for some long time, we better
+ * bail out instead of waiting infinitely */
+ lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
+ rc = l_wait_event(svcpt->scp_rep_waitq,
+ !list_empty(&svcpt->scp_rep_idle), &lwi);
+ if (rc != 0)
+ goto out;
+ spin_lock(&svcpt->scp_rep_lock);
+ }
+
+ rs = list_entry(svcpt->scp_rep_idle.next,
+ struct ptlrpc_reply_state, rs_list);
+ list_del(&rs->rs_list);
+
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ memset(rs, 0, svcpt->scp_service->srv_max_reply_size);
+ rs->rs_svcpt = svcpt;
+ rs->rs_prealloc = 1;
+out:
+ return rs;
+}
+
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+ spin_lock(&svcpt->scp_rep_lock);
+ list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+ spin_unlock(&svcpt->scp_rep_lock);
+ wake_up(&svcpt->scp_rep_waitq);
+}
+
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+ __u32 *lens, char **bufs, int flags)
+{
+ struct ptlrpc_reply_state *rs;
+ int msg_len, rc;
+ ENTRY;
+
+ LASSERT(req->rq_reply_state == NULL);
+
+ if ((flags & LPRFL_EARLY_REPLY) == 0) {
+ spin_lock(&req->rq_lock);
+ req->rq_packed_final = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ msg_len = lustre_msg_size_v2(count, lens);
+ rc = sptlrpc_svc_alloc_rs(req, msg_len);
+ if (rc)
+ RETURN(rc);
+
+ rs = req->rq_reply_state;
+ atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */
+ rs->rs_cb_id.cbid_fn = reply_out_callback;
+ rs->rs_cb_id.cbid_arg = rs;
+ rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt;
+ INIT_LIST_HEAD(&rs->rs_exp_list);
+ INIT_LIST_HEAD(&rs->rs_obd_list);
+ INIT_LIST_HEAD(&rs->rs_list);
+ spin_lock_init(&rs->rs_lock);
+
+ req->rq_replen = msg_len;
+ req->rq_reply_state = rs;
+ req->rq_repmsg = rs->rs_msg;
+
+ lustre_init_msg_v2(rs->rs_msg, count, lens, bufs);
+ lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION);
+
+ PTLRPC_RS_DEBUG_LRU_ADD(rs);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lustre_pack_reply_v2);
+
+int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
+ char **bufs, int flags)
+{
+ int rc = 0;
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ rc = lustre_pack_reply_v2(req, count, lens, bufs, flags);
+ break;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n",
+ req->rq_reqmsg->lm_magic);
+ rc = -EINVAL;
+ }
+ if (rc != 0)
+ CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc,
+ lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
+ return rc;
+}
+EXPORT_SYMBOL(lustre_pack_reply_flags);
+
+int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
+ char **bufs)
+{
+ return lustre_pack_reply_flags(req, count, lens, bufs, 0);
+}
+EXPORT_SYMBOL(lustre_pack_reply);
+
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
+{
+ int i, offset, buflen, bufcount;
+
+ LASSERT(m != NULL);
+ LASSERT(n >= 0);
+
+ bufcount = m->lm_bufcount;
+ if (unlikely(n >= bufcount)) {
+ CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n",
+ m, n, bufcount);
+ return NULL;
+ }
+
+ buflen = m->lm_buflens[n];
+ if (unlikely(buflen < min_size)) {
+ CERROR("msg %p buffer[%d] size %d too small "
+ "(required %d, opc=%d)\n", m, n, buflen, min_size,
+ n == MSG_PTLRPC_BODY_OFF ? -1 : lustre_msg_get_opc(m));
+ return NULL;
+ }
+
+ offset = lustre_msg_hdr_size_v2(bufcount);
+ for (i = 0; i < n; i++)
+ offset += cfs_size_round(m->lm_buflens[i]);
+
+ return (char *)m + offset;
+}
+
+void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_buf_v2(m, n, min_size);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x(msg:%p)\n", m->lm_magic, m);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_buf);
+
+int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+ unsigned int newlen, int move_data)
+{
+ char *tail = NULL, *newpos;
+ int tail_len = 0, n;
+
+ LASSERT(msg);
+ LASSERT(msg->lm_bufcount > segment);
+ LASSERT(msg->lm_buflens[segment] >= newlen);
+
+ if (msg->lm_buflens[segment] == newlen)
+ goto out;
+
+ if (move_data && msg->lm_bufcount > segment + 1) {
+ tail = lustre_msg_buf_v2(msg, segment + 1, 0);
+ for (n = segment + 1; n < msg->lm_bufcount; n++)
+ tail_len += cfs_size_round(msg->lm_buflens[n]);
+ }
+
+ msg->lm_buflens[segment] = newlen;
+
+ if (tail && tail_len) {
+ newpos = lustre_msg_buf_v2(msg, segment + 1, 0);
+ LASSERT(newpos <= tail);
+ if (newpos != tail)
+ memmove(newpos, tail, tail_len);
+ }
+out:
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+}
+
+/*
+ * for @msg, shrink @segment to size @newlen. if @move_data is non-zero,
+ * we also move data forward from @segment + 1.
+ *
+ * if @newlen == 0, we remove the segment completely, but we still keep the
+ * totally bufcount the same to save possible data moving. this will leave a
+ * unused segment with size 0 at the tail, but that's ok.
+ *
+ * return new msg size after shrinking.
+ *
+ * CAUTION:
+ * + if any buffers higher than @segment has been filled in, must call shrink
+ * with non-zero @move_data.
+ * + caller should NOT keep pointers to msg buffers which higher than @segment
+ * after call shrink.
+ */
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+ unsigned int newlen, int move_data)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_shrink_msg);
+
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
+{
+ PTLRPC_RS_DEBUG_LRU_DEL(rs);
+
+ LASSERT (atomic_read(&rs->rs_refcount) == 0);
+ LASSERT (!rs->rs_difficult || rs->rs_handled);
+ LASSERT (!rs->rs_on_net);
+ LASSERT (!rs->rs_scheduled);
+ LASSERT (rs->rs_export == NULL);
+ LASSERT (rs->rs_nlocks == 0);
+ LASSERT (list_empty(&rs->rs_exp_list));
+ LASSERT (list_empty(&rs->rs_obd_list));
+
+ sptlrpc_svc_free_rs(rs);
+}
+EXPORT_SYMBOL(lustre_free_reply_state);
+
+static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
+{
+ int swabbed, required_len, i;
+
+ /* Now we know the sender speaks my language. */
+ required_len = lustre_msg_hdr_size_v2(0);
+ if (len < required_len) {
+ /* can't even look inside the message */
+ CERROR("message length %d too small for lustre_msg\n", len);
+ return -EINVAL;
+ }
+
+ swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+ if (swabbed) {
+ __swab32s(&m->lm_magic);
+ __swab32s(&m->lm_bufcount);
+ __swab32s(&m->lm_secflvr);
+ __swab32s(&m->lm_repsize);
+ __swab32s(&m->lm_cksum);
+ __swab32s(&m->lm_flags);
+ CLASSERT(offsetof(typeof(*m), lm_padding_2) != 0);
+ CLASSERT(offsetof(typeof(*m), lm_padding_3) != 0);
+ }
+
+ required_len = lustre_msg_hdr_size_v2(m->lm_bufcount);
+ if (len < required_len) {
+ /* didn't receive all the buffer lengths */
+ CERROR ("message length %d too small for %d buflens\n",
+ len, m->lm_bufcount);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < m->lm_bufcount; i++) {
+ if (swabbed)
+ __swab32s(&m->lm_buflens[i]);
+ required_len += cfs_size_round(m->lm_buflens[i]);
+ }
+
+ if (len < required_len) {
+ CERROR("len: %d, required_len %d\n", len, required_len);
+ CERROR("bufcount: %d\n", m->lm_bufcount);
+ for (i = 0; i < m->lm_bufcount; i++)
+ CERROR("buffer %d length %d\n", i, m->lm_buflens[i]);
+ return -EINVAL;
+ }
+
+ return swabbed;
+}
+
+int __lustre_unpack_msg(struct lustre_msg *m, int len)
+{
+ int required_len, rc;
+ ENTRY;
+
+ /* We can provide a slightly better error log, if we check the
+ * message magic and version first. In the future, struct
+ * lustre_msg may grow, and we'd like to log a version mismatch,
+ * rather than a short message.
+ *
+ */
+ required_len = offsetof(struct lustre_msg, lm_magic) +
+ sizeof(m->lm_magic);
+ if (len < required_len) {
+ /* can't even look inside the message */
+ CERROR("message length %d too small for magic/version check\n",
+ len);
+ RETURN(-EINVAL);
+ }
+
+ rc = lustre_unpack_msg_v2(m, len);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(__lustre_unpack_msg);
+
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
+{
+ int rc;
+ rc = __lustre_unpack_msg(req->rq_reqmsg, len);
+ if (rc == 1) {
+ lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_req_msg);
+
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
+{
+ int rc;
+ rc = __lustre_unpack_msg(req->rq_repmsg, len);
+ if (rc == 1) {
+ lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_rep_msg);
+
+static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
+ const int inout, int offset)
+{
+ struct ptlrpc_body *pb;
+ struct lustre_msg_v2 *m = inout ? req->rq_reqmsg : req->rq_repmsg;
+
+ pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2));
+ if (!pb) {
+ CERROR("error unpacking ptlrpc body\n");
+ return -EFAULT;
+ }
+ if (ptlrpc_buf_need_swab(req, inout, offset)) {
+ lustre_swab_ptlrpc_body(pb);
+ ptlrpc_buf_set_swabbed(req, inout, offset);
+ }
+
+ if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) {
+ CERROR("wrong lustre_msg version %08x\n", pb->pb_version);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_unpack_ptlrpc_body_v2(req, 1, offset);
+ default:
+ CERROR("bad lustre msg magic: %08x\n",
+ req->rq_reqmsg->lm_magic);
+ return -EINVAL;
+ }
+}
+
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+ switch (req->rq_repmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_unpack_ptlrpc_body_v2(req, 0, offset);
+ default:
+ CERROR("bad lustre msg magic: %08x\n",
+ req->rq_repmsg->lm_magic);
+ return -EINVAL;
+ }
+}
+
+static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
+{
+ if (n >= m->lm_bufcount)
+ return 0;
+
+ return m->lm_buflens[n];
+}
+
+/**
+ * lustre_msg_buflen - return the length of buffer \a n in message \a m
+ * \param m lustre_msg (request or reply) to look at
+ * \param n message index (base 0)
+ *
+ * returns zero for non-existent message indices
+ */
+int lustre_msg_buflen(struct lustre_msg *m, int n)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_buflen_v2(m, n);
+ default:
+ CERROR("incorrect message magic: %08x\n", m->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_buflen);
+
+static inline void
+lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len)
+{
+ if (n >= m->lm_bufcount)
+ LBUG();
+
+ m->lm_buflens[n] = len;
+}
+
+void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ lustre_msg_set_buflen_v2(m, n, len);
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+ }
+}
+
+EXPORT_SYMBOL(lustre_msg_set_buflen);
+
+/* NB return the bufcount for lustre_msg_v2 format, so if message is packed
+ * in V1 format, the result is one bigger. (add struct ptlrpc_body). */
+int lustre_msg_bufcount(struct lustre_msg *m)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return m->lm_bufcount;
+ default:
+ CERROR("incorrect message magic: %08x\n", m->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_bufcount);
+
+char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
+{
+ /* max_len == 0 means the string should fill the buffer */
+ char *str;
+ int slen, blen;
+
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ str = lustre_msg_buf_v2(m, index, 0);
+ blen = lustre_msg_buflen_v2(m, index);
+ break;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+ }
+
+ if (str == NULL) {
+ CERROR ("can't unpack string in msg %p buffer[%d]\n", m, index);
+ return NULL;
+ }
+
+ slen = strnlen(str, blen);
+
+ if (slen == blen) { /* not NULL terminated */
+ CERROR("can't unpack non-NULL terminated string in "
+ "msg %p buffer[%d] len %d\n", m, index, blen);
+ return NULL;
+ }
+
+ if (max_len == 0) {
+ if (slen != blen - 1) {
+ CERROR("can't unpack short string in msg %p "
+ "buffer[%d] len %d: strlen %d\n",
+ m, index, blen, slen);
+ return NULL;
+ }
+ } else if (slen > max_len) {
+ CERROR("can't unpack oversized string in msg %p "
+ "buffer[%d] len %d strlen %d: max %d expected\n",
+ m, index, blen, slen, max_len);
+ return NULL;
+ }
+
+ return str;
+}
+EXPORT_SYMBOL(lustre_msg_string);
+
+/* Wrap up the normal fixed length cases */
+static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index,
+ int min_size, void *swabber)
+{
+ void *ptr = NULL;
+
+ LASSERT(msg != NULL);
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ ptr = lustre_msg_buf_v2(msg, index, min_size);
+ break;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ }
+
+ if (ptr && swabber)
+ ((void (*)(void *))swabber)(ptr);
+
+ return ptr;
+}
+
+static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg)
+{
+ return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body_v2));
+}
+
+__u32 lustre_msghdr_get_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2:
+ /* already in host endian */
+ return msg->lm_flags;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msghdr_get_flags);
+
+void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2:
+ msg->lm_flags = flags;
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+__u32 lustre_msg_get_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_flags;
+ }
+ default:
+ /* flags might be printed in debug code while message
+ * uninitialized */
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_flags);
+
+void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_flags);
+
+void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags = flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_flags);
+
+void lustre_msg_clear_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags &= ~(MSG_GEN_FLAG_MASK & flags);
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_clear_flags);
+
+__u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_op_flags;
+ }
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_op_flags);
+
+void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_op_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_op_flags);
+
+void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_op_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_op_flags);
+
+struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return NULL;
+ }
+ return &pb->pb_handle;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_handle);
+
+__u32 lustre_msg_get_type(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return PTL_RPC_MSG_ERR;
+ }
+ return pb->pb_type;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return PTL_RPC_MSG_ERR;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_type);
+
+__u32 lustre_msg_get_version(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_version;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_version);
+
+void lustre_msg_add_version(struct lustre_msg *msg, int version)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_version |= version;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_version);
+
+__u32 lustre_msg_get_opc(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_opc;
+ }
+ default:
+ CERROR("incorrect message magic: %08x(msg:%p)\n", msg->lm_magic, msg);
+ LBUG();
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_opc);
+
+__u64 lustre_msg_get_last_xid(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_last_xid;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_last_xid);
+
+__u64 lustre_msg_get_last_committed(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_last_committed;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_last_committed);
+
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return NULL;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return NULL;
+ }
+ return pb->pb_pre_versions;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_versions);
+
+__u64 lustre_msg_get_transno(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_transno;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_transno);
+
+int lustre_msg_get_status(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_status;
+ }
+ default:
+ /* status might be printed in debug code while message
+ * uninitialized */
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_status);
+
+__u64 lustre_msg_get_slv(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_slv;
+ }
+ default:
+ CERROR("invalid msg magic %08x\n", msg->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_slv);
+
+
+void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return;
+ }
+ pb->pb_slv = slv;
+ return;
+ }
+ default:
+ CERROR("invalid msg magic %x\n", msg->lm_magic);
+ return;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_slv);
+
+__u32 lustre_msg_get_limit(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_limit;
+ }
+ default:
+ CERROR("invalid msg magic %x\n", msg->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_limit);
+
+
+void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return;
+ }
+ pb->pb_limit = limit;
+ return;
+ }
+ default:
+ CERROR("invalid msg magic %08x\n", msg->lm_magic);
+ return;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_limit);
+
+__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_conn_cnt;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_conn_cnt);
+
+int lustre_msg_is_v1(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 1;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_is_v1);
+
+__u32 lustre_msg_get_magic(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return msg->lm_magic;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_magic);
+
+__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+
+ }
+ return pb->pb_timeout;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+__u32 lustre_msg_get_service_time(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+
+ }
+ return pb->pb_service_time;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+char *lustre_msg_get_jobid(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return NULL;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb =
+ lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body));
+ if (!pb)
+ return NULL;
+
+ return pb->pb_jobid;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_jobid);
+
+__u32 lustre_msg_get_cksum(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return msg->lm_cksum;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+/*
+ * In 1.6 and 1.8 the checksum was computed only on struct ptlrpc_body as
+ * it was in 1.6 (88 bytes, smaller than the full size in 1.8). It makes
+ * more sense to compute the checksum on the full ptlrpc_body, regardless
+ * of what size it is, but in order to keep interoperability with 1.8 we
+ * can optionally also checksum only the first 88 bytes (caller decides). */
+# define ptlrpc_body_cksum_size_compat18 88
+
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg, int compat18)
+#else
+# warning "remove checksum compatibility support for b1_8"
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
+#endif
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+ __u32 crc;
+ unsigned int hsize = 4;
+ __u32 len = compat18 ? ptlrpc_body_cksum_size_compat18 :
+ lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
+ len, NULL, 0, (unsigned char *)&crc,
+ &hsize);
+ return crc;
+#else
+# warning "remove checksum compatibility support for b1_8"
+ __u32 crc;
+ unsigned int hsize = 4;
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
+ lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF),
+ NULL, 0, (unsigned char *)&crc, &hsize);
+ return crc;
+#endif
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_handle = *handle;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_handle);
+
+void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_type = type;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_type);
+
+void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_opc = opc;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_opc);
+
+void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_last_xid = last_xid;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_last_xid);
+
+void lustre_msg_set_last_committed(struct lustre_msg *msg, __u64 last_committed)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_last_committed = last_committed;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_last_committed);
+
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_pre_versions[0] = versions[0];
+ pb->pb_pre_versions[1] = versions[1];
+ pb->pb_pre_versions[2] = versions[2];
+ pb->pb_pre_versions[3] = versions[3];
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_versions);
+
+void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_transno = transno;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_transno);
+
+void lustre_msg_set_status(struct lustre_msg *msg, __u32 status)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_status = status;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_status);
+
+void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_conn_cnt = conn_cnt;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
+
+void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_timeout = timeout;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_service_time = service_time;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ __u32 opc = lustre_msg_get_opc(msg);
+ struct ptlrpc_body *pb;
+
+ /* Don't set jobid for ldlm ast RPCs, they've been shrinked.
+ * See the comment in ptlrpc_request_pack(). */
+ if (!opc || opc == LDLM_BL_CALLBACK ||
+ opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK)
+ return;
+
+ pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body));
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+
+ if (jobid != NULL)
+ memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+ else if (pb->pb_jobid[0] == '\0')
+ lustre_get_jobid(pb->pb_jobid);
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_jobid);
+
+void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2:
+ msg->lm_cksum = cksum;
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+
+void ptlrpc_request_set_replen(struct ptlrpc_request *req)
+{
+ int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER);
+
+ req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count,
+ req->rq_pill.rc_area[RCL_SERVER]);
+ if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+ req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_request_set_replen);
+
+void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *lens)
+{
+ req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens);
+ if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+ req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_req_set_repsize);
+
+/**
+ * Send a remote set_info_async.
+ *
+ * This may go from client to server or server to client.
+ */
+int do_set_info_async(struct obd_import *imp,
+ int opcode, int version,
+ obd_count keylen, void *key,
+ obd_count vallen, void *val,
+ struct ptlrpc_request_set *set)
+{
+ struct ptlrpc_request *req;
+ char *tmp;
+ int rc;
+ ENTRY;
+
+ req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
+ RCL_CLIENT, keylen);
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
+ RCL_CLIENT, vallen);
+ rc = ptlrpc_request_pack(req, version, opcode);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
+ memcpy(tmp, val, vallen);
+
+ ptlrpc_request_set_replen(req);
+
+ if (set) {
+ ptlrpc_set_add_req(set, req);
+ ptlrpc_check_set(NULL, set);
+ } else {
+ rc = ptlrpc_queue_wait(req);
+ ptlrpc_req_finished(req);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(do_set_info_async);
+
+/* byte flipping routines for all wire types declared in
+ * lustre_idl.h implemented here.
+ */
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
+{
+ __swab32s (&b->pb_type);
+ __swab32s (&b->pb_version);
+ __swab32s (&b->pb_opc);
+ __swab32s (&b->pb_status);
+ __swab64s (&b->pb_last_xid);
+ __swab64s (&b->pb_last_seen);
+ __swab64s (&b->pb_last_committed);
+ __swab64s (&b->pb_transno);
+ __swab32s (&b->pb_flags);
+ __swab32s (&b->pb_op_flags);
+ __swab32s (&b->pb_conn_cnt);
+ __swab32s (&b->pb_timeout);
+ __swab32s (&b->pb_service_time);
+ __swab32s (&b->pb_limit);
+ __swab64s (&b->pb_slv);
+ __swab64s (&b->pb_pre_versions[0]);
+ __swab64s (&b->pb_pre_versions[1]);
+ __swab64s (&b->pb_pre_versions[2]);
+ __swab64s (&b->pb_pre_versions[3]);
+ CLASSERT(offsetof(typeof(*b), pb_padding) != 0);
+ /* While we need to maintain compatibility between
+ * clients and servers without ptlrpc_body_v2 (< 2.3)
+ * do not swab any fields beyond pb_jobid, as we are
+ * using this swab function for both ptlrpc_body
+ * and ptlrpc_body_v2. */
+ CLASSERT(offsetof(typeof(*b), pb_jobid) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_ptlrpc_body);
+
+void lustre_swab_connect(struct obd_connect_data *ocd)
+{
+ __swab64s(&ocd->ocd_connect_flags);
+ __swab32s(&ocd->ocd_version);
+ __swab32s(&ocd->ocd_grant);
+ __swab64s(&ocd->ocd_ibits_known);
+ __swab32s(&ocd->ocd_index);
+ __swab32s(&ocd->ocd_brw_size);
+ /* ocd_blocksize and ocd_inodespace don't need to be swabbed because
+ * they are 8-byte values */
+ __swab16s(&ocd->ocd_grant_extent);
+ __swab32s(&ocd->ocd_unused);
+ __swab64s(&ocd->ocd_transno);
+ __swab32s(&ocd->ocd_group);
+ __swab32s(&ocd->ocd_cksum_types);
+ __swab32s(&ocd->ocd_instance);
+ /* Fields after ocd_cksum_types are only accessible by the receiver
+ * if the corresponding flag in ocd_connect_flags is set. Accessing
+ * any field after ocd_maxbytes on the receiver without a valid flag
+ * may result in out-of-bound memory access and kernel oops. */
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
+ __swab32s(&ocd->ocd_max_easize);
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
+ __swab64s(&ocd->ocd_maxbytes);
+ CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding5) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding6) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding7) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding8) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding9) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingA) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingB) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingC) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingD) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingE) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingF) != 0);
+}
+
+void lustre_swab_obdo (struct obdo *o)
+{
+ __swab64s (&o->o_valid);
+ lustre_swab_ost_id(&o->o_oi);
+ __swab64s (&o->o_parent_seq);
+ __swab64s (&o->o_size);
+ __swab64s (&o->o_mtime);
+ __swab64s (&o->o_atime);
+ __swab64s (&o->o_ctime);
+ __swab64s (&o->o_blocks);
+ __swab64s (&o->o_grant);
+ __swab32s (&o->o_blksize);
+ __swab32s (&o->o_mode);
+ __swab32s (&o->o_uid);
+ __swab32s (&o->o_gid);
+ __swab32s (&o->o_flags);
+ __swab32s (&o->o_nlink);
+ __swab32s (&o->o_parent_oid);
+ __swab32s (&o->o_misc);
+ __swab64s (&o->o_ioepoch);
+ __swab32s (&o->o_stripe_idx);
+ __swab32s (&o->o_parent_ver);
+ /* o_handle is opaque */
+ /* o_lcookie is swabbed elsewhere */
+ __swab32s (&o->o_uid_h);
+ __swab32s (&o->o_gid_h);
+ __swab64s (&o->o_data_version);
+ CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
+ CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
+ CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
+
+}
+EXPORT_SYMBOL(lustre_swab_obdo);
+
+void lustre_swab_obd_statfs (struct obd_statfs *os)
+{
+ __swab64s (&os->os_type);
+ __swab64s (&os->os_blocks);
+ __swab64s (&os->os_bfree);
+ __swab64s (&os->os_bavail);
+ __swab64s (&os->os_files);
+ __swab64s (&os->os_ffree);
+ /* no need to swab os_fsid */
+ __swab32s (&os->os_bsize);
+ __swab32s (&os->os_namelen);
+ __swab64s (&os->os_maxbytes);
+ __swab32s (&os->os_state);
+ CLASSERT(offsetof(typeof(*os), os_fprecreated) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare6) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare7) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare8) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare9) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_obd_statfs);
+
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
+{
+ lustre_swab_ost_id(&ioo->ioo_oid);
+ __swab32s(&ioo->ioo_max_brw);
+ __swab32s(&ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(lustre_swab_obd_ioobj);
+
+void lustre_swab_niobuf_remote (struct niobuf_remote *nbr)
+{
+ __swab64s (&nbr->offset);
+ __swab32s (&nbr->len);
+ __swab32s (&nbr->flags);
+}
+EXPORT_SYMBOL(lustre_swab_niobuf_remote);
+
+void lustre_swab_ost_body (struct ost_body *b)
+{
+ lustre_swab_obdo (&b->oa);
+}
+EXPORT_SYMBOL(lustre_swab_ost_body);
+
+void lustre_swab_ost_last_id(obd_id *id)
+{
+ __swab64s(id);
+}
+EXPORT_SYMBOL(lustre_swab_ost_last_id);
+
+void lustre_swab_generic_32s(__u32 *val)
+{
+ __swab32s(val);
+}
+EXPORT_SYMBOL(lustre_swab_generic_32s);
+
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
+{
+ lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid);
+ __swab64s(&desc->lquota_desc.gl_flags);
+ __swab64s(&desc->lquota_desc.gl_ver);
+ __swab64s(&desc->lquota_desc.gl_hardlimit);
+ __swab64s(&desc->lquota_desc.gl_softlimit);
+ __swab64s(&desc->lquota_desc.gl_time);
+ CLASSERT(offsetof(typeof(desc->lquota_desc), gl_pad2) != 0);
+}
+
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb)
+{
+ __swab64s(&lvb->lvb_size);
+ __swab64s(&lvb->lvb_mtime);
+ __swab64s(&lvb->lvb_atime);
+ __swab64s(&lvb->lvb_ctime);
+ __swab64s(&lvb->lvb_blocks);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb_v1);
+
+void lustre_swab_ost_lvb(struct ost_lvb *lvb)
+{
+ __swab64s(&lvb->lvb_size);
+ __swab64s(&lvb->lvb_mtime);
+ __swab64s(&lvb->lvb_atime);
+ __swab64s(&lvb->lvb_ctime);
+ __swab64s(&lvb->lvb_blocks);
+ __swab32s(&lvb->lvb_mtime_ns);
+ __swab32s(&lvb->lvb_atime_ns);
+ __swab32s(&lvb->lvb_ctime_ns);
+ __swab32s(&lvb->lvb_padding);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb);
+
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb)
+{
+ __swab64s(&lvb->lvb_flags);
+ __swab64s(&lvb->lvb_id_may_rel);
+ __swab64s(&lvb->lvb_id_rel);
+ __swab64s(&lvb->lvb_id_qunit);
+ __swab64s(&lvb->lvb_pad1);
+}
+EXPORT_SYMBOL(lustre_swab_lquota_lvb);
+
+void lustre_swab_mdt_body (struct mdt_body *b)
+{
+ lustre_swab_lu_fid (&b->fid1);
+ lustre_swab_lu_fid (&b->fid2);
+ /* handle is opaque */
+ __swab64s (&b->valid);
+ __swab64s (&b->size);
+ __swab64s (&b->mtime);
+ __swab64s (&b->atime);
+ __swab64s (&b->ctime);
+ __swab64s (&b->blocks);
+ __swab64s (&b->ioepoch);
+ CLASSERT(offsetof(typeof(*b), unused1) != 0);
+ __swab32s (&b->fsuid);
+ __swab32s (&b->fsgid);
+ __swab32s (&b->capability);
+ __swab32s (&b->mode);
+ __swab32s (&b->uid);
+ __swab32s (&b->gid);
+ __swab32s (&b->flags);
+ __swab32s (&b->rdev);
+ __swab32s (&b->nlink);
+ CLASSERT(offsetof(typeof(*b), unused2) != 0);
+ __swab32s (&b->suppgid);
+ __swab32s (&b->eadatasize);
+ __swab32s (&b->aclsize);
+ __swab32s (&b->max_mdsize);
+ __swab32s (&b->max_cookiesize);
+ __swab32s (&b->uid_h);
+ __swab32s (&b->gid_h);
+ CLASSERT(offsetof(typeof(*b), padding_5) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_body);
+
+void lustre_swab_mdt_ioepoch (struct mdt_ioepoch *b)
+{
+ /* handle is opaque */
+ __swab64s (&b->ioepoch);
+ __swab32s (&b->flags);
+ CLASSERT(offsetof(typeof(*b), padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_ioepoch);
+
+void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
+{
+ int i;
+ __swab32s(&mti->mti_lustre_ver);
+ __swab32s(&mti->mti_stripe_index);
+ __swab32s(&mti->mti_config_ver);
+ __swab32s(&mti->mti_flags);
+ __swab32s(&mti->mti_instance);
+ __swab32s(&mti->mti_nid_count);
+ CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+ for (i = 0; i < MTI_NIDS_MAX; i++)
+ __swab64s(&mti->mti_nids[i]);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
+
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
+{
+ int i;
+
+ __swab64s(&entry->mne_version);
+ __swab32s(&entry->mne_instance);
+ __swab32s(&entry->mne_index);
+ __swab32s(&entry->mne_length);
+
+ /* mne_nid_(count|type) must be one byte size because we're gonna
+ * access it w/o swapping. */
+ CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8));
+ CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8));
+
+ /* remove this assertion if ipv6 is supported. */
+ LASSERT(entry->mne_nid_type == 0);
+ for (i = 0; i < entry->mne_nid_count; i++) {
+ CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+ __swab64s(&entry->u.nids[i]);
+ }
+}
+EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
+
+void lustre_swab_mgs_config_body(struct mgs_config_body *body)
+{
+ __swab64s(&body->mcb_offset);
+ __swab32s(&body->mcb_units);
+ __swab16s(&body->mcb_type);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_body);
+
+void lustre_swab_mgs_config_res(struct mgs_config_res *body)
+{
+ __swab64s(&body->mcr_offset);
+ __swab64s(&body->mcr_size);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_res);
+
+static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
+{
+ __swab64s (&i->dqi_bgrace);
+ __swab64s (&i->dqi_igrace);
+ __swab32s (&i->dqi_flags);
+ __swab32s (&i->dqi_valid);
+}
+
+static void lustre_swab_obd_dqblk (struct obd_dqblk *b)
+{
+ __swab64s (&b->dqb_ihardlimit);
+ __swab64s (&b->dqb_isoftlimit);
+ __swab64s (&b->dqb_curinodes);
+ __swab64s (&b->dqb_bhardlimit);
+ __swab64s (&b->dqb_bsoftlimit);
+ __swab64s (&b->dqb_curspace);
+ __swab64s (&b->dqb_btime);
+ __swab64s (&b->dqb_itime);
+ __swab32s (&b->dqb_valid);
+ CLASSERT(offsetof(typeof(*b), dqb_padding) != 0);
+}
+
+void lustre_swab_obd_quotactl (struct obd_quotactl *q)
+{
+ __swab32s (&q->qc_cmd);
+ __swab32s (&q->qc_type);
+ __swab32s (&q->qc_id);
+ __swab32s (&q->qc_stat);
+ lustre_swab_obd_dqinfo (&q->qc_dqinfo);
+ lustre_swab_obd_dqblk (&q->qc_dqblk);
+}
+EXPORT_SYMBOL(lustre_swab_obd_quotactl);
+
+void lustre_swab_mdt_remote_perm (struct mdt_remote_perm *p)
+{
+ __swab32s (&p->rp_uid);
+ __swab32s (&p->rp_gid);
+ __swab32s (&p->rp_fsuid);
+ __swab32s (&p->rp_fsuid_h);
+ __swab32s (&p->rp_fsgid);
+ __swab32s (&p->rp_fsgid_h);
+ __swab32s (&p->rp_access_perm);
+ __swab32s (&p->rp_padding);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
+
+void lustre_swab_fid2path(struct getinfo_fid2path *gf)
+{
+ lustre_swab_lu_fid(&gf->gf_fid);
+ __swab64s(&gf->gf_recno);
+ __swab32s(&gf->gf_linkno);
+ __swab32s(&gf->gf_pathlen);
+}
+EXPORT_SYMBOL(lustre_swab_fid2path);
+
+void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
+{
+ __swab64s(&fm_extent->fe_logical);
+ __swab64s(&fm_extent->fe_physical);
+ __swab64s(&fm_extent->fe_length);
+ __swab32s(&fm_extent->fe_flags);
+ __swab32s(&fm_extent->fe_device);
+}
+
+void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
+{
+ int i;
+
+ __swab64s(&fiemap->fm_start);
+ __swab64s(&fiemap->fm_length);
+ __swab32s(&fiemap->fm_flags);
+ __swab32s(&fiemap->fm_mapped_extents);
+ __swab32s(&fiemap->fm_extent_count);
+ __swab32s(&fiemap->fm_reserved);
+
+ for (i = 0; i < fiemap->fm_mapped_extents; i++)
+ lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
+}
+EXPORT_SYMBOL(lustre_swab_fiemap);
+
+void lustre_swab_idx_info(struct idx_info *ii)
+{
+ __swab32s(&ii->ii_magic);
+ __swab32s(&ii->ii_flags);
+ __swab16s(&ii->ii_count);
+ __swab32s(&ii->ii_attrs);
+ lustre_swab_lu_fid(&ii->ii_fid);
+ __swab64s(&ii->ii_version);
+ __swab64s(&ii->ii_hash_start);
+ __swab64s(&ii->ii_hash_end);
+ __swab16s(&ii->ii_keysize);
+ __swab16s(&ii->ii_recsize);
+}
+
+void lustre_swab_lip_header(struct lu_idxpage *lip)
+{
+ /* swab header */
+ __swab32s(&lip->lip_magic);
+ __swab16s(&lip->lip_flags);
+ __swab16s(&lip->lip_nr);
+}
+EXPORT_SYMBOL(lustre_swab_lip_header);
+
+void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
+{
+ __swab32s(&rr->rr_opcode);
+ __swab32s(&rr->rr_cap);
+ __swab32s(&rr->rr_fsuid);
+ /* rr_fsuid_h is unused */
+ __swab32s(&rr->rr_fsgid);
+ /* rr_fsgid_h is unused */
+ __swab32s(&rr->rr_suppgid1);
+ /* rr_suppgid1_h is unused */
+ __swab32s(&rr->rr_suppgid2);
+ /* rr_suppgid2_h is unused */
+ lustre_swab_lu_fid(&rr->rr_fid1);
+ lustre_swab_lu_fid(&rr->rr_fid2);
+ __swab64s(&rr->rr_mtime);
+ __swab64s(&rr->rr_atime);
+ __swab64s(&rr->rr_ctime);
+ __swab64s(&rr->rr_size);
+ __swab64s(&rr->rr_blocks);
+ __swab32s(&rr->rr_bias);
+ __swab32s(&rr->rr_mode);
+ __swab32s(&rr->rr_flags);
+ __swab32s(&rr->rr_flags_h);
+ __swab32s(&rr->rr_umask);
+
+ CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
+
+void lustre_swab_lov_desc (struct lov_desc *ld)
+{
+ __swab32s (&ld->ld_tgt_count);
+ __swab32s (&ld->ld_active_tgt_count);
+ __swab32s (&ld->ld_default_stripe_count);
+ __swab32s (&ld->ld_pattern);
+ __swab64s (&ld->ld_default_stripe_size);
+ __swab64s (&ld->ld_default_stripe_offset);
+ __swab32s (&ld->ld_qos_maxage);
+ /* uuid endian insensitive */
+}
+EXPORT_SYMBOL(lustre_swab_lov_desc);
+
+void lustre_swab_lmv_desc (struct lmv_desc *ld)
+{
+ __swab32s (&ld->ld_tgt_count);
+ __swab32s (&ld->ld_active_tgt_count);
+ __swab32s (&ld->ld_default_stripe_count);
+ __swab32s (&ld->ld_pattern);
+ __swab64s (&ld->ld_default_hash_size);
+ __swab32s (&ld->ld_qos_maxage);
+ /* uuid endian insensitive */
+}
+
+void lustre_swab_lmv_stripe_md (struct lmv_stripe_md *mea)
+{
+ __swab32s(&mea->mea_magic);
+ __swab32s(&mea->mea_count);
+ __swab32s(&mea->mea_master);
+ CLASSERT(offsetof(typeof(*mea), mea_padding) != 0);
+}
+
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
+{
+ int i;
+
+ __swab32s(&lum->lum_magic);
+ __swab32s(&lum->lum_stripe_count);
+ __swab32s(&lum->lum_stripe_offset);
+ __swab32s(&lum->lum_hash_type);
+ __swab32s(&lum->lum_type);
+ CLASSERT(offsetof(typeof(*lum), lum_padding1) != 0);
+ CLASSERT(offsetof(typeof(*lum), lum_padding2) != 0);
+ CLASSERT(offsetof(typeof(*lum), lum_padding3) != 0);
+
+ for (i = 0; i < lum->lum_stripe_count; i++) {
+ __swab32s(&lum->lum_objects[i].lum_mds);
+ lustre_swab_lu_fid(&lum->lum_objects[i].lum_fid);
+ }
+
+}
+EXPORT_SYMBOL(lustre_swab_lmv_user_md);
+
+static void print_lum (struct lov_user_md *lum)
+{
+ CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
+ CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
+ CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
+ CDEBUG(D_OTHER, "\tlmm_object_id: "LPU64"\n", lmm_oi_id(&lum->lmm_oi));
+ CDEBUG(D_OTHER, "\tlmm_object_gr: "LPU64"\n", lmm_oi_seq(&lum->lmm_oi));
+ CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
+ CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
+ CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
+ lum->lmm_stripe_offset);
+}
+
+static void lustre_swab_lmm_oi(struct ost_id *oi)
+{
+ __swab64s(&oi->oi.oi_id);
+ __swab64s(&oi->oi.oi_seq);
+}
+
+static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
+{
+ ENTRY;
+ __swab32s(&lum->lmm_magic);
+ __swab32s(&lum->lmm_pattern);
+ lustre_swab_lmm_oi(&lum->lmm_oi);
+ __swab32s(&lum->lmm_stripe_size);
+ __swab16s(&lum->lmm_stripe_count);
+ __swab16s(&lum->lmm_stripe_offset);
+ print_lum(lum);
+ EXIT;
+}
+
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
+ lustre_swab_lov_user_md_common(lum);
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
+
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
+ lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
+ /* lmm_pool_name nothing to do with char */
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
+
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_mds_md\n");
+ __swab32s(&lmm->lmm_magic);
+ __swab32s(&lmm->lmm_pattern);
+ lustre_swab_lmm_oi(&lmm->lmm_oi);
+ __swab32s(&lmm->lmm_stripe_size);
+ __swab16s(&lmm->lmm_stripe_count);
+ __swab16s(&lmm->lmm_layout_gen);
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_lov_mds_md);
+
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count)
+{
+ int i;
+ ENTRY;
+ for (i = 0; i < stripe_count; i++) {
+ lustre_swab_ost_id(&(lod[i].l_ost_oi));
+ __swab32s(&(lod[i].l_ost_gen));
+ __swab32s(&(lod[i].l_ost_idx));
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
+
+void lustre_swab_ldlm_res_id (struct ldlm_res_id *id)
+{
+ int i;
+
+ for (i = 0; i < RES_NAME_SIZE; i++)
+ __swab64s (&id->name[i]);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
+
+void lustre_swab_ldlm_policy_data (ldlm_wire_policy_data_t *d)
+{
+ /* the lock data is a union and the first two fields are always an
+ * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock
+ * data the same way. */
+ __swab64s(&d->l_extent.start);
+ __swab64s(&d->l_extent.end);
+ __swab64s(&d->l_extent.gid);
+ __swab64s(&d->l_flock.lfw_owner);
+ __swab32s(&d->l_flock.lfw_pid);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_policy_data);
+
+void lustre_swab_ldlm_intent (struct ldlm_intent *i)
+{
+ __swab64s (&i->opc);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_intent);
+
+void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r)
+{
+ __swab32s (&r->lr_type);
+ CLASSERT(offsetof(typeof(*r), lr_padding) != 0);
+ lustre_swab_ldlm_res_id (&r->lr_name);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_resource_desc);
+
+void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l)
+{
+ lustre_swab_ldlm_resource_desc (&l->l_resource);
+ __swab32s (&l->l_req_mode);
+ __swab32s (&l->l_granted_mode);
+ lustre_swab_ldlm_policy_data (&l->l_policy_data);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
+
+void lustre_swab_ldlm_request (struct ldlm_request *rq)
+{
+ __swab32s (&rq->lock_flags);
+ lustre_swab_ldlm_lock_desc (&rq->lock_desc);
+ __swab32s (&rq->lock_count);
+ /* lock_handle[] opaque */
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_request);
+
+void lustre_swab_ldlm_reply (struct ldlm_reply *r)
+{
+ __swab32s (&r->lock_flags);
+ CLASSERT(offsetof(typeof(*r), lock_padding) != 0);
+ lustre_swab_ldlm_lock_desc (&r->lock_desc);
+ /* lock_handle opaque */
+ __swab64s (&r->lock_policy_res1);
+ __swab64s (&r->lock_policy_res2);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_reply);
+
+void lustre_swab_quota_body(struct quota_body *b)
+{
+ lustre_swab_lu_fid(&b->qb_fid);
+ lustre_swab_lu_fid((struct lu_fid *)&b->qb_id);
+ __swab32s(&b->qb_flags);
+ __swab64s(&b->qb_count);
+ __swab64s(&b->qb_usage);
+ __swab64s(&b->qb_slv_ver);
+}
+
+/* Dump functions */
+void dump_ioo(struct obd_ioobj *ioo)
+{
+ CDEBUG(D_RPCTRACE,
+ "obd_ioobj: ioo_oid="DOSTID", ioo_max_brw=%#x, "
+ "ioo_bufct=%d\n", POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
+ ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(dump_ioo);
+
+void dump_rniobuf(struct niobuf_remote *nb)
+{
+ CDEBUG(D_RPCTRACE, "niobuf_remote: offset="LPU64", len=%d, flags=%x\n",
+ nb->offset, nb->len, nb->flags);
+}
+EXPORT_SYMBOL(dump_rniobuf);
+
+void dump_obdo(struct obdo *oa)
+{
+ __u32 valid = oa->o_valid;
+
+ CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid);
+ if (valid & OBD_MD_FLID)
+ CDEBUG(D_RPCTRACE, "obdo: id = "DOSTID"\n", POSTID(&oa->o_oi));
+ if (valid & OBD_MD_FLFID)
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = "LPX64"\n",
+ oa->o_parent_seq);
+ if (valid & OBD_MD_FLSIZE)
+ CDEBUG(D_RPCTRACE, "obdo: o_size = "LPD64"\n", oa->o_size);
+ if (valid & OBD_MD_FLMTIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_mtime = "LPD64"\n", oa->o_mtime);
+ if (valid & OBD_MD_FLATIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_atime = "LPD64"\n", oa->o_atime);
+ if (valid & OBD_MD_FLCTIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_ctime = "LPD64"\n", oa->o_ctime);
+ if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
+ CDEBUG(D_RPCTRACE, "obdo: o_blocks = "LPD64"\n", oa->o_blocks);
+ if (valid & OBD_MD_FLGRANT)
+ CDEBUG(D_RPCTRACE, "obdo: o_grant = "LPD64"\n", oa->o_grant);
+ if (valid & OBD_MD_FLBLKSZ)
+ CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize);
+ if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE))
+ CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n",
+ oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) |
+ (valid & OBD_MD_FLMODE ? ~S_IFMT : 0)));
+ if (valid & OBD_MD_FLUID)
+ CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid);
+ if (valid & OBD_MD_FLUID)
+ CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h);
+ if (valid & OBD_MD_FLGID)
+ CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid);
+ if (valid & OBD_MD_FLGID)
+ CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h);
+ if (valid & OBD_MD_FLFLAGS)
+ CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags);
+ if (valid & OBD_MD_FLNLINK)
+ CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink);
+ else if (valid & OBD_MD_FLCKSUM)
+ CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n",
+ oa->o_nlink);
+ if (valid & OBD_MD_FLGENER)
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n",
+ oa->o_parent_oid);
+ if (valid & OBD_MD_FLEPOCH)
+ CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = "LPD64"\n",
+ oa->o_ioepoch);
+ if (valid & OBD_MD_FLFID) {
+ CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n",
+ oa->o_stripe_idx);
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n",
+ oa->o_parent_ver);
+ }
+ if (valid & OBD_MD_FLHANDLE)
+ CDEBUG(D_RPCTRACE, "obdo: o_handle = "LPD64"\n",
+ oa->o_handle.cookie);
+ if (valid & OBD_MD_FLCOOKIE)
+ CDEBUG(D_RPCTRACE, "obdo: o_lcookie = "
+ "(llog_cookie dumping not yet implemented)\n");
+}
+EXPORT_SYMBOL(dump_obdo);
+
+void dump_ost_body(struct ost_body *ob)
+{
+ dump_obdo(&ob->oa);
+}
+EXPORT_SYMBOL(dump_ost_body);
+
+void dump_rcs(__u32 *rc)
+{
+ CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
+}
+EXPORT_SYMBOL(dump_rcs);
+
+static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_reqmsg);
+
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF);
+ default:
+ CERROR("bad lustre msg magic: %#08X\n",
+ req->rq_reqmsg->lm_magic);
+ }
+ return 0;
+}
+
+static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repmsg);
+
+ switch (req->rq_repmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF);
+ default:
+ /* uninitialized yet */
+ return 0;
+ }
+}
+
+void _debug_req(struct ptlrpc_request *req,
+ struct libcfs_debug_msg_data *msgdata,
+ const char *fmt, ... )
+{
+ int req_ok = req->rq_reqmsg != NULL;
+ int rep_ok = req->rq_repmsg != NULL;
+ lnet_nid_t nid = LNET_NID_ANY;
+ va_list args;
+
+ if (ptlrpc_req_need_swab(req)) {
+ req_ok = req_ok && req_ptlrpc_body_swabbed(req);
+ rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req);
+ }
+
+ if (req->rq_import && req->rq_import->imp_connection)
+ nid = req->rq_import->imp_connection->c_peer.nid;
+ else if (req->rq_export && req->rq_export->exp_connection)
+ nid = req->rq_export->exp_connection->c_peer.nid;
+
+ va_start(args, fmt);
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " req@%p x"LPU64"/t"LPD64"("LPD64") o%d->%s@%s:%d/%d"
+ " lens %d/%d e %d to %d dl "CFS_TIME_T" ref %d "
+ "fl "REQ_FLAGS_FMT"/%x/%x rc %d/%d\n",
+ req, req->rq_xid, req->rq_transno,
+ req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0,
+ req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
+ req->rq_import ?
+ req->rq_import->imp_obd->obd_name :
+ req->rq_export ?
+ req->rq_export->exp_client_uuid.uuid :
+ "<?>",
+ libcfs_nid2str(nid),
+ req->rq_request_portal, req->rq_reply_portal,
+ req->rq_reqlen, req->rq_replen,
+ req->rq_early_count, req->rq_timedout,
+ req->rq_deadline,
+ atomic_read(&req->rq_refcount),
+ DEBUG_REQ_FLAGS(req),
+ req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
+ rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1,
+ req->rq_status,
+ rep_ok ? lustre_msg_get_status(req->rq_repmsg) : -1);
+}
+EXPORT_SYMBOL(_debug_req);
+
+void lustre_swab_lustre_capa(struct lustre_capa *c)
+{
+ lustre_swab_lu_fid(&c->lc_fid);
+ __swab64s (&c->lc_opc);
+ __swab64s (&c->lc_uid);
+ __swab64s (&c->lc_gid);
+ __swab32s (&c->lc_flags);
+ __swab32s (&c->lc_keyid);
+ __swab32s (&c->lc_timeout);
+ __swab32s (&c->lc_expiry);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa);
+
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k)
+{
+ __swab64s (&k->lk_seq);
+ __swab32s (&k->lk_keyid);
+ CLASSERT(offsetof(typeof(*k), lk_padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa_key);
+
+void lustre_swab_hsm_user_state(struct hsm_user_state *state)
+{
+ __swab32s(&state->hus_states);
+ __swab32s(&state->hus_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_state);
+
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
+{
+ __swab32s(&hss->hss_valid);
+ __swab64s(&hss->hss_setmask);
+ __swab64s(&hss->hss_clearmask);
+ __swab32s(&hss->hss_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_state_set);
+
+void lustre_swab_hsm_extent(struct hsm_extent *extent)
+{
+ __swab64s(&extent->offset);
+ __swab64s(&extent->length);
+}
+
+void lustre_swab_hsm_current_action(struct hsm_current_action *action)
+{
+ __swab32s(&action->hca_state);
+ __swab32s(&action->hca_action);
+ lustre_swab_hsm_extent(&action->hca_location);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_current_action);
+
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
+{
+ lustre_swab_lu_fid(&hui->hui_fid);
+ lustre_swab_hsm_extent(&hui->hui_extent);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_item);
+
+void lustre_swab_layout_intent(struct layout_intent *li)
+{
+ __swab32s(&li->li_opc);
+ __swab32s(&li->li_flags);
+ __swab64s(&li->li_start);
+ __swab64s(&li->li_end);
+}
+EXPORT_SYMBOL(lustre_swab_layout_intent);
+
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
+{
+ lustre_swab_lu_fid(&hpk->hpk_fid);
+ __swab64s(&hpk->hpk_cookie);
+ __swab64s(&hpk->hpk_extent.offset);
+ __swab64s(&hpk->hpk_extent.length);
+ __swab16s(&hpk->hpk_flags);
+ __swab16s(&hpk->hpk_errval);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel);
+
+void lustre_swab_hsm_request(struct hsm_request *hr)
+{
+ __swab32s(&hr->hr_action);
+ __swab32s(&hr->hr_archive_id);
+ __swab64s(&hr->hr_flags);
+ __swab32s(&hr->hr_itemcount);
+ __swab32s(&hr->hr_data_len);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_request);
+
+void lustre_swab_update_buf(struct update_buf *ub)
+{
+ __swab32s(&ub->ub_magic);
+ __swab32s(&ub->ub_count);
+}
+EXPORT_SYMBOL(lustre_swab_update_buf);
+
+void lustre_swab_update_reply_buf(struct update_reply *ur)
+{
+ int i;
+
+ __swab32s(&ur->ur_version);
+ __swab32s(&ur->ur_count);
+ for (i = 0; i < ur->ur_count; i++)
+ __swab32s(&ur->ur_lens[i]);
+}
+EXPORT_SYMBOL(lustre_swab_update_reply_buf);
+
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
+{
+ __swab64s(&msl->msl_flags);
+}
+EXPORT_SYMBOL(lustre_swab_swap_layouts);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
new file mode 100644
index 000000000000..d926d2b36fb4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_import.h>
+
+#include "ptlrpc_internal.h"
+
+
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+ int mdidx)
+{
+ CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON);
+
+ LASSERT(mdidx < desc->bd_md_max_brw);
+ LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+ LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV |
+ LNET_MD_PHYS)));
+
+ md->options |= LNET_MD_KIOV;
+ md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV);
+ md->length = min_t(unsigned int, LNET_MAX_IOV, md->length);
+ if (desc->bd_enc_iov)
+ md->start = &desc->bd_enc_iov[mdidx * LNET_MAX_IOV];
+ else
+ md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV];
+}
+
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len)
+{
+ lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
+
+ kiov->kiov_page = page;
+ kiov->kiov_offset = pageoffset;
+ kiov->kiov_len = len;
+
+ desc->bd_iov_count++;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
new file mode 100644
index 000000000000..ef5269aee0de
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -0,0 +1,763 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pinger.c
+ *
+ * Portal-RPC reconnection and replay operations, for use in recovery.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include "ptlrpc_internal.h"
+
+static int suppress_pings;
+CFS_MODULE_PARM(suppress_pings, "i", int, 0644, "Suppress pings");
+
+struct mutex pinger_mutex;
+static LIST_HEAD(pinger_imports);
+static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
+
+int ptlrpc_pinger_suppress_pings()
+{
+ return suppress_pings;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings);
+
+struct ptlrpc_request *
+ptlrpc_prep_ping(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
+ LUSTRE_OBD_VERSION, OBD_PING);
+ if (req) {
+ ptlrpc_request_set_replen(req);
+ req->rq_no_resend = req->rq_no_delay = 1;
+ }
+ return req;
+}
+
+int ptlrpc_obd_ping(struct obd_device *obd)
+{
+ int rc;
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_obd_ping);
+
+int ptlrpc_ping(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ req = ptlrpc_prep_ping(imp);
+ if (req == NULL) {
+ CERROR("OOM trying to ping %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid,
+ obd2cli_tgt(imp->imp_obd));
+ RETURN(-ENOMEM);
+ }
+
+ DEBUG_REQ(D_INFO, req, "pinging %s->%s",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+
+ RETURN(0);
+}
+
+void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
+{
+ int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
+ if (imp->imp_state == LUSTRE_IMP_DISCON) {
+ int dtime = max_t(int, CONNECTION_SWITCH_MIN,
+ AT_OFF ? 0 :
+ at_get(&imp->imp_at.iat_net_latency));
+ time = min(time, dtime);
+ }
+ imp->imp_next_ping = cfs_time_shift(time);
+}
+
+void ptlrpc_ping_import_soon(struct obd_import *imp)
+{
+ imp->imp_next_ping = cfs_time_current();
+}
+
+static inline int imp_is_deactive(struct obd_import *imp)
+{
+ return (imp->imp_deactive ||
+ OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
+}
+
+static inline int ptlrpc_next_reconnect(struct obd_import *imp)
+{
+ if (imp->imp_server_timeout)
+ return cfs_time_shift(obd_timeout / 2);
+ else
+ return cfs_time_shift(obd_timeout);
+}
+
+static atomic_t suspend_timeouts = ATOMIC_INIT(0);
+static cfs_time_t suspend_wakeup_time = 0;
+
+cfs_duration_t pinger_check_timeout(cfs_time_t time)
+{
+ struct timeout_item *item;
+ cfs_time_t timeout = PING_INTERVAL;
+
+ /* The timeout list is a increase order sorted list */
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ int ti_timeout = item->ti_timeout;
+ if (timeout > ti_timeout)
+ timeout = ti_timeout;
+ break;
+ }
+ mutex_unlock(&pinger_mutex);
+
+ return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)),
+ cfs_time_current());
+}
+
+static wait_queue_head_t suspend_timeouts_waitq;
+
+cfs_time_t ptlrpc_suspend_wakeup_time(void)
+{
+ return suspend_wakeup_time;
+}
+
+void ptlrpc_deactivate_timeouts(struct obd_import *imp)
+{
+ /*XXX: disabled for now, will be replaced by adaptive timeouts */
+#if 0
+ if (imp->imp_no_timeout)
+ return;
+ imp->imp_no_timeout = 1;
+ atomic_inc(&suspend_timeouts);
+ CDEBUG(D_HA|D_WARNING, "deactivate timeouts %u\n",
+ atomic_read(&suspend_timeouts));
+#endif
+}
+
+void ptlrpc_activate_timeouts(struct obd_import *imp)
+{
+ /*XXX: disabled for now, will be replaced by adaptive timeouts */
+#if 0
+ if (!imp->imp_no_timeout)
+ return;
+ imp->imp_no_timeout = 0;
+ LASSERT(atomic_read(&suspend_timeouts) > 0);
+ if (atomic_dec_and_test(&suspend_timeouts)) {
+ suspend_wakeup_time = cfs_time_current();
+ wake_up(&suspend_timeouts_waitq);
+ }
+ CDEBUG(D_HA|D_WARNING, "activate timeouts %u\n",
+ atomic_read(&suspend_timeouts));
+#endif
+}
+
+int ptlrpc_check_suspend(void)
+{
+ if (atomic_read(&suspend_timeouts))
+ return 1;
+ return 0;
+}
+
+int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req)
+{
+ struct l_wait_info lwi;
+
+ if (atomic_read(&suspend_timeouts)) {
+ DEBUG_REQ(D_NET, req, "-- suspend %d regular timeout",
+ atomic_read(&suspend_timeouts));
+ lwi = LWI_INTR(NULL, NULL);
+ l_wait_event(suspend_timeouts_waitq,
+ atomic_read(&suspend_timeouts) == 0, &lwi);
+ DEBUG_REQ(D_NET, req, "-- recharge regular timeout");
+ return 1;
+ }
+ return 0;
+}
+
+
+static bool ir_up;
+
+void ptlrpc_pinger_ir_up(void)
+{
+ CDEBUG(D_HA, "IR up\n");
+ ir_up = true;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_up);
+
+void ptlrpc_pinger_ir_down(void)
+{
+ CDEBUG(D_HA, "IR down\n");
+ ir_up = false;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_down);
+
+static void ptlrpc_pinger_process_import(struct obd_import *imp,
+ unsigned long this_ping)
+{
+ int level;
+ int force;
+ int force_next;
+ int suppress;
+
+ spin_lock(&imp->imp_lock);
+
+ level = imp->imp_state;
+ force = imp->imp_force_verify;
+ force_next = imp->imp_force_next_verify;
+ /*
+ * This will be used below only if the import is "FULL".
+ */
+ suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);
+
+ imp->imp_force_verify = 0;
+
+ if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) &&
+ !force) {
+ spin_unlock(&imp->imp_lock);
+ return;
+ }
+
+ imp->imp_force_next_verify = 0;
+
+ spin_unlock(&imp->imp_lock);
+
+ CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u "
+ "force %u force_next %u deactive %u pingable %u suppress %u\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(level), level, force, force_next,
+ imp->imp_deactive, imp->imp_pingable, suppress);
+
+ if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
+ /* wait for a while before trying recovery again */
+ imp->imp_next_ping = ptlrpc_next_reconnect(imp);
+ if (!imp->imp_no_pinger_recover)
+ ptlrpc_initiate_recovery(imp);
+ } else if (level != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov ||
+ imp_is_deactive(imp)) {
+ CDEBUG(D_HA, "%s->%s: not pinging (in recovery "
+ "or recovery disabled: %s)\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(level));
+ } else if ((imp->imp_pingable && !suppress) || force_next || force) {
+ ptlrpc_ping(imp);
+ }
+}
+
+static int ptlrpc_pinger_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+ ENTRY;
+
+ /* Record that the thread is running */
+ thread_set_flags(thread, SVC_RUNNING);
+ wake_up(&thread->t_ctl_waitq);
+
+ /* And now, loop forever, pinging as needed. */
+ while (1) {
+ cfs_time_t this_ping = cfs_time_current();
+ struct l_wait_info lwi;
+ cfs_duration_t time_to_next_wake;
+ struct timeout_item *item;
+ struct list_head *iter;
+
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ item->ti_cb(item, item->ti_cb_data);
+ }
+ list_for_each(iter, &pinger_imports) {
+ struct obd_import *imp =
+ list_entry(iter, struct obd_import,
+ imp_pinger_chain);
+
+ ptlrpc_pinger_process_import(imp, this_ping);
+ /* obd_timeout might have changed */
+ if (imp->imp_pingable && imp->imp_next_ping &&
+ cfs_time_after(imp->imp_next_ping,
+ cfs_time_add(this_ping,
+ cfs_time_seconds(PING_INTERVAL))))
+ ptlrpc_update_next_ping(imp, 0);
+ }
+ mutex_unlock(&pinger_mutex);
+ /* update memory usage info */
+ obd_update_maxusage();
+
+ /* Wait until the next ping time, or until we're stopped. */
+ time_to_next_wake = pinger_check_timeout(this_ping);
+ /* The ping sent by ptlrpc_send_rpc may get sent out
+ say .01 second after this.
+ ptlrpc_pinger_sending_on_import will then set the
+ next ping time to next_ping + .01 sec, which means
+ we will SKIP the next ping at next_ping, and the
+ ping will get sent 2 timeouts from now! Beware. */
+ CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" ("
+ CFS_TIME_T")\n", time_to_next_wake,
+ cfs_time_add(this_ping,cfs_time_seconds(PING_INTERVAL)));
+ if (time_to_next_wake > 0) {
+ lwi = LWI_TIMEOUT(max_t(cfs_duration_t,
+ time_to_next_wake,
+ cfs_time_seconds(1)),
+ NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopping(thread) ||
+ thread_is_event(thread),
+ &lwi);
+ if (thread_test_and_clear_flags(thread, SVC_STOPPING)) {
+ EXIT;
+ break;
+ } else {
+ /* woken after adding import to reset timer */
+ thread_test_and_clear_flags(thread, SVC_EVENT);
+ }
+ }
+ }
+
+ thread_set_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+
+ CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
+ return 0;
+}
+
+static struct ptlrpc_thread *pinger_thread = NULL;
+
+int ptlrpc_start_pinger(void)
+{
+ struct l_wait_info lwi = { 0 };
+ int rc;
+ ENTRY;
+
+ if (pinger_thread != NULL)
+ RETURN(-EALREADY);
+
+ OBD_ALLOC_PTR(pinger_thread);
+ if (pinger_thread == NULL)
+ RETURN(-ENOMEM);
+ init_waitqueue_head(&pinger_thread->t_ctl_waitq);
+ init_waitqueue_head(&suspend_timeouts_waitq);
+
+ strcpy(pinger_thread->t_name, "ll_ping");
+
+ /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
+ * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */
+ rc = PTR_ERR(kthread_run(ptlrpc_pinger_main,
+ pinger_thread, pinger_thread->t_name));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("cannot start thread: %d\n", rc);
+ OBD_FREE(pinger_thread, sizeof(*pinger_thread));
+ pinger_thread = NULL;
+ RETURN(rc);
+ }
+ l_wait_event(pinger_thread->t_ctl_waitq,
+ thread_is_running(pinger_thread), &lwi);
+
+ if (suppress_pings)
+ CWARN("Pings will be suppressed at the request of the "
+ "administrator. The configuration shall meet the "
+ "additional requirements described in the manual. "
+ "(Search for the \"suppress_pings\" kernel module "
+ "parameter.)\n");
+
+ RETURN(0);
+}
+
+int ptlrpc_pinger_remove_timeouts(void);
+
+int ptlrpc_stop_pinger(void)
+{
+ struct l_wait_info lwi = { 0 };
+ int rc = 0;
+ ENTRY;
+
+ if (pinger_thread == NULL)
+ RETURN(-EALREADY);
+
+ ptlrpc_pinger_remove_timeouts();
+ mutex_lock(&pinger_mutex);
+ thread_set_flags(pinger_thread, SVC_STOPPING);
+ wake_up(&pinger_thread->t_ctl_waitq);
+ mutex_unlock(&pinger_mutex);
+
+ l_wait_event(pinger_thread->t_ctl_waitq,
+ thread_is_stopped(pinger_thread), &lwi);
+
+ OBD_FREE_PTR(pinger_thread);
+ pinger_thread = NULL;
+ RETURN(rc);
+}
+
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
+{
+ ptlrpc_update_next_ping(imp, 0);
+}
+EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
+
+void ptlrpc_pinger_commit_expected(struct obd_import *imp)
+{
+ ptlrpc_update_next_ping(imp, 1);
+ LASSERT(spin_is_locked(&imp->imp_lock));
+ /*
+ * Avoid reading stale imp_connect_data. When not sure if pings are
+ * expected or not on next connection, we assume they are not and force
+ * one anyway to guarantee the chance of updating
+ * imp_peer_committed_transno.
+ */
+ if (imp->imp_state != LUSTRE_IMP_FULL ||
+ OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS))
+ imp->imp_force_next_verify = 1;
+}
+
+int ptlrpc_pinger_add_import(struct obd_import *imp)
+{
+ ENTRY;
+ if (!list_empty(&imp->imp_pinger_chain))
+ RETURN(-EALREADY);
+
+ mutex_lock(&pinger_mutex);
+ CDEBUG(D_HA, "adding pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ /* if we add to pinger we want recovery on this import */
+ imp->imp_obd->obd_no_recov = 0;
+ ptlrpc_update_next_ping(imp, 0);
+ /* XXX sort, blah blah */
+ list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
+ class_import_get(imp);
+
+ ptlrpc_pinger_wake_up();
+ mutex_unlock(&pinger_mutex);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_pinger_add_import);
+
+int ptlrpc_pinger_del_import(struct obd_import *imp)
+{
+ ENTRY;
+ if (list_empty(&imp->imp_pinger_chain))
+ RETURN(-ENOENT);
+
+ mutex_lock(&pinger_mutex);
+ list_del_init(&imp->imp_pinger_chain);
+ CDEBUG(D_HA, "removing pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ /* if we remove from pinger we don't want recovery on this import */
+ imp->imp_obd->obd_no_recov = 1;
+ class_import_put(imp);
+ mutex_unlock(&pinger_mutex);
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_pinger_del_import);
+
+/**
+ * Register a timeout callback to the pinger list, and the callback will
+ * be called when timeout happens.
+ */
+struct timeout_item* ptlrpc_new_timeout(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data)
+{
+ struct timeout_item *ti;
+
+ OBD_ALLOC_PTR(ti);
+ if (!ti)
+ return(NULL);
+
+ INIT_LIST_HEAD(&ti->ti_obd_list);
+ INIT_LIST_HEAD(&ti->ti_chain);
+ ti->ti_timeout = time;
+ ti->ti_event = event;
+ ti->ti_cb = cb;
+ ti->ti_cb_data = data;
+
+ return ti;
+}
+
+/**
+ * Register timeout event on the the pinger thread.
+ * Note: the timeout list is an sorted list with increased timeout value.
+ */
+static struct timeout_item*
+ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data)
+{
+ struct timeout_item *item, *tmp;
+
+ LASSERT(mutex_is_locked(&pinger_mutex));
+
+ list_for_each_entry(item, &timeout_list, ti_chain)
+ if (item->ti_event == event)
+ goto out;
+
+ item = ptlrpc_new_timeout(time, event, cb, data);
+ if (item) {
+ list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) {
+ if (tmp->ti_timeout < time) {
+ list_add(&item->ti_chain, &tmp->ti_chain);
+ goto out;
+ }
+ }
+ list_add(&item->ti_chain, &timeout_list);
+ }
+out:
+ return item;
+}
+
+/* Add a client_obd to the timeout event list, when timeout(@time)
+ * happens, the callback(@cb) will be called.
+ */
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data,
+ struct list_head *obd_list)
+{
+ struct timeout_item *ti;
+
+ mutex_lock(&pinger_mutex);
+ ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
+ if (!ti) {
+ mutex_unlock(&pinger_mutex);
+ return (-EINVAL);
+ }
+ list_add(obd_list, &ti->ti_obd_list);
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_add_timeout_client);
+
+int ptlrpc_del_timeout_client(struct list_head *obd_list,
+ enum timeout_event event)
+{
+ struct timeout_item *ti = NULL, *item;
+
+ if (list_empty(obd_list))
+ return 0;
+ mutex_lock(&pinger_mutex);
+ list_del_init(obd_list);
+ /**
+ * If there are no obd attached to the timeout event
+ * list, remove this timeout event from the pinger
+ */
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ if (item->ti_event == event) {
+ ti = item;
+ break;
+ }
+ }
+ LASSERTF(ti != NULL, "ti is NULL ! \n");
+ if (list_empty(&ti->ti_obd_list)) {
+ list_del(&ti->ti_chain);
+ OBD_FREE_PTR(ti);
+ }
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_del_timeout_client);
+
+int ptlrpc_pinger_remove_timeouts(void)
+{
+ struct timeout_item *item, *tmp;
+
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
+ LASSERT(list_empty(&item->ti_obd_list));
+ list_del(&item->ti_chain);
+ OBD_FREE_PTR(item);
+ }
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+
+void ptlrpc_pinger_wake_up()
+{
+ thread_add_flags(pinger_thread, SVC_EVENT);
+ wake_up(&pinger_thread->t_ctl_waitq);
+}
+
+/* Ping evictor thread */
+#define PET_READY 1
+#define PET_TERMINATE 2
+
+static int pet_refcount = 0;
+static int pet_state;
+static wait_queue_head_t pet_waitq;
+LIST_HEAD(pet_list);
+static DEFINE_SPINLOCK(pet_lock);
+
+int ping_evictor_wake(struct obd_export *exp)
+{
+ struct obd_device *obd;
+
+ spin_lock(&pet_lock);
+ if (pet_state != PET_READY) {
+ /* eventually the new obd will call here again. */
+ spin_unlock(&pet_lock);
+ return 1;
+ }
+
+ obd = class_exp2obd(exp);
+ if (list_empty(&obd->obd_evict_list)) {
+ class_incref(obd, "evictor", obd);
+ list_add(&obd->obd_evict_list, &pet_list);
+ }
+ spin_unlock(&pet_lock);
+
+ wake_up(&pet_waitq);
+ return 0;
+}
+
+static int ping_evictor_main(void *arg)
+{
+ struct obd_device *obd;
+ struct obd_export *exp;
+ struct l_wait_info lwi = { 0 };
+ time_t expire_time;
+ ENTRY;
+
+ unshare_fs_struct();
+
+ CDEBUG(D_HA, "Starting Ping Evictor\n");
+ pet_state = PET_READY;
+ while (1) {
+ l_wait_event(pet_waitq, (!list_empty(&pet_list)) ||
+ (pet_state == PET_TERMINATE), &lwi);
+
+ /* loop until all obd's will be removed */
+ if ((pet_state == PET_TERMINATE) && list_empty(&pet_list))
+ break;
+
+ /* we only get here if pet_exp != NULL, and the end of this
+ * loop is the only place which sets it NULL again, so lock
+ * is not strictly necessary. */
+ spin_lock(&pet_lock);
+ obd = list_entry(pet_list.next, struct obd_device,
+ obd_evict_list);
+ spin_unlock(&pet_lock);
+
+ expire_time = cfs_time_current_sec() - PING_EVICT_TIMEOUT;
+
+ CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
+ obd->obd_name, expire_time);
+
+ /* Exports can't be deleted out of the list while we hold
+ * the obd lock (class_unlink_export), which means we can't
+ * lose the last ref on the export. If they've already been
+ * removed from the list, we won't find them here. */
+ spin_lock(&obd->obd_dev_lock);
+ while (!list_empty(&obd->obd_exports_timed)) {
+ exp = list_entry(obd->obd_exports_timed.next,
+ struct obd_export,
+ exp_obd_chain_timed);
+ if (expire_time > exp->exp_last_request_time) {
+ class_export_get(exp);
+ spin_unlock(&obd->obd_dev_lock);
+ LCONSOLE_WARN("%s: haven't heard from client %s"
+ " (at %s) in %ld seconds. I think"
+ " it's dead, and I am evicting"
+ " it. exp %p, cur %ld expire %ld"
+ " last %ld\n",
+ obd->obd_name,
+ obd_uuid2str(&exp->exp_client_uuid),
+ obd_export_nid2str(exp),
+ (long)(cfs_time_current_sec() -
+ exp->exp_last_request_time),
+ exp, (long)cfs_time_current_sec(),
+ (long)expire_time,
+ (long)exp->exp_last_request_time);
+ CDEBUG(D_HA, "Last request was at %ld\n",
+ exp->exp_last_request_time);
+ class_fail_export(exp);
+ class_export_put(exp);
+ spin_lock(&obd->obd_dev_lock);
+ } else {
+ /* List is sorted, so everyone below is ok */
+ break;
+ }
+ }
+ spin_unlock(&obd->obd_dev_lock);
+
+ spin_lock(&pet_lock);
+ list_del_init(&obd->obd_evict_list);
+ spin_unlock(&pet_lock);
+
+ class_decref(obd, "evictor", obd);
+ }
+ CDEBUG(D_HA, "Exiting Ping Evictor\n");
+
+ RETURN(0);
+}
+
+void ping_evictor_start(void)
+{
+ task_t *task;
+
+ if (++pet_refcount > 1)
+ return;
+
+ init_waitqueue_head(&pet_waitq);
+
+ task = kthread_run(ping_evictor_main, NULL, "ll_evictor");
+ if (IS_ERR(task)) {
+ pet_refcount--;
+ CERROR("Cannot start ping evictor thread: %ld\n",
+ PTR_ERR(task));
+ }
+}
+EXPORT_SYMBOL(ping_evictor_start);
+
+void ping_evictor_stop(void)
+{
+ if (--pet_refcount > 0)
+ return;
+
+ pet_state = PET_TERMINATE;
+ wake_up(&pet_waitq);
+}
+EXPORT_SYMBOL(ping_evictor_stop);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
new file mode 100644
index 000000000000..ab363477151d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -0,0 +1,302 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/* Intramodule declarations for ptlrpc. */
+
+#ifndef PTLRPC_INTERNAL_H
+#define PTLRPC_INTERNAL_H
+
+#include "../ldlm/ldlm_internal.h"
+
+struct ldlm_namespace;
+struct obd_import;
+struct ldlm_res_id;
+struct ptlrpc_request_set;
+extern int test_req_buffer_pressure;
+extern struct mutex ptlrpc_all_services_mutex;
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
+/* ptlrpcd.c */
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
+
+/* client.c */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal);
+void ptlrpc_init_xid(void);
+
+/* events.c */
+int ptlrpc_init_portals(void);
+void ptlrpc_exit_portals(void);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
+void lustre_assert_wire_constants(void);
+int ptlrpc_import_in_recovery(struct obd_import *imp);
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_handle_failed_import(struct obd_import *imp);
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
+void ptlrpc_initiate_recovery(struct obd_import *imp);
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
+
+#ifdef LPROCFS
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *proc_entry,
+ struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount);
+void ptlrpc_lprocfs_do_request_stat (struct ptlrpc_request *req,
+ long q_usec, long work_usec);
+#else
+#define ptlrpc_lprocfs_register_service(params...) do{}while(0)
+#define ptlrpc_lprocfs_unregister_service(params...) do{}while(0)
+#define ptlrpc_lprocfs_rpc_sent(params...) do{}while(0)
+#define ptlrpc_lprocfs_do_request_stat(params...) do{}while(0)
+#endif /* LPROCFS */
+
+/* NRS */
+
+/**
+ * NRS core object.
+ *
+ * Holds NRS core fields.
+ */
+struct nrs_core {
+ /**
+ * Protects nrs_core::nrs_policies, serializes external policy
+ * registration/unregistration, and NRS core lprocfs operations.
+ */
+ struct mutex nrs_mutex;
+ /* XXX: This is just for liblustre. Remove the #if defined directive
+ * when the * "cfs_" prefix is dropped from cfs_list_head. */
+ /**
+ * List of all policy descriptors registered with NRS core; protected
+ * by nrs_core::nrs_mutex.
+ */
+ struct list_head nrs_policies;
+
+};
+
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc);
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc);
+
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp);
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp);
+
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+ bool peek, bool force);
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp,
+ bool force)
+{
+ return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force);
+}
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_peek_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ return ptlrpc_nrs_req_get_nolock0(svcpt, hp, true, false);
+}
+
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req);
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp);
+
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+ enum ptlrpc_nrs_queue_type queue, char *name,
+ enum ptlrpc_nrs_ctl opc, bool single, void *arg);
+
+int ptlrpc_nrs_init(void);
+void ptlrpc_nrs_fini(void);
+
+static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nrs_hp != NULL;
+}
+
+static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc)
+{
+ /**
+ * If the first service partition has an HP NRS head, all service
+ * partitions will.
+ */
+ return nrs_svcpt_has_hp(svc->srv_parts[0]);
+}
+
+static inline
+struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt)));
+ return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+}
+
+static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt->scp_cpt;
+}
+
+static inline
+struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt->scp_service;
+}
+
+static inline
+struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt;
+}
+
+static inline
+struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy)
+{
+ return nrs_pol2svc(policy)->srv_cptable;
+}
+
+static inline struct ptlrpc_nrs_resource *
+nrs_request_resource(struct ptlrpc_nrs_request *nrq)
+{
+ LASSERT(nrq->nr_initialized);
+ LASSERT(!nrq->nr_finalized);
+
+ return nrq->nr_res_ptrs[nrq->nr_res_idx];
+}
+
+static inline
+struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq)
+{
+ return nrs_request_resource(nrq)->res_policy;
+}
+
+#define NRS_LPROCFS_QUANTUM_NAME_REG "reg_quantum:"
+#define NRS_LPROCFS_QUANTUM_NAME_HP "hp_quantum:"
+
+/**
+ * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum.
+ */
+#define LPROCFS_NRS_QUANTUM_MAX 65535
+
+/**
+ * Max valid command string is the size of the labels, plus "65535" twice, plus
+ * a separating space character.
+ */
+#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD \
+ sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " " \
+ NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX))
+
+/* recovd_thread.c */
+
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
+
+/* pers.c */
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+ int mdcnt);
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len);
+
+/* pack_generic.c */
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt);
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
+
+/* pinger.c */
+int ptlrpc_start_pinger(void);
+int ptlrpc_stop_pinger(void);
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
+void ptlrpc_pinger_commit_expected(struct obd_import *imp);
+void ptlrpc_pinger_wake_up(void);
+void ptlrpc_ping_import_soon(struct obd_import *imp);
+int ping_evictor_wake(struct obd_export *exp);
+
+/* sec_null.c */
+int sptlrpc_null_init(void);
+void sptlrpc_null_fini(void);
+
+/* sec_plain.c */
+int sptlrpc_plain_init(void);
+void sptlrpc_plain_fini(void);
+
+/* sec_bulk.c */
+int sptlrpc_enc_pool_init(void);
+void sptlrpc_enc_pool_fini(void);
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v);
+
+/* sec_lproc.c */
+int sptlrpc_lproc_init(void);
+void sptlrpc_lproc_fini(void);
+
+/* sec_gc.c */
+int sptlrpc_gc_init(void);
+void sptlrpc_gc_fini(void);
+
+/* sec_config.c */
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ struct obd_uuid *target,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf);
+int sptlrpc_conf_init(void);
+void sptlrpc_conf_fini(void);
+
+/* sec.c */
+int sptlrpc_init(void);
+void sptlrpc_fini(void);
+
+static inline int ll_rpc_recoverable_error(int rc)
+{
+ return (rc == -ENOTCONN || rc == -ENODEV);
+}
+
+static inline int tgt_mod_init(void)
+{
+ return 0;
+}
+
+static inline void tgt_mod_exit(void)
+{
+ return;
+}
+
+static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
+{
+ if (atomic_dec_and_test(&set->set_refcount))
+ OBD_FREE_PTR(set);
+}
+#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
new file mode 100644
index 000000000000..f6ea80f0b105
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
@@ -0,0 +1,154 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_req_layout.h>
+
+#include "ptlrpc_internal.h"
+
+extern spinlock_t ptlrpc_last_xid_lock;
+#if RS_DEBUG
+extern spinlock_t ptlrpc_rs_debug_lock;
+#endif
+extern struct mutex pinger_mutex;
+extern struct mutex ptlrpcd_mutex;
+
+__init int ptlrpc_init(void)
+{
+ int rc, cleanup_phase = 0;
+ ENTRY;
+
+ lustre_assert_wire_constants();
+#if RS_DEBUG
+ spin_lock_init(&ptlrpc_rs_debug_lock);
+#endif
+ mutex_init(&ptlrpc_all_services_mutex);
+ mutex_init(&pinger_mutex);
+ mutex_init(&ptlrpcd_mutex);
+ ptlrpc_init_xid();
+
+ rc = req_layout_init();
+ if (rc)
+ RETURN(rc);
+
+ rc = ptlrpc_hr_init();
+ if (rc)
+ RETURN(rc);
+
+ cleanup_phase = 1;
+
+ rc = ptlrpc_init_portals();
+ if (rc)
+ GOTO(cleanup, rc);
+ cleanup_phase = 2;
+
+ rc = ptlrpc_connection_init();
+ if (rc)
+ GOTO(cleanup, rc);
+ cleanup_phase = 3;
+
+ ptlrpc_put_connection_superhack = ptlrpc_connection_put;
+
+ rc = ptlrpc_start_pinger();
+ if (rc)
+ GOTO(cleanup, rc);
+ cleanup_phase = 4;
+
+ rc = ldlm_init();
+ if (rc)
+ GOTO(cleanup, rc);
+ cleanup_phase = 5;
+
+ rc = sptlrpc_init();
+ if (rc)
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 7;
+ rc = ptlrpc_nrs_init();
+ if (rc)
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 8;
+ rc = tgt_mod_init();
+ if (rc)
+ GOTO(cleanup, rc);
+ RETURN(0);
+
+cleanup:
+ switch(cleanup_phase) {
+ case 8:
+ ptlrpc_nrs_fini();
+ case 7:
+ sptlrpc_fini();
+ case 5:
+ ldlm_exit();
+ case 4:
+ ptlrpc_stop_pinger();
+ case 3:
+ ptlrpc_connection_fini();
+ case 2:
+ ptlrpc_exit_portals();
+ case 1:
+ ptlrpc_hr_fini();
+ req_layout_fini();
+ default: ;
+ }
+
+ return rc;
+}
+
+static void __exit ptlrpc_exit(void)
+{
+ tgt_mod_exit();
+ ptlrpc_nrs_fini();
+ sptlrpc_fini();
+ ldlm_exit();
+ ptlrpc_stop_pinger();
+ ptlrpc_exit_portals();
+ ptlrpc_hr_fini();
+ ptlrpc_connection_fini();
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Request Processor and Lock Management");
+MODULE_LICENSE("GPL");
+
+cfs_module(ptlrpc, "1.0.0", ptlrpc_init, ptlrpc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
new file mode 100644
index 000000000000..5a66a1be4228
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -0,0 +1,827 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/ptlrpcd.c
+ */
+
+/** \defgroup ptlrpcd PortalRPC daemon
+ *
+ * ptlrpcd is a special thread with its own set where other user might add
+ * requests when they don't want to wait for their completion.
+ * PtlRPCD will take care of sending such requests and then processing their
+ * replies and calling completion callbacks as necessary.
+ * The callbacks are called directly from ptlrpcd context.
+ * It is important to never significantly block (esp. on RPCs!) within such
+ * completion handler or a deadlock might occur where ptlrpcd enters some
+ * callback that attempts to send another RPC and wait for it to return,
+ * during which time ptlrpcd is completely blocked, so e.g. if import
+ * fails, recovery cannot progress because connection requests are also
+ * sent by ptlrpcd.
+ *
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+# include <linux/libcfs/libcfs.h>
+
+#include <lustre_net.h>
+# include <lustre_lib.h>
+
+#include <lustre_ha.h>
+#include <obd_class.h> /* for obd_zombie */
+#include <obd_support.h> /* for OBD_FAIL_CHECK */
+#include <cl_object.h> /* cl_env_{get,put}() */
+#include <lprocfs_status.h>
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpcd {
+ int pd_size;
+ int pd_index;
+ int pd_nthreads;
+ struct ptlrpcd_ctl pd_thread_rcv;
+ struct ptlrpcd_ctl pd_threads[0];
+};
+
+static int max_ptlrpcds;
+CFS_MODULE_PARM(max_ptlrpcds, "i", int, 0644,
+ "Max ptlrpcd thread count to be started.");
+
+static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+CFS_MODULE_PARM(ptlrpcd_bind_policy, "i", int, 0644,
+ "Ptlrpcd threads binding mode.");
+static struct ptlrpcd *ptlrpcds;
+
+struct mutex ptlrpcd_mutex;
+static int ptlrpcd_users = 0;
+
+void ptlrpcd_wake(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *rq_set = req->rq_set;
+
+ LASSERT(rq_set != NULL);
+
+ wake_up(&rq_set->set_waitq);
+}
+EXPORT_SYMBOL(ptlrpcd_wake);
+
+static struct ptlrpcd_ctl *
+ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index)
+{
+ int idx = 0;
+
+ if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
+ return &ptlrpcds->pd_thread_rcv;
+
+ switch (policy) {
+ case PDL_POLICY_SAME:
+ idx = smp_processor_id() % ptlrpcds->pd_nthreads;
+ break;
+ case PDL_POLICY_LOCAL:
+ /* Before CPU partition patches available, process it the same
+ * as "PDL_POLICY_ROUND". */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix this code to use new CPU partition APIs"
+# endif
+ /* Fall through to PDL_POLICY_ROUND until the CPU
+ * CPU partition patches are available. */
+ index = -1;
+ case PDL_POLICY_PREFERRED:
+ if (index >= 0 && index < num_online_cpus()) {
+ idx = index % ptlrpcds->pd_nthreads;
+ break;
+ }
+ /* Fall through to PDL_POLICY_ROUND for bad index. */
+ default:
+ /* Fall through to PDL_POLICY_ROUND for unknown policy. */
+ case PDL_POLICY_ROUND:
+ /* We do not care whether it is strict load balance. */
+ idx = ptlrpcds->pd_index + 1;
+ if (idx == smp_processor_id())
+ idx++;
+ idx %= ptlrpcds->pd_nthreads;
+ ptlrpcds->pd_index = idx;
+ break;
+ }
+
+ return &ptlrpcds->pd_threads[idx];
+}
+
+/**
+ * Move all request from an existing request set to the ptlrpcd queue.
+ * All requests from the set must be in phase RQ_PHASE_NEW.
+ */
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpcd_ctl *pc;
+ struct ptlrpc_request_set *new;
+ int count, i;
+
+ pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
+ new = pc->pc_set;
+
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ req->rq_set = new;
+ req->rq_queued_time = cfs_time_current();
+ }
+
+ spin_lock(&new->set_new_req_lock);
+ list_splice_init(&set->set_requests, &new->set_new_requests);
+ i = atomic_read(&set->set_remaining);
+ count = atomic_add_return(i, &new->set_new_count);
+ atomic_set(&set->set_remaining, 0);
+ spin_unlock(&new->set_new_req_lock);
+ if (count == i) {
+ wake_up(&new->set_waitq);
+
+ /* XXX: It maybe unnecessary to wakeup all the partners. But to
+ * guarantee the async RPC can be processed ASAP, we have
+ * no other better choice. It maybe fixed in future. */
+ for (i = 0; i < pc->pc_npartners; i++)
+ wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+ }
+}
+EXPORT_SYMBOL(ptlrpcd_add_rqset);
+
+/**
+ * Return transferred RPCs count.
+ */
+static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
+ struct ptlrpc_request_set *src)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ int rc = 0;
+
+ spin_lock(&src->set_new_req_lock);
+ if (likely(!list_empty(&src->set_new_requests))) {
+ list_for_each_safe(pos, tmp, &src->set_new_requests) {
+ req = list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+ req->rq_set = des;
+ }
+ list_splice_init(&src->set_new_requests,
+ &des->set_requests);
+ rc = atomic_read(&src->set_new_count);
+ atomic_add(rc, &des->set_remaining);
+ atomic_set(&src->set_new_count, 0);
+ }
+ spin_unlock(&src->set_new_req_lock);
+ return rc;
+}
+
+/**
+ * Requests that are added to the ptlrpcd queue are sent via
+ * ptlrpcd_check->ptlrpc_check_set().
+ */
+void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
+{
+ struct ptlrpcd_ctl *pc;
+
+ if (req->rq_reqmsg)
+ lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_invalid_rqset) {
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
+ back_to_sleep, NULL);
+
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+ l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+ } else if (req->rq_set) {
+ /* If we have a vaid "rq_set", just reuse it to avoid double
+ * linked. */
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
+
+ /* ptlrpc_check_set will decrease the count */
+ atomic_inc(&req->rq_set->set_remaining);
+ spin_unlock(&req->rq_lock);
+ wake_up(&req->rq_set->set_waitq);
+ return;
+ } else {
+ spin_unlock(&req->rq_lock);
+ }
+
+ pc = ptlrpcd_select_pc(req, policy, idx);
+
+ DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
+ req, pc->pc_name, pc->pc_index);
+
+ ptlrpc_set_add_new_req(pc, req);
+}
+EXPORT_SYMBOL(ptlrpcd_add_req);
+
+static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
+{
+ atomic_inc(&set->set_refcount);
+}
+
+/**
+ * Check if there is more work to do on ptlrpcd set.
+ * Returns 1 if yes.
+ */
+static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ int rc = 0;
+ int rc2;
+ ENTRY;
+
+ if (atomic_read(&set->set_new_count)) {
+ spin_lock(&set->set_new_req_lock);
+ if (likely(!list_empty(&set->set_new_requests))) {
+ list_splice_init(&set->set_new_requests,
+ &set->set_requests);
+ atomic_add(atomic_read(&set->set_new_count),
+ &set->set_remaining);
+ atomic_set(&set->set_new_count, 0);
+ /*
+ * Need to calculate its timeout.
+ */
+ rc = 1;
+ }
+ spin_unlock(&set->set_new_req_lock);
+ }
+
+ /* We should call lu_env_refill() before handling new requests to make
+ * sure that env key the requests depending on really exists.
+ */
+ rc2 = lu_env_refill(env);
+ if (rc2 != 0) {
+ /*
+ * XXX This is very awkward situation, because
+ * execution can neither continue (request
+ * interpreters assume that env is set up), nor repeat
+ * the loop (as this potentially results in a tight
+ * loop of -ENOMEM's).
+ *
+ * Fortunately, refill only ever does something when
+ * new modules are loaded, i.e., early during boot up.
+ */
+ CERROR("Failure to refill session: %d\n", rc2);
+ RETURN(rc);
+ }
+
+ if (atomic_read(&set->set_remaining))
+ rc |= ptlrpc_check_set(env, set);
+
+ if (!list_empty(&set->set_requests)) {
+ /*
+ * XXX: our set never completes, so we prune the completed
+ * reqs after each iteration. boy could this be smarter.
+ */
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ req = list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+ if (req->rq_phase != RQ_PHASE_COMPLETE)
+ continue;
+
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+ ptlrpc_req_finished(req);
+ }
+ }
+
+ if (rc == 0) {
+ /*
+ * If new requests have been added, make sure to wake up.
+ */
+ rc = atomic_read(&set->set_new_count);
+
+ /* If we have nothing to do, check whether we can take some
+ * work from our partner threads. */
+ if (rc == 0 && pc->pc_npartners > 0) {
+ struct ptlrpcd_ctl *partner;
+ struct ptlrpc_request_set *ps;
+ int first = pc->pc_cursor;
+
+ do {
+ partner = pc->pc_partners[pc->pc_cursor++];
+ if (pc->pc_cursor >= pc->pc_npartners)
+ pc->pc_cursor = 0;
+ if (partner == NULL)
+ continue;
+
+ spin_lock(&partner->pc_lock);
+ ps = partner->pc_set;
+ if (ps == NULL) {
+ spin_unlock(&partner->pc_lock);
+ continue;
+ }
+
+ ptlrpc_reqset_get(ps);
+ spin_unlock(&partner->pc_lock);
+
+ if (atomic_read(&ps->set_new_count)) {
+ rc = ptlrpcd_steal_rqset(set, ps);
+ if (rc > 0)
+ CDEBUG(D_RPCTRACE, "transfer %d"
+ " async RPCs [%d->%d]\n",
+ rc, partner->pc_index,
+ pc->pc_index);
+ }
+ ptlrpc_reqset_put(ps);
+ } while (rc == 0 && pc->pc_cursor != first);
+ }
+ }
+
+ RETURN(rc);
+}
+
+/**
+ * Main ptlrpcd thread.
+ * ptlrpc's code paths like to execute in process context, so we have this
+ * thread which spins on a set which contains the rpcs and sends them.
+ *
+ */
+static int ptlrpcd(void *arg)
+{
+ struct ptlrpcd_ctl *pc = arg;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ struct lu_env env = { .le_ses = NULL };
+ int rc, exit = 0;
+ ENTRY;
+
+ unshare_fs_struct();
+#if defined(CONFIG_SMP)
+ if (test_bit(LIOD_BIND, &pc->pc_flags)) {
+ int index = pc->pc_index;
+
+ if (index >= 0 && index < num_possible_cpus()) {
+ while (!cpu_online(index)) {
+ if (++index >= num_possible_cpus())
+ index = 0;
+ }
+ set_cpus_allowed_ptr(current,
+ cpumask_of_node(cpu_to_node(index)));
+ }
+ }
+#endif
+ /*
+ * XXX So far only "client" ptlrpcd uses an environment. In
+ * the future, ptlrpcd thread (or a thread-set) has to given
+ * an argument, describing its "scope".
+ */
+ rc = lu_context_init(&env.le_ctx,
+ LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ complete(&pc->pc_starting);
+
+ if (rc != 0)
+ RETURN(rc);
+
+ /*
+ * This mainloop strongly resembles ptlrpc_set_wait() except that our
+ * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when
+ * there are requests in the set. New requests come in on the set's
+ * new_req_list and ptlrpcd_check() moves them into the set.
+ */
+ do {
+ struct l_wait_info lwi;
+ int timeout;
+
+ timeout = ptlrpc_set_next_timeout(set);
+ lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+ ptlrpc_expired_set, set);
+
+ lu_context_enter(&env.le_ctx);
+ l_wait_event(set->set_waitq,
+ ptlrpcd_check(&env, pc), &lwi);
+ lu_context_exit(&env.le_ctx);
+
+ /*
+ * Abort inflight rpcs for forced stop case.
+ */
+ if (test_bit(LIOD_STOP, &pc->pc_flags)) {
+ if (test_bit(LIOD_FORCE, &pc->pc_flags))
+ ptlrpc_abort_set(set);
+ exit++;
+ }
+
+ /*
+ * Let's make one more loop to make sure that ptlrpcd_check()
+ * copied all raced new rpcs into the set so we can kill them.
+ */
+ } while (exit < 2);
+
+ /*
+ * Wait for inflight requests to drain.
+ */
+ if (!list_empty(&set->set_requests))
+ ptlrpc_set_wait(set);
+ lu_context_fini(&env.le_ctx);
+
+ complete(&pc->pc_finishing);
+
+ return 0;
+}
+
+/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
+ * ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by
+ * data transfer cross-CPU cores. So we bind ptlrpcd thread to specified
+ * CPU core. But binding all ptlrpcd threads maybe cause response delay
+ * because of some CPU core(s) busy with other loads.
+ *
+ * For example: "ls -l", some async RPCs for statahead are assigned to
+ * ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy
+ * with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l"
+ * thread, statahead thread, and ptlrpcd thread can run in parallel), under
+ * such case, the statahead async RPCs can not be processed in time, it is
+ * unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may
+ * be better. But it breaks former data transfer policy.
+ *
+ * So we shouldn't be blind for avoiding the data transfer. We make some
+ * compromise: divide the ptlrpcd threds pool into two parts. One part is
+ * for bound mode, each ptlrpcd thread in this part is bound to some CPU
+ * core. The other part is for free mode, all the ptlrpcd threads in the
+ * part can be scheduled on any CPU core. We specify some partnership
+ * between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s),
+ * and the async RPC load within the partners are shared.
+ *
+ * It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd
+ * thread can be scheduled in time), and try to guarantee the async RPC
+ * processed ASAP (as long as the free mode ptlrpcd thread can be scheduled
+ * on any CPU core).
+ *
+ * As for how to specify the partnership between bound mode ptlrpcd
+ * thread(s) and free mode ptlrpcd thread(s), the simplest way is to use
+ * <free bound> pair. In future, we can specify some more complex
+ * partnership based on the patches for CPU partition. But before such
+ * patches are available, we prefer to use the simplest one.
+ */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
+# endif
+static int ptlrpcd_bind(int index, int max)
+{
+ struct ptlrpcd_ctl *pc;
+ int rc = 0;
+#if defined(CONFIG_NUMA)
+ cpumask_t mask;
+#endif
+ ENTRY;
+
+ LASSERT(index <= max - 1);
+ pc = &ptlrpcds->pd_threads[index];
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_NONE:
+ pc->pc_npartners = -1;
+ break;
+ case PDB_POLICY_FULL:
+ pc->pc_npartners = 0;
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ break;
+ case PDB_POLICY_PAIR:
+ LASSERT(max % 2 == 0);
+ pc->pc_npartners = 1;
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ int i;
+ mask = *cpumask_of_node(cpu_to_node(index));
+ for (i = max; i < num_online_cpus(); i++)
+ cpu_clear(i, mask);
+ pc->pc_npartners = cpus_weight(mask) - 1;
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ }
+#else
+ LASSERT(max >= 3);
+ pc->pc_npartners = 2;
+#endif
+ break;
+ default:
+ CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
+ rc = -EINVAL;
+ }
+
+ if (rc == 0 && pc->pc_npartners > 0) {
+ OBD_ALLOC(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ if (pc->pc_partners == NULL) {
+ pc->pc_npartners = 0;
+ rc = -ENOMEM;
+ } else {
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_PAIR:
+ if (index & 0x1) {
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[0] = pc;
+ }
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ struct ptlrpcd_ctl *ppc;
+ int i, pidx;
+ /* partners are cores in the same NUMA node.
+ * setup partnership only with ptlrpcd threads
+ * that are already initialized
+ */
+ for (pidx = 0, i = 0; i < index; i++) {
+ if (cpu_isset(i, mask)) {
+ ppc = &ptlrpcds->pd_threads[i];
+ pc->pc_partners[pidx++] = ppc;
+ ppc->pc_partners[ppc->
+ pc_npartners++] = pc;
+ }
+ }
+ /* adjust number of partners to the number
+ * of partnership really setup */
+ pc->pc_npartners = pidx;
+ }
+#else
+ if (index & 0x1)
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ if (index > 0) {
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[1] = pc;
+ if (index == max - 1) {
+ pc->pc_partners[1] =
+ &ptlrpcds->pd_threads[0];
+ ptlrpcds->pd_threads[0].
+ pc_partners[0] = pc;
+ }
+ }
+#endif
+ break;
+ }
+ }
+ }
+
+ RETURN(rc);
+}
+
+
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
+{
+ int rc;
+ int env = 0;
+ ENTRY;
+
+ /*
+ * Do not allow start second thread for one pc.
+ */
+ if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Starting second thread (%s) for same pc %p\n",
+ name, pc);
+ RETURN(0);
+ }
+
+ pc->pc_index = index;
+ init_completion(&pc->pc_starting);
+ init_completion(&pc->pc_finishing);
+ spin_lock_init(&pc->pc_lock);
+ strncpy(pc->pc_name, name, sizeof(pc->pc_name) - 1);
+ pc->pc_set = ptlrpc_prep_set();
+ if (pc->pc_set == NULL)
+ GOTO(out, rc = -ENOMEM);
+ /*
+ * So far only "client" ptlrpcd uses an environment. In the future,
+ * ptlrpcd thread (or a thread-set) has to be given an argument,
+ * describing its "scope".
+ */
+ rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ env = 1;
+ {
+ task_t *task;
+ if (index >= 0) {
+ rc = ptlrpcd_bind(index, max);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+ task = kthread_run(ptlrpcd, pc, pc->pc_name);
+ if (IS_ERR(task))
+ GOTO(out, rc = PTR_ERR(task));
+
+ rc = 0;
+ wait_for_completion(&pc->pc_starting);
+ }
+out:
+ if (rc) {
+ if (pc->pc_set != NULL) {
+ struct ptlrpc_request_set *set = pc->pc_set;
+
+ spin_lock(&pc->pc_lock);
+ pc->pc_set = NULL;
+ spin_unlock(&pc->pc_lock);
+ ptlrpc_set_destroy(set);
+ }
+ if (env != 0)
+ lu_context_fini(&pc->pc_env.le_ctx);
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+ clear_bit(LIOD_START, &pc->pc_flags);
+ }
+ RETURN(rc);
+}
+
+void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
+{
+ ENTRY;
+
+ if (!test_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Thread for pc %p was not started\n", pc);
+ goto out;
+ }
+
+ set_bit(LIOD_STOP, &pc->pc_flags);
+ if (force)
+ set_bit(LIOD_FORCE, &pc->pc_flags);
+ wake_up(&pc->pc_set->set_waitq);
+
+out:
+ EXIT;
+}
+
+void ptlrpcd_free(struct ptlrpcd_ctl *pc)
+{
+ struct ptlrpc_request_set *set = pc->pc_set;
+ ENTRY;
+
+ if (!test_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Thread for pc %p was not started\n", pc);
+ goto out;
+ }
+
+ wait_for_completion(&pc->pc_finishing);
+ lu_context_fini(&pc->pc_env.le_ctx);
+
+ spin_lock(&pc->pc_lock);
+ pc->pc_set = NULL;
+ spin_unlock(&pc->pc_lock);
+ ptlrpc_set_destroy(set);
+
+ clear_bit(LIOD_START, &pc->pc_flags);
+ clear_bit(LIOD_STOP, &pc->pc_flags);
+ clear_bit(LIOD_FORCE, &pc->pc_flags);
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+
+out:
+ if (pc->pc_npartners > 0) {
+ LASSERT(pc->pc_partners != NULL);
+
+ OBD_FREE(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ pc->pc_partners = NULL;
+ }
+ pc->pc_npartners = 0;
+ EXIT;
+}
+
+static void ptlrpcd_fini(void)
+{
+ int i;
+ ENTRY;
+
+ if (ptlrpcds != NULL) {
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[i]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+ OBD_FREE(ptlrpcds, ptlrpcds->pd_size);
+ ptlrpcds = NULL;
+ }
+
+ EXIT;
+}
+
+static int ptlrpcd_init(void)
+{
+ int nthreads = num_online_cpus();
+ char name[16];
+ int size, i = -1, j, rc = 0;
+ ENTRY;
+
+ if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
+ nthreads = max_ptlrpcds;
+ if (nthreads < 2)
+ nthreads = 2;
+ if (nthreads < 3 && ptlrpcd_bind_policy == PDB_POLICY_NEIGHBOR)
+ ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+ else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
+ nthreads &= ~1; /* make sure it is even */
+
+ size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
+ OBD_ALLOC(ptlrpcds, size);
+ if (ptlrpcds == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ snprintf(name, 15, "ptlrpcd_rcv");
+ set_bit(LIOD_RECOVERY, &ptlrpcds->pd_thread_rcv.pc_flags);
+ rc = ptlrpcd_start(-1, nthreads, name, &ptlrpcds->pd_thread_rcv);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ /* XXX: We start nthreads ptlrpc daemons. Each of them can process any
+ * non-recovery async RPC to improve overall async RPC efficiency.
+ *
+ * But there are some issues with async I/O RPCs and async non-I/O
+ * RPCs processed in the same set under some cases. The ptlrpcd may
+ * be blocked by some async I/O RPC(s), then will cause other async
+ * non-I/O RPC(s) can not be processed in time.
+ *
+ * Maybe we should distinguish blocked async RPCs from non-blocked
+ * async RPCs, and process them in different ptlrpcd sets to avoid
+ * unnecessary dependency. But how to distribute async RPCs load
+ * among all the ptlrpc daemons becomes another trouble. */
+ for (i = 0; i < nthreads; i++) {
+ snprintf(name, 15, "ptlrpcd_%d", i);
+ rc = ptlrpcd_start(i, nthreads, name, &ptlrpcds->pd_threads[i]);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+ ptlrpcds->pd_size = size;
+ ptlrpcds->pd_index = 0;
+ ptlrpcds->pd_nthreads = nthreads;
+
+out:
+ if (rc != 0 && ptlrpcds != NULL) {
+ for (j = 0; j <= i; j++)
+ ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0);
+ for (j = 0; j <= i; j++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[j]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+ OBD_FREE(ptlrpcds, size);
+ ptlrpcds = NULL;
+ }
+
+ RETURN(0);
+}
+
+int ptlrpcd_addref(void)
+{
+ int rc = 0;
+ ENTRY;
+
+ mutex_lock(&ptlrpcd_mutex);
+ if (++ptlrpcd_users == 1)
+ rc = ptlrpcd_init();
+ mutex_unlock(&ptlrpcd_mutex);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpcd_addref);
+
+void ptlrpcd_decref(void)
+{
+ mutex_lock(&ptlrpcd_mutex);
+ if (--ptlrpcd_users == 0)
+ ptlrpcd_fini();
+ mutex_unlock(&ptlrpcd_mutex);
+}
+EXPORT_SYMBOL(ptlrpcd_decref);
+/** @} ptlrpcd */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
new file mode 100644
index 000000000000..2960889834a2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -0,0 +1,357 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/recover.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+# include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_export.h>
+#include <obd.h>
+#include <obd_ost.h>
+#include <obd_class.h>
+#include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
+#include <linux/list.h>
+
+#include "ptlrpc_internal.h"
+
+/**
+ * Start recovery on disconnected import.
+ * This is done by just attempting a connect
+ */
+void ptlrpc_initiate_recovery(struct obd_import *imp)
+{
+ ENTRY;
+
+ CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
+ ptlrpc_connect_import(imp);
+
+ EXIT;
+}
+
+/**
+ * Identify what request from replay list needs to be replayed next
+ * (based on what we have already replayed) and send it to server.
+ */
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
+{
+ int rc = 0;
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req = NULL;
+ __u64 last_transno;
+ ENTRY;
+
+ *inflight = 0;
+
+ /* It might have committed some after we last spoke, so make sure we
+ * get rid of them now.
+ */
+ spin_lock(&imp->imp_lock);
+ imp->imp_last_transno_checked = 0;
+ ptlrpc_free_committed(imp);
+ last_transno = imp->imp_last_replay_transno;
+ spin_unlock(&imp->imp_lock);
+
+ CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
+ imp, obd2cli_tgt(imp->imp_obd),
+ imp->imp_peer_committed_transno, last_transno);
+
+ /* Do I need to hold a lock across this iteration? We shouldn't be
+ * racing with any additions to the list, because we're in recovery
+ * and are therefore not processing additional requests to add. Calls
+ * to ptlrpc_free_committed might commit requests, but nothing "newer"
+ * than the one we're replaying (it can't be committed until it's
+ * replayed, and we're doing that here). l_f_e_safe protects against
+ * problems with the current request being committed, in the unlikely
+ * event of that race. So, in conclusion, I think that it's safe to
+ * perform this list-walk without the imp_lock held.
+ *
+ * But, the {mdc,osc}_replay_open callbacks both iterate
+ * request lists, and have comments saying they assume the
+ * imp_lock is being held by ptlrpc_replay, but it's not. it's
+ * just a little race...
+ */
+ list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* If need to resend the last sent transno (because a
+ reconnect has occurred), then stop on the matching
+ req and send it again. If, however, the last sent
+ transno has been committed then we continue replay
+ from the next request. */
+ if (req->rq_transno > last_transno) {
+ if (imp->imp_resend_replay)
+ lustre_msg_add_flags(req->rq_reqmsg,
+ MSG_RESENT);
+ break;
+ }
+ req = NULL;
+ }
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_resend_replay = 0;
+ spin_unlock(&imp->imp_lock);
+
+ if (req != NULL) {
+ rc = ptlrpc_replay_req(req);
+ if (rc) {
+ CERROR("recovery replay error %d for req "
+ LPU64"\n", rc, req->rq_xid);
+ RETURN(rc);
+ }
+ *inflight = 1;
+ }
+ RETURN(rc);
+}
+
+/**
+ * Schedule resending of request on sending_list. This is done after
+ * we completed replaying of requests and locks.
+ */
+int ptlrpc_resend(struct obd_import *imp)
+{
+ struct ptlrpc_request *req, *next;
+
+ ENTRY;
+
+ /* As long as we're in recovery, nothing should be added to the sending
+ * list, so we don't need to hold the lock during this iteration and
+ * resend process.
+ */
+ /* Well... what if lctl recover is called twice at the same time?
+ */
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_RECOVER) {
+ spin_unlock(&imp->imp_lock);
+ RETURN(-1);
+ }
+
+ list_for_each_entry_safe(req, next, &imp->imp_sending_list,
+ rq_list) {
+ LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
+ "req %p bad\n", req);
+ LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
+ if (!ptlrpc_no_resend(req))
+ ptlrpc_resend_req(req);
+ }
+ spin_unlock(&imp->imp_lock);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_resend);
+
+/**
+ * Go through all requests in delayed list and wake their threads
+ * for resending
+ */
+void ptlrpc_wake_delayed(struct obd_import *imp)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_wake_delayed);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
+{
+ struct obd_import *imp = failed_req->rq_import;
+ ENTRY;
+
+ CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
+ imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ if (ptlrpc_set_import_discon(imp,
+ lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA, "import %s@%s for %s not replayable, "
+ "auto-deactivating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_obd->obd_name);
+ ptlrpc_deactivate_import(imp);
+ }
+ /* to control recovery via lctl {disable|enable}_recovery */
+ if (imp->imp_deactive == 0)
+ ptlrpc_connect_import(imp);
+ }
+
+ /* Wait for recovery to complete and resend. If evicted, then
+ this request will be errored out later.*/
+ spin_lock(&failed_req->rq_lock);
+ if (!failed_req->rq_no_resend)
+ failed_req->rq_resend = 1;
+ spin_unlock(&failed_req->rq_lock);
+
+ EXIT;
+}
+
+/**
+ * Administratively active/deactive a client.
+ * This should only be called by the ioctl interface, currently
+ * - the lctl deactivate and activate commands
+ * - echo 0/1 >> /proc/osc/XXX/active
+ * - client umount -f (ll_umount_begin)
+ */
+int ptlrpc_set_import_active(struct obd_import *imp, int active)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int rc = 0;
+
+ ENTRY;
+ LASSERT(obd);
+
+ /* When deactivating, mark import invalid, and abort in-flight
+ * requests. */
+ if (!active) {
+ LCONSOLE_WARN("setting import %s INACTIVE by administrator "
+ "request\n", obd2cli_tgt(imp->imp_obd));
+
+ /* set before invalidate to avoid messages about imp_inval
+ * set without imp_deactive in ptlrpc_import_delay_req */
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
+
+ ptlrpc_invalidate_import(imp);
+ }
+
+ /* When activating, mark import valid, and attempt recovery */
+ if (active) {
+ CDEBUG(D_HA, "setting import %s VALID\n",
+ obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 0;
+ spin_unlock(&imp->imp_lock);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
+
+ rc = ptlrpc_recover_import(imp, NULL, 0);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_set_import_active);
+
+/* Attempt to reconnect an import */
+int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
+{
+ int rc = 0;
+ ENTRY;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
+ atomic_read(&imp->imp_inval_count))
+ rc = -EINVAL;
+ spin_unlock(&imp->imp_lock);
+ if (rc)
+ GOTO(out, rc);
+
+ /* force import to be disconnected. */
+ ptlrpc_set_import_discon(imp, 0);
+
+ if (new_uuid) {
+ struct obd_uuid uuid;
+
+ /* intruct import to use new uuid */
+ obd_str2uuid(&uuid, new_uuid);
+ rc = import_set_conn_priority(imp, &uuid);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ /* Check if reconnect is already in progress */
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_DISCON) {
+ imp->imp_force_verify = 1;
+ rc = -EALREADY;
+ }
+ spin_unlock(&imp->imp_lock);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = ptlrpc_connect_import(imp);
+ if (rc)
+ GOTO(out, rc);
+
+ if (!async) {
+ struct l_wait_info lwi;
+ int secs = cfs_time_seconds(obd_timeout);
+
+ CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
+ obd2cli_tgt(imp->imp_obd), secs);
+
+ lwi = LWI_TIMEOUT(secs, NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_recovery(imp), &lwi);
+ CDEBUG(D_HA, "%s: recovery finished\n",
+ obd2cli_tgt(imp->imp_obd));
+ }
+ EXIT;
+
+out:
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_recover_import);
+
+int ptlrpc_import_in_recovery(struct obd_import *imp)
+{
+ int in_recovery = 1;
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_FULL ||
+ imp->imp_state == LUSTRE_IMP_CLOSED ||
+ imp->imp_state == LUSTRE_IMP_DISCON)
+ in_recovery = 0;
+ spin_unlock(&imp->imp_lock);
+ return in_recovery;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
new file mode 100644
index 000000000000..36e8bed5458a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -0,0 +1,2465 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+/***********************************************
+ * policy registers *
+ ***********************************************/
+
+static rwlock_t policy_lock;
+static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
+ NULL,
+};
+
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u16 number = policy->sp_policy;
+
+ LASSERT(policy->sp_name);
+ LASSERT(policy->sp_cops);
+ LASSERT(policy->sp_sops);
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return -EINVAL;
+
+ write_lock(&policy_lock);
+ if (unlikely(policies[number])) {
+ write_unlock(&policy_lock);
+ return -EALREADY;
+ }
+ policies[number] = policy;
+ write_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_register_policy);
+
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u16 number = policy->sp_policy;
+
+ LASSERT(number < SPTLRPC_POLICY_MAX);
+
+ write_lock(&policy_lock);
+ if (unlikely(policies[number] == NULL)) {
+ write_unlock(&policy_lock);
+ CERROR("%s: already unregistered\n", policy->sp_name);
+ return -EINVAL;
+ }
+
+ LASSERT(policies[number] == policy);
+ policies[number] = NULL;
+ write_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unregister_policy);
+
+static
+struct ptlrpc_sec_policy * sptlrpc_wireflavor2policy(__u32 flavor)
+{
+ static DEFINE_MUTEX(load_mutex);
+ static atomic_t loaded = ATOMIC_INIT(0);
+ struct ptlrpc_sec_policy *policy;
+ __u16 number = SPTLRPC_FLVR_POLICY(flavor);
+ __u16 flag = 0;
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return NULL;
+
+ while (1) {
+ read_lock(&policy_lock);
+ policy = policies[number];
+ if (policy && !try_module_get(policy->sp_owner))
+ policy = NULL;
+ if (policy == NULL)
+ flag = atomic_read(&loaded);
+ read_unlock(&policy_lock);
+
+ if (policy != NULL || flag != 0 ||
+ number != SPTLRPC_POLICY_GSS)
+ break;
+
+ /* try to load gss module, once */
+ mutex_lock(&load_mutex);
+ if (atomic_read(&loaded) == 0) {
+ if (request_module("ptlrpc_gss") == 0)
+ CDEBUG(D_SEC,
+ "module ptlrpc_gss loaded on demand\n");
+ else
+ CERROR("Unable to load module ptlrpc_gss\n");
+
+ atomic_set(&loaded, 1);
+ }
+ mutex_unlock(&load_mutex);
+ }
+
+ return policy;
+}
+
+__u32 sptlrpc_name2flavor_base(const char *name)
+{
+ if (!strcmp(name, "null"))
+ return SPTLRPC_FLVR_NULL;
+ if (!strcmp(name, "plain"))
+ return SPTLRPC_FLVR_PLAIN;
+ if (!strcmp(name, "krb5n"))
+ return SPTLRPC_FLVR_KRB5N;
+ if (!strcmp(name, "krb5a"))
+ return SPTLRPC_FLVR_KRB5A;
+ if (!strcmp(name, "krb5i"))
+ return SPTLRPC_FLVR_KRB5I;
+ if (!strcmp(name, "krb5p"))
+ return SPTLRPC_FLVR_KRB5P;
+
+ return SPTLRPC_FLVR_INVALID;
+}
+EXPORT_SYMBOL(sptlrpc_name2flavor_base);
+
+const char *sptlrpc_flavor2name_base(__u32 flvr)
+{
+ __u32 base = SPTLRPC_FLVR_BASE(flvr);
+
+ if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
+ return "null";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
+ return "plain";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
+ return "krb5n";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
+ return "krb5a";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
+ return "krb5i";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
+ return "krb5p";
+
+ CERROR("invalid wire flavor 0x%x\n", flvr);
+ return "invalid";
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_base);
+
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+ char *buf, int bufsize)
+{
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
+ snprintf(buf, bufsize, "hash:%s",
+ sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
+ else
+ snprintf(buf, bufsize, "%s",
+ sptlrpc_flavor2name_base(sf->sf_rpc));
+
+ buf[bufsize - 1] = '\0';
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
+
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+{
+ snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc));
+
+ /*
+ * currently we don't support customized bulk specification for
+ * flavors other than plain
+ */
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
+ char bspec[16];
+
+ bspec[0] = '-';
+ sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
+ strncat(buf, bspec, bufsize);
+ }
+
+ buf[bufsize - 1] = '\0';
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name);
+
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strlcat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strlcat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_UDESC)
+ strlcat(buf, "udesc,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strlcat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strlcat(buf, "-,", bufsize);
+
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_secflags2str);
+
+/**************************************************
+ * client context APIs *
+ **************************************************/
+
+static
+struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
+{
+ struct vfs_cred vcred;
+ int create = 1, remove_dead = 1;
+
+ LASSERT(sec);
+ LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
+
+ if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
+ PTLRPC_SEC_FL_ROOTONLY)) {
+ vcred.vc_uid = 0;
+ vcred.vc_gid = 0;
+ if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
+ create = 0;
+ remove_dead = 0;
+ }
+ } else {
+ vcred.vc_uid = current_uid();
+ vcred.vc_gid = current_gid();
+ }
+
+ return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
+ create, remove_dead);
+}
+
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
+{
+ atomic_inc(&ctx->cc_refcount);
+ return ctx;
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
+
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+
+ LASSERT(sec);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (!atomic_dec_and_test(&ctx->cc_refcount))
+ return;
+
+ sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
+
+/**
+ * Expire the client context immediately.
+ *
+ * \pre Caller must hold at least 1 reference on the \a ctx.
+ */
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(ctx->cc_ops->die);
+ ctx->cc_ops->die(ctx, 0);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
+
+/**
+ * To wake up the threads who are waiting for this client context. Called
+ * after some status change happened on \a ctx.
+ */
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_request *req, *next;
+
+ spin_lock(&ctx->cc_lock);
+ list_for_each_entry_safe(req, next, &ctx->cc_req_list,
+ rq_ctx_chain) {
+ list_del_init(&req->rq_ctx_chain);
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
+
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+ LASSERT(ctx->cc_ops);
+
+ if (ctx->cc_ops->display == NULL)
+ return 0;
+
+ return ctx->cc_ops->display(ctx, buf, bufsize);
+}
+
+static int import_sec_check_expire(struct obd_import *imp)
+{
+ int adapt = 0;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_sec_expire &&
+ imp->imp_sec_expire < cfs_time_current_sec()) {
+ adapt = 1;
+ imp->imp_sec_expire = 0;
+ }
+ spin_unlock(&imp->imp_lock);
+
+ if (!adapt)
+ return 0;
+
+ CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
+ return sptlrpc_import_sec_adapt(imp, NULL, 0);
+}
+
+static int import_sec_validate_get(struct obd_import *imp,
+ struct ptlrpc_sec **sec)
+{
+ int rc;
+
+ if (unlikely(imp->imp_sec_expire)) {
+ rc = import_sec_check_expire(imp);
+ if (rc)
+ return rc;
+ }
+
+ *sec = sptlrpc_import_sec_ref(imp);
+ if (*sec == NULL) {
+ CERROR("import %p (%s) with no sec\n",
+ imp, ptlrpc_import_state_name(imp->imp_state));
+ return -EACCES;
+ }
+
+ if (unlikely((*sec)->ps_dying)) {
+ CERROR("attempt to use dying sec %p\n", sec);
+ sptlrpc_sec_put(*sec);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+/**
+ * Given a \a req, find or allocate a appropriate context for it.
+ * \pre req->rq_cli_ctx == NULL.
+ *
+ * \retval 0 succeed, and req->rq_cli_ctx is set.
+ * \retval -ev error number, and req->rq_cli_ctx == NULL.
+ */
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ struct ptlrpc_sec *sec;
+ int rc;
+ ENTRY;
+
+ LASSERT(!req->rq_cli_ctx);
+ LASSERT(imp);
+
+ rc = import_sec_validate_get(imp, &sec);
+ if (rc)
+ RETURN(rc);
+
+ req->rq_cli_ctx = get_my_ctx(sec);
+
+ sptlrpc_sec_put(sec);
+
+ if (!req->rq_cli_ctx) {
+ CERROR("req %p: fail to get context\n", req);
+ RETURN(-ENOMEM);
+ }
+
+ RETURN(0);
+}
+
+/**
+ * Drop the context for \a req.
+ * \pre req->rq_cli_ctx != NULL.
+ * \post req->rq_cli_ctx == NULL.
+ *
+ * If \a sync == 0, this function should return quickly without sleep;
+ * otherwise it might trigger and wait for the whole process of sending
+ * an context-destroying rpc to server.
+ */
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
+{
+ ENTRY;
+
+ LASSERT(req);
+ LASSERT(req->rq_cli_ctx);
+
+ /* request might be asked to release earlier while still
+ * in the context waiting list.
+ */
+ if (!list_empty(&req->rq_ctx_chain)) {
+ spin_lock(&req->rq_cli_ctx->cc_lock);
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&req->rq_cli_ctx->cc_lock);
+ }
+
+ sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
+ req->rq_cli_ctx = NULL;
+ EXIT;
+}
+
+static
+int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
+ struct ptlrpc_cli_ctx *oldctx,
+ struct ptlrpc_cli_ctx *newctx)
+{
+ struct sptlrpc_flavor old_flvr;
+ char *reqmsg = NULL; /* to workaround old gcc */
+ int reqmsg_size;
+ int rc = 0;
+
+ LASSERT(req->rq_reqmsg);
+ LASSERT(req->rq_reqlen);
+ LASSERT(req->rq_replen);
+
+ CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), "
+ "switch sec %p(%s) -> %p(%s)\n", req,
+ oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec),
+ newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec),
+ oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
+ newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
+
+ /* save flavor */
+ old_flvr = req->rq_flvr;
+
+ /* save request message */
+ reqmsg_size = req->rq_reqlen;
+ if (reqmsg_size != 0) {
+ OBD_ALLOC_LARGE(reqmsg, reqmsg_size);
+ if (reqmsg == NULL)
+ return -ENOMEM;
+ memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
+ }
+
+ /* release old req/rep buf */
+ req->rq_cli_ctx = oldctx;
+ sptlrpc_cli_free_reqbuf(req);
+ sptlrpc_cli_free_repbuf(req);
+ req->rq_cli_ctx = newctx;
+
+ /* recalculate the flavor */
+ sptlrpc_req_set_flavor(req, 0);
+
+ /* alloc new request buffer
+ * we don't need to alloc reply buffer here, leave it to the
+ * rest procedure of ptlrpc */
+ if (reqmsg_size != 0) {
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
+ if (!rc) {
+ LASSERT(req->rq_reqmsg);
+ memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
+ } else {
+ CWARN("failed to alloc reqbuf: %d\n", rc);
+ req->rq_flvr = old_flvr;
+ }
+
+ OBD_FREE_LARGE(reqmsg, reqmsg_size);
+ }
+ return rc;
+}
+
+/**
+ * If current context of \a req is dead somehow, e.g. we just switched flavor
+ * thus marked original contexts dead, we'll find a new context for it. if
+ * no switch is needed, \a req will end up with the same context.
+ *
+ * \note a request must have a context, to keep other parts of code happy.
+ * In any case of failure during the switching, we must restore the old one.
+ */
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
+ struct ptlrpc_cli_ctx *newctx;
+ int rc;
+ ENTRY;
+
+ LASSERT(oldctx);
+
+ sptlrpc_cli_ctx_get(oldctx);
+ sptlrpc_req_put_ctx(req, 0);
+
+ rc = sptlrpc_req_get_ctx(req);
+ if (unlikely(rc)) {
+ LASSERT(!req->rq_cli_ctx);
+
+ /* restore old ctx */
+ req->rq_cli_ctx = oldctx;
+ RETURN(rc);
+ }
+
+ newctx = req->rq_cli_ctx;
+ LASSERT(newctx);
+
+ if (unlikely(newctx == oldctx &&
+ test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
+ /*
+ * still get the old dead ctx, usually means system too busy
+ */
+ CDEBUG(D_SEC,
+ "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
+ newctx, newctx->cc_flags);
+
+ schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,
+ HZ);
+ } else {
+ /*
+ * it's possible newctx == oldctx if we're switching
+ * subflavor with the same sec.
+ */
+ rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
+ if (rc) {
+ /* restore old ctx */
+ sptlrpc_req_put_ctx(req, 0);
+ req->rq_cli_ctx = oldctx;
+ RETURN(rc);
+ }
+
+ LASSERT(req->rq_cli_ctx == newctx);
+ }
+
+ sptlrpc_cli_ctx_put(oldctx, 1);
+ RETURN(0);
+}
+EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
+
+static
+int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ if (cli_ctx_is_refreshed(ctx))
+ return 1;
+ return 0;
+}
+
+static
+int ctx_refresh_timeout(void *data)
+{
+ struct ptlrpc_request *req = data;
+ int rc;
+
+ /* conn_cnt is needed in expire_one_request */
+ lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
+
+ rc = ptlrpc_expire_one_request(req, 1);
+ /* if we started recovery, we should mark this ctx dead; otherwise
+ * in case of lgssd died nobody would retire this ctx, following
+ * connecting will still find the same ctx thus cause deadlock.
+ * there's an assumption that expire time of the request should be
+ * later than the context refresh expire time.
+ */
+ if (rc == 0)
+ req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0);
+ return rc;
+}
+
+static
+void ctx_refresh_interrupt(void *data)
+{
+ struct ptlrpc_request *req = data;
+
+ spin_lock(&req->rq_lock);
+ req->rq_intr = 1;
+ spin_unlock(&req->rq_lock);
+}
+
+static
+void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
+{
+ spin_lock(&ctx->cc_lock);
+ if (!list_empty(&req->rq_ctx_chain))
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+}
+
+/**
+ * To refresh the context of \req, if it's not up-to-date.
+ * \param timeout
+ * - < 0: don't wait
+ * - = 0: wait until success or fatal error occur
+ * - > 0: timeout value (in seconds)
+ *
+ * The status of the context could be subject to be changed by other threads
+ * at any time. We allow this race, but once we return with 0, the caller will
+ * suppose it's uptodated and keep using it until the owning rpc is done.
+ *
+ * \retval 0 only if the context is uptodated.
+ * \retval -ev error number.
+ */
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec *sec;
+ struct l_wait_info lwi;
+ int rc;
+ ENTRY;
+
+ LASSERT(ctx);
+
+ if (req->rq_ctx_init || req->rq_ctx_fini)
+ RETURN(0);
+
+ /*
+ * during the process a request's context might change type even
+ * (e.g. from gss ctx to null ctx), so each loop we need to re-check
+ * everything
+ */
+again:
+ rc = import_sec_validate_get(req->rq_import, &sec);
+ if (rc)
+ RETURN(rc);
+
+ if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
+ CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
+ req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
+ req_off_ctx_list(req, ctx);
+ sptlrpc_req_replace_dead_ctx(req);
+ ctx = req->rq_cli_ctx;
+ }
+ sptlrpc_sec_put(sec);
+
+ if (cli_ctx_is_eternal(ctx))
+ RETURN(0);
+
+ if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
+ LASSERT(ctx->cc_ops->refresh);
+ ctx->cc_ops->refresh(ctx);
+ }
+ LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
+
+ LASSERT(ctx->cc_ops->validate);
+ if (ctx->cc_ops->validate(ctx) == 0) {
+ req_off_ctx_list(req, ctx);
+ RETURN(0);
+ }
+
+ if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ req_off_ctx_list(req, ctx);
+ RETURN(-EPERM);
+ }
+
+ /*
+ * There's a subtle issue for resending RPCs, suppose following
+ * situation:
+ * 1. the request was sent to server.
+ * 2. recovery was kicked start, after finished the request was
+ * marked as resent.
+ * 3. resend the request.
+ * 4. old reply from server received, we accept and verify the reply.
+ * this has to be success, otherwise the error will be aware
+ * by application.
+ * 5. new reply from server received, dropped by LNet.
+ *
+ * Note the xid of old & new request is the same. We can't simply
+ * change xid for the resent request because the server replies on
+ * it for reply reconstruction.
+ *
+ * Commonly the original context should be uptodate because we
+ * have a expiry nice time; server will keep its context because
+ * we at least hold a ref of old context which prevent context
+ * destroying RPC being sent. So server still can accept the request
+ * and finish the RPC. But if that's not the case:
+ * 1. If server side context has been trimmed, a NO_CONTEXT will
+ * be returned, gss_cli_ctx_verify/unseal will switch to new
+ * context by force.
+ * 2. Current context never be refreshed, then we are fine: we
+ * never really send request with old context before.
+ */
+ if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
+ unlikely(req->rq_reqmsg) &&
+ lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+ req_off_ctx_list(req, ctx);
+ RETURN(0);
+ }
+
+ if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+ req_off_ctx_list(req, ctx);
+ /*
+ * don't switch ctx if import was deactivated
+ */
+ if (req->rq_import->imp_deactive) {
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ RETURN(-EINTR);
+ }
+
+ rc = sptlrpc_req_replace_dead_ctx(req);
+ if (rc) {
+ LASSERT(ctx == req->rq_cli_ctx);
+ CERROR("req %p: failed to replace dead ctx %p: %d\n",
+ req, ctx, rc);
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ RETURN(rc);
+ }
+
+ ctx = req->rq_cli_ctx;
+ goto again;
+ }
+
+ /*
+ * Now we're sure this context is during upcall, add myself into
+ * waiting list
+ */
+ spin_lock(&ctx->cc_lock);
+ if (list_empty(&req->rq_ctx_chain))
+ list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
+ spin_unlock(&ctx->cc_lock);
+
+ if (timeout < 0)
+ RETURN(-EWOULDBLOCK);
+
+ /* Clear any flags that may be present from previous sends */
+ LASSERT(req->rq_receiving_reply == 0);
+ spin_lock(&req->rq_lock);
+ req->rq_err = 0;
+ req->rq_timedout = 0;
+ req->rq_resend = 0;
+ req->rq_restart = 0;
+ spin_unlock(&req->rq_lock);
+
+ lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
+ ctx_refresh_interrupt, req);
+ rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
+
+ /*
+ * following cases could lead us here:
+ * - successfully refreshed;
+ * - interrupted;
+ * - timedout, and we don't want recover from the failure;
+ * - timedout, and waked up upon recovery finished;
+ * - someone else mark this ctx dead by force;
+ * - someone invalidate the req and call ptlrpc_client_wake_req(),
+ * e.g. ptlrpc_abort_inflight();
+ */
+ if (!cli_ctx_is_refreshed(ctx)) {
+ /* timed out or interruptted */
+ req_off_ctx_list(req, ctx);
+
+ LASSERT(rc != 0);
+ RETURN(rc);
+ }
+
+ goto again;
+}
+
+/**
+ * Initialize flavor settings for \a req, according to \a opcode.
+ *
+ * \note this could be called in two situations:
+ * - new request from ptlrpc_pre_req(), with proper @opcode
+ * - old request which changed ctx in the middle, with @opcode == 0
+ */
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
+{
+ struct ptlrpc_sec *sec;
+
+ LASSERT(req->rq_import);
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_cli_ctx->cc_sec);
+ LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
+
+ /* special security flags accoding to opcode */
+ switch (opcode) {
+ case OST_READ:
+ case MDS_READPAGE:
+ case MGS_CONFIG_READ:
+ case OBD_IDX_READ:
+ req->rq_bulk_read = 1;
+ break;
+ case OST_WRITE:
+ case MDS_WRITEPAGE:
+ req->rq_bulk_write = 1;
+ break;
+ case SEC_CTX_INIT:
+ req->rq_ctx_init = 1;
+ break;
+ case SEC_CTX_FINI:
+ req->rq_ctx_fini = 1;
+ break;
+ case 0:
+ /* init/fini rpc won't be resend, so can't be here */
+ LASSERT(req->rq_ctx_init == 0);
+ LASSERT(req->rq_ctx_fini == 0);
+
+ /* cleanup flags, which should be recalculated */
+ req->rq_pack_udesc = 0;
+ req->rq_pack_bulk = 0;
+ break;
+ }
+
+ sec = req->rq_cli_ctx->cc_sec;
+
+ spin_lock(&sec->ps_lock);
+ req->rq_flvr = sec->ps_flvr;
+ spin_unlock(&sec->ps_lock);
+
+ /* force SVC_NULL for context initiation rpc, SVC_INTG for context
+ * destruction rpc */
+ if (unlikely(req->rq_ctx_init))
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
+ else if (unlikely(req->rq_ctx_fini))
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
+
+ /* user descriptor flag, null security can't do it anyway */
+ if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
+ (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
+ req->rq_pack_udesc = 1;
+
+ /* bulk security flag */
+ if ((req->rq_bulk_read || req->rq_bulk_write) &&
+ sptlrpc_flavor_has_bulk(&req->rq_flvr))
+ req->rq_pack_bulk = 1;
+}
+
+void sptlrpc_request_out_callback(struct ptlrpc_request *req)
+{
+ if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
+ return;
+
+ LASSERT(req->rq_clrbuf);
+ if (req->rq_pool || !req->rq_reqbuf)
+ return;
+
+ OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+}
+
+/**
+ * Given an import \a imp, check whether current user has a valid context
+ * or not. We may create a new context and try to refresh it, and try
+ * repeatedly try in case of non-fatal errors. Return 0 means success.
+ */
+int sptlrpc_import_check_ctx(struct obd_import *imp)
+{
+ struct ptlrpc_sec *sec;
+ struct ptlrpc_cli_ctx *ctx;
+ struct ptlrpc_request *req = NULL;
+ int rc;
+ ENTRY;
+
+ might_sleep();
+
+ sec = sptlrpc_import_sec_ref(imp);
+ ctx = get_my_ctx(sec);
+ sptlrpc_sec_put(sec);
+
+ if (!ctx)
+ RETURN(-ENOMEM);
+
+ if (cli_ctx_is_eternal(ctx) ||
+ ctx->cc_ops->validate(ctx) == 0) {
+ sptlrpc_cli_ctx_put(ctx, 1);
+ RETURN(0);
+ }
+
+ if (cli_ctx_is_error(ctx)) {
+ sptlrpc_cli_ctx_put(ctx, 1);
+ RETURN(-EACCES);
+ }
+
+ OBD_ALLOC_PTR(req);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ spin_lock_init(&req->rq_lock);
+ atomic_set(&req->rq_refcount, 10000);
+ INIT_LIST_HEAD(&req->rq_ctx_chain);
+ init_waitqueue_head(&req->rq_reply_waitq);
+ init_waitqueue_head(&req->rq_set_waitq);
+ req->rq_import = imp;
+ req->rq_flvr = sec->ps_flvr;
+ req->rq_cli_ctx = ctx;
+
+ rc = sptlrpc_req_refresh_ctx(req, 0);
+ LASSERT(list_empty(&req->rq_ctx_chain));
+ sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
+ OBD_FREE_PTR(req);
+
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc client, to perform the pre-defined security transformation
+ * upon the request message of \a req. After this function called,
+ * req->rq_reqmsg is still accessible as clear text.
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* we wrap bulk request here because now we can be sure
+ * the context is uptodate.
+ */
+ if (req->rq_bulk) {
+ rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
+ if (rc)
+ RETURN(rc);
+ }
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ LASSERT(ctx->cc_ops->sign);
+ rc = ctx->cc_ops->sign(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->seal);
+ rc = ctx->cc_ops->seal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+
+ if (rc == 0) {
+ LASSERT(req->rq_reqdata_len);
+ LASSERT(req->rq_reqdata_len % 8 == 0);
+ LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
+ }
+
+ RETURN(rc);
+}
+
+static int do_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata);
+ LASSERT(req->rq_repmsg == NULL);
+
+ req->rq_rep_swab_mask = 0;
+
+ rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
+ switch (rc) {
+ case 1:
+ lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ case 0:
+ break;
+ default:
+ CERROR("failed unpack reply: x"LPU64"\n", req->rq_xid);
+ RETURN(-EPROTO);
+ }
+
+ if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
+ CERROR("replied data length %d too small\n",
+ req->rq_repdata_len);
+ RETURN(-EPROTO);
+ }
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+ CERROR("reply policy %u doesn't match request policy %u\n",
+ SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
+ RETURN(-EPROTO);
+ }
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ LASSERT(ctx->cc_ops->verify);
+ rc = ctx->cc_ops->verify(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->unseal);
+ rc = ctx->cc_ops->unseal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+ LASSERT(rc || req->rq_repmsg || req->rq_resend);
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
+ !req->rq_ctx_init)
+ req->rq_rep_swab_mask = 0;
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the reply
+ * message of \a req. After return successfully, req->rq_repmsg points to
+ * the reply message in clear text.
+ *
+ * \pre the reply buffer should have been un-posted from LNet, so nothing is
+ * going to change.
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+ LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
+
+ if (req->rq_reply_off == 0 &&
+ (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+ CERROR("real reply with offset 0\n");
+ return -EPROTO;
+ }
+
+ if (req->rq_reply_off % 8 != 0) {
+ CERROR("reply at odd offset %u\n", req->rq_reply_off);
+ return -EPROTO;
+ }
+
+ req->rq_repdata = (struct lustre_msg *)
+ (req->rq_repbuf + req->rq_reply_off);
+ req->rq_repdata_len = req->rq_nob_received;
+
+ return do_cli_unwrap_reply(req);
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the early
+ * reply message of \a req. We expect the rq_reply_off is 0, and
+ * rq_nob_received is the early reply size.
+ *
+ * Because the receive buffer might be still posted, the reply data might be
+ * changed at any time, no matter we're holding rq_lock or not. For this reason
+ * we allocate a separate ptlrpc_request and reply buffer for early reply
+ * processing.
+ *
+ * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
+ * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
+ * \a *req_ret to release it.
+ * \retval -ev error number, and \a req_ret will not be set.
+ */
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+ struct ptlrpc_request **req_ret)
+{
+ struct ptlrpc_request *early_req;
+ char *early_buf;
+ int early_bufsz, early_size;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC_PTR(early_req);
+ if (early_req == NULL)
+ RETURN(-ENOMEM);
+
+ early_size = req->rq_nob_received;
+ early_bufsz = size_roundup_power2(early_size);
+ OBD_ALLOC_LARGE(early_buf, early_bufsz);
+ if (early_buf == NULL)
+ GOTO(err_req, rc = -ENOMEM);
+
+ /* sanity checkings and copy data out, do it inside spinlock */
+ spin_lock(&req->rq_lock);
+
+ if (req->rq_replied) {
+ spin_unlock(&req->rq_lock);
+ GOTO(err_buf, rc = -EALREADY);
+ }
+
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+
+ if (req->rq_reply_off != 0) {
+ CERROR("early reply with offset %u\n", req->rq_reply_off);
+ spin_unlock(&req->rq_lock);
+ GOTO(err_buf, rc = -EPROTO);
+ }
+
+ if (req->rq_nob_received != early_size) {
+ /* even another early arrived the size should be the same */
+ CERROR("data size has changed from %u to %u\n",
+ early_size, req->rq_nob_received);
+ spin_unlock(&req->rq_lock);
+ GOTO(err_buf, rc = -EINVAL);
+ }
+
+ if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+ CERROR("early reply length %d too small\n",
+ req->rq_nob_received);
+ spin_unlock(&req->rq_lock);
+ GOTO(err_buf, rc = -EALREADY);
+ }
+
+ memcpy(early_buf, req->rq_repbuf, early_size);
+ spin_unlock(&req->rq_lock);
+
+ spin_lock_init(&early_req->rq_lock);
+ early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
+ early_req->rq_flvr = req->rq_flvr;
+ early_req->rq_repbuf = early_buf;
+ early_req->rq_repbuf_len = early_bufsz;
+ early_req->rq_repdata = (struct lustre_msg *) early_buf;
+ early_req->rq_repdata_len = early_size;
+ early_req->rq_early = 1;
+ early_req->rq_reqmsg = req->rq_reqmsg;
+
+ rc = do_cli_unwrap_reply(early_req);
+ if (rc) {
+ DEBUG_REQ(D_ADAPTTO, early_req,
+ "error %d unwrap early reply", rc);
+ GOTO(err_ctx, rc);
+ }
+
+ LASSERT(early_req->rq_repmsg);
+ *req_ret = early_req;
+ RETURN(0);
+
+err_ctx:
+ sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+err_buf:
+ OBD_FREE_LARGE(early_buf, early_bufsz);
+err_req:
+ OBD_FREE_PTR(early_req);
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc client, to release a processed early reply \a early_req.
+ *
+ * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
+ */
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
+{
+ LASSERT(early_req->rq_repbuf);
+ LASSERT(early_req->rq_repdata);
+ LASSERT(early_req->rq_repmsg);
+
+ sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+ OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len);
+ OBD_FREE_PTR(early_req);
+}
+
+/**************************************************
+ * sec ID *
+ **************************************************/
+
+/*
+ * "fixed" sec (e.g. null) use sec_id < 0
+ */
+static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
+
+int sptlrpc_get_next_secid(void)
+{
+ return atomic_inc_return(&sptlrpc_sec_id);
+}
+EXPORT_SYMBOL(sptlrpc_get_next_secid);
+
+/**************************************************
+ * client side high-level security APIs *
+ **************************************************/
+
+static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
+ int grace, int force)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ LASSERT(policy->sp_cops);
+ LASSERT(policy->sp_cops->flush_ctx_cache);
+
+ return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
+}
+
+static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ LASSERT_ATOMIC_ZERO(&sec->ps_refcount);
+ LASSERT_ATOMIC_ZERO(&sec->ps_nctx);
+ LASSERT(policy->sp_cops->destroy_sec);
+
+ CDEBUG(D_SEC, "%s@%p: being destroied\n", sec->ps_policy->sp_name, sec);
+
+ policy->sp_cops->destroy_sec(sec);
+ sptlrpc_policy_put(policy);
+}
+
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+ sec_cop_destroy_sec(sec);
+}
+EXPORT_SYMBOL(sptlrpc_sec_destroy);
+
+static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
+{
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ if (sec->ps_policy->sp_cops->kill_sec) {
+ sec->ps_policy->sp_cops->kill_sec(sec);
+
+ sec_cop_flush_ctx_cache(sec, -1, 1, 1);
+ }
+}
+
+struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
+{
+ if (sec)
+ atomic_inc(&sec->ps_refcount);
+
+ return sec;
+}
+EXPORT_SYMBOL(sptlrpc_sec_get);
+
+void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+{
+ if (sec) {
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ if (atomic_dec_and_test(&sec->ps_refcount)) {
+ sptlrpc_gc_del_sec(sec);
+ sec_cop_destroy_sec(sec);
+ }
+ }
+}
+EXPORT_SYMBOL(sptlrpc_sec_put);
+
+/*
+ * policy module is responsible for taking refrence of import
+ */
+static
+struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf,
+ enum lustre_sec_part sp)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_sec *sec;
+ char str[32];
+ ENTRY;
+
+ if (svc_ctx) {
+ LASSERT(imp->imp_dlm_fake == 1);
+
+ CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+ policy = sptlrpc_policy_get(svc_ctx->sc_policy);
+ sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+ } else {
+ LASSERT(imp->imp_dlm_fake == 0);
+
+ CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+ policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
+ if (!policy) {
+ CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
+ RETURN(NULL);
+ }
+ }
+
+ sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
+ if (sec) {
+ atomic_inc(&sec->ps_refcount);
+
+ sec->ps_part = sp;
+
+ if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
+ sptlrpc_gc_add_sec(sec);
+ } else {
+ sptlrpc_policy_put(policy);
+ }
+
+ RETURN(sec);
+}
+
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
+{
+ struct ptlrpc_sec *sec;
+
+ spin_lock(&imp->imp_lock);
+ sec = sptlrpc_sec_get(imp->imp_sec);
+ spin_unlock(&imp->imp_lock);
+
+ return sec;
+}
+EXPORT_SYMBOL(sptlrpc_import_sec_ref);
+
+static void sptlrpc_import_sec_install(struct obd_import *imp,
+ struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec *old_sec;
+
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ spin_lock(&imp->imp_lock);
+ old_sec = imp->imp_sec;
+ imp->imp_sec = sec;
+ spin_unlock(&imp->imp_lock);
+
+ if (old_sec) {
+ sptlrpc_sec_kill(old_sec);
+
+ /* balance the ref taken by this import */
+ sptlrpc_sec_put(old_sec);
+ }
+}
+
+static inline
+int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
+{
+ return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
+}
+
+static inline
+void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
+{
+ *dst = *src;
+}
+
+static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
+ struct ptlrpc_sec *sec,
+ struct sptlrpc_flavor *sf)
+{
+ char str1[32], str2[32];
+
+ if (sec->ps_flvr.sf_flags != sf->sf_flags)
+ CDEBUG(D_SEC, "changing sec flags: %s -> %s\n",
+ sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
+ str1, sizeof(str1)),
+ sptlrpc_secflags2str(sf->sf_flags,
+ str2, sizeof(str2)));
+
+ spin_lock(&sec->ps_lock);
+ flavor_copy(&sec->ps_flvr, sf);
+ spin_unlock(&sec->ps_lock);
+}
+
+/**
+ * To get an appropriate ptlrpc_sec for the \a imp, according to the current
+ * configuration. Upon called, imp->imp_sec may or may not be NULL.
+ *
+ * - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
+ * - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
+ */
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *flvr)
+{
+ struct ptlrpc_connection *conn;
+ struct sptlrpc_flavor sf;
+ struct ptlrpc_sec *sec, *newsec;
+ enum lustre_sec_part sp;
+ char str[24];
+ int rc = 0;
+ ENTRY;
+
+ might_sleep();
+
+ if (imp == NULL)
+ RETURN(0);
+
+ conn = imp->imp_connection;
+
+ if (svc_ctx == NULL) {
+ struct client_obd *cliobd = &imp->imp_obd->u.cli;
+ /*
+ * normal import, determine flavor from rule set, except
+ * for mgc the flavor is predetermined.
+ */
+ if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
+ sf = cliobd->cl_flvr_mgc;
+ else
+ sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
+ cliobd->cl_sp_to,
+ &cliobd->cl_target_uuid,
+ conn->c_self, &sf);
+
+ sp = imp->imp_obd->u.cli.cl_sp_me;
+ } else {
+ /* reverse import, determine flavor from incoming reqeust */
+ sf = *flvr;
+
+ if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+ sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+ PTLRPC_SEC_FL_ROOTONLY;
+
+ sp = sptlrpc_target_sec_part(imp->imp_obd);
+ }
+
+ sec = sptlrpc_import_sec_ref(imp);
+ if (sec) {
+ char str2[24];
+
+ if (flavor_equal(&sf, &sec->ps_flvr))
+ GOTO(out, rc);
+
+ CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+ sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+
+ if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
+ SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
+ SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
+ sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
+ GOTO(out, rc);
+ }
+ } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
+ SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
+ CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ LNET_NIDNET(conn->c_self),
+ sptlrpc_flavor2name(&sf, str, sizeof(str)));
+ }
+
+ mutex_lock(&imp->imp_sec_mutex);
+
+ newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
+ if (newsec) {
+ sptlrpc_import_sec_install(imp, newsec);
+ } else {
+ CERROR("import %s->%s: failed to create new sec\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid));
+ rc = -EPERM;
+ }
+
+ mutex_unlock(&imp->imp_sec_mutex);
+out:
+ sptlrpc_sec_put(sec);
+ RETURN(rc);
+}
+
+void sptlrpc_import_sec_put(struct obd_import *imp)
+{
+ if (imp->imp_sec) {
+ sptlrpc_sec_kill(imp->imp_sec);
+
+ sptlrpc_sec_put(imp->imp_sec);
+ imp->imp_sec = NULL;
+ }
+}
+
+static void import_flush_ctx_common(struct obd_import *imp,
+ uid_t uid, int grace, int force)
+{
+ struct ptlrpc_sec *sec;
+
+ if (imp == NULL)
+ return;
+
+ sec = sptlrpc_import_sec_ref(imp);
+ if (sec == NULL)
+ return;
+
+ sec_cop_flush_ctx_cache(sec, uid, grace, force);
+ sptlrpc_sec_put(sec);
+}
+
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+ /* it's important to use grace mode, see explain in
+ * sptlrpc_req_refresh_ctx() */
+ import_flush_ctx_common(imp, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+ import_flush_ctx_common(imp, current_uid(), 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
+{
+ import_flush_ctx_common(imp, -1, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
+
+/**
+ * Used by ptlrpc client to allocate request buffer of \a req. Upon return
+ * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
+ */
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT(req->rq_reqmsg == NULL);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ policy = ctx->cc_sec->ps_policy;
+ rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
+ if (!rc) {
+ LASSERT(req->rq_reqmsg);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* zeroing preallocated buffer */
+ if (req->rq_pool)
+ memset(req->rq_reqmsg, 0, msgsize);
+ }
+
+ return rc;
+}
+
+/**
+ * Used by ptlrpc client to free request buffer of \a req. After this
+ * req->rq_reqmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
+ return;
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
+ req->rq_reqmsg = NULL;
+}
+
+/*
+ * NOTE caller must guarantee the buffer size is enough for the enlargement
+ */
+void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
+ int segment, int newsize)
+{
+ void *src, *dst;
+ int oldsize, oldmsg_size, movesize;
+
+ LASSERT(segment < msg->lm_bufcount);
+ LASSERT(msg->lm_buflens[segment] <= newsize);
+
+ if (msg->lm_buflens[segment] == newsize)
+ return;
+
+ /* nothing to do if we are enlarging the last segment */
+ if (segment == msg->lm_bufcount - 1) {
+ msg->lm_buflens[segment] = newsize;
+ return;
+ }
+
+ oldsize = msg->lm_buflens[segment];
+
+ src = lustre_msg_buf(msg, segment + 1, 0);
+ msg->lm_buflens[segment] = newsize;
+ dst = lustre_msg_buf(msg, segment + 1, 0);
+ msg->lm_buflens[segment] = oldsize;
+
+ /* move from segment + 1 to end segment */
+ LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
+ oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
+ LASSERT(movesize >= 0);
+
+ if (movesize)
+ memmove(dst, src, movesize);
+
+ /* note we don't clear the ares where old data live, not secret */
+
+ /* finally set new segment size */
+ msg->lm_buflens[segment] = newsize;
+}
+EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
+
+/**
+ * Used by ptlrpc client to enlarge the \a segment of request message pointed
+ * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be
+ * preserved after the enlargement. this must be called after original request
+ * buffer being allocated.
+ *
+ * \note after this be called, rq_reqmsg and rq_reqlen might have been changed,
+ * so caller should refresh its local pointers if needed.
+ */
+int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_cops *cops;
+ struct lustre_msg *msg = req->rq_reqmsg;
+
+ LASSERT(ctx);
+ LASSERT(msg);
+ LASSERT(msg->lm_bufcount > segment);
+ LASSERT(msg->lm_buflens[segment] <= newsize);
+
+ if (msg->lm_buflens[segment] == newsize)
+ return 0;
+
+ cops = ctx->cc_sec->ps_policy->sp_cops;
+ LASSERT(cops->enlarge_reqbuf);
+ return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
+}
+EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
+
+/**
+ * Used by ptlrpc client to allocate reply buffer of \a req.
+ *
+ * \note After this, req->rq_repmsg is still not accessible.
+ */
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+
+ if (req->rq_repbuf)
+ RETURN(0);
+
+ policy = ctx->cc_sec->ps_policy;
+ RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
+}
+
+/**
+ * Used by ptlrpc client to free reply buffer of \a req. After this
+ * req->rq_repmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (req->rq_repbuf == NULL)
+ return;
+ LASSERT(req->rq_repbuf_len);
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_repbuf(ctx->cc_sec, req);
+ req->rq_repmsg = NULL;
+ EXIT;
+}
+
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
+
+ if (!policy->sp_cops->install_rctx)
+ return 0;
+ return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
+}
+
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->sc_policy;
+
+ if (!policy->sp_sops->install_rctx)
+ return 0;
+ return policy->sp_sops->install_rctx(imp, ctx);
+}
+
+/****************************************
+ * server side security *
+ ****************************************/
+
+static int flavor_allowed(struct sptlrpc_flavor *exp,
+ struct ptlrpc_request *req)
+{
+ struct sptlrpc_flavor *flvr = &req->rq_flvr;
+
+ if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
+ return 1;
+
+ if ((req->rq_ctx_init || req->rq_ctx_fini) &&
+ SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+ SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
+ return 1;
+
+ return 0;
+}
+
+#define EXP_FLVR_UPDATE_EXPIRE (OBD_TIMEOUT_DEFAULT + 10)
+
+/**
+ * Given an export \a exp, check whether the flavor of incoming \a req
+ * is allowed by the export \a exp. Main logic is about taking care of
+ * changing configurations. Return 0 means success.
+ */
+int sptlrpc_target_export_check(struct obd_export *exp,
+ struct ptlrpc_request *req)
+{
+ struct sptlrpc_flavor flavor;
+
+ if (exp == NULL)
+ return 0;
+
+ /* client side export has no imp_reverse, skip
+ * FIXME maybe we should check flavor this as well??? */
+ if (exp->exp_imp_reverse == NULL)
+ return 0;
+
+ /* don't care about ctx fini rpc */
+ if (req->rq_ctx_fini)
+ return 0;
+
+ spin_lock(&exp->exp_lock);
+
+ /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for
+ * the first req with the new flavor, then treat it as current flavor,
+ * adapt reverse sec according to it.
+ * note the first rpc with new flavor might not be with root ctx, in
+ * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */
+ if (unlikely(exp->exp_flvr_changed) &&
+ flavor_allowed(&exp->exp_flvr_old[1], req)) {
+ /* make the new flavor as "current", and old ones as
+ * about-to-expire */
+ CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
+ exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
+ flavor = exp->exp_flvr_old[1];
+ exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
+ exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
+ exp->exp_flvr_old[0] = exp->exp_flvr;
+ exp->exp_flvr_expire[0] = cfs_time_current_sec() +
+ EXP_FLVR_UPDATE_EXPIRE;
+ exp->exp_flvr = flavor;
+
+ /* flavor change finished */
+ exp->exp_flvr_changed = 0;
+ LASSERT(exp->exp_flvr_adapt == 1);
+
+ /* if it's gss, we only interested in root ctx init */
+ if (req->rq_auth_gss &&
+ !(req->rq_ctx_init &&
+ (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
+ req->rq_auth_usr_ost))) {
+ spin_unlock(&exp->exp_lock);
+ CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
+ req->rq_auth_gss, req->rq_ctx_init,
+ req->rq_auth_usr_root, req->rq_auth_usr_mdt,
+ req->rq_auth_usr_ost);
+ return 0;
+ }
+
+ exp->exp_flvr_adapt = 0;
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+ req->rq_svc_ctx, &flavor);
+ }
+
+ /* if it equals to the current flavor, we accept it, but need to
+ * dealing with reverse sec/ctx */
+ if (likely(flavor_allowed(&exp->exp_flvr, req))) {
+ /* most cases should return here, we only interested in
+ * gss root ctx init */
+ if (!req->rq_auth_gss || !req->rq_ctx_init ||
+ (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+ !req->rq_auth_usr_ost)) {
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+
+ /* if flavor just changed, we should not proceed, just leave
+ * it and current flavor will be discovered and replaced
+ * shortly, and let _this_ rpc pass through */
+ if (exp->exp_flvr_changed) {
+ LASSERT(exp->exp_flvr_adapt);
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+
+ if (exp->exp_flvr_adapt) {
+ exp->exp_flvr_adapt = 0;
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
+ exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ flavor = exp->exp_flvr;
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+ req->rq_svc_ctx,
+ &flavor);
+ } else {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, "
+ "install rvs ctx\n", exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
+ req->rq_svc_ctx);
+ }
+ }
+
+ if (exp->exp_flvr_expire[0]) {
+ if (exp->exp_flvr_expire[0] >= cfs_time_current_sec()) {
+ if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
+ "middle one ("CFS_DURATION_T")\n", exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[0] -
+ cfs_time_current_sec());
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+ } else {
+ CDEBUG(D_SEC, "mark middle expired\n");
+ exp->exp_flvr_expire[0] = 0;
+ }
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+ req->rq_flvr.sf_rpc);
+ }
+
+ /* now it doesn't match the current flavor, the only chance we can
+ * accept it is match the old flavors which is not expired. */
+ if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
+ if (exp->exp_flvr_expire[1] >= cfs_time_current_sec()) {
+ if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
+ "oldest one ("CFS_DURATION_T")\n", exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[1] -
+ cfs_time_current_sec());
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+ } else {
+ CDEBUG(D_SEC, "mark oldest expired\n");
+ exp->exp_flvr_expire[1] = 0;
+ }
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
+ exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+ req->rq_flvr.sf_rpc);
+ } else {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
+ exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ }
+
+ spin_unlock(&exp->exp_lock);
+
+ CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with "
+ "unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
+ exp, exp->exp_obd->obd_name,
+ req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
+ req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
+ req->rq_flvr.sf_rpc,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_expire[0] ?
+ (unsigned long) (exp->exp_flvr_expire[0] -
+ cfs_time_current_sec()) : 0,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[1] ?
+ (unsigned long) (exp->exp_flvr_expire[1] -
+ cfs_time_current_sec()) : 0);
+ return -EACCES;
+}
+EXPORT_SYMBOL(sptlrpc_target_export_check);
+
+void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
+ struct sptlrpc_rule_set *rset)
+{
+ struct obd_export *exp;
+ struct sptlrpc_flavor new_flvr;
+
+ LASSERT(obd);
+
+ spin_lock(&obd->obd_dev_lock);
+
+ list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+ if (exp->exp_connection == NULL)
+ continue;
+
+ /* note if this export had just been updated flavor
+ * (exp_flvr_changed == 1), this will override the
+ * previous one. */
+ spin_lock(&exp->exp_lock);
+ sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
+ exp->exp_connection->c_peer.nid,
+ &new_flvr);
+ if (exp->exp_flvr_changed ||
+ !flavor_equal(&new_flvr, &exp->exp_flvr)) {
+ exp->exp_flvr_old[1] = new_flvr;
+ exp->exp_flvr_expire[1] = 0;
+ exp->exp_flvr_changed = 1;
+ exp->exp_flvr_adapt = 1;
+
+ CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
+ exp, sptlrpc_part2name(exp->exp_sp_peer),
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ }
+ spin_unlock(&exp->exp_lock);
+ }
+
+ spin_unlock(&obd->obd_dev_lock);
+}
+EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
+
+static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
+{
+ /* peer's claim is unreliable unless gss is being used */
+ if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
+ return svc_rc;
+
+ switch (req->rq_sp_from) {
+ case LUSTRE_SP_CLI:
+ if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source CLI");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_MDT:
+ if (!req->rq_auth_usr_mdt) {
+ DEBUG_REQ(D_ERROR, req, "faked source MDT");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_OST:
+ if (!req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source OST");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_MGS:
+ case LUSTRE_SP_MGC:
+ if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+ !req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_ANY:
+ default:
+ DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
+ svc_rc = SECSVC_DROP;
+ }
+
+ return svc_rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon request message of
+ * incoming \a req. This must be the first thing to do with a incoming
+ * request in ptlrpc layer.
+ *
+ * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in
+ * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set.
+ * \retval SECSVC_COMPLETE success, the request has been fully processed, and
+ * reply message has been prepared.
+ * \retval SECSVC_DROP failed, this request should be dropped.
+ */
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int rc;
+ ENTRY;
+
+ LASSERT(msg);
+ LASSERT(req->rq_reqmsg == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+ LASSERT(req->rq_svc_ctx == NULL);
+
+ req->rq_req_swab_mask = 0;
+
+ rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
+ switch (rc) {
+ case 1:
+ lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ case 0:
+ break;
+ default:
+ CERROR("error unpacking request from %s x"LPU64"\n",
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ RETURN(SECSVC_DROP);
+ }
+
+ req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+ req->rq_sp_from = LUSTRE_SP_ANY;
+ req->rq_auth_uid = INVALID_UID;
+ req->rq_auth_mapped_uid = INVALID_UID;
+
+ policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
+ if (!policy) {
+ CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ RETURN(SECSVC_DROP);
+ }
+
+ LASSERT(policy->sp_sops->accept);
+ rc = policy->sp_sops->accept(req);
+ sptlrpc_policy_put(policy);
+ LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+ LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
+
+ /*
+ * if it's not null flavor (which means embedded packing msg),
+ * reset the swab mask for the comming inner msg unpacking.
+ */
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
+ req->rq_req_swab_mask = 0;
+
+ /* sanity check for the request source */
+ rc = sptlrpc_svc_check_from(req, rc);
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed,
+ * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to
+ * a buffer of \a msglen size.
+ */
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_reply_state *rs;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->alloc_rs);
+
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (unlikely(rc == -ENOMEM)) {
+ /* failed alloc, try emergency pool */
+ rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_svcpt);
+ if (rs == NULL)
+ RETURN(-ENOMEM);
+
+ req->rq_reply_state = rs;
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (rc) {
+ lustre_put_emerg_rs(rs);
+ req->rq_reply_state = NULL;
+ }
+ }
+
+ LASSERT(rc != 0 ||
+ (req->rq_reply_state && req->rq_reply_state->rs_msg));
+
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon reply message.
+ *
+ * \post req->rq_reply_off is set to approriate server-controlled reply offset.
+ * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible.
+ */
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->authorize);
+
+ rc = policy->sp_sops->authorize(req);
+ LASSERT(rc || req->rq_reply_state->rs_repdata_len);
+
+ RETURN(rc);
+}
+
+/**
+ * Used by ptlrpc server, to free reply_state.
+ */
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_sec_policy *policy;
+ unsigned int prealloc;
+ ENTRY;
+
+ LASSERT(rs->rs_svc_ctx);
+ LASSERT(rs->rs_svc_ctx->sc_policy);
+
+ policy = rs->rs_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->free_rs);
+
+ prealloc = rs->rs_prealloc;
+ policy->sp_sops->free_rs(rs);
+
+ if (prealloc)
+ lustre_put_emerg_rs(rs);
+ EXIT;
+}
+
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx != NULL)
+ atomic_inc(&ctx->sc_refcount);
+}
+
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+ if (atomic_dec_and_test(&ctx->sc_refcount)) {
+ if (ctx->sc_policy->sp_sops->free_ctx)
+ ctx->sc_policy->sp_sops->free_ctx(ctx);
+ }
+ req->rq_svc_ctx = NULL;
+}
+
+void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+ if (ctx->sc_policy->sp_sops->invalidate_ctx)
+ ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
+}
+EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
+
+/****************************************
+ * bulk security *
+ ****************************************/
+
+/**
+ * Perform transformation upon bulk data pointed by \a desc. This is called
+ * before transforming the request message.
+ */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return 0;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->wrap_bulk)
+ return ctx->cc_ops->wrap_bulk(ctx, req, desc);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
+
+/**
+ * This is called after unwrap the reply message.
+ * return nob of actual plain text size received, or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc,
+ int nob)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
+
+ LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return desc->bd_nob_transferred;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk) {
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (rc < 0)
+ return rc;
+ }
+ return desc->bd_nob_transferred;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
+
+/**
+ * This is called after unwrap the reply message.
+ * return 0 for success or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
+
+ LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return 0;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk) {
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (rc < 0)
+ return rc;
+ }
+
+ /*
+ * if everything is going right, nob should equals to nob_transferred.
+ * in case of privacy mode, nob_transferred needs to be adjusted.
+ */
+ if (desc->bd_nob != desc->bd_nob_transferred) {
+ CERROR("nob %d doesn't match transferred nob %d",
+ desc->bd_nob, desc->bd_nob_transferred);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+
+/****************************************
+ * user descriptor helpers *
+ ****************************************/
+
+int sptlrpc_current_user_desc_size(void)
+{
+ int ngroups;
+
+ ngroups = current_ngroups;
+
+ if (ngroups > LUSTRE_MAX_GROUPS)
+ ngroups = LUSTRE_MAX_GROUPS;
+ return sptlrpc_user_desc_size(ngroups);
+}
+EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
+
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_user_desc *pud;
+
+ pud = lustre_msg_buf(msg, offset, 0);
+
+ pud->pud_uid = current_uid();
+ pud->pud_gid = current_gid();
+ pud->pud_fsuid = current_fsuid();
+ pud->pud_fsgid = current_fsgid();
+ pud->pud_cap = cfs_curproc_cap_pack();
+ pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
+
+ task_lock(current);
+ if (pud->pud_ngroups > current_ngroups)
+ pud->pud_ngroups = current_ngroups;
+ memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
+ pud->pud_ngroups * sizeof(__u32));
+ task_unlock(current);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_pack_user_desc);
+
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
+{
+ struct ptlrpc_user_desc *pud;
+ int i;
+
+ pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+ if (!pud)
+ return -EINVAL;
+
+ if (swabbed) {
+ __swab32s(&pud->pud_uid);
+ __swab32s(&pud->pud_gid);
+ __swab32s(&pud->pud_fsuid);
+ __swab32s(&pud->pud_fsgid);
+ __swab32s(&pud->pud_cap);
+ __swab32s(&pud->pud_ngroups);
+ }
+
+ if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
+ CERROR("%u groups is too large\n", pud->pud_ngroups);
+ return -EINVAL;
+ }
+
+ if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
+ msg->lm_buflens[offset]) {
+ CERROR("%u groups are claimed but bufsize only %u\n",
+ pud->pud_ngroups, msg->lm_buflens[offset]);
+ return -EINVAL;
+ }
+
+ if (swabbed) {
+ for (i = 0; i < pud->pud_ngroups; i++)
+ __swab32s(&pud->pud_groups[i]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
+
+/****************************************
+ * misc helpers *
+ ****************************************/
+
+const char * sec2target_str(struct ptlrpc_sec *sec)
+{
+ if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
+ return "*";
+ if (sec_is_reverse(sec))
+ return "c";
+ return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
+}
+EXPORT_SYMBOL(sec2target_str);
+
+/*
+ * return true if the bulk data is protected
+ */
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
+{
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_INTG:
+ case SPTLRPC_BULK_SVC_PRIV:
+ return 1;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
+
+/****************************************
+ * crypto API helper/alloc blkciper *
+ ****************************************/
+
+/****************************************
+ * initialize/finalize *
+ ****************************************/
+
+int sptlrpc_init(void)
+{
+ int rc;
+
+ rwlock_init(&policy_lock);
+
+ rc = sptlrpc_gc_init();
+ if (rc)
+ goto out;
+
+ rc = sptlrpc_conf_init();
+ if (rc)
+ goto out_gc;
+
+ rc = sptlrpc_enc_pool_init();
+ if (rc)
+ goto out_conf;
+
+ rc = sptlrpc_null_init();
+ if (rc)
+ goto out_pool;
+
+ rc = sptlrpc_plain_init();
+ if (rc)
+ goto out_null;
+
+ rc = sptlrpc_lproc_init();
+ if (rc)
+ goto out_plain;
+
+ return 0;
+
+out_plain:
+ sptlrpc_plain_fini();
+out_null:
+ sptlrpc_null_fini();
+out_pool:
+ sptlrpc_enc_pool_fini();
+out_conf:
+ sptlrpc_conf_fini();
+out_gc:
+ sptlrpc_gc_fini();
+out:
+ return rc;
+}
+
+void sptlrpc_fini(void)
+{
+ sptlrpc_lproc_fini();
+ sptlrpc_plain_fini();
+ sptlrpc_null_fini();
+ sptlrpc_enc_pool_fini();
+ sptlrpc_conf_fini();
+ sptlrpc_gc_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
new file mode 100644
index 000000000000..bf53f1bc1742
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -0,0 +1,880 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_bulk.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/crypto.h>
+
+#include <obd.h>
+#include <obd_cksum.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+/****************************************
+ * bulk encryption page pools *
+ ****************************************/
+
+
+#define PTRS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *))
+#define PAGES_PER_POOL (PTRS_PER_PAGE)
+
+#define IDLE_IDX_MAX (100)
+#define IDLE_IDX_WEIGHT (3)
+
+#define CACHE_QUIESCENT_PERIOD (20)
+
+static struct ptlrpc_enc_page_pool {
+ /*
+ * constants
+ */
+ unsigned long epp_max_pages; /* maximum pages can hold, const */
+ unsigned int epp_max_pools; /* number of pools, const */
+
+ /*
+ * wait queue in case of not enough free pages.
+ */
+ wait_queue_head_t epp_waitq; /* waiting threads */
+ unsigned int epp_waitqlen; /* wait queue length */
+ unsigned long epp_pages_short; /* # of pages wanted of in-q users */
+ unsigned int epp_growing:1; /* during adding pages */
+
+ /*
+ * indicating how idle the pools are, from 0 to MAX_IDLE_IDX
+ * this is counted based on each time when getting pages from
+ * the pools, not based on time. which means in case that system
+ * is idled for a while but the idle_idx might still be low if no
+ * activities happened in the pools.
+ */
+ unsigned long epp_idle_idx;
+
+ /* last shrink time due to mem tight */
+ long epp_last_shrink;
+ long epp_last_access;
+
+ /*
+ * in-pool pages bookkeeping
+ */
+ spinlock_t epp_lock; /* protect following fields */
+ unsigned long epp_total_pages; /* total pages in pools */
+ unsigned long epp_free_pages; /* current pages available */
+
+ /*
+ * statistics
+ */
+ unsigned long epp_st_max_pages; /* # of pages ever reached */
+ unsigned int epp_st_grows; /* # of grows */
+ unsigned int epp_st_grow_fails; /* # of add pages failures */
+ unsigned int epp_st_shrinks; /* # of shrinks */
+ unsigned long epp_st_access; /* # of access */
+ unsigned long epp_st_missings; /* # of cache missing */
+ unsigned long epp_st_lowfree; /* lowest free pages reached */
+ unsigned int epp_st_max_wqlen; /* highest waitqueue length */
+ cfs_time_t epp_st_max_wait; /* in jeffies */
+ /*
+ * pointers to pools
+ */
+ struct page ***epp_pools;
+} page_pools;
+
+/*
+ * memory shrinker
+ */
+const int pools_shrinker_seeks = DEFAULT_SEEKS;
+static struct shrinker *pools_shrinker = NULL;
+
+
+/*
+ * /proc/fs/lustre/sptlrpc/encrypt_page_pools
+ */
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
+{
+ int rc;
+
+ spin_lock(&page_pools.epp_lock);
+
+ rc = seq_printf(m,
+ "physical pages: %lu\n"
+ "pages per pool: %lu\n"
+ "max pages: %lu\n"
+ "max pools: %u\n"
+ "total pages: %lu\n"
+ "total free: %lu\n"
+ "idle index: %lu/100\n"
+ "last shrink: %lds\n"
+ "last access: %lds\n"
+ "max pages reached: %lu\n"
+ "grows: %u\n"
+ "grows failure: %u\n"
+ "shrinks: %u\n"
+ "cache access: %lu\n"
+ "cache missing: %lu\n"
+ "low free mark: %lu\n"
+ "max waitqueue depth: %u\n"
+ "max wait time: "CFS_TIME_T"/%u\n"
+ ,
+ num_physpages,
+ PAGES_PER_POOL,
+ page_pools.epp_max_pages,
+ page_pools.epp_max_pools,
+ page_pools.epp_total_pages,
+ page_pools.epp_free_pages,
+ page_pools.epp_idle_idx,
+ cfs_time_current_sec() - page_pools.epp_last_shrink,
+ cfs_time_current_sec() - page_pools.epp_last_access,
+ page_pools.epp_st_max_pages,
+ page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks,
+ page_pools.epp_st_access,
+ page_pools.epp_st_missings,
+ page_pools.epp_st_lowfree,
+ page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait, HZ
+ );
+
+ spin_unlock(&page_pools.epp_lock);
+ return rc;
+}
+
+static void enc_pools_release_free_pages(long npages)
+{
+ int p_idx, g_idx;
+ int p_idx_max1, p_idx_max2;
+
+ LASSERT(npages > 0);
+ LASSERT(npages <= page_pools.epp_free_pages);
+ LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
+
+ /* max pool index before the release */
+ p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
+
+ page_pools.epp_free_pages -= npages;
+ page_pools.epp_total_pages -= npages;
+
+ /* max pool index after the release */
+ p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
+ ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+ LASSERT(page_pools.epp_pools[p_idx]);
+
+ while (npages--) {
+ LASSERT(page_pools.epp_pools[p_idx]);
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+
+ __free_page(page_pools.epp_pools[p_idx][g_idx]);
+ page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ };
+
+ /* free unused pools */
+ while (p_idx_max1 < p_idx_max2) {
+ LASSERT(page_pools.epp_pools[p_idx_max2]);
+ OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
+ page_pools.epp_pools[p_idx_max2] = NULL;
+ p_idx_max2--;
+ }
+}
+
+/*
+ * could be called frequently for query (@nr_to_scan == 0).
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
+ spin_lock(&page_pools.epp_lock);
+ shrink_param(sc, nr_to_scan) = min_t(unsigned long,
+ shrink_param(sc, nr_to_scan),
+ page_pools.epp_free_pages -
+ PTLRPC_MAX_BRW_PAGES);
+ if (shrink_param(sc, nr_to_scan) > 0) {
+ enc_pools_release_free_pages(shrink_param(sc,
+ nr_to_scan));
+ CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+ (long)shrink_param(sc, nr_to_scan),
+ page_pools.epp_free_pages);
+
+ page_pools.epp_st_shrinks++;
+ page_pools.epp_last_shrink = cfs_time_current_sec();
+ }
+ spin_unlock(&page_pools.epp_lock);
+ }
+
+ /*
+ * if no pool access for a long time, we consider it's fully idle.
+ * a little race here is fine.
+ */
+ if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
+ CACHE_QUIESCENT_PERIOD)) {
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_idle_idx = IDLE_IDX_MAX;
+ spin_unlock(&page_pools.epp_lock);
+ }
+
+ LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+ return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+ (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
+static inline
+int npages_to_npools(unsigned long npages)
+{
+ return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+}
+
+/*
+ * return how many pages cleaned up.
+ */
+static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
+{
+ unsigned long cleaned = 0;
+ int i, j;
+
+ for (i = 0; i < npools; i++) {
+ if (pools[i]) {
+ for (j = 0; j < PAGES_PER_POOL; j++) {
+ if (pools[i][j]) {
+ __free_page(pools[i][j]);
+ cleaned++;
+ }
+ }
+ OBD_FREE(pools[i], PAGE_CACHE_SIZE);
+ pools[i] = NULL;
+ }
+ }
+
+ return cleaned;
+}
+
+/*
+ * merge @npools pointed by @pools which contains @npages new pages
+ * into current pools.
+ *
+ * we have options to avoid most memory copy with some tricks. but we choose
+ * the simplest way to avoid complexity. It's not frequently called.
+ */
+static void enc_pools_insert(struct page ***pools, int npools, int npages)
+{
+ int freeslot;
+ int op_idx, np_idx, og_idx, ng_idx;
+ int cur_npools, end_npools;
+
+ LASSERT(npages > 0);
+ LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
+ LASSERT(npages_to_npools(npages) == npools);
+ LASSERT(page_pools.epp_growing);
+
+ spin_lock(&page_pools.epp_lock);
+
+ /*
+ * (1) fill all the free slots of current pools.
+ */
+ /* free slots are those left by rent pages, and the extra ones with
+ * index >= total_pages, locate at the tail of last pool. */
+ freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
+ if (freeslot != 0)
+ freeslot = PAGES_PER_POOL - freeslot;
+ freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;
+
+ op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+ np_idx = npools - 1;
+ ng_idx = (npages - 1) % PAGES_PER_POOL;
+
+ while (freeslot) {
+ LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
+ LASSERT(pools[np_idx][ng_idx] != NULL);
+
+ page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
+ pools[np_idx][ng_idx] = NULL;
+
+ freeslot--;
+
+ if (++og_idx == PAGES_PER_POOL) {
+ op_idx++;
+ og_idx = 0;
+ }
+ if (--ng_idx < 0) {
+ if (np_idx == 0)
+ break;
+ np_idx--;
+ ng_idx = PAGES_PER_POOL - 1;
+ }
+ }
+
+ /*
+ * (2) add pools if needed.
+ */
+ cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
+ PAGES_PER_POOL;
+ end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) /
+ PAGES_PER_POOL;
+ LASSERT(end_npools <= page_pools.epp_max_pools);
+
+ np_idx = 0;
+ while (cur_npools < end_npools) {
+ LASSERT(page_pools.epp_pools[cur_npools] == NULL);
+ LASSERT(np_idx < npools);
+ LASSERT(pools[np_idx] != NULL);
+
+ page_pools.epp_pools[cur_npools++] = pools[np_idx];
+ pools[np_idx++] = NULL;
+ }
+
+ page_pools.epp_total_pages += npages;
+ page_pools.epp_free_pages += npages;
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
+ page_pools.epp_st_max_pages = page_pools.epp_total_pages;
+
+ CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
+ page_pools.epp_total_pages);
+
+ spin_unlock(&page_pools.epp_lock);
+}
+
+static int enc_pools_add_pages(int npages)
+{
+ static DEFINE_MUTEX(add_pages_mutex);
+ struct page ***pools;
+ int npools, alloced = 0;
+ int i, j, rc = -ENOMEM;
+
+ if (npages < PTLRPC_MAX_BRW_PAGES)
+ npages = PTLRPC_MAX_BRW_PAGES;
+
+ mutex_lock(&add_pages_mutex);
+
+ if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
+ npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
+ LASSERT(npages > 0);
+
+ page_pools.epp_st_grows++;
+
+ npools = npages_to_npools(npages);
+ OBD_ALLOC(pools, npools * sizeof(*pools));
+ if (pools == NULL)
+ goto out;
+
+ for (i = 0; i < npools; i++) {
+ OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
+ if (pools[i] == NULL)
+ goto out_pools;
+
+ for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
+ pools[i][j] = alloc_page(__GFP_IO |
+ __GFP_HIGHMEM);
+ if (pools[i][j] == NULL)
+ goto out_pools;
+
+ alloced++;
+ }
+ }
+ LASSERT(alloced == npages);
+
+ enc_pools_insert(pools, npools, npages);
+ CDEBUG(D_SEC, "added %d pages into pools\n", npages);
+ rc = 0;
+
+out_pools:
+ enc_pools_cleanup(pools, npools);
+ OBD_FREE(pools, npools * sizeof(*pools));
+out:
+ if (rc) {
+ page_pools.epp_st_grow_fails++;
+ CERROR("Failed to allocate %d enc pages\n", npages);
+ }
+
+ mutex_unlock(&add_pages_mutex);
+ return rc;
+}
+
+static inline void enc_pools_wakeup(void)
+{
+ LASSERT(spin_is_locked(&page_pools.epp_lock));
+ LASSERT(page_pools.epp_waitqlen >= 0);
+
+ if (unlikely(page_pools.epp_waitqlen)) {
+ LASSERT(waitqueue_active(&page_pools.epp_waitq));
+ wake_up_all(&page_pools.epp_waitq);
+ }
+}
+
+static int enc_pools_should_grow(int page_needed, long now)
+{
+ /* don't grow if someone else is growing the pools right now,
+ * or the pools has reached its full capacity
+ */
+ if (page_pools.epp_growing ||
+ page_pools.epp_total_pages == page_pools.epp_max_pages)
+ return 0;
+
+ /* if total pages is not enough, we need to grow */
+ if (page_pools.epp_total_pages < page_needed)
+ return 1;
+
+ /*
+ * we wanted to return 0 here if there was a shrink just happened
+ * moment ago, but this may cause deadlock if both client and ost
+ * live on single node.
+ */
+#if 0
+ if (now - page_pools.epp_last_shrink < 2)
+ return 0;
+#endif
+
+ /*
+ * here we perhaps need consider other factors like wait queue
+ * length, idle index, etc. ?
+ */
+
+ /* grow the pools in any other cases */
+ return 1;
+}
+
+/*
+ * we allocate the requested pages atomically.
+ */
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
+{
+ wait_queue_t waitlink;
+ unsigned long this_idle = -1;
+ cfs_time_t tick = 0;
+ long now;
+ int p_idx, g_idx;
+ int i;
+
+ LASSERT(desc->bd_iov_count > 0);
+ LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
+
+ /* resent bulk, enc iov might have been allocated previously */
+ if (desc->bd_enc_iov != NULL)
+ return 0;
+
+ OBD_ALLOC(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ if (desc->bd_enc_iov == NULL)
+ return -ENOMEM;
+
+ spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_st_access++;
+again:
+ if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
+ if (tick == 0)
+ tick = cfs_time_current();
+
+ now = cfs_time_current_sec();
+
+ page_pools.epp_st_missings++;
+ page_pools.epp_pages_short += desc->bd_iov_count;
+
+ if (enc_pools_should_grow(desc->bd_iov_count, now)) {
+ page_pools.epp_growing = 1;
+
+ spin_unlock(&page_pools.epp_lock);
+ enc_pools_add_pages(page_pools.epp_pages_short / 2);
+ spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_growing = 0;
+
+ enc_pools_wakeup();
+ } else {
+ if (++page_pools.epp_waitqlen >
+ page_pools.epp_st_max_wqlen)
+ page_pools.epp_st_max_wqlen =
+ page_pools.epp_waitqlen;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ init_waitqueue_entry_current(&waitlink);
+ add_wait_queue(&page_pools.epp_waitq, &waitlink);
+
+ spin_unlock(&page_pools.epp_lock);
+ waitq_wait(&waitlink, TASK_UNINTERRUPTIBLE);
+ remove_wait_queue(&page_pools.epp_waitq, &waitlink);
+ LASSERT(page_pools.epp_waitqlen > 0);
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_waitqlen--;
+ }
+
+ LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
+ page_pools.epp_pages_short -= desc->bd_iov_count;
+
+ this_idle = 0;
+ goto again;
+ }
+
+ /* record max wait time */
+ if (unlikely(tick != 0)) {
+ tick = cfs_time_current() - tick;
+ if (tick > page_pools.epp_st_max_wait)
+ page_pools.epp_st_max_wait = tick;
+ }
+
+ /* proceed with rest of allocation */
+ page_pools.epp_free_pages -= desc->bd_iov_count;
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+ desc->bd_enc_iov[i].kiov_page =
+ page_pools.epp_pools[p_idx][g_idx];
+ page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ /*
+ * new idle index = (old * weight + new) / (weight + 1)
+ */
+ if (this_idle == -1) {
+ this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
+ page_pools.epp_total_pages;
+ }
+ page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
+ this_idle) /
+ (IDLE_IDX_WEIGHT + 1);
+
+ page_pools.epp_last_access = cfs_time_current_sec();
+
+ spin_unlock(&page_pools.epp_lock);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
+
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
+{
+ int p_idx, g_idx;
+ int i;
+
+ if (desc->bd_enc_iov == NULL)
+ return;
+
+ LASSERT(desc->bd_iov_count > 0);
+
+ spin_lock(&page_pools.epp_lock);
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+ LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
+ page_pools.epp_total_pages);
+ LASSERT(page_pools.epp_pools[p_idx]);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
+ LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
+
+ page_pools.epp_pools[p_idx][g_idx] =
+ desc->bd_enc_iov[i].kiov_page;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ page_pools.epp_free_pages += desc->bd_iov_count;
+
+ enc_pools_wakeup();
+
+ spin_unlock(&page_pools.epp_lock);
+
+ OBD_FREE(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ desc->bd_enc_iov = NULL;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
+
+/*
+ * we don't do much stuff for add_user/del_user anymore, except adding some
+ * initial pages in add_user() if current pools are empty, rest would be
+ * handled by the pools's self-adaption.
+ */
+int sptlrpc_enc_pool_add_user(void)
+{
+ int need_grow = 0;
+
+ spin_lock(&page_pools.epp_lock);
+ if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) {
+ page_pools.epp_growing = 1;
+ need_grow = 1;
+ }
+ spin_unlock(&page_pools.epp_lock);
+
+ if (need_grow) {
+ enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
+ PTLRPC_MAX_BRW_PAGES);
+
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_growing = 0;
+ enc_pools_wakeup();
+ spin_unlock(&page_pools.epp_lock);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+
+int sptlrpc_enc_pool_del_user(void)
+{
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
+
+static inline void enc_pools_alloc(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ OBD_ALLOC_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+
+static inline void enc_pools_free(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ LASSERT(page_pools.epp_pools);
+
+ OBD_FREE_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+
+int sptlrpc_enc_pool_init(void)
+{
+ /*
+ * maximum capacity is 1/8 of total physical memory.
+ * is the 1/8 a good number?
+ */
+ page_pools.epp_max_pages = num_physpages / 8;
+ page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
+
+ init_waitqueue_head(&page_pools.epp_waitq);
+ page_pools.epp_waitqlen = 0;
+ page_pools.epp_pages_short = 0;
+
+ page_pools.epp_growing = 0;
+
+ page_pools.epp_idle_idx = 0;
+ page_pools.epp_last_shrink = cfs_time_current_sec();
+ page_pools.epp_last_access = cfs_time_current_sec();
+
+ spin_lock_init(&page_pools.epp_lock);
+ page_pools.epp_total_pages = 0;
+ page_pools.epp_free_pages = 0;
+
+ page_pools.epp_st_max_pages = 0;
+ page_pools.epp_st_grows = 0;
+ page_pools.epp_st_grow_fails = 0;
+ page_pools.epp_st_shrinks = 0;
+ page_pools.epp_st_access = 0;
+ page_pools.epp_st_missings = 0;
+ page_pools.epp_st_lowfree = 0;
+ page_pools.epp_st_max_wqlen = 0;
+ page_pools.epp_st_max_wait = 0;
+
+ enc_pools_alloc();
+ if (page_pools.epp_pools == NULL)
+ return -ENOMEM;
+
+ pools_shrinker = set_shrinker(pools_shrinker_seeks,
+ enc_pools_shrink);
+ if (pools_shrinker == NULL) {
+ enc_pools_free();
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void sptlrpc_enc_pool_fini(void)
+{
+ unsigned long cleaned, npools;
+
+ LASSERT(pools_shrinker);
+ LASSERT(page_pools.epp_pools);
+ LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
+
+ remove_shrinker(pools_shrinker);
+
+ npools = npages_to_npools(page_pools.epp_total_pages);
+ cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
+ LASSERT(cleaned == page_pools.epp_total_pages);
+
+ enc_pools_free();
+
+ if (page_pools.epp_st_access > 0) {
+ CDEBUG(D_SEC,
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, "
+ "access %lu, missing %lu, max qlen %u, max wait "
+ CFS_TIME_T"/%d\n",
+ page_pools.epp_st_max_pages, page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks, page_pools.epp_st_access,
+ page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait, HZ);
+ }
+}
+
+
+static int cfs_hash_alg_id[] = {
+ [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL,
+ [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
+ [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32,
+ [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5,
+ [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1,
+ [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256,
+ [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384,
+ [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512,
+};
+const char * sptlrpc_get_hash_name(__u8 hash_alg)
+{
+ return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_name);
+
+__u8 sptlrpc_get_hash_alg(const char *algname)
+{
+ return cfs_crypto_hash_alg(algname);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_alg);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int size = msg->lm_buflens[offset];
+
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ if (bsd == NULL) {
+ CERROR("Invalid bulk sec desc: size %d\n", size);
+ return -EINVAL;
+ }
+
+ if (swabbed) {
+ __swab32s(&bsd->bsd_nob);
+ }
+
+ if (unlikely(bsd->bsd_version != 0)) {
+ CERROR("Unexpected version %u\n", bsd->bsd_version);
+ return -EPROTO;
+ }
+
+ if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
+ CERROR("Invalid type %u\n", bsd->bsd_type);
+ return -EPROTO;
+ }
+
+ /* FIXME more sanity check here */
+
+ if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
+ CERROR("Invalid svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen)
+{
+ struct cfs_crypto_hash_desc *hdesc;
+ int hashsize;
+ char hashbuf[64];
+ unsigned int bufsize;
+ int i, err;
+
+ LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
+ LASSERT(buflen >= 4);
+
+ hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
+ if (IS_ERR(hdesc)) {
+ CERROR("Unable to initialize checksum hash %s\n",
+ cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
+ return PTR_ERR(hdesc);
+ }
+
+ hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
+ desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+ desc->bd_iov[i].kiov_len);
+ }
+ if (hashsize > buflen) {
+ bufsize = sizeof(hashbuf);
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
+ &bufsize);
+ memcpy(buf, hashbuf, buflen);
+ } else {
+ bufsize = buflen;
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf,
+ &bufsize);
+ }
+
+ if (err)
+ cfs_crypto_hash_final(hdesc, NULL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
new file mode 100644
index 000000000000..a45a3929b59f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -0,0 +1,1233 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_log.h>
+#include <lustre_disk.h>
+#include <lustre_dlm.h>
+#include <lustre_param.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+const char *sptlrpc_part2name(enum lustre_sec_part part)
+{
+ switch (part) {
+ case LUSTRE_SP_CLI:
+ return "cli";
+ case LUSTRE_SP_MDT:
+ return "mdt";
+ case LUSTRE_SP_OST:
+ return "ost";
+ case LUSTRE_SP_MGC:
+ return "mgc";
+ case LUSTRE_SP_MGS:
+ return "mgs";
+ case LUSTRE_SP_ANY:
+ return "any";
+ default:
+ return "err";
+ }
+}
+EXPORT_SYMBOL(sptlrpc_part2name);
+
+enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
+{
+ const char *type = obd->obd_type->typ_name;
+
+ if (!strcmp(type, LUSTRE_MDT_NAME))
+ return LUSTRE_SP_MDT;
+ if (!strcmp(type, LUSTRE_OST_NAME))
+ return LUSTRE_SP_OST;
+ if (!strcmp(type, LUSTRE_MGS_NAME))
+ return LUSTRE_SP_MGS;
+
+ CERROR("unknown target %p(%s)\n", obd, type);
+ return LUSTRE_SP_ANY;
+}
+EXPORT_SYMBOL(sptlrpc_target_sec_part);
+
+/****************************************
+ * user supplied flavor string parsing *
+ ****************************************/
+
+/*
+ * format: <base_flavor>[-<bulk_type:alg_spec>]
+ */
+int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr)
+{
+ char buf[32];
+ char *bulk, *alg;
+
+ memset(flvr, 0, sizeof(*flvr));
+
+ if (str == NULL || str[0] == '\0') {
+ flvr->sf_rpc = SPTLRPC_FLVR_INVALID;
+ return 0;
+ }
+
+ strncpy(buf, str, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ bulk = strchr(buf, '-');
+ if (bulk)
+ *bulk++ = '\0';
+
+ flvr->sf_rpc = sptlrpc_name2flavor_base(buf);
+ if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID)
+ goto err_out;
+
+ /*
+ * currently only base flavor "plain" can have bulk specification.
+ */
+ if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) {
+ flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32;
+ if (bulk) {
+ /*
+ * format: plain-hash:<hash_alg>
+ */
+ alg = strchr(bulk, ':');
+ if (alg == NULL)
+ goto err_out;
+ *alg++ = '\0';
+
+ if (strcmp(bulk, "hash"))
+ goto err_out;
+
+ flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg);
+ if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX)
+ goto err_out;
+ }
+
+ if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL)
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL);
+ else
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG);
+ } else {
+ if (bulk)
+ goto err_out;
+ }
+
+ flvr->sf_flags = 0;
+ return 0;
+
+err_out:
+ CERROR("invalid flavor string: %s\n", str);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(sptlrpc_parse_flavor);
+
+/****************************************
+ * configure rules *
+ ****************************************/
+
+static void get_default_flavor(struct sptlrpc_flavor *sf)
+{
+ memset(sf, 0, sizeof(*sf));
+
+ sf->sf_rpc = SPTLRPC_FLVR_NULL;
+ sf->sf_flags = 0;
+}
+
+static void sptlrpc_rule_init(struct sptlrpc_rule *rule)
+{
+ rule->sr_netid = LNET_NIDNET(LNET_NID_ANY);
+ rule->sr_from = LUSTRE_SP_ANY;
+ rule->sr_to = LUSTRE_SP_ANY;
+ rule->sr_padding = 0;
+
+ get_default_flavor(&rule->sr_flvr);
+}
+
+/*
+ * format: network[.direction]=flavor
+ */
+int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule)
+{
+ char *flavor, *dir;
+ int rc;
+
+ sptlrpc_rule_init(rule);
+
+ flavor = strchr(param, '=');
+ if (flavor == NULL) {
+ CERROR("invalid param, no '='\n");
+ RETURN(-EINVAL);
+ }
+ *flavor++ = '\0';
+
+ dir = strchr(param, '.');
+ if (dir)
+ *dir++ = '\0';
+
+ /* 1.1 network */
+ if (strcmp(param, "default")) {
+ rule->sr_netid = libcfs_str2net(param);
+ if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) {
+ CERROR("invalid network name: %s\n", param);
+ RETURN(-EINVAL);
+ }
+ }
+
+ /* 1.2 direction */
+ if (dir) {
+ if (!strcmp(dir, "mdt2ost")) {
+ rule->sr_from = LUSTRE_SP_MDT;
+ rule->sr_to = LUSTRE_SP_OST;
+ } else if (!strcmp(dir, "mdt2mdt")) {
+ rule->sr_from = LUSTRE_SP_MDT;
+ rule->sr_to = LUSTRE_SP_MDT;
+ } else if (!strcmp(dir, "cli2ost")) {
+ rule->sr_from = LUSTRE_SP_CLI;
+ rule->sr_to = LUSTRE_SP_OST;
+ } else if (!strcmp(dir, "cli2mdt")) {
+ rule->sr_from = LUSTRE_SP_CLI;
+ rule->sr_to = LUSTRE_SP_MDT;
+ } else {
+ CERROR("invalid rule dir segment: %s\n", dir);
+ RETURN(-EINVAL);
+ }
+ }
+
+ /* 2.1 flavor */
+ rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr);
+ if (rc)
+ RETURN(-EINVAL);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(sptlrpc_parse_rule);
+
+void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset)
+{
+ LASSERT(rset->srs_nslot ||
+ (rset->srs_nrule == 0 && rset->srs_rules == NULL));
+
+ if (rset->srs_nslot) {
+ OBD_FREE(rset->srs_rules,
+ rset->srs_nslot * sizeof(*rset->srs_rules));
+ sptlrpc_rule_set_init(rset);
+ }
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_free);
+
+/*
+ * return 0 if the rule set could accomodate one more rule.
+ */
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset)
+{
+ struct sptlrpc_rule *rules;
+ int nslot;
+
+ might_sleep();
+
+ if (rset->srs_nrule < rset->srs_nslot)
+ return 0;
+
+ nslot = rset->srs_nslot + 8;
+
+ /* better use realloc() if available */
+ OBD_ALLOC(rules, nslot * sizeof(*rset->srs_rules));
+ if (rules == NULL)
+ return -ENOMEM;
+
+ if (rset->srs_nrule) {
+ LASSERT(rset->srs_nslot && rset->srs_rules);
+ memcpy(rules, rset->srs_rules,
+ rset->srs_nrule * sizeof(*rset->srs_rules));
+
+ OBD_FREE(rset->srs_rules,
+ rset->srs_nslot * sizeof(*rset->srs_rules));
+ }
+
+ rset->srs_rules = rules;
+ rset->srs_nslot = nslot;
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_expand);
+
+static inline int rule_spec_dir(struct sptlrpc_rule *rule)
+{
+ return (rule->sr_from != LUSTRE_SP_ANY ||
+ rule->sr_to != LUSTRE_SP_ANY);
+}
+static inline int rule_spec_net(struct sptlrpc_rule *rule)
+{
+ return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY));
+}
+static inline int rule_match_dir(struct sptlrpc_rule *r1,
+ struct sptlrpc_rule *r2)
+{
+ return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to);
+}
+static inline int rule_match_net(struct sptlrpc_rule *r1,
+ struct sptlrpc_rule *r2)
+{
+ return (r1->sr_netid == r2->sr_netid);
+}
+
+/*
+ * merge @rule into @rset.
+ * the @rset slots might be expanded.
+ */
+int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset,
+ struct sptlrpc_rule *rule)
+{
+ struct sptlrpc_rule *p = rset->srs_rules;
+ int spec_dir, spec_net;
+ int rc, n, match = 0;
+
+ might_sleep();
+
+ spec_net = rule_spec_net(rule);
+ spec_dir = rule_spec_dir(rule);
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ p = &rset->srs_rules[n];
+
+ /* test network match, if failed:
+ * - spec rule: skip rules which is also spec rule match, until
+ * we hit a wild rule, which means no more chance
+ * - wild rule: skip until reach the one which is also wild
+ * and matches
+ */
+ if (!rule_match_net(p, rule)) {
+ if (spec_net) {
+ if (rule_spec_net(p))
+ continue;
+ else
+ break;
+ } else {
+ continue;
+ }
+ }
+
+ /* test dir match, same logic as net matching */
+ if (!rule_match_dir(p, rule)) {
+ if (spec_dir) {
+ if (rule_spec_dir(p))
+ continue;
+ else
+ break;
+ } else {
+ continue;
+ }
+ }
+
+ /* find a match */
+ match = 1;
+ break;
+ }
+
+ if (match) {
+ LASSERT(n >= 0 && n < rset->srs_nrule);
+
+ if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) {
+ /* remove this rule */
+ if (n < rset->srs_nrule - 1)
+ memmove(&rset->srs_rules[n],
+ &rset->srs_rules[n + 1],
+ (rset->srs_nrule - n - 1) *
+ sizeof(*rule));
+ rset->srs_nrule--;
+ } else {
+ /* override the rule */
+ memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+ }
+ } else {
+ LASSERT(n >= 0 && n <= rset->srs_nrule);
+
+ if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) {
+ rc = sptlrpc_rule_set_expand(rset);
+ if (rc)
+ return rc;
+
+ if (n < rset->srs_nrule)
+ memmove(&rset->srs_rules[n + 1],
+ &rset->srs_rules[n],
+ (rset->srs_nrule - n) * sizeof(*rule));
+ memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+ rset->srs_nrule++;
+ } else {
+ CDEBUG(D_CONFIG, "ignore the unmatched deletion\n");
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_merge);
+
+/**
+ * given from/to/nid, determine a matching flavor in ruleset.
+ * return 1 if a match found, otherwise return 0.
+ */
+int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ struct sptlrpc_rule *r;
+ int n;
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ r = &rset->srs_rules[n];
+
+ if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) &&
+ r->sr_netid != LNET_NIDNET(LNET_NID_ANY) &&
+ LNET_NIDNET(nid) != r->sr_netid)
+ continue;
+
+ if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY &&
+ from != r->sr_from)
+ continue;
+
+ if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY &&
+ to != r->sr_to)
+ continue;
+
+ *sf = r->sr_flvr;
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_choose);
+
+void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *rset)
+{
+ struct sptlrpc_rule *r;
+ int n;
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ r = &rset->srs_rules[n];
+ CDEBUG(D_SEC, "<%02d> from %x to %x, net %x, rpc %x\n", n,
+ r->sr_from, r->sr_to, r->sr_netid, r->sr_flvr.sf_rpc);
+ }
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_dump);
+
+static int sptlrpc_rule_set_extract(struct sptlrpc_rule_set *gen,
+ struct sptlrpc_rule_set *tgt,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ struct sptlrpc_rule_set *rset)
+{
+ struct sptlrpc_rule_set *src[2] = { gen, tgt };
+ struct sptlrpc_rule *rule;
+ int i, n, rc;
+
+ might_sleep();
+
+ /* merge general rules firstly, then target-specific rules */
+ for (i = 0; i < 2; i++) {
+ if (src[i] == NULL)
+ continue;
+
+ for (n = 0; n < src[i]->srs_nrule; n++) {
+ rule = &src[i]->srs_rules[n];
+
+ if (from != LUSTRE_SP_ANY &&
+ rule->sr_from != LUSTRE_SP_ANY &&
+ rule->sr_from != from)
+ continue;
+ if (to != LUSTRE_SP_ANY &&
+ rule->sr_to != LUSTRE_SP_ANY &&
+ rule->sr_to != to)
+ continue;
+
+ rc = sptlrpc_rule_set_merge(rset, rule);
+ if (rc) {
+ CERROR("can't merge: %d\n", rc);
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**********************************
+ * sptlrpc configuration support *
+ **********************************/
+
+struct sptlrpc_conf_tgt {
+ struct list_head sct_list;
+ char sct_name[MAX_OBD_NAME];
+ struct sptlrpc_rule_set sct_rset;
+};
+
+struct sptlrpc_conf {
+ struct list_head sc_list;
+ char sc_fsname[MTI_NAME_MAXLEN];
+ unsigned int sc_modified; /* modified during updating */
+ unsigned int sc_updated:1, /* updated copy from MGS */
+ sc_local:1; /* local copy from target */
+ struct sptlrpc_rule_set sc_rset; /* fs general rules */
+ struct list_head sc_tgts; /* target-specific rules */
+};
+
+static struct mutex sptlrpc_conf_lock;
+static LIST_HEAD(sptlrpc_confs);
+
+static inline int is_hex(char c)
+{
+ return ((c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'f'));
+}
+
+static void target2fsname(const char *tgt, char *fsname, int buflen)
+{
+ const char *ptr;
+ int len;
+
+ ptr = strrchr(tgt, '-');
+ if (ptr) {
+ if ((strncmp(ptr, "-MDT", 4) != 0 &&
+ strncmp(ptr, "-OST", 4) != 0) ||
+ !is_hex(ptr[4]) || !is_hex(ptr[5]) ||
+ !is_hex(ptr[6]) || !is_hex(ptr[7]))
+ ptr = NULL;
+ }
+
+ /* if we didn't find the pattern, treat the whole string as fsname */
+ if (ptr == NULL)
+ len = strlen(tgt);
+ else
+ len = ptr - tgt;
+
+ len = min(len, buflen - 1);
+ memcpy(fsname, tgt, len);
+ fsname[len] = '\0';
+}
+
+static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf)
+{
+ struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next;
+
+ sptlrpc_rule_set_free(&conf->sc_rset);
+
+ list_for_each_entry_safe(conf_tgt, conf_tgt_next,
+ &conf->sc_tgts, sct_list) {
+ sptlrpc_rule_set_free(&conf_tgt->sct_rset);
+ list_del(&conf_tgt->sct_list);
+ OBD_FREE_PTR(conf_tgt);
+ }
+ LASSERT(list_empty(&conf->sc_tgts));
+
+ conf->sc_updated = 0;
+ conf->sc_local = 0;
+}
+
+static void sptlrpc_conf_free(struct sptlrpc_conf *conf)
+{
+ CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname);
+
+ sptlrpc_conf_free_rsets(conf);
+ list_del(&conf->sc_list);
+ OBD_FREE_PTR(conf);
+}
+
+static
+struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf,
+ const char *name,
+ int create)
+{
+ struct sptlrpc_conf_tgt *conf_tgt;
+
+ list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) {
+ if (strcmp(conf_tgt->sct_name, name) == 0)
+ return conf_tgt;
+ }
+
+ if (!create)
+ return NULL;
+
+ OBD_ALLOC_PTR(conf_tgt);
+ if (conf_tgt) {
+ strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name));
+ sptlrpc_rule_set_init(&conf_tgt->sct_rset);
+ list_add(&conf_tgt->sct_list, &conf->sc_tgts);
+ }
+
+ return conf_tgt;
+}
+
+static
+struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname,
+ int create)
+{
+ struct sptlrpc_conf *conf;
+
+ list_for_each_entry(conf, &sptlrpc_confs, sc_list) {
+ if (strcmp(conf->sc_fsname, fsname) == 0)
+ return conf;
+ }
+
+ if (!create)
+ return NULL;
+
+ OBD_ALLOC_PTR(conf);
+ if (conf == NULL)
+ return NULL;
+
+ strcpy(conf->sc_fsname, fsname);
+ sptlrpc_rule_set_init(&conf->sc_rset);
+ INIT_LIST_HEAD(&conf->sc_tgts);
+ list_add(&conf->sc_list, &sptlrpc_confs);
+
+ CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname);
+ return conf;
+}
+
+/**
+ * caller must hold conf_lock already.
+ */
+static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf,
+ const char *target,
+ struct sptlrpc_rule *rule)
+{
+ struct sptlrpc_conf_tgt *conf_tgt;
+ struct sptlrpc_rule_set *rule_set;
+
+ /* fsname == target means general rules for the whole fs */
+ if (strcmp(conf->sc_fsname, target) == 0) {
+ rule_set = &conf->sc_rset;
+ } else {
+ conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1);
+ if (conf_tgt) {
+ rule_set = &conf_tgt->sct_rset;
+ } else {
+ CERROR("out of memory, can't merge rule!\n");
+ return -ENOMEM;
+ }
+ }
+
+ return sptlrpc_rule_set_merge(rule_set, rule);
+}
+
+/**
+ * process one LCFG_SPTLRPC_CONF record. if \a conf is NULL, we
+ * find one through the target name in the record inside conf_lock;
+ * otherwise means caller already hold conf_lock.
+ */
+static int __sptlrpc_process_config(struct lustre_cfg *lcfg,
+ struct sptlrpc_conf *conf)
+{
+ char *target, *param;
+ char fsname[MTI_NAME_MAXLEN];
+ struct sptlrpc_rule rule;
+ int rc;
+ ENTRY;
+
+ target = lustre_cfg_string(lcfg, 1);
+ if (target == NULL) {
+ CERROR("missing target name\n");
+ RETURN(-EINVAL);
+ }
+
+ param = lustre_cfg_string(lcfg, 2);
+ if (param == NULL) {
+ CERROR("missing parameter\n");
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param);
+
+ /* parse rule to make sure the format is correct */
+ if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
+ CERROR("Invalid sptlrpc parameter: %s\n", param);
+ RETURN(-EINVAL);
+ }
+ param += sizeof(PARAM_SRPC_FLVR) - 1;
+
+ rc = sptlrpc_parse_rule(param, &rule);
+ if (rc)
+ RETURN(-EINVAL);
+
+ if (conf == NULL) {
+ target2fsname(target, fsname, sizeof(fsname));
+
+ mutex_lock(&sptlrpc_conf_lock);
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf == NULL) {
+ CERROR("can't find conf\n");
+ rc = -ENOMEM;
+ } else {
+ rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+ }
+ mutex_unlock(&sptlrpc_conf_lock);
+ } else {
+ LASSERT(mutex_is_locked(&sptlrpc_conf_lock));
+ rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+ }
+
+ if (rc == 0)
+ conf->sc_modified++;
+
+ RETURN(rc);
+}
+
+int sptlrpc_process_config(struct lustre_cfg *lcfg)
+{
+ return __sptlrpc_process_config(lcfg, NULL);
+}
+EXPORT_SYMBOL(sptlrpc_process_config);
+
+static int logname2fsname(const char *logname, char *buf, int buflen)
+{
+ char *ptr;
+ int len;
+
+ ptr = strrchr(logname, '-');
+ if (ptr == NULL || strcmp(ptr, "-sptlrpc")) {
+ CERROR("%s is not a sptlrpc config log\n", logname);
+ return -EINVAL;
+ }
+
+ len = min((int) (ptr - logname), buflen - 1);
+
+ memcpy(buf, logname, len);
+ buf[len] = '\0';
+ return 0;
+}
+
+void sptlrpc_conf_log_update_begin(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf && conf->sc_local) {
+ LASSERT(conf->sc_updated == 0);
+ sptlrpc_conf_free_rsets(conf);
+ }
+ conf->sc_modified = 0;
+
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_begin);
+
+/**
+ * mark a config log has been updated
+ */
+void sptlrpc_conf_log_update_end(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf) {
+ /*
+ * if original state is not updated, make sure the
+ * modified counter > 0 to enforce updating local copy.
+ */
+ if (conf->sc_updated == 0)
+ conf->sc_modified++;
+
+ conf->sc_updated = 1;
+ }
+
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_end);
+
+void sptlrpc_conf_log_start(const char *logname)
+{
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ sptlrpc_conf_get(fsname, 1);
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_start);
+
+void sptlrpc_conf_log_stop(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf)
+ sptlrpc_conf_free(conf);
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_stop);
+
+static void inline flavor_set_flags(struct sptlrpc_flavor *sf,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ unsigned int fl_udesc)
+{
+ /*
+ * null flavor doesn't need to set any flavor, and in fact
+ * we'd better not do that because everybody share a single sec.
+ */
+ if (sf->sf_rpc == SPTLRPC_FLVR_NULL)
+ return;
+
+ if (from == LUSTRE_SP_MDT) {
+ /* MDT->MDT; MDT->OST */
+ sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY;
+ } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) {
+ /* CLI->OST */
+ sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK;
+ } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) {
+ /* CLI->MDT */
+ if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL)
+ sf->sf_flags |= PTLRPC_SEC_FL_UDESC;
+ }
+}
+
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ struct obd_uuid *target,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ struct sptlrpc_conf *conf;
+ struct sptlrpc_conf_tgt *conf_tgt;
+ char name[MTI_NAME_MAXLEN];
+ int len, rc = 0;
+
+ target2fsname(target->uuid, name, sizeof(name));
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(name, 0);
+ if (conf == NULL)
+ goto out;
+
+ /* convert uuid name (supposed end with _UUID) to target name */
+ len = strlen(target->uuid);
+ LASSERT(len > 5);
+ memcpy(name, target->uuid, len - 5);
+ name[len - 5] = '\0';
+
+ conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0);
+ if (conf_tgt) {
+ rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset,
+ from, to, nid, sf);
+ if (rc)
+ goto out;
+ }
+
+ rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf);
+out:
+ mutex_unlock(&sptlrpc_conf_lock);
+
+ if (rc == 0)
+ get_default_flavor(sf);
+
+ flavor_set_flags(sf, from, to, 1);
+}
+
+/**
+ * called by target devices, determine the expected flavor from
+ * certain peer (from, nid).
+ */
+void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ if (sptlrpc_rule_set_choose(rset, from, LUSTRE_SP_ANY, nid, sf) == 0)
+ get_default_flavor(sf);
+}
+EXPORT_SYMBOL(sptlrpc_target_choose_flavor);
+
+#define SEC_ADAPT_DELAY (10)
+
+/**
+ * called by client devices, notify the sptlrpc config has changed and
+ * do import_sec_adapt later.
+ */
+void sptlrpc_conf_client_adapt(struct obd_device *obd)
+{
+ struct obd_import *imp;
+ ENTRY;
+
+ LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) ==0);
+ CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
+
+ /* serialize with connect/disconnect import */
+ down_read(&obd->u.cli.cl_sem);
+
+ imp = obd->u.cli.cl_import;
+ if (imp) {
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_sec)
+ imp->imp_sec_expire = cfs_time_current_sec() +
+ SEC_ADAPT_DELAY;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ up_read(&obd->u.cli.cl_sem);
+ EXIT;
+}
+EXPORT_SYMBOL(sptlrpc_conf_client_adapt);
+
+
+static void rule2string(struct sptlrpc_rule *r, char *buf, int buflen)
+{
+ char dirbuf[8];
+ char *net;
+ char *ptr = buf;
+
+ if (r->sr_netid == LNET_NIDNET(LNET_NID_ANY))
+ net = "default";
+ else
+ net = libcfs_net2str(r->sr_netid);
+
+ if (r->sr_from == LUSTRE_SP_ANY && r->sr_to == LUSTRE_SP_ANY)
+ dirbuf[0] = '\0';
+ else
+ snprintf(dirbuf, sizeof(dirbuf), ".%s2%s",
+ sptlrpc_part2name(r->sr_from),
+ sptlrpc_part2name(r->sr_to));
+
+ ptr += snprintf(buf, buflen, "srpc.flavor.%s%s=", net, dirbuf);
+
+ sptlrpc_flavor2name(&r->sr_flvr, ptr, buflen - (ptr - buf));
+ buf[buflen - 1] = '\0';
+}
+
+static int sptlrpc_record_rule_set(struct llog_handle *llh,
+ char *target,
+ struct sptlrpc_rule_set *rset)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ struct llog_rec_hdr rec;
+ int buflen;
+ char param[48];
+ int i, rc;
+
+ for (i = 0; i < rset->srs_nrule; i++) {
+ rule2string(&rset->srs_rules[i], param, sizeof(param));
+
+ lustre_cfg_bufs_reset(&bufs, NULL);
+ lustre_cfg_bufs_set_string(&bufs, 1, target);
+ lustre_cfg_bufs_set_string(&bufs, 2, param);
+ lcfg = lustre_cfg_new(LCFG_SPTLRPC_CONF, &bufs);
+ LASSERT(lcfg);
+
+ buflen = lustre_cfg_len(lcfg->lcfg_bufcount,
+ lcfg->lcfg_buflens);
+ rec.lrh_len = llog_data_len(buflen);
+ rec.lrh_type = OBD_CFG_REC;
+ rc = llog_write(NULL, llh, &rec, NULL, 0, (void *)lcfg, -1);
+ if (rc)
+ CERROR("failed to write a rec: rc = %d\n", rc);
+ lustre_cfg_free(lcfg);
+ }
+ return 0;
+}
+
+static int sptlrpc_record_rules(struct llog_handle *llh,
+ struct sptlrpc_conf *conf)
+{
+ struct sptlrpc_conf_tgt *conf_tgt;
+
+ sptlrpc_record_rule_set(llh, conf->sc_fsname, &conf->sc_rset);
+
+ list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) {
+ sptlrpc_record_rule_set(llh, conf_tgt->sct_name,
+ &conf_tgt->sct_rset);
+ }
+ return 0;
+}
+
+#define LOG_SPTLRPC_TMP "sptlrpc.tmp"
+#define LOG_SPTLRPC "sptlrpc"
+
+static
+int sptlrpc_target_local_copy_conf(struct obd_device *obd,
+ struct sptlrpc_conf *conf)
+{
+ struct llog_handle *llh = NULL;
+ struct llog_ctxt *ctxt;
+ struct lvfs_run_ctxt saved;
+ struct dentry *dentry;
+ int rc;
+ ENTRY;
+
+ ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt == NULL)
+ RETURN(-EINVAL);
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ dentry = ll_lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
+ strlen(MOUNT_CONFIGS_DIR));
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot lookup %s directory: rc = %d\n",
+ MOUNT_CONFIGS_DIR, rc);
+ GOTO(out_ctx, rc);
+ }
+
+ /* erase the old tmp log */
+ rc = llog_erase(NULL, ctxt, NULL, LOG_SPTLRPC_TMP);
+ if (rc < 0 && rc != -ENOENT) {
+ CERROR("%s: cannot erase temporary sptlrpc log: rc = %d\n",
+ obd->obd_name, rc);
+ GOTO(out_dput, rc);
+ }
+
+ /* write temporary log */
+ rc = llog_open_create(NULL, ctxt, &llh, NULL, LOG_SPTLRPC_TMP);
+ if (rc)
+ GOTO(out_dput, rc);
+ rc = llog_init_handle(NULL, llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(out_close, rc);
+
+ rc = sptlrpc_record_rules(llh, conf);
+
+out_close:
+ llog_close(NULL, llh);
+ if (rc == 0)
+ rc = lustre_rename(dentry, obd->obd_lvfs_ctxt.pwdmnt,
+ LOG_SPTLRPC_TMP, LOG_SPTLRPC);
+out_dput:
+ l_dput(dentry);
+out_ctx:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ CDEBUG(D_SEC, "target %s: write local sptlrpc conf: rc = %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+}
+
+static int local_read_handler(const struct lu_env *env,
+ struct llog_handle *llh,
+ struct llog_rec_hdr *rec, void *data)
+{
+ struct sptlrpc_conf *conf = (struct sptlrpc_conf *) data;
+ struct lustre_cfg *lcfg = (struct lustre_cfg *)(rec + 1);
+ int cfg_len, rc;
+ ENTRY;
+
+ if (rec->lrh_type != OBD_CFG_REC) {
+ CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+ RETURN(-EINVAL);
+ }
+
+ cfg_len = rec->lrh_len - sizeof(struct llog_rec_hdr) -
+ sizeof(struct llog_rec_tail);
+
+ rc = lustre_cfg_sanity_check(lcfg, cfg_len);
+ if (rc) {
+ CERROR("Insane cfg\n");
+ RETURN(rc);
+ }
+
+ if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
+ CERROR("invalid command (%x)\n", lcfg->lcfg_command);
+ RETURN(-EINVAL);
+ }
+
+ RETURN(__sptlrpc_process_config(lcfg, conf));
+}
+
+static
+int sptlrpc_target_local_read_conf(struct obd_device *obd,
+ struct sptlrpc_conf *conf)
+{
+ struct llog_handle *llh = NULL;
+ struct llog_ctxt *ctxt;
+ struct lvfs_run_ctxt saved;
+ int rc;
+ ENTRY;
+
+ LASSERT(conf->sc_updated == 0 && conf->sc_local == 0);
+
+ ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt == NULL) {
+ CERROR("missing llog context\n");
+ RETURN(-EINVAL);
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ rc = llog_open(NULL, ctxt, &llh, NULL, LOG_SPTLRPC, LLOG_OPEN_EXISTS);
+ if (rc < 0) {
+ if (rc == -ENOENT)
+ rc = 0;
+ GOTO(out_pop, rc);
+ }
+
+ rc = llog_init_handle(NULL, llh, LLOG_F_IS_PLAIN, NULL);
+ if (rc)
+ GOTO(out_close, rc);
+
+ if (llog_get_size(llh) <= 1) {
+ CDEBUG(D_SEC, "no local sptlrpc copy found\n");
+ GOTO(out_close, rc = 0);
+ }
+
+ rc = llog_process(NULL, llh, local_read_handler, (void *)conf, NULL);
+
+ if (rc == 0) {
+ conf->sc_local = 1;
+ } else {
+ sptlrpc_conf_free_rsets(conf);
+ }
+
+out_close:
+ llog_close(NULL, llh);
+out_pop:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ llog_ctxt_put(ctxt);
+ CDEBUG(D_SEC, "target %s: read local sptlrpc conf: rc = %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+}
+
+
+/**
+ * called by target devices, extract sptlrpc rules which applies to
+ * this target, to be used for future rpc flavor checking.
+ */
+int sptlrpc_conf_target_get_rules(struct obd_device *obd,
+ struct sptlrpc_rule_set *rset,
+ int initial)
+{
+ struct sptlrpc_conf *conf;
+ struct sptlrpc_conf_tgt *conf_tgt;
+ enum lustre_sec_part sp_dst;
+ char fsname[MTI_NAME_MAXLEN];
+ int rc = 0;
+ ENTRY;
+
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) == 0) {
+ sp_dst = LUSTRE_SP_MDT;
+ } else if (strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) == 0) {
+ sp_dst = LUSTRE_SP_OST;
+ } else {
+ CERROR("unexpected obd type %s\n", obd->obd_type->typ_name);
+ RETURN(-EINVAL);
+ }
+ CDEBUG(D_SEC, "get rules for target %s\n", obd->obd_uuid.uuid);
+
+ target2fsname(obd->obd_uuid.uuid, fsname, sizeof(fsname));
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf == NULL) {
+ CERROR("missing sptlrpc config log\n");
+ GOTO(out, rc);
+ }
+
+ if (conf->sc_updated == 0) {
+ /*
+ * always read from local copy. here another option is
+ * if we already have a local copy (read from another
+ * target device hosted on the same node) we simply use that.
+ */
+ if (conf->sc_local)
+ sptlrpc_conf_free_rsets(conf);
+
+ sptlrpc_target_local_read_conf(obd, conf);
+ } else {
+ LASSERT(conf->sc_local == 0);
+
+ /* write a local copy */
+ if (initial || conf->sc_modified)
+ sptlrpc_target_local_copy_conf(obd, conf);
+ else
+ CDEBUG(D_SEC, "unchanged, skip updating local copy\n");
+ }
+
+ /* extract rule set for this target */
+ conf_tgt = sptlrpc_conf_get_tgt(conf, obd->obd_name, 0);
+
+ rc = sptlrpc_rule_set_extract(&conf->sc_rset,
+ conf_tgt ? &conf_tgt->sct_rset: NULL,
+ LUSTRE_SP_ANY, sp_dst, rset);
+out:
+ mutex_unlock(&sptlrpc_conf_lock);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(sptlrpc_conf_target_get_rules);
+
+int sptlrpc_conf_init(void)
+{
+ mutex_init(&sptlrpc_conf_lock);
+ return 0;
+}
+
+void sptlrpc_conf_fini(void)
+{
+ struct sptlrpc_conf *conf, *conf_next;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) {
+ sptlrpc_conf_free(conf);
+ }
+ LASSERT(list_empty(&sptlrpc_confs));
+ mutex_unlock(&sptlrpc_conf_lock);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
new file mode 100644
index 000000000000..4c96a14a1bb6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -0,0 +1,250 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_gc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/libcfs.h>
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+#define SEC_GC_INTERVAL (30 * 60)
+
+
+static struct mutex sec_gc_mutex;
+static LIST_HEAD(sec_gc_list);
+static spinlock_t sec_gc_list_lock;
+
+static LIST_HEAD(sec_gc_ctx_list);
+static spinlock_t sec_gc_ctx_list_lock;
+
+static struct ptlrpc_thread sec_gc_thread;
+static atomic_t sec_gc_wait_del = ATOMIC_INIT(0);
+
+
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+ LASSERT(sec->ps_gc_interval > 0);
+ LASSERT(list_empty(&sec->ps_gc_list));
+
+ sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+
+ spin_lock(&sec_gc_list_lock);
+ list_add_tail(&sec_gc_list, &sec->ps_gc_list);
+ spin_unlock(&sec_gc_list_lock);
+
+ CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_sec);
+
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
+{
+ if (list_empty(&sec->ps_gc_list))
+ return;
+
+ might_sleep();
+
+ /* signal before list_del to make iteration in gc thread safe */
+ atomic_inc(&sec_gc_wait_del);
+
+ spin_lock(&sec_gc_list_lock);
+ list_del_init(&sec->ps_gc_list);
+ spin_unlock(&sec_gc_list_lock);
+
+ /* barrier */
+ mutex_lock(&sec_gc_mutex);
+ mutex_unlock(&sec_gc_mutex);
+
+ atomic_dec(&sec_gc_wait_del);
+
+ CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_del_sec);
+
+void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(list_empty(&ctx->cc_gc_chain));
+
+ CDEBUG(D_SEC, "hand over ctx %p(%u->%s)\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ spin_lock(&sec_gc_ctx_list_lock);
+ list_add(&ctx->cc_gc_chain, &sec_gc_ctx_list);
+ spin_unlock(&sec_gc_ctx_list_lock);
+
+ thread_add_flags(&sec_gc_thread, SVC_SIGNAL);
+ wake_up(&sec_gc_thread.t_ctl_waitq);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_ctx);
+
+static void sec_process_ctx_list(void)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ spin_lock(&sec_gc_ctx_list_lock);
+
+ while (!list_empty(&sec_gc_ctx_list)) {
+ ctx = list_entry(sec_gc_ctx_list.next,
+ struct ptlrpc_cli_ctx, cc_gc_chain);
+ list_del_init(&ctx->cc_gc_chain);
+ spin_unlock(&sec_gc_ctx_list_lock);
+
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 1);
+ CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ sptlrpc_cli_ctx_put(ctx, 1);
+
+ spin_lock(&sec_gc_ctx_list_lock);
+ }
+
+ spin_unlock(&sec_gc_ctx_list_lock);
+}
+
+static void sec_do_gc(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+
+ if (unlikely(sec->ps_gc_next == 0)) {
+ CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n",
+ sec, sec->ps_policy->sp_name);
+ return;
+ }
+
+ CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+
+ if (cfs_time_after(sec->ps_gc_next, cfs_time_current_sec()))
+ return;
+
+ sec->ps_policy->sp_cops->gc_ctx(sec);
+ sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+}
+
+static int sec_gc_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *) arg;
+ struct l_wait_info lwi;
+
+ unshare_fs_struct();
+
+ /* Record that the thread is running */
+ thread_set_flags(thread, SVC_RUNNING);
+ wake_up(&thread->t_ctl_waitq);
+
+ while (1) {
+ struct ptlrpc_sec *sec;
+
+ thread_clear_flags(thread, SVC_SIGNAL);
+ sec_process_ctx_list();
+again:
+ /* go through sec list do gc.
+ * FIXME here we iterate through the whole list each time which
+ * is not optimal. we perhaps want to use balanced binary tree
+ * to trace each sec as order of expiry time.
+ * another issue here is we wakeup as fixed interval instead of
+ * according to each sec's expiry time */
+ mutex_lock(&sec_gc_mutex);
+ list_for_each_entry(sec, &sec_gc_list, ps_gc_list) {
+ /* if someone is waiting to be deleted, let it
+ * proceed as soon as possible. */
+ if (atomic_read(&sec_gc_wait_del)) {
+ CDEBUG(D_SEC, "deletion pending, start over\n");
+ mutex_unlock(&sec_gc_mutex);
+ goto again;
+ }
+
+ sec_do_gc(sec);
+ }
+ mutex_unlock(&sec_gc_mutex);
+
+ /* check ctx list again before sleep */
+ sec_process_ctx_list();
+
+ lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopping(thread) ||
+ thread_is_signal(thread),
+ &lwi);
+
+ if (thread_test_and_clear_flags(thread, SVC_STOPPING))
+ break;
+ }
+
+ thread_set_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+ return 0;
+}
+
+int sptlrpc_gc_init(void)
+{
+ struct l_wait_info lwi = { 0 };
+ task_t *task;
+
+ mutex_init(&sec_gc_mutex);
+ spin_lock_init(&sec_gc_list_lock);
+ spin_lock_init(&sec_gc_ctx_list_lock);
+
+ /* initialize thread control */
+ memset(&sec_gc_thread, 0, sizeof(sec_gc_thread));
+ init_waitqueue_head(&sec_gc_thread.t_ctl_waitq);
+
+ task = kthread_run(sec_gc_main, &sec_gc_thread, "sptlrpc_gc");
+ if (IS_ERR(task)) {
+ CERROR("can't start gc thread: %ld\n", PTR_ERR(task));
+ return PTR_ERR(task);
+ }
+
+ l_wait_event(sec_gc_thread.t_ctl_waitq,
+ thread_is_running(&sec_gc_thread), &lwi);
+ return 0;
+}
+
+void sptlrpc_gc_fini(void)
+{
+ struct l_wait_info lwi = { 0 };
+
+ thread_set_flags(&sec_gc_thread, SVC_STOPPING);
+ wake_up(&sec_gc_thread.t_ctl_waitq);
+
+ l_wait_event(sec_gc_thread.t_ctl_waitq,
+ thread_is_stopped(&sec_gc_thread), &lwi);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
new file mode 100644
index 000000000000..1213621ca5aa
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_lproc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/crypto.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+
+struct proc_dir_entry *sptlrpc_proc_root = NULL;
+EXPORT_SYMBOL(sptlrpc_proc_root);
+
+char *sec_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strlcat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strlcat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_UDESC)
+ strlcat(buf, "udesc,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strlcat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strlcat(buf, "-,", bufsize);
+
+ return buf;
+}
+
+static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct ptlrpc_sec *sec = NULL;
+ char str[32];
+
+ LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+ if (cli->cl_import)
+ sec = sptlrpc_import_sec_ref(cli->cl_import);
+ if (sec == NULL)
+ goto out;
+
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));
+
+ seq_printf(seq, "rpc flavor: %s\n",
+ sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
+ seq_printf(seq, "bulk flavor: %s\n",
+ sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
+ seq_printf(seq, "flags: %s\n",
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
+ seq_printf(seq, "id: %d\n", sec->ps_id);
+ seq_printf(seq, "refcount: %d\n",
+ atomic_read(&sec->ps_refcount));
+ seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx));
+ seq_printf(seq, "gc internal %ld\n", sec->ps_gc_interval);
+ seq_printf(seq, "gc next %ld\n",
+ sec->ps_gc_interval ?
+ sec->ps_gc_next - cfs_time_current_sec() : 0);
+
+ sptlrpc_sec_put(sec);
+out:
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs);
+
+static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct ptlrpc_sec *sec = NULL;
+
+ LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+ if (cli->cl_import)
+ sec = sptlrpc_import_sec_ref(cli->cl_import);
+ if (sec == NULL)
+ goto out;
+
+ if (sec->ps_policy->sp_cops->display)
+ sec->ps_policy->sp_cops->display(sec, seq);
+
+ sptlrpc_sec_put(sec);
+out:
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs);
+
+int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev)
+{
+ int rc;
+
+ if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 &&
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 &&
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) {
+ CERROR("can't register lproc for obd type %s\n",
+ dev->obd_type->typ_name);
+ return -EINVAL;
+ }
+
+ rc = lprocfs_obd_seq_create(dev, "srpc_info", 0444,
+ &sptlrpc_info_lprocfs_fops, dev);
+ if (rc) {
+ CERROR("create proc entry srpc_info for %s: %d\n",
+ dev->obd_name, rc);
+ return rc;
+ }
+
+ rc = lprocfs_obd_seq_create(dev, "srpc_contexts", 0444,
+ &sptlrpc_ctxs_lprocfs_fops, dev);
+ if (rc) {
+ CERROR("create proc entry srpc_contexts for %s: %d\n",
+ dev->obd_name, rc);
+ return rc;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach);
+
+LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool);
+static struct lprocfs_vars sptlrpc_lprocfs_vars[] = {
+ { "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops },
+ { NULL }
+};
+
+int sptlrpc_lproc_init(void)
+{
+ int rc;
+
+ LASSERT(sptlrpc_proc_root == NULL);
+
+ sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root,
+ sptlrpc_lprocfs_vars, NULL);
+ if (IS_ERR(sptlrpc_proc_root)) {
+ rc = PTR_ERR(sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ return rc;
+ }
+ return 0;
+}
+
+void sptlrpc_lproc_fini(void)
+{
+ if (sptlrpc_proc_root) {
+ lprocfs_remove(&sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ }
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
new file mode 100644
index 000000000000..ff1137fe4dd6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
@@ -0,0 +1,464 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_null.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include <obd_support.h>
+#include <obd_cksum.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+static struct ptlrpc_sec_policy null_policy;
+static struct ptlrpc_sec null_sec;
+static struct ptlrpc_cli_ctx null_cli_ctx;
+static struct ptlrpc_svc_ctx null_svc_ctx;
+
+/*
+ * we can temporarily use the topmost 8-bits of lm_secflvr to identify
+ * the source sec part.
+ */
+static inline
+void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
+{
+ msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
+}
+
+static inline
+enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg)
+{
+ return (msg->lm_secflvr >> 24) & 0xFF;
+}
+
+static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+
+ if (!req->rq_import->imp_dlm_fake) {
+ struct obd_device *obd = req->rq_import->imp_obd;
+ null_encode_sec_part(req->rq_reqbuf,
+ obd->u.cli.cl_sp_me);
+ }
+ req->rq_reqdata_len = req->rq_reqlen;
+ return 0;
+}
+
+static
+int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ __u32 cksums, cksumc;
+
+ LASSERT(req->rq_repdata);
+
+ req->rq_repmsg = req->rq_repdata;
+ req->rq_replen = req->rq_repdata_len;
+
+ if (req->rq_early) {
+ cksums = lustre_msg_get_cksum(req->rq_repdata);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) &
+ MSGHDR_CKSUM_INCOMPAT18)
+ cksumc = lustre_msg_calc_cksum(req->rq_repmsg, 0);
+ else
+ cksumc = lustre_msg_calc_cksum(req->rq_repmsg, 1);
+#else
+# warning "remove checksum compatibility support for b1_8"
+ cksumc = lustre_msg_calc_cksum(req->rq_repmsg);
+#endif
+ if (cksumc != cksums) {
+ CDEBUG(D_SEC,
+ "early reply checksum mismatch: %08x != %08x\n",
+ cksumc, cksums);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static
+struct ptlrpc_sec *null_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf)
+{
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
+
+ /* general layer has take a module reference for us, because we never
+ * really destroy the sec, simply release the reference here.
+ */
+ sptlrpc_policy_put(&null_policy);
+ return &null_sec;
+}
+
+static
+void null_destroy_sec(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec == &null_sec);
+}
+
+static
+struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ atomic_inc(&null_cli_ctx.cc_refcount);
+ return &null_cli_ctx;
+}
+
+static
+int null_flush_ctx_cache(struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace, int force)
+{
+ return 0;
+}
+
+static
+int null_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ if (!req->rq_reqbuf) {
+ int alloc_size = size_roundup_power2(msgsize);
+
+ LASSERT(!req->rq_pool);
+ OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_size);
+ if (!req->rq_reqbuf)
+ return -ENOMEM;
+
+ req->rq_reqbuf_len = alloc_size;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= msgsize);
+ memset(req->rq_reqbuf, 0, msgsize);
+ }
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ return 0;
+}
+
+static
+void null_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ if (!req->rq_pool) {
+ LASSERTF(req->rq_reqmsg == req->rq_reqbuf,
+ "req %p: reqmsg %p is not reqbuf %p in null sec\n",
+ req, req->rq_reqmsg, req->rq_reqbuf);
+ LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen,
+ "req %p: reqlen %d should smaller than buflen %d\n",
+ req, req->rq_reqlen, req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+}
+
+static
+int null_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ /* add space for early replied */
+ msgsize += lustre_msg_early_size();
+
+ msgsize = size_roundup_power2(msgsize);
+
+ OBD_ALLOC_LARGE(req->rq_repbuf, msgsize);
+ if (!req->rq_repbuf)
+ return -ENOMEM;
+
+ req->rq_repbuf_len = msgsize;
+ return 0;
+}
+
+static
+void null_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repbuf);
+
+ OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+}
+
+static
+int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct lustre_msg *newbuf;
+ struct lustre_msg *oldbuf = req->rq_reqmsg;
+ int oldsize, newmsg_size, alloc_size;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf == req->rq_reqmsg);
+ LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+ LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));
+
+ /* compute new message size */
+ oldsize = req->rq_reqbuf->lm_buflens[segment];
+ req->rq_reqbuf->lm_buflens[segment] = newsize;
+ newmsg_size = lustre_packed_msg_size(oldbuf);
+ req->rq_reqbuf->lm_buflens[segment] = oldsize;
+
+ /* request from pool should always have enough buffer */
+ LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);
+
+ if (req->rq_reqbuf_len < newmsg_size) {
+ alloc_size = size_roundup_power2(newmsg_size);
+
+ OBD_ALLOC_LARGE(newbuf, alloc_size);
+ if (newbuf == NULL)
+ return -ENOMEM;
+
+ memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = req->rq_reqmsg = newbuf;
+ req->rq_reqbuf_len = alloc_size;
+ }
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+ req->rq_reqlen = newmsg_size;
+
+ return 0;
+}
+
+static struct ptlrpc_svc_ctx null_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &null_policy,
+};
+
+static
+int null_accept(struct ptlrpc_request *req)
+{
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_NULL);
+
+ if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) {
+ CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc);
+ return SECSVC_DROP;
+ }
+
+ req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf);
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ req->rq_reqlen = req->rq_reqdata_len;
+
+ req->rq_svc_ctx = &null_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ return SECSVC_OK;
+}
+
+static
+int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ int rs_size = sizeof(*rs) + msgsize;
+
+ LASSERT(msgsize % 8 == 0);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC_LARGE(rs, rs_size);
+ if (rs == NULL)
+ return -ENOMEM;
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+ rs->rs_msg = rs->rs_repbuf;
+
+ req->rq_reply_state = rs;
+ return 0;
+}
+
+static
+void null_free_rs(struct ptlrpc_reply_state *rs)
+{
+ LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+static
+int null_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+ LASSERT(rs);
+
+ rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+ rs->rs_repdata_len = req->rq_replen;
+
+ if (likely(req->rq_packed_final)) {
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+ req->rq_reply_off = lustre_msg_early_size();
+ else
+ req->rq_reply_off = 0;
+ } else {
+ __u32 cksum;
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) &
+ MSGHDR_CKSUM_INCOMPAT18)
+ cksum = lustre_msg_calc_cksum(rs->rs_repbuf, 0);
+ else
+ cksum = lustre_msg_calc_cksum(rs->rs_repbuf, 1);
+#else
+# warning "remove checksum compatibility support for b1_8"
+ cksum = lustre_msg_calc_cksum(rs->rs_repbuf);
+#endif
+ lustre_msg_set_cksum(rs->rs_repbuf, cksum);
+ req->rq_reply_off = 0;
+ }
+
+ return 0;
+}
+
+static struct ptlrpc_ctx_ops null_ctx_ops = {
+ .refresh = null_ctx_refresh,
+ .sign = null_ctx_sign,
+ .verify = null_ctx_verify,
+};
+
+static struct ptlrpc_sec_cops null_sec_cops = {
+ .create_sec = null_create_sec,
+ .destroy_sec = null_destroy_sec,
+ .lookup_ctx = null_lookup_ctx,
+ .flush_ctx_cache = null_flush_ctx_cache,
+ .alloc_reqbuf = null_alloc_reqbuf,
+ .alloc_repbuf = null_alloc_repbuf,
+ .free_reqbuf = null_free_reqbuf,
+ .free_repbuf = null_free_repbuf,
+ .enlarge_reqbuf = null_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops null_sec_sops = {
+ .accept = null_accept,
+ .alloc_rs = null_alloc_rs,
+ .authorize = null_authorize,
+ .free_rs = null_free_rs,
+};
+
+static struct ptlrpc_sec_policy null_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "sec.null",
+ .sp_policy = SPTLRPC_POLICY_NULL,
+ .sp_cops = &null_sec_cops,
+ .sp_sops = &null_sec_sops,
+};
+
+static void null_init_internal(void)
+{
+ static HLIST_HEAD(__list);
+
+ null_sec.ps_policy = &null_policy;
+ atomic_set(&null_sec.ps_refcount, 1); /* always busy */
+ null_sec.ps_id = -1;
+ null_sec.ps_import = NULL;
+ null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
+ null_sec.ps_flvr.sf_flags = 0;
+ null_sec.ps_part = LUSTRE_SP_ANY;
+ null_sec.ps_dying = 0;
+ spin_lock_init(&null_sec.ps_lock);
+ atomic_set(&null_sec.ps_nctx, 1); /* for "null_cli_ctx" */
+ INIT_LIST_HEAD(&null_sec.ps_gc_list);
+ null_sec.ps_gc_interval = 0;
+ null_sec.ps_gc_next = 0;
+
+ hlist_add_head(&null_cli_ctx.cc_cache, &__list);
+ atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */
+ null_cli_ctx.cc_sec = &null_sec;
+ null_cli_ctx.cc_ops = &null_ctx_ops;
+ null_cli_ctx.cc_expire = 0;
+ null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
+ PTLRPC_CTX_UPTODATE;
+ null_cli_ctx.cc_vcred.vc_uid = 0;
+ spin_lock_init(&null_cli_ctx.cc_lock);
+ INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
+ INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain);
+}
+
+int sptlrpc_null_init(void)
+{
+ int rc;
+
+ null_init_internal();
+
+ rc = sptlrpc_register_policy(&null_policy);
+ if (rc)
+ CERROR("failed to register %s: %d\n", null_policy.sp_name, rc);
+
+ return rc;
+}
+
+void sptlrpc_null_fini(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&null_policy);
+ if (rc)
+ CERROR("failed to unregister %s: %d\n", null_policy.sp_name,rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
new file mode 100644
index 000000000000..f552d2f182b1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -0,0 +1,1021 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_plain.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include <obd_support.h>
+#include <obd_cksum.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+struct plain_sec {
+ struct ptlrpc_sec pls_base;
+ rwlock_t pls_lock;
+ struct ptlrpc_cli_ctx *pls_ctx;
+};
+
+static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec)
+{
+ return container_of(sec, struct plain_sec, pls_base);
+}
+
+static struct ptlrpc_sec_policy plain_policy;
+static struct ptlrpc_ctx_ops plain_ctx_ops;
+static struct ptlrpc_svc_ctx plain_svc_ctx;
+
+static unsigned int plain_at_offset;
+
+/*
+ * for simplicity, plain policy rpc use fixed layout.
+ */
+#define PLAIN_PACK_SEGMENTS (4)
+
+#define PLAIN_PACK_HDR_OFF (0)
+#define PLAIN_PACK_MSG_OFF (1)
+#define PLAIN_PACK_USER_OFF (2)
+#define PLAIN_PACK_BULK_OFF (3)
+
+#define PLAIN_FL_USER (0x01)
+#define PLAIN_FL_BULK (0x02)
+
+struct plain_header {
+ __u8 ph_ver; /* 0 */
+ __u8 ph_flags;
+ __u8 ph_sp; /* source */
+ __u8 ph_bulk_hash_alg; /* complete flavor desc */
+ __u8 ph_pad[4];
+};
+
+struct plain_bulk_token {
+ __u8 pbt_hash[8];
+};
+
+#define PLAIN_BSD_SIZE \
+ (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token))
+
+/****************************************
+ * bulk checksum helpers *
+ ****************************************/
+
+static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+
+ if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed))
+ return -EPROTO;
+
+ bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE);
+ if (bsd == NULL) {
+ CERROR("bulk sec desc has short size %d\n",
+ lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF));
+ return -EPROTO;
+ }
+
+ if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) {
+ CERROR("invalid bulk svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *token)
+{
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
+
+ memset(token->pbt_hash, 0, sizeof(token->pbt_hash));
+ return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash,
+ sizeof(token->pbt_hash));
+}
+
+static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *tokenr)
+{
+ struct plain_bulk_token tokenv;
+ int rc;
+
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
+
+ memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash));
+ rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash,
+ sizeof(tokenv.pbt_hash));
+ if (rc)
+ return rc;
+
+ if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash)))
+ return -EACCES;
+ return 0;
+}
+
+static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
+{
+ char *ptr;
+ unsigned int off, i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len == 0)
+ continue;
+
+ ptr = kmap(desc->bd_iov[i].kiov_page);
+ off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ ptr[off] ^= 0x1;
+ kunmap(desc->bd_iov[i].kiov_page);
+ return;
+ }
+}
+
+/****************************************
+ * cli_ctx apis *
+ ****************************************/
+
+static
+int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx)
+{
+ return 0;
+}
+
+static
+int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
+ ENTRY;
+
+ msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_sp = ctx->cc_sec->ps_part;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+ if (req->rq_pack_udesc)
+ phdr->ph_flags |= PLAIN_FL_USER;
+ if (req->rq_pack_bulk)
+ phdr->ph_flags |= PLAIN_FL_BULK;
+
+ req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
+ msg->lm_buflens);
+ RETURN(0);
+}
+
+static
+int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_repdata;
+ struct plain_header *phdr;
+ __u32 cksum;
+ int swabbed;
+ ENTRY;
+
+ if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) {
+ CERROR("unexpected reply buf count %u\n", msg->lm_bufcount);
+ RETURN(-EPROTO);
+ }
+
+ swabbed = ptlrpc_rep_need_swab(req);
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ RETURN(-EPROTO);
+ }
+
+ /* expect no user desc in reply */
+ if (phdr->ph_flags & PLAIN_FL_USER) {
+ CERROR("Unexpected udesc flag in reply\n");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) {
+ CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg,
+ req->rq_flvr.u_bulk.hash.hash_alg);
+ RETURN(-EPROTO);
+ }
+
+ if (unlikely(req->rq_early)) {
+ unsigned int hsize = 4;
+
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+ lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&cksum, &hsize);
+ if (cksum != msg->lm_cksum) {
+ CDEBUG(D_SEC,
+ "early reply checksum mismatch: %08x != %08x\n",
+ cpu_to_le32(cksum), msg->lm_cksum);
+ RETURN(-EINVAL);
+ }
+ } else {
+ /* whether we sent with bulk or not, we expect the same
+ * in reply, except for early reply */
+ if (!req->rq_early &&
+ !equi(req->rq_pack_bulk == 1,
+ phdr->ph_flags & PLAIN_FL_BULK)) {
+ CERROR("%s bulk checksum in reply\n",
+ req->rq_pack_bulk ? "Missing" : "Unexpected");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg, swabbed))
+ RETURN(-EPROTO);
+ }
+ }
+
+ req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+ req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF);
+ RETURN(0);
+}
+
+static
+int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ struct plain_bulk_token *token;
+ int rc;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+ bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ token = (struct plain_bulk_token *) bsd->bsd_data;
+
+ bsd->bsd_version = 0;
+ bsd->bsd_flags = 0;
+ bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ RETURN(0);
+
+ if (req->rq_bulk_read)
+ RETURN(0);
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ token);
+ if (rc) {
+ CERROR("bulk write: failed to compute checksum: %d\n", rc);
+ } else {
+ /*
+ * for sending we only compute the wrong checksum instead
+ * of corrupting the data so it is still correct on a redo
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
+ req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL)
+ token->pbt_hash[0] ^= 0x1;
+ }
+
+ return rc;
+}
+
+static
+int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv;
+ struct plain_bulk_token *tokenv;
+ int rc;
+ int i, nob;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+ LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+ bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ if (req->rq_bulk_write) {
+ if (bsdv->bsd_flags & BSD_FL_ERR)
+ return -EIO;
+ return 0;
+ }
+
+ /* fix the actual data size */
+ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
+ desc->bd_iov[i].kiov_len =
+ desc->bd_nob_transferred - nob;
+ }
+ nob += desc->bd_iov[i].kiov_len;
+ }
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc)
+ CERROR("bulk read: client verify failed: %d\n", rc);
+
+ return rc;
+}
+
+/****************************************
+ * sec apis *
+ ****************************************/
+
+static
+struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec)
+{
+ struct ptlrpc_cli_ctx *ctx, *ctx_new;
+
+ OBD_ALLOC_PTR(ctx_new);
+
+ write_lock(&plsec->pls_lock);
+
+ ctx = plsec->pls_ctx;
+ if (ctx) {
+ atomic_inc(&ctx->cc_refcount);
+
+ if (ctx_new)
+ OBD_FREE_PTR(ctx_new);
+ } else if (ctx_new) {
+ ctx = ctx_new;
+
+ atomic_set(&ctx->cc_refcount, 1); /* for cache */
+ ctx->cc_sec = &plsec->pls_base;
+ ctx->cc_ops = &plain_ctx_ops;
+ ctx->cc_expire = 0;
+ ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE;
+ ctx->cc_vcred.vc_uid = 0;
+ spin_lock_init(&ctx->cc_lock);
+ INIT_LIST_HEAD(&ctx->cc_req_list);
+ INIT_LIST_HEAD(&ctx->cc_gc_chain);
+
+ plsec->pls_ctx = ctx;
+ atomic_inc(&plsec->pls_base.ps_nctx);
+ atomic_inc(&plsec->pls_base.ps_refcount);
+
+ atomic_inc(&ctx->cc_refcount); /* for caller */
+ }
+
+ write_unlock(&plsec->pls_lock);
+
+ return ctx;
+}
+
+static
+void plain_destroy_sec(struct ptlrpc_sec *sec)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+ ENTRY;
+
+ LASSERT(sec->ps_policy == &plain_policy);
+ LASSERT(sec->ps_import);
+ LASSERT(atomic_read(&sec->ps_refcount) == 0);
+ LASSERT(atomic_read(&sec->ps_nctx) == 0);
+ LASSERT(plsec->pls_ctx == NULL);
+
+ class_import_put(sec->ps_import);
+
+ OBD_FREE_PTR(plsec);
+ EXIT;
+}
+
+static
+void plain_kill_sec(struct ptlrpc_sec *sec)
+{
+ sec->ps_dying = 1;
+}
+
+static
+struct ptlrpc_sec *plain_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf)
+{
+ struct plain_sec *plsec;
+ struct ptlrpc_sec *sec;
+ struct ptlrpc_cli_ctx *ctx;
+ ENTRY;
+
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
+
+ OBD_ALLOC_PTR(plsec);
+ if (plsec == NULL)
+ RETURN(NULL);
+
+ /*
+ * initialize plain_sec
+ */
+ rwlock_init(&plsec->pls_lock);
+ plsec->pls_ctx = NULL;
+
+ sec = &plsec->pls_base;
+ sec->ps_policy = &plain_policy;
+ atomic_set(&sec->ps_refcount, 0);
+ atomic_set(&sec->ps_nctx, 0);
+ sec->ps_id = sptlrpc_get_next_secid();
+ sec->ps_import = class_import_get(imp);
+ sec->ps_flvr = *sf;
+ spin_lock_init(&sec->ps_lock);
+ INIT_LIST_HEAD(&sec->ps_gc_list);
+ sec->ps_gc_interval = 0;
+ sec->ps_gc_next = 0;
+
+ /* install ctx immediately if this is a reverse sec */
+ if (svc_ctx) {
+ ctx = plain_sec_install_ctx(plsec);
+ if (ctx == NULL) {
+ plain_destroy_sec(sec);
+ RETURN(NULL);
+ }
+ sptlrpc_cli_ctx_put(ctx, 1);
+ }
+
+ RETURN(sec);
+}
+
+static
+struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+ struct ptlrpc_cli_ctx *ctx;
+ ENTRY;
+
+ read_lock(&plsec->pls_lock);
+ ctx = plsec->pls_ctx;
+ if (ctx)
+ atomic_inc(&ctx->cc_refcount);
+ read_unlock(&plsec->pls_lock);
+
+ if (unlikely(ctx == NULL))
+ ctx = plain_sec_install_ctx(plsec);
+
+ RETURN(ctx);
+}
+
+static
+void plain_release_ctx(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ LASSERT(atomic_read(&sec->ps_refcount) > 0);
+ LASSERT(atomic_read(&sec->ps_nctx) > 0);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(ctx->cc_sec == sec);
+
+ OBD_FREE_PTR(ctx);
+
+ atomic_dec(&sec->ps_nctx);
+ sptlrpc_sec_put(sec);
+}
+
+static
+int plain_flush_ctx_cache(struct ptlrpc_sec *sec,
+ uid_t uid, int grace, int force)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+ struct ptlrpc_cli_ctx *ctx;
+ ENTRY;
+
+ /* do nothing unless caller want to flush for 'all' */
+ if (uid != -1)
+ RETURN(0);
+
+ write_lock(&plsec->pls_lock);
+ ctx = plsec->pls_ctx;
+ plsec->pls_ctx = NULL;
+ write_unlock(&plsec->pls_lock);
+
+ if (ctx)
+ sptlrpc_cli_ctx_put(ctx, 1);
+ RETURN(0);
+}
+
+static
+int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int alloc_len;
+ ENTRY;
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_udesc)
+ buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size();
+
+ if (req->rq_pack_bulk) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+ }
+
+ alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ if (!req->rq_reqbuf) {
+ LASSERT(!req->rq_pool);
+
+ alloc_len = size_roundup_power2(alloc_len);
+ OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_len);
+ if (!req->rq_reqbuf)
+ RETURN(-ENOMEM);
+
+ req->rq_reqbuf_len = alloc_len;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= alloc_len);
+ memset(req->rq_reqbuf, 0, alloc_len);
+ }
+
+ lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
+
+ if (req->rq_pack_udesc)
+ sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
+
+ RETURN(0);
+}
+
+static
+void plain_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ ENTRY;
+ if (!req->rq_pool) {
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+ EXIT;
+}
+
+static
+int plain_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int alloc_len;
+ ENTRY;
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_bulk) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+ }
+
+ alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ /* add space for early reply */
+ alloc_len += plain_at_offset;
+
+ alloc_len = size_roundup_power2(alloc_len);
+
+ OBD_ALLOC_LARGE(req->rq_repbuf, alloc_len);
+ if (!req->rq_repbuf)
+ RETURN(-ENOMEM);
+
+ req->rq_repbuf_len = alloc_len;
+ RETURN(0);
+}
+
+static
+void plain_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ ENTRY;
+ OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+ EXIT;
+}
+
+static
+int plain_enlarge_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct lustre_msg *newbuf;
+ int oldsize;
+ int newmsg_size, newbuf_size;
+ ENTRY;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+ LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) ==
+ req->rq_reqmsg);
+
+ /* compute new embedded msg size. */
+ oldsize = req->rq_reqmsg->lm_buflens[segment];
+ req->rq_reqmsg->lm_buflens[segment] = newsize;
+ newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount,
+ req->rq_reqmsg->lm_buflens);
+ req->rq_reqmsg->lm_buflens[segment] = oldsize;
+
+ /* compute new wrapper msg size. */
+ oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF];
+ req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size;
+ newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount,
+ req->rq_reqbuf->lm_buflens);
+ req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize;
+
+ /* request from pool should always have enough buffer */
+ LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size);
+
+ if (req->rq_reqbuf_len < newbuf_size) {
+ newbuf_size = size_roundup_power2(newbuf_size);
+
+ OBD_ALLOC_LARGE(newbuf, newbuf_size);
+ if (newbuf == NULL)
+ RETURN(-ENOMEM);
+
+ memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = newbuf;
+ req->rq_reqbuf_len = newbuf_size;
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf,
+ PLAIN_PACK_MSG_OFF, 0);
+ }
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF,
+ newmsg_size);
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+
+ req->rq_reqlen = newmsg_size;
+ RETURN(0);
+}
+
+/****************************************
+ * service apis *
+ ****************************************/
+
+static struct ptlrpc_svc_ctx plain_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &plain_policy,
+};
+
+static
+int plain_accept(struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
+ int swabbed;
+ ENTRY;
+
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_PLAIN);
+
+ if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) ||
+ SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) {
+ CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) {
+ CERROR("unexpected request buf count %u\n", msg->lm_bufcount);
+ RETURN(SECSVC_DROP);
+ }
+
+ swabbed = ptlrpc_req_need_swab(req);
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) {
+ CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg);
+ RETURN(-EPROTO);
+ }
+
+ req->rq_sp_from = phdr->ph_sp;
+ req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg;
+
+ if (phdr->ph_flags & PLAIN_FL_USER) {
+ if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF,
+ swabbed)) {
+ CERROR("Mal-formed user descriptor\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0);
+ }
+
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg, swabbed))
+ RETURN(SECSVC_DROP);
+
+ req->rq_pack_bulk = 1;
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+ req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF];
+
+ req->rq_svc_ctx = &plain_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ RETURN(SECSVC_OK);
+}
+
+static
+int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int rs_size = sizeof(*rs);
+ ENTRY;
+
+ LASSERT(msgsize % 8 == 0);
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write))
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+
+ rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC_LARGE(rs, rs_size);
+ if (rs == NULL)
+ RETURN(-ENOMEM);
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+
+ lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+ rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0);
+
+ req->rq_reply_state = rs;
+ RETURN(0);
+}
+
+static
+void plain_free_rs(struct ptlrpc_reply_state *rs)
+{
+ ENTRY;
+
+ LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE_LARGE(rs, rs->rs_size);
+ EXIT;
+}
+
+static
+int plain_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct lustre_msg_v2 *msg = rs->rs_repbuf;
+ struct plain_header *phdr;
+ int len;
+ ENTRY;
+
+ LASSERT(rs);
+ LASSERT(msg);
+
+ if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF])
+ len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF,
+ req->rq_replen, 1);
+ else
+ len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+ msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+ if (req->rq_pack_bulk)
+ phdr->ph_flags |= PLAIN_FL_BULK;
+
+ rs->rs_repdata_len = len;
+
+ if (likely(req->rq_packed_final)) {
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+ req->rq_reply_off = plain_at_offset;
+ else
+ req->rq_reply_off = 0;
+ } else {
+ unsigned int hsize = 4;
+
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+ lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize);
+ req->rq_reply_off = 0;
+ }
+
+ RETURN(0);
+}
+
+static
+int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenr;
+ int rc;
+
+ LASSERT(req->rq_bulk_write);
+ LASSERT(req->rq_pack_bulk);
+
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenr);
+ if (rc) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("bulk write: server verify failed: %d\n", rc);
+ }
+
+ return rc;
+}
+
+static
+int plain_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenv;
+ int rc;
+
+ LASSERT(req->rq_bulk_read);
+ LASSERT(req->rq_pack_bulk);
+
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc) {
+ CERROR("bulk read: server failed to compute "
+ "checksum: %d\n", rc);
+ } else {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
+ corrupt_bulk_data(desc);
+ }
+
+ return rc;
+}
+
+static struct ptlrpc_ctx_ops plain_ctx_ops = {
+ .refresh = plain_ctx_refresh,
+ .validate = plain_ctx_validate,
+ .sign = plain_ctx_sign,
+ .verify = plain_ctx_verify,
+ .wrap_bulk = plain_cli_wrap_bulk,
+ .unwrap_bulk = plain_cli_unwrap_bulk,
+};
+
+static struct ptlrpc_sec_cops plain_sec_cops = {
+ .create_sec = plain_create_sec,
+ .destroy_sec = plain_destroy_sec,
+ .kill_sec = plain_kill_sec,
+ .lookup_ctx = plain_lookup_ctx,
+ .release_ctx = plain_release_ctx,
+ .flush_ctx_cache = plain_flush_ctx_cache,
+ .alloc_reqbuf = plain_alloc_reqbuf,
+ .free_reqbuf = plain_free_reqbuf,
+ .alloc_repbuf = plain_alloc_repbuf,
+ .free_repbuf = plain_free_repbuf,
+ .enlarge_reqbuf = plain_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops plain_sec_sops = {
+ .accept = plain_accept,
+ .alloc_rs = plain_alloc_rs,
+ .authorize = plain_authorize,
+ .free_rs = plain_free_rs,
+ .unwrap_bulk = plain_svc_unwrap_bulk,
+ .wrap_bulk = plain_svc_wrap_bulk,
+};
+
+static struct ptlrpc_sec_policy plain_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "plain",
+ .sp_policy = SPTLRPC_POLICY_PLAIN,
+ .sp_cops = &plain_sec_cops,
+ .sp_sops = &plain_sec_sops,
+};
+
+int sptlrpc_plain_init(void)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int rc;
+
+ buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size();
+ plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ rc = sptlrpc_register_policy(&plain_policy);
+ if (rc)
+ CERROR("failed to register: %d\n", rc);
+
+ return rc;
+}
+
+void sptlrpc_plain_fini(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&plain_policy);
+ if (rc)
+ CERROR("cannot unregister: %d\n", rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
new file mode 100644
index 000000000000..1667b8e86012
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -0,0 +1,3129 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lu_object.h>
+#include <linux/lnet/types.h>
+#include "ptlrpc_internal.h"
+
+/* The following are visible and mutable through /sys/module/ptlrpc */
+int test_req_buffer_pressure = 0;
+CFS_MODULE_PARM(test_req_buffer_pressure, "i", int, 0444,
+ "set non-zero to put pressure on request buffer pools");
+CFS_MODULE_PARM(at_min, "i", int, 0644,
+ "Adaptive timeout minimum (sec)");
+CFS_MODULE_PARM(at_max, "i", int, 0644,
+ "Adaptive timeout maximum (sec)");
+CFS_MODULE_PARM(at_history, "i", int, 0644,
+ "Adaptive timeouts remember the slowest event that took place "
+ "within this period (sec)");
+CFS_MODULE_PARM(at_early_margin, "i", int, 0644,
+ "How soon before an RPC deadline to send an early reply");
+CFS_MODULE_PARM(at_extra, "i", int, 0644,
+ "How much extra time to give with each early reply");
+
+
+/* forward ref */
+static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req);
+static void ptlrpc_at_remove_timed(struct ptlrpc_request *req);
+
+/** Holds a list of all PTLRPC services */
+LIST_HEAD(ptlrpc_all_services);
+/** Used to protect the \e ptlrpc_all_services list */
+struct mutex ptlrpc_all_services_mutex;
+
+struct ptlrpc_request_buffer_desc *
+ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request_buffer_desc *rqbd;
+
+ OBD_CPT_ALLOC_PTR(rqbd, svc->srv_cptable, svcpt->scp_cpt);
+ if (rqbd == NULL)
+ return NULL;
+
+ rqbd->rqbd_svcpt = svcpt;
+ rqbd->rqbd_refcount = 0;
+ rqbd->rqbd_cbid.cbid_fn = request_in_callback;
+ rqbd->rqbd_cbid.cbid_arg = rqbd;
+ INIT_LIST_HEAD(&rqbd->rqbd_reqs);
+ OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable,
+ svcpt->scp_cpt, svc->srv_buf_size);
+ if (rqbd->rqbd_buffer == NULL) {
+ OBD_FREE_PTR(rqbd);
+ return NULL;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+ svcpt->scp_nrqbds_total++;
+ spin_unlock(&svcpt->scp_lock);
+
+ return rqbd;
+}
+
+void
+ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+
+ LASSERT(rqbd->rqbd_refcount == 0);
+ LASSERT(list_empty(&rqbd->rqbd_reqs));
+
+ spin_lock(&svcpt->scp_lock);
+ list_del(&rqbd->rqbd_list);
+ svcpt->scp_nrqbds_total--;
+ spin_unlock(&svcpt->scp_lock);
+
+ OBD_FREE_LARGE(rqbd->rqbd_buffer, svcpt->scp_service->srv_buf_size);
+ OBD_FREE_PTR(rqbd);
+}
+
+int
+ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ int rc = 0;
+ int i;
+
+ if (svcpt->scp_rqbd_allocating)
+ goto try_post;
+
+ spin_lock(&svcpt->scp_lock);
+ /* check again with lock */
+ if (svcpt->scp_rqbd_allocating) {
+ /* NB: we might allow more than one thread in the future */
+ LASSERT(svcpt->scp_rqbd_allocating == 1);
+ spin_unlock(&svcpt->scp_lock);
+ goto try_post;
+ }
+
+ svcpt->scp_rqbd_allocating++;
+ spin_unlock(&svcpt->scp_lock);
+
+
+ for (i = 0; i < svc->srv_nbuf_per_group; i++) {
+ /* NB: another thread might have recycled enough rqbds, we
+ * need to make sure it wouldn't over-allocate, see LU-1212. */
+ if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+ break;
+
+ rqbd = ptlrpc_alloc_rqbd(svcpt);
+
+ if (rqbd == NULL) {
+ CERROR("%s: Can't allocate request buffer\n",
+ svc->srv_name);
+ rc = -ENOMEM;
+ break;
+ }
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ LASSERT(svcpt->scp_rqbd_allocating == 1);
+ svcpt->scp_rqbd_allocating--;
+
+ spin_unlock(&svcpt->scp_lock);
+
+ CDEBUG(D_RPCTRACE,
+ "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
+ svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
+ svcpt->scp_nrqbds_total, rc);
+
+ try_post:
+ if (post && rc == 0)
+ rc = ptlrpc_server_post_idle_rqbds(svcpt);
+
+ return rc;
+}
+
+/**
+ * Part of Rep-Ack logic.
+ * Puts a lock and its mode into reply state assotiated to request reply.
+ */
+void
+ptlrpc_save_lock(struct ptlrpc_request *req,
+ struct lustre_handle *lock, int mode, int no_ack)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ int idx;
+
+ LASSERT(rs != NULL);
+ LASSERT(rs->rs_nlocks < RS_MAX_LOCKS);
+
+ if (req->rq_export->exp_disconnected) {
+ ldlm_lock_decref(lock, mode);
+ } else {
+ idx = rs->rs_nlocks++;
+ rs->rs_locks[idx] = *lock;
+ rs->rs_modes[idx] = mode;
+ rs->rs_difficult = 1;
+ rs->rs_no_ack = !!no_ack;
+ }
+}
+EXPORT_SYMBOL(ptlrpc_save_lock);
+
+
+struct ptlrpc_hr_partition;
+
+struct ptlrpc_hr_thread {
+ int hrt_id; /* thread ID */
+ spinlock_t hrt_lock;
+ wait_queue_head_t hrt_waitq;
+ struct list_head hrt_queue; /* RS queue */
+ struct ptlrpc_hr_partition *hrt_partition;
+};
+
+struct ptlrpc_hr_partition {
+ /* # of started threads */
+ atomic_t hrp_nstarted;
+ /* # of stopped threads */
+ atomic_t hrp_nstopped;
+ /* cpu partition id */
+ int hrp_cpt;
+ /* round-robin rotor for choosing thread */
+ int hrp_rotor;
+ /* total number of threads on this partition */
+ int hrp_nthrs;
+ /* threads table */
+ struct ptlrpc_hr_thread *hrp_thrs;
+};
+
+#define HRT_RUNNING 0
+#define HRT_STOPPING 1
+
+struct ptlrpc_hr_service {
+ /* CPU partition table, it's just cfs_cpt_table for now */
+ struct cfs_cpt_table *hr_cpt_table;
+ /** controller sleep waitq */
+ wait_queue_head_t hr_waitq;
+ unsigned int hr_stopping;
+ /** roundrobin rotor for non-affinity service */
+ unsigned int hr_rotor;
+ /* partition data */
+ struct ptlrpc_hr_partition **hr_partitions;
+};
+
+struct rs_batch {
+ struct list_head rsb_replies;
+ unsigned int rsb_n_replies;
+ struct ptlrpc_service_part *rsb_svcpt;
+};
+
+/** reply handling service. */
+static struct ptlrpc_hr_service ptlrpc_hr;
+
+/**
+ * maximum mumber of replies scheduled in one batch
+ */
+#define MAX_SCHEDULED 256
+
+/**
+ * Initialize a reply batch.
+ *
+ * \param b batch
+ */
+static void rs_batch_init(struct rs_batch *b)
+{
+ memset(b, 0, sizeof *b);
+ INIT_LIST_HEAD(&b->rsb_replies);
+}
+
+/**
+ * Choose an hr thread to dispatch requests to.
+ */
+static struct ptlrpc_hr_thread *
+ptlrpc_hr_select(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_hr_partition *hrp;
+ unsigned int rotor;
+
+ if (svcpt->scp_cpt >= 0 &&
+ svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) {
+ /* directly match partition */
+ hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt];
+
+ } else {
+ rotor = ptlrpc_hr.hr_rotor++;
+ rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table);
+
+ hrp = ptlrpc_hr.hr_partitions[rotor];
+ }
+
+ rotor = hrp->hrp_rotor++;
+ return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs];
+}
+
+/**
+ * Dispatch all replies accumulated in the batch to one from
+ * dedicated reply handling threads.
+ *
+ * \param b batch
+ */
+static void rs_batch_dispatch(struct rs_batch *b)
+{
+ if (b->rsb_n_replies != 0) {
+ struct ptlrpc_hr_thread *hrt;
+
+ hrt = ptlrpc_hr_select(b->rsb_svcpt);
+
+ spin_lock(&hrt->hrt_lock);
+ list_splice_init(&b->rsb_replies, &hrt->hrt_queue);
+ spin_unlock(&hrt->hrt_lock);
+
+ wake_up(&hrt->hrt_waitq);
+ b->rsb_n_replies = 0;
+ }
+}
+
+/**
+ * Add a reply to a batch.
+ * Add one reply object to a batch, schedule batched replies if overload.
+ *
+ * \param b batch
+ * \param rs reply
+ */
+static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+ if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) {
+ if (b->rsb_svcpt != NULL) {
+ rs_batch_dispatch(b);
+ spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+ }
+ spin_lock(&svcpt->scp_rep_lock);
+ b->rsb_svcpt = svcpt;
+ }
+ spin_lock(&rs->rs_lock);
+ rs->rs_scheduled_ever = 1;
+ if (rs->rs_scheduled == 0) {
+ list_move(&rs->rs_list, &b->rsb_replies);
+ rs->rs_scheduled = 1;
+ b->rsb_n_replies++;
+ }
+ rs->rs_committed = 1;
+ spin_unlock(&rs->rs_lock);
+}
+
+/**
+ * Reply batch finalization.
+ * Dispatch remaining replies from the batch
+ * and release remaining spinlock.
+ *
+ * \param b batch
+ */
+static void rs_batch_fini(struct rs_batch *b)
+{
+ if (b->rsb_svcpt != NULL) {
+ rs_batch_dispatch(b);
+ spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+ }
+}
+
+#define DECLARE_RS_BATCH(b) struct rs_batch b
+
+
+/**
+ * Put reply state into a queue for processing because we received
+ * ACK from the client
+ */
+void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_hr_thread *hrt;
+ ENTRY;
+
+ LASSERT(list_empty(&rs->rs_list));
+
+ hrt = ptlrpc_hr_select(rs->rs_svcpt);
+
+ spin_lock(&hrt->hrt_lock);
+ list_add_tail(&rs->rs_list, &hrt->hrt_queue);
+ spin_unlock(&hrt->hrt_lock);
+
+ wake_up(&hrt->hrt_waitq);
+ EXIT;
+}
+
+void
+ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+ ENTRY;
+
+ LASSERT(spin_is_locked(&rs->rs_svcpt->scp_rep_lock));
+ LASSERT(spin_is_locked(&rs->rs_lock));
+ LASSERT (rs->rs_difficult);
+ rs->rs_scheduled_ever = 1; /* flag any notification attempt */
+
+ if (rs->rs_scheduled) { /* being set up or already notified */
+ EXIT;
+ return;
+ }
+
+ rs->rs_scheduled = 1;
+ list_del_init(&rs->rs_list);
+ ptlrpc_dispatch_difficult_reply(rs);
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply);
+
+void ptlrpc_commit_replies(struct obd_export *exp)
+{
+ struct ptlrpc_reply_state *rs, *nxt;
+ DECLARE_RS_BATCH(batch);
+ ENTRY;
+
+ rs_batch_init(&batch);
+ /* Find any replies that have been committed and get their service
+ * to attend to complete them. */
+
+ /* CAVEAT EMPTOR: spinlock ordering!!! */
+ spin_lock(&exp->exp_uncommitted_replies_lock);
+ list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies,
+ rs_obd_list) {
+ LASSERT (rs->rs_difficult);
+ /* VBR: per-export last_committed */
+ LASSERT(rs->rs_export);
+ if (rs->rs_transno <= exp->exp_last_committed) {
+ list_del_init(&rs->rs_obd_list);
+ rs_batch_add(&batch, rs);
+ }
+ }
+ spin_unlock(&exp->exp_uncommitted_replies_lock);
+ rs_batch_fini(&batch);
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_commit_replies);
+
+static int
+ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_request_buffer_desc *rqbd;
+ int rc;
+ int posted = 0;
+
+ for (;;) {
+ spin_lock(&svcpt->scp_lock);
+
+ if (list_empty(&svcpt->scp_rqbd_idle)) {
+ spin_unlock(&svcpt->scp_lock);
+ return posted;
+ }
+
+ rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+ list_del(&rqbd->rqbd_list);
+
+ /* assume we will post successfully */
+ svcpt->scp_nrqbds_posted++;
+ list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
+
+ spin_unlock(&svcpt->scp_lock);
+
+ rc = ptlrpc_register_rqbd(rqbd);
+ if (rc != 0)
+ break;
+
+ posted = 1;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ svcpt->scp_nrqbds_posted--;
+ list_del(&rqbd->rqbd_list);
+ list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+
+ /* Don't complain if no request buffers are posted right now; LNET
+ * won't drop requests because we set the portal lazy! */
+
+ spin_unlock(&svcpt->scp_lock);
+
+ return -1;
+}
+
+static void ptlrpc_at_timer(unsigned long castmeharder)
+{
+ struct ptlrpc_service_part *svcpt;
+
+ svcpt = (struct ptlrpc_service_part *)castmeharder;
+
+ svcpt->scp_at_check = 1;
+ svcpt->scp_at_checktime = cfs_time_current();
+ wake_up(&svcpt->scp_waitq);
+}
+
+static void
+ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
+ struct ptlrpc_service_conf *conf)
+{
+ struct ptlrpc_service_thr_conf *tc = &conf->psc_thr;
+ unsigned init;
+ unsigned total;
+ unsigned nthrs;
+ int weight;
+
+ /*
+ * Common code for estimating & validating threads number.
+ * CPT affinity service could have percpt thread-pool instead
+ * of a global thread-pool, which means user might not always
+ * get the threads number they give it in conf::tc_nthrs_user
+ * even they did set. It's because we need to validate threads
+ * number for each CPT to guarantee each pool will have enough
+ * threads to keep the service healthy.
+ */
+ init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL);
+ init = max_t(int, init, tc->tc_nthrs_init);
+
+ /* NB: please see comments in lustre_lnet.h for definition
+ * details of these members */
+ LASSERT(tc->tc_nthrs_max != 0);
+
+ if (tc->tc_nthrs_user != 0) {
+ /* In case there is a reason to test a service with many
+ * threads, we give a less strict check here, it can
+ * be up to 8 * nthrs_max */
+ total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user);
+ nthrs = total / svc->srv_ncpts;
+ init = max(init, nthrs);
+ goto out;
+ }
+
+ total = tc->tc_nthrs_max;
+ if (tc->tc_nthrs_base == 0) {
+ /* don't care about base threads number per partition,
+ * this is most for non-affinity service */
+ nthrs = total / svc->srv_ncpts;
+ goto out;
+ }
+
+ nthrs = tc->tc_nthrs_base;
+ if (svc->srv_ncpts == 1) {
+ int i;
+
+ /* NB: Increase the base number if it's single partition
+ * and total number of cores/HTs is larger or equal to 4.
+ * result will always < 2 * nthrs_base */
+ weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY);
+ for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */
+ (tc->tc_nthrs_base >> i) != 0; i++)
+ nthrs += tc->tc_nthrs_base >> i;
+ }
+
+ if (tc->tc_thr_factor != 0) {
+ int factor = tc->tc_thr_factor;
+ const int fade = 4;
+
+ /*
+ * User wants to increase number of threads with for
+ * each CPU core/HT, most likely the factor is larger then
+ * one thread/core because service threads are supposed to
+ * be blocked by lock or wait for IO.
+ */
+ /*
+ * Amdahl's law says that adding processors wouldn't give
+ * a linear increasing of parallelism, so it's nonsense to
+ * have too many threads no matter how many cores/HTs
+ * there are.
+ */
+ if (cfs_cpu_ht_nsiblings(0) > 1) { /* weight is # of HTs */
+ /* depress thread factor for hyper-thread */
+ factor = factor - (factor >> 1) + (factor >> 3);
+ }
+
+ weight = cfs_cpt_weight(svc->srv_cptable, 0);
+ LASSERT(weight > 0);
+
+ for (; factor > 0 && weight > 0; factor--, weight -= fade)
+ nthrs += min(weight, fade) * factor;
+ }
+
+ if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+ nthrs = max(tc->tc_nthrs_base,
+ tc->tc_nthrs_max / svc->srv_ncpts);
+ }
+ out:
+ nthrs = max(nthrs, tc->tc_nthrs_init);
+ svc->srv_nthrs_cpt_limit = nthrs;
+ svc->srv_nthrs_cpt_init = init;
+
+ if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+ CDEBUG(D_OTHER, "%s: This service may have more threads (%d) "
+ "than the given soft limit (%d)\n",
+ svc->srv_name, nthrs * svc->srv_ncpts,
+ tc->tc_nthrs_max);
+ }
+}
+
+/**
+ * Initialize percpt data for a service
+ */
+static int
+ptlrpc_service_part_init(struct ptlrpc_service *svc,
+ struct ptlrpc_service_part *svcpt, int cpt)
+{
+ struct ptlrpc_at_array *array;
+ int size;
+ int index;
+ int rc;
+
+ svcpt->scp_cpt = cpt;
+ INIT_LIST_HEAD(&svcpt->scp_threads);
+
+ /* rqbd and incoming request queue */
+ spin_lock_init(&svcpt->scp_lock);
+ INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
+ INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
+ INIT_LIST_HEAD(&svcpt->scp_req_incoming);
+ init_waitqueue_head(&svcpt->scp_waitq);
+ /* history request & rqbd list */
+ INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
+ INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
+
+ /* acitve requests and hp requests */
+ spin_lock_init(&svcpt->scp_req_lock);
+
+ /* reply states */
+ spin_lock_init(&svcpt->scp_rep_lock);
+ INIT_LIST_HEAD(&svcpt->scp_rep_active);
+ INIT_LIST_HEAD(&svcpt->scp_rep_idle);
+ init_waitqueue_head(&svcpt->scp_rep_waitq);
+ atomic_set(&svcpt->scp_nreps_difficult, 0);
+
+ /* adaptive timeout */
+ spin_lock_init(&svcpt->scp_at_lock);
+ array = &svcpt->scp_at_array;
+
+ size = at_est2timeout(at_max);
+ array->paa_size = size;
+ array->paa_count = 0;
+ array->paa_deadline = -1;
+
+ /* allocate memory for scp_at_array (ptlrpc_at_array) */
+ OBD_CPT_ALLOC(array->paa_reqs_array,
+ svc->srv_cptable, cpt, sizeof(struct list_head) * size);
+ if (array->paa_reqs_array == NULL)
+ return -ENOMEM;
+
+ for (index = 0; index < size; index++)
+ INIT_LIST_HEAD(&array->paa_reqs_array[index]);
+
+ OBD_CPT_ALLOC(array->paa_reqs_count,
+ svc->srv_cptable, cpt, sizeof(__u32) * size);
+ if (array->paa_reqs_count == NULL)
+ goto failed;
+
+ cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
+ /* At SOW, service time should be quick; 10s seems generous. If client
+ * timeout is less than this, we'll be sending an early reply. */
+ at_init(&svcpt->scp_at_estimate, 10, 0);
+
+ /* assign this before call ptlrpc_grow_req_bufs */
+ svcpt->scp_service = svc;
+ /* Now allocate the request buffers, but don't post them now */
+ rc = ptlrpc_grow_req_bufs(svcpt, 0);
+ /* We shouldn't be under memory pressure at startup, so
+ * fail if we can't allocate all our buffers at this time. */
+ if (rc != 0)
+ goto failed;
+
+ return 0;
+
+ failed:
+ if (array->paa_reqs_count != NULL) {
+ OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size);
+ array->paa_reqs_count = NULL;
+ }
+
+ if (array->paa_reqs_array != NULL) {
+ OBD_FREE(array->paa_reqs_array,
+ sizeof(struct list_head) * array->paa_size);
+ array->paa_reqs_array = NULL;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * Initialize service on a given portal.
+ * This includes starting serving threads , allocating and posting rqbds and
+ * so on.
+ */
+struct ptlrpc_service *
+ptlrpc_register_service(struct ptlrpc_service_conf *conf,
+ proc_dir_entry_t *proc_entry)
+{
+ struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt;
+ struct ptlrpc_service *service;
+ struct ptlrpc_service_part *svcpt;
+ struct cfs_cpt_table *cptable;
+ __u32 *cpts = NULL;
+ int ncpts;
+ int cpt;
+ int rc;
+ int i;
+ ENTRY;
+
+ LASSERT(conf->psc_buf.bc_nbufs > 0);
+ LASSERT(conf->psc_buf.bc_buf_size >=
+ conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD);
+ LASSERT(conf->psc_thr.tc_ctx_tags != 0);
+
+ cptable = cconf->cc_cptable;
+ if (cptable == NULL)
+ cptable = cfs_cpt_table;
+
+ if (!conf->psc_thr.tc_cpu_affinity) {
+ ncpts = 1;
+ } else {
+ ncpts = cfs_cpt_number(cptable);
+ if (cconf->cc_pattern != NULL) {
+ struct cfs_expr_list *el;
+
+ rc = cfs_expr_list_parse(cconf->cc_pattern,
+ strlen(cconf->cc_pattern),
+ 0, ncpts - 1, &el);
+ if (rc != 0) {
+ CERROR("%s: invalid CPT pattern string: %s",
+ conf->psc_name, cconf->cc_pattern);
+ RETURN(ERR_PTR(-EINVAL));
+ }
+
+ rc = cfs_expr_list_values(el, ncpts, &cpts);
+ cfs_expr_list_free(el);
+ if (rc <= 0) {
+ CERROR("%s: failed to parse CPT array %s: %d\n",
+ conf->psc_name, cconf->cc_pattern, rc);
+ if (cpts != NULL)
+ OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+ RETURN(ERR_PTR(rc < 0 ? rc : -EINVAL));
+ }
+ ncpts = rc;
+ }
+ }
+
+ OBD_ALLOC(service, offsetof(struct ptlrpc_service, srv_parts[ncpts]));
+ if (service == NULL) {
+ if (cpts != NULL)
+ OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+ RETURN(ERR_PTR(-ENOMEM));
+ }
+
+ service->srv_cptable = cptable;
+ service->srv_cpts = cpts;
+ service->srv_ncpts = ncpts;
+
+ service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
+ while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
+ service->srv_cpt_bits++;
+
+ /* public members */
+ spin_lock_init(&service->srv_lock);
+ service->srv_name = conf->psc_name;
+ service->srv_watchdog_factor = conf->psc_watchdog_factor;
+ INIT_LIST_HEAD(&service->srv_list); /* for safty of cleanup */
+
+ /* buffer configuration */
+ service->srv_nbuf_per_group = test_req_buffer_pressure ?
+ 1 : conf->psc_buf.bc_nbufs;
+ service->srv_max_req_size = conf->psc_buf.bc_req_max_size +
+ SPTLRPC_MAX_PAYLOAD;
+ service->srv_buf_size = conf->psc_buf.bc_buf_size;
+ service->srv_rep_portal = conf->psc_buf.bc_rep_portal;
+ service->srv_req_portal = conf->psc_buf.bc_req_portal;
+
+ /* Increase max reply size to next power of two */
+ service->srv_max_reply_size = 1;
+ while (service->srv_max_reply_size <
+ conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
+ service->srv_max_reply_size <<= 1;
+
+ service->srv_thread_name = conf->psc_thr.tc_thr_name;
+ service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags;
+ service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO;
+ service->srv_ops = conf->psc_ops;
+
+ for (i = 0; i < ncpts; i++) {
+ if (!conf->psc_thr.tc_cpu_affinity)
+ cpt = CFS_CPT_ANY;
+ else
+ cpt = cpts != NULL ? cpts[i] : i;
+
+ OBD_CPT_ALLOC(svcpt, cptable, cpt, sizeof(*svcpt));
+ if (svcpt == NULL)
+ GOTO(failed, rc = -ENOMEM);
+
+ service->srv_parts[i] = svcpt;
+ rc = ptlrpc_service_part_init(service, svcpt, cpt);
+ if (rc != 0)
+ GOTO(failed, rc);
+ }
+
+ ptlrpc_server_nthreads_check(service, conf);
+
+ rc = LNetSetLazyPortal(service->srv_req_portal);
+ LASSERT(rc == 0);
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+ list_add (&service->srv_list, &ptlrpc_all_services);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+ if (proc_entry != NULL)
+ ptlrpc_lprocfs_register_service(proc_entry, service);
+
+ rc = ptlrpc_service_nrs_setup(service);
+ if (rc != 0)
+ GOTO(failed, rc);
+
+ CDEBUG(D_NET, "%s: Started, listening on portal %d\n",
+ service->srv_name, service->srv_req_portal);
+
+ rc = ptlrpc_start_threads(service);
+ if (rc != 0) {
+ CERROR("Failed to start threads for service %s: %d\n",
+ service->srv_name, rc);
+ GOTO(failed, rc);
+ }
+
+ RETURN(service);
+failed:
+ ptlrpc_unregister_service(service);
+ RETURN(ERR_PTR(rc));
+}
+EXPORT_SYMBOL(ptlrpc_register_service);
+
+/**
+ * to actually free the request, must be called without holding svc_lock.
+ * note it's caller's responsibility to unlink req->rq_list.
+ */
+static void ptlrpc_server_free_request(struct ptlrpc_request *req)
+{
+ LASSERT(atomic_read(&req->rq_refcount) == 0);
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ /* DEBUG_REQ() assumes the reply state of a request with a valid
+ * ref will not be destroyed until that reference is dropped. */
+ ptlrpc_req_drop_rs(req);
+
+ sptlrpc_svc_ctx_decref(req);
+
+ if (req != &req->rq_rqbd->rqbd_req) {
+ /* NB request buffers use an embedded
+ * req if the incoming req unlinked the
+ * MD; this isn't one of them! */
+ OBD_FREE(req, sizeof(*req));
+ }
+}
+
+/**
+ * drop a reference count of the request. if it reaches 0, we either
+ * put it into history list, or free it immediately.
+ */
+void ptlrpc_server_drop_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int refcount;
+ struct list_head *tmp;
+ struct list_head *nxt;
+
+ if (!atomic_dec_and_test(&req->rq_refcount))
+ return;
+
+ if (req->rq_at_linked) {
+ spin_lock(&svcpt->scp_at_lock);
+ /* recheck with lock, in case it's unlinked by
+ * ptlrpc_at_check_timed() */
+ if (likely(req->rq_at_linked))
+ ptlrpc_at_remove_timed(req);
+ spin_unlock(&svcpt->scp_at_lock);
+ }
+
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ /* finalize request */
+ if (req->rq_export) {
+ class_export_put(req->rq_export);
+ req->rq_export = NULL;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ list_add(&req->rq_list, &rqbd->rqbd_reqs);
+
+ refcount = --(rqbd->rqbd_refcount);
+ if (refcount == 0) {
+ /* request buffer is now idle: add to history */
+ list_del(&rqbd->rqbd_list);
+
+ list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
+ svcpt->scp_hist_nrqbds++;
+
+ /* cull some history?
+ * I expect only about 1 or 2 rqbds need to be recycled here */
+ while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) {
+ rqbd = list_entry(svcpt->scp_hist_rqbds.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+
+ list_del(&rqbd->rqbd_list);
+ svcpt->scp_hist_nrqbds--;
+
+ /* remove rqbd's reqs from svc's req history while
+ * I've got the service lock */
+ list_for_each(tmp, &rqbd->rqbd_reqs) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_list);
+ /* Track the highest culled req seq */
+ if (req->rq_history_seq >
+ svcpt->scp_hist_seq_culled) {
+ svcpt->scp_hist_seq_culled =
+ req->rq_history_seq;
+ }
+ list_del(&req->rq_history_list);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+
+ list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) {
+ req = list_entry(rqbd->rqbd_reqs.next,
+ struct ptlrpc_request,
+ rq_list);
+ list_del(&req->rq_list);
+ ptlrpc_server_free_request(req);
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ /*
+ * now all reqs including the embedded req has been
+ * disposed, schedule request buffer for re-use.
+ */
+ LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) ==
+ 0);
+ list_add_tail(&rqbd->rqbd_list,
+ &svcpt->scp_rqbd_idle);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+ } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
+ /* If we are low on memory, we are not interested in history */
+ list_del(&req->rq_list);
+ list_del_init(&req->rq_history_list);
+
+ /* Track the highest culled req seq */
+ if (req->rq_history_seq > svcpt->scp_hist_seq_culled)
+ svcpt->scp_hist_seq_culled = req->rq_history_seq;
+
+ spin_unlock(&svcpt->scp_lock);
+
+ ptlrpc_server_free_request(req);
+ } else {
+ spin_unlock(&svcpt->scp_lock);
+ }
+}
+
+/** Change request export and move hp request from old export to new */
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+ struct obd_export *export)
+{
+ if (req->rq_export != NULL) {
+ if (!list_empty(&req->rq_exp_list)) {
+ /* remove rq_exp_list from last export */
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_del_init(&req->rq_exp_list);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+
+ /* export has one reference already, so it`s safe to
+ * add req to export queue here and get another
+ * reference for request later */
+ spin_lock_bh(&export->exp_rpc_lock);
+ list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
+ spin_unlock_bh(&export->exp_rpc_lock);
+ }
+ class_export_rpc_dec(req->rq_export);
+ class_export_put(req->rq_export);
+ }
+
+ /* request takes one export refcount */
+ req->rq_export = class_export_get(export);
+ class_export_rpc_inc(export);
+
+ return;
+}
+
+/**
+ * to finish a request: stop sending more early replies, and release
+ * the request.
+ */
+static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ ptlrpc_server_hpreq_fini(req);
+
+ ptlrpc_server_drop_request(req);
+}
+
+/**
+ * to finish a active request: stop sending more early replies, and release
+ * the request. should be called after we finished handling the request.
+ */
+static void ptlrpc_server_finish_active_request(
+ struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ spin_lock(&svcpt->scp_req_lock);
+ ptlrpc_nrs_req_stop_nolock(req);
+ svcpt->scp_nreqs_active--;
+ if (req->rq_hp)
+ svcpt->scp_nhreqs_active--;
+ spin_unlock(&svcpt->scp_req_lock);
+
+ ptlrpc_nrs_req_finalize(req);
+
+ if (req->rq_export != NULL)
+ class_export_rpc_dec(req->rq_export);
+
+ ptlrpc_server_finish_request(svcpt, req);
+}
+
+/**
+ * This function makes sure dead exports are evicted in a timely manner.
+ * This function is only called when some export receives a message (i.e.,
+ * the network is up.)
+ */
+static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
+{
+ struct obd_export *oldest_exp;
+ time_t oldest_time, new_time;
+
+ ENTRY;
+
+ LASSERT(exp);
+
+ /* Compensate for slow machines, etc, by faking our request time
+ into the future. Although this can break the strict time-ordering
+ of the list, we can be really lazy here - we don't have to evict
+ at the exact right moment. Eventually, all silent exports
+ will make it to the top of the list. */
+
+ /* Do not pay attention on 1sec or smaller renewals. */
+ new_time = cfs_time_current_sec() + extra_delay;
+ if (exp->exp_last_request_time + 1 /*second */ >= new_time)
+ RETURN_EXIT;
+
+ exp->exp_last_request_time = new_time;
+ CDEBUG(D_HA, "updating export %s at "CFS_TIME_T" exp %p\n",
+ exp->exp_client_uuid.uuid,
+ exp->exp_last_request_time, exp);
+
+ /* exports may get disconnected from the chain even though the
+ export has references, so we must keep the spin lock while
+ manipulating the lists */
+ spin_lock(&exp->exp_obd->obd_dev_lock);
+
+ if (list_empty(&exp->exp_obd_chain_timed)) {
+ /* this one is not timed */
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+ RETURN_EXIT;
+ }
+
+ list_move_tail(&exp->exp_obd_chain_timed,
+ &exp->exp_obd->obd_exports_timed);
+
+ oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
+ struct obd_export, exp_obd_chain_timed);
+ oldest_time = oldest_exp->exp_last_request_time;
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+
+ if (exp->exp_obd->obd_recovering) {
+ /* be nice to everyone during recovery */
+ EXIT;
+ return;
+ }
+
+ /* Note - racing to start/reset the obd_eviction timer is safe */
+ if (exp->exp_obd->obd_eviction_timer == 0) {
+ /* Check if the oldest entry is expired. */
+ if (cfs_time_current_sec() > (oldest_time + PING_EVICT_TIMEOUT +
+ extra_delay)) {
+ /* We need a second timer, in case the net was down and
+ * it just came back. Since the pinger may skip every
+ * other PING_INTERVAL (see note in ptlrpc_pinger_main),
+ * we better wait for 3. */
+ exp->exp_obd->obd_eviction_timer =
+ cfs_time_current_sec() + 3 * PING_INTERVAL;
+ CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n",
+ exp->exp_obd->obd_name,
+ obd_export_nid2str(oldest_exp), oldest_time);
+ }
+ } else {
+ if (cfs_time_current_sec() >
+ (exp->exp_obd->obd_eviction_timer + extra_delay)) {
+ /* The evictor won't evict anyone who we've heard from
+ * recently, so we don't have to check before we start
+ * it. */
+ if (!ping_evictor_wake(exp))
+ exp->exp_obd->obd_eviction_timer = 0;
+ }
+ }
+
+ EXIT;
+}
+
+/**
+ * Sanity check request \a req.
+ * Return 0 if all is ok, error code otherwise.
+ */
+static int ptlrpc_check_req(struct ptlrpc_request *req)
+{
+ int rc = 0;
+
+ if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
+ req->rq_export->exp_conn_cnt)) {
+ DEBUG_REQ(D_RPCTRACE, req,
+ "DROPPING req from old connection %d < %d",
+ lustre_msg_get_conn_cnt(req->rq_reqmsg),
+ req->rq_export->exp_conn_cnt);
+ return -EEXIST;
+ }
+ if (unlikely(req->rq_export->exp_obd &&
+ req->rq_export->exp_obd->obd_fail)) {
+ /* Failing over, don't handle any more reqs, send
+ error response instead. */
+ CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
+ req, req->rq_export->exp_obd->obd_name);
+ rc = -ENODEV;
+ } else if (lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE) &&
+ !(req->rq_export->exp_obd->obd_recovering)) {
+ DEBUG_REQ(D_ERROR, req,
+ "Invalid replay without recovery");
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
+ !(req->rq_export->exp_obd->obd_recovering)) {
+ DEBUG_REQ(D_ERROR, req, "Invalid req with transno "
+ LPU64" without recovery",
+ lustre_msg_get_transno(req->rq_reqmsg));
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ }
+
+ if (unlikely(rc < 0)) {
+ req->rq_status = rc;
+ ptlrpc_error(req);
+ }
+ return rc;
+}
+
+static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ __s32 next;
+
+ if (array->paa_count == 0) {
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ return;
+ }
+
+ /* Set timer for closest deadline */
+ next = (__s32)(array->paa_deadline - cfs_time_current_sec() -
+ at_early_margin);
+ if (next <= 0) {
+ ptlrpc_at_timer((unsigned long)svcpt);
+ } else {
+ cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
+ CDEBUG(D_INFO, "armed %s at %+ds\n",
+ svcpt->scp_service->srv_name, next);
+ }
+}
+
+/* Add rpc to early reply check list */
+static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ struct ptlrpc_request *rq = NULL;
+ __u32 index;
+
+ if (AT_OFF)
+ return(0);
+
+ if (req->rq_no_reply)
+ return 0;
+
+ if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
+ return(-ENOSYS);
+
+ spin_lock(&svcpt->scp_at_lock);
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ index = (unsigned long)req->rq_deadline % array->paa_size;
+ if (array->paa_reqs_count[index] > 0) {
+ /* latest rpcs will have the latest deadlines in the list,
+ * so search backward. */
+ list_for_each_entry_reverse(rq,
+ &array->paa_reqs_array[index],
+ rq_timed_list) {
+ if (req->rq_deadline >= rq->rq_deadline) {
+ list_add(&req->rq_timed_list,
+ &rq->rq_timed_list);
+ break;
+ }
+ }
+ }
+
+ /* Add the request at the head of the list */
+ if (list_empty(&req->rq_timed_list))
+ list_add(&req->rq_timed_list,
+ &array->paa_reqs_array[index]);
+
+ spin_lock(&req->rq_lock);
+ req->rq_at_linked = 1;
+ spin_unlock(&req->rq_lock);
+ req->rq_at_index = index;
+ array->paa_reqs_count[index]++;
+ array->paa_count++;
+ if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) {
+ array->paa_deadline = req->rq_deadline;
+ ptlrpc_at_set_timer(svcpt);
+ }
+ spin_unlock(&svcpt->scp_at_lock);
+
+ return 0;
+}
+
+static void
+ptlrpc_at_remove_timed(struct ptlrpc_request *req)
+{
+ struct ptlrpc_at_array *array;
+
+ array = &req->rq_rqbd->rqbd_svcpt->scp_at_array;
+
+ /* NB: must call with hold svcpt::scp_at_lock */
+ LASSERT(!list_empty(&req->rq_timed_list));
+ list_del_init(&req->rq_timed_list);
+
+ spin_lock(&req->rq_lock);
+ req->rq_at_linked = 0;
+ spin_unlock(&req->rq_lock);
+
+ array->paa_reqs_count[req->rq_at_index]--;
+ array->paa_count--;
+}
+
+static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_request *reqcopy;
+ struct lustre_msg *reqmsg;
+ cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
+ time_t newdl;
+ int rc;
+ ENTRY;
+
+ /* deadline is when the client expects us to reply, margin is the
+ difference between clients' and servers' expectations */
+ DEBUG_REQ(D_ADAPTTO, req,
+ "%ssending early reply (deadline %+lds, margin %+lds) for "
+ "%d+%d", AT_OFF ? "AT off - not " : "",
+ olddl, olddl - at_get(&svcpt->scp_at_estimate),
+ at_get(&svcpt->scp_at_estimate), at_extra);
+
+ if (AT_OFF)
+ RETURN(0);
+
+ if (olddl < 0) {
+ DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), "
+ "not sending early reply. Consider increasing "
+ "at_early_margin (%d)?", olddl, at_early_margin);
+
+ /* Return an error so we're not re-added to the timed list. */
+ RETURN(-ETIMEDOUT);
+ }
+
+ if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0){
+ DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, "
+ "but no AT support");
+ RETURN(-ENOSYS);
+ }
+
+ if (req->rq_export &&
+ lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+ /* During recovery, we don't want to send too many early
+ * replies, but on the other hand we want to make sure the
+ * client has enough time to resend if the rpc is lost. So
+ * during the recovery period send at least 4 early replies,
+ * spacing them every at_extra if we can. at_estimate should
+ * always equal this fixed value during recovery. */
+ at_measured(&svcpt->scp_at_estimate, min(at_extra,
+ req->rq_export->exp_obd->obd_recovery_timeout / 4));
+ } else {
+ /* Fake our processing time into the future to ask the clients
+ * for some extra amount of time */
+ at_measured(&svcpt->scp_at_estimate, at_extra +
+ cfs_time_current_sec() -
+ req->rq_arrival_time.tv_sec);
+
+ /* Check to see if we've actually increased the deadline -
+ * we may be past adaptive_max */
+ if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate)) {
+ DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+ "(%ld/%ld), not sending early reply\n",
+ olddl, req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate) -
+ cfs_time_current_sec());
+ RETURN(-ETIMEDOUT);
+ }
+ }
+ newdl = cfs_time_current_sec() + at_get(&svcpt->scp_at_estimate);
+
+ OBD_ALLOC(reqcopy, sizeof *reqcopy);
+ if (reqcopy == NULL)
+ RETURN(-ENOMEM);
+ OBD_ALLOC_LARGE(reqmsg, req->rq_reqlen);
+ if (!reqmsg) {
+ OBD_FREE(reqcopy, sizeof *reqcopy);
+ RETURN(-ENOMEM);
+ }
+
+ *reqcopy = *req;
+ reqcopy->rq_reply_state = NULL;
+ reqcopy->rq_rep_swab_mask = 0;
+ reqcopy->rq_pack_bulk = 0;
+ reqcopy->rq_pack_udesc = 0;
+ reqcopy->rq_packed_final = 0;
+ sptlrpc_svc_ctx_addref(reqcopy);
+ /* We only need the reqmsg for the magic */
+ reqcopy->rq_reqmsg = reqmsg;
+ memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
+
+ LASSERT(atomic_read(&req->rq_refcount));
+ /** if it is last refcount then early reply isn't needed */
+ if (atomic_read(&req->rq_refcount) == 1) {
+ DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, "
+ "abort sending early reply\n");
+ GOTO(out, rc = -EINVAL);
+ }
+
+ /* Connection ref */
+ reqcopy->rq_export = class_conn2export(
+ lustre_msg_get_handle(reqcopy->rq_reqmsg));
+ if (reqcopy->rq_export == NULL)
+ GOTO(out, rc = -ENODEV);
+
+ /* RPC ref */
+ class_export_rpc_inc(reqcopy->rq_export);
+ if (reqcopy->rq_export->exp_obd &&
+ reqcopy->rq_export->exp_obd->obd_fail)
+ GOTO(out_put, rc = -ENODEV);
+
+ rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY);
+ if (rc)
+ GOTO(out_put, rc);
+
+ rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
+
+ if (!rc) {
+ /* Adjust our own deadline to what we told the client */
+ req->rq_deadline = newdl;
+ req->rq_early_count++; /* number sent, server side */
+ } else {
+ DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
+ }
+
+ /* Free the (early) reply state from lustre_pack_reply.
+ (ptlrpc_send_reply takes it's own rs ref, so this is safe here) */
+ ptlrpc_req_drop_rs(reqcopy);
+
+out_put:
+ class_export_rpc_dec(reqcopy->rq_export);
+ class_export_put(reqcopy->rq_export);
+out:
+ sptlrpc_svc_ctx_decref(reqcopy);
+ OBD_FREE_LARGE(reqmsg, req->rq_reqlen);
+ OBD_FREE(reqcopy, sizeof *reqcopy);
+ RETURN(rc);
+}
+
+/* Send early replies to everybody expiring within at_early_margin
+ asking for at_extra time */
+static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ struct ptlrpc_request *rq, *n;
+ struct list_head work_list;
+ __u32 index, count;
+ time_t deadline;
+ time_t now = cfs_time_current_sec();
+ cfs_duration_t delay;
+ int first, counter = 0;
+ ENTRY;
+
+ spin_lock(&svcpt->scp_at_lock);
+ if (svcpt->scp_at_check == 0) {
+ spin_unlock(&svcpt->scp_at_lock);
+ RETURN(0);
+ }
+ delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime);
+ svcpt->scp_at_check = 0;
+
+ if (array->paa_count == 0) {
+ spin_unlock(&svcpt->scp_at_lock);
+ RETURN(0);
+ }
+
+ /* The timer went off, but maybe the nearest rpc already completed. */
+ first = array->paa_deadline - now;
+ if (first > at_early_margin) {
+ /* We've still got plenty of time. Reset the timer. */
+ ptlrpc_at_set_timer(svcpt);
+ spin_unlock(&svcpt->scp_at_lock);
+ RETURN(0);
+ }
+
+ /* We're close to a timeout, and we don't know how much longer the
+ server will take. Send early replies to everyone expiring soon. */
+ INIT_LIST_HEAD(&work_list);
+ deadline = -1;
+ index = (unsigned long)array->paa_deadline % array->paa_size;
+ count = array->paa_count;
+ while (count > 0) {
+ count -= array->paa_reqs_count[index];
+ list_for_each_entry_safe(rq, n,
+ &array->paa_reqs_array[index],
+ rq_timed_list) {
+ if (rq->rq_deadline > now + at_early_margin) {
+ /* update the earliest deadline */
+ if (deadline == -1 ||
+ rq->rq_deadline < deadline)
+ deadline = rq->rq_deadline;
+ break;
+ }
+
+ ptlrpc_at_remove_timed(rq);
+ /**
+ * ptlrpc_server_drop_request() may drop
+ * refcount to 0 already. Let's check this and
+ * don't add entry to work_list
+ */
+ if (likely(atomic_inc_not_zero(&rq->rq_refcount)))
+ list_add(&rq->rq_timed_list, &work_list);
+ counter++;
+ }
+
+ if (++index >= array->paa_size)
+ index = 0;
+ }
+ array->paa_deadline = deadline;
+ /* we have a new earliest deadline, restart the timer */
+ ptlrpc_at_set_timer(svcpt);
+
+ spin_unlock(&svcpt->scp_at_lock);
+
+ CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early "
+ "replies\n", first, at_extra, counter);
+ if (first < 0) {
+ /* We're already past request deadlines before we even get a
+ chance to send early replies */
+ LCONSOLE_WARN("%s: This server is not able to keep up with "
+ "request traffic (cpu-bound).\n",
+ svcpt->scp_service->srv_name);
+ CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, "
+ "delay="CFS_DURATION_T"(jiff)\n",
+ counter, svcpt->scp_nreqs_incoming,
+ svcpt->scp_nreqs_active,
+ at_get(&svcpt->scp_at_estimate), delay);
+ }
+
+ /* we took additional refcount so entries can't be deleted from list, no
+ * locking is needed */
+ while (!list_empty(&work_list)) {
+ rq = list_entry(work_list.next, struct ptlrpc_request,
+ rq_timed_list);
+ list_del_init(&rq->rq_timed_list);
+
+ if (ptlrpc_at_send_early_reply(rq) == 0)
+ ptlrpc_at_add_timed(rq);
+
+ ptlrpc_server_drop_request(rq);
+ }
+
+ RETURN(1); /* return "did_something" for liblustre */
+}
+
+/**
+ * Put the request to the export list if the request may become
+ * a high priority one.
+ */
+static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
+ rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
+ if (rc < 0)
+ RETURN(rc);
+ LASSERT(rc == 0);
+ }
+ if (req->rq_export && req->rq_ops) {
+ /* Perform request specific check. We should do this check
+ * before the request is added into exp_hp_rpcs list otherwise
+ * it may hit swab race at LU-1044. */
+ if (req->rq_ops->hpreq_check) {
+ rc = req->rq_ops->hpreq_check(req);
+ /**
+ * XXX: Out of all current
+ * ptlrpc_hpreq_ops::hpreq_check(), only
+ * ldlm_cancel_hpreq_check() can return an error code;
+ * other functions assert in similar places, which seems
+ * odd. What also does not seem right is that handlers
+ * for those RPCs do not assert on the same checks, but
+ * rather handle the error cases. e.g. see
+ * ost_rw_hpreq_check(), and ost_brw_read(),
+ * ost_brw_write().
+ */
+ if (rc < 0)
+ RETURN(rc);
+ LASSERT(rc == 0 || rc == 1);
+ }
+
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_add(&req->rq_exp_list,
+ &req->rq_export->exp_hp_rpcs);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+ }
+
+ ptlrpc_nrs_req_initialize(svcpt, req, rc);
+
+ RETURN(rc);
+}
+
+/** Remove the request from the export list. */
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
+{
+ ENTRY;
+ if (req->rq_export && req->rq_ops) {
+ /* refresh lock timeout again so that client has more
+ * room to send lock cancel RPC. */
+ if (req->rq_ops->hpreq_fini)
+ req->rq_ops->hpreq_fini(req);
+
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_del_init(&req->rq_exp_list);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+ }
+ EXIT;
+}
+
+static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
+{
+ return 1;
+}
+
+static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
+ .hpreq_check = ptlrpc_hpreq_check,
+};
+
+/* Hi-Priority RPC check by RPC operation code. */
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
+{
+ int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ /* Check for export to let only reconnects for not yet evicted
+ * export to become a HP rpc. */
+ if ((req->rq_export != NULL) &&
+ (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT))
+ req->rq_ops = &ptlrpc_hpreq_common;
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_hpreq_handler);
+
+static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ int rc;
+ ENTRY;
+
+ rc = ptlrpc_server_hpreq_init(svcpt, req);
+ if (rc < 0)
+ RETURN(rc);
+
+ ptlrpc_nrs_req_add(svcpt, req, !!rc);
+
+ RETURN(0);
+}
+
+/**
+ * Allow to handle high priority request
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ int running = svcpt->scp_nthrs_running;
+
+ if (!nrs_svcpt_has_hp(svcpt))
+ return false;
+
+ if (force)
+ return true;
+
+ if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+ CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+ /* leave just 1 thread for normal RPCs */
+ running = PTLRPC_NTHRS_INIT;
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+ running += 1;
+ }
+
+ if (svcpt->scp_nreqs_active >= running - 1)
+ return false;
+
+ if (svcpt->scp_nhreqs_active == 0)
+ return true;
+
+ return !ptlrpc_nrs_req_pending_nolock(svcpt, false) ||
+ svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
+}
+
+static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ return ptlrpc_server_allow_high(svcpt, force) &&
+ ptlrpc_nrs_req_pending_nolock(svcpt, true);
+}
+
+/**
+ * Only allow normal priority requests on a service that has a high-priority
+ * queue if forced (i.e. cleanup), if there are other high priority requests
+ * already being processed (i.e. those threads can service more high-priority
+ * requests), or if there are enough idle threads that a later thread can do
+ * a high priority request.
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ int running = svcpt->scp_nthrs_running;
+ if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+ CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+ /* leave just 1 thread for normal RPCs */
+ running = PTLRPC_NTHRS_INIT;
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+ running += 1;
+ }
+
+ if (force ||
+ svcpt->scp_nreqs_active < running - 2)
+ return true;
+
+ if (svcpt->scp_nreqs_active >= running - 1)
+ return false;
+
+ return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt);
+}
+
+static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ return ptlrpc_server_allow_normal(svcpt, force) &&
+ ptlrpc_nrs_req_pending_nolock(svcpt, false);
+}
+
+/**
+ * Returns true if there are requests available in incoming
+ * request queue for processing and it is allowed to fetch them.
+ * User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock
+ * to get reliable result
+ * \see ptlrpc_server_allow_normal
+ * \see ptlrpc_server_allow high
+ */
+static inline bool
+ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force)
+{
+ return ptlrpc_server_high_pending(svcpt, force) ||
+ ptlrpc_server_normal_pending(svcpt, force);
+}
+
+/**
+ * Fetch a request for processing from queue of unprocessed requests.
+ * Favors high-priority requests.
+ * Returns a pointer to fetched request.
+ */
+static struct ptlrpc_request *
+ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force)
+{
+ struct ptlrpc_request *req = NULL;
+ ENTRY;
+
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (ptlrpc_server_high_pending(svcpt, force)) {
+ req = ptlrpc_nrs_req_get_nolock(svcpt, true, force);
+ if (req != NULL) {
+ svcpt->scp_hreq_count++;
+ goto got_request;
+ }
+ }
+
+ if (ptlrpc_server_normal_pending(svcpt, force)) {
+ req = ptlrpc_nrs_req_get_nolock(svcpt, false, force);
+ if (req != NULL) {
+ svcpt->scp_hreq_count = 0;
+ goto got_request;
+ }
+ }
+
+ spin_unlock(&svcpt->scp_req_lock);
+ RETURN(NULL);
+
+got_request:
+ svcpt->scp_nreqs_active++;
+ if (req->rq_hp)
+ svcpt->scp_nhreqs_active++;
+
+ spin_unlock(&svcpt->scp_req_lock);
+
+ if (likely(req->rq_export))
+ class_export_rpc_inc(req->rq_export);
+
+ RETURN(req);
+}
+
+/**
+ * Handle freshly incoming reqs, add to timed early reply list,
+ * pass on to regular request queue.
+ * All incoming requests pass through here before getting into
+ * ptlrpc_server_handle_req later on.
+ */
+static int
+ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request *req;
+ __u32 deadline;
+ int rc;
+ ENTRY;
+
+ spin_lock(&svcpt->scp_lock);
+ if (list_empty(&svcpt->scp_req_incoming)) {
+ spin_unlock(&svcpt->scp_lock);
+ RETURN(0);
+ }
+
+ req = list_entry(svcpt->scp_req_incoming.next,
+ struct ptlrpc_request, rq_list);
+ list_del_init(&req->rq_list);
+ svcpt->scp_nreqs_incoming--;
+ /* Consider this still a "queued" request as far as stats are
+ * concerned */
+ spin_unlock(&svcpt->scp_lock);
+
+ /* go through security check/transform */
+ rc = sptlrpc_svc_unwrap_request(req);
+ switch (rc) {
+ case SECSVC_OK:
+ break;
+ case SECSVC_COMPLETE:
+ target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+ goto err_req;
+ case SECSVC_DROP:
+ goto err_req;
+ default:
+ LBUG();
+ }
+
+ /*
+ * for null-flavored rpc, msg has been unpacked by sptlrpc, although
+ * redo it wouldn't be harmful.
+ */
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+ rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
+ if (rc != 0) {
+ CERROR("error unpacking request: ptl %d from %s "
+ "x"LPU64"\n", svc->srv_req_portal,
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ goto err_req;
+ }
+ }
+
+ rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+ if (rc) {
+ CERROR ("error unpacking ptlrpc body: ptl %d from %s x"
+ LPU64"\n", svc->srv_req_portal,
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ goto err_req;
+ }
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
+ CERROR("drop incoming rpc opc %u, x"LPU64"\n",
+ cfs_fail_val, req->rq_xid);
+ goto err_req;
+ }
+
+ rc = -EINVAL;
+ if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
+ CERROR("wrong packet type received (type=%u) from %s\n",
+ lustre_msg_get_type(req->rq_reqmsg),
+ libcfs_id2str(req->rq_peer));
+ goto err_req;
+ }
+
+ switch(lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_WRITEPAGE:
+ case OST_WRITE:
+ req->rq_bulk_write = 1;
+ break;
+ case MDS_READPAGE:
+ case OST_READ:
+ case MGS_CONFIG_READ:
+ req->rq_bulk_read = 1;
+ break;
+ }
+
+ CDEBUG(D_RPCTRACE, "got req x"LPU64"\n", req->rq_xid);
+
+ req->rq_export = class_conn2export(
+ lustre_msg_get_handle(req->rq_reqmsg));
+ if (req->rq_export) {
+ rc = ptlrpc_check_req(req);
+ if (rc == 0) {
+ rc = sptlrpc_target_export_check(req->rq_export, req);
+ if (rc)
+ DEBUG_REQ(D_ERROR, req, "DROPPING req with "
+ "illegal security flavor,");
+ }
+
+ if (rc)
+ goto err_req;
+ ptlrpc_update_export_timer(req->rq_export, 0);
+ }
+
+ /* req_in handling should/must be fast */
+ if (cfs_time_current_sec() - req->rq_arrival_time.tv_sec > 5)
+ DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s",
+ cfs_time_sub(cfs_time_current_sec(),
+ req->rq_arrival_time.tv_sec));
+
+ /* Set rpc server deadline and add it to the timed list */
+ deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
+ MSGHDR_AT_SUPPORT) ?
+ /* The max time the client expects us to take */
+ lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+ req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
+ if (unlikely(deadline == 0)) {
+ DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
+ goto err_req;
+ }
+
+ req->rq_svc_thread = thread;
+
+ ptlrpc_at_add_timed(req);
+
+ /* Move it over to the request processing queue */
+ rc = ptlrpc_server_request_add(svcpt, req);
+ if (rc)
+ GOTO(err_req, rc);
+
+ wake_up(&svcpt->scp_waitq);
+ RETURN(1);
+
+err_req:
+ ptlrpc_server_finish_request(svcpt, req);
+
+ RETURN(1);
+}
+
+/**
+ * Main incoming request handling logic.
+ * Calls handler function from service to do actual processing.
+ */
+static int
+ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request *request;
+ struct timeval work_start;
+ struct timeval work_end;
+ long timediff;
+ int rc;
+ int fail_opc = 0;
+ ENTRY;
+
+ request = ptlrpc_server_request_get(svcpt, false);
+ if (request == NULL)
+ RETURN(0);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
+ fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
+ else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
+ fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT;
+
+ if (unlikely(fail_opc)) {
+ if (request->rq_export && request->rq_ops)
+ OBD_FAIL_TIMEOUT(fail_opc, 4);
+ }
+
+ ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
+
+ if(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
+ libcfs_debug_dumplog();
+
+ do_gettimeofday(&work_start);
+ timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL);
+ if (likely(svc->srv_stats != NULL)) {
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
+ timediff);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
+ svcpt->scp_nreqs_incoming);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
+ svcpt->scp_nreqs_active);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
+ at_get(&svcpt->scp_at_estimate));
+ }
+
+ rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
+ if (rc) {
+ CERROR("Failure to initialize session: %d\n", rc);
+ goto out_req;
+ }
+ request->rq_session.lc_thread = thread;
+ request->rq_session.lc_cookie = 0x5;
+ lu_context_enter(&request->rq_session);
+
+ CDEBUG(D_NET, "got req "LPU64"\n", request->rq_xid);
+
+ request->rq_svc_thread = thread;
+ if (thread)
+ request->rq_svc_thread->t_env->le_ses = &request->rq_session;
+
+ if (likely(request->rq_export)) {
+ if (unlikely(ptlrpc_check_req(request)))
+ goto put_conn;
+ ptlrpc_update_export_timer(request->rq_export, timediff >> 19);
+ }
+
+ /* Discard requests queued for longer than the deadline.
+ The deadline is increased if we send an early reply. */
+ if (cfs_time_current_sec() > request->rq_deadline) {
+ DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s"
+ ": deadline "CFS_DURATION_T":"CFS_DURATION_T"s ago\n",
+ libcfs_id2str(request->rq_peer),
+ cfs_time_sub(request->rq_deadline,
+ request->rq_arrival_time.tv_sec),
+ cfs_time_sub(cfs_time_current_sec(),
+ request->rq_deadline));
+ goto put_conn;
+ }
+
+ CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc "
+ "%s:%s+%d:%d:x"LPU64":%s:%d\n", current_comm(),
+ (request->rq_export ?
+ (char *)request->rq_export->exp_client_uuid.uuid : "0"),
+ (request->rq_export ?
+ atomic_read(&request->rq_export->exp_refcount) : -99),
+ lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
+ libcfs_id2str(request->rq_peer),
+ lustre_msg_get_opc(request->rq_reqmsg));
+
+ if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
+
+ rc = svc->srv_ops.so_req_handler(request);
+
+ ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
+
+put_conn:
+ lu_context_exit(&request->rq_session);
+ lu_context_fini(&request->rq_session);
+
+ if (unlikely(cfs_time_current_sec() > request->rq_deadline)) {
+ DEBUG_REQ(D_WARNING, request, "Request took longer "
+ "than estimated ("CFS_DURATION_T":"CFS_DURATION_T"s);"
+ " client may timeout.",
+ cfs_time_sub(request->rq_deadline,
+ request->rq_arrival_time.tv_sec),
+ cfs_time_sub(cfs_time_current_sec(),
+ request->rq_deadline));
+ }
+
+ do_gettimeofday(&work_end);
+ timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+ CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc "
+ "%s:%s+%d:%d:x"LPU64":%s:%d Request procesed in "
+ "%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
+ current_comm(),
+ (request->rq_export ?
+ (char *)request->rq_export->exp_client_uuid.uuid : "0"),
+ (request->rq_export ?
+ atomic_read(&request->rq_export->exp_refcount) : -99),
+ lustre_msg_get_status(request->rq_reqmsg),
+ request->rq_xid,
+ libcfs_id2str(request->rq_peer),
+ lustre_msg_get_opc(request->rq_reqmsg),
+ timediff,
+ cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL),
+ (request->rq_repmsg ?
+ lustre_msg_get_transno(request->rq_repmsg) :
+ request->rq_transno),
+ request->rq_status,
+ (request->rq_repmsg ?
+ lustre_msg_get_status(request->rq_repmsg) : -999));
+ if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
+ __u32 op = lustre_msg_get_opc(request->rq_reqmsg);
+ int opc = opcode_offset(op);
+ if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
+ LASSERT(opc < LUSTRE_MAX_OPCODES);
+ lprocfs_counter_add(svc->srv_stats,
+ opc + EXTRA_MAX_OPCODES,
+ timediff);
+ }
+ }
+ if (unlikely(request->rq_early_count)) {
+ DEBUG_REQ(D_ADAPTTO, request,
+ "sent %d early replies before finishing in "
+ CFS_DURATION_T"s",
+ request->rq_early_count,
+ cfs_time_sub(work_end.tv_sec,
+ request->rq_arrival_time.tv_sec));
+ }
+
+out_req:
+ ptlrpc_server_finish_active_request(svcpt, request);
+
+ RETURN(1);
+}
+
+/**
+ * An internal function to process a single reply state object.
+ */
+static int
+ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct obd_export *exp;
+ int nlocks;
+ int been_handled;
+ ENTRY;
+
+ exp = rs->rs_export;
+
+ LASSERT (rs->rs_difficult);
+ LASSERT (rs->rs_scheduled);
+ LASSERT (list_empty(&rs->rs_list));
+
+ spin_lock(&exp->exp_lock);
+ /* Noop if removed already */
+ list_del_init (&rs->rs_exp_list);
+ spin_unlock(&exp->exp_lock);
+
+ /* The disk commit callback holds exp_uncommitted_replies_lock while it
+ * iterates over newly committed replies, removing them from
+ * exp_uncommitted_replies. It then drops this lock and schedules the
+ * replies it found for handling here.
+ *
+ * We can avoid contention for exp_uncommitted_replies_lock between the
+ * HRT threads and further commit callbacks by checking rs_committed
+ * which is set in the commit callback while it holds both
+ * rs_lock and exp_uncommitted_reples.
+ *
+ * If we see rs_committed clear, the commit callback _may_ not have
+ * handled this reply yet and we race with it to grab
+ * exp_uncommitted_replies_lock before removing the reply from
+ * exp_uncommitted_replies. Note that if we lose the race and the
+ * reply has already been removed, list_del_init() is a noop.
+ *
+ * If we see rs_committed set, we know the commit callback is handling,
+ * or has handled this reply since store reordering might allow us to
+ * see rs_committed set out of sequence. But since this is done
+ * holding rs_lock, we can be sure it has all completed once we hold
+ * rs_lock, which we do right next.
+ */
+ if (!rs->rs_committed) {
+ spin_lock(&exp->exp_uncommitted_replies_lock);
+ list_del_init(&rs->rs_obd_list);
+ spin_unlock(&exp->exp_uncommitted_replies_lock);
+ }
+
+ spin_lock(&rs->rs_lock);
+
+ been_handled = rs->rs_handled;
+ rs->rs_handled = 1;
+
+ nlocks = rs->rs_nlocks; /* atomic "steal", but */
+ rs->rs_nlocks = 0; /* locks still on rs_locks! */
+
+ if (nlocks == 0 && !been_handled) {
+ /* If we see this, we should already have seen the warning
+ * in mds_steal_ack_locks() */
+ CDEBUG(D_HA, "All locks stolen from rs %p x"LPD64".t"LPD64
+ " o%d NID %s\n",
+ rs,
+ rs->rs_xid, rs->rs_transno, rs->rs_opc,
+ libcfs_nid2str(exp->exp_connection->c_peer.nid));
+ }
+
+ if ((!been_handled && rs->rs_on_net) || nlocks > 0) {
+ spin_unlock(&rs->rs_lock);
+
+ if (!been_handled && rs->rs_on_net) {
+ LNetMDUnlink(rs->rs_md_h);
+ /* Ignore return code; we're racing with completion */
+ }
+
+ while (nlocks-- > 0)
+ ldlm_lock_decref(&rs->rs_locks[nlocks],
+ rs->rs_modes[nlocks]);
+
+ spin_lock(&rs->rs_lock);
+ }
+
+ rs->rs_scheduled = 0;
+
+ if (!rs->rs_on_net) {
+ /* Off the net */
+ spin_unlock(&rs->rs_lock);
+
+ class_export_put (exp);
+ rs->rs_export = NULL;
+ ptlrpc_rs_decref (rs);
+ if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
+ svc->srv_is_stopping)
+ wake_up_all(&svcpt->scp_waitq);
+ RETURN(1);
+ }
+
+ /* still on the net; callback will schedule */
+ spin_unlock(&rs->rs_lock);
+ RETURN(1);
+}
+
+
+static void
+ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
+{
+ int avail = svcpt->scp_nrqbds_posted;
+ int low_water = test_req_buffer_pressure ? 0 :
+ svcpt->scp_service->srv_nbuf_per_group / 2;
+
+ /* NB I'm not locking; just looking. */
+
+ /* CAVEAT EMPTOR: We might be allocating buffers here because we've
+ * allowed the request history to grow out of control. We could put a
+ * sanity check on that here and cull some history if we need the
+ * space. */
+
+ if (avail <= low_water)
+ ptlrpc_grow_req_bufs(svcpt, 1);
+
+ if (svcpt->scp_service->srv_stats) {
+ lprocfs_counter_add(svcpt->scp_service->srv_stats,
+ PTLRPC_REQBUF_AVAIL_CNTR, avail);
+ }
+}
+
+static int
+ptlrpc_retry_rqbds(void *arg)
+{
+ struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg;
+
+ svcpt->scp_rqbd_timeout = 0;
+ return -ETIMEDOUT;
+}
+
+static inline int
+ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nreqs_active <
+ svcpt->scp_nthrs_running - 1 -
+ (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
+}
+
+/**
+ * allowed to create more threads
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nthrs_running +
+ svcpt->scp_nthrs_starting <
+ svcpt->scp_service->srv_nthrs_cpt_limit;
+}
+
+/**
+ * too many requests and allowed to create more threads
+ */
+static inline int
+ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
+{
+ return !ptlrpc_threads_enough(svcpt) &&
+ ptlrpc_threads_increasable(svcpt);
+}
+
+static inline int
+ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
+{
+ return thread_is_stopping(thread) ||
+ thread->t_svcpt->scp_service->srv_is_stopping;
+}
+
+static inline int
+ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
+{
+ return !list_empty(&svcpt->scp_rqbd_idle) &&
+ svcpt->scp_rqbd_timeout == 0;
+}
+
+static inline int
+ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_at_check;
+}
+
+/**
+ * requests wait on preprocessing
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
+{
+ return !list_empty(&svcpt->scp_req_incoming);
+}
+
+static __attribute__((__noinline__)) int
+ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ /* Don't exit while there are replies to be handled */
+ struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
+ ptlrpc_retry_rqbds, svcpt);
+
+ lc_watchdog_disable(thread->t_watchdog);
+
+ cond_resched();
+
+ l_wait_event_exclusive_head(svcpt->scp_waitq,
+ ptlrpc_thread_stopping(thread) ||
+ ptlrpc_server_request_incoming(svcpt) ||
+ ptlrpc_server_request_pending(svcpt, false) ||
+ ptlrpc_rqbd_pending(svcpt) ||
+ ptlrpc_at_check(svcpt), &lwi);
+
+ if (ptlrpc_thread_stopping(thread))
+ return -EINTR;
+
+ lc_watchdog_touch(thread->t_watchdog,
+ ptlrpc_server_get_timeout(svcpt));
+ return 0;
+}
+
+/**
+ * Main thread body for service threads.
+ * Waits in a loop waiting for new requests to process to appear.
+ * Every time an incoming requests is added to its queue, a waitq
+ * is woken up and one of the threads will handle it.
+ */
+static int ptlrpc_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+ struct ptlrpc_service_part *svcpt = thread->t_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_reply_state *rs;
+#ifdef WITH_GROUP_INFO
+ group_info_t *ginfo = NULL;
+#endif
+ struct lu_env *env;
+ int counter = 0, rc = 0;
+ ENTRY;
+
+ thread->t_pid = current_pid();
+ unshare_fs_struct();
+
+ /* NB: we will call cfs_cpt_bind() for all threads, because we
+ * might want to run lustre server only on a subset of system CPUs,
+ * in that case ->scp_cpt is CFS_CPT_ANY */
+ rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
+ if (rc != 0) {
+ CWARN("%s: failed to bind %s on CPT %d\n",
+ svc->srv_name, thread->t_name, svcpt->scp_cpt);
+ }
+
+#ifdef WITH_GROUP_INFO
+ ginfo = groups_alloc(0);
+ if (!ginfo) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ set_current_groups(ginfo);
+ put_group_info(ginfo);
+#endif
+
+ if (svc->srv_ops.so_thr_init != NULL) {
+ rc = svc->srv_ops.so_thr_init(thread);
+ if (rc)
+ goto out;
+ }
+
+ OBD_ALLOC_PTR(env);
+ if (env == NULL) {
+ rc = -ENOMEM;
+ goto out_srv_fini;
+ }
+
+ rc = lu_context_init(&env->le_ctx,
+ svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF);
+ if (rc)
+ goto out_srv_fini;
+
+ thread->t_env = env;
+ env->le_ctx.lc_thread = thread;
+ env->le_ctx.lc_cookie = 0x6;
+
+ while (!list_empty(&svcpt->scp_rqbd_idle)) {
+ rc = ptlrpc_server_post_idle_rqbds(svcpt);
+ if (rc >= 0)
+ continue;
+
+ CERROR("Failed to post rqbd for %s on CPT %d: %d\n",
+ svc->srv_name, svcpt->scp_cpt, rc);
+ goto out_srv_fini;
+ }
+
+ /* Alloc reply state structure for this one */
+ OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size);
+ if (!rs) {
+ rc = -ENOMEM;
+ goto out_srv_fini;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ LASSERT(thread_is_starting(thread));
+ thread_clear_flags(thread, SVC_STARTING);
+
+ LASSERT(svcpt->scp_nthrs_starting == 1);
+ svcpt->scp_nthrs_starting--;
+
+ /* SVC_STOPPING may already be set here if someone else is trying
+ * to stop the service while this new thread has been dynamically
+ * forked. We still set SVC_RUNNING to let our creator know that
+ * we are now running, however we will exit as soon as possible */
+ thread_add_flags(thread, SVC_RUNNING);
+ svcpt->scp_nthrs_running++;
+ spin_unlock(&svcpt->scp_lock);
+
+ /* wake up our creator in case he's still waiting. */
+ wake_up(&thread->t_ctl_waitq);
+
+ thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
+ NULL, NULL);
+
+ spin_lock(&svcpt->scp_rep_lock);
+ list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+ wake_up(&svcpt->scp_rep_waitq);
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
+ svcpt->scp_nthrs_running);
+
+ /* XXX maintain a list of all managed devices: insert here */
+ while (!ptlrpc_thread_stopping(thread)) {
+ if (ptlrpc_wait_event(svcpt, thread))
+ break;
+
+ ptlrpc_check_rqbd_pool(svcpt);
+
+ if (ptlrpc_threads_need_create(svcpt)) {
+ /* Ignore return code - we tried... */
+ ptlrpc_start_thread(svcpt, 0);
+ }
+
+ /* Process all incoming reqs before handling any */
+ if (ptlrpc_server_request_incoming(svcpt)) {
+ lu_context_enter(&env->le_ctx);
+ env->le_ses = NULL;
+ ptlrpc_server_handle_req_in(svcpt, thread);
+ lu_context_exit(&env->le_ctx);
+
+ /* but limit ourselves in case of flood */
+ if (counter++ < 100)
+ continue;
+ counter = 0;
+ }
+
+ if (ptlrpc_at_check(svcpt))
+ ptlrpc_at_check_timed(svcpt);
+
+ if (ptlrpc_server_request_pending(svcpt, false)) {
+ lu_context_enter(&env->le_ctx);
+ ptlrpc_server_handle_request(svcpt, thread);
+ lu_context_exit(&env->le_ctx);
+ }
+
+ if (ptlrpc_rqbd_pending(svcpt) &&
+ ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
+ /* I just failed to repost request buffers.
+ * Wait for a timeout (unless something else
+ * happens) before I try again */
+ svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
+ CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
+ svcpt->scp_nrqbds_posted);
+ }
+ }
+
+ lc_watchdog_delete(thread->t_watchdog);
+ thread->t_watchdog = NULL;
+
+out_srv_fini:
+ /*
+ * deconstruct service specific state created by ptlrpc_start_thread()
+ */
+ if (svc->srv_ops.so_thr_done != NULL)
+ svc->srv_ops.so_thr_done(thread);
+
+ if (env != NULL) {
+ lu_context_fini(&env->le_ctx);
+ OBD_FREE_PTR(env);
+ }
+out:
+ CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
+ thread, thread->t_pid, thread->t_id, rc);
+
+ spin_lock(&svcpt->scp_lock);
+ if (thread_test_and_clear_flags(thread, SVC_STARTING))
+ svcpt->scp_nthrs_starting--;
+
+ if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
+ /* must know immediately */
+ svcpt->scp_nthrs_running--;
+ }
+
+ thread->t_id = rc;
+ thread_add_flags(thread, SVC_STOPPED);
+
+ wake_up(&thread->t_ctl_waitq);
+ spin_unlock(&svcpt->scp_lock);
+
+ return rc;
+}
+
+static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt,
+ struct list_head *replies)
+{
+ int result;
+
+ spin_lock(&hrt->hrt_lock);
+
+ list_splice_init(&hrt->hrt_queue, replies);
+ result = ptlrpc_hr.hr_stopping || !list_empty(replies);
+
+ spin_unlock(&hrt->hrt_lock);
+ return result;
+}
+
+/**
+ * Main body of "handle reply" function.
+ * It processes acked reply states
+ */
+static int ptlrpc_hr_main(void *arg)
+{
+ struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg;
+ struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
+ LIST_HEAD (replies);
+ char threadname[20];
+ int rc;
+
+ snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
+ hrp->hrp_cpt, hrt->hrt_id);
+ unshare_fs_struct();
+
+ rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
+ if (rc != 0) {
+ CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
+ threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
+ }
+
+ atomic_inc(&hrp->hrp_nstarted);
+ wake_up(&ptlrpc_hr.hr_waitq);
+
+ while (!ptlrpc_hr.hr_stopping) {
+ l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies));
+
+ while (!list_empty(&replies)) {
+ struct ptlrpc_reply_state *rs;
+
+ rs = list_entry(replies.prev,
+ struct ptlrpc_reply_state,
+ rs_list);
+ list_del_init(&rs->rs_list);
+ ptlrpc_handle_rs(rs);
+ }
+ }
+
+ atomic_inc(&hrp->hrp_nstopped);
+ wake_up(&ptlrpc_hr.hr_waitq);
+
+ return 0;
+}
+
+static void ptlrpc_stop_hr_threads(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+ int j;
+
+ ptlrpc_hr.hr_stopping = 1;
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs == NULL)
+ continue; /* uninitialized */
+ for (j = 0; j < hrp->hrp_nthrs; j++)
+ wake_up_all(&hrp->hrp_thrs[j].hrt_waitq);
+ }
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs == NULL)
+ continue; /* uninitialized */
+ wait_event(ptlrpc_hr.hr_waitq,
+ atomic_read(&hrp->hrp_nstopped) ==
+ atomic_read(&hrp->hrp_nstarted));
+ }
+}
+
+static int ptlrpc_start_hr_threads(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+ int j;
+ ENTRY;
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ int rc = 0;
+
+ for (j = 0; j < hrp->hrp_nthrs; j++) {
+ struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j];
+ rc = PTR_ERR(kthread_run(ptlrpc_hr_main,
+ &hrp->hrp_thrs[j],
+ "ptlrpc_hr%02d_%03d",
+ hrp->hrp_cpt,
+ hrt->hrt_id));
+ if (IS_ERR_VALUE(rc))
+ break;
+ }
+ wait_event(ptlrpc_hr.hr_waitq,
+ atomic_read(&hrp->hrp_nstarted) == j);
+ if (!IS_ERR_VALUE(rc))
+ continue;
+
+ CERROR("Reply handling thread %d:%d Failed on starting: "
+ "rc = %d\n", i, j, rc);
+ ptlrpc_stop_hr_threads();
+ RETURN(rc);
+ }
+ RETURN(0);
+}
+
+static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
+{
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread;
+ LIST_HEAD (zombie);
+
+ ENTRY;
+
+ CDEBUG(D_INFO, "Stopping threads for service %s\n",
+ svcpt->scp_service->srv_name);
+
+ spin_lock(&svcpt->scp_lock);
+ /* let the thread know that we would like it to stop asap */
+ list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
+ CDEBUG(D_INFO, "Stopping thread %s #%u\n",
+ svcpt->scp_service->srv_thread_name, thread->t_id);
+ thread_add_flags(thread, SVC_STOPPING);
+ }
+
+ wake_up_all(&svcpt->scp_waitq);
+
+ while (!list_empty(&svcpt->scp_threads)) {
+ thread = list_entry(svcpt->scp_threads.next,
+ struct ptlrpc_thread, t_link);
+ if (thread_is_stopped(thread)) {
+ list_del(&thread->t_link);
+ list_add(&thread->t_link, &zombie);
+ continue;
+ }
+ spin_unlock(&svcpt->scp_lock);
+
+ CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
+ svcpt->scp_service->srv_thread_name, thread->t_id);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopped(thread), &lwi);
+
+ spin_lock(&svcpt->scp_lock);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+
+ while (!list_empty(&zombie)) {
+ thread = list_entry(zombie.next,
+ struct ptlrpc_thread, t_link);
+ list_del(&thread->t_link);
+ OBD_FREE_PTR(thread);
+ }
+ EXIT;
+}
+
+/**
+ * Stops all threads of a particular service \a svc
+ */
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ ENTRY;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service != NULL)
+ ptlrpc_svcpt_stop_threads(svcpt);
+ }
+
+ EXIT;
+}
+EXPORT_SYMBOL(ptlrpc_stop_all_threads);
+
+int ptlrpc_start_threads(struct ptlrpc_service *svc)
+{
+ int rc = 0;
+ int i;
+ int j;
+ ENTRY;
+
+ /* We require 2 threads min, see note in ptlrpc_server_handle_request */
+ LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT);
+
+ for (i = 0; i < svc->srv_ncpts; i++) {
+ for (j = 0; j < svc->srv_nthrs_cpt_init; j++) {
+ rc = ptlrpc_start_thread(svc->srv_parts[i], 1);
+ if (rc == 0)
+ continue;
+
+ if (rc != -EMFILE)
+ goto failed;
+ /* We have enough threads, don't start more. b=15759 */
+ break;
+ }
+ }
+
+ RETURN(0);
+ failed:
+ CERROR("cannot start %s thread #%d_%d: rc %d\n",
+ svc->srv_thread_name, i, j, rc);
+ ptlrpc_stop_all_threads(svc);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ptlrpc_start_threads);
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
+{
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread;
+ struct ptlrpc_service *svc;
+ int rc;
+ ENTRY;
+
+ LASSERT(svcpt != NULL);
+
+ svc = svcpt->scp_service;
+
+ CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n",
+ svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running,
+ svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit);
+
+ again:
+ if (unlikely(svc->srv_is_stopping))
+ RETURN(-ESRCH);
+
+ if (!ptlrpc_threads_increasable(svcpt) ||
+ (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
+ svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1))
+ RETURN(-EMFILE);
+
+ OBD_CPT_ALLOC_PTR(thread, svc->srv_cptable, svcpt->scp_cpt);
+ if (thread == NULL)
+ RETURN(-ENOMEM);
+ init_waitqueue_head(&thread->t_ctl_waitq);
+
+ spin_lock(&svcpt->scp_lock);
+ if (!ptlrpc_threads_increasable(svcpt)) {
+ spin_unlock(&svcpt->scp_lock);
+ OBD_FREE_PTR(thread);
+ RETURN(-EMFILE);
+ }
+
+ if (svcpt->scp_nthrs_starting != 0) {
+ /* serialize starting because some modules (obdfilter)
+ * might require unique and contiguous t_id */
+ LASSERT(svcpt->scp_nthrs_starting == 1);
+ spin_unlock(&svcpt->scp_lock);
+ OBD_FREE_PTR(thread);
+ if (wait) {
+ CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n",
+ svc->srv_thread_name, svcpt->scp_thr_nextid);
+ schedule();
+ goto again;
+ }
+
+ CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n",
+ svc->srv_thread_name, svcpt->scp_thr_nextid);
+ RETURN(-EAGAIN);
+ }
+
+ svcpt->scp_nthrs_starting++;
+ thread->t_id = svcpt->scp_thr_nextid++;
+ thread_add_flags(thread, SVC_STARTING);
+ thread->t_svcpt = svcpt;
+
+ list_add(&thread->t_link, &svcpt->scp_threads);
+ spin_unlock(&svcpt->scp_lock);
+
+ if (svcpt->scp_cpt >= 0) {
+ snprintf(thread->t_name, PTLRPC_THR_NAME_LEN, "%s%02d_%03d",
+ svc->srv_thread_name, svcpt->scp_cpt, thread->t_id);
+ } else {
+ snprintf(thread->t_name, PTLRPC_THR_NAME_LEN, "%s_%04d",
+ svc->srv_thread_name, thread->t_id);
+ }
+
+ CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name);
+ rc = PTR_ERR(kthread_run(ptlrpc_main, thread, thread->t_name));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("cannot start thread '%s': rc %d\n",
+ thread->t_name, rc);
+ spin_lock(&svcpt->scp_lock);
+ list_del(&thread->t_link);
+ --svcpt->scp_nthrs_starting;
+ spin_unlock(&svcpt->scp_lock);
+
+ OBD_FREE(thread, sizeof(*thread));
+ RETURN(rc);
+ }
+
+ if (!wait)
+ RETURN(0);
+
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_running(thread) || thread_is_stopped(thread),
+ &lwi);
+
+ rc = thread_is_stopped(thread) ? thread->t_id : 0;
+ RETURN(rc);
+}
+
+int ptlrpc_hr_init(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ struct ptlrpc_hr_thread *hrt;
+ int rc;
+ int i;
+ int j;
+ ENTRY;
+
+ memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
+ ptlrpc_hr.hr_cpt_table = cfs_cpt_table;
+
+ ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table,
+ sizeof(*hrp));
+ if (ptlrpc_hr.hr_partitions == NULL)
+ RETURN(-ENOMEM);
+
+ init_waitqueue_head(&ptlrpc_hr.hr_waitq);
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ hrp->hrp_cpt = i;
+
+ atomic_set(&hrp->hrp_nstarted, 0);
+ atomic_set(&hrp->hrp_nstopped, 0);
+
+ hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
+ hrp->hrp_nthrs /= cfs_cpu_ht_nsiblings(0);
+
+ LASSERT(hrp->hrp_nthrs > 0);
+ OBD_CPT_ALLOC(hrp->hrp_thrs, ptlrpc_hr.hr_cpt_table, i,
+ hrp->hrp_nthrs * sizeof(*hrt));
+ if (hrp->hrp_thrs == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ for (j = 0; j < hrp->hrp_nthrs; j++) {
+ hrt = &hrp->hrp_thrs[j];
+
+ hrt->hrt_id = j;
+ hrt->hrt_partition = hrp;
+ init_waitqueue_head(&hrt->hrt_waitq);
+ spin_lock_init(&hrt->hrt_lock);
+ INIT_LIST_HEAD(&hrt->hrt_queue);
+ }
+ }
+
+ rc = ptlrpc_start_hr_threads();
+out:
+ if (rc != 0)
+ ptlrpc_hr_fini();
+ RETURN(rc);
+}
+
+void ptlrpc_hr_fini(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+
+ if (ptlrpc_hr.hr_partitions == NULL)
+ return;
+
+ ptlrpc_stop_hr_threads();
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs != NULL) {
+ OBD_FREE(hrp->hrp_thrs,
+ hrp->hrp_nthrs * sizeof(hrp->hrp_thrs[0]));
+ }
+ }
+
+ cfs_percpt_free(ptlrpc_hr.hr_partitions);
+ ptlrpc_hr.hr_partitions = NULL;
+}
+
+
+/**
+ * Wait until all already scheduled replies are processed.
+ */
+static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
+{
+ while (1) {
+ int rc;
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
+ NULL, NULL);
+
+ rc = l_wait_event(svcpt->scp_waitq,
+ atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+ if (rc == 0)
+ break;
+ CWARN("Unexpectedly long timeout %s %p\n",
+ svcpt->scp_service->srv_name, svcpt->scp_service);
+ }
+}
+
+static void
+ptlrpc_service_del_atimer(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+
+ /* early disarm AT timer... */
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service != NULL)
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ }
+}
+
+static void
+ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ struct l_wait_info lwi;
+ int rc;
+ int i;
+
+ /* All history will be culled when the next request buffer is
+ * freed in ptlrpc_service_purge_all() */
+ svc->srv_hist_nrqbds_cpt_max = 0;
+
+ rc = LNetClearLazyPortal(svc->srv_req_portal);
+ LASSERT(rc == 0);
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* Unlink all the request buffers. This forces a 'final'
+ * event with its 'unlink' flag set for each posted rqbd */
+ list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
+ rqbd_list) {
+ rc = LNetMDUnlink(rqbd->rqbd_md_h);
+ LASSERT(rc == 0 || rc == -ENOENT);
+ }
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* Wait for the network to release any buffers
+ * it's currently filling */
+ spin_lock(&svcpt->scp_lock);
+ while (svcpt->scp_nrqbds_posted != 0) {
+ spin_unlock(&svcpt->scp_lock);
+ /* Network access will complete in finite time but
+ * the HUGE timeout lets us CWARN for visibility
+ * of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(
+ cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(svcpt->scp_waitq,
+ svcpt->scp_nrqbds_posted == 0, &lwi);
+ if (rc == -ETIMEDOUT) {
+ CWARN("Service %s waiting for "
+ "request buffers\n",
+ svcpt->scp_service->srv_name);
+ }
+ spin_lock(&svcpt->scp_lock);
+ }
+ spin_unlock(&svcpt->scp_lock);
+ }
+}
+
+static void
+ptlrpc_service_purge_all(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ struct ptlrpc_request *req;
+ struct ptlrpc_reply_state *rs;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ spin_lock(&svcpt->scp_rep_lock);
+ while (!list_empty(&svcpt->scp_rep_active)) {
+ rs = list_entry(svcpt->scp_rep_active.next,
+ struct ptlrpc_reply_state, rs_list);
+ spin_lock(&rs->rs_lock);
+ ptlrpc_schedule_difficult_reply(rs);
+ spin_unlock(&rs->rs_lock);
+ }
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ /* purge the request queue. NB No new replies (rqbds
+ * all unlinked) and no service threads, so I'm the only
+ * thread noodling the request queue now */
+ while (!list_empty(&svcpt->scp_req_incoming)) {
+ req = list_entry(svcpt->scp_req_incoming.next,
+ struct ptlrpc_request, rq_list);
+
+ list_del(&req->rq_list);
+ svcpt->scp_nreqs_incoming--;
+ ptlrpc_server_finish_request(svcpt, req);
+ }
+
+ while (ptlrpc_server_request_pending(svcpt, true)) {
+ req = ptlrpc_server_request_get(svcpt, true);
+ ptlrpc_server_finish_active_request(svcpt, req);
+ }
+
+ LASSERT(list_empty(&svcpt->scp_rqbd_posted));
+ LASSERT(svcpt->scp_nreqs_incoming == 0);
+ LASSERT(svcpt->scp_nreqs_active == 0);
+ /* history should have been culled by
+ * ptlrpc_server_finish_request */
+ LASSERT(svcpt->scp_hist_nrqbds == 0);
+
+ /* Now free all the request buffers since nothing
+ * references them any more... */
+
+ while (!list_empty(&svcpt->scp_rqbd_idle)) {
+ rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+ ptlrpc_free_rqbd(rqbd);
+ }
+ ptlrpc_wait_replies(svcpt);
+
+ while (!list_empty(&svcpt->scp_rep_idle)) {
+ rs = list_entry(svcpt->scp_rep_idle.next,
+ struct ptlrpc_reply_state,
+ rs_list);
+ list_del(&rs->rs_list);
+ OBD_FREE_LARGE(rs, svc->srv_max_reply_size);
+ }
+ }
+}
+
+static void
+ptlrpc_service_free(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_at_array *array;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* In case somebody rearmed this in the meantime */
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ array = &svcpt->scp_at_array;
+
+ if (array->paa_reqs_array != NULL) {
+ OBD_FREE(array->paa_reqs_array,
+ sizeof(struct list_head) * array->paa_size);
+ array->paa_reqs_array = NULL;
+ }
+
+ if (array->paa_reqs_count != NULL) {
+ OBD_FREE(array->paa_reqs_count,
+ sizeof(__u32) * array->paa_size);
+ array->paa_reqs_count = NULL;
+ }
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ OBD_FREE_PTR(svcpt);
+
+ if (svc->srv_cpts != NULL)
+ cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts);
+
+ OBD_FREE(svc, offsetof(struct ptlrpc_service,
+ srv_parts[svc->srv_ncpts]));
+}
+
+int ptlrpc_unregister_service(struct ptlrpc_service *service)
+{
+ ENTRY;
+
+ CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
+
+ service->srv_is_stopping = 1;
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+ list_del_init(&service->srv_list);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+ ptlrpc_service_del_atimer(service);
+ ptlrpc_stop_all_threads(service);
+
+ ptlrpc_service_unlink_rqbd(service);
+ ptlrpc_service_purge_all(service);
+ ptlrpc_service_nrs_cleanup(service);
+
+ ptlrpc_lprocfs_unregister_service(service);
+
+ ptlrpc_service_free(service);
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_unregister_service);
+
+/**
+ * Returns 0 if the service is healthy.
+ *
+ * Right now, it just checks to make sure that requests aren't languishing
+ * in the queue. We'll use this health check to govern whether a node needs
+ * to be shot, so it's intentionally non-aggressive. */
+int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_request *request = NULL;
+ struct timeval right_now;
+ long timediff;
+
+ do_gettimeofday(&right_now);
+
+ spin_lock(&svcpt->scp_req_lock);
+ /* How long has the next entry been waiting? */
+ if (ptlrpc_server_high_pending(svcpt, true))
+ request = ptlrpc_nrs_req_peek_nolock(svcpt, true);
+ else if (ptlrpc_server_normal_pending(svcpt, true))
+ request = ptlrpc_nrs_req_peek_nolock(svcpt, false);
+
+ if (request == NULL) {
+ spin_unlock(&svcpt->scp_req_lock);
+ return 0;
+ }
+
+ timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
+ spin_unlock(&svcpt->scp_req_lock);
+
+ if ((timediff / ONE_MILLION) >
+ (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+ CERROR("%s: unhealthy - request has been waiting %lds\n",
+ svcpt->scp_service->srv_name, timediff / ONE_MILLION);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+ptlrpc_service_health_check(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+
+ if (svc == NULL)
+ return 0;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ int rc = ptlrpc_svcpt_health_check(svcpt);
+
+ if (rc != 0)
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_service_health_check);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wirehdr.c b/drivers/staging/lustre/lustre/ptlrpc/wirehdr.c
new file mode 100644
index 000000000000..93bc40b422ee
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/wirehdr.c
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+# ifdef CONFIG_FS_POSIX_ACL
+# include <linux/fs.h>
+# include <linux/posix_acl_xattr.h>
+# endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_disk.h>
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
new file mode 100644
index 000000000000..9890bd9cfb93
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -0,0 +1,4474 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+# ifdef CONFIG_FS_POSIX_ACL
+# include <linux/fs.h>
+# include <linux/posix_acl_xattr.h>
+# endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_disk.h>
+void lustre_assert_wire_constants(void)
+{
+ /* Wire protocol assertions generated by 'wirecheck'
+ * (make -C lustre/utils newwiretest)
+ * running on Linux deva 2.6.32.279.lustre #5 SMP Tue Apr 9 22:52:17 CST 2013 x86_64 x86_64 x
+ * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) */
+
+
+ /* Constants... */
+ LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n",
+ (long long)PTL_RPC_MSG_REQUEST);
+ LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n",
+ (long long)PTL_RPC_MSG_ERR);
+ LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n",
+ (long long)PTL_RPC_MSG_REPLY);
+ LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n",
+ MDS_DIR_END_OFF);
+ LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n",
+ DEAD_HANDLE_MAGIC);
+ CLASSERT(MTI_NAME_MAXLEN == 64);
+ LASSERTF(OST_REPLY == 0, "found %lld\n",
+ (long long)OST_REPLY);
+ LASSERTF(OST_GETATTR == 1, "found %lld\n",
+ (long long)OST_GETATTR);
+ LASSERTF(OST_SETATTR == 2, "found %lld\n",
+ (long long)OST_SETATTR);
+ LASSERTF(OST_READ == 3, "found %lld\n",
+ (long long)OST_READ);
+ LASSERTF(OST_WRITE == 4, "found %lld\n",
+ (long long)OST_WRITE);
+ LASSERTF(OST_CREATE == 5, "found %lld\n",
+ (long long)OST_CREATE);
+ LASSERTF(OST_DESTROY == 6, "found %lld\n",
+ (long long)OST_DESTROY);
+ LASSERTF(OST_GET_INFO == 7, "found %lld\n",
+ (long long)OST_GET_INFO);
+ LASSERTF(OST_CONNECT == 8, "found %lld\n",
+ (long long)OST_CONNECT);
+ LASSERTF(OST_DISCONNECT == 9, "found %lld\n",
+ (long long)OST_DISCONNECT);
+ LASSERTF(OST_PUNCH == 10, "found %lld\n",
+ (long long)OST_PUNCH);
+ LASSERTF(OST_OPEN == 11, "found %lld\n",
+ (long long)OST_OPEN);
+ LASSERTF(OST_CLOSE == 12, "found %lld\n",
+ (long long)OST_CLOSE);
+ LASSERTF(OST_STATFS == 13, "found %lld\n",
+ (long long)OST_STATFS);
+ LASSERTF(OST_SYNC == 16, "found %lld\n",
+ (long long)OST_SYNC);
+ LASSERTF(OST_SET_INFO == 17, "found %lld\n",
+ (long long)OST_SET_INFO);
+ LASSERTF(OST_QUOTACHECK == 18, "found %lld\n",
+ (long long)OST_QUOTACHECK);
+ LASSERTF(OST_QUOTACTL == 19, "found %lld\n",
+ (long long)OST_QUOTACTL);
+ LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
+ (long long)OST_QUOTA_ADJUST_QUNIT);
+ LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
+ (long long)OST_LAST_OPC);
+ LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+ OBD_OBJECT_EOF);
+ LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n",
+ (long long)OST_MIN_PRECREATE);
+ LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n",
+ (long long)OST_MAX_PRECREATE);
+ LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n",
+ OST_LVB_ERR_INIT);
+ LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n",
+ OST_LVB_ERR_MASK);
+ LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n",
+ (long long)MDS_FIRST_OPC);
+ LASSERTF(MDS_GETATTR == 33, "found %lld\n",
+ (long long)MDS_GETATTR);
+ LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n",
+ (long long)MDS_GETATTR_NAME);
+ LASSERTF(MDS_CLOSE == 35, "found %lld\n",
+ (long long)MDS_CLOSE);
+ LASSERTF(MDS_REINT == 36, "found %lld\n",
+ (long long)MDS_REINT);
+ LASSERTF(MDS_READPAGE == 37, "found %lld\n",
+ (long long)MDS_READPAGE);
+ LASSERTF(MDS_CONNECT == 38, "found %lld\n",
+ (long long)MDS_CONNECT);
+ LASSERTF(MDS_DISCONNECT == 39, "found %lld\n",
+ (long long)MDS_DISCONNECT);
+ LASSERTF(MDS_GETSTATUS == 40, "found %lld\n",
+ (long long)MDS_GETSTATUS);
+ LASSERTF(MDS_STATFS == 41, "found %lld\n",
+ (long long)MDS_STATFS);
+ LASSERTF(MDS_PIN == 42, "found %lld\n",
+ (long long)MDS_PIN);
+ LASSERTF(MDS_UNPIN == 43, "found %lld\n",
+ (long long)MDS_UNPIN);
+ LASSERTF(MDS_SYNC == 44, "found %lld\n",
+ (long long)MDS_SYNC);
+ LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n",
+ (long long)MDS_DONE_WRITING);
+ LASSERTF(MDS_SET_INFO == 46, "found %lld\n",
+ (long long)MDS_SET_INFO);
+ LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n",
+ (long long)MDS_QUOTACHECK);
+ LASSERTF(MDS_QUOTACTL == 48, "found %lld\n",
+ (long long)MDS_QUOTACTL);
+ LASSERTF(MDS_GETXATTR == 49, "found %lld\n",
+ (long long)MDS_GETXATTR);
+ LASSERTF(MDS_SETXATTR == 50, "found %lld\n",
+ (long long)MDS_SETXATTR);
+ LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n",
+ (long long)MDS_WRITEPAGE);
+ LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n",
+ (long long)MDS_IS_SUBDIR);
+ LASSERTF(MDS_GET_INFO == 53, "found %lld\n",
+ (long long)MDS_GET_INFO);
+ LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n",
+ (long long)MDS_HSM_STATE_GET);
+ LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n",
+ (long long)MDS_HSM_STATE_SET);
+ LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n",
+ (long long)MDS_HSM_ACTION);
+ LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n",
+ (long long)MDS_HSM_PROGRESS);
+ LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n",
+ (long long)MDS_HSM_REQUEST);
+ LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n",
+ (long long)MDS_HSM_CT_REGISTER);
+ LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n",
+ (long long)MDS_HSM_CT_UNREGISTER);
+ LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n",
+ (long long)MDS_SWAP_LAYOUTS);
+ LASSERTF(MDS_LAST_OPC == 62, "found %lld\n",
+ (long long)MDS_LAST_OPC);
+ LASSERTF(REINT_SETATTR == 1, "found %lld\n",
+ (long long)REINT_SETATTR);
+ LASSERTF(REINT_CREATE == 2, "found %lld\n",
+ (long long)REINT_CREATE);
+ LASSERTF(REINT_LINK == 3, "found %lld\n",
+ (long long)REINT_LINK);
+ LASSERTF(REINT_UNLINK == 4, "found %lld\n",
+ (long long)REINT_UNLINK);
+ LASSERTF(REINT_RENAME == 5, "found %lld\n",
+ (long long)REINT_RENAME);
+ LASSERTF(REINT_OPEN == 6, "found %lld\n",
+ (long long)REINT_OPEN);
+ LASSERTF(REINT_SETXATTR == 7, "found %lld\n",
+ (long long)REINT_SETXATTR);
+ LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
+ (long long)REINT_RMENTRY);
+ LASSERTF(REINT_MAX == 9, "found %lld\n",
+ (long long)REINT_MAX);
+ LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_IT_EXECD);
+ LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_EXECD);
+ LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_NEG);
+ LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_POS);
+ LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_CREATE);
+ LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_OPEN);
+ LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_COMPLETE);
+ LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_OPEN_REF);
+ LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_CREATE_REF);
+ LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_LOCK);
+ LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
+ (long long)MDS_STATUS_CONN);
+ LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
+ (long long)MDS_STATUS_LOV);
+ LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
+ (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
+ LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MF_SOM_CHANGE);
+ LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MF_EPOCH_OPEN);
+ LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MF_EPOCH_CLOSE);
+ LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID1);
+ LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID2);
+ LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID3);
+ LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID4);
+ LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MF_SOM_AU);
+ LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)MF_GETATTR_LOCK);
+ LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MODE);
+ LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_UID);
+ LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_GID);
+ LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_SIZE);
+ LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATIME);
+ LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MTIME);
+ LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_CTIME);
+ LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATIME_SET);
+ LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MTIME_SET);
+ LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_FORCE);
+ LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATTR_FLAG);
+ LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_KILL_SUID);
+ LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_KILL_SGID);
+ LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_CTIME_SET);
+ LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_FROM_OPEN);
+ LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_BLOCKS);
+ LASSERTF(FLD_QUERY == 900, "found %lld\n",
+ (long long)FLD_QUERY);
+ LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
+ (long long)FLD_FIRST_OPC);
+ LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+ (long long)FLD_LAST_OPC);
+ LASSERTF(SEQ_QUERY == 700, "found %lld\n",
+ (long long)SEQ_QUERY);
+ LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n",
+ (long long)SEQ_FIRST_OPC);
+ LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n",
+ (long long)SEQ_LAST_OPC);
+ LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n",
+ (long long)SEQ_ALLOC_SUPER);
+ LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n",
+ (long long)SEQ_ALLOC_META);
+ LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n",
+ (long long)LDLM_ENQUEUE);
+ LASSERTF(LDLM_CONVERT == 102, "found %lld\n",
+ (long long)LDLM_CONVERT);
+ LASSERTF(LDLM_CANCEL == 103, "found %lld\n",
+ (long long)LDLM_CANCEL);
+ LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n",
+ (long long)LDLM_BL_CALLBACK);
+ LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n",
+ (long long)LDLM_CP_CALLBACK);
+ LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n",
+ (long long)LDLM_GL_CALLBACK);
+ LASSERTF(LDLM_SET_INFO == 107, "found %lld\n",
+ (long long)LDLM_SET_INFO);
+ LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n",
+ (long long)LDLM_LAST_OPC);
+ LASSERTF(LCK_MINMODE == 0, "found %lld\n",
+ (long long)LCK_MINMODE);
+ LASSERTF(LCK_EX == 1, "found %lld\n",
+ (long long)LCK_EX);
+ LASSERTF(LCK_PW == 2, "found %lld\n",
+ (long long)LCK_PW);
+ LASSERTF(LCK_PR == 4, "found %lld\n",
+ (long long)LCK_PR);
+ LASSERTF(LCK_CW == 8, "found %lld\n",
+ (long long)LCK_CW);
+ LASSERTF(LCK_CR == 16, "found %lld\n",
+ (long long)LCK_CR);
+ LASSERTF(LCK_NL == 32, "found %lld\n",
+ (long long)LCK_NL);
+ LASSERTF(LCK_GROUP == 64, "found %lld\n",
+ (long long)LCK_GROUP);
+ LASSERTF(LCK_COS == 128, "found %lld\n",
+ (long long)LCK_COS);
+ LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
+ (long long)LCK_MAXMODE);
+ LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
+ (long long)LCK_MODE_NUM);
+ CLASSERT(LDLM_PLAIN == 10);
+ CLASSERT(LDLM_EXTENT == 11);
+ CLASSERT(LDLM_FLOCK == 12);
+ CLASSERT(LDLM_IBITS == 13);
+ CLASSERT(LDLM_MAX_TYPE == 14);
+ CLASSERT(LUSTRE_RES_ID_SEQ_OFF == 0);
+ CLASSERT(LUSTRE_RES_ID_VER_OID_OFF == 1);
+ LASSERTF(UPDATE_OBJ == 1000, "found %lld\n",
+ (long long)UPDATE_OBJ);
+ LASSERTF(UPDATE_LAST_OPC == 1001, "found %lld\n",
+ (long long)UPDATE_LAST_OPC);
+ CLASSERT(LUSTRE_RES_ID_QUOTA_SEQ_OFF == 2);
+ CLASSERT(LUSTRE_RES_ID_QUOTA_VER_OID_OFF == 3);
+ CLASSERT(LUSTRE_RES_ID_HSH_OFF == 3);
+ CLASSERT(LQUOTA_TYPE_USR == 0);
+ CLASSERT(LQUOTA_TYPE_GRP == 1);
+ CLASSERT(LQUOTA_RES_MD == 1);
+ CLASSERT(LQUOTA_RES_DT == 2);
+ LASSERTF(OBD_PING == 400, "found %lld\n",
+ (long long)OBD_PING);
+ LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n",
+ (long long)OBD_LOG_CANCEL);
+ LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n",
+ (long long)OBD_QC_CALLBACK);
+ LASSERTF(OBD_IDX_READ == 403, "found %lld\n",
+ (long long)OBD_IDX_READ);
+ LASSERTF(OBD_LAST_OPC == 404, "found %lld\n",
+ (long long)OBD_LAST_OPC);
+ LASSERTF(QUOTA_DQACQ == 601, "found %lld\n",
+ (long long)QUOTA_DQACQ);
+ LASSERTF(QUOTA_DQREL == 602, "found %lld\n",
+ (long long)QUOTA_DQREL);
+ LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n",
+ (long long)QUOTA_LAST_OPC);
+ LASSERTF(MGS_CONNECT == 250, "found %lld\n",
+ (long long)MGS_CONNECT);
+ LASSERTF(MGS_DISCONNECT == 251, "found %lld\n",
+ (long long)MGS_DISCONNECT);
+ LASSERTF(MGS_EXCEPTION == 252, "found %lld\n",
+ (long long)MGS_EXCEPTION);
+ LASSERTF(MGS_TARGET_REG == 253, "found %lld\n",
+ (long long)MGS_TARGET_REG);
+ LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n",
+ (long long)MGS_TARGET_DEL);
+ LASSERTF(MGS_SET_INFO == 255, "found %lld\n",
+ (long long)MGS_SET_INFO);
+ LASSERTF(MGS_LAST_OPC == 257, "found %lld\n",
+ (long long)MGS_LAST_OPC);
+ LASSERTF(SEC_CTX_INIT == 801, "found %lld\n",
+ (long long)SEC_CTX_INIT);
+ LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n",
+ (long long)SEC_CTX_INIT_CONT);
+ LASSERTF(SEC_CTX_FINI == 803, "found %lld\n",
+ (long long)SEC_CTX_FINI);
+ LASSERTF(SEC_LAST_OPC == 804, "found %lld\n",
+ (long long)SEC_LAST_OPC);
+ /* Sizes and Offsets */
+
+ /* Checks for struct obd_uuid */
+ LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct obd_uuid));
+
+ /* Checks for struct lu_seq_range */
+ LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lu_seq_range));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_start));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_end));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_index));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_flags));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags));
+ LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n",
+ (long long)LU_SEQ_RANGE_MDT);
+ LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n",
+ (long long)LU_SEQ_RANGE_OST);
+
+ /* Checks for struct lustre_mdt_attrs */
+ LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_mdt_attrs));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
+ LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LMAI_RELEASED);
+ LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_HSM);
+ LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_SOM);
+ LASSERTF(OBJ_CREATE == 1, "found %lld\n",
+ (long long)OBJ_CREATE);
+ LASSERTF(OBJ_DESTROY == 2, "found %lld\n",
+ (long long)OBJ_DESTROY);
+ LASSERTF(OBJ_REF_ADD == 3, "found %lld\n",
+ (long long)OBJ_REF_ADD);
+ LASSERTF(OBJ_REF_DEL == 4, "found %lld\n",
+ (long long)OBJ_REF_DEL);
+ LASSERTF(OBJ_ATTR_SET == 5, "found %lld\n",
+ (long long)OBJ_ATTR_SET);
+ LASSERTF(OBJ_ATTR_GET == 6, "found %lld\n",
+ (long long)OBJ_ATTR_GET);
+ LASSERTF(OBJ_XATTR_SET == 7, "found %lld\n",
+ (long long)OBJ_XATTR_SET);
+ LASSERTF(OBJ_XATTR_GET == 8, "found %lld\n",
+ (long long)OBJ_XATTR_GET);
+ LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+ (long long)OBJ_INDEX_LOOKUP);
+ LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+ (long long)OBJ_INDEX_LOOKUP);
+ LASSERTF(OBJ_INDEX_INSERT == 10, "found %lld\n",
+ (long long)OBJ_INDEX_INSERT);
+ LASSERTF(OBJ_INDEX_DELETE == 11, "found %lld\n",
+ (long long)OBJ_INDEX_DELETE);
+
+ /* Checks for struct som_attrs */
+ LASSERTF((int)sizeof(struct som_attrs) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct som_attrs));
+ LASSERTF((int)offsetof(struct som_attrs, som_compat) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_compat));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_compat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_compat));
+ LASSERTF((int)offsetof(struct som_attrs, som_incompat) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_incompat));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_incompat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_incompat));
+ LASSERTF((int)offsetof(struct som_attrs, som_ioepoch) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_ioepoch));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_ioepoch));
+ LASSERTF((int)offsetof(struct som_attrs, som_size) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_size));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_size));
+ LASSERTF((int)offsetof(struct som_attrs, som_blocks) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_blocks));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_blocks));
+ LASSERTF((int)offsetof(struct som_attrs, som_mountid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct som_attrs, som_mountid));
+ LASSERTF((int)sizeof(((struct som_attrs *)0)->som_mountid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct som_attrs *)0)->som_mountid));
+
+ /* Checks for struct hsm_attrs */
+ LASSERTF((int)sizeof(struct hsm_attrs) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_attrs));
+ LASSERTF((int)offsetof(struct hsm_attrs, hsm_compat) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_attrs, hsm_compat));
+ LASSERTF((int)sizeof(((struct hsm_attrs *)0)->hsm_compat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_attrs *)0)->hsm_compat));
+ LASSERTF((int)offsetof(struct hsm_attrs, hsm_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_attrs, hsm_flags));
+ LASSERTF((int)sizeof(((struct hsm_attrs *)0)->hsm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_attrs *)0)->hsm_flags));
+ LASSERTF((int)offsetof(struct hsm_attrs, hsm_arch_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_attrs, hsm_arch_id));
+ LASSERTF((int)sizeof(((struct hsm_attrs *)0)->hsm_arch_id) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_attrs *)0)->hsm_arch_id));
+ LASSERTF((int)offsetof(struct hsm_attrs, hsm_arch_ver) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_attrs, hsm_arch_ver));
+ LASSERTF((int)sizeof(((struct hsm_attrs *)0)->hsm_arch_ver) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_attrs *)0)->hsm_arch_ver));
+
+ /* Checks for struct ost_id */
+ LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct ost_id));
+ LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_id, oi));
+ LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_id *)0)->oi));
+ LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n",
+ (long long)LUSTRE_FID_INIT_OID);
+ LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n",
+ (long long)FID_SEQ_OST_MDT0);
+ LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n",
+ (long long)FID_SEQ_LLOG);
+ LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n",
+ (long long)FID_SEQ_ECHO);
+ LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n",
+ (long long)FID_SEQ_OST_MDT1);
+ LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n",
+ (long long)FID_SEQ_OST_MAX);
+ LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n",
+ (long long)FID_SEQ_RSVD);
+ LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
+ (long long)FID_SEQ_IGIF);
+ LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IGIF_MAX);
+ LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IDIF);
+ LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IDIF_MAX);
+ LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_START);
+ LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_LOCAL_FILE);
+ LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_DOT_LUSTRE);
+ LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_SPECIAL);
+ LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_QUOTA);
+ LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_QUOTA_GLB);
+ LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_ROOT);
+ LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_NORMAL);
+ LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_LOV_DEFAULT);
+ LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_SPECIAL_BFL);
+ LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_DOT_LUSTRE);
+ LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_DOT_LUSTRE_OBF);
+
+ /* Checks for struct lu_dirent */
+ LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lu_dirent));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_fid));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_hash));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_reclen));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_namelen));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_namelen));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_attrs));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_name[0]));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
+ LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_FID);
+ LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_TYPE);
+ LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_64BITHASH);
+
+ /* Checks for struct luda_type */
+ LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
+ (long long)(int)sizeof(struct luda_type));
+ LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct luda_type, lt_type));
+ LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct luda_type *)0)->lt_type));
+
+ /* Checks for struct lu_dirpage */
+ LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lu_dirpage));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_flags));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_pad0));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0]));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]));
+ LASSERTF(LDF_EMPTY == 1, "found %lld\n",
+ (long long)LDF_EMPTY);
+ LASSERTF(LDF_COLLIDE == 2, "found %lld\n",
+ (long long)LDF_COLLIDE);
+ LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n",
+ (long long)LU_PAGE_SIZE);
+ /* Checks for union lu_page */
+ LASSERTF((int)sizeof(union lu_page) == 4096, "found %lld\n",
+ (long long)(int)sizeof(union lu_page));
+
+ /* Checks for struct lustre_handle */
+ LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_handle));
+ LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_handle, cookie));
+ LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_handle *)0)->cookie));
+
+ /* Checks for struct lustre_msg_v2 */
+ LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_msg_v2));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_magic));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_flags));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0]));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
+ LASSERTF(LUSTRE_MSG_MAGIC_V1 == 0x0BD00BD0, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V1);
+ LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V2);
+ LASSERTF(LUSTRE_MSG_MAGIC_V1_SWABBED == 0xD00BD00B, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V1_SWABBED);
+ LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+ /* Checks for struct ptlrpc_body */
+ LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
+ (long long)(int)sizeof(struct ptlrpc_body_v3));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv));
+ CLASSERT(PTLRPC_NUM_VERSIONS == 4);
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
+ CLASSERT(JOBSTATS_JOBID_SIZE == 32);
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct ptlrpc_body_v2, pb_limit), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding), (int)offsetof(struct ptlrpc_body_v2, pb_padding));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding));
+ LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
+ (long long)MSG_PTLRPC_BODY_OFF);
+ LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
+ (long long)REQ_REC_OFF);
+ LASSERTF(REPLY_REC_OFF == 1, "found %lld\n",
+ (long long)REPLY_REC_OFF);
+ LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n",
+ (long long)DLM_LOCKREQ_OFF);
+ LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n",
+ (long long)DLM_REQ_REC_OFF);
+ LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n",
+ (long long)DLM_INTENT_IT_OFF);
+ LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n",
+ (long long)DLM_INTENT_REC_OFF);
+ LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n",
+ (long long)DLM_LOCKREPLY_OFF);
+ LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n",
+ (long long)DLM_REPLY_REC_OFF);
+ LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
+ (long long)MSG_PTLRPC_HEADER_OFF);
+ LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
+ PTLRPC_MSG_VERSION);
+ LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
+ LUSTRE_VERSION_MASK);
+ LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
+ LUSTRE_OBD_VERSION);
+ LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
+ LUSTRE_MDS_VERSION);
+ LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
+ LUSTRE_OST_VERSION);
+ LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
+ LUSTRE_DLM_VERSION);
+ LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
+ LUSTRE_LOG_VERSION);
+ LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
+ LUSTRE_MGS_VERSION);
+ LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
+ (long long)MSGHDR_AT_SUPPORT);
+ LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
+ (long long)MSGHDR_CKSUM_INCOMPAT18);
+ LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_OP_FLAG_MASK);
+ LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
+ (long long)MSG_OP_FLAG_SHIFT);
+ LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
+ (unsigned)MSG_GEN_FLAG_MASK);
+ LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_LAST_REPLAY);
+ LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_RESENT);
+ LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_REPLAY);
+ LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_DELAY_REPLAY);
+ LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_VERSION_REPLAY);
+ LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_REQ_REPLAY_DONE);
+ LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_LOCK_REPLAY_DONE);
+ LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_RECOVERING);
+ LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_RECONNECT);
+ LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_REPLAYABLE);
+ LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_LIBCLIENT);
+ LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_INITIAL);
+ LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_ASYNC);
+ LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_NEXT_VER);
+ LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_TRANSNO);
+
+ /* Checks for struct obd_connect_data */
+ LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
+ (long long)(int)sizeof(struct obd_connect_data));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_version));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_index));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_unused));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_group));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding1));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding2));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding3));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding4));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding5));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding6));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding7));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding8));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding9));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingA));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingB));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingC));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingD));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingE));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingF));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF));
+ LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RDONLY);
+ LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_INDEX);
+ LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS);
+ LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT);
+ LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SRVLOCK);
+ LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_VERSION);
+ LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_REQPORTAL);
+ LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_ACL);
+ LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_XATTR);
+ LASSERTF(OBD_CONNECT_CROW == 0x200ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CROW);
+ LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_TRUNCLOCK);
+ LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_TRANSNO);
+ LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_IBITS);
+ LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_JOIN);
+ LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_ATTRFID);
+ LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_NODEVOH);
+ LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RMT_CLIENT);
+ LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RMT_CLIENT_FORCE);
+ LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_BRW_SIZE);
+ LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_QUOTA64);
+ LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS_CAPA);
+ LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_OSS_CAPA);
+ LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CANCELSET);
+ LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SOM);
+ LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_AT);
+ LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LRU_RESIZE);
+ LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS_MDS);
+ LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_REAL);
+ LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CHANGE_QS);
+ LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CKSUM);
+ LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FID);
+ LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_VBR);
+ LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LOV_V3);
+ LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT_SHRINK);
+ LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SKIP_ORPHAN);
+ LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MAX_EASIZE);
+ LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FULL20);
+ LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LAYOUTLOCK);
+ LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_64BITHASH);
+ LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MAXBYTES);
+ LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_IMP_RECOV);
+ LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_JOBSTATS);
+ LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_UMASK);
+ LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_EINPROGRESS);
+ LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT_PARAM);
+ LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLOCK_OWNER);
+ LASSERTF(OBD_CONNECT_LVB_TYPE == 0x400000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LVB_TYPE);
+ LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_NANOSEC_TIME);
+ LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LIGHTWEIGHT);
+ LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SHORTIO);
+ LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_PINGLESS);
+ LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_CRC32);
+ LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_ADLER);
+ LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_CRC32C);
+
+ /* Checks for struct obdo */
+ LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
+ (long long)(int)sizeof(struct obdo));
+ LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_valid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_valid));
+ LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_oi));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_oi));
+ LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_seq));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq));
+ LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_size));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_size));
+ LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_mtime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_mtime));
+ LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_atime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_atime));
+ LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_ctime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_ctime));
+ LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_blocks));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_blocks));
+ LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_grant));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_grant));
+ LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_blksize));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_blksize));
+ LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_mode));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_mode));
+ LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_uid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_uid));
+ LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_gid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_gid));
+ LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_flags));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_flags));
+ LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_nlink));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_nlink));
+ LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_oid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_oid));
+ LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_misc));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_misc));
+ LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_ioepoch));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch));
+ LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_stripe_idx));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
+ LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_ver));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver));
+ LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_handle));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_handle));
+ LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_lcookie));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_lcookie));
+ LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_uid_h));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_uid_h));
+ LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_gid_h));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_gid_h));
+ LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_data_version));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_data_version));
+ LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_4));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_4));
+ LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_5));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_5));
+ LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_6));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_6));
+ LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLID);
+ LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLATIME);
+ LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMTIME);
+ LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCTIME);
+ LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLSIZE);
+ LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLBLOCKS);
+ LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLBLKSZ);
+ LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMODE);
+ LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLTYPE);
+ LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLUID);
+ LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGID);
+ LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLFLAGS);
+ LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLNLINK);
+ LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGENER);
+ LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRDEV);
+ LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLEASIZE);
+ LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_LINKNAME);
+ LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLHANDLE);
+ LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCKSUM);
+ LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLQOS);
+ LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCOOKIE);
+ LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGROUP);
+ LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLFID);
+ LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLEPOCH);
+ LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGRANT);
+ LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLDIREA);
+ LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLUSRQUOTA);
+ LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGRPQUOTA);
+ LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMODEASIZE);
+ LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_MDS);
+ LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_REINT);
+ LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_MEA);
+ LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTR);
+ LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTRLS);
+ LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTRRM);
+ LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLACL);
+ LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTPERM);
+ LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMDSCAPA);
+ LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLOSSCAPA);
+ LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCKSPLIT);
+ LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCROSSREF);
+ LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGETATTRLOCK);
+ LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTLSETFACL);
+ LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTLGETFACL);
+ LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTRSETFACL);
+ LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTRGETFACL);
+ LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLDATAVERSION);
+ CLASSERT(OBD_FL_INLINEDATA == 0x00000001);
+ CLASSERT(OBD_FL_OBDMDEXISTS == 0x00000002);
+ CLASSERT(OBD_FL_DELORPHAN == 0x00000004);
+ CLASSERT(OBD_FL_NORPC == 0x00000008);
+ CLASSERT(OBD_FL_IDONLY == 0x00000010);
+ CLASSERT(OBD_FL_RECREATE_OBJS == 0x00000020);
+ CLASSERT(OBD_FL_DEBUG_CHECK == 0x00000040);
+ CLASSERT(OBD_FL_NO_USRQUOTA == 0x00000100);
+ CLASSERT(OBD_FL_NO_GRPQUOTA == 0x00000200);
+ CLASSERT(OBD_FL_CREATE_CROW == 0x00000400);
+ CLASSERT(OBD_FL_SRVLOCK == 0x00000800);
+ CLASSERT(OBD_FL_CKSUM_CRC32 == 0x00001000);
+ CLASSERT(OBD_FL_CKSUM_ADLER == 0x00002000);
+ CLASSERT(OBD_FL_CKSUM_CRC32C == 0x00004000);
+ CLASSERT(OBD_FL_CKSUM_RSVD2 == 0x00008000);
+ CLASSERT(OBD_FL_CKSUM_RSVD3 == 0x00010000);
+ CLASSERT(OBD_FL_SHRINK_GRANT == 0x00020000);
+ CLASSERT(OBD_FL_MMAP == 0x00040000);
+ CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000);
+ CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000);
+ CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000);
+
+ /* Checks for struct lov_ost_data_v1 */
+ LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lov_ost_data_v1));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
+
+ /* Checks for struct lov_mds_md_v1 */
+ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lov_mds_md_v1));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
+ CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+
+ /* Checks for struct lov_mds_md_v3 */
+ LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct lov_mds_md_v3));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
+ CLASSERT(LOV_MAXPOOLNAME == 16);
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
+ CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
+ LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_RAID0);
+ LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_RAID1);
+ LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_FIRST);
+ LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_CMOBD);
+
+ /* Checks for struct obd_statfs */
+ LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
+ (long long)(int)sizeof(struct obd_statfs));
+ LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_type));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_type));
+ LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_blocks));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bfree));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bavail));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail));
+ LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_ffree));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fsid));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bsize));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize));
+ LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_namelen));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen));
+ LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_state));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare2));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare3));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare4));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare5));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare6));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare7));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare8));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare9));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
+
+ /* Checks for struct obd_ioobj */
+ LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct obd_ioobj));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_oid));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
+
+ /* Checks for union lquota_id */
+ LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(union lquota_id));
+
+ LASSERTF(QUOTABLOCK_BITS == 10, "found %lld\n",
+ (long long)QUOTABLOCK_BITS);
+ LASSERTF(QUOTABLOCK_SIZE == 1024, "found %lld\n",
+ (long long)QUOTABLOCK_SIZE);
+
+ /* Checks for struct obd_quotactl */
+ LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct obd_quotactl));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_cmd));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_type));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_id));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_stat));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_dqblk));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk));
+
+ /* Checks for struct obd_dqinfo */
+ LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct obd_dqinfo));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_flags));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_valid));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid));
+
+ /* Checks for struct obd_dqblk */
+ LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct obd_dqblk));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_curspace));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_btime));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_itime));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_valid));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_padding));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
+ LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
+ Q_QUOTACHECK);
+ LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
+ Q_INITQUOTA);
+ LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
+ Q_GETOINFO);
+ LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
+ Q_GETOQUOTA);
+ LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
+ Q_FINVALIDATE);
+
+ /* Checks for struct lquota_acct_rec */
+ LASSERTF((int)sizeof(struct lquota_acct_rec) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_acct_rec));
+ LASSERTF((int)offsetof(struct lquota_acct_rec, bspace) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_acct_rec, bspace));
+ LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->bspace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_acct_rec *)0)->bspace));
+ LASSERTF((int)offsetof(struct lquota_acct_rec, ispace) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_acct_rec, ispace));
+ LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->ispace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_acct_rec *)0)->ispace));
+
+ /* Checks for struct lquota_glb_rec */
+ LASSERTF((int)sizeof(struct lquota_glb_rec) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_glb_rec));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_hardlimit) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_hardlimit));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_softlimit) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_softlimit));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_time) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_time));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_time));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_granted) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_granted));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted));
+
+ /* Checks for struct lquota_slv_rec */
+ LASSERTF((int)sizeof(struct lquota_slv_rec) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_slv_rec));
+ LASSERTF((int)offsetof(struct lquota_slv_rec, qsr_granted) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_slv_rec, qsr_granted));
+ LASSERTF((int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted));
+
+ /* Checks for struct idx_info */
+ LASSERTF((int)sizeof(struct idx_info) == 80, "found %lld\n",
+ (long long)(int)sizeof(struct idx_info));
+ LASSERTF((int)offsetof(struct idx_info, ii_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_magic));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_magic));
+ LASSERTF((int)offsetof(struct idx_info, ii_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_flags));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_flags));
+ LASSERTF((int)offsetof(struct idx_info, ii_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_count));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_count));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad0) == 10, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad0));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad0) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad0));
+ LASSERTF((int)offsetof(struct idx_info, ii_attrs) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_attrs));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_attrs) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_attrs));
+ LASSERTF((int)offsetof(struct idx_info, ii_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_fid));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_fid));
+ LASSERTF((int)offsetof(struct idx_info, ii_version) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_version));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_version));
+ LASSERTF((int)offsetof(struct idx_info, ii_hash_start) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_hash_start));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_start));
+ LASSERTF((int)offsetof(struct idx_info, ii_hash_end) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_hash_end));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_end));
+ LASSERTF((int)offsetof(struct idx_info, ii_keysize) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_keysize));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_keysize) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_keysize));
+ LASSERTF((int)offsetof(struct idx_info, ii_recsize) == 58, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_recsize));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_recsize) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_recsize));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad1) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad1));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad1));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad2) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad2));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad2));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad3) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad3));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad3));
+ CLASSERT(IDX_INFO_MAGIC == 0x3D37CC37);
+
+ /* Checks for struct lu_idxpage */
+ LASSERTF((int)sizeof(struct lu_idxpage) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct lu_idxpage));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_magic));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_magic));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_flags));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_flags));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_nr) == 6, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_nr));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_nr) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_nr));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_pad0) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_pad0));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_pad0) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_pad0));
+ CLASSERT(LIP_MAGIC == 0x8A6D6B6C);
+ LASSERTF(LIP_HDR_SIZE == 16, "found %lld\n",
+ (long long)LIP_HDR_SIZE);
+ LASSERTF(II_FL_NOHASH == 1, "found %lld\n",
+ (long long)II_FL_NOHASH);
+ LASSERTF(II_FL_VARKEY == 2, "found %lld\n",
+ (long long)II_FL_VARKEY);
+ LASSERTF(II_FL_VARREC == 4, "found %lld\n",
+ (long long)II_FL_VARREC);
+ LASSERTF(II_FL_NONUNQ == 8, "found %lld\n",
+ (long long)II_FL_NONUNQ);
+
+ /* Checks for struct niobuf_remote */
+ LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct niobuf_remote));
+ LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, offset));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
+ LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, len));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
+ LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, flags));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
+ LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
+ OBD_BRW_READ);
+ LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
+ OBD_BRW_WRITE);
+ LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
+ OBD_BRW_SYNC);
+ LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
+ OBD_BRW_CHECK);
+ LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
+ OBD_BRW_FROM_GRANT);
+ LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
+ OBD_BRW_GRANTED);
+ LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
+ OBD_BRW_NOCACHE);
+ LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
+ OBD_BRW_NOQUOTA);
+ LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
+ OBD_BRW_SRVLOCK);
+ LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
+ OBD_BRW_ASYNC);
+ LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
+ OBD_BRW_MEMALLOC);
+
+ /* Checks for struct ost_body */
+ LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
+ (long long)(int)sizeof(struct ost_body));
+ LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_body, oa));
+ LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_body *)0)->oa));
+
+ /* Checks for struct ll_fid */
+ LASSERTF((int)sizeof(struct ll_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fid));
+ LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, id));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->id));
+ LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, generation));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->generation));
+ LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, f_type));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->f_type));
+
+ /* Checks for struct mdt_body */
+ LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_body));
+ LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fid1));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
+ LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fid2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
+ LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, handle));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->handle));
+ LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, valid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->valid));
+ LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, size));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->size));
+ LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mtime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
+ LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, atime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->atime));
+ LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, ctime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
+ LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, blocks));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
+ LASSERTF((int)offsetof(struct mdt_body, unused1) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, unused1));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->unused1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->unused1));
+ LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fsuid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
+ LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fsgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
+ LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, capability));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->capability));
+ LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mode));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mode));
+ LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, uid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->uid));
+ LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, gid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->gid));
+ LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, flags));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->flags));
+ LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, rdev));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
+ LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, nlink));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
+ LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, unused2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
+ LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, suppgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
+ LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, eadatasize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
+ LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, aclsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
+ LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, max_mdsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
+ LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, max_cookiesize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
+ LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, uid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
+ LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, gid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
+ LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_5));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
+ LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_6));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
+ LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_7));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
+ LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_8));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
+ LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_9));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
+ LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_10));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
+ LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_CLOSED);
+ LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
+ MDS_FMODE_EXEC);
+ LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_EPOCH);
+ LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_TRUNC);
+ LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_SOM);
+ LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
+ MDS_OPEN_CREATED);
+ LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
+ MDS_OPEN_CROSS);
+ LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
+ MDS_OPEN_CREAT);
+ LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
+ MDS_OPEN_EXCL);
+ LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
+ MDS_OPEN_TRUNC);
+ LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
+ MDS_OPEN_APPEND);
+ LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
+ MDS_OPEN_SYNC);
+ LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
+ MDS_OPEN_DIRECTORY);
+ LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_BY_FID);
+ LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_DELAY_CREATE);
+ LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_OWNEROVERRIDE);
+ LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_JOIN_FILE);
+ LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_LOCK);
+ LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_HAS_EA);
+ LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_HAS_OBJS);
+ LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_NORESTORE);
+ LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_NEWSTRIPE);
+ LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_VOLATILE);
+ LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
+ LUSTRE_SYNC_FL);
+ LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
+ LUSTRE_IMMUTABLE_FL);
+ LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
+ LUSTRE_APPEND_FL);
+ LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
+ LUSTRE_NOATIME_FL);
+ LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
+ LUSTRE_DIRSYNC_FL);
+ LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
+ MDS_INODELOCK_LOOKUP);
+ LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
+ MDS_INODELOCK_UPDATE);
+ LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
+ MDS_INODELOCK_OPEN);
+ LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
+ MDS_INODELOCK_LAYOUT);
+
+ /* Checks for struct mdt_ioepoch */
+ LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_ioepoch));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, handle));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, ioepoch));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, flags));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, padding));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
+
+ /* Checks for struct mdt_remote_perm */
+ LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_remote_perm));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_uid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_gid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_padding));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding));
+ LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETUID_PERM);
+ LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETGID_PERM);
+ LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETGRP_PERM);
+ LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_RMTACL_PERM);
+ LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_RMTOWN_PERM);
+
+ /* Checks for struct mdt_rec_setattr */
+ LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_setattr));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_atime) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5));
+
+ /* Checks for struct mdt_rec_create */
+ LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_create));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_rdev));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_umask));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4));
+
+ /* Checks for struct mdt_rec_link */
+ LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_link));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9));
+
+ /* Checks for struct mdt_rec_unlink */
+ LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_unlink));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9));
+
+ /* Checks for struct mdt_rec_rename */
+ LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_rename));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_6) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8));
+
+ /* Checks for struct mdt_rec_setxattr */
+ LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_setxattr));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_9) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
+
+ /* Checks for struct mdt_rec_reint */
+ LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_reint));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_atime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_umask));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4));
+
+ /* Checks for struct lmv_desc */
+ LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct lmv_desc));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_tgt_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_pattern));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_1));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_2));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_3));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_4));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_uuid));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
+
+ /* Checks for struct lmv_stripe_md */
+ LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lmv_stripe_md));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
+ CLASSERT(LOV_MAXPOOLNAME == 16);
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
+
+ /* Checks for struct lov_desc */
+ LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct lov_desc));
+ LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_tgt_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_pattern));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_0));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0));
+ LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_1));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_2));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
+ LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_uuid));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid));
+ CLASSERT(LOV_DESC_MAGIC == 0xB0CCDE5C);
+
+ /* Checks for struct ldlm_res_id */
+ LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_res_id));
+ CLASSERT(RES_NAME_SIZE == 4);
+ LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_res_id, name[4]));
+ LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4]));
+
+ /* Checks for struct ldlm_extent */
+ LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_extent));
+ LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, start));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->start));
+ LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, end));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->end));
+ LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, gid));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
+
+ /* Checks for struct ldlm_inodebits */
+ LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_inodebits));
+ LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_inodebits, bits));
+ LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
+
+ /* Checks for struct ldlm_flock_wire */
+ LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_flock_wire));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid));
+
+ /* Checks for struct ldlm_intent */
+ LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_intent));
+ LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_intent, opc));
+ LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_intent *)0)->opc));
+
+ /* Checks for struct ldlm_resource_desc */
+ LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_resource_desc));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_type));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_name));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name));
+
+ /* Checks for struct ldlm_lock_desc */
+ LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_resource));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data));
+
+ /* Checks for struct ldlm_request */
+ LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_request));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_flags));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_count));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_desc));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_handle));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle));
+
+ /* Checks for struct ldlm_reply */
+ LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_reply));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_flags));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_padding));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_desc));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_handle));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_policy_res1));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2));
+
+ /* Checks for struct ost_lvb_v1 */
+ LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct ost_lvb_v1));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_size));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks));
+
+ /* Checks for struct ost_lvb */
+ LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct ost_lvb));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_size));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_mtime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_atime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_ctime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_blocks));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_atime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_padding));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding));
+
+ /* Checks for struct lquota_lvb */
+ LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_lvb));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_flags));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_pad1));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1));
+ LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n",
+ (long long)LQUOTA_FL_EDQUOT);
+
+ /* Checks for struct ldlm_gl_lquota_desc */
+ LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_gl_lquota_desc));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
+
+ /* Checks for struct mgs_send_param */
+ LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
+ (long long)(int)sizeof(struct mgs_send_param));
+ CLASSERT(MGS_PARAM_MAXLEN == 1024);
+ LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024]));
+ LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]));
+
+ /* Checks for struct cfg_marker */
+ LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n",
+ (long long)(int)sizeof(struct cfg_marker));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_step));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_flags));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_flags));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_vers));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_padding));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_createtime));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_canceltime));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_tgtname));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_comment));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment));
+
+ /* Checks for struct llog_logid */
+ LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(struct llog_logid));
+ LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid, lgl_oi));
+ LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi));
+ LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid, lgl_ogen));
+ LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
+ CLASSERT(OST_SZ_REC == 274730752);
+ CLASSERT(MDS_UNLINK_REC == 274801668);
+ CLASSERT(MDS_UNLINK64_REC == 275325956);
+ CLASSERT(MDS_SETATTR64_REC == 275325953);
+ CLASSERT(OBD_CFG_REC == 274857984);
+ CLASSERT(LLOG_GEN_REC == 274989056);
+ CLASSERT(CHANGELOG_REC == 275120128);
+ CLASSERT(CHANGELOG_USER_REC == 275185664);
+ CLASSERT(LLOG_HDR_MAGIC == 275010873);
+ CLASSERT(LLOG_LOGID_MAGIC == 275010875);
+
+ /* Checks for struct llog_catid */
+ LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct llog_catid));
+ LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_logid));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding1));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding2));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding3));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3));
+
+ /* Checks for struct llog_rec_hdr */
+ LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct llog_rec_hdr));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_len));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_index));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_type));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_id));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id));
+
+ /* Checks for struct llog_rec_tail */
+ LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct llog_rec_tail));
+ LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_tail, lrt_len));
+ LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len));
+ LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_tail, lrt_index));
+ LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index));
+
+ /* Checks for struct llog_logid_rec */
+ LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_logid_rec));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_hdr));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_id));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding1));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding2));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding3));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_tail));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail));
+
+ /* Checks for struct llog_unlink_rec */
+ LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llog_unlink_rec));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_oid));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_count));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_tail));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail));
+ /* Checks for struct llog_unlink64_rec */
+ LASSERTF((int)sizeof(struct llog_unlink64_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_unlink64_rec));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_count));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3));
+
+ /* Checks for struct llog_setattr64_rec */
+ LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_setattr64_rec));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail));
+
+ /* Checks for struct llog_size_change_rec */
+ LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_size_change_rec));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail));
+
+ /* Checks for struct changelog_rec */
+ LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_rec));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_namelen));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_flags));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_type));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_index));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_prev));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_time));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_tfid));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_pfid));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
+
+ /* Checks for struct changelog_ext_rec */
+ LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_ext_rec));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
+
+ /* Checks for struct changelog_setinfo */
+ LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_setinfo));
+ LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_setinfo, cs_recno));
+ LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno));
+ LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_setinfo, cs_id));
+ LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id));
+
+ /* Checks for struct llog_changelog_rec */
+ LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct llog_changelog_rec));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
+
+ /* Checks for struct llog_changelog_user_rec */
+ LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llog_changelog_user_rec));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail));
+
+ /* Checks for struct llog_gen */
+ LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct llog_gen));
+ LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen, mnt_cnt));
+ LASSERTF((int)sizeof(((struct llog_gen *)0)->mnt_cnt) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt));
+ LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen, conn_cnt));
+ LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt));
+
+ /* Checks for struct llog_gen_rec */
+ LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_gen_rec));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_gen));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_tail));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail));
+
+ /* Checks for struct llog_log_hdr */
+ LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n",
+ (long long)(int)sizeof(struct llog_log_hdr));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_hdr));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_count));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_size));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_flags));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_reserved) == 84, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_reserved));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_tail));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail));
+
+ /* Checks for struct llog_cookie */
+ LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct llog_cookie));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_lgl));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_subsys));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_index));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_padding));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding));
+
+ /* Checks for struct llogd_body */
+ LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct llogd_body));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_logid));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_llh_flags));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_index));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_saved_index));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_len));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_len));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
+ CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501);
+ CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502);
+ CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503);
+ CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504);
+ CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505);
+ CLASSERT(LLOG_ORIGIN_CONNECT == 506);
+ CLASSERT(LLOG_CATINFO == 507);
+ CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508);
+ CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
+ CLASSERT(LLOG_FIRST_OPC == 501);
+ CLASSERT(LLOG_LAST_OPC == 510);
+
+ /* Checks for struct llogd_conn_body */
+ LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llogd_conn_body));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
+
+ /* Checks for struct ll_fiemap_info_key */
+ LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fiemap_info_key));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, name[8]));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, oa));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap));
+
+ /* Checks for struct quota_body */
+ LASSERTF((int)sizeof(struct quota_body) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct quota_body));
+ LASSERTF((int)offsetof(struct quota_body, qb_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_fid));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_fid));
+ LASSERTF((int)offsetof(struct quota_body, qb_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_id));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_id));
+ LASSERTF((int)offsetof(struct quota_body, qb_flags) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_flags));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_flags));
+ LASSERTF((int)offsetof(struct quota_body, qb_padding) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_padding));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_padding));
+ LASSERTF((int)offsetof(struct quota_body, qb_count) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_count));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_count) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_count));
+ LASSERTF((int)offsetof(struct quota_body, qb_usage) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_usage));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_usage) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_usage));
+ LASSERTF((int)offsetof(struct quota_body, qb_slv_ver) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_slv_ver));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_slv_ver) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_slv_ver));
+ LASSERTF((int)offsetof(struct quota_body, qb_lockh) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_lockh));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_lockh) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_lockh));
+ LASSERTF((int)offsetof(struct quota_body, qb_glb_lockh) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_glb_lockh));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_glb_lockh) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_glb_lockh));
+ LASSERTF((int)offsetof(struct quota_body, qb_padding1[4]) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_padding1[4]));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding1[4]) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_padding1[4]));
+
+ /* Checks for struct mgs_target_info */
+ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n",
+ (long long)(int)sizeof(struct mgs_target_info));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_lustre_ver));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_config_ver));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_flags));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_nid_count));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_instance));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_fsname));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_svname));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_uuid));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_nids));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_params));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
+
+ /* Checks for struct lustre_capa */
+ LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_capa));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_fid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_opc));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_opc));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_uid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_gid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_flags));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_keyid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_timeout));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_expiry));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry));
+ CLASSERT(CAPA_HMAC_MAX_LEN == 64);
+ LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_hmac[64]));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]));
+
+ /* Checks for struct lustre_capa_key */
+ LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_capa_key));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_seq));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_keyid));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_padding));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding));
+ CLASSERT(CAPA_HMAC_KEY_MAX_LEN == 56);
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_key[56]));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]));
+
+ /* Checks for struct getinfo_fid2path */
+ LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct getinfo_fid2path));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_fid));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_recno));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0]));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]));
+
+ /* Checks for struct ll_user_fiemap */
+ LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ll_user_fiemap));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_start));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_length));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_flags));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_extents));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents));
+ CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001);
+ CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002);
+ CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
+
+ /* Checks for struct ll_fiemap_extent */
+ LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fiemap_extent));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
+ CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
+ CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
+ CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
+ CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008);
+ CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080);
+ CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100);
+ CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200);
+ CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400);
+ CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800);
+ CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000);
+ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
+
+ /* Checks for type posix_acl_xattr_entry */
+ LASSERTF((int)sizeof(posix_acl_xattr_entry) == 8, "found %lld\n",
+ (long long)(int)sizeof(posix_acl_xattr_entry));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_tag));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_tag));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_perm));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_perm));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_id));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_id));
+
+ /* Checks for type posix_acl_xattr_header */
+ LASSERTF((int)sizeof(posix_acl_xattr_header) == 4, "found %lld\n",
+ (long long)(int)sizeof(posix_acl_xattr_header));
+ LASSERTF((int)offsetof(posix_acl_xattr_header, a_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_header, a_version));
+ LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_version));
+ LASSERTF((int)offsetof(posix_acl_xattr_header, a_entries) == 4, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_header, a_entries));
+ LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_entries) == 0, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_entries));
+
+ /* Checks for struct link_ea_header */
+ LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct link_ea_header));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_magic));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_reccount));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_len));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len));
+ LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, padding1));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->padding1));
+ LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, padding2));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->padding2));
+ CLASSERT(LINK_EA_MAGIC == 0x11EAF1DFUL);
+
+ /* Checks for struct link_ea_entry */
+ LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n",
+ (long long)(int)sizeof(struct link_ea_entry));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_reclen));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_name));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+ /* Checks for struct layout_intent */
+ LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct layout_intent));
+ LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_opc));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_opc));
+ LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_flags));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
+ LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_start));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
+ LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_end));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
+ LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
+ (long long)LAYOUT_INTENT_ACCESS);
+ LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
+ (long long)LAYOUT_INTENT_READ);
+ LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n",
+ (long long)LAYOUT_INTENT_WRITE);
+ LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n",
+ (long long)LAYOUT_INTENT_GLIMPSE);
+ LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n",
+ (long long)LAYOUT_INTENT_TRUNC);
+ LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n",
+ (long long)LAYOUT_INTENT_RELEASE);
+ LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n",
+ (long long)LAYOUT_INTENT_RESTORE);
+
+ /* Checks for struct hsm_action_item */
+ LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_action_item));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_len));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_action));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_fid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_dfid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_extent));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_cookie));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_gid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_data));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data));
+
+ /* Checks for struct hsm_action_list */
+ LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_action_list));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_version));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_count));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_compound_id));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_flags));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id));
+ LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, padding1));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_fsname));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname));
+
+ /* Checks for struct hsm_progress */
+ LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_progress));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_fid));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_cookie));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_extent));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_flags));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_errval));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval));
+ LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, padding));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
+ LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
+ HP_FLAG_COMPLETED);
+ LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
+ HP_FLAG_RETRY);
+
+ LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_data_version));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_flags));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_errval));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval));
+ LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, padding));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->padding));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_hai));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai));
+
+ /* Checks for struct hsm_progress_kernel */
+ LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_progress_kernel));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2));
+
+ /* Checks for struct hsm_user_item */
+ LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_item));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+ /* Checks for struct hsm_user_state */
+ LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_states));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
+
+ /* Checks for struct hsm_state_set */
+ LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_state_set));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_valid));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_setmask));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_clearmask));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask));
+
+ /* Checks for struct hsm_current_action */
+ LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_current_action));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_state));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_action));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_action));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_location));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location));
+
+ /* Checks for struct hsm_request */
+ LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_request));
+ LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_action));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_action));
+ LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id));
+ LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_flags));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags));
+ LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_itemcount));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount));
+ LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_data_len));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
+ LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)HSM_FORCE_ACTION);
+ LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)HSM_GHOST_COPY);
+
+ /* Checks for struct hsm_user_request */
+ LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_request));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_user_item));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item));
+
+ /* Checks for struct update_buf */
+ LASSERTF((int)sizeof(struct update_buf) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct update_buf));
+ LASSERTF((int)offsetof(struct update_buf, ub_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_magic));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_magic));
+ LASSERTF((int)offsetof(struct update_buf, ub_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_count));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_count));
+ LASSERTF((int)offsetof(struct update_buf, ub_bufs) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_bufs));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_bufs) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_bufs));
+
+ /* Checks for struct update_reply */
+ LASSERTF((int)sizeof(struct update_reply) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct update_reply));
+ LASSERTF((int)offsetof(struct update_reply, ur_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_version));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_version));
+ LASSERTF((int)offsetof(struct update_reply, ur_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_count));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_count));
+ LASSERTF((int)offsetof(struct update_reply, ur_lens) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_lens));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_lens) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_lens));
+
+ /* Checks for struct update */
+ LASSERTF((int)sizeof(struct update) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct update));
+ LASSERTF((int)offsetof(struct update, u_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_type));
+ LASSERTF((int)sizeof(((struct update *)0)->u_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_type));
+ LASSERTF((int)offsetof(struct update, u_batchid) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_batchid));
+ LASSERTF((int)sizeof(((struct update *)0)->u_batchid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_batchid));
+ LASSERTF((int)offsetof(struct update, u_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_fid));
+ LASSERTF((int)sizeof(((struct update *)0)->u_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_fid));
+ LASSERTF((int)offsetof(struct update, u_lens) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_lens));
+ LASSERTF((int)sizeof(((struct update *)0)->u_lens) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_lens));
+ LASSERTF((int)offsetof(struct update, u_bufs) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_bufs));
+ LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_bufs));
+}
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index dd98cb1468a4..46eabd0e426a 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -896,7 +896,7 @@ static int xlr_setup_mdio(struct xlr_net_priv *priv,
return err;
}
- pr_info("Registerd mdio bus id : %s\n", priv->mii_bus->id);
+ pr_info("Registered mdio bus id : %s\n", priv->mii_bus->id);
err = xlr_mii_probe(priv);
if (err) {
mdiobus_free(priv->mii_bus);
@@ -1020,12 +1020,11 @@ static int xlr_net_probe(struct platform_device *pdev)
goto err_gmac;
}
- ndev->base_addr = (unsigned long) devm_request_and_ioremap
+ ndev->base_addr = (unsigned long) devm_ioremap_resource
(&pdev->dev, res);
- if (!ndev->base_addr) {
- dev_err(&pdev->dev,
- "devm_request_and_ioremap failed\n");
- return -EBUSY;
+ if (IS_ERR_VALUE(ndev->base_addr)) {
+ err = ndev->base_addr;
+ goto err_gmac;
}
res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
diff --git a/drivers/staging/nvec/nvec.c b/drivers/staging/nvec/nvec.c
index 197c393c4ca7..10393da315d7 100644
--- a/drivers/staging/nvec/nvec.c
+++ b/drivers/staging/nvec/nvec.c
@@ -33,7 +33,6 @@
#include <linux/mfd/core.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
-#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
@@ -772,11 +771,31 @@ static void nvec_power_off(void)
nvec_write_async(nvec_power_handle, ap_pwr_down, 2);
}
+/*
+ * Parse common device tree data
+ */
+static int nvec_i2c_parse_dt_pdata(struct nvec_chip *nvec)
+{
+ nvec->gpio = of_get_named_gpio(nvec->dev->of_node, "request-gpios", 0);
+
+ if (nvec->gpio < 0) {
+ dev_err(nvec->dev, "no gpio specified");
+ return -ENODEV;
+ }
+
+ if (of_property_read_u32(nvec->dev->of_node, "slave-addr",
+ &nvec->i2c_addr)) {
+ dev_err(nvec->dev, "no i2c address specified");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
static int tegra_nvec_probe(struct platform_device *pdev)
{
int err, ret;
struct clk *i2c_clk;
- struct nvec_platform_data *pdata = pdev->dev.platform_data;
struct nvec_chip *nvec;
struct nvec_msg *msg;
struct resource *res;
@@ -785,6 +804,11 @@ static int tegra_nvec_probe(struct platform_device *pdev)
unmute_speakers[] = { NVEC_OEM0, 0x10, 0x59, 0x95 },
enable_event[7] = { NVEC_SYS, CNF_EVENT_REPORTING, true };
+ if(!pdev->dev.of_node) {
+ dev_err(&pdev->dev, "must be instantiated using device tree\n");
+ return -ENODEV;
+ }
+
nvec = devm_kzalloc(&pdev->dev, sizeof(struct nvec_chip), GFP_KERNEL);
if (nvec == NULL) {
dev_err(&pdev->dev, "failed to reserve memory\n");
@@ -793,25 +817,9 @@ static int tegra_nvec_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, nvec);
nvec->dev = &pdev->dev;
- if (pdata) {
- nvec->gpio = pdata->gpio;
- nvec->i2c_addr = pdata->i2c_addr;
- } else if (nvec->dev->of_node) {
- nvec->gpio = of_get_named_gpio(nvec->dev->of_node,
- "request-gpios", 0);
- if (nvec->gpio < 0) {
- dev_err(&pdev->dev, "no gpio specified");
- return -ENODEV;
- }
- if (of_property_read_u32(nvec->dev->of_node,
- "slave-addr", &nvec->i2c_addr)) {
- dev_err(&pdev->dev, "no i2c address specified");
- return -ENODEV;
- }
- } else {
- dev_err(&pdev->dev, "no platform data\n");
- return -ENODEV;
- }
+ err = nvec_i2c_parse_dt_pdata(nvec);
+ if (err < 0)
+ return err;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/staging/nvec/nvec.h b/drivers/staging/nvec/nvec.h
index 2b1316d87470..e880518935fb 100644
--- a/drivers/staging/nvec/nvec.h
+++ b/drivers/staging/nvec/nvec.h
@@ -103,31 +103,6 @@ struct nvec_msg {
};
/**
- * struct nvec_subdev - A subdevice of nvec, such as nvec_kbd
- * @name: The name of the sub device
- * @platform_data: Platform data
- * @id: Identifier of the sub device
- */
-struct nvec_subdev {
- const char *name;
- void *platform_data;
- int id;
-};
-
-/**
- * struct nvec_platform_data - platform data for a tegra slave controller
- * @i2c_addr: number of i2c slave adapter the ec is connected to
- * @gpio: gpio number for the ec request line
- *
- * Platform data, to be used in board definitions. For an example, take a
- * look at the paz00 board in arch/arm/mach-tegra/board-paz00.c
- */
-struct nvec_platform_data {
- int i2c_addr;
- int gpio;
-};
-
-/**
* struct nvec_chip - A single connection to an NVIDIA Embedded controller
* @dev: The device
* @gpio: The same as for &struct nvec_platform_data
diff --git a/drivers/staging/nvec/nvec_kbd.c b/drivers/staging/nvec/nvec_kbd.c
index a0ec52a4114f..c17a1c3eb3ca 100644
--- a/drivers/staging/nvec/nvec_kbd.c
+++ b/drivers/staging/nvec/nvec_kbd.c
@@ -126,7 +126,7 @@ static int nvec_kbd_probe(struct platform_device *pdev)
for (i = 0; i < ARRAY_SIZE(extcode_tab_us102); ++i)
keycodes[j++] = extcode_tab_us102[i];
- idev = input_allocate_device();
+ idev = devm_input_allocate_device(&pdev->dev);
idev->name = "nvec keyboard";
idev->phys = "nvec";
idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_LED);
@@ -142,7 +142,7 @@ static int nvec_kbd_probe(struct platform_device *pdev)
clear_bit(0, idev->keybit);
err = input_register_device(idev);
if (err)
- goto fail;
+ return err;
keys_dev.input = idev;
keys_dev.notifier.notifier_call = nvec_keys_notifier;
@@ -161,10 +161,6 @@ static int nvec_kbd_probe(struct platform_device *pdev)
nvec_write_async(nvec, clear_leds, sizeof(clear_leds));
return 0;
-
-fail:
- input_free_device(idev);
- return err;
}
static int nvec_kbd_remove(struct platform_device *pdev)
@@ -177,8 +173,6 @@ static int nvec_kbd_remove(struct platform_device *pdev)
nvec_write_async(nvec, disable_kbd, 2);
nvec_unregister_notifier(nvec, &keys_dev.notifier);
- input_unregister_device(keys_dev.input);
-
return 0;
}
diff --git a/drivers/staging/octeon-usb/Kconfig b/drivers/staging/octeon-usb/Kconfig
new file mode 100644
index 000000000000..018af6db08c8
--- /dev/null
+++ b/drivers/staging/octeon-usb/Kconfig
@@ -0,0 +1,10 @@
+config OCTEON_USB
+ tristate "Cavium Networks Octeon USB support"
+ depends on CPU_CAVIUM_OCTEON && USB
+ help
+ This driver supports USB host controller on some Cavium
+ Networks' products in the Octeon family.
+
+ To compile this driver as a module, choose M here. The module
+ will be called octeon-usb.
+
diff --git a/drivers/staging/octeon-usb/Makefile b/drivers/staging/octeon-usb/Makefile
new file mode 100644
index 000000000000..89df1ad8be30
--- /dev/null
+++ b/drivers/staging/octeon-usb/Makefile
@@ -0,0 +1,3 @@
+obj-${CONFIG_OCTEON_USB} := octeon-usb.o
+octeon-usb-y := octeon-hcd.o
+octeon-usb-y += cvmx-usb.o
diff --git a/drivers/staging/octeon-usb/TODO b/drivers/staging/octeon-usb/TODO
new file mode 100644
index 000000000000..cc58a7e88baf
--- /dev/null
+++ b/drivers/staging/octeon-usb/TODO
@@ -0,0 +1,11 @@
+This driver is functional and has been tested on EdgeRouter Lite with
+USB mass storage.
+
+TODO:
+ - kernel coding style
+ - checkpatch warnings
+ - dead code elimination
+ - device tree bindings
+ - possibly eliminate the extra "hardware abstraction layer"
+
+Contact: Aaro Koskinen <aaro.koskinen@iki.fi>
diff --git a/drivers/staging/octeon-usb/cvmx-usb.c b/drivers/staging/octeon-usb/cvmx-usb.c
new file mode 100644
index 000000000000..bf366495fdd1
--- /dev/null
+++ b/drivers/staging/octeon-usb/cvmx-usb.c
@@ -0,0 +1,3229 @@
+/***********************license start***************
+ * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
+ * reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+
+ * * Neither the name of Cavium Networks nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+
+ * This Software, including technical data, may be subject to U.S. export control
+ * laws, including the U.S. Export Administration Act and its associated
+ * regulations, and may be subject to export or import regulations in other
+ * countries.
+
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+ * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
+ * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
+ * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
+ * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
+ * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
+ * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
+ * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
+ * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
+ * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+ ***********************license end**************************************/
+
+
+/**
+ * @file
+ *
+ * "cvmx-usb.c" defines a set of low level USB functions to help
+ * developers create Octeon USB drivers for various operating
+ * systems. These functions provide a generic API to the Octeon
+ * USB blocks, hiding the internal hardware specific
+ * operations.
+ *
+ * <hr>$Revision: 32636 $<hr>
+ */
+#include <linux/delay.h>
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-sysinfo.h>
+#include "cvmx-usbnx-defs.h"
+#include "cvmx-usbcx-defs.h"
+#include "cvmx-usb.h"
+#include <asm/octeon/cvmx-helper.h>
+#include <asm/octeon/cvmx-helper-board.h>
+
+#define CVMX_PREFETCH0(address) CVMX_PREFETCH(address, 0)
+#define CVMX_PREFETCH128(address) CVMX_PREFETCH(address, 128)
+// a normal prefetch
+#define CVMX_PREFETCH(address, offset) CVMX_PREFETCH_PREF0(address, offset)
+// normal prefetches that use the pref instruction
+#define CVMX_PREFETCH_PREFX(X, address, offset) asm volatile ("pref %[type], %[off](%[rbase])" : : [rbase] "d" (address), [off] "I" (offset), [type] "n" (X))
+#define CVMX_PREFETCH_PREF0(address, offset) CVMX_PREFETCH_PREFX(0, address, offset)
+#define CVMX_CLZ(result, input) asm ("clz %[rd],%[rs]" : [rd] "=d" (result) : [rs] "d" (input))
+
+#define cvmx_likely likely
+#define cvmx_wait_usec udelay
+#define cvmx_unlikely unlikely
+#define cvmx_le16_to_cpu le16_to_cpu
+
+#define MAX_RETRIES 3 /* Maximum number of times to retry failed transactions */
+#define MAX_PIPES 32 /* Maximum number of pipes that can be open at once */
+#define MAX_TRANSACTIONS 256 /* Maximum number of outstanding transactions across all pipes */
+#define MAX_CHANNELS 8 /* Maximum number of hardware channels supported by the USB block */
+#define MAX_USB_ADDRESS 127 /* The highest valid USB device address */
+#define MAX_USB_ENDPOINT 15 /* The highest valid USB endpoint number */
+#define MAX_USB_HUB_PORT 15 /* The highest valid port number on a hub */
+#define MAX_TRANSFER_BYTES ((1<<19)-1) /* The low level hardware can transfer a maximum of this number of bytes in each transfer. The field is 19 bits wide */
+#define MAX_TRANSFER_PACKETS ((1<<10)-1) /* The low level hardware can transfer a maximum of this number of packets in each transfer. The field is 10 bits wide */
+
+/* These defines disable the normal read and write csr. This is so I can add
+ extra debug stuff to the usb specific version and I won't use the normal
+ version by mistake */
+#define cvmx_read_csr use_cvmx_usb_read_csr64_instead_of_cvmx_read_csr
+#define cvmx_write_csr use_cvmx_usb_write_csr64_instead_of_cvmx_write_csr
+
+typedef enum {
+ __CVMX_USB_TRANSACTION_FLAGS_IN_USE = 1<<16,
+} cvmx_usb_transaction_flags_t;
+
+enum {
+ USB_CLOCK_TYPE_REF_12,
+ USB_CLOCK_TYPE_REF_24,
+ USB_CLOCK_TYPE_REF_48,
+ USB_CLOCK_TYPE_CRYSTAL_12,
+};
+
+/**
+ * Logical transactions may take numerous low level
+ * transactions, especially when splits are concerned. This
+ * enum represents all of the possible stages a transaction can
+ * be in. Note that split completes are always even. This is so
+ * the NAK handler can backup to the previous low level
+ * transaction with a simple clearing of bit 0.
+ */
+typedef enum {
+ CVMX_USB_STAGE_NON_CONTROL,
+ CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE,
+ CVMX_USB_STAGE_SETUP,
+ CVMX_USB_STAGE_SETUP_SPLIT_COMPLETE,
+ CVMX_USB_STAGE_DATA,
+ CVMX_USB_STAGE_DATA_SPLIT_COMPLETE,
+ CVMX_USB_STAGE_STATUS,
+ CVMX_USB_STAGE_STATUS_SPLIT_COMPLETE,
+} cvmx_usb_stage_t;
+
+/**
+ * This structure describes each pending USB transaction
+ * regardless of type. These are linked together to form a list
+ * of pending requests for a pipe.
+ */
+typedef struct cvmx_usb_transaction {
+ struct cvmx_usb_transaction *prev; /**< Transaction before this one in the pipe */
+ struct cvmx_usb_transaction *next; /**< Transaction after this one in the pipe */
+ cvmx_usb_transfer_t type; /**< Type of transaction, duplicated of the pipe */
+ cvmx_usb_transaction_flags_t flags; /**< State flags for this transaction */
+ uint64_t buffer; /**< User's physical buffer address to read/write */
+ int buffer_length; /**< Size of the user's buffer in bytes */
+ uint64_t control_header; /**< For control transactions, physical address of the 8 byte standard header */
+ int iso_start_frame; /**< For ISO transactions, the starting frame number */
+ int iso_number_packets; /**< For ISO transactions, the number of packets in the request */
+ cvmx_usb_iso_packet_t *iso_packets; /**< For ISO transactions, the sub packets in the request */
+ int xfersize;
+ int pktcnt;
+ int retries;
+ int actual_bytes; /**< Actual bytes transfer for this transaction */
+ cvmx_usb_stage_t stage; /**< For control transactions, the current stage */
+ cvmx_usb_callback_func_t callback; /**< User's callback function when complete */
+ void *callback_data; /**< User's data */
+} cvmx_usb_transaction_t;
+
+/**
+ * A pipe represents a virtual connection between Octeon and some
+ * USB device. It contains a list of pending request to the device.
+ */
+typedef struct cvmx_usb_pipe {
+ struct cvmx_usb_pipe *prev; /**< Pipe before this one in the list */
+ struct cvmx_usb_pipe *next; /**< Pipe after this one in the list */
+ cvmx_usb_transaction_t *head; /**< The first pending transaction */
+ cvmx_usb_transaction_t *tail; /**< The last pending transaction */
+ uint64_t interval; /**< For periodic pipes, the interval between packets in frames */
+ uint64_t next_tx_frame; /**< The next frame this pipe is allowed to transmit on */
+ cvmx_usb_pipe_flags_t flags; /**< State flags for this pipe */
+ cvmx_usb_speed_t device_speed; /**< Speed of device connected to this pipe */
+ cvmx_usb_transfer_t transfer_type; /**< Type of transaction supported by this pipe */
+ cvmx_usb_direction_t transfer_dir; /**< IN or OUT. Ignored for Control */
+ int multi_count; /**< Max packet in a row for the device */
+ uint16_t max_packet; /**< The device's maximum packet size in bytes */
+ uint8_t device_addr; /**< USB device address at other end of pipe */
+ uint8_t endpoint_num; /**< USB endpoint number at other end of pipe */
+ uint8_t hub_device_addr; /**< Hub address this device is connected to */
+ uint8_t hub_port; /**< Hub port this device is connected to */
+ uint8_t pid_toggle; /**< This toggles between 0/1 on every packet send to track the data pid needed */
+ uint8_t channel; /**< Hardware DMA channel for this pipe */
+ int8_t split_sc_frame; /**< The low order bits of the frame number the split complete should be sent on */
+} cvmx_usb_pipe_t;
+
+typedef struct {
+ cvmx_usb_pipe_t *head; /**< Head of the list, or NULL if empty */
+ cvmx_usb_pipe_t *tail; /**< Tail if the list, or NULL if empty */
+} cvmx_usb_pipe_list_t;
+
+typedef struct {
+ struct {
+ int channel;
+ int size;
+ uint64_t address;
+ } entry[MAX_CHANNELS+1];
+ int head;
+ int tail;
+} cvmx_usb_tx_fifo_t;
+
+/**
+ * The state of the USB block is stored in this structure
+ */
+typedef struct {
+ int init_flags; /**< Flags passed to initialize */
+ int index; /**< Which USB block this is for */
+ int idle_hardware_channels; /**< Bit set for every idle hardware channel */
+ cvmx_usbcx_hprt_t usbcx_hprt; /**< Stored port status so we don't need to read a CSR to determine splits */
+ cvmx_usb_pipe_t *pipe_for_channel[MAX_CHANNELS]; /**< Map channels to pipes */
+ cvmx_usb_transaction_t *free_transaction_head; /**< List of free transactions head */
+ cvmx_usb_transaction_t *free_transaction_tail; /**< List of free transactions tail */
+ cvmx_usb_pipe_t pipe[MAX_PIPES]; /**< Storage for pipes */
+ cvmx_usb_transaction_t transaction[MAX_TRANSACTIONS]; /**< Storage for transactions */
+ cvmx_usb_callback_func_t callback[__CVMX_USB_CALLBACK_END]; /**< User global callbacks */
+ void *callback_data[__CVMX_USB_CALLBACK_END]; /**< User data for each callback */
+ int indent; /**< Used by debug output to indent functions */
+ cvmx_usb_port_status_t port_status; /**< Last port status used for change notification */
+ cvmx_usb_pipe_list_t free_pipes; /**< List of all pipes that are currently closed */
+ cvmx_usb_pipe_list_t idle_pipes; /**< List of open pipes that have no transactions */
+ cvmx_usb_pipe_list_t active_pipes[4]; /**< Active pipes indexed by transfer type */
+ uint64_t frame_number; /**< Increments every SOF interrupt for time keeping */
+ cvmx_usb_transaction_t *active_split; /**< Points to the current active split, or NULL */
+ cvmx_usb_tx_fifo_t periodic;
+ cvmx_usb_tx_fifo_t nonperiodic;
+} cvmx_usb_internal_state_t;
+
+/* This macro logs out whenever a function is called if debugging is on */
+#define CVMX_USB_LOG_CALLED() \
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS)) \
+ cvmx_dprintf("%*s%s: called\n", 2*usb->indent++, "", __FUNCTION__);
+
+/* This macro logs out each function parameter if debugging is on */
+#define CVMX_USB_LOG_PARAM(format, param) \
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS)) \
+ cvmx_dprintf("%*s%s: param %s = " format "\n", 2*usb->indent, "", __FUNCTION__, #param, param);
+
+/* This macro logs out when a function returns a value */
+#define CVMX_USB_RETURN(v) \
+ do { \
+ typeof(v) r = v; \
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS)) \
+ cvmx_dprintf("%*s%s: returned %s(%d)\n", 2*--usb->indent, "", __FUNCTION__, #v, r); \
+ return r; \
+ } while (0);
+
+/* This macro logs out when a function doesn't return a value */
+#define CVMX_USB_RETURN_NOTHING() \
+ do { \
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS)) \
+ cvmx_dprintf("%*s%s: returned\n", 2*--usb->indent, "", __FUNCTION__); \
+ return; \
+ } while (0);
+
+/* This macro spins on a field waiting for it to reach a value */
+#define CVMX_WAIT_FOR_FIELD32(address, type, field, op, value, timeout_usec)\
+ ({int result; \
+ do { \
+ uint64_t done = cvmx_get_cycle() + (uint64_t)timeout_usec * \
+ octeon_get_clock_rate() / 1000000; \
+ type c; \
+ while (1) \
+ { \
+ c.u32 = __cvmx_usb_read_csr32(usb, address); \
+ if (c.s.field op (value)) { \
+ result = 0; \
+ break; \
+ } else if (cvmx_get_cycle() > done) { \
+ result = -1; \
+ break; \
+ } else \
+ cvmx_wait(100); \
+ } \
+ } while (0); \
+ result;})
+
+/* This macro logically sets a single field in a CSR. It does the sequence
+ read, modify, and write */
+#define USB_SET_FIELD32(address, type, field, value)\
+ do { \
+ type c; \
+ c.u32 = __cvmx_usb_read_csr32(usb, address);\
+ c.s.field = value; \
+ __cvmx_usb_write_csr32(usb, address, c.u32);\
+ } while (0)
+
+/* Returns the IO address to push/pop stuff data from the FIFOs */
+#define USB_FIFO_ADDRESS(channel, usb_index) (CVMX_USBCX_GOTGCTL(usb_index) + ((channel)+1)*0x1000)
+
+static int octeon_usb_get_clock_type(void)
+{
+ switch (cvmx_sysinfo_get()->board_type) {
+ case CVMX_BOARD_TYPE_BBGW_REF:
+ case CVMX_BOARD_TYPE_LANAI2_A:
+ case CVMX_BOARD_TYPE_LANAI2_U:
+ case CVMX_BOARD_TYPE_LANAI2_G:
+ return USB_CLOCK_TYPE_CRYSTAL_12;
+ }
+
+ /* FIXME: This should use CVMX_BOARD_TYPE_UBNT_E100 */
+ if (OCTEON_IS_MODEL(OCTEON_CN50XX) &&
+ cvmx_sysinfo_get()->board_type == 20002)
+ return USB_CLOCK_TYPE_CRYSTAL_12;
+
+ return USB_CLOCK_TYPE_REF_48;
+}
+
+/**
+ * @INTERNAL
+ * Read a USB 32bit CSR. It performs the necessary address swizzle
+ * for 32bit CSRs and logs the value in a readable format if
+ * debugging is on.
+ *
+ * @param usb USB block this access is for
+ * @param address 64bit address to read
+ *
+ * @return Result of the read
+ */
+static inline uint32_t __cvmx_usb_read_csr32(cvmx_usb_internal_state_t *usb,
+ uint64_t address)
+{
+ uint32_t result = cvmx_read64_uint32(address ^ 4);
+ return result;
+}
+
+
+/**
+ * @INTERNAL
+ * Write a USB 32bit CSR. It performs the necessary address
+ * swizzle for 32bit CSRs and logs the value in a readable format
+ * if debugging is on.
+ *
+ * @param usb USB block this access is for
+ * @param address 64bit address to write
+ * @param value Value to write
+ */
+static inline void __cvmx_usb_write_csr32(cvmx_usb_internal_state_t *usb,
+ uint64_t address, uint32_t value)
+{
+ cvmx_write64_uint32(address ^ 4, value);
+ cvmx_read64_uint64(CVMX_USBNX_DMA0_INB_CHN0(usb->index));
+}
+
+
+/**
+ * @INTERNAL
+ * Read a USB 64bit CSR. It logs the value in a readable format if
+ * debugging is on.
+ *
+ * @param usb USB block this access is for
+ * @param address 64bit address to read
+ *
+ * @return Result of the read
+ */
+static inline uint64_t __cvmx_usb_read_csr64(cvmx_usb_internal_state_t *usb,
+ uint64_t address)
+{
+ uint64_t result = cvmx_read64_uint64(address);
+ return result;
+}
+
+
+/**
+ * @INTERNAL
+ * Write a USB 64bit CSR. It logs the value in a readable format
+ * if debugging is on.
+ *
+ * @param usb USB block this access is for
+ * @param address 64bit address to write
+ * @param value Value to write
+ */
+static inline void __cvmx_usb_write_csr64(cvmx_usb_internal_state_t *usb,
+ uint64_t address, uint64_t value)
+{
+ cvmx_write64_uint64(address, value);
+}
+
+
+/**
+ * @INTERNAL
+ * Utility function to convert complete codes into strings
+ *
+ * @param complete_code
+ * Code to convert
+ *
+ * @return Human readable string
+ */
+static const char *__cvmx_usb_complete_to_string(cvmx_usb_complete_t complete_code)
+{
+ switch (complete_code)
+ {
+ case CVMX_USB_COMPLETE_SUCCESS: return "SUCCESS";
+ case CVMX_USB_COMPLETE_SHORT: return "SHORT";
+ case CVMX_USB_COMPLETE_CANCEL: return "CANCEL";
+ case CVMX_USB_COMPLETE_ERROR: return "ERROR";
+ case CVMX_USB_COMPLETE_STALL: return "STALL";
+ case CVMX_USB_COMPLETE_XACTERR: return "XACTERR";
+ case CVMX_USB_COMPLETE_DATATGLERR: return "DATATGLERR";
+ case CVMX_USB_COMPLETE_BABBLEERR: return "BABBLEERR";
+ case CVMX_USB_COMPLETE_FRAMEERR: return "FRAMEERR";
+ }
+ return "Update __cvmx_usb_complete_to_string";
+}
+
+
+/**
+ * @INTERNAL
+ * Return non zero if this pipe connects to a non HIGH speed
+ * device through a high speed hub.
+ *
+ * @param usb USB block this access is for
+ * @param pipe Pipe to check
+ *
+ * @return Non zero if we need to do split transactions
+ */
+static inline int __cvmx_usb_pipe_needs_split(cvmx_usb_internal_state_t *usb, cvmx_usb_pipe_t *pipe)
+{
+ return ((pipe->device_speed != CVMX_USB_SPEED_HIGH) && (usb->usbcx_hprt.s.prtspd == CVMX_USB_SPEED_HIGH));
+}
+
+
+/**
+ * @INTERNAL
+ * Trivial utility function to return the correct PID for a pipe
+ *
+ * @param pipe pipe to check
+ *
+ * @return PID for pipe
+ */
+static inline int __cvmx_usb_get_data_pid(cvmx_usb_pipe_t *pipe)
+{
+ if (pipe->pid_toggle)
+ return 2; /* Data1 */
+ else
+ return 0; /* Data0 */
+}
+
+
+/**
+ * Return the number of USB ports supported by this Octeon
+ * chip. If the chip doesn't support USB, or is not supported
+ * by this API, a zero will be returned. Most Octeon chips
+ * support one usb port, but some support two ports.
+ * cvmx_usb_initialize() must be called on independent
+ * cvmx_usb_state_t structures.
+ *
+ * @return Number of port, zero if usb isn't supported
+ */
+int cvmx_usb_get_num_ports(void)
+{
+ int arch_ports = 0;
+
+ if (OCTEON_IS_MODEL(OCTEON_CN56XX))
+ arch_ports = 1;
+ else if (OCTEON_IS_MODEL(OCTEON_CN52XX))
+ arch_ports = 2;
+ else if (OCTEON_IS_MODEL(OCTEON_CN50XX))
+ arch_ports = 1;
+ else if (OCTEON_IS_MODEL(OCTEON_CN31XX))
+ arch_ports = 1;
+ else if (OCTEON_IS_MODEL(OCTEON_CN30XX))
+ arch_ports = 1;
+ else
+ arch_ports = 0;
+
+ return arch_ports;
+}
+
+
+/**
+ * @INTERNAL
+ * Allocate a usb transaction for use
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return Transaction or NULL
+ */
+static inline cvmx_usb_transaction_t *__cvmx_usb_alloc_transaction(cvmx_usb_internal_state_t *usb)
+{
+ cvmx_usb_transaction_t *t;
+ t = usb->free_transaction_head;
+ if (t) {
+ usb->free_transaction_head = t->next;
+ if (!usb->free_transaction_head)
+ usb->free_transaction_tail = NULL;
+ }
+ else if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: Failed to allocate a transaction\n", __FUNCTION__);
+ if (t) {
+ memset(t, 0, sizeof(*t));
+ t->flags = __CVMX_USB_TRANSACTION_FLAGS_IN_USE;
+ }
+ return t;
+}
+
+
+/**
+ * @INTERNAL
+ * Free a usb transaction
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param transaction
+ * Transaction to free
+ */
+static inline void __cvmx_usb_free_transaction(cvmx_usb_internal_state_t *usb,
+ cvmx_usb_transaction_t *transaction)
+{
+ transaction->flags = 0;
+ transaction->prev = NULL;
+ transaction->next = NULL;
+ if (usb->free_transaction_tail)
+ usb->free_transaction_tail->next = transaction;
+ else
+ usb->free_transaction_head = transaction;
+ usb->free_transaction_tail = transaction;
+}
+
+
+/**
+ * @INTERNAL
+ * Add a pipe to the tail of a list
+ * @param list List to add pipe to
+ * @param pipe Pipe to add
+ */
+static inline void __cvmx_usb_append_pipe(cvmx_usb_pipe_list_t *list, cvmx_usb_pipe_t *pipe)
+{
+ pipe->next = NULL;
+ pipe->prev = list->tail;
+ if (list->tail)
+ list->tail->next = pipe;
+ else
+ list->head = pipe;
+ list->tail = pipe;
+}
+
+
+/**
+ * @INTERNAL
+ * Remove a pipe from a list
+ * @param list List to remove pipe from
+ * @param pipe Pipe to remove
+ */
+static inline void __cvmx_usb_remove_pipe(cvmx_usb_pipe_list_t *list, cvmx_usb_pipe_t *pipe)
+{
+ if (list->head == pipe) {
+ list->head = pipe->next;
+ pipe->next = NULL;
+ if (list->head)
+ list->head->prev = NULL;
+ else
+ list->tail = NULL;
+ }
+ else if (list->tail == pipe) {
+ list->tail = pipe->prev;
+ list->tail->next = NULL;
+ pipe->prev = NULL;
+ }
+ else {
+ pipe->prev->next = pipe->next;
+ pipe->next->prev = pipe->prev;
+ pipe->prev = NULL;
+ pipe->next = NULL;
+ }
+}
+
+
+/**
+ * Initialize a USB port for use. This must be called before any
+ * other access to the Octeon USB port is made. The port starts
+ * off in the disabled state.
+ *
+ * @param state Pointer to an empty cvmx_usb_state_t structure
+ * that will be populated by the initialize call.
+ * This structure is then passed to all other USB
+ * functions.
+ * @param usb_port_number
+ * Which Octeon USB port to initialize.
+ * @param flags Flags to control hardware initialization. See
+ * cvmx_usb_initialize_flags_t for the flag
+ * definitions. Some flags are mandatory.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_initialize(cvmx_usb_state_t *state,
+ int usb_port_number,
+ cvmx_usb_initialize_flags_t flags)
+{
+ cvmx_usbnx_clk_ctl_t usbn_clk_ctl;
+ cvmx_usbnx_usbp_ctl_status_t usbn_usbp_ctl_status;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ usb->init_flags = flags;
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", usb_port_number);
+ CVMX_USB_LOG_PARAM("0x%x", flags);
+
+ /* Make sure that state is large enough to store the internal state */
+ if (sizeof(*state) < sizeof(*usb))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ /* At first allow 0-1 for the usb port number */
+ if ((usb_port_number < 0) || (usb_port_number > 1))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ /* For all chips except 52XX there is only one port */
+ if (!OCTEON_IS_MODEL(OCTEON_CN52XX) && (usb_port_number > 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ /* Try to determine clock type automatically */
+ if ((flags & (CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_XI |
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_GND)) == 0) {
+ if (octeon_usb_get_clock_type() == USB_CLOCK_TYPE_CRYSTAL_12)
+ flags |= CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_XI; /* Only 12 MHZ crystals are supported */
+ else
+ flags |= CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_GND;
+ }
+
+ if (flags & CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_GND) {
+ /* Check for auto ref clock frequency */
+ if (!(flags & CVMX_USB_INITIALIZE_FLAGS_CLOCK_MHZ_MASK))
+ switch (octeon_usb_get_clock_type()) {
+ case USB_CLOCK_TYPE_REF_12:
+ flags |= CVMX_USB_INITIALIZE_FLAGS_CLOCK_12MHZ;
+ break;
+ case USB_CLOCK_TYPE_REF_24:
+ flags |= CVMX_USB_INITIALIZE_FLAGS_CLOCK_24MHZ;
+ break;
+ case USB_CLOCK_TYPE_REF_48:
+ flags |= CVMX_USB_INITIALIZE_FLAGS_CLOCK_48MHZ;
+ break;
+ default:
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ break;
+ }
+ }
+
+ memset(usb, 0, sizeof(usb));
+ usb->init_flags = flags;
+
+ /* Initialize the USB state structure */
+ {
+ int i;
+ usb->index = usb_port_number;
+
+ /* Initialize the transaction double linked list */
+ usb->free_transaction_head = NULL;
+ usb->free_transaction_tail = NULL;
+ for (i=0; i<MAX_TRANSACTIONS; i++)
+ __cvmx_usb_free_transaction(usb, usb->transaction + i);
+ for (i=0; i<MAX_PIPES; i++)
+ __cvmx_usb_append_pipe(&usb->free_pipes, usb->pipe + i);
+ }
+
+ /* Power On Reset and PHY Initialization */
+
+ /* 1. Wait for DCOK to assert (nothing to do) */
+ /* 2a. Write USBN0/1_CLK_CTL[POR] = 1 and
+ USBN0/1_CLK_CTL[HRST,PRST,HCLK_RST] = 0 */
+ usbn_clk_ctl.u64 = __cvmx_usb_read_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index));
+ usbn_clk_ctl.s.por = 1;
+ usbn_clk_ctl.s.hrst = 0;
+ usbn_clk_ctl.s.prst = 0;
+ usbn_clk_ctl.s.hclk_rst = 0;
+ usbn_clk_ctl.s.enable = 0;
+ /* 2b. Select the USB reference clock/crystal parameters by writing
+ appropriate values to USBN0/1_CLK_CTL[P_C_SEL, P_RTYPE, P_COM_ON] */
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_GND) {
+ /* The USB port uses 12/24/48MHz 2.5V board clock
+ source at USB_XO. USB_XI should be tied to GND.
+ Most Octeon evaluation boards require this setting */
+ if (OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
+ usbn_clk_ctl.cn31xx.p_rclk = 1; /* From CN31XX,CN30XX manual */
+ usbn_clk_ctl.cn31xx.p_xenbn = 0;
+ }
+ else if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN50XX))
+ usbn_clk_ctl.cn56xx.p_rtype = 2; /* From CN56XX,CN50XX manual */
+ else
+ usbn_clk_ctl.cn52xx.p_rtype = 1; /* From CN52XX manual */
+
+ switch (flags & CVMX_USB_INITIALIZE_FLAGS_CLOCK_MHZ_MASK) {
+ case CVMX_USB_INITIALIZE_FLAGS_CLOCK_12MHZ:
+ usbn_clk_ctl.s.p_c_sel = 0;
+ break;
+ case CVMX_USB_INITIALIZE_FLAGS_CLOCK_24MHZ:
+ usbn_clk_ctl.s.p_c_sel = 1;
+ break;
+ case CVMX_USB_INITIALIZE_FLAGS_CLOCK_48MHZ:
+ usbn_clk_ctl.s.p_c_sel = 2;
+ break;
+ }
+ }
+ else {
+ /* The USB port uses a 12MHz crystal as clock source
+ at USB_XO and USB_XI */
+ if (OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
+ usbn_clk_ctl.cn31xx.p_rclk = 1; /* From CN31XX,CN30XX manual */
+ usbn_clk_ctl.cn31xx.p_xenbn = 1;
+ }
+ else if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN50XX))
+ usbn_clk_ctl.cn56xx.p_rtype = 0; /* From CN56XX,CN50XX manual */
+ else
+ usbn_clk_ctl.cn52xx.p_rtype = 0; /* From CN52XX manual */
+
+ usbn_clk_ctl.s.p_c_sel = 0;
+ }
+ /* 2c. Select the HCLK via writing USBN0/1_CLK_CTL[DIVIDE, DIVIDE2] and
+ setting USBN0/1_CLK_CTL[ENABLE] = 1. Divide the core clock down such
+ that USB is as close as possible to 125Mhz */
+ {
+ int divisor = (octeon_get_clock_rate()+125000000-1)/125000000;
+ if (divisor < 4) /* Lower than 4 doesn't seem to work properly */
+ divisor = 4;
+ usbn_clk_ctl.s.divide = divisor;
+ usbn_clk_ctl.s.divide2 = 0;
+ }
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ /* 2d. Write USBN0/1_CLK_CTL[HCLK_RST] = 1 */
+ usbn_clk_ctl.s.hclk_rst = 1;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ /* 2e. Wait 64 core-clock cycles for HCLK to stabilize */
+ cvmx_wait(64);
+ /* 3. Program the power-on reset field in the USBN clock-control register:
+ USBN_CLK_CTL[POR] = 0 */
+ usbn_clk_ctl.s.por = 0;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ /* 4. Wait 1 ms for PHY clock to start */
+ cvmx_wait_usec(1000);
+ /* 5. Program the Reset input from automatic test equipment field in the
+ USBP control and status register: USBN_USBP_CTL_STATUS[ATE_RESET] = 1 */
+ usbn_usbp_ctl_status.u64 = __cvmx_usb_read_csr64(usb, CVMX_USBNX_USBP_CTL_STATUS(usb->index));
+ usbn_usbp_ctl_status.s.ate_reset = 1;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_USBP_CTL_STATUS(usb->index),
+ usbn_usbp_ctl_status.u64);
+ /* 6. Wait 10 cycles */
+ cvmx_wait(10);
+ /* 7. Clear ATE_RESET field in the USBN clock-control register:
+ USBN_USBP_CTL_STATUS[ATE_RESET] = 0 */
+ usbn_usbp_ctl_status.s.ate_reset = 0;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_USBP_CTL_STATUS(usb->index),
+ usbn_usbp_ctl_status.u64);
+ /* 8. Program the PHY reset field in the USBN clock-control register:
+ USBN_CLK_CTL[PRST] = 1 */
+ usbn_clk_ctl.s.prst = 1;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ /* 9. Program the USBP control and status register to select host or
+ device mode. USBN_USBP_CTL_STATUS[HST_MODE] = 0 for host, = 1 for
+ device */
+ usbn_usbp_ctl_status.s.hst_mode = 0;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_USBP_CTL_STATUS(usb->index),
+ usbn_usbp_ctl_status.u64);
+ /* 10. Wait 1 us */
+ cvmx_wait_usec(1);
+ /* 11. Program the hreset_n field in the USBN clock-control register:
+ USBN_CLK_CTL[HRST] = 1 */
+ usbn_clk_ctl.s.hrst = 1;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ /* 12. Proceed to USB core initialization */
+ usbn_clk_ctl.s.enable = 1;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ cvmx_wait_usec(1);
+
+ /* USB Core Initialization */
+
+ /* 1. Read USBC_GHWCFG1, USBC_GHWCFG2, USBC_GHWCFG3, USBC_GHWCFG4 to
+ determine USB core configuration parameters. */
+ /* Nothing needed */
+ /* 2. Program the following fields in the global AHB configuration
+ register (USBC_GAHBCFG)
+ DMA mode, USBC_GAHBCFG[DMAEn]: 1 = DMA mode, 0 = slave mode
+ Burst length, USBC_GAHBCFG[HBSTLEN] = 0
+ Nonperiodic TxFIFO empty level (slave mode only),
+ USBC_GAHBCFG[NPTXFEMPLVL]
+ Periodic TxFIFO empty level (slave mode only),
+ USBC_GAHBCFG[PTXFEMPLVL]
+ Global interrupt mask, USBC_GAHBCFG[GLBLINTRMSK] = 1 */
+ {
+ cvmx_usbcx_gahbcfg_t usbcx_gahbcfg;
+ /* Due to an errata, CN31XX doesn't support DMA */
+ if (OCTEON_IS_MODEL(OCTEON_CN31XX))
+ usb->init_flags |= CVMX_USB_INITIALIZE_FLAGS_NO_DMA;
+ usbcx_gahbcfg.u32 = 0;
+ usbcx_gahbcfg.s.dmaen = !(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA);
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)
+ usb->idle_hardware_channels = 0x1; /* Only use one channel with non DMA */
+ else if (OCTEON_IS_MODEL(OCTEON_CN5XXX))
+ usb->idle_hardware_channels = 0xf7; /* CN5XXX have an errata with channel 3 */
+ else
+ usb->idle_hardware_channels = 0xff;
+ usbcx_gahbcfg.s.hbstlen = 0;
+ usbcx_gahbcfg.s.nptxfemplvl = 1;
+ usbcx_gahbcfg.s.ptxfemplvl = 1;
+ usbcx_gahbcfg.s.glblintrmsk = 1;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_GAHBCFG(usb->index),
+ usbcx_gahbcfg.u32);
+ }
+ /* 3. Program the following fields in USBC_GUSBCFG register.
+ HS/FS timeout calibration, USBC_GUSBCFG[TOUTCAL] = 0
+ ULPI DDR select, USBC_GUSBCFG[DDRSEL] = 0
+ USB turnaround time, USBC_GUSBCFG[USBTRDTIM] = 0x5
+ PHY low-power clock select, USBC_GUSBCFG[PHYLPWRCLKSEL] = 0 */
+ {
+ cvmx_usbcx_gusbcfg_t usbcx_gusbcfg;
+ usbcx_gusbcfg.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GUSBCFG(usb->index));
+ usbcx_gusbcfg.s.toutcal = 0;
+ usbcx_gusbcfg.s.ddrsel = 0;
+ usbcx_gusbcfg.s.usbtrdtim = 0x5;
+ usbcx_gusbcfg.s.phylpwrclksel = 0;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_GUSBCFG(usb->index),
+ usbcx_gusbcfg.u32);
+ }
+ /* 4. The software must unmask the following bits in the USBC_GINTMSK
+ register.
+ OTG interrupt mask, USBC_GINTMSK[OTGINTMSK] = 1
+ Mode mismatch interrupt mask, USBC_GINTMSK[MODEMISMSK] = 1 */
+ {
+ cvmx_usbcx_gintmsk_t usbcx_gintmsk;
+ int channel;
+
+ usbcx_gintmsk.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GINTMSK(usb->index));
+ usbcx_gintmsk.s.otgintmsk = 1;
+ usbcx_gintmsk.s.modemismsk = 1;
+ usbcx_gintmsk.s.hchintmsk = 1;
+ usbcx_gintmsk.s.sofmsk = 0;
+ /* We need RX FIFO interrupts if we don't have DMA */
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)
+ usbcx_gintmsk.s.rxflvlmsk = 1;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_GINTMSK(usb->index),
+ usbcx_gintmsk.u32);
+
+ /* Disable all channel interrupts. We'll enable them per channel later */
+ for (channel=0; channel<8; channel++)
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCINTMSKX(channel, usb->index), 0);
+ }
+
+ {
+ /* Host Port Initialization */
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: USB%d is in host mode\n", __FUNCTION__, usb->index);
+
+ /* 1. Program the host-port interrupt-mask field to unmask,
+ USBC_GINTMSK[PRTINT] = 1 */
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t,
+ prtintmsk, 1);
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t,
+ disconnintmsk, 1);
+ /* 2. Program the USBC_HCFG register to select full-speed host or
+ high-speed host. */
+ {
+ cvmx_usbcx_hcfg_t usbcx_hcfg;
+ usbcx_hcfg.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCFG(usb->index));
+ usbcx_hcfg.s.fslssupp = 0;
+ usbcx_hcfg.s.fslspclksel = 0;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCFG(usb->index), usbcx_hcfg.u32);
+ }
+ /* 3. Program the port power bit to drive VBUS on the USB,
+ USBC_HPRT[PRTPWR] = 1 */
+ USB_SET_FIELD32(CVMX_USBCX_HPRT(usb->index), cvmx_usbcx_hprt_t, prtpwr, 1);
+
+ /* Steps 4-15 from the manual are done later in the port enable */
+ }
+
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Shutdown a USB port after a call to cvmx_usb_initialize().
+ * The port should be disabled with all pipes closed when this
+ * function is called.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_shutdown(cvmx_usb_state_t *state)
+{
+ cvmx_usbnx_clk_ctl_t usbn_clk_ctl;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ /* Make sure all pipes are closed */
+ if (usb->idle_pipes.head ||
+ usb->active_pipes[CVMX_USB_TRANSFER_ISOCHRONOUS].head ||
+ usb->active_pipes[CVMX_USB_TRANSFER_INTERRUPT].head ||
+ usb->active_pipes[CVMX_USB_TRANSFER_CONTROL].head ||
+ usb->active_pipes[CVMX_USB_TRANSFER_BULK].head)
+ CVMX_USB_RETURN(CVMX_USB_BUSY);
+
+ /* Disable the clocks and put them in power on reset */
+ usbn_clk_ctl.u64 = __cvmx_usb_read_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index));
+ usbn_clk_ctl.s.enable = 1;
+ usbn_clk_ctl.s.por = 1;
+ usbn_clk_ctl.s.hclk_rst = 1;
+ usbn_clk_ctl.s.prst = 0;
+ usbn_clk_ctl.s.hrst = 0;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_CLK_CTL(usb->index),
+ usbn_clk_ctl.u64);
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Enable a USB port. After this call succeeds, the USB port is
+ * online and servicing requests.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_enable(cvmx_usb_state_t *state)
+{
+ cvmx_usbcx_ghwcfg3_t usbcx_ghwcfg3;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ usb->usbcx_hprt.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPRT(usb->index));
+
+ /* If the port is already enabled the just return. We don't need to do
+ anything */
+ if (usb->usbcx_hprt.s.prtena)
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+
+ /* If there is nothing plugged into the port then fail immediately */
+ if (!usb->usbcx_hprt.s.prtconnsts) {
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: USB%d Nothing plugged into the port\n", __FUNCTION__, usb->index);
+ CVMX_USB_RETURN(CVMX_USB_TIMEOUT);
+ }
+
+ /* Program the port reset bit to start the reset process */
+ USB_SET_FIELD32(CVMX_USBCX_HPRT(usb->index), cvmx_usbcx_hprt_t, prtrst, 1);
+
+ /* Wait at least 50ms (high speed), or 10ms (full speed) for the reset
+ process to complete. */
+ cvmx_wait_usec(50000);
+
+ /* Program the port reset bit to 0, USBC_HPRT[PRTRST] = 0 */
+ USB_SET_FIELD32(CVMX_USBCX_HPRT(usb->index), cvmx_usbcx_hprt_t, prtrst, 0);
+
+ /* Wait for the USBC_HPRT[PRTENA]. */
+ if (CVMX_WAIT_FOR_FIELD32(CVMX_USBCX_HPRT(usb->index), cvmx_usbcx_hprt_t,
+ prtena, ==, 1, 100000)) {
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: Timeout waiting for the port to finish reset\n",
+ __FUNCTION__);
+ CVMX_USB_RETURN(CVMX_USB_TIMEOUT);
+ }
+
+ /* Read the port speed field to get the enumerated speed, USBC_HPRT[PRTSPD]. */
+ usb->usbcx_hprt.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPRT(usb->index));
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: USB%d is in %s speed mode\n", __FUNCTION__, usb->index,
+ (usb->usbcx_hprt.s.prtspd == CVMX_USB_SPEED_HIGH) ? "high" :
+ (usb->usbcx_hprt.s.prtspd == CVMX_USB_SPEED_FULL) ? "full" :
+ "low");
+
+ usbcx_ghwcfg3.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GHWCFG3(usb->index));
+
+ /* 13. Program the USBC_GRXFSIZ register to select the size of the receive
+ FIFO (25%). */
+ USB_SET_FIELD32(CVMX_USBCX_GRXFSIZ(usb->index), cvmx_usbcx_grxfsiz_t,
+ rxfdep, usbcx_ghwcfg3.s.dfifodepth / 4);
+ /* 14. Program the USBC_GNPTXFSIZ register to select the size and the
+ start address of the non- periodic transmit FIFO for nonperiodic
+ transactions (50%). */
+ {
+ cvmx_usbcx_gnptxfsiz_t siz;
+ siz.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GNPTXFSIZ(usb->index));
+ siz.s.nptxfdep = usbcx_ghwcfg3.s.dfifodepth / 2;
+ siz.s.nptxfstaddr = usbcx_ghwcfg3.s.dfifodepth / 4;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_GNPTXFSIZ(usb->index), siz.u32);
+ }
+ /* 15. Program the USBC_HPTXFSIZ register to select the size and start
+ address of the periodic transmit FIFO for periodic transactions (25%). */
+ {
+ cvmx_usbcx_hptxfsiz_t siz;
+ siz.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPTXFSIZ(usb->index));
+ siz.s.ptxfsize = usbcx_ghwcfg3.s.dfifodepth / 4;
+ siz.s.ptxfstaddr = 3 * usbcx_ghwcfg3.s.dfifodepth / 4;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HPTXFSIZ(usb->index), siz.u32);
+ }
+ /* Flush all FIFOs */
+ USB_SET_FIELD32(CVMX_USBCX_GRSTCTL(usb->index), cvmx_usbcx_grstctl_t, txfnum, 0x10);
+ USB_SET_FIELD32(CVMX_USBCX_GRSTCTL(usb->index), cvmx_usbcx_grstctl_t, txfflsh, 1);
+ CVMX_WAIT_FOR_FIELD32(CVMX_USBCX_GRSTCTL(usb->index), cvmx_usbcx_grstctl_t,
+ txfflsh, ==, 0, 100);
+ USB_SET_FIELD32(CVMX_USBCX_GRSTCTL(usb->index), cvmx_usbcx_grstctl_t, rxfflsh, 1);
+ CVMX_WAIT_FOR_FIELD32(CVMX_USBCX_GRSTCTL(usb->index), cvmx_usbcx_grstctl_t,
+ rxfflsh, ==, 0, 100);
+
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Disable a USB port. After this call the USB port will not
+ * generate data transfers and will not generate events.
+ * Transactions in process will fail and call their
+ * associated callbacks.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_disable(cvmx_usb_state_t *state)
+{
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ /* Disable the port */
+ USB_SET_FIELD32(CVMX_USBCX_HPRT(usb->index), cvmx_usbcx_hprt_t, prtena, 1);
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Get the current state of the USB port. Use this call to
+ * determine if the usb port has anything connected, is enabled,
+ * or has some sort of error condition. The return value of this
+ * call has "changed" bits to signal of the value of some fields
+ * have changed between calls. These "changed" fields are based
+ * on the last call to cvmx_usb_set_status(). In order to clear
+ * them, you must update the status through cvmx_usb_set_status().
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return Port status information
+ */
+cvmx_usb_port_status_t cvmx_usb_get_status(cvmx_usb_state_t *state)
+{
+ cvmx_usbcx_hprt_t usbc_hprt;
+ cvmx_usb_port_status_t result;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ memset(&result, 0, sizeof(result));
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ usbc_hprt.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPRT(usb->index));
+ result.port_enabled = usbc_hprt.s.prtena;
+ result.port_over_current = usbc_hprt.s.prtovrcurract;
+ result.port_powered = usbc_hprt.s.prtpwr;
+ result.port_speed = usbc_hprt.s.prtspd;
+ result.connected = usbc_hprt.s.prtconnsts;
+ result.connect_change = (result.connected != usb->port_status.connected);
+
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS))
+ cvmx_dprintf("%*s%s: returned port enabled=%d, over_current=%d, powered=%d, speed=%d, connected=%d, connect_change=%d\n",
+ 2*(--usb->indent), "", __FUNCTION__,
+ result.port_enabled,
+ result.port_over_current,
+ result.port_powered,
+ result.port_speed,
+ result.connected,
+ result.connect_change);
+ return result;
+}
+
+
+/**
+ * Set the current state of the USB port. The status is used as
+ * a reference for the "changed" bits returned by
+ * cvmx_usb_get_status(). Other than serving as a reference, the
+ * status passed to this function is not used. No fields can be
+ * changed through this call.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param port_status
+ * Port status to set, most like returned by cvmx_usb_get_status()
+ */
+void cvmx_usb_set_status(cvmx_usb_state_t *state, cvmx_usb_port_status_t port_status)
+{
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ usb->port_status = port_status;
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Convert a USB transaction into a handle
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param transaction
+ * Transaction to get handle for
+ *
+ * @return Handle
+ */
+static inline int __cvmx_usb_get_submit_handle(cvmx_usb_internal_state_t *usb,
+ cvmx_usb_transaction_t *transaction)
+{
+ return ((unsigned long)transaction - (unsigned long)usb->transaction) /
+ sizeof(*transaction);
+}
+
+
+/**
+ * @INTERNAL
+ * Convert a USB pipe into a handle
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe Pipe to get handle for
+ *
+ * @return Handle
+ */
+static inline int __cvmx_usb_get_pipe_handle(cvmx_usb_internal_state_t *usb,
+ cvmx_usb_pipe_t *pipe)
+{
+ return ((unsigned long)pipe - (unsigned long)usb->pipe) / sizeof(*pipe);
+}
+
+
+/**
+ * Open a virtual pipe between the host and a USB device. A pipe
+ * must be opened before data can be transferred between a device
+ * and Octeon.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param flags Optional pipe flags defined in
+ * cvmx_usb_pipe_flags_t.
+ * @param device_addr
+ * USB device address to open the pipe to
+ * (0-127).
+ * @param endpoint_num
+ * USB endpoint number to open the pipe to
+ * (0-15).
+ * @param device_speed
+ * The speed of the device the pipe is going
+ * to. This must match the device's speed,
+ * which may be different than the port speed.
+ * @param max_packet The maximum packet length the device can
+ * transmit/receive (low speed=0-8, full
+ * speed=0-1023, high speed=0-1024). This value
+ * comes from the standard endpoint descriptor
+ * field wMaxPacketSize bits <10:0>.
+ * @param transfer_type
+ * The type of transfer this pipe is for.
+ * @param transfer_dir
+ * The direction the pipe is in. This is not
+ * used for control pipes.
+ * @param interval For ISOCHRONOUS and INTERRUPT transfers,
+ * this is how often the transfer is scheduled
+ * for. All other transfers should specify
+ * zero. The units are in frames (8000/sec at
+ * high speed, 1000/sec for full speed).
+ * @param multi_count
+ * For high speed devices, this is the maximum
+ * allowed number of packet per microframe.
+ * Specify zero for non high speed devices. This
+ * value comes from the standard endpoint descriptor
+ * field wMaxPacketSize bits <12:11>.
+ * @param hub_device_addr
+ * Hub device address this device is connected
+ * to. Devices connected directly to Octeon
+ * use zero. This is only used when the device
+ * is full/low speed behind a high speed hub.
+ * The address will be of the high speed hub,
+ * not and full speed hubs after it.
+ * @param hub_port Which port on the hub the device is
+ * connected. Use zero for devices connected
+ * directly to Octeon. Like hub_device_addr,
+ * this is only used for full/low speed
+ * devices behind a high speed hub.
+ *
+ * @return A non negative value is a pipe handle. Negative
+ * values are failure codes from cvmx_usb_status_t.
+ */
+int cvmx_usb_open_pipe(cvmx_usb_state_t *state, cvmx_usb_pipe_flags_t flags,
+ int device_addr, int endpoint_num,
+ cvmx_usb_speed_t device_speed, int max_packet,
+ cvmx_usb_transfer_t transfer_type,
+ cvmx_usb_direction_t transfer_dir, int interval,
+ int multi_count, int hub_device_addr, int hub_port)
+{
+ cvmx_usb_pipe_t *pipe;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("0x%x", flags);
+ CVMX_USB_LOG_PARAM("%d", device_addr);
+ CVMX_USB_LOG_PARAM("%d", endpoint_num);
+ CVMX_USB_LOG_PARAM("%d", device_speed);
+ CVMX_USB_LOG_PARAM("%d", max_packet);
+ CVMX_USB_LOG_PARAM("%d", transfer_type);
+ CVMX_USB_LOG_PARAM("%d", transfer_dir);
+ CVMX_USB_LOG_PARAM("%d", interval);
+ CVMX_USB_LOG_PARAM("%d", multi_count);
+ CVMX_USB_LOG_PARAM("%d", hub_device_addr);
+ CVMX_USB_LOG_PARAM("%d", hub_port);
+
+ if (cvmx_unlikely((device_addr < 0) || (device_addr > MAX_USB_ADDRESS)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((endpoint_num < 0) || (endpoint_num > MAX_USB_ENDPOINT)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(device_speed > CVMX_USB_SPEED_LOW))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((max_packet <= 0) || (max_packet > 1024)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(transfer_type > CVMX_USB_TRANSFER_INTERRUPT))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((transfer_dir != CVMX_USB_DIRECTION_OUT) &&
+ (transfer_dir != CVMX_USB_DIRECTION_IN)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(interval < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((transfer_type == CVMX_USB_TRANSFER_CONTROL) && interval))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(multi_count < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((device_speed != CVMX_USB_SPEED_HIGH) &&
+ (multi_count != 0)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((hub_device_addr < 0) || (hub_device_addr > MAX_USB_ADDRESS)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((hub_port < 0) || (hub_port > MAX_USB_HUB_PORT)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Find a free pipe */
+ pipe = usb->free_pipes.head;
+ if (!pipe)
+ CVMX_USB_RETURN(CVMX_USB_NO_MEMORY);
+ __cvmx_usb_remove_pipe(&usb->free_pipes, pipe);
+ pipe->flags = flags | __CVMX_USB_PIPE_FLAGS_OPEN;
+ if ((device_speed == CVMX_USB_SPEED_HIGH) &&
+ (transfer_dir == CVMX_USB_DIRECTION_OUT) &&
+ (transfer_type == CVMX_USB_TRANSFER_BULK))
+ pipe->flags |= __CVMX_USB_PIPE_FLAGS_NEED_PING;
+ pipe->device_addr = device_addr;
+ pipe->endpoint_num = endpoint_num;
+ pipe->device_speed = device_speed;
+ pipe->max_packet = max_packet;
+ pipe->transfer_type = transfer_type;
+ pipe->transfer_dir = transfer_dir;
+ /* All pipes use interval to rate limit NAK processing. Force an interval
+ if one wasn't supplied */
+ if (!interval)
+ interval = 1;
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ pipe->interval = interval*8;
+ /* Force start splits to be schedule on uFrame 0 */
+ pipe->next_tx_frame = ((usb->frame_number+7)&~7) + pipe->interval;
+ }
+ else {
+ pipe->interval = interval;
+ pipe->next_tx_frame = usb->frame_number + pipe->interval;
+ }
+ pipe->multi_count = multi_count;
+ pipe->hub_device_addr = hub_device_addr;
+ pipe->hub_port = hub_port;
+ pipe->pid_toggle = 0;
+ pipe->split_sc_frame = -1;
+ __cvmx_usb_append_pipe(&usb->idle_pipes, pipe);
+
+ /* We don't need to tell the hardware about this pipe yet since
+ it doesn't have any submitted requests */
+
+ CVMX_USB_RETURN(__cvmx_usb_get_pipe_handle(usb, pipe));
+}
+
+
+/**
+ * @INTERNAL
+ * Poll the RX FIFOs and remove data as needed. This function is only used
+ * in non DMA mode. It is very important that this function be called quickly
+ * enough to prevent FIFO overflow.
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ */
+static void __cvmx_usb_poll_rx_fifo(cvmx_usb_internal_state_t *usb)
+{
+ cvmx_usbcx_grxstsph_t rx_status;
+ int channel;
+ int bytes;
+ uint64_t address;
+ uint32_t *ptr;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+
+ rx_status.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GRXSTSPH(usb->index));
+ /* Only read data if IN data is there */
+ if (rx_status.s.pktsts != 2)
+ CVMX_USB_RETURN_NOTHING();
+ /* Check if no data is available */
+ if (!rx_status.s.bcnt)
+ CVMX_USB_RETURN_NOTHING();
+
+ channel = rx_status.s.chnum;
+ bytes = rx_status.s.bcnt;
+ if (!bytes)
+ CVMX_USB_RETURN_NOTHING();
+
+ /* Get where the DMA engine would have written this data */
+ address = __cvmx_usb_read_csr64(usb, CVMX_USBNX_DMA0_INB_CHN0(usb->index) + channel*8);
+ ptr = cvmx_phys_to_ptr(address);
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_DMA0_INB_CHN0(usb->index) + channel*8, address + bytes);
+
+ /* Loop writing the FIFO data for this packet into memory */
+ while (bytes > 0) {
+ *ptr++ = __cvmx_usb_read_csr32(usb, USB_FIFO_ADDRESS(channel, usb->index));
+ bytes -= 4;
+ }
+ CVMX_SYNCW;
+
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * Fill the TX hardware fifo with data out of the software
+ * fifos
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param fifo Software fifo to use
+ * @param available Amount of space in the hardware fifo
+ *
+ * @return Non zero if the hardware fifo was too small and needs
+ * to be serviced again.
+ */
+static int __cvmx_usb_fill_tx_hw(cvmx_usb_internal_state_t *usb, cvmx_usb_tx_fifo_t *fifo, int available)
+{
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%p", fifo);
+ CVMX_USB_LOG_PARAM("%d", available);
+
+ /* We're done either when there isn't anymore space or the software FIFO
+ is empty */
+ while (available && (fifo->head != fifo->tail)) {
+ int i = fifo->tail;
+ const uint32_t *ptr = cvmx_phys_to_ptr(fifo->entry[i].address);
+ uint64_t csr_address = USB_FIFO_ADDRESS(fifo->entry[i].channel, usb->index) ^ 4;
+ int words = available;
+
+ /* Limit the amount of data to waht the SW fifo has */
+ if (fifo->entry[i].size <= available) {
+ words = fifo->entry[i].size;
+ fifo->tail++;
+ if (fifo->tail > MAX_CHANNELS)
+ fifo->tail = 0;
+ }
+
+ /* Update the next locations and counts */
+ available -= words;
+ fifo->entry[i].address += words * 4;
+ fifo->entry[i].size -= words;
+
+ /* Write the HW fifo data. The read every three writes is due
+ to an errata on CN3XXX chips */
+ while (words > 3) {
+ cvmx_write64_uint32(csr_address, *ptr++);
+ cvmx_write64_uint32(csr_address, *ptr++);
+ cvmx_write64_uint32(csr_address, *ptr++);
+ cvmx_read64_uint64(CVMX_USBNX_DMA0_INB_CHN0(usb->index));
+ words -= 3;
+ }
+ cvmx_write64_uint32(csr_address, *ptr++);
+ if (--words) {
+ cvmx_write64_uint32(csr_address, *ptr++);
+ if (--words)
+ cvmx_write64_uint32(csr_address, *ptr++);
+ }
+ cvmx_read64_uint64(CVMX_USBNX_DMA0_INB_CHN0(usb->index));
+ }
+ CVMX_USB_RETURN(fifo->head != fifo->tail);
+}
+
+
+/**
+ * Check the hardware FIFOs and fill them as needed
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ */
+static void __cvmx_usb_poll_tx_fifo(cvmx_usb_internal_state_t *usb)
+{
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+
+ if (usb->periodic.head != usb->periodic.tail) {
+ cvmx_usbcx_hptxsts_t tx_status;
+ tx_status.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPTXSTS(usb->index));
+ if (__cvmx_usb_fill_tx_hw(usb, &usb->periodic, tx_status.s.ptxfspcavail))
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t, ptxfempmsk, 1);
+ else
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t, ptxfempmsk, 0);
+ }
+
+ if (usb->nonperiodic.head != usb->nonperiodic.tail) {
+ cvmx_usbcx_gnptxsts_t tx_status;
+ tx_status.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GNPTXSTS(usb->index));
+ if (__cvmx_usb_fill_tx_hw(usb, &usb->nonperiodic, tx_status.s.nptxfspcavail))
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t, nptxfempmsk, 1);
+ else
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t, nptxfempmsk, 0);
+ }
+
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Fill the TX FIFO with an outgoing packet
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param channel Channel number to get packet from
+ */
+static void __cvmx_usb_fill_tx_fifo(cvmx_usb_internal_state_t *usb, int channel)
+{
+ cvmx_usbcx_hccharx_t hcchar;
+ cvmx_usbcx_hcspltx_t usbc_hcsplt;
+ cvmx_usbcx_hctsizx_t usbc_hctsiz;
+ cvmx_usb_tx_fifo_t *fifo;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%d", channel);
+
+ /* We only need to fill data on outbound channels */
+ hcchar.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index));
+ if (hcchar.s.epdir != CVMX_USB_DIRECTION_OUT)
+ CVMX_USB_RETURN_NOTHING();
+
+ /* OUT Splits only have data on the start and not the complete */
+ usbc_hcsplt.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCSPLTX(channel, usb->index));
+ if (usbc_hcsplt.s.spltena && usbc_hcsplt.s.compsplt)
+ CVMX_USB_RETURN_NOTHING();
+
+ /* Find out how many bytes we need to fill and convert it into 32bit words */
+ usbc_hctsiz.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index));
+ if (!usbc_hctsiz.s.xfersize)
+ CVMX_USB_RETURN_NOTHING();
+
+ if ((hcchar.s.eptype == CVMX_USB_TRANSFER_INTERRUPT) ||
+ (hcchar.s.eptype == CVMX_USB_TRANSFER_ISOCHRONOUS))
+ fifo = &usb->periodic;
+ else
+ fifo = &usb->nonperiodic;
+
+ fifo->entry[fifo->head].channel = channel;
+ fifo->entry[fifo->head].address = __cvmx_usb_read_csr64(usb, CVMX_USBNX_DMA0_OUTB_CHN0(usb->index) + channel*8);
+ fifo->entry[fifo->head].size = (usbc_hctsiz.s.xfersize+3)>>2;
+ fifo->head++;
+ if (fifo->head > MAX_CHANNELS)
+ fifo->head = 0;
+
+ __cvmx_usb_poll_tx_fifo(usb);
+
+ CVMX_USB_RETURN_NOTHING();
+}
+
+/**
+ * @INTERNAL
+ * Perform channel specific setup for Control transactions. All
+ * the generic stuff will already have been done in
+ * __cvmx_usb_start_channel()
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param channel Channel to setup
+ * @param pipe Pipe for control transaction
+ */
+static void __cvmx_usb_start_channel_control(cvmx_usb_internal_state_t *usb,
+ int channel,
+ cvmx_usb_pipe_t *pipe)
+{
+ cvmx_usb_transaction_t *transaction = pipe->head;
+ cvmx_usb_control_header_t *header = cvmx_phys_to_ptr(transaction->control_header);
+ int bytes_to_transfer = transaction->buffer_length - transaction->actual_bytes;
+ int packets_to_transfer;
+ cvmx_usbcx_hctsizx_t usbc_hctsiz;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%d", channel);
+ CVMX_USB_LOG_PARAM("%p", pipe);
+
+ usbc_hctsiz.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index));
+
+ switch (transaction->stage) {
+ case CVMX_USB_STAGE_NON_CONTROL:
+ case CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE:
+ cvmx_dprintf("%s: ERROR - Non control stage\n", __FUNCTION__);
+ break;
+ case CVMX_USB_STAGE_SETUP:
+ usbc_hctsiz.s.pid = 3; /* Setup */
+ bytes_to_transfer = sizeof(*header);
+ /* All Control operations start with a setup going OUT */
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index), cvmx_usbcx_hccharx_t, epdir, CVMX_USB_DIRECTION_OUT);
+ /* Setup send the control header instead of the buffer data. The
+ buffer data will be used in the next stage */
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_DMA0_OUTB_CHN0(usb->index) + channel*8, transaction->control_header);
+ break;
+ case CVMX_USB_STAGE_SETUP_SPLIT_COMPLETE:
+ usbc_hctsiz.s.pid = 3; /* Setup */
+ bytes_to_transfer = 0;
+ /* All Control operations start with a setup going OUT */
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index), cvmx_usbcx_hccharx_t, epdir, CVMX_USB_DIRECTION_OUT);
+ USB_SET_FIELD32(CVMX_USBCX_HCSPLTX(channel, usb->index), cvmx_usbcx_hcspltx_t, compsplt, 1);
+ break;
+ case CVMX_USB_STAGE_DATA:
+ usbc_hctsiz.s.pid = __cvmx_usb_get_data_pid(pipe);
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ if (header->s.request_type & 0x80)
+ bytes_to_transfer = 0;
+ else if (bytes_to_transfer > pipe->max_packet)
+ bytes_to_transfer = pipe->max_packet;
+ }
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index),
+ cvmx_usbcx_hccharx_t, epdir,
+ ((header->s.request_type & 0x80) ?
+ CVMX_USB_DIRECTION_IN :
+ CVMX_USB_DIRECTION_OUT));
+ break;
+ case CVMX_USB_STAGE_DATA_SPLIT_COMPLETE:
+ usbc_hctsiz.s.pid = __cvmx_usb_get_data_pid(pipe);
+ if (!(header->s.request_type & 0x80))
+ bytes_to_transfer = 0;
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index),
+ cvmx_usbcx_hccharx_t, epdir,
+ ((header->s.request_type & 0x80) ?
+ CVMX_USB_DIRECTION_IN :
+ CVMX_USB_DIRECTION_OUT));
+ USB_SET_FIELD32(CVMX_USBCX_HCSPLTX(channel, usb->index), cvmx_usbcx_hcspltx_t, compsplt, 1);
+ break;
+ case CVMX_USB_STAGE_STATUS:
+ usbc_hctsiz.s.pid = __cvmx_usb_get_data_pid(pipe);
+ bytes_to_transfer = 0;
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index), cvmx_usbcx_hccharx_t, epdir,
+ ((header->s.request_type & 0x80) ?
+ CVMX_USB_DIRECTION_OUT :
+ CVMX_USB_DIRECTION_IN));
+ break;
+ case CVMX_USB_STAGE_STATUS_SPLIT_COMPLETE:
+ usbc_hctsiz.s.pid = __cvmx_usb_get_data_pid(pipe);
+ bytes_to_transfer = 0;
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index), cvmx_usbcx_hccharx_t, epdir,
+ ((header->s.request_type & 0x80) ?
+ CVMX_USB_DIRECTION_OUT :
+ CVMX_USB_DIRECTION_IN));
+ USB_SET_FIELD32(CVMX_USBCX_HCSPLTX(channel, usb->index), cvmx_usbcx_hcspltx_t, compsplt, 1);
+ break;
+ }
+
+ /* Make sure the transfer never exceeds the byte limit of the hardware.
+ Further bytes will be sent as continued transactions */
+ if (bytes_to_transfer > MAX_TRANSFER_BYTES) {
+ /* Round MAX_TRANSFER_BYTES to a multiple of out packet size */
+ bytes_to_transfer = MAX_TRANSFER_BYTES / pipe->max_packet;
+ bytes_to_transfer *= pipe->max_packet;
+ }
+
+ /* Calculate the number of packets to transfer. If the length is zero
+ we still need to transfer one packet */
+ packets_to_transfer = (bytes_to_transfer + pipe->max_packet - 1) / pipe->max_packet;
+ if (packets_to_transfer == 0)
+ packets_to_transfer = 1;
+ else if ((packets_to_transfer>1) && (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)) {
+ /* Limit to one packet when not using DMA. Channels must be restarted
+ between every packet for IN transactions, so there is no reason to
+ do multiple packets in a row */
+ packets_to_transfer = 1;
+ bytes_to_transfer = packets_to_transfer * pipe->max_packet;
+ }
+ else if (packets_to_transfer > MAX_TRANSFER_PACKETS) {
+ /* Limit the number of packet and data transferred to what the
+ hardware can handle */
+ packets_to_transfer = MAX_TRANSFER_PACKETS;
+ bytes_to_transfer = packets_to_transfer * pipe->max_packet;
+ }
+
+ usbc_hctsiz.s.xfersize = bytes_to_transfer;
+ usbc_hctsiz.s.pktcnt = packets_to_transfer;
+
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index), usbc_hctsiz.u32);
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Start a channel to perform the pipe's head transaction
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param channel Channel to setup
+ * @param pipe Pipe to start
+ */
+static void __cvmx_usb_start_channel(cvmx_usb_internal_state_t *usb,
+ int channel,
+ cvmx_usb_pipe_t *pipe)
+{
+ cvmx_usb_transaction_t *transaction = pipe->head;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%d", channel);
+ CVMX_USB_LOG_PARAM("%p", pipe);
+
+ if (cvmx_unlikely((usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS) ||
+ (pipe->flags & CVMX_USB_PIPE_FLAGS_DEBUG_TRANSFERS)))
+ cvmx_dprintf("%s: Channel %d started. Pipe %d transaction %d stage %d\n",
+ __FUNCTION__, channel, __cvmx_usb_get_pipe_handle(usb, pipe),
+ __cvmx_usb_get_submit_handle(usb, transaction),
+ transaction->stage);
+
+ /* Make sure all writes to the DMA region get flushed */
+ CVMX_SYNCW;
+
+ /* Attach the channel to the pipe */
+ usb->pipe_for_channel[channel] = pipe;
+ pipe->channel = channel;
+ pipe->flags |= __CVMX_USB_PIPE_FLAGS_SCHEDULED;
+
+ /* Mark this channel as in use */
+ usb->idle_hardware_channels &= ~(1<<channel);
+
+ /* Enable the channel interrupt bits */
+ {
+ cvmx_usbcx_hcintx_t usbc_hcint;
+ cvmx_usbcx_hcintmskx_t usbc_hcintmsk;
+ cvmx_usbcx_haintmsk_t usbc_haintmsk;
+
+ /* Clear all channel status bits */
+ usbc_hcint.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCINTX(channel, usb->index));
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCINTX(channel, usb->index), usbc_hcint.u32);
+
+ usbc_hcintmsk.u32 = 0;
+ usbc_hcintmsk.s.chhltdmsk = 1;
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA) {
+ /* Channels need these extra interrupts when we aren't in DMA mode */
+ usbc_hcintmsk.s.datatglerrmsk = 1;
+ usbc_hcintmsk.s.frmovrunmsk = 1;
+ usbc_hcintmsk.s.bblerrmsk = 1;
+ usbc_hcintmsk.s.xacterrmsk = 1;
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ /* Splits don't generate xfercompl, so we need ACK and NYET */
+ usbc_hcintmsk.s.nyetmsk = 1;
+ usbc_hcintmsk.s.ackmsk = 1;
+ }
+ usbc_hcintmsk.s.nakmsk = 1;
+ usbc_hcintmsk.s.stallmsk = 1;
+ usbc_hcintmsk.s.xfercomplmsk = 1;
+ }
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCINTMSKX(channel, usb->index), usbc_hcintmsk.u32);
+
+ /* Enable the channel interrupt to propagate */
+ usbc_haintmsk.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HAINTMSK(usb->index));
+ usbc_haintmsk.s.haintmsk |= 1<<channel;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HAINTMSK(usb->index), usbc_haintmsk.u32);
+ }
+
+ /* Setup the locations the DMA engines use */
+ {
+ uint64_t dma_address = transaction->buffer + transaction->actual_bytes;
+ if (transaction->type == CVMX_USB_TRANSFER_ISOCHRONOUS)
+ dma_address = transaction->buffer + transaction->iso_packets[0].offset + transaction->actual_bytes;
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_DMA0_OUTB_CHN0(usb->index) + channel*8, dma_address);
+ __cvmx_usb_write_csr64(usb, CVMX_USBNX_DMA0_INB_CHN0(usb->index) + channel*8, dma_address);
+ }
+
+ /* Setup both the size of the transfer and the SPLIT characteristics */
+ {
+ cvmx_usbcx_hcspltx_t usbc_hcsplt = {.u32 = 0};
+ cvmx_usbcx_hctsizx_t usbc_hctsiz = {.u32 = 0};
+ int packets_to_transfer;
+ int bytes_to_transfer = transaction->buffer_length - transaction->actual_bytes;
+
+ /* ISOCHRONOUS transactions store each individual transfer size in the
+ packet structure, not the global buffer_length */
+ if (transaction->type == CVMX_USB_TRANSFER_ISOCHRONOUS)
+ bytes_to_transfer = transaction->iso_packets[0].length - transaction->actual_bytes;
+
+ /* We need to do split transactions when we are talking to non high
+ speed devices that are behind a high speed hub */
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ /* On the start split phase (stage is even) record the frame number we
+ will need to send the split complete. We only store the lower two bits
+ since the time ahead can only be two frames */
+ if ((transaction->stage&1) == 0) {
+ if (transaction->type == CVMX_USB_TRANSFER_BULK)
+ pipe->split_sc_frame = (usb->frame_number + 1) & 0x7f;
+ else
+ pipe->split_sc_frame = (usb->frame_number + 2) & 0x7f;
+ }
+ else
+ pipe->split_sc_frame = -1;
+
+ usbc_hcsplt.s.spltena = 1;
+ usbc_hcsplt.s.hubaddr = pipe->hub_device_addr;
+ usbc_hcsplt.s.prtaddr = pipe->hub_port;
+ usbc_hcsplt.s.compsplt = (transaction->stage == CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE);
+
+ /* SPLIT transactions can only ever transmit one data packet so
+ limit the transfer size to the max packet size */
+ if (bytes_to_transfer > pipe->max_packet)
+ bytes_to_transfer = pipe->max_packet;
+
+ /* ISOCHRONOUS OUT splits are unique in that they limit
+ data transfers to 188 byte chunks representing the
+ begin/middle/end of the data or all */
+ if (!usbc_hcsplt.s.compsplt &&
+ (pipe->transfer_dir == CVMX_USB_DIRECTION_OUT) &&
+ (pipe->transfer_type == CVMX_USB_TRANSFER_ISOCHRONOUS)) {
+ /* Clear the split complete frame number as there isn't going
+ to be a split complete */
+ pipe->split_sc_frame = -1;
+ /* See if we've started this transfer and sent data */
+ if (transaction->actual_bytes == 0) {
+ /* Nothing sent yet, this is either a begin or the
+ entire payload */
+ if (bytes_to_transfer <= 188)
+ usbc_hcsplt.s.xactpos = 3; /* Entire payload in one go */
+ else
+ usbc_hcsplt.s.xactpos = 2; /* First part of payload */
+ }
+ else {
+ /* Continuing the previous data, we must either be
+ in the middle or at the end */
+ if (bytes_to_transfer <= 188)
+ usbc_hcsplt.s.xactpos = 1; /* End of payload */
+ else
+ usbc_hcsplt.s.xactpos = 0; /* Middle of payload */
+ }
+ /* Again, the transfer size is limited to 188 bytes */
+ if (bytes_to_transfer > 188)
+ bytes_to_transfer = 188;
+ }
+ }
+
+ /* Make sure the transfer never exceeds the byte limit of the hardware.
+ Further bytes will be sent as continued transactions */
+ if (bytes_to_transfer > MAX_TRANSFER_BYTES) {
+ /* Round MAX_TRANSFER_BYTES to a multiple of out packet size */
+ bytes_to_transfer = MAX_TRANSFER_BYTES / pipe->max_packet;
+ bytes_to_transfer *= pipe->max_packet;
+ }
+
+ /* Calculate the number of packets to transfer. If the length is zero
+ we still need to transfer one packet */
+ packets_to_transfer = (bytes_to_transfer + pipe->max_packet - 1) / pipe->max_packet;
+ if (packets_to_transfer == 0)
+ packets_to_transfer = 1;
+ else if ((packets_to_transfer>1) && (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)) {
+ /* Limit to one packet when not using DMA. Channels must be restarted
+ between every packet for IN transactions, so there is no reason to
+ do multiple packets in a row */
+ packets_to_transfer = 1;
+ bytes_to_transfer = packets_to_transfer * pipe->max_packet;
+ }
+ else if (packets_to_transfer > MAX_TRANSFER_PACKETS) {
+ /* Limit the number of packet and data transferred to what the
+ hardware can handle */
+ packets_to_transfer = MAX_TRANSFER_PACKETS;
+ bytes_to_transfer = packets_to_transfer * pipe->max_packet;
+ }
+
+ usbc_hctsiz.s.xfersize = bytes_to_transfer;
+ usbc_hctsiz.s.pktcnt = packets_to_transfer;
+
+ /* Update the DATA0/DATA1 toggle */
+ usbc_hctsiz.s.pid = __cvmx_usb_get_data_pid(pipe);
+ /* High speed pipes may need a hardware ping before they start */
+ if (pipe->flags & __CVMX_USB_PIPE_FLAGS_NEED_PING)
+ usbc_hctsiz.s.dopng = 1;
+
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCSPLTX(channel, usb->index), usbc_hcsplt.u32);
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index), usbc_hctsiz.u32);
+ }
+
+ /* Setup the Host Channel Characteristics Register */
+ {
+ cvmx_usbcx_hccharx_t usbc_hcchar = {.u32 = 0};
+
+ /* Set the startframe odd/even properly. This is only used for periodic */
+ usbc_hcchar.s.oddfrm = usb->frame_number&1;
+
+ /* Set the number of back to back packets allowed by this endpoint.
+ Split transactions interpret "ec" as the number of immediate
+ retries of failure. These retries happen too quickly, so we
+ disable these entirely for splits */
+ if (__cvmx_usb_pipe_needs_split(usb, pipe))
+ usbc_hcchar.s.ec = 1;
+ else if (pipe->multi_count < 1)
+ usbc_hcchar.s.ec = 1;
+ else if (pipe->multi_count > 3)
+ usbc_hcchar.s.ec = 3;
+ else
+ usbc_hcchar.s.ec = pipe->multi_count;
+
+ /* Set the rest of the endpoint specific settings */
+ usbc_hcchar.s.devaddr = pipe->device_addr;
+ usbc_hcchar.s.eptype = transaction->type;
+ usbc_hcchar.s.lspddev = (pipe->device_speed == CVMX_USB_SPEED_LOW);
+ usbc_hcchar.s.epdir = pipe->transfer_dir;
+ usbc_hcchar.s.epnum = pipe->endpoint_num;
+ usbc_hcchar.s.mps = pipe->max_packet;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index), usbc_hcchar.u32);
+ }
+
+ /* Do transaction type specific fixups as needed */
+ switch (transaction->type) {
+ case CVMX_USB_TRANSFER_CONTROL:
+ __cvmx_usb_start_channel_control(usb, channel, pipe);
+ break;
+ case CVMX_USB_TRANSFER_BULK:
+ case CVMX_USB_TRANSFER_INTERRUPT:
+ break;
+ case CVMX_USB_TRANSFER_ISOCHRONOUS:
+ if (!__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ /* ISO transactions require different PIDs depending on direction
+ and how many packets are needed */
+ if (pipe->transfer_dir == CVMX_USB_DIRECTION_OUT) {
+ if (pipe->multi_count < 2) /* Need DATA0 */
+ USB_SET_FIELD32(CVMX_USBCX_HCTSIZX(channel, usb->index), cvmx_usbcx_hctsizx_t, pid, 0);
+ else /* Need MDATA */
+ USB_SET_FIELD32(CVMX_USBCX_HCTSIZX(channel, usb->index), cvmx_usbcx_hctsizx_t, pid, 3);
+ }
+ }
+ break;
+ }
+ {
+ cvmx_usbcx_hctsizx_t usbc_hctsiz = {.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index))};
+ transaction->xfersize = usbc_hctsiz.s.xfersize;
+ transaction->pktcnt = usbc_hctsiz.s.pktcnt;
+ }
+ /* Remeber when we start a split transaction */
+ if (__cvmx_usb_pipe_needs_split(usb, pipe))
+ usb->active_split = transaction;
+ USB_SET_FIELD32(CVMX_USBCX_HCCHARX(channel, usb->index), cvmx_usbcx_hccharx_t, chena, 1);
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)
+ __cvmx_usb_fill_tx_fifo(usb, channel);
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Find a pipe that is ready to be scheduled to hardware.
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param list Pipe list to search
+ * @param current_frame
+ * Frame counter to use as a time reference.
+ *
+ * @return Pipe or NULL if none are ready
+ */
+static cvmx_usb_pipe_t *__cvmx_usb_find_ready_pipe(cvmx_usb_internal_state_t *usb, cvmx_usb_pipe_list_t *list, uint64_t current_frame)
+{
+ cvmx_usb_pipe_t *pipe = list->head;
+ while (pipe) {
+ if (!(pipe->flags & __CVMX_USB_PIPE_FLAGS_SCHEDULED) && pipe->head &&
+ (pipe->next_tx_frame <= current_frame) &&
+ ((pipe->split_sc_frame == -1) || ((((int)current_frame - (int)pipe->split_sc_frame) & 0x7f) < 0x40)) &&
+ (!usb->active_split || (usb->active_split == pipe->head))) {
+ CVMX_PREFETCH(pipe, 128);
+ CVMX_PREFETCH(pipe->head, 0);
+ return pipe;
+ }
+ pipe = pipe->next;
+ }
+ return NULL;
+}
+
+
+/**
+ * @INTERNAL
+ * Called whenever a pipe might need to be scheduled to the
+ * hardware.
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param is_sof True if this schedule was called on a SOF interrupt.
+ */
+static void __cvmx_usb_schedule(cvmx_usb_internal_state_t *usb, int is_sof)
+{
+ int channel;
+ cvmx_usb_pipe_t *pipe;
+ int need_sof;
+ cvmx_usb_transfer_t ttype;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA) {
+ /* Without DMA we need to be careful to not schedule something at the end of a frame and cause an overrun */
+ cvmx_usbcx_hfnum_t hfnum = {.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HFNUM(usb->index))};
+ cvmx_usbcx_hfir_t hfir = {.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HFIR(usb->index))};
+ if (hfnum.s.frrem < hfir.s.frint/4)
+ goto done;
+ }
+
+ while (usb->idle_hardware_channels) {
+ /* Find an idle channel */
+ CVMX_CLZ(channel, usb->idle_hardware_channels);
+ channel = 31 - channel;
+ if (cvmx_unlikely(channel > 7)) {
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO))
+ cvmx_dprintf("%s: Idle hardware channels has a channel higher than 7. This is wrong\n", __FUNCTION__);
+ break;
+ }
+
+ /* Find a pipe needing service */
+ pipe = NULL;
+ if (is_sof) {
+ /* Only process periodic pipes on SOF interrupts. This way we are
+ sure that the periodic data is sent in the beginning of the
+ frame */
+ pipe = __cvmx_usb_find_ready_pipe(usb, usb->active_pipes + CVMX_USB_TRANSFER_ISOCHRONOUS, usb->frame_number);
+ if (cvmx_likely(!pipe))
+ pipe = __cvmx_usb_find_ready_pipe(usb, usb->active_pipes + CVMX_USB_TRANSFER_INTERRUPT, usb->frame_number);
+ }
+ if (cvmx_likely(!pipe)) {
+ pipe = __cvmx_usb_find_ready_pipe(usb, usb->active_pipes + CVMX_USB_TRANSFER_CONTROL, usb->frame_number);
+ if (cvmx_likely(!pipe))
+ pipe = __cvmx_usb_find_ready_pipe(usb, usb->active_pipes + CVMX_USB_TRANSFER_BULK, usb->frame_number);
+ }
+ if (!pipe)
+ break;
+
+ CVMX_USB_LOG_PARAM("%d", channel);
+ CVMX_USB_LOG_PARAM("%p", pipe);
+
+ if (cvmx_unlikely((usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS) ||
+ (pipe->flags & CVMX_USB_PIPE_FLAGS_DEBUG_TRANSFERS))) {
+ cvmx_usb_transaction_t *transaction = pipe->head;
+ const cvmx_usb_control_header_t *header = (transaction->control_header) ? cvmx_phys_to_ptr(transaction->control_header) : NULL;
+ const char *dir = (pipe->transfer_dir == CVMX_USB_DIRECTION_IN) ? "IN" : "OUT";
+ const char *type;
+ switch (pipe->transfer_type) {
+ case CVMX_USB_TRANSFER_CONTROL:
+ type = "SETUP";
+ dir = (header->s.request_type & 0x80) ? "IN" : "OUT";
+ break;
+ case CVMX_USB_TRANSFER_ISOCHRONOUS:
+ type = "ISOCHRONOUS";
+ break;
+ case CVMX_USB_TRANSFER_BULK:
+ type = "BULK";
+ break;
+ default: /* CVMX_USB_TRANSFER_INTERRUPT */
+ type = "INTERRUPT";
+ break;
+ }
+ cvmx_dprintf("%s: Starting pipe %d, transaction %d on channel %d. %s %s len=%d header=0x%llx\n",
+ __FUNCTION__, __cvmx_usb_get_pipe_handle(usb, pipe),
+ __cvmx_usb_get_submit_handle(usb, transaction),
+ channel, type, dir,
+ transaction->buffer_length,
+ (header) ? (unsigned long long)header->u64 : 0ull);
+ }
+ __cvmx_usb_start_channel(usb, channel, pipe);
+ }
+
+done:
+ /* Only enable SOF interrupts when we have transactions pending in the
+ future that might need to be scheduled */
+ need_sof = 0;
+ for (ttype=CVMX_USB_TRANSFER_CONTROL; ttype<=CVMX_USB_TRANSFER_INTERRUPT; ttype++) {
+ pipe = usb->active_pipes[ttype].head;
+ while (pipe) {
+ if (pipe->next_tx_frame > usb->frame_number) {
+ need_sof = 1;
+ break;
+ }
+ pipe=pipe->next;
+ }
+ }
+ USB_SET_FIELD32(CVMX_USBCX_GINTMSK(usb->index), cvmx_usbcx_gintmsk_t, sofmsk, need_sof);
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Call a user's callback for a specific reason.
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe Pipe the callback is for or NULL
+ * @param transaction
+ * Transaction the callback is for or NULL
+ * @param reason Reason this callback is being called
+ * @param complete_code
+ * Completion code for the transaction, if any
+ */
+static void __cvmx_usb_perform_callback(cvmx_usb_internal_state_t *usb,
+ cvmx_usb_pipe_t *pipe,
+ cvmx_usb_transaction_t *transaction,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_complete_t complete_code)
+{
+ cvmx_usb_callback_func_t callback = usb->callback[reason];
+ void *user_data = usb->callback_data[reason];
+ int submit_handle = -1;
+ int pipe_handle = -1;
+ int bytes_transferred = 0;
+
+ if (pipe)
+ pipe_handle = __cvmx_usb_get_pipe_handle(usb, pipe);
+
+ if (transaction) {
+ submit_handle = __cvmx_usb_get_submit_handle(usb, transaction);
+ bytes_transferred = transaction->actual_bytes;
+ /* Transactions are allowed to override the default callback */
+ if ((reason == CVMX_USB_CALLBACK_TRANSFER_COMPLETE) && transaction->callback) {
+ callback = transaction->callback;
+ user_data = transaction->callback_data;
+ }
+ }
+
+ if (!callback)
+ return;
+
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLBACKS))
+ cvmx_dprintf("%*s%s: calling callback %p(usb=%p, complete_code=%s, "
+ "pipe_handle=%d, submit_handle=%d, bytes_transferred=%d, user_data=%p);\n",
+ 2*usb->indent, "", __FUNCTION__, callback, usb,
+ __cvmx_usb_complete_to_string(complete_code),
+ pipe_handle, submit_handle, bytes_transferred, user_data);
+
+ callback((cvmx_usb_state_t *)usb, reason, complete_code, pipe_handle, submit_handle,
+ bytes_transferred, user_data);
+
+ if (cvmx_unlikely(usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLBACKS))
+ cvmx_dprintf("%*s%s: callback %p complete\n", 2*usb->indent, "",
+ __FUNCTION__, callback);
+}
+
+
+/**
+ * @INTERNAL
+ * Signal the completion of a transaction and free it. The
+ * transaction will be removed from the pipe transaction list.
+ *
+ * @param usb USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe Pipe the transaction is on
+ * @param transaction
+ * Transaction that completed
+ * @param complete_code
+ * Completion code
+ */
+static void __cvmx_usb_perform_complete(cvmx_usb_internal_state_t * usb,
+ cvmx_usb_pipe_t *pipe,
+ cvmx_usb_transaction_t *transaction,
+ cvmx_usb_complete_t complete_code)
+{
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%p", pipe);
+ CVMX_USB_LOG_PARAM("%p", transaction);
+ CVMX_USB_LOG_PARAM("%d", complete_code);
+
+ /* If this was a split then clear our split in progress marker */
+ if (usb->active_split == transaction)
+ usb->active_split = NULL;
+
+ /* Isochronous transactions need extra processing as they might not be done
+ after a single data transfer */
+ if (cvmx_unlikely(transaction->type == CVMX_USB_TRANSFER_ISOCHRONOUS)) {
+ /* Update the number of bytes transferred in this ISO packet */
+ transaction->iso_packets[0].length = transaction->actual_bytes;
+ transaction->iso_packets[0].status = complete_code;
+
+ /* If there are more ISOs pending and we succeeded, schedule the next
+ one */
+ if ((transaction->iso_number_packets > 1) && (complete_code == CVMX_USB_COMPLETE_SUCCESS)) {
+ transaction->actual_bytes = 0; /* No bytes transferred for this packet as of yet */
+ transaction->iso_number_packets--; /* One less ISO waiting to transfer */
+ transaction->iso_packets++; /* Increment to the next location in our packet array */
+ transaction->stage = CVMX_USB_STAGE_NON_CONTROL;
+ goto done;
+ }
+ }
+
+ /* Remove the transaction from the pipe list */
+ if (transaction->next)
+ transaction->next->prev = transaction->prev;
+ else
+ pipe->tail = transaction->prev;
+ if (transaction->prev)
+ transaction->prev->next = transaction->next;
+ else
+ pipe->head = transaction->next;
+ if (!pipe->head) {
+ __cvmx_usb_remove_pipe(usb->active_pipes + pipe->transfer_type, pipe);
+ __cvmx_usb_append_pipe(&usb->idle_pipes, pipe);
+
+ }
+ __cvmx_usb_perform_callback(usb, pipe, transaction,
+ CVMX_USB_CALLBACK_TRANSFER_COMPLETE,
+ complete_code);
+ __cvmx_usb_free_transaction(usb, transaction);
+done:
+ CVMX_USB_RETURN_NOTHING();
+}
+
+
+/**
+ * @INTERNAL
+ * Submit a usb transaction to a pipe. Called for all types
+ * of transactions.
+ *
+ * @param usb
+ * @param pipe_handle
+ * Which pipe to submit to. Will be validated in this function.
+ * @param type Transaction type
+ * @param flags Flags for the transaction
+ * @param buffer User buffer for the transaction
+ * @param buffer_length
+ * User buffer's length in bytes
+ * @param control_header
+ * For control transactions, the 8 byte standard header
+ * @param iso_start_frame
+ * For ISO transactions, the start frame
+ * @param iso_number_packets
+ * For ISO, the number of packet in the transaction.
+ * @param iso_packets
+ * A description of each ISO packet
+ * @param callback User callback to call when the transaction completes
+ * @param user_data User's data for the callback
+ *
+ * @return Submit handle or negative on failure. Matches the result
+ * in the external API.
+ */
+static int __cvmx_usb_submit_transaction(cvmx_usb_internal_state_t *usb,
+ int pipe_handle,
+ cvmx_usb_transfer_t type,
+ int flags,
+ uint64_t buffer,
+ int buffer_length,
+ uint64_t control_header,
+ int iso_start_frame,
+ int iso_number_packets,
+ cvmx_usb_iso_packet_t *iso_packets,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ int submit_handle;
+ cvmx_usb_transaction_t *transaction;
+ cvmx_usb_pipe_t *pipe = usb->pipe + pipe_handle;
+
+ CVMX_USB_LOG_CALLED();
+ if (cvmx_unlikely((pipe_handle < 0) || (pipe_handle >= MAX_PIPES)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ /* Fail if the pipe isn't open */
+ if (cvmx_unlikely((pipe->flags & __CVMX_USB_PIPE_FLAGS_OPEN) == 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(pipe->transfer_type != type))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ transaction = __cvmx_usb_alloc_transaction(usb);
+ if (cvmx_unlikely(!transaction))
+ CVMX_USB_RETURN(CVMX_USB_NO_MEMORY);
+
+ transaction->type = type;
+ transaction->flags |= flags;
+ transaction->buffer = buffer;
+ transaction->buffer_length = buffer_length;
+ transaction->control_header = control_header;
+ transaction->iso_start_frame = iso_start_frame; // FIXME: This is not used, implement it
+ transaction->iso_number_packets = iso_number_packets;
+ transaction->iso_packets = iso_packets;
+ transaction->callback = callback;
+ transaction->callback_data = user_data;
+ if (transaction->type == CVMX_USB_TRANSFER_CONTROL)
+ transaction->stage = CVMX_USB_STAGE_SETUP;
+ else
+ transaction->stage = CVMX_USB_STAGE_NON_CONTROL;
+
+ transaction->next = NULL;
+ if (pipe->tail) {
+ transaction->prev = pipe->tail;
+ transaction->prev->next = transaction;
+ }
+ else {
+ if (pipe->next_tx_frame < usb->frame_number)
+ pipe->next_tx_frame = usb->frame_number + pipe->interval -
+ (usb->frame_number - pipe->next_tx_frame) % pipe->interval;
+ transaction->prev = NULL;
+ pipe->head = transaction;
+ __cvmx_usb_remove_pipe(&usb->idle_pipes, pipe);
+ __cvmx_usb_append_pipe(usb->active_pipes + pipe->transfer_type, pipe);
+ }
+ pipe->tail = transaction;
+
+ submit_handle = __cvmx_usb_get_submit_handle(usb, transaction);
+
+ /* We may need to schedule the pipe if this was the head of the pipe */
+ if (!transaction->prev)
+ __cvmx_usb_schedule(usb, 0);
+
+ CVMX_USB_RETURN(submit_handle);
+}
+
+
+/**
+ * Call to submit a USB Bulk transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+int cvmx_usb_submit_bulk(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ int submit_handle;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ CVMX_USB_LOG_PARAM("0x%llx", (unsigned long long)buffer);
+ CVMX_USB_LOG_PARAM("%d", buffer_length);
+
+ /* Pipe handle checking is done later in a common place */
+ if (cvmx_unlikely(!buffer))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(buffer_length < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ submit_handle = __cvmx_usb_submit_transaction(usb, pipe_handle,
+ CVMX_USB_TRANSFER_BULK,
+ 0, /* flags */
+ buffer,
+ buffer_length,
+ 0, /* control_header */
+ 0, /* iso_start_frame */
+ 0, /* iso_number_packets */
+ NULL, /* iso_packets */
+ callback,
+ user_data);
+ CVMX_USB_RETURN(submit_handle);
+}
+
+
+/**
+ * Call to submit a USB Interrupt transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+int cvmx_usb_submit_interrupt(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ int submit_handle;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ CVMX_USB_LOG_PARAM("0x%llx", (unsigned long long)buffer);
+ CVMX_USB_LOG_PARAM("%d", buffer_length);
+
+ /* Pipe handle checking is done later in a common place */
+ if (cvmx_unlikely(!buffer))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(buffer_length < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ submit_handle = __cvmx_usb_submit_transaction(usb, pipe_handle,
+ CVMX_USB_TRANSFER_INTERRUPT,
+ 0, /* flags */
+ buffer,
+ buffer_length,
+ 0, /* control_header */
+ 0, /* iso_start_frame */
+ 0, /* iso_number_packets */
+ NULL, /* iso_packets */
+ callback,
+ user_data);
+ CVMX_USB_RETURN(submit_handle);
+}
+
+
+/**
+ * Call to submit a USB Control transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param control_header
+ * USB 8 byte control header physical address.
+ * Note that this is NOT A POINTER, but the
+ * full 64bit physical address of the buffer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+int cvmx_usb_submit_control(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t control_header,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ int submit_handle;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ cvmx_usb_control_header_t *header = cvmx_phys_to_ptr(control_header);
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ CVMX_USB_LOG_PARAM("0x%llx", (unsigned long long)control_header);
+ CVMX_USB_LOG_PARAM("0x%llx", (unsigned long long)buffer);
+ CVMX_USB_LOG_PARAM("%d", buffer_length);
+
+ /* Pipe handle checking is done later in a common place */
+ if (cvmx_unlikely(!control_header))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ /* Some drivers send a buffer with a zero length. God only knows why */
+ if (cvmx_unlikely(buffer && (buffer_length < 0)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(!buffer && (buffer_length != 0)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if ((header->s.request_type & 0x80) == 0)
+ buffer_length = cvmx_le16_to_cpu(header->s.length);
+
+ submit_handle = __cvmx_usb_submit_transaction(usb, pipe_handle,
+ CVMX_USB_TRANSFER_CONTROL,
+ 0, /* flags */
+ buffer,
+ buffer_length,
+ control_header,
+ 0, /* iso_start_frame */
+ 0, /* iso_number_packets */
+ NULL, /* iso_packets */
+ callback,
+ user_data);
+ CVMX_USB_RETURN(submit_handle);
+}
+
+
+/**
+ * Call to submit a USB Isochronous transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param start_frame
+ * Number of frames into the future to schedule
+ * this transaction.
+ * @param flags Flags to control the transfer. See
+ * cvmx_usb_isochronous_flags_t for the flag
+ * definitions.
+ * @param number_packets
+ * Number of sequential packets to transfer.
+ * "packets" is a pointer to an array of this
+ * many packet structures.
+ * @param packets Description of each transfer packet as
+ * defined by cvmx_usb_iso_packet_t. The array
+ * pointed to here must stay valid until the
+ * complete callback is called.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+int cvmx_usb_submit_isochronous(cvmx_usb_state_t *state, int pipe_handle,
+ int start_frame, int flags,
+ int number_packets,
+ cvmx_usb_iso_packet_t packets[],
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ int submit_handle;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ CVMX_USB_LOG_PARAM("%d", start_frame);
+ CVMX_USB_LOG_PARAM("0x%x", flags);
+ CVMX_USB_LOG_PARAM("%d", number_packets);
+ CVMX_USB_LOG_PARAM("%p", packets);
+ CVMX_USB_LOG_PARAM("0x%llx", (unsigned long long)buffer);
+ CVMX_USB_LOG_PARAM("%d", buffer_length);
+
+ /* Pipe handle checking is done later in a common place */
+ if (cvmx_unlikely(start_frame < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(flags & ~(CVMX_USB_ISOCHRONOUS_FLAGS_ALLOW_SHORT | CVMX_USB_ISOCHRONOUS_FLAGS_ASAP)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(number_packets < 1))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(!packets))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(!buffer))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(buffer_length < 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ submit_handle = __cvmx_usb_submit_transaction(usb, pipe_handle,
+ CVMX_USB_TRANSFER_ISOCHRONOUS,
+ flags,
+ buffer,
+ buffer_length,
+ 0, /* control_header */
+ start_frame,
+ number_packets,
+ packets,
+ callback,
+ user_data);
+ CVMX_USB_RETURN(submit_handle);
+}
+
+
+/**
+ * Cancel one outstanding request in a pipe. Canceling a request
+ * can fail if the transaction has already completed before cancel
+ * is called. Even after a successful cancel call, it may take
+ * a frame or two for the cvmx_usb_poll() function to call the
+ * associated callback.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to cancel requests in.
+ * @param submit_handle
+ * Handle to transaction to cancel, returned by the submit function.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_cancel(cvmx_usb_state_t *state, int pipe_handle,
+ int submit_handle)
+{
+ cvmx_usb_transaction_t *transaction;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ cvmx_usb_pipe_t *pipe = usb->pipe + pipe_handle;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ CVMX_USB_LOG_PARAM("%d", submit_handle);
+
+ if (cvmx_unlikely((pipe_handle < 0) || (pipe_handle >= MAX_PIPES)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely((submit_handle < 0) || (submit_handle >= MAX_TRANSACTIONS)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Fail if the pipe isn't open */
+ if (cvmx_unlikely((pipe->flags & __CVMX_USB_PIPE_FLAGS_OPEN) == 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ transaction = usb->transaction + submit_handle;
+
+ /* Fail if this transaction already completed */
+ if (cvmx_unlikely((transaction->flags & __CVMX_USB_TRANSACTION_FLAGS_IN_USE) == 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* If the transaction is the HEAD of the queue and scheduled. We need to
+ treat it special */
+ if ((pipe->head == transaction) &&
+ (pipe->flags & __CVMX_USB_PIPE_FLAGS_SCHEDULED)) {
+ cvmx_usbcx_hccharx_t usbc_hcchar;
+
+ usb->pipe_for_channel[pipe->channel] = NULL;
+ pipe->flags &= ~__CVMX_USB_PIPE_FLAGS_SCHEDULED;
+
+ CVMX_SYNCW;
+
+ usbc_hcchar.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCCHARX(pipe->channel, usb->index));
+ /* If the channel isn't enabled then the transaction already completed */
+ if (usbc_hcchar.s.chena) {
+ usbc_hcchar.s.chdis = 1;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCCHARX(pipe->channel, usb->index), usbc_hcchar.u32);
+ }
+ }
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_CANCEL);
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Cancel all outstanding requests in a pipe. Logically all this
+ * does is call cvmx_usb_cancel() in a loop.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to cancel requests in.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_cancel_all(cvmx_usb_state_t *state, int pipe_handle)
+{
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ cvmx_usb_pipe_t *pipe = usb->pipe + pipe_handle;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ if (cvmx_unlikely((pipe_handle < 0) || (pipe_handle >= MAX_PIPES)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Fail if the pipe isn't open */
+ if (cvmx_unlikely((pipe->flags & __CVMX_USB_PIPE_FLAGS_OPEN) == 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Simply loop through and attempt to cancel each transaction */
+ while (pipe->head) {
+ cvmx_usb_status_t result = cvmx_usb_cancel(state, pipe_handle,
+ __cvmx_usb_get_submit_handle(usb, pipe->head));
+ if (cvmx_unlikely(result != CVMX_USB_SUCCESS))
+ CVMX_USB_RETURN(result);
+ }
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Close a pipe created with cvmx_usb_open_pipe().
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to close.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t. CVMX_USB_BUSY is returned if the
+ * pipe has outstanding transfers.
+ */
+cvmx_usb_status_t cvmx_usb_close_pipe(cvmx_usb_state_t *state, int pipe_handle)
+{
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ cvmx_usb_pipe_t *pipe = usb->pipe + pipe_handle;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", pipe_handle);
+ if (cvmx_unlikely((pipe_handle < 0) || (pipe_handle >= MAX_PIPES)))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Fail if the pipe isn't open */
+ if (cvmx_unlikely((pipe->flags & __CVMX_USB_PIPE_FLAGS_OPEN) == 0))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ /* Fail if the pipe has pending transactions */
+ if (cvmx_unlikely(pipe->head))
+ CVMX_USB_RETURN(CVMX_USB_BUSY);
+
+ pipe->flags = 0;
+ __cvmx_usb_remove_pipe(&usb->idle_pipes, pipe);
+ __cvmx_usb_append_pipe(&usb->free_pipes, pipe);
+
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Register a function to be called when various USB events occur.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param reason Which event to register for.
+ * @param callback Function to call when the event occurs.
+ * @param user_data User data parameter to the function.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_register_callback(cvmx_usb_state_t *state,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_callback_func_t callback,
+ void *user_data)
+{
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+ CVMX_USB_LOG_PARAM("%d", reason);
+ CVMX_USB_LOG_PARAM("%p", callback);
+ CVMX_USB_LOG_PARAM("%p", user_data);
+ if (cvmx_unlikely(reason >= __CVMX_USB_CALLBACK_END))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+ if (cvmx_unlikely(!callback))
+ CVMX_USB_RETURN(CVMX_USB_INVALID_PARAM);
+
+ usb->callback[reason] = callback;
+ usb->callback_data[reason] = user_data;
+
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
+
+
+/**
+ * Get the current USB protocol level frame number. The frame
+ * number is always in the range of 0-0x7ff.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return USB frame number
+ */
+int cvmx_usb_get_frame_number(cvmx_usb_state_t *state)
+{
+ int frame_number;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+ cvmx_usbcx_hfnum_t usbc_hfnum;
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ usbc_hfnum.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HFNUM(usb->index));
+ frame_number = usbc_hfnum.s.frnum;
+
+ CVMX_USB_RETURN(frame_number);
+}
+
+
+/**
+ * @INTERNAL
+ * Poll a channel for status
+ *
+ * @param usb USB device
+ * @param channel Channel to poll
+ *
+ * @return Zero on success
+ */
+static int __cvmx_usb_poll_channel(cvmx_usb_internal_state_t *usb, int channel)
+{
+ cvmx_usbcx_hcintx_t usbc_hcint;
+ cvmx_usbcx_hctsizx_t usbc_hctsiz;
+ cvmx_usbcx_hccharx_t usbc_hcchar;
+ cvmx_usb_pipe_t *pipe;
+ cvmx_usb_transaction_t *transaction;
+ int bytes_this_transfer;
+ int bytes_in_last_packet;
+ int packets_processed;
+ int buffer_space_left;
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", usb);
+ CVMX_USB_LOG_PARAM("%d", channel);
+
+ /* Read the interrupt status bits for the channel */
+ usbc_hcint.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCINTX(channel, usb->index));
+
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA) {
+ usbc_hcchar.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index));
+
+ if (usbc_hcchar.s.chena && usbc_hcchar.s.chdis) {
+ /* There seems to be a bug in CN31XX which can cause interrupt
+ IN transfers to get stuck until we do a write of HCCHARX
+ without changing things */
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index), usbc_hcchar.u32);
+ CVMX_USB_RETURN(0);
+ }
+
+ /* In non DMA mode the channels don't halt themselves. We need to
+ manually disable channels that are left running */
+ if (!usbc_hcint.s.chhltd) {
+ if (usbc_hcchar.s.chena) {
+ cvmx_usbcx_hcintmskx_t hcintmsk;
+ /* Disable all interrupts except CHHLTD */
+ hcintmsk.u32 = 0;
+ hcintmsk.s.chhltdmsk = 1;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCINTMSKX(channel, usb->index), hcintmsk.u32);
+ usbc_hcchar.s.chdis = 1;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index), usbc_hcchar.u32);
+ CVMX_USB_RETURN(0);
+ }
+ else if (usbc_hcint.s.xfercompl) {
+ /* Successful IN/OUT with transfer complete. Channel halt isn't needed */
+ }
+ else {
+ cvmx_dprintf("USB%d: Channel %d interrupt without halt\n", usb->index, channel);
+ CVMX_USB_RETURN(0);
+ }
+ }
+ }
+ else {
+ /* There is are no interrupts that we need to process when the channel is
+ still running */
+ if (!usbc_hcint.s.chhltd)
+ CVMX_USB_RETURN(0);
+ }
+
+ /* Disable the channel interrupts now that it is done */
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HCINTMSKX(channel, usb->index), 0);
+ usb->idle_hardware_channels |= (1<<channel);
+
+ /* Make sure this channel is tied to a valid pipe */
+ pipe = usb->pipe_for_channel[channel];
+ CVMX_PREFETCH(pipe, 0);
+ CVMX_PREFETCH(pipe, 128);
+ if (!pipe)
+ CVMX_USB_RETURN(0);
+ transaction = pipe->head;
+ CVMX_PREFETCH0(transaction);
+
+ /* Disconnect this pipe from the HW channel. Later the schedule function will
+ figure out which pipe needs to go */
+ usb->pipe_for_channel[channel] = NULL;
+ pipe->flags &= ~__CVMX_USB_PIPE_FLAGS_SCHEDULED;
+
+ /* Read the channel config info so we can figure out how much data
+ transfered */
+ usbc_hcchar.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCCHARX(channel, usb->index));
+ usbc_hctsiz.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HCTSIZX(channel, usb->index));
+
+ /* Calculating the number of bytes successfully transferred is dependent on
+ the transfer direction */
+ packets_processed = transaction->pktcnt - usbc_hctsiz.s.pktcnt;
+ if (usbc_hcchar.s.epdir) {
+ /* IN transactions are easy. For every byte received the hardware
+ decrements xfersize. All we need to do is subtract the current
+ value of xfersize from its starting value and we know how many
+ bytes were written to the buffer */
+ bytes_this_transfer = transaction->xfersize - usbc_hctsiz.s.xfersize;
+ }
+ else {
+ /* OUT transaction don't decrement xfersize. Instead pktcnt is
+ decremented on every successful packet send. The hardware does
+ this when it receives an ACK, or NYET. If it doesn't
+ receive one of these responses pktcnt doesn't change */
+ bytes_this_transfer = packets_processed * usbc_hcchar.s.mps;
+ /* The last packet may not be a full transfer if we didn't have
+ enough data */
+ if (bytes_this_transfer > transaction->xfersize)
+ bytes_this_transfer = transaction->xfersize;
+ }
+ /* Figure out how many bytes were in the last packet of the transfer */
+ if (packets_processed)
+ bytes_in_last_packet = bytes_this_transfer - (packets_processed-1) * usbc_hcchar.s.mps;
+ else
+ bytes_in_last_packet = bytes_this_transfer;
+
+ /* As a special case, setup transactions output the setup header, not
+ the user's data. For this reason we don't count setup data as bytes
+ transferred */
+ if ((transaction->stage == CVMX_USB_STAGE_SETUP) ||
+ (transaction->stage == CVMX_USB_STAGE_SETUP_SPLIT_COMPLETE))
+ bytes_this_transfer = 0;
+
+ /* Optional debug output */
+ if (cvmx_unlikely((usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS) ||
+ (pipe->flags & CVMX_USB_PIPE_FLAGS_DEBUG_TRANSFERS)))
+ cvmx_dprintf("%s: Channel %d halted. Pipe %d transaction %d stage %d bytes=%d\n",
+ __FUNCTION__, channel,
+ __cvmx_usb_get_pipe_handle(usb, pipe),
+ __cvmx_usb_get_submit_handle(usb, transaction),
+ transaction->stage, bytes_this_transfer);
+
+ /* Add the bytes transferred to the running total. It is important that
+ bytes_this_transfer doesn't count any data that needs to be
+ retransmitted */
+ transaction->actual_bytes += bytes_this_transfer;
+ if (transaction->type == CVMX_USB_TRANSFER_ISOCHRONOUS)
+ buffer_space_left = transaction->iso_packets[0].length - transaction->actual_bytes;
+ else
+ buffer_space_left = transaction->buffer_length - transaction->actual_bytes;
+
+ /* We need to remember the PID toggle state for the next transaction. The
+ hardware already updated it for the next transaction */
+ pipe->pid_toggle = !(usbc_hctsiz.s.pid == 0);
+
+ /* For high speed bulk out, assume the next transaction will need to do a
+ ping before proceeding. If this isn't true the ACK processing below
+ will clear this flag */
+ if ((pipe->device_speed == CVMX_USB_SPEED_HIGH) &&
+ (pipe->transfer_type == CVMX_USB_TRANSFER_BULK) &&
+ (pipe->transfer_dir == CVMX_USB_DIRECTION_OUT))
+ pipe->flags |= __CVMX_USB_PIPE_FLAGS_NEED_PING;
+
+ if (usbc_hcint.s.stall) {
+ /* STALL as a response means this transaction cannot be completed
+ because the device can't process transactions. Tell the user. Any
+ data that was transferred will be counted on the actual bytes
+ transferred */
+ pipe->pid_toggle = 0;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_STALL);
+ }
+ else if (usbc_hcint.s.xacterr) {
+ /* We know at least one packet worked if we get a ACK or NAK. Reset the retry counter */
+ if (usbc_hcint.s.nak || usbc_hcint.s.ack)
+ transaction->retries = 0;
+ transaction->retries++;
+ if (transaction->retries > MAX_RETRIES) {
+ /* XactErr as a response means the device signaled something wrong with
+ the transfer. For example, PID toggle errors cause these */
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_XACTERR);
+ }
+ else {
+ /* If this was a split then clear our split in progress marker */
+ if (usb->active_split == transaction)
+ usb->active_split = NULL;
+ /* Rewind to the beginning of the transaction by anding off the
+ split complete bit */
+ transaction->stage &= ~1;
+ pipe->split_sc_frame = -1;
+ pipe->next_tx_frame += pipe->interval;
+ if (pipe->next_tx_frame < usb->frame_number)
+ pipe->next_tx_frame = usb->frame_number + pipe->interval -
+ (usb->frame_number - pipe->next_tx_frame) % pipe->interval;
+ }
+ }
+ else if (usbc_hcint.s.bblerr)
+ {
+ /* Babble Error (BblErr) */
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_BABBLEERR);
+ }
+ else if (usbc_hcint.s.datatglerr)
+ {
+ /* We'll retry the exact same transaction again */
+ transaction->retries++;
+ }
+ else if (usbc_hcint.s.nyet) {
+ /* NYET as a response is only allowed in three cases: as a response to
+ a ping, as a response to a split transaction, and as a response to
+ a bulk out. The ping case is handled by hardware, so we only have
+ splits and bulk out */
+ if (!__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ transaction->retries = 0;
+ /* If there is more data to go then we need to try again. Otherwise
+ this transaction is complete */
+ if ((buffer_space_left == 0) || (bytes_in_last_packet < pipe->max_packet))
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ else {
+ /* Split transactions retry the split complete 4 times then rewind
+ to the start split and do the entire transactions again */
+ transaction->retries++;
+ if ((transaction->retries & 0x3) == 0) {
+ /* Rewind to the beginning of the transaction by anding off the
+ split complete bit */
+ transaction->stage &= ~1;
+ pipe->split_sc_frame = -1;
+ }
+ }
+ }
+ else if (usbc_hcint.s.ack) {
+ transaction->retries = 0;
+ /* The ACK bit can only be checked after the other error bits. This is
+ because a multi packet transfer may succeed in a number of packets
+ and then get a different response on the last packet. In this case
+ both ACK and the last response bit will be set. If none of the
+ other response bits is set, then the last packet must have been an
+ ACK */
+
+ /* Since we got an ACK, we know we don't need to do a ping on this
+ pipe */
+ pipe->flags &= ~__CVMX_USB_PIPE_FLAGS_NEED_PING;
+
+ switch (transaction->type)
+ {
+ case CVMX_USB_TRANSFER_CONTROL:
+ switch (transaction->stage)
+ {
+ case CVMX_USB_STAGE_NON_CONTROL:
+ case CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE:
+ /* This should be impossible */
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_ERROR);
+ break;
+ case CVMX_USB_STAGE_SETUP:
+ pipe->pid_toggle = 1;
+ if (__cvmx_usb_pipe_needs_split(usb, pipe))
+ transaction->stage = CVMX_USB_STAGE_SETUP_SPLIT_COMPLETE;
+ else {
+ cvmx_usb_control_header_t *header = cvmx_phys_to_ptr(transaction->control_header);
+ if (header->s.length)
+ transaction->stage = CVMX_USB_STAGE_DATA;
+ else
+ transaction->stage = CVMX_USB_STAGE_STATUS;
+ }
+ break;
+ case CVMX_USB_STAGE_SETUP_SPLIT_COMPLETE:
+ {
+ cvmx_usb_control_header_t *header = cvmx_phys_to_ptr(transaction->control_header);
+ if (header->s.length)
+ transaction->stage = CVMX_USB_STAGE_DATA;
+ else
+ transaction->stage = CVMX_USB_STAGE_STATUS;
+ }
+ break;
+ case CVMX_USB_STAGE_DATA:
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ transaction->stage = CVMX_USB_STAGE_DATA_SPLIT_COMPLETE;
+ /* For setup OUT data that are splits, the hardware
+ doesn't appear to count transferred data. Here
+ we manually update the data transferred */
+ if (!usbc_hcchar.s.epdir) {
+ if (buffer_space_left < pipe->max_packet)
+ transaction->actual_bytes += buffer_space_left;
+ else
+ transaction->actual_bytes += pipe->max_packet;
+ }
+ }
+ else if ((buffer_space_left == 0) || (bytes_in_last_packet < pipe->max_packet)) {
+ pipe->pid_toggle = 1;
+ transaction->stage = CVMX_USB_STAGE_STATUS;
+ }
+ break;
+ case CVMX_USB_STAGE_DATA_SPLIT_COMPLETE:
+ if ((buffer_space_left == 0) || (bytes_in_last_packet < pipe->max_packet)) {
+ pipe->pid_toggle = 1;
+ transaction->stage = CVMX_USB_STAGE_STATUS;
+ }
+ else {
+ transaction->stage = CVMX_USB_STAGE_DATA;
+ }
+ break;
+ case CVMX_USB_STAGE_STATUS:
+ if (__cvmx_usb_pipe_needs_split(usb, pipe))
+ transaction->stage = CVMX_USB_STAGE_STATUS_SPLIT_COMPLETE;
+ else
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ break;
+ case CVMX_USB_STAGE_STATUS_SPLIT_COMPLETE:
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ break;
+ }
+ break;
+ case CVMX_USB_TRANSFER_BULK:
+ case CVMX_USB_TRANSFER_INTERRUPT:
+ /* The only time a bulk transfer isn't complete when
+ it finishes with an ACK is during a split transaction. For
+ splits we need to continue the transfer if more data is
+ needed */
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ if (transaction->stage == CVMX_USB_STAGE_NON_CONTROL)
+ transaction->stage = CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE;
+ else {
+ if (buffer_space_left && (bytes_in_last_packet == pipe->max_packet))
+ transaction->stage = CVMX_USB_STAGE_NON_CONTROL;
+ else {
+ if (transaction->type == CVMX_USB_TRANSFER_INTERRUPT)
+ pipe->next_tx_frame += pipe->interval;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ }
+ }
+ else {
+ if ((pipe->device_speed == CVMX_USB_SPEED_HIGH) &&
+ (pipe->transfer_type == CVMX_USB_TRANSFER_BULK) &&
+ (pipe->transfer_dir == CVMX_USB_DIRECTION_OUT) &&
+ (usbc_hcint.s.nak))
+ pipe->flags |= __CVMX_USB_PIPE_FLAGS_NEED_PING;
+ if (!buffer_space_left || (bytes_in_last_packet < pipe->max_packet)) {
+ if (transaction->type == CVMX_USB_TRANSFER_INTERRUPT)
+ pipe->next_tx_frame += pipe->interval;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ }
+ break;
+ case CVMX_USB_TRANSFER_ISOCHRONOUS:
+ if (__cvmx_usb_pipe_needs_split(usb, pipe)) {
+ /* ISOCHRONOUS OUT splits don't require a complete split stage.
+ Instead they use a sequence of begin OUT splits to transfer
+ the data 188 bytes at a time. Once the transfer is complete,
+ the pipe sleeps until the next schedule interval */
+ if (pipe->transfer_dir == CVMX_USB_DIRECTION_OUT) {
+ /* If no space left or this wasn't a max size packet then
+ this transfer is complete. Otherwise start it again
+ to send the next 188 bytes */
+ if (!buffer_space_left || (bytes_this_transfer < 188)) {
+ pipe->next_tx_frame += pipe->interval;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ }
+ else {
+ if (transaction->stage == CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE) {
+ /* We are in the incoming data phase. Keep getting
+ data until we run out of space or get a small
+ packet */
+ if ((buffer_space_left == 0) || (bytes_in_last_packet < pipe->max_packet)) {
+ pipe->next_tx_frame += pipe->interval;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ }
+ else
+ transaction->stage = CVMX_USB_STAGE_NON_CONTROL_SPLIT_COMPLETE;
+ }
+ }
+ else {
+ pipe->next_tx_frame += pipe->interval;
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_SUCCESS);
+ }
+ break;
+ }
+ }
+ else if (usbc_hcint.s.nak) {
+ /* If this was a split then clear our split in progress marker */
+ if (usb->active_split == transaction)
+ usb->active_split = NULL;
+ /* NAK as a response means the device couldn't accept the transaction,
+ but it should be retried in the future. Rewind to the beginning of
+ the transaction by anding off the split complete bit. Retry in the
+ next interval */
+ transaction->retries = 0;
+ transaction->stage &= ~1;
+ pipe->next_tx_frame += pipe->interval;
+ if (pipe->next_tx_frame < usb->frame_number)
+ pipe->next_tx_frame = usb->frame_number + pipe->interval -
+ (usb->frame_number - pipe->next_tx_frame) % pipe->interval;
+ }
+ else {
+ cvmx_usb_port_status_t port;
+ port = cvmx_usb_get_status((cvmx_usb_state_t *)usb);
+ if (port.port_enabled)
+ {
+ /* We'll retry the exact same transaction again */
+ transaction->retries++;
+ }
+ else
+ {
+ /* We get channel halted interrupts with no result bits sets when the
+ cable is unplugged */
+ __cvmx_usb_perform_complete(usb, pipe, transaction, CVMX_USB_COMPLETE_ERROR);
+ }
+ }
+ CVMX_USB_RETURN(0);
+}
+
+
+/**
+ * Poll the USB block for status and call all needed callback
+ * handlers. This function is meant to be called in the interrupt
+ * handler for the USB controller. It can also be called
+ * periodically in a loop for non-interrupt based operation.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+cvmx_usb_status_t cvmx_usb_poll(cvmx_usb_state_t *state)
+{
+ cvmx_usbcx_hfnum_t usbc_hfnum;
+ cvmx_usbcx_gintsts_t usbc_gintsts;
+ cvmx_usb_internal_state_t *usb = (cvmx_usb_internal_state_t*)state;
+
+ CVMX_PREFETCH(usb, 0);
+ CVMX_PREFETCH(usb, 1*128);
+ CVMX_PREFETCH(usb, 2*128);
+ CVMX_PREFETCH(usb, 3*128);
+ CVMX_PREFETCH(usb, 4*128);
+
+ CVMX_USB_LOG_CALLED();
+ CVMX_USB_LOG_PARAM("%p", state);
+
+ /* Update the frame counter */
+ usbc_hfnum.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HFNUM(usb->index));
+ if ((usb->frame_number&0x3fff) > usbc_hfnum.s.frnum)
+ usb->frame_number += 0x4000;
+ usb->frame_number &= ~0x3fffull;
+ usb->frame_number |= usbc_hfnum.s.frnum;
+
+ /* Read the pending interrupts */
+ usbc_gintsts.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_GINTSTS(usb->index));
+
+ /* Clear the interrupts now that we know about them */
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_GINTSTS(usb->index), usbc_gintsts.u32);
+
+ if (usbc_gintsts.s.rxflvl) {
+ /* RxFIFO Non-Empty (RxFLvl)
+ Indicates that there is at least one packet pending to be read
+ from the RxFIFO. */
+ /* In DMA mode this is handled by hardware */
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)
+ __cvmx_usb_poll_rx_fifo(usb);
+ }
+ if (usbc_gintsts.s.ptxfemp || usbc_gintsts.s.nptxfemp) {
+ /* Fill the Tx FIFOs when not in DMA mode */
+ if (usb->init_flags & CVMX_USB_INITIALIZE_FLAGS_NO_DMA)
+ __cvmx_usb_poll_tx_fifo(usb);
+ }
+ if (usbc_gintsts.s.disconnint || usbc_gintsts.s.prtint) {
+ cvmx_usbcx_hprt_t usbc_hprt;
+ /* Disconnect Detected Interrupt (DisconnInt)
+ Asserted when a device disconnect is detected. */
+
+ /* Host Port Interrupt (PrtInt)
+ The core sets this bit to indicate a change in port status of one
+ of the O2P USB core ports in Host mode. The application must
+ read the Host Port Control and Status (HPRT) register to
+ determine the exact event that caused this interrupt. The
+ application must clear the appropriate status bit in the Host Port
+ Control and Status register to clear this bit. */
+
+ /* Call the user's port callback */
+ __cvmx_usb_perform_callback(usb, NULL, NULL,
+ CVMX_USB_CALLBACK_PORT_CHANGED,
+ CVMX_USB_COMPLETE_SUCCESS);
+ /* Clear the port change bits */
+ usbc_hprt.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HPRT(usb->index));
+ usbc_hprt.s.prtena = 0;
+ __cvmx_usb_write_csr32(usb, CVMX_USBCX_HPRT(usb->index), usbc_hprt.u32);
+ }
+ if (usbc_gintsts.s.hchint) {
+ /* Host Channels Interrupt (HChInt)
+ The core sets this bit to indicate that an interrupt is pending on
+ one of the channels of the core (in Host mode). The application
+ must read the Host All Channels Interrupt (HAINT) register to
+ determine the exact number of the channel on which the
+ interrupt occurred, and then read the corresponding Host
+ Channel-n Interrupt (HCINTn) register to determine the exact
+ cause of the interrupt. The application must clear the
+ appropriate status bit in the HCINTn register to clear this bit. */
+ cvmx_usbcx_haint_t usbc_haint;
+ usbc_haint.u32 = __cvmx_usb_read_csr32(usb, CVMX_USBCX_HAINT(usb->index));
+ while (usbc_haint.u32) {
+ int channel;
+ CVMX_CLZ(channel, usbc_haint.u32);
+ channel = 31 - channel;
+ __cvmx_usb_poll_channel(usb, channel);
+ usbc_haint.u32 ^= 1<<channel;
+ }
+ }
+
+ __cvmx_usb_schedule(usb, usbc_gintsts.s.sof);
+
+ CVMX_USB_RETURN(CVMX_USB_SUCCESS);
+}
diff --git a/drivers/staging/octeon-usb/cvmx-usb.h b/drivers/staging/octeon-usb/cvmx-usb.h
new file mode 100644
index 000000000000..db9cc05e5d3c
--- /dev/null
+++ b/drivers/staging/octeon-usb/cvmx-usb.h
@@ -0,0 +1,1085 @@
+/***********************license start***************
+ * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
+ * reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+
+ * * Neither the name of Cavium Networks nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+
+ * This Software, including technical data, may be subject to U.S. export control
+ * laws, including the U.S. Export Administration Act and its associated
+ * regulations, and may be subject to export or import regulations in other
+ * countries.
+
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+ * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
+ * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
+ * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
+ * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
+ * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
+ * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
+ * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
+ * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
+ * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+ ***********************license end**************************************/
+
+
+/**
+ * @file
+ *
+ * "cvmx-usb.h" defines a set of low level USB functions to help
+ * developers create Octeon USB drivers for various operating
+ * systems. These functions provide a generic API to the Octeon
+ * USB blocks, hiding the internal hardware specific
+ * operations.
+ *
+ * At a high level the device driver needs to:
+ *
+ * -# Call cvmx_usb_get_num_ports() to get the number of
+ * supported ports.
+ * -# Call cvmx_usb_initialize() for each Octeon USB port.
+ * -# Enable the port using cvmx_usb_enable().
+ * -# Either periodically, or in an interrupt handler, call
+ * cvmx_usb_poll() to service USB events.
+ * -# Manage pipes using cvmx_usb_open_pipe() and
+ * cvmx_usb_close_pipe().
+ * -# Manage transfers using cvmx_usb_submit_*() and
+ * cvmx_usb_cancel*().
+ * -# Shutdown USB on unload using cvmx_usb_shutdown().
+ *
+ * To monitor USB status changes, the device driver must use
+ * cvmx_usb_register_callback() to register for events that it
+ * is interested in. Below are a few hints on successfully
+ * implementing a driver on top of this API.
+ *
+ * <h2>Initialization</h2>
+ *
+ * When a driver is first loaded, it is normally not necessary
+ * to bring up the USB port completely. Most operating systems
+ * expect to initialize and enable the port in two independent
+ * steps. Normally an operating system will probe hardware,
+ * initialize anything found, and then enable the hardware.
+ *
+ * In the probe phase you should:
+ * -# Use cvmx_usb_get_num_ports() to determine the number of
+ * USB port to be supported.
+ * -# Allocate space for a cvmx_usb_state_t structure for each
+ * port.
+ * -# Tell the operating system about each port
+ *
+ * In the initialization phase you should:
+ * -# Use cvmx_usb_initialize() on each port.
+ * -# Do not call cvmx_usb_enable(). This leaves the USB port in
+ * the disabled state until the operating system is ready.
+ *
+ * Finally, in the enable phase you should:
+ * -# Call cvmx_usb_enable() on the appropriate port.
+ * -# Note that some operating system use a RESET instead of an
+ * enable call. To implement RESET, you should call
+ * cvmx_usb_disable() followed by cvmx_usb_enable().
+ *
+ * <h2>Locking</h2>
+ *
+ * All of the functions in the cvmx-usb API assume exclusive
+ * access to the USB hardware and internal data structures. This
+ * means that the driver must provide locking as necessary.
+ *
+ * In the single CPU state it is normally enough to disable
+ * interrupts before every call to cvmx_usb*() and enable them
+ * again after the call is complete. Keep in mind that it is
+ * very common for the callback handlers to make additional
+ * calls into cvmx-usb, so the disable/enable must be protected
+ * against recursion. As an example, the Linux kernel
+ * local_irq_save() and local_irq_restore() are perfect for this
+ * in the non SMP case.
+ *
+ * In the SMP case, locking is more complicated. For SMP you not
+ * only need to disable interrupts on the local core, but also
+ * take a lock to make sure that another core cannot call
+ * cvmx-usb.
+ *
+ * <h2>Port callback</h2>
+ *
+ * The port callback prototype needs to look as follows:
+ *
+ * void port_callback(cvmx_usb_state_t *usb,
+ * cvmx_usb_callback_t reason,
+ * cvmx_usb_complete_t status,
+ * int pipe_handle,
+ * int submit_handle,
+ * int bytes_transferred,
+ * void *user_data);
+ * - @b usb is the cvmx_usb_state_t for the port.
+ * - @b reason will always be
+ * CVMX_USB_CALLBACK_PORT_CHANGED.
+ * - @b status will always be CVMX_USB_COMPLETE_SUCCESS.
+ * - @b pipe_handle will always be -1.
+ * - @b submit_handle will always be -1.
+ * - @b bytes_transferred will always be 0.
+ * - @b user_data is the void pointer originally passed along
+ * with the callback. Use this for any state information you
+ * need.
+ *
+ * The port callback will be called whenever the user plugs /
+ * unplugs a device from the port. It will not be called when a
+ * device is plugged / unplugged from a hub connected to the
+ * root port. Normally all the callback needs to do is tell the
+ * operating system to poll the root hub for status. Under
+ * Linux, this is performed by calling usb_hcd_poll_rh_status().
+ * In the Linux driver we use @b user_data. to pass around the
+ * Linux "hcd" structure. Once the port callback completes,
+ * Linux automatically calls octeon_usb_hub_status_data() which
+ * uses cvmx_usb_get_status() to determine the root port status.
+ *
+ * <h2>Complete callback</h2>
+ *
+ * The completion callback prototype needs to look as follows:
+ *
+ * void complete_callback(cvmx_usb_state_t *usb,
+ * cvmx_usb_callback_t reason,
+ * cvmx_usb_complete_t status,
+ * int pipe_handle,
+ * int submit_handle,
+ * int bytes_transferred,
+ * void *user_data);
+ * - @b usb is the cvmx_usb_state_t for the port.
+ * - @b reason will always be
+ * CVMX_USB_CALLBACK_TRANSFER_COMPLETE.
+ * - @b status will be one of the cvmx_usb_complete_t
+ * enumerations.
+ * - @b pipe_handle is the handle to the pipe the transaction
+ * was originally submitted on.
+ * - @b submit_handle is the handle returned by the original
+ * cvmx_usb_submit_* call.
+ * - @b bytes_transferred is the number of bytes successfully
+ * transferred in the transaction. This will be zero on most
+ * error conditions.
+ * - @b user_data is the void pointer originally passed along
+ * with the callback. Use this for any state information you
+ * need. For example, the Linux "urb" is stored in here in the
+ * Linux driver.
+ *
+ * In general your callback handler should use @b status and @b
+ * bytes_transferred to tell the operating system the how the
+ * transaction completed. Normally the pipe is not changed in
+ * this callback.
+ *
+ * <h2>Canceling transactions</h2>
+ *
+ * When a transaction is cancelled using cvmx_usb_cancel*(), the
+ * actual length of time until the complete callback is called
+ * can vary greatly. It may be called before cvmx_usb_cancel*()
+ * returns, or it may be called a number of usb frames in the
+ * future once the hardware frees the transaction. In either of
+ * these cases, the complete handler will receive
+ * CVMX_USB_COMPLETE_CANCEL.
+ *
+ * <h2>Handling pipes</h2>
+ *
+ * USB "pipes" is a software construct created by this API to
+ * enable the ordering of usb transactions to a device endpoint.
+ * Octeon's underlying hardware doesn't have any concept
+ * equivalent to "pipes". The hardware instead has eight
+ * channels that can be used simultaneously to have up to eight
+ * transaction in process at the same time. In order to maintain
+ * ordering in a pipe, the transactions for a pipe will only be
+ * active in one hardware channel at a time. From an API user's
+ * perspective, this doesn't matter but it can be helpful to
+ * keep this in mind when you are probing hardware while
+ * debugging.
+ *
+ * Also keep in mind that usb transactions contain state
+ * information about the previous transaction to the same
+ * endpoint. Each transaction has a PID toggle that changes 0/1
+ * between each sub packet. This is maintained in the pipe data
+ * structures. For this reason, you generally cannot create and
+ * destroy a pipe for every transaction. A sequence of
+ * transaction to the same endpoint must use the same pipe.
+ *
+ * <h2>Root Hub</h2>
+ *
+ * Some operating systems view the usb root port as a normal usb
+ * hub. These systems attempt to control the root hub with
+ * messages similar to the usb 2.0 spec for hub control and
+ * status. For these systems it may be necessary to write
+ * function to decode standard usb control messages into
+ * equivalent cvmx-usb API calls. As an example, the following
+ * code is used under Linux for some of the basic hub control
+ * messages.
+ *
+ * @code
+ * static int octeon_usb_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength)
+ * {
+ * cvmx_usb_state_t *usb = (cvmx_usb_state_t *)hcd->hcd_priv;
+ * cvmx_usb_port_status_t usb_port_status;
+ * int port_status;
+ * struct usb_hub_descriptor *desc;
+ * unsigned long flags;
+ *
+ * switch (typeReq)
+ * {
+ * case ClearHubFeature:
+ * DEBUG_ROOT_HUB("OcteonUSB: ClearHubFeature\n");
+ * switch (wValue)
+ * {
+ * case C_HUB_LOCAL_POWER:
+ * case C_HUB_OVER_CURRENT:
+ * // Nothing required here
+ * break;
+ * default:
+ * return -EINVAL;
+ * }
+ * break;
+ * case ClearPortFeature:
+ * DEBUG_ROOT_HUB("OcteonUSB: ClearPortFeature");
+ * if (wIndex != 1)
+ * {
+ * DEBUG_ROOT_HUB(" INVALID\n");
+ * return -EINVAL;
+ * }
+ *
+ * switch (wValue)
+ * {
+ * case USB_PORT_FEAT_ENABLE:
+ * DEBUG_ROOT_HUB(" ENABLE");
+ * local_irq_save(flags);
+ * cvmx_usb_disable(usb);
+ * local_irq_restore(flags);
+ * break;
+ * case USB_PORT_FEAT_SUSPEND:
+ * DEBUG_ROOT_HUB(" SUSPEND");
+ * // Not supported on Octeon
+ * break;
+ * case USB_PORT_FEAT_POWER:
+ * DEBUG_ROOT_HUB(" POWER");
+ * // Not supported on Octeon
+ * break;
+ * case USB_PORT_FEAT_INDICATOR:
+ * DEBUG_ROOT_HUB(" INDICATOR");
+ * // Port inidicator not supported
+ * break;
+ * case USB_PORT_FEAT_C_CONNECTION:
+ * DEBUG_ROOT_HUB(" C_CONNECTION");
+ * // Clears drivers internal connect status change flag
+ * cvmx_usb_set_status(usb, cvmx_usb_get_status(usb));
+ * break;
+ * case USB_PORT_FEAT_C_RESET:
+ * DEBUG_ROOT_HUB(" C_RESET");
+ * // Clears the driver's internal Port Reset Change flag
+ * cvmx_usb_set_status(usb, cvmx_usb_get_status(usb));
+ * break;
+ * case USB_PORT_FEAT_C_ENABLE:
+ * DEBUG_ROOT_HUB(" C_ENABLE");
+ * // Clears the driver's internal Port Enable/Disable Change flag
+ * cvmx_usb_set_status(usb, cvmx_usb_get_status(usb));
+ * break;
+ * case USB_PORT_FEAT_C_SUSPEND:
+ * DEBUG_ROOT_HUB(" C_SUSPEND");
+ * // Clears the driver's internal Port Suspend Change flag,
+ * which is set when resume signaling on the host port is
+ * complete
+ * break;
+ * case USB_PORT_FEAT_C_OVER_CURRENT:
+ * DEBUG_ROOT_HUB(" C_OVER_CURRENT");
+ * // Clears the driver's overcurrent Change flag
+ * cvmx_usb_set_status(usb, cvmx_usb_get_status(usb));
+ * break;
+ * default:
+ * DEBUG_ROOT_HUB(" UNKNOWN\n");
+ * return -EINVAL;
+ * }
+ * DEBUG_ROOT_HUB("\n");
+ * break;
+ * case GetHubDescriptor:
+ * DEBUG_ROOT_HUB("OcteonUSB: GetHubDescriptor\n");
+ * desc = (struct usb_hub_descriptor *)buf;
+ * desc->bDescLength = 9;
+ * desc->bDescriptorType = 0x29;
+ * desc->bNbrPorts = 1;
+ * desc->wHubCharacteristics = 0x08;
+ * desc->bPwrOn2PwrGood = 1;
+ * desc->bHubContrCurrent = 0;
+ * desc->bitmap[0] = 0;
+ * desc->bitmap[1] = 0xff;
+ * break;
+ * case GetHubStatus:
+ * DEBUG_ROOT_HUB("OcteonUSB: GetHubStatus\n");
+ * *(__le32 *)buf = 0;
+ * break;
+ * case GetPortStatus:
+ * DEBUG_ROOT_HUB("OcteonUSB: GetPortStatus");
+ * if (wIndex != 1)
+ * {
+ * DEBUG_ROOT_HUB(" INVALID\n");
+ * return -EINVAL;
+ * }
+ *
+ * usb_port_status = cvmx_usb_get_status(usb);
+ * port_status = 0;
+ *
+ * if (usb_port_status.connect_change)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_C_CONNECTION);
+ * DEBUG_ROOT_HUB(" C_CONNECTION");
+ * }
+ *
+ * if (usb_port_status.port_enabled)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_C_ENABLE);
+ * DEBUG_ROOT_HUB(" C_ENABLE");
+ * }
+ *
+ * if (usb_port_status.connected)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_CONNECTION);
+ * DEBUG_ROOT_HUB(" CONNECTION");
+ * }
+ *
+ * if (usb_port_status.port_enabled)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_ENABLE);
+ * DEBUG_ROOT_HUB(" ENABLE");
+ * }
+ *
+ * if (usb_port_status.port_over_current)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_OVER_CURRENT);
+ * DEBUG_ROOT_HUB(" OVER_CURRENT");
+ * }
+ *
+ * if (usb_port_status.port_powered)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_POWER);
+ * DEBUG_ROOT_HUB(" POWER");
+ * }
+ *
+ * if (usb_port_status.port_speed == CVMX_USB_SPEED_HIGH)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_HIGHSPEED);
+ * DEBUG_ROOT_HUB(" HIGHSPEED");
+ * }
+ * else if (usb_port_status.port_speed == CVMX_USB_SPEED_LOW)
+ * {
+ * port_status |= (1 << USB_PORT_FEAT_LOWSPEED);
+ * DEBUG_ROOT_HUB(" LOWSPEED");
+ * }
+ *
+ * *((__le32 *)buf) = cpu_to_le32(port_status);
+ * DEBUG_ROOT_HUB("\n");
+ * break;
+ * case SetHubFeature:
+ * DEBUG_ROOT_HUB("OcteonUSB: SetHubFeature\n");
+ * // No HUB features supported
+ * break;
+ * case SetPortFeature:
+ * DEBUG_ROOT_HUB("OcteonUSB: SetPortFeature");
+ * if (wIndex != 1)
+ * {
+ * DEBUG_ROOT_HUB(" INVALID\n");
+ * return -EINVAL;
+ * }
+ *
+ * switch (wValue)
+ * {
+ * case USB_PORT_FEAT_SUSPEND:
+ * DEBUG_ROOT_HUB(" SUSPEND\n");
+ * return -EINVAL;
+ * case USB_PORT_FEAT_POWER:
+ * DEBUG_ROOT_HUB(" POWER\n");
+ * return -EINVAL;
+ * case USB_PORT_FEAT_RESET:
+ * DEBUG_ROOT_HUB(" RESET\n");
+ * local_irq_save(flags);
+ * cvmx_usb_disable(usb);
+ * if (cvmx_usb_enable(usb))
+ * DEBUG_ERROR("Failed to enable the port\n");
+ * local_irq_restore(flags);
+ * return 0;
+ * case USB_PORT_FEAT_INDICATOR:
+ * DEBUG_ROOT_HUB(" INDICATOR\n");
+ * // Not supported
+ * break;
+ * default:
+ * DEBUG_ROOT_HUB(" UNKNOWN\n");
+ * return -EINVAL;
+ * }
+ * break;
+ * default:
+ * DEBUG_ROOT_HUB("OcteonUSB: Unknown root hub request\n");
+ * return -EINVAL;
+ * }
+ * return 0;
+ * }
+ * @endcode
+ *
+ * <h2>Interrupts</h2>
+ *
+ * If you plan on using usb interrupts, cvmx_usb_poll() must be
+ * called on every usb interrupt. It will read the usb state,
+ * call any needed callbacks, and schedule transactions as
+ * needed. Your device driver needs only to hookup an interrupt
+ * handler and call cvmx_usb_poll(). Octeon's usb port 0 causes
+ * CIU bit CIU_INT*_SUM0[USB] to be set (bit 56). For port 1,
+ * CIU bit CIU_INT_SUM1[USB1] is set (bit 17). How these bits
+ * are turned into interrupt numbers is operating system
+ * specific. For Linux, there are the convenient defines
+ * OCTEON_IRQ_USB0 and OCTEON_IRQ_USB1 for the IRQ numbers.
+ *
+ * If you aren't using interrupts, simple call cvmx_usb_poll()
+ * in your main processing loop.
+ *
+ * <hr>$Revision: 32636 $<hr>
+ */
+
+#ifndef __CVMX_USB_H__
+#define __CVMX_USB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumerations representing the status of function calls.
+ */
+typedef enum
+{
+ CVMX_USB_SUCCESS = 0, /**< There were no errors */
+ CVMX_USB_INVALID_PARAM = -1, /**< A parameter to the function was invalid */
+ CVMX_USB_NO_MEMORY = -2, /**< Insufficient resources were available for the request */
+ CVMX_USB_BUSY = -3, /**< The resource is busy and cannot service the request */
+ CVMX_USB_TIMEOUT = -4, /**< Waiting for an action timed out */
+ CVMX_USB_INCORRECT_MODE = -5, /**< The function call doesn't work in the current USB
+ mode. This happens when host only functions are
+ called in device mode or vice versa */
+} cvmx_usb_status_t;
+
+/**
+ * Enumerations representing the possible USB device speeds
+ */
+typedef enum
+{
+ CVMX_USB_SPEED_HIGH = 0, /**< Device is operation at 480Mbps */
+ CVMX_USB_SPEED_FULL = 1, /**< Device is operation at 12Mbps */
+ CVMX_USB_SPEED_LOW = 2, /**< Device is operation at 1.5Mbps */
+} cvmx_usb_speed_t;
+
+/**
+ * Enumeration representing the possible USB transfer types.
+ */
+typedef enum
+{
+ CVMX_USB_TRANSFER_CONTROL = 0, /**< USB transfer type control for hub and status transfers */
+ CVMX_USB_TRANSFER_ISOCHRONOUS = 1, /**< USB transfer type isochronous for low priority periodic transfers */
+ CVMX_USB_TRANSFER_BULK = 2, /**< USB transfer type bulk for large low priority transfers */
+ CVMX_USB_TRANSFER_INTERRUPT = 3, /**< USB transfer type interrupt for high priority periodic transfers */
+} cvmx_usb_transfer_t;
+
+/**
+ * Enumeration of the transfer directions
+ */
+typedef enum
+{
+ CVMX_USB_DIRECTION_OUT, /**< Data is transferring from Octeon to the device/host */
+ CVMX_USB_DIRECTION_IN, /**< Data is transferring from the device/host to Octeon */
+} cvmx_usb_direction_t;
+
+/**
+ * Enumeration of all possible status codes passed to callback
+ * functions.
+ */
+typedef enum
+{
+ CVMX_USB_COMPLETE_SUCCESS, /**< The transaction / operation finished without any errors */
+ CVMX_USB_COMPLETE_SHORT, /**< FIXME: This is currently not implemented */
+ CVMX_USB_COMPLETE_CANCEL, /**< The transaction was canceled while in flight by a user call to cvmx_usb_cancel* */
+ CVMX_USB_COMPLETE_ERROR, /**< The transaction aborted with an unexpected error status */
+ CVMX_USB_COMPLETE_STALL, /**< The transaction received a USB STALL response from the device */
+ CVMX_USB_COMPLETE_XACTERR, /**< The transaction failed with an error from the device even after a number of retries */
+ CVMX_USB_COMPLETE_DATATGLERR, /**< The transaction failed with a data toggle error even after a number of retries */
+ CVMX_USB_COMPLETE_BABBLEERR, /**< The transaction failed with a babble error */
+ CVMX_USB_COMPLETE_FRAMEERR, /**< The transaction failed with a frame error even after a number of retries */
+} cvmx_usb_complete_t;
+
+/**
+ * Structure returned containing the USB port status information.
+ */
+typedef struct
+{
+ uint32_t reserved : 25;
+ uint32_t port_enabled : 1; /**< 1 = Usb port is enabled, 0 = disabled */
+ uint32_t port_over_current : 1; /**< 1 = Over current detected, 0 = Over current not detected. Octeon doesn't support over current detection */
+ uint32_t port_powered : 1; /**< 1 = Port power is being supplied to the device, 0 = power is off. Octeon doesn't support turning port power off */
+ cvmx_usb_speed_t port_speed : 2; /**< Current port speed */
+ uint32_t connected : 1; /**< 1 = A device is connected to the port, 0 = No device is connected */
+ uint32_t connect_change : 1; /**< 1 = Device connected state changed since the last set status call */
+} cvmx_usb_port_status_t;
+
+/**
+ * This is the structure of a Control packet header
+ */
+typedef union
+{
+ uint64_t u64;
+ struct
+ {
+ uint64_t request_type : 8; /**< Bit 7 tells the direction: 1=IN, 0=OUT */
+ uint64_t request : 8; /**< The standard usb request to make */
+ uint64_t value : 16; /**< Value parameter for the request in little endian format */
+ uint64_t index : 16; /**< Index for the request in little endian format */
+ uint64_t length : 16; /**< Length of the data associated with this request in little endian format */
+ } s;
+} cvmx_usb_control_header_t;
+
+/**
+ * Descriptor for Isochronous packets
+ */
+typedef struct
+{
+ int offset; /**< This is the offset in bytes into the main buffer where this data is stored */
+ int length; /**< This is the length in bytes of the data */
+ cvmx_usb_complete_t status; /**< This is the status of this individual packet transfer */
+} cvmx_usb_iso_packet_t;
+
+/**
+ * Possible callback reasons for the USB API.
+ */
+typedef enum
+{
+ CVMX_USB_CALLBACK_TRANSFER_COMPLETE,
+ /**< A callback of this type is called when a submitted transfer
+ completes. The completion callback will be called even if the
+ transfer fails or is canceled. The status parameter will
+ contain details of why he callback was called. */
+ CVMX_USB_CALLBACK_PORT_CHANGED, /**< The status of the port changed. For example, someone may have
+ plugged a device in. The status parameter contains
+ CVMX_USB_COMPLETE_SUCCESS. Use cvmx_usb_get_status() to get
+ the new port status. */
+ __CVMX_USB_CALLBACK_END /**< Do not use. Used internally for array bounds */
+} cvmx_usb_callback_t;
+
+/**
+ * USB state internal data. The contents of this structure
+ * may change in future SDKs. No data in it should be referenced
+ * by user's of this API.
+ */
+typedef struct
+{
+ char data[65536];
+} cvmx_usb_state_t;
+
+/**
+ * USB callback functions are always of the following type.
+ * The parameters are as follows:
+ * - state = USB device state populated by
+ * cvmx_usb_initialize().
+ * - reason = The cvmx_usb_callback_t used to register
+ * the callback.
+ * - status = The cvmx_usb_complete_t representing the
+ * status code of a transaction.
+ * - pipe_handle = The Pipe that caused this callback, or
+ * -1 if this callback wasn't associated with a pipe.
+ * - submit_handle = Transfer submit handle causing this
+ * callback, or -1 if this callback wasn't associated
+ * with a transfer.
+ * - Actual number of bytes transfer.
+ * - user_data = The user pointer supplied to the
+ * function cvmx_usb_submit() or
+ * cvmx_usb_register_callback() */
+typedef void (*cvmx_usb_callback_func_t)(cvmx_usb_state_t *state,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_complete_t status,
+ int pipe_handle, int submit_handle,
+ int bytes_transferred, void *user_data);
+
+/**
+ * Flags to pass the initialization function.
+ */
+typedef enum
+{
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_XI = 1<<0, /**< The USB port uses a 12MHz crystal as clock source
+ at USB_XO and USB_XI. */
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_XO_GND = 1<<1, /**< The USB port uses 12/24/48MHz 2.5V board clock
+ source at USB_XO. USB_XI should be tied to GND.*/
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_AUTO = 0, /**< Automatically determine clock type based on function
+ in cvmx-helper-board.c. */
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_MHZ_MASK = 3<<3, /**< Mask for clock speed field */
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_12MHZ = 1<<3, /**< Speed of reference clock or crystal */
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_24MHZ = 2<<3, /**< Speed of reference clock */
+ CVMX_USB_INITIALIZE_FLAGS_CLOCK_48MHZ = 3<<3, /**< Speed of reference clock */
+ /* Bits 3-4 used to encode the clock frequency */
+ CVMX_USB_INITIALIZE_FLAGS_NO_DMA = 1<<5, /**< Disable DMA and used polled IO for data transfer use for the USB */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS = 1<<16, /**< Enable extra console output for debugging USB transfers */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLBACKS = 1<<17, /**< Enable extra console output for debugging USB callbacks */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_INFO = 1<<18, /**< Enable extra console output for USB informational data */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_CALLS = 1<<19, /**< Enable extra console output for every function call */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_CSRS = 1<<20, /**< Enable extra console output for every CSR access */
+ CVMX_USB_INITIALIZE_FLAGS_DEBUG_ALL = ((CVMX_USB_INITIALIZE_FLAGS_DEBUG_CSRS<<1)-1) - (CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS-1),
+} cvmx_usb_initialize_flags_t;
+
+/**
+ * Flags for passing when a pipe is created. Currently no flags
+ * need to be passed.
+ */
+typedef enum
+{
+ CVMX_USB_PIPE_FLAGS_DEBUG_TRANSFERS = 1<<15,/**< Used to display CVMX_USB_INITIALIZE_FLAGS_DEBUG_TRANSFERS for a specific pipe only */
+ __CVMX_USB_PIPE_FLAGS_OPEN = 1<<16, /**< Used internally to determine if a pipe is open. Do not use */
+ __CVMX_USB_PIPE_FLAGS_SCHEDULED = 1<<17, /**< Used internally to determine if a pipe is actively using hardware. Do not use */
+ __CVMX_USB_PIPE_FLAGS_NEED_PING = 1<<18, /**< Used internally to determine if a high speed pipe is in the ping state. Do not use */
+} cvmx_usb_pipe_flags_t;
+
+/**
+ * Return the number of USB ports supported by this Octeon
+ * chip. If the chip doesn't support USB, or is not supported
+ * by this API, a zero will be returned. Most Octeon chips
+ * support one usb port, but some support two ports.
+ * cvmx_usb_initialize() must be called on independent
+ * cvmx_usb_state_t structures.
+ *
+ * @return Number of port, zero if usb isn't supported
+ */
+extern int cvmx_usb_get_num_ports(void);
+
+/**
+ * Initialize a USB port for use. This must be called before any
+ * other access to the Octeon USB port is made. The port starts
+ * off in the disabled state.
+ *
+ * @param state Pointer to an empty cvmx_usb_state_t structure
+ * that will be populated by the initialize call.
+ * This structure is then passed to all other USB
+ * functions.
+ * @param usb_port_number
+ * Which Octeon USB port to initialize.
+ * @param flags Flags to control hardware initialization. See
+ * cvmx_usb_initialize_flags_t for the flag
+ * definitions. Some flags are mandatory.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_initialize(cvmx_usb_state_t *state,
+ int usb_port_number,
+ cvmx_usb_initialize_flags_t flags);
+
+/**
+ * Shutdown a USB port after a call to cvmx_usb_initialize().
+ * The port should be disabled with all pipes closed when this
+ * function is called.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_shutdown(cvmx_usb_state_t *state);
+
+/**
+ * Enable a USB port. After this call succeeds, the USB port is
+ * online and servicing requests.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_enable(cvmx_usb_state_t *state);
+
+/**
+ * Disable a USB port. After this call the USB port will not
+ * generate data transfers and will not generate events.
+ * Transactions in process will fail and call their
+ * associated callbacks.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_disable(cvmx_usb_state_t *state);
+
+/**
+ * Get the current state of the USB port. Use this call to
+ * determine if the usb port has anything connected, is enabled,
+ * or has some sort of error condition. The return value of this
+ * call has "changed" bits to signal of the value of some fields
+ * have changed between calls. These "changed" fields are based
+ * on the last call to cvmx_usb_set_status(). In order to clear
+ * them, you must update the status through cvmx_usb_set_status().
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return Port status information
+ */
+extern cvmx_usb_port_status_t cvmx_usb_get_status(cvmx_usb_state_t *state);
+
+/**
+ * Set the current state of the USB port. The status is used as
+ * a reference for the "changed" bits returned by
+ * cvmx_usb_get_status(). Other than serving as a reference, the
+ * status passed to this function is not used. No fields can be
+ * changed through this call.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param port_status
+ * Port status to set, most like returned by cvmx_usb_get_status()
+ */
+extern void cvmx_usb_set_status(cvmx_usb_state_t *state, cvmx_usb_port_status_t port_status);
+
+/**
+ * Open a virtual pipe between the host and a USB device. A pipe
+ * must be opened before data can be transferred between a device
+ * and Octeon.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param flags Optional pipe flags defined in
+ * cvmx_usb_pipe_flags_t.
+ * @param device_addr
+ * USB device address to open the pipe to
+ * (0-127).
+ * @param endpoint_num
+ * USB endpoint number to open the pipe to
+ * (0-15).
+ * @param device_speed
+ * The speed of the device the pipe is going
+ * to. This must match the device's speed,
+ * which may be different than the port speed.
+ * @param max_packet The maximum packet length the device can
+ * transmit/receive (low speed=0-8, full
+ * speed=0-1023, high speed=0-1024). This value
+ * comes from the standard endpoint descriptor
+ * field wMaxPacketSize bits <10:0>.
+ * @param transfer_type
+ * The type of transfer this pipe is for.
+ * @param transfer_dir
+ * The direction the pipe is in. This is not
+ * used for control pipes.
+ * @param interval For ISOCHRONOUS and INTERRUPT transfers,
+ * this is how often the transfer is scheduled
+ * for. All other transfers should specify
+ * zero. The units are in frames (8000/sec at
+ * high speed, 1000/sec for full speed).
+ * @param multi_count
+ * For high speed devices, this is the maximum
+ * allowed number of packet per microframe.
+ * Specify zero for non high speed devices. This
+ * value comes from the standard endpoint descriptor
+ * field wMaxPacketSize bits <12:11>.
+ * @param hub_device_addr
+ * Hub device address this device is connected
+ * to. Devices connected directly to Octeon
+ * use zero. This is only used when the device
+ * is full/low speed behind a high speed hub.
+ * The address will be of the high speed hub,
+ * not and full speed hubs after it.
+ * @param hub_port Which port on the hub the device is
+ * connected. Use zero for devices connected
+ * directly to Octeon. Like hub_device_addr,
+ * this is only used for full/low speed
+ * devices behind a high speed hub.
+ *
+ * @return A non negative value is a pipe handle. Negative
+ * values are failure codes from cvmx_usb_status_t.
+ */
+extern int cvmx_usb_open_pipe(cvmx_usb_state_t *state,
+ cvmx_usb_pipe_flags_t flags,
+ int device_addr, int endpoint_num,
+ cvmx_usb_speed_t device_speed, int max_packet,
+ cvmx_usb_transfer_t transfer_type,
+ cvmx_usb_direction_t transfer_dir, int interval,
+ int multi_count, int hub_device_addr,
+ int hub_port);
+
+/**
+ * Call to submit a USB Bulk transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+extern int cvmx_usb_submit_bulk(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data);
+
+/**
+ * Call to submit a USB Interrupt transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+extern int cvmx_usb_submit_interrupt(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data);
+
+/**
+ * Call to submit a USB Control transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param control_header
+ * USB 8 byte control header physical address.
+ * Note that this is NOT A POINTER, but the
+ * full 64bit physical address of the buffer.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+extern int cvmx_usb_submit_control(cvmx_usb_state_t *state, int pipe_handle,
+ uint64_t control_header,
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data);
+
+/**
+ * Flags to pass the cvmx_usb_submit_isochronous() function.
+ */
+typedef enum
+{
+ CVMX_USB_ISOCHRONOUS_FLAGS_ALLOW_SHORT = 1<<0, /**< Do not return an error if a transfer is less than the maximum packet size of the device */
+ CVMX_USB_ISOCHRONOUS_FLAGS_ASAP = 1<<1, /**< Schedule the transaction as soon as possible */
+} cvmx_usb_isochronous_flags_t;
+
+/**
+ * Call to submit a USB Isochronous transfer to a pipe.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Handle to the pipe for the transfer.
+ * @param start_frame
+ * Number of frames into the future to schedule
+ * this transaction.
+ * @param flags Flags to control the transfer. See
+ * cvmx_usb_isochronous_flags_t for the flag
+ * definitions.
+ * @param number_packets
+ * Number of sequential packets to transfer.
+ * "packets" is a pointer to an array of this
+ * many packet structures.
+ * @param packets Description of each transfer packet as
+ * defined by cvmx_usb_iso_packet_t. The array
+ * pointed to here must stay valid until the
+ * complete callback is called.
+ * @param buffer Physical address of the data buffer in
+ * memory. Note that this is NOT A POINTER, but
+ * the full 64bit physical address of the
+ * buffer. This may be zero if buffer_length is
+ * zero.
+ * @param buffer_length
+ * Length of buffer in bytes.
+ * @param callback Function to call when this transaction
+ * completes. If the return value of this
+ * function isn't an error, then this function
+ * is guaranteed to be called when the
+ * transaction completes. If this parameter is
+ * NULL, then the generic callback registered
+ * through cvmx_usb_register_callback is
+ * called. If both are NULL, then there is no
+ * way to know when a transaction completes.
+ * @param user_data User supplied data returned when the
+ * callback is called. This is only used if
+ * callback in not NULL.
+ *
+ * @return A submitted transaction handle or negative on
+ * failure. Negative values are failure codes from
+ * cvmx_usb_status_t.
+ */
+extern int cvmx_usb_submit_isochronous(cvmx_usb_state_t *state, int pipe_handle,
+ int start_frame, int flags,
+ int number_packets,
+ cvmx_usb_iso_packet_t packets[],
+ uint64_t buffer, int buffer_length,
+ cvmx_usb_callback_func_t callback,
+ void *user_data);
+
+/**
+ * Cancel one outstanding request in a pipe. Canceling a request
+ * can fail if the transaction has already completed before cancel
+ * is called. Even after a successful cancel call, it may take
+ * a frame or two for the cvmx_usb_poll() function to call the
+ * associated callback.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to cancel requests in.
+ * @param submit_handle
+ * Handle to transaction to cancel, returned by the submit function.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_cancel(cvmx_usb_state_t *state,
+ int pipe_handle, int submit_handle);
+
+
+/**
+ * Cancel all outstanding requests in a pipe. Logically all this
+ * does is call cvmx_usb_cancel() in a loop.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to cancel requests in.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_cancel_all(cvmx_usb_state_t *state,
+ int pipe_handle);
+
+/**
+ * Close a pipe created with cvmx_usb_open_pipe().
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param pipe_handle
+ * Pipe handle to close.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t. CVMX_USB_BUSY is returned if the
+ * pipe has outstanding transfers.
+ */
+extern cvmx_usb_status_t cvmx_usb_close_pipe(cvmx_usb_state_t *state,
+ int pipe_handle);
+
+/**
+ * Register a function to be called when various USB events occur.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ * @param reason Which event to register for.
+ * @param callback Function to call when the event occurs.
+ * @param user_data User data parameter to the function.
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_register_callback(cvmx_usb_state_t *state,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_callback_func_t callback,
+ void *user_data);
+
+/**
+ * Get the current USB protocol level frame number. The frame
+ * number is always in the range of 0-0x7ff.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return USB frame number
+ */
+extern int cvmx_usb_get_frame_number(cvmx_usb_state_t *state);
+
+/**
+ * Poll the USB block for status and call all needed callback
+ * handlers. This function is meant to be called in the interrupt
+ * handler for the USB controller. It can also be called
+ * periodically in a loop for non-interrupt based operation.
+ *
+ * @param state USB device state populated by
+ * cvmx_usb_initialize().
+ *
+ * @return CVMX_USB_SUCCESS or a negative error code defined in
+ * cvmx_usb_status_t.
+ */
+extern cvmx_usb_status_t cvmx_usb_poll(cvmx_usb_state_t *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CVMX_USB_H__ */
diff --git a/drivers/staging/octeon-usb/cvmx-usbcx-defs.h b/drivers/staging/octeon-usb/cvmx-usbcx-defs.h
new file mode 100644
index 000000000000..394e84662ce8
--- /dev/null
+++ b/drivers/staging/octeon-usb/cvmx-usbcx-defs.h
@@ -0,0 +1,1551 @@
+/***********************license start***************
+ * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
+ * reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+
+ * * Neither the name of Cavium Networks nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+
+ * This Software, including technical data, may be subject to U.S. export
+ * control laws, including the U.S. Export Administration Act and its associated
+ * regulations, and may be subject to export or import regulations in other
+ * countries.
+
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+ * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
+ * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
+ * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION
+ * OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
+ * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
+ * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
+ * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
+ * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
+ * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+ ***********************license end**************************************/
+
+
+/**
+ * cvmx-usbcx-defs.h
+ *
+ * Configuration and status register (CSR) type definitions for
+ * Octeon usbcx.
+ *
+ */
+#ifndef __CVMX_USBCX_TYPEDEFS_H__
+#define __CVMX_USBCX_TYPEDEFS_H__
+
+#define CVMX_USBCXBASE 0x00016F0010000000ull
+#define CVMX_USBCXREG1(reg, bid) \
+ (CVMX_ADD_IO_SEG(CVMX_USBCXBASE | reg) + \
+ ((bid) & 1) * 0x100000000000ull)
+#define CVMX_USBCXREG2(reg, bid, off) \
+ (CVMX_ADD_IO_SEG(CVMX_USBCXBASE | reg) + \
+ (((off) & 7) + ((bid) & 1) * 0x8000000000ull) * 32)
+
+#define CVMX_USBCX_GAHBCFG(bid) CVMX_USBCXREG1(0x008, bid)
+#define CVMX_USBCX_GHWCFG3(bid) CVMX_USBCXREG1(0x04c, bid)
+#define CVMX_USBCX_GINTMSK(bid) CVMX_USBCXREG1(0x018, bid)
+#define CVMX_USBCX_GINTSTS(bid) CVMX_USBCXREG1(0x014, bid)
+#define CVMX_USBCX_GNPTXFSIZ(bid) CVMX_USBCXREG1(0x028, bid)
+#define CVMX_USBCX_GNPTXSTS(bid) CVMX_USBCXREG1(0x02c, bid)
+#define CVMX_USBCX_GOTGCTL(bid) CVMX_USBCXREG1(0x000, bid)
+#define CVMX_USBCX_GRSTCTL(bid) CVMX_USBCXREG1(0x010, bid)
+#define CVMX_USBCX_GRXFSIZ(bid) CVMX_USBCXREG1(0x024, bid)
+#define CVMX_USBCX_GRXSTSPH(bid) CVMX_USBCXREG1(0x020, bid)
+#define CVMX_USBCX_GUSBCFG(bid) CVMX_USBCXREG1(0x00c, bid)
+#define CVMX_USBCX_HAINT(bid) CVMX_USBCXREG1(0x414, bid)
+#define CVMX_USBCX_HAINTMSK(bid) CVMX_USBCXREG1(0x418, bid)
+#define CVMX_USBCX_HCCHARX(off, bid) CVMX_USBCXREG2(0x500, bid, off)
+#define CVMX_USBCX_HCFG(bid) CVMX_USBCXREG1(0x400, bid)
+#define CVMX_USBCX_HCINTMSKX(off, bid) CVMX_USBCXREG2(0x50c, bid, off)
+#define CVMX_USBCX_HCINTX(off, bid) CVMX_USBCXREG2(0x508, bid, off)
+#define CVMX_USBCX_HCSPLTX(off, bid) CVMX_USBCXREG2(0x504, bid, off)
+#define CVMX_USBCX_HCTSIZX(off, bid) CVMX_USBCXREG2(0x510, bid, off)
+#define CVMX_USBCX_HFIR(bid) CVMX_USBCXREG1(0x404, bid)
+#define CVMX_USBCX_HFNUM(bid) CVMX_USBCXREG1(0x408, bid)
+#define CVMX_USBCX_HPRT(bid) CVMX_USBCXREG1(0x440, bid)
+#define CVMX_USBCX_HPTXFSIZ(bid) CVMX_USBCXREG1(0x100, bid)
+#define CVMX_USBCX_HPTXSTS(bid) CVMX_USBCXREG1(0x410, bid)
+
+/**
+ * cvmx_usbc#_gahbcfg
+ *
+ * Core AHB Configuration Register (GAHBCFG)
+ *
+ * This register can be used to configure the core after power-on or a change in
+ * mode of operation. This register mainly contains AHB system-related
+ * configuration parameters. The AHB is the processor interface to the O2P USB
+ * core. In general, software need not know about this interface except to
+ * program the values as specified.
+ *
+ * The application must program this register as part of the O2P USB core
+ * initialization. Do not change this register after the initial programming.
+ */
+union cvmx_usbcx_gahbcfg {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gahbcfg_s
+ * @ptxfemplvl: Periodic TxFIFO Empty Level (PTxFEmpLvl)
+ * Software should set this bit to 0x1.
+ * Indicates when the Periodic TxFIFO Empty Interrupt bit in the
+ * Core Interrupt register (GINTSTS.PTxFEmp) is triggered. This
+ * bit is used only in Slave mode.
+ * * 1'b0: GINTSTS.PTxFEmp interrupt indicates that the Periodic
+ * TxFIFO is half empty
+ * * 1'b1: GINTSTS.PTxFEmp interrupt indicates that the Periodic
+ * TxFIFO is completely empty
+ * @nptxfemplvl: Non-Periodic TxFIFO Empty Level (NPTxFEmpLvl)
+ * Software should set this bit to 0x1.
+ * Indicates when the Non-Periodic TxFIFO Empty Interrupt bit in
+ * the Core Interrupt register (GINTSTS.NPTxFEmp) is triggered.
+ * This bit is used only in Slave mode.
+ * * 1'b0: GINTSTS.NPTxFEmp interrupt indicates that the Non-
+ * Periodic TxFIFO is half empty
+ * * 1'b1: GINTSTS.NPTxFEmp interrupt indicates that the Non-
+ * Periodic TxFIFO is completely empty
+ * @dmaen: DMA Enable (DMAEn)
+ * * 1'b0: Core operates in Slave mode
+ * * 1'b1: Core operates in a DMA mode
+ * @hbstlen: Burst Length/Type (HBstLen)
+ * This field has not effect and should be left as 0x0.
+ * @glblintrmsk: Global Interrupt Mask (GlblIntrMsk)
+ * Software should set this field to 0x1.
+ * The application uses this bit to mask or unmask the interrupt
+ * line assertion to itself. Irrespective of this bit's setting,
+ * the interrupt status registers are updated by the core.
+ * * 1'b0: Mask the interrupt assertion to the application.
+ * * 1'b1: Unmask the interrupt assertion to the application.
+ */
+ struct cvmx_usbcx_gahbcfg_s {
+ uint32_t reserved_9_31 : 23;
+ uint32_t ptxfemplvl : 1;
+ uint32_t nptxfemplvl : 1;
+ uint32_t reserved_6_6 : 1;
+ uint32_t dmaen : 1;
+ uint32_t hbstlen : 4;
+ uint32_t glblintrmsk : 1;
+ } s;
+};
+typedef union cvmx_usbcx_gahbcfg cvmx_usbcx_gahbcfg_t;
+
+/**
+ * cvmx_usbc#_ghwcfg3
+ *
+ * User HW Config3 Register (GHWCFG3)
+ *
+ * This register contains the configuration options of the O2P USB core.
+ */
+union cvmx_usbcx_ghwcfg3 {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_ghwcfg3_s
+ * @dfifodepth: DFIFO Depth (DfifoDepth)
+ * This value is in terms of 32-bit words.
+ * * Minimum value is 32
+ * * Maximum value is 32768
+ * @ahbphysync: AHB and PHY Synchronous (AhbPhySync)
+ * Indicates whether AHB and PHY clocks are synchronous to
+ * each other.
+ * * 1'b0: No
+ * * 1'b1: Yes
+ * This bit is tied to 1.
+ * @rsttype: Reset Style for Clocked always Blocks in RTL (RstType)
+ * * 1'b0: Asynchronous reset is used in the core
+ * * 1'b1: Synchronous reset is used in the core
+ * @optfeature: Optional Features Removed (OptFeature)
+ * Indicates whether the User ID register, GPIO interface ports,
+ * and SOF toggle and counter ports were removed for gate count
+ * optimization.
+ * @vendor_control_interface_support: Vendor Control Interface Support
+ * * 1'b0: Vendor Control Interface is not available on the core.
+ * * 1'b1: Vendor Control Interface is available.
+ * @i2c_selection: I2C Selection
+ * * 1'b0: I2C Interface is not available on the core.
+ * * 1'b1: I2C Interface is available on the core.
+ * @otgen: OTG Function Enabled (OtgEn)
+ * The application uses this bit to indicate the O2P USB core's
+ * OTG capabilities.
+ * * 1'b0: Not OTG capable
+ * * 1'b1: OTG Capable
+ * @pktsizewidth: Width of Packet Size Counters (PktSizeWidth)
+ * * 3'b000: 4 bits
+ * * 3'b001: 5 bits
+ * * 3'b010: 6 bits
+ * * 3'b011: 7 bits
+ * * 3'b100: 8 bits
+ * * 3'b101: 9 bits
+ * * 3'b110: 10 bits
+ * * Others: Reserved
+ * @xfersizewidth: Width of Transfer Size Counters (XferSizeWidth)
+ * * 4'b0000: 11 bits
+ * * 4'b0001: 12 bits
+ * - ...
+ * * 4'b1000: 19 bits
+ * * Others: Reserved
+ */
+ struct cvmx_usbcx_ghwcfg3_s {
+ uint32_t dfifodepth : 16;
+ uint32_t reserved_13_15 : 3;
+ uint32_t ahbphysync : 1;
+ uint32_t rsttype : 1;
+ uint32_t optfeature : 1;
+ uint32_t vendor_control_interface_support : 1;
+ uint32_t i2c_selection : 1;
+ uint32_t otgen : 1;
+ uint32_t pktsizewidth : 3;
+ uint32_t xfersizewidth : 4;
+ } s;
+};
+typedef union cvmx_usbcx_ghwcfg3 cvmx_usbcx_ghwcfg3_t;
+
+/**
+ * cvmx_usbc#_gintmsk
+ *
+ * Core Interrupt Mask Register (GINTMSK)
+ *
+ * This register works with the Core Interrupt register to interrupt the
+ * application. When an interrupt bit is masked, the interrupt associated with
+ * that bit will not be generated. However, the Core Interrupt (GINTSTS)
+ * register bit corresponding to that interrupt will still be set.
+ * Mask interrupt: 1'b0, Unmask interrupt: 1'b1
+ */
+union cvmx_usbcx_gintmsk {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gintmsk_s
+ * @wkupintmsk: Resume/Remote Wakeup Detected Interrupt Mask
+ * (WkUpIntMsk)
+ * @sessreqintmsk: Session Request/New Session Detected Interrupt Mask
+ * (SessReqIntMsk)
+ * @disconnintmsk: Disconnect Detected Interrupt Mask (DisconnIntMsk)
+ * @conidstschngmsk: Connector ID Status Change Mask (ConIDStsChngMsk)
+ * @ptxfempmsk: Periodic TxFIFO Empty Mask (PTxFEmpMsk)
+ * @hchintmsk: Host Channels Interrupt Mask (HChIntMsk)
+ * @prtintmsk: Host Port Interrupt Mask (PrtIntMsk)
+ * @fetsuspmsk: Data Fetch Suspended Mask (FetSuspMsk)
+ * @incomplpmsk: Incomplete Periodic Transfer Mask (incomplPMsk)
+ * Incomplete Isochronous OUT Transfer Mask
+ * (incompISOOUTMsk)
+ * @incompisoinmsk: Incomplete Isochronous IN Transfer Mask
+ * (incompISOINMsk)
+ * @oepintmsk: OUT Endpoints Interrupt Mask (OEPIntMsk)
+ * @inepintmsk: IN Endpoints Interrupt Mask (INEPIntMsk)
+ * @epmismsk: Endpoint Mismatch Interrupt Mask (EPMisMsk)
+ * @eopfmsk: End of Periodic Frame Interrupt Mask (EOPFMsk)
+ * @isooutdropmsk: Isochronous OUT Packet Dropped Interrupt Mask
+ * (ISOOutDropMsk)
+ * @enumdonemsk: Enumeration Done Mask (EnumDoneMsk)
+ * @usbrstmsk: USB Reset Mask (USBRstMsk)
+ * @usbsuspmsk: USB Suspend Mask (USBSuspMsk)
+ * @erlysuspmsk: Early Suspend Mask (ErlySuspMsk)
+ * @i2cint: I2C Interrupt Mask (I2CINT)
+ * @ulpickintmsk: ULPI Carkit Interrupt Mask (ULPICKINTMsk)
+ * I2C Carkit Interrupt Mask (I2CCKINTMsk)
+ * @goutnakeffmsk: Global OUT NAK Effective Mask (GOUTNakEffMsk)
+ * @ginnakeffmsk: Global Non-Periodic IN NAK Effective Mask
+ * (GINNakEffMsk)
+ * @nptxfempmsk: Non-Periodic TxFIFO Empty Mask (NPTxFEmpMsk)
+ * @rxflvlmsk: Receive FIFO Non-Empty Mask (RxFLvlMsk)
+ * @sofmsk: Start of (micro)Frame Mask (SofMsk)
+ * @otgintmsk: OTG Interrupt Mask (OTGIntMsk)
+ * @modemismsk: Mode Mismatch Interrupt Mask (ModeMisMsk)
+ */
+ struct cvmx_usbcx_gintmsk_s {
+ uint32_t wkupintmsk : 1;
+ uint32_t sessreqintmsk : 1;
+ uint32_t disconnintmsk : 1;
+ uint32_t conidstschngmsk : 1;
+ uint32_t reserved_27_27 : 1;
+ uint32_t ptxfempmsk : 1;
+ uint32_t hchintmsk : 1;
+ uint32_t prtintmsk : 1;
+ uint32_t reserved_23_23 : 1;
+ uint32_t fetsuspmsk : 1;
+ uint32_t incomplpmsk : 1;
+ uint32_t incompisoinmsk : 1;
+ uint32_t oepintmsk : 1;
+ uint32_t inepintmsk : 1;
+ uint32_t epmismsk : 1;
+ uint32_t reserved_16_16 : 1;
+ uint32_t eopfmsk : 1;
+ uint32_t isooutdropmsk : 1;
+ uint32_t enumdonemsk : 1;
+ uint32_t usbrstmsk : 1;
+ uint32_t usbsuspmsk : 1;
+ uint32_t erlysuspmsk : 1;
+ uint32_t i2cint : 1;
+ uint32_t ulpickintmsk : 1;
+ uint32_t goutnakeffmsk : 1;
+ uint32_t ginnakeffmsk : 1;
+ uint32_t nptxfempmsk : 1;
+ uint32_t rxflvlmsk : 1;
+ uint32_t sofmsk : 1;
+ uint32_t otgintmsk : 1;
+ uint32_t modemismsk : 1;
+ uint32_t reserved_0_0 : 1;
+ } s;
+};
+typedef union cvmx_usbcx_gintmsk cvmx_usbcx_gintmsk_t;
+
+/**
+ * cvmx_usbc#_gintsts
+ *
+ * Core Interrupt Register (GINTSTS)
+ *
+ * This register interrupts the application for system-level events in the
+ * current mode of operation (Device mode or Host mode). It is shown in
+ * Interrupt. Some of the bits in this register are valid only in Host mode,
+ * while others are valid in Device mode only. This register also indicates the
+ * current mode of operation. In order to clear the interrupt status bits of
+ * type R_SS_WC, the application must write 1'b1 into the bit. The FIFO status
+ * interrupts are read only; once software reads from or writes to the FIFO
+ * while servicing these interrupts, FIFO interrupt conditions are cleared
+ * automatically.
+ */
+union cvmx_usbcx_gintsts {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gintsts_s
+ * @wkupint: Resume/Remote Wakeup Detected Interrupt (WkUpInt)
+ * In Device mode, this interrupt is asserted when a resume is
+ * detected on the USB. In Host mode, this interrupt is asserted
+ * when a remote wakeup is detected on the USB.
+ * For more information on how to use this interrupt, see "Partial
+ * Power-Down and Clock Gating Programming Model" on
+ * page 353.
+ * @sessreqint: Session Request/New Session Detected Interrupt
+ * (SessReqInt)
+ * In Host mode, this interrupt is asserted when a session request
+ * is detected from the device. In Device mode, this interrupt is
+ * asserted when the utmiotg_bvalid signal goes high.
+ * For more information on how to use this interrupt, see "Partial
+ * Power-Down and Clock Gating Programming Model" on
+ * page 353.
+ * @disconnint: Disconnect Detected Interrupt (DisconnInt)
+ * Asserted when a device disconnect is detected.
+ * @conidstschng: Connector ID Status Change (ConIDStsChng)
+ * The core sets this bit when there is a change in connector ID
+ * status.
+ * @ptxfemp: Periodic TxFIFO Empty (PTxFEmp)
+ * Asserted when the Periodic Transmit FIFO is either half or
+ * completely empty and there is space for at least one entry to be
+ * written in the Periodic Request Queue. The half or completely
+ * empty status is determined by the Periodic TxFIFO Empty Level
+ * bit in the Core AHB Configuration register
+ * (GAHBCFG.PTxFEmpLvl).
+ * @hchint: Host Channels Interrupt (HChInt)
+ * The core sets this bit to indicate that an interrupt is pending
+ * on one of the channels of the core (in Host mode). The
+ * application must read the Host All Channels Interrupt (HAINT)
+ * register to determine the exact number of the channel on which
+ * the interrupt occurred, and then read the corresponding Host
+ * Channel-n Interrupt (HCINTn) register to determine the exact
+ * cause of the interrupt. The application must clear the
+ * appropriate status bit in the HCINTn register to clear this bit.
+ * @prtint: Host Port Interrupt (PrtInt)
+ * The core sets this bit to indicate a change in port status of
+ * one of the O2P USB core ports in Host mode. The application must
+ * read the Host Port Control and Status (HPRT) register to
+ * determine the exact event that caused this interrupt. The
+ * application must clear the appropriate status bit in the Host
+ * Port Control and Status register to clear this bit.
+ * @fetsusp: Data Fetch Suspended (FetSusp)
+ * This interrupt is valid only in DMA mode. This interrupt
+ * indicates that the core has stopped fetching data for IN
+ * endpoints due to the unavailability of TxFIFO space or Request
+ * Queue space. This interrupt is used by the application for an
+ * endpoint mismatch algorithm.
+ * @incomplp: Incomplete Periodic Transfer (incomplP)
+ * In Host mode, the core sets this interrupt bit when there are
+ * incomplete periodic transactions still pending which are
+ * scheduled for the current microframe.
+ * Incomplete Isochronous OUT Transfer (incompISOOUT)
+ * The Device mode, the core sets this interrupt to indicate that
+ * there is at least one isochronous OUT endpoint on which the
+ * transfer is not completed in the current microframe. This
+ * interrupt is asserted along with the End of Periodic Frame
+ * Interrupt (EOPF) bit in this register.
+ * @incompisoin: Incomplete Isochronous IN Transfer (incompISOIN)
+ * The core sets this interrupt to indicate that there is at least
+ * one isochronous IN endpoint on which the transfer is not
+ * completed in the current microframe. This interrupt is asserted
+ * along with the End of Periodic Frame Interrupt (EOPF) bit in
+ * this register.
+ * @oepint: OUT Endpoints Interrupt (OEPInt)
+ * The core sets this bit to indicate that an interrupt is pending
+ * on one of the OUT endpoints of the core (in Device mode). The
+ * application must read the Device All Endpoints Interrupt
+ * (DAINT) register to determine the exact number of the OUT
+ * endpoint on which the interrupt occurred, and then read the
+ * corresponding Device OUT Endpoint-n Interrupt (DOEPINTn)
+ * register to determine the exact cause of the interrupt. The
+ * application must clear the appropriate status bit in the
+ * corresponding DOEPINTn register to clear this bit.
+ * @iepint: IN Endpoints Interrupt (IEPInt)
+ * The core sets this bit to indicate that an interrupt is pending
+ * on one of the IN endpoints of the core (in Device mode). The
+ * application must read the Device All Endpoints Interrupt
+ * (DAINT) register to determine the exact number of the IN
+ * endpoint on which the interrupt occurred, and then read the
+ * corresponding Device IN Endpoint-n Interrupt (DIEPINTn)
+ * register to determine the exact cause of the interrupt. The
+ * application must clear the appropriate status bit in the
+ * corresponding DIEPINTn register to clear this bit.
+ * @epmis: Endpoint Mismatch Interrupt (EPMis)
+ * Indicates that an IN token has been received for a non-periodic
+ * endpoint, but the data for another endpoint is present in the
+ * top of the Non-Periodic Transmit FIFO and the IN endpoint
+ * mismatch count programmed by the application has expired.
+ * @eopf: End of Periodic Frame Interrupt (EOPF)
+ * Indicates that the period specified in the Periodic Frame
+ * Interval field of the Device Configuration register
+ * (DCFG.PerFrInt) has been reached in the current microframe.
+ * @isooutdrop: Isochronous OUT Packet Dropped Interrupt (ISOOutDrop)
+ * The core sets this bit when it fails to write an isochronous OUT
+ * packet into the RxFIFO because the RxFIFO doesn't have
+ * enough space to accommodate a maximum packet size packet
+ * for the isochronous OUT endpoint.
+ * @enumdone: Enumeration Done (EnumDone)
+ * The core sets this bit to indicate that speed enumeration is
+ * complete. The application must read the Device Status (DSTS)
+ * register to obtain the enumerated speed.
+ * @usbrst: USB Reset (USBRst)
+ * The core sets this bit to indicate that a reset is detected on
+ * the USB.
+ * @usbsusp: USB Suspend (USBSusp)
+ * The core sets this bit to indicate that a suspend was detected
+ * on the USB. The core enters the Suspended state when there
+ * is no activity on the phy_line_state_i signal for an extended
+ * period of time.
+ * @erlysusp: Early Suspend (ErlySusp)
+ * The core sets this bit to indicate that an Idle state has been
+ * detected on the USB for 3 ms.
+ * @i2cint: I2C Interrupt (I2CINT)
+ * This bit is always 0x0.
+ * @ulpickint: ULPI Carkit Interrupt (ULPICKINT)
+ * This bit is always 0x0.
+ * @goutnakeff: Global OUT NAK Effective (GOUTNakEff)
+ * Indicates that the Set Global OUT NAK bit in the Device Control
+ * register (DCTL.SGOUTNak), set by the application, has taken
+ * effect in the core. This bit can be cleared by writing the Clear
+ * Global OUT NAK bit in the Device Control register
+ * (DCTL.CGOUTNak).
+ * @ginnakeff: Global IN Non-Periodic NAK Effective (GINNakEff)
+ * Indicates that the Set Global Non-Periodic IN NAK bit in the
+ * Device Control register (DCTL.SGNPInNak), set by the
+ * application, has taken effect in the core. That is, the core has
+ * sampled the Global IN NAK bit set by the application. This bit
+ * can be cleared by clearing the Clear Global Non-Periodic IN
+ * NAK bit in the Device Control register (DCTL.CGNPInNak).
+ * This interrupt does not necessarily mean that a NAK handshake
+ * is sent out on the USB. The STALL bit takes precedence over
+ * the NAK bit.
+ * @nptxfemp: Non-Periodic TxFIFO Empty (NPTxFEmp)
+ * This interrupt is asserted when the Non-Periodic TxFIFO is
+ * either half or completely empty, and there is space for at least
+ * one entry to be written to the Non-Periodic Transmit Request
+ * Queue. The half or completely empty status is determined by
+ * the Non-Periodic TxFIFO Empty Level bit in the Core AHB
+ * Configuration register (GAHBCFG.NPTxFEmpLvl).
+ * @rxflvl: RxFIFO Non-Empty (RxFLvl)
+ * Indicates that there is at least one packet pending to be read
+ * from the RxFIFO.
+ * @sof: Start of (micro)Frame (Sof)
+ * In Host mode, the core sets this bit to indicate that an SOF
+ * (FS), micro-SOF (HS), or Keep-Alive (LS) is transmitted on the
+ * USB. The application must write a 1 to this bit to clear the
+ * interrupt.
+ * In Device mode, in the core sets this bit to indicate that an
+ * SOF token has been received on the USB. The application can read
+ * the Device Status register to get the current (micro)frame
+ * number. This interrupt is seen only when the core is operating
+ * at either HS or FS.
+ * @otgint: OTG Interrupt (OTGInt)
+ * The core sets this bit to indicate an OTG protocol event. The
+ * application must read the OTG Interrupt Status (GOTGINT)
+ * register to determine the exact event that caused this
+ * interrupt. The application must clear the appropriate status bit
+ * in the GOTGINT register to clear this bit.
+ * @modemis: Mode Mismatch Interrupt (ModeMis)
+ * The core sets this bit when the application is trying to access:
+ * * A Host mode register, when the core is operating in Device
+ * mode
+ * * A Device mode register, when the core is operating in Host
+ * mode
+ * The register access is completed on the AHB with an OKAY
+ * response, but is ignored by the core internally and doesn't
+ * affect the operation of the core.
+ * @curmod: Current Mode of Operation (CurMod)
+ * Indicates the current mode of operation.
+ * * 1'b0: Device mode
+ * * 1'b1: Host mode
+ */
+ struct cvmx_usbcx_gintsts_s {
+ uint32_t wkupint : 1;
+ uint32_t sessreqint : 1;
+ uint32_t disconnint : 1;
+ uint32_t conidstschng : 1;
+ uint32_t reserved_27_27 : 1;
+ uint32_t ptxfemp : 1;
+ uint32_t hchint : 1;
+ uint32_t prtint : 1;
+ uint32_t reserved_23_23 : 1;
+ uint32_t fetsusp : 1;
+ uint32_t incomplp : 1;
+ uint32_t incompisoin : 1;
+ uint32_t oepint : 1;
+ uint32_t iepint : 1;
+ uint32_t epmis : 1;
+ uint32_t reserved_16_16 : 1;
+ uint32_t eopf : 1;
+ uint32_t isooutdrop : 1;
+ uint32_t enumdone : 1;
+ uint32_t usbrst : 1;
+ uint32_t usbsusp : 1;
+ uint32_t erlysusp : 1;
+ uint32_t i2cint : 1;
+ uint32_t ulpickint : 1;
+ uint32_t goutnakeff : 1;
+ uint32_t ginnakeff : 1;
+ uint32_t nptxfemp : 1;
+ uint32_t rxflvl : 1;
+ uint32_t sof : 1;
+ uint32_t otgint : 1;
+ uint32_t modemis : 1;
+ uint32_t curmod : 1;
+ } s;
+};
+typedef union cvmx_usbcx_gintsts cvmx_usbcx_gintsts_t;
+
+/**
+ * cvmx_usbc#_gnptxfsiz
+ *
+ * Non-Periodic Transmit FIFO Size Register (GNPTXFSIZ)
+ *
+ * The application can program the RAM size and the memory start address for the
+ * Non-Periodic TxFIFO.
+ */
+union cvmx_usbcx_gnptxfsiz {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gnptxfsiz_s
+ * @nptxfdep: Non-Periodic TxFIFO Depth (NPTxFDep)
+ * This value is in terms of 32-bit words.
+ * Minimum value is 16
+ * Maximum value is 32768
+ * @nptxfstaddr: Non-Periodic Transmit RAM Start Address (NPTxFStAddr)
+ * This field contains the memory start address for Non-Periodic
+ * Transmit FIFO RAM.
+ */
+ struct cvmx_usbcx_gnptxfsiz_s {
+ uint32_t nptxfdep : 16;
+ uint32_t nptxfstaddr : 16;
+ } s;
+};
+typedef union cvmx_usbcx_gnptxfsiz cvmx_usbcx_gnptxfsiz_t;
+
+/**
+ * cvmx_usbc#_gnptxsts
+ *
+ * Non-Periodic Transmit FIFO/Queue Status Register (GNPTXSTS)
+ *
+ * This read-only register contains the free space information for the
+ * Non-Periodic TxFIFO and the Non-Periodic Transmit Request Queue.
+ */
+union cvmx_usbcx_gnptxsts {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gnptxsts_s
+ * @nptxqtop: Top of the Non-Periodic Transmit Request Queue (NPTxQTop)
+ * Entry in the Non-Periodic Tx Request Queue that is currently
+ * being processed by the MAC.
+ * * Bits [30:27]: Channel/endpoint number
+ * * Bits [26:25]:
+ * - 2'b00: IN/OUT token
+ * - 2'b01: Zero-length transmit packet (device IN/host OUT)
+ * - 2'b10: PING/CSPLIT token
+ * - 2'b11: Channel halt command
+ * * Bit [24]: Terminate (last entry for selected channel/endpoint)
+ * @nptxqspcavail: Non-Periodic Transmit Request Queue Space Available
+ * (NPTxQSpcAvail)
+ * Indicates the amount of free space available in the Non-
+ * Periodic Transmit Request Queue. This queue holds both IN
+ * and OUT requests in Host mode. Device mode has only IN
+ * requests.
+ * * 8'h0: Non-Periodic Transmit Request Queue is full
+ * * 8'h1: 1 location available
+ * * 8'h2: 2 locations available
+ * * n: n locations available (0..8)
+ * * Others: Reserved
+ * @nptxfspcavail: Non-Periodic TxFIFO Space Avail (NPTxFSpcAvail)
+ * Indicates the amount of free space available in the Non-
+ * Periodic TxFIFO.
+ * Values are in terms of 32-bit words.
+ * * 16'h0: Non-Periodic TxFIFO is full
+ * * 16'h1: 1 word available
+ * * 16'h2: 2 words available
+ * * 16'hn: n words available (where 0..32768)
+ * * 16'h8000: 32768 words available
+ * * Others: Reserved
+ */
+ struct cvmx_usbcx_gnptxsts_s {
+ uint32_t reserved_31_31 : 1;
+ uint32_t nptxqtop : 7;
+ uint32_t nptxqspcavail : 8;
+ uint32_t nptxfspcavail : 16;
+ } s;
+};
+typedef union cvmx_usbcx_gnptxsts cvmx_usbcx_gnptxsts_t;
+
+/**
+ * cvmx_usbc#_grstctl
+ *
+ * Core Reset Register (GRSTCTL)
+ *
+ * The application uses this register to reset various hardware features inside
+ * the core.
+ */
+union cvmx_usbcx_grstctl {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_grstctl_s
+ * @ahbidle: AHB Master Idle (AHBIdle)
+ * Indicates that the AHB Master State Machine is in the IDLE
+ * condition.
+ * @dmareq: DMA Request Signal (DMAReq)
+ * Indicates that the DMA request is in progress. Used for debug.
+ * @txfnum: TxFIFO Number (TxFNum)
+ * This is the FIFO number that must be flushed using the TxFIFO
+ * Flush bit. This field must not be changed until the core clears
+ * the TxFIFO Flush bit.
+ * * 5'h0: Non-Periodic TxFIFO flush
+ * * 5'h1: Periodic TxFIFO 1 flush in Device mode or Periodic
+ * TxFIFO flush in Host mode
+ * * 5'h2: Periodic TxFIFO 2 flush in Device mode
+ * - ...
+ * * 5'hF: Periodic TxFIFO 15 flush in Device mode
+ * * 5'h10: Flush all the Periodic and Non-Periodic TxFIFOs in the
+ * core
+ * @txfflsh: TxFIFO Flush (TxFFlsh)
+ * This bit selectively flushes a single or all transmit FIFOs, but
+ * cannot do so if the core is in the midst of a transaction.
+ * The application must only write this bit after checking that the
+ * core is neither writing to the TxFIFO nor reading from the
+ * TxFIFO.
+ * The application must wait until the core clears this bit before
+ * performing any operations. This bit takes 8 clocks (of phy_clk
+ * or hclk, whichever is slower) to clear.
+ * @rxfflsh: RxFIFO Flush (RxFFlsh)
+ * The application can flush the entire RxFIFO using this bit, but
+ * must first ensure that the core is not in the middle of a
+ * transaction.
+ * The application must only write to this bit after checking that
+ * the core is neither reading from the RxFIFO nor writing to the
+ * RxFIFO.
+ * The application must wait until the bit is cleared before
+ * performing any other operations. This bit will take 8 clocks
+ * (slowest of PHY or AHB clock) to clear.
+ * @intknqflsh: IN Token Sequence Learning Queue Flush (INTknQFlsh)
+ * The application writes this bit to flush the IN Token Sequence
+ * Learning Queue.
+ * @frmcntrrst: Host Frame Counter Reset (FrmCntrRst)
+ * The application writes this bit to reset the (micro)frame number
+ * counter inside the core. When the (micro)frame counter is reset,
+ * the subsequent SOF sent out by the core will have a
+ * (micro)frame number of 0.
+ * @hsftrst: HClk Soft Reset (HSftRst)
+ * The application uses this bit to flush the control logic in the
+ * AHB Clock domain. Only AHB Clock Domain pipelines are reset.
+ * * FIFOs are not flushed with this bit.
+ * * All state machines in the AHB clock domain are reset to the
+ * Idle state after terminating the transactions on the AHB,
+ * following the protocol.
+ * * CSR control bits used by the AHB clock domain state
+ * machines are cleared.
+ * * To clear this interrupt, status mask bits that control the
+ * interrupt status and are generated by the AHB clock domain
+ * state machine are cleared.
+ * * Because interrupt status bits are not cleared, the application
+ * can get the status of any core events that occurred after it set
+ * this bit.
+ * This is a self-clearing bit that the core clears after all
+ * necessary logic is reset in the core. This may take several
+ * clocks, depending on the core's current state.
+ * @csftrst: Core Soft Reset (CSftRst)
+ * Resets the hclk and phy_clock domains as follows:
+ * * Clears the interrupts and all the CSR registers except the
+ * following register bits:
+ * - PCGCCTL.RstPdwnModule
+ * - PCGCCTL.GateHclk
+ * - PCGCCTL.PwrClmp
+ * - PCGCCTL.StopPPhyLPwrClkSelclk
+ * - GUSBCFG.PhyLPwrClkSel
+ * - GUSBCFG.DDRSel
+ * - GUSBCFG.PHYSel
+ * - GUSBCFG.FSIntf
+ * - GUSBCFG.ULPI_UTMI_Sel
+ * - GUSBCFG.PHYIf
+ * - HCFG.FSLSPclkSel
+ * - DCFG.DevSpd
+ * * All module state machines (except the AHB Slave Unit) are
+ * reset to the IDLE state, and all the transmit FIFOs and the
+ * receive FIFO are flushed.
+ * * Any transactions on the AHB Master are terminated as soon
+ * as possible, after gracefully completing the last data phase of
+ * an AHB transfer. Any transactions on the USB are terminated
+ * immediately.
+ * The application can write to this bit any time it wants to reset
+ * the core. This is a self-clearing bit and the core clears this
+ * bit after all the necessary logic is reset in the core, which
+ * may take several clocks, depending on the current state of the
+ * core. Once this bit is cleared software should wait at least 3
+ * PHY clocks before doing any access to the PHY domain
+ * (synchronization delay). Software should also should check that
+ * bit 31 of this register is 1 (AHB Master is IDLE) before
+ * starting any operation.
+ * Typically software reset is used during software development
+ * and also when you dynamically change the PHY selection bits
+ * in the USB configuration registers listed above. When you
+ * change the PHY, the corresponding clock for the PHY is
+ * selected and used in the PHY domain. Once a new clock is
+ * selected, the PHY domain has to be reset for proper operation.
+ */
+ struct cvmx_usbcx_grstctl_s {
+ uint32_t ahbidle : 1;
+ uint32_t dmareq : 1;
+ uint32_t reserved_11_29 : 19;
+ uint32_t txfnum : 5;
+ uint32_t txfflsh : 1;
+ uint32_t rxfflsh : 1;
+ uint32_t intknqflsh : 1;
+ uint32_t frmcntrrst : 1;
+ uint32_t hsftrst : 1;
+ uint32_t csftrst : 1;
+ } s;
+};
+typedef union cvmx_usbcx_grstctl cvmx_usbcx_grstctl_t;
+
+/**
+ * cvmx_usbc#_grxfsiz
+ *
+ * Receive FIFO Size Register (GRXFSIZ)
+ *
+ * The application can program the RAM size that must be allocated to the
+ * RxFIFO.
+ */
+union cvmx_usbcx_grxfsiz {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_grxfsiz_s
+ * @rxfdep: RxFIFO Depth (RxFDep)
+ * This value is in terms of 32-bit words.
+ * * Minimum value is 16
+ * * Maximum value is 32768
+ */
+ struct cvmx_usbcx_grxfsiz_s {
+ uint32_t reserved_16_31 : 16;
+ uint32_t rxfdep : 16;
+ } s;
+};
+typedef union cvmx_usbcx_grxfsiz cvmx_usbcx_grxfsiz_t;
+
+/**
+ * cvmx_usbc#_grxstsph
+ *
+ * Receive Status Read and Pop Register, Host Mode (GRXSTSPH)
+ *
+ * A read to the Receive Status Read and Pop register returns and additionally
+ * pops the top data entry out of the RxFIFO.
+ * This Description is only valid when the core is in Host Mode. For Device Mode
+ * use USBC_GRXSTSPD instead.
+ * NOTE: GRXSTSPH and GRXSTSPD are physically the same register and share the
+ * same offset in the O2P USB core. The offset difference shown in this
+ * document is for software clarity and is actually ignored by the
+ * hardware.
+ */
+union cvmx_usbcx_grxstsph {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_grxstsph_s
+ * @pktsts: Packet Status (PktSts)
+ * Indicates the status of the received packet
+ * * 4'b0010: IN data packet received
+ * * 4'b0011: IN transfer completed (triggers an interrupt)
+ * * 4'b0101: Data toggle error (triggers an interrupt)
+ * * 4'b0111: Channel halted (triggers an interrupt)
+ * * Others: Reserved
+ * @dpid: Data PID (DPID)
+ * * 2'b00: DATA0
+ * * 2'b10: DATA1
+ * * 2'b01: DATA2
+ * * 2'b11: MDATA
+ * @bcnt: Byte Count (BCnt)
+ * Indicates the byte count of the received IN data packet
+ * @chnum: Channel Number (ChNum)
+ * Indicates the channel number to which the current received
+ * packet belongs.
+ */
+ struct cvmx_usbcx_grxstsph_s {
+ uint32_t reserved_21_31 : 11;
+ uint32_t pktsts : 4;
+ uint32_t dpid : 2;
+ uint32_t bcnt : 11;
+ uint32_t chnum : 4;
+ } s;
+};
+typedef union cvmx_usbcx_grxstsph cvmx_usbcx_grxstsph_t;
+
+/**
+ * cvmx_usbc#_gusbcfg
+ *
+ * Core USB Configuration Register (GUSBCFG)
+ *
+ * This register can be used to configure the core after power-on or a changing
+ * to Host mode or Device mode. It contains USB and USB-PHY related
+ * configuration parameters. The application must program this register before
+ * starting any transactions on either the AHB or the USB. Do not make changes
+ * to this register after the initial programming.
+ */
+union cvmx_usbcx_gusbcfg {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_gusbcfg_s
+ * @otgi2csel: UTMIFS or I2C Interface Select (OtgI2CSel)
+ * This bit is always 0x0.
+ * @phylpwrclksel: PHY Low-Power Clock Select (PhyLPwrClkSel)
+ * Software should set this bit to 0x0.
+ * Selects either 480-MHz or 48-MHz (low-power) PHY mode. In
+ * FS and LS modes, the PHY can usually operate on a 48-MHz
+ * clock to save power.
+ * * 1'b0: 480-MHz Internal PLL clock
+ * * 1'b1: 48-MHz External Clock
+ * In 480 MHz mode, the UTMI interface operates at either 60 or
+ * 30-MHz, depending upon whether 8- or 16-bit data width is
+ * selected. In 48-MHz mode, the UTMI interface operates at 48
+ * MHz in FS mode and at either 48 or 6 MHz in LS mode
+ * (depending on the PHY vendor).
+ * This bit drives the utmi_fsls_low_power core output signal, and
+ * is valid only for UTMI+ PHYs.
+ * @usbtrdtim: USB Turnaround Time (USBTrdTim)
+ * Sets the turnaround time in PHY clocks.
+ * Specifies the response time for a MAC request to the Packet
+ * FIFO Controller (PFC) to fetch data from the DFIFO (SPRAM).
+ * This must be programmed to 0x5.
+ * @hnpcap: HNP-Capable (HNPCap)
+ * This bit is always 0x0.
+ * @srpcap: SRP-Capable (SRPCap)
+ * This bit is always 0x0.
+ * @ddrsel: ULPI DDR Select (DDRSel)
+ * Software should set this bit to 0x0.
+ * @physel: USB 2.0 High-Speed PHY or USB 1.1 Full-Speed Serial
+ * Software should set this bit to 0x0.
+ * @fsintf: Full-Speed Serial Interface Select (FSIntf)
+ * Software should set this bit to 0x0.
+ * @ulpi_utmi_sel: ULPI or UTMI+ Select (ULPI_UTMI_Sel)
+ * This bit is always 0x0.
+ * @phyif: PHY Interface (PHYIf)
+ * This bit is always 0x1.
+ * @toutcal: HS/FS Timeout Calibration (TOutCal)
+ * The number of PHY clocks that the application programs in this
+ * field is added to the high-speed/full-speed interpacket timeout
+ * duration in the core to account for any additional delays
+ * introduced by the PHY. This may be required, since the delay
+ * introduced by the PHY in generating the linestate condition may
+ * vary from one PHY to another.
+ * The USB standard timeout value for high-speed operation is
+ * 736 to 816 (inclusive) bit times. The USB standard timeout
+ * value for full-speed operation is 16 to 18 (inclusive) bit
+ * times. The application must program this field based on the
+ * speed of enumeration. The number of bit times added per PHY
+ * clock are:
+ * High-speed operation:
+ * * One 30-MHz PHY clock = 16 bit times
+ * * One 60-MHz PHY clock = 8 bit times
+ * Full-speed operation:
+ * * One 30-MHz PHY clock = 0.4 bit times
+ * * One 60-MHz PHY clock = 0.2 bit times
+ * * One 48-MHz PHY clock = 0.25 bit times
+ */
+ struct cvmx_usbcx_gusbcfg_s {
+ uint32_t reserved_17_31 : 15;
+ uint32_t otgi2csel : 1;
+ uint32_t phylpwrclksel : 1;
+ uint32_t reserved_14_14 : 1;
+ uint32_t usbtrdtim : 4;
+ uint32_t hnpcap : 1;
+ uint32_t srpcap : 1;
+ uint32_t ddrsel : 1;
+ uint32_t physel : 1;
+ uint32_t fsintf : 1;
+ uint32_t ulpi_utmi_sel : 1;
+ uint32_t phyif : 1;
+ uint32_t toutcal : 3;
+ } s;
+};
+typedef union cvmx_usbcx_gusbcfg cvmx_usbcx_gusbcfg_t;
+
+/**
+ * cvmx_usbc#_haint
+ *
+ * Host All Channels Interrupt Register (HAINT)
+ *
+ * When a significant event occurs on a channel, the Host All Channels Interrupt
+ * register interrupts the application using the Host Channels Interrupt bit of
+ * the Core Interrupt register (GINTSTS.HChInt). This is shown in Interrupt.
+ * There is one interrupt bit per channel, up to a maximum of 16 bits. Bits in
+ * this register are set and cleared when the application sets and clears bits
+ * in the corresponding Host Channel-n Interrupt register.
+ */
+union cvmx_usbcx_haint {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_haint_s
+ * @haint: Channel Interrupts (HAINT)
+ * One bit per channel: Bit 0 for Channel 0, bit 15 for Channel 15
+ */
+ struct cvmx_usbcx_haint_s {
+ uint32_t reserved_16_31 : 16;
+ uint32_t haint : 16;
+ } s;
+};
+typedef union cvmx_usbcx_haint cvmx_usbcx_haint_t;
+
+/**
+ * cvmx_usbc#_haintmsk
+ *
+ * Host All Channels Interrupt Mask Register (HAINTMSK)
+ *
+ * The Host All Channel Interrupt Mask register works with the Host All Channel
+ * Interrupt register to interrupt the application when an event occurs on a
+ * channel. There is one interrupt mask bit per channel, up to a maximum of 16
+ * bits.
+ * Mask interrupt: 1'b0 Unmask interrupt: 1'b1
+ */
+union cvmx_usbcx_haintmsk {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_haintmsk_s
+ * @haintmsk: Channel Interrupt Mask (HAINTMsk)
+ * One bit per channel: Bit 0 for channel 0, bit 15 for channel 15
+ */
+ struct cvmx_usbcx_haintmsk_s {
+ uint32_t reserved_16_31 : 16;
+ uint32_t haintmsk : 16;
+ } s;
+};
+typedef union cvmx_usbcx_haintmsk cvmx_usbcx_haintmsk_t;
+
+/**
+ * cvmx_usbc#_hcchar#
+ *
+ * Host Channel-n Characteristics Register (HCCHAR)
+ *
+ */
+union cvmx_usbcx_hccharx {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hccharx_s
+ * @chena: Channel Enable (ChEna)
+ * This field is set by the application and cleared by the OTG
+ * host.
+ * * 1'b0: Channel disabled
+ * * 1'b1: Channel enabled
+ * @chdis: Channel Disable (ChDis)
+ * The application sets this bit to stop transmitting/receiving
+ * data on a channel, even before the transfer for that channel is
+ * complete. The application must wait for the Channel Disabled
+ * interrupt before treating the channel as disabled.
+ * @oddfrm: Odd Frame (OddFrm)
+ * This field is set (reset) by the application to indicate that
+ * the OTG host must perform a transfer in an odd (micro)frame.
+ * This field is applicable for only periodic (isochronous and
+ * interrupt) transactions.
+ * * 1'b0: Even (micro)frame
+ * * 1'b1: Odd (micro)frame
+ * @devaddr: Device Address (DevAddr)
+ * This field selects the specific device serving as the data
+ * source or sink.
+ * @ec: Multi Count (MC) / Error Count (EC)
+ * When the Split Enable bit of the Host Channel-n Split Control
+ * register (HCSPLTn.SpltEna) is reset (1'b0), this field indicates
+ * to the host the number of transactions that should be executed
+ * per microframe for this endpoint.
+ * * 2'b00: Reserved. This field yields undefined results.
+ * * 2'b01: 1 transaction
+ * * 2'b10: 2 transactions to be issued for this endpoint per
+ * microframe
+ * * 2'b11: 3 transactions to be issued for this endpoint per
+ * microframe
+ * When HCSPLTn.SpltEna is set (1'b1), this field indicates the
+ * number of immediate retries to be performed for a periodic split
+ * transactions on transaction errors. This field must be set to at
+ * least 2'b01.
+ * @eptype: Endpoint Type (EPType)
+ * Indicates the transfer type selected.
+ * * 2'b00: Control
+ * * 2'b01: Isochronous
+ * * 2'b10: Bulk
+ * * 2'b11: Interrupt
+ * @lspddev: Low-Speed Device (LSpdDev)
+ * This field is set by the application to indicate that this
+ * channel is communicating to a low-speed device.
+ * @epdir: Endpoint Direction (EPDir)
+ * Indicates whether the transaction is IN or OUT.
+ * * 1'b0: OUT
+ * * 1'b1: IN
+ * @epnum: Endpoint Number (EPNum)
+ * Indicates the endpoint number on the device serving as the
+ * data source or sink.
+ * @mps: Maximum Packet Size (MPS)
+ * Indicates the maximum packet size of the associated endpoint.
+ */
+ struct cvmx_usbcx_hccharx_s {
+ uint32_t chena : 1;
+ uint32_t chdis : 1;
+ uint32_t oddfrm : 1;
+ uint32_t devaddr : 7;
+ uint32_t ec : 2;
+ uint32_t eptype : 2;
+ uint32_t lspddev : 1;
+ uint32_t reserved_16_16 : 1;
+ uint32_t epdir : 1;
+ uint32_t epnum : 4;
+ uint32_t mps : 11;
+ } s;
+};
+typedef union cvmx_usbcx_hccharx cvmx_usbcx_hccharx_t;
+
+/**
+ * cvmx_usbc#_hcfg
+ *
+ * Host Configuration Register (HCFG)
+ *
+ * This register configures the core after power-on. Do not make changes to this
+ * register after initializing the host.
+ */
+union cvmx_usbcx_hcfg {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hcfg_s
+ * @fslssupp: FS- and LS-Only Support (FSLSSupp)
+ * The application uses this bit to control the core's enumeration
+ * speed. Using this bit, the application can make the core
+ * enumerate as a FS host, even if the connected device supports
+ * HS traffic. Do not make changes to this field after initial
+ * programming.
+ * * 1'b0: HS/FS/LS, based on the maximum speed supported by
+ * the connected device
+ * * 1'b1: FS/LS-only, even if the connected device can support HS
+ * @fslspclksel: FS/LS PHY Clock Select (FSLSPclkSel)
+ * When the core is in FS Host mode
+ * * 2'b00: PHY clock is running at 30/60 MHz
+ * * 2'b01: PHY clock is running at 48 MHz
+ * * Others: Reserved
+ * When the core is in LS Host mode
+ * * 2'b00: PHY clock is running at 30/60 MHz. When the
+ * UTMI+/ULPI PHY Low Power mode is not selected, use
+ * 30/60 MHz.
+ * * 2'b01: PHY clock is running at 48 MHz. When the UTMI+
+ * PHY Low Power mode is selected, use 48MHz if the PHY
+ * supplies a 48 MHz clock during LS mode.
+ * * 2'b10: PHY clock is running at 6 MHz. In USB 1.1 FS mode,
+ * use 6 MHz when the UTMI+ PHY Low Power mode is
+ * selected and the PHY supplies a 6 MHz clock during LS
+ * mode. If you select a 6 MHz clock during LS mode, you must
+ * do a soft reset.
+ * * 2'b11: Reserved
+ */
+ struct cvmx_usbcx_hcfg_s {
+ uint32_t reserved_3_31 : 29;
+ uint32_t fslssupp : 1;
+ uint32_t fslspclksel : 2;
+ } s;
+};
+typedef union cvmx_usbcx_hcfg cvmx_usbcx_hcfg_t;
+
+/**
+ * cvmx_usbc#_hcint#
+ *
+ * Host Channel-n Interrupt Register (HCINT)
+ *
+ * This register indicates the status of a channel with respect to USB- and
+ * AHB-related events. The application must read this register when the Host
+ * Channels Interrupt bit of the Core Interrupt register (GINTSTS.HChInt) is
+ * set. Before the application can read this register, it must first read
+ * the Host All Channels Interrupt (HAINT) register to get the exact channel
+ * number for the Host Channel-n Interrupt register. The application must clear
+ * the appropriate bit in this register to clear the corresponding bits in the
+ * HAINT and GINTSTS registers.
+ */
+union cvmx_usbcx_hcintx {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hcintx_s
+ * @datatglerr: Data Toggle Error (DataTglErr)
+ * @frmovrun: Frame Overrun (FrmOvrun)
+ * @bblerr: Babble Error (BblErr)
+ * @xacterr: Transaction Error (XactErr)
+ * @nyet: NYET Response Received Interrupt (NYET)
+ * @ack: ACK Response Received Interrupt (ACK)
+ * @nak: NAK Response Received Interrupt (NAK)
+ * @stall: STALL Response Received Interrupt (STALL)
+ * @ahberr: This bit is always 0x0.
+ * @chhltd: Channel Halted (ChHltd)
+ * Indicates the transfer completed abnormally either because of
+ * any USB transaction error or in response to disable request by
+ * the application.
+ * @xfercompl: Transfer Completed (XferCompl)
+ * Transfer completed normally without any errors.
+ */
+ struct cvmx_usbcx_hcintx_s {
+ uint32_t reserved_11_31 : 21;
+ uint32_t datatglerr : 1;
+ uint32_t frmovrun : 1;
+ uint32_t bblerr : 1;
+ uint32_t xacterr : 1;
+ uint32_t nyet : 1;
+ uint32_t ack : 1;
+ uint32_t nak : 1;
+ uint32_t stall : 1;
+ uint32_t ahberr : 1;
+ uint32_t chhltd : 1;
+ uint32_t xfercompl : 1;
+ } s;
+};
+typedef union cvmx_usbcx_hcintx cvmx_usbcx_hcintx_t;
+
+/**
+ * cvmx_usbc#_hcintmsk#
+ *
+ * Host Channel-n Interrupt Mask Register (HCINTMSKn)
+ *
+ * This register reflects the mask for each channel status described in the
+ * previous section.
+ * Mask interrupt: 1'b0 Unmask interrupt: 1'b1
+ */
+union cvmx_usbcx_hcintmskx {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hcintmskx_s
+ * @datatglerrmsk: Data Toggle Error Mask (DataTglErrMsk)
+ * @frmovrunmsk: Frame Overrun Mask (FrmOvrunMsk)
+ * @bblerrmsk: Babble Error Mask (BblErrMsk)
+ * @xacterrmsk: Transaction Error Mask (XactErrMsk)
+ * @nyetmsk: NYET Response Received Interrupt Mask (NyetMsk)
+ * @ackmsk: ACK Response Received Interrupt Mask (AckMsk)
+ * @nakmsk: NAK Response Received Interrupt Mask (NakMsk)
+ * @stallmsk: STALL Response Received Interrupt Mask (StallMsk)
+ * @ahberrmsk: AHB Error Mask (AHBErrMsk)
+ * @chhltdmsk: Channel Halted Mask (ChHltdMsk)
+ * @xfercomplmsk: Transfer Completed Mask (XferComplMsk)
+ */
+ struct cvmx_usbcx_hcintmskx_s {
+ uint32_t reserved_11_31 : 21;
+ uint32_t datatglerrmsk : 1;
+ uint32_t frmovrunmsk : 1;
+ uint32_t bblerrmsk : 1;
+ uint32_t xacterrmsk : 1;
+ uint32_t nyetmsk : 1;
+ uint32_t ackmsk : 1;
+ uint32_t nakmsk : 1;
+ uint32_t stallmsk : 1;
+ uint32_t ahberrmsk : 1;
+ uint32_t chhltdmsk : 1;
+ uint32_t xfercomplmsk : 1;
+ } s;
+};
+typedef union cvmx_usbcx_hcintmskx cvmx_usbcx_hcintmskx_t;
+
+/**
+ * cvmx_usbc#_hcsplt#
+ *
+ * Host Channel-n Split Control Register (HCSPLT)
+ *
+ */
+union cvmx_usbcx_hcspltx {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hcspltx_s
+ * @spltena: Split Enable (SpltEna)
+ * The application sets this field to indicate that this channel is
+ * enabled to perform split transactions.
+ * @compsplt: Do Complete Split (CompSplt)
+ * The application sets this field to request the OTG host to
+ * perform a complete split transaction.
+ * @xactpos: Transaction Position (XactPos)
+ * This field is used to determine whether to send all, first,
+ * middle, or last payloads with each OUT transaction.
+ * * 2'b11: All. This is the entire data payload is of this
+ * transaction (which is less than or equal to 188 bytes).
+ * * 2'b10: Begin. This is the first data payload of this
+ * transaction (which is larger than 188 bytes).
+ * * 2'b00: Mid. This is the middle payload of this transaction
+ * (which is larger than 188 bytes).
+ * * 2'b01: End. This is the last payload of this transaction
+ * (which is larger than 188 bytes).
+ * @hubaddr: Hub Address (HubAddr)
+ * This field holds the device address of the transaction
+ * translator's hub.
+ * @prtaddr: Port Address (PrtAddr)
+ * This field is the port number of the recipient transaction
+ * translator.
+ */
+ struct cvmx_usbcx_hcspltx_s {
+ uint32_t spltena : 1;
+ uint32_t reserved_17_30 : 14;
+ uint32_t compsplt : 1;
+ uint32_t xactpos : 2;
+ uint32_t hubaddr : 7;
+ uint32_t prtaddr : 7;
+ } s;
+};
+typedef union cvmx_usbcx_hcspltx cvmx_usbcx_hcspltx_t;
+
+/**
+ * cvmx_usbc#_hctsiz#
+ *
+ * Host Channel-n Transfer Size Register (HCTSIZ)
+ *
+ */
+union cvmx_usbcx_hctsizx {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hctsizx_s
+ * @dopng: Do Ping (DoPng)
+ * Setting this field to 1 directs the host to do PING protocol.
+ * @pid: PID (Pid)
+ * The application programs this field with the type of PID to use
+ * for the initial transaction. The host will maintain this field
+ * for the rest of the transfer.
+ * * 2'b00: DATA0
+ * * 2'b01: DATA2
+ * * 2'b10: DATA1
+ * * 2'b11: MDATA (non-control)/SETUP (control)
+ * @pktcnt: Packet Count (PktCnt)
+ * This field is programmed by the application with the expected
+ * number of packets to be transmitted (OUT) or received (IN).
+ * The host decrements this count on every successful
+ * transmission or reception of an OUT/IN packet. Once this count
+ * reaches zero, the application is interrupted to indicate normal
+ * completion.
+ * @xfersize: Transfer Size (XferSize)
+ * For an OUT, this field is the number of data bytes the host will
+ * send during the transfer.
+ * For an IN, this field is the buffer size that the application
+ * has reserved for the transfer. The application is expected to
+ * program this field as an integer multiple of the maximum packet
+ * size for IN transactions (periodic and non-periodic).
+ */
+ struct cvmx_usbcx_hctsizx_s {
+ uint32_t dopng : 1;
+ uint32_t pid : 2;
+ uint32_t pktcnt : 10;
+ uint32_t xfersize : 19;
+ } s;
+};
+typedef union cvmx_usbcx_hctsizx cvmx_usbcx_hctsizx_t;
+
+/**
+ * cvmx_usbc#_hfir
+ *
+ * Host Frame Interval Register (HFIR)
+ *
+ * This register stores the frame interval information for the current speed to
+ * which the O2P USB core has enumerated.
+ */
+union cvmx_usbcx_hfir {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hfir_s
+ * @frint: Frame Interval (FrInt)
+ * The value that the application programs to this field specifies
+ * the interval between two consecutive SOFs (FS) or micro-
+ * SOFs (HS) or Keep-Alive tokens (HS). This field contains the
+ * number of PHY clocks that constitute the required frame
+ * interval. The default value set in this field for a FS operation
+ * when the PHY clock frequency is 60 MHz. The application can
+ * write a value to this register only after the Port Enable bit of
+ * the Host Port Control and Status register (HPRT.PrtEnaPort)
+ * has been set. If no value is programmed, the core calculates
+ * the value based on the PHY clock specified in the FS/LS PHY
+ * Clock Select field of the Host Configuration register
+ * (HCFG.FSLSPclkSel). Do not change the value of this field
+ * after the initial configuration.
+ * * 125 us (PHY clock frequency for HS)
+ * * 1 ms (PHY clock frequency for FS/LS)
+ */
+ struct cvmx_usbcx_hfir_s {
+ uint32_t reserved_16_31 : 16;
+ uint32_t frint : 16;
+ } s;
+};
+typedef union cvmx_usbcx_hfir cvmx_usbcx_hfir_t;
+
+/**
+ * cvmx_usbc#_hfnum
+ *
+ * Host Frame Number/Frame Time Remaining Register (HFNUM)
+ *
+ * This register indicates the current frame number.
+ * It also indicates the time remaining (in terms of the number of PHY clocks)
+ * in the current (micro)frame.
+ */
+union cvmx_usbcx_hfnum {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hfnum_s
+ * @frrem: Frame Time Remaining (FrRem)
+ * Indicates the amount of time remaining in the current
+ * microframe (HS) or frame (FS/LS), in terms of PHY clocks.
+ * This field decrements on each PHY clock. When it reaches
+ * zero, this field is reloaded with the value in the Frame
+ * Interval register and a new SOF is transmitted on the USB.
+ * @frnum: Frame Number (FrNum)
+ * This field increments when a new SOF is transmitted on the
+ * USB, and is reset to 0 when it reaches 16'h3FFF.
+ */
+ struct cvmx_usbcx_hfnum_s {
+ uint32_t frrem : 16;
+ uint32_t frnum : 16;
+ } s;
+};
+typedef union cvmx_usbcx_hfnum cvmx_usbcx_hfnum_t;
+
+/**
+ * cvmx_usbc#_hprt
+ *
+ * Host Port Control and Status Register (HPRT)
+ *
+ * This register is available in both Host and Device modes.
+ * Currently, the OTG Host supports only one port.
+ * A single register holds USB port-related information such as USB reset,
+ * enable, suspend, resume, connect status, and test mode for each port. The
+ * R_SS_WC bits in this register can trigger an interrupt to the application
+ * through the Host Port Interrupt bit of the Core Interrupt register
+ * (GINTSTS.PrtInt). On a Port Interrupt, the application must read this
+ * register and clear the bit that caused the interrupt. For the R_SS_WC bits,
+ * the application must write a 1 to the bit to clear the interrupt.
+ */
+union cvmx_usbcx_hprt {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hprt_s
+ * @prtspd: Port Speed (PrtSpd)
+ * Indicates the speed of the device attached to this port.
+ * * 2'b00: High speed
+ * * 2'b01: Full speed
+ * * 2'b10: Low speed
+ * * 2'b11: Reserved
+ * @prttstctl: Port Test Control (PrtTstCtl)
+ * The application writes a nonzero value to this field to put
+ * the port into a Test mode, and the corresponding pattern is
+ * signaled on the port.
+ * * 4'b0000: Test mode disabled
+ * * 4'b0001: Test_J mode
+ * * 4'b0010: Test_K mode
+ * * 4'b0011: Test_SE0_NAK mode
+ * * 4'b0100: Test_Packet mode
+ * * 4'b0101: Test_Force_Enable
+ * * Others: Reserved
+ * PrtSpd must be zero (i.e. the interface must be in high-speed
+ * mode) to use the PrtTstCtl test modes.
+ * @prtpwr: Port Power (PrtPwr)
+ * The application uses this field to control power to this port,
+ * and the core clears this bit on an overcurrent condition.
+ * * 1'b0: Power off
+ * * 1'b1: Power on
+ * @prtlnsts: Port Line Status (PrtLnSts)
+ * Indicates the current logic level USB data lines
+ * * Bit [10]: Logic level of D-
+ * * Bit [11]: Logic level of D+
+ * @prtrst: Port Reset (PrtRst)
+ * When the application sets this bit, a reset sequence is
+ * started on this port. The application must time the reset
+ * period and clear this bit after the reset sequence is
+ * complete.
+ * * 1'b0: Port not in reset
+ * * 1'b1: Port in reset
+ * The application must leave this bit set for at least a
+ * minimum duration mentioned below to start a reset on the
+ * port. The application can leave it set for another 10 ms in
+ * addition to the required minimum duration, before clearing
+ * the bit, even though there is no maximum limit set by the
+ * USB standard.
+ * * High speed: 50 ms
+ * * Full speed/Low speed: 10 ms
+ * @prtsusp: Port Suspend (PrtSusp)
+ * The application sets this bit to put this port in Suspend
+ * mode. The core only stops sending SOFs when this is set.
+ * To stop the PHY clock, the application must set the Port
+ * Clock Stop bit, which will assert the suspend input pin of
+ * the PHY.
+ * The read value of this bit reflects the current suspend
+ * status of the port. This bit is cleared by the core after a
+ * remote wakeup signal is detected or the application sets
+ * the Port Reset bit or Port Resume bit in this register or the
+ * Resume/Remote Wakeup Detected Interrupt bit or
+ * Disconnect Detected Interrupt bit in the Core Interrupt
+ * register (GINTSTS.WkUpInt or GINTSTS.DisconnInt,
+ * respectively).
+ * * 1'b0: Port not in Suspend mode
+ * * 1'b1: Port in Suspend mode
+ * @prtres: Port Resume (PrtRes)
+ * The application sets this bit to drive resume signaling on
+ * the port. The core continues to drive the resume signal
+ * until the application clears this bit.
+ * If the core detects a USB remote wakeup sequence, as
+ * indicated by the Port Resume/Remote Wakeup Detected
+ * Interrupt bit of the Core Interrupt register
+ * (GINTSTS.WkUpInt), the core starts driving resume
+ * signaling without application intervention and clears this bit
+ * when it detects a disconnect condition. The read value of
+ * this bit indicates whether the core is currently driving
+ * resume signaling.
+ * * 1'b0: No resume driven
+ * * 1'b1: Resume driven
+ * @prtovrcurrchng: Port Overcurrent Change (PrtOvrCurrChng)
+ * The core sets this bit when the status of the Port
+ * Overcurrent Active bit (bit 4) in this register changes.
+ * @prtovrcurract: Port Overcurrent Active (PrtOvrCurrAct)
+ * Indicates the overcurrent condition of the port.
+ * * 1'b0: No overcurrent condition
+ * * 1'b1: Overcurrent condition
+ * @prtenchng: Port Enable/Disable Change (PrtEnChng)
+ * The core sets this bit when the status of the Port Enable bit
+ * [2] of this register changes.
+ * @prtena: Port Enable (PrtEna)
+ * A port is enabled only by the core after a reset sequence,
+ * and is disabled by an overcurrent condition, a disconnect
+ * condition, or by the application clearing this bit. The
+ * application cannot set this bit by a register write. It can only
+ * clear it to disable the port. This bit does not trigger any
+ * interrupt to the application.
+ * * 1'b0: Port disabled
+ * * 1'b1: Port enabled
+ * @prtconndet: Port Connect Detected (PrtConnDet)
+ * The core sets this bit when a device connection is detected
+ * to trigger an interrupt to the application using the Host Port
+ * Interrupt bit of the Core Interrupt register (GINTSTS.PrtInt).
+ * The application must write a 1 to this bit to clear the
+ * interrupt.
+ * @prtconnsts: Port Connect Status (PrtConnSts)
+ * * 0: No device is attached to the port.
+ * * 1: A device is attached to the port.
+ */
+ struct cvmx_usbcx_hprt_s {
+ uint32_t reserved_19_31 : 13;
+ uint32_t prtspd : 2;
+ uint32_t prttstctl : 4;
+ uint32_t prtpwr : 1;
+ uint32_t prtlnsts : 2;
+ uint32_t reserved_9_9 : 1;
+ uint32_t prtrst : 1;
+ uint32_t prtsusp : 1;
+ uint32_t prtres : 1;
+ uint32_t prtovrcurrchng : 1;
+ uint32_t prtovrcurract : 1;
+ uint32_t prtenchng : 1;
+ uint32_t prtena : 1;
+ uint32_t prtconndet : 1;
+ uint32_t prtconnsts : 1;
+ } s;
+};
+typedef union cvmx_usbcx_hprt cvmx_usbcx_hprt_t;
+
+/**
+ * cvmx_usbc#_hptxfsiz
+ *
+ * Host Periodic Transmit FIFO Size Register (HPTXFSIZ)
+ *
+ * This register holds the size and the memory start address of the Periodic
+ * TxFIFO, as shown in Figures 310 and 311.
+ */
+union cvmx_usbcx_hptxfsiz {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hptxfsiz_s
+ * @ptxfsize: Host Periodic TxFIFO Depth (PTxFSize)
+ * This value is in terms of 32-bit words.
+ * * Minimum value is 16
+ * * Maximum value is 32768
+ * @ptxfstaddr: Host Periodic TxFIFO Start Address (PTxFStAddr)
+ */
+ struct cvmx_usbcx_hptxfsiz_s {
+ uint32_t ptxfsize : 16;
+ uint32_t ptxfstaddr : 16;
+ } s;
+};
+typedef union cvmx_usbcx_hptxfsiz cvmx_usbcx_hptxfsiz_t;
+
+/**
+ * cvmx_usbc#_hptxsts
+ *
+ * Host Periodic Transmit FIFO/Queue Status Register (HPTXSTS)
+ *
+ * This read-only register contains the free space information for the Periodic
+ * TxFIFO and the Periodic Transmit Request Queue
+ */
+union cvmx_usbcx_hptxsts {
+ uint32_t u32;
+ /**
+ * struct cvmx_usbcx_hptxsts_s
+ * @ptxqtop: Top of the Periodic Transmit Request Queue (PTxQTop)
+ * This indicates the entry in the Periodic Tx Request Queue that
+ * is currently being processes by the MAC.
+ * This register is used for debugging.
+ * * Bit [31]: Odd/Even (micro)frame
+ * - 1'b0: send in even (micro)frame
+ * - 1'b1: send in odd (micro)frame
+ * * Bits [30:27]: Channel/endpoint number
+ * * Bits [26:25]: Type
+ * - 2'b00: IN/OUT
+ * - 2'b01: Zero-length packet
+ * - 2'b10: CSPLIT
+ * - 2'b11: Disable channel command
+ * * Bit [24]: Terminate (last entry for the selected
+ * channel/endpoint)
+ * @ptxqspcavail: Periodic Transmit Request Queue Space Available
+ * (PTxQSpcAvail)
+ * Indicates the number of free locations available to be written
+ * in the Periodic Transmit Request Queue. This queue holds both
+ * IN and OUT requests.
+ * * 8'h0: Periodic Transmit Request Queue is full
+ * * 8'h1: 1 location available
+ * * 8'h2: 2 locations available
+ * * n: n locations available (0..8)
+ * * Others: Reserved
+ * @ptxfspcavail: Periodic Transmit Data FIFO Space Available
+ * (PTxFSpcAvail)
+ * Indicates the number of free locations available to be written
+ * to in the Periodic TxFIFO.
+ * Values are in terms of 32-bit words
+ * * 16'h0: Periodic TxFIFO is full
+ * * 16'h1: 1 word available
+ * * 16'h2: 2 words available
+ * * 16'hn: n words available (where 0..32768)
+ * * 16'h8000: 32768 words available
+ * * Others: Reserved
+ */
+ struct cvmx_usbcx_hptxsts_s {
+ uint32_t ptxqtop : 8;
+ uint32_t ptxqspcavail : 8;
+ uint32_t ptxfspcavail : 16;
+ } s;
+};
+typedef union cvmx_usbcx_hptxsts cvmx_usbcx_hptxsts_t;
+
+#endif
diff --git a/drivers/staging/octeon-usb/cvmx-usbnx-defs.h b/drivers/staging/octeon-usb/cvmx-usbnx-defs.h
new file mode 100644
index 000000000000..96d706770fc6
--- /dev/null
+++ b/drivers/staging/octeon-usb/cvmx-usbnx-defs.h
@@ -0,0 +1,887 @@
+/***********************license start***************
+ * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
+ * reserved.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+
+ * * Neither the name of Cavium Networks nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+
+ * This Software, including technical data, may be subject to U.S. export
+ * control laws, including the U.S. Export Administration Act and its associated
+ * regulations, and may be subject to export or import regulations in other
+ * countries.
+
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+ * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
+ * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
+ * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION
+ * OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
+ * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
+ * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
+ * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
+ * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
+ * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+ ***********************license end**************************************/
+
+
+/**
+ * cvmx-usbnx-defs.h
+ *
+ * Configuration and status register (CSR) type definitions for
+ * Octeon usbnx.
+ *
+ */
+#ifndef __CVMX_USBNX_TYPEDEFS_H__
+#define __CVMX_USBNX_TYPEDEFS_H__
+
+#define CVMX_USBNXBID1(bid) (((bid) & 1) * 0x10000000ull)
+#define CVMX_USBNXBID2(bid) (((bid) & 1) * 0x100000000000ull)
+
+#define CVMX_USBNXREG1(reg, bid) \
+ (CVMX_ADD_IO_SEG(0x0001180068000000ull | reg) + CVMX_USBNXBID1(bid))
+#define CVMX_USBNXREG2(reg, bid) \
+ (CVMX_ADD_IO_SEG(0x00016F0000000000ull | reg) + CVMX_USBNXBID2(bid))
+
+#define CVMX_USBNX_CLK_CTL(bid) CVMX_USBNXREG1(0x10, bid)
+#define CVMX_USBNX_DMA0_INB_CHN0(bid) CVMX_USBNXREG2(0x818, bid)
+#define CVMX_USBNX_DMA0_OUTB_CHN0(bid) CVMX_USBNXREG2(0x858, bid)
+#define CVMX_USBNX_USBP_CTL_STATUS(bid) CVMX_USBNXREG1(0x18, bid)
+
+/**
+ * cvmx_usbn#_clk_ctl
+ *
+ * USBN_CLK_CTL = USBN's Clock Control
+ *
+ * This register is used to control the frequency of the hclk and the
+ * hreset and phy_rst signals.
+ */
+union cvmx_usbnx_clk_ctl {
+ uint64_t u64;
+ /**
+ * struct cvmx_usbnx_clk_ctl_s
+ * @divide2: The 'hclk' used by the USB subsystem is derived
+ * from the eclk.
+ * Also see the field DIVIDE. DIVIDE2<1> must currently
+ * be zero because it is not implemented, so the maximum
+ * ratio of eclk/hclk is currently 16.
+ * The actual divide number for hclk is:
+ * (DIVIDE2 + 1) * (DIVIDE + 1)
+ * @hclk_rst: When this field is '0' the HCLK-DIVIDER used to
+ * generate the hclk in the USB Subsystem is held
+ * in reset. This bit must be set to '0' before
+ * changing the value os DIVIDE in this register.
+ * The reset to the HCLK_DIVIDERis also asserted
+ * when core reset is asserted.
+ * @p_x_on: Force USB-PHY on during suspend.
+ * '1' USB-PHY XO block is powered-down during
+ * suspend.
+ * '0' USB-PHY XO block is powered-up during
+ * suspend.
+ * The value of this field must be set while POR is
+ * active.
+ * @p_com_on: '0' Force USB-PHY XO Bias, Bandgap and PLL to
+ * remain powered in Suspend Mode.
+ * '1' The USB-PHY XO Bias, Bandgap and PLL are
+ * powered down in suspend mode.
+ * The value of this field must be set while POR is
+ * active.
+ * @p_c_sel: Phy clock speed select.
+ * Selects the reference clock / crystal frequency.
+ * '11': Reserved
+ * '10': 48 MHz (reserved when a crystal is used)
+ * '01': 24 MHz (reserved when a crystal is used)
+ * '00': 12 MHz
+ * The value of this field must be set while POR is
+ * active.
+ * NOTE: if a crystal is used as a reference clock,
+ * this field must be set to 12 MHz.
+ * @cdiv_byp: Used to enable the bypass input to the USB_CLK_DIV.
+ * @sd_mode: Scaledown mode for the USBC. Control timing events
+ * in the USBC, for normal operation this must be '0'.
+ * @s_bist: Starts bist on the hclk memories, during the '0'
+ * to '1' transition.
+ * @por: Power On Reset for the PHY.
+ * Resets all the PHYS registers and state machines.
+ * @enable: When '1' allows the generation of the hclk. When
+ * '0' the hclk will not be generated. SEE DIVIDE
+ * field of this register.
+ * @prst: When this field is '0' the reset associated with
+ * the phy_clk functionality in the USB Subsystem is
+ * help in reset. This bit should not be set to '1'
+ * until the time it takes 6 clocks (hclk or phy_clk,
+ * whichever is slower) has passed. Under normal
+ * operation once this bit is set to '1' it should not
+ * be set to '0'.
+ * @hrst: When this field is '0' the reset associated with
+ * the hclk functioanlity in the USB Subsystem is
+ * held in reset.This bit should not be set to '1'
+ * until 12ms after phy_clk is stable. Under normal
+ * operation, once this bit is set to '1' it should
+ * not be set to '0'.
+ * @divide: The frequency of 'hclk' used by the USB subsystem
+ * is the eclk frequency divided by the value of
+ * (DIVIDE2 + 1) * (DIVIDE + 1), also see the field
+ * DIVIDE2 of this register.
+ * The hclk frequency should be less than 125Mhz.
+ * After writing a value to this field the SW should
+ * read the field for the value written.
+ * The ENABLE field of this register should not be set
+ * until AFTER this field is set and then read.
+ */
+ struct cvmx_usbnx_clk_ctl_s {
+ uint64_t reserved_20_63 : 44;
+ uint64_t divide2 : 2;
+ uint64_t hclk_rst : 1;
+ uint64_t p_x_on : 1;
+ uint64_t reserved_14_15 : 2;
+ uint64_t p_com_on : 1;
+ uint64_t p_c_sel : 2;
+ uint64_t cdiv_byp : 1;
+ uint64_t sd_mode : 2;
+ uint64_t s_bist : 1;
+ uint64_t por : 1;
+ uint64_t enable : 1;
+ uint64_t prst : 1;
+ uint64_t hrst : 1;
+ uint64_t divide : 3;
+ } s;
+ /**
+ * struct cvmx_usbnx_clk_ctl_cn30xx
+ * @hclk_rst: When this field is '0' the HCLK-DIVIDER used to
+ * generate the hclk in the USB Subsystem is held
+ * in reset. This bit must be set to '0' before
+ * changing the value os DIVIDE in this register.
+ * The reset to the HCLK_DIVIDERis also asserted
+ * when core reset is asserted.
+ * @p_x_on: Force USB-PHY on during suspend.
+ * '1' USB-PHY XO block is powered-down during
+ * suspend.
+ * '0' USB-PHY XO block is powered-up during
+ * suspend.
+ * The value of this field must be set while POR is
+ * active.
+ * @p_rclk: Phy refrence clock enable.
+ * '1' The PHY PLL uses the XO block output as a
+ * reference.
+ * '0' Reserved.
+ * @p_xenbn: Phy external clock enable.
+ * '1' The XO block uses the clock from a crystal.
+ * '0' The XO block uses an external clock supplied
+ * on the XO pin. USB_XI should be tied to
+ * ground for this usage.
+ * @p_com_on: '0' Force USB-PHY XO Bias, Bandgap and PLL to
+ * remain powered in Suspend Mode.
+ * '1' The USB-PHY XO Bias, Bandgap and PLL are
+ * powered down in suspend mode.
+ * The value of this field must be set while POR is
+ * active.
+ * @p_c_sel: Phy clock speed select.
+ * Selects the reference clock / crystal frequency.
+ * '11': Reserved
+ * '10': 48 MHz
+ * '01': 24 MHz
+ * '00': 12 MHz
+ * The value of this field must be set while POR is
+ * active.
+ * @cdiv_byp: Used to enable the bypass input to the USB_CLK_DIV.
+ * @sd_mode: Scaledown mode for the USBC. Control timing events
+ * in the USBC, for normal operation this must be '0'.
+ * @s_bist: Starts bist on the hclk memories, during the '0'
+ * to '1' transition.
+ * @por: Power On Reset for the PHY.
+ * Resets all the PHYS registers and state machines.
+ * @enable: When '1' allows the generation of the hclk. When
+ * '0' the hclk will not be generated.
+ * @prst: When this field is '0' the reset associated with
+ * the phy_clk functionality in the USB Subsystem is
+ * help in reset. This bit should not be set to '1'
+ * until the time it takes 6 clocks (hclk or phy_clk,
+ * whichever is slower) has passed. Under normal
+ * operation once this bit is set to '1' it should not
+ * be set to '0'.
+ * @hrst: When this field is '0' the reset associated with
+ * the hclk functioanlity in the USB Subsystem is
+ * held in reset.This bit should not be set to '1'
+ * until 12ms after phy_clk is stable. Under normal
+ * operation, once this bit is set to '1' it should
+ * not be set to '0'.
+ * @divide: The 'hclk' used by the USB subsystem is derived
+ * from the eclk. The eclk will be divided by the
+ * value of this field +1 to determine the hclk
+ * frequency. (Also see HRST of this register).
+ * The hclk frequency must be less than 125 MHz.
+ */
+ struct cvmx_usbnx_clk_ctl_cn30xx {
+ uint64_t reserved_18_63 : 46;
+ uint64_t hclk_rst : 1;
+ uint64_t p_x_on : 1;
+ uint64_t p_rclk : 1;
+ uint64_t p_xenbn : 1;
+ uint64_t p_com_on : 1;
+ uint64_t p_c_sel : 2;
+ uint64_t cdiv_byp : 1;
+ uint64_t sd_mode : 2;
+ uint64_t s_bist : 1;
+ uint64_t por : 1;
+ uint64_t enable : 1;
+ uint64_t prst : 1;
+ uint64_t hrst : 1;
+ uint64_t divide : 3;
+ } cn30xx;
+ struct cvmx_usbnx_clk_ctl_cn30xx cn31xx;
+ /**
+ * struct cvmx_usbnx_clk_ctl_cn50xx
+ * @divide2: The 'hclk' used by the USB subsystem is derived
+ * from the eclk.
+ * Also see the field DIVIDE. DIVIDE2<1> must currently
+ * be zero because it is not implemented, so the maximum
+ * ratio of eclk/hclk is currently 16.
+ * The actual divide number for hclk is:
+ * (DIVIDE2 + 1) * (DIVIDE + 1)
+ * @hclk_rst: When this field is '0' the HCLK-DIVIDER used to
+ * generate the hclk in the USB Subsystem is held
+ * in reset. This bit must be set to '0' before
+ * changing the value os DIVIDE in this register.
+ * The reset to the HCLK_DIVIDERis also asserted
+ * when core reset is asserted.
+ * @p_rtype: PHY reference clock type
+ * '0' The USB-PHY uses a 12MHz crystal as a clock
+ * source at the USB_XO and USB_XI pins
+ * '1' Reserved
+ * '2' The USB_PHY uses 12/24/48MHz 2.5V board clock
+ * at the USB_XO pin. USB_XI should be tied to
+ * ground in this case.
+ * '3' Reserved
+ * (bit 14 was P_XENBN on 3xxx)
+ * (bit 15 was P_RCLK on 3xxx)
+ * @p_com_on: '0' Force USB-PHY XO Bias, Bandgap and PLL to
+ * remain powered in Suspend Mode.
+ * '1' The USB-PHY XO Bias, Bandgap and PLL are
+ * powered down in suspend mode.
+ * The value of this field must be set while POR is
+ * active.
+ * @p_c_sel: Phy clock speed select.
+ * Selects the reference clock / crystal frequency.
+ * '11': Reserved
+ * '10': 48 MHz (reserved when a crystal is used)
+ * '01': 24 MHz (reserved when a crystal is used)
+ * '00': 12 MHz
+ * The value of this field must be set while POR is
+ * active.
+ * NOTE: if a crystal is used as a reference clock,
+ * this field must be set to 12 MHz.
+ * @cdiv_byp: Used to enable the bypass input to the USB_CLK_DIV.
+ * @sd_mode: Scaledown mode for the USBC. Control timing events
+ * in the USBC, for normal operation this must be '0'.
+ * @s_bist: Starts bist on the hclk memories, during the '0'
+ * to '1' transition.
+ * @por: Power On Reset for the PHY.
+ * Resets all the PHYS registers and state machines.
+ * @enable: When '1' allows the generation of the hclk. When
+ * '0' the hclk will not be generated. SEE DIVIDE
+ * field of this register.
+ * @prst: When this field is '0' the reset associated with
+ * the phy_clk functionality in the USB Subsystem is
+ * help in reset. This bit should not be set to '1'
+ * until the time it takes 6 clocks (hclk or phy_clk,
+ * whichever is slower) has passed. Under normal
+ * operation once this bit is set to '1' it should not
+ * be set to '0'.
+ * @hrst: When this field is '0' the reset associated with
+ * the hclk functioanlity in the USB Subsystem is
+ * held in reset.This bit should not be set to '1'
+ * until 12ms after phy_clk is stable. Under normal
+ * operation, once this bit is set to '1' it should
+ * not be set to '0'.
+ * @divide: The frequency of 'hclk' used by the USB subsystem
+ * is the eclk frequency divided by the value of
+ * (DIVIDE2 + 1) * (DIVIDE + 1), also see the field
+ * DIVIDE2 of this register.
+ * The hclk frequency should be less than 125Mhz.
+ * After writing a value to this field the SW should
+ * read the field for the value written.
+ * The ENABLE field of this register should not be set
+ * until AFTER this field is set and then read.
+ */
+ struct cvmx_usbnx_clk_ctl_cn50xx {
+ uint64_t reserved_20_63 : 44;
+ uint64_t divide2 : 2;
+ uint64_t hclk_rst : 1;
+ uint64_t reserved_16_16 : 1;
+ uint64_t p_rtype : 2;
+ uint64_t p_com_on : 1;
+ uint64_t p_c_sel : 2;
+ uint64_t cdiv_byp : 1;
+ uint64_t sd_mode : 2;
+ uint64_t s_bist : 1;
+ uint64_t por : 1;
+ uint64_t enable : 1;
+ uint64_t prst : 1;
+ uint64_t hrst : 1;
+ uint64_t divide : 3;
+ } cn50xx;
+ struct cvmx_usbnx_clk_ctl_cn50xx cn52xx;
+ struct cvmx_usbnx_clk_ctl_cn50xx cn56xx;
+};
+typedef union cvmx_usbnx_clk_ctl cvmx_usbnx_clk_ctl_t;
+
+/**
+ * cvmx_usbn#_usbp_ctl_status
+ *
+ * USBN_USBP_CTL_STATUS = USBP Control And Status Register
+ *
+ * Contains general control and status information for the USBN block.
+ */
+union cvmx_usbnx_usbp_ctl_status {
+ uint64_t u64;
+ /**
+ * struct cvmx_usbnx_usbp_ctl_status_s
+ * @txrisetune: HS Transmitter Rise/Fall Time Adjustment
+ * @txvreftune: HS DC Voltage Level Adjustment
+ * @txfslstune: FS/LS Source Impedence Adjustment
+ * @txhsxvtune: Transmitter High-Speed Crossover Adjustment
+ * @sqrxtune: Squelch Threshold Adjustment
+ * @compdistune: Disconnect Threshold Adjustment
+ * @otgtune: VBUS Valid Threshold Adjustment
+ * @otgdisable: OTG Block Disable
+ * @portreset: Per_Port Reset
+ * @drvvbus: Drive VBUS
+ * @lsbist: Low-Speed BIST Enable.
+ * @fsbist: Full-Speed BIST Enable.
+ * @hsbist: High-Speed BIST Enable.
+ * @bist_done: PHY Bist Done.
+ * Asserted at the end of the PHY BIST sequence.
+ * @bist_err: PHY Bist Error.
+ * Indicates an internal error was detected during
+ * the BIST sequence.
+ * @tdata_out: PHY Test Data Out.
+ * Presents either internaly generated signals or
+ * test register contents, based upon the value of
+ * test_data_out_sel.
+ * @siddq: Drives the USBP (USB-PHY) SIDDQ input.
+ * Normally should be set to zero.
+ * When customers have no intent to use USB PHY
+ * interface, they should:
+ * - still provide 3.3V to USB_VDD33, and
+ * - tie USB_REXT to 3.3V supply, and
+ * - set USBN*_USBP_CTL_STATUS[SIDDQ]=1
+ * @txpreemphasistune: HS Transmitter Pre-Emphasis Enable
+ * @dma_bmode: When set to 1 the L2C DMA address will be updated
+ * with byte-counts between packets. When set to 0
+ * the L2C DMA address is incremented to the next
+ * 4-byte aligned address after adding byte-count.
+ * @usbc_end: Bigendian input to the USB Core. This should be
+ * set to '0' for operation.
+ * @usbp_bist: PHY, This is cleared '0' to run BIST on the USBP.
+ * @tclk: PHY Test Clock, used to load TDATA_IN to the USBP.
+ * @dp_pulld: PHY DP_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D+ line. '1' pull down-resistance is connected
+ * to D+/ '0' pull down resistance is not connected
+ * to D+. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @dm_pulld: PHY DM_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D- line. '1' pull down-resistance is connected
+ * to D-. '0' pull down resistance is not connected
+ * to D-. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @hst_mode: When '0' the USB is acting as HOST, when '1'
+ * USB is acting as device. This field needs to be
+ * set while the USB is in reset.
+ * @tuning: Transmitter Tuning for High-Speed Operation.
+ * Tunes the current supply and rise/fall output
+ * times for high-speed operation.
+ * [20:19] == 11: Current supply increased
+ * approximately 9%
+ * [20:19] == 10: Current supply increased
+ * approximately 4.5%
+ * [20:19] == 01: Design default.
+ * [20:19] == 00: Current supply decreased
+ * approximately 4.5%
+ * [22:21] == 11: Rise and fall times are increased.
+ * [22:21] == 10: Design default.
+ * [22:21] == 01: Rise and fall times are decreased.
+ * [22:21] == 00: Rise and fall times are decreased
+ * further as compared to the 01 setting.
+ * @tx_bs_enh: Transmit Bit Stuffing on [15:8].
+ * Enables or disables bit stuffing on data[15:8]
+ * when bit-stuffing is enabled.
+ * @tx_bs_en: Transmit Bit Stuffing on [7:0].
+ * Enables or disables bit stuffing on data[7:0]
+ * when bit-stuffing is enabled.
+ * @loop_enb: PHY Loopback Test Enable.
+ * '1': During data transmission the receive is
+ * enabled.
+ * '0': During data transmission the receive is
+ * disabled.
+ * Must be '0' for normal operation.
+ * @vtest_enb: Analog Test Pin Enable.
+ * '1' The PHY's analog_test pin is enabled for the
+ * input and output of applicable analog test signals.
+ * '0' THe analog_test pin is disabled.
+ * @bist_enb: Built-In Self Test Enable.
+ * Used to activate BIST in the PHY.
+ * @tdata_sel: Test Data Out Select.
+ * '1' test_data_out[3:0] (PHY) register contents
+ * are output. '0' internaly generated signals are
+ * output.
+ * @taddr_in: Mode Address for Test Interface.
+ * Specifies the register address for writing to or
+ * reading from the PHY test interface register.
+ * @tdata_in: Internal Testing Register Input Data and Select
+ * This is a test bus. Data is present on [3:0],
+ * and its corresponding select (enable) is present
+ * on bits [7:4].
+ * @ate_reset: Reset input from automatic test equipment.
+ * This is a test signal. When the USB Core is
+ * powered up (not in Susned Mode), an automatic
+ * tester can use this to disable phy_clock and
+ * free_clk, then re-eanable them with an aligned
+ * phase.
+ * '1': The phy_clk and free_clk outputs are
+ * disabled. "0": The phy_clock and free_clk outputs
+ * are available within a specific period after the
+ * de-assertion.
+ */
+ struct cvmx_usbnx_usbp_ctl_status_s {
+ uint64_t txrisetune : 1;
+ uint64_t txvreftune : 4;
+ uint64_t txfslstune : 4;
+ uint64_t txhsxvtune : 2;
+ uint64_t sqrxtune : 3;
+ uint64_t compdistune : 3;
+ uint64_t otgtune : 3;
+ uint64_t otgdisable : 1;
+ uint64_t portreset : 1;
+ uint64_t drvvbus : 1;
+ uint64_t lsbist : 1;
+ uint64_t fsbist : 1;
+ uint64_t hsbist : 1;
+ uint64_t bist_done : 1;
+ uint64_t bist_err : 1;
+ uint64_t tdata_out : 4;
+ uint64_t siddq : 1;
+ uint64_t txpreemphasistune : 1;
+ uint64_t dma_bmode : 1;
+ uint64_t usbc_end : 1;
+ uint64_t usbp_bist : 1;
+ uint64_t tclk : 1;
+ uint64_t dp_pulld : 1;
+ uint64_t dm_pulld : 1;
+ uint64_t hst_mode : 1;
+ uint64_t tuning : 4;
+ uint64_t tx_bs_enh : 1;
+ uint64_t tx_bs_en : 1;
+ uint64_t loop_enb : 1;
+ uint64_t vtest_enb : 1;
+ uint64_t bist_enb : 1;
+ uint64_t tdata_sel : 1;
+ uint64_t taddr_in : 4;
+ uint64_t tdata_in : 8;
+ uint64_t ate_reset : 1;
+ } s;
+ /**
+ * struct cvmx_usbnx_usbp_ctl_status_cn30xx
+ * @bist_done: PHY Bist Done.
+ * Asserted at the end of the PHY BIST sequence.
+ * @bist_err: PHY Bist Error.
+ * Indicates an internal error was detected during
+ * the BIST sequence.
+ * @tdata_out: PHY Test Data Out.
+ * Presents either internaly generated signals or
+ * test register contents, based upon the value of
+ * test_data_out_sel.
+ * @dma_bmode: When set to 1 the L2C DMA address will be updated
+ * with byte-counts between packets. When set to 0
+ * the L2C DMA address is incremented to the next
+ * 4-byte aligned address after adding byte-count.
+ * @usbc_end: Bigendian input to the USB Core. This should be
+ * set to '0' for operation.
+ * @usbp_bist: PHY, This is cleared '0' to run BIST on the USBP.
+ * @tclk: PHY Test Clock, used to load TDATA_IN to the USBP.
+ * @dp_pulld: PHY DP_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D+ line. '1' pull down-resistance is connected
+ * to D+/ '0' pull down resistance is not connected
+ * to D+. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @dm_pulld: PHY DM_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D- line. '1' pull down-resistance is connected
+ * to D-. '0' pull down resistance is not connected
+ * to D-. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @hst_mode: When '0' the USB is acting as HOST, when '1'
+ * USB is acting as device. This field needs to be
+ * set while the USB is in reset.
+ * @tuning: Transmitter Tuning for High-Speed Operation.
+ * Tunes the current supply and rise/fall output
+ * times for high-speed operation.
+ * [20:19] == 11: Current supply increased
+ * approximately 9%
+ * [20:19] == 10: Current supply increased
+ * approximately 4.5%
+ * [20:19] == 01: Design default.
+ * [20:19] == 00: Current supply decreased
+ * approximately 4.5%
+ * [22:21] == 11: Rise and fall times are increased.
+ * [22:21] == 10: Design default.
+ * [22:21] == 01: Rise and fall times are decreased.
+ * [22:21] == 00: Rise and fall times are decreased
+ * further as compared to the 01 setting.
+ * @tx_bs_enh: Transmit Bit Stuffing on [15:8].
+ * Enables or disables bit stuffing on data[15:8]
+ * when bit-stuffing is enabled.
+ * @tx_bs_en: Transmit Bit Stuffing on [7:0].
+ * Enables or disables bit stuffing on data[7:0]
+ * when bit-stuffing is enabled.
+ * @loop_enb: PHY Loopback Test Enable.
+ * '1': During data transmission the receive is
+ * enabled.
+ * '0': During data transmission the receive is
+ * disabled.
+ * Must be '0' for normal operation.
+ * @vtest_enb: Analog Test Pin Enable.
+ * '1' The PHY's analog_test pin is enabled for the
+ * input and output of applicable analog test signals.
+ * '0' THe analog_test pin is disabled.
+ * @bist_enb: Built-In Self Test Enable.
+ * Used to activate BIST in the PHY.
+ * @tdata_sel: Test Data Out Select.
+ * '1' test_data_out[3:0] (PHY) register contents
+ * are output. '0' internaly generated signals are
+ * output.
+ * @taddr_in: Mode Address for Test Interface.
+ * Specifies the register address for writing to or
+ * reading from the PHY test interface register.
+ * @tdata_in: Internal Testing Register Input Data and Select
+ * This is a test bus. Data is present on [3:0],
+ * and its corresponding select (enable) is present
+ * on bits [7:4].
+ * @ate_reset: Reset input from automatic test equipment.
+ * This is a test signal. When the USB Core is
+ * powered up (not in Susned Mode), an automatic
+ * tester can use this to disable phy_clock and
+ * free_clk, then re-eanable them with an aligned
+ * phase.
+ * '1': The phy_clk and free_clk outputs are
+ * disabled. "0": The phy_clock and free_clk outputs
+ * are available within a specific period after the
+ * de-assertion.
+ */
+ struct cvmx_usbnx_usbp_ctl_status_cn30xx {
+ uint64_t reserved_38_63 : 26;
+ uint64_t bist_done : 1;
+ uint64_t bist_err : 1;
+ uint64_t tdata_out : 4;
+ uint64_t reserved_30_31 : 2;
+ uint64_t dma_bmode : 1;
+ uint64_t usbc_end : 1;
+ uint64_t usbp_bist : 1;
+ uint64_t tclk : 1;
+ uint64_t dp_pulld : 1;
+ uint64_t dm_pulld : 1;
+ uint64_t hst_mode : 1;
+ uint64_t tuning : 4;
+ uint64_t tx_bs_enh : 1;
+ uint64_t tx_bs_en : 1;
+ uint64_t loop_enb : 1;
+ uint64_t vtest_enb : 1;
+ uint64_t bist_enb : 1;
+ uint64_t tdata_sel : 1;
+ uint64_t taddr_in : 4;
+ uint64_t tdata_in : 8;
+ uint64_t ate_reset : 1;
+ } cn30xx;
+ /**
+ * struct cvmx_usbnx_usbp_ctl_status_cn50xx
+ * @txrisetune: HS Transmitter Rise/Fall Time Adjustment
+ * @txvreftune: HS DC Voltage Level Adjustment
+ * @txfslstune: FS/LS Source Impedence Adjustment
+ * @txhsxvtune: Transmitter High-Speed Crossover Adjustment
+ * @sqrxtune: Squelch Threshold Adjustment
+ * @compdistune: Disconnect Threshold Adjustment
+ * @otgtune: VBUS Valid Threshold Adjustment
+ * @otgdisable: OTG Block Disable
+ * @portreset: Per_Port Reset
+ * @drvvbus: Drive VBUS
+ * @lsbist: Low-Speed BIST Enable.
+ * @fsbist: Full-Speed BIST Enable.
+ * @hsbist: High-Speed BIST Enable.
+ * @bist_done: PHY Bist Done.
+ * Asserted at the end of the PHY BIST sequence.
+ * @bist_err: PHY Bist Error.
+ * Indicates an internal error was detected during
+ * the BIST sequence.
+ * @tdata_out: PHY Test Data Out.
+ * Presents either internaly generated signals or
+ * test register contents, based upon the value of
+ * test_data_out_sel.
+ * @txpreemphasistune: HS Transmitter Pre-Emphasis Enable
+ * @dma_bmode: When set to 1 the L2C DMA address will be updated
+ * with byte-counts between packets. When set to 0
+ * the L2C DMA address is incremented to the next
+ * 4-byte aligned address after adding byte-count.
+ * @usbc_end: Bigendian input to the USB Core. This should be
+ * set to '0' for operation.
+ * @usbp_bist: PHY, This is cleared '0' to run BIST on the USBP.
+ * @tclk: PHY Test Clock, used to load TDATA_IN to the USBP.
+ * @dp_pulld: PHY DP_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D+ line. '1' pull down-resistance is connected
+ * to D+/ '0' pull down resistance is not connected
+ * to D+. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @dm_pulld: PHY DM_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D- line. '1' pull down-resistance is connected
+ * to D-. '0' pull down resistance is not connected
+ * to D-. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @hst_mode: When '0' the USB is acting as HOST, when '1'
+ * USB is acting as device. This field needs to be
+ * set while the USB is in reset.
+ * @tx_bs_enh: Transmit Bit Stuffing on [15:8].
+ * Enables or disables bit stuffing on data[15:8]
+ * when bit-stuffing is enabled.
+ * @tx_bs_en: Transmit Bit Stuffing on [7:0].
+ * Enables or disables bit stuffing on data[7:0]
+ * when bit-stuffing is enabled.
+ * @loop_enb: PHY Loopback Test Enable.
+ * '1': During data transmission the receive is
+ * enabled.
+ * '0': During data transmission the receive is
+ * disabled.
+ * Must be '0' for normal operation.
+ * @vtest_enb: Analog Test Pin Enable.
+ * '1' The PHY's analog_test pin is enabled for the
+ * input and output of applicable analog test signals.
+ * '0' THe analog_test pin is disabled.
+ * @bist_enb: Built-In Self Test Enable.
+ * Used to activate BIST in the PHY.
+ * @tdata_sel: Test Data Out Select.
+ * '1' test_data_out[3:0] (PHY) register contents
+ * are output. '0' internaly generated signals are
+ * output.
+ * @taddr_in: Mode Address for Test Interface.
+ * Specifies the register address for writing to or
+ * reading from the PHY test interface register.
+ * @tdata_in: Internal Testing Register Input Data and Select
+ * This is a test bus. Data is present on [3:0],
+ * and its corresponding select (enable) is present
+ * on bits [7:4].
+ * @ate_reset: Reset input from automatic test equipment.
+ * This is a test signal. When the USB Core is
+ * powered up (not in Susned Mode), an automatic
+ * tester can use this to disable phy_clock and
+ * free_clk, then re-eanable them with an aligned
+ * phase.
+ * '1': The phy_clk and free_clk outputs are
+ * disabled. "0": The phy_clock and free_clk outputs
+ * are available within a specific period after the
+ * de-assertion.
+ */
+ struct cvmx_usbnx_usbp_ctl_status_cn50xx {
+ uint64_t txrisetune : 1;
+ uint64_t txvreftune : 4;
+ uint64_t txfslstune : 4;
+ uint64_t txhsxvtune : 2;
+ uint64_t sqrxtune : 3;
+ uint64_t compdistune : 3;
+ uint64_t otgtune : 3;
+ uint64_t otgdisable : 1;
+ uint64_t portreset : 1;
+ uint64_t drvvbus : 1;
+ uint64_t lsbist : 1;
+ uint64_t fsbist : 1;
+ uint64_t hsbist : 1;
+ uint64_t bist_done : 1;
+ uint64_t bist_err : 1;
+ uint64_t tdata_out : 4;
+ uint64_t reserved_31_31 : 1;
+ uint64_t txpreemphasistune : 1;
+ uint64_t dma_bmode : 1;
+ uint64_t usbc_end : 1;
+ uint64_t usbp_bist : 1;
+ uint64_t tclk : 1;
+ uint64_t dp_pulld : 1;
+ uint64_t dm_pulld : 1;
+ uint64_t hst_mode : 1;
+ uint64_t reserved_19_22 : 4;
+ uint64_t tx_bs_enh : 1;
+ uint64_t tx_bs_en : 1;
+ uint64_t loop_enb : 1;
+ uint64_t vtest_enb : 1;
+ uint64_t bist_enb : 1;
+ uint64_t tdata_sel : 1;
+ uint64_t taddr_in : 4;
+ uint64_t tdata_in : 8;
+ uint64_t ate_reset : 1;
+ } cn50xx;
+ /**
+ * struct cvmx_usbnx_usbp_ctl_status_cn52xx
+ * @txrisetune: HS Transmitter Rise/Fall Time Adjustment
+ * @txvreftune: HS DC Voltage Level Adjustment
+ * @txfslstune: FS/LS Source Impedence Adjustment
+ * @txhsxvtune: Transmitter High-Speed Crossover Adjustment
+ * @sqrxtune: Squelch Threshold Adjustment
+ * @compdistune: Disconnect Threshold Adjustment
+ * @otgtune: VBUS Valid Threshold Adjustment
+ * @otgdisable: OTG Block Disable
+ * @portreset: Per_Port Reset
+ * @drvvbus: Drive VBUS
+ * @lsbist: Low-Speed BIST Enable.
+ * @fsbist: Full-Speed BIST Enable.
+ * @hsbist: High-Speed BIST Enable.
+ * @bist_done: PHY Bist Done.
+ * Asserted at the end of the PHY BIST sequence.
+ * @bist_err: PHY Bist Error.
+ * Indicates an internal error was detected during
+ * the BIST sequence.
+ * @tdata_out: PHY Test Data Out.
+ * Presents either internaly generated signals or
+ * test register contents, based upon the value of
+ * test_data_out_sel.
+ * @siddq: Drives the USBP (USB-PHY) SIDDQ input.
+ * Normally should be set to zero.
+ * When customers have no intent to use USB PHY
+ * interface, they should:
+ * - still provide 3.3V to USB_VDD33, and
+ * - tie USB_REXT to 3.3V supply, and
+ * - set USBN*_USBP_CTL_STATUS[SIDDQ]=1
+ * @txpreemphasistune: HS Transmitter Pre-Emphasis Enable
+ * @dma_bmode: When set to 1 the L2C DMA address will be updated
+ * with byte-counts between packets. When set to 0
+ * the L2C DMA address is incremented to the next
+ * 4-byte aligned address after adding byte-count.
+ * @usbc_end: Bigendian input to the USB Core. This should be
+ * set to '0' for operation.
+ * @usbp_bist: PHY, This is cleared '0' to run BIST on the USBP.
+ * @tclk: PHY Test Clock, used to load TDATA_IN to the USBP.
+ * @dp_pulld: PHY DP_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D+ line. '1' pull down-resistance is connected
+ * to D+/ '0' pull down resistance is not connected
+ * to D+. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @dm_pulld: PHY DM_PULLDOWN input to the USB-PHY.
+ * This signal enables the pull-down resistance on
+ * the D- line. '1' pull down-resistance is connected
+ * to D-. '0' pull down resistance is not connected
+ * to D-. When an A/B device is acting as a host
+ * (downstream-facing port), dp_pulldown and
+ * dm_pulldown are enabled. This must not toggle
+ * during normal opeartion.
+ * @hst_mode: When '0' the USB is acting as HOST, when '1'
+ * USB is acting as device. This field needs to be
+ * set while the USB is in reset.
+ * @tx_bs_enh: Transmit Bit Stuffing on [15:8].
+ * Enables or disables bit stuffing on data[15:8]
+ * when bit-stuffing is enabled.
+ * @tx_bs_en: Transmit Bit Stuffing on [7:0].
+ * Enables or disables bit stuffing on data[7:0]
+ * when bit-stuffing is enabled.
+ * @loop_enb: PHY Loopback Test Enable.
+ * '1': During data transmission the receive is
+ * enabled.
+ * '0': During data transmission the receive is
+ * disabled.
+ * Must be '0' for normal operation.
+ * @vtest_enb: Analog Test Pin Enable.
+ * '1' The PHY's analog_test pin is enabled for the
+ * input and output of applicable analog test signals.
+ * '0' THe analog_test pin is disabled.
+ * @bist_enb: Built-In Self Test Enable.
+ * Used to activate BIST in the PHY.
+ * @tdata_sel: Test Data Out Select.
+ * '1' test_data_out[3:0] (PHY) register contents
+ * are output. '0' internaly generated signals are
+ * output.
+ * @taddr_in: Mode Address for Test Interface.
+ * Specifies the register address for writing to or
+ * reading from the PHY test interface register.
+ * @tdata_in: Internal Testing Register Input Data and Select
+ * This is a test bus. Data is present on [3:0],
+ * and its corresponding select (enable) is present
+ * on bits [7:4].
+ * @ate_reset: Reset input from automatic test equipment.
+ * This is a test signal. When the USB Core is
+ * powered up (not in Susned Mode), an automatic
+ * tester can use this to disable phy_clock and
+ * free_clk, then re-eanable them with an aligned
+ * phase.
+ * '1': The phy_clk and free_clk outputs are
+ * disabled. "0": The phy_clock and free_clk outputs
+ * are available within a specific period after the
+ * de-assertion.
+ */
+ struct cvmx_usbnx_usbp_ctl_status_cn52xx {
+ uint64_t txrisetune : 1;
+ uint64_t txvreftune : 4;
+ uint64_t txfslstune : 4;
+ uint64_t txhsxvtune : 2;
+ uint64_t sqrxtune : 3;
+ uint64_t compdistune : 3;
+ uint64_t otgtune : 3;
+ uint64_t otgdisable : 1;
+ uint64_t portreset : 1;
+ uint64_t drvvbus : 1;
+ uint64_t lsbist : 1;
+ uint64_t fsbist : 1;
+ uint64_t hsbist : 1;
+ uint64_t bist_done : 1;
+ uint64_t bist_err : 1;
+ uint64_t tdata_out : 4;
+ uint64_t siddq : 1;
+ uint64_t txpreemphasistune : 1;
+ uint64_t dma_bmode : 1;
+ uint64_t usbc_end : 1;
+ uint64_t usbp_bist : 1;
+ uint64_t tclk : 1;
+ uint64_t dp_pulld : 1;
+ uint64_t dm_pulld : 1;
+ uint64_t hst_mode : 1;
+ uint64_t reserved_19_22 : 4;
+ uint64_t tx_bs_enh : 1;
+ uint64_t tx_bs_en : 1;
+ uint64_t loop_enb : 1;
+ uint64_t vtest_enb : 1;
+ uint64_t bist_enb : 1;
+ uint64_t tdata_sel : 1;
+ uint64_t taddr_in : 4;
+ uint64_t tdata_in : 8;
+ uint64_t ate_reset : 1;
+ } cn52xx;
+};
+typedef union cvmx_usbnx_usbp_ctl_status cvmx_usbnx_usbp_ctl_status_t;
+
+#endif
diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c
new file mode 100644
index 000000000000..d156b603ae65
--- /dev/null
+++ b/drivers/staging/octeon-usb/octeon-hcd.c
@@ -0,0 +1,832 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Cavium Networks
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/usb.h>
+
+#include <linux/time.h>
+#include <linux/delay.h>
+
+#include <asm/octeon/cvmx.h>
+#include "cvmx-usb.h"
+#include <asm/octeon/cvmx-iob-defs.h>
+
+#include <linux/usb/hcd.h>
+
+#include <linux/err.h>
+
+struct octeon_hcd {
+ spinlock_t lock;
+ cvmx_usb_state_t usb;
+ struct tasklet_struct dequeue_tasklet;
+ struct list_head dequeue_list;
+};
+
+/* convert between an HCD pointer and the corresponding struct octeon_hcd */
+static inline struct octeon_hcd *hcd_to_octeon(struct usb_hcd *hcd)
+{
+ return (struct octeon_hcd *)(hcd->hcd_priv);
+}
+
+static inline struct usb_hcd *octeon_to_hcd(struct octeon_hcd *p)
+{
+ return container_of((void *)p, struct usb_hcd, hcd_priv);
+}
+
+static inline struct octeon_hcd *cvmx_usb_to_octeon(cvmx_usb_state_t *p)
+{
+ return container_of(p, struct octeon_hcd, usb);
+}
+
+static irqreturn_t octeon_usb_irq(struct usb_hcd *hcd)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_poll(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return IRQ_HANDLED;
+}
+
+static void octeon_usb_port_callback(cvmx_usb_state_t *usb,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_complete_t status,
+ int pipe_handle,
+ int submit_handle,
+ int bytes_transferred,
+ void *user_data)
+{
+ struct octeon_hcd *priv = cvmx_usb_to_octeon(usb);
+
+ spin_unlock(&priv->lock);
+ usb_hcd_poll_rh_status(octeon_to_hcd(priv));
+ spin_lock(&priv->lock);
+}
+
+static int octeon_usb_start(struct usb_hcd *hcd)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ unsigned long flags;
+
+ hcd->state = HC_STATE_RUNNING;
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_register_callback(&priv->usb, CVMX_USB_CALLBACK_PORT_CHANGED,
+ octeon_usb_port_callback, NULL);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return 0;
+}
+
+static void octeon_usb_stop(struct usb_hcd *hcd)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_register_callback(&priv->usb, CVMX_USB_CALLBACK_PORT_CHANGED,
+ NULL, NULL);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ hcd->state = HC_STATE_HALT;
+}
+
+static int octeon_usb_get_frame_number(struct usb_hcd *hcd)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+
+ return cvmx_usb_get_frame_number(&priv->usb);
+}
+
+static void octeon_usb_urb_complete_callback(cvmx_usb_state_t *usb,
+ cvmx_usb_callback_t reason,
+ cvmx_usb_complete_t status,
+ int pipe_handle,
+ int submit_handle,
+ int bytes_transferred,
+ void *user_data)
+{
+ struct octeon_hcd *priv = cvmx_usb_to_octeon(usb);
+ struct usb_hcd *hcd = octeon_to_hcd(priv);
+ struct device *dev = hcd->self.controller;
+ struct urb *urb = user_data;
+
+ urb->actual_length = bytes_transferred;
+ urb->hcpriv = NULL;
+
+ if (!list_empty(&urb->urb_list)) {
+ /*
+ * It is on the dequeue_list, but we are going to call
+ * usb_hcd_giveback_urb(), so we must clear it from
+ * the list. We got to it before the
+ * octeon_usb_urb_dequeue_work() tasklet did.
+ */
+ list_del(&urb->urb_list);
+ /* No longer on the dequeue_list. */
+ INIT_LIST_HEAD(&urb->urb_list);
+ }
+
+ /* For Isochronous transactions we need to update the URB packet status
+ list from data in our private copy */
+ if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+ int i;
+ /*
+ * The pointer to the private list is stored in the setup_packet
+ * field.
+ */
+ cvmx_usb_iso_packet_t *iso_packet = (cvmx_usb_iso_packet_t *) urb->setup_packet;
+ /* Recalculate the transfer size by adding up each packet */
+ urb->actual_length = 0;
+ for (i = 0; i < urb->number_of_packets; i++) {
+ if (iso_packet[i].status == CVMX_USB_COMPLETE_SUCCESS) {
+ urb->iso_frame_desc[i].status = 0;
+ urb->iso_frame_desc[i].actual_length = iso_packet[i].length;
+ urb->actual_length += urb->iso_frame_desc[i].actual_length;
+ } else {
+ dev_dbg(dev, "ISOCHRONOUS packet=%d of %d status=%d pipe=%d submit=%d size=%d\n",
+ i, urb->number_of_packets,
+ iso_packet[i].status, pipe_handle,
+ submit_handle, iso_packet[i].length);
+ urb->iso_frame_desc[i].status = -EREMOTEIO;
+ }
+ }
+ /* Free the private list now that we don't need it anymore */
+ kfree(iso_packet);
+ urb->setup_packet = NULL;
+ }
+
+ switch (status) {
+ case CVMX_USB_COMPLETE_SUCCESS:
+ urb->status = 0;
+ break;
+ case CVMX_USB_COMPLETE_CANCEL:
+ if (urb->status == 0)
+ urb->status = -ENOENT;
+ break;
+ case CVMX_USB_COMPLETE_STALL:
+ dev_dbg(dev, "status=stall pipe=%d submit=%d size=%d\n",
+ pipe_handle, submit_handle, bytes_transferred);
+ urb->status = -EPIPE;
+ break;
+ case CVMX_USB_COMPLETE_BABBLEERR:
+ dev_dbg(dev, "status=babble pipe=%d submit=%d size=%d\n",
+ pipe_handle, submit_handle, bytes_transferred);
+ urb->status = -EPIPE;
+ break;
+ case CVMX_USB_COMPLETE_SHORT:
+ dev_dbg(dev, "status=short pipe=%d submit=%d size=%d\n",
+ pipe_handle, submit_handle, bytes_transferred);
+ urb->status = -EREMOTEIO;
+ break;
+ case CVMX_USB_COMPLETE_ERROR:
+ case CVMX_USB_COMPLETE_XACTERR:
+ case CVMX_USB_COMPLETE_DATATGLERR:
+ case CVMX_USB_COMPLETE_FRAMEERR:
+ dev_dbg(dev, "status=%d pipe=%d submit=%d size=%d\n",
+ status, pipe_handle, submit_handle, bytes_transferred);
+ urb->status = -EPROTO;
+ break;
+ }
+ spin_unlock(&priv->lock);
+ usb_hcd_giveback_urb(octeon_to_hcd(priv), urb, urb->status);
+ spin_lock(&priv->lock);
+}
+
+static int octeon_usb_urb_enqueue(struct usb_hcd *hcd,
+ struct urb *urb,
+ gfp_t mem_flags)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ struct device *dev = hcd->self.controller;
+ int submit_handle = -1;
+ int pipe_handle;
+ unsigned long flags;
+ cvmx_usb_iso_packet_t *iso_packet;
+ struct usb_host_endpoint *ep = urb->ep;
+
+ urb->status = 0;
+ INIT_LIST_HEAD(&urb->urb_list); /* not enqueued on dequeue_list */
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (!ep->hcpriv) {
+ cvmx_usb_transfer_t transfer_type;
+ cvmx_usb_speed_t speed;
+ int split_device = 0;
+ int split_port = 0;
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_ISOCHRONOUS:
+ transfer_type = CVMX_USB_TRANSFER_ISOCHRONOUS;
+ break;
+ case PIPE_INTERRUPT:
+ transfer_type = CVMX_USB_TRANSFER_INTERRUPT;
+ break;
+ case PIPE_CONTROL:
+ transfer_type = CVMX_USB_TRANSFER_CONTROL;
+ break;
+ default:
+ transfer_type = CVMX_USB_TRANSFER_BULK;
+ break;
+ }
+ switch (urb->dev->speed) {
+ case USB_SPEED_LOW:
+ speed = CVMX_USB_SPEED_LOW;
+ break;
+ case USB_SPEED_FULL:
+ speed = CVMX_USB_SPEED_FULL;
+ break;
+ default:
+ speed = CVMX_USB_SPEED_HIGH;
+ break;
+ }
+ /*
+ * For slow devices on high speed ports we need to find the hub
+ * that does the speed translation so we know where to send the
+ * split transactions.
+ */
+ if (speed != CVMX_USB_SPEED_HIGH) {
+ /*
+ * Start at this device and work our way up the usb
+ * tree.
+ */
+ struct usb_device *dev = urb->dev;
+ while (dev->parent) {
+ /*
+ * If our parent is high speed then he'll
+ * receive the splits.
+ */
+ if (dev->parent->speed == USB_SPEED_HIGH) {
+ split_device = dev->parent->devnum;
+ split_port = dev->portnum;
+ break;
+ }
+ /*
+ * Move up the tree one level. If we make it all
+ * the way up the tree, then the port must not
+ * be in high speed mode and we don't need a
+ * split.
+ */
+ dev = dev->parent;
+ }
+ }
+ pipe_handle = cvmx_usb_open_pipe(&priv->usb,
+ 0,
+ usb_pipedevice(urb->pipe),
+ usb_pipeendpoint(urb->pipe),
+ speed,
+ le16_to_cpu(ep->desc.wMaxPacketSize) & 0x7ff,
+ transfer_type,
+ usb_pipein(urb->pipe) ? CVMX_USB_DIRECTION_IN : CVMX_USB_DIRECTION_OUT,
+ urb->interval,
+ (le16_to_cpu(ep->desc.wMaxPacketSize) >> 11) & 0x3,
+ split_device,
+ split_port);
+ if (pipe_handle < 0) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ dev_dbg(dev, "Failed to create pipe\n");
+ return -ENOMEM;
+ }
+ ep->hcpriv = (void *)(0x10000L + pipe_handle);
+ } else {
+ pipe_handle = 0xffff & (long)ep->hcpriv;
+ }
+
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_ISOCHRONOUS:
+ dev_dbg(dev, "Submit isochronous to %d.%d\n",
+ usb_pipedevice(urb->pipe), usb_pipeendpoint(urb->pipe));
+ /*
+ * Allocate a structure to use for our private list of
+ * isochronous packets.
+ */
+ iso_packet = kmalloc(urb->number_of_packets * sizeof(cvmx_usb_iso_packet_t), GFP_ATOMIC);
+ if (iso_packet) {
+ int i;
+ /* Fill the list with the data from the URB */
+ for (i = 0; i < urb->number_of_packets; i++) {
+ iso_packet[i].offset = urb->iso_frame_desc[i].offset;
+ iso_packet[i].length = urb->iso_frame_desc[i].length;
+ iso_packet[i].status = CVMX_USB_COMPLETE_ERROR;
+ }
+ /*
+ * Store a pointer to the list in the URB setup_packet
+ * field. We know this currently isn't being used and
+ * this saves us a bunch of logic.
+ */
+ urb->setup_packet = (char *)iso_packet;
+ submit_handle = cvmx_usb_submit_isochronous(&priv->usb, pipe_handle,
+ urb->start_frame,
+ 0 /* flags */ ,
+ urb->number_of_packets,
+ iso_packet,
+ urb->transfer_dma,
+ urb->transfer_buffer_length,
+ octeon_usb_urb_complete_callback,
+ urb);
+ /*
+ * If submit failed we need to free our private packet
+ * list.
+ */
+ if (submit_handle < 0) {
+ urb->setup_packet = NULL;
+ kfree(iso_packet);
+ }
+ }
+ break;
+ case PIPE_INTERRUPT:
+ dev_dbg(dev, "Submit interrupt to %d.%d\n",
+ usb_pipedevice(urb->pipe), usb_pipeendpoint(urb->pipe));
+ submit_handle = cvmx_usb_submit_interrupt(&priv->usb, pipe_handle,
+ urb->transfer_dma,
+ urb->transfer_buffer_length,
+ octeon_usb_urb_complete_callback,
+ urb);
+ break;
+ case PIPE_CONTROL:
+ dev_dbg(dev, "Submit control to %d.%d\n",
+ usb_pipedevice(urb->pipe), usb_pipeendpoint(urb->pipe));
+ submit_handle = cvmx_usb_submit_control(&priv->usb, pipe_handle,
+ urb->setup_dma,
+ urb->transfer_dma,
+ urb->transfer_buffer_length,
+ octeon_usb_urb_complete_callback,
+ urb);
+ break;
+ case PIPE_BULK:
+ dev_dbg(dev, "Submit bulk to %d.%d\n",
+ usb_pipedevice(urb->pipe), usb_pipeendpoint(urb->pipe));
+ submit_handle = cvmx_usb_submit_bulk(&priv->usb, pipe_handle,
+ urb->transfer_dma,
+ urb->transfer_buffer_length,
+ octeon_usb_urb_complete_callback,
+ urb);
+ break;
+ }
+ if (submit_handle < 0) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ dev_dbg(dev, "Failed to submit\n");
+ return -ENOMEM;
+ }
+ urb->hcpriv = (void *)(long)(((submit_handle & 0xffff) << 16) | pipe_handle);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return 0;
+}
+
+static void octeon_usb_urb_dequeue_work(unsigned long arg)
+{
+ unsigned long flags;
+ struct octeon_hcd *priv = (struct octeon_hcd *)arg;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ while (!list_empty(&priv->dequeue_list)) {
+ int pipe_handle;
+ int submit_handle;
+ struct urb *urb = container_of(priv->dequeue_list.next, struct urb, urb_list);
+ list_del(&urb->urb_list);
+ /* not enqueued on dequeue_list */
+ INIT_LIST_HEAD(&urb->urb_list);
+ pipe_handle = 0xffff & (long)urb->hcpriv;
+ submit_handle = ((long)urb->hcpriv) >> 16;
+ cvmx_usb_cancel(&priv->usb, pipe_handle, submit_handle);
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int octeon_usb_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ unsigned long flags;
+
+ if (!urb->dev)
+ return -EINVAL;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ urb->status = status;
+ list_add_tail(&urb->urb_list, &priv->dequeue_list);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ tasklet_schedule(&priv->dequeue_tasklet);
+
+ return 0;
+}
+
+static void octeon_usb_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
+{
+ struct device *dev = hcd->self.controller;
+
+ if (ep->hcpriv) {
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ int pipe_handle = 0xffff & (long)ep->hcpriv;
+ unsigned long flags;
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_cancel_all(&priv->usb, pipe_handle);
+ if (cvmx_usb_close_pipe(&priv->usb, pipe_handle))
+ dev_dbg(dev, "Closing pipe %d failed\n", pipe_handle);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ep->hcpriv = NULL;
+ }
+}
+
+static int octeon_usb_hub_status_data(struct usb_hcd *hcd, char *buf)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ cvmx_usb_port_status_t port_status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ port_status = cvmx_usb_get_status(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ buf[0] = 0;
+ buf[0] = port_status.connect_change << 1;
+
+ return (buf[0] != 0);
+}
+
+static int octeon_usb_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength)
+{
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ struct device *dev = hcd->self.controller;
+ cvmx_usb_port_status_t usb_port_status;
+ int port_status;
+ struct usb_hub_descriptor *desc;
+ unsigned long flags;
+
+ switch (typeReq) {
+ case ClearHubFeature:
+ dev_dbg(dev, "ClearHubFeature\n");
+ switch (wValue) {
+ case C_HUB_LOCAL_POWER:
+ case C_HUB_OVER_CURRENT:
+ /* Nothing required here */
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case ClearPortFeature:
+ dev_dbg(dev, "ClearPortFeature\n");
+ if (wIndex != 1) {
+ dev_dbg(dev, " INVALID\n");
+ return -EINVAL;
+ }
+
+ switch (wValue) {
+ case USB_PORT_FEAT_ENABLE:
+ dev_dbg(dev, " ENABLE\n");
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_disable(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ case USB_PORT_FEAT_SUSPEND:
+ dev_dbg(dev, " SUSPEND\n");
+ /* Not supported on Octeon */
+ break;
+ case USB_PORT_FEAT_POWER:
+ dev_dbg(dev, " POWER\n");
+ /* Not supported on Octeon */
+ break;
+ case USB_PORT_FEAT_INDICATOR:
+ dev_dbg(dev, " INDICATOR\n");
+ /* Port inidicator not supported */
+ break;
+ case USB_PORT_FEAT_C_CONNECTION:
+ dev_dbg(dev, " C_CONNECTION\n");
+ /* Clears drivers internal connect status change flag */
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_set_status(&priv->usb, cvmx_usb_get_status(&priv->usb));
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ case USB_PORT_FEAT_C_RESET:
+ dev_dbg(dev, " C_RESET\n");
+ /*
+ * Clears the driver's internal Port Reset Change flag.
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_set_status(&priv->usb, cvmx_usb_get_status(&priv->usb));
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ case USB_PORT_FEAT_C_ENABLE:
+ dev_dbg(dev, " C_ENABLE\n");
+ /*
+ * Clears the driver's internal Port Enable/Disable
+ * Change flag.
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_set_status(&priv->usb, cvmx_usb_get_status(&priv->usb));
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ case USB_PORT_FEAT_C_SUSPEND:
+ dev_dbg(dev, " C_SUSPEND\n");
+ /*
+ * Clears the driver's internal Port Suspend Change
+ * flag, which is set when resume signaling on the host
+ * port is complete.
+ */
+ break;
+ case USB_PORT_FEAT_C_OVER_CURRENT:
+ dev_dbg(dev, " C_OVER_CURRENT\n");
+ /* Clears the driver's overcurrent Change flag */
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_set_status(&priv->usb, cvmx_usb_get_status(&priv->usb));
+ spin_unlock_irqrestore(&priv->lock, flags);
+ break;
+ default:
+ dev_dbg(dev, " UNKNOWN\n");
+ return -EINVAL;
+ }
+ break;
+ case GetHubDescriptor:
+ dev_dbg(dev, "GetHubDescriptor\n");
+ desc = (struct usb_hub_descriptor *)buf;
+ desc->bDescLength = 9;
+ desc->bDescriptorType = 0x29;
+ desc->bNbrPorts = 1;
+ desc->wHubCharacteristics = 0x08;
+ desc->bPwrOn2PwrGood = 1;
+ desc->bHubContrCurrent = 0;
+ desc->u.hs.DeviceRemovable[0] = 0;
+ desc->u.hs.DeviceRemovable[1] = 0xff;
+ break;
+ case GetHubStatus:
+ dev_dbg(dev, "GetHubStatus\n");
+ *(__le32 *) buf = 0;
+ break;
+ case GetPortStatus:
+ dev_dbg(dev, "GetPortStatus\n");
+ if (wIndex != 1) {
+ dev_dbg(dev, " INVALID\n");
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&priv->lock, flags);
+ usb_port_status = cvmx_usb_get_status(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ port_status = 0;
+
+ if (usb_port_status.connect_change) {
+ port_status |= (1 << USB_PORT_FEAT_C_CONNECTION);
+ dev_dbg(dev, " C_CONNECTION\n");
+ }
+
+ if (usb_port_status.port_enabled) {
+ port_status |= (1 << USB_PORT_FEAT_C_ENABLE);
+ dev_dbg(dev, " C_ENABLE\n");
+ }
+
+ if (usb_port_status.connected) {
+ port_status |= (1 << USB_PORT_FEAT_CONNECTION);
+ dev_dbg(dev, " CONNECTION\n");
+ }
+
+ if (usb_port_status.port_enabled) {
+ port_status |= (1 << USB_PORT_FEAT_ENABLE);
+ dev_dbg(dev, " ENABLE\n");
+ }
+
+ if (usb_port_status.port_over_current) {
+ port_status |= (1 << USB_PORT_FEAT_OVER_CURRENT);
+ dev_dbg(dev, " OVER_CURRENT\n");
+ }
+
+ if (usb_port_status.port_powered) {
+ port_status |= (1 << USB_PORT_FEAT_POWER);
+ dev_dbg(dev, " POWER\n");
+ }
+
+ if (usb_port_status.port_speed == CVMX_USB_SPEED_HIGH) {
+ port_status |= USB_PORT_STAT_HIGH_SPEED;
+ dev_dbg(dev, " HIGHSPEED\n");
+ } else if (usb_port_status.port_speed == CVMX_USB_SPEED_LOW) {
+ port_status |= (1 << USB_PORT_FEAT_LOWSPEED);
+ dev_dbg(dev, " LOWSPEED\n");
+ }
+
+ *((__le32 *) buf) = cpu_to_le32(port_status);
+ break;
+ case SetHubFeature:
+ dev_dbg(dev, "SetHubFeature\n");
+ /* No HUB features supported */
+ break;
+ case SetPortFeature:
+ dev_dbg(dev, "SetPortFeature\n");
+ if (wIndex != 1) {
+ dev_dbg(dev, " INVALID\n");
+ return -EINVAL;
+ }
+
+ switch (wValue) {
+ case USB_PORT_FEAT_SUSPEND:
+ dev_dbg(dev, " SUSPEND\n");
+ return -EINVAL;
+ case USB_PORT_FEAT_POWER:
+ dev_dbg(dev, " POWER\n");
+ return -EINVAL;
+ case USB_PORT_FEAT_RESET:
+ dev_dbg(dev, " RESET\n");
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_disable(&priv->usb);
+ if (cvmx_usb_enable(&priv->usb))
+ dev_dbg(dev, "Failed to enable the port\n");
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return 0;
+ case USB_PORT_FEAT_INDICATOR:
+ dev_dbg(dev, " INDICATOR\n");
+ /* Not supported */
+ break;
+ default:
+ dev_dbg(dev, " UNKNOWN\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ dev_dbg(dev, "Unknown root hub request\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+
+static const struct hc_driver octeon_hc_driver = {
+ .description = "Octeon USB",
+ .product_desc = "Octeon Host Controller",
+ .hcd_priv_size = sizeof(struct octeon_hcd),
+ .irq = octeon_usb_irq,
+ .flags = HCD_MEMORY | HCD_USB2,
+ .start = octeon_usb_start,
+ .stop = octeon_usb_stop,
+ .urb_enqueue = octeon_usb_urb_enqueue,
+ .urb_dequeue = octeon_usb_urb_dequeue,
+ .endpoint_disable = octeon_usb_endpoint_disable,
+ .get_frame_number = octeon_usb_get_frame_number,
+ .hub_status_data = octeon_usb_hub_status_data,
+ .hub_control = octeon_usb_hub_control,
+};
+
+
+static int octeon_usb_driver_probe(struct device *dev)
+{
+ int status;
+ int usb_num = to_platform_device(dev)->id;
+ int irq = platform_get_irq(to_platform_device(dev), 0);
+ struct octeon_hcd *priv;
+ struct usb_hcd *hcd;
+ unsigned long flags;
+
+ /*
+ * Set the DMA mask to 64bits so we get buffers already translated for
+ * DMA.
+ */
+ dev->coherent_dma_mask = ~0;
+ dev->dma_mask = &dev->coherent_dma_mask;
+
+ hcd = usb_create_hcd(&octeon_hc_driver, dev, dev_name(dev));
+ if (!hcd) {
+ dev_dbg(dev, "Failed to allocate memory for HCD\n");
+ return -1;
+ }
+ hcd->uses_new_polling = 1;
+ priv = (struct octeon_hcd *)hcd->hcd_priv;
+
+ spin_lock_init(&priv->lock);
+
+ tasklet_init(&priv->dequeue_tasklet, octeon_usb_urb_dequeue_work, (unsigned long)priv);
+ INIT_LIST_HEAD(&priv->dequeue_list);
+
+ status = cvmx_usb_initialize(&priv->usb, usb_num, CVMX_USB_INITIALIZE_FLAGS_CLOCK_AUTO);
+ if (status) {
+ dev_dbg(dev, "USB initialization failed with %d\n", status);
+ kfree(hcd);
+ return -1;
+ }
+
+ /* This delay is needed for CN3010, but I don't know why... */
+ mdelay(10);
+
+ spin_lock_irqsave(&priv->lock, flags);
+ cvmx_usb_poll(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ status = usb_add_hcd(hcd, irq, IRQF_SHARED);
+ if (status) {
+ dev_dbg(dev, "USB add HCD failed with %d\n", status);
+ kfree(hcd);
+ return -1;
+ }
+
+ dev_dbg(dev, "Registered HCD for port %d on irq %d\n", usb_num, irq);
+
+ return 0;
+}
+
+static int octeon_usb_driver_remove(struct device *dev)
+{
+ int status;
+ struct usb_hcd *hcd = dev_get_drvdata(dev);
+ struct octeon_hcd *priv = hcd_to_octeon(hcd);
+ unsigned long flags;
+
+ usb_remove_hcd(hcd);
+ tasklet_kill(&priv->dequeue_tasklet);
+ spin_lock_irqsave(&priv->lock, flags);
+ status = cvmx_usb_shutdown(&priv->usb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ if (status)
+ dev_dbg(dev, "USB shutdown failed with %d\n", status);
+
+ kfree(hcd);
+
+ return 0;
+}
+
+static struct device_driver octeon_usb_driver = {
+ .name = "OcteonUSB",
+ .bus = &platform_bus_type,
+ .probe = octeon_usb_driver_probe,
+ .remove = octeon_usb_driver_remove,
+};
+
+
+#define MAX_USB_PORTS 10
+static struct platform_device *pdev_glob[MAX_USB_PORTS];
+static int octeon_usb_registered;
+static int __init octeon_usb_module_init(void)
+{
+ int num_devices = cvmx_usb_get_num_ports();
+ int device;
+
+ if (usb_disabled() || num_devices == 0)
+ return -ENODEV;
+
+ if (driver_register(&octeon_usb_driver))
+ return -ENOMEM;
+
+ octeon_usb_registered = 1;
+
+ /*
+ * Only cn52XX and cn56XX have DWC_OTG USB hardware and the
+ * IOB priority registers. Under heavy network load USB
+ * hardware can be starved by the IOB causing a crash. Give
+ * it a priority boost if it has been waiting more than 400
+ * cycles to avoid this situation.
+ *
+ * Testing indicates that a cnt_val of 8192 is not sufficient,
+ * but no failures are seen with 4096. We choose a value of
+ * 400 to give a safety factor of 10.
+ */
+ if (OCTEON_IS_MODEL(OCTEON_CN52XX) || OCTEON_IS_MODEL(OCTEON_CN56XX)) {
+ union cvmx_iob_n2c_l2c_pri_cnt pri_cnt;
+
+ pri_cnt.u64 = 0;
+ pri_cnt.s.cnt_enb = 1;
+ pri_cnt.s.cnt_val = 400;
+ cvmx_write_csr(CVMX_IOB_N2C_L2C_PRI_CNT, pri_cnt.u64);
+ }
+
+ for (device = 0; device < num_devices; device++) {
+ struct resource irq_resource;
+ struct platform_device *pdev;
+ memset(&irq_resource, 0, sizeof(irq_resource));
+ irq_resource.start = (device == 0) ? OCTEON_IRQ_USB0 : OCTEON_IRQ_USB1;
+ irq_resource.end = irq_resource.start;
+ irq_resource.flags = IORESOURCE_IRQ;
+ pdev = platform_device_register_simple((char *)octeon_usb_driver. name, device, &irq_resource, 1);
+ if (IS_ERR(pdev)) {
+ driver_unregister(&octeon_usb_driver);
+ octeon_usb_registered = 0;
+ return PTR_ERR(pdev);
+ }
+ if (device < MAX_USB_PORTS)
+ pdev_glob[device] = pdev;
+
+ }
+ return 0;
+}
+
+static void __exit octeon_usb_module_cleanup(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_USB_PORTS; i++)
+ if (pdev_glob[i]) {
+ platform_device_unregister(pdev_glob[i]);
+ pdev_glob[i] = NULL;
+ }
+ if (octeon_usb_registered)
+ driver_unregister(&octeon_usb_driver);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cavium Networks <support@caviumnetworks.com>");
+MODULE_DESCRIPTION("Cavium Networks Octeon USB Host driver.");
+module_init(octeon_usb_module_init);
+module_exit(octeon_usb_module_cleanup);
diff --git a/drivers/staging/ozwpan/Kbuild b/drivers/staging/ozwpan/Kbuild
index 6cc84cb3f0a6..1766a268d5f6 100644
--- a/drivers/staging/ozwpan/Kbuild
+++ b/drivers/staging/ozwpan/Kbuild
@@ -13,7 +13,6 @@ ozwpan-y := \
ozproto.o \
ozcdev.o \
ozurbparanoia.o \
- oztrace.o \
- ozevent.o
+ oztrace.o
diff --git a/drivers/staging/ozwpan/ozappif.h b/drivers/staging/ozwpan/ozappif.h
index 449a6ba82337..ea1b271fdcda 100644
--- a/drivers/staging/ozwpan/ozappif.h
+++ b/drivers/staging/ozwpan/ozappif.h
@@ -6,8 +6,6 @@
#ifndef _OZAPPIF_H
#define _OZAPPIF_H
-#include "ozeventdef.h"
-
#define OZ_IOCTL_MAGIC 0xf4
struct oz_mac_addr {
diff --git a/drivers/staging/ozwpan/ozcdev.c b/drivers/staging/ozwpan/ozcdev.c
index 27d06666c81a..374fdc398641 100644
--- a/drivers/staging/ozwpan/ozcdev.c
+++ b/drivers/staging/ozwpan/ozcdev.c
@@ -18,7 +18,6 @@
#include "ozeltbuf.h"
#include "ozpd.h"
#include "ozproto.h"
-#include "ozevent.h"
#include "ozcdev.h"
/*------------------------------------------------------------------------------
*/
@@ -355,11 +354,13 @@ int oz_cdev_register(void)
g_oz_class = class_create(THIS_MODULE, "ozmo_wpan");
if (IS_ERR(g_oz_class)) {
oz_trace("Failed to register ozmo_wpan class\n");
+ err = PTR_ERR(g_oz_class);
goto out1;
}
dev = device_create(g_oz_class, NULL, g_cdev.devnum, NULL, "ozwpan");
if (IS_ERR(dev)) {
oz_trace("Failed to create sysfs entry for cdev\n");
+ err = PTR_ERR(dev);
goto out1;
}
return 0;
@@ -388,7 +389,6 @@ int oz_cdev_deregister(void)
*/
int oz_cdev_init(void)
{
- oz_event_log(OZ_EVT_SERVICE, 1, OZ_APPID_SERIAL, NULL, 0);
oz_app_enable(OZ_APPID_SERIAL, 1);
return 0;
}
@@ -397,7 +397,6 @@ int oz_cdev_init(void)
*/
void oz_cdev_term(void)
{
- oz_event_log(OZ_EVT_SERVICE, 2, OZ_APPID_SERIAL, NULL, 0);
oz_app_enable(OZ_APPID_SERIAL, 0);
}
/*------------------------------------------------------------------------------
@@ -407,7 +406,6 @@ int oz_cdev_start(struct oz_pd *pd, int resume)
{
struct oz_serial_ctx *ctx;
struct oz_serial_ctx *old_ctx;
- oz_event_log(OZ_EVT_SERVICE, 3, OZ_APPID_SERIAL, NULL, resume);
if (resume) {
oz_trace("Serial service resumed.\n");
return 0;
@@ -443,7 +441,6 @@ int oz_cdev_start(struct oz_pd *pd, int resume)
void oz_cdev_stop(struct oz_pd *pd, int pause)
{
struct oz_serial_ctx *ctx;
- oz_event_log(OZ_EVT_SERVICE, 4, OZ_APPID_SERIAL, NULL, pause);
if (pause) {
oz_trace("Serial service paused.\n");
return;
diff --git a/drivers/staging/ozwpan/ozconfig.h b/drivers/staging/ozwpan/ozconfig.h
index 43e6373a009c..087c322d2de0 100644
--- a/drivers/staging/ozwpan/ozconfig.h
+++ b/drivers/staging/ozwpan/ozconfig.h
@@ -12,7 +12,6 @@
/* #define WANT_URB_PARANOIA */
/* #define WANT_PRE_2_6_39 */
-#define WANT_EVENT_TRACE
/* These defines determine what verbose trace is displayed. */
#ifdef WANT_VERBOSE_TRACE
diff --git a/drivers/staging/ozwpan/ozevent.c b/drivers/staging/ozwpan/ozevent.c
deleted file mode 100644
index 77e86753610d..000000000000
--- a/drivers/staging/ozwpan/ozevent.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* -----------------------------------------------------------------------------
- * Copyright (c) 2011 Ozmo Inc
- * Released under the GNU General Public License Version 2 (GPLv2).
- * -----------------------------------------------------------------------------
- */
-#include "ozconfig.h"
-#ifdef WANT_EVENT_TRACE
-#include <linux/module.h>
-#include <linux/debugfs.h>
-#include <linux/jiffies.h>
-#include <linux/uaccess.h>
-#include "oztrace.h"
-#include "ozevent.h"
-#include "ozappif.h"
-/*------------------------------------------------------------------------------
- * Although the event mask is logically part of the oz_evtdev structure, it is
- * needed outside of this file so define it separately to avoid the need to
- * export definition of struct oz_evtdev.
- */
-u32 g_evt_mask;
-/*------------------------------------------------------------------------------
- */
-#define OZ_MAX_EVTS 2048 /* Must be power of 2 */
-struct oz_evtdev {
- struct dentry *root_dir;
- int evt_in;
- int evt_out;
- int missed_events;
- int present;
- atomic_t users;
- spinlock_t lock;
- struct oz_event evts[OZ_MAX_EVTS];
-};
-
-static struct oz_evtdev g_evtdev;
-
-/*------------------------------------------------------------------------------
- * Context: process
- */
-void oz_event_init(void)
-{
- /* Because g_evtdev is static external all fields initially zero so no
- * need to reinitialized those.
- */
- oz_trace("Event tracing initialized\n");
- spin_lock_init(&g_evtdev.lock);
- atomic_set(&g_evtdev.users, 0);
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-void oz_event_term(void)
-{
- oz_trace("Event tracing terminated\n");
-}
-/*------------------------------------------------------------------------------
- * Context: any
- */
-void oz_event_log2(u8 evt, u8 ctx1, u16 ctx2, void *ctx3, unsigned ctx4)
-{
- unsigned long irqstate;
- int ix;
- spin_lock_irqsave(&g_evtdev.lock, irqstate);
- ix = (g_evtdev.evt_in + 1) & (OZ_MAX_EVTS - 1);
- if (ix != g_evtdev.evt_out) {
- struct oz_event *e = &g_evtdev.evts[g_evtdev.evt_in];
- e->jiffies = jiffies;
- e->evt = evt;
- e->ctx1 = ctx1;
- e->ctx2 = ctx2;
- e->ctx3 = (__u32)(unsigned long)ctx3;
- e->ctx4 = ctx4;
- g_evtdev.evt_in = ix;
- } else {
- g_evtdev.missed_events++;
- }
- spin_unlock_irqrestore(&g_evtdev.lock, irqstate);
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-#ifdef CONFIG_DEBUG_FS
-static void oz_events_clear(struct oz_evtdev *dev)
-{
- unsigned long irqstate;
- oz_trace("Clearing events\n");
- spin_lock_irqsave(&dev->lock, irqstate);
- dev->evt_in = dev->evt_out = 0;
- dev->missed_events = 0;
- spin_unlock_irqrestore(&dev->lock, irqstate);
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-static int oz_events_open(struct inode *inode, struct file *filp)
-{
- oz_trace("oz_evt_open()\n");
- oz_trace("Open flags: 0x%x\n", filp->f_flags);
- if (atomic_add_return(1, &g_evtdev.users) == 1) {
- oz_events_clear(&g_evtdev);
- return nonseekable_open(inode, filp);
- } else {
- atomic_dec(&g_evtdev.users);
- return -EBUSY;
- }
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-static int oz_events_release(struct inode *inode, struct file *filp)
-{
- oz_events_clear(&g_evtdev);
- atomic_dec(&g_evtdev.users);
- g_evt_mask = 0;
- oz_trace("oz_evt_release()\n");
- return 0;
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-static ssize_t oz_events_read(struct file *filp, char __user *buf, size_t count,
- loff_t *fpos)
-{
- struct oz_evtdev *dev = &g_evtdev;
- int rc = 0;
- int nb_evts = count / sizeof(struct oz_event);
- int n;
- int sz;
-
- n = dev->evt_in - dev->evt_out;
- if (n < 0)
- n += OZ_MAX_EVTS;
- if (nb_evts > n)
- nb_evts = n;
- if (nb_evts == 0)
- goto out;
- n = OZ_MAX_EVTS - dev->evt_out;
- if (n > nb_evts)
- n = nb_evts;
- sz = n * sizeof(struct oz_event);
- if (copy_to_user(buf, &dev->evts[dev->evt_out], sz)) {
- rc = -EFAULT;
- goto out;
- }
- if (n == nb_evts)
- goto out2;
- n = nb_evts - n;
- if (copy_to_user(buf + sz, dev->evts, n * sizeof(struct oz_event))) {
- rc = -EFAULT;
- goto out;
- }
-out2:
- dev->evt_out = (dev->evt_out + nb_evts) & (OZ_MAX_EVTS - 1);
- rc = nb_evts * sizeof(struct oz_event);
-out:
- return rc;
-}
-/*------------------------------------------------------------------------------
- */
-static const struct file_operations oz_events_fops = {
- .owner = THIS_MODULE,
- .open = oz_events_open,
- .release = oz_events_release,
- .read = oz_events_read,
-};
-/*------------------------------------------------------------------------------
- * Context: process
- */
-void oz_debugfs_init(void)
-{
- struct dentry *parent;
-
- parent = debugfs_create_dir("ozwpan", NULL);
- if (parent == NULL) {
- oz_trace("Failed to create debugfs directory ozmo\n");
- return;
- } else {
- g_evtdev.root_dir = parent;
- if (debugfs_create_file("events", S_IRUSR, parent, NULL,
- &oz_events_fops) == NULL)
- oz_trace("Failed to create file ozmo/events\n");
- if (debugfs_create_x32("event_mask", S_IRUSR | S_IWUSR, parent,
- &g_evt_mask) == NULL)
- oz_trace("Failed to create file ozmo/event_mask\n");
- }
-}
-/*------------------------------------------------------------------------------
- * Context: process
- */
-void oz_debugfs_remove(void)
-{
- debugfs_remove_recursive(g_evtdev.root_dir);
-}
-#endif /* CONFIG_DEBUG_FS */
-#endif /* WANT_EVENT_TRACE */
diff --git a/drivers/staging/ozwpan/ozevent.h b/drivers/staging/ozwpan/ozevent.h
deleted file mode 100644
index 32f6f9859c41..000000000000
--- a/drivers/staging/ozwpan/ozevent.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* -----------------------------------------------------------------------------
- * Copyright (c) 2011 Ozmo Inc
- * Released under the GNU General Public License Version 2 (GPLv2).
- * -----------------------------------------------------------------------------
- */
-#ifndef _OZEVENT_H
-#define _OZEVENT_H
-#include "ozconfig.h"
-#include "ozeventdef.h"
-
-#ifdef WANT_EVENT_TRACE
-extern u32 g_evt_mask;
-void oz_event_init(void);
-void oz_event_term(void);
-void oz_event_log2(u8 evt, u8 ctx1, u16 ctx2, void *ctx3, unsigned ctx4);
-void oz_debugfs_init(void);
-void oz_debugfs_remove(void);
-#define oz_event_log(__evt, __ctx1, __ctx2, __ctx3, __ctx4) \
- do { \
- if ((1<<(__evt)) & g_evt_mask) \
- oz_event_log2(__evt, __ctx1, __ctx2, __ctx3, __ctx4); \
- } while (0)
-
-#else
-#define oz_event_init()
-#define oz_event_term()
-#define oz_event_log(__evt, __ctx1, __ctx2, __ctx3, __ctx4)
-#define oz_debugfs_init()
-#define oz_debugfs_remove()
-#endif /* WANT_EVENT_TRACE */
-
-#endif /* _OZEVENT_H */
diff --git a/drivers/staging/ozwpan/ozeventdef.h b/drivers/staging/ozwpan/ozeventdef.h
deleted file mode 100644
index 4b938981671a..000000000000
--- a/drivers/staging/ozwpan/ozeventdef.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -----------------------------------------------------------------------------
- * Copyright (c) 2011 Ozmo Inc
- * Released under the GNU General Public License Version 2 (GPLv2).
- * -----------------------------------------------------------------------------
- */
-#ifndef _OZEVENTDEF_H
-#define _OZEVENTDEF_H
-
-#define OZ_EVT_RX_FRAME 0
-#define OZ_EVT_RX_PROCESS 1
-#define OZ_EVT_TX_FRAME 2
-#define OZ_EVT_TX_ISOC 3
-#define OZ_EVT_URB_SUBMIT 4
-#define OZ_EVT_URB_DONE 5
-#define OZ_EVT_URB_CANCEL 6
-#define OZ_EVT_CTRL_REQ 7
-#define OZ_EVT_CTRL_CNF 8
-#define OZ_EVT_CTRL_LOCAL 9
-#define OZ_EVT_CONNECT_REQ 10
-#define OZ_EVT_CONNECT_RSP 11
-#define OZ_EVT_EP_CREDIT 12
-#define OZ_EVT_EP_BUFFERING 13
-#define OZ_EVT_TX_ISOC_DONE 14
-#define OZ_EVT_TX_ISOC_DROP 15
-#define OZ_EVT_TIMER_CTRL 16
-#define OZ_EVT_TIMER 17
-#define OZ_EVT_PD_STATE 18
-#define OZ_EVT_SERVICE 19
-#define OZ_EVT_DEBUG 20
-
-struct oz_event {
- __u32 jiffies;
- __u8 evt;
- __u8 ctx1;
- __u16 ctx2;
- __u32 ctx3;
- __u32 ctx4;
-};
-
-#endif /* _OZEVENTDEF_H */
diff --git a/drivers/staging/ozwpan/ozhcd.c b/drivers/staging/ozwpan/ozhcd.c
index 8ac26f584fd4..d68d63a2e683 100644
--- a/drivers/staging/ozwpan/ozhcd.c
+++ b/drivers/staging/ozwpan/ozhcd.c
@@ -35,7 +35,6 @@
#include "ozusbif.h"
#include "oztrace.h"
#include "ozurbparanoia.h"
-#include "ozevent.h"
#include "ozhcd.h"
/*------------------------------------------------------------------------------
* Number of units of buffering to capture for an isochronous IN endpoint before
@@ -381,7 +380,6 @@ static void oz_complete_urb(struct usb_hcd *hcd, struct urb *urb,
jiffies, urb, status, jiffies-submit_jiffies,
jiffies-last_time, atomic_read(&g_pending_urbs));
last_time = jiffies;
- oz_event_log(OZ_EVT_URB_DONE, 0, 0, urb, status);
usb_hcd_giveback_urb(hcd, urb, status);
}
spin_lock(&g_tasklet_lock);
@@ -508,8 +506,6 @@ static int oz_enqueue_ep_urb(struct oz_port *port, u8 ep_addr, int in_dir,
if (!in_dir && ep_addr && (ep->credit < 0)) {
ep->last_jiffies = jiffies;
ep->credit = 0;
- oz_event_log(OZ_EVT_EP_CREDIT, ep->ep_num,
- 0, NULL, ep->credit);
}
} else {
err = -EPIPE;
@@ -766,7 +762,6 @@ void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, const u8 *desc,
struct urb *urb;
int err = 0;
- oz_event_log(OZ_EVT_CTRL_CNF, 0, req_id, NULL, status);
oz_trace("oz_hcd_get_desc_cnf length = %d offs = %d tot_size = %d\n",
length, offset, total_size);
urb = oz_find_urb_by_id(port, 0, req_id);
@@ -905,7 +900,6 @@ void oz_hcd_control_cnf(void *hport, u8 req_id, u8 rcode, const u8 *data,
unsigned windex;
unsigned wvalue;
- oz_event_log(OZ_EVT_CTRL_CNF, 0, req_id, NULL, rcode);
oz_trace("oz_hcd_control_cnf rcode=%u len=%d\n", rcode, data_len);
urb = oz_find_urb_by_id(port, 0, req_id);
if (!urb) {
@@ -1059,8 +1053,6 @@ int oz_hcd_heartbeat(void *hport)
ep->credit += jiffies_to_msecs(now - ep->last_jiffies);
if (ep->credit > ep->credit_ceiling)
ep->credit = ep->credit_ceiling;
- oz_event_log(OZ_EVT_EP_CREDIT, ep->ep_num, 0, NULL,
- ep->credit);
ep->last_jiffies = now;
while (ep->credit && !list_empty(&ep->urb_list)) {
urbl = list_first_entry(&ep->urb_list,
@@ -1069,8 +1061,6 @@ int oz_hcd_heartbeat(void *hport)
if ((ep->credit + 1) < urb->number_of_packets)
break;
ep->credit -= urb->number_of_packets;
- oz_event_log(OZ_EVT_EP_CREDIT, ep->ep_num, 0, NULL,
- ep->credit);
list_move_tail(&urbl->link, &xfr_list);
}
}
@@ -1098,19 +1088,12 @@ int oz_hcd_heartbeat(void *hport)
if (ep->buffered_units >= OZ_IN_BUFFERING_UNITS) {
ep->flags &= ~OZ_F_EP_BUFFERING;
ep->credit = 0;
- oz_event_log(OZ_EVT_EP_CREDIT,
- ep->ep_num | USB_DIR_IN,
- 0, NULL, ep->credit);
ep->last_jiffies = now;
ep->start_frame = 0;
- oz_event_log(OZ_EVT_EP_BUFFERING,
- ep->ep_num | USB_DIR_IN, 0, NULL, 0);
}
continue;
}
ep->credit += jiffies_to_msecs(now - ep->last_jiffies);
- oz_event_log(OZ_EVT_EP_CREDIT, ep->ep_num | USB_DIR_IN,
- 0, NULL, ep->credit);
ep->last_jiffies = now;
while (!list_empty(&ep->urb_list)) {
struct oz_urb_link *urbl =
@@ -1154,8 +1137,6 @@ int oz_hcd_heartbeat(void *hport)
ep->start_frame += urb->number_of_packets;
list_move_tail(&urbl->link, &xfr_list);
ep->credit -= urb->number_of_packets;
- oz_event_log(OZ_EVT_EP_CREDIT, ep->ep_num | USB_DIR_IN,
- 0, NULL, ep->credit);
}
}
if (!list_empty(&port->isoc_out_ep) || !list_empty(&port->isoc_in_ep))
@@ -1243,12 +1224,10 @@ static int oz_build_endpoints_for_interface(struct usb_hcd *hcd,
if ((ep->attrib & USB_ENDPOINT_XFERTYPE_MASK)
== USB_ENDPOINT_XFER_ISOC) {
oz_trace("wMaxPacketSize = %d\n",
- hep->desc.wMaxPacketSize);
+ usb_endpoint_maxp(&hep->desc));
ep->credit_ceiling = 200;
if (ep_addr & USB_ENDPOINT_DIR_MASK) {
ep->flags |= OZ_F_EP_BUFFERING;
- oz_event_log(OZ_EVT_EP_BUFFERING,
- ep->ep_num | USB_DIR_IN, 1, NULL, 0);
} else {
ep->flags |= OZ_F_EP_HAVE_STREAM;
if (oz_usb_stream_create(port->hpd, ep_num))
@@ -1455,8 +1434,6 @@ static void oz_process_ep0_urb(struct oz_hcd *ozhcd, struct urb *urb,
oz_trace("USB_REQ_GET_DESCRIPTOR - req\n");
break;
case USB_REQ_SET_ADDRESS:
- oz_event_log(OZ_EVT_CTRL_LOCAL, setup->bRequest,
- 0, NULL, setup->bRequestType);
oz_trace("USB_REQ_SET_ADDRESS - req\n");
oz_trace("Port %d address is 0x%x\n", ozhcd->conn_port,
(u8)le16_to_cpu(setup->wValue));
@@ -1477,8 +1454,6 @@ static void oz_process_ep0_urb(struct oz_hcd *ozhcd, struct urb *urb,
/* We short circuit this case and reply directly since
* we have the selected configuration number cached.
*/
- oz_event_log(OZ_EVT_CTRL_LOCAL, setup->bRequest, 0,
- NULL, setup->bRequestType);
oz_trace("USB_REQ_GET_CONFIGURATION - reply now\n");
if (urb->transfer_buffer_length >= 1) {
urb->actual_length = 1;
@@ -1493,8 +1468,6 @@ static void oz_process_ep0_urb(struct oz_hcd *ozhcd, struct urb *urb,
/* We short circuit this case and reply directly since
* we have the selected interface alternative cached.
*/
- oz_event_log(OZ_EVT_CTRL_LOCAL, setup->bRequest, 0,
- NULL, setup->bRequestType);
oz_trace("USB_REQ_GET_INTERFACE - reply now\n");
if (urb->transfer_buffer_length >= 1) {
urb->actual_length = 1;
@@ -1744,20 +1717,6 @@ static void oz_hcd_shutdown(struct usb_hcd *hcd)
oz_trace("oz_hcd_shutdown()\n");
}
/*------------------------------------------------------------------------------
- * Context: any
- */
-#ifdef WANT_EVENT_TRACE
-static u8 oz_get_irq_ctx(void)
-{
- u8 irq_info = 0;
- if (in_interrupt())
- irq_info |= 1;
- if (in_irq())
- irq_info |= 2;
- return irq_info;
-}
-#endif /* WANT_EVENT_TRACE */
-/*------------------------------------------------------------------------------
* Called to queue an urb for the device.
* This function should return a non-zero error code if it fails the urb but
* should not call usb_hcd_giveback_urb().
@@ -1774,8 +1733,6 @@ static int oz_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
struct oz_urb_link *urbl;
oz_trace2(OZ_TRACE_URB, "%lu: oz_hcd_urb_enqueue(%p)\n",
jiffies, urb);
- oz_event_log(OZ_EVT_URB_SUBMIT, oz_get_irq_ctx(),
- (u16)urb->number_of_packets, urb, urb->pipe);
if (unlikely(ozhcd == NULL)) {
oz_trace2(OZ_TRACE_URB, "%lu: Refused urb(%p) not ozhcd.\n",
jiffies, urb);
@@ -1835,10 +1792,6 @@ static struct oz_urb_link *oz_remove_urb(struct oz_endpoint *ep,
ep->credit -= urb->number_of_packets;
if (ep->credit < 0)
ep->credit = 0;
- oz_event_log(OZ_EVT_EP_CREDIT,
- usb_pipein(urb->pipe) ?
- (ep->ep_num | USB_DIR_IN) : ep->ep_num,
- 0, NULL, ep->credit);
}
return urbl;
}
diff --git a/drivers/staging/ozwpan/ozmain.c b/drivers/staging/ozwpan/ozmain.c
index 57a0cbd58551..51fe9e98c351 100644
--- a/drivers/staging/ozwpan/ozmain.c
+++ b/drivers/staging/ozwpan/ozmain.c
@@ -15,7 +15,6 @@
#include "ozproto.h"
#include "ozcdev.h"
#include "oztrace.h"
-#include "ozevent.h"
/*------------------------------------------------------------------------------
* The name of the 802.11 mac device. Empty string is the default value but a
* value can be supplied as a parameter to the module. An empty string means
@@ -28,14 +27,10 @@ static char *g_net_dev = "";
*/
static int __init ozwpan_init(void)
{
- oz_event_init();
oz_cdev_register();
oz_protocol_init(g_net_dev);
oz_app_enable(OZ_APPID_USB, 1);
oz_apps_init();
-#ifdef CONFIG_DEBUG_FS
- oz_debugfs_init();
-#endif
return 0;
}
/*------------------------------------------------------------------------------
@@ -46,10 +41,6 @@ static void __exit ozwpan_exit(void)
oz_protocol_term();
oz_apps_term();
oz_cdev_deregister();
- oz_event_term();
-#ifdef CONFIG_DEBUG_FS
- oz_debugfs_remove();
-#endif
}
/*------------------------------------------------------------------------------
*/
diff --git a/drivers/staging/ozwpan/ozpd.c b/drivers/staging/ozwpan/ozpd.c
index f8b9da080c4b..d67dff2430ad 100644
--- a/drivers/staging/ozwpan/ozpd.c
+++ b/drivers/staging/ozwpan/ozpd.c
@@ -15,7 +15,6 @@
#include "ozpd.h"
#include "ozproto.h"
#include "oztrace.h"
-#include "ozevent.h"
#include "ozcdev.h"
#include "ozusbsvc.h"
#include <asm/unaligned.h>
@@ -121,7 +120,6 @@ static void oz_def_app_rx(struct oz_pd *pd, struct oz_elt *elt)
void oz_pd_set_state(struct oz_pd *pd, unsigned state)
{
pd->state = state;
- oz_event_log(OZ_EVT_PD_STATE, 0, 0, NULL, state);
#ifdef WANT_TRACE
switch (state) {
case OZ_PD_S_IDLE:
@@ -544,7 +542,6 @@ static int oz_send_next_queued_frame(struct oz_pd *pd, int more_data)
if (dev_queue_xmit(skb) < 0) {
oz_trace2(OZ_TRACE_TX_FRAMES,
"Dropping ISOC Frame\n");
- oz_event_log(OZ_EVT_TX_ISOC_DROP, 0, 0, NULL, 0);
return -1;
}
atomic_inc(&g_submitted_isoc);
@@ -555,7 +552,6 @@ static int oz_send_next_queued_frame(struct oz_pd *pd, int more_data)
} else {
kfree_skb(skb);
oz_trace2(OZ_TRACE_TX_FRAMES, "Dropping ISOC Frame>\n");
- oz_event_log(OZ_EVT_TX_ISOC_DROP, 0, 0, NULL, 0);
return -1;
}
}
@@ -567,10 +563,6 @@ static int oz_send_next_queued_frame(struct oz_pd *pd, int more_data)
oz_set_more_bit(skb);
oz_trace2(OZ_TRACE_TX_FRAMES, "TX frame PN=0x%x\n", f->hdr.pkt_num);
if (skb) {
- oz_event_log(OZ_EVT_TX_FRAME,
- 0,
- (((u16)f->hdr.control)<<8)|f->hdr.last_pkt_num,
- NULL, f->hdr.pkt_num);
if (dev_queue_xmit(skb) < 0)
return -1;
@@ -659,7 +651,6 @@ static int oz_send_isoc_frame(struct oz_pd *pd)
memcpy(elt, ei->data, ei->length);
elt = oz_next_elt(elt);
}
- oz_event_log(OZ_EVT_TX_ISOC, 0, 0, NULL, 0);
dev_queue_xmit(skb);
oz_elt_info_free_chain(&pd->elt_buff, &list);
return 0;
@@ -768,8 +759,6 @@ int oz_isoc_stream_delete(struct oz_pd *pd, u8 ep_num)
static void oz_isoc_destructor(struct sk_buff *skb)
{
atomic_dec(&g_submitted_isoc);
- oz_event_log(OZ_EVT_TX_ISOC_DONE, atomic_read(&g_submitted_isoc),
- 0, skb, 0);
}
/*------------------------------------------------------------------------------
* Context: softirq
@@ -863,25 +852,19 @@ int oz_send_isoc_unit(struct oz_pd *pd, u8 ep_num, const u8 *data, int len)
oz_trace2(OZ_TRACE_TX_FRAMES,
"Added ISOC Frame to Tx Queue isoc_nb= %d, nb= %d\n",
pd->nb_queued_isoc_frames, pd->nb_queued_frames);
- oz_event_log(OZ_EVT_TX_ISOC, nb_units, iso.frame_number,
- skb, atomic_read(&g_submitted_isoc));
return 0;
}
/*In ANYTIME mode Xmit unit immediately*/
if (atomic_read(&g_submitted_isoc) < OZ_MAX_SUBMITTED_ISOC) {
atomic_inc(&g_submitted_isoc);
- oz_event_log(OZ_EVT_TX_ISOC, nb_units, iso.frame_number,
- skb, atomic_read(&g_submitted_isoc));
- if (dev_queue_xmit(skb) < 0) {
- oz_event_log(OZ_EVT_TX_ISOC_DROP, 0, 0, NULL, 0);
+ if (dev_queue_xmit(skb) < 0)
return -1;
- } else
+ else
return 0;
}
-out: oz_event_log(OZ_EVT_TX_ISOC_DROP, 0, 0, NULL, 0);
- kfree_skb(skb);
+out: kfree_skb(skb);
return -1;
}
diff --git a/drivers/staging/ozwpan/ozproto.c b/drivers/staging/ozwpan/ozproto.c
index 3badf1537adb..79ac7b51d5b2 100644
--- a/drivers/staging/ozwpan/ozproto.c
+++ b/drivers/staging/ozwpan/ozproto.c
@@ -18,7 +18,6 @@
#include "ozusbsvc.h"
#include "oztrace.h"
#include "ozappif.h"
-#include "ozevent.h"
#include <asm/unaligned.h>
#include <linux/uaccess.h>
#include <net/psnap.h>
@@ -116,7 +115,6 @@ static void oz_send_conn_rsp(struct oz_pd *pd, u8 status)
oz_hdr->control = (OZ_PROTOCOL_VERSION<<OZ_VERSION_SHIFT);
oz_hdr->last_pkt_num = 0;
put_unaligned(0, &oz_hdr->pkt_num);
- oz_event_log(OZ_EVT_CONNECT_RSP, 0, 0, NULL, 0);
elt->type = OZ_ELT_CONNECT_RSP;
elt->length = sizeof(struct oz_elt_connect_rsp);
memset(body, 0, sizeof(struct oz_elt_connect_rsp));
@@ -345,9 +343,6 @@ static void oz_rx_frame(struct sk_buff *skb)
int dup = 0;
u32 pkt_num;
- oz_event_log(OZ_EVT_RX_PROCESS, 0,
- (((u16)oz_hdr->control)<<8)|oz_hdr->last_pkt_num,
- NULL, oz_hdr->pkt_num);
oz_trace2(OZ_TRACE_RX_FRAMES,
"RX frame PN=0x%x LPN=0x%x control=0x%x\n",
oz_hdr->pkt_num, oz_hdr->last_pkt_num, oz_hdr->control);
@@ -402,7 +397,6 @@ static void oz_rx_frame(struct sk_buff *skb)
break;
switch (elt->type) {
case OZ_ELT_CONNECT_REQ:
- oz_event_log(OZ_EVT_CONNECT_REQ, 0, 0, NULL, 0);
oz_trace("RX: OZ_ELT_CONNECT_REQ\n");
pd = oz_connect_req(pd, elt, src_addr, skb->dev);
break;
@@ -534,7 +528,6 @@ static void oz_protocol_timer(unsigned long arg)
/* This happens if we remove the current timer but can't stop
* the timer from firing. In this case just get out.
*/
- oz_event_log(OZ_EVT_TIMER, 0, 0, NULL, 0);
spin_unlock_bh(&g_polling_lock);
return;
}
@@ -545,7 +538,6 @@ static void oz_protocol_timer(unsigned long arg)
spin_unlock_bh(&g_polling_lock);
do {
pd = t->pd;
- oz_event_log(OZ_EVT_TIMER, 0, t->type, NULL, 0);
oz_pd_handle_timer(pd, t->type);
spin_lock_bh(&g_polling_lock);
if (g_timer_pool_count < OZ_MAX_TIMER_POOL_SIZE) {
@@ -582,14 +574,8 @@ static void oz_protocol_timer_start(void)
g_cur_timer =
container_of(g_timer_list.next, struct oz_timer, link);
if (g_timer_state == OZ_TIMER_SET) {
- oz_event_log(OZ_EVT_TIMER_CTRL, 3,
- (u16)g_cur_timer->type, NULL,
- (unsigned)g_cur_timer->due_time);
mod_timer(&g_timer, g_cur_timer->due_time);
} else {
- oz_event_log(OZ_EVT_TIMER_CTRL, 4,
- (u16)g_cur_timer->type, NULL,
- (unsigned)g_cur_timer->due_time);
g_timer.expires = g_cur_timer->due_time;
g_timer.function = oz_protocol_timer;
g_timer.data = 0;
@@ -610,7 +596,6 @@ void oz_timer_add(struct oz_pd *pd, int type, unsigned long due_time,
struct list_head *e;
struct oz_timer *t = NULL;
int restart_needed = 0;
- oz_event_log(OZ_EVT_TIMER_CTRL, 1, (u16)type, NULL, (unsigned)due_time);
spin_lock(&g_polling_lock);
if (remove) {
list_for_each(e, &g_timer_list) {
@@ -673,7 +658,6 @@ void oz_timer_delete(struct oz_pd *pd, int type)
struct oz_timer *n;
int restart_needed = 0;
int release = 0;
- oz_event_log(OZ_EVT_TIMER_CTRL, 2, (u16)type, NULL, 0);
spin_lock(&g_polling_lock);
list_for_each_entry_safe(t, n, &g_timer_list, link) {
if ((t->pd == pd) && ((type == 0) || (t->type == type))) {
@@ -770,7 +754,6 @@ void oz_app_enable(int app_id, int enable)
static int oz_pkt_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- oz_event_log(OZ_EVT_RX_FRAME, 0, 0, NULL, 0);
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb == NULL)
return 0;
diff --git a/drivers/staging/ozwpan/ozusbsvc.c b/drivers/staging/ozwpan/ozusbsvc.c
index 543a9415975c..167632878249 100644
--- a/drivers/staging/ozwpan/ozusbsvc.c
+++ b/drivers/staging/ozwpan/ozusbsvc.c
@@ -27,14 +27,12 @@
#include "ozhcd.h"
#include "oztrace.h"
#include "ozusbsvc.h"
-#include "ozevent.h"
/*------------------------------------------------------------------------------
* This is called once when the driver is loaded to initialise the USB service.
* Context: process
*/
int oz_usb_init(void)
{
- oz_event_log(OZ_EVT_SERVICE, 1, OZ_APPID_USB, NULL, 0);
return oz_hcd_init();
}
/*------------------------------------------------------------------------------
@@ -43,7 +41,6 @@ int oz_usb_init(void)
*/
void oz_usb_term(void)
{
- oz_event_log(OZ_EVT_SERVICE, 2, OZ_APPID_USB, NULL, 0);
oz_hcd_term();
}
/*------------------------------------------------------------------------------
@@ -55,7 +52,6 @@ int oz_usb_start(struct oz_pd *pd, int resume)
int rc = 0;
struct oz_usb_ctx *usb_ctx;
struct oz_usb_ctx *old_ctx;
- oz_event_log(OZ_EVT_SERVICE, 3, OZ_APPID_USB, NULL, resume);
if (resume) {
oz_trace("USB service resumed.\n");
return 0;
@@ -117,7 +113,6 @@ int oz_usb_start(struct oz_pd *pd, int resume)
void oz_usb_stop(struct oz_pd *pd, int pause)
{
struct oz_usb_ctx *usb_ctx;
- oz_event_log(OZ_EVT_SERVICE, 4, OZ_APPID_USB, NULL, pause);
if (pause) {
oz_trace("USB service paused.\n");
return;
diff --git a/drivers/staging/ozwpan/ozusbsvc1.c b/drivers/staging/ozwpan/ozusbsvc1.c
index 4e4b650fee3f..16e607875c38 100644
--- a/drivers/staging/ozwpan/ozusbsvc1.c
+++ b/drivers/staging/ozwpan/ozusbsvc1.c
@@ -22,7 +22,6 @@
#include "ozhcd.h"
#include "oztrace.h"
#include "ozusbsvc.h"
-#include "ozevent.h"
/*------------------------------------------------------------------------------
*/
#define MAX_ISOC_FIXED_DATA (253-sizeof(struct oz_isoc_fixed))
@@ -190,10 +189,6 @@ int oz_usb_control_req(void *hpd, u8 req_id, struct usb_ctrlrequest *setup,
unsigned windex = le16_to_cpu(setup->wIndex);
unsigned wlength = le16_to_cpu(setup->wLength);
int rc = 0;
- oz_event_log(OZ_EVT_CTRL_REQ, setup->bRequest, req_id,
- (void *)(((unsigned long)(setup->wValue))<<16 |
- ((unsigned long)setup->wIndex)),
- setup->bRequestType);
if ((setup->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD) {
switch (setup->bRequest) {
case USB_REQ_GET_DESCRIPTOR:
diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
index c54df3948e20..cbc15c120981 100644
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -1756,17 +1756,18 @@ static inline int input_state_high(struct logical_input *input)
if (input->high_timer == 0) {
char *press_str = input->u.kbd.press_str;
- if (press_str[0])
- keypad_send_key(press_str,
- sizeof(input->u.kbd.press_str));
+ if (press_str[0]) {
+ int s = sizeof(input->u.kbd.press_str);
+ keypad_send_key(press_str, s);
+ }
}
if (input->u.kbd.repeat_str[0]) {
char *repeat_str = input->u.kbd.repeat_str;
if (input->high_timer >= KEYPAD_REP_START) {
+ int s = sizeof(input->u.kbd.repeat_str);
input->high_timer -= KEYPAD_REP_DELAY;
- keypad_send_key(repeat_str,
- sizeof(input->u.kbd.repeat_str));
+ keypad_send_key(repeat_str, s);
}
/* we will need to come back here soon */
inputs_stable = 0;
@@ -1802,10 +1803,11 @@ static inline void input_state_falling(struct logical_input *input)
if (input->u.kbd.repeat_str[0]) {
char *repeat_str = input->u.kbd.repeat_str;
- if (input->high_timer >= KEYPAD_REP_START)
+ if (input->high_timer >= KEYPAD_REP_START) {
+ int s = sizeof(input->u.kbd.repeat_str);
input->high_timer -= KEYPAD_REP_DELAY;
- keypad_send_key(repeat_str,
- sizeof(input->u.kbd.repeat_str));
+ keypad_send_key(repeat_str, s);
+ }
/* we will need to come back here soon */
inputs_stable = 0;
}
@@ -1822,9 +1824,10 @@ static inline void input_state_falling(struct logical_input *input)
release_fct(input->u.std.release_data);
} else if (input->type == INPUT_TYPE_KBD) {
char *release_str = input->u.kbd.release_str;
- if (release_str[0])
- keypad_send_key(release_str,
- sizeof(input->u.kbd.release_str));
+ if (release_str[0]) {
+ int s = sizeof(input->u.kbd.release_str);
+ keypad_send_key(release_str, s);
+ }
}
input->state = INPUT_ST_LOW;
diff --git a/drivers/staging/rtl8187se/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8187se/ieee80211/ieee80211_rx.c
index e30315997bbe..d5df0d691fcc 100644
--- a/drivers/staging/rtl8187se/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8187se/ieee80211/ieee80211_rx.c
@@ -399,8 +399,8 @@ static int is_duplicate_packet(struct ieee80211_device *ieee,
struct ieee_ibss_seq *entry = NULL;
u8 *mac = header->addr2;
int index = mac[5] % IEEE_IBSS_MAC_HASH_SIZE;
- //for (pos = (head)->next; pos != (head); pos = pos->next)
- __list_for_each(p, &ieee->ibss_mac_hash[index]) {
+
+ list_for_each(p, &ieee->ibss_mac_hash[index]) {
entry = list_entry(p, struct ieee_ibss_seq, list);
if (!memcmp(entry->mac, mac, ETH_ALEN))
break;
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_cmdpkt.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_cmdpkt.c
index ea91744f7ccf..5f10e4075d39 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_cmdpkt.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_cmdpkt.c
@@ -20,20 +20,7 @@
#include "rtl_core.h"
#include "r8192E_hw.h"
#include "r8192E_cmdpkt.h"
-/*---------------------------Define Local Constant---------------------------*/
-/* Debug constant*/
-#define CMPK_DEBOUNCE_CNT 1
-#define CMPK_PRINT(Address)\
-{\
- unsigned char i;\
- u32 temp[10];\
- \
- memcpy(temp, Address, 40);\
- for (i = 0; i < 40; i += 4)\
- printk(KERN_INFO "\r\n %08x", temp[i]);\
-}
-/*---------------------------Define functions---------------------------------*/
bool cmpk_message_handle_tx(
struct net_device *dev,
u8 *code_virtual_address,
@@ -100,7 +87,7 @@ bool cmpk_message_handle_tx(
write_nic_byte(dev, TPPoll, TPPoll_CQ);
Failed:
return rt_status;
-} /* CMPK_Message_Handle_Tx */
+}
static void
cmpk_count_txstatistic(
@@ -149,23 +136,19 @@ cmpk_count_txstatistic(
priv->stats.txretrycount += pstx_fb->retry_cnt;
priv->stats.txfeedbackretry += pstx_fb->retry_cnt;
-
-} /* cmpk_CountTxStatistic */
-
-
+}
static void cmpk_handle_tx_feedback(struct net_device *dev, u8 *pmsg)
{
struct r8192_priv *priv = rtllib_priv(dev);
- struct cmpk_txfb rx_tx_fb; /* */
+ struct cmpk_txfb rx_tx_fb;
priv->stats.txfeedback++;
memcpy((u8 *)&rx_tx_fb, pmsg, sizeof(struct cmpk_txfb));
cmpk_count_txstatistic(dev, &rx_tx_fb);
-
-} /* cmpk_Handle_Tx_Feedback */
+}
static void cmdpkt_beacontimerinterrupt_819xusb(struct net_device *dev)
{
@@ -182,7 +165,6 @@ static void cmdpkt_beacontimerinterrupt_819xusb(struct net_device *dev)
tx_rate = 10;
DMESG("send beacon frame tx rate is 1Mbpm\n");
}
-
}
static void cmpk_handle_interrupt_status(struct net_device *dev, u8 *pmsg)
@@ -192,14 +174,12 @@ static void cmpk_handle_interrupt_status(struct net_device *dev, u8 *pmsg)
DMESG("---> cmpk_Handle_Interrupt_Status()\n");
-
rx_intr_status.length = pmsg[1];
if (rx_intr_status.length != (sizeof(struct cmpk_intr_sta) - 2)) {
DMESG("cmpk_Handle_Interrupt_Status: wrong length!\n");
return;
}
-
if (priv->rtllib->iw_mode == IW_MODE_ADHOC) {
rx_intr_status.interrupt_status = *((u32 *)(pmsg + 4));
@@ -220,12 +200,11 @@ static void cmpk_handle_interrupt_status(struct net_device *dev, u8 *pmsg)
DMESG("<---- cmpk_handle_interrupt_status()\n");
-} /* cmpk_handle_interrupt_status */
-
+}
static void cmpk_handle_query_config_rx(struct net_device *dev, u8 *pmsg)
{
- cmpk_query_cfg_t rx_query_cfg; /* */
+ cmpk_query_cfg_t rx_query_cfg;
rx_query_cfg.cfg_action = (pmsg[4] & 0x80000000)>>31;
@@ -238,8 +217,7 @@ static void cmpk_handle_query_config_rx(struct net_device *dev, u8 *pmsg)
rx_query_cfg.mask = (pmsg[12] << 24) | (pmsg[13] << 16) |
(pmsg[14] << 8) | (pmsg[15] << 0);
-} /* cmpk_Handle_Query_Config_Rx */
-
+}
static void cmpk_count_tx_status(struct net_device *dev,
struct cmpk_tx_status *pstx_status)
@@ -280,13 +258,11 @@ static void cmpk_count_tx_status(struct net_device *dev,
priv->stats.txbytesunicast += pstx_status->txuclength;
priv->stats.last_packet_rate = pstx_status->rate;
-} /* cmpk_CountTxStatus */
-
-
+}
static void cmpk_handle_tx_status(struct net_device *dev, u8 *pmsg)
{
- struct cmpk_tx_status rx_tx_sts; /* */
+ struct cmpk_tx_status rx_tx_sts;
memcpy((void *)&rx_tx_sts, (void *)pmsg, sizeof(struct cmpk_tx_status));
cmpk_count_tx_status(dev, &rx_tx_sts);
@@ -300,7 +276,6 @@ static void cmpk_handle_tx_rate_history(struct net_device *dev, u8 *pmsg)
u32 *ptemp;
struct r8192_priv *priv = rtllib_priv(dev);
-
#ifdef ENABLE_PS
pAdapter->HalFunc.GetHwRegHandler(pAdapter, HW_VAR_RF_STATE,
(pu1Byte)(&rtState));
@@ -335,10 +310,8 @@ static void cmpk_handle_tx_rate_history(struct net_device *dev, u8 *pmsg)
priv->stats.txrate.ht_mcs[j][i] +=
ptxrate->ht_mcs[j][i];
}
-
}
-
u32 cmpk_message_handle_rx(struct net_device *dev,
struct rtllib_rx_stats *pstats)
{
@@ -349,12 +322,8 @@ u32 cmpk_message_handle_rx(struct net_device *dev,
RT_TRACE(COMP_CMDPKT, "---->cmpk_message_handle_rx()\n");
- if (pstats == NULL) {
- /* Print error message. */
- /*RT_TRACE(COMP_SEND, DebugLevel,
- ("\n\r[CMPK]-->Err queue id or pointer"));*/
+ if (pstats == NULL)
return 0;
- }
total_length = pstats->Length;
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index a9d78e9651c6..50c7bb773984 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -2128,10 +2128,11 @@ void rtl8192_update_ratr_table(struct net_device *dev)
struct rtllib_device *ieee = priv->rtllib;
u8 *pMcsRate = ieee->dot11HTOperationalRateSet;
u32 ratr_value = 0;
+ u16 rate_config = 0;
u8 rate_index = 0;
- rtl8192_config_rate(dev, (u16 *)(&ratr_value));
- ratr_value |= (*(u16 *)(pMcsRate)) << 12;
+ rtl8192_config_rate(dev, &rate_config);
+ ratr_value = rate_config | *pMcsRate << 12;
switch (ieee->mode) {
case IEEE_A:
ratr_value &= 0x00000FF0;
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index 8b8a5c661a26..e75364e3eb43 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -1822,7 +1822,7 @@ int rtllib_parse_info_param(struct rtllib_device *ieee,
network->rates_ex[i] = info_element->data[i];
p += snprintf(p, sizeof(rates_str) -
(p - rates_str), "%02X ",
- network->rates[i]);
+ network->rates_ex[i]);
if (rtllib_is_ofdm_rate
(info_element->data[i])) {
network->flags |= NETWORK_HAS_OFDM;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 4feecec8609c..aefffac556a6 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1801,8 +1801,9 @@ static inline u16 auth_parse(struct sk_buff *skb, u8** challenge, int *chlen)
if (*(t++) == MFIE_TYPE_CHALLENGE) {
*chlen = *(t++);
- *challenge = kmalloc(*chlen, GFP_ATOMIC);
- memcpy(*challenge, t, *chlen); /*TODO - check here*/
+ *challenge = kmemdup(t, *chlen, GFP_ATOMIC);
+ if (!*challenge)
+ return -ENOMEM;
}
}
return cpu_to_le16(a->status);
diff --git a/drivers/staging/rtl8192u/ieee80211/dot11d.c b/drivers/staging/rtl8192u/ieee80211/dot11d.c
index f10fd5a93c38..34edcfab96be 100644
--- a/drivers/staging/rtl8192u/ieee80211/dot11d.c
+++ b/drivers/staging/rtl8192u/ieee80211/dot11d.c
@@ -67,9 +67,9 @@ Dot11d_Reset(struct ieee80211_device *ieee)
void
Dot11d_UpdateCountryIe(
struct ieee80211_device *dev,
- u8 * pTaddr,
+ u8 *pTaddr,
u16 CoutryIeLen,
- u8 * pCoutryIe
+ u8 *pCoutryIe
)
{
PRT_DOT11D_INFO pDot11dInfo = GET_DOT11D_INFO(dev);
@@ -101,7 +101,7 @@ Dot11d_UpdateCountryIe(
MaxChnlNum = pTriple->FirstChnl + j;
}
- pTriple = (PCHNL_TXPOWER_TRIPLE)((u8*)pTriple + 3);
+ pTriple = (PCHNL_TXPOWER_TRIPLE)((u8 *)pTriple + 3);
}
//printk("Dot11d_UpdateCountryIe(): Channel List:\n");
printk("Channel List:");
@@ -143,12 +143,12 @@ DOT11D_GetMaxTxPwrInDbm(
void
DOT11D_ScanComplete(
- struct ieee80211_device * dev
+ struct ieee80211_device *dev
)
{
PRT_DOT11D_INFO pDot11dInfo = GET_DOT11D_INFO(dev);
- switch(pDot11dInfo->State)
+ switch (pDot11dInfo->State)
{
case DOT11D_STATE_LEARNED:
pDot11dInfo->State = DOT11D_STATE_DONE;
@@ -166,7 +166,7 @@ DOT11D_ScanComplete(
}
int IsLegalChannel(
- struct ieee80211_device * dev,
+ struct ieee80211_device *dev,
u8 channel
)
{
@@ -183,7 +183,7 @@ int IsLegalChannel(
}
int ToLegalChannel(
- struct ieee80211_device * dev,
+ struct ieee80211_device *dev,
u8 channel
)
{
diff --git a/drivers/staging/rtl8192u/ieee80211/dot11d.h b/drivers/staging/rtl8192u/ieee80211/dot11d.h
index 54f2b4c434ff..6aa8c15eba39 100644
--- a/drivers/staging/rtl8192u/ieee80211/dot11d.h
+++ b/drivers/staging/rtl8192u/ieee80211/dot11d.h
@@ -71,9 +71,9 @@ Dot11d_Reset(
void
Dot11d_UpdateCountryIe(
struct ieee80211_device *dev,
- u8 * pTaddr,
+ u8 *pTaddr,
u16 CoutryIeLen,
- u8 * pCoutryIe
+ u8 *pCoutryIe
);
u8
@@ -84,16 +84,16 @@ DOT11D_GetMaxTxPwrInDbm(
void
DOT11D_ScanComplete(
- struct ieee80211_device * dev
+ struct ieee80211_device *dev
);
int IsLegalChannel(
- struct ieee80211_device * dev,
+ struct ieee80211_device *dev,
u8 channel
);
int ToLegalChannel(
- struct ieee80211_device * dev,
+ struct ieee80211_device *dev,
u8 channel
);
#endif // #ifndef __INC_DOT11D_H
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
index 210898c8e66c..c9f3bb363be4 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
@@ -493,8 +493,8 @@ typedef struct ieee_param {
#define IsDataFrame(pdu) ( ((pdu[0] & 0x0C)==0x08) ? true : false )
#define IsLegacyDataFrame(pdu) (IsDataFrame(pdu) && (!(pdu[0]&FC_QOS_BIT)) )
//added by wb. Is this right?
-#define IsQoSDataFrame(pframe) ((*(u16*)pframe&(IEEE80211_STYPE_QOS_DATA|IEEE80211_FTYPE_DATA)) == (IEEE80211_STYPE_QOS_DATA|IEEE80211_FTYPE_DATA))
-#define Frame_Order(pframe) (*(u16*)pframe&IEEE80211_FCTL_ORDER)
+#define IsQoSDataFrame(pframe) ((*(u16 *)pframe&(IEEE80211_STYPE_QOS_DATA|IEEE80211_FTYPE_DATA)) == (IEEE80211_STYPE_QOS_DATA|IEEE80211_FTYPE_DATA))
+#define Frame_Order(pframe) (*(u16 *)pframe&IEEE80211_FCTL_ORDER)
#define SN_LESS(a, b) (((a-b)&0x800)!=0)
#define SN_EQUAL(a, b) (a == b)
#define MAX_DEV_ADDR_SIZE 8
@@ -538,7 +538,7 @@ do { if (ieee80211_debug_level & (level)) \
do{ if ((ieee80211_debug_level & (level)) == (level)) \
{ \
int i; \
- u8* pdata = (u8*) data; \
+ u8 *pdata = (u8 *) data; \
printk(KERN_DEBUG "ieee80211: %s()\n", __FUNCTION__); \
for(i=0; i<(int)(datalen); i++) \
{ \
@@ -914,7 +914,7 @@ struct ieee80211_rx_stats {
bool bIsCCK;
bool bPacketToSelf;
//added by amy
- u8* virtual_address;
+ u8 *virtual_address;
u16 packetlength; // Total packet length: Must equal to sum of all FragLength
u16 fraglength; // FragLength should equal to PacketLength in non-fragment case
u16 fragoffset; // Data offset for this fragment
@@ -1366,13 +1366,13 @@ static inline const char *eap_get_type(int type)
return ((u32)type >= ARRAY_SIZE(eap_types)) ? "Unknown" : eap_types[type];
}
//added by amy for reorder
-static inline u8 Frame_QoSTID(u8* buf)
+static inline u8 Frame_QoSTID(u8 *buf)
{
struct ieee80211_hdr_3addr *hdr;
u16 fc;
hdr = (struct ieee80211_hdr_3addr *)buf;
fc = le16_to_cpu(hdr->frame_ctl);
- return (u8)((frameqos*)(buf + (((fc & IEEE80211_FCTL_TODS)&&(fc & IEEE80211_FCTL_FROMDS))? 30 : 24)))->field.tid;
+ return (u8)((frameqos *)(buf + (((fc & IEEE80211_FCTL_TODS)&&(fc & IEEE80211_FCTL_FROMDS))? 30 : 24)))->field.tid;
}
//added by amy for reorder
@@ -1670,7 +1670,7 @@ typedef struct _bandwidth_autoswitch {
typedef struct _RX_REORDER_ENTRY {
struct list_head List;
u16 SeqNum;
- struct ieee80211_rxb* prxb;
+ struct ieee80211_rxb *prxb;
} RX_REORDER_ENTRY, *PRX_REORDER_ENTRY;
//added by amy for order
typedef enum _Fsync_State{
@@ -1965,7 +1965,7 @@ struct ieee80211_device {
/* map of allowed channels. 0 is dummy */
// FIXME: remember to default to a basic channel plan depending of the PHY type
- void* pDot11dInfo;
+ void *pDot11dInfo;
bool bGlobalDomain;
int rate; /* current rate */
int basic_rate;
@@ -2107,10 +2107,10 @@ struct ieee80211_device {
struct net_device *dev);
int (*reset_port)(struct net_device *dev);
- int (*is_queue_full) (struct net_device * dev, int pri);
+ int (*is_queue_full) (struct net_device *dev, int pri);
- int (*handle_management) (struct net_device * dev,
- struct ieee80211_network * network, u16 type);
+ int (*handle_management) (struct net_device *dev,
+ struct ieee80211_network *network, u16 type);
int (*is_qos_active) (struct net_device *dev, struct sk_buff *skb);
/* Softmac-generated frames (management) are TXed via this
@@ -2187,8 +2187,8 @@ struct ieee80211_device {
void (*ps_request_tx_ack) (struct net_device *dev);
void (*enter_sleep_state) (struct net_device *dev, u32 th, u32 tl);
short (*ps_is_queue_empty) (struct net_device *dev);
- int (*handle_beacon) (struct net_device * dev, struct ieee80211_beacon * beacon, struct ieee80211_network * network);
- int (*handle_assoc_response) (struct net_device * dev, struct ieee80211_assoc_response_frame * resp, struct ieee80211_network * network);
+ int (*handle_beacon) (struct net_device *dev, struct ieee80211_beacon *beacon, struct ieee80211_network *network);
+ int (*handle_assoc_response) (struct net_device *dev, struct ieee80211_assoc_response_frame *resp, struct ieee80211_network *network);
/* check whether Tx hw resource available */
@@ -2197,9 +2197,9 @@ struct ieee80211_device {
// void (*SwChnlByTimerHandler)(struct net_device *dev, int channel);
void (*SetBWModeHandler)(struct net_device *dev, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
// void (*UpdateHalRATRTableHandler)(struct net_device* dev, u8* pMcsRate);
- bool (*GetNmodeSupportBySecCfg)(struct net_device* dev);
- void (*SetWirelessMode)(struct net_device* dev, u8 wireless_mode);
- bool (*GetHalfNmodeSupportByAPsHandler)(struct net_device* dev);
+ bool (*GetNmodeSupportBySecCfg)(struct net_device *dev);
+ void (*SetWirelessMode)(struct net_device *dev, u8 wireless_mode);
+ bool (*GetHalfNmodeSupportByAPsHandler)(struct net_device *dev);
void (*InitialGainHandler)(struct net_device *dev, u8 Operation);
/* This must be the last item so that it points to the data
@@ -2401,10 +2401,10 @@ extern int ieee80211_wx_get_encode(struct ieee80211_device *ieee,
#if WIRELESS_EXT >= 18
extern int ieee80211_wx_get_encode_ext(struct ieee80211_device *ieee,
struct iw_request_info *info,
- union iwreq_data* wrqu, char *extra);
+ union iwreq_data *wrqu, char *extra);
extern int ieee80211_wx_set_encode_ext(struct ieee80211_device *ieee,
struct iw_request_info *info,
- union iwreq_data* wrqu, char *extra);
+ union iwreq_data *wrqu, char *extra);
extern int ieee80211_wx_set_auth(struct ieee80211_device *ieee,
struct iw_request_info *info,
struct iw_param *data, char *extra);
@@ -2422,7 +2422,7 @@ extern int ieee80211_rx_frame_softmac(struct ieee80211_device *ieee, struct sk_b
u16 stype);
extern void ieee80211_softmac_new_net(struct ieee80211_device *ieee, struct ieee80211_network *net);
-void SendDisassociation(struct ieee80211_device *ieee, u8* asSta, u8 asRsn);
+void SendDisassociation(struct ieee80211_device *ieee, u8 *asSta, u8 asRsn);
extern void ieee80211_softmac_xmit(struct ieee80211_txb *txb, struct ieee80211_device *ieee);
extern void ieee80211_stop_send_beacons(struct ieee80211_device *ieee);
@@ -2528,52 +2528,52 @@ extern int ieee80211_wx_get_rts(struct ieee80211_device *ieee,
union iwreq_data *wrqu, char *extra);
//HT
#define MAX_RECEIVE_BUFFER_SIZE 9100 //
-extern void HTDebugHTCapability(u8* CapIE, u8* TitleString );
-extern void HTDebugHTInfo(u8* InfoIE, u8* TitleString);
-
-void HTSetConnectBwMode(struct ieee80211_device* ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
-extern void HTUpdateDefaultSetting(struct ieee80211_device* ieee);
-extern void HTConstructCapabilityElement(struct ieee80211_device* ieee, u8* posHTCap, u8* len, u8 isEncrypt);
-extern void HTConstructInfoElement(struct ieee80211_device* ieee, u8* posHTInfo, u8* len, u8 isEncrypt);
-extern void HTConstructRT2RTAggElement(struct ieee80211_device* ieee, u8* posRT2RTAgg, u8* len);
+extern void HTDebugHTCapability(u8 *CapIE, u8 *TitleString );
+extern void HTDebugHTInfo(u8 *InfoIE, u8 *TitleString);
+
+void HTSetConnectBwMode(struct ieee80211_device *ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
+extern void HTUpdateDefaultSetting(struct ieee80211_device *ieee);
+extern void HTConstructCapabilityElement(struct ieee80211_device *ieee, u8 *posHTCap, u8 *len, u8 isEncrypt);
+extern void HTConstructInfoElement(struct ieee80211_device *ieee, u8 *posHTInfo, u8 *len, u8 isEncrypt);
+extern void HTConstructRT2RTAggElement(struct ieee80211_device *ieee, u8 *posRT2RTAgg, u8 *len);
extern void HTOnAssocRsp(struct ieee80211_device *ieee);
-extern void HTInitializeHTInfo(struct ieee80211_device* ieee);
+extern void HTInitializeHTInfo(struct ieee80211_device *ieee);
extern void HTInitializeBssDesc(PBSS_HT pBssHT);
-extern void HTResetSelfAndSavePeerSetting(struct ieee80211_device* ieee, struct ieee80211_network * pNetwork);
-extern void HTUpdateSelfAndPeerSetting(struct ieee80211_device* ieee, struct ieee80211_network * pNetwork);
-extern u8 HTGetHighestMCSRate(struct ieee80211_device* ieee, u8* pMCSRateSet, u8* pMCSFilter);
+extern void HTResetSelfAndSavePeerSetting(struct ieee80211_device *ieee, struct ieee80211_network *pNetwork);
+extern void HTUpdateSelfAndPeerSetting(struct ieee80211_device *ieee, struct ieee80211_network *pNetwork);
+extern u8 HTGetHighestMCSRate(struct ieee80211_device *ieee, u8 *pMCSRateSet, u8 *pMCSFilter);
extern u8 MCS_FILTER_ALL[];
extern u16 MCS_DATA_RATE[2][2][77] ;
-extern u8 HTCCheck(struct ieee80211_device* ieee, u8* pFrame);
+extern u8 HTCCheck(struct ieee80211_device *ieee, u8 *pFrame);
//extern void HTSetConnectBwModeCallback(unsigned long data);
extern void HTResetIOTSetting(PRT_HIGH_THROUGHPUT pHTInfo);
-extern bool IsHTHalfNmodeAPs(struct ieee80211_device* ieee);
-extern u16 HTHalfMcsToDataRate(struct ieee80211_device* ieee, u8 nMcsRate);
-extern u16 HTMcsToDataRate( struct ieee80211_device* ieee, u8 nMcsRate);
-extern u16 TxCountToDataRate( struct ieee80211_device* ieee, u8 nDataRate);
+extern bool IsHTHalfNmodeAPs(struct ieee80211_device *ieee);
+extern u16 HTHalfMcsToDataRate(struct ieee80211_device *ieee, u8 nMcsRate);
+extern u16 HTMcsToDataRate( struct ieee80211_device *ieee, u8 nMcsRate);
+extern u16 TxCountToDataRate( struct ieee80211_device *ieee, u8 nDataRate);
//function in BAPROC.c
-extern int ieee80211_rx_ADDBAReq( struct ieee80211_device* ieee, struct sk_buff *skb);
-extern int ieee80211_rx_ADDBARsp( struct ieee80211_device* ieee, struct sk_buff *skb);
-extern int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb);
-extern void TsInitAddBA( struct ieee80211_device* ieee, PTX_TS_RECORD pTS, u8 Policy, u8 bOverwritePending);
-extern void TsInitDelBA( struct ieee80211_device* ieee, PTS_COMMON_INFO pTsCommonInfo, TR_SELECT TxRxSelect);
+extern int ieee80211_rx_ADDBAReq( struct ieee80211_device *ieee, struct sk_buff *skb);
+extern int ieee80211_rx_ADDBARsp( struct ieee80211_device *ieee, struct sk_buff *skb);
+extern int ieee80211_rx_DELBA(struct ieee80211_device *ieee,struct sk_buff *skb);
+extern void TsInitAddBA( struct ieee80211_device *ieee, PTX_TS_RECORD pTS, u8 Policy, u8 bOverwritePending);
+extern void TsInitDelBA( struct ieee80211_device *ieee, PTS_COMMON_INFO pTsCommonInfo, TR_SELECT TxRxSelect);
extern void BaSetupTimeOut(unsigned long data);
extern void TxBaInactTimeout(unsigned long data);
extern void RxBaInactTimeout(unsigned long data);
extern void ResetBaEntry( PBA_RECORD pBA);
//function in TS.c
extern bool GetTs(
- struct ieee80211_device* ieee,
+ struct ieee80211_device *ieee,
PTS_COMMON_INFO *ppTS,
- u8* Addr,
+ u8 *Addr,
u8 TID,
TR_SELECT TxRxSelect, //Rx:1, Tx:0
bool bAddNewTs
);
extern void TSInitialize(struct ieee80211_device *ieee);
-extern void TsStartAddBaProcess(struct ieee80211_device* ieee, PTX_TS_RECORD pTxTS);
-extern void RemovePeerTS(struct ieee80211_device* ieee, u8* Addr);
-extern void RemoveAllTS(struct ieee80211_device* ieee);
+extern void TsStartAddBaProcess(struct ieee80211_device *ieee, PTX_TS_RECORD pTxTS);
+extern void RemovePeerTS(struct ieee80211_device *ieee, u8 *Addr);
+extern void RemoveAllTS(struct ieee80211_device *ieee);
void ieee80211_softmac_scan_syncro(struct ieee80211_device *ieee);
extern const long ieee80211_wlan_frequencies[];
@@ -2623,6 +2623,6 @@ extern int ieee80211_parse_info_param(struct ieee80211_device *ieee,
struct ieee80211_network *network,
struct ieee80211_rx_stats *stats);
-void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_rxb** prxbIndicateArray,u8 index);
+void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_rxb **prxbIndicateArray,u8 index);
#define RT_ASOC_RETRY_LIMIT 5
#endif /* IEEE80211_H */
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.c
index a464d111d738..55332217c29f 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.c
@@ -155,7 +155,7 @@ int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops)
}
-struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name)
+struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name)
{
unsigned long flags;
struct list_head *ptr;
@@ -182,7 +182,7 @@ struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name)
}
-static void * ieee80211_crypt_null_init(int keyidx) { return (void *) 1; }
+static void *ieee80211_crypt_null_init(int keyidx) { return (void *) 1; }
static void ieee80211_crypt_null_deinit(void *priv) {}
static struct ieee80211_crypto_ops ieee80211_crypt_null = {
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.h b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.h
index b58a3bcc0dc0..0b4ea431982d 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt.h
@@ -77,7 +77,7 @@ struct ieee80211_crypt_data {
int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops);
int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops);
-struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name);
+struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name);
void ieee80211_crypt_deinit_entries(struct ieee80211_device *, int);
void ieee80211_crypt_deinit_handler(unsigned long);
void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee,
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c
index fec0176888e2..f2b16775a638 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c
@@ -60,10 +60,10 @@ struct ieee80211_ccmp_data {
void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm,
const u8 pt[16], u8 ct[16])
{
- crypto_cipher_encrypt_one((void*)tfm, ct, pt);
+ crypto_cipher_encrypt_one((void *)tfm, ct, pt);
}
-static void * ieee80211_ccmp_init(int key_idx)
+static void *ieee80211_ccmp_init(int key_idx)
{
struct ieee80211_ccmp_data *priv;
@@ -72,7 +72,7 @@ static void * ieee80211_ccmp_init(int key_idx)
goto fail;
priv->key_idx = key_idx;
- priv->tfm = (void*)crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+ priv->tfm = (void *)crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tfm)) {
printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
"crypto API aes\n");
@@ -85,7 +85,7 @@ static void * ieee80211_ccmp_init(int key_idx)
fail:
if (priv) {
if (priv->tfm)
- crypto_free_cipher((void*)priv->tfm);
+ crypto_free_cipher((void *)priv->tfm);
kfree(priv);
}
@@ -98,7 +98,7 @@ static void ieee80211_ccmp_deinit(void *priv)
struct ieee80211_ccmp_data *_priv = priv;
if (_priv && _priv->tfm)
- crypto_free_cipher((void*)_priv->tfm);
+ crypto_free_cipher((void *)_priv->tfm);
kfree(priv);
}
@@ -393,7 +393,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 *seq, void *priv)
data->rx_pn[4] = seq[1];
data->rx_pn[5] = seq[0];
}
- crypto_cipher_setkey((void*)data->tfm, data->key, CCMP_TK_LEN);
+ crypto_cipher_setkey((void *)data->tfm, data->key, CCMP_TK_LEN);
} else if (len == 0)
data->key_set = 0;
else
@@ -427,7 +427,7 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 *seq, void *priv)
}
-static char * ieee80211_ccmp_print_stats(char *p, void *priv)
+static char *ieee80211_ccmp_print_stats(char *p, void *priv)
{
struct ieee80211_ccmp_data *ccmp = priv;
p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
index 555eb8038e95..93121b42f16b 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
@@ -62,7 +62,7 @@ struct ieee80211_tkip_data {
u8 rx_hdr[16], tx_hdr[16];
};
-static void * ieee80211_tkip_init(int key_idx)
+static void *ieee80211_tkip_init(int key_idx)
{
struct ieee80211_tkip_data *priv;
@@ -499,8 +499,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
return keyidx;
}
-static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
- u8 * data, size_t data_len, u8 * mic)
+static int michael_mic(struct crypto_hash *tfm_michael, u8 *key, u8 *hdr,
+ u8 *data, size_t data_len, u8 *mic)
{
struct hash_desc desc;
struct scatterlist sg[2];
@@ -718,7 +718,7 @@ static int ieee80211_tkip_get_key(void *key, int len, u8 *seq, void *priv)
}
-static char * ieee80211_tkip_print_stats(char *p, void *priv)
+static char *ieee80211_tkip_print_stats(char *p, void *priv)
{
struct ieee80211_tkip_data *tkip = priv;
p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
index 3801f125f8f2..f20223695897 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_wep.c
@@ -38,7 +38,7 @@ struct prism2_wep_data {
};
-static void * prism2_wep_init(int keyidx)
+static void *prism2_wep_init(int keyidx)
{
struct prism2_wep_data *priv;
@@ -253,7 +253,7 @@ static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv)
}
-static char * prism2_wep_print_stats(char *p, void *priv)
+static char *prism2_wep_print_stats(char *p, void *priv)
{
struct prism2_wep_data *wep = priv;
p += sprintf(p, "key[%d] alg=WEP len=%d\n",
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
index ee7ce5fca462..a6b18409103b 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
@@ -218,7 +218,7 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb,
* this is not mandatory.... but seems that the probe
* response parser uses it
*/
- struct ieee80211_hdr_3addr * hdr = (struct ieee80211_hdr_3addr *)skb->data;
+ struct ieee80211_hdr_3addr *hdr = (struct ieee80211_hdr_3addr *)skb->data;
rx_stats->len = skb->len;
ieee80211_rx_mgt(ieee,(struct ieee80211_hdr_4addr *)skb->data,rx_stats);
@@ -336,7 +336,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
/* Called only as a tasklet (software IRQ), by ieee80211_rx */
static inline int
-ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb,
+ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb,
struct ieee80211_crypt_data *crypt)
{
struct ieee80211_hdr_4addr *hdr;
@@ -385,7 +385,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb,
/* Called only as a tasklet (software IRQ), by ieee80211_rx */
static inline int
-ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device* ieee, struct sk_buff *skb,
+ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, struct sk_buff *skb,
int keyidx, struct ieee80211_crypt_data *crypt)
{
struct ieee80211_hdr_4addr *hdr;
@@ -439,7 +439,7 @@ static int is_duplicate_packet(struct ieee80211_device *ieee,
tid = UP2AC(tid);
tid ++;
} else if(IEEE80211_QOS_HAS_SEQ(fc)) { //QoS
- hdr_3addrqos = (struct ieee80211_hdr_3addrqos*)header;
+ hdr_3addrqos = (struct ieee80211_hdr_3addrqos *)header;
tid = le16_to_cpu(hdr_3addrqos->qos_ctl) & IEEE80211_QCTL_TID;
tid = UP2AC(tid);
tid ++;
@@ -454,8 +454,7 @@ static int is_duplicate_packet(struct ieee80211_device *ieee,
struct ieee_ibss_seq *entry = NULL;
u8 *mac = header->addr2;
int index = mac[5] % IEEE_IBSS_MAC_HASH_SIZE;
- //for (pos = (head)->next; pos != (head); pos = pos->next)
- //__list_for_each(p, &ieee->ibss_mac_hash[index]) {
+
list_for_each(p, &ieee->ibss_mac_hash[index]) {
entry = list_entry(p, struct ieee_ibss_seq, list);
if (!memcmp(entry->mac, mac, ETH_ALEN))
@@ -548,7 +547,7 @@ AddReorderEntry(
return true;
}
-void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_rxb** prxbIndicateArray,u8 index)
+void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_rxb **prxbIndicateArray,u8 index)
{
u8 i = 0 , j=0;
u16 ethertype;
@@ -557,7 +556,7 @@ void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_
for(j = 0; j<index; j++)
{
//added by amy for reorder
- struct ieee80211_rxb* prxb = prxbIndicateArray[j];
+ struct ieee80211_rxb *prxb = prxbIndicateArray[j];
for(i = 0; i<prxb->nr_subframes; i++) {
struct sk_buff *sub_skb = prxb->subframes[i];
@@ -603,13 +602,13 @@ void ieee80211_indicate_packets(struct ieee80211_device *ieee, struct ieee80211_
void RxReorderIndicatePacket( struct ieee80211_device *ieee,
- struct ieee80211_rxb* prxb,
+ struct ieee80211_rxb *prxb,
PRX_TS_RECORD pTS,
u16 SeqNum)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
PRX_REORDER_ENTRY pReorderEntry = NULL;
- struct ieee80211_rxb* prxbIndicateArray[REORDER_WIN_SIZE];
+ struct ieee80211_rxb *prxbIndicateArray[REORDER_WIN_SIZE];
u8 WinSize = pHTInfo->RxReorderWinSize;
u16 WinEnd = (pTS->RxIndicateSeq + WinSize -1)%4096;
u8 index = 0;
@@ -774,9 +773,9 @@ void RxReorderIndicatePacket( struct ieee80211_device *ieee,
u8 parse_subframe(struct sk_buff *skb,
struct ieee80211_rx_stats *rx_stats,
- struct ieee80211_rxb *rxb,u8* src,u8* dst)
+ struct ieee80211_rxb *rxb,u8 *src,u8 *dst)
{
- struct ieee80211_hdr_3addr *hdr = (struct ieee80211_hdr_3addr* )skb->data;
+ struct ieee80211_hdr_3addr *hdr = (struct ieee80211_hdr_3addr *)skb->data;
u16 fc = le16_to_cpu(hdr->frame_ctl);
u16 LLCOffset= sizeof(struct ieee80211_hdr_3addr);
@@ -831,7 +830,7 @@ u8 parse_subframe(struct sk_buff *skb,
memcpy(rxb->dst,dst,ETH_ALEN);
while(skb->len > ETHERNET_HEADER_SIZE) {
/* Offset 12 denote 2 mac address */
- nSubframe_Length = *((u16*)(skb->data + 12));
+ nSubframe_Length = *((u16 *)(skb->data + 12));
//==m==>change the length order
nSubframe_Length = (nSubframe_Length>>8) + (nSubframe_Length<<8);
@@ -926,7 +925,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
int keyidx = 0;
int i;
- struct ieee80211_rxb* rxb = NULL;
+ struct ieee80211_rxb *rxb = NULL;
// cheat the the hdr type
hdr = (struct ieee80211_hdr_4addr *)skb->data;
stats = &ieee->stats;
@@ -1035,9 +1034,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
//IEEE80211_DEBUG(IEEE80211_DL_REORDER,"%s(): QOS ENABLE AND RECEIVE QOS DATA , we will get Ts, tid:%d\n",__FUNCTION__, tid);
if(GetTs(
ieee,
- (PTS_COMMON_INFO*) &pRxTS,
+ (PTS_COMMON_INFO *) &pRxTS,
hdr->addr2,
- (u8)Frame_QoSTID((u8*)(skb->data)),
+ (u8)Frame_QoSTID((u8 *)(skb->data)),
RX_DIR,
true))
{
@@ -1289,7 +1288,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
{
TID = Frame_QoSTID(skb->data);
SeqNum = WLAN_GET_SEQ_SEQ(sc);
- GetTs(ieee,(PTS_COMMON_INFO*) &pTS,hdr->addr2,TID,RX_DIR,true);
+ GetTs(ieee,(PTS_COMMON_INFO *) &pTS,hdr->addr2,TID,RX_DIR,true);
if(TID !=0 && TID !=3)
{
ieee->bis_any_nonbepkts = true;
@@ -1597,7 +1596,7 @@ static inline void ieee80211_extract_country_ie(
struct ieee80211_device *ieee,
struct ieee80211_info_element *info_element,
struct ieee80211_network *network,
- u8 * addr2
+ u8 *addr2
)
{
if(IS_DOT11D_ENABLE(ieee))
@@ -2275,7 +2274,7 @@ static inline int ieee80211_network_init(
}
static inline int is_same_network(struct ieee80211_network *src,
- struct ieee80211_network *dst, struct ieee80211_device* ieee)
+ struct ieee80211_network *dst, struct ieee80211_device *ieee)
{
/* A network is only a duplicate if the channel, BSSID, ESSID
* and the capability field (in particular IBSS and BSS) all match.
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
index 454f8895d211..8a0075db9253 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
@@ -688,7 +688,7 @@ inline struct sk_buff *ieee80211_authentication_req(struct ieee80211_network *be
}
-static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *dest)
+static struct sk_buff *ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *dest)
{
u8 *tag;
int beacon_size;
@@ -696,7 +696,7 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
struct sk_buff *skb = NULL;
int encrypt;
int atim_len,erp_len;
- struct ieee80211_crypt_data* crypt;
+ struct ieee80211_crypt_data *crypt;
char *ssid = ieee->current_network.ssid;
int ssid_len = ieee->current_network.ssid_len;
@@ -705,12 +705,12 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
int wpa_ie_len = ieee->wpa_ie_len;
u8 erpinfo_content = 0;
- u8* tmp_ht_cap_buf;
+ u8 *tmp_ht_cap_buf;
u8 tmp_ht_cap_len=0;
- u8* tmp_ht_info_buf;
+ u8 *tmp_ht_info_buf;
u8 tmp_ht_info_len=0;
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
- u8* tmp_generic_ie_buf=NULL;
+ u8 *tmp_generic_ie_buf=NULL;
u8 tmp_generic_ie_len=0;
if(rate_ex_len > 0) rate_ex_len+=2;
@@ -732,9 +732,9 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
encrypt = ieee->host_encrypt && crypt && crypt->ops &&
((0 == strcmp(crypt->ops->name, "WEP") || wpa_ie_len));
//HT ralated element
- tmp_ht_cap_buf =(u8*) &(ieee->pHTInfo->SelfHTCap);
+ tmp_ht_cap_buf =(u8 *) &(ieee->pHTInfo->SelfHTCap);
tmp_ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap);
- tmp_ht_info_buf =(u8*) &(ieee->pHTInfo->SelfHTInfo);
+ tmp_ht_info_buf =(u8 *) &(ieee->pHTInfo->SelfHTInfo);
tmp_ht_info_len = sizeof(ieee->pHTInfo->SelfHTInfo);
HTConstructCapabilityElement(ieee, tmp_ht_cap_buf, &tmp_ht_cap_len,encrypt);
HTConstructInfoElement(ieee,tmp_ht_info_buf,&tmp_ht_info_len, encrypt);
@@ -764,7 +764,7 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
if (!skb)
return NULL;
skb_reserve(skb, ieee->tx_headroom);
- beacon_buf = (struct ieee80211_probe_response*) skb_put(skb, (beacon_size - ieee->tx_headroom));
+ beacon_buf = (struct ieee80211_probe_response *) skb_put(skb, (beacon_size - ieee->tx_headroom));
memcpy (beacon_buf->header.addr1, dest,ETH_ALEN);
memcpy (beacon_buf->header.addr2, ieee->dev->dev_addr, ETH_ALEN);
memcpy (beacon_buf->header.addr3, ieee->current_network.bssid, ETH_ALEN);
@@ -789,7 +789,7 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
beacon_buf->info_element[0].id = MFIE_TYPE_SSID;
beacon_buf->info_element[0].len = ssid_len;
- tag = (u8*) beacon_buf->info_element[0].data;
+ tag = (u8 *) beacon_buf->info_element[0].data;
memcpy(tag, ssid, ssid_len);
@@ -841,12 +841,12 @@ static struct sk_buff* ieee80211_probe_resp(struct ieee80211_device *ieee, u8 *d
}
-struct sk_buff* ieee80211_assoc_resp(struct ieee80211_device *ieee, u8 *dest)
+struct sk_buff *ieee80211_assoc_resp(struct ieee80211_device *ieee, u8 *dest)
{
struct sk_buff *skb;
- u8* tag;
+ u8 *tag;
- struct ieee80211_crypt_data* crypt;
+ struct ieee80211_crypt_data *crypt;
struct ieee80211_assoc_response_frame *assoc;
short encrypt;
@@ -888,7 +888,7 @@ struct sk_buff* ieee80211_assoc_resp(struct ieee80211_device *ieee, u8 *dest)
if (ieee->assoc_id == 0x2007) ieee->assoc_id=0;
else ieee->assoc_id++;
- tag = (u8*) skb_put(skb, rate_len);
+ tag = (u8 *) skb_put(skb, rate_len);
ieee80211_MFIE_Brate(ieee, &tag);
ieee80211_MFIE_Grate(ieee, &tag);
@@ -896,7 +896,7 @@ struct sk_buff* ieee80211_assoc_resp(struct ieee80211_device *ieee, u8 *dest)
return skb;
}
-struct sk_buff* ieee80211_auth_resp(struct ieee80211_device *ieee,int status, u8 *dest)
+struct sk_buff *ieee80211_auth_resp(struct ieee80211_device *ieee,int status, u8 *dest)
{
struct sk_buff *skb;
struct ieee80211_authentication *auth;
@@ -924,17 +924,17 @@ struct sk_buff* ieee80211_auth_resp(struct ieee80211_device *ieee,int status, u8
}
-struct sk_buff* ieee80211_null_func(struct ieee80211_device *ieee,short pwr)
+struct sk_buff *ieee80211_null_func(struct ieee80211_device *ieee,short pwr)
{
struct sk_buff *skb;
- struct ieee80211_hdr_3addr* hdr;
+ struct ieee80211_hdr_3addr *hdr;
skb = dev_alloc_skb(sizeof(struct ieee80211_hdr_3addr));
if (!skb)
return NULL;
- hdr = (struct ieee80211_hdr_3addr*)skb_put(skb,sizeof(struct ieee80211_hdr_3addr));
+ hdr = (struct ieee80211_hdr_3addr *)skb_put(skb,sizeof(struct ieee80211_hdr_3addr));
memcpy(hdr->addr1, ieee->current_network.bssid, ETH_ALEN);
memcpy(hdr->addr2, ieee->dev->dev_addr, ETH_ALEN);
@@ -950,7 +950,7 @@ struct sk_buff* ieee80211_null_func(struct ieee80211_device *ieee,short pwr)
}
-void ieee80211_resp_to_assoc_rq(struct ieee80211_device *ieee, u8* dest)
+void ieee80211_resp_to_assoc_rq(struct ieee80211_device *ieee, u8 *dest)
{
struct sk_buff *buf = ieee80211_assoc_resp(ieee, dest);
@@ -959,7 +959,7 @@ void ieee80211_resp_to_assoc_rq(struct ieee80211_device *ieee, u8* dest)
}
-void ieee80211_resp_to_auth(struct ieee80211_device *ieee, int s, u8* dest)
+void ieee80211_resp_to_auth(struct ieee80211_device *ieee, int s, u8 *dest)
{
struct sk_buff *buf = ieee80211_auth_resp(ieee, s, dest);
@@ -991,15 +991,15 @@ inline struct sk_buff *ieee80211_association_req(struct ieee80211_network *beaco
//u8 suit_select = 0;
//unsigned int wpa_len = beacon->wpa_ie_len;
//for HT
- u8* ht_cap_buf = NULL;
+ u8 *ht_cap_buf = NULL;
u8 ht_cap_len=0;
- u8* realtek_ie_buf=NULL;
+ u8 *realtek_ie_buf=NULL;
u8 realtek_ie_len=0;
int wpa_ie_len= ieee->wpa_ie_len;
unsigned int ckip_ie_len=0;
unsigned int ccxrm_ie_len=0;
unsigned int cxvernum_ie_len=0;
- struct ieee80211_crypt_data* crypt;
+ struct ieee80211_crypt_data *crypt;
int encrypt;
unsigned int rate_len = ieee80211_MFIE_rate_len(ieee);
@@ -1016,7 +1016,7 @@ inline struct sk_buff *ieee80211_association_req(struct ieee80211_network *beaco
//Include High Throuput capability && Realtek proprietary
if(ieee->pHTInfo->bCurrentHTSupport&&ieee->pHTInfo->bEnableHT)
{
- ht_cap_buf = (u8*)&(ieee->pHTInfo->SelfHTCap);
+ ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap);
ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap);
HTConstructCapabilityElement(ieee, ht_cap_buf, &ht_cap_len, encrypt);
if(ieee->pHTInfo->bCurrentRT2RTAggregation)
@@ -1314,7 +1314,7 @@ void ieee80211_auth_challenge(struct ieee80211_device *ieee, u8 *challenge, int
void ieee80211_associate_step2(struct ieee80211_device *ieee)
{
- struct sk_buff* skb;
+ struct sk_buff *skb;
struct ieee80211_network *beacon = &ieee->current_network;
del_timer_sync(&ieee->associate_timer);
@@ -1536,7 +1536,7 @@ void ieee80211_softmac_check_all_nets(struct ieee80211_device *ieee)
}
-static inline u16 auth_parse(struct sk_buff *skb, u8** challenge, int *chlen)
+static inline u16 auth_parse(struct sk_buff *skb, u8 **challenge, int *chlen)
{
struct ieee80211_authentication *a;
u8 *t;
@@ -1545,7 +1545,7 @@ static inline u16 auth_parse(struct sk_buff *skb, u8** challenge, int *chlen)
return 0xcafe;
}
*challenge = NULL;
- a = (struct ieee80211_authentication*) skb->data;
+ a = (struct ieee80211_authentication *) skb->data;
if(skb->len > (sizeof(struct ieee80211_authentication) +3)){
t = skb->data + sizeof(struct ieee80211_authentication);
@@ -1562,7 +1562,7 @@ static inline u16 auth_parse(struct sk_buff *skb, u8** challenge, int *chlen)
}
-int auth_rq_parse(struct sk_buff *skb,u8* dest)
+int auth_rq_parse(struct sk_buff *skb,u8 *dest)
{
struct ieee80211_authentication *a;
@@ -1570,7 +1570,7 @@ int auth_rq_parse(struct sk_buff *skb,u8* dest)
IEEE80211_DEBUG_MGMT("invalid len in auth request: %d\n",skb->len);
return -1;
}
- a = (struct ieee80211_authentication*) skb->data;
+ a = (struct ieee80211_authentication *) skb->data;
memcpy(dest,a->header.addr2, ETH_ALEN);
@@ -1595,7 +1595,7 @@ static short probe_rq_parse(struct ieee80211_device *ieee, struct sk_buff *skb,
memcpy(src,header->addr2, ETH_ALEN);
- skbend = (u8*)skb->data + skb->len;
+ skbend = (u8 *)skb->data + skb->len;
tag = skb->data + sizeof (struct ieee80211_hdr_3addr );
@@ -1618,7 +1618,7 @@ static short probe_rq_parse(struct ieee80211_device *ieee, struct sk_buff *skb,
}
-int assoc_rq_parse(struct sk_buff *skb,u8* dest)
+int assoc_rq_parse(struct sk_buff *skb,u8 *dest)
{
struct ieee80211_assoc_request_frame *a;
@@ -1629,7 +1629,7 @@ int assoc_rq_parse(struct sk_buff *skb,u8* dest)
return -1;
}
- a = (struct ieee80211_assoc_request_frame*) skb->data;
+ a = (struct ieee80211_assoc_request_frame *) skb->data;
memcpy(dest,a->header.addr2,ETH_ALEN);
@@ -1646,7 +1646,7 @@ static inline u16 assoc_parse(struct ieee80211_device *ieee, struct sk_buff *skb
return 0xcafe;
}
- response_head = (struct ieee80211_assoc_response_frame*) skb->data;
+ response_head = (struct ieee80211_assoc_response_frame *) skb->data;
*aid = le16_to_cpu(response_head->aid) & 0x3fff;
status_code = le16_to_cpu(response_head->status);
@@ -1888,10 +1888,10 @@ void ieee80211_ps_tx_ack(struct ieee80211_device *ieee, short success)
}
spin_unlock_irqrestore(&ieee->lock, flags);
}
-void ieee80211_process_action(struct ieee80211_device* ieee, struct sk_buff* skb)
+void ieee80211_process_action(struct ieee80211_device *ieee, struct sk_buff *skb)
{
- struct ieee80211_hdr* header = (struct ieee80211_hdr*)skb->data;
- u8* act = ieee80211_get_payload(header);
+ struct ieee80211_hdr *header = (struct ieee80211_hdr *)skb->data;
+ u8 *act = ieee80211_get_payload(header);
u8 tmp = 0;
// IEEE80211_DEBUG_DATA(IEEE80211_DL_DATA|IEEE80211_DL_BA, skb->data, skb->len);
if (act == NULL)
@@ -1926,7 +1926,7 @@ ieee80211_rx_frame_softmac(struct ieee80211_device *ieee, struct sk_buff *skb,
{
struct ieee80211_hdr_3addr *header = (struct ieee80211_hdr_3addr *) skb->data;
u16 errcode;
- u8* challenge;
+ u8 *challenge;
int chlen=0;
int aid;
struct ieee80211_assoc_response_frame *assoc_resp;
@@ -1966,7 +1966,7 @@ ieee80211_rx_frame_softmac(struct ieee80211_device *ieee, struct sk_buff *skb,
/* station support qos */
/* Let the register setting defaultly with Legacy station */
if(ieee->qos_support) {
- assoc_resp = (struct ieee80211_assoc_response_frame*)skb->data;
+ assoc_resp = (struct ieee80211_assoc_response_frame *)skb->data;
memset(network, 0, sizeof(*network));
if (ieee80211_parse_info_param(ieee,assoc_resp->info_element,\
rx_stats->len - sizeof(*assoc_resp),\
@@ -1979,7 +1979,7 @@ ieee80211_rx_frame_softmac(struct ieee80211_device *ieee, struct sk_buff *skb,
memcpy(ieee->pHTInfo->PeerHTInfoBuf, network->bssht.bdHTInfoBuf, network->bssht.bdHTInfoLen);
}
if (ieee->handle_assoc_response != NULL)
- ieee->handle_assoc_response(ieee->dev, (struct ieee80211_assoc_response_frame*)header, network);
+ ieee->handle_assoc_response(ieee->dev, (struct ieee80211_assoc_response_frame *)header, network);
}
ieee80211_associate_complete(ieee);
} else {
@@ -3124,7 +3124,7 @@ inline struct sk_buff *ieee80211_disassociate_skb(
void
SendDisassociation(
struct ieee80211_device *ieee,
- u8* asSta,
+ u8 *asSta,
u8 asRsn
)
{
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
index c39e680bb0ac..995504207fc6 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c
@@ -183,7 +183,7 @@ int ieee80211_encrypt_fragment(
struct sk_buff *frag,
int hdr_len)
{
- struct ieee80211_crypt_data* crypt = ieee->crypt[ieee->tx_keyidx];
+ struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
int res;
if (!(crypt && crypt->ops))
@@ -243,7 +243,7 @@ struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size,
struct ieee80211_txb *txb;
int i;
txb = kmalloc(
- sizeof(struct ieee80211_txb) + (sizeof(u8*) * nr_frags),
+ sizeof(struct ieee80211_txb) + (sizeof(u8 *) * nr_frags),
gfp_mask);
if (!txb)
return NULL;
@@ -303,11 +303,11 @@ ieee80211_classify(struct sk_buff *skb, struct ieee80211_network *network)
}
#define SN_LESS(a, b) (((a-b)&0x800)!=0)
-void ieee80211_tx_query_agg_cap(struct ieee80211_device* ieee, struct sk_buff* skb, cb_desc* tcb_desc)
+void ieee80211_tx_query_agg_cap(struct ieee80211_device *ieee, struct sk_buff *skb, cb_desc *tcb_desc)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
PTX_TS_RECORD pTxTs = NULL;
- struct ieee80211_hdr_1addr* hdr = (struct ieee80211_hdr_1addr*)skb->data;
+ struct ieee80211_hdr_1addr *hdr = (struct ieee80211_hdr_1addr *)skb->data;
if (!pHTInfo->bCurrentHTSupport||!pHTInfo->bEnableHT)
return;
@@ -330,7 +330,7 @@ void ieee80211_tx_query_agg_cap(struct ieee80211_device* ieee, struct sk_buff* s
}
if(pHTInfo->bCurrentAMPDUEnable)
{
- if (!GetTs(ieee, (PTS_COMMON_INFO*)(&pTxTs), hdr->addr1, skb->priority, TX_DIR, true))
+ if (!GetTs(ieee, (PTS_COMMON_INFO *)(&pTxTs), hdr->addr1, skb->priority, TX_DIR, true))
{
printk("===>can't get TS\n");
return;
@@ -356,7 +356,7 @@ void ieee80211_tx_query_agg_cap(struct ieee80211_device* ieee, struct sk_buff* s
}
}
FORCED_AGG_SETTING:
- switch(pHTInfo->ForcedAMPDUMode )
+ switch (pHTInfo->ForcedAMPDUMode )
{
case HT_AGG_AUTO:
break;
@@ -377,7 +377,7 @@ FORCED_AGG_SETTING:
return;
}
-extern void ieee80211_qurey_ShortPreambleMode(struct ieee80211_device* ieee, cb_desc* tcb_desc)
+extern void ieee80211_qurey_ShortPreambleMode(struct ieee80211_device *ieee, cb_desc *tcb_desc)
{
tcb_desc->bUseShortPreamble = false;
if (tcb_desc->data_rate == 2)
@@ -412,7 +412,7 @@ ieee80211_query_HTCapShortGI(struct ieee80211_device *ieee, cb_desc *tcb_desc)
tcb_desc->bUseShortGI = true;
}
-void ieee80211_query_BandwidthMode(struct ieee80211_device* ieee, cb_desc *tcb_desc)
+void ieee80211_query_BandwidthMode(struct ieee80211_device *ieee, cb_desc *tcb_desc)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -432,7 +432,7 @@ void ieee80211_query_BandwidthMode(struct ieee80211_device* ieee, cb_desc *tcb_d
return;
}
-void ieee80211_query_protectionmode(struct ieee80211_device* ieee, cb_desc* tcb_desc, struct sk_buff* skb)
+void ieee80211_query_protectionmode(struct ieee80211_device *ieee, cb_desc *tcb_desc, struct sk_buff *skb)
{
// Common Settings
tcb_desc->bRTSSTBC = false;
@@ -543,7 +543,7 @@ NO_PROTECTION:
}
-void ieee80211_txrate_selectmode(struct ieee80211_device* ieee, cb_desc* tcb_desc)
+void ieee80211_txrate_selectmode(struct ieee80211_device *ieee, cb_desc *tcb_desc)
{
#ifdef TO_DO_LIST
if(!IsDataFrame(pFrame))
@@ -573,14 +573,14 @@ void ieee80211_txrate_selectmode(struct ieee80211_device* ieee, cb_desc* tcb_des
}
}
-void ieee80211_query_seqnum(struct ieee80211_device*ieee, struct sk_buff* skb, u8* dst)
+void ieee80211_query_seqnum(struct ieee80211_device *ieee, struct sk_buff *skb, u8 *dst)
{
if (is_multicast_ether_addr(dst))
return;
if (IsQoSDataFrame(skb->data)) //we deal qos data only
{
PTX_TS_RECORD pTS = NULL;
- if (!GetTs(ieee, (PTS_COMMON_INFO*)(&pTS), dst, skb->priority, TX_DIR, true))
+ if (!GetTs(ieee, (PTS_COMMON_INFO *)(&pTS), dst, skb->priority, TX_DIR, true))
{
return;
}
@@ -607,7 +607,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
u8 dest[ETH_ALEN], src[ETH_ALEN];
int qos_actived = ieee->current_network.qos_data.active;
- struct ieee80211_crypt_data* crypt;
+ struct ieee80211_crypt_data *crypt;
cb_desc *tcb_desc;
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c b/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
index 69735d320315..db0db9347487 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_BAProc.c
@@ -13,7 +13,7 @@
* u16 Time //indicate time delay.
* output: none
********************************************************************************************************************/
-void ActivateBAEntry(struct ieee80211_device* ieee, PBA_RECORD pBA, u16 Time)
+void ActivateBAEntry(struct ieee80211_device *ieee, PBA_RECORD pBA, u16 Time)
{
pBA->bValid = true;
if(Time != 0)
@@ -25,7 +25,7 @@ void ActivateBAEntry(struct ieee80211_device* ieee, PBA_RECORD pBA, u16 Time)
* input: PBA_RECORD pBA //BA entry to be disabled
* output: none
********************************************************************************************************************/
-void DeActivateBAEntry( struct ieee80211_device* ieee, PBA_RECORD pBA)
+void DeActivateBAEntry( struct ieee80211_device *ieee, PBA_RECORD pBA)
{
pBA->bValid = false;
del_timer_sync(&pBA->Timer);
@@ -37,7 +37,7 @@ void DeActivateBAEntry( struct ieee80211_device* ieee, PBA_RECORD pBA)
* output: none
* notice: As PTX_TS_RECORD structure will be defined in QOS, so wait to be merged. //FIXME
********************************************************************************************************************/
-u8 TxTsDeleteBA( struct ieee80211_device* ieee, PTX_TS_RECORD pTxTs)
+u8 TxTsDeleteBA( struct ieee80211_device *ieee, PTX_TS_RECORD pTxTs)
{
PBA_RECORD pAdmittedBa = &pTxTs->TxAdmittedBARecord; //These two BA entries must exist in TS structure
PBA_RECORD pPendingBa = &pTxTs->TxPendingBARecord;
@@ -67,7 +67,7 @@ u8 TxTsDeleteBA( struct ieee80211_device* ieee, PTX_TS_RECORD pTxTs)
* output: none
* notice: As PRX_TS_RECORD structure will be defined in QOS, so wait to be merged. //FIXME, same with above
********************************************************************************************************************/
-u8 RxTsDeleteBA( struct ieee80211_device* ieee, PRX_TS_RECORD pRxTs)
+u8 RxTsDeleteBA( struct ieee80211_device *ieee, PRX_TS_RECORD pRxTs)
{
PBA_RECORD pBa = &pRxTs->RxAdmittedBARecord;
u8 bSendDELBA = false;
@@ -105,11 +105,11 @@ void ResetBaEntry( PBA_RECORD pBA)
* output: none
* return: sk_buff* skb //return constructed skb to xmit
*******************************************************************************************************************************/
-static struct sk_buff* ieee80211_ADDBA(struct ieee80211_device* ieee, u8* Dst, PBA_RECORD pBA, u16 StatusCode, u8 type)
+static struct sk_buff *ieee80211_ADDBA(struct ieee80211_device *ieee, u8 *Dst, PBA_RECORD pBA, u16 StatusCode, u8 type)
{
struct sk_buff *skb = NULL;
- struct ieee80211_hdr_3addr* BAReq = NULL;
- u8* tag = NULL;
+ struct ieee80211_hdr_3addr *BAReq = NULL;
+ u8 *tag = NULL;
u16 tmp = 0;
u16 len = ieee->tx_headroom + 9;
//category(1) + action field(1) + Dialog Token(1) + BA Parameter Set(2) + BA Timeout Value(2) + BA Start SeqCtrl(2)(or StatusCode(2))
@@ -139,7 +139,7 @@ static struct sk_buff* ieee80211_ADDBA(struct ieee80211_device* ieee, u8* Dst, P
BAReq->frame_ctl = cpu_to_le16(IEEE80211_STYPE_MANAGE_ACT); //action frame
//tag += sizeof( struct ieee80211_hdr_3addr); //move to action field
- tag = (u8*)skb_put(skb, 9);
+ tag = (u8 *)skb_put(skb, 9);
*tag ++= ACT_CAT_BA;
*tag ++= type;
// Dialog Token
@@ -150,22 +150,22 @@ static struct sk_buff* ieee80211_ADDBA(struct ieee80211_device* ieee, u8* Dst, P
// Status Code
printk("=====>to send ADDBARSP\n");
tmp = cpu_to_le16(StatusCode);
- memcpy(tag, (u8*)&tmp, 2);
+ memcpy(tag, (u8 *)&tmp, 2);
tag += 2;
}
// BA Parameter Set
tmp = cpu_to_le16(pBA->BaParamSet.shortData);
- memcpy(tag, (u8*)&tmp, 2);
+ memcpy(tag, (u8 *)&tmp, 2);
tag += 2;
// BA Timeout Value
tmp = cpu_to_le16(pBA->BaTimeoutValue);
- memcpy(tag, (u8*)&tmp, 2);
+ memcpy(tag, (u8 *)&tmp, 2);
tag += 2;
if (ACT_ADDBAREQ == type)
{
// BA Start SeqCtrl
- memcpy(tag,(u8*)&(pBA->BaStartSeqCtrl), 2);
+ memcpy(tag,(u8 *)&(pBA->BaStartSeqCtrl), 2);
tag += 2;
}
@@ -184,9 +184,9 @@ static struct sk_buff* ieee80211_ADDBA(struct ieee80211_device* ieee, u8* Dst, P
* output: none
* return: sk_buff* skb //return constructed skb to xmit
********************************************************************************************************************/
-static struct sk_buff* ieee80211_DELBA(
- struct ieee80211_device* ieee,
- u8* dst,
+static struct sk_buff *ieee80211_DELBA(
+ struct ieee80211_device *ieee,
+ u8 *dst,
PBA_RECORD pBA,
TR_SELECT TxRxSelect,
u16 ReasonCode
@@ -194,8 +194,8 @@ static struct sk_buff* ieee80211_DELBA(
{
DELBA_PARAM_SET DelbaParamSet;
struct sk_buff *skb = NULL;
- struct ieee80211_hdr_3addr* Delba = NULL;
- u8* tag = NULL;
+ struct ieee80211_hdr_3addr *Delba = NULL;
+ u8 *tag = NULL;
u16 tmp = 0;
//len = head len + DELBA Parameter Set(2) + Reason Code(2)
u16 len = 6 + ieee->tx_headroom;
@@ -224,18 +224,18 @@ static struct sk_buff* ieee80211_DELBA(
memcpy(Delba->addr3, ieee->current_network.bssid, ETH_ALEN);
Delba->frame_ctl = cpu_to_le16(IEEE80211_STYPE_MANAGE_ACT); //action frame
- tag = (u8*)skb_put(skb, 6);
+ tag = (u8 *)skb_put(skb, 6);
*tag ++= ACT_CAT_BA;
*tag ++= ACT_DELBA;
// DELBA Parameter Set
tmp = cpu_to_le16(DelbaParamSet.shortData);
- memcpy(tag, (u8*)&tmp, 2);
+ memcpy(tag, (u8 *)&tmp, 2);
tag += 2;
// Reason Code
tmp = cpu_to_le16(ReasonCode);
- memcpy(tag, (u8*)&tmp, 2);
+ memcpy(tag, (u8 *)&tmp, 2);
tag += 2;
IEEE80211_DEBUG_DATA(IEEE80211_DL_DATA|IEEE80211_DL_BA, skb->data, skb->len);
@@ -251,7 +251,7 @@ static struct sk_buff* ieee80211_DELBA(
* output: none
* notice: If any possible, please hide pBA in ieee. And temporarily use Manage Queue as softmac_mgmt_xmit() usually does
********************************************************************************************************************/
-void ieee80211_send_ADDBAReq(struct ieee80211_device* ieee, u8* dst, PBA_RECORD pBA)
+void ieee80211_send_ADDBAReq(struct ieee80211_device *ieee, u8 *dst, PBA_RECORD pBA)
{
struct sk_buff *skb = NULL;
skb = ieee80211_ADDBA(ieee, dst, pBA, 0, ACT_ADDBAREQ); //construct ACT_ADDBAREQ frames so set statuscode zero.
@@ -278,7 +278,7 @@ void ieee80211_send_ADDBAReq(struct ieee80211_device* ieee, u8* dst, PBA_RECORD
* output: none
* notice: If any possible, please hide pBA in ieee. And temporarily use Manage Queue as softmac_mgmt_xmit() usually does
********************************************************************************************************************/
-void ieee80211_send_ADDBARsp(struct ieee80211_device* ieee, u8* dst, PBA_RECORD pBA, u16 StatusCode)
+void ieee80211_send_ADDBARsp(struct ieee80211_device *ieee, u8 *dst, PBA_RECORD pBA, u16 StatusCode)
{
struct sk_buff *skb = NULL;
skb = ieee80211_ADDBA(ieee, dst, pBA, StatusCode, ACT_ADDBARSP); //construct ACT_ADDBARSP frames
@@ -305,7 +305,7 @@ void ieee80211_send_ADDBARsp(struct ieee80211_device* ieee, u8* dst, PBA_RECORD
* notice: If any possible, please hide pBA in ieee. And temporarily use Manage Queue as softmac_mgmt_xmit() usually does
********************************************************************************************************************/
-void ieee80211_send_DELBA(struct ieee80211_device* ieee, u8* dst, PBA_RECORD pBA, TR_SELECT TxRxSelect, u16 ReasonCode)
+void ieee80211_send_DELBA(struct ieee80211_device *ieee, u8 *dst, PBA_RECORD pBA, TR_SELECT TxRxSelect, u16 ReasonCode)
{
struct sk_buff *skb = NULL;
skb = ieee80211_DELBA(ieee, dst, pBA, TxRxSelect, ReasonCode); //construct ACT_ADDBARSP frames
@@ -327,14 +327,14 @@ void ieee80211_send_DELBA(struct ieee80211_device* ieee, u8* dst, PBA_RECORD pBA
* return: 0(pass), other(fail)
* notice: As this function need support of QOS, I comment some code out. And when qos is ready, this code need to be support.
********************************************************************************************************************/
-int ieee80211_rx_ADDBAReq( struct ieee80211_device* ieee, struct sk_buff *skb)
+int ieee80211_rx_ADDBAReq( struct ieee80211_device *ieee, struct sk_buff *skb)
{
- struct ieee80211_hdr_3addr* req = NULL;
+ struct ieee80211_hdr_3addr *req = NULL;
u16 rc = 0;
- u8 * dst = NULL, *pDialogToken = NULL, *tag = NULL;
+ u8 *dst = NULL, *pDialogToken = NULL, *tag = NULL;
PBA_RECORD pBA = NULL;
PBA_PARAM_SET pBaParamSet = NULL;
- u16* pBaTimeoutVal = NULL;
+ u16 *pBaTimeoutVal = NULL;
PSEQUENCE_CONTROL pBaStartSeqCtrl = NULL;
PRX_TS_RECORD pTS = NULL;
@@ -346,13 +346,13 @@ int ieee80211_rx_ADDBAReq( struct ieee80211_device* ieee, struct sk_buff *skb)
IEEE80211_DEBUG_DATA(IEEE80211_DL_DATA|IEEE80211_DL_BA, skb->data, skb->len);
- req = ( struct ieee80211_hdr_3addr*) skb->data;
- tag = (u8*)req;
- dst = (u8*)(&req->addr2[0]);
+ req = ( struct ieee80211_hdr_3addr *) skb->data;
+ tag = (u8 *)req;
+ dst = (u8 *)(&req->addr2[0]);
tag += sizeof( struct ieee80211_hdr_3addr);
pDialogToken = tag + 2; //category+action
pBaParamSet = (PBA_PARAM_SET)(tag + 3); //+DialogToken
- pBaTimeoutVal = (u16*)(tag + 5);
+ pBaTimeoutVal = (u16 *)(tag + 5);
pBaStartSeqCtrl = (PSEQUENCE_CONTROL)(req + 7);
printk("====================>rx ADDBAREQ from :%pM\n", dst);
@@ -369,7 +369,7 @@ int ieee80211_rx_ADDBAReq( struct ieee80211_device* ieee, struct sk_buff *skb)
// If there is no matched TS, reject the ADDBA request.
if( !GetTs(
ieee,
- (PTS_COMMON_INFO*)(&pTS),
+ (PTS_COMMON_INFO *)(&pTS),
dst,
(u8)(pBaParamSet->field.TID),
RX_DIR,
@@ -427,13 +427,13 @@ OnADDBAReq_Fail:
* return: 0(pass), other(fail)
* notice: As this function need support of QOS, I comment some code out. And when qos is ready, this code need to be support.
********************************************************************************************************************/
-int ieee80211_rx_ADDBARsp( struct ieee80211_device* ieee, struct sk_buff *skb)
+int ieee80211_rx_ADDBARsp( struct ieee80211_device *ieee, struct sk_buff *skb)
{
- struct ieee80211_hdr_3addr* rsp = NULL;
+ struct ieee80211_hdr_3addr *rsp = NULL;
PBA_RECORD pPendingBA, pAdmittedBA;
PTX_TS_RECORD pTS = NULL;
- u8* dst = NULL, *pDialogToken = NULL, *tag = NULL;
- u16* pStatusCode = NULL, *pBaTimeoutVal = NULL;
+ u8 *dst = NULL, *pDialogToken = NULL, *tag = NULL;
+ u16 *pStatusCode = NULL, *pBaTimeoutVal = NULL;
PBA_PARAM_SET pBaParamSet = NULL;
u16 ReasonCode;
@@ -442,14 +442,14 @@ int ieee80211_rx_ADDBARsp( struct ieee80211_device* ieee, struct sk_buff *skb)
IEEE80211_DEBUG(IEEE80211_DL_ERR, " Invalid skb len in BARSP(%d / %zu)\n", skb->len, (sizeof( struct ieee80211_hdr_3addr) + 9));
return -1;
}
- rsp = ( struct ieee80211_hdr_3addr*)skb->data;
- tag = (u8*)rsp;
- dst = (u8*)(&rsp->addr2[0]);
+ rsp = ( struct ieee80211_hdr_3addr *)skb->data;
+ tag = (u8 *)rsp;
+ dst = (u8 *)(&rsp->addr2[0]);
tag += sizeof( struct ieee80211_hdr_3addr);
pDialogToken = tag + 2;
- pStatusCode = (u16*)(tag + 3);
+ pStatusCode = (u16 *)(tag + 3);
pBaParamSet = (PBA_PARAM_SET)(tag + 5);
- pBaTimeoutVal = (u16*)(tag + 7);
+ pBaTimeoutVal = (u16 *)(tag + 7);
// Check the capability
// Since we can always receive A-MPDU, we just check if it is under HT mode.
@@ -469,7 +469,7 @@ int ieee80211_rx_ADDBARsp( struct ieee80211_device* ieee, struct sk_buff *skb)
//
if (!GetTs(
ieee,
- (PTS_COMMON_INFO*)(&pTS),
+ (PTS_COMMON_INFO *)(&pTS),
dst,
(u8)(pBaParamSet->field.TID),
TX_DIR,
@@ -560,12 +560,12 @@ OnADDBARsp_Reject:
* return: 0(pass), other(fail)
* notice: As this function need support of QOS, I comment some code out. And when qos is ready, this code need to be support.
********************************************************************************************************************/
-int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb)
+int ieee80211_rx_DELBA(struct ieee80211_device *ieee,struct sk_buff *skb)
{
- struct ieee80211_hdr_3addr* delba = NULL;
+ struct ieee80211_hdr_3addr *delba = NULL;
PDELBA_PARAM_SET pDelBaParamSet = NULL;
- u16* pReasonCode = NULL;
- u8* dst = NULL;
+ u16 *pReasonCode = NULL;
+ u8 *dst = NULL;
if (skb->len < sizeof( struct ieee80211_hdr_3addr) + 6)
{
@@ -581,11 +581,11 @@ int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb)
}
IEEE80211_DEBUG_DATA(IEEE80211_DL_DATA|IEEE80211_DL_BA, skb->data, skb->len);
- delba = ( struct ieee80211_hdr_3addr*)skb->data;
- dst = (u8*)(&delba->addr2[0]);
+ delba = ( struct ieee80211_hdr_3addr *)skb->data;
+ dst = (u8 *)(&delba->addr2[0]);
delba += sizeof( struct ieee80211_hdr_3addr);
pDelBaParamSet = (PDELBA_PARAM_SET)(delba+2);
- pReasonCode = (u16*)(delba+4);
+ pReasonCode = (u16 *)(delba+4);
if(pDelBaParamSet->field.Initiator == 1)
{
@@ -593,7 +593,7 @@ int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb)
if( !GetTs(
ieee,
- (PTS_COMMON_INFO*)&pRxTs,
+ (PTS_COMMON_INFO *)&pRxTs,
dst,
(u8)pDelBaParamSet->field.TID,
RX_DIR,
@@ -611,7 +611,7 @@ int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb)
if(!GetTs(
ieee,
- (PTS_COMMON_INFO*)&pTxTs,
+ (PTS_COMMON_INFO *)&pTxTs,
dst,
(u8)pDelBaParamSet->field.TID,
TX_DIR,
@@ -636,7 +636,7 @@ int ieee80211_rx_DELBA(struct ieee80211_device* ieee,struct sk_buff *skb)
//
void
TsInitAddBA(
- struct ieee80211_device* ieee,
+ struct ieee80211_device *ieee,
PTX_TS_RECORD pTS,
u8 Policy,
u8 bOverwritePending
@@ -665,7 +665,7 @@ TsInitAddBA(
}
void
-TsInitDelBA( struct ieee80211_device* ieee, PTS_COMMON_INFO pTsCommonInfo, TR_SELECT TxRxSelect)
+TsInitDelBA( struct ieee80211_device *ieee, PTS_COMMON_INFO pTsCommonInfo, TR_SELECT TxRxSelect)
{
if(TxRxSelect == TX_DIR)
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c b/drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c
index 268b270e9495..e956da5a2d76 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_HTProc.c
@@ -51,7 +51,7 @@ static u8 CISCO_BROADCOM[3] = {0x00, 0x17, 0x94};
* return: none
* notice: These value need be modified if any changes.
* *****************************************************************************************************************/
-void HTUpdateDefaultSetting(struct ieee80211_device* ieee)
+void HTUpdateDefaultSetting(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
//const typeof( ((struct ieee80211_device *)0)->pHTInfo ) *__mptr = &pHTInfo;
@@ -121,7 +121,7 @@ void HTUpdateDefaultSetting(struct ieee80211_device* ieee)
* return: none
* notice: Driver should not print out this message by default.
* *****************************************************************************************************************/
-void HTDebugHTCapability(u8* CapIE, u8* TitleString )
+void HTDebugHTCapability(u8 *CapIE, u8 *TitleString )
{
static u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33}; // For 11n EWC definition, 2007.07.17, by Emily
@@ -158,7 +158,7 @@ void HTDebugHTCapability(u8* CapIE, u8* TitleString )
* return: none
* notice: Driver should not print out this message by default.
* *****************************************************************************************************************/
-void HTDebugHTInfo(u8* InfoIE, u8* TitleString)
+void HTDebugHTInfo(u8 *InfoIE, u8 *TitleString)
{
static u8 EWC11NHTInfo[] = {0x00, 0x90, 0x4c, 0x34}; // For 11n EWC definition, 2007.07.17, by Emily
@@ -177,7 +177,7 @@ void HTDebugHTInfo(u8* InfoIE, u8* TitleString)
IEEE80211_DEBUG(IEEE80211_DL_HT, "\tPrimary channel = %d\n", pHTInfoEle->ControlChl);
IEEE80211_DEBUG(IEEE80211_DL_HT, "\tSenondary channel =");
- switch(pHTInfoEle->ExtChlOffset)
+ switch (pHTInfoEle->ExtChlOffset)
{
case 0:
IEEE80211_DEBUG(IEEE80211_DL_HT, "Not Present\n");
@@ -195,7 +195,7 @@ void HTDebugHTInfo(u8* InfoIE, u8* TitleString)
IEEE80211_DEBUG(IEEE80211_DL_HT, "\tRecommended channel width = %s\n", (pHTInfoEle->RecommemdedTxWidth)?"20Mhz": "40Mhz");
IEEE80211_DEBUG(IEEE80211_DL_HT, "\tOperation mode for protection = ");
- switch(pHTInfoEle->OptMode)
+ switch (pHTInfoEle->OptMode)
{
case 0:
IEEE80211_DEBUG(IEEE80211_DL_HT, "No Protection\n");
@@ -219,7 +219,7 @@ void HTDebugHTInfo(u8* InfoIE, u8* TitleString)
/*
* Return: true if station in half n mode and AP supports 40 bw
*/
-bool IsHTHalfNmode40Bandwidth(struct ieee80211_device* ieee)
+bool IsHTHalfNmode40Bandwidth(struct ieee80211_device *ieee)
{
bool retValue = false;
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -238,7 +238,7 @@ bool IsHTHalfNmode40Bandwidth(struct ieee80211_device* ieee)
return retValue;
}
-bool IsHTHalfNmodeSGI(struct ieee80211_device* ieee, bool is40MHz)
+bool IsHTHalfNmodeSGI(struct ieee80211_device *ieee, bool is40MHz)
{
bool retValue = false;
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -265,7 +265,7 @@ bool IsHTHalfNmodeSGI(struct ieee80211_device* ieee, bool is40MHz)
return retValue;
}
-u16 HTHalfMcsToDataRate(struct ieee80211_device* ieee, u8 nMcsRate)
+u16 HTHalfMcsToDataRate(struct ieee80211_device *ieee, u8 nMcsRate)
{
u8 is40MHz;
@@ -278,7 +278,7 @@ u16 HTHalfMcsToDataRate(struct ieee80211_device* ieee, u8 nMcsRate)
}
-u16 HTMcsToDataRate( struct ieee80211_device* ieee, u8 nMcsRate)
+u16 HTMcsToDataRate( struct ieee80211_device *ieee, u8 nMcsRate)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -297,7 +297,7 @@ u16 HTMcsToDataRate( struct ieee80211_device* ieee, u8 nMcsRate)
* return: tx rate
* notice: quite unsure about how to use this function //wb
* *****************************************************************************************************************/
-u16 TxCountToDataRate( struct ieee80211_device* ieee, u8 nDataRate)
+u16 TxCountToDataRate( struct ieee80211_device *ieee, u8 nDataRate)
{
//PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
u16 CCKOFDMRate[12] = {0x02 , 0x04 , 0x0b , 0x16 , 0x0c , 0x12 , 0x18 , 0x24 , 0x30 , 0x48 , 0x60 , 0x6c};
@@ -344,10 +344,10 @@ u16 TxCountToDataRate( struct ieee80211_device* ieee, u8 nDataRate)
-bool IsHTHalfNmodeAPs(struct ieee80211_device* ieee)
+bool IsHTHalfNmodeAPs(struct ieee80211_device *ieee)
{
bool retValue = false;
- struct ieee80211_network* net = &ieee->current_network;
+ struct ieee80211_network *net = &ieee->current_network;
if((memcmp(net->bssid, BELKINF5D8233V1_RALINK, 3)==0) ||
(memcmp(net->bssid, BELKINF5D82334V3_RALINK, 3)==0) ||
(memcmp(net->bssid, PCI_RALINK, 3)==0) ||
@@ -376,10 +376,10 @@ bool IsHTHalfNmodeAPs(struct ieee80211_device* ieee)
* return:
* notice:
* *****************************************************************************************************************/
-void HTIOTPeerDetermine(struct ieee80211_device* ieee)
+void HTIOTPeerDetermine(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
- struct ieee80211_network* net = &ieee->current_network;
+ struct ieee80211_network *net = &ieee->current_network;
if(net->bssht.bdRT2RTAggregation)
pHTInfo->IOTPeer = HT_IOT_PEER_REALTEK;
else if(net->broadcom_cap_exist)
@@ -413,7 +413,7 @@ void HTIOTPeerDetermine(struct ieee80211_device* ieee)
* output: none
* return: return 1 if driver should declare MCS13 only(otherwise return 0)
* *****************************************************************************************************************/
-u8 HTIOTActIsDisableMCS14(struct ieee80211_device* ieee, u8* PeerMacAddr)
+u8 HTIOTActIsDisableMCS14(struct ieee80211_device *ieee, u8 *PeerMacAddr)
{
u8 ret = 0;
return ret;
@@ -432,7 +432,7 @@ u8 HTIOTActIsDisableMCS14(struct ieee80211_device* ieee, u8* PeerMacAddr)
* Return: true if driver should disable MCS15
* 2008.04.15 Emily
*/
-bool HTIOTActIsDisableMCS15(struct ieee80211_device* ieee)
+bool HTIOTActIsDisableMCS15(struct ieee80211_device *ieee)
{
bool retValue = false;
@@ -469,7 +469,7 @@ bool HTIOTActIsDisableMCS15(struct ieee80211_device* ieee)
* Return: true if driver should disable all two spatial stream packet
* 2008.04.21 Emily
*/
-bool HTIOTActIsDisableMCSTwoSpatialStream(struct ieee80211_device* ieee, u8 *PeerMacAddr)
+bool HTIOTActIsDisableMCSTwoSpatialStream(struct ieee80211_device *ieee, u8 *PeerMacAddr)
{
bool retValue = false;
@@ -486,7 +486,7 @@ bool HTIOTActIsDisableMCSTwoSpatialStream(struct ieee80211_device* ieee, u8 *Pee
* output: none
* return: return 1 if driver should disable EDCA turbo mode(otherwise return 0)
* *****************************************************************************************************************/
-u8 HTIOTActIsDisableEDCATurbo(struct ieee80211_device* ieee, u8* PeerMacAddr)
+u8 HTIOTActIsDisableEDCATurbo(struct ieee80211_device *ieee, u8 *PeerMacAddr)
{
u8 retValue = false; // default enable EDCA Turbo mode.
// Set specific EDCA parameter for different AP in DM handler.
@@ -515,7 +515,7 @@ u8 HTIOTActIsMgntUseCCK6M(struct ieee80211_network *network)
return retValue;
}
-u8 HTIOTActIsCCDFsync(u8* PeerMacAddr)
+u8 HTIOTActIsCCDFsync(u8 *PeerMacAddr)
{
u8 retValue = 0;
if( (memcmp(PeerMacAddr, UNKNOWN_BORADCOM, 3)==0) ||
@@ -547,7 +547,7 @@ void HTResetIOTSetting(
* return: none
* notice: posHTCap can't be null and should be initialized before.
* *****************************************************************************************************************/
-void HTConstructCapabilityElement(struct ieee80211_device* ieee, u8* posHTCap, u8* len, u8 IsEncrypt)
+void HTConstructCapabilityElement(struct ieee80211_device *ieee, u8 *posHTCap, u8 *len, u8 IsEncrypt)
{
PRT_HIGH_THROUGHPUT pHT = ieee->pHTInfo;
PHT_CAPABILITY_ELE pCapELE = NULL;
@@ -666,7 +666,7 @@ void HTConstructCapabilityElement(struct ieee80211_device* ieee, u8* posHTCap, u
* return: none
* notice: posHTCap can't be null and be initialized before. only AP and IBSS sta should do this
* *****************************************************************************************************************/
-void HTConstructInfoElement(struct ieee80211_device* ieee, u8* posHTInfo, u8* len, u8 IsEncrypt)
+void HTConstructInfoElement(struct ieee80211_device *ieee, u8 *posHTInfo, u8 *len, u8 IsEncrypt)
{
PRT_HIGH_THROUGHPUT pHT = ieee->pHTInfo;
PHT_INFORMATION_ELE pHTInfoEle = (PHT_INFORMATION_ELE)posHTInfo;
@@ -738,7 +738,7 @@ void HTConstructInfoElement(struct ieee80211_device* ieee, u8* posHTInfo, u8* le
* return: none
* notice:
* *****************************************************************************************************************/
-void HTConstructRT2RTAggElement(struct ieee80211_device* ieee, u8* posRT2RTAgg, u8* len)
+void HTConstructRT2RTAggElement(struct ieee80211_device *ieee, u8 *posRT2RTAgg, u8 *len)
{
if (posRT2RTAgg == NULL) {
IEEE80211_DEBUG(IEEE80211_DL_ERR, "posRT2RTAgg can't be null in HTConstructRT2RTAggElement()\n");
@@ -792,7 +792,7 @@ void HTConstructRT2RTAggElement(struct ieee80211_device* ieee, u8* posRT2RTAgg,
* return: always we return true
* notice:
* *****************************************************************************************************************/
-u8 HT_PickMCSRate(struct ieee80211_device* ieee, u8* pOperateMCS)
+u8 HT_PickMCSRate(struct ieee80211_device *ieee, u8 *pOperateMCS)
{
u8 i;
if (pOperateMCS == NULL)
@@ -801,7 +801,7 @@ u8 HT_PickMCSRate(struct ieee80211_device* ieee, u8* pOperateMCS)
return false;
}
- switch(ieee->mode)
+ switch (ieee->mode)
{
case IEEE_A:
case IEEE_B:
@@ -855,7 +855,7 @@ u8 HT_PickMCSRate(struct ieee80211_device* ieee, u8* pOperateMCS)
* return: Highest MCS rate included in pMCSRateSet and filtered by pMCSFilter
* notice:
* *****************************************************************************************************************/
-u8 HTGetHighestMCSRate(struct ieee80211_device* ieee, u8* pMCSRateSet, u8* pMCSFilter)
+u8 HTGetHighestMCSRate(struct ieee80211_device *ieee, u8 *pMCSRateSet, u8 *pMCSFilter)
{
u8 i, j;
u8 bitMap;
@@ -907,7 +907,7 @@ u8 HTGetHighestMCSRate(struct ieee80211_device* ieee, u8* pMCSRateSet, u8* pMCSF
**
** \pHTSupportedCap: the connected STA's supported rate Capability element
*/
-u8 HTFilterMCSRate( struct ieee80211_device* ieee, u8* pSupportMCS, u8* pOperateMCS)
+u8 HTFilterMCSRate( struct ieee80211_device *ieee, u8 *pSupportMCS, u8 *pOperateMCS)
{
u8 i=0;
@@ -937,14 +937,14 @@ u8 HTFilterMCSRate( struct ieee80211_device* ieee, u8* pSupportMCS, u8* pOperate
return true;
}
-void HTSetConnectBwMode(struct ieee80211_device* ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
+void HTSetConnectBwMode(struct ieee80211_device *ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
void HTOnAssocRsp(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
PHT_CAPABILITY_ELE pPeerHTCap = NULL;
PHT_INFORMATION_ELE pPeerHTInfo = NULL;
u16 nMaxAMSDUSize = 0;
- u8* pMcsFilter = NULL;
+ u8 *pMcsFilter = NULL;
static u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33}; // For 11n EWC definition, 2007.07.17, by Emily
static u8 EWC11NHTInfo[] = {0x00, 0x90, 0x4c, 0x34}; // For 11n EWC definition, 2007.07.17, by Emily
@@ -1115,7 +1115,7 @@ void HTOnAssocRsp(struct ieee80211_device *ieee)
}
-void HTSetConnectBwModeCallback(struct ieee80211_device* ieee);
+void HTSetConnectBwModeCallback(struct ieee80211_device *ieee);
/********************************************************************************************************************
*function: initialize HT info(struct PRT_HIGH_THROUGHPUT)
* input: struct ieee80211_device* ieee
@@ -1124,7 +1124,7 @@ void HTSetConnectBwModeCallback(struct ieee80211_device* ieee);
* notice: This function is called when * (1) MPInitialization Phase * (2) Receiving of Deauthentication from AP
********************************************************************************************************************/
// TODO: Should this funciton be called when receiving of Disassociation?
-void HTInitializeHTInfo(struct ieee80211_device* ieee)
+void HTInitializeHTInfo(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -1160,10 +1160,10 @@ void HTInitializeHTInfo(struct ieee80211_device* ieee)
// Initialize all of the parameters related to 11n
- memset((void*)(&(pHTInfo->SelfHTCap)), 0, sizeof(pHTInfo->SelfHTCap));
- memset((void*)(&(pHTInfo->SelfHTInfo)), 0, sizeof(pHTInfo->SelfHTInfo));
- memset((void*)(&(pHTInfo->PeerHTCapBuf)), 0, sizeof(pHTInfo->PeerHTCapBuf));
- memset((void*)(&(pHTInfo->PeerHTInfoBuf)), 0, sizeof(pHTInfo->PeerHTInfoBuf));
+ memset((void *)(&(pHTInfo->SelfHTCap)), 0, sizeof(pHTInfo->SelfHTCap));
+ memset((void *)(&(pHTInfo->SelfHTInfo)), 0, sizeof(pHTInfo->SelfHTInfo));
+ memset((void *)(&(pHTInfo->PeerHTCapBuf)), 0, sizeof(pHTInfo->PeerHTCapBuf));
+ memset((void *)(&(pHTInfo->PeerHTInfoBuf)), 0, sizeof(pHTInfo->PeerHTInfoBuf));
pHTInfo->bSwBwInProgress = false;
pHTInfo->ChnlOp = CHNLOP_NONE;
@@ -1179,7 +1179,7 @@ void HTInitializeHTInfo(struct ieee80211_device* ieee)
//MCS rate initialized here
{
- u8* RegHTSuppRateSets = &(ieee->RegHTSuppRateSet[0]);
+ u8 *RegHTSuppRateSets = &(ieee->RegHTSuppRateSet[0]);
RegHTSuppRateSets[0] = 0xFF; //support MCS 0~7
RegHTSuppRateSets[1] = 0xFF; //support MCS 8~15
RegHTSuppRateSets[4] = 0x01; //support MCS 32
@@ -1214,7 +1214,7 @@ void HTInitializeBssDesc(PBSS_HT pBssHT)
* return: none
* notice: This function should ONLY be called before association
********************************************************************************************************************/
-void HTResetSelfAndSavePeerSetting(struct ieee80211_device* ieee, struct ieee80211_network * pNetwork)
+void HTResetSelfAndSavePeerSetting(struct ieee80211_device *ieee, struct ieee80211_network *pNetwork)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
// u16 nMaxAMSDUSize;
@@ -1297,7 +1297,7 @@ void HTResetSelfAndSavePeerSetting(struct ieee80211_device* ieee, struct ieee802
}
-void HTUpdateSelfAndPeerSetting(struct ieee80211_device* ieee, struct ieee80211_network * pNetwork)
+void HTUpdateSelfAndPeerSetting(struct ieee80211_device *ieee, struct ieee80211_network *pNetwork)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
// PHT_CAPABILITY_ELE pPeerHTCap = (PHT_CAPABILITY_ELE)pNetwork->bssht.bdHTCapBuf;
@@ -1317,7 +1317,7 @@ void HTUpdateSelfAndPeerSetting(struct ieee80211_device* ieee, struct ieee80211_
}
}
-void HTUseDefaultSetting(struct ieee80211_device* ieee)
+void HTUseDefaultSetting(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
// u8 regBwOpMode;
@@ -1370,7 +1370,7 @@ void HTUseDefaultSetting(struct ieee80211_device* ieee)
* return: return true if HT control field exists(false otherwise)
* notice:
********************************************************************************************************************/
-u8 HTCCheck(struct ieee80211_device* ieee, u8* pFrame)
+u8 HTCCheck(struct ieee80211_device *ieee, u8 *pFrame)
{
if(ieee->pHTInfo->bCurrentHTSupport)
{
@@ -1386,7 +1386,7 @@ u8 HTCCheck(struct ieee80211_device* ieee, u8* pFrame)
//
// This function set bandwidth mode in protocol layer.
//
-void HTSetConnectBwMode(struct ieee80211_device* ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset)
+void HTSetConnectBwMode(struct ieee80211_device *ieee, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
// u32 flags = 0;
@@ -1435,7 +1435,7 @@ void HTSetConnectBwMode(struct ieee80211_device* ieee, HT_CHANNEL_WIDTH Bandwidt
// spin_unlock_irqrestore(&(ieee->bw_spinlock), flags);
}
-void HTSetConnectBwModeCallback(struct ieee80211_device* ieee)
+void HTSetConnectBwModeCallback(struct ieee80211_device *ieee)
{
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h b/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
index 2348ccd70be0..f2d52ca08cd0 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
@@ -483,7 +483,7 @@ typedef struct _OCTET_STRING{
typedef struct _STA_QOS{
//DECLARE_RT_OBJECT(STA_QOS);
u8 WMMIEBuf[MAX_WMMELE_LENGTH];
- u8* WMMIE;
+ u8 *WMMIE;
// Part 1. Self QoS Mode.
QOS_MODE QosCapability; //QoS Capability, 2006-06-14 Isaiah
@@ -498,7 +498,7 @@ typedef struct _STA_QOS{
int NumBcnBeforeTrigger;
// Part 2. EDCA Parameter (perAC)
- u8 * pWMMInfoEle;
+ u8 *pWMMInfoEle;
u8 WMMParamEle[WMM_PARAM_ELEMENT_SIZE];
u8 WMMPELength;
@@ -537,12 +537,12 @@ typedef struct _BSS_QOS{
QOS_MODE bdQoSMode;
u8 bdWMMIEBuf[MAX_WMMELE_LENGTH];
- u8* bdWMMIE;
+ u8 *bdWMMIE;
QOS_ELE_SUBTYPE EleSubType;
- u8 * pWMMInfoEle;
- u8 * pWMMParamEle;
+ u8 *pWMMInfoEle;
+ u8 *pWMMParamEle;
QOS_INFO_FIELD QosInfoField;
AC_PARAM AcParameter[4];
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_TSProc.c b/drivers/staging/rtl8192u/ieee80211/rtl819x_TSProc.c
index 0310d07287a1..3058120a3243 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_TSProc.c
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_TSProc.c
@@ -234,12 +234,12 @@ void AdmitTS(struct ieee80211_device *ieee, PTS_COMMON_INFO pTsCommonInfo, u32 I
}
-PTS_COMMON_INFO SearchAdmitTRStream(struct ieee80211_device *ieee, u8* Addr, u8 TID, TR_SELECT TxRxSelect)
+PTS_COMMON_INFO SearchAdmitTRStream(struct ieee80211_device *ieee, u8 *Addr, u8 TID, TR_SELECT TxRxSelect)
{
//DIRECTION_VALUE dir;
u8 dir;
bool search_dir[4] = {0, 0, 0, 0};
- struct list_head* psearch_list; //FIXME
+ struct list_head *psearch_list; //FIXME
PTS_COMMON_INFO pRet = NULL;
if(ieee->iw_mode == IW_MODE_MASTER) //ap mode
{
@@ -311,7 +311,7 @@ PTS_COMMON_INFO SearchAdmitTRStream(struct ieee80211_device *ieee, u8* Addr, u8
void MakeTSEntry(
PTS_COMMON_INFO pTsCommonInfo,
- u8* Addr,
+ u8 *Addr,
PTSPEC_BODY pTSPEC,
PQOS_TCLAS pTCLAS,
u8 TCLAS_Num,
@@ -326,10 +326,10 @@ void MakeTSEntry(
memcpy(pTsCommonInfo->Addr, Addr, 6);
if(pTSPEC != NULL)
- memcpy((u8*)(&(pTsCommonInfo->TSpec)), (u8*)pTSPEC, sizeof(TSPEC_BODY));
+ memcpy((u8 *)(&(pTsCommonInfo->TSpec)), (u8 *)pTSPEC, sizeof(TSPEC_BODY));
for(count = 0; count < TCLAS_Num; count++)
- memcpy((u8*)(&(pTsCommonInfo->TClass[count])), (u8*)pTCLAS, sizeof(QOS_TCLAS));
+ memcpy((u8 *)(&(pTsCommonInfo->TClass[count])), (u8 *)pTCLAS, sizeof(QOS_TCLAS));
pTsCommonInfo->TClasProc = TCLAS_Proc;
pTsCommonInfo->TClasNum = TCLAS_Num;
@@ -337,9 +337,9 @@ void MakeTSEntry(
bool GetTs(
- struct ieee80211_device* ieee,
+ struct ieee80211_device *ieee,
PTS_COMMON_INFO *ppTS,
- u8* Addr,
+ u8 *Addr,
u8 TID,
TR_SELECT TxRxSelect, //Rx:1, Tx:0
bool bAddNewTs
@@ -367,7 +367,7 @@ bool GetTs(
return false;
}
- switch(TID)
+ switch (TID)
{
case 0:
case 3:
@@ -416,12 +416,12 @@ bool GetTs(
//
TSPEC_BODY TSpec;
PQOS_TSINFO pTSInfo = &TSpec.f.TSInfo;
- struct list_head* pUnusedList =
+ struct list_head *pUnusedList =
(TxRxSelect == TX_DIR)?
(&ieee->Tx_TS_Unused_List):
(&ieee->Rx_TS_Unused_List);
- struct list_head* pAddmitList =
+ struct list_head *pAddmitList =
(TxRxSelect == TX_DIR)?
(&ieee->Tx_TS_Admit_List):
(&ieee->Rx_TS_Admit_List);
@@ -473,7 +473,7 @@ bool GetTs(
}
void RemoveTsEntry(
- struct ieee80211_device* ieee,
+ struct ieee80211_device *ieee,
PTS_COMMON_INFO pTs,
TR_SELECT TxRxSelect
)
@@ -501,7 +501,7 @@ void RemoveTsEntry(
list_del_init(&pRxReorderEntry->List);
{
int i = 0;
- struct ieee80211_rxb * prxb = pRxReorderEntry->prxb;
+ struct ieee80211_rxb *prxb = pRxReorderEntry->prxb;
if (unlikely(!prxb))
{
spin_unlock_irqrestore(&(ieee->reorder_spinlock), flags);
@@ -527,7 +527,7 @@ void RemoveTsEntry(
}
}
-void RemovePeerTS(struct ieee80211_device* ieee, u8* Addr)
+void RemovePeerTS(struct ieee80211_device *ieee, u8 *Addr)
{
PTS_COMMON_INFO pTS, pTmpTS;
@@ -574,7 +574,7 @@ void RemovePeerTS(struct ieee80211_device* ieee, u8* Addr)
}
}
-void RemoveAllTS(struct ieee80211_device* ieee)
+void RemoveAllTS(struct ieee80211_device *ieee)
{
PTS_COMMON_INFO pTS, pTmpTS;
@@ -607,7 +607,7 @@ void RemoveAllTS(struct ieee80211_device* ieee)
}
}
-void TsStartAddBaProcess(struct ieee80211_device* ieee, PTX_TS_RECORD pTxTS)
+void TsStartAddBaProcess(struct ieee80211_device *ieee, PTX_TS_RECORD pTxTS)
{
if(pTxTS->bAddBaReqInProgress == false)
{
diff --git a/drivers/staging/rtl8192u/r8180_93cx6.c b/drivers/staging/rtl8192u/r8180_93cx6.c
index 7e49ad8f48f6..d2199986d132 100644
--- a/drivers/staging/rtl8192u/r8180_93cx6.c
+++ b/drivers/staging/rtl8192u/r8180_93cx6.c
@@ -22,13 +22,15 @@
void eprom_cs(struct net_device *dev, short bit)
{
- if(bit)
- write_nic_byte_E(dev, EPROM_CMD,
- (1<<EPROM_CS_SHIFT) | \
- read_nic_byte_E(dev, EPROM_CMD)); //enable EPROM
+ u8 cmdreg;
+
+ read_nic_byte_E(dev, EPROM_CMD, &cmdreg);
+ if (bit)
+ /* enable EPROM */
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg | EPROM_CS_BIT);
else
- write_nic_byte_E(dev, EPROM_CMD, read_nic_byte_E(dev, EPROM_CMD)\
- &~(1<<EPROM_CS_SHIFT)); //disable EPROM
+ /* disable EPROM */
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg & ~EPROM_CS_BIT);
force_pci_posting(dev);
udelay(EPROM_DELAY);
@@ -37,12 +39,15 @@ void eprom_cs(struct net_device *dev, short bit)
void eprom_ck_cycle(struct net_device *dev)
{
- write_nic_byte_E(dev, EPROM_CMD,
- (1<<EPROM_CK_SHIFT) | read_nic_byte_E(dev,EPROM_CMD));
+ u8 cmdreg;
+
+ read_nic_byte_E(dev, EPROM_CMD, &cmdreg);
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg | EPROM_CK_BIT);
force_pci_posting(dev);
udelay(EPROM_DELAY);
- write_nic_byte_E(dev, EPROM_CMD,
- read_nic_byte_E(dev, EPROM_CMD) &~ (1<<EPROM_CK_SHIFT));
+
+ read_nic_byte_E(dev, EPROM_CMD, &cmdreg);
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg & ~EPROM_CK_BIT);
force_pci_posting(dev);
udelay(EPROM_DELAY);
}
@@ -50,12 +55,13 @@ void eprom_ck_cycle(struct net_device *dev)
void eprom_w(struct net_device *dev,short bit)
{
- if(bit)
- write_nic_byte_E(dev, EPROM_CMD, (1<<EPROM_W_SHIFT) | \
- read_nic_byte_E(dev,EPROM_CMD));
+ u8 cmdreg;
+
+ read_nic_byte_E(dev, EPROM_CMD, &cmdreg);
+ if (bit)
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg | EPROM_W_BIT);
else
- write_nic_byte_E(dev, EPROM_CMD, read_nic_byte_E(dev,EPROM_CMD)\
- &~(1<<EPROM_W_SHIFT));
+ write_nic_byte_E(dev, EPROM_CMD, cmdreg & ~EPROM_W_BIT);
force_pci_posting(dev);
udelay(EPROM_DELAY);
@@ -64,12 +70,14 @@ void eprom_w(struct net_device *dev,short bit)
short eprom_r(struct net_device *dev)
{
- short bit;
+ u8 bit;
- bit=(read_nic_byte_E(dev, EPROM_CMD) & (1<<EPROM_R_SHIFT) );
+ read_nic_byte_E(dev, EPROM_CMD, &bit);
udelay(EPROM_DELAY);
- if(bit) return 1;
+ if (bit & EPROM_R_BIT)
+ return 1;
+
return 0;
}
diff --git a/drivers/staging/rtl8192u/r8190_rtl8256.c b/drivers/staging/rtl8192u/r8190_rtl8256.c
index cf9713fa8b9d..40b14a2d1cdb 100644
--- a/drivers/staging/rtl8192u/r8190_rtl8256.c
+++ b/drivers/staging/rtl8192u/r8190_rtl8256.c
@@ -23,7 +23,7 @@
* Return: NONE
* Note: 8226 support both 20M and 40 MHz
*---------------------------------------------------------------------------*/
-void PHY_SetRF8256Bandwidth(struct net_device* dev , HT_CHANNEL_WIDTH Bandwidth) //20M or 40M
+void PHY_SetRF8256Bandwidth(struct net_device *dev , HT_CHANNEL_WIDTH Bandwidth) //20M or 40M
{
u8 eRFPath;
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -34,7 +34,7 @@ void PHY_SetRF8256Bandwidth(struct net_device* dev , HT_CHANNEL_WIDTH Bandwidth)
if (!rtl8192_phy_CheckIsLegalRFPath(dev, eRFPath))
continue;
- switch(Bandwidth)
+ switch (Bandwidth)
{
case HT_CHANNEL_WIDTH_20:
if(priv->card_8192_version == VERSION_819xU_A || priv->card_8192_version == VERSION_819xU_B)// 8256 D-cut, E-cut, xiong: consider it later!
@@ -73,7 +73,7 @@ void PHY_SetRF8256Bandwidth(struct net_device* dev , HT_CHANNEL_WIDTH Bandwidth)
break;
default:
- RT_TRACE(COMP_ERR, "PHY_SetRF8256Bandwidth(): unknown Bandwidth: %#X\n",Bandwidth );
+ RT_TRACE(COMP_ERR, "PHY_SetRF8256Bandwidth(): unknown Bandwidth: %#X\n",Bandwidth);
break;
}
@@ -86,7 +86,7 @@ void PHY_SetRF8256Bandwidth(struct net_device* dev , HT_CHANNEL_WIDTH Bandwidth)
* Output: NONE
* Return: NONE
*---------------------------------------------------------------------------*/
-void PHY_RF8256_Config(struct net_device* dev)
+void PHY_RF8256_Config(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
// Initialize general global value
@@ -104,7 +104,7 @@ void PHY_RF8256_Config(struct net_device* dev)
* Output: NONE
* Return: NONE
*---------------------------------------------------------------------------*/
-void phy_RF8256_Config_ParaFile(struct net_device* dev)
+void phy_RF8256_Config_ParaFile(struct net_device *dev)
{
u32 u4RegValue = 0;
//static s1Byte szRadioAFile[] = RTL819X_PHY_RADIO_A;
@@ -133,7 +133,7 @@ void phy_RF8256_Config_ParaFile(struct net_device* dev)
// pHalData->RfReg0Value[eRFPath] = rtl8192_phy_QueryRFReg(dev, (RF90_RADIO_PATH_E)eRFPath, rGlobalCtrl, bMaskDWord);
/*----Store original RFENV control type----*/
- switch(eRFPath)
+ switch (eRFPath)
{
case RF90_PATH_A:
case RF90_PATH_C:
@@ -168,7 +168,7 @@ void phy_RF8256_Config_ParaFile(struct net_device* dev)
RetryTimes = ConstRetryTimes;
RF3_Final_Value = 0;
/*----Initialize RF fom connfiguration file----*/
- switch(eRFPath)
+ switch (eRFPath)
{
case RF90_PATH_A:
while(RF3_Final_Value!=RegValueToBeCheck && RetryTimes!=0)
@@ -209,7 +209,7 @@ void phy_RF8256_Config_ParaFile(struct net_device* dev)
}
/*----Restore RFENV control type----*/;
- switch(eRFPath)
+ switch (eRFPath)
{
case RF90_PATH_A:
case RF90_PATH_C:
@@ -237,14 +237,14 @@ phy_RF8256_Config_ParaFile_Fail:
}
-void PHY_SetRF8256CCKTxPower(struct net_device* dev, u8 powerlevel)
+void PHY_SetRF8256CCKTxPower(struct net_device *dev, u8 powerlevel)
{
u32 TxAGC=0;
struct r8192_priv *priv = ieee80211_priv(dev);
//modified by vivi, 20080109
TxAGC = powerlevel;
- if(priv->bDynamicTxLowPower == TRUE ) //cosa 05/22/2008 for scan
+ if(priv->bDynamicTxLowPower == TRUE) //cosa 05/22/2008 for scan
{
if(priv->CustomerID == RT_CID_819x_Netcore)
TxAGC = 0x22;
@@ -258,7 +258,7 @@ void PHY_SetRF8256CCKTxPower(struct net_device* dev, u8 powerlevel)
}
-void PHY_SetRF8256OFDMTxPower(struct net_device* dev, u8 powerlevel)
+void PHY_SetRF8256OFDMTxPower(struct net_device *dev, u8 powerlevel)
{
struct r8192_priv *priv = ieee80211_priv(dev);
//Joseph TxPower for 8192 testing
diff --git a/drivers/staging/rtl8192u/r8190_rtl8256.h b/drivers/staging/rtl8192u/r8190_rtl8256.h
index 5c1f650fe824..b64dd662761a 100644
--- a/drivers/staging/rtl8192u/r8190_rtl8256.h
+++ b/drivers/staging/rtl8192u/r8190_rtl8256.h
@@ -18,10 +18,10 @@
#else
#define RTL819X_TOTAL_RF_PATH 2 //for 8192U
#endif
-extern void PHY_SetRF8256Bandwidth(struct net_device* dev , HT_CHANNEL_WIDTH Bandwidth);
-extern void PHY_RF8256_Config(struct net_device* dev);
-extern void phy_RF8256_Config_ParaFile(struct net_device* dev);
-extern void PHY_SetRF8256CCKTxPower(struct net_device* dev, u8 powerlevel);
-extern void PHY_SetRF8256OFDMTxPower(struct net_device* dev, u8 powerlevel);
+extern void PHY_SetRF8256Bandwidth(struct net_device *dev , HT_CHANNEL_WIDTH Bandwidth);
+extern void PHY_RF8256_Config(struct net_device *dev);
+extern void phy_RF8256_Config_ParaFile(struct net_device *dev);
+extern void PHY_SetRF8256CCKTxPower(struct net_device *dev, u8 powerlevel);
+extern void PHY_SetRF8256OFDMTxPower(struct net_device *dev, u8 powerlevel);
#endif
diff --git a/drivers/staging/rtl8192u/r8192U.h b/drivers/staging/rtl8192u/r8192U.h
index bedeb330ad4f..338e7bc237c3 100644
--- a/drivers/staging/rtl8192u/r8192U.h
+++ b/drivers/staging/rtl8192u/r8192U.h
@@ -1,40 +1,38 @@
/*
- This is part of rtl8187 OpenSource driver.
- Copyright (C) Andrea Merello 2004-2005 <andreamrl@tiscali.it>
- Released under the terms of GPL (General Public Licence)
-
- Parts of this driver are based on the GPL part of the
- official realtek driver
-
- Parts of this driver are based on the rtl8192 driver skeleton
- from Patric Schenke & Andres Salomon
-
- Parts of this driver are based on the Intel Pro Wireless 2100 GPL driver
-
- We want to thank the Authors of those projects and the Ndiswrapper
- project Authors.
-*/
+ * This is part of rtl8187 OpenSource driver.
+ * Copyright (C) Andrea Merello 2004-2005 <andreamrl@tiscali.it>
+ * Released under the terms of GPL (General Public Licence)
+ *
+ * Parts of this driver are based on the GPL part of the
+ * official realtek driver
+ *
+ * Parts of this driver are based on the rtl8192 driver skeleton
+ * from Patric Schenke & Andres Salomon
+ *
+ * Parts of this driver are based on the Intel Pro Wireless 2100 GPL driver
+ *
+ * We want to thank the Authors of those projects and the Ndiswrapper
+ * project Authors.
+ */
#ifndef R819xU_H
#define R819xU_H
#include <linux/module.h>
#include <linux/kernel.h>
-//#include <linux/config.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-//#include <linux/pci.h>
#include <linux/usb.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
-#include <linux/rtnetlink.h> //for rtnl_lock()
+#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <linux/timer.h>
-#include <linux/proc_fs.h> // Necessary because we use the proc fs
+#include <linux/proc_fs.h>
#include <linux/if_arp.h>
#include <linux/random.h>
#include <asm/io.h>
@@ -42,7 +40,7 @@
#define RTL8192U
#define RTL819xU_MODULE_NAME "rtl819xU"
-//added for HW security, john.0629
+/* HW security */
#define FALSE 0
#define TRUE 1
#define MAX_KEY_LEN 61
@@ -81,90 +79,91 @@
#define BIT30 0x40000000
#define BIT31 0x80000000
-// Rx smooth factor
#define Rx_Smooth_Factor 20
-#define DMESG(x,a...)
-#define DMESGW(x,a...)
-#define DMESGE(x,a...)
+#define DMESG(x, a...)
+#define DMESGW(x, a...)
+#define DMESGE(x, a...)
extern u32 rt_global_debug_component;
#define RT_TRACE(component, x, args...) \
-do { if(rt_global_debug_component & component) \
- printk(KERN_DEBUG RTL819xU_MODULE_NAME ":" x "\n" , \
- ##args);\
-}while(0);
-
-#define COMP_TRACE BIT0 // For function call tracing.
-#define COMP_DBG BIT1 // Only for temporary debug message.
-#define COMP_INIT BIT2 // during driver initialization / halt / reset.
-
-
-#define COMP_RECV BIT3 // Receive data path.
-#define COMP_SEND BIT4 // Send part path.
-#define COMP_IO BIT5 // I/O Related. Added by Annie, 2006-03-02.
-#define COMP_POWER BIT6 // 802.11 Power Save mode or System/Device Power state related.
-#define COMP_EPROM BIT7 // 802.11 link related: join/start BSS, leave BSS.
-#define COMP_SWBW BIT8 // For bandwidth switch.
-#define COMP_POWER_TRACKING BIT9 //FOR 8190 TX POWER TRACKING
-#define COMP_TURBO BIT10 // For Turbo Mode related. By Annie, 2005-10-21.
-#define COMP_QOS BIT11 // For QoS.
-#define COMP_RATE BIT12 // For Rate Adaptive mechanism, 2006.07.02, by rcnjko.
-#define COMP_RM BIT13 // For Radio Measurement.
-#define COMP_DIG BIT14 // For DIG, 2006.09.25, by rcnjko.
-#define COMP_PHY BIT15
-#define COMP_CH BIT16 //channel setting debug
-#define COMP_TXAGC BIT17 // For Tx power, 060928, by rcnjko.
-#define COMP_HIPWR BIT18 // For High Power Mechanism, 060928, by rcnjko.
-#define COMP_HALDM BIT19 // For HW Dynamic Mechanism, 061010, by rcnjko.
-#define COMP_SEC BIT20 // Event handling
-#define COMP_LED BIT21 // For LED.
-#define COMP_RF BIT22 // For RF.
-//1!!!!!!!!!!!!!!!!!!!!!!!!!!!
-#define COMP_RXDESC BIT23 // Show Rx desc information for SD3 debug. Added by Annie, 2006-07-15.
-//1//1Attention Please!!!<11n or 8190 specific code should be put below this line>
-//1!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-#define COMP_FIRMWARE BIT24 //for firmware downloading
-#define COMP_HT BIT25 // For 802.11n HT related information. by Emily 2006-8-11
-#define COMP_AMSDU BIT26 // For A-MSDU Debugging
-
-#define COMP_SCAN BIT27
-//#define COMP_RESET BIT28
-#define COMP_DOWN BIT29 //for rm driver module
-#define COMP_RESET BIT30 //for silent reset
-#define COMP_ERR BIT31 //for error out, always on
+ do { \
+ if (rt_global_debug_component & component) \
+ pr_debug("RTL8192U: " x "\n", ##args); \
+ } while (0)
+
+#define COMP_TRACE BIT0 /* Function call tracing. */
+#define COMP_DBG BIT1
+#define COMP_INIT BIT2 /* Driver initialization/halt/reset. */
+
+
+#define COMP_RECV BIT3 /* Receive data path. */
+#define COMP_SEND BIT4 /* Send data path. */
+#define COMP_IO BIT5
+/* 802.11 Power Save mode or System/Device Power state. */
+#define COMP_POWER BIT6
+/* 802.11 link related: join/start BSS, leave BSS. */
+#define COMP_EPROM BIT7
+#define COMP_SWBW BIT8 /* Bandwidth switch. */
+#define COMP_POWER_TRACKING BIT9 /* 8190 TX Power Tracking */
+#define COMP_TURBO BIT10 /* Turbo Mode */
+#define COMP_QOS BIT11
+#define COMP_RATE BIT12 /* Rate Adaptive mechanism */
+#define COMP_RM BIT13 /* Radio Measurement */
+#define COMP_DIG BIT14
+#define COMP_PHY BIT15
+#define COMP_CH BIT16 /* Channel setting debug */
+#define COMP_TXAGC BIT17 /* Tx power */
+#define COMP_HIPWR BIT18 /* High Power Mechanism */
+#define COMP_HALDM BIT19 /* HW Dynamic Mechanism */
+#define COMP_SEC BIT20 /* Event handling */
+#define COMP_LED BIT21
+#define COMP_RF BIT22
+#define COMP_RXDESC BIT23 /* Rx desc information for SD3 debug */
+
+/* 11n or 8190 specific code */
+
+#define COMP_FIRMWARE BIT24 /* Firmware downloading */
+#define COMP_HT BIT25 /* 802.11n HT related information */
+#define COMP_AMSDU BIT26 /* A-MSDU Debugging */
+#define COMP_SCAN BIT27
+#define COMP_DOWN BIT29 /* rm driver module */
+#define COMP_RESET BIT30 /* Silent reset */
+#define COMP_ERR BIT31 /* Error out, always on */
#define RTL819x_DEBUG
#ifdef RTL819x_DEBUG
-#define assert(expr) \
- if (!(expr)) { \
- printk( "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr,__FILE__,__FUNCTION__,__LINE__); \
- }
-//wb added to debug out data buf
-//if you want print DATA buffer related BA, please set ieee80211_debug_level to DATA|BA
-#define RT_DEBUG_DATA(level, data, datalen) \
- do{ if ((rt_global_debug_component & (level)) == (level)) \
- { \
- int i; \
- u8* pdata = (u8*) data; \
- printk(KERN_DEBUG RTL819xU_MODULE_NAME ": %s()\n", __FUNCTION__); \
- for(i=0; i<(int)(datalen); i++) \
- { \
+#define RTL8192U_ASSERT(expr) \
+ do { \
+ if (!(expr)) { \
+ pr_debug("Assertion failed! %s, %s, %s, line = %d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+ } while (0)
+/*
+ * Debug out data buf.
+ * If you want to print DATA buffer related BA,
+ * please set ieee80211_debug_level to DATA|BA
+ */
+#define RT_DEBUG_DATA(level, data, datalen) \
+ do { \
+ if ((rt_global_debug_component & (level)) == (level)) { \
+ int i; \
+ u8 *pdata = (u8 *) data; \
+ pr_debug("RTL8192U: %s()\n", __func__); \
+ for (i = 0; i < (int)(datalen); i++) { \
printk("%2x ", pdata[i]); \
- if ((i+1)%16 == 0) printk("\n"); \
- } \
- printk("\n"); \
- } \
+ if ((i+1)%16 == 0) \
+ printk("\n"); \
+ } \
+ printk("\n"); \
+ } \
} while (0)
#else
-#define assert(expr) do {} while (0)
-#define RT_DEBUG_DATA(level, data, datalen) do {} while(0)
+#define RTL8192U_ASSERT(expr) do {} while (0)
+#define RT_DEBUG_DATA(level, data, datalen) do {} while (0)
#endif /* RTL8169_DEBUG */
-//
-// Queue Select Value in TxDesc
-//
+/* Queue Select Value in TxDesc */
#define QSLT_BK 0x1
#define QSLT_BE 0x0
#define QSLT_VI 0x4
@@ -208,13 +207,13 @@ do { if(rt_global_debug_component & component) \
#define IEEE80211_WATCH_DOG_TIME 2000
#define PHY_Beacon_RSSI_SLID_WIN_MAX 10
-//for txpowertracking by amy
+/* For Tx Power Tracking */
#define OFDM_Table_Length 19
#define CCK_Table_length 12
-/* for rtl819x */
+/* For rtl819x */
typedef struct _tx_desc_819x_usb {
- //DWORD 0
+ /* DWORD 0 */
u16 PktSize;
u8 Offset;
u8 Reserved0:3;
@@ -224,7 +223,7 @@ typedef struct _tx_desc_819x_usb {
u8 LINIP:1;
u8 OWN:1;
- //DWORD 1
+ /* DWORD 1 */
u8 TxFWInfoSize;
u8 RATid:3;
u8 DISFB:1;
@@ -239,27 +238,26 @@ typedef struct _tx_desc_819x_usb {
u8 SecDescAssign:1;
u8 SecType:2;
- //DWORD 2
+ /* DWORD 2 */
u16 TxBufferSize;
- //u16 Reserved2;
u8 ResvForPaddingLen:7;
u8 Reserved3:1;
u8 Reserved4;
- //DWORD 3, 4, 5
+ /* DWORD 3, 4, 5 */
u32 Reserved5;
u32 Reserved6;
u32 Reserved7;
-}tx_desc_819x_usb, *ptx_desc_819x_usb;
+} tx_desc_819x_usb, *ptx_desc_819x_usb;
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
typedef struct _tx_desc_819x_usb_aggr_subframe {
- //DWORD 0
+ /* DWORD 0 */
u16 PktSize;
u8 Offset;
u8 TxFWInfoSize;
- //DWORD 1
+ /* DWORD 1 */
u8 RATid:3;
u8 DISFB:1;
u8 USERATE:1;
@@ -274,13 +272,13 @@ typedef struct _tx_desc_819x_usb_aggr_subframe {
u8 SecType:2;
u8 PacketID:7;
u8 OWN:1;
-}tx_desc_819x_usb_aggr_subframe, *ptx_desc_819x_usb_aggr_subframe;
+} tx_desc_819x_usb_aggr_subframe, *ptx_desc_819x_usb_aggr_subframe;
#endif
typedef struct _tx_desc_cmd_819x_usb {
- //DWORD 0
+ /* DWORD 0 */
u16 Reserved0;
u8 Reserved1;
u8 Reserved2:3;
@@ -290,65 +288,64 @@ typedef struct _tx_desc_cmd_819x_usb {
u8 LINIP:1;
u8 OWN:1;
- //DOWRD 1
- //u32 Reserved3;
+ /* DOWRD 1 */
u8 TxFWInfoSize;
u8 Reserved3;
u8 QueueSelect;
u8 Reserved4;
- //DOWRD 2
+ /* DOWRD 2 */
u16 TxBufferSize;
u16 Reserved5;
- //DWORD 3,4,5
- //u32 TxBufferAddr;
- //u32 NextDescAddress;
+ /* DWORD 3, 4, 5 */
u32 Reserved6;
u32 Reserved7;
u32 Reserved8;
-}tx_desc_cmd_819x_usb, *ptx_desc_cmd_819x_usb;
+} tx_desc_cmd_819x_usb, *ptx_desc_cmd_819x_usb;
typedef struct _tx_fwinfo_819x_usb {
- //DOWRD 0
- u8 TxRate:7;
- u8 CtsEnable:1;
- u8 RtsRate:7;
- u8 RtsEnable:1;
- u8 TxHT:1;
- u8 Short:1; //Short PLCP for CCK, or short GI for 11n MCS
- u8 TxBandwidth:1; // This is used for HT MCS rate only.
- u8 TxSubCarrier:2; // This is used for legacy OFDM rate only.
- u8 STBC:2;
- u8 AllowAggregation:1;
- u8 RtsHT:1; //Interpret RtsRate field as high throughput data rate
- u8 RtsShort:1; //Short PLCP for CCK, or short GI for 11n MCS
- u8 RtsBandwidth:1; // This is used for HT MCS rate only.
- u8 RtsSubcarrier:2; // This is used for legacy OFDM rate only.
- u8 RtsSTBC:2;
- u8 EnableCPUDur:1; //Enable firmware to recalculate and assign packet duration
-
- //DWORD 1
- u32 RxMF:2;
- u32 RxAMD:3;
- u32 TxPerPktInfoFeedback:1;//1 indicate Tx info gathtered by firmware and returned by Rx Cmd
- u32 Reserved1:2;
- u32 TxAGCOffSet:4;
- u32 TxAGCSign:1;
- u32 Tx_INFO_RSVD:6;
- u32 PacketID:13;
- //u32 Reserved;
-}tx_fwinfo_819x_usb, *ptx_fwinfo_819x_usb;
+ /* DOWRD 0 */
+ u8 TxRate:7;
+ u8 CtsEnable:1;
+ u8 RtsRate:7;
+ u8 RtsEnable:1;
+ u8 TxHT:1;
+ u8 Short:1; /* Error out, always on */
+ u8 TxBandwidth:1; /* Used for HT MCS rate only */
+ u8 TxSubCarrier:2; /* Used for legacy OFDM rate only */
+ u8 STBC:2;
+ u8 AllowAggregation:1;
+ /* Interpret RtsRate field as high throughput data rate */
+ u8 RtsHT:1;
+ u8 RtsShort:1; /* Short PLCP for CCK or short GI for 11n MCS */
+ u8 RtsBandwidth:1; /* Used for HT MCS rate only */
+ u8 RtsSubcarrier:2;/* Used for legacy OFDM rate only */
+ u8 RtsSTBC:2;
+ /* Enable firmware to recalculate and assign packet duration */
+ u8 EnableCPUDur:1;
+
+ /* DWORD 1 */
+ u32 RxMF:2;
+ u32 RxAMD:3;
+ /* 1 indicate Tx info gathered by firmware and returned by Rx Cmd */
+ u32 TxPerPktInfoFeedback:1;
+ u32 Reserved1:2;
+ u32 TxAGCOffSet:4;
+ u32 TxAGCSign:1;
+ u32 Tx_INFO_RSVD:6;
+ u32 PacketID:13;
+} tx_fwinfo_819x_usb, *ptx_fwinfo_819x_usb;
typedef struct rtl8192_rx_info {
struct urb *urb;
struct net_device *dev;
u8 out_pipe;
-}rtl8192_rx_info ;
+} rtl8192_rx_info ;
-typedef struct rx_desc_819x_usb{
- //DOWRD 0
+typedef struct rx_desc_819x_usb {
+ /* DOWRD 0 */
u16 Length:14;
u16 CRC32:1;
u16 ICV:1;
@@ -356,47 +353,32 @@ typedef struct rx_desc_819x_usb{
u8 Shift:2;
u8 PHYStatus:1;
u8 SWDec:1;
- //u8 LastSeg:1;
- //u8 FirstSeg:1;
- //u8 EOR:1;
- //u8 OWN:1;
u8 Reserved1:4;
- //DWORD 1
+ /* DWORD 1 */
u32 Reserved2;
-
- //DWORD 2
- //u32 Reserved3;
-
- //DWORD 3
- //u32 BufferAddress;
-
-}rx_desc_819x_usb, *prx_desc_819x_usb;
+} rx_desc_819x_usb, *prx_desc_819x_usb;
#ifdef USB_RX_AGGREGATION_SUPPORT
-typedef struct _rx_desc_819x_usb_aggr_subframe{
- //DOWRD 0
+typedef struct _rx_desc_819x_usb_aggr_subframe {
+ /* DOWRD 0 */
u16 Length:14;
u16 CRC32:1;
u16 ICV:1;
u8 Offset;
u8 RxDrvInfoSize;
- //DOWRD 1
+ /* DOWRD 1 */
u8 Shift:2;
u8 PHYStatus:1;
u8 SWDec:1;
u8 Reserved1:4;
u8 Reserved2;
u16 Reserved3;
- //DWORD 2
- //u4Byte Reserved3;
- //DWORD 3
- //u4Byte BufferAddress;
-}rx_desc_819x_usb_aggr_subframe, *prx_desc_819x_usb_aggr_subframe;
+} rx_desc_819x_usb_aggr_subframe, *prx_desc_819x_usb_aggr_subframe;
#endif
-typedef struct rx_drvinfo_819x_usb{
- //DWORD 0
+typedef struct rx_drvinfo_819x_usb {
+ /* DWORD 0 */
u16 Reserved1:12;
u16 PartAggr:1;
u16 FirstAGGR:1;
@@ -413,14 +395,15 @@ typedef struct rx_drvinfo_819x_usb{
u8 Bcast:1;
u8 Reserved4:1;
- //DWORD 1
+ /* DWORD 1 */
u32 TSFL;
-}rx_drvinfo_819x_usb, *prx_drvinfo_819x_usb;
+} rx_drvinfo_819x_usb, *prx_drvinfo_819x_usb;
-
-#define MAX_DEV_ADDR_SIZE 8 /* support till 64 bit bus width OS */
-#define MAX_FIRMWARE_INFORMATION_SIZE 32 /*2006/04/30 by Emily forRTL8190*/
+/* Support till 64 bit bus width OS */
+#define MAX_DEV_ADDR_SIZE 8
+/* For RTL8190 */
+#define MAX_FIRMWARE_INFORMATION_SIZE 32
#define MAX_802_11_HEADER_LENGTH (40 + MAX_FIRMWARE_INFORMATION_SIZE)
#define ENCRYPTION_MAX_OVERHEAD 128
#define USB_HWDESC_HEADER_LEN sizeof(tx_desc_819x_usb)
@@ -438,55 +421,55 @@ typedef struct rx_drvinfo_819x_usb{
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
#define TX_PACKET_DRVAGGR_SUBFRAME_SHIFT_BYTES (sizeof(tx_desc_819x_usb_aggr_subframe) + sizeof(tx_fwinfo_819x_usb))
#endif
-#define scrclng 4 // octets for crc32 (FCS, ICV)
+/* Octets for crc32 (FCS, ICV) */
+#define scrclng 4
-typedef enum rf_optype
-{
+typedef enum rf_optype {
RF_OP_By_SW_3wire = 0,
RF_OP_By_FW,
RF_OP_MAX
-}rf_op_type;
+} rf_op_type;
/* 8190 Loopback Mode definition */
-typedef enum _rtl819xUsb_loopback{
+typedef enum _rtl819xUsb_loopback {
RTL819xU_NO_LOOPBACK = 0,
RTL819xU_MAC_LOOPBACK = 1,
RTL819xU_DMA_LOOPBACK = 2,
RTL819xU_CCK_LOOPBACK = 3,
-}rtl819xUsb_loopback_e;
+} rtl819xUsb_loopback_e;
/* due to rtl8192 firmware */
-typedef enum _desc_packet_type_e{
+typedef enum _desc_packet_type_e {
DESC_PACKET_TYPE_INIT = 0,
DESC_PACKET_TYPE_NORMAL = 1,
-}desc_packet_type_e;
+} desc_packet_type_e;
-typedef enum _firmware_status{
+typedef enum _firmware_status {
FW_STATUS_0_INIT = 0,
FW_STATUS_1_MOVE_BOOT_CODE = 1,
FW_STATUS_2_MOVE_MAIN_CODE = 2,
FW_STATUS_3_TURNON_CPU = 3,
FW_STATUS_4_MOVE_DATA_CODE = 4,
FW_STATUS_5_READY = 5,
-}firmware_status_e;
+} firmware_status_e;
typedef struct _rt_firmare_seg_container {
u16 seg_size;
u8 *seg_ptr;
-}fw_seg_container, *pfw_seg_container;
-typedef struct _rt_firmware{
+} fw_seg_container, *pfw_seg_container;
+typedef struct _rt_firmware {
firmware_status_e firmware_status;
u16 cmdpacket_frag_thresold;
-#define RTL8190_MAX_FIRMWARE_CODE_SIZE 64000 //64k
+#define RTL8190_MAX_FIRMWARE_CODE_SIZE 64000
u8 firmware_buf[RTL8190_MAX_FIRMWARE_CODE_SIZE];
u16 firmware_buf_size;
-}rt_firmware, *prt_firmware;
+} rt_firmware, *prt_firmware;
-//+by amy 080507
-#define MAX_RECEIVE_BUFFER_SIZE 9100 // Add this to 9100 bytes to receive A-MSDU from RT-AP
+/* Add this to 9100 bytes to receive A-MSDU from RT-AP */
+#define MAX_RECEIVE_BUFFER_SIZE 9100
-typedef struct _rt_firmware_info_819xUsb{
+typedef struct _rt_firmware_info_819xUsb {
u8 sz_info[16];
-}rt_firmware_info_819xUsb, *prt_firmware_info_819xUsb;
+} rt_firmware_info_819xUsb, *prt_firmware_info_819xUsb;
/* Firmware Queue Layout */
#define NUM_OF_FIRMWARE_QUEUE 10
@@ -527,8 +510,11 @@ typedef struct _rt_firmware_info_819xUsb{
#define RSVD_FW_QUEUE_PAGE_CMD_SHIFT 0x08
#define RSVD_FW_QUEUE_PAGE_BCN_SHIFT 0x00
#define RSVD_FW_QUEUE_PAGE_PUB_SHIFT 0x08
-//=================================================================
-//=================================================================
+
+/*
+ * =================================================================
+ * =================================================================
+ */
#define EPROM_93c46 0
#define EPROM_93c56 1
@@ -557,7 +543,7 @@ typedef enum _WIRELESS_MODE {
} WIRELESS_MODE;
-#define RTL_IOCTL_WPA_SUPPLICANT SIOCIWFIRSTPRIV+30
+#define RTL_IOCTL_WPA_SUPPLICANT (SIOCIWFIRSTPRIV + 30)
typedef struct buffer {
struct buffer *next;
@@ -565,7 +551,7 @@ typedef struct buffer {
} buffer;
-typedef struct rtl_reg_debug{
+typedef struct rtl_reg_debug {
unsigned int cmd;
struct {
unsigned char type;
@@ -574,7 +560,7 @@ typedef struct rtl_reg_debug{
unsigned char length;
} head;
unsigned char buf[0xff];
-}rtl_reg_debug;
+} rtl_reg_debug;
@@ -584,58 +570,45 @@ typedef struct rtl_reg_debug{
typedef struct _rt_9x_tx_rate_history {
u32 cck[4];
u32 ofdm[8];
- // HT_MCS[0][]: BW=0 SG=0
- // HT_MCS[1][]: BW=1 SG=0
- // HT_MCS[2][]: BW=0 SG=1
- // HT_MCS[3][]: BW=1 SG=1
u32 ht_mcs[4][16];
-}rt_tx_rahis_t, *prt_tx_rahis_t;
+} rt_tx_rahis_t, *prt_tx_rahis_t;
typedef struct _RT_SMOOTH_DATA_4RF {
- char elements[4][100];//array to store values
- u32 index; //index to current array to store
- u32 TotalNum; //num of valid elements
- u32 TotalVal[4]; //sum of valid elements
-}RT_SMOOTH_DATA_4RF, *PRT_SMOOTH_DATA_4RF;
-
-#define MAX_8192U_RX_SIZE 8192 // This maybe changed for D-cut larger aggregation size
-//stats seems messed up, clean it ASAP
+ char elements[4][100]; /* array to store values */
+ u32 index; /* index to current array to store */
+ u32 TotalNum; /* num of valid elements */
+ u32 TotalVal[4]; /* sum of valid elements */
+} RT_SMOOTH_DATA_4RF, *PRT_SMOOTH_DATA_4RF;
+
+/* This maybe changed for D-cut larger aggregation size */
+#define MAX_8192U_RX_SIZE 8192
+/* Stats seems messed up, clean it ASAP */
typedef struct Stats {
unsigned long txrdu;
-// unsigned long rxrdu;
- //unsigned long rxnolast;
- //unsigned long rxnodata;
-// unsigned long rxreset;
-// unsigned long rxnopointer;
unsigned long rxok;
unsigned long rxframgment;
unsigned long rxurberr;
unsigned long rxstaterr;
- unsigned long received_rate_histogram[4][32]; //0: Total, 1:OK, 2:CRC, 3:ICV, 2007 07 03 cosa
- unsigned long received_preamble_GI[2][32]; //0: Long preamble/GI, 1:Short preamble/GI
- unsigned long rx_AMPDUsize_histogram[5]; // level: (<4K), (4K~8K), (8K~16K), (16K~32K), (32K~64K)
- unsigned long rx_AMPDUnum_histogram[5]; // level: (<5), (5~10), (10~20), (20~40), (>40)
- unsigned long numpacket_matchbssid; // debug use only.
- unsigned long numpacket_toself; // debug use only.
- unsigned long num_process_phyinfo; // debug use only.
+ /* 0: Total, 1: OK, 2: CRC, 3: ICV */
+ unsigned long received_rate_histogram[4][32];
+ /* 0: Long preamble/GI, 1: Short preamble/GI */
+ unsigned long received_preamble_GI[2][32];
+ /* level: (<4K), (4K~8K), (8K~16K), (16K~32K), (32K~64K) */
+ unsigned long rx_AMPDUsize_histogram[5];
+ /* level: (<5), (5~10), (10~20), (20~40), (>40) */
+ unsigned long rx_AMPDUnum_histogram[5];
+ unsigned long numpacket_matchbssid;
+ unsigned long numpacket_toself;
+ unsigned long num_process_phyinfo;
unsigned long numqry_phystatus;
unsigned long numqry_phystatusCCK;
unsigned long numqry_phystatusHT;
- unsigned long received_bwtype[5]; //0: 20M, 1: funn40M, 2: upper20M, 3: lower20M, 4: duplicate
+ /* 0: 20M, 1: funn40M, 2: upper20M, 3: lower20M, 4: duplicate */
+ unsigned long received_bwtype[5];
unsigned long txnperr;
unsigned long txnpdrop;
unsigned long txresumed;
-// unsigned long rxerr;
-// unsigned long rxoverflow;
-// unsigned long rxint;
unsigned long txnpokint;
-// unsigned long txhpokint;
-// unsigned long txhperr;
-// unsigned long ints;
-// unsigned long shints;
unsigned long txoverflow;
-// unsigned long rxdmafail;
-// unsigned long txbeacon;
-// unsigned long txbeaconerr;
unsigned long txlpokint;
unsigned long txlpdrop;
unsigned long txlperr;
@@ -684,30 +657,35 @@ typedef struct Stats {
u8 last_packet_rate;
unsigned long slide_signal_strength[100];
unsigned long slide_evm[100];
- unsigned long slide_rssi_total; // For recording sliding window's RSSI value
- unsigned long slide_evm_total; // For recording sliding window's EVM value
- long signal_strength; // Transformed, in dbm. Beautified signal strength for UI, not correct.
+ /* For recording sliding window's RSSI value */
+ unsigned long slide_rssi_total;
+ /* For recording sliding window's EVM value */
+ unsigned long slide_evm_total;
+ /* Transformed in dbm. Beautified signal strength for UI, not correct */
+ long signal_strength;
long signal_quality;
long last_signal_strength_inpercent;
- long recv_signal_power; // Correct smoothed ss in Dbm, only used in driver to report real power now.
+ /* Correct smoothed ss in dbm, only used in driver
+ * to report real power now */
+ long recv_signal_power;
u8 rx_rssi_percentage[4];
u8 rx_evm_percentage[2];
long rxSNRdB[4];
rt_tx_rahis_t txrate;
- u32 Slide_Beacon_pwdb[100]; //cosa add for beacon rssi
- u32 Slide_Beacon_Total; //cosa add for beacon rssi
+ /* For beacon RSSI */
+ u32 Slide_Beacon_pwdb[100];
+ u32 Slide_Beacon_Total;
RT_SMOOTH_DATA_4RF cck_adc_pwdb;
u32 CurrentShowTxate;
} Stats;
-// Bandwidth Offset
+/* Bandwidth Offset */
#define HAL_PRIME_CHNL_OFFSET_DONT_CARE 0
#define HAL_PRIME_CHNL_OFFSET_LOWER 1
#define HAL_PRIME_CHNL_OFFSET_UPPER 2
-//+by amy 080507
typedef struct ChnlAccessSetting {
u16 SIFS_Timer;
@@ -716,35 +694,62 @@ typedef struct ChnlAccessSetting {
u16 EIFS_Timer;
u16 CWminIndex;
u16 CWmaxIndex;
-}*PCHANNEL_ACCESS_SETTING,CHANNEL_ACCESS_SETTING;
-
-typedef struct _BB_REGISTER_DEFINITION{
- u32 rfintfs; // set software control: // 0x870~0x877[8 bytes]
- u32 rfintfi; // readback data: // 0x8e0~0x8e7[8 bytes]
- u32 rfintfo; // output data: // 0x860~0x86f [16 bytes]
- u32 rfintfe; // output enable: // 0x860~0x86f [16 bytes]
- u32 rf3wireOffset; // LSSI data: // 0x840~0x84f [16 bytes]
- u32 rfLSSI_Select; // BB Band Select: // 0x878~0x87f [8 bytes]
- u32 rfTxGainStage; // Tx gain stage: // 0x80c~0x80f [4 bytes]
- u32 rfHSSIPara1; // wire parameter control1 : // 0x820~0x823,0x828~0x82b, 0x830~0x833, 0x838~0x83b [16 bytes]
- u32 rfHSSIPara2; // wire parameter control2 : // 0x824~0x827,0x82c~0x82f, 0x834~0x837, 0x83c~0x83f [16 bytes]
- u32 rfSwitchControl; //Tx Rx antenna control : // 0x858~0x85f [16 bytes]
- u32 rfAGCControl1; //AGC parameter control1 : // 0xc50~0xc53,0xc58~0xc5b, 0xc60~0xc63, 0xc68~0xc6b [16 bytes]
- u32 rfAGCControl2; //AGC parameter control2 : // 0xc54~0xc57,0xc5c~0xc5f, 0xc64~0xc67, 0xc6c~0xc6f [16 bytes]
- u32 rfRxIQImbalance; //OFDM Rx IQ imbalance matrix : // 0xc14~0xc17,0xc1c~0xc1f, 0xc24~0xc27, 0xc2c~0xc2f [16 bytes]
- u32 rfRxAFE; //Rx IQ DC ofset and Rx digital filter, Rx DC notch filter : // 0xc10~0xc13,0xc18~0xc1b, 0xc20~0xc23, 0xc28~0xc2b [16 bytes]
- u32 rfTxIQImbalance; //OFDM Tx IQ imbalance matrix // 0xc80~0xc83,0xc88~0xc8b, 0xc90~0xc93, 0xc98~0xc9b [16 bytes]
- u32 rfTxAFE; //Tx IQ DC Offset and Tx DFIR type // 0xc84~0xc87,0xc8c~0xc8f, 0xc94~0xc97, 0xc9c~0xc9f [16 bytes]
- u32 rfLSSIReadBack; //LSSI RF readback data // 0x8a0~0x8af [16 bytes]
-}BB_REGISTER_DEFINITION_T, *PBB_REGISTER_DEFINITION_T;
-
-typedef enum _RT_RF_TYPE_819xU{
+} *PCHANNEL_ACCESS_SETTING, CHANNEL_ACCESS_SETTING;
+
+typedef struct _BB_REGISTER_DEFINITION {
+ /* set software control: 0x870~0x877 [8 bytes] */
+ u32 rfintfs;
+ /* readback data: 0x8e0~0x8e7 [8 bytes] */
+ u32 rfintfi;
+ /* output data: 0x860~0x86f [16 bytes] */
+ u32 rfintfo;
+ /* output enable: 0x860~0x86f [16 bytes] */
+ u32 rfintfe;
+ /* LSSI data: 0x840~0x84f [16 bytes] */
+ u32 rf3wireOffset;
+ /* BB Band Select: 0x878~0x87f [8 bytes] */
+ u32 rfLSSI_Select;
+ /* Tx gain stage: 0x80c~0x80f [4 bytes] */
+ u32 rfTxGainStage;
+ /* wire parameter control1: 0x820~0x823, 0x828~0x82b,
+ * 0x830~0x833, 0x838~0x83b [16 bytes] */
+ u32 rfHSSIPara1;
+ /* wire parameter control2: 0x824~0x827, 0x82c~0x82f,
+ * 0x834~0x837, 0x83c~0x83f [16 bytes] */
+ u32 rfHSSIPara2;
+ /* Tx Rx antenna control: 0x858~0x85f [16 bytes] */
+ u32 rfSwitchControl;
+ /* AGC parameter control1: 0xc50~0xc53, 0xc58~0xc5b,
+ * 0xc60~0xc63, 0xc68~0xc6b [16 bytes] */
+ u32 rfAGCControl1;
+ /* AGC parameter control2: 0xc54~0xc57, 0xc5c~0xc5f,
+ * 0xc64~0xc67, 0xc6c~0xc6f [16 bytes] */
+ u32 rfAGCControl2;
+ /* OFDM Rx IQ imbalance matrix: 0xc14~0xc17, 0xc1c~0xc1f,
+ * 0xc24~0xc27, 0xc2c~0xc2f [16 bytes] */
+ u32 rfRxIQImbalance;
+ /* Rx IQ DC offset and Rx digital filter, Rx DC notch filter:
+ * 0xc10~0xc13, 0xc18~0xc1b,
+ * 0xc20~0xc23, 0xc28~0xc2b [16 bytes] */
+ u32 rfRxAFE;
+ /* OFDM Tx IQ imbalance matrix: 0xc80~0xc83, 0xc88~0xc8b,
+ * 0xc90~0xc93, 0xc98~0xc9b [16 bytes] */
+ u32 rfTxIQImbalance;
+ /* Tx IQ DC Offset and Tx DFIR type:
+ * 0xc84~0xc87, 0xc8c~0xc8f,
+ * 0xc94~0xc97, 0xc9c~0xc9f [16 bytes] */
+ u32 rfTxAFE;
+ /* LSSI RF readback data: 0x8a0~0x8af [16 bytes] */
+ u32 rfLSSIReadBack;
+} BB_REGISTER_DEFINITION_T, *PBB_REGISTER_DEFINITION_T;
+
+typedef enum _RT_RF_TYPE_819xU {
RF_TYPE_MIN = 0,
RF_8225,
RF_8256,
RF_8258,
RF_PSEUDO_11N = 4,
-}RT_RF_TYPE_819xU, *PRT_RF_TYPE_819xU;
+} RT_RF_TYPE_819xU, *PRT_RF_TYPE_819xU;
typedef struct _rate_adaptive {
u8 rate_adaptive_disabled;
@@ -762,9 +767,9 @@ typedef struct _rate_adaptive {
u32 low_rssi_threshold_ratr;
u32 low_rssi_threshold_ratr_40M;
u32 low_rssi_threshold_ratr_20M;
- u8 ping_rssi_enable; //cosa add for test
- u32 ping_rssi_ratr; //cosa add for test
- u32 ping_rssi_thresh_for_ra;//cosa add for test
+ u8 ping_rssi_enable;
+ u32 ping_rssi_ratr;
+ u32 ping_rssi_thresh_for_ra;
u32 last_ratr;
} rate_adaptive, *prate_adaptive;
@@ -778,9 +783,9 @@ typedef struct _txbbgain_struct {
} txbbgain_struct, *ptxbbgain_struct;
typedef struct _ccktxbbgain_struct {
- //The Value is from a22 to a29 one Byte one time is much Safer
+ /* The value is from a22 to a29, one byte one time is much safer */
u8 ccktxbb_valuearray[8];
-} ccktxbbgain_struct,*pccktxbbgain_struct;
+} ccktxbbgain_struct, *pccktxbbgain_struct;
typedef struct _init_gain {
@@ -791,7 +796,6 @@ typedef struct _init_gain {
u8 cca;
} init_gain, *pinit_gain;
-//by amy 0606
typedef struct _phy_ofdm_rx_status_report_819xusb {
u8 trsw_gain_X[4];
@@ -807,26 +811,26 @@ typedef struct _phy_ofdm_rx_status_report_819xusb {
u8 max_ex_pwr;
u8 sgi_en;
u8 rxsc_sgien_exflg;
-}phy_sts_ofdm_819xusb_t;
+} phy_sts_ofdm_819xusb_t;
typedef struct _phy_cck_rx_status_report_819xusb {
- /* For CCK rate descriptor. This is a unsigned 8:1 variable. LSB bit presend
- 0.5. And MSB 7 bts presend a signed value. Range from -64~+63.5. */
+ /* For CCK rate descriptor. This is an unsigned 8:1 variable.
+ * LSB bit presend 0.5. And MSB 7 bts presend a signed value.
+ * Range from -64~+63.5. */
u8 adc_pwdb_X[4];
u8 sq_rpt;
u8 cck_agc_rpt;
-}phy_sts_cck_819xusb_t;
+} phy_sts_cck_819xusb_t;
-typedef struct _phy_ofdm_rx_status_rxsc_sgien_exintfflag{
+typedef struct _phy_ofdm_rx_status_rxsc_sgien_exintfflag {
u8 reserved:4;
u8 rxsc:2;
u8 sgi_en:1;
u8 ex_intf_flag:1;
-}phy_ofdm_rx_status_rxsc_sgien_exintfflag;
+} phy_ofdm_rx_status_rxsc_sgien_exintfflag;
-typedef enum _RT_CUSTOMER_ID
-{
+typedef enum _RT_CUSTOMER_ID {
RT_CID_DEFAULT = 0,
RT_CID_8187_ALPHA0 = 1,
RT_CID_8187_SERCOMM_PS = 2,
@@ -836,25 +840,28 @@ typedef enum _RT_CUSTOMER_ID
RT_CID_819x_CAMEO = 6,
RT_CID_819x_RUNTOP = 7,
RT_CID_819x_Senao = 8,
- RT_CID_TOSHIBA = 9, // Merge by Jacken, 2008/01/31.
+ RT_CID_TOSHIBA = 9,
RT_CID_819x_Netcore = 10,
RT_CID_Nettronix = 11,
RT_CID_DLINK = 12,
RT_CID_PRONET = 13,
-}RT_CUSTOMER_ID, *PRT_CUSTOMER_ID;
+} RT_CUSTOMER_ID, *PRT_CUSTOMER_ID;
-//================================================================================
-// LED customization.
-//================================================================================
-
-typedef enum _LED_STRATEGY_8190{
- SW_LED_MODE0, // SW control 1 LED via GPIO0. It is default option.
- SW_LED_MODE1, // SW control for PCI Express
- SW_LED_MODE2, // SW control for Cameo.
- SW_LED_MODE3, // SW contorl for RunTop.
- SW_LED_MODE4, // SW control for Netcore
- HW_LED, // HW control 2 LEDs, LED0 and LED1 (there are 4 different control modes)
-}LED_STRATEGY_8190, *PLED_STRATEGY_8190;
+/*
+ * ==========================================================================
+ * LED customization.
+ * ==========================================================================
+ */
+
+typedef enum _LED_STRATEGY_8190 {
+ SW_LED_MODE0, /* SW control 1 LED via GPIO0. It is default option. */
+ SW_LED_MODE1, /* SW control for PCI Express */
+ SW_LED_MODE2, /* SW control for Cameo. */
+ SW_LED_MODE3, /* SW control for RunTop. */
+ SW_LED_MODE4, /* SW control for Netcore. */
+ /* HW control 2 LEDs, LED0 and LED1 (4 different control modes) */
+ HW_LED,
+} LED_STRATEGY_8190, *PLED_STRATEGY_8190;
typedef enum _RESET_TYPE {
RESET_TYPE_NORESET = 0x00,
@@ -863,7 +870,7 @@ typedef enum _RESET_TYPE {
} RESET_TYPE;
/* The simple tx command OP code. */
-typedef enum _tag_TxCmd_Config_Index{
+typedef enum _tag_TxCmd_Config_Index {
TXCMD_TXRA_HISTORY_CTRL = 0xFF900000,
TXCMD_RESET_TX_PKT_BUFF = 0xFF900001,
TXCMD_RESET_RX_PKT_BUFF = 0xFF900002,
@@ -871,11 +878,11 @@ typedef enum _tag_TxCmd_Config_Index{
TXCMD_SET_RX_RSSI = 0xFF900004,
TXCMD_SET_TX_PWR_TRACKING = 0xFF900005,
TXCMD_XXXX_CTRL,
-}DCMD_TXCMD_OP;
+} DCMD_TXCMD_OP;
typedef struct r8192_priv {
struct usb_device *udev;
- //added for maintain info from eeprom
+ /* For maintain info from eeprom */
short epromtype;
u16 eeprom_vid;
u16 eeprom_pid;
@@ -887,105 +894,81 @@ typedef struct r8192_priv {
int irq;
struct ieee80211_device *ieee80211;
- short card_8192; /* O: rtl8192, 1:rtl8185 V B/C, 2:rtl8185 V D */
- u8 card_8192_version; /* if TCR reports card V B/C this discriminates */
-// short phy_ver; /* meaningful for rtl8225 1:A 2:B 3:C */
+ /* O: rtl8192, 1: rtl8185 V B/C, 2: rtl8185 V D */
+ short card_8192;
+ /* If TCR reports card V B/C, this discriminates */
+ u8 card_8192_version;
short enable_gpio0;
- enum card_type {PCI,MINIPCI,CARDBUS,USB}card_type;
+ enum card_type {
+ PCI, MINIPCI, CARDBUS, USB
+ } card_type;
short hw_plcp_len;
short plcp_preamble_mode;
spinlock_t irq_lock;
-// spinlock_t irq_th_lock;
spinlock_t tx_lock;
struct mutex mutex;
- //spinlock_t rf_lock; //used to lock rf write operation added by wb
u16 irq_mask;
-// short irq_enabled;
-// struct net_device *dev; //comment this out.
short chan;
short sens;
short max_sens;
- // u8 chtxpwr[15]; //channels from 1 to 14, 0 not used
-// u8 chtxpwr_ofdm[15]; //channels from 1 to 14, 0 not used
-// u8 cck_txpwr_base;
-// u8 ofdm_txpwr_base;
-// u8 challow[15]; //channels from 1 to 14, 0 not used
short up;
- short crcmon; //if 1 allow bad crc frame reception in monitor mode
-// short prism_hdr;
-
-// struct timer_list scan_timer;
- /*short scanpending;
- short stopscan;*/
-// spinlock_t scan_lock;
-// u8 active_probe;
- //u8 active_scan_num;
+ /* If 1, allow bad crc frame, reception in monitor mode */
+ short crcmon;
+
struct semaphore wx_sem;
- struct semaphore rf_sem; //used to lock rf write operation added by wb, modified by david
-// short hw_wep;
-
-// short digphy;
-// short antb;
-// short diversity;
-// u8 cs_treshold;
-// short rcr_csense;
- u8 rf_type; //0 means 1T2R, 1 means 2T4R
+ struct semaphore rf_sem; /* Used to lock rf write operation */
+
+ u8 rf_type; /* 0: 1T2R, 1: 2T4R */
RT_RF_TYPE_819xU rf_chip;
-// u32 key0[4];
- short (*rf_set_sens)(struct net_device *dev,short sens);
- u8 (*rf_set_chan)(struct net_device *dev,u8 ch);
+ short (*rf_set_sens)(struct net_device *dev, short sens);
+ u8 (*rf_set_chan)(struct net_device *dev, u8 ch);
void (*rf_close)(struct net_device *dev);
void (*rf_init)(struct net_device *dev);
- //short rate;
short promisc;
- /*stats*/
+ /* Stats */
struct Stats stats;
struct iw_statistics wstats;
- /*RX stuff*/
-// u32 *rxring;
-// u32 *rxringtail;
-// dma_addr_t rxringdma;
+ /* RX stuff */
struct urb **rx_urb;
struct urb **rx_cmd_urb;
#ifdef THOMAS_BEACON
u32 *oldaddr;
#endif
#ifdef THOMAS_TASKLET
- atomic_t irt_counter;//count for irq_rx_tasklet
+ atomic_t irt_counter; /* count for irq_rx_tasklet */
#endif
#ifdef JACKSON_NEW_RX
struct sk_buff **pp_rxskb;
int rx_inx;
#endif
-/* modified by davad for Rx process */
struct sk_buff_head rx_queue;
struct sk_buff_head skb_queue;
struct work_struct qos_activate;
short tx_urb_index;
- atomic_t tx_pending[0x10];//UART_PRIORITY+1
+ atomic_t tx_pending[0x10]; /* UART_PRIORITY + 1 */
struct tasklet_struct irq_rx_tasklet;
struct urb *rxurb_task;
- //2 Tx Related variables
+ /* Tx Related variables */
u16 ShortRetryLimit;
u16 LongRetryLimit;
u32 TransmitConfig;
- u8 RegCWinMin; // For turbo mode CW adaptive. Added by Annie, 2005-10-27.
+ u8 RegCWinMin; /* For turbo mode CW adaptive */
u32 LastRxDescTSFHigh;
u32 LastRxDescTSFLow;
- //2 Rx Related variables
+ /* Rx Related variables */
u16 EarlyRxThreshold;
u32 ReceiveConfig;
u8 AcmControl;
@@ -1000,13 +983,13 @@ typedef struct r8192_priv {
struct work_struct reset_wq;
/**********************************************************/
- //for rtl819xUsb
+ /* For rtl819xUsb */
u16 basic_rate;
u8 short_preamble;
u8 slot_time;
bool bDcut;
bool bCurrentRxAggrEnable;
- u8 Rf_Mode; //add for Firmware RF -R/W switch
+ u8 Rf_Mode; /* For Firmware RF -R/W switch */
prt_firmware pFirmware;
rtl819xUsb_loopback_e LoopbackMode;
u16 EEPROMTxPowerDiff;
@@ -1014,71 +997,70 @@ typedef struct r8192_priv {
u8 EEPROMPwDiff;
u8 EEPROMCrystalCap;
u8 EEPROM_Def_Ver;
- u8 EEPROMTxPowerLevelCCK;// CCK channel 1~14
+ u8 EEPROMTxPowerLevelCCK; /* CCK channel 1~14 */
u8 EEPROMTxPowerLevelCCK_V1[3];
- u8 EEPROMTxPowerLevelOFDM24G[3]; // OFDM 2.4G channel 1~14
- u8 EEPROMTxPowerLevelOFDM5G[24]; // OFDM 5G
+ u8 EEPROMTxPowerLevelOFDM24G[3]; /* OFDM 2.4G channel 1~14 */
+ u8 EEPROMTxPowerLevelOFDM5G[24]; /* OFDM 5G */
-/*PHY related*/
- BB_REGISTER_DEFINITION_T PHYRegDef[4]; //Radio A/B/C/D
- // Read/write are allow for following hardware information variables
+ /* PHY related */
+ BB_REGISTER_DEFINITION_T PHYRegDef[4]; /* Radio A/B/C/D */
+ /* Read/write are allow for following hardware information variables */
u32 MCSTxPowerLevelOriginalOffset[6];
u32 CCKTxPowerLevelOriginalOffset;
- u8 TxPowerLevelCCK[14]; // CCK channel 1~14
- u8 TxPowerLevelOFDM24G[14]; // OFDM 2.4G channel 1~14
- u8 TxPowerLevelOFDM5G[14]; // OFDM 5G
+ u8 TxPowerLevelCCK[14]; /* CCK channel 1~14 */
+ u8 TxPowerLevelOFDM24G[14]; /* OFDM 2.4G channel 1~14 */
+ u8 TxPowerLevelOFDM5G[14]; /* OFDM 5G */
u32 Pwr_Track;
u8 TxPowerDiff;
- u8 AntennaTxPwDiff[2]; // Antenna gain offset, index 0 for B, 1 for C, and 2 for D
- u8 CrystalCap; // CrystalCap.
- u8 ThermalMeter[2]; // ThermalMeter, index 0 for RFIC0, and 1 for RFIC1
+ u8 AntennaTxPwDiff[2]; /* Antenna gain offset, 0: B, 1: C, 2: D */
+ u8 CrystalCap;
+ u8 ThermalMeter[2]; /* index 0: RFIC0, index 1: RFIC1 */
u8 CckPwEnl;
- // Use to calculate PWBD.
+ /* Use to calculate PWBD */
u8 bCckHighPower;
long undecorated_smoothed_pwdb;
- //for set channel
+ /* For set channel */
u8 SwChnlInProgress;
u8 SwChnlStage;
u8 SwChnlStep;
u8 SetBWModeInProgress;
HT_CHANNEL_WIDTH CurrentChannelBW;
u8 ChannelPlan;
- // 8190 40MHz mode
- //
- u8 nCur40MhzPrimeSC; // Control channel sub-carrier
- // Joseph test for shorten RF configuration time.
- // We save RF reg0 in this variable to reduce RF reading.
- //
+ /* 8190 40MHz mode */
+ /* Control channel sub-carrier */
+ u8 nCur40MhzPrimeSC;
+ /* Test for shorten RF configuration time.
+ * We save RF reg0 in this variable to reduce RF reading. */
u32 RfReg0Value[4];
u8 NumTotalRFPath;
bool brfpath_rxenable[4];
- //RF set related
+ /* RF set related */
bool SetRFPowerStateInProgress;
-//+by amy 080507
struct timer_list watch_dog_timer;
-//+by amy 080515 for dynamic mechenism
- //Add by amy Tx Power Control for Near/Far Range 2008/05/15
- bool bdynamic_txpower; //bDynamicTxPower
- bool bDynamicTxHighPower; // Tx high power state
- bool bDynamicTxLowPower; // Tx low power state
+ /* For dynamic mechanism */
+ /* Tx Power Control for Near/Far Range */
+ bool bdynamic_txpower;
+ bool bDynamicTxHighPower;
+ bool bDynamicTxLowPower;
bool bLastDTPFlag_High;
bool bLastDTPFlag_Low;
bool bstore_last_dtpflag;
- bool bstart_txctrl_bydtp; //Define to discriminate on High power State or on sitesuvey to change Tx gain index
- //Add by amy for Rate Adaptive
+ /* Define to discriminate on High power State or
+ * on sitesurvey to change Tx gain index */
+ bool bstart_txctrl_bydtp;
rate_adaptive rate_adaptive;
- //Add by amy for TX power tracking
- //2008/05/15 Mars OPEN/CLOSE TX POWER TRACKING
- txbbgain_struct txbbgain_table[TxBBGainTableLength];
- u8 txpower_count;//For 6 sec do tracking again
- bool btxpower_trackingInit;
- u8 OFDM_index;
- u8 CCK_index;
- //2007/09/10 Mars Add CCK TX Power Tracking
+ /* TX power tracking
+ * OPEN/CLOSE TX POWER TRACKING */
+ txbbgain_struct txbbgain_table[TxBBGainTableLength];
+ u8 txpower_count; /* For 6 sec do tracking again */
+ bool btxpower_trackingInit;
+ u8 OFDM_index;
+ u8 CCK_index;
+ /* CCK TX Power Tracking */
ccktxbbgain_struct cck_txbbgain_table[CCKTxBBGainTableLength];
ccktxbbgain_struct cck_txbbgain_ch14_table[CCKTxBBGainTableLength];
u8 rfa_txpowertrackingindex;
@@ -1095,15 +1077,14 @@ typedef struct r8192_priv {
bool bcck_in_ch14;
bool btxpowerdata_readfromEEPORM;
u16 TSSI_13dBm;
- //For Backup Initial Gain
init_gain initgain_backup;
u8 DefaultInitialGain[4];
- // For EDCA Turbo mode, Added by amy 080515.
+ /* For EDCA Turbo mode */
bool bis_any_nonbepkts;
bool bcurrent_turbo_EDCA;
bool bis_cur_rdlstate;
struct timer_list fsync_timer;
- bool bfsync_processing; // 500ms Fsync timer is active or not
+ bool bfsync_processing; /* 500ms Fsync timer is active or not */
u32 rate_record;
u32 rateCountDiffRecord;
u32 ContinueDiffCount;
@@ -1112,17 +1093,14 @@ typedef struct r8192_priv {
u8 framesync;
u32 framesyncC34;
u8 framesyncMonitor;
- //Added by amy 080516 for RX related
u16 nrxAMPDU_size;
u8 nrxAMPDU_aggr_num;
- //by amy for gpio
+ /* For gpio */
bool bHwRadioOff;
- //by amy for reset_count
u32 reset_count;
bool bpbc_pressed;
- //by amy for debug
u32 txpower_checkcnt;
u32 txpower_tracking_callback_cnt;
u8 thermal_read_val[40];
@@ -1131,7 +1109,7 @@ typedef struct r8192_priv {
u32 ccktxpower_adjustcnt_ch14;
u8 tx_fwinfo_force_subcarriermode;
u8 tx_fwinfo_force_subcarrierval;
- //by amy for silent reset
+ /* For silent reset */
RESET_TYPE ResetProgress;
bool bForcedSilentReset;
bool bDisableNormalResetCheck;
@@ -1144,7 +1122,7 @@ typedef struct r8192_priv {
u16 SifsTime;
- //define work item by amy 080526
+ /* Define work item */
struct delayed_work update_beacon_wq;
struct delayed_work watch_dog_wq;
@@ -1153,42 +1131,32 @@ typedef struct r8192_priv {
struct delayed_work gpio_change_rf_wq;
struct delayed_work initialgain_operate_wq;
struct workqueue_struct *priv_wq;
-}r8192_priv;
+} r8192_priv;
-// for rtl8187
-// now mirging to rtl8187B
-/*
-typedef enum{
- LOW_PRIORITY = 0x02,
- NORM_PRIORITY
- } priority_t;
-*/
-//for rtl8187B
+/* For rtl8187B */
typedef enum{
BULK_PRIORITY = 0x01,
- //RSVD0,
- //RSVD1,
LOW_PRIORITY,
NORM_PRIORITY,
VO_PRIORITY,
- VI_PRIORITY, //0x05
+ VI_PRIORITY,
BE_PRIORITY,
BK_PRIORITY,
RSVD2,
RSVD3,
- BEACON_PRIORITY, //0x0A
+ BEACON_PRIORITY,
HIGH_PRIORITY,
MANAGE_PRIORITY,
RSVD4,
RSVD5,
- UART_PRIORITY //0x0F
+ UART_PRIORITY
} priority_t;
-typedef enum{
+typedef enum {
NIC_8192U = 1,
NIC_8190P = 2,
NIC_8192E = 3,
- } nic_t;
+} nic_t;
#ifdef JOHN_HWSEC
@@ -1200,19 +1168,19 @@ struct ssid_thread {
bool init_firmware(struct net_device *dev);
short rtl819xU_tx_cmd(struct net_device *dev, struct sk_buff *skb);
-short rtl8192_tx(struct net_device *dev, struct sk_buff* skb);
+short rtl8192_tx(struct net_device *dev, struct sk_buff *skb);
u32 read_cam(struct net_device *dev, u8 addr);
void write_cam(struct net_device *dev, u8 addr, u32 data);
-u8 read_nic_byte(struct net_device *dev, int x);
-u8 read_nic_byte_E(struct net_device *dev, int x);
-u32 read_nic_dword(struct net_device *dev, int x);
-u16 read_nic_word(struct net_device *dev, int x) ;
-void write_nic_byte(struct net_device *dev, int x,u8 y);
-void write_nic_byte_E(struct net_device *dev, int x,u8 y);
-void write_nic_word(struct net_device *dev, int x,u16 y);
-void write_nic_dword(struct net_device *dev, int x,u32 y);
+int read_nic_byte(struct net_device *dev, int x, u8 *data);
+int read_nic_byte_E(struct net_device *dev, int x, u8 *data);
+int read_nic_dword(struct net_device *dev, int x, u32 *data);
+int read_nic_word(struct net_device *dev, int x, u16 *data);
+void write_nic_byte(struct net_device *dev, int x, u8 y);
+void write_nic_byte_E(struct net_device *dev, int x, u8 y);
+void write_nic_word(struct net_device *dev, int x, u16 y);
+void write_nic_dword(struct net_device *dev, int x, u32 y);
void force_pci_posting(struct net_device *dev);
void rtl8192_rtx_disable(struct net_device *);
@@ -1220,26 +1188,24 @@ void rtl8192_rx_enable(struct net_device *);
void rtl8192_tx_enable(struct net_device *);
void rtl8192_disassociate(struct net_device *dev);
-//void fix_rx_fifo(struct net_device *dev);
-void rtl8185_set_rf_pins_enable(struct net_device *dev,u32 a);
+void rtl8185_set_rf_pins_enable(struct net_device *dev, u32 a);
-void rtl8192_set_anaparam(struct net_device *dev,u32 a);
-void rtl8185_set_anaparam2(struct net_device *dev,u32 a);
+void rtl8192_set_anaparam(struct net_device *dev, u32 a);
+void rtl8185_set_anaparam2(struct net_device *dev, u32 a);
void rtl8192_update_msr(struct net_device *dev);
int rtl8192_down(struct net_device *dev);
int rtl8192_up(struct net_device *dev);
void rtl8192_commit(struct net_device *dev);
-void rtl8192_set_chan(struct net_device *dev,short ch);
+void rtl8192_set_chan(struct net_device *dev, short ch);
void write_phy(struct net_device *dev, u8 adr, u8 data);
void write_phy_cck(struct net_device *dev, u8 adr, u32 data);
void write_phy_ofdm(struct net_device *dev, u8 adr, u32 data);
void rtl8185_tx_antenna(struct net_device *dev, u8 ant);
void rtl8192_set_rxconf(struct net_device *dev);
-//short check_nic_enough_desc(struct net_device *dev, priority_t priority);
-extern void rtl819xusb_beacon_tx(struct net_device *dev,u16 tx_rate);
+extern void rtl819xusb_beacon_tx(struct net_device *dev, u16 tx_rate);
void EnableHWSecurityConfig8192(struct net_device *dev);
-void setKey(struct net_device *dev, u8 EntryNo, u8 KeyIndex, u16 KeyType, u8 *MacAddr, u8 DefaultKey, u32 *KeyContent );
+void setKey(struct net_device *dev, u8 EntryNo, u8 KeyIndex, u16 KeyType, u8 *MacAddr, u8 DefaultKey, u32 *KeyContent);
#endif
diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
index 71f5cde9ed1c..c880adcaf0fd 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -25,12 +25,35 @@
*/
#ifndef CONFIG_FORCE_HARD_FLOAT
-double __floatsidf (int i) { return i; }
-unsigned int __fixunsdfsi (double d) { return d; }
-double __adddf3(double a, double b) { return a+b; }
-double __addsf3(float a, float b) { return a+b; }
-double __subdf3(double a, double b) { return a-b; }
-double __extendsfdf2(float a) {return a;}
+double __floatsidf(int i)
+{
+ return i;
+}
+
+unsigned int __fixunsdfsi(double d)
+{
+ return d;
+}
+
+double __adddf3(double a, double b)
+{
+ return a+b;
+}
+
+double __addsf3(float a, float b)
+{
+ return a+b;
+}
+
+double __subdf3(double a, double b)
+{
+ return a-b;
+}
+
+double __extendsfdf2(float a)
+{
+ return a;
+}
#endif
#undef LOOP_TEST
@@ -68,7 +91,6 @@ double __extendsfdf2(float a) {return a;}
#include "r819xU_phyreg.h"
#include "r819xU_cmdpkt.h"
#include "r8192U_dm.h"
-//#include "r8192xU_phyreg.h"
#include <linux/usb.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
@@ -81,26 +103,9 @@ double __extendsfdf2(float a) {return a;}
#include "dot11d.h"
//set here to open your trace code. //WB
-u32 rt_global_debug_component = \
- // COMP_INIT |
-// COMP_DBG |
- // COMP_EPROM |
-// COMP_PHY |
- // COMP_RF |
-// COMP_FIRMWARE |
-// COMP_CH |
- // COMP_POWER_TRACKING |
-// COMP_RATE |
- // COMP_TXAGC |
- // COMP_TRACE |
- COMP_DOWN |
- // COMP_RECV |
- // COMP_SWBW |
+u32 rt_global_debug_component = COMP_DOWN |
COMP_SEC |
- // COMP_RESET |
- // COMP_SEND |
- // COMP_EVENTS |
- COMP_ERR ; //always open err flags on
+ COMP_ERR; //always open err flags on
#define TOTAL_CAM_ENTRY 32
#define CAM_CONTENT_COUNT 8
@@ -130,24 +135,22 @@ MODULE_VERSION("V 1.1");
MODULE_DEVICE_TABLE(usb, rtl8192_usb_id_tbl);
MODULE_DESCRIPTION("Linux driver for Realtek RTL8192 USB WiFi cards");
-static char* ifname = "wlan%d";
+static char *ifname = "wlan%d";
static int hwwep = 1; //default use hw. set 0 to use software security
static int channels = 0x3fff;
-module_param(ifname, charp, S_IRUGO|S_IWUSR );
-//module_param(hwseqnum,int, S_IRUGO|S_IWUSR);
-module_param(hwwep,int, S_IRUGO|S_IWUSR);
-module_param(channels,int, S_IRUGO|S_IWUSR);
+module_param(ifname, charp, S_IRUGO|S_IWUSR);
+module_param(hwwep, int, S_IRUGO|S_IWUSR);
+module_param(channels, int, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(ifname," Net interface name, wlan%d=default");
-//MODULE_PARM_DESC(hwseqnum," Try to use hardware 802.11 header sequence numbers. Zero=default");
-MODULE_PARM_DESC(hwwep," Try to use hardware security support. ");
-MODULE_PARM_DESC(channels," Channel bitmask for specific locales. NYI");
+MODULE_PARM_DESC(ifname, " Net interface name, wlan%d=default");
+MODULE_PARM_DESC(hwwep, " Try to use hardware security support. ");
+MODULE_PARM_DESC(channels, " Channel bitmask for specific locales. NYI");
static int rtl8192_usb_probe(struct usb_interface *intf,
- const struct usb_device_id *id);
+ const struct usb_device_id *id);
static void rtl8192_usb_disconnect(struct usb_interface *intf);
@@ -169,7 +172,7 @@ static struct usb_driver rtl8192_usb_driver = {
typedef struct _CHANNEL_LIST {
u8 Channel[32];
u8 Len;
-}CHANNEL_LIST, *PCHANNEL_LIST;
+} CHANNEL_LIST, *PCHANNEL_LIST;
static CHANNEL_LIST ChannelPlan[] = {
{{1,2,3,4,5,6,7,8,9,10,11,36,40,44,48,52,56,60,64,149,153,157,161,165},24}, //FCC
@@ -185,12 +188,11 @@ static CHANNEL_LIST ChannelPlan[] = {
{{1,2,3,4,5,6,7,8,9,10,11,12,13,14},14} //For Global Domain. 1-11:active scan, 12-14 passive scan. //+YJ, 080626
};
-static void rtl819x_set_channel_map(u8 channel_plan, struct r8192_priv* priv)
+static void rtl819x_set_channel_map(u8 channel_plan, struct r8192_priv *priv)
{
- int i, max_chan=-1, min_chan=-1;
- struct ieee80211_device* ieee = priv->ieee80211;
- switch (channel_plan)
- {
+ int i, max_chan = -1, min_chan = -1;
+ struct ieee80211_device *ieee = priv->ieee80211;
+ switch (channel_plan) {
case COUNTRY_CODE_FCC:
case COUNTRY_CODE_IC:
case COUNTRY_CODE_ETSI:
@@ -200,22 +202,21 @@ static void rtl819x_set_channel_map(u8 channel_plan, struct r8192_priv* priv)
case COUNTRY_CODE_MKK1:
case COUNTRY_CODE_ISRAEL:
case COUNTRY_CODE_TELEC:
- case COUNTRY_CODE_MIC:
+ case COUNTRY_CODE_MIC:
Dot11d_Init(ieee);
ieee->bGlobalDomain = false;
//actually 8225 & 8256 rf chips only support B,G,24N mode
if ((priv->rf_chip == RF_8225) || (priv->rf_chip == RF_8256)) {
min_chan = 1;
max_chan = 14;
- }
- else {
- RT_TRACE(COMP_ERR, "unknown rf chip, can't set channel map in function:%s()\n", __FUNCTION__);
+ } else {
+ RT_TRACE(COMP_ERR, "unknown rf chip, can't set channel map in function:%s()\n", __func__);
}
if (ChannelPlan[channel_plan].Len != 0) {
// Clear old channel map
memset(GET_DOT11D_INFO(ieee)->channel_map, 0, sizeof(GET_DOT11D_INFO(ieee)->channel_map));
// Set new channel map
- for (i=0;i<ChannelPlan[channel_plan].Len;i++) {
+ for (i = 0; i < ChannelPlan[channel_plan].Len; i++) {
if (ChannelPlan[channel_plan].Channel[i] < min_chan || ChannelPlan[channel_plan].Channel[i] > max_chan)
break;
GET_DOT11D_INFO(ieee)->channel_map[ChannelPlan[channel_plan].Channel[i]] = 1;
@@ -228,19 +229,13 @@ static void rtl819x_set_channel_map(u8 channel_plan, struct r8192_priv* priv)
Dot11d_Reset(ieee);
ieee->bGlobalDomain = true;
break;
-
+
default:
break;
}
}
-#define rx_hal_is_cck_rate(_pdrvinfo)\
- (_pdrvinfo->RxRate == DESC90_RATE1M ||\
- _pdrvinfo->RxRate == DESC90_RATE2M ||\
- _pdrvinfo->RxRate == DESC90_RATE5_5M ||\
- _pdrvinfo->RxRate == DESC90_RATE11M) &&\
- !_pdrvinfo->RxHT\
void CamResetAllEntry(struct net_device *dev)
@@ -249,12 +244,6 @@ void CamResetAllEntry(struct net_device *dev)
//2004/02/11 In static WEP, OID_ADD_KEY or OID_ADD_WEP are set before STA associate to AP.
// However, ResetKey is called on OID_802_11_INFRASTRUCTURE_MODE and MlmeAssociateRequest
// In this condition, Cam can not be reset because upper layer will not set this static key again.
- //if(Adapter->EncAlgorithm == WEP_Encryption)
- // return;
-//debug
- //DbgPrint("========================================\n");
- //DbgPrint(" Call ResetAllEntry \n");
- //DbgPrint("========================================\n\n");
ulcommand |= BIT31|BIT30;
write_nic_dword(dev, RWCAM, ulcommand);
@@ -264,13 +253,16 @@ void CamResetAllEntry(struct net_device *dev)
void write_cam(struct net_device *dev, u8 addr, u32 data)
{
write_nic_dword(dev, WCAMI, data);
- write_nic_dword(dev, RWCAM, BIT31|BIT16|(addr&0xff) );
+ write_nic_dword(dev, RWCAM, BIT31|BIT16|(addr&0xff));
}
u32 read_cam(struct net_device *dev, u8 addr)
{
- write_nic_dword(dev, RWCAM, 0x80000000|(addr&0xff) );
- return read_nic_dword(dev, 0xa8);
+ u32 data;
+
+ write_nic_dword(dev, RWCAM, 0x80000000|(addr&0xff));
+ read_nic_dword(dev, 0xa8, &data);
+ return data;
}
void write_nic_byte_E(struct net_device *dev, int indx, u8 data)
@@ -280,32 +272,29 @@ void write_nic_byte_E(struct net_device *dev, int indx, u8 data)
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
- RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
- indx|0xfe00, 0, &data, 1, HZ / 2);
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ indx|0xfe00, 0, &data, 1, HZ / 2);
if (status < 0)
- {
- printk("write_nic_byte_E TimeOut! status:%d\n", status);
- }
+ netdev_err(dev, "write_nic_byte_E TimeOut! status: %d\n", status);
}
-u8 read_nic_byte_E(struct net_device *dev, int indx)
+int read_nic_byte_E(struct net_device *dev, int indx, u8 *data)
{
int status;
- u8 data;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
- indx|0xfe00, 0, &data, 1, HZ / 2);
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ indx|0xfe00, 0, data, 1, HZ / 2);
- if (status < 0)
- {
- printk("read_nic_byte_E TimeOut! status:%d\n", status);
+ if (status < 0) {
+ netdev_err(dev, "%s failure status: %d\n", __func__, status);
+ return status;
}
- return data;
+ return 0;
}
//as 92U has extend page from 4 to 16, so modify functions below.
void write_nic_byte(struct net_device *dev, int indx, u8 data)
@@ -316,13 +305,11 @@ void write_nic_byte(struct net_device *dev, int indx, u8 data)
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
- RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
- (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 1, HZ / 2);
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 1, HZ / 2);
if (status < 0)
- {
- printk("write_nic_byte TimeOut! status:%d\n", status);
- }
+ netdev_err(dev, "write_nic_byte TimeOut! status: %d\n", status);
}
@@ -337,13 +324,11 @@ void write_nic_word(struct net_device *dev, int indx, u16 data)
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
- RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
- (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 2, HZ / 2);
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 2, HZ / 2);
if (status < 0)
- {
- printk("write_nic_word TimeOut! status:%d\n", status);
- }
+ netdev_err(dev, "write_nic_word TimeOut! status: %d\n", status);
}
@@ -357,98 +342,92 @@ void write_nic_dword(struct net_device *dev, int indx, u32 data)
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
- RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
- (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 4, HZ / 2);
+ RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 4, HZ / 2);
if (status < 0)
- {
- printk("write_nic_dword TimeOut! status:%d\n", status);
- }
+ netdev_err(dev, "write_nic_dword TimeOut! status: %d\n", status);
}
-u8 read_nic_byte(struct net_device *dev, int indx)
+int read_nic_byte(struct net_device *dev, int indx, u8 *data)
{
- u8 data;
int status;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
- (indx&0xff)|0xff00, (indx>>8)&0x0f, &data, 1, HZ / 2);
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f, data, 1, HZ / 2);
- if (status < 0)
- {
- printk("read_nic_byte TimeOut! status:%d\n", status);
+ if (status < 0) {
+ netdev_err(dev, "%s failure status: %d\n", __func__, status);
+ return status;
}
- return data;
+ return 0;
}
-u16 read_nic_word(struct net_device *dev, int indx)
+int read_nic_word(struct net_device *dev, int indx, u16 *data)
{
- u16 data;
int status;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
- (indx&0xff)|0xff00, (indx>>8)&0x0f,
- &data, 2, HZ / 2);
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f,
+ data, 2, HZ / 2);
- if (status < 0)
- printk("read_nic_word TimeOut! status:%d\n", status);
+ if (status < 0) {
+ netdev_err(dev, "%s failure status: %d\n", __func__, status);
+ return status;
+ }
- return data;
+ return 0;
}
-u16 read_nic_word_E(struct net_device *dev, int indx)
+int read_nic_word_E(struct net_device *dev, int indx, u16 *data)
{
- u16 data;
int status;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
- indx|0xfe00, 0, &data, 2, HZ / 2);
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ indx|0xfe00, 0, data, 2, HZ / 2);
- if (status < 0)
- printk("read_nic_word TimeOut! status:%d\n", status);
+ if (status < 0) {
+ netdev_err(dev, "%s failure status: %d\n", __func__, status);
+ return status;
+ }
- return data;
+ return 0;
}
-u32 read_nic_dword(struct net_device *dev, int indx)
+int read_nic_dword(struct net_device *dev, int indx, u32 *data)
{
- u32 data;
int status;
- /* int result; */
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct usb_device *udev = priv->udev;
status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
- (indx&0xff)|0xff00, (indx>>8)&0x0f,
- &data, 4, HZ / 2);
- /* if(0 != result) {
- * printk(KERN_WARNING "read size of data = %d\, date = %d\n",
- * result, data);
- * }
- */
+ RTL8187_REQ_GET_REGS, RTL8187_REQT_READ,
+ (indx&0xff)|0xff00, (indx>>8)&0x0f,
+ data, 4, HZ / 2);
- if (status < 0)
- printk("read_nic_dword TimeOut! status:%d\n", status);
+ if (status < 0) {
+ netdev_err(dev, "%s failure status: %d\n", __func__, status);
+ return status;
+ }
- return data;
+ return 0;
}
/* u8 read_phy_cck(struct net_device *dev, u8 adr); */
@@ -462,9 +441,7 @@ inline void force_pci_posting(struct net_device *dev)
static struct net_device_stats *rtl8192_stats(struct net_device *dev);
void rtl8192_commit(struct net_device *dev);
-/* void rtl8192_restart(struct net_device *dev); */
void rtl8192_restart(struct work_struct *work);
-/* void rtl8192_rq_tx_ack(struct work_struct *work); */
void watch_dog_timer_callback(unsigned long data);
/****************************************************************************
@@ -495,40 +472,38 @@ static int proc_get_stats_ap(struct seq_file *m, void *v)
static int proc_get_registers(struct seq_file *m, void *v)
{
struct net_device *dev = m->private;
- int i,n, max = 0xff;
+ int i, n, max = 0xff;
+ u8 byte_rd;
seq_puts(m, "\n####################page 0##################\n ");
- for (n=0;n<=max;) {
- //printk( "\nD: %2x> ", n);
- seq_printf(m, "\nD: %2x > ",n);
-
- for (i=0;i<16 && n<=max;i++,n++)
- seq_printf(m, "%2x ",read_nic_byte(dev,0x000|n));
+ for (n = 0; n <= max;) {
+ seq_printf(m, "\nD: %2x > ", n);
- // printk("%2x ",read_nic_byte(dev,n));
+ for (i = 0; i < 16 && n <= max; i++, n++) {
+ read_nic_byte(dev, 0x000|n, &byte_rd);
+ seq_printf(m, "%2x ", byte_rd);
+ }
}
seq_puts(m, "\n####################page 1##################\n ");
- for (n=0;n<=max;) {
- //printk( "\nD: %2x> ", n);
- seq_printf(m, "\nD: %2x > ",n);
+ for (n = 0; n <= max;) {
+ seq_printf(m, "\nD: %2x > ", n);
- for (i=0;i<16 && n<=max;i++,n++)
- seq_printf(m, "%2x ",read_nic_byte(dev,0x100|n));
-
- // printk("%2x ",read_nic_byte(dev,n));
+ for (i = 0; i < 16 && n <= max; i++, n++) {
+ read_nic_byte(dev, 0x100|n, &byte_rd);
+ seq_printf(m, "%2x ", byte_rd);
+ }
}
seq_puts(m, "\n####################page 3##################\n ");
- for (n=0;n<=max;) {
- //printk( "\nD: %2x> ", n);
- seq_printf(m, "\nD: %2x > ",n);
+ for (n = 0; n <= max;) {
+ seq_printf(m, "\nD: %2x > ", n);
- for(i=0;i<16 && n<=max;i++,n++)
- seq_printf(m, "%2x ",read_nic_byte(dev,0x300|n));
-
- // printk("%2x ",read_nic_byte(dev,n));
+ for (i = 0; i < 16 && n <= max; i++, n++) {
+ read_nic_byte(dev, 0x300|n, &byte_rd);
+ seq_printf(m, "%2x ", byte_rd);
+ }
}
seq_putc(m, '\n');
@@ -541,64 +516,54 @@ static int proc_get_stats_tx(struct seq_file *m, void *v)
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
seq_printf(m,
- "TX VI priority ok int: %lu\n"
- "TX VI priority error int: %lu\n"
- "TX VO priority ok int: %lu\n"
- "TX VO priority error int: %lu\n"
- "TX BE priority ok int: %lu\n"
- "TX BE priority error int: %lu\n"
- "TX BK priority ok int: %lu\n"
- "TX BK priority error int: %lu\n"
- "TX MANAGE priority ok int: %lu\n"
- "TX MANAGE priority error int: %lu\n"
- "TX BEACON priority ok int: %lu\n"
- "TX BEACON priority error int: %lu\n"
-// "TX high priority ok int: %lu\n"
-// "TX high priority failed error int: %lu\n"
- "TX queue resume: %lu\n"
- "TX queue stopped?: %d\n"
- "TX fifo overflow: %lu\n"
-// "TX beacon: %lu\n"
- "TX VI queue: %d\n"
- "TX VO queue: %d\n"
- "TX BE queue: %d\n"
- "TX BK queue: %d\n"
-// "TX HW queue: %d\n"
- "TX VI dropped: %lu\n"
- "TX VO dropped: %lu\n"
- "TX BE dropped: %lu\n"
- "TX BK dropped: %lu\n"
- "TX total data packets %lu\n",
-// "TX beacon aborted: %lu\n",
- priv->stats.txviokint,
- priv->stats.txvierr,
- priv->stats.txvookint,
- priv->stats.txvoerr,
- priv->stats.txbeokint,
- priv->stats.txbeerr,
- priv->stats.txbkokint,
- priv->stats.txbkerr,
- priv->stats.txmanageokint,
- priv->stats.txmanageerr,
- priv->stats.txbeaconokint,
- priv->stats.txbeaconerr,
-// priv->stats.txhpokint,
-// priv->stats.txhperr,
- priv->stats.txresumed,
- netif_queue_stopped(dev),
- priv->stats.txoverflow,
-// priv->stats.txbeacon,
- atomic_read(&(priv->tx_pending[VI_PRIORITY])),
- atomic_read(&(priv->tx_pending[VO_PRIORITY])),
- atomic_read(&(priv->tx_pending[BE_PRIORITY])),
- atomic_read(&(priv->tx_pending[BK_PRIORITY])),
-// read_nic_byte(dev, TXFIFOCOUNT),
- priv->stats.txvidrop,
- priv->stats.txvodrop,
- priv->stats.txbedrop,
- priv->stats.txbkdrop,
- priv->stats.txdatapkt
-// priv->stats.txbeaconerr
+ "TX VI priority ok int: %lu\n"
+ "TX VI priority error int: %lu\n"
+ "TX VO priority ok int: %lu\n"
+ "TX VO priority error int: %lu\n"
+ "TX BE priority ok int: %lu\n"
+ "TX BE priority error int: %lu\n"
+ "TX BK priority ok int: %lu\n"
+ "TX BK priority error int: %lu\n"
+ "TX MANAGE priority ok int: %lu\n"
+ "TX MANAGE priority error int: %lu\n"
+ "TX BEACON priority ok int: %lu\n"
+ "TX BEACON priority error int: %lu\n"
+ "TX queue resume: %lu\n"
+ "TX queue stopped?: %d\n"
+ "TX fifo overflow: %lu\n"
+ "TX VI queue: %d\n"
+ "TX VO queue: %d\n"
+ "TX BE queue: %d\n"
+ "TX BK queue: %d\n"
+ "TX VI dropped: %lu\n"
+ "TX VO dropped: %lu\n"
+ "TX BE dropped: %lu\n"
+ "TX BK dropped: %lu\n"
+ "TX total data packets %lu\n",
+ priv->stats.txviokint,
+ priv->stats.txvierr,
+ priv->stats.txvookint,
+ priv->stats.txvoerr,
+ priv->stats.txbeokint,
+ priv->stats.txbeerr,
+ priv->stats.txbkokint,
+ priv->stats.txbkerr,
+ priv->stats.txmanageokint,
+ priv->stats.txmanageerr,
+ priv->stats.txbeaconokint,
+ priv->stats.txbeaconerr,
+ priv->stats.txresumed,
+ netif_queue_stopped(dev),
+ priv->stats.txoverflow,
+ atomic_read(&(priv->tx_pending[VI_PRIORITY])),
+ atomic_read(&(priv->tx_pending[VO_PRIORITY])),
+ atomic_read(&(priv->tx_pending[BE_PRIORITY])),
+ atomic_read(&(priv->tx_pending[BK_PRIORITY])),
+ priv->stats.txvidrop,
+ priv->stats.txvodrop,
+ priv->stats.txbedrop,
+ priv->stats.txbkdrop,
+ priv->stats.txdatapkt
);
return 0;
@@ -610,12 +575,12 @@ static int proc_get_stats_rx(struct seq_file *m, void *v)
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
seq_printf(m,
- "RX packets: %lu\n"
- "RX urb status error: %lu\n"
- "RX invalid urb error: %lu\n",
- priv->stats.rxoktotal,
- priv->stats.rxstaterr,
- priv->stats.rxurberr);
+ "RX packets: %lu\n"
+ "RX urb status error: %lu\n"
+ "RX invalid urb error: %lu\n",
+ priv->stats.rxoktotal,
+ priv->stats.rxstaterr,
+ priv->stats.rxurberr);
return 0;
}
@@ -700,27 +665,7 @@ void rtl8192_proc_remove_one(struct net_device *dev)
-----------------------------MISC STUFF-------------------------
*****************************************************************************/
-/* this is only for debugging */
-void print_buffer(u32 *buffer, int len)
-{
- int i;
- u8 *buf =(u8*)buffer;
-
- printk("ASCII BUFFER DUMP (len: %x):\n",len);
-
- for(i=0;i<len;i++)
- printk("%c",buf[i]);
-
- printk("\nBINARY BUFFER DUMP (len: %x):\n",len);
-
- for(i=0;i<len;i++)
- printk("%x",buf[i]);
-
- printk("\n");
-}
-
-//short check_nic_enough_desc(struct net_device *dev, priority_t priority)
-short check_nic_enough_desc(struct net_device *dev,int queue_index)
+short check_nic_enough_desc(struct net_device *dev, int queue_index)
{
struct r8192_priv *priv = ieee80211_priv(dev);
int used = atomic_read(&priv->tx_pending[queue_index]);
@@ -731,10 +676,8 @@ short check_nic_enough_desc(struct net_device *dev,int queue_index)
void tx_timeout(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- //rtl8192_commit(dev);
schedule_work(&priv->reset_wq);
- //DMESG("TXTIMEOUT");
}
@@ -742,41 +685,24 @@ void tx_timeout(struct net_device *dev)
void dump_eprom(struct net_device *dev)
{
int i;
- for(i=0; i<63; i++)
- RT_TRACE(COMP_EPROM, "EEPROM addr %x : %x", i, eprom_read(dev,i));
+ for (i = 0; i < 63; i++)
+ RT_TRACE(COMP_EPROM, "EEPROM addr %x : %x", i, eprom_read(dev, i));
}
-/* this is only for debug */
-void rtl8192_dump_reg(struct net_device *dev)
-{
- int i;
- int n;
- int max=0x1ff;
-
- RT_TRACE(COMP_PHY, "Dumping NIC register map");
-
- for(n=0;n<=max;)
- {
- printk( "\nD: %2x> ", n);
- for(i=0;i<16 && n<=max;i++,n++)
- printk("%2x ",read_nic_byte(dev,n));
- }
- printk("\n");
-}
/****************************************************************************
------------------------------HW STUFF---------------------------
*****************************************************************************/
-void rtl8192_set_mode(struct net_device *dev,int mode)
+void rtl8192_set_mode(struct net_device *dev, int mode)
{
u8 ecmd;
- ecmd=read_nic_byte(dev, EPROM_CMD);
- ecmd=ecmd &~ EPROM_CMD_OPERATING_MODE_MASK;
- ecmd=ecmd | (mode<<EPROM_CMD_OPERATING_MODE_SHIFT);
- ecmd=ecmd &~ (1<<EPROM_CS_SHIFT);
- ecmd=ecmd &~ (1<<EPROM_CK_SHIFT);
+ read_nic_byte(dev, EPROM_CMD, &ecmd);
+ ecmd = ecmd & ~EPROM_CMD_OPERATING_MODE_MASK;
+ ecmd = ecmd | (mode<<EPROM_CMD_OPERATING_MODE_SHIFT);
+ ecmd = ecmd & ~EPROM_CS_BIT;
+ ecmd = ecmd & ~EPROM_CK_BIT;
write_nic_byte(dev, EPROM_CMD, ecmd);
}
@@ -786,15 +712,15 @@ void rtl8192_update_msr(struct net_device *dev)
struct r8192_priv *priv = ieee80211_priv(dev);
u8 msr;
- msr = read_nic_byte(dev, MSR);
- msr &= ~ MSR_LINK_MASK;
+ read_nic_byte(dev, MSR, &msr);
+ msr &= ~MSR_LINK_MASK;
/* do not change in link_state != WLAN_LINK_ASSOCIATED.
* msr must be updated if the state is ASSOCIATING.
* this is intentional and make sense for ad-hoc and
* master (see the create BSS/IBSS func)
*/
- if (priv->ieee80211->state == IEEE80211_LINKED){
+ if (priv->ieee80211->state == IEEE80211_LINKED) {
if (priv->ieee80211->iw_mode == IW_MODE_INFRA)
msr |= (MSR_LINK_MANAGED<<MSR_LINK_SHIFT);
@@ -803,39 +729,31 @@ void rtl8192_update_msr(struct net_device *dev)
else if (priv->ieee80211->iw_mode == IW_MODE_MASTER)
msr |= (MSR_LINK_MASTER<<MSR_LINK_SHIFT);
- }else
+ } else {
msr |= (MSR_LINK_NONE<<MSR_LINK_SHIFT);
+ }
write_nic_byte(dev, MSR, msr);
}
-void rtl8192_set_chan(struct net_device *dev,short ch)
+void rtl8192_set_chan(struct net_device *dev, short ch)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
-// u32 tx;
- RT_TRACE(COMP_CH, "=====>%s()====ch:%d\n", __FUNCTION__, ch);
- priv->chan=ch;
+ RT_TRACE(COMP_CH, "=====>%s()====ch:%d\n", __func__, ch);
+ priv->chan = ch;
/* this hack should avoid frame TX during channel setting*/
-
-// tx = read_nic_dword(dev,TX_CONF);
-// tx &= ~TX_LOOPBACK_MASK;
-
#ifndef LOOP_TEST
-// write_nic_dword(dev,TX_CONF, tx |( TX_LOOPBACK_MAC<<TX_LOOPBACK_SHIFT));
-
//need to implement rf set channel here WB
if (priv->rf_set_chan)
- priv->rf_set_chan(dev,priv->chan);
+ priv->rf_set_chan(dev, priv->chan);
mdelay(10);
-// write_nic_dword(dev,TX_CONF,tx | (TX_LOOPBACK_NONE<<TX_LOOPBACK_SHIFT));
#endif
}
static void rtl8192_rx_isr(struct urb *urb);
-//static void rtl8192_rx_isr(struct urb *rx_urb);
u32 get_rxpacket_shiftbytes_819xusb(struct ieee80211_rx_stats *pstats)
{
@@ -847,10 +765,10 @@ u32 get_rxpacket_shiftbytes_819xusb(struct ieee80211_rx_stats *pstats)
else
#endif
return (sizeof(rx_desc_819x_usb) + pstats->RxDrvInfoSize
- + pstats->RxBufShift);
+ + pstats->RxBufShift);
}
-static int rtl8192_rx_initiate(struct net_device*dev)
+static int rtl8192_rx_initiate(struct net_device *dev)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct urb *entry;
@@ -867,7 +785,6 @@ static int rtl8192_rx_initiate(struct net_device*dev)
kfree_skb(skb);
break;
}
-// printk("nomal packet IN request!\n");
usb_fill_bulk_urb(entry, priv->udev,
usb_rcvbulkpipe(priv->udev, 3), skb_tail_pointer(skb),
RX_URB_SIZE, rtl8192_rx_isr, skb);
@@ -881,8 +798,7 @@ static int rtl8192_rx_initiate(struct net_device*dev)
/* command packet rx procedure */
while (skb_queue_len(&priv->rx_queue) < MAX_RX_URB + 3) {
-// printk("command packet IN request!\n");
- skb = __dev_alloc_skb(RX_URB_SIZE ,GFP_KERNEL);
+ skb = __dev_alloc_skb(RX_URB_SIZE, GFP_KERNEL);
if (!skb)
break;
entry = usb_alloc_urb(0, GFP_KERNEL);
@@ -896,7 +812,7 @@ static int rtl8192_rx_initiate(struct net_device*dev)
info = (struct rtl8192_rx_info *) skb->cb;
info->urb = entry;
info->dev = dev;
- info->out_pipe = 9; //denote rx cmd packet queue
+ info->out_pipe = 9; //denote rx cmd packet queue
skb_queue_tail(&priv->rx_queue, skb);
usb_submit_urb(entry, GFP_KERNEL);
}
@@ -909,64 +825,47 @@ void rtl8192_set_rxconf(struct net_device *dev)
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
u32 rxconf;
- rxconf=read_nic_dword(dev,RCR);
- rxconf = rxconf &~ MAC_FILTER_MASK;
+ read_nic_dword(dev, RCR, &rxconf);
+ rxconf = rxconf & ~MAC_FILTER_MASK;
rxconf = rxconf | RCR_AMF;
rxconf = rxconf | RCR_ADF;
rxconf = rxconf | RCR_AB;
rxconf = rxconf | RCR_AM;
- //rxconf = rxconf | RCR_ACF;
- if (dev->flags & IFF_PROMISC) {DMESG ("NIC in promisc mode");}
+ if (dev->flags & IFF_PROMISC)
+ DMESG("NIC in promisc mode");
- if(priv->ieee80211->iw_mode == IW_MODE_MONITOR || \
- dev->flags & IFF_PROMISC){
+ if (priv->ieee80211->iw_mode == IW_MODE_MONITOR ||
+ dev->flags & IFF_PROMISC) {
rxconf = rxconf | RCR_AAP;
- } /*else if(priv->ieee80211->iw_mode == IW_MODE_MASTER){
- rxconf = rxconf | (1<<ACCEPT_ALLMAC_FRAME_SHIFT);
- rxconf = rxconf | (1<<RX_CHECK_BSSID_SHIFT);
- }*/else{
+ } else {
rxconf = rxconf | RCR_APM;
rxconf = rxconf | RCR_CBSSID;
}
- if(priv->ieee80211->iw_mode == IW_MODE_MONITOR){
+ if (priv->ieee80211->iw_mode == IW_MODE_MONITOR) {
rxconf = rxconf | RCR_AICV;
rxconf = rxconf | RCR_APWRMGT;
}
- if( priv->crcmon == 1 && priv->ieee80211->iw_mode == IW_MODE_MONITOR)
+ if (priv->crcmon == 1 && priv->ieee80211->iw_mode == IW_MODE_MONITOR)
rxconf = rxconf | RCR_ACRC32;
- rxconf = rxconf &~ RX_FIFO_THRESHOLD_MASK;
+ rxconf = rxconf & ~RX_FIFO_THRESHOLD_MASK;
rxconf = rxconf | (RX_FIFO_THRESHOLD_NONE<<RX_FIFO_THRESHOLD_SHIFT);
- rxconf = rxconf &~ MAX_RX_DMA_MASK;
+ rxconf = rxconf & ~MAX_RX_DMA_MASK;
rxconf = rxconf | ((u32)7<<RCR_MXDMA_OFFSET);
-// rxconf = rxconf | (1<<RX_AUTORESETPHY_SHIFT);
rxconf = rxconf | RCR_ONLYERLPKT;
-// rxconf = rxconf &~ RCR_CS_MASK;
-// rxconf = rxconf | (1<<RCR_CS_SHIFT);
-
write_nic_dword(dev, RCR, rxconf);
-
- #ifdef DEBUG_RX
- DMESG("rxconf: %x %x",rxconf ,read_nic_dword(dev,RCR));
- #endif
}
//wait to be removed
void rtl8192_rx_enable(struct net_device *dev)
{
- //u8 cmd;
-
- //struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
-
rtl8192_rx_initiate(dev);
-
-// rtl8192_set_rxconf(dev);
}
@@ -983,9 +882,8 @@ void rtl8192_rtx_disable(struct net_device *dev)
struct sk_buff *skb;
struct rtl8192_rx_info *info;
- cmd=read_nic_byte(dev,CMDR);
- write_nic_byte(dev, CMDR, cmd &~ \
- (CR_TE|CR_RE));
+ read_nic_byte(dev, CMDR, &cmd);
+ write_nic_byte(dev, CMDR, cmd & ~(CR_TE|CR_RE));
force_pci_posting(dev);
mdelay(10);
@@ -998,9 +896,8 @@ void rtl8192_rtx_disable(struct net_device *dev)
kfree_skb(skb);
}
- if (skb_queue_len(&priv->skb_queue)) {
- printk(KERN_WARNING "skb_queue not empty\n");
- }
+ if (skb_queue_len(&priv->skb_queue))
+ netdev_warn(dev, "skb_queue not empty\n");
skb_queue_purge(&priv->skb_queue);
return;
@@ -1014,40 +911,40 @@ int alloc_tx_beacon_desc_ring(struct net_device *dev, int count)
inline u16 ieeerate2rtlrate(int rate)
{
- switch(rate){
+ switch (rate) {
case 10:
- return 0;
+ return 0;
case 20:
- return 1;
+ return 1;
case 55:
- return 2;
+ return 2;
case 110:
- return 3;
+ return 3;
case 60:
- return 4;
+ return 4;
case 90:
- return 5;
+ return 5;
case 120:
- return 6;
+ return 6;
case 180:
- return 7;
+ return 7;
case 240:
- return 8;
+ return 8;
case 360:
- return 9;
+ return 9;
case 480:
- return 10;
+ return 10;
case 540:
- return 11;
+ return 11;
default:
- return 3;
+ return 3;
}
}
-static u16 rtl_rate[] = {10,20,55,110,60,90,120,180,240,360,480,540};
+static u16 rtl_rate[] = {10, 20, 55, 110, 60, 90, 120, 180, 240, 360, 480, 540};
inline u16 rtl8192_rate2rate(short rate)
{
- if (rate >11) return 0;
+ if (rate > 11) return 0;
return rtl_rate[rate];
}
@@ -1061,14 +958,13 @@ static void rtl8192_rx_isr(struct urb *urb)
struct r8192_priv *priv = ieee80211_priv(dev);
int out_pipe = info->out_pipe;
int err;
- if(!priv->up)
+ if (!priv->up)
return;
if (unlikely(urb->status)) {
info->urb = NULL;
priv->stats.rxstaterr++;
priv->ieee80211->stats.rx_errors++;
usb_free_urb(urb);
- // printk("%s():rx status err\n",__FUNCTION__);
return;
}
skb_unlink(skb, &priv->rx_queue);
@@ -1080,14 +976,14 @@ static void rtl8192_rx_isr(struct urb *urb)
skb = dev_alloc_skb(RX_URB_SIZE);
if (unlikely(!skb)) {
usb_free_urb(urb);
- printk("%s():can,t alloc skb\n",__FUNCTION__);
+ netdev_err(dev, "%s(): can't alloc skb\n", __func__);
/* TODO check rx queue length and refill *somewhere* */
return;
}
usb_fill_bulk_urb(urb, priv->udev,
- usb_rcvbulkpipe(priv->udev, out_pipe), skb_tail_pointer(skb),
- RX_URB_SIZE, rtl8192_rx_isr, skb);
+ usb_rcvbulkpipe(priv->udev, out_pipe), skb_tail_pointer(skb),
+ RX_URB_SIZE, rtl8192_rx_isr, skb);
info = (struct rtl8192_rx_info *) skb->cb;
info->urb = urb;
@@ -1098,31 +994,19 @@ static void rtl8192_rx_isr(struct urb *urb)
urb->context = skb;
skb_queue_tail(&priv->rx_queue, skb);
err = usb_submit_urb(urb, GFP_ATOMIC);
- if(err && err != EPERM)
- printk("can not submit rxurb, err is %x,URB status is %x\n",err,urb->status);
+ if (err && err != EPERM)
+ netdev_err(dev, "can not submit rxurb, err is %x, URB status is %x\n", err, urb->status);
}
-u32
-rtl819xusb_rx_command_packet(
- struct net_device *dev,
- struct ieee80211_rx_stats *pstats
- )
+u32 rtl819xusb_rx_command_packet(struct net_device *dev,
+ struct ieee80211_rx_stats *pstats)
{
u32 status;
- //RT_TRACE(COMP_RECV, DBG_TRACE, ("---> RxCommandPacketHandle819xUsb()\n"));
-
status = cmpk_message_handle_rx(dev, pstats);
if (status)
- {
DMESG("rxcommandpackethandle819xusb: It is a command packet\n");
- }
- else
- {
- //RT_TRACE(COMP_RECV, DBG_TRACE, ("RxCommandPacketHandle819xUsb: It is not a command packet\n"));
- }
- //RT_TRACE(COMP_RECV, DBG_TRACE, ("<--- RxCommandPacketHandle819xUsb()\n"));
return status;
}
@@ -1150,24 +1034,17 @@ void rtl8192_hard_data_xmit(struct sk_buff *skb, struct net_device *dev, int rat
u8 queue_index = tcb_desc->queue_index;
/* shall not be referred by command packet */
- assert(queue_index != TXCMD_QUEUE);
+ RTL8192U_ASSERT(queue_index != TXCMD_QUEUE);
- spin_lock_irqsave(&priv->tx_lock,flags);
+ spin_lock_irqsave(&priv->tx_lock, flags);
- memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
-// tcb_desc->RATRIndex = 7;
-// tcb_desc->bTxDisableRateFallBack = 1;
-// tcb_desc->bTxUseDriverAssingedRate = 1;
+ memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev));
tcb_desc->bTxEnableFwCalcDur = 1;
skb_push(skb, priv->ieee80211->tx_headroom);
ret = rtl8192_tx(dev, skb);
- //priv->ieee80211->stats.tx_bytes+=(skb->len - priv->ieee80211->tx_headroom);
- //priv->ieee80211->stats.tx_packets++;
-
- spin_unlock_irqrestore(&priv->tx_lock,flags);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
-// return ret;
return;
}
@@ -1176,7 +1053,7 @@ void rtl8192_hard_data_xmit(struct sk_buff *skb, struct net_device *dev, int rat
* If the ring is full packet are dropped (for data frame the queue
* is stopped before this can happen).
*/
-int rtl8192_hard_start_xmit(struct sk_buff *skb,struct net_device *dev)
+int rtl8192_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
int ret;
@@ -1185,21 +1062,21 @@ int rtl8192_hard_start_xmit(struct sk_buff *skb,struct net_device *dev)
u8 queue_index = tcb_desc->queue_index;
- spin_lock_irqsave(&priv->tx_lock,flags);
+ spin_lock_irqsave(&priv->tx_lock, flags);
- memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
- if(queue_index == TXCMD_QUEUE) {
+ memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev));
+ if (queue_index == TXCMD_QUEUE) {
skb_push(skb, USB_HWDESC_HEADER_LEN);
rtl819xU_tx_cmd(dev, skb);
ret = 1;
- spin_unlock_irqrestore(&priv->tx_lock,flags);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
return ret;
} else {
skb_push(skb, priv->ieee80211->tx_headroom);
ret = rtl8192_tx(dev, skb);
}
- spin_unlock_irqrestore(&priv->tx_lock,flags);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
return ret;
}
@@ -1211,7 +1088,7 @@ void rtl8192_try_wake_queue(struct net_device *dev, int pri);
u16 DrvAggr_PaddingAdd(struct net_device *dev, struct sk_buff *skb)
{
u16 PaddingNum = 256 - ((skb->len + TX_PACKET_DRVAGGR_SUBFRAME_SHIFT_BYTES) % 256);
- return (PaddingNum&0xff);
+ return PaddingNum & 0xff;
}
u8 MRateToHwRate8190Pci(u8 rate);
@@ -1239,7 +1116,7 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
/* Get the total aggregation length including the padding space and
* sub frame header.
*/
- for(i = 1; i < pSendList->nr_drv_agg_frames; i++) {
+ for (i = 1; i < pSendList->nr_drv_agg_frames; i++) {
TotalLength += DrvAggr_PaddingAdd(dev, skb);
skb = pSendList->tx_agg_frames[i];
TotalLength += (skb->len + TX_PACKET_DRVAGGR_SUBFRAME_SHIFT_BYTES);
@@ -1250,23 +1127,19 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
memset(agg_skb->data, 0, agg_skb->len);
skb_reserve(agg_skb, ieee->tx_headroom);
-// RT_DEBUG_DATA(COMP_SEND, skb->cb, sizeof(skb->cb));
/* reserve info for first subframe Tx descriptor to be set in the tx function */
skb = pSendList->tx_agg_frames[0];
tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
tcb_desc->drv_agg_enable = 1;
tcb_desc->pkt_size = skb->len;
tcb_desc->DrvAggrNum = pSendList->nr_drv_agg_frames;
- printk("DrvAggNum = %d\n", tcb_desc->DrvAggrNum);
-// RT_DEBUG_DATA(COMP_SEND, skb->cb, sizeof(skb->cb));
-// printk("========>skb->data ======> \n");
-// RT_DEBUG_DATA(COMP_SEND, skb->data, skb->len);
+ netdev_dbg(dev, "DrvAggNum = %d\n", tcb_desc->DrvAggrNum);
memcpy(agg_skb->cb, skb->cb, sizeof(skb->cb));
- memcpy(skb_put(agg_skb,skb->len),skb->data,skb->len);
+ memcpy(skb_put(agg_skb, skb->len), skb->data, skb->len);
- for(i = 1; i < pSendList->nr_drv_agg_frames; i++) {
+ for (i = 1; i < pSendList->nr_drv_agg_frames; i++) {
/* push the next sub frame to be 256 byte aline */
- skb_put(agg_skb,DrvAggr_PaddingAdd(dev,skb));
+ skb_put(agg_skb, DrvAggr_PaddingAdd(dev, skb));
/* Subframe drv Tx descriptor and firmware info setting */
skb = pSendList->tx_agg_frames[i];
@@ -1274,13 +1147,13 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
tx_agg_desc = (tx_desc_819x_usb_aggr_subframe *)agg_skb->tail;
tx_fwinfo = (tx_fwinfo_819x_usb *)(agg_skb->tail + sizeof(tx_desc_819x_usb_aggr_subframe));
- memset(tx_fwinfo,0,sizeof(tx_fwinfo_819x_usb));
+ memset(tx_fwinfo, 0, sizeof(tx_fwinfo_819x_usb));
/* DWORD 0 */
- tx_fwinfo->TxHT = (tcb_desc->data_rate&0x80)?1:0;
+ tx_fwinfo->TxHT = (tcb_desc->data_rate&0x80) ? 1 : 0;
tx_fwinfo->TxRate = MRateToHwRate8190Pci(tcb_desc->data_rate);
tx_fwinfo->EnableCPUDur = tcb_desc->bTxEnableFwCalcDur;
tx_fwinfo->Short = QueryIsShort(tx_fwinfo->TxHT, tx_fwinfo->TxRate, tcb_desc);
- if(tcb_desc->bAMPDUEnable) {//AMPDU enabled
+ if (tcb_desc->bAMPDUEnable) {//AMPDU enabled
tx_fwinfo->AllowAggregation = 1;
/* DWORD 1 */
tx_fwinfo->RxMF = tcb_desc->ampdu_factor;
@@ -1293,20 +1166,19 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
}
/* Protection mode related */
- tx_fwinfo->RtsEnable = (tcb_desc->bRTSEnable)?1:0;
- tx_fwinfo->CtsEnable = (tcb_desc->bCTSEnable)?1:0;
- tx_fwinfo->RtsSTBC = (tcb_desc->bRTSSTBC)?1:0;
- tx_fwinfo->RtsHT = (tcb_desc->rts_rate&0x80)?1:0;
+ tx_fwinfo->RtsEnable = (tcb_desc->bRTSEnable) ? 1 : 0;
+ tx_fwinfo->CtsEnable = (tcb_desc->bCTSEnable) ? 1 : 0;
+ tx_fwinfo->RtsSTBC = (tcb_desc->bRTSSTBC) ? 1 : 0;
+ tx_fwinfo->RtsHT = (tcb_desc->rts_rate&0x80) ? 1 : 0;
tx_fwinfo->RtsRate = MRateToHwRate8190Pci((u8)tcb_desc->rts_rate);
- tx_fwinfo->RtsSubcarrier = (tx_fwinfo->RtsHT==0)?(tcb_desc->RTSSC):0;
- tx_fwinfo->RtsBandwidth = (tx_fwinfo->RtsHT==1)?((tcb_desc->bRTSBW)?1:0):0;
- tx_fwinfo->RtsShort = (tx_fwinfo->RtsHT==0)?(tcb_desc->bRTSUseShortPreamble?1:0):\
- (tcb_desc->bRTSUseShortGI?1:0);
+ tx_fwinfo->RtsSubcarrier = (tx_fwinfo->RtsHT == 0) ? (tcb_desc->RTSSC) : 0;
+ tx_fwinfo->RtsBandwidth = (tx_fwinfo->RtsHT == 1) ? ((tcb_desc->bRTSBW) ? 1 : 0) : 0;
+ tx_fwinfo->RtsShort = (tx_fwinfo->RtsHT == 0) ? (tcb_desc->bRTSUseShortPreamble ? 1 : 0) :
+ (tcb_desc->bRTSUseShortGI ? 1 : 0);
/* Set Bandwidth and sub-channel settings. */
- if(priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20_40)
- {
- if(tcb_desc->bPacketBW) {
+ if (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20_40) {
+ if (tcb_desc->bPacketBW) {
tx_fwinfo->TxBandwidth = 1;
tx_fwinfo->TxSubCarrier = 0; //By SD3's Jerry suggestion, use duplicated mode
} else {
@@ -1321,41 +1193,35 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
/* Fill Tx descriptor */
memset(tx_agg_desc, 0, sizeof(tx_desc_819x_usb_aggr_subframe));
/* DWORD 0 */
- //tx_agg_desc->LINIP = 0;
- //tx_agg_desc->CmdInit = 1;
tx_agg_desc->Offset = sizeof(tx_fwinfo_819x_usb) + 8;
/* already raw data, need not to subtract header length */
tx_agg_desc->PktSize = skb->len & 0xffff;
/*DWORD 1*/
- tx_agg_desc->SecCAMID= 0;
+ tx_agg_desc->SecCAMID = 0;
tx_agg_desc->RATid = tcb_desc->RATRIndex;
- {
- //MPDUOverhead = 0;
- tx_agg_desc->NoEnc = 1;
- }
+ tx_agg_desc->NoEnc = 1;
tx_agg_desc->SecType = 0x0;
if (tcb_desc->bHwSec) {
- switch (priv->ieee80211->pairwise_key_type)
- {
- case KEY_TYPE_WEP40:
- case KEY_TYPE_WEP104:
- tx_agg_desc->SecType = 0x1;
- tx_agg_desc->NoEnc = 0;
- break;
- case KEY_TYPE_TKIP:
- tx_agg_desc->SecType = 0x2;
- tx_agg_desc->NoEnc = 0;
- break;
- case KEY_TYPE_CCMP:
- tx_agg_desc->SecType = 0x3;
- tx_agg_desc->NoEnc = 0;
- break;
- case KEY_TYPE_NA:
- tx_agg_desc->SecType = 0x0;
- tx_agg_desc->NoEnc = 1;
- break;
+ switch (priv->ieee80211->pairwise_key_type) {
+ case KEY_TYPE_WEP40:
+ case KEY_TYPE_WEP104:
+ tx_agg_desc->SecType = 0x1;
+ tx_agg_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_TKIP:
+ tx_agg_desc->SecType = 0x2;
+ tx_agg_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_CCMP:
+ tx_agg_desc->SecType = 0x3;
+ tx_agg_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_NA:
+ tx_agg_desc->SecType = 0x0;
+ tx_agg_desc->NoEnc = 1;
+ break;
}
}
@@ -1369,16 +1235,14 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
//DWORD 2
/* According windows driver, it seems that there no need to fill this field */
- //tx_agg_desc->TxBufferSize= (u32)(skb->len - USB_HWDESC_HEADER_LEN);
/* to fill next packet */
- skb_put(agg_skb,TX_PACKET_DRVAGGR_SUBFRAME_SHIFT_BYTES);
- memcpy(skb_put(agg_skb,skb->len),skb->data,skb->len);
+ skb_put(agg_skb, TX_PACKET_DRVAGGR_SUBFRAME_SHIFT_BYTES);
+ memcpy(skb_put(agg_skb, skb->len), skb->data, skb->len);
}
- for(i = 0; i < pSendList->nr_drv_agg_frames; i++) {
+ for (i = 0; i < pSendList->nr_drv_agg_frames; i++)
dev_kfree_skb_any(pSendList->tx_agg_frames[i]);
- }
return agg_skb;
}
@@ -1388,7 +1252,7 @@ struct sk_buff *DrvAggr_Aggregation(struct net_device *dev, struct ieee80211_drv
If no proper TCB is found to do aggregation, SendList will only contain the input TCB.
*/
u8 DrvAggr_GetAggregatibleList(struct net_device *dev, struct sk_buff *skb,
- struct ieee80211_drv_agg_txb *pSendList)
+ struct ieee80211_drv_agg_txb *pSendList)
{
struct ieee80211_device *ieee = netdev_priv(dev);
PRT_HIGH_THROUGHPUT pHTInfo = ieee->pHTInfo;
@@ -1398,11 +1262,10 @@ u8 DrvAggr_GetAggregatibleList(struct net_device *dev, struct sk_buff *skb,
do {
pSendList->tx_agg_frames[pSendList->nr_drv_agg_frames++] = skb;
- if(pSendList->nr_drv_agg_frames >= nMaxAggrNum) {
+ if (pSendList->nr_drv_agg_frames >= nMaxAggrNum)
break;
- }
- } while((skb = skb_dequeue(&ieee->skb_drv_aggQ[QueueID])));
+ } while ((skb = skb_dequeue(&ieee->skb_drv_aggQ[QueueID])));
RT_TRACE(COMP_AMSDU, "DrvAggr_GetAggregatibleList, nAggrTcbNum = %d \n", pSendList->nr_drv_agg_frames);
return pSendList->nr_drv_agg_frames;
@@ -1411,105 +1274,86 @@ u8 DrvAggr_GetAggregatibleList(struct net_device *dev, struct sk_buff *skb,
static void rtl8192_tx_isr(struct urb *tx_urb)
{
- struct sk_buff *skb = (struct sk_buff*)tx_urb->context;
+ struct sk_buff *skb = (struct sk_buff *)tx_urb->context;
struct net_device *dev = NULL;
struct r8192_priv *priv = NULL;
cb_desc *tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
u8 queue_index = tcb_desc->queue_index;
-// bool bToSend0Byte;
-// u16 BufLen = skb->len;
- memcpy(&dev,(struct net_device*)(skb->cb),sizeof(struct net_device*));
+ memcpy(&dev, (struct net_device *)(skb->cb), sizeof(struct net_device *));
priv = ieee80211_priv(dev);
- if(tcb_desc->queue_index != TXCMD_QUEUE) {
- if(tx_urb->status == 0) {
+ if (tcb_desc->queue_index != TXCMD_QUEUE) {
+ if (tx_urb->status == 0) {
dev->trans_start = jiffies;
- // Act as station mode, destination shall be unicast address.
- //priv->ieee80211->stats.tx_bytes+=(skb->len - priv->ieee80211->tx_headroom);
- //priv->ieee80211->stats.tx_packets++;
priv->stats.txoktotal++;
priv->ieee80211->LinkDetectInfo.NumTxOkInPeriod++;
priv->stats.txbytesunicast += (skb->len - priv->ieee80211->tx_headroom);
} else {
priv->ieee80211->stats.tx_errors++;
- //priv->stats.txmanageerr++;
/* TODO */
}
}
/* free skb and tx_urb */
- if(skb != NULL) {
+ if (skb != NULL) {
dev_kfree_skb_any(skb);
usb_free_urb(tx_urb);
atomic_dec(&priv->tx_pending[queue_index]);
}
- {
- //
- // Handle HW Beacon:
- // We had transfer our beacon frame to host controller at this moment.
- //
- //
- // Caution:
- // Handling the wait queue of command packets.
- // For Tx command packets, we must not do TCB fragment because it is not handled right now.
- // We must cut the packets to match the size of TX_CMD_PKT before we send it.
- //
+ //
+ // Handle HW Beacon:
+ // We had transfer our beacon frame to host controller at this moment.
+ //
+ //
+ // Caution:
+ // Handling the wait queue of command packets.
+ // For Tx command packets, we must not do TCB fragment because it is not handled right now.
+ // We must cut the packets to match the size of TX_CMD_PKT before we send it.
+ //
- /* Handle MPDU in wait queue. */
- if(queue_index != BEACON_QUEUE) {
- /* Don't send data frame during scanning.*/
- if((skb_queue_len(&priv->ieee80211->skb_waitQ[queue_index]) != 0)&&\
- (!(priv->ieee80211->queue_stop))) {
- if(NULL != (skb = skb_dequeue(&(priv->ieee80211->skb_waitQ[queue_index]))))
- priv->ieee80211->softmac_hard_start_xmit(skb, dev);
+ /* Handle MPDU in wait queue. */
+ if (queue_index != BEACON_QUEUE) {
+ /* Don't send data frame during scanning.*/
+ if ((skb_queue_len(&priv->ieee80211->skb_waitQ[queue_index]) != 0) &&
+ (!(priv->ieee80211->queue_stop))) {
+ if (NULL != (skb = skb_dequeue(&(priv->ieee80211->skb_waitQ[queue_index]))))
+ priv->ieee80211->softmac_hard_start_xmit(skb, dev);
- return; //modified by david to avoid further processing AMSDU
- }
+ return; //modified by david to avoid further processing AMSDU
+ }
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
- else if ((skb_queue_len(&priv->ieee80211->skb_drv_aggQ[queue_index])!= 0)&&\
- (!(priv->ieee80211->queue_stop))) {
- // Tx Driver Aggregation process
- /* The driver will aggregation the packets according to the following stats
- * 1. check whether there's tx irq available, for it's a completion return
- * function, it should contain enough tx irq;
- * 2. check packet type;
- * 3. initialize sendlist, check whether the to-be send packet no greater than 1
- * 4. aggregates the packets, and fill firmware info and tx desc into it, etc.
- * 5. check whether the packet could be sent, otherwise just insert into wait head
- * */
- skb = skb_dequeue(&priv->ieee80211->skb_drv_aggQ[queue_index]);
- if(!check_nic_enough_desc(dev, queue_index)) {
- skb_queue_head(&(priv->ieee80211->skb_drv_aggQ[queue_index]), skb);
- return;
- }
+ else if ((skb_queue_len(&priv->ieee80211->skb_drv_aggQ[queue_index]) != 0) &&
+ (!(priv->ieee80211->queue_stop))) {
+ // Tx Driver Aggregation process
+ /* The driver will aggregation the packets according to the following stats
+ * 1. check whether there's tx irq available, for it's a completion return
+ * function, it should contain enough tx irq;
+ * 2. check packet type;
+ * 3. initialize sendlist, check whether the to-be send packet no greater than 1
+ * 4. aggregates the packets, and fill firmware info and tx desc into it, etc.
+ * 5. check whether the packet could be sent, otherwise just insert into wait head
+ * */
+ skb = skb_dequeue(&priv->ieee80211->skb_drv_aggQ[queue_index]);
+ if (!check_nic_enough_desc(dev, queue_index)) {
+ skb_queue_head(&(priv->ieee80211->skb_drv_aggQ[queue_index]), skb);
+ return;
+ }
+
+ /*TODO*/
+ {
+ struct ieee80211_drv_agg_txb SendList;
+
+ memset(&SendList, 0, sizeof(struct ieee80211_drv_agg_txb));
+ if (DrvAggr_GetAggregatibleList(dev, skb, &SendList) > 1) {
+ skb = DrvAggr_Aggregation(dev, &SendList);
- {
- /*TODO*/
- /*
- u8* pHeader = skb->data;
-
- if(IsMgntQosData(pHeader) ||
- IsMgntQData_Ack(pHeader) ||
- IsMgntQData_Poll(pHeader) ||
- IsMgntQData_Poll_Ack(pHeader)
- )
- */
- {
- struct ieee80211_drv_agg_txb SendList;
-
- memset(&SendList, 0, sizeof(struct ieee80211_drv_agg_txb));
- if(DrvAggr_GetAggregatibleList(dev, skb, &SendList) > 1) {
- skb = DrvAggr_Aggregation(dev, &SendList);
-
- }
- }
- priv->ieee80211->softmac_hard_start_xmit(skb, dev);
}
}
-#endif
+ priv->ieee80211->softmac_hard_start_xmit(skb, dev);
}
+#endif
}
}
@@ -1519,72 +1363,67 @@ void rtl8192_beacon_stop(struct net_device *dev)
u8 msr, msrm, msr2;
struct r8192_priv *priv = ieee80211_priv(dev);
- msr = read_nic_byte(dev, MSR);
+ read_nic_byte(dev, MSR, &msr);
msrm = msr & MSR_LINK_MASK;
msr2 = msr & ~MSR_LINK_MASK;
- if(NIC_8192U == priv->card_8192) {
+ if (NIC_8192U == priv->card_8192)
usb_kill_urb(priv->rx_urb[MAX_RX_URB]);
- }
if ((msrm == (MSR_LINK_ADHOC<<MSR_LINK_SHIFT) ||
- (msrm == (MSR_LINK_MASTER<<MSR_LINK_SHIFT)))){
+ (msrm == (MSR_LINK_MASTER<<MSR_LINK_SHIFT)))) {
write_nic_byte(dev, MSR, msr2 | MSR_LINK_NONE);
write_nic_byte(dev, MSR, msr);
}
}
-void rtl8192_config_rate(struct net_device* dev, u16* rate_config)
-{
- struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_network *net;
- u8 i=0, basic_rate = 0;
- net = & priv->ieee80211->current_network;
-
- for (i=0; i<net->rates_len; i++)
- {
- basic_rate = net->rates[i]&0x7f;
- switch(basic_rate)
- {
- case MGN_1M: *rate_config |= RRSR_1M; break;
- case MGN_2M: *rate_config |= RRSR_2M; break;
- case MGN_5_5M: *rate_config |= RRSR_5_5M; break;
- case MGN_11M: *rate_config |= RRSR_11M; break;
- case MGN_6M: *rate_config |= RRSR_6M; break;
- case MGN_9M: *rate_config |= RRSR_9M; break;
- case MGN_12M: *rate_config |= RRSR_12M; break;
- case MGN_18M: *rate_config |= RRSR_18M; break;
- case MGN_24M: *rate_config |= RRSR_24M; break;
- case MGN_36M: *rate_config |= RRSR_36M; break;
- case MGN_48M: *rate_config |= RRSR_48M; break;
- case MGN_54M: *rate_config |= RRSR_54M; break;
- }
- }
- for (i=0; i<net->rates_ex_len; i++)
- {
- basic_rate = net->rates_ex[i]&0x7f;
- switch(basic_rate)
- {
- case MGN_1M: *rate_config |= RRSR_1M; break;
- case MGN_2M: *rate_config |= RRSR_2M; break;
- case MGN_5_5M: *rate_config |= RRSR_5_5M; break;
- case MGN_11M: *rate_config |= RRSR_11M; break;
- case MGN_6M: *rate_config |= RRSR_6M; break;
- case MGN_9M: *rate_config |= RRSR_9M; break;
- case MGN_12M: *rate_config |= RRSR_12M; break;
- case MGN_18M: *rate_config |= RRSR_18M; break;
- case MGN_24M: *rate_config |= RRSR_24M; break;
- case MGN_36M: *rate_config |= RRSR_36M; break;
- case MGN_48M: *rate_config |= RRSR_48M; break;
- case MGN_54M: *rate_config |= RRSR_54M; break;
- }
- }
+void rtl8192_config_rate(struct net_device *dev, u16 *rate_config)
+{
+ struct r8192_priv *priv = ieee80211_priv(dev);
+ struct ieee80211_network *net;
+ u8 i = 0, basic_rate = 0;
+ net = &priv->ieee80211->current_network;
+
+ for (i = 0; i < net->rates_len; i++) {
+ basic_rate = net->rates[i]&0x7f;
+ switch (basic_rate) {
+ case MGN_1M: *rate_config |= RRSR_1M; break;
+ case MGN_2M: *rate_config |= RRSR_2M; break;
+ case MGN_5_5M: *rate_config |= RRSR_5_5M; break;
+ case MGN_11M: *rate_config |= RRSR_11M; break;
+ case MGN_6M: *rate_config |= RRSR_6M; break;
+ case MGN_9M: *rate_config |= RRSR_9M; break;
+ case MGN_12M: *rate_config |= RRSR_12M; break;
+ case MGN_18M: *rate_config |= RRSR_18M; break;
+ case MGN_24M: *rate_config |= RRSR_24M; break;
+ case MGN_36M: *rate_config |= RRSR_36M; break;
+ case MGN_48M: *rate_config |= RRSR_48M; break;
+ case MGN_54M: *rate_config |= RRSR_54M; break;
+ }
+ }
+ for (i = 0; i < net->rates_ex_len; i++) {
+ basic_rate = net->rates_ex[i]&0x7f;
+ switch (basic_rate) {
+ case MGN_1M: *rate_config |= RRSR_1M; break;
+ case MGN_2M: *rate_config |= RRSR_2M; break;
+ case MGN_5_5M: *rate_config |= RRSR_5_5M; break;
+ case MGN_11M: *rate_config |= RRSR_11M; break;
+ case MGN_6M: *rate_config |= RRSR_6M; break;
+ case MGN_9M: *rate_config |= RRSR_9M; break;
+ case MGN_12M: *rate_config |= RRSR_12M; break;
+ case MGN_18M: *rate_config |= RRSR_18M; break;
+ case MGN_24M: *rate_config |= RRSR_24M; break;
+ case MGN_36M: *rate_config |= RRSR_36M; break;
+ case MGN_48M: *rate_config |= RRSR_48M; break;
+ case MGN_54M: *rate_config |= RRSR_54M; break;
+ }
+ }
}
#define SHORT_SLOT_TIME 9
#define NON_SHORT_SLOT_TIME 20
-void rtl8192_update_cap(struct net_device* dev, u16 cap)
+void rtl8192_update_cap(struct net_device *dev, u16 cap)
{
u32 tmp = 0;
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -1595,13 +1434,10 @@ void rtl8192_update_cap(struct net_device* dev, u16 cap)
tmp |= BRSR_AckShortPmb;
write_nic_dword(dev, RRSR, tmp);
- if (net->mode & (IEEE_G|IEEE_N_24G))
- {
+ if (net->mode & (IEEE_G|IEEE_N_24G)) {
u8 slot_time = 0;
- if ((cap & WLAN_CAPABILITY_SHORT_SLOT)&&(!priv->ieee80211->pHTInfo->bCurrentRT2RTLongSlotTime))
- {//short slot time
+ if ((cap & WLAN_CAPABILITY_SHORT_SLOT) && (!priv->ieee80211->pHTInfo->bCurrentRT2RTLongSlotTime)) //short slot time
slot_time = SHORT_SLOT_TIME;
- }
else //long slot time
slot_time = NON_SHORT_SLOT_TIME;
priv->slot_time = slot_time;
@@ -1616,31 +1452,26 @@ void rtl8192_net_update(struct net_device *dev)
struct ieee80211_network *net;
u16 BcnTimeCfg = 0, BcnCW = 6, BcnIFS = 0xf;
u16 rate_config = 0;
- net = & priv->ieee80211->current_network;
+ net = &priv->ieee80211->current_network;
rtl8192_config_rate(dev, &rate_config);
priv->basic_rate = rate_config &= 0x15f;
- write_nic_dword(dev,BSSIDR,((u32*)net->bssid)[0]);
- write_nic_word(dev,BSSIDR+4,((u16*)net->bssid)[2]);
- //for(i=0;i<ETH_ALEN;i++)
- // write_nic_byte(dev,BSSID+i,net->bssid[i]);
+ write_nic_dword(dev, BSSIDR, ((u32 *)net->bssid)[0]);
+ write_nic_word(dev, BSSIDR+4, ((u16 *)net->bssid)[2]);
rtl8192_update_msr(dev);
-// rtl8192_update_cap(dev, net->capability);
- if (priv->ieee80211->iw_mode == IW_MODE_ADHOC)
- {
- write_nic_word(dev, ATIMWND, 2);
- write_nic_word(dev, BCN_DMATIME, 1023);
- write_nic_word(dev, BCN_INTERVAL, net->beacon_interval);
-// write_nic_word(dev, BcnIntTime, 100);
- write_nic_word(dev, BCN_DRV_EARLY_INT, 1);
- write_nic_byte(dev, BCN_ERR_THRESH, 100);
+ if (priv->ieee80211->iw_mode == IW_MODE_ADHOC) {
+ write_nic_word(dev, ATIMWND, 2);
+ write_nic_word(dev, BCN_DMATIME, 1023);
+ write_nic_word(dev, BCN_INTERVAL, net->beacon_interval);
+ write_nic_word(dev, BCN_DRV_EARLY_INT, 1);
+ write_nic_byte(dev, BCN_ERR_THRESH, 100);
BcnTimeCfg |= (BcnCW<<BCN_TCFG_CW_SHIFT);
- // TODO: BcnIFS may required to be changed on ASIC
+ // TODO: BcnIFS may required to be changed on ASIC
BcnTimeCfg |= BcnIFS<<BCN_TCFG_IFS;
- write_nic_word(dev, BCN_TCFG, BcnTimeCfg);
+ write_nic_word(dev, BCN_TCFG, BcnTimeCfg);
}
@@ -1649,46 +1480,37 @@ void rtl8192_net_update(struct net_device *dev)
//temporary hw beacon is not used any more.
//open it when necessary
-void rtl819xusb_beacon_tx(struct net_device *dev,u16 tx_rate)
+void rtl819xusb_beacon_tx(struct net_device *dev, u16 tx_rate)
{
}
inline u8 rtl8192_IsWirelessBMode(u16 rate)
{
- if( ((rate <= 110) && (rate != 60) && (rate != 90)) || (rate == 220) )
+ if (((rate <= 110) && (rate != 60) && (rate != 90)) || (rate == 220))
return 1;
else return 0;
}
u16 N_DBPSOfRate(u16 DataRate);
-u16 ComputeTxTime(
- u16 FrameLength,
- u16 DataRate,
- u8 bManagementFrame,
- u8 bShortPreamble
-)
+u16 ComputeTxTime(u16 FrameLength, u16 DataRate, u8 bManagementFrame,
+ u8 bShortPreamble)
{
u16 FrameTime;
u16 N_DBPS;
u16 Ceiling;
- if( rtl8192_IsWirelessBMode(DataRate) )
- {
- if( bManagementFrame || !bShortPreamble || DataRate == 10 )
- { // long preamble
+ if (rtl8192_IsWirelessBMode(DataRate)) {
+ if (bManagementFrame || !bShortPreamble || DataRate == 10) // long preamble
FrameTime = (u16)(144+48+(FrameLength*8/(DataRate/10)));
- }
- else
- { // Short preamble
+ else // Short preamble
FrameTime = (u16)(72+24+(FrameLength*8/(DataRate/10)));
- }
- if( ( FrameLength*8 % (DataRate/10) ) != 0 ) //Get the Ceilling
- FrameTime ++;
+ if ((FrameLength*8 % (DataRate/10)) != 0) //Get the Ceilling
+ FrameTime++;
} else { //802.11g DSSS-OFDM PLCP length field calculation.
N_DBPS = N_DBPSOfRate(DataRate);
Ceiling = (16 + 8*FrameLength + 6) / N_DBPS
- + (((16 + 8*FrameLength + 6) % N_DBPS) ? 1 : 0);
+ + (((16 + 8*FrameLength + 6) % N_DBPS) ? 1 : 0);
FrameTime = (u16)(16 + 4 + 4*Ceiling + 6);
}
return FrameTime;
@@ -1696,47 +1518,46 @@ u16 ComputeTxTime(
u16 N_DBPSOfRate(u16 DataRate)
{
- u16 N_DBPS = 24;
+ u16 N_DBPS = 24;
- switch(DataRate)
- {
- case 60:
- N_DBPS = 24;
- break;
+ switch (DataRate) {
+ case 60:
+ N_DBPS = 24;
+ break;
- case 90:
- N_DBPS = 36;
- break;
+ case 90:
+ N_DBPS = 36;
+ break;
- case 120:
- N_DBPS = 48;
- break;
+ case 120:
+ N_DBPS = 48;
+ break;
- case 180:
- N_DBPS = 72;
- break;
+ case 180:
+ N_DBPS = 72;
+ break;
- case 240:
- N_DBPS = 96;
- break;
+ case 240:
+ N_DBPS = 96;
+ break;
- case 360:
- N_DBPS = 144;
- break;
+ case 360:
+ N_DBPS = 144;
+ break;
- case 480:
- N_DBPS = 192;
- break;
+ case 480:
+ N_DBPS = 192;
+ break;
- case 540:
- N_DBPS = 216;
- break;
+ case 540:
+ N_DBPS = 216;
+ break;
- default:
- break;
- }
+ default:
+ break;
+ }
- return N_DBPS;
+ return N_DBPS;
}
void rtl819xU_cmd_isr(struct urb *tx_cmd_urb, struct pt_regs *regs)
@@ -1744,11 +1565,10 @@ void rtl819xU_cmd_isr(struct urb *tx_cmd_urb, struct pt_regs *regs)
usb_free_urb(tx_cmd_urb);
}
-unsigned int txqueue2outpipe(struct r8192_priv* priv,unsigned int tx_queue) {
-
- if(tx_queue >= 9)
- {
- RT_TRACE(COMP_ERR,"%s():Unknown queue ID!!!\n",__FUNCTION__);
+unsigned int txqueue2outpipe(struct r8192_priv *priv, unsigned int tx_queue)
+{
+ if (tx_queue >= 9) {
+ RT_TRACE(COMP_ERR, "%s():Unknown queue ID!!!\n", __func__);
return 0x04;
}
return priv->txqueue_to_outpipemap[tx_queue];
@@ -1757,19 +1577,16 @@ unsigned int txqueue2outpipe(struct r8192_priv* priv,unsigned int tx_queue) {
short rtl819xU_tx_cmd(struct net_device *dev, struct sk_buff *skb)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- //u8 *tx;
int status;
struct urb *tx_urb;
- //int urb_buf_len;
unsigned int idx_pipe;
tx_desc_cmd_819x_usb *pdesc = (tx_desc_cmd_819x_usb *)skb->data;
cb_desc *tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
u8 queue_index = tcb_desc->queue_index;
- //printk("\n %s::queue_index = %d\n",__FUNCTION__, queue_index);
atomic_inc(&priv->tx_pending[queue_index]);
- tx_urb = usb_alloc_urb(0,GFP_ATOMIC);
- if(!tx_urb){
+ tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!tx_urb) {
dev_kfree_skb(skb);
return -ENOMEM;
}
@@ -1788,27 +1605,26 @@ short rtl819xU_tx_cmd(struct net_device *dev, struct sk_buff *skb)
//----------------------------------------------------------------------------
// Get index to out pipe from specified QueueID.
#ifndef USE_ONE_PIPE
- idx_pipe = txqueue2outpipe(priv,queue_index);
+ idx_pipe = txqueue2outpipe(priv, queue_index);
#else
idx_pipe = 0x04;
#endif
#ifdef JOHN_DUMP_TXDESC
int i;
- printk("<Tx descriptor>--rate %x---",rate);
+ printk("<Tx descriptor>--rate %x---", rate);
for (i = 0; i < 8; i++)
printk("%8x ", tx[i]);
printk("\n");
#endif
- usb_fill_bulk_urb(tx_urb,priv->udev, usb_sndbulkpipe(priv->udev,idx_pipe), \
- skb->data, skb->len, rtl8192_tx_isr, skb);
+ usb_fill_bulk_urb(tx_urb, priv->udev, usb_sndbulkpipe(priv->udev, idx_pipe),
+ skb->data, skb->len, rtl8192_tx_isr, skb);
status = usb_submit_urb(tx_urb, GFP_ATOMIC);
- if (!status){
+ if (!status) {
return 0;
- }else{
- DMESGE("Error TX CMD URB, error %d",
- status);
+ } else {
+ DMESGE("Error TX CMD URB, error %d", status);
return -1;
}
}
@@ -1824,21 +1640,21 @@ u8 MapHwQueueToFirmwareQueue(u8 QueueID)
{
u8 QueueSelect = 0x0; //defualt set to
- switch(QueueID) {
+ switch (QueueID) {
case BE_QUEUE:
- QueueSelect = QSLT_BE; //or QSelect = pTcb->priority;
+ QueueSelect = QSLT_BE;
break;
case BK_QUEUE:
- QueueSelect = QSLT_BK; //or QSelect = pTcb->priority;
+ QueueSelect = QSLT_BK;
break;
case VO_QUEUE:
- QueueSelect = QSLT_VO; //or QSelect = pTcb->priority;
+ QueueSelect = QSLT_VO;
break;
case VI_QUEUE:
- QueueSelect = QSLT_VI; //or QSelect = pTcb->priority;
+ QueueSelect = QSLT_VI;
break;
case MGNT_QUEUE:
QueueSelect = QSLT_MGNT;
@@ -1850,11 +1666,9 @@ u8 MapHwQueueToFirmwareQueue(u8 QueueID)
// TODO: 2006.10.30 mark other queue selection until we verify it is OK
// TODO: Remove Assertions
-//#if (RTL819X_FPGA_VER & RTL819X_FPGA_GUANGAN_070502)
case TXCMD_QUEUE:
QueueSelect = QSLT_CMD;
break;
-//#endif
case HIGH_QUEUE:
QueueSelect = QSLT_HIGH;
break;
@@ -1870,7 +1684,7 @@ u8 MRateToHwRate8190Pci(u8 rate)
{
u8 ret = DESC90_RATE1M;
- switch(rate) {
+ switch (rate) {
case MGN_1M: ret = DESC90_RATE1M; break;
case MGN_2M: ret = DESC90_RATE2M; break;
case MGN_5_5M: ret = DESC90_RATE5_5M; break;
@@ -1913,9 +1727,9 @@ u8 QueryIsShort(u8 TxHT, u8 TxRate, cb_desc *tcb_desc)
{
u8 tmp_Short;
- tmp_Short = (TxHT==1)?((tcb_desc->bUseShortGI)?1:0):((tcb_desc->bUseShortPreamble)?1:0);
+ tmp_Short = (TxHT == 1) ? ((tcb_desc->bUseShortGI) ? 1 : 0) : ((tcb_desc->bUseShortPreamble) ? 1 : 0);
- if(TxHT==1 && TxRate != DESC90_RATEMCS15)
+ if (TxHT == 1 && TxRate != DESC90_RATEMCS15)
tmp_Short = 0;
return tmp_Short;
@@ -1931,7 +1745,7 @@ static void tx_zero_isr(struct urb *tx_urb)
* skb->cb will contain all the following information,
* priority, morefrag, rate, &dev.
* */
-short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
+short rtl8192_tx(struct net_device *dev, struct sk_buff *skb)
{
struct r8192_priv *priv = ieee80211_priv(dev);
cb_desc *tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
@@ -1941,35 +1755,32 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
int pend;
int status;
struct urb *tx_urb = NULL, *tx_urb_zero = NULL;
- //int urb_len;
unsigned int idx_pipe;
-// RT_DEBUG_DATA(COMP_SEND, tcb_desc, sizeof(cb_desc));
-// printk("=============> %s\n", __FUNCTION__);
pend = atomic_read(&priv->tx_pending[tcb_desc->queue_index]);
/* we are locked here so the two atomic_read and inc are executed
* without interleaves
* !!! For debug purpose
*/
- if( pend > MAX_TX_URB){
- printk("To discard skb packet!\n");
+ if (pend > MAX_TX_URB) {
+ netdev_dbg(dev, "To discard skb packet!\n");
dev_kfree_skb_any(skb);
return -1;
}
- tx_urb = usb_alloc_urb(0,GFP_ATOMIC);
- if(!tx_urb){
+ tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!tx_urb) {
dev_kfree_skb_any(skb);
return -ENOMEM;
}
/* Fill Tx firmware info */
- memset(tx_fwinfo,0,sizeof(tx_fwinfo_819x_usb));
+ memset(tx_fwinfo, 0, sizeof(tx_fwinfo_819x_usb));
/* DWORD 0 */
- tx_fwinfo->TxHT = (tcb_desc->data_rate&0x80)?1:0;
+ tx_fwinfo->TxHT = (tcb_desc->data_rate&0x80) ? 1 : 0;
tx_fwinfo->TxRate = MRateToHwRate8190Pci(tcb_desc->data_rate);
tx_fwinfo->EnableCPUDur = tcb_desc->bTxEnableFwCalcDur;
tx_fwinfo->Short = QueryIsShort(tx_fwinfo->TxHT, tx_fwinfo->TxRate, tcb_desc);
- if(tcb_desc->bAMPDUEnable) {//AMPDU enabled
+ if (tcb_desc->bAMPDUEnable) {//AMPDU enabled
tx_fwinfo->AllowAggregation = 1;
/* DWORD 1 */
tx_fwinfo->RxMF = tcb_desc->ampdu_factor;
@@ -1982,20 +1793,19 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
}
/* Protection mode related */
- tx_fwinfo->RtsEnable = (tcb_desc->bRTSEnable)?1:0;
- tx_fwinfo->CtsEnable = (tcb_desc->bCTSEnable)?1:0;
- tx_fwinfo->RtsSTBC = (tcb_desc->bRTSSTBC)?1:0;
- tx_fwinfo->RtsHT = (tcb_desc->rts_rate&0x80)?1:0;
+ tx_fwinfo->RtsEnable = (tcb_desc->bRTSEnable) ? 1 : 0;
+ tx_fwinfo->CtsEnable = (tcb_desc->bCTSEnable) ? 1 : 0;
+ tx_fwinfo->RtsSTBC = (tcb_desc->bRTSSTBC) ? 1 : 0;
+ tx_fwinfo->RtsHT = (tcb_desc->rts_rate&0x80) ? 1 : 0;
tx_fwinfo->RtsRate = MRateToHwRate8190Pci((u8)tcb_desc->rts_rate);
- tx_fwinfo->RtsSubcarrier = (tx_fwinfo->RtsHT==0)?(tcb_desc->RTSSC):0;
- tx_fwinfo->RtsBandwidth = (tx_fwinfo->RtsHT==1)?((tcb_desc->bRTSBW)?1:0):0;
- tx_fwinfo->RtsShort = (tx_fwinfo->RtsHT==0)?(tcb_desc->bRTSUseShortPreamble?1:0):\
- (tcb_desc->bRTSUseShortGI?1:0);
+ tx_fwinfo->RtsSubcarrier = (tx_fwinfo->RtsHT == 0) ? (tcb_desc->RTSSC) : 0;
+ tx_fwinfo->RtsBandwidth = (tx_fwinfo->RtsHT == 1) ? ((tcb_desc->bRTSBW) ? 1 : 0) : 0;
+ tx_fwinfo->RtsShort = (tx_fwinfo->RtsHT == 0) ? (tcb_desc->bRTSUseShortPreamble ? 1 : 0) :
+ (tcb_desc->bRTSUseShortGI ? 1 : 0);
/* Set Bandwidth and sub-channel settings. */
- if(priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20_40)
- {
- if(tcb_desc->bPacketBW) {
+ if (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20_40) {
+ if (tcb_desc->bPacketBW) {
tx_fwinfo->TxBandwidth = 1;
tx_fwinfo->TxSubCarrier = 0; //By SD3's Jerry suggestion, use duplicated mode
} else {
@@ -2009,9 +1819,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
if (tcb_desc->drv_agg_enable)
- {
tx_fwinfo->Tx_INFO_RSVD = (tcb_desc->DrvAggrNum & 0x1f) << 1;
- }
#endif
/* Fill Tx descriptor */
memset(tx_desc, 0, sizeof(tx_desc_819x_usb));
@@ -2021,45 +1829,40 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
tx_desc->Offset = sizeof(tx_fwinfo_819x_usb) + 8;
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
- if (tcb_desc->drv_agg_enable) {
+ if (tcb_desc->drv_agg_enable)
tx_desc->PktSize = tcb_desc->pkt_size;
- } else
+ else
#endif
{
tx_desc->PktSize = (skb->len - TX_PACKET_SHIFT_BYTES) & 0xffff;
}
/*DWORD 1*/
- tx_desc->SecCAMID= 0;
+ tx_desc->SecCAMID = 0;
tx_desc->RATid = tcb_desc->RATRIndex;
- {
- //MPDUOverhead = 0;
- tx_desc->NoEnc = 1;
- }
+ tx_desc->NoEnc = 1;
tx_desc->SecType = 0x0;
- if (tcb_desc->bHwSec)
- {
- switch (priv->ieee80211->pairwise_key_type)
- {
- case KEY_TYPE_WEP40:
- case KEY_TYPE_WEP104:
- tx_desc->SecType = 0x1;
- tx_desc->NoEnc = 0;
- break;
- case KEY_TYPE_TKIP:
- tx_desc->SecType = 0x2;
- tx_desc->NoEnc = 0;
- break;
- case KEY_TYPE_CCMP:
- tx_desc->SecType = 0x3;
- tx_desc->NoEnc = 0;
- break;
- case KEY_TYPE_NA:
- tx_desc->SecType = 0x0;
- tx_desc->NoEnc = 1;
- break;
- }
- }
+ if (tcb_desc->bHwSec) {
+ switch (priv->ieee80211->pairwise_key_type) {
+ case KEY_TYPE_WEP40:
+ case KEY_TYPE_WEP104:
+ tx_desc->SecType = 0x1;
+ tx_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_TKIP:
+ tx_desc->SecType = 0x2;
+ tx_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_CCMP:
+ tx_desc->SecType = 0x3;
+ tx_desc->NoEnc = 0;
+ break;
+ case KEY_TYPE_NA:
+ tx_desc->SecType = 0x0;
+ tx_desc->NoEnc = 1;
+ break;
+ }
+ }
tx_desc->QueueSelect = MapHwQueueToFirmwareQueue(tcb_desc->queue_index);
tx_desc->TxFWInfoSize = sizeof(tx_fwinfo_819x_usb);
@@ -2084,48 +1887,41 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
}
/* Get index to out pipe from specified QueueID */
#ifndef USE_ONE_PIPE
- idx_pipe = txqueue2outpipe(priv,tcb_desc->queue_index);
+ idx_pipe = txqueue2outpipe(priv, tcb_desc->queue_index);
#else
idx_pipe = 0x5;
#endif
- //RT_DEBUG_DATA(COMP_SEND,tx_fwinfo,sizeof(tx_fwinfo_819x_usb));
- //RT_DEBUG_DATA(COMP_SEND,tx_desc,sizeof(tx_desc_819x_usb));
-
/* To submit bulk urb */
- usb_fill_bulk_urb(tx_urb,udev,
- usb_sndbulkpipe(udev,idx_pipe), skb->data,
- skb->len, rtl8192_tx_isr, skb);
+ usb_fill_bulk_urb(tx_urb, udev,
+ usb_sndbulkpipe(udev, idx_pipe), skb->data,
+ skb->len, rtl8192_tx_isr, skb);
status = usb_submit_urb(tx_urb, GFP_ATOMIC);
- if (!status){
-//we need to send 0 byte packet whenever 512N bytes/64N(HIGN SPEED/NORMAL SPEED) bytes packet has been transmitted. Otherwise, it will be halt to wait for another packet. WB. 2008.08.27
+ if (!status) {
+ //we need to send 0 byte packet whenever 512N bytes/64N(HIGN SPEED/NORMAL SPEED) bytes packet has been transmitted. Otherwise, it will be halt to wait for another packet. WB. 2008.08.27
bool bSend0Byte = false;
u8 zero = 0;
- if(udev->speed == USB_SPEED_HIGH)
- {
+ if (udev->speed == USB_SPEED_HIGH) {
if (skb->len > 0 && skb->len % 512 == 0)
bSend0Byte = true;
- }
- else
- {
+ } else {
if (skb->len > 0 && skb->len % 64 == 0)
bSend0Byte = true;
}
- if (bSend0Byte)
- {
- tx_urb_zero = usb_alloc_urb(0,GFP_ATOMIC);
- if(!tx_urb_zero){
+ if (bSend0Byte) {
+ tx_urb_zero = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!tx_urb_zero) {
RT_TRACE(COMP_ERR, "can't alloc urb for zero byte\n");
return -ENOMEM;
}
- usb_fill_bulk_urb(tx_urb_zero,udev,
- usb_sndbulkpipe(udev,idx_pipe), &zero,
- 0, tx_zero_isr, dev);
+ usb_fill_bulk_urb(tx_urb_zero, udev,
+ usb_sndbulkpipe(udev, idx_pipe), &zero,
+ 0, tx_zero_isr, dev);
status = usb_submit_urb(tx_urb_zero, GFP_ATOMIC);
- if (status){
- RT_TRACE(COMP_ERR, "Error TX URB for zero byte %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status);
- return -1;
+ if (status) {
+ RT_TRACE(COMP_ERR, "Error TX URB for zero byte %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status);
+ return -1;
}
}
dev->trans_start = jiffies;
@@ -2133,7 +1929,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff* skb)
return 0;
} else {
RT_TRACE(COMP_ERR, "Error TX URB %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]),
- status);
+ status);
return -1;
}
}
@@ -2143,14 +1939,14 @@ short rtl8192_usb_initendpoints(struct net_device *dev)
struct r8192_priv *priv = ieee80211_priv(dev);
priv->rx_urb = kmalloc(sizeof(struct urb *) * (MAX_RX_URB+1),
- GFP_KERNEL);
+ GFP_KERNEL);
if (priv->rx_urb == NULL)
return -ENOMEM;
#ifndef JACKSON_NEW_RX
- for(i=0;i<(MAX_RX_URB+1);i++){
+ for (i = 0; i < (MAX_RX_URB+1); i++) {
- priv->rx_urb[i] = usb_alloc_urb(0,GFP_KERNEL);
+ priv->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
priv->rx_urb[i]->transfer_buffer = kmalloc(RX_URB_SIZE, GFP_KERNEL);
@@ -2159,26 +1955,26 @@ short rtl8192_usb_initendpoints(struct net_device *dev)
#endif
#ifdef THOMAS_BEACON
-{
- long align = 0;
- void *oldaddr, *newaddr;
-
- priv->rx_urb[16] = usb_alloc_urb(0, GFP_KERNEL);
- priv->oldaddr = kmalloc(16, GFP_KERNEL);
- oldaddr = priv->oldaddr;
- align = ((long)oldaddr) & 3;
- if (align) {
- newaddr = oldaddr + 4 - align;
- priv->rx_urb[16]->transfer_buffer_length = 16 - 4 + align;
- } else {
- newaddr = oldaddr;
- priv->rx_urb[16]->transfer_buffer_length = 16;
+ {
+ long align = 0;
+ void *oldaddr, *newaddr;
+
+ priv->rx_urb[16] = usb_alloc_urb(0, GFP_KERNEL);
+ priv->oldaddr = kmalloc(16, GFP_KERNEL);
+ oldaddr = priv->oldaddr;
+ align = ((long)oldaddr) & 3;
+ if (align) {
+ newaddr = oldaddr + 4 - align;
+ priv->rx_urb[16]->transfer_buffer_length = 16 - 4 + align;
+ } else {
+ newaddr = oldaddr;
+ priv->rx_urb[16]->transfer_buffer_length = 16;
+ }
+ priv->rx_urb[16]->transfer_buffer = newaddr;
}
- priv->rx_urb[16]->transfer_buffer = newaddr;
-}
#endif
- memset(priv->rx_urb, 0, sizeof(struct urb*) * MAX_RX_URB);
+ memset(priv->rx_urb, 0, sizeof(struct urb *) * MAX_RX_URB);
priv->pp_rxskb = kcalloc(MAX_RX_URB, sizeof(struct sk_buff *),
GFP_KERNEL);
if (!priv->pp_rxskb) {
@@ -2191,7 +1987,7 @@ short rtl8192_usb_initendpoints(struct net_device *dev)
return -ENOMEM;
}
- printk("End of initendpoints\n");
+ netdev_dbg(dev, "End of initendpoints\n");
return 0;
}
@@ -2201,8 +1997,8 @@ void rtl8192_usb_deleteendpoints(struct net_device *dev)
int i;
struct r8192_priv *priv = ieee80211_priv(dev);
- if(priv->rx_urb){
- for(i=0;i<(MAX_RX_URB+1);i++){
+ if (priv->rx_urb) {
+ for (i = 0; i < (MAX_RX_URB+1); i++) {
usb_kill_urb(priv->rx_urb[i]);
usb_free_urb(priv->rx_urb[i]);
}
@@ -2224,8 +2020,8 @@ void rtl8192_usb_deleteendpoints(struct net_device *dev)
#ifndef JACKSON_NEW_RX
- if(priv->rx_urb){
- for(i=0;i<(MAX_RX_URB+1);i++){
+ if (priv->rx_urb) {
+ for (i = 0; i < (MAX_RX_URB+1); i++) {
usb_kill_urb(priv->rx_urb[i]);
kfree(priv->rx_urb[i]->transfer_buffer);
usb_free_urb(priv->rx_urb[i]);
@@ -2249,54 +2045,45 @@ void rtl8192_usb_deleteendpoints(struct net_device *dev)
}
#endif
-extern void rtl8192_update_ratr_table(struct net_device* dev);
+extern void rtl8192_update_ratr_table(struct net_device *dev);
void rtl8192_link_change(struct net_device *dev)
{
-// int i;
-
struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
- //write_nic_word(dev, BCN_INTR_ITV, net->beacon_interval);
- if (ieee->state == IEEE80211_LINKED)
- {
+ struct ieee80211_device *ieee = priv->ieee80211;
+ if (ieee->state == IEEE80211_LINKED) {
rtl8192_net_update(dev);
rtl8192_update_ratr_table(dev);
//add this as in pure N mode, wep encryption will use software way, but there is no chance to set this as wep will not set group key in wext. WB.2008.07.08
if ((KEY_TYPE_WEP40 == ieee->pairwise_key_type) || (KEY_TYPE_WEP104 == ieee->pairwise_key_type))
- EnableHWSecurityConfig8192(dev);
+ EnableHWSecurityConfig8192(dev);
}
/*update timing params*/
-// RT_TRACE(COMP_CH, "========>%s(), chan:%d\n", __FUNCTION__, priv->chan);
-// rtl8192_set_chan(dev, priv->chan);
- if (ieee->iw_mode == IW_MODE_INFRA || ieee->iw_mode == IW_MODE_ADHOC)
- {
+ if (ieee->iw_mode == IW_MODE_INFRA || ieee->iw_mode == IW_MODE_ADHOC) {
u32 reg = 0;
- reg = read_nic_dword(dev, RCR);
+ read_nic_dword(dev, RCR, &reg);
if (priv->ieee80211->state == IEEE80211_LINKED)
priv->ReceiveConfig = reg |= RCR_CBSSID;
else
priv->ReceiveConfig = reg &= ~RCR_CBSSID;
write_nic_dword(dev, RCR, reg);
}
-
-// rtl8192_set_rxconf(dev);
}
static struct ieee80211_qos_parameters def_qos_parameters = {
- {3,3,3,3},/* cw_min */
- {7,7,7,7},/* cw_max */
- {2,2,2,2},/* aifs */
- {0,0,0,0},/* flags */
- {0,0,0,0} /* tx_op_limit */
+ {3, 3, 3, 3},/* cw_min */
+ {7, 7, 7, 7},/* cw_max */
+ {2, 2, 2, 2},/* aifs */
+ {0, 0, 0, 0},/* flags */
+ {0, 0, 0, 0} /* tx_op_limit */
};
-void rtl8192_update_beacon(struct work_struct * work)
+void rtl8192_update_beacon(struct work_struct *work)
{
struct r8192_priv *priv = container_of(work, struct r8192_priv, update_beacon_wq.work);
struct net_device *dev = priv->ieee80211->dev;
- struct ieee80211_device* ieee = priv->ieee80211;
- struct ieee80211_network* net = &ieee->current_network;
+ struct ieee80211_device *ieee = priv->ieee80211;
+ struct ieee80211_network *net = &ieee->current_network;
if (ieee->pHTInfo->bCurrentHTSupport)
HTUpdateSelfAndPeerSetting(ieee, net);
@@ -2306,14 +2093,13 @@ void rtl8192_update_beacon(struct work_struct * work)
/*
* background support to run QoS activate functionality
*/
-int WDCAPARA_ADD[] = {EDCAPARA_BE,EDCAPARA_BK,EDCAPARA_VI,EDCAPARA_VO};
-void rtl8192_qos_activate(struct work_struct * work)
+int WDCAPARA_ADD[] = {EDCAPARA_BE, EDCAPARA_BK, EDCAPARA_VI, EDCAPARA_VO};
+void rtl8192_qos_activate(struct work_struct *work)
{
struct r8192_priv *priv = container_of(work, struct r8192_priv, qos_activate);
struct net_device *dev = priv->ieee80211->dev;
struct ieee80211_qos_parameters *qos_parameters = &priv->ieee80211->current_network.qos_data.parameters;
u8 mode = priv->ieee80211->current_network.mode;
- //u32 size = sizeof(struct ieee80211_qos_parameters);
u8 u1bAIFS;
u32 u4bAcParam;
int i;
@@ -2321,37 +2107,36 @@ void rtl8192_qos_activate(struct work_struct * work)
if (priv == NULL)
return;
- mutex_lock(&priv->mutex);
- if(priv->ieee80211->state != IEEE80211_LINKED)
+ mutex_lock(&priv->mutex);
+ if (priv->ieee80211->state != IEEE80211_LINKED)
goto success;
- RT_TRACE(COMP_QOS,"qos active process with associate response received\n");
+ RT_TRACE(COMP_QOS, "qos active process with associate response received\n");
/* It better set slot time at first */
/* For we just support b/g mode at present, let the slot time at 9/20 selection */
/* update the ac parameter to related registers */
- for(i = 0; i < QOS_QUEUE_NUM; i++) {
+ for (i = 0; i < QOS_QUEUE_NUM; i++) {
//Mode G/A: slotTimeTimer = 9; Mode B: 20
- u1bAIFS = qos_parameters->aifs[i] * ((mode&(IEEE_G|IEEE_N_24G)) ?9:20) + aSifsTime;
+ u1bAIFS = qos_parameters->aifs[i] * ((mode&(IEEE_G|IEEE_N_24G)) ? 9 : 20) + aSifsTime;
u4bAcParam = ((((u32)(qos_parameters->tx_op_limit[i]))<< AC_PARAM_TXOP_LIMIT_OFFSET)|
- (((u32)(qos_parameters->cw_max[i]))<< AC_PARAM_ECW_MAX_OFFSET)|
- (((u32)(qos_parameters->cw_min[i]))<< AC_PARAM_ECW_MIN_OFFSET)|
- ((u32)u1bAIFS << AC_PARAM_AIFS_OFFSET));
+ (((u32)(qos_parameters->cw_max[i]))<< AC_PARAM_ECW_MAX_OFFSET)|
+ (((u32)(qos_parameters->cw_min[i]))<< AC_PARAM_ECW_MIN_OFFSET)|
+ ((u32)u1bAIFS << AC_PARAM_AIFS_OFFSET));
write_nic_dword(dev, WDCAPARA_ADD[i], u4bAcParam);
- //write_nic_dword(dev, WDCAPARA_ADD[i], 0x005e4332);
}
success:
- mutex_unlock(&priv->mutex);
+ mutex_unlock(&priv->mutex);
}
static int rtl8192_qos_handle_probe_response(struct r8192_priv *priv,
- int active_network,
- struct ieee80211_network *network)
+ int active_network,
+ struct ieee80211_network *network)
{
int ret = 0;
u32 size = sizeof(struct ieee80211_qos_parameters);
- if(priv->ieee80211->state !=IEEE80211_LINKED)
+ if (priv->ieee80211->state != IEEE80211_LINKED)
return ret;
if ((priv->ieee80211->iw_mode != IW_MODE_INFRA))
@@ -2359,21 +2144,21 @@ static int rtl8192_qos_handle_probe_response(struct r8192_priv *priv,
if (network->flags & NETWORK_HAS_QOS_MASK) {
if (active_network &&
- (network->flags & NETWORK_HAS_QOS_PARAMETERS))
+ (network->flags & NETWORK_HAS_QOS_PARAMETERS))
network->qos_data.active = network->qos_data.supported;
if ((network->qos_data.active == 1) && (active_network == 1) &&
- (network->flags & NETWORK_HAS_QOS_PARAMETERS) &&
- (network->qos_data.old_param_count !=
- network->qos_data.param_count)) {
+ (network->flags & NETWORK_HAS_QOS_PARAMETERS) &&
+ (network->qos_data.old_param_count !=
+ network->qos_data.param_count)) {
network->qos_data.old_param_count =
network->qos_data.param_count;
queue_work(priv->priv_wq, &priv->qos_activate);
- RT_TRACE (COMP_QOS, "QoS parameters change call "
- "qos_activate\n");
+ RT_TRACE(COMP_QOS, "QoS parameters change call "
+ "qos_activate\n");
}
} else {
- memcpy(&priv->ieee80211->current_network.qos_data.parameters,\
+ memcpy(&priv->ieee80211->current_network.qos_data.parameters,
&def_qos_parameters, size);
if ((network->qos_data.active == 1) && (active_network == 1)) {
@@ -2388,13 +2173,13 @@ static int rtl8192_qos_handle_probe_response(struct r8192_priv *priv,
}
/* handle and manage frame from beacon and probe response */
-static int rtl8192_handle_beacon(struct net_device * dev,
- struct ieee80211_beacon * beacon,
- struct ieee80211_network * network)
+static int rtl8192_handle_beacon(struct net_device *dev,
+ struct ieee80211_beacon *beacon,
+ struct ieee80211_network *network)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- rtl8192_qos_handle_probe_response(priv,1,network);
+ rtl8192_qos_handle_probe_response(priv, 1, network);
queue_delayed_work(priv->priv_wq, &priv->update_beacon_wq, 0);
return 0;
@@ -2406,7 +2191,7 @@ static int rtl8192_handle_beacon(struct net_device * dev,
* setting
*/
static int rtl8192_qos_association_resp(struct r8192_priv *priv,
- struct ieee80211_network *network)
+ struct ieee80211_network *network)
{
int ret = 0;
unsigned long flags;
@@ -2416,28 +2201,26 @@ static int rtl8192_qos_association_resp(struct r8192_priv *priv,
if ((priv == NULL) || (network == NULL))
return ret;
- if(priv->ieee80211->state !=IEEE80211_LINKED)
+ if (priv->ieee80211->state != IEEE80211_LINKED)
return ret;
if ((priv->ieee80211->iw_mode != IW_MODE_INFRA))
return ret;
spin_lock_irqsave(&priv->ieee80211->lock, flags);
- if(network->flags & NETWORK_HAS_QOS_PARAMETERS) {
- memcpy(&priv->ieee80211->current_network.qos_data.parameters,\
- &network->qos_data.parameters,\
- sizeof(struct ieee80211_qos_parameters));
+ if (network->flags & NETWORK_HAS_QOS_PARAMETERS) {
+ memcpy(&priv->ieee80211->current_network.qos_data.parameters,
+ &network->qos_data.parameters,
+ sizeof(struct ieee80211_qos_parameters));
priv->ieee80211->current_network.qos_data.active = 1;
- {
- set_qos_param = 1;
- /* update qos parameter for current network */
- priv->ieee80211->current_network.qos_data.old_param_count = \
- priv->ieee80211->current_network.qos_data.param_count;
- priv->ieee80211->current_network.qos_data.param_count = \
- network->qos_data.param_count;
- }
+ set_qos_param = 1;
+ /* update qos parameter for current network */
+ priv->ieee80211->current_network.qos_data.old_param_count =
+ priv->ieee80211->current_network.qos_data.param_count;
+ priv->ieee80211->current_network.qos_data.param_count =
+ network->qos_data.param_count;
} else {
- memcpy(&priv->ieee80211->current_network.qos_data.parameters,\
+ memcpy(&priv->ieee80211->current_network.qos_data.parameters,
&def_qos_parameters, size);
priv->ieee80211->current_network.qos_data.active = 0;
priv->ieee80211->current_network.qos_data.supported = 0;
@@ -2446,7 +2229,7 @@ static int rtl8192_qos_association_resp(struct r8192_priv *priv,
spin_unlock_irqrestore(&priv->ieee80211->lock, flags);
- RT_TRACE(COMP_QOS, "%s: network->flags = %d,%d\n",__FUNCTION__,network->flags ,priv->ieee80211->current_network.qos_data.active);
+ RT_TRACE(COMP_QOS, "%s: network->flags = %d,%d\n", __func__, network->flags, priv->ieee80211->current_network.qos_data.active);
if (set_qos_param == 1)
queue_work(priv->priv_wq, &priv->qos_activate);
@@ -2456,8 +2239,8 @@ static int rtl8192_qos_association_resp(struct r8192_priv *priv,
static int rtl8192_handle_assoc_response(struct net_device *dev,
- struct ieee80211_assoc_response_frame *resp,
- struct ieee80211_network *network)
+ struct ieee80211_assoc_response_frame *resp,
+ struct ieee80211_network *network)
{
struct r8192_priv *priv = ieee80211_priv(dev);
rtl8192_qos_association_resp(priv, network);
@@ -2465,79 +2248,70 @@ static int rtl8192_handle_assoc_response(struct net_device *dev,
}
-void rtl8192_update_ratr_table(struct net_device* dev)
- // POCTET_STRING posLegacyRate,
- // u8* pMcsRate)
- // PRT_WLAN_STA pEntry)
+void rtl8192_update_ratr_table(struct net_device *dev)
{
- struct r8192_priv* priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
- u8* pMcsRate = ieee->dot11HTOperationalRateSet;
- //struct ieee80211_network *net = &ieee->current_network;
+ struct r8192_priv *priv = ieee80211_priv(dev);
+ struct ieee80211_device *ieee = priv->ieee80211;
+ u8 *pMcsRate = ieee->dot11HTOperationalRateSet;
u32 ratr_value = 0;
u8 rate_index = 0;
- rtl8192_config_rate(dev, (u16*)(&ratr_value));
- ratr_value |= (*(u16*)(pMcsRate)) << 12;
-// switch (net->mode)
- switch (ieee->mode)
- {
- case IEEE_A:
- ratr_value &= 0x00000FF0;
- break;
- case IEEE_B:
- ratr_value &= 0x0000000F;
- break;
- case IEEE_G:
- ratr_value &= 0x00000FF7;
- break;
- case IEEE_N_24G:
- case IEEE_N_5G:
- if (ieee->pHTInfo->PeerMimoPs == 0) //MIMO_PS_STATIC
- ratr_value &= 0x0007F007;
- else{
- if (priv->rf_type == RF_1T2R)
- ratr_value &= 0x000FF007;
- else
- ratr_value &= 0x0F81F007;
- }
- break;
- default:
- break;
+ rtl8192_config_rate(dev, (u16 *)(&ratr_value));
+ ratr_value |= (*(u16 *)(pMcsRate)) << 12;
+ switch (ieee->mode) {
+ case IEEE_A:
+ ratr_value &= 0x00000FF0;
+ break;
+ case IEEE_B:
+ ratr_value &= 0x0000000F;
+ break;
+ case IEEE_G:
+ ratr_value &= 0x00000FF7;
+ break;
+ case IEEE_N_24G:
+ case IEEE_N_5G:
+ if (ieee->pHTInfo->PeerMimoPs == 0) {//MIMO_PS_STATIC
+ ratr_value &= 0x0007F007;
+ } else {
+ if (priv->rf_type == RF_1T2R)
+ ratr_value &= 0x000FF007;
+ else
+ ratr_value &= 0x0F81F007;
+ }
+ break;
+ default:
+ break;
}
ratr_value &= 0x0FFFFFFF;
- if(ieee->pHTInfo->bCurTxBW40MHz && ieee->pHTInfo->bCurShortGI40MHz){
+ if (ieee->pHTInfo->bCurTxBW40MHz && ieee->pHTInfo->bCurShortGI40MHz)
ratr_value |= 0x80000000;
- }else if(!ieee->pHTInfo->bCurTxBW40MHz && ieee->pHTInfo->bCurShortGI20MHz){
+ else if (!ieee->pHTInfo->bCurTxBW40MHz && ieee->pHTInfo->bCurShortGI20MHz)
ratr_value |= 0x80000000;
- }
write_nic_dword(dev, RATR0+rate_index*4, ratr_value);
write_nic_byte(dev, UFWP, 1);
}
-static u8 ccmp_ie[4] = {0x00,0x50,0xf2,0x04};
+static u8 ccmp_ie[4] = {0x00, 0x50, 0xf2, 0x04};
static u8 ccmp_rsn_ie[4] = {0x00, 0x0f, 0xac, 0x04};
-bool GetNmodeSupportBySecCfg8192(struct net_device*dev)
+bool GetNmodeSupportBySecCfg8192(struct net_device *dev)
{
- struct r8192_priv* priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
- struct ieee80211_network * network = &ieee->current_network;
- int wpa_ie_len= ieee->wpa_ie_len;
- struct ieee80211_crypt_data* crypt;
+ struct r8192_priv *priv = ieee80211_priv(dev);
+ struct ieee80211_device *ieee = priv->ieee80211;
+ struct ieee80211_network *network = &ieee->current_network;
+ int wpa_ie_len = ieee->wpa_ie_len;
+ struct ieee80211_crypt_data *crypt;
int encrypt;
crypt = ieee->crypt[ieee->tx_keyidx];
//we use connecting AP's capability instead of only security config on our driver to distinguish whether it should use N mode or G mode
- encrypt = (network->capability & WLAN_CAPABILITY_PRIVACY) || (ieee->host_encrypt && crypt && crypt->ops && (0 == strcmp(crypt->ops->name,"WEP")));
+ encrypt = (network->capability & WLAN_CAPABILITY_PRIVACY) || (ieee->host_encrypt && crypt && crypt->ops && (0 == strcmp(crypt->ops->name, "WEP")));
/* simply judge */
- if(encrypt && (wpa_ie_len == 0)) {
+ if (encrypt && (wpa_ie_len == 0)) {
/* wep encryption, no N mode setting */
return false;
-// } else if((wpa_ie_len != 0)&&(memcmp(&(ieee->wpa_ie[14]),ccmp_ie,4))) {
- } else if((wpa_ie_len != 0)) {
+ } else if ((wpa_ie_len != 0)) {
/* parse pairwise key type */
- //if((pairwisekey = WEP40)||(pairwisekey = WEP104)||(pairwisekey = TKIP))
- if (((ieee->wpa_ie[0] == 0xdd) && (!memcmp(&(ieee->wpa_ie[14]),ccmp_ie,4))) || ((ieee->wpa_ie[0] == 0x30) && (!memcmp(&ieee->wpa_ie[10],ccmp_rsn_ie, 4))))
+ if (((ieee->wpa_ie[0] == 0xdd) && (!memcmp(&(ieee->wpa_ie[14]), ccmp_ie, 4))) || ((ieee->wpa_ie[0] == 0x30) && (!memcmp(&ieee->wpa_ie[10], ccmp_rsn_ie, 4))))
return true;
else
return false;
@@ -2548,13 +2322,13 @@ bool GetNmodeSupportBySecCfg8192(struct net_device*dev)
return true;
}
-bool GetHalfNmodeSupportByAPs819xUsb(struct net_device* dev)
+bool GetHalfNmodeSupportByAPs819xUsb(struct net_device *dev)
{
bool Reval;
- struct r8192_priv* priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct r8192_priv *priv = ieee80211_priv(dev);
+ struct ieee80211_device *ieee = priv->ieee80211;
- if(ieee->bHalfWirelessN24GMode == true)
+ if (ieee->bHalfWirelessN24GMode == true)
Reval = true;
else
Reval = false;
@@ -2562,75 +2336,59 @@ bool GetHalfNmodeSupportByAPs819xUsb(struct net_device* dev)
return Reval;
}
-void rtl8192_refresh_supportrate(struct r8192_priv* priv)
+void rtl8192_refresh_supportrate(struct r8192_priv *priv)
{
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct ieee80211_device *ieee = priv->ieee80211;
//we do not consider set support rate for ABG mode, only HT MCS rate is set here.
if (ieee->mode == WIRELESS_MODE_N_24G || ieee->mode == WIRELESS_MODE_N_5G)
- {
memcpy(ieee->Regdot11HTOperationalRateSet, ieee->RegHTSuppRateSet, 16);
- //RT_DEBUG_DATA(COMP_INIT, ieee->RegHTSuppRateSet, 16);
- //RT_DEBUG_DATA(COMP_INIT, ieee->Regdot11HTOperationalRateSet, 16);
- }
else
memset(ieee->Regdot11HTOperationalRateSet, 0, 16);
return;
}
-u8 rtl8192_getSupportedWireleeMode(struct net_device*dev)
+u8 rtl8192_getSupportedWireleeMode(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 ret = 0;
- switch(priv->rf_chip)
- {
- case RF_8225:
- case RF_8256:
- case RF_PSEUDO_11N:
- ret = (WIRELESS_MODE_N_24G|WIRELESS_MODE_G|WIRELESS_MODE_B);
- break;
- case RF_8258:
- ret = (WIRELESS_MODE_A|WIRELESS_MODE_N_5G);
- break;
- default:
- ret = WIRELESS_MODE_B;
- break;
+ switch (priv->rf_chip) {
+ case RF_8225:
+ case RF_8256:
+ case RF_PSEUDO_11N:
+ ret = (WIRELESS_MODE_N_24G|WIRELESS_MODE_G|WIRELESS_MODE_B);
+ break;
+ case RF_8258:
+ ret = (WIRELESS_MODE_A|WIRELESS_MODE_N_5G);
+ break;
+ default:
+ ret = WIRELESS_MODE_B;
+ break;
}
return ret;
}
-void rtl8192_SetWirelessMode(struct net_device* dev, u8 wireless_mode)
+void rtl8192_SetWirelessMode(struct net_device *dev, u8 wireless_mode)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 bSupportMode = rtl8192_getSupportedWireleeMode(dev);
- if ((wireless_mode == WIRELESS_MODE_AUTO) || ((wireless_mode&bSupportMode)==0))
- {
- if(bSupportMode & WIRELESS_MODE_N_24G)
- {
+ if ((wireless_mode == WIRELESS_MODE_AUTO) || ((wireless_mode&bSupportMode) == 0)) {
+ if (bSupportMode & WIRELESS_MODE_N_24G) {
wireless_mode = WIRELESS_MODE_N_24G;
- }
- else if(bSupportMode & WIRELESS_MODE_N_5G)
- {
+ } else if (bSupportMode & WIRELESS_MODE_N_5G) {
wireless_mode = WIRELESS_MODE_N_5G;
- }
- else if((bSupportMode & WIRELESS_MODE_A))
- {
+ } else if ((bSupportMode & WIRELESS_MODE_A)) {
wireless_mode = WIRELESS_MODE_A;
- }
- else if((bSupportMode & WIRELESS_MODE_G))
- {
+ } else if ((bSupportMode & WIRELESS_MODE_G)) {
wireless_mode = WIRELESS_MODE_G;
- }
- else if((bSupportMode & WIRELESS_MODE_B))
- {
+ } else if ((bSupportMode & WIRELESS_MODE_B)) {
wireless_mode = WIRELESS_MODE_B;
- }
- else{
- RT_TRACE(COMP_ERR, "%s(), No valid wireless mode supported, SupportedWirelessMode(%x)!!!\n", __FUNCTION__,bSupportMode);
+ } else {
+ RT_TRACE(COMP_ERR, "%s(), No valid wireless mode supported, SupportedWirelessMode(%x)!!!\n", __func__, bSupportMode);
wireless_mode = WIRELESS_MODE_B;
}
}
#ifdef TO_DO_LIST //// TODO: this function doesn't work well at this time, we should wait for FPGA
- ActUpdateChannelAccessSetting( pAdapter, pHalData->CurrentWirelessMode, &pAdapter->MgntInfo.Info8185.ChannelAccessSetting );
+ ActUpdateChannelAccessSetting(pAdapter, pHalData->CurrentWirelessMode, &pAdapter->MgntInfo.Info8185.ChannelAccessSetting);
#endif
priv->ieee80211->mode = wireless_mode;
@@ -2643,7 +2401,7 @@ void rtl8192_SetWirelessMode(struct net_device* dev, u8 wireless_mode)
}
//init priv variables here. only non_zero value should be initialized here.
-static void rtl8192_init_priv_variable(struct net_device* dev)
+static void rtl8192_init_priv_variable(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 i;
@@ -2651,13 +2409,13 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
priv->chan = 1; //set to channel 1
priv->ieee80211->mode = WIRELESS_MODE_AUTO; //SET AUTO
priv->ieee80211->iw_mode = IW_MODE_INFRA;
- priv->ieee80211->ieee_up=0;
+ priv->ieee80211->ieee_up = 0;
priv->retry_rts = DEFAULT_RETRY_RTS;
priv->retry_data = DEFAULT_RETRY_DATA;
priv->ieee80211->rts = DEFAULT_RTS_THRESHOLD;
priv->ieee80211->rate = 110; //11 mbps
priv->ieee80211->short_slot = 1;
- priv->promisc = (dev->flags & IFF_PROMISC) ? 1:0;
+ priv->promisc = (dev->flags & IFF_PROMISC) ? 1 : 0;
priv->CckPwEnl = 6;
//for silent reset
priv->IrpPendingCount = 1;
@@ -2672,14 +2430,14 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
priv->ieee80211->softmac_features = IEEE_SOFTMAC_SCAN |
IEEE_SOFTMAC_ASSOCIATE | IEEE_SOFTMAC_PROBERQ |
IEEE_SOFTMAC_PROBERS | IEEE_SOFTMAC_TX_QUEUE |
- IEEE_SOFTMAC_BEACONS;//added by amy 080604 //| //IEEE_SOFTMAC_SINGLE_QUEUE;
+ IEEE_SOFTMAC_BEACONS;//added by amy 080604
priv->ieee80211->active_scan = 1;
priv->ieee80211->modulation = IEEE80211_CCK_MODULATION | IEEE80211_OFDM_MODULATION;
priv->ieee80211->host_encrypt = 1;
priv->ieee80211->host_decrypt = 1;
- priv->ieee80211->start_send_beacons = NULL;//rtl819xusb_beacon_tx;//-by amy 080604
- priv->ieee80211->stop_send_beacons = NULL;//rtl8192_beacon_stop;//-by amy 080604
+ priv->ieee80211->start_send_beacons = NULL; //-by amy 080604
+ priv->ieee80211->stop_send_beacons = NULL; //-by amy 080604
priv->ieee80211->softmac_hard_start_xmit = rtl8192_hard_start_xmit;
priv->ieee80211->set_chan = rtl8192_set_chan;
priv->ieee80211->link_change = rtl8192_link_change;
@@ -2693,7 +2451,6 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
priv->ieee80211->qos_support = 1;
//added by WB
-// priv->ieee80211->SwChnlByTimerHandler = rtl8192_phy_SwChnl;
priv->ieee80211->SetBWModeHandler = rtl8192_SetBWMode;
priv->ieee80211->handle_assoc_response = rtl8192_handle_assoc_response;
priv->ieee80211->handle_beacon = rtl8192_handle_beacon;
@@ -2705,36 +2462,31 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
priv->ieee80211->InitialGainHandler = InitialGain819xUsb;
priv->card_type = USB;
#ifdef TO_DO_LIST
- if(Adapter->bInHctTest)
- {
+ if (Adapter->bInHctTest) {
pHalData->ShortRetryLimit = 7;
pHalData->LongRetryLimit = 7;
}
#endif
- {
- priv->ShortRetryLimit = 0x30;
- priv->LongRetryLimit = 0x30;
- }
+ priv->ShortRetryLimit = 0x30;
+ priv->LongRetryLimit = 0x30;
priv->EarlyRxThreshold = 7;
priv->enable_gpio0 = 0;
priv->TransmitConfig =
- // TCR_DurProcMode | //for RTL8185B, duration setting by HW
- //? TCR_DISReqQsize |
(TCR_MXDMA_2048<<TCR_MXDMA_OFFSET)| // Max DMA Burst Size per Tx DMA Burst, 7: reserved.
(priv->ShortRetryLimit<<TCR_SRL_OFFSET)| // Short retry limit
(priv->LongRetryLimit<<TCR_LRL_OFFSET) | // Long retry limit
- (false ? TCR_SAT: 0); // FALSE: HW provides PLCP length and LENGEXT, TRUE: SW provides them
+ (false ? TCR_SAT : 0); // FALSE: HW provides PLCP length and LENGEXT, TRUE: SW provides them
#ifdef TO_DO_LIST
- if(Adapter->bInHctTest)
+ if (Adapter->bInHctTest)
pHalData->ReceiveConfig = pHalData->CSMethod |
- RCR_AMF | RCR_ADF | //RCR_AAP | //accept management/data
+ RCR_AMF | RCR_ADF | //accept management/data
//guangan200710
RCR_ACF | //accept control frame for SW AP needs PS-poll, 2005.07.07, by rcnjko.
RCR_AB | RCR_AM | RCR_APM | //accept BC/MC/UC
RCR_AICV | RCR_ACRC32 | //accept ICV/CRC error packet
((u32)7<<RCR_MXDMA_OFFSET) | // Max DMA Burst Size per Rx DMA Burst, 7: unlimited.
(pHalData->EarlyRxThreshold<<RCR_FIFO_OFFSET) | // Rx FIFO Threshold, 7: No Rx threshold.
- (pHalData->EarlyRxThreshold == 7 ? RCR_OnlyErlPkt:0);
+ (pHalData->EarlyRxThreshold == 7 ? RCR_OnlyErlPkt : 0);
else
#endif
@@ -2742,10 +2494,9 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
RCR_AMF | RCR_ADF | //accept management/data
RCR_ACF | //accept control frame for SW AP needs PS-poll, 2005.07.07, by rcnjko.
RCR_AB | RCR_AM | RCR_APM | //accept BC/MC/UC
- //RCR_AICV | RCR_ACRC32 | //accept ICV/CRC error packet
((u32)7<<RCR_MXDMA_OFFSET)| // Max DMA Burst Size per Rx DMA Burst, 7: unlimited.
(priv->EarlyRxThreshold<<RX_FIFO_THRESHOLD_SHIFT) | // Rx FIFO Threshold, 7: No Rx threshold.
- (priv->EarlyRxThreshold == 7 ? RCR_ONLYERLPKT:0);
+ (priv->EarlyRxThreshold == 7 ? RCR_ONLYERLPKT : 0);
priv->AcmControl = 0;
priv->pFirmware = kzalloc(sizeof(rt_firmware), GFP_KERNEL);
@@ -2755,26 +2506,22 @@ static void rtl8192_init_priv_variable(struct net_device* dev)
skb_queue_head_init(&priv->skb_queue);
/* Tx related queue */
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_head_init(&priv->ieee80211->skb_waitQ [i]);
- }
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_head_init(&priv->ieee80211->skb_aggQ [i]);
- }
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_head_init(&priv->ieee80211->skb_drv_aggQ [i]);
- }
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_head_init(&priv->ieee80211->skb_waitQ[i]);
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_head_init(&priv->ieee80211->skb_aggQ[i]);
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_head_init(&priv->ieee80211->skb_drv_aggQ[i]);
priv->rf_set_chan = rtl8192_phy_SwChnl;
}
//init lock here
-static void rtl8192_init_priv_lock(struct r8192_priv* priv)
+static void rtl8192_init_priv_lock(struct r8192_priv *priv)
{
spin_lock_init(&priv->tx_lock);
spin_lock_init(&priv->irq_lock);//added by thomas
- //spin_lock_init(&priv->rf_lock);
- sema_init(&priv->wx_sem,1);
- sema_init(&priv->rf_sem,1);
+ sema_init(&priv->wx_sem, 1);
+ sema_init(&priv->rf_sem, 1);
mutex_init(&priv->mutex);
}
@@ -2783,7 +2530,7 @@ extern void rtl819x_watchdog_wqcallback(struct work_struct *work);
void rtl8192_irq_rx_tasklet(struct r8192_priv *priv);
//init tasklet and wait_queue here. only 2.6 above kernel is considered
#define DRV_NAME "wlan0"
-static void rtl8192_init_priv_task(struct net_device* dev)
+static void rtl8192_init_priv_task(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -2791,71 +2538,64 @@ static void rtl8192_init_priv_task(struct net_device* dev)
INIT_WORK(&priv->reset_wq, rtl8192_restart);
- //INIT_DELAYED_WORK(&priv->watch_dog_wq, hal_dm_watchdog);
INIT_DELAYED_WORK(&priv->watch_dog_wq, rtl819x_watchdog_wqcallback);
INIT_DELAYED_WORK(&priv->txpower_tracking_wq, dm_txpower_trackingcallback);
-// INIT_DELAYED_WORK(&priv->gpio_change_rf_wq, dm_gpio_change_rf_callback);
INIT_DELAYED_WORK(&priv->rfpath_check_wq, dm_rf_pathcheck_workitemcallback);
INIT_DELAYED_WORK(&priv->update_beacon_wq, rtl8192_update_beacon);
INIT_DELAYED_WORK(&priv->initialgain_operate_wq, InitialGainOperateWorkItemCallBack);
- //INIT_WORK(&priv->SwChnlWorkItem, rtl8192_SwChnl_WorkItem);
- //INIT_WORK(&priv->SetBWModeWorkItem, rtl8192_SetBWModeWorkItem);
INIT_WORK(&priv->qos_activate, rtl8192_qos_activate);
tasklet_init(&priv->irq_rx_tasklet,
- (void(*)(unsigned long))rtl8192_irq_rx_tasklet,
- (unsigned long)priv);
+ (void(*)(unsigned long))rtl8192_irq_rx_tasklet,
+ (unsigned long)priv);
}
-static void rtl8192_get_eeprom_size(struct net_device* dev)
+static void rtl8192_get_eeprom_size(struct net_device *dev)
{
u16 curCR = 0;
struct r8192_priv *priv = ieee80211_priv(dev);
- RT_TRACE(COMP_EPROM, "===========>%s()\n", __FUNCTION__);
- curCR = read_nic_word_E(dev,EPROM_CMD);
+ RT_TRACE(COMP_EPROM, "===========>%s()\n", __func__);
+ read_nic_word_E(dev, EPROM_CMD, &curCR);
RT_TRACE(COMP_EPROM, "read from Reg EPROM_CMD(%x):%x\n", EPROM_CMD, curCR);
//whether need I consider BIT5?
priv->epromtype = (curCR & Cmd9346CR_9356SEL) ? EPROM_93c56 : EPROM_93c46;
- RT_TRACE(COMP_EPROM, "<===========%s(), epromtype:%d\n", __FUNCTION__, priv->epromtype);
+ RT_TRACE(COMP_EPROM, "<===========%s(), epromtype:%d\n", __func__, priv->epromtype);
}
//used to swap endian. as ntohl & htonl are not necessary to swap endian, so use this instead.
-static inline u16 endian_swap(u16* data)
+static inline u16 endian_swap(u16 *data)
{
u16 tmp = *data;
*data = (tmp >> 8) | (tmp << 8);
return *data;
}
-static void rtl8192_read_eeprom_info(struct net_device* dev)
+static void rtl8192_read_eeprom_info(struct net_device *dev)
{
u16 wEPROM_ID = 0;
u8 bMac_Tmp_Addr[6] = {0x00, 0xe0, 0x4c, 0x00, 0x00, 0x02};
u8 bLoad_From_EEPOM = false;
struct r8192_priv *priv = ieee80211_priv(dev);
u16 tmpValue = 0;
- RT_TRACE(COMP_EPROM, "===========>%s()\n", __FUNCTION__);
+ int i;
+ RT_TRACE(COMP_EPROM, "===========>%s()\n", __func__);
wEPROM_ID = eprom_read(dev, 0); //first read EEPROM ID out;
RT_TRACE(COMP_EPROM, "EEPROM ID is 0x%x\n", wEPROM_ID);
- if (wEPROM_ID != RTL8190_EEPROM_ID)
- {
+ if (wEPROM_ID != RTL8190_EEPROM_ID) {
RT_TRACE(COMP_ERR, "EEPROM ID is invalid(is 0x%x(should be 0x%x)\n", wEPROM_ID, RTL8190_EEPROM_ID);
- }
- else
+ } else {
bLoad_From_EEPOM = true;
+ }
- if (bLoad_From_EEPOM)
- {
+ if (bLoad_From_EEPOM) {
tmpValue = eprom_read(dev, (EEPROM_VID>>1));
priv->eeprom_vid = endian_swap(&tmpValue);
priv->eeprom_pid = eprom_read(dev, (EEPROM_PID>>1));
tmpValue = eprom_read(dev, (EEPROM_ChannelPlan>>1));
- priv->eeprom_ChannelPlan =((tmpValue&0xff00)>>8);
+ priv->eeprom_ChannelPlan = ((tmpValue&0xff00)>>8);
priv->btxpowerdata_readfromEEPORM = true;
priv->eeprom_CustomerID = eprom_read(dev, (EEPROM_Customer_ID>>1)) >>8;
- }
- else
- {
+ } else {
priv->eeprom_vid = 0;
priv->eeprom_pid = 0;
priv->card_8192_version = VERSION_819xU_B;
@@ -2865,18 +2605,14 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
RT_TRACE(COMP_EPROM, "vid:0x%4x, pid:0x%4x, CustomID:0x%2x, ChanPlan:0x%x\n", priv->eeprom_vid, priv->eeprom_pid, priv->eeprom_CustomerID, priv->eeprom_ChannelPlan);
//set channelplan from eeprom
priv->ChannelPlan = priv->eeprom_ChannelPlan;
- if (bLoad_From_EEPOM)
- {
+ if (bLoad_From_EEPOM) {
int i;
- for (i=0; i<6; i+=2)
- {
+ for (i = 0; i < 6; i += 2) {
u16 tmp = 0;
tmp = eprom_read(dev, (u16)((EEPROM_NODE_ADDRESS_BYTE_0 + i)>>1));
- *(u16*)(&dev->dev_addr[i]) = tmp;
+ *(u16 *)(&dev->dev_addr[i]) = tmp;
}
- }
- else
- {
+ } else {
memcpy(dev->dev_addr, bMac_Tmp_Addr, 6);
//should I set IDR0 here?
}
@@ -2884,8 +2620,7 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
priv->rf_type = RTL819X_DEFAULT_RF_TYPE; //default 1T2R
priv->rf_chip = RF_8256;
- if (priv->card_8192_version == (u8)VERSION_819xU_A)
- {
+ if (priv->card_8192_version == (u8)VERSION_819xU_A) {
//read Tx power gain offset of legacy OFDM to HT rate
if (bLoad_From_EEPOM)
priv->EEPROMTxPowerDiff = (eprom_read(dev, (EEPROM_TxPowerDiff>>1))&0xff00) >> 8;
@@ -2918,51 +2653,45 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
else
priv->EEPROM_Def_Ver = 1;
RT_TRACE(COMP_EPROM, "EEPROM_DEF_VER:%d\n", priv->EEPROM_Def_Ver);
- if (priv->EEPROM_Def_Ver == 0) //old eeprom definition
- {
+ if (priv->EEPROM_Def_Ver == 0) { //old eeprom definition
int i;
if (bLoad_From_EEPOM)
priv->EEPROMTxPowerLevelCCK = (eprom_read(dev, (EEPROM_TxPwIndex_CCK>>1))&0xff) >> 8;
else
priv->EEPROMTxPowerLevelCCK = 0x10;
RT_TRACE(COMP_EPROM, "CCK Tx Power Levl: 0x%02x\n", priv->EEPROMTxPowerLevelCCK);
- for (i=0; i<3; i++)
- {
- if (bLoad_From_EEPOM)
- {
+ for (i = 0; i < 3; i++) {
+ if (bLoad_From_EEPOM) {
tmpValue = eprom_read(dev, (EEPROM_TxPwIndex_OFDM_24G+i)>>1);
if (((EEPROM_TxPwIndex_OFDM_24G+i) % 2) == 0)
tmpValue = tmpValue & 0x00ff;
else
tmpValue = (tmpValue & 0xff00) >> 8;
- }
- else
+ } else {
tmpValue = 0x10;
+ }
priv->EEPROMTxPowerLevelOFDM24G[i] = (u8) tmpValue;
RT_TRACE(COMP_EPROM, "OFDM 2.4G Tx Power Level, Index %d = 0x%02x\n", i, priv->EEPROMTxPowerLevelCCK);
}
- }//end if EEPROM_DEF_VER == 0
- else if (priv->EEPROM_Def_Ver == 1)
- {
- if (bLoad_From_EEPOM)
- {
+ } else if (priv->EEPROM_Def_Ver == 1) {
+ if (bLoad_From_EEPOM) {
tmpValue = eprom_read(dev, (EEPROM_TxPwIndex_CCK_V1>>1));
tmpValue = (tmpValue & 0xff00) >> 8;
- }
- else
+ } else {
tmpValue = 0x10;
+ }
priv->EEPROMTxPowerLevelCCK_V1[0] = (u8)tmpValue;
if (bLoad_From_EEPOM)
tmpValue = eprom_read(dev, (EEPROM_TxPwIndex_CCK_V1 + 2)>>1);
else
tmpValue = 0x1010;
- *((u16*)(&priv->EEPROMTxPowerLevelCCK_V1[1])) = tmpValue;
+ *((u16 *)(&priv->EEPROMTxPowerLevelCCK_V1[1])) = tmpValue;
if (bLoad_From_EEPOM)
tmpValue = eprom_read(dev, (EEPROM_TxPwIndex_OFDM_24G_V1>>1));
else
tmpValue = 0x1010;
- *((u16*)(&priv->EEPROMTxPowerLevelOFDM24G[0])) = tmpValue;
+ *((u16 *)(&priv->EEPROMTxPowerLevelOFDM24G[0])) = tmpValue;
if (bLoad_From_EEPOM)
tmpValue = eprom_read(dev, (EEPROM_TxPwIndex_OFDM_24G_V1+2)>>1);
else
@@ -2972,42 +2701,34 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
//update HAL variables
//
- {
- int i;
- for (i=0; i<14; i++)
- {
- if (i<=3)
- priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[0];
- else if (i>=4 && i<=9)
- priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[1];
- else
- priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[2];
- }
+ for (i = 0; i < 14; i++) {
+ if (i <= 3)
+ priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[0];
+ else if (i >= 4 && i <= 9)
+ priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[1];
+ else
+ priv->TxPowerLevelOFDM24G[i] = priv->EEPROMTxPowerLevelOFDM24G[2];
+ }
- for (i=0; i<14; i++)
- {
- if (priv->EEPROM_Def_Ver == 0)
- {
- if (i<=3)
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelOFDM24G[0] + (priv->EEPROMTxPowerLevelCCK - priv->EEPROMTxPowerLevelOFDM24G[1]);
- else if (i>=4 && i<=9)
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK;
- else
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelOFDM24G[2] + (priv->EEPROMTxPowerLevelCCK - priv->EEPROMTxPowerLevelOFDM24G[1]);
- }
- else if (priv->EEPROM_Def_Ver == 1)
- {
- if (i<=3)
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[0];
- else if (i>=4 && i<=9)
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[1];
- else
- priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[2];
- }
+ for (i = 0; i < 14; i++) {
+ if (priv->EEPROM_Def_Ver == 0) {
+ if (i <= 3)
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelOFDM24G[0] + (priv->EEPROMTxPowerLevelCCK - priv->EEPROMTxPowerLevelOFDM24G[1]);
+ else if (i >= 4 && i <= 9)
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK;
+ else
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelOFDM24G[2] + (priv->EEPROMTxPowerLevelCCK - priv->EEPROMTxPowerLevelOFDM24G[1]);
+ } else if (priv->EEPROM_Def_Ver == 1) {
+ if (i <= 3)
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[0];
+ else if (i >= 4 && i <= 9)
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[1];
+ else
+ priv->TxPowerLevelCCK[i] = priv->EEPROMTxPowerLevelCCK_V1[2];
}
- }//end update HAL variables
+ }
priv->TxPowerDiff = priv->EEPROMPwDiff;
-// Antenna B gain offset to antenna A, bit0~3
+ // Antenna B gain offset to antenna A, bit0~3
priv->AntennaTxPwDiff[0] = (priv->EEPROMTxPowerDiff & 0xf);
// Antenna C gain offset to antenna A, bit4~7
priv->AntennaTxPwDiff[1] = ((priv->EEPROMTxPowerDiff & 0xf0)>>4);
@@ -3018,46 +2739,41 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
priv->ThermalMeter[0] = priv->EEPROMThermalMeter;
}//end if VersionID == VERSION_819xU_A
-//added by vivi, for dlink led, 20080416
- switch(priv->eeprom_CustomerID)
- {
- case EEPROM_CID_RUNTOP:
- priv->CustomerID = RT_CID_819x_RUNTOP;
- break;
+ //added by vivi, for dlink led, 20080416
+ switch (priv->eeprom_CustomerID) {
+ case EEPROM_CID_RUNTOP:
+ priv->CustomerID = RT_CID_819x_RUNTOP;
+ break;
- case EEPROM_CID_DLINK:
- priv->CustomerID = RT_CID_DLINK;
- break;
+ case EEPROM_CID_DLINK:
+ priv->CustomerID = RT_CID_DLINK;
+ break;
- default:
- priv->CustomerID = RT_CID_DEFAULT;
- break;
+ default:
+ priv->CustomerID = RT_CID_DEFAULT;
+ break;
}
- switch(priv->CustomerID)
- {
- case RT_CID_819x_RUNTOP:
- priv->LedStrategy = SW_LED_MODE2;
- break;
+ switch (priv->CustomerID) {
+ case RT_CID_819x_RUNTOP:
+ priv->LedStrategy = SW_LED_MODE2;
+ break;
- case RT_CID_DLINK:
- priv->LedStrategy = SW_LED_MODE4;
- break;
+ case RT_CID_DLINK:
+ priv->LedStrategy = SW_LED_MODE4;
+ break;
- default:
- priv->LedStrategy = SW_LED_MODE0;
- break;
+ default:
+ priv->LedStrategy = SW_LED_MODE0;
+ break;
}
- if(priv->rf_type == RF_1T2R)
- {
+ if (priv->rf_type == RF_1T2R) {
RT_TRACE(COMP_EPROM, "\n1T2R config\n");
- }
- else
- {
+ } else {
RT_TRACE(COMP_EPROM, "\n2T4R config\n");
}
@@ -3066,18 +2782,18 @@ static void rtl8192_read_eeprom_info(struct net_device* dev)
init_rate_adaptive(dev);
//we need init DIG RATR table here again.
- RT_TRACE(COMP_EPROM, "<===========%s()\n", __FUNCTION__);
+ RT_TRACE(COMP_EPROM, "<===========%s()\n", __func__);
return;
}
-short rtl8192_get_channel_map(struct net_device * dev)
+short rtl8192_get_channel_map(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- if(priv->ChannelPlan > COUNTRY_CODE_GLOBAL_DOMAIN){
- printk("rtl8180_init:Error channel plan! Set to default.\n");
- priv->ChannelPlan= 0;
+ if (priv->ChannelPlan > COUNTRY_CODE_GLOBAL_DOMAIN) {
+ netdev_err(dev, "rtl8180_init: Error channel plan! Set to default.\n");
+ priv->ChannelPlan = 0;
}
- RT_TRACE(COMP_INIT, "Channel plan is %d\n",priv->ChannelPlan);
+ RT_TRACE(COMP_INIT, "Channel plan is %d\n", priv->ChannelPlan);
rtl819x_set_channel_map(priv->ChannelPlan, priv);
return 0;
@@ -3088,24 +2804,18 @@ short rtl8192_init(struct net_device *dev)
struct r8192_priv *priv = ieee80211_priv(dev);
- memset(&(priv->stats),0,sizeof(struct Stats));
- memset(priv->txqueue_to_outpipemap,0,9);
+ memset(&(priv->stats), 0, sizeof(struct Stats));
+ memset(priv->txqueue_to_outpipemap, 0, 9);
#ifdef PIPE12
{
- int i=0;
- u8 queuetopipe[]={3,2,1,0,4,8,7,6,5};
- memcpy(priv->txqueue_to_outpipemap,queuetopipe,9);
-/* for(i=0;i<9;i++)
- printk("%d ",priv->txqueue_to_outpipemap[i]);
- printk("\n");*/
+ int i = 0;
+ u8 queuetopipe[] = {3, 2, 1, 0, 4, 8, 7, 6, 5};
+ memcpy(priv->txqueue_to_outpipemap, queuetopipe, 9);
}
#else
{
- u8 queuetopipe[]={3,2,1,0,4,4,0,4,4};
- memcpy(priv->txqueue_to_outpipemap,queuetopipe,9);
-/* for(i=0;i<9;i++)
- printk("%d ",priv->txqueue_to_outpipemap[i]);
- printk("\n");*/
+ u8 queuetopipe[] = {3, 2, 1, 0, 4, 4, 0, 4, 4};
+ memcpy(priv->txqueue_to_outpipemap, queuetopipe, 9);
}
#endif
rtl8192_init_priv_variable(dev);
@@ -3118,12 +2828,11 @@ short rtl8192_init(struct net_device *dev)
init_timer(&priv->watch_dog_timer);
priv->watch_dog_timer.data = (unsigned long)dev;
priv->watch_dog_timer.function = watch_dog_timer_callback;
- if(rtl8192_usb_initendpoints(dev)!=0){
+ if (rtl8192_usb_initendpoints(dev) != 0) {
DMESG("Endopoints initialization failed");
return -ENOMEM;
}
- //rtl8192_adapter_start(dev);
#ifdef DEBUG_EPROM
dump_eprom(dev);
#endif
@@ -3138,16 +2847,16 @@ short rtl8192_init(struct net_device *dev)
* return: none
* notice: This part need to modified according to the rate set we filtered
* ****************************************************************************/
-void rtl8192_hwconfig(struct net_device* dev)
+void rtl8192_hwconfig(struct net_device *dev)
{
u32 regRATR = 0, regRRSR = 0;
u8 regBwOpMode = 0, regTmp = 0;
struct r8192_priv *priv = ieee80211_priv(dev);
+ u32 ratr_value = 0;
-// Set RRSR, RATR, and BW_OPMODE registers
+ // Set RRSR, RATR, and BW_OPMODE registers
//
- switch(priv->ieee80211->mode)
- {
+ switch (priv->ieee80211->mode) {
case WIRELESS_MODE_B:
regBwOpMode = BW_OPMODE_20MHZ;
regRATR = RATE_ALL_CCK;
@@ -3165,26 +2874,25 @@ void rtl8192_hwconfig(struct net_device* dev)
break;
case WIRELESS_MODE_AUTO:
#ifdef TO_DO_LIST
- if (Adapter->bInHctTest)
- {
- regBwOpMode = BW_OPMODE_20MHZ;
- regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
- regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
+ if (Adapter->bInHctTest) {
+ regBwOpMode = BW_OPMODE_20MHZ;
+ regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
+ regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
}
else
#endif
{
- regBwOpMode = BW_OPMODE_20MHZ;
- regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG | RATE_ALL_OFDM_1SS | RATE_ALL_OFDM_2SS;
- regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
+ regBwOpMode = BW_OPMODE_20MHZ;
+ regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG | RATE_ALL_OFDM_1SS | RATE_ALL_OFDM_2SS;
+ regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
}
break;
case WIRELESS_MODE_N_24G:
// It support CCK rate by default.
// CCK rate will be filtered out only when associated AP does not support it.
regBwOpMode = BW_OPMODE_20MHZ;
- regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG | RATE_ALL_OFDM_1SS | RATE_ALL_OFDM_2SS;
- regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
+ regRATR = RATE_ALL_CCK | RATE_ALL_OFDM_AG | RATE_ALL_OFDM_1SS | RATE_ALL_OFDM_2SS;
+ regRRSR = RATE_ALL_CCK | RATE_ALL_OFDM_AG;
break;
case WIRELESS_MODE_N_5G:
regBwOpMode = BW_OPMODE_5G;
@@ -3194,17 +2902,12 @@ void rtl8192_hwconfig(struct net_device* dev)
}
write_nic_byte(dev, BW_OPMODE, regBwOpMode);
- {
- u32 ratr_value = 0;
- ratr_value = regRATR;
- if (priv->rf_type == RF_1T2R)
- {
- ratr_value &= ~(RATE_ALL_OFDM_2SS);
- }
- write_nic_dword(dev, RATR0, ratr_value);
- write_nic_byte(dev, UFWP, 1);
- }
- regTmp = read_nic_byte(dev, 0x313);
+ ratr_value = regRATR;
+ if (priv->rf_type == RF_1T2R)
+ ratr_value &= ~(RATE_ALL_OFDM_2SS);
+ write_nic_dword(dev, RATR0, ratr_value);
+ write_nic_byte(dev, UFWP, 1);
+ read_nic_byte(dev, 0x313, &regTmp);
regRRSR = ((regTmp) << 24) | (regRRSR & 0x00ffffff);
write_nic_dword(dev, RRSR, regRRSR);
@@ -3212,8 +2915,8 @@ void rtl8192_hwconfig(struct net_device* dev)
// Set Retry Limit here
//
write_nic_word(dev, RETRY_LIMIT,
- priv->ShortRetryLimit << RETRY_LIMIT_SHORT_SHIFT | \
- priv->LongRetryLimit << RETRY_LIMIT_LONG_SHIFT);
+ priv->ShortRetryLimit << RETRY_LIMIT_SHORT_SHIFT |
+ priv->LongRetryLimit << RETRY_LIMIT_LONG_SHIFT);
// Set Contention Window here
// Set Tx AGC
@@ -3232,7 +2935,9 @@ bool rtl8192_adapter_start(struct net_device *dev)
struct r8192_priv *priv = ieee80211_priv(dev);
u32 dwRegRead = 0;
bool init_status = true;
- RT_TRACE(COMP_INIT, "====>%s()\n", __FUNCTION__);
+ u8 SECR_value = 0x0;
+ u8 tmp;
+ RT_TRACE(COMP_INIT, "====>%s()\n", __func__);
priv->Rf_Mode = RF_OP_By_SW_3wire;
//for ASIC power on sequence
write_nic_byte_E(dev, 0x5f, 0x80);
@@ -3242,34 +2947,31 @@ bool rtl8192_adapter_start(struct net_device *dev)
write_nic_byte_E(dev, 0x5e, 0x80);
write_nic_byte(dev, 0x17, 0x37);
mdelay(10);
-//#ifdef TO_DO_LIST
priv->pFirmware->firmware_status = FW_STATUS_0_INIT;
//config CPUReset Register
//Firmware Reset or not?
- dwRegRead = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &dwRegRead);
if (priv->pFirmware->firmware_status == FW_STATUS_0_INIT)
dwRegRead |= CPU_GEN_SYSTEM_RESET; //do nothing here?
else if (priv->pFirmware->firmware_status == FW_STATUS_5_READY)
dwRegRead |= CPU_GEN_FIRMWARE_RESET;
else
- RT_TRACE(COMP_ERR, "ERROR in %s(): undefined firmware state(%d)\n", __FUNCTION__, priv->pFirmware->firmware_status);
+ RT_TRACE(COMP_ERR, "ERROR in %s(): undefined firmware state(%d)\n", __func__, priv->pFirmware->firmware_status);
write_nic_dword(dev, CPU_GEN, dwRegRead);
- //mdelay(30);
//config BB.
rtl8192_BBConfig(dev);
//Loopback mode or not
priv->LoopbackMode = RTL819xU_NO_LOOPBACK;
-// priv->LoopbackMode = RTL819xU_MAC_LOOPBACK;
- dwRegRead = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &dwRegRead);
if (priv->LoopbackMode == RTL819xU_NO_LOOPBACK)
dwRegRead = ((dwRegRead & CPU_GEN_NO_LOOPBACK_MSK) | CPU_GEN_NO_LOOPBACK_SET);
else if (priv->LoopbackMode == RTL819xU_MAC_LOOPBACK)
dwRegRead |= CPU_CCK_LOOPBACK;
else
- RT_TRACE(COMP_ERR, "Serious error in %s(): wrong loopback mode setting(%d)\n", __FUNCTION__, priv->LoopbackMode);
+ RT_TRACE(COMP_ERR, "Serious error in %s(): wrong loopback mode setting(%d)\n", __func__, priv->LoopbackMode);
write_nic_dword(dev, CPU_GEN, dwRegRead);
@@ -3277,7 +2979,8 @@ bool rtl8192_adapter_start(struct net_device *dev)
udelay(500);
//xiong add for new bitfile:usb suspend reset pin set to 1. //do we need?
- write_nic_byte_E(dev, 0x5f, (read_nic_byte_E(dev, 0x5f)|0x20));
+ read_nic_byte_E(dev, 0x5f, &tmp);
+ write_nic_byte_E(dev, 0x5f, tmp|0x20);
//Set Hardware
rtl8192_hwconfig(dev);
@@ -3286,61 +2989,54 @@ bool rtl8192_adapter_start(struct net_device *dev)
write_nic_byte(dev, CMDR, CR_RE|CR_TE);
//set IDR0 here
- write_nic_dword(dev, MAC0, ((u32*)dev->dev_addr)[0]);
- write_nic_word(dev, MAC4, ((u16*)(dev->dev_addr + 4))[0]);
+ write_nic_dword(dev, MAC0, ((u32 *)dev->dev_addr)[0]);
+ write_nic_word(dev, MAC4, ((u16 *)(dev->dev_addr + 4))[0]);
//set RCR
write_nic_dword(dev, RCR, priv->ReceiveConfig);
//Initialize Number of Reserved Pages in Firmware Queue
- write_nic_dword(dev, RQPN1, NUM_OF_PAGE_IN_FW_QUEUE_BK << RSVD_FW_QUEUE_PAGE_BK_SHIFT |\
- NUM_OF_PAGE_IN_FW_QUEUE_BE << RSVD_FW_QUEUE_PAGE_BE_SHIFT | \
- NUM_OF_PAGE_IN_FW_QUEUE_VI << RSVD_FW_QUEUE_PAGE_VI_SHIFT | \
- NUM_OF_PAGE_IN_FW_QUEUE_VO <<RSVD_FW_QUEUE_PAGE_VO_SHIFT);
- write_nic_dword(dev, RQPN2, NUM_OF_PAGE_IN_FW_QUEUE_MGNT << RSVD_FW_QUEUE_PAGE_MGNT_SHIFT |\
- NUM_OF_PAGE_IN_FW_QUEUE_CMD << RSVD_FW_QUEUE_PAGE_CMD_SHIFT);
- write_nic_dword(dev, RQPN3, APPLIED_RESERVED_QUEUE_IN_FW| \
- NUM_OF_PAGE_IN_FW_QUEUE_BCN<<RSVD_FW_QUEUE_PAGE_BCN_SHIFT
-// | NUM_OF_PAGE_IN_FW_QUEUE_PUB<<RSVD_FW_QUEUE_PAGE_PUB_SHIFT
- );
+ write_nic_dword(dev, RQPN1, NUM_OF_PAGE_IN_FW_QUEUE_BK << RSVD_FW_QUEUE_PAGE_BK_SHIFT |
+ NUM_OF_PAGE_IN_FW_QUEUE_BE << RSVD_FW_QUEUE_PAGE_BE_SHIFT |
+ NUM_OF_PAGE_IN_FW_QUEUE_VI << RSVD_FW_QUEUE_PAGE_VI_SHIFT |
+ NUM_OF_PAGE_IN_FW_QUEUE_VO <<RSVD_FW_QUEUE_PAGE_VO_SHIFT);
+ write_nic_dword(dev, RQPN2, NUM_OF_PAGE_IN_FW_QUEUE_MGNT << RSVD_FW_QUEUE_PAGE_MGNT_SHIFT |
+ NUM_OF_PAGE_IN_FW_QUEUE_CMD << RSVD_FW_QUEUE_PAGE_CMD_SHIFT);
+ write_nic_dword(dev, RQPN3, APPLIED_RESERVED_QUEUE_IN_FW|
+ NUM_OF_PAGE_IN_FW_QUEUE_BCN<<RSVD_FW_QUEUE_PAGE_BCN_SHIFT);
write_nic_dword(dev, RATR0+4*7, (RATE_ALL_OFDM_AG | RATE_ALL_CCK));
//Set AckTimeout
// TODO: (it value is only for FPGA version). need to be changed!!2006.12.18, by Emily
write_nic_byte(dev, ACK_TIMEOUT, 0x30);
-// RT_TRACE(COMP_INIT, "%s():priv->ResetProgress is %d\n", __FUNCTION__,priv->ResetProgress);
- if(priv->ResetProgress == RESET_TYPE_NORESET)
- rtl8192_SetWirelessMode(dev, priv->ieee80211->mode);
- if(priv->ResetProgress == RESET_TYPE_NORESET){
- CamResetAllEntry(dev);
- {
- u8 SECR_value = 0x0;
+ if (priv->ResetProgress == RESET_TYPE_NORESET)
+ rtl8192_SetWirelessMode(dev, priv->ieee80211->mode);
+ if (priv->ResetProgress == RESET_TYPE_NORESET) {
+ CamResetAllEntry(dev);
SECR_value |= SCR_TxEncEnable;
SECR_value |= SCR_RxDecEnable;
SECR_value |= SCR_NoSKMC;
write_nic_byte(dev, SECR, SECR_value);
}
- }
//Beacon related
write_nic_word(dev, ATIMWND, 2);
write_nic_word(dev, BCN_INTERVAL, 100);
- {
#define DEFAULT_EDCA 0x005e4332
+ {
int i;
- for (i=0; i<QOS_QUEUE_NUM; i++)
- write_nic_dword(dev, WDCAPARA_ADD[i], DEFAULT_EDCA);
+ for (i = 0; i < QOS_QUEUE_NUM; i++)
+ write_nic_dword(dev, WDCAPARA_ADD[i], DEFAULT_EDCA);
}
#ifdef USB_RX_AGGREGATION_SUPPORT
//3 For usb rx firmware aggregation control
- if(priv->ResetProgress == RESET_TYPE_NORESET)
- {
+ if (priv->ResetProgress == RESET_TYPE_NORESET) {
u32 ulValue;
PRT_HIGH_THROUGHPUT pHTInfo = priv->ieee80211->pHTInfo;
ulValue = (pHTInfo->UsbRxFwAggrEn<<24) | (pHTInfo->UsbRxFwAggrPageNum<<16) |
- (pHTInfo->UsbRxFwAggrPacketNum<<8) | (pHTInfo->UsbRxFwAggrTimeout);
+ (pHTInfo->UsbRxFwAggrPacketNum<<8) | (pHTInfo->UsbRxFwAggrTimeout);
/*
* If usb rx firmware aggregation is enabled,
* when anyone of three threshold conditions above is reached,
@@ -3353,63 +3049,52 @@ bool rtl8192_adapter_start(struct net_device *dev)
rtl8192_phy_configmac(dev);
- if (priv->card_8192_version == (u8) VERSION_819xU_A)
- {
+ if (priv->card_8192_version == (u8) VERSION_819xU_A) {
rtl8192_phy_getTxPower(dev);
rtl8192_phy_setTxPower(dev, priv->chan);
}
//Firmware download
init_status = init_firmware(dev);
- if(!init_status)
- {
- RT_TRACE(COMP_ERR,"ERR!!! %s(): Firmware download is failed\n", __FUNCTION__);
+ if (!init_status) {
+ RT_TRACE(COMP_ERR, "ERR!!! %s(): Firmware download is failed\n", __func__);
return init_status;
}
- RT_TRACE(COMP_INIT, "%s():after firmware download\n", __FUNCTION__);
+ RT_TRACE(COMP_INIT, "%s():after firmware download\n", __func__);
//
#ifdef TO_DO_LIST
-if(Adapter->ResetProgress == RESET_TYPE_NORESET)
- {
- if(pMgntInfo->RegRfOff == TRUE)
- { // User disable RF via registry.
+ if (Adapter->ResetProgress == RESET_TYPE_NORESET) {
+ if (pMgntInfo->RegRfOff == TRUE) { // User disable RF via registry.
RT_TRACE((COMP_INIT|COMP_RF), DBG_LOUD, ("InitializeAdapter819xUsb(): Turn off RF for RegRfOff ----------\n"));
MgntActSet_RF_State(Adapter, eRfOff, RF_CHANGE_BY_SW);
// Those actions will be discard in MgntActSet_RF_State because of the same state
- for(eRFPath = 0; eRFPath <pHalData->NumTotalRFPath; eRFPath++)
+ for (eRFPath = 0; eRFPath < pHalData->NumTotalRFPath; eRFPath++)
PHY_SetRFReg(Adapter, (RF90_RADIO_PATH_E)eRFPath, 0x4, 0xC00, 0x0);
- }
- else if(pMgntInfo->RfOffReason > RF_CHANGE_BY_PS)
- { // H/W or S/W RF OFF before sleep.
+ } else if (pMgntInfo->RfOffReason > RF_CHANGE_BY_PS) { // H/W or S/W RF OFF before sleep.
RT_TRACE((COMP_INIT|COMP_RF), DBG_LOUD, ("InitializeAdapter819xUsb(): Turn off RF for RfOffReason(%d) ----------\n", pMgntInfo->RfOffReason));
MgntActSet_RF_State(Adapter, eRfOff, pMgntInfo->RfOffReason);
- }
- else
- {
+ } else {
pHalData->eRFPowerState = eRfOn;
pMgntInfo->RfOffReason = 0;
RT_TRACE((COMP_INIT|COMP_RF), DBG_LOUD, ("InitializeAdapter819xUsb(): RF is on ----------\n"));
}
- }
- else
- {
- if(pHalData->eRFPowerState == eRfOff)
- {
+ } else {
+ if (pHalData->eRFPowerState == eRfOff) {
MgntActSet_RF_State(Adapter, eRfOff, pMgntInfo->RfOffReason);
// Those actions will be discard in MgntActSet_RF_State because of the same state
- for(eRFPath = 0; eRFPath <pHalData->NumTotalRFPath; eRFPath++)
+ for (eRFPath = 0; eRFPath < pHalData->NumTotalRFPath; eRFPath++)
PHY_SetRFReg(Adapter, (RF90_RADIO_PATH_E)eRFPath, 0x4, 0xC00, 0x0);
}
}
#endif
//config RF.
- if(priv->ResetProgress == RESET_TYPE_NORESET){
- rtl8192_phy_RFConfig(dev);
- RT_TRACE(COMP_INIT, "%s():after phy RF config\n", __FUNCTION__);
+ if (priv->ResetProgress == RESET_TYPE_NORESET) {
+ rtl8192_phy_RFConfig(dev);
+ RT_TRACE(COMP_INIT, "%s():after phy RF config\n", __func__);
}
- if(priv->ieee80211->FwRWRF)
+ if (priv->ieee80211->FwRWRF)
// We can force firmware to do RF-R/W
priv->Rf_Mode = RF_OP_By_FW;
else
@@ -3421,54 +3106,44 @@ if(Adapter->ResetProgress == RESET_TYPE_NORESET)
rtl8192_setBBreg(dev, rFPGA0_RFMOD, bCCKEn, 0x1);
rtl8192_setBBreg(dev, rFPGA0_RFMOD, bOFDMEn, 0x1);
- if(priv->ResetProgress == RESET_TYPE_NORESET)
- {
+ if (priv->ResetProgress == RESET_TYPE_NORESET) {
//if D or C cut
- u8 tmpvalue = read_nic_byte(dev, 0x301);
- if(tmpvalue ==0x03)
- {
+ u8 tmpvalue;
+ read_nic_byte(dev, 0x301, &tmpvalue);
+ if (tmpvalue == 0x03) {
priv->bDcut = TRUE;
RT_TRACE(COMP_POWER_TRACKING, "D-cut\n");
- }
- else
- {
+ } else {
priv->bDcut = FALSE;
RT_TRACE(COMP_POWER_TRACKING, "C-cut\n");
}
dm_initialize_txpower_tracking(dev);
- if(priv->bDcut == TRUE)
- {
+ if (priv->bDcut == TRUE) {
u32 i, TempCCk;
- u32 tmpRegA= rtl8192_QueryBBReg(dev,rOFDM0_XATxIQImbalance,bMaskDWord);
- // u32 tmpRegC= rtl8192_QueryBBReg(dev,rOFDM0_XCTxIQImbalance,bMaskDWord);
- for(i = 0; i<TxBBGainTableLength; i++)
- {
- if(tmpRegA == priv->txbbgain_table[i].txbbgain_value)
- {
- priv->rfa_txpowertrackingindex= (u8)i;
- priv->rfa_txpowertrackingindex_real= (u8)i;
- priv->rfa_txpowertracking_default= priv->rfa_txpowertrackingindex;
+ u32 tmpRegA = rtl8192_QueryBBReg(dev, rOFDM0_XATxIQImbalance, bMaskDWord);
+ for (i = 0; i < TxBBGainTableLength; i++) {
+ if (tmpRegA == priv->txbbgain_table[i].txbbgain_value) {
+ priv->rfa_txpowertrackingindex = (u8)i;
+ priv->rfa_txpowertrackingindex_real = (u8)i;
+ priv->rfa_txpowertracking_default = priv->rfa_txpowertrackingindex;
break;
}
}
TempCCk = rtl8192_QueryBBReg(dev, rCCK0_TxFilter1, bMaskByte2);
- for(i=0 ; i<CCKTxBBGainTableLength ; i++)
- {
+ for (i = 0; i < CCKTxBBGainTableLength; i++) {
- if(TempCCk == priv->cck_txbbgain_table[i].ccktxbb_valuearray[0])
- {
- priv->cck_present_attentuation_20Mdefault=(u8) i;
+ if (TempCCk == priv->cck_txbbgain_table[i].ccktxbb_valuearray[0]) {
+ priv->cck_present_attentuation_20Mdefault = (u8) i;
break;
}
}
- priv->cck_present_attentuation_40Mdefault= 0;
- priv->cck_present_attentuation_difference= 0;
+ priv->cck_present_attentuation_40Mdefault = 0;
+ priv->cck_present_attentuation_difference = 0;
priv->cck_present_attentuation = priv->cck_present_attentuation_20Mdefault;
- // pMgntInfo->bTXPowerTracking = FALSE;//TEMPLY DISABLE
}
}
write_nic_byte(dev, 0x87, 0x0);
@@ -3492,16 +3167,14 @@ static struct net_device_stats *rtl8192_stats(struct net_device *dev)
return &priv->ieee80211->stats;
}
-bool
-HalTxCheckStuck819xUsb(
- struct net_device *dev
- )
+bool HalTxCheckStuck819xUsb(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- u16 RegTxCounter = read_nic_word(dev, 0x128);
+ u16 RegTxCounter;
bool bStuck = FALSE;
- RT_TRACE(COMP_RESET,"%s():RegTxCounter is %d,TxCounter is %d\n",__FUNCTION__,RegTxCounter,priv->TxCounter);
- if(priv->TxCounter==RegTxCounter)
+ read_nic_word(dev, 0x128, &RegTxCounter);
+ RT_TRACE(COMP_RESET, "%s():RegTxCounter is %d,TxCounter is %d\n", __func__, RegTxCounter, priv->TxCounter);
+ if (priv->TxCounter == RegTxCounter)
bStuck = TRUE;
priv->TxCounter = RegTxCounter;
@@ -3513,43 +3186,30 @@ HalTxCheckStuck819xUsb(
* <Assumption: RT_TX_SPINLOCK is acquired.>
* First added: 2006.11.19 by emily
*/
-RESET_TYPE
-TxCheckStuck(struct net_device *dev)
+RESET_TYPE TxCheckStuck(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 QueueID;
-// PRT_TCB pTcb;
-// u8 ResetThreshold;
bool bCheckFwTxCnt = false;
- //unsigned long flags;
//
// Decide such threshold according to current power save mode
//
-// RT_TRACE(COMP_RESET, " ==> TxCheckStuck()\n");
-// PlatformAcquireSpinLock(Adapter, RT_TX_SPINLOCK);
-// spin_lock_irqsave(&priv->ieee80211->lock,flags);
- for (QueueID = 0; QueueID<=BEACON_QUEUE;QueueID ++)
- {
- if(QueueID == TXCMD_QUEUE)
- continue;
+ for (QueueID = 0; QueueID <= BEACON_QUEUE; QueueID++) {
+ if (QueueID == TXCMD_QUEUE)
+ continue;
#ifdef USB_TX_DRIVER_AGGREGATION_ENABLE
- if((skb_queue_len(&priv->ieee80211->skb_waitQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_aggQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_drv_aggQ[QueueID]) == 0))
+ if ((skb_queue_len(&priv->ieee80211->skb_waitQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_aggQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_drv_aggQ[QueueID]) == 0))
#else
- if((skb_queue_len(&priv->ieee80211->skb_waitQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_aggQ[QueueID]) == 0))
+ if ((skb_queue_len(&priv->ieee80211->skb_waitQ[QueueID]) == 0) && (skb_queue_len(&priv->ieee80211->skb_aggQ[QueueID]) == 0))
#endif
continue;
- bCheckFwTxCnt = true;
- }
-// PlatformReleaseSpinLock(Adapter, RT_TX_SPINLOCK);
-// spin_unlock_irqrestore(&priv->ieee80211->lock,flags);
-// RT_TRACE(COMP_RESET,"bCheckFwTxCnt is %d\n",bCheckFwTxCnt);
- if(bCheckFwTxCnt)
- {
- if(HalTxCheckStuck819xUsb(dev))
- {
+ bCheckFwTxCnt = true;
+ }
+ if (bCheckFwTxCnt) {
+ if (HalTxCheckStuck819xUsb(dev)) {
RT_TRACE(COMP_RESET, "TxCheckStuck(): Fw indicates no Tx condition! \n");
return RESET_TYPE_SILENT;
}
@@ -3557,64 +3217,41 @@ TxCheckStuck(struct net_device *dev)
return RESET_TYPE_NORESET;
}
-bool
-HalRxCheckStuck819xUsb(struct net_device *dev)
+bool HalRxCheckStuck819xUsb(struct net_device *dev)
{
- u16 RegRxCounter = read_nic_word(dev, 0x130);
+ u16 RegRxCounter;
struct r8192_priv *priv = ieee80211_priv(dev);
bool bStuck = FALSE;
static u8 rx_chk_cnt;
- RT_TRACE(COMP_RESET,"%s(): RegRxCounter is %d,RxCounter is %d\n",__FUNCTION__,RegRxCounter,priv->RxCounter);
+ read_nic_word(dev, 0x130, &RegRxCounter);
+ RT_TRACE(COMP_RESET, "%s(): RegRxCounter is %d,RxCounter is %d\n", __func__, RegRxCounter, priv->RxCounter);
// If rssi is small, we should check rx for long time because of bad rx.
// or maybe it will continuous silent reset every 2 seconds.
rx_chk_cnt++;
- if(priv->undecorated_smoothed_pwdb >= (RateAdaptiveTH_High+5))
- {
+ if (priv->undecorated_smoothed_pwdb >= (RateAdaptiveTH_High+5)) {
rx_chk_cnt = 0; //high rssi, check rx stuck right now.
- }
- else if(priv->undecorated_smoothed_pwdb < (RateAdaptiveTH_High+5) &&
- ((priv->CurrentChannelBW!=HT_CHANNEL_WIDTH_20&&priv->undecorated_smoothed_pwdb>=RateAdaptiveTH_Low_40M) ||
- (priv->CurrentChannelBW==HT_CHANNEL_WIDTH_20&&priv->undecorated_smoothed_pwdb>=RateAdaptiveTH_Low_20M)) )
- {
- if(rx_chk_cnt < 2)
- {
+ } else if (priv->undecorated_smoothed_pwdb < (RateAdaptiveTH_High+5) &&
+ ((priv->CurrentChannelBW != HT_CHANNEL_WIDTH_20 && priv->undecorated_smoothed_pwdb >= RateAdaptiveTH_Low_40M) ||
+ (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20 && priv->undecorated_smoothed_pwdb >= RateAdaptiveTH_Low_20M))) {
+ if (rx_chk_cnt < 2)
return bStuck;
- }
else
- {
rx_chk_cnt = 0;
- }
- }
- else if(((priv->CurrentChannelBW!=HT_CHANNEL_WIDTH_20&&priv->undecorated_smoothed_pwdb<RateAdaptiveTH_Low_40M) ||
- (priv->CurrentChannelBW==HT_CHANNEL_WIDTH_20&&priv->undecorated_smoothed_pwdb<RateAdaptiveTH_Low_20M)) &&
- priv->undecorated_smoothed_pwdb >= VeryLowRSSI)
- {
- if(rx_chk_cnt < 4)
- {
- //DbgPrint("RSSI < %d && RSSI >= %d, no check this time \n", RateAdaptiveTH_Low, VeryLowRSSI);
+ } else if (((priv->CurrentChannelBW != HT_CHANNEL_WIDTH_20 && priv->undecorated_smoothed_pwdb < RateAdaptiveTH_Low_40M) ||
+ (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20 && priv->undecorated_smoothed_pwdb < RateAdaptiveTH_Low_20M)) &&
+ priv->undecorated_smoothed_pwdb >= VeryLowRSSI) {
+ if (rx_chk_cnt < 4)
return bStuck;
- }
else
- {
rx_chk_cnt = 0;
- //DbgPrint("RSSI < %d && RSSI >= %d, check this time \n", RateAdaptiveTH_Low, VeryLowRSSI);
- }
- }
- else
- {
- if(rx_chk_cnt < 8)
- {
- //DbgPrint("RSSI <= %d, no check this time \n", VeryLowRSSI);
+ } else {
+ if (rx_chk_cnt < 8)
return bStuck;
- }
else
- {
rx_chk_cnt = 0;
- //DbgPrint("RSSI <= %d, check this time \n", VeryLowRSSI);
- }
}
- if(priv->RxCounter==RegRxCounter)
+ if (priv->RxCounter == RegRxCounter)
bStuck = TRUE;
priv->RxCounter = RegRxCounter;
@@ -3622,25 +3259,16 @@ HalRxCheckStuck819xUsb(struct net_device *dev)
return bStuck;
}
-RESET_TYPE
-RxCheckStuck(struct net_device *dev)
+RESET_TYPE RxCheckStuck(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- //int i;
bool bRxCheck = FALSE;
-// RT_TRACE(COMP_RESET," ==> RxCheckStuck()\n");
- //PlatformAcquireSpinLock(Adapter, RT_RX_SPINLOCK);
-
- if(priv->IrpPendingCount > 1)
+ if (priv->IrpPendingCount > 1)
bRxCheck = TRUE;
- //PlatformReleaseSpinLock(Adapter, RT_RX_SPINLOCK);
-// RT_TRACE(COMP_RESET,"bRxCheck is %d \n",bRxCheck);
- if(bRxCheck)
- {
- if(HalRxCheckStuck819xUsb(dev))
- {
+ if (bRxCheck) {
+ if (HalRxCheckStuck819xUsb(dev)) {
RT_TRACE(COMP_RESET, "RxStuck Condition\n");
return RESET_TYPE_SILENT;
}
@@ -3661,8 +3289,7 @@ RxCheckStuck(struct net_device *dev)
*
* 8185 and 8185b does not implement this function. This is added by Emily at 2006.11.24
*/
-RESET_TYPE
-rtl819x_ifcheck_resetornot(struct net_device *dev)
+RESET_TYPE rtl819x_ifcheck_resetornot(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
RESET_TYPE TxResetType = RESET_TYPE_NORESET;
@@ -3672,10 +3299,8 @@ rtl819x_ifcheck_resetornot(struct net_device *dev)
rfState = priv->ieee80211->eRFPowerState;
TxResetType = TxCheckStuck(dev);
- if( rfState != eRfOff ||
- /*ADAPTER_TEST_STATUS_FLAG(Adapter, ADAPTER_STATUS_FW_DOWNLOAD_FAILURE)) &&*/
- (priv->ieee80211->iw_mode != IW_MODE_ADHOC))
- {
+ if (rfState != eRfOff ||
+ (priv->ieee80211->iw_mode != IW_MODE_ADHOC)) {
// If driver is in the status of firmware download failure , driver skips RF initialization and RF is
// in turned off state. Driver should check whether Rx stuck and do silent reset. And
// if driver is in firmware download failure status, driver should initialize RF in the following
@@ -3686,155 +3311,91 @@ rtl819x_ifcheck_resetornot(struct net_device *dev)
// set, STA cannot hear any packet at all. Emily, 2008.04.12
RxResetType = RxCheckStuck(dev);
}
- if(TxResetType==RESET_TYPE_NORMAL || RxResetType==RESET_TYPE_NORMAL)
+ if (TxResetType == RESET_TYPE_NORMAL || RxResetType == RESET_TYPE_NORMAL) {
return RESET_TYPE_NORMAL;
- else if(TxResetType==RESET_TYPE_SILENT || RxResetType==RESET_TYPE_SILENT){
- RT_TRACE(COMP_RESET,"%s():silent reset\n",__FUNCTION__);
+ } else if (TxResetType == RESET_TYPE_SILENT || RxResetType == RESET_TYPE_SILENT) {
+ RT_TRACE(COMP_RESET, "%s():silent reset\n", __func__);
return RESET_TYPE_SILENT;
- }
- else
+ } else {
return RESET_TYPE_NORESET;
+ }
}
-void rtl8192_cancel_deferred_work(struct r8192_priv* priv);
+void rtl8192_cancel_deferred_work(struct r8192_priv *priv);
int _rtl8192_up(struct net_device *dev);
int rtl8192_close(struct net_device *dev);
-void
-CamRestoreAllEntry( struct net_device *dev)
+void CamRestoreAllEntry(struct net_device *dev)
{
u8 EntryId = 0;
struct r8192_priv *priv = ieee80211_priv(dev);
- u8* MacAddr = priv->ieee80211->current_network.bssid;
+ u8 *MacAddr = priv->ieee80211->current_network.bssid;
static u8 CAM_CONST_ADDR[4][6] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x02},
- {0x00, 0x00, 0x00, 0x00, 0x00, 0x03}};
- static u8 CAM_CONST_BROAD[] =
- {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x03} };
+ static u8 CAM_CONST_BROAD[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
RT_TRACE(COMP_SEC, "CamRestoreAllEntry: \n");
- if ((priv->ieee80211->pairwise_key_type == KEY_TYPE_WEP40)||
- (priv->ieee80211->pairwise_key_type == KEY_TYPE_WEP104))
- {
+ if ((priv->ieee80211->pairwise_key_type == KEY_TYPE_WEP40) ||
+ (priv->ieee80211->pairwise_key_type == KEY_TYPE_WEP104)) {
- for(EntryId=0; EntryId<4; EntryId++)
- {
- {
- MacAddr = CAM_CONST_ADDR[EntryId];
- setKey(dev,
- EntryId ,
- EntryId,
- priv->ieee80211->pairwise_key_type,
- MacAddr,
- 0,
- NULL);
- }
+ for (EntryId = 0; EntryId < 4; EntryId++) {
+ MacAddr = CAM_CONST_ADDR[EntryId];
+ setKey(dev, EntryId, EntryId,
+ priv->ieee80211->pairwise_key_type,
+ MacAddr, 0, NULL);
}
- }
- else if(priv->ieee80211->pairwise_key_type == KEY_TYPE_TKIP)
- {
+ } else if (priv->ieee80211->pairwise_key_type == KEY_TYPE_TKIP) {
- {
- if(priv->ieee80211->iw_mode == IW_MODE_ADHOC)
- setKey(dev,
- 4,
- 0,
- priv->ieee80211->pairwise_key_type,
- (u8*)dev->dev_addr,
- 0,
- NULL);
- else
- setKey(dev,
- 4,
- 0,
- priv->ieee80211->pairwise_key_type,
- MacAddr,
- 0,
- NULL);
- }
- }
- else if(priv->ieee80211->pairwise_key_type == KEY_TYPE_CCMP)
- {
+ if (priv->ieee80211->iw_mode == IW_MODE_ADHOC)
+ setKey(dev, 4, 0, priv->ieee80211->pairwise_key_type,
+ (u8 *)dev->dev_addr, 0, NULL);
+ else
+ setKey(dev, 4, 0, priv->ieee80211->pairwise_key_type,
+ MacAddr, 0, NULL);
+ } else if (priv->ieee80211->pairwise_key_type == KEY_TYPE_CCMP) {
- {
- if(priv->ieee80211->iw_mode == IW_MODE_ADHOC)
- setKey(dev,
- 4,
- 0,
- priv->ieee80211->pairwise_key_type,
- (u8*)dev->dev_addr,
- 0,
- NULL);
- else
- setKey(dev,
- 4,
- 0,
- priv->ieee80211->pairwise_key_type,
- MacAddr,
- 0,
- NULL);
- }
+ if (priv->ieee80211->iw_mode == IW_MODE_ADHOC)
+ setKey(dev, 4, 0, priv->ieee80211->pairwise_key_type,
+ (u8 *)dev->dev_addr, 0, NULL);
+ else
+ setKey(dev, 4, 0, priv->ieee80211->pairwise_key_type,
+ MacAddr, 0, NULL);
}
- if(priv->ieee80211->group_key_type == KEY_TYPE_TKIP)
- {
+ if (priv->ieee80211->group_key_type == KEY_TYPE_TKIP) {
MacAddr = CAM_CONST_BROAD;
- for(EntryId=1 ; EntryId<4 ; EntryId++)
- {
- {
- setKey(dev,
- EntryId,
- EntryId,
- priv->ieee80211->group_key_type,
- MacAddr,
- 0,
- NULL);
- }
+ for (EntryId = 1; EntryId < 4; EntryId++) {
+ setKey(dev, EntryId, EntryId,
+ priv->ieee80211->group_key_type,
+ MacAddr, 0, NULL);
}
- if(priv->ieee80211->iw_mode == IW_MODE_ADHOC)
- setKey(dev,
- 0,
- 0,
- priv->ieee80211->group_key_type,
- CAM_CONST_ADDR[0],
- 0,
- NULL);
- }
- else if(priv->ieee80211->group_key_type == KEY_TYPE_CCMP)
- {
+ if (priv->ieee80211->iw_mode == IW_MODE_ADHOC)
+ setKey(dev, 0, 0, priv->ieee80211->group_key_type,
+ CAM_CONST_ADDR[0], 0, NULL);
+ } else if (priv->ieee80211->group_key_type == KEY_TYPE_CCMP) {
MacAddr = CAM_CONST_BROAD;
- for(EntryId=1; EntryId<4 ; EntryId++)
- {
- {
- setKey(dev,
- EntryId ,
- EntryId,
- priv->ieee80211->group_key_type,
- MacAddr,
- 0,
- NULL);
- }
+ for (EntryId = 1; EntryId < 4; EntryId++) {
+ setKey(dev, EntryId, EntryId,
+ priv->ieee80211->group_key_type,
+ MacAddr, 0, NULL);
}
- if(priv->ieee80211->iw_mode == IW_MODE_ADHOC)
- setKey(dev,
- 0 ,
- 0,
- priv->ieee80211->group_key_type,
- CAM_CONST_ADDR[0],
- 0,
- NULL);
+ if (priv->ieee80211->iw_mode == IW_MODE_ADHOC)
+ setKey(dev, 0, 0, priv->ieee80211->group_key_type,
+ CAM_CONST_ADDR[0], 0, NULL);
}
}
//////////////////////////////////////////////////////////////
@@ -3843,10 +3404,8 @@ CamRestoreAllEntry( struct net_device *dev)
// The method checking Tx/Rx stuck of this function is supported by FW,
// which reports Tx and Rx counter to register 0x128 and 0x130.
//////////////////////////////////////////////////////////////
-void
-rtl819x_ifsilentreset(struct net_device *dev)
+void rtl819x_ifsilentreset(struct net_device *dev)
{
- //OCTET_STRING asocpdu;
struct r8192_priv *priv = ieee80211_priv(dev);
u8 reset_times = 0;
int reset_status = 0;
@@ -3856,26 +3415,21 @@ rtl819x_ifsilentreset(struct net_device *dev)
// 2007.07.20. If we need to check CCK stop, please uncomment this line.
//bStuck = Adapter->HalFunc.CheckHWStopHandler(Adapter);
- if(priv->ResetProgress==RESET_TYPE_NORESET)
- {
+ if (priv->ResetProgress == RESET_TYPE_NORESET) {
RESET_START:
- RT_TRACE(COMP_RESET,"=========>Reset progress!! \n");
+ RT_TRACE(COMP_RESET, "=========>Reset progress!! \n");
// Set the variable for reset.
priv->ResetProgress = RESET_TYPE_SILENT;
-// rtl8192_close(dev);
down(&priv->wx_sem);
- if(priv->up == 0)
- {
- RT_TRACE(COMP_ERR,"%s():the driver is not up! return\n",__FUNCTION__);
+ if (priv->up == 0) {
+ RT_TRACE(COMP_ERR, "%s():the driver is not up! return\n", __func__);
up(&priv->wx_sem);
- return ;
+ return;
}
priv->up = 0;
- RT_TRACE(COMP_RESET,"%s():======>start to down the driver\n",__FUNCTION__);
-// if(!netif_queue_stopped(dev))
-// netif_stop_queue(dev);
+ RT_TRACE(COMP_RESET, "%s():======>start to down the driver\n", __func__);
rtl8192_rtx_disable(dev);
rtl8192_cancel_deferred_work(priv);
@@ -3883,55 +3437,44 @@ RESET_START:
del_timer_sync(&priv->watch_dog_timer);
ieee->sync_scan_hurryup = 1;
- if(ieee->state == IEEE80211_LINKED)
- {
+ if (ieee->state == IEEE80211_LINKED) {
down(&ieee->wx_sem);
- printk("ieee->state is IEEE80211_LINKED\n");
+ netdev_dbg(dev, "ieee->state is IEEE80211_LINKED\n");
ieee80211_stop_send_beacons(priv->ieee80211);
del_timer_sync(&ieee->associate_timer);
cancel_delayed_work(&ieee->associate_retry_wq);
ieee80211_stop_scan(ieee);
netif_carrier_off(dev);
up(&ieee->wx_sem);
+ } else {
+ netdev_dbg(dev, "ieee->state is NOT LINKED\n");
+ ieee80211_softmac_stop_protocol(priv->ieee80211);
}
- else{
- printk("ieee->state is NOT LINKED\n");
- ieee80211_softmac_stop_protocol(priv->ieee80211); }
up(&priv->wx_sem);
- RT_TRACE(COMP_RESET,"%s():<==========down process is finished\n",__FUNCTION__);
- //rtl8192_irq_disable(dev);
- RT_TRACE(COMP_RESET,"%s():===========>start up the driver\n",__FUNCTION__);
+ RT_TRACE(COMP_RESET, "%s():<==========down process is finished\n", __func__);
+ RT_TRACE(COMP_RESET, "%s():===========>start up the driver\n", __func__);
reset_status = _rtl8192_up(dev);
- RT_TRACE(COMP_RESET,"%s():<===========up process is finished\n",__FUNCTION__);
- if(reset_status == -EAGAIN)
- {
- if(reset_times < 3)
- {
+ RT_TRACE(COMP_RESET, "%s():<===========up process is finished\n", __func__);
+ if (reset_status == -EAGAIN) {
+ if (reset_times < 3) {
reset_times++;
goto RESET_START;
- }
- else
- {
- RT_TRACE(COMP_ERR," ERR!!! %s(): Reset Failed!!\n", __FUNCTION__);
+ } else {
+ RT_TRACE(COMP_ERR, " ERR!!! %s(): Reset Failed!!\n", __func__);
}
}
ieee->is_silent_reset = 1;
EnableHWSecurityConfig8192(dev);
- if(ieee->state == IEEE80211_LINKED && ieee->iw_mode == IW_MODE_INFRA)
- {
+ if (ieee->state == IEEE80211_LINKED && ieee->iw_mode == IW_MODE_INFRA) {
ieee->set_chan(ieee->dev, ieee->current_network.channel);
queue_work(ieee->wq, &ieee->associate_complete_wq);
- }
- else if(ieee->state == IEEE80211_LINKED && ieee->iw_mode == IW_MODE_ADHOC)
- {
+ } else if (ieee->state == IEEE80211_LINKED && ieee->iw_mode == IW_MODE_ADHOC) {
ieee->set_chan(ieee->dev, ieee->current_network.channel);
ieee->link_change(ieee->dev);
- // notify_wx_assoc_event(ieee);
-
ieee80211_start_send_beacons(ieee);
if (ieee->data_hard_resume)
@@ -3944,7 +3487,7 @@ RESET_START:
priv->ResetProgress = RESET_TYPE_NORESET;
priv->reset_count++;
- priv->bForcedSilentReset =false;
+ priv->bForcedSilentReset = false;
priv->bResetInProgress = false;
// For test --> force write UFWP.
@@ -3953,50 +3496,36 @@ RESET_START:
}
}
-void CAM_read_entry(
- struct net_device *dev,
- u32 iIndex
-)
+void CAM_read_entry(struct net_device *dev, u32 iIndex)
{
- u32 target_command=0;
- u32 target_content=0;
- u8 entry_i=0;
- u32 ulStatus;
- s32 i=100;
-// printk("=======>start read CAM\n");
- for(entry_i=0;entry_i<CAM_CONTENT_COUNT;entry_i++)
- {
- // polling bit, and No Write enable, and address
- target_command= entry_i+CAM_CONTENT_COUNT*iIndex;
- target_command= target_command | BIT31;
+ u32 target_command = 0;
+ u32 target_content = 0;
+ u8 entry_i = 0;
+ u32 ulStatus;
+ s32 i = 100;
+ for (entry_i = 0; entry_i < CAM_CONTENT_COUNT; entry_i++) {
+ // polling bit, and No Write enable, and address
+ target_command = entry_i+CAM_CONTENT_COUNT*iIndex;
+ target_command = target_command | BIT31;
- //Check polling bit is clear
-// mdelay(1);
- while((i--)>=0)
- {
- ulStatus = read_nic_dword(dev, RWCAM);
- if(ulStatus & BIT31){
+ //Check polling bit is clear
+ while ((i--) >= 0) {
+ read_nic_dword(dev, RWCAM, &ulStatus);
+ if (ulStatus & BIT31)
continue;
- }
- else{
+ else
break;
- }
}
write_nic_dword(dev, RWCAM, target_command);
- RT_TRACE(COMP_SEC,"CAM_read_entry(): WRITE A0: %x \n",target_command);
- // printk("CAM_read_entry(): WRITE A0: %lx \n",target_command);
- target_content = read_nic_dword(dev, RCAMO);
- RT_TRACE(COMP_SEC, "CAM_read_entry(): WRITE A8: %x \n",target_content);
- // printk("CAM_read_entry(): WRITE A8: %lx \n",target_content);
+ RT_TRACE(COMP_SEC, "CAM_read_entry(): WRITE A0: %x \n", target_command);
+ read_nic_dword(dev, RCAMO, &target_content);
+ RT_TRACE(COMP_SEC, "CAM_read_entry(): WRITE A8: %x \n", target_content);
}
printk("\n");
}
-void rtl819x_update_rxcounts(
- struct r8192_priv *priv,
- u32* TotalRxBcnNum,
- u32* TotalRxDataNum
-)
+void rtl819x_update_rxcounts(struct r8192_priv *priv, u32 *TotalRxBcnNum,
+ u32 *TotalRxDataNum)
{
u16 SlotIndex;
u8 i;
@@ -4007,80 +3536,68 @@ void rtl819x_update_rxcounts(
SlotIndex = (priv->ieee80211->LinkDetectInfo.SlotIndex++)%(priv->ieee80211->LinkDetectInfo.SlotNum);
priv->ieee80211->LinkDetectInfo.RxBcnNum[SlotIndex] = priv->ieee80211->LinkDetectInfo.NumRecvBcnInPeriod;
priv->ieee80211->LinkDetectInfo.RxDataNum[SlotIndex] = priv->ieee80211->LinkDetectInfo.NumRecvDataInPeriod;
- for( i=0; i<priv->ieee80211->LinkDetectInfo.SlotNum; i++ ){
+ for (i = 0; i < priv->ieee80211->LinkDetectInfo.SlotNum; i++) {
*TotalRxBcnNum += priv->ieee80211->LinkDetectInfo.RxBcnNum[i];
*TotalRxDataNum += priv->ieee80211->LinkDetectInfo.RxDataNum[i];
}
}
-extern void rtl819x_watchdog_wqcallback(struct work_struct *work)
+extern void rtl819x_watchdog_wqcallback(struct work_struct *work)
{
- struct delayed_work *dwork = container_of(work,struct delayed_work,work);
- struct r8192_priv *priv = container_of(dwork,struct r8192_priv,watch_dog_wq);
- struct net_device *dev = priv->ieee80211->dev;
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct delayed_work *dwork = container_of(work, struct delayed_work, work);
+ struct r8192_priv *priv = container_of(dwork, struct r8192_priv, watch_dog_wq);
+ struct net_device *dev = priv->ieee80211->dev;
+ struct ieee80211_device *ieee = priv->ieee80211;
RESET_TYPE ResetType = RESET_TYPE_NORESET;
static u8 check_reset_cnt;
bool bBusyTraffic = false;
+ u32 TotalRxBcnNum = 0;
+ u32 TotalRxDataNum = 0;
- if(!priv->up)
+ if (!priv->up)
return;
hal_dm_watchdog(dev);
- {//to get busy traffic condition
- if(ieee->state == IEEE80211_LINKED)
- {
- if( ieee->LinkDetectInfo.NumRxOkInPeriod> 666 ||
- ieee->LinkDetectInfo.NumTxOkInPeriod> 666 ) {
- bBusyTraffic = true;
- }
- ieee->LinkDetectInfo.NumRxOkInPeriod = 0;
- ieee->LinkDetectInfo.NumTxOkInPeriod = 0;
- ieee->LinkDetectInfo.bBusyTraffic = bBusyTraffic;
+ //to get busy traffic condition
+ if (ieee->state == IEEE80211_LINKED) {
+ if (ieee->LinkDetectInfo.NumRxOkInPeriod > 666 ||
+ ieee->LinkDetectInfo.NumTxOkInPeriod > 666 ) {
+ bBusyTraffic = true;
}
+ ieee->LinkDetectInfo.NumRxOkInPeriod = 0;
+ ieee->LinkDetectInfo.NumTxOkInPeriod = 0;
+ ieee->LinkDetectInfo.bBusyTraffic = bBusyTraffic;
}
//added by amy for AP roaming
- {
- if(priv->ieee80211->state == IEEE80211_LINKED && priv->ieee80211->iw_mode == IW_MODE_INFRA)
- {
- u32 TotalRxBcnNum = 0;
- u32 TotalRxDataNum = 0;
+ if (priv->ieee80211->state == IEEE80211_LINKED && priv->ieee80211->iw_mode == IW_MODE_INFRA) {
- rtl819x_update_rxcounts(priv, &TotalRxBcnNum, &TotalRxDataNum);
- if((TotalRxBcnNum+TotalRxDataNum) == 0)
- {
- #ifdef TODO
- if(rfState == eRfOff)
- RT_TRACE(COMP_ERR,"========>%s()\n",__FUNCTION__);
- #endif
- printk("===>%s(): AP is power off,connect another one\n",__FUNCTION__);
- // Dot11d_Reset(dev);
- priv->ieee80211->state = IEEE80211_ASSOCIATING;
- notify_wx_assoc_event(priv->ieee80211);
- RemovePeerTS(priv->ieee80211,priv->ieee80211->current_network.bssid);
- priv->ieee80211->link_change(dev);
- queue_work(priv->ieee80211->wq, &priv->ieee80211->associate_procedure_wq);
+ rtl819x_update_rxcounts(priv, &TotalRxBcnNum, &TotalRxDataNum);
+ if ((TotalRxBcnNum+TotalRxDataNum) == 0) {
+#ifdef TODO
+ if (rfState == eRfOff)
+ RT_TRACE(COMP_ERR, "========>%s()\n", __func__);
+#endif
+ netdev_dbg(dev, "===>%s(): AP is power off, connect another one\n", __func__);
+ priv->ieee80211->state = IEEE80211_ASSOCIATING;
+ notify_wx_assoc_event(priv->ieee80211);
+ RemovePeerTS(priv->ieee80211, priv->ieee80211->current_network.bssid);
+ priv->ieee80211->link_change(dev);
+ queue_work(priv->ieee80211->wq, &priv->ieee80211->associate_procedure_wq);
- }
}
- priv->ieee80211->LinkDetectInfo.NumRecvBcnInPeriod=0;
- priv->ieee80211->LinkDetectInfo.NumRecvDataInPeriod=0;
}
-// CAM_read_entry(dev,4);
+ priv->ieee80211->LinkDetectInfo.NumRecvBcnInPeriod = 0;
+ priv->ieee80211->LinkDetectInfo.NumRecvDataInPeriod = 0;
//check if reset the driver
- if(check_reset_cnt++ >= 3)
- {
+ if (check_reset_cnt++ >= 3) {
ResetType = rtl819x_ifcheck_resetornot(dev);
check_reset_cnt = 3;
- //DbgPrint("Start to check silent reset\n");
}
- // RT_TRACE(COMP_RESET,"%s():priv->force_reset is %d,priv->ResetProgress is %d, priv->bForcedSilentReset is %d,priv->bDisableNormalResetCheck is %d,ResetType is %d\n",__FUNCTION__,priv->force_reset,priv->ResetProgress,priv->bForcedSilentReset,priv->bDisableNormalResetCheck,ResetType);
- if( (priv->force_reset) || (priv->ResetProgress==RESET_TYPE_NORESET &&
- (priv->bForcedSilentReset ||
- (!priv->bDisableNormalResetCheck && ResetType==RESET_TYPE_SILENT)))) // This is control by OID set in Pomelo
- {
- RT_TRACE(COMP_RESET,"%s():priv->force_reset is %d,priv->ResetProgress is %d, priv->bForcedSilentReset is %d,priv->bDisableNormalResetCheck is %d,ResetType is %d\n",__FUNCTION__,priv->force_reset,priv->ResetProgress,priv->bForcedSilentReset,priv->bDisableNormalResetCheck,ResetType);
+ if ((priv->force_reset) || (priv->ResetProgress == RESET_TYPE_NORESET &&
+ (priv->bForcedSilentReset ||
+ (!priv->bDisableNormalResetCheck && ResetType == RESET_TYPE_SILENT)))) { // This is control by OID set in Pomelo
+ RT_TRACE(COMP_RESET, "%s():priv->force_reset is %d,priv->ResetProgress is %d, priv->bForcedSilentReset is %d,priv->bDisableNormalResetCheck is %d,ResetType is %d\n", __func__, priv->force_reset, priv->ResetProgress, priv->bForcedSilentReset, priv->bDisableNormalResetCheck, ResetType);
rtl819x_ifsilentreset(dev);
}
priv->force_reset = false;
@@ -4093,33 +3610,29 @@ extern void rtl819x_watchdog_wqcallback(struct work_struct *work)
void watch_dog_timer_callback(unsigned long data)
{
struct r8192_priv *priv = ieee80211_priv((struct net_device *) data);
- //printk("===============>watch_dog timer\n");
- queue_delayed_work(priv->priv_wq,&priv->watch_dog_wq, 0);
+ queue_delayed_work(priv->priv_wq, &priv->watch_dog_wq, 0);
mod_timer(&priv->watch_dog_timer, jiffies + MSECS(IEEE80211_WATCH_DOG_TIME));
}
int _rtl8192_up(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- //int i;
int init_status = 0;
- priv->up=1;
- priv->ieee80211->ieee_up=1;
+ priv->up = 1;
+ priv->ieee80211->ieee_up = 1;
RT_TRACE(COMP_INIT, "Bringing up iface");
init_status = rtl8192_adapter_start(dev);
- if(!init_status)
- {
- RT_TRACE(COMP_ERR,"ERR!!! %s(): initialization failed!\n", __FUNCTION__);
- priv->up=priv->ieee80211->ieee_up = 0;
+ if (!init_status) {
+ RT_TRACE(COMP_ERR, "ERR!!! %s(): initialization failed!\n", __func__);
+ priv->up = priv->ieee80211->ieee_up = 0;
return -EAGAIN;
}
RT_TRACE(COMP_INIT, "start adapter finished\n");
rtl8192_rx_enable(dev);
-// rtl8192_tx_enable(dev);
- if(priv->ieee80211->state != IEEE80211_LINKED)
- ieee80211_softmac_start_protocol(priv->ieee80211);
+ if (priv->ieee80211->state != IEEE80211_LINKED)
+ ieee80211_softmac_start_protocol(priv->ieee80211);
ieee80211_reset_queue(priv->ieee80211);
watch_dog_timer_callback((unsigned long) dev);
- if(!netif_queue_stopped(dev))
+ if (!netif_queue_stopped(dev))
netif_start_queue(dev);
else
netif_wake_queue(dev);
@@ -4172,40 +3685,35 @@ int rtl8192_down(struct net_device *dev)
if (priv->up == 0) return -1;
- priv->up=0;
+ priv->up = 0;
priv->ieee80211->ieee_up = 0;
- RT_TRACE(COMP_DOWN, "==========>%s()\n", __FUNCTION__);
-/* FIXME */
+ RT_TRACE(COMP_DOWN, "==========>%s()\n", __func__);
+ /* FIXME */
if (!netif_queue_stopped(dev))
netif_stop_queue(dev);
rtl8192_rtx_disable(dev);
- //rtl8192_irq_disable(dev);
- /* Tx related queue release */
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_purge(&priv->ieee80211->skb_waitQ [i]);
- }
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_purge(&priv->ieee80211->skb_aggQ [i]);
- }
+ /* Tx related queue release */
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_purge(&priv->ieee80211->skb_waitQ[i]);
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_purge(&priv->ieee80211->skb_aggQ[i]);
- for(i = 0; i < MAX_QUEUE_SIZE; i++) {
- skb_queue_purge(&priv->ieee80211->skb_drv_aggQ [i]);
- }
+ for (i = 0; i < MAX_QUEUE_SIZE; i++)
+ skb_queue_purge(&priv->ieee80211->skb_drv_aggQ[i]);
//as cancel_delayed_work will del work->timer, so if work is not defined as struct delayed_work, it will corrupt
-// flush_scheduled_work();
rtl8192_cancel_deferred_work(priv);
deinit_hal_dm(dev);
del_timer_sync(&priv->watch_dog_timer);
ieee80211_softmac_stop_protocol(priv->ieee80211);
- memset(&priv->ieee80211->current_network, 0 , offsetof(struct ieee80211_network, list));
- RT_TRACE(COMP_DOWN, "<==========%s()\n", __FUNCTION__);
+ memset(&priv->ieee80211->current_network, 0, offsetof(struct ieee80211_network, list));
+ RT_TRACE(COMP_DOWN, "<==========%s()\n", __func__);
- return 0;
+ return 0;
}
@@ -4213,27 +3721,19 @@ void rtl8192_commit(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
int reset_status = 0;
- //u8 reset_times = 0;
- if (priv->up == 0) return ;
+ if (priv->up == 0) return;
priv->up = 0;
rtl8192_cancel_deferred_work(priv);
del_timer_sync(&priv->watch_dog_timer);
- //cancel_delayed_work(&priv->SwChnlWorkItem);
ieee80211_softmac_stop_protocol(priv->ieee80211);
- //rtl8192_irq_disable(dev);
rtl8192_rtx_disable(dev);
reset_status = _rtl8192_up(dev);
}
-/*
-void rtl8192_restart(struct net_device *dev)
-{
- struct r8192_priv *priv = ieee80211_priv(dev);
-*/
void rtl8192_restart(struct work_struct *work)
{
struct r8192_priv *priv = container_of(work, struct r8192_priv, reset_wq);
@@ -4251,19 +3751,13 @@ static void r8192_set_multicast(struct net_device *dev)
struct r8192_priv *priv = ieee80211_priv(dev);
short promisc;
- //down(&priv->wx_sem);
-
/* FIXME FIXME */
- promisc = (dev->flags & IFF_PROMISC) ? 1:0;
+ promisc = (dev->flags & IFF_PROMISC) ? 1 : 0;
if (promisc != priv->promisc)
- // rtl8192_commit(dev);
-
- priv->promisc = promisc;
- //schedule_work(&priv->reset_wq);
- //up(&priv->wx_sem);
+ priv->promisc = promisc;
}
@@ -4287,99 +3781,90 @@ int rtl8192_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct iwreq *wrq = (struct iwreq *)rq;
- int ret=-1;
+ int ret = -1;
struct ieee80211_device *ieee = priv->ieee80211;
u32 key[4];
- u8 broadcast_addr[6] = {0xff,0xff,0xff,0xff,0xff,0xff};
+ u8 broadcast_addr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
struct iw_point *p = &wrq->u.data;
- struct ieee_param *ipw = NULL;//(struct ieee_param *)wrq->u.data.pointer;
+ struct ieee_param *ipw = NULL;
down(&priv->wx_sem);
- if (p->length < sizeof(struct ieee_param) || !p->pointer){
- ret = -EINVAL;
- goto out;
+ if (p->length < sizeof(struct ieee_param) || !p->pointer) {
+ ret = -EINVAL;
+ goto out;
}
- ipw = kmalloc(p->length, GFP_KERNEL);
- if (ipw == NULL){
- ret = -ENOMEM;
- goto out;
- }
- if (copy_from_user(ipw, p->pointer, p->length)) {
+ ipw = kmalloc(p->length, GFP_KERNEL);
+ if (ipw == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (copy_from_user(ipw, p->pointer, p->length)) {
kfree(ipw);
- ret = -EFAULT;
- goto out;
+ ret = -EFAULT;
+ goto out;
}
switch (cmd) {
case RTL_IOCTL_WPA_SUPPLICANT:
- //parse here for HW security
- if (ipw->cmd == IEEE_CMD_SET_ENCRYPTION)
- {
- if (ipw->u.crypt.set_tx)
- {
- if (strcmp(ipw->u.crypt.alg, "CCMP") == 0)
+ //parse here for HW security
+ if (ipw->cmd == IEEE_CMD_SET_ENCRYPTION) {
+ if (ipw->u.crypt.set_tx) {
+ if (strcmp(ipw->u.crypt.alg, "CCMP") == 0) {
ieee->pairwise_key_type = KEY_TYPE_CCMP;
- else if (strcmp(ipw->u.crypt.alg, "TKIP") == 0)
+ } else if (strcmp(ipw->u.crypt.alg, "TKIP") == 0) {
ieee->pairwise_key_type = KEY_TYPE_TKIP;
- else if (strcmp(ipw->u.crypt.alg, "WEP") == 0)
- {
+ } else if (strcmp(ipw->u.crypt.alg, "WEP") == 0) {
if (ipw->u.crypt.key_len == 13)
ieee->pairwise_key_type = KEY_TYPE_WEP104;
else if (ipw->u.crypt.key_len == 5)
ieee->pairwise_key_type = KEY_TYPE_WEP40;
- }
- else
+ } else {
ieee->pairwise_key_type = KEY_TYPE_NA;
+ }
- if (ieee->pairwise_key_type)
- {
- memcpy((u8*)key, ipw->u.crypt.key, 16);
+ if (ieee->pairwise_key_type) {
+ memcpy((u8 *)key, ipw->u.crypt.key, 16);
EnableHWSecurityConfig8192(dev);
- //we fill both index entry and 4th entry for pairwise key as in IPW interface, adhoc will only get here, so we need index entry for its default key serching!
- //added by WB.
- setKey(dev, 4, ipw->u.crypt.idx, ieee->pairwise_key_type, (u8*)ieee->ap_mac_addr, 0, key);
+ //we fill both index entry and 4th entry for pairwise key as in IPW interface, adhoc will only get here, so we need index entry for its default key serching!
+ //added by WB.
+ setKey(dev, 4, ipw->u.crypt.idx, ieee->pairwise_key_type, (u8 *)ieee->ap_mac_addr, 0, key);
if (ieee->auth_mode != 2)
- setKey(dev, ipw->u.crypt.idx, ipw->u.crypt.idx, ieee->pairwise_key_type, (u8*)ieee->ap_mac_addr, 0, key);
+ setKey(dev, ipw->u.crypt.idx, ipw->u.crypt.idx, ieee->pairwise_key_type, (u8 *)ieee->ap_mac_addr, 0, key);
}
- }
- else //if (ipw->u.crypt.idx) //group key use idx > 0
- {
- memcpy((u8*)key, ipw->u.crypt.key, 16);
- if (strcmp(ipw->u.crypt.alg, "CCMP") == 0)
- ieee->group_key_type= KEY_TYPE_CCMP;
- else if (strcmp(ipw->u.crypt.alg, "TKIP") == 0)
+ } else {
+ memcpy((u8 *)key, ipw->u.crypt.key, 16);
+ if (strcmp(ipw->u.crypt.alg, "CCMP") == 0) {
+ ieee->group_key_type = KEY_TYPE_CCMP;
+ } else if (strcmp(ipw->u.crypt.alg, "TKIP") == 0) {
ieee->group_key_type = KEY_TYPE_TKIP;
- else if (strcmp(ipw->u.crypt.alg, "WEP") == 0)
- {
+ } else if (strcmp(ipw->u.crypt.alg, "WEP") == 0) {
if (ipw->u.crypt.key_len == 13)
ieee->group_key_type = KEY_TYPE_WEP104;
else if (ipw->u.crypt.key_len == 5)
ieee->group_key_type = KEY_TYPE_WEP40;
- }
- else
+ } else {
ieee->group_key_type = KEY_TYPE_NA;
+ }
- if (ieee->group_key_type)
- {
- setKey( dev,
- ipw->u.crypt.idx,
- ipw->u.crypt.idx, //KeyIndex
- ieee->group_key_type, //KeyType
- broadcast_addr, //MacAddr
- 0, //DefaultKey
- key); //KeyContent
+ if (ieee->group_key_type) {
+ setKey(dev, ipw->u.crypt.idx,
+ ipw->u.crypt.idx, //KeyIndex
+ ieee->group_key_type, //KeyType
+ broadcast_addr, //MacAddr
+ 0, //DefaultKey
+ key); //KeyContent
}
}
}
#ifdef JOHN_HWSEC_DEBUG
//john's test 0711
printk("@@ wrq->u pointer = ");
- for(i=0;i<wrq->u.data.length;i++){
- if(i%10==0) printk("\n");
- printk( "%8x|", ((u32*)wrq->u.data.pointer)[i] );
+ for (i = 0; i < wrq->u.data.length; i++) {
+ if (i%10 == 0) printk("\n");
+ printk("%8x|", ((u32 *)wrq->u.data.pointer)[i]);
}
printk("\n");
#endif /*JOHN_HWSEC_DEBUG*/
@@ -4401,8 +3886,8 @@ u8 HwRateToMRate90(bool bIsHT, u8 rate)
{
u8 ret_rate = 0xff;
- if(!bIsHT) {
- switch(rate) {
+ if (!bIsHT) {
+ switch (rate) {
case DESC90_RATE1M: ret_rate = MGN_1M; break;
case DESC90_RATE2M: ret_rate = MGN_2M; break;
case DESC90_RATE5_5M: ret_rate = MGN_5_5M; break;
@@ -4423,7 +3908,7 @@ u8 HwRateToMRate90(bool bIsHT, u8 rate)
}
} else {
- switch(rate) {
+ switch (rate) {
case DESC90_RATEMCS0: ret_rate = MGN_MCS0; break;
case DESC90_RATEMCS1: ret_rate = MGN_MCS1; break;
case DESC90_RATEMCS2: ret_rate = MGN_MCS2; break;
@@ -4444,7 +3929,7 @@ u8 HwRateToMRate90(bool bIsHT, u8 rate)
default:
ret_rate = 0xff;
- RT_TRACE(COMP_RECV, "HwRateToMRate90(): Non supported Rate [%x], bIsHT = %d!!!\n",rate, bIsHT);
+ RT_TRACE(COMP_RECV, "HwRateToMRate90(): Non supported Rate [%x], bIsHT = %d!!!\n", rate, bIsHT);
break;
}
}
@@ -4467,11 +3952,11 @@ u8 HwRateToMRate90(bool bIsHT, u8 rate)
* Return:
* None
*/
-void UpdateRxPktTimeStamp8190 (struct net_device *dev, struct ieee80211_rx_stats *stats)
+void UpdateRxPktTimeStamp8190(struct net_device *dev, struct ieee80211_rx_stats *stats)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
- if(stats->bIsAMPDU && !stats->bFirstMPDU) {
+ if (stats->bIsAMPDU && !stats->bFirstMPDU) {
stats->mac_time[0] = priv->LastRxDescTSFLow;
stats->mac_time[1] = priv->LastRxDescTSFHigh;
} else {
@@ -4482,7 +3967,7 @@ void UpdateRxPktTimeStamp8190 (struct net_device *dev, struct ieee80211_rx_stats
//by amy 080606
-long rtl819x_translate_todbm(u8 signal_strength_index )// 0-100 index.
+long rtl819x_translate_todbm(u8 signal_strength_index)// 0-100 index.
{
long signal_power; // in dBm.
@@ -4498,12 +3983,11 @@ long rtl819x_translate_todbm(u8 signal_strength_index )// 0-100 index.
be a local static. Otherwise, it may increase when we return from S3/S4. The
value will be kept in memory or disk. Declare the value in the adaptor
and it will be reinitialized when returned from S3/S4. */
-void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee80211_rx_stats * pprevious_stats, struct ieee80211_rx_stats * pcurrent_stats)
+void rtl8192_process_phyinfo(struct r8192_priv *priv, u8 *buffer, struct ieee80211_rx_stats *pprevious_stats, struct ieee80211_rx_stats *pcurrent_stats)
{
bool bcheck = false;
u8 rfpath;
u32 nspatial_stream, tmp_val;
- //u8 i;
static u32 slide_rssi_index, slide_rssi_statistics;
static u32 slide_evm_index, slide_evm_statistics;
static u32 last_rssi, last_evm;
@@ -4512,8 +3996,8 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
static u32 last_beacon_adc_pwdb;
struct ieee80211_hdr_3addr *hdr;
- u16 sc ;
- unsigned int frag,seq;
+ u16 sc;
+ unsigned int frag, seq;
hdr = (struct ieee80211_hdr_3addr *)buffer;
sc = le16_to_cpu(hdr->seq_ctl);
frag = WLAN_GET_SEQ_FRAG(sc);
@@ -4523,14 +4007,12 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
//
// Check whether we should take the previous packet into accounting
//
- if(!pprevious_stats->bIsAMPDU)
- {
+ if (!pprevious_stats->bIsAMPDU) {
// if previous packet is not aggregated packet
bcheck = true;
}
- if(slide_rssi_statistics++ >= PHY_RSSI_SLID_WIN_MAX)
- {
+ if (slide_rssi_statistics++ >= PHY_RSSI_SLID_WIN_MAX) {
slide_rssi_statistics = PHY_RSSI_SLID_WIN_MAX;
last_rssi = priv->stats.slide_signal_strength[slide_rssi_index];
priv->stats.slide_rssi_total -= last_rssi;
@@ -4538,7 +4020,7 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
priv->stats.slide_rssi_total += pprevious_stats->SignalStrength;
priv->stats.slide_signal_strength[slide_rssi_index++] = pprevious_stats->SignalStrength;
- if(slide_rssi_index >= PHY_RSSI_SLID_WIN_MAX)
+ if (slide_rssi_index >= PHY_RSSI_SLID_WIN_MAX)
slide_rssi_index = 0;
// <1> Showed on UI for user, in dbm
@@ -4548,13 +4030,12 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
//
// If the previous packet does not match the criteria, neglect it
//
- if(!pprevious_stats->bPacketMatchBSSID)
- {
- if(!pprevious_stats->bToSelfBA)
+ if (!pprevious_stats->bPacketMatchBSSID) {
+ if (!pprevious_stats->bToSelfBA)
return;
}
- if(!bcheck)
+ if (!bcheck)
return;
@@ -4570,33 +4051,25 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
// <2> Showed on UI for engineering
// hardware does not provide rssi information for each rf path in CCK
- if(!pprevious_stats->bIsCCK && (pprevious_stats->bPacketToSelf || pprevious_stats->bToSelfBA))
- {
- for (rfpath = RF90_PATH_A; rfpath < priv->NumTotalRFPath; rfpath++)
- {
- if (!rtl8192_phy_CheckIsLegalRFPath(priv->ieee80211->dev, rfpath))
- continue;
+ if (!pprevious_stats->bIsCCK && (pprevious_stats->bPacketToSelf || pprevious_stats->bToSelfBA)) {
+ for (rfpath = RF90_PATH_A; rfpath < priv->NumTotalRFPath; rfpath++) {
+ if (!rtl8192_phy_CheckIsLegalRFPath(priv->ieee80211->dev, rfpath))
+ continue;
//Fixed by Jacken 2008-03-20
- if(priv->stats.rx_rssi_percentage[rfpath] == 0)
- {
+ if (priv->stats.rx_rssi_percentage[rfpath] == 0)
priv->stats.rx_rssi_percentage[rfpath] = pprevious_stats->RxMIMOSignalStrength[rfpath];
- //DbgPrint("MIMO RSSI initialize \n");
- }
- if(pprevious_stats->RxMIMOSignalStrength[rfpath] > priv->stats.rx_rssi_percentage[rfpath])
- {
+ if (pprevious_stats->RxMIMOSignalStrength[rfpath] > priv->stats.rx_rssi_percentage[rfpath]) {
priv->stats.rx_rssi_percentage[rfpath] =
- ( (priv->stats.rx_rssi_percentage[rfpath]*(Rx_Smooth_Factor-1)) +
- (pprevious_stats->RxMIMOSignalStrength[rfpath])) /(Rx_Smooth_Factor);
+ ((priv->stats.rx_rssi_percentage[rfpath]*(Rx_Smooth_Factor-1)) +
+ (pprevious_stats->RxMIMOSignalStrength[rfpath])) /(Rx_Smooth_Factor);
priv->stats.rx_rssi_percentage[rfpath] = priv->stats.rx_rssi_percentage[rfpath] + 1;
- }
- else
- {
+ } else {
priv->stats.rx_rssi_percentage[rfpath] =
- ( (priv->stats.rx_rssi_percentage[rfpath]*(Rx_Smooth_Factor-1)) +
- (pprevious_stats->RxMIMOSignalStrength[rfpath])) /(Rx_Smooth_Factor);
+ ((priv->stats.rx_rssi_percentage[rfpath]*(Rx_Smooth_Factor-1)) +
+ (pprevious_stats->RxMIMOSignalStrength[rfpath])) /(Rx_Smooth_Factor);
}
- RT_TRACE(COMP_DBG,"priv->stats.rx_rssi_percentage[rfPath] = %d \n" ,priv->stats.rx_rssi_percentage[rfpath] );
+ RT_TRACE(COMP_DBG, "priv->stats.rx_rssi_percentage[rfPath] = %d \n", priv->stats.rx_rssi_percentage[rfpath]);
}
}
@@ -4605,55 +4078,43 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
// Check PWDB.
//
RT_TRACE(COMP_RXDESC, "Smooth %s PWDB = %d\n",
- pprevious_stats->bIsCCK? "CCK": "OFDM",
- pprevious_stats->RxPWDBAll);
+ pprevious_stats->bIsCCK ? "CCK" : "OFDM",
+ pprevious_stats->RxPWDBAll);
- if(pprevious_stats->bPacketBeacon)
- {
-/* record the beacon pwdb to the sliding window. */
- if(slide_beacon_adc_pwdb_statistics++ >= PHY_Beacon_RSSI_SLID_WIN_MAX)
- {
+ if (pprevious_stats->bPacketBeacon) {
+ /* record the beacon pwdb to the sliding window. */
+ if (slide_beacon_adc_pwdb_statistics++ >= PHY_Beacon_RSSI_SLID_WIN_MAX) {
slide_beacon_adc_pwdb_statistics = PHY_Beacon_RSSI_SLID_WIN_MAX;
last_beacon_adc_pwdb = priv->stats.Slide_Beacon_pwdb[slide_beacon_adc_pwdb_index];
priv->stats.Slide_Beacon_Total -= last_beacon_adc_pwdb;
- //DbgPrint("slide_beacon_adc_pwdb_index = %d, last_beacon_adc_pwdb = %d, Adapter->RxStats.Slide_Beacon_Total = %d\n",
- // slide_beacon_adc_pwdb_index, last_beacon_adc_pwdb, Adapter->RxStats.Slide_Beacon_Total);
}
priv->stats.Slide_Beacon_Total += pprevious_stats->RxPWDBAll;
priv->stats.Slide_Beacon_pwdb[slide_beacon_adc_pwdb_index] = pprevious_stats->RxPWDBAll;
- //DbgPrint("slide_beacon_adc_pwdb_index = %d, pPreviousRfd->Status.RxPWDBAll = %d\n", slide_beacon_adc_pwdb_index, pPreviousRfd->Status.RxPWDBAll);
slide_beacon_adc_pwdb_index++;
- if(slide_beacon_adc_pwdb_index >= PHY_Beacon_RSSI_SLID_WIN_MAX)
+ if (slide_beacon_adc_pwdb_index >= PHY_Beacon_RSSI_SLID_WIN_MAX)
slide_beacon_adc_pwdb_index = 0;
pprevious_stats->RxPWDBAll = priv->stats.Slide_Beacon_Total/slide_beacon_adc_pwdb_statistics;
- if(pprevious_stats->RxPWDBAll >= 3)
+ if (pprevious_stats->RxPWDBAll >= 3)
pprevious_stats->RxPWDBAll -= 3;
}
RT_TRACE(COMP_RXDESC, "Smooth %s PWDB = %d\n",
- pprevious_stats->bIsCCK? "CCK": "OFDM",
- pprevious_stats->RxPWDBAll);
+ pprevious_stats->bIsCCK ? "CCK" : "OFDM",
+ pprevious_stats->RxPWDBAll);
- if(pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA)
- {
- if(priv->undecorated_smoothed_pwdb < 0) // initialize
- {
+ if (pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA) {
+ if (priv->undecorated_smoothed_pwdb < 0) // initialize
priv->undecorated_smoothed_pwdb = pprevious_stats->RxPWDBAll;
- //DbgPrint("First pwdb initialize \n");
- }
- if(pprevious_stats->RxPWDBAll > (u32)priv->undecorated_smoothed_pwdb)
- {
+ if (pprevious_stats->RxPWDBAll > (u32)priv->undecorated_smoothed_pwdb) {
priv->undecorated_smoothed_pwdb =
- ( ((priv->undecorated_smoothed_pwdb)*(Rx_Smooth_Factor-1)) +
- (pprevious_stats->RxPWDBAll)) /(Rx_Smooth_Factor);
+ (((priv->undecorated_smoothed_pwdb)*(Rx_Smooth_Factor-1)) +
+ (pprevious_stats->RxPWDBAll)) /(Rx_Smooth_Factor);
priv->undecorated_smoothed_pwdb = priv->undecorated_smoothed_pwdb + 1;
- }
- else
- {
+ } else {
priv->undecorated_smoothed_pwdb =
- ( ((priv->undecorated_smoothed_pwdb)*(Rx_Smooth_Factor-1)) +
- (pprevious_stats->RxPWDBAll)) /(Rx_Smooth_Factor);
+ (((priv->undecorated_smoothed_pwdb)*(Rx_Smooth_Factor-1)) +
+ (pprevious_stats->RxPWDBAll)) /(Rx_Smooth_Factor);
}
}
@@ -4662,13 +4123,9 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
// Check EVM
//
/* record the general EVM to the sliding window. */
- if(pprevious_stats->SignalQuality == 0)
- {
- }
- else
- {
- if(pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA){
- if(slide_evm_statistics++ >= PHY_RSSI_SLID_WIN_MAX){
+ if (pprevious_stats->SignalQuality) {
+ if (pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA) {
+ if (slide_evm_statistics++ >= PHY_RSSI_SLID_WIN_MAX) {
slide_evm_statistics = PHY_RSSI_SLID_WIN_MAX;
last_evm = priv->stats.slide_evm[slide_evm_index];
priv->stats.slide_evm_total -= last_evm;
@@ -4677,7 +4134,7 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
priv->stats.slide_evm_total += pprevious_stats->SignalQuality;
priv->stats.slide_evm[slide_evm_index++] = pprevious_stats->SignalQuality;
- if(slide_evm_index >= PHY_RSSI_SLID_WIN_MAX)
+ if (slide_evm_index >= PHY_RSSI_SLID_WIN_MAX)
slide_evm_index = 0;
// <1> Showed on UI for user, in percentage.
@@ -4688,19 +4145,14 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
}
// <2> Showed on UI for engineering
- if(pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA)
- {
- for(nspatial_stream = 0; nspatial_stream<2 ; nspatial_stream++) // 2 spatial stream
- {
- if(pprevious_stats->RxMIMOSignalQuality[nspatial_stream] != -1)
- {
- if(priv->stats.rx_evm_percentage[nspatial_stream] == 0) // initialize
- {
+ if (pprevious_stats->bPacketToSelf || pprevious_stats->bPacketBeacon || pprevious_stats->bToSelfBA) {
+ for (nspatial_stream = 0; nspatial_stream < 2; nspatial_stream++) { // 2 spatial stream
+ if (pprevious_stats->RxMIMOSignalQuality[nspatial_stream] != -1) {
+ if (priv->stats.rx_evm_percentage[nspatial_stream] == 0) // initialize
priv->stats.rx_evm_percentage[nspatial_stream] = pprevious_stats->RxMIMOSignalQuality[nspatial_stream];
- }
priv->stats.rx_evm_percentage[nspatial_stream] =
- ( (priv->stats.rx_evm_percentage[nspatial_stream]* (Rx_Smooth_Factor-1)) +
- (pprevious_stats->RxMIMOSignalQuality[nspatial_stream]* 1)) / (Rx_Smooth_Factor);
+ ((priv->stats.rx_evm_percentage[nspatial_stream]* (Rx_Smooth_Factor-1)) +
+ (pprevious_stats->RxMIMOSignalQuality[nspatial_stream]* 1)) / (Rx_Smooth_Factor);
}
}
}
@@ -4725,126 +4177,104 @@ void rtl8192_process_phyinfo(struct r8192_priv * priv,u8* buffer, struct ieee802
* 05/26/2008 amy Create Version 0 porting from windows code.
*
*---------------------------------------------------------------------------*/
-static u8 rtl819x_query_rxpwrpercentage(
- char antpower
- )
+static u8 rtl819x_query_rxpwrpercentage(char antpower)
{
if ((antpower <= -100) || (antpower >= 20))
- {
return 0;
- }
else if (antpower >= 0)
- {
return 100;
- }
else
- {
- return (100+antpower);
- }
+ return 100 + antpower;
} /* QueryRxPwrPercentage */
-static u8
-rtl819x_evm_dbtopercentage(
- char value
- )
+static u8 rtl819x_evm_dbtopercentage(char value)
{
- char ret_val;
+ char ret_val;
- ret_val = value;
+ ret_val = value;
- if(ret_val >= 0)
- ret_val = 0;
- if(ret_val <= -33)
- ret_val = -33;
- ret_val = 0 - ret_val;
- ret_val*=3;
- if(ret_val == 99)
+ if (ret_val >= 0)
+ ret_val = 0;
+ if (ret_val <= -33)
+ ret_val = -33;
+ ret_val = 0 - ret_val;
+ ret_val *= 3;
+ if (ret_val == 99)
ret_val = 100;
- return(ret_val);
+ return ret_val;
}
//
// Description:
// We want good-looking for signal strength/quality
// 2007/7/19 01:09, by cosa.
//
-long
-rtl819x_signal_scale_mapping(
- long currsig
- )
+long rtl819x_signal_scale_mapping(long currsig)
{
long retsig;
// Step 1. Scale mapping.
- if(currsig >= 61 && currsig <= 100)
- {
+ if (currsig >= 61 && currsig <= 100)
retsig = 90 + ((currsig - 60) / 4);
- }
- else if(currsig >= 41 && currsig <= 60)
- {
+ else if (currsig >= 41 && currsig <= 60)
retsig = 78 + ((currsig - 40) / 2);
- }
- else if(currsig >= 31 && currsig <= 40)
- {
+ else if (currsig >= 31 && currsig <= 40)
retsig = 66 + (currsig - 30);
- }
- else if(currsig >= 21 && currsig <= 30)
- {
+ else if (currsig >= 21 && currsig <= 30)
retsig = 54 + (currsig - 20);
- }
- else if(currsig >= 5 && currsig <= 20)
- {
+ else if (currsig >= 5 && currsig <= 20)
retsig = 42 + (((currsig - 5) * 2) / 3);
- }
- else if(currsig == 4)
- {
+ else if (currsig == 4)
retsig = 36;
- }
- else if(currsig == 3)
- {
+ else if (currsig == 3)
retsig = 27;
- }
- else if(currsig == 2)
- {
+ else if (currsig == 2)
retsig = 18;
- }
- else if(currsig == 1)
- {
+ else if (currsig == 1)
retsig = 9;
- }
else
- {
retsig = currsig;
- }
return retsig;
}
-static void rtl8192_query_rxphystatus(
- struct r8192_priv * priv,
- struct ieee80211_rx_stats * pstats,
- rx_drvinfo_819x_usb * pdrvinfo,
- struct ieee80211_rx_stats * precord_stats,
- bool bpacket_match_bssid,
- bool bpacket_toself,
- bool bPacketBeacon,
- bool bToSelfBA
- )
-{
- //PRT_RFD_STATUS pRtRfdStatus = &(pRfd->Status);
- phy_sts_ofdm_819xusb_t* pofdm_buf;
- phy_sts_cck_819xusb_t * pcck_buf;
- phy_ofdm_rx_status_rxsc_sgien_exintfflag* prxsc;
+static inline bool rx_hal_is_cck_rate(struct rx_drvinfo_819x_usb *pdrvinfo)
+{
+ if (pdrvinfo->RxHT)
+ return false;
+
+ switch (pdrvinfo->RxRate) {
+ case DESC90_RATE1M:
+ case DESC90_RATE2M:
+ case DESC90_RATE5_5M:
+ case DESC90_RATE11M:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void rtl8192_query_rxphystatus(struct r8192_priv *priv,
+ struct ieee80211_rx_stats *pstats,
+ rx_drvinfo_819x_usb *pdrvinfo,
+ struct ieee80211_rx_stats *precord_stats,
+ bool bpacket_match_bssid,
+ bool bpacket_toself,
+ bool bPacketBeacon,
+ bool bToSelfBA)
+{
+ phy_sts_ofdm_819xusb_t *pofdm_buf;
+ phy_sts_cck_819xusb_t *pcck_buf;
+ phy_ofdm_rx_status_rxsc_sgien_exintfflag *prxsc;
u8 *prxpkt;
u8 i, max_spatial_stream, tmp_rxsnr, tmp_rxevm, rxsc_sgien_exflg;
- char rx_pwr[4], rx_pwr_all=0;
- //long rx_avg_pwr = 0;
+ char rx_pwr[4], rx_pwr_all = 0;
char rx_snrX, rx_evmX;
u8 evm, pwdb_all;
- u32 RSSI, total_rssi=0;//, total_evm=0;
-// long signal_strength_index = 0;
- u8 is_cck_rate=0;
+ u32 RSSI, total_rssi = 0;
+ u8 is_cck_rate = 0;
u8 rf_rx_num = 0;
+ u8 sq;
priv->stats.numqry_phystatus++;
@@ -4855,11 +4285,11 @@ static void rtl8192_query_rxphystatus(
memset(precord_stats, 0, sizeof(struct ieee80211_rx_stats));
pstats->bPacketMatchBSSID = precord_stats->bPacketMatchBSSID = bpacket_match_bssid;
pstats->bPacketToSelf = precord_stats->bPacketToSelf = bpacket_toself;
- pstats->bIsCCK = precord_stats->bIsCCK = is_cck_rate;//RX_HAL_IS_CCK_RATE(pDrvInfo);
+ pstats->bIsCCK = precord_stats->bIsCCK = is_cck_rate;
pstats->bPacketBeacon = precord_stats->bPacketBeacon = bPacketBeacon;
pstats->bToSelfBA = precord_stats->bToSelfBA = bToSelfBA;
- prxpkt = (u8*)pdrvinfo;
+ prxpkt = (u8 *)pdrvinfo;
/* Move pointer to the 16th bytes. Phy status start address. */
prxpkt += sizeof(rx_drvinfo_819x_usb);
@@ -4873,8 +4303,7 @@ static void rtl8192_query_rxphystatus(
precord_stats->RxMIMOSignalQuality[0] = -1;
precord_stats->RxMIMOSignalQuality[1] = -1;
- if(is_cck_rate)
- {
+ if (is_cck_rate) {
//
// (1)Hardware does not provide RSSI for CCK
//
@@ -4882,51 +4311,46 @@ static void rtl8192_query_rxphystatus(
//
// (2)PWDB, Average PWDB cacluated by hardware (for rate adaptive)
//
- u8 report;//, cck_agc_rpt;
+ u8 report;
priv->stats.numqry_phystatusCCK++;
- if(!priv->bCckHighPower)
- {
+ if (!priv->bCckHighPower) {
report = pcck_buf->cck_agc_rpt & 0xc0;
report = report>>6;
- switch(report)
- {
+ switch (report) {
//Fixed by Jacken from Bryant 2008-03-20
//Original value is -38 , -26 , -14 , -2
//Fixed value is -35 , -23 , -11 , 6
- case 0x3:
- rx_pwr_all = -35 - (pcck_buf->cck_agc_rpt & 0x3e);
- break;
- case 0x2:
- rx_pwr_all = -23 - (pcck_buf->cck_agc_rpt & 0x3e);
- break;
- case 0x1:
- rx_pwr_all = -11 - (pcck_buf->cck_agc_rpt & 0x3e);
- break;
- case 0x0:
- rx_pwr_all = 6 - (pcck_buf->cck_agc_rpt & 0x3e);
- break;
+ case 0x3:
+ rx_pwr_all = -35 - (pcck_buf->cck_agc_rpt & 0x3e);
+ break;
+ case 0x2:
+ rx_pwr_all = -23 - (pcck_buf->cck_agc_rpt & 0x3e);
+ break;
+ case 0x1:
+ rx_pwr_all = -11 - (pcck_buf->cck_agc_rpt & 0x3e);
+ break;
+ case 0x0:
+ rx_pwr_all = 6 - (pcck_buf->cck_agc_rpt & 0x3e);
+ break;
}
- }
- else
- {
+ } else {
report = pcck_buf->cck_agc_rpt & 0x60;
report = report>>5;
- switch(report)
- {
- case 0x3:
- rx_pwr_all = -35 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1) ;
- break;
- case 0x2:
- rx_pwr_all = -23 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1);
- break;
- case 0x1:
- rx_pwr_all = -11 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1) ;
- break;
- case 0x0:
- rx_pwr_all = 6 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1) ;
- break;
+ switch (report) {
+ case 0x3:
+ rx_pwr_all = -35 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1);
+ break;
+ case 0x2:
+ rx_pwr_all = -23 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1);
+ break;
+ case 0x1:
+ rx_pwr_all = -11 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1);
+ break;
+ case 0x0:
+ rx_pwr_all = 6 - ((pcck_buf->cck_agc_rpt & 0x1f)<<1);
+ break;
}
}
@@ -4937,44 +4361,36 @@ static void rtl8192_query_rxphystatus(
//
// (3) Get Signal Quality (EVM)
//
- //if(bpacket_match_bssid)
- {
- u8 sq;
- if(pstats->RxPWDBAll > 40)
- {
- sq = 100;
- }else
- {
- sq = pcck_buf->sq_rpt;
+ if (pstats->RxPWDBAll > 40) {
+ sq = 100;
+ } else {
+ sq = pcck_buf->sq_rpt;
- if(pcck_buf->sq_rpt > 64)
- sq = 0;
- else if (pcck_buf->sq_rpt < 20)
- sq = 100;
- else
- sq = ((64-sq) * 100) / 44;
- }
- pstats->SignalQuality = precord_stats->SignalQuality = sq;
- pstats->RxMIMOSignalQuality[0] = precord_stats->RxMIMOSignalQuality[0] = sq;
- pstats->RxMIMOSignalQuality[1] = precord_stats->RxMIMOSignalQuality[1] = -1;
+ if (pcck_buf->sq_rpt > 64)
+ sq = 0;
+ else if (pcck_buf->sq_rpt < 20)
+ sq = 100;
+ else
+ sq = ((64-sq) * 100) / 44;
}
- }
- else
- {
+ pstats->SignalQuality = precord_stats->SignalQuality = sq;
+ pstats->RxMIMOSignalQuality[0] = precord_stats->RxMIMOSignalQuality[0] = sq;
+ pstats->RxMIMOSignalQuality[1] = precord_stats->RxMIMOSignalQuality[1] = -1;
+
+ } else {
priv->stats.numqry_phystatusHT++;
//
// (1)Get RSSI for HT rate
//
- for(i=RF90_PATH_A; i<priv->NumTotalRFPath; i++)
- {
+ for (i = RF90_PATH_A; i < priv->NumTotalRFPath; i++) {
// 2008/01/30 MH we will judge RF RX path now.
if (priv->brfpath_rxenable[i])
rf_rx_num++;
else
continue;
- if (!rtl8192_phy_CheckIsLegalRFPath(priv->ieee80211->dev, i))
+ if (!rtl8192_phy_CheckIsLegalRFPath(priv->ieee80211->dev, i))
continue;
//Fixed by Jacken from Bryant 2008-03-20
@@ -4984,7 +4400,6 @@ static void rtl8192_query_rxphystatus(
//Get Rx snr value in DB
tmp_rxsnr = pofdm_buf->rxsnr_X[i];
rx_snrX = (char)(tmp_rxsnr);
- //rx_snrX >>= 1;
rx_snrX /= 2;
priv->stats.rxSNRdB[i] = (long)rx_snrX;
@@ -4993,11 +4408,8 @@ static void rtl8192_query_rxphystatus(
total_rssi += RSSI;
/* Record Signal Strength for next packet */
- //if(bpacket_match_bssid)
- {
- pstats->RxMIMOSignalStrength[i] =(u8) RSSI;
- precord_stats->RxMIMOSignalStrength[i] =(u8) RSSI;
- }
+ pstats->RxMIMOSignalStrength[i] = (u8) RSSI;
+ precord_stats->RxMIMOSignalStrength[i] = (u8) RSSI;
}
@@ -5006,7 +4418,7 @@ static void rtl8192_query_rxphystatus(
//
//Fixed by Jacken from Bryant 2008-03-20
//Original value is 106
- rx_pwr_all = (((pofdm_buf->pwdb_all ) >> 1 )& 0x7f) -106;
+ rx_pwr_all = (((pofdm_buf->pwdb_all) >> 1)& 0x7f) -106;
pwdb_all = rtl819x_query_rxpwrpercentage(rx_pwr_all);
pstats->RxPWDBAll = precord_stats->RxPWDBAll = pwdb_all;
@@ -5015,14 +4427,13 @@ static void rtl8192_query_rxphystatus(
//
// (3)EVM of HT rate
//
- if(pdrvinfo->RxHT && pdrvinfo->RxRate>=DESC90_RATEMCS8 &&
- pdrvinfo->RxRate<=DESC90_RATEMCS15)
+ if (pdrvinfo->RxHT && pdrvinfo->RxRate >= DESC90_RATEMCS8 &&
+ pdrvinfo->RxRate <= DESC90_RATEMCS15)
max_spatial_stream = 2; //both spatial stream make sense
else
max_spatial_stream = 1; //only spatial stream 1 makes sense
- for(i=0; i<max_spatial_stream; i++)
- {
+ for (i = 0; i < max_spatial_stream; i++) {
tmp_rxevm = pofdm_buf->rxevm_X[i];
rx_evmX = (char)(tmp_rxevm);
@@ -5032,19 +4443,16 @@ static void rtl8192_query_rxphystatus(
rx_evmX /= 2; //dbm
evm = rtl819x_evm_dbtopercentage(rx_evmX);
- //if(bpacket_match_bssid)
- {
- if(i==0) // Fill value in RFD, Get the first spatial stream only
- pstats->SignalQuality = precord_stats->SignalQuality = (u8)(evm & 0xff);
- pstats->RxMIMOSignalQuality[i] = precord_stats->RxMIMOSignalQuality[i] = (u8)(evm & 0xff);
- }
+ if (i == 0) // Fill value in RFD, Get the first spatial stream only
+ pstats->SignalQuality = precord_stats->SignalQuality = (u8)(evm & 0xff);
+ pstats->RxMIMOSignalQuality[i] = precord_stats->RxMIMOSignalQuality[i] = (u8)(evm & 0xff);
}
/* record rx statistics for debug */
rxsc_sgien_exflg = pofdm_buf->rxsc_sgien_exflg;
prxsc = (phy_ofdm_rx_status_rxsc_sgien_exintfflag *)&rxsc_sgien_exflg;
- if(pdrvinfo->BW) //40M channel
+ if (pdrvinfo->BW) //40M channel
priv->stats.received_bwtype[1+prxsc->rxsc]++;
else //20M channel
priv->stats.received_bwtype[0]++;
@@ -5052,25 +4460,17 @@ static void rtl8192_query_rxphystatus(
//UI BSS List signal strength(in percentage), make it good looking, from 0~100.
//It is assigned to the BSS List in GetValueFromBeaconOrProbeRsp().
- if(is_cck_rate)
- {
- pstats->SignalStrength = precord_stats->SignalStrength = (u8)(rtl819x_signal_scale_mapping((long)pwdb_all));//PWDB_ALL;
-
- }
- else
- {
- //pRfd->Status.SignalStrength = pRecordRfd->Status.SignalStrength = (u8)(SignalScaleMapping(total_rssi/=RF90_PATH_MAX));//(u8)(total_rssi/=RF90_PATH_MAX);
+ if (is_cck_rate) {
+ pstats->SignalStrength = precord_stats->SignalStrength = (u8)(rtl819x_signal_scale_mapping((long)pwdb_all));
+ } else {
// We can judge RX path number now.
if (rf_rx_num != 0)
- pstats->SignalStrength = precord_stats->SignalStrength = (u8)(rtl819x_signal_scale_mapping((long)(total_rssi/=rf_rx_num)));
+ pstats->SignalStrength = precord_stats->SignalStrength = (u8)(rtl819x_signal_scale_mapping((long)(total_rssi /= rf_rx_num)));
}
} /* QueryRxPhyStatus8190Pci */
-void
-rtl8192_record_rxdesc_forlateruse(
- struct ieee80211_rx_stats * psrc_stats,
- struct ieee80211_rx_stats * ptarget_stats
-)
+void rtl8192_record_rxdesc_forlateruse(struct ieee80211_rx_stats *psrc_stats,
+ struct ieee80211_rx_stats *ptarget_stats)
{
ptarget_stats->bIsAMPDU = psrc_stats->bIsAMPDU;
ptarget_stats->bFirstMPDU = psrc_stats->bFirstMPDU;
@@ -5079,27 +4479,26 @@ rtl8192_record_rxdesc_forlateruse(
void TranslateRxSignalStuff819xUsb(struct sk_buff *skb,
- struct ieee80211_rx_stats * pstats,
+ struct ieee80211_rx_stats *pstats,
rx_drvinfo_819x_usb *pdrvinfo)
{
// TODO: We must only check packet for current MAC address. Not finish
rtl8192_rx_info *info = (struct rtl8192_rx_info *)skb->cb;
- struct net_device *dev=info->dev;
+ struct net_device *dev = info->dev;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
bool bpacket_match_bssid, bpacket_toself;
- bool bPacketBeacon=FALSE, bToSelfBA=FALSE;
+ bool bPacketBeacon = FALSE, bToSelfBA = FALSE;
static struct ieee80211_rx_stats previous_stats;
struct ieee80211_hdr_3addr *hdr;//by amy
- u16 fc,type;
+ u16 fc, type;
// Get Signal Quality for only RX data queue (but not command queue)
- u8* tmp_buf;
- //u16 tmp_buf_len = 0;
+ u8 *tmp_buf;
u8 *praddr;
/* Get MAC frame start address. */
- tmp_buf = (u8*)skb->data;// + get_rxpacket_shiftbytes_819xusb(pstats);
+ tmp_buf = (u8 *)skb->data;
hdr = (struct ieee80211_hdr_3addr *)tmp_buf;
fc = le16_to_cpu(hdr->frame_ctl);
@@ -5108,38 +4507,30 @@ void TranslateRxSignalStuff819xUsb(struct sk_buff *skb,
/* Check if the received packet is acceptable. */
bpacket_match_bssid = ((IEEE80211_FTYPE_CTL != type) &&
- (eqMacAddr(priv->ieee80211->current_network.bssid, (fc & IEEE80211_FCTL_TODS)? hdr->addr1 : (fc & IEEE80211_FCTL_FROMDS )? hdr->addr2 : hdr->addr3))
- && (!pstats->bHwError) && (!pstats->bCRC)&& (!pstats->bICV));
+ (eqMacAddr(priv->ieee80211->current_network.bssid, (fc & IEEE80211_FCTL_TODS) ? hdr->addr1 : (fc & IEEE80211_FCTL_FROMDS) ? hdr->addr2 : hdr->addr3))
+ && (!pstats->bHwError) && (!pstats->bCRC) && (!pstats->bICV));
bpacket_toself = bpacket_match_bssid & (eqMacAddr(praddr, priv->ieee80211->dev->dev_addr));
- if(WLAN_FC_GET_FRAMETYPE(fc)== IEEE80211_STYPE_BEACON)
- {
- bPacketBeacon = true;
- //DbgPrint("Beacon 2, MatchBSSID = %d, ToSelf = %d \n", bPacketMatchBSSID, bPacketToSelf);
- }
- if(WLAN_FC_GET_FRAMETYPE(fc) == IEEE80211_STYPE_BLOCKACK)
- {
- if((eqMacAddr(praddr,dev->dev_addr)))
- bToSelfBA = true;
- //DbgPrint("BlockAck, MatchBSSID = %d, ToSelf = %d \n", bPacketMatchBSSID, bPacketToSelf);
- }
+ if (WLAN_FC_GET_FRAMETYPE(fc) == IEEE80211_STYPE_BEACON)
+ bPacketBeacon = true;
+ if (WLAN_FC_GET_FRAMETYPE(fc) == IEEE80211_STYPE_BLOCKACK) {
+ if ((eqMacAddr(praddr, dev->dev_addr)))
+ bToSelfBA = true;
+ }
- if(bpacket_match_bssid)
- {
+ if (bpacket_match_bssid)
priv->stats.numpacket_matchbssid++;
- }
- if(bpacket_toself){
+ if (bpacket_toself)
priv->stats.numpacket_toself++;
- }
//
// Process PHY information for previous packet (RSSI/PWDB/EVM)
//
// Because phy information is contained in the last packet of AMPDU only, so driver
// should process phy information of previous packet
rtl8192_process_phyinfo(priv, tmp_buf, &previous_stats, pstats);
- rtl8192_query_rxphystatus(priv, pstats, pdrvinfo, &previous_stats, bpacket_match_bssid,bpacket_toself,bPacketBeacon,bToSelfBA);
+ rtl8192_query_rxphystatus(priv, pstats, pdrvinfo, &previous_stats, bpacket_match_bssid, bpacket_toself, bPacketBeacon, bToSelfBA);
rtl8192_record_rxdesc_forlateruse(pstats, &previous_stats);
}
@@ -5158,91 +4549,85 @@ void TranslateRxSignalStuff819xUsb(struct sk_buff *skb,
* Return:
* None
*/
-void
-UpdateReceivedRateHistogramStatistics8190(
- struct net_device *dev,
- struct ieee80211_rx_stats *stats
- )
+void UpdateReceivedRateHistogramStatistics8190(struct net_device *dev,
+ struct ieee80211_rx_stats *stats)
{
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
- u32 rcvType=1; //0: Total, 1:OK, 2:CRC, 3:ICV
+ u32 rcvType = 1; //0: Total, 1:OK, 2:CRC, 3:ICV
u32 rateIndex;
u32 preamble_guardinterval; //1: short preamble/GI, 0: long preamble/GI
- if(stats->bCRC)
- rcvType = 2;
- else if(stats->bICV)
- rcvType = 3;
+ if (stats->bCRC)
+ rcvType = 2;
+ else if (stats->bICV)
+ rcvType = 3;
- if(stats->bShortPreamble)
- preamble_guardinterval = 1;// short
+ if (stats->bShortPreamble)
+ preamble_guardinterval = 1;// short
else
- preamble_guardinterval = 0;// long
+ preamble_guardinterval = 0;// long
- switch(stats->rate)
- {
+ switch (stats->rate) {
//
// CCK rate
//
- case MGN_1M: rateIndex = 0; break;
- case MGN_2M: rateIndex = 1; break;
- case MGN_5_5M: rateIndex = 2; break;
- case MGN_11M: rateIndex = 3; break;
+ case MGN_1M: rateIndex = 0; break;
+ case MGN_2M: rateIndex = 1; break;
+ case MGN_5_5M: rateIndex = 2; break;
+ case MGN_11M: rateIndex = 3; break;
//
// Legacy OFDM rate
//
- case MGN_6M: rateIndex = 4; break;
- case MGN_9M: rateIndex = 5; break;
- case MGN_12M: rateIndex = 6; break;
- case MGN_18M: rateIndex = 7; break;
- case MGN_24M: rateIndex = 8; break;
- case MGN_36M: rateIndex = 9; break;
- case MGN_48M: rateIndex = 10; break;
- case MGN_54M: rateIndex = 11; break;
+ case MGN_6M: rateIndex = 4; break;
+ case MGN_9M: rateIndex = 5; break;
+ case MGN_12M: rateIndex = 6; break;
+ case MGN_18M: rateIndex = 7; break;
+ case MGN_24M: rateIndex = 8; break;
+ case MGN_36M: rateIndex = 9; break;
+ case MGN_48M: rateIndex = 10; break;
+ case MGN_54M: rateIndex = 11; break;
//
// 11n High throughput rate
//
- case MGN_MCS0: rateIndex = 12; break;
- case MGN_MCS1: rateIndex = 13; break;
- case MGN_MCS2: rateIndex = 14; break;
- case MGN_MCS3: rateIndex = 15; break;
- case MGN_MCS4: rateIndex = 16; break;
- case MGN_MCS5: rateIndex = 17; break;
- case MGN_MCS6: rateIndex = 18; break;
- case MGN_MCS7: rateIndex = 19; break;
- case MGN_MCS8: rateIndex = 20; break;
- case MGN_MCS9: rateIndex = 21; break;
- case MGN_MCS10: rateIndex = 22; break;
- case MGN_MCS11: rateIndex = 23; break;
- case MGN_MCS12: rateIndex = 24; break;
- case MGN_MCS13: rateIndex = 25; break;
- case MGN_MCS14: rateIndex = 26; break;
- case MGN_MCS15: rateIndex = 27; break;
- default: rateIndex = 28; break;
- }
- priv->stats.received_preamble_GI[preamble_guardinterval][rateIndex]++;
- priv->stats.received_rate_histogram[0][rateIndex]++; //total
- priv->stats.received_rate_histogram[rcvType][rateIndex]++;
+ case MGN_MCS0: rateIndex = 12; break;
+ case MGN_MCS1: rateIndex = 13; break;
+ case MGN_MCS2: rateIndex = 14; break;
+ case MGN_MCS3: rateIndex = 15; break;
+ case MGN_MCS4: rateIndex = 16; break;
+ case MGN_MCS5: rateIndex = 17; break;
+ case MGN_MCS6: rateIndex = 18; break;
+ case MGN_MCS7: rateIndex = 19; break;
+ case MGN_MCS8: rateIndex = 20; break;
+ case MGN_MCS9: rateIndex = 21; break;
+ case MGN_MCS10: rateIndex = 22; break;
+ case MGN_MCS11: rateIndex = 23; break;
+ case MGN_MCS12: rateIndex = 24; break;
+ case MGN_MCS13: rateIndex = 25; break;
+ case MGN_MCS14: rateIndex = 26; break;
+ case MGN_MCS15: rateIndex = 27; break;
+ default: rateIndex = 28; break;
+ }
+ priv->stats.received_preamble_GI[preamble_guardinterval][rateIndex]++;
+ priv->stats.received_rate_histogram[0][rateIndex]++; //total
+ priv->stats.received_rate_histogram[rcvType][rateIndex]++;
}
void query_rxdesc_status(struct sk_buff *skb, struct ieee80211_rx_stats *stats, bool bIsRxAggrSubframe)
{
rtl8192_rx_info *info = (struct rtl8192_rx_info *)skb->cb;
- struct net_device *dev=info->dev;
+ struct net_device *dev = info->dev;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
- //rx_desc_819x_usb *desc = (rx_desc_819x_usb *)skb->data;
rx_drvinfo_819x_usb *driver_info = NULL;
//
//Get Rx Descriptor Information
//
#ifdef USB_RX_AGGREGATION_SUPPORT
- if (bIsRxAggrSubframe)
- {
+ if (bIsRxAggrSubframe) {
rx_desc_819x_usb_aggr_subframe *desc = (rx_desc_819x_usb_aggr_subframe *)skb->data;
- stats->Length = desc->Length ;
+ stats->Length = desc->Length;
stats->RxDrvInfoSize = desc->RxDrvInfoSize;
stats->RxBufShift = 0; //RxBufShift = 2 in RxDesc, but usb didn't shift bytes in fact.
stats->bICV = desc->ICV;
@@ -5256,7 +4641,7 @@ void query_rxdesc_status(struct sk_buff *skb, struct ieee80211_rx_stats *stats,
stats->Length = desc->Length;
stats->RxDrvInfoSize = desc->RxDrvInfoSize;
- stats->RxBufShift = 0;//desc->Shift&0x03;
+ stats->RxBufShift = 0;
stats->bICV = desc->ICV;
stats->bCRC = desc->CRC32;
stats->bHwError = stats->bCRC|stats->bICV;
@@ -5264,16 +4649,12 @@ void query_rxdesc_status(struct sk_buff *skb, struct ieee80211_rx_stats *stats,
stats->Decrypted = !desc->SWDec;
}
- if((priv->ieee80211->pHTInfo->bCurrentHTSupport == true) && (priv->ieee80211->pairwise_key_type == KEY_TYPE_CCMP))
- {
+ if ((priv->ieee80211->pHTInfo->bCurrentHTSupport == true) && (priv->ieee80211->pairwise_key_type == KEY_TYPE_CCMP))
stats->bHwError = false;
- }
else
- {
stats->bHwError = stats->bCRC|stats->bICV;
- }
- if(stats->Length < 24 || stats->Length > MAX_8192U_RX_SIZE)
+ if (stats->Length < 24 || stats->Length > MAX_8192U_RX_SIZE)
stats->bHwError |= 1;
//
//Get Driver Info
@@ -5281,71 +4662,66 @@ void query_rxdesc_status(struct sk_buff *skb, struct ieee80211_rx_stats *stats,
// TODO: Need to verify it on FGPA platform
//Driver info are written to the RxBuffer following rx desc
if (stats->RxDrvInfoSize != 0) {
- driver_info = (rx_drvinfo_819x_usb *)(skb->data + sizeof(rx_desc_819x_usb) + \
- stats->RxBufShift);
+ driver_info = (rx_drvinfo_819x_usb *)(skb->data + sizeof(rx_desc_819x_usb) +
+ stats->RxBufShift);
/* unit: 0.5M */
/* TODO */
- if(!stats->bHwError){
+ if (!stats->bHwError) {
u8 ret_rate;
ret_rate = HwRateToMRate90(driver_info->RxHT, driver_info->RxRate);
- if(ret_rate == 0xff)
- {
+ if (ret_rate == 0xff) {
// Abnormal Case: Receive CRC OK packet with Rx descriptor indicating non supported rate.
// Special Error Handling here, 2008.05.16, by Emily
stats->bHwError = 1;
stats->rate = MGN_1M; //Set 1M rate by default
- }else
- {
+ } else {
stats->rate = ret_rate;
}
- }
- else
+ } else {
stats->rate = 0x02;
+ }
stats->bShortPreamble = driver_info->SPLCP;
UpdateReceivedRateHistogramStatistics8190(dev, stats);
- stats->bIsAMPDU = (driver_info->PartAggr==1);
- stats->bFirstMPDU = (driver_info->PartAggr==1) && (driver_info->FirstAGGR==1);
+ stats->bIsAMPDU = (driver_info->PartAggr == 1);
+ stats->bFirstMPDU = (driver_info->PartAggr == 1) && (driver_info->FirstAGGR == 1);
stats->TimeStampLow = driver_info->TSFL;
// xiong mask it, 070514
- //pRfd->Status.TimeStampHigh = PlatformEFIORead4Byte(Adapter, TSFR+4);
- // stats->TimeStampHigh = read_nic_dword(dev, TSFR+4);
UpdateRxPktTimeStamp8190(dev, stats);
//
// Rx A-MPDU
//
- if(driver_info->FirstAGGR==1 || driver_info->PartAggr == 1)
+ if (driver_info->FirstAGGR == 1 || driver_info->PartAggr == 1)
RT_TRACE(COMP_RXDESC, "driver_info->FirstAGGR = %d, driver_info->PartAggr = %d\n",
- driver_info->FirstAGGR, driver_info->PartAggr);
+ driver_info->FirstAGGR, driver_info->PartAggr);
}
- skb_pull(skb,sizeof(rx_desc_819x_usb));
+ skb_pull(skb, sizeof(rx_desc_819x_usb));
//
// Get Total offset of MPDU Frame Body
//
- if((stats->RxBufShift + stats->RxDrvInfoSize) > 0) {
+ if ((stats->RxBufShift + stats->RxDrvInfoSize) > 0) {
stats->bShift = 1;
- skb_pull(skb,stats->RxBufShift + stats->RxDrvInfoSize);
+ skb_pull(skb, stats->RxBufShift + stats->RxDrvInfoSize);
}
#ifdef USB_RX_AGGREGATION_SUPPORT
/* for the rx aggregated sub frame, the redundant space truly contained in the packet */
- if(bIsRxAggrSubframe) {
+ if (bIsRxAggrSubframe)
skb_pull(skb, 8);
- }
#endif
/* for debug 2008.5.29 */
//added by vivi, for MP, 20080108
stats->RxIs40MHzPacket = driver_info->BW;
- if(stats->RxDrvInfoSize != 0)
+ if (stats->RxDrvInfoSize != 0)
TranslateRxSignalStuff819xUsb(skb, stats, driver_info);
}
@@ -5359,19 +4735,18 @@ u32 GetRxPacketShiftBytes819xUsb(struct ieee80211_rx_stats *Status, bool bIsRxA
else
#endif
return (sizeof(rx_desc_819x_usb) + Status->RxDrvInfoSize
- + Status->RxBufShift);
+ + Status->RxBufShift);
}
-void rtl8192_rx_nomal(struct sk_buff* skb)
+void rtl8192_rx_nomal(struct sk_buff *skb)
{
rtl8192_rx_info *info = (struct rtl8192_rx_info *)skb->cb;
- struct net_device *dev=info->dev;
+ struct net_device *dev = info->dev;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
struct ieee80211_rx_stats stats = {
.signal = 0,
.noise = -98,
.rate = 0,
- // .mac_time = jiffies,
.freq = IEEE80211_24GHZ_BAND,
};
u32 rx_pkt_len = 0;
@@ -5393,7 +4768,7 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
#endif
/* 20 is for ps-poll */
- if((skb->len >=(20 + sizeof(rx_desc_819x_usb))) && (skb->len < RX_URB_SIZE)) {
+ if ((skb->len >= (20 + sizeof(rx_desc_819x_usb))) && (skb->len < RX_URB_SIZE)) {
#ifdef USB_RX_AGGREGATION_SUPPORT
TempByte = *(skb->data + sizeof(rx_desc_819x_usb));
#endif
@@ -5404,14 +4779,12 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
#ifdef USB_RX_AGGREGATION_SUPPORT
if (TempByte & BIT0) {
agg_skb = skb;
- //TotalLength = agg_skb->len - 4; /*sCrcLng*/
TotalLength = stats.Length - 4; /*sCrcLng*/
- //RT_TRACE(COMP_RECV, "%s:first aggregated packet!Length=%d\n",__FUNCTION__,TotalLength);
/* though the head pointer has passed this position */
TempDWord = *(u32 *)(agg_skb->data - 4);
PacketLength = (u16)(TempDWord & 0x3FFF); /*sCrcLng*/
skb = dev_alloc_skb(PacketLength);
- memcpy(skb_put(skb,PacketLength),agg_skb->data,PacketLength);
+ memcpy(skb_put(skb, PacketLength), agg_skb->data, PacketLength);
PacketShiftBytes = GetRxPacketShiftBytes819xUsb(&stats, false);
}
#endif
@@ -5421,26 +4794,24 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
rx_pkt_len = skb->len;
ieee80211_hdr = (struct ieee80211_hdr_1addr *)skb->data;
unicast_packet = false;
- if(is_broadcast_ether_addr(ieee80211_hdr->addr1)) {
+ if (is_broadcast_ether_addr(ieee80211_hdr->addr1)) {
//TODO
- }else if(is_multicast_ether_addr(ieee80211_hdr->addr1)){
+ } else if (is_multicast_ether_addr(ieee80211_hdr->addr1)) {
//TODO
- }else {
+ } else {
/* unicast packet */
unicast_packet = true;
}
- if(!ieee80211_rx(priv->ieee80211,skb, &stats)) {
+ if (!ieee80211_rx(priv->ieee80211, skb, &stats)) {
dev_kfree_skb_any(skb);
} else {
priv->stats.rxoktotal++;
- if(unicast_packet) {
+ if (unicast_packet)
priv->stats.rxbytesunicast += rx_pkt_len;
- }
}
#ifdef USB_RX_AGGREGATION_SUPPORT
testing = 1;
- // (PipeIndex == 0) && (TempByte & BIT0) => TotalLength > 0.
if (TotalLength > 0) {
PacketOccupiedLendth = PacketLength + (PacketShiftBytes + 8);
if ((PacketOccupiedLendth & 0xFF) != 0)
@@ -5452,9 +4823,8 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
else
agg_skb->len = 0;
- while (agg_skb->len>=GetRxPacketShiftBytes819xUsb(&stats, true)) {
+ while (agg_skb->len >= GetRxPacketShiftBytes819xUsb(&stats, true)) {
u8 tmpCRC = 0, tmpICV = 0;
- //RT_TRACE(COMP_RECV,"%s:aggred pkt,total_len = %d\n",__FUNCTION__,agg_skb->len);
RxDescr = (rx_desc_819x_usb_aggr_subframe *)(agg_skb->data);
tmpCRC = RxDescr->CRC32;
tmpICV = RxDescr->ICV;
@@ -5470,32 +4840,30 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
query_rxdesc_status(agg_skb, &stats, true);
PacketLength = stats.Length;
- if(PacketLength > agg_skb->len) {
+ if (PacketLength > agg_skb->len)
break;
- }
/* Process the MPDU received */
skb = dev_alloc_skb(PacketLength);
- memcpy(skb_put(skb,PacketLength),agg_skb->data, PacketLength);
+ memcpy(skb_put(skb, PacketLength), agg_skb->data, PacketLength);
skb_trim(skb, skb->len - 4/*sCrcLng*/);
rx_pkt_len = skb->len;
ieee80211_hdr = (struct ieee80211_hdr_1addr *)skb->data;
unicast_packet = false;
- if(is_broadcast_ether_addr(ieee80211_hdr->addr1)) {
+ if (is_broadcast_ether_addr(ieee80211_hdr->addr1)) {
//TODO
- }else if(is_multicast_ether_addr(ieee80211_hdr->addr1)){
+ } else if (is_multicast_ether_addr(ieee80211_hdr->addr1)) {
//TODO
- }else {
+ } else {
/* unicast packet */
unicast_packet = true;
}
- if(!ieee80211_rx(priv->ieee80211,skb, &stats)) {
+ if (!ieee80211_rx(priv->ieee80211, skb, &stats)) {
dev_kfree_skb_any(skb);
} else {
priv->stats.rxoktotal++;
- if(unicast_packet) {
+ if (unicast_packet)
priv->stats.rxbytesunicast += rx_pkt_len;
- }
}
/* should trim the packet which has been copied to target skb */
skb_pull(agg_skb, PacketLength);
@@ -5514,26 +4882,18 @@ void rtl8192_rx_nomal(struct sk_buff* skb)
#endif
} else {
priv->stats.rxurberr++;
- printk("actual_length:%d\n", skb->len);
+ netdev_dbg(dev, "actual_length: %d\n", skb->len);
dev_kfree_skb_any(skb);
}
}
-void
-rtl819xusb_process_received_packet(
- struct net_device *dev,
- struct ieee80211_rx_stats *pstats
- )
+void rtl819xusb_process_received_packet(struct net_device *dev,
+ struct ieee80211_rx_stats *pstats)
{
-// bool bfreerfd=false, bqueued=false;
- u8* frame;
- u16 frame_len=0;
+ u8 *frame;
+ u16 frame_len = 0;
struct r8192_priv *priv = ieee80211_priv(dev);
-// u8 index = 0;
-// u8 TID = 0;
- //u16 seqnum = 0;
- //PRX_TS_RECORD pts = NULL;
// Get shifted bytes of Starting address of 802.11 header. 2006.09.28, by Emily
//porting by amy 080508
@@ -5541,33 +4901,27 @@ rtl819xusb_process_received_packet(
frame = pstats->virtual_address;
frame_len = pstats->packetlength;
#ifdef TODO // by amy about HCT
- if(!Adapter->bInHctTest)
+ if (!Adapter->bInHctTest)
CountRxErrStatistics(Adapter, pRfd);
#endif
- {
- #ifdef ENABLE_PS //by amy for adding ps function in future
- RT_RF_POWER_STATE rtState;
- // When RF is off, we should not count the packet for hw/sw synchronize
- // reason, ie. there may be a duration while sw switch is changed and hw
- // switch is being changed. 2006.12.04, by shien chang.
- Adapter->HalFunc.GetHwRegHandler(Adapter, HW_VAR_RF_STATE, (u8* )(&rtState));
- if (rtState == eRfOff)
- {
- return;
- }
- #endif
+#ifdef ENABLE_PS //by amy for adding ps function in future
+ RT_RF_POWER_STATE rtState;
+ // When RF is off, we should not count the packet for hw/sw synchronize
+ // reason, ie. there may be a duration while sw switch is changed and hw
+ // switch is being changed. 2006.12.04, by shien chang.
+ Adapter->HalFunc.GetHwRegHandler(Adapter, HW_VAR_RF_STATE, (u8 *)(&rtState));
+ if (rtState == eRfOff)
+ return;
+#endif
priv->stats.rxframgment++;
- }
#ifdef TODO
RmMonitorSignalStrength(Adapter, pRfd);
#endif
/* 2007/01/16 MH Add RX command packet handle here. */
/* 2007/03/01 MH We have to release RFD and return if rx pkt is cmd pkt. */
if (rtl819xusb_rx_command_packet(dev, pstats))
- {
return;
- }
#ifdef SW_CRC_CHECK
SwCrcCheck();
@@ -5578,16 +4932,12 @@ rtl819xusb_process_received_packet(
void query_rx_cmdpkt_desc_status(struct sk_buff *skb, struct ieee80211_rx_stats *stats)
{
-// rtl8192_rx_info *info = (struct rtl8192_rx_info *)skb->cb;
-// struct net_device *dev=info->dev;
-// struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
rx_desc_819x_usb *desc = (rx_desc_819x_usb *)skb->data;
-// rx_drvinfo_819x_usb *driver_info;
//
//Get Rx Descriptor Information
//
- stats->virtual_address = (u8*)skb->data;
+ stats->virtual_address = (u8 *)skb->data;
stats->Length = desc->Length;
stats->RxDrvInfoSize = 0;
stats->RxBufShift = 0;
@@ -5602,21 +4952,17 @@ void rtl8192_rx_cmd(struct sk_buff *skb)
{
struct rtl8192_rx_info *info = (struct rtl8192_rx_info *)skb->cb;
struct net_device *dev = info->dev;
- //int ret;
-// struct urb *rx_urb = info->urb;
/* TODO */
struct ieee80211_rx_stats stats = {
.signal = 0,
.noise = -98,
.rate = 0,
- // .mac_time = jiffies,
.freq = IEEE80211_24GHZ_BAND,
};
- if((skb->len >=(20 + sizeof(rx_desc_819x_usb))) && (skb->len < RX_URB_SIZE))
- {
+ if ((skb->len >= (20 + sizeof(rx_desc_819x_usb))) && (skb->len < RX_URB_SIZE)) {
- query_rx_cmdpkt_desc_status(skb,&stats);
+ query_rx_cmdpkt_desc_status(skb, &stats);
// this is to be done by amy 080508 prfd->queue_id = 1;
@@ -5624,7 +4970,7 @@ void rtl8192_rx_cmd(struct sk_buff *skb)
// Process the command packet received.
//
- rtl819xusb_process_received_packet(dev,&stats);
+ rtl819xusb_process_received_packet(dev, &stats);
dev_kfree_skb_any(skb);
}
@@ -5640,22 +4986,21 @@ void rtl8192_irq_rx_tasklet(struct r8192_priv *priv)
switch (info->out_pipe) {
/* Nomal packet pipe */
case 3:
- //RT_TRACE(COMP_RECV, "normal in-pipe index(%d)\n",info->out_pipe);
priv->IrpPendingCount--;
rtl8192_rx_nomal(skb);
break;
- /* Command packet pipe */
+ /* Command packet pipe */
case 9:
- RT_TRACE(COMP_RECV, "command in-pipe index(%d)\n",\
- info->out_pipe);
+ RT_TRACE(COMP_RECV, "command in-pipe index(%d)\n",
+ info->out_pipe);
rtl8192_rx_cmd(skb);
break;
default: /* should never get here! */
- RT_TRACE(COMP_ERR, "Unknown in-pipe index(%d)\n",\
- info->out_pipe);
+ RT_TRACE(COMP_ERR, "Unknown in-pipe index(%d)\n",
+ info->out_pipe);
dev_kfree_skb(skb);
break;
@@ -5682,11 +5027,10 @@ static const struct net_device_ops rtl8192_netdev_ops = {
*****************************************************************************/
static int rtl8192_usb_probe(struct usb_interface *intf,
- const struct usb_device_id *id)
+ const struct usb_device_id *id)
{
-// unsigned long ioaddr = 0;
struct net_device *dev = NULL;
- struct r8192_priv *priv= NULL;
+ struct r8192_priv *priv = NULL;
struct usb_device *udev = interface_to_usbdev(intf);
int ret;
RT_TRACE(COMP_INIT, "Oops: i'm coming\n");
@@ -5699,29 +5043,28 @@ static int rtl8192_usb_probe(struct usb_interface *intf,
SET_NETDEV_DEV(dev, &intf->dev);
priv = ieee80211_priv(dev);
priv->ieee80211 = netdev_priv(dev);
- priv->udev=udev;
+ priv->udev = udev;
dev->netdev_ops = &rtl8192_netdev_ops;
- //DMESG("Oops: i'm coming\n");
#if WIRELESS_EXT >= 12
#if WIRELESS_EXT < 17
dev->get_wireless_stats = r8192_get_wireless_stats;
#endif
dev->wireless_handlers = (struct iw_handler_def *) &r8192_wx_handlers_def;
#endif
- dev->type=ARPHRD_ETHER;
+ dev->type = ARPHRD_ETHER;
dev->watchdog_timeo = HZ*3; //modified by john, 0805
- if (dev_alloc_name(dev, ifname) < 0){
+ if (dev_alloc_name(dev, ifname) < 0) {
RT_TRACE(COMP_INIT, "Oops: devname already taken! Trying wlan%%d...\n");
ifname = "wlan%d";
dev_alloc_name(dev, ifname);
}
RT_TRACE(COMP_INIT, "Driver probe completed1\n");
- if(rtl8192_init(dev)!=0){
+ if (rtl8192_init(dev) != 0) {
RT_TRACE(COMP_ERR, "Initialization failed");
ret = -ENODEV;
goto fail;
@@ -5733,7 +5076,7 @@ static int rtl8192_usb_probe(struct usb_interface *intf,
if (ret)
goto fail2;
- RT_TRACE(COMP_INIT, "dev name=======> %s\n",dev->name);
+ RT_TRACE(COMP_INIT, "dev name=======> %s\n", dev->name);
rtl8192_proc_init_one(dev);
@@ -5755,16 +5098,13 @@ fail:
}
//detach all the work and timer structure declared or inititialize in r8192U_init function.
-void rtl8192_cancel_deferred_work(struct r8192_priv* priv)
+void rtl8192_cancel_deferred_work(struct r8192_priv *priv)
{
cancel_work_sync(&priv->reset_wq);
cancel_delayed_work(&priv->watch_dog_wq);
cancel_delayed_work(&priv->update_beacon_wq);
cancel_work_sync(&priv->qos_activate);
- //cancel_work_sync(&priv->SetBWModeWorkItem);
- //cancel_work_sync(&priv->SwChnlWorkItem);
-
}
@@ -5773,22 +5113,18 @@ static void rtl8192_usb_disconnect(struct usb_interface *intf)
struct net_device *dev = usb_get_intfdata(intf);
struct r8192_priv *priv = ieee80211_priv(dev);
- if(dev){
+ if (dev) {
unregister_netdev(dev);
RT_TRACE(COMP_DOWN, "=============>wlan driver to be removed\n");
rtl8192_proc_remove_one(dev);
- rtl8192_down(dev);
+ rtl8192_down(dev);
kfree(priv->pFirmware);
priv->pFirmware = NULL;
- // priv->rf_close(dev);
-// rtl8192_SetRFPowerState(dev, eRfOff);
rtl8192_usb_deleteendpoints(dev);
destroy_workqueue(priv->priv_wq);
- //rtl8192_irq_disable(dev);
- //rtl8192_reset(dev);
mdelay(10);
}
@@ -5815,38 +5151,36 @@ static int __init rtl8192_usb_module_init(void)
#ifdef CONFIG_IEEE80211_DEBUG
ret = ieee80211_debug_init();
if (ret) {
- printk(KERN_ERR "ieee80211_debug_init() failed %d\n", ret);
+ pr_err("ieee80211_debug_init() failed %d\n", ret);
return ret;
}
#endif
ret = ieee80211_crypto_init();
if (ret) {
- printk(KERN_ERR "ieee80211_crypto_init() failed %d\n", ret);
+ pr_err("ieee80211_crypto_init() failed %d\n", ret);
return ret;
}
ret = ieee80211_crypto_tkip_init();
if (ret) {
- printk(KERN_ERR "ieee80211_crypto_tkip_init() failed %d\n",
- ret);
+ pr_err("ieee80211_crypto_tkip_init() failed %d\n", ret);
return ret;
}
ret = ieee80211_crypto_ccmp_init();
if (ret) {
- printk(KERN_ERR "ieee80211_crypto_ccmp_init() failed %d\n",
- ret);
+ pr_err("ieee80211_crypto_ccmp_init() failed %d\n", ret);
return ret;
}
ret = ieee80211_crypto_wep_init();
if (ret) {
- printk(KERN_ERR "ieee80211_crypto_wep_init() failed %d\n", ret);
+ pr_err("ieee80211_crypto_wep_init() failed %d\n", ret);
return ret;
}
- printk(KERN_INFO "\nLinux kernel driver for RTL8192 based WLAN cards\n");
- printk(KERN_INFO "Copyright (c) 2007-2008, Realsil Wlan\n");
+ pr_info("\nLinux kernel driver for RTL8192 based WLAN cards\n");
+ pr_info("Copyright (c) 2007-2008, Realsil Wlan\n");
RT_TRACE(COMP_INIT, "Initializing module");
RT_TRACE(COMP_INIT, "Wireless extensions version %d", WIRELESS_EXT);
rtl8192_proc_module_init();
@@ -5859,7 +5193,6 @@ static void __exit rtl8192_usb_module_exit(void)
usb_deregister(&rtl8192_usb_driver);
RT_TRACE(COMP_DOWN, "Exiting");
-// rtl8192_proc_module_remove();
}
@@ -5869,11 +5202,11 @@ void rtl8192_try_wake_queue(struct net_device *dev, int pri)
short enough_desc;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
- spin_lock_irqsave(&priv->tx_lock,flags);
- enough_desc = check_nic_enough_desc(dev,pri);
- spin_unlock_irqrestore(&priv->tx_lock,flags);
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ enough_desc = check_nic_enough_desc(dev, pri);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
- if(enough_desc)
+ if (enough_desc)
ieee80211_wake_queue(priv->ieee80211);
}
@@ -5881,43 +5214,32 @@ void EnableHWSecurityConfig8192(struct net_device *dev)
{
u8 SECR_value = 0x0;
struct r8192_priv *priv = (struct r8192_priv *)ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct ieee80211_device *ieee = priv->ieee80211;
SECR_value = SCR_TxEncEnable | SCR_RxDecEnable;
- if (((KEY_TYPE_WEP40 == ieee->pairwise_key_type) || (KEY_TYPE_WEP104 == ieee->pairwise_key_type)) && (priv->ieee80211->auth_mode != 2))
- {
+ if (((KEY_TYPE_WEP40 == ieee->pairwise_key_type) || (KEY_TYPE_WEP104 == ieee->pairwise_key_type)) && (priv->ieee80211->auth_mode != 2)) {
SECR_value |= SCR_RxUseDK;
SECR_value |= SCR_TxUseDK;
- }
- else if ((ieee->iw_mode == IW_MODE_ADHOC) && (ieee->pairwise_key_type & (KEY_TYPE_CCMP | KEY_TYPE_TKIP)))
- {
+ } else if ((ieee->iw_mode == IW_MODE_ADHOC) && (ieee->pairwise_key_type & (KEY_TYPE_CCMP | KEY_TYPE_TKIP))) {
SECR_value |= SCR_RxUseDK;
SECR_value |= SCR_TxUseDK;
}
//add HWSec active enable here.
-//default using hwsec. when peer AP is in N mode only and pairwise_key_type is none_aes(which HT_IOT_ACT_PURE_N_MODE indicates it), use software security. when peer AP is in b,g,n mode mixed and pairwise_key_type is none_aes, use g mode hw security. WB on 2008.7.4
+ //default using hwsec. when peer AP is in N mode only and pairwise_key_type is none_aes(which HT_IOT_ACT_PURE_N_MODE indicates it), use software security. when peer AP is in b,g,n mode mixed and pairwise_key_type is none_aes, use g mode hw security. WB on 2008.7.4
ieee->hwsec_active = 1;
- if ((ieee->pHTInfo->IOTAction&HT_IOT_ACT_PURE_N_MODE) || !hwwep)//!ieee->hwsec_support) //add hwsec_support flag to totol control hw_sec on/off
- {
+ if ((ieee->pHTInfo->IOTAction&HT_IOT_ACT_PURE_N_MODE) || !hwwep) { //add hwsec_support flag to totol control hw_sec on/off
ieee->hwsec_active = 0;
SECR_value &= ~SCR_RxDecEnable;
}
- RT_TRACE(COMP_SEC,"%s:, hwsec:%d, pairwise_key:%d, SECR_value:%x\n", __FUNCTION__, \
- ieee->hwsec_active, ieee->pairwise_key_type, SECR_value);
- {
- write_nic_byte(dev, SECR, SECR_value);//SECR_value | SCR_UseDK );
- }
+ RT_TRACE(COMP_SEC, "%s:, hwsec:%d, pairwise_key:%d, SECR_value:%x\n", __func__,
+ ieee->hwsec_active, ieee->pairwise_key_type, SECR_value);
+ write_nic_byte(dev, SECR, SECR_value);
}
-void setKey( struct net_device *dev,
- u8 EntryNo,
- u8 KeyIndex,
- u16 KeyType,
- u8 *MacAddr,
- u8 DefaultKey,
- u32 *KeyContent )
+void setKey(struct net_device *dev, u8 EntryNo, u8 KeyIndex, u16 KeyType,
+ u8 *MacAddr, u8 DefaultKey, u32 *KeyContent)
{
u32 TargetCommand = 0;
u32 TargetContent = 0;
@@ -5926,44 +5248,40 @@ void setKey( struct net_device *dev,
if (EntryNo >= TOTAL_CAM_ENTRY)
RT_TRACE(COMP_ERR, "cam entry exceeds in setKey()\n");
- RT_TRACE(COMP_SEC, "====>to setKey(), dev:%p, EntryNo:%d, KeyIndex:%d, KeyType:%d, MacAddr%pM\n", dev,EntryNo, KeyIndex, KeyType, MacAddr);
+ RT_TRACE(COMP_SEC, "====>to setKey(), dev:%p, EntryNo:%d, KeyIndex:%d, KeyType:%d, MacAddr%pM\n", dev, EntryNo, KeyIndex, KeyType, MacAddr);
if (DefaultKey)
usConfig |= BIT15 | (KeyType<<2);
else
usConfig |= BIT15 | (KeyType<<2) | KeyIndex;
-// usConfig |= BIT15 | (KeyType<<2) | (DefaultKey<<5) | KeyIndex;
- for(i=0 ; i<CAM_CONTENT_COUNT; i++){
+ for (i = 0; i < CAM_CONTENT_COUNT; i++) {
TargetCommand = i+CAM_CONTENT_COUNT*EntryNo;
TargetCommand |= BIT31|BIT16;
- if(i==0){//MAC|Config
+ if (i == 0) { //MAC|Config
TargetContent = (u32)(*(MacAddr+0)) << 16|
(u32)(*(MacAddr+1)) << 24|
(u32)usConfig;
write_nic_dword(dev, WCAMI, TargetContent);
write_nic_dword(dev, RWCAM, TargetCommand);
- // printk("setkey cam =%8x\n", read_cam(dev, i+6*EntryNo));
- }
- else if(i==1){//MAC
+ } else if (i == 1) { //MAC
TargetContent = (u32)(*(MacAddr+2)) |
(u32)(*(MacAddr+3)) << 8|
(u32)(*(MacAddr+4)) << 16|
(u32)(*(MacAddr+5)) << 24;
write_nic_dword(dev, WCAMI, TargetContent);
write_nic_dword(dev, RWCAM, TargetCommand);
- }
- else {
+ } else {
//Key Material
- if(KeyContent !=NULL){
- write_nic_dword(dev, WCAMI, (u32)(*(KeyContent+i-2)) );
- write_nic_dword(dev, RWCAM, TargetCommand);
+ if (KeyContent != NULL) {
+ write_nic_dword(dev, WCAMI, (u32)(*(KeyContent+i-2)));
+ write_nic_dword(dev, RWCAM, TargetCommand);
+ }
}
}
- }
}
diff --git a/drivers/staging/rtl8192u/r8192U_dm.c b/drivers/staging/rtl8192u/r8192U_dm.c
index ea46717f1fad..a6e4c37d9c78 100644
--- a/drivers/staging/rtl8192u/r8192U_dm.c
+++ b/drivers/staging/rtl8192u/r8192U_dm.c
@@ -88,7 +88,7 @@ static void dm_check_rate_adaptive(struct net_device *dev);
// DM --> Bandwidth switch
static void dm_init_bandwidth_autoswitch(struct net_device *dev);
-static void dm_bandwidth_autoswitch( struct net_device *dev);
+static void dm_bandwidth_autoswitch(struct net_device *dev);
// DM --> TX power control
//static void dm_initialize_txpower_tracking(struct net_device *dev);
@@ -112,7 +112,7 @@ static void dm_bb_initialgain_backup(struct net_device *dev);
static void dm_dig_init(struct net_device *dev);
static void dm_ctrl_initgain_byrssi(struct net_device *dev);
static void dm_ctrl_initgain_byrssi_highpwr(struct net_device *dev);
-static void dm_ctrl_initgain_byrssi_by_driverrssi( struct net_device *dev);
+static void dm_ctrl_initgain_byrssi_by_driverrssi(struct net_device *dev);
static void dm_ctrl_initgain_byrssi_by_fwfalse_alarm(struct net_device *dev);
static void dm_initial_gain(struct net_device *dev);
static void dm_pd_th(struct net_device *dev);
@@ -289,7 +289,7 @@ extern void hal_dm_watchdog(struct net_device *dev)
* 01/16/2008 MHC RF_Type is assigned in ReadAdapterInfo(). We must call
* the function after making sure RF_Type.
*/
-extern void init_rate_adaptive(struct net_device * dev)
+extern void init_rate_adaptive(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -351,7 +351,7 @@ extern void init_rate_adaptive(struct net_device * dev)
* 05/26/08 amy Create version 0 porting from windows code.
*
*---------------------------------------------------------------------------*/
-static void dm_check_rate_adaptive(struct net_device * dev)
+static void dm_check_rate_adaptive(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
PRT_HIGH_THROUGHPUT pHTInfo = priv->ieee80211->pHTInfo;
@@ -372,11 +372,11 @@ static void dm_check_rate_adaptive(struct net_device * dev)
return;
// TODO: Only 11n mode is implemented currently,
- if( !(priv->ieee80211->mode == WIRELESS_MODE_N_24G ||
+ if(!(priv->ieee80211->mode == WIRELESS_MODE_N_24G ||
priv->ieee80211->mode == WIRELESS_MODE_N_5G))
return;
- if( priv->ieee80211->state == IEEE80211_LINKED )
+ if(priv->ieee80211->state == IEEE80211_LINKED)
{
// RT_TRACE(COMP_RATE, "dm_CheckRateAdaptive(): \t");
@@ -454,8 +454,8 @@ static void dm_check_rate_adaptive(struct net_device * dev)
//pHalData->UndecoratedSmoothedPWDB = 19;
if(priv->undecorated_smoothed_pwdb < (long)(pra->ping_rssi_thresh_for_ra+5))
{
- if( (priv->undecorated_smoothed_pwdb < (long)pra->ping_rssi_thresh_for_ra) ||
- ping_rssi_state )
+ if((priv->undecorated_smoothed_pwdb < (long)pra->ping_rssi_thresh_for_ra) ||
+ ping_rssi_state)
{
//DbgPrint("TestRSSI = %d, set RATR to 0x%x \n", pHalData->UndecoratedSmoothedPWDB, pRA->TestRSSIRATR);
pra->ratr_state = DM_RATR_STA_LOW;
@@ -480,8 +480,8 @@ static void dm_check_rate_adaptive(struct net_device * dev)
//
// Check whether updating of RATR0 is required
//
- currentRATR = read_nic_dword(dev, RATR0);
- if( targetRATR != currentRATR )
+ read_nic_dword(dev, RATR0, &currentRATR);
+ if(targetRATR != currentRATR)
{
u32 ratr_value;
ratr_value = targetRATR;
@@ -505,7 +505,7 @@ static void dm_check_rate_adaptive(struct net_device * dev)
} // dm_CheckRateAdaptive
-static void dm_init_bandwidth_autoswitch(struct net_device * dev)
+static void dm_init_bandwidth_autoswitch(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -517,7 +517,7 @@ static void dm_init_bandwidth_autoswitch(struct net_device * dev)
} // dm_init_bandwidth_autoswitch
-static void dm_bandwidth_autoswitch(struct net_device * dev)
+static void dm_bandwidth_autoswitch(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -588,7 +588,7 @@ static u8 CCKSwingTable_Ch14[CCK_Table_length][8] = {
{0x0f, 0x0f, 0x0d, 0x08, 0x00, 0x00, 0x00, 0x00} // 11, -11db
};
-static void dm_TXPowerTrackingCallback_TSSI(struct net_device * dev)
+static void dm_TXPowerTrackingCallback_TSSI(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
bool bHighpowerstate, viviflag = FALSE;
@@ -627,14 +627,14 @@ static void dm_TXPowerTrackingCallback_TSSI(struct net_device * dev)
RT_TRACE(COMP_POWER_TRACKING, "Set configuration with tx cmd queue fail!\n");
}
#else
- cmpk_message_handle_tx(dev, (u8*)&tx_cmd,
+ cmpk_message_handle_tx(dev, (u8 *)&tx_cmd,
DESC_PACKET_TYPE_INIT, sizeof(DCMD_TXCMD_T));
#endif
mdelay(1);
//DbgPrint("hi, vivi, strange\n");
for(i = 0;i <= 30; i++)
{
- Pwr_Flag = read_nic_byte(dev, 0x1ba);
+ read_nic_byte(dev, 0x1ba, &Pwr_Flag);
if (Pwr_Flag == 0)
{
@@ -642,9 +642,9 @@ static void dm_TXPowerTrackingCallback_TSSI(struct net_device * dev)
continue;
}
#ifdef RTL8190P
- Avg_TSSI_Meas = read_nic_word(dev, 0x1bc);
+ read_nic_word(dev, 0x1bc, &Avg_TSSI_Meas);
#else
- Avg_TSSI_Meas = read_nic_word(dev, 0x13c);
+ read_nic_word(dev, 0x13c, &Avg_TSSI_Meas);
#endif
if(Avg_TSSI_Meas == 0)
{
@@ -655,12 +655,12 @@ static void dm_TXPowerTrackingCallback_TSSI(struct net_device * dev)
for(k = 0;k < 5; k++)
{
#ifdef RTL8190P
- tmp_report[k] = read_nic_byte(dev, 0x1d8+k);
+ read_nic_byte(dev, 0x1d8+k, &tmp_report[k]);
#else
if(k !=4)
- tmp_report[k] = read_nic_byte(dev, 0x134+k);
+ read_nic_byte(dev, 0x134+k, &tmp_report[k]);
else
- tmp_report[k] = read_nic_byte(dev, 0x13e);
+ read_nic_byte(dev, 0x13e, &tmp_report[k]);
#endif
RT_TRACE(COMP_POWER_TRACKING, "TSSI_report_value = %d\n", tmp_report[k]);
}
@@ -816,7 +816,7 @@ static void dm_TXPowerTrackingCallback_TSSI(struct net_device * dev)
write_nic_byte(dev, 0x1ba, 0);
}
-static void dm_TXPowerTrackingCallback_ThermalMeter(struct net_device * dev)
+static void dm_TXPowerTrackingCallback_ThermalMeter(struct net_device *dev)
{
#define ThermalMeterVal 9
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -1572,7 +1572,7 @@ static void dm_CCKTxPowerAdjust_TSSI(struct net_device *dev, bool bInCH14)
TempVal = 0;
TempVal = priv->cck_txbbgain_table[priv->cck_present_attentuation].ccktxbb_valuearray[2] +
(priv->cck_txbbgain_table[priv->cck_present_attentuation].ccktxbb_valuearray[3]<<8) +
- (priv->cck_txbbgain_table[priv->cck_present_attentuation].ccktxbb_valuearray[4]<<16 )+
+ (priv->cck_txbbgain_table[priv->cck_present_attentuation].ccktxbb_valuearray[4]<<16)+
(priv->cck_txbbgain_table[priv->cck_present_attentuation].ccktxbb_valuearray[5]<<24);
rtl8192_setBBreg(dev, rCCK0_TxFilter2,bMaskDWord, TempVal);
//Write 0xa28 0xa29
@@ -1592,7 +1592,7 @@ static void dm_CCKTxPowerAdjust_TSSI(struct net_device *dev, bool bInCH14)
TempVal = 0;
TempVal = priv->cck_txbbgain_ch14_table[priv->cck_present_attentuation].ccktxbb_valuearray[2] +
(priv->cck_txbbgain_ch14_table[priv->cck_present_attentuation].ccktxbb_valuearray[3]<<8) +
- (priv->cck_txbbgain_ch14_table[priv->cck_present_attentuation].ccktxbb_valuearray[4]<<16 )+
+ (priv->cck_txbbgain_ch14_table[priv->cck_present_attentuation].ccktxbb_valuearray[4]<<16)+
(priv->cck_txbbgain_ch14_table[priv->cck_present_attentuation].ccktxbb_valuearray[5]<<24);
rtl8192_setBBreg(dev, rCCK0_TxFilter2,bMaskDWord, TempVal);
//Write 0xa28 0xa29
@@ -1624,7 +1624,7 @@ static void dm_CCKTxPowerAdjust_ThermalMeter(struct net_device *dev, bool bInCH
TempVal = 0;
TempVal = CCKSwingTable_Ch1_Ch13[priv->CCK_index][2] +
(CCKSwingTable_Ch1_Ch13[priv->CCK_index][3]<<8) +
- (CCKSwingTable_Ch1_Ch13[priv->CCK_index][4]<<16 )+
+ (CCKSwingTable_Ch1_Ch13[priv->CCK_index][4]<<16)+
(CCKSwingTable_Ch1_Ch13[priv->CCK_index][5]<<24);
rtl8192_setBBreg(dev, rCCK0_TxFilter2, bMaskDWord, TempVal);
RT_TRACE(COMP_POWER_TRACKING, "CCK not chnl 14, reg 0x%x = 0x%x\n",
@@ -1652,7 +1652,7 @@ static void dm_CCKTxPowerAdjust_ThermalMeter(struct net_device *dev, bool bInCH
TempVal = 0;
TempVal = CCKSwingTable_Ch14[priv->CCK_index][2] +
(CCKSwingTable_Ch14[priv->CCK_index][3]<<8) +
- (CCKSwingTable_Ch14[priv->CCK_index][4]<<16 )+
+ (CCKSwingTable_Ch14[priv->CCK_index][4]<<16)+
(CCKSwingTable_Ch14[priv->CCK_index][5]<<24);
rtl8192_setBBreg(dev, rCCK0_TxFilter2, bMaskDWord, TempVal);
RT_TRACE(COMP_POWER_TRACKING, "CCK chnl 14, reg 0x%x = 0x%x\n",
@@ -1727,7 +1727,7 @@ extern void dm_restore_dynamic_mechanism_state(struct net_device *dev)
if(priv->rate_adaptive.rate_adaptive_disabled)
return;
// TODO: Only 11n mode is implemented currently,
- if( !(priv->ieee80211->mode==WIRELESS_MODE_N_24G ||
+ if(!(priv->ieee80211->mode==WIRELESS_MODE_N_24G ||
priv->ieee80211->mode==WIRELESS_MODE_N_5G))
return;
{
@@ -1736,7 +1736,7 @@ extern void dm_restore_dynamic_mechanism_state(struct net_device *dev)
ratr_value = reg_ratr;
if(priv->rf_type == RF_1T2R) // 1T2R, Spatial Stream 2 should be disabled
{
- ratr_value &=~ (RATE_ALL_OFDM_2SS);
+ ratr_value &= ~(RATE_ALL_OFDM_2SS);
//DbgPrint("HW_VAR_TATR_0 from 0x%x ==> 0x%x\n", ((pu4Byte)(val))[0], ratr_value);
}
//DbgPrint("set HW_VAR_TATR_0 = 0x%x\n", ratr_value);
@@ -2222,7 +2222,7 @@ static void dm_ctrl_initgain_byrssi_by_fwfalse_alarm(
/* 2. When RSSI increase, We have to judge if it is larger than a threshold
and then execute the step below. */
- if ((priv->undecorated_smoothed_pwdb >= dm_digtable.rssi_high_thresh) )
+ if ((priv->undecorated_smoothed_pwdb >= dm_digtable.rssi_high_thresh))
{
u8 reset_flag = 0;
@@ -2316,7 +2316,7 @@ static void dm_ctrl_initgain_byrssi_by_fwfalse_alarm(
*
*---------------------------------------------------------------------------*/
static void dm_ctrl_initgain_byrssi_highpwr(
- struct net_device * dev)
+ struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
static u32 reset_cnt_highpwr;
@@ -2391,12 +2391,13 @@ static void dm_ctrl_initgain_byrssi_highpwr(
static void dm_initial_gain(
- struct net_device * dev)
+ struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 initial_gain=0;
static u8 initialized, force_write;
static u32 reset_cnt;
+ u8 tmp;
if(dm_digtable.dig_algorithm_switch)
{
@@ -2437,7 +2438,8 @@ static void dm_initial_gain(
reset_cnt = priv->reset_count;
}
- if(dm_digtable.pre_ig_value != read_nic_byte(dev, rOFDM0_XAAGCCore1))
+ read_nic_byte(dev, rOFDM0_XAAGCCore1, &tmp);
+ if (dm_digtable.pre_ig_value != tmp)
force_write = 1;
{
@@ -2459,7 +2461,7 @@ static void dm_initial_gain(
}
static void dm_pd_th(
- struct net_device * dev)
+ struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
static u8 initialized, force_write;
@@ -2571,7 +2573,7 @@ static void dm_pd_th(
}
static void dm_cs_ratio(
- struct net_device * dev)
+ struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
static u8 initialized,force_write;
@@ -2589,7 +2591,7 @@ static void dm_cs_ratio(
{
if ((dm_digtable.rssi_val <= dm_digtable.rssi_low_thresh))
dm_digtable.curcs_ratio_state = DIG_CS_RATIO_LOWER;
- else if ((dm_digtable.rssi_val >= dm_digtable.rssi_high_thresh) )
+ else if ((dm_digtable.rssi_val >= dm_digtable.rssi_high_thresh))
dm_digtable.curcs_ratio_state = DIG_CS_RATIO_HIGHER;
else
dm_digtable.curcs_ratio_state = dm_digtable.precs_ratio_state;
@@ -2634,7 +2636,7 @@ static void dm_cs_ratio(
}
}
-extern void dm_init_edca_turbo(struct net_device * dev)
+extern void dm_init_edca_turbo(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -2644,7 +2646,7 @@ extern void dm_init_edca_turbo(struct net_device * dev)
} // dm_init_edca_turbo
static void dm_check_edca_turbo(
- struct net_device * dev)
+ struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
PRT_HIGH_THROUGHPUT pHTInfo = priv->ieee80211->pHTInfo;
@@ -2727,8 +2729,9 @@ static void dm_check_edca_turbo(
// TODO: Modified this part and try to set acm control in only 1 IO processing!!
PACI_AIFSN pAciAifsn = (PACI_AIFSN)&(qos_parameters->aifs[0]);
- u8 AcmCtrl = read_nic_byte( dev, AcmHwCtrl );
- if( pAciAifsn->f.ACM )
+ u8 AcmCtrl;
+ read_nic_byte(dev, AcmHwCtrl, &AcmCtrl);
+ if(pAciAifsn->f.ACM)
{ // ACM bit is 1.
AcmCtrl |= AcmHw_BeqEn;
}
@@ -2737,8 +2740,8 @@ static void dm_check_edca_turbo(
AcmCtrl &= (~AcmHw_BeqEn);
}
- RT_TRACE( COMP_QOS,"SetHwReg8190pci(): [HW_VAR_ACM_CTRL] Write 0x%X\n", AcmCtrl ) ;
- write_nic_byte(dev, AcmHwCtrl, AcmCtrl );
+ RT_TRACE(COMP_QOS,"SetHwReg8190pci(): [HW_VAR_ACM_CTRL] Write 0x%X\n", AcmCtrl) ;
+ write_nic_byte(dev, AcmHwCtrl, AcmCtrl);
}
}
priv->bcurrent_turbo_EDCA = false;
@@ -2753,7 +2756,7 @@ dm_CheckEdcaTurbo_EXIT:
lastRxOkCnt = priv->stats.rxbytesunicast;
} // dm_CheckEdcaTurbo
-extern void DM_CTSToSelfSetting(struct net_device * dev,u32 DM_Type, u32 DM_Value)
+extern void DM_CTSToSelfSetting(struct net_device *dev,u32 DM_Type, u32 DM_Value)
{
struct r8192_priv *priv = ieee80211_priv((struct net_device *)dev);
@@ -2773,7 +2776,7 @@ extern void DM_CTSToSelfSetting(struct net_device * dev,u32 DM_Type, u32 DM_Valu
}
}
-static void dm_init_ctstoself(struct net_device * dev)
+static void dm_init_ctstoself(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv((struct net_device *)dev);
@@ -2837,7 +2840,7 @@ static void dm_ctstoself(struct net_device *dev)
* 05/28/2008 amy Create Version 0 porting from windows code.
*
*---------------------------------------------------------------------------*/
-static void dm_check_rfctrl_gpio(struct net_device * dev)
+static void dm_check_rfctrl_gpio(struct net_device *dev)
{
//struct r8192_priv *priv = ieee80211_priv(dev);
@@ -2881,7 +2884,7 @@ static void dm_check_pbc_gpio(struct net_device *dev)
u8 tmp1byte;
- tmp1byte = read_nic_byte(dev,GPI);
+ read_nic_byte(dev, GPI, &tmp1byte);
if(tmp1byte == 0xff)
return;
@@ -2933,18 +2936,18 @@ extern void dm_gpio_change_rf_callback(struct work_struct *work)
{
// 0x108 GPIO input register is read only
//set 0x108 B1= 1: RF-ON; 0: RF-OFF.
- tmp1byte = read_nic_byte(dev,GPI);
+ read_nic_byte(dev, GPI, &tmp1byte);
eRfPowerStateToSet = (tmp1byte&BIT1) ? eRfOn : eRfOff;
- if( (priv->bHwRadioOff == true) && (eRfPowerStateToSet == eRfOn))
+ if((priv->bHwRadioOff == true) && (eRfPowerStateToSet == eRfOn))
{
RT_TRACE(COMP_RF, "gpiochangeRF - HW Radio ON\n");
priv->bHwRadioOff = false;
bActuallySet = true;
}
- else if ( (priv->bHwRadioOff == false) && (eRfPowerStateToSet == eRfOff))
+ else if ((priv->bHwRadioOff == false) && (eRfPowerStateToSet == eRfOff))
{
RT_TRACE(COMP_RF, "gpiochangeRF - HW Radio OFF\n");
priv->bHwRadioOff = true;
@@ -2996,7 +2999,7 @@ extern void dm_rf_pathcheck_workitemcallback(struct work_struct *work)
/* 2008/01/30 MH After discussing with SD3 Jerry, 0xc04/0xd04 register will
always be the same. We only read 0xc04 now. */
- rfpath = read_nic_byte(dev, 0xc04);
+ read_nic_byte(dev, 0xc04, &rfpath);
// Check Bit 0-3, it means if RF A-D is enabled.
for (i = 0; i < RF90_PATH_MAX; i++)
@@ -3012,7 +3015,7 @@ extern void dm_rf_pathcheck_workitemcallback(struct work_struct *work)
dm_rxpath_sel_byrssi(dev);
} /* DM_RFPathCheckWorkItemCallBack */
-static void dm_init_rxpath_selection(struct net_device * dev)
+static void dm_init_rxpath_selection(struct net_device *dev)
{
u8 i;
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -3033,7 +3036,7 @@ static void dm_init_rxpath_selection(struct net_device * dev)
}
}
-static void dm_rxpath_sel_byrssi(struct net_device * dev)
+static void dm_rxpath_sel_byrssi(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 i, max_rssi_index=0, min_rssi_index=0, sec_rssi_index=0, rf_num=0;
@@ -3052,12 +3055,13 @@ static void dm_rxpath_sel_byrssi(struct net_device * dev)
if(!cck_Rx_Path_initialized)
{
- DM_RxPathSelTable.cck_Rx_path = (read_nic_byte(dev, 0xa07)&0xf);
+ read_nic_byte(dev, 0xa07, &DM_RxPathSelTable.cck_Rx_path);
+ DM_RxPathSelTable.cck_Rx_path &= 0xf;
cck_Rx_Path_initialized = 1;
}
- DM_RxPathSelTable.disabledRF = 0xf;
- DM_RxPathSelTable.disabledRF &=~ (read_nic_byte(dev, 0xc04));
+ read_nic_byte(dev, 0xc04, &DM_RxPathSelTable.disabledRF);
+ DM_RxPathSelTable.disabledRF = ~DM_RxPathSelTable.disabledRF & 0xf;
if(priv->ieee80211->mode == WIRELESS_MODE_B)
{
@@ -3356,7 +3360,7 @@ extern void dm_fsync_timer_callback(unsigned long data)
bool bSwitchFromCountDiff = false;
bool bDoubleTimeInterval = false;
- if( priv->ieee80211->state == IEEE80211_LINKED &&
+ if(priv->ieee80211->state == IEEE80211_LINKED &&
priv->ieee80211->bfsync_enable &&
(priv->ieee80211->pHTInfo->IOTAction & HT_IOT_ACT_CDD_FSYNC))
{
@@ -3576,12 +3580,12 @@ void dm_check_fsync(struct net_device *dev)
RT_TRACE(COMP_HALDM, "RSSI %d TimeInterval %d MultipleTimeInterval %d\n", priv->ieee80211->fsync_rssi_threshold, priv->ieee80211->fsync_time_interval, priv->ieee80211->fsync_multiple_timeinterval);
RT_TRACE(COMP_HALDM, "RateBitmap 0x%x FirstDiffRateThreshold %d SecondDiffRateThreshold %d\n", priv->ieee80211->fsync_rate_bitmap, priv->ieee80211->fsync_firstdiff_ratethreshold, priv->ieee80211->fsync_seconddiff_ratethreshold);
- if( priv->ieee80211->state == IEEE80211_LINKED &&
+ if(priv->ieee80211->state == IEEE80211_LINKED &&
(priv->ieee80211->pHTInfo->IOTAction & HT_IOT_ACT_CDD_FSYNC))
{
if(priv->ieee80211->bfsync_enable == 0)
{
- switch(priv->ieee80211->fsync_state)
+ switch (priv->ieee80211->fsync_state)
{
case Default_Fsync:
dm_StartHWFsync(dev);
@@ -3599,7 +3603,7 @@ void dm_check_fsync(struct net_device *dev)
}
else
{
- switch(priv->ieee80211->fsync_state)
+ switch (priv->ieee80211->fsync_state)
{
case Default_Fsync:
dm_StartSWFsync(dev);
@@ -3632,7 +3636,7 @@ void dm_check_fsync(struct net_device *dev)
}
else
{
- switch(priv->ieee80211->fsync_state)
+ switch (priv->ieee80211->fsync_state)
{
case HW_Fsync:
dm_EndHWFsync(dev);
@@ -3731,17 +3735,17 @@ extern void dm_shadow_init(struct net_device *dev)
for (page = 0; page < 5; page++)
for (offset = 0; offset < 256; offset++)
{
- dm_shadow[page][offset] = read_nic_byte(dev, offset+page*256);
+ read_nic_byte(dev, offset+page*256, &dm_shadow[page][offset]);
//DbgPrint("P-%d/O-%02x=%02x\r\n", page, offset, DM_Shadow[page][offset]);
}
for (page = 8; page < 11; page++)
for (offset = 0; offset < 256; offset++)
- dm_shadow[page][offset] = read_nic_byte(dev, offset+page*256);
+ read_nic_byte(dev, offset+page*256, &dm_shadow[page][offset]);
for (page = 12; page < 15; page++)
for (offset = 0; offset < 256; offset++)
- dm_shadow[page][offset] = read_nic_byte(dev, offset+page*256);
+ read_nic_byte(dev, offset+page*256, &dm_shadow[page][offset]);
} /* dm_shadow_init */
@@ -3787,7 +3791,7 @@ static void dm_dynamic_txpower(struct net_device *dev)
return;
}
//printk("priv->ieee80211->current_network.unknown_cap_exist is %d ,priv->ieee80211->current_network.broadcom_cap_exist is %d\n",priv->ieee80211->current_network.unknown_cap_exist,priv->ieee80211->current_network.broadcom_cap_exist);
- if((priv->ieee80211->current_network.atheros_cap_exist ) && (priv->ieee80211->mode == IEEE_G)){
+ if((priv->ieee80211->current_network.atheros_cap_exist) && (priv->ieee80211->mode == IEEE_G)){
txhipower_threshhold = TX_POWER_ATHEROAP_THRESH_HIGH;
txlowpower_threshold = TX_POWER_ATHEROAP_THRESH_LOW;
}
@@ -3832,8 +3836,8 @@ static void dm_dynamic_txpower(struct net_device *dev)
priv->bDynamicTxLowPower = false;
}
- if( (priv->bDynamicTxHighPower != priv->bLastDTPFlag_High ) ||
- (priv->bDynamicTxLowPower != priv->bLastDTPFlag_Low ) )
+ if((priv->bDynamicTxHighPower != priv->bLastDTPFlag_High) ||
+ (priv->bDynamicTxLowPower != priv->bLastDTPFlag_Low))
{
RT_TRACE(COMP_TXAGC,"SetTxPowerLevel8190() channel = %d \n" , priv->ieee80211->current_network.channel);
@@ -3852,20 +3856,20 @@ static void dm_dynamic_txpower(struct net_device *dev)
} /* dm_dynamic_txpower */
//added by vivi, for read tx rate and retrycount
-static void dm_check_txrateandretrycount(struct net_device * dev)
+static void dm_check_txrateandretrycount(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct ieee80211_device *ieee = priv->ieee80211;
//for 11n tx rate
// priv->stats.CurrentShowTxate = read_nic_byte(dev, Current_Tx_Rate_Reg);
- ieee->softmac_stats.CurrentShowTxate = read_nic_byte(dev, Current_Tx_Rate_Reg);
+ read_nic_byte(dev, Current_Tx_Rate_Reg, &ieee->softmac_stats.CurrentShowTxate);
//printk("=============>tx_rate_reg:%x\n", ieee->softmac_stats.CurrentShowTxate);
//for initial tx rate
// priv->stats.last_packet_rate = read_nic_byte(dev, Initial_Tx_Rate_Reg);
- ieee->softmac_stats.last_packet_rate = read_nic_byte(dev ,Initial_Tx_Rate_Reg);
+ read_nic_byte(dev, Initial_Tx_Rate_Reg, &ieee->softmac_stats.last_packet_rate);
//for tx tx retry count
// priv->stats.txretrycount = read_nic_dword(dev, Tx_Retry_Count_Reg);
- ieee->softmac_stats.txretrycount = read_nic_dword(dev, Tx_Retry_Count_Reg);
+ read_nic_dword(dev, Tx_Retry_Count_Reg, &ieee->softmac_stats.txretrycount);
}
static void dm_send_rssi_tofw(struct net_device *dev)
@@ -3882,7 +3886,7 @@ static void dm_send_rssi_tofw(struct net_device *dev)
tx_cmd.Length = 4;
tx_cmd.Value = priv->undecorated_smoothed_pwdb;
- cmpk_message_handle_tx(dev, (u8*)&tx_cmd,
+ cmpk_message_handle_tx(dev, (u8 *)&tx_cmd,
DESC_PACKET_TYPE_INIT, sizeof(DCMD_TXCMD_T));
}
diff --git a/drivers/staging/rtl8192u/r8192U_hw.h b/drivers/staging/rtl8192u/r8192U_hw.h
index 15b0423356f8..7e612aa56fa4 100644
--- a/drivers/staging/rtl8192u/r8192U_hw.h
+++ b/drivers/staging/rtl8192u/r8192U_hw.h
@@ -388,10 +388,11 @@ enum _RTL8192Usb_HW {
#define EPROM_CMD_NORMAL 0
#define EPROM_CMD_LOAD 1
#define EPROM_CMD_PROGRAM 2
-#define EPROM_CS_SHIFT 3
-#define EPROM_CK_SHIFT 2
-#define EPROM_W_SHIFT 1
-#define EPROM_R_SHIFT 0
+#define EPROM_CS_BIT BIT(3)
+#define EPROM_CK_BIT BIT(2)
+#define EPROM_W_BIT BIT(1)
+#define EPROM_R_BIT BIT(0)
+
MAC0 = 0x000,
MAC1 = 0x001,
MAC2 = 0x002,
diff --git a/drivers/staging/rtl8192u/r8192U_wx.c b/drivers/staging/rtl8192u/r8192U_wx.c
index c904aa8cc0a6..3e2576347d29 100644
--- a/drivers/staging/rtl8192u/r8192U_wx.c
+++ b/drivers/staging/rtl8192u/r8192U_wx.c
@@ -144,7 +144,7 @@ static int r8192_wx_read_regs(struct net_device *dev,
down(&priv->wx_sem);
- get_user(addr,(u8*)wrqu->data.pointer);
+ get_user(addr,(u8 *)wrqu->data.pointer);
data1 = read_rtl8225(dev, addr);
wrqu->data.length = data1;
@@ -162,7 +162,7 @@ static int r8192_wx_write_regs(struct net_device *dev,
down(&priv->wx_sem);
- get_user(addr, (u8*)wrqu->data.pointer);
+ get_user(addr, (u8 *)wrqu->data.pointer);
write_rtl8225(dev, addr, wrqu->data.length);
up(&priv->wx_sem);
@@ -199,7 +199,7 @@ static int r8192_wx_write_bb(struct net_device *dev,
down(&priv->wx_sem);
- get_user(databb, (u8*)wrqu->data.pointer);
+ get_user(databb, (u8 *)wrqu->data.pointer);
rtl8187_write_phy(dev, wrqu->data.length, databb);
up(&priv->wx_sem);
@@ -217,7 +217,7 @@ static int r8192_wx_write_nicb(struct net_device *dev,
down(&priv->wx_sem);
- get_user(addr, (u32*)wrqu->data.pointer);
+ get_user(addr, (u32 *)wrqu->data.pointer);
write_nic_byte(dev, addr, wrqu->data.length);
up(&priv->wx_sem);
@@ -234,8 +234,8 @@ static int r8192_wx_read_nicb(struct net_device *dev,
down(&priv->wx_sem);
- get_user(addr,(u32*)wrqu->data.pointer);
- data1 = read_nic_byte(dev, addr);
+ get_user(addr,(u32 *)wrqu->data.pointer);
+ read_nic_byte(dev, addr, &data1);
wrqu->data.length = data1;
up(&priv->wx_sem);
@@ -254,12 +254,12 @@ static int r8192_wx_get_ap_status(struct net_device *dev,
down(&priv->wx_sem);
//count the length of input ssid
- for(name_len=0 ; ((char*)wrqu->data.pointer)[name_len]!='\0' ; name_len++);
+ for(name_len=0 ; ((char *)wrqu->data.pointer)[name_len]!='\0' ; name_len++);
//search for the corresponding info which is received
list_for_each_entry(target, &ieee->network_list, list) {
if ( (target->ssid_len == name_len) &&
- (strncmp(target->ssid, (char*)wrqu->data.pointer, name_len)==0)){
+ (strncmp(target->ssid, (char *)wrqu->data.pointer, name_len)==0)){
if(target->wpa_ie_len>0 || target->rsn_ie_len>0 )
//set flags=1 to indicate this ap is WPA
wrqu->data.flags = 1;
@@ -380,7 +380,7 @@ static int rtl8180_wx_get_range(struct net_device *dev,
union iwreq_data *wrqu, char *extra)
{
struct iw_range *range = (struct iw_range *)extra;
- struct iw_range_with_scan_capa* tmp = (struct iw_range_with_scan_capa*)range;
+ struct iw_range_with_scan_capa *tmp = (struct iw_range_with_scan_capa *)range;
struct r8192_priv *priv = ieee80211_priv(dev);
u16 val;
int i;
@@ -483,7 +483,7 @@ static int r8192_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
union iwreq_data *wrqu, char *b)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct ieee80211_device *ieee = priv->ieee80211;
int ret = 0;
if(!priv->up) return -ENETDOWN;
@@ -492,7 +492,7 @@ static int r8192_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
return -EAGAIN;
if (wrqu->data.flags & IW_SCAN_THIS_ESSID)
{
- struct iw_scan_req* req = (struct iw_scan_req*)b;
+ struct iw_scan_req *req = (struct iw_scan_req *)b;
if (req->essid_len)
{
//printk("==**&*&*&**===>scan set ssid:%s\n", req->essid);
@@ -709,7 +709,7 @@ static int r8192_wx_set_enc(struct net_device *dev,
#define CONF_WEP40 0x4
#define CONF_WEP104 0x14
- switch(wrqu->encoding.flags & IW_ENCODE_INDEX){
+ switch (wrqu->encoding.flags & IW_ENCODE_INDEX){
case 0: key_idx = ieee->tx_keyidx; break;
case 1: key_idx = 0; break;
case 2: key_idx = 1; break;
@@ -757,7 +757,7 @@ static int r8192_wx_set_scan_type(struct net_device *dev, struct iw_request_info
iwreq_data *wrqu, char *p){
struct r8192_priv *priv = ieee80211_priv(dev);
- int *parms=(int*)p;
+ int *parms=(int *)p;
int mode=parms[0];
priv->ieee80211->active_scan = mode;
@@ -891,7 +891,7 @@ static int r8192_wx_set_enc_ext(struct net_device *dev,
{
int ret=0;
struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
+ struct ieee80211_device *ieee = priv->ieee80211;
//printk("===>%s()\n", __FUNCTION__);
@@ -922,7 +922,7 @@ static int r8192_wx_set_enc_ext(struct net_device *dev,
ieee->pairwise_key_type = alg;
EnableHWSecurityConfig8192(dev);
}
- memcpy((u8*)key, ext->key, 16); //we only get 16 bytes key.why? WB 2008.7.1
+ memcpy((u8 *)key, ext->key, 16); //we only get 16 bytes key.why? WB 2008.7.1
if ((alg & KEY_TYPE_WEP40) && (ieee->auth_mode !=2) )
{
@@ -952,7 +952,7 @@ static int r8192_wx_set_enc_ext(struct net_device *dev,
4,//EntryNo
idx, //KeyIndex
alg, //KeyType
- (u8*)ieee->ap_mac_addr, //MacAddr
+ (u8 *)ieee->ap_mac_addr, //MacAddr
0, //DefaultKey
key); //KeyContent
}
@@ -1180,8 +1180,8 @@ static iw_handler r8192_private_handler[] = {
struct iw_statistics *r8192_get_wireless_stats(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- struct ieee80211_device* ieee = priv->ieee80211;
- struct iw_statistics* wstats = &priv->wstats;
+ struct ieee80211_device *ieee = priv->ieee80211;
+ struct iw_statistics *wstats = &priv->wstats;
int tmp_level = 0;
int tmp_qual = 0;
int tmp_noise = 0;
diff --git a/drivers/staging/rtl8192u/r819xU_cmdpkt.c b/drivers/staging/rtl8192u/r819xU_cmdpkt.c
index b755eb46341f..6810766edfcf 100644
--- a/drivers/staging/rtl8192u/r819xU_cmdpkt.c
+++ b/drivers/staging/rtl8192u/r819xU_cmdpkt.c
@@ -41,7 +41,7 @@
rt_status
SendTxCommandPacket(
struct net_device *dev,
- void* pData,
+ void *pData,
u32 DataLen
)
{
@@ -57,7 +57,7 @@ SendTxCommandPacket(
//Get TCB and local buffer from common pool. (It is shared by CmdQ, MgntQ, and USB coalesce DataQ)
skb = dev_alloc_skb(USB_HWDESC_HEADER_LEN + DataLen + 4);
memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
- tcb_desc = (cb_desc*)(skb->cb + MAX_DEV_ADDR_SIZE);
+ tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
tcb_desc->queue_index = TXCMD_QUEUE;
tcb_desc->bCmdOrInit = DESC_PACKET_TYPE_NORMAL;
tcb_desc->bLastIniPkt = 0;
@@ -66,7 +66,7 @@ SendTxCommandPacket(
memcpy(ptr_buf,pData,DataLen);
tcb_desc->txbuf_size= (u16)DataLen;
- if(!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
+ if (!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
(!skb_queue_empty(&priv->ieee80211->skb_waitQ[tcb_desc->queue_index]))||\
(priv->ieee80211->queue_stop) ) {
RT_TRACE(COMP_FIRMWARE,"===================NULL packet==================================> tx full!\n");
@@ -101,7 +101,7 @@ SendTxCommandPacket(
*---------------------------------------------------------------------------*/
extern rt_status cmpk_message_handle_tx(
struct net_device *dev,
- u8* codevirtualaddress,
+ u8 *codevirtualaddress,
u32 packettype,
u32 buffer_len)
{
@@ -126,7 +126,7 @@ SendTxCommandPacket(
//Fragmentation might be required
frag_threshold = pfirmware->cmdpacket_frag_thresold;
do {
- if((buffer_len - frag_offset) > frag_threshold) {
+ if ((buffer_len - frag_offset) > frag_threshold) {
frag_length = frag_threshold ;
bLastIniPkt = 0;
@@ -145,7 +145,7 @@ SendTxCommandPacket(
skb = dev_alloc_skb(frag_length + 4);
#endif
memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
- tcb_desc = (cb_desc*)(skb->cb + MAX_DEV_ADDR_SIZE);
+ tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
tcb_desc->queue_index = TXCMD_QUEUE;
tcb_desc->bCmdOrInit = packettype;
tcb_desc->bLastIniPkt = bLastIniPkt;
@@ -163,7 +163,7 @@ SendTxCommandPacket(
tcb_desc->txbuf_size= (u16)buffer_len;
- if(!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
+ if (!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
(!skb_queue_empty(&priv->ieee80211->skb_waitQ[tcb_desc->queue_index]))||\
(priv->ieee80211->queue_stop) ) {
RT_TRACE(COMP_FIRMWARE,"=====================================================> tx full!\n");
@@ -221,7 +221,7 @@ cmpk_count_txstatistic(
#endif
#ifdef TODO
- if(pAdapter->bInHctTest)
+ if (pAdapter->bInHctTest)
return;
#endif
/* We can not know the packet length and transmit type: broadcast or uni
@@ -303,7 +303,7 @@ cmpk_count_txstatistic(
static void
cmpk_handle_tx_feedback(
struct net_device *dev,
- u8 * pmsg)
+ u8 *pmsg)
{
struct r8192_priv *priv = ieee80211_priv(dev);
cmpk_txfb_t rx_tx_fb; /* */
@@ -319,7 +319,7 @@ cmpk_handle_tx_feedback(
endian type before copy the message copy. */
/* 2007/07/05 MH Use pointer to transfer structure memory. */
//memcpy((UINT8 *)&rx_tx_fb, pMsg, sizeof(CMPK_TXFB_T));
- memcpy((u8*)&rx_tx_fb, pmsg, sizeof(cmpk_txfb_t));
+ memcpy((u8 *)&rx_tx_fb, pmsg, sizeof(cmpk_txfb_t));
/* 2. Use tx feedback info to count TX statistics. */
cmpk_count_txstatistic(dev, &rx_tx_fb);
/* 2007/01/17 MH Comment previous method for TX statistic function. */
@@ -341,7 +341,7 @@ cmdpkt_beacontimerinterrupt_819xusb(
//
// 070117, rcnjko: 87B have to S/W beacon for DTM encryption_cmn.
//
- if(priv->ieee80211->current_network.mode == IEEE_A ||
+ if (priv->ieee80211->current_network.mode == IEEE_A ||
priv->ieee80211->current_network.mode == IEEE_N_5G ||
(priv->ieee80211->current_network.mode == IEEE_N_24G && (!priv->ieee80211->pHTInfo->bCurSuppCCK)))
{
@@ -386,7 +386,7 @@ cmdpkt_beacontimerinterrupt_819xusb(
static void
cmpk_handle_interrupt_status(
struct net_device *dev,
- u8* pmsg)
+ u8 *pmsg)
{
cmpk_intr_sta_t rx_intr_status; /* */
struct r8192_priv *priv = ieee80211_priv(dev);
@@ -411,7 +411,7 @@ cmpk_handle_interrupt_status(
// Statistics of beacon for ad-hoc mode.
- if( priv->ieee80211->iw_mode == IW_MODE_ADHOC)
+ if ( priv->ieee80211->iw_mode == IW_MODE_ADHOC)
{
//2 maybe need endian transform?
rx_intr_status.interrupt_status = *((u32 *)(pmsg + 4));
@@ -467,7 +467,7 @@ cmpk_handle_interrupt_status(
static void
cmpk_handle_query_config_rx(
struct net_device *dev,
- u8* pmsg)
+ u8 *pmsg)
{
cmpk_query_cfg_t rx_query_cfg; /* */
@@ -580,11 +580,11 @@ static void cmpk_count_tx_status( struct net_device *dev,
static void
cmpk_handle_tx_status(
struct net_device *dev,
- u8* pmsg)
+ u8 *pmsg)
{
cmpk_tx_status_t rx_tx_sts; /* */
- memcpy((void*)&rx_tx_sts, (void*)pmsg, sizeof(cmpk_tx_status_t));
+ memcpy((void *)&rx_tx_sts, (void *)pmsg, sizeof(cmpk_tx_status_t));
/* 2. Use tx feedback info to count TX statistics. */
cmpk_count_tx_status(dev, &rx_tx_sts);
@@ -610,7 +610,7 @@ cmpk_handle_tx_status(
static void
cmpk_handle_tx_rate_history(
struct net_device *dev,
- u8* pmsg)
+ u8 *pmsg)
{
cmpk_tx_rahis_t *ptxrate;
// RT_RF_POWER_STATE rtState;
@@ -727,12 +727,12 @@ cmpk_message_handle_rx(
element type. Because FW may aggregate RX command packet to minimize
transmit time between DRV and FW.*/
// Add a counter to prevent the lock in the loop from being held too long
- while (total_length > 0 || exe_cnt++ >100)
+ while (total_length > 0 && exe_cnt++ < 100)
{
/* 2007/01/17 MH We support aggregation of different cmd in the same packet. */
element_id = pcmd_buff[0];
- switch(element_id)
+ switch (element_id)
{
case RX_TX_FEEDBACK:
cmpk_handle_tx_feedback (dev, pcmd_buff);
diff --git a/drivers/staging/rtl8192u/r819xU_cmdpkt.h b/drivers/staging/rtl8192u/r819xU_cmdpkt.h
index 59caa4e05323..ebe403270a5b 100644
--- a/drivers/staging/rtl8192u/r819xU_cmdpkt.h
+++ b/drivers/staging/rtl8192u/r819xU_cmdpkt.h
@@ -192,10 +192,10 @@ typedef enum _rt_status{
RT_STATUS_RESOURCE
}rt_status,*prt_status;
-extern rt_status cmpk_message_handle_tx(struct net_device *dev, u8* codevirtualaddress, u32 packettype, u32 buffer_len);
+extern rt_status cmpk_message_handle_tx(struct net_device *dev, u8 *codevirtualaddress, u32 packettype, u32 buffer_len);
-extern u32 cmpk_message_handle_rx(struct net_device *dev, struct ieee80211_rx_stats * pstats);
-extern rt_status SendTxCommandPacket( struct net_device *dev, void* pData, u32 DataLen);
+extern u32 cmpk_message_handle_rx(struct net_device *dev, struct ieee80211_rx_stats *pstats);
+extern rt_status SendTxCommandPacket( struct net_device *dev, void *pData, u32 DataLen);
#endif
diff --git a/drivers/staging/rtl8192u/r819xU_firmware.c b/drivers/staging/rtl8192u/r819xU_firmware.c
index 573e9cd68509..bb924ac97e47 100644
--- a/drivers/staging/rtl8192u/r819xU_firmware.c
+++ b/drivers/staging/rtl8192u/r819xU_firmware.c
@@ -48,7 +48,7 @@ bool fw_download_code(struct net_device *dev, u8 *code_virtual_address, u32 buff
//Fragmentation might be required
frag_threshold = pfirmware->cmdpacket_frag_thresold;
do {
- if((buffer_len - frag_offset) > frag_threshold) {
+ if ((buffer_len - frag_offset) > frag_threshold) {
frag_length = frag_threshold ;
bLastIniPkt = 0;
@@ -67,7 +67,7 @@ bool fw_download_code(struct net_device *dev, u8 *code_virtual_address, u32 buff
skb = dev_alloc_skb(frag_length + 4);
#endif
memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
- tcb_desc = (cb_desc*)(skb->cb + MAX_DEV_ADDR_SIZE);
+ tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
tcb_desc->queue_index = TXCMD_QUEUE;
tcb_desc->bCmdOrInit = DESC_PACKET_TYPE_INIT;
tcb_desc->bLastIniPkt = bLastIniPkt;
@@ -89,7 +89,7 @@ bool fw_download_code(struct net_device *dev, u8 *code_virtual_address, u32 buff
tcb_desc->txbuf_size= (u16)i;
skb_put(skb, i);
- if(!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
+ if (!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
(!skb_queue_empty(&priv->ieee80211->skb_waitQ[tcb_desc->queue_index]))||\
(priv->ieee80211->queue_stop) ) {
RT_TRACE(COMP_FIRMWARE,"=====================================================> tx full!\n");
@@ -125,7 +125,7 @@ fwSendNullPacket(
//Get TCB and local buffer from common pool. (It is shared by CmdQ, MgntQ, and USB coalesce DataQ)
skb = dev_alloc_skb(Length+ 4);
memcpy((unsigned char *)(skb->cb),&dev,sizeof(dev));
- tcb_desc = (cb_desc*)(skb->cb + MAX_DEV_ADDR_SIZE);
+ tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
tcb_desc->queue_index = TXCMD_QUEUE;
tcb_desc->bCmdOrInit = DESC_PACKET_TYPE_INIT;
tcb_desc->bLastIniPkt = bLastInitPacket;
@@ -133,7 +133,7 @@ fwSendNullPacket(
memset(ptr_buf,0,Length);
tcb_desc->txbuf_size= (u16)Length;
- if(!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
+ if (!priv->ieee80211->check_nic_enough_desc(dev,tcb_desc->queue_index)||
(!skb_queue_empty(&priv->ieee80211->skb_waitQ[tcb_desc->queue_index]))||\
(priv->ieee80211->queue_stop) ) {
RT_TRACE(COMP_FIRMWARE,"===================NULL packet==================================> tx full!\n");
@@ -168,14 +168,14 @@ bool CPUcheck_maincodeok_turnonCPU(struct net_device *dev)
/* Check whether put code OK */
do {
- CPU_status = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &CPU_status);
- if(CPU_status&CPU_GEN_PUT_CODE_OK)
+ if (CPU_status&CPU_GEN_PUT_CODE_OK)
break;
}while(check_putcodeOK_time--);
- if(!(CPU_status&CPU_GEN_PUT_CODE_OK)) {
+ if (!(CPU_status&CPU_GEN_PUT_CODE_OK)) {
RT_TRACE(COMP_ERR, "Download Firmware: Put code fail!\n");
goto CPUCheckMainCodeOKAndTurnOnCPU_Fail;
} else {
@@ -183,19 +183,19 @@ bool CPUcheck_maincodeok_turnonCPU(struct net_device *dev)
}
/* Turn On CPU */
- CPU_status = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &CPU_status);
write_nic_byte(dev, CPU_GEN, (u8)((CPU_status|CPU_GEN_PWR_STB_CPU)&0xff));
mdelay(1000);
/* Check whether CPU boot OK */
do {
- CPU_status = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &CPU_status);
- if(CPU_status&CPU_GEN_BOOT_RDY)
+ if (CPU_status&CPU_GEN_BOOT_RDY)
break;
}while(check_bootOk_time--);
- if(!(CPU_status&CPU_GEN_BOOT_RDY)) {
+ if (!(CPU_status&CPU_GEN_BOOT_RDY)) {
goto CPUCheckMainCodeOKAndTurnOnCPU_Fail;
} else {
RT_TRACE(COMP_FIRMWARE, "Download Firmware: Boot ready!\n");
@@ -218,14 +218,14 @@ bool CPUcheck_firmware_ready(struct net_device *dev)
/* Check Firmware Ready */
do {
- CPU_status = read_nic_dword(dev, CPU_GEN);
+ read_nic_dword(dev, CPU_GEN, &CPU_status);
- if(CPU_status&CPU_GEN_FIRM_RDY)
+ if (CPU_status&CPU_GEN_FIRM_RDY)
break;
}while(check_time--);
- if(!(CPU_status&CPU_GEN_FIRM_RDY))
+ if (!(CPU_status&CPU_GEN_FIRM_RDY))
goto CPUCheckFirmwareReady_Fail;
else
RT_TRACE(COMP_FIRMWARE, "Download Firmware: Firmware ready!\n");
@@ -265,7 +265,7 @@ bool init_firmware(struct net_device *dev)
starting_state = FW_INIT_STEP0_BOOT;
// TODO: system reset
- }else if(pfirmware->firmware_status == FW_STATUS_5_READY) {
+ }else if (pfirmware->firmware_status == FW_STATUS_5_READY) {
/* it is called by Initialize */
rst_opt = OPT_FIRMWARE_RESET;
starting_state = FW_INIT_STEP2_DATA;
@@ -282,19 +282,19 @@ bool init_firmware(struct net_device *dev)
* Open image file, and map file to continuous memory if open file success.
* or read image file from array. Default load from IMG file
*/
- if(rst_opt == OPT_SYSTEM_RESET) {
+ if (rst_opt == OPT_SYSTEM_RESET) {
rc = request_firmware(&fw_entry, fw_name[init_step],&priv->udev->dev);
- if(rc < 0 ) {
+ if (rc < 0 ) {
RT_TRACE(COMP_ERR, "request firmware fail!\n");
goto download_firmware_fail;
}
- if(fw_entry->size > sizeof(pfirmware->firmware_buf)) {
+ if (fw_entry->size > sizeof(pfirmware->firmware_buf)) {
RT_TRACE(COMP_ERR, "img file size exceed the container buffer fail!\n");
goto download_firmware_fail;
}
- if(init_step != FW_INIT_STEP1_MAIN) {
+ if (init_step != FW_INIT_STEP1_MAIN) {
memcpy(pfirmware->firmware_buf,fw_entry->data,fw_entry->size);
mapped_file = pfirmware->firmware_buf;
file_length = fw_entry->size;
@@ -311,7 +311,7 @@ bool init_firmware(struct net_device *dev)
#endif
}
pfirmware->firmware_buf_size = file_length;
- }else if(rst_opt == OPT_FIRMWARE_RESET ) {
+ }else if (rst_opt == OPT_FIRMWARE_RESET ) {
/* we only need to download data.img here */
mapped_file = pfirmware->firmware_buf;
file_length = pfirmware->firmware_buf_size;
@@ -325,15 +325,15 @@ bool init_firmware(struct net_device *dev)
* and Tx descriptor info
* */
rt_status = fw_download_code(dev,mapped_file,file_length);
- if(rst_opt == OPT_SYSTEM_RESET) {
+ if (rst_opt == OPT_SYSTEM_RESET) {
release_firmware(fw_entry);
}
- if(rt_status != TRUE) {
+ if (rt_status != TRUE) {
goto download_firmware_fail;
}
- switch(init_step) {
+ switch (init_step) {
case FW_INIT_STEP0_BOOT:
/* Download boot
* initialize command descriptor.
@@ -343,7 +343,7 @@ bool init_firmware(struct net_device *dev)
#ifdef RTL8190P
// To initialize IMEM, CPU move code from 0x80000080, hence, we send 0x80 byte packet
rt_status = fwSendNullPacket(dev, RTL8190_CPU_START_OFFSET);
- if(rt_status != true)
+ if (rt_status != true)
{
RT_TRACE(COMP_INIT, "fwSendNullPacket() fail ! \n");
goto download_firmware_fail;
@@ -362,7 +362,7 @@ bool init_firmware(struct net_device *dev)
/* Check Put Code OK and Turn On CPU */
rt_status = CPUcheck_maincodeok_turnonCPU(dev);
- if(rt_status != TRUE) {
+ if (rt_status != TRUE) {
RT_TRACE(COMP_ERR, "CPUcheck_maincodeok_turnonCPU fail!\n");
goto download_firmware_fail;
}
@@ -376,7 +376,7 @@ bool init_firmware(struct net_device *dev)
mdelay(1);
rt_status = CPUcheck_firmware_ready(dev);
- if(rt_status != TRUE) {
+ if (rt_status != TRUE) {
RT_TRACE(COMP_ERR, "CPUcheck_firmware_ready fail(%d)!\n",rt_status);
goto download_firmware_fail;
}
diff --git a/drivers/staging/rtl8192u/r819xU_phy.c b/drivers/staging/rtl8192u/r819xU_phy.c
index 17fac41c12d9..a6fac081e42c 100644
--- a/drivers/staging/rtl8192u/r819xU_phy.c
+++ b/drivers/staging/rtl8192u/r819xU_phy.c
@@ -7,22 +7,24 @@
#include "r819xU_firmware_img.h"
#include "dot11d.h"
+#include <linux/bitops.h>
+
static u32 RF_CHANNEL_TABLE_ZEBRA[] = {
0,
- 0x085c, //2412 1
- 0x08dc, //2417 2
- 0x095c, //2422 3
- 0x09dc, //2427 4
- 0x0a5c, //2432 5
- 0x0adc, //2437 6
- 0x0b5c, //2442 7
- 0x0bdc, //2447 8
- 0x0c5c, //2452 9
- 0x0cdc, //2457 10
- 0x0d5c, //2462 11
- 0x0ddc, //2467 12
- 0x0e5c, //2472 13
- 0x0f72, //2484
+ 0x085c, /* 2412 1 */
+ 0x08dc, /* 2417 2 */
+ 0x095c, /* 2422 3 */
+ 0x09dc, /* 2427 4 */
+ 0x0a5c, /* 2432 5 */
+ 0x0adc, /* 2437 6 */
+ 0x0b5c, /* 2442 7 */
+ 0x0bdc, /* 2447 8 */
+ 0x0c5c, /* 2452 9 */
+ 0x0cdc, /* 2457 10 */
+ 0x0d5c, /* 2462 11 */
+ 0x0ddc, /* 2467 12 */
+ 0x0e5c, /* 2472 13 */
+ 0x0f72, /* 2484 */
};
@@ -36,36 +38,36 @@ static u32 RF_CHANNEL_TABLE_ZEBRA[] = {
#define rtl819XAGCTAB_Array Rtl8192UsbAGCTAB_Array
/******************************************************************************
- *function: This function read BB parameters from Header file we gen,
- * and do register read/write
- * input: u32 dwBitMask //taget bit pos in the addr to be modified
- * output: none
- * return: u32 return the shift bit position of the mask
- * ****************************************************************************/
-u32 rtl8192_CalculateBitShift(u32 dwBitMask)
+ * function: This function reads BB parameters from header file we generate,
+ * and does register read/write
+ * input: u32 bitmask //taget bit pos in the addr to be modified
+ * output: none
+ * return: u32 return the shift bit position of the mask
+ ******************************************************************************/
+u32 rtl8192_CalculateBitShift(u32 bitmask)
{
u32 i;
- for (i=0; i<=31; i++)
- {
- if (((dwBitMask>>i)&0x1) == 1)
- break;
- }
+
+ i = ffs(bitmask) - 1;
return i;
}
+
/******************************************************************************
- *function: This function check different RF type to execute legal judgement. If RF Path is illegal, we will return false.
- * input: none
- * output: none
- * return: 0(illegal, false), 1(legal,true)
- * ***************************************************************************/
-u8 rtl8192_phy_CheckIsLegalRFPath(struct net_device* dev, u32 eRFPath)
+ * function: This function checks different RF type to execute legal judgement.
+ * If RF Path is illegal, we will return false.
+ * input: net_device *dev
+ * u32 eRFPath
+ * output: none
+ * return: 0(illegal, false), 1(legal, true)
+ *****************************************************************************/
+u8 rtl8192_phy_CheckIsLegalRFPath(struct net_device *dev, u32 eRFPath)
{
u8 ret = 1;
struct r8192_priv *priv = ieee80211_priv(dev);
- if (priv->rf_type == RF_2T4R)
+
+ if (priv->rf_type == RF_2T4R) {
ret = 0;
- else if (priv->rf_type == RF_1T2R)
- {
+ } else if (priv->rf_type == RF_1T2R) {
if (eRFPath == RF90_PATH_A || eRFPath == RF90_PATH_B)
ret = 1;
else if (eRFPath == RF90_PATH_C || eRFPath == RF90_PATH_D)
@@ -73,662 +75,682 @@ u8 rtl8192_phy_CheckIsLegalRFPath(struct net_device* dev, u32 eRFPath)
}
return ret;
}
+
/******************************************************************************
- *function: This function set specific bits to BB register
- * input: net_device dev
- * u32 dwRegAddr //target addr to be modified
- * u32 dwBitMask //taget bit pos in the addr to be modified
- * u32 dwData //value to be write
- * output: none
- * return: none
- * notice:
- * ****************************************************************************/
-void rtl8192_setBBreg(struct net_device* dev, u32 dwRegAddr, u32 dwBitMask, u32 dwData)
+ * function: This function sets specific bits to BB register
+ * input: net_device *dev
+ * u32 reg_addr //target addr to be modified
+ * u32 bitmask //taget bit pos to be modified
+ * u32 data //value to be write
+ * output: none
+ * return: none
+ * notice:
+ ******************************************************************************/
+void rtl8192_setBBreg(struct net_device *dev, u32 reg_addr, u32 bitmask,
+ u32 data)
{
- u32 OriginalValue, BitShift, NewValue;
+ u32 reg, bitshift;
- if(dwBitMask!= bMaskDWord)
- {//if not "double word" write
- OriginalValue = read_nic_dword(dev, dwRegAddr);
- BitShift = rtl8192_CalculateBitShift(dwBitMask);
- NewValue = (((OriginalValue) & (~dwBitMask)) | (dwData << BitShift));
- write_nic_dword(dev, dwRegAddr, NewValue);
- }else
- write_nic_dword(dev, dwRegAddr, dwData);
+ if (bitmask != bMaskDWord) {
+ read_nic_dword(dev, reg_addr, &reg);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
+ reg &= ~bitmask;
+ reg |= data << bitshift;
+ write_nic_dword(dev, reg_addr, reg);
+ } else {
+ write_nic_dword(dev, reg_addr, data);
+ }
return;
}
+
/******************************************************************************
- *function: This function reads specific bits from BB register
- * input: net_device dev
- * u32 dwRegAddr //target addr to be readback
- * u32 dwBitMask //taget bit pos in the addr to be readback
- * output: none
- * return: u32 Data //the readback register value
- * notice:
- * ****************************************************************************/
-u32 rtl8192_QueryBBReg(struct net_device* dev, u32 dwRegAddr, u32 dwBitMask)
+ * function: This function reads specific bits from BB register
+ * input: net_device *dev
+ * u32 reg_addr //target addr to be readback
+ * u32 bitmask //taget bit pos to be readback
+ * output: none
+ * return: u32 data //the readback register value
+ * notice:
+ ******************************************************************************/
+u32 rtl8192_QueryBBReg(struct net_device *dev, u32 reg_addr, u32 bitmask)
{
- u32 Ret = 0, OriginalValue, BitShift;
+ u32 reg, bitshift;
- OriginalValue = read_nic_dword(dev, dwRegAddr);
- BitShift = rtl8192_CalculateBitShift(dwBitMask);
- Ret =(OriginalValue & dwBitMask) >> BitShift;
+ read_nic_dword(dev, reg_addr, &reg);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
- return (Ret);
+ return (reg & bitmask) >> bitshift;
}
-static u32 phy_FwRFSerialRead( struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset );
-static void phy_FwRFSerialWrite( struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset, u32 Data);
+static u32 phy_FwRFSerialRead(struct net_device *dev, RF90_RADIO_PATH_E eRFPath,
+ u32 offset);
+
+static void phy_FwRFSerialWrite(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath, u32 offset,
+ u32 data);
/******************************************************************************
- *function: This function read register from RF chip
- * input: net_device dev
- * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
- * u32 Offset //target address to be read
- * output: none
- * return: u32 readback value
- * notice: There are three types of serial operations:(1) Software serial write.(2)Hardware LSSI-Low Speed Serial Interface.(3)Hardware HSSI-High speed serial write. Driver here need to implement (1) and (2)---need more spec for this information.
- * ****************************************************************************/
-u32 rtl8192_phy_RFSerialRead(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset)
+ * function: This function reads register from RF chip
+ * input: net_device *dev
+ * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
+ * u32 offset //target address to be read
+ * output: none
+ * return: u32 readback value
+ * notice: There are three types of serial operations:
+ * (1) Software serial write.
+ * (2)Hardware LSSI-Low Speed Serial Interface.
+ * (3)Hardware HSSI-High speed serial write.
+ * Driver here need to implement (1) and (2)
+ * ---need more spec for this information.
+ ******************************************************************************/
+u32 rtl8192_phy_RFSerialRead(struct net_device *dev, RF90_RADIO_PATH_E eRFPath,
+ u32 offset)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u32 ret = 0;
- u32 NewOffset = 0;
- BB_REGISTER_DEFINITION_T* pPhyReg = &priv->PHYRegDef[eRFPath];
+ u32 new_offset = 0;
+ BB_REGISTER_DEFINITION_T *pPhyReg = &priv->PHYRegDef[eRFPath];
+
rtl8192_setBBreg(dev, pPhyReg->rfLSSIReadBack, bLSSIReadBackData, 0);
- //make sure RF register offset is correct
- Offset &= 0x3f;
-
- //switch page for 8256 RF IC
- if (priv->rf_chip == RF_8256)
- {
- if (Offset >= 31)
- {
+ /* Make sure RF register offset is correct */
+ offset &= 0x3f;
+
+ /* Switch page for 8256 RF IC */
+ if (priv->rf_chip == RF_8256) {
+ if (offset >= 31) {
priv->RfReg0Value[eRFPath] |= 0x140;
- //Switch to Reg_Mode2 for Reg 31-45
- rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord, (priv->RfReg0Value[eRFPath]<<16) );
- //modify offset
- NewOffset = Offset -30;
- }
- else if (Offset >= 16)
- {
+ /* Switch to Reg_Mode2 for Reg 31-45 */
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset,
+ bMaskDWord,
+ priv->RfReg0Value[eRFPath]<<16);
+ /* Modify offset */
+ new_offset = offset - 30;
+ } else if (offset >= 16) {
priv->RfReg0Value[eRFPath] |= 0x100;
priv->RfReg0Value[eRFPath] &= (~0x40);
- //Switch to Reg_Mode 1 for Reg16-30
- rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord, (priv->RfReg0Value[eRFPath]<<16) );
-
- NewOffset = Offset - 15;
+ /* Switch to Reg_Mode1 for Reg16-30 */
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset,
+ bMaskDWord,
+ priv->RfReg0Value[eRFPath]<<16);
+
+ new_offset = offset - 15;
+ } else {
+ new_offset = offset;
}
- else
- NewOffset = Offset;
- }
- else
- {
- RT_TRACE((COMP_PHY|COMP_ERR), "check RF type here, need to be 8256\n");
- NewOffset = Offset;
+ } else {
+ RT_TRACE((COMP_PHY|COMP_ERR),
+ "check RF type here, need to be 8256\n");
+ new_offset = offset;
}
- //put desired read addr to LSSI control Register
- rtl8192_setBBreg(dev, pPhyReg->rfHSSIPara2, bLSSIReadAddress, NewOffset);
- //Issue a posedge trigger
- //
+ /* Put desired read addr to LSSI control Register */
+ rtl8192_setBBreg(dev, pPhyReg->rfHSSIPara2, bLSSIReadAddress,
+ new_offset);
+ /* Issue a posedge trigger */
rtl8192_setBBreg(dev, pPhyReg->rfHSSIPara2, bLSSIReadEdge, 0x0);
rtl8192_setBBreg(dev, pPhyReg->rfHSSIPara2, bLSSIReadEdge, 0x1);
- // TODO: we should not delay such a long time. Ask for help from SD3
- msleep(1);
+ /* TODO: we should not delay such a long time. Ask for help from SD3 */
+ usleep_range(1000, 1000);
- ret = rtl8192_QueryBBReg(dev, pPhyReg->rfLSSIReadBack, bLSSIReadBackData);
+ ret = rtl8192_QueryBBReg(dev, pPhyReg->rfLSSIReadBack,
+ bLSSIReadBackData);
- // Switch back to Reg_Mode0;
- if(priv->rf_chip == RF_8256)
- {
+ /* Switch back to Reg_Mode0 */
+ if (priv->rf_chip == RF_8256) {
priv->RfReg0Value[eRFPath] &= 0xebf;
- rtl8192_setBBreg(
- dev,
- pPhyReg->rf3wireOffset,
- bMaskDWord,
- (priv->RfReg0Value[eRFPath] << 16));
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord,
+ priv->RfReg0Value[eRFPath] << 16);
}
return ret;
-
}
/******************************************************************************
- *function: This function write data to RF register
- * input: net_device dev
- * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
- * u32 Offset //target address to be written
- * u32 Data //The new register data to be written
- * output: none
- * return: none
- * notice: For RF8256 only.
- ===========================================================
- *Reg Mode RegCTL[1] RegCTL[0] Note
+ * function: This function writes data to RF register
+ * input: net_device *dev
+ * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
+ * u32 offset //target address to be written
+ * u32 data //the new register data to be written
+ * output: none
+ * return: none
+ * notice: For RF8256 only.
+ * ===========================================================================
+ * Reg Mode RegCTL[1] RegCTL[0] Note
* (Reg00[12]) (Reg00[10])
- *===========================================================
- *Reg_Mode0 0 x Reg 0 ~15(0x0 ~ 0xf)
- *------------------------------------------------------------------
- *Reg_Mode1 1 0 Reg 16 ~30(0x1 ~ 0xf)
- *------------------------------------------------------------------
+ * ===========================================================================
+ * Reg_Mode0 0 x Reg 0 ~ 15(0x0 ~ 0xf)
+ * ---------------------------------------------------------------------------
+ * Reg_Mode1 1 0 Reg 16 ~ 30(0x1 ~ 0xf)
+ * ---------------------------------------------------------------------------
* Reg_Mode2 1 1 Reg 31 ~ 45(0x1 ~ 0xf)
- *------------------------------------------------------------------
- * ****************************************************************************/
-void rtl8192_phy_RFSerialWrite(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset, u32 Data)
+ * ---------------------------------------------------------------------------
+ *****************************************************************************/
+void rtl8192_phy_RFSerialWrite(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath, u32 offset, u32 data)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- u32 DataAndAddr = 0, NewOffset = 0;
+ u32 DataAndAddr = 0, new_offset = 0;
BB_REGISTER_DEFINITION_T *pPhyReg = &priv->PHYRegDef[eRFPath];
- Offset &= 0x3f;
- //spin_lock_irqsave(&priv->rf_lock, flags);
-// down(&priv->rf_sem);
- if (priv->rf_chip == RF_8256)
- {
+ offset &= 0x3f;
+ if (priv->rf_chip == RF_8256) {
- if (Offset >= 31)
- {
+ if (offset >= 31) {
priv->RfReg0Value[eRFPath] |= 0x140;
- rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord, (priv->RfReg0Value[eRFPath] << 16));
- NewOffset = Offset - 30;
- }
- else if (Offset >= 16)
- {
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset,
+ bMaskDWord,
+ priv->RfReg0Value[eRFPath] << 16);
+ new_offset = offset - 30;
+ } else if (offset >= 16) {
priv->RfReg0Value[eRFPath] |= 0x100;
priv->RfReg0Value[eRFPath] &= (~0x40);
- rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord, (priv->RfReg0Value[eRFPath]<<16));
- NewOffset = Offset - 15;
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset,
+ bMaskDWord,
+ priv->RfReg0Value[eRFPath]<<16);
+ new_offset = offset - 15;
+ } else {
+ new_offset = offset;
}
- else
- NewOffset = Offset;
- }
- else
- {
- RT_TRACE((COMP_PHY|COMP_ERR), "check RF type here, need to be 8256\n");
- NewOffset = Offset;
+ } else {
+ RT_TRACE((COMP_PHY|COMP_ERR),
+ "check RF type here, need to be 8256\n");
+ new_offset = offset;
}
- // Put write addr in [5:0] and write data in [31:16]
- DataAndAddr = (Data<<16) | (NewOffset&0x3f);
+ /* Put write addr in [5:0] and write data in [31:16] */
+ DataAndAddr = (data<<16) | (new_offset&0x3f);
- // Write Operation
+ /* Write operation */
rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset, bMaskDWord, DataAndAddr);
- if(Offset==0x0)
- priv->RfReg0Value[eRFPath] = Data;
+ if (offset == 0x0)
+ priv->RfReg0Value[eRFPath] = data;
- // Switch back to Reg_Mode0;
- if(priv->rf_chip == RF_8256)
- {
- if(Offset != 0)
- {
+ /* Switch back to Reg_Mode0 */
+ if (priv->rf_chip == RF_8256) {
+ if (offset != 0) {
priv->RfReg0Value[eRFPath] &= 0xebf;
- rtl8192_setBBreg(
- dev,
- pPhyReg->rf3wireOffset,
- bMaskDWord,
- (priv->RfReg0Value[eRFPath] << 16));
+ rtl8192_setBBreg(dev, pPhyReg->rf3wireOffset,
+ bMaskDWord,
+ priv->RfReg0Value[eRFPath] << 16);
}
}
- //spin_unlock_irqrestore(&priv->rf_lock, flags);
-// up(&priv->rf_sem);
return;
}
/******************************************************************************
- *function: This function set specific bits to RF register
- * input: net_device dev
- * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
- * u32 RegAddr //target addr to be modified
- * u32 BitMask //taget bit pos in the addr to be modified
- * u32 Data //value to be write
- * output: none
- * return: none
- * notice:
- * ****************************************************************************/
-void rtl8192_phy_SetRFReg(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 RegAddr, u32 BitMask, u32 Data)
+ * function: This function set specific bits to RF register
+ * input: net_device dev
+ * RF90_RADIO_PATH_E eRFPath //radio path of A/B/C/D
+ * u32 reg_addr //target addr to be modified
+ * u32 bitmask //taget bit pos to be modified
+ * u32 data //value to be written
+ * output: none
+ * return: none
+ * notice:
+ *****************************************************************************/
+void rtl8192_phy_SetRFReg(struct net_device *dev, RF90_RADIO_PATH_E eRFPath,
+ u32 reg_addr, u32 bitmask, u32 data)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- u32 Original_Value, BitShift, New_Value;
-// u8 time = 0;
+ u32 reg, bitshift;
if (!rtl8192_phy_CheckIsLegalRFPath(dev, eRFPath))
return;
- if (priv->Rf_Mode == RF_OP_By_FW)
- {
- if (BitMask != bMask12Bits) // RF data is 12 bits only
- {
- Original_Value = phy_FwRFSerialRead(dev, eRFPath, RegAddr);
- BitShift = rtl8192_CalculateBitShift(BitMask);
- New_Value = ((Original_Value) & (~BitMask)) | (Data<< BitShift);
-
- phy_FwRFSerialWrite(dev, eRFPath, RegAddr, New_Value);
- }else
- phy_FwRFSerialWrite(dev, eRFPath, RegAddr, Data);
+ if (priv->Rf_Mode == RF_OP_By_FW) {
+ if (bitmask != bMask12Bits) {
+ /* RF data is 12 bits only */
+ reg = phy_FwRFSerialRead(dev, eRFPath, reg_addr);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
+ reg &= ~bitmask;
+ reg |= data << bitshift;
+
+ phy_FwRFSerialWrite(dev, eRFPath, reg_addr, reg);
+ } else {
+ phy_FwRFSerialWrite(dev, eRFPath, reg_addr, data);
+ }
udelay(200);
- }
- else
- {
- if (BitMask != bMask12Bits) // RF data is 12 bits only
- {
- Original_Value = rtl8192_phy_RFSerialRead(dev, eRFPath, RegAddr);
- BitShift = rtl8192_CalculateBitShift(BitMask);
- New_Value = (((Original_Value) & (~BitMask)) | (Data<< BitShift));
-
- rtl8192_phy_RFSerialWrite(dev, eRFPath, RegAddr, New_Value);
- }else
- rtl8192_phy_RFSerialWrite(dev, eRFPath, RegAddr, Data);
+ } else {
+ if (bitmask != bMask12Bits) {
+ /* RF data is 12 bits only */
+ reg = rtl8192_phy_RFSerialRead(dev, eRFPath, reg_addr);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
+ reg &= ~bitmask;
+ reg |= data << bitshift;
+
+ rtl8192_phy_RFSerialWrite(dev, eRFPath, reg_addr, reg);
+ } else {
+ rtl8192_phy_RFSerialWrite(dev, eRFPath, reg_addr, data);
+ }
}
return;
}
/******************************************************************************
- *function: This function reads specific bits from RF register
- * input: net_device dev
- * u32 RegAddr //target addr to be readback
- * u32 BitMask //taget bit pos in the addr to be readback
- * output: none
- * return: u32 Data //the readback register value
- * notice:
- * ****************************************************************************/
-u32 rtl8192_phy_QueryRFReg(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 RegAddr, u32 BitMask)
+ * function: This function reads specific bits from RF register
+ * input: net_device *dev
+ * u32 reg_addr //target addr to be readback
+ * u32 bitmask //taget bit pos to be readback
+ * output: none
+ * return: u32 data //the readback register value
+ * notice:
+ *****************************************************************************/
+u32 rtl8192_phy_QueryRFReg(struct net_device *dev, RF90_RADIO_PATH_E eRFPath,
+ u32 reg_addr, u32 bitmask)
{
- u32 Original_Value, Readback_Value, BitShift;
+ u32 reg, bitshift;
struct r8192_priv *priv = ieee80211_priv(dev);
if (!rtl8192_phy_CheckIsLegalRFPath(dev, eRFPath))
return 0;
- if (priv->Rf_Mode == RF_OP_By_FW)
- {
- Original_Value = phy_FwRFSerialRead(dev, eRFPath, RegAddr);
- BitShift = rtl8192_CalculateBitShift(BitMask);
- Readback_Value = (Original_Value & BitMask) >> BitShift;
+ if (priv->Rf_Mode == RF_OP_By_FW) {
+ reg = phy_FwRFSerialRead(dev, eRFPath, reg_addr);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
+ reg = (reg & bitmask) >> bitshift;
udelay(200);
- return (Readback_Value);
- }
- else
- {
- Original_Value = rtl8192_phy_RFSerialRead(dev, eRFPath, RegAddr);
- BitShift = rtl8192_CalculateBitShift(BitMask);
- Readback_Value = (Original_Value & BitMask) >> BitShift;
- return (Readback_Value);
+ return reg;
+ } else {
+ reg = rtl8192_phy_RFSerialRead(dev, eRFPath, reg_addr);
+ bitshift = rtl8192_CalculateBitShift(bitmask);
+ reg = (reg & bitmask) >> bitshift;
+ return reg;
}
}
+
/******************************************************************************
- *function: We support firmware to execute RF-R/W.
- * input: dev
- * output: none
- * return: none
- * notice:
- * ***************************************************************************/
-static u32
-phy_FwRFSerialRead(
- struct net_device* dev,
- RF90_RADIO_PATH_E eRFPath,
- u32 Offset )
+ * function: We support firmware to execute RF-R/W.
+ * input: net_device *dev
+ * RF90_RADIO_PATH_E eRFPath
+ * u32 offset
+ * output: none
+ * return: u32
+ * notice:
+ ****************************************************************************/
+static u32 phy_FwRFSerialRead(struct net_device *dev, RF90_RADIO_PATH_E eRFPath,
+ u32 offset)
{
- u32 retValue = 0;
- u32 Data = 0;
+ u32 reg = 0;
+ u32 data = 0;
u8 time = 0;
- //DbgPrint("FW RF CTRL\n\r");
- /* 2007/11/02 MH Firmware RF Write control. By Francis' suggestion, we can
- not execute the scheme in the initial step. Otherwise, RF-R/W will waste
- much time. This is only for site survey. */
- // 1. Read operation need not insert data. bit 0-11
- //Data &= bMask12Bits;
- // 2. Write RF register address. Bit 12-19
- Data |= ((Offset&0xFF)<<12);
- // 3. Write RF path. bit 20-21
- Data |= ((eRFPath&0x3)<<20);
- // 4. Set RF read indicator. bit 22=0
- //Data |= 0x00000;
- // 5. Trigger Fw to operate the command. bit 31
- Data |= 0x80000000;
- // 6. We can not execute read operation if bit 31 is 1.
- while (read_nic_dword(dev, QPNR)&0x80000000)
- {
- // If FW can not finish RF-R/W for more than ?? times. We must reset FW.
- if (time++ < 100)
- {
- //DbgPrint("FW not finish RF-R Time=%d\n\r", time);
+ u32 tmp;
+
+ /* Firmware RF Write control.
+ * We can not execute the scheme in the initial step.
+ * Otherwise, RF-R/W will waste much time.
+ * This is only for site survey. */
+ /* 1. Read operation need not insert data. bit 0-11 */
+ /* 2. Write RF register address. bit 12-19 */
+ data |= ((offset&0xFF)<<12);
+ /* 3. Write RF path. bit 20-21 */
+ data |= ((eRFPath&0x3)<<20);
+ /* 4. Set RF read indicator. bit 22=0 */
+ /* 5. Trigger Fw to operate the command. bit 31 */
+ data |= 0x80000000;
+ /* 6. We can not execute read operation if bit 31 is 1. */
+ read_nic_dword(dev, QPNR, &tmp);
+ while (tmp & 0x80000000) {
+ /* If FW can not finish RF-R/W for more than ?? times.
+ We must reset FW. */
+ if (time++ < 100) {
udelay(10);
- }
- else
+ read_nic_dword(dev, QPNR, &tmp);
+ } else {
break;
+ }
}
- // 7. Execute read operation.
- write_nic_dword(dev, QPNR, Data);
- // 8. Check if firmawre send back RF content.
- while (read_nic_dword(dev, QPNR)&0x80000000)
- {
- // If FW can not finish RF-R/W for more than ?? times. We must reset FW.
- if (time++ < 100)
- {
- //DbgPrint("FW not finish RF-W Time=%d\n\r", time);
+ /* 7. Execute read operation. */
+ write_nic_dword(dev, QPNR, data);
+ /* 8. Check if firmware send back RF content. */
+ read_nic_dword(dev, QPNR, &tmp);
+ while (tmp & 0x80000000) {
+ /* If FW can not finish RF-R/W for more than ?? times.
+ We must reset FW. */
+ if (time++ < 100) {
udelay(10);
+ read_nic_dword(dev, QPNR, &tmp);
+ } else {
+ return 0;
}
- else
- return (0);
}
- retValue = read_nic_dword(dev, RF_DATA);
-
- return (retValue);
+ read_nic_dword(dev, RF_DATA, &reg);
-} /* phy_FwRFSerialRead */
+ return reg;
+}
/******************************************************************************
- *function: We support firmware to execute RF-R/W.
- * input: dev
- * output: none
- * return: none
- * notice:
- * ***************************************************************************/
-static void
-phy_FwRFSerialWrite(
- struct net_device* dev,
- RF90_RADIO_PATH_E eRFPath,
- u32 Offset,
- u32 Data )
+ * function: We support firmware to execute RF-R/W.
+ * input: net_device *dev
+ * RF90_RADIO_PATH_E eRFPath
+ * u32 offset
+ * u32 data
+ * output: none
+ * return: none
+ * notice:
+ ****************************************************************************/
+static void phy_FwRFSerialWrite(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath, u32 offset, u32 data)
{
u8 time = 0;
-
- //DbgPrint("N FW RF CTRL RF-%d OF%02x DATA=%03x\n\r", eRFPath, Offset, Data);
- /* 2007/11/02 MH Firmware RF Write control. By Francis' suggestion, we can
- not execute the scheme in the initial step. Otherwise, RF-R/W will waste
- much time. This is only for site survey. */
-
- // 1. Set driver write bit and 12 bit data. bit 0-11
- //Data &= bMask12Bits; // Done by uper layer.
- // 2. Write RF register address. bit 12-19
- Data |= ((Offset&0xFF)<<12);
- // 3. Write RF path. bit 20-21
- Data |= ((eRFPath&0x3)<<20);
- // 4. Set RF write indicator. bit 22=1
- Data |= 0x400000;
- // 5. Trigger Fw to operate the command. bit 31=1
- Data |= 0x80000000;
-
- // 6. Write operation. We can not write if bit 31 is 1.
- while (read_nic_dword(dev, QPNR)&0x80000000)
- {
- // If FW can not finish RF-R/W for more than ?? times. We must reset FW.
- if (time++ < 100)
- {
- //DbgPrint("FW not finish RF-W Time=%d\n\r", time);
+ u32 tmp;
+
+ /* Firmware RF Write control.
+ * We can not execute the scheme in the initial step.
+ * Otherwise, RF-R/W will waste much time.
+ * This is only for site survey. */
+
+ /* 1. Set driver write bit and 12 bit data. bit 0-11 */
+ /* 2. Write RF register address. bit 12-19 */
+ data |= ((offset&0xFF)<<12);
+ /* 3. Write RF path. bit 20-21 */
+ data |= ((eRFPath&0x3)<<20);
+ /* 4. Set RF write indicator. bit 22=1 */
+ data |= 0x400000;
+ /* 5. Trigger Fw to operate the command. bit 31=1 */
+ data |= 0x80000000;
+
+ /* 6. Write operation. We can not write if bit 31 is 1. */
+ read_nic_dword(dev, QPNR, &tmp);
+ while (tmp & 0x80000000) {
+ /* If FW can not finish RF-R/W for more than ?? times.
+ We must reset FW. */
+ if (time++ < 100) {
udelay(10);
- }
- else
+ read_nic_dword(dev, QPNR, &tmp);
+ } else {
break;
+ }
}
- // 7. No matter check bit. We always force the write. Because FW will
- // not accept the command.
- write_nic_dword(dev, QPNR, Data);
- /* 2007/11/02 MH Acoording to test, we must delay 20us to wait firmware
+ /* 7. No matter check bit. We always force the write.
+ Because FW will not accept the command. */
+ write_nic_dword(dev, QPNR, data);
+ /* According to test, we must delay 20us to wait firmware
to finish RF write operation. */
- /* 2008/01/17 MH We support delay in firmware side now. */
- //delay_us(20);
-
-} /* phy_FwRFSerialWrite */
-
+ /* We support delay in firmware side now. */
+}
/******************************************************************************
- *function: This function read BB parameters from Header file we gen,
- * and do register read/write
- * input: dev
- * output: none
- * return: none
- * notice: BB parameters may change all the time, so please make
- * sure it has been synced with the newest.
- * ***************************************************************************/
-void rtl8192_phy_configmac(struct net_device* dev)
+ * function: This function reads BB parameters from header file we generate,
+ * and do register read/write
+ * input: net_device *dev
+ * output: none
+ * return: none
+ * notice: BB parameters may change all the time, so please make
+ * sure it has been synced with the newest.
+ *****************************************************************************/
+void rtl8192_phy_configmac(struct net_device *dev)
{
u32 dwArrayLen = 0, i;
- u32* pdwArray = NULL;
+ u32 *pdwArray = NULL;
struct r8192_priv *priv = ieee80211_priv(dev);
- if(priv->btxpowerdata_readfromEEPORM)
- {
+ if (priv->btxpowerdata_readfromEEPORM) {
RT_TRACE(COMP_PHY, "Rtl819XMACPHY_Array_PG\n");
dwArrayLen = MACPHY_Array_PGLength;
pdwArray = rtl819XMACPHY_Array_PG;
- }
- else
- {
+ } else {
RT_TRACE(COMP_PHY, "Rtl819XMACPHY_Array\n");
dwArrayLen = MACPHY_ArrayLength;
pdwArray = rtl819XMACPHY_Array;
}
- for(i = 0; i<dwArrayLen; i=i+3){
- if(pdwArray[i] == 0x318)
- {
+ for (i = 0; i < dwArrayLen; i = i+3) {
+ if (pdwArray[i] == 0x318) {
pdwArray[i+2] = 0x00000800;
- //DbgPrint("ptrArray[i], ptrArray[i+1], ptrArray[i+2] = %x, %x, %x\n",
- // ptrArray[i], ptrArray[i+1], ptrArray[i+2]);
}
- RT_TRACE(COMP_DBG, "The Rtl8190MACPHY_Array[0] is %x Rtl8190MACPHY_Array[1] is %x Rtl8190MACPHY_Array[2] is %x\n",
- pdwArray[i], pdwArray[i+1], pdwArray[i+2]);
- rtl8192_setBBreg(dev, pdwArray[i], pdwArray[i+1], pdwArray[i+2]);
+ RT_TRACE(COMP_DBG,
+ "Rtl8190MACPHY_Array[0]=%x Rtl8190MACPHY_Array[1]=%x Rtl8190MACPHY_Array[2]=%x\n",
+ pdwArray[i], pdwArray[i+1], pdwArray[i+2]);
+ rtl8192_setBBreg(dev, pdwArray[i], pdwArray[i+1],
+ pdwArray[i+2]);
}
return;
-
}
/******************************************************************************
- *function: This function does dirty work
- * input: dev
- * output: none
- * return: none
- * notice: BB parameters may change all the time, so please make
- * sure it has been synced with the newest.
- * ***************************************************************************/
-
-void rtl8192_phyConfigBB(struct net_device* dev, u8 ConfigType)
+ * function: This function does dirty work
+ * input: net_device *dev
+ * u8 ConfigType
+ * output: none
+ * return: none
+ * notice: BB parameters may change all the time, so please make
+ * sure it has been synced with the newest.
+ *****************************************************************************/
+void rtl8192_phyConfigBB(struct net_device *dev, u8 ConfigType)
{
u32 i;
#ifdef TO_DO_LIST
u32 *rtl8192PhyRegArrayTable = NULL, *rtl8192AgcTabArrayTable = NULL;
- if(Adapter->bInHctTest)
- {
+
+ if (Adapter->bInHctTest) {
PHY_REGArrayLen = PHY_REGArrayLengthDTM;
AGCTAB_ArrayLen = AGCTAB_ArrayLengthDTM;
Rtl8190PHY_REGArray_Table = Rtl819XPHY_REGArrayDTM;
Rtl8190AGCTAB_Array_Table = Rtl819XAGCTAB_ArrayDTM;
}
#endif
- if (ConfigType == BaseBand_Config_PHY_REG)
- {
- for (i=0; i<PHY_REG_1T2RArrayLength; i+=2)
- {
- rtl8192_setBBreg(dev, rtl819XPHY_REG_1T2RArray[i], bMaskDWord, rtl819XPHY_REG_1T2RArray[i+1]);
- RT_TRACE(COMP_DBG, "i: %x, The Rtl819xUsbPHY_REGArray[0] is %x Rtl819xUsbPHY_REGArray[1] is %x \n",i, rtl819XPHY_REG_1T2RArray[i], rtl819XPHY_REG_1T2RArray[i+1]);
+ if (ConfigType == BaseBand_Config_PHY_REG) {
+ for (i = 0; i < PHY_REG_1T2RArrayLength; i += 2) {
+ rtl8192_setBBreg(dev, rtl819XPHY_REG_1T2RArray[i],
+ bMaskDWord,
+ rtl819XPHY_REG_1T2RArray[i+1]);
+ RT_TRACE(COMP_DBG,
+ "i: %x, Rtl819xUsbPHY_REGArray[0]=%x Rtl819xUsbPHY_REGArray[1]=%x\n",
+ i, rtl819XPHY_REG_1T2RArray[i],
+ rtl819XPHY_REG_1T2RArray[i+1]);
}
- }
- else if (ConfigType == BaseBand_Config_AGC_TAB)
- {
- for (i=0; i<AGCTAB_ArrayLength; i+=2)
- {
- rtl8192_setBBreg(dev, rtl819XAGCTAB_Array[i], bMaskDWord, rtl819XAGCTAB_Array[i+1]);
- RT_TRACE(COMP_DBG, "i:%x, The rtl819XAGCTAB_Array[0] is %x rtl819XAGCTAB_Array[1] is %x \n",i, rtl819XAGCTAB_Array[i], rtl819XAGCTAB_Array[i+1]);
+ } else if (ConfigType == BaseBand_Config_AGC_TAB) {
+ for (i = 0; i < AGCTAB_ArrayLength; i += 2) {
+ rtl8192_setBBreg(dev, rtl819XAGCTAB_Array[i],
+ bMaskDWord, rtl819XAGCTAB_Array[i+1]);
+ RT_TRACE(COMP_DBG,
+ "i: %x, rtl819XAGCTAB_Array[0]=%x rtl819XAGCTAB_Array[1]=%x\n",
+ i, rtl819XAGCTAB_Array[i],
+ rtl819XAGCTAB_Array[i+1]);
}
}
return;
-
-
}
+
/******************************************************************************
- *function: This function initialize Register definition offset for Radio Path
- * A/B/C/D
- * input: net_device dev
- * output: none
- * return: none
- * notice: Initialization value here is constant and it should never be changed
- * ***************************************************************************/
-void rtl8192_InitBBRFRegDef(struct net_device* dev)
+ * function: This function initializes Register definition offset for
+ * Radio Path A/B/C/D
+ * input: net_device *dev
+ * output: none
+ * return: none
+ * notice: Initialization value here is constant and it should never
+ * be changed
+ *****************************************************************************/
+void rtl8192_InitBBRFRegDef(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
-// RF Interface Software Control
- priv->PHYRegDef[RF90_PATH_A].rfintfs = rFPGA0_XAB_RFInterfaceSW; // 16 LSBs if read 32-bit from 0x870
- priv->PHYRegDef[RF90_PATH_B].rfintfs = rFPGA0_XAB_RFInterfaceSW; // 16 MSBs if read 32-bit from 0x870 (16-bit for 0x872)
- priv->PHYRegDef[RF90_PATH_C].rfintfs = rFPGA0_XCD_RFInterfaceSW;// 16 LSBs if read 32-bit from 0x874
- priv->PHYRegDef[RF90_PATH_D].rfintfs = rFPGA0_XCD_RFInterfaceSW;// 16 MSBs if read 32-bit from 0x874 (16-bit for 0x876)
-
- // RF Interface Readback Value
- priv->PHYRegDef[RF90_PATH_A].rfintfi = rFPGA0_XAB_RFInterfaceRB; // 16 LSBs if read 32-bit from 0x8E0
- priv->PHYRegDef[RF90_PATH_B].rfintfi = rFPGA0_XAB_RFInterfaceRB;// 16 MSBs if read 32-bit from 0x8E0 (16-bit for 0x8E2)
- priv->PHYRegDef[RF90_PATH_C].rfintfi = rFPGA0_XCD_RFInterfaceRB;// 16 LSBs if read 32-bit from 0x8E4
- priv->PHYRegDef[RF90_PATH_D].rfintfi = rFPGA0_XCD_RFInterfaceRB;// 16 MSBs if read 32-bit from 0x8E4 (16-bit for 0x8E6)
-
- // RF Interface Output (and Enable)
- priv->PHYRegDef[RF90_PATH_A].rfintfo = rFPGA0_XA_RFInterfaceOE; // 16 LSBs if read 32-bit from 0x860
- priv->PHYRegDef[RF90_PATH_B].rfintfo = rFPGA0_XB_RFInterfaceOE; // 16 LSBs if read 32-bit from 0x864
- priv->PHYRegDef[RF90_PATH_C].rfintfo = rFPGA0_XC_RFInterfaceOE;// 16 LSBs if read 32-bit from 0x868
- priv->PHYRegDef[RF90_PATH_D].rfintfo = rFPGA0_XD_RFInterfaceOE;// 16 LSBs if read 32-bit from 0x86C
-
- // RF Interface (Output and) Enable
- priv->PHYRegDef[RF90_PATH_A].rfintfe = rFPGA0_XA_RFInterfaceOE; // 16 MSBs if read 32-bit from 0x860 (16-bit for 0x862)
- priv->PHYRegDef[RF90_PATH_B].rfintfe = rFPGA0_XB_RFInterfaceOE; // 16 MSBs if read 32-bit from 0x864 (16-bit for 0x866)
- priv->PHYRegDef[RF90_PATH_C].rfintfe = rFPGA0_XC_RFInterfaceOE;// 16 MSBs if read 32-bit from 0x86A (16-bit for 0x86A)
- priv->PHYRegDef[RF90_PATH_D].rfintfe = rFPGA0_XD_RFInterfaceOE;// 16 MSBs if read 32-bit from 0x86C (16-bit for 0x86E)
-
- //Addr of LSSI. Write RF register by driver
- priv->PHYRegDef[RF90_PATH_A].rf3wireOffset = rFPGA0_XA_LSSIParameter; //LSSI Parameter
+
+ /* RF Interface Software Control */
+ /* 16 LSBs if read 32-bit from 0x870 */
+ priv->PHYRegDef[RF90_PATH_A].rfintfs = rFPGA0_XAB_RFInterfaceSW;
+ /* 16 MSBs if read 32-bit from 0x870 (16-bit for 0x872) */
+ priv->PHYRegDef[RF90_PATH_B].rfintfs = rFPGA0_XAB_RFInterfaceSW;
+ /* 16 LSBs if read 32-bit from 0x874 */
+ priv->PHYRegDef[RF90_PATH_C].rfintfs = rFPGA0_XCD_RFInterfaceSW;
+ /* 16 MSBs if read 32-bit from 0x874 (16-bit for 0x876) */
+ priv->PHYRegDef[RF90_PATH_D].rfintfs = rFPGA0_XCD_RFInterfaceSW;
+
+ /* RF Interface Readback Value */
+ /* 16 LSBs if read 32-bit from 0x8E0 */
+ priv->PHYRegDef[RF90_PATH_A].rfintfi = rFPGA0_XAB_RFInterfaceRB;
+ /* 16 MSBs if read 32-bit from 0x8E0 (16-bit for 0x8E2) */
+ priv->PHYRegDef[RF90_PATH_B].rfintfi = rFPGA0_XAB_RFInterfaceRB;
+ /* 16 LSBs if read 32-bit from 0x8E4 */
+ priv->PHYRegDef[RF90_PATH_C].rfintfi = rFPGA0_XCD_RFInterfaceRB;
+ /* 16 MSBs if read 32-bit from 0x8E4 (16-bit for 0x8E6) */
+ priv->PHYRegDef[RF90_PATH_D].rfintfi = rFPGA0_XCD_RFInterfaceRB;
+
+ /* RF Interface Output (and Enable) */
+ /* 16 LSBs if read 32-bit from 0x860 */
+ priv->PHYRegDef[RF90_PATH_A].rfintfo = rFPGA0_XA_RFInterfaceOE;
+ /* 16 LSBs if read 32-bit from 0x864 */
+ priv->PHYRegDef[RF90_PATH_B].rfintfo = rFPGA0_XB_RFInterfaceOE;
+ /* 16 LSBs if read 32-bit from 0x868 */
+ priv->PHYRegDef[RF90_PATH_C].rfintfo = rFPGA0_XC_RFInterfaceOE;
+ /* 16 LSBs if read 32-bit from 0x86C */
+ priv->PHYRegDef[RF90_PATH_D].rfintfo = rFPGA0_XD_RFInterfaceOE;
+
+ /* RF Interface (Output and) Enable */
+ /* 16 MSBs if read 32-bit from 0x860 (16-bit for 0x862) */
+ priv->PHYRegDef[RF90_PATH_A].rfintfe = rFPGA0_XA_RFInterfaceOE;
+ /* 16 MSBs if read 32-bit from 0x864 (16-bit for 0x866) */
+ priv->PHYRegDef[RF90_PATH_B].rfintfe = rFPGA0_XB_RFInterfaceOE;
+ /* 16 MSBs if read 32-bit from 0x86A (16-bit for 0x86A) */
+ priv->PHYRegDef[RF90_PATH_C].rfintfe = rFPGA0_XC_RFInterfaceOE;
+ /* 16 MSBs if read 32-bit from 0x86C (16-bit for 0x86E) */
+ priv->PHYRegDef[RF90_PATH_D].rfintfe = rFPGA0_XD_RFInterfaceOE;
+
+ /* Addr of LSSI. Write RF register by driver */
+ priv->PHYRegDef[RF90_PATH_A].rf3wireOffset = rFPGA0_XA_LSSIParameter;
priv->PHYRegDef[RF90_PATH_B].rf3wireOffset = rFPGA0_XB_LSSIParameter;
priv->PHYRegDef[RF90_PATH_C].rf3wireOffset = rFPGA0_XC_LSSIParameter;
priv->PHYRegDef[RF90_PATH_D].rf3wireOffset = rFPGA0_XD_LSSIParameter;
- // RF parameter
- priv->PHYRegDef[RF90_PATH_A].rfLSSI_Select = rFPGA0_XAB_RFParameter; //BB Band Select
+ /* RF parameter */
+ /* BB Band Select */
+ priv->PHYRegDef[RF90_PATH_A].rfLSSI_Select = rFPGA0_XAB_RFParameter;
priv->PHYRegDef[RF90_PATH_B].rfLSSI_Select = rFPGA0_XAB_RFParameter;
priv->PHYRegDef[RF90_PATH_C].rfLSSI_Select = rFPGA0_XCD_RFParameter;
priv->PHYRegDef[RF90_PATH_D].rfLSSI_Select = rFPGA0_XCD_RFParameter;
- // Tx AGC Gain Stage (same for all path. Should we remove this?)
- priv->PHYRegDef[RF90_PATH_A].rfTxGainStage = rFPGA0_TxGainStage; //Tx gain stage
- priv->PHYRegDef[RF90_PATH_B].rfTxGainStage = rFPGA0_TxGainStage; //Tx gain stage
- priv->PHYRegDef[RF90_PATH_C].rfTxGainStage = rFPGA0_TxGainStage; //Tx gain stage
- priv->PHYRegDef[RF90_PATH_D].rfTxGainStage = rFPGA0_TxGainStage; //Tx gain stage
-
- // Tranceiver A~D HSSI Parameter-1
- priv->PHYRegDef[RF90_PATH_A].rfHSSIPara1 = rFPGA0_XA_HSSIParameter1; //wire control parameter1
- priv->PHYRegDef[RF90_PATH_B].rfHSSIPara1 = rFPGA0_XB_HSSIParameter1; //wire control parameter1
- priv->PHYRegDef[RF90_PATH_C].rfHSSIPara1 = rFPGA0_XC_HSSIParameter1; //wire control parameter1
- priv->PHYRegDef[RF90_PATH_D].rfHSSIPara1 = rFPGA0_XD_HSSIParameter1; //wire control parameter1
-
- // Tranceiver A~D HSSI Parameter-2
- priv->PHYRegDef[RF90_PATH_A].rfHSSIPara2 = rFPGA0_XA_HSSIParameter2; //wire control parameter2
- priv->PHYRegDef[RF90_PATH_B].rfHSSIPara2 = rFPGA0_XB_HSSIParameter2; //wire control parameter2
- priv->PHYRegDef[RF90_PATH_C].rfHSSIPara2 = rFPGA0_XC_HSSIParameter2; //wire control parameter2
- priv->PHYRegDef[RF90_PATH_D].rfHSSIPara2 = rFPGA0_XD_HSSIParameter2; //wire control parameter1
-
- // RF switch Control
- priv->PHYRegDef[RF90_PATH_A].rfSwitchControl = rFPGA0_XAB_SwitchControl; //TR/Ant switch control
+ /* Tx AGC Gain Stage (same for all path. Should we remove this?) */
+ priv->PHYRegDef[RF90_PATH_A].rfTxGainStage = rFPGA0_TxGainStage;
+ priv->PHYRegDef[RF90_PATH_B].rfTxGainStage = rFPGA0_TxGainStage;
+ priv->PHYRegDef[RF90_PATH_C].rfTxGainStage = rFPGA0_TxGainStage;
+ priv->PHYRegDef[RF90_PATH_D].rfTxGainStage = rFPGA0_TxGainStage;
+
+ /* Tranceiver A~D HSSI Parameter-1 */
+ /* wire control parameter1 */
+ priv->PHYRegDef[RF90_PATH_A].rfHSSIPara1 = rFPGA0_XA_HSSIParameter1;
+ priv->PHYRegDef[RF90_PATH_B].rfHSSIPara1 = rFPGA0_XB_HSSIParameter1;
+ priv->PHYRegDef[RF90_PATH_C].rfHSSIPara1 = rFPGA0_XC_HSSIParameter1;
+ priv->PHYRegDef[RF90_PATH_D].rfHSSIPara1 = rFPGA0_XD_HSSIParameter1;
+
+ /* Tranceiver A~D HSSI Parameter-2 */
+ /* wire control parameter2 */
+ priv->PHYRegDef[RF90_PATH_A].rfHSSIPara2 = rFPGA0_XA_HSSIParameter2;
+ priv->PHYRegDef[RF90_PATH_B].rfHSSIPara2 = rFPGA0_XB_HSSIParameter2;
+ priv->PHYRegDef[RF90_PATH_C].rfHSSIPara2 = rFPGA0_XC_HSSIParameter2;
+ priv->PHYRegDef[RF90_PATH_D].rfHSSIPara2 = rFPGA0_XD_HSSIParameter2;
+
+ /* RF Switch Control */
+ /* TR/Ant switch control */
+ priv->PHYRegDef[RF90_PATH_A].rfSwitchControl = rFPGA0_XAB_SwitchControl;
priv->PHYRegDef[RF90_PATH_B].rfSwitchControl = rFPGA0_XAB_SwitchControl;
priv->PHYRegDef[RF90_PATH_C].rfSwitchControl = rFPGA0_XCD_SwitchControl;
priv->PHYRegDef[RF90_PATH_D].rfSwitchControl = rFPGA0_XCD_SwitchControl;
- // AGC control 1
+ /* AGC control 1 */
priv->PHYRegDef[RF90_PATH_A].rfAGCControl1 = rOFDM0_XAAGCCore1;
priv->PHYRegDef[RF90_PATH_B].rfAGCControl1 = rOFDM0_XBAGCCore1;
priv->PHYRegDef[RF90_PATH_C].rfAGCControl1 = rOFDM0_XCAGCCore1;
priv->PHYRegDef[RF90_PATH_D].rfAGCControl1 = rOFDM0_XDAGCCore1;
- // AGC control 2
+ /* AGC control 2 */
priv->PHYRegDef[RF90_PATH_A].rfAGCControl2 = rOFDM0_XAAGCCore2;
priv->PHYRegDef[RF90_PATH_B].rfAGCControl2 = rOFDM0_XBAGCCore2;
priv->PHYRegDef[RF90_PATH_C].rfAGCControl2 = rOFDM0_XCAGCCore2;
priv->PHYRegDef[RF90_PATH_D].rfAGCControl2 = rOFDM0_XDAGCCore2;
- // RX AFE control 1
+ /* RX AFE control 1 */
priv->PHYRegDef[RF90_PATH_A].rfRxIQImbalance = rOFDM0_XARxIQImbalance;
priv->PHYRegDef[RF90_PATH_B].rfRxIQImbalance = rOFDM0_XBRxIQImbalance;
priv->PHYRegDef[RF90_PATH_C].rfRxIQImbalance = rOFDM0_XCRxIQImbalance;
priv->PHYRegDef[RF90_PATH_D].rfRxIQImbalance = rOFDM0_XDRxIQImbalance;
- // RX AFE control 1
+ /* RX AFE control 1 */
priv->PHYRegDef[RF90_PATH_A].rfRxAFE = rOFDM0_XARxAFE;
priv->PHYRegDef[RF90_PATH_B].rfRxAFE = rOFDM0_XBRxAFE;
priv->PHYRegDef[RF90_PATH_C].rfRxAFE = rOFDM0_XCRxAFE;
priv->PHYRegDef[RF90_PATH_D].rfRxAFE = rOFDM0_XDRxAFE;
- // Tx AFE control 1
+ /* Tx AFE control 1 */
priv->PHYRegDef[RF90_PATH_A].rfTxIQImbalance = rOFDM0_XATxIQImbalance;
priv->PHYRegDef[RF90_PATH_B].rfTxIQImbalance = rOFDM0_XBTxIQImbalance;
priv->PHYRegDef[RF90_PATH_C].rfTxIQImbalance = rOFDM0_XCTxIQImbalance;
priv->PHYRegDef[RF90_PATH_D].rfTxIQImbalance = rOFDM0_XDTxIQImbalance;
- // Tx AFE control 2
+ /* Tx AFE control 2 */
priv->PHYRegDef[RF90_PATH_A].rfTxAFE = rOFDM0_XATxAFE;
priv->PHYRegDef[RF90_PATH_B].rfTxAFE = rOFDM0_XBTxAFE;
priv->PHYRegDef[RF90_PATH_C].rfTxAFE = rOFDM0_XCTxAFE;
priv->PHYRegDef[RF90_PATH_D].rfTxAFE = rOFDM0_XDTxAFE;
- // Tranceiver LSSI Readback
+ /* Tranceiver LSSI Readback */
priv->PHYRegDef[RF90_PATH_A].rfLSSIReadBack = rFPGA0_XA_LSSIReadBack;
priv->PHYRegDef[RF90_PATH_B].rfLSSIReadBack = rFPGA0_XB_LSSIReadBack;
priv->PHYRegDef[RF90_PATH_C].rfLSSIReadBack = rFPGA0_XC_LSSIReadBack;
priv->PHYRegDef[RF90_PATH_D].rfLSSIReadBack = rFPGA0_XD_LSSIReadBack;
-
}
+
/******************************************************************************
- *function: This function is to write register and then readback to make sure whether BB and RF is OK
- * input: net_device dev
- * HW90_BLOCK_E CheckBlock
- * RF90_RADIO_PATH_E eRFPath //only used when checkblock is HW90_BLOCK_RF
- * output: none
- * return: return whether BB and RF is ok(0:OK; 1:Fail)
- * notice: This function may be removed in the ASIC
- * ***************************************************************************/
-u8 rtl8192_phy_checkBBAndRF(struct net_device* dev, HW90_BLOCK_E CheckBlock, RF90_RADIO_PATH_E eRFPath)
+ * function: This function is to write register and then readback to make
+ * sure whether BB and RF is OK
+ * input: net_device *dev
+ * HW90_BLOCK_E CheckBlock
+ * RF90_RADIO_PATH_E eRFPath //only used when checkblock is
+ * //HW90_BLOCK_RF
+ * output: none
+ * return: return whether BB and RF is ok (0:OK, 1:Fail)
+ * notice: This function may be removed in the ASIC
+ ******************************************************************************/
+u8 rtl8192_phy_checkBBAndRF(struct net_device *dev, HW90_BLOCK_E CheckBlock,
+ RF90_RADIO_PATH_E eRFPath)
{
-// struct r8192_priv *priv = ieee80211_priv(dev);
-// BB_REGISTER_DEFINITION_T *pPhyReg = &priv->PHYRegDef[eRFPath];
u8 ret = 0;
- u32 i, CheckTimes = 4, dwRegRead = 0;
+ u32 i, CheckTimes = 4, reg = 0;
u32 WriteAddr[4];
u32 WriteData[] = {0xfffff027, 0xaa55a02f, 0x00000027, 0x55aa502f};
- // Initialize register address offset to be checked
+
+ /* Initialize register address offset to be checked */
WriteAddr[HW90_BLOCK_MAC] = 0x100;
WriteAddr[HW90_BLOCK_PHY0] = 0x900;
WriteAddr[HW90_BLOCK_PHY1] = 0x800;
WriteAddr[HW90_BLOCK_RF] = 0x3;
- RT_TRACE(COMP_PHY, "=======>%s(), CheckBlock:%d\n", __FUNCTION__, CheckBlock);
- for(i=0 ; i < CheckTimes ; i++)
- {
-
- //
- // Write Data to register and readback
- //
- switch(CheckBlock)
- {
+ RT_TRACE(COMP_PHY, "%s(), CheckBlock: %d\n", __func__, CheckBlock);
+ for (i = 0; i < CheckTimes; i++) {
+
+ /* Write data to register and readback */
+ switch (CheckBlock) {
case HW90_BLOCK_MAC:
- RT_TRACE(COMP_ERR, "PHY_CheckBBRFOK(): Never Write 0x100 here!");
+ RT_TRACE(COMP_ERR,
+ "PHY_CheckBBRFOK(): Never Write 0x100 here!\n");
break;
case HW90_BLOCK_PHY0:
case HW90_BLOCK_PHY1:
- write_nic_dword(dev, WriteAddr[CheckBlock], WriteData[i]);
- dwRegRead = read_nic_dword(dev, WriteAddr[CheckBlock]);
+ write_nic_dword(dev, WriteAddr[CheckBlock],
+ WriteData[i]);
+ read_nic_dword(dev, WriteAddr[CheckBlock], &reg);
break;
case HW90_BLOCK_RF:
WriteData[i] &= 0xfff;
- rtl8192_phy_SetRFReg(dev, eRFPath, WriteAddr[HW90_BLOCK_RF], bMask12Bits, WriteData[i]);
- // TODO: we should not delay for such a long time. Ask SD3
- msleep(1);
- dwRegRead = rtl8192_phy_QueryRFReg(dev, eRFPath, WriteAddr[HW90_BLOCK_RF], bMask12Bits);
- msleep(1);
+ rtl8192_phy_SetRFReg(dev, eRFPath,
+ WriteAddr[HW90_BLOCK_RF],
+ bMask12Bits, WriteData[i]);
+ /* TODO: we should not delay for such a long time.
+ Ask SD3 */
+ usleep_range(1000, 1000);
+ reg = rtl8192_phy_QueryRFReg(dev, eRFPath,
+ WriteAddr[HW90_BLOCK_RF],
+ bMask12Bits);
+ usleep_range(1000, 1000);
break;
default:
@@ -737,12 +759,11 @@ u8 rtl8192_phy_checkBBAndRF(struct net_device* dev, HW90_BLOCK_E CheckBlock, RF9
}
- //
- // Check whether readback data is correct
- //
- if(dwRegRead != WriteData[i])
- {
- RT_TRACE((COMP_PHY|COMP_ERR), "====>error=====dwRegRead: %x, WriteData: %x \n", dwRegRead, WriteData[i]);
+ /* Check whether readback data is correct */
+ if (reg != WriteData[i]) {
+ RT_TRACE((COMP_PHY|COMP_ERR),
+ "error reg: %x, WriteData: %x\n",
+ reg, WriteData[i]);
ret = 1;
break;
}
@@ -751,179 +772,193 @@ u8 rtl8192_phy_checkBBAndRF(struct net_device* dev, HW90_BLOCK_E CheckBlock, RF9
return ret;
}
-
/******************************************************************************
- *function: This function initialize BB&RF
- * input: net_device dev
- * output: none
- * return: none
- * notice: Initialization value may change all the time, so please make
- * sure it has been synced with the newest.
- * ***************************************************************************/
-void rtl8192_BB_Config_ParaFile(struct net_device* dev)
+ * function: This function initializes BB&RF
+ * input: net_device *dev
+ * output: none
+ * return: none
+ * notice: Initialization value may change all the time, so please make
+ * sure it has been synced with the newest.
+ ******************************************************************************/
+void rtl8192_BB_Config_ParaFile(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- u8 bRegValue = 0, eCheckItem = 0, rtStatus = 0;
- u32 dwRegValue = 0;
+ u8 reg_u8 = 0, eCheckItem = 0, status = 0;
+ u32 reg_u32 = 0;
+
/**************************************
- //<1>Initialize BaseBand
- **************************************/
+ * <1> Initialize BaseBand
+ *************************************/
- /*--set BB Global Reset--*/
- bRegValue = read_nic_byte(dev, BB_GLOBAL_RESET);
- write_nic_byte(dev, BB_GLOBAL_RESET,(bRegValue|BB_GLOBAL_RESET_BIT));
+ /* --set BB Global Reset-- */
+ read_nic_byte(dev, BB_GLOBAL_RESET, &reg_u8);
+ write_nic_byte(dev, BB_GLOBAL_RESET, (reg_u8|BB_GLOBAL_RESET_BIT));
mdelay(50);
- /*---set BB reset Active---*/
- dwRegValue = read_nic_dword(dev, CPU_GEN);
- write_nic_dword(dev, CPU_GEN, (dwRegValue&(~CPU_GEN_BB_RST)));
-
- /*----Ckeck FPGAPHY0 and PHY1 board is OK----*/
- // TODO: this function should be removed on ASIC , Emily 2007.2.2
- for(eCheckItem=(HW90_BLOCK_E)HW90_BLOCK_PHY0; eCheckItem<=HW90_BLOCK_PHY1; eCheckItem++)
- {
- rtStatus = rtl8192_phy_checkBBAndRF(dev, (HW90_BLOCK_E)eCheckItem, (RF90_RADIO_PATH_E)0); //don't care RF path
- if(rtStatus != 0)
- {
- RT_TRACE((COMP_ERR | COMP_PHY), "PHY_RF8256_Config():Check PHY%d Fail!!\n", eCheckItem-1);
- return ;
+ /* ---set BB reset Active--- */
+ read_nic_dword(dev, CPU_GEN, &reg_u32);
+ write_nic_dword(dev, CPU_GEN, (reg_u32&(~CPU_GEN_BB_RST)));
+
+ /* ----Ckeck FPGAPHY0 and PHY1 board is OK---- */
+ /* TODO: this function should be removed on ASIC */
+ for (eCheckItem = (HW90_BLOCK_E)HW90_BLOCK_PHY0;
+ eCheckItem <= HW90_BLOCK_PHY1; eCheckItem++) {
+ /* don't care RF path */
+ status = rtl8192_phy_checkBBAndRF(dev, (HW90_BLOCK_E)eCheckItem,
+ (RF90_RADIO_PATH_E)0);
+ if (status != 0) {
+ RT_TRACE((COMP_ERR | COMP_PHY),
+ "PHY_RF8256_Config(): Check PHY%d Fail!!\n",
+ eCheckItem-1);
+ return;
}
}
- /*---- Set CCK and OFDM Block "OFF"----*/
+ /* ---- Set CCK and OFDM Block "OFF"---- */
rtl8192_setBBreg(dev, rFPGA0_RFMOD, bCCKEn|bOFDMEn, 0x0);
- /*----BB Register Initilazation----*/
- //==m==>Set PHY REG From Header<==m==
+ /* ----BB Register Initilazation---- */
+ /* ==m==>Set PHY REG From Header<==m== */
rtl8192_phyConfigBB(dev, BaseBand_Config_PHY_REG);
- /*----Set BB reset de-Active----*/
- dwRegValue = read_nic_dword(dev, CPU_GEN);
- write_nic_dword(dev, CPU_GEN, (dwRegValue|CPU_GEN_BB_RST));
+ /* ----Set BB reset de-Active---- */
+ read_nic_dword(dev, CPU_GEN, &reg_u32);
+ write_nic_dword(dev, CPU_GEN, (reg_u32|CPU_GEN_BB_RST));
- /*----BB AGC table Initialization----*/
- //==m==>Set PHY REG From Header<==m==
+ /* ----BB AGC table Initialization---- */
+ /* ==m==>Set PHY REG From Header<==m== */
rtl8192_phyConfigBB(dev, BaseBand_Config_AGC_TAB);
- /*----Enable XSTAL ----*/
+ /* ----Enable XSTAL ---- */
write_nic_byte_E(dev, 0x5e, 0x00);
- if (priv->card_8192_version == (u8)VERSION_819xU_A)
- {
- //Antenna gain offset from B/C/D to A
- dwRegValue = (priv->AntennaTxPwDiff[1]<<4 | priv->AntennaTxPwDiff[0]);
- rtl8192_setBBreg(dev, rFPGA0_TxGainStage, (bXBTxAGC|bXCTxAGC), dwRegValue);
-
- //XSTALLCap
- dwRegValue = priv->CrystalCap & 0xf;
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, bXtalCap, dwRegValue);
+ if (priv->card_8192_version == (u8)VERSION_819xU_A) {
+ /* Antenna gain offset from B/C/D to A */
+ reg_u32 = (priv->AntennaTxPwDiff[1]<<4 |
+ priv->AntennaTxPwDiff[0]);
+ rtl8192_setBBreg(dev, rFPGA0_TxGainStage, (bXBTxAGC|bXCTxAGC),
+ reg_u32);
+
+ /* XSTALLCap */
+ reg_u32 = priv->CrystalCap & 0xf;
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, bXtalCap,
+ reg_u32);
}
- // Check if the CCK HighPower is turned ON.
- // This is used to calculate PWDB.
- priv->bCckHighPower = (u8)(rtl8192_QueryBBReg(dev, rFPGA0_XA_HSSIParameter2, 0x200));
+ /* Check if the CCK HighPower is turned ON.
+ This is used to calculate PWDB. */
+ priv->bCckHighPower = (u8)rtl8192_QueryBBReg(dev,
+ rFPGA0_XA_HSSIParameter2,
+ 0x200);
return;
}
+
/******************************************************************************
- *function: This function initialize BB&RF
- * input: net_device dev
- * output: none
- * return: none
- * notice: Initialization value may change all the time, so please make
- * sure it has been synced with the newest.
- * ***************************************************************************/
-void rtl8192_BBConfig(struct net_device* dev)
+ * function: This function initializes BB&RF
+ * input: net_device *dev
+ * output: none
+ * return: none
+ * notice: Initialization value may change all the time, so please make
+ * sure it has been synced with the newest.
+ *****************************************************************************/
+void rtl8192_BBConfig(struct net_device *dev)
{
rtl8192_InitBBRFRegDef(dev);
- //config BB&RF. As hardCode based initialization has not been well
- //implemented, so use file first.FIXME:should implement it for hardcode?
+ /* config BB&RF. As hardCode based initialization has not been well
+ * implemented, so use file first.
+ * FIXME: should implement it for hardcode? */
rtl8192_BB_Config_ParaFile(dev);
return;
}
+
/******************************************************************************
- *function: This function obtains the initialization value of Tx power Level offset
- * input: net_device dev
- * output: none
- * return: none
- * ***************************************************************************/
-void rtl8192_phy_getTxPower(struct net_device* dev)
+ * function: This function obtains the initialization value of Tx power Level
+ * offset
+ * input: net_device *dev
+ * output: none
+ * return: none
+ *****************************************************************************/
+void rtl8192_phy_getTxPower(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- priv->MCSTxPowerLevelOriginalOffset[0] =
- read_nic_dword(dev, rTxAGC_Rate18_06);
- priv->MCSTxPowerLevelOriginalOffset[1] =
- read_nic_dword(dev, rTxAGC_Rate54_24);
- priv->MCSTxPowerLevelOriginalOffset[2] =
- read_nic_dword(dev, rTxAGC_Mcs03_Mcs00);
- priv->MCSTxPowerLevelOriginalOffset[3] =
- read_nic_dword(dev, rTxAGC_Mcs07_Mcs04);
- priv->MCSTxPowerLevelOriginalOffset[4] =
- read_nic_dword(dev, rTxAGC_Mcs11_Mcs08);
- priv->MCSTxPowerLevelOriginalOffset[5] =
- read_nic_dword(dev, rTxAGC_Mcs15_Mcs12);
-
- // read rx initial gain
- priv->DefaultInitialGain[0] = read_nic_byte(dev, rOFDM0_XAAGCCore1);
- priv->DefaultInitialGain[1] = read_nic_byte(dev, rOFDM0_XBAGCCore1);
- priv->DefaultInitialGain[2] = read_nic_byte(dev, rOFDM0_XCAGCCore1);
- priv->DefaultInitialGain[3] = read_nic_byte(dev, rOFDM0_XDAGCCore1);
- RT_TRACE(COMP_INIT, "Default initial gain (c50=0x%x, c58=0x%x, c60=0x%x, c68=0x%x) \n",
- priv->DefaultInitialGain[0], priv->DefaultInitialGain[1],
- priv->DefaultInitialGain[2], priv->DefaultInitialGain[3]);
-
- // read framesync
- priv->framesync = read_nic_byte(dev, rOFDM0_RxDetector3);
- priv->framesyncC34 = read_nic_byte(dev, rOFDM0_RxDetector2);
+ u8 tmp;
+
+ read_nic_dword(dev, rTxAGC_Rate18_06,
+ &priv->MCSTxPowerLevelOriginalOffset[0]);
+ read_nic_dword(dev, rTxAGC_Rate54_24,
+ &priv->MCSTxPowerLevelOriginalOffset[1]);
+ read_nic_dword(dev, rTxAGC_Mcs03_Mcs00,
+ &priv->MCSTxPowerLevelOriginalOffset[2]);
+ read_nic_dword(dev, rTxAGC_Mcs07_Mcs04,
+ &priv->MCSTxPowerLevelOriginalOffset[3]);
+ read_nic_dword(dev, rTxAGC_Mcs11_Mcs08,
+ &priv->MCSTxPowerLevelOriginalOffset[4]);
+ read_nic_dword(dev, rTxAGC_Mcs15_Mcs12,
+ &priv->MCSTxPowerLevelOriginalOffset[5]);
+
+ /* Read rx initial gain */
+ read_nic_byte(dev, rOFDM0_XAAGCCore1, &priv->DefaultInitialGain[0]);
+ read_nic_byte(dev, rOFDM0_XBAGCCore1, &priv->DefaultInitialGain[1]);
+ read_nic_byte(dev, rOFDM0_XCAGCCore1, &priv->DefaultInitialGain[2]);
+ read_nic_byte(dev, rOFDM0_XDAGCCore1, &priv->DefaultInitialGain[3]);
+ RT_TRACE(COMP_INIT,
+ "Default initial gain (c50=0x%x, c58=0x%x, c60=0x%x, c68=0x%x)\n",
+ priv->DefaultInitialGain[0], priv->DefaultInitialGain[1],
+ priv->DefaultInitialGain[2], priv->DefaultInitialGain[3]);
+
+ /* Read framesync */
+ read_nic_byte(dev, rOFDM0_RxDetector3, &priv->framesync);
+ read_nic_byte(dev, rOFDM0_RxDetector2, &tmp);
+ priv->framesyncC34 = tmp;
RT_TRACE(COMP_INIT, "Default framesync (0x%x) = 0x%x \n",
rOFDM0_RxDetector3, priv->framesync);
- // read SIFS (save the value read fome MACPHY_REG.txt)
- priv->SifsTime = read_nic_word(dev, SIFS);
+ /* Read SIFS (save the value read fome MACPHY_REG.txt) */
+ read_nic_word(dev, SIFS, &priv->SifsTime);
return;
}
/******************************************************************************
- *function: This function obtains the initialization value of Tx power Level offset
- * input: net_device dev
- * output: none
- * return: none
- * ***************************************************************************/
-void rtl8192_phy_setTxPower(struct net_device* dev, u8 channel)
+ * function: This function sets the initialization value of Tx power Level
+ * offset
+ * input: net_device *dev
+ * u8 channel
+ * output: none
+ * return: none
+ ******************************************************************************/
+void rtl8192_phy_setTxPower(struct net_device *dev, u8 channel)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 powerlevel = priv->TxPowerLevelCCK[channel-1];
u8 powerlevelOFDM24G = priv->TxPowerLevelOFDM24G[channel-1];
- switch(priv->rf_chip)
- {
+ switch (priv->rf_chip) {
case RF_8256:
- PHY_SetRF8256CCKTxPower(dev, powerlevel); //need further implement
+ /* need further implement */
+ PHY_SetRF8256CCKTxPower(dev, powerlevel);
PHY_SetRF8256OFDMTxPower(dev, powerlevelOFDM24G);
break;
default:
-// case RF_8225:
-// case RF_8258:
- RT_TRACE((COMP_PHY|COMP_ERR), "error RF chipID(8225 or 8258) in function %s()\n", __FUNCTION__);
+ RT_TRACE((COMP_PHY|COMP_ERR),
+ "error RF chipID(8225 or 8258) in function %s()\n",
+ __func__);
break;
}
return;
}
/******************************************************************************
- *function: This function check Rf chip to do RF config
- * input: net_device dev
- * output: none
- * return: only 8256 is supported
- * ***************************************************************************/
-void rtl8192_phy_RFConfig(struct net_device* dev)
+ * function: This function checks Rf chip to do RF config
+ * input: net_device *dev
+ * output: none
+ * return: only 8256 is supported
+ ******************************************************************************/
+void rtl8192_phy_RFConfig(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- switch(priv->rf_chip)
- {
+ switch (priv->rf_chip) {
case RF_8256:
PHY_RF8256_Config(dev);
break;
- // case RF_8225:
- // case RF_8258:
default:
RT_TRACE(COMP_ERR, "error chip id\n");
break;
@@ -932,75 +967,89 @@ void rtl8192_phy_RFConfig(struct net_device* dev)
}
/******************************************************************************
- *function: This function update Initial gain
- * input: net_device dev
- * output: none
- * return: As Windows has not implemented this, wait for complement
- * ***************************************************************************/
-void rtl8192_phy_updateInitGain(struct net_device* dev)
+ * function: This function updates Initial gain
+ * input: net_device *dev
+ * output: none
+ * return: As Windows has not implemented this, wait for complement
+ ******************************************************************************/
+void rtl8192_phy_updateInitGain(struct net_device *dev)
{
return;
}
/******************************************************************************
- *function: This function read RF parameters from general head file, and do RF 3-wire
- * input: net_device dev
- * output: none
- * return: return code show if RF configuration is successful(0:pass, 1:fail)
- * Note: Delay may be required for RF configuration
- * ***************************************************************************/
-u8 rtl8192_phy_ConfigRFWithHeaderFile(struct net_device* dev, RF90_RADIO_PATH_E eRFPath)
+ * function: This function read RF parameters from general head file,
+ * and do RF 3-wire
+ * input: net_device *dev
+ * RF90_RADIO_PATH_E eRFPath
+ * output: none
+ * return: return code show if RF configuration is successful(0:pass, 1:fail)
+ * notice: Delay may be required for RF configuration
+ *****************************************************************************/
+u8 rtl8192_phy_ConfigRFWithHeaderFile(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath)
{
int i;
- //u32* pRFArray;
u8 ret = 0;
- switch(eRFPath){
+ switch (eRFPath) {
case RF90_PATH_A:
- for(i = 0;i<RadioA_ArrayLength; i=i+2){
+ for (i = 0; i < RadioA_ArrayLength; i = i+2) {
- if(rtl819XRadioA_Array[i] == 0xfe){
- mdelay(100);
- continue;
+ if (rtl819XRadioA_Array[i] == 0xfe) {
+ mdelay(100);
+ continue;
}
- rtl8192_phy_SetRFReg(dev, eRFPath, rtl819XRadioA_Array[i], bMask12Bits, rtl819XRadioA_Array[i+1]);
+ rtl8192_phy_SetRFReg(dev, eRFPath,
+ rtl819XRadioA_Array[i],
+ bMask12Bits,
+ rtl819XRadioA_Array[i+1]);
mdelay(1);
}
break;
case RF90_PATH_B:
- for(i = 0;i<RadioB_ArrayLength; i=i+2){
+ for (i = 0; i < RadioB_ArrayLength; i = i+2) {
- if(rtl819XRadioB_Array[i] == 0xfe){
- mdelay(100);
- continue;
+ if (rtl819XRadioB_Array[i] == 0xfe) {
+ mdelay(100);
+ continue;
}
- rtl8192_phy_SetRFReg(dev, eRFPath, rtl819XRadioB_Array[i], bMask12Bits, rtl819XRadioB_Array[i+1]);
+ rtl8192_phy_SetRFReg(dev, eRFPath,
+ rtl819XRadioB_Array[i],
+ bMask12Bits,
+ rtl819XRadioB_Array[i+1]);
mdelay(1);
}
break;
case RF90_PATH_C:
- for(i = 0;i<RadioC_ArrayLength; i=i+2){
+ for (i = 0; i < RadioC_ArrayLength; i = i+2) {
- if(rtl819XRadioC_Array[i] == 0xfe){
- mdelay(100);
- continue;
+ if (rtl819XRadioC_Array[i] == 0xfe) {
+ mdelay(100);
+ continue;
}
- rtl8192_phy_SetRFReg(dev, eRFPath, rtl819XRadioC_Array[i], bMask12Bits, rtl819XRadioC_Array[i+1]);
+ rtl8192_phy_SetRFReg(dev, eRFPath,
+ rtl819XRadioC_Array[i],
+ bMask12Bits,
+ rtl819XRadioC_Array[i+1]);
mdelay(1);
}
break;
case RF90_PATH_D:
- for(i = 0;i<RadioD_ArrayLength; i=i+2){
+ for (i = 0; i < RadioD_ArrayLength; i = i+2) {
- if(rtl819XRadioD_Array[i] == 0xfe){
- mdelay(100);
- continue;
+ if (rtl819XRadioD_Array[i] == 0xfe) {
+ mdelay(100);
+ continue;
}
- rtl8192_phy_SetRFReg(dev, eRFPath, rtl819XRadioD_Array[i], bMask12Bits, rtl819XRadioD_Array[i+1]);
+ rtl8192_phy_SetRFReg(dev, eRFPath,
+ rtl819XRadioD_Array[i],
+ bMask12Bits,
+ rtl819XRadioD_Array[i+1]);
mdelay(1);
}
@@ -1012,22 +1061,22 @@ u8 rtl8192_phy_ConfigRFWithHeaderFile(struct net_device* dev, RF90_RADIO_PATH_E
return ret;
}
+
/******************************************************************************
- *function: This function set Tx Power of the channel
- * input: struct net_device *dev
- * u8 channel
- * output: none
- * return: none
- * Note:
- * ***************************************************************************/
+ * function: This function sets Tx Power of the channel
+ * input: net_device *dev
+ * u8 channel
+ * output: none
+ * return: none
+ * notice:
+ ******************************************************************************/
void rtl8192_SetTxPowerLevel(struct net_device *dev, u8 channel)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 powerlevel = priv->TxPowerLevelCCK[channel-1];
u8 powerlevelOFDM24G = priv->TxPowerLevelOFDM24G[channel-1];
- switch(priv->rf_chip)
- {
+ switch (priv->rf_chip) {
case RF_8225:
#ifdef TO_DO_LIST
PHY_SetRF8225CckTxPower(Adapter, powerlevel);
@@ -1043,136 +1092,132 @@ void rtl8192_SetTxPowerLevel(struct net_device *dev, u8 channel)
case RF_8258:
break;
default:
- RT_TRACE(COMP_ERR, "unknown rf chip ID in rtl8192_SetTxPowerLevel()\n");
+ RT_TRACE(COMP_ERR, "unknown rf chip ID in %s()\n", __func__);
break;
}
return;
}
/******************************************************************************
- *function: This function set RF state on or off
- * input: struct net_device *dev
- * RT_RF_POWER_STATE eRFPowerState //Power State to set
- * output: none
- * return: none
- * Note:
- * ***************************************************************************/
-bool rtl8192_SetRFPowerState(struct net_device *dev, RT_RF_POWER_STATE eRFPowerState)
+ * function: This function sets RF state on or off
+ * input: net_device *dev
+ * RT_RF_POWER_STATE eRFPowerState //Power State to set
+ * output: none
+ * return: none
+ * notice:
+ *****************************************************************************/
+bool rtl8192_SetRFPowerState(struct net_device *dev,
+ RT_RF_POWER_STATE eRFPowerState)
{
bool bResult = true;
-// u8 eRFPath;
struct r8192_priv *priv = ieee80211_priv(dev);
- if(eRFPowerState == priv->ieee80211->eRFPowerState)
+ if (eRFPowerState == priv->ieee80211->eRFPowerState)
return false;
- if(priv->SetRFPowerStateInProgress == true)
+ if (priv->SetRFPowerStateInProgress == true)
return false;
priv->SetRFPowerStateInProgress = true;
- switch(priv->rf_chip)
- {
- case RF_8256:
- switch( eRFPowerState )
- {
- case eRfOn:
- //RF-A, RF-B
- //enable RF-Chip A/B
- rtl8192_setBBreg(dev, rFPGA0_XA_RFInterfaceOE, BIT4, 0x1); // 0x860[4]
- //analog to digital on
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter4, 0x300, 0x3);// 0x88c[9:8]
- //digital to analog on
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x18, 0x3); // 0x880[4:3]
- //rx antenna on
- rtl8192_setBBreg(dev, rOFDM0_TRxPathEnable, 0x3, 0x3);// 0xc04[1:0]
- //rx antenna on
- rtl8192_setBBreg(dev, rOFDM1_TRxPathEnable, 0x3, 0x3);// 0xd04[1:0]
- //analog to digital part2 on
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x60, 0x3); // 0x880[6:5]
+ switch (priv->rf_chip) {
+ case RF_8256:
+ switch (eRFPowerState) {
+ case eRfOn:
+ /* RF-A, RF-B */
+ /* enable RF-Chip A/B - 0x860[4] */
+ rtl8192_setBBreg(dev, rFPGA0_XA_RFInterfaceOE, BIT4,
+ 0x1);
+ /* analog to digital on - 0x88c[9:8] */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter4, 0x300,
+ 0x3);
+ /* digital to analog on - 0x880[4:3] */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x18,
+ 0x3);
+ /* rx antenna on - 0xc04[1:0] */
+ rtl8192_setBBreg(dev, rOFDM0_TRxPathEnable, 0x3, 0x3);
+ /* rx antenna on - 0xd04[1:0] */
+ rtl8192_setBBreg(dev, rOFDM1_TRxPathEnable, 0x3, 0x3);
+ /* analog to digital part2 on - 0x880[6:5] */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x60,
+ 0x3);
- break;
+ break;
- case eRfSleep:
+ case eRfSleep:
- break;
-
- case eRfOff:
- //RF-A, RF-B
- //disable RF-Chip A/B
- rtl8192_setBBreg(dev, rFPGA0_XA_RFInterfaceOE, BIT4, 0x0); // 0x860[4]
- //analog to digital off, for power save
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter4, 0xf00, 0x0);// 0x88c[11:8]
- //digital to analog off, for power save
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x18, 0x0); // 0x880[4:3]
- //rx antenna off
- rtl8192_setBBreg(dev, rOFDM0_TRxPathEnable, 0xf, 0x0);// 0xc04[3:0]
- //rx antenna off
- rtl8192_setBBreg(dev, rOFDM1_TRxPathEnable, 0xf, 0x0);// 0xd04[3:0]
- //analog to digital part2 off, for power save
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x60, 0x0); // 0x880[6:5]
+ break;
- break;
+ case eRfOff:
+ /* RF-A, RF-B */
+ /* disable RF-Chip A/B - 0x860[4] */
+ rtl8192_setBBreg(dev, rFPGA0_XA_RFInterfaceOE, BIT4,
+ 0x0);
+ /* analog to digital off, for power save */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter4, 0xf00,
+ 0x0); /* 0x88c[11:8] */
+ /* digital to analog off, for power save - 0x880[4:3] */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x18,
+ 0x0);
+ /* rx antenna off - 0xc04[3:0] */
+ rtl8192_setBBreg(dev, rOFDM0_TRxPathEnable, 0xf, 0x0);
+ /* rx antenna off - 0xd04[3:0] */
+ rtl8192_setBBreg(dev, rOFDM1_TRxPathEnable, 0xf, 0x0);
+ /* analog to digital part2 off, for power save */
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x60,
+ 0x0); /* 0x880[6:5] */
- default:
- bResult = false;
- RT_TRACE(COMP_ERR, "SetRFPowerState819xUsb(): unknow state to set: 0x%X!!!\n", eRFPowerState);
- break;
- }
break;
+
default:
- RT_TRACE(COMP_ERR, "Not support rf_chip(%x)\n", priv->rf_chip);
+ bResult = false;
+ RT_TRACE(COMP_ERR, "%s(): unknown state to set: 0x%X\n",
+ __func__, eRFPowerState);
break;
+ }
+ break;
+ default:
+ RT_TRACE(COMP_ERR, "Not support rf_chip(%x)\n", priv->rf_chip);
+ break;
}
#ifdef TO_DO_LIST
- if(bResult)
- {
- // Update current RF state variable.
+ if (bResult) {
+ /* Update current RF state variable. */
pHalData->eRFPowerState = eRFPowerState;
- switch(pHalData->RFChipID )
- {
- case RF_8256:
- switch(pHalData->eRFPowerState)
- {
- case eRfOff:
- //
- //If Rf off reason is from IPS, Led should blink with no link, by Maddest 071015
- //
- if(pMgntInfo->RfOffReason==RF_CHANGE_BY_IPS )
- {
- Adapter->HalFunc.LedControlHandler(Adapter,LED_CTL_NO_LINK);
- }
- else
- {
- // Turn off LED if RF is not ON.
- Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_POWER_OFF);
- }
- break;
-
- case eRfOn:
- // Turn on RF we are still linked, which might happen when
- // we quickly turn off and on HW RF. 2006.05.12, by rcnjko.
- if( pMgntInfo->bMediaConnect == TRUE )
- {
- Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_LINK);
- }
- else
- {
- // Turn off LED if RF is not ON.
- Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_NO_LINK);
- }
- break;
-
- default:
- // do nothing.
- break;
- }// Switch RF state
+ switch (pHalData->RFChipID) {
+ case RF_8256:
+ switch (pHalData->eRFPowerState) {
+ case eRfOff:
+ /* If Rf off reason is from IPS,
+ LED should blink with no link */
+ if (pMgntInfo->RfOffReason == RF_CHANGE_BY_IPS)
+ Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_NO_LINK);
+ else
+ /* Turn off LED if RF is not ON. */
+ Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_POWER_OFF);
+ break;
+
+ case eRfOn:
+ /* Turn on RF we are still linked, which might
+ happen when we quickly turn off and on HW RF.
+ */
+ if (pMgntInfo->bMediaConnect == TRUE)
+ Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_LINK);
+ else
+ /* Turn off LED if RF is not ON. */
+ Adapter->HalFunc.LedControlHandler(Adapter, LED_CTL_NO_LINK);
break;
- default:
- RT_TRACE(COMP_RF, DBG_LOUD, ("SetRFPowerState8190(): Unknown RF type\n"));
- break;
+ default:
+ break;
}
+ break;
+
+ default:
+ RT_TRACE(COMP_RF, DBG_LOUD, "%s(): Unknown RF type\n",
+ __func__);
+ break;
+ }
}
#endif
@@ -1181,40 +1226,32 @@ bool rtl8192_SetRFPowerState(struct net_device *dev, RT_RF_POWER_STATE eRFPowerS
return bResult;
}
-/****************************************************************************************
- *function: This function set command table variable(struct SwChnlCmd).
- * input: SwChnlCmd* CmdTable //table to be set.
- * u32 CmdTableIdx //variable index in table to be set
- * u32 CmdTableSz //table size.
- * SwChnlCmdID CmdID //command ID to set.
- * u32 Para1
- * u32 Para2
- * u32 msDelay
- * output:
- * return: true if finished, false otherwise
- * Note:
- * ************************************************************************************/
-u8 rtl8192_phy_SetSwChnlCmdArray(
- SwChnlCmd* CmdTable,
- u32 CmdTableIdx,
- u32 CmdTableSz,
- SwChnlCmdID CmdID,
- u32 Para1,
- u32 Para2,
- u32 msDelay
- )
+/******************************************************************************
+ * function: This function sets command table variable (struct SwChnlCmd).
+ * input: SwChnlCmd *CmdTable //table to be set
+ * u32 CmdTableIdx //variable index in table to be set
+ * u32 CmdTableSz //table size
+ * SwChnlCmdID CmdID //command ID to set
+ * u32 Para1
+ * u32 Para2
+ * u32 msDelay
+ * output:
+ * return: true if finished, false otherwise
+ * notice:
+ ******************************************************************************/
+u8 rtl8192_phy_SetSwChnlCmdArray(SwChnlCmd *CmdTable, u32 CmdTableIdx,
+ u32 CmdTableSz, SwChnlCmdID CmdID, u32 Para1,
+ u32 Para2, u32 msDelay)
{
- SwChnlCmd* pCmd;
+ SwChnlCmd *pCmd;
- if(CmdTable == NULL)
- {
- RT_TRACE(COMP_ERR, "phy_SetSwChnlCmdArray(): CmdTable cannot be NULL.\n");
+ if (CmdTable == NULL) {
+ RT_TRACE(COMP_ERR, "%s(): CmdTable cannot be NULL\n", __func__);
return false;
}
- if(CmdTableIdx >= CmdTableSz)
- {
- RT_TRACE(COMP_ERR, "phy_SetSwChnlCmdArray(): Access invalid index, please check size of the table, CmdTableIdx:%d, CmdTableSz:%d\n",
- CmdTableIdx, CmdTableSz);
+ if (CmdTableIdx >= CmdTableSz) {
+ RT_TRACE(COMP_ERR, "%s(): Access invalid index, please check size of the table, CmdTableIdx:%d, CmdTableSz:%d\n",
+ __func__, CmdTableIdx, CmdTableSz);
return false;
}
@@ -1226,455 +1263,442 @@ u8 rtl8192_phy_SetSwChnlCmdArray(
return true;
}
+
/******************************************************************************
- *function: This function set channel step by step
- * input: struct net_device *dev
- * u8 channel
- * u8* stage //3 stages
- * u8* step //
- * u32* delay //whether need to delay
- * output: store new stage, step and delay for next step(combine with function above)
- * return: true if finished, false otherwise
- * Note: Wait for simpler function to replace it //wb
- * ***************************************************************************/
-u8 rtl8192_phy_SwChnlStepByStep(struct net_device *dev, u8 channel, u8* stage, u8* step, u32* delay)
+ * function: This function sets channel step by step
+ * input: net_device *dev
+ * u8 channel
+ * u8 *stage //3 stages
+ * u8 *step
+ * u32 *delay //whether need to delay
+ * output: store new stage, step and delay for next step
+ * (combine with function above)
+ * return: true if finished, false otherwise
+ * notice: Wait for simpler function to replace it
+ *****************************************************************************/
+u8 rtl8192_phy_SwChnlStepByStep(struct net_device *dev, u8 channel, u8 *stage,
+ u8 *step, u32 *delay)
{
struct r8192_priv *priv = ieee80211_priv(dev);
-// PCHANNEL_ACCESS_SETTING pChnlAccessSetting;
- SwChnlCmd PreCommonCmd[MAX_PRECMD_CNT];
- u32 PreCommonCmdCnt;
- SwChnlCmd PostCommonCmd[MAX_POSTCMD_CNT];
- u32 PostCommonCmdCnt;
- SwChnlCmd RfDependCmd[MAX_RFDEPENDCMD_CNT];
- u32 RfDependCmdCnt;
- SwChnlCmd *CurrentCmd = NULL;
- //RF90_RADIO_PATH_E eRFPath;
+ SwChnlCmd PreCommonCmd[MAX_PRECMD_CNT];
+ u32 PreCommonCmdCnt;
+ SwChnlCmd PostCommonCmd[MAX_POSTCMD_CNT];
+ u32 PostCommonCmdCnt;
+ SwChnlCmd RfDependCmd[MAX_RFDEPENDCMD_CNT];
+ u32 RfDependCmdCnt;
+ SwChnlCmd *CurrentCmd = NULL;
u8 eRFPath;
-// u32 RfRetVal;
-// u8 RetryCnt;
-
- RT_TRACE(COMP_CH, "====>%s()====stage:%d, step:%d, channel:%d\n", __FUNCTION__, *stage, *step, channel);
-// RT_ASSERT(IsLegalChannel(Adapter, channel), ("illegal channel: %d\n", channel));
- if (!IsLegalChannel(priv->ieee80211, channel))
- {
- RT_TRACE(COMP_ERR, "=============>set to illegal channel:%d\n", channel);
- return true; //return true to tell upper caller function this channel setting is finished! Or it will in while loop.
+
+ RT_TRACE(COMP_CH, "%s() stage: %d, step: %d, channel: %d\n",
+ __func__, *stage, *step, channel);
+ if (!IsLegalChannel(priv->ieee80211, channel)) {
+ RT_TRACE(COMP_ERR, "set to illegal channel: %d\n", channel);
+ /* return true to tell upper caller function this channel
+ setting is finished! Or it will in while loop. */
+ return true;
}
-//FIXME:need to check whether channel is legal or not here.WB
-
-
- //for(eRFPath = RF90_PATH_A; eRFPath <pHalData->NumTotalRFPath; eRFPath++)
-// for(eRFPath = 0; eRFPath <RF90_PATH_MAX; eRFPath++)
-// {
-// if (!rtl8192_phy_CheckIsLegalRFPath(dev, eRFPath))
-// continue;
- // <1> Fill up pre common command.
- PreCommonCmdCnt = 0;
- rtl8192_phy_SetSwChnlCmdArray(PreCommonCmd, PreCommonCmdCnt++, MAX_PRECMD_CNT,
- CmdID_SetTxPowerLevel, 0, 0, 0);
- rtl8192_phy_SetSwChnlCmdArray(PreCommonCmd, PreCommonCmdCnt++, MAX_PRECMD_CNT,
- CmdID_End, 0, 0, 0);
-
- // <2> Fill up post common command.
- PostCommonCmdCnt = 0;
-
- rtl8192_phy_SetSwChnlCmdArray(PostCommonCmd, PostCommonCmdCnt++, MAX_POSTCMD_CNT,
- CmdID_End, 0, 0, 0);
-
- // <3> Fill up RF dependent command.
- RfDependCmdCnt = 0;
- switch( priv->rf_chip )
- {
- case RF_8225:
- if (!(channel >= 1 && channel <= 14))
- {
- RT_TRACE(COMP_ERR, "illegal channel for Zebra 8225: %d\n", channel);
- return true;
- }
- rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++, MAX_RFDEPENDCMD_CNT,
- CmdID_RF_WriteReg, rZebra1_Channel, RF_CHANNEL_TABLE_ZEBRA[channel], 10);
- rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++, MAX_RFDEPENDCMD_CNT,
- CmdID_End, 0, 0, 0);
- break;
+ /* FIXME: need to check whether channel is legal or not here */
- case RF_8256:
- // TEST!! This is not the table for 8256!!
- if (!(channel >= 1 && channel <= 14))
- {
- RT_TRACE(COMP_ERR, "illegal channel for Zebra 8256: %d\n", channel);
- return true;
- }
- rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++, MAX_RFDEPENDCMD_CNT,
- CmdID_RF_WriteReg, rZebra1_Channel, channel, 10);
- rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++, MAX_RFDEPENDCMD_CNT,
- CmdID_End, 0, 0, 0);
- break;
- case RF_8258:
- break;
+ /* <1> Fill up pre common command. */
+ PreCommonCmdCnt = 0;
+ rtl8192_phy_SetSwChnlCmdArray(PreCommonCmd, PreCommonCmdCnt++,
+ MAX_PRECMD_CNT, CmdID_SetTxPowerLevel,
+ 0, 0, 0);
+ rtl8192_phy_SetSwChnlCmdArray(PreCommonCmd, PreCommonCmdCnt++,
+ MAX_PRECMD_CNT, CmdID_End, 0, 0, 0);
- default:
- RT_TRACE(COMP_ERR, "Unknown RFChipID: %d\n", priv->rf_chip);
+ /* <2> Fill up post common command. */
+ PostCommonCmdCnt = 0;
+
+ rtl8192_phy_SetSwChnlCmdArray(PostCommonCmd, PostCommonCmdCnt++,
+ MAX_POSTCMD_CNT, CmdID_End, 0, 0, 0);
+
+ /* <3> Fill up RF dependent command. */
+ RfDependCmdCnt = 0;
+ switch (priv->rf_chip) {
+ case RF_8225:
+ if (!(channel >= 1 && channel <= 14)) {
+ RT_TRACE(COMP_ERR,
+ "illegal channel for Zebra 8225: %d\n",
+ channel);
return true;
- break;
}
+ rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++,
+ MAX_RFDEPENDCMD_CNT,
+ CmdID_RF_WriteReg,
+ rZebra1_Channel,
+ RF_CHANNEL_TABLE_ZEBRA[channel],
+ 10);
+ rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++,
+ MAX_RFDEPENDCMD_CNT,
+ CmdID_End, 0, 0, 0);
+ break;
+ case RF_8256:
+ /* TEST!! This is not the table for 8256!! */
+ if (!(channel >= 1 && channel <= 14)) {
+ RT_TRACE(COMP_ERR,
+ "illegal channel for Zebra 8256: %d\n",
+ channel);
+ return true;
+ }
+ rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++,
+ MAX_RFDEPENDCMD_CNT,
+ CmdID_RF_WriteReg,
+ rZebra1_Channel, channel, 10);
+ rtl8192_phy_SetSwChnlCmdArray(RfDependCmd, RfDependCmdCnt++,
+ MAX_RFDEPENDCMD_CNT,
+ CmdID_End, 0, 0, 0);
+ break;
- do{
- switch(*stage)
- {
- case 0:
- CurrentCmd=&PreCommonCmd[*step];
- break;
- case 1:
- CurrentCmd=&RfDependCmd[*step];
- break;
- case 2:
- CurrentCmd=&PostCommonCmd[*step];
- break;
- }
+ case RF_8258:
+ break;
- if(CurrentCmd->CmdID==CmdID_End)
- {
- if((*stage)==2)
- {
- (*delay)=CurrentCmd->msDelay;
- return true;
- }
- else
- {
- (*stage)++;
- (*step)=0;
- continue;
- }
- }
+ default:
+ RT_TRACE(COMP_ERR, "Unknown RFChipID: %d\n", priv->rf_chip);
+ return true;
+ break;
+ }
- switch(CurrentCmd->CmdID)
- {
- case CmdID_SetTxPowerLevel:
- if(priv->card_8192_version == (u8)VERSION_819xU_A) //xiong: consider it later!
- rtl8192_SetTxPowerLevel(dev,channel);
- break;
- case CmdID_WritePortUlong:
- write_nic_dword(dev, CurrentCmd->Para1, CurrentCmd->Para2);
- break;
- case CmdID_WritePortUshort:
- write_nic_word(dev, CurrentCmd->Para1, (u16)CurrentCmd->Para2);
- break;
- case CmdID_WritePortUchar:
- write_nic_byte(dev, CurrentCmd->Para1, (u8)CurrentCmd->Para2);
- break;
- case CmdID_RF_WriteReg:
- for(eRFPath = 0; eRFPath < RF90_PATH_MAX; eRFPath++)
- {
- rtl8192_phy_SetRFReg(dev, (RF90_RADIO_PATH_E)eRFPath, CurrentCmd->Para1, bZebra1_ChannelNum, CurrentCmd->Para2);
- }
- break;
- default:
- break;
+
+ do {
+ switch (*stage) {
+ case 0:
+ CurrentCmd = &PreCommonCmd[*step];
+ break;
+ case 1:
+ CurrentCmd = &RfDependCmd[*step];
+ break;
+ case 2:
+ CurrentCmd = &PostCommonCmd[*step];
+ break;
+ }
+
+ if (CurrentCmd->CmdID == CmdID_End) {
+ if ((*stage) == 2) {
+ (*delay) = CurrentCmd->msDelay;
+ return true;
+ } else {
+ (*stage)++;
+ (*step) = 0;
+ continue;
}
+ }
+ switch (CurrentCmd->CmdID) {
+ case CmdID_SetTxPowerLevel:
+ if (priv->card_8192_version == (u8)VERSION_819xU_A)
+ /* consider it later! */
+ rtl8192_SetTxPowerLevel(dev, channel);
+ break;
+ case CmdID_WritePortUlong:
+ write_nic_dword(dev, CurrentCmd->Para1,
+ CurrentCmd->Para2);
+ break;
+ case CmdID_WritePortUshort:
+ write_nic_word(dev, CurrentCmd->Para1,
+ (u16)CurrentCmd->Para2);
break;
- }while(true);
-// }/*for(Number of RF paths)*/
+ case CmdID_WritePortUchar:
+ write_nic_byte(dev, CurrentCmd->Para1,
+ (u8)CurrentCmd->Para2);
+ break;
+ case CmdID_RF_WriteReg:
+ for (eRFPath = 0; eRFPath < RF90_PATH_MAX; eRFPath++) {
+ rtl8192_phy_SetRFReg(dev,
+ (RF90_RADIO_PATH_E)eRFPath,
+ CurrentCmd->Para1,
+ bZebra1_ChannelNum,
+ CurrentCmd->Para2);
+ }
+ break;
+ default:
+ break;
+ }
- (*delay)=CurrentCmd->msDelay;
+ break;
+ } while (true);
+
+ (*delay) = CurrentCmd->msDelay;
(*step)++;
return false;
}
/******************************************************************************
- *function: This function does actually set channel work
- * input: struct net_device *dev
- * u8 channel
- * output: none
- * return: noin
- * Note: We should not call this function directly
- * ***************************************************************************/
+ * function: This function does actually set channel work
+ * input: net_device *dev
+ * u8 channel
+ * output: none
+ * return: none
+ * notice: We should not call this function directly
+ *****************************************************************************/
void rtl8192_phy_FinishSwChnlNow(struct net_device *dev, u8 channel)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u32 delay = 0;
- while(!rtl8192_phy_SwChnlStepByStep(dev,channel,&priv->SwChnlStage,&priv->SwChnlStep,&delay))
- {
- // if(delay>0)
- // msleep(delay);//or mdelay? need further consideration
- if(!priv->up)
+ while (!rtl8192_phy_SwChnlStepByStep(dev, channel, &priv->SwChnlStage,
+ &priv->SwChnlStep, &delay)) {
+ if (!priv->up)
break;
}
}
+
/******************************************************************************
- *function: Callback routine of the work item for switch channel.
- * input:
+ * function: Callback routine of the work item for switch channel.
+ * input: net_device *dev
*
- * output: none
- * return: noin
- * ***************************************************************************/
+ * output: none
+ * return: none
+ *****************************************************************************/
void rtl8192_SwChnl_WorkItem(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- RT_TRACE(COMP_CH, "==> SwChnlCallback819xUsbWorkItem(), chan:%d\n", priv->chan);
+ RT_TRACE(COMP_CH, "==> SwChnlCallback819xUsbWorkItem(), chan:%d\n",
+ priv->chan);
- rtl8192_phy_FinishSwChnlNow(dev , priv->chan);
+ rtl8192_phy_FinishSwChnlNow(dev, priv->chan);
RT_TRACE(COMP_CH, "<== SwChnlCallback819xUsbWorkItem()\n");
}
/******************************************************************************
- *function: This function scheduled actual work item to set channel
- * input: net_device dev
- * u8 channel //channel to set
- * output: none
- * return: return code show if workitem is scheduled(1:pass, 0:fail)
- * Note: Delay may be required for RF configuration
- * ***************************************************************************/
-u8 rtl8192_phy_SwChnl(struct net_device* dev, u8 channel)
+ * function: This function scheduled actual work item to set channel
+ * input: net_device *dev
+ * u8 channel //channel to set
+ * output: none
+ * return: return code show if workitem is scheduled (1:pass, 0:fail)
+ * notice: Delay may be required for RF configuration
+ ******************************************************************************/
+u8 rtl8192_phy_SwChnl(struct net_device *dev, u8 channel)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- RT_TRACE(COMP_CH, "=====>%s(), SwChnlInProgress:%d\n", __FUNCTION__, priv->SwChnlInProgress);
- if(!priv->up)
+ RT_TRACE(COMP_CH, "%s(), SwChnlInProgress: %d\n", __func__,
+ priv->SwChnlInProgress);
+ if (!priv->up)
return false;
- if(priv->SwChnlInProgress)
+ if (priv->SwChnlInProgress)
return false;
-// if(pHalData->SetBWModeInProgress)
-// return;
-if (0) //to test current channel from RF reg 0x7.
-{
- u8 eRFPath;
- for(eRFPath = 0; eRFPath < 2; eRFPath++){
- printk("====>set channel:%x\n",rtl8192_phy_QueryRFReg(dev, (RF90_RADIO_PATH_E)eRFPath, 0x7, bZebra1_ChannelNum));
- udelay(10);
- }
-}
- //--------------------------------------------
- switch(priv->ieee80211->mode)
- {
+ /* -------------------------------------------- */
+ switch (priv->ieee80211->mode) {
case WIRELESS_MODE_A:
case WIRELESS_MODE_N_5G:
- if (channel<=14){
- RT_TRACE(COMP_ERR, "WIRELESS_MODE_A but channel<=14");
+ if (channel <= 14) {
+ RT_TRACE(COMP_ERR, "WIRELESS_MODE_A but channel<=14\n");
return false;
}
break;
case WIRELESS_MODE_B:
- if (channel>14){
- RT_TRACE(COMP_ERR, "WIRELESS_MODE_B but channel>14");
+ if (channel > 14) {
+ RT_TRACE(COMP_ERR, "WIRELESS_MODE_B but channel>14\n");
return false;
}
break;
case WIRELESS_MODE_G:
case WIRELESS_MODE_N_24G:
- if (channel>14){
- RT_TRACE(COMP_ERR, "WIRELESS_MODE_G but channel>14");
+ if (channel > 14) {
+ RT_TRACE(COMP_ERR, "WIRELESS_MODE_G but channel>14\n");
return false;
}
break;
}
- //--------------------------------------------
+ /* -------------------------------------------- */
priv->SwChnlInProgress = true;
- if(channel == 0)
+ if (channel == 0)
channel = 1;
- priv->chan=channel;
+ priv->chan = channel;
- priv->SwChnlStage=0;
- priv->SwChnlStep=0;
-// schedule_work(&(priv->SwChnlWorkItem));
-// rtl8192_SwChnl_WorkItem(dev);
- if(priv->up) {
-// queue_work(priv->priv_wq,&(priv->SwChnlWorkItem));
- rtl8192_SwChnl_WorkItem(dev);
- }
+ priv->SwChnlStage = 0;
+ priv->SwChnlStep = 0;
+ if (priv->up)
+ rtl8192_SwChnl_WorkItem(dev);
priv->SwChnlInProgress = false;
return true;
}
-
-//
/******************************************************************************
- *function: Callback routine of the work item for set bandwidth mode.
- * input: struct net_device *dev
- * HT_CHANNEL_WIDTH Bandwidth //20M or 40M
- * HT_EXTCHNL_OFFSET Offset //Upper, Lower, or Don't care
- * output: none
- * return: none
- * Note: I doubt whether SetBWModeInProgress flag is necessary as we can
- * test whether current work in the queue or not.//do I?
- * ***************************************************************************/
+ * function: Callback routine of the work item for set bandwidth mode.
+ * input: net_device *dev
+ * output: none
+ * return: none
+ * notice: I doubt whether SetBWModeInProgress flag is necessary as we can
+ * test whether current work in the queue or not.//do I?
+ *****************************************************************************/
void rtl8192_SetBWModeWorkItem(struct net_device *dev)
{
struct r8192_priv *priv = ieee80211_priv(dev);
u8 regBwOpMode;
- RT_TRACE(COMP_SWBW, "==>rtl8192_SetBWModeWorkItem() Switch to %s bandwidth\n", \
- priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20?"20MHz":"40MHz")
+ RT_TRACE(COMP_SWBW, "%s() Switch to %s bandwidth\n", __func__,
+ priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20?"20MHz":"40MHz");
- if(priv->rf_chip == RF_PSEUDO_11N)
- {
- priv->SetBWModeInProgress= false;
+ if (priv->rf_chip == RF_PSEUDO_11N) {
+ priv->SetBWModeInProgress = false;
return;
}
- //<1>Set MAC register
- regBwOpMode = read_nic_byte(dev, BW_OPMODE);
+ /* <1> Set MAC register */
+ read_nic_byte(dev, BW_OPMODE, &regBwOpMode);
- switch(priv->CurrentChannelBW)
- {
- case HT_CHANNEL_WIDTH_20:
- regBwOpMode |= BW_OPMODE_20MHZ;
- // 2007/02/07 Mark by Emily because we have not verify whether this register works
- write_nic_byte(dev, BW_OPMODE, regBwOpMode);
- break;
+ switch (priv->CurrentChannelBW) {
+ case HT_CHANNEL_WIDTH_20:
+ regBwOpMode |= BW_OPMODE_20MHZ;
+ /* We have not verify whether this register works */
+ write_nic_byte(dev, BW_OPMODE, regBwOpMode);
+ break;
- case HT_CHANNEL_WIDTH_20_40:
- regBwOpMode &= ~BW_OPMODE_20MHZ;
- // 2007/02/07 Mark by Emily because we have not verify whether this register works
- write_nic_byte(dev, BW_OPMODE, regBwOpMode);
- break;
+ case HT_CHANNEL_WIDTH_20_40:
+ regBwOpMode &= ~BW_OPMODE_20MHZ;
+ /* We have not verify whether this register works */
+ write_nic_byte(dev, BW_OPMODE, regBwOpMode);
+ break;
- default:
- RT_TRACE(COMP_ERR, "SetChannelBandwidth819xUsb(): unknown Bandwidth: %#X\n",priv->CurrentChannelBW);
- break;
+ default:
+ RT_TRACE(COMP_ERR,
+ "SetChannelBandwidth819xUsb(): unknown Bandwidth: %#X\n",
+ priv->CurrentChannelBW);
+ break;
}
- //<2>Set PHY related register
- switch(priv->CurrentChannelBW)
- {
- case HT_CHANNEL_WIDTH_20:
- // Add by Vivi 20071119
- rtl8192_setBBreg(dev, rFPGA0_RFMOD, bRFMOD, 0x0);
- rtl8192_setBBreg(dev, rFPGA1_RFMOD, bRFMOD, 0x0);
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x00100000, 1);
-
- // Correct the tx power for CCK rate in 20M. Suggest by YN, 20071207
- priv->cck_present_attentuation =
- priv->cck_present_attentuation_20Mdefault + priv->cck_present_attentuation_difference;
-
- if(priv->cck_present_attentuation > 22)
- priv->cck_present_attentuation= 22;
- if(priv->cck_present_attentuation< 0)
- priv->cck_present_attentuation = 0;
- RT_TRACE(COMP_INIT, "20M, pHalData->CCKPresentAttentuation = %d\n", priv->cck_present_attentuation);
-
- if(priv->chan == 14 && !priv->bcck_in_ch14)
- {
- priv->bcck_in_ch14 = TRUE;
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
- }
- else if(priv->chan != 14 && priv->bcck_in_ch14)
- {
- priv->bcck_in_ch14 = FALSE;
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
- }
- else
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
+ /* <2> Set PHY related register */
+ switch (priv->CurrentChannelBW) {
+ case HT_CHANNEL_WIDTH_20:
+ rtl8192_setBBreg(dev, rFPGA0_RFMOD, bRFMOD, 0x0);
+ rtl8192_setBBreg(dev, rFPGA1_RFMOD, bRFMOD, 0x0);
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1,
+ 0x00100000, 1);
+
+ /* Correct the tx power for CCK rate in 20M. */
+ priv->cck_present_attentuation =
+ priv->cck_present_attentuation_20Mdefault +
+ priv->cck_present_attentuation_difference;
+
+ if (priv->cck_present_attentuation > 22)
+ priv->cck_present_attentuation = 22;
+ if (priv->cck_present_attentuation < 0)
+ priv->cck_present_attentuation = 0;
+ RT_TRACE(COMP_INIT,
+ "20M, pHalData->CCKPresentAttentuation = %d\n",
+ priv->cck_present_attentuation);
+
+ if (priv->chan == 14 && !priv->bcck_in_ch14) {
+ priv->bcck_in_ch14 = TRUE;
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ } else if (priv->chan != 14 && priv->bcck_in_ch14) {
+ priv->bcck_in_ch14 = FALSE;
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ } else {
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ }
- break;
- case HT_CHANNEL_WIDTH_20_40:
- // Add by Vivi 20071119
- rtl8192_setBBreg(dev, rFPGA0_RFMOD, bRFMOD, 0x1);
- rtl8192_setBBreg(dev, rFPGA1_RFMOD, bRFMOD, 0x1);
- rtl8192_setBBreg(dev, rCCK0_System, bCCKSideBand, (priv->nCur40MhzPrimeSC>>1));
- rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x00100000, 0);
- rtl8192_setBBreg(dev, rOFDM1_LSTF, 0xC00, priv->nCur40MhzPrimeSC);
- priv->cck_present_attentuation =
- priv->cck_present_attentuation_40Mdefault + priv->cck_present_attentuation_difference;
-
- if(priv->cck_present_attentuation > 22)
- priv->cck_present_attentuation = 22;
- if(priv->cck_present_attentuation < 0)
- priv->cck_present_attentuation = 0;
-
- RT_TRACE(COMP_INIT, "40M, pHalData->CCKPresentAttentuation = %d\n", priv->cck_present_attentuation);
- if(priv->chan == 14 && !priv->bcck_in_ch14)
- {
- priv->bcck_in_ch14 = true;
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
- }
- else if(priv->chan!= 14 && priv->bcck_in_ch14)
- {
- priv->bcck_in_ch14 = false;
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
- }
- else
- dm_cck_txpower_adjust(dev,priv->bcck_in_ch14);
+ break;
+ case HT_CHANNEL_WIDTH_20_40:
+ rtl8192_setBBreg(dev, rFPGA0_RFMOD, bRFMOD, 0x1);
+ rtl8192_setBBreg(dev, rFPGA1_RFMOD, bRFMOD, 0x1);
+ rtl8192_setBBreg(dev, rCCK0_System, bCCKSideBand,
+ priv->nCur40MhzPrimeSC>>1);
+ rtl8192_setBBreg(dev, rFPGA0_AnalogParameter1, 0x00100000, 0);
+ rtl8192_setBBreg(dev, rOFDM1_LSTF, 0xC00,
+ priv->nCur40MhzPrimeSC);
+ priv->cck_present_attentuation =
+ priv->cck_present_attentuation_40Mdefault +
+ priv->cck_present_attentuation_difference;
+
+ if (priv->cck_present_attentuation > 22)
+ priv->cck_present_attentuation = 22;
+ if (priv->cck_present_attentuation < 0)
+ priv->cck_present_attentuation = 0;
+
+ RT_TRACE(COMP_INIT,
+ "40M, pHalData->CCKPresentAttentuation = %d\n",
+ priv->cck_present_attentuation);
+ if (priv->chan == 14 && !priv->bcck_in_ch14) {
+ priv->bcck_in_ch14 = true;
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ } else if (priv->chan != 14 && priv->bcck_in_ch14) {
+ priv->bcck_in_ch14 = false;
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ } else {
+ dm_cck_txpower_adjust(dev, priv->bcck_in_ch14);
+ }
- break;
- default:
- RT_TRACE(COMP_ERR, "SetChannelBandwidth819xUsb(): unknown Bandwidth: %#X\n" ,priv->CurrentChannelBW);
- break;
+ break;
+ default:
+ RT_TRACE(COMP_ERR,
+ "SetChannelBandwidth819xUsb(): unknown Bandwidth: %#X\n",
+ priv->CurrentChannelBW);
+ break;
}
- //Skip over setting of J-mode in BB register here. Default value is "None J mode". Emily 20070315
+ /* Skip over setting of J-mode in BB register here.
+ Default value is "None J mode". */
- //<3>Set RF related register
- switch( priv->rf_chip )
- {
- case RF_8225:
+ /* <3> Set RF related register */
+ switch (priv->rf_chip) {
+ case RF_8225:
#ifdef TO_DO_LIST
- PHY_SetRF8225Bandwidth(Adapter, pHalData->CurrentChannelBW);
+ PHY_SetRF8225Bandwidth(Adapter, pHalData->CurrentChannelBW);
#endif
- break;
+ break;
- case RF_8256:
- PHY_SetRF8256Bandwidth(dev, priv->CurrentChannelBW);
- break;
+ case RF_8256:
+ PHY_SetRF8256Bandwidth(dev, priv->CurrentChannelBW);
+ break;
- case RF_8258:
- // PHY_SetRF8258Bandwidth();
- break;
+ case RF_8258:
+ break;
- case RF_PSEUDO_11N:
- // Do Nothing
- break;
+ case RF_PSEUDO_11N:
+ break;
- default:
- RT_TRACE(COMP_ERR, "Unknown RFChipID: %d\n", priv->rf_chip);
- break;
+ default:
+ RT_TRACE(COMP_ERR, "Unknown RFChipID: %d\n", priv->rf_chip);
+ break;
}
- priv->SetBWModeInProgress= false;
+ priv->SetBWModeInProgress = false;
- RT_TRACE(COMP_SWBW, "<==SetBWMode819xUsb(), %d", atomic_read(&(priv->ieee80211->atm_swbw)) );
+ RT_TRACE(COMP_SWBW, "<==SetBWMode819xUsb(), %d\n",
+ atomic_read(&priv->ieee80211->atm_swbw));
}
/******************************************************************************
- *function: This function schedules bandwidth switch work.
- * input: struct net_device *dev
- * HT_CHANNEL_WIDTH Bandwidth //20M or 40M
- * HT_EXTCHNL_OFFSET Offset //Upper, Lower, or Don't care
- * output: none
- * return: none
- * Note: I doubt whether SetBWModeInProgress flag is necessary as we can
- * test whether current work in the queue or not.//do I?
- * ***************************************************************************/
-void rtl8192_SetBWMode(struct net_device *dev, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset)
+ * function: This function schedules bandwidth switch work.
+ * input: struct net_deviceq *dev
+ * HT_CHANNEL_WIDTH bandwidth //20M or 40M
+ * HT_EXTCHNL_OFFSET offset //Upper, Lower, or Don't care
+ * output: none
+ * return: none
+ * notice: I doubt whether SetBWModeInProgress flag is necessary as we can
+ * test whether current work in the queue or not.//do I?
+ *****************************************************************************/
+void rtl8192_SetBWMode(struct net_device *dev, HT_CHANNEL_WIDTH bandwidth,
+ HT_EXTCHNL_OFFSET offset)
{
struct r8192_priv *priv = ieee80211_priv(dev);
- if(priv->SetBWModeInProgress)
+ if (priv->SetBWModeInProgress)
return;
- priv->SetBWModeInProgress= true;
+ priv->SetBWModeInProgress = true;
- priv->CurrentChannelBW = Bandwidth;
+ priv->CurrentChannelBW = bandwidth;
- if(Offset==HT_EXTCHNL_OFFSET_LOWER)
+ if (offset == HT_EXTCHNL_OFFSET_LOWER)
priv->nCur40MhzPrimeSC = HAL_PRIME_CHNL_OFFSET_UPPER;
- else if(Offset==HT_EXTCHNL_OFFSET_UPPER)
+ else if (offset == HT_EXTCHNL_OFFSET_UPPER)
priv->nCur40MhzPrimeSC = HAL_PRIME_CHNL_OFFSET_LOWER;
else
priv->nCur40MhzPrimeSC = HAL_PRIME_CHNL_OFFSET_DONT_CARE;
- //queue_work(priv->priv_wq, &(priv->SetBWModeWorkItem));
- // schedule_work(&(priv->SetBWModeWorkItem));
rtl8192_SetBWModeWorkItem(dev);
}
@@ -1685,88 +1709,110 @@ void InitialGain819xUsb(struct net_device *dev, u8 Operation)
priv->InitialGainOperateType = Operation;
- if(priv->up)
- {
- queue_delayed_work(priv->priv_wq,&priv->initialgain_operate_wq,0);
- }
+ if (priv->up)
+ queue_delayed_work(priv->priv_wq, &priv->initialgain_operate_wq, 0);
}
extern void InitialGainOperateWorkItemCallBack(struct work_struct *work)
{
- struct delayed_work *dwork = container_of(work,struct delayed_work,work);
- struct r8192_priv *priv = container_of(dwork,struct r8192_priv,initialgain_operate_wq);
- struct net_device *dev = priv->ieee80211->dev;
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct r8192_priv *priv = container_of(dwork, struct r8192_priv,
+ initialgain_operate_wq);
+ struct net_device *dev = priv->ieee80211->dev;
#define SCAN_RX_INITIAL_GAIN 0x17
#define POWER_DETECTION_TH 0x08
- u32 BitMask;
+ u32 bitmask;
u8 initial_gain;
u8 Operation;
Operation = priv->InitialGainOperateType;
- switch(Operation)
- {
- case IG_Backup:
- RT_TRACE(COMP_SCAN, "IG_Backup, backup the initial gain.\n");
- initial_gain = SCAN_RX_INITIAL_GAIN;//priv->DefaultInitialGain[0];//
- BitMask = bMaskByte0;
- if(dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
- rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x8); // FW DIG OFF
- priv->initgain_backup.xaagccore1 = (u8)rtl8192_QueryBBReg(dev, rOFDM0_XAAGCCore1, BitMask);
- priv->initgain_backup.xbagccore1 = (u8)rtl8192_QueryBBReg(dev, rOFDM0_XBAGCCore1, BitMask);
- priv->initgain_backup.xcagccore1 = (u8)rtl8192_QueryBBReg(dev, rOFDM0_XCAGCCore1, BitMask);
- priv->initgain_backup.xdagccore1 = (u8)rtl8192_QueryBBReg(dev, rOFDM0_XDAGCCore1, BitMask);
- BitMask = bMaskByte2;
- priv->initgain_backup.cca = (u8)rtl8192_QueryBBReg(dev, rCCK0_CCA, BitMask);
-
- RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc50 is %x\n",priv->initgain_backup.xaagccore1);
- RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc58 is %x\n",priv->initgain_backup.xbagccore1);
- RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc60 is %x\n",priv->initgain_backup.xcagccore1);
- RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc68 is %x\n",priv->initgain_backup.xdagccore1);
- RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xa0a is %x\n",priv->initgain_backup.cca);
-
- RT_TRACE(COMP_SCAN, "Write scan initial gain = 0x%x \n", initial_gain);
- write_nic_byte(dev, rOFDM0_XAAGCCore1, initial_gain);
- write_nic_byte(dev, rOFDM0_XBAGCCore1, initial_gain);
- write_nic_byte(dev, rOFDM0_XCAGCCore1, initial_gain);
- write_nic_byte(dev, rOFDM0_XDAGCCore1, initial_gain);
- RT_TRACE(COMP_SCAN, "Write scan 0xa0a = 0x%x \n", POWER_DETECTION_TH);
- write_nic_byte(dev, 0xa0a, POWER_DETECTION_TH);
- break;
- case IG_Restore:
- RT_TRACE(COMP_SCAN, "IG_Restore, restore the initial gain.\n");
- BitMask = 0x7f; //Bit0~ Bit6
- if(dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
- rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x8); // FW DIG OFF
-
- rtl8192_setBBreg(dev, rOFDM0_XAAGCCore1, BitMask, (u32)priv->initgain_backup.xaagccore1);
- rtl8192_setBBreg(dev, rOFDM0_XBAGCCore1, BitMask, (u32)priv->initgain_backup.xbagccore1);
- rtl8192_setBBreg(dev, rOFDM0_XCAGCCore1, BitMask, (u32)priv->initgain_backup.xcagccore1);
- rtl8192_setBBreg(dev, rOFDM0_XDAGCCore1, BitMask, (u32)priv->initgain_backup.xdagccore1);
- BitMask = bMaskByte2;
- rtl8192_setBBreg(dev, rCCK0_CCA, BitMask, (u32)priv->initgain_backup.cca);
-
- RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc50 is %x\n",priv->initgain_backup.xaagccore1);
- RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc58 is %x\n",priv->initgain_backup.xbagccore1);
- RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc60 is %x\n",priv->initgain_backup.xcagccore1);
- RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc68 is %x\n",priv->initgain_backup.xdagccore1);
- RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xa0a is %x\n",priv->initgain_backup.cca);
+ switch (Operation) {
+ case IG_Backup:
+ RT_TRACE(COMP_SCAN, "IG_Backup, backup the initial gain.\n");
+ initial_gain = SCAN_RX_INITIAL_GAIN;
+ bitmask = bMaskByte0;
+ if (dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
+ /* FW DIG OFF */
+ rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x8);
+ priv->initgain_backup.xaagccore1 =
+ (u8)rtl8192_QueryBBReg(dev, rOFDM0_XAAGCCore1, bitmask);
+ priv->initgain_backup.xbagccore1 =
+ (u8)rtl8192_QueryBBReg(dev, rOFDM0_XBAGCCore1, bitmask);
+ priv->initgain_backup.xcagccore1 =
+ (u8)rtl8192_QueryBBReg(dev, rOFDM0_XCAGCCore1, bitmask);
+ priv->initgain_backup.xdagccore1 =
+ (u8)rtl8192_QueryBBReg(dev, rOFDM0_XDAGCCore1, bitmask);
+ bitmask = bMaskByte2;
+ priv->initgain_backup.cca =
+ (u8)rtl8192_QueryBBReg(dev, rCCK0_CCA, bitmask);
+
+ RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc50 is %x\n",
+ priv->initgain_backup.xaagccore1);
+ RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc58 is %x\n",
+ priv->initgain_backup.xbagccore1);
+ RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc60 is %x\n",
+ priv->initgain_backup.xcagccore1);
+ RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xc68 is %x\n",
+ priv->initgain_backup.xdagccore1);
+ RT_TRACE(COMP_SCAN, "Scan InitialGainBackup 0xa0a is %x\n",
+ priv->initgain_backup.cca);
+
+ RT_TRACE(COMP_SCAN, "Write scan initial gain = 0x%x \n",
+ initial_gain);
+ write_nic_byte(dev, rOFDM0_XAAGCCore1, initial_gain);
+ write_nic_byte(dev, rOFDM0_XBAGCCore1, initial_gain);
+ write_nic_byte(dev, rOFDM0_XCAGCCore1, initial_gain);
+ write_nic_byte(dev, rOFDM0_XDAGCCore1, initial_gain);
+ RT_TRACE(COMP_SCAN, "Write scan 0xa0a = 0x%x \n",
+ POWER_DETECTION_TH);
+ write_nic_byte(dev, 0xa0a, POWER_DETECTION_TH);
+ break;
+ case IG_Restore:
+ RT_TRACE(COMP_SCAN, "IG_Restore, restore the initial gain.\n");
+ bitmask = 0x7f; /* Bit0 ~ Bit6 */
+ if (dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
+ /* FW DIG OFF */
+ rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x8);
+
+ rtl8192_setBBreg(dev, rOFDM0_XAAGCCore1, bitmask,
+ (u32)priv->initgain_backup.xaagccore1);
+ rtl8192_setBBreg(dev, rOFDM0_XBAGCCore1, bitmask,
+ (u32)priv->initgain_backup.xbagccore1);
+ rtl8192_setBBreg(dev, rOFDM0_XCAGCCore1, bitmask,
+ (u32)priv->initgain_backup.xcagccore1);
+ rtl8192_setBBreg(dev, rOFDM0_XDAGCCore1, bitmask,
+ (u32)priv->initgain_backup.xdagccore1);
+ bitmask = bMaskByte2;
+ rtl8192_setBBreg(dev, rCCK0_CCA, bitmask,
+ (u32)priv->initgain_backup.cca);
+
+ RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc50 is %x\n",
+ priv->initgain_backup.xaagccore1);
+ RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc58 is %x\n",
+ priv->initgain_backup.xbagccore1);
+ RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc60 is %x\n",
+ priv->initgain_backup.xcagccore1);
+ RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xc68 is %x\n",
+ priv->initgain_backup.xdagccore1);
+ RT_TRACE(COMP_SCAN, "Scan BBInitialGainRestore 0xa0a is %x\n",
+ priv->initgain_backup.cca);
#ifdef RTL8190P
- SetTxPowerLevel8190(Adapter,priv->CurrentChannel);
+ SetTxPowerLevel8190(Adapter, priv->CurrentChannel);
#endif
#ifdef RTL8192E
- SetTxPowerLevel8190(Adapter,priv->CurrentChannel);
+ SetTxPowerLevel8190(Adapter, priv->CurrentChannel);
#endif
-//#ifdef RTL8192U
- rtl8192_phy_setTxPower(dev,priv->ieee80211->current_network.channel);
-//#endif
+ rtl8192_phy_setTxPower(dev, priv->ieee80211->current_network.channel);
- if(dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
- rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x1); // FW DIG ON
- break;
- default:
- RT_TRACE(COMP_SCAN, "Unknown IG Operation. \n");
- break;
+ if (dm_digtable.dig_algorithm == DIG_ALGO_BY_FALSE_ALARM)
+ /* FW DIG ON */
+ rtl8192_setBBreg(dev, UFWP, bMaskByte1, 0x1);
+ break;
+ default:
+ RT_TRACE(COMP_SCAN, "Unknown IG Operation. \n");
+ break;
}
}
diff --git a/drivers/staging/rtl8192u/r819xU_phy.h b/drivers/staging/rtl8192u/r819xU_phy.h
index 3e3bc577e6c3..f3c352a10fe0 100644
--- a/drivers/staging/rtl8192u/r819xU_phy.h
+++ b/drivers/staging/rtl8192u/r819xU_phy.h
@@ -1,12 +1,12 @@
#ifndef _R819XU_PHY_H
#define _R819XU_PHY_H
-/* Channel switch:The size of command tables for switch channel*/
+/* Channel switch: The size of command tables for switch channel */
#define MAX_PRECMD_CNT 16
#define MAX_RFDEPENDCMD_CNT 16
#define MAX_POSTCMD_CNT 16
-typedef enum _SwChnlCmdID{
+typedef enum _SwChnlCmdID {
CmdID_End,
CmdID_SetTxPowerLevel,
CmdID_BBRegWrite10,
@@ -14,16 +14,16 @@ typedef enum _SwChnlCmdID{
CmdID_WritePortUshort,
CmdID_WritePortUchar,
CmdID_RF_WriteReg,
-}SwChnlCmdID;
+} SwChnlCmdID;
-/*--------------------------------Define structure--------------------------------*/
+/* -----------------------Define structure---------------------- */
/* 1. Switch channel related */
-typedef struct _SwChnlCmd{
+typedef struct _SwChnlCmd {
SwChnlCmdID CmdID;
- u32 Para1;
- u32 Para2;
- u32 msDelay;
-}__attribute__ ((packed)) SwChnlCmd;
+ u32 Para1;
+ u32 Para2;
+ u32 msDelay;
+} __attribute__ ((packed)) SwChnlCmd;
extern u32 rtl819XMACPHY_Array_PG[];
extern u32 rtl819XPHY_REG_1T2RArray[];
@@ -33,21 +33,21 @@ extern u32 rtl819XRadioB_Array[];
extern u32 rtl819XRadioC_Array[];
extern u32 rtl819XRadioD_Array[];
-typedef enum _HW90_BLOCK{
+typedef enum _HW90_BLOCK {
HW90_BLOCK_MAC = 0,
HW90_BLOCK_PHY0 = 1,
HW90_BLOCK_PHY1 = 2,
HW90_BLOCK_RF = 3,
- HW90_BLOCK_MAXIMUM = 4, // Never use this
-}HW90_BLOCK_E, *PHW90_BLOCK_E;
+ HW90_BLOCK_MAXIMUM = 4, /* Never use this */
+} HW90_BLOCK_E, *PHW90_BLOCK_E;
-typedef enum _RF90_RADIO_PATH{
- RF90_PATH_A = 0, //Radio Path A
- RF90_PATH_B = 1, //Radio Path B
- RF90_PATH_C = 2, //Radio Path C
- RF90_PATH_D = 3, //Radio Path D
- RF90_PATH_MAX //Max RF number 92 support
-}RF90_RADIO_PATH_E, *PRF90_RADIO_PATH_E;
+typedef enum _RF90_RADIO_PATH {
+ RF90_PATH_A = 0, /* Radio Path A */
+ RF90_PATH_B = 1, /* Radio Path B */
+ RF90_PATH_C = 2, /* Radio Path C */
+ RF90_PATH_D = 3, /* Radio Path D */
+ RF90_PATH_MAX /* Max RF number 92 support */
+} RF90_RADIO_PATH_E, *PRF90_RADIO_PATH_E;
#define bMaskByte0 0xff
#define bMaskByte1 0xff00
@@ -57,33 +57,35 @@ typedef enum _RF90_RADIO_PATH{
#define bMaskLWord 0x0000ffff
#define bMaskDWord 0xffffffff
-//extern u32 rtl8192_CalculateBitShift(u32 dwBitMask);
-extern u8 rtl8192_phy_CheckIsLegalRFPath(struct net_device* dev, u32 eRFPath);
-extern void rtl8192_setBBreg(struct net_device* dev, u32 dwRegAddr, u32 dwBitMask, u32 dwData);
-extern u32 rtl8192_QueryBBReg(struct net_device* dev, u32 dwRegAddr, u32 dwBitMask);
-//extern u32 rtl8192_phy_RFSerialRead(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset);
-//extern void rtl8192_phy_RFSerialWrite(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 Offset, u32 Data);
-extern void rtl8192_phy_SetRFReg(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 RegAddr, u32 BitMask, u32 Data);
-extern u32 rtl8192_phy_QueryRFReg(struct net_device* dev, RF90_RADIO_PATH_E eRFPath, u32 RegAddr, u32 BitMask);
-extern void rtl8192_phy_configmac(struct net_device* dev);
-extern void rtl8192_phyConfigBB(struct net_device* dev, u8 ConfigType);
-//extern void rtl8192_InitBBRFRegDef(struct net_device* dev);
-extern u8 rtl8192_phy_checkBBAndRF(struct net_device* dev, HW90_BLOCK_E CheckBlock, RF90_RADIO_PATH_E eRFPath);
-//extern void rtl8192_BB_Config_ParaFile(struct net_device* dev);
-extern void rtl8192_BBConfig(struct net_device* dev);
-extern void rtl8192_phy_getTxPower(struct net_device* dev);
-extern void rtl8192_phy_setTxPower(struct net_device* dev, u8 channel);
-extern void rtl8192_phy_RFConfig(struct net_device* dev);
-extern void rtl8192_phy_updateInitGain(struct net_device* dev);
-extern u8 rtl8192_phy_ConfigRFWithHeaderFile(struct net_device* dev, RF90_RADIO_PATH_E eRFPath);
+extern u8 rtl8192_phy_CheckIsLegalRFPath(struct net_device *dev, u32 eRFPath);
+extern void rtl8192_setBBreg(struct net_device *dev, u32 reg_addr,
+ u32 bitmask, u32 data);
+extern u32 rtl8192_QueryBBReg(struct net_device *dev, u32 reg_addr,
+ u32 bitmask);
+extern void rtl8192_phy_SetRFReg(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath, u32 reg_addr, u32 bitmask, u32 data);
+extern u32 rtl8192_phy_QueryRFReg(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath, u32 reg_addr, u32 bitmask);
+extern void rtl8192_phy_configmac(struct net_device *dev);
+extern void rtl8192_phyConfigBB(struct net_device *dev, u8 ConfigType);
+extern u8 rtl8192_phy_checkBBAndRF(struct net_device *dev,
+ HW90_BLOCK_E CheckBlock, RF90_RADIO_PATH_E eRFPath);
+extern void rtl8192_BBConfig(struct net_device *dev);
+extern void rtl8192_phy_getTxPower(struct net_device *dev);
+extern void rtl8192_phy_setTxPower(struct net_device *dev, u8 channel);
+extern void rtl8192_phy_RFConfig(struct net_device *dev);
+extern void rtl8192_phy_updateInitGain(struct net_device *dev);
+extern u8 rtl8192_phy_ConfigRFWithHeaderFile(struct net_device *dev,
+ RF90_RADIO_PATH_E eRFPath);
-extern u8 rtl8192_phy_SwChnl(struct net_device* dev, u8 channel);
-extern void rtl8192_SetBWMode(struct net_device *dev, HT_CHANNEL_WIDTH Bandwidth, HT_EXTCHNL_OFFSET Offset);
+extern u8 rtl8192_phy_SwChnl(struct net_device *dev, u8 channel);
+extern void rtl8192_SetBWMode(struct net_device *dev,
+ HT_CHANNEL_WIDTH bandwidth, HT_EXTCHNL_OFFSET offset);
extern void rtl8192_SwChnl_WorkItem(struct net_device *dev);
void rtl8192_SetBWModeWorkItem(struct net_device *dev);
-extern bool rtl8192_SetRFPowerState(struct net_device *dev, RT_RF_POWER_STATE eRFPowerState);
-//added by amy
-extern void InitialGain819xUsb(struct net_device *dev, u8 Operation);
+extern bool rtl8192_SetRFPowerState(struct net_device *dev,
+ RT_RF_POWER_STATE eRFPowerState);
+extern void InitialGain819xUsb(struct net_device *dev, u8 Operation);
extern void InitialGainOperateWorkItemCallBack(struct work_struct *work);
diff --git a/drivers/staging/rts5139/rts51x_transport.c b/drivers/staging/rts5139/rts51x_transport.c
index 89e4d805a345..c172f4ae7c23 100644
--- a/drivers/staging/rts5139/rts51x_transport.c
+++ b/drivers/staging/rts5139/rts51x_transport.c
@@ -635,12 +635,12 @@ int rts51x_get_epc_status(struct rts51x_chip *chip, u16 *status)
ep = chip->usb->pusb_dev->ep_in[usb_pipeendpoint(pipe)];
/* fill and submit the URB */
- /* We set interval to 1 here, so the polling interval is controlled
- * by our polling thread */
+ /* Set interval to 10 here to match the endpoint descriptor,
+ * the polling interval is controlled by the polling thread */
usb_fill_int_urb(chip->usb->intr_urb, chip->usb->pusb_dev, pipe,
- status, 2, urb_done_completion, &urb_done, 1);
+ status, 2, urb_done_completion, &urb_done, 10);
- result = rts51x_msg_common(chip, chip->usb->intr_urb, 50);
+ result = rts51x_msg_common(chip, chip->usb->intr_urb, 100);
return interpret_urb_result(chip, pipe, 2, result,
chip->usb->intr_urb->actual_length);
diff --git a/drivers/staging/sb105x/sb_pci_mp.c b/drivers/staging/sb105x/sb_pci_mp.c
index cd94f6c27723..23db32f07fd5 100644
--- a/drivers/staging/sb105x/sb_pci_mp.c
+++ b/drivers/staging/sb105x/sb_pci_mp.c
@@ -18,11 +18,11 @@ static struct irq_info irq_lists[NR_IRQS];
static _INLINE_ unsigned int serial_in(struct mp_port *mtpt, int offset);
static _INLINE_ void serial_out(struct mp_port *mtpt, int offset, int value);
static _INLINE_ unsigned int read_option_register(struct mp_port *mtpt, int offset);
-static int sb1054_get_register(struct sb_uart_port * port, int page, int reg);
-static int sb1054_set_register(struct sb_uart_port * port, int page, int reg, int value);
-static void SendATCommand(struct mp_port * mtpt);
-static int set_deep_fifo(struct sb_uart_port * port, int status);
-static int get_deep_fifo(struct sb_uart_port * port);
+static int sb1054_get_register(struct sb_uart_port *port, int page, int reg);
+static int sb1054_set_register(struct sb_uart_port *port, int page, int reg, int value);
+static void SendATCommand(struct mp_port *mtpt);
+static int set_deep_fifo(struct sb_uart_port *port, int status);
+static int get_deep_fifo(struct sb_uart_port *port);
static int get_device_type(int arg);
static int set_auto_rts(struct sb_uart_port *port, int status);
static void mp_stop(struct tty_struct *tty);
@@ -38,7 +38,7 @@ static inline int __mp_put_char(struct sb_uart_port *port, struct circ_buf *circ
static int mp_put_char(struct tty_struct *tty, unsigned char ch);
static void mp_put_chars(struct tty_struct *tty);
-static int mp_write(struct tty_struct *tty, const unsigned char * buf, int count);
+static int mp_write(struct tty_struct *tty, const unsigned char *buf, int count);
static int mp_write_room(struct tty_struct *tty);
static int mp_chars_in_buffer(struct tty_struct *tty);
static void mp_flush_buffer(struct tty_struct *tty);
@@ -102,7 +102,7 @@ static void multi_release_port(struct sb_uart_port *port);
static int multi_request_port(struct sb_uart_port *port);
static void multi_config_port(struct sb_uart_port *port, int flags);
static int multi_verify_port(struct sb_uart_port *port, struct serial_struct *ser);
-static const char * multi_type(struct sb_uart_port *port);
+static const char *multi_type(struct sb_uart_port *port);
static void __init multi_init_ports(void);
static void __init multi_register_ports(struct uart_driver *drv);
static int init_mp_dev(struct pci_dev *pcidev, mppcibrd_t brd);
@@ -173,7 +173,7 @@ static int sb1053a_get_interface(struct mp_port *mtpt, int port_num)
return (interface);
}
-static int sb1054_get_register(struct sb_uart_port * port, int page, int reg)
+static int sb1054_get_register(struct sb_uart_port *port, int page, int reg)
{
int ret = 0;
unsigned int lcr = 0;
@@ -235,7 +235,7 @@ static int sb1054_get_register(struct sb_uart_port * port, int page, int reg)
return ret;
}
-static int sb1054_set_register(struct sb_uart_port * port, int page, int reg, int value)
+static int sb1054_set_register(struct sb_uart_port *port, int page, int reg, int value)
{
int lcr = 0;
int mcr = 0;
@@ -332,7 +332,7 @@ static int set_multidrop_addr(struct sb_uart_port *port, unsigned int addr)
return 0;
}
-static void SendATCommand(struct mp_port * mtpt)
+static void SendATCommand(struct mp_port *mtpt)
{
// a t cr lf
unsigned char ch[] = {0x61,0x74,0x0d,0x0a,0x0};
@@ -360,7 +360,7 @@ static void SendATCommand(struct mp_port * mtpt)
}// end of SendATCommand()
-static int set_deep_fifo(struct sb_uart_port * port, int status)
+static int set_deep_fifo(struct sb_uart_port *port, int status)
{
int afr_status = 0;
afr_status = sb1054_get_register(port, PAGE_4, SB105X_AFR);
@@ -416,7 +416,7 @@ static int get_device_type(int arg)
}
}
-static int get_deep_fifo(struct sb_uart_port * port)
+static int get_deep_fifo(struct sb_uart_port *port)
{
int afr_status = 0;
afr_status = sb1054_get_register(port, PAGE_4, SB105X_AFR);
@@ -638,7 +638,7 @@ static void mp_put_chars(struct tty_struct *tty)
mp_start(tty);
}
-static int mp_write(struct tty_struct *tty, const unsigned char * buf, int count)
+static int mp_write(struct tty_struct *tty, const unsigned char *buf, int count)
{
struct sb_uart_state *state = tty->driver_data;
struct sb_uart_port *port;
@@ -2754,7 +2754,7 @@ static int multi_verify_port(struct sb_uart_port *port, struct serial_struct *se
return 0;
}
-static const char * multi_type(struct sb_uart_port *port)
+static const char *multi_type(struct sb_uart_port *port)
{
int type = port->type;
@@ -2800,7 +2800,7 @@ static void __init multi_init_ports(void)
int i,j,k;
unsigned char osc;
unsigned char b_ret = 0;
- static struct mp_device_t * sbdev;
+ static struct mp_device_t *sbdev;
if (!first)
return;
@@ -2918,10 +2918,10 @@ static int pci_remap_base(struct pci_dev *pcidev, unsigned int offset,
static int init_mp_dev(struct pci_dev *pcidev, mppcibrd_t brd)
{
- static struct mp_device_t * sbdev = mp_devs;
+ static struct mp_device_t *sbdev = mp_devs;
unsigned long addr = 0;
int j;
- struct resource * ret = NULL;
+ struct resource *ret = NULL;
sbdev->device_id = brd.device_id;
pci_read_config_byte(pcidev, PCI_CLASS_REVISION, &(sbdev->revision));
diff --git a/drivers/staging/sb105x/sb_pci_mp.h b/drivers/staging/sb105x/sb_pci_mp.h
index a15f470a1728..11d92992e925 100644
--- a/drivers/staging/sb105x/sb_pci_mp.h
+++ b/drivers/staging/sb105x/sb_pci_mp.h
@@ -19,7 +19,6 @@
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
-#include <asm/segment.h>
#include <asm/serial.h>
#include <linux/interrupt.h>
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index 880f5c0011f2..73fc3cc19e33 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -155,10 +155,10 @@ struct quatech_port {
struct urb *read_urb; /* read URB for this port */
struct urb *int_urb;
- __u8 shadowLCR; /* last LCR value received */
- __u8 shadowMCR; /* last MCR value received */
- __u8 shadowMSR; /* last MSR value received */
- __u8 shadowLSR; /* last LSR value received */
+ __u8 shadow_lcr; /* last LCR value received */
+ __u8 shadow_mcr; /* last MCR value received */
+ __u8 shadow_msr; /* last MSR value received */
+ __u8 shadow_lsr; /* last LSR value received */
char open_ports;
/* Used for TIOCMIWAIT */
@@ -170,12 +170,12 @@ struct quatech_port {
struct async_icount icount;
struct usb_serial_port *port; /* owner of this object */
- struct qt_get_device_data DeviceData;
+ struct qt_get_device_data device_data;
struct mutex lock;
bool read_urb_busy;
- int RxHolding;
- int ReadBulkStopped;
- char closePending;
+ int rx_holding;
+ int read_bulk_stopped;
+ char close_pending;
};
static int port_paranoia_check(struct usb_serial_port *port,
@@ -238,24 +238,24 @@ static struct usb_serial *get_usb_serial(struct usb_serial_port *port,
return port->serial;
}
-static void ProcessLineStatus(struct quatech_port *qt_port,
+static void process_line_status(struct quatech_port *qt_port,
unsigned char line_status)
{
- qt_port->shadowLSR =
+ qt_port->shadow_lsr =
line_status & (SERIAL_LSR_OE | SERIAL_LSR_PE | SERIAL_LSR_FE |
SERIAL_LSR_BI);
}
-static void ProcessModemStatus(struct quatech_port *qt_port,
+static void process_modem_status(struct quatech_port *qt_port,
unsigned char modem_status)
{
- qt_port->shadowMSR = modem_status;
+ qt_port->shadow_msr = modem_status;
wake_up_interruptible(&qt_port->wait);
}
-static void ProcessRxChar(struct usb_serial_port *port, unsigned char data)
+static void process_rx_char(struct usb_serial_port *port, unsigned char data)
{
struct urb *urb = port->read_urb;
if (urb->actual_length)
@@ -291,35 +291,35 @@ static void qt_status_change_check(struct urb *urb,
{
int flag, i;
unsigned char *data = urb->transfer_buffer;
- unsigned int RxCount = urb->actual_length;
+ unsigned int rx_count = urb->actual_length;
- for (i = 0; i < RxCount; ++i) {
+ for (i = 0; i < rx_count; ++i) {
/* Look ahead code here */
- if ((i <= (RxCount - 3)) && (data[i] == 0x1b)
+ if ((i <= (rx_count - 3)) && (data[i] == 0x1b)
&& (data[i + 1] == 0x1b)) {
flag = 0;
switch (data[i + 2]) {
case 0x00:
- if (i > (RxCount - 4)) {
+ if (i > (rx_count - 4)) {
dev_dbg(&port->dev,
"Illegal escape seuences in received data\n");
break;
}
- ProcessLineStatus(qt_port, data[i + 3]);
+ process_line_status(qt_port, data[i + 3]);
i += 3;
flag = 1;
break;
case 0x01:
- if (i > (RxCount - 4)) {
+ if (i > (rx_count - 4)) {
dev_dbg(&port->dev,
"Illegal escape seuences in received data\n");
break;
}
- ProcessModemStatus(qt_port, data[i + 3]);
+ process_modem_status(qt_port, data[i + 3]);
i += 3;
flag = 1;
@@ -328,8 +328,8 @@ static void qt_status_change_check(struct urb *urb,
case 0xff:
dev_dbg(&port->dev, "No status sequence.\n");
- ProcessRxChar(port, data[i]);
- ProcessRxChar(port, data[i + 1]);
+ process_rx_char(port, data[i]);
+ process_rx_char(port, data[i + 1]);
i += 2;
break;
@@ -354,7 +354,7 @@ static void qt_read_bulk_callback(struct urb *urb)
int result;
if (urb->status) {
- qt_port->ReadBulkStopped = 1;
+ qt_port->read_bulk_stopped = 1;
dev_dbg(&urb->dev->dev,
"%s - nonzero write bulk status received: %d\n",
__func__, urb->status);
@@ -362,36 +362,36 @@ static void qt_read_bulk_callback(struct urb *urb)
}
dev_dbg(&port->dev,
- "%s - port->RxHolding = %d\n", __func__, qt_port->RxHolding);
+ "%s - port->rx_holding = %d\n", __func__, qt_port->rx_holding);
if (port_paranoia_check(port, __func__) != 0) {
- qt_port->ReadBulkStopped = 1;
+ qt_port->read_bulk_stopped = 1;
return;
}
if (!serial)
return;
- if (qt_port->closePending == 1) {
+ if (qt_port->close_pending == 1) {
/* Were closing , stop reading */
dev_dbg(&port->dev,
- "%s - (qt_port->closepending == 1\n", __func__);
- qt_port->ReadBulkStopped = 1;
+ "%s - (qt_port->close_pending == 1\n", __func__);
+ qt_port->read_bulk_stopped = 1;
return;
}
/*
- * RxHolding is asserted by throttle, if we assert it, we're not
+ * rx_holding is asserted by throttle, if we assert it, we're not
* receiving any more characters and let the box handle the flow
* control
*/
- if (qt_port->RxHolding == 1) {
- qt_port->ReadBulkStopped = 1;
+ if (qt_port->rx_holding == 1) {
+ qt_port->read_bulk_stopped = 1;
return;
}
if (urb->status) {
- qt_port->ReadBulkStopped = 1;
+ qt_port->read_bulk_stopped = 1;
dev_dbg(&port->dev,
"%s - nonzero read bulk status received: %d\n",
@@ -455,10 +455,10 @@ static int qt_get_device(struct usb_serial *serial,
}
/****************************************************************************
- * BoxSetPrebufferLevel
+ * box_set_prebuffer_level
TELLS BOX WHEN TO ASSERT FLOW CONTROL
****************************************************************************/
-static int BoxSetPrebufferLevel(struct usb_serial *serial)
+static int box_set_prebuffer_level(struct usb_serial *serial)
{
int result;
__u16 buffer_length;
@@ -471,10 +471,10 @@ static int BoxSetPrebufferLevel(struct usb_serial *serial)
}
/****************************************************************************
- * BoxSetATC
+ * box_set_atc
TELLS BOX WHEN TO ASSERT automatic transmitter control
****************************************************************************/
-static int BoxSetATC(struct usb_serial *serial, __u16 n_Mode)
+static int box_set_atc(struct usb_serial *serial, __u16 n_mode)
{
int result;
__u16 buffer_length;
@@ -483,7 +483,7 @@ static int BoxSetATC(struct usb_serial *serial, __u16 n_Mode)
result =
usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_SET_ATF, 0x40, n_Mode, 0, NULL, 0, 300);
+ QT_SET_ATF, 0x40, n_mode, 0, NULL, 0, 300);
return result;
}
@@ -499,42 +499,42 @@ static int qt_set_device(struct usb_serial *serial,
{
int result;
__u16 length;
- __u16 PortSettings;
+ __u16 port_settings;
- PortSettings = ((__u16) (device_data->portb));
- PortSettings = (PortSettings << 8);
- PortSettings += ((__u16) (device_data->porta));
+ port_settings = ((__u16) (device_data->portb));
+ port_settings = (port_settings << 8);
+ port_settings += ((__u16) (device_data->porta));
length = sizeof(struct qt_get_device_data);
result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_SET_GET_DEVICE, 0x40, PortSettings,
+ QT_SET_GET_DEVICE, 0x40, port_settings,
0, NULL, 0, 300);
return result;
}
-static int qt_open_channel(struct usb_serial *serial, __u16 Uart_Number,
- struct qt_open_channel_data *pDeviceData)
+static int qt_open_channel(struct usb_serial *serial, __u16 uart_num,
+ struct qt_open_channel_data *pdevice_data)
{
int result;
result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
QT_OPEN_CLOSE_CHANNEL,
- USBD_TRANSFER_DIRECTION_IN, 1, Uart_Number,
- pDeviceData,
+ USBD_TRANSFER_DIRECTION_IN, 1, uart_num,
+ pdevice_data,
sizeof(struct qt_open_channel_data), 300);
return result;
}
-static int qt_close_channel(struct usb_serial *serial, __u16 Uart_Number)
+static int qt_close_channel(struct usb_serial *serial, __u16 uart_num)
{
int result;
result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
QT_OPEN_CLOSE_CHANNEL,
- USBD_TRANSFER_DIRECTION_OUT, 0, Uart_Number,
+ USBD_TRANSFER_DIRECTION_OUT, 0, uart_num,
NULL, 0, 300);
return result;
@@ -542,12 +542,12 @@ static int qt_close_channel(struct usb_serial *serial, __u16 Uart_Number)
}
/****************************************************************************
-* BoxGetRegister
+* box_get_register
* issuse a GET_REGISTER vendor-spcific request on the default control pipe
-* If successful, fills in the pValue with the register value asked for
+* If successful, fills in the p_value with the register value asked for
****************************************************************************/
-static int BoxGetRegister(struct usb_serial *serial, unsigned short Uart_Number,
- unsigned short Register_Num, __u8 *pValue)
+static int box_get_register(struct usb_serial *serial, unsigned short uart_num,
+ unsigned short register_num, __u8 *p_value)
{
int result;
__u16 current_length;
@@ -556,36 +556,36 @@ static int BoxGetRegister(struct usb_serial *serial, unsigned short Uart_Number,
result =
usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
- QT_GET_SET_REGISTER, 0xC0, Register_Num,
- Uart_Number, (void *)pValue, sizeof(*pValue), 300);
+ QT_GET_SET_REGISTER, 0xC0, register_num,
+ uart_num, (void *)p_value, sizeof(*p_value), 300);
return result;
}
/****************************************************************************
-* BoxSetRegister
+* box_set_register
* issuse a GET_REGISTER vendor-spcific request on the default control pipe
-* If successful, fills in the pValue with the register value asked for
+* If successful, fills in the p_value with the register value asked for
****************************************************************************/
-static int BoxSetRegister(struct usb_serial *serial, unsigned short Uart_Number,
- unsigned short Register_Num, unsigned short Value)
+static int box_set_register(struct usb_serial *serial, unsigned short uart_num,
+ unsigned short register_num, unsigned short value)
{
int result;
- unsigned short RegAndByte;
+ unsigned short reg_and_byte;
- RegAndByte = Value;
- RegAndByte = RegAndByte << 8;
- RegAndByte = RegAndByte + Register_Num;
+ reg_and_byte = value;
+ reg_and_byte = reg_and_byte << 8;
+ reg_and_byte = reg_and_byte + register_num;
/*
result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_GET_SET_REGISTER, 0xC0, Register_Num,
- Uart_Number, NULL, 0, 300);
+ QT_GET_SET_REGISTER, 0xC0, register_num,
+ uart_num, NULL, 0, 300);
*/
result =
usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_GET_SET_REGISTER, 0x40, RegAndByte, Uart_Number,
+ QT_GET_SET_REGISTER, 0x40, reg_and_byte, uart_num,
NULL, 0, 300);
return result;
@@ -596,30 +596,30 @@ static int BoxSetRegister(struct usb_serial *serial, unsigned short Uart_Number,
* issues a SET_UART vendor-specific request on the default control pipe
* If successful sets baud rate divisor and LCR value
*/
-static int qt_setuart(struct usb_serial *serial, unsigned short Uart_Number,
- unsigned short default_divisor, unsigned char default_LCR)
+static int qt_setuart(struct usb_serial *serial, unsigned short uart_num,
+ unsigned short default_divisor, unsigned char default_lcr)
{
int result;
- unsigned short UartNumandLCR;
+ unsigned short uart_num_and_lcr;
- UartNumandLCR = (default_LCR << 8) + Uart_Number;
+ uart_num_and_lcr = (default_lcr << 8) + uart_num;
result =
usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
QT_GET_SET_UART, 0x40, default_divisor,
- UartNumandLCR, NULL, 0, 300);
+ uart_num_and_lcr, NULL, 0, 300);
return result;
}
-static int BoxSetHW_FlowCtrl(struct usb_serial *serial, unsigned int index,
- int bSet)
+static int box_set_hw_flow_ctrl(struct usb_serial *serial, unsigned int index,
+ int b_set)
{
__u8 mcr = 0;
- __u8 msr = 0, MOUT_Value = 0;
+ __u8 msr = 0, mout_value = 0;
unsigned int status;
- if (bSet == 1) {
+ if (b_set == 1) {
/* flow control, box will clear RTS line to prevent remote */
mcr = SERIAL_MCR_RTS;
} /* device from xmitting more chars */
@@ -628,9 +628,9 @@ static int BoxSetHW_FlowCtrl(struct usb_serial *serial, unsigned int index,
mcr = 0;
}
- MOUT_Value = mcr << 8;
+ mout_value = mcr << 8;
- if (bSet == 1) {
+ if (b_set == 1) {
/* flow control, box will inhibit xmit data if CTS line is
* asserted */
msr = SERIAL_MSR_CTS;
@@ -638,34 +638,34 @@ static int BoxSetHW_FlowCtrl(struct usb_serial *serial, unsigned int index,
/* Box will not inhimbe xmit data due to CTS line */
msr = 0;
}
- MOUT_Value |= msr;
+ mout_value |= msr;
status =
usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_HW_FLOW_CONTROL_MASK, 0x40, MOUT_Value,
+ QT_HW_FLOW_CONTROL_MASK, 0x40, mout_value,
index, NULL, 0, 300);
return status;
}
-static int BoxSetSW_FlowCtrl(struct usb_serial *serial, __u16 index,
+static int box_set_sw_flow_ctrl(struct usb_serial *serial, __u16 index,
unsigned char stop_char, unsigned char start_char)
{
- __u16 nSWflowout;
+ __u16 n_sw_flow_out;
int result;
- nSWflowout = start_char << 8;
- nSWflowout = (unsigned short)stop_char;
+ n_sw_flow_out = start_char << 8;
+ n_sw_flow_out = (unsigned short)stop_char;
result =
usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
- QT_SW_FLOW_CONTROL_MASK, 0x40, nSWflowout,
+ QT_SW_FLOW_CONTROL_MASK, 0x40, n_sw_flow_out,
index, NULL, 0, 300);
return result;
}
-static int BoxDisable_SW_FlowCtrl(struct usb_serial *serial, __u16 index)
+static int box_disable_sw_flow_ctrl(struct usb_serial *serial, __u16 index)
{
int result;
@@ -682,7 +682,7 @@ static int qt_startup(struct usb_serial *serial)
struct device *dev = &serial->dev->dev;
struct usb_serial_port *port;
struct quatech_port *qt_port;
- struct qt_get_device_data DeviceData;
+ struct qt_get_device_data device_data;
int i;
int status;
@@ -704,22 +704,22 @@ static int qt_startup(struct usb_serial *serial)
}
- status = qt_get_device(serial, &DeviceData);
+ status = qt_get_device(serial, &device_data);
if (status < 0)
goto startup_error;
- dev_dbg(dev, "DeviceData.portb = 0x%x\n", DeviceData.portb);
+ dev_dbg(dev, "device_data.portb = 0x%x\n", device_data.portb);
- DeviceData.portb &= ~FULLPWRBIT;
- dev_dbg(dev, "Changing DeviceData.portb to 0x%x\n", DeviceData.portb);
+ device_data.portb &= ~FULLPWRBIT;
+ dev_dbg(dev, "Changing device_data.portb to 0x%x\n", device_data.portb);
- status = qt_set_device(serial, &DeviceData);
+ status = qt_set_device(serial, &device_data);
if (status < 0) {
dev_dbg(dev, "qt_set_device failed\n");
goto startup_error;
}
- status = qt_get_device(serial, &DeviceData);
+ status = qt_get_device(serial, &device_data);
if (status < 0) {
dev_dbg(dev, "qt_get_device failed\n");
goto startup_error;
@@ -734,10 +734,10 @@ static int qt_startup(struct usb_serial *serial)
case QUATECH_HSU100B:
case QUATECH_HSU100C:
case QUATECH_HSU100D:
- DeviceData.porta &= ~(RR_BITS | DUPMODE_BITS);
- DeviceData.porta |= CLKS_X4;
- DeviceData.portb &= ~(LOOPMODE_BITS);
- DeviceData.portb |= RS232_MODE;
+ device_data.porta &= ~(RR_BITS | DUPMODE_BITS);
+ device_data.porta |= CLKS_X4;
+ device_data.portb &= ~(LOOPMODE_BITS);
+ device_data.portb |= RS232_MODE;
break;
case QUATECH_SSU200:
@@ -749,38 +749,38 @@ static int qt_startup(struct usb_serial *serial)
case QUATECH_HSU200B:
case QUATECH_HSU200C:
case QUATECH_HSU200D:
- DeviceData.porta &= ~(RR_BITS | DUPMODE_BITS);
- DeviceData.porta |= CLKS_X4;
- DeviceData.portb &= ~(LOOPMODE_BITS);
- DeviceData.portb |= ALL_LOOPBACK;
+ device_data.porta &= ~(RR_BITS | DUPMODE_BITS);
+ device_data.porta |= CLKS_X4;
+ device_data.portb &= ~(LOOPMODE_BITS);
+ device_data.portb |= ALL_LOOPBACK;
break;
default:
- DeviceData.porta &= ~(RR_BITS | DUPMODE_BITS);
- DeviceData.porta |= CLKS_X4;
- DeviceData.portb &= ~(LOOPMODE_BITS);
- DeviceData.portb |= RS232_MODE;
+ device_data.porta &= ~(RR_BITS | DUPMODE_BITS);
+ device_data.porta |= CLKS_X4;
+ device_data.portb &= ~(LOOPMODE_BITS);
+ device_data.portb |= RS232_MODE;
break;
}
- status = BoxSetPrebufferLevel(serial); /* sets to default value */
+ status = box_set_prebuffer_level(serial); /* sets to default value */
if (status < 0) {
- dev_dbg(dev, "BoxSetPrebufferLevel failed\n");
+ dev_dbg(dev, "box_set_prebuffer_level failed\n");
goto startup_error;
}
- status = BoxSetATC(serial, ATC_DISABLED);
+ status = box_set_atc(serial, ATC_DISABLED);
if (status < 0) {
- dev_dbg(dev, "BoxSetATC failed\n");
+ dev_dbg(dev, "box_set_atc failed\n");
goto startup_error;
}
- dev_dbg(dev, "DeviceData.portb = 0x%x\n", DeviceData.portb);
+ dev_dbg(dev, "device_data.portb = 0x%x\n", device_data.portb);
- DeviceData.portb |= NEXT_BOARD_POWER_BIT;
- dev_dbg(dev, "Changing DeviceData.portb to 0x%x\n", DeviceData.portb);
+ device_data.portb |= NEXT_BOARD_POWER_BIT;
+ dev_dbg(dev, "Changing device_data.portb to 0x%x\n", device_data.portb);
- status = qt_set_device(serial, &DeviceData);
+ status = qt_set_device(serial, &device_data);
if (status < 0) {
dev_dbg(dev, "qt_set_device failed\n");
goto startup_error;
@@ -848,7 +848,7 @@ static int qt_open(struct tty_struct *tty,
struct usb_serial *serial;
struct quatech_port *quatech_port;
struct quatech_port *port0;
- struct qt_open_channel_data ChannelData;
+ struct qt_open_channel_data channel_data;
int result;
@@ -870,10 +870,10 @@ static int qt_open(struct tty_struct *tty,
usb_clear_halt(serial->dev, port->read_urb->pipe);
port0->open_ports++;
- result = qt_get_device(serial, &port0->DeviceData);
+ result = qt_get_device(serial, &port0->device_data);
/* Port specific setups */
- result = qt_open_channel(serial, port->port_number, &ChannelData);
+ result = qt_open_channel(serial, port->port_number, &channel_data);
if (result < 0) {
dev_dbg(&port->dev, "qt_open_channel failed\n");
return result;
@@ -881,10 +881,10 @@ static int qt_open(struct tty_struct *tty,
dev_dbg(&port->dev, "qt_open_channel completed.\n");
/* FIXME: are these needed? Does it even do anything useful? */
- quatech_port->shadowLSR = ChannelData.line_status &
+ quatech_port->shadow_lsr = channel_data.line_status &
(SERIAL_LSR_OE | SERIAL_LSR_PE | SERIAL_LSR_FE | SERIAL_LSR_BI);
- quatech_port->shadowMSR = ChannelData.modem_status &
+ quatech_port->shadow_msr = channel_data.modem_status &
(SERIAL_MSR_CTS | SERIAL_MSR_DSR | SERIAL_MSR_RI | SERIAL_MSR_CD);
/* Set Baud rate to default and turn off (default)flow control here */
@@ -1173,7 +1173,7 @@ static void qt_set_termios(struct tty_struct *tty,
struct ktermios *old_termios)
{
struct ktermios *termios = &tty->termios;
- unsigned char new_LCR = 0;
+ unsigned char new_lcr = 0;
unsigned int cflag = termios->c_cflag;
unsigned int index;
int baud, divisor, remainder;
@@ -1183,33 +1183,33 @@ static void qt_set_termios(struct tty_struct *tty,
switch (cflag & CSIZE) {
case CS5:
- new_LCR |= SERIAL_5_DATA;
+ new_lcr |= SERIAL_5_DATA;
break;
case CS6:
- new_LCR |= SERIAL_6_DATA;
+ new_lcr |= SERIAL_6_DATA;
break;
case CS7:
- new_LCR |= SERIAL_7_DATA;
+ new_lcr |= SERIAL_7_DATA;
break;
default:
termios->c_cflag &= ~CSIZE;
termios->c_cflag |= CS8;
case CS8:
- new_LCR |= SERIAL_8_DATA;
+ new_lcr |= SERIAL_8_DATA;
break;
}
/* Parity stuff */
if (cflag & PARENB) {
if (cflag & PARODD)
- new_LCR |= SERIAL_ODD_PARITY;
+ new_lcr |= SERIAL_ODD_PARITY;
else
- new_LCR |= SERIAL_EVEN_PARITY;
+ new_lcr |= SERIAL_EVEN_PARITY;
}
if (cflag & CSTOPB)
- new_LCR |= SERIAL_TWO_STOPB;
+ new_lcr |= SERIAL_TWO_STOPB;
else
- new_LCR |= SERIAL_ONE_STOPB;
+ new_lcr |= SERIAL_ONE_STOPB;
dev_dbg(&port->dev, "%s - 4\n", __func__);
@@ -1231,7 +1231,7 @@ static void qt_set_termios(struct tty_struct *tty,
* Set Baud rate to default and turn off (default)flow control here
*/
status =
- qt_setuart(port->serial, index, (unsigned short)divisor, new_LCR);
+ qt_setuart(port->serial, index, (unsigned short)divisor, new_lcr);
if (status < 0) {
dev_dbg(&port->dev, "qt_setuart failed\n");
return;
@@ -1242,10 +1242,10 @@ static void qt_set_termios(struct tty_struct *tty,
dev_dbg(&port->dev, "%s - Enabling HW flow control\n", __func__);
/* Enable RTS/CTS flow control */
- status = BoxSetHW_FlowCtrl(port->serial, index, 1);
+ status = box_set_hw_flow_ctrl(port->serial, index, 1);
if (status < 0) {
- dev_dbg(&port->dev, "BoxSetHW_FlowCtrl failed\n");
+ dev_dbg(&port->dev, "box_set_hw_flow_ctrl failed\n");
return;
}
} else {
@@ -1253,9 +1253,9 @@ static void qt_set_termios(struct tty_struct *tty,
dev_dbg(&port->dev,
"%s - disabling HW flow control\n", __func__);
- status = BoxSetHW_FlowCtrl(port->serial, index, 0);
+ status = box_set_hw_flow_ctrl(port->serial, index, 0);
if (status < 0) {
- dev_dbg(&port->dev, "BoxSetHW_FlowCtrl failed\n");
+ dev_dbg(&port->dev, "box_set_hw_flow_ctrl failed\n");
return;
}
@@ -1267,18 +1267,18 @@ static void qt_set_termios(struct tty_struct *tty,
unsigned char stop_char = STOP_CHAR(tty);
unsigned char start_char = START_CHAR(tty);
status =
- BoxSetSW_FlowCtrl(port->serial, index, stop_char,
+ box_set_sw_flow_ctrl(port->serial, index, stop_char,
start_char);
if (status < 0)
dev_dbg(&port->dev,
- "BoxSetSW_FlowCtrl (enabled) failed\n");
+ "box_set_sw_flow_ctrl (enabled) failed\n");
} else {
/* disable SW flow control */
- status = BoxDisable_SW_FlowCtrl(port->serial, index);
+ status = box_disable_sw_flow_ctrl(port->serial, index);
if (status < 0)
dev_dbg(&port->dev,
- "BoxSetSW_FlowCtrl (diabling) failed\n");
+ "box_set_sw_flow_ctrl (diabling) failed\n");
}
termios->c_cflag &= ~CMSPAR;
@@ -1326,10 +1326,10 @@ static inline int qt_real_tiocmget(struct tty_struct *tty,
index = port->port_number;
status =
- BoxGetRegister(port->serial, index, MODEM_CONTROL_REGISTER, &mcr);
+ box_get_register(port->serial, index, MODEM_CONTROL_REGISTER, &mcr);
if (status >= 0) {
status =
- BoxGetRegister(port->serial, index,
+ box_get_register(port->serial, index,
MODEM_STATUS_REGISTER, &msr);
}
@@ -1365,7 +1365,7 @@ static inline int qt_real_tiocmset(struct tty_struct *tty,
index = port->port_number;
status =
- BoxGetRegister(port->serial, index, MODEM_CONTROL_REGISTER, &mcr);
+ box_get_register(port->serial, index, MODEM_CONTROL_REGISTER, &mcr);
if (status < 0)
return -ESPIPE;
@@ -1382,7 +1382,7 @@ static inline int qt_real_tiocmset(struct tty_struct *tty,
mcr |= SERIAL_MCR_LOOP;
status =
- BoxSetRegister(port->serial, index, MODEM_CONTROL_REGISTER, mcr);
+ box_set_register(port->serial, index, MODEM_CONTROL_REGISTER, mcr);
if (status < 0)
return -ESPIPE;
else
@@ -1437,7 +1437,7 @@ static void qt_throttle(struct tty_struct *tty)
mutex_lock(&qt_port->lock);
/* pass on to the driver specific version of this function */
- qt_port->RxHolding = 1;
+ qt_port->rx_holding = 1;
mutex_unlock(&qt_port->lock);
}
@@ -1476,14 +1476,14 @@ static void qt_unthrottle(struct tty_struct *tty)
mutex_lock(&qt_port->lock);
- if (qt_port->RxHolding == 1) {
- dev_dbg(&port->dev, "%s -qt_port->RxHolding == 1\n", __func__);
+ if (qt_port->rx_holding == 1) {
+ dev_dbg(&port->dev, "%s -qt_port->rx_holding == 1\n", __func__);
- qt_port->RxHolding = 0;
- dev_dbg(&port->dev, "%s - qt_port->RxHolding = 0\n", __func__);
+ qt_port->rx_holding = 0;
+ dev_dbg(&port->dev, "%s - qt_port->rx_holding = 0\n", __func__);
/* if we have a bulk endpoint, start it up */
- if ((serial->num_bulk_in) && (qt_port->ReadBulkStopped == 1))
+ if ((serial->num_bulk_in) && (qt_port->read_bulk_stopped == 1))
qt_submit_urb_from_unthrottle(port, serial);
}
mutex_unlock(&qt_port->lock);
diff --git a/drivers/staging/silicom/bp_mod.h b/drivers/staging/silicom/bp_mod.h
index b8275f5611fa..cfa1f43fa4af 100644
--- a/drivers/staging/silicom/bp_mod.h
+++ b/drivers/staging/silicom/bp_mod.h
@@ -15,8 +15,6 @@
#define BP_MOD_H
#include "bits.h"
-#define EXPORT_SYMBOL_NOVERS EXPORT_SYMBOL
-
#define usec_delay(x) udelay(x)
#ifndef msec_delay_bp
#define msec_delay_bp(x) \
diff --git a/drivers/staging/silicom/bpctl_mod.c b/drivers/staging/silicom/bpctl_mod.c
index b7e570ccb759..4b3a1ae250c6 100644
--- a/drivers/staging/silicom/bpctl_mod.c
+++ b/drivers/staging/silicom/bpctl_mod.c
@@ -35,7 +35,7 @@
#define BP_MOD_DESCR "Silicom Bypass-SD Control driver"
#define BP_SYNC_FLAG 1
-static int major_num = 0;
+static int major_num;
MODULE_AUTHOR("Anna Lukin, annal@silicom.co.il");
MODULE_LICENSE("GPL");
@@ -43,21 +43,16 @@ MODULE_DESCRIPTION(BP_MOD_DESCR);
MODULE_VERSION(BP_MOD_VER);
spinlock_t bpvm_lock;
-#define lock_bpctl() \
-if (down_interruptible(&bpctl_sema)) { \
- return -ERESTARTSYS; \
-} \
-
-#define unlock_bpctl() \
+#define unlock_bpctl() \
up(&bpctl_sema);
/* Media Types */
-typedef enum {
- bp_copper = 0,
- bp_fiber,
- bp_cx4,
- bp_none,
-} bp_media_type;
+enum bp_media_type {
+ BP_COPPER = 0,
+ BP_FIBER,
+ BP_CX4,
+ BP_NONE,
+};
struct bypass_pfs_sd {
char dir_name[32];
@@ -89,7 +84,7 @@ typedef struct _bpctl_dev {
uint32_t reset_time;
uint8_t bp_status_un;
atomic_t wdt_busy;
- bp_media_type media_type;
+ enum bp_media_type media_type;
int bp_tpl_flag;
struct timer_list bp_tpl_timer;
spinlock_t bypass_wr_lock;
@@ -112,7 +107,7 @@ typedef struct _bpctl_dev {
static bpctl_dev_t *bpctl_dev_arr;
static struct semaphore bpctl_sema;
-static int device_num = 0;
+static int device_num;
static int get_dev_idx(int ifindex);
static bpctl_dev_t *get_master_port_fn(bpctl_dev_t *pbpctl_dev);
@@ -134,7 +129,7 @@ static int bp_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- static bpctl_dev_t *pbpctl_dev = NULL, *pbpctl_dev_m = NULL;
+ static bpctl_dev_t *pbpctl_dev, *pbpctl_dev_m;
int dev_num = 0, ret = 0, ret_d = 0, time_left = 0;
/* printk("BP_PROC_SUPPORT event =%d %s %d\n", event,dev->name, dev->ifindex ); */
/* return NOTIFY_DONE; */
@@ -165,7 +160,8 @@ static int bp_device_event(struct notifier_block *unused,
memcpy(&cbuf, drvinfo.bus_info, 32);
buf = &cbuf[0];
- while (*buf++ != ':') ;
+ while (*buf++ != ':')
+ ;
for (i = 0; i < 10; i++, buf++) {
if (*buf == ':')
break;
@@ -306,7 +302,8 @@ static void write_pulse(bpctl_dev_t *pbpctl_dev,
ctrl = BP10G_READ_REG(pbpctl_dev, ESDP);
if (pbpctl_dev->bp_10g9) {
- if (!(pbpctl_dev_c = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_c = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_c)
return;
ctrl = BP10G_READ_REG(pbpctl_dev_c, ESDP);
}
@@ -606,7 +603,8 @@ static int read_pulse(bpctl_dev_t *pbpctl_dev, unsigned int ctrl_ext,
if (pbpctl_dev->bp_540)
ctrl = BP10G_READ_REG(pbpctl_dev, ESDP);
if (pbpctl_dev->bp_10g9) {
- if (!(pbpctl_dev_c = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_c = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_c)
return -1;
ctrl = BP10G_READ_REG(pbpctl_dev_c, ESDP);
}
@@ -720,16 +718,15 @@ static int read_pulse(bpctl_dev_t *pbpctl_dev, unsigned int ctrl_ext,
BP10G_MDIO_DATA_OUT));
}
- if (pbpctl_dev->bp_10g9) {
- ctrl_ext = BP10G_READ_REG(pbpctl_dev, I2CCTL);
- } else if ((pbpctl_dev->bp_fiber5) || (pbpctl_dev->bp_i80)) {
+ if (pbpctl_dev->bp_10g9)
+ ctrl_ext = BP10G_READ_REG(pbpctl_dev, I2CCTL);
+ else if ((pbpctl_dev->bp_fiber5) || (pbpctl_dev->bp_i80))
ctrl_ext = BPCTL_READ_REG(pbpctl_dev, CTRL);
- } else if (pbpctl_dev->bp_540) {
+ else if (pbpctl_dev->bp_540)
ctrl_ext = BP10G_READ_REG(pbpctl_dev, ESDP);
- } else if (pbpctl_dev->bp_10gb)
+ else if (pbpctl_dev->bp_10gb)
ctrl_ext = BP10GB_READ_REG(pbpctl_dev, MISC_REG_SPIO);
-
else if (!pbpctl_dev->bp_10g)
ctrl_ext = BPCTL_READ_REG(pbpctl_dev, CTRL_EXT);
else
@@ -775,7 +772,8 @@ static void write_reg(bpctl_dev_t *pbpctl_dev, unsigned char value,
bpctl_dev_t *pbpctl_dev_c = NULL;
unsigned long flags;
if (pbpctl_dev->bp_10g9) {
- if (!(pbpctl_dev_c = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_c = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_c)
return;
}
if ((pbpctl_dev->wdt_status == WDT_STATUS_EN) &&
@@ -953,7 +951,8 @@ static int read_reg(bpctl_dev_t *pbpctl_dev, unsigned char addr)
atomic_set(&pbpctl_dev->wdt_busy, 1);
#endif
if (pbpctl_dev->bp_10g9) {
- if (!(pbpctl_dev_c = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_c = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_c)
return -1;
}
@@ -1224,7 +1223,8 @@ static int wdt_pulse(bpctl_dev_t *pbpctl_dev)
return -1;
#endif
if (pbpctl_dev->bp_10g9) {
- if (!(pbpctl_dev_c = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_c = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_c)
return -1;
}
@@ -1414,8 +1414,8 @@ static int wdt_pulse(bpctl_dev_t *pbpctl_dev)
(ctrl_ext &
~(BP10G_MCLK_DATA_OUT | BP10G_MDIO_DATA_OUT)));
}
- if ((pbpctl_dev->wdt_status == WDT_STATUS_EN) /*&&
- (pbpctl_dev->bp_ext_ver<PXG4BPFI_VER) */ )
+ if ((pbpctl_dev->wdt_status == WDT_STATUS_EN))
+ /*&& (pbpctl_dev->bp_ext_ver<PXG4BPFI_VER) */
pbpctl_dev->bypass_wdt_on_time = jiffies;
#ifdef BP_SYNC_FLAG
spin_unlock_irqrestore(&pbpctl_dev->bypass_wr_lock, flags);
@@ -1744,7 +1744,8 @@ static int write_data_int(bpctl_dev_t *pbpctl_dev, unsigned char value)
{
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return -1;
atomic_set(&pbpctl_dev->wdt_busy, 1);
write_data_port_int(pbpctl_dev, value & 0x3);
@@ -1920,13 +1921,10 @@ int disc_port_on(bpctl_dev_t *pbpctl_dev)
return BP_NOT_CAP;
if (pbpctl_dev_m->bp_caps_ex & DISC_PORT_CAP_EX) {
- if (is_bypass_fn(pbpctl_dev) == 1) {
-
+ if (is_bypass_fn(pbpctl_dev) == 1)
write_data(pbpctl_dev_m, TX_DISA);
- } else {
-
+ else
write_data(pbpctl_dev_m, TX_DISB);
- }
msec_delay_bp(LATCH_DELAY);
@@ -1965,7 +1963,8 @@ int tpl_hw_on(bpctl_dev_t *pbpctl_dev)
int ret = 0, ctrl = 0;
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (pbpctl_dev->bp_caps_ex & TPL2_CAP_EX) {
@@ -1992,7 +1991,8 @@ int tpl_hw_off(bpctl_dev_t *pbpctl_dev)
int ret = 0, ctrl = 0;
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (pbpctl_dev->bp_caps_ex & TPL2_CAP_EX) {
cmnd_on(pbpctl_dev);
@@ -2017,9 +2017,9 @@ int wdt_off(bpctl_dev_t *pbpctl_dev)
int ret = BP_NOT_CAP;
if (pbpctl_dev->bp_caps & WD_CTL_CAP) {
- if (INTEL_IF_SERIES(pbpctl_dev->subdevice)) {
+ if (INTEL_IF_SERIES(pbpctl_dev->subdevice))
bypass_off(pbpctl_dev);
- } else if (pbpctl_dev->bp_ext_ver >= PXG2BPI_VER)
+ else if (pbpctl_dev->bp_ext_ver >= PXG2BPI_VER)
write_data(pbpctl_dev, WDT_OFF);
else
data_pulse(pbpctl_dev, WDT_OFF);
@@ -2150,12 +2150,14 @@ static void bp75_release_phy(bpctl_dev_t *pbpctl_dev)
{
u16 mask = BPCTLI_SWFW_PHY0_SM;
u32 swfw_sync;
+ s32 ret_val;
if ((pbpctl_dev->func == 1) || (pbpctl_dev->func == 3))
mask = BPCTLI_SWFW_PHY1_SM;
- while (bp75_get_hw_semaphore_generic(pbpctl_dev) != 0) ;
- /* Empty */
+ do
+ ret_val = bp75_get_hw_semaphore_generic(pbpctl_dev);
+ while (ret_val != 0);
swfw_sync = BPCTL_READ_REG(pbpctl_dev, SW_FW_SYNC);
swfw_sync &= ~mask;
@@ -2404,12 +2406,10 @@ static int set_tx(bpctl_dev_t *pbpctl_dev, int tx_state)
}
}
- if (pbpctl_dev->bp_fiber5) {
+ if (pbpctl_dev->bp_fiber5)
ctrl = BPCTL_READ_REG(pbpctl_dev, CTRL_EXT);
-
- } else if (pbpctl_dev->bp_10gb)
+ else if (pbpctl_dev->bp_10gb)
ctrl = BP10GB_READ_REG(pbpctl_dev, MISC_REG_GPIO);
-
else if (!pbpctl_dev->bp_10g)
ctrl = BPCTL_READ_REG(pbpctl_dev, CTRL);
else
@@ -3237,8 +3237,10 @@ int bypass_from_last_read(bpctl_dev_t *pbpctl_dev)
uint32_t ctrl_ext = 0;
bpctl_dev_t *pbpctl_dev_b = NULL;
- if ((pbpctl_dev->bp_caps & SW_CTL_CAP)
- && (pbpctl_dev_b = get_status_port_fn(pbpctl_dev))) {
+ if (pbpctl_dev->bp_caps & SW_CTL_CAP) {
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
+ return BP_NOT_CAP;
ctrl_ext = BPCTL_READ_REG(pbpctl_dev_b, CTRL_EXT);
BPCTL_BP_WRITE_REG(pbpctl_dev_b, CTRL_EXT,
(ctrl_ext & ~BPCTLI_CTRL_EXT_SDP7_DIR));
@@ -3254,9 +3256,10 @@ int bypass_status_clear(bpctl_dev_t *pbpctl_dev)
{
bpctl_dev_t *pbpctl_dev_b = NULL;
- if ((pbpctl_dev->bp_caps & SW_CTL_CAP)
- && (pbpctl_dev_b = get_status_port_fn(pbpctl_dev))) {
-
+ if (pbpctl_dev->bp_caps & SW_CTL_CAP) {
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
+ return BP_NOT_CAP;
send_bypass_clear_pulse(pbpctl_dev_b, 1);
return 0;
} else
@@ -3329,7 +3332,8 @@ static int bypass_status(bpctl_dev_t *pbpctl_dev)
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (INTEL_IF_SERIES(pbpctl_dev->subdevice)) {
@@ -3391,7 +3395,7 @@ static int bypass_status(bpctl_dev_t *pbpctl_dev)
BP10G_SDP7_DATA_IN) != 0 ? 0 : 1);
}
- } else if (pbpctl_dev->media_type == bp_copper) {
+ } else if (pbpctl_dev->media_type == BP_COPPER) {
return (((BPCTL_READ_REG(pbpctl_dev_b, CTRL)) &
BPCTLI_CTRL_SWDPIN1) != 0 ? 1 : 0);
@@ -3617,7 +3621,8 @@ int tap_status(bpctl_dev_t *pbpctl_dev)
if (pbpctl_dev->bp_caps & TAP_CAP) {
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (pbpctl_dev->bp_ext_ver >= 0x8) {
@@ -3636,7 +3641,7 @@ int tap_status(bpctl_dev_t *pbpctl_dev)
BP10G_SDP6_DATA_IN) != 0 ? 0 : 1);
}
- } else if (pbpctl_dev->media_type == bp_copper)
+ } else if (pbpctl_dev->media_type == BP_COPPER)
return (((BPCTL_READ_REG(pbpctl_dev, CTRL)) &
BPCTLI_CTRL_SWDPIN0) != 0 ? 1 : 0);
else {
@@ -3713,7 +3718,8 @@ int disc_off_status(bpctl_dev_t *pbpctl_dev)
u32 ctrl_ext = 0;
if (pbpctl_dev->bp_caps & DISC_CAP) {
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (DISCF_IF_SERIES(pbpctl_dev->subdevice))
return ((((read_reg(pbpctl_dev, STATUS_DISC_REG_ADDR)) &
@@ -3730,7 +3736,7 @@ int disc_off_status(bpctl_dev_t *pbpctl_dev)
BP10G_SDP2_DATA) != 0 ? 1 : 0);
}
- if (pbpctl_dev->media_type == bp_copper) {
+ if (pbpctl_dev->media_type == BP_COPPER) {
#if 0
return ((((read_reg(pbpctl_dev, STATUS_DISC_REG_ADDR)) &
@@ -3794,11 +3800,10 @@ static int disc_status(bpctl_dev_t *pbpctl_dev)
{
int ctrl = 0;
if (pbpctl_dev->bp_caps & DISC_CAP) {
-
- if ((ctrl = disc_off_status(pbpctl_dev)) < 0)
+ ctrl = disc_off_status(pbpctl_dev);
+ if (ctrl < 0)
return ctrl;
return ((ctrl == 0) ? 1 : 0);
-
}
return BP_NOT_CAP;
}
@@ -3911,7 +3916,8 @@ int tpl_hw_status(bpctl_dev_t *pbpctl_dev)
{
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return BP_NOT_CAP;
if (TPL_IF_SERIES(pbpctl_dev->subdevice))
@@ -4021,42 +4027,41 @@ void bypass_caps_init(bpctl_dev_t *pbpctl_dev)
}
#endif
if ((pbpctl_dev->bp_fiber5) || (pbpctl_dev->bp_10g9)) {
- pbpctl_dev->media_type = bp_fiber;
+ pbpctl_dev->media_type = BP_FIBER;
} else if (pbpctl_dev->bp_10gb) {
if (BP10GB_CX4_SERIES(pbpctl_dev->subdevice))
- pbpctl_dev->media_type = bp_cx4;
+ pbpctl_dev->media_type = BP_CX4;
else
- pbpctl_dev->media_type = bp_fiber;
+ pbpctl_dev->media_type = BP_FIBER;
}
else if (pbpctl_dev->bp_540)
- pbpctl_dev->media_type = bp_none;
+ pbpctl_dev->media_type = BP_NONE;
else if (!pbpctl_dev->bp_10g) {
ctrl_ext = BPCTL_READ_REG(pbpctl_dev, CTRL_EXT);
if ((ctrl_ext & BPCTLI_CTRL_EXT_LINK_MODE_MASK) == 0x0)
- pbpctl_dev->media_type = bp_copper;
+ pbpctl_dev->media_type = BP_COPPER;
else
- pbpctl_dev->media_type = bp_fiber;
+ pbpctl_dev->media_type = BP_FIBER;
} else {
if (BP10G_CX4_SERIES(pbpctl_dev->subdevice))
- pbpctl_dev->media_type = bp_cx4;
+ pbpctl_dev->media_type = BP_CX4;
else
- pbpctl_dev->media_type = bp_fiber;
+ pbpctl_dev->media_type = BP_FIBER;
}
if (is_bypass_fn(pbpctl_dev)) {
pbpctl_dev->bp_caps |= BP_PWOFF_ON_CAP;
- if (pbpctl_dev->media_type == bp_fiber)
+ if (pbpctl_dev->media_type == BP_FIBER)
pbpctl_dev->bp_caps |=
(TX_CTL_CAP | TX_STATUS_CAP | TPL_CAP);
- if (TPL_IF_SERIES(pbpctl_dev->subdevice)) {
+ if (TPL_IF_SERIES(pbpctl_dev->subdevice))
pbpctl_dev->bp_caps |= TPL_CAP;
- }
if (INTEL_IF_SERIES(pbpctl_dev->subdevice)) {
pbpctl_dev->bp_caps |=
@@ -4196,9 +4201,9 @@ void bypass_caps_init(bpctl_dev_t *pbpctl_dev)
if (PEG5_IF_SERIES(pbpctl_dev->subdevice))
pbpctl_dev->bp_caps |= (TX_CTL_CAP | TX_STATUS_CAP);
- if (BP10GB_IF_SERIES(pbpctl_dev->subdevice)) {
+ if (BP10GB_IF_SERIES(pbpctl_dev->subdevice))
pbpctl_dev->bp_caps &= ~(TX_CTL_CAP | TX_STATUS_CAP);
- }
+
pbpctl_dev_m = get_master_port_fn(pbpctl_dev);
if (pbpctl_dev_m != NULL) {
int cap_reg = 0;
@@ -4215,9 +4220,8 @@ void bypass_caps_init(bpctl_dev_t *pbpctl_dev)
int bypass_off_init(bpctl_dev_t *pbpctl_dev)
{
- int ret = 0;
-
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ int ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (INTEL_IF_SERIES(pbpctl_dev->subdevice))
return dis_bypass_cap(pbpctl_dev);
@@ -4327,14 +4331,13 @@ int set_bypass_wd_auto(bpctl_dev_t *pbpctl_dev, unsigned int param)
int get_bypass_wd_auto(bpctl_dev_t *pbpctl_dev)
{
-
- if (pbpctl_dev->bp_caps & WD_CTL_CAP) {
+ if (pbpctl_dev->bp_caps & WD_CTL_CAP)
return pbpctl_dev->reset_time;
- }
+
return BP_NOT_CAP;
}
-#ifdef BP_SELF_TEST
+#ifdef BP_SELF_TEST
int set_bp_self_test(bpctl_dev_t *pbpctl_dev, unsigned int param)
{
@@ -4403,7 +4406,8 @@ int set_bypass_fn(bpctl_dev_t *pbpctl_dev, int bypass_mode)
if (!(pbpctl_dev->bp_caps & BP_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (!bypass_mode)
ret = bypass_off(pbpctl_dev);
@@ -4435,7 +4439,8 @@ int set_dis_bypass_fn(bpctl_dev_t *pbpctl_dev, int dis_param)
if (!(pbpctl_dev->bp_caps & BP_DIS_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (dis_param)
ret = dis_bypass_cap(pbpctl_dev);
@@ -4461,7 +4466,8 @@ int set_bypass_pwoff_fn(bpctl_dev_t *pbpctl_dev, int bypass_mode)
if (!(pbpctl_dev->bp_caps & BP_PWOFF_CTL_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (bypass_mode)
ret = bypass_state_pwroff(pbpctl_dev);
@@ -4487,7 +4493,8 @@ int set_bypass_pwup_fn(bpctl_dev_t *pbpctl_dev, int bypass_mode)
if (!(pbpctl_dev->bp_caps & BP_PWUP_CTL_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (bypass_mode)
ret = bypass_state_pwron(pbpctl_dev);
@@ -4514,7 +4521,8 @@ int set_bypass_wd_fn(bpctl_dev_t *pbpctl_dev, int timeout)
if (!(pbpctl_dev->bp_caps & WD_CTL_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (!timeout)
ret = wdt_off(pbpctl_dev);
@@ -4583,7 +4591,8 @@ int set_std_nic_fn(bpctl_dev_t *pbpctl_dev, int nic_mode)
if (!(pbpctl_dev->bp_caps & STD_NIC_CAP))
return BP_NOT_CAP;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
if (nic_mode)
ret = std_nic_on(pbpctl_dev);
@@ -4649,7 +4658,8 @@ int get_tap_pwup_fn(bpctl_dev_t *pbpctl_dev)
if (!pbpctl_dev)
return -1;
- if ((ret = default_pwron_tap_status(pbpctl_dev)) < 0)
+ ret = default_pwron_tap_status(pbpctl_dev);
+ if (ret < 0)
return ret;
return ((ret == 0) ? 1 : 0);
}
@@ -4824,7 +4834,8 @@ int get_disc_port_pwup_fn(bpctl_dev_t *pbpctl_dev)
if (!pbpctl_dev)
return -1;
- if ((ret = default_pwron_disc_port_status(pbpctl_dev)) < 0)
+ ret = default_pwron_disc_port_status(pbpctl_dev);
+ if (ret < 0)
return ret;
return ((ret == 0) ? 1 : 0);
}
@@ -4851,7 +4862,8 @@ int reset_cont_fn(bpctl_dev_t *pbpctl_dev)
if (!pbpctl_dev)
return -1;
- if ((ret = cmnd_on(pbpctl_dev)) < 0)
+ ret = cmnd_on(pbpctl_dev);
+ if (ret < 0)
return ret;
return reset_cont(pbpctl_dev);
}
@@ -4867,8 +4879,10 @@ int set_tx_fn(bpctl_dev_t *pbpctl_dev, int tx_state)
(pbpctl_dev->bp_caps & SW_CTL_CAP)) {
if ((pbpctl_dev->bp_tpl_flag))
return BP_NOT_CAP;
- } else if ((pbpctl_dev_b = get_master_port_fn(pbpctl_dev))) {
- if ((pbpctl_dev_b->bp_caps & TPL_CAP) &&
+ } else {
+ pbpctl_dev_b = get_master_port_fn(pbpctl_dev);
+ if (pbpctl_dev_b &&
+ (pbpctl_dev_b->bp_caps & TPL_CAP) &&
(pbpctl_dev_b->bp_tpl_flag))
return BP_NOT_CAP;
}
@@ -4984,8 +4998,10 @@ int get_tx_fn(bpctl_dev_t *pbpctl_dev)
(pbpctl_dev->bp_caps & SW_CTL_CAP)) {
if ((pbpctl_dev->bp_tpl_flag))
return BP_NOT_CAP;
- } else if ((pbpctl_dev_b = get_master_port_fn(pbpctl_dev))) {
- if ((pbpctl_dev_b->bp_caps & TPL_CAP) &&
+ } else {
+ pbpctl_dev_b = get_master_port_fn(pbpctl_dev);
+ if (pbpctl_dev_b &&
+ (pbpctl_dev_b->bp_caps & TPL_CAP) &&
(pbpctl_dev_b->bp_tpl_flag))
return BP_NOT_CAP;
}
@@ -5009,7 +5025,7 @@ static int get_bypass_link_status(bpctl_dev_t *pbpctl_dev)
if (!pbpctl_dev)
return -1;
- if (pbpctl_dev->media_type == bp_fiber)
+ if (pbpctl_dev->media_type == BP_FIBER)
return ((BPCTL_READ_REG(pbpctl_dev, CTRL) &
BPCTLI_CTRL_SWDPIN1));
else
@@ -5024,7 +5040,8 @@ static void bp_tpl_timer_fn(unsigned long param)
uint32_t link1, link2;
bpctl_dev_t *pbpctl_dev_b = NULL;
- if (!(pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (!pbpctl_dev_b)
return;
if (!pbpctl_dev->bp_tpl_flag) {
@@ -5036,23 +5053,19 @@ static void bp_tpl_timer_fn(unsigned long param)
link2 = get_bypass_link_status(pbpctl_dev_b);
if ((link1) && (tx_status(pbpctl_dev))) {
- if ((!link2) && (tx_status(pbpctl_dev_b))) {
+ if ((!link2) && (tx_status(pbpctl_dev_b)))
set_tx(pbpctl_dev, 0);
- } else if (!tx_status(pbpctl_dev_b)) {
+ else if (!tx_status(pbpctl_dev_b))
set_tx(pbpctl_dev_b, 1);
- }
} else if ((!link1) && (tx_status(pbpctl_dev))) {
- if ((link2) && (tx_status(pbpctl_dev_b))) {
+ if ((link2) && (tx_status(pbpctl_dev_b)))
set_tx(pbpctl_dev_b, 0);
- }
} else if ((link1) && (!tx_status(pbpctl_dev))) {
- if ((link2) && (tx_status(pbpctl_dev_b))) {
+ if ((link2) && (tx_status(pbpctl_dev_b)))
set_tx(pbpctl_dev, 1);
- }
} else if ((!link1) && (!tx_status(pbpctl_dev))) {
- if ((link2) && (tx_status(pbpctl_dev_b))) {
+ if ((link2) && (tx_status(pbpctl_dev_b)))
set_tx(pbpctl_dev, 1);
- }
}
mod_timer(&pbpctl_dev->bp_tpl_timer, jiffies + BP_LINK_MON_DELAY * HZ);
@@ -5111,9 +5124,9 @@ int get_bypass_tpl_auto(bpctl_dev_t *pbpctl_dev)
{
if (!pbpctl_dev)
return -1;
- if (pbpctl_dev->bp_caps & TPL_CAP) {
+ if (pbpctl_dev->bp_caps & TPL_CAP)
return pbpctl_dev->bp_tpl_flag;
- }
+
return BP_NOT_CAP;
}
@@ -5128,7 +5141,8 @@ int set_tpl_fn(bpctl_dev_t *pbpctl_dev, int tpl_mode)
if (pbpctl_dev->bp_caps & TPL_CAP) {
if (tpl_mode) {
- if ((pbpctl_dev_b = get_status_port_fn(pbpctl_dev)))
+ pbpctl_dev_b = get_status_port_fn(pbpctl_dev);
+ if (pbpctl_dev_b)
set_tx(pbpctl_dev_b, 1);
set_tx(pbpctl_dev, 1);
}
@@ -5345,7 +5359,8 @@ static void if_scan_init(void)
memcpy(&cbuf, drvinfo.bus_info, 32);
buf = &cbuf[0];
- while (*buf++ != ':') ;
+ while (*buf++ != ':')
+ ;
for (i = 0; i < 10; i++, buf++) {
if (*buf == ':')
break;
@@ -5394,7 +5409,8 @@ static long device_ioctl(struct file *file, /* see include/linux/fs.h */
static bpctl_dev_t *pbpctl_dev;
/* lock_kernel(); */
- lock_bpctl();
+ if (down_interruptible(&bpctl_sema))
+ return -ERESTARTSYS;
/* local_irq_save(flags); */
/* if(!spin_trylock_irqsave(&bpvm_lock)){
local_irq_restore(flags);
@@ -5438,9 +5454,9 @@ static long device_ioctl(struct file *file, /* see include/linux/fs.h */
return -1;
}
-/* preempt_disable();
+/* preempt_disable();
rcu_read_lock();
- spin_lock_irqsave(&bpvm_lock, flags);
+ spin_lock_irqsave(&bpvm_lock, flags);
*/
if ((bpctl_cmd.in_param[5]) ||
(bpctl_cmd.in_param[6]) || (bpctl_cmd.in_param[7]))
@@ -5787,7 +5803,7 @@ static const struct file_operations Fops = {
};
#ifndef PCI_DEVICE
-#define PCI_DEVICE(vend,dev) \
+#define PCI_DEVICE(vend, dev) \
.vendor = (vend), .device = (dev), \
.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
#endif
@@ -5795,7 +5811,7 @@ static const struct file_operations Fops = {
#define SILICOM_E1000BP_ETHERNET_DEVICE(device_id) {\
PCI_DEVICE(SILICOM_VID, device_id)}
-typedef enum {
+enum board_type {
PXG2BPFI,
PXG2BPFIL,
PXG2BPFILX,
@@ -5953,7 +5969,7 @@ typedef enum {
PE310G4BPi9SR,
PE310G4BPi9LR,
PE210G2BPi40,
-} board_t;
+};
typedef struct _bpmod_info_t {
unsigned int vendor;
@@ -6629,7 +6645,7 @@ static void find_fw(bpctl_dev_t *dev)
ioremap(mmio_start, mmio_len);
dev->bp_fw_ver = bypass_fw_ver(dev);
- if (dev-> bp_fw_ver == 0xa8)
+ if (dev->bp_fw_ver == 0xa8)
break;
}
}
@@ -6708,7 +6724,8 @@ static int init_one(bpctl_dev_t *dev, bpmod_info_t *info, struct pci_dev *pdev1)
reset_cont(dev);
}
#ifdef BP_SELF_TEST
- if ((dev->bp_tx_data = kzalloc(BPTEST_DATA_LEN, GFP_KERNEL))) {
+ dev->bp_tx_data = kzalloc(BPTEST_DATA_LEN, GFP_KERNEL);
+ if (dev->bp_tx_data) {
memset(dev->bp_tx_data, 0xff, 6);
memset(dev->bp_tx_data + 6, 0x0, 1);
memset(dev->bp_tx_data + 7, 0xaa, 5);
@@ -6878,59 +6895,69 @@ int is_bypass_sd(int ifindex)
{
return is_bypass(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(is_bypass_sd);
int set_bypass_sd(int ifindex, int bypass_mode)
{
return set_bypass_fn(get_dev_idx_p(ifindex), bypass_mode);
}
+EXPORT_SYMBOL(set_bypass_sd);
int get_bypass_sd(int ifindex)
{
return get_bypass_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bypass_sd);
int get_bypass_change_sd(int ifindex)
{
return get_bypass_change_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bypass_change_sd);
int set_dis_bypass_sd(int ifindex, int dis_param)
{
return set_dis_bypass_fn(get_dev_idx_p(ifindex), dis_param);
}
+EXPORT_SYMBOL(set_dis_bypass_sd);
int get_dis_bypass_sd(int ifindex)
{
return get_dis_bypass_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_dis_bypass_sd);
int set_bypass_pwoff_sd(int ifindex, int bypass_mode)
{
return set_bypass_pwoff_fn(get_dev_idx_p(ifindex), bypass_mode);
}
+EXPORT_SYMBOL(set_bypass_pwoff_sd);
int get_bypass_pwoff_sd(int ifindex)
{
return get_bypass_pwoff_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bypass_pwoff_sd);
int set_bypass_pwup_sd(int ifindex, int bypass_mode)
{
return set_bypass_pwup_fn(get_dev_idx_p(ifindex), bypass_mode);
}
+EXPORT_SYMBOL(set_bypass_pwup_sd);
int get_bypass_pwup_sd(int ifindex)
{
return get_bypass_pwup_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bypass_pwup_sd);
int set_bypass_wd_sd(int if_index, int ms_timeout, int *ms_timeout_set)
{
@@ -6939,136 +6966,159 @@ int set_bypass_wd_sd(int if_index, int ms_timeout, int *ms_timeout_set)
*ms_timeout_set = set_bypass_wd_fn(get_dev_idx_p(if_index), ms_timeout);
return 0;
}
+EXPORT_SYMBOL(set_bypass_wd_sd);
int get_bypass_wd_sd(int ifindex, int *timeout)
{
return get_bypass_wd_fn(get_dev_idx_p(ifindex), timeout);
}
+EXPORT_SYMBOL(get_bypass_wd_sd);
int get_wd_expire_time_sd(int ifindex, int *time_left)
{
return get_wd_expire_time_fn(get_dev_idx_p(ifindex), time_left);
}
+EXPORT_SYMBOL(get_wd_expire_time_sd);
int reset_bypass_wd_timer_sd(int ifindex)
{
return reset_bypass_wd_timer_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(reset_bypass_wd_timer_sd);
int get_wd_set_caps_sd(int ifindex)
{
return get_wd_set_caps_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_wd_set_caps_sd);
int set_std_nic_sd(int ifindex, int nic_mode)
{
return set_std_nic_fn(get_dev_idx_p(ifindex), nic_mode);
}
+EXPORT_SYMBOL(set_std_nic_sd);
int get_std_nic_sd(int ifindex)
{
return get_std_nic_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_std_nic_sd);
int set_tap_sd(int ifindex, int tap_mode)
{
return set_tap_fn(get_dev_idx_p(ifindex), tap_mode);
}
+EXPORT_SYMBOL(set_tap_sd);
int get_tap_sd(int ifindex)
{
return get_tap_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_tap_sd);
int set_tap_pwup_sd(int ifindex, int tap_mode)
{
return set_tap_pwup_fn(get_dev_idx_p(ifindex), tap_mode);
}
+EXPORT_SYMBOL(set_tap_pwup_sd);
int get_tap_pwup_sd(int ifindex)
{
return get_tap_pwup_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_tap_pwup_sd);
int get_tap_change_sd(int ifindex)
{
return get_tap_change_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_tap_change_sd);
int set_dis_tap_sd(int ifindex, int dis_param)
{
return set_dis_tap_fn(get_dev_idx_p(ifindex), dis_param);
}
+EXPORT_SYMBOL(set_dis_tap_sd);
int get_dis_tap_sd(int ifindex)
{
return get_dis_tap_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_dis_tap_sd);
int set_bp_disc_sd(int ifindex, int disc_mode)
{
return set_disc_fn(get_dev_idx_p(ifindex), disc_mode);
}
+EXPORT_SYMBOL(set_bp_disc_sd);
int get_bp_disc_sd(int ifindex)
{
return get_disc_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bp_disc_sd);
int set_bp_disc_pwup_sd(int ifindex, int disc_mode)
{
return set_disc_pwup_fn(get_dev_idx_p(ifindex), disc_mode);
}
+EXPORT_SYMBOL(set_bp_disc_pwup_sd);
int get_bp_disc_pwup_sd(int ifindex)
{
return get_disc_pwup_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bp_disc_pwup_sd);
int get_bp_disc_change_sd(int ifindex)
{
return get_disc_change_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bp_disc_change_sd);
int set_bp_dis_disc_sd(int ifindex, int dis_param)
{
return set_dis_disc_fn(get_dev_idx_p(ifindex), dis_param);
}
+EXPORT_SYMBOL(set_bp_dis_disc_sd);
int get_bp_dis_disc_sd(int ifindex)
{
return get_dis_disc_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bp_dis_disc_sd);
int get_wd_exp_mode_sd(int ifindex)
{
return get_wd_exp_mode_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_wd_exp_mode_sd);
int set_wd_exp_mode_sd(int ifindex, int param)
{
return set_wd_exp_mode_fn(get_dev_idx_p(ifindex), param);
}
+EXPORT_SYMBOL(set_wd_exp_mode_sd);
int reset_cont_sd(int ifindex)
{
@@ -7081,35 +7131,41 @@ int set_tx_sd(int ifindex, int tx_state)
return set_tx_fn(get_dev_idx_p(ifindex), tx_state);
}
+EXPORT_SYMBOL(set_tx_sd);
int set_tpl_sd(int ifindex, int tpl_state)
{
return set_tpl_fn(get_dev_idx_p(ifindex), tpl_state);
}
+EXPORT_SYMBOL(set_tpl_sd);
int set_bp_hw_reset_sd(int ifindex, int status)
{
return set_bp_hw_reset_fn(get_dev_idx_p(ifindex), status);
}
+EXPORT_SYMBOL(set_bp_hw_reset_sd);
int set_wd_autoreset_sd(int ifindex, int param)
{
return set_wd_autoreset_fn(get_dev_idx_p(ifindex), param);
}
+EXPORT_SYMBOL(set_wd_autoreset_sd);
int get_wd_autoreset_sd(int ifindex)
{
return get_wd_autoreset_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_wd_autoreset_sd);
int get_bypass_caps_sd(int ifindex)
{
return get_bypass_caps_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bypass_caps_sd);
int get_bypass_slave_sd(int ifindex)
{
@@ -7120,81 +7176,41 @@ int get_bypass_slave_sd(int ifindex)
return -1;
}
+EXPORT_SYMBOL(get_bypass_slave_sd);
int get_tx_sd(int ifindex)
{
return get_tx_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_tx_sd);
int get_tpl_sd(int ifindex)
{
return get_tpl_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_tpl_sd);
int get_bp_hw_reset_sd(int ifindex)
{
return get_bp_hw_reset_fn(get_dev_idx_p(ifindex));
}
+EXPORT_SYMBOL(get_bp_hw_reset_sd);
int get_bypass_info_sd(int ifindex, struct bp_info *bp_info)
{
return get_bypass_info_fn(get_dev_idx_p(ifindex), bp_info->prod_name, &bp_info->fw_ver);
}
+EXPORT_SYMBOL(get_bypass_info_sd);
int bp_if_scan_sd(void)
{
if_scan_init();
return 0;
}
-
-EXPORT_SYMBOL_NOVERS(is_bypass_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_slave_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_caps_sd);
-EXPORT_SYMBOL_NOVERS(get_wd_set_caps_sd);
-EXPORT_SYMBOL_NOVERS(set_bypass_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_change_sd);
-EXPORT_SYMBOL_NOVERS(set_dis_bypass_sd);
-EXPORT_SYMBOL_NOVERS(get_dis_bypass_sd);
-EXPORT_SYMBOL_NOVERS(set_bypass_pwoff_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_pwoff_sd);
-EXPORT_SYMBOL_NOVERS(set_bypass_pwup_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_pwup_sd);
-EXPORT_SYMBOL_NOVERS(set_bypass_wd_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_wd_sd);
-EXPORT_SYMBOL_NOVERS(get_wd_expire_time_sd);
-EXPORT_SYMBOL_NOVERS(reset_bypass_wd_timer_sd);
-EXPORT_SYMBOL_NOVERS(set_std_nic_sd);
-EXPORT_SYMBOL_NOVERS(get_std_nic_sd);
-EXPORT_SYMBOL_NOVERS(set_tx_sd);
-EXPORT_SYMBOL_NOVERS(get_tx_sd);
-EXPORT_SYMBOL_NOVERS(set_tpl_sd);
-EXPORT_SYMBOL_NOVERS(get_tpl_sd);
-EXPORT_SYMBOL_NOVERS(set_bp_hw_reset_sd);
-EXPORT_SYMBOL_NOVERS(get_bp_hw_reset_sd);
-EXPORT_SYMBOL_NOVERS(set_tap_sd);
-EXPORT_SYMBOL_NOVERS(get_tap_sd);
-EXPORT_SYMBOL_NOVERS(get_tap_change_sd);
-EXPORT_SYMBOL_NOVERS(set_dis_tap_sd);
-EXPORT_SYMBOL_NOVERS(get_dis_tap_sd);
-EXPORT_SYMBOL_NOVERS(set_tap_pwup_sd);
-EXPORT_SYMBOL_NOVERS(get_tap_pwup_sd);
-EXPORT_SYMBOL_NOVERS(set_wd_exp_mode_sd);
-EXPORT_SYMBOL_NOVERS(get_wd_exp_mode_sd);
-EXPORT_SYMBOL_NOVERS(set_wd_autoreset_sd);
-EXPORT_SYMBOL_NOVERS(get_wd_autoreset_sd);
-EXPORT_SYMBOL_NOVERS(set_bp_disc_sd);
-EXPORT_SYMBOL_NOVERS(get_bp_disc_sd);
-EXPORT_SYMBOL_NOVERS(get_bp_disc_change_sd);
-EXPORT_SYMBOL_NOVERS(set_bp_dis_disc_sd);
-EXPORT_SYMBOL_NOVERS(get_bp_dis_disc_sd);
-EXPORT_SYMBOL_NOVERS(set_bp_disc_pwup_sd);
-EXPORT_SYMBOL_NOVERS(get_bp_disc_pwup_sd);
-EXPORT_SYMBOL_NOVERS(get_bypass_info_sd);
-EXPORT_SYMBOL_NOVERS(bp_if_scan_sd);
+EXPORT_SYMBOL(bp_if_scan_sd);
#define BP_PROC_DIR "bypass"
@@ -7263,7 +7279,7 @@ static int show_bypass_slave(struct seq_file *m, void *v)
if (!slave)
slave = dev;
if (!slave)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (slave->ndev)
seq_printf(m, "%s\n", slave->ndev->name);
return 0;
@@ -7275,7 +7291,7 @@ static int show_bypass_caps(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bypass_caps_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "-1\n");
+ seq_puts(m, "-1\n");
else
seq_printf(m, "0x%x\n", ret);
return 0;
@@ -7287,7 +7303,7 @@ static int show_wd_set_caps(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_wd_set_caps_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "-1\n");
+ seq_puts(m, "-1\n");
else
seq_printf(m, "0x%x\n", ret);
return 0;
@@ -7333,11 +7349,11 @@ static int show_bypass(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bypass_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(bypass)
@@ -7357,11 +7373,11 @@ static int show_tap(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_tap_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(tap)
@@ -7381,11 +7397,11 @@ static int show_disc(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_disc_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(disc)
@@ -7395,11 +7411,11 @@ static int show_bypass_change(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bypass_change_fn(dev);
if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
return 0;
}
RO_FOPS(bypass_change)
@@ -7409,11 +7425,11 @@ static int show_tap_change(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_tap_change_fn(dev);
if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
return 0;
}
RO_FOPS(tap_change)
@@ -7423,11 +7439,11 @@ static int show_disc_change(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_disc_change_fn(dev);
if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
return 0;
}
RO_FOPS(disc_change)
@@ -7450,11 +7466,11 @@ static int show_bypass_wd(struct seq_file *m, void *v)
ret = get_bypass_wd_fn(dev, &timeout);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (timeout == -1)
- seq_printf(m, "unknown\n");
+ seq_puts(m, "unknown\n");
else if (timeout == 0)
- seq_printf(m, "disable\n");
+ seq_puts(m, "disable\n");
else
seq_printf(m, "%d\n", timeout);
return 0;
@@ -7467,11 +7483,11 @@ static int show_wd_expire_time(struct seq_file *m, void *v)
int ret = 0, timeout = 0;
ret = get_wd_expire_time_fn(dev, &timeout);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (timeout == -1)
- seq_printf(m, "expire\n");
+ seq_puts(m, "expire\n");
else if (timeout == 0)
- seq_printf(m, "disable\n");
+ seq_puts(m, "disable\n");
else
seq_printf(m, "%d\n", timeout);
return 0;
@@ -7494,11 +7510,11 @@ static int show_tpl(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_tpl_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(tpl)
@@ -7520,11 +7536,11 @@ static int show_wait_at_pwup(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bp_wait_at_pwup_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(wait_at_pwup)
@@ -7545,11 +7561,11 @@ static int show_hw_reset(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bp_hw_reset_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 1)
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
return 0;
}
RW_FOPS(hw_reset)
@@ -7561,11 +7577,11 @@ static int show_reset_bypass_wd(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = reset_bypass_wd_timer_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "disable\n");
+ seq_puts(m, "disable\n");
else if (ret == 1)
- seq_printf(m, "success\n");
+ seq_puts(m, "success\n");
return 0;
}
RO_FOPS(reset_bypass_wd)
@@ -7585,11 +7601,11 @@ static int show_dis_bypass(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_dis_bypass_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(dis_bypass)
@@ -7609,11 +7625,11 @@ static int show_dis_tap(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_dis_tap_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(dis_tap)
@@ -7633,11 +7649,11 @@ static int show_dis_disc(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_dis_disc_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(dis_disc)
@@ -7657,11 +7673,11 @@ static int show_bypass_pwup(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bypass_pwup_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(bypass_pwup)
@@ -7681,11 +7697,11 @@ static int show_bypass_pwoff(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_bypass_pwoff_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(bypass_pwoff)
@@ -7705,11 +7721,11 @@ static int show_tap_pwup(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_tap_pwup_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(tap_pwup)
@@ -7729,11 +7745,11 @@ static int show_disc_pwup(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_disc_pwup_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(disc_pwup)
@@ -7753,11 +7769,11 @@ static int show_std_nic(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_std_nic_fn(dev);
if (ret == BP_NOT_CAP)
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
else if (ret == 0)
- seq_printf(m, "off\n");
+ seq_puts(m, "off\n");
else
- seq_printf(m, "on\n");
+ seq_puts(m, "on\n");
return 0;
}
RW_FOPS(std_nic)
@@ -7795,13 +7811,13 @@ static int show_wd_exp_mode(struct seq_file *m, void *v)
bpctl_dev_t *dev = m->private;
int ret = get_wd_exp_mode_fn(dev);
if (ret == 1)
- seq_printf(m, "tap\n");
+ seq_puts(m, "tap\n");
else if (ret == 0)
- seq_printf(m, "bypass\n");
+ seq_puts(m, "bypass\n");
else if (ret == 2)
- seq_printf(m, "disc\n");
+ seq_puts(m, "disc\n");
else
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
return 0;
}
RW_FOPS(wd_exp_mode)
@@ -7823,7 +7839,7 @@ static int show_wd_autoreset(struct seq_file *m, void *v)
if (ret >= 0)
seq_printf(m, "%d\n", ret);
else
- seq_printf(m, "fail\n");
+ seq_puts(m, "fail\n");
return 0;
}
RW_FOPS(wd_autoreset)
@@ -7831,7 +7847,7 @@ RW_FOPS(wd_autoreset)
int bypass_proc_create_dev_sd(bpctl_dev_t *pbp_device_block)
{
struct bypass_pfs_sd *current_pfs = &(pbp_device_block->bypass_pfs_set);
- static struct proc_dir_entry *procfs_dir = NULL;
+ static struct proc_dir_entry *procfs_dir;
int ret = 0;
if (!pbp_device_block->ndev)
@@ -7851,7 +7867,8 @@ int bypass_proc_create_dev_sd(bpctl_dev_t *pbp_device_block)
}
current_pfs->bypass_entry = procfs_dir;
-#define ENTRY(x) ret |= procfs_add(#x, &x##_ops, pbp_device_block)
+#define ENTRY(x) (ret |= procfs_add(#x, &x##_ops, pbp_device_block))
+
ENTRY(bypass_info);
if (pbp_device_block->bp_caps & SW_CTL_CAP) {
/* Create set param proc's */
diff --git a/drivers/staging/silicom/bypasslib/bp_ioctl.h b/drivers/staging/silicom/bypasslib/bp_ioctl.h
index 040c6fa8d5ad..2d1ef5384436 100644
--- a/drivers/staging/silicom/bypasslib/bp_ioctl.h
+++ b/drivers/staging/silicom/bypasslib/bp_ioctl.h
@@ -14,41 +14,41 @@
#ifndef BP_IOCTL_H
#define BP_IOCTL_H
-#define BP_CAP 0x01 //BIT_0
-#define BP_STATUS_CAP 0x02 //BIT_1
-#define BP_STATUS_CHANGE_CAP 0x04 //BIT_2
-#define SW_CTL_CAP 0x08 //BIT_3
-#define BP_DIS_CAP 0x10 //BIT_4
-#define BP_DIS_STATUS_CAP 0x20 //BIT_5
-#define STD_NIC_CAP 0x40 //BIT_6
-#define BP_PWOFF_ON_CAP 0x80 //BIT_7
-#define BP_PWOFF_OFF_CAP 0x0100 //BIT_8
-#define BP_PWOFF_CTL_CAP 0x0200 //BIT_9
-#define BP_PWUP_ON_CAP 0x0400 //BIT_10
-#define BP_PWUP_OFF_CAP 0x0800 //BIT_11
-#define BP_PWUP_CTL_CAP 0x1000 //BIT_12
-#define WD_CTL_CAP 0x2000 //BIT_13
-#define WD_STATUS_CAP 0x4000 //BIT_14
-#define WD_TIMEOUT_CAP 0x8000 //BIT_15
-#define TX_CTL_CAP 0x10000 //BIT_16
-#define TX_STATUS_CAP 0x20000 //BIT_17
-#define TAP_CAP 0x40000 //BIT_18
-#define TAP_STATUS_CAP 0x80000 //BIT_19
-#define TAP_STATUS_CHANGE_CAP 0x100000 //BIT_20
-#define TAP_DIS_CAP 0x200000 //BIT_21
-#define TAP_DIS_STATUS_CAP 0x400000 //BIT_22
-#define TAP_PWUP_ON_CAP 0x800000 //BIT_23
-#define TAP_PWUP_OFF_CAP 0x1000000 //BIT 24
-#define TAP_PWUP_CTL_CAP 0x2000000 //BIT 25
-#define NIC_CAP_NEG 0x4000000 //BIT 26
-#define TPL_CAP 0x8000000 //BIT 27
-#define DISC_CAP 0x10000000 //BIT 28
-#define DISC_DIS_CAP 0x20000000 //BIT 29
-#define DISC_PWUP_CTL_CAP 0x40000000 //BIT 30
+#define BP_CAP 0x01 /* BIT_0 */
+#define BP_STATUS_CAP 0x02 /* BIT_1 */
+#define BP_STATUS_CHANGE_CAP 0x04 /* BIT_2 */
+#define SW_CTL_CAP 0x08 /* BIT_3 */
+#define BP_DIS_CAP 0x10 /* BIT_4 */
+#define BP_DIS_STATUS_CAP 0x20 /* BIT_5 */
+#define STD_NIC_CAP 0x40 /* BIT_6 */
+#define BP_PWOFF_ON_CAP 0x80 /* BIT_7 */
+#define BP_PWOFF_OFF_CAP 0x0100 /* BIT_8 */
+#define BP_PWOFF_CTL_CAP 0x0200 /* BIT_9 */
+#define BP_PWUP_ON_CAP 0x0400 /* BIT_10 */
+#define BP_PWUP_OFF_CAP 0x0800 /* BIT_11 */
+#define BP_PWUP_CTL_CAP 0x1000 /* BIT_12 */
+#define WD_CTL_CAP 0x2000 /* BIT_13 */
+#define WD_STATUS_CAP 0x4000 /* BIT_14 */
+#define WD_TIMEOUT_CAP 0x8000 /* BIT_15 */
+#define TX_CTL_CAP 0x10000 /* BIT_16 */
+#define TX_STATUS_CAP 0x20000 /* BIT_17 */
+#define TAP_CAP 0x40000 /* BIT_18 */
+#define TAP_STATUS_CAP 0x80000 /* BIT_19 */
+#define TAP_STATUS_CHANGE_CAP 0x100000 /* BIT_20 */
+#define TAP_DIS_CAP 0x200000 /* BIT_21 */
+#define TAP_DIS_STATUS_CAP 0x400000 /* BIT_22 */
+#define TAP_PWUP_ON_CAP 0x800000 /* BIT_23 */
+#define TAP_PWUP_OFF_CAP 0x1000000 /* BIT 24 */
+#define TAP_PWUP_CTL_CAP 0x2000000 /* BIT 25 */
+#define NIC_CAP_NEG 0x4000000 /* BIT 26 */
+#define TPL_CAP 0x8000000 /* BIT 27 */
+#define DISC_CAP 0x10000000 /* BIT 28 */
+#define DISC_DIS_CAP 0x20000000 /* BIT 29 */
+#define DISC_PWUP_CTL_CAP 0x40000000 /* BIT 30 */
#define WD_MIN_TIME_MASK(val) (val & 0xf)
#define WD_STEP_COUNT_MASK(val) ((val & 0xf) << 5)
-#define WDT_STEP_TIME 0x10 //BIT_4
+#define WDT_STEP_TIME 0x10 /* BIT_4 */
#define WD_MIN_TIME_GET(desc) (desc & 0xf)
#define WD_STEP_COUNT_GET(desc) (desc>>5) & 0xf
diff --git a/drivers/staging/silicom/bypasslib/bplibk.h b/drivers/staging/silicom/bypasslib/bplibk.h
index d8c1d27650b4..c5c75c4fe055 100644
--- a/drivers/staging/silicom/bypasslib/bplibk.h
+++ b/drivers/staging/silicom/bypasslib/bplibk.h
@@ -24,15 +24,13 @@
#define INTEL_PEG4BPFII_PID 0x10a1
#define PEGII_IF_SERIES(vid, pid) \
- ((vid==0x8086)&& \
- ((pid==INTEL_PEG4BPII_PID)|| \
- (pid==INTEL_PEG4BPFII_PID)))
-
-#define EXPORT_SYMBOL_NOVERS EXPORT_SYMBOL
+ ((vid == 0x8086) && \
+ ((pid == INTEL_PEG4BPII_PID) || \
+ (pid == INTEL_PEG4BPFII_PID)))
#ifdef BP_VENDOR_SUPPORT
-char *bp_desc_array[] =
- { "e1000bp", "e1000bpe", "slcm5700", "bnx2xbp", "ixgbp", "ixgbpe", NULL };
+char *bp_desc_array[] = { "e1000bp", "e1000bpe", "slcm5700",
+ "bnx2xbp", "ixgbp", "ixgbpe", NULL };
#endif
#endif
diff --git a/drivers/staging/silicom/bypasslib/bypass.c b/drivers/staging/silicom/bypasslib/bypass.c
index 9ed250848e81..ba0d23a1cfbe 100644
--- a/drivers/staging/silicom/bypasslib/bypass.c
+++ b/drivers/staging/silicom/bypasslib/bypass.c
@@ -188,69 +188,82 @@ static int is_bypass(int if_index)
return is_bypass_dev(if_index);
return ret;
}
+EXPORT_SYMBOL(is_bypass);
static int get_bypass_slave(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass_slave, GET_BYPASS_SLAVE, if_index);
}
+EXPORT_SYMBOL(get_bypass_slave);
static int get_bypass_caps(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass_caps, GET_BYPASS_CAPS, if_index);
}
+EXPORT_SYMBOL(get_bypass_caps);
static int get_wd_set_caps(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_wd_set_caps, GET_WD_SET_CAPS, if_index);
}
+EXPORT_SYMBOL(get_wd_set_caps);
static int set_bypass(int if_index, int bypass_mode)
{
DO_BPLIB_SET_ARG_FN(set_bypass, SET_BYPASS, if_index, bypass_mode);
}
+EXPORT_SYMBOL(set_bypass);
static int get_bypass(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass, GET_BYPASS, if_index);
}
+EXPORT_SYMBOL(get_bypass);
static int get_bypass_change(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass_change, GET_BYPASS_CHANGE, if_index);
}
+EXPORT_SYMBOL(get_bypass_change);
static int set_dis_bypass(int if_index, int dis_bypass)
{
DO_BPLIB_SET_ARG_FN(set_dis_bypass, SET_DIS_BYPASS, if_index,
dis_bypass);
}
+EXPORT_SYMBOL(set_dis_bypass);
static int get_dis_bypass(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_dis_bypass, GET_DIS_BYPASS, if_index);
}
+EXPORT_SYMBOL(get_dis_bypass);
static int set_bypass_pwoff(int if_index, int bypass_mode)
{
DO_BPLIB_SET_ARG_FN(set_bypass_pwoff, SET_BYPASS_PWOFF, if_index,
bypass_mode);
}
+EXPORT_SYMBOL(set_bypass_pwoff);
static int get_bypass_pwoff(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass_pwoff, GET_BYPASS_PWOFF, if_index);
}
+EXPORT_SYMBOL(get_bypass_pwoff);
static int set_bypass_pwup(int if_index, int bypass_mode)
{
DO_BPLIB_SET_ARG_FN(set_bypass_pwup, SET_BYPASS_PWUP, if_index,
bypass_mode);
}
+EXPORT_SYMBOL(set_bypass_pwup);
static int get_bypass_pwup(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bypass_pwup, GET_BYPASS_PWUP, if_index);
}
+EXPORT_SYMBOL(get_bypass_pwup);
static int set_bypass_wd(int if_index, int ms_timeout, int *ms_timeout_set)
{
@@ -267,6 +280,7 @@ static int set_bypass_wd(int if_index, int ms_timeout, int *ms_timeout_set)
}
return ret;
}
+EXPORT_SYMBOL(set_bypass_wd);
static int get_bypass_wd(int if_index, int *ms_timeout_set)
{
@@ -278,6 +292,7 @@ static int get_bypass_wd(int if_index, int *ms_timeout_set)
ret = doit(GET_BYPASS_WD, if_index, data);
return ret;
}
+EXPORT_SYMBOL(get_bypass_wd);
static int get_wd_expire_time(int if_index, int *ms_time_left)
{
@@ -292,143 +307,171 @@ static int get_wd_expire_time(int if_index, int *ms_time_left)
}
return ret;
}
+EXPORT_SYMBOL(get_wd_expire_time);
static int reset_bypass_wd_timer(int if_index)
{
DO_BPLIB_GET_ARG_FN(reset_bypass_wd_timer, RESET_BYPASS_WD_TIMER,
if_index);
}
+EXPORT_SYMBOL(reset_bypass_wd_timer);
static int set_std_nic(int if_index, int bypass_mode)
{
DO_BPLIB_SET_ARG_FN(set_std_nic, SET_STD_NIC, if_index, bypass_mode);
}
+EXPORT_SYMBOL(set_std_nic);
static int get_std_nic(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_std_nic, GET_STD_NIC, if_index);
}
+EXPORT_SYMBOL(get_std_nic);
static int set_tx(int if_index, int tx_state)
{
DO_BPLIB_SET_ARG_FN(set_tx, SET_TX, if_index, tx_state);
}
+EXPORT_SYMBOL(set_tx);
static int get_tx(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tx, GET_TX, if_index);
}
+EXPORT_SYMBOL(get_tx);
static int set_tap(int if_index, int tap_mode)
{
DO_BPLIB_SET_ARG_FN(set_tap, SET_TAP, if_index, tap_mode);
}
+EXPORT_SYMBOL(set_tap);
static int get_tap(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tap, GET_TAP, if_index);
}
+EXPORT_SYMBOL(get_tap);
static int get_tap_change(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tap_change, GET_TAP_CHANGE, if_index);
}
+EXPORT_SYMBOL(get_tap_change);
static int set_dis_tap(int if_index, int dis_tap)
{
DO_BPLIB_SET_ARG_FN(set_dis_tap, SET_DIS_TAP, if_index, dis_tap);
}
+EXPORT_SYMBOL(set_dis_tap);
static int get_dis_tap(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_dis_tap, GET_DIS_TAP, if_index);
}
+EXPORT_SYMBOL(get_dis_tap);
static int set_tap_pwup(int if_index, int tap_mode)
{
DO_BPLIB_SET_ARG_FN(set_tap_pwup, SET_TAP_PWUP, if_index, tap_mode);
}
+EXPORT_SYMBOL(set_tap_pwup);
static int get_tap_pwup(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tap_pwup, GET_TAP_PWUP, if_index);
}
+EXPORT_SYMBOL(get_tap_pwup);
static int set_bp_disc(int if_index, int disc_mode)
{
DO_BPLIB_SET_ARG_FN(set_bp_disc, SET_DISC, if_index, disc_mode);
}
+EXPORT_SYMBOL(set_bp_disc);
static int get_bp_disc(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bp_disc, GET_DISC, if_index);
}
+EXPORT_SYMBOL(get_bp_disc);
static int get_bp_disc_change(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bp_disc_change, GET_DISC_CHANGE, if_index);
}
+EXPORT_SYMBOL(get_bp_disc_change);
static int set_bp_dis_disc(int if_index, int dis_disc)
{
DO_BPLIB_SET_ARG_FN(set_bp_dis_disc, SET_DIS_DISC, if_index, dis_disc);
}
+EXPORT_SYMBOL(set_bp_dis_disc);
static int get_bp_dis_disc(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bp_dis_disc, GET_DIS_DISC, if_index);
}
+EXPORT_SYMBOL(get_bp_dis_disc);
static int set_bp_disc_pwup(int if_index, int disc_mode)
{
DO_BPLIB_SET_ARG_FN(set_bp_disc_pwup, SET_DISC_PWUP, if_index,
disc_mode);
}
+EXPORT_SYMBOL(set_bp_disc_pwup);
static int get_bp_disc_pwup(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_bp_disc_pwup, GET_DISC_PWUP, if_index);
}
+EXPORT_SYMBOL(get_bp_disc_pwup);
static int set_wd_exp_mode(int if_index, int mode)
{
DO_BPLIB_SET_ARG_FN(set_wd_exp_mode, SET_WD_EXP_MODE, if_index, mode);
}
+EXPORT_SYMBOL(set_wd_exp_mode);
static int get_wd_exp_mode(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_wd_exp_mode, GET_WD_EXP_MODE, if_index);
}
+EXPORT_SYMBOL(get_wd_exp_mode);
static int set_wd_autoreset(int if_index, int time)
{
DO_BPLIB_SET_ARG_FN(set_wd_autoreset, SET_WD_AUTORESET, if_index, time);
}
+EXPORT_SYMBOL(set_wd_autoreset);
static int get_wd_autoreset(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_wd_autoreset, GET_WD_AUTORESET, if_index);
}
+EXPORT_SYMBOL(get_wd_autoreset);
static int set_tpl(int if_index, int tpl_mode)
{
DO_BPLIB_SET_ARG_FN(set_tpl, SET_TPL, if_index, tpl_mode);
}
+EXPORT_SYMBOL(set_tpl);
static int get_tpl(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tpl, GET_TPL, if_index);
}
+EXPORT_SYMBOL(get_tpl);
static int set_bp_hw_reset(int if_index, int mode)
{
DO_BPLIB_SET_ARG_FN(set_tpl, SET_BP_HW_RESET, if_index, mode);
}
+EXPORT_SYMBOL(set_bp_hw_reset);
static int get_bp_hw_reset(int if_index)
{
DO_BPLIB_GET_ARG_FN(get_tpl, GET_BP_HW_RESET, if_index);
}
+EXPORT_SYMBOL(get_bp_hw_reset);
static int get_bypass_info(int if_index, struct bp_info *bp_info)
{
@@ -467,6 +510,7 @@ static int get_bypass_info(int if_index, struct bp_info *bp_info)
}
return ret;
}
+EXPORT_SYMBOL(get_bypass_info);
int init_lib_module(void)
{
@@ -479,50 +523,5 @@ void cleanup_lib_module(void)
{
}
-EXPORT_SYMBOL_NOVERS(is_bypass);
-EXPORT_SYMBOL_NOVERS(get_bypass_slave);
-EXPORT_SYMBOL_NOVERS(get_bypass_caps);
-EXPORT_SYMBOL_NOVERS(get_wd_set_caps);
-EXPORT_SYMBOL_NOVERS(set_bypass);
-EXPORT_SYMBOL_NOVERS(get_bypass);
-EXPORT_SYMBOL_NOVERS(get_bypass_change);
-EXPORT_SYMBOL_NOVERS(set_dis_bypass);
-EXPORT_SYMBOL_NOVERS(get_dis_bypass);
-EXPORT_SYMBOL_NOVERS(set_bypass_pwoff);
-EXPORT_SYMBOL_NOVERS(get_bypass_pwoff);
-EXPORT_SYMBOL_NOVERS(set_bypass_pwup);
-EXPORT_SYMBOL_NOVERS(get_bypass_pwup);
-EXPORT_SYMBOL_NOVERS(set_bypass_wd);
-EXPORT_SYMBOL_NOVERS(get_bypass_wd);
-EXPORT_SYMBOL_NOVERS(get_wd_expire_time);
-EXPORT_SYMBOL_NOVERS(reset_bypass_wd_timer);
-EXPORT_SYMBOL_NOVERS(set_std_nic);
-EXPORT_SYMBOL_NOVERS(get_std_nic);
-EXPORT_SYMBOL_NOVERS(set_tx);
-EXPORT_SYMBOL_NOVERS(get_tx);
-EXPORT_SYMBOL_NOVERS(set_tap);
-EXPORT_SYMBOL_NOVERS(get_tap);
-EXPORT_SYMBOL_NOVERS(get_tap_change);
-EXPORT_SYMBOL_NOVERS(set_dis_tap);
-EXPORT_SYMBOL_NOVERS(get_dis_tap);
-EXPORT_SYMBOL_NOVERS(set_tap_pwup);
-EXPORT_SYMBOL_NOVERS(get_tap_pwup);
-EXPORT_SYMBOL_NOVERS(set_bp_disc);
-EXPORT_SYMBOL_NOVERS(get_bp_disc);
-EXPORT_SYMBOL_NOVERS(get_bp_disc_change);
-EXPORT_SYMBOL_NOVERS(set_bp_dis_disc);
-EXPORT_SYMBOL_NOVERS(get_bp_dis_disc);
-EXPORT_SYMBOL_NOVERS(set_bp_disc_pwup);
-EXPORT_SYMBOL_NOVERS(get_bp_disc_pwup);
-EXPORT_SYMBOL_NOVERS(set_wd_exp_mode);
-EXPORT_SYMBOL_NOVERS(get_wd_exp_mode);
-EXPORT_SYMBOL_NOVERS(set_wd_autoreset);
-EXPORT_SYMBOL_NOVERS(get_wd_autoreset);
-EXPORT_SYMBOL_NOVERS(set_tpl);
-EXPORT_SYMBOL_NOVERS(get_tpl);
-EXPORT_SYMBOL_NOVERS(set_bp_hw_reset);
-EXPORT_SYMBOL_NOVERS(get_bp_hw_reset);
-EXPORT_SYMBOL_NOVERS(get_bypass_info);
-
module_init(init_lib_module);
module_exit(cleanup_lib_module);
diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c
index e4b82770ed39..869dcd3b385a 100644
--- a/drivers/staging/slicoss/slicoss.c
+++ b/drivers/staging/slicoss/slicoss.c
@@ -3651,17 +3651,20 @@ static int slic_entry_probe(struct pci_dev *pcidev,
if (!pci_set_dma_mask(pcidev, DMA_BIT_MASK(64))) {
pci_using_dac = 1;
- if (pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64))) {
+ err = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64));
+ if (err) {
dev_err(&pcidev->dev, "unable to obtain 64-bit DMA for "
"consistent allocations\n");
goto err_out_disable_pci;
}
- } else if (pci_set_dma_mask(pcidev, DMA_BIT_MASK(32))) {
+ } else {
+ err = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pcidev->dev, "no usable DMA configuration\n");
+ goto err_out_disable_pci;
+ }
pci_using_dac = 0;
pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32));
- } else {
- dev_err(&pcidev->dev, "no usable DMA configuration\n");
- goto err_out_disable_pci;
}
err = pci_request_regions(pcidev, DRV_NAME);
@@ -3696,6 +3699,7 @@ static int slic_entry_probe(struct pci_dev *pcidev,
if (!memmapped_ioaddr) {
dev_err(&pcidev->dev, "cannot remap MMIO region %lx @ %lx\n",
mmio_len, mmio_start);
+ err = -ENOMEM;
goto err_out_free_netdev;
}
@@ -3706,8 +3710,8 @@ static int slic_entry_probe(struct pci_dev *pcidev,
slic_init_adapter(netdev,
pcidev, pci_tbl_entry, memmapped_ioaddr, cards_found);
- status = slic_card_locate(adapter);
- if (status) {
+ err = slic_card_locate(adapter);
+ if (err) {
dev_err(&pcidev->dev, "cannot locate card\n");
goto err_out_free_mmio_region;
}
diff --git a/drivers/staging/speakup/Kconfig b/drivers/staging/speakup/Kconfig
index b416aceb13f2..8c3e7a60a9be 100644
--- a/drivers/staging/speakup/Kconfig
+++ b/drivers/staging/speakup/Kconfig
@@ -11,7 +11,7 @@ config SPEAKUP
point your browser at <http://www.linux-speakup.org/>.
There is also a mailing list at the above url that you
can subscribe to.
-
+
Supported synthesizers are accent sa, accent pc,
appollo II., Auddapter, Braille 'n Speak, Dectalk
external (old), Dectalk PC (full length isa board),
@@ -19,24 +19,24 @@ config SPEAKUP
Litetalk, Keynote Gold internal PC, software
synthesizers, Speakout, transport, and a dummy module
that can be used with a plain text terminal.
-
+
Speakup can either be built in or compiled as a module
by answering y or m. If you answer y here, then you
must answer either y or m to at least one of the
synthesizer drivers below. If you answer m here, then
the synthesizer drivers below can only be built as
modules.
-
+
These drivers are not standalone drivers, but must be
used in conjunction with Speakup. Think of them as
video cards for blind people.
-
-
+
+
The Dectalk pc driver can only be built as a module, and
requires software to be pre-loaded on to the card before
the module can be loaded. See the decpc choice below
for more details.
-
+
If you are not a blind person, or don't have access to
one of the listed synthesizers, you should say n.
@@ -84,7 +84,7 @@ config SPEAKUP_SYNTH_BNS
config SPEAKUP_SYNTH_DECTLK
tristate "DECtalk Express synthesizer support"
---help---
-
+
This is the Speakup driver for the DecTalk Express
synthesizer. You can say y to build it into the kernel,
or m to build it as a module. See the configuration
@@ -93,7 +93,7 @@ config SPEAKUP_SYNTH_DECTLK
config SPEAKUP_SYNTH_DECEXT
tristate "DECtalk External (old) synthesizer support"
---help---
-
+
This is the Speakup driver for the DecTalk External
(old) synthesizer. You can say y to build it into the
kernel, or m to build it as a module. See the
@@ -104,12 +104,12 @@ config SPEAKUP_SYNTH_DECPC
depends on m
tristate "DECtalk PC (big ISA card) synthesizer support"
---help---
-
+
This is the Speakup driver for the DecTalk PC (full
length ISA) synthesizer. You can say m to build it as
a module. See the configuration help on the Speakup
choice above for more info.
-
+
In order to use the DecTalk PC driver, you must download
the dec_pc.tgz file from linux-speakup.org. It is in
the pub/linux/goodies directory. The dec_pc.tgz file
@@ -118,14 +118,14 @@ config SPEAKUP_SYNTH_DECPC
This driver must be built as a module, and can not be
loaded until the file system is mounted and the DecTalk
PC software has been pre-loaded on to the board.
-
+
See the README file in the dec_pc.tgz file for more
details.
config SPEAKUP_SYNTH_DTLK
tristate "DoubleTalk PC synthesizer support"
---help---
-
+
This is the Speakup driver for the internal DoubleTalk
PC synthesizer. You can say y to build it into the
kernel, or m to build it as a module. See the
@@ -135,7 +135,7 @@ config SPEAKUP_SYNTH_DTLK
config SPEAKUP_SYNTH_KEYPC
tristate "Keynote Gold PC synthesizer support"
---help---
-
+
This is the Speakup driver for the Keynote Gold
PC synthesizer. You can say y to build it into the
kernel, or m to build it as a module. See the
@@ -166,7 +166,7 @@ config SPEAKUP_SYNTH_SOFT
config SPEAKUP_SYNTH_SPKOUT
tristate "Speak Out synthesizer support"
---help---
-
+
This is the Speakup driver for the Speakout synthesizer.
You can say y to build it into the kernel, or m to
build it as a module. See the configuration help on the
@@ -175,7 +175,7 @@ config SPEAKUP_SYNTH_SPKOUT
config SPEAKUP_SYNTH_TXPRT
tristate "Transport synthesizer support"
---help---
-
+
This is the Speakup driver for the Transport
synthesizer. You can say y to build it into the kernel,
or m to build it as a module. See the configuration
@@ -184,7 +184,7 @@ config SPEAKUP_SYNTH_TXPRT
config SPEAKUP_SYNTH_DUMMY
tristate "Dummy synthesizer driver (for testing)"
---help---
-
+
This is a dummy Speakup driver for plugging a mere serial
terminal. This is handy if you want to test speakup but
don't have the hardware. You can say y to build it into
diff --git a/drivers/staging/speakup/devsynth.c b/drivers/staging/speakup/devsynth.c
index 940769ef883f..71c728acf4ca 100644
--- a/drivers/staging/speakup/devsynth.c
+++ b/drivers/staging/speakup/devsynth.c
@@ -13,11 +13,11 @@
static int misc_registered;
static int dev_opened;
-static ssize_t speakup_file_write(struct file *fp, const char *buffer,
- size_t nbytes, loff_t *ppos)
+static ssize_t speakup_file_write(struct file *fp, const char __user *buffer,
+ size_t nbytes, loff_t *ppos)
{
size_t count = nbytes;
- const char *ptr = buffer;
+ const char __user *ptr = buffer;
size_t bytes;
unsigned long flags;
u_char buf[256];
@@ -30,15 +30,15 @@ static ssize_t speakup_file_write(struct file *fp, const char *buffer,
return -EFAULT;
count -= bytes;
ptr += bytes;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_write(buf, bytes);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
return (ssize_t) nbytes;
}
-static ssize_t speakup_file_read(struct file *fp, char *buf, size_t nbytes,
- loff_t *ppos)
+static ssize_t speakup_file_read(struct file *fp, char __user *buf,
+ size_t nbytes, loff_t *ppos)
{
return 0;
}
diff --git a/drivers/staging/speakup/i18n.c b/drivers/staging/speakup/i18n.c
index 2add1fcfd122..9ea16c5b4d6c 100644
--- a/drivers/staging/speakup/i18n.c
+++ b/drivers/staging/speakup/i18n.c
@@ -558,11 +558,11 @@ ssize_t spk_msg_set(enum msg_index_t index, char *text, size_t length)
kfree(newstr);
return -EINVAL;
}
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_msgs[index] != speakup_default_msgs[index])
kfree(speakup_msgs[index]);
speakup_msgs[index] = newstr;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
} else {
rc = -ENOMEM;
}
@@ -595,14 +595,14 @@ void spk_reset_msg_group(struct msg_group_t *group)
unsigned long flags;
enum msg_index_t i;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
for (i = group->start; i <= group->end; i++) {
if (speakup_msgs[i] != speakup_default_msgs[i])
kfree(speakup_msgs[i]);
speakup_msgs[i] = speakup_default_msgs[i];
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
/* Called at initialization time, to establish default messages. */
@@ -618,12 +618,12 @@ void spk_free_user_msgs(void)
enum msg_index_t index;
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
for (index = MSG_FIRST_INDEX; index < MSG_LAST_INDEX; index++) {
if (speakup_msgs[index] != speakup_default_msgs[index]) {
kfree(speakup_msgs[index]);
speakup_msgs[index] = speakup_default_msgs[index];
}
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c
index 943b6c134a22..51bdea3a5bea 100644
--- a/drivers/staging/speakup/kobjects.c
+++ b/drivers/staging/speakup/kobjects.c
@@ -35,7 +35,7 @@ static ssize_t chars_chartab_show(struct kobject *kobj,
size_t bufsize = PAGE_SIZE;
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
*buf_pointer = '\0';
for (i = 0; i < 256; i++) {
if (bufsize <= 1)
@@ -70,7 +70,7 @@ static ssize_t chars_chartab_show(struct kobject *kobj,
bufsize -= len;
buf_pointer += len;
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return buf_pointer - buf;
}
@@ -127,7 +127,7 @@ static ssize_t chars_chartab_store(struct kobject *kobj,
size_t desc_length = 0;
int i;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
while (cp < end) {
while ((cp < end) && (*cp == ' ' || *cp == '\t'))
@@ -212,7 +212,7 @@ static ssize_t chars_chartab_store(struct kobject *kobj,
spk_reset_default_chartab();
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
report_char_chartab_status(reset, received, used, rejected,
do_characters);
return retval;
@@ -232,7 +232,7 @@ static ssize_t keymap_show(struct kobject *kobj, struct kobj_attribute *attr,
u_char *cp1;
u_char ch;
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
cp1 = spk_key_buf + SHIFT_TBL_SIZE;
num_keys = (int)(*cp1);
nstates = (int)cp1[1];
@@ -248,7 +248,7 @@ static ssize_t keymap_show(struct kobject *kobj, struct kobj_attribute *attr,
}
}
cp += sprintf(cp, "0, %d\n", KEY_MAP_VER);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return (int)(cp-buf);
}
@@ -265,17 +265,17 @@ static ssize_t keymap_store(struct kobject *kobj, struct kobj_attribute *attr,
u_char *cp1;
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
in_buff = kmemdup(buf, count + 1, GFP_ATOMIC);
if (!in_buff) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return -ENOMEM;
}
if (strchr("dDrR", *in_buff)) {
spk_set_key_info(spk_key_defaults, spk_key_buf);
pr_info("keymap set to default values\n");
kfree(in_buff);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return count;
}
if (in_buff[count - 1] == '\n')
@@ -294,7 +294,7 @@ static ssize_t keymap_store(struct kobject *kobj, struct kobj_attribute *attr,
pr_warn("i %d %d %d %d\n", i,
(int)cp1[-3], (int)cp1[-2], (int)cp1[-1]);
kfree(in_buff);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return -EINVAL;
}
while (--i >= 0) {
@@ -315,7 +315,7 @@ static ssize_t keymap_store(struct kobject *kobj, struct kobj_attribute *attr,
}
}
kfree(in_buff);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return ret;
}
@@ -341,7 +341,7 @@ static ssize_t silent_store(struct kobject *kobj, struct kobj_attribute *attr,
pr_warn("silent value '%c' not in range (0,7)\n", ch);
return -EINVAL;
}
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (ch&2) {
shut = 1;
spk_do_flush();
@@ -354,7 +354,7 @@ static ssize_t silent_store(struct kobject *kobj, struct kobj_attribute *attr,
spk_shut_up |= shut;
else
spk_shut_up &= ~shut;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return count;
}
@@ -470,7 +470,7 @@ static ssize_t punc_show(struct kobject *kobj, struct kobj_attribute *attr,
return -EINVAL;
}
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
pb = (struct st_bits_data *) &spk_punc_info[var->value];
mask = pb->mask;
for (i = 33; i < 128; i++) {
@@ -478,7 +478,7 @@ static ssize_t punc_show(struct kobject *kobj, struct kobj_attribute *attr,
continue;
*cp++ = (char)i;
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return cp-buf;
}
@@ -518,14 +518,14 @@ static ssize_t punc_store(struct kobject *kobj, struct kobj_attribute *attr,
x--;
punc_buf[x] = '\0';
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (*punc_buf == 'd' || *punc_buf == 'r')
- x = spk_set_mask_bits(0, var->value, 3);
+ x = spk_set_mask_bits(NULL, var->value, 3);
else
x = spk_set_mask_bits(punc_buf, var->value, 3);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return count;
}
@@ -547,7 +547,7 @@ ssize_t spk_var_show(struct kobject *kobj, struct kobj_attribute *attr,
if (param == NULL)
return -EINVAL;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
var = (struct var_t *) param->data;
switch (param->var_type) {
case VAR_NUM:
@@ -580,7 +580,7 @@ ssize_t spk_var_show(struct kobject *kobj, struct kobj_attribute *attr,
param->name, param->var_type);
break;
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return rv;
}
EXPORT_SYMBOL_GPL(spk_var_show);
@@ -609,7 +609,7 @@ ssize_t spk_var_store(struct kobject *kobj, struct kobj_attribute *attr,
cp = (char *)buf;
string_unescape_any_inplace(cp);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
switch (param->var_type) {
case VAR_NUM:
case VAR_TIME:
@@ -670,7 +670,7 @@ ssize_t spk_var_store(struct kobject *kobj, struct kobj_attribute *attr,
}
}
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ret == -ERESTART)
pr_info("%s reset to default value\n", attr->attr.name);
@@ -818,9 +818,9 @@ static ssize_t message_show(struct kobject *kobj,
unsigned long flags;
BUG_ON(!group);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
retval = message_show_helper(buf, group->start, group->end);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return retval;
}
diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c
index 6c7b55c2947d..14079c4949a8 100644
--- a/drivers/staging/speakup/main.c
+++ b/drivers/staging/speakup/main.c
@@ -95,7 +95,8 @@ const struct st_bits_data spk_punc_info[] = {
static char mark_cut_flag;
#define MAX_KEY 160
-u_char *spk_our_keys[MAX_KEY], *spk_shift_table;
+static u_char *spk_shift_table;
+u_char *spk_our_keys[MAX_KEY];
u_char spk_key_buf[600];
const u_char spk_key_defaults[] = {
#include "speakupmap.h"
@@ -457,7 +458,7 @@ static void speak_char(u_char ch)
synth_buffer_add(SPACE);
}
-static u16 get_char(struct vc_data *vc, u16 * pos, u_char * attribs)
+static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
{
u16 ch = ' ';
if (vc && pos) {
@@ -1129,7 +1130,7 @@ static void do_handle_shift(struct vc_data *vc, u_char value, char up_flag)
unsigned long flags;
if (synth == NULL || up_flag || spk_killed)
return;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (cursor_track == read_all_mode) {
switch (value) {
case KVAL(K_SHIFT):
@@ -1151,20 +1152,20 @@ static void do_handle_shift(struct vc_data *vc, u_char value, char up_flag)
}
if (spk_say_ctrl && value < NUM_CTL_LABELS)
synth_printf("%s", spk_msg_get(MSG_CTL_START + value));
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
static void do_handle_latin(struct vc_data *vc, u_char value, char up_flag)
{
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (up_flag) {
spk_lastkey = spk_keydown = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
if (synth == NULL || spk_killed) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
spk_shut_up &= 0xfe;
@@ -1173,7 +1174,7 @@ static void do_handle_latin(struct vc_data *vc, u_char value, char up_flag)
spk_parked &= 0xfe;
if (spk_key_echo == 2 && value >= MINECHOCHAR)
speak_char(value);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
int spk_set_key_info(const u_char *key_info, u_char *k_buffer)
@@ -1282,7 +1283,7 @@ static int edit_bits(struct vc_data *vc, u_char type, u_char ch, u_short key)
}
/* Allocation concurrency is protected by the console semaphore */
-int speakup_allocate(struct vc_data *vc)
+static int speakup_allocate(struct vc_data *vc)
{
int vc_num;
@@ -1299,7 +1300,7 @@ int speakup_allocate(struct vc_data *vc)
return 0;
}
-void speakup_deallocate(struct vc_data *vc)
+static void speakup_deallocate(struct vc_data *vc)
{
int vc_num;
@@ -1449,21 +1450,21 @@ static void handle_cursor_read_all(struct vc_data *vc, int command)
static int pre_handle_cursor(struct vc_data *vc, u_char value, char up_flag)
{
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (cursor_track == read_all_mode) {
spk_parked &= 0xfe;
if (synth == NULL || up_flag || spk_shut_up) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return NOTIFY_STOP;
}
del_timer(&cursor_timer);
spk_shut_up &= 0xfe;
spk_do_flush();
start_read_all_timer(vc, value + 1);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return NOTIFY_STOP;
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return NOTIFY_OK;
}
@@ -1472,10 +1473,10 @@ static void do_handle_cursor(struct vc_data *vc, u_char value, char up_flag)
unsigned long flags;
struct var_t *cursor_timeout;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
spk_parked &= 0xfe;
if (synth == NULL || up_flag || spk_shut_up || cursor_track == CT_Off) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
spk_shut_up &= 0xfe;
@@ -1494,7 +1495,7 @@ static void do_handle_cursor(struct vc_data *vc, u_char value, char up_flag)
cursor_timeout = spk_get_var(CURSOR_TIME);
mod_timer(&cursor_timer,
jiffies + msecs_to_jiffies(cursor_timeout->u.n.value));
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
static void update_color_buffer(struct vc_data *vc, const char *ic, int len)
@@ -1619,7 +1620,7 @@ static void cursor_done(u_long data)
struct vc_data *vc = vc_cons[cursor_con].d;
unsigned long flags;
del_timer(&cursor_timer);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (cursor_con != fg_console) {
is_cursor = 0;
goto out;
@@ -1650,7 +1651,7 @@ static void cursor_done(u_long data)
say_char(vc);
spk_keydown = is_cursor = 0;
out:
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
/* called by: vt_notifier_call() */
@@ -1659,13 +1660,13 @@ static void speakup_bs(struct vc_data *vc)
unsigned long flags;
if (!speakup_console[vc->vc_num])
return;
- if (!spk_trylock(flags))
+ if (!spin_trylock_irqsave(&speakup_info.spinlock, flags))
/* Speakup output, discard */
return;
if (!spk_parked)
speakup_date(vc);
if (spk_shut_up || synth == NULL) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
if (vc->vc_num == fg_console && spk_keydown) {
@@ -1673,7 +1674,7 @@ static void speakup_bs(struct vc_data *vc)
if (!is_cursor)
say_char(vc);
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
/* called by: vt_notifier_call() */
@@ -1682,7 +1683,7 @@ static void speakup_con_write(struct vc_data *vc, const char *str, int len)
unsigned long flags;
if ((vc->vc_num != fg_console) || spk_shut_up || synth == NULL)
return;
- if (!spk_trylock(flags))
+ if (!spin_trylock_irqsave(&speakup_info.spinlock, flags))
/* Speakup output, discard */
return;
if (spk_bell_pos && spk_keydown && (vc->vc_x == spk_bell_pos - 1))
@@ -1690,31 +1691,31 @@ static void speakup_con_write(struct vc_data *vc, const char *str, int len)
if ((is_cursor) || (cursor_track == read_all_mode)) {
if (cursor_track == CT_Highlight)
update_color_buffer(vc, str, len);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
if (win_enabled) {
if (vc->vc_x >= win_left && vc->vc_x <= win_right &&
vc->vc_y >= win_top && vc->vc_y <= win_bottom) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
}
spkup_write(str, len);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
-void speakup_con_update(struct vc_data *vc)
+static void speakup_con_update(struct vc_data *vc)
{
unsigned long flags;
if (speakup_console[vc->vc_num] == NULL || spk_parked)
return;
- if (!spk_trylock(flags))
+ if (!spin_trylock_irqsave(&speakup_info.spinlock, flags))
/* Speakup output, discard */
return;
speakup_date(vc);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
static void do_handle_spec(struct vc_data *vc, u_char value, char up_flag)
@@ -1724,7 +1725,7 @@ static void do_handle_spec(struct vc_data *vc, u_char value, char up_flag)
char *label;
if (synth == NULL || up_flag || spk_killed)
return;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
spk_shut_up &= 0xfe;
if (spk_no_intr)
spk_do_flush();
@@ -1745,13 +1746,13 @@ static void do_handle_spec(struct vc_data *vc, u_char value, char up_flag)
break;
default:
spk_parked &= 0xfe;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return;
}
if (on_off < 2)
synth_printf("%s %s\n",
label, spk_msg_get(MSG_STATUS_START + on_off));
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
static int inc_dec_var(u_char value)
@@ -1892,7 +1893,7 @@ oops:
spk_special_handler = NULL;
return 1;
}
- go_pos = simple_strtol(goto_buf, &cp, 10);
+ go_pos = kstrtol(goto_buf, 10, (long *)&cp);
goto_pos = (u_long) go_pos;
if (*cp == 'x') {
if (*goto_buf < '0')
@@ -1964,7 +1965,7 @@ static void speakup_lock(struct vc_data *vc)
}
typedef void (*spkup_hand) (struct vc_data *);
-spkup_hand spkup_handler[] = {
+static spkup_hand spkup_handler[] = {
/* must be ordered same as defines in speakup.h */
do_nothing, speakup_goto, speech_kill, speakup_shut_up,
speakup_cut, speakup_paste, say_first_char, say_last_char,
@@ -2002,7 +2003,7 @@ static void do_spkup(struct vc_data *vc, u_char value)
static const char *pad_chars = "0123456789+-*/\015,.?()";
-int
+static int
speakup_key(struct vc_data *vc, int shift_state, int keycode, u_short keysym,
int up_flag)
{
@@ -2015,7 +2016,7 @@ speakup_key(struct vc_data *vc, int shift_state, int keycode, u_short keysym,
if (synth == NULL)
return 0;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
tty = vc->port.tty;
if (type >= 0xf0)
type -= 0xf0;
@@ -2033,7 +2034,7 @@ speakup_key(struct vc_data *vc, int shift_state, int keycode, u_short keysym,
if (keycode >= MAX_KEY)
goto no_map;
key_info = spk_our_keys[keycode];
- if (key_info == 0)
+ if (!key_info)
goto no_map;
/* Check valid read all mode keys */
if ((cursor_track == read_all_mode) && (!up_flag)) {
@@ -2114,7 +2115,7 @@ no_map:
}
last_keycode = 0;
out:
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return ret;
}
@@ -2265,7 +2266,7 @@ static int __init speakup_init(void)
(var->var_id >= 0) && (var->var_id < MAXVARS); var++)
speakup_register_var(var);
for (i = 1; spk_punc_info[i].mask != 0; i++)
- spk_set_mask_bits(0, i, 2);
+ spk_set_mask_bits(NULL, i, 2);
spk_set_key_info(spk_key_defaults, spk_key_buf);
diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c
index e4d27aa2898f..135428856d47 100644
--- a/drivers/staging/speakup/serialio.c
+++ b/drivers/staging/speakup/serialio.c
@@ -79,7 +79,7 @@ static irqreturn_t synth_readbuf_handler(int irq, void *dev_id)
/*printk(KERN_ERR "in irq\n"); */
/*pr_warn("in IRQ\n"); */
int c;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
while (inb_p(speakup_info.port_tts + UART_LSR) & UART_LSR_DR) {
c = inb_p(speakup_info.port_tts+UART_RX);
@@ -87,7 +87,7 @@ static irqreturn_t synth_readbuf_handler(int irq, void *dev_id)
/*printk(KERN_ERR "c = %d\n", c); */
/*pr_warn("C = %d\n", c); */
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return IRQ_HANDLED;
}
diff --git a/drivers/staging/speakup/speakup_acntpc.c b/drivers/staging/speakup/speakup_acntpc.c
index 1c1f0d560449..80141aca712f 100644
--- a/drivers/staging/speakup/speakup_acntpc.c
+++ b/drivers/staging/speakup/speakup_acntpc.c
@@ -166,7 +166,7 @@ static const char *synth_immediate(struct spk_synth *synth, const char *buf)
outb_p(ch, speakup_info.port_tts);
buf++;
}
- return 0;
+ return NULL;
}
static void do_catch_up(struct spk_synth *synth)
@@ -186,26 +186,26 @@ static void do_catch_up(struct spk_synth *synth)
delay_time = spk_get_var(DELAY);
full_time = spk_get_var(FULL);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
set_current_state(TASK_INTERRUPTIBLE);
full_time_val = full_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (synth_full()) {
schedule_timeout(msecs_to_jiffies(full_time_val));
continue;
@@ -217,9 +217,9 @@ static void do_catch_up(struct spk_synth *synth)
break;
udelay(1);
}
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
ch = synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = PROCSPEECH;
outb_p(ch, speakup_info.port_tts);
@@ -231,10 +231,10 @@ static void do_catch_up(struct spk_synth *synth)
udelay(1);
}
outb_p(PROCSPEECH, speakup_info.port_tts);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies(delay_time_val));
jiff_max = jiffies+jiffy_delta_val;
}
diff --git a/drivers/staging/speakup/speakup_apollo.c b/drivers/staging/speakup/speakup_apollo.c
index 3e450ccbda66..95d3132f0a35 100644
--- a/drivers/staging/speakup/speakup_apollo.c
+++ b/drivers/staging/speakup/speakup_apollo.c
@@ -148,30 +148,30 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
full_time = spk_get_var(FULL);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
full_time_val = full_time->u.n.value;
delay_time_val = delay_time->u.n.value;
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
ch = synth_buffer_peek();
set_current_state(TASK_INTERRUPTIBLE);
full_time_val = full_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (!spk_serial_out(ch)) {
outb(UART_MCR_DTR, speakup_info.port_tts + UART_MCR);
outb(UART_MCR_DTR | UART_MCR_RTS,
@@ -180,11 +180,11 @@ static void do_catch_up(struct spk_synth *synth)
continue;
}
if ((jiffies >= jiff_max) && (ch == SPACE)) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
full_time_val = full_time->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (spk_serial_out(synth->procspeech))
schedule_timeout(msecs_to_jiffies
(delay_time_val));
@@ -194,9 +194,9 @@ static void do_catch_up(struct spk_synth *synth)
jiff_max = jiffies + jiffy_delta_val;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
spk_serial_out(PROCSPEECH);
}
diff --git a/drivers/staging/speakup/speakup_decext.c b/drivers/staging/speakup/speakup_decext.c
index d39a0de286fb..d306e010d3ea 100644
--- a/drivers/staging/speakup/speakup_decext.c
+++ b/drivers/staging/speakup/speakup_decext.c
@@ -165,27 +165,27 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
ch = synth_buffer_peek();
set_current_state(TASK_INTERRUPTIBLE);
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = 0x0D;
if (synth_full() || !spk_serial_out(ch)) {
@@ -193,9 +193,9 @@ static void do_catch_up(struct spk_synth *synth)
continue;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '[')
in_escape = 1;
else if (ch == ']')
@@ -206,10 +206,10 @@ static void do_catch_up(struct spk_synth *synth)
if (jiffies >= jiff_max) {
if (!in_escape)
spk_serial_out(PROCSPEECH);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies
(delay_time_val));
jiff_max = jiffies + jiffy_delta_val;
diff --git a/drivers/staging/speakup/speakup_decpc.c b/drivers/staging/speakup/speakup_decpc.c
index 6c88b55bdac8..ea6b72d40b31 100644
--- a/drivers/staging/speakup/speakup_decpc.c
+++ b/drivers/staging/speakup/speakup_decpc.c
@@ -377,27 +377,27 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
ch = synth_buffer_peek();
set_current_state(TASK_INTERRUPTIBLE);
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = 0x0D;
if (dt_sendchar(ch)) {
@@ -405,9 +405,9 @@ static void do_catch_up(struct spk_synth *synth)
continue;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '[')
in_escape = 1;
else if (ch == ']')
@@ -418,10 +418,10 @@ static void do_catch_up(struct spk_synth *synth)
if (jiffies >= jiff_max) {
if (!in_escape)
dt_sendchar(PROCSPEECH);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies
(delay_time_val));
jiff_max = jiffies + jiffy_delta_val;
@@ -444,7 +444,7 @@ static const char *synth_immediate(struct spk_synth *synth, const char *buf)
return buf;
buf++;
}
- return 0;
+ return NULL;
}
static int synth_probe(struct spk_synth *synth)
diff --git a/drivers/staging/speakup/speakup_dectlk.c b/drivers/staging/speakup/speakup_dectlk.c
index 0dd2eb96cb28..15fdec323a70 100644
--- a/drivers/staging/speakup/speakup_dectlk.c
+++ b/drivers/staging/speakup/speakup_dectlk.c
@@ -216,9 +216,9 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
@@ -234,22 +234,22 @@ static void do_catch_up(struct spk_synth *synth)
is_flushing = 0;
spin_unlock_irqrestore(&flush_lock, flags);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
ch = synth_buffer_peek();
set_current_state(TASK_INTERRUPTIBLE);
delay_time_val = delay_time->u.n.value;
synth_full_val = synth_full();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = 0x0D;
if (synth_full_val || !spk_serial_out(ch)) {
@@ -257,9 +257,9 @@ static void do_catch_up(struct spk_synth *synth)
continue;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '[')
in_escape = 1;
else if (ch == ']')
@@ -270,10 +270,10 @@ static void do_catch_up(struct spk_synth *synth)
if (jiffies >= jiff_max) {
if (!in_escape)
spk_serial_out(PROCSPEECH);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies
(delay_time_val));
jiff_max = jiffies + jiffy_delta_val;
diff --git a/drivers/staging/speakup/speakup_dtlk.c b/drivers/staging/speakup/speakup_dtlk.c
index a9cefbd3ea93..1feb0fba1b43 100644
--- a/drivers/staging/speakup/speakup_dtlk.c
+++ b/drivers/staging/speakup/speakup_dtlk.c
@@ -200,42 +200,42 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
set_current_state(TASK_INTERRUPTIBLE);
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (synth_full()) {
schedule_timeout(msecs_to_jiffies(delay_time_val));
continue;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
ch = synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = PROCSPEECH;
spk_out(ch);
if ((jiffies >= jiff_max) && (ch == SPACE)) {
spk_out(PROCSPEECH);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
delay_time_val = delay_time->u.n.value;
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies(delay_time_val));
jiff_max = jiffies + jiffy_delta_val;
}
@@ -254,7 +254,7 @@ static const char *synth_immediate(struct spk_synth *synth, const char *buf)
spk_out(ch);
buf++;
}
- return 0;
+ return NULL;
}
static void synth_flush(struct spk_synth *synth)
diff --git a/drivers/staging/speakup/speakup_keypc.c b/drivers/staging/speakup/speakup_keypc.c
index feb5f22cc169..2f2fe5eeff63 100644
--- a/drivers/staging/speakup/speakup_keypc.c
+++ b/drivers/staging/speakup/speakup_keypc.c
@@ -168,7 +168,7 @@ static const char *synth_immediate(struct spk_synth *synth, const char *buf)
udelay(70);
buf++;
}
- return 0;
+ return NULL;
}
static void do_catch_up(struct spk_synth *synth)
@@ -187,26 +187,26 @@ static void do_catch_up(struct spk_synth *synth)
jiffy_delta = spk_get_var(JIFFY);
delay_time = spk_get_var(DELAY);
full_time = spk_get_var(FULL);
-spk_lock(flags);
+spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
set_current_state(TASK_INTERRUPTIBLE);
full_time_val = full_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (synth_full()) {
schedule_timeout(msecs_to_jiffies(full_time_val));
continue;
@@ -220,9 +220,9 @@ spk_lock(flags);
oops();
break;
}
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
ch = synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = PROCSPEECH;
outb_p(ch, synth_port);
@@ -237,10 +237,10 @@ spk_lock(flags);
break;
}
outb_p(PROCSPEECH, synth_port);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
schedule_timeout(msecs_to_jiffies(delay_time_val));
jiff_max = jiffies+jiffy_delta_val;
}
diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c
index e2f5c81e7548..243c3d52fe5e 100644
--- a/drivers/staging/speakup/speakup_soft.c
+++ b/drivers/staging/speakup/speakup_soft.c
@@ -179,45 +179,45 @@ static int softsynth_open(struct inode *inode, struct file *fp)
unsigned long flags;
/*if ((fp->f_flags & O_ACCMODE) != O_RDONLY) */
/* return -EPERM; */
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (synth_soft.alive) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return -EBUSY;
}
synth_soft.alive = 1;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return 0;
}
static int softsynth_close(struct inode *inode, struct file *fp)
{
unsigned long flags;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_soft.alive = 0;
init_pos = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
/* Make sure we let applications go before leaving */
speakup_start_ttys();
return 0;
}
-static ssize_t softsynth_read(struct file *fp, char *buf, size_t count,
+static ssize_t softsynth_read(struct file *fp, char __user *buf, size_t count,
loff_t *pos)
{
int chars_sent = 0;
- char *cp;
+ char __user *cp;
char *init;
char ch;
int empty;
unsigned long flags;
DEFINE_WAIT(wait);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
while (1) {
prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE);
if (!synth_buffer_empty() || speakup_info.flushing)
break;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (fp->f_flags & O_NONBLOCK) {
finish_wait(&speakup_event, &wait);
return -EAGAIN;
@@ -227,7 +227,7 @@ static ssize_t softsynth_read(struct file *fp, char *buf, size_t count,
return -ERESTARTSYS;
}
schedule();
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
}
finish_wait(&speakup_event, &wait);
@@ -244,16 +244,16 @@ static ssize_t softsynth_read(struct file *fp, char *buf, size_t count,
} else {
ch = synth_buffer_getc();
}
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (copy_to_user(cp, &ch, 1))
return -EFAULT;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
chars_sent++;
cp++;
}
*pos += chars_sent;
empty = synth_buffer_empty();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (empty) {
speakup_start_ttys();
*pos = 0;
@@ -263,8 +263,8 @@ static ssize_t softsynth_read(struct file *fp, char *buf, size_t count,
static int last_index;
-static ssize_t softsynth_write(struct file *fp, const char *buf, size_t count,
- loff_t *pos)
+static ssize_t softsynth_write(struct file *fp, const char __user *buf,
+ size_t count, loff_t *pos)
{
unsigned long supplied_index = 0;
int converted;
@@ -285,10 +285,10 @@ static unsigned int softsynth_poll(struct file *fp,
int ret = 0;
poll_wait(fp, &speakup_event, wait);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (!synth_buffer_empty() || speakup_info.flushing)
ret = POLLIN | POLLRDNORM;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return ret;
}
diff --git a/drivers/staging/speakup/spk_priv.h b/drivers/staging/speakup/spk_priv.h
index 303105b46013..637ba6760ec0 100644
--- a/drivers/staging/speakup/spk_priv.h
+++ b/drivers/staging/speakup/spk_priv.h
@@ -77,17 +77,4 @@ extern struct speakup_info_t speakup_info;
extern struct var_t synth_time_vars[];
-/* Protect the whole speakup machinery, must be taken at each kernel->speakup
- * transition and released at all corresponding speakup->kernel transitions
- * (flags must be the same variable between lock/trylock and unlock).
- *
- * The progression thread only interferes with the speakup machinery through
- * the synth buffer, and so only needs to take the lock while tinkering with
- * it.
- */
-/* Speakup needs to disable the keyboard IRQ, hence _irqsave/restore */
-#define spk_lock(flags) spin_lock_irqsave(&speakup_info.spinlock, flags)
-#define spk_trylock(flags) spin_trylock_irqsave(&speakup_info.spinlock, flags)
-#define spk_unlock(flags) spin_unlock_irqrestore(&speakup_info.spinlock, flags)
-
#endif
diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c
index d867dd9109ed..0b3549bd909d 100644
--- a/drivers/staging/speakup/synth.c
+++ b/drivers/staging/speakup/synth.c
@@ -25,6 +25,18 @@ static int module_status;
bool spk_quiet_boot;
struct speakup_info_t speakup_info = {
+ /*
+ * This spinlock is used to protect the entire speakup machinery, and
+ * must be taken at each kernel->speakup transition and released at
+ * each corresponding speakup->kernel transition.
+ *
+ * The progression thread only interferes with the speakup machinery through
+ * the synth buffer, so only needs to take the lock while tinkering with
+ * the buffer.
+ *
+ * We use spin_lock/trylock_irqsave and spin_unlock_irqrestore with this
+ * spinlock because speakup needs to disable the keyboard IRQ.
+ */
.spinlock = __SPIN_LOCK_UNLOCKED(speakup_info.spinlock),
.flushing = 0,
};
@@ -83,27 +95,27 @@ void spk_do_catch_up(struct spk_synth *synth)
full_time = spk_get_var(FULL);
delay_time = spk_get_var(DELAY);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
jiff_max = jiffies + jiffy_delta_val;
while (!kthread_should_stop()) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
if (speakup_info.flushing) {
speakup_info.flushing = 0;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
synth->flush(synth);
continue;
}
if (synth_buffer_empty()) {
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
break;
}
ch = synth_buffer_peek();
set_current_state(TASK_INTERRUPTIBLE);
full_time_val = full_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (ch == '\n')
ch = synth->procspeech;
if (!spk_serial_out(ch)) {
@@ -111,11 +123,11 @@ void spk_do_catch_up(struct spk_synth *synth)
continue;
}
if ((jiffies >= jiff_max) && (ch == SPACE)) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
jiffy_delta_val = jiffy_delta->u.n.value;
delay_time_val = delay_time->u.n.value;
full_time_val = full_time->u.n.value;
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (spk_serial_out(synth->procspeech))
schedule_timeout(
msecs_to_jiffies(delay_time_val));
@@ -125,9 +137,9 @@ void spk_do_catch_up(struct spk_synth *synth)
jiff_max = jiffies + jiffy_delta_val;
}
set_current_state(TASK_RUNNING);
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
synth_buffer_getc();
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
}
spk_serial_out(synth->procspeech);
}
@@ -145,7 +157,7 @@ const char *spk_synth_immediate(struct spk_synth *synth, const char *buff)
return buff;
buff++;
}
- return 0;
+ return NULL;
}
EXPORT_SYMBOL_GPL(spk_synth_immediate);
@@ -403,11 +415,11 @@ void synth_release(void)
if (synth == NULL)
return;
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
pr_info("releasing synth %s\n", synth->name);
synth->alive = 0;
del_timer(&thread_timer);
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (synth->attributes.name)
sysfs_remove_group(speakup_kobj, &(synth->attributes));
for (var = synth->vars; var->var_id != MAXVARS; var++)
diff --git a/drivers/staging/speakup/thread.c b/drivers/staging/speakup/thread.c
index 42fa660a7e0d..4397c8e898c7 100644
--- a/drivers/staging/speakup/thread.c
+++ b/drivers/staging/speakup/thread.c
@@ -22,7 +22,7 @@ int speakup_thread(void *data)
while (1) {
DEFINE_WAIT(wait);
while (1) {
- spk_lock(flags);
+ spin_lock_irqsave(&speakup_info.spinlock, flags);
our_sound = spk_unprocessed_sound;
spk_unprocessed_sound.active = 0;
prepare_to_wait(&speakup_event, &wait,
@@ -32,7 +32,7 @@ int speakup_thread(void *data)
(synth && synth->catch_up && synth->alive &&
(speakup_info.flushing ||
!synth_buffer_empty()));
- spk_unlock(flags);
+ spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (should_break)
break;
mutex_unlock(&spk_mutex);
diff --git a/drivers/staging/speakup/varhandlers.c b/drivers/staging/speakup/varhandlers.c
index 7f6288fc2299..9aa2a78cd71c 100644
--- a/drivers/staging/speakup/varhandlers.c
+++ b/drivers/staging/speakup/varhandlers.c
@@ -137,18 +137,15 @@ struct st_var_header *spk_get_var_header(enum var_id_t var_id)
struct st_var_header *spk_var_header_by_name(const char *name)
{
int i;
- struct st_var_header *where = NULL;
- if (name != NULL) {
- i = 0;
- while ((i < MAXVARS) && (where == NULL)) {
- if (strcmp(name, var_ptrs[i]->name) == 0)
- where = var_ptrs[i];
- else
- i++;
- }
+ if (!name)
+ return NULL;
+
+ for (i = 0; i < MAXVARS; i++) {
+ if (strcmp(name, var_ptrs[i]->name) == 0)
+ return var_ptrs[i];
}
- return where;
+ return NULL;
}
struct var_t *spk_get_var(enum var_id_t var_id)
@@ -280,7 +277,7 @@ int spk_set_mask_bits(const char *input, const int which, const int how)
spk_chartab[*cp] &= ~mask;
}
cp = (u_char *)input;
- if (cp == 0)
+ if (!cp)
cp = spk_punc_info[which].value;
else {
for ( ; *cp; cp++) {
diff --git a/drivers/staging/ti-soc-thermal/ti-thermal-common.c b/drivers/staging/ti-soc-thermal/ti-thermal-common.c
index e3c5e677eaa5..8e67ebf98404 100644
--- a/drivers/staging/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/staging/ti-soc-thermal/ti-thermal-common.c
@@ -38,6 +38,7 @@
/* common data structures */
struct ti_thermal_data {
struct thermal_zone_device *ti_thermal;
+ struct thermal_zone_device *pcb_tz;
struct thermal_cooling_device *cool_dev;
struct ti_bandgap *bgp;
enum thermal_device_mode mode;
@@ -77,10 +78,12 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c)
static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
unsigned long *temp)
{
+ struct thermal_zone_device *pcb_tz = NULL;
struct ti_thermal_data *data = thermal->devdata;
struct ti_bandgap *bgp;
const struct ti_temp_sensor *s;
- int ret, tmp, pcb_temp, slope, constant;
+ int ret, tmp, slope, constant;
+ unsigned long pcb_temp;
if (!data)
return 0;
@@ -92,16 +95,22 @@ static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
if (ret)
return ret;
- pcb_temp = 0;
- /* TODO: Introduce pcb temperature lookup */
+ /* Default constants */
+ slope = s->slope;
+ constant = s->constant;
+
+ pcb_tz = data->pcb_tz;
/* In case pcb zone is available, use the extrapolation rule with it */
- if (pcb_temp) {
- tmp -= pcb_temp;
- slope = s->slope_pcb;
- constant = s->constant_pcb;
- } else {
- slope = s->slope;
- constant = s->constant;
+ if (!IS_ERR_OR_NULL(pcb_tz)) {
+ ret = thermal_zone_get_temp(pcb_tz, &pcb_temp);
+ if (!ret) {
+ tmp -= pcb_temp; /* got a valid PCB temp */
+ slope = s->slope_pcb;
+ constant = s->constant_pcb;
+ } else {
+ dev_err(bgp->dev,
+ "Failed to read PCB state. Using defaults\n");
+ }
}
*temp = ti_thermal_hotspot_temperature(tmp, slope, constant);
@@ -273,6 +282,7 @@ static struct ti_thermal_data
data->sensor_id = id;
data->bgp = bgp;
data->mode = THERMAL_DEVICE_ENABLED;
+ data->pcb_tz = thermal_zone_get_zone_by_name("pcb");
INIT_WORK(&data->thermal_wq, ti_thermal_work);
return data;
diff --git a/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt b/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt
index a4a33d1a0746..1629652372b6 100644
--- a/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt
+++ b/drivers/staging/ti-soc-thermal/ti_soc_thermal.txt
@@ -57,4 +57,5 @@ bandgap {
0x4a002380 0x2c
0x4a0023C0 0x3c>;
compatible = "ti,omap5430-bandgap";
+ interrupts = <0 126 4>; /* talert */
};
diff --git a/drivers/staging/tidspbridge/core/_tiomap.h b/drivers/staging/tidspbridge/core/_tiomap.h
index b783bfa59b1c..65971b784b78 100644
--- a/drivers/staging/tidspbridge/core/_tiomap.h
+++ b/drivers/staging/tidspbridge/core/_tiomap.h
@@ -145,8 +145,8 @@ struct map_l4_peripheral {
#define L4_PERIPHERAL_MBOX 0x48094000
#define DSPVA_PERIPHERAL_MBOX 0x11808000
-#define PM_GRPSEL_BASE 0x48307000
-#define DSPVA_GRPSEL_BASE 0x11821000
+#define PM_GRPSEL_BASE 0x48307000
+#define DSPVA_GRPSEL_BASE 0x11821000
#define L4_PERIPHERAL_SIDETONE_MCBSP2 0x49028000
#define DSPVA_PERIPHERAL_SIDETONE_MCBSP2 0x11824000
@@ -311,7 +311,7 @@ static const struct bpwr_clk_t bpwr_clks[] = {
#define SET_GROUP_BITS16(reg, position, width, value) \
do {\
- reg &= ~((0xFFFF >> (16 - (width))) << (position)) ; \
+ reg &= ~((0xFFFF >> (16 - (width))) << (position)); \
reg |= ((value & (0xFFFF >> (16 - (width)))) << (position)); \
} while (0);
diff --git a/drivers/staging/tidspbridge/core/_tiomap_pwr.h b/drivers/staging/tidspbridge/core/_tiomap_pwr.h
index bd0354d9ad03..7bbd3802c15f 100644
--- a/drivers/staging/tidspbridge/core/_tiomap_pwr.h
+++ b/drivers/staging/tidspbridge/core/_tiomap_pwr.h
@@ -40,7 +40,7 @@ extern int sleep_dsp(struct bridge_dev_context *dev_context,
u32 dw_cmd, void *pargs);
/*
* ========interrupt_dsp========
- * Sends an interrupt to DSP unconditionally.
+ * Sends an interrupt to DSP unconditionally.
*/
extern void interrupt_dsp(struct bridge_dev_context *dev_context,
u16 mb_val);
@@ -53,24 +53,24 @@ extern int dsp_peripheral_clk_ctrl(struct bridge_dev_context
*dev_context, void *pargs);
/*
* ======== handle_hibernation_from_dsp ========
- * Handle Hibernation requested from DSP
+ * Handle Hibernation requested from DSP
*/
int handle_hibernation_from_dsp(struct bridge_dev_context *dev_context);
/*
* ======== post_scale_dsp ========
- * Handle Post Scale notification to DSP
+ * Handle Post Scale notification to DSP
*/
int post_scale_dsp(struct bridge_dev_context *dev_context,
void *pargs);
/*
* ======== pre_scale_dsp ========
- * Handle Pre Scale notification to DSP
+ * Handle Pre Scale notification to DSP
*/
int pre_scale_dsp(struct bridge_dev_context *dev_context,
void *pargs);
/*
* ======== handle_constraints_set ========
- * Handle constraints request from DSP
+ * Handle constraints request from DSP
*/
int handle_constraints_set(struct bridge_dev_context *dev_context,
void *pargs);
diff --git a/drivers/staging/tidspbridge/core/tiomap3430_pwr.c b/drivers/staging/tidspbridge/core/tiomap3430_pwr.c
index dafa6d9b2948..1862afd80dc1 100644
--- a/drivers/staging/tidspbridge/core/tiomap3430_pwr.c
+++ b/drivers/staging/tidspbridge/core/tiomap3430_pwr.c
@@ -51,7 +51,7 @@
/*
* ======== handle_constraints_set ========
- * Sets new DSP constraint
+ * Sets new DSP constraint
*/
int handle_constraints_set(struct bridge_dev_context *dev_context,
void *pargs)
@@ -75,7 +75,7 @@ int handle_constraints_set(struct bridge_dev_context *dev_context,
/*
* ======== handle_hibernation_from_dsp ========
- * Handle Hibernation requested from DSP
+ * Handle Hibernation requested from DSP
*/
int handle_hibernation_from_dsp(struct bridge_dev_context *dev_context)
{
@@ -144,7 +144,7 @@ int handle_hibernation_from_dsp(struct bridge_dev_context *dev_context)
/*
* ======== sleep_dsp ========
- * Put DSP in low power consuming state.
+ * Put DSP in low power consuming state.
*/
int sleep_dsp(struct bridge_dev_context *dev_context, u32 dw_cmd,
void *pargs)
@@ -250,7 +250,7 @@ int sleep_dsp(struct bridge_dev_context *dev_context, u32 dw_cmd,
/*
* ======== wake_dsp ========
- * Wake up DSP from sleep.
+ * Wake up DSP from sleep.
*/
int wake_dsp(struct bridge_dev_context *dev_context, void *pargs)
{
@@ -276,7 +276,7 @@ int wake_dsp(struct bridge_dev_context *dev_context, void *pargs)
/*
* ======== dsp_peripheral_clk_ctrl ========
- * Enable/Disable the DSP peripheral clocks as needed..
+ * Enable/Disable the DSP peripheral clocks as needed..
*/
int dsp_peripheral_clk_ctrl(struct bridge_dev_context *dev_context,
void *pargs)
diff --git a/drivers/staging/tidspbridge/core/ue_deh.c b/drivers/staging/tidspbridge/core/ue_deh.c
index 6aea6f1b4982..e68f0ba8e12b 100644
--- a/drivers/staging/tidspbridge/core/ue_deh.c
+++ b/drivers/staging/tidspbridge/core/ue_deh.c
@@ -177,7 +177,7 @@ static void mmu_fault_print_stack(struct bridge_dev_context *dev_context)
void *dummy_va_addr;
resources = dev_context->resources;
- dummy_va_addr = (void*)__get_free_page(GFP_ATOMIC);
+ dummy_va_addr = (void *)__get_free_page(GFP_ATOMIC);
/*
* Before acking the MMU fault, let's make sure MMU can only
diff --git a/drivers/staging/tidspbridge/core/wdt.c b/drivers/staging/tidspbridge/core/wdt.c
index 7ff0e6c98039..c7ee467f0f12 100644
--- a/drivers/staging/tidspbridge/core/wdt.c
+++ b/drivers/staging/tidspbridge/core/wdt.c
@@ -25,8 +25,8 @@
#include <dspbridge/host_os.h>
-#define OMAP34XX_WDT3_BASE (0x49000000 + 0x30000)
-#define INT_34XX_WDT3_IRQ (36 + NR_IRQS)
+#define OMAP34XX_WDT3_BASE (0x49000000 + 0x30000)
+#define INT_34XX_WDT3_IRQ (36 + NR_IRQS)
static struct dsp_wdt_setting dsp_wdt;
diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c
index df0f37ea1ee5..9c020562c846 100644
--- a/drivers/staging/tidspbridge/rmgr/drv_interface.c
+++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c
@@ -508,6 +508,7 @@ static int omap34_xx_bridge_probe(struct platform_device *pdev)
bridge_class = class_create(THIS_MODULE, "ti_bridge");
if (IS_ERR(bridge_class)) {
pr_err("%s: Error creating bridge class\n", __func__);
+ err = PTR_ERR(bridge_class);
goto err3;
}
diff --git a/drivers/staging/usbip/usbip_event.c b/drivers/staging/usbip/usbip_event.c
index 82123be8732d..64933b993d7a 100644
--- a/drivers/staging/usbip/usbip_event.c
+++ b/drivers/staging/usbip/usbip_event.c
@@ -85,7 +85,7 @@ int usbip_start_eh(struct usbip_device *ud)
ud->eh = kthread_run(event_handler_loop, ud, "usbip_eh");
if (IS_ERR(ud->eh)) {
- pr_warning("Unable to start control thread\n");
+ pr_warn("Unable to start control thread\n");
return PTR_ERR(ud->eh);
}
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index da7f75984979..daec15565a43 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -109,7 +109,7 @@ struct driver_stats {
unsigned long ioctls;
unsigned long irqs;
unsigned long berrs;
- unsigned long dmaErrors;
+ unsigned long dmaerrors;
unsigned long timeouts;
unsigned long external;
};
@@ -160,7 +160,7 @@ static void reset_counters(void)
statistics.ioctls = 0;
statistics.irqs = 0;
statistics.berrs = 0;
- statistics.dmaErrors = 0;
+ statistics.dmaerrors = 0;
statistics.timeouts = 0;
}
@@ -734,6 +734,7 @@ static int vme_user_probe(struct vme_dev *vdev)
if (image[i].resource == NULL) {
dev_warn(&vdev->dev,
"Unable to allocate slave resource\n");
+ err = -ENOMEM;
goto err_slave;
}
image[i].size_buf = PCI_BUF_SIZE;
@@ -760,6 +761,7 @@ static int vme_user_probe(struct vme_dev *vdev)
if (image[i].resource == NULL) {
dev_warn(&vdev->dev,
"Unable to allocate master resource\n");
+ err = -ENOMEM;
goto err_master;
}
image[i].size_buf = PCI_BUF_SIZE;
diff --git a/drivers/staging/vme/devices/vme_user.h b/drivers/staging/vme/devices/vme_user.h
index 7d24cd6343e4..280ccc7f26bb 100644
--- a/drivers/staging/vme/devices/vme_user.h
+++ b/drivers/staging/vme/devices/vme_user.h
@@ -14,9 +14,9 @@ struct vme_master {
u32 cycle; /* Cycle properties */
u32 dwidth; /* Maximum Data Width */
#if 0
- char prefetchEnable; /* Prefetch Read Enable State */
- int prefetchSize; /* Prefetch Read Size (Cache Lines) */
- char wrPostEnable; /* Write Post State */
+ char prefetchenable; /* Prefetch Read Enable State */
+ int prefetchsize; /* Prefetch Read Size (Cache Lines) */
+ char wrpostenable; /* Write Post State */
#endif
};
@@ -37,9 +37,9 @@ struct vme_slave {
u32 aspace; /* Address Space */
u32 cycle; /* Cycle properties */
#if 0
- char wrPostEnable; /* Write Post State */
- char rmwLock; /* Lock PCI during RMW Cycles */
- char data64BitCapable; /* non-VMEbus capable of 64-bit Data */
+ char wrpostenable; /* Write Post State */
+ char rmwlock; /* Lock PCI during RMW Cycles */
+ char data64bitcapable; /* non-VMEbus capable of 64-bit Data */
#endif
};
diff --git a/drivers/staging/vt6655/80211hdr.h b/drivers/staging/vt6655/80211hdr.h
index 28078a114d4f..ba533402a9af 100644
--- a/drivers/staging/vt6655/80211hdr.h
+++ b/drivers/staging/vt6655/80211hdr.h
@@ -68,7 +68,7 @@
#define BIT30 0x40000000
#define BIT31 0x80000000
-// 802.11 frame related, defined as 802.11 spec
+/* 802.11 frame related, defined as 802.11 spec */
#define WLAN_ADDR_LEN 6
#define WLAN_CRC_LEN 4
#define WLAN_CRC32_LEN 4
diff --git a/drivers/staging/vt6655/80211mgr.c b/drivers/staging/vt6655/80211mgr.c
index 4cb26f3faf26..76c8490b0734 100644
--- a/drivers/staging/vt6655/80211mgr.c
+++ b/drivers/staging/vt6655/80211mgr.c
@@ -66,7 +66,7 @@
/*--------------------- Static Variables --------------------------*/
static int msglevel = MSG_LEVEL_INFO;
-//static int msglevel =MSG_LEVEL_DEBUG;
+/* static int msglevel =MSG_LEVEL_DEBUG; */
/*--------------------- Static Functions --------------------------*/
/*--------------------- Export Variables --------------------------*/
@@ -90,7 +90,7 @@ vMgrEncodeBeacon(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pqwTimestamp = (PQWORD)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_BEACON_OFF_TS);
pFrame->pwBeaconInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -123,7 +123,7 @@ vMgrDecodeBeacon(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pqwTimestamp = (PQWORD)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_BEACON_OFF_TS);
pFrame->pwBeaconInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -131,7 +131,7 @@ vMgrDecodeBeacon(
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_BEACON_OFF_CAPINFO);
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)((unsigned char *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3)))
+ WLAN_BEACON_OFF_SSID);
while (((unsigned char *)pItem) < (pFrame->pBuf + pFrame->len)) {
@@ -145,7 +145,7 @@ vMgrDecodeBeacon(
pFrame->pSuppRates = (PWLAN_IE_SUPP_RATES)pItem;
break;
case WLAN_EID_FH_PARMS:
- //pFrame->pFHParms = (PWLAN_IE_FH_PARMS)pItem;
+ /* pFrame->pFHParms = (PWLAN_IE_FH_PARMS)pItem; */
break;
case WLAN_EID_DS_PARMS:
if (pFrame->pDSParms == NULL)
@@ -185,22 +185,22 @@ vMgrDecodeBeacon(
pFrame->pExtSuppRates = (PWLAN_IE_SUPP_RATES)pItem;
break;
- case WLAN_EID_COUNTRY: //7
+ case WLAN_EID_COUNTRY: /* 7 */
if (pFrame->pIE_Country == NULL)
pFrame->pIE_Country = (PWLAN_IE_COUNTRY)pItem;
break;
- case WLAN_EID_PWR_CONSTRAINT: //32
+ case WLAN_EID_PWR_CONSTRAINT: /* 32 */
if (pFrame->pIE_PowerConstraint == NULL)
pFrame->pIE_PowerConstraint = (PWLAN_IE_PW_CONST)pItem;
break;
- case WLAN_EID_CH_SWITCH: //37
+ case WLAN_EID_CH_SWITCH: /* 37 */
if (pFrame->pIE_CHSW == NULL)
pFrame->pIE_CHSW = (PWLAN_IE_CH_SW)pItem;
break;
- case WLAN_EID_QUIET: //40
+ case WLAN_EID_QUIET: /* 40 */
if (pFrame->pIE_Quiet == NULL)
pFrame->pIE_Quiet = (PWLAN_IE_QUIET)pItem;
break;
@@ -282,7 +282,7 @@ vMgrEncodeDisassociation(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwReason = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_DISASSOC_OFF_REASON);
pFrame->len = WLAN_HDR_ADDR3_LEN + WLAN_DISASSOC_OFF_REASON + sizeof(*(pFrame->pwReason));
@@ -308,7 +308,7 @@ vMgrDecodeDisassociation(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwReason = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_DISASSOC_OFF_REASON);
@@ -332,7 +332,7 @@ vMgrEncodeAssocRequest(
)
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCREQ_OFF_CAP_INFO);
pFrame->pwListenInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -360,13 +360,13 @@ vMgrDecodeAssocRequest(
PWLAN_IE pItem;
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCREQ_OFF_CAP_INFO);
pFrame->pwListenInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCREQ_OFF_LISTEN_INT);
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCREQ_OFF_SSID);
@@ -425,7 +425,7 @@ vMgrEncodeAssocResponse(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCRESP_OFF_CAP_INFO);
pFrame->pwStatus = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -458,7 +458,7 @@ vMgrDecodeAssocResponse(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCRESP_OFF_CAP_INFO);
pFrame->pwStatus = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -466,7 +466,7 @@ vMgrDecodeAssocResponse(
pFrame->pwAid = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCRESP_OFF_AID);
- // Information elements
+ /* Information elements */
pFrame->pSuppRates = (PWLAN_IE_SUPP_RATES)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_ASSOCRESP_OFF_SUPP_RATES);
@@ -501,7 +501,7 @@ vMgrEncodeReassocRequest(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCREQ_OFF_CAP_INFO);
pFrame->pwListenInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -532,7 +532,7 @@ vMgrDecodeReassocRequest(
PWLAN_IE pItem;
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCREQ_OFF_CAP_INFO);
pFrame->pwListenInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -540,7 +540,7 @@ vMgrDecodeReassocRequest(
pFrame->pAddrCurrAP = (PIEEE_ADDR)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCREQ_OFF_CURR_AP);
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCREQ_OFF_SSID);
@@ -622,7 +622,7 @@ vMgrDecodeProbeRequest(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3)));
while (((unsigned char *)pItem) < (pFrame->pBuf + pFrame->len)) {
@@ -670,7 +670,7 @@ vMgrEncodeProbeResponse(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pqwTimestamp = (PQWORD)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_PROBERESP_OFF_TS);
pFrame->pwBeaconInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -704,7 +704,7 @@ vMgrDecodeProbeResponse(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pqwTimestamp = (PQWORD)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_PROBERESP_OFF_TS);
pFrame->pwBeaconInterval = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -712,7 +712,7 @@ vMgrDecodeProbeResponse(
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_PROBERESP_OFF_CAP_INFO);
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_PROBERESP_OFF_SSID);
@@ -761,22 +761,22 @@ vMgrDecodeProbeResponse(
pFrame->pExtSuppRates = (PWLAN_IE_SUPP_RATES)pItem;
break;
- case WLAN_EID_COUNTRY: //7
+ case WLAN_EID_COUNTRY: /* 7 */
if (pFrame->pIE_Country == NULL)
pFrame->pIE_Country = (PWLAN_IE_COUNTRY)pItem;
break;
- case WLAN_EID_PWR_CONSTRAINT: //32
+ case WLAN_EID_PWR_CONSTRAINT: /* 32 */
if (pFrame->pIE_PowerConstraint == NULL)
pFrame->pIE_PowerConstraint = (PWLAN_IE_PW_CONST)pItem;
break;
- case WLAN_EID_CH_SWITCH: //37
+ case WLAN_EID_CH_SWITCH: /* 37 */
if (pFrame->pIE_CHSW == NULL)
pFrame->pIE_CHSW = (PWLAN_IE_CH_SW)pItem;
break;
- case WLAN_EID_QUIET: //40
+ case WLAN_EID_QUIET: /* 40 */
if (pFrame->pIE_Quiet == NULL)
pFrame->pIE_Quiet = (PWLAN_IE_QUIET)pItem;
break;
@@ -814,7 +814,7 @@ vMgrEncodeAuthen(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwAuthAlgorithm = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_AUTHEN_OFF_AUTH_ALG);
pFrame->pwAuthSequence = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -846,7 +846,7 @@ vMgrDecodeAuthen(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwAuthAlgorithm = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_AUTHEN_OFF_AUTH_ALG);
pFrame->pwAuthSequence = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -854,7 +854,7 @@ vMgrDecodeAuthen(
pFrame->pwStatus = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_AUTHEN_OFF_STATUS);
- // Information elements
+ /* Information elements */
pItem = (PWLAN_IE)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_AUTHEN_OFF_CHALLENGE);
@@ -883,7 +883,7 @@ vMgrEncodeDeauthen(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwReason = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_DEAUTHEN_OFF_REASON);
pFrame->len = WLAN_HDR_ADDR3_LEN + WLAN_DEAUTHEN_OFF_REASON + sizeof(*(pFrame->pwReason));
@@ -909,7 +909,7 @@ vMgrDecodeDeauthen(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwReason = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_DEAUTHEN_OFF_REASON);
@@ -934,7 +934,7 @@ vMgrEncodeReassocResponse(
{
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCRESP_OFF_CAP_INFO);
pFrame->pwStatus = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -967,7 +967,7 @@ vMgrDecodeReassocResponse(
pFrame->pHdr = (PUWLAN_80211HDR)pFrame->pBuf;
- // Fixed Fields
+ /* Fixed Fields */
pFrame->pwCapInfo = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCRESP_OFF_CAP_INFO);
pFrame->pwStatus = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
@@ -975,7 +975,7 @@ vMgrDecodeReassocResponse(
pFrame->pwAid = (unsigned short *)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCRESP_OFF_AID);
- //Information elements
+ /* Information elements */
pFrame->pSuppRates = (PWLAN_IE_SUPP_RATES)(WLAN_HDR_A3_DATA_PTR(&(pFrame->pHdr->sA3))
+ WLAN_REASSOCRESP_OFF_SUPP_RATES);
diff --git a/drivers/staging/vt6655/80211mgr.h b/drivers/staging/vt6655/80211mgr.h
index 16402cf5d25e..065238beb4f4 100644
--- a/drivers/staging/vt6655/80211mgr.h
+++ b/drivers/staging/vt6655/80211mgr.h
@@ -38,7 +38,7 @@
#define WLAN_MIN_ARRAY 1
-// Information Element ID value
+/* Information Element ID value */
#define WLAN_EID_SSID 0
#define WLAN_EID_SUPP_RATES 1
#define WLAN_EID_FH_PARMS 2
@@ -59,17 +59,17 @@
#define WLAN_EID_QUIET 40
#define WLAN_EID_IBSS_DFS 41
#define WLAN_EID_ERP 42
-// reference 802.11i 7.3.2 table 20
+/* reference 802.11i 7.3.2 table 20 */
#define WLAN_EID_RSN 48
#define WLAN_EID_EXTSUPP_RATES 50
-// reference WiFi WPA spec.
+/* reference WiFi WPA spec. */
#define WLAN_EID_RSN_WPA 221
#define WLAN_EID_ERP_NONERP_PRESENT 0x01
#define WLAN_EID_ERP_USE_PROTECTION 0x02
#define WLAN_EID_ERP_BARKER_MODE 0x04
-// Reason Codes
+/* Reason Codes */
#define WLAN_MGMT_REASON_RSVD 0
#define WLAN_MGMT_REASON_UNSPEC 1
#define WLAN_MGMT_REASON_PRIOR_AUTH_INVALID 2
@@ -94,7 +94,7 @@
#define WLAN_MGMT_REASON_RSNE_CAP_INVALID 22
#define WLAN_MGMT_REASON_80211X_AUTH_FAILED 23
-// Status Codes
+/* Status Codes */
#define WLAN_MGMT_STATUS_SUCCESS 0
#define WLAN_MGMT_STATUS_UNSPEC_FAILURE 1
#define WLAN_MGMT_STATUS_CAPS_UNSUPPORTED 10
@@ -110,19 +110,14 @@
#define WLAN_MGMT_STATUS_ASSOC_DENIED_PBCC 20
#define WLAN_MGMT_STATUS_ASSOC_DENIED_AGILITY 21
-// reference 802.11h 7.3.1.9
-//
+/* reference 802.11h 7.3.1.9 */
#define WLAN_MGMT_STATUS_ASSOC_REJECT_BCS_SPECTRUM_MNG 22
#define WLAN_MGMT_STATUS_ASSOC_REJECT_BCS_PWR_CAP 23
#define WLAN_MGMT_STATUS_ASSOC_REJECT_BCS_SUPP_CH 24
-//
-// reference 802.11g 7.3.1.9
-//
+/* reference 802.11g 7.3.1.9 */
#define WLAN_MGMT_STATUS_SHORTSLOTTIME_UNSUPPORTED 25
#define WLAN_MGMT_STATUS_DSSSOFDM_UNSUPPORTED 26
-//
-// reference 802.11i 7.3.1.9 table 19
-//
+/* reference 802.11i 3.7.1.9 table 19 */
#define WLAN_MGMT_STATUS_INVALID_IE 40
#define WLAN_MGMT_STATUS_GROUP_CIPHER_INVALID 41
#define WLAN_MGMT_STATUS_PAIRWISE_CIPHER_INVALID 42
@@ -131,13 +126,13 @@
#define WLAN_MGMT_STATUS_INVALID_RSN_IE_CAP 45
#define WLAN_MGMT_STATUS_CIPHER_REJECT 46
-// Auth Algorithm
+/* Auth Algorithm */
#define WLAN_AUTH_ALG_OPENSYSTEM 0
#define WLAN_AUTH_ALG_SHAREDKEY 1
-// Management Frame Field Offsets
-// Note: Not all fields are listed because of variable lengths.
-// Note: These offsets are from the start of the frame data
+/* Management Frame Field Offsets */
+/* Note: Not all fields are listed because of variable lengths. */
+/* Note: These offsets are from the start of the frame data */
#define WLAN_BEACON_OFF_TS 0
#define WLAN_BEACON_OFF_BCN_INT 8
@@ -179,9 +174,7 @@
#define WLAN_DEAUTHEN_OFF_REASON 0
-//
-// Cipher Suite Selectors defined in 802.11i
-//
+/* Cipher Suite Selectors defined in 802.11i */
#define WLAN_11i_CSS_USE_GROUP 0
#define WLAN_11i_CSS_WEP40 1
#define WLAN_11i_CSS_TKIP 2
@@ -189,24 +182,22 @@
#define WLAN_11i_CSS_WEP104 5
#define WLAN_11i_CSS_UNKNOWN 255
-//
-// Authentication and Key Management Suite Selectors defined in 802.11i
-//
+/* Authentication and Key Management Suite Selectors defined in 802.11i */
#define WLAN_11i_AKMSS_802_1X 1
#define WLAN_11i_AKMSS_PSK 2
#define WLAN_11i_AKMSS_UNKNOWN 255
-// Measurement type definitions reference ieee 802.11h Table 20b
+/* Measurement type definitions reference ieee 802.11h Table 20b */
#define MEASURE_TYPE_BASIC 0
#define MEASURE_TYPE_CCA 1
#define MEASURE_TYPE_RPI 2
-// Measurement request mode definitions reference ieee 802.11h Figure 46h
+/* Measurement request mode definitions reference ieee 802.11h Figure 46h */
#define MEASURE_MODE_ENABLE 0x02
#define MEASURE_MODE_REQ 0x04
#define MEASURE_MODE_REP 0x08
-// Measurement report mode definitions reference ieee 802.11h Figure 46m
+/* Measurement report mode definitions reference ieee 802.11h Figure 46m */
#define MEASURE_MODE_LATE 0x01
#define MEASURE_MODE_INCAPABLE 0x02
#define MEASURE_MODE_REFUSED 0x04
@@ -217,7 +208,7 @@
/*--------------------- Export Types ------------------------------*/
-// Information Element Types
+/* Information Element Types */
#pragma pack(1)
typedef struct tagWLAN_IE {
@@ -226,7 +217,7 @@ typedef struct tagWLAN_IE {
} __attribute__ ((__packed__))
WLAN_IE, *PWLAN_IE;
-// Service Set Identity (SSID)
+/* Service Set Identity (SSID) */
#pragma pack(1)
typedef struct tagWLAN_IE_SSID {
unsigned char byElementID;
@@ -235,7 +226,7 @@ typedef struct tagWLAN_IE_SSID {
} __attribute__ ((__packed__))
WLAN_IE_SSID, *PWLAN_IE_SSID;
-// Supported Rates
+/* Supported Rates */
#pragma pack(1)
typedef struct tagWLAN_IE_SUPP_RATES {
unsigned char byElementID;
@@ -244,7 +235,7 @@ typedef struct tagWLAN_IE_SUPP_RATES {
} __attribute__ ((__packed__))
WLAN_IE_SUPP_RATES, *PWLAN_IE_SUPP_RATES;
-// FH Parameter Set
+/* FH Parameter Set */
#pragma pack(1)
typedef struct _WLAN_IE_FH_PARMS {
unsigned char byElementID;
@@ -255,7 +246,7 @@ typedef struct _WLAN_IE_FH_PARMS {
unsigned char byHopIndex;
} WLAN_IE_FH_PARMS, *PWLAN_IE_FH_PARMS;
-// DS Parameter Set
+/* DS Parameter Set */
#pragma pack(1)
typedef struct tagWLAN_IE_DS_PARMS {
unsigned char byElementID;
@@ -264,7 +255,7 @@ typedef struct tagWLAN_IE_DS_PARMS {
} __attribute__ ((__packed__))
WLAN_IE_DS_PARMS, *PWLAN_IE_DS_PARMS;
-// CF Parameter Set
+/* CF Parameter Set */
#pragma pack(1)
typedef struct tagWLAN_IE_CF_PARMS {
unsigned char byElementID;
@@ -276,7 +267,7 @@ typedef struct tagWLAN_IE_CF_PARMS {
} __attribute__ ((__packed__))
WLAN_IE_CF_PARMS, *PWLAN_IE_CF_PARMS;
-// TIM
+/* TIM */
#pragma pack(1)
typedef struct tagWLAN_IE_TIM {
unsigned char byElementID;
@@ -288,7 +279,7 @@ typedef struct tagWLAN_IE_TIM {
} __attribute__ ((__packed__))
WLAN_IE_TIM, *PWLAN_IE_TIM;
-// IBSS Parameter Set
+/* IBSS Parameter Set */
#pragma pack(1)
typedef struct tagWLAN_IE_IBSS_PARMS {
unsigned char byElementID;
@@ -297,7 +288,7 @@ typedef struct tagWLAN_IE_IBSS_PARMS {
} __attribute__ ((__packed__))
WLAN_IE_IBSS_PARMS, *PWLAN_IE_IBSS_PARMS;
-// Challenge Text
+/* Challenge Text */
#pragma pack(1)
typedef struct tagWLAN_IE_CHALLENGE {
unsigned char byElementID;
@@ -316,8 +307,8 @@ typedef struct tagWLAN_IE_RSN_EXT {
unsigned short wPKCount;
struct {
unsigned char abyOUI[4];
- } PKSList[1]; // the rest is variable so need to
- // overlay ieauth structure
+ } PKSList[1]; /* the rest is variable so need to */
+ /* overlay ieauth structure */
} WLAN_IE_RSN_EXT, *PWLAN_IE_RSN_EXT;
#pragma pack(1)
@@ -328,7 +319,7 @@ typedef struct tagWLAN_IE_RSN_AUTH {
} AuthKSList[1];
} WLAN_IE_RSN_AUTH, *PWLAN_IE_RSN_AUTH;
-// RSN Identity
+/* RSN Identity */
#pragma pack(1)
typedef struct tagWLAN_IE_RSN {
unsigned char byElementID;
@@ -337,7 +328,7 @@ typedef struct tagWLAN_IE_RSN {
unsigned char abyRSN[WLAN_MIN_ARRAY];
} WLAN_IE_RSN, *PWLAN_IE_RSN;
-// ERP
+/* ERP */
#pragma pack(1)
typedef struct tagWLAN_IE_ERP {
unsigned char byElementID;
@@ -466,8 +457,8 @@ typedef struct _WLAN_IE_IBSS_DFS {
#pragma pack()
-// Frame Types
-// prototype structure, all mgmt frame types will start with these members
+/* Frame Types */
+/* prototype structure, all mgmt frame types will start with these members */
typedef struct tagWLAN_FR_MGMT {
unsigned int uType;
unsigned int len;
@@ -475,20 +466,20 @@ typedef struct tagWLAN_FR_MGMT {
PUWLAN_80211HDR pHdr;
} WLAN_FR_MGMT, *PWLAN_FR_MGMT;
-// Beacon frame
+/* Beacon frame */
typedef struct tagWLAN_FR_BEACON {
unsigned int uType;
unsigned int len;
unsigned char *pBuf;
PUWLAN_80211HDR pHdr;
- // fixed fields
+ /* fixed fields */
PQWORD pqwTimestamp;
unsigned short *pwBeaconInterval;
unsigned short *pwCapInfo;
/*-- info elements ----------*/
PWLAN_IE_SSID pSSID;
PWLAN_IE_SUPP_RATES pSuppRates;
-// PWLAN_IE_FH_PARMS pFHParms;
+/* PWLAN_IE_FH_PARMS pFHParms; */
PWLAN_IE_DS_PARMS pDSParms;
PWLAN_IE_CF_PARMS pCFParms;
PWLAN_IE_TIM pTIM;
@@ -504,19 +495,19 @@ typedef struct tagWLAN_FR_BEACON {
PWLAN_IE_QUIET pIE_Quiet;
} WLAN_FR_BEACON, *PWLAN_FR_BEACON;
-// IBSS ATIM frame
+/* IBSS ATIM frame */
typedef struct tagWLAN_FR_IBSSATIM {
unsigned int uType;
unsigned int len;
unsigned char *pBuf;
PUWLAN_80211HDR pHdr;
- // fixed fields
- // info elements
- // this frame type has a null body
+ /* fixed fields */
+ /* info elements */
+ /* this frame type has a null body */
} WLAN_FR_IBSSATIM, *PWLAN_FR_IBSSATIM;
-// Disassociation
+/* Disassociation */
typedef struct tagWLAN_FR_DISASSOC {
unsigned int uType;
unsigned int len;
@@ -527,7 +518,7 @@ typedef struct tagWLAN_FR_DISASSOC {
/*-- info elements ----------*/
} WLAN_FR_DISASSOC, *PWLAN_FR_DISASSOC;
-// Association Request
+/* Association Request */
typedef struct tagWLAN_FR_ASSOCREQ {
unsigned int uType;
unsigned int len;
@@ -546,7 +537,7 @@ typedef struct tagWLAN_FR_ASSOCREQ {
PWLAN_IE_SUPP_CH pCurrSuppCh;
} WLAN_FR_ASSOCREQ, *PWLAN_FR_ASSOCREQ;
-// Association Response
+/* Association Response */
typedef struct tagWLAN_FR_ASSOCRESP {
unsigned int uType;
unsigned int len;
@@ -561,7 +552,7 @@ typedef struct tagWLAN_FR_ASSOCRESP {
PWLAN_IE_SUPP_RATES pExtSuppRates;
} WLAN_FR_ASSOCRESP, *PWLAN_FR_ASSOCRESP;
-// Reassociation Request
+/* Reassociation Request */
typedef struct tagWLAN_FR_REASSOCREQ {
unsigned int uType;
unsigned int len;
@@ -581,7 +572,7 @@ typedef struct tagWLAN_FR_REASSOCREQ {
PWLAN_IE_SUPP_RATES pExtSuppRates;
} WLAN_FR_REASSOCREQ, *PWLAN_FR_REASSOCREQ;
-// Reassociation Response
+/* Reassociation Response */
typedef struct tagWLAN_FR_REASSOCRESP {
unsigned int uType;
unsigned int len;
@@ -596,7 +587,7 @@ typedef struct tagWLAN_FR_REASSOCRESP {
PWLAN_IE_SUPP_RATES pExtSuppRates;
} WLAN_FR_REASSOCRESP, *PWLAN_FR_REASSOCRESP;
-// Probe Request
+/* Probe Request */
typedef struct tagWLAN_FR_PROBEREQ {
unsigned int uType;
unsigned int len;
@@ -609,7 +600,7 @@ typedef struct tagWLAN_FR_PROBEREQ {
PWLAN_IE_SUPP_RATES pExtSuppRates;
} WLAN_FR_PROBEREQ, *PWLAN_FR_PROBEREQ;
-// Probe Response
+/* Probe Response */
typedef struct tagWLAN_FR_PROBERESP {
unsigned int uType;
unsigned int len;
@@ -636,7 +627,7 @@ typedef struct tagWLAN_FR_PROBERESP {
PWLAN_IE_QUIET pIE_Quiet;
} WLAN_FR_PROBERESP, *PWLAN_FR_PROBERESP;
-// Authentication
+/* Authentication */
typedef struct tagWLAN_FR_AUTHEN {
unsigned int uType;
unsigned int len;
@@ -650,7 +641,7 @@ typedef struct tagWLAN_FR_AUTHEN {
PWLAN_IE_CHALLENGE pChallenge;
} WLAN_FR_AUTHEN, *PWLAN_FR_AUTHEN;
-// Deauthenication
+/* Deauthenication */
typedef struct tagWLAN_FR_DEAUTHEN {
unsigned int uType;
unsigned int len;
@@ -774,4 +765,4 @@ vMgrDecodeReassocResponse(
PWLAN_FR_REASSOCRESP pFrame
);
-#endif// __80211MGR_H__
+#endif/* __80211MGR_H__ */
diff --git a/drivers/staging/vt6655/aes_ccmp.c b/drivers/staging/vt6655/aes_ccmp.c
index 36081481c6d1..fc056fc61995 100644
--- a/drivers/staging/vt6655/aes_ccmp.c
+++ b/drivers/staging/vt6655/aes_ccmp.c
@@ -205,7 +205,7 @@ void AESv128(unsigned char *key, unsigned char *data, unsigned char *ciphertext)
SubBytes(ciphertext, TmpdataA);
ShiftRows(TmpdataA, TmpdataB);
xor_128(TmpdataB, abyRoundKey, ciphertext);
- } else // round 1 ~ 9
+ } else /* round 1 ~ 9 */
{
SubBytes(ciphertext, TmpdataA);
ShiftRows(TmpdataA, TmpdataB);
@@ -249,7 +249,7 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
unsigned char *pbyIV;
unsigned char *pbyPayload;
unsigned short wHLen = 22;
- unsigned short wPayloadSize = wFrameSize - 8 - 8 - 4 - WLAN_HDR_ADDR3_LEN;//8 is IV, 8 is MIC, 4 is CRC
+ unsigned short wPayloadSize = wFrameSize - 8 - 8 - 4 - WLAN_HDR_ADDR3_LEN;/* 8 is IV, 8 is MIC, 4 is CRC */
bool bA4 = false;
unsigned char byTmp;
unsigned short wCnt;
@@ -259,13 +259,13 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
if (WLAN_GET_FC_TODS(*(unsigned short *)pbyFrame) &&
WLAN_GET_FC_FROMDS(*(unsigned short *)pbyFrame)) {
bA4 = true;
- pbyIV += 6; // 6 is 802.11 address4
+ pbyIV += 6; /* 6 is 802.11 address4 */
wHLen += 6;
wPayloadSize -= 6;
}
- pbyPayload = pbyIV + 8; //IV-length
+ pbyPayload = pbyIV + 8; /* IV-length */
- abyNonce[0] = 0x00; //now is 0, if Qos here will be priority
+ abyNonce[0] = 0x00; /* now is 0, if Qos here will be priority */
memcpy(&(abyNonce[1]), pMACHeader->abyAddr2, ETH_ALEN);
abyNonce[7] = pbyIV[7];
abyNonce[8] = pbyIV[6];
@@ -274,13 +274,13 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
abyNonce[11] = pbyIV[1];
abyNonce[12] = pbyIV[0];
- //MIC_IV
+ /* MIC_IV */
MIC_IV[0] = 0x59;
memcpy(&(MIC_IV[1]), &(abyNonce[0]), 13);
MIC_IV[14] = (unsigned char)(wPayloadSize >> 8);
MIC_IV[15] = (unsigned char)(wPayloadSize & 0xff);
- //MIC_HDR1
+ /* MIC_HDR1 */
MIC_HDR1[0] = (unsigned char)(wHLen >> 8);
MIC_HDR1[1] = (unsigned char)(wHLen & 0xff);
byTmp = (unsigned char)(pMACHeader->wFrameCtl & 0xff);
@@ -291,7 +291,7 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
memcpy(&(MIC_HDR1[4]), pMACHeader->abyAddr1, ETH_ALEN);
memcpy(&(MIC_HDR1[10]), pMACHeader->abyAddr2, ETH_ALEN);
- //MIC_HDR2
+ /* MIC_HDR2 */
memcpy(&(MIC_HDR2[0]), pMACHeader->abyAddr3, ETH_ALEN);
byTmp = (unsigned char)(pMACHeader->wSeqCtl & 0xff);
MIC_HDR2[6] = byTmp & 0x0f;
@@ -309,7 +309,7 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
MIC_HDR2[14] = 0x00;
MIC_HDR2[15] = 0x00;
- //CCMP
+ /* CCMP */
AESv128(pbyRxKey, MIC_IV, abyMIC);
for (kk = 0; kk < 16; kk++) {
abyTmp[kk] = MIC_HDR1[kk] ^ abyMIC[kk];
@@ -341,9 +341,9 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
memcpy(pbyPayload, abyPlainText, 16);
wCnt++;
pbyPayload += 16;
- } //for wPayloadSize
+ } /* for wPayloadSize */
- //last payload
+ /* last payload */
memcpy(&(abyLastCipher[0]), pbyPayload, jj);
for (ii = jj; ii < 16; ii++) {
abyLastCipher[ii] = 0x00;
@@ -359,7 +359,7 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
memcpy(pbyPayload, abyPlainText, jj);
pbyPayload += jj;
- //for MIC calculation
+ /* for MIC calculation */
for (ii = jj; ii < 16; ii++) {
abyPlainText[ii] = 0x00;
}
@@ -368,8 +368,8 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
}
AESv128(pbyRxKey, abyTmp, abyMIC);
- //=>above is the calculate MIC
- //--------------------------------------------
+ /* =>above is the calculate MIC */
+ /* -------------------------------------------- */
wCnt = 0;
abyCTRPLD[14] = (unsigned char)(wCnt >> 8);
@@ -378,12 +378,11 @@ bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned shor
for (kk = 0; kk < 8; kk++) {
abyTmp[kk] = abyTmp[kk] ^ pbyPayload[kk];
}
- //=>above is the dec-MIC from packet
- //--------------------------------------------
+ /* =>above is the dec-MIC from packet */
+ /* -------------------------------------------- */
- if (!memcmp(abyMIC, abyTmp, 8)) {
+ if (!memcmp(abyMIC, abyTmp, 8))
return true;
- } else {
+ else
return false;
- }
}
diff --git a/drivers/staging/vt6655/aes_ccmp.h b/drivers/staging/vt6655/aes_ccmp.h
index c8b28b0e9bdc..cc02e645aa56 100644
--- a/drivers/staging/vt6655/aes_ccmp.h
+++ b/drivers/staging/vt6655/aes_ccmp.h
@@ -43,4 +43,4 @@
/*--------------------- Export Functions --------------------------*/
bool AESbGenCCMP(unsigned char *pbyRxKey, unsigned char *pbyFrame, unsigned short wFrameSize);
-#endif //__AES_H__
+#endif /* __AES_H__ */
diff --git a/drivers/staging/vt6655/hostap.c b/drivers/staging/vt6655/hostap.c
index 8417c2f2c6cf..57a08c5771f2 100644
--- a/drivers/staging/vt6655/hostap.c
+++ b/drivers/staging/vt6655/hostap.c
@@ -80,7 +80,7 @@ static int hostap_enable_hostapd(PSDevice pDevice, int rtnl_locked)
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "%s: Enabling hostapd mode\n", dev->name);
- pDevice->apdev = kzalloc(sizeof(struct net_device), GFP_KERNEL);
+ pDevice->apdev = alloc_etherdev(sizeof(*apdev_priv));
if (pDevice->apdev == NULL)
return -ENOMEM;
@@ -104,6 +104,8 @@ static int hostap_enable_hostapd(PSDevice pDevice, int rtnl_locked)
if (ret) {
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "%s: register_netdevice(AP) failed!\n",
dev->name);
+ free_netdev(pDevice->apdev);
+ pDevice->apdev = NULL;
return -1;
}
@@ -141,7 +143,7 @@ static int hostap_disable_hostapd(PSDevice pDevice, int rtnl_locked)
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "%s: Netdevice %s unregistered\n",
pDevice->dev->name, pDevice->apdev->name);
}
- kfree(pDevice->apdev);
+ free_netdev(pDevice->apdev);
pDevice->apdev = NULL;
pDevice->bEnable8021x = false;
pDevice->bEnableHostWEP = false;
diff --git a/drivers/staging/vt6655/ioctl.c b/drivers/staging/vt6655/ioctl.c
index 2ae8116869eb..46e0e41e7e60 100644
--- a/drivers/staging/vt6655/ioctl.c
+++ b/drivers/staging/vt6655/ioctl.c
@@ -64,7 +64,6 @@ int private_ioctl(PSDevice pDevice, struct ifreq *rq)
PKnownBSS pBSS;
PKnownNodeDB pNode;
unsigned int ii, jj;
- SCmdLinkStatus sLinkStatus;
unsigned char abySuppRates[] = {WLAN_EID_SUPP_RATES, 4, 0x02, 0x04, 0x0B, 0x16};
unsigned char abyNullAddr[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
unsigned long dwKeyIndex = 0;
@@ -245,10 +244,12 @@ int private_ioctl(PSDevice pDevice, struct ifreq *rq)
pDevice->eEncryptionStatus = Ndis802_11Encryption1Enabled;
break;
- case WLAN_CMD_GET_LINK:
+ case WLAN_CMD_GET_LINK: {
+ SCmdLinkStatus sLinkStatus;
+
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "WLAN_CMD_GET_LINK status.\n");
- memset(sLinkStatus.abySSID, 0 , WLAN_SSID_MAXLEN + 1);
+ memset(&sLinkStatus, 0, sizeof(sLinkStatus));
if (pMgmt->eCurrMode == WMAC_MODE_IBSS_STA)
sLinkStatus.wBSSType = ADHOC;
@@ -277,7 +278,7 @@ int private_ioctl(PSDevice pDevice, struct ifreq *rq)
break;
}
break;
-
+ }
case WLAN_CMD_GET_LISTLEN:
cbListCount = 0;
pBSS = &(pMgmt->sBSSList[0]);
diff --git a/drivers/staging/vt6656/mac.c b/drivers/staging/vt6656/mac.c
index 343db19283a2..54414ed27191 100644
--- a/drivers/staging/vt6656/mac.c
+++ b/drivers/staging/vt6656/mac.c
@@ -101,7 +101,7 @@ void MACvSetBBType(struct vnt_private *pDevice, u8 byType)
MESSAGE_TYPE_WRITE_MASK,
MAC_REG_ENCFG0,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -122,18 +122,10 @@ void MACvSetBBType(struct vnt_private *pDevice, u8 byType)
*/
void MACvDisableKeyEntry(struct vnt_private *pDevice, u32 uEntryIdx)
{
- u16 wOffset;
u8 byData;
byData = (u8) uEntryIdx;
- wOffset = MISCFIFO_KEYETRY0;
- wOffset += (uEntryIdx * MISCFIFO_KEYENTRYSIZE);
-
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFNDEX, wOffset);
- //VNSvOutPortD(dwIoBase + MAC_REG_MISCFFDATA, 0);
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFCTL, MISCFFCTL_WRITE);
-
//issue write misc fifo command to device
CONTROLnsRequestOut(pDevice,
MESSAGE_TYPE_CLRKEYENTRY,
@@ -182,12 +174,6 @@ void MACvSetKeyEntry(struct vnt_private *pDevice, u16 wKeyCtl, u32 uEntryIdx,
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"1. wOffset: %d, Data: %X,"\
" KeyCtl:%X\n", wOffset, dwData1, wKeyCtl);
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFNDEX, wOffset);
- //VNSvOutPortD(dwIoBase + MAC_REG_MISCFFDATA, dwData);
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFCTL, MISCFFCTL_WRITE);
-
- //wOffset++;
-
dwData2 = 0;
dwData2 |= *(pbyAddr+3);
dwData2 <<= 8;
@@ -200,21 +186,6 @@ void MACvSetKeyEntry(struct vnt_private *pDevice, u16 wKeyCtl, u32 uEntryIdx,
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"2. wOffset: %d, Data: %X\n",
wOffset, dwData2);
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFNDEX, wOffset);
- //VNSvOutPortD(dwIoBase + MAC_REG_MISCFFDATA, dwData);
- //VNSvOutPortW(dwIoBase + MAC_REG_MISCFFCTL, MISCFFCTL_WRITE);
-
- //wOffset++;
-
- //wOffset += (uKeyIdx * 4);
-/* for (ii=0;ii<4;ii++) {
- // alway push 128 bits
- DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"3.(%d) wOffset: %d, Data: %lX\n", ii, wOffset+ii, *pdwKey);
- VNSvOutPortW(dwIoBase + MAC_REG_MISCFFNDEX, wOffset+ii);
- VNSvOutPortD(dwIoBase + MAC_REG_MISCFFDATA, *pdwKey++);
- VNSvOutPortW(dwIoBase + MAC_REG_MISCFFCTL, MISCFFCTL_WRITE);
- }
-*/
pbyKey = (u8 *)pdwKey;
pbyData[0] = (u8)dwData1;
@@ -232,7 +203,7 @@ void MACvSetKeyEntry(struct vnt_private *pDevice, u16 wKeyCtl, u32 uEntryIdx,
MESSAGE_TYPE_SETKEY,
wOffset,
(u16)uKeyIdx,
- 24,
+ ARRAY_SIZE(pbyData),
pbyData
);
@@ -249,7 +220,7 @@ void MACvRegBitsOff(struct vnt_private *pDevice, u8 byRegOfs, u8 byBits)
MESSAGE_TYPE_WRITE_MASK,
byRegOfs,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -265,7 +236,7 @@ void MACvRegBitsOn(struct vnt_private *pDevice, u8 byRegOfs, u8 byBits)
MESSAGE_TYPE_WRITE_MASK,
byRegOfs,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -281,7 +252,7 @@ void MACvWriteWord(struct vnt_private *pDevice, u8 byRegOfs, u16 wData)
MESSAGE_TYPE_WRITE,
byRegOfs,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
@@ -302,7 +273,7 @@ void MACvWriteBSSIDAddress(struct vnt_private *pDevice, u8 *pbyEtherAddr)
MESSAGE_TYPE_WRITE,
MAC_REG_BSSID0,
MESSAGE_REQUEST_MACREG,
- 6,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -318,7 +289,7 @@ void MACvEnableProtectMD(struct vnt_private *pDevice)
MESSAGE_TYPE_WRITE_MASK,
MAC_REG_ENCFG0,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -334,7 +305,7 @@ void MACvDisableProtectMD(struct vnt_private *pDevice)
MESSAGE_TYPE_WRITE_MASK,
MAC_REG_ENCFG0,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -350,7 +321,7 @@ void MACvEnableBarkerPreambleMd(struct vnt_private *pDevice)
MESSAGE_TYPE_WRITE_MASK,
MAC_REG_ENCFG2,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -366,7 +337,7 @@ void MACvDisableBarkerPreambleMd(struct vnt_private *pDevice)
MESSAGE_TYPE_WRITE_MASK,
MAC_REG_ENCFG2,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
@@ -382,7 +353,7 @@ void MACvWriteBeaconInterval(struct vnt_private *pDevice, u16 wInterval)
MESSAGE_TYPE_WRITE,
MAC_REG_BI,
MESSAGE_REQUEST_MACREG,
- 2,
+ ARRAY_SIZE(pbyData),
pbyData
);
}
diff --git a/drivers/staging/vt6656/rf.c b/drivers/staging/vt6656/rf.c
index 44cfe0b14180..d27fa434550d 100644
--- a/drivers/staging/vt6656/rf.c
+++ b/drivers/staging/vt6656/rf.c
@@ -29,6 +29,9 @@
* IFRFbWriteEmbedded - Embedded write RF register via MAC
*
* Revision History:
+ * RF_VT3226: RobertYu:20051111, VT3226C0 and before
+ * RF_VT3226D0: RobertYu:20051228
+ * RF_VT3342A0: RobertYu:20060609
*
*/
@@ -61,7 +64,7 @@ static int msglevel =MSG_LEVEL_INFO;
#define VT3342_PWR_IDX_LEN 64
//}}
-u8 abyAL2230InitTable[CB_AL2230_INIT_SEQ][3] = {
+static u8 al2230_init_table[CB_AL2230_INIT_SEQ][3] = {
{0x03, 0xF7, 0x90},
{0x03, 0x33, 0x31},
{0x01, 0xB8, 0x02},
@@ -79,7 +82,7 @@ u8 abyAL2230InitTable[CB_AL2230_INIT_SEQ][3] = {
{0x00, 0x58, 0x0F}
};
-u8 abyAL2230ChannelTable0[CB_MAX_CHANNEL_24G][3] = {
+static u8 al2230_channel_table0[CB_MAX_CHANNEL_24G][3] = {
{0x03, 0xF7, 0x90}, // channel = 1, Tf = 2412MHz
{0x03, 0xF7, 0x90}, // channel = 2, Tf = 2417MHz
{0x03, 0xE7, 0x90}, // channel = 3, Tf = 2422MHz
@@ -96,7 +99,7 @@ u8 abyAL2230ChannelTable0[CB_MAX_CHANNEL_24G][3] = {
{0x03, 0xE7, 0xC0} // channel = 14, Tf = 2412M
};
-u8 abyAL2230ChannelTable1[CB_MAX_CHANNEL_24G][3] = {
+static u8 al2230_channel_table1[CB_MAX_CHANNEL_24G][3] = {
{0x03, 0x33, 0x31}, // channel = 1, Tf = 2412MHz
{0x0B, 0x33, 0x31}, // channel = 2, Tf = 2417MHz
{0x03, 0x33, 0x31}, // channel = 3, Tf = 2422MHz
@@ -115,7 +118,7 @@ u8 abyAL2230ChannelTable1[CB_MAX_CHANNEL_24G][3] = {
// 40MHz reference frequency
// Need to Pull PLLON(PE3) low when writing channel registers through 3-wire.
-u8 abyAL7230InitTable[CB_AL7230_INIT_SEQ][3] = {
+static u8 al7230_init_table[CB_AL7230_INIT_SEQ][3] = {
{0x20, 0x37, 0x90}, // Channel1 // Need modify for 11a
{0x13, 0x33, 0x31}, // Channel1 // Need modify for 11a
{0x84, 0x1F, 0xF2}, // Need modify for 11a: 451FE2
@@ -138,7 +141,7 @@ u8 abyAL7230InitTable[CB_AL7230_INIT_SEQ][3] = {
{0x1A, 0xBA, 0x8F} // Need modify for 11a: 12BACF
};
-u8 abyAL7230InitTableAMode[CB_AL7230_INIT_SEQ][3] = {
+static u8 al7230_init_table_amode[CB_AL7230_INIT_SEQ][3] = {
{0x2F, 0xF5, 0x20}, // Channel184 // Need modify for 11b/g
{0x00, 0x00, 0x01}, // Channel184 // Need modify for 11b/g
{0x45, 0x1F, 0xE2}, // Need modify for 11b/g
@@ -157,7 +160,7 @@ u8 abyAL7230InitTableAMode[CB_AL7230_INIT_SEQ][3] = {
{0x12, 0xBA, 0xCF} // Need modify for 11b/g
};
-u8 abyAL7230ChannelTable0[CB_MAX_CHANNEL][3] = {
+static u8 al7230_channel_table0[CB_MAX_CHANNEL][3] = {
{0x20, 0x37, 0x90}, // channel = 1, Tf = 2412MHz
{0x20, 0x37, 0x90}, // channel = 2, Tf = 2417MHz
{0x20, 0x37, 0x90}, // channel = 3, Tf = 2422MHz
@@ -223,7 +226,7 @@ u8 abyAL7230ChannelTable0[CB_MAX_CHANNEL][3] = {
{0x2F, 0xF6, 0x10} // channel = 165, Tf = 5825MHz (56)
};
-u8 abyAL7230ChannelTable1[CB_MAX_CHANNEL][3] = {
+static u8 al7230_channel_table1[CB_MAX_CHANNEL][3] = {
{0x13, 0x33, 0x31}, // channel = 1, Tf = 2412MHz
{0x1B, 0x33, 0x31}, // channel = 2, Tf = 2417MHz
{0x03, 0x33, 0x31}, // channel = 3, Tf = 2422MHz
@@ -287,7 +290,7 @@ u8 abyAL7230ChannelTable1[CB_MAX_CHANNEL][3] = {
{0x02, 0xAA, 0xB1} // channel = 165, Tf = 5825MHz (56)
};
-u8 abyAL7230ChannelTable2[CB_MAX_CHANNEL][3] = {
+static u8 al7230_channel_table2[CB_MAX_CHANNEL][3] = {
{0x7F, 0xD7, 0x84}, // channel = 1, Tf = 2412MHz
{0x7F, 0xD7, 0x84}, // channel = 2, Tf = 2417MHz
{0x7F, 0xD7, 0x84}, // channel = 3, Tf = 2422MHz
@@ -352,7 +355,7 @@ u8 abyAL7230ChannelTable2[CB_MAX_CHANNEL][3] = {
};
///{{RobertYu:20051111
-u8 abyVT3226_InitTable[CB_VT3226_INIT_SEQ][3] = {
+static u8 at3226_init_table[CB_VT3226_INIT_SEQ][3] = {
{0x03, 0xFF, 0x80},
{0x02, 0x82, 0xA1},
{0x03, 0xC6, 0xA2},
@@ -366,7 +369,7 @@ u8 abyVT3226_InitTable[CB_VT3226_INIT_SEQ][3] = {
{0x02, 0x00, 0x2A}
};
-u8 abyVT3226D0_InitTable[CB_VT3226_INIT_SEQ][3] = {
+static u8 at3226d0_init_table[CB_VT3226_INIT_SEQ][3] = {
{0x03, 0xFF, 0x80},
{0x03, 0x02, 0x21}, //RobertYu:20060327
{0x03, 0xC6, 0xA2},
@@ -380,7 +383,7 @@ u8 abyVT3226D0_InitTable[CB_VT3226_INIT_SEQ][3] = {
{0x02, 0x01, 0xAA} //RobertYu:20060523
};
-u8 abyVT3226_ChannelTable0[CB_MAX_CHANNEL_24G][3] = {
+static u8 vt3226_channel_table0[CB_MAX_CHANNEL_24G][3] = {
{0x01, 0x97, 0x83}, // channel = 1, Tf = 2412MHz
{0x01, 0x97, 0x83}, // channel = 2, Tf = 2417MHz
{0x01, 0x97, 0x93}, // channel = 3, Tf = 2422MHz
@@ -397,7 +400,7 @@ u8 abyVT3226_ChannelTable0[CB_MAX_CHANNEL_24G][3] = {
{0x03, 0x37, 0xC3} // channel = 14, Tf = 2484MHz
};
-u8 abyVT3226_ChannelTable1[CB_MAX_CHANNEL_24G][3] = {
+static u8 vt3226_channel_table1[CB_MAX_CHANNEL_24G][3] = {
{0x02, 0x66, 0x64}, // channel = 1, Tf = 2412MHz
{0x03, 0x66, 0x64}, // channel = 2, Tf = 2417MHz
{0x00, 0x66, 0x64}, // channel = 3, Tf = 2422MHz
@@ -416,7 +419,7 @@ u8 abyVT3226_ChannelTable1[CB_MAX_CHANNEL_24G][3] = {
///}}RobertYu
//{{RobertYu:20060502, TWIF 1.14, LO Current for 11b mode
-u32 dwVT3226D0LoCurrentTable[CB_MAX_CHANNEL_24G] = {
+const u32 vt3226d0_lo_current_table[CB_MAX_CHANNEL_24G] = {
0x0135C600+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW, // channel = 1, Tf = 2412MHz
0x0135C600+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW, // channel = 2, Tf = 2417MHz
0x0235C600+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW, // channel = 3, Tf = 2422MHz
@@ -435,7 +438,7 @@ u32 dwVT3226D0LoCurrentTable[CB_MAX_CHANNEL_24G] = {
//}}
//{{RobertYu:20060609
-u8 abyVT3342A0_InitTable[CB_VT3342_INIT_SEQ][3] = { /* 11b/g mode */
+static u8 vt3342a0_init_table[CB_VT3342_INIT_SEQ][3] = { /* 11b/g mode */
{0x03, 0xFF, 0x80}, //update for mode//
{0x02, 0x08, 0x81},
{0x00, 0xC6, 0x02},
@@ -458,7 +461,7 @@ u8 abyVT3342A0_InitTable[CB_VT3342_INIT_SEQ][3] = { /* 11b/g mode */
// channel56, 5280MHz 0x00C402 for disable Frac
// other channels 0x00C602
-u8 abyVT3342_ChannelTable0[CB_MAX_CHANNEL][3] = {
+static u8 vt3342_channel_table0[CB_MAX_CHANNEL][3] = {
{0x02, 0x05, 0x03}, // channel = 1, Tf = 2412MHz
{0x01, 0x15, 0x03}, // channel = 2, Tf = 2417MHz
{0x03, 0xC5, 0x03}, // channel = 3, Tf = 2422MHz
@@ -524,7 +527,7 @@ u8 abyVT3342_ChannelTable0[CB_MAX_CHANNEL][3] = {
{0x00, 0x06, 0x03} // channel = 165, Tf = 5825MHz (56), TBD
};
-u8 abyVT3342_ChannelTable1[CB_MAX_CHANNEL][3] = {
+static u8 vt3342_channel_table1[CB_MAX_CHANNEL][3] = {
{0x01, 0x99, 0x94}, // channel = 1, Tf = 2412MHz
{0x02, 0x44, 0x44}, // channel = 2, Tf = 2417MHz
{0x02, 0xEE, 0xE4}, // channel = 3, Tf = 2422MHz
@@ -594,7 +597,7 @@ u8 abyVT3342_ChannelTable1[CB_MAX_CHANNEL][3] = {
*
-*/
-const u32 dwAL2230PowerTable[AL2230_PWR_IDX_LEN] = {
+const u32 al2230_power_table[AL2230_PWR_IDX_LEN] = {
0x04040900+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW,
0x04041900+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW,
0x04042900+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW,
@@ -732,42 +735,41 @@ int IFRFbWriteEmbedded(struct vnt_private *pDevice, u32 dwData)
* Return Value: true if succeeded; false if failed.
*
*/
-int RFbSetPower(struct vnt_private *pDevice, u32 uRATE, u32 uCH)
+int RFbSetPower(struct vnt_private *priv, u32 rate, u32 channel)
{
- int bResult = true;
- u8 byPwr = pDevice->byCCKPwr;
+ int ret = true;
+ u8 power = priv->byCCKPwr;
- if (pDevice->dwDiagRefCount)
+ if (priv->dwDiagRefCount)
return true;
- if (uCH == 0)
+ if (channel == 0)
return -EINVAL;
- switch (uRATE) {
- case RATE_1M:
- case RATE_2M:
- case RATE_5M:
- case RATE_11M:
- byPwr = pDevice->abyCCKPwrTbl[uCH-1];
- break;
- case RATE_6M:
- case RATE_9M:
- case RATE_18M:
- case RATE_24M:
- case RATE_36M:
- case RATE_48M:
- case RATE_54M:
- if (uCH > CB_MAX_CHANNEL_24G) {
- byPwr = pDevice->abyOFDMAPwrTbl[uCH-15];
- } else {
- byPwr = pDevice->abyOFDMPwrTbl[uCH-1];
- }
- break;
- }
-
- bResult = RFbRawSetPower(pDevice, byPwr, uRATE);
-
- return bResult;
+ switch (rate) {
+ case RATE_1M:
+ case RATE_2M:
+ case RATE_5M:
+ case RATE_11M:
+ power = priv->abyCCKPwrTbl[channel-1];
+ break;
+ case RATE_6M:
+ case RATE_9M:
+ case RATE_18M:
+ case RATE_24M:
+ case RATE_36M:
+ case RATE_48M:
+ case RATE_54M:
+ if (channel > CB_MAX_CHANNEL_24G)
+ power = priv->abyOFDMAPwrTbl[channel-15];
+ else
+ power = priv->abyOFDMPwrTbl[channel-1];
+ break;
+ }
+
+ ret = RFbRawSetPower(priv, power, rate);
+
+ return ret;
}
/*
@@ -784,136 +786,146 @@ int RFbSetPower(struct vnt_private *pDevice, u32 uRATE, u32 uCH)
*
*/
-int RFbRawSetPower(struct vnt_private *pDevice, u8 byPwr, u32 uRATE)
+int RFbRawSetPower(struct vnt_private *priv, u8 power, u32 rate)
{
- int bResult = true;
-
- if (pDevice->byCurPwr == byPwr)
- return true;
-
- pDevice->byCurPwr = byPwr;
-
- switch (pDevice->byRFType) {
-
- case RF_AL2230 :
- if (pDevice->byCurPwr >= AL2230_PWR_IDX_LEN)
- return false;
- bResult &= IFRFbWriteEmbedded(pDevice, dwAL2230PowerTable[pDevice->byCurPwr]);
- if (uRATE <= RATE_11M)
- bResult &= IFRFbWriteEmbedded(pDevice, 0x0001B400+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- else
- bResult &= IFRFbWriteEmbedded(pDevice, 0x0005A400+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- break;
-
- case RF_AL2230S :
- if (pDevice->byCurPwr >= AL2230_PWR_IDX_LEN)
- return false;
- bResult &= IFRFbWriteEmbedded(pDevice, dwAL2230PowerTable[pDevice->byCurPwr]);
- if (uRATE <= RATE_11M) {
- bResult &= IFRFbWriteEmbedded(pDevice, 0x040C1400+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- bResult &= IFRFbWriteEmbedded(pDevice, 0x00299B00+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- }else {
- bResult &= IFRFbWriteEmbedded(pDevice, 0x0005A400+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- bResult &= IFRFbWriteEmbedded(pDevice, 0x00099B00+(BY_AL2230_REG_LEN<<3)+IFREGCTL_REGW);
- }
- break;
-
- case RF_AIROHA7230:
- {
- u32 dwMax7230Pwr;
-
- if (uRATE <= RATE_11M) { //RobertYu:20060426, for better 11b mask
- bResult &= IFRFbWriteEmbedded(pDevice, 0x111BB900+(BY_AL7230_REG_LEN<<3)+IFREGCTL_REGW);
- }
- else {
- bResult &= IFRFbWriteEmbedded(pDevice, 0x221BB900+(BY_AL7230_REG_LEN<<3)+IFREGCTL_REGW);
- }
-
- if (pDevice->byCurPwr > AL7230_PWR_IDX_LEN) return false;
-
- // 0x080F1B00 for 3 wire control TxGain(D10) and 0x31 as TX Gain value
- dwMax7230Pwr = 0x080C0B00 | ( (pDevice->byCurPwr) << 12 ) |
- (BY_AL7230_REG_LEN << 3 ) | IFREGCTL_REGW;
-
- bResult &= IFRFbWriteEmbedded(pDevice, dwMax7230Pwr);
- break;
- }
- break;
-
- case RF_VT3226: //RobertYu:20051111, VT3226C0 and before
- {
- u32 dwVT3226Pwr;
-
- if (pDevice->byCurPwr >= VT3226_PWR_IDX_LEN)
- return false;
- dwVT3226Pwr = ((0x3F-pDevice->byCurPwr) << 20 ) | ( 0x17 << 8 ) /* Reg7 */ |
- (BY_VT3226_REG_LEN << 3 ) | IFREGCTL_REGW;
- bResult &= IFRFbWriteEmbedded(pDevice, dwVT3226Pwr);
- break;
- }
-
- case RF_VT3226D0: //RobertYu:20051228
- {
- u32 dwVT3226Pwr;
-
- if (pDevice->byCurPwr >= VT3226_PWR_IDX_LEN)
- return false;
-
- if (uRATE <= RATE_11M) {
-
- dwVT3226Pwr = ((0x3F-pDevice->byCurPwr) << 20 ) | ( 0xE07 << 8 ) /* Reg7 */ | //RobertYu:20060420, TWIF 1.10
- (BY_VT3226_REG_LEN << 3 ) | IFREGCTL_REGW;
- bResult &= IFRFbWriteEmbedded(pDevice, dwVT3226Pwr);
-
- bResult &= IFRFbWriteEmbedded(pDevice, 0x03C6A200+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW);
- if (pDevice->vnt_mgmt.eScanState != WMAC_NO_SCANNING) {
- /* scanning, channel number is pDevice->uScanChannel */
- DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO
+ u32 power_setting = 0;
+ int ret = true;
+
+ if (priv->byCurPwr == power)
+ return true;
+
+ priv->byCurPwr = power;
+
+ switch (priv->byRFType) {
+ case RF_AL2230:
+ if (priv->byCurPwr >= AL2230_PWR_IDX_LEN)
+ return false;
+
+ ret &= IFRFbWriteEmbedded(priv,
+ al2230_power_table[priv->byCurPwr]);
+
+ if (rate <= RATE_11M)
+ ret &= IFRFbWriteEmbedded(priv, 0x0001b400 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ else
+ ret &= IFRFbWriteEmbedded(priv, 0x0005a400 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ break;
+ case RF_AL2230S:
+ if (priv->byCurPwr >= AL2230_PWR_IDX_LEN)
+ return false;
+
+ ret &= IFRFbWriteEmbedded(priv,
+ al2230_power_table[priv->byCurPwr]);
+
+ if (rate <= RATE_11M) {
+ ret &= IFRFbWriteEmbedded(priv, 0x040c1400 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ ret &= IFRFbWriteEmbedded(priv, 0x00299b00 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ } else {
+ ret &= IFRFbWriteEmbedded(priv, 0x0005a400 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ ret &= IFRFbWriteEmbedded(priv, 0x00099b00 +
+ (BY_AL2230_REG_LEN << 3) + IFREGCTL_REGW);
+ }
+ break;
+
+ case RF_AIROHA7230:
+ if (rate <= RATE_11M)
+ ret &= IFRFbWriteEmbedded(priv, 0x111bb900 +
+ (BY_AL7230_REG_LEN << 3)+IFREGCTL_REGW);
+ else
+ ret &= IFRFbWriteEmbedded(priv, 0x221bb900 +
+ (BY_AL7230_REG_LEN << 3)+IFREGCTL_REGW);
+
+ if (priv->byCurPwr > AL7230_PWR_IDX_LEN)
+ return false;
+
+ /*
+ * 0x080F1B00 for 3 wire control TxGain(D10)
+ * and 0x31 as TX Gain value
+ */
+ power_setting = 0x080c0b00 | ((priv->byCurPwr) << 12) |
+ (BY_AL7230_REG_LEN << 3) | IFREGCTL_REGW;
+
+ ret &= IFRFbWriteEmbedded(priv, power_setting);
+
+ break;
+
+ case RF_VT3226:
+ if (priv->byCurPwr >= VT3226_PWR_IDX_LEN)
+ return false;
+ power_setting = ((0x3f - priv->byCurPwr) << 20) | (0x17 << 8) |
+ (BY_VT3226_REG_LEN << 3) | IFREGCTL_REGW;
+
+ ret &= IFRFbWriteEmbedded(priv, power_setting);
+
+ break;
+ case RF_VT3226D0:
+ if (priv->byCurPwr >= VT3226_PWR_IDX_LEN)
+ return false;
+
+ if (rate <= RATE_11M) {
+ power_setting = ((0x3f-priv->byCurPwr) << 20) |
+ (0xe07 << 8) | (BY_VT3226_REG_LEN << 3) |
+ IFREGCTL_REGW;
+
+ ret &= IFRFbWriteEmbedded(priv, power_setting);
+ ret &= IFRFbWriteEmbedded(priv, 0x03c6a200 +
+ (BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW);
+
+ if (priv->vnt_mgmt.eScanState != WMAC_NO_SCANNING) {
+ DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO
+ "RFbRawSetPower> 11B mode uCurrChannel[%d]\n",
+ priv->vnt_mgmt.uScanChannel);
+ ret &= IFRFbWriteEmbedded(priv,
+ vt3226d0_lo_current_table[priv->
+ vnt_mgmt.uScanChannel - 1]);
+ } else {
+ DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO
"RFbRawSetPower> 11B mode uCurrChannel[%d]\n",
- pDevice->vnt_mgmt.uScanChannel);
- bResult &= IFRFbWriteEmbedded(pDevice,
- dwVT3226D0LoCurrentTable[pDevice->
- vnt_mgmt.uScanChannel - 1]);
+ priv->vnt_mgmt.uCurrChannel);
+ ret &= IFRFbWriteEmbedded(priv,
+ vt3226d0_lo_current_table[priv->
+ vnt_mgmt.uCurrChannel - 1]);
+ }
+
+ ret &= IFRFbWriteEmbedded(priv, 0x015C0800 +
+ (BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW);
} else {
DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO
- "RFbRawSetPower> 11B mode uCurrChannel[%d]\n",
- pDevice->vnt_mgmt.uCurrChannel);
- bResult &= IFRFbWriteEmbedded(pDevice,
- dwVT3226D0LoCurrentTable[pDevice->
- vnt_mgmt.uCurrChannel - 1]);
+ "@@@@ RFbRawSetPower> 11G mode\n");
+
+ power_setting = ((0x3f-priv->byCurPwr) << 20) |
+ (0x7 << 8) | (BY_VT3226_REG_LEN << 3) |
+ IFREGCTL_REGW;
+
+ ret &= IFRFbWriteEmbedded(priv, power_setting);
+ ret &= IFRFbWriteEmbedded(priv, 0x00C6A200 +
+ (BY_VT3226_REG_LEN << 3) + IFREGCTL_REGW);
+ ret &= IFRFbWriteEmbedded(priv, 0x016BC600 +
+ (BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW);
+ ret &= IFRFbWriteEmbedded(priv, 0x00900800 +
+ (BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW);
}
+ break;
+
+ case RF_VT3342A0:
+ if (priv->byCurPwr >= VT3342_PWR_IDX_LEN)
+ return false;
+
+ power_setting = ((0x3F-priv->byCurPwr) << 20) |
+ (0x27 << 8) | (BY_VT3342_REG_LEN << 3) |
+ IFREGCTL_REGW;
- bResult &= IFRFbWriteEmbedded(pDevice, 0x015C0800+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW); //RobertYu:20060420, ok now, new switching power (mini-pci can have bigger power consumption)
- } else {
- DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"@@@@ RFbRawSetPower> 11G mode\n");
- dwVT3226Pwr = ((0x3F-pDevice->byCurPwr) << 20 ) | ( 0x7 << 8 ) /* Reg7 */ | //RobertYu:20060420, TWIF 1.10
- (BY_VT3226_REG_LEN << 3 ) | IFREGCTL_REGW;
- bResult &= IFRFbWriteEmbedded(pDevice, dwVT3226Pwr);
- bResult &= IFRFbWriteEmbedded(pDevice, 0x00C6A200+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW); //RobertYu:20060327
- bResult &= IFRFbWriteEmbedded(pDevice, 0x016BC600+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW); //RobertYu:20060111
- bResult &= IFRFbWriteEmbedded(pDevice, 0x00900800+(BY_VT3226_REG_LEN<<3)+IFREGCTL_REGW); //RobertYu:20060111
- }
- break;
- }
-
- //{{RobertYu:20060609
- case RF_VT3342A0:
- {
- u32 dwVT3342Pwr;
-
- if (pDevice->byCurPwr >= VT3342_PWR_IDX_LEN)
- return false;
-
- dwVT3342Pwr = ((0x3F-pDevice->byCurPwr) << 20 ) | ( 0x27 << 8 ) /* Reg7 */ |
- (BY_VT3342_REG_LEN << 3 ) | IFREGCTL_REGW;
- bResult &= IFRFbWriteEmbedded(pDevice, dwVT3342Pwr);
- break;
- }
-
- default :
- break;
- }
- return bResult;
+ ret &= IFRFbWriteEmbedded(priv, power_setting);
+
+ break;
+ default:
+ break;
+ }
+ return ret;
}
/*+
@@ -931,169 +943,150 @@ int RFbRawSetPower(struct vnt_private *pDevice, u8 byPwr, u32 uRATE)
* Return Value: none
*
-*/
-void RFvRSSITodBm(struct vnt_private *pDevice, u8 byCurrRSSI, long *pldBm)
+void RFvRSSITodBm(struct vnt_private *priv, u8 rssi, long *dbm)
{
- u8 byIdx = (((byCurrRSSI & 0xC0) >> 6) & 0x03);
- signed long b = (byCurrRSSI & 0x3F);
- signed long a = 0;
- u8 abyAIROHARF[4] = {0, 18, 0, 40};
-
- switch (pDevice->byRFType) {
- case RF_AL2230:
- case RF_AL2230S:
- case RF_AIROHA7230:
- case RF_VT3226: //RobertYu:20051111
- case RF_VT3226D0:
- case RF_VT3342A0: //RobertYu:20060609
- a = abyAIROHARF[byIdx];
- break;
- default:
- break;
- }
-
- *pldBm = -1 * (a + b * 2);
+ u8 idx = (((rssi & 0xc0) >> 6) & 0x03);
+ long b = (rssi & 0x3f);
+ long a = 0;
+ u8 airoharf[4] = {0, 18, 0, 40};
+
+ switch (priv->byRFType) {
+ case RF_AL2230:
+ case RF_AL2230S:
+ case RF_AIROHA7230:
+ case RF_VT3226:
+ case RF_VT3226D0:
+ case RF_VT3342A0:
+ a = airoharf[idx];
+ break;
+ default:
+ break;
+ }
+
+ *dbm = -1 * (a + b * 2);
}
-void RFbRFTableDownload(struct vnt_private *pDevice)
+void RFbRFTableDownload(struct vnt_private *priv)
{
- u16 wLength1 = 0, wLength2 = 0, wLength3 = 0;
- u8 *pbyAddr1 = NULL, *pbyAddr2 = NULL, *pbyAddr3 = NULL;
- u16 wLength, wValue;
- u8 abyArray[256];
-
- switch ( pDevice->byRFType ) {
- case RF_AL2230:
- case RF_AL2230S:
- wLength1 = CB_AL2230_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL_24G * 3;
- wLength3 = CB_MAX_CHANNEL_24G * 3;
- pbyAddr1 = &(abyAL2230InitTable[0][0]);
- pbyAddr2 = &(abyAL2230ChannelTable0[0][0]);
- pbyAddr3 = &(abyAL2230ChannelTable1[0][0]);
- break;
- case RF_AIROHA7230:
- wLength1 = CB_AL7230_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL * 3;
- wLength3 = CB_MAX_CHANNEL * 3;
- pbyAddr1 = &(abyAL7230InitTable[0][0]);
- pbyAddr2 = &(abyAL7230ChannelTable0[0][0]);
- pbyAddr3 = &(abyAL7230ChannelTable1[0][0]);
- break;
- case RF_VT3226: //RobertYu:20051111
- wLength1 = CB_VT3226_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL_24G * 3;
- wLength3 = CB_MAX_CHANNEL_24G * 3;
- pbyAddr1 = &(abyVT3226_InitTable[0][0]);
- pbyAddr2 = &(abyVT3226_ChannelTable0[0][0]);
- pbyAddr3 = &(abyVT3226_ChannelTable1[0][0]);
- break;
- case RF_VT3226D0: //RobertYu:20051114
- wLength1 = CB_VT3226_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL_24G * 3;
- wLength3 = CB_MAX_CHANNEL_24G * 3;
- pbyAddr1 = &(abyVT3226D0_InitTable[0][0]);
- pbyAddr2 = &(abyVT3226_ChannelTable0[0][0]);
- pbyAddr3 = &(abyVT3226_ChannelTable1[0][0]);
- break;
- case RF_VT3342A0: //RobertYu:20060609
- wLength1 = CB_VT3342_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL * 3;
- wLength3 = CB_MAX_CHANNEL * 3;
- pbyAddr1 = &(abyVT3342A0_InitTable[0][0]);
- pbyAddr2 = &(abyVT3342_ChannelTable0[0][0]);
- pbyAddr3 = &(abyVT3342_ChannelTable1[0][0]);
- break;
-
- }
- //Init Table
-
- memcpy(abyArray, pbyAddr1, wLength1);
- CONTROLnsRequestOut(pDevice,
- MESSAGE_TYPE_WRITE,
- 0,
- MESSAGE_REQUEST_RF_INIT,
- wLength1,
- abyArray
- );
- //Channel Table 0
- wValue = 0;
- while ( wLength2 > 0 ) {
-
- if ( wLength2 >= 64 ) {
- wLength = 64;
- } else {
- wLength = wLength2;
- }
- memcpy(abyArray, pbyAddr2, wLength);
- CONTROLnsRequestOut(pDevice,
- MESSAGE_TYPE_WRITE,
- wValue,
- MESSAGE_REQUEST_RF_CH0,
- wLength,
- abyArray);
-
- wLength2 -= wLength;
- wValue += wLength;
- pbyAddr2 += wLength;
- }
- //Channel table 1
- wValue = 0;
- while ( wLength3 > 0 ) {
-
- if ( wLength3 >= 64 ) {
- wLength = 64;
- } else {
- wLength = wLength3;
- }
- memcpy(abyArray, pbyAddr3, wLength);
- CONTROLnsRequestOut(pDevice,
- MESSAGE_TYPE_WRITE,
- wValue,
- MESSAGE_REQUEST_RF_CH1,
- wLength,
- abyArray);
-
- wLength3 -= wLength;
- wValue += wLength;
- pbyAddr3 += wLength;
- }
-
- //7230 needs 2 InitTable and 3 Channel Table
- if ( pDevice->byRFType == RF_AIROHA7230 ) {
- wLength1 = CB_AL7230_INIT_SEQ * 3;
- wLength2 = CB_MAX_CHANNEL * 3;
- pbyAddr1 = &(abyAL7230InitTableAMode[0][0]);
- pbyAddr2 = &(abyAL7230ChannelTable2[0][0]);
- memcpy(abyArray, pbyAddr1, wLength1);
- //Init Table 2
- CONTROLnsRequestOut(pDevice,
- MESSAGE_TYPE_WRITE,
- 0,
- MESSAGE_REQUEST_RF_INIT2,
- wLength1,
- abyArray);
-
- //Channel Table 0
- wValue = 0;
- while ( wLength2 > 0 ) {
-
- if ( wLength2 >= 64 ) {
- wLength = 64;
- } else {
- wLength = wLength2;
- }
- memcpy(abyArray, pbyAddr2, wLength);
- CONTROLnsRequestOut(pDevice,
- MESSAGE_TYPE_WRITE,
- wValue,
- MESSAGE_REQUEST_RF_CH2,
- wLength,
- abyArray);
-
- wLength2 -= wLength;
- wValue += wLength;
- pbyAddr2 += wLength;
- }
- }
-
+ u16 length1 = 0, length2 = 0, length3 = 0;
+ u8 *addr1 = NULL, *addr2 = NULL, *addr3 = NULL;
+ u16 length, value;
+ u8 array[256];
+
+ switch (priv->byRFType) {
+ case RF_AL2230:
+ case RF_AL2230S:
+ length1 = CB_AL2230_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL_24G * 3;
+ length3 = CB_MAX_CHANNEL_24G * 3;
+ addr1 = &al2230_init_table[0][0];
+ addr2 = &al2230_channel_table0[0][0];
+ addr3 = &al2230_channel_table1[0][0];
+ break;
+ case RF_AIROHA7230:
+ length1 = CB_AL7230_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL * 3;
+ length3 = CB_MAX_CHANNEL * 3;
+ addr1 = &al7230_init_table[0][0];
+ addr2 = &al7230_channel_table0[0][0];
+ addr3 = &al7230_channel_table1[0][0];
+ break;
+ case RF_VT3226:
+ length1 = CB_VT3226_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL_24G * 3;
+ length3 = CB_MAX_CHANNEL_24G * 3;
+ addr1 = &at3226_init_table[0][0];
+ addr2 = &vt3226_channel_table0[0][0];
+ addr3 = &vt3226_channel_table1[0][0];
+ break;
+ case RF_VT3226D0:
+ length1 = CB_VT3226_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL_24G * 3;
+ length3 = CB_MAX_CHANNEL_24G * 3;
+ addr1 = &at3226d0_init_table[0][0];
+ addr2 = &vt3226_channel_table0[0][0];
+ addr3 = &vt3226_channel_table1[0][0];
+ break;
+ case RF_VT3342A0:
+ length1 = CB_VT3342_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL * 3;
+ length3 = CB_MAX_CHANNEL * 3;
+ addr1 = &vt3342a0_init_table[0][0];
+ addr2 = &vt3342_channel_table0[0][0];
+ addr3 = &vt3342_channel_table1[0][0];
+ break;
+ }
+
+ /* Init Table */
+ memcpy(array, addr1, length1);
+
+ CONTROLnsRequestOut(priv, MESSAGE_TYPE_WRITE, 0,
+ MESSAGE_REQUEST_RF_INIT, length1, array);
+
+ /* Channel Table 0 */
+ value = 0;
+ while (length2 > 0) {
+ if (length2 >= 64)
+ length = 64;
+ else
+ length = length2;
+
+ memcpy(array, addr2, length);
+
+ CONTROLnsRequestOut(priv, MESSAGE_TYPE_WRITE,
+ value, MESSAGE_REQUEST_RF_CH0, length, array);
+
+ length2 -= length;
+ value += length;
+ addr2 += length;
+ }
+
+ /* Channel table 1 */
+ value = 0;
+ while (length3 > 0) {
+ if (length3 >= 64)
+ length = 64;
+ else
+ length = length3;
+
+ memcpy(array, addr3, length);
+
+ CONTROLnsRequestOut(priv, MESSAGE_TYPE_WRITE,
+ value, MESSAGE_REQUEST_RF_CH1, length, array);
+
+ length3 -= length;
+ value += length;
+ addr3 += length;
+ }
+
+ if (priv->byRFType == RF_AIROHA7230) {
+ length1 = CB_AL7230_INIT_SEQ * 3;
+ length2 = CB_MAX_CHANNEL * 3;
+ addr1 = &(al7230_init_table_amode[0][0]);
+ addr2 = &(al7230_channel_table2[0][0]);
+
+ memcpy(array, addr1, length1);
+
+ /* Init Table 2 */
+ CONTROLnsRequestOut(priv, MESSAGE_TYPE_WRITE,
+ 0, MESSAGE_REQUEST_RF_INIT2, length1, array);
+
+ /* Channel Table 0 */
+ value = 0;
+ while (length2 > 0) {
+ if (length2 >= 64)
+ length = 64;
+ else
+ length = length2;
+
+ memcpy(array, addr2, length);
+
+ CONTROLnsRequestOut(priv, MESSAGE_TYPE_WRITE,
+ value, MESSAGE_REQUEST_RF_CH2, length, array);
+
+ length2 -= length;
+ value += length;
+ addr2 += length;
+ }
+ }
}
diff --git a/drivers/staging/vt6656/tether.h b/drivers/staging/vt6656/tether.h
index 24465cfe3e6d..aec6b568a4a9 100644
--- a/drivers/staging/vt6656/tether.h
+++ b/drivers/staging/vt6656/tether.h
@@ -99,16 +99,6 @@
#define WEP_IV_MASK 0x00FFFFFF
-//
-// 802_3 packet
-//
-typedef struct tagS802_3Header {
- u8 abyDstAddr[ETH_ALEN];
- u8 abySrcAddr[ETH_ALEN];
- u16 wLen;
-} __attribute__ ((__packed__))
-S802_3Header, *PS802_3Header;
-
//u8 ETHbyGetHashIndexByCrc(u8 * pbyMultiAddr);
bool ETHbIsBufferCrc32Ok(u8 * pbyBuffer, unsigned int cbFrameLength);
diff --git a/drivers/staging/vt6656/tmacro.h b/drivers/staging/vt6656/tmacro.h
index 15cd5abb8004..15e724e4d4ba 100644
--- a/drivers/staging/vt6656/tmacro.h
+++ b/drivers/staging/vt6656/tmacro.h
@@ -45,14 +45,8 @@
#define HIWORD(d) ((u16)((((u32)(d)) >> 16) & 0xFFFF))
#endif
-#define LODWORD(q) ((q).u.dwLowDword)
-#define HIDWORD(q) ((q).u.dwHighDword)
-
#if !defined(MAKEWORD)
#define MAKEWORD(lb, hb) ((u16)(((u8)(lb)) | (((u16)((u8)(hb))) << 8)))
#endif
-#if !defined(MAKEDWORD)
-#define MAKEDWORD(lw, hw) ((u32)(((u16)(lw)) | (((u32)((u16)(hw))) << 16)))
-#endif
#endif /* __TMACRO_H__ */
diff --git a/drivers/staging/winbond/phy_calibration.c b/drivers/staging/winbond/phy_calibration.c
index cabae3466704..cfbfbbb53866 100644
--- a/drivers/staging/winbond/phy_calibration.c
+++ b/drivers/staging/winbond/phy_calibration.c
@@ -296,7 +296,7 @@ void _sin_cos(s32 angle, s32 *sin, s32 *cos)
}
}
-static unsigned char hal_get_dxx_reg(struct hw_data *pHwData, u16 number, u32 * pValue)
+static unsigned char hal_get_dxx_reg(struct hw_data *pHwData, u16 number, u32 *pValue)
{
if (number < 0x1000)
number += 0x1000;
diff --git a/drivers/staging/winbond/reg.c b/drivers/staging/winbond/reg.c
index 5ecf9a121e78..75b775252af1 100644
--- a/drivers/staging/winbond/reg.c
+++ b/drivers/staging/winbond/reg.c
@@ -920,20 +920,20 @@ void Uxx_power_on_procedure(struct hw_data *pHwData)
Wb35Reg_WriteSync(pHwData, 0x03f8, 0x7ff);
}
-void Set_ChanIndep_RfData_al7230_24(struct hw_data *pHwData, u32 *pltmp , char number)
+static void Set_ChanIndep_RfData_al7230_24(struct hw_data *pHwData, u32 *pltmp,
+ char number)
{
u8 i;
-
for (i = 0; i < number; i++) {
pHwData->phy_para[i] = al7230_rf_data_24[i];
pltmp[i] = (1 << 31) | (0 << 30) | (24 << 24) | (al7230_rf_data_24[i] & 0xffffff);
}
}
-void Set_ChanIndep_RfData_al7230_50(struct hw_data *pHwData, u32 *pltmp, char number)
+static void Set_ChanIndep_RfData_al7230_50(struct hw_data *pHwData, u32 *pltmp,
+ char number)
{
u8 i;
-
for (i = 0; i < number; i++) {
pHwData->phy_para[i] = al7230_rf_data_50[i];
pltmp[i] = (1 << 31) | (0 << 30) | (24 << 24) | (al7230_rf_data_50[i] & 0xffffff);
@@ -1263,7 +1263,7 @@ void RFSynthesizer_initial(struct hw_data *pHwData)
}
}
-void BBProcessor_AL7230_2400(struct hw_data *pHwData)
+static void BBProcessor_AL7230_2400(struct hw_data *pHwData)
{
struct wb35_reg *reg = &pHwData->reg;
u32 pltmp[12];
@@ -1304,7 +1304,7 @@ void BBProcessor_AL7230_2400(struct hw_data *pHwData)
Wb35Reg_BurstWrite(pHwData, 0x1030, pltmp, 12, AUTO_INCREMENT);
}
-void BBProcessor_AL7230_5000(struct hw_data *pHwData)
+static void BBProcessor_AL7230_5000(struct hw_data *pHwData)
{
struct wb35_reg *reg = &pHwData->reg;
u32 pltmp[12];
@@ -1620,22 +1620,24 @@ void BBProcessor_initial(struct hw_data *pHwData)
reg->SQ3_filter[i] = 0x2f; /* half of Bit 0 ~ 6 */
}
-void set_tx_power_per_channel_max2829(struct hw_data *pHwData, struct chan_info Channel)
+static inline void set_tx_power_per_channel_max2829(struct hw_data *pHwData,
+ struct chan_info Channel)
{
RFSynthesizer_SetPowerIndex(pHwData, 100);
}
-void set_tx_power_per_channel_al2230(struct hw_data *pHwData, struct chan_info Channel)
+static void set_tx_power_per_channel_al2230(struct hw_data *pHwData,
+ struct chan_info Channel)
{
u8 index = 100;
-
if (pHwData->TxVgaFor24[Channel.ChanNo - 1] != 0xff)
index = pHwData->TxVgaFor24[Channel.ChanNo - 1];
RFSynthesizer_SetPowerIndex(pHwData, index);
}
-void set_tx_power_per_channel_al7230(struct hw_data *pHwData, struct chan_info Channel)
+static void set_tx_power_per_channel_al7230(struct hw_data *pHwData,
+ struct chan_info Channel)
{
u8 i, index = 100;
@@ -1658,7 +1660,8 @@ void set_tx_power_per_channel_al7230(struct hw_data *pHwData, struct chan_info
RFSynthesizer_SetPowerIndex(pHwData, index);
}
-void set_tx_power_per_channel_wb242(struct hw_data *pHwData, struct chan_info Channel)
+static void set_tx_power_per_channel_wb242(struct hw_data *pHwData,
+ struct chan_info Channel)
{
u8 index = 100;
diff --git a/drivers/staging/winbond/wb35reg.c b/drivers/staging/winbond/wb35reg.c
index 1bff7d1c9a77..9be1b3b004b0 100644
--- a/drivers/staging/winbond/wb35reg.c
+++ b/drivers/staging/winbond/wb35reg.c
@@ -30,46 +30,46 @@ unsigned char Wb35Reg_BurstWrite(struct hw_data *pHwData, u16 RegisterNo, u32 *p
/* Trying to use burst write function if use new hardware */
UrbSize = sizeof(struct wb35_reg_queue) + DataSize + sizeof(struct usb_ctrlrequest);
reg_queue = kzalloc(UrbSize, GFP_ATOMIC);
+ if (reg_queue == NULL)
+ return false;
+
urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (urb && reg_queue) {
- reg_queue->DIRECT = 2; /* burst write register */
- reg_queue->INDEX = RegisterNo;
- reg_queue->pBuffer = (u32 *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
- memcpy(reg_queue->pBuffer, pRegisterData, DataSize);
- /* the function for reversing register data from little endian to big endian */
- for (i = 0; i < NumberOfData ; i++)
- reg_queue->pBuffer[i] = cpu_to_le32(reg_queue->pBuffer[i]);
-
- dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue) + DataSize);
- dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
- dr->bRequest = 0x04; /* USB or vendor-defined request code, burst mode */
- dr->wValue = cpu_to_le16(Flag); /* 0: Register number auto-increment, 1: No auto increment */
- dr->wIndex = cpu_to_le16(RegisterNo);
- dr->wLength = cpu_to_le16(DataSize);
- reg_queue->Next = NULL;
- reg_queue->pUsbReq = dr;
- reg_queue->urb = urb;
+ if (urb == NULL) {
+ kfree(reg_queue);
+ return false;
+ }
- spin_lock_irq(&reg->EP0VM_spin_lock);
- if (reg->reg_first == NULL)
- reg->reg_first = reg_queue;
- else
- reg->reg_last->Next = reg_queue;
- reg->reg_last = reg_queue;
+ reg_queue->DIRECT = 2; /* burst write register */
+ reg_queue->INDEX = RegisterNo;
+ reg_queue->pBuffer = (u32 *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
+ memcpy(reg_queue->pBuffer, pRegisterData, DataSize);
+ /* the function for reversing register data from little endian to big endian */
+ for (i = 0; i < NumberOfData ; i++)
+ reg_queue->pBuffer[i] = cpu_to_le32(reg_queue->pBuffer[i]);
+
+ dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue) + DataSize);
+ dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
+ dr->bRequest = 0x04; /* USB or vendor-defined request code, burst mode */
+ dr->wValue = cpu_to_le16(Flag); /* 0: Register number auto-increment, 1: No auto increment */
+ dr->wIndex = cpu_to_le16(RegisterNo);
+ dr->wLength = cpu_to_le16(DataSize);
+ reg_queue->Next = NULL;
+ reg_queue->pUsbReq = dr;
+ reg_queue->urb = urb;
- spin_unlock_irq(&reg->EP0VM_spin_lock);
+ spin_lock_irq(&reg->EP0VM_spin_lock);
+ if (reg->reg_first == NULL)
+ reg->reg_first = reg_queue;
+ else
+ reg->reg_last->Next = reg_queue;
+ reg->reg_last = reg_queue;
- /* Start EP0VM */
- Wb35Reg_EP0VM_start(pHwData);
+ spin_unlock_irq(&reg->EP0VM_spin_lock);
- return true;
- } else {
- if (urb)
- usb_free_urb(urb);
- kfree(reg_queue);
- return false;
- }
- return false;
+ /* Start EP0VM */
+ Wb35Reg_EP0VM_start(pHwData);
+
+ return true;
}
void Wb35Reg_Update(struct hw_data *pHwData, u16 RegisterNo, u32 RegisterValue)
@@ -174,43 +174,44 @@ unsigned char Wb35Reg_Write(struct hw_data *pHwData, u16 RegisterNo, u32 Registe
/* update the register by send urb request */
UrbSize = sizeof(struct wb35_reg_queue) + sizeof(struct usb_ctrlrequest);
reg_queue = kzalloc(UrbSize, GFP_ATOMIC);
+ if (reg_queue == NULL)
+ return false;
+
urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (urb && reg_queue) {
- reg_queue->DIRECT = 1; /* burst write register */
- reg_queue->INDEX = RegisterNo;
- reg_queue->VALUE = cpu_to_le32(RegisterValue);
- reg_queue->RESERVED_VALID = false;
- dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
- dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
- dr->bRequest = 0x03; /* USB or vendor-defined request code, burst mode */
- dr->wValue = cpu_to_le16(0x0);
- dr->wIndex = cpu_to_le16(RegisterNo);
- dr->wLength = cpu_to_le16(4);
-
- /* Enter the sending queue */
- reg_queue->Next = NULL;
- reg_queue->pUsbReq = dr;
- reg_queue->urb = urb;
+ if (urb == NULL) {
+ kfree(reg_queue);
+ return false;
+ }
- spin_lock_irq(&reg->EP0VM_spin_lock);
- if (reg->reg_first == NULL)
- reg->reg_first = reg_queue;
- else
- reg->reg_last->Next = reg_queue;
- reg->reg_last = reg_queue;
+ reg_queue->DIRECT = 1; /* burst write register */
+ reg_queue->INDEX = RegisterNo;
+ reg_queue->VALUE = cpu_to_le32(RegisterValue);
+ reg_queue->RESERVED_VALID = false;
+ dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
+ dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
+ dr->bRequest = 0x03; /* USB or vendor-defined request code, burst mode */
+ dr->wValue = cpu_to_le16(0x0);
+ dr->wIndex = cpu_to_le16(RegisterNo);
+ dr->wLength = cpu_to_le16(4);
+
+ /* Enter the sending queue */
+ reg_queue->Next = NULL;
+ reg_queue->pUsbReq = dr;
+ reg_queue->urb = urb;
- spin_unlock_irq(&reg->EP0VM_spin_lock);
+ spin_lock_irq(&reg->EP0VM_spin_lock);
+ if (reg->reg_first == NULL)
+ reg->reg_first = reg_queue;
+ else
+ reg->reg_last->Next = reg_queue;
+ reg->reg_last = reg_queue;
- /* Start EP0VM */
- Wb35Reg_EP0VM_start(pHwData);
+ spin_unlock_irq(&reg->EP0VM_spin_lock);
- return true;
- } else {
- if (urb)
- usb_free_urb(urb);
- kfree(reg_queue);
- return false;
- }
+ /* Start EP0VM */
+ Wb35Reg_EP0VM_start(pHwData);
+
+ return true;
}
/*
@@ -238,43 +239,45 @@ unsigned char Wb35Reg_WriteWithCallbackValue(struct hw_data *pHwData,
/* update the register by send urb request */
UrbSize = sizeof(struct wb35_reg_queue) + sizeof(struct usb_ctrlrequest);
reg_queue = kzalloc(UrbSize, GFP_ATOMIC);
- urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (urb && reg_queue) {
- reg_queue->DIRECT = 1; /* burst write register */
- reg_queue->INDEX = RegisterNo;
- reg_queue->VALUE = cpu_to_le32(RegisterValue);
- /* NOTE : Users must guarantee the size of value will not exceed the buffer size. */
- memcpy(reg_queue->RESERVED, pValue, Len);
- reg_queue->RESERVED_VALID = true;
- dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
- dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
- dr->bRequest = 0x03; /* USB or vendor-defined request code, burst mode */
- dr->wValue = cpu_to_le16(0x0);
- dr->wIndex = cpu_to_le16(RegisterNo);
- dr->wLength = cpu_to_le16(4);
-
- /* Enter the sending queue */
- reg_queue->Next = NULL;
- reg_queue->pUsbReq = dr;
- reg_queue->urb = urb;
- spin_lock_irq(&reg->EP0VM_spin_lock);
- if (reg->reg_first == NULL)
- reg->reg_first = reg_queue;
- else
- reg->reg_last->Next = reg_queue;
- reg->reg_last = reg_queue;
-
- spin_unlock_irq(&reg->EP0VM_spin_lock);
+ if (reg_queue == NULL)
+ return false;
- /* Start EP0VM */
- Wb35Reg_EP0VM_start(pHwData);
- return true;
- } else {
- if (urb)
- usb_free_urb(urb);
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (urb == NULL) {
kfree(reg_queue);
return false;
}
+
+ reg_queue->DIRECT = 1; /* burst write register */
+ reg_queue->INDEX = RegisterNo;
+ reg_queue->VALUE = cpu_to_le32(RegisterValue);
+ /* NOTE : Users must guarantee the size of value will not exceed the buffer size. */
+ memcpy(reg_queue->RESERVED, pValue, Len);
+ reg_queue->RESERVED_VALID = true;
+ dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
+ dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE;
+ dr->bRequest = 0x03; /* USB or vendor-defined request code, burst mode */
+ dr->wValue = cpu_to_le16(0x0);
+ dr->wIndex = cpu_to_le16(RegisterNo);
+ dr->wLength = cpu_to_le16(4);
+
+ /* Enter the sending queue */
+ reg_queue->Next = NULL;
+ reg_queue->pUsbReq = dr;
+ reg_queue->urb = urb;
+ spin_lock_irq(&reg->EP0VM_spin_lock);
+ if (reg->reg_first == NULL)
+ reg->reg_first = reg_queue;
+ else
+ reg->reg_last->Next = reg_queue;
+ reg->reg_last = reg_queue;
+
+ spin_unlock_irq(&reg->EP0VM_spin_lock);
+
+ /* Start EP0VM */
+ Wb35Reg_EP0VM_start(pHwData);
+
+ return true;
}
/*
@@ -344,41 +347,41 @@ unsigned char Wb35Reg_Read(struct hw_data *pHwData, u16 RegisterNo, u32 *pRegist
/* update the variable by send Urb to read register */
UrbSize = sizeof(struct wb35_reg_queue) + sizeof(struct usb_ctrlrequest);
reg_queue = kzalloc(UrbSize, GFP_ATOMIC);
- urb = usb_alloc_urb(0, GFP_ATOMIC);
- if (urb && reg_queue) {
- reg_queue->DIRECT = 0; /* read register */
- reg_queue->INDEX = RegisterNo;
- reg_queue->pBuffer = pRegisterValue;
- dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
- dr->bRequestType = USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN;
- dr->bRequest = 0x01; /* USB or vendor-defined request code, burst mode */
- dr->wValue = cpu_to_le16(0x0);
- dr->wIndex = cpu_to_le16(RegisterNo);
- dr->wLength = cpu_to_le16(4);
-
- /* Enter the sending queue */
- reg_queue->Next = NULL;
- reg_queue->pUsbReq = dr;
- reg_queue->urb = urb;
- spin_lock_irq(&reg->EP0VM_spin_lock);
- if (reg->reg_first == NULL)
- reg->reg_first = reg_queue;
- else
- reg->reg_last->Next = reg_queue;
- reg->reg_last = reg_queue;
-
- spin_unlock_irq(&reg->EP0VM_spin_lock);
-
- /* Start EP0VM */
- Wb35Reg_EP0VM_start(pHwData);
+ if (reg_queue == NULL)
+ return false;
- return true;
- } else {
- if (urb)
- usb_free_urb(urb);
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (urb == NULL) {
kfree(reg_queue);
return false;
}
+ reg_queue->DIRECT = 0; /* read register */
+ reg_queue->INDEX = RegisterNo;
+ reg_queue->pBuffer = pRegisterValue;
+ dr = (struct usb_ctrlrequest *)((u8 *)reg_queue + sizeof(struct wb35_reg_queue));
+ dr->bRequestType = USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN;
+ dr->bRequest = 0x01; /* USB or vendor-defined request code, burst mode */
+ dr->wValue = cpu_to_le16(0x0);
+ dr->wIndex = cpu_to_le16(RegisterNo);
+ dr->wLength = cpu_to_le16(4);
+
+ /* Enter the sending queue */
+ reg_queue->Next = NULL;
+ reg_queue->pUsbReq = dr;
+ reg_queue->urb = urb;
+ spin_lock_irq(&reg->EP0VM_spin_lock);
+ if (reg->reg_first == NULL)
+ reg->reg_first = reg_queue;
+ else
+ reg->reg_last->Next = reg_queue;
+ reg->reg_last = reg_queue;
+
+ spin_unlock_irq(&reg->EP0VM_spin_lock);
+
+ /* Start EP0VM */
+ Wb35Reg_EP0VM_start(pHwData);
+
+ return true;
}
diff --git a/drivers/staging/winbond/wb35rx.c b/drivers/staging/winbond/wb35rx.c
index f118eeba396a..8d71bc2f5940 100644
--- a/drivers/staging/winbond/wb35rx.c
+++ b/drivers/staging/winbond/wb35rx.c
@@ -343,8 +343,7 @@ void Wb35Rx_destroy(struct hw_data *pHwData)
} while (pWb35Rx->EP3vm_state != VM_STOP);
msleep(10); /* Delay for waiting function exit */
- if (pWb35Rx->RxUrb)
- usb_free_urb(pWb35Rx->RxUrb);
+ usb_free_urb(pWb35Rx->RxUrb);
pr_debug("Wb35Rx_destroy OK\n");
}
diff --git a/drivers/staging/wlags49_h2/wl_cs.c b/drivers/staging/wlags49_h2/wl_cs.c
index 7c7c77f9c862..b55dc43a1d11 100644
--- a/drivers/staging/wlags49_h2/wl_cs.c
+++ b/drivers/staging/wlags49_h2/wl_cs.c
@@ -133,6 +133,7 @@ static int wl_adapter_attach(struct pcmcia_device *link)
{
struct net_device *dev;
struct wl_private *lp;
+ int ret;
/*--------------------------------------------------------------------*/
DBG_FUNC("wl_adapter_attach");
@@ -154,10 +155,12 @@ static int wl_adapter_attach(struct pcmcia_device *link)
lp = wl_priv(dev);
lp->link = link;
- wl_adapter_insert(link);
+ ret = wl_adapter_insert(link);
+ if (ret != 0)
+ wl_device_dealloc(dev);
DBG_LEAVE(DbgInfo);
- return 0;
+ return ret;
} /* wl_adapter_attach */
/*============================================================================*/
@@ -224,7 +227,7 @@ static int wl_adapter_resume(struct pcmcia_device *link)
return 0;
} /* wl_adapter_resume */
-void wl_adapter_insert(struct pcmcia_device *link)
+int wl_adapter_insert(struct pcmcia_device *link)
{
struct net_device *dev;
int ret;
@@ -256,7 +259,8 @@ void wl_adapter_insert(struct pcmcia_device *link)
dev->base_addr = link->resource[0]->start;
SET_NETDEV_DEV(dev, &link->dev);
- if (register_netdev(dev) != 0) {
+ ret = register_netdev(dev);
+ if (ret != 0) {
printk("%s: register_netdev() failed\n", MODULE_NAME);
goto failed;
}
@@ -267,13 +271,13 @@ void wl_adapter_insert(struct pcmcia_device *link)
" %pM\n", dev->name, dev->base_addr, dev->irq, dev->dev_addr);
DBG_LEAVE(DbgInfo);
- return;
+ return 0;
failed:
wl_adapter_release(link);
DBG_LEAVE(DbgInfo);
- return;
+ return ret;
} /* wl_adapter_insert */
/*============================================================================*/
diff --git a/drivers/staging/wlags49_h2/wl_cs.h b/drivers/staging/wlags49_h2/wl_cs.h
index a7ab579759de..081cc6f28d1f 100644
--- a/drivers/staging/wlags49_h2/wl_cs.h
+++ b/drivers/staging/wlags49_h2/wl_cs.h
@@ -65,10 +65,10 @@
/*******************************************************************************
- * function protoypes
+ * function prototypes
******************************************************************************/
-void wl_adapter_insert(struct pcmcia_device *link);
+int wl_adapter_insert(struct pcmcia_device *link);
void wl_adapter_release(struct pcmcia_device *link);
diff --git a/drivers/staging/wlags49_h2/wl_main.c b/drivers/staging/wlags49_h2/wl_main.c
index f28f15baea96..43535610acc4 100644
--- a/drivers/staging/wlags49_h2/wl_main.c
+++ b/drivers/staging/wlags49_h2/wl_main.c
@@ -3171,7 +3171,9 @@ void wl_process_mailbox( struct wl_private *lp )
memset( ssid, 0, sizeof( ssid ));
strncpy( ssid, &probe_rsp->rawData[2],
- probe_rsp->rawData[1] );
+ min_t(u8,
+ probe_rsp->rawData[1],
+ HCF_MAX_NAME_LEN - 1));
DBG_TRACE( DbgInfo, "(%s) SSID : %s\n",
lp->dev->name, ssid );
diff --git a/drivers/staging/wlan-ng/prism2sta.c b/drivers/staging/wlan-ng/prism2sta.c
index 428a9be25010..76374b220228 100644
--- a/drivers/staging/wlan-ng/prism2sta.c
+++ b/drivers/staging/wlan-ng/prism2sta.c
@@ -1122,8 +1122,7 @@ static void prism2sta_inf_hostscanresults(wlandevice_t *wlandev,
kfree(hw->scanresults);
- hw->scanresults = kmalloc(sizeof(hfa384x_InfFrame_t), GFP_ATOMIC);
- memcpy(hw->scanresults, inf, sizeof(hfa384x_InfFrame_t));
+ hw->scanresults = kmemdup(inf, sizeof(hfa384x_InfFrame_t), GFP_ATOMIC);
if (nbss == 0)
nbss = -1;
diff --git a/drivers/staging/xgifb/vb_def.h b/drivers/staging/xgifb/vb_def.h
index 80c972305885..5c739bebd8a5 100644
--- a/drivers/staging/xgifb/vb_def.h
+++ b/drivers/staging/xgifb/vb_def.h
@@ -30,11 +30,6 @@
#define SetCRT2ToDualEdge 0x8000
#define ReserveTVOption 0x0008
-#define GatingCRT 0x0800
-#define DisableChB 0x1000
-#define EnableChB 0x2000
-#define DisableChA 0x4000
-#define EnableChA 0x8000
#define SetTVLowResolution 0x0400
#define TVSimuMode 0x0800
diff --git a/drivers/staging/xgifb/vb_init.c b/drivers/staging/xgifb/vb_init.c
index 19ce5a978cae..5f1c41ed778b 100644
--- a/drivers/staging/xgifb/vb_init.c
+++ b/drivers/staging/xgifb/vb_init.c
@@ -54,14 +54,12 @@ XGINew_GetXG20DRAMType(struct xgi_hw_device_info *HwDeviceExtension,
udelay(800);
xgifb_reg_or(pVBInfo->P3d4, 0x4A, 0x80); /* Enable GPIOH read */
/* GPIOF 0:DVI 1:DVO */
- temp = xgifb_reg_get(pVBInfo->P3d4, 0x48);
+ data = xgifb_reg_get(pVBInfo->P3d4, 0x48);
/* HOTPLUG_SUPPORT */
/* for current XG20 & XG21, GPIOH is floating, driver will
* fix DDR temporarily */
- if (temp & 0x01) /* DVI read GPIOH */
- data = 1; /* DDRII */
- else
- data = 0; /* DDR */
+ /* DVI read GPIOH */
+ data &= 0x01; /* 1=DDRII, 0=DDR */
/* ~HOTPLUG_SUPPORT */
xgifb_reg_or(pVBInfo->P3d4, 0xB4, 0x02);
return data;
@@ -1079,44 +1077,23 @@ static unsigned short XGINew_SenseLCD(struct xgi_hw_device_info
*HwDeviceExtension,
struct vb_device_info *pVBInfo)
{
- unsigned short temp;
-
- /* add lcd sense */
- if (HwDeviceExtension->ulCRT2LCDType == LCD_UNKNOWN) {
+ unsigned short temp = HwDeviceExtension->ulCRT2LCDType;
+
+ switch (HwDeviceExtension->ulCRT2LCDType) {
+ case LCD_640x480:
+ case LCD_1024x600:
+ case LCD_1152x864:
+ case LCD_1280x960:
+ case LCD_1152x768:
+ case LCD_1920x1440:
+ case LCD_2048x1536:
+ temp = 0; /* overwrite used ulCRT2LCDType */
+ break;
+ case LCD_UNKNOWN: /* unknown lcd, do nothing */
return 0;
- } else {
- temp = (unsigned short) HwDeviceExtension->ulCRT2LCDType;
- switch (HwDeviceExtension->ulCRT2LCDType) {
- case LCD_INVALID:
- case LCD_800x600:
- case LCD_1024x768:
- case LCD_1280x1024:
- break;
-
- case LCD_640x480:
- case LCD_1024x600:
- case LCD_1152x864:
- case LCD_1280x960:
- case LCD_1152x768:
- temp = 0;
- break;
-
- case LCD_1400x1050:
- case LCD_1280x768:
- case LCD_1600x1200:
- break;
-
- case LCD_1920x1440:
- case LCD_2048x1536:
- temp = 0;
- break;
-
- default:
- break;
- }
- xgifb_reg_and_or(pVBInfo->P3d4, 0x36, 0xF0, temp);
- return 1;
}
+ xgifb_reg_and_or(pVBInfo->P3d4, 0x36, 0xF0, temp);
+ return 1;
}
static void XGINew_GetXG21Sense(struct pci_dev *pdev,
@@ -1138,17 +1115,11 @@ static void XGINew_GetXG21Sense(struct pci_dev *pdev,
xgifb_reg_or(pVBInfo->P3d4, 0x32, LCDSense);
/* Enable read GPIOF */
xgifb_reg_and_or(pVBInfo->P3d4, 0x4A, ~0x20, 0x20);
- Temp = xgifb_reg_get(pVBInfo->P3d4, 0x48) & 0x04;
- if (!Temp)
- xgifb_reg_and_or(pVBInfo->P3d4,
- 0x38,
- ~0xE0,
- 0x80); /* TMDS on chip */
+ if (xgifb_reg_get(pVBInfo->P3d4, 0x48) & 0x04)
+ Temp = 0xA0; /* Only DVO on chip */
else
- xgifb_reg_and_or(pVBInfo->P3d4,
- 0x38,
- ~0xE0,
- 0xA0); /* Only DVO on chip */
+ Temp = 0x80; /* TMDS on chip */
+ xgifb_reg_and_or(pVBInfo->P3d4, 0x38, ~0xE0, Temp);
/* Disable read GPIOF */
xgifb_reg_and(pVBInfo->P3d4, 0x4A, ~0x20);
}
@@ -1206,9 +1177,7 @@ static unsigned char GetXG27FPBits(struct vb_device_info *pVBInfo)
/* enable GPIOA/B/C read */
xgifb_reg_and_or(pVBInfo->P3d4, 0x4A, ~0x03, 0x03);
temp = xgifb_reg_get(pVBInfo->P3d4, 0x48);
- if (temp <= 2)
- temp &= 0x03;
- else
+ if (temp > 2)
temp = ((temp & 0x04) >> 1) | ((~temp) & 0x01);
xgifb_reg_set(pVBInfo->P3d4, 0x4A, CR4A);
@@ -1216,6 +1185,14 @@ static unsigned char GetXG27FPBits(struct vb_device_info *pVBInfo)
return temp;
}
+static bool xgifb_bridge_is_on(struct vb_device_info *vb_info)
+{
+ u8 flag;
+
+ flag = xgifb_reg_get(vb_info->Part4Port, 0x00);
+ return flag == 1 || flag == 2;
+}
+
unsigned char XGIInitNew(struct pci_dev *pdev)
{
struct xgifb_video_info *xgifb_info = pci_get_drvdata(pdev);
@@ -1235,10 +1212,6 @@ unsigned char XGIInitNew(struct pci_dev *pdev)
outb(0x67, pVBInfo->P3c2);
- if (HwDeviceExtension->jChipType < XG20)
- /* Run XGI_GetVBType before InitTo330Pointer */
- XGI_GetVBType(pVBInfo);
-
InitTo330Pointer(HwDeviceExtension->jChipType, pVBInfo);
/* Openkey */
@@ -1327,7 +1300,6 @@ unsigned char XGIInitNew(struct pci_dev *pdev)
xgifb_reg_set(pVBInfo->Part1Port, 0x00, 0x00);
/* chk if BCLK>=100MHz */
temp1 = xgifb_reg_get(pVBInfo->P3d4, 0x7B);
- temp = (unsigned char) ((temp1 >> 4) & 0x0F);
xgifb_reg_set(pVBInfo->Part1Port,
0x02, XGI330_CRT2Data_1_2);
@@ -1353,7 +1325,7 @@ unsigned char XGIInitNew(struct pci_dev *pdev)
xgifb_reg_set(pVBInfo->P3c4, 0x33, XGI330_SR33);
if (HwDeviceExtension->jChipType < XG20) {
- if (XGI_BridgeIsOn(pVBInfo) == 1) {
+ if (xgifb_bridge_is_on(pVBInfo)) {
xgifb_reg_set(pVBInfo->Part2Port, 0x00, 0x1C);
xgifb_reg_set(pVBInfo->Part4Port,
0x0D, XGI330_CRT2Data_4_D);
diff --git a/drivers/staging/xgifb/vb_setmode.c b/drivers/staging/xgifb/vb_setmode.c
index 3adec3f18462..fcefe5b36cdd 100644
--- a/drivers/staging/xgifb/vb_setmode.c
+++ b/drivers/staging/xgifb/vb_setmode.c
@@ -35,6 +35,9 @@ void InitTo330Pointer(unsigned char ChipType, struct vb_device_info *pVBInfo)
pVBInfo->SR18 = XGI340_SR18;
pVBInfo->CR40 = XGI340_cr41;
+ if (ChipType < XG20)
+ XGI_GetVBType(pVBInfo);
+
/* 310 customization related */
if ((pVBInfo->VBType & VB_SIS301LV) || (pVBInfo->VBType & VB_SIS302LV))
pVBInfo->LCDCapList = XGI_LCDDLCapList;
@@ -180,66 +183,45 @@ static unsigned char XGI_AjustCRT2Rate(unsigned short ModeNo,
tempbx = XGI330_RefIndex[RefreshRateTableIndex + (*i)].ModeID;
tempax = 0;
- if (pVBInfo->IF_DEF_LVDS == 0) {
- if (pVBInfo->VBInfo & SetCRT2ToRAMDAC) {
- tempax |= SupportRAMDAC2;
-
- if (pVBInfo->VBType & VB_XGI301C)
- tempax |= SupportCRT2in301C;
- }
-
- /* 301b */
- if (pVBInfo->VBInfo & (SetCRT2ToLCD | XGI_SetCRT2ToLCDA)) {
- tempax |= SupportLCD;
+ if (pVBInfo->VBInfo & SetCRT2ToRAMDAC) {
+ tempax |= SupportRAMDAC2;
- if (pVBInfo->LCDResInfo != Panel_1280x1024 &&
- pVBInfo->LCDResInfo != Panel_1280x960 &&
- (pVBInfo->LCDInfo & LCDNonExpanding) &&
- resinfo >= 9)
- return 0;
- }
+ if (pVBInfo->VBType & VB_XGI301C)
+ tempax |= SupportCRT2in301C;
+ }
- if (pVBInfo->VBInfo & SetCRT2ToHiVision) { /* for HiTV */
- tempax |= SupportHiVision;
- if ((pVBInfo->VBInfo & SetInSlaveMode) &&
- ((resinfo == 4) ||
- (resinfo == 3 &&
- (pVBInfo->SetFlag & TVSimuMode)) ||
- (resinfo > 7)))
- return 0;
- } else if (pVBInfo->VBInfo & (SetCRT2ToAVIDEO |
- SetCRT2ToSVIDEO |
- SetCRT2ToSCART |
- SetCRT2ToYPbPr525750 |
- SetCRT2ToHiVision)) {
- tempax |= SupportTV;
-
- if (pVBInfo->VBType & (VB_SIS301B |
- VB_SIS302B |
- VB_SIS301LV |
- VB_SIS302LV |
- VB_XGI301C))
- tempax |= SupportTV1024;
-
- if (!(pVBInfo->VBInfo & TVSetPAL) &&
- (modeflag & NoSupportSimuTV) &&
- (pVBInfo->VBInfo & SetInSlaveMode) &&
- (!(pVBInfo->VBInfo & SetNotSimuMode)))
- return 0;
- }
- } else if (pVBInfo->VBInfo & SetCRT2ToLCD) { /* for LVDS */
+ /* 301b */
+ if (pVBInfo->VBInfo & (SetCRT2ToLCD | XGI_SetCRT2ToLCDA)) {
tempax |= SupportLCD;
- if (resinfo > 0x08)
- return 0; /* 1024x768 */
-
- if (pVBInfo->LCDResInfo < Panel_1024x768) {
- if (resinfo > 0x07)
- return 0; /* 800x600 */
+ if (pVBInfo->LCDResInfo != Panel_1280x1024 &&
+ pVBInfo->LCDResInfo != Panel_1280x960 &&
+ (pVBInfo->LCDInfo & LCDNonExpanding) &&
+ resinfo >= 9)
+ return 0;
+ }
- if (resinfo == 0x04)
- return 0; /* 512x384 */
- }
+ if (pVBInfo->VBInfo & SetCRT2ToHiVision) { /* for HiTV */
+ tempax |= SupportHiVision;
+ if ((pVBInfo->VBInfo & SetInSlaveMode) &&
+ ((resinfo == 4) ||
+ (resinfo == 3 && (pVBInfo->SetFlag & TVSimuMode)) ||
+ (resinfo > 7)))
+ return 0;
+ } else if (pVBInfo->VBInfo & (SetCRT2ToAVIDEO | SetCRT2ToSVIDEO |
+ SetCRT2ToSCART | SetCRT2ToYPbPr525750 |
+ SetCRT2ToHiVision)) {
+ tempax |= SupportTV;
+
+ if (pVBInfo->VBType & (VB_SIS301B | VB_SIS302B | VB_SIS301LV |
+ VB_SIS302LV | VB_XGI301C))
+ tempax |= SupportTV1024;
+
+ if (!(pVBInfo->VBInfo & TVSetPAL) &&
+ (modeflag & NoSupportSimuTV) &&
+ (pVBInfo->VBInfo & SetInSlaveMode) &&
+ (!(pVBInfo->VBInfo & SetNotSimuMode)))
+ return 0;
}
for (; XGI330_RefIndex[RefreshRateTableIndex + (*i)].ModeID ==
@@ -759,7 +741,6 @@ static void XGI_SetCRT1DE(struct xgi_hw_device_info *HwDeviceExtension,
xgifb_reg_and_or(pVBInfo->P3d4, 0x07, ~0x42, tempax);
data = xgifb_reg_get(pVBInfo->P3d4, 0x07);
- data &= 0xFF;
tempax = 0;
if (tempbx & 0x04)
@@ -914,16 +895,10 @@ static void XGI_SetCRT1VCLK(unsigned short ModeNo,
unsigned char index, data;
unsigned short vclkindex;
- if (pVBInfo->IF_DEF_LVDS == 1) {
- index = XGI330_RefIndex[RefreshRateTableIndex].Ext_CRTVCLK;
- data = xgifb_reg_get(pVBInfo->P3c4, 0x31) & 0xCF;
- xgifb_reg_set(pVBInfo->P3c4, 0x31, data);
- xgifb_reg_set(pVBInfo->P3c4, 0x2B, XGI_VCLKData[index].SR2B);
- xgifb_reg_set(pVBInfo->P3c4, 0x2C, XGI_VCLKData[index].SR2C);
- xgifb_reg_set(pVBInfo->P3c4, 0x2D, 0x01);
- } else if ((pVBInfo->VBType & (VB_SIS301B | VB_SIS302B | VB_SIS301LV
- | VB_SIS302LV | VB_XGI301C)) && (pVBInfo->VBInfo
- & XGI_SetCRT2ToLCDA)) {
+ if ((pVBInfo->IF_DEF_LVDS == 0) &&
+ (pVBInfo->VBType & (VB_SIS301B | VB_SIS302B | VB_SIS301LV |
+ VB_SIS302LV | VB_XGI301C)) &&
+ (pVBInfo->VBInfo & XGI_SetCRT2ToLCDA)) {
vclkindex = XGI_GetVCLK2Ptr(ModeNo, ModeIdIndex,
RefreshRateTableIndex, HwDeviceExtension,
pVBInfo);
@@ -1448,8 +1423,6 @@ static void XGI_GetLCDSync(unsigned short *HSyncWidth,
Index = XGI_GetLCDCapPtr(pVBInfo);
*HSyncWidth = pVBInfo->LCDCapList[Index].LCD_HSyncWidth;
*VSyncWidth = pVBInfo->LCDCapList[Index].LCD_VSyncWidth;
-
- return;
}
static void XGI_SetLVDSRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
@@ -1589,10 +1562,8 @@ static void XGI_SetLVDSRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
xgifb_reg_and_or(pVBInfo->Part1Port, 0x1a, 0x07,
tempax);
- tempcx = pVBInfo->VGAVT;
tempbx = pVBInfo->VDE;
tempax = pVBInfo->VGAVDE;
- tempcx -= tempax;
temp = tempax; /* 0430 ylshieh */
temp1 = (temp << 18) / tempbx;
@@ -1712,7 +1683,6 @@ static void XGI_GetLCDVCLKPtr(unsigned char *di_0, unsigned char *di_1,
*di_1 = pVBInfo->LCDCapList[index].LCDA_VCLKData2;
}
}
- return;
}
static unsigned char XGI_GetVCLKPtr(unsigned short RefreshRateTableIndex,
@@ -1907,8 +1877,6 @@ static void XGI_UpdateModeInfo(struct xgi_hw_device_info *HwDeviceExtension,
if (!(pVBInfo->SetFlag & ReserveTVOption))
xgifb_reg_set(pVBInfo->P3d4, 0x3e, tempch);
- } else {
- return;
}
}
@@ -1916,9 +1884,6 @@ void XGI_GetVBType(struct vb_device_info *pVBInfo)
{
unsigned short flag, tempbx, tempah;
- if (pVBInfo->IF_DEF_LVDS != 0)
- return;
-
tempbx = VB_SIS302B;
flag = xgifb_reg_get(pVBInfo->Part4Port, 0x00);
if (flag == 0x02)
@@ -1995,37 +1960,23 @@ static void XGI_GetVBInfo(unsigned short ModeNo, unsigned short ModeIdIndex,
}
}
- if (pVBInfo->IF_DEF_YPbPr == 1) {
- if (pVBInfo->VBType & (VB_SIS301LV|VB_SIS302LV|VB_XGI301C)) {
- if (temp & SetYPbPr) {
- if (pVBInfo->IF_DEF_HiVision == 1) {
- /* shampoo add for new scratch */
- temp = xgifb_reg_get(pVBInfo->P3d4,
- 0x35);
- temp &= YPbPrMode;
- tempbx |= SetCRT2ToHiVision;
+ if (pVBInfo->VBType & (VB_SIS301LV|VB_SIS302LV|VB_XGI301C)) {
+ if (temp & SetYPbPr) {
+ /* shampoo add for new scratch */
+ temp = xgifb_reg_get(pVBInfo->P3d4, 0x35);
+ temp &= YPbPrMode;
+ tempbx |= SetCRT2ToHiVision;
- if (temp != YPbPrMode1080i) {
- tempbx &= (~SetCRT2ToHiVision);
- tempbx |= SetCRT2ToYPbPr525750;
- }
- }
+ if (temp != YPbPrMode1080i) {
+ tempbx &= (~SetCRT2ToHiVision);
+ tempbx |= SetCRT2ToYPbPr525750;
}
}
}
tempax = push; /* restore CR31 */
- if (pVBInfo->IF_DEF_YPbPr == 1) {
- if (pVBInfo->IF_DEF_HiVision == 1)
- temp = 0x09FC;
- else
- temp = 0x097C;
- } else if (pVBInfo->IF_DEF_HiVision == 1) {
- temp = 0x01FC;
- } else {
- temp = 0x017C;
- }
+ temp = 0x09FC;
if (!(tempbx & temp)) {
tempax |= DisableCRT2Display;
@@ -2046,15 +1997,10 @@ static void XGI_GetVBInfo(unsigned short ModeNo, unsigned short ModeIdIndex,
/* shampoo add */
/* for driver abnormal */
if (!(tempbx & (SwitchCRT2 | SetSimuScanMode))) {
- if (pVBInfo->IF_DEF_CRT2Monitor == 1) {
- if (tempbx & SetCRT2ToRAMDAC) {
- tempbx &= (0xFF00 | SetCRT2ToRAMDAC |
- SwitchCRT2 | SetSimuScanMode);
- tempbx &= (0x00FF | (~SetCRT2ToYPbPr525750));
- }
- } else {
- tempbx &= (~(SetCRT2ToRAMDAC | SetCRT2ToLCD |
- SetCRT2ToTV));
+ if (tempbx & SetCRT2ToRAMDAC) {
+ tempbx &= (0xFF00 | SetCRT2ToRAMDAC |
+ SwitchCRT2 | SetSimuScanMode);
+ tempbx &= (0x00FF | (~SetCRT2ToYPbPr525750));
}
}
@@ -2072,16 +2018,12 @@ static void XGI_GetVBInfo(unsigned short ModeNo, unsigned short ModeIdIndex,
tempbx &= (0x00FF | (~SetCRT2ToYPbPr525750));
}
- if (pVBInfo->IF_DEF_YPbPr == 1) {
- if (tempbx & SetCRT2ToYPbPr525750)
- tempbx &= (0xFF00 | SwitchCRT2 | SetSimuScanMode);
- }
+ if (tempbx & SetCRT2ToYPbPr525750)
+ tempbx &= (0xFF00 | SwitchCRT2 | SetSimuScanMode);
- if (pVBInfo->IF_DEF_HiVision == 1) {
- if (tempbx & SetCRT2ToHiVision)
- tempbx &= (0xFF00 | SetCRT2ToHiVision | SwitchCRT2 |
- SetSimuScanMode);
- }
+ if (tempbx & SetCRT2ToHiVision)
+ tempbx &= (0xFF00 | SetCRT2ToHiVision | SwitchCRT2 |
+ SetSimuScanMode);
if (tempax & DisableCRT2Display) { /* Set Display Device Info */
if (!(tempbx & (SwitchCRT2 | SetSimuScanMode)))
@@ -2132,25 +2074,21 @@ static void XGI_GetTVInfo(unsigned short ModeNo, unsigned short ModeIdIndex,
if (pVBInfo->VBInfo & SetCRT2ToSCART)
tempbx |= TVSetPAL;
- if (pVBInfo->IF_DEF_YPbPr == 1) {
- if (pVBInfo->VBInfo & SetCRT2ToYPbPr525750) {
- index1 = xgifb_reg_get(pVBInfo->P3d4, 0x35);
- index1 &= YPbPrMode;
+ if (pVBInfo->VBInfo & SetCRT2ToYPbPr525750) {
+ index1 = xgifb_reg_get(pVBInfo->P3d4, 0x35);
+ index1 &= YPbPrMode;
- if (index1 == YPbPrMode525i)
- tempbx |= TVSetYPbPr525i;
+ if (index1 == YPbPrMode525i)
+ tempbx |= TVSetYPbPr525i;
- if (index1 == YPbPrMode525p)
- tempbx = tempbx | TVSetYPbPr525p;
- if (index1 == YPbPrMode750p)
- tempbx = tempbx | TVSetYPbPr750p;
- }
+ if (index1 == YPbPrMode525p)
+ tempbx = tempbx | TVSetYPbPr525p;
+ if (index1 == YPbPrMode750p)
+ tempbx = tempbx | TVSetYPbPr750p;
}
- if (pVBInfo->IF_DEF_HiVision == 1) {
- if (pVBInfo->VBInfo & SetCRT2ToHiVision)
- tempbx = tempbx | TVSetHiVision | TVSetPAL;
- }
+ if (pVBInfo->VBInfo & SetCRT2ToHiVision)
+ tempbx = tempbx | TVSetHiVision | TVSetPAL;
if ((pVBInfo->VBInfo & SetInSlaveMode) &&
(!(pVBInfo->VBInfo & SetNotSimuMode)))
@@ -2657,10 +2595,7 @@ static void XGI_GetCRT2Data(unsigned short ModeNo, unsigned short ModeIdIndex,
tempbx = 775;
else if (pVBInfo->VGAVDE == 600)
tempbx = 775;
- else
- tempbx = 768;
- } else
- tempbx = 768;
+ }
} else if (pVBInfo->LCDResInfo == Panel_1024x768x75) {
tempax = 1024;
tempbx = 768;
@@ -2784,7 +2719,6 @@ static void XGI_GetCRT2Data(unsigned short ModeNo, unsigned short ModeIdIndex,
pVBInfo->HT = tempax;
pVBInfo->VT = tempbx;
- return;
}
}
@@ -3015,9 +2949,6 @@ static void XGI_SetGroup1(unsigned short ModeNo, unsigned short ModeIdIndex,
temp |= ((tempcx & 0xFF00) >> 8);
xgifb_reg_set(pVBInfo->Part1Port, 0x12, temp);
- tempax = pVBInfo->VGAVDE;
- tempbx = pVBInfo->VGAVDE;
- tempcx = pVBInfo->VGAVT;
/* BTVGA2VRS 0x10,0x11 */
tempbx = (pVBInfo->VGAVT + pVBInfo->VGAVDE) >> 1;
/* BTVGA2VRE 0x11 */
@@ -3178,7 +3109,7 @@ static void XGI_SetLockRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
if (pVBInfo->VBInfo & SetCRT2ToTV) {
if (pVBInfo->TVInfo & TVSimuMode) {
if (ModeNo == 0x50) {
- if (pVBInfo->TVInfo & SetNTSCTV) {
+ if (pVBInfo->TVInfo == SetNTSCTV) {
xgifb_reg_set(pVBInfo->Part1Port,
0x07, 0x30);
xgifb_reg_set(pVBInfo->Part1Port,
@@ -3226,7 +3157,6 @@ static void XGI_SetLockRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
}
}
tempbx--;
- temp = tempbx & 0x00FF;
tempbx--;
temp = tempbx & 0x00FF;
/* 0x10 vertical Blank Start */
@@ -3361,8 +3291,6 @@ static void XGI_SetLockRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
temp = 0x00;
xgifb_reg_set(pVBInfo->Part1Port, 0x1A, temp); /* 0x1A SR0E */
-
- return;
}
static void XGI_SetGroup2(unsigned short ModeNo, unsigned short ModeIdIndex,
@@ -3445,9 +3373,6 @@ static void XGI_SetGroup2(unsigned short ModeNo, unsigned short ModeIdIndex,
temp &= 0x80;
xgifb_reg_and_or(pVBInfo->Part2Port, 0x0A, 0xFF, temp);
- if (pVBInfo->VBInfo & SetCRT2ToHiVision)
- tempax = 950;
-
if (pVBInfo->TVInfo & TVSetPAL)
tempax = 520;
else
@@ -3797,9 +3722,6 @@ static void XGI_SetGroup2(unsigned short ModeNo, unsigned short ModeIdIndex,
if (!(pVBInfo->VBInfo & SetInSlaveMode))
xgifb_reg_set(pVBInfo->Part2Port, 0x0B, 0x00);
}
-
- if (pVBInfo->VBInfo & SetCRT2ToTV)
- return;
}
static void XGI_SetLCDRegs(unsigned short ModeNo, unsigned short ModeIdIndex,
@@ -4135,8 +4057,7 @@ static void XGI_SetGroup3(unsigned short ModeNo, unsigned short ModeIdIndex,
xgifb_reg_set(pVBInfo->Part3Port, 0x28, 0x3f);
}
}
- return;
-} /* {end of XGI_SetGroup3} */
+}
static void XGI_SetGroup4(unsigned short ModeNo, unsigned short ModeIdIndex,
unsigned short RefreshRateTableIndex,
@@ -4211,11 +4132,6 @@ static void XGI_SetGroup4(unsigned short ModeNo, unsigned short ModeIdIndex,
tempebx = pVBInfo->VDE;
- if (tempcx & SetCRT2ToHiVision) {
- if (!(temp & 0xE000))
- tempbx = tempbx >> 1;
- }
-
tempcx = pVBInfo->RVBHRS;
temp = tempcx & 0x00FF;
xgifb_reg_set(pVBInfo->Part4Port, 0x18, temp);
@@ -4325,13 +4241,6 @@ static void XGI_SetGroup5(unsigned short ModeNo, unsigned short ModeIdIndex,
XGINew_EnableCRT2(pVBInfo);
}
}
- return;
-}
-
-static void XGI_EnableGatingCRT(struct xgi_hw_device_info *HwDeviceExtension,
- struct vb_device_info *pVBInfo)
-{
- xgifb_reg_and_or(pVBInfo->P3d4, 0x63, 0xBF, 0x40);
}
static void XGI_DisableGatingCRT(struct xgi_hw_device_info *HwDeviceExtension,
@@ -4592,38 +4501,6 @@ static unsigned char XGI_IsLCDON(struct vb_device_info *pVBInfo)
return 0;
}
-/* --------------------------------------------------------------------- */
-/* Function : XGI_EnableChISLCD */
-/* Input : */
-/* Output : 0 -> Not LCD mode */
-/* Description : if bool enable = true -> enable, else disable */
-/* --------------------------------------------------------------------- */
-static unsigned char XGI_EnableChISLCD(struct vb_device_info *pVBInfo,
- bool enable)
-{
- unsigned short tempbx, tempah;
-
- if (enable)
- tempbx = pVBInfo->SetFlag & (EnableChA | EnableChB);
- else
- tempbx = pVBInfo->SetFlag & (DisableChA | DisableChB);
-
- tempah = ~((unsigned short) xgifb_reg_get(pVBInfo->Part1Port, 0x2E));
-
- if (tempbx & (EnableChA | DisableChA)) {
- if (!(tempah & 0x08)) /* Chk LCDA Mode */
- return 0;
- }
-
- if (!(tempbx & (EnableChB | DisableChB)))
- return 0;
-
- if (tempah & 0x01) /* Chk LCDB Mode */
- return 1;
-
- return 0;
-}
-
static void XGI_DisableBridge(struct xgifb_video_info *xgifb_info,
struct xgi_hw_device_info *HwDeviceExtension,
struct vb_device_info *pVBInfo)
@@ -4636,21 +4513,8 @@ static void XGI_DisableBridge(struct xgifb_video_info *xgifb_info,
if (!(pVBInfo->VBInfo &
(DisableCRT2Display | SetSimuScanMode))) {
if (pVBInfo->VBInfo & XGI_SetCRT2ToLCDA) {
- if (pVBInfo->VBInfo & SetCRT2ToDualEdge) {
+ if (pVBInfo->VBInfo & SetCRT2ToDualEdge)
tempah = 0x7F; /* Disable Channel A */
- if (!(pVBInfo->VBInfo &
- XGI_SetCRT2ToLCDA))
- /* Disable Channel B */
- tempah = 0xBF;
-
- if (pVBInfo->SetFlag & DisableChB)
- /* force to disable Cahnnel */
- tempah &= 0xBF;
-
- if (pVBInfo->SetFlag & DisableChA)
- /* Force to disable Channel B */
- tempah &= 0x7F;
- }
}
}
@@ -4660,26 +4524,18 @@ static void XGI_DisableBridge(struct xgifb_video_info *xgifb_info,
if (pVBInfo->VBType & (VB_SIS302LV | VB_XGI301C)) {
if (((pVBInfo->VBInfo &
(SetCRT2ToLCD | XGI_SetCRT2ToLCDA))) ||
- (XGI_EnableChISLCD(pVBInfo, false)) ||
(XGI_IsLCDON(pVBInfo)))
/* LVDS Driver power down */
xgifb_reg_or(pVBInfo->Part4Port, 0x30, 0x80);
}
- if ((pVBInfo->SetFlag & DisableChA) || (pVBInfo->VBInfo
- & (DisableCRT2Display | XGI_SetCRT2ToLCDA
- | SetSimuScanMode))) {
- if (pVBInfo->SetFlag & GatingCRT)
- XGI_EnableGatingCRT(HwDeviceExtension, pVBInfo);
+ if (pVBInfo->VBInfo & (DisableCRT2Display | XGI_SetCRT2ToLCDA |
+ SetSimuScanMode))
XGI_DisplayOff(xgifb_info, HwDeviceExtension, pVBInfo);
- }
- if (pVBInfo->VBInfo & XGI_SetCRT2ToLCDA) {
- if ((pVBInfo->SetFlag & DisableChA) || (pVBInfo->VBInfo
- & XGI_SetCRT2ToLCDA))
- /* Power down */
- xgifb_reg_and(pVBInfo->Part1Port, 0x1e, 0xdf);
- }
+ if (pVBInfo->VBInfo & XGI_SetCRT2ToLCDA)
+ /* Power down */
+ xgifb_reg_and(pVBInfo->Part1Port, 0x1e, 0xdf);
/* disable TV as primary VGA swap */
xgifb_reg_and(pVBInfo->P3c4, 0x32, 0xdf);
@@ -4687,16 +4543,14 @@ static void XGI_DisableBridge(struct xgifb_video_info *xgifb_info,
if ((pVBInfo->VBInfo & (SetSimuScanMode | SetCRT2ToDualEdge)))
xgifb_reg_and(pVBInfo->Part2Port, 0x00, 0xdf);
- if ((pVBInfo->SetFlag & DisableChB) ||
- (pVBInfo->VBInfo &
+ if ((pVBInfo->VBInfo &
(DisableCRT2Display | SetSimuScanMode)) ||
((!(pVBInfo->VBInfo & XGI_SetCRT2ToLCDA)) &&
(pVBInfo->VBInfo &
(SetCRT2ToRAMDAC | SetCRT2ToLCD | SetCRT2ToTV))))
xgifb_reg_or(pVBInfo->Part1Port, 0x00, 0x80);
- if ((pVBInfo->SetFlag & DisableChB) ||
- (pVBInfo->VBInfo &
+ if ((pVBInfo->VBInfo &
(DisableCRT2Display | SetSimuScanMode)) ||
(!(pVBInfo->VBInfo & XGI_SetCRT2ToLCDA)) ||
(pVBInfo->VBInfo &
@@ -5308,21 +5162,6 @@ void XGI_LockCRT2(struct xgi_hw_device_info *HwDeviceExtension,
}
-unsigned char XGI_BridgeIsOn(struct vb_device_info *pVBInfo)
-{
- unsigned short flag;
-
- if (pVBInfo->IF_DEF_LVDS == 1) {
- return 1;
- } else {
- flag = xgifb_reg_get(pVBInfo->Part4Port, 0x00);
- if ((flag == 1) || (flag == 2))
- return 1; /* 301b */
- else
- return 0;
- }
-}
-
unsigned short XGI_GetRatePtrCRT2(struct xgi_hw_device_info *pXGIHWDE,
unsigned short ModeNo, unsigned short ModeIdIndex,
struct vb_device_info *pVBInfo)
@@ -5344,15 +5183,10 @@ unsigned short XGI_GetRatePtrCRT2(struct xgi_hw_device_info *pXGIHWDE,
if (pVBInfo->SetFlag & ProgrammingCRT2) {
if (pVBInfo->VBInfo & (SetCRT2ToLCD | XGI_SetCRT2ToLCDA)) {
- if (pVBInfo->IF_DEF_LVDS == 0) {
- temp = LCDARefreshIndex[
- pVBInfo->LCDResInfo & 0x07];
+ temp = LCDARefreshIndex[pVBInfo->LCDResInfo & 0x07];
- if (index > temp)
- index = temp;
- } else {
- index = 0;
- }
+ if (index > temp)
+ index = temp;
}
}
@@ -5555,53 +5389,37 @@ static void XGI_EnableBridge(struct xgifb_video_info *xgifb_info,
if (pVBInfo->VBType & (VB_SIS301B | VB_SIS302B | VB_SIS301LV
| VB_SIS302LV | VB_XGI301C)) {
- if (!(pVBInfo->SetFlag & DisableChA)) {
- if ((pVBInfo->SetFlag & EnableChA) ||
- (pVBInfo->VBInfo & SetCRT2ToDualEdge)) {
- /* Power on */
- xgifb_reg_set(pVBInfo->Part1Port, 0x1E, 0x20);
+ if (pVBInfo->VBInfo & SetCRT2ToDualEdge)
+ /* Power on */
+ xgifb_reg_set(pVBInfo->Part1Port, 0x1E, 0x20);
+
+ if (pVBInfo->VBInfo & (SetCRT2ToLCD | SetCRT2ToTV |
+ SetCRT2ToRAMDAC)) {
+ tempah = xgifb_reg_get(pVBInfo->P3c4, 0x32);
+ tempah &= 0xDF;
+ if (pVBInfo->VBInfo & SetInSlaveMode) {
+ if (!(pVBInfo->VBInfo & SetCRT2ToRAMDAC))
+ tempah |= 0x20;
}
- }
+ xgifb_reg_set(pVBInfo->P3c4, 0x32, tempah);
+ xgifb_reg_or(pVBInfo->P3c4, 0x1E, 0x20);
- if (!(pVBInfo->SetFlag & DisableChB)) {
- if ((pVBInfo->SetFlag & EnableChB) || (pVBInfo->VBInfo
- & (SetCRT2ToLCD | SetCRT2ToTV
- | SetCRT2ToRAMDAC))) {
- tempah = xgifb_reg_get(pVBInfo->P3c4, 0x32);
- tempah &= 0xDF;
- if (pVBInfo->VBInfo & SetInSlaveMode) {
- if (!(pVBInfo->VBInfo &
- SetCRT2ToRAMDAC))
- tempah |= 0x20;
- }
- xgifb_reg_set(pVBInfo->P3c4, 0x32, tempah);
- xgifb_reg_or(pVBInfo->P3c4, 0x1E, 0x20);
+ tempah = xgifb_reg_get(pVBInfo->Part1Port, 0x2E);
- tempah = xgifb_reg_get(pVBInfo->Part1Port,
- 0x2E);
-
- if (!(tempah & 0x80))
- xgifb_reg_or(pVBInfo->Part1Port,
- 0x2E, 0x80);
- xgifb_reg_and(pVBInfo->Part1Port, 0x00, 0x7F);
- }
+ if (!(tempah & 0x80))
+ xgifb_reg_or(pVBInfo->Part1Port, 0x2E, 0x80);
+ xgifb_reg_and(pVBInfo->Part1Port, 0x00, 0x7F);
}
- if ((pVBInfo->SetFlag & (EnableChA | EnableChB))
- || (!(pVBInfo->VBInfo & DisableCRT2Display))) {
+ if (!(pVBInfo->VBInfo & DisableCRT2Display)) {
xgifb_reg_and_or(pVBInfo->Part2Port, 0x00, ~0xE0,
0x20); /* shampoo 0129 */
if (pVBInfo->VBType & (VB_SIS302LV | VB_XGI301C)) {
- if (!XGI_EnableChISLCD(pVBInfo, false)) {
- if (XGI_EnableChISLCD(pVBInfo, true) ||
- (pVBInfo->VBInfo &
- (SetCRT2ToLCD | XGI_SetCRT2ToLCDA)))
- /* LVDS PLL power on */
- xgifb_reg_and(
- pVBInfo->Part4Port,
- 0x2A,
- 0x7F);
- }
+ if (pVBInfo->VBInfo &
+ (SetCRT2ToLCD | XGI_SetCRT2ToLCDA))
+ /* LVDS PLL power on */
+ xgifb_reg_and(pVBInfo->Part4Port, 0x2A,
+ 0x7F);
/* LVDS Driver power on */
xgifb_reg_and(pVBInfo->Part4Port, 0x30, 0x7F);
}
@@ -5618,32 +5436,14 @@ static void XGI_EnableBridge(struct xgifb_video_info *xgifb_info,
tempah = tempah & 0x40;
if (pVBInfo->VBInfo & XGI_SetCRT2ToLCDA)
tempah = tempah ^ 0xC0;
-
- if (pVBInfo->SetFlag & DisableChB)
- tempah &= 0xBF;
-
- if (pVBInfo->SetFlag & DisableChA)
- tempah &= 0x7F;
-
- if (pVBInfo->SetFlag & EnableChB)
- tempah |= 0x40;
-
- if (pVBInfo->SetFlag & EnableChA)
- tempah |= 0x80;
}
}
/* EnablePart4_1F */
xgifb_reg_or(pVBInfo->Part4Port, 0x1F, tempah);
- if (!(pVBInfo->SetFlag & DisableChA)) {
- if (!(pVBInfo->SetFlag & GatingCRT)) {
- XGI_DisableGatingCRT(HwDeviceExtension,
- pVBInfo);
- XGI_DisplayOn(xgifb_info, HwDeviceExtension,
- pVBInfo);
- }
- }
+ XGI_DisableGatingCRT(HwDeviceExtension, pVBInfo);
+ XGI_DisplayOn(xgifb_info, HwDeviceExtension, pVBInfo);
} /* 301 */
else { /* LVDS */
if (pVBInfo->VBInfo & (SetCRT2ToTV | SetCRT2ToLCD
@@ -5745,16 +5545,8 @@ unsigned char XGISetModeNew(struct xgifb_video_info *xgifb_info,
struct vb_device_info *pVBInfo = &VBINF;
pVBInfo->IF_DEF_LVDS = 0;
- if (HwDeviceExtension->jChipType >= XG20) {
- pVBInfo->IF_DEF_YPbPr = 0;
- pVBInfo->IF_DEF_HiVision = 0;
- pVBInfo->IF_DEF_CRT2Monitor = 0;
+ if (HwDeviceExtension->jChipType >= XG20)
pVBInfo->VBType = 0; /*set VBType default 0*/
- } else {
- pVBInfo->IF_DEF_YPbPr = 1;
- pVBInfo->IF_DEF_HiVision = 1;
- pVBInfo->IF_DEF_CRT2Monitor = 1;
- }
XGIRegInit(pVBInfo, xgifb_info->vga_base);
@@ -5770,9 +5562,6 @@ unsigned char XGISetModeNew(struct xgifb_video_info *xgifb_info,
}
}
- if (HwDeviceExtension->jChipType < XG20)
- XGI_GetVBType(pVBInfo);
-
InitTo330Pointer(HwDeviceExtension->jChipType, pVBInfo);
if (ModeNo & 0x80)
ModeNo = ModeNo & 0x7F;
diff --git a/drivers/staging/xgifb/vb_setmode.h b/drivers/staging/xgifb/vb_setmode.h
index 552482858c1c..2c0a31c8dfd5 100644
--- a/drivers/staging/xgifb/vb_setmode.h
+++ b/drivers/staging/xgifb/vb_setmode.h
@@ -18,7 +18,6 @@ extern unsigned char XGISetModeNew(struct xgifb_video_info *xgifb_info,
extern unsigned char XGI_SearchModeID(unsigned short ModeNo,
unsigned short *ModeIdIndex,
struct vb_device_info *);
-extern unsigned char XGI_BridgeIsOn(struct vb_device_info *);
extern unsigned short XGI_GetRatePtrCRT2(struct xgi_hw_device_info *pXGIHWDE,
unsigned short ModeNo,
unsigned short ModeIdIndex,
diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile
index 7f4a3019e9c4..cb0f9ced6a93 100644
--- a/drivers/staging/zram/Makefile
+++ b/drivers/staging/zram/Makefile
@@ -1,3 +1,3 @@
-zram-y := zram_drv.o zram_sysfs.o
+zram-y := zram_drv.o
obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index e34e3fe0ae2e..82c7202fd5cc 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -37,28 +37,107 @@
/* Globals */
static int zram_major;
-struct zram *zram_devices;
+static struct zram *zram_devices;
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
-static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc)
+static inline struct zram *dev_to_zram(struct device *dev)
{
- spin_lock(&zram->stat64_lock);
- *v = *v + inc;
- spin_unlock(&zram->stat64_lock);
+ return (struct zram *)dev_to_disk(dev)->private_data;
}
-static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec)
+static ssize_t disksize_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- spin_lock(&zram->stat64_lock);
- *v = *v - dec;
- spin_unlock(&zram->stat64_lock);
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n", zram->disksize);
+}
+
+static ssize_t initstate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%u\n", zram->init_done);
+}
+
+static ssize_t num_reads_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.num_reads));
+}
+
+static ssize_t num_writes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.num_writes));
+}
+
+static ssize_t invalid_io_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.invalid_io));
+}
+
+static ssize_t notify_free_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.notify_free));
}
-static void zram_stat64_inc(struct zram *zram, u64 *v)
+static ssize_t zero_pages_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- zram_stat64_add(zram, v, 1);
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%u\n", zram->stats.pages_zero);
+}
+
+static ssize_t orig_data_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
+}
+
+static ssize_t compr_data_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return sprintf(buf, "%llu\n",
+ (u64)atomic64_read(&zram->stats.compr_size));
+}
+
+static ssize_t mem_used_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u64 val = 0;
+ struct zram *zram = dev_to_zram(dev);
+ struct zram_meta *meta = zram->meta;
+
+ down_read(&zram->init_lock);
+ if (zram->init_done)
+ val = zs_get_total_size_bytes(meta->mem_pool);
+ up_read(&zram->init_lock);
+
+ return sprintf(buf, "%llu\n", val);
}
static int zram_test_flag(struct zram_meta *meta, u32 index,
@@ -79,6 +158,97 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index,
meta->table[index].flags &= ~BIT(flag);
}
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline int valid_io_request(struct zram *zram, struct bio *bio)
+{
+ u64 start, end, bound;
+
+ /* unaligned request */
+ if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ return 0;
+ if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ return 0;
+
+ start = bio->bi_sector;
+ end = start + (bio->bi_size >> SECTOR_SHIFT);
+ bound = zram->disksize >> SECTOR_SHIFT;
+ /* out of range range */
+ if (unlikely(start >= bound || end > bound || start > end))
+ return 0;
+
+ /* I/O request is valid */
+ return 1;
+}
+
+static void zram_meta_free(struct zram_meta *meta)
+{
+ zs_destroy_pool(meta->mem_pool);
+ kfree(meta->compress_workmem);
+ free_pages((unsigned long)meta->compress_buffer, 1);
+ vfree(meta->table);
+ kfree(meta);
+}
+
+static struct zram_meta *zram_meta_alloc(u64 disksize)
+{
+ size_t num_pages;
+ struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+ if (!meta)
+ goto out;
+
+ meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ if (!meta->compress_workmem)
+ goto free_meta;
+
+ meta->compress_buffer =
+ (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!meta->compress_buffer) {
+ pr_err("Error allocating compressor buffer space\n");
+ goto free_workmem;
+ }
+
+ num_pages = disksize >> PAGE_SHIFT;
+ meta->table = vzalloc(num_pages * sizeof(*meta->table));
+ if (!meta->table) {
+ pr_err("Error allocating zram address table\n");
+ goto free_buffer;
+ }
+
+ meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
+ if (!meta->mem_pool) {
+ pr_err("Error creating memory pool\n");
+ goto free_table;
+ }
+
+ return meta;
+
+free_table:
+ vfree(meta->table);
+free_buffer:
+ free_pages((unsigned long)meta->compress_buffer, 1);
+free_workmem:
+ kfree(meta->compress_workmem);
+free_meta:
+ kfree(meta);
+ meta = NULL;
+out:
+ return meta;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
static int page_zero_filled(void *ptr)
{
unsigned int pos;
@@ -94,6 +264,21 @@ static int page_zero_filled(void *ptr)
return 1;
}
+static void handle_zero_page(struct bio_vec *bvec)
+{
+ struct page *page = bvec->bv_page;
+ void *user_mem;
+
+ user_mem = kmap_atomic(page);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
+ kunmap_atomic(user_mem);
+
+ flush_dcache_page(page);
+}
+
static void zram_free_page(struct zram *zram, size_t index)
{
struct zram_meta *meta = zram->meta;
@@ -120,31 +305,13 @@ static void zram_free_page(struct zram *zram, size_t index)
if (size <= PAGE_SIZE / 2)
zram->stats.good_compress--;
- zram_stat64_sub(zram, &zram->stats.compr_size,
- meta->table[index].size);
+ atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
zram->stats.pages_stored--;
meta->table[index].handle = 0;
meta->table[index].size = 0;
}
-static void handle_zero_page(struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
-}
-
-static inline int is_partial_io(struct bio_vec *bvec)
-{
- return bvec->bv_len != PAGE_SIZE;
-}
-
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
int ret = LZO_E_OK;
@@ -154,13 +321,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
unsigned long handle = meta->table[index].handle;
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- memset(mem, 0, PAGE_SIZE);
+ clear_page(mem);
return 0;
}
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (meta->table[index].size == PAGE_SIZE)
- memcpy(mem, cmem, PAGE_SIZE);
+ copy_page(mem, cmem);
else
ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
mem, &clen);
@@ -169,7 +336,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret != LZO_E_OK)) {
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- zram_stat64_inc(zram, &zram->stats.failed_reads);
+ atomic64_inc(&zram->stats.failed_reads);
return ret;
}
@@ -272,8 +439,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
zram->stats.pages_zero++;
zram_set_flag(meta, index, ZRAM_ZERO);
ret = 0;
@@ -304,18 +469,20 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
handle = zs_malloc(meta->mem_pool, clen);
if (!handle) {
- pr_info("Error allocating memory for compressed "
- "page: %u, size=%zu\n", index, clen);
+ pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+ index, clen);
ret = -ENOMEM;
goto out;
}
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
+ if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
src = kmap_atomic(page);
- memcpy(cmem, src, clen);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
+ copy_page(cmem, src);
kunmap_atomic(src);
+ } else {
+ memcpy(cmem, src, clen);
+ }
zs_unmap_object(meta->mem_pool, handle);
@@ -323,7 +490,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
meta->table[index].size = clen;
/* Update stats */
- zram_stat64_add(zram, &zram->stats.compr_size, clen);
+ atomic64_add(clen, &zram->stats.compr_size);
zram->stats.pages_stored++;
if (clen <= PAGE_SIZE / 2)
zram->stats.good_compress++;
@@ -333,7 +500,7 @@ out:
kfree(uncmem);
if (ret)
- zram_stat64_inc(zram, &zram->stats.failed_writes);
+ atomic64_inc(&zram->stats.failed_writes);
return ret;
}
@@ -355,11 +522,117 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+static void zram_reset_device(struct zram *zram)
{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+ size_t index;
+ struct zram_meta *meta;
+
+ if (!zram->init_done)
+ return;
+
+ meta = zram->meta;
+ zram->init_done = 0;
+
+ /* Free all pages that are still in this zram device */
+ for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
+ unsigned long handle = meta->table[index].handle;
+ if (!handle)
+ continue;
+
+ zs_free(meta->mem_pool, handle);
+ }
+
+ zram_meta_free(zram->meta);
+ zram->meta = NULL;
+ /* Reset stats */
+ memset(&zram->stats, 0, sizeof(zram->stats));
+
+ zram->disksize = 0;
+ set_capacity(zram->disk, 0);
+}
+
+static void zram_init_device(struct zram *zram, struct zram_meta *meta)
+{
+ if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
+ pr_info(
+ "There is little point creating a zram of greater than "
+ "twice the size of memory since we expect a 2:1 compression "
+ "ratio. Note that zram uses about 0.1%% of the size of "
+ "the disk when not in use so a huge zram is "
+ "wasteful.\n"
+ "\tMemory Size: %lu kB\n"
+ "\tSize you selected: %llu kB\n"
+ "Continuing anyway ...\n",
+ (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
+ );
+ }
+
+ /* zram devices sort of resembles non-rotational disks */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
+
+ zram->meta = meta;
+ zram->init_done = 1;
+
+ pr_debug("Initialization done!\n");
+}
+
+static ssize_t disksize_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ u64 disksize;
+ struct zram_meta *meta;
+ struct zram *zram = dev_to_zram(dev);
+
+ disksize = memparse(buf, NULL);
+ if (!disksize)
+ return -EINVAL;
+
+ disksize = PAGE_ALIGN(disksize);
+ meta = zram_meta_alloc(disksize);
+ down_write(&zram->init_lock);
+ if (zram->init_done) {
+ up_write(&zram->init_lock);
+ zram_meta_free(meta);
+ pr_info("Cannot change disksize for initialized device\n");
+ return -EBUSY;
+ }
+
+ zram->disksize = disksize;
+ set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+ zram_init_device(zram, meta);
+ up_write(&zram->init_lock);
+
+ return len;
+}
+
+static ssize_t reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ int ret;
+ unsigned short do_reset;
+ struct zram *zram;
+ struct block_device *bdev;
+
+ zram = dev_to_zram(dev);
+ bdev = bdget_disk(zram->disk, 0);
+
+ /* Do not reset an active device! */
+ if (bdev->bd_holders)
+ return -EBUSY;
+
+ ret = kstrtou16(buf, 10, &do_reset);
+ if (ret)
+ return ret;
+
+ if (!do_reset)
+ return -EINVAL;
+
+ /* Make sure all pending I/O is finished */
+ if (bdev)
+ fsync_bdev(bdev);
+
+ zram_reset_device(zram);
+ return len;
}
static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
@@ -370,10 +643,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
switch (rw) {
case READ:
- zram_stat64_inc(zram, &zram->stats.num_reads);
+ atomic64_inc(&zram->stats.num_reads);
break;
case WRITE:
- zram_stat64_inc(zram, &zram->stats.num_writes);
+ atomic64_inc(&zram->stats.num_writes);
break;
}
@@ -418,23 +691,6 @@ out:
}
/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram, struct bio *bio)
-{
- if (unlikely(
- (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) ||
- (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) ||
- (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) {
-
- return 0;
- }
-
- /* I/O request is valid */
- return 1;
-}
-
-/*
* Handler function for all zram I/O requests.
*/
static void zram_make_request(struct request_queue *queue, struct bio *bio)
@@ -446,7 +702,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
goto error;
if (!valid_io_request(zram, bio)) {
- zram_stat64_inc(zram, &zram->stats.invalid_io);
+ atomic64_inc(&zram->stats.invalid_io);
goto error;
}
@@ -460,130 +716,16 @@ error:
bio_io_error(bio);
}
-static void __zram_reset_device(struct zram *zram)
-{
- size_t index;
- struct zram_meta *meta;
-
- if (!zram->init_done)
- return;
-
- meta = zram->meta;
- zram->init_done = 0;
-
- /* Free all pages that are still in this zram device */
- for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
- unsigned long handle = meta->table[index].handle;
- if (!handle)
- continue;
-
- zs_free(meta->mem_pool, handle);
- }
-
- zram_meta_free(zram->meta);
- zram->meta = NULL;
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
-
- zram->disksize = 0;
- set_capacity(zram->disk, 0);
-}
-
-void zram_reset_device(struct zram *zram)
-{
- down_write(&zram->init_lock);
- __zram_reset_device(zram);
- up_write(&zram->init_lock);
-}
-
-void zram_meta_free(struct zram_meta *meta)
-{
- zs_destroy_pool(meta->mem_pool);
- kfree(meta->compress_workmem);
- free_pages((unsigned long)meta->compress_buffer, 1);
- vfree(meta->table);
- kfree(meta);
-}
-
-struct zram_meta *zram_meta_alloc(u64 disksize)
-{
- size_t num_pages;
- struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
- if (!meta)
- goto out;
-
- meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!meta->compress_workmem)
- goto free_meta;
-
- meta->compress_buffer =
- (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
- if (!meta->compress_buffer) {
- pr_err("Error allocating compressor buffer space\n");
- goto free_workmem;
- }
-
- num_pages = disksize >> PAGE_SHIFT;
- meta->table = vzalloc(num_pages * sizeof(*meta->table));
- if (!meta->table) {
- pr_err("Error allocating zram address table\n");
- goto free_buffer;
- }
-
- meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
- if (!meta->mem_pool) {
- pr_err("Error creating memory pool\n");
- goto free_table;
- }
-
- return meta;
-
-free_table:
- vfree(meta->table);
-free_buffer:
- free_pages((unsigned long)meta->compress_buffer, 1);
-free_workmem:
- kfree(meta->compress_workmem);
-free_meta:
- kfree(meta);
- meta = NULL;
-out:
- return meta;
-}
-
-void zram_init_device(struct zram *zram, struct zram_meta *meta)
-{
- if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
- pr_info(
- "There is little point creating a zram of greater than "
- "twice the size of memory since we expect a 2:1 compression "
- "ratio. Note that zram uses about 0.1%% of the size of "
- "the disk when not in use so a huge zram is "
- "wasteful.\n"
- "\tMemory Size: %lu kB\n"
- "\tSize you selected: %llu kB\n"
- "Continuing anyway ...\n",
- (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
- );
- }
-
- /* zram devices sort of resembles non-rotational disks */
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
-
- zram->meta = meta;
- zram->init_done = 1;
-
- pr_debug("Initialization done!\n");
-}
-
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
zram = bdev->bd_disk->private_data;
+ down_write(&zram->lock);
zram_free_page(zram, index);
- zram_stat64_inc(zram, &zram->stats.notify_free);
+ up_write(&zram->lock);
+ atomic64_inc(&zram->stats.notify_free);
}
static const struct block_device_operations zram_devops = {
@@ -591,19 +733,49 @@ static const struct block_device_operations zram_devops = {
.owner = THIS_MODULE
};
+static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
+ disksize_show, disksize_store);
+static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
+static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
+static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
+static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
+static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
+static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
+static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
+static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
+static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
+static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
+
+static struct attribute *zram_disk_attrs[] = {
+ &dev_attr_disksize.attr,
+ &dev_attr_initstate.attr,
+ &dev_attr_reset.attr,
+ &dev_attr_num_reads.attr,
+ &dev_attr_num_writes.attr,
+ &dev_attr_invalid_io.attr,
+ &dev_attr_notify_free.attr,
+ &dev_attr_zero_pages.attr,
+ &dev_attr_orig_data_size.attr,
+ &dev_attr_compr_data_size.attr,
+ &dev_attr_mem_used_total.attr,
+ NULL,
+};
+
+static struct attribute_group zram_disk_attr_group = {
+ .attrs = zram_disk_attrs,
+};
+
static int create_device(struct zram *zram, int device_id)
{
- int ret = 0;
+ int ret = -ENOMEM;
init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
- spin_lock_init(&zram->stat64_lock);
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- ret = -ENOMEM;
goto out;
}
@@ -613,11 +785,9 @@ static int create_device(struct zram *zram, int device_id)
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
- blk_cleanup_queue(zram->queue);
pr_warn("Error allocating disk structure for device %d\n",
device_id);
- ret = -ENOMEM;
- goto out;
+ goto out_free_queue;
}
zram->disk->major = zram_major;
@@ -646,11 +816,17 @@ static int create_device(struct zram *zram, int device_id)
&zram_disk_attr_group);
if (ret < 0) {
pr_warn("Error creating sysfs group");
- goto out;
+ goto out_free_disk;
}
zram->init_done = 0;
+ return 0;
+out_free_disk:
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+out_free_queue:
+ blk_cleanup_queue(zram->queue);
out:
return ret;
}
@@ -669,11 +845,6 @@ static void destroy_device(struct zram *zram)
blk_cleanup_queue(zram->queue);
}
-unsigned int zram_get_num_devices(void)
-{
- return num_devices;
-}
-
static int __init zram_init(void)
{
int ret, dev_id;
@@ -727,8 +898,10 @@ static void __exit zram_exit(void)
for (i = 0; i < num_devices; i++) {
zram = &zram_devices[i];
+ get_disk(zram->disk);
destroy_device(zram);
zram_reset_device(zram);
+ put_disk(zram->disk);
}
unregister_blkdev(zram_major, "zram");
@@ -737,12 +910,12 @@ static void __exit zram_exit(void)
pr_debug("Cleanup done!\n");
}
-module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
-
module_init(zram_init);
module_exit(zram_exit);
+module_param(num_devices, uint, 0);
+MODULE_PARM_DESC(num_devices, "Number of zram devices");
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 2d1a3f1e8edb..9e57bfb29b4f 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -69,14 +69,18 @@ struct table {
u8 flags;
} __aligned(4);
+/*
+ * All 64bit fields should only be manipulated by 64bit atomic accessors.
+ * All modifications to 32bit counter should be protected by zram->lock.
+ */
struct zram_stats {
- u64 compr_size; /* compressed size of pages stored */
- u64 num_reads; /* failed + successful */
- u64 num_writes; /* --do-- */
- u64 failed_reads; /* should NEVER! happen */
- u64 failed_writes; /* can happen when memory is too low */
- u64 invalid_io; /* non-page-aligned I/O requests */
- u64 notify_free; /* no. of swap slot free notifications */
+ atomic64_t compr_size; /* compressed size of pages stored */
+ atomic64_t num_reads; /* failed + successful */
+ atomic64_t num_writes; /* --do-- */
+ atomic64_t failed_reads; /* should NEVER! happen */
+ atomic64_t failed_writes; /* can happen when memory is too low */
+ atomic64_t invalid_io; /* non-page-aligned I/O requests */
+ atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
@@ -92,9 +96,9 @@ struct zram_meta {
struct zram {
struct zram_meta *meta;
- spinlock_t stat64_lock; /* protect 64-bit stats */
- struct rw_semaphore lock; /* protect compression buffers and table
- * against concurrent read and writes */
+ struct rw_semaphore lock; /* protect compression buffers, table,
+ * 32bit stat counters against concurrent
+ * notifications, reads and writes */
struct request_queue *queue;
struct gendisk *disk;
int init_done;
@@ -108,16 +112,4 @@ struct zram {
struct zram_stats stats;
};
-
-extern struct zram *zram_devices;
-unsigned int zram_get_num_devices(void);
-#ifdef CONFIG_SYSFS
-extern struct attribute_group zram_disk_attr_group;
-#endif
-
-extern void zram_reset_device(struct zram *zram);
-extern struct zram_meta *zram_meta_alloc(u64 disksize);
-extern void zram_meta_free(struct zram_meta *meta);
-extern void zram_init_device(struct zram *zram, struct zram_meta *meta);
-
#endif
diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
deleted file mode 100644
index e6a929d452f7..000000000000
--- a/drivers/staging/zram/zram_sysfs.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Compressed RAM block device
- *
- * Copyright (C) 2008, 2009, 2010 Nitin Gupta
- *
- * This code is released using a dual license strategy: BSD/GPL
- * You can choose the licence that better fits your requirements.
- *
- * Released under the terms of 3-clause BSD License
- * Released under the terms of GNU General Public License Version 2.0
- *
- * Project home: http://compcache.googlecode.com/
- */
-
-#include <linux/device.h>
-#include <linux/genhd.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-
-#include "zram_drv.h"
-
-static u64 zram_stat64_read(struct zram *zram, u64 *v)
-{
- u64 val;
-
- spin_lock(&zram->stat64_lock);
- val = *v;
- spin_unlock(&zram->stat64_lock);
-
- return val;
-}
-
-static struct zram *dev_to_zram(struct device *dev)
-{
- int i;
- struct zram *zram = NULL;
-
- for (i = 0; i < zram_get_num_devices(); i++) {
- zram = &zram_devices[i];
- if (disk_to_dev(zram->disk) == dev)
- break;
- }
-
- return zram;
-}
-
-static ssize_t disksize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n", zram->disksize);
-}
-
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- u64 disksize;
- struct zram_meta *meta;
- struct zram *zram = dev_to_zram(dev);
-
- disksize = memparse(buf, NULL);
- if (!disksize)
- return -EINVAL;
-
- disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(disksize);
- down_write(&zram->init_lock);
- if (zram->init_done) {
- up_write(&zram->init_lock);
- zram_meta_free(meta);
- pr_info("Cannot change disksize for initialized device\n");
- return -EBUSY;
- }
-
- zram->disksize = disksize;
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- zram_init_device(zram, meta);
- up_write(&zram->init_lock);
-
- return len;
-}
-
-static ssize_t initstate_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", zram->init_done);
-}
-
-static ssize_t reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- int ret;
- unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
-
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- /* Do not reset an active device! */
- if (bdev->bd_holders)
- return -EBUSY;
-
- ret = kstrtou16(buf, 10, &do_reset);
- if (ret)
- return ret;
-
- if (!do_reset)
- return -EINVAL;
-
- /* Make sure all pending I/O is finished */
- if (bdev)
- fsync_bdev(bdev);
-
- zram_reset_device(zram);
- return len;
-}
-
-static ssize_t num_reads_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_reads));
-}
-
-static ssize_t num_writes_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_writes));
-}
-
-static ssize_t invalid_io_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.invalid_io));
-}
-
-static ssize_t notify_free_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.notify_free));
-}
-
-static ssize_t zero_pages_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", zram->stats.pages_zero);
-}
-
-static ssize_t orig_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
-}
-
-static ssize_t compr_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.compr_size));
-}
-
-static ssize_t mem_used_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u64 val = 0;
- struct zram *zram = dev_to_zram(dev);
- struct zram_meta *meta = zram->meta;
-
- if (zram->init_done)
- val = zs_get_total_size_bytes(meta->mem_pool);
-
- return sprintf(buf, "%llu\n", val);
-}
-
-static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
- disksize_show, disksize_store);
-static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
-static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
-static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
-static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
-static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
-static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
-static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
-static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
-static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
-
-static struct attribute *zram_disk_attrs[] = {
- &dev_attr_disksize.attr,
- &dev_attr_initstate.attr,
- &dev_attr_reset.attr,
- &dev_attr_num_reads.attr,
- &dev_attr_num_writes.attr,
- &dev_attr_invalid_io.attr,
- &dev_attr_notify_free.attr,
- &dev_attr_zero_pages.attr,
- &dev_attr_orig_data_size.attr,
- &dev_attr_compr_data_size.attr,
- &dev_attr_mem_used_total.attr,
- NULL,
-};
-
-struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
-};
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index f82f7e69c8a5..4bb275b2d98f 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -224,7 +224,7 @@ struct zs_pool {
* performs VM mapping faster than copying, then it should be added here
* so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use
* page table mapping rather than copying for object mapping.
-*/
+ */
#if defined(CONFIG_ARM) && !defined(MODULE)
#define USE_PGTABLE_MAPPING
#endif
@@ -844,8 +844,7 @@ void zs_destroy_pool(struct zs_pool *pool)
for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
if (class->fullness_list[fg]) {
- pr_info("Freeing non-empty class with size "
- "%db, fullness group %d\n",
+ pr_info("Freeing non-empty class with size %db, fullness group %d\n",
class->size, fg);
}
}
@@ -968,7 +967,7 @@ EXPORT_SYMBOL_GPL(zs_free);
* against nested mappings.
*
* This function returns with preemption and page faults disabled.
-*/
+ */
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm)
{
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h
index 46dbd0558d86..fbe6bec421aa 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/drivers/staging/zsmalloc/zsmalloc.h
@@ -19,7 +19,7 @@
* zsmalloc mapping modes
*
* NOTE: These only make a difference when a mapped object spans pages
-*/
+ */
enum zs_mapmode {
ZS_MM_RW, /* normal read-write mapping */
ZS_MM_RO, /* read-only (no copy-out at unmap time) */